Dana Vrajitoru
B424 Parallel and Distributed Programming

Numerical Parallel Algorithms

Systems of Linear Equations

Gauss Elimination

Sequential Algorithm

for (i=0; i < n-1; i++) { 
   for (j=i+1; j < n; j++) { 
      m = a[j][i]/a[i][i]; // multiplier
      for (k=j; k < n; k++){ 
         a[j][k] = a[j][k] - a[i][k]*m;
      }
      b[j]=b[j] - b[i]*m; // adjust b too
   }
}

Computing the Solution

x[n-1] = b[n-1]/a[n-1][n-1]; 
for (i=n-2; i >= 0; i--) { 
   r = b[i];
   for (j=i+1; j < n; j++) 
      r = r - a[i][j]*x[j]; 
   x[i] = r / a[i][i];
}

Gauss Elimination

Parallel Implementation

Parallel idea:

Improvements

Jacobi Iteration

Parallel Version

Cholesky Decomposition

Method

Compute a Cell of L

float Compute_cell(i, j) {
   x = a[i, j];
   for (k=0; k < i && k < j; k++)
      x -= a[i,k]*a[j,k];
   if (i != j)
      x = x/a[i,i];
   else
      x = sqrt(x);
   return x;
}

Sequential Algorithm

Cholesky() {
   for (i=0; i < n; i++)
      for (j=i; j < n; j++)
         a[i,j] = Compute_cell(i, j);
} 

Workers

Parallel Algorithm

Cholesy_Worker(thread_no) {
   col = id;
   for (d=0; d < 2*n-1; d++) {
      if (d >= 2*col && d < n + col) {
         row = d – col;
         a[row,col] = Compute_cell(row, col);
      }
      Barrier(thread_no);
   }
}

LU Decomposition

Sequential Algorithm

Sequential Algorithm

for (j = 1; j < n; j++)
   for (i = 1; i < j; i++) {
      U[i][j] = A[i][j];
      for (k = 1; k < i; k++) 
         U[i][j] -= L[i][k] * U[k][j];
   }
   for (i = j+1; i < n; i++) {
      L[i][j] = A[i][j];
      for (k = 1; k < j; k++) 
         L[i][j] -= L[i][k] * U[k][j];
      L[i][j] /= U[j][j];
   }
}

Parallel Version

Linear Algebra Libraries