Actual source code: bdfact.c

  1: /*$Id: bdfact.c,v 1.65 2001/08/07 03:02:53 balay Exp $*/

  3: /* Block diagonal matrix format - factorization and triangular solves */

 5:  #include src/mat/impls/bdiag/seq/bdiag.h
 6:  #include src/vec/vecimpl.h
 7:  #include src/inline/ilu.h

  9: #undef __FUNCT__  
 11: int MatILUFactorSymbolic_SeqBDiag(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *B)
 12: {
 13:   PetscTruth   idn;
 14:   int          ierr;

 17:   if (A->m != A->n) SETERRQ(PETSC_ERR_SUP,"Matrix must be square");
 18:   if (isrow) {
 19:     ISIdentity(isrow,&idn);
 20:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
 21:   }
 22:   if (iscol) {
 23:     ISIdentity(iscol,&idn);
 24:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
 25:   }
 26:   if (info->levels != 0) {
 27:     SETERRQ(PETSC_ERR_SUP,"Only ILU(0) is supported");
 28:   }
 29:   MatConvert(A,MATSAME,B);

 31:   /* Must set to zero for repeated calls with different nonzero structure */
 32:   (*B)->factor = 0;
 33:   return(0);
 34: }

 36: #undef __FUNCT__  
 38: int MatILUFactor_SeqBDiag(Mat A,IS isrow,IS iscol,MatILUInfo *info)
 39: {
 40:   PetscTruth   idn;
 41:   int          ierr;

 44:   /* For now, no fill is allocated in symbolic factorization phase, so we
 45:      directly use the input matrix for numeric factorization. */
 46:   if (A->m != A->n) SETERRQ(PETSC_ERR_SUP,"Matrix must be square");
 47:   if (isrow) {
 48:     ISIdentity(isrow,&idn);
 49:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
 50:   }
 51:   if (iscol) {
 52:     ISIdentity(iscol,&idn);
 53:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
 54:   }
 55:   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only ILU(0) is supported");
 56:   MatLUFactorNumeric(A,&A);
 57:   return(0);
 58: }

 60: /* --------------------------------------------------------------------------*/
 61: #undef __FUNCT__  
 63: int MatLUFactorNumeric_SeqBDiag_N(Mat A,Mat *B)
 64: {
 65:   Mat          C = *B;
 66:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)C->data,*a1 = (Mat_SeqBDiag*)A->data;
 67:   int          k,d,d2,dgk,elim_row,elim_col,bs = a->bs,knb,knb2,bs2 = bs*bs;
 68:   int          dnum,nd = a->nd,mblock = a->mblock,nblock = a->nblock,ierr;
 69:   int          *diag = a->diag, m = A->m,mainbd = a->mainbd,*dgptr,len,i;
 70:   PetscScalar  **dv = a->diagv,*dd = dv[mainbd],*v_work;
 71:   PetscScalar  *multiplier;

 74:   /* Copy input matrix to factored matrix if we've already factored the
 75:      matrix before AND the nonzero structure remains the same.  This is done
 76:      in symbolic factorization the first time through, but there's no symbolic
 77:      factorization for successive calls with same matrix sparsity structure. */
 78:   if (C->factor == FACTOR_LU) {
 79:     for (i=0; i<a->nd; i++) {
 80:       len = a->bdlen[i] * bs2 * sizeof(PetscScalar);
 81:       d   = diag[i];
 82:       if (d > 0) {
 83:         PetscMemcpy(dv[i]+bs2*d,a1->diagv[i]+bs2*d,len);
 84:       } else {
 85:         PetscMemcpy(dv[i],a1->diagv[i],len);
 86:       }
 87:     }
 88:   }

 90:   if (!a->pivot) {
 91:     PetscMalloc((m+1)*sizeof(int),&a->pivot);
 92:     PetscLogObjectMemory(C,m*sizeof(int));
 93:   }
 94:   ierr       = PetscMalloc((bs2+bs+1)*sizeof(PetscScalar),&v_work);
 95:   multiplier = v_work + bs;
 96:   ierr       = PetscMalloc((mblock+nblock+1)*sizeof(int),&dgptr);
 97:   ierr       = PetscMemzero(dgptr,(mblock+nblock)*sizeof(int));
 98:   for (k=0; k<nd; k++) dgptr[diag[k]+mblock] = k+1;
 99:   for (k=0; k<mblock; k++) { /* k = block pivot_row */
100:     knb = k*bs; knb2 = knb*bs;
101:     /* invert the diagonal block */
102:     Kernel_A_gets_inverse_A(bs,dd+knb2,a->pivot+knb,v_work);
103:     for (d=mainbd-1; d>=0; d--) {
104:       elim_row = k + diag[d];
105:       if (elim_row < mblock) { /* sweep down */
106:         /* dv[d][knb2]: test if entire block is zero? */
107:         Kernel_A_gets_A_times_B(bs,&dv[d][elim_row*bs2],dd+knb2,multiplier);
108:         for (d2=d+1; d2<nd; d2++) {
109:           elim_col = elim_row - diag[d2];
110:           if (elim_col >=0 && elim_col < nblock) {
111:             dgk = k - elim_col;
112:             if ((dnum = dgptr[dgk+mblock])) {
113:               Kernel_A_gets_A_minus_B_times_C(bs,&dv[d2][elim_row*bs2],
114:                              &dv[d][elim_row*bs2],&dv[dnum-1][knb2]);
115:             }
116:           }
117:         }
118:       }
119:     }
120:   }
121:   PetscFree(dgptr);
122:   PetscFree(v_work);
123:   C->factor = FACTOR_LU;
124:   return(0);
125: }

127: #undef __FUNCT__  
129: int MatLUFactorNumeric_SeqBDiag_1(Mat A,Mat *B)
130: {
131:   Mat          C = *B;
132:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)C->data,*a1 = (Mat_SeqBDiag*)A->data;
133:   int          k,d,d2,dgk,elim_row,elim_col,dnum,nd = a->nd,i,len,ierr;
134:   int          *diag = a->diag,n = A->n,m = A->m,mainbd = a->mainbd,*dgptr;
135:   PetscScalar  **dv = a->diagv,*dd = dv[mainbd],mult;

138:   /* Copy input matrix to factored matrix if we've already factored the
139:      matrix before AND the nonzero structure remains the same.  This is done
140:      in symbolic factorization the first time through, but there's no symbolic
141:      factorization for successive calls with same matrix sparsity structure. */
142:   if (C->factor == FACTOR_LU) {
143:     for (i=0; i<nd; i++) {
144:       len = a->bdlen[i] * sizeof(PetscScalar);
145:       d   = diag[i];
146:       if (d > 0) {
147:         PetscMemcpy(dv[i]+d,a1->diagv[i]+d,len);
148:       } else {
149:         PetscMemcpy(dv[i],a1->diagv[i],len);
150:       }
151:     }
152:   }

154:   PetscMalloc((m+n+1)*sizeof(int),&dgptr);
155:   ierr  = PetscMemzero(dgptr,(m+n)*sizeof(int));
156:   for (k=0; k<nd; k++) dgptr[diag[k]+m] = k+1;
157:   for (k=0; k<m; k++) { /* k = pivot_row */
158:     dd[k] = 1.0/dd[k];
159:     for (d=mainbd-1; d>=0; d--) {
160:       elim_row = k + diag[d];
161:       if (elim_row < m) { /* sweep down */
162:         if (dv[d][elim_row] != 0.0) {
163:           dv[d][elim_row] *= dd[k];
164:           mult = dv[d][elim_row];
165:           for (d2=d+1; d2<nd; d2++) {
166:             elim_col = elim_row - diag[d2];
167:             dgk = k - elim_col;
168:             if (elim_col >=0 && elim_col < n) {
169:               if ((dnum = dgptr[dgk+m])) {
170:                 dv[d2][elim_row] -= mult * dv[dnum-1][k];
171:               }
172:             }
173:           }
174:         }
175:       }
176:     }
177:   }
178:   PetscFree(dgptr);
179:   C->factor = FACTOR_LU;
180:   return(0);
181: }

183: /* -----------------------------------------------------------------*/

185: #undef __FUNCT__  
187: int MatSolve_SeqBDiag_1(Mat A,Vec xx,Vec yy)
188: {
189:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
190:   int          ierr,i,d,loc,mainbd = a->mainbd;
191:   int          n = A->n,m = A->m,*diag = a->diag,col;
192:   PetscScalar  *x,*y,*dd = a->diagv[mainbd],sum,**dv = a->diagv;

195:   VecGetArray(xx,&x);
196:   VecGetArray(yy,&y);
197:   /* forward solve the lower triangular part */
198:   for (i=0; i<m; i++) {
199:     sum = x[i];
200:     for (d=0; d<mainbd; d++) {
201:       loc = i - diag[d];
202:       if (loc >= 0) sum -= dv[d][i] * y[loc];
203:     }
204:     y[i] = sum;
205:   }
206:   /* backward solve the upper triangular part */
207:   for (i=m-1; i>=0; i--) {
208:     sum = y[i];
209:     for (d=mainbd+1; d<a->nd; d++) {
210:       col = i - diag[d];
211:       if (col < n) sum -= dv[d][i] * y[col];
212:     }
213:     y[i] = sum*dd[i];
214:   }
215:   VecRestoreArray(xx,&x);
216:   VecRestoreArray(yy,&y);
217:   PetscLogFlops(2*a->nz - A->n);
218:   return(0);
219: }

221: #undef __FUNCT__  
223: int MatSolve_SeqBDiag_2(Mat A,Vec xx,Vec yy)
224: {
225:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
226:   int          i,d,loc,mainbd = a->mainbd;
227:   int          mblock = a->mblock,nblock = a->nblock,inb,inb2;
228:   int          ierr,m = A->m,*diag = a->diag,col;
229:   PetscScalar  *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
230:   PetscScalar  w0,w1,sum0,sum1;

233:   VecGetArray(xx,&x);
234:   VecGetArray(yy,&y);
235:   PetscMemcpy(y,x,m*sizeof(PetscScalar));

237:   /* forward solve the lower triangular part */
238:   if (mainbd != 0) {
239:     inb = 0;
240:     for (i=0; i<mblock; i++) {
241:       sum0 = sum1 = 0.0;
242:       for (d=0; d<mainbd; d++) {
243:         loc = 2*(i - diag[d]);
244:         if (loc >= 0) {
245:           dvt = &dv[d][4*i];
246:           w0 = y[loc]; w1 = y[loc+1];
247:           sum0 += dvt[0]*w0 + dvt[2]*w1;
248:           sum1 += dvt[1]*w0 + dvt[3]*w1;
249:         }
250:       }
251:       y[inb] -= sum0; y[inb+1] -= sum1;

253:       inb += 2;
254:     }
255:   }
256:   /* backward solve the upper triangular part */
257:   inb = 2*(mblock-1); inb2 = 2*inb;
258:   for (i=mblock-1; i>=0; i--) {
259:     sum0 = y[inb]; sum1 = y[inb+1];
260:     for (d=mainbd+1; d<a->nd; d++) {
261:       col = 2*(i - diag[d]);
262:       if (col < 2*nblock) {
263:         dvt = &dv[d][4*i];
264:         w0 = y[col]; w1 = y[col+1];
265:         sum0 -= dvt[0]*w0 + dvt[2]*w1;
266:         sum1 -= dvt[1]*w0 + dvt[3]*w1;
267:       }
268:     }
269:     dvt = dd+inb2;
270:     y[inb]   = dvt[0]*sum0 + dvt[2]*sum1;
271:     y[inb+1] = dvt[1]*sum0 + dvt[3]*sum1;
272:     inb -= 2; inb2 -= 4;
273:   }
274:   VecRestoreArray(xx,&x);
275:   VecRestoreArray(yy,&y);
276:   PetscLogFlops(2*a->nz - A->n);
277:   return(0);
278: }

280: #undef __FUNCT__  
282: int MatSolve_SeqBDiag_3(Mat A,Vec xx,Vec yy)
283: {
284:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
285:   int          i,d,loc,mainbd = a->mainbd;
286:   int          mblock = a->mblock,nblock = a->nblock,inb,inb2;
287:   int          ierr,m = A->m,*diag = a->diag,col;
288:   PetscScalar  *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
289:   PetscScalar  w0,w1,w2,sum0,sum1,sum2;

292:   VecGetArray(xx,&x);
293:   VecGetArray(yy,&y);
294:   PetscMemcpy(y,x,m*sizeof(PetscScalar));

296:   /* forward solve the lower triangular part */
297:   if (mainbd != 0) {
298:     inb = 0;
299:     for (i=0; i<mblock; i++) {
300:       sum0 = sum1 = sum2 = 0.0;
301:       for (d=0; d<mainbd; d++) {
302:         loc = 3*(i - diag[d]);
303:         if (loc >= 0) {
304:           dvt = &dv[d][9*i];
305:           w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];
306:           sum0 += dvt[0]*w0 + dvt[3]*w1 + dvt[6]*w2;
307:           sum1 += dvt[1]*w0 + dvt[4]*w1 + dvt[7]*w2;
308:           sum2 += dvt[2]*w0 + dvt[5]*w1 + dvt[8]*w2;
309:         }
310:       }
311:       y[inb] -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;
312:       inb += 3;
313:     }
314:   }
315:   /* backward solve the upper triangular part */
316:   inb = 3*(mblock-1); inb2 = 3*inb;
317:   for (i=mblock-1; i>=0; i--) {
318:     sum0 = y[inb]; sum1 = y[inb+1]; sum2 =  y[inb+2];
319:     for (d=mainbd+1; d<a->nd; d++) {
320:       col = 3*(i - diag[d]);
321:       if (col < 3*nblock) {
322:         dvt = &dv[d][9*i];
323:         w0 = y[col]; w1 = y[col+1];w2 = y[col+2];
324:         sum0 -= dvt[0]*w0 + dvt[3]*w1 + dvt[6]*w2;
325:         sum1 -= dvt[1]*w0 + dvt[4]*w1 + dvt[7]*w2;
326:         sum2 -= dvt[2]*w0 + dvt[5]*w1 + dvt[8]*w2;
327:       }
328:     }
329:     dvt = dd+inb2;
330:     y[inb]   = dvt[0]*sum0 + dvt[3]*sum1 + dvt[6]*sum2;
331:     y[inb+1] = dvt[1]*sum0 + dvt[4]*sum1 + dvt[7]*sum2;
332:     y[inb+2] = dvt[2]*sum0 + dvt[5]*sum1 + dvt[8]*sum2;
333:     inb -= 3; inb2 -= 9;
334:   }
335:   VecRestoreArray(xx,&x);
336:   VecRestoreArray(yy,&y);
337:   PetscLogFlops(2*a->nz - A->n);
338:   return(0);
339: }

341: #undef __FUNCT__  
343: int MatSolve_SeqBDiag_4(Mat A,Vec xx,Vec yy)
344: {
345:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
346:   int          i,d,loc,mainbd = a->mainbd;
347:   int          mblock = a->mblock,nblock = a->nblock,inb,inb2;
348:   int          ierr,m = A->m,*diag = a->diag,col;
349:   PetscScalar  *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
350:   PetscScalar  w0,w1,w2,w3,sum0,sum1,sum2,sum3;

353:   VecGetArray(xx,&x);
354:   VecGetArray(yy,&y);
355:   PetscMemcpy(y,x,m*sizeof(PetscScalar));

357:   /* forward solve the lower triangular part */
358:   if (mainbd != 0) {
359:     inb = 0;
360:     for (i=0; i<mblock; i++) {
361:       sum0 = sum1 = sum2 = sum3 = 0.0;
362:       for (d=0; d<mainbd; d++) {
363:         loc = 4*(i - diag[d]);
364:         if (loc >= 0) {
365:           dvt = &dv[d][16*i];
366:           w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];w3 = y[loc+3];
367:           sum0 += dvt[0]*w0 + dvt[4]*w1 + dvt[8]*w2  + dvt[12]*w3;
368:           sum1 += dvt[1]*w0 + dvt[5]*w1 + dvt[9]*w2  + dvt[13]*w3;
369:           sum2 += dvt[2]*w0 + dvt[6]*w1 + dvt[10]*w2 + dvt[14]*w3;
370:           sum3 += dvt[3]*w0 + dvt[7]*w1 + dvt[11]*w2 + dvt[15]*w3;
371:         }
372:       }
373:       y[inb] -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;y[inb+3] -= sum3;
374:       inb += 4;
375:     }
376:   }
377:   /* backward solve the upper triangular part */
378:   inb = 4*(mblock-1); inb2 = 4*inb;
379:   for (i=mblock-1; i>=0; i--) {
380:     sum0 = y[inb]; sum1 = y[inb+1]; sum2 =  y[inb+2]; sum3 =  y[inb+3];
381:     for (d=mainbd+1; d<a->nd; d++) {
382:       col = 4*(i - diag[d]);
383:       if (col < 4*nblock) {
384:         dvt = &dv[d][16*i];
385:         w0 = y[col]; w1 = y[col+1];w2 = y[col+2];w3 = y[col+3];
386:         sum0 -= dvt[0]*w0 + dvt[4]*w1 + dvt[8]*w2  + dvt[12]*w3;
387:         sum1 -= dvt[1]*w0 + dvt[5]*w1 + dvt[9]*w2  + dvt[13]*w3;
388:         sum2 -= dvt[2]*w0 + dvt[6]*w1 + dvt[10]*w2 + dvt[14]*w3;
389:         sum3 -= dvt[3]*w0 + dvt[7]*w1 + dvt[11]*w2 + dvt[15]*w3;
390:       }
391:     }
392:     dvt = dd+inb2;
393:     y[inb]   = dvt[0]*sum0 + dvt[4]*sum1 + dvt[8]*sum2 + dvt[12]*sum3;
394:     y[inb+1] = dvt[1]*sum0 + dvt[5]*sum1 + dvt[9]*sum2 + dvt[13]*sum3;
395:     y[inb+2] = dvt[2]*sum0 + dvt[6]*sum1 + dvt[10]*sum2 + dvt[14]*sum3;
396:     y[inb+3] = dvt[3]*sum0 + dvt[7]*sum1 + dvt[11]*sum2 + dvt[15]*sum3;
397:     inb -= 4; inb2 -= 16;
398:   }
399:   VecRestoreArray(xx,&x);
400:   VecRestoreArray(yy,&y);
401:   PetscLogFlops(2*a->nz - A->n);
402:   return(0);
403: }

405: #undef __FUNCT__  
407: int MatSolve_SeqBDiag_5(Mat A,Vec xx,Vec yy)
408: {
409:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
410:   int          i,d,loc,mainbd = a->mainbd;
411:   int          mblock = a->mblock,nblock = a->nblock,inb,inb2;
412:   int          ierr,m = A->m,*diag = a->diag,col;
413:   PetscScalar  *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
414:   PetscScalar  w0,w1,w2,w3,w4,sum0,sum1,sum2,sum3,sum4;

417:   VecGetArray(xx,&x);
418:   VecGetArray(yy,&y);
419:   PetscMemcpy(y,x,m*sizeof(PetscScalar));

421:   /* forward solve the lower triangular part */
422:   if (mainbd != 0) {
423:     inb = 0;
424:     for (i=0; i<mblock; i++) {
425:       sum0 = sum1 = sum2 = sum3 = sum4 = 0.0;
426:       for (d=0; d<mainbd; d++) {
427:         loc = 5*(i - diag[d]);
428:         if (loc >= 0) {
429:           dvt = &dv[d][25*i];
430:           w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];w3 = y[loc+3];w4 = y[loc+4];
431:           sum0 += dvt[0]*w0 + dvt[5]*w1 + dvt[10]*w2 + dvt[15]*w3 + dvt[20]*w4;
432:           sum1 += dvt[1]*w0 + dvt[6]*w1 + dvt[11]*w2 + dvt[16]*w3 + dvt[21]*w4;
433:           sum2 += dvt[2]*w0 + dvt[7]*w1 + dvt[12]*w2 + dvt[17]*w3 + dvt[22]*w4;
434:           sum3 += dvt[3]*w0 + dvt[8]*w1 + dvt[13]*w2 + dvt[18]*w3 + dvt[23]*w4;
435:           sum4 += dvt[4]*w0 + dvt[9]*w1 + dvt[14]*w2 + dvt[19]*w3 + dvt[24]*w4;
436:         }
437:       }
438:       y[inb]   -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;y[inb+3] -= sum3;
439:       y[inb+4] -= sum4;
440:       inb += 5;
441:     }
442:   }
443:   /* backward solve the upper triangular part */
444:   inb = 5*(mblock-1); inb2 = 5*inb;
445:   for (i=mblock-1; i>=0; i--) {
446:     sum0 = y[inb];sum1 = y[inb+1];sum2 = y[inb+2];sum3 = y[inb+3];sum4 = y[inb+4];
447:     for (d=mainbd+1; d<a->nd; d++) {
448:       col = 5*(i - diag[d]);
449:       if (col < 5*nblock) {
450:         dvt = &dv[d][25*i];
451:         w0 = y[col]; w1 = y[col+1];w2 = y[col+2];w3 = y[col+3];w4 = y[col+4];
452:         sum0 -= dvt[0]*w0 + dvt[5]*w1 + dvt[10]*w2 + dvt[15]*w3 + dvt[20]*w4;
453:         sum1 -= dvt[1]*w0 + dvt[6]*w1 + dvt[11]*w2 + dvt[16]*w3 + dvt[21]*w4;
454:         sum2 -= dvt[2]*w0 + dvt[7]*w1 + dvt[12]*w2 + dvt[17]*w3 + dvt[22]*w4;
455:         sum3 -= dvt[3]*w0 + dvt[8]*w1 + dvt[13]*w2 + dvt[18]*w3 + dvt[23]*w4;
456:         sum4 -= dvt[4]*w0 + dvt[9]*w1 + dvt[14]*w2 + dvt[19]*w3 + dvt[24]*w4;
457:       }
458:     }
459:     dvt = dd+inb2;
460:     y[inb]   = dvt[0]*sum0 + dvt[5]*sum1 + dvt[10]*sum2 + dvt[15]*sum3
461:                + dvt[20]*sum4;
462:     y[inb+1] = dvt[1]*sum0 + dvt[6]*sum1 + dvt[11]*sum2 + dvt[16]*sum3
463:                + dvt[21]*sum4;
464:     y[inb+2] = dvt[2]*sum0 + dvt[7]*sum1 + dvt[12]*sum2 + dvt[17]*sum3
465:                + dvt[22]*sum4;
466:     y[inb+3] = dvt[3]*sum0 + dvt[8]*sum1 + dvt[13]*sum2 + dvt[18]*sum3
467:                + dvt[23]*sum4;
468:     y[inb+4] = dvt[4]*sum0 + dvt[9]*sum1 + dvt[14]*sum2 + dvt[19]*sum3
469:                + dvt[24]*sum4;
470:     inb -= 5; inb2 -= 25;
471:   }
472:   VecRestoreArray(xx,&x);
473:   VecRestoreArray(yy,&y);
474:   PetscLogFlops(2*a->nz - A->n);
475:   return(0);
476: }

478: #undef __FUNCT__  
480: int MatSolve_SeqBDiag_N(Mat A,Vec xx,Vec yy)
481: {
482:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
483:   int          i,d,loc,mainbd = a->mainbd;
484:   int          mblock = a->mblock,nblock = a->nblock,inb,inb2;
485:   int          ierr,bs = a->bs,m = A->m,*diag = a->diag,col,bs2 = bs*bs;
486:   PetscScalar  *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv;
487:   PetscScalar  work[25];

490:   VecGetArray(xx,&x);
491:   VecGetArray(yy,&y);
492:   if (bs > 25) SETERRQ(PETSC_ERR_SUP,"Blocks must be smaller then 25");
493:   PetscMemcpy(y,x,m*sizeof(PetscScalar));

495:   /* forward solve the lower triangular part */
496:   if (mainbd != 0) {
497:     inb = 0;
498:     for (i=0; i<mblock; i++) {
499:       for (d=0; d<mainbd; d++) {
500:         loc = i - diag[d];
501:         if (loc >= 0) {
502:           Kernel_v_gets_v_minus_A_times_w(bs,y+inb,&dv[d][i*bs2],y+loc*bs);
503:         }
504:       }
505:       inb += bs;
506:     }
507:   }
508:   /* backward solve the upper triangular part */
509:   inb = bs*(mblock-1); inb2 = inb*bs;
510:   for (i=mblock-1; i>=0; i--) {
511:     for (d=mainbd+1; d<a->nd; d++) {
512:       col = i - diag[d];
513:       if (col < nblock) {
514:         Kernel_v_gets_v_minus_A_times_w(bs,y+inb,&dv[d][inb2],y+col*bs);
515:       }
516:     }
517:     Kernel_w_gets_A_times_v(bs,y+inb,dd+inb2,work);
518:     PetscMemcpy(y+inb,work,bs*sizeof(PetscScalar));
519:     inb -= bs; inb2 -= bs2;
520:   }
521:   VecRestoreArray(xx,&x);
522:   VecRestoreArray(yy,&y);
523:   PetscLogFlops(2*a->nz - A->n);
524:   return(0);
525: }