Actual source code: bdiag2.c

  1: /*$Id: bdiag2.c,v 1.21 2001/08/07 03:02:53 balay Exp $*/

  3: /* Block diagonal matrix format */

 5:  #include src/mat/impls/bdiag/seq/bdiag.h
 6:  #include src/vec/vecimpl.h
 7:  #include src/inline/ilu.h

  9: #undef __FUNCT__  
 11: int MatSetValues_SeqBDiag_1(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
 12: {
 13:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
 14:   int          kk,ldiag,row,newnz,*bdlen_new;
 15:   int          j,k, *diag_new,ierr;
 16:   PetscTruth   roworiented = a->roworiented,dfound;
 17:   PetscScalar  value,**diagv_new;

 20:   for (kk=0; kk<m; kk++) { /* loop over added rows */
 21:     row = im[kk];
 22:     if (row < 0) continue;
 23:     if (row >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 24:     for (j=0; j<n; j++) {
 25:       if (in[j] < 0) continue;
 26:       if (in[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 27:       ldiag  = row - in[j]; /* diagonal number */
 28:       dfound = PETSC_FALSE;
 29:       if (roworiented) {
 30:         value = v[j + kk*n];
 31:       } else {
 32:         value = v[kk + j*m];
 33:       }
 34:       /* search diagonals for required one */
 35:       for (k=0; k<a->nd; k++) {
 36:         if (a->diag[k] == ldiag) {
 37:           dfound = PETSC_TRUE;
 38:           if (is == ADD_VALUES) a->diagv[k][row] += value;
 39:           else                  a->diagv[k][row]  = value;
 40:           break;
 41:         }
 42:       }
 43:       if (!dfound) {
 44:         if (a->nonew || a->nonew_diag) {
 45: #if !defined(PETSC_USE_COMPLEX)
 46:           if (a->user_alloc && value) {
 47: #else
 48:           if (a->user_alloc && PetscRealPart(value) || PetscImaginaryPart(value)) {
 49: #endif
 50:             PetscLogInfo(A,"MatSetValues_SeqBDiag:Nonzero in diagonal %d that user did not allocaten",ldiag);
 51:           }
 52:         } else {
 53:           PetscLogInfo(A,"MatSetValues_SeqBDiag: Allocating new diagonal: %dn",ldiag);
 54:           a->reallocs++;
 55:           /* free old bdiag storage info and reallocate */
 56:           ierr      = PetscMalloc(2*(a->nd+1)*sizeof(int),&diag_new);
 57:           bdlen_new = diag_new + a->nd + 1;
 58:           ierr      = PetscMalloc((a->nd+1)*sizeof(PetscScalar*),&diagv_new);
 59:           for (k=0; k<a->nd; k++) {
 60:             diag_new[k]  = a->diag[k];
 61:             diagv_new[k] = a->diagv[k];
 62:             bdlen_new[k] = a->bdlen[k];
 63:           }
 64:           diag_new[a->nd]  = ldiag;
 65:           if (ldiag > 0) { /* lower triangular */
 66:             bdlen_new[a->nd] = PetscMin(a->nblock,a->mblock - ldiag);
 67:           } else {         /* upper triangular */
 68:             bdlen_new[a->nd] = PetscMin(a->mblock,a->nblock + ldiag);
 69:           }
 70:           newnz = bdlen_new[a->nd];
 71:           PetscMalloc(newnz*sizeof(PetscScalar),&diagv_new[a->nd]);
 72:           PetscMemzero(diagv_new[a->nd],newnz*sizeof(PetscScalar));
 73:           /* adjust pointers so that dv[diag][row] works for all diagonals*/
 74:           if (diag_new[a->nd] > 0) {
 75:             diagv_new[a->nd] -= diag_new[a->nd];
 76:           }
 77:           a->maxnz += newnz;
 78:           a->nz    += newnz;
 79:           PetscFree(a->diagv);
 80:           PetscFree(a->diag);
 81:           a->diag  = diag_new;
 82:           a->bdlen = bdlen_new;
 83:           a->diagv = diagv_new;

 85:           /* Insert value */
 86:           if (is == ADD_VALUES) a->diagv[a->nd][row] += value;
 87:           else                  a->diagv[a->nd][row] = value;
 88:           a->nd++;
 89:           PetscLogObjectMemory(A,newnz*sizeof(PetscScalar)+2*sizeof(int)+sizeof(PetscScalar*));
 90:         }
 91:       }
 92:     }
 93:   }
 94:   return(0);
 95: }


 98: #undef __FUNCT__  
100: int MatSetValues_SeqBDiag_N(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
101: {
102:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
103:   int          kk,ldiag,shift,row,newnz,*bdlen_new,ierr;
104:   int          j,k,bs = a->bs,*diag_new;
105:   PetscTruth   roworiented = a->roworiented,dfound;
106:   PetscScalar  value,**diagv_new;

109:   for (kk=0; kk<m; kk++) { /* loop over added rows */
110:     row = im[kk];
111:     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
112:     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
113:     shift = (row/bs)*bs*bs + row%bs;
114:     for (j=0; j<n; j++) {
115:       ldiag  = row/bs - in[j]/bs; /* block diagonal */
116:       dfound = PETSC_FALSE;
117:       if (roworiented) {
118:         value = *v++;
119:       } else {
120:         value = v[kk + j*m];
121:       }
122:       /* seach for appropriate diagonal */
123:       for (k=0; k<a->nd; k++) {
124:         if (a->diag[k] == ldiag) {
125:           dfound = PETSC_TRUE;
126:           if (is == ADD_VALUES) a->diagv[k][shift + (in[j]%bs)*bs] += value;
127:           else                  a->diagv[k][shift + (in[j]%bs)*bs] = value;
128:           break;
129:         }
130:       }
131:       if (!dfound) {
132:         if (a->nonew || a->nonew_diag) {
133: #if !defined(PETSC_USE_COMPLEX)
134:           if (a->user_alloc && value) {
135: #else
136:           if (a->user_alloc && PetscRealPart(value) || PetscImaginaryPart(value)) {
137: #endif
138:             PetscLogInfo(A,"MatSetValues_SeqBDiag:Nonzero in diagonal %d that user did not allocaten",ldiag);
139:           }
140:         } else {
141:           PetscLogInfo(A,"MatSetValues_SeqBDiag: Allocating new diagonal: %dn",ldiag);
142:           a->reallocs++;
143:           /* free old bdiag storage info and reallocate */
144:           ierr      = PetscMalloc(2*(a->nd+1)*sizeof(int),&diag_new);
145:           bdlen_new = diag_new + a->nd + 1;
146:           ierr      = PetscMalloc((a->nd+1)*sizeof(PetscScalar*),&diagv_new);
147:           for (k=0; k<a->nd; k++) {
148:             diag_new[k]  = a->diag[k];
149:             diagv_new[k] = a->diagv[k];
150:             bdlen_new[k] = a->bdlen[k];
151:           }
152:           diag_new[a->nd]  = ldiag;
153:           if (ldiag > 0) {/* lower triangular */
154:             bdlen_new[a->nd] = PetscMin(a->nblock,a->mblock - ldiag);
155:           } else {         /* upper triangular */
156:             bdlen_new[a->nd] = PetscMin(a->mblock,a->nblock + ldiag);
157:           }
158:           newnz = bs*bs*bdlen_new[a->nd];
159:           PetscMalloc(newnz*sizeof(PetscScalar),&diagv_new[a->nd]);
160:           PetscMemzero(diagv_new[a->nd],newnz*sizeof(PetscScalar));
161:           /* adjust pointer so that dv[diag][row] works for all diagonals */
162:           if (diag_new[a->nd] > 0) {
163:             diagv_new[a->nd] -= bs*bs*diag_new[a->nd];
164:           }
165:           a->maxnz += newnz; a->nz += newnz;
166:           PetscFree(a->diagv);
167:           PetscFree(a->diag);
168:           a->diag  = diag_new;
169:           a->bdlen = bdlen_new;
170:           a->diagv = diagv_new;

172:           /* Insert value */
173:           if (is == ADD_VALUES) a->diagv[k][shift + (in[j]%bs)*bs] += value;
174:           else                  a->diagv[k][shift + (in[j]%bs)*bs] = value;
175:           a->nd++;
176:           PetscLogObjectMemory(A,newnz*sizeof(PetscScalar)+2*sizeof(int)+sizeof(PetscScalar*));
177:         }
178:       }
179:     }
180:   }
181:   return(0);
182: }

184: #undef __FUNCT__  
186: int MatGetValues_SeqBDiag_1(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
187: {
188:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
189:   int          kk,ldiag,row,j,k;
190:   PetscScalar  zero = 0.0;
191:   PetscTruth   dfound;

194:   for (kk=0; kk<m; kk++) { /* loop over rows */
195:     row = im[kk];
196:     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
197:     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
198:     for (j=0; j<n; j++) {
199:       if (in[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
200:       if (in[j] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
201:       ldiag = row - in[j]; /* diagonal number */
202:       dfound = PETSC_FALSE;
203:       for (k=0; k<a->nd; k++) {
204:         if (a->diag[k] == ldiag) {
205:           dfound = PETSC_TRUE;
206:           *v++ = a->diagv[k][row];
207:           break;
208:         }
209:       }
210:       if (!dfound) *v++ = zero;
211:     }
212:   }
213:   return(0);
214: }

216: #undef __FUNCT__  
218: int MatGetValues_SeqBDiag_N(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
219: {
220:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
221:   int          kk,ldiag,shift,row,j,k,bs = a->bs;
222:   PetscScalar  zero = 0.0;
223:   PetscTruth   dfound;

226:   for (kk=0; kk<m; kk++) { /* loop over rows */
227:     row = im[kk];
228:     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
229:     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
230:     shift = (row/bs)*bs*bs + row%bs;
231:     for (j=0; j<n; j++) {
232:       ldiag  = row/bs - in[j]/bs; /* block diagonal */
233:       dfound = PETSC_FALSE;
234:       for (k=0; k<a->nd; k++) {
235:         if (a->diag[k] == ldiag) {
236:           dfound = PETSC_TRUE;
237:           *v++ = a->diagv[k][shift + (in[j]%bs)*bs ];
238:           break;
239:         }
240:       }
241:       if (!dfound) *v++ = zero;
242:     }
243:   }
244:   return(0);
245: }

247: /*
248:     MatMults for blocksize 1 to 5 and N -------------------------------
249:  */
250: #undef __FUNCT__  
252: int MatMult_SeqBDiag_1(Mat A,Vec xx,Vec yy)
253: {
254:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
255:   int             nd = a->nd,diag,*a_diag = a->diag,*a_bdlen = a->bdlen;
256:   int             ierr,d,j,len;
257:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
258:   PetscScalar     *pvin,*pvout,*dv;

261:   VecGetArray(xx,&vin);
262:   VecGetArray(yy,&vout);
263:   PetscMemzero(vout,A->m*sizeof(PetscScalar));
264:   for (d=0; d<nd; d++) {
265:     dv   = a_diagv[d];
266:     diag = a_diag[d];
267:     len  = a_bdlen[d];
268:     if (diag > 0) {             /* lower triangle */
269:       pvin  = vin;
270:       pvout = vout + diag;
271:       dv    = dv   + diag;
272:     } else {                     /* upper triangle,including main diagonal */
273:       pvin  = vin - diag;
274:       pvout = vout;
275:     }
276:     for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
277:     PetscLogFlops(2*len);
278:   }
279:   VecRestoreArray(xx,&vin);
280:   VecRestoreArray(yy,&vout);
281:   return(0);
282: }

284: #undef __FUNCT__  
286: int MatMult_SeqBDiag_2(Mat A,Vec xx,Vec yy)
287: {
288:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
289:   int             nd = a->nd,nb_diag;
290:   int             ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
291:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
292:   PetscScalar     *pvin,*pvout,*dv,pvin0,pvin1;

295:   VecGetArray(xx,&vin);
296:   VecGetArray(yy,&vout);
297:   PetscMemzero(vout,A->m*sizeof(PetscScalar));
298:   for (d=0; d<nd; d++) {
299:     dv      = a_diagv[d];
300:     nb_diag = 2*a_diag[d];
301:     len     = a_bdlen[d];
302:     if (nb_diag > 0) {                /* lower triangle */
303:       pvin  = vin;
304:       pvout = vout + nb_diag;
305:       dv    = dv   + 2*nb_diag;
306:     } else {                       /* upper triangle, including main diagonal */
307:       pvin  = vin - nb_diag;
308:       pvout = vout;
309:     }
310:     for (k=0; k<len; k++) {
311:       pvin0     = pvin[0]; pvin1 = pvin[1];

313:       pvout[0] += dv[0]*pvin0 + dv[2]*pvin1;
314:       pvout[1] += dv[1]*pvin0 + dv[3]*pvin1;

316:       pvout += 2; pvin += 2; dv += 4;
317:     }
318:     PetscLogFlops(8*len);
319:   }
320:   VecRestoreArray(xx,&vin);
321:   VecRestoreArray(yy,&vout);
322:   return(0);
323: }

325: #undef __FUNCT__  
327: int MatMult_SeqBDiag_3(Mat A,Vec xx,Vec yy)
328: {
329:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
330:   int             nd = a->nd,nb_diag;
331:   int             ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
332:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
333:   PetscScalar     *pvin,*pvout,*dv,pvin0,pvin1,pvin2;

336:   VecGetArray(xx,&vin);
337:   VecGetArray(yy,&vout);
338:   PetscMemzero(vout,A->m*sizeof(PetscScalar));
339:   for (d=0; d<nd; d++) {
340:     dv      = a_diagv[d];
341:     nb_diag = 3*a_diag[d];
342:     len     = a_bdlen[d];
343:     if (nb_diag > 0) {                /* lower triangle */
344:       pvin  = vin;
345:       pvout = vout + nb_diag;
346:       dv    = dv   + 3*nb_diag;
347:     } else {                       /* upper triangle,including main diagonal */
348:       pvin  = vin - nb_diag;
349:       pvout = vout;
350:     }
351:     for (k=0; k<len; k++) {
352:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2];

354:       pvout[0] += dv[0]*pvin0 + dv[3]*pvin1  + dv[6]*pvin2;
355:       pvout[1] += dv[1]*pvin0 + dv[4]*pvin1  + dv[7]*pvin2;
356:       pvout[2] += dv[2]*pvin0 + dv[5]*pvin1  + dv[8]*pvin2;

358:       pvout += 3; pvin += 3; dv += 9;
359:     }
360:     PetscLogFlops(18*len);
361:   }
362:   VecRestoreArray(xx,&vin);
363:   VecRestoreArray(yy,&vout);
364:   return(0);
365: }

367: #undef __FUNCT__  
369: int MatMult_SeqBDiag_4(Mat A,Vec xx,Vec yy)
370: {
371:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
372:   int             nd = a->nd,nb_diag;
373:   int             ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
374:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
375:   PetscScalar     *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3;

378:   VecGetArray(xx,&vin);
379:   VecGetArray(yy,&vout);
380:   PetscMemzero(vout,A->m*sizeof(PetscScalar));
381:   for (d=0; d<nd; d++) {
382:     dv      = a_diagv[d];
383:     nb_diag = 4*a_diag[d];
384:     len     = a_bdlen[d];
385:     if (nb_diag > 0) {                /* lower triangle */
386:       pvin  = vin;
387:       pvout = vout + nb_diag;
388:       dv    = dv   + 4*nb_diag;
389:     } else {                       /* upper triangle,including main diagonal */
390:       pvin  = vin - nb_diag;
391:       pvout = vout;
392:     }
393:     for (k=0; k<len; k++) {
394:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3];

396:       pvout[0] += dv[0]*pvin0 + dv[4]*pvin1  + dv[8]*pvin2 + dv[12]*pvin3;
397:       pvout[1] += dv[1]*pvin0 + dv[5]*pvin1  + dv[9]*pvin2 + dv[13]*pvin3;
398:       pvout[2] += dv[2]*pvin0 + dv[6]*pvin1  + dv[10]*pvin2 + dv[14]*pvin3;
399:       pvout[3] += dv[3]*pvin0 + dv[7]*pvin1  + dv[11]*pvin2 + dv[15]*pvin3;

401:       pvout += 4; pvin += 4; dv += 16;
402:     }
403:     PetscLogFlops(32*len);
404:   }
405:   VecRestoreArray(xx,&vin);
406:   VecRestoreArray(yy,&vout);
407:   return(0);
408: }

410: #undef __FUNCT__  
412: int MatMult_SeqBDiag_5(Mat A,Vec xx,Vec yy)
413: {
414:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
415:   int             nd = a->nd,nb_diag;
416:   int             ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
417:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
418:   PetscScalar     *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3,pvin4;

421:   VecGetArray(xx,&vin);
422:   VecGetArray(yy,&vout);
423:   PetscMemzero(vout,A->m*sizeof(PetscScalar));
424:   for (d=0; d<nd; d++) {
425:     dv      = a_diagv[d];
426:     nb_diag = 5*a_diag[d];
427:     len     = a_bdlen[d];
428:     if (nb_diag > 0) {                /* lower triangle */
429:       pvin  = vin;
430:       pvout = vout + nb_diag;
431:       dv    = dv   + 5*nb_diag;
432:     } else {                       /* upper triangle,including main diagonal */
433:       pvin  = vin - nb_diag;
434:       pvout = vout;
435:     }
436:     for (k=0; k<len; k++) {
437:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3]; pvin4 = pvin[4];

439:       pvout[0] += dv[0]*pvin0 + dv[5]*pvin1  + dv[10]*pvin2 + dv[15]*pvin3 + dv[20]*pvin4;
440:       pvout[1] += dv[1]*pvin0 + dv[6]*pvin1  + dv[11]*pvin2 + dv[16]*pvin3 + dv[21]*pvin4;
441:       pvout[2] += dv[2]*pvin0 + dv[7]*pvin1  + dv[12]*pvin2 + dv[17]*pvin3 + dv[22]*pvin4;
442:       pvout[3] += dv[3]*pvin0 + dv[8]*pvin1  + dv[13]*pvin2 + dv[18]*pvin3 + dv[23]*pvin4;
443:       pvout[4] += dv[4]*pvin0 + dv[9]*pvin1  + dv[14]*pvin2 + dv[19]*pvin3 + dv[24]*pvin4;

445:       pvout += 5; pvin += 5; dv += 25;
446:     }
447:     PetscLogFlops(50*len);
448:   }
449:   VecRestoreArray(xx,&vin);
450:   VecRestoreArray(yy,&vout);
451:   return(0);
452: }

454: #undef __FUNCT__  
456: int MatMult_SeqBDiag_N(Mat A,Vec xx,Vec yy)
457: {
458:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
459:   int             nd = a->nd,bs = a->bs,nb_diag,bs2 = bs*bs;
460:   int             ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
461:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
462:   PetscScalar     *pvin,*pvout,*dv;

465:   VecGetArray(xx,&vin);
466:   VecGetArray(yy,&vout);
467:   PetscMemzero(vout,A->m*sizeof(PetscScalar));
468:   for (d=0; d<nd; d++) {
469:     dv      = a_diagv[d];
470:     nb_diag = bs*a_diag[d];
471:     len     = a_bdlen[d];
472:     if (nb_diag > 0) {                /* lower triangle */
473:       pvin  = vin;
474:       pvout = vout + nb_diag;
475:       dv    = dv   + bs*nb_diag;
476:     } else {                       /* upper triangle, including main diagonal */
477:       pvin  = vin - nb_diag;
478:       pvout = vout;
479:     }
480:     for (k=0; k<len; k++) {
481:       Kernel_v_gets_v_plus_A_times_w(bs,pvout,dv,pvin);
482:       pvout += bs; pvin += bs; dv += bs2;
483:     }
484:     PetscLogFlops(2*bs2*len);
485:   }
486:   VecRestoreArray(xx,&vin);
487:   VecRestoreArray(yy,&vout);
488:   return(0);
489: }

491: /*
492:     MatMultAdds for blocksize 1 to 5 and N -------------------------------
493:  */
494: #undef __FUNCT__  
496: int MatMultAdd_SeqBDiag_1(Mat A,Vec xx,Vec zz,Vec yy)
497: {
498:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
499:   int             ierr,nd = a->nd,diag,*a_diag = a->diag,*a_bdlen = a->bdlen,d,j,len;
500:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
501:   PetscScalar     *pvin,*pvout,*dv;

504:   if (zz != yy) {VecCopy(zz,yy);}
505:   VecGetArray(xx,&vin);
506:   VecGetArray(yy,&vout);
507:   for (d=0; d<nd; d++) {
508:     dv   = a_diagv[d];
509:     diag = a_diag[d];
510:     len  = a_bdlen[d];
511:     if (diag > 0) {             /* lower triangle */
512:       pvin  = vin;
513:       pvout = vout + diag;
514:       dv    = dv   + diag;
515:     } else {                     /* upper triangle, including main diagonal */
516:       pvin  = vin - diag;
517:       pvout = vout;
518:     }
519:     for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
520:     PetscLogFlops(2*len);
521:   }
522:   VecRestoreArray(xx,&vin);
523:   VecRestoreArray(yy,&vout);
524:   return(0);
525: }

527: #undef __FUNCT__  
529: int MatMultAdd_SeqBDiag_2(Mat A,Vec xx,Vec zz,Vec yy)
530: {
531:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
532:   int             ierr,nd = a->nd,nb_diag;
533:   int             *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
534:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
535:   PetscScalar     *pvin,*pvout,*dv,pvin0,pvin1;

538:   if (zz != yy) {VecCopy(zz,yy);}
539:   VecGetArray(xx,&vin);
540:   VecGetArray(yy,&vout);
541:   for (d=0; d<nd; d++) {
542:     dv      = a_diagv[d];
543:     nb_diag = 2*a_diag[d];
544:     len     = a_bdlen[d];
545:     if (nb_diag > 0) {                /* lower triangle */
546:       pvin  = vin;
547:       pvout = vout + nb_diag;
548:       dv    = dv   + 2*nb_diag;
549:     } else {                       /* upper triangle, including main diagonal */
550:       pvin  = vin - nb_diag;
551:       pvout = vout;
552:     }
553:     for (k=0; k<len; k++) {
554:       pvin0 = pvin[0]; pvin1 = pvin[1];

556:       pvout[0] += dv[0]*pvin0 + dv[2]*pvin1;
557:       pvout[1] += dv[1]*pvin0 + dv[3]*pvin1;

559:       pvout += 2; pvin += 2; dv += 4;
560:     }
561:     PetscLogFlops(8*len);
562:   }
563:   VecRestoreArray(xx,&vin);
564:   VecRestoreArray(yy,&vout);
565:   return(0);
566: }

568: #undef __FUNCT__  
570: int MatMultAdd_SeqBDiag_3(Mat A,Vec xx,Vec zz,Vec yy)
571: {
572:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
573:   int             ierr,nd = a->nd,nb_diag;
574:   int             *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
575:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
576:   PetscScalar     *pvin,*pvout,*dv,pvin0,pvin1,pvin2;

579:   if (zz != yy) {VecCopy(zz,yy);}
580:   VecGetArray(xx,&vin);
581:   VecGetArray(yy,&vout);
582:   for (d=0; d<nd; d++) {
583:     dv      = a_diagv[d];
584:     nb_diag = 3*a_diag[d];
585:     len     = a_bdlen[d];
586:     if (nb_diag > 0) {                /* lower triangle */
587:       pvin  = vin;
588:       pvout = vout + nb_diag;
589:       dv    = dv   + 3*nb_diag;
590:     } else {                       /* upper triangle, including main diagonal */
591:       pvin  = vin - nb_diag;
592:       pvout = vout;
593:     }
594:     for (k=0; k<len; k++) {
595:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2];

597:       pvout[0] += dv[0]*pvin0 + dv[3]*pvin1  + dv[6]*pvin2;
598:       pvout[1] += dv[1]*pvin0 + dv[4]*pvin1  + dv[7]*pvin2;
599:       pvout[2] += dv[2]*pvin0 + dv[5]*pvin1  + dv[8]*pvin2;

601:       pvout += 3; pvin += 3; dv += 9;
602:     }
603:     PetscLogFlops(18*len);
604:   }
605:   VecRestoreArray(xx,&vin);
606:   VecRestoreArray(yy,&vout);
607:   return(0);
608: }

610: #undef __FUNCT__  
612: int MatMultAdd_SeqBDiag_4(Mat A,Vec xx,Vec zz,Vec yy)
613: {
614:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
615:   int             ierr,nd = a->nd,nb_diag;
616:   int             *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
617:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
618:   PetscScalar     *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3;

621:   if (zz != yy) {VecCopy(zz,yy);}
622:   VecGetArray(xx,&vin);
623:   VecGetArray(yy,&vout);
624:   for (d=0; d<nd; d++) {
625:     dv      = a_diagv[d];
626:     nb_diag = 4*a_diag[d];
627:     len     = a_bdlen[d];
628:     if (nb_diag > 0) {                /* lower triangle */
629:       pvin  = vin;
630:       pvout = vout + nb_diag;
631:       dv    = dv   + 4*nb_diag;
632:     } else {                       /* upper triangle, including main diagonal */
633:       pvin  = vin - nb_diag;
634:       pvout = vout;
635:     }
636:     for (k=0; k<len; k++) {
637:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3];

639:       pvout[0] += dv[0]*pvin0 + dv[4]*pvin1  + dv[8]*pvin2 + dv[12]*pvin3;
640:       pvout[1] += dv[1]*pvin0 + dv[5]*pvin1  + dv[9]*pvin2 + dv[13]*pvin3;
641:       pvout[2] += dv[2]*pvin0 + dv[6]*pvin1  + dv[10]*pvin2 + dv[14]*pvin3;
642:       pvout[3] += dv[3]*pvin0 + dv[7]*pvin1  + dv[11]*pvin2 + dv[15]*pvin3;

644:       pvout += 4; pvin += 4; dv += 16;
645:     }
646:     PetscLogFlops(32*len);
647:   }
648:   VecRestoreArray(xx,&vin);
649:   VecRestoreArray(yy,&vout);
650:   return(0);
651: }

653: #undef __FUNCT__  
655: int MatMultAdd_SeqBDiag_5(Mat A,Vec xx,Vec zz,Vec yy)
656: {
657:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
658:   int             ierr,nd = a->nd,nb_diag;
659:   int             *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
660:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
661:   PetscScalar     *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3,pvin4;

664:   if (zz != yy) {VecCopy(zz,yy);}
665:   VecGetArray(xx,&vin);
666:   VecGetArray(yy,&vout);
667:   for (d=0; d<nd; d++) {
668:     dv      = a_diagv[d];
669:     nb_diag = 5*a_diag[d];
670:     len     = a_bdlen[d];
671:     if (nb_diag > 0) {                /* lower triangle */
672:       pvin  = vin;
673:       pvout = vout + nb_diag;
674:       dv    = dv   + 5*nb_diag;
675:     } else {                       /* upper triangle, including main diagonal */
676:       pvin  = vin - nb_diag;
677:       pvout = vout;
678:     }
679:     for (k=0; k<len; k++) {
680:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3]; pvin4 = pvin[4];

682:       pvout[0] += dv[0]*pvin0 + dv[5]*pvin1  + dv[10]*pvin2 + dv[15]*pvin3 + dv[20]*pvin4;
683:       pvout[1] += dv[1]*pvin0 + dv[6]*pvin1  + dv[11]*pvin2 + dv[16]*pvin3 + dv[21]*pvin4;
684:       pvout[2] += dv[2]*pvin0 + dv[7]*pvin1  + dv[12]*pvin2 + dv[17]*pvin3 + dv[22]*pvin4;
685:       pvout[3] += dv[3]*pvin0 + dv[8]*pvin1  + dv[13]*pvin2 + dv[18]*pvin3 + dv[23]*pvin4;
686:       pvout[4] += dv[4]*pvin0 + dv[9]*pvin1  + dv[14]*pvin2 + dv[19]*pvin3 + dv[24]*pvin4;

688:       pvout += 5; pvin += 5; dv += 25;
689:     }
690:     PetscLogFlops(50*len);
691:   }
692:   VecRestoreArray(xx,&vin);
693:   VecRestoreArray(yy,&vout);
694:   return(0);
695: }

697: #undef __FUNCT__  
699: int MatMultAdd_SeqBDiag_N(Mat A,Vec xx,Vec zz,Vec yy)
700: {
701:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
702:   int             ierr,nd = a->nd,bs = a->bs,nb_diag,bs2 = bs*bs;
703:   int             *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
704:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
705:   PetscScalar     *pvin,*pvout,*dv;

708:   if (zz != yy) {VecCopy(zz,yy);}
709:   VecGetArray(xx,&vin);
710:   VecGetArray(yy,&vout);
711:   for (d=0; d<nd; d++) {
712:     dv      = a_diagv[d];
713:     nb_diag = bs*a_diag[d];
714:     len     = a_bdlen[d];
715:     if (nb_diag > 0) {                /* lower triangle */
716:       pvin  = vin;
717:       pvout = vout + nb_diag;
718:       dv    = dv   + bs*nb_diag;
719:     } else {                       /* upper triangle, including main diagonal */
720:       pvin  = vin - nb_diag;
721:       pvout = vout;
722:     }
723:     for (k=0; k<len; k++) {
724:       Kernel_v_gets_v_plus_A_times_w(bs,pvout,dv,pvin);
725:       pvout += bs; pvin += bs; dv += bs2;
726:     }
727:     PetscLogFlops(2*bs2*len);
728:   }
729:   VecRestoreArray(xx,&vin);
730:   VecRestoreArray(yy,&vout);
731:   return(0);
732: }

734: /*
735:      MatMultTranspose ----------------------------------------------
736:  */
737: #undef __FUNCT__  
739: int MatMultTranspose_SeqBDiag_1(Mat A,Vec xx,Vec yy)
740: {
741:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
742:   int             ierr,nd = a->nd,diag,d,j,len;
743:   PetscScalar     *pvin,*pvout,*dv;
744:   PetscScalar     *vin,*vout;
745: 
747:   VecGetArray(xx,&vin);
748:   VecGetArray(yy,&vout);
749:   PetscMemzero(vout,A->n*sizeof(PetscScalar));
750:   for (d=0; d<nd; d++) {
751:     dv   = a->diagv[d];
752:     diag = a->diag[d];
753:     len  = a->bdlen[d];
754:       /* diag of original matrix is (row/bs - col/bs) */
755:       /* diag of transpose matrix is (col/bs - row/bs) */
756:     if (diag < 0) {        /* transpose is lower triangle */
757:       pvin  = vin;
758:       pvout = vout - diag;
759:     } else {        /* transpose is upper triangle, including main diagonal */
760:       pvin  = vin + diag;
761:       pvout = vout;
762:       dv    = dv + diag;
763:     }
764:     for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
765:   }
766:   VecRestoreArray(xx,&vin);
767:   VecRestoreArray(yy,&vout);
768:   return(0);
769: }

771: #undef __FUNCT__  
773: int MatMultTranspose_SeqBDiag_N(Mat A,Vec xx,Vec yy)
774: {
775:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
776:   int             ierr,nd = a->nd,bs = a->bs,diag,kshift,kloc,d,i,j,k,len;
777:   PetscScalar     *pvin,*pvout,*dv;
778:   PetscScalar     *vin,*vout;
779: 
781:   VecGetArray(xx,&vin);
782:   VecGetArray(yy,&vout);
783:   PetscMemzero(vout,A->n*sizeof(PetscScalar));
784:   for (d=0; d<nd; d++) {
785:     dv   = a->diagv[d];
786:     diag = a->diag[d];
787:     len  = a->bdlen[d];
788:       /* diag of original matrix is (row/bs - col/bs) */
789:       /* diag of transpose matrix is (col/bs - row/bs) */
790:     if (diag < 0) {        /* transpose is lower triangle */
791:       pvin  = vin;
792:       pvout = vout - bs*diag;
793:     } else {        /* transpose is upper triangle, including main diagonal */
794:       pvin  = vin + bs*diag;
795:       pvout = vout;
796:       dv    = dv + diag;
797:     }
798:     for (k=0; k<len; k++) {
799:       kloc = k*bs; kshift = kloc*bs;
800:       for (i=0; i<bs; i++) {         /* i = local column of transpose */
801:         for (j=0; j<bs; j++) {   /* j = local row of transpose */
802:           pvout[kloc + j] += dv[kshift + j*bs + i] * pvin[kloc + i];
803:         }
804:       }
805:     }
806:   }
807:   VecRestoreArray(xx,&vin);
808:   VecRestoreArray(yy,&vout);
809:   return(0);
810: }

812: /*
813:      MatMultTransposeAdd ----------------------------------------------
814:  */
815: #undef __FUNCT__  
817: int MatMultTransposeAdd_SeqBDiag_1(Mat A,Vec xx,Vec zz,Vec yy)
818: {
819:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
820:   int             ierr,nd = a->nd,diag,d,j,len;
821:   PetscScalar     *pvin,*pvout,*dv;
822:   PetscScalar     *vin,*vout;
823: 
825:   if (zz != yy) {VecCopy(zz,yy);}
826:   VecGetArray(xx,&vin);
827:   VecGetArray(yy,&vout);
828:   for (d=0; d<nd; d++) {
829:     dv   = a->diagv[d];
830:     diag = a->diag[d];
831:     len  = a->bdlen[d];
832:       /* diag of original matrix is (row/bs - col/bs) */
833:       /* diag of transpose matrix is (col/bs - row/bs) */
834:     if (diag < 0) {        /* transpose is lower triangle */
835:       pvin  = vin;
836:       pvout = vout - diag;
837:     } else {        /* transpose is upper triangle, including main diagonal */
838:       pvin  = vin + diag;
839:       pvout = vout;
840:       dv    = dv + diag;
841:     }
842:     for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
843:   }
844:   VecRestoreArray(xx,&vin);
845:   VecRestoreArray(yy,&vout);
846:   return(0);
847: }

849: #undef __FUNCT__  
851: int MatMultTransposeAdd_SeqBDiag_N(Mat A,Vec xx,Vec zz,Vec yy)
852: {
853:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
854:   int             ierr,nd = a->nd,bs = a->bs,diag,kshift,kloc,d,i,j,k,len;
855:   PetscScalar     *pvin,*pvout,*dv;
856:   PetscScalar     *vin,*vout;
857: 
859:   if (zz != yy) {VecCopy(zz,yy);}
860:   VecGetArray(xx,&vin);
861:   VecGetArray(yy,&vout);
862:   for (d=0; d<nd; d++) {
863:     dv   = a->diagv[d];
864:     diag = a->diag[d];
865:     len  = a->bdlen[d];
866:       /* diag of original matrix is (row/bs - col/bs) */
867:       /* diag of transpose matrix is (col/bs - row/bs) */
868:     if (diag < 0) {        /* transpose is lower triangle */
869:       pvin  = vin;
870:       pvout = vout - bs*diag;
871:     } else {        /* transpose is upper triangle, including main diagonal */
872:       pvin  = vin + bs*diag;
873:       pvout = vout;
874:       dv    = dv + diag;
875:     }
876:     for (k=0; k<len; k++) {
877:       kloc = k*bs; kshift = kloc*bs;
878:       for (i=0; i<bs; i++) {         /* i = local column of transpose */
879:         for (j=0; j<bs; j++) {   /* j = local row of transpose */
880:           pvout[kloc + j] += dv[kshift + j*bs + i] * pvin[kloc + i];
881:         }
882:       }
883:     }
884:   }
885:   VecRestoreArray(xx,&vin);
886:   VecRestoreArray(yy,&vout);
887:   return(0);
888: }
889: #undef __FUNCT__  
891: int MatRelax_SeqBDiag_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal shift,int its,int lits,Vec xx)
892: {
893:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
894:   PetscScalar  *x,*b,*xb,*dd,*dv,dval,sum;
895:   int          ierr,i,j,k,d,kbase,bs = a->bs,kloc;
896:   int          mainbd = a->mainbd,diag,mblock = a->mblock,bloc;

899:   its = its*lits;
900:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

902:   /* Currently this code doesn't use wavefront orderings, although
903:      we should eventually incorporate that option, whatever wavefront
904:      ordering maybe :-) */

906:   if (mainbd == -1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Main diagonal not set");

908:   VecGetArray(xx,&x);
909:   if (xx != bb) {
910:     VecGetArray(bb,&b);
911:   } else {
912:     b = x;
913:   }
914:   dd = a->diagv[mainbd];
915:   if (flag == SOR_APPLY_UPPER) {
916:     /* apply (U + D/omega) to the vector */
917:     for (k=0; k<mblock; k++) {
918:       kloc = k*bs; kbase = kloc*bs;
919:       for (i=0; i<bs; i++) {
920:         sum = b[i+kloc] * (shift + dd[i*(bs+1)+kbase]) / omega;
921:         for (j=i+1; j<bs; j++) sum += dd[kbase + j*bs + i] * b[kloc + j];
922:         for (d=mainbd+1; d<a->nd; d++) {
923:           diag = a->diag[d];
924:           dv   = a->diagv[d];
925:           if (k-diag < mblock) {
926:             for (j=0; j<bs; j++) {
927:               sum += dv[kbase + j*bs + i] * b[(k-diag)*bs + j];
928:             }
929:           }
930:         }
931:         x[kloc+i] = sum;
932:       }
933:     }
934:     VecRestoreArray(xx,&x);
935:     if (xx != bb) {VecRestoreArray(bb,&b);}
936:     return(0);
937:   }
938:   if (flag & SOR_ZERO_INITIAL_GUESS) {
939:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
940:       for (k=0; k<mblock; k++) {
941:         kloc = k*bs; kbase = kloc*bs;
942:         for (i=0; i<bs; i++) {
943:           sum  = b[i+kloc];
944:           dval = shift + dd[i*(bs+1)+kbase];
945:           for (d=0; d<mainbd; d++) {
946:             diag = a->diag[d];
947:             dv   = a->diagv[d];
948:             if (k >= diag) {
949:               for (j=0; j<bs; j++)
950:                 sum -= dv[k*bs*bs + j*bs + i] * x[(k-diag)*bs + j];
951:             }
952:           }
953:           for (j=0; j<i; j++){
954:             sum -= dd[kbase + j*bs + i] * x[kloc + j];
955:           }
956:           x[kloc+i] = omega*sum/dval;
957:         }
958:       }
959:       xb = x;
960:     } else xb = b;
961:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
962:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
963:       for (k=0; k<mblock; k++) {
964:         kloc = k*bs; kbase = kloc*bs;
965:         for (i=0; i<bs; i++)
966:           x[kloc+i] *= dd[i*(bs+1)+kbase];
967:       }
968:     }
969:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
970:       for (k=mblock-1; k>=0; k--) {
971:         kloc = k*bs; kbase = kloc*bs;
972:         for (i=bs-1; i>=0; i--) {
973:           sum  = xb[i+kloc];
974:           dval = shift + dd[i*(bs+1)+kbase];
975:           for (j=i+1; j<bs; j++)
976:             sum -= dd[kbase + j*bs + i] * x[kloc + j];
977:           for (d=mainbd+1; d<a->nd; d++) {
978:             diag = a->diag[d];
979:             dv   = a->diagv[d];
980:             bloc = k - diag;
981:             if (bloc < mblock) {
982:               for (j=0; j<bs; j++)
983:                 sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
984:             }
985:           }
986:           x[kloc+i] = omega*sum/dval;
987:         }
988:       }
989:     }
990:     its--;
991:   }
992:   while (its--) {
993:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
994:       for (k=0; k<mblock; k++) {
995:         kloc = k*bs; kbase = kloc*bs;
996:         for (i=0; i<bs; i++) {
997:           sum  = b[i+kloc];
998:           dval = shift + dd[i*(bs+1)+kbase];
999:           for (d=0; d<mainbd; d++) {
1000:             diag = a->diag[d];
1001:             dv   = a->diagv[d];
1002:             bloc = k - diag;
1003:             if (bloc >= 0) {
1004:               for (j=0; j<bs; j++) {
1005:                 sum -= dv[k*bs*bs + j*bs + i] * x[bloc*bs + j];
1006:               }
1007:             }
1008:           }
1009:           for (d=mainbd; d<a->nd; d++) {
1010:             diag = a->diag[d];
1011:             dv   = a->diagv[d];
1012:             bloc = k - diag;
1013:             if (bloc < mblock) {
1014:               for (j=0; j<bs; j++) {
1015:                 sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
1016:               }
1017:             }
1018:           }
1019:           x[kloc+i] = (1.-omega)*x[kloc+i]+omega*(sum+dd[i*(bs+1)+kbase]*x[kloc+i])/dval;
1020:         }
1021:       }
1022:     }
1023:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1024:       for (k=mblock-1; k>=0; k--) {
1025:         kloc = k*bs; kbase = kloc*bs;
1026:         for (i=bs-1; i>=0; i--) {
1027:           sum  = b[i+kloc];
1028:           dval = shift + dd[i*(bs+1)+kbase];
1029:           for (d=0; d<mainbd; d++) {
1030:             diag = a->diag[d];
1031:             dv   = a->diagv[d];
1032:             bloc = k - diag;
1033:             if (bloc >= 0) {
1034:               for (j=0; j<bs; j++) {
1035:                 sum -= dv[k*bs*bs + j*bs + i] * x[bloc*bs + j];
1036:               }
1037:             }
1038:           }
1039:           for (d=mainbd; d<a->nd; d++) {
1040:             diag = a->diag[d];
1041:             dv   = a->diagv[d];
1042:             bloc = k - diag;
1043:             if (bloc < mblock) {
1044:               for (j=0; j<bs; j++) {
1045:                 sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
1046:               }
1047:             }
1048:           }
1049:           x[kloc+i] = (1.-omega)*x[kloc+i]+omega*(sum+dd[i*(bs+1)+kbase]*x[kloc+i])/dval;
1050:         }
1051:       }
1052:     }
1053:   }
1054:   VecRestoreArray(xx,&x);
1055:   if (xx != bb) VecRestoreArray(bb,&b);
1056:   return(0);
1057: }

1059: #undef __FUNCT__  
1061: int MatRelax_SeqBDiag_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal shift,int its,int lits,Vec xx)
1062: {
1063:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
1064:   PetscScalar  *x,*b,*xb,*dd,dval,sum;
1065:   int          ierr,m = A->m,i,d,loc;
1066:   int          mainbd = a->mainbd,diag;

1069:   its = its*lits;
1070:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1071:   /* Currently this code doesn't use wavefront orderings,although
1072:      we should eventually incorporate that option, whatever wavefront
1073:      ordering maybe :-) */

1075:   VecGetArray(xx,&x);
1076:   VecGetArray(bb,&b);
1077:   if (mainbd == -1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Main diagonal not set");
1078:   dd = a->diagv[mainbd];
1079:   if (flag == SOR_APPLY_UPPER) {
1080:     /* apply (U + D/omega) to the vector */
1081:     for (i=0; i<m; i++) {
1082:       sum = b[i] * (shift + dd[i]) / omega;
1083:       for (d=mainbd+1; d<a->nd; d++) {
1084:         diag = a->diag[d];
1085:         if (i-diag < m) sum += a->diagv[d][i] * x[i-diag];
1086:       }
1087:       x[i] = sum;
1088:     }
1089:     VecRestoreArray(xx,&x);
1090:     VecRestoreArray(bb,&b);
1091:     return(0);
1092:   }
1093:   if (flag & SOR_ZERO_INITIAL_GUESS) {
1094:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1095:       for (i=0; i<m; i++) {
1096:         sum  = b[i];
1097:         for (d=0; d<mainbd; d++) {
1098:           if (i >= a->diag[d]) sum -= a->diagv[d][i] * x[i-a->diag[d]];
1099:         }
1100:         x[i] = omega*(sum/(shift + dd[i]));
1101:       }
1102:       xb = x;
1103:     } else xb = b;
1104:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1105:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1106:       for (i=0; i<m; i++) x[i] *= dd[i];
1107:     }
1108:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1109:       for (i=m-1; i>=0; i--) {
1110:         sum = xb[i];
1111:         for (d=mainbd+1; d<a->nd; d++) {
1112:           diag = a->diag[d];
1113:           if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1114:         }
1115:         x[i] = omega*(sum/(shift + dd[i]));
1116:       }
1117:     }
1118:     its--;
1119:   }
1120:   while (its--) {
1121:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1122:       for (i=0; i<m; i++) {
1123:         sum  = b[i];
1124:         dval = shift + dd[i];
1125:         for (d=0; d<mainbd; d++) {
1126:           if (i >= a->diag[d]) sum -= a->diagv[d][i] * x[i-a->diag[d]];
1127:         }
1128:         for (d=mainbd; d<a->nd; d++) {
1129:           diag = a->diag[d];
1130:           if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1131:         }
1132:         x[i] = (1. - omega)*x[i] + omega*(sum + dd[i]*x[i])/dval;
1133:       }
1134:     }
1135:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1136:       for (i=m-1; i>=0; i--) {
1137:         sum = b[i];
1138:         for (d=0; d<mainbd; d++) {
1139:           loc = i - a->diag[d];
1140:           if (loc >= 0) sum -= a->diagv[d][i] * x[loc];
1141:         }
1142:         for (d=mainbd; d<a->nd; d++) {
1143:           diag = a->diag[d];
1144:           if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1145:         }
1146:         x[i] = (1. - omega)*x[i] + omega*(sum + dd[i]*x[i])/(shift + dd[i]);
1147:       }
1148:     }
1149:   }
1150:   VecRestoreArray(xx,&x);
1151:   VecRestoreArray(bb,&b);
1152:   return(0);
1153: }