Actual source code: bdiag2.c

  1: /*$Id: bdiag2.c,v 1.19 2001/03/23 23:22:03 balay Exp $*/

  3: /* Block diagonal matrix format */

 5:  #include petscsys.h
 6:  #include src/mat/impls/bdiag/seq/bdiag.h
 7:  #include src/vec/vecimpl.h
 8:  #include src/inline/ilu.h

 10: int MatSetValues_SeqBDiag_1(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
 11: {
 12:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
 13:   int          kk,ldiag,row,newnz,*bdlen_new;
 14:   int          j,k, *diag_new,ierr;
 15:   PetscTruth   roworiented = a->roworiented,dfound;
 16:   Scalar       value,**diagv_new;

 19:   for (kk=0; kk<m; kk++) { /* loop over added rows */
 20:     row = im[kk];
 21:     if (row < 0) continue;
 22:     if (row >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 23:     for (j=0; j<n; j++) {
 24:       if (in[j] < 0) continue;
 25:       if (in[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 26:       ldiag  = row - in[j]; /* diagonal number */
 27:       dfound = PETSC_FALSE;
 28:       if (roworiented) {
 29:         value = v[j + kk*n];
 30:       } else {
 31:         value = v[kk + j*m];
 32:       }
 33:       /* search diagonals for required one */
 34:       for (k=0; k<a->nd; k++) {
 35:         if (a->diag[k] == ldiag) {
 36:           dfound = PETSC_TRUE;
 37:           if (is == ADD_VALUES) a->diagv[k][row] += value;
 38:           else                  a->diagv[k][row]  = value;
 39:           break;
 40:         }
 41:       }
 42:       if (!dfound) {
 43:         if (a->nonew || a->nonew_diag) {
 44: #if !defined(PETSC_USE_COMPLEX)
 45:           if (a->user_alloc && value) {
 46: #else
 47:           if (a->user_alloc && PetscRealPart(value) || PetscImaginaryPart(value)) {
 48: #endif
 49:             PetscLogInfo(A,"MatSetValues_SeqBDiag:Nonzero in diagonal %d that user did not allocaten",ldiag);
 50:           }
 51:         } else {
 52:           PetscLogInfo(A,"MatSetValues_SeqBDiag: Allocating new diagonal: %dn",ldiag);
 53:           a->reallocs++;
 54:           /* free old bdiag storage info and reallocate */
 55:           ierr      = PetscMalloc(2*(a->nd+1)*sizeof(int),&diag_new);
 56:           bdlen_new = diag_new + a->nd + 1;
 57:           ierr      = PetscMalloc((a->nd+1)*sizeof(Scalar*),&diagv_new);
 58:           for (k=0; k<a->nd; k++) {
 59:             diag_new[k]  = a->diag[k];
 60:             diagv_new[k] = a->diagv[k];
 61:             bdlen_new[k] = a->bdlen[k];
 62:           }
 63:           diag_new[a->nd]  = ldiag;
 64:           if (ldiag > 0) { /* lower triangular */
 65:             bdlen_new[a->nd] = PetscMin(a->nblock,a->mblock - ldiag);
 66:           } else {         /* upper triangular */
 67:             bdlen_new[a->nd] = PetscMin(a->mblock,a->nblock + ldiag);
 68:           }
 69:           newnz = bdlen_new[a->nd];
 70:           PetscMalloc(newnz*sizeof(Scalar),&diagv_new[a->nd]);
 71:           PetscMemzero(diagv_new[a->nd],newnz*sizeof(Scalar));
 72:           /* adjust pointers so that dv[diag][row] works for all diagonals*/
 73:           if (diag_new[a->nd] > 0) {
 74:             diagv_new[a->nd] -= diag_new[a->nd];
 75:           }
 76:           a->maxnz += newnz;
 77:           a->nz    += newnz;
 78:           PetscFree(a->diagv);
 79:           PetscFree(a->diag);
 80:           a->diag  = diag_new;
 81:           a->bdlen = bdlen_new;
 82:           a->diagv = diagv_new;

 84:           /* Insert value */
 85:           if (is == ADD_VALUES) a->diagv[a->nd][row] += value;
 86:           else                  a->diagv[a->nd][row] = value;
 87:           a->nd++;
 88:           PetscLogObjectMemory(A,newnz*sizeof(Scalar)+2*sizeof(int)+sizeof(Scalar*));
 89:         }
 90:       }
 91:     }
 92:   }
 93:   return(0);
 94: }


 97: int MatSetValues_SeqBDiag_N(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
 98: {
 99:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
100:   int          kk,ldiag,shift,row,newnz,*bdlen_new,ierr;
101:   int          j,k,bs = a->bs,*diag_new;
102:   PetscTruth   roworiented = a->roworiented,dfound;
103:   Scalar       value,**diagv_new;

106:   for (kk=0; kk<m; kk++) { /* loop over added rows */
107:     row = im[kk];
108:     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
109:     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
110:     shift = (row/bs)*bs*bs + row%bs;
111:     for (j=0; j<n; j++) {
112:       ldiag  = row/bs - in[j]/bs; /* block diagonal */
113:       dfound = PETSC_FALSE;
114:       if (roworiented) {
115:         value = *v++;
116:       } else {
117:         value = v[kk + j*m];
118:       }
119:       /* seach for appropriate diagonal */
120:       for (k=0; k<a->nd; k++) {
121:         if (a->diag[k] == ldiag) {
122:           dfound = PETSC_TRUE;
123:           if (is == ADD_VALUES) a->diagv[k][shift + (in[j]%bs)*bs] += value;
124:           else                  a->diagv[k][shift + (in[j]%bs)*bs] = value;
125:           break;
126:         }
127:       }
128:       if (!dfound) {
129:         if (a->nonew || a->nonew_diag) {
130: #if !defined(PETSC_USE_COMPLEX)
131:           if (a->user_alloc && value) {
132: #else
133:           if (a->user_alloc && PetscRealPart(value) || PetscImaginaryPart(value)) {
134: #endif
135:             PetscLogInfo(A,"MatSetValues_SeqBDiag:Nonzero in diagonal %d that user did not allocaten",ldiag);
136:           }
137:         } else {
138:           PetscLogInfo(A,"MatSetValues_SeqBDiag: Allocating new diagonal: %dn",ldiag);
139:           a->reallocs++;
140:           /* free old bdiag storage info and reallocate */
141:           ierr      = PetscMalloc(2*(a->nd+1)*sizeof(int),&diag_new);
142:           bdlen_new = diag_new + a->nd + 1;
143:           ierr      = PetscMalloc((a->nd+1)*sizeof(Scalar*),&diagv_new);
144:           for (k=0; k<a->nd; k++) {
145:             diag_new[k]  = a->diag[k];
146:             diagv_new[k] = a->diagv[k];
147:             bdlen_new[k] = a->bdlen[k];
148:           }
149:           diag_new[a->nd]  = ldiag;
150:           if (ldiag > 0) {/* lower triangular */
151:             bdlen_new[a->nd] = PetscMin(a->nblock,a->mblock - ldiag);
152:           } else {         /* upper triangular */
153:             bdlen_new[a->nd] = PetscMin(a->mblock,a->nblock + ldiag);
154:           }
155:           newnz = bs*bs*bdlen_new[a->nd];
156:           PetscMalloc(newnz*sizeof(Scalar),&diagv_new[a->nd]);
157:           PetscMemzero(diagv_new[a->nd],newnz*sizeof(Scalar));
158:           /* adjust pointer so that dv[diag][row] works for all diagonals */
159:           if (diag_new[a->nd] > 0) {
160:             diagv_new[a->nd] -= bs*bs*diag_new[a->nd];
161:           }
162:           a->maxnz += newnz; a->nz += newnz;
163:           PetscFree(a->diagv);
164:           PetscFree(a->diag);
165:           a->diag  = diag_new;
166:           a->bdlen = bdlen_new;
167:           a->diagv = diagv_new;

169:           /* Insert value */
170:           if (is == ADD_VALUES) a->diagv[k][shift + (in[j]%bs)*bs] += value;
171:           else                  a->diagv[k][shift + (in[j]%bs)*bs] = value;
172:           a->nd++;
173:           PetscLogObjectMemory(A,newnz*sizeof(Scalar)+2*sizeof(int)+sizeof(Scalar*));
174:         }
175:       }
176:     }
177:   }
178:   return(0);
179: }

181: int MatGetValues_SeqBDiag_1(Mat A,int m,int *im,int n,int *in,Scalar *v)
182: {
183:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
184:   int          kk,ldiag,row,j,k;
185:   Scalar       zero = 0.0;
186:   PetscTruth   dfound;

189:   for (kk=0; kk<m; kk++) { /* loop over rows */
190:     row = im[kk];
191:     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
192:     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
193:     for (j=0; j<n; j++) {
194:       if (in[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
195:       if (in[j] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
196:       ldiag = row - in[j]; /* diagonal number */
197:       dfound = PETSC_FALSE;
198:       for (k=0; k<a->nd; k++) {
199:         if (a->diag[k] == ldiag) {
200:           dfound = PETSC_TRUE;
201:           *v++ = a->diagv[k][row];
202:           break;
203:         }
204:       }
205:       if (!dfound) *v++ = zero;
206:     }
207:   }
208:   return(0);
209: }

211: int MatGetValues_SeqBDiag_N(Mat A,int m,int *im,int n,int *in,Scalar *v)
212: {
213:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
214:   int          kk,ldiag,shift,row,j,k,bs = a->bs;
215:   Scalar       zero = 0.0;
216:   PetscTruth   dfound;

219:   for (kk=0; kk<m; kk++) { /* loop over rows */
220:     row = im[kk];
221:     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
222:     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
223:     shift = (row/bs)*bs*bs + row%bs;
224:     for (j=0; j<n; j++) {
225:       ldiag  = row/bs - in[j]/bs; /* block diagonal */
226:       dfound = PETSC_FALSE;
227:       for (k=0; k<a->nd; k++) {
228:         if (a->diag[k] == ldiag) {
229:           dfound = PETSC_TRUE;
230:           *v++ = a->diagv[k][shift + (in[j]%bs)*bs ];
231:           break;
232:         }
233:       }
234:       if (!dfound) *v++ = zero;
235:     }
236:   }
237:   return(0);
238: }

240: /*
241:     MatMults for blocksize 1 to 5 and N -------------------------------
242:  */
243: int MatMult_SeqBDiag_1(Mat A,Vec xx,Vec yy)
244: {
245:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
246:   int             nd = a->nd,diag,*a_diag = a->diag,*a_bdlen = a->bdlen;
247:   int             ierr,d,j,len;
248:   Scalar          *vin,*vout,**a_diagv = a->diagv;
249:   Scalar          *pvin,*pvout,*dv;

252:   VecGetArray(xx,&vin);
253:   VecGetArray(yy,&vout);
254:   PetscMemzero(vout,A->m*sizeof(Scalar));
255:   for (d=0; d<nd; d++) {
256:     dv   = a_diagv[d];
257:     diag = a_diag[d];
258:     len  = a_bdlen[d];
259:     if (diag > 0) {             /* lower triangle */
260:       pvin  = vin;
261:       pvout = vout + diag;
262:       dv    = dv   + diag;
263:     } else {                     /* upper triangle,including main diagonal */
264:       pvin  = vin - diag;
265:       pvout = vout;
266:     }
267:     for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
268:     PetscLogFlops(2*len);
269:   }
270:   VecRestoreArray(xx,&vin);
271:   VecRestoreArray(yy,&vout);
272:   return(0);
273: }

275: int MatMult_SeqBDiag_2(Mat A,Vec xx,Vec yy)
276: {
277:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
278:   int             nd = a->nd,nb_diag;
279:   int             ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
280:   Scalar          *vin,*vout,**a_diagv = a->diagv;
281:   Scalar          *pvin,*pvout,*dv,pvin0,pvin1;

284:   VecGetArray(xx,&vin);
285:   VecGetArray(yy,&vout);
286:   PetscMemzero(vout,A->m*sizeof(Scalar));
287:   for (d=0; d<nd; d++) {
288:     dv      = a_diagv[d];
289:     nb_diag = 2*a_diag[d];
290:     len     = a_bdlen[d];
291:     if (nb_diag > 0) {                /* lower triangle */
292:       pvin  = vin;
293:       pvout = vout + nb_diag;
294:       dv    = dv   + 2*nb_diag;
295:     } else {                       /* upper triangle, including main diagonal */
296:       pvin  = vin - nb_diag;
297:       pvout = vout;
298:     }
299:     for (k=0; k<len; k++) {
300:       pvin0     = pvin[0]; pvin1 = pvin[1];

302:       pvout[0] += dv[0]*pvin0 + dv[2]*pvin1;
303:       pvout[1] += dv[1]*pvin0 + dv[3]*pvin1;

305:       pvout += 2; pvin += 2; dv += 4;
306:     }
307:     PetscLogFlops(8*len);
308:   }
309:   VecRestoreArray(xx,&vin);
310:   VecRestoreArray(yy,&vout);
311:   return(0);
312: }

314: int MatMult_SeqBDiag_3(Mat A,Vec xx,Vec yy)
315: {
316:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
317:   int             nd = a->nd,nb_diag;
318:   int             ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
319:   Scalar          *vin,*vout,**a_diagv = a->diagv;
320:   Scalar          *pvin,*pvout,*dv,pvin0,pvin1,pvin2;

323:   VecGetArray(xx,&vin);
324:   VecGetArray(yy,&vout);
325:   PetscMemzero(vout,A->m*sizeof(Scalar));
326:   for (d=0; d<nd; d++) {
327:     dv      = a_diagv[d];
328:     nb_diag = 3*a_diag[d];
329:     len     = a_bdlen[d];
330:     if (nb_diag > 0) {                /* lower triangle */
331:       pvin  = vin;
332:       pvout = vout + nb_diag;
333:       dv    = dv   + 3*nb_diag;
334:     } else {                       /* upper triangle,including main diagonal */
335:       pvin  = vin - nb_diag;
336:       pvout = vout;
337:     }
338:     for (k=0; k<len; k++) {
339:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2];

341:       pvout[0] += dv[0]*pvin0 + dv[3]*pvin1  + dv[6]*pvin2;
342:       pvout[1] += dv[1]*pvin0 + dv[4]*pvin1  + dv[7]*pvin2;
343:       pvout[2] += dv[2]*pvin0 + dv[5]*pvin1  + dv[8]*pvin2;

345:       pvout += 3; pvin += 3; dv += 9;
346:     }
347:     PetscLogFlops(18*len);
348:   }
349:   VecRestoreArray(xx,&vin);
350:   VecRestoreArray(yy,&vout);
351:   return(0);
352: }

354: int MatMult_SeqBDiag_4(Mat A,Vec xx,Vec yy)
355: {
356:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
357:   int             nd = a->nd,nb_diag;
358:   int             ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
359:   Scalar          *vin,*vout,**a_diagv = a->diagv;
360:   Scalar          *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3;

363:   VecGetArray(xx,&vin);
364:   VecGetArray(yy,&vout);
365:   PetscMemzero(vout,A->m*sizeof(Scalar));
366:   for (d=0; d<nd; d++) {
367:     dv      = a_diagv[d];
368:     nb_diag = 4*a_diag[d];
369:     len     = a_bdlen[d];
370:     if (nb_diag > 0) {                /* lower triangle */
371:       pvin  = vin;
372:       pvout = vout + nb_diag;
373:       dv    = dv   + 4*nb_diag;
374:     } else {                       /* upper triangle,including main diagonal */
375:       pvin  = vin - nb_diag;
376:       pvout = vout;
377:     }
378:     for (k=0; k<len; k++) {
379:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3];

381:       pvout[0] += dv[0]*pvin0 + dv[4]*pvin1  + dv[8]*pvin2 + dv[12]*pvin3;
382:       pvout[1] += dv[1]*pvin0 + dv[5]*pvin1  + dv[9]*pvin2 + dv[13]*pvin3;
383:       pvout[2] += dv[2]*pvin0 + dv[6]*pvin1  + dv[10]*pvin2 + dv[14]*pvin3;
384:       pvout[3] += dv[3]*pvin0 + dv[7]*pvin1  + dv[11]*pvin2 + dv[15]*pvin3;

386:       pvout += 4; pvin += 4; dv += 16;
387:     }
388:     PetscLogFlops(32*len);
389:   }
390:   VecRestoreArray(xx,&vin);
391:   VecRestoreArray(yy,&vout);
392:   return(0);
393: }

395: int MatMult_SeqBDiag_5(Mat A,Vec xx,Vec yy)
396: {
397:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
398:   int             nd = a->nd,nb_diag;
399:   int             ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
400:   Scalar          *vin,*vout,**a_diagv = a->diagv;
401:   Scalar          *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3,pvin4;

404:   VecGetArray(xx,&vin);
405:   VecGetArray(yy,&vout);
406:   PetscMemzero(vout,A->m*sizeof(Scalar));
407:   for (d=0; d<nd; d++) {
408:     dv      = a_diagv[d];
409:     nb_diag = 5*a_diag[d];
410:     len     = a_bdlen[d];
411:     if (nb_diag > 0) {                /* lower triangle */
412:       pvin  = vin;
413:       pvout = vout + nb_diag;
414:       dv    = dv   + 5*nb_diag;
415:     } else {                       /* upper triangle,including main diagonal */
416:       pvin  = vin - nb_diag;
417:       pvout = vout;
418:     }
419:     for (k=0; k<len; k++) {
420:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3]; pvin4 = pvin[4];

422:       pvout[0] += dv[0]*pvin0 + dv[5]*pvin1  + dv[10]*pvin2 + dv[15]*pvin3 + dv[20]*pvin4;
423:       pvout[1] += dv[1]*pvin0 + dv[6]*pvin1  + dv[11]*pvin2 + dv[16]*pvin3 + dv[21]*pvin4;
424:       pvout[2] += dv[2]*pvin0 + dv[7]*pvin1  + dv[12]*pvin2 + dv[17]*pvin3 + dv[22]*pvin4;
425:       pvout[3] += dv[3]*pvin0 + dv[8]*pvin1  + dv[13]*pvin2 + dv[18]*pvin3 + dv[23]*pvin4;
426:       pvout[4] += dv[4]*pvin0 + dv[9]*pvin1  + dv[14]*pvin2 + dv[19]*pvin3 + dv[24]*pvin4;

428:       pvout += 5; pvin += 5; dv += 25;
429:     }
430:     PetscLogFlops(50*len);
431:   }
432:   VecRestoreArray(xx,&vin);
433:   VecRestoreArray(yy,&vout);
434:   return(0);
435: }

437: int MatMult_SeqBDiag_N(Mat A,Vec xx,Vec yy)
438: {
439:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
440:   int             nd = a->nd,bs = a->bs,nb_diag,bs2 = bs*bs;
441:   int             ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
442:   Scalar          *vin,*vout,**a_diagv = a->diagv;
443:   Scalar          *pvin,*pvout,*dv;

446:   VecGetArray(xx,&vin);
447:   VecGetArray(yy,&vout);
448:   PetscMemzero(vout,A->m*sizeof(Scalar));
449:   for (d=0; d<nd; d++) {
450:     dv      = a_diagv[d];
451:     nb_diag = bs*a_diag[d];
452:     len     = a_bdlen[d];
453:     if (nb_diag > 0) {                /* lower triangle */
454:       pvin  = vin;
455:       pvout = vout + nb_diag;
456:       dv    = dv   + bs*nb_diag;
457:     } else {                       /* upper triangle, including main diagonal */
458:       pvin  = vin - nb_diag;
459:       pvout = vout;
460:     }
461:     for (k=0; k<len; k++) {
462:       Kernel_v_gets_v_plus_A_times_w(bs,pvout,dv,pvin);
463:       pvout += bs; pvin += bs; dv += bs2;
464:     }
465:     PetscLogFlops(2*bs2*len);
466:   }
467:   VecRestoreArray(xx,&vin);
468:   VecRestoreArray(yy,&vout);
469:   return(0);
470: }

472: /*
473:     MatMultAdds for blocksize 1 to 5 and N -------------------------------
474:  */
475: int MatMultAdd_SeqBDiag_1(Mat A,Vec xx,Vec zz,Vec yy)
476: {
477:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
478:   int             ierr,nd = a->nd,diag,*a_diag = a->diag,*a_bdlen = a->bdlen,d,j,len;
479:   Scalar          *vin,*vout,**a_diagv = a->diagv;
480:   Scalar          *pvin,*pvout,*dv;

483:   if (zz != yy) {VecCopy(zz,yy);}
484:   VecGetArray(xx,&vin);
485:   VecGetArray(yy,&vout);
486:   for (d=0; d<nd; d++) {
487:     dv   = a_diagv[d];
488:     diag = a_diag[d];
489:     len  = a_bdlen[d];
490:     if (diag > 0) {             /* lower triangle */
491:       pvin  = vin;
492:       pvout = vout + diag;
493:       dv    = dv   + diag;
494:     } else {                     /* upper triangle, including main diagonal */
495:       pvin  = vin - diag;
496:       pvout = vout;
497:     }
498:     for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
499:     PetscLogFlops(2*len);
500:   }
501:   VecRestoreArray(xx,&vin);
502:   VecRestoreArray(yy,&vout);
503:   return(0);
504: }

506: int MatMultAdd_SeqBDiag_2(Mat A,Vec xx,Vec zz,Vec yy)
507: {
508:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
509:   int             ierr,nd = a->nd,nb_diag;
510:   int             *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
511:   Scalar          *vin,*vout,**a_diagv = a->diagv;
512:   Scalar          *pvin,*pvout,*dv,pvin0,pvin1;

515:   if (zz != yy) {VecCopy(zz,yy);}
516:   VecGetArray(xx,&vin);
517:   VecGetArray(yy,&vout);
518:   for (d=0; d<nd; d++) {
519:     dv      = a_diagv[d];
520:     nb_diag = 2*a_diag[d];
521:     len     = a_bdlen[d];
522:     if (nb_diag > 0) {                /* lower triangle */
523:       pvin  = vin;
524:       pvout = vout + nb_diag;
525:       dv    = dv   + 2*nb_diag;
526:     } else {                       /* upper triangle, including main diagonal */
527:       pvin  = vin - nb_diag;
528:       pvout = vout;
529:     }
530:     for (k=0; k<len; k++) {
531:       pvin0 = pvin[0]; pvin1 = pvin[1];

533:       pvout[0] += dv[0]*pvin0 + dv[2]*pvin1;
534:       pvout[1] += dv[1]*pvin0 + dv[3]*pvin1;

536:       pvout += 2; pvin += 2; dv += 4;
537:     }
538:     PetscLogFlops(8*len);
539:   }
540:   VecRestoreArray(xx,&vin);
541:   VecRestoreArray(yy,&vout);
542:   return(0);
543: }

545: int MatMultAdd_SeqBDiag_3(Mat A,Vec xx,Vec zz,Vec yy)
546: {
547:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
548:   int             ierr,nd = a->nd,nb_diag;
549:   int             *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
550:   Scalar          *vin,*vout,**a_diagv = a->diagv;
551:   Scalar          *pvin,*pvout,*dv,pvin0,pvin1,pvin2;

554:   if (zz != yy) {VecCopy(zz,yy);}
555:   VecGetArray(xx,&vin);
556:   VecGetArray(yy,&vout);
557:   for (d=0; d<nd; d++) {
558:     dv      = a_diagv[d];
559:     nb_diag = 3*a_diag[d];
560:     len     = a_bdlen[d];
561:     if (nb_diag > 0) {                /* lower triangle */
562:       pvin  = vin;
563:       pvout = vout + nb_diag;
564:       dv    = dv   + 3*nb_diag;
565:     } else {                       /* upper triangle, including main diagonal */
566:       pvin  = vin - nb_diag;
567:       pvout = vout;
568:     }
569:     for (k=0; k<len; k++) {
570:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2];

572:       pvout[0] += dv[0]*pvin0 + dv[3]*pvin1  + dv[6]*pvin2;
573:       pvout[1] += dv[1]*pvin0 + dv[4]*pvin1  + dv[7]*pvin2;
574:       pvout[2] += dv[2]*pvin0 + dv[5]*pvin1  + dv[8]*pvin2;

576:       pvout += 3; pvin += 3; dv += 9;
577:     }
578:     PetscLogFlops(18*len);
579:   }
580:   VecRestoreArray(xx,&vin);
581:   VecRestoreArray(yy,&vout);
582:   return(0);
583: }

585: int MatMultAdd_SeqBDiag_4(Mat A,Vec xx,Vec zz,Vec yy)
586: {
587:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
588:   int             ierr,nd = a->nd,nb_diag;
589:   int             *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
590:   Scalar          *vin,*vout,**a_diagv = a->diagv;
591:   Scalar          *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3;

594:   if (zz != yy) {VecCopy(zz,yy);}
595:   VecGetArray(xx,&vin);
596:   VecGetArray(yy,&vout);
597:   for (d=0; d<nd; d++) {
598:     dv      = a_diagv[d];
599:     nb_diag = 4*a_diag[d];
600:     len     = a_bdlen[d];
601:     if (nb_diag > 0) {                /* lower triangle */
602:       pvin  = vin;
603:       pvout = vout + nb_diag;
604:       dv    = dv   + 4*nb_diag;
605:     } else {                       /* upper triangle, including main diagonal */
606:       pvin  = vin - nb_diag;
607:       pvout = vout;
608:     }
609:     for (k=0; k<len; k++) {
610:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3];

612:       pvout[0] += dv[0]*pvin0 + dv[4]*pvin1  + dv[8]*pvin2 + dv[12]*pvin3;
613:       pvout[1] += dv[1]*pvin0 + dv[5]*pvin1  + dv[9]*pvin2 + dv[13]*pvin3;
614:       pvout[2] += dv[2]*pvin0 + dv[6]*pvin1  + dv[10]*pvin2 + dv[14]*pvin3;
615:       pvout[3] += dv[3]*pvin0 + dv[7]*pvin1  + dv[11]*pvin2 + dv[15]*pvin3;

617:       pvout += 4; pvin += 4; dv += 16;
618:     }
619:     PetscLogFlops(32*len);
620:   }
621:   VecRestoreArray(xx,&vin);
622:   VecRestoreArray(yy,&vout);
623:   return(0);
624: }

626: int MatMultAdd_SeqBDiag_5(Mat A,Vec xx,Vec zz,Vec yy)
627: {
628:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
629:   int             ierr,nd = a->nd,nb_diag;
630:   int             *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
631:   Scalar          *vin,*vout,**a_diagv = a->diagv;
632:   Scalar          *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3,pvin4;

635:   if (zz != yy) {VecCopy(zz,yy);}
636:   VecGetArray(xx,&vin);
637:   VecGetArray(yy,&vout);
638:   for (d=0; d<nd; d++) {
639:     dv      = a_diagv[d];
640:     nb_diag = 5*a_diag[d];
641:     len     = a_bdlen[d];
642:     if (nb_diag > 0) {                /* lower triangle */
643:       pvin  = vin;
644:       pvout = vout + nb_diag;
645:       dv    = dv   + 5*nb_diag;
646:     } else {                       /* upper triangle, including main diagonal */
647:       pvin  = vin - nb_diag;
648:       pvout = vout;
649:     }
650:     for (k=0; k<len; k++) {
651:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3]; pvin4 = pvin[4];

653:       pvout[0] += dv[0]*pvin0 + dv[5]*pvin1  + dv[10]*pvin2 + dv[15]*pvin3 + dv[20]*pvin4;
654:       pvout[1] += dv[1]*pvin0 + dv[6]*pvin1  + dv[11]*pvin2 + dv[16]*pvin3 + dv[21]*pvin4;
655:       pvout[2] += dv[2]*pvin0 + dv[7]*pvin1  + dv[12]*pvin2 + dv[17]*pvin3 + dv[22]*pvin4;
656:       pvout[3] += dv[3]*pvin0 + dv[8]*pvin1  + dv[13]*pvin2 + dv[18]*pvin3 + dv[23]*pvin4;
657:       pvout[4] += dv[4]*pvin0 + dv[9]*pvin1  + dv[14]*pvin2 + dv[19]*pvin3 + dv[24]*pvin4;

659:       pvout += 5; pvin += 5; dv += 25;
660:     }
661:     PetscLogFlops(50*len);
662:   }
663:   VecRestoreArray(xx,&vin);
664:   VecRestoreArray(yy,&vout);
665:   return(0);
666: }

668: int MatMultAdd_SeqBDiag_N(Mat A,Vec xx,Vec zz,Vec yy)
669: {
670:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
671:   int             ierr,nd = a->nd,bs = a->bs,nb_diag,bs2 = bs*bs;
672:   int             *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
673:   Scalar          *vin,*vout,**a_diagv = a->diagv;
674:   Scalar          *pvin,*pvout,*dv;

677:   if (zz != yy) {VecCopy(zz,yy);}
678:   VecGetArray(xx,&vin);
679:   VecGetArray(yy,&vout);
680:   for (d=0; d<nd; d++) {
681:     dv      = a_diagv[d];
682:     nb_diag = bs*a_diag[d];
683:     len     = a_bdlen[d];
684:     if (nb_diag > 0) {                /* lower triangle */
685:       pvin  = vin;
686:       pvout = vout + nb_diag;
687:       dv    = dv   + bs*nb_diag;
688:     } else {                       /* upper triangle, including main diagonal */
689:       pvin  = vin - nb_diag;
690:       pvout = vout;
691:     }
692:     for (k=0; k<len; k++) {
693:       Kernel_v_gets_v_plus_A_times_w(bs,pvout,dv,pvin);
694:       pvout += bs; pvin += bs; dv += bs2;
695:     }
696:     PetscLogFlops(2*bs2*len);
697:   }
698:   VecRestoreArray(xx,&vin);
699:   VecRestoreArray(yy,&vout);
700:   return(0);
701: }

703: /*
704:      MatMultTranspose ----------------------------------------------
705:  */
706: int MatMultTranspose_SeqBDiag_1(Mat A,Vec xx,Vec yy)
707: {
708:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
709:   int             ierr,nd = a->nd,diag,d,j,len;
710:   Scalar          *pvin,*pvout,*dv;
711:   Scalar          *vin,*vout;
712: 
714:   VecGetArray(xx,&vin);
715:   VecGetArray(yy,&vout);
716:   PetscMemzero(vout,A->n*sizeof(Scalar));
717:   for (d=0; d<nd; d++) {
718:     dv   = a->diagv[d];
719:     diag = a->diag[d];
720:     len  = a->bdlen[d];
721:       /* diag of original matrix is (row/bs - col/bs) */
722:       /* diag of transpose matrix is (col/bs - row/bs) */
723:     if (diag < 0) {        /* transpose is lower triangle */
724:       pvin  = vin;
725:       pvout = vout - diag;
726:     } else {        /* transpose is upper triangle, including main diagonal */
727:       pvin  = vin + diag;
728:       pvout = vout;
729:       dv    = dv + diag;
730:     }
731:     for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
732:   }
733:   VecRestoreArray(xx,&vin);
734:   VecRestoreArray(yy,&vout);
735:   return(0);
736: }

738: int MatMultTranspose_SeqBDiag_N(Mat A,Vec xx,Vec yy)
739: {
740:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
741:   int             ierr,nd = a->nd,bs = a->bs,diag,kshift,kloc,d,i,j,k,len;
742:   Scalar          *pvin,*pvout,*dv;
743:   Scalar          *vin,*vout;
744: 
746:   VecGetArray(xx,&vin);
747:   VecGetArray(yy,&vout);
748:   PetscMemzero(vout,A->n*sizeof(Scalar));
749:   for (d=0; d<nd; d++) {
750:     dv   = a->diagv[d];
751:     diag = a->diag[d];
752:     len  = a->bdlen[d];
753:       /* diag of original matrix is (row/bs - col/bs) */
754:       /* diag of transpose matrix is (col/bs - row/bs) */
755:     if (diag < 0) {        /* transpose is lower triangle */
756:       pvin  = vin;
757:       pvout = vout - bs*diag;
758:     } else {        /* transpose is upper triangle, including main diagonal */
759:       pvin  = vin + bs*diag;
760:       pvout = vout;
761:       dv    = dv + diag;
762:     }
763:     for (k=0; k<len; k++) {
764:       kloc = k*bs; kshift = kloc*bs;
765:       for (i=0; i<bs; i++) {         /* i = local column of transpose */
766:         for (j=0; j<bs; j++) {   /* j = local row of transpose */
767:           pvout[kloc + j] += dv[kshift + j*bs + i] * pvin[kloc + i];
768:         }
769:       }
770:     }
771:   }
772:   VecRestoreArray(xx,&vin);
773:   VecRestoreArray(yy,&vout);
774:   return(0);
775: }

777: /*
778:      MatMultTransposeAdd ----------------------------------------------
779:  */
780: int MatMultTransposeAdd_SeqBDiag_1(Mat A,Vec xx,Vec zz,Vec yy)
781: {
782:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
783:   int             ierr,nd = a->nd,diag,d,j,len;
784:   Scalar          *pvin,*pvout,*dv;
785:   Scalar          *vin,*vout;
786: 
788:   if (zz != yy) {VecCopy(zz,yy);}
789:   VecGetArray(xx,&vin);
790:   VecGetArray(yy,&vout);
791:   for (d=0; d<nd; d++) {
792:     dv   = a->diagv[d];
793:     diag = a->diag[d];
794:     len  = a->bdlen[d];
795:       /* diag of original matrix is (row/bs - col/bs) */
796:       /* diag of transpose matrix is (col/bs - row/bs) */
797:     if (diag < 0) {        /* transpose is lower triangle */
798:       pvin  = vin;
799:       pvout = vout - diag;
800:     } else {        /* transpose is upper triangle, including main diagonal */
801:       pvin  = vin + diag;
802:       pvout = vout;
803:       dv    = dv + diag;
804:     }
805:     for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
806:   }
807:   VecRestoreArray(xx,&vin);
808:   VecRestoreArray(yy,&vout);
809:   return(0);
810: }

812: int MatMultTransposeAdd_SeqBDiag_N(Mat A,Vec xx,Vec zz,Vec yy)
813: {
814:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
815:   int             ierr,nd = a->nd,bs = a->bs,diag,kshift,kloc,d,i,j,k,len;
816:   Scalar          *pvin,*pvout,*dv;
817:   Scalar          *vin,*vout;
818: 
820:   if (zz != yy) {VecCopy(zz,yy);}
821:   VecGetArray(xx,&vin);
822:   VecGetArray(yy,&vout);
823:   for (d=0; d<nd; d++) {
824:     dv   = a->diagv[d];
825:     diag = a->diag[d];
826:     len  = a->bdlen[d];
827:       /* diag of original matrix is (row/bs - col/bs) */
828:       /* diag of transpose matrix is (col/bs - row/bs) */
829:     if (diag < 0) {        /* transpose is lower triangle */
830:       pvin  = vin;
831:       pvout = vout - bs*diag;
832:     } else {        /* transpose is upper triangle, including main diagonal */
833:       pvin  = vin + bs*diag;
834:       pvout = vout;
835:       dv    = dv + diag;
836:     }
837:     for (k=0; k<len; k++) {
838:       kloc = k*bs; kshift = kloc*bs;
839:       for (i=0; i<bs; i++) {         /* i = local column of transpose */
840:         for (j=0; j<bs; j++) {   /* j = local row of transpose */
841:           pvout[kloc + j] += dv[kshift + j*bs + i] * pvin[kloc + i];
842:         }
843:       }
844:     }
845:   }
846:   VecRestoreArray(xx,&vin);
847:   VecRestoreArray(yy,&vout);
848:   return(0);
849: }
850: int MatRelax_SeqBDiag_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,
851:                              PetscReal shift,int its,Vec xx)
852: {
853:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
854:   Scalar       *x,*b,*xb,*dd,*dv,dval,sum;
855:   int          ierr,i,j,k,d,kbase,bs = a->bs,kloc;
856:   int          mainbd = a->mainbd,diag,mblock = a->mblock,bloc;

859:   /* Currently this code doesn't use wavefront orderings, although
860:      we should eventually incorporate that option, whatever wavefront
861:      ordering maybe :-) */

863:   if (mainbd == -1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Main diagonal not set");

865:   VecGetArray(xx,&x);
866:   if (xx != bb) {
867:     VecGetArray(bb,&b);
868:   } else {
869:     b = x;
870:   }
871:   dd = a->diagv[mainbd];
872:   if (flag == SOR_APPLY_UPPER) {
873:     /* apply (U + D/omega) to the vector */
874:     for (k=0; k<mblock; k++) {
875:       kloc = k*bs; kbase = kloc*bs;
876:       for (i=0; i<bs; i++) {
877:         sum = b[i+kloc] * (shift + dd[i*(bs+1)+kbase]) / omega;
878:         for (j=i+1; j<bs; j++) sum += dd[kbase + j*bs + i] * b[kloc + j];
879:         for (d=mainbd+1; d<a->nd; d++) {
880:           diag = a->diag[d];
881:           dv   = a->diagv[d];
882:           if (k-diag < mblock) {
883:             for (j=0; j<bs; j++) {
884:               sum += dv[kbase + j*bs + i] * b[(k-diag)*bs + j];
885:             }
886:           }
887:         }
888:         x[kloc+i] = sum;
889:       }
890:     }
891:     VecRestoreArray(xx,&x);
892:     if (xx != bb) {VecRestoreArray(bb,&b);}
893:     return(0);
894:   }
895:   if (flag & SOR_ZERO_INITIAL_GUESS) {
896:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
897:       for (k=0; k<mblock; k++) {
898:         kloc = k*bs; kbase = kloc*bs;
899:         for (i=0; i<bs; i++) {
900:           sum  = b[i+kloc];
901:           dval = shift + dd[i*(bs+1)+kbase];
902:           for (d=0; d<mainbd; d++) {
903:             diag = a->diag[d];
904:             dv   = a->diagv[d];
905:             if (k >= diag) {
906:               for (j=0; j<bs; j++)
907:                 sum -= dv[k*bs*bs + j*bs + i] * x[(k-diag)*bs + j];
908:             }
909:           }
910:           for (j=0; j<i; j++){
911:             sum -= dd[kbase + j*bs + i] * x[kloc + j];
912:           }
913:           x[kloc+i] = omega*sum/dval;
914:         }
915:       }
916:       xb = x;
917:     } else xb = b;
918:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
919:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
920:       for (k=0; k<mblock; k++) {
921:         kloc = k*bs; kbase = kloc*bs;
922:         for (i=0; i<bs; i++)
923:           x[kloc+i] *= dd[i*(bs+1)+kbase];
924:       }
925:     }
926:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
927:       for (k=mblock-1; k>=0; k--) {
928:         kloc = k*bs; kbase = kloc*bs;
929:         for (i=bs-1; i>=0; i--) {
930:           sum  = xb[i+kloc];
931:           dval = shift + dd[i*(bs+1)+kbase];
932:           for (j=i+1; j<bs; j++)
933:             sum -= dd[kbase + j*bs + i] * x[kloc + j];
934:           for (d=mainbd+1; d<a->nd; d++) {
935:             diag = a->diag[d];
936:             dv   = a->diagv[d];
937:             bloc = k - diag;
938:             if (bloc < mblock) {
939:               for (j=0; j<bs; j++)
940:                 sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
941:             }
942:           }
943:           x[kloc+i] = omega*sum/dval;
944:         }
945:       }
946:     }
947:     its--;
948:   }
949:   while (its--) {
950:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
951:       for (k=0; k<mblock; k++) {
952:         kloc = k*bs; kbase = kloc*bs;
953:         for (i=0; i<bs; i++) {
954:           sum  = b[i+kloc];
955:           dval = shift + dd[i*(bs+1)+kbase];
956:           for (d=0; d<mainbd; d++) {
957:             diag = a->diag[d];
958:             dv   = a->diagv[d];
959:             bloc = k - diag;
960:             if (bloc >= 0) {
961:               for (j=0; j<bs; j++) {
962:                 sum -= dv[k*bs*bs + j*bs + i] * x[bloc*bs + j];
963:               }
964:             }
965:           }
966:           for (d=mainbd; d<a->nd; d++) {
967:             diag = a->diag[d];
968:             dv   = a->diagv[d];
969:             bloc = k - diag;
970:             if (bloc < mblock) {
971:               for (j=0; j<bs; j++) {
972:                 sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
973:               }
974:             }
975:           }
976:           x[kloc+i] = (1.-omega)*x[kloc+i]+omega*(sum+dd[i*(bs+1)+kbase]*x[kloc+i])/dval;
977:         }
978:       }
979:     }
980:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
981:       for (k=mblock-1; k>=0; k--) {
982:         kloc = k*bs; kbase = kloc*bs;
983:         for (i=bs-1; i>=0; i--) {
984:           sum  = b[i+kloc];
985:           dval = shift + dd[i*(bs+1)+kbase];
986:           for (d=0; d<mainbd; d++) {
987:             diag = a->diag[d];
988:             dv   = a->diagv[d];
989:             bloc = k - diag;
990:             if (bloc >= 0) {
991:               for (j=0; j<bs; j++) {
992:                 sum -= dv[k*bs*bs + j*bs + i] * x[bloc*bs + j];
993:               }
994:             }
995:           }
996:           for (d=mainbd; d<a->nd; d++) {
997:             diag = a->diag[d];
998:             dv   = a->diagv[d];
999:             bloc = k - diag;
1000:             if (bloc < mblock) {
1001:               for (j=0; j<bs; j++) {
1002:                 sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
1003:               }
1004:             }
1005:           }
1006:           x[kloc+i] = (1.-omega)*x[kloc+i]+omega*(sum+dd[i*(bs+1)+kbase]*x[kloc+i])/dval;
1007:         }
1008:       }
1009:     }
1010:   }
1011:   VecRestoreArray(xx,&x);
1012:   if (xx != bb) VecRestoreArray(bb,&b);
1013:   return(0);
1014: }

1016: int MatRelax_SeqBDiag_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,
1017:                                PetscReal shift,int its,Vec xx)
1018: {
1019:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
1020:   Scalar       *x,*b,*xb,*dd,dval,sum;
1021:   int          ierr,m = A->m,i,d,loc;
1022:   int          mainbd = a->mainbd,diag;

1025:   /* Currently this code doesn't use wavefront orderings,although
1026:      we should eventually incorporate that option, whatever wavefront
1027:      ordering maybe :-) */

1029:   VecGetArray(xx,&x);
1030:   VecGetArray(bb,&b);
1031:   if (mainbd == -1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Main diagonal not set");
1032:   dd = a->diagv[mainbd];
1033:   if (flag == SOR_APPLY_UPPER) {
1034:     /* apply (U + D/omega) to the vector */
1035:     for (i=0; i<m; i++) {
1036:       sum = b[i] * (shift + dd[i]) / omega;
1037:       for (d=mainbd+1; d<a->nd; d++) {
1038:         diag = a->diag[d];
1039:         if (i-diag < m) sum += a->diagv[d][i] * x[i-diag];
1040:       }
1041:       x[i] = sum;
1042:     }
1043:     VecRestoreArray(xx,&x);
1044:     VecRestoreArray(bb,&b);
1045:     return(0);
1046:   }
1047:   if (flag & SOR_ZERO_INITIAL_GUESS) {
1048:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1049:       for (i=0; i<m; i++) {
1050:         sum  = b[i];
1051:         for (d=0; d<mainbd; d++) {
1052:           if (i >= a->diag[d]) sum -= a->diagv[d][i] * x[i-a->diag[d]];
1053:         }
1054:         x[i] = omega*(sum/(shift + dd[i]));
1055:       }
1056:       xb = x;
1057:     } else xb = b;
1058:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1059:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1060:       for (i=0; i<m; i++) x[i] *= dd[i];
1061:     }
1062:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1063:       for (i=m-1; i>=0; i--) {
1064:         sum = xb[i];
1065:         for (d=mainbd+1; d<a->nd; d++) {
1066:           diag = a->diag[d];
1067:           if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1068:         }
1069:         x[i] = omega*(sum/(shift + dd[i]));
1070:       }
1071:     }
1072:     its--;
1073:   }
1074:   while (its--) {
1075:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1076:       for (i=0; i<m; i++) {
1077:         sum  = b[i];
1078:         dval = shift + dd[i];
1079:         for (d=0; d<mainbd; d++) {
1080:           if (i >= a->diag[d]) sum -= a->diagv[d][i] * x[i-a->diag[d]];
1081:         }
1082:         for (d=mainbd; d<a->nd; d++) {
1083:           diag = a->diag[d];
1084:           if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1085:         }
1086:         x[i] = (1. - omega)*x[i] + omega*(sum + dd[i]*x[i])/dval;
1087:       }
1088:     }
1089:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1090:       for (i=m-1; i>=0; i--) {
1091:         sum = b[i];
1092:         for (d=0; d<mainbd; d++) {
1093:           loc = i - a->diag[d];
1094:           if (loc >= 0) sum -= a->diagv[d][i] * x[loc];
1095:         }
1096:         for (d=mainbd; d<a->nd; d++) {
1097:           diag = a->diag[d];
1098:           if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1099:         }
1100:         x[i] = (1. - omega)*x[i] + omega*(sum + dd[i]*x[i])/(shift + dd[i]);
1101:       }
1102:     }
1103:   }
1104:   VecRestoreArray(xx,&x);
1105:   VecRestoreArray(bb,&b);
1106:   return(0);
1107: }