Actual source code: bdiag2.c

  1: #define PETSCMAT_DLL

  3: /* Block diagonal matrix format */

 5:  #include src/mat/impls/bdiag/seq/bdiag.h
 6:  #include src/inline/ilu.h

 10: PetscErrorCode MatSetValues_SeqBDiag_1(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
 11: {
 12:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
 13:   PetscInt       kk,ldiag,row,newnz,*bdlen_new;
 15:   PetscInt       j,k, *diag_new;
 16:   PetscTruth     roworiented = a->roworiented,dfound;
 17:   PetscScalar    value,**diagv_new;

 20:   for (kk=0; kk<m; kk++) { /* loop over added rows */
 21:     row = im[kk];
 22:     if (row < 0) continue;
 23:     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
 24:     for (j=0; j<n; j++) {
 25:       if (in[j] < 0) continue;
 26:       if (in[j] >= A->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],A->cmap.N-1);
 27:       ldiag  = row - in[j]; /* diagonal number */
 28:       dfound = PETSC_FALSE;
 29:       if (roworiented) {
 30:         value = v[j + kk*n];
 31:       } else {
 32:         value = v[kk + j*m];
 33:       }
 34:       /* search diagonals for required one */
 35:       for (k=0; k<a->nd; k++) {
 36:         if (a->diag[k] == ldiag) {
 37:           dfound = PETSC_TRUE;
 38:           if (is == ADD_VALUES) a->diagv[k][row] += value;
 39:           else                  a->diagv[k][row]  = value;
 40:           break;
 41:         }
 42:       }
 43:       if (!dfound) {
 44:         if (a->nonew || a->nonew_diag) {
 45: #if !defined(PETSC_USE_COMPLEX)
 46:           if (a->user_alloc && value) {
 47: #else
 48:           if (a->user_alloc && (PetscRealPart(value) || PetscImaginaryPart(value))) {
 49: #endif
 50:             PetscInfo1(A,"Nonzero in diagonal %D that user did not allocate\n",ldiag);
 51:           }
 52:         } else {
 53:           PetscInfo1(A,"Allocating new diagonal: %D\n",ldiag);
 54:           a->reallocs++;
 55:           /* free old bdiag storage info and reallocate */
 56:           PetscMalloc(2*(a->nd+1)*sizeof(PetscInt),&diag_new);
 57:           bdlen_new = diag_new + a->nd + 1;
 58:           PetscMalloc((a->nd+1)*sizeof(PetscScalar*),&diagv_new);
 59:           for (k=0; k<a->nd; k++) {
 60:             diag_new[k]  = a->diag[k];
 61:             diagv_new[k] = a->diagv[k];
 62:             bdlen_new[k] = a->bdlen[k];
 63:           }
 64:           diag_new[a->nd]  = ldiag;
 65:           if (ldiag > 0) { /* lower triangular */
 66:             bdlen_new[a->nd] = PetscMin(a->nblock,a->mblock - ldiag);
 67:           } else {         /* upper triangular */
 68:             bdlen_new[a->nd] = PetscMin(a->mblock,a->nblock + ldiag);
 69:           }
 70:           newnz = bdlen_new[a->nd];
 71:           PetscMalloc(newnz*sizeof(PetscScalar),&diagv_new[a->nd]);
 72:           PetscMemzero(diagv_new[a->nd],newnz*sizeof(PetscScalar));
 73:           /* adjust pointers so that dv[diag][row] works for all diagonals*/
 74:           if (diag_new[a->nd] > 0) {
 75:             diagv_new[a->nd] -= diag_new[a->nd];
 76:           }
 77:           a->maxnz += newnz;
 78:           a->nz    += newnz;
 79:           PetscFree(a->diagv);
 80:           PetscFree(a->diag);
 81:           a->diag  = diag_new;
 82:           a->bdlen = bdlen_new;
 83:           a->diagv = diagv_new;

 85:           /* Insert value */
 86:           if (is == ADD_VALUES) a->diagv[a->nd][row] += value;
 87:           else                  a->diagv[a->nd][row] = value;
 88:           a->nd++;
 89:           PetscLogObjectMemory(A,newnz*sizeof(PetscScalar)+2*sizeof(PetscInt)+sizeof(PetscScalar*));
 90:         }
 91:       }
 92:     }
 93:   }
 94:   return(0);
 95: }


100: PetscErrorCode MatSetValues_SeqBDiag_N(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
101: {
102:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
104:   PetscInt       kk,ldiag,shift,row,newnz,*bdlen_new;
105:   PetscInt       j,k,bs = A->rmap.bs,*diag_new,idx=0;
106:   PetscTruth     roworiented = a->roworiented,dfound;
107:   PetscScalar    value,**diagv_new;

110:   for (kk=0; kk<m; kk++) { /* loop over added rows */
111:     row = im[kk];
112:     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row);
113:     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
114:     shift = (row/bs)*bs*bs + row%bs;
115:     for (j=0; j<n; j++) {
116:       ldiag  = row/bs - in[j]/bs; /* block diagonal */
117:       dfound = PETSC_FALSE;
118:       if (roworiented) {
119:         value = v[idx++];
120:       } else {
121:         value = v[kk + j*m];
122:       }
123:       /* seach for appropriate diagonal */
124:       for (k=0; k<a->nd; k++) {
125:         if (a->diag[k] == ldiag) {
126:           dfound = PETSC_TRUE;
127:           if (is == ADD_VALUES) a->diagv[k][shift + (in[j]%bs)*bs] += value;
128:           else                  a->diagv[k][shift + (in[j]%bs)*bs] = value;
129:           break;
130:         }
131:       }
132:       if (!dfound) {
133:         if (a->nonew || a->nonew_diag) {
134: #if !defined(PETSC_USE_COMPLEX)
135:           if (a->user_alloc && value) {
136: #else
137:           if (a->user_alloc && (PetscRealPart(value) || PetscImaginaryPart(value))) {
138: #endif
139:             PetscInfo1(A,"Nonzero in diagonal %D that user did not allocate\n",ldiag);
140:           }
141:         } else {
142:           PetscInfo1(A,"Allocating new diagonal: %D\n",ldiag);
143:           a->reallocs++;
144:           /* free old bdiag storage info and reallocate */
145:           PetscMalloc(2*(a->nd+1)*sizeof(PetscInt),&diag_new);
146:           bdlen_new = diag_new + a->nd + 1;
147:           PetscMalloc((a->nd+1)*sizeof(PetscScalar*),&diagv_new);
148:           for (k=0; k<a->nd; k++) {
149:             diag_new[k]  = a->diag[k];
150:             diagv_new[k] = a->diagv[k];
151:             bdlen_new[k] = a->bdlen[k];
152:           }
153:           diag_new[a->nd]  = ldiag;
154:           if (ldiag > 0) {/* lower triangular */
155:             bdlen_new[a->nd] = PetscMin(a->nblock,a->mblock - ldiag);
156:           } else {         /* upper triangular */
157:             bdlen_new[a->nd] = PetscMin(a->mblock,a->nblock + ldiag);
158:           }
159:           newnz = bs*bs*bdlen_new[a->nd];
160:           PetscMalloc(newnz*sizeof(PetscScalar),&diagv_new[a->nd]);
161:           PetscMemzero(diagv_new[a->nd],newnz*sizeof(PetscScalar));
162:           /* adjust pointer so that dv[diag][row] works for all diagonals */
163:           if (diag_new[a->nd] > 0) {
164:             diagv_new[a->nd] -= bs*bs*diag_new[a->nd];
165:           }
166:           a->maxnz += newnz; a->nz += newnz;
167:           PetscFree(a->diagv);
168:           PetscFree(a->diag);
169:           a->diag  = diag_new;
170:           a->bdlen = bdlen_new;
171:           a->diagv = diagv_new;

173:           /* Insert value */
174:           if (is == ADD_VALUES) a->diagv[k][shift + (in[j]%bs)*bs] += value;
175:           else                  a->diagv[k][shift + (in[j]%bs)*bs] = value;
176:           a->nd++;
177:           PetscLogObjectMemory(A,newnz*sizeof(PetscScalar)+2*sizeof(PetscInt)+sizeof(PetscScalar*));
178:         }
179:       }
180:     }
181:   }
182:   return(0);
183: }

187: PetscErrorCode MatGetValues_SeqBDiag_1(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
188: {
189:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
190:   PetscInt     kk,ldiag,row,j,k;
191:   PetscTruth   dfound;

194:   for (kk=0; kk<m; kk++) { /* loop over rows */
195:     row = im[kk];
196:     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
197:     if (row >= A->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
198:     for (j=0; j<n; j++) {
199:       if (in[j] < 0) {v++; continue;} SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
200:       if (in[j] >= A->cmap.n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
201:       ldiag = row - in[j]; /* diagonal number */
202:       dfound = PETSC_FALSE;
203:       for (k=0; k<a->nd; k++) {
204:         if (a->diag[k] == ldiag) {
205:           dfound = PETSC_TRUE;
206:           *v++ = a->diagv[k][row];
207:           break;
208:         }
209:       }
210:       if (!dfound) *v++ = 0.0;
211:     }
212:   }
213:   return(0);
214: }

218: PetscErrorCode MatGetValues_SeqBDiag_N(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
219: {
220:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
221:   PetscInt     kk,ldiag,shift,row,j,k,bs = A->rmap.bs;
222:   PetscTruth   dfound;

225:   for (kk=0; kk<m; kk++) { /* loop over rows */
226:     row = im[kk];
227:     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
228:     if (row >= A->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
229:     shift = (row/bs)*bs*bs + row%bs;
230:     for (j=0; j<n; j++) {
231:       if (in[j] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
232:       ldiag  = row/bs - in[j]/bs; /* block diagonal */
233:       dfound = PETSC_FALSE;
234:       for (k=0; k<a->nd; k++) {
235:         if (a->diag[k] == ldiag) {
236:           dfound = PETSC_TRUE;
237:           *v++ = a->diagv[k][shift + (in[j]%bs)*bs ];
238:           break;
239:         }
240:       }
241:       if (!dfound) *v++ = 0.0;
242:     }
243:   }
244:   return(0);
245: }

247: /*
248:     MatMults for blocksize 1 to 5 and N -------------------------------
249:  */
252: PetscErrorCode MatMult_SeqBDiag_1(Mat A,Vec xx,Vec yy)
253: {
254:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
255:   PetscInt       nd = a->nd,diag,*a_diag = a->diag,*a_bdlen = a->bdlen;
257:   PetscInt       d,j,len;
258:   PetscScalar    *vin,*vout,**a_diagv = a->diagv;
259:   PetscScalar    *pvin,*pvout,*dv;

262:   VecGetArray(xx,&vin);
263:   VecGetArray(yy,&vout);
264:   PetscMemzero(vout,A->rmap.n*sizeof(PetscScalar));
265:   for (d=0; d<nd; d++) {
266:     dv   = a_diagv[d];
267:     diag = a_diag[d];
268:     len  = a_bdlen[d];
269:     if (diag > 0) {             /* lower triangle */
270:       pvin  = vin;
271:       pvout = vout + diag;
272:       dv    = dv   + diag;
273:     } else {                     /* upper triangle,including main diagonal */
274:       pvin  = vin - diag;
275:       pvout = vout;
276:     }
277:     for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
278:     PetscLogFlops(2*len);
279:   }
280:   VecRestoreArray(xx,&vin);
281:   VecRestoreArray(yy,&vout);
282:   return(0);
283: }

287: PetscErrorCode MatMult_SeqBDiag_2(Mat A,Vec xx,Vec yy)
288: {
289:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
290:   PetscInt       nd = a->nd,nb_diag;
292:   PetscInt       *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
293:   PetscScalar    *vin,*vout,**a_diagv = a->diagv;
294:   PetscScalar    *pvin,*pvout,*dv,pvin0,pvin1;

297:   VecGetArray(xx,&vin);
298:   VecGetArray(yy,&vout);
299:   PetscMemzero(vout,A->rmap.n*sizeof(PetscScalar));
300:   for (d=0; d<nd; d++) {
301:     dv      = a_diagv[d];
302:     nb_diag = 2*a_diag[d];
303:     len     = a_bdlen[d];
304:     if (nb_diag > 0) {                /* lower triangle */
305:       pvin  = vin;
306:       pvout = vout + nb_diag;
307:       dv    = dv   + 2*nb_diag;
308:     } else {                       /* upper triangle, including main diagonal */
309:       pvin  = vin - nb_diag;
310:       pvout = vout;
311:     }
312:     for (k=0; k<len; k++) {
313:       pvin0     = pvin[0]; pvin1 = pvin[1];

315:       pvout[0] += dv[0]*pvin0 + dv[2]*pvin1;
316:       pvout[1] += dv[1]*pvin0 + dv[3]*pvin1;

318:       pvout += 2; pvin += 2; dv += 4;
319:     }
320:     PetscLogFlops(8*len);
321:   }
322:   VecRestoreArray(xx,&vin);
323:   VecRestoreArray(yy,&vout);
324:   return(0);
325: }

329: PetscErrorCode MatMult_SeqBDiag_3(Mat A,Vec xx,Vec yy)
330: {
331:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
332:   PetscInt       nd = a->nd,nb_diag;
334:   PetscInt       *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
335:   PetscScalar    *vin,*vout,**a_diagv = a->diagv;
336:   PetscScalar    *pvin,*pvout,*dv,pvin0,pvin1,pvin2;

339:   VecGetArray(xx,&vin);
340:   VecGetArray(yy,&vout);
341:   PetscMemzero(vout,A->rmap.n*sizeof(PetscScalar));
342:   for (d=0; d<nd; d++) {
343:     dv      = a_diagv[d];
344:     nb_diag = 3*a_diag[d];
345:     len     = a_bdlen[d];
346:     if (nb_diag > 0) {                /* lower triangle */
347:       pvin  = vin;
348:       pvout = vout + nb_diag;
349:       dv    = dv   + 3*nb_diag;
350:     } else {                       /* upper triangle,including main diagonal */
351:       pvin  = vin - nb_diag;
352:       pvout = vout;
353:     }
354:     for (k=0; k<len; k++) {
355:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2];

357:       pvout[0] += dv[0]*pvin0 + dv[3]*pvin1  + dv[6]*pvin2;
358:       pvout[1] += dv[1]*pvin0 + dv[4]*pvin1  + dv[7]*pvin2;
359:       pvout[2] += dv[2]*pvin0 + dv[5]*pvin1  + dv[8]*pvin2;

361:       pvout += 3; pvin += 3; dv += 9;
362:     }
363:     PetscLogFlops(18*len);
364:   }
365:   VecRestoreArray(xx,&vin);
366:   VecRestoreArray(yy,&vout);
367:   return(0);
368: }

372: PetscErrorCode MatMult_SeqBDiag_4(Mat A,Vec xx,Vec yy)
373: {
374:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
375:   PetscInt       nd = a->nd,nb_diag;
377:   PetscInt       *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
378:   PetscScalar    *vin,*vout,**a_diagv = a->diagv;
379:   PetscScalar    *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3;

382:   VecGetArray(xx,&vin);
383:   VecGetArray(yy,&vout);
384:   PetscMemzero(vout,A->rmap.n*sizeof(PetscScalar));
385:   for (d=0; d<nd; d++) {
386:     dv      = a_diagv[d];
387:     nb_diag = 4*a_diag[d];
388:     len     = a_bdlen[d];
389:     if (nb_diag > 0) {                /* lower triangle */
390:       pvin  = vin;
391:       pvout = vout + nb_diag;
392:       dv    = dv   + 4*nb_diag;
393:     } else {                       /* upper triangle,including main diagonal */
394:       pvin  = vin - nb_diag;
395:       pvout = vout;
396:     }
397:     for (k=0; k<len; k++) {
398:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3];

400:       pvout[0] += dv[0]*pvin0 + dv[4]*pvin1  + dv[8]*pvin2 + dv[12]*pvin3;
401:       pvout[1] += dv[1]*pvin0 + dv[5]*pvin1  + dv[9]*pvin2 + dv[13]*pvin3;
402:       pvout[2] += dv[2]*pvin0 + dv[6]*pvin1  + dv[10]*pvin2 + dv[14]*pvin3;
403:       pvout[3] += dv[3]*pvin0 + dv[7]*pvin1  + dv[11]*pvin2 + dv[15]*pvin3;

405:       pvout += 4; pvin += 4; dv += 16;
406:     }
407:     PetscLogFlops(32*len);
408:   }
409:   VecRestoreArray(xx,&vin);
410:   VecRestoreArray(yy,&vout);
411:   return(0);
412: }

416: PetscErrorCode MatMult_SeqBDiag_5(Mat A,Vec xx,Vec yy)
417: {
418:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
419:   PetscInt       nd = a->nd,nb_diag;
421:   PetscInt       *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
422:   PetscScalar    *vin,*vout,**a_diagv = a->diagv;
423:   PetscScalar    *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3,pvin4;

426:   VecGetArray(xx,&vin);
427:   VecGetArray(yy,&vout);
428:   PetscMemzero(vout,A->rmap.n*sizeof(PetscScalar));
429:   for (d=0; d<nd; d++) {
430:     dv      = a_diagv[d];
431:     nb_diag = 5*a_diag[d];
432:     len     = a_bdlen[d];
433:     if (nb_diag > 0) {                /* lower triangle */
434:       pvin  = vin;
435:       pvout = vout + nb_diag;
436:       dv    = dv   + 5*nb_diag;
437:     } else {                       /* upper triangle,including main diagonal */
438:       pvin  = vin - nb_diag;
439:       pvout = vout;
440:     }
441:     for (k=0; k<len; k++) {
442:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3]; pvin4 = pvin[4];

444:       pvout[0] += dv[0]*pvin0 + dv[5]*pvin1  + dv[10]*pvin2 + dv[15]*pvin3 + dv[20]*pvin4;
445:       pvout[1] += dv[1]*pvin0 + dv[6]*pvin1  + dv[11]*pvin2 + dv[16]*pvin3 + dv[21]*pvin4;
446:       pvout[2] += dv[2]*pvin0 + dv[7]*pvin1  + dv[12]*pvin2 + dv[17]*pvin3 + dv[22]*pvin4;
447:       pvout[3] += dv[3]*pvin0 + dv[8]*pvin1  + dv[13]*pvin2 + dv[18]*pvin3 + dv[23]*pvin4;
448:       pvout[4] += dv[4]*pvin0 + dv[9]*pvin1  + dv[14]*pvin2 + dv[19]*pvin3 + dv[24]*pvin4;

450:       pvout += 5; pvin += 5; dv += 25;
451:     }
452:     PetscLogFlops(50*len);
453:   }
454:   VecRestoreArray(xx,&vin);
455:   VecRestoreArray(yy,&vout);
456:   return(0);
457: }

461: PetscErrorCode MatMult_SeqBDiag_N(Mat A,Vec xx,Vec yy)
462: {
463:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
464:   PetscInt       nd = a->nd,bs = A->rmap.bs,nb_diag,bs2 = bs*bs;
466:   PetscInt       *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
467:   PetscScalar    *vin,*vout,**a_diagv = a->diagv;
468:   PetscScalar    *pvin,*pvout,*dv;

471:   VecGetArray(xx,&vin);
472:   VecGetArray(yy,&vout);
473:   PetscMemzero(vout,A->rmap.n*sizeof(PetscScalar));
474:   for (d=0; d<nd; d++) {
475:     dv      = a_diagv[d];
476:     nb_diag = bs*a_diag[d];
477:     len     = a_bdlen[d];
478:     if (nb_diag > 0) {                /* lower triangle */
479:       pvin  = vin;
480:       pvout = vout + nb_diag;
481:       dv    = dv   + bs*nb_diag;
482:     } else {                       /* upper triangle, including main diagonal */
483:       pvin  = vin - nb_diag;
484:       pvout = vout;
485:     }
486:     for (k=0; k<len; k++) {
487:       Kernel_v_gets_v_plus_A_times_w(bs,pvout,dv,pvin);
488:       pvout += bs; pvin += bs; dv += bs2;
489:     }
490:     PetscLogFlops(2*bs2*len);
491:   }
492:   VecRestoreArray(xx,&vin);
493:   VecRestoreArray(yy,&vout);
494:   return(0);
495: }

497: /*
498:     MatMultAdds for blocksize 1 to 5 and N -------------------------------
499:  */
502: PetscErrorCode MatMultAdd_SeqBDiag_1(Mat A,Vec xx,Vec zz,Vec yy)
503: {
504:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
506:   PetscInt       nd = a->nd,diag,*a_diag = a->diag,*a_bdlen = a->bdlen,d,j,len;
507:   PetscScalar    *vin,*vout,**a_diagv = a->diagv;
508:   PetscScalar    *pvin,*pvout,*dv;

511:   if (zz != yy) {VecCopy(zz,yy);}
512:   VecGetArray(xx,&vin);
513:   VecGetArray(yy,&vout);
514:   for (d=0; d<nd; d++) {
515:     dv   = a_diagv[d];
516:     diag = a_diag[d];
517:     len  = a_bdlen[d];
518:     if (diag > 0) {             /* lower triangle */
519:       pvin  = vin;
520:       pvout = vout + diag;
521:       dv    = dv   + diag;
522:     } else {                     /* upper triangle, including main diagonal */
523:       pvin  = vin - diag;
524:       pvout = vout;
525:     }
526:     for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
527:     PetscLogFlops(2*len);
528:   }
529:   VecRestoreArray(xx,&vin);
530:   VecRestoreArray(yy,&vout);
531:   return(0);
532: }

536: PetscErrorCode MatMultAdd_SeqBDiag_2(Mat A,Vec xx,Vec zz,Vec yy)
537: {
538:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
540:   PetscInt       nd = a->nd,nb_diag;
541:   PetscInt       *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
542:   PetscScalar    *vin,*vout,**a_diagv = a->diagv;
543:   PetscScalar    *pvin,*pvout,*dv,pvin0,pvin1;

546:   if (zz != yy) {VecCopy(zz,yy);}
547:   VecGetArray(xx,&vin);
548:   VecGetArray(yy,&vout);
549:   for (d=0; d<nd; d++) {
550:     dv      = a_diagv[d];
551:     nb_diag = 2*a_diag[d];
552:     len     = a_bdlen[d];
553:     if (nb_diag > 0) {                /* lower triangle */
554:       pvin  = vin;
555:       pvout = vout + nb_diag;
556:       dv    = dv   + 2*nb_diag;
557:     } else {                       /* upper triangle, including main diagonal */
558:       pvin  = vin - nb_diag;
559:       pvout = vout;
560:     }
561:     for (k=0; k<len; k++) {
562:       pvin0 = pvin[0]; pvin1 = pvin[1];

564:       pvout[0] += dv[0]*pvin0 + dv[2]*pvin1;
565:       pvout[1] += dv[1]*pvin0 + dv[3]*pvin1;

567:       pvout += 2; pvin += 2; dv += 4;
568:     }
569:     PetscLogFlops(8*len);
570:   }
571:   VecRestoreArray(xx,&vin);
572:   VecRestoreArray(yy,&vout);
573:   return(0);
574: }

578: PetscErrorCode MatMultAdd_SeqBDiag_3(Mat A,Vec xx,Vec zz,Vec yy)
579: {
580:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
582:   PetscInt       nd = a->nd,nb_diag;
583:   PetscInt       *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
584:   PetscScalar    *vin,*vout,**a_diagv = a->diagv;
585:   PetscScalar    *pvin,*pvout,*dv,pvin0,pvin1,pvin2;

588:   if (zz != yy) {VecCopy(zz,yy);}
589:   VecGetArray(xx,&vin);
590:   VecGetArray(yy,&vout);
591:   for (d=0; d<nd; d++) {
592:     dv      = a_diagv[d];
593:     nb_diag = 3*a_diag[d];
594:     len     = a_bdlen[d];
595:     if (nb_diag > 0) {                /* lower triangle */
596:       pvin  = vin;
597:       pvout = vout + nb_diag;
598:       dv    = dv   + 3*nb_diag;
599:     } else {                       /* upper triangle, including main diagonal */
600:       pvin  = vin - nb_diag;
601:       pvout = vout;
602:     }
603:     for (k=0; k<len; k++) {
604:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2];

606:       pvout[0] += dv[0]*pvin0 + dv[3]*pvin1  + dv[6]*pvin2;
607:       pvout[1] += dv[1]*pvin0 + dv[4]*pvin1  + dv[7]*pvin2;
608:       pvout[2] += dv[2]*pvin0 + dv[5]*pvin1  + dv[8]*pvin2;

610:       pvout += 3; pvin += 3; dv += 9;
611:     }
612:     PetscLogFlops(18*len);
613:   }
614:   VecRestoreArray(xx,&vin);
615:   VecRestoreArray(yy,&vout);
616:   return(0);
617: }

621: PetscErrorCode MatMultAdd_SeqBDiag_4(Mat A,Vec xx,Vec zz,Vec yy)
622: {
623:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
625:   PetscInt       nd = a->nd,nb_diag;
626:   PetscInt       *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
627:   PetscScalar    *vin,*vout,**a_diagv = a->diagv;
628:   PetscScalar    *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3;

631:   if (zz != yy) {VecCopy(zz,yy);}
632:   VecGetArray(xx,&vin);
633:   VecGetArray(yy,&vout);
634:   for (d=0; d<nd; d++) {
635:     dv      = a_diagv[d];
636:     nb_diag = 4*a_diag[d];
637:     len     = a_bdlen[d];
638:     if (nb_diag > 0) {                /* lower triangle */
639:       pvin  = vin;
640:       pvout = vout + nb_diag;
641:       dv    = dv   + 4*nb_diag;
642:     } else {                       /* upper triangle, including main diagonal */
643:       pvin  = vin - nb_diag;
644:       pvout = vout;
645:     }
646:     for (k=0; k<len; k++) {
647:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3];

649:       pvout[0] += dv[0]*pvin0 + dv[4]*pvin1  + dv[8]*pvin2 + dv[12]*pvin3;
650:       pvout[1] += dv[1]*pvin0 + dv[5]*pvin1  + dv[9]*pvin2 + dv[13]*pvin3;
651:       pvout[2] += dv[2]*pvin0 + dv[6]*pvin1  + dv[10]*pvin2 + dv[14]*pvin3;
652:       pvout[3] += dv[3]*pvin0 + dv[7]*pvin1  + dv[11]*pvin2 + dv[15]*pvin3;

654:       pvout += 4; pvin += 4; dv += 16;
655:     }
656:     PetscLogFlops(32*len);
657:   }
658:   VecRestoreArray(xx,&vin);
659:   VecRestoreArray(yy,&vout);
660:   return(0);
661: }

665: PetscErrorCode MatMultAdd_SeqBDiag_5(Mat A,Vec xx,Vec zz,Vec yy)
666: {
667:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
669:   PetscInt       nd = a->nd,nb_diag;
670:   PetscInt       *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
671:   PetscScalar    *vin,*vout,**a_diagv = a->diagv;
672:   PetscScalar    *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3,pvin4;

675:   if (zz != yy) {VecCopy(zz,yy);}
676:   VecGetArray(xx,&vin);
677:   VecGetArray(yy,&vout);
678:   for (d=0; d<nd; d++) {
679:     dv      = a_diagv[d];
680:     nb_diag = 5*a_diag[d];
681:     len     = a_bdlen[d];
682:     if (nb_diag > 0) {                /* lower triangle */
683:       pvin  = vin;
684:       pvout = vout + nb_diag;
685:       dv    = dv   + 5*nb_diag;
686:     } else {                       /* upper triangle, including main diagonal */
687:       pvin  = vin - nb_diag;
688:       pvout = vout;
689:     }
690:     for (k=0; k<len; k++) {
691:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3]; pvin4 = pvin[4];

693:       pvout[0] += dv[0]*pvin0 + dv[5]*pvin1  + dv[10]*pvin2 + dv[15]*pvin3 + dv[20]*pvin4;
694:       pvout[1] += dv[1]*pvin0 + dv[6]*pvin1  + dv[11]*pvin2 + dv[16]*pvin3 + dv[21]*pvin4;
695:       pvout[2] += dv[2]*pvin0 + dv[7]*pvin1  + dv[12]*pvin2 + dv[17]*pvin3 + dv[22]*pvin4;
696:       pvout[3] += dv[3]*pvin0 + dv[8]*pvin1  + dv[13]*pvin2 + dv[18]*pvin3 + dv[23]*pvin4;
697:       pvout[4] += dv[4]*pvin0 + dv[9]*pvin1  + dv[14]*pvin2 + dv[19]*pvin3 + dv[24]*pvin4;

699:       pvout += 5; pvin += 5; dv += 25;
700:     }
701:     PetscLogFlops(50*len);
702:   }
703:   VecRestoreArray(xx,&vin);
704:   VecRestoreArray(yy,&vout);
705:   return(0);
706: }

710: PetscErrorCode MatMultAdd_SeqBDiag_N(Mat A,Vec xx,Vec zz,Vec yy)
711: {
712:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
714:   PetscInt       nd = a->nd,bs = A->rmap.bs,nb_diag,bs2 = bs*bs;
715:   PetscInt       *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
716:   PetscScalar    *vin,*vout,**a_diagv = a->diagv;
717:   PetscScalar    *pvin,*pvout,*dv;

720:   if (zz != yy) {VecCopy(zz,yy);}
721:   VecGetArray(xx,&vin);
722:   VecGetArray(yy,&vout);
723:   for (d=0; d<nd; d++) {
724:     dv      = a_diagv[d];
725:     nb_diag = bs*a_diag[d];
726:     len     = a_bdlen[d];
727:     if (nb_diag > 0) {                /* lower triangle */
728:       pvin  = vin;
729:       pvout = vout + nb_diag;
730:       dv    = dv   + bs*nb_diag;
731:     } else {                       /* upper triangle, including main diagonal */
732:       pvin  = vin - nb_diag;
733:       pvout = vout;
734:     }
735:     for (k=0; k<len; k++) {
736:       Kernel_v_gets_v_plus_A_times_w(bs,pvout,dv,pvin);
737:       pvout += bs; pvin += bs; dv += bs2;
738:     }
739:     PetscLogFlops(2*bs2*len);
740:   }
741:   VecRestoreArray(xx,&vin);
742:   VecRestoreArray(yy,&vout);
743:   return(0);
744: }

746: /*
747:      MatMultTranspose ----------------------------------------------
748:  */
751: PetscErrorCode MatMultTranspose_SeqBDiag_1(Mat A,Vec xx,Vec yy)
752: {
753:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
755:   PetscInt       nd = a->nd,diag,d,j,len;
756:   PetscScalar    *pvin,*pvout,*dv;
757:   PetscScalar    *vin,*vout;
758: 
760:   VecGetArray(xx,&vin);
761:   VecGetArray(yy,&vout);
762:   PetscMemzero(vout,A->cmap.n*sizeof(PetscScalar));
763:   for (d=0; d<nd; d++) {
764:     dv   = a->diagv[d];
765:     diag = a->diag[d];
766:     len  = a->bdlen[d];
767:       /* diag of original matrix is (row/bs - col/bs) */
768:       /* diag of transpose matrix is (col/bs - row/bs) */
769:     if (diag < 0) {        /* transpose is lower triangle */
770:       pvin  = vin;
771:       pvout = vout - diag;
772:     } else {        /* transpose is upper triangle, including main diagonal */
773:       pvin  = vin + diag;
774:       pvout = vout;
775:       dv    = dv + diag;
776:     }
777:     for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
778:   }
779:   VecRestoreArray(xx,&vin);
780:   VecRestoreArray(yy,&vout);
781:   return(0);
782: }

786: PetscErrorCode MatMultTranspose_SeqBDiag_N(Mat A,Vec xx,Vec yy)
787: {
788:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
790:   PetscInt       nd = a->nd,bs = A->rmap.bs,diag,kshift,kloc,d,i,j,k,len;
791:   PetscScalar    *pvin,*pvout,*dv;
792:   PetscScalar    *vin,*vout;
793: 
795:   VecGetArray(xx,&vin);
796:   VecGetArray(yy,&vout);
797:   PetscMemzero(vout,A->cmap.n*sizeof(PetscScalar));
798:   for (d=0; d<nd; d++) {
799:     dv   = a->diagv[d];
800:     diag = a->diag[d];
801:     len  = a->bdlen[d];
802:       /* diag of original matrix is (row/bs - col/bs) */
803:       /* diag of transpose matrix is (col/bs - row/bs) */
804:     if (diag < 0) {        /* transpose is lower triangle */
805:       pvin  = vin;
806:       pvout = vout - bs*diag;
807:     } else {        /* transpose is upper triangle, including main diagonal */
808:       pvin  = vin + bs*diag;
809:       pvout = vout;
810:       dv    = dv + diag;
811:     }
812:     for (k=0; k<len; k++) {
813:       kloc = k*bs; kshift = kloc*bs;
814:       for (i=0; i<bs; i++) {         /* i = local column of transpose */
815:         for (j=0; j<bs; j++) {   /* j = local row of transpose */
816:           pvout[kloc + j] += dv[kshift + j*bs + i] * pvin[kloc + i];
817:         }
818:       }
819:     }
820:   }
821:   VecRestoreArray(xx,&vin);
822:   VecRestoreArray(yy,&vout);
823:   return(0);
824: }

826: /*
827:      MatMultTransposeAdd ----------------------------------------------
828:  */
831: PetscErrorCode MatMultTransposeAdd_SeqBDiag_1(Mat A,Vec xx,Vec zz,Vec yy)
832: {
833:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
835:   PetscInt       nd = a->nd,diag,d,j,len;
836:   PetscScalar    *pvin,*pvout,*dv;
837:   PetscScalar    *vin,*vout;
838: 
840:   if (zz != yy) {VecCopy(zz,yy);}
841:   VecGetArray(xx,&vin);
842:   VecGetArray(yy,&vout);
843:   for (d=0; d<nd; d++) {
844:     dv   = a->diagv[d];
845:     diag = a->diag[d];
846:     len  = a->bdlen[d];
847:       /* diag of original matrix is (row/bs - col/bs) */
848:       /* diag of transpose matrix is (col/bs - row/bs) */
849:     if (diag < 0) {        /* transpose is lower triangle */
850:       pvin  = vin;
851:       pvout = vout - diag;
852:     } else {        /* transpose is upper triangle, including main diagonal */
853:       pvin  = vin + diag;
854:       pvout = vout;
855:       dv    = dv + diag;
856:     }
857:     for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
858:   }
859:   VecRestoreArray(xx,&vin);
860:   VecRestoreArray(yy,&vout);
861:   return(0);
862: }

866: PetscErrorCode MatMultTransposeAdd_SeqBDiag_N(Mat A,Vec xx,Vec zz,Vec yy)
867: {
868:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
870:   PetscInt       nd = a->nd,bs = A->rmap.bs,diag,kshift,kloc,d,i,j,k,len;
871:   PetscScalar    *pvin,*pvout,*dv;
872:   PetscScalar    *vin,*vout;
873: 
875:   if (zz != yy) {VecCopy(zz,yy);}
876:   VecGetArray(xx,&vin);
877:   VecGetArray(yy,&vout);
878:   for (d=0; d<nd; d++) {
879:     dv   = a->diagv[d];
880:     diag = a->diag[d];
881:     len  = a->bdlen[d];
882:       /* diag of original matrix is (row/bs - col/bs) */
883:       /* diag of transpose matrix is (col/bs - row/bs) */
884:     if (diag < 0) {        /* transpose is lower triangle */
885:       pvin  = vin;
886:       pvout = vout - bs*diag;
887:     } else {        /* transpose is upper triangle, including main diagonal */
888:       pvin  = vin + bs*diag;
889:       pvout = vout;
890:       dv    = dv + diag;
891:     }
892:     for (k=0; k<len; k++) {
893:       kloc = k*bs; kshift = kloc*bs;
894:       for (i=0; i<bs; i++) {         /* i = local column of transpose */
895:         for (j=0; j<bs; j++) {   /* j = local row of transpose */
896:           pvout[kloc + j] += dv[kshift + j*bs + i] * pvin[kloc + i];
897:         }
898:       }
899:     }
900:   }
901:   VecRestoreArray(xx,&vin);
902:   VecRestoreArray(yy,&vout);
903:   return(0);
904: }
907: PetscErrorCode MatRelax_SeqBDiag_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal shift,PetscInt its,PetscInt lits,Vec xx)
908: {
909:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
910:   PetscScalar    *x,*b,*xb,*dd,*dv,dval,sum;
912:   PetscInt       i,j,k,d,kbase,bs = A->rmap.bs,kloc;
913:   PetscInt       mainbd = a->mainbd,diag,mblock = a->mblock,bloc;

916:   its = its*lits;
917:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

919:   /* Currently this code doesn't use wavefront orderings, although
920:      we should eventually incorporate that option, whatever wavefront
921:      ordering maybe :-) */

923:   if (mainbd == -1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Main diagonal not set");

925:   VecGetArray(xx,&x);
926:   if (xx != bb) {
927:     VecGetArray(bb,&b);
928:   } else {
929:     b = x;
930:   }
931:   dd = a->diagv[mainbd];
932:   if (flag == SOR_APPLY_UPPER) {
933:     /* apply (U + D/omega) to the vector */
934:     for (k=0; k<mblock; k++) {
935:       kloc = k*bs; kbase = kloc*bs;
936:       for (i=0; i<bs; i++) {
937:         sum = b[i+kloc] * (shift + dd[i*(bs+1)+kbase]) / omega;
938:         for (j=i+1; j<bs; j++) sum += dd[kbase + j*bs + i] * b[kloc + j];
939:         for (d=mainbd+1; d<a->nd; d++) {
940:           diag = a->diag[d];
941:           dv   = a->diagv[d];
942:           if (k-diag < mblock) {
943:             for (j=0; j<bs; j++) {
944:               sum += dv[kbase + j*bs + i] * b[(k-diag)*bs + j];
945:             }
946:           }
947:         }
948:         x[kloc+i] = sum;
949:       }
950:     }
951:     VecRestoreArray(xx,&x);
952:     if (xx != bb) {VecRestoreArray(bb,&b);}
953:     return(0);
954:   }
955:   if (flag & SOR_ZERO_INITIAL_GUESS) {
956:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
957:       for (k=0; k<mblock; k++) {
958:         kloc = k*bs; kbase = kloc*bs;
959:         for (i=0; i<bs; i++) {
960:           sum  = b[i+kloc];
961:           dval = shift + dd[i*(bs+1)+kbase];
962:           for (d=0; d<mainbd; d++) {
963:             diag = a->diag[d];
964:             dv   = a->diagv[d];
965:             if (k >= diag) {
966:               for (j=0; j<bs; j++)
967:                 sum -= dv[k*bs*bs + j*bs + i] * x[(k-diag)*bs + j];
968:             }
969:           }
970:           for (j=0; j<i; j++){
971:             sum -= dd[kbase + j*bs + i] * x[kloc + j];
972:           }
973:           x[kloc+i] = omega*sum/dval;
974:         }
975:       }
976:       xb = x;
977:     } else xb = b;
978:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
979:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
980:       for (k=0; k<mblock; k++) {
981:         kloc = k*bs; kbase = kloc*bs;
982:         for (i=0; i<bs; i++)
983:           x[kloc+i] *= dd[i*(bs+1)+kbase];
984:       }
985:     }
986:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
987:       for (k=mblock-1; k>=0; k--) {
988:         kloc = k*bs; kbase = kloc*bs;
989:         for (i=bs-1; i>=0; i--) {
990:           sum  = xb[i+kloc];
991:           dval = shift + dd[i*(bs+1)+kbase];
992:           for (j=i+1; j<bs; j++)
993:             sum -= dd[kbase + j*bs + i] * x[kloc + j];
994:           for (d=mainbd+1; d<a->nd; d++) {
995:             diag = a->diag[d];
996:             dv   = a->diagv[d];
997:             bloc = k - diag;
998:             if (bloc < mblock) {
999:               for (j=0; j<bs; j++)
1000:                 sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
1001:             }
1002:           }
1003:           x[kloc+i] = omega*sum/dval;
1004:         }
1005:       }
1006:     }
1007:     its--;
1008:   }
1009:   while (its--) {
1010:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1011:       for (k=0; k<mblock; k++) {
1012:         kloc = k*bs; kbase = kloc*bs;
1013:         for (i=0; i<bs; i++) {
1014:           sum  = b[i+kloc];
1015:           dval = shift + dd[i*(bs+1)+kbase];
1016:           for (d=0; d<mainbd; d++) {
1017:             diag = a->diag[d];
1018:             dv   = a->diagv[d];
1019:             bloc = k - diag;
1020:             if (bloc >= 0) {
1021:               for (j=0; j<bs; j++) {
1022:                 sum -= dv[k*bs*bs + j*bs + i] * x[bloc*bs + j];
1023:               }
1024:             }
1025:           }
1026:           for (d=mainbd; d<a->nd; d++) {
1027:             diag = a->diag[d];
1028:             dv   = a->diagv[d];
1029:             bloc = k - diag;
1030:             if (bloc < mblock) {
1031:               for (j=0; j<bs; j++) {
1032:                 sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
1033:               }
1034:             }
1035:           }
1036:           x[kloc+i] = (1.-omega)*x[kloc+i]+omega*(sum+dd[i*(bs+1)+kbase]*x[kloc+i])/dval;
1037:         }
1038:       }
1039:     }
1040:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1041:       for (k=mblock-1; k>=0; k--) {
1042:         kloc = k*bs; kbase = kloc*bs;
1043:         for (i=bs-1; i>=0; i--) {
1044:           sum  = b[i+kloc];
1045:           dval = shift + dd[i*(bs+1)+kbase];
1046:           for (d=0; d<mainbd; d++) {
1047:             diag = a->diag[d];
1048:             dv   = a->diagv[d];
1049:             bloc = k - diag;
1050:             if (bloc >= 0) {
1051:               for (j=0; j<bs; j++) {
1052:                 sum -= dv[k*bs*bs + j*bs + i] * x[bloc*bs + j];
1053:               }
1054:             }
1055:           }
1056:           for (d=mainbd; d<a->nd; d++) {
1057:             diag = a->diag[d];
1058:             dv   = a->diagv[d];
1059:             bloc = k - diag;
1060:             if (bloc < mblock) {
1061:               for (j=0; j<bs; j++) {
1062:                 sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
1063:               }
1064:             }
1065:           }
1066:           x[kloc+i] = (1.-omega)*x[kloc+i]+omega*(sum+dd[i*(bs+1)+kbase]*x[kloc+i])/dval;
1067:         }
1068:       }
1069:     }
1070:   }
1071:   VecRestoreArray(xx,&x);
1072:   if (xx != bb) VecRestoreArray(bb,&b);
1073:   return(0);
1074: }

1078: PetscErrorCode MatRelax_SeqBDiag_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal shift,PetscInt its,PetscInt lits,Vec xx)
1079: {
1080:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
1081:   PetscScalar    *x,*b,*xb,*dd,dval,sum;
1083:   PetscInt       m = A->rmap.n,i,d,loc;
1084:   PetscInt       mainbd = a->mainbd,diag;

1087:   its = its*lits;
1088:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1089:   /* Currently this code doesn't use wavefront orderings,although
1090:      we should eventually incorporate that option, whatever wavefront
1091:      ordering maybe :-) */

1093:   VecGetArray(xx,&x);
1094:   VecGetArray(bb,&b);
1095:   if (mainbd == -1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Main diagonal not set");
1096:   dd = a->diagv[mainbd];
1097:   if (flag == SOR_APPLY_UPPER) {
1098:     /* apply (U + D/omega) to the vector */
1099:     for (i=0; i<m; i++) {
1100:       sum = b[i] * (shift + dd[i]) / omega;
1101:       for (d=mainbd+1; d<a->nd; d++) {
1102:         diag = a->diag[d];
1103:         if (i-diag < m) sum += a->diagv[d][i] * x[i-diag];
1104:       }
1105:       x[i] = sum;
1106:     }
1107:     VecRestoreArray(xx,&x);
1108:     VecRestoreArray(bb,&b);
1109:     return(0);
1110:   }
1111:   if (flag & SOR_ZERO_INITIAL_GUESS) {
1112:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1113:       for (i=0; i<m; i++) {
1114:         sum  = b[i];
1115:         for (d=0; d<mainbd; d++) {
1116:           if (i >= a->diag[d]) sum -= a->diagv[d][i] * x[i-a->diag[d]];
1117:         }
1118:         x[i] = omega*(sum/(shift + dd[i]));
1119:       }
1120:       xb = x;
1121:     } else xb = b;
1122:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1123:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1124:       for (i=0; i<m; i++) x[i] *= dd[i];
1125:     }
1126:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1127:       for (i=m-1; i>=0; i--) {
1128:         sum = xb[i];
1129:         for (d=mainbd+1; d<a->nd; d++) {
1130:           diag = a->diag[d];
1131:           if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1132:         }
1133:         x[i] = omega*(sum/(shift + dd[i]));
1134:       }
1135:     }
1136:     its--;
1137:   }
1138:   while (its--) {
1139:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1140:       for (i=0; i<m; i++) {
1141:         sum  = b[i];
1142:         dval = shift + dd[i];
1143:         for (d=0; d<mainbd; d++) {
1144:           if (i >= a->diag[d]) sum -= a->diagv[d][i] * x[i-a->diag[d]];
1145:         }
1146:         for (d=mainbd; d<a->nd; d++) {
1147:           diag = a->diag[d];
1148:           if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1149:         }
1150:         x[i] = (1. - omega)*x[i] + omega*(sum + dd[i]*x[i])/dval;
1151:       }
1152:     }
1153:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1154:       for (i=m-1; i>=0; i--) {
1155:         sum = b[i];
1156:         for (d=0; d<mainbd; d++) {
1157:           loc = i - a->diag[d];
1158:           if (loc >= 0) sum -= a->diagv[d][i] * x[loc];
1159:         }
1160:         for (d=mainbd; d<a->nd; d++) {
1161:           diag = a->diag[d];
1162:           if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1163:         }
1164:         x[i] = (1. - omega)*x[i] + omega*(sum + dd[i]*x[i])/(shift + dd[i]);
1165:       }
1166:     }
1167:   }
1168:   VecRestoreArray(xx,&x);
1169:   VecRestoreArray(bb,&b);
1170:   return(0);
1171: }