Actual source code: bdiag2.c
1: #define PETSCMAT_DLL
3: /* Block diagonal matrix format */
5: #include src/mat/impls/bdiag/seq/bdiag.h
6: #include src/inline/ilu.h
10: PetscErrorCode MatSetValues_SeqBDiag_1(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
11: {
12: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
13: PetscInt kk,ldiag,row,newnz,*bdlen_new;
15: PetscInt j,k, *diag_new;
16: PetscTruth roworiented = a->roworiented,dfound;
17: PetscScalar value,**diagv_new;
20: for (kk=0; kk<m; kk++) { /* loop over added rows */
21: row = im[kk];
22: if (row < 0) continue;
23: if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
24: for (j=0; j<n; j++) {
25: if (in[j] < 0) continue;
26: if (in[j] >= A->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],A->cmap.N-1);
27: ldiag = row - in[j]; /* diagonal number */
28: dfound = PETSC_FALSE;
29: if (roworiented) {
30: value = v[j + kk*n];
31: } else {
32: value = v[kk + j*m];
33: }
34: /* search diagonals for required one */
35: for (k=0; k<a->nd; k++) {
36: if (a->diag[k] == ldiag) {
37: dfound = PETSC_TRUE;
38: if (is == ADD_VALUES) a->diagv[k][row] += value;
39: else a->diagv[k][row] = value;
40: break;
41: }
42: }
43: if (!dfound) {
44: if (a->nonew || a->nonew_diag) {
45: #if !defined(PETSC_USE_COMPLEX)
46: if (a->user_alloc && value) {
47: #else
48: if (a->user_alloc && (PetscRealPart(value) || PetscImaginaryPart(value))) {
49: #endif
50: PetscInfo1(A,"Nonzero in diagonal %D that user did not allocate\n",ldiag);
51: }
52: } else {
53: PetscInfo1(A,"Allocating new diagonal: %D\n",ldiag);
54: a->reallocs++;
55: /* free old bdiag storage info and reallocate */
56: PetscMalloc(2*(a->nd+1)*sizeof(PetscInt),&diag_new);
57: bdlen_new = diag_new + a->nd + 1;
58: PetscMalloc((a->nd+1)*sizeof(PetscScalar*),&diagv_new);
59: for (k=0; k<a->nd; k++) {
60: diag_new[k] = a->diag[k];
61: diagv_new[k] = a->diagv[k];
62: bdlen_new[k] = a->bdlen[k];
63: }
64: diag_new[a->nd] = ldiag;
65: if (ldiag > 0) { /* lower triangular */
66: bdlen_new[a->nd] = PetscMin(a->nblock,a->mblock - ldiag);
67: } else { /* upper triangular */
68: bdlen_new[a->nd] = PetscMin(a->mblock,a->nblock + ldiag);
69: }
70: newnz = bdlen_new[a->nd];
71: PetscMalloc(newnz*sizeof(PetscScalar),&diagv_new[a->nd]);
72: PetscMemzero(diagv_new[a->nd],newnz*sizeof(PetscScalar));
73: /* adjust pointers so that dv[diag][row] works for all diagonals*/
74: if (diag_new[a->nd] > 0) {
75: diagv_new[a->nd] -= diag_new[a->nd];
76: }
77: a->maxnz += newnz;
78: a->nz += newnz;
79: PetscFree(a->diagv);
80: PetscFree(a->diag);
81: a->diag = diag_new;
82: a->bdlen = bdlen_new;
83: a->diagv = diagv_new;
85: /* Insert value */
86: if (is == ADD_VALUES) a->diagv[a->nd][row] += value;
87: else a->diagv[a->nd][row] = value;
88: a->nd++;
89: PetscLogObjectMemory(A,newnz*sizeof(PetscScalar)+2*sizeof(PetscInt)+sizeof(PetscScalar*));
90: }
91: }
92: }
93: }
94: return(0);
95: }
100: PetscErrorCode MatSetValues_SeqBDiag_N(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
101: {
102: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
104: PetscInt kk,ldiag,shift,row,newnz,*bdlen_new;
105: PetscInt j,k,bs = A->rmap.bs,*diag_new,idx=0;
106: PetscTruth roworiented = a->roworiented,dfound;
107: PetscScalar value,**diagv_new;
110: for (kk=0; kk<m; kk++) { /* loop over added rows */
111: row = im[kk];
112: if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row);
113: if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
114: shift = (row/bs)*bs*bs + row%bs;
115: for (j=0; j<n; j++) {
116: ldiag = row/bs - in[j]/bs; /* block diagonal */
117: dfound = PETSC_FALSE;
118: if (roworiented) {
119: value = v[idx++];
120: } else {
121: value = v[kk + j*m];
122: }
123: /* seach for appropriate diagonal */
124: for (k=0; k<a->nd; k++) {
125: if (a->diag[k] == ldiag) {
126: dfound = PETSC_TRUE;
127: if (is == ADD_VALUES) a->diagv[k][shift + (in[j]%bs)*bs] += value;
128: else a->diagv[k][shift + (in[j]%bs)*bs] = value;
129: break;
130: }
131: }
132: if (!dfound) {
133: if (a->nonew || a->nonew_diag) {
134: #if !defined(PETSC_USE_COMPLEX)
135: if (a->user_alloc && value) {
136: #else
137: if (a->user_alloc && (PetscRealPart(value) || PetscImaginaryPart(value))) {
138: #endif
139: PetscInfo1(A,"Nonzero in diagonal %D that user did not allocate\n",ldiag);
140: }
141: } else {
142: PetscInfo1(A,"Allocating new diagonal: %D\n",ldiag);
143: a->reallocs++;
144: /* free old bdiag storage info and reallocate */
145: PetscMalloc(2*(a->nd+1)*sizeof(PetscInt),&diag_new);
146: bdlen_new = diag_new + a->nd + 1;
147: PetscMalloc((a->nd+1)*sizeof(PetscScalar*),&diagv_new);
148: for (k=0; k<a->nd; k++) {
149: diag_new[k] = a->diag[k];
150: diagv_new[k] = a->diagv[k];
151: bdlen_new[k] = a->bdlen[k];
152: }
153: diag_new[a->nd] = ldiag;
154: if (ldiag > 0) {/* lower triangular */
155: bdlen_new[a->nd] = PetscMin(a->nblock,a->mblock - ldiag);
156: } else { /* upper triangular */
157: bdlen_new[a->nd] = PetscMin(a->mblock,a->nblock + ldiag);
158: }
159: newnz = bs*bs*bdlen_new[a->nd];
160: PetscMalloc(newnz*sizeof(PetscScalar),&diagv_new[a->nd]);
161: PetscMemzero(diagv_new[a->nd],newnz*sizeof(PetscScalar));
162: /* adjust pointer so that dv[diag][row] works for all diagonals */
163: if (diag_new[a->nd] > 0) {
164: diagv_new[a->nd] -= bs*bs*diag_new[a->nd];
165: }
166: a->maxnz += newnz; a->nz += newnz;
167: PetscFree(a->diagv);
168: PetscFree(a->diag);
169: a->diag = diag_new;
170: a->bdlen = bdlen_new;
171: a->diagv = diagv_new;
173: /* Insert value */
174: if (is == ADD_VALUES) a->diagv[k][shift + (in[j]%bs)*bs] += value;
175: else a->diagv[k][shift + (in[j]%bs)*bs] = value;
176: a->nd++;
177: PetscLogObjectMemory(A,newnz*sizeof(PetscScalar)+2*sizeof(PetscInt)+sizeof(PetscScalar*));
178: }
179: }
180: }
181: }
182: return(0);
183: }
187: PetscErrorCode MatGetValues_SeqBDiag_1(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
188: {
189: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
190: PetscInt kk,ldiag,row,j,k;
191: PetscTruth dfound;
194: for (kk=0; kk<m; kk++) { /* loop over rows */
195: row = im[kk];
196: if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
197: if (row >= A->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
198: for (j=0; j<n; j++) {
199: if (in[j] < 0) {v++; continue;} SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
200: if (in[j] >= A->cmap.n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
201: ldiag = row - in[j]; /* diagonal number */
202: dfound = PETSC_FALSE;
203: for (k=0; k<a->nd; k++) {
204: if (a->diag[k] == ldiag) {
205: dfound = PETSC_TRUE;
206: *v++ = a->diagv[k][row];
207: break;
208: }
209: }
210: if (!dfound) *v++ = 0.0;
211: }
212: }
213: return(0);
214: }
218: PetscErrorCode MatGetValues_SeqBDiag_N(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
219: {
220: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
221: PetscInt kk,ldiag,shift,row,j,k,bs = A->rmap.bs;
222: PetscTruth dfound;
225: for (kk=0; kk<m; kk++) { /* loop over rows */
226: row = im[kk];
227: if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
228: if (row >= A->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
229: shift = (row/bs)*bs*bs + row%bs;
230: for (j=0; j<n; j++) {
231: if (in[j] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
232: ldiag = row/bs - in[j]/bs; /* block diagonal */
233: dfound = PETSC_FALSE;
234: for (k=0; k<a->nd; k++) {
235: if (a->diag[k] == ldiag) {
236: dfound = PETSC_TRUE;
237: *v++ = a->diagv[k][shift + (in[j]%bs)*bs ];
238: break;
239: }
240: }
241: if (!dfound) *v++ = 0.0;
242: }
243: }
244: return(0);
245: }
247: /*
248: MatMults for blocksize 1 to 5 and N -------------------------------
249: */
252: PetscErrorCode MatMult_SeqBDiag_1(Mat A,Vec xx,Vec yy)
253: {
254: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
255: PetscInt nd = a->nd,diag,*a_diag = a->diag,*a_bdlen = a->bdlen;
257: PetscInt d,j,len;
258: PetscScalar *vin,*vout,**a_diagv = a->diagv;
259: PetscScalar *pvin,*pvout,*dv;
262: VecGetArray(xx,&vin);
263: VecGetArray(yy,&vout);
264: PetscMemzero(vout,A->rmap.n*sizeof(PetscScalar));
265: for (d=0; d<nd; d++) {
266: dv = a_diagv[d];
267: diag = a_diag[d];
268: len = a_bdlen[d];
269: if (diag > 0) { /* lower triangle */
270: pvin = vin;
271: pvout = vout + diag;
272: dv = dv + diag;
273: } else { /* upper triangle,including main diagonal */
274: pvin = vin - diag;
275: pvout = vout;
276: }
277: for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
278: PetscLogFlops(2*len);
279: }
280: VecRestoreArray(xx,&vin);
281: VecRestoreArray(yy,&vout);
282: return(0);
283: }
287: PetscErrorCode MatMult_SeqBDiag_2(Mat A,Vec xx,Vec yy)
288: {
289: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
290: PetscInt nd = a->nd,nb_diag;
292: PetscInt *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
293: PetscScalar *vin,*vout,**a_diagv = a->diagv;
294: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1;
297: VecGetArray(xx,&vin);
298: VecGetArray(yy,&vout);
299: PetscMemzero(vout,A->rmap.n*sizeof(PetscScalar));
300: for (d=0; d<nd; d++) {
301: dv = a_diagv[d];
302: nb_diag = 2*a_diag[d];
303: len = a_bdlen[d];
304: if (nb_diag > 0) { /* lower triangle */
305: pvin = vin;
306: pvout = vout + nb_diag;
307: dv = dv + 2*nb_diag;
308: } else { /* upper triangle, including main diagonal */
309: pvin = vin - nb_diag;
310: pvout = vout;
311: }
312: for (k=0; k<len; k++) {
313: pvin0 = pvin[0]; pvin1 = pvin[1];
315: pvout[0] += dv[0]*pvin0 + dv[2]*pvin1;
316: pvout[1] += dv[1]*pvin0 + dv[3]*pvin1;
318: pvout += 2; pvin += 2; dv += 4;
319: }
320: PetscLogFlops(8*len);
321: }
322: VecRestoreArray(xx,&vin);
323: VecRestoreArray(yy,&vout);
324: return(0);
325: }
329: PetscErrorCode MatMult_SeqBDiag_3(Mat A,Vec xx,Vec yy)
330: {
331: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
332: PetscInt nd = a->nd,nb_diag;
334: PetscInt *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
335: PetscScalar *vin,*vout,**a_diagv = a->diagv;
336: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2;
339: VecGetArray(xx,&vin);
340: VecGetArray(yy,&vout);
341: PetscMemzero(vout,A->rmap.n*sizeof(PetscScalar));
342: for (d=0; d<nd; d++) {
343: dv = a_diagv[d];
344: nb_diag = 3*a_diag[d];
345: len = a_bdlen[d];
346: if (nb_diag > 0) { /* lower triangle */
347: pvin = vin;
348: pvout = vout + nb_diag;
349: dv = dv + 3*nb_diag;
350: } else { /* upper triangle,including main diagonal */
351: pvin = vin - nb_diag;
352: pvout = vout;
353: }
354: for (k=0; k<len; k++) {
355: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2];
357: pvout[0] += dv[0]*pvin0 + dv[3]*pvin1 + dv[6]*pvin2;
358: pvout[1] += dv[1]*pvin0 + dv[4]*pvin1 + dv[7]*pvin2;
359: pvout[2] += dv[2]*pvin0 + dv[5]*pvin1 + dv[8]*pvin2;
361: pvout += 3; pvin += 3; dv += 9;
362: }
363: PetscLogFlops(18*len);
364: }
365: VecRestoreArray(xx,&vin);
366: VecRestoreArray(yy,&vout);
367: return(0);
368: }
372: PetscErrorCode MatMult_SeqBDiag_4(Mat A,Vec xx,Vec yy)
373: {
374: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
375: PetscInt nd = a->nd,nb_diag;
377: PetscInt *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
378: PetscScalar *vin,*vout,**a_diagv = a->diagv;
379: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3;
382: VecGetArray(xx,&vin);
383: VecGetArray(yy,&vout);
384: PetscMemzero(vout,A->rmap.n*sizeof(PetscScalar));
385: for (d=0; d<nd; d++) {
386: dv = a_diagv[d];
387: nb_diag = 4*a_diag[d];
388: len = a_bdlen[d];
389: if (nb_diag > 0) { /* lower triangle */
390: pvin = vin;
391: pvout = vout + nb_diag;
392: dv = dv + 4*nb_diag;
393: } else { /* upper triangle,including main diagonal */
394: pvin = vin - nb_diag;
395: pvout = vout;
396: }
397: for (k=0; k<len; k++) {
398: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3];
400: pvout[0] += dv[0]*pvin0 + dv[4]*pvin1 + dv[8]*pvin2 + dv[12]*pvin3;
401: pvout[1] += dv[1]*pvin0 + dv[5]*pvin1 + dv[9]*pvin2 + dv[13]*pvin3;
402: pvout[2] += dv[2]*pvin0 + dv[6]*pvin1 + dv[10]*pvin2 + dv[14]*pvin3;
403: pvout[3] += dv[3]*pvin0 + dv[7]*pvin1 + dv[11]*pvin2 + dv[15]*pvin3;
405: pvout += 4; pvin += 4; dv += 16;
406: }
407: PetscLogFlops(32*len);
408: }
409: VecRestoreArray(xx,&vin);
410: VecRestoreArray(yy,&vout);
411: return(0);
412: }
416: PetscErrorCode MatMult_SeqBDiag_5(Mat A,Vec xx,Vec yy)
417: {
418: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
419: PetscInt nd = a->nd,nb_diag;
421: PetscInt *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
422: PetscScalar *vin,*vout,**a_diagv = a->diagv;
423: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3,pvin4;
426: VecGetArray(xx,&vin);
427: VecGetArray(yy,&vout);
428: PetscMemzero(vout,A->rmap.n*sizeof(PetscScalar));
429: for (d=0; d<nd; d++) {
430: dv = a_diagv[d];
431: nb_diag = 5*a_diag[d];
432: len = a_bdlen[d];
433: if (nb_diag > 0) { /* lower triangle */
434: pvin = vin;
435: pvout = vout + nb_diag;
436: dv = dv + 5*nb_diag;
437: } else { /* upper triangle,including main diagonal */
438: pvin = vin - nb_diag;
439: pvout = vout;
440: }
441: for (k=0; k<len; k++) {
442: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3]; pvin4 = pvin[4];
444: pvout[0] += dv[0]*pvin0 + dv[5]*pvin1 + dv[10]*pvin2 + dv[15]*pvin3 + dv[20]*pvin4;
445: pvout[1] += dv[1]*pvin0 + dv[6]*pvin1 + dv[11]*pvin2 + dv[16]*pvin3 + dv[21]*pvin4;
446: pvout[2] += dv[2]*pvin0 + dv[7]*pvin1 + dv[12]*pvin2 + dv[17]*pvin3 + dv[22]*pvin4;
447: pvout[3] += dv[3]*pvin0 + dv[8]*pvin1 + dv[13]*pvin2 + dv[18]*pvin3 + dv[23]*pvin4;
448: pvout[4] += dv[4]*pvin0 + dv[9]*pvin1 + dv[14]*pvin2 + dv[19]*pvin3 + dv[24]*pvin4;
450: pvout += 5; pvin += 5; dv += 25;
451: }
452: PetscLogFlops(50*len);
453: }
454: VecRestoreArray(xx,&vin);
455: VecRestoreArray(yy,&vout);
456: return(0);
457: }
461: PetscErrorCode MatMult_SeqBDiag_N(Mat A,Vec xx,Vec yy)
462: {
463: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
464: PetscInt nd = a->nd,bs = A->rmap.bs,nb_diag,bs2 = bs*bs;
466: PetscInt *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
467: PetscScalar *vin,*vout,**a_diagv = a->diagv;
468: PetscScalar *pvin,*pvout,*dv;
471: VecGetArray(xx,&vin);
472: VecGetArray(yy,&vout);
473: PetscMemzero(vout,A->rmap.n*sizeof(PetscScalar));
474: for (d=0; d<nd; d++) {
475: dv = a_diagv[d];
476: nb_diag = bs*a_diag[d];
477: len = a_bdlen[d];
478: if (nb_diag > 0) { /* lower triangle */
479: pvin = vin;
480: pvout = vout + nb_diag;
481: dv = dv + bs*nb_diag;
482: } else { /* upper triangle, including main diagonal */
483: pvin = vin - nb_diag;
484: pvout = vout;
485: }
486: for (k=0; k<len; k++) {
487: Kernel_v_gets_v_plus_A_times_w(bs,pvout,dv,pvin);
488: pvout += bs; pvin += bs; dv += bs2;
489: }
490: PetscLogFlops(2*bs2*len);
491: }
492: VecRestoreArray(xx,&vin);
493: VecRestoreArray(yy,&vout);
494: return(0);
495: }
497: /*
498: MatMultAdds for blocksize 1 to 5 and N -------------------------------
499: */
502: PetscErrorCode MatMultAdd_SeqBDiag_1(Mat A,Vec xx,Vec zz,Vec yy)
503: {
504: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
506: PetscInt nd = a->nd,diag,*a_diag = a->diag,*a_bdlen = a->bdlen,d,j,len;
507: PetscScalar *vin,*vout,**a_diagv = a->diagv;
508: PetscScalar *pvin,*pvout,*dv;
511: if (zz != yy) {VecCopy(zz,yy);}
512: VecGetArray(xx,&vin);
513: VecGetArray(yy,&vout);
514: for (d=0; d<nd; d++) {
515: dv = a_diagv[d];
516: diag = a_diag[d];
517: len = a_bdlen[d];
518: if (diag > 0) { /* lower triangle */
519: pvin = vin;
520: pvout = vout + diag;
521: dv = dv + diag;
522: } else { /* upper triangle, including main diagonal */
523: pvin = vin - diag;
524: pvout = vout;
525: }
526: for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
527: PetscLogFlops(2*len);
528: }
529: VecRestoreArray(xx,&vin);
530: VecRestoreArray(yy,&vout);
531: return(0);
532: }
536: PetscErrorCode MatMultAdd_SeqBDiag_2(Mat A,Vec xx,Vec zz,Vec yy)
537: {
538: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
540: PetscInt nd = a->nd,nb_diag;
541: PetscInt *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
542: PetscScalar *vin,*vout,**a_diagv = a->diagv;
543: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1;
546: if (zz != yy) {VecCopy(zz,yy);}
547: VecGetArray(xx,&vin);
548: VecGetArray(yy,&vout);
549: for (d=0; d<nd; d++) {
550: dv = a_diagv[d];
551: nb_diag = 2*a_diag[d];
552: len = a_bdlen[d];
553: if (nb_diag > 0) { /* lower triangle */
554: pvin = vin;
555: pvout = vout + nb_diag;
556: dv = dv + 2*nb_diag;
557: } else { /* upper triangle, including main diagonal */
558: pvin = vin - nb_diag;
559: pvout = vout;
560: }
561: for (k=0; k<len; k++) {
562: pvin0 = pvin[0]; pvin1 = pvin[1];
564: pvout[0] += dv[0]*pvin0 + dv[2]*pvin1;
565: pvout[1] += dv[1]*pvin0 + dv[3]*pvin1;
567: pvout += 2; pvin += 2; dv += 4;
568: }
569: PetscLogFlops(8*len);
570: }
571: VecRestoreArray(xx,&vin);
572: VecRestoreArray(yy,&vout);
573: return(0);
574: }
578: PetscErrorCode MatMultAdd_SeqBDiag_3(Mat A,Vec xx,Vec zz,Vec yy)
579: {
580: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
582: PetscInt nd = a->nd,nb_diag;
583: PetscInt *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
584: PetscScalar *vin,*vout,**a_diagv = a->diagv;
585: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2;
588: if (zz != yy) {VecCopy(zz,yy);}
589: VecGetArray(xx,&vin);
590: VecGetArray(yy,&vout);
591: for (d=0; d<nd; d++) {
592: dv = a_diagv[d];
593: nb_diag = 3*a_diag[d];
594: len = a_bdlen[d];
595: if (nb_diag > 0) { /* lower triangle */
596: pvin = vin;
597: pvout = vout + nb_diag;
598: dv = dv + 3*nb_diag;
599: } else { /* upper triangle, including main diagonal */
600: pvin = vin - nb_diag;
601: pvout = vout;
602: }
603: for (k=0; k<len; k++) {
604: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2];
606: pvout[0] += dv[0]*pvin0 + dv[3]*pvin1 + dv[6]*pvin2;
607: pvout[1] += dv[1]*pvin0 + dv[4]*pvin1 + dv[7]*pvin2;
608: pvout[2] += dv[2]*pvin0 + dv[5]*pvin1 + dv[8]*pvin2;
610: pvout += 3; pvin += 3; dv += 9;
611: }
612: PetscLogFlops(18*len);
613: }
614: VecRestoreArray(xx,&vin);
615: VecRestoreArray(yy,&vout);
616: return(0);
617: }
621: PetscErrorCode MatMultAdd_SeqBDiag_4(Mat A,Vec xx,Vec zz,Vec yy)
622: {
623: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
625: PetscInt nd = a->nd,nb_diag;
626: PetscInt *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
627: PetscScalar *vin,*vout,**a_diagv = a->diagv;
628: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3;
631: if (zz != yy) {VecCopy(zz,yy);}
632: VecGetArray(xx,&vin);
633: VecGetArray(yy,&vout);
634: for (d=0; d<nd; d++) {
635: dv = a_diagv[d];
636: nb_diag = 4*a_diag[d];
637: len = a_bdlen[d];
638: if (nb_diag > 0) { /* lower triangle */
639: pvin = vin;
640: pvout = vout + nb_diag;
641: dv = dv + 4*nb_diag;
642: } else { /* upper triangle, including main diagonal */
643: pvin = vin - nb_diag;
644: pvout = vout;
645: }
646: for (k=0; k<len; k++) {
647: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3];
649: pvout[0] += dv[0]*pvin0 + dv[4]*pvin1 + dv[8]*pvin2 + dv[12]*pvin3;
650: pvout[1] += dv[1]*pvin0 + dv[5]*pvin1 + dv[9]*pvin2 + dv[13]*pvin3;
651: pvout[2] += dv[2]*pvin0 + dv[6]*pvin1 + dv[10]*pvin2 + dv[14]*pvin3;
652: pvout[3] += dv[3]*pvin0 + dv[7]*pvin1 + dv[11]*pvin2 + dv[15]*pvin3;
654: pvout += 4; pvin += 4; dv += 16;
655: }
656: PetscLogFlops(32*len);
657: }
658: VecRestoreArray(xx,&vin);
659: VecRestoreArray(yy,&vout);
660: return(0);
661: }
665: PetscErrorCode MatMultAdd_SeqBDiag_5(Mat A,Vec xx,Vec zz,Vec yy)
666: {
667: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
669: PetscInt nd = a->nd,nb_diag;
670: PetscInt *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
671: PetscScalar *vin,*vout,**a_diagv = a->diagv;
672: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3,pvin4;
675: if (zz != yy) {VecCopy(zz,yy);}
676: VecGetArray(xx,&vin);
677: VecGetArray(yy,&vout);
678: for (d=0; d<nd; d++) {
679: dv = a_diagv[d];
680: nb_diag = 5*a_diag[d];
681: len = a_bdlen[d];
682: if (nb_diag > 0) { /* lower triangle */
683: pvin = vin;
684: pvout = vout + nb_diag;
685: dv = dv + 5*nb_diag;
686: } else { /* upper triangle, including main diagonal */
687: pvin = vin - nb_diag;
688: pvout = vout;
689: }
690: for (k=0; k<len; k++) {
691: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3]; pvin4 = pvin[4];
693: pvout[0] += dv[0]*pvin0 + dv[5]*pvin1 + dv[10]*pvin2 + dv[15]*pvin3 + dv[20]*pvin4;
694: pvout[1] += dv[1]*pvin0 + dv[6]*pvin1 + dv[11]*pvin2 + dv[16]*pvin3 + dv[21]*pvin4;
695: pvout[2] += dv[2]*pvin0 + dv[7]*pvin1 + dv[12]*pvin2 + dv[17]*pvin3 + dv[22]*pvin4;
696: pvout[3] += dv[3]*pvin0 + dv[8]*pvin1 + dv[13]*pvin2 + dv[18]*pvin3 + dv[23]*pvin4;
697: pvout[4] += dv[4]*pvin0 + dv[9]*pvin1 + dv[14]*pvin2 + dv[19]*pvin3 + dv[24]*pvin4;
699: pvout += 5; pvin += 5; dv += 25;
700: }
701: PetscLogFlops(50*len);
702: }
703: VecRestoreArray(xx,&vin);
704: VecRestoreArray(yy,&vout);
705: return(0);
706: }
710: PetscErrorCode MatMultAdd_SeqBDiag_N(Mat A,Vec xx,Vec zz,Vec yy)
711: {
712: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
714: PetscInt nd = a->nd,bs = A->rmap.bs,nb_diag,bs2 = bs*bs;
715: PetscInt *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
716: PetscScalar *vin,*vout,**a_diagv = a->diagv;
717: PetscScalar *pvin,*pvout,*dv;
720: if (zz != yy) {VecCopy(zz,yy);}
721: VecGetArray(xx,&vin);
722: VecGetArray(yy,&vout);
723: for (d=0; d<nd; d++) {
724: dv = a_diagv[d];
725: nb_diag = bs*a_diag[d];
726: len = a_bdlen[d];
727: if (nb_diag > 0) { /* lower triangle */
728: pvin = vin;
729: pvout = vout + nb_diag;
730: dv = dv + bs*nb_diag;
731: } else { /* upper triangle, including main diagonal */
732: pvin = vin - nb_diag;
733: pvout = vout;
734: }
735: for (k=0; k<len; k++) {
736: Kernel_v_gets_v_plus_A_times_w(bs,pvout,dv,pvin);
737: pvout += bs; pvin += bs; dv += bs2;
738: }
739: PetscLogFlops(2*bs2*len);
740: }
741: VecRestoreArray(xx,&vin);
742: VecRestoreArray(yy,&vout);
743: return(0);
744: }
746: /*
747: MatMultTranspose ----------------------------------------------
748: */
751: PetscErrorCode MatMultTranspose_SeqBDiag_1(Mat A,Vec xx,Vec yy)
752: {
753: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
755: PetscInt nd = a->nd,diag,d,j,len;
756: PetscScalar *pvin,*pvout,*dv;
757: PetscScalar *vin,*vout;
758:
760: VecGetArray(xx,&vin);
761: VecGetArray(yy,&vout);
762: PetscMemzero(vout,A->cmap.n*sizeof(PetscScalar));
763: for (d=0; d<nd; d++) {
764: dv = a->diagv[d];
765: diag = a->diag[d];
766: len = a->bdlen[d];
767: /* diag of original matrix is (row/bs - col/bs) */
768: /* diag of transpose matrix is (col/bs - row/bs) */
769: if (diag < 0) { /* transpose is lower triangle */
770: pvin = vin;
771: pvout = vout - diag;
772: } else { /* transpose is upper triangle, including main diagonal */
773: pvin = vin + diag;
774: pvout = vout;
775: dv = dv + diag;
776: }
777: for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
778: }
779: VecRestoreArray(xx,&vin);
780: VecRestoreArray(yy,&vout);
781: return(0);
782: }
786: PetscErrorCode MatMultTranspose_SeqBDiag_N(Mat A,Vec xx,Vec yy)
787: {
788: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
790: PetscInt nd = a->nd,bs = A->rmap.bs,diag,kshift,kloc,d,i,j,k,len;
791: PetscScalar *pvin,*pvout,*dv;
792: PetscScalar *vin,*vout;
793:
795: VecGetArray(xx,&vin);
796: VecGetArray(yy,&vout);
797: PetscMemzero(vout,A->cmap.n*sizeof(PetscScalar));
798: for (d=0; d<nd; d++) {
799: dv = a->diagv[d];
800: diag = a->diag[d];
801: len = a->bdlen[d];
802: /* diag of original matrix is (row/bs - col/bs) */
803: /* diag of transpose matrix is (col/bs - row/bs) */
804: if (diag < 0) { /* transpose is lower triangle */
805: pvin = vin;
806: pvout = vout - bs*diag;
807: } else { /* transpose is upper triangle, including main diagonal */
808: pvin = vin + bs*diag;
809: pvout = vout;
810: dv = dv + diag;
811: }
812: for (k=0; k<len; k++) {
813: kloc = k*bs; kshift = kloc*bs;
814: for (i=0; i<bs; i++) { /* i = local column of transpose */
815: for (j=0; j<bs; j++) { /* j = local row of transpose */
816: pvout[kloc + j] += dv[kshift + j*bs + i] * pvin[kloc + i];
817: }
818: }
819: }
820: }
821: VecRestoreArray(xx,&vin);
822: VecRestoreArray(yy,&vout);
823: return(0);
824: }
826: /*
827: MatMultTransposeAdd ----------------------------------------------
828: */
831: PetscErrorCode MatMultTransposeAdd_SeqBDiag_1(Mat A,Vec xx,Vec zz,Vec yy)
832: {
833: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
835: PetscInt nd = a->nd,diag,d,j,len;
836: PetscScalar *pvin,*pvout,*dv;
837: PetscScalar *vin,*vout;
838:
840: if (zz != yy) {VecCopy(zz,yy);}
841: VecGetArray(xx,&vin);
842: VecGetArray(yy,&vout);
843: for (d=0; d<nd; d++) {
844: dv = a->diagv[d];
845: diag = a->diag[d];
846: len = a->bdlen[d];
847: /* diag of original matrix is (row/bs - col/bs) */
848: /* diag of transpose matrix is (col/bs - row/bs) */
849: if (diag < 0) { /* transpose is lower triangle */
850: pvin = vin;
851: pvout = vout - diag;
852: } else { /* transpose is upper triangle, including main diagonal */
853: pvin = vin + diag;
854: pvout = vout;
855: dv = dv + diag;
856: }
857: for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
858: }
859: VecRestoreArray(xx,&vin);
860: VecRestoreArray(yy,&vout);
861: return(0);
862: }
866: PetscErrorCode MatMultTransposeAdd_SeqBDiag_N(Mat A,Vec xx,Vec zz,Vec yy)
867: {
868: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
870: PetscInt nd = a->nd,bs = A->rmap.bs,diag,kshift,kloc,d,i,j,k,len;
871: PetscScalar *pvin,*pvout,*dv;
872: PetscScalar *vin,*vout;
873:
875: if (zz != yy) {VecCopy(zz,yy);}
876: VecGetArray(xx,&vin);
877: VecGetArray(yy,&vout);
878: for (d=0; d<nd; d++) {
879: dv = a->diagv[d];
880: diag = a->diag[d];
881: len = a->bdlen[d];
882: /* diag of original matrix is (row/bs - col/bs) */
883: /* diag of transpose matrix is (col/bs - row/bs) */
884: if (diag < 0) { /* transpose is lower triangle */
885: pvin = vin;
886: pvout = vout - bs*diag;
887: } else { /* transpose is upper triangle, including main diagonal */
888: pvin = vin + bs*diag;
889: pvout = vout;
890: dv = dv + diag;
891: }
892: for (k=0; k<len; k++) {
893: kloc = k*bs; kshift = kloc*bs;
894: for (i=0; i<bs; i++) { /* i = local column of transpose */
895: for (j=0; j<bs; j++) { /* j = local row of transpose */
896: pvout[kloc + j] += dv[kshift + j*bs + i] * pvin[kloc + i];
897: }
898: }
899: }
900: }
901: VecRestoreArray(xx,&vin);
902: VecRestoreArray(yy,&vout);
903: return(0);
904: }
907: PetscErrorCode MatRelax_SeqBDiag_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal shift,PetscInt its,PetscInt lits,Vec xx)
908: {
909: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
910: PetscScalar *x,*b,*xb,*dd,*dv,dval,sum;
912: PetscInt i,j,k,d,kbase,bs = A->rmap.bs,kloc;
913: PetscInt mainbd = a->mainbd,diag,mblock = a->mblock,bloc;
916: its = its*lits;
917: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
919: /* Currently this code doesn't use wavefront orderings, although
920: we should eventually incorporate that option, whatever wavefront
921: ordering maybe :-) */
923: if (mainbd == -1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Main diagonal not set");
925: VecGetArray(xx,&x);
926: if (xx != bb) {
927: VecGetArray(bb,&b);
928: } else {
929: b = x;
930: }
931: dd = a->diagv[mainbd];
932: if (flag == SOR_APPLY_UPPER) {
933: /* apply (U + D/omega) to the vector */
934: for (k=0; k<mblock; k++) {
935: kloc = k*bs; kbase = kloc*bs;
936: for (i=0; i<bs; i++) {
937: sum = b[i+kloc] * (shift + dd[i*(bs+1)+kbase]) / omega;
938: for (j=i+1; j<bs; j++) sum += dd[kbase + j*bs + i] * b[kloc + j];
939: for (d=mainbd+1; d<a->nd; d++) {
940: diag = a->diag[d];
941: dv = a->diagv[d];
942: if (k-diag < mblock) {
943: for (j=0; j<bs; j++) {
944: sum += dv[kbase + j*bs + i] * b[(k-diag)*bs + j];
945: }
946: }
947: }
948: x[kloc+i] = sum;
949: }
950: }
951: VecRestoreArray(xx,&x);
952: if (xx != bb) {VecRestoreArray(bb,&b);}
953: return(0);
954: }
955: if (flag & SOR_ZERO_INITIAL_GUESS) {
956: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
957: for (k=0; k<mblock; k++) {
958: kloc = k*bs; kbase = kloc*bs;
959: for (i=0; i<bs; i++) {
960: sum = b[i+kloc];
961: dval = shift + dd[i*(bs+1)+kbase];
962: for (d=0; d<mainbd; d++) {
963: diag = a->diag[d];
964: dv = a->diagv[d];
965: if (k >= diag) {
966: for (j=0; j<bs; j++)
967: sum -= dv[k*bs*bs + j*bs + i] * x[(k-diag)*bs + j];
968: }
969: }
970: for (j=0; j<i; j++){
971: sum -= dd[kbase + j*bs + i] * x[kloc + j];
972: }
973: x[kloc+i] = omega*sum/dval;
974: }
975: }
976: xb = x;
977: } else xb = b;
978: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
979: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
980: for (k=0; k<mblock; k++) {
981: kloc = k*bs; kbase = kloc*bs;
982: for (i=0; i<bs; i++)
983: x[kloc+i] *= dd[i*(bs+1)+kbase];
984: }
985: }
986: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
987: for (k=mblock-1; k>=0; k--) {
988: kloc = k*bs; kbase = kloc*bs;
989: for (i=bs-1; i>=0; i--) {
990: sum = xb[i+kloc];
991: dval = shift + dd[i*(bs+1)+kbase];
992: for (j=i+1; j<bs; j++)
993: sum -= dd[kbase + j*bs + i] * x[kloc + j];
994: for (d=mainbd+1; d<a->nd; d++) {
995: diag = a->diag[d];
996: dv = a->diagv[d];
997: bloc = k - diag;
998: if (bloc < mblock) {
999: for (j=0; j<bs; j++)
1000: sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
1001: }
1002: }
1003: x[kloc+i] = omega*sum/dval;
1004: }
1005: }
1006: }
1007: its--;
1008: }
1009: while (its--) {
1010: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1011: for (k=0; k<mblock; k++) {
1012: kloc = k*bs; kbase = kloc*bs;
1013: for (i=0; i<bs; i++) {
1014: sum = b[i+kloc];
1015: dval = shift + dd[i*(bs+1)+kbase];
1016: for (d=0; d<mainbd; d++) {
1017: diag = a->diag[d];
1018: dv = a->diagv[d];
1019: bloc = k - diag;
1020: if (bloc >= 0) {
1021: for (j=0; j<bs; j++) {
1022: sum -= dv[k*bs*bs + j*bs + i] * x[bloc*bs + j];
1023: }
1024: }
1025: }
1026: for (d=mainbd; d<a->nd; d++) {
1027: diag = a->diag[d];
1028: dv = a->diagv[d];
1029: bloc = k - diag;
1030: if (bloc < mblock) {
1031: for (j=0; j<bs; j++) {
1032: sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
1033: }
1034: }
1035: }
1036: x[kloc+i] = (1.-omega)*x[kloc+i]+omega*(sum+dd[i*(bs+1)+kbase]*x[kloc+i])/dval;
1037: }
1038: }
1039: }
1040: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1041: for (k=mblock-1; k>=0; k--) {
1042: kloc = k*bs; kbase = kloc*bs;
1043: for (i=bs-1; i>=0; i--) {
1044: sum = b[i+kloc];
1045: dval = shift + dd[i*(bs+1)+kbase];
1046: for (d=0; d<mainbd; d++) {
1047: diag = a->diag[d];
1048: dv = a->diagv[d];
1049: bloc = k - diag;
1050: if (bloc >= 0) {
1051: for (j=0; j<bs; j++) {
1052: sum -= dv[k*bs*bs + j*bs + i] * x[bloc*bs + j];
1053: }
1054: }
1055: }
1056: for (d=mainbd; d<a->nd; d++) {
1057: diag = a->diag[d];
1058: dv = a->diagv[d];
1059: bloc = k - diag;
1060: if (bloc < mblock) {
1061: for (j=0; j<bs; j++) {
1062: sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
1063: }
1064: }
1065: }
1066: x[kloc+i] = (1.-omega)*x[kloc+i]+omega*(sum+dd[i*(bs+1)+kbase]*x[kloc+i])/dval;
1067: }
1068: }
1069: }
1070: }
1071: VecRestoreArray(xx,&x);
1072: if (xx != bb) VecRestoreArray(bb,&b);
1073: return(0);
1074: }
1078: PetscErrorCode MatRelax_SeqBDiag_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal shift,PetscInt its,PetscInt lits,Vec xx)
1079: {
1080: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
1081: PetscScalar *x,*b,*xb,*dd,dval,sum;
1083: PetscInt m = A->rmap.n,i,d,loc;
1084: PetscInt mainbd = a->mainbd,diag;
1087: its = its*lits;
1088: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1089: /* Currently this code doesn't use wavefront orderings,although
1090: we should eventually incorporate that option, whatever wavefront
1091: ordering maybe :-) */
1093: VecGetArray(xx,&x);
1094: VecGetArray(bb,&b);
1095: if (mainbd == -1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Main diagonal not set");
1096: dd = a->diagv[mainbd];
1097: if (flag == SOR_APPLY_UPPER) {
1098: /* apply (U + D/omega) to the vector */
1099: for (i=0; i<m; i++) {
1100: sum = b[i] * (shift + dd[i]) / omega;
1101: for (d=mainbd+1; d<a->nd; d++) {
1102: diag = a->diag[d];
1103: if (i-diag < m) sum += a->diagv[d][i] * x[i-diag];
1104: }
1105: x[i] = sum;
1106: }
1107: VecRestoreArray(xx,&x);
1108: VecRestoreArray(bb,&b);
1109: return(0);
1110: }
1111: if (flag & SOR_ZERO_INITIAL_GUESS) {
1112: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1113: for (i=0; i<m; i++) {
1114: sum = b[i];
1115: for (d=0; d<mainbd; d++) {
1116: if (i >= a->diag[d]) sum -= a->diagv[d][i] * x[i-a->diag[d]];
1117: }
1118: x[i] = omega*(sum/(shift + dd[i]));
1119: }
1120: xb = x;
1121: } else xb = b;
1122: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1123: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1124: for (i=0; i<m; i++) x[i] *= dd[i];
1125: }
1126: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1127: for (i=m-1; i>=0; i--) {
1128: sum = xb[i];
1129: for (d=mainbd+1; d<a->nd; d++) {
1130: diag = a->diag[d];
1131: if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1132: }
1133: x[i] = omega*(sum/(shift + dd[i]));
1134: }
1135: }
1136: its--;
1137: }
1138: while (its--) {
1139: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1140: for (i=0; i<m; i++) {
1141: sum = b[i];
1142: dval = shift + dd[i];
1143: for (d=0; d<mainbd; d++) {
1144: if (i >= a->diag[d]) sum -= a->diagv[d][i] * x[i-a->diag[d]];
1145: }
1146: for (d=mainbd; d<a->nd; d++) {
1147: diag = a->diag[d];
1148: if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1149: }
1150: x[i] = (1. - omega)*x[i] + omega*(sum + dd[i]*x[i])/dval;
1151: }
1152: }
1153: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1154: for (i=m-1; i>=0; i--) {
1155: sum = b[i];
1156: for (d=0; d<mainbd; d++) {
1157: loc = i - a->diag[d];
1158: if (loc >= 0) sum -= a->diagv[d][i] * x[loc];
1159: }
1160: for (d=mainbd; d<a->nd; d++) {
1161: diag = a->diag[d];
1162: if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1163: }
1164: x[i] = (1. - omega)*x[i] + omega*(sum + dd[i]*x[i])/(shift + dd[i]);
1165: }
1166: }
1167: }
1168: VecRestoreArray(xx,&x);
1169: VecRestoreArray(bb,&b);
1170: return(0);
1171: }