Actual source code: bdiag2.c
1: /*$Id: bdiag2.c,v 1.21 2001/08/07 03:02:53 balay Exp $*/
3: /* Block diagonal matrix format */
5: #include src/mat/impls/bdiag/seq/bdiag.h
6: #include src/vec/vecimpl.h
7: #include src/inline/ilu.h
11: int MatSetValues_SeqBDiag_1(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
12: {
13: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
14: int kk,ldiag,row,newnz,*bdlen_new;
15: int j,k, *diag_new,ierr;
16: PetscTruth roworiented = a->roworiented,dfound;
17: PetscScalar value,**diagv_new;
20: for (kk=0; kk<m; kk++) { /* loop over added rows */
21: row = im[kk];
22: if (row < 0) continue;
23: if (row >= A->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->M-1);
24: for (j=0; j<n; j++) {
25: if (in[j] < 0) continue;
26: if (in[j] >= A->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],A->N-1);
27: ldiag = row - in[j]; /* diagonal number */
28: dfound = PETSC_FALSE;
29: if (roworiented) {
30: value = v[j + kk*n];
31: } else {
32: value = v[kk + j*m];
33: }
34: /* search diagonals for required one */
35: for (k=0; k<a->nd; k++) {
36: if (a->diag[k] == ldiag) {
37: dfound = PETSC_TRUE;
38: if (is == ADD_VALUES) a->diagv[k][row] += value;
39: else a->diagv[k][row] = value;
40: break;
41: }
42: }
43: if (!dfound) {
44: if (a->nonew || a->nonew_diag) {
45: #if !defined(PETSC_USE_COMPLEX)
46: if (a->user_alloc && value) {
47: #else
48: if (a->user_alloc && PetscRealPart(value) || PetscImaginaryPart(value)) {
49: #endif
50: PetscLogInfo(A,"MatSetValues_SeqBDiag:Nonzero in diagonal %d that user did not allocate\n",ldiag);
51: }
52: } else {
53: PetscLogInfo(A,"MatSetValues_SeqBDiag: Allocating new diagonal: %d\n",ldiag);
54: a->reallocs++;
55: /* free old bdiag storage info and reallocate */
56: PetscMalloc(2*(a->nd+1)*sizeof(int),&diag_new);
57: bdlen_new = diag_new + a->nd + 1;
58: PetscMalloc((a->nd+1)*sizeof(PetscScalar*),&diagv_new);
59: for (k=0; k<a->nd; k++) {
60: diag_new[k] = a->diag[k];
61: diagv_new[k] = a->diagv[k];
62: bdlen_new[k] = a->bdlen[k];
63: }
64: diag_new[a->nd] = ldiag;
65: if (ldiag > 0) { /* lower triangular */
66: bdlen_new[a->nd] = PetscMin(a->nblock,a->mblock - ldiag);
67: } else { /* upper triangular */
68: bdlen_new[a->nd] = PetscMin(a->mblock,a->nblock + ldiag);
69: }
70: newnz = bdlen_new[a->nd];
71: PetscMalloc(newnz*sizeof(PetscScalar),&diagv_new[a->nd]);
72: PetscMemzero(diagv_new[a->nd],newnz*sizeof(PetscScalar));
73: /* adjust pointers so that dv[diag][row] works for all diagonals*/
74: if (diag_new[a->nd] > 0) {
75: diagv_new[a->nd] -= diag_new[a->nd];
76: }
77: a->maxnz += newnz;
78: a->nz += newnz;
79: PetscFree(a->diagv);
80: PetscFree(a->diag);
81: a->diag = diag_new;
82: a->bdlen = bdlen_new;
83: a->diagv = diagv_new;
85: /* Insert value */
86: if (is == ADD_VALUES) a->diagv[a->nd][row] += value;
87: else a->diagv[a->nd][row] = value;
88: a->nd++;
89: PetscLogObjectMemory(A,newnz*sizeof(PetscScalar)+2*sizeof(int)+sizeof(PetscScalar*));
90: }
91: }
92: }
93: }
94: return(0);
95: }
100: int MatSetValues_SeqBDiag_N(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
101: {
102: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
103: int kk,ldiag,shift,row,newnz,*bdlen_new,ierr;
104: int j,k,bs = a->bs,*diag_new,idx=0;
105: PetscTruth roworiented = a->roworiented,dfound;
106: PetscScalar value,**diagv_new;
109: for (kk=0; kk<m; kk++) { /* loop over added rows */
110: row = im[kk];
111: if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
112: if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
113: shift = (row/bs)*bs*bs + row%bs;
114: for (j=0; j<n; j++) {
115: ldiag = row/bs - in[j]/bs; /* block diagonal */
116: dfound = PETSC_FALSE;
117: if (roworiented) {
118: value = v[idx++];
119: } else {
120: value = v[kk + j*m];
121: }
122: /* seach for appropriate diagonal */
123: for (k=0; k<a->nd; k++) {
124: if (a->diag[k] == ldiag) {
125: dfound = PETSC_TRUE;
126: if (is == ADD_VALUES) a->diagv[k][shift + (in[j]%bs)*bs] += value;
127: else a->diagv[k][shift + (in[j]%bs)*bs] = value;
128: break;
129: }
130: }
131: if (!dfound) {
132: if (a->nonew || a->nonew_diag) {
133: #if !defined(PETSC_USE_COMPLEX)
134: if (a->user_alloc && value) {
135: #else
136: if (a->user_alloc && PetscRealPart(value) || PetscImaginaryPart(value)) {
137: #endif
138: PetscLogInfo(A,"MatSetValues_SeqBDiag:Nonzero in diagonal %d that user did not allocate\n",ldiag);
139: }
140: } else {
141: PetscLogInfo(A,"MatSetValues_SeqBDiag: Allocating new diagonal: %d\n",ldiag);
142: a->reallocs++;
143: /* free old bdiag storage info and reallocate */
144: PetscMalloc(2*(a->nd+1)*sizeof(int),&diag_new);
145: bdlen_new = diag_new + a->nd + 1;
146: PetscMalloc((a->nd+1)*sizeof(PetscScalar*),&diagv_new);
147: for (k=0; k<a->nd; k++) {
148: diag_new[k] = a->diag[k];
149: diagv_new[k] = a->diagv[k];
150: bdlen_new[k] = a->bdlen[k];
151: }
152: diag_new[a->nd] = ldiag;
153: if (ldiag > 0) {/* lower triangular */
154: bdlen_new[a->nd] = PetscMin(a->nblock,a->mblock - ldiag);
155: } else { /* upper triangular */
156: bdlen_new[a->nd] = PetscMin(a->mblock,a->nblock + ldiag);
157: }
158: newnz = bs*bs*bdlen_new[a->nd];
159: PetscMalloc(newnz*sizeof(PetscScalar),&diagv_new[a->nd]);
160: PetscMemzero(diagv_new[a->nd],newnz*sizeof(PetscScalar));
161: /* adjust pointer so that dv[diag][row] works for all diagonals */
162: if (diag_new[a->nd] > 0) {
163: diagv_new[a->nd] -= bs*bs*diag_new[a->nd];
164: }
165: a->maxnz += newnz; a->nz += newnz;
166: PetscFree(a->diagv);
167: PetscFree(a->diag);
168: a->diag = diag_new;
169: a->bdlen = bdlen_new;
170: a->diagv = diagv_new;
172: /* Insert value */
173: if (is == ADD_VALUES) a->diagv[k][shift + (in[j]%bs)*bs] += value;
174: else a->diagv[k][shift + (in[j]%bs)*bs] = value;
175: a->nd++;
176: PetscLogObjectMemory(A,newnz*sizeof(PetscScalar)+2*sizeof(int)+sizeof(PetscScalar*));
177: }
178: }
179: }
180: }
181: return(0);
182: }
186: int MatGetValues_SeqBDiag_1(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
187: {
188: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
189: int kk,ldiag,row,j,k;
190: PetscScalar zero = 0.0;
191: PetscTruth dfound;
194: for (kk=0; kk<m; kk++) { /* loop over rows */
195: row = im[kk];
196: if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
197: if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
198: for (j=0; j<n; j++) {
199: if (in[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
200: if (in[j] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
201: ldiag = row - in[j]; /* diagonal number */
202: dfound = PETSC_FALSE;
203: for (k=0; k<a->nd; k++) {
204: if (a->diag[k] == ldiag) {
205: dfound = PETSC_TRUE;
206: *v++ = a->diagv[k][row];
207: break;
208: }
209: }
210: if (!dfound) *v++ = zero;
211: }
212: }
213: return(0);
214: }
218: int MatGetValues_SeqBDiag_N(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
219: {
220: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
221: int kk,ldiag,shift,row,j,k,bs = a->bs;
222: PetscScalar zero = 0.0;
223: PetscTruth dfound;
226: for (kk=0; kk<m; kk++) { /* loop over rows */
227: row = im[kk];
228: if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
229: if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
230: shift = (row/bs)*bs*bs + row%bs;
231: for (j=0; j<n; j++) {
232: ldiag = row/bs - in[j]/bs; /* block diagonal */
233: dfound = PETSC_FALSE;
234: for (k=0; k<a->nd; k++) {
235: if (a->diag[k] == ldiag) {
236: dfound = PETSC_TRUE;
237: *v++ = a->diagv[k][shift + (in[j]%bs)*bs ];
238: break;
239: }
240: }
241: if (!dfound) *v++ = zero;
242: }
243: }
244: return(0);
245: }
247: /*
248: MatMults for blocksize 1 to 5 and N -------------------------------
249: */
252: int MatMult_SeqBDiag_1(Mat A,Vec xx,Vec yy)
253: {
254: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
255: int nd = a->nd,diag,*a_diag = a->diag,*a_bdlen = a->bdlen;
256: int ierr,d,j,len;
257: PetscScalar *vin,*vout,**a_diagv = a->diagv;
258: PetscScalar *pvin,*pvout,*dv;
261: VecGetArray(xx,&vin);
262: VecGetArray(yy,&vout);
263: PetscMemzero(vout,A->m*sizeof(PetscScalar));
264: for (d=0; d<nd; d++) {
265: dv = a_diagv[d];
266: diag = a_diag[d];
267: len = a_bdlen[d];
268: if (diag > 0) { /* lower triangle */
269: pvin = vin;
270: pvout = vout + diag;
271: dv = dv + diag;
272: } else { /* upper triangle,including main diagonal */
273: pvin = vin - diag;
274: pvout = vout;
275: }
276: for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
277: PetscLogFlops(2*len);
278: }
279: VecRestoreArray(xx,&vin);
280: VecRestoreArray(yy,&vout);
281: return(0);
282: }
286: int MatMult_SeqBDiag_2(Mat A,Vec xx,Vec yy)
287: {
288: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
289: int nd = a->nd,nb_diag;
290: int ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
291: PetscScalar *vin,*vout,**a_diagv = a->diagv;
292: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1;
295: VecGetArray(xx,&vin);
296: VecGetArray(yy,&vout);
297: PetscMemzero(vout,A->m*sizeof(PetscScalar));
298: for (d=0; d<nd; d++) {
299: dv = a_diagv[d];
300: nb_diag = 2*a_diag[d];
301: len = a_bdlen[d];
302: if (nb_diag > 0) { /* lower triangle */
303: pvin = vin;
304: pvout = vout + nb_diag;
305: dv = dv + 2*nb_diag;
306: } else { /* upper triangle, including main diagonal */
307: pvin = vin - nb_diag;
308: pvout = vout;
309: }
310: for (k=0; k<len; k++) {
311: pvin0 = pvin[0]; pvin1 = pvin[1];
313: pvout[0] += dv[0]*pvin0 + dv[2]*pvin1;
314: pvout[1] += dv[1]*pvin0 + dv[3]*pvin1;
316: pvout += 2; pvin += 2; dv += 4;
317: }
318: PetscLogFlops(8*len);
319: }
320: VecRestoreArray(xx,&vin);
321: VecRestoreArray(yy,&vout);
322: return(0);
323: }
327: int MatMult_SeqBDiag_3(Mat A,Vec xx,Vec yy)
328: {
329: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
330: int nd = a->nd,nb_diag;
331: int ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
332: PetscScalar *vin,*vout,**a_diagv = a->diagv;
333: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2;
336: VecGetArray(xx,&vin);
337: VecGetArray(yy,&vout);
338: PetscMemzero(vout,A->m*sizeof(PetscScalar));
339: for (d=0; d<nd; d++) {
340: dv = a_diagv[d];
341: nb_diag = 3*a_diag[d];
342: len = a_bdlen[d];
343: if (nb_diag > 0) { /* lower triangle */
344: pvin = vin;
345: pvout = vout + nb_diag;
346: dv = dv + 3*nb_diag;
347: } else { /* upper triangle,including main diagonal */
348: pvin = vin - nb_diag;
349: pvout = vout;
350: }
351: for (k=0; k<len; k++) {
352: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2];
354: pvout[0] += dv[0]*pvin0 + dv[3]*pvin1 + dv[6]*pvin2;
355: pvout[1] += dv[1]*pvin0 + dv[4]*pvin1 + dv[7]*pvin2;
356: pvout[2] += dv[2]*pvin0 + dv[5]*pvin1 + dv[8]*pvin2;
358: pvout += 3; pvin += 3; dv += 9;
359: }
360: PetscLogFlops(18*len);
361: }
362: VecRestoreArray(xx,&vin);
363: VecRestoreArray(yy,&vout);
364: return(0);
365: }
369: int MatMult_SeqBDiag_4(Mat A,Vec xx,Vec yy)
370: {
371: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
372: int nd = a->nd,nb_diag;
373: int ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
374: PetscScalar *vin,*vout,**a_diagv = a->diagv;
375: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3;
378: VecGetArray(xx,&vin);
379: VecGetArray(yy,&vout);
380: PetscMemzero(vout,A->m*sizeof(PetscScalar));
381: for (d=0; d<nd; d++) {
382: dv = a_diagv[d];
383: nb_diag = 4*a_diag[d];
384: len = a_bdlen[d];
385: if (nb_diag > 0) { /* lower triangle */
386: pvin = vin;
387: pvout = vout + nb_diag;
388: dv = dv + 4*nb_diag;
389: } else { /* upper triangle,including main diagonal */
390: pvin = vin - nb_diag;
391: pvout = vout;
392: }
393: for (k=0; k<len; k++) {
394: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3];
396: pvout[0] += dv[0]*pvin0 + dv[4]*pvin1 + dv[8]*pvin2 + dv[12]*pvin3;
397: pvout[1] += dv[1]*pvin0 + dv[5]*pvin1 + dv[9]*pvin2 + dv[13]*pvin3;
398: pvout[2] += dv[2]*pvin0 + dv[6]*pvin1 + dv[10]*pvin2 + dv[14]*pvin3;
399: pvout[3] += dv[3]*pvin0 + dv[7]*pvin1 + dv[11]*pvin2 + dv[15]*pvin3;
401: pvout += 4; pvin += 4; dv += 16;
402: }
403: PetscLogFlops(32*len);
404: }
405: VecRestoreArray(xx,&vin);
406: VecRestoreArray(yy,&vout);
407: return(0);
408: }
412: int MatMult_SeqBDiag_5(Mat A,Vec xx,Vec yy)
413: {
414: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
415: int nd = a->nd,nb_diag;
416: int ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
417: PetscScalar *vin,*vout,**a_diagv = a->diagv;
418: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3,pvin4;
421: VecGetArray(xx,&vin);
422: VecGetArray(yy,&vout);
423: PetscMemzero(vout,A->m*sizeof(PetscScalar));
424: for (d=0; d<nd; d++) {
425: dv = a_diagv[d];
426: nb_diag = 5*a_diag[d];
427: len = a_bdlen[d];
428: if (nb_diag > 0) { /* lower triangle */
429: pvin = vin;
430: pvout = vout + nb_diag;
431: dv = dv + 5*nb_diag;
432: } else { /* upper triangle,including main diagonal */
433: pvin = vin - nb_diag;
434: pvout = vout;
435: }
436: for (k=0; k<len; k++) {
437: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3]; pvin4 = pvin[4];
439: pvout[0] += dv[0]*pvin0 + dv[5]*pvin1 + dv[10]*pvin2 + dv[15]*pvin3 + dv[20]*pvin4;
440: pvout[1] += dv[1]*pvin0 + dv[6]*pvin1 + dv[11]*pvin2 + dv[16]*pvin3 + dv[21]*pvin4;
441: pvout[2] += dv[2]*pvin0 + dv[7]*pvin1 + dv[12]*pvin2 + dv[17]*pvin3 + dv[22]*pvin4;
442: pvout[3] += dv[3]*pvin0 + dv[8]*pvin1 + dv[13]*pvin2 + dv[18]*pvin3 + dv[23]*pvin4;
443: pvout[4] += dv[4]*pvin0 + dv[9]*pvin1 + dv[14]*pvin2 + dv[19]*pvin3 + dv[24]*pvin4;
445: pvout += 5; pvin += 5; dv += 25;
446: }
447: PetscLogFlops(50*len);
448: }
449: VecRestoreArray(xx,&vin);
450: VecRestoreArray(yy,&vout);
451: return(0);
452: }
456: int MatMult_SeqBDiag_N(Mat A,Vec xx,Vec yy)
457: {
458: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
459: int nd = a->nd,bs = a->bs,nb_diag,bs2 = bs*bs;
460: int ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
461: PetscScalar *vin,*vout,**a_diagv = a->diagv;
462: PetscScalar *pvin,*pvout,*dv;
465: VecGetArray(xx,&vin);
466: VecGetArray(yy,&vout);
467: PetscMemzero(vout,A->m*sizeof(PetscScalar));
468: for (d=0; d<nd; d++) {
469: dv = a_diagv[d];
470: nb_diag = bs*a_diag[d];
471: len = a_bdlen[d];
472: if (nb_diag > 0) { /* lower triangle */
473: pvin = vin;
474: pvout = vout + nb_diag;
475: dv = dv + bs*nb_diag;
476: } else { /* upper triangle, including main diagonal */
477: pvin = vin - nb_diag;
478: pvout = vout;
479: }
480: for (k=0; k<len; k++) {
481: Kernel_v_gets_v_plus_A_times_w(bs,pvout,dv,pvin);
482: pvout += bs; pvin += bs; dv += bs2;
483: }
484: PetscLogFlops(2*bs2*len);
485: }
486: VecRestoreArray(xx,&vin);
487: VecRestoreArray(yy,&vout);
488: return(0);
489: }
491: /*
492: MatMultAdds for blocksize 1 to 5 and N -------------------------------
493: */
496: int MatMultAdd_SeqBDiag_1(Mat A,Vec xx,Vec zz,Vec yy)
497: {
498: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
499: int ierr,nd = a->nd,diag,*a_diag = a->diag,*a_bdlen = a->bdlen,d,j,len;
500: PetscScalar *vin,*vout,**a_diagv = a->diagv;
501: PetscScalar *pvin,*pvout,*dv;
504: if (zz != yy) {VecCopy(zz,yy);}
505: VecGetArray(xx,&vin);
506: VecGetArray(yy,&vout);
507: for (d=0; d<nd; d++) {
508: dv = a_diagv[d];
509: diag = a_diag[d];
510: len = a_bdlen[d];
511: if (diag > 0) { /* lower triangle */
512: pvin = vin;
513: pvout = vout + diag;
514: dv = dv + diag;
515: } else { /* upper triangle, including main diagonal */
516: pvin = vin - diag;
517: pvout = vout;
518: }
519: for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
520: PetscLogFlops(2*len);
521: }
522: VecRestoreArray(xx,&vin);
523: VecRestoreArray(yy,&vout);
524: return(0);
525: }
529: int MatMultAdd_SeqBDiag_2(Mat A,Vec xx,Vec zz,Vec yy)
530: {
531: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
532: int ierr,nd = a->nd,nb_diag;
533: int *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
534: PetscScalar *vin,*vout,**a_diagv = a->diagv;
535: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1;
538: if (zz != yy) {VecCopy(zz,yy);}
539: VecGetArray(xx,&vin);
540: VecGetArray(yy,&vout);
541: for (d=0; d<nd; d++) {
542: dv = a_diagv[d];
543: nb_diag = 2*a_diag[d];
544: len = a_bdlen[d];
545: if (nb_diag > 0) { /* lower triangle */
546: pvin = vin;
547: pvout = vout + nb_diag;
548: dv = dv + 2*nb_diag;
549: } else { /* upper triangle, including main diagonal */
550: pvin = vin - nb_diag;
551: pvout = vout;
552: }
553: for (k=0; k<len; k++) {
554: pvin0 = pvin[0]; pvin1 = pvin[1];
556: pvout[0] += dv[0]*pvin0 + dv[2]*pvin1;
557: pvout[1] += dv[1]*pvin0 + dv[3]*pvin1;
559: pvout += 2; pvin += 2; dv += 4;
560: }
561: PetscLogFlops(8*len);
562: }
563: VecRestoreArray(xx,&vin);
564: VecRestoreArray(yy,&vout);
565: return(0);
566: }
570: int MatMultAdd_SeqBDiag_3(Mat A,Vec xx,Vec zz,Vec yy)
571: {
572: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
573: int ierr,nd = a->nd,nb_diag;
574: int *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
575: PetscScalar *vin,*vout,**a_diagv = a->diagv;
576: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2;
579: if (zz != yy) {VecCopy(zz,yy);}
580: VecGetArray(xx,&vin);
581: VecGetArray(yy,&vout);
582: for (d=0; d<nd; d++) {
583: dv = a_diagv[d];
584: nb_diag = 3*a_diag[d];
585: len = a_bdlen[d];
586: if (nb_diag > 0) { /* lower triangle */
587: pvin = vin;
588: pvout = vout + nb_diag;
589: dv = dv + 3*nb_diag;
590: } else { /* upper triangle, including main diagonal */
591: pvin = vin - nb_diag;
592: pvout = vout;
593: }
594: for (k=0; k<len; k++) {
595: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2];
597: pvout[0] += dv[0]*pvin0 + dv[3]*pvin1 + dv[6]*pvin2;
598: pvout[1] += dv[1]*pvin0 + dv[4]*pvin1 + dv[7]*pvin2;
599: pvout[2] += dv[2]*pvin0 + dv[5]*pvin1 + dv[8]*pvin2;
601: pvout += 3; pvin += 3; dv += 9;
602: }
603: PetscLogFlops(18*len);
604: }
605: VecRestoreArray(xx,&vin);
606: VecRestoreArray(yy,&vout);
607: return(0);
608: }
612: int MatMultAdd_SeqBDiag_4(Mat A,Vec xx,Vec zz,Vec yy)
613: {
614: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
615: int ierr,nd = a->nd,nb_diag;
616: int *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
617: PetscScalar *vin,*vout,**a_diagv = a->diagv;
618: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3;
621: if (zz != yy) {VecCopy(zz,yy);}
622: VecGetArray(xx,&vin);
623: VecGetArray(yy,&vout);
624: for (d=0; d<nd; d++) {
625: dv = a_diagv[d];
626: nb_diag = 4*a_diag[d];
627: len = a_bdlen[d];
628: if (nb_diag > 0) { /* lower triangle */
629: pvin = vin;
630: pvout = vout + nb_diag;
631: dv = dv + 4*nb_diag;
632: } else { /* upper triangle, including main diagonal */
633: pvin = vin - nb_diag;
634: pvout = vout;
635: }
636: for (k=0; k<len; k++) {
637: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3];
639: pvout[0] += dv[0]*pvin0 + dv[4]*pvin1 + dv[8]*pvin2 + dv[12]*pvin3;
640: pvout[1] += dv[1]*pvin0 + dv[5]*pvin1 + dv[9]*pvin2 + dv[13]*pvin3;
641: pvout[2] += dv[2]*pvin0 + dv[6]*pvin1 + dv[10]*pvin2 + dv[14]*pvin3;
642: pvout[3] += dv[3]*pvin0 + dv[7]*pvin1 + dv[11]*pvin2 + dv[15]*pvin3;
644: pvout += 4; pvin += 4; dv += 16;
645: }
646: PetscLogFlops(32*len);
647: }
648: VecRestoreArray(xx,&vin);
649: VecRestoreArray(yy,&vout);
650: return(0);
651: }
655: int MatMultAdd_SeqBDiag_5(Mat A,Vec xx,Vec zz,Vec yy)
656: {
657: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
658: int ierr,nd = a->nd,nb_diag;
659: int *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
660: PetscScalar *vin,*vout,**a_diagv = a->diagv;
661: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3,pvin4;
664: if (zz != yy) {VecCopy(zz,yy);}
665: VecGetArray(xx,&vin);
666: VecGetArray(yy,&vout);
667: for (d=0; d<nd; d++) {
668: dv = a_diagv[d];
669: nb_diag = 5*a_diag[d];
670: len = a_bdlen[d];
671: if (nb_diag > 0) { /* lower triangle */
672: pvin = vin;
673: pvout = vout + nb_diag;
674: dv = dv + 5*nb_diag;
675: } else { /* upper triangle, including main diagonal */
676: pvin = vin - nb_diag;
677: pvout = vout;
678: }
679: for (k=0; k<len; k++) {
680: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3]; pvin4 = pvin[4];
682: pvout[0] += dv[0]*pvin0 + dv[5]*pvin1 + dv[10]*pvin2 + dv[15]*pvin3 + dv[20]*pvin4;
683: pvout[1] += dv[1]*pvin0 + dv[6]*pvin1 + dv[11]*pvin2 + dv[16]*pvin3 + dv[21]*pvin4;
684: pvout[2] += dv[2]*pvin0 + dv[7]*pvin1 + dv[12]*pvin2 + dv[17]*pvin3 + dv[22]*pvin4;
685: pvout[3] += dv[3]*pvin0 + dv[8]*pvin1 + dv[13]*pvin2 + dv[18]*pvin3 + dv[23]*pvin4;
686: pvout[4] += dv[4]*pvin0 + dv[9]*pvin1 + dv[14]*pvin2 + dv[19]*pvin3 + dv[24]*pvin4;
688: pvout += 5; pvin += 5; dv += 25;
689: }
690: PetscLogFlops(50*len);
691: }
692: VecRestoreArray(xx,&vin);
693: VecRestoreArray(yy,&vout);
694: return(0);
695: }
699: int MatMultAdd_SeqBDiag_N(Mat A,Vec xx,Vec zz,Vec yy)
700: {
701: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
702: int ierr,nd = a->nd,bs = a->bs,nb_diag,bs2 = bs*bs;
703: int *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
704: PetscScalar *vin,*vout,**a_diagv = a->diagv;
705: PetscScalar *pvin,*pvout,*dv;
708: if (zz != yy) {VecCopy(zz,yy);}
709: VecGetArray(xx,&vin);
710: VecGetArray(yy,&vout);
711: for (d=0; d<nd; d++) {
712: dv = a_diagv[d];
713: nb_diag = bs*a_diag[d];
714: len = a_bdlen[d];
715: if (nb_diag > 0) { /* lower triangle */
716: pvin = vin;
717: pvout = vout + nb_diag;
718: dv = dv + bs*nb_diag;
719: } else { /* upper triangle, including main diagonal */
720: pvin = vin - nb_diag;
721: pvout = vout;
722: }
723: for (k=0; k<len; k++) {
724: Kernel_v_gets_v_plus_A_times_w(bs,pvout,dv,pvin);
725: pvout += bs; pvin += bs; dv += bs2;
726: }
727: PetscLogFlops(2*bs2*len);
728: }
729: VecRestoreArray(xx,&vin);
730: VecRestoreArray(yy,&vout);
731: return(0);
732: }
734: /*
735: MatMultTranspose ----------------------------------------------
736: */
739: int MatMultTranspose_SeqBDiag_1(Mat A,Vec xx,Vec yy)
740: {
741: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
742: int ierr,nd = a->nd,diag,d,j,len;
743: PetscScalar *pvin,*pvout,*dv;
744: PetscScalar *vin,*vout;
745:
747: VecGetArray(xx,&vin);
748: VecGetArray(yy,&vout);
749: PetscMemzero(vout,A->n*sizeof(PetscScalar));
750: for (d=0; d<nd; d++) {
751: dv = a->diagv[d];
752: diag = a->diag[d];
753: len = a->bdlen[d];
754: /* diag of original matrix is (row/bs - col/bs) */
755: /* diag of transpose matrix is (col/bs - row/bs) */
756: if (diag < 0) { /* transpose is lower triangle */
757: pvin = vin;
758: pvout = vout - diag;
759: } else { /* transpose is upper triangle, including main diagonal */
760: pvin = vin + diag;
761: pvout = vout;
762: dv = dv + diag;
763: }
764: for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
765: }
766: VecRestoreArray(xx,&vin);
767: VecRestoreArray(yy,&vout);
768: return(0);
769: }
773: int MatMultTranspose_SeqBDiag_N(Mat A,Vec xx,Vec yy)
774: {
775: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
776: int ierr,nd = a->nd,bs = a->bs,diag,kshift,kloc,d,i,j,k,len;
777: PetscScalar *pvin,*pvout,*dv;
778: PetscScalar *vin,*vout;
779:
781: VecGetArray(xx,&vin);
782: VecGetArray(yy,&vout);
783: PetscMemzero(vout,A->n*sizeof(PetscScalar));
784: for (d=0; d<nd; d++) {
785: dv = a->diagv[d];
786: diag = a->diag[d];
787: len = a->bdlen[d];
788: /* diag of original matrix is (row/bs - col/bs) */
789: /* diag of transpose matrix is (col/bs - row/bs) */
790: if (diag < 0) { /* transpose is lower triangle */
791: pvin = vin;
792: pvout = vout - bs*diag;
793: } else { /* transpose is upper triangle, including main diagonal */
794: pvin = vin + bs*diag;
795: pvout = vout;
796: dv = dv + diag;
797: }
798: for (k=0; k<len; k++) {
799: kloc = k*bs; kshift = kloc*bs;
800: for (i=0; i<bs; i++) { /* i = local column of transpose */
801: for (j=0; j<bs; j++) { /* j = local row of transpose */
802: pvout[kloc + j] += dv[kshift + j*bs + i] * pvin[kloc + i];
803: }
804: }
805: }
806: }
807: VecRestoreArray(xx,&vin);
808: VecRestoreArray(yy,&vout);
809: return(0);
810: }
812: /*
813: MatMultTransposeAdd ----------------------------------------------
814: */
817: int MatMultTransposeAdd_SeqBDiag_1(Mat A,Vec xx,Vec zz,Vec yy)
818: {
819: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
820: int ierr,nd = a->nd,diag,d,j,len;
821: PetscScalar *pvin,*pvout,*dv;
822: PetscScalar *vin,*vout;
823:
825: if (zz != yy) {VecCopy(zz,yy);}
826: VecGetArray(xx,&vin);
827: VecGetArray(yy,&vout);
828: for (d=0; d<nd; d++) {
829: dv = a->diagv[d];
830: diag = a->diag[d];
831: len = a->bdlen[d];
832: /* diag of original matrix is (row/bs - col/bs) */
833: /* diag of transpose matrix is (col/bs - row/bs) */
834: if (diag < 0) { /* transpose is lower triangle */
835: pvin = vin;
836: pvout = vout - diag;
837: } else { /* transpose is upper triangle, including main diagonal */
838: pvin = vin + diag;
839: pvout = vout;
840: dv = dv + diag;
841: }
842: for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
843: }
844: VecRestoreArray(xx,&vin);
845: VecRestoreArray(yy,&vout);
846: return(0);
847: }
851: int MatMultTransposeAdd_SeqBDiag_N(Mat A,Vec xx,Vec zz,Vec yy)
852: {
853: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
854: int ierr,nd = a->nd,bs = a->bs,diag,kshift,kloc,d,i,j,k,len;
855: PetscScalar *pvin,*pvout,*dv;
856: PetscScalar *vin,*vout;
857:
859: if (zz != yy) {VecCopy(zz,yy);}
860: VecGetArray(xx,&vin);
861: VecGetArray(yy,&vout);
862: for (d=0; d<nd; d++) {
863: dv = a->diagv[d];
864: diag = a->diag[d];
865: len = a->bdlen[d];
866: /* diag of original matrix is (row/bs - col/bs) */
867: /* diag of transpose matrix is (col/bs - row/bs) */
868: if (diag < 0) { /* transpose is lower triangle */
869: pvin = vin;
870: pvout = vout - bs*diag;
871: } else { /* transpose is upper triangle, including main diagonal */
872: pvin = vin + bs*diag;
873: pvout = vout;
874: dv = dv + diag;
875: }
876: for (k=0; k<len; k++) {
877: kloc = k*bs; kshift = kloc*bs;
878: for (i=0; i<bs; i++) { /* i = local column of transpose */
879: for (j=0; j<bs; j++) { /* j = local row of transpose */
880: pvout[kloc + j] += dv[kshift + j*bs + i] * pvin[kloc + i];
881: }
882: }
883: }
884: }
885: VecRestoreArray(xx,&vin);
886: VecRestoreArray(yy,&vout);
887: return(0);
888: }
891: int MatRelax_SeqBDiag_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal shift,int its,int lits,Vec xx)
892: {
893: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
894: PetscScalar *x,*b,*xb,*dd,*dv,dval,sum;
895: int ierr,i,j,k,d,kbase,bs = a->bs,kloc;
896: int mainbd = a->mainbd,diag,mblock = a->mblock,bloc;
899: its = its*lits;
900: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
902: /* Currently this code doesn't use wavefront orderings, although
903: we should eventually incorporate that option, whatever wavefront
904: ordering maybe :-) */
906: if (mainbd == -1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Main diagonal not set");
908: VecGetArray(xx,&x);
909: if (xx != bb) {
910: VecGetArray(bb,&b);
911: } else {
912: b = x;
913: }
914: dd = a->diagv[mainbd];
915: if (flag == SOR_APPLY_UPPER) {
916: /* apply (U + D/omega) to the vector */
917: for (k=0; k<mblock; k++) {
918: kloc = k*bs; kbase = kloc*bs;
919: for (i=0; i<bs; i++) {
920: sum = b[i+kloc] * (shift + dd[i*(bs+1)+kbase]) / omega;
921: for (j=i+1; j<bs; j++) sum += dd[kbase + j*bs + i] * b[kloc + j];
922: for (d=mainbd+1; d<a->nd; d++) {
923: diag = a->diag[d];
924: dv = a->diagv[d];
925: if (k-diag < mblock) {
926: for (j=0; j<bs; j++) {
927: sum += dv[kbase + j*bs + i] * b[(k-diag)*bs + j];
928: }
929: }
930: }
931: x[kloc+i] = sum;
932: }
933: }
934: VecRestoreArray(xx,&x);
935: if (xx != bb) {VecRestoreArray(bb,&b);}
936: return(0);
937: }
938: if (flag & SOR_ZERO_INITIAL_GUESS) {
939: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
940: for (k=0; k<mblock; k++) {
941: kloc = k*bs; kbase = kloc*bs;
942: for (i=0; i<bs; i++) {
943: sum = b[i+kloc];
944: dval = shift + dd[i*(bs+1)+kbase];
945: for (d=0; d<mainbd; d++) {
946: diag = a->diag[d];
947: dv = a->diagv[d];
948: if (k >= diag) {
949: for (j=0; j<bs; j++)
950: sum -= dv[k*bs*bs + j*bs + i] * x[(k-diag)*bs + j];
951: }
952: }
953: for (j=0; j<i; j++){
954: sum -= dd[kbase + j*bs + i] * x[kloc + j];
955: }
956: x[kloc+i] = omega*sum/dval;
957: }
958: }
959: xb = x;
960: } else xb = b;
961: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
962: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
963: for (k=0; k<mblock; k++) {
964: kloc = k*bs; kbase = kloc*bs;
965: for (i=0; i<bs; i++)
966: x[kloc+i] *= dd[i*(bs+1)+kbase];
967: }
968: }
969: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
970: for (k=mblock-1; k>=0; k--) {
971: kloc = k*bs; kbase = kloc*bs;
972: for (i=bs-1; i>=0; i--) {
973: sum = xb[i+kloc];
974: dval = shift + dd[i*(bs+1)+kbase];
975: for (j=i+1; j<bs; j++)
976: sum -= dd[kbase + j*bs + i] * x[kloc + j];
977: for (d=mainbd+1; d<a->nd; d++) {
978: diag = a->diag[d];
979: dv = a->diagv[d];
980: bloc = k - diag;
981: if (bloc < mblock) {
982: for (j=0; j<bs; j++)
983: sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
984: }
985: }
986: x[kloc+i] = omega*sum/dval;
987: }
988: }
989: }
990: its--;
991: }
992: while (its--) {
993: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
994: for (k=0; k<mblock; k++) {
995: kloc = k*bs; kbase = kloc*bs;
996: for (i=0; i<bs; i++) {
997: sum = b[i+kloc];
998: dval = shift + dd[i*(bs+1)+kbase];
999: for (d=0; d<mainbd; d++) {
1000: diag = a->diag[d];
1001: dv = a->diagv[d];
1002: bloc = k - diag;
1003: if (bloc >= 0) {
1004: for (j=0; j<bs; j++) {
1005: sum -= dv[k*bs*bs + j*bs + i] * x[bloc*bs + j];
1006: }
1007: }
1008: }
1009: for (d=mainbd; d<a->nd; d++) {
1010: diag = a->diag[d];
1011: dv = a->diagv[d];
1012: bloc = k - diag;
1013: if (bloc < mblock) {
1014: for (j=0; j<bs; j++) {
1015: sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
1016: }
1017: }
1018: }
1019: x[kloc+i] = (1.-omega)*x[kloc+i]+omega*(sum+dd[i*(bs+1)+kbase]*x[kloc+i])/dval;
1020: }
1021: }
1022: }
1023: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1024: for (k=mblock-1; k>=0; k--) {
1025: kloc = k*bs; kbase = kloc*bs;
1026: for (i=bs-1; i>=0; i--) {
1027: sum = b[i+kloc];
1028: dval = shift + dd[i*(bs+1)+kbase];
1029: for (d=0; d<mainbd; d++) {
1030: diag = a->diag[d];
1031: dv = a->diagv[d];
1032: bloc = k - diag;
1033: if (bloc >= 0) {
1034: for (j=0; j<bs; j++) {
1035: sum -= dv[k*bs*bs + j*bs + i] * x[bloc*bs + j];
1036: }
1037: }
1038: }
1039: for (d=mainbd; d<a->nd; d++) {
1040: diag = a->diag[d];
1041: dv = a->diagv[d];
1042: bloc = k - diag;
1043: if (bloc < mblock) {
1044: for (j=0; j<bs; j++) {
1045: sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
1046: }
1047: }
1048: }
1049: x[kloc+i] = (1.-omega)*x[kloc+i]+omega*(sum+dd[i*(bs+1)+kbase]*x[kloc+i])/dval;
1050: }
1051: }
1052: }
1053: }
1054: VecRestoreArray(xx,&x);
1055: if (xx != bb) VecRestoreArray(bb,&b);
1056: return(0);
1057: }
1061: int MatRelax_SeqBDiag_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal shift,int its,int lits,Vec xx)
1062: {
1063: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
1064: PetscScalar *x,*b,*xb,*dd,dval,sum;
1065: int ierr,m = A->m,i,d,loc;
1066: int mainbd = a->mainbd,diag;
1069: its = its*lits;
1070: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1071: /* Currently this code doesn't use wavefront orderings,although
1072: we should eventually incorporate that option, whatever wavefront
1073: ordering maybe :-) */
1075: VecGetArray(xx,&x);
1076: VecGetArray(bb,&b);
1077: if (mainbd == -1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Main diagonal not set");
1078: dd = a->diagv[mainbd];
1079: if (flag == SOR_APPLY_UPPER) {
1080: /* apply (U + D/omega) to the vector */
1081: for (i=0; i<m; i++) {
1082: sum = b[i] * (shift + dd[i]) / omega;
1083: for (d=mainbd+1; d<a->nd; d++) {
1084: diag = a->diag[d];
1085: if (i-diag < m) sum += a->diagv[d][i] * x[i-diag];
1086: }
1087: x[i] = sum;
1088: }
1089: VecRestoreArray(xx,&x);
1090: VecRestoreArray(bb,&b);
1091: return(0);
1092: }
1093: if (flag & SOR_ZERO_INITIAL_GUESS) {
1094: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1095: for (i=0; i<m; i++) {
1096: sum = b[i];
1097: for (d=0; d<mainbd; d++) {
1098: if (i >= a->diag[d]) sum -= a->diagv[d][i] * x[i-a->diag[d]];
1099: }
1100: x[i] = omega*(sum/(shift + dd[i]));
1101: }
1102: xb = x;
1103: } else xb = b;
1104: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1105: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1106: for (i=0; i<m; i++) x[i] *= dd[i];
1107: }
1108: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1109: for (i=m-1; i>=0; i--) {
1110: sum = xb[i];
1111: for (d=mainbd+1; d<a->nd; d++) {
1112: diag = a->diag[d];
1113: if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1114: }
1115: x[i] = omega*(sum/(shift + dd[i]));
1116: }
1117: }
1118: its--;
1119: }
1120: while (its--) {
1121: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1122: for (i=0; i<m; i++) {
1123: sum = b[i];
1124: dval = shift + dd[i];
1125: for (d=0; d<mainbd; d++) {
1126: if (i >= a->diag[d]) sum -= a->diagv[d][i] * x[i-a->diag[d]];
1127: }
1128: for (d=mainbd; d<a->nd; d++) {
1129: diag = a->diag[d];
1130: if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1131: }
1132: x[i] = (1. - omega)*x[i] + omega*(sum + dd[i]*x[i])/dval;
1133: }
1134: }
1135: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1136: for (i=m-1; i>=0; i--) {
1137: sum = b[i];
1138: for (d=0; d<mainbd; d++) {
1139: loc = i - a->diag[d];
1140: if (loc >= 0) sum -= a->diagv[d][i] * x[loc];
1141: }
1142: for (d=mainbd; d<a->nd; d++) {
1143: diag = a->diag[d];
1144: if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1145: }
1146: x[i] = (1. - omega)*x[i] + omega*(sum + dd[i]*x[i])/(shift + dd[i]);
1147: }
1148: }
1149: }
1150: VecRestoreArray(xx,&x);
1151: VecRestoreArray(bb,&b);
1152: return(0);
1153: }