Actual source code: bdiag2.c
1: /*$Id: bdiag2.c,v 1.19 2001/03/23 23:22:03 balay Exp $*/
3: /* Block diagonal matrix format */
5: #include petscsys.h
6: #include src/mat/impls/bdiag/seq/bdiag.h
7: #include src/vec/vecimpl.h
8: #include src/inline/ilu.h
10: int MatSetValues_SeqBDiag_1(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
11: {
12: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
13: int kk,ldiag,row,newnz,*bdlen_new;
14: int j,k, *diag_new,ierr;
15: PetscTruth roworiented = a->roworiented,dfound;
16: Scalar value,**diagv_new;
19: for (kk=0; kk<m; kk++) { /* loop over added rows */
20: row = im[kk];
21: if (row < 0) continue;
22: if (row >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
23: for (j=0; j<n; j++) {
24: if (in[j] < 0) continue;
25: if (in[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
26: ldiag = row - in[j]; /* diagonal number */
27: dfound = PETSC_FALSE;
28: if (roworiented) {
29: value = v[j + kk*n];
30: } else {
31: value = v[kk + j*m];
32: }
33: /* search diagonals for required one */
34: for (k=0; k<a->nd; k++) {
35: if (a->diag[k] == ldiag) {
36: dfound = PETSC_TRUE;
37: if (is == ADD_VALUES) a->diagv[k][row] += value;
38: else a->diagv[k][row] = value;
39: break;
40: }
41: }
42: if (!dfound) {
43: if (a->nonew || a->nonew_diag) {
44: #if !defined(PETSC_USE_COMPLEX)
45: if (a->user_alloc && value) {
46: #else
47: if (a->user_alloc && PetscRealPart(value) || PetscImaginaryPart(value)) {
48: #endif
49: PetscLogInfo(A,"MatSetValues_SeqBDiag:Nonzero in diagonal %d that user did not allocaten",ldiag);
50: }
51: } else {
52: PetscLogInfo(A,"MatSetValues_SeqBDiag: Allocating new diagonal: %dn",ldiag);
53: a->reallocs++;
54: /* free old bdiag storage info and reallocate */
55: ierr = PetscMalloc(2*(a->nd+1)*sizeof(int),&diag_new);
56: bdlen_new = diag_new + a->nd + 1;
57: ierr = PetscMalloc((a->nd+1)*sizeof(Scalar*),&diagv_new);
58: for (k=0; k<a->nd; k++) {
59: diag_new[k] = a->diag[k];
60: diagv_new[k] = a->diagv[k];
61: bdlen_new[k] = a->bdlen[k];
62: }
63: diag_new[a->nd] = ldiag;
64: if (ldiag > 0) { /* lower triangular */
65: bdlen_new[a->nd] = PetscMin(a->nblock,a->mblock - ldiag);
66: } else { /* upper triangular */
67: bdlen_new[a->nd] = PetscMin(a->mblock,a->nblock + ldiag);
68: }
69: newnz = bdlen_new[a->nd];
70: PetscMalloc(newnz*sizeof(Scalar),&diagv_new[a->nd]);
71: PetscMemzero(diagv_new[a->nd],newnz*sizeof(Scalar));
72: /* adjust pointers so that dv[diag][row] works for all diagonals*/
73: if (diag_new[a->nd] > 0) {
74: diagv_new[a->nd] -= diag_new[a->nd];
75: }
76: a->maxnz += newnz;
77: a->nz += newnz;
78: PetscFree(a->diagv);
79: PetscFree(a->diag);
80: a->diag = diag_new;
81: a->bdlen = bdlen_new;
82: a->diagv = diagv_new;
84: /* Insert value */
85: if (is == ADD_VALUES) a->diagv[a->nd][row] += value;
86: else a->diagv[a->nd][row] = value;
87: a->nd++;
88: PetscLogObjectMemory(A,newnz*sizeof(Scalar)+2*sizeof(int)+sizeof(Scalar*));
89: }
90: }
91: }
92: }
93: return(0);
94: }
97: int MatSetValues_SeqBDiag_N(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
98: {
99: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
100: int kk,ldiag,shift,row,newnz,*bdlen_new,ierr;
101: int j,k,bs = a->bs,*diag_new;
102: PetscTruth roworiented = a->roworiented,dfound;
103: Scalar value,**diagv_new;
106: for (kk=0; kk<m; kk++) { /* loop over added rows */
107: row = im[kk];
108: if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
109: if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
110: shift = (row/bs)*bs*bs + row%bs;
111: for (j=0; j<n; j++) {
112: ldiag = row/bs - in[j]/bs; /* block diagonal */
113: dfound = PETSC_FALSE;
114: if (roworiented) {
115: value = *v++;
116: } else {
117: value = v[kk + j*m];
118: }
119: /* seach for appropriate diagonal */
120: for (k=0; k<a->nd; k++) {
121: if (a->diag[k] == ldiag) {
122: dfound = PETSC_TRUE;
123: if (is == ADD_VALUES) a->diagv[k][shift + (in[j]%bs)*bs] += value;
124: else a->diagv[k][shift + (in[j]%bs)*bs] = value;
125: break;
126: }
127: }
128: if (!dfound) {
129: if (a->nonew || a->nonew_diag) {
130: #if !defined(PETSC_USE_COMPLEX)
131: if (a->user_alloc && value) {
132: #else
133: if (a->user_alloc && PetscRealPart(value) || PetscImaginaryPart(value)) {
134: #endif
135: PetscLogInfo(A,"MatSetValues_SeqBDiag:Nonzero in diagonal %d that user did not allocaten",ldiag);
136: }
137: } else {
138: PetscLogInfo(A,"MatSetValues_SeqBDiag: Allocating new diagonal: %dn",ldiag);
139: a->reallocs++;
140: /* free old bdiag storage info and reallocate */
141: ierr = PetscMalloc(2*(a->nd+1)*sizeof(int),&diag_new);
142: bdlen_new = diag_new + a->nd + 1;
143: ierr = PetscMalloc((a->nd+1)*sizeof(Scalar*),&diagv_new);
144: for (k=0; k<a->nd; k++) {
145: diag_new[k] = a->diag[k];
146: diagv_new[k] = a->diagv[k];
147: bdlen_new[k] = a->bdlen[k];
148: }
149: diag_new[a->nd] = ldiag;
150: if (ldiag > 0) {/* lower triangular */
151: bdlen_new[a->nd] = PetscMin(a->nblock,a->mblock - ldiag);
152: } else { /* upper triangular */
153: bdlen_new[a->nd] = PetscMin(a->mblock,a->nblock + ldiag);
154: }
155: newnz = bs*bs*bdlen_new[a->nd];
156: PetscMalloc(newnz*sizeof(Scalar),&diagv_new[a->nd]);
157: PetscMemzero(diagv_new[a->nd],newnz*sizeof(Scalar));
158: /* adjust pointer so that dv[diag][row] works for all diagonals */
159: if (diag_new[a->nd] > 0) {
160: diagv_new[a->nd] -= bs*bs*diag_new[a->nd];
161: }
162: a->maxnz += newnz; a->nz += newnz;
163: PetscFree(a->diagv);
164: PetscFree(a->diag);
165: a->diag = diag_new;
166: a->bdlen = bdlen_new;
167: a->diagv = diagv_new;
169: /* Insert value */
170: if (is == ADD_VALUES) a->diagv[k][shift + (in[j]%bs)*bs] += value;
171: else a->diagv[k][shift + (in[j]%bs)*bs] = value;
172: a->nd++;
173: PetscLogObjectMemory(A,newnz*sizeof(Scalar)+2*sizeof(int)+sizeof(Scalar*));
174: }
175: }
176: }
177: }
178: return(0);
179: }
181: int MatGetValues_SeqBDiag_1(Mat A,int m,int *im,int n,int *in,Scalar *v)
182: {
183: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
184: int kk,ldiag,row,j,k;
185: Scalar zero = 0.0;
186: PetscTruth dfound;
189: for (kk=0; kk<m; kk++) { /* loop over rows */
190: row = im[kk];
191: if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
192: if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
193: for (j=0; j<n; j++) {
194: if (in[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
195: if (in[j] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
196: ldiag = row - in[j]; /* diagonal number */
197: dfound = PETSC_FALSE;
198: for (k=0; k<a->nd; k++) {
199: if (a->diag[k] == ldiag) {
200: dfound = PETSC_TRUE;
201: *v++ = a->diagv[k][row];
202: break;
203: }
204: }
205: if (!dfound) *v++ = zero;
206: }
207: }
208: return(0);
209: }
211: int MatGetValues_SeqBDiag_N(Mat A,int m,int *im,int n,int *in,Scalar *v)
212: {
213: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
214: int kk,ldiag,shift,row,j,k,bs = a->bs;
215: Scalar zero = 0.0;
216: PetscTruth dfound;
219: for (kk=0; kk<m; kk++) { /* loop over rows */
220: row = im[kk];
221: if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
222: if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
223: shift = (row/bs)*bs*bs + row%bs;
224: for (j=0; j<n; j++) {
225: ldiag = row/bs - in[j]/bs; /* block diagonal */
226: dfound = PETSC_FALSE;
227: for (k=0; k<a->nd; k++) {
228: if (a->diag[k] == ldiag) {
229: dfound = PETSC_TRUE;
230: *v++ = a->diagv[k][shift + (in[j]%bs)*bs ];
231: break;
232: }
233: }
234: if (!dfound) *v++ = zero;
235: }
236: }
237: return(0);
238: }
240: /*
241: MatMults for blocksize 1 to 5 and N -------------------------------
242: */
243: int MatMult_SeqBDiag_1(Mat A,Vec xx,Vec yy)
244: {
245: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
246: int nd = a->nd,diag,*a_diag = a->diag,*a_bdlen = a->bdlen;
247: int ierr,d,j,len;
248: Scalar *vin,*vout,**a_diagv = a->diagv;
249: Scalar *pvin,*pvout,*dv;
252: VecGetArray(xx,&vin);
253: VecGetArray(yy,&vout);
254: PetscMemzero(vout,A->m*sizeof(Scalar));
255: for (d=0; d<nd; d++) {
256: dv = a_diagv[d];
257: diag = a_diag[d];
258: len = a_bdlen[d];
259: if (diag > 0) { /* lower triangle */
260: pvin = vin;
261: pvout = vout + diag;
262: dv = dv + diag;
263: } else { /* upper triangle,including main diagonal */
264: pvin = vin - diag;
265: pvout = vout;
266: }
267: for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
268: PetscLogFlops(2*len);
269: }
270: VecRestoreArray(xx,&vin);
271: VecRestoreArray(yy,&vout);
272: return(0);
273: }
275: int MatMult_SeqBDiag_2(Mat A,Vec xx,Vec yy)
276: {
277: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
278: int nd = a->nd,nb_diag;
279: int ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
280: Scalar *vin,*vout,**a_diagv = a->diagv;
281: Scalar *pvin,*pvout,*dv,pvin0,pvin1;
284: VecGetArray(xx,&vin);
285: VecGetArray(yy,&vout);
286: PetscMemzero(vout,A->m*sizeof(Scalar));
287: for (d=0; d<nd; d++) {
288: dv = a_diagv[d];
289: nb_diag = 2*a_diag[d];
290: len = a_bdlen[d];
291: if (nb_diag > 0) { /* lower triangle */
292: pvin = vin;
293: pvout = vout + nb_diag;
294: dv = dv + 2*nb_diag;
295: } else { /* upper triangle, including main diagonal */
296: pvin = vin - nb_diag;
297: pvout = vout;
298: }
299: for (k=0; k<len; k++) {
300: pvin0 = pvin[0]; pvin1 = pvin[1];
302: pvout[0] += dv[0]*pvin0 + dv[2]*pvin1;
303: pvout[1] += dv[1]*pvin0 + dv[3]*pvin1;
305: pvout += 2; pvin += 2; dv += 4;
306: }
307: PetscLogFlops(8*len);
308: }
309: VecRestoreArray(xx,&vin);
310: VecRestoreArray(yy,&vout);
311: return(0);
312: }
314: int MatMult_SeqBDiag_3(Mat A,Vec xx,Vec yy)
315: {
316: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
317: int nd = a->nd,nb_diag;
318: int ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
319: Scalar *vin,*vout,**a_diagv = a->diagv;
320: Scalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2;
323: VecGetArray(xx,&vin);
324: VecGetArray(yy,&vout);
325: PetscMemzero(vout,A->m*sizeof(Scalar));
326: for (d=0; d<nd; d++) {
327: dv = a_diagv[d];
328: nb_diag = 3*a_diag[d];
329: len = a_bdlen[d];
330: if (nb_diag > 0) { /* lower triangle */
331: pvin = vin;
332: pvout = vout + nb_diag;
333: dv = dv + 3*nb_diag;
334: } else { /* upper triangle,including main diagonal */
335: pvin = vin - nb_diag;
336: pvout = vout;
337: }
338: for (k=0; k<len; k++) {
339: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2];
341: pvout[0] += dv[0]*pvin0 + dv[3]*pvin1 + dv[6]*pvin2;
342: pvout[1] += dv[1]*pvin0 + dv[4]*pvin1 + dv[7]*pvin2;
343: pvout[2] += dv[2]*pvin0 + dv[5]*pvin1 + dv[8]*pvin2;
345: pvout += 3; pvin += 3; dv += 9;
346: }
347: PetscLogFlops(18*len);
348: }
349: VecRestoreArray(xx,&vin);
350: VecRestoreArray(yy,&vout);
351: return(0);
352: }
354: int MatMult_SeqBDiag_4(Mat A,Vec xx,Vec yy)
355: {
356: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
357: int nd = a->nd,nb_diag;
358: int ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
359: Scalar *vin,*vout,**a_diagv = a->diagv;
360: Scalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3;
363: VecGetArray(xx,&vin);
364: VecGetArray(yy,&vout);
365: PetscMemzero(vout,A->m*sizeof(Scalar));
366: for (d=0; d<nd; d++) {
367: dv = a_diagv[d];
368: nb_diag = 4*a_diag[d];
369: len = a_bdlen[d];
370: if (nb_diag > 0) { /* lower triangle */
371: pvin = vin;
372: pvout = vout + nb_diag;
373: dv = dv + 4*nb_diag;
374: } else { /* upper triangle,including main diagonal */
375: pvin = vin - nb_diag;
376: pvout = vout;
377: }
378: for (k=0; k<len; k++) {
379: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3];
381: pvout[0] += dv[0]*pvin0 + dv[4]*pvin1 + dv[8]*pvin2 + dv[12]*pvin3;
382: pvout[1] += dv[1]*pvin0 + dv[5]*pvin1 + dv[9]*pvin2 + dv[13]*pvin3;
383: pvout[2] += dv[2]*pvin0 + dv[6]*pvin1 + dv[10]*pvin2 + dv[14]*pvin3;
384: pvout[3] += dv[3]*pvin0 + dv[7]*pvin1 + dv[11]*pvin2 + dv[15]*pvin3;
386: pvout += 4; pvin += 4; dv += 16;
387: }
388: PetscLogFlops(32*len);
389: }
390: VecRestoreArray(xx,&vin);
391: VecRestoreArray(yy,&vout);
392: return(0);
393: }
395: int MatMult_SeqBDiag_5(Mat A,Vec xx,Vec yy)
396: {
397: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
398: int nd = a->nd,nb_diag;
399: int ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
400: Scalar *vin,*vout,**a_diagv = a->diagv;
401: Scalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3,pvin4;
404: VecGetArray(xx,&vin);
405: VecGetArray(yy,&vout);
406: PetscMemzero(vout,A->m*sizeof(Scalar));
407: for (d=0; d<nd; d++) {
408: dv = a_diagv[d];
409: nb_diag = 5*a_diag[d];
410: len = a_bdlen[d];
411: if (nb_diag > 0) { /* lower triangle */
412: pvin = vin;
413: pvout = vout + nb_diag;
414: dv = dv + 5*nb_diag;
415: } else { /* upper triangle,including main diagonal */
416: pvin = vin - nb_diag;
417: pvout = vout;
418: }
419: for (k=0; k<len; k++) {
420: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3]; pvin4 = pvin[4];
422: pvout[0] += dv[0]*pvin0 + dv[5]*pvin1 + dv[10]*pvin2 + dv[15]*pvin3 + dv[20]*pvin4;
423: pvout[1] += dv[1]*pvin0 + dv[6]*pvin1 + dv[11]*pvin2 + dv[16]*pvin3 + dv[21]*pvin4;
424: pvout[2] += dv[2]*pvin0 + dv[7]*pvin1 + dv[12]*pvin2 + dv[17]*pvin3 + dv[22]*pvin4;
425: pvout[3] += dv[3]*pvin0 + dv[8]*pvin1 + dv[13]*pvin2 + dv[18]*pvin3 + dv[23]*pvin4;
426: pvout[4] += dv[4]*pvin0 + dv[9]*pvin1 + dv[14]*pvin2 + dv[19]*pvin3 + dv[24]*pvin4;
428: pvout += 5; pvin += 5; dv += 25;
429: }
430: PetscLogFlops(50*len);
431: }
432: VecRestoreArray(xx,&vin);
433: VecRestoreArray(yy,&vout);
434: return(0);
435: }
437: int MatMult_SeqBDiag_N(Mat A,Vec xx,Vec yy)
438: {
439: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
440: int nd = a->nd,bs = a->bs,nb_diag,bs2 = bs*bs;
441: int ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
442: Scalar *vin,*vout,**a_diagv = a->diagv;
443: Scalar *pvin,*pvout,*dv;
446: VecGetArray(xx,&vin);
447: VecGetArray(yy,&vout);
448: PetscMemzero(vout,A->m*sizeof(Scalar));
449: for (d=0; d<nd; d++) {
450: dv = a_diagv[d];
451: nb_diag = bs*a_diag[d];
452: len = a_bdlen[d];
453: if (nb_diag > 0) { /* lower triangle */
454: pvin = vin;
455: pvout = vout + nb_diag;
456: dv = dv + bs*nb_diag;
457: } else { /* upper triangle, including main diagonal */
458: pvin = vin - nb_diag;
459: pvout = vout;
460: }
461: for (k=0; k<len; k++) {
462: Kernel_v_gets_v_plus_A_times_w(bs,pvout,dv,pvin);
463: pvout += bs; pvin += bs; dv += bs2;
464: }
465: PetscLogFlops(2*bs2*len);
466: }
467: VecRestoreArray(xx,&vin);
468: VecRestoreArray(yy,&vout);
469: return(0);
470: }
472: /*
473: MatMultAdds for blocksize 1 to 5 and N -------------------------------
474: */
475: int MatMultAdd_SeqBDiag_1(Mat A,Vec xx,Vec zz,Vec yy)
476: {
477: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
478: int ierr,nd = a->nd,diag,*a_diag = a->diag,*a_bdlen = a->bdlen,d,j,len;
479: Scalar *vin,*vout,**a_diagv = a->diagv;
480: Scalar *pvin,*pvout,*dv;
483: if (zz != yy) {VecCopy(zz,yy);}
484: VecGetArray(xx,&vin);
485: VecGetArray(yy,&vout);
486: for (d=0; d<nd; d++) {
487: dv = a_diagv[d];
488: diag = a_diag[d];
489: len = a_bdlen[d];
490: if (diag > 0) { /* lower triangle */
491: pvin = vin;
492: pvout = vout + diag;
493: dv = dv + diag;
494: } else { /* upper triangle, including main diagonal */
495: pvin = vin - diag;
496: pvout = vout;
497: }
498: for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
499: PetscLogFlops(2*len);
500: }
501: VecRestoreArray(xx,&vin);
502: VecRestoreArray(yy,&vout);
503: return(0);
504: }
506: int MatMultAdd_SeqBDiag_2(Mat A,Vec xx,Vec zz,Vec yy)
507: {
508: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
509: int ierr,nd = a->nd,nb_diag;
510: int *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
511: Scalar *vin,*vout,**a_diagv = a->diagv;
512: Scalar *pvin,*pvout,*dv,pvin0,pvin1;
515: if (zz != yy) {VecCopy(zz,yy);}
516: VecGetArray(xx,&vin);
517: VecGetArray(yy,&vout);
518: for (d=0; d<nd; d++) {
519: dv = a_diagv[d];
520: nb_diag = 2*a_diag[d];
521: len = a_bdlen[d];
522: if (nb_diag > 0) { /* lower triangle */
523: pvin = vin;
524: pvout = vout + nb_diag;
525: dv = dv + 2*nb_diag;
526: } else { /* upper triangle, including main diagonal */
527: pvin = vin - nb_diag;
528: pvout = vout;
529: }
530: for (k=0; k<len; k++) {
531: pvin0 = pvin[0]; pvin1 = pvin[1];
533: pvout[0] += dv[0]*pvin0 + dv[2]*pvin1;
534: pvout[1] += dv[1]*pvin0 + dv[3]*pvin1;
536: pvout += 2; pvin += 2; dv += 4;
537: }
538: PetscLogFlops(8*len);
539: }
540: VecRestoreArray(xx,&vin);
541: VecRestoreArray(yy,&vout);
542: return(0);
543: }
545: int MatMultAdd_SeqBDiag_3(Mat A,Vec xx,Vec zz,Vec yy)
546: {
547: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
548: int ierr,nd = a->nd,nb_diag;
549: int *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
550: Scalar *vin,*vout,**a_diagv = a->diagv;
551: Scalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2;
554: if (zz != yy) {VecCopy(zz,yy);}
555: VecGetArray(xx,&vin);
556: VecGetArray(yy,&vout);
557: for (d=0; d<nd; d++) {
558: dv = a_diagv[d];
559: nb_diag = 3*a_diag[d];
560: len = a_bdlen[d];
561: if (nb_diag > 0) { /* lower triangle */
562: pvin = vin;
563: pvout = vout + nb_diag;
564: dv = dv + 3*nb_diag;
565: } else { /* upper triangle, including main diagonal */
566: pvin = vin - nb_diag;
567: pvout = vout;
568: }
569: for (k=0; k<len; k++) {
570: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2];
572: pvout[0] += dv[0]*pvin0 + dv[3]*pvin1 + dv[6]*pvin2;
573: pvout[1] += dv[1]*pvin0 + dv[4]*pvin1 + dv[7]*pvin2;
574: pvout[2] += dv[2]*pvin0 + dv[5]*pvin1 + dv[8]*pvin2;
576: pvout += 3; pvin += 3; dv += 9;
577: }
578: PetscLogFlops(18*len);
579: }
580: VecRestoreArray(xx,&vin);
581: VecRestoreArray(yy,&vout);
582: return(0);
583: }
585: int MatMultAdd_SeqBDiag_4(Mat A,Vec xx,Vec zz,Vec yy)
586: {
587: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
588: int ierr,nd = a->nd,nb_diag;
589: int *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
590: Scalar *vin,*vout,**a_diagv = a->diagv;
591: Scalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3;
594: if (zz != yy) {VecCopy(zz,yy);}
595: VecGetArray(xx,&vin);
596: VecGetArray(yy,&vout);
597: for (d=0; d<nd; d++) {
598: dv = a_diagv[d];
599: nb_diag = 4*a_diag[d];
600: len = a_bdlen[d];
601: if (nb_diag > 0) { /* lower triangle */
602: pvin = vin;
603: pvout = vout + nb_diag;
604: dv = dv + 4*nb_diag;
605: } else { /* upper triangle, including main diagonal */
606: pvin = vin - nb_diag;
607: pvout = vout;
608: }
609: for (k=0; k<len; k++) {
610: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3];
612: pvout[0] += dv[0]*pvin0 + dv[4]*pvin1 + dv[8]*pvin2 + dv[12]*pvin3;
613: pvout[1] += dv[1]*pvin0 + dv[5]*pvin1 + dv[9]*pvin2 + dv[13]*pvin3;
614: pvout[2] += dv[2]*pvin0 + dv[6]*pvin1 + dv[10]*pvin2 + dv[14]*pvin3;
615: pvout[3] += dv[3]*pvin0 + dv[7]*pvin1 + dv[11]*pvin2 + dv[15]*pvin3;
617: pvout += 4; pvin += 4; dv += 16;
618: }
619: PetscLogFlops(32*len);
620: }
621: VecRestoreArray(xx,&vin);
622: VecRestoreArray(yy,&vout);
623: return(0);
624: }
626: int MatMultAdd_SeqBDiag_5(Mat A,Vec xx,Vec zz,Vec yy)
627: {
628: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
629: int ierr,nd = a->nd,nb_diag;
630: int *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
631: Scalar *vin,*vout,**a_diagv = a->diagv;
632: Scalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3,pvin4;
635: if (zz != yy) {VecCopy(zz,yy);}
636: VecGetArray(xx,&vin);
637: VecGetArray(yy,&vout);
638: for (d=0; d<nd; d++) {
639: dv = a_diagv[d];
640: nb_diag = 5*a_diag[d];
641: len = a_bdlen[d];
642: if (nb_diag > 0) { /* lower triangle */
643: pvin = vin;
644: pvout = vout + nb_diag;
645: dv = dv + 5*nb_diag;
646: } else { /* upper triangle, including main diagonal */
647: pvin = vin - nb_diag;
648: pvout = vout;
649: }
650: for (k=0; k<len; k++) {
651: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3]; pvin4 = pvin[4];
653: pvout[0] += dv[0]*pvin0 + dv[5]*pvin1 + dv[10]*pvin2 + dv[15]*pvin3 + dv[20]*pvin4;
654: pvout[1] += dv[1]*pvin0 + dv[6]*pvin1 + dv[11]*pvin2 + dv[16]*pvin3 + dv[21]*pvin4;
655: pvout[2] += dv[2]*pvin0 + dv[7]*pvin1 + dv[12]*pvin2 + dv[17]*pvin3 + dv[22]*pvin4;
656: pvout[3] += dv[3]*pvin0 + dv[8]*pvin1 + dv[13]*pvin2 + dv[18]*pvin3 + dv[23]*pvin4;
657: pvout[4] += dv[4]*pvin0 + dv[9]*pvin1 + dv[14]*pvin2 + dv[19]*pvin3 + dv[24]*pvin4;
659: pvout += 5; pvin += 5; dv += 25;
660: }
661: PetscLogFlops(50*len);
662: }
663: VecRestoreArray(xx,&vin);
664: VecRestoreArray(yy,&vout);
665: return(0);
666: }
668: int MatMultAdd_SeqBDiag_N(Mat A,Vec xx,Vec zz,Vec yy)
669: {
670: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
671: int ierr,nd = a->nd,bs = a->bs,nb_diag,bs2 = bs*bs;
672: int *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
673: Scalar *vin,*vout,**a_diagv = a->diagv;
674: Scalar *pvin,*pvout,*dv;
677: if (zz != yy) {VecCopy(zz,yy);}
678: VecGetArray(xx,&vin);
679: VecGetArray(yy,&vout);
680: for (d=0; d<nd; d++) {
681: dv = a_diagv[d];
682: nb_diag = bs*a_diag[d];
683: len = a_bdlen[d];
684: if (nb_diag > 0) { /* lower triangle */
685: pvin = vin;
686: pvout = vout + nb_diag;
687: dv = dv + bs*nb_diag;
688: } else { /* upper triangle, including main diagonal */
689: pvin = vin - nb_diag;
690: pvout = vout;
691: }
692: for (k=0; k<len; k++) {
693: Kernel_v_gets_v_plus_A_times_w(bs,pvout,dv,pvin);
694: pvout += bs; pvin += bs; dv += bs2;
695: }
696: PetscLogFlops(2*bs2*len);
697: }
698: VecRestoreArray(xx,&vin);
699: VecRestoreArray(yy,&vout);
700: return(0);
701: }
703: /*
704: MatMultTranspose ----------------------------------------------
705: */
706: int MatMultTranspose_SeqBDiag_1(Mat A,Vec xx,Vec yy)
707: {
708: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
709: int ierr,nd = a->nd,diag,d,j,len;
710: Scalar *pvin,*pvout,*dv;
711: Scalar *vin,*vout;
712:
714: VecGetArray(xx,&vin);
715: VecGetArray(yy,&vout);
716: PetscMemzero(vout,A->n*sizeof(Scalar));
717: for (d=0; d<nd; d++) {
718: dv = a->diagv[d];
719: diag = a->diag[d];
720: len = a->bdlen[d];
721: /* diag of original matrix is (row/bs - col/bs) */
722: /* diag of transpose matrix is (col/bs - row/bs) */
723: if (diag < 0) { /* transpose is lower triangle */
724: pvin = vin;
725: pvout = vout - diag;
726: } else { /* transpose is upper triangle, including main diagonal */
727: pvin = vin + diag;
728: pvout = vout;
729: dv = dv + diag;
730: }
731: for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
732: }
733: VecRestoreArray(xx,&vin);
734: VecRestoreArray(yy,&vout);
735: return(0);
736: }
738: int MatMultTranspose_SeqBDiag_N(Mat A,Vec xx,Vec yy)
739: {
740: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
741: int ierr,nd = a->nd,bs = a->bs,diag,kshift,kloc,d,i,j,k,len;
742: Scalar *pvin,*pvout,*dv;
743: Scalar *vin,*vout;
744:
746: VecGetArray(xx,&vin);
747: VecGetArray(yy,&vout);
748: PetscMemzero(vout,A->n*sizeof(Scalar));
749: for (d=0; d<nd; d++) {
750: dv = a->diagv[d];
751: diag = a->diag[d];
752: len = a->bdlen[d];
753: /* diag of original matrix is (row/bs - col/bs) */
754: /* diag of transpose matrix is (col/bs - row/bs) */
755: if (diag < 0) { /* transpose is lower triangle */
756: pvin = vin;
757: pvout = vout - bs*diag;
758: } else { /* transpose is upper triangle, including main diagonal */
759: pvin = vin + bs*diag;
760: pvout = vout;
761: dv = dv + diag;
762: }
763: for (k=0; k<len; k++) {
764: kloc = k*bs; kshift = kloc*bs;
765: for (i=0; i<bs; i++) { /* i = local column of transpose */
766: for (j=0; j<bs; j++) { /* j = local row of transpose */
767: pvout[kloc + j] += dv[kshift + j*bs + i] * pvin[kloc + i];
768: }
769: }
770: }
771: }
772: VecRestoreArray(xx,&vin);
773: VecRestoreArray(yy,&vout);
774: return(0);
775: }
777: /*
778: MatMultTransposeAdd ----------------------------------------------
779: */
780: int MatMultTransposeAdd_SeqBDiag_1(Mat A,Vec xx,Vec zz,Vec yy)
781: {
782: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
783: int ierr,nd = a->nd,diag,d,j,len;
784: Scalar *pvin,*pvout,*dv;
785: Scalar *vin,*vout;
786:
788: if (zz != yy) {VecCopy(zz,yy);}
789: VecGetArray(xx,&vin);
790: VecGetArray(yy,&vout);
791: for (d=0; d<nd; d++) {
792: dv = a->diagv[d];
793: diag = a->diag[d];
794: len = a->bdlen[d];
795: /* diag of original matrix is (row/bs - col/bs) */
796: /* diag of transpose matrix is (col/bs - row/bs) */
797: if (diag < 0) { /* transpose is lower triangle */
798: pvin = vin;
799: pvout = vout - diag;
800: } else { /* transpose is upper triangle, including main diagonal */
801: pvin = vin + diag;
802: pvout = vout;
803: dv = dv + diag;
804: }
805: for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
806: }
807: VecRestoreArray(xx,&vin);
808: VecRestoreArray(yy,&vout);
809: return(0);
810: }
812: int MatMultTransposeAdd_SeqBDiag_N(Mat A,Vec xx,Vec zz,Vec yy)
813: {
814: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
815: int ierr,nd = a->nd,bs = a->bs,diag,kshift,kloc,d,i,j,k,len;
816: Scalar *pvin,*pvout,*dv;
817: Scalar *vin,*vout;
818:
820: if (zz != yy) {VecCopy(zz,yy);}
821: VecGetArray(xx,&vin);
822: VecGetArray(yy,&vout);
823: for (d=0; d<nd; d++) {
824: dv = a->diagv[d];
825: diag = a->diag[d];
826: len = a->bdlen[d];
827: /* diag of original matrix is (row/bs - col/bs) */
828: /* diag of transpose matrix is (col/bs - row/bs) */
829: if (diag < 0) { /* transpose is lower triangle */
830: pvin = vin;
831: pvout = vout - bs*diag;
832: } else { /* transpose is upper triangle, including main diagonal */
833: pvin = vin + bs*diag;
834: pvout = vout;
835: dv = dv + diag;
836: }
837: for (k=0; k<len; k++) {
838: kloc = k*bs; kshift = kloc*bs;
839: for (i=0; i<bs; i++) { /* i = local column of transpose */
840: for (j=0; j<bs; j++) { /* j = local row of transpose */
841: pvout[kloc + j] += dv[kshift + j*bs + i] * pvin[kloc + i];
842: }
843: }
844: }
845: }
846: VecRestoreArray(xx,&vin);
847: VecRestoreArray(yy,&vout);
848: return(0);
849: }
850: int MatRelax_SeqBDiag_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,
851: PetscReal shift,int its,Vec xx)
852: {
853: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
854: Scalar *x,*b,*xb,*dd,*dv,dval,sum;
855: int ierr,i,j,k,d,kbase,bs = a->bs,kloc;
856: int mainbd = a->mainbd,diag,mblock = a->mblock,bloc;
859: /* Currently this code doesn't use wavefront orderings, although
860: we should eventually incorporate that option, whatever wavefront
861: ordering maybe :-) */
863: if (mainbd == -1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Main diagonal not set");
865: VecGetArray(xx,&x);
866: if (xx != bb) {
867: VecGetArray(bb,&b);
868: } else {
869: b = x;
870: }
871: dd = a->diagv[mainbd];
872: if (flag == SOR_APPLY_UPPER) {
873: /* apply (U + D/omega) to the vector */
874: for (k=0; k<mblock; k++) {
875: kloc = k*bs; kbase = kloc*bs;
876: for (i=0; i<bs; i++) {
877: sum = b[i+kloc] * (shift + dd[i*(bs+1)+kbase]) / omega;
878: for (j=i+1; j<bs; j++) sum += dd[kbase + j*bs + i] * b[kloc + j];
879: for (d=mainbd+1; d<a->nd; d++) {
880: diag = a->diag[d];
881: dv = a->diagv[d];
882: if (k-diag < mblock) {
883: for (j=0; j<bs; j++) {
884: sum += dv[kbase + j*bs + i] * b[(k-diag)*bs + j];
885: }
886: }
887: }
888: x[kloc+i] = sum;
889: }
890: }
891: VecRestoreArray(xx,&x);
892: if (xx != bb) {VecRestoreArray(bb,&b);}
893: return(0);
894: }
895: if (flag & SOR_ZERO_INITIAL_GUESS) {
896: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
897: for (k=0; k<mblock; k++) {
898: kloc = k*bs; kbase = kloc*bs;
899: for (i=0; i<bs; i++) {
900: sum = b[i+kloc];
901: dval = shift + dd[i*(bs+1)+kbase];
902: for (d=0; d<mainbd; d++) {
903: diag = a->diag[d];
904: dv = a->diagv[d];
905: if (k >= diag) {
906: for (j=0; j<bs; j++)
907: sum -= dv[k*bs*bs + j*bs + i] * x[(k-diag)*bs + j];
908: }
909: }
910: for (j=0; j<i; j++){
911: sum -= dd[kbase + j*bs + i] * x[kloc + j];
912: }
913: x[kloc+i] = omega*sum/dval;
914: }
915: }
916: xb = x;
917: } else xb = b;
918: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
919: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
920: for (k=0; k<mblock; k++) {
921: kloc = k*bs; kbase = kloc*bs;
922: for (i=0; i<bs; i++)
923: x[kloc+i] *= dd[i*(bs+1)+kbase];
924: }
925: }
926: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
927: for (k=mblock-1; k>=0; k--) {
928: kloc = k*bs; kbase = kloc*bs;
929: for (i=bs-1; i>=0; i--) {
930: sum = xb[i+kloc];
931: dval = shift + dd[i*(bs+1)+kbase];
932: for (j=i+1; j<bs; j++)
933: sum -= dd[kbase + j*bs + i] * x[kloc + j];
934: for (d=mainbd+1; d<a->nd; d++) {
935: diag = a->diag[d];
936: dv = a->diagv[d];
937: bloc = k - diag;
938: if (bloc < mblock) {
939: for (j=0; j<bs; j++)
940: sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
941: }
942: }
943: x[kloc+i] = omega*sum/dval;
944: }
945: }
946: }
947: its--;
948: }
949: while (its--) {
950: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
951: for (k=0; k<mblock; k++) {
952: kloc = k*bs; kbase = kloc*bs;
953: for (i=0; i<bs; i++) {
954: sum = b[i+kloc];
955: dval = shift + dd[i*(bs+1)+kbase];
956: for (d=0; d<mainbd; d++) {
957: diag = a->diag[d];
958: dv = a->diagv[d];
959: bloc = k - diag;
960: if (bloc >= 0) {
961: for (j=0; j<bs; j++) {
962: sum -= dv[k*bs*bs + j*bs + i] * x[bloc*bs + j];
963: }
964: }
965: }
966: for (d=mainbd; d<a->nd; d++) {
967: diag = a->diag[d];
968: dv = a->diagv[d];
969: bloc = k - diag;
970: if (bloc < mblock) {
971: for (j=0; j<bs; j++) {
972: sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
973: }
974: }
975: }
976: x[kloc+i] = (1.-omega)*x[kloc+i]+omega*(sum+dd[i*(bs+1)+kbase]*x[kloc+i])/dval;
977: }
978: }
979: }
980: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
981: for (k=mblock-1; k>=0; k--) {
982: kloc = k*bs; kbase = kloc*bs;
983: for (i=bs-1; i>=0; i--) {
984: sum = b[i+kloc];
985: dval = shift + dd[i*(bs+1)+kbase];
986: for (d=0; d<mainbd; d++) {
987: diag = a->diag[d];
988: dv = a->diagv[d];
989: bloc = k - diag;
990: if (bloc >= 0) {
991: for (j=0; j<bs; j++) {
992: sum -= dv[k*bs*bs + j*bs + i] * x[bloc*bs + j];
993: }
994: }
995: }
996: for (d=mainbd; d<a->nd; d++) {
997: diag = a->diag[d];
998: dv = a->diagv[d];
999: bloc = k - diag;
1000: if (bloc < mblock) {
1001: for (j=0; j<bs; j++) {
1002: sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
1003: }
1004: }
1005: }
1006: x[kloc+i] = (1.-omega)*x[kloc+i]+omega*(sum+dd[i*(bs+1)+kbase]*x[kloc+i])/dval;
1007: }
1008: }
1009: }
1010: }
1011: VecRestoreArray(xx,&x);
1012: if (xx != bb) VecRestoreArray(bb,&b);
1013: return(0);
1014: }
1016: int MatRelax_SeqBDiag_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,
1017: PetscReal shift,int its,Vec xx)
1018: {
1019: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
1020: Scalar *x,*b,*xb,*dd,dval,sum;
1021: int ierr,m = A->m,i,d,loc;
1022: int mainbd = a->mainbd,diag;
1025: /* Currently this code doesn't use wavefront orderings,although
1026: we should eventually incorporate that option, whatever wavefront
1027: ordering maybe :-) */
1029: VecGetArray(xx,&x);
1030: VecGetArray(bb,&b);
1031: if (mainbd == -1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Main diagonal not set");
1032: dd = a->diagv[mainbd];
1033: if (flag == SOR_APPLY_UPPER) {
1034: /* apply (U + D/omega) to the vector */
1035: for (i=0; i<m; i++) {
1036: sum = b[i] * (shift + dd[i]) / omega;
1037: for (d=mainbd+1; d<a->nd; d++) {
1038: diag = a->diag[d];
1039: if (i-diag < m) sum += a->diagv[d][i] * x[i-diag];
1040: }
1041: x[i] = sum;
1042: }
1043: VecRestoreArray(xx,&x);
1044: VecRestoreArray(bb,&b);
1045: return(0);
1046: }
1047: if (flag & SOR_ZERO_INITIAL_GUESS) {
1048: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1049: for (i=0; i<m; i++) {
1050: sum = b[i];
1051: for (d=0; d<mainbd; d++) {
1052: if (i >= a->diag[d]) sum -= a->diagv[d][i] * x[i-a->diag[d]];
1053: }
1054: x[i] = omega*(sum/(shift + dd[i]));
1055: }
1056: xb = x;
1057: } else xb = b;
1058: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1059: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1060: for (i=0; i<m; i++) x[i] *= dd[i];
1061: }
1062: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1063: for (i=m-1; i>=0; i--) {
1064: sum = xb[i];
1065: for (d=mainbd+1; d<a->nd; d++) {
1066: diag = a->diag[d];
1067: if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1068: }
1069: x[i] = omega*(sum/(shift + dd[i]));
1070: }
1071: }
1072: its--;
1073: }
1074: while (its--) {
1075: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1076: for (i=0; i<m; i++) {
1077: sum = b[i];
1078: dval = shift + dd[i];
1079: for (d=0; d<mainbd; d++) {
1080: if (i >= a->diag[d]) sum -= a->diagv[d][i] * x[i-a->diag[d]];
1081: }
1082: for (d=mainbd; d<a->nd; d++) {
1083: diag = a->diag[d];
1084: if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1085: }
1086: x[i] = (1. - omega)*x[i] + omega*(sum + dd[i]*x[i])/dval;
1087: }
1088: }
1089: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1090: for (i=m-1; i>=0; i--) {
1091: sum = b[i];
1092: for (d=0; d<mainbd; d++) {
1093: loc = i - a->diag[d];
1094: if (loc >= 0) sum -= a->diagv[d][i] * x[loc];
1095: }
1096: for (d=mainbd; d<a->nd; d++) {
1097: diag = a->diag[d];
1098: if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1099: }
1100: x[i] = (1. - omega)*x[i] + omega*(sum + dd[i]*x[i])/(shift + dd[i]);
1101: }
1102: }
1103: }
1104: VecRestoreArray(xx,&x);
1105: VecRestoreArray(bb,&b);
1106: return(0);
1107: }