Actual source code: baij2.c
1: #define PETSCMAT_DLL
3: #include src/mat/impls/baij/seq/baij.h
4: #include src/inline/spops.h
5: #include src/inline/ilu.h
6: #include petscbt.h
10: PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
11: {
12: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
14: PetscInt row,i,j,k,l,m,n,*idx,*nidx,isz,val,ival;
15: PetscInt start,end,*ai,*aj,bs,*nidx2;
16: PetscBT table;
19: m = a->mbs;
20: ai = a->i;
21: aj = a->j;
22: bs = A->rmap.bs;
24: if (ov < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
26: PetscBTCreate(m,table);
27: PetscMalloc((m+1)*sizeof(PetscInt),&nidx);
28: PetscMalloc((A->rmap.N+1)*sizeof(PetscInt),&nidx2);
30: for (i=0; i<is_max; i++) {
31: /* Initialise the two local arrays */
32: isz = 0;
33: PetscBTMemzero(m,table);
34:
35: /* Extract the indices, assume there can be duplicate entries */
36: ISGetIndices(is[i],&idx);
37: ISGetLocalSize(is[i],&n);
39: /* Enter these into the temp arrays i.e mark table[row], enter row into new index */
40: for (j=0; j<n ; ++j){
41: ival = idx[j]/bs; /* convert the indices into block indices */
42: if (ival>=m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
43: if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;}
44: }
45: ISRestoreIndices(is[i],&idx);
46: ISDestroy(is[i]);
47:
48: k = 0;
49: for (j=0; j<ov; j++){ /* for each overlap*/
50: n = isz;
51: for (; k<n ; k++){ /* do only those rows in nidx[k], which are not done yet */
52: row = nidx[k];
53: start = ai[row];
54: end = ai[row+1];
55: for (l = start; l<end ; l++){
56: val = aj[l];
57: if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;}
58: }
59: }
60: }
61: /* expand the Index Set */
62: for (j=0; j<isz; j++) {
63: for (k=0; k<bs; k++)
64: nidx2[j*bs+k] = nidx[j]*bs+k;
65: }
66: ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);
67: }
68: PetscBTDestroy(table);
69: PetscFree(nidx);
70: PetscFree(nidx2);
71: return(0);
72: }
76: PetscErrorCode MatGetSubMatrix_SeqBAIJ_Private(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
77: {
78: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*c;
80: PetscInt *smap,i,k,kstart,kend,oldcols = a->nbs,*lens;
81: PetscInt row,mat_i,*mat_j,tcol,*mat_ilen;
82: PetscInt *irow,*icol,nrows,ncols,*ssmap,bs=A->rmap.bs,bs2=a->bs2;
83: PetscInt *aj = a->j,*ai = a->i;
84: MatScalar *mat_a;
85: Mat C;
86: PetscTruth flag;
89: ISSorted(iscol,(PetscTruth*)&i);
90: if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
92: ISGetIndices(isrow,&irow);
93: ISGetIndices(iscol,&icol);
94: ISGetLocalSize(isrow,&nrows);
95: ISGetLocalSize(iscol,&ncols);
97: PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);
98: ssmap = smap;
99: PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);
100: PetscMemzero(smap,oldcols*sizeof(PetscInt));
101: for (i=0; i<ncols; i++) smap[icol[i]] = i+1;
102: /* determine lens of each row */
103: for (i=0; i<nrows; i++) {
104: kstart = ai[irow[i]];
105: kend = kstart + a->ilen[irow[i]];
106: lens[i] = 0;
107: for (k=kstart; k<kend; k++) {
108: if (ssmap[aj[k]]) {
109: lens[i]++;
110: }
111: }
112: }
113: /* Create and fill new matrix */
114: if (scall == MAT_REUSE_MATRIX) {
115: c = (Mat_SeqBAIJ *)((*B)->data);
117: if (c->mbs!=nrows || c->nbs!=ncols || (*B)->rmap.bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
118: PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);
119: if (!flag) {
120: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
121: }
122: PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));
123: C = *B;
124: } else {
125: MatCreate(((PetscObject)A)->comm,&C);
126: MatSetSizes(C,nrows*bs,ncols*bs,PETSC_DETERMINE,PETSC_DETERMINE);
127: MatSetType(C,((PetscObject)A)->type_name);
128: MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,lens);
129: }
130: c = (Mat_SeqBAIJ *)(C->data);
131: for (i=0; i<nrows; i++) {
132: row = irow[i];
133: kstart = ai[row];
134: kend = kstart + a->ilen[row];
135: mat_i = c->i[i];
136: mat_j = c->j + mat_i;
137: mat_a = c->a + mat_i*bs2;
138: mat_ilen = c->ilen + i;
139: for (k=kstart; k<kend; k++) {
140: if ((tcol=ssmap[a->j[k]])) {
141: *mat_j++ = tcol - 1;
142: PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));
143: mat_a += bs2;
144: (*mat_ilen)++;
145: }
146: }
147: }
148:
149: /* Free work space */
150: ISRestoreIndices(iscol,&icol);
151: PetscFree(smap);
152: PetscFree(lens);
153: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
154: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
155:
156: ISRestoreIndices(isrow,&irow);
157: *B = C;
158: return(0);
159: }
163: PetscErrorCode MatGetSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
164: {
165: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
166: IS is1,is2;
168: PetscInt *vary,*iary,*irow,*icol,nrows,ncols,i,bs=A->rmap.bs,count;
171: ISGetIndices(isrow,&irow);
172: ISGetIndices(iscol,&icol);
173: ISGetLocalSize(isrow,&nrows);
174: ISGetLocalSize(iscol,&ncols);
175:
176: /* Verify if the indices corespond to each element in a block
177: and form the IS with compressed IS */
178: PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);
179: iary = vary + a->mbs;
180: PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));
181: for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
182: count = 0;
183: for (i=0; i<a->mbs; i++) {
184: if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Index set does not match blocks");
185: if (vary[i]==bs) iary[count++] = i;
186: }
187: ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);
188:
189: PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));
190: for (i=0; i<ncols; i++) vary[icol[i]/bs]++;
191: count = 0;
192: for (i=0; i<a->mbs; i++) {
193: if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_PLIB,"Internal error in PETSc");
194: if (vary[i]==bs) iary[count++] = i;
195: }
196: ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is2);
197: ISRestoreIndices(isrow,&irow);
198: ISRestoreIndices(iscol,&icol);
199: PetscFree(vary);
201: MatGetSubMatrix_SeqBAIJ_Private(A,is1,is2,cs,scall,B);
202: ISDestroy(is1);
203: ISDestroy(is2);
204: return(0);
205: }
209: PetscErrorCode MatGetSubMatrices_SeqBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
210: {
212: PetscInt i;
215: if (scall == MAT_INITIAL_MATRIX) {
216: PetscMalloc((n+1)*sizeof(Mat),B);
217: }
219: for (i=0; i<n; i++) {
220: MatGetSubMatrix_SeqBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);
221: }
222: return(0);
223: }
226: /* -------------------------------------------------------*/
227: /* Should check that shapes of vectors and matrices match */
228: /* -------------------------------------------------------*/
229: #include petscblaslapack.h
233: PetscErrorCode MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
234: {
235: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
236: PetscScalar *x,*z,sum;
237: MatScalar *v;
239: PetscInt mbs,i,*idx,*ii,n,*ridx=PETSC_NULL,nonzerorow=0;
240: PetscTruth usecprow=a->compressedrow.use;
243: VecGetArray(xx,&x);
244: VecGetArray(zz,&z);
246: idx = a->j;
247: v = a->a;
248: if (usecprow){
249: mbs = a->compressedrow.nrows;
250: ii = a->compressedrow.i;
251: ridx = a->compressedrow.rindex;
252: } else {
253: mbs = a->mbs;
254: ii = a->i;
255: }
257: for (i=0; i<mbs; i++) {
258: n = ii[1] - ii[0]; ii++;
259: sum = 0.0;
260: nonzerorow += (n>0);
261: while (n--) sum += *v++ * x[*idx++];
262: if (usecprow){
263: z[ridx[i]] = sum;
264: } else {
265: z[i] = sum;
266: }
267: }
268: VecRestoreArray(xx,&x);
269: VecRestoreArray(zz,&z);
270: PetscLogFlops(2*a->nz - nonzerorow);
271: return(0);
272: }
276: PetscErrorCode MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
277: {
278: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
279: PetscScalar *x,*z = 0,*xb,sum1,sum2,*zarray;
280: PetscScalar x1,x2;
281: MatScalar *v;
283: PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
284: PetscTruth usecprow=a->compressedrow.use;
287: VecGetArray(xx,&x);
288: VecGetArray(zz,&zarray);
290: idx = a->j;
291: v = a->a;
292: if (usecprow){
293: mbs = a->compressedrow.nrows;
294: ii = a->compressedrow.i;
295: ridx = a->compressedrow.rindex;
296: } else {
297: mbs = a->mbs;
298: ii = a->i;
299: z = zarray;
300: }
302: for (i=0; i<mbs; i++) {
303: n = ii[1] - ii[0]; ii++;
304: sum1 = 0.0; sum2 = 0.0;
305: nonzerorow += (n>0);
306: for (j=0; j<n; j++) {
307: xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
308: sum1 += v[0]*x1 + v[2]*x2;
309: sum2 += v[1]*x1 + v[3]*x2;
310: v += 4;
311: }
312: if (usecprow) z = zarray + 2*ridx[i];
313: z[0] = sum1; z[1] = sum2;
314: if (!usecprow) z += 2;
315: }
316: VecRestoreArray(xx,&x);
317: VecRestoreArray(zz,&zarray);
318: PetscLogFlops(8*a->nz - 2*nonzerorow);
319: return(0);
320: }
324: PetscErrorCode MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
325: {
326: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
327: PetscScalar *x,*z = 0,*xb,sum1,sum2,sum3,x1,x2,x3,*zarray;
328: MatScalar *v;
330: PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
331: PetscTruth usecprow=a->compressedrow.use;
332:
334: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
335: #pragma disjoint(*v,*z,*xb)
336: #endif
339: VecGetArray(xx,&x);
340: VecGetArray(zz,&zarray);
342: idx = a->j;
343: v = a->a;
344: if (usecprow){
345: mbs = a->compressedrow.nrows;
346: ii = a->compressedrow.i;
347: ridx = a->compressedrow.rindex;
348: } else {
349: mbs = a->mbs;
350: ii = a->i;
351: z = zarray;
352: }
354: for (i=0; i<mbs; i++) {
355: n = ii[1] - ii[0]; ii++;
356: sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
357: nonzerorow += (n>0);
358: for (j=0; j<n; j++) {
359: xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
360: sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
361: sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
362: sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
363: v += 9;
364: }
365: if (usecprow) z = zarray + 3*ridx[i];
366: z[0] = sum1; z[1] = sum2; z[2] = sum3;
367: if (!usecprow) z += 3;
368: }
369: VecRestoreArray(xx,&x);
370: VecRestoreArray(zz,&zarray);
371: PetscLogFlops(18*a->nz - 3*nonzerorow);
372: return(0);
373: }
377: PetscErrorCode MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
378: {
379: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
380: PetscScalar *x,*z = 0,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*zarray;
381: MatScalar *v;
383: PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
384: PetscTruth usecprow=a->compressedrow.use;
387: VecGetArray(xx,&x);
388: VecGetArray(zz,&zarray);
390: idx = a->j;
391: v = a->a;
392: if (usecprow){
393: mbs = a->compressedrow.nrows;
394: ii = a->compressedrow.i;
395: ridx = a->compressedrow.rindex;
396: } else {
397: mbs = a->mbs;
398: ii = a->i;
399: z = zarray;
400: }
402: for (i=0; i<mbs; i++) {
403: n = ii[1] - ii[0]; ii++;
404: sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
405: nonzerorow += (n>0);
406: for (j=0; j<n; j++) {
407: xb = x + 4*(*idx++);
408: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
409: sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
410: sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
411: sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
412: sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
413: v += 16;
414: }
415: if (usecprow) z = zarray + 4*ridx[i];
416: z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
417: if (!usecprow) z += 4;
418: }
419: VecRestoreArray(xx,&x);
420: VecRestoreArray(zz,&zarray);
421: PetscLogFlops(32*a->nz - 4*nonzerorow);
422: return(0);
423: }
427: PetscErrorCode MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
428: {
429: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
430: PetscScalar sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*xb,*z = 0,*x,*zarray;
431: MatScalar *v;
433: PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
434: PetscTruth usecprow=a->compressedrow.use;
437: VecGetArray(xx,&x);
438: VecGetArray(zz,&zarray);
440: idx = a->j;
441: v = a->a;
442: if (usecprow){
443: mbs = a->compressedrow.nrows;
444: ii = a->compressedrow.i;
445: ridx = a->compressedrow.rindex;
446: } else {
447: mbs = a->mbs;
448: ii = a->i;
449: z = zarray;
450: }
452: for (i=0; i<mbs; i++) {
453: n = ii[1] - ii[0]; ii++;
454: sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
455: nonzerorow += (n>0);
456: for (j=0; j<n; j++) {
457: xb = x + 5*(*idx++);
458: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
459: sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
460: sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
461: sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
462: sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
463: sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
464: v += 25;
465: }
466: if (usecprow) z = zarray + 5*ridx[i];
467: z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
468: if (!usecprow) z += 5;
469: }
470: VecRestoreArray(xx,&x);
471: VecRestoreArray(zz,&zarray);
472: PetscLogFlops(50*a->nz - 5*nonzerorow);
473: return(0);
474: }
479: PetscErrorCode MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz)
480: {
481: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
482: PetscScalar *x,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6;
483: PetscScalar x1,x2,x3,x4,x5,x6,*zarray;
484: MatScalar *v;
486: PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
487: PetscTruth usecprow=a->compressedrow.use;
490: VecGetArray(xx,&x);
491: VecGetArray(zz,&zarray);
493: idx = a->j;
494: v = a->a;
495: if (usecprow){
496: mbs = a->compressedrow.nrows;
497: ii = a->compressedrow.i;
498: ridx = a->compressedrow.rindex;
499: } else {
500: mbs = a->mbs;
501: ii = a->i;
502: z = zarray;
503: }
505: for (i=0; i<mbs; i++) {
506: n = ii[1] - ii[0]; ii++;
507: sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0;
508: nonzerorow += (n>0);
509: for (j=0; j<n; j++) {
510: xb = x + 6*(*idx++);
511: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
512: sum1 += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
513: sum2 += v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
514: sum3 += v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
515: sum4 += v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
516: sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
517: sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
518: v += 36;
519: }
520: if (usecprow) z = zarray + 6*ridx[i];
521: z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
522: if (!usecprow) z += 6;
523: }
525: VecRestoreArray(xx,&x);
526: VecRestoreArray(zz,&zarray);
527: PetscLogFlops(72*a->nz - 6*nonzerorow);
528: return(0);
529: }
532: PetscErrorCode MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
533: {
534: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
535: PetscScalar *x,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
536: PetscScalar x1,x2,x3,x4,x5,x6,x7,*zarray;
537: MatScalar *v;
539: PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
540: PetscTruth usecprow=a->compressedrow.use;
543: VecGetArray(xx,&x);
544: VecGetArray(zz,&zarray);
546: idx = a->j;
547: v = a->a;
548: if (usecprow){
549: mbs = a->compressedrow.nrows;
550: ii = a->compressedrow.i;
551: ridx = a->compressedrow.rindex;
552: } else {
553: mbs = a->mbs;
554: ii = a->i;
555: z = zarray;
556: }
558: for (i=0; i<mbs; i++) {
559: n = ii[1] - ii[0]; ii++;
560: sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
561: nonzerorow += (n>0);
562: for (j=0; j<n; j++) {
563: xb = x + 7*(*idx++);
564: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
565: sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
566: sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
567: sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
568: sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
569: sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
570: sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
571: sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
572: v += 49;
573: }
574: if (usecprow) z = zarray + 7*ridx[i];
575: z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
576: if (!usecprow) z += 7;
577: }
579: VecRestoreArray(xx,&x);
580: VecRestoreArray(zz,&zarray);
581: PetscLogFlops(98*a->nz - 7*nonzerorow);
582: return(0);
583: }
585: /*
586: This will not work with MatScalar == float because it calls the BLAS
587: */
590: PetscErrorCode MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
591: {
592: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
593: PetscScalar *x,*z = 0,*xb,*work,*workt,*zarray;
594: MatScalar *v;
596: PetscInt mbs=a->mbs,i,*idx,*ii,bs=A->rmap.bs,j,n,bs2=a->bs2;
597: PetscInt ncols,k,*ridx=PETSC_NULL,nonzerorow=0;
598: PetscTruth usecprow=a->compressedrow.use;
601: VecGetArray(xx,&x);
602: VecGetArray(zz,&zarray);
604: idx = a->j;
605: v = a->a;
606: if (usecprow){
607: mbs = a->compressedrow.nrows;
608: ii = a->compressedrow.i;
609: ridx = a->compressedrow.rindex;
610: } else {
611: mbs = a->mbs;
612: ii = a->i;
613: z = zarray;
614: }
616: if (!a->mult_work) {
617: k = PetscMax(A->rmap.n,A->cmap.n);
618: PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);
619: }
620: work = a->mult_work;
621: for (i=0; i<mbs; i++) {
622: n = ii[1] - ii[0]; ii++;
623: ncols = n*bs;
624: workt = work;
625: nonzerorow += (n>0);
626: for (j=0; j<n; j++) {
627: xb = x + bs*(*idx++);
628: for (k=0; k<bs; k++) workt[k] = xb[k];
629: workt += bs;
630: }
631: if (usecprow) z = zarray + bs*ridx[i];
632: Kernel_w_gets_Ar_times_v(bs,ncols,work,v,z);
633: /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); */
634: v += n*bs2;
635: if (!usecprow) z += bs;
636: }
637: VecRestoreArray(xx,&x);
638: VecRestoreArray(zz,&zarray);
639: PetscLogFlops(2*a->nz*bs2 - bs*nonzerorow);
640: return(0);
641: }
646: PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
647: {
648: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
649: PetscScalar *x,*y = 0,*z = 0,sum,*yarray,*zarray;
650: MatScalar *v;
652: PetscInt mbs=a->mbs,i,*idx,*ii,n,*ridx=PETSC_NULL;
653: PetscTruth usecprow=a->compressedrow.use;
656: VecGetArray(xx,&x);
657: VecGetArray(yy,&yarray);
658: if (zz != yy) {
659: VecGetArray(zz,&zarray);
660: } else {
661: zarray = yarray;
662: }
664: idx = a->j;
665: v = a->a;
666: if (usecprow){
667: if (zz != yy){
668: PetscMemcpy(zarray,yarray,mbs*sizeof(PetscScalar));
669: }
670: mbs = a->compressedrow.nrows;
671: ii = a->compressedrow.i;
672: ridx = a->compressedrow.rindex;
673: } else {
674: ii = a->i;
675: y = yarray;
676: z = zarray;
677: }
679: for (i=0; i<mbs; i++) {
680: n = ii[1] - ii[0]; ii++;
681: if (usecprow){
682: z = zarray + ridx[i];
683: y = yarray + ridx[i];
684: }
685: sum = y[0];
686: while (n--) sum += *v++ * x[*idx++];
687: z[0] = sum;
688: if (!usecprow){
689: z++; y++;
690: }
691: }
692: VecRestoreArray(xx,&x);
693: VecRestoreArray(yy,&yarray);
694: if (zz != yy) {
695: VecRestoreArray(zz,&zarray);
696: }
697: PetscLogFlops(2*a->nz);
698: return(0);
699: }
703: PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
704: {
705: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
706: PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2;
707: PetscScalar x1,x2,*yarray,*zarray;
708: MatScalar *v;
710: PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
711: PetscTruth usecprow=a->compressedrow.use;
714: VecGetArray(xx,&x);
715: VecGetArray(yy,&yarray);
716: if (zz != yy) {
717: VecGetArray(zz,&zarray);
718: } else {
719: zarray = yarray;
720: }
722: idx = a->j;
723: v = a->a;
724: if (usecprow){
725: if (zz != yy){
726: PetscMemcpy(zarray,yarray,2*mbs*sizeof(PetscScalar));
727: }
728: mbs = a->compressedrow.nrows;
729: ii = a->compressedrow.i;
730: ridx = a->compressedrow.rindex;
731: if (zz != yy){
732: PetscMemcpy(zarray,yarray,a->mbs*sizeof(PetscScalar));
733: }
734: } else {
735: ii = a->i;
736: y = yarray;
737: z = zarray;
738: }
740: for (i=0; i<mbs; i++) {
741: n = ii[1] - ii[0]; ii++;
742: if (usecprow){
743: z = zarray + 2*ridx[i];
744: y = yarray + 2*ridx[i];
745: }
746: sum1 = y[0]; sum2 = y[1];
747: for (j=0; j<n; j++) {
748: xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
749: sum1 += v[0]*x1 + v[2]*x2;
750: sum2 += v[1]*x1 + v[3]*x2;
751: v += 4;
752: }
753: z[0] = sum1; z[1] = sum2;
754: if (!usecprow){
755: z += 2; y += 2;
756: }
757: }
758: VecRestoreArray(xx,&x);
759: VecRestoreArray(yy,&yarray);
760: if (zz != yy) {
761: VecRestoreArray(zz,&zarray);
762: }
763: PetscLogFlops(4*a->nz);
764: return(0);
765: }
769: PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
770: {
771: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
772: PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,x1,x2,x3,*yarray,*zarray;
773: MatScalar *v;
775: PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
776: PetscTruth usecprow=a->compressedrow.use;
779: VecGetArray(xx,&x);
780: VecGetArray(yy,&yarray);
781: if (zz != yy) {
782: VecGetArray(zz,&zarray);
783: } else {
784: zarray = yarray;
785: }
787: idx = a->j;
788: v = a->a;
789: if (usecprow){
790: if (zz != yy){
791: PetscMemcpy(zarray,yarray,3*mbs*sizeof(PetscScalar));
792: }
793: mbs = a->compressedrow.nrows;
794: ii = a->compressedrow.i;
795: ridx = a->compressedrow.rindex;
796: } else {
797: ii = a->i;
798: y = yarray;
799: z = zarray;
800: }
802: for (i=0; i<mbs; i++) {
803: n = ii[1] - ii[0]; ii++;
804: if (usecprow){
805: z = zarray + 3*ridx[i];
806: y = yarray + 3*ridx[i];
807: }
808: sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
809: for (j=0; j<n; j++) {
810: xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
811: sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
812: sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
813: sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
814: v += 9;
815: }
816: z[0] = sum1; z[1] = sum2; z[2] = sum3;
817: if (!usecprow){
818: z += 3; y += 3;
819: }
820: }
821: VecRestoreArray(xx,&x);
822: VecRestoreArray(yy,&yarray);
823: if (zz != yy) {
824: VecRestoreArray(zz,&zarray);
825: }
826: PetscLogFlops(18*a->nz);
827: return(0);
828: }
832: PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
833: {
834: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
835: PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*yarray,*zarray;
836: MatScalar *v;
838: PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
839: PetscTruth usecprow=a->compressedrow.use;
842: VecGetArray(xx,&x);
843: VecGetArray(yy,&yarray);
844: if (zz != yy) {
845: VecGetArray(zz,&zarray);
846: } else {
847: zarray = yarray;
848: }
850: idx = a->j;
851: v = a->a;
852: if (usecprow){
853: if (zz != yy){
854: PetscMemcpy(zarray,yarray,4*mbs*sizeof(PetscScalar));
855: }
856: mbs = a->compressedrow.nrows;
857: ii = a->compressedrow.i;
858: ridx = a->compressedrow.rindex;
859: } else {
860: ii = a->i;
861: y = yarray;
862: z = zarray;
863: }
865: for (i=0; i<mbs; i++) {
866: n = ii[1] - ii[0]; ii++;
867: if (usecprow){
868: z = zarray + 4*ridx[i];
869: y = yarray + 4*ridx[i];
870: }
871: sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
872: for (j=0; j<n; j++) {
873: xb = x + 4*(*idx++);
874: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
875: sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
876: sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
877: sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
878: sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
879: v += 16;
880: }
881: z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
882: if (!usecprow){
883: z += 4; y += 4;
884: }
885: }
886: VecRestoreArray(xx,&x);
887: VecRestoreArray(yy,&yarray);
888: if (zz != yy) {
889: VecRestoreArray(zz,&zarray);
890: }
891: PetscLogFlops(32*a->nz);
892: return(0);
893: }
897: PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
898: {
899: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
900: PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;
901: PetscScalar *yarray,*zarray;
902: MatScalar *v;
904: PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
905: PetscTruth usecprow=a->compressedrow.use;
908: VecGetArray(xx,&x);
909: VecGetArray(yy,&yarray);
910: if (zz != yy) {
911: VecGetArray(zz,&zarray);
912: } else {
913: zarray = yarray;
914: }
916: idx = a->j;
917: v = a->a;
918: if (usecprow){
919: if (zz != yy){
920: PetscMemcpy(zarray,yarray,5*mbs*sizeof(PetscScalar));
921: }
922: mbs = a->compressedrow.nrows;
923: ii = a->compressedrow.i;
924: ridx = a->compressedrow.rindex;
925: } else {
926: ii = a->i;
927: y = yarray;
928: z = zarray;
929: }
931: for (i=0; i<mbs; i++) {
932: n = ii[1] - ii[0]; ii++;
933: if (usecprow){
934: z = zarray + 5*ridx[i];
935: y = yarray + 5*ridx[i];
936: }
937: sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
938: for (j=0; j<n; j++) {
939: xb = x + 5*(*idx++);
940: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
941: sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
942: sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
943: sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
944: sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
945: sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
946: v += 25;
947: }
948: z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
949: if (!usecprow){
950: z += 5; y += 5;
951: }
952: }
953: VecRestoreArray(xx,&x);
954: VecRestoreArray(yy,&yarray);
955: if (zz != yy) {
956: VecRestoreArray(zz,&zarray);
957: }
958: PetscLogFlops(50*a->nz);
959: return(0);
960: }
963: PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
964: {
965: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
966: PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6;
967: PetscScalar x1,x2,x3,x4,x5,x6,*yarray,*zarray;
968: MatScalar *v;
970: PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
971: PetscTruth usecprow=a->compressedrow.use;
974: VecGetArray(xx,&x);
975: VecGetArray(yy,&yarray);
976: if (zz != yy) {
977: VecGetArray(zz,&zarray);
978: } else {
979: zarray = yarray;
980: }
982: idx = a->j;
983: v = a->a;
984: if (usecprow){
985: if (zz != yy){
986: PetscMemcpy(zarray,yarray,6*mbs*sizeof(PetscScalar));
987: }
988: mbs = a->compressedrow.nrows;
989: ii = a->compressedrow.i;
990: ridx = a->compressedrow.rindex;
991: } else {
992: ii = a->i;
993: y = yarray;
994: z = zarray;
995: }
997: for (i=0; i<mbs; i++) {
998: n = ii[1] - ii[0]; ii++;
999: if (usecprow){
1000: z = zarray + 6*ridx[i];
1001: y = yarray + 6*ridx[i];
1002: }
1003: sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5];
1004: for (j=0; j<n; j++) {
1005: xb = x + 6*(*idx++);
1006: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
1007: sum1 += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
1008: sum2 += v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
1009: sum3 += v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
1010: sum4 += v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
1011: sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
1012: sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
1013: v += 36;
1014: }
1015: z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
1016: if (!usecprow){
1017: z += 6; y += 6;
1018: }
1019: }
1020: VecRestoreArray(xx,&x);
1021: VecRestoreArray(yy,&yarray);
1022: if (zz != yy) {
1023: VecRestoreArray(zz,&zarray);
1024: }
1025: PetscLogFlops(72*a->nz);
1026: return(0);
1027: }
1031: PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1032: {
1033: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1034: PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
1035: PetscScalar x1,x2,x3,x4,x5,x6,x7,*yarray,*zarray;
1036: MatScalar *v;
1038: PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
1039: PetscTruth usecprow=a->compressedrow.use;
1042: VecGetArray(xx,&x);
1043: VecGetArray(yy,&yarray);
1044: if (zz != yy) {
1045: VecGetArray(zz,&zarray);
1046: } else {
1047: zarray = yarray;
1048: }
1050: idx = a->j;
1051: v = a->a;
1052: if (usecprow){
1053: if (zz != yy){
1054: PetscMemcpy(zarray,yarray,7*mbs*sizeof(PetscScalar));
1055: }
1056: mbs = a->compressedrow.nrows;
1057: ii = a->compressedrow.i;
1058: ridx = a->compressedrow.rindex;
1059: } else {
1060: ii = a->i;
1061: y = yarray;
1062: z = zarray;
1063: }
1065: for (i=0; i<mbs; i++) {
1066: n = ii[1] - ii[0]; ii++;
1067: if (usecprow){
1068: z = zarray + 7*ridx[i];
1069: y = yarray + 7*ridx[i];
1070: }
1071: sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
1072: for (j=0; j<n; j++) {
1073: xb = x + 7*(*idx++);
1074: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1075: sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1076: sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1077: sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1078: sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1079: sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1080: sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1081: sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1082: v += 49;
1083: }
1084: z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1085: if (!usecprow){
1086: z += 7; y += 7;
1087: }
1088: }
1089: VecRestoreArray(xx,&x);
1090: VecRestoreArray(yy,&yarray);
1091: if (zz != yy) {
1092: VecRestoreArray(zz,&zarray);
1093: }
1094: PetscLogFlops(98*a->nz);
1095: return(0);
1096: }
1100: PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1101: {
1102: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1103: PetscScalar *x,*z = 0,*xb,*work,*workt,*zarray;
1104: MatScalar *v;
1106: PetscInt mbs,i,*idx,*ii,bs=A->rmap.bs,j,n,bs2=a->bs2;
1107: PetscInt ncols,k,*ridx=PETSC_NULL;
1108: PetscTruth usecprow=a->compressedrow.use;
1111: VecCopy_Seq(yy,zz);
1112: VecGetArray(xx,&x);
1113: VecGetArray(zz,&zarray);
1115: idx = a->j;
1116: v = a->a;
1117: if (usecprow){
1118: mbs = a->compressedrow.nrows;
1119: ii = a->compressedrow.i;
1120: ridx = a->compressedrow.rindex;
1121: } else {
1122: mbs = a->mbs;
1123: ii = a->i;
1124: z = zarray;
1125: }
1127: if (!a->mult_work) {
1128: k = PetscMax(A->rmap.n,A->cmap.n);
1129: PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);
1130: }
1131: work = a->mult_work;
1132: for (i=0; i<mbs; i++) {
1133: n = ii[1] - ii[0]; ii++;
1134: ncols = n*bs;
1135: workt = work;
1136: for (j=0; j<n; j++) {
1137: xb = x + bs*(*idx++);
1138: for (k=0; k<bs; k++) workt[k] = xb[k];
1139: workt += bs;
1140: }
1141: if (usecprow) z = zarray + bs*ridx[i];
1142: Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
1143: /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); */
1144: v += n*bs2;
1145: if (!usecprow){
1146: z += bs;
1147: }
1148: }
1149: VecRestoreArray(xx,&x);
1150: VecRestoreArray(zz,&zarray);
1151: PetscLogFlops(2*a->nz*bs2);
1152: return(0);
1153: }
1157: PetscErrorCode MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz)
1158: {
1159: PetscScalar zero = 0.0;
1163: VecSet(zz,zero);
1164: MatMultTransposeAdd_SeqBAIJ(A,xx,zz,zz);
1165: return(0);
1166: }
1170: PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1172: {
1173: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1174: PetscScalar *zb,*x,*z,*xb = 0,x1,x2,x3,x4,x5;
1175: MatScalar *v;
1176: PetscErrorCode ierr;
1177: PetscInt mbs,i,*idx,*ii,rval,bs=A->rmap.bs,j,n,bs2=a->bs2,*ib,*ridx=PETSC_NULL;
1178: Mat_CompressedRow cprow = a->compressedrow;
1179: PetscTruth usecprow=cprow.use;
1182: if (yy != zz) { VecCopy_Seq(yy,zz); }
1183: VecGetArray(xx,&x);
1184: VecGetArray(zz,&z);
1186: idx = a->j;
1187: v = a->a;
1188: if (usecprow){
1189: mbs = cprow.nrows;
1190: ii = cprow.i;
1191: ridx = cprow.rindex;
1192: } else {
1193: mbs=a->mbs;
1194: ii = a->i;
1195: xb = x;
1196: }
1198: switch (bs) {
1199: case 1:
1200: for (i=0; i<mbs; i++) {
1201: if (usecprow) xb = x + ridx[i];
1202: x1 = xb[0];
1203: ib = idx + ii[0];
1204: n = ii[1] - ii[0]; ii++;
1205: for (j=0; j<n; j++) {
1206: rval = ib[j];
1207: z[rval] += *v * x1;
1208: v++;
1209: }
1210: if (!usecprow) xb++;
1211: }
1212: break;
1213: case 2:
1214: for (i=0; i<mbs; i++) {
1215: if (usecprow) xb = x + 2*ridx[i];
1216: x1 = xb[0]; x2 = xb[1];
1217: ib = idx + ii[0];
1218: n = ii[1] - ii[0]; ii++;
1219: for (j=0; j<n; j++) {
1220: rval = ib[j]*2;
1221: z[rval++] += v[0]*x1 + v[1]*x2;
1222: z[rval++] += v[2]*x1 + v[3]*x2;
1223: v += 4;
1224: }
1225: if (!usecprow) xb += 2;
1226: }
1227: break;
1228: case 3:
1229: for (i=0; i<mbs; i++) {
1230: if (usecprow) xb = x + 3*ridx[i];
1231: x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1232: ib = idx + ii[0];
1233: n = ii[1] - ii[0]; ii++;
1234: for (j=0; j<n; j++) {
1235: rval = ib[j]*3;
1236: z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1237: z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1238: z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1239: v += 9;
1240: }
1241: if (!usecprow) xb += 3;
1242: }
1243: break;
1244: case 4:
1245: for (i=0; i<mbs; i++) {
1246: if (usecprow) xb = x + 4*ridx[i];
1247: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1248: ib = idx + ii[0];
1249: n = ii[1] - ii[0]; ii++;
1250: for (j=0; j<n; j++) {
1251: rval = ib[j]*4;
1252: z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
1253: z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
1254: z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
1255: z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1256: v += 16;
1257: }
1258: if (!usecprow) xb += 4;
1259: }
1260: break;
1261: case 5:
1262: for (i=0; i<mbs; i++) {
1263: if (usecprow) xb = x + 5*ridx[i];
1264: x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1265: x4 = xb[3]; x5 = xb[4];
1266: ib = idx + ii[0];
1267: n = ii[1] - ii[0]; ii++;
1268: for (j=0; j<n; j++) {
1269: rval = ib[j]*5;
1270: z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
1271: z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
1272: z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1273: z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1274: z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1275: v += 25;
1276: }
1277: if (!usecprow) xb += 5;
1278: }
1279: break;
1280: default: { /* block sizes larger then 5 by 5 are handled by BLAS */
1281: PetscInt ncols,k;
1282: PetscScalar *work,*workt,*xtmp;
1284: if (!a->mult_work) {
1285: k = PetscMax(A->rmap.n,A->cmap.n);
1286: PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);
1287: }
1288: work = a->mult_work;
1289: xtmp = x;
1290: for (i=0; i<mbs; i++) {
1291: n = ii[1] - ii[0]; ii++;
1292: ncols = n*bs;
1293: PetscMemzero(work,ncols*sizeof(PetscScalar));
1294: if (usecprow) {
1295: xtmp = x + bs*ridx[i];
1296: }
1297: Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,xtmp,v,work);
1298: /* BLASgemv_("T",&bs,&ncols,&_DOne,v,&bs,xtmp,&_One,&_DOne,work,&_One); */
1299: v += n*bs2;
1300: if (!usecprow) xtmp += bs;
1301: workt = work;
1302: for (j=0; j<n; j++) {
1303: zb = z + bs*(*idx++);
1304: for (k=0; k<bs; k++) zb[k] += workt[k] ;
1305: workt += bs;
1306: }
1307: }
1308: }
1309: }
1310: VecRestoreArray(xx,&x);
1311: VecRestoreArray(zz,&z);
1312: PetscLogFlops(2*a->nz*a->bs2);
1313: return(0);
1314: }
1318: PetscErrorCode MatScale_SeqBAIJ(Mat inA,PetscScalar alpha)
1319: {
1320: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1321: PetscInt totalnz = a->bs2*a->nz;
1322: PetscScalar oalpha = alpha;
1324: #if defined(PETSC_USE_MAT_SINGLE)
1325: PetscInt i;
1326: #else
1327: PetscBLASInt tnz = (PetscBLASInt) totalnz,one = 1;
1328: #endif
1331: #if defined(PETSC_USE_MAT_SINGLE)
1332: for (i=0; i<totalnz; i++) a->a[i] *= oalpha;
1333: #else
1334: BLASscal_(&tnz,&oalpha,a->a,&one);
1335: #endif
1336: PetscLogFlops(totalnz);
1337: return(0);
1338: }
1342: PetscErrorCode MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm)
1343: {
1345: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1346: MatScalar *v = a->a;
1347: PetscReal sum = 0.0;
1348: PetscInt i,j,k,bs=A->rmap.bs,nz=a->nz,bs2=a->bs2,k1;
1351: if (type == NORM_FROBENIUS) {
1352: for (i=0; i< bs2*nz; i++) {
1353: #if defined(PETSC_USE_COMPLEX)
1354: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1355: #else
1356: sum += (*v)*(*v); v++;
1357: #endif
1358: }
1359: *norm = sqrt(sum);
1360: } else if (type == NORM_1) { /* maximum column sum */
1361: PetscReal *tmp;
1362: PetscInt *bcol = a->j;
1363: PetscMalloc((A->cmap.n+1)*sizeof(PetscReal),&tmp);
1364: PetscMemzero(tmp,A->cmap.n*sizeof(PetscReal));
1365: for (i=0; i<nz; i++){
1366: for (j=0; j<bs; j++){
1367: k1 = bs*(*bcol) + j; /* column index */
1368: for (k=0; k<bs; k++){
1369: tmp[k1] += PetscAbsScalar(*v); v++;
1370: }
1371: }
1372: bcol++;
1373: }
1374: *norm = 0.0;
1375: for (j=0; j<A->cmap.n; j++) {
1376: if (tmp[j] > *norm) *norm = tmp[j];
1377: }
1378: PetscFree(tmp);
1379: } else if (type == NORM_INFINITY) { /* maximum row sum */
1380: *norm = 0.0;
1381: for (k=0; k<bs; k++) {
1382: for (j=0; j<a->mbs; j++) {
1383: v = a->a + bs2*a->i[j] + k;
1384: sum = 0.0;
1385: for (i=0; i<a->i[j+1]-a->i[j]; i++) {
1386: for (k1=0; k1<bs; k1++){
1387: sum += PetscAbsScalar(*v);
1388: v += bs;
1389: }
1390: }
1391: if (sum > *norm) *norm = sum;
1392: }
1393: }
1394: } else {
1395: SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1396: }
1397: return(0);
1398: }
1403: PetscErrorCode MatEqual_SeqBAIJ(Mat A,Mat B,PetscTruth* flg)
1404: {
1405: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data,*b = (Mat_SeqBAIJ *)B->data;
1409: /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */
1410: if ((A->rmap.N != B->rmap.N) || (A->cmap.n != B->cmap.n) || (A->rmap.bs != B->rmap.bs)|| (a->nz != b->nz)) {
1411: *flg = PETSC_FALSE;
1412: return(0);
1413: }
1414:
1415: /* if the a->i are the same */
1416: PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);
1417: if (!*flg) {
1418: return(0);
1419: }
1420:
1421: /* if a->j are the same */
1422: PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);
1423: if (!*flg) {
1424: return(0);
1425: }
1426: /* if a->a are the same */
1427: PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap.bs)*(B->rmap.bs)*sizeof(PetscScalar),flg);
1428: return(0);
1429:
1430: }
1434: PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1435: {
1436: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1438: PetscInt i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1439: PetscScalar *x,zero = 0.0;
1440: MatScalar *aa,*aa_j;
1443: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1444: bs = A->rmap.bs;
1445: aa = a->a;
1446: ai = a->i;
1447: aj = a->j;
1448: ambs = a->mbs;
1449: bs2 = a->bs2;
1451: VecSet(v,zero);
1452: VecGetArray(v,&x);
1453: VecGetLocalSize(v,&n);
1454: if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1455: for (i=0; i<ambs; i++) {
1456: for (j=ai[i]; j<ai[i+1]; j++) {
1457: if (aj[j] == i) {
1458: row = i*bs;
1459: aa_j = aa+j*bs2;
1460: for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1461: break;
1462: }
1463: }
1464: }
1465: VecRestoreArray(v,&x);
1466: return(0);
1467: }
1471: PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1472: {
1473: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1474: PetscScalar *l,*r,x,*li,*ri;
1475: MatScalar *aa,*v;
1477: PetscInt i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1480: ai = a->i;
1481: aj = a->j;
1482: aa = a->a;
1483: m = A->rmap.n;
1484: n = A->cmap.n;
1485: bs = A->rmap.bs;
1486: mbs = a->mbs;
1487: bs2 = a->bs2;
1488: if (ll) {
1489: VecGetArray(ll,&l);
1490: VecGetLocalSize(ll,&lm);
1491: if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1492: for (i=0; i<mbs; i++) { /* for each block row */
1493: M = ai[i+1] - ai[i];
1494: li = l + i*bs;
1495: v = aa + bs2*ai[i];
1496: for (j=0; j<M; j++) { /* for each block */
1497: for (k=0; k<bs2; k++) {
1498: (*v++) *= li[k%bs];
1499: }
1500: }
1501: }
1502: VecRestoreArray(ll,&l);
1503: PetscLogFlops(a->nz);
1504: }
1505:
1506: if (rr) {
1507: VecGetArray(rr,&r);
1508: VecGetLocalSize(rr,&rn);
1509: if (rn != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
1510: for (i=0; i<mbs; i++) { /* for each block row */
1511: M = ai[i+1] - ai[i];
1512: v = aa + bs2*ai[i];
1513: for (j=0; j<M; j++) { /* for each block */
1514: ri = r + bs*aj[ai[i]+j];
1515: for (k=0; k<bs; k++) {
1516: x = ri[k];
1517: for (tmp=0; tmp<bs; tmp++) (*v++) *= x;
1518: }
1519: }
1520: }
1521: VecRestoreArray(rr,&r);
1522: PetscLogFlops(a->nz);
1523: }
1524: return(0);
1525: }
1530: PetscErrorCode MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1531: {
1532: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1535: info->rows_global = (double)A->rmap.N;
1536: info->columns_global = (double)A->cmap.n;
1537: info->rows_local = (double)A->rmap.n;
1538: info->columns_local = (double)A->cmap.n;
1539: info->block_size = a->bs2;
1540: info->nz_allocated = a->maxnz;
1541: info->nz_used = a->bs2*a->nz;
1542: info->nz_unneeded = (double)(info->nz_allocated - info->nz_used);
1543: info->assemblies = A->num_ass;
1544: info->mallocs = a->reallocs;
1545: info->memory = ((PetscObject)A)->mem;
1546: if (A->factor) {
1547: info->fill_ratio_given = A->info.fill_ratio_given;
1548: info->fill_ratio_needed = A->info.fill_ratio_needed;
1549: info->factor_mallocs = A->info.factor_mallocs;
1550: } else {
1551: info->fill_ratio_given = 0;
1552: info->fill_ratio_needed = 0;
1553: info->factor_mallocs = 0;
1554: }
1555: return(0);
1556: }
1561: PetscErrorCode MatZeroEntries_SeqBAIJ(Mat A)
1562: {
1563: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1567: PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));
1568: return(0);
1569: }