Actual source code: sbaij2.c
2: #include src/mat/impls/baij/seq/baij.h
3: #include src/inline/spops.h
4: #include src/inline/ilu.h
5: #include petscbt.h
6: #include src/mat/impls/sbaij/seq/sbaij.h
10: PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
11: {
12: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
14: PetscInt brow,i,j,k,l,mbs,n,*idx,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
15: PetscBT table,table0;
18: mbs = a->mbs;
19: ai = a->i;
20: aj = a->j;
21: bs = A->bs;
23: if (ov < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
25: PetscBTCreate(mbs,table);
26: PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);
27: PetscMalloc((A->m+1)*sizeof(PetscInt),&nidx2);
28: PetscBTCreate(mbs,table0);
30: for (i=0; i<is_max; i++) { /* for each is */
31: isz = 0;
32: PetscBTMemzero(mbs,table);
33:
34: /* Extract the indices, assume there can be duplicate entries */
35: ISGetIndices(is[i],&idx);
36: ISGetLocalSize(is[i],&n);
38: /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */
39: bcol_max = 0;
40: for (j=0; j<n ; ++j){
41: brow = idx[j]/bs; /* convert the indices into block indices */
42: if (brow >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
43: if(!PetscBTLookupSet(table,brow)) {
44: nidx[isz++] = brow;
45: if (bcol_max < brow) bcol_max = brow;
46: }
47: }
48: ISRestoreIndices(is[i],&idx);
49: ISDestroy(is[i]);
50:
51: k = 0;
52: for (j=0; j<ov; j++){ /* for each overlap */
53: /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
54: PetscBTMemzero(mbs,table0);
55: for (l=k; l<isz; l++) PetscBTSet(table0,nidx[l]);
57: n = isz; /* length of the updated is[i] */
58: for (brow=0; brow<mbs; brow++){
59: start = ai[brow]; end = ai[brow+1];
60: if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
61: for (l = start; l<end ; l++){
62: bcol = aj[l];
63: if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;}
64: }
65: k++;
66: if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
67: } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
68: for (l = start; l<end ; l++){
69: bcol = aj[l];
70: if (bcol > bcol_max) break;
71: if (PetscBTLookup(table0,bcol)){
72: if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;}
73: break; /* for l = start; l<end ; l++) */
74: }
75: }
76: }
77: }
78: } /* for each overlap */
80: /* expand the Index Set */
81: for (j=0; j<isz; j++) {
82: for (k=0; k<bs; k++)
83: nidx2[j*bs+k] = nidx[j]*bs+k;
84: }
85: ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);
86: }
87: PetscBTDestroy(table);
88: PetscFree(nidx);
89: PetscFree(nidx2);
90: PetscBTDestroy(table0);
91: return(0);
92: }
96: PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
97: {
98: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*c;
100: PetscInt *smap,i,k,kstart,kend,oldcols = a->mbs,*lens;
101: PetscInt row,mat_i,*mat_j,tcol,*mat_ilen;
102: PetscInt *irow,nrows,*ssmap,bs=A->bs,bs2=a->bs2;
103: PetscInt *aj = a->j,*ai = a->i;
104: MatScalar *mat_a;
105: Mat C;
106: PetscTruth flag;
109:
110: if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
111: ISSorted(iscol,(PetscTruth*)&i);
112: if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
114: ISGetIndices(isrow,&irow);
115: ISGetSize(isrow,&nrows);
116:
117: PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);
118: ssmap = smap;
119: PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);
120: PetscMemzero(smap,oldcols*sizeof(PetscInt));
121: for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
122: /* determine lens of each row */
123: for (i=0; i<nrows; i++) {
124: kstart = ai[irow[i]];
125: kend = kstart + a->ilen[irow[i]];
126: lens[i] = 0;
127: for (k=kstart; k<kend; k++) {
128: if (ssmap[aj[k]]) {
129: lens[i]++;
130: }
131: }
132: }
133: /* Create and fill new matrix */
134: if (scall == MAT_REUSE_MATRIX) {
135: c = (Mat_SeqSBAIJ *)((*B)->data);
137: if (c->mbs!=nrows || (*B)->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
138: PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);
139: if (flag == PETSC_FALSE) {
140: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
141: }
142: PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));
143: C = *B;
144: } else {
145: MatCreate(A->comm,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE,&C);
146: MatSetType(C,A->type_name);
147: MatSeqSBAIJSetPreallocation(C,bs,0,lens);
148: }
149: c = (Mat_SeqSBAIJ *)(C->data);
150: for (i=0; i<nrows; i++) {
151: row = irow[i];
152: kstart = ai[row];
153: kend = kstart + a->ilen[row];
154: mat_i = c->i[i];
155: mat_j = c->j + mat_i;
156: mat_a = c->a + mat_i*bs2;
157: mat_ilen = c->ilen + i;
158: for (k=kstart; k<kend; k++) {
159: if ((tcol=ssmap[a->j[k]])) {
160: *mat_j++ = tcol - 1;
161: PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));
162: mat_a += bs2;
163: (*mat_ilen)++;
164: }
165: }
166: }
167:
168: /* Free work space */
169: PetscFree(smap);
170: PetscFree(lens);
171: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
172: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
173:
174: ISRestoreIndices(isrow,&irow);
175: *B = C;
176: return(0);
177: }
181: PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
182: {
183: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
184: IS is1;
186: PetscInt *vary,*iary,*irow,nrows,i,bs=A->bs,count;
189: if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
190:
191: ISGetIndices(isrow,&irow);
192: ISGetSize(isrow,&nrows);
193:
194: /* Verify if the indices corespond to each element in a block
195: and form the IS with compressed IS */
196: PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);
197: iary = vary + a->mbs;
198: PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));
199: for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
200:
201: count = 0;
202: for (i=0; i<a->mbs; i++) {
203: if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_INCOMP,"Index set does not match blocks");
204: if (vary[i]==bs) iary[count++] = i;
205: }
206: ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);
207:
208: ISRestoreIndices(isrow,&irow);
209: PetscFree(vary);
211: MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);
212: ISDestroy(is1);
213: return(0);
214: }
218: PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
219: {
221: PetscInt i;
224: if (scall == MAT_INITIAL_MATRIX) {
225: PetscMalloc((n+1)*sizeof(Mat),B);
226: }
228: for (i=0; i<n; i++) {
229: MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);
230: }
231: return(0);
232: }
234: /* -------------------------------------------------------*/
235: /* Should check that shapes of vectors and matrices match */
236: /* -------------------------------------------------------*/
237: #include petscblaslapack.h
241: PetscErrorCode MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz)
242: {
243: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
244: PetscScalar *x,*z,*xb,x1,zero=0.0;
245: MatScalar *v;
247: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
250: VecSet(&zero,zz);
251: VecGetArray(xx,&x);
252: VecGetArray(zz,&z);
254: v = a->a;
255: xb = x;
256:
257: for (i=0; i<mbs; i++) {
258: n = ai[1] - ai[0]; /* length of i_th row of A */
259: x1 = xb[0];
260: ib = aj + *ai;
261: jmin = 0;
262: if (*ib == i) { /* (diag of A)*x */
263: z[i] += *v++ * x[*ib++];
264: jmin++;
265: }
266: for (j=jmin; j<n; j++) {
267: cval = *ib;
268: z[cval] += *v * x1; /* (strict lower triangular part of A)*x */
269: z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */
270: }
271: xb++; ai++;
272: }
274: VecRestoreArray(xx,&x);
275: VecRestoreArray(zz,&z);
276: PetscLogFlops(2*(a->nz*2 - A->m) - A->m); /* nz = (nz+m)/2 */
277: return(0);
278: }
282: PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
283: {
284: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
285: PetscScalar *x,*z,*xb,x1,x2,zero=0.0;
286: MatScalar *v;
288: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
292: VecSet(&zero,zz);
293: VecGetArray(xx,&x);
294: VecGetArray(zz,&z);
295:
296: v = a->a;
297: xb = x;
299: for (i=0; i<mbs; i++) {
300: n = ai[1] - ai[0]; /* length of i_th block row of A */
301: x1 = xb[0]; x2 = xb[1];
302: ib = aj + *ai;
303: jmin = 0;
304: if (*ib == i){ /* (diag of A)*x */
305: z[2*i] += v[0]*x1 + v[2]*x2;
306: z[2*i+1] += v[2]*x1 + v[3]*x2;
307: v += 4; jmin++;
308: }
309: for (j=jmin; j<n; j++) {
310: /* (strict lower triangular part of A)*x */
311: cval = ib[j]*2;
312: z[cval] += v[0]*x1 + v[1]*x2;
313: z[cval+1] += v[2]*x1 + v[3]*x2;
314: /* (strict upper triangular part of A)*x */
315: z[2*i] += v[0]*x[cval] + v[2]*x[cval+1];
316: z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
317: v += 4;
318: }
319: xb +=2; ai++;
320: }
322: VecRestoreArray(xx,&x);
323: VecRestoreArray(zz,&z);
324: PetscLogFlops(8*(a->nz*2 - A->m) - A->m);
325: return(0);
326: }
330: PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
331: {
332: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
333: PetscScalar *x,*z,*xb,x1,x2,x3,zero=0.0;
334: MatScalar *v;
336: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
340: VecSet(&zero,zz);
341: VecGetArray(xx,&x);
342: VecGetArray(zz,&z);
343:
344: v = a->a;
345: xb = x;
347: for (i=0; i<mbs; i++) {
348: n = ai[1] - ai[0]; /* length of i_th block row of A */
349: x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
350: ib = aj + *ai;
351: jmin = 0;
352: if (*ib == i){ /* (diag of A)*x */
353: z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3;
354: z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
355: z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
356: v += 9; jmin++;
357: }
358: for (j=jmin; j<n; j++) {
359: /* (strict lower triangular part of A)*x */
360: cval = ib[j]*3;
361: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3;
362: z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3;
363: z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
364: /* (strict upper triangular part of A)*x */
365: z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
366: z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
367: z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
368: v += 9;
369: }
370: xb +=3; ai++;
371: }
373: VecRestoreArray(xx,&x);
374: VecRestoreArray(zz,&z);
375: PetscLogFlops(18*(a->nz*2 - A->m) - A->m);
376: return(0);
377: }
381: PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
382: {
383: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
384: PetscScalar *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
385: MatScalar *v;
387: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
390: VecSet(&zero,zz);
391: VecGetArray(xx,&x);
392: VecGetArray(zz,&z);
393:
394: v = a->a;
395: xb = x;
397: for (i=0; i<mbs; i++) {
398: n = ai[1] - ai[0]; /* length of i_th block row of A */
399: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
400: ib = aj + *ai;
401: jmin = 0;
402: if (*ib == i){ /* (diag of A)*x */
403: z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
404: z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
405: z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
406: z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
407: v += 16; jmin++;
408: }
409: for (j=jmin; j<n; j++) {
410: /* (strict lower triangular part of A)*x */
411: cval = ib[j]*4;
412: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
413: z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
414: z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
415: z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
416: /* (strict upper triangular part of A)*x */
417: z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
418: z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
419: z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
420: z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
421: v += 16;
422: }
423: xb +=4; ai++;
424: }
426: VecRestoreArray(xx,&x);
427: VecRestoreArray(zz,&z);
428: PetscLogFlops(32*(a->nz*2 - A->m) - A->m);
429: return(0);
430: }
434: PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
435: {
436: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
437: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
438: MatScalar *v;
440: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
443: VecSet(&zero,zz);
444: VecGetArray(xx,&x);
445: VecGetArray(zz,&z);
446:
447: v = a->a;
448: xb = x;
450: for (i=0; i<mbs; i++) {
451: n = ai[1] - ai[0]; /* length of i_th block row of A */
452: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
453: ib = aj + *ai;
454: jmin = 0;
455: if (*ib == i){ /* (diag of A)*x */
456: z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
457: z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
458: z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
459: z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
460: z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
461: v += 25; jmin++;
462: }
463: for (j=jmin; j<n; j++) {
464: /* (strict lower triangular part of A)*x */
465: cval = ib[j]*5;
466: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
467: z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
468: z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
469: z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
470: z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
471: /* (strict upper triangular part of A)*x */
472: z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
473: z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
474: z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
475: z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
476: z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
477: v += 25;
478: }
479: xb +=5; ai++;
480: }
482: VecRestoreArray(xx,&x);
483: VecRestoreArray(zz,&z);
484: PetscLogFlops(50*(a->nz*2 - A->m) - A->m);
485: return(0);
486: }
491: PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
492: {
493: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
494: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
495: MatScalar *v;
497: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
500: VecSet(&zero,zz);
501: VecGetArray(xx,&x);
502: VecGetArray(zz,&z);
503:
504: v = a->a;
505: xb = x;
507: for (i=0; i<mbs; i++) {
508: n = ai[1] - ai[0]; /* length of i_th block row of A */
509: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
510: ib = aj + *ai;
511: jmin = 0;
512: if (*ib == i){ /* (diag of A)*x */
513: z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
514: z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
515: z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
516: z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
517: z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
518: z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
519: v += 36; jmin++;
520: }
521: for (j=jmin; j<n; j++) {
522: /* (strict lower triangular part of A)*x */
523: cval = ib[j]*6;
524: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
525: z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
526: z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
527: z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
528: z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
529: z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
530: /* (strict upper triangular part of A)*x */
531: z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
532: z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
533: z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
534: z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
535: z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
536: z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
537: v += 36;
538: }
539: xb +=6; ai++;
540: }
542: VecRestoreArray(xx,&x);
543: VecRestoreArray(zz,&z);
544: PetscLogFlops(72*(a->nz*2 - A->m) - A->m);
545: return(0);
546: }
549: PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
550: {
551: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
552: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
553: MatScalar *v;
555: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
558: VecSet(&zero,zz);
559: VecGetArray(xx,&x);
560: VecGetArray(zz,&z);
561:
562: v = a->a;
563: xb = x;
565: for (i=0; i<mbs; i++) {
566: n = ai[1] - ai[0]; /* length of i_th block row of A */
567: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
568: ib = aj + *ai;
569: jmin = 0;
570: if (*ib == i){ /* (diag of A)*x */
571: z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
572: z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
573: z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
574: z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
575: z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
576: z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
577: z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
578: v += 49; jmin++;
579: }
580: for (j=jmin; j<n; j++) {
581: /* (strict lower triangular part of A)*x */
582: cval = ib[j]*7;
583: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
584: z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
585: z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
586: z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
587: z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
588: z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
589: z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
590: /* (strict upper triangular part of A)*x */
591: z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
592: z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
593: z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
594: z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
595: z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
596: z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
597: z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
598: v += 49;
599: }
600: xb +=7; ai++;
601: }
602: VecRestoreArray(xx,&x);
603: VecRestoreArray(zz,&z);
604: PetscLogFlops(98*(a->nz*2 - A->m) - A->m);
605: return(0);
606: }
608: /*
609: This will not work with MatScalar == float because it calls the BLAS
610: */
613: PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
614: {
615: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
616: PetscScalar *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
617: MatScalar *v;
619: PetscInt mbs=a->mbs,i,*idx,*aj,*ii,bs=A->bs,j,n,bs2=a->bs2,ncols,k;
622: VecSet(&zero,zz);
623: VecGetArray(xx,&x); x_ptr=x;
624: VecGetArray(zz,&z); z_ptr=z;
626: aj = a->j;
627: v = a->a;
628: ii = a->i;
630: if (!a->mult_work) {
631: PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);
632: }
633: work = a->mult_work;
634:
635: for (i=0; i<mbs; i++) {
636: n = ii[1] - ii[0]; ncols = n*bs;
637: workt = work; idx=aj+ii[0];
639: /* upper triangular part */
640: for (j=0; j<n; j++) {
641: xb = x_ptr + bs*(*idx++);
642: for (k=0; k<bs; k++) workt[k] = xb[k];
643: workt += bs;
644: }
645: /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
646: Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
647:
648: /* strict lower triangular part */
649: idx = aj+ii[0];
650: if (*idx == i){
651: ncols -= bs; v += bs2; idx++; n--;
652: }
653:
654: if (ncols > 0){
655: workt = work;
656: PetscMemzero(workt,ncols*sizeof(PetscScalar));
657: Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
658: for (j=0; j<n; j++) {
659: zb = z_ptr + bs*(*idx++);
660: for (k=0; k<bs; k++) zb[k] += workt[k] ;
661: workt += bs;
662: }
663: }
664: x += bs; v += n*bs2; z += bs; ii++;
665: }
666:
667: VecRestoreArray(xx,&x);
668: VecRestoreArray(zz,&z);
669: PetscLogFlops(2*(a->nz*2 - A->m)*bs2 - A->m);
670: return(0);
671: }
675: PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
676: {
677: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
678: PetscScalar *x,*y,*z,*xb,x1;
679: MatScalar *v;
681: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
684: VecGetArray(xx,&x);
685: if (yy != xx) {
686: VecGetArray(yy,&y);
687: } else {
688: y = x;
689: }
690: if (zz != yy) {
691: /* VecCopy(yy,zz); */
692: VecGetArray(zz,&z);
693: PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
694: } else {
695: z = y;
696: }
698: v = a->a;
699: xb = x;
701: for (i=0; i<mbs; i++) {
702: n = ai[1] - ai[0]; /* length of i_th row of A */
703: x1 = xb[0];
704: ib = aj + *ai;
705: jmin = 0;
706: if (*ib == i) { /* (diag of A)*x */
707: z[i] += *v++ * x[*ib++]; jmin++;
708: }
709: for (j=jmin; j<n; j++) {
710: cval = *ib;
711: z[cval] += *v * x1; /* (strict lower triangular part of A)*x */
712: z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */
713: }
714: xb++; ai++;
715: }
717: VecRestoreArray(xx,&x);
718: if (yy != xx) VecRestoreArray(yy,&y);
719: if (zz != yy) VecRestoreArray(zz,&z);
720:
721: PetscLogFlops(2*(a->nz*2 - A->m));
722: return(0);
723: }
727: PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
728: {
729: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
730: PetscScalar *x,*y,*z,*xb,x1,x2;
731: MatScalar *v;
733: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
736: VecGetArray(xx,&x);
737: if (yy != xx) {
738: VecGetArray(yy,&y);
739: } else {
740: y = x;
741: }
742: if (zz != yy) {
743: /* VecCopy(yy,zz); */
744: VecGetArray(zz,&z);
745: PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
746: } else {
747: z = y;
748: }
750: v = a->a;
751: xb = x;
753: for (i=0; i<mbs; i++) {
754: n = ai[1] - ai[0]; /* length of i_th block row of A */
755: x1 = xb[0]; x2 = xb[1];
756: ib = aj + *ai;
757: jmin = 0;
758: if (*ib == i){ /* (diag of A)*x */
759: z[2*i] += v[0]*x1 + v[2]*x2;
760: z[2*i+1] += v[2]*x1 + v[3]*x2;
761: v += 4; jmin++;
762: }
763: for (j=jmin; j<n; j++) {
764: /* (strict lower triangular part of A)*x */
765: cval = ib[j]*2;
766: z[cval] += v[0]*x1 + v[1]*x2;
767: z[cval+1] += v[2]*x1 + v[3]*x2;
768: /* (strict upper triangular part of A)*x */
769: z[2*i] += v[0]*x[cval] + v[2]*x[cval+1];
770: z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
771: v += 4;
772: }
773: xb +=2; ai++;
774: }
776: VecRestoreArray(xx,&x);
777: if (yy != xx) VecRestoreArray(yy,&y);
778: if (zz != yy) VecRestoreArray(zz,&z);
780: PetscLogFlops(4*(a->nz*2 - A->m));
781: return(0);
782: }
786: PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
787: {
788: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
789: PetscScalar *x,*y,*z,*xb,x1,x2,x3;
790: MatScalar *v;
792: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
795: VecGetArray(xx,&x);
796: if (yy != xx) {
797: VecGetArray(yy,&y);
798: } else {
799: y = x;
800: }
801: if (zz != yy) {
802: /* VecCopy(yy,zz); */
803: VecGetArray(zz,&z);
804: PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
805: } else {
806: z = y;
807: }
809: v = a->a;
810: xb = x;
812: for (i=0; i<mbs; i++) {
813: n = ai[1] - ai[0]; /* length of i_th block row of A */
814: x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
815: ib = aj + *ai;
816: jmin = 0;
817: if (*ib == i){ /* (diag of A)*x */
818: z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3;
819: z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
820: z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
821: v += 9; jmin++;
822: }
823: for (j=jmin; j<n; j++) {
824: /* (strict lower triangular part of A)*x */
825: cval = ib[j]*3;
826: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3;
827: z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3;
828: z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
829: /* (strict upper triangular part of A)*x */
830: z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
831: z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
832: z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
833: v += 9;
834: }
835: xb +=3; ai++;
836: }
838: VecRestoreArray(xx,&x);
839: if (yy != xx) VecRestoreArray(yy,&y);
840: if (zz != yy) VecRestoreArray(zz,&z);
842: PetscLogFlops(18*(a->nz*2 - A->m));
843: return(0);
844: }
848: PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
849: {
850: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
851: PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4;
852: MatScalar *v;
854: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
857: VecGetArray(xx,&x);
858: if (yy != xx) {
859: VecGetArray(yy,&y);
860: } else {
861: y = x;
862: }
863: if (zz != yy) {
864: /* VecCopy(yy,zz); */
865: VecGetArray(zz,&z);
866: PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
867: } else {
868: z = y;
869: }
871: v = a->a;
872: xb = x;
874: for (i=0; i<mbs; i++) {
875: n = ai[1] - ai[0]; /* length of i_th block row of A */
876: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
877: ib = aj + *ai;
878: jmin = 0;
879: if (*ib == i){ /* (diag of A)*x */
880: z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
881: z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
882: z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
883: z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
884: v += 16; jmin++;
885: }
886: for (j=jmin; j<n; j++) {
887: /* (strict lower triangular part of A)*x */
888: cval = ib[j]*4;
889: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
890: z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
891: z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
892: z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
893: /* (strict upper triangular part of A)*x */
894: z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
895: z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
896: z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
897: z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
898: v += 16;
899: }
900: xb +=4; ai++;
901: }
903: VecRestoreArray(xx,&x);
904: if (yy != xx) VecRestoreArray(yy,&y);
905: if (zz != yy) VecRestoreArray(zz,&z);
907: PetscLogFlops(32*(a->nz*2 - A->m));
908: return(0);
909: }
913: PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
914: {
915: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
916: PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5;
917: MatScalar *v;
919: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
922: VecGetArray(xx,&x);
923: if (yy != xx) {
924: VecGetArray(yy,&y);
925: } else {
926: y = x;
927: }
928: if (zz != yy) {
929: /* VecCopy(yy,zz); */
930: VecGetArray(zz,&z);
931: PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
932: } else {
933: z = y;
934: }
936: v = a->a;
937: xb = x;
939: for (i=0; i<mbs; i++) {
940: n = ai[1] - ai[0]; /* length of i_th block row of A */
941: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
942: ib = aj + *ai;
943: jmin = 0;
944: if (*ib == i){ /* (diag of A)*x */
945: z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
946: z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
947: z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
948: z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
949: z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
950: v += 25; jmin++;
951: }
952: for (j=jmin; j<n; j++) {
953: /* (strict lower triangular part of A)*x */
954: cval = ib[j]*5;
955: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
956: z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
957: z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
958: z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
959: z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
960: /* (strict upper triangular part of A)*x */
961: z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
962: z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
963: z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
964: z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
965: z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
966: v += 25;
967: }
968: xb +=5; ai++;
969: }
971: VecRestoreArray(xx,&x);
972: if (yy != xx) VecRestoreArray(yy,&y);
973: if (zz != yy) VecRestoreArray(zz,&z);
975: PetscLogFlops(50*(a->nz*2 - A->m));
976: return(0);
977: }
980: PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
981: {
982: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
983: PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6;
984: MatScalar *v;
986: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
989: VecGetArray(xx,&x);
990: if (yy != xx) {
991: VecGetArray(yy,&y);
992: } else {
993: y = x;
994: }
995: if (zz != yy) {
996: /* VecCopy(yy,zz); */
997: VecGetArray(zz,&z);
998: PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
999: } else {
1000: z = y;
1001: }
1003: v = a->a;
1004: xb = x;
1006: for (i=0; i<mbs; i++) {
1007: n = ai[1] - ai[0]; /* length of i_th block row of A */
1008: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
1009: ib = aj + *ai;
1010: jmin = 0;
1011: if (*ib == i){ /* (diag of A)*x */
1012: z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
1013: z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
1014: z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
1015: z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
1016: z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
1017: z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
1018: v += 36; jmin++;
1019: }
1020: for (j=jmin; j<n; j++) {
1021: /* (strict lower triangular part of A)*x */
1022: cval = ib[j]*6;
1023: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
1024: z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
1025: z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
1026: z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
1027: z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
1028: z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
1029: /* (strict upper triangular part of A)*x */
1030: z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
1031: z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
1032: z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
1033: z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
1034: z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
1035: z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
1036: v += 36;
1037: }
1038: xb +=6; ai++;
1039: }
1041: VecRestoreArray(xx,&x);
1042: if (yy != xx) VecRestoreArray(yy,&y);
1043: if (zz != yy) VecRestoreArray(zz,&z);
1045: PetscLogFlops(72*(a->nz*2 - A->m));
1046: return(0);
1047: }
1051: PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1052: {
1053: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1054: PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
1055: MatScalar *v;
1057: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
1060: VecGetArray(xx,&x);
1061: if (yy != xx) {
1062: VecGetArray(yy,&y);
1063: } else {
1064: y = x;
1065: }
1066: if (zz != yy) {
1067: /* VecCopy(yy,zz); */
1068: VecGetArray(zz,&z);
1069: PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
1070: } else {
1071: z = y;
1072: }
1074: v = a->a;
1075: xb = x;
1077: for (i=0; i<mbs; i++) {
1078: n = ai[1] - ai[0]; /* length of i_th block row of A */
1079: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
1080: ib = aj + *ai;
1081: jmin = 0;
1082: if (*ib == i){ /* (diag of A)*x */
1083: z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
1084: z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
1085: z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
1086: z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
1087: z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
1088: z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
1089: z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1090: v += 49; jmin++;
1091: }
1092: for (j=jmin; j<n; j++) {
1093: /* (strict lower triangular part of A)*x */
1094: cval = ib[j]*7;
1095: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
1096: z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
1097: z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
1098: z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
1099: z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
1100: z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
1101: z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1102: /* (strict upper triangular part of A)*x */
1103: z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
1104: z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
1105: z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
1106: z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
1107: z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
1108: z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
1109: z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
1110: v += 49;
1111: }
1112: xb +=7; ai++;
1113: }
1115: VecRestoreArray(xx,&x);
1116: if (yy != xx) VecRestoreArray(yy,&y);
1117: if (zz != yy) VecRestoreArray(zz,&z);
1119: PetscLogFlops(98*(a->nz*2 - A->m));
1120: return(0);
1121: }
1125: PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1126: {
1127: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1128: PetscScalar *x,*x_ptr,*y,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1129: MatScalar *v;
1131: PetscInt mbs=a->mbs,i,*idx,*aj,*ii,bs=A->bs,j,n,bs2=a->bs2,ncols,k;
1134: VecGetArray(xx,&x); x_ptr=x;
1135: if (yy != xx) {
1136: VecGetArray(yy,&y);
1137: } else {
1138: y = x;
1139: }
1140: if (zz != yy) {
1141: /* VecCopy(yy,zz); */
1142: VecGetArray(zz,&z); z_ptr=z;
1143: PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
1144: } else {
1145: z = y;
1146: }
1148: aj = a->j;
1149: v = a->a;
1150: ii = a->i;
1152: if (!a->mult_work) {
1153: PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);
1154: }
1155: work = a->mult_work;
1156:
1157:
1158: for (i=0; i<mbs; i++) {
1159: n = ii[1] - ii[0]; ncols = n*bs;
1160: workt = work; idx=aj+ii[0];
1162: /* upper triangular part */
1163: for (j=0; j<n; j++) {
1164: xb = x_ptr + bs*(*idx++);
1165: for (k=0; k<bs; k++) workt[k] = xb[k];
1166: workt += bs;
1167: }
1168: /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
1169: Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
1171: /* strict lower triangular part */
1172: idx = aj+ii[0];
1173: if (*idx == i){
1174: ncols -= bs; v += bs2; idx++; n--;
1175: }
1176: if (ncols > 0){
1177: workt = work;
1178: PetscMemzero(workt,ncols*sizeof(PetscScalar));
1179: Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1180: for (j=0; j<n; j++) {
1181: zb = z_ptr + bs*(*idx++);
1182: /* idx++; */
1183: for (k=0; k<bs; k++) zb[k] += workt[k] ;
1184: workt += bs;
1185: }
1186: }
1188: x += bs; v += n*bs2; z += bs; ii++;
1189: }
1191: VecRestoreArray(xx,&x);
1192: if (yy != xx) VecRestoreArray(yy,&y);
1193: if (zz != yy) VecRestoreArray(zz,&z);
1195: PetscLogFlops(2*(a->nz*2 - A->m));
1196: return(0);
1197: }
1201: PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar *alpha,Mat inA)
1202: {
1203: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1204: PetscBLASInt one = 1,totalnz = (PetscBLASInt)a->bs2*a->nz;
1207: BLscal_(&totalnz,(PetscScalar*)alpha,a->a,&one);
1208: PetscLogFlops(totalnz);
1209: return(0);
1210: }
1214: PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
1215: {
1216: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1217: MatScalar *v = a->a;
1218: PetscReal sum_diag = 0.0, sum_off = 0.0, *sum;
1219: PetscInt i,j,k,bs = A->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
1221: PetscInt *jl,*il,jmin,jmax,nexti,ik,*col;
1222:
1224: if (type == NORM_FROBENIUS) {
1225: for (k=0; k<mbs; k++){
1226: jmin = a->i[k]; jmax = a->i[k+1];
1227: col = aj + jmin;
1228: if (*col == k){ /* diagonal block */
1229: for (i=0; i<bs2; i++){
1230: #if defined(PETSC_USE_COMPLEX)
1231: sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
1232: #else
1233: sum_diag += (*v)*(*v); v++;
1234: #endif
1235: }
1236: jmin++;
1237: }
1238: for (j=jmin; j<jmax; j++){ /* off-diagonal blocks */
1239: for (i=0; i<bs2; i++){
1240: #if defined(PETSC_USE_COMPLEX)
1241: sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
1242: #else
1243: sum_off += (*v)*(*v); v++;
1244: #endif
1245: }
1246: }
1247: }
1248: *norm = sqrt(sum_diag + 2*sum_off);
1250: } else if (type == NORM_INFINITY) { /* maximum row sum */
1251: PetscMalloc(mbs*sizeof(PetscInt),&il);
1252: PetscMalloc(mbs*sizeof(PetscInt),&jl);
1253: PetscMalloc(bs*sizeof(PetscReal),&sum);
1254: for (i=0; i<mbs; i++) {
1255: jl[i] = mbs; il[0] = 0;
1256: }
1258: *norm = 0.0;
1259: for (k=0; k<mbs; k++) { /* k_th block row */
1260: for (j=0; j<bs; j++) sum[j]=0.0;
1262: /*-- col sum --*/
1263: i = jl[k]; /* first |A(i,k)| to be added */
1264: /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1265: at step k */
1266: while (i<mbs){
1267: nexti = jl[i]; /* next block row to be added */
1268: ik = il[i]; /* block index of A(i,k) in the array a */
1269: for (j=0; j<bs; j++){
1270: v = a->a + ik*bs2 + j*bs;
1271: for (k1=0; k1<bs; k1++) {
1272: sum[j] += PetscAbsScalar(*v); v++;
1273: }
1274: }
1275: /* update il, jl */
1276: jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1277: jmax = a->i[i+1];
1278: if (jmin < jmax){
1279: il[i] = jmin;
1280: j = a->j[jmin];
1281: jl[i] = jl[j]; jl[j]=i;
1282: }
1283: i = nexti;
1284: }
1285:
1286: /*-- row sum --*/
1287: jmin = a->i[k]; jmax = a->i[k+1];
1288: for (i=jmin; i<jmax; i++) {
1289: for (j=0; j<bs; j++){
1290: v = a->a + i*bs2 + j;
1291: for (k1=0; k1<bs; k1++){
1292: sum[j] += PetscAbsScalar(*v);
1293: v += bs;
1294: }
1295: }
1296: }
1297: /* add k_th block row to il, jl */
1298: col = aj+jmin;
1299: if (*col == k) jmin++;
1300: if (jmin < jmax){
1301: il[k] = jmin;
1302: j = a->j[jmin];
1303: jl[k] = jl[j]; jl[j] = k;
1304: }
1305: for (j=0; j<bs; j++){
1306: if (sum[j] > *norm) *norm = sum[j];
1307: }
1308: }
1309: PetscFree(il);
1310: PetscFree(jl);
1311: PetscFree(sum);
1312: } else {
1313: SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1314: }
1315: return(0);
1316: }
1320: PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
1321: {
1322: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
1327: /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */
1328: if ((A->m != B->m) || (A->n != B->n) || (A->bs != B->bs)|| (a->nz != b->nz)) {
1329: *flg = PETSC_FALSE;
1330: return(0);
1331: }
1332:
1333: /* if the a->i are the same */
1334: PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);
1335: if (*flg == PETSC_FALSE) {
1336: return(0);
1337: }
1338:
1339: /* if a->j are the same */
1340: PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);
1341: if (*flg == PETSC_FALSE) {
1342: return(0);
1343: }
1344: /* if a->a are the same */
1345: PetscMemcmp(a->a,b->a,(a->nz)*(A->bs)*(A->bs)*sizeof(PetscScalar),flg);
1346:
1347: return(0);
1348: }
1352: PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
1353: {
1354: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1356: PetscInt i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1357: PetscScalar *x,zero = 0.0;
1358: MatScalar *aa,*aa_j;
1361: bs = A->bs;
1362: if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
1363:
1364: aa = a->a;
1365: ai = a->i;
1366: aj = a->j;
1367: ambs = a->mbs;
1368: bs2 = a->bs2;
1370: VecSet(&zero,v);
1371: VecGetArray(v,&x);
1372: VecGetLocalSize(v,&n);
1373: if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1374: for (i=0; i<ambs; i++) {
1375: j=ai[i];
1376: if (aj[j] == i) { /* if this is a diagonal element */
1377: row = i*bs;
1378: aa_j = aa + j*bs2;
1379: if (A->factor && bs==1){
1380: for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k];
1381: } else {
1382: for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1383: }
1384: }
1385: }
1386:
1387: VecRestoreArray(v,&x);
1388: return(0);
1389: }
1393: PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
1394: {
1395: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1396: PetscScalar *l,*r,x,*li,*ri;
1397: MatScalar *aa,*v;
1399: PetscInt i,j,k,lm,rn,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1402: ai = a->i;
1403: aj = a->j;
1404: aa = a->a;
1405: m = A->m;
1406: bs = A->bs;
1407: mbs = a->mbs;
1408: bs2 = a->bs2;
1410: if (ll != rr) {
1411: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1412: }
1413: if (ll) {
1414: VecGetArray(ll,&l);
1415: VecGetLocalSize(ll,&lm);
1416: if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1417: for (i=0; i<mbs; i++) { /* for each block row */
1418: M = ai[i+1] - ai[i];
1419: li = l + i*bs;
1420: v = aa + bs2*ai[i];
1421: for (j=0; j<M; j++) { /* for each block */
1422: for (k=0; k<bs2; k++) {
1423: (*v++) *= li[k%bs];
1424: }
1425: #ifdef CONT
1426: /* will be used to replace the above loop */
1427: ri = l + bs*aj[ai[i]+j];
1428: for (k=0; k<bs; k++) { /* column value */
1429: x = ri[k];
1430: for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
1431: }
1432: #endif
1434: }
1435: }
1436: VecRestoreArray(ll,&l);
1437: PetscLogFlops(2*a->nz);
1438: }
1439: /* will be deleted */
1440: if (rr) {
1441: VecGetArray(rr,&r);
1442: VecGetLocalSize(rr,&rn);
1443: if (rn != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
1444: for (i=0; i<mbs; i++) { /* for each block row */
1445: M = ai[i+1] - ai[i];
1446: v = aa + bs2*ai[i];
1447: for (j=0; j<M; j++) { /* for each block */
1448: ri = r + bs*aj[ai[i]+j];
1449: for (k=0; k<bs; k++) {
1450: x = ri[k];
1451: for (tmp=0; tmp<bs; tmp++) (*v++) *= x;
1452: }
1453: }
1454: }
1455: VecRestoreArray(rr,&r);
1456: PetscLogFlops(a->nz);
1457: }
1458: return(0);
1459: }
1463: PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1464: {
1465: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1468: info->rows_global = (double)A->m;
1469: info->columns_global = (double)A->m;
1470: info->rows_local = (double)A->m;
1471: info->columns_local = (double)A->m;
1472: info->block_size = a->bs2;
1473: info->nz_allocated = a->maxnz; /*num. of nonzeros in upper triangular part */
1474: info->nz_used = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
1475: info->nz_unneeded = (double)(info->nz_allocated - info->nz_used);
1476: info->assemblies = A->num_ass;
1477: info->mallocs = a->reallocs;
1478: info->memory = A->mem;
1479: if (A->factor) {
1480: info->fill_ratio_given = A->info.fill_ratio_given;
1481: info->fill_ratio_needed = A->info.fill_ratio_needed;
1482: info->factor_mallocs = A->info.factor_mallocs;
1483: } else {
1484: info->fill_ratio_given = 0;
1485: info->fill_ratio_needed = 0;
1486: info->factor_mallocs = 0;
1487: }
1488: return(0);
1489: }
1494: PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
1495: {
1496: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1500: PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));
1501: return(0);
1502: }
1506: PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat A,Vec v)
1507: {
1508: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1510: PetscInt i,j,n,row,col,bs,*ai,*aj,mbs;
1511: PetscReal atmp;
1512: MatScalar *aa;
1513: PetscScalar zero = 0.0,*x;
1514: PetscInt ncols,brow,bcol,krow,kcol;
1517: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1518: bs = A->bs;
1519: aa = a->a;
1520: ai = a->i;
1521: aj = a->j;
1522: mbs = a->mbs;
1524: VecSet(&zero,v);
1525: VecGetArray(v,&x);
1526: VecGetLocalSize(v,&n);
1527: if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1528: for (i=0; i<mbs; i++) {
1529: ncols = ai[1] - ai[0]; ai++;
1530: brow = bs*i;
1531: for (j=0; j<ncols; j++){
1532: bcol = bs*(*aj);
1533: for (kcol=0; kcol<bs; kcol++){
1534: col = bcol + kcol; /* col index */
1535: for (krow=0; krow<bs; krow++){
1536: atmp = PetscAbsScalar(*aa); aa++;
1537: row = brow + krow; /* row index */
1538: if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1539: if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
1540: }
1541: }
1542: aj++;
1543: }
1544: }
1545: VecRestoreArray(v,&x);
1546: return(0);
1547: }