Actual source code: sbaij2.c

petsc-3.7.0 2016-04-25
Report Typos and Errors
  2: #include <../src/mat/impls/baij/seq/baij.h>
  3: #include <petsc/private/kernels/blockinvert.h>
  4: #include <petscbt.h>
  5: #include <../src/mat/impls/sbaij/seq/sbaij.h>
  6: #include <petscblaslapack.h>

 10: PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
 11: {
       /*
          Grows each of the is_max index sets in is[] by ov passes of overlap.  The work
          is done on block indices: incoming point indices are divided by bs, and every
          collected block is expanded back to bs consecutive point indices on output.
          Each is[i] is destroyed and replaced by a newly created index set.

          A      - matrix whose block structure (a->i, a->j, a->mbs) drives the expansion
          is_max - number of index sets in is[]
          is     - the index sets; replaced in place
          ov     - number of overlap passes; a negative value raises PETSC_ERR_ARG_OUTOFRANGE

          An input index whose block row is >= mbs raises PETSC_ERR_ARG_OUTOFRANGE.

          NOTE(review): no return code is checked in this listing; presumably the
          ierr = ...;CHKERRQ(ierr) wrappers were stripped when the listing was
          generated - confirm against the real source.
       */
 12:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
 14:   PetscInt       brow,i,j,k,l,mbs,n,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
 15:   const PetscInt *idx;
 16:   PetscBT        table_out,table_in;

 19:   if (ov < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
 20:   mbs  = a->mbs;
 21:   ai   = a->i;
 22:   aj   = a->j;
 23:   bs   = A->rmap->bs;
       /* table_out: one bit per block, set for every block already stored in nidx.
          table_in : one bit per block, set only for the blocks being expanded in the current pass.
          nidx     : collected block indices; nidx2: the same list expanded to point indices. */
 24:   PetscBTCreate(mbs,&table_out);
 25:   PetscMalloc1(mbs+1,&nidx);
 26:   PetscMalloc1(A->rmap->N+1,&nidx2);
 27:   PetscBTCreate(mbs,&table_in);

 29:   for (i=0; i<is_max; i++) { /* for each is */
 30:     isz  = 0;
 31:     PetscBTMemzero(mbs,table_out);

 33:     /* Extract the indices, assume there can be duplicate entries */
 34:     ISGetIndices(is[i],&idx);
 35:     ISGetLocalSize(is[i],&n);

 37:     /* Enter these into the temp arrays i.e mark table_out[brow], enter brow into new index */
 38:     bcol_max = 0;
 39:     for (j=0; j<n; ++j) {
 40:       brow = idx[j]/bs; /* convert the indices into block indices */
 41:       if (brow >= mbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
           /* presumably PetscBTLookupSet returns the previous bit value while setting it, so duplicates are skipped - confirm */
 42:       if (!PetscBTLookupSet(table_out,brow)) {
 43:         nidx[isz++] = brow;
 44:         if (bcol_max < brow) bcol_max = brow;
 45:       }
 46:     }
 47:     ISRestoreIndices(is[i],&idx);
 48:     ISDestroy(&is[i]);

         /* k counts the entries of nidx whose rows have already been expanded; it carries over between passes */
 50:     k = 0;
 51:     for (j=0; j<ov; j++) { /* for each overlap */
 52:       /* set table_in for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
 53:       PetscBTMemzero(mbs,table_in);
 54:       for (l=k; l<isz; l++) { PetscBTSet(table_in,nidx[l]); }

 56:       n = isz;  /* length of the updated is[i] */
 57:       for (brow=0; brow<mbs; brow++) {
 58:         start = ai[brow]; end   = ai[brow+1];
 59:         if (PetscBTLookup(table_in,brow)) { /* brow is on nidx - row search: collect all bcol in this brow */
 60:           for (l = start; l<end; l++) {
 61:             bcol = aj[l];
 62:             if (!PetscBTLookupSet(table_out,bcol)) {
 63:               nidx[isz++] = bcol;
 64:               if (bcol_max < bcol) bcol_max = bcol;
 65:             }
 66:           }
 67:           k++;
 68:           if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
 69:         } else { /* brow is not on nidx - col search: add brow onto nidx if there is a bcol in nidx */
 70:           for (l = start; l<end; l++) {
 71:             bcol = aj[l];
               /* no collected block exceeds bcol_max, so later columns of this row cannot match
                  (assumes the column indices of a row are stored in increasing order - TODO confirm) */
 72:             if (bcol > bcol_max) break;
 73:             if (PetscBTLookup(table_in,bcol)) {
 74:               if (!PetscBTLookupSet(table_out,brow)) nidx[isz++] = brow;
 75:               break; /* for l = start; l<end ; l++) */
 76:             }
 77:           }
 78:         }
 79:       }
 80:     } /* for each overlap */

 82:     /* expand the Index Set */
 83:     for (j=0; j<isz; j++) {
 84:       for (k=0; k<bs; k++) nidx2[j*bs+k] = nidx[j]*bs+k;
 85:     }
         /* PETSC_COPY_VALUES is passed, and nidx2 is reused for the next is and freed below */
 86:     ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,PETSC_COPY_VALUES,is+i);
 87:   }
 88:   PetscBTDestroy(&table_out);
 89:   PetscFree(nidx);
 90:   PetscFree(nidx2);
 91:   PetscBTDestroy(&table_in);
 92:   return(0);
 93: }

 95: /* Bseq is a non-symmetric SBAIJ matrix, only used internally by PETSc.
 96:         Zero some of its ops to avoid invalid use */
 99: PetscErrorCode MatSeqSBAIJZeroOps_Private(Mat Bseq)
100: {
       /*
          Sets the MAT_SYMMETRIC option of Bseq to PETSC_FALSE and clears the function
          pointers for mult, multadd, multtranspose, multtransposeadd, the numeric and
          symbolic LU/Cholesky factorizations, and getinertia.  Returns 0.
       */

104:   MatSetOption(Bseq,MAT_SYMMETRIC,PETSC_FALSE);
105:   Bseq->ops->mult                   = 0;
106:   Bseq->ops->multadd                = 0;
107:   Bseq->ops->multtranspose          = 0;
108:   Bseq->ops->multtransposeadd       = 0;
109:   Bseq->ops->lufactor               = 0;
110:   Bseq->ops->choleskyfactor         = 0;
111:   Bseq->ops->lufactorsymbolic       = 0;
112:   Bseq->ops->choleskyfactorsymbolic = 0;
113:   Bseq->ops->getinertia             = 0;
114:   return(0);
115: }

119: PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
120: {
       /*
          Builds *B by delegating to MatGetSubMatrix_SeqBAIJ with the same arguments.
          If isrow and iscol are different objects and ISEqual reports that they are
          not equal, *B is then passed to MatSeqSBAIJZeroOps_Private.  Returns 0.
       */

124:   MatGetSubMatrix_SeqBAIJ(A,isrow,iscol,scall,B);

       /* identical IS objects need no comparison; otherwise compare their contents */
126:   if (isrow != iscol) {
127:     PetscBool isequal;
128:     ISEqual(isrow,iscol,&isequal);
129:     if (!isequal) {
130:       MatSeqSBAIJZeroOps_Private(*B);
131:     }
132:   }
133:   return(0);
134: }

138: PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
139: {
       /*
          Builds the n submatrices by delegating to MatGetSubMatrices_SeqBAIJ, then, for
          every i whose irow[i] and icol[i] are reported unequal by ISEqual, passes the
          i-th submatrix to MatSeqSBAIJZeroOps_Private.  Returns 0.

          A     - source matrix
          n     - number of (irow[i],icol[i]) pairs
          irow  - row index sets
          icol  - column index sets
          scall - reuse flag, forwarded unchanged
          B     - address of the array of submatrices; the i-th one is (*B)[i]
       */
141:   PetscInt       i;
142:   PetscBool      flg;

145:   MatGetSubMatrices_SeqBAIJ(A,n,irow,icol,scall,B);
146:   for (i=0; i<n; i++) {
147:     ISEqual(irow[i],icol[i],&flg);
148:     if (!flg) {
           /* B addresses ONE array of Mat: index after dereferencing.  *B[i] would parse as *(B[i]) */
149:       MatSeqSBAIJZeroOps_Private((*B)[i]);
150:     }
151:   }
152:   return(0);
153: }

155: /* -------------------------------------------------------*/
156: /* Should check that shapes of vectors and matrices match */
157: /* -------------------------------------------------------*/

161: PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
162: {
       /*
          Computes zz = A*xx for 2x2 blocks using only the blocks stored in a->a.
          zz is zeroed first.  For block row i, every stored block v[0..3] at block
          column ib[j] contributes twice:
            z[2*i+r]  += sum_c v[r+2*c]*x[cval+c]      (block applied to x at its column)
            z[cval+c] += sum_r v[r+2*c]*xb[r]          (same block, transposed, applied to row i of x)
          A leading block whose column index equals i is the diagonal block: it is
          applied once and only entries v[r+2*c] with r <= c are read.

          NOTE(review): *ib is dereferenced even when n == 0 - confirm a->j is readable
          there for an empty (last) block row.
          NOTE(review): ierr is declared but unused in this listing; presumably the
          ierr = ...;CHKERRQ(ierr) wrappers were stripped - confirm against the real source.
       */
163:   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
164:   PetscScalar       *z,x1,x2,zero=0.0;
165:   const PetscScalar *x,*xb;
166:   const MatScalar   *v;
167:   PetscErrorCode    ierr;
168:   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
169:   const PetscInt    *aj=a->j,*ai=a->i,*ib;
170:   PetscInt          nonzerorow=0;

173:   VecSet(zz,zero);
174:   VecGetArrayRead(xx,&x);
175:   VecGetArray(zz,&z);

177:   v  = a->a;
178:   xb = x;

       /* ai and xb are advanced once per block row, so ai[0]/ai[1] and xb[0..1] always refer to row i */
180:   for (i=0; i<mbs; i++) {
181:     n           = ai[1] - ai[0]; /* length of i_th block row of A */
182:     x1          = xb[0]; x2 = xb[1];
183:     ib          = aj + *ai;
184:     jmin        = 0;
185:     nonzerorow += (n>0);
186:     if (*ib == i) {     /* (diag of A)*x */
187:       z[2*i]   += v[0]*x1 + v[2]*x2;
188:       z[2*i+1] += v[2]*x1 + v[3]*x2;
189:       v        += 4; jmin++;
190:     }
191:     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
192:     PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
193:     for (j=jmin; j<n; j++) {
194:       /* (strict lower triangular part of A)*x  */
195:       cval       = ib[j]*2;
196:       z[cval]   += v[0]*x1 + v[1]*x2;
197:       z[cval+1] += v[2]*x1 + v[3]*x2;
198:       /* (strict upper triangular part of A)*x  */
199:       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
200:       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
201:       v        += 4;
202:     }
203:     xb +=2; ai++;
204:   }

206:   VecRestoreArrayRead(xx,&x);
207:   VecRestoreArray(zz,&z);
208:   PetscLogFlops(8.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
209:   return(0);
210: }

214: PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
215: {
       /*
          Computes zz = A*xx for 3x3 blocks using only the blocks stored in a->a.
          zz is zeroed first.  For block row i, every stored block v[0..8] at block
          column ib[j] contributes twice:
            z[3*i+r]  += sum_c v[r+3*c]*x[cval+c]      (block applied to x at its column)
            z[cval+c] += sum_r v[r+3*c]*xb[r]          (same block, transposed, applied to row i of x)
          A leading block whose column index equals i is the diagonal block: it is
          applied once and only entries v[r+3*c] with r <= c are read.

          NOTE(review): *ib is dereferenced even when n == 0 - confirm a->j is readable
          there for an empty (last) block row.
          NOTE(review): ierr is declared but unused in this listing; presumably the
          ierr = ...;CHKERRQ(ierr) wrappers were stripped - confirm against the real source.
       */
216:   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
217:   PetscScalar       *z,x1,x2,x3,zero=0.0;
218:   const PetscScalar *x,*xb;
219:   const MatScalar   *v;
220:   PetscErrorCode    ierr;
221:   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
222:   const PetscInt    *aj = a->j,*ai = a->i,*ib;
223:   PetscInt          nonzerorow=0;

226:   VecSet(zz,zero);
227:   VecGetArrayRead(xx,&x);
228:   VecGetArray(zz,&z);

230:   v  = a->a;
231:   xb = x;

       /* ai and xb are advanced once per block row, so ai[0]/ai[1] and xb[0..2] always refer to row i */
233:   for (i=0; i<mbs; i++) {
234:     n           = ai[1] - ai[0]; /* length of i_th block row of A */
235:     x1          = xb[0]; x2 = xb[1]; x3 = xb[2];
236:     ib          = aj + *ai;
237:     jmin        = 0;
238:     nonzerorow += (n>0);
239:     if (*ib == i) {     /* (diag of A)*x */
240:       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
241:       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
242:       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
243:       v        += 9; jmin++;
244:     }
245:     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
246:     PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
247:     for (j=jmin; j<n; j++) {
248:       /* (strict lower triangular part of A)*x  */
249:       cval       = ib[j]*3;
250:       z[cval]   += v[0]*x1 + v[1]*x2 + v[2]*x3;
251:       z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3;
252:       z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
253:       /* (strict upper triangular part of A)*x  */
254:       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
255:       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
256:       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
257:       v        += 9;
258:     }
259:     xb +=3; ai++;
260:   }

262:   VecRestoreArrayRead(xx,&x);
263:   VecRestoreArray(zz,&z);
264:   PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
265:   return(0);
266: }

270: PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
271: {
       /*
          Computes zz = A*xx for 4x4 blocks using only the blocks stored in a->a.
          zz is zeroed first.  For block row i, every stored block v[0..15] at block
          column ib[j] contributes twice:
            z[4*i+r]  += sum_c v[r+4*c]*x[cval+c]      (block applied to x at its column)
            z[cval+c] += sum_r v[r+4*c]*xb[r]          (same block, transposed, applied to row i of x)
          A leading block whose column index equals i is the diagonal block: it is
          applied once and only entries v[r+4*c] with r <= c are read.

          NOTE(review): *ib is dereferenced even when n == 0 - confirm a->j is readable
          there for an empty (last) block row.
          NOTE(review): ierr is declared but unused in this listing; presumably the
          ierr = ...;CHKERRQ(ierr) wrappers were stripped - confirm against the real source.
       */
272:   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
273:   PetscScalar       *z,x1,x2,x3,x4,zero=0.0;
274:   const PetscScalar *x,*xb;
275:   const MatScalar   *v;
276:   PetscErrorCode    ierr;
277:   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
278:   const PetscInt    *aj = a->j,*ai = a->i,*ib;
279:   PetscInt          nonzerorow = 0;

282:   VecSet(zz,zero);
283:   VecGetArrayRead(xx,&x);
284:   VecGetArray(zz,&z);

286:   v  = a->a;
287:   xb = x;

       /* ai and xb are advanced once per block row, so ai[0]/ai[1] and xb[0..3] always refer to row i */
289:   for (i=0; i<mbs; i++) {
290:     n           = ai[1] - ai[0]; /* length of i_th block row of A */
291:     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
292:     ib          = aj + *ai;
293:     jmin        = 0;
294:     nonzerorow += (n>0);
295:     if (*ib == i) {     /* (diag of A)*x */
296:       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
297:       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
298:       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
299:       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
300:       v        += 16; jmin++;
301:     }
302:     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
303:     PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
304:     for (j=jmin; j<n; j++) {
305:       /* (strict lower triangular part of A)*x  */
306:       cval       = ib[j]*4;
307:       z[cval]   += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
308:       z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
309:       z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
310:       z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
311:       /* (strict upper triangular part of A)*x  */
312:       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
313:       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
314:       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
315:       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
316:       v        += 16;
317:     }
318:     xb +=4; ai++;
319:   }

321:   VecRestoreArrayRead(xx,&x);
322:   VecRestoreArray(zz,&z);
323:   PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
324:   return(0);
325: }

329: PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
330: {
       /*
          Computes zz = A*xx for 5x5 blocks using only the blocks stored in a->a.
          zz is zeroed first.  For block row i, every stored block v[0..24] at block
          column ib[j] contributes twice:
            z[5*i+r]  += sum_c v[r+5*c]*x[cval+c]      (block applied to x at its column)
            z[cval+c] += sum_r v[r+5*c]*xb[r]          (same block, transposed, applied to row i of x)
          A leading block whose column index equals i is the diagonal block: it is
          applied once and only entries v[r+5*c] with r <= c are read.

          NOTE(review): *ib is dereferenced even when n == 0 - confirm a->j is readable
          there for an empty (last) block row.
          NOTE(review): ierr is declared but unused in this listing; presumably the
          ierr = ...;CHKERRQ(ierr) wrappers were stripped - confirm against the real source.
       */
331:   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
332:   PetscScalar       *z,x1,x2,x3,x4,x5,zero=0.0;
333:   const PetscScalar *x,*xb;
334:   const MatScalar   *v;
335:   PetscErrorCode    ierr;
336:   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
337:   const PetscInt    *aj = a->j,*ai = a->i,*ib;
338:   PetscInt          nonzerorow=0;

341:   VecSet(zz,zero);
342:   VecGetArrayRead(xx,&x);
343:   VecGetArray(zz,&z);

345:   v  = a->a;
346:   xb = x;

       /* ai and xb are advanced once per block row, so ai[0]/ai[1] and xb[0..4] always refer to row i */
348:   for (i=0; i<mbs; i++) {
349:     n           = ai[1] - ai[0]; /* length of i_th block row of A */
350:     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
351:     ib          = aj + *ai;
352:     jmin        = 0;
353:     nonzerorow += (n>0);
354:     if (*ib == i) {      /* (diag of A)*x */
355:       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
356:       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
357:       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
358:       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
359:       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
360:       v        += 25; jmin++;
361:     }
362:     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
363:     PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
364:     for (j=jmin; j<n; j++) {
365:       /* (strict lower triangular part of A)*x  */
366:       cval       = ib[j]*5;
367:       z[cval]   += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
368:       z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
369:       z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
370:       z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
371:       z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
372:       /* (strict upper triangular part of A)*x  */
373:       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
374:       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
375:       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
376:       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
377:       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
378:       v        += 25;
379:     }
380:     xb +=5; ai++;
381:   }

383:   VecRestoreArrayRead(xx,&x);
384:   VecRestoreArray(zz,&z);
385:   PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
386:   return(0);
387: }


392: PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
393: {
       /*
          Computes zz = A*xx for 6x6 blocks using only the blocks stored in a->a.
          zz is zeroed first.  For block row i, every stored block v[0..35] at block
          column ib[j] contributes twice:
            z[6*i+r]  += sum_c v[r+6*c]*x[cval+c]      (block applied to x at its column)
            z[cval+c] += sum_r v[r+6*c]*xb[r]          (same block, transposed, applied to row i of x)
          A leading block whose column index equals i is the diagonal block: it is
          applied once and only entries v[r+6*c] with r <= c are read.

          NOTE(review): *ib is dereferenced even when n == 0 - confirm a->j is readable
          there for an empty (last) block row.
          NOTE(review): ierr is declared but unused in this listing; presumably the
          ierr = ...;CHKERRQ(ierr) wrappers were stripped - confirm against the real source.
       */
394:   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
395:   PetscScalar       *z,x1,x2,x3,x4,x5,x6,zero=0.0;
396:   const PetscScalar *x,*xb;
397:   const MatScalar   *v;
398:   PetscErrorCode    ierr;
399:   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
400:   const PetscInt    *aj=a->j,*ai=a->i,*ib;
401:   PetscInt          nonzerorow=0;

404:   VecSet(zz,zero);
405:   VecGetArrayRead(xx,&x);
406:   VecGetArray(zz,&z);

408:   v  = a->a;
409:   xb = x;

       /* ai and xb are advanced once per block row, so ai[0]/ai[1] and xb[0..5] always refer to row i */
411:   for (i=0; i<mbs; i++) {
412:     n           = ai[1] - ai[0]; /* length of i_th block row of A */
413:     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
414:     ib          = aj + *ai;
415:     jmin        = 0;
416:     nonzerorow += (n>0);
417:     if (*ib == i) {      /* (diag of A)*x */
418:       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
419:       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
420:       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
421:       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
422:       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
423:       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
424:       v        += 36; jmin++;
425:     }
426:     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
427:     PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
428:     for (j=jmin; j<n; j++) {
429:       /* (strict lower triangular part of A)*x  */
430:       cval       = ib[j]*6;
431:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
432:       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
433:       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
434:       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
435:       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
436:       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
437:       /* (strict upper triangular part of A)*x  */
438:       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
439:       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
440:       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
441:       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
442:       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
443:       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
444:       v        += 36;
445:     }
446:     xb +=6; ai++;
447:   }

449:   VecRestoreArrayRead(xx,&x);
450:   VecRestoreArray(zz,&z);
451:   PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
452:   return(0);
453: }
456: PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
457: {
       /*
          Computes zz = A*xx for 7x7 blocks using only the blocks stored in a->a.
          zz is zeroed first.  For block row i, every stored block v[0..48] at block
          column ib[j] contributes twice:
            z[7*i+r]  += sum_c v[r+7*c]*x[cval+c]      (block applied to x at its column)
            z[cval+c] += sum_r v[r+7*c]*xb[r]          (same block, transposed, applied to row i of x)
          A leading block whose column index equals i is the diagonal block: it is
          applied once and only entries v[r+7*c] with r <= c are read.

          NOTE(review): *ib is dereferenced even when n == 0 - confirm a->j is readable
          there for an empty (last) block row.
          NOTE(review): ierr is declared but unused in this listing; presumably the
          ierr = ...;CHKERRQ(ierr) wrappers were stripped - confirm against the real source.
       */
458:   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
459:   PetscScalar       *z,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
460:   const PetscScalar *x,*xb;
461:   const MatScalar   *v;
462:   PetscErrorCode    ierr;
463:   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
464:   const PetscInt    *aj=a->j,*ai=a->i,*ib;
465:   PetscInt          nonzerorow=0;

468:   VecSet(zz,zero);
469:   VecGetArrayRead(xx,&x);
470:   VecGetArray(zz,&z);

472:   v  = a->a;
473:   xb = x;

       /* ai and xb are advanced once per block row, so ai[0]/ai[1] and xb[0..6] always refer to row i */
475:   for (i=0; i<mbs; i++) {
476:     n           = ai[1] - ai[0]; /* length of i_th block row of A */
477:     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
478:     ib          = aj + *ai;
479:     jmin        = 0;
480:     nonzerorow += (n>0);
481:     if (*ib == i) {      /* (diag of A)*x */
482:       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
483:       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
484:       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
485:       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
486:       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
487:       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
488:       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
489:       v        += 49; jmin++;
490:     }
491:     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
492:     PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
493:     for (j=jmin; j<n; j++) {
494:       /* (strict lower triangular part of A)*x  */
495:       cval       = ib[j]*7;
496:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
497:       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
498:       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
499:       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
500:       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
501:       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
502:       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
503:       /* (strict upper triangular part of A)*x  */
504:       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
505:       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
506:       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
507:       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
508:       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
509:       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
510:       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
511:       v       += 49;
512:     }
513:     xb +=7; ai++;
514:   }
515:   VecRestoreArrayRead(xx,&x);
516:   VecRestoreArray(zz,&z);
517:   PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
518:   return(0);
519: }

521: /*
522:     This will not work with MatScalar == float because it calls the BLAS
523: */
526: PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
527: {
       /*
          Computes zz = A*xx for an arbitrary block size bs = A->rmap->bs, using only the
          blocks stored in a->a.  zz is zeroed first.  For each block row the x entries of
          all stored block columns are gathered into the work buffer and one kernel call
          accumulates the row's product into the current bs entries of z; a second kernel
          call forms the transposed product of the non-diagonal blocks with the current
          bs entries of x, which is then scattered into z at the block columns.

          The work buffer a->mult_work (A->rmap->N+1 scalars) is allocated on first use
          and kept on the matrix.

          NOTE(review): x and z are advanced inside the loop, so the restore calls at the
          end receive the advanced pointers, not the bases saved in x_ptr/z_ptr - confirm
          that VecRestoreArrayRead/VecRestoreArray do not need the base pointer.
          NOTE(review): *idx is dereferenced even when n == 0 - confirm a->j is readable
          there for an empty (last) block row.
          NOTE(review): ierr is declared but unused in this listing; presumably the
          ierr = ...;CHKERRQ(ierr) wrappers were stripped - confirm against the real source.
       */
528:   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
529:   PetscScalar       *z,*z_ptr,*zb,*work,*workt,zero=0.0;
530:   const PetscScalar *x,*x_ptr,*xb;
531:   const MatScalar   *v;
532:   PetscErrorCode    ierr;
533:   PetscInt          mbs =a->mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
534:   const PetscInt    *idx,*aj,*ii;
535:   PetscInt          nonzerorow=0;

538:   VecSet(zz,zero);
539:   VecGetArrayRead(xx,&x);x_ptr = x;
540:   VecGetArray(zz,&z); z_ptr=z;

542:   aj = a->j;
543:   v  = a->a;
544:   ii = a->i;

546:   if (!a->mult_work) {
547:     PetscMalloc1(A->rmap->N+1,&a->mult_work);
548:   }
549:   work = a->mult_work;

551:   for (i=0; i<mbs; i++) {
552:     n           = ii[1] - ii[0]; ncols = n*bs;
553:     workt       = work; idx=aj+ii[0];
554:     nonzerorow += (n>0);

556:     /* upper triangular part */
         /* gather the bs entries of x belonging to each stored block column into contiguous work[] */
557:     for (j=0; j<n; j++) {
558:       xb = x_ptr + bs*(*idx++);
559:       for (k=0; k<bs; k++) workt[k] = xb[k];
560:       workt += bs;
561:     }
562:     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
563:     PetscKernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);

565:     /* strict lower triangular part */
566:     idx = aj+ii[0];
         /* skip the diagonal block (already applied above) so it is not counted twice */
567:     if (*idx == i) {
568:       ncols -= bs; v += bs2; idx++; n--;
569:     }

571:     if (ncols > 0) {
572:       workt = work;
573:       PetscMemzero(workt,ncols*sizeof(PetscScalar));
574:       PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
           /* scatter-add each bs-sized piece of the result into z at its block column */
575:       for (j=0; j<n; j++) {
576:         zb = z_ptr + bs*(*idx++);
577:         for (k=0; k<bs; k++) zb[k] += workt[k];
578:         workt += bs;
579:       }
580:     }
         /* n no longer counts the diagonal block here, so v ends up at the start of the next block row */
581:     x += bs; v += n*bs2; z += bs; ii++;
582:   }

584:   VecRestoreArrayRead(xx,&x);
585:   VecRestoreArray(zz,&z);
586:   PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow)*bs2 - nonzerorow);
587:   return(0);
588: }

592: PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
593: {
       /*
          Computes zz = yy + A*xx for block size 1 using only the entries stored in a->a.
          yy is copied into zz first.  For row i, every stored entry at column c adds
          value*x[c] to z[i] and value*x[i] to z[c]; a leading entry whose column index
          equals i is the diagonal and is applied once.

          NOTE(review): *ib is dereferenced even when n == 0 - confirm a->j is readable
          there for an empty (last) row.
          NOTE(review): ierr is declared but unused in this listing; presumably the
          ierr = ...;CHKERRQ(ierr) wrappers were stripped - confirm against the real source.
       */
594:   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
595:   PetscScalar       *z,x1;
596:   const PetscScalar *x,*xb;
597:   const MatScalar   *v;
598:   PetscErrorCode    ierr;
599:   PetscInt          mbs =a->mbs,i,n,cval,j,jmin;
600:   const PetscInt    *aj=a->j,*ai=a->i,*ib;
601:   PetscInt          nonzerorow=0;

604:   VecCopy(yy,zz);
605:   VecGetArrayRead(xx,&x);
606:   VecGetArray(zz,&z);
607:   v    = a->a;
608:   xb   = x;

610:   for (i=0; i<mbs; i++) {
611:     n           = ai[1] - ai[0]; /* length of i_th row of A */
612:     x1          = xb[0];
613:     ib          = aj + *ai;
614:     jmin        = 0;
615:     nonzerorow += (n>0);
616:     if (*ib == i) {            /* (diag of A)*x */
617:       z[i] += *v++ * x[*ib++]; jmin++;
618:     }
         /* ib and v are walked by post-increment here, so j only counts the remaining entries */
619:     for (j=jmin; j<n; j++) {
620:       cval    = *ib;
621:       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
622:       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
623:     }
624:     xb++; ai++;
625:   }

627:   VecRestoreArrayRead(xx,&x);
628:   VecRestoreArray(zz,&z);

630:   PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));
631:   return(0);
632: }

636: PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
637: {
       /*
          Computes zz = yy + A*xx for 2x2 blocks using only the blocks stored in a->a.
          yy is copied into zz first.  For block row i, every stored block v[0..3] at
          block column ib[j] contributes twice:
            z[2*i+r]  += sum_c v[r+2*c]*x[cval+c]      (block applied to x at its column)
            z[cval+c] += sum_r v[r+2*c]*xb[r]          (same block, transposed, applied to row i of x)
          A leading block whose column index equals i is the diagonal block: it is
          applied once and only entries v[r+2*c] with r <= c are read.

          NOTE(review): the flop count logged here uses a factor of 4.0 while
          MatMult_SeqSBAIJ_2 logs 8.0*(...) for the same per-block arithmetic - one of
          the two may be off; confirm.
          NOTE(review): *ib is dereferenced even when n == 0 - confirm a->j is readable
          there for an empty (last) block row.
          NOTE(review): ierr is declared but unused in this listing; presumably the
          ierr = ...;CHKERRQ(ierr) wrappers were stripped - confirm against the real source.
       */
638:   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
639:   PetscScalar       *z,x1,x2;
640:   const PetscScalar *x,*xb;
641:   const MatScalar   *v;
642:   PetscErrorCode    ierr;
643:   PetscInt          mbs =a->mbs,i,n,cval,j,jmin;
644:   const PetscInt    *aj=a->j,*ai=a->i,*ib;
645:   PetscInt          nonzerorow=0;

648:   VecCopy(yy,zz);
649:   VecGetArrayRead(xx,&x);
650:   VecGetArray(zz,&z);

652:   v  = a->a;
653:   xb = x;

       /* ai and xb are advanced once per block row, so ai[0]/ai[1] and xb[0..1] always refer to row i */
655:   for (i=0; i<mbs; i++) {
656:     n           = ai[1] - ai[0]; /* length of i_th block row of A */
657:     x1          = xb[0]; x2 = xb[1];
658:     ib          = aj + *ai;
659:     jmin        = 0;
660:     nonzerorow += (n>0);
661:     if (*ib == i) {      /* (diag of A)*x */
662:       z[2*i]   += v[0]*x1 + v[2]*x2;
663:       z[2*i+1] += v[2]*x1 + v[3]*x2;
664:       v        += 4; jmin++;
665:     }
666:     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
667:     PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
668:     for (j=jmin; j<n; j++) {
669:       /* (strict lower triangular part of A)*x  */
670:       cval       = ib[j]*2;
671:       z[cval]   += v[0]*x1 + v[1]*x2;
672:       z[cval+1] += v[2]*x1 + v[3]*x2;
673:       /* (strict upper triangular part of A)*x  */
674:       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
675:       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
676:       v        += 4;
677:     }
678:     xb +=2; ai++;
679:   }
680:   VecRestoreArrayRead(xx,&x);
681:   VecRestoreArray(zz,&z);

683:   PetscLogFlops(4.0*(a->nz*2.0 - nonzerorow));
684:   return(0);
685: }

689: PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
690: {
       /*
          Computes zz = yy + A*xx for 3x3 blocks using only the blocks stored in a->a.
          yy is copied into zz first.  For block row i, every stored block v[0..8] at
          block column ib[j] contributes twice:
            z[3*i+r]  += sum_c v[r+3*c]*x[cval+c]      (block applied to x at its column)
            z[cval+c] += sum_r v[r+3*c]*xb[r]          (same block, transposed, applied to row i of x)
          A leading block whose column index equals i is the diagonal block: it is
          applied once and only entries v[r+3*c] with r <= c are read.

          NOTE(review): *ib is dereferenced even when n == 0 - confirm a->j is readable
          there for an empty (last) block row.
          NOTE(review): ierr is declared but unused in this listing; presumably the
          ierr = ...;CHKERRQ(ierr) wrappers were stripped - confirm against the real source.
       */
691:   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
692:   PetscScalar       *z,x1,x2,x3;
693:   const PetscScalar *x,*xb;
694:   const MatScalar   *v;
695:   PetscErrorCode    ierr;
696:   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
697:   const PetscInt    *aj=a->j,*ai=a->i,*ib;
698:   PetscInt          nonzerorow=0;

701:   VecCopy(yy,zz);
702:   VecGetArrayRead(xx,&x);
703:   VecGetArray(zz,&z);

705:   v  = a->a;
706:   xb = x;

       /* ai and xb are advanced once per block row, so ai[0]/ai[1] and xb[0..2] always refer to row i */
708:   for (i=0; i<mbs; i++) {
709:     n           = ai[1] - ai[0]; /* length of i_th block row of A */
710:     x1          = xb[0]; x2 = xb[1]; x3 = xb[2];
711:     ib          = aj + *ai;
712:     jmin        = 0;
713:     nonzerorow += (n>0);
714:     if (*ib == i) {     /* (diag of A)*x */
715:       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
716:       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
717:       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
718:       v        += 9; jmin++;
719:     }
720:     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
721:     PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
722:     for (j=jmin; j<n; j++) {
723:       /* (strict lower triangular part of A)*x  */
724:       cval       = ib[j]*3;
725:       z[cval]   += v[0]*x1 + v[1]*x2 + v[2]*x3;
726:       z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3;
727:       z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
728:       /* (strict upper triangular part of A)*x  */
729:       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
730:       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
731:       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
732:       v        += 9;
733:     }
734:     xb +=3; ai++;
735:   }

737:   VecRestoreArrayRead(xx,&x);
738:   VecRestoreArray(zz,&z);

740:   PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow));
741:   return(0);
742: }

746: PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
747: {
       /*
          Computes zz = yy + A*xx for 4x4 blocks using only the blocks stored in a->a.
          yy is copied into zz first.  For block row i, every stored block v[0..15] at
          block column ib[j] contributes twice:
            z[4*i+r]  += sum_c v[r+4*c]*x[cval+c]      (block applied to x at its column)
            z[cval+c] += sum_r v[r+4*c]*xb[r]          (same block, transposed, applied to row i of x)
          A leading block whose column index equals i is the diagonal block: it is
          applied once and only entries v[r+4*c] with r <= c are read.

          NOTE(review): *ib is dereferenced even when n == 0 - confirm a->j is readable
          there for an empty (last) block row.
          NOTE(review): ierr is declared but unused in this listing; presumably the
          ierr = ...;CHKERRQ(ierr) wrappers were stripped - confirm against the real source.
       */
748:   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
749:   PetscScalar       *z,x1,x2,x3,x4;
750:   const PetscScalar *x,*xb;
751:   const MatScalar   *v;
752:   PetscErrorCode    ierr;
753:   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
754:   const PetscInt    *aj=a->j,*ai=a->i,*ib;
755:   PetscInt          nonzerorow=0;

758:   VecCopy(yy,zz);
759:   VecGetArrayRead(xx,&x);
760:   VecGetArray(zz,&z);

762:   v  = a->a;
763:   xb = x;

       /* ai and xb are advanced once per block row, so ai[0]/ai[1] and xb[0..3] always refer to row i */
765:   for (i=0; i<mbs; i++) {
766:     n           = ai[1] - ai[0]; /* length of i_th block row of A */
767:     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
768:     ib          = aj + *ai;
769:     jmin        = 0;
770:     nonzerorow += (n>0);
771:     if (*ib == i) {      /* (diag of A)*x */
772:       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
773:       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
774:       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
775:       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
776:       v        += 16; jmin++;
777:     }
778:     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
779:     PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
780:     for (j=jmin; j<n; j++) {
781:       /* (strict lower triangular part of A)*x  */
782:       cval       = ib[j]*4;
783:       z[cval]   += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
784:       z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
785:       z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
786:       z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
787:       /* (strict upper triangular part of A)*x  */
788:       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
789:       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
790:       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
791:       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
792:       v        += 16;
793:     }
794:     xb +=4; ai++;
795:   }

797:   VecRestoreArrayRead(xx,&x);
798:   VecRestoreArray(zz,&z);

800:   PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow));
801:   return(0);
802: }

806: PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
807: {
       /*
          Computes zz = yy + A*xx for 5x5 blocks using only the blocks stored in a->a.
          yy is copied into zz first.  For block row i, every stored block v[0..24] at
          block column ib[j] contributes twice:
            z[5*i+r]  += sum_c v[r+5*c]*x[cval+c]      (block applied to x at its column)
            z[cval+c] += sum_r v[r+5*c]*xb[r]          (same block, transposed, applied to row i of x)
          A leading block whose column index equals i is the diagonal block: it is
          applied once and only entries v[r+5*c] with r <= c are read.

          NOTE(review): *ib is dereferenced even when n == 0 - confirm a->j is readable
          there for an empty (last) block row.
          NOTE(review): ierr is declared but unused in this listing; presumably the
          ierr = ...;CHKERRQ(ierr) wrappers were stripped - confirm against the real source.
       */
808:   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
809:   PetscScalar       *z,x1,x2,x3,x4,x5;
810:   const PetscScalar *x,*xb;
811:   const MatScalar   *v;
812:   PetscErrorCode    ierr;
813:   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
814:   const PetscInt    *aj=a->j,*ai=a->i,*ib;
815:   PetscInt          nonzerorow=0;

818:   VecCopy(yy,zz);
819:   VecGetArrayRead(xx,&x);
820:   VecGetArray(zz,&z);

822:   v  = a->a;
823:   xb = x;

       /* ai and xb are advanced once per block row, so ai[0]/ai[1] and xb[0..4] always refer to row i */
825:   for (i=0; i<mbs; i++) {
826:     n           = ai[1] - ai[0]; /* length of i_th block row of A */
827:     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
828:     ib          = aj + *ai;
829:     jmin        = 0;
830:     nonzerorow += (n>0);
831:     if (*ib == i) {      /* (diag of A)*x */
832:       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
833:       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
834:       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
835:       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
836:       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
837:       v        += 25; jmin++;
838:     }
839:     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
840:     PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
841:     for (j=jmin; j<n; j++) {
842:       /* (strict lower triangular part of A)*x  */
843:       cval       = ib[j]*5;
844:       z[cval]   += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
845:       z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
846:       z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
847:       z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
848:       z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
849:       /* (strict upper triangular part of A)*x  */
850:       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
851:       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
852:       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
853:       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
854:       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
855:       v        += 25;
856:     }
857:     xb +=5; ai++;
858:   }

860:   VecRestoreArrayRead(xx,&x);
861:   VecRestoreArray(zz,&z);

863:   PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow));
864:   return(0);
865: }
868: PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
869: {
870:   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
871:   PetscScalar       *z,x1,x2,x3,x4,x5,x6;
872:   const PetscScalar *x,*xb;
873:   const MatScalar   *v;
874:   PetscErrorCode    ierr;
875:   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
876:   const PetscInt    *aj=a->j,*ai=a->i,*ib;
877:   PetscInt          nonzerorow=0;

880:   VecCopy(yy,zz);
881:   VecGetArrayRead(xx,&x);
882:   VecGetArray(zz,&z);

884:   v  = a->a;
885:   xb = x;

887:   for (i=0; i<mbs; i++) {
888:     n           = ai[1] - ai[0]; /* length of i_th block row of A */
889:     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
890:     ib          = aj + *ai;
891:     jmin        = 0;
892:     nonzerorow += (n>0);
893:     if (*ib == i) {     /* (diag of A)*x */
894:       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
895:       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
896:       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
897:       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
898:       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
899:       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
900:       v        += 36; jmin++;
901:     }
902:     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
903:     PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
904:     for (j=jmin; j<n; j++) {
905:       /* (strict lower triangular part of A)*x  */
906:       cval       = ib[j]*6;
907:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
908:       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
909:       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
910:       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
911:       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
912:       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
913:       /* (strict upper triangular part of A)*x  */
914:       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
915:       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
916:       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
917:       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
918:       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
919:       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
920:       v        += 36;
921:     }
922:     xb +=6; ai++;
923:   }

925:   VecRestoreArrayRead(xx,&x);
926:   VecRestoreArray(zz,&z);

928:   PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow));
929:   return(0);
930: }

934: PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
935: {
936:   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
937:   PetscScalar       *z,x1,x2,x3,x4,x5,x6,x7;
938:   const PetscScalar *x,*xb;
939:   const MatScalar   *v;
940:   PetscErrorCode    ierr;
941:   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
942:   const PetscInt    *aj=a->j,*ai=a->i,*ib;
943:   PetscInt          nonzerorow=0;

946:   VecCopy(yy,zz);
947:   VecGetArrayRead(xx,&x);
948:   VecGetArray(zz,&z);

950:   v  = a->a;
951:   xb = x;

953:   for (i=0; i<mbs; i++) {
954:     n           = ai[1] - ai[0]; /* length of i_th block row of A */
955:     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
956:     ib          = aj + *ai;
957:     jmin        = 0;
958:     nonzerorow += (n>0);
959:     if (*ib == i) {     /* (diag of A)*x */
960:       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
961:       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
962:       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
963:       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
964:       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
965:       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
966:       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
967:       v        += 49; jmin++;
968:     }
969:     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
970:     PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
971:     for (j=jmin; j<n; j++) {
972:       /* (strict lower triangular part of A)*x  */
973:       cval       = ib[j]*7;
974:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
975:       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
976:       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
977:       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
978:       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
979:       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
980:       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
981:       /* (strict upper triangular part of A)*x  */
982:       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
983:       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
984:       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
985:       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
986:       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
987:       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
988:       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
989:       v       += 49;
990:     }
991:     xb +=7; ai++;
992:   }

994:   VecRestoreArrayRead(xx,&x);
995:   VecRestoreArray(zz,&z);

997:   PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow));
998:   return(0);
999: }

1003: PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1004: {
1005:   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
1006:   PetscScalar       *z,*z_ptr=0,*zb,*work,*workt;
1007:   const PetscScalar *x,*x_ptr,*xb;
1008:   const MatScalar   *v;
1009:   PetscErrorCode    ierr;
1010:   PetscInt          mbs = a->mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
1011:   const PetscInt    *idx,*aj,*ii;
1012:   PetscInt          nonzerorow=0;

1015:   VecCopy(yy,zz);
1016:   VecGetArrayRead(xx,&x); x_ptr=x;
1017:   VecGetArray(zz,&z); z_ptr=z;

1019:   aj = a->j;
1020:   v  = a->a;
1021:   ii = a->i;

1023:   if (!a->mult_work) {
1024:     PetscMalloc1(A->rmap->n+1,&a->mult_work);
1025:   }
1026:   work = a->mult_work;


1029:   for (i=0; i<mbs; i++) {
1030:     n           = ii[1] - ii[0]; ncols = n*bs;
1031:     workt       = work; idx=aj+ii[0];
1032:     nonzerorow += (n>0);

1034:     /* upper triangular part */
1035:     for (j=0; j<n; j++) {
1036:       xb = x_ptr + bs*(*idx++);
1037:       for (k=0; k<bs; k++) workt[k] = xb[k];
1038:       workt += bs;
1039:     }
1040:     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
1041:     PetscKernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);

1043:     /* strict lower triangular part */
1044:     idx = aj+ii[0];
1045:     if (*idx == i) {
1046:       ncols -= bs; v += bs2; idx++; n--;
1047:     }
1048:     if (ncols > 0) {
1049:       workt = work;
1050:       PetscMemzero(workt,ncols*sizeof(PetscScalar));
1051:       PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1052:       for (j=0; j<n; j++) {
1053:         zb = z_ptr + bs*(*idx++);
1054:         for (k=0; k<bs; k++) zb[k] += workt[k];
1055:         workt += bs;
1056:       }
1057:     }

1059:     x += bs; v += n*bs2; z += bs; ii++;
1060:   }

1062:   VecRestoreArrayRead(xx,&x);
1063:   VecRestoreArray(zz,&z);

1065:   PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));
1066:   return(0);
1067: }

1071: PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
1072: {
1073:   Mat_SeqSBAIJ   *a     = (Mat_SeqSBAIJ*)inA->data;
1074:   PetscScalar    oalpha = alpha;
1076:   PetscBLASInt   one = 1,totalnz;

1079:   PetscBLASIntCast(a->bs2*a->nz,&totalnz);
1080:   PetscStackCallBLAS("BLASscal",BLASscal_(&totalnz,&oalpha,a->a,&one));
1081:   PetscLogFlops(totalnz);
1082:   return(0);
1083: }

1087: PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
1088: {
1089:   Mat_SeqSBAIJ    *a       = (Mat_SeqSBAIJ*)A->data;
1090:   const MatScalar *v       = a->a;
1091:   PetscReal       sum_diag = 0.0, sum_off = 0.0, *sum;
1092:   PetscInt        i,j,k,bs = A->rmap->bs,bs2=a->bs2,k1,mbs=a->mbs,jmin,jmax,nexti,ik,*jl,*il;
1093:   PetscErrorCode  ierr;
1094:   const PetscInt  *aj=a->j,*col;

1097:   if (type == NORM_FROBENIUS) {
1098:     for (k=0; k<mbs; k++) {
1099:       jmin = a->i[k]; jmax = a->i[k+1];
1100:       col  = aj + jmin;
1101:       if (*col == k) {         /* diagonal block */
1102:         for (i=0; i<bs2; i++) {
1103:           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
1104:         }
1105:         jmin++;
1106:       }
1107:       for (j=jmin; j<jmax; j++) {  /* off-diagonal blocks */
1108:         for (i=0; i<bs2; i++) {
1109:           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
1110:         }
1111:       }
1112:     }
1113:     *norm = PetscSqrtReal(sum_diag + 2*sum_off);
1114:   } else if (type == NORM_INFINITY || type == NORM_1) {  /* maximum row/column sum */
1115:     PetscMalloc3(bs,&sum,mbs,&il,mbs,&jl);
1116:     for (i=0; i<mbs; i++) jl[i] = mbs;
1117:     il[0] = 0;

1119:     *norm = 0.0;
1120:     for (k=0; k<mbs; k++) { /* k_th block row */
1121:       for (j=0; j<bs; j++) sum[j]=0.0;
1122:       /*-- col sum --*/
1123:       i = jl[k]; /* first |A(i,k)| to be added */
1124:       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1125:                   at step k */
1126:       while (i<mbs) {
1127:         nexti = jl[i];  /* next block row to be added */
1128:         ik    = il[i];  /* block index of A(i,k) in the array a */
1129:         for (j=0; j<bs; j++) {
1130:           v = a->a + ik*bs2 + j*bs;
1131:           for (k1=0; k1<bs; k1++) {
1132:             sum[j] += PetscAbsScalar(*v); v++;
1133:           }
1134:         }
1135:         /* update il, jl */
1136:         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1137:         jmax = a->i[i+1];
1138:         if (jmin < jmax) {
1139:           il[i] = jmin;
1140:           j     = a->j[jmin];
1141:           jl[i] = jl[j]; jl[j]=i;
1142:         }
1143:         i = nexti;
1144:       }
1145:       /*-- row sum --*/
1146:       jmin = a->i[k]; jmax = a->i[k+1];
1147:       for (i=jmin; i<jmax; i++) {
1148:         for (j=0; j<bs; j++) {
1149:           v = a->a + i*bs2 + j;
1150:           for (k1=0; k1<bs; k1++) {
1151:             sum[j] += PetscAbsScalar(*v); v += bs;
1152:           }
1153:         }
1154:       }
1155:       /* add k_th block row to il, jl */
1156:       col = aj+jmin;
1157:       if (*col == k) jmin++;
1158:       if (jmin < jmax) {
1159:         il[k] = jmin;
1160:         j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
1161:       }
1162:       for (j=0; j<bs; j++) {
1163:         if (sum[j] > *norm) *norm = sum[j];
1164:       }
1165:     }
1166:     PetscFree3(sum,il,jl);
1167:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
1168:   return(0);
1169: }

1173: PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscBool * flg)
1174: {
1175:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)B->data;

1179:   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1180:   if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) {
1181:     *flg = PETSC_FALSE;
1182:     return(0);
1183:   }

1185:   /* if the a->i are the same */
1186:   PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);
1187:   if (!*flg) return(0);

1189:   /* if a->j are the same */
1190:   PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);
1191:   if (!*flg) return(0);

1193:   /* if a->a are the same */
1194:   PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(A->rmap->bs)*sizeof(PetscScalar),flg);
1195:   return(0);
1196: }

1200: PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
1201: {
1202:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
1203:   PetscErrorCode  ierr;
1204:   PetscInt        i,j,k,row,bs,ambs,bs2;
1205:   const PetscInt  *ai,*aj;
1206:   PetscScalar     *x,zero = 0.0;
1207:   const MatScalar *aa,*aa_j;

1210:   bs = A->rmap->bs;
1211:   if (A->factortype && bs>1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");

1213:   aa   = a->a;
1214:   ambs = a->mbs;

1216:   if (A->factortype == MAT_FACTOR_CHOLESKY || A->factortype == MAT_FACTOR_ICC) {
1217:     PetscInt *diag=a->diag;
1218:     aa   = a->a;
1219:     ambs = a->mbs;
1220:     VecGetArray(v,&x);
1221:     for (i=0; i<ambs; i++) x[i] = 1.0/aa[diag[i]];
1222:     VecRestoreArray(v,&x);
1223:     return(0);
1224:   }

1226:   ai   = a->i;
1227:   aj   = a->j;
1228:   bs2  = a->bs2;
1229:   VecSet(v,zero);
1230:   VecGetArray(v,&x);
1231:   for (i=0; i<ambs; i++) {
1232:     j=ai[i];
1233:     if (aj[j] == i) {    /* if this is a diagonal element */
1234:       row  = i*bs;
1235:       aa_j = aa + j*bs2;
1236:       for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1237:     }
1238:   }
1239:   VecRestoreArray(v,&x);
1240:   return(0);
1241: }

1245: PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
1246: {
1247:   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
1248:   PetscScalar       x;
1249:   const PetscScalar *l,*li,*ri;
1250:   MatScalar         *aa,*v;
1251:   PetscErrorCode    ierr;
1252:   PetscInt          i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1253:   PetscBool         flg;

1256:   if (ll != rr) {
1257:     VecEqual(ll,rr,&flg);
1258:     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1259:   }
1260:   if (!ll) return(0);
1261:   ai  = a->i;
1262:   aj  = a->j;
1263:   aa  = a->a;
1264:   m   = A->rmap->N;
1265:   bs  = A->rmap->bs;
1266:   mbs = a->mbs;
1267:   bs2 = a->bs2;

1269:   VecGetArrayRead(ll,&l);
1270:   VecGetLocalSize(ll,&lm);
1271:   if (lm != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1272:   for (i=0; i<mbs; i++) { /* for each block row */
1273:     M  = ai[i+1] - ai[i];
1274:     li = l + i*bs;
1275:     v  = aa + bs2*ai[i];
1276:     for (j=0; j<M; j++) { /* for each block */
1277:       ri = l + bs*aj[ai[i]+j];
1278:       for (k=0; k<bs; k++) {
1279:         x = ri[k];
1280:         for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
1281:       }
1282:     }
1283:   }
1284:   VecRestoreArrayRead(ll,&l);
1285:   PetscLogFlops(2.0*a->nz);
1286:   return(0);
1287: }

1291: PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1292: {
1293:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;

1296:   info->block_size   = a->bs2;
1297:   info->nz_allocated = a->bs2*a->maxnz;   /*num. of nonzeros in upper triangular part */
1298:   info->nz_used      = a->bs2*a->nz;   /*num. of nonzeros in upper triangular part */
1299:   info->nz_unneeded  = (double)(info->nz_allocated - info->nz_used);
1300:   info->assemblies   = A->num_ass;
1301:   info->mallocs      = A->info.mallocs;
1302:   info->memory       = ((PetscObject)A)->mem;
1303:   if (A->factortype) {
1304:     info->fill_ratio_given  = A->info.fill_ratio_given;
1305:     info->fill_ratio_needed = A->info.fill_ratio_needed;
1306:     info->factor_mallocs    = A->info.factor_mallocs;
1307:   } else {
1308:     info->fill_ratio_given  = 0;
1309:     info->fill_ratio_needed = 0;
1310:     info->factor_mallocs    = 0;
1311:   }
1312:   return(0);
1313: }


1318: PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
1319: {
1320:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;

1324:   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));
1325:   return(0);
1326: }

1330: /*
1331:    This code does not work since it only checks the upper triangular part of
1332:   the matrix. Hence it is not listed in the function table.
1333: */
1334: PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[])
1335: {
1336:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
1337:   PetscErrorCode  ierr;
1338:   PetscInt        i,j,n,row,col,bs,mbs;
1339:   const PetscInt  *ai,*aj;
1340:   PetscReal       atmp;
1341:   const MatScalar *aa;
1342:   PetscScalar     *x;
1343:   PetscInt        ncols,brow,bcol,krow,kcol;

1346:   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
1347:   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1348:   bs  = A->rmap->bs;
1349:   aa  = a->a;
1350:   ai  = a->i;
1351:   aj  = a->j;
1352:   mbs = a->mbs;

1354:   VecSet(v,0.0);
1355:   VecGetArray(v,&x);
1356:   VecGetLocalSize(v,&n);
1357:   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1358:   for (i=0; i<mbs; i++) {
1359:     ncols = ai[1] - ai[0]; ai++;
1360:     brow  = bs*i;
1361:     for (j=0; j<ncols; j++) {
1362:       bcol = bs*(*aj);
1363:       for (kcol=0; kcol<bs; kcol++) {
1364:         col = bcol + kcol;      /* col index */
1365:         for (krow=0; krow<bs; krow++) {
1366:           atmp = PetscAbsScalar(*aa); aa++;
1367:           row  = brow + krow;   /* row index */
1368:           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1369:           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
1370:         }
1371:       }
1372:       aj++;
1373:     }
1374:   }
1375:   VecRestoreArray(v,&x);
1376:   return(0);
1377: }