Actual source code: sbaij2.c

  1: #define PETSCMAT_DLL

 3:  #include src/mat/impls/baij/seq/baij.h
 4:  #include src/inline/spops.h
 5:  #include src/inline/ilu.h
 6:  #include petscbt.h
 7:  #include src/mat/impls/sbaij/seq/sbaij.h

 11: PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
 12: {
 13:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
 15:   PetscInt       brow,i,j,k,l,mbs,n,*idx,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
 16:   PetscBT        table,table0;

 19:   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
 20:   mbs = a->mbs;
 21:   ai  = a->i;
 22:   aj  = a->j;
 23:   bs  = A->rmap.bs;
 24:   PetscBTCreate(mbs,table);
 25:   PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);
 26:   PetscMalloc((A->rmap.N+1)*sizeof(PetscInt),&nidx2);
 27:   PetscBTCreate(mbs,table0);

 29:   for (i=0; i<is_max; i++) { /* for each is */
 30:     isz  = 0;
 31:     PetscBTMemzero(mbs,table);
 32: 
 33:     /* Extract the indices, assume there can be duplicate entries */
 34:     ISGetIndices(is[i],&idx);
 35:     ISGetLocalSize(is[i],&n);

 37:     /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */
 38:     bcol_max = 0;
 39:     for (j=0; j<n ; ++j){
 40:       brow = idx[j]/bs; /* convert the indices into block indices */
 41:       if (brow >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
 42:       if(!PetscBTLookupSet(table,brow)) {
 43:         nidx[isz++] = brow;
 44:         if (bcol_max < brow) bcol_max = brow;
 45:       }
 46:     }
 47:     ISRestoreIndices(is[i],&idx);
 48:     ISDestroy(is[i]);
 49: 
 50:     k = 0;
 51:     for (j=0; j<ov; j++){ /* for each overlap */
 52:       /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
 53:       PetscBTMemzero(mbs,table0);
 54:       for (l=k; l<isz; l++) { PetscBTSet(table0,nidx[l]); }

 56:       n = isz;  /* length of the updated is[i] */
 57:       for (brow=0; brow<mbs; brow++){
 58:         start = ai[brow]; end   = ai[brow+1];
 59:         if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
 60:           for (l = start; l<end ; l++){
 61:             bcol = aj[l];
 62:             if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;}
 63:           }
 64:           k++;
 65:           if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
 66:         } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
 67:           for (l = start; l<end ; l++){
 68:             bcol = aj[l];
 69:             if (bcol > bcol_max) break;
 70:             if (PetscBTLookup(table0,bcol)){
 71:               if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;}
 72:               break; /* for l = start; l<end ; l++) */
 73:             }
 74:           }
 75:         }
 76:       }
 77:     } /* for each overlap */

 79:     /* expand the Index Set */
 80:     for (j=0; j<isz; j++) {
 81:       for (k=0; k<bs; k++)
 82:         nidx2[j*bs+k] = nidx[j]*bs+k;
 83:     }
 84:     ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);
 85:   }
 86:   PetscBTDestroy(table);
 87:   PetscFree(nidx);
 88:   PetscFree(nidx2);
 89:   PetscBTDestroy(table0);
 90:   return(0);
 91: }

 95: PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
 96: {
 97:   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
 99:   PetscInt       *smap,i,k,kstart,kend,oldcols = a->mbs,*lens;
100:   PetscInt       row,mat_i,*mat_j,tcol,*mat_ilen;
101:   PetscInt       *irow,nrows,*ssmap,bs=A->rmap.bs,bs2=a->bs2;
102:   PetscInt       *aj = a->j,*ai = a->i;
103:   MatScalar      *mat_a;
104:   Mat            C;
105:   PetscTruth     flag;

108:   if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
109:   ISSorted(iscol,(PetscTruth*)&i);
110:   if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");

112:   ISGetIndices(isrow,&irow);
113:   ISGetSize(isrow,&nrows);
114: 
115:   PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);
116:   ssmap = smap;
117:   PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);
118:   PetscMemzero(smap,oldcols*sizeof(PetscInt));
119:   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
120:   /* determine lens of each row */
121:   for (i=0; i<nrows; i++) {
122:     kstart  = ai[irow[i]];
123:     kend    = kstart + a->ilen[irow[i]];
124:     lens[i] = 0;
125:       for (k=kstart; k<kend; k++) {
126:         if (ssmap[aj[k]]) {
127:           lens[i]++;
128:         }
129:       }
130:     }
131:   /* Create and fill new matrix */
132:   if (scall == MAT_REUSE_MATRIX) {
133:     c = (Mat_SeqSBAIJ *)((*B)->data);

135:     if (c->mbs!=nrows || (*B)->rmap.bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
136:     PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);
137:     if (!flag) {
138:       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
139:     }
140:     PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));
141:     C = *B;
142:   } else {
143:     MatCreate(((PetscObject)A)->comm,&C);
144:     MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);
145:     MatSetType(C,((PetscObject)A)->type_name);
146:     MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);
147:   }
148:   c = (Mat_SeqSBAIJ *)(C->data);
149:   for (i=0; i<nrows; i++) {
150:     row    = irow[i];
151:     kstart = ai[row];
152:     kend   = kstart + a->ilen[row];
153:     mat_i  = c->i[i];
154:     mat_j  = c->j + mat_i;
155:     mat_a  = c->a + mat_i*bs2;
156:     mat_ilen = c->ilen + i;
157:     for (k=kstart; k<kend; k++) {
158:       if ((tcol=ssmap[a->j[k]])) {
159:         *mat_j++ = tcol - 1;
160:         PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));
161:         mat_a   += bs2;
162:         (*mat_ilen)++;
163:       }
164:     }
165:   }
166: 
167:   /* Free work space */
168:   PetscFree(smap);
169:   PetscFree(lens);
170:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
171:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
172: 
173:   ISRestoreIndices(isrow,&irow);
174:   *B = C;
175:   return(0);
176: }

180: PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
181: {
182:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
183:   IS             is1;
185:   PetscInt       *vary,*iary,*irow,nrows,i,bs=A->rmap.bs,count;

188:   if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
189: 
190:   ISGetIndices(isrow,&irow);
191:   ISGetSize(isrow,&nrows);
192: 
193:   /* Verify if the indices corespond to each element in a block 
194:    and form the IS with compressed IS */
195:   PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);
196:   iary = vary + a->mbs;
197:   PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));
198:   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
199: 
200:   count = 0;
201:   for (i=0; i<a->mbs; i++) {
202:     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_INCOMP,"Index set does not match blocks");
203:     if (vary[i]==bs) iary[count++] = i;
204:   }
205:   ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);
206: 
207:   ISRestoreIndices(isrow,&irow);
208:   PetscFree(vary);

210:   MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);
211:   ISDestroy(is1);
212:   return(0);
213: }

217: PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
218: {
220:   PetscInt       i;

223:   if (scall == MAT_INITIAL_MATRIX) {
224:     PetscMalloc((n+1)*sizeof(Mat),B);
225:   }

227:   for (i=0; i<n; i++) {
228:     MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);
229:   }
230:   return(0);
231: }

233: /* -------------------------------------------------------*/
234: /* Should check that shapes of vectors and matrices match */
235: /* -------------------------------------------------------*/
236:  #include petscblaslapack.h

240: PetscErrorCode MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz)
241: {
242:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
243:   PetscScalar    *x,*z,*xb,x1,zero=0.0;
244:   MatScalar      *v;
246:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
247:   PetscInt       nonzerorow=0;

250:   VecSet(zz,zero);
251:   VecGetArray(xx,&x);
252:   VecGetArray(zz,&z);

254:   v  = a->a;
255:   xb = x;
256: 
257:   for (i=0; i<mbs; i++) {
258:     n    = ai[1] - ai[0];  /* length of i_th row of A */
259:     x1   = *xb++;
260:     ib   = aj + *ai++;
261:     jmin = 0;
262:     nonzerorow += (n>0);
263:     /* if we ALWAYS required a diagonal entry then could remove this if test */
264:     /* should we use a tmp to hold the accumulated z[i] */
265:     if (*ib == i) {      /* (diag of A)*x */
266:       z[i] += *v++ * x[*ib++];
267:       jmin++;
268:     }
269:     for (j=jmin; j<n; j++) {
270:       cval    = *ib;
271:       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
272:       z[i]    += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
273:     }
274:   }

276:   VecRestoreArray(xx,&x);
277:   VecRestoreArray(zz,&z);
278:   PetscLogFlops(2*(a->nz*2 - nonzerorow) - nonzerorow);  /* nz = (nz+m)/2 */
279:   return(0);
280: }

284: PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
285: {
286:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
287:   PetscScalar    *x,*z,*xb,x1,x2,zero=0.0;
288:   MatScalar      *v;
290:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
291:   PetscInt       nonzerorow=0;

294:   VecSet(zz,zero);
295:   VecGetArray(xx,&x);
296:   VecGetArray(zz,&z);
297: 
298:   v     = a->a;
299:   xb = x;

301:   for (i=0; i<mbs; i++) {
302:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
303:     x1 = xb[0]; x2 = xb[1];
304:     ib = aj + *ai;
305:     jmin = 0;
306:     nonzerorow += (n>0);
307:     if (*ib == i){     /* (diag of A)*x */
308:       z[2*i]   += v[0]*x1 + v[2]*x2;
309:       z[2*i+1] += v[2]*x1 + v[3]*x2;
310:       v += 4; jmin++;
311:     }
312:     for (j=jmin; j<n; j++) {
313:       /* (strict lower triangular part of A)*x  */
314:       cval       = ib[j]*2;
315:       z[cval]     += v[0]*x1 + v[1]*x2;
316:       z[cval+1]   += v[2]*x1 + v[3]*x2;
317:       /* (strict upper triangular part of A)*x  */
318:       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
319:       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
320:       v  += 4;
321:     }
322:     xb +=2; ai++;
323:   }

325:   VecRestoreArray(xx,&x);
326:   VecRestoreArray(zz,&z);
327:   PetscLogFlops(8*(a->nz*2 - nonzerorow) - nonzerorow);
328:   return(0);
329: }

333: PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
334: {
335:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
336:   PetscScalar    *x,*z,*xb,x1,x2,x3,zero=0.0;
337:   MatScalar      *v;
339:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
340:   PetscInt       nonzerorow=0;

343:   VecSet(zz,zero);
344:   VecGetArray(xx,&x);
345:   VecGetArray(zz,&z);
346: 
347:   v    = a->a;
348:   xb   = x;

350:   for (i=0; i<mbs; i++) {
351:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
352:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
353:     ib = aj + *ai;
354:     jmin = 0;
355:     nonzerorow += (n>0);
356:     if (*ib == i){     /* (diag of A)*x */
357:       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
358:       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
359:       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
360:       v += 9; jmin++;
361:     }
362:     for (j=jmin; j<n; j++) {
363:       /* (strict lower triangular part of A)*x  */
364:       cval       = ib[j]*3;
365:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
366:       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
367:       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
368:       /* (strict upper triangular part of A)*x  */
369:       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
370:       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
371:       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
372:       v  += 9;
373:     }
374:     xb +=3; ai++;
375:   }

377:   VecRestoreArray(xx,&x);
378:   VecRestoreArray(zz,&z);
379:   PetscLogFlops(18*(a->nz*2 - nonzerorow) - nonzerorow);
380:   return(0);
381: }

385: PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
386: {
387:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
388:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
389:   MatScalar      *v;
391:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
392:   PetscInt       nonzerorow=0;

395:   VecSet(zz,zero);
396:   VecGetArray(xx,&x);
397:   VecGetArray(zz,&z);
398: 
399:   v     = a->a;
400:   xb = x;

402:   for (i=0; i<mbs; i++) {
403:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
404:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
405:     ib = aj + *ai;
406:     jmin = 0;
407:     nonzerorow += (n>0);
408:     if (*ib == i){     /* (diag of A)*x */
409:       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
410:       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
411:       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
412:       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
413:       v += 16; jmin++;
414:     }
415:     for (j=jmin; j<n; j++) {
416:       /* (strict lower triangular part of A)*x  */
417:       cval       = ib[j]*4;
418:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
419:       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
420:       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
421:       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
422:       /* (strict upper triangular part of A)*x  */
423:       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
424:       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
425:       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
426:       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
427:       v  += 16;
428:     }
429:     xb +=4; ai++;
430:   }

432:   VecRestoreArray(xx,&x);
433:   VecRestoreArray(zz,&z);
434:   PetscLogFlops(32*(a->nz*2 - nonzerorow) - nonzerorow);
435:   return(0);
436: }

440: PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
441: {
442:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
443:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
444:   MatScalar      *v;
446:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
447:   PetscInt       nonzerorow=0;

450:   VecSet(zz,zero);
451:   VecGetArray(xx,&x);
452:   VecGetArray(zz,&z);
453: 
454:   v     = a->a;
455:   xb = x;

457:   for (i=0; i<mbs; i++) {
458:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
459:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
460:     ib = aj + *ai;
461:     jmin = 0;
462:     nonzerorow += (n>0);
463:     if (*ib == i){      /* (diag of A)*x */
464:       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
465:       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
466:       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
467:       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
468:       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
469:       v += 25; jmin++;
470:     }
471:     for (j=jmin; j<n; j++) {
472:       /* (strict lower triangular part of A)*x  */
473:       cval       = ib[j]*5;
474:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
475:       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
476:       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
477:       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
478:       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
479:       /* (strict upper triangular part of A)*x  */
480:       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
481:       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
482:       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
483:       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
484:       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
485:       v  += 25;
486:     }
487:     xb +=5; ai++;
488:   }

490:   VecRestoreArray(xx,&x);
491:   VecRestoreArray(zz,&z);
492:   PetscLogFlops(50*(a->nz*2 - nonzerorow) - nonzerorow);
493:   return(0);
494: }


499: PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
500: {
501:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
502:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
503:   MatScalar      *v;
505:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
506:   PetscInt       nonzerorow=0;

509:   VecSet(zz,zero);
510:   VecGetArray(xx,&x);
511:   VecGetArray(zz,&z);
512: 
513:   v     = a->a;
514:   xb = x;

516:   for (i=0; i<mbs; i++) {
517:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
518:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
519:     ib = aj + *ai;
520:     jmin = 0;
521:     nonzerorow += (n>0);
522:     if (*ib == i){      /* (diag of A)*x */
523:       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
524:       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
525:       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
526:       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
527:       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
528:       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
529:       v += 36; jmin++;
530:     }
531:     for (j=jmin; j<n; j++) {
532:       /* (strict lower triangular part of A)*x  */
533:       cval       = ib[j]*6;
534:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
535:       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
536:       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
537:       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
538:       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
539:       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
540:       /* (strict upper triangular part of A)*x  */
541:       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
542:       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
543:       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
544:       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
545:       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
546:       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
547:       v  += 36;
548:     }
549:     xb +=6; ai++;
550:   }

552:   VecRestoreArray(xx,&x);
553:   VecRestoreArray(zz,&z);
554:   PetscLogFlops(72*(a->nz*2 - nonzerorow) - nonzerorow);
555:   return(0);
556: }
559: PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
560: {
561:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
562:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
563:   MatScalar      *v;
565:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
566:   PetscInt       nonzerorow=0;

569:   VecSet(zz,zero);
570:   VecGetArray(xx,&x);
571:   VecGetArray(zz,&z);
572: 
573:   v     = a->a;
574:   xb = x;

576:   for (i=0; i<mbs; i++) {
577:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
578:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
579:     ib = aj + *ai;
580:     jmin = 0;
581:     nonzerorow += (n>0);
582:     if (*ib == i){      /* (diag of A)*x */
583:       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
584:       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
585:       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
586:       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
587:       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
588:       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
589:       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
590:       v += 49; jmin++;
591:     }
592:     for (j=jmin; j<n; j++) {
593:       /* (strict lower triangular part of A)*x  */
594:       cval       = ib[j]*7;
595:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
596:       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
597:       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
598:       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
599:       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
600:       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
601:       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
602:       /* (strict upper triangular part of A)*x  */
603:       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
604:       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
605:       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
606:       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
607:       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
608:       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
609:       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
610:       v  += 49;
611:     }
612:     xb +=7; ai++;
613:   }
614:   VecRestoreArray(xx,&x);
615:   VecRestoreArray(zz,&z);
616:   PetscLogFlops(98*(a->nz*2 - nonzerorow) - nonzerorow);
617:   return(0);
618: }

620: /*
621:     This will not work with MatScalar == float because it calls the BLAS
622: */
625: PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
626: {
627:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
628:   PetscScalar    *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
629:   MatScalar      *v;
631:   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap.bs,j,n,bs2=a->bs2,ncols,k;
632:   PetscInt       nonzerorow=0;

635:   VecSet(zz,zero);
636:   VecGetArray(xx,&x); x_ptr=x;
637:   VecGetArray(zz,&z); z_ptr=z;

639:   aj   = a->j;
640:   v    = a->a;
641:   ii   = a->i;

643:   if (!a->mult_work) {
644:     PetscMalloc((A->rmap.N+1)*sizeof(PetscScalar),&a->mult_work);
645:   }
646:   work = a->mult_work;
647: 
648:   for (i=0; i<mbs; i++) {
649:     n     = ii[1] - ii[0]; ncols = n*bs;
650:     workt = work; idx=aj+ii[0];
651:     nonzerorow += (n>0);

653:     /* upper triangular part */
654:     for (j=0; j<n; j++) {
655:       xb = x_ptr + bs*(*idx++);
656:       for (k=0; k<bs; k++) workt[k] = xb[k];
657:       workt += bs;
658:     }
659:     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
660:     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
661: 
662:     /* strict lower triangular part */
663:     idx = aj+ii[0];
664:     if (*idx == i){
665:       ncols -= bs; v += bs2; idx++; n--;
666:     }
667: 
668:     if (ncols > 0){
669:       workt = work;
670:       PetscMemzero(workt,ncols*sizeof(PetscScalar));
671:       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
672:       for (j=0; j<n; j++) {
673:         zb = z_ptr + bs*(*idx++);
674:         for (k=0; k<bs; k++) zb[k] += workt[k] ;
675:         workt += bs;
676:       }
677:     }
678:     x += bs; v += n*bs2; z += bs; ii++;
679:   }
680: 
681:   VecRestoreArray(xx,&x);
682:   VecRestoreArray(zz,&z);
683:   PetscLogFlops(2*(a->nz*2 - nonzerorow)*bs2 - nonzerorow);
684:   return(0);
685: }

690: PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
691: {
692:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
693:   PetscScalar    *x,*z,*xb,x1;
694:   MatScalar      *v;
696:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
697:   PetscInt       nonzerorow=0;

700:   VecCopy_Seq(yy,zz);
701:   VecGetArray(xx,&x);
702:   VecGetArray(zz,&z);
703:   v  = a->a;
704:   xb = x;

706:   for (i=0; i<mbs; i++) {
707:     n  = ai[1] - ai[0];  /* length of i_th row of A */
708:     x1 = xb[0];
709:     ib = aj + *ai;
710:     jmin = 0;
711:     nonzerorow += (n>0);
712:     if (*ib == i) {            /* (diag of A)*x */
713:       z[i] += *v++ * x[*ib++]; jmin++;
714:     }
715:     for (j=jmin; j<n; j++) {
716:       cval    = *ib;
717:       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
718:       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
719:     }
720:     xb++; ai++;
721:   }

723:   VecRestoreArray(xx,&x);
724:   VecRestoreArray(zz,&z);
725: 
726:   PetscLogFlops(2*(a->nz*2 - nonzerorow));
727:   return(0);
728: }

732: PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
733: {
734:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
735:   PetscScalar    *x,*z,*xb,x1,x2;
736:   MatScalar      *v;
738:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
739:   PetscInt       nonzerorow=0;

742:   VecCopy_Seq(yy,zz);
743:   VecGetArray(xx,&x);
744:   VecGetArray(zz,&z);

746:   v  = a->a;
747:   xb = x;

749:   for (i=0; i<mbs; i++) {
750:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
751:     x1 = xb[0]; x2 = xb[1];
752:     ib = aj + *ai;
753:     jmin = 0;
754:     nonzerorow += (n>0);
755:     if (*ib == i){      /* (diag of A)*x */
756:       z[2*i]   += v[0]*x1 + v[2]*x2;
757:       z[2*i+1] += v[2]*x1 + v[3]*x2;
758:       v += 4; jmin++;
759:     }
760:     for (j=jmin; j<n; j++) {
761:       /* (strict lower triangular part of A)*x  */
762:       cval       = ib[j]*2;
763:       z[cval]     += v[0]*x1 + v[1]*x2;
764:       z[cval+1]   += v[2]*x1 + v[3]*x2;
765:       /* (strict upper triangular part of A)*x  */
766:       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
767:       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
768:       v  += 4;
769:     }
770:     xb +=2; ai++;
771:   }
772:   VecRestoreArray(xx,&x);
773:   VecRestoreArray(zz,&z);

775:   PetscLogFlops(4*(a->nz*2 - nonzerorow));
776:   return(0);
777: }

781: PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
782: {
783:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
784:   PetscScalar    *x,*z,*xb,x1,x2,x3;
785:   MatScalar      *v;
787:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
788:   PetscInt       nonzerorow=0;

791:   VecCopy_Seq(yy,zz);
792:   VecGetArray(xx,&x);
793:   VecGetArray(zz,&z);

795:   v     = a->a;
796:   xb = x;

798:   for (i=0; i<mbs; i++) {
799:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
800:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
801:     ib = aj + *ai;
802:     jmin = 0;
803:     nonzerorow += (n>0);
804:     if (*ib == i){     /* (diag of A)*x */
805:      z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
806:      z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
807:      z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
808:      v += 9; jmin++;
809:     }
810:     for (j=jmin; j<n; j++) {
811:       /* (strict lower triangular part of A)*x  */
812:       cval       = ib[j]*3;
813:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
814:       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
815:       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
816:       /* (strict upper triangular part of A)*x  */
817:       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
818:       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
819:       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
820:       v  += 9;
821:     }
822:     xb +=3; ai++;
823:   }

825:   VecRestoreArray(xx,&x);
826:   VecRestoreArray(zz,&z);

828:   PetscLogFlops(18*(a->nz*2 - nonzerorow));
829:   return(0);
830: }

834: PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
835: {
836:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
837:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4;
838:   MatScalar      *v;
840:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
841:   PetscInt       nonzerorow=0;

844:   VecCopy_Seq(yy,zz);
845:   VecGetArray(xx,&x);
846:   VecGetArray(zz,&z);

848:   v     = a->a;
849:   xb = x;

851:   for (i=0; i<mbs; i++) {
852:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
853:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
854:     ib = aj + *ai;
855:     jmin = 0;
856:     nonzerorow += (n>0);
857:     if (*ib == i){      /* (diag of A)*x */
858:       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
859:       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
860:       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
861:       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
862:       v += 16; jmin++;
863:     }
864:     for (j=jmin; j<n; j++) {
865:       /* (strict lower triangular part of A)*x  */
866:       cval       = ib[j]*4;
867:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
868:       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
869:       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
870:       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
871:       /* (strict upper triangular part of A)*x  */
872:       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
873:       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
874:       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
875:       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
876:       v  += 16;
877:     }
878:     xb +=4; ai++;
879:   }

881:   VecRestoreArray(xx,&x);
882:   VecRestoreArray(zz,&z);

884:   PetscLogFlops(32*(a->nz*2 - nonzerorow));
885:   return(0);
886: }

890: PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
891: {
892:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
893:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5;
894:   MatScalar      *v;
896:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
897:   PetscInt       nonzerorow=0;

900:   VecCopy_Seq(yy,zz);
901:   VecGetArray(xx,&x);
902:   VecGetArray(zz,&z);

904:   v     = a->a;
905:   xb = x;

907:   for (i=0; i<mbs; i++) {
908:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
909:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
910:     ib = aj + *ai;
911:     jmin = 0;
912:     nonzerorow += (n>0);
913:     if (*ib == i){      /* (diag of A)*x */
914:       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
915:       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
916:       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
917:       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
918:       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
919:       v += 25; jmin++;
920:     }
921:     for (j=jmin; j<n; j++) {
922:       /* (strict lower triangular part of A)*x  */
923:       cval       = ib[j]*5;
924:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
925:       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
926:       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
927:       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
928:       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
929:       /* (strict upper triangular part of A)*x  */
930:       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
931:       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
932:       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
933:       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
934:       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
935:       v  += 25;
936:     }
937:     xb +=5; ai++;
938:   }

940:   VecRestoreArray(xx,&x);
941:   VecRestoreArray(zz,&z);

943:   PetscLogFlops(50*(a->nz*2 - nonzerorow));
944:   return(0);
945: }
948: PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
949: {
950:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
951:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6;
952:   MatScalar      *v;
954:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
955:   PetscInt       nonzerorow=0;

958:   VecCopy_Seq(yy,zz);
959:   VecGetArray(xx,&x);
960:   VecGetArray(zz,&z);

962:   v     = a->a;
963:   xb = x;

965:   for (i=0; i<mbs; i++) {
966:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
967:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
968:     ib = aj + *ai;
969:     jmin = 0;
970:     nonzerorow += (n>0);
971:     if (*ib == i){     /* (diag of A)*x */
972:       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
973:       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
974:       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
975:       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
976:       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
977:       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
978:       v += 36; jmin++;
979:     }
980:     for (j=jmin; j<n; j++) {
981:       /* (strict lower triangular part of A)*x  */
982:       cval       = ib[j]*6;
983:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
984:       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
985:       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
986:       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
987:       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
988:       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
989:       /* (strict upper triangular part of A)*x  */
990:       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
991:       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
992:       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
993:       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
994:       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
995:       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
996:       v  += 36;
997:     }
998:     xb +=6; ai++;
999:   }

1001:   VecRestoreArray(xx,&x);
1002:   VecRestoreArray(zz,&z);

1004:   PetscLogFlops(72*(a->nz*2 - nonzerorow));
1005:   return(0);
1006: }

1010: PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1011: {
1012:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1013:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
1014:   MatScalar      *v;
1016:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
1017:   PetscInt       nonzerorow=0;

1020:   VecCopy_Seq(yy,zz);
1021:   VecGetArray(xx,&x);
1022:   VecGetArray(zz,&z);

1024:   v     = a->a;
1025:   xb = x;

1027:   for (i=0; i<mbs; i++) {
1028:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
1029:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
1030:     ib = aj + *ai;
1031:     jmin = 0;
1032:     nonzerorow += (n>0);
1033:     if (*ib == i){     /* (diag of A)*x */
1034:       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
1035:       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
1036:       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
1037:       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
1038:       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
1039:       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
1040:       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1041:       v += 49; jmin++;
1042:     }
1043:     for (j=jmin; j<n; j++) {
1044:       /* (strict lower triangular part of A)*x  */
1045:       cval       = ib[j]*7;
1046:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
1047:       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
1048:       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
1049:       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
1050:       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
1051:       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
1052:       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1053:       /* (strict upper triangular part of A)*x  */
1054:       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
1055:       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
1056:       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
1057:       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
1058:       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
1059:       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
1060:       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
1061:       v  += 49;
1062:     }
1063:     xb +=7; ai++;
1064:   }

1066:   VecRestoreArray(xx,&x);
1067:   VecRestoreArray(zz,&z);

1069:   PetscLogFlops(98*(a->nz*2 - nonzerorow));
1070:   return(0);
1071: }

1075: PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1076: {
1077:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1078:   PetscScalar    *x,*x_ptr,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1079:   MatScalar      *v;
1081:   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap.bs,j,n,bs2=a->bs2,ncols,k;
1082:   PetscInt       nonzerorow=0;

1085:   VecCopy_Seq(yy,zz);
1086:   VecGetArray(xx,&x); x_ptr=x;
1087:   VecGetArray(zz,&z); z_ptr=z;

1089:   aj   = a->j;
1090:   v    = a->a;
1091:   ii   = a->i;

1093:   if (!a->mult_work) {
1094:     PetscMalloc((A->rmap.n+1)*sizeof(PetscScalar),&a->mult_work);
1095:   }
1096:   work = a->mult_work;
1097: 
1098: 
1099:   for (i=0; i<mbs; i++) {
1100:     n     = ii[1] - ii[0]; ncols = n*bs;
1101:     workt = work; idx=aj+ii[0];
1102:     nonzerorow += (n>0);

1104:     /* upper triangular part */
1105:     for (j=0; j<n; j++) {
1106:       xb = x_ptr + bs*(*idx++);
1107:       for (k=0; k<bs; k++) workt[k] = xb[k];
1108:       workt += bs;
1109:     }
1110:     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
1111:     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);

1113:     /* strict lower triangular part */
1114:     idx = aj+ii[0];
1115:     if (*idx == i){
1116:       ncols -= bs; v += bs2; idx++; n--;
1117:     }
1118:     if (ncols > 0){
1119:       workt = work;
1120:       PetscMemzero(workt,ncols*sizeof(PetscScalar));
1121:       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1122:       for (j=0; j<n; j++) {
1123:         zb = z_ptr + bs*(*idx++);
1124:         for (k=0; k<bs; k++) zb[k] += workt[k] ;
1125:         workt += bs;
1126:       }
1127:     }

1129:     x += bs; v += n*bs2; z += bs; ii++;
1130:   }

1132:   VecRestoreArray(xx,&x);
1133:   VecRestoreArray(zz,&z);

1135:   PetscLogFlops(2*(a->nz*2 - nonzerorow));
1136:   return(0);
1137: }

1141: PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
1142: {
1143:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1144:   PetscScalar oalpha = alpha;
1145:   PetscBLASInt one = 1,totalnz = (PetscBLASInt)a->bs2*a->nz;

1149:   BLASscal_(&totalnz,&oalpha,a->a,&one);
1150:   PetscLogFlops(totalnz);
1151:   return(0);
1152: }

1156: PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
1157: {
1158:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1159:   MatScalar      *v = a->a;
1160:   PetscReal      sum_diag = 0.0, sum_off = 0.0, *sum;
1161:   PetscInt       i,j,k,bs = A->rmap.bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
1163:   PetscInt       *jl,*il,jmin,jmax,nexti,ik,*col;
1164: 
1166:   if (type == NORM_FROBENIUS) {
1167:     for (k=0; k<mbs; k++){
1168:       jmin = a->i[k]; jmax = a->i[k+1];
1169:       col  = aj + jmin;
1170:       if (*col == k){         /* diagonal block */
1171:         for (i=0; i<bs2; i++){
1172: #if defined(PETSC_USE_COMPLEX)
1173:           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
1174: #else
1175:           sum_diag += (*v)*(*v); v++;
1176: #endif
1177:         }
1178:         jmin++;
1179:       }
1180:       for (j=jmin; j<jmax; j++){  /* off-diagonal blocks */
1181:         for (i=0; i<bs2; i++){
1182: #if defined(PETSC_USE_COMPLEX)
1183:           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
1184: #else
1185:           sum_off += (*v)*(*v); v++;
1186: #endif  
1187:         }
1188:       }
1189:     }
1190:     *norm = sqrt(sum_diag + 2*sum_off);
1191:   }  else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */
1192:     PetscMalloc((2*mbs+1)*sizeof(PetscInt)+bs*sizeof(PetscReal),&il);
1193:     jl   = il + mbs;
1194:     sum  = (PetscReal*)(jl + mbs);
1195:     for (i=0; i<mbs; i++) jl[i] = mbs;
1196:     il[0] = 0;

1198:     *norm = 0.0;
1199:     for (k=0; k<mbs; k++) { /* k_th block row */
1200:       for (j=0; j<bs; j++) sum[j]=0.0;
1201:       /*-- col sum --*/
1202:       i = jl[k]; /* first |A(i,k)| to be added */
1203:       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1204:                   at step k */
1205:       while (i<mbs){
1206:         nexti = jl[i];  /* next block row to be added */
1207:         ik    = il[i];  /* block index of A(i,k) in the array a */
1208:         for (j=0; j<bs; j++){
1209:           v = a->a + ik*bs2 + j*bs;
1210:           for (k1=0; k1<bs; k1++) {
1211:             sum[j] += PetscAbsScalar(*v); v++;
1212:           }
1213:         }
1214:         /* update il, jl */
1215:         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1216:         jmax = a->i[i+1];
1217:         if (jmin < jmax){
1218:           il[i] = jmin;
1219:           j   = a->j[jmin];
1220:           jl[i] = jl[j]; jl[j]=i;
1221:         }
1222:         i = nexti;
1223:       }
1224:       /*-- row sum --*/
1225:       jmin = a->i[k]; jmax = a->i[k+1];
1226:       for (i=jmin; i<jmax; i++) {
1227:         for (j=0; j<bs; j++){
1228:           v = a->a + i*bs2 + j;
1229:           for (k1=0; k1<bs; k1++){
1230:             sum[j] += PetscAbsScalar(*v); v += bs;
1231:           }
1232:         }
1233:       }
1234:       /* add k_th block row to il, jl */
1235:       col = aj+jmin;
1236:       if (*col == k) jmin++;
1237:       if (jmin < jmax){
1238:         il[k] = jmin;
1239:         j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
1240:       }
1241:       for (j=0; j<bs; j++){
1242:         if (sum[j] > *norm) *norm = sum[j];
1243:       }
1244:     }
1245:     PetscFree(il);
1246:   } else {
1247:     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1248:   }
1249:   return(0);
1250: }

1254: PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
1255: {
1256:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;


1261:   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1262:   if ((A->rmap.N != B->rmap.N) || (A->cmap.n != B->cmap.n) || (A->rmap.bs != B->rmap.bs)|| (a->nz != b->nz)) {
1263:     *flg = PETSC_FALSE;
1264:     return(0);
1265:   }
1266: 
1267:   /* if the a->i are the same */
1268:   PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);
1269:   if (!*flg) {
1270:     return(0);
1271:   }
1272: 
1273:   /* if a->j are the same */
1274:   PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);
1275:   if (!*flg) {
1276:     return(0);
1277:   }
1278:   /* if a->a are the same */
1279:   PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap.bs)*(A->rmap.bs)*sizeof(PetscScalar),flg);
1280:   return(0);
1281: }

1285: PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
1286: {
1287:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1289:   PetscInt       i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1290:   PetscScalar    *x,zero = 0.0;
1291:   MatScalar      *aa,*aa_j;

1294:   bs   = A->rmap.bs;
1295:   if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
1296: 
1297:   aa   = a->a;
1298:   ai   = a->i;
1299:   aj   = a->j;
1300:   ambs = a->mbs;
1301:   bs2  = a->bs2;

1303:   VecSet(v,zero);
1304:   VecGetArray(v,&x);
1305:   VecGetLocalSize(v,&n);
1306:   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1307:   for (i=0; i<ambs; i++) {
1308:     j=ai[i];
1309:     if (aj[j] == i) {             /* if this is a diagonal element */
1310:       row  = i*bs;
1311:       aa_j = aa + j*bs2;
1312:       if (A->factor && bs==1){
1313:         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k];
1314:       } else {
1315:         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1316:       }
1317:     }
1318:   }
1319: 
1320:   VecRestoreArray(v,&x);
1321:   return(0);
1322: }

1326: PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
1327: {
1328:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1329:   PetscScalar    *l,x,*li,*ri;
1330:   MatScalar      *aa,*v;
1332:   PetscInt       i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1333:   PetscTruth     flg;

1336:   if (ll != rr){
1337:     VecEqual(ll,rr,&flg);
1338:     if (!flg)
1339:       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1340:   }
1341:   if (!ll) return(0);
1342:   ai  = a->i;
1343:   aj  = a->j;
1344:   aa  = a->a;
1345:   m   = A->rmap.N;
1346:   bs  = A->rmap.bs;
1347:   mbs = a->mbs;
1348:   bs2 = a->bs2;

1350:   VecGetArray(ll,&l);
1351:   VecGetLocalSize(ll,&lm);
1352:   if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1353:   for (i=0; i<mbs; i++) { /* for each block row */
1354:     M  = ai[i+1] - ai[i];
1355:     li = l + i*bs;
1356:     v  = aa + bs2*ai[i];
1357:     for (j=0; j<M; j++) { /* for each block */
1358:       ri = l + bs*aj[ai[i]+j];
1359:       for (k=0; k<bs; k++) {
1360:         x = ri[k];
1361:         for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
1362:       }
1363:     }
1364:   }
1365:   VecRestoreArray(ll,&l);
1366:   PetscLogFlops(2*a->nz);
1367:   return(0);
1368: }

1372: PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1373: {
1374:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;

1377:   info->rows_global    = (double)A->rmap.N;
1378:   info->columns_global = (double)A->rmap.N;
1379:   info->rows_local     = (double)A->rmap.N;
1380:   info->columns_local  = (double)A->rmap.N;
1381:   info->block_size     = a->bs2;
1382:   info->nz_allocated   = a->maxnz; /*num. of nonzeros in upper triangular part */
1383:   info->nz_used        = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
1384:   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1385:   info->assemblies   = A->num_ass;
1386:   info->mallocs      = a->reallocs;
1387:   info->memory       = ((PetscObject)A)->mem;
1388:   if (A->factor) {
1389:     info->fill_ratio_given  = A->info.fill_ratio_given;
1390:     info->fill_ratio_needed = A->info.fill_ratio_needed;
1391:     info->factor_mallocs    = A->info.factor_mallocs;
1392:   } else {
1393:     info->fill_ratio_given  = 0;
1394:     info->fill_ratio_needed = 0;
1395:     info->factor_mallocs    = 0;
1396:   }
1397:   return(0);
1398: }


1403: PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
1404: {
1405:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;

1409:   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));
1410:   return(0);
1411: }

1415: /* 
1416:    This code does not work since it only checks the upper triangular part of
1417:   the matrix. Hence it is not listed in the function table.
1418: */
1419: PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[])
1420: {
1421:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1423:   PetscInt       i,j,n,row,col,bs,*ai,*aj,mbs;
1424:   PetscReal      atmp;
1425:   MatScalar      *aa;
1426:   PetscScalar    *x;
1427:   PetscInt       ncols,brow,bcol,krow,kcol;

1430:   if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
1431:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1432:   bs   = A->rmap.bs;
1433:   aa   = a->a;
1434:   ai   = a->i;
1435:   aj   = a->j;
1436:   mbs = a->mbs;

1438:   VecSet(v,0.0);
1439:   VecGetArray(v,&x);
1440:   VecGetLocalSize(v,&n);
1441:   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1442:   for (i=0; i<mbs; i++) {
1443:     ncols = ai[1] - ai[0]; ai++;
1444:     brow  = bs*i;
1445:     for (j=0; j<ncols; j++){
1446:       bcol = bs*(*aj);
1447:       for (kcol=0; kcol<bs; kcol++){
1448:         col = bcol + kcol;      /* col index */
1449:         for (krow=0; krow<bs; krow++){
1450:           atmp = PetscAbsScalar(*aa); aa++;
1451:           row = brow + krow;    /* row index */
1452:           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1453:           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
1454:         }
1455:       }
1456:       aj++;
1457:     }
1458:   }
1459:   VecRestoreArray(v,&x);
1460:   return(0);
1461: }