Actual source code: sbaij2.c

  1: /*$Id: sbaij2.c,v 1.29 2001/03/23 23:22:21 balay Exp $*/

  3: #include "petscsys.h"
  4: #include "src/mat/impls/baij/seq/baij.h"
  5: #include "src/vec/vecimpl.h"
  6: #include "src/inline/spops.h"
  7: #include "src/inline/ilu.h"
  8: #include "petscbt.h"
  9: #include "sbaij.h"

 11: int MatIncreaseOverlap_SeqSBAIJ(Mat A,int is_max,IS *is,int ov)
 12: {
 14:   SETERRQ(1,"Function not yet written for SBAIJ format");
 15:   /* return(0); */
 16: }

 18: int MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
 19: {
 20:   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
 21:   int          *smap,i,k,kstart,kend,ierr,oldcols = a->mbs,*lens;
 22:   int          row,mat_i,*mat_j,tcol,*mat_ilen;
 23:   int          *irow,nrows,*ssmap,bs=a->bs,bs2=a->bs2;
 24:   int          *aj = a->j,*ai = a->i;
 25:   MatScalar    *mat_a;
 26:   Mat          C;
 27:   PetscTruth   flag;

 30: 
 31:   if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro");
 32:   ISSorted(iscol,(PetscTruth*)&i);
 33:   if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");

 35:   ISGetIndices(isrow,&irow);
 36:   ISGetSize(isrow,&nrows);
 37: 
 38:   ierr  = PetscMalloc((1+oldcols)*sizeof(int),&smap);
 39:   ssmap = smap;
 40:   ierr  = PetscMalloc((1+nrows)*sizeof(int),&lens);
 41:   ierr  = PetscMemzero(smap,oldcols*sizeof(int));
 42:   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
 43:   /* determine lens of each row */
 44:   for (i=0; i<nrows; i++) {
 45:     kstart  = ai[irow[i]];
 46:     kend    = kstart + a->ilen[irow[i]];
 47:     lens[i] = 0;
 48:       for (k=kstart; k<kend; k++) {
 49:         if (ssmap[aj[k]]) {
 50:           lens[i]++;
 51:         }
 52:       }
 53:     }
 54:   /* Create and fill new matrix */
 55:   if (scall == MAT_REUSE_MATRIX) {
 56:     c = (Mat_SeqSBAIJ *)((*B)->data);

 58:     if (c->mbs!=nrows || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
 59:     PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);
 60:     if (flag == PETSC_FALSE) {
 61:       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
 62:     }
 63:     PetscMemzero(c->ilen,c->mbs*sizeof(int));
 64:     C = *B;
 65:   } else {
 66:     MatCreateSeqSBAIJ(A->comm,bs,nrows*bs,nrows*bs,0,lens,&C);
 67:   }
 68:   c = (Mat_SeqSBAIJ *)(C->data);
 69:   for (i=0; i<nrows; i++) {
 70:     row    = irow[i];
 71:     kstart = ai[row];
 72:     kend   = kstart + a->ilen[row];
 73:     mat_i  = c->i[i];
 74:     mat_j  = c->j + mat_i;
 75:     mat_a  = c->a + mat_i*bs2;
 76:     mat_ilen = c->ilen + i;
 77:     for (k=kstart; k<kend; k++) {
 78:       if ((tcol=ssmap[a->j[k]])) {
 79:         *mat_j++ = tcol - 1;
 80:         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));
 81:         mat_a   += bs2;
 82:         (*mat_ilen)++;
 83:       }
 84:     }
 85:   }
 86: 
 87:   /* Free work space */
 88:   PetscFree(smap);
 89:   PetscFree(lens);
 90:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
 91:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
 92: 
 93:   ISRestoreIndices(isrow,&irow);
 94:   *B = C;
 95:   return(0);
 96: }

 98: int MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
 99: {
100:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
101:   IS          is1;
102:   int         *vary,*iary,*irow,nrows,i,ierr,bs=a->bs,count;

105:   if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro");
106: 
107:   ISGetIndices(isrow,&irow);
108:   ISGetSize(isrow,&nrows);
109: 
110:   /* Verify if the indices corespond to each element in a block 
111:    and form the IS with compressed IS */
112:   PetscMalloc(2*(a->mbs+1)*sizeof(int),&vary);
113:   iary = vary + a->mbs;
114:   PetscMemzero(vary,(a->mbs)*sizeof(int));
115:   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
116: 
117:   count = 0;
118:   for (i=0; i<a->mbs; i++) {
119:     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(1,"Index set does not match blocks");
120:     if (vary[i]==bs) iary[count++] = i;
121:   }
122:   ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);
123: 
124:   ISRestoreIndices(isrow,&irow);
125:   PetscFree(vary);

127:   MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);
128:   ISDestroy(is1);
129:   return(0);
130: }

132: int MatGetSubMatrices_SeqSBAIJ(Mat A,int n,IS *irow,IS *icol,MatReuse scall,Mat **B)
133: {
134:   int ierr,i;

137:   if (scall == MAT_INITIAL_MATRIX) {
138:     PetscMalloc((n+1)*sizeof(Mat),B);
139:   }

141:   for (i=0; i<n; i++) {
142:     MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);
143:   }
144:   return(0);
145: }

147: /* -------------------------------------------------------*/
148: /* Should check that shapes of vectors and matrices match */
149: /* -------------------------------------------------------*/
150: #include "petscblaslapack.h"

152: int MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz)
153: {
154:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
155:   Scalar          *x,*z,*xb,x1,zero=0.0;
156:   MatScalar       *v;
157:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

160:   VecSet(&zero,zz);
161:   VecGetArray(xx,&x);
162:   VecGetArray(zz,&z);

164:   v  = a->a;
165:   xb = x;
166: 
167:   for (i=0; i<mbs; i++) {
168:     n  = ai[1] - ai[0];  /* length of i_th row of A */
169:     x1 = xb[0];
170:     ib = aj + *ai;
171:     jmin = 0;
172:     if (*ib == i) {      /* (diag of A)*x */
173:       z[i] += *v++ * x[*ib++];
174:       jmin++;
175:     }
176:     for (j=jmin; j<n; j++) {
177:       cval    = *ib;
178:       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
179:       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
180:     }
181:     xb++; ai++;
182:   }

184:   VecRestoreArray(xx,&x);
185:   VecRestoreArray(zz,&z);
186:   PetscLogFlops(2*(a->s_nz*2 - A->m) - A->m);  /* s_nz = (nz+m)/2 */
187:   return(0);
188: }

190: int MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
191: {
192:   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
193:   Scalar          *x,*z,*xb,x1,x2,zero=0.0;
194:   MatScalar       *v;
195:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;


199:   VecSet(&zero,zz);
200:   VecGetArray(xx,&x);
201:   VecGetArray(zz,&z);
202: 
203:   v     = a->a;
204:   xb = x;

206:   for (i=0; i<mbs; i++) {
207:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
208:     x1 = xb[0]; x2 = xb[1];
209:     ib = aj + *ai;
210:     jmin = 0;
211:     if (*ib == i){     /* (diag of A)*x */
212:       z[2*i]   += v[0]*x1 + v[2]*x2;
213:       z[2*i+1] += v[2]*x1 + v[3]*x2;
214:       v += 4; jmin++;
215:     }
216:     for (j=jmin; j<n; j++) {
217:       /* (strict lower triangular part of A)*x  */
218:       cval       = ib[j]*2;
219:       z[cval]     += v[0]*x1 + v[1]*x2;
220:       z[cval+1]   += v[2]*x1 + v[3]*x2;
221:       /* (strict upper triangular part of A)*x  */
222:       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
223:       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
224:       v  += 4;
225:     }
226:     xb +=2; ai++;
227:   }

229:   VecRestoreArray(xx,&x);
230:   VecRestoreArray(zz,&z);
231:   PetscLogFlops(8*(a->s_nz*2 - A->m) - A->m);
232:   return(0);
233: }

235: int MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
236: {
237:   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
238:   Scalar          *x,*z,*xb,x1,x2,x3,zero=0.0;
239:   MatScalar       *v;
240:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;


244:   VecSet(&zero,zz);
245:   VecGetArray(xx,&x);
246:   VecGetArray(zz,&z);
247: 
248:   v     = a->a;
249:   xb = x;

251:   for (i=0; i<mbs; i++) {
252:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
253:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
254:     ib = aj + *ai;
255:     jmin = 0;
256:     if (*ib == i){     /* (diag of A)*x */
257:       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
258:       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
259:       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
260:       v += 9; jmin++;
261:     }
262:     for (j=jmin; j<n; j++) {
263:       /* (strict lower triangular part of A)*x  */
264:       cval       = ib[j]*3;
265:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
266:       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
267:       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
268:       /* (strict upper triangular part of A)*x  */
269:       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
270:       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
271:       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
272:       v  += 9;
273:     }
274:     xb +=3; ai++;
275:   }

277:   VecRestoreArray(xx,&x);
278:   VecRestoreArray(zz,&z);
279:   PetscLogFlops(18*(a->s_nz*2 - A->m) - A->m);
280:   return(0);
281: }

283: int MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
284: {
285:   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
286:   Scalar          *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
287:   MatScalar       *v;
288:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

291:   VecSet(&zero,zz);
292:   VecGetArray(xx,&x);
293:   VecGetArray(zz,&z);
294: 
295:   v     = a->a;
296:   xb = x;

298:   for (i=0; i<mbs; i++) {
299:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
300:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
301:     ib = aj + *ai;
302:     jmin = 0;
303:     if (*ib == i){     /* (diag of A)*x */
304:       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
305:       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
306:       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
307:       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
308:       v += 16; jmin++;
309:     }
310:     for (j=jmin; j<n; j++) {
311:       /* (strict lower triangular part of A)*x  */
312:       cval       = ib[j]*4;
313:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
314:       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
315:       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
316:       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
317:       /* (strict upper triangular part of A)*x  */
318:       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
319:       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
320:       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
321:       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
322:       v  += 16;
323:     }
324:     xb +=4; ai++;
325:   }

327:   VecRestoreArray(xx,&x);
328:   VecRestoreArray(zz,&z);
329:   PetscLogFlops(32*(a->s_nz*2 - A->m) - A->m);
330:   return(0);
331: }

333: int MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
334: {
335:   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
336:   Scalar          *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
337:   MatScalar       *v;
338:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

341:   VecSet(&zero,zz);
342:   VecGetArray(xx,&x);
343:   VecGetArray(zz,&z);
344: 
345:   v     = a->a;
346:   xb = x;

348:   for (i=0; i<mbs; i++) {
349:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
350:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
351:     ib = aj + *ai;
352:     jmin = 0;
353:     if (*ib == i){      /* (diag of A)*x */
354:       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
355:       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
356:       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
357:       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
358:       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
359:       v += 25; jmin++;
360:     }
361:     for (j=jmin; j<n; j++) {
362:       /* (strict lower triangular part of A)*x  */
363:       cval       = ib[j]*5;
364:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
365:       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
366:       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
367:       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
368:       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
369:       /* (strict upper triangular part of A)*x  */
370:       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
371:       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
372:       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
373:       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
374:       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
375:       v  += 25;
376:     }
377:     xb +=5; ai++;
378:   }

380:   VecRestoreArray(xx,&x);
381:   VecRestoreArray(zz,&z);
382:   PetscLogFlops(50*(a->s_nz*2 - A->m) - A->m);
383:   return(0);
384: }


387: int MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
388: {
389:   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
390:   Scalar          *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
391:   MatScalar       *v;
392:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

395:   VecSet(&zero,zz);
396:   VecGetArray(xx,&x);
397:   VecGetArray(zz,&z);
398: 
399:   v     = a->a;
400:   xb = x;

402:   for (i=0; i<mbs; i++) {
403:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
404:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
405:     ib = aj + *ai;
406:     jmin = 0;
407:     if (*ib == i){      /* (diag of A)*x */
408:       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
409:       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
410:       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
411:       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
412:       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
413:       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
414:       v += 36; jmin++;
415:     }
416:     for (j=jmin; j<n; j++) {
417:       /* (strict lower triangular part of A)*x  */
418:       cval       = ib[j]*6;
419:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
420:       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
421:       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
422:       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
423:       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
424:       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
425:       /* (strict upper triangular part of A)*x  */
426:       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
427:       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
428:       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
429:       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
430:       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
431:       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
432:       v  += 36;
433:     }
434:     xb +=6; ai++;
435:   }

437:   VecRestoreArray(xx,&x);
438:   VecRestoreArray(zz,&z);
439:   PetscLogFlops(72*(a->s_nz*2 - A->m) - A->m);
440:   return(0);
441: }
442: int MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
443: {
444:   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
445:   Scalar          *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
446:   MatScalar       *v;
447:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

450:   VecSet(&zero,zz);
451:   VecGetArray(xx,&x);
452:   VecGetArray(zz,&z);
453: 
454:   v     = a->a;
455:   xb = x;

457:   for (i=0; i<mbs; i++) {
458:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
459:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
460:     ib = aj + *ai;
461:     jmin = 0;
462:     if (*ib == i){      /* (diag of A)*x */
463:       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
464:       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
465:       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
466:       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
467:       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
468:       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
469:       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
470:       v += 49; jmin++;
471:     }
472:     for (j=jmin; j<n; j++) {
473:       /* (strict lower triangular part of A)*x  */
474:       cval       = ib[j]*7;
475:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
476:       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
477:       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
478:       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
479:       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
480:       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
481:       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
482:       /* (strict upper triangular part of A)*x  */
483:       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
484:       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
485:       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
486:       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
487:       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
488:       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
489:       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
490:       v  += 49;
491:     }
492:     xb +=7; ai++;
493:   }
494:   VecRestoreArray(xx,&x);
495:   VecRestoreArray(zz,&z);
496:   PetscLogFlops(98*(a->s_nz*2 - A->m) - A->m);
497:   return(0);
498: }

500: /*
501:     This will not work with MatScalar == float because it calls the BLAS
502: */
503: int MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
504: {
505:   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
506:   Scalar          *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
507:   MatScalar       *v;
508:   int             ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2;
509:   int             ncols,k;

512:   VecSet(&zero,zz);
513:   VecGetArray(xx,&x); x_ptr=x;
514:   VecGetArray(zz,&z); z_ptr=z;

516:   aj   = a->j;
517:   v    = a->a;
518:   ii   = a->i;

520:   if (!a->mult_work) {
521:     PetscMalloc((A->m+1)*sizeof(Scalar),&a->mult_work);
522:   }
523:   work = a->mult_work;
524: 
525:   for (i=0; i<mbs; i++) {
526:     n     = ii[1] - ii[0]; ncols = n*bs;
527:     workt = work; idx=aj+ii[0];

529:     /* upper triangular part */
530:     for (j=0; j<n; j++) {
531:       xb = x_ptr + bs*(*idx++);
532:       for (k=0; k<bs; k++) workt[k] = xb[k];
533:       workt += bs;
534:     }
535:     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
536:     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
537: 
538:     /* strict lower triangular part */
539:     idx = aj+ii[0];
540:     if (*idx == i){
541:       ncols -= bs; v += bs2; idx++; n--;
542:     }
543: 
544:     if (ncols > 0){
545:       workt = work;
546:       ierr  = PetscMemzero(workt,ncols*sizeof(Scalar));
547:       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
548:       for (j=0; j<n; j++) {
549:         zb = z_ptr + bs*(*idx++);
550:         for (k=0; k<bs; k++) zb[k] += workt[k] ;
551:         workt += bs;
552:       }
553:     }
554:     x += bs; v += n*bs2; z += bs; ii++;
555:   }
556: 
557:   VecRestoreArray(xx,&x);
558:   VecRestoreArray(zz,&z);
559:   PetscLogFlops(2*(a->s_nz*2 - A->m)*bs2 - A->m);
560:   return(0);
561: }

563: int MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
564: {
565:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
566:   Scalar          *x,*y,*z,*xb,x1;
567:   MatScalar       *v;
568:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

571:   VecGetArray(xx,&x);
572:   if (yy != xx) {
573:     VecGetArray(yy,&y);
574:   } else {
575:     y = x;
576:   }
577:   if (zz != yy) {
578:     VecCopy(yy,zz);
579:     VecGetArray(zz,&z);
580:   } else {
581:     z = y;
582:   }

584:   v  = a->a;
585:   xb = x;

587:   for (i=0; i<mbs; i++) {
588:     n  = ai[1] - ai[0];  /* length of i_th row of A */
589:     x1 = xb[0];
590:     ib = aj + *ai;
591:     jmin = 0;
592:     if (*ib == i) {            /* (diag of A)*x */
593:       z[i] += *v++ * x[*ib++]; jmin++;
594:     }
595:     for (j=jmin; j<n; j++) {
596:       cval    = *ib;
597:       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
598:       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
599:     }
600:     xb++; ai++;
601:   }

603:   VecRestoreArray(xx,&x);
604:   if (yy != xx) VecRestoreArray(yy,&y);
605:   if (zz != yy) VecRestoreArray(zz,&z);
606: 
607:   PetscLogFlops(2*(a->s_nz*2 - A->m));
608:   return(0);
609: }

611: int MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
612: {
613:   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
614:   Scalar          *x,*y,*z,*xb,x1,x2;
615:   MatScalar       *v;
616:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

619:   VecGetArray(xx,&x);
620:   if (yy != xx) {
621:     VecGetArray(yy,&y);
622:   } else {
623:     y = x;
624:   }
625:   if (zz != yy) {
626:     VecCopy(yy,zz);
627:     VecGetArray(zz,&z);
628:   } else {
629:     z = y;
630:   }

632:   v     = a->a;
633:   xb = x;

635:   for (i=0; i<mbs; i++) {
636:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
637:     x1 = xb[0]; x2 = xb[1];
638:     ib = aj + *ai;
639:     jmin = 0;
640:     if (*ib == i){      /* (diag of A)*x */
641:       z[2*i]   += v[0]*x1 + v[2]*x2;
642:       z[2*i+1] += v[2]*x1 + v[3]*x2;
643:       v += 4; jmin++;
644:     }
645:     for (j=jmin; j<n; j++) {
646:       /* (strict lower triangular part of A)*x  */
647:       cval       = ib[j]*2;
648:       z[cval]     += v[0]*x1 + v[1]*x2;
649:       z[cval+1]   += v[2]*x1 + v[3]*x2;
650:       /* (strict upper triangular part of A)*x  */
651:       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
652:       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
653:       v  += 4;
654:     }
655:     xb +=2; ai++;
656:   }

658:   VecRestoreArray(xx,&x);
659:   if (yy != xx) VecRestoreArray(yy,&y);
660:   if (zz != yy) VecRestoreArray(zz,&z);

662:   PetscLogFlops(4*(a->s_nz*2 - A->m));
663:   return(0);
664: }

666: int MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
667: {
668:   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
669:   Scalar          *x,*y,*z,*xb,x1,x2,x3;
670:   MatScalar       *v;
671:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

674:   VecGetArray(xx,&x);
675:   if (yy != xx) {
676:     VecGetArray(yy,&y);
677:   } else {
678:     y = x;
679:   }
680:   if (zz != yy) {
681:     VecCopy(yy,zz);
682:     VecGetArray(zz,&z);
683:   } else {
684:     z = y;
685:   }

687:   v     = a->a;
688:   xb = x;

690:   for (i=0; i<mbs; i++) {
691:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
692:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
693:     ib = aj + *ai;
694:     jmin = 0;
695:     if (*ib == i){     /* (diag of A)*x */
696:      z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
697:      z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
698:      z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
699:      v += 9; jmin++;
700:     }
701:     for (j=jmin; j<n; j++) {
702:       /* (strict lower triangular part of A)*x  */
703:       cval       = ib[j]*3;
704:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
705:       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
706:       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
707:       /* (strict upper triangular part of A)*x  */
708:       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
709:       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
710:       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
711:       v  += 9;
712:     }
713:     xb +=3; ai++;
714:   }

716:   VecRestoreArray(xx,&x);
717:   if (yy != xx) VecRestoreArray(yy,&y);
718:   if (zz != yy) VecRestoreArray(zz,&z);

720:   PetscLogFlops(18*(a->s_nz*2 - A->m));
721:   return(0);
722: }

724: int MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
725: {
726:   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
727:   Scalar          *x,*y,*z,*xb,x1,x2,x3,x4;
728:   MatScalar       *v;
729:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

732:   VecGetArray(xx,&x);
733:   if (yy != xx) {
734:     VecGetArray(yy,&y);
735:   } else {
736:     y = x;
737:   }
738:   if (zz != yy) {
739:     VecCopy(yy,zz);
740:     VecGetArray(zz,&z);
741:   } else {
742:     z = y;
743:   }

745:   v     = a->a;
746:   xb = x;

748:   for (i=0; i<mbs; i++) {
749:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
750:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
751:     ib = aj + *ai;
752:     jmin = 0;
753:     if (*ib == i){      /* (diag of A)*x */
754:       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
755:       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
756:       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
757:       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
758:       v += 16; jmin++;
759:     }
760:     for (j=jmin; j<n; j++) {
761:       /* (strict lower triangular part of A)*x  */
762:       cval       = ib[j]*4;
763:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
764:       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
765:       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
766:       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
767:       /* (strict upper triangular part of A)*x  */
768:       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
769:       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
770:       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
771:       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
772:       v  += 16;
773:     }
774:     xb +=4; ai++;
775:   }

777:   VecRestoreArray(xx,&x);
778:   if (yy != xx) VecRestoreArray(yy,&y);
779:   if (zz != yy) VecRestoreArray(zz,&z);

781:   PetscLogFlops(32*(a->s_nz*2 - A->m));
782:   return(0);
783: }

785: int MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
786: {
787:   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
788:   Scalar          *x,*y,*z,*xb,x1,x2,x3,x4,x5;
789:   MatScalar       *v;
790:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

793:   VecGetArray(xx,&x);
794:   if (yy != xx) {
795:     VecGetArray(yy,&y);
796:   } else {
797:     y = x;
798:   }
799:   if (zz != yy) {
800:     VecCopy(yy,zz);
801:     VecGetArray(zz,&z);
802:   } else {
803:     z = y;
804:   }

806:   v     = a->a;
807:   xb = x;

809:   for (i=0; i<mbs; i++) {
810:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
811:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
812:     ib = aj + *ai;
813:     jmin = 0;
814:     if (*ib == i){      /* (diag of A)*x */
815:       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
816:       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
817:       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
818:       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
819:       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
820:       v += 25; jmin++;
821:     }
822:     for (j=jmin; j<n; j++) {
823:       /* (strict lower triangular part of A)*x  */
824:       cval       = ib[j]*5;
825:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
826:       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
827:       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
828:       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
829:       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
830:       /* (strict upper triangular part of A)*x  */
831:       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
832:       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
833:       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
834:       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
835:       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
836:       v  += 25;
837:     }
838:     xb +=5; ai++;
839:   }

841:   VecRestoreArray(xx,&x);
842:   if (yy != xx) VecRestoreArray(yy,&y);
843:   if (zz != yy) VecRestoreArray(zz,&z);

845:   PetscLogFlops(50*(a->s_nz*2 - A->m));
846:   return(0);
847: }
848: int MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
849: {
850:   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
851:   Scalar          *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6;
852:   MatScalar       *v;
853:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

856:   VecGetArray(xx,&x);
857:   if (yy != xx) {
858:     VecGetArray(yy,&y);
859:   } else {
860:     y = x;
861:   }
862:   if (zz != yy) {
863:     VecCopy(yy,zz);
864:     VecGetArray(zz,&z);
865:   } else {
866:     z = y;
867:   }

869:   v     = a->a;
870:   xb = x;

872:   for (i=0; i<mbs; i++) {
873:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
874:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
875:     ib = aj + *ai;
876:     jmin = 0;
877:     if (*ib == i){     /* (diag of A)*x */
878:       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
879:       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
880:       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
881:       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
882:       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
883:       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
884:       v += 36; jmin++;
885:     }
886:     for (j=jmin; j<n; j++) {
887:       /* (strict lower triangular part of A)*x  */
888:       cval       = ib[j]*6;
889:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
890:       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
891:       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
892:       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
893:       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
894:       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
895:       /* (strict upper triangular part of A)*x  */
896:       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
897:       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
898:       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
899:       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
900:       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
901:       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
902:       v  += 36;
903:     }
904:     xb +=6; ai++;
905:   }

907:   VecRestoreArray(xx,&x);
908:   if (yy != xx) VecRestoreArray(yy,&y);
909:   if (zz != yy) VecRestoreArray(zz,&z);

911:   PetscLogFlops(72*(a->s_nz*2 - A->m));
912:   return(0);
913: }

915: int MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
916: {
917:   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
918:   Scalar          *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
919:   MatScalar       *v;
920:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

923:   VecGetArray(xx,&x);
924:   if (yy != xx) {
925:     VecGetArray(yy,&y);
926:   } else {
927:     y = x;
928:   }
929:   if (zz != yy) {
930:     VecCopy(yy,zz);
931:     VecGetArray(zz,&z);
932:   } else {
933:     z = y;
934:   }

936:   v     = a->a;
937:   xb = x;

939:   for (i=0; i<mbs; i++) {
940:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
941:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
942:     ib = aj + *ai;
943:     jmin = 0;
944:     if (*ib == i){     /* (diag of A)*x */
945:       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
946:       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
947:       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
948:       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
949:       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
950:       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
951:       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
952:       v += 49; jmin++;
953:     }
954:     for (j=jmin; j<n; j++) {
955:       /* (strict lower triangular part of A)*x  */
956:       cval       = ib[j]*7;
957:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
958:       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
959:       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
960:       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
961:       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
962:       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
963:       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
964:       /* (strict upper triangular part of A)*x  */
965:       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
966:       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
967:       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
968:       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
969:       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
970:       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
971:       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
972:       v  += 49;
973:     }
974:     xb +=7; ai++;
975:   }

977:   VecRestoreArray(xx,&x);
978:   if (yy != xx) VecRestoreArray(yy,&y);
979:   if (zz != yy) VecRestoreArray(zz,&z);

981:   PetscLogFlops(98*(a->s_nz*2 - A->m));
982:   return(0);
983: }

985: int MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
986: {
987:   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
988:   Scalar          *x,*x_ptr,*y,*z,*z_ptr=0,*xb,*zb,*work,*workt;
989:   MatScalar       *v;
990:   int             ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2;
991:   int             ncols,k;

994:   VecGetArray(xx,&x); x_ptr=x;
995:   if (yy != xx) {
996:     VecGetArray(yy,&y);
997:   } else {
998:     y = x;
999:   }
1000:   if (zz != yy) {
1001:     VecCopy(yy,zz);
1002:     VecGetArray(zz,&z); z_ptr=z;
1003:   } else {
1004:     z = y;
1005:   }

1007:   aj   = a->j;
1008:   v    = a->a;
1009:   ii   = a->i;

1011:   if (!a->mult_work) {
1012:     PetscMalloc((A->m+1)*sizeof(Scalar),&a->mult_work);
1013:   }
1014:   work = a->mult_work;
1015: 
1016: 
1017:   for (i=0; i<mbs; i++) {
1018:     n     = ii[1] - ii[0]; ncols = n*bs;
1019:     workt = work; idx=aj+ii[0];

1021:     /* upper triangular part */
1022:     for (j=0; j<n; j++) {
1023:       xb = x_ptr + bs*(*idx++);
1024:       for (k=0; k<bs; k++) workt[k] = xb[k];
1025:       workt += bs;
1026:     }
1027:     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
1028:     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);

1030:     /* strict lower triangular part */
1031:     idx = aj+ii[0];
1032:     if (*idx == i){
1033:       ncols -= bs; v += bs2; idx++; n--;
1034:     }
1035:     if (ncols > 0){
1036:       workt = work;
1037:       ierr  = PetscMemzero(workt,ncols*sizeof(Scalar));
1038:       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1039:       for (j=0; j<n; j++) {
1040:         zb = z_ptr + bs*(*idx++);
1041:         /* idx++; */
1042:         for (k=0; k<bs; k++) zb[k] += workt[k] ;
1043:         workt += bs;
1044:       }
1045:     }

1047:     x += bs; v += n*bs2; z += bs; ii++;
1048:   }

1050:   VecRestoreArray(xx,&x);
1051:   if (yy != xx) VecRestoreArray(yy,&y);
1052:   if (zz != yy) VecRestoreArray(zz,&z);

1054:   PetscLogFlops(2*(a->s_nz*2 - A->m));
1055:   return(0);
1056: }

1058: int MatMultTranspose_SeqSBAIJ(Mat A,Vec xx,Vec zz)
1059: {
1061:   SETERRQ(1,"Matrix is symmetric. Call MatMult().");
1062:   /* return(0); */
1063: }

1065: int MatMultTransposeAdd_SeqSBAIJ(Mat A,Vec xx,Vec yy,Vec zz)

1067: {
1069:   SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
1070:   /* return(0); */
1071: }

1073: int MatScale_SeqSBAIJ(Scalar *alpha,Mat inA)
1074: {
1075:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1076:   int         one = 1,totalnz = a->bs2*a->s_nz;

1079:   BLscal_(&totalnz,alpha,a->a,&one);
1080:   PetscLogFlops(totalnz);
1081:   return(0);
1082: }

1084: int MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
1085: {
1086:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1087:   MatScalar   *v = a->a;
1088:   PetscReal   sum_diag = 0.0, sum_off = 0.0, *sum;
1089:   int         i,j,k,bs = a->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
1090:   int         *jl,*il,jmin,jmax,ierr,nexti,ik,*col;
1091: 
1093:   if (type == NORM_FROBENIUS) {
1094:     for (k=0; k<mbs; k++){
1095:       jmin = a->i[k]; jmax = a->i[k+1];
1096:       col  = aj + jmin;
1097:       if (*col == k){         /* diagonal block */
1098:         for (i=0; i<bs2; i++){
1099: #if defined(PETSC_USE_COMPLEX)
1100:           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
1101: #else
1102:           sum_diag += (*v)*(*v); v++;
1103: #endif
1104:         }
1105:         jmin++;
1106:       }
1107:       for (j=jmin; j<jmax; j++){  /* off-diagonal blocks */
1108:         for (i=0; i<bs2; i++){
1109: #if defined(PETSC_USE_COMPLEX)
1110:           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
1111: #else
1112:           sum_off += (*v)*(*v); v++;
1113: #endif  
1114:         }
1115:       }
1116:     }
1117:     *norm = sqrt(sum_diag + 2*sum_off);

1119:   }  else if (type == NORM_INFINITY) { /* maximum row sum */
1120:     PetscMalloc(mbs*sizeof(int),&il);
1121:     PetscMalloc(mbs*sizeof(int),&jl);
1122:     PetscMalloc(bs*sizeof(PetscReal),&sum);
1123:     for (i=0; i<mbs; i++) {
1124:       jl[i] = mbs; il[0] = 0;
1125:     }

1127:     *norm = 0.0;
1128:     for (k=0; k<mbs; k++) { /* k_th block row */
1129:       for (j=0; j<bs; j++) sum[j]=0.0;

1131:       /*-- col sum --*/
1132:       i = jl[k]; /* first |A(i,k)| to be added */
1133:       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1134:                   at step k */
1135:       while (i<mbs){
1136:         nexti = jl[i];  /* next block row to be added */
1137:         ik    = il[i];  /* block index of A(i,k) in the array a */
1138:         for (j=0; j<bs; j++){
1139:           v = a->a + ik*bs2 + j*bs;
1140:           for (k1=0; k1<bs; k1++) {
1141:             sum[j] += PetscAbsScalar(*v); v++;
1142:           }
1143:         }
1144:         /* update il, jl */
1145:         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1146:         jmax = a->i[i+1];
1147:         if (jmin < jmax){
1148:           il[i] = jmin;
1149:           j   = a->j[jmin];
1150:           jl[i] = jl[j]; jl[j]=i;
1151:         }
1152:         i = nexti;
1153:       }
1154: 
1155:       /*-- row sum --*/
1156:       jmin = a->i[k]; jmax = a->i[k+1];
1157:       for (i=jmin; i<jmax; i++) {
1158:         for (j=0; j<bs; j++){
1159:           v = a->a + i*bs2 + j;
1160:           for (k1=0; k1<bs; k1++){
1161:             sum[j] += PetscAbsScalar(*v);
1162:             v   += bs;
1163:           }
1164:         }
1165:       }
1166:       /* add k_th block row to il, jl */
1167:       col = aj+jmin;
1168:       if (*col == k) jmin++;
1169:       if (jmin < jmax){
1170:         il[k] = jmin;
1171:         j   = a->j[jmin];
1172:         jl[k] = jl[j]; jl[j] = k;
1173:       }
1174:       for (j=0; j<bs; j++){
1175:         if (sum[j] > *norm) *norm = sum[j];
1176:       }
1177:     }
1178:     PetscFree(il);
1179:     PetscFree(jl);
1180:     PetscFree(sum);
1181:   } else {
1182:     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1183:   }
1184:   return(0);
1185: }

1187: int MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
1188: {
1189:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
1190:   int          ierr;
1191:   PetscTruth   flag;

1194:   PetscTypeCompare((PetscObject)A,MATSEQBAIJ,&flag);
1195:   if (!flag) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");

1197:   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1198:   if ((A->m != B->m) || (A->n != B->n) || (a->bs != b->bs)|| (a->s_nz != b->s_nz)) {
1199:     *flg = PETSC_FALSE;
1200:     return(0);
1201:   }
1202: 
1203:   /* if the a->i are the same */
1204:   PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);
1205:   if (*flg == PETSC_FALSE) {
1206:     return(0);
1207:   }
1208: 
1209:   /* if a->j are the same */
1210:   PetscMemcmp(a->j,b->j,(a->s_nz)*sizeof(int),flg);
1211:   if (*flg == PETSC_FALSE) {
1212:     return(0);
1213:   }
1214:   /* if a->a are the same */
1215:   PetscMemcmp(a->a,b->a,(a->s_nz)*(a->bs)*(a->bs)*sizeof(Scalar),flg);
1216:   return(0);
1217: 
1218: }

1220: int MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
1221: {
1222:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1223:   int         ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1224:   Scalar      *x,zero = 0.0;
1225:   MatScalar   *aa,*aa_j;

1228:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1229:   bs   = a->bs;
1230:   aa   = a->a;
1231:   ai   = a->i;
1232:   aj   = a->j;
1233:   ambs = a->mbs;
1234:   bs2  = a->bs2;

1236:   VecSet(&zero,v);
1237:   VecGetArray(v,&x);
1238:   VecGetLocalSize(v,&n);
1239:   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1240:   for (i=0; i<ambs; i++) {
1241:     j=ai[i];
1242:     if (aj[j] == i) {             /* if this is a diagonal element */
1243:       row  = i*bs;
1244:       aa_j = aa + j*bs2;
1245:       for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1246:     }
1247:   }
1248:   VecRestoreArray(v,&x);
1249:   return(0);
1250: }

1252: int MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
1253: {
1254:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1255:   Scalar      *l,*r,x,*li,*ri;
1256:   MatScalar   *aa,*v;
1257:   int         ierr,i,j,k,lm,rn,M,m,*ai,*aj,mbs,tmp,bs,bs2;

1260:   ai  = a->i;
1261:   aj  = a->j;
1262:   aa  = a->a;
1263:   m   = A->m;
1264:   bs  = a->bs;
1265:   mbs = a->mbs;
1266:   bs2 = a->bs2;

1268:   if (ll != rr) {
1269:     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be samen");
1270:   }
1271:   if (ll) {
1272:     VecGetArray(ll,&l);
1273:     VecGetLocalSize(ll,&lm);
1274:     if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1275:     for (i=0; i<mbs; i++) { /* for each block row */
1276:       M  = ai[i+1] - ai[i];
1277:       li = l + i*bs;
1278:       v  = aa + bs2*ai[i];
1279:       for (j=0; j<M; j++) { /* for each block */
1280:         for (k=0; k<bs2; k++) {
1281:           (*v++) *= li[k%bs];
1282:         }
1283: #ifdef CONT
1284:         /* will be used to replace the above loop */
1285:         ri = l + bs*aj[ai[i]+j];
1286:         for (k=0; k<bs; k++) { /* column value */
1287:           x = ri[k];
1288:           for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
1289:         }
1290: #endif

1292:       }
1293:     }
1294:     VecRestoreArray(ll,&l);
1295:     PetscLogFlops(2*a->s_nz);
1296:   }
1297:   /* will be deleted */
1298:   if (rr) {
1299:     VecGetArray(rr,&r);
1300:     VecGetLocalSize(rr,&rn);
1301:     if (rn != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
1302:     for (i=0; i<mbs; i++) { /* for each block row */
1303:       M  = ai[i+1] - ai[i];
1304:       v  = aa + bs2*ai[i];
1305:       for (j=0; j<M; j++) { /* for each block */
1306:         ri = r + bs*aj[ai[i]+j];
1307:         for (k=0; k<bs; k++) {
1308:           x = ri[k];
1309:           for (tmp=0; tmp<bs; tmp++) (*v++) *= x;
1310:         }
1311:       }
1312:     }
1313:     VecRestoreArray(rr,&r);
1314:     PetscLogFlops(a->s_nz);
1315:   }
1316:   return(0);
1317: }

1319: int MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1320: {
1321:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;

1324:   info->rows_global    = (double)A->m;
1325:   info->columns_global = (double)A->m;
1326:   info->rows_local     = (double)A->m;
1327:   info->columns_local  = (double)A->m;
1328:   info->block_size     = a->bs2;
1329:   info->nz_allocated   = a->s_maxnz; /*num. of nonzeros in upper triangular part */
1330:   info->nz_used        = a->bs2*a->s_nz; /*num. of nonzeros in upper triangular part */
1331:   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1332:   info->assemblies   = A->num_ass;
1333:   info->mallocs      = a->reallocs;
1334:   info->memory       = A->mem;
1335:   if (A->factor) {
1336:     info->fill_ratio_given  = A->info.fill_ratio_given;
1337:     info->fill_ratio_needed = A->info.fill_ratio_needed;
1338:     info->factor_mallocs    = A->info.factor_mallocs;
1339:   } else {
1340:     info->fill_ratio_given  = 0;
1341:     info->fill_ratio_needed = 0;
1342:     info->factor_mallocs    = 0;
1343:   }
1344:   return(0);
1345: }


1348: int MatZeroEntries_SeqSBAIJ(Mat A)
1349: {
1350:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1351:   int         ierr;

1354:   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));
1355:   return(0);
1356: }

1358: int MatGetRowMax_SeqSBAIJ(Mat A,Vec v)
1359: {
1360:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1361:   int          ierr,i,j,n,row,col,bs,*ai,*aj,mbs;
1362:   PetscReal    atmp;
1363:   MatScalar    *aa;
1364:   Scalar       zero = 0.0,*x;
1365:   int          ncols,brow,bcol,krow,kcol;

1368:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1369:   bs   = a->bs;
1370:   aa   = a->a;
1371:   ai   = a->i;
1372:   aj   = a->j;
1373:   mbs = a->mbs;

1375:   VecSet(&zero,v);
1376:   VecGetArray(v,&x);
1377:   VecGetLocalSize(v,&n);
1378:   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1379:   for (i=0; i<mbs; i++) {
1380:     ncols = ai[1] - ai[0]; ai++;
1381:     brow  = bs*i;
1382:     for (j=0; j<ncols; j++){
1383:       bcol = bs*(*aj);
1384:       for (kcol=0; kcol<bs; kcol++){
1385:         col = bcol + kcol;      /* col index */
1386:         for (krow=0; krow<bs; krow++){
1387:           atmp = PetscAbsScalar(*aa); aa++;
1388:           row = brow + krow;    /* row index */
1389:           /* printf("val[%d,%d]: %gn",row,col,atmp); */
1390:           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1391:           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
1392:         }
1393:       }
1394:       aj++;
1395:     }
1396:   }
1397:   VecRestoreArray(v,&x);
1398:   return(0);
1399: }