Actual source code: sbaij2.c

 2:  #include src/mat/impls/baij/seq/baij.h
 3:  #include src/inline/spops.h
 4:  #include src/inline/ilu.h
 5:  #include petscbt.h
 6:  #include src/mat/impls/sbaij/seq/sbaij.h

 /*
    MatIncreaseOverlap_SeqSBAIJ - Replaces each of the is_max index sets in is[] by a
    larger one obtained from ov sweeps over the stored block rows of A.

    Input:
      A      - the matrix; only a->mbs, a->i, a->j, A->bs and A->m are read here
      is_max - number of index sets in is[]
      is     - index sets; each is[i] is destroyed and replaced by a newly created IS
      ov     - number of sweeps; a negative value raises PETSC_ERR_ARG_OUTOFRANGE

    Every incoming index is converted to a block row (idx/bs); a block row >= mbs raises
    PETSC_ERR_ARG_OUTOFRANGE. In each sweep, block rows that were added in the previous
    sweep contribute all of their stored block columns (row search); any other block row
    is added when one of its stored block columns was added in the previous sweep
    (column search). The resulting block list is expanded to bs point indices per block.
 */
 10: PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
 11: {
 12:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
 14:   PetscInt       brow,i,j,k,l,mbs,n,*idx,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
 15:   PetscBT        table,table0;

 18:   mbs = a->mbs;
 19:   ai  = a->i;
 20:   aj  = a->j;
 21:   bs  = A->bs;

 23:   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");

   /* table  : every block index already placed in nidx
      table0 : only the block indices added during the previous sweep
      nidx   : growing list of block indices; nidx2 : the list expanded to point indices */
 25:   PetscBTCreate(mbs,table);
 26:   PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);
 27:   PetscMalloc((A->m+1)*sizeof(PetscInt),&nidx2);
 28:   PetscBTCreate(mbs,table0);

 30:   for (i=0; i<is_max; i++) { /* for each is */
 31:     isz  = 0;
 32:     PetscBTMemzero(mbs,table);
 33: 
 34:     /* Extract the indices, assume there can be duplicate entries */
 35:     ISGetIndices(is[i],&idx);
 36:     ISGetLocalSize(is[i],&n);

 38:     /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */
 39:     bcol_max = 0;
 40:     for (j=0; j<n ; ++j){
 41:       brow = idx[j]/bs; /* convert the indices into block indices */
 42:       if (brow >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
 43:       if(!PetscBTLookupSet(table,brow)) {
 44:         nidx[isz++] = brow;
 45:         if (bcol_max < brow) bcol_max = brow;
 46:       }
 47:     }
 48:     ISRestoreIndices(is[i],&idx);
 49:     ISDestroy(is[i]);
 50: 
 51:     k = 0;
 52:     for (j=0; j<ov; j++){ /* for each overlap */
 53:       /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
 54:       PetscBTMemzero(mbs,table0);
 55:       for (l=k; l<isz; l++) PetscBTSet(table0,nidx[l]);

 57:       n = isz;  /* length of the updated is[i] */
 58:       for (brow=0; brow<mbs; brow++){
 59:         start = ai[brow]; end   = ai[brow+1];
 60:         if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
 61:           for (l = start; l<end ; l++){
 62:             bcol = aj[l];
             /* keep bcol_max equal to the largest block index in nidx; otherwise the early
                exit of the column search below would skip columns added in an earlier sweep */
 63:             if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol; if (bcol_max < bcol) bcol_max = bcol;}
 64:           }
 65:           k++;
 66:           if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
 67:         } else { /* brow is not on nidx - col search: add brow onto nidx if there is a bcol in nidx */
 68:           for (l = start; l<end ; l++){
 69:             bcol = aj[l];
 70:             if (bcol > bcol_max) break;
 71:             if (PetscBTLookup(table0,bcol)){
 72:               if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;}
 73:               break; /* for l = start; l<end ; l++) */
 74:             }
 75:           }
 76:         }
 77:       }
 78:     } /* for each overlap */

 80:     /* expand the Index Set */
 81:     for (j=0; j<isz; j++) {
 82:       for (k=0; k<bs; k++)
 83:         nidx2[j*bs+k] = nidx[j]*bs+k;
 84:     }
 85:     ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);
 86:   }
 87:   PetscBTDestroy(table);
 88:   PetscFree(nidx);
 89:   PetscFree(nidx2);
 90:   PetscBTDestroy(table0);
 91:   return(0);
 92: }

 /*
    MatGetSubMatrix_SeqSBAIJ_Private - Extracts the submatrix of A selected by a set of
    BLOCK indices, keeping the same rows and columns.

    Input:
      A      - source matrix
      isrow  - block indices to keep; must be the same IS object as iscol and must be sorted
      iscol  - must equal isrow (PETSC_ERR_ARG_INCOMP otherwise)
      cs     - not used in this routine
      scall  - MAT_REUSE_MATRIX fills the existing *B (its block count, block size and
               per-row lengths must match, else PETSC_ERR_ARG_SIZ); any other value
               creates a new matrix of A's type
    Output:
      B      - the submatrix

    smap maps an old block column to (new block column + 1); 0 means "not selected".
 */
 96: PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
 97: {
 98:   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
100:   PetscInt       *smap,i,k,kstart,kend,oldcols = a->mbs,*lens;
101:   PetscInt       row,mat_i,*mat_j,tcol,*mat_ilen;
102:   PetscInt       *irow,nrows,*ssmap,bs=A->bs,bs2=a->bs2;
103:   PetscInt       *aj = a->j,*ai = a->i;
104:   MatScalar      *mat_a;
105:   Mat            C;
106:   PetscTruth     flag;

109: 
110:   if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
  /* receive the result in a PetscTruth; writing it through a cast PetscInt pointer is
     wrong whenever the two types differ in size */
111:   ISSorted(iscol,&flag);
112:   if (!flag) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");

114:   ISGetIndices(isrow,&irow);
115:   ISGetSize(isrow,&nrows);
116: 
117:   PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);
118:   ssmap = smap;
119:   PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);
120:   PetscMemzero(smap,oldcols*sizeof(PetscInt));
121:   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
122:   /* determine lens of each row */
123:   for (i=0; i<nrows; i++) {
124:     kstart  = ai[irow[i]];
125:     kend    = kstart + a->ilen[irow[i]];
126:     lens[i] = 0;
127:       for (k=kstart; k<kend; k++) {
128:         if (ssmap[aj[k]]) {
129:           lens[i]++;
130:         }
131:       }
132:     }
133:   /* Create and fill new matrix */
134:   if (scall == MAT_REUSE_MATRIX) {
135:     c = (Mat_SeqSBAIJ *)((*B)->data);

137:     if (c->mbs!=nrows || (*B)->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
138:     PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);
139:     if (flag == PETSC_FALSE) {
140:       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
141:     }
    /* the row lengths are rebuilt below while the blocks are copied in */
142:     PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));
143:     C = *B;
144:   } else {
145:     MatCreate(A->comm,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE,&C);
146:     MatSetType(C,A->type_name);
147:     MatSeqSBAIJSetPreallocation(C,bs,0,lens);
148:   }
149:   c = (Mat_SeqSBAIJ *)(C->data);
  /* copy every selected block (bs2 scalars) of every selected block row into C */
150:   for (i=0; i<nrows; i++) {
151:     row    = irow[i];
152:     kstart = ai[row];
153:     kend   = kstart + a->ilen[row];
154:     mat_i  = c->i[i];
155:     mat_j  = c->j + mat_i;
156:     mat_a  = c->a + mat_i*bs2;
157:     mat_ilen = c->ilen + i;
158:     for (k=kstart; k<kend; k++) {
159:       if ((tcol=ssmap[a->j[k]])) {
160:         *mat_j++ = tcol - 1;
161:         PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));
162:         mat_a   += bs2;
163:         (*mat_ilen)++;
164:       }
165:     }
166:   }
167: 
168:   /* Free work space */
169:   PetscFree(smap);
170:   PetscFree(lens);
171:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
172:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
173: 
174:   ISRestoreIndices(isrow,&irow);
175:   *B = C;
176:   return(0);
177: }

/*
   MatGetSubMatrix_SeqSBAIJ - Extracts a submatrix of A given POINT indices.

   isrow must be the same IS object as iscol (PETSC_ERR_ARG_INCOMP otherwise). The
   indices are compressed to block indices: a block is selected when exactly bs of the
   given indices fall into it; a block that is hit by some but not bs indices raises
   PETSC_ERR_ARG_INCOMP. The compressed IS is handed to
   MatGetSubMatrix_SeqSBAIJ_Private() together with cs, scall and B, and destroyed
   afterwards.
*/
181: PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
182: {
183:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
184:   IS             is1;
186:   PetscInt       *vary,*iary,*irow,nrows,i,bs=A->bs,count;

189:   if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
190: 
191:   ISGetIndices(isrow,&irow);
192:   ISGetSize(isrow,&nrows);
193: 
194:   /* Verify if the indices correspond to each element in a block 
195:    and form the IS with compressed IS */
  /* one allocation holds both work arrays: vary (hits per block) and iary (selected blocks) */
196:   PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);
197:   iary = vary + a->mbs;
198:   PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));
199:   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
200: 
201:   count = 0;
202:   for (i=0; i<a->mbs; i++) {
203:     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_INCOMP,"Index set does not match blocks");
204:     if (vary[i]==bs) iary[count++] = i;
205:   }
206:   ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);
207: 
208:   ISRestoreIndices(isrow,&irow);
209:   PetscFree(vary);

211:   MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);
212:   ISDestroy(is1);
213:   return(0);
214: }

/*
   MatGetSubMatrices_SeqSBAIJ - Extracts n submatrices of A, one per pair
   (irow[i], icol[i]), by calling MatGetSubMatrix_SeqSBAIJ() for each pair.

   When scall is MAT_INITIAL_MATRIX the array *B (n+1 Mat slots) is allocated here;
   otherwise *B is used as passed in.
*/
218: PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
219: {
221:   PetscInt       i;

224:   if (scall == MAT_INITIAL_MATRIX) {
225:     PetscMalloc((n+1)*sizeof(Mat),B);
226:   }

228:   for (i=0; i<n; i++) {
229:     MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);
230:   }
231:   return(0);
232: }

234: /* -------------------------------------------------------*/
235: /* Should check that shapes of vectors and matrices match */
236: /* -------------------------------------------------------*/
237:  #include petscblaslapack.h

/*
   MatMult_SeqSBAIJ_1 - zz = A*xx for scalar (1x1) entries.

   zz is set to zero first. Each stored entry of row i is visited once: a leading entry
   whose column index equals i is applied as the diagonal; every other entry v at
   column cval is applied twice, z[cval] += v*x[i] and z[i] += v*x[cval].
   NOTE(review): *ib is read before n is examined, so an empty row still dereferences
   aj[ai[i]] - confirm that this is always a valid location.
*/
241: PetscErrorCode MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz)
242: {
243:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
244:   PetscScalar    *x,*z,*xb,x1,zero=0.0;
245:   MatScalar      *v;
247:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

250:   VecSet(&zero,zz);
251:   VecGetArray(xx,&x);
252:   VecGetArray(zz,&z);

254:   v  = a->a;
255:   xb = x;
256: 
  /* ai and xb are advanced once per row, so ai[0]/ai[1] and xb[0] always refer to row i */
257:   for (i=0; i<mbs; i++) {
258:     n  = ai[1] - ai[0];  /* length of i_th row of A */
259:     x1 = xb[0];
260:     ib = aj + *ai;
261:     jmin = 0;
262:     if (*ib == i) {      /* (diag of A)*x */
263:       z[i] += *v++ * x[*ib++];
264:       jmin++;
265:     }
266:     for (j=jmin; j<n; j++) {
267:       cval    = *ib;
268:       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
269:       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
270:     }
271:     xb++; ai++;
272:   }

274:   VecRestoreArray(xx,&x);
275:   VecRestoreArray(zz,&z);
276:   PetscLogFlops(2*(a->nz*2 - A->m) - A->m);  /* nz = (nz+m)/2 */
277:   return(0);
278: }

/*
   MatMult_SeqSBAIJ_2 - zz = A*xx with hand-unrolled 2x2 block arithmetic.

   zz is set to zero first. For each block row i the stored blocks (4 scalars each) are
   visited once: a leading block whose block column equals i is applied as the diagonal
   block (only v[0], v[2], v[3] are read); every other block is applied twice, once to
   the rows of its block column (offset cval) using x of block row i, and once to block
   row i using x at cval.
   NOTE(review): *ib is read before n is examined, so an empty block row still
   dereferences aj[ai[i]] - confirm that this is always a valid location.
*/
282: PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
283: {
284:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
285:   PetscScalar    *x,*z,*xb,x1,x2,zero=0.0;
286:   MatScalar      *v;
288:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;


292:   VecSet(&zero,zz);
293:   VecGetArray(xx,&x);
294:   VecGetArray(zz,&z);
295: 
296:   v     = a->a;
297:   xb = x;

  /* ai and xb are advanced once per block row; v walks the blocks in storage order */
299:   for (i=0; i<mbs; i++) {
300:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
301:     x1 = xb[0]; x2 = xb[1];
302:     ib = aj + *ai;
303:     jmin = 0;
304:     if (*ib == i){     /* (diag of A)*x */
305:       z[2*i]   += v[0]*x1 + v[2]*x2;
306:       z[2*i+1] += v[2]*x1 + v[3]*x2;
307:       v += 4; jmin++;
308:     }
309:     for (j=jmin; j<n; j++) {
310:       /* (strict lower triangular part of A)*x  */
311:       cval       = ib[j]*2;
312:       z[cval]     += v[0]*x1 + v[1]*x2;
313:       z[cval+1]   += v[2]*x1 + v[3]*x2;
314:       /* (strict upper triangular part of A)*x  */
315:       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
316:       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
317:       v  += 4;
318:     }
319:     xb +=2; ai++;
320:   }

322:   VecRestoreArray(xx,&x);
323:   VecRestoreArray(zz,&z);
324:   PetscLogFlops(8*(a->nz*2 - A->m) - A->m);
325:   return(0);
326: }

/*
   MatMult_SeqSBAIJ_3 - zz = A*xx with hand-unrolled 3x3 block arithmetic.

   zz is set to zero first. For each block row i the stored blocks (9 scalars each) are
   visited once: a leading block whose block column equals i is applied as the diagonal
   block (only v[0], v[3], v[4], v[6], v[7], v[8] are read); every other block is applied
   twice, once to the rows of its block column (offset cval) using x of block row i, and
   once to block row i using x at cval.
   NOTE(review): *ib is read before n is examined, so an empty block row still
   dereferences aj[ai[i]] - confirm that this is always a valid location.
*/
330: PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
331: {
332:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
333:   PetscScalar    *x,*z,*xb,x1,x2,x3,zero=0.0;
334:   MatScalar      *v;
336:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;


340:   VecSet(&zero,zz);
341:   VecGetArray(xx,&x);
342:   VecGetArray(zz,&z);
343: 
344:   v     = a->a;
345:   xb = x;

  /* ai and xb are advanced once per block row; v walks the blocks in storage order */
347:   for (i=0; i<mbs; i++) {
348:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
349:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
350:     ib = aj + *ai;
351:     jmin = 0;
352:     if (*ib == i){     /* (diag of A)*x */
353:       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
354:       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
355:       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
356:       v += 9; jmin++;
357:     }
358:     for (j=jmin; j<n; j++) {
359:       /* (strict lower triangular part of A)*x  */
360:       cval       = ib[j]*3;
361:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
362:       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
363:       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
364:       /* (strict upper triangular part of A)*x  */
365:       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
366:       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
367:       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
368:       v  += 9;
369:     }
370:     xb +=3; ai++;
371:   }

373:   VecRestoreArray(xx,&x);
374:   VecRestoreArray(zz,&z);
375:   PetscLogFlops(18*(a->nz*2 - A->m) - A->m);
376:   return(0);
377: }

/*
   MatMult_SeqSBAIJ_4 - zz = A*xx with hand-unrolled 4x4 block arithmetic.

   zz is set to zero first. For each block row i the stored blocks (16 scalars each) are
   visited once: a leading block whose block column equals i is applied as the diagonal
   block (v[1], v[2], v[3], v[6], v[7], v[11] are never read there); every other block is
   applied twice, once to the rows of its block column (offset cval) using x of block
   row i, and once to block row i using x at cval.
   NOTE(review): *ib is read before n is examined, so an empty block row still
   dereferences aj[ai[i]] - confirm that this is always a valid location.
*/
381: PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
382: {
383:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
384:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
385:   MatScalar      *v;
387:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

390:   VecSet(&zero,zz);
391:   VecGetArray(xx,&x);
392:   VecGetArray(zz,&z);
393: 
394:   v     = a->a;
395:   xb = x;

  /* ai and xb are advanced once per block row; v walks the blocks in storage order */
397:   for (i=0; i<mbs; i++) {
398:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
399:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
400:     ib = aj + *ai;
401:     jmin = 0;
402:     if (*ib == i){     /* (diag of A)*x */
403:       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
404:       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
405:       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
406:       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
407:       v += 16; jmin++;
408:     }
409:     for (j=jmin; j<n; j++) {
410:       /* (strict lower triangular part of A)*x  */
411:       cval       = ib[j]*4;
412:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
413:       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
414:       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
415:       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
416:       /* (strict upper triangular part of A)*x  */
417:       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
418:       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
419:       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
420:       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
421:       v  += 16;
422:     }
423:     xb +=4; ai++;
424:   }

426:   VecRestoreArray(xx,&x);
427:   VecRestoreArray(zz,&z);
428:   PetscLogFlops(32*(a->nz*2 - A->m) - A->m);
429:   return(0);
430: }

/*
   MatMult_SeqSBAIJ_5 - zz = A*xx with hand-unrolled 5x5 block arithmetic.

   zz is set to zero first. For each block row i the stored blocks (25 scalars each) are
   visited once: a leading block whose block column equals i is applied as the diagonal
   block (row r of the result reuses the entries already read for earlier rows, so only
   part of the block is read); every other block is applied twice, once to the rows of
   its block column (offset cval) using x of block row i, and once to block row i using
   x at cval.
   NOTE(review): *ib is read before n is examined, so an empty block row still
   dereferences aj[ai[i]] - confirm that this is always a valid location.
*/
434: PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
435: {
436:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
437:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
438:   MatScalar      *v;
440:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

443:   VecSet(&zero,zz);
444:   VecGetArray(xx,&x);
445:   VecGetArray(zz,&z);
446: 
447:   v     = a->a;
448:   xb = x;

  /* ai and xb are advanced once per block row; v walks the blocks in storage order */
450:   for (i=0; i<mbs; i++) {
451:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
452:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
453:     ib = aj + *ai;
454:     jmin = 0;
455:     if (*ib == i){      /* (diag of A)*x */
456:       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
457:       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
458:       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
459:       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
460:       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
461:       v += 25; jmin++;
462:     }
463:     for (j=jmin; j<n; j++) {
464:       /* (strict lower triangular part of A)*x  */
465:       cval       = ib[j]*5;
466:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
467:       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
468:       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
469:       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
470:       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
471:       /* (strict upper triangular part of A)*x  */
472:       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
473:       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
474:       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
475:       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
476:       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
477:       v  += 25;
478:     }
479:     xb +=5; ai++;
480:   }

482:   VecRestoreArray(xx,&x);
483:   VecRestoreArray(zz,&z);
484:   PetscLogFlops(50*(a->nz*2 - A->m) - A->m);
485:   return(0);
486: }


/*
   MatMult_SeqSBAIJ_6 - zz = A*xx with hand-unrolled 6x6 block arithmetic.

   zz is set to zero first. For each block row i the stored blocks (36 scalars each) are
   visited once: a leading block whose block column equals i is applied as the diagonal
   block (row r of the result reuses the entries already read for earlier rows, so only
   part of the block is read); every other block is applied twice, once to the rows of
   its block column (offset cval) using x of block row i, and once to block row i using
   x at cval.
   NOTE(review): *ib is read before n is examined, so an empty block row still
   dereferences aj[ai[i]] - confirm that this is always a valid location.
*/
491: PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
492: {
493:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
494:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
495:   MatScalar      *v;
497:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

500:   VecSet(&zero,zz);
501:   VecGetArray(xx,&x);
502:   VecGetArray(zz,&z);
503: 
504:   v     = a->a;
505:   xb = x;

  /* ai and xb are advanced once per block row; v walks the blocks in storage order */
507:   for (i=0; i<mbs; i++) {
508:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
509:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
510:     ib = aj + *ai;
511:     jmin = 0;
512:     if (*ib == i){      /* (diag of A)*x */
513:       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
514:       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
515:       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
516:       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
517:       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
518:       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
519:       v += 36; jmin++;
520:     }
521:     for (j=jmin; j<n; j++) {
522:       /* (strict lower triangular part of A)*x  */
523:       cval       = ib[j]*6;
524:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
525:       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
526:       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
527:       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
528:       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
529:       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
530:       /* (strict upper triangular part of A)*x  */
531:       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
532:       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
533:       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
534:       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
535:       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
536:       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
537:       v  += 36;
538:     }
539:     xb +=6; ai++;
540:   }

542:   VecRestoreArray(xx,&x);
543:   VecRestoreArray(zz,&z);
544:   PetscLogFlops(72*(a->nz*2 - A->m) - A->m);
545:   return(0);
546: }
/*
   MatMult_SeqSBAIJ_7 - zz = A*xx with hand-unrolled 7x7 block arithmetic.

   zz is set to zero first. For each block row i the stored blocks (49 scalars each) are
   visited once: a leading block whose block column equals i is applied as the diagonal
   block (row r of the result reuses the entries already read for earlier rows, so only
   part of the block is read); every other block is applied twice, once to the rows of
   its block column (offset cval) using x of block row i, and once to block row i using
   x at cval.
   NOTE(review): *ib is read before n is examined, so an empty block row still
   dereferences aj[ai[i]] - confirm that this is always a valid location.
*/
549: PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
550: {
551:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
552:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
553:   MatScalar      *v;
555:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

558:   VecSet(&zero,zz);
559:   VecGetArray(xx,&x);
560:   VecGetArray(zz,&z);
561: 
562:   v     = a->a;
563:   xb = x;

  /* ai and xb are advanced once per block row; v walks the blocks in storage order */
565:   for (i=0; i<mbs; i++) {
566:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
567:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
568:     ib = aj + *ai;
569:     jmin = 0;
570:     if (*ib == i){      /* (diag of A)*x */
571:       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
572:       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
573:       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
574:       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
575:       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
576:       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
577:       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
578:       v += 49; jmin++;
579:     }
580:     for (j=jmin; j<n; j++) {
581:       /* (strict lower triangular part of A)*x  */
582:       cval       = ib[j]*7;
583:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
584:       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
585:       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
586:       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
587:       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
588:       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
589:       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
590:       /* (strict upper triangular part of A)*x  */
591:       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
592:       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
593:       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
594:       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
595:       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
596:       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
597:       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
598:       v  += 49;
599:     }
600:     xb +=7; ai++;
601:   }
602:   VecRestoreArray(xx,&x);
603:   VecRestoreArray(zz,&z);
604:   PetscLogFlops(98*(a->nz*2 - A->m) - A->m);
605:   return(0);
606: }

608: /*
609:     This will not work with MatScalar == float because it calls the BLAS
610: */
/*
   MatMult_SeqSBAIJ_N - zz = A*xx for a general block size bs, using the two block
   kernels instead of hand-unrolled arithmetic.

   zz is set to zero first. For each block row i:
     1. the x blocks of all stored block columns are gathered into the work array and
        passed, with the row's blocks, to Kernel_w_gets_w_plus_Ar_times_v (per the
        in-code comment: z(i*bs:(i+1)*bs-1) += A(i,:)*x);
     2. a leading diagonal block (block column == i) is skipped, the work array is
        zeroed and filled by Kernel_w_gets_w_plus_trans_Ar_times_v from the x block of
        row i, and the result is scatter-added into the z blocks of the remaining
        block columns.

   a->mult_work is allocated on first use (A->m+1 scalars) and kept on the matrix.
   x and z are advanced by bs per block row; x_ptr and z_ptr keep the array starts.
*/
613: PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
614: {
615:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
616:   PetscScalar    *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
617:   MatScalar      *v;
619:   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->bs,j,n,bs2=a->bs2,ncols,k;

622:   VecSet(&zero,zz);
623:   VecGetArray(xx,&x); x_ptr=x;
624:   VecGetArray(zz,&z); z_ptr=z;

626:   aj   = a->j;
627:   v    = a->a;
628:   ii   = a->i;

630:   if (!a->mult_work) {
631:     PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);
632:   }
633:   work = a->mult_work;
634: 
635:   for (i=0; i<mbs; i++) {
636:     n     = ii[1] - ii[0]; ncols = n*bs;
637:     workt = work; idx=aj+ii[0];

639:     /* upper triangular part */
640:     for (j=0; j<n; j++) {
641:       xb = x_ptr + bs*(*idx++);
642:       for (k=0; k<bs; k++) workt[k] = xb[k];
643:       workt += bs;
644:     }
645:     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
646:     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
647: 
648:     /* strict lower triangular part */
649:     idx = aj+ii[0];
650:     if (*idx == i){
651:       ncols -= bs; v += bs2; idx++; n--;
652:     }
653: 
654:     if (ncols > 0){
655:       workt = work;
656:       PetscMemzero(workt,ncols*sizeof(PetscScalar));
657:       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
658:       for (j=0; j<n; j++) {
659:         zb = z_ptr + bs*(*idx++);
660:         for (k=0; k<bs; k++) zb[k] += workt[k] ;
661:         workt += bs;
662:       }
663:     }
664:     x += bs; v += n*bs2; z += bs; ii++;
665:   }
666: 
  /* x and z were advanced inside the loop; hand back the pointers that VecGetArray()
     actually returned */
667:   VecRestoreArray(xx,&x_ptr);
668:   VecRestoreArray(zz,&z_ptr);
669:   PetscLogFlops(2*(a->nz*2 - A->m)*bs2 - A->m);
670:   return(0);
671: }

/*
   MatMultAdd_SeqSBAIJ_1 - zz = yy + A*xx for scalar (1x1) entries.

   When yy is the same Vec as xx its array is shared with x; when zz is a different Vec
   from yy the yy->n entries of y are copied into z first, otherwise the product is
   accumulated directly into y. Each stored entry of row i is then visited once: a
   leading entry whose column index equals i is applied as the diagonal; every other
   entry v at column cval adds v*x[i] to z[cval] and v*x[cval] to z[i].
   NOTE(review): *ib is read before n is examined, so an empty row still dereferences
   aj[ai[i]] - confirm that this is always a valid location.
*/
675: PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
676: {
677:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
678:   PetscScalar    *x,*y,*z,*xb,x1;
679:   MatScalar      *v;
681:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

684:   VecGetArray(xx,&x);
685:   if (yy != xx) {
686:     VecGetArray(yy,&y);
687:   } else {
688:     y = x;
689:   }
690:   if (zz != yy) {
691:     /* VecCopy(yy,zz); */
692:     VecGetArray(zz,&z);
693:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
694:   } else {
695:     z = y;
696:   }

698:   v  = a->a;
699:   xb = x;

  /* ai and xb are advanced once per row, so ai[0]/ai[1] and xb[0] always refer to row i */
701:   for (i=0; i<mbs; i++) {
702:     n  = ai[1] - ai[0];  /* length of i_th row of A */
703:     x1 = xb[0];
704:     ib = aj + *ai;
705:     jmin = 0;
706:     if (*ib == i) {            /* (diag of A)*x */
707:       z[i] += *v++ * x[*ib++]; jmin++;
708:     }
709:     for (j=jmin; j<n; j++) {
710:       cval    = *ib;
711:       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
712:       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
713:     }
714:     xb++; ai++;
715:   }

717:   VecRestoreArray(xx,&x);
718:   if (yy != xx) VecRestoreArray(yy,&y);
719:   if (zz != yy) VecRestoreArray(zz,&z);
720: 
721:   PetscLogFlops(2*(a->nz*2 - A->m));
722:   return(0);
723: }

/*
   MatMultAdd_SeqSBAIJ_2 - zz = yy + A*xx with hand-unrolled 2x2 block arithmetic.

   When yy is the same Vec as xx its array is shared with x; when zz is a different Vec
   from yy the yy->n entries of y are copied into z first, otherwise the product is
   accumulated directly into y. For each block row i the stored blocks (4 scalars each)
   are visited once: a leading block whose block column equals i is applied as the
   diagonal block (only v[0], v[2], v[3] are read); every other block is applied twice,
   once to the rows of its block column (offset cval) and once to block row i.
   NOTE(review): *ib is read before n is examined, so an empty block row still
   dereferences aj[ai[i]] - confirm that this is always a valid location.
   NOTE(review): the logged flop count uses a factor 4 while MatMult_SeqSBAIJ_2 uses 8 -
   confirm which is intended.
*/
727: PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
728: {
729:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
730:   PetscScalar    *x,*y,*z,*xb,x1,x2;
731:   MatScalar      *v;
733:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

736:   VecGetArray(xx,&x);
737:   if (yy != xx) {
738:     VecGetArray(yy,&y);
739:   } else {
740:     y = x;
741:   }
742:   if (zz != yy) {
743:     /* VecCopy(yy,zz); */
744:     VecGetArray(zz,&z);
745:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
746:   } else {
747:     z = y;
748:   }

750:   v     = a->a;
751:   xb = x;

  /* ai and xb are advanced once per block row; v walks the blocks in storage order */
753:   for (i=0; i<mbs; i++) {
754:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
755:     x1 = xb[0]; x2 = xb[1];
756:     ib = aj + *ai;
757:     jmin = 0;
758:     if (*ib == i){      /* (diag of A)*x */
759:       z[2*i]   += v[0]*x1 + v[2]*x2;
760:       z[2*i+1] += v[2]*x1 + v[3]*x2;
761:       v += 4; jmin++;
762:     }
763:     for (j=jmin; j<n; j++) {
764:       /* (strict lower triangular part of A)*x  */
765:       cval       = ib[j]*2;
766:       z[cval]     += v[0]*x1 + v[1]*x2;
767:       z[cval+1]   += v[2]*x1 + v[3]*x2;
768:       /* (strict upper triangular part of A)*x  */
769:       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
770:       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
771:       v  += 4;
772:     }
773:     xb +=2; ai++;
774:   }

776:   VecRestoreArray(xx,&x);
777:   if (yy != xx) VecRestoreArray(yy,&y);
778:   if (zz != yy) VecRestoreArray(zz,&z);

780:   PetscLogFlops(4*(a->nz*2 - A->m));
781:   return(0);
782: }

/*
   MatMultAdd_SeqSBAIJ_3 - zz = yy + A*xx with hand-unrolled 3x3 block arithmetic.

   When yy is the same Vec as xx its array is shared with x; when zz is a different Vec
   from yy the yy->n entries of y are copied into z first, otherwise the product is
   accumulated directly into y. For each block row i the stored blocks (9 scalars each)
   are visited once: a leading block whose block column equals i is applied as the
   diagonal block (only v[0], v[3], v[4], v[6], v[7], v[8] are read); every other block
   is applied twice, once to the rows of its block column (offset cval) and once to
   block row i.
   NOTE(review): *ib is read before n is examined, so an empty block row still
   dereferences aj[ai[i]] - confirm that this is always a valid location.
*/
786: PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
787: {
788:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
789:   PetscScalar    *x,*y,*z,*xb,x1,x2,x3;
790:   MatScalar      *v;
792:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

795:   VecGetArray(xx,&x);
796:   if (yy != xx) {
797:     VecGetArray(yy,&y);
798:   } else {
799:     y = x;
800:   }
801:   if (zz != yy) {
802:     /* VecCopy(yy,zz); */
803:     VecGetArray(zz,&z);
804:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
805:   } else {
806:     z = y;
807:   }

809:   v     = a->a;
810:   xb = x;

  /* ai and xb are advanced once per block row; v walks the blocks in storage order */
812:   for (i=0; i<mbs; i++) {
813:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
814:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
815:     ib = aj + *ai;
816:     jmin = 0;
817:     if (*ib == i){     /* (diag of A)*x */
818:      z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
819:      z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
820:      z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
821:      v += 9; jmin++;
822:     }
823:     for (j=jmin; j<n; j++) {
824:       /* (strict lower triangular part of A)*x  */
825:       cval       = ib[j]*3;
826:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
827:       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
828:       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
829:       /* (strict upper triangular part of A)*x  */
830:       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
831:       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
832:       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
833:       v  += 9;
834:     }
835:     xb +=3; ai++;
836:   }

838:   VecRestoreArray(xx,&x);
839:   if (yy != xx) VecRestoreArray(yy,&y);
840:   if (zz != yy) VecRestoreArray(zz,&z);

842:   PetscLogFlops(18*(a->nz*2 - A->m));
843:   return(0);
844: }

/*
   MatMultAdd_SeqSBAIJ_4 - zz = yy + A*xx with hand-unrolled 4x4 block arithmetic.

   When yy is the same Vec as xx its array is shared with x; when zz is a different Vec
   from yy the yy->n entries of y are copied into z first, otherwise the product is
   accumulated directly into y. For each block row i the stored blocks (16 scalars
   each) are visited once: a leading block whose block column equals i is applied as
   the diagonal block (v[1], v[2], v[3], v[6], v[7], v[11] are never read there); every
   other block is applied twice, once to the rows of its block column (offset cval) and
   once to block row i.
   NOTE(review): *ib is read before n is examined, so an empty block row still
   dereferences aj[ai[i]] - confirm that this is always a valid location.
*/
848: PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
849: {
850:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
851:   PetscScalar    *x,*y,*z,*xb,x1,x2,x3,x4;
852:   MatScalar      *v;
854:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

857:   VecGetArray(xx,&x);
858:   if (yy != xx) {
859:     VecGetArray(yy,&y);
860:   } else {
861:     y = x;
862:   }
863:   if (zz != yy) {
864:     /* VecCopy(yy,zz); */
865:     VecGetArray(zz,&z);
866:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
867:   } else {
868:     z = y;
869:   }

871:   v     = a->a;
872:   xb = x;

  /* ai and xb are advanced once per block row; v walks the blocks in storage order */
874:   for (i=0; i<mbs; i++) {
875:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
876:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
877:     ib = aj + *ai;
878:     jmin = 0;
879:     if (*ib == i){      /* (diag of A)*x */
880:       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
881:       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
882:       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
883:       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
884:       v += 16; jmin++;
885:     }
886:     for (j=jmin; j<n; j++) {
887:       /* (strict lower triangular part of A)*x  */
888:       cval       = ib[j]*4;
889:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
890:       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
891:       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
892:       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
893:       /* (strict upper triangular part of A)*x  */
894:       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
895:       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
896:       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
897:       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
898:       v  += 16;
899:     }
900:     xb +=4; ai++;
901:   }

903:   VecRestoreArray(xx,&x);
904:   if (yy != xx) VecRestoreArray(yy,&y);
905:   if (zz != yy) VecRestoreArray(zz,&z);

907:   PetscLogFlops(32*(a->nz*2 - A->m));
908:   return(0);
909: }

/*
   MatMultAdd_SeqSBAIJ_5 - zz = yy + A*xx with hand-unrolled 5x5 block arithmetic.

   When yy is the same Vec as xx its array is shared with x; when zz is a different Vec
   from yy the yy->n entries of y are copied into z first, otherwise the product is
   accumulated directly into y. For each block row i the stored blocks (25 scalars
   each) are visited once: a leading block whose block column equals i is applied as
   the diagonal block (row r of the result reuses the entries already read for earlier
   rows, so only part of the block is read); every other block is applied twice, once
   to the rows of its block column (offset cval) and once to block row i.
   NOTE(review): *ib is read before n is examined, so an empty block row still
   dereferences aj[ai[i]] - confirm that this is always a valid location.
*/
913: PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
914: {
915:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
916:   PetscScalar    *x,*y,*z,*xb,x1,x2,x3,x4,x5;
917:   MatScalar      *v;
919:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

922:   VecGetArray(xx,&x);
923:   if (yy != xx) {
924:     VecGetArray(yy,&y);
925:   } else {
926:     y = x;
927:   }
928:   if (zz != yy) {
929:     /* VecCopy(yy,zz); */
930:     VecGetArray(zz,&z);
931:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
932:   } else {
933:     z = y;
934:   }

936:   v     = a->a;
937:   xb = x;

  /* ai and xb are advanced once per block row; v walks the blocks in storage order */
939:   for (i=0; i<mbs; i++) {
940:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
941:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
942:     ib = aj + *ai;
943:     jmin = 0;
944:     if (*ib == i){      /* (diag of A)*x */
945:       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
946:       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
947:       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
948:       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
949:       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
950:       v += 25; jmin++;
951:     }
952:     for (j=jmin; j<n; j++) {
953:       /* (strict lower triangular part of A)*x  */
954:       cval       = ib[j]*5;
955:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
956:       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
957:       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
958:       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
959:       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
960:       /* (strict upper triangular part of A)*x  */
961:       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
962:       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
963:       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
964:       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
965:       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
966:       v  += 25;
967:     }
968:     xb +=5; ai++;
969:   }

971:   VecRestoreArray(xx,&x);
972:   if (yy != xx) VecRestoreArray(yy,&y);
973:   if (zz != yy) VecRestoreArray(zz,&z);

975:   PetscLogFlops(50*(a->nz*2 - A->m));
976:   return(0);
977: }
/*
   MatMultAdd_SeqSBAIJ_6 - Computes z = y + A*x for a SeqSBAIJ matrix, with the
   block arithmetic written out by hand for 6x6 blocks (36 scalars per block).

   Input Parameters:
     A  - the matrix; a->i, a->j and a->a are walked as per-block-row offsets,
          block column indices and block values
     xx - vector that is multiplied
     yy - vector that is added to the product

   Output Parameter:
     zz - the result; when zz is the same Vec as yy the sum is accumulated
          directly into yy's array, otherwise yy is first copied into zz

   Every stored off-diagonal block is applied twice: once into the rows of
   block ib[j] and once into block row i.  A block whose column index equals
   the row index (tested only on the first entry of the row) is applied once.
*/
980: PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
981: {
982:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
983:   PetscScalar    *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6;
984:   MatScalar      *v;
986:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

       /* xx, yy and zz may be the same Vec; each distinct one is accessed once */
989:   VecGetArray(xx,&x);
990:   if (yy != xx) {
991:     VecGetArray(yy,&y);
992:   } else {
993:     y = x;
994:   }
995:   if (zz != yy) {
996:     /* VecCopy(yy,zz); */
997:     VecGetArray(zz,&z);
         /* seed z with y so that the loop below only has to accumulate A*x */
998:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
999:   } else {
1000:     z = y;
1001:   }

1003:   v     = a->a;
1004:   xb = x;

        /* v, xb and ai advance together; ai is used as a moving pointer, so
           ai[0] and ai[1] always bracket the current block row */
1006:   for (i=0; i<mbs; i++) {
1007:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
1008:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
1009:     ib = aj + *ai;
1010:     jmin = 0;
          /* NOTE(review): *ib is read before n is consulted, so a block row with
             no stored blocks would inspect the next row's first column index -
             confirm callers never produce empty block rows */
          /* only v[0], v[6..7], v[12..14], v[18..21], v[24..28] and v[30..35]
             are read here: one triangle of the diagonal block, mirrored to
             supply the other */
1011:     if (*ib == i){     /* (diag of A)*x */
1012:       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
1013:       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
1014:       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
1015:       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
1016:       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
1017:       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
1018:       v += 36; jmin++;
1019:     }
1020:     for (j=jmin; j<n; j++) {
1021:       /* (strict lower triangular part of A)*x  */
1022:       cval       = ib[j]*6;
1023:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
1024:       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
1025:       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
1026:       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
1027:       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
1028:       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
1029:       /* (strict upper triangular part of A)*x  */
1030:       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
1031:       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
1032:       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
1033:       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
1034:       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
1035:       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
1036:       v  += 36;
1037:     }
1038:     xb +=6; ai++;
1039:   }

1041:   VecRestoreArray(xx,&x);
1042:   if (yy != xx) VecRestoreArray(yy,&y);
1043:   if (zz != yy) VecRestoreArray(zz,&z);

        /* 72 = 2*36: one multiply and one add per entry of a 6x6 block */
1045:   PetscLogFlops(72*(a->nz*2 - A->m));
1046:   return(0);
1047: }

/*
   MatMultAdd_SeqSBAIJ_7 - Computes z = y + A*x for a SeqSBAIJ matrix, with the
   block arithmetic written out by hand for 7x7 blocks (49 scalars per block).

   Input Parameters:
     A  - the matrix; a->i, a->j and a->a are walked as per-block-row offsets,
          block column indices and block values
     xx - vector that is multiplied
     yy - vector that is added to the product

   Output Parameter:
     zz - the result; when zz is the same Vec as yy the sum is accumulated
          directly into yy's array, otherwise yy is first copied into zz

   Every stored off-diagonal block is applied twice: once into the rows of
   block ib[j] and once into block row i.  A block whose column index equals
   the row index (tested only on the first entry of the row) is applied once.
*/
1051: PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1052: {
1053:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1054:   PetscScalar    *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
1055:   MatScalar      *v;
1057:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

        /* xx, yy and zz may be the same Vec; each distinct one is accessed once */
1060:   VecGetArray(xx,&x);
1061:   if (yy != xx) {
1062:     VecGetArray(yy,&y);
1063:   } else {
1064:     y = x;
1065:   }
1066:   if (zz != yy) {
1067:     /* VecCopy(yy,zz); */
1068:     VecGetArray(zz,&z);
          /* seed z with y so that the loop below only has to accumulate A*x */
1069:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
1070:   } else {
1071:     z = y;
1072:   }

1074:   v     = a->a;
1075:   xb = x;

        /* v, xb and ai advance together; ai is used as a moving pointer, so
           ai[0] and ai[1] always bracket the current block row */
1077:   for (i=0; i<mbs; i++) {
1078:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
1079:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
1080:     ib = aj + *ai;
1081:     jmin = 0;
          /* NOTE(review): *ib is read before n is consulted, so a block row with
             no stored blocks would inspect the next row's first column index -
             confirm callers never produce empty block rows */
          /* only v[0], v[7..8], v[14..16], v[21..24], v[28..32], v[35..40] and
             v[42..48] are read here: one triangle of the diagonal block,
             mirrored to supply the other */
1082:     if (*ib == i){     /* (diag of A)*x */
1083:       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
1084:       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
1085:       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
1086:       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
1087:       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
1088:       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
1089:       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1090:       v += 49; jmin++;
1091:     }
1092:     for (j=jmin; j<n; j++) {
1093:       /* (strict lower triangular part of A)*x  */
1094:       cval       = ib[j]*7;
1095:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
1096:       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
1097:       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
1098:       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
1099:       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
1100:       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
1101:       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1102:       /* (strict upper triangular part of A)*x  */
1103:       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
1104:       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
1105:       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
1106:       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
1107:       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
1108:       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
1109:       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
1110:       v  += 49;
1111:     }
1112:     xb +=7; ai++;
1113:   }

1115:   VecRestoreArray(xx,&x);
1116:   if (yy != xx) VecRestoreArray(yy,&y);
1117:   if (zz != yy) VecRestoreArray(zz,&z);

        /* 98 = 2*49: one multiply and one add per entry of a 7x7 block */
1119:   PetscLogFlops(98*(a->nz*2 - A->m));
1120:   return(0);
1121: }

/*
   MatMultAdd_SeqSBAIJ_N - Computes z = y + A*x for a SeqSBAIJ matrix of
   arbitrary block size bs, using the generic dense block kernels.

   Input Parameters:
     A  - the matrix; a->i, a->j and a->a are walked as per-block-row offsets,
          block column indices and block values
     xx - vector that is multiplied
     yy - vector that is added to the product

   Output Parameter:
     zz - the result; when zz is the same Vec as yy the sum is accumulated
          directly into yy's array, otherwise yy is first copied into zz

   Notes:
     x and z are advanced by bs per block row, so x_ptr / z_ptr keep the base
     addresses needed to index by block column.  z_ptr is now set on both
     branches: previously it stayed 0 when zz == yy, so the scatter in the
     "strict lower triangular part" wrote through a null-based pointer.

     a->mult_work is allocated lazily (A->m+1 scalars) and kept on the matrix
     for reuse by later calls.
*/
1125: PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1126: {
1127:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1128:   PetscScalar    *x,*x_ptr,*y,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1129:   MatScalar      *v;
1131:   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->bs,j,n,bs2=a->bs2,ncols,k;

1134:   VecGetArray(xx,&x); x_ptr=x;
1135:   if (yy != xx) {
1136:     VecGetArray(yy,&y);
1137:   } else {
1138:     y = x;
1139:   }
1140:   if (zz != yy) {
1141:     /* VecCopy(yy,zz); */
1142:     VecGetArray(zz,&z); z_ptr=z;
1143:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
1144:   } else {
1145:     z = y; z_ptr=z;  /* in-place add: the scatter below must target y's array */
1146:   }

1148:   aj   = a->j;
1149:   v    = a->a;
1150:   ii   = a->i;

1152:   if (!a->mult_work) {
1153:     PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);
1154:   }
1155:   work = a->mult_work;
1156: 
1157: 
1158:   for (i=0; i<mbs; i++) {
1159:     n     = ii[1] - ii[0]; ncols = n*bs;
1160:     workt = work; idx=aj+ii[0];

1162:     /* upper triangular part */
          /* gather the x entries of every block column in this row into work */
1163:     for (j=0; j<n; j++) {
1164:       xb = x_ptr + bs*(*idx++);
1165:       for (k=0; k<bs; k++) workt[k] = xb[k];
1166:       workt += bs;
1167:     }
1168:     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
1169:     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);

1171:     /* strict lower triangular part */
1172:     idx = aj+ii[0];
          /* a leading diagonal block was fully handled above: step over it */
1173:     if (*idx == i){
1174:       ncols -= bs; v += bs2; idx++; n--;
1175:     }
1176:     if (ncols > 0){
1177:       workt = work;
1178:       PetscMemzero(workt,ncols*sizeof(PetscScalar));
1179:       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
            /* scatter-add the transposed products into the mirrored block rows */
1180:       for (j=0; j<n; j++) {
1181:         zb = z_ptr + bs*(*idx++);
1182:         /* idx++; */
1183:         for (k=0; k<bs; k++) zb[k] += workt[k] ;
1184:         workt += bs;
1185:       }
1186:     }

1188:     x += bs; v += n*bs2; z += bs; ii++;
1189:   }

1191:   VecRestoreArray(xx,&x);
1192:   if (yy != xx) VecRestoreArray(yy,&y);
1193:   if (zz != yy) VecRestoreArray(zz,&z);

        /* 2*bs2 flops per block, matching the 32/50/72/98 used by the
           fixed-size kernels for bs = 4..7 */
1195:   PetscLogFlops(2*bs2*(a->nz*2 - A->m));
1196:   return(0);
1197: }

/*
   MatScale_SeqSBAIJ - Multiplies every stored value of the matrix by *alpha.

   The stored values are treated as one contiguous array of bs2*nz scalars and
   scaled with a single unit-stride BLAS call; the same count is logged as flops.
*/
1201: PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar *alpha,Mat inA)
1202: {
1203:   Mat_SeqSBAIJ *sbaij  = (Mat_SeqSBAIJ*)inA->data;
1204:   PetscBLASInt stride  = 1,nscalars = (PetscBLASInt)sbaij->bs2*sbaij->nz;

1207:   BLscal_(&nscalars,(PetscScalar*)alpha,sbaij->a,&stride);
1208:   PetscLogFlops(nscalars);
1209:   return(0);
1210: }

/*
   MatNorm_SeqSBAIJ - Computes a norm of a SeqSBAIJ matrix from its stored blocks.

   Input Parameters:
     A    - the matrix
     type - NORM_FROBENIUS or NORM_INFINITY; any other value raises
            PETSC_ERR_SUP

   Output Parameter:
     norm - the computed norm

   Notes:
     Frobenius: squares of the entries of diagonal blocks are counted once and
     those of all other stored blocks twice, since each of those blocks stands
     for two mirrored blocks of the full matrix.

     Infinity: for block row k the absolute row sums are assembled from two
     sources - the blocks stored in row k itself, and the blocks stored in
     earlier rows i whose column index is k.  The second set is found through
     the linked lists jl/il, which are updated as k advances.
*/
1214: PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
1215: {
1216:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1217:   MatScalar      *v = a->a;
1218:   PetscReal      sum_diag = 0.0, sum_off = 0.0, *sum;
1219:   PetscInt       i,j,k,bs = A->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
1221:   PetscInt       *jl,*il,jmin,jmax,nexti,ik,*col;
1222: 
1224:   if (type == NORM_FROBENIUS) {
          /* v walks a->a sequentially, so blocks must be visited in storage order */
1225:     for (k=0; k<mbs; k++){
1226:       jmin = a->i[k]; jmax = a->i[k+1];
1227:       col  = aj + jmin;
            /* NOTE(review): *col is read even when jmin == jmax (no blocks in
               this row) - confirm that cannot happen or is harmless */
1228:       if (*col == k){         /* diagonal block */
1229:         for (i=0; i<bs2; i++){
1230: #if defined(PETSC_USE_COMPLEX)
1231:           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
1232: #else
1233:           sum_diag += (*v)*(*v); v++;
1234: #endif
1235:         }
1236:         jmin++;
1237:       }
1238:       for (j=jmin; j<jmax; j++){  /* off-diagonal blocks */
1239:         for (i=0; i<bs2; i++){
1240: #if defined(PETSC_USE_COMPLEX)
1241:           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
1242: #else
1243:           sum_off += (*v)*(*v); v++;
1244: #endif  
1245:         }
1246:       }
1247:     }
          /* stored off-diagonal blocks count twice (block and its mirror image) */
1248:     *norm = sqrt(sum_diag + 2*sum_off);

1250:   }  else if (type == NORM_INFINITY) { /* maximum row sum */
1251:     PetscMalloc(mbs*sizeof(PetscInt),&il);
1252:     PetscMalloc(mbs*sizeof(PetscInt),&jl);
1253:     PetscMalloc(bs*sizeof(PetscReal),&sum);
          /* jl[i] == mbs marks "end of list"; note il[0] is rewritten on every
             pass, only jl[i] varies with i */
1254:     for (i=0; i<mbs; i++) {
1255:       jl[i] = mbs; il[0] = 0;
1256:     }

1258:     *norm = 0.0;
1259:     for (k=0; k<mbs; k++) { /* k_th block row */
1260:       for (j=0; j<bs; j++) sum[j]=0.0;

1262:       /*-- col sum --*/
1263:       i = jl[k]; /* first |A(i,k)| to be added */
1264:       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1265:                   at step k */
1266:       while (i<mbs){
1267:         nexti = jl[i];  /* next block row to be added */
1268:         ik    = il[i];  /* block index of A(i,k) in the array a */
              /* sum[j] gathers bs consecutive entries starting at offset j*bs
                 of block ik */
1269:         for (j=0; j<bs; j++){
1270:           v = a->a + ik*bs2 + j*bs;
1271:           for (k1=0; k1<bs; k1++) {
1272:             sum[j] += PetscAbsScalar(*v); v++;
1273:           }
1274:         }
1275:         /* update il, jl */
1276:         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1277:         jmax = a->i[i+1];
1278:         if (jmin < jmax){
1279:           il[i] = jmin;
1280:           j   = a->j[jmin];
                /* push row i onto the front of the list for block column j */
1281:           jl[i] = jl[j]; jl[j]=i;
1282:         }
1283:         i = nexti;
1284:       }
1285: 
1286:       /*-- row sum --*/
1287:       jmin = a->i[k]; jmax = a->i[k+1];
1288:       for (i=jmin; i<jmax; i++) {
              /* sum[j] gathers the bs entries of block i at stride bs starting
                 at offset j */
1289:         for (j=0; j<bs; j++){
1290:           v = a->a + i*bs2 + j;
1291:           for (k1=0; k1<bs; k1++){
1292:             sum[j] += PetscAbsScalar(*v);
1293:             v   += bs;
1294:           }
1295:         }
1296:       }
1297:       /* add k_th block row to il, jl */
1298:       col = aj+jmin;
1299:       if (*col == k) jmin++;
1300:       if (jmin < jmax){
1301:         il[k] = jmin;
1302:         j   = a->j[jmin];
1303:         jl[k] = jl[j]; jl[j] = k;
1304:       }
            /* fold the bs row sums of this block row into the running maximum */
1305:       for (j=0; j<bs; j++){
1306:         if (sum[j] > *norm) *norm = sum[j];
1307:       }
1308:     }
1309:     PetscFree(il);
1310:     PetscFree(jl);
1311:     PetscFree(sum);
1312:   } else {
1313:     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1314:   }
1315:   return(0);
1316: }

/*
   MatEqual_SeqSBAIJ - Tests whether two SeqSBAIJ matrices hold identical data.

   Input Parameters:
     A, B - the matrices to compare

   Output Parameter:
     flg - PETSC_TRUE when the dimensions, block size, number of stored blocks,
           block-row offsets, block column indices and stored values all
           match; PETSC_FALSE otherwise

   Notes:
     The comparison is bytewise on the stored arrays; it stops at the first
     array that differs.  The value array holds MatScalar entries, so its
     length is measured in sizeof(MatScalar).
*/
1320: PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
1321: {
1322:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;


1327:   /* matrices with different dimensions, block size or number of stored blocks cannot be equal */
1328:   if ((A->m != B->m) || (A->n != B->n) || (A->bs != B->bs)|| (a->nz != b->nz)) {
1329:     *flg = PETSC_FALSE;
1330:     return(0);
1331:   }
1332: 
1333:   /* if the a->i are the same */
1334:   PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);
1335:   if (*flg == PETSC_FALSE) {
1336:     return(0);
1337:   }
1338: 
1339:   /* if a->j are the same */
1340:   PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);
1341:   if (*flg == PETSC_FALSE) {
1342:     return(0);
1343:   }
1344:   /* if a->a are the same; a->a stores MatScalar, which need not have the size of PetscScalar */
1345:   PetscMemcmp(a->a,b->a,(a->nz)*(A->bs)*(A->bs)*sizeof(MatScalar),flg);
1346: 
1347:   return(0);
1348: }

/*
   MatGetDiagonal_SeqSBAIJ - Copies the main diagonal of the matrix into v.

   Input Parameter:
     A - the matrix

   Output Parameter:
     v - receives the diagonal; entries of block rows whose first stored block
         is not the diagonal block are left at zero

   Errors:
     PETSC_ERR_ARG_WRONGSTATE - A is factored and bs > 1
     PETSC_ERR_ARG_SIZ        - local size of v differs from A->m

   Notes:
     For a factored matrix with bs == 1 the reciprocal of each stored diagonal
     value is returned.

     The size of v is validated before v is zeroed or its array is accessed, so
     a nonconforming vector is returned untouched and no array is left checked
     out on the error path.
*/
1352: PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
1353: {
1354:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1356:   PetscInt       i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1357:   PetscScalar    *x,zero = 0.0;
1358:   MatScalar      *aa,*aa_j;

1361:   bs   = A->bs;
1362:   if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
1363: 
1364:   aa   = a->a;
1365:   ai   = a->i;
1366:   aj   = a->j;
1367:   ambs = a->mbs;
1368:   bs2  = a->bs2;

        /* validate first: nothing below may run on a vector of the wrong size */
1370:   VecGetLocalSize(v,&n);
1371:   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1372:   VecSet(&zero,v);
1373:   VecGetArray(v,&x);
1374:   for (i=0; i<ambs; i++) {
1375:     j=ai[i];
1376:     if (aj[j] == i) {             /* if this is a diagonal element */
1377:       row  = i*bs;
1378:       aa_j = aa + j*bs2;
            /* stepping by bs+1 visits offsets 0, bs+1, 2*(bs+1), ... of the
               block, i.e. its own diagonal */
1379:       if (A->factor && bs==1){
1380:         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k];
1381:       } else {
1382:         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1383:       }
1384:     }
1385:   }
1386: 
1387:   VecRestoreArray(v,&x);
1388:   return(0);
1389: }

/*
   MatDiagonalScale_SeqSBAIJ - Scales the stored blocks of the matrix by a
   diagonal matrix applied from the left and from the right.

   Input Parameters:
     A  - the matrix (modified in place)
     ll - left scaling vector
     rr - right scaling vector; must be the same Vec as ll

   Errors:
     PETSC_ERR_ARG_OUTOFRANGE - ll and rr are different
     PETSC_ERR_ARG_SIZ        - local size of the vector differs from A->m

   Notes:
     The first pass multiplies entry k of every block in block row i by
     l[i*bs + k%bs]; the second pass multiplies the entries of a block in
     block column c, bs at a time, by r[c*bs + k].

     Each vector's length is checked before its array is accessed, so no array
     is left checked out when the length error is raised.
*/
1393: PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
1394: {
1395:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1396:   PetscScalar    *l,*r,x,*li,*ri;
1397:   MatScalar      *aa,*v;
1399:   PetscInt       i,j,k,lm,rn,M,m,*ai,*aj,mbs,tmp,bs,bs2;

1402:   ai  = a->i;
1403:   aj  = a->j;
1404:   aa  = a->a;
1405:   m   = A->m;
1406:   bs  = A->bs;
1407:   mbs = a->mbs;
1408:   bs2 = a->bs2;

1410:   if (ll != rr) {
1411:     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1412:   }
1413:   if (ll) {
          /* check the length before touching the array */
1414:     VecGetLocalSize(ll,&lm);
1415:     if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1416:     VecGetArray(ll,&l);
1417:     for (i=0; i<mbs; i++) { /* for each block row */
1418:       M  = ai[i+1] - ai[i];
1419:       li = l + i*bs;
1420:       v  = aa + bs2*ai[i];
1421:       for (j=0; j<M; j++) { /* for each block */
1422:         for (k=0; k<bs2; k++) {
1423:           (*v++) *= li[k%bs];
1424:         }
1425: #ifdef CONT
1426:         /* will be used to replace the above loop */
            /* NOTE(review): as written this runs after v has already been
               advanced past the block by the loop above - it is a replacement,
               not an addition */
1427:         ri = l + bs*aj[ai[i]+j];
1428:         for (k=0; k<bs; k++) { /* column value */
1429:           x = ri[k];
1430:           for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
1431:         }
1432: #endif

1434:       }
1435:     }
1436:     VecRestoreArray(ll,&l);
1437:     PetscLogFlops(2*a->nz);
1438:   }
1439:   /* will be deleted */
1440:   if (rr) {
          /* check the length before touching the array */
1441:     VecGetLocalSize(rr,&rn);
1442:     if (rn != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
1443:     VecGetArray(rr,&r);
1444:     for (i=0; i<mbs; i++) { /* for each block row */
1445:       M  = ai[i+1] - ai[i];
1446:       v  = aa + bs2*ai[i];
1447:       for (j=0; j<M; j++) { /* for each block */
1448:         ri = r + bs*aj[ai[i]+j];
1449:         for (k=0; k<bs; k++) {
1450:           x = ri[k];
1451:           for (tmp=0; tmp<bs; tmp++) (*v++) *= x;
1452:         }
1453:       }
1454:     }
1455:     VecRestoreArray(rr,&r);
1456:     PetscLogFlops(a->nz);
1457:   }
1458:   return(0);
1459: }

/*
   MatGetInfo_SeqSBAIJ - Fills a MatInfo record from the fields of a
   SeqSBAIJ matrix.

   Input Parameters:
     A    - the matrix
     flag - not consulted by this routine

   Output Parameter:
     info - receives sizes, nonzero counts, assembly/malloc counters and memory
            usage; the fill-ratio and factor-malloc fields are copied from
            A->info when A->factor is set and are zero otherwise

   Notes:
     All four size fields are set to A->m.  block_size is set to a->bs2 and
     nz_used to a->bs2*a->nz, while nz_allocated is a->maxnz unscaled.
*/
1463: PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1464: {
1465:   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)A->data;

        /* bookkeeping counters */
1468:   info->memory       = A->mem;
1469:   info->mallocs      = sbaij->reallocs;
1470:   info->assemblies   = A->num_ass;

        /* sizes: rows and columns, local and global, are all A->m */
1471:   info->columns_local  = (double)A->m;
1472:   info->rows_local     = (double)A->m;
1473:   info->columns_global = (double)A->m;
1474:   info->rows_global    = (double)A->m;

        /* nonzero accounting (upper triangular part only) */
1475:   info->block_size     = sbaij->bs2;
1476:   info->nz_used        = sbaij->bs2*sbaij->nz;
1477:   info->nz_allocated   = sbaij->maxnz;
1478:   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);

        /* factorization statistics exist only for factored matrices */
1479:   if (!A->factor) {
1480:     info->factor_mallocs    = 0;
1481:     info->fill_ratio_needed = 0;
1482:     info->fill_ratio_given  = 0;
1483:   } else {
1484:     info->factor_mallocs    = A->info.factor_mallocs;
1485:     info->fill_ratio_needed = A->info.fill_ratio_needed;
1486:     info->fill_ratio_given  = A->info.fill_ratio_given;
1487:   }
1488:   return(0);
1489: }


/*
   MatZeroEntries_SeqSBAIJ - Sets every stored value of the matrix to zero.

   The number of stored blocks is taken from the last block-row offset,
   a->i[a->mbs]; bs2 MatScalar entries are cleared per block.
*/
1494: PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
1495: {
1496:   Mat_SeqSBAIJ *sbaij  = (Mat_SeqSBAIJ*)A->data;
1497:   PetscInt     nblocks = sbaij->i[sbaij->mbs];

1500:   PetscMemzero(sbaij->a,sbaij->bs2*nblocks*sizeof(MatScalar));
1501:   return(0);
1502: }

/*
   MatGetRowMax_SeqSBAIJ - For every row, stores in v the largest absolute
   value found among the matrix entries of that row.

   Input Parameter:
     A - the matrix

   Output Parameter:
     v - receives the per-row maxima; rows with no contributing entry stay zero

   Errors:
     PETSC_ERR_ARG_WRONGSTATE - A is factored
     PETSC_ERR_ARG_SIZ        - local size of v differs from A->m

   Notes:
     Each stored entry updates the maximum of its own row and, when its block
     column index is greater than the block row index, also the maximum of the
     row given by its column.

     The size of v is validated before v is zeroed or its array is accessed, so
     a nonconforming vector is returned untouched and no array is left checked
     out on the error path.
*/
1506: PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat A,Vec v)
1507: {
1508:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1510:   PetscInt i,j,n,row,col,bs,*ai,*aj,mbs;
1511:   PetscReal    atmp;
1512:   MatScalar    *aa;
1513:   PetscScalar  zero = 0.0,*x;
1514:   PetscInt          ncols,brow,bcol,krow,kcol;

1517:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1518:   bs   = A->bs;
1519:   aa   = a->a;
1520:   ai   = a->i;
1521:   aj   = a->j;
1522:   mbs = a->mbs;

        /* validate first: nothing below may run on a vector of the wrong size */
1524:   VecGetLocalSize(v,&n);
1525:   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1526:   VecSet(&zero,v);
1527:   VecGetArray(v,&x);
        /* aa, aj and ai are advanced as moving pointers over the stored blocks */
1528:   for (i=0; i<mbs; i++) {
1529:     ncols = ai[1] - ai[0]; ai++;
1530:     brow  = bs*i;
1531:     for (j=0; j<ncols; j++){
1532:       bcol = bs*(*aj);
            /* the row index varies fastest within a block */
1533:       for (kcol=0; kcol<bs; kcol++){
1534:         col = bcol + kcol;      /* col index */
1535:         for (krow=0; krow<bs; krow++){
1536:           atmp = PetscAbsScalar(*aa); aa++;
1537:           row = brow + krow;    /* row index */
1538:           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
                /* mirrored entry: only for blocks strictly right of the diagonal */
1539:           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
1540:         }
1541:       }
1542:       aj++;
1543:     }
1544:   }
1545:   VecRestoreArray(v,&x);
1546:   return(0);
1547: }