Actual source code: sbaij2.c

  1: /*$Id: sbaij2.c,v 1.32 2001/08/07 03:03:01 balay Exp $*/

 3:  #include src/mat/impls/baij/seq/baij.h
 4:  #include src/vec/vecimpl.h
 5:  #include src/inline/spops.h
 6:  #include src/inline/ilu.h
 7:  #include petscbt.h
 8:  #include src/mat/impls/sbaij/seq/sbaij.h

 10: #undef __FUNCT__  
 12: int MatIncreaseOverlap_SeqSBAIJ(Mat A,int is_max,IS *is,int ov)
 13: {
 15:   SETERRQ(1,"Function not yet written for SBAIJ format");
 16:   /* return(0); */
 17: }

 19: #undef __FUNCT__  
 21: int MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
 22: {
 23:   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
 24:   int          *smap,i,k,kstart,kend,ierr,oldcols = a->mbs,*lens;
 25:   int          row,mat_i,*mat_j,tcol,*mat_ilen;
 26:   int          *irow,nrows,*ssmap,bs=a->bs,bs2=a->bs2;
 27:   int          *aj = a->j,*ai = a->i;
 28:   MatScalar    *mat_a;
 29:   Mat          C;
 30:   PetscTruth   flag;

 33: 
 34:   if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro");
 35:   ISSorted(iscol,(PetscTruth*)&i);
 36:   if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");

 38:   ISGetIndices(isrow,&irow);
 39:   ISGetSize(isrow,&nrows);
 40: 
 41:   ierr  = PetscMalloc((1+oldcols)*sizeof(int),&smap);
 42:   ssmap = smap;
 43:   ierr  = PetscMalloc((1+nrows)*sizeof(int),&lens);
 44:   ierr  = PetscMemzero(smap,oldcols*sizeof(int));
 45:   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
 46:   /* determine lens of each row */
 47:   for (i=0; i<nrows; i++) {
 48:     kstart  = ai[irow[i]];
 49:     kend    = kstart + a->ilen[irow[i]];
 50:     lens[i] = 0;
 51:       for (k=kstart; k<kend; k++) {
 52:         if (ssmap[aj[k]]) {
 53:           lens[i]++;
 54:         }
 55:       }
 56:     }
 57:   /* Create and fill new matrix */
 58:   if (scall == MAT_REUSE_MATRIX) {
 59:     c = (Mat_SeqSBAIJ *)((*B)->data);

 61:     if (c->mbs!=nrows || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
 62:     PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);
 63:     if (flag == PETSC_FALSE) {
 64:       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
 65:     }
 66:     PetscMemzero(c->ilen,c->mbs*sizeof(int));
 67:     C = *B;
 68:   } else {
 69:     MatCreateSeqSBAIJ(A->comm,bs,nrows*bs,nrows*bs,0,lens,&C);
 70:   }
 71:   c = (Mat_SeqSBAIJ *)(C->data);
 72:   for (i=0; i<nrows; i++) {
 73:     row    = irow[i];
 74:     kstart = ai[row];
 75:     kend   = kstart + a->ilen[row];
 76:     mat_i  = c->i[i];
 77:     mat_j  = c->j + mat_i;
 78:     mat_a  = c->a + mat_i*bs2;
 79:     mat_ilen = c->ilen + i;
 80:     for (k=kstart; k<kend; k++) {
 81:       if ((tcol=ssmap[a->j[k]])) {
 82:         *mat_j++ = tcol - 1;
 83:         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));
 84:         mat_a   += bs2;
 85:         (*mat_ilen)++;
 86:       }
 87:     }
 88:   }
 89: 
 90:   /* Free work space */
 91:   PetscFree(smap);
 92:   PetscFree(lens);
 93:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
 94:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
 95: 
 96:   ISRestoreIndices(isrow,&irow);
 97:   *B = C;
 98:   return(0);
 99: }

101: #undef __FUNCT__  
103: int MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
104: {
105:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
106:   IS          is1;
107:   int         *vary,*iary,*irow,nrows,i,ierr,bs=a->bs,count;

110:   if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro");
111: 
112:   ISGetIndices(isrow,&irow);
113:   ISGetSize(isrow,&nrows);
114: 
115:   /* Verify if the indices corespond to each element in a block 
116:    and form the IS with compressed IS */
117:   PetscMalloc(2*(a->mbs+1)*sizeof(int),&vary);
118:   iary = vary + a->mbs;
119:   PetscMemzero(vary,(a->mbs)*sizeof(int));
120:   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
121: 
122:   count = 0;
123:   for (i=0; i<a->mbs; i++) {
124:     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(1,"Index set does not match blocks");
125:     if (vary[i]==bs) iary[count++] = i;
126:   }
127:   ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);
128: 
129:   ISRestoreIndices(isrow,&irow);
130:   PetscFree(vary);

132:   MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);
133:   ISDestroy(is1);
134:   return(0);
135: }

137: #undef __FUNCT__  
139: int MatGetSubMatrices_SeqSBAIJ(Mat A,int n,IS *irow,IS *icol,MatReuse scall,Mat **B)
140: {
141:   int ierr,i;

144:   if (scall == MAT_INITIAL_MATRIX) {
145:     PetscMalloc((n+1)*sizeof(Mat),B);
146:   }

148:   for (i=0; i<n; i++) {
149:     MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);
150:   }
151:   return(0);
152: }

154: /* -------------------------------------------------------*/
155: /* Should check that shapes of vectors and matrices match */
156: /* -------------------------------------------------------*/
157:  #include petscblaslapack.h

159: #undef __FUNCT__  
161: int MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz)
162: {
163:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
164:   PetscScalar     *x,*z,*xb,x1,zero=0.0;
165:   MatScalar       *v;
166:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

169:   VecSet(&zero,zz);
170:   VecGetArray(xx,&x);
171:   VecGetArray(zz,&z);

173:   v  = a->a;
174:   xb = x;
175: 
176:   for (i=0; i<mbs; i++) {
177:     n  = ai[1] - ai[0];  /* length of i_th row of A */
178:     x1 = xb[0];
179:     ib = aj + *ai;
180:     jmin = 0;
181:     if (*ib == i) {      /* (diag of A)*x */
182:       z[i] += *v++ * x[*ib++];
183:       jmin++;
184:     }
185:     for (j=jmin; j<n; j++) {
186:       cval    = *ib;
187:       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
188:       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
189:     }
190:     xb++; ai++;
191:   }

193:   VecRestoreArray(xx,&x);
194:   VecRestoreArray(zz,&z);
195:   PetscLogFlops(2*(a->s_nz*2 - A->m) - A->m);  /* s_nz = (nz+m)/2 */
196:   return(0);
197: }

199: #undef __FUNCT__  
201: int MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
202: {
203:   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
204:   PetscScalar      *x,*z,*xb,x1,x2,zero=0.0;
205:   MatScalar        *v;
206:   int              mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;


210:   VecSet(&zero,zz);
211:   VecGetArray(xx,&x);
212:   VecGetArray(zz,&z);
213: 
214:   v     = a->a;
215:   xb = x;

217:   for (i=0; i<mbs; i++) {
218:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
219:     x1 = xb[0]; x2 = xb[1];
220:     ib = aj + *ai;
221:     jmin = 0;
222:     if (*ib == i){     /* (diag of A)*x */
223:       z[2*i]   += v[0]*x1 + v[2]*x2;
224:       z[2*i+1] += v[2]*x1 + v[3]*x2;
225:       v += 4; jmin++;
226:     }
227:     for (j=jmin; j<n; j++) {
228:       /* (strict lower triangular part of A)*x  */
229:       cval       = ib[j]*2;
230:       z[cval]     += v[0]*x1 + v[1]*x2;
231:       z[cval+1]   += v[2]*x1 + v[3]*x2;
232:       /* (strict upper triangular part of A)*x  */
233:       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
234:       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
235:       v  += 4;
236:     }
237:     xb +=2; ai++;
238:   }

240:   VecRestoreArray(xx,&x);
241:   VecRestoreArray(zz,&z);
242:   PetscLogFlops(8*(a->s_nz*2 - A->m) - A->m);
243:   return(0);
244: }

246: #undef __FUNCT__  
248: int MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
249: {
250:   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
251:   PetscScalar   *x,*z,*xb,x1,x2,x3,zero=0.0;
252:   MatScalar     *v;
253:   int           mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;


257:   VecSet(&zero,zz);
258:   VecGetArray(xx,&x);
259:   VecGetArray(zz,&z);
260: 
261:   v     = a->a;
262:   xb = x;

264:   for (i=0; i<mbs; i++) {
265:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
266:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
267:     ib = aj + *ai;
268:     jmin = 0;
269:     if (*ib == i){     /* (diag of A)*x */
270:       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
271:       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
272:       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
273:       v += 9; jmin++;
274:     }
275:     for (j=jmin; j<n; j++) {
276:       /* (strict lower triangular part of A)*x  */
277:       cval       = ib[j]*3;
278:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
279:       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
280:       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
281:       /* (strict upper triangular part of A)*x  */
282:       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
283:       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
284:       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
285:       v  += 9;
286:     }
287:     xb +=3; ai++;
288:   }

290:   VecRestoreArray(xx,&x);
291:   VecRestoreArray(zz,&z);
292:   PetscLogFlops(18*(a->s_nz*2 - A->m) - A->m);
293:   return(0);
294: }

296: #undef __FUNCT__  
298: int MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
299: {
300:   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
301:   PetscScalar      *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
302:   MatScalar        *v;
303:   int              mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

306:   VecSet(&zero,zz);
307:   VecGetArray(xx,&x);
308:   VecGetArray(zz,&z);
309: 
310:   v     = a->a;
311:   xb = x;

313:   for (i=0; i<mbs; i++) {
314:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
315:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
316:     ib = aj + *ai;
317:     jmin = 0;
318:     if (*ib == i){     /* (diag of A)*x */
319:       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
320:       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
321:       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
322:       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
323:       v += 16; jmin++;
324:     }
325:     for (j=jmin; j<n; j++) {
326:       /* (strict lower triangular part of A)*x  */
327:       cval       = ib[j]*4;
328:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
329:       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
330:       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
331:       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
332:       /* (strict upper triangular part of A)*x  */
333:       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
334:       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
335:       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
336:       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
337:       v  += 16;
338:     }
339:     xb +=4; ai++;
340:   }

342:   VecRestoreArray(xx,&x);
343:   VecRestoreArray(zz,&z);
344:   PetscLogFlops(32*(a->s_nz*2 - A->m) - A->m);
345:   return(0);
346: }

348: #undef __FUNCT__  
350: int MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
351: {
352:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
353:   PetscScalar     *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
354:   MatScalar       *v;
355:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

358:   VecSet(&zero,zz);
359:   VecGetArray(xx,&x);
360:   VecGetArray(zz,&z);
361: 
362:   v     = a->a;
363:   xb = x;

365:   for (i=0; i<mbs; i++) {
366:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
367:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
368:     ib = aj + *ai;
369:     jmin = 0;
370:     if (*ib == i){      /* (diag of A)*x */
371:       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
372:       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
373:       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
374:       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
375:       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
376:       v += 25; jmin++;
377:     }
378:     for (j=jmin; j<n; j++) {
379:       /* (strict lower triangular part of A)*x  */
380:       cval       = ib[j]*5;
381:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
382:       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
383:       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
384:       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
385:       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
386:       /* (strict upper triangular part of A)*x  */
387:       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
388:       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
389:       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
390:       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
391:       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
392:       v  += 25;
393:     }
394:     xb +=5; ai++;
395:   }

397:   VecRestoreArray(xx,&x);
398:   VecRestoreArray(zz,&z);
399:   PetscLogFlops(50*(a->s_nz*2 - A->m) - A->m);
400:   return(0);
401: }


404: #undef __FUNCT__  
406: int MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
407: {
408:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
409:   PetscScalar     *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
410:   MatScalar       *v;
411:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

414:   VecSet(&zero,zz);
415:   VecGetArray(xx,&x);
416:   VecGetArray(zz,&z);
417: 
418:   v     = a->a;
419:   xb = x;

421:   for (i=0; i<mbs; i++) {
422:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
423:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
424:     ib = aj + *ai;
425:     jmin = 0;
426:     if (*ib == i){      /* (diag of A)*x */
427:       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
428:       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
429:       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
430:       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
431:       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
432:       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
433:       v += 36; jmin++;
434:     }
435:     for (j=jmin; j<n; j++) {
436:       /* (strict lower triangular part of A)*x  */
437:       cval       = ib[j]*6;
438:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
439:       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
440:       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
441:       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
442:       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
443:       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
444:       /* (strict upper triangular part of A)*x  */
445:       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
446:       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
447:       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
448:       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
449:       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
450:       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
451:       v  += 36;
452:     }
453:     xb +=6; ai++;
454:   }

456:   VecRestoreArray(xx,&x);
457:   VecRestoreArray(zz,&z);
458:   PetscLogFlops(72*(a->s_nz*2 - A->m) - A->m);
459:   return(0);
460: }
461: #undef __FUNCT__  
463: int MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
464: {
465:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
466:   PetscScalar     *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
467:   MatScalar       *v;
468:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

471:   VecSet(&zero,zz);
472:   VecGetArray(xx,&x);
473:   VecGetArray(zz,&z);
474: 
475:   v     = a->a;
476:   xb = x;

478:   for (i=0; i<mbs; i++) {
479:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
480:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
481:     ib = aj + *ai;
482:     jmin = 0;
483:     if (*ib == i){      /* (diag of A)*x */
484:       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
485:       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
486:       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
487:       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
488:       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
489:       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
490:       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
491:       v += 49; jmin++;
492:     }
493:     for (j=jmin; j<n; j++) {
494:       /* (strict lower triangular part of A)*x  */
495:       cval       = ib[j]*7;
496:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
497:       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
498:       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
499:       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
500:       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
501:       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
502:       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
503:       /* (strict upper triangular part of A)*x  */
504:       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
505:       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
506:       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
507:       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
508:       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
509:       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
510:       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
511:       v  += 49;
512:     }
513:     xb +=7; ai++;
514:   }
515:   VecRestoreArray(xx,&x);
516:   VecRestoreArray(zz,&z);
517:   PetscLogFlops(98*(a->s_nz*2 - A->m) - A->m);
518:   return(0);
519: }

521: /*
522:     This will not work with MatScalar == float because it calls the BLAS
523: */
524: #undef __FUNCT__  
526: int MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
527: {
528:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
529:   PetscScalar     *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
530:   MatScalar       *v;
531:   int             ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2;
532:   int             ncols,k;

535:   VecSet(&zero,zz);
536:   VecGetArray(xx,&x); x_ptr=x;
537:   VecGetArray(zz,&z); z_ptr=z;

539:   aj   = a->j;
540:   v    = a->a;
541:   ii   = a->i;

543:   if (!a->mult_work) {
544:     PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);
545:   }
546:   work = a->mult_work;
547: 
548:   for (i=0; i<mbs; i++) {
549:     n     = ii[1] - ii[0]; ncols = n*bs;
550:     workt = work; idx=aj+ii[0];

552:     /* upper triangular part */
553:     for (j=0; j<n; j++) {
554:       xb = x_ptr + bs*(*idx++);
555:       for (k=0; k<bs; k++) workt[k] = xb[k];
556:       workt += bs;
557:     }
558:     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
559:     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
560: 
561:     /* strict lower triangular part */
562:     idx = aj+ii[0];
563:     if (*idx == i){
564:       ncols -= bs; v += bs2; idx++; n--;
565:     }
566: 
567:     if (ncols > 0){
568:       workt = work;
569:       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));
570:       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
571:       for (j=0; j<n; j++) {
572:         zb = z_ptr + bs*(*idx++);
573:         for (k=0; k<bs; k++) zb[k] += workt[k] ;
574:         workt += bs;
575:       }
576:     }
577:     x += bs; v += n*bs2; z += bs; ii++;
578:   }
579: 
580:   VecRestoreArray(xx,&x);
581:   VecRestoreArray(zz,&z);
582:   PetscLogFlops(2*(a->s_nz*2 - A->m)*bs2 - A->m);
583:   return(0);
584: }

586: #undef __FUNCT__  
588: int MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
589: {
590:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
591:   PetscScalar     *x,*y,*z,*xb,x1;
592:   MatScalar       *v;
593:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

596:   VecGetArray(xx,&x);
597:   if (yy != xx) {
598:     VecGetArray(yy,&y);
599:   } else {
600:     y = x;
601:   }
602:   if (zz != yy) {
603:     /* VecCopy(yy,zz); */
604:     VecGetArray(zz,&z);
605:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
606:   } else {
607:     z = y;
608:   }

610:   v  = a->a;
611:   xb = x;

613:   for (i=0; i<mbs; i++) {
614:     n  = ai[1] - ai[0];  /* length of i_th row of A */
615:     x1 = xb[0];
616:     ib = aj + *ai;
617:     jmin = 0;
618:     if (*ib == i) {            /* (diag of A)*x */
619:       z[i] += *v++ * x[*ib++]; jmin++;
620:     }
621:     for (j=jmin; j<n; j++) {
622:       cval    = *ib;
623:       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
624:       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
625:     }
626:     xb++; ai++;
627:   }

629:   VecRestoreArray(xx,&x);
630:   if (yy != xx) VecRestoreArray(yy,&y);
631:   if (zz != yy) VecRestoreArray(zz,&z);
632: 
633:   PetscLogFlops(2*(a->s_nz*2 - A->m));
634:   return(0);
635: }

637: #undef __FUNCT__  
639: int MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
640: {
641:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
642:   PetscScalar     *x,*y,*z,*xb,x1,x2;
643:   MatScalar       *v;
644:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

647:   VecGetArray(xx,&x);
648:   if (yy != xx) {
649:     VecGetArray(yy,&y);
650:   } else {
651:     y = x;
652:   }
653:   if (zz != yy) {
654:     /* VecCopy(yy,zz); */
655:     VecGetArray(zz,&z);
656:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
657:   } else {
658:     z = y;
659:   }

661:   v     = a->a;
662:   xb = x;

664:   for (i=0; i<mbs; i++) {
665:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
666:     x1 = xb[0]; x2 = xb[1];
667:     ib = aj + *ai;
668:     jmin = 0;
669:     if (*ib == i){      /* (diag of A)*x */
670:       z[2*i]   += v[0]*x1 + v[2]*x2;
671:       z[2*i+1] += v[2]*x1 + v[3]*x2;
672:       v += 4; jmin++;
673:     }
674:     for (j=jmin; j<n; j++) {
675:       /* (strict lower triangular part of A)*x  */
676:       cval       = ib[j]*2;
677:       z[cval]     += v[0]*x1 + v[1]*x2;
678:       z[cval+1]   += v[2]*x1 + v[3]*x2;
679:       /* (strict upper triangular part of A)*x  */
680:       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
681:       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
682:       v  += 4;
683:     }
684:     xb +=2; ai++;
685:   }

687:   VecRestoreArray(xx,&x);
688:   if (yy != xx) VecRestoreArray(yy,&y);
689:   if (zz != yy) VecRestoreArray(zz,&z);

691:   PetscLogFlops(4*(a->s_nz*2 - A->m));
692:   return(0);
693: }

695: #undef __FUNCT__  
697: int MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
698: {
699:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
700:   PetscScalar     *x,*y,*z,*xb,x1,x2,x3;
701:   MatScalar       *v;
702:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

705:   VecGetArray(xx,&x);
706:   if (yy != xx) {
707:     VecGetArray(yy,&y);
708:   } else {
709:     y = x;
710:   }
711:   if (zz != yy) {
712:     /* VecCopy(yy,zz); */
713:     VecGetArray(zz,&z);
714:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
715:   } else {
716:     z = y;
717:   }

719:   v     = a->a;
720:   xb = x;

722:   for (i=0; i<mbs; i++) {
723:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
724:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
725:     ib = aj + *ai;
726:     jmin = 0;
727:     if (*ib == i){     /* (diag of A)*x */
728:      z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
729:      z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
730:      z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
731:      v += 9; jmin++;
732:     }
733:     for (j=jmin; j<n; j++) {
734:       /* (strict lower triangular part of A)*x  */
735:       cval       = ib[j]*3;
736:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
737:       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
738:       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
739:       /* (strict upper triangular part of A)*x  */
740:       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
741:       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
742:       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
743:       v  += 9;
744:     }
745:     xb +=3; ai++;
746:   }

748:   VecRestoreArray(xx,&x);
749:   if (yy != xx) VecRestoreArray(yy,&y);
750:   if (zz != yy) VecRestoreArray(zz,&z);

752:   PetscLogFlops(18*(a->s_nz*2 - A->m));
753:   return(0);
754: }

756: #undef __FUNCT__  
758: int MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
759: {
760:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
761:   PetscScalar     *x,*y,*z,*xb,x1,x2,x3,x4;
762:   MatScalar       *v;
763:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

766:   VecGetArray(xx,&x);
767:   if (yy != xx) {
768:     VecGetArray(yy,&y);
769:   } else {
770:     y = x;
771:   }
772:   if (zz != yy) {
773:     /* VecCopy(yy,zz); */
774:     VecGetArray(zz,&z);
775:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
776:   } else {
777:     z = y;
778:   }

780:   v     = a->a;
781:   xb = x;

783:   for (i=0; i<mbs; i++) {
784:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
785:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
786:     ib = aj + *ai;
787:     jmin = 0;
788:     if (*ib == i){      /* (diag of A)*x */
789:       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
790:       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
791:       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
792:       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
793:       v += 16; jmin++;
794:     }
795:     for (j=jmin; j<n; j++) {
796:       /* (strict lower triangular part of A)*x  */
797:       cval       = ib[j]*4;
798:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
799:       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
800:       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
801:       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
802:       /* (strict upper triangular part of A)*x  */
803:       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
804:       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
805:       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
806:       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
807:       v  += 16;
808:     }
809:     xb +=4; ai++;
810:   }

812:   VecRestoreArray(xx,&x);
813:   if (yy != xx) VecRestoreArray(yy,&y);
814:   if (zz != yy) VecRestoreArray(zz,&z);

816:   PetscLogFlops(32*(a->s_nz*2 - A->m));
817:   return(0);
818: }

820: #undef __FUNCT__  
822: int MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
823: {
824:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
825:   PetscScalar     *x,*y,*z,*xb,x1,x2,x3,x4,x5;
826:   MatScalar       *v;
827:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

830:   VecGetArray(xx,&x);
831:   if (yy != xx) {
832:     VecGetArray(yy,&y);
833:   } else {
834:     y = x;
835:   }
836:   if (zz != yy) {
837:     /* VecCopy(yy,zz); */
838:     VecGetArray(zz,&z);
839:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
840:   } else {
841:     z = y;
842:   }

844:   v     = a->a;
845:   xb = x;

847:   for (i=0; i<mbs; i++) {
848:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
849:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
850:     ib = aj + *ai;
851:     jmin = 0;
852:     if (*ib == i){      /* (diag of A)*x */
853:       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
854:       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
855:       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
856:       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
857:       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
858:       v += 25; jmin++;
859:     }
860:     for (j=jmin; j<n; j++) {
861:       /* (strict lower triangular part of A)*x  */
862:       cval       = ib[j]*5;
863:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
864:       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
865:       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
866:       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
867:       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
868:       /* (strict upper triangular part of A)*x  */
869:       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
870:       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
871:       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
872:       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
873:       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
874:       v  += 25;
875:     }
876:     xb +=5; ai++;
877:   }

879:   VecRestoreArray(xx,&x);
880:   if (yy != xx) VecRestoreArray(yy,&y);
881:   if (zz != yy) VecRestoreArray(zz,&z);

883:   PetscLogFlops(50*(a->s_nz*2 - A->m));
884:   return(0);
885: }
886: #undef __FUNCT__  
888: int MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
889: {
890:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
891:   PetscScalar     *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6;
892:   MatScalar       *v;
893:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

896:   VecGetArray(xx,&x);
897:   if (yy != xx) {
898:     VecGetArray(yy,&y);
899:   } else {
900:     y = x;
901:   }
902:   if (zz != yy) {
903:     /* VecCopy(yy,zz); */
904:     VecGetArray(zz,&z);
905:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
906:   } else {
907:     z = y;
908:   }

910:   v     = a->a;
911:   xb = x;

913:   for (i=0; i<mbs; i++) {
914:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
915:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
916:     ib = aj + *ai;
917:     jmin = 0;
918:     if (*ib == i){     /* (diag of A)*x */
919:       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
920:       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
921:       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
922:       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
923:       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
924:       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
925:       v += 36; jmin++;
926:     }
927:     for (j=jmin; j<n; j++) {
928:       /* (strict lower triangular part of A)*x  */
929:       cval       = ib[j]*6;
930:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
931:       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
932:       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
933:       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
934:       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
935:       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
936:       /* (strict upper triangular part of A)*x  */
937:       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
938:       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
939:       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
940:       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
941:       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
942:       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
943:       v  += 36;
944:     }
945:     xb +=6; ai++;
946:   }

948:   VecRestoreArray(xx,&x);
949:   if (yy != xx) VecRestoreArray(yy,&y);
950:   if (zz != yy) VecRestoreArray(zz,&z);

952:   PetscLogFlops(72*(a->s_nz*2 - A->m));
953:   return(0);
954: }

956: #undef __FUNCT__  
958: int MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
959: {
960:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
961:   PetscScalar     *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
962:   MatScalar       *v;
963:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

966:   VecGetArray(xx,&x);
967:   if (yy != xx) {
968:     VecGetArray(yy,&y);
969:   } else {
970:     y = x;
971:   }
972:   if (zz != yy) {
973:     /* VecCopy(yy,zz); */
974:     VecGetArray(zz,&z);
975:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
976:   } else {
977:     z = y;
978:   }

980:   v     = a->a;
981:   xb = x;

983:   for (i=0; i<mbs; i++) {
984:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
985:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
986:     ib = aj + *ai;
987:     jmin = 0;
988:     if (*ib == i){     /* (diag of A)*x */
989:       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
990:       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
991:       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
992:       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
993:       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
994:       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
995:       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
996:       v += 49; jmin++;
997:     }
998:     for (j=jmin; j<n; j++) {
999:       /* (strict lower triangular part of A)*x  */
1000:       cval       = ib[j]*7;
1001:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
1002:       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
1003:       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
1004:       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
1005:       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
1006:       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
1007:       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1008:       /* (strict upper triangular part of A)*x  */
1009:       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
1010:       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
1011:       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
1012:       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
1013:       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
1014:       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
1015:       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
1016:       v  += 49;
1017:     }
1018:     xb +=7; ai++;
1019:   }

1021:   VecRestoreArray(xx,&x);
1022:   if (yy != xx) VecRestoreArray(yy,&y);
1023:   if (zz != yy) VecRestoreArray(zz,&z);

1025:   PetscLogFlops(98*(a->s_nz*2 - A->m));
1026:   return(0);
1027: }

1029: #undef __FUNCT__  
1031: int MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1032: {
1033:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
1034:   PetscScalar     *x,*x_ptr,*y,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1035:   MatScalar       *v;
1036:   int             ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2;
1037:   int             ncols,k;

1040:   VecGetArray(xx,&x); x_ptr=x;
1041:   if (yy != xx) {
1042:     VecGetArray(yy,&y);
1043:   } else {
1044:     y = x;
1045:   }
1046:   if (zz != yy) {
1047:     /* VecCopy(yy,zz); */
1048:     VecGetArray(zz,&z); z_ptr=z;
1049:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
1050:   } else {
1051:     z = y;
1052:   }

1054:   aj   = a->j;
1055:   v    = a->a;
1056:   ii   = a->i;

1058:   if (!a->mult_work) {
1059:     PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);
1060:   }
1061:   work = a->mult_work;
1062: 
1063: 
1064:   for (i=0; i<mbs; i++) {
1065:     n     = ii[1] - ii[0]; ncols = n*bs;
1066:     workt = work; idx=aj+ii[0];

1068:     /* upper triangular part */
1069:     for (j=0; j<n; j++) {
1070:       xb = x_ptr + bs*(*idx++);
1071:       for (k=0; k<bs; k++) workt[k] = xb[k];
1072:       workt += bs;
1073:     }
1074:     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
1075:     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);

1077:     /* strict lower triangular part */
1078:     idx = aj+ii[0];
1079:     if (*idx == i){
1080:       ncols -= bs; v += bs2; idx++; n--;
1081:     }
1082:     if (ncols > 0){
1083:       workt = work;
1084:       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));
1085:       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1086:       for (j=0; j<n; j++) {
1087:         zb = z_ptr + bs*(*idx++);
1088:         /* idx++; */
1089:         for (k=0; k<bs; k++) zb[k] += workt[k] ;
1090:         workt += bs;
1091:       }
1092:     }

1094:     x += bs; v += n*bs2; z += bs; ii++;
1095:   }

1097:   VecRestoreArray(xx,&x);
1098:   if (yy != xx) VecRestoreArray(yy,&y);
1099:   if (zz != yy) VecRestoreArray(zz,&z);

1101:   PetscLogFlops(2*(a->s_nz*2 - A->m));
1102:   return(0);
1103: }

1105: #undef __FUNCT__  
1107: int MatMultTranspose_SeqSBAIJ(Mat A,Vec xx,Vec zz)
1108: {
1110:   SETERRQ(1,"Matrix is symmetric. Call MatMult().");
1111:   /* return(0); */
1112: }

1114: #undef __FUNCT__  
1116: int MatMultTransposeAdd_SeqSBAIJ(Mat A,Vec xx,Vec yy,Vec zz)

1118: {
1120:   SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
1121:   /* return(0); */
1122: }

1124: #undef __FUNCT__  
1126: int MatScale_SeqSBAIJ(PetscScalar *alpha,Mat inA)
1127: {
1128:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1129:   int         one = 1,totalnz = a->bs2*a->s_nz;

1132:   BLscal_(&totalnz,alpha,a->a,&one);
1133:   PetscLogFlops(totalnz);
1134:   return(0);
1135: }

1137: #undef __FUNCT__  
1139: int MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
1140: {
1141:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1142:   MatScalar   *v = a->a;
1143:   PetscReal   sum_diag = 0.0, sum_off = 0.0, *sum;
1144:   int         i,j,k,bs = a->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
1145:   int         *jl,*il,jmin,jmax,ierr,nexti,ik,*col;
1146: 
1148:   if (type == NORM_FROBENIUS) {
1149:     for (k=0; k<mbs; k++){
1150:       jmin = a->i[k]; jmax = a->i[k+1];
1151:       col  = aj + jmin;
1152:       if (*col == k){         /* diagonal block */
1153:         for (i=0; i<bs2; i++){
1154: #if defined(PETSC_USE_COMPLEX)
1155:           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
1156: #else
1157:           sum_diag += (*v)*(*v); v++;
1158: #endif
1159:         }
1160:         jmin++;
1161:       }
1162:       for (j=jmin; j<jmax; j++){  /* off-diagonal blocks */
1163:         for (i=0; i<bs2; i++){
1164: #if defined(PETSC_USE_COMPLEX)
1165:           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
1166: #else
1167:           sum_off += (*v)*(*v); v++;
1168: #endif  
1169:         }
1170:       }
1171:     }
1172:     *norm = sqrt(sum_diag + 2*sum_off);

1174:   }  else if (type == NORM_INFINITY) { /* maximum row sum */
1175:     PetscMalloc(mbs*sizeof(int),&il);
1176:     PetscMalloc(mbs*sizeof(int),&jl);
1177:     PetscMalloc(bs*sizeof(PetscReal),&sum);
1178:     for (i=0; i<mbs; i++) {
1179:       jl[i] = mbs; il[0] = 0;
1180:     }

1182:     *norm = 0.0;
1183:     for (k=0; k<mbs; k++) { /* k_th block row */
1184:       for (j=0; j<bs; j++) sum[j]=0.0;

1186:       /*-- col sum --*/
1187:       i = jl[k]; /* first |A(i,k)| to be added */
1188:       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1189:                   at step k */
1190:       while (i<mbs){
1191:         nexti = jl[i];  /* next block row to be added */
1192:         ik    = il[i];  /* block index of A(i,k) in the array a */
1193:         for (j=0; j<bs; j++){
1194:           v = a->a + ik*bs2 + j*bs;
1195:           for (k1=0; k1<bs; k1++) {
1196:             sum[j] += PetscAbsScalar(*v); v++;
1197:           }
1198:         }
1199:         /* update il, jl */
1200:         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1201:         jmax = a->i[i+1];
1202:         if (jmin < jmax){
1203:           il[i] = jmin;
1204:           j   = a->j[jmin];
1205:           jl[i] = jl[j]; jl[j]=i;
1206:         }
1207:         i = nexti;
1208:       }
1209: 
1210:       /*-- row sum --*/
1211:       jmin = a->i[k]; jmax = a->i[k+1];
1212:       for (i=jmin; i<jmax; i++) {
1213:         for (j=0; j<bs; j++){
1214:           v = a->a + i*bs2 + j;
1215:           for (k1=0; k1<bs; k1++){
1216:             sum[j] += PetscAbsScalar(*v);
1217:             v   += bs;
1218:           }
1219:         }
1220:       }
1221:       /* add k_th block row to il, jl */
1222:       col = aj+jmin;
1223:       if (*col == k) jmin++;
1224:       if (jmin < jmax){
1225:         il[k] = jmin;
1226:         j   = a->j[jmin];
1227:         jl[k] = jl[j]; jl[j] = k;
1228:       }
1229:       for (j=0; j<bs; j++){
1230:         if (sum[j] > *norm) *norm = sum[j];
1231:       }
1232:     }
1233:     PetscFree(il);
1234:     PetscFree(jl);
1235:     PetscFree(sum);
1236:   } else {
1237:     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1238:   }
1239:   return(0);
1240: }

1242: #undef __FUNCT__  
1244: int MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
1245: {
1246:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
1247:   int          ierr;
1248:   PetscTruth   flag;

1251:   PetscTypeCompare((PetscObject)A,MATSEQSBAIJ,&flag);
1252:   if (!flag) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");

1254:   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1255:   if ((A->m != B->m) || (A->n != B->n) || (a->bs != b->bs)|| (a->s_nz != b->s_nz)) {
1256:     *flg = PETSC_FALSE;
1257:     return(0);
1258:   }
1259: 
1260:   /* if the a->i are the same */
1261:   PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);
1262:   if (*flg == PETSC_FALSE) {
1263:     return(0);
1264:   }
1265: 
1266:   /* if a->j are the same */
1267:   PetscMemcmp(a->j,b->j,(a->s_nz)*sizeof(int),flg);
1268:   if (*flg == PETSC_FALSE) {
1269:     return(0);
1270:   }
1271:   /* if a->a are the same */
1272:   PetscMemcmp(a->a,b->a,(a->s_nz)*(a->bs)*(a->bs)*sizeof(PetscScalar),flg);
1273: 
1274:   return(0);
1275: }

1277: #undef __FUNCT__  
1279: int MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
1280: {
1281:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1282:   int          ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1283:   PetscScalar  *x,zero = 0.0;
1284:   MatScalar    *aa,*aa_j;

1287:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1288:   bs   = a->bs;
1289:   aa   = a->a;
1290:   ai   = a->i;
1291:   aj   = a->j;
1292:   ambs = a->mbs;
1293:   bs2  = a->bs2;

1295:   VecSet(&zero,v);
1296:   VecGetArray(v,&x);
1297:   VecGetLocalSize(v,&n);
1298:   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1299:   for (i=0; i<ambs; i++) {
1300:     j=ai[i];
1301:     if (aj[j] == i) {             /* if this is a diagonal element */
1302:       row  = i*bs;
1303:       aa_j = aa + j*bs2;
1304:       for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1305:     }
1306:   }
1307:   VecRestoreArray(v,&x);
1308:   return(0);
1309: }

1311: #undef __FUNCT__  
1313: int MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
1314: {
1315:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1316:   PetscScalar  *l,*r,x,*li,*ri;
1317:   MatScalar    *aa,*v;
1318:   int          ierr,i,j,k,lm,rn,M,m,*ai,*aj,mbs,tmp,bs,bs2;

1321:   ai  = a->i;
1322:   aj  = a->j;
1323:   aa  = a->a;
1324:   m   = A->m;
1325:   bs  = a->bs;
1326:   mbs = a->mbs;
1327:   bs2 = a->bs2;

1329:   if (ll != rr) {
1330:     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be samen");
1331:   }
1332:   if (ll) {
1333:     VecGetArray(ll,&l);
1334:     VecGetLocalSize(ll,&lm);
1335:     if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1336:     for (i=0; i<mbs; i++) { /* for each block row */
1337:       M  = ai[i+1] - ai[i];
1338:       li = l + i*bs;
1339:       v  = aa + bs2*ai[i];
1340:       for (j=0; j<M; j++) { /* for each block */
1341:         for (k=0; k<bs2; k++) {
1342:           (*v++) *= li[k%bs];
1343:         }
1344: #ifdef CONT
1345:         /* will be used to replace the above loop */
1346:         ri = l + bs*aj[ai[i]+j];
1347:         for (k=0; k<bs; k++) { /* column value */
1348:           x = ri[k];
1349:           for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
1350:         }
1351: #endif

1353:       }
1354:     }
1355:     VecRestoreArray(ll,&l);
1356:     PetscLogFlops(2*a->s_nz);
1357:   }
1358:   /* will be deleted */
1359:   if (rr) {
1360:     VecGetArray(rr,&r);
1361:     VecGetLocalSize(rr,&rn);
1362:     if (rn != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
1363:     for (i=0; i<mbs; i++) { /* for each block row */
1364:       M  = ai[i+1] - ai[i];
1365:       v  = aa + bs2*ai[i];
1366:       for (j=0; j<M; j++) { /* for each block */
1367:         ri = r + bs*aj[ai[i]+j];
1368:         for (k=0; k<bs; k++) {
1369:           x = ri[k];
1370:           for (tmp=0; tmp<bs; tmp++) (*v++) *= x;
1371:         }
1372:       }
1373:     }
1374:     VecRestoreArray(rr,&r);
1375:     PetscLogFlops(a->s_nz);
1376:   }
1377:   return(0);
1378: }

1380: #undef __FUNCT__  
1382: int MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1383: {
1384:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;

1387:   info->rows_global    = (double)A->m;
1388:   info->columns_global = (double)A->m;
1389:   info->rows_local     = (double)A->m;
1390:   info->columns_local  = (double)A->m;
1391:   info->block_size     = a->bs2;
1392:   info->nz_allocated   = a->s_maxnz; /*num. of nonzeros in upper triangular part */
1393:   info->nz_used        = a->bs2*a->s_nz; /*num. of nonzeros in upper triangular part */
1394:   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1395:   info->assemblies   = A->num_ass;
1396:   info->mallocs      = a->reallocs;
1397:   info->memory       = A->mem;
1398:   if (A->factor) {
1399:     info->fill_ratio_given  = A->info.fill_ratio_given;
1400:     info->fill_ratio_needed = A->info.fill_ratio_needed;
1401:     info->factor_mallocs    = A->info.factor_mallocs;
1402:   } else {
1403:     info->fill_ratio_given  = 0;
1404:     info->fill_ratio_needed = 0;
1405:     info->factor_mallocs    = 0;
1406:   }
1407:   return(0);
1408: }


1411: #undef __FUNCT__  
1413: int MatZeroEntries_SeqSBAIJ(Mat A)
1414: {
1415:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1416:   int         ierr;

1419:   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));
1420:   return(0);
1421: }

1423: #undef __FUNCT__  
1425: int MatGetRowMax_SeqSBAIJ(Mat A,Vec v)
1426: {
1427:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1428:   int          ierr,i,j,n,row,col,bs,*ai,*aj,mbs;
1429:   PetscReal    atmp;
1430:   MatScalar    *aa;
1431:   PetscScalar  zero = 0.0,*x;
1432:   int          ncols,brow,bcol,krow,kcol;

1435:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1436:   bs   = a->bs;
1437:   aa   = a->a;
1438:   ai   = a->i;
1439:   aj   = a->j;
1440:   mbs = a->mbs;

1442:   VecSet(&zero,v);
1443:   VecGetArray(v,&x);
1444:   VecGetLocalSize(v,&n);
1445:   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1446:   for (i=0; i<mbs; i++) {
1447:     ncols = ai[1] - ai[0]; ai++;
1448:     brow  = bs*i;
1449:     for (j=0; j<ncols; j++){
1450:       bcol = bs*(*aj);
1451:       for (kcol=0; kcol<bs; kcol++){
1452:         col = bcol + kcol;      /* col index */
1453:         for (krow=0; krow<bs; krow++){
1454:           atmp = PetscAbsScalar(*aa); aa++;
1455:           row = brow + krow;    /* row index */
1456:           /* printf("val[%d,%d]: %gn",row,col,atmp); */
1457:           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1458:           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
1459:         }
1460:       }
1461:       aj++;
1462:     }
1463:   }
1464:   VecRestoreArray(v,&x);
1465:   return(0);
1466: }