Actual source code: baij.c

  1: #define PETSCMAT_DLL

  3: /*
  4:     Defines the basic matrix operations for the BAIJ (compressed row)
  5:   matrix storage format.
  6: */
 7:  #include src/mat/impls/baij/seq/baij.h
 8:  #include src/inline/spops.h
 9:  #include petscsys.h

 11:  #include src/inline/ilu.h

 15: /*@
 16:   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.

 18:   Collective on Mat

 20:   Input Parameters:
 21: . mat - the matrix

 23:   Level: advanced
 24: @*/
 25: PetscErrorCode  MatSeqBAIJInvertBlockDiagonal(Mat mat)
 26: {
 27:   PetscErrorCode ierr,(*f)(Mat);

 31:   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
 32:   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

 34:   PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);
 35:   if (f) {
 36:     (*f)(mat);
 37:   } else {
 38:     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
 39:   }
 40:   return(0);
 41: }

 46: PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A)
 47: {
 48:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
 50:   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
 51:   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;

 54:   if (a->idiagvalid) return(0);
 55:   MatMarkDiagonal_SeqBAIJ(A);
 56:   diag_offset = a->diag;
 57:   if (!a->idiag) {
 58:     PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);
 59:   }
 60:   diag  = a->idiag;
 61:   mdiag = a->idiag+bs*bs*mbs;
 62:   /* factor and invert each block */
 63:   switch (bs){
 64:     case 2:
 65:       for (i=0; i<mbs; i++) {
 66:         odiag   = v + 4*diag_offset[i];
 67:         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 68:         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 69:         Kernel_A_gets_inverse_A_2(diag);
 70:         diag    += 4;
 71:         mdiag   += 4;
 72:       }
 73:       break;
 74:     case 3:
 75:       for (i=0; i<mbs; i++) {
 76:         odiag    = v + 9*diag_offset[i];
 77:         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 78:         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
 79:         diag[8]  = odiag[8];
 80:         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 81:         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
 82:         mdiag[8] = odiag[8];
 83:         Kernel_A_gets_inverse_A_3(diag);
 84:         diag    += 9;
 85:         mdiag   += 9;
 86:       }
 87:       break;
 88:     case 4:
 89:       for (i=0; i<mbs; i++) {
 90:         odiag  = v + 16*diag_offset[i];
 91:         PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
 92:         PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
 93:         Kernel_A_gets_inverse_A_4(diag);
 94:         diag  += 16;
 95:         mdiag += 16;
 96:       }
 97:       break;
 98:     case 5:
 99:       for (i=0; i<mbs; i++) {
100:         odiag = v + 25*diag_offset[i];
101:         PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
102:         PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
103:         Kernel_A_gets_inverse_A_5(diag);
104:         diag  += 25;
105:         mdiag += 25;
106:       }
107:       break;
108:     default:
109:       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110:   }
111:   a->idiagvalid = PETSC_TRUE;
112:   return(0);
113: }

118: PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119: {
120:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121:   PetscScalar        *x,x1,x2,s1,s2;
122:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123:   PetscErrorCode     ierr;
124:   PetscInt           m = a->mbs,i,i2,nz,idx;
125:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

128:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
129:   its = its*lits;
130:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
131:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
132:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
133:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
134:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

136:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

138:   diag  = a->diag;
139:   idiag = a->idiag;
140:   VecGetArray(xx,&x);
141:   VecGetArray(bb,(PetscScalar**)&b);

143:   if (flag & SOR_ZERO_INITIAL_GUESS) {
144:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
145:       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
146:       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
147:       i2     = 2;
148:       idiag += 4;
149:       for (i=1; i<m; i++) {
150:         v     = aa + 4*ai[i];
151:         vi    = aj + ai[i];
152:         nz    = diag[i] - ai[i];
153:         s1    = b[i2]; s2 = b[i2+1];
154:         while (nz--) {
155:           idx  = 2*(*vi++);
156:           x1   = x[idx]; x2 = x[1+idx];
157:           s1  -= v[0]*x1 + v[2]*x2;
158:           s2  -= v[1]*x1 + v[3]*x2;
159:           v   += 4;
160:         }
161:         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
162:         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
163:         idiag   += 4;
164:         i2      += 2;
165:       }
166:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
167:       PetscLogFlops(4*(a->nz));
168:     }
169:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
170:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
171:       i2    = 0;
172:       mdiag = a->idiag+4*a->mbs;
173:       for (i=0; i<m; i++) {
174:         x1      = x[i2]; x2 = x[i2+1];
175:         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
176:         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
177:         mdiag  += 4;
178:         i2     += 2;
179:       }
180:       PetscLogFlops(6*m);
181:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
182:       PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
183:     }
184:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
185:       idiag   = a->idiag+4*a->mbs - 4;
186:       i2      = 2*m - 2;
187:       x1      = x[i2]; x2 = x[i2+1];
188:       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
189:       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
190:       idiag -= 4;
191:       i2    -= 2;
192:       for (i=m-2; i>=0; i--) {
193:         v     = aa + 4*(diag[i]+1);
194:         vi    = aj + diag[i] + 1;
195:         nz    = ai[i+1] - diag[i] - 1;
196:         s1    = x[i2]; s2 = x[i2+1];
197:         while (nz--) {
198:           idx  = 2*(*vi++);
199:           x1   = x[idx]; x2 = x[1+idx];
200:           s1  -= v[0]*x1 + v[2]*x2;
201:           s2  -= v[1]*x1 + v[3]*x2;
202:           v   += 4;
203:         }
204:         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
205:         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
206:         idiag   -= 4;
207:         i2      -= 2;
208:       }
209:       PetscLogFlops(4*(a->nz));
210:     }
211:   } else {
212:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
213:   }
214:   VecRestoreArray(xx,&x);
215:   VecRestoreArray(bb,(PetscScalar**)&b);
216:   return(0);
217: }

221: PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
222: {
223:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
224:   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
225:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
226:   PetscErrorCode     ierr;
227:   PetscInt           m = a->mbs,i,i2,nz,idx;
228:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

231:   its = its*lits;
232:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
233:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
234:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
235:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
236:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
237:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

239:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

241:   diag  = a->diag;
242:   idiag = a->idiag;
243:   VecGetArray(xx,&x);
244:   VecGetArray(bb,(PetscScalar**)&b);

246:   if (flag & SOR_ZERO_INITIAL_GUESS) {
247:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
248:       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
249:       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
250:       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
251:       i2     = 3;
252:       idiag += 9;
253:       for (i=1; i<m; i++) {
254:         v     = aa + 9*ai[i];
255:         vi    = aj + ai[i];
256:         nz    = diag[i] - ai[i];
257:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
258:         while (nz--) {
259:           idx  = 3*(*vi++);
260:           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
261:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
262:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
263:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
264:           v   += 9;
265:         }
266:         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
267:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
268:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
269:         idiag   += 9;
270:         i2      += 3;
271:       }
272:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
273:       PetscLogFlops(9*(a->nz));
274:     }
275:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
276:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
277:       i2    = 0;
278:       mdiag = a->idiag+9*a->mbs;
279:       for (i=0; i<m; i++) {
280:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
281:         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
282:         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
283:         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
284:         mdiag  += 9;
285:         i2     += 3;
286:       }
287:       PetscLogFlops(15*m);
288:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
289:       PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
290:     }
291:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
292:       idiag   = a->idiag+9*a->mbs - 9;
293:       i2      = 3*m - 3;
294:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
295:       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
296:       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
297:       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
298:       idiag -= 9;
299:       i2    -= 3;
300:       for (i=m-2; i>=0; i--) {
301:         v     = aa + 9*(diag[i]+1);
302:         vi    = aj + diag[i] + 1;
303:         nz    = ai[i+1] - diag[i] - 1;
304:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
305:         while (nz--) {
306:           idx  = 3*(*vi++);
307:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
308:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
309:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
310:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
311:           v   += 9;
312:         }
313:         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
314:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
315:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
316:         idiag   -= 9;
317:         i2      -= 3;
318:       }
319:       PetscLogFlops(9*(a->nz));
320:     }
321:   } else {
322:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
323:   }
324:   VecRestoreArray(xx,&x);
325:   VecRestoreArray(bb,(PetscScalar**)&b);
326:   return(0);
327: }

331: PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
332: {
333:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
334:   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
335:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
336:   PetscErrorCode     ierr;
337:   PetscInt           m = a->mbs,i,i2,nz,idx;
338:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

341:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
342:   its = its*lits;
343:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
344:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
345:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
346:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
347:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

349:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

351:   diag  = a->diag;
352:   idiag = a->idiag;
353:   VecGetArray(xx,&x);
354:   VecGetArray(bb,(PetscScalar**)&b);

356:   if (flag & SOR_ZERO_INITIAL_GUESS) {
357:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
358:       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
359:       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
360:       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
361:       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
362:       i2     = 4;
363:       idiag += 16;
364:       for (i=1; i<m; i++) {
365:         v     = aa + 16*ai[i];
366:         vi    = aj + ai[i];
367:         nz    = diag[i] - ai[i];
368:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
369:         while (nz--) {
370:           idx  = 4*(*vi++);
371:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
372:           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
373:           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
374:           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
375:           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
376:           v   += 16;
377:         }
378:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
379:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
380:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
381:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
382:         idiag   += 16;
383:         i2      += 4;
384:       }
385:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
386:       PetscLogFlops(16*(a->nz));
387:     }
388:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
389:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
390:       i2    = 0;
391:       mdiag = a->idiag+16*a->mbs;
392:       for (i=0; i<m; i++) {
393:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
394:         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
395:         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
396:         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
397:         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
398:         mdiag  += 16;
399:         i2     += 4;
400:       }
401:       PetscLogFlops(28*m);
402:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
403:       PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
404:     }
405:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
406:       idiag   = a->idiag+16*a->mbs - 16;
407:       i2      = 4*m - 4;
408:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
409:       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
410:       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
411:       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
412:       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
413:       idiag -= 16;
414:       i2    -= 4;
415:       for (i=m-2; i>=0; i--) {
416:         v     = aa + 16*(diag[i]+1);
417:         vi    = aj + diag[i] + 1;
418:         nz    = ai[i+1] - diag[i] - 1;
419:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
420:         while (nz--) {
421:           idx  = 4*(*vi++);
422:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
423:           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
424:           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
425:           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
426:           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
427:           v   += 16;
428:         }
429:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
430:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
431:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
432:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
433:         idiag   -= 16;
434:         i2      -= 4;
435:       }
436:       PetscLogFlops(16*(a->nz));
437:     }
438:   } else {
439:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
440:   }
441:   VecRestoreArray(xx,&x);
442:   VecRestoreArray(bb,(PetscScalar**)&b);
443:   return(0);
444: }

448: PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
449: {
450:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
451:   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
452:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
453:   PetscErrorCode     ierr;
454:   PetscInt           m = a->mbs,i,i2,nz,idx;
455:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

458:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
459:   its = its*lits;
460:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
461:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
462:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
463:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
464:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

466:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

468:   diag  = a->diag;
469:   idiag = a->idiag;
470:   VecGetArray(xx,&x);
471:   VecGetArray(bb,(PetscScalar**)&b);

473:   if (flag & SOR_ZERO_INITIAL_GUESS) {
474:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
475:       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
476:       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
477:       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
478:       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
479:       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
480:       i2     = 5;
481:       idiag += 25;
482:       for (i=1; i<m; i++) {
483:         v     = aa + 25*ai[i];
484:         vi    = aj + ai[i];
485:         nz    = diag[i] - ai[i];
486:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
487:         while (nz--) {
488:           idx  = 5*(*vi++);
489:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
490:           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
491:           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
492:           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
493:           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
494:           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
495:           v   += 25;
496:         }
497:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
498:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
499:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
500:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
501:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
502:         idiag   += 25;
503:         i2      += 5;
504:       }
505:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
506:       PetscLogFlops(25*(a->nz));
507:     }
508:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
509:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
510:       i2    = 0;
511:       mdiag = a->idiag+25*a->mbs;
512:       for (i=0; i<m; i++) {
513:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
514:         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
515:         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
516:         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
517:         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
518:         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
519:         mdiag  += 25;
520:         i2     += 5;
521:       }
522:       PetscLogFlops(45*m);
523:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
524:       PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
525:     }
526:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
527:       idiag   = a->idiag+25*a->mbs - 25;
528:       i2      = 5*m - 5;
529:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
530:       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
531:       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
532:       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
533:       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
534:       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
535:       idiag -= 25;
536:       i2    -= 5;
537:       for (i=m-2; i>=0; i--) {
538:         v     = aa + 25*(diag[i]+1);
539:         vi    = aj + diag[i] + 1;
540:         nz    = ai[i+1] - diag[i] - 1;
541:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
542:         while (nz--) {
543:           idx  = 5*(*vi++);
544:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
545:           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
546:           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
547:           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
548:           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
549:           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
550:           v   += 25;
551:         }
552:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
553:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
554:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
555:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
556:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
557:         idiag   -= 25;
558:         i2      -= 5;
559:       }
560:       PetscLogFlops(25*(a->nz));
561:     }
562:   } else {
563:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
564:   }
565:   VecRestoreArray(xx,&x);
566:   VecRestoreArray(bb,(PetscScalar**)&b);
567:   return(0);
568: }

570: /*
571:     Special version for direct calls from Fortran (Used in PETSc-fun3d)
572: */
573: #if defined(PETSC_HAVE_FORTRAN_CAPS)
574: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
575: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
576: #define matsetvaluesblocked4_ matsetvaluesblocked4
577: #endif

582: void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
583: {
584:   Mat               A = *AA;
585:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
586:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
587:   PetscInt          *ai=a->i,*ailen=a->ilen;
588:   PetscInt          *aj=a->j,stepval,lastcol = -1;
589:   const PetscScalar *value = v;
590:   MatScalar         *ap,*aa = a->a,*bap;

593:   if (A->rmap.bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
594:   stepval = (n-1)*4;
595:   for (k=0; k<m; k++) { /* loop over added rows */
596:     row  = im[k];
597:     rp   = aj + ai[row];
598:     ap   = aa + 16*ai[row];
599:     nrow = ailen[row];
600:     low  = 0;
601:     high = nrow;
602:     for (l=0; l<n; l++) { /* loop over added columns */
603:       col = in[l];
604:       if (col <= lastcol) low = 0; else high = nrow;
605:       lastcol = col;
606:       value = v + k*(stepval+4 + l)*4;
607:       while (high-low > 7) {
608:         t = (low+high)/2;
609:         if (rp[t] > col) high = t;
610:         else             low  = t;
611:       }
612:       for (i=low; i<high; i++) {
613:         if (rp[i] > col) break;
614:         if (rp[i] == col) {
615:           bap  = ap +  16*i;
616:           for (ii=0; ii<4; ii++,value+=stepval) {
617:             for (jj=ii; jj<16; jj+=4) {
618:               bap[jj] += *value++;
619:             }
620:           }
621:           goto noinsert2;
622:         }
623:       }
624:       N = nrow++ - 1;
625:       high++; /* added new column index thus must search to one higher than before */
626:       /* shift up all the later entries in this row */
627:       for (ii=N; ii>=i; ii--) {
628:         rp[ii+1] = rp[ii];
629:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
630:       }
631:       if (N >= i) {
632:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
633:       }
634:       rp[i] = col;
635:       bap   = ap +  16*i;
636:       for (ii=0; ii<4; ii++,value+=stepval) {
637:         for (jj=ii; jj<16; jj+=4) {
638:           bap[jj] = *value++;
639:         }
640:       }
641:       noinsert2:;
642:       low = i;
643:     }
644:     ailen[row] = nrow;
645:   }
646:   PetscFunctionReturnVoid();
647: }

650: #if defined(PETSC_HAVE_FORTRAN_CAPS)
651: #define matsetvalues4_ MATSETVALUES4
652: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
653: #define matsetvalues4_ matsetvalues4
654: #endif

659: void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
660: {
661:   Mat         A = *AA;
662:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
663:   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
664:   PetscInt    *ai=a->i,*ailen=a->ilen;
665:   PetscInt    *aj=a->j,brow,bcol;
666:   PetscInt    ridx,cidx,lastcol = -1;
667:   MatScalar   *ap,value,*aa=a->a,*bap;
668: 
670:   for (k=0; k<m; k++) { /* loop over added rows */
671:     row  = im[k]; brow = row/4;
672:     rp   = aj + ai[brow];
673:     ap   = aa + 16*ai[brow];
674:     nrow = ailen[brow];
675:     low  = 0;
676:     high = nrow;
677:     for (l=0; l<n; l++) { /* loop over added columns */
678:       col = in[l]; bcol = col/4;
679:       ridx = row % 4; cidx = col % 4;
680:       value = v[l + k*n];
681:       if (col <= lastcol) low = 0; else high = nrow;
682:       lastcol = col;
683:       while (high-low > 7) {
684:         t = (low+high)/2;
685:         if (rp[t] > bcol) high = t;
686:         else              low  = t;
687:       }
688:       for (i=low; i<high; i++) {
689:         if (rp[i] > bcol) break;
690:         if (rp[i] == bcol) {
691:           bap  = ap +  16*i + 4*cidx + ridx;
692:           *bap += value;
693:           goto noinsert1;
694:         }
695:       }
696:       N = nrow++ - 1;
697:       high++; /* added new column thus must search to one higher than before */
698:       /* shift up all the later entries in this row */
699:       for (ii=N; ii>=i; ii--) {
700:         rp[ii+1] = rp[ii];
701:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
702:       }
703:       if (N>=i) {
704:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
705:       }
706:       rp[i]                    = bcol;
707:       ap[16*i + 4*cidx + ridx] = value;
708:       noinsert1:;
709:       low = i;
710:     }
711:     ailen[brow] = nrow;
712:   }
713:   PetscFunctionReturnVoid();
714: }

717: /*  UGLY, ugly, ugly
718:    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 
719:    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 
720:    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
721:    converts the entries into single precision and then calls ..._MatScalar() to put them
722:    into the single precision data structures.
723: */
724: #if defined(PETSC_USE_MAT_SINGLE)
725: EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
726: #else
727: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
728: #endif

730: #define CHUNKSIZE  10

732: /*
733:      Checks for missing diagonals
734: */
737: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
738: {
739:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
741:   PetscInt       *diag,*jj = a->j,i;

744:   MatMarkDiagonal_SeqBAIJ(A);
745:   *missing = PETSC_FALSE;
746:   diag     = a->diag;
747:   for (i=0; i<a->mbs; i++) {
748:     if (jj[diag[i]] != i) {
749:       *missing  = PETSC_TRUE;
750:       if (d) *d = i;
751:     }
752:   }
753:   return(0);
754: }

758: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
759: {
760:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
762:   PetscInt       i,j,m = a->mbs;

765:   if (!a->diag) {
766:     PetscMalloc(m*sizeof(PetscInt),&a->diag);
767:   }
768:   for (i=0; i<m; i++) {
769:     a->diag[i] = a->i[i+1];
770:     for (j=a->i[i]; j<a->i[i+1]; j++) {
771:       if (a->j[j] == i) {
772:         a->diag[i] = j;
773:         break;
774:       }
775:     }
776:   }
777:   return(0);
778: }


781: EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);

785: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
786: {
787:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
789:   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs,nbs = 1,k,l,cnt;
790:   PetscInt       *tia, *tja;

793:   *nn = n;
794:   if (!ia) return(0);
795:   if (symmetric) {
796:     MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);
797:   } else {
798:     tia = a->i; tja = a->j;
799:   }
800: 
801:   if (!blockcompressed && bs > 1) {
802:     (*nn) *= bs;
803:     nbs    = bs;
804:     /* malloc & create the natural set of indices */
805:     PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);
806:     if (n) {
807:       (*ia)[0] = 0;
808:       for (j=1; j<bs; j++) {
809:         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
810:       }
811:     }

813:     for (i=1; i<n; i++) {
814:       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
815:       for (j=1; j<bs; j++) {
816:         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
817:       }
818:     }
819:     if (n) {
820:       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
821:     }

823:     if (ja) {
824:       PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);
825:       cnt = 0;
826:       for (i=0; i<n; i++) {
827:         for (j=0; j<bs; j++) {
828:           for (k=tia[i]; k<tia[i+1]; k++) {
829:             for (l=0; l<bs; l++) {
830:               (*ja)[cnt++] = bs*tja[k] + l;
831:             }
832:           }
833:         }
834:       }
835:     }

837:     n     *= bs;
838:     nz *= bs*bs;
839:     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
840:       PetscFree(tia);
841:       PetscFree(tja);
842:     }
843:   } else {
844:     *ia = tia;
845:     if (ja) *ja = tja;
846:   }
847:   if (oshift == 1) {
848:     for (i=0; i<n+nbs; i++) (*ia)[i]++;
849:     if (ja) for (i=0; i<nz; i++) (*ja)[i]++;
850:   }
851:   return(0);
852: }

856: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
857: {
858:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
860:   PetscInt       i,n = a->mbs,nz = a->i[n];

863:   if (!ia) return(0);
864:   if (!blockcompressed && A->rmap.bs > 1) {
865:     PetscFree(*ia);
866:     if (ja) {PetscFree(*ja);}
867:   } else if (symmetric) {
868:     PetscFree(*ia);
869:     if (ja) {PetscFree(*ja);}
870:   } else if (oshift == 1) { /* blockcompressed */
871:     for (i=0; i<n+1; i++) a->i[i]--;
872:     if (ja) {for (i=0; i<nz; i++) a->j[i]--;}
873:   }
874:   return(0);
875: }

879: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
880: {
881:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

885: #if defined(PETSC_USE_LOG)
886:   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
887: #endif
888:   MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
889:   if (a->row) {
890:     ISDestroy(a->row);
891:   }
892:   if (a->col) {
893:     ISDestroy(a->col);
894:   }
895:   PetscFree(a->diag);
896:   PetscFree(a->idiag);
897:   PetscFree2(a->imax,a->ilen);
898:   PetscFree(a->solve_work);
899:   PetscFree(a->mult_work);
900:   if (a->icol) {ISDestroy(a->icol);}
901:   PetscFree(a->saved_values);
902: #if defined(PETSC_USE_MAT_SINGLE)
903:   PetscFree(a->setvaluescopy);
904: #endif
905:   PetscFree(a->xtoy);
906:   if (a->compressedrow.use){PetscFree(a->compressedrow.i);}

908:   PetscFree(a);

910:   PetscObjectChangeTypeName((PetscObject)A,0);
911:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);
912:   PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);
913:   PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);
914:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);
915:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);
916:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);
917:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);
918:   return(0);
919: }

923: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
924: {
925:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

929:   switch (op) {
930:   case MAT_ROW_ORIENTED:
931:     a->roworiented    = flg;
932:     break;
933:   case MAT_KEEP_ZEROED_ROWS:
934:     a->keepzeroedrows = flg;
935:     break;
936:   case MAT_NEW_NONZERO_LOCATIONS:
937:     a->nonew          = (flg ? 0 : 1);
938:     break;
939:   case MAT_NEW_NONZERO_LOCATION_ERR:
940:     a->nonew          = (flg ? -1 : 0);
941:     break;
942:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
943:     a->nonew          = (flg ? -2 : 0);
944:     break;
945:   case MAT_NEW_DIAGONALS:
946:   case MAT_IGNORE_OFF_PROC_ENTRIES:
947:   case MAT_USE_HASH_TABLE:
948:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
949:     break;
950:   case MAT_SYMMETRIC:
951:   case MAT_STRUCTURALLY_SYMMETRIC:
952:   case MAT_HERMITIAN:
953:   case MAT_SYMMETRY_ETERNAL:
954:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
955:     break;
956:   default:
957:     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
958:   }
959:   return(0);
960: }

964: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
965: {
966:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
968:   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
969:   MatScalar      *aa,*aa_i;
970:   PetscScalar    *v_i;

973:   bs  = A->rmap.bs;
974:   ai  = a->i;
975:   aj  = a->j;
976:   aa  = a->a;
977:   bs2 = a->bs2;
978: 
979:   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
980: 
981:   bn  = row/bs;   /* Block number */
982:   bp  = row % bs; /* Block Position */
983:   M   = ai[bn+1] - ai[bn];
984:   *nz = bs*M;
985: 
986:   if (v) {
987:     *v = 0;
988:     if (*nz) {
989:       PetscMalloc((*nz)*sizeof(PetscScalar),v);
990:       for (i=0; i<M; i++) { /* for each block in the block row */
991:         v_i  = *v + i*bs;
992:         aa_i = aa + bs2*(ai[bn] + i);
993:         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
994:       }
995:     }
996:   }

998:   if (idx) {
999:     *idx = 0;
1000:     if (*nz) {
1001:       PetscMalloc((*nz)*sizeof(PetscInt),idx);
1002:       for (i=0; i<M; i++) { /* for each block in the block row */
1003:         idx_i = *idx + i*bs;
1004:         itmp  = bs*aj[ai[bn] + i];
1005:         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1006:       }
1007:     }
1008:   }
1009:   return(0);
1010: }

1014: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1015: {

1019:   if (idx) {PetscFree(*idx);}
1020:   if (v)   {PetscFree(*v);}
1021:   return(0);
1022: }

1026: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
1027: {
1028:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1029:   Mat            C;
1031:   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1032:   PetscInt       *rows,*cols,bs2=a->bs2;
1033:   PetscScalar    *array;

1036:   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1037:   PetscMalloc((1+nbs)*sizeof(PetscInt),&col);
1038:   PetscMemzero(col,(1+nbs)*sizeof(PetscInt));

1040: #if defined(PETSC_USE_MAT_SINGLE)
1041:   PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);
1042:   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1043: #else
1044:   array = a->a;
1045: #endif

1047:   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1048:   MatCreate(((PetscObject)A)->comm,&C);
1049:   MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);
1050:   MatSetType(C,((PetscObject)A)->type_name);
1051:   MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);
1052:   PetscFree(col);
1053:   PetscMalloc(2*bs*sizeof(PetscInt),&rows);
1054:   cols = rows + bs;
1055:   for (i=0; i<mbs; i++) {
1056:     cols[0] = i*bs;
1057:     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1058:     len = ai[i+1] - ai[i];
1059:     for (j=0; j<len; j++) {
1060:       rows[0] = (*aj++)*bs;
1061:       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1062:       MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
1063:       array += bs2;
1064:     }
1065:   }
1066:   PetscFree(rows);
1067: #if defined(PETSC_USE_MAT_SINGLE)
1068:   PetscFree(array);
1069: #endif
1070: 
1071:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1072:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1073: 
1074:   if (B) {
1075:     *B = C;
1076:   } else {
1077:     MatHeaderCopy(A,C);
1078:   }
1079:   return(0);
1080: }

1084: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1085: {
1086:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1088:   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1089:   int            fd;
1090:   PetscScalar    *aa;
1091:   FILE           *file;

1094:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1095:   PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);
1096:   col_lens[0] = MAT_FILE_COOKIE;

1098:   col_lens[1] = A->rmap.N;
1099:   col_lens[2] = A->cmap.n;
1100:   col_lens[3] = a->nz*bs2;

1102:   /* store lengths of each row and write (including header) to file */
1103:   count = 0;
1104:   for (i=0; i<a->mbs; i++) {
1105:     for (j=0; j<bs; j++) {
1106:       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1107:     }
1108:   }
1109:   PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);
1110:   PetscFree(col_lens);

1112:   /* store column indices (zero start index) */
1113:   PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);
1114:   count = 0;
1115:   for (i=0; i<a->mbs; i++) {
1116:     for (j=0; j<bs; j++) {
1117:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1118:         for (l=0; l<bs; l++) {
1119:           jj[count++] = bs*a->j[k] + l;
1120:         }
1121:       }
1122:     }
1123:   }
1124:   PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1125:   PetscFree(jj);

1127:   /* store nonzero values */
1128:   PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
1129:   count = 0;
1130:   for (i=0; i<a->mbs; i++) {
1131:     for (j=0; j<bs; j++) {
1132:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1133:         for (l=0; l<bs; l++) {
1134:           aa[count++] = a->a[bs2*k + l*bs + j];
1135:         }
1136:       }
1137:     }
1138:   }
1139:   PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1140:   PetscFree(aa);

1142:   PetscViewerBinaryGetInfoPointer(viewer,&file);
1143:   if (file) {
1144:     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1145:   }
1146:   return(0);
1147: }

1151: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1152: {
1153:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1154:   PetscErrorCode    ierr;
1155:   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1156:   PetscViewerFormat format;

1159:   PetscViewerGetFormat(viewer,&format);
1160:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1161:     PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);
1162:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1163:     Mat aij;
1164:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1165:     MatView(aij,viewer);
1166:     MatDestroy(aij);
1167:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1168:      return(0);
1169:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1170:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1171:     for (i=0; i<a->mbs; i++) {
1172:       for (j=0; j<bs; j++) {
1173:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1174:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1175:           for (l=0; l<bs; l++) {
1176: #if defined(PETSC_USE_COMPLEX)
1177:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1178:               PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1179:                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1180:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1181:               PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1182:                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1183:             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1184:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1185:             }
1186: #else
1187:             if (a->a[bs2*k + l*bs + j] != 0.0) {
1188:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1189:             }
1190: #endif
1191:           }
1192:         }
1193:         PetscViewerASCIIPrintf(viewer,"\n");
1194:       }
1195:     }
1196:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1197:   } else {
1198:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1199:     for (i=0; i<a->mbs; i++) {
1200:       for (j=0; j<bs; j++) {
1201:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1202:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1203:           for (l=0; l<bs; l++) {
1204: #if defined(PETSC_USE_COMPLEX)
1205:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1206:               PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1207:                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1208:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1209:               PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1210:                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1211:             } else {
1212:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1213:             }
1214: #else
1215:             PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1216: #endif
1217:           }
1218:         }
1219:         PetscViewerASCIIPrintf(viewer,"\n");
1220:       }
1221:     }
1222:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1223:   }
1224:   PetscViewerFlush(viewer);
1225:   return(0);
1226: }

1230: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1231: {
1232:   Mat            A = (Mat) Aa;
1233:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1235:   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1236:   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1237:   MatScalar      *aa;
1238:   PetscViewer    viewer;


1242:   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1243:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);

1245:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

1247:   /* loop over matrix elements drawing boxes */
1248:   color = PETSC_DRAW_BLUE;
1249:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1250:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1251:       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1252:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1253:       aa = a->a + j*bs2;
1254:       for (k=0; k<bs; k++) {
1255:         for (l=0; l<bs; l++) {
1256:           if (PetscRealPart(*aa++) >=  0.) continue;
1257:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1258:         }
1259:       }
1260:     }
1261:   }
1262:   color = PETSC_DRAW_CYAN;
1263:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1264:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1265:       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1266:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1267:       aa = a->a + j*bs2;
1268:       for (k=0; k<bs; k++) {
1269:         for (l=0; l<bs; l++) {
1270:           if (PetscRealPart(*aa++) != 0.) continue;
1271:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1272:         }
1273:       }
1274:     }
1275:   }

1277:   color = PETSC_DRAW_RED;
1278:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1279:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1280:       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1281:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1282:       aa = a->a + j*bs2;
1283:       for (k=0; k<bs; k++) {
1284:         for (l=0; l<bs; l++) {
1285:           if (PetscRealPart(*aa++) <= 0.) continue;
1286:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1287:         }
1288:       }
1289:     }
1290:   }
1291:   return(0);
1292: }

1296: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1297: {
1299:   PetscReal      xl,yl,xr,yr,w,h;
1300:   PetscDraw      draw;
1301:   PetscTruth     isnull;


1305:   PetscViewerDrawGetDraw(viewer,0,&draw);
1306:   PetscDrawIsNull(draw,&isnull); if (isnull) return(0);

1308:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1309:   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1310:   xr += w;    yr += h;  xl = -w;     yl = -h;
1311:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1312:   PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1313:   PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
1314:   return(0);
1315: }

1319: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1320: {
1322:   PetscTruth     iascii,isbinary,isdraw;

1325:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1326:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1327:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1328:   if (iascii){
1329:     MatView_SeqBAIJ_ASCII(A,viewer);
1330:   } else if (isbinary) {
1331:     MatView_SeqBAIJ_Binary(A,viewer);
1332:   } else if (isdraw) {
1333:     MatView_SeqBAIJ_Draw(A,viewer);
1334:   } else {
1335:     Mat B;
1336:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1337:     MatView(B,viewer);
1338:     MatDestroy(B);
1339:   }
1340:   return(0);
1341: }


1346: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1347: {
1348:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1349:   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1350:   PetscInt    *ai = a->i,*ailen = a->ilen;
1351:   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1352:   MatScalar   *ap,*aa = a->a;

1355:   for (k=0; k<m; k++) { /* loop over rows */
1356:     row  = im[k]; brow = row/bs;
1357:     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1358:     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1359:     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1360:     nrow = ailen[brow];
1361:     for (l=0; l<n; l++) { /* loop over columns */
1362:       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1363:       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1364:       col  = in[l] ;
1365:       bcol = col/bs;
1366:       cidx = col%bs;
1367:       ridx = row%bs;
1368:       high = nrow;
1369:       low  = 0; /* assume unsorted */
1370:       while (high-low > 5) {
1371:         t = (low+high)/2;
1372:         if (rp[t] > bcol) high = t;
1373:         else             low  = t;
1374:       }
1375:       for (i=low; i<high; i++) {
1376:         if (rp[i] > bcol) break;
1377:         if (rp[i] == bcol) {
1378:           *v++ = ap[bs2*i+bs*cidx+ridx];
1379:           goto finished;
1380:         }
1381:       }
1382:       *v++ = 0.0;
1383:       finished:;
1384:     }
1385:   }
1386:   return(0);
1387: }

1389: #if defined(PETSC_USE_MAT_SINGLE)
1392: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1393: {
1394:   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1396:   PetscInt       i,N = m*n*b->bs2;
1397:   MatScalar      *vsingle;

1400:   if (N > b->setvalueslen) {
1401:     PetscFree(b->setvaluescopy);
1402:     PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
1403:     b->setvalueslen  = N;
1404:   }
1405:   vsingle = b->setvaluescopy;
1406:   for (i=0; i<N; i++) {
1407:     vsingle[i] = v[i];
1408:   }
1409:   MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
1410:   return(0);
1411: }
1412: #endif


1417: PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1418: {
1419:   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1420:   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1421:   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1422:   PetscErrorCode  ierr;
1423:   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1424:   PetscTruth      roworiented=a->roworiented;
1425:   const MatScalar *value = v;
1426:   MatScalar       *ap,*aa = a->a,*bap;

1429:   if (roworiented) {
1430:     stepval = (n-1)*bs;
1431:   } else {
1432:     stepval = (m-1)*bs;
1433:   }
1434:   for (k=0; k<m; k++) { /* loop over added rows */
1435:     row  = im[k];
1436:     if (row < 0) continue;
1437: #if defined(PETSC_USE_DEBUG)  
1438:     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1439: #endif
1440:     rp   = aj + ai[row];
1441:     ap   = aa + bs2*ai[row];
1442:     rmax = imax[row];
1443:     nrow = ailen[row];
1444:     low  = 0;
1445:     high = nrow;
1446:     for (l=0; l<n; l++) { /* loop over added columns */
1447:       if (in[l] < 0) continue;
1448: #if defined(PETSC_USE_DEBUG)  
1449:       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1450: #endif
1451:       col = in[l];
1452:       if (roworiented) {
1453:         value = v + k*(stepval+bs)*bs + l*bs;
1454:       } else {
1455:         value = v + l*(stepval+bs)*bs + k*bs;
1456:       }
1457:       if (col <= lastcol) low = 0; else high = nrow;
1458:       lastcol = col;
1459:       while (high-low > 7) {
1460:         t = (low+high)/2;
1461:         if (rp[t] > col) high = t;
1462:         else             low  = t;
1463:       }
1464:       for (i=low; i<high; i++) {
1465:         if (rp[i] > col) break;
1466:         if (rp[i] == col) {
1467:           bap  = ap +  bs2*i;
1468:           if (roworiented) {
1469:             if (is == ADD_VALUES) {
1470:               for (ii=0; ii<bs; ii++,value+=stepval) {
1471:                 for (jj=ii; jj<bs2; jj+=bs) {
1472:                   bap[jj] += *value++;
1473:                 }
1474:               }
1475:             } else {
1476:               for (ii=0; ii<bs; ii++,value+=stepval) {
1477:                 for (jj=ii; jj<bs2; jj+=bs) {
1478:                   bap[jj] = *value++;
1479:                 }
1480:               }
1481:             }
1482:           } else {
1483:             if (is == ADD_VALUES) {
1484:               for (ii=0; ii<bs; ii++,value+=stepval) {
1485:                 for (jj=0; jj<bs; jj++) {
1486:                   *bap++ += *value++;
1487:                 }
1488:               }
1489:             } else {
1490:               for (ii=0; ii<bs; ii++,value+=stepval) {
1491:                 for (jj=0; jj<bs; jj++) {
1492:                   *bap++  = *value++;
1493:                 }
1494:               }
1495:             }
1496:           }
1497:           goto noinsert2;
1498:         }
1499:       }
1500:       if (nonew == 1) goto noinsert2;
1501:       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1502:       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1503:       N = nrow++ - 1; high++;
1504:       /* shift up all the later entries in this row */
1505:       for (ii=N; ii>=i; ii--) {
1506:         rp[ii+1] = rp[ii];
1507:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1508:       }
1509:       if (N >= i) {
1510:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1511:       }
1512:       rp[i] = col;
1513:       bap   = ap +  bs2*i;
1514:       if (roworiented) {
1515:         for (ii=0; ii<bs; ii++,value+=stepval) {
1516:           for (jj=ii; jj<bs2; jj+=bs) {
1517:             bap[jj] = *value++;
1518:           }
1519:         }
1520:       } else {
1521:         for (ii=0; ii<bs; ii++,value+=stepval) {
1522:           for (jj=0; jj<bs; jj++) {
1523:             *bap++  = *value++;
1524:           }
1525:         }
1526:       }
1527:       noinsert2:;
1528:       low = i;
1529:     }
1530:     ailen[row] = nrow;
1531:   }
1532:   return(0);
1533: }

1537: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1538: {
1539:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1540:   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1541:   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1543:   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1544:   MatScalar      *aa = a->a,*ap;
1545:   PetscReal      ratio=0.6;

1548:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

1550:   if (m) rmax = ailen[0];
1551:   for (i=1; i<mbs; i++) {
1552:     /* move each row back by the amount of empty slots (fshift) before it*/
1553:     fshift += imax[i-1] - ailen[i-1];
1554:     rmax   = PetscMax(rmax,ailen[i]);
1555:     if (fshift) {
1556:       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1557:       N = ailen[i];
1558:       for (j=0; j<N; j++) {
1559:         ip[j-fshift] = ip[j];
1560:         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
1561:       }
1562:     }
1563:     ai[i] = ai[i-1] + ailen[i-1];
1564:   }
1565:   if (mbs) {
1566:     fshift += imax[mbs-1] - ailen[mbs-1];
1567:     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1568:   }
1569:   /* reset ilen and imax for each row */
1570:   for (i=0; i<mbs; i++) {
1571:     ailen[i] = imax[i] = ai[i+1] - ai[i];
1572:   }
1573:   a->nz = ai[mbs];

1575:   /* diagonals may have moved, so kill the diagonal pointers */
1576:   a->idiagvalid = PETSC_FALSE;
1577:   if (fshift && a->diag) {
1578:     PetscFree(a->diag);
1579:     PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));
1580:     a->diag = 0;
1581:   }
1582:   PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);
1583:   PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
1584:   PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);
1585:   a->reallocs          = 0;
1586:   A->info.nz_unneeded  = (PetscReal)fshift*bs2;

1588:   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1589:   if (a->compressedrow.use){
1590:     Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);
1591:   }

1593:   A->same_nonzero = PETSC_TRUE;
1594:   return(0);
1595: }

1597: /* 
1598:    This function returns an array of flags which indicate the locations of contiguous
1599:    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1600:    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1601:    Assume: sizes should be long enough to hold all the values.
1602: */
1605: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1606: {
1607:   PetscInt   i,j,k,row;
1608:   PetscTruth flg;

1611:   for (i=0,j=0; i<n; j++) {
1612:     row = idx[i];
1613:     if (row%bs!=0) { /* Not the begining of a block */
1614:       sizes[j] = 1;
1615:       i++;
1616:     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1617:       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1618:       i++;
1619:     } else { /* Begining of the block, so check if the complete block exists */
1620:       flg = PETSC_TRUE;
1621:       for (k=1; k<bs; k++) {
1622:         if (row+k != idx[i+k]) { /* break in the block */
1623:           flg = PETSC_FALSE;
1624:           break;
1625:         }
1626:       }
1627:       if (flg) { /* No break in the bs */
1628:         sizes[j] = bs;
1629:         i+= bs;
1630:       } else {
1631:         sizes[j] = 1;
1632:         i++;
1633:       }
1634:     }
1635:   }
1636:   *bs_max = j;
1637:   return(0);
1638: }
1639: 
1642: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1643: {
1644:   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1646:   PetscInt       i,j,k,count,*rows;
1647:   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1648:   PetscScalar    zero = 0.0;
1649:   MatScalar      *aa;

1652:   /* Make a copy of the IS and  sort it */
1653:   /* allocate memory for rows,sizes */
1654:   PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);
1655:   sizes = rows + is_n;

1657:   /* copy IS values to rows, and sort them */
1658:   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1659:   PetscSortInt(is_n,rows);
1660:   if (baij->keepzeroedrows) {
1661:     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1662:     bs_max = is_n;
1663:     A->same_nonzero = PETSC_TRUE;
1664:   } else {
1665:     MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
1666:     A->same_nonzero = PETSC_FALSE;
1667:   }

1669:   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1670:     row   = rows[j];
1671:     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1672:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1673:     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1674:     if (sizes[i] == bs && !baij->keepzeroedrows) {
1675:       if (diag != 0.0) {
1676:         if (baij->ilen[row/bs] > 0) {
1677:           baij->ilen[row/bs]       = 1;
1678:           baij->j[baij->i[row/bs]] = row/bs;
1679:           PetscMemzero(aa,count*bs*sizeof(MatScalar));
1680:         }
1681:         /* Now insert all the diagonal values for this bs */
1682:         for (k=0; k<bs; k++) {
1683:           (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
1684:         }
1685:       } else { /* (diag == 0.0) */
1686:         baij->ilen[row/bs] = 0;
1687:       } /* end (diag == 0.0) */
1688:     } else { /* (sizes[i] != bs) */
1689: #if defined (PETSC_USE_DEBUG)
1690:       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1691: #endif
1692:       for (k=0; k<count; k++) {
1693:         aa[0] =  zero;
1694:         aa    += bs;
1695:       }
1696:       if (diag != 0.0) {
1697:         (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
1698:       }
1699:     }
1700:   }

1702:   PetscFree(rows);
1703:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
1704:   return(0);
1705: }

1709: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1710: {
1711:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1712:   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1713:   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1714:   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
1716:   PetscInt       ridx,cidx,bs2=a->bs2;
1717:   PetscTruth     roworiented=a->roworiented;
1718:   MatScalar      *ap,value,*aa=a->a,*bap;

1721:   for (k=0; k<m; k++) { /* loop over added rows */
1722:     row  = im[k];
1723:     brow = row/bs;
1724:     if (row < 0) continue;
1725: #if defined(PETSC_USE_DEBUG)  
1726:     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
1727: #endif
1728:     rp   = aj + ai[brow];
1729:     ap   = aa + bs2*ai[brow];
1730:     rmax = imax[brow];
1731:     nrow = ailen[brow];
1732:     low  = 0;
1733:     high = nrow;
1734:     for (l=0; l<n; l++) { /* loop over added columns */
1735:       if (in[l] < 0) continue;
1736: #if defined(PETSC_USE_DEBUG)  
1737:       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
1738: #endif
1739:       col = in[l]; bcol = col/bs;
1740:       ridx = row % bs; cidx = col % bs;
1741:       if (roworiented) {
1742:         value = v[l + k*n];
1743:       } else {
1744:         value = v[k + l*m];
1745:       }
1746:       if (col <= lastcol) low = 0; else high = nrow;
1747:       lastcol = col;
1748:       while (high-low > 7) {
1749:         t = (low+high)/2;
1750:         if (rp[t] > bcol) high = t;
1751:         else              low  = t;
1752:       }
1753:       for (i=low; i<high; i++) {
1754:         if (rp[i] > bcol) break;
1755:         if (rp[i] == bcol) {
1756:           bap  = ap +  bs2*i + bs*cidx + ridx;
1757:           if (is == ADD_VALUES) *bap += value;
1758:           else                  *bap  = value;
1759:           goto noinsert1;
1760:         }
1761:       }
1762:       if (nonew == 1) goto noinsert1;
1763:       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1764:       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1765:       N = nrow++ - 1; high++;
1766:       /* shift up all the later entries in this row */
1767:       for (ii=N; ii>=i; ii--) {
1768:         rp[ii+1] = rp[ii];
1769:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1770:       }
1771:       if (N>=i) {
1772:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1773:       }
1774:       rp[i]                      = bcol;
1775:       ap[bs2*i + bs*cidx + ridx] = value;
1776:       a->nz++;
1777:       noinsert1:;
1778:       low = i;
1779:     }
1780:     ailen[brow] = nrow;
1781:   }
1782:   A->same_nonzero = PETSC_FALSE;
1783:   return(0);
1784: }


1789: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1790: {
1791:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1792:   Mat            outA;
1794:   PetscTruth     row_identity,col_identity;

1797:   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1798:   ISIdentity(row,&row_identity);
1799:   ISIdentity(col,&col_identity);
1800:   if (!row_identity || !col_identity) {
1801:     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1802:   }

1804:   outA          = inA;
1805:   inA->factor   = FACTOR_LU;

1807:   MatMarkDiagonal_SeqBAIJ(inA);

1809:   PetscObjectReference((PetscObject)row);
1810:   if (a->row) { ISDestroy(a->row); }
1811:   a->row = row;
1812:   PetscObjectReference((PetscObject)col);
1813:   if (a->col) { ISDestroy(a->col); }
1814:   a->col = col;
1815: 
1816:   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1817:   ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
1818:   PetscLogObjectParent(inA,a->icol);
1819: 
1820:   /*
1821:       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 
1822:       for ILU(0) factorization with natural ordering
1823:   */
1824:   if (inA->rmap.bs < 8) {
1825:     MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);
1826:   } else {
1827:     if (!a->solve_work) {
1828:       PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);
1829:       PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));
1830:     }
1831:   }

1833:   MatLUFactorNumeric(inA,info,&outA);

1835:   return(0);
1836: }

1841: PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1842: {
1843:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1844:   PetscInt    i,nz,nbs;

1847:   nz  = baij->maxnz/baij->bs2;
1848:   nbs = baij->nbs;
1849:   for (i=0; i<nz; i++) {
1850:     baij->j[i] = indices[i];
1851:   }
1852:   baij->nz = nz;
1853:   for (i=0; i<nbs; i++) {
1854:     baij->ilen[i] = baij->imax[i];
1855:   }

1857:   return(0);
1858: }

1863: /*@
1864:     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1865:        in the matrix.

1867:   Input Parameters:
1868: +  mat - the SeqBAIJ matrix
1869: -  indices - the column indices

1871:   Level: advanced

1873:   Notes:
1874:     This can be called if you have precomputed the nonzero structure of the 
1875:   matrix and want to provide it to the matrix object to improve the performance
1876:   of the MatSetValues() operation.

1878:     You MUST have set the correct numbers of nonzeros per row in the call to 
1879:   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.

1881:     MUST be called before any calls to MatSetValues();

1883: @*/
1884: PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1885: {
1886:   PetscErrorCode ierr,(*f)(Mat,PetscInt *);

1891:   PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
1892:   if (f) {
1893:     (*f)(mat,indices);
1894:   } else {
1895:     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1896:   }
1897:   return(0);
1898: }

1902: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
1903: {
1904:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1906:   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1907:   PetscReal      atmp;
1908:   PetscScalar    *x,zero = 0.0;
1909:   MatScalar      *aa;
1910:   PetscInt       ncols,brow,krow,kcol;

1913:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1914:   bs   = A->rmap.bs;
1915:   aa   = a->a;
1916:   ai   = a->i;
1917:   aj   = a->j;
1918:   mbs  = a->mbs;

1920:   VecSet(v,zero);
1921:   if (idx) {
1922:     for (i=0; i<A->rmap.n;i++) idx[i] = 0;
1923:   }
1924:   VecGetArray(v,&x);
1925:   VecGetLocalSize(v,&n);
1926:   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1927:   for (i=0; i<mbs; i++) {
1928:     ncols = ai[1] - ai[0]; ai++;
1929:     brow  = bs*i;
1930:     for (j=0; j<ncols; j++){
1931:       for (kcol=0; kcol<bs; kcol++){
1932:         for (krow=0; krow<bs; krow++){
1933:           atmp = PetscAbsScalar(*aa);aa++;
1934:           row = brow + krow;    /* row index */
1935:           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
1936:           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
1937:         }
1938:       }
1939:       aj++;
1940:     }
1941:   }
1942:   VecRestoreArray(v,&x);
1943:   return(0);
1944: }

1948: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
1949: {

1953:   /* If the two matrices have the same copy implementation, use fast copy. */
1954:   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1955:     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1956:     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;

1958:     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1959:       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1960:     }
1961:     PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));
1962:   } else {
1963:     MatCopy_Basic(A,B,str);
1964:   }
1965:   return(0);
1966: }

1970: PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1971: {

1975:    MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);
1976:   return(0);
1977: }

1981: PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1982: {
1983:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1985:   *array = a->a;
1986:   return(0);
1987: }

1991: PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1992: {
1994:   return(0);
1995: }

1997:  #include petscblaslapack.h
2000: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2001: {
2002:   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2004:   PetscInt       i,bs=Y->rmap.bs,j,bs2;
2005:   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;

2008:   if (str == SAME_NONZERO_PATTERN) {
2009:     PetscScalar alpha = a;
2010:     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2011:   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2012:     if (y->xtoy && y->XtoY != X) {
2013:       PetscFree(y->xtoy);
2014:       MatDestroy(y->XtoY);
2015:     }
2016:     if (!y->xtoy) { /* get xtoy */
2017:       MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);
2018:       y->XtoY = X;
2019:     }
2020:     bs2 = bs*bs;
2021:     for (i=0; i<x->nz; i++) {
2022:       j = 0;
2023:       while (j < bs2){
2024:         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2025:         j++;
2026:       }
2027:     }
2028:     PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
2029:   } else {
2030:     MatAXPY_Basic(Y,a,X,str);
2031:   }
2032:   return(0);
2033: }

2037: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2038: {
2039:   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2040:   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2041:   PetscScalar    *aa = a->a;

2044:   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2045:   return(0);
2046: }

2050: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2051: {
2052:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2053:   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2054:   PetscScalar    *aa = a->a;

2057:   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2058:   return(0);
2059: }


2062: /* -------------------------------------------------------------------*/
2063: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2064:        MatGetRow_SeqBAIJ,
2065:        MatRestoreRow_SeqBAIJ,
2066:        MatMult_SeqBAIJ_N,
2067: /* 4*/ MatMultAdd_SeqBAIJ_N,
2068:        MatMultTranspose_SeqBAIJ,
2069:        MatMultTransposeAdd_SeqBAIJ,
2070:        MatSolve_SeqBAIJ_N,
2071:        0,
2072:        0,
2073: /*10*/ 0,
2074:        MatLUFactor_SeqBAIJ,
2075:        0,
2076:        0,
2077:        MatTranspose_SeqBAIJ,
2078: /*15*/ MatGetInfo_SeqBAIJ,
2079:        MatEqual_SeqBAIJ,
2080:        MatGetDiagonal_SeqBAIJ,
2081:        MatDiagonalScale_SeqBAIJ,
2082:        MatNorm_SeqBAIJ,
2083: /*20*/ 0,
2084:        MatAssemblyEnd_SeqBAIJ,
2085:        0,
2086:        MatSetOption_SeqBAIJ,
2087:        MatZeroEntries_SeqBAIJ,
2088: /*25*/ MatZeroRows_SeqBAIJ,
2089:        MatLUFactorSymbolic_SeqBAIJ,
2090:        MatLUFactorNumeric_SeqBAIJ_N,
2091:        MatCholeskyFactorSymbolic_SeqBAIJ,
2092:        MatCholeskyFactorNumeric_SeqBAIJ_N,
2093: /*30*/ MatSetUpPreallocation_SeqBAIJ,
2094:        MatILUFactorSymbolic_SeqBAIJ,
2095:        MatICCFactorSymbolic_SeqBAIJ,
2096:        MatGetArray_SeqBAIJ,
2097:        MatRestoreArray_SeqBAIJ,
2098: /*35*/ MatDuplicate_SeqBAIJ,
2099:        0,
2100:        0,
2101:        MatILUFactor_SeqBAIJ,
2102:        0,
2103: /*40*/ MatAXPY_SeqBAIJ,
2104:        MatGetSubMatrices_SeqBAIJ,
2105:        MatIncreaseOverlap_SeqBAIJ,
2106:        MatGetValues_SeqBAIJ,
2107:        MatCopy_SeqBAIJ,
2108: /*45*/ 0,
2109:        MatScale_SeqBAIJ,
2110:        0,
2111:        0,
2112:        0,
2113: /*50*/ 0,
2114:        MatGetRowIJ_SeqBAIJ,
2115:        MatRestoreRowIJ_SeqBAIJ,
2116:        0,
2117:        0,
2118: /*55*/ 0,
2119:        0,
2120:        0,
2121:        0,
2122:        MatSetValuesBlocked_SeqBAIJ,
2123: /*60*/ MatGetSubMatrix_SeqBAIJ,
2124:        MatDestroy_SeqBAIJ,
2125:        MatView_SeqBAIJ,
2126:        0,
2127:        0,
2128: /*65*/ 0,
2129:        0,
2130:        0,
2131:        0,
2132:        0,
2133: /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2134:        MatConvert_Basic,
2135:        0,
2136:        0,
2137:        0,
2138: /*75*/ 0,
2139:        0,
2140:        0,
2141:        0,
2142:        0,
2143: /*80*/ 0,
2144:        0,
2145:        0,
2146:        0,
2147:        MatLoad_SeqBAIJ,
2148: /*85*/ 0,
2149:        0,
2150:        0,
2151:        0,
2152:        0,
2153: /*90*/ 0,
2154:        0,
2155:        0,
2156:        0,
2157:        0,
2158: /*95*/ 0,
2159:        0,
2160:        0,
2161:        0,
2162:        0,
2163: /*100*/0,
2164:        0,
2165:        0,
2166:        0,
2167:        0,
2168: /*105*/0,
2169:        MatRealPart_SeqBAIJ,
2170:        MatImaginaryPart_SeqBAIJ,
2171:        0,
2172:        0,
2173: /*110*/0,
2174:        0,
2175:        0,
2176:        0,
2177:        MatMissingDiagonal_SeqBAIJ
2178: /*115*/
2179: };

2184: PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2185: {
2186:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2187:   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;

2191:   if (aij->nonew != 1) {
2192:     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2193:   }

2195:   /* allocate space for values if not already there */
2196:   if (!aij->saved_values) {
2197:     PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
2198:   }

2200:   /* copy values over */
2201:   PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
2202:   return(0);
2203: }

2209: PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2210: {
2211:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2213:   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;

2216:   if (aij->nonew != 1) {
2217:     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2218:   }
2219:   if (!aij->saved_values) {
2220:     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2221:   }

2223:   /* copy values over */
2224:   PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
2225:   return(0);
2226: }


2237: PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2238: {
2239:   Mat_SeqBAIJ    *b;
2241:   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2242:   PetscTruth     flg,skipallocation = PETSC_FALSE;


2246:   if (nz == MAT_SKIP_ALLOCATION) {
2247:     skipallocation = PETSC_TRUE;
2248:     nz             = 0;
2249:   }

2251:   PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");
2252:     PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);
2253:   PetscOptionsEnd();

2255:   if (nnz && newbs != bs) {
2256:     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2257:   }
2258:   bs   = newbs;

2260:   B->rmap.bs = B->cmap.bs = bs;
2261:   PetscMapSetUp(&B->rmap);
2262:   PetscMapSetUp(&B->cmap);

2264:   B->preallocated = PETSC_TRUE;

2266:   mbs  = B->rmap.n/bs;
2267:   nbs  = B->cmap.n/bs;
2268:   bs2  = bs*bs;

2270:   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2271:     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2272:   }

2274:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2275:   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2276:   if (nnz) {
2277:     for (i=0; i<mbs; i++) {
2278:       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2279:       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2280:     }
2281:   }

2283:   b       = (Mat_SeqBAIJ*)B->data;
2284:   PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2285:     PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);
2286:   PetscOptionsEnd();

2288:   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2289:   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2290:   if (!flg) {
2291:     switch (bs) {
2292:     case 1:
2293:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2294:       B->ops->mult            = MatMult_SeqBAIJ_1;
2295:       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2296:       break;
2297:     case 2:
2298:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2299:       B->ops->mult            = MatMult_SeqBAIJ_2;
2300:       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2301:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2302:       break;
2303:     case 3:
2304:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2305:       B->ops->mult            = MatMult_SeqBAIJ_3;
2306:       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2307:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2308:       break;
2309:     case 4:
2310:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2311:       B->ops->mult            = MatMult_SeqBAIJ_4;
2312:       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2313:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2314:       break;
2315:     case 5:
2316:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2317:       B->ops->mult            = MatMult_SeqBAIJ_5;
2318:       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2319:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2320:       break;
2321:     case 6:
2322:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2323:       B->ops->mult            = MatMult_SeqBAIJ_6;
2324:       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2325:       break;
2326:     case 7:
2327:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2328:       B->ops->mult            = MatMult_SeqBAIJ_7;
2329:       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2330:       break;
2331:     default:
2332:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2333:       B->ops->mult            = MatMult_SeqBAIJ_N;
2334:       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2335:       break;
2336:     }
2337:   }
2338:   B->rmap.bs      = bs;
2339:   b->mbs     = mbs;
2340:   b->nbs     = nbs;
2341:   if (!skipallocation) {
2342:     if (!b->imax) {
2343:       PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);
2344:     }
2345:     /* b->ilen will count nonzeros in each block row so far. */
2346:     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2347:     if (!nnz) {
2348:       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2349:       else if (nz <= 0)        nz = 1;
2350:       for (i=0; i<mbs; i++) b->imax[i] = nz;
2351:       nz = nz*mbs;
2352:     } else {
2353:       nz = 0;
2354:       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2355:     }

2357:     /* allocate the matrix space */
2358:     MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
2359:     PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);
2360:     PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
2361:     PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2362:     PetscMemzero(b->j,nz*sizeof(PetscInt));
2363:     b->singlemalloc = PETSC_TRUE;
2364:     b->i[0] = 0;
2365:     for (i=1; i<mbs+1; i++) {
2366:       b->i[i] = b->i[i-1] + b->imax[i-1];
2367:     }
2368:     b->free_a     = PETSC_TRUE;
2369:     b->free_ij    = PETSC_TRUE;
2370:   } else {
2371:     b->free_a     = PETSC_FALSE;
2372:     b->free_ij    = PETSC_FALSE;
2373:   }

2375:   B->rmap.bs          = bs;
2376:   b->bs2              = bs2;
2377:   b->mbs              = mbs;
2378:   b->nz               = 0;
2379:   b->maxnz            = nz*bs2;
2380:   B->info.nz_unneeded = (PetscReal)b->maxnz;
2381:   return(0);
2382: }

2385: /*MC
2386:    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 
2387:    block sparse compressed row format.

2389:    Options Database Keys:
2390: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()

2392:   Level: beginner

2394: .seealso: MatCreateSeqBAIJ()
2395: M*/

2400: PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
2401: {
2403:   PetscMPIInt    size;
2404:   Mat_SeqBAIJ    *b;

2407:   MPI_Comm_size(((PetscObject)B)->comm,&size);
2408:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");

2410:   PetscNewLog(B,Mat_SeqBAIJ,&b);
2411:   B->data = (void*)b;
2412:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
2413:   B->factor           = 0;
2414:   B->mapping          = 0;
2415:   b->row              = 0;
2416:   b->col              = 0;
2417:   b->icol             = 0;
2418:   b->reallocs         = 0;
2419:   b->saved_values     = 0;
2420: #if defined(PETSC_USE_MAT_SINGLE)
2421:   b->setvalueslen     = 0;
2422:   b->setvaluescopy    = PETSC_NULL;
2423: #endif

2425:   b->roworiented      = PETSC_TRUE;
2426:   b->nonew            = 0;
2427:   b->diag             = 0;
2428:   b->solve_work       = 0;
2429:   b->mult_work        = 0;
2430:   B->spptr            = 0;
2431:   B->info.nz_unneeded = (PetscReal)b->maxnz;
2432:   b->keepzeroedrows   = PETSC_FALSE;
2433:   b->xtoy              = 0;
2434:   b->XtoY              = 0;
2435:   b->compressedrow.use     = PETSC_FALSE;
2436:   b->compressedrow.nrows   = 0;
2437:   b->compressedrow.i       = PETSC_NULL;
2438:   b->compressedrow.rindex  = PETSC_NULL;
2439:   b->compressedrow.checked = PETSC_FALSE;
2440:   B->same_nonzero          = PETSC_FALSE;

2442:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2443:                                      "MatInvertBlockDiagonal_SeqBAIJ",
2444:                                       MatInvertBlockDiagonal_SeqBAIJ);
2445:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2446:                                      "MatStoreValues_SeqBAIJ",
2447:                                       MatStoreValues_SeqBAIJ);
2448:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2449:                                      "MatRetrieveValues_SeqBAIJ",
2450:                                       MatRetrieveValues_SeqBAIJ);
2451:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2452:                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2453:                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);
2454:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2455:                                      "MatConvert_SeqBAIJ_SeqAIJ",
2456:                                       MatConvert_SeqBAIJ_SeqAIJ);
2457:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2458:                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2459:                                       MatConvert_SeqBAIJ_SeqSBAIJ);
2460:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2461:                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2462:                                       MatSeqBAIJSetPreallocation_SeqBAIJ);
2463:   PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
2464:   return(0);
2465: }

2470: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2471: {
2472:   Mat            C;
2473:   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2475:   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;

2478:   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");

2480:   *B = 0;
2481:   MatCreate(((PetscObject)A)->comm,&C);
2482:   MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);
2483:   MatSetType(C,((PetscObject)A)->type_name);
2484:   PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
2485:   c    = (Mat_SeqBAIJ*)C->data;

2487:   C->rmap.N   = A->rmap.N;
2488:   C->cmap.N   = A->cmap.N;
2489:   C->rmap.bs  = A->rmap.bs;
2490:   c->bs2 = a->bs2;
2491:   c->mbs = a->mbs;
2492:   c->nbs = a->nbs;

2494:   PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);
2495:   for (i=0; i<mbs; i++) {
2496:     c->imax[i] = a->imax[i];
2497:     c->ilen[i] = a->ilen[i];
2498:   }

2500:   /* allocate the matrix space */
2501:   PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);
2502:   c->singlemalloc = PETSC_TRUE;
2503:   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
2504:   if (mbs > 0) {
2505:     PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
2506:     if (cpvalues == MAT_COPY_VALUES) {
2507:       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
2508:     } else {
2509:       PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
2510:     }
2511:   }
2512:   c->roworiented = a->roworiented;
2513:   c->nonew       = a->nonew;

2515:   if (a->diag) {
2516:     PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);
2517:     PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));
2518:     for (i=0; i<mbs; i++) {
2519:       c->diag[i] = a->diag[i];
2520:     }
2521:   } else c->diag        = 0;
2522:   c->nz                 = a->nz;
2523:   c->maxnz              = a->maxnz;
2524:   c->solve_work         = 0;
2525:   c->mult_work          = 0;
2526:   c->free_a             = PETSC_TRUE;
2527:   c->free_ij            = PETSC_TRUE;
2528:   C->preallocated       = PETSC_TRUE;
2529:   C->assembled          = PETSC_TRUE;

2531:   c->compressedrow.use     = a->compressedrow.use;
2532:   c->compressedrow.nrows   = a->compressedrow.nrows;
2533:   c->compressedrow.checked = a->compressedrow.checked;
2534:   if ( a->compressedrow.checked && a->compressedrow.use){
2535:     i = a->compressedrow.nrows;
2536:     PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);
2537:     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2538:     PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));
2539:     PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));
2540:   } else {
2541:     c->compressedrow.use    = PETSC_FALSE;
2542:     c->compressedrow.i      = PETSC_NULL;
2543:     c->compressedrow.rindex = PETSC_NULL;
2544:   }
2545:   C->same_nonzero = A->same_nonzero;
2546:   *B = C;
2547:   PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
2548:   return(0);
2549: }

2553: PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2554: {
2555:   Mat_SeqBAIJ    *a;
2556:   Mat            B;
2558:   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2559:   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2560:   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2561:   PetscInt       *masked,nmask,tmp,bs2,ishift;
2562:   PetscMPIInt    size;
2563:   int            fd;
2564:   PetscScalar    *aa;
2565:   MPI_Comm       comm = ((PetscObject)viewer)->comm;

2568:   PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");
2569:     PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);
2570:   PetscOptionsEnd();
2571:   bs2  = bs*bs;

2573:   MPI_Comm_size(comm,&size);
2574:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2575:   PetscViewerBinaryGetDescriptor(viewer,&fd);
2576:   PetscBinaryRead(fd,header,4,PETSC_INT);
2577:   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2578:   M = header[1]; N = header[2]; nz = header[3];

2580:   if (header[3] < 0) {
2581:     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2582:   }

2584:   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

2586:   /* 
2587:      This code adds extra rows to make sure the number of rows is 
2588:     divisible by the blocksize
2589:   */
2590:   mbs        = M/bs;
2591:   extra_rows = bs - M + bs*(mbs);
2592:   if (extra_rows == bs) extra_rows = 0;
2593:   else                  mbs++;
2594:   if (extra_rows) {
2595:     PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
2596:   }

2598:   /* read in row lengths */
2599:   PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
2600:   PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
2601:   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;

2603:   /* read in column indices */
2604:   PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);
2605:   PetscBinaryRead(fd,jj,nz,PETSC_INT);
2606:   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;

2608:   /* loop over row lengths determining block row lengths */
2609:   PetscMalloc(mbs*sizeof(PetscInt),&browlengths);
2610:   PetscMemzero(browlengths,mbs*sizeof(PetscInt));
2611:   PetscMalloc(2*mbs*sizeof(PetscInt),&mask);
2612:   PetscMemzero(mask,mbs*sizeof(PetscInt));
2613:   masked   = mask + mbs;
2614:   rowcount = 0; nzcount = 0;
2615:   for (i=0; i<mbs; i++) {
2616:     nmask = 0;
2617:     for (j=0; j<bs; j++) {
2618:       kmax = rowlengths[rowcount];
2619:       for (k=0; k<kmax; k++) {
2620:         tmp = jj[nzcount++]/bs;
2621:         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2622:       }
2623:       rowcount++;
2624:     }
2625:     browlengths[i] += nmask;
2626:     /* zero out the mask elements we set */
2627:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2628:   }

2630:   /* create our matrix */
2631:   MatCreate(comm,&B);
2632:   MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2633:   MatSetType(B,type);
2634:   MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);
2635:   a = (Mat_SeqBAIJ*)B->data;

2637:   /* set matrix "i" values */
2638:   a->i[0] = 0;
2639:   for (i=1; i<= mbs; i++) {
2640:     a->i[i]      = a->i[i-1] + browlengths[i-1];
2641:     a->ilen[i-1] = browlengths[i-1];
2642:   }
2643:   a->nz         = 0;
2644:   for (i=0; i<mbs; i++) a->nz += browlengths[i];

2646:   /* read in nonzero values */
2647:   PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
2648:   PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
2649:   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;

2651:   /* set "a" and "j" values into matrix */
2652:   nzcount = 0; jcount = 0;
2653:   for (i=0; i<mbs; i++) {
2654:     nzcountb = nzcount;
2655:     nmask    = 0;
2656:     for (j=0; j<bs; j++) {
2657:       kmax = rowlengths[i*bs+j];
2658:       for (k=0; k<kmax; k++) {
2659:         tmp = jj[nzcount++]/bs;
2660:         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2661:       }
2662:     }
2663:     /* sort the masked values */
2664:     PetscSortInt(nmask,masked);

2666:     /* set "j" values into matrix */
2667:     maskcount = 1;
2668:     for (j=0; j<nmask; j++) {
2669:       a->j[jcount++]  = masked[j];
2670:       mask[masked[j]] = maskcount++;
2671:     }
2672:     /* set "a" values into matrix */
2673:     ishift = bs2*a->i[i];
2674:     for (j=0; j<bs; j++) {
2675:       kmax = rowlengths[i*bs+j];
2676:       for (k=0; k<kmax; k++) {
2677:         tmp       = jj[nzcountb]/bs ;
2678:         block     = mask[tmp] - 1;
2679:         point     = jj[nzcountb] - bs*tmp;
2680:         idx       = ishift + bs2*block + j + bs*point;
2681:         a->a[idx] = (MatScalar)aa[nzcountb++];
2682:       }
2683:     }
2684:     /* zero out the mask elements we set */
2685:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2686:   }
2687:   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");

2689:   PetscFree(rowlengths);
2690:   PetscFree(browlengths);
2691:   PetscFree(aa);
2692:   PetscFree(jj);
2693:   PetscFree(mask);

2695:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2696:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2697:   MatView_Private(B);

2699:   *A = B;
2700:   return(0);
2701: }

2705: /*@C
2706:    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2707:    compressed row) format.  For good matrix assembly performance the
2708:    user should preallocate the matrix storage by setting the parameter nz
2709:    (or the array nnz).  By setting these parameters accurately, performance
2710:    during matrix assembly can be increased by more than a factor of 50.

2712:    Collective on MPI_Comm

2714:    Input Parameters:
2715: +  comm - MPI communicator, set to PETSC_COMM_SELF
2716: .  bs - size of block
2717: .  m - number of rows
2718: .  n - number of columns
2719: .  nz - number of nonzero blocks  per block row (same for all rows)
2720: -  nnz - array containing the number of nonzero blocks in the various block rows 
2721:          (possibly different for each block row) or PETSC_NULL

2723:    Output Parameter:
2724: .  A - the matrix 

2726:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2727:    MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely
2728:    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
2729:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

2731:    Options Database Keys:
2732: .   -mat_no_unroll - uses code that does not unroll the loops in the 
2733:                      block calculations (much slower)
2734: .    -mat_block_size - size of the blocks to use

2736:    Level: intermediate

2738:    Notes:
2739:    The number of rows and columns must be divisible by blocksize.

2741:    If the nnz parameter is given then the nz parameter is ignored

2743:    A nonzero block is any block that as 1 or more nonzeros in it

2745:    The block AIJ format is fully compatible with standard Fortran 77
2746:    storage.  That is, the stored row and column indices can begin at
2747:    either one (as in Fortran) or zero.  See the users' manual for details.

2749:    Specify the preallocated storage with either nz or nnz (not both).
2750:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
2751:    allocation.  For additional details, see the users manual chapter on
2752:    matrices.

2754: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2755: @*/
2756: PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2757: {
2759: 
2761:   MatCreate(comm,A);
2762:   MatSetSizes(*A,m,n,m,n);
2763:   MatSetType(*A,MATSEQBAIJ);
2764:   MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
2765:   return(0);
2766: }

2770: /*@C
2771:    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2772:    per row in the matrix. For good matrix assembly performance the
2773:    user should preallocate the matrix storage by setting the parameter nz
2774:    (or the array nnz).  By setting these parameters accurately, performance
2775:    during matrix assembly can be increased by more than a factor of 50.

2777:    Collective on MPI_Comm

2779:    Input Parameters:
2780: +  A - the matrix
2781: .  bs - size of block
2782: .  nz - number of block nonzeros per block row (same for all rows)
2783: -  nnz - array containing the number of block nonzeros in the various block rows 
2784:          (possibly different for each block row) or PETSC_NULL

2786:    Options Database Keys:
2787: .   -mat_no_unroll - uses code that does not unroll the loops in the 
2788:                      block calculations (much slower)
2789: .    -mat_block_size - size of the blocks to use

2791:    Level: intermediate

2793:    Notes:
2794:    If the nnz parameter is given then the nz parameter is ignored

2796:    You can call MatGetInfo() to get information on how effective the preallocation was;
2797:    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2798:    You can also run with the option -info and look for messages with the string 
2799:    malloc in them to see if additional memory allocation was needed.

2801:    The block AIJ format is fully compatible with standard Fortran 77
2802:    storage.  That is, the stored row and column indices can begin at
2803:    either one (as in Fortran) or zero.  See the users' manual for details.

2805:    Specify the preallocated storage with either nz or nnz (not both).
2806:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
2807:    allocation.  For additional details, see the users manual chapter on
2808:    matrices.

2810: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
2811: @*/
2812: PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2813: {
2814:   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);

2817:   PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);
2818:   if (f) {
2819:     (*f)(B,bs,nz,nnz);
2820:   }
2821:   return(0);
2822: }

2826: /*@
2827:      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 
2828:               (upper triangular entries in CSR format) provided by the user.

2830:      Collective on MPI_Comm

2832:    Input Parameters:
2833: +  comm - must be an MPI communicator of size 1
2834: .  bs - size of block
2835: .  m - number of rows
2836: .  n - number of columns
2837: .  i - row indices
2838: .  j - column indices
2839: -  a - matrix values

2841:    Output Parameter:
2842: .  mat - the matrix

2844:    Level: intermediate

2846:    Notes:
2847:        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2848:     once the matrix is destroyed

2850:        You cannot set new nonzero locations into this matrix, that will generate an error.

2852:        The i and j indices are 0 based

2854: .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()

2856: @*/
2857: PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2858: {
2860:   PetscInt       ii;
2861:   Mat_SeqBAIJ    *baij;

2864:   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2865:   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2866: 
2867:   MatCreate(comm,mat);
2868:   MatSetSizes(*mat,m,n,m,n);
2869:   MatSetType(*mat,MATSEQBAIJ);
2870:   MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);
2871:   baij = (Mat_SeqBAIJ*)(*mat)->data;
2872:   PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);

2874:   baij->i = i;
2875:   baij->j = j;
2876:   baij->a = a;
2877:   baij->singlemalloc = PETSC_FALSE;
2878:   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2879:   baij->free_a       = PETSC_FALSE;
2880:   baij->free_ij       = PETSC_FALSE;

2882:   for (ii=0; ii<m; ii++) {
2883:     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
2884: #if defined(PETSC_USE_DEBUG)
2885:     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2886: #endif    
2887:   }
2888: #if defined(PETSC_USE_DEBUG)
2889:   for (ii=0; ii<baij->i[m]; ii++) {
2890:     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2891:     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2892:   }
2893: #endif    

2895:   MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
2896:   MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
2897:   return(0);
2898: }