Actual source code: baij.c

  1: #define PETSCMAT_DLL

  3: /*
  4:     Defines the basic matrix operations for the BAIJ (compressed row)
  5:   matrix storage format.
  6: */
 7:  #include src/mat/impls/baij/seq/baij.h
 8:  #include src/inline/spops.h
 9:  #include petscsys.h

 11:  #include src/inline/ilu.h

 15: /*@
 16:   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.

 18:   Collective on Mat

 20:   Input Parameters:
 21: . mat - the matrix

 23:   Level: advanced
 24: @*/
 25: PetscErrorCode  MatSeqBAIJInvertBlockDiagonal(Mat mat)
 26: {
 27:   PetscErrorCode ierr,(*f)(Mat);

 31:   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
 32:   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

 34:   PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);
 35:   if (f) {
 36:     (*f)(mat);
 37:   } else {
 38:     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
 39:   }
 40:   return(0);
 41: }

 46: PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A)
 47: {
 48:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
 50:   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
 51:   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;

 54:   if (a->idiagvalid) return(0);
 55:   MatMarkDiagonal_SeqBAIJ(A);
 56:   diag_offset = a->diag;
 57:   if (!a->idiag) {
 58:     PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);
 59:   }
 60:   diag  = a->idiag;
 61:   mdiag = a->idiag+bs*bs*mbs;
 62:   /* factor and invert each block */
 63:   switch (bs){
 64:     case 2:
 65:       for (i=0; i<mbs; i++) {
 66:         odiag   = v + 4*diag_offset[i];
 67:         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 68:         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 69:         Kernel_A_gets_inverse_A_2(diag);
 70:         diag    += 4;
 71:         mdiag   += 4;
 72:       }
 73:       break;
 74:     case 3:
 75:       for (i=0; i<mbs; i++) {
 76:         odiag    = v + 9*diag_offset[i];
 77:         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 78:         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
 79:         diag[8]  = odiag[8];
 80:         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 81:         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
 82:         mdiag[8] = odiag[8];
 83:         Kernel_A_gets_inverse_A_3(diag);
 84:         diag    += 9;
 85:         mdiag   += 9;
 86:       }
 87:       break;
 88:     case 4:
 89:       for (i=0; i<mbs; i++) {
 90:         odiag  = v + 16*diag_offset[i];
 91:         PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
 92:         PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
 93:         Kernel_A_gets_inverse_A_4(diag);
 94:         diag  += 16;
 95:         mdiag += 16;
 96:       }
 97:       break;
 98:     case 5:
 99:       for (i=0; i<mbs; i++) {
100:         odiag = v + 25*diag_offset[i];
101:         PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
102:         PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
103:         Kernel_A_gets_inverse_A_5(diag);
104:         diag  += 25;
105:         mdiag += 25;
106:       }
107:       break;
108:     default:
109:       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110:   }
111:   a->idiagvalid = PETSC_TRUE;
112:   return(0);
113: }

118: PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119: {
120:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121:   PetscScalar        *x,x1,s1;
122:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123:   PetscErrorCode     ierr;
124:   PetscInt           m = a->mbs,i,i2,nz,idx;
125:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

128:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
129:   its = its*lits;
130:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
131:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
132:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
133:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
134:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

136:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

138:   diag  = a->diag;
139:   idiag = a->idiag;
140:   VecGetArray(xx,&x);
141:   VecGetArray(bb,(PetscScalar**)&b);

143:   if (flag & SOR_ZERO_INITIAL_GUESS) {
144:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
145:       x[0] = b[0]*idiag[0];
146:       i2     = 1;
147:       idiag += 1;
148:       for (i=1; i<m; i++) {
149:         v     = aa + ai[i];
150:         vi    = aj + ai[i];
151:         nz    = diag[i] - ai[i];
152:         s1    = b[i2];
153:         while (nz--) {
154:           idx  = (*vi++);
155:           x1   = x[idx];
156:           s1  -= v[0]*x1;
157:           v   += 1;
158:         }
159:         x[i2]   = idiag[0]*s1;
160:         idiag   += 1;
161:         i2      += 1;
162:       }
163:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
164:       PetscLogFlops(a->nz);
165:     }
166:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
167:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
168:       i2    = 0;
169:       mdiag = a->idiag+a->mbs;
170:       for (i=0; i<m; i++) {
171:         x1      = x[i2];
172:         x[i2]   = mdiag[0]*x1;
173:         mdiag  += 1;
174:         i2     += 1;
175:       }
176:       PetscLogFlops(m);
177:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
178:       PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
179:     }
180:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
181:       idiag   = a->idiag+a->mbs - 1;
182:       i2      = m - 1;
183:       x1      = x[i2];
184:       x[i2]   = idiag[0]*x1;
185:       idiag -= 1;
186:       i2    -= 1;
187:       for (i=m-2; i>=0; i--) {
188:         v     = aa + (diag[i]+1);
189:         vi    = aj + diag[i] + 1;
190:         nz    = ai[i+1] - diag[i] - 1;
191:         s1    = x[i2];
192:         while (nz--) {
193:           idx  = (*vi++);
194:           x1   = x[idx];
195:           s1  -= v[0]*x1;
196:           v   += 1;
197:         }
198:         x[i2]   = idiag[0]*s1;
199:         idiag   -= 2;
200:         i2      -= 1;
201:       }
202:       PetscLogFlops(a->nz);
203:     }
204:   } else {
205:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
206:   }
207:   VecRestoreArray(xx,&x);
208:   VecRestoreArray(bb,(PetscScalar**)&b);
209:   return(0);
210: }

214: PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
215: {
216:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
217:   PetscScalar        *x,x1,x2,s1,s2;
218:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
219:   PetscErrorCode     ierr;
220:   PetscInt           m = a->mbs,i,i2,nz,idx;
221:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

224:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
225:   its = its*lits;
226:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
227:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
228:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
229:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
230:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

232:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

234:   diag  = a->diag;
235:   idiag = a->idiag;
236:   VecGetArray(xx,&x);
237:   VecGetArray(bb,(PetscScalar**)&b);

239:   if (flag & SOR_ZERO_INITIAL_GUESS) {
240:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
241:       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
242:       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
243:       i2     = 2;
244:       idiag += 4;
245:       for (i=1; i<m; i++) {
246:         v     = aa + 4*ai[i];
247:         vi    = aj + ai[i];
248:         nz    = diag[i] - ai[i];
249:         s1    = b[i2]; s2 = b[i2+1];
250:         while (nz--) {
251:           idx  = 2*(*vi++);
252:           x1   = x[idx]; x2 = x[1+idx];
253:           s1  -= v[0]*x1 + v[2]*x2;
254:           s2  -= v[1]*x1 + v[3]*x2;
255:           v   += 4;
256:         }
257:         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
258:         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
259:         idiag   += 4;
260:         i2      += 2;
261:       }
262:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
263:       PetscLogFlops(4*(a->nz));
264:     }
265:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
266:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
267:       i2    = 0;
268:       mdiag = a->idiag+4*a->mbs;
269:       for (i=0; i<m; i++) {
270:         x1      = x[i2]; x2 = x[i2+1];
271:         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
272:         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
273:         mdiag  += 4;
274:         i2     += 2;
275:       }
276:       PetscLogFlops(6*m);
277:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
278:       PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
279:     }
280:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
281:       idiag   = a->idiag+4*a->mbs - 4;
282:       i2      = 2*m - 2;
283:       x1      = x[i2]; x2 = x[i2+1];
284:       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
285:       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
286:       idiag -= 4;
287:       i2    -= 2;
288:       for (i=m-2; i>=0; i--) {
289:         v     = aa + 4*(diag[i]+1);
290:         vi    = aj + diag[i] + 1;
291:         nz    = ai[i+1] - diag[i] - 1;
292:         s1    = x[i2]; s2 = x[i2+1];
293:         while (nz--) {
294:           idx  = 2*(*vi++);
295:           x1   = x[idx]; x2 = x[1+idx];
296:           s1  -= v[0]*x1 + v[2]*x2;
297:           s2  -= v[1]*x1 + v[3]*x2;
298:           v   += 4;
299:         }
300:         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
301:         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
302:         idiag   -= 4;
303:         i2      -= 2;
304:       }
305:       PetscLogFlops(4*(a->nz));
306:     }
307:   } else {
308:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
309:   }
310:   VecRestoreArray(xx,&x);
311:   VecRestoreArray(bb,(PetscScalar**)&b);
312:   return(0);
313: }

317: PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
318: {
319:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
320:   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
321:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
322:   PetscErrorCode     ierr;
323:   PetscInt           m = a->mbs,i,i2,nz,idx;
324:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

327:   its = its*lits;
328:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
329:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
330:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
331:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
332:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
333:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

335:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

337:   diag  = a->diag;
338:   idiag = a->idiag;
339:   VecGetArray(xx,&x);
340:   VecGetArray(bb,(PetscScalar**)&b);

342:   if (flag & SOR_ZERO_INITIAL_GUESS) {
343:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
344:       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
345:       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
346:       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
347:       i2     = 3;
348:       idiag += 9;
349:       for (i=1; i<m; i++) {
350:         v     = aa + 9*ai[i];
351:         vi    = aj + ai[i];
352:         nz    = diag[i] - ai[i];
353:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
354:         while (nz--) {
355:           idx  = 3*(*vi++);
356:           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
357:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
358:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
359:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
360:           v   += 9;
361:         }
362:         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
363:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
364:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
365:         idiag   += 9;
366:         i2      += 3;
367:       }
368:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
369:       PetscLogFlops(9*(a->nz));
370:     }
371:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
372:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
373:       i2    = 0;
374:       mdiag = a->idiag+9*a->mbs;
375:       for (i=0; i<m; i++) {
376:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
377:         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
378:         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
379:         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
380:         mdiag  += 9;
381:         i2     += 3;
382:       }
383:       PetscLogFlops(15*m);
384:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
385:       PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
386:     }
387:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
388:       idiag   = a->idiag+9*a->mbs - 9;
389:       i2      = 3*m - 3;
390:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
391:       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
392:       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
393:       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
394:       idiag -= 9;
395:       i2    -= 3;
396:       for (i=m-2; i>=0; i--) {
397:         v     = aa + 9*(diag[i]+1);
398:         vi    = aj + diag[i] + 1;
399:         nz    = ai[i+1] - diag[i] - 1;
400:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
401:         while (nz--) {
402:           idx  = 3*(*vi++);
403:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
404:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
405:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
406:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
407:           v   += 9;
408:         }
409:         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
410:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
411:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
412:         idiag   -= 9;
413:         i2      -= 3;
414:       }
415:       PetscLogFlops(9*(a->nz));
416:     }
417:   } else {
418:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
419:   }
420:   VecRestoreArray(xx,&x);
421:   VecRestoreArray(bb,(PetscScalar**)&b);
422:   return(0);
423: }

427: PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
428: {
429:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
430:   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
431:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
432:   PetscErrorCode     ierr;
433:   PetscInt           m = a->mbs,i,i2,nz,idx;
434:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

437:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
438:   its = its*lits;
439:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
440:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
441:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
442:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
443:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

445:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

447:   diag  = a->diag;
448:   idiag = a->idiag;
449:   VecGetArray(xx,&x);
450:   VecGetArray(bb,(PetscScalar**)&b);

452:   if (flag & SOR_ZERO_INITIAL_GUESS) {
453:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
454:       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
455:       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
456:       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
457:       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
458:       i2     = 4;
459:       idiag += 16;
460:       for (i=1; i<m; i++) {
461:         v     = aa + 16*ai[i];
462:         vi    = aj + ai[i];
463:         nz    = diag[i] - ai[i];
464:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
465:         while (nz--) {
466:           idx  = 4*(*vi++);
467:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
468:           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
469:           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
470:           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
471:           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
472:           v   += 16;
473:         }
474:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
475:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
476:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
477:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
478:         idiag   += 16;
479:         i2      += 4;
480:       }
481:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
482:       PetscLogFlops(16*(a->nz));
483:     }
484:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
485:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
486:       i2    = 0;
487:       mdiag = a->idiag+16*a->mbs;
488:       for (i=0; i<m; i++) {
489:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
490:         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
491:         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
492:         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
493:         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
494:         mdiag  += 16;
495:         i2     += 4;
496:       }
497:       PetscLogFlops(28*m);
498:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
499:       PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
500:     }
501:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
502:       idiag   = a->idiag+16*a->mbs - 16;
503:       i2      = 4*m - 4;
504:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
505:       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
506:       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
507:       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
508:       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
509:       idiag -= 16;
510:       i2    -= 4;
511:       for (i=m-2; i>=0; i--) {
512:         v     = aa + 16*(diag[i]+1);
513:         vi    = aj + diag[i] + 1;
514:         nz    = ai[i+1] - diag[i] - 1;
515:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
516:         while (nz--) {
517:           idx  = 4*(*vi++);
518:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
519:           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
520:           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
521:           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
522:           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
523:           v   += 16;
524:         }
525:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
526:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
527:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
528:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
529:         idiag   -= 16;
530:         i2      -= 4;
531:       }
532:       PetscLogFlops(16*(a->nz));
533:     }
534:   } else {
535:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
536:   }
537:   VecRestoreArray(xx,&x);
538:   VecRestoreArray(bb,(PetscScalar**)&b);
539:   return(0);
540: }

544: PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
545: {
546:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
547:   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
548:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
549:   PetscErrorCode     ierr;
550:   PetscInt           m = a->mbs,i,i2,nz,idx;
551:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

554:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
555:   its = its*lits;
556:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
557:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
558:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
559:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
560:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

562:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

564:   diag  = a->diag;
565:   idiag = a->idiag;
566:   VecGetArray(xx,&x);
567:   VecGetArray(bb,(PetscScalar**)&b);

569:   if (flag & SOR_ZERO_INITIAL_GUESS) {
570:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
571:       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
572:       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
573:       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
574:       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
575:       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
576:       i2     = 5;
577:       idiag += 25;
578:       for (i=1; i<m; i++) {
579:         v     = aa + 25*ai[i];
580:         vi    = aj + ai[i];
581:         nz    = diag[i] - ai[i];
582:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
583:         while (nz--) {
584:           idx  = 5*(*vi++);
585:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
586:           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
587:           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
588:           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
589:           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
590:           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
591:           v   += 25;
592:         }
593:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
594:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
595:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
596:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
597:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
598:         idiag   += 25;
599:         i2      += 5;
600:       }
601:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
602:       PetscLogFlops(25*(a->nz));
603:     }
604:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
605:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
606:       i2    = 0;
607:       mdiag = a->idiag+25*a->mbs;
608:       for (i=0; i<m; i++) {
609:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
610:         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
611:         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
612:         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
613:         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
614:         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
615:         mdiag  += 25;
616:         i2     += 5;
617:       }
618:       PetscLogFlops(45*m);
619:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
620:       PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
621:     }
622:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
623:       idiag   = a->idiag+25*a->mbs - 25;
624:       i2      = 5*m - 5;
625:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
626:       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
627:       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
628:       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
629:       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
630:       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
631:       idiag -= 25;
632:       i2    -= 5;
633:       for (i=m-2; i>=0; i--) {
634:         v     = aa + 25*(diag[i]+1);
635:         vi    = aj + diag[i] + 1;
636:         nz    = ai[i+1] - diag[i] - 1;
637:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
638:         while (nz--) {
639:           idx  = 5*(*vi++);
640:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
641:           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
642:           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
643:           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
644:           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
645:           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
646:           v   += 25;
647:         }
648:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
649:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
650:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
651:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
652:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
653:         idiag   -= 25;
654:         i2      -= 5;
655:       }
656:       PetscLogFlops(25*(a->nz));
657:     }
658:   } else {
659:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
660:   }
661:   VecRestoreArray(xx,&x);
662:   VecRestoreArray(bb,(PetscScalar**)&b);
663:   return(0);
664: }

668: PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
669: {
670:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
671:   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
672:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
673:   PetscErrorCode     ierr;
674:   PetscInt           m = a->mbs,i,i2,nz,idx;
675:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

678:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
679:   its = its*lits;
680:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
681:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
682:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
683:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
684:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

686:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

688:   diag  = a->diag;
689:   idiag = a->idiag;
690:   VecGetArray(xx,&x);
691:   VecGetArray(bb,(PetscScalar**)&b);

693:   if (flag & SOR_ZERO_INITIAL_GUESS) {
694:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
695:       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
696:       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
697:       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
698:       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
699:       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
700:       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
701:       i2     = 6;
702:       idiag += 36;
703:       for (i=1; i<m; i++) {
704:         v     = aa + 36*ai[i];
705:         vi    = aj + ai[i];
706:         nz    = diag[i] - ai[i];
707:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
708:         while (nz--) {
709:           idx  = 6*(*vi++);
710:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
711:           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
712:           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
713:           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
714:           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
715:           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
716:           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
717:           v   += 36;
718:         }
719:         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
720:         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
721:         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
722:         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
723:         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
724:         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
725:         idiag   += 36;
726:         i2      += 6;
727:       }
728:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
729:       PetscLogFlops(36*(a->nz));
730:     }
731:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
732:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
733:       i2    = 0;
734:       mdiag = a->idiag+36*a->mbs;
735:       for (i=0; i<m; i++) {
736:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
737:         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
738:         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
739:         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
740:         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
741:         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
742:         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
743:         mdiag  += 36;
744:         i2     += 6;
745:       }
746:       PetscLogFlops(60*m);
747:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
748:       PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
749:     }
750:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
751:       idiag   = a->idiag+36*a->mbs - 36;
752:       i2      = 6*m - 6;
753:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
754:       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
755:       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
756:       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
757:       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
758:       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
759:       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
760:       idiag -= 36;
761:       i2    -= 6;
762:       for (i=m-2; i>=0; i--) {
763:         v     = aa + 36*(diag[i]+1);
764:         vi    = aj + diag[i] + 1;
765:         nz    = ai[i+1] - diag[i] - 1;
766:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
767:         while (nz--) {
768:           idx  = 6*(*vi++);
769:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
770:           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
771:           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
772:           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
773:           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
774:           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
775:           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
776:           v   += 36;
777:         }
778:         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
779:         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
780:         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
781:         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
782:         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
783:         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
784:         idiag   -= 36;
785:         i2      -= 6;
786:       }
787:       PetscLogFlops(36*(a->nz));
788:     }
789:   } else {
790:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
791:   }
792:   VecRestoreArray(xx,&x);
793:   VecRestoreArray(bb,(PetscScalar**)&b);
794:   return(0);
795: }

799: PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
800: {
801:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
802:   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
803:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
804:   PetscErrorCode     ierr;
805:   PetscInt           m = a->mbs,i,i2,nz,idx;
806:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

809:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
810:   its = its*lits;
811:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
812:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
813:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
814:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
815:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

817:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

819:   diag  = a->diag;
820:   idiag = a->idiag;
821:   VecGetArray(xx,&x);
822:   VecGetArray(bb,(PetscScalar**)&b);

824:   if (flag & SOR_ZERO_INITIAL_GUESS) {
825:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
826:       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
827:       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
828:       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
829:       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
830:       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
831:       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
832:       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
833:       i2     = 7;
834:       idiag += 49;
835:       for (i=1; i<m; i++) {
836:         v     = aa + 49*ai[i];
837:         vi    = aj + ai[i];
838:         nz    = diag[i] - ai[i];
839:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
840:         while (nz--) {
841:           idx  = 7*(*vi++);
842:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
843:           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
844:           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
845:           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
846:           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
847:           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
848:           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
849:           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
850:           v   += 49;
851:         }
852:         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
853:         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
854:         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
855:         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
856:         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
857:         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
858:         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
859:         idiag   += 49;
860:         i2      += 7;
861:       }
862:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
863:       PetscLogFlops(49*(a->nz));
864:     }
865:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
866:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
867:       i2    = 0;
868:       mdiag = a->idiag+49*a->mbs;
869:       for (i=0; i<m; i++) {
870:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
871:         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
872:         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
873:         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
874:         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
875:         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
876:         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
877:         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
878:         mdiag  += 36;
879:         i2     += 6;
880:       }
881:       PetscLogFlops(93*m);
882:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
883:       PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
884:     }
885:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
886:       idiag   = a->idiag+49*a->mbs - 49;
887:       i2      = 7*m - 7;
888:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
889:       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
890:       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
891:       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
892:       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
893:       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
894:       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
895:       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
896:       idiag -= 49;
897:       i2    -= 7;
898:       for (i=m-2; i>=0; i--) {
899:         v     = aa + 49*(diag[i]+1);
900:         vi    = aj + diag[i] + 1;
901:         nz    = ai[i+1] - diag[i] - 1;
902:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
903:         while (nz--) {
904:           idx  = 7*(*vi++);
905:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
906:           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
907:           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
908:           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
909:           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
910:           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
911:           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
912:           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
913:           v   += 49;
914:         }
915:         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
916:         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
917:         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
918:         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
919:         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
920:         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
921:         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
922:         idiag   -= 49;
923:         i2      -= 7;
924:       }
925:       PetscLogFlops(49*(a->nz));
926:     }
927:   } else {
928:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
929:   }
930:   VecRestoreArray(xx,&x);
931:   VecRestoreArray(bb,(PetscScalar**)&b);
932:   return(0);
933: }

935: /*
936:     Special version for direct calls from Fortran (Used in PETSc-fun3d)
937: */
938: #if defined(PETSC_HAVE_FORTRAN_CAPS)
939: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
940: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
941: #define matsetvaluesblocked4_ matsetvaluesblocked4
942: #endif

947: void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
948: {
949:   Mat               A = *AA;
950:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
951:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
952:   PetscInt          *ai=a->i,*ailen=a->ilen;
953:   PetscInt          *aj=a->j,stepval,lastcol = -1;
954:   const PetscScalar *value = v;
955:   MatScalar         *ap,*aa = a->a,*bap;

958:   if (A->rmap.bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
959:   stepval = (n-1)*4;
960:   for (k=0; k<m; k++) { /* loop over added rows */
961:     row  = im[k];
962:     rp   = aj + ai[row];
963:     ap   = aa + 16*ai[row];
964:     nrow = ailen[row];
965:     low  = 0;
966:     high = nrow;
967:     for (l=0; l<n; l++) { /* loop over added columns */
968:       col = in[l];
969:       if (col <= lastcol) low = 0; else high = nrow;
970:       lastcol = col;
971:       value = v + k*(stepval+4 + l)*4;
972:       while (high-low > 7) {
973:         t = (low+high)/2;
974:         if (rp[t] > col) high = t;
975:         else             low  = t;
976:       }
977:       for (i=low; i<high; i++) {
978:         if (rp[i] > col) break;
979:         if (rp[i] == col) {
980:           bap  = ap +  16*i;
981:           for (ii=0; ii<4; ii++,value+=stepval) {
982:             for (jj=ii; jj<16; jj+=4) {
983:               bap[jj] += *value++;
984:             }
985:           }
986:           goto noinsert2;
987:         }
988:       }
989:       N = nrow++ - 1;
990:       high++; /* added new column index thus must search to one higher than before */
991:       /* shift up all the later entries in this row */
992:       for (ii=N; ii>=i; ii--) {
993:         rp[ii+1] = rp[ii];
994:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
995:       }
996:       if (N >= i) {
997:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
998:       }
999:       rp[i] = col;
1000:       bap   = ap +  16*i;
1001:       for (ii=0; ii<4; ii++,value+=stepval) {
1002:         for (jj=ii; jj<16; jj+=4) {
1003:           bap[jj] = *value++;
1004:         }
1005:       }
1006:       noinsert2:;
1007:       low = i;
1008:     }
1009:     ailen[row] = nrow;
1010:   }
1011:   PetscFunctionReturnVoid();
1012: }

1015: #if defined(PETSC_HAVE_FORTRAN_CAPS)
1016: #define matsetvalues4_ MATSETVALUES4
1017: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1018: #define matsetvalues4_ matsetvalues4
1019: #endif

1024: void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1025: {
1026:   Mat         A = *AA;
1027:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1028:   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1029:   PetscInt    *ai=a->i,*ailen=a->ilen;
1030:   PetscInt    *aj=a->j,brow,bcol;
1031:   PetscInt    ridx,cidx,lastcol = -1;
1032:   MatScalar   *ap,value,*aa=a->a,*bap;
1033: 
1035:   for (k=0; k<m; k++) { /* loop over added rows */
1036:     row  = im[k]; brow = row/4;
1037:     rp   = aj + ai[brow];
1038:     ap   = aa + 16*ai[brow];
1039:     nrow = ailen[brow];
1040:     low  = 0;
1041:     high = nrow;
1042:     for (l=0; l<n; l++) { /* loop over added columns */
1043:       col = in[l]; bcol = col/4;
1044:       ridx = row % 4; cidx = col % 4;
1045:       value = v[l + k*n];
1046:       if (col <= lastcol) low = 0; else high = nrow;
1047:       lastcol = col;
1048:       while (high-low > 7) {
1049:         t = (low+high)/2;
1050:         if (rp[t] > bcol) high = t;
1051:         else              low  = t;
1052:       }
1053:       for (i=low; i<high; i++) {
1054:         if (rp[i] > bcol) break;
1055:         if (rp[i] == bcol) {
1056:           bap  = ap +  16*i + 4*cidx + ridx;
1057:           *bap += value;
1058:           goto noinsert1;
1059:         }
1060:       }
1061:       N = nrow++ - 1;
1062:       high++; /* added new column thus must search to one higher than before */
1063:       /* shift up all the later entries in this row */
1064:       for (ii=N; ii>=i; ii--) {
1065:         rp[ii+1] = rp[ii];
1066:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1067:       }
1068:       if (N>=i) {
1069:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1070:       }
1071:       rp[i]                    = bcol;
1072:       ap[16*i + 4*cidx + ridx] = value;
1073:       noinsert1:;
1074:       low = i;
1075:     }
1076:     ailen[brow] = nrow;
1077:   }
1078:   PetscFunctionReturnVoid();
1079: }

1082: /*  UGLY, ugly, ugly
1083:    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 
1084:    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 
1085:    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
1086:    converts the entries into single precision and then calls ..._MatScalar() to put them
1087:    into the single precision data structures.
1088: */
1089: #if defined(PETSC_USE_MAT_SINGLE)
1090: EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
1091: #else
1092: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
1093: #endif

1095: #define CHUNKSIZE  10

1097: /*
1098:      Checks for missing diagonals
1099: */
1102: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1103: {
1104:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1106:   PetscInt       *diag,*jj = a->j,i;

1109:   MatMarkDiagonal_SeqBAIJ(A);
1110:   *missing = PETSC_FALSE;
1111:   diag     = a->diag;
1112:   for (i=0; i<a->mbs; i++) {
1113:     if (jj[diag[i]] != i) {
1114:       *missing  = PETSC_TRUE;
1115:       if (d) *d = i;
1116:     }
1117:   }
1118:   return(0);
1119: }

1123: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1124: {
1125:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1127:   PetscInt       i,j,m = a->mbs;

1130:   if (!a->diag) {
1131:     PetscMalloc(m*sizeof(PetscInt),&a->diag);
1132:   }
1133:   for (i=0; i<m; i++) {
1134:     a->diag[i] = a->i[i+1];
1135:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1136:       if (a->j[j] == i) {
1137:         a->diag[i] = j;
1138:         break;
1139:       }
1140:     }
1141:   }
1142:   return(0);
1143: }


1146: EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);

1150: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1151: {
1152:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1154:   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs,nbs = 1,k,l,cnt;
1155:   PetscInt       *tia, *tja;

1158:   *nn = n;
1159:   if (!ia) return(0);
1160:   if (symmetric) {
1161:     MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);
1162:   } else {
1163:     tia = a->i; tja = a->j;
1164:   }
1165: 
1166:   if (!blockcompressed && bs > 1) {
1167:     (*nn) *= bs;
1168:     nbs    = bs;
1169:     /* malloc & create the natural set of indices */
1170:     PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);
1171:     if (n) {
1172:       (*ia)[0] = 0;
1173:       for (j=1; j<bs; j++) {
1174:         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1175:       }
1176:     }

1178:     for (i=1; i<n; i++) {
1179:       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1180:       for (j=1; j<bs; j++) {
1181:         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1182:       }
1183:     }
1184:     if (n) {
1185:       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1186:     }

1188:     if (ja) {
1189:       PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);
1190:       cnt = 0;
1191:       for (i=0; i<n; i++) {
1192:         for (j=0; j<bs; j++) {
1193:           for (k=tia[i]; k<tia[i+1]; k++) {
1194:             for (l=0; l<bs; l++) {
1195:               (*ja)[cnt++] = bs*tja[k] + l;
1196:             }
1197:           }
1198:         }
1199:       }
1200:     }

1202:     n     *= bs;
1203:     nz *= bs*bs;
1204:     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1205:       PetscFree(tia);
1206:       PetscFree(tja);
1207:     }
1208:   } else {
1209:     *ia = tia;
1210:     if (ja) *ja = tja;
1211:   }
1212:   if (oshift == 1) {
1213:     for (i=0; i<n+nbs; i++) (*ia)[i]++;
1214:     if (ja) for (i=0; i<nz; i++) (*ja)[i]++;
1215:   }
1216:   return(0);
1217: }

1221: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1222: {
1223:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1225:   PetscInt       i,n = a->mbs,nz = a->i[n];

1228:   if (!ia) return(0);
1229:   if (!blockcompressed && A->rmap.bs > 1) {
1230:     PetscFree(*ia);
1231:     if (ja) {PetscFree(*ja);}
1232:   } else if (symmetric) {
1233:     PetscFree(*ia);
1234:     if (ja) {PetscFree(*ja);}
1235:   } else if (oshift == 1) { /* blockcompressed */
1236:     for (i=0; i<n+1; i++) a->i[i]--;
1237:     if (ja) {for (i=0; i<nz; i++) a->j[i]--;}
1238:   }
1239:   return(0);
1240: }

1244: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1245: {
1246:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1250: #if defined(PETSC_USE_LOG)
1251:   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
1252: #endif
1253:   MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
1254:   if (a->row) {
1255:     ISDestroy(a->row);
1256:   }
1257:   if (a->col) {
1258:     ISDestroy(a->col);
1259:   }
1260:   PetscFree(a->diag);
1261:   PetscFree(a->idiag);
1262:   PetscFree2(a->imax,a->ilen);
1263:   PetscFree(a->solve_work);
1264:   PetscFree(a->mult_work);
1265:   if (a->icol) {ISDestroy(a->icol);}
1266:   PetscFree(a->saved_values);
1267: #if defined(PETSC_USE_MAT_SINGLE)
1268:   PetscFree(a->setvaluescopy);
1269: #endif
1270:   PetscFree(a->xtoy);
1271:   if (a->compressedrow.use){PetscFree(a->compressedrow.i);}

1273:   PetscFree(a);

1275:   PetscObjectChangeTypeName((PetscObject)A,0);
1276:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);
1277:   PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);
1278:   PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);
1279:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);
1280:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);
1281:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);
1282:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);
1283:   return(0);
1284: }

1288: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1289: {
1290:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1294:   switch (op) {
1295:   case MAT_ROW_ORIENTED:
1296:     a->roworiented    = flg;
1297:     break;
1298:   case MAT_KEEP_ZEROED_ROWS:
1299:     a->keepzeroedrows = flg;
1300:     break;
1301:   case MAT_NEW_NONZERO_LOCATIONS:
1302:     a->nonew          = (flg ? 0 : 1);
1303:     break;
1304:   case MAT_NEW_NONZERO_LOCATION_ERR:
1305:     a->nonew          = (flg ? -1 : 0);
1306:     break;
1307:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1308:     a->nonew          = (flg ? -2 : 0);
1309:     break;
1310:   case MAT_NEW_DIAGONALS:
1311:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1312:   case MAT_USE_HASH_TABLE:
1313:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1314:     break;
1315:   case MAT_SYMMETRIC:
1316:   case MAT_STRUCTURALLY_SYMMETRIC:
1317:   case MAT_HERMITIAN:
1318:   case MAT_SYMMETRY_ETERNAL:
1319:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1320:     break;
1321:   default:
1322:     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1323:   }
1324:   return(0);
1325: }

1329: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1330: {
1331:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1333:   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1334:   MatScalar      *aa,*aa_i;
1335:   PetscScalar    *v_i;

1338:   bs  = A->rmap.bs;
1339:   ai  = a->i;
1340:   aj  = a->j;
1341:   aa  = a->a;
1342:   bs2 = a->bs2;
1343: 
1344:   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1345: 
1346:   bn  = row/bs;   /* Block number */
1347:   bp  = row % bs; /* Block Position */
1348:   M   = ai[bn+1] - ai[bn];
1349:   *nz = bs*M;
1350: 
1351:   if (v) {
1352:     *v = 0;
1353:     if (*nz) {
1354:       PetscMalloc((*nz)*sizeof(PetscScalar),v);
1355:       for (i=0; i<M; i++) { /* for each block in the block row */
1356:         v_i  = *v + i*bs;
1357:         aa_i = aa + bs2*(ai[bn] + i);
1358:         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1359:       }
1360:     }
1361:   }

1363:   if (idx) {
1364:     *idx = 0;
1365:     if (*nz) {
1366:       PetscMalloc((*nz)*sizeof(PetscInt),idx);
1367:       for (i=0; i<M; i++) { /* for each block in the block row */
1368:         idx_i = *idx + i*bs;
1369:         itmp  = bs*aj[ai[bn] + i];
1370:         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1371:       }
1372:     }
1373:   }
1374:   return(0);
1375: }

1379: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1380: {

1384:   if (idx) {PetscFree(*idx);}
1385:   if (v)   {PetscFree(*v);}
1386:   return(0);
1387: }

1391: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
1392: {
1393:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1394:   Mat            C;
1396:   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1397:   PetscInt       *rows,*cols,bs2=a->bs2;
1398:   PetscScalar    *array;

1401:   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1402:   PetscMalloc((1+nbs)*sizeof(PetscInt),&col);
1403:   PetscMemzero(col,(1+nbs)*sizeof(PetscInt));

1405: #if defined(PETSC_USE_MAT_SINGLE)
1406:   PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);
1407:   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1408: #else
1409:   array = a->a;
1410: #endif

1412:   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1413:   MatCreate(((PetscObject)A)->comm,&C);
1414:   MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);
1415:   MatSetType(C,((PetscObject)A)->type_name);
1416:   MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);
1417:   PetscFree(col);
1418:   PetscMalloc(2*bs*sizeof(PetscInt),&rows);
1419:   cols = rows + bs;
1420:   for (i=0; i<mbs; i++) {
1421:     cols[0] = i*bs;
1422:     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1423:     len = ai[i+1] - ai[i];
1424:     for (j=0; j<len; j++) {
1425:       rows[0] = (*aj++)*bs;
1426:       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1427:       MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
1428:       array += bs2;
1429:     }
1430:   }
1431:   PetscFree(rows);
1432: #if defined(PETSC_USE_MAT_SINGLE)
1433:   PetscFree(array);
1434: #endif
1435: 
1436:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1437:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1438: 
1439:   if (B) {
1440:     *B = C;
1441:   } else {
1442:     MatHeaderCopy(A,C);
1443:   }
1444:   return(0);
1445: }

1449: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1450: {
1451:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1453:   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1454:   int            fd;
1455:   PetscScalar    *aa;
1456:   FILE           *file;

1459:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1460:   PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);
1461:   col_lens[0] = MAT_FILE_COOKIE;

1463:   col_lens[1] = A->rmap.N;
1464:   col_lens[2] = A->cmap.n;
1465:   col_lens[3] = a->nz*bs2;

1467:   /* store lengths of each row and write (including header) to file */
1468:   count = 0;
1469:   for (i=0; i<a->mbs; i++) {
1470:     for (j=0; j<bs; j++) {
1471:       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1472:     }
1473:   }
1474:   PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);
1475:   PetscFree(col_lens);

1477:   /* store column indices (zero start index) */
1478:   PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);
1479:   count = 0;
1480:   for (i=0; i<a->mbs; i++) {
1481:     for (j=0; j<bs; j++) {
1482:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1483:         for (l=0; l<bs; l++) {
1484:           jj[count++] = bs*a->j[k] + l;
1485:         }
1486:       }
1487:     }
1488:   }
1489:   PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1490:   PetscFree(jj);

1492:   /* store nonzero values */
1493:   PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
1494:   count = 0;
1495:   for (i=0; i<a->mbs; i++) {
1496:     for (j=0; j<bs; j++) {
1497:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1498:         for (l=0; l<bs; l++) {
1499:           aa[count++] = a->a[bs2*k + l*bs + j];
1500:         }
1501:       }
1502:     }
1503:   }
1504:   PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1505:   PetscFree(aa);

1507:   PetscViewerBinaryGetInfoPointer(viewer,&file);
1508:   if (file) {
1509:     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1510:   }
1511:   return(0);
1512: }

1516: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1517: {
1518:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1519:   PetscErrorCode    ierr;
1520:   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1521:   PetscViewerFormat format;

1524:   PetscViewerGetFormat(viewer,&format);
1525:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1526:     PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);
1527:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1528:     Mat aij;
1529:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1530:     MatView(aij,viewer);
1531:     MatDestroy(aij);
1532:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1533:      return(0);
1534:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1535:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1536:     for (i=0; i<a->mbs; i++) {
1537:       for (j=0; j<bs; j++) {
1538:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1539:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1540:           for (l=0; l<bs; l++) {
1541: #if defined(PETSC_USE_COMPLEX)
1542:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1543:               PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1544:                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1545:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1546:               PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1547:                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1548:             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1549:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1550:             }
1551: #else
1552:             if (a->a[bs2*k + l*bs + j] != 0.0) {
1553:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1554:             }
1555: #endif
1556:           }
1557:         }
1558:         PetscViewerASCIIPrintf(viewer,"\n");
1559:       }
1560:     }
1561:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1562:   } else {
1563:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1564:     for (i=0; i<a->mbs; i++) {
1565:       for (j=0; j<bs; j++) {
1566:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1567:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1568:           for (l=0; l<bs; l++) {
1569: #if defined(PETSC_USE_COMPLEX)
1570:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1571:               PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1572:                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1573:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1574:               PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1575:                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1576:             } else {
1577:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1578:             }
1579: #else
1580:             PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1581: #endif
1582:           }
1583:         }
1584:         PetscViewerASCIIPrintf(viewer,"\n");
1585:       }
1586:     }
1587:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1588:   }
1589:   PetscViewerFlush(viewer);
1590:   return(0);
1591: }

1595: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1596: {
1597:   Mat            A = (Mat) Aa;
1598:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1600:   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1601:   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1602:   MatScalar      *aa;
1603:   PetscViewer    viewer;


1607:   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1608:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);

1610:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

1612:   /* loop over matrix elements drawing boxes */
1613:   color = PETSC_DRAW_BLUE;
1614:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1615:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1616:       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1617:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1618:       aa = a->a + j*bs2;
1619:       for (k=0; k<bs; k++) {
1620:         for (l=0; l<bs; l++) {
1621:           if (PetscRealPart(*aa++) >=  0.) continue;
1622:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1623:         }
1624:       }
1625:     }
1626:   }
1627:   color = PETSC_DRAW_CYAN;
1628:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1629:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1630:       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1631:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1632:       aa = a->a + j*bs2;
1633:       for (k=0; k<bs; k++) {
1634:         for (l=0; l<bs; l++) {
1635:           if (PetscRealPart(*aa++) != 0.) continue;
1636:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1637:         }
1638:       }
1639:     }
1640:   }

1642:   color = PETSC_DRAW_RED;
1643:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1644:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1645:       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1646:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1647:       aa = a->a + j*bs2;
1648:       for (k=0; k<bs; k++) {
1649:         for (l=0; l<bs; l++) {
1650:           if (PetscRealPart(*aa++) <= 0.) continue;
1651:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1652:         }
1653:       }
1654:     }
1655:   }
1656:   return(0);
1657: }

1661: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1662: {
1664:   PetscReal      xl,yl,xr,yr,w,h;
1665:   PetscDraw      draw;
1666:   PetscTruth     isnull;


1670:   PetscViewerDrawGetDraw(viewer,0,&draw);
1671:   PetscDrawIsNull(draw,&isnull); if (isnull) return(0);

1673:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1674:   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1675:   xr += w;    yr += h;  xl = -w;     yl = -h;
1676:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1677:   PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1678:   PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
1679:   return(0);
1680: }

1684: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1685: {
1687:   PetscTruth     iascii,isbinary,isdraw;

1690:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1691:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1692:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1693:   if (iascii){
1694:     MatView_SeqBAIJ_ASCII(A,viewer);
1695:   } else if (isbinary) {
1696:     MatView_SeqBAIJ_Binary(A,viewer);
1697:   } else if (isdraw) {
1698:     MatView_SeqBAIJ_Draw(A,viewer);
1699:   } else {
1700:     Mat B;
1701:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1702:     MatView(B,viewer);
1703:     MatDestroy(B);
1704:   }
1705:   return(0);
1706: }


1711: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1712: {
1713:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1714:   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1715:   PetscInt    *ai = a->i,*ailen = a->ilen;
1716:   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1717:   MatScalar   *ap,*aa = a->a;

1720:   for (k=0; k<m; k++) { /* loop over rows */
1721:     row  = im[k]; brow = row/bs;
1722:     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1723:     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1724:     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1725:     nrow = ailen[brow];
1726:     for (l=0; l<n; l++) { /* loop over columns */
1727:       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1728:       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1729:       col  = in[l] ;
1730:       bcol = col/bs;
1731:       cidx = col%bs;
1732:       ridx = row%bs;
1733:       high = nrow;
1734:       low  = 0; /* assume unsorted */
1735:       while (high-low > 5) {
1736:         t = (low+high)/2;
1737:         if (rp[t] > bcol) high = t;
1738:         else             low  = t;
1739:       }
1740:       for (i=low; i<high; i++) {
1741:         if (rp[i] > bcol) break;
1742:         if (rp[i] == bcol) {
1743:           *v++ = ap[bs2*i+bs*cidx+ridx];
1744:           goto finished;
1745:         }
1746:       }
1747:       *v++ = 0.0;
1748:       finished:;
1749:     }
1750:   }
1751:   return(0);
1752: }

1754: #if defined(PETSC_USE_MAT_SINGLE)
1757: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1758: {
1759:   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1761:   PetscInt       i,N = m*n*b->bs2;
1762:   MatScalar      *vsingle;

1765:   if (N > b->setvalueslen) {
1766:     PetscFree(b->setvaluescopy);
1767:     PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
1768:     b->setvalueslen  = N;
1769:   }
1770:   vsingle = b->setvaluescopy;
1771:   for (i=0; i<N; i++) {
1772:     vsingle[i] = v[i];
1773:   }
1774:   MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
1775:   return(0);
1776: }
1777: #endif


1782: PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1783: {
1784:   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1785:   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1786:   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1787:   PetscErrorCode  ierr;
1788:   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1789:   PetscTruth      roworiented=a->roworiented;
1790:   const MatScalar *value = v;
1791:   MatScalar       *ap,*aa = a->a,*bap;

1794:   if (roworiented) {
1795:     stepval = (n-1)*bs;
1796:   } else {
1797:     stepval = (m-1)*bs;
1798:   }
1799:   for (k=0; k<m; k++) { /* loop over added rows */
1800:     row  = im[k];
1801:     if (row < 0) continue;
1802: #if defined(PETSC_USE_DEBUG)  
1803:     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1804: #endif
1805:     rp   = aj + ai[row];
1806:     ap   = aa + bs2*ai[row];
1807:     rmax = imax[row];
1808:     nrow = ailen[row];
1809:     low  = 0;
1810:     high = nrow;
1811:     for (l=0; l<n; l++) { /* loop over added columns */
1812:       if (in[l] < 0) continue;
1813: #if defined(PETSC_USE_DEBUG)  
1814:       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1815: #endif
1816:       col = in[l];
1817:       if (roworiented) {
1818:         value = v + k*(stepval+bs)*bs + l*bs;
1819:       } else {
1820:         value = v + l*(stepval+bs)*bs + k*bs;
1821:       }
1822:       if (col <= lastcol) low = 0; else high = nrow;
1823:       lastcol = col;
1824:       while (high-low > 7) {
1825:         t = (low+high)/2;
1826:         if (rp[t] > col) high = t;
1827:         else             low  = t;
1828:       }
1829:       for (i=low; i<high; i++) {
1830:         if (rp[i] > col) break;
1831:         if (rp[i] == col) {
1832:           bap  = ap +  bs2*i;
1833:           if (roworiented) {
1834:             if (is == ADD_VALUES) {
1835:               for (ii=0; ii<bs; ii++,value+=stepval) {
1836:                 for (jj=ii; jj<bs2; jj+=bs) {
1837:                   bap[jj] += *value++;
1838:                 }
1839:               }
1840:             } else {
1841:               for (ii=0; ii<bs; ii++,value+=stepval) {
1842:                 for (jj=ii; jj<bs2; jj+=bs) {
1843:                   bap[jj] = *value++;
1844:                 }
1845:               }
1846:             }
1847:           } else {
1848:             if (is == ADD_VALUES) {
1849:               for (ii=0; ii<bs; ii++,value+=stepval) {
1850:                 for (jj=0; jj<bs; jj++) {
1851:                   *bap++ += *value++;
1852:                 }
1853:               }
1854:             } else {
1855:               for (ii=0; ii<bs; ii++,value+=stepval) {
1856:                 for (jj=0; jj<bs; jj++) {
1857:                   *bap++  = *value++;
1858:                 }
1859:               }
1860:             }
1861:           }
1862:           goto noinsert2;
1863:         }
1864:       }
1865:       if (nonew == 1) goto noinsert2;
1866:       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1867:       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1868:       N = nrow++ - 1; high++;
1869:       /* shift up all the later entries in this row */
1870:       for (ii=N; ii>=i; ii--) {
1871:         rp[ii+1] = rp[ii];
1872:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1873:       }
1874:       if (N >= i) {
1875:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1876:       }
1877:       rp[i] = col;
1878:       bap   = ap +  bs2*i;
1879:       if (roworiented) {
1880:         for (ii=0; ii<bs; ii++,value+=stepval) {
1881:           for (jj=ii; jj<bs2; jj+=bs) {
1882:             bap[jj] = *value++;
1883:           }
1884:         }
1885:       } else {
1886:         for (ii=0; ii<bs; ii++,value+=stepval) {
1887:           for (jj=0; jj<bs; jj++) {
1888:             *bap++  = *value++;
1889:           }
1890:         }
1891:       }
1892:       noinsert2:;
1893:       low = i;
1894:     }
1895:     ailen[row] = nrow;
1896:   }
1897:   return(0);
1898: }

1902: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1903: {
1904:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1905:   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1906:   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1908:   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1909:   MatScalar      *aa = a->a,*ap;
1910:   PetscReal      ratio=0.6;

1913:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

1915:   if (m) rmax = ailen[0];
1916:   for (i=1; i<mbs; i++) {
1917:     /* move each row back by the amount of empty slots (fshift) before it*/
1918:     fshift += imax[i-1] - ailen[i-1];
1919:     rmax   = PetscMax(rmax,ailen[i]);
1920:     if (fshift) {
1921:       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1922:       N = ailen[i];
1923:       for (j=0; j<N; j++) {
1924:         ip[j-fshift] = ip[j];
1925:         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
1926:       }
1927:     }
1928:     ai[i] = ai[i-1] + ailen[i-1];
1929:   }
1930:   if (mbs) {
1931:     fshift += imax[mbs-1] - ailen[mbs-1];
1932:     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1933:   }
1934:   /* reset ilen and imax for each row */
1935:   for (i=0; i<mbs; i++) {
1936:     ailen[i] = imax[i] = ai[i+1] - ai[i];
1937:   }
1938:   a->nz = ai[mbs];

1940:   /* diagonals may have moved, so kill the diagonal pointers */
1941:   a->idiagvalid = PETSC_FALSE;
1942:   if (fshift && a->diag) {
1943:     PetscFree(a->diag);
1944:     PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));
1945:     a->diag = 0;
1946:   }
1947:   PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);
1948:   PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
1949:   PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);
1950:   a->reallocs          = 0;
1951:   A->info.nz_unneeded  = (PetscReal)fshift*bs2;

1953:   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1954:   if (a->compressedrow.use){
1955:     Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);
1956:   }

1958:   A->same_nonzero = PETSC_TRUE;
1959:   return(0);
1960: }

1962: /* 
1963:    This function returns an array of flags which indicate the locations of contiguous
1964:    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1965:    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1966:    Assume: sizes should be long enough to hold all the values.
1967: */
1970: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1971: {
1972:   PetscInt   i,j,k,row;
1973:   PetscTruth flg;

1976:   for (i=0,j=0; i<n; j++) {
1977:     row = idx[i];
1978:     if (row%bs!=0) { /* Not the begining of a block */
1979:       sizes[j] = 1;
1980:       i++;
1981:     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1982:       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1983:       i++;
1984:     } else { /* Begining of the block, so check if the complete block exists */
1985:       flg = PETSC_TRUE;
1986:       for (k=1; k<bs; k++) {
1987:         if (row+k != idx[i+k]) { /* break in the block */
1988:           flg = PETSC_FALSE;
1989:           break;
1990:         }
1991:       }
1992:       if (flg) { /* No break in the bs */
1993:         sizes[j] = bs;
1994:         i+= bs;
1995:       } else {
1996:         sizes[j] = 1;
1997:         i++;
1998:       }
1999:     }
2000:   }
2001:   *bs_max = j;
2002:   return(0);
2003: }
2004: 
2007: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
2008: {
2009:   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
2011:   PetscInt       i,j,k,count,*rows;
2012:   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
2013:   PetscScalar    zero = 0.0;
2014:   MatScalar      *aa;

2017:   /* Make a copy of the IS and  sort it */
2018:   /* allocate memory for rows,sizes */
2019:   PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);
2020:   sizes = rows + is_n;

2022:   /* copy IS values to rows, and sort them */
2023:   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2024:   PetscSortInt(is_n,rows);
2025:   if (baij->keepzeroedrows) {
2026:     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2027:     bs_max = is_n;
2028:     A->same_nonzero = PETSC_TRUE;
2029:   } else {
2030:     MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
2031:     A->same_nonzero = PETSC_FALSE;
2032:   }

2034:   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2035:     row   = rows[j];
2036:     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2037:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2038:     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2039:     if (sizes[i] == bs && !baij->keepzeroedrows) {
2040:       if (diag != 0.0) {
2041:         if (baij->ilen[row/bs] > 0) {
2042:           baij->ilen[row/bs]       = 1;
2043:           baij->j[baij->i[row/bs]] = row/bs;
2044:           PetscMemzero(aa,count*bs*sizeof(MatScalar));
2045:         }
2046:         /* Now insert all the diagonal values for this bs */
2047:         for (k=0; k<bs; k++) {
2048:           (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
2049:         }
2050:       } else { /* (diag == 0.0) */
2051:         baij->ilen[row/bs] = 0;
2052:       } /* end (diag == 0.0) */
2053:     } else { /* (sizes[i] != bs) */
2054: #if defined (PETSC_USE_DEBUG)
2055:       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2056: #endif
2057:       for (k=0; k<count; k++) {
2058:         aa[0] =  zero;
2059:         aa    += bs;
2060:       }
2061:       if (diag != 0.0) {
2062:         (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
2063:       }
2064:     }
2065:   }

2067:   PetscFree(rows);
2068:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2069:   return(0);
2070: }

2074: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2075: {
2076:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2077:   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2078:   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2079:   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
2081:   PetscInt       ridx,cidx,bs2=a->bs2;
2082:   PetscTruth     roworiented=a->roworiented;
2083:   MatScalar      *ap,value,*aa=a->a,*bap;

2086:   for (k=0; k<m; k++) { /* loop over added rows */
2087:     row  = im[k];
2088:     brow = row/bs;
2089:     if (row < 0) continue;
2090: #if defined(PETSC_USE_DEBUG)  
2091:     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
2092: #endif
2093:     rp   = aj + ai[brow];
2094:     ap   = aa + bs2*ai[brow];
2095:     rmax = imax[brow];
2096:     nrow = ailen[brow];
2097:     low  = 0;
2098:     high = nrow;
2099:     for (l=0; l<n; l++) { /* loop over added columns */
2100:       if (in[l] < 0) continue;
2101: #if defined(PETSC_USE_DEBUG)  
2102:       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
2103: #endif
2104:       col = in[l]; bcol = col/bs;
2105:       ridx = row % bs; cidx = col % bs;
2106:       if (roworiented) {
2107:         value = v[l + k*n];
2108:       } else {
2109:         value = v[k + l*m];
2110:       }
2111:       if (col <= lastcol) low = 0; else high = nrow;
2112:       lastcol = col;
2113:       while (high-low > 7) {
2114:         t = (low+high)/2;
2115:         if (rp[t] > bcol) high = t;
2116:         else              low  = t;
2117:       }
2118:       for (i=low; i<high; i++) {
2119:         if (rp[i] > bcol) break;
2120:         if (rp[i] == bcol) {
2121:           bap  = ap +  bs2*i + bs*cidx + ridx;
2122:           if (is == ADD_VALUES) *bap += value;
2123:           else                  *bap  = value;
2124:           goto noinsert1;
2125:         }
2126:       }
2127:       if (nonew == 1) goto noinsert1;
2128:       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2129:       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2130:       N = nrow++ - 1; high++;
2131:       /* shift up all the later entries in this row */
2132:       for (ii=N; ii>=i; ii--) {
2133:         rp[ii+1] = rp[ii];
2134:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
2135:       }
2136:       if (N>=i) {
2137:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
2138:       }
2139:       rp[i]                      = bcol;
2140:       ap[bs2*i + bs*cidx + ridx] = value;
2141:       a->nz++;
2142:       noinsert1:;
2143:       low = i;
2144:     }
2145:     ailen[brow] = nrow;
2146:   }
2147:   A->same_nonzero = PETSC_FALSE;
2148:   return(0);
2149: }


2154: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
2155: {
2156:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2157:   Mat            outA;
2159:   PetscTruth     row_identity,col_identity;

2162:   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2163:   ISIdentity(row,&row_identity);
2164:   ISIdentity(col,&col_identity);
2165:   if (!row_identity || !col_identity) {
2166:     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2167:   }

2169:   outA          = inA;
2170:   inA->factor   = FACTOR_LU;

2172:   MatMarkDiagonal_SeqBAIJ(inA);

2174:   PetscObjectReference((PetscObject)row);
2175:   if (a->row) { ISDestroy(a->row); }
2176:   a->row = row;
2177:   PetscObjectReference((PetscObject)col);
2178:   if (a->col) { ISDestroy(a->col); }
2179:   a->col = col;
2180: 
2181:   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2182:   ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
2183:   PetscLogObjectParent(inA,a->icol);
2184: 
2185:   /*
2186:       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 
2187:       for ILU(0) factorization with natural ordering
2188:   */
2189:   if (inA->rmap.bs < 8) {
2190:     MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);
2191:   } else {
2192:     if (!a->solve_work) {
2193:       PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);
2194:       PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));
2195:     }
2196:   }

2198:   MatLUFactorNumeric(inA,info,&outA);

2200:   return(0);
2201: }

2206: PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2207: {
2208:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2209:   PetscInt    i,nz,nbs;

2212:   nz  = baij->maxnz/baij->bs2;
2213:   nbs = baij->nbs;
2214:   for (i=0; i<nz; i++) {
2215:     baij->j[i] = indices[i];
2216:   }
2217:   baij->nz = nz;
2218:   for (i=0; i<nbs; i++) {
2219:     baij->ilen[i] = baij->imax[i];
2220:   }

2222:   return(0);
2223: }

2228: /*@
2229:     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2230:        in the matrix.

2232:   Input Parameters:
2233: +  mat - the SeqBAIJ matrix
2234: -  indices - the column indices

2236:   Level: advanced

2238:   Notes:
2239:     This can be called if you have precomputed the nonzero structure of the 
2240:   matrix and want to provide it to the matrix object to improve the performance
2241:   of the MatSetValues() operation.

2243:     You MUST have set the correct numbers of nonzeros per row in the call to 
2244:   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.

2246:     MUST be called before any calls to MatSetValues();

2248: @*/
2249: PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2250: {
2251:   PetscErrorCode ierr,(*f)(Mat,PetscInt *);

2256:   PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
2257:   if (f) {
2258:     (*f)(mat,indices);
2259:   } else {
2260:     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2261:   }
2262:   return(0);
2263: }

2267: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2268: {
2269:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2271:   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2272:   PetscReal      atmp;
2273:   PetscScalar    *x,zero = 0.0;
2274:   MatScalar      *aa;
2275:   PetscInt       ncols,brow,krow,kcol;

2278:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2279:   bs   = A->rmap.bs;
2280:   aa   = a->a;
2281:   ai   = a->i;
2282:   aj   = a->j;
2283:   mbs  = a->mbs;

2285:   VecSet(v,zero);
2286:   if (idx) {
2287:     for (i=0; i<A->rmap.n;i++) idx[i] = 0;
2288:   }
2289:   VecGetArray(v,&x);
2290:   VecGetLocalSize(v,&n);
2291:   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2292:   for (i=0; i<mbs; i++) {
2293:     ncols = ai[1] - ai[0]; ai++;
2294:     brow  = bs*i;
2295:     for (j=0; j<ncols; j++){
2296:       for (kcol=0; kcol<bs; kcol++){
2297:         for (krow=0; krow<bs; krow++){
2298:           atmp = PetscAbsScalar(*aa);aa++;
2299:           row = brow + krow;    /* row index */
2300:           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2301:           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2302:         }
2303:       }
2304:       aj++;
2305:     }
2306:   }
2307:   VecRestoreArray(v,&x);
2308:   return(0);
2309: }

2313: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2314: {

2318:   /* If the two matrices have the same copy implementation, use fast copy. */
2319:   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2320:     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2321:     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;

2323:     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
2324:       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2325:     }
2326:     PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));
2327:   } else {
2328:     MatCopy_Basic(A,B,str);
2329:   }
2330:   return(0);
2331: }

2335: PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2336: {

2340:    MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);
2341:   return(0);
2342: }

2346: PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2347: {
2348:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2350:   *array = a->a;
2351:   return(0);
2352: }

2356: PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2357: {
2359:   return(0);
2360: }

2362:  #include petscblaslapack.h
2365: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2366: {
2367:   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2369:   PetscInt       i,bs=Y->rmap.bs,j,bs2;
2370:   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;

2373:   if (str == SAME_NONZERO_PATTERN) {
2374:     PetscScalar alpha = a;
2375:     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2376:   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2377:     if (y->xtoy && y->XtoY != X) {
2378:       PetscFree(y->xtoy);
2379:       MatDestroy(y->XtoY);
2380:     }
2381:     if (!y->xtoy) { /* get xtoy */
2382:       MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);
2383:       y->XtoY = X;
2384:     }
2385:     bs2 = bs*bs;
2386:     for (i=0; i<x->nz; i++) {
2387:       j = 0;
2388:       while (j < bs2){
2389:         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2390:         j++;
2391:       }
2392:     }
2393:     PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
2394:   } else {
2395:     MatAXPY_Basic(Y,a,X,str);
2396:   }
2397:   return(0);
2398: }

2402: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2403: {
2404:   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2405:   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2406:   PetscScalar    *aa = a->a;

2409:   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2410:   return(0);
2411: }

2415: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2416: {
2417:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2418:   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2419:   PetscScalar    *aa = a->a;

2422:   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2423:   return(0);
2424: }


2427: /* -------------------------------------------------------------------*/
2428: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2429:        MatGetRow_SeqBAIJ,
2430:        MatRestoreRow_SeqBAIJ,
2431:        MatMult_SeqBAIJ_N,
2432: /* 4*/ MatMultAdd_SeqBAIJ_N,
2433:        MatMultTranspose_SeqBAIJ,
2434:        MatMultTransposeAdd_SeqBAIJ,
2435:        MatSolve_SeqBAIJ_N,
2436:        0,
2437:        0,
2438: /*10*/ 0,
2439:        MatLUFactor_SeqBAIJ,
2440:        0,
2441:        0,
2442:        MatTranspose_SeqBAIJ,
2443: /*15*/ MatGetInfo_SeqBAIJ,
2444:        MatEqual_SeqBAIJ,
2445:        MatGetDiagonal_SeqBAIJ,
2446:        MatDiagonalScale_SeqBAIJ,
2447:        MatNorm_SeqBAIJ,
2448: /*20*/ 0,
2449:        MatAssemblyEnd_SeqBAIJ,
2450:        0,
2451:        MatSetOption_SeqBAIJ,
2452:        MatZeroEntries_SeqBAIJ,
2453: /*25*/ MatZeroRows_SeqBAIJ,
2454:        MatLUFactorSymbolic_SeqBAIJ,
2455:        MatLUFactorNumeric_SeqBAIJ_N,
2456:        MatCholeskyFactorSymbolic_SeqBAIJ,
2457:        MatCholeskyFactorNumeric_SeqBAIJ_N,
2458: /*30*/ MatSetUpPreallocation_SeqBAIJ,
2459:        MatILUFactorSymbolic_SeqBAIJ,
2460:        MatICCFactorSymbolic_SeqBAIJ,
2461:        MatGetArray_SeqBAIJ,
2462:        MatRestoreArray_SeqBAIJ,
2463: /*35*/ MatDuplicate_SeqBAIJ,
2464:        0,
2465:        0,
2466:        MatILUFactor_SeqBAIJ,
2467:        0,
2468: /*40*/ MatAXPY_SeqBAIJ,
2469:        MatGetSubMatrices_SeqBAIJ,
2470:        MatIncreaseOverlap_SeqBAIJ,
2471:        MatGetValues_SeqBAIJ,
2472:        MatCopy_SeqBAIJ,
2473: /*45*/ 0,
2474:        MatScale_SeqBAIJ,
2475:        0,
2476:        0,
2477:        0,
2478: /*50*/ 0,
2479:        MatGetRowIJ_SeqBAIJ,
2480:        MatRestoreRowIJ_SeqBAIJ,
2481:        0,
2482:        0,
2483: /*55*/ 0,
2484:        0,
2485:        0,
2486:        0,
2487:        MatSetValuesBlocked_SeqBAIJ,
2488: /*60*/ MatGetSubMatrix_SeqBAIJ,
2489:        MatDestroy_SeqBAIJ,
2490:        MatView_SeqBAIJ,
2491:        0,
2492:        0,
2493: /*65*/ 0,
2494:        0,
2495:        0,
2496:        0,
2497:        0,
2498: /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2499:        MatConvert_Basic,
2500:        0,
2501:        0,
2502:        0,
2503: /*75*/ 0,
2504:        0,
2505:        0,
2506:        0,
2507:        0,
2508: /*80*/ 0,
2509:        0,
2510:        0,
2511:        0,
2512:        MatLoad_SeqBAIJ,
2513: /*85*/ 0,
2514:        0,
2515:        0,
2516:        0,
2517:        0,
2518: /*90*/ 0,
2519:        0,
2520:        0,
2521:        0,
2522:        0,
2523: /*95*/ 0,
2524:        0,
2525:        0,
2526:        0,
2527:        0,
2528: /*100*/0,
2529:        0,
2530:        0,
2531:        0,
2532:        0,
2533: /*105*/0,
2534:        MatRealPart_SeqBAIJ,
2535:        MatImaginaryPart_SeqBAIJ,
2536:        0,
2537:        0,
2538: /*110*/0,
2539:        0,
2540:        0,
2541:        0,
2542:        MatMissingDiagonal_SeqBAIJ
2543: /*115*/
2544: };

2549: PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2550: {
2551:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2552:   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;

2556:   if (aij->nonew != 1) {
2557:     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2558:   }

2560:   /* allocate space for values if not already there */
2561:   if (!aij->saved_values) {
2562:     PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
2563:   }

2565:   /* copy values over */
2566:   PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
2567:   return(0);
2568: }

2574: PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2575: {
2576:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2578:   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;

2581:   if (aij->nonew != 1) {
2582:     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2583:   }
2584:   if (!aij->saved_values) {
2585:     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2586:   }

2588:   /* copy values over */
2589:   PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
2590:   return(0);
2591: }


2602: PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2603: {
2604:   Mat_SeqBAIJ    *b;
2606:   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2607:   PetscTruth     flg,skipallocation = PETSC_FALSE;


2611:   if (nz == MAT_SKIP_ALLOCATION) {
2612:     skipallocation = PETSC_TRUE;
2613:     nz             = 0;
2614:   }

2616:   PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");
2617:     PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);
2618:   PetscOptionsEnd();

2620:   if (nnz && newbs != bs) {
2621:     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2622:   }
2623:   bs   = newbs;

2625:   B->rmap.bs = B->cmap.bs = bs;
2626:   PetscMapSetUp(&B->rmap);
2627:   PetscMapSetUp(&B->cmap);

2629:   B->preallocated = PETSC_TRUE;

2631:   mbs  = B->rmap.n/bs;
2632:   nbs  = B->cmap.n/bs;
2633:   bs2  = bs*bs;

2635:   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2636:     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2637:   }

2639:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2640:   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2641:   if (nnz) {
2642:     for (i=0; i<mbs; i++) {
2643:       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2644:       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2645:     }
2646:   }

2648:   b       = (Mat_SeqBAIJ*)B->data;
2649:   PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2650:     PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);
2651:   PetscOptionsEnd();

2653:   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2654:   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2655:   if (!flg) {
2656:     switch (bs) {
2657:     case 1:
2658:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2659:       B->ops->mult            = MatMult_SeqBAIJ_1;
2660:       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2661:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_1;
2662:       break;
2663:     case 2:
2664:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2665:       B->ops->mult            = MatMult_SeqBAIJ_2;
2666:       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2667:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2668:       break;
2669:     case 3:
2670:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2671:       B->ops->mult            = MatMult_SeqBAIJ_3;
2672:       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2673:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2674:       break;
2675:     case 4:
2676:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2677:       B->ops->mult            = MatMult_SeqBAIJ_4;
2678:       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2679:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2680:       break;
2681:     case 5:
2682:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2683:       B->ops->mult            = MatMult_SeqBAIJ_5;
2684:       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2685:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2686:       break;
2687:     case 6:
2688:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2689:       B->ops->mult            = MatMult_SeqBAIJ_6;
2690:       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2691:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_6;
2692:       break;
2693:     case 7:
2694:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2695:       B->ops->mult            = MatMult_SeqBAIJ_7;
2696:       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2697:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_7;
2698:       break;
2699:     default:
2700:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2701:       B->ops->mult            = MatMult_SeqBAIJ_N;
2702:       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2703:       break;
2704:     }
2705:   }
2706:   B->rmap.bs      = bs;
2707:   b->mbs     = mbs;
2708:   b->nbs     = nbs;
2709:   if (!skipallocation) {
2710:     if (!b->imax) {
2711:       PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);
2712:     }
2713:     /* b->ilen will count nonzeros in each block row so far. */
2714:     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2715:     if (!nnz) {
2716:       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2717:       else if (nz <= 0)        nz = 1;
2718:       for (i=0; i<mbs; i++) b->imax[i] = nz;
2719:       nz = nz*mbs;
2720:     } else {
2721:       nz = 0;
2722:       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2723:     }

2725:     /* allocate the matrix space */
2726:     MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
2727:     PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);
2728:     PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
2729:     PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2730:     PetscMemzero(b->j,nz*sizeof(PetscInt));
2731:     b->singlemalloc = PETSC_TRUE;
2732:     b->i[0] = 0;
2733:     for (i=1; i<mbs+1; i++) {
2734:       b->i[i] = b->i[i-1] + b->imax[i-1];
2735:     }
2736:     b->free_a     = PETSC_TRUE;
2737:     b->free_ij    = PETSC_TRUE;
2738:   } else {
2739:     b->free_a     = PETSC_FALSE;
2740:     b->free_ij    = PETSC_FALSE;
2741:   }

2743:   B->rmap.bs          = bs;
2744:   b->bs2              = bs2;
2745:   b->mbs              = mbs;
2746:   b->nz               = 0;
2747:   b->maxnz            = nz*bs2;
2748:   B->info.nz_unneeded = (PetscReal)b->maxnz;
2749:   return(0);
2750: }

2753: /*MC
2754:    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 
2755:    block sparse compressed row format.

2757:    Options Database Keys:
2758: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()

2760:   Level: beginner

2762: .seealso: MatCreateSeqBAIJ()
2763: M*/

2768: PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
2769: {
2771:   PetscMPIInt    size;
2772:   Mat_SeqBAIJ    *b;

2775:   MPI_Comm_size(((PetscObject)B)->comm,&size);
2776:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");

2778:   PetscNewLog(B,Mat_SeqBAIJ,&b);
2779:   B->data = (void*)b;
2780:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
2781:   B->factor           = 0;
2782:   B->mapping          = 0;
2783:   b->row              = 0;
2784:   b->col              = 0;
2785:   b->icol             = 0;
2786:   b->reallocs         = 0;
2787:   b->saved_values     = 0;
2788: #if defined(PETSC_USE_MAT_SINGLE)
2789:   b->setvalueslen     = 0;
2790:   b->setvaluescopy    = PETSC_NULL;
2791: #endif

2793:   b->roworiented      = PETSC_TRUE;
2794:   b->nonew            = 0;
2795:   b->diag             = 0;
2796:   b->solve_work       = 0;
2797:   b->mult_work        = 0;
2798:   B->spptr            = 0;
2799:   B->info.nz_unneeded = (PetscReal)b->maxnz;
2800:   b->keepzeroedrows   = PETSC_FALSE;
2801:   b->xtoy              = 0;
2802:   b->XtoY              = 0;
2803:   b->compressedrow.use     = PETSC_FALSE;
2804:   b->compressedrow.nrows   = 0;
2805:   b->compressedrow.i       = PETSC_NULL;
2806:   b->compressedrow.rindex  = PETSC_NULL;
2807:   b->compressedrow.checked = PETSC_FALSE;
2808:   B->same_nonzero          = PETSC_FALSE;

2810:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2811:                                      "MatInvertBlockDiagonal_SeqBAIJ",
2812:                                       MatInvertBlockDiagonal_SeqBAIJ);
2813:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2814:                                      "MatStoreValues_SeqBAIJ",
2815:                                       MatStoreValues_SeqBAIJ);
2816:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2817:                                      "MatRetrieveValues_SeqBAIJ",
2818:                                       MatRetrieveValues_SeqBAIJ);
2819:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2820:                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2821:                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);
2822:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2823:                                      "MatConvert_SeqBAIJ_SeqAIJ",
2824:                                       MatConvert_SeqBAIJ_SeqAIJ);
2825:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2826:                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2827:                                       MatConvert_SeqBAIJ_SeqSBAIJ);
2828:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2829:                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2830:                                       MatSeqBAIJSetPreallocation_SeqBAIJ);
2831:   PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
2832:   return(0);
2833: }

2838: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2839: {
2840:   Mat            C;
2841:   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2843:   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;

2846:   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");

2848:   *B = 0;
2849:   MatCreate(((PetscObject)A)->comm,&C);
2850:   MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);
2851:   MatSetType(C,((PetscObject)A)->type_name);
2852:   PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
2853:   c    = (Mat_SeqBAIJ*)C->data;

2855:   C->rmap.N   = A->rmap.N;
2856:   C->cmap.N   = A->cmap.N;
2857:   C->rmap.bs  = A->rmap.bs;
2858:   c->bs2 = a->bs2;
2859:   c->mbs = a->mbs;
2860:   c->nbs = a->nbs;

2862:   PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);
2863:   for (i=0; i<mbs; i++) {
2864:     c->imax[i] = a->imax[i];
2865:     c->ilen[i] = a->ilen[i];
2866:   }

2868:   /* allocate the matrix space */
2869:   PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);
2870:   c->singlemalloc = PETSC_TRUE;
2871:   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
2872:   if (mbs > 0) {
2873:     PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
2874:     if (cpvalues == MAT_COPY_VALUES) {
2875:       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
2876:     } else {
2877:       PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
2878:     }
2879:   }
2880:   c->roworiented = a->roworiented;
2881:   c->nonew       = a->nonew;

2883:   if (a->diag) {
2884:     PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);
2885:     PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));
2886:     for (i=0; i<mbs; i++) {
2887:       c->diag[i] = a->diag[i];
2888:     }
2889:   } else c->diag        = 0;
2890:   c->nz                 = a->nz;
2891:   c->maxnz              = a->maxnz;
2892:   c->solve_work         = 0;
2893:   c->mult_work          = 0;
2894:   c->free_a             = PETSC_TRUE;
2895:   c->free_ij            = PETSC_TRUE;
2896:   C->preallocated       = PETSC_TRUE;
2897:   C->assembled          = PETSC_TRUE;

2899:   c->compressedrow.use     = a->compressedrow.use;
2900:   c->compressedrow.nrows   = a->compressedrow.nrows;
2901:   c->compressedrow.checked = a->compressedrow.checked;
2902:   if ( a->compressedrow.checked && a->compressedrow.use){
2903:     i = a->compressedrow.nrows;
2904:     PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);
2905:     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2906:     PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));
2907:     PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));
2908:   } else {
2909:     c->compressedrow.use    = PETSC_FALSE;
2910:     c->compressedrow.i      = PETSC_NULL;
2911:     c->compressedrow.rindex = PETSC_NULL;
2912:   }
2913:   C->same_nonzero = A->same_nonzero;
2914:   *B = C;
2915:   PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
2916:   return(0);
2917: }

2921: PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2922: {
2923:   Mat_SeqBAIJ    *a;
2924:   Mat            B;
2926:   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2927:   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2928:   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2929:   PetscInt       *masked,nmask,tmp,bs2,ishift;
2930:   PetscMPIInt    size;
2931:   int            fd;
2932:   PetscScalar    *aa;
2933:   MPI_Comm       comm = ((PetscObject)viewer)->comm;

2936:   PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");
2937:     PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);
2938:   PetscOptionsEnd();
2939:   bs2  = bs*bs;

2941:   MPI_Comm_size(comm,&size);
2942:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2943:   PetscViewerBinaryGetDescriptor(viewer,&fd);
2944:   PetscBinaryRead(fd,header,4,PETSC_INT);
2945:   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2946:   M = header[1]; N = header[2]; nz = header[3];

2948:   if (header[3] < 0) {
2949:     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2950:   }

2952:   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

2954:   /* 
2955:      This code adds extra rows to make sure the number of rows is 
2956:     divisible by the blocksize
2957:   */
2958:   mbs        = M/bs;
2959:   extra_rows = bs - M + bs*(mbs);
2960:   if (extra_rows == bs) extra_rows = 0;
2961:   else                  mbs++;
2962:   if (extra_rows) {
2963:     PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
2964:   }

2966:   /* read in row lengths */
2967:   PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
2968:   PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
2969:   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;

2971:   /* read in column indices */
2972:   PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);
2973:   PetscBinaryRead(fd,jj,nz,PETSC_INT);
2974:   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;

2976:   /* loop over row lengths determining block row lengths */
2977:   PetscMalloc(mbs*sizeof(PetscInt),&browlengths);
2978:   PetscMemzero(browlengths,mbs*sizeof(PetscInt));
2979:   PetscMalloc(2*mbs*sizeof(PetscInt),&mask);
2980:   PetscMemzero(mask,mbs*sizeof(PetscInt));
2981:   masked   = mask + mbs;
2982:   rowcount = 0; nzcount = 0;
2983:   for (i=0; i<mbs; i++) {
2984:     nmask = 0;
2985:     for (j=0; j<bs; j++) {
2986:       kmax = rowlengths[rowcount];
2987:       for (k=0; k<kmax; k++) {
2988:         tmp = jj[nzcount++]/bs;
2989:         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2990:       }
2991:       rowcount++;
2992:     }
2993:     browlengths[i] += nmask;
2994:     /* zero out the mask elements we set */
2995:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2996:   }

2998:   /* create our matrix */
2999:   MatCreate(comm,&B);
3000:   MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3001:   MatSetType(B,type);
3002:   MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);
3003:   a = (Mat_SeqBAIJ*)B->data;

3005:   /* set matrix "i" values */
3006:   a->i[0] = 0;
3007:   for (i=1; i<= mbs; i++) {
3008:     a->i[i]      = a->i[i-1] + browlengths[i-1];
3009:     a->ilen[i-1] = browlengths[i-1];
3010:   }
3011:   a->nz         = 0;
3012:   for (i=0; i<mbs; i++) a->nz += browlengths[i];

3014:   /* read in nonzero values */
3015:   PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
3016:   PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
3017:   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;

3019:   /* set "a" and "j" values into matrix */
3020:   nzcount = 0; jcount = 0;
3021:   for (i=0; i<mbs; i++) {
3022:     nzcountb = nzcount;
3023:     nmask    = 0;
3024:     for (j=0; j<bs; j++) {
3025:       kmax = rowlengths[i*bs+j];
3026:       for (k=0; k<kmax; k++) {
3027:         tmp = jj[nzcount++]/bs;
3028:         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3029:       }
3030:     }
3031:     /* sort the masked values */
3032:     PetscSortInt(nmask,masked);

3034:     /* set "j" values into matrix */
3035:     maskcount = 1;
3036:     for (j=0; j<nmask; j++) {
3037:       a->j[jcount++]  = masked[j];
3038:       mask[masked[j]] = maskcount++;
3039:     }
3040:     /* set "a" values into matrix */
3041:     ishift = bs2*a->i[i];
3042:     for (j=0; j<bs; j++) {
3043:       kmax = rowlengths[i*bs+j];
3044:       for (k=0; k<kmax; k++) {
3045:         tmp       = jj[nzcountb]/bs ;
3046:         block     = mask[tmp] - 1;
3047:         point     = jj[nzcountb] - bs*tmp;
3048:         idx       = ishift + bs2*block + j + bs*point;
3049:         a->a[idx] = (MatScalar)aa[nzcountb++];
3050:       }
3051:     }
3052:     /* zero out the mask elements we set */
3053:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3054:   }
3055:   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");

3057:   PetscFree(rowlengths);
3058:   PetscFree(browlengths);
3059:   PetscFree(aa);
3060:   PetscFree(jj);
3061:   PetscFree(mask);

3063:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3064:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3065:   MatView_Private(B);

3067:   *A = B;
3068:   return(0);
3069: }

3073: /*@C
3074:    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3075:    compressed row) format.  For good matrix assembly performance the
3076:    user should preallocate the matrix storage by setting the parameter nz
3077:    (or the array nnz).  By setting these parameters accurately, performance
3078:    during matrix assembly can be increased by more than a factor of 50.

3080:    Collective on MPI_Comm

3082:    Input Parameters:
3083: +  comm - MPI communicator, set to PETSC_COMM_SELF
3084: .  bs - size of block
3085: .  m - number of rows
3086: .  n - number of columns
3087: .  nz - number of nonzero blocks  per block row (same for all rows)
3088: -  nnz - array containing the number of nonzero blocks in the various block rows 
3089:          (possibly different for each block row) or PETSC_NULL

3091:    Output Parameter:
3092: .  A - the matrix 

3094:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3095:    MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely
3096:    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
3097:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

3099:    Options Database Keys:
3100: .   -mat_no_unroll - uses code that does not unroll the loops in the 
3101:                      block calculations (much slower)
3102: .    -mat_block_size - size of the blocks to use

3104:    Level: intermediate

3106:    Notes:
3107:    The number of rows and columns must be divisible by blocksize.

3109:    If the nnz parameter is given then the nz parameter is ignored

3111:    A nonzero block is any block that as 1 or more nonzeros in it

3113:    The block AIJ format is fully compatible with standard Fortran 77
3114:    storage.  That is, the stored row and column indices can begin at
3115:    either one (as in Fortran) or zero.  See the users' manual for details.

3117:    Specify the preallocated storage with either nz or nnz (not both).
3118:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
3119:    allocation.  For additional details, see the users manual chapter on
3120:    matrices.

3122: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3123: @*/
3124: PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3125: {
3127: 
3129:   MatCreate(comm,A);
3130:   MatSetSizes(*A,m,n,m,n);
3131:   MatSetType(*A,MATSEQBAIJ);
3132:   MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
3133:   return(0);
3134: }

3138: /*@C
3139:    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3140:    per row in the matrix. For good matrix assembly performance the
3141:    user should preallocate the matrix storage by setting the parameter nz
3142:    (or the array nnz).  By setting these parameters accurately, performance
3143:    during matrix assembly can be increased by more than a factor of 50.

3145:    Collective on MPI_Comm

3147:    Input Parameters:
3148: +  A - the matrix
3149: .  bs - size of block
3150: .  nz - number of block nonzeros per block row (same for all rows)
3151: -  nnz - array containing the number of block nonzeros in the various block rows 
3152:          (possibly different for each block row) or PETSC_NULL

3154:    Options Database Keys:
3155: .   -mat_no_unroll - uses code that does not unroll the loops in the 
3156:                      block calculations (much slower)
3157: .    -mat_block_size - size of the blocks to use

3159:    Level: intermediate

3161:    Notes:
3162:    If the nnz parameter is given then the nz parameter is ignored

3164:    You can call MatGetInfo() to get information on how effective the preallocation was;
3165:    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3166:    You can also run with the option -info and look for messages with the string 
3167:    malloc in them to see if additional memory allocation was needed.

3169:    The block AIJ format is fully compatible with standard Fortran 77
3170:    storage.  That is, the stored row and column indices can begin at
3171:    either one (as in Fortran) or zero.  See the users' manual for details.

3173:    Specify the preallocated storage with either nz or nnz (not both).
3174:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
3175:    allocation.  For additional details, see the users manual chapter on
3176:    matrices.

3178: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3179: @*/
3180: PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3181: {
3182:   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);

3185:   PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);
3186:   if (f) {
3187:     (*f)(B,bs,nz,nnz);
3188:   }
3189:   return(0);
3190: }

3194: /*@
3195:      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 
3196:               (upper triangular entries in CSR format) provided by the user.

3198:      Collective on MPI_Comm

3200:    Input Parameters:
3201: +  comm - must be an MPI communicator of size 1
3202: .  bs - size of block
3203: .  m - number of rows
3204: .  n - number of columns
3205: .  i - row indices
3206: .  j - column indices
3207: -  a - matrix values

3209:    Output Parameter:
3210: .  mat - the matrix

3212:    Level: intermediate

3214:    Notes:
3215:        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3216:     once the matrix is destroyed

3218:        You cannot set new nonzero locations into this matrix, that will generate an error.

3220:        The i and j indices are 0 based

3222: .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()

3224: @*/
3225: PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3226: {
3228:   PetscInt       ii;
3229:   Mat_SeqBAIJ    *baij;

3232:   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3233:   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3234: 
3235:   MatCreate(comm,mat);
3236:   MatSetSizes(*mat,m,n,m,n);
3237:   MatSetType(*mat,MATSEQBAIJ);
3238:   MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);
3239:   baij = (Mat_SeqBAIJ*)(*mat)->data;
3240:   PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);

3242:   baij->i = i;
3243:   baij->j = j;
3244:   baij->a = a;
3245:   baij->singlemalloc = PETSC_FALSE;
3246:   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3247:   baij->free_a       = PETSC_FALSE;
3248:   baij->free_ij       = PETSC_FALSE;

3250:   for (ii=0; ii<m; ii++) {
3251:     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3252: #if defined(PETSC_USE_DEBUG)
3253:     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3254: #endif    
3255:   }
3256: #if defined(PETSC_USE_DEBUG)
3257:   for (ii=0; ii<baij->i[m]; ii++) {
3258:     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3259:     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3260:   }
3261: #endif    

3263:   MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3264:   MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3265:   return(0);
3266: }