Actual source code: baij.c

  1: /*
  2:     Defines the basic matrix operations for the BAIJ (compressed row)
  3:   matrix storage format.
  4: */
 5:  #include src/mat/impls/baij/seq/baij.h
 6:  #include src/inline/spops.h
 7:  #include petscsys.h

 9:  #include src/inline/ilu.h

 13: PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A)
 14: {
 15:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
 17:   PetscInt       *diag_offset,i,bs = A->bs,mbs = a->mbs;
 18:   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;

 21:   if (a->idiagvalid) return(0);
 22:   MatMarkDiagonal_SeqBAIJ(A);
 23:   diag_offset = a->diag;
 24:   if (!a->idiag) {
 25:     PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);
 26:   }
 27:   diag  = a->idiag;
 28:   mdiag = a->idiag+bs*bs*mbs;
 29:   /* factor and invert each block */
 30:   switch (bs){
 31:     case 2:
 32:       for (i=0; i<mbs; i++) {
 33:         odiag   = v + 4*diag_offset[i];
 34:         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 35:         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 36:         Kernel_A_gets_inverse_A_2(diag);
 37:         diag    += 4;
 38:         mdiag   += 4;
 39:       }
 40:       break;
 41:     case 3:
 42:       for (i=0; i<mbs; i++) {
 43:         odiag    = v + 9*diag_offset[i];
 44:         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 45:         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
 46:         diag[8]  = odiag[8];
 47:         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 48:         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
 49:         mdiag[8] = odiag[8];
 50:         Kernel_A_gets_inverse_A_3(diag);
 51:         diag    += 9;
 52:         mdiag   += 9;
 53:       }
 54:       break;
 55:     case 4:
 56:       for (i=0; i<mbs; i++) {
 57:         odiag  = v + 16*diag_offset[i];
 58:         PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
 59:         PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
 60:         Kernel_A_gets_inverse_A_4(diag);
 61:         diag  += 16;
 62:         mdiag += 16;
 63:       }
 64:       break;
 65:     case 5:
 66:       for (i=0; i<mbs; i++) {
 67:         odiag = v + 25*diag_offset[i];
 68:         PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
 69:         PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
 70:         Kernel_A_gets_inverse_A_5(diag);
 71:         diag  += 25;
 72:         mdiag += 25;
 73:       }
 74:       break;
 75:     default:
 76:       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
 77:   }
 78:   a->idiagvalid = PETSC_TRUE;
 79:   return(0);
 80: }

 84: PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
 85: {
 86:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
 87:   PetscScalar        *x,x1,x2,s1,s2;
 88:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
 89:   PetscErrorCode     ierr;
 90:   PetscInt           m = a->mbs,i,i2,nz,idx;
 91:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

 94:   its = its*lits;
 95:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
 96:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
 97:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
 98:   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
 99:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

101:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

103:   diag  = a->diag;
104:   idiag = a->idiag;
105:   VecGetArray(xx,&x);
106:   VecGetArray(bb,(PetscScalar**)&b);

108:   if (flag & SOR_ZERO_INITIAL_GUESS) {
109:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
110:       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
111:       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
112:       i2     = 2;
113:       idiag += 4;
114:       for (i=1; i<m; i++) {
115:         v     = aa + 4*ai[i];
116:         vi    = aj + ai[i];
117:         nz    = diag[i] - ai[i];
118:         s1    = b[i2]; s2 = b[i2+1];
119:         while (nz--) {
120:           idx  = 2*(*vi++);
121:           x1   = x[idx]; x2 = x[1+idx];
122:           s1  -= v[0]*x1 + v[2]*x2;
123:           s2  -= v[1]*x1 + v[3]*x2;
124:           v   += 4;
125:         }
126:         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
127:         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
128:         idiag   += 4;
129:         i2      += 2;
130:       }
131:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
132:       PetscLogFlops(4*(a->nz));
133:     }
134:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
135:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
136:       i2    = 0;
137:       mdiag = a->idiag+4*a->mbs;
138:       for (i=0; i<m; i++) {
139:         x1      = x[i2]; x2 = x[i2+1];
140:         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
141:         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
142:         mdiag  += 4;
143:         i2     += 2;
144:       }
145:       PetscLogFlops(6*m);
146:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
147:       PetscMemcpy(x,b,A->m*sizeof(PetscScalar));
148:     }
149:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
150:       idiag   = a->idiag+4*a->mbs - 4;
151:       i2      = 2*m - 2;
152:       x1      = x[i2]; x2 = x[i2+1];
153:       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
154:       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
155:       idiag -= 4;
156:       i2    -= 2;
157:       for (i=m-2; i>=0; i--) {
158:         v     = aa + 4*(diag[i]+1);
159:         vi    = aj + diag[i] + 1;
160:         nz    = ai[i+1] - diag[i] - 1;
161:         s1    = x[i2]; s2 = x[i2+1];
162:         while (nz--) {
163:           idx  = 2*(*vi++);
164:           x1   = x[idx]; x2 = x[1+idx];
165:           s1  -= v[0]*x1 + v[2]*x2;
166:           s2  -= v[1]*x1 + v[3]*x2;
167:           v   += 4;
168:         }
169:         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
170:         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
171:         idiag   -= 4;
172:         i2      -= 2;
173:       }
174:       PetscLogFlops(4*(a->nz));
175:     }
176:   } else {
177:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
178:   }
179:   VecRestoreArray(xx,&x);
180:   VecRestoreArray(bb,(PetscScalar**)&b);
181:   return(0);
182: }

186: PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
187: {
188:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
189:   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
190:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
191:   PetscErrorCode     ierr;
192:   PetscInt           m = a->mbs,i,i2,nz,idx;
193:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

196:   its = its*lits;
197:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
198:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
199:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
200:   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
201:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

203:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

205:   diag  = a->diag;
206:   idiag = a->idiag;
207:   VecGetArray(xx,&x);
208:   VecGetArray(bb,(PetscScalar**)&b);

210:   if (flag & SOR_ZERO_INITIAL_GUESS) {
211:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
212:       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
213:       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
214:       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
215:       i2     = 3;
216:       idiag += 9;
217:       for (i=1; i<m; i++) {
218:         v     = aa + 9*ai[i];
219:         vi    = aj + ai[i];
220:         nz    = diag[i] - ai[i];
221:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
222:         while (nz--) {
223:           idx  = 3*(*vi++);
224:           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
225:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
226:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
227:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
228:           v   += 9;
229:         }
230:         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
231:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
232:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
233:         idiag   += 9;
234:         i2      += 3;
235:       }
236:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
237:       PetscLogFlops(9*(a->nz));
238:     }
239:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
240:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
241:       i2    = 0;
242:       mdiag = a->idiag+9*a->mbs;
243:       for (i=0; i<m; i++) {
244:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
245:         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
246:         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
247:         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
248:         mdiag  += 9;
249:         i2     += 3;
250:       }
251:       PetscLogFlops(15*m);
252:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
253:       PetscMemcpy(x,b,A->m*sizeof(PetscScalar));
254:     }
255:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
256:       idiag   = a->idiag+9*a->mbs - 9;
257:       i2      = 3*m - 3;
258:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
259:       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
260:       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
261:       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
262:       idiag -= 9;
263:       i2    -= 3;
264:       for (i=m-2; i>=0; i--) {
265:         v     = aa + 9*(diag[i]+1);
266:         vi    = aj + diag[i] + 1;
267:         nz    = ai[i+1] - diag[i] - 1;
268:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
269:         while (nz--) {
270:           idx  = 3*(*vi++);
271:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
272:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
273:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
274:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
275:           v   += 9;
276:         }
277:         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
278:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
279:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
280:         idiag   -= 9;
281:         i2      -= 3;
282:       }
283:       PetscLogFlops(9*(a->nz));
284:     }
285:   } else {
286:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
287:   }
288:   VecRestoreArray(xx,&x);
289:   VecRestoreArray(bb,(PetscScalar**)&b);
290:   return(0);
291: }

295: PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
296: {
297:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
298:   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
299:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
300:   PetscErrorCode     ierr;
301:   PetscInt           m = a->mbs,i,i2,nz,idx;
302:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

305:   its = its*lits;
306:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
307:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
308:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
309:   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
310:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

312:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

314:   diag  = a->diag;
315:   idiag = a->idiag;
316:   VecGetArray(xx,&x);
317:   VecGetArray(bb,(PetscScalar**)&b);

319:   if (flag & SOR_ZERO_INITIAL_GUESS) {
320:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
321:       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
322:       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
323:       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
324:       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
325:       i2     = 4;
326:       idiag += 16;
327:       for (i=1; i<m; i++) {
328:         v     = aa + 16*ai[i];
329:         vi    = aj + ai[i];
330:         nz    = diag[i] - ai[i];
331:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
332:         while (nz--) {
333:           idx  = 4*(*vi++);
334:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
335:           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
336:           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
337:           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
338:           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
339:           v   += 16;
340:         }
341:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
342:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
343:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
344:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
345:         idiag   += 16;
346:         i2      += 4;
347:       }
348:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
349:       PetscLogFlops(16*(a->nz));
350:     }
351:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
352:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
353:       i2    = 0;
354:       mdiag = a->idiag+16*a->mbs;
355:       for (i=0; i<m; i++) {
356:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
357:         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
358:         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
359:         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
360:         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
361:         mdiag  += 16;
362:         i2     += 4;
363:       }
364:       PetscLogFlops(28*m);
365:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
366:       PetscMemcpy(x,b,A->m*sizeof(PetscScalar));
367:     }
368:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
369:       idiag   = a->idiag+16*a->mbs - 16;
370:       i2      = 4*m - 4;
371:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
372:       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
373:       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
374:       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
375:       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
376:       idiag -= 16;
377:       i2    -= 4;
378:       for (i=m-2; i>=0; i--) {
379:         v     = aa + 16*(diag[i]+1);
380:         vi    = aj + diag[i] + 1;
381:         nz    = ai[i+1] - diag[i] - 1;
382:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
383:         while (nz--) {
384:           idx  = 4*(*vi++);
385:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
386:           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
387:           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
388:           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
389:           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
390:           v   += 16;
391:         }
392:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
393:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
394:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
395:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
396:         idiag   -= 16;
397:         i2      -= 4;
398:       }
399:       PetscLogFlops(16*(a->nz));
400:     }
401:   } else {
402:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
403:   }
404:   VecRestoreArray(xx,&x);
405:   VecRestoreArray(bb,(PetscScalar**)&b);
406:   return(0);
407: }

411: PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
412: {
413:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
414:   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
415:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
416:   PetscErrorCode     ierr;
417:   PetscInt           m = a->mbs,i,i2,nz,idx;
418:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

421:   its = its*lits;
422:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
423:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
424:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
425:   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
426:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

428:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

430:   diag  = a->diag;
431:   idiag = a->idiag;
432:   VecGetArray(xx,&x);
433:   VecGetArray(bb,(PetscScalar**)&b);

435:   if (flag & SOR_ZERO_INITIAL_GUESS) {
436:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
437:       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
438:       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
439:       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
440:       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
441:       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
442:       i2     = 5;
443:       idiag += 25;
444:       for (i=1; i<m; i++) {
445:         v     = aa + 25*ai[i];
446:         vi    = aj + ai[i];
447:         nz    = diag[i] - ai[i];
448:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
449:         while (nz--) {
450:           idx  = 5*(*vi++);
451:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
452:           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
453:           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
454:           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
455:           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
456:           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
457:           v   += 25;
458:         }
459:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
460:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
461:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
462:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
463:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
464:         idiag   += 25;
465:         i2      += 5;
466:       }
467:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
468:       PetscLogFlops(25*(a->nz));
469:     }
470:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
471:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
472:       i2    = 0;
473:       mdiag = a->idiag+25*a->mbs;
474:       for (i=0; i<m; i++) {
475:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
476:         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
477:         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
478:         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
479:         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
480:         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
481:         mdiag  += 25;
482:         i2     += 5;
483:       }
484:       PetscLogFlops(45*m);
485:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
486:       PetscMemcpy(x,b,A->m*sizeof(PetscScalar));
487:     }
488:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
489:       idiag   = a->idiag+25*a->mbs - 25;
490:       i2      = 5*m - 5;
491:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
492:       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
493:       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
494:       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
495:       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
496:       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
497:       idiag -= 25;
498:       i2    -= 5;
499:       for (i=m-2; i>=0; i--) {
500:         v     = aa + 25*(diag[i]+1);
501:         vi    = aj + diag[i] + 1;
502:         nz    = ai[i+1] - diag[i] - 1;
503:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
504:         while (nz--) {
505:           idx  = 5*(*vi++);
506:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
507:           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
508:           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
509:           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
510:           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
511:           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
512:           v   += 25;
513:         }
514:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
515:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
516:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
517:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
518:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
519:         idiag   -= 25;
520:         i2      -= 5;
521:       }
522:       PetscLogFlops(25*(a->nz));
523:     }
524:   } else {
525:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
526:   }
527:   VecRestoreArray(xx,&x);
528:   VecRestoreArray(bb,(PetscScalar**)&b);
529:   return(0);
530: }

532: /*
533:     Special version for Fun3d sequential benchmark
534: */
535: #if defined(PETSC_HAVE_FORTRAN_CAPS)
536: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
537: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
538: #define matsetvaluesblocked4_ matsetvaluesblocked4
539: #endif

544: void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
545: {
546:   Mat               A = *AA;
547:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
548:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
549:   PetscInt          *ai=a->i,*ailen=a->ilen;
550:   PetscInt          *aj=a->j,stepval;
551:   const PetscScalar *value = v;
552:   MatScalar         *ap,*aa = a->a,*bap;

555:   stepval = (n-1)*4;
556:   for (k=0; k<m; k++) { /* loop over added rows */
557:     row  = im[k];
558:     rp   = aj + ai[row];
559:     ap   = aa + 16*ai[row];
560:     nrow = ailen[row];
561:     low  = 0;
562:     for (l=0; l<n; l++) { /* loop over added columns */
563:       col = in[l];
564:       value = v + k*(stepval+4)*4 + l*4;
565:       low = 0; high = nrow;
566:       while (high-low > 7) {
567:         t = (low+high)/2;
568:         if (rp[t] > col) high = t;
569:         else             low  = t;
570:       }
571:       for (i=low; i<high; i++) {
572:         if (rp[i] > col) break;
573:         if (rp[i] == col) {
574:           bap  = ap +  16*i;
575:           for (ii=0; ii<4; ii++,value+=stepval) {
576:             for (jj=ii; jj<16; jj+=4) {
577:               bap[jj] += *value++;
578:             }
579:           }
580:           goto noinsert2;
581:         }
582:       }
583:       N = nrow++ - 1;
584:       /* shift up all the later entries in this row */
585:       for (ii=N; ii>=i; ii--) {
586:         rp[ii+1] = rp[ii];
587:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
588:       }
589:       if (N >= i) {
590:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
591:       }
592:       rp[i] = col;
593:       bap   = ap +  16*i;
594:       for (ii=0; ii<4; ii++,value+=stepval) {
595:         for (jj=ii; jj<16; jj+=4) {
596:           bap[jj] = *value++;
597:         }
598:       }
599:       noinsert2:;
600:       low = i;
601:     }
602:     ailen[row] = nrow;
603:   }
604: }

607: #if defined(PETSC_HAVE_FORTRAN_CAPS)
608: #define matsetvalues4_ MATSETVALUES4
609: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
610: #define matsetvalues4_ matsetvalues4
611: #endif

616: void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
617: {
618:   Mat         A = *AA;
619:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
620:   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
621:   PetscInt    *ai=a->i,*ailen=a->ilen;
622:   PetscInt    *aj=a->j,brow,bcol;
623:   PetscInt    ridx,cidx;
624:   MatScalar   *ap,value,*aa=a->a,*bap;
625: 
627:   for (k=0; k<m; k++) { /* loop over added rows */
628:     row  = im[k]; brow = row/4;
629:     rp   = aj + ai[brow];
630:     ap   = aa + 16*ai[brow];
631:     nrow = ailen[brow];
632:     low  = 0;
633:     for (l=0; l<n; l++) { /* loop over added columns */
634:       col = in[l]; bcol = col/4;
635:       ridx = row % 4; cidx = col % 4;
636:       value = v[l + k*n];
637:       low = 0; high = nrow;
638:       while (high-low > 7) {
639:         t = (low+high)/2;
640:         if (rp[t] > bcol) high = t;
641:         else              low  = t;
642:       }
643:       for (i=low; i<high; i++) {
644:         if (rp[i] > bcol) break;
645:         if (rp[i] == bcol) {
646:           bap  = ap +  16*i + 4*cidx + ridx;
647:           *bap += value;
648:           goto noinsert1;
649:         }
650:       }
651:       N = nrow++ - 1;
652:       /* shift up all the later entries in this row */
653:       for (ii=N; ii>=i; ii--) {
654:         rp[ii+1] = rp[ii];
655:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
656:       }
657:       if (N>=i) {
658:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
659:       }
660:       rp[i]                    = bcol;
661:       ap[16*i + 4*cidx + ridx] = value;
662:       noinsert1:;
663:       low = i;
664:     }
665:     ailen[brow] = nrow;
666:   }
667: }

670: /*  UGLY, ugly, ugly
671:    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 
672:    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 
673:    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
674:    converts the entries into single precision and then calls ..._MatScalar() to put them
675:    into the single precision data structures.
676: */
677: #if defined(PETSC_USE_MAT_SINGLE)
678: EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
679: #else
680: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
681: #endif

683: #define CHUNKSIZE  10

685: /*
686:      Checks for missing diagonals
687: */
690: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
691: {
692:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
694:   PetscInt       *diag,*jj = a->j,i;

697:   MatMarkDiagonal_SeqBAIJ(A);
698:   diag = a->diag;
699:   for (i=0; i<a->mbs; i++) {
700:     if (jj[diag[i]] != i) {
701:       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
702:     }
703:   }
704:   return(0);
705: }

709: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
710: {
711:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
713:   PetscInt       i,j,*diag,m = a->mbs;

716:   if (a->diag) return(0);

718:   PetscMalloc((m+1)*sizeof(PetscInt),&diag);
719:   PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));
720:   for (i=0; i<m; i++) {
721:     diag[i] = a->i[i+1];
722:     for (j=a->i[i]; j<a->i[i+1]; j++) {
723:       if (a->j[j] == i) {
724:         diag[i] = j;
725:         break;
726:       }
727:     }
728:   }
729:   a->diag = diag;
730:   return(0);
731: }


734: EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);

738: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
739: {
740:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
742:   PetscInt       n = a->mbs,i;

745:   *nn = n;
746:   if (!ia) return(0);
747:   if (symmetric) {
748:     MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);
749:   } else if (oshift == 1) {
750:     /* temporarily add 1 to i and j indices */
751:     PetscInt nz = a->i[n];
752:     for (i=0; i<nz; i++) a->j[i]++;
753:     for (i=0; i<n+1; i++) a->i[i]++;
754:     *ia = a->i; *ja = a->j;
755:   } else {
756:     *ia = a->i; *ja = a->j;
757:   }

759:   return(0);
760: }

764: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
765: {
766:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
768:   PetscInt       i,n = a->mbs;

771:   if (!ia) return(0);
772:   if (symmetric) {
773:     PetscFree(*ia);
774:     PetscFree(*ja);
775:   } else if (oshift == 1) {
776:     PetscInt nz = a->i[n]-1;
777:     for (i=0; i<nz; i++) a->j[i]--;
778:     for (i=0; i<n+1; i++) a->i[i]--;
779:   }
780:   return(0);
781: }

785: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
786: {
787:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

791: #if defined(PETSC_USE_LOG)
792:   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz);
793: #endif
794:   PetscFree(a->a);
795:   if (!a->singlemalloc) {
796:     PetscFree(a->i);
797:     PetscFree(a->j);
798:   }
799:   if (a->row) {
800:     ISDestroy(a->row);
801:   }
802:   if (a->col) {
803:     ISDestroy(a->col);
804:   }
805:   if (a->diag) {PetscFree(a->diag);}
806:   if (a->idiag) {PetscFree(a->idiag);}
807:   if (a->ilen) {PetscFree(a->ilen);}
808:   if (a->imax) {PetscFree(a->imax);}
809:   if (a->solve_work) {PetscFree(a->solve_work);}
810:   if (a->mult_work) {PetscFree(a->mult_work);}
811:   if (a->icol) {ISDestroy(a->icol);}
812:   if (a->saved_values) {PetscFree(a->saved_values);}
813: #if defined(PETSC_USE_MAT_SINGLE)
814:   if (a->setvaluescopy) {PetscFree(a->setvaluescopy);}
815: #endif
816:   if (a->xtoy) {PetscFree(a->xtoy);}

818:   PetscFree(a);

820:   PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);
821:   PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);
822:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);
823:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);
824:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);
825:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);
826:   return(0);
827: }

831: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
832: {
833:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;

836:   switch (op) {
837:   case MAT_ROW_ORIENTED:
838:     a->roworiented    = PETSC_TRUE;
839:     break;
840:   case MAT_COLUMN_ORIENTED:
841:     a->roworiented    = PETSC_FALSE;
842:     break;
843:   case MAT_COLUMNS_SORTED:
844:     a->sorted         = PETSC_TRUE;
845:     break;
846:   case MAT_COLUMNS_UNSORTED:
847:     a->sorted         = PETSC_FALSE;
848:     break;
849:   case MAT_KEEP_ZEROED_ROWS:
850:     a->keepzeroedrows = PETSC_TRUE;
851:     break;
852:   case MAT_NO_NEW_NONZERO_LOCATIONS:
853:     a->nonew          = 1;
854:     break;
855:   case MAT_NEW_NONZERO_LOCATION_ERR:
856:     a->nonew          = -1;
857:     break;
858:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
859:     a->nonew          = -2;
860:     break;
861:   case MAT_YES_NEW_NONZERO_LOCATIONS:
862:     a->nonew          = 0;
863:     break;
864:   case MAT_ROWS_SORTED:
865:   case MAT_ROWS_UNSORTED:
866:   case MAT_YES_NEW_DIAGONALS:
867:   case MAT_IGNORE_OFF_PROC_ENTRIES:
868:   case MAT_USE_HASH_TABLE:
869:     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
870:     break;
871:   case MAT_NO_NEW_DIAGONALS:
872:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
873:   case MAT_SYMMETRIC:
874:   case MAT_STRUCTURALLY_SYMMETRIC:
875:   case MAT_NOT_SYMMETRIC:
876:   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
877:   case MAT_HERMITIAN:
878:   case MAT_NOT_HERMITIAN:
879:   case MAT_SYMMETRY_ETERNAL:
880:   case MAT_NOT_SYMMETRY_ETERNAL:
881:     break;
882:   default:
883:     SETERRQ(PETSC_ERR_SUP,"unknown option");
884:   }
885:   return(0);
886: }

890: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
891: {
892:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
894:   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
895:   MatScalar      *aa,*aa_i;
896:   PetscScalar    *v_i;

899:   bs  = A->bs;
900:   ai  = a->i;
901:   aj  = a->j;
902:   aa  = a->a;
903:   bs2 = a->bs2;
904: 
905:   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
906: 
907:   bn  = row/bs;   /* Block number */
908:   bp  = row % bs; /* Block Position */
909:   M   = ai[bn+1] - ai[bn];
910:   *nz = bs*M;
911: 
912:   if (v) {
913:     *v = 0;
914:     if (*nz) {
915:       PetscMalloc((*nz)*sizeof(PetscScalar),v);
916:       for (i=0; i<M; i++) { /* for each block in the block row */
917:         v_i  = *v + i*bs;
918:         aa_i = aa + bs2*(ai[bn] + i);
919:         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
920:       }
921:     }
922:   }

924:   if (idx) {
925:     *idx = 0;
926:     if (*nz) {
927:       PetscMalloc((*nz)*sizeof(PetscInt),idx);
928:       for (i=0; i<M; i++) { /* for each block in the block row */
929:         idx_i = *idx + i*bs;
930:         itmp  = bs*aj[ai[bn] + i];
931:         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
932:       }
933:     }
934:   }
935:   return(0);
936: }

940: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
941: {

945:   if (idx) {if (*idx) {PetscFree(*idx);}}
946:   if (v)   {if (*v)   {PetscFree(*v);}}
947:   return(0);
948: }

952: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
953: {
954:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
955:   Mat            C;
957:   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
958:   PetscInt       *rows,*cols,bs2=a->bs2;
959:   PetscScalar    *array;

962:   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
963:   PetscMalloc((1+nbs)*sizeof(PetscInt),&col);
964:   PetscMemzero(col,(1+nbs)*sizeof(PetscInt));

966: #if defined(PETSC_USE_MAT_SINGLE)
967:   PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);
968:   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
969: #else
970:   array = a->a;
971: #endif

973:   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
974:   MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);
975:   MatSetType(C,A->type_name);
976:   MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);
977:   PetscFree(col);
978:   PetscMalloc(2*bs*sizeof(PetscInt),&rows);
979:   cols = rows + bs;
980:   for (i=0; i<mbs; i++) {
981:     cols[0] = i*bs;
982:     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
983:     len = ai[i+1] - ai[i];
984:     for (j=0; j<len; j++) {
985:       rows[0] = (*aj++)*bs;
986:       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
987:       MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
988:       array += bs2;
989:     }
990:   }
991:   PetscFree(rows);
992: #if defined(PETSC_USE_MAT_SINGLE)
993:   PetscFree(array);
994: #endif
995: 
996:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
997:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
998: 
999:   if (B) {
1000:     *B = C;
1001:   } else {
1002:     MatHeaderCopy(A,C);
1003:   }
1004:   return(0);
1005: }

1009: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1010: {
1011:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1013:   PetscInt       i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2;
1014:   int            fd;
1015:   PetscScalar    *aa;
1016:   FILE           *file;

1019:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1020:   PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);
1021:   col_lens[0] = MAT_FILE_COOKIE;

1023:   col_lens[1] = A->m;
1024:   col_lens[2] = A->n;
1025:   col_lens[3] = a->nz*bs2;

1027:   /* store lengths of each row and write (including header) to file */
1028:   count = 0;
1029:   for (i=0; i<a->mbs; i++) {
1030:     for (j=0; j<bs; j++) {
1031:       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1032:     }
1033:   }
1034:   PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);
1035:   PetscFree(col_lens);

1037:   /* store column indices (zero start index) */
1038:   PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);
1039:   count = 0;
1040:   for (i=0; i<a->mbs; i++) {
1041:     for (j=0; j<bs; j++) {
1042:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1043:         for (l=0; l<bs; l++) {
1044:           jj[count++] = bs*a->j[k] + l;
1045:         }
1046:       }
1047:     }
1048:   }
1049:   PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1050:   PetscFree(jj);

1052:   /* store nonzero values */
1053:   PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
1054:   count = 0;
1055:   for (i=0; i<a->mbs; i++) {
1056:     for (j=0; j<bs; j++) {
1057:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1058:         for (l=0; l<bs; l++) {
1059:           aa[count++] = a->a[bs2*k + l*bs + j];
1060:         }
1061:       }
1062:     }
1063:   }
1064:   PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1065:   PetscFree(aa);

1067:   PetscViewerBinaryGetInfoPointer(viewer,&file);
1068:   if (file) {
1069:     fprintf(file,"-matload_block_size %d\n",(int)A->bs);
1070:   }
1071:   return(0);
1072: }

1076: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1077: {
1078:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1079:   PetscErrorCode    ierr;
1080:   PetscInt          i,j,bs = A->bs,k,l,bs2=a->bs2;
1081:   PetscViewerFormat format;

1084:   PetscViewerGetFormat(viewer,&format);
1085:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1086:     PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);
1087:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1088:     Mat aij;
1089:     MatConvert(A,MATSEQAIJ,&aij);
1090:     MatView(aij,viewer);
1091:     MatDestroy(aij);
1092:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1093:      return(0);
1094:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1095:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1096:     for (i=0; i<a->mbs; i++) {
1097:       for (j=0; j<bs; j++) {
1098:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1099:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1100:           for (l=0; l<bs; l++) {
1101: #if defined(PETSC_USE_COMPLEX)
1102:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1103:               PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1104:                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1105:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1106:               PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1107:                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1108:             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1109:               PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1110:             }
1111: #else
1112:             if (a->a[bs2*k + l*bs + j] != 0.0) {
1113:               PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1114:             }
1115: #endif
1116:           }
1117:         }
1118:         PetscViewerASCIIPrintf(viewer,"\n");
1119:       }
1120:     }
1121:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1122:   } else {
1123:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1124:     for (i=0; i<a->mbs; i++) {
1125:       for (j=0; j<bs; j++) {
1126:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1127:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1128:           for (l=0; l<bs; l++) {
1129: #if defined(PETSC_USE_COMPLEX)
1130:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1131:               PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1132:                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1133:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1134:               PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1135:                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1136:             } else {
1137:               PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1138:             }
1139: #else
1140:             PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1141: #endif
1142:           }
1143:         }
1144:         PetscViewerASCIIPrintf(viewer,"\n");
1145:       }
1146:     }
1147:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1148:   }
1149:   PetscViewerFlush(viewer);
1150:   return(0);
1151: }

1155: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1156: {
1157:   Mat            A = (Mat) Aa;
1158:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1160:   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2;
1161:   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1162:   MatScalar      *aa;
1163:   PetscViewer    viewer;


1167:   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1168:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);

1170:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

1172:   /* loop over matrix elements drawing boxes */
1173:   color = PETSC_DRAW_BLUE;
1174:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1175:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1176:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1177:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1178:       aa = a->a + j*bs2;
1179:       for (k=0; k<bs; k++) {
1180:         for (l=0; l<bs; l++) {
1181:           if (PetscRealPart(*aa++) >=  0.) continue;
1182:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1183:         }
1184:       }
1185:     }
1186:   }
1187:   color = PETSC_DRAW_CYAN;
1188:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1189:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1190:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1191:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1192:       aa = a->a + j*bs2;
1193:       for (k=0; k<bs; k++) {
1194:         for (l=0; l<bs; l++) {
1195:           if (PetscRealPart(*aa++) != 0.) continue;
1196:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1197:         }
1198:       }
1199:     }
1200:   }

1202:   color = PETSC_DRAW_RED;
1203:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1204:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1205:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1206:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1207:       aa = a->a + j*bs2;
1208:       for (k=0; k<bs; k++) {
1209:         for (l=0; l<bs; l++) {
1210:           if (PetscRealPart(*aa++) <= 0.) continue;
1211:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1212:         }
1213:       }
1214:     }
1215:   }
1216:   return(0);
1217: }

1221: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1222: {
1224:   PetscReal      xl,yl,xr,yr,w,h;
1225:   PetscDraw      draw;
1226:   PetscTruth     isnull;


1230:   PetscViewerDrawGetDraw(viewer,0,&draw);
1231:   PetscDrawIsNull(draw,&isnull); if (isnull) return(0);

1233:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1234:   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1235:   xr += w;    yr += h;  xl = -w;     yl = -h;
1236:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1237:   PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1238:   PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
1239:   return(0);
1240: }

1244: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1245: {
1247:   PetscTruth     iascii,isbinary,isdraw;

1250:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1251:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1252:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1253:   if (iascii){
1254:     MatView_SeqBAIJ_ASCII(A,viewer);
1255:   } else if (isbinary) {
1256:     MatView_SeqBAIJ_Binary(A,viewer);
1257:   } else if (isdraw) {
1258:     MatView_SeqBAIJ_Draw(A,viewer);
1259:   } else {
1260:     Mat B;
1261:     MatConvert(A,MATSEQAIJ,&B);
1262:     MatView(B,viewer);
1263:     MatDestroy(B);
1264:   }
1265:   return(0);
1266: }


1271: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1272: {
1273:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1274:   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1275:   PetscInt    *ai = a->i,*ailen = a->ilen;
1276:   PetscInt    brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
1277:   MatScalar   *ap,*aa = a->a,zero = 0.0;

1280:   for (k=0; k<m; k++) { /* loop over rows */
1281:     row  = im[k]; brow = row/bs;
1282:     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1283:     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1284:     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1285:     nrow = ailen[brow];
1286:     for (l=0; l<n; l++) { /* loop over columns */
1287:       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1288:       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1289:       col  = in[l] ;
1290:       bcol = col/bs;
1291:       cidx = col%bs;
1292:       ridx = row%bs;
1293:       high = nrow;
1294:       low  = 0; /* assume unsorted */
1295:       while (high-low > 5) {
1296:         t = (low+high)/2;
1297:         if (rp[t] > bcol) high = t;
1298:         else             low  = t;
1299:       }
1300:       for (i=low; i<high; i++) {
1301:         if (rp[i] > bcol) break;
1302:         if (rp[i] == bcol) {
1303:           *v++ = ap[bs2*i+bs*cidx+ridx];
1304:           goto finished;
1305:         }
1306:       }
1307:       *v++ = zero;
1308:       finished:;
1309:     }
1310:   }
1311:   return(0);
1312: }

1314: #if defined(PETSC_USE_MAT_SINGLE)
1317: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1318: {
1319:   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1321:   PetscInt       i,N = m*n*b->bs2;
1322:   MatScalar      *vsingle;

1325:   if (N > b->setvalueslen) {
1326:     if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
1327:     PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
1328:     b->setvalueslen  = N;
1329:   }
1330:   vsingle = b->setvaluescopy;
1331:   for (i=0; i<N; i++) {
1332:     vsingle[i] = v[i];
1333:   }
1334:   MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
1335:   return(0);
1336: }
1337: #endif


1342: PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1343: {
1344:   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1345:   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1346:   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1347:   PetscErrorCode  ierr;
1348:   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
1349:   PetscTruth      roworiented=a->roworiented;
1350:   const MatScalar *value = v;
1351:   MatScalar       *ap,*aa = a->a,*bap;

1354:   if (roworiented) {
1355:     stepval = (n-1)*bs;
1356:   } else {
1357:     stepval = (m-1)*bs;
1358:   }
1359:   for (k=0; k<m; k++) { /* loop over added rows */
1360:     row  = im[k];
1361:     if (row < 0) continue;
1362: #if defined(PETSC_USE_BOPT_g)  
1363:     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1364: #endif
1365:     rp   = aj + ai[row];
1366:     ap   = aa + bs2*ai[row];
1367:     rmax = imax[row];
1368:     nrow = ailen[row];
1369:     low  = 0;
1370:     for (l=0; l<n; l++) { /* loop over added columns */
1371:       if (in[l] < 0) continue;
1372: #if defined(PETSC_USE_BOPT_g)  
1373:       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1374: #endif
1375:       col = in[l];
1376:       if (roworiented) {
1377:         value = v + k*(stepval+bs)*bs + l*bs;
1378:       } else {
1379:         value = v + l*(stepval+bs)*bs + k*bs;
1380:       }
1381:       if (!sorted) low = 0; high = nrow;
1382:       while (high-low > 7) {
1383:         t = (low+high)/2;
1384:         if (rp[t] > col) high = t;
1385:         else             low  = t;
1386:       }
1387:       for (i=low; i<high; i++) {
1388:         if (rp[i] > col) break;
1389:         if (rp[i] == col) {
1390:           bap  = ap +  bs2*i;
1391:           if (roworiented) {
1392:             if (is == ADD_VALUES) {
1393:               for (ii=0; ii<bs; ii++,value+=stepval) {
1394:                 for (jj=ii; jj<bs2; jj+=bs) {
1395:                   bap[jj] += *value++;
1396:                 }
1397:               }
1398:             } else {
1399:               for (ii=0; ii<bs; ii++,value+=stepval) {
1400:                 for (jj=ii; jj<bs2; jj+=bs) {
1401:                   bap[jj] = *value++;
1402:                 }
1403:               }
1404:             }
1405:           } else {
1406:             if (is == ADD_VALUES) {
1407:               for (ii=0; ii<bs; ii++,value+=stepval) {
1408:                 for (jj=0; jj<bs; jj++) {
1409:                   *bap++ += *value++;
1410:                 }
1411:               }
1412:             } else {
1413:               for (ii=0; ii<bs; ii++,value+=stepval) {
1414:                 for (jj=0; jj<bs; jj++) {
1415:                   *bap++  = *value++;
1416:                 }
1417:               }
1418:             }
1419:           }
1420:           goto noinsert2;
1421:         }
1422:       }
1423:       if (nonew == 1) goto noinsert2;
1424:       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1425:       if (nrow >= rmax) {
1426:         /* there is no extra room in row, therefore enlarge */
1427:         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1428:         MatScalar *new_a;

1430:         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);

1432:         /* malloc new storage space */
1433:         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
1434:         PetscMalloc(len,&new_a);
1435:         new_j   = (PetscInt*)(new_a + bs2*new_nz);
1436:         new_i   = new_j + new_nz;

1438:         /* copy over old data into new slots */
1439:         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
1440:         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1441:         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));
1442:         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
1443:         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));
1444:         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));
1445:         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
1446:         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));
1447:         /* free up old matrix storage */
1448:         PetscFree(a->a);
1449:         if (!a->singlemalloc) {
1450:           PetscFree(a->i);
1451:           PetscFree(a->j);
1452:         }
1453:         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1454:         a->singlemalloc = PETSC_TRUE;

1456:         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
1457:         rmax = imax[row] = imax[row] + CHUNKSIZE;
1458:         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));
1459:         a->maxnz += bs2*CHUNKSIZE;
1460:         a->reallocs++;
1461:         a->nz++;
1462:       }
1463:       N = nrow++ - 1;
1464:       /* shift up all the later entries in this row */
1465:       for (ii=N; ii>=i; ii--) {
1466:         rp[ii+1] = rp[ii];
1467:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1468:       }
1469:       if (N >= i) {
1470:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1471:       }
1472:       rp[i] = col;
1473:       bap   = ap +  bs2*i;
1474:       if (roworiented) {
1475:         for (ii=0; ii<bs; ii++,value+=stepval) {
1476:           for (jj=ii; jj<bs2; jj+=bs) {
1477:             bap[jj] = *value++;
1478:           }
1479:         }
1480:       } else {
1481:         for (ii=0; ii<bs; ii++,value+=stepval) {
1482:           for (jj=0; jj<bs; jj++) {
1483:             *bap++  = *value++;
1484:           }
1485:         }
1486:       }
1487:       noinsert2:;
1488:       low = i;
1489:     }
1490:     ailen[row] = nrow;
1491:   }
1492:   return(0);
1493: }

1497: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1498: {
1499:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1500:   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1501:   PetscInt       m = A->m,*ip,N,*ailen = a->ilen;
1503:   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1504:   MatScalar      *aa = a->a,*ap;

1507:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

1509:   if (m) rmax = ailen[0];
1510:   for (i=1; i<mbs; i++) {
1511:     /* move each row back by the amount of empty slots (fshift) before it*/
1512:     fshift += imax[i-1] - ailen[i-1];
1513:     rmax   = PetscMax(rmax,ailen[i]);
1514:     if (fshift) {
1515:       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1516:       N = ailen[i];
1517:       for (j=0; j<N; j++) {
1518:         ip[j-fshift] = ip[j];
1519:         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
1520:       }
1521:     }
1522:     ai[i] = ai[i-1] + ailen[i-1];
1523:   }
1524:   if (mbs) {
1525:     fshift += imax[mbs-1] - ailen[mbs-1];
1526:     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1527:   }
1528:   /* reset ilen and imax for each row */
1529:   for (i=0; i<mbs; i++) {
1530:     ailen[i] = imax[i] = ai[i+1] - ai[i];
1531:   }
1532:   a->nz = ai[mbs];

1534:   /* diagonals may have moved, so kill the diagonal pointers */
1535:   a->idiagvalid = PETSC_FALSE;
1536:   if (fshift && a->diag) {
1537:     PetscFree(a->diag);
1538:     PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));
1539:     a->diag = 0;
1540:   }
1541:   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2);
1542:   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs);
1543:   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax);
1544:   a->reallocs          = 0;
1545:   A->info.nz_unneeded  = (PetscReal)fshift*bs2;

1547:   return(0);
1548: }



1552: /* 
1553:    This function returns an array of flags which indicate the locations of contiguous
1554:    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1555:    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1556:    Assume: sizes should be long enough to hold all the values.
1557: */
1560: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1561: {
1562:   PetscInt   i,j,k,row;
1563:   PetscTruth flg;

1566:   for (i=0,j=0; i<n; j++) {
1567:     row = idx[i];
1568:     if (row%bs!=0) { /* Not the begining of a block */
1569:       sizes[j] = 1;
1570:       i++;
1571:     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1572:       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1573:       i++;
1574:     } else { /* Begining of the block, so check if the complete block exists */
1575:       flg = PETSC_TRUE;
1576:       for (k=1; k<bs; k++) {
1577:         if (row+k != idx[i+k]) { /* break in the block */
1578:           flg = PETSC_FALSE;
1579:           break;
1580:         }
1581:       }
1582:       if (flg == PETSC_TRUE) { /* No break in the bs */
1583:         sizes[j] = bs;
1584:         i+= bs;
1585:       } else {
1586:         sizes[j] = 1;
1587:         i++;
1588:       }
1589:     }
1590:   }
1591:   *bs_max = j;
1592:   return(0);
1593: }
1594: 
1597: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1598: {
1599:   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1601:   PetscInt       i,j,k,count,is_n,*is_idx,*rows;
1602:   PetscInt       bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max;
1603:   PetscScalar    zero = 0.0;
1604:   MatScalar      *aa;

1607:   /* Make a copy of the IS and  sort it */
1608:   ISGetLocalSize(is,&is_n);
1609:   ISGetIndices(is,&is_idx);

1611:   /* allocate memory for rows,sizes */
1612:   PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);
1613:   sizes = rows + is_n;

1615:   /* copy IS values to rows, and sort them */
1616:   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1617:   PetscSortInt(is_n,rows);
1618:   if (baij->keepzeroedrows) {
1619:     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1620:     bs_max = is_n;
1621:   } else {
1622:     MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
1623:   }
1624:   ISRestoreIndices(is,&is_idx);

1626:   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1627:     row   = rows[j];
1628:     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1629:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1630:     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1631:     if (sizes[i] == bs && !baij->keepzeroedrows) {
1632:       if (diag) {
1633:         if (baij->ilen[row/bs] > 0) {
1634:           baij->ilen[row/bs]       = 1;
1635:           baij->j[baij->i[row/bs]] = row/bs;
1636:           PetscMemzero(aa,count*bs*sizeof(MatScalar));
1637:         }
1638:         /* Now insert all the diagonal values for this bs */
1639:         for (k=0; k<bs; k++) {
1640:           (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);
1641:         }
1642:       } else { /* (!diag) */
1643:         baij->ilen[row/bs] = 0;
1644:       } /* end (!diag) */
1645:     } else { /* (sizes[i] != bs) */
1646: #if defined (PETSC_USE_DEBUG)
1647:       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1648: #endif
1649:       for (k=0; k<count; k++) {
1650:         aa[0] =  zero;
1651:         aa    += bs;
1652:       }
1653:       if (diag) {
1654:         (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);
1655:       }
1656:     }
1657:   }

1659:   PetscFree(rows);
1660:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
1661:   return(0);
1662: }

1666: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1667: {
1668:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1669:   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1670:   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1671:   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
1673:   PetscInt       ridx,cidx,bs2=a->bs2;
1674:   PetscTruth     roworiented=a->roworiented;
1675:   MatScalar      *ap,value,*aa=a->a,*bap;

1678:   for (k=0; k<m; k++) { /* loop over added rows */
1679:     row  = im[k]; brow = row/bs;
1680:     if (row < 0) continue;
1681: #if defined(PETSC_USE_BOPT_g)  
1682:     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
1683: #endif
1684:     rp   = aj + ai[brow];
1685:     ap   = aa + bs2*ai[brow];
1686:     rmax = imax[brow];
1687:     nrow = ailen[brow];
1688:     low  = 0;
1689:     for (l=0; l<n; l++) { /* loop over added columns */
1690:       if (in[l] < 0) continue;
1691: #if defined(PETSC_USE_BOPT_g)  
1692:       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
1693: #endif
1694:       col = in[l]; bcol = col/bs;
1695:       ridx = row % bs; cidx = col % bs;
1696:       if (roworiented) {
1697:         value = v[l + k*n];
1698:       } else {
1699:         value = v[k + l*m];
1700:       }
1701:       if (!sorted) low = 0; high = nrow;
1702:       while (high-low > 7) {
1703:         t = (low+high)/2;
1704:         if (rp[t] > bcol) high = t;
1705:         else              low  = t;
1706:       }
1707:       for (i=low; i<high; i++) {
1708:         if (rp[i] > bcol) break;
1709:         if (rp[i] == bcol) {
1710:           bap  = ap +  bs2*i + bs*cidx + ridx;
1711:           if (is == ADD_VALUES) *bap += value;
1712:           else                  *bap  = value;
1713:           goto noinsert1;
1714:         }
1715:       }
1716:       if (nonew == 1) goto noinsert1;
1717:       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1718:       if (nrow >= rmax) {
1719:         /* there is no extra room in row, therefore enlarge */
1720:         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1721:         MatScalar *new_a;

1723:         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);

1725:         /* Malloc new storage space */
1726:         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
1727:         PetscMalloc(len,&new_a);
1728:         new_j   = (PetscInt*)(new_a + bs2*new_nz);
1729:         new_i   = new_j + new_nz;

1731:         /* copy over old data into new slots */
1732:         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1733:         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1734:         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));
1735:         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1736:         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));
1737:         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));
1738:         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
1739:         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));
1740:         /* free up old matrix storage */
1741:         PetscFree(a->a);
1742:         if (!a->singlemalloc) {
1743:           PetscFree(a->i);
1744:           PetscFree(a->j);
1745:         }
1746:         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1747:         a->singlemalloc = PETSC_TRUE;

1749:         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1750:         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1751:         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));
1752:         a->maxnz += bs2*CHUNKSIZE;
1753:         a->reallocs++;
1754:         a->nz++;
1755:       }
1756:       N = nrow++ - 1;
1757:       /* shift up all the later entries in this row */
1758:       for (ii=N; ii>=i; ii--) {
1759:         rp[ii+1] = rp[ii];
1760:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1761:       }
1762:       if (N>=i) {
1763:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1764:       }
1765:       rp[i]                      = bcol;
1766:       ap[bs2*i + bs*cidx + ridx] = value;
1767:       noinsert1:;
1768:       low = i;
1769:     }
1770:     ailen[brow] = nrow;
1771:   }
1772:   return(0);
1773: }


1778: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1779: {
1780:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1781:   Mat            outA;
1783:   PetscTruth     row_identity,col_identity;

1786:   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1787:   ISIdentity(row,&row_identity);
1788:   ISIdentity(col,&col_identity);
1789:   if (!row_identity || !col_identity) {
1790:     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1791:   }

1793:   outA          = inA;
1794:   inA->factor   = FACTOR_LU;

1796:   if (!a->diag) {
1797:     MatMarkDiagonal_SeqBAIJ(inA);
1798:   }

1800:   a->row        = row;
1801:   a->col        = col;
1802:   PetscObjectReference((PetscObject)row);
1803:   PetscObjectReference((PetscObject)col);
1804: 
1805:   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1806:   ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
1807:   PetscLogObjectParent(inA,a->icol);
1808: 
1809:   /*
1810:       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 
1811:       for ILU(0) factorization with natural ordering
1812:   */
1813:   if (inA->bs < 8) {
1814:     MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);
1815:   } else {
1816:     if (!a->solve_work) {
1817:       PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);
1818:       PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));
1819:     }
1820:   }

1822:   MatLUFactorNumeric(inA,&outA);

1824:   return(0);
1825: }
1828: PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1829: {
1830:   static PetscTruth called = PETSC_FALSE;
1831:   MPI_Comm          comm = A->comm;
1832:   PetscErrorCode    ierr;

1835:   if (called) {return(0);} else called = PETSC_TRUE;
1836:   (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1837:   (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");
1838:   return(0);
1839: }

1844: PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1845: {
1846:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1847:   PetscInt    i,nz,nbs;

1850:   nz  = baij->maxnz/baij->bs2;
1851:   nbs = baij->nbs;
1852:   for (i=0; i<nz; i++) {
1853:     baij->j[i] = indices[i];
1854:   }
1855:   baij->nz = nz;
1856:   for (i=0; i<nbs; i++) {
1857:     baij->ilen[i] = baij->imax[i];
1858:   }

1860:   return(0);
1861: }

1866: /*@
1867:     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1868:        in the matrix.

1870:   Input Parameters:
1871: +  mat - the SeqBAIJ matrix
1872: -  indices - the column indices

1874:   Level: advanced

1876:   Notes:
1877:     This can be called if you have precomputed the nonzero structure of the 
1878:   matrix and want to provide it to the matrix object to improve the performance
1879:   of the MatSetValues() operation.

1881:     You MUST have set the correct numbers of nonzeros per row in the call to 
1882:   MatCreateSeqBAIJ().

1884:     MUST be called before any calls to MatSetValues();

1886: @*/
1887: PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1888: {
1889:   PetscErrorCode ierr,(*f)(Mat,PetscInt *);

1894:   PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
1895:   if (f) {
1896:     (*f)(mat,indices);
1897:   } else {
1898:     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1899:   }
1900:   return(0);
1901: }

1905: PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1906: {
1907:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1909:   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1910:   PetscReal      atmp;
1911:   PetscScalar    *x,zero = 0.0;
1912:   MatScalar      *aa;
1913:   PetscInt       ncols,brow,krow,kcol;

1916:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1917:   bs   = A->bs;
1918:   aa   = a->a;
1919:   ai   = a->i;
1920:   aj   = a->j;
1921:   mbs = a->mbs;

1923:   VecSet(&zero,v);
1924:   VecGetArray(v,&x);
1925:   VecGetLocalSize(v,&n);
1926:   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1927:   for (i=0; i<mbs; i++) {
1928:     ncols = ai[1] - ai[0]; ai++;
1929:     brow  = bs*i;
1930:     for (j=0; j<ncols; j++){
1931:       /* bcol = bs*(*aj); */
1932:       for (kcol=0; kcol<bs; kcol++){
1933:         for (krow=0; krow<bs; krow++){
1934:           atmp = PetscAbsScalar(*aa); aa++;
1935:           row = brow + krow;    /* row index */
1936:           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1937:           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1938:         }
1939:       }
1940:       aj++;
1941:     }
1942:   }
1943:   VecRestoreArray(v,&x);
1944:   return(0);
1945: }

1949: PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1950: {

1954:    MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);
1955:   return(0);
1956: }

1960: PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1961: {
1962:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1964:   *array = a->a;
1965:   return(0);
1966: }

1970: PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1971: {
1973:   return(0);
1974: }

1976:  #include petscblaslapack.h
1979: PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1980: {
1981:   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1983:   PetscInt       i,bs=Y->bs,j,bs2;
1984:   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;

1987:   if (str == SAME_NONZERO_PATTERN) {
1988:     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1989:   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1990:     if (y->xtoy && y->XtoY != X) {
1991:       PetscFree(y->xtoy);
1992:       MatDestroy(y->XtoY);
1993:     }
1994:     if (!y->xtoy) { /* get xtoy */
1995:       MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);
1996:       y->XtoY = X;
1997:     }
1998:     bs2 = bs*bs;
1999:     for (i=0; i<x->nz; i++) {
2000:       j = 0;
2001:       while (j < bs2){
2002:         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
2003:         j++;
2004:       }
2005:     }
2006:     PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
2007:   } else {
2008:     MatAXPY_Basic(a,X,Y,str);
2009:   }
2010:   return(0);
2011: }

2013: /* -------------------------------------------------------------------*/
2014: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2015:        MatGetRow_SeqBAIJ,
2016:        MatRestoreRow_SeqBAIJ,
2017:        MatMult_SeqBAIJ_N,
2018: /* 4*/ MatMultAdd_SeqBAIJ_N,
2019:        MatMultTranspose_SeqBAIJ,
2020:        MatMultTransposeAdd_SeqBAIJ,
2021:        MatSolve_SeqBAIJ_N,
2022:        0,
2023:        0,
2024: /*10*/ 0,
2025:        MatLUFactor_SeqBAIJ,
2026:        0,
2027:        0,
2028:        MatTranspose_SeqBAIJ,
2029: /*15*/ MatGetInfo_SeqBAIJ,
2030:        MatEqual_SeqBAIJ,
2031:        MatGetDiagonal_SeqBAIJ,
2032:        MatDiagonalScale_SeqBAIJ,
2033:        MatNorm_SeqBAIJ,
2034: /*20*/ 0,
2035:        MatAssemblyEnd_SeqBAIJ,
2036:        0,
2037:        MatSetOption_SeqBAIJ,
2038:        MatZeroEntries_SeqBAIJ,
2039: /*25*/ MatZeroRows_SeqBAIJ,
2040:        MatLUFactorSymbolic_SeqBAIJ,
2041:        MatLUFactorNumeric_SeqBAIJ_N,
2042:        0,
2043:        0,
2044: /*30*/ MatSetUpPreallocation_SeqBAIJ,
2045:        MatILUFactorSymbolic_SeqBAIJ,
2046:        0,
2047:        MatGetArray_SeqBAIJ,
2048:        MatRestoreArray_SeqBAIJ,
2049: /*35*/ MatDuplicate_SeqBAIJ,
2050:        0,
2051:        0,
2052:        MatILUFactor_SeqBAIJ,
2053:        0,
2054: /*40*/ MatAXPY_SeqBAIJ,
2055:        MatGetSubMatrices_SeqBAIJ,
2056:        MatIncreaseOverlap_SeqBAIJ,
2057:        MatGetValues_SeqBAIJ,
2058:        0,
2059: /*45*/ MatPrintHelp_SeqBAIJ,
2060:        MatScale_SeqBAIJ,
2061:        0,
2062:        0,
2063:        0,
2064: /*50*/ 0,
2065:        MatGetRowIJ_SeqBAIJ,
2066:        MatRestoreRowIJ_SeqBAIJ,
2067:        0,
2068:        0,
2069: /*55*/ 0,
2070:        0,
2071:        0,
2072:        0,
2073:        MatSetValuesBlocked_SeqBAIJ,
2074: /*60*/ MatGetSubMatrix_SeqBAIJ,
2075:        MatDestroy_SeqBAIJ,
2076:        MatView_SeqBAIJ,
2077:        MatGetPetscMaps_Petsc,
2078:        0,
2079: /*65*/ 0,
2080:        0,
2081:        0,
2082:        0,
2083:        0,
2084: /*70*/ MatGetRowMax_SeqBAIJ,
2085:        MatConvert_Basic,
2086:        0,
2087:        0,
2088:        0,
2089: /*75*/ 0,
2090:        0,
2091:        0,
2092:        0,
2093:        0,
2094: /*80*/ 0,
2095:        0,
2096:        0,
2097:        0,
2098:        MatLoad_SeqBAIJ,
2099: /*85*/ 0,
2100:        0,
2101:        0,
2102:        0,
2103:        0,
2104: /*90*/ 0,
2105:        0,
2106:        0,
2107:        0,
2108:        0,
2109: /*95*/ 0,
2110:        0,
2111:        0,
2112:        0};

2117: PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
2118: {
2119:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2120:   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;

2124:   if (aij->nonew != 1) {
2125:     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2126:   }

2128:   /* allocate space for values if not already there */
2129:   if (!aij->saved_values) {
2130:     PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
2131:   }

2133:   /* copy values over */
2134:   PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
2135:   return(0);
2136: }

2142: PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
2143: {
2144:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2146:   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;

2149:   if (aij->nonew != 1) {
2150:     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2151:   }
2152:   if (!aij->saved_values) {
2153:     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2154:   }

2156:   /* copy values over */
2157:   PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
2158:   return(0);
2159: }


2170: PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2171: {
2172:   Mat_SeqBAIJ    *b;
2174:   PetscInt       i,len,mbs,nbs,bs2,newbs = bs;
2175:   PetscTruth     flg;


2179:   B->preallocated = PETSC_TRUE;
2180:   PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);
2181:   if (nnz && newbs != bs) {
2182:     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2183:   }
2184:   bs   = newbs;

2186:   mbs  = B->m/bs;
2187:   nbs  = B->n/bs;
2188:   bs2  = bs*bs;

2190:   if (mbs*bs!=B->m || nbs*bs!=B->n) {
2191:     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs);
2192:   }

2194:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2195:   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2196:   if (nnz) {
2197:     for (i=0; i<mbs; i++) {
2198:       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2199:       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2200:     }
2201:   }

2203:   b       = (Mat_SeqBAIJ*)B->data;
2204:   PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);
2205:   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2206:   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2207:   if (!flg) {
2208:     switch (bs) {
2209:     case 1:
2210:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2211:       B->ops->mult            = MatMult_SeqBAIJ_1;
2212:       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2213:       break;
2214:     case 2:
2215:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2216:       B->ops->mult            = MatMult_SeqBAIJ_2;
2217:       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2218:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2219:       break;
2220:     case 3:
2221:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2222:       B->ops->mult            = MatMult_SeqBAIJ_3;
2223:       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2224:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2225:       break;
2226:     case 4:
2227:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2228:       B->ops->mult            = MatMult_SeqBAIJ_4;
2229:       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2230:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2231:       break;
2232:     case 5:
2233:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2234:       B->ops->mult            = MatMult_SeqBAIJ_5;
2235:       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2236:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2237:       break;
2238:     case 6:
2239:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2240:       B->ops->mult            = MatMult_SeqBAIJ_6;
2241:       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2242:       break;
2243:     case 7:
2244:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2245:       B->ops->mult            = MatMult_SeqBAIJ_7;
2246:       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2247:       break;
2248:     default:
2249:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2250:       B->ops->mult            = MatMult_SeqBAIJ_N;
2251:       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2252:       break;
2253:     }
2254:   }
2255:   B->bs      = bs;
2256:   b->mbs     = mbs;
2257:   b->nbs     = nbs;
2258:   PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);
2259:   if (!nnz) {
2260:     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2261:     else if (nz <= 0)        nz = 1;
2262:     for (i=0; i<mbs; i++) b->imax[i] = nz;
2263:     nz = nz*mbs;
2264:   } else {
2265:     nz = 0;
2266:     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2267:   }

2269:   /* allocate the matrix space */
2270:   len   = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt);
2271:   PetscMalloc(len,&b->a);
2272:   PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2273:   b->j  = (PetscInt*)(b->a + nz*bs2);
2274:   PetscMemzero(b->j,nz*sizeof(PetscInt));
2275:   b->i  = b->j + nz;
2276:   b->singlemalloc = PETSC_TRUE;

2278:   b->i[0] = 0;
2279:   for (i=1; i<mbs+1; i++) {
2280:     b->i[i] = b->i[i-1] + b->imax[i-1];
2281:   }

2283:   /* b->ilen will count nonzeros in each block row so far. */
2284:   PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);
2285:   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2286:   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}

2288:   B->bs               = bs;
2289:   b->bs2              = bs2;
2290:   b->mbs              = mbs;
2291:   b->nz               = 0;
2292:   b->maxnz            = nz*bs2;
2293:   B->info.nz_unneeded = (PetscReal)b->maxnz;
2294:   return(0);
2295: }

2298: /*MC
2299:    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 
2300:    block sparse compressed row format.

2302:    Options Database Keys:
2303: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()

2305:   Level: beginner

2307: .seealso: MatCreateSeqBAIJ
2308: M*/

2313: PetscErrorCode MatCreate_SeqBAIJ(Mat B)
2314: {
2316:   PetscMPIInt    size;
2317:   Mat_SeqBAIJ    *b;

2320:   MPI_Comm_size(B->comm,&size);
2321:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");

2323:   B->m = B->M = PetscMax(B->m,B->M);
2324:   B->n = B->N = PetscMax(B->n,B->N);
2325:   PetscNew(Mat_SeqBAIJ,&b);
2326:   B->data = (void*)b;
2327:   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
2328:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
2329:   B->factor           = 0;
2330:   B->lupivotthreshold = 1.0;
2331:   B->mapping          = 0;
2332:   b->row              = 0;
2333:   b->col              = 0;
2334:   b->icol             = 0;
2335:   b->reallocs         = 0;
2336:   b->saved_values     = 0;
2337: #if defined(PETSC_USE_MAT_SINGLE)
2338:   b->setvalueslen     = 0;
2339:   b->setvaluescopy    = PETSC_NULL;
2340: #endif

2342:   PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);
2343:   PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);

2345:   b->sorted           = PETSC_FALSE;
2346:   b->roworiented      = PETSC_TRUE;
2347:   b->nonew            = 0;
2348:   b->diag             = 0;
2349:   b->solve_work       = 0;
2350:   b->mult_work        = 0;
2351:   B->spptr            = 0;
2352:   B->info.nz_unneeded = (PetscReal)b->maxnz;
2353:   b->keepzeroedrows   = PETSC_FALSE;
2354:   b->xtoy              = 0;
2355:   b->XtoY              = 0;

2357:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2358:                                      "MatStoreValues_SeqBAIJ",
2359:                                       MatStoreValues_SeqBAIJ);
2360:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2361:                                      "MatRetrieveValues_SeqBAIJ",
2362:                                       MatRetrieveValues_SeqBAIJ);
2363:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2364:                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2365:                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);
2366:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2367:                                      "MatConvert_SeqBAIJ_SeqAIJ",
2368:                                       MatConvert_SeqBAIJ_SeqAIJ);
2369: #if !defined(PETSC_USE_64BIT_INT)
2370:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2371:                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2372:                                       MatConvert_SeqBAIJ_SeqSBAIJ);
2373: #endif
2374:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2375:                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2376:                                       MatSeqBAIJSetPreallocation_SeqBAIJ);
2377:   return(0);
2378: }

2383: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2384: {
2385:   Mat            C;
2386:   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2388:   PetscInt       i,len,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;

2391:   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");

2393:   *B = 0;
2394:   MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);
2395:   MatSetType(C,A->type_name);
2396:   PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
2397:   c    = (Mat_SeqBAIJ*)C->data;

2399:   C->M   = A->M;
2400:   C->N   = A->N;
2401:   C->bs  = A->bs;
2402:   c->bs2 = a->bs2;
2403:   c->mbs = a->mbs;
2404:   c->nbs = a->nbs;

2406:   PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);
2407:   PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);
2408:   for (i=0; i<mbs; i++) {
2409:     c->imax[i] = a->imax[i];
2410:     c->ilen[i] = a->ilen[i];
2411:   }

2413:   /* allocate the matrix space */
2414:   c->singlemalloc = PETSC_TRUE;
2415:   len  = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt));
2416:   PetscMalloc(len,&c->a);
2417:   c->j = (PetscInt*)(c->a + nz*bs2);
2418:   c->i = c->j + nz;
2419:   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
2420:   if (mbs > 0) {
2421:     PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
2422:     if (cpvalues == MAT_COPY_VALUES) {
2423:       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
2424:     } else {
2425:       PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
2426:     }
2427:   }

2429:   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2430:   c->sorted      = a->sorted;
2431:   c->roworiented = a->roworiented;
2432:   c->nonew       = a->nonew;

2434:   if (a->diag) {
2435:     PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);
2436:     PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));
2437:     for (i=0; i<mbs; i++) {
2438:       c->diag[i] = a->diag[i];
2439:     }
2440:   } else c->diag        = 0;
2441:   c->nz                 = a->nz;
2442:   c->maxnz              = a->maxnz;
2443:   c->solve_work         = 0;
2444:   c->mult_work          = 0;
2445:   C->preallocated       = PETSC_TRUE;
2446:   C->assembled          = PETSC_TRUE;
2447:   *B = C;
2448:   PetscFListDuplicate(A->qlist,&C->qlist);
2449:   return(0);
2450: }

2454: PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
2455: {
2456:   Mat_SeqBAIJ    *a;
2457:   Mat            B;
2459:   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2460:   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2461:   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2462:   PetscInt       *masked,nmask,tmp,bs2,ishift;
2463:   PetscMPIInt    size;
2464:   int            fd;
2465:   PetscScalar    *aa;
2466:   MPI_Comm       comm = ((PetscObject)viewer)->comm;

2469:   PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
2470:   bs2  = bs*bs;

2472:   MPI_Comm_size(comm,&size);
2473:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2474:   PetscViewerBinaryGetDescriptor(viewer,&fd);
2475:   PetscBinaryRead(fd,header,4,PETSC_INT);
2476:   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2477:   M = header[1]; N = header[2]; nz = header[3];

2479:   if (header[3] < 0) {
2480:     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2481:   }

2483:   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

2485:   /* 
2486:      This code adds extra rows to make sure the number of rows is 
2487:     divisible by the blocksize
2488:   */
2489:   mbs        = M/bs;
2490:   extra_rows = bs - M + bs*(mbs);
2491:   if (extra_rows == bs) extra_rows = 0;
2492:   else                  mbs++;
2493:   if (extra_rows) {
2494:     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
2495:   }

2497:   /* read in row lengths */
2498:   PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
2499:   PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
2500:   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;

2502:   /* read in column indices */
2503:   PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);
2504:   PetscBinaryRead(fd,jj,nz,PETSC_INT);
2505:   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;

2507:   /* loop over row lengths determining block row lengths */
2508:   PetscMalloc(mbs*sizeof(PetscInt),&browlengths);
2509:   PetscMemzero(browlengths,mbs*sizeof(PetscInt));
2510:   PetscMalloc(2*mbs*sizeof(PetscInt),&mask);
2511:   PetscMemzero(mask,mbs*sizeof(PetscInt));
2512:   masked   = mask + mbs;
2513:   rowcount = 0; nzcount = 0;
2514:   for (i=0; i<mbs; i++) {
2515:     nmask = 0;
2516:     for (j=0; j<bs; j++) {
2517:       kmax = rowlengths[rowcount];
2518:       for (k=0; k<kmax; k++) {
2519:         tmp = jj[nzcount++]/bs;
2520:         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2521:       }
2522:       rowcount++;
2523:     }
2524:     browlengths[i] += nmask;
2525:     /* zero out the mask elements we set */
2526:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2527:   }

2529:   /* create our matrix */
2530:   MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
2531:   MatSetType(B,type);
2532:   MatSeqBAIJSetPreallocation(B,bs,0,browlengths);
2533:   a = (Mat_SeqBAIJ*)B->data;

2535:   /* set matrix "i" values */
2536:   a->i[0] = 0;
2537:   for (i=1; i<= mbs; i++) {
2538:     a->i[i]      = a->i[i-1] + browlengths[i-1];
2539:     a->ilen[i-1] = browlengths[i-1];
2540:   }
2541:   a->nz         = 0;
2542:   for (i=0; i<mbs; i++) a->nz += browlengths[i];

2544:   /* read in nonzero values */
2545:   PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
2546:   PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
2547:   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;

2549:   /* set "a" and "j" values into matrix */
2550:   nzcount = 0; jcount = 0;
2551:   for (i=0; i<mbs; i++) {
2552:     nzcountb = nzcount;
2553:     nmask    = 0;
2554:     for (j=0; j<bs; j++) {
2555:       kmax = rowlengths[i*bs+j];
2556:       for (k=0; k<kmax; k++) {
2557:         tmp = jj[nzcount++]/bs;
2558:         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2559:       }
2560:     }
2561:     /* sort the masked values */
2562:     PetscSortInt(nmask,masked);

2564:     /* set "j" values into matrix */
2565:     maskcount = 1;
2566:     for (j=0; j<nmask; j++) {
2567:       a->j[jcount++]  = masked[j];
2568:       mask[masked[j]] = maskcount++;
2569:     }
2570:     /* set "a" values into matrix */
2571:     ishift = bs2*a->i[i];
2572:     for (j=0; j<bs; j++) {
2573:       kmax = rowlengths[i*bs+j];
2574:       for (k=0; k<kmax; k++) {
2575:         tmp       = jj[nzcountb]/bs ;
2576:         block     = mask[tmp] - 1;
2577:         point     = jj[nzcountb] - bs*tmp;
2578:         idx       = ishift + bs2*block + j + bs*point;
2579:         a->a[idx] = (MatScalar)aa[nzcountb++];
2580:       }
2581:     }
2582:     /* zero out the mask elements we set */
2583:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2584:   }
2585:   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");

2587:   PetscFree(rowlengths);
2588:   PetscFree(browlengths);
2589:   PetscFree(aa);
2590:   PetscFree(jj);
2591:   PetscFree(mask);

2593:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2594:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2595:   MatView_Private(B);

2597:   *A = B;
2598:   return(0);
2599: }

2603: /*@C
2604:    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2605:    compressed row) format.  For good matrix assembly performance the
2606:    user should preallocate the matrix storage by setting the parameter nz
2607:    (or the array nnz).  By setting these parameters accurately, performance
2608:    during matrix assembly can be increased by more than a factor of 50.

2610:    Collective on MPI_Comm

2612:    Input Parameters:
2613: +  comm - MPI communicator, set to PETSC_COMM_SELF
2614: .  bs - size of block
2615: .  m - number of rows
2616: .  n - number of columns
2617: .  nz - number of nonzero blocks  per block row (same for all rows)
2618: -  nnz - array containing the number of nonzero blocks in the various block rows 
2619:          (possibly different for each block row) or PETSC_NULL

2621:    Output Parameter:
2622: .  A - the matrix 

2624:    Options Database Keys:
2625: .   -mat_no_unroll - uses code that does not unroll the loops in the 
2626:                      block calculations (much slower)
2627: .    -mat_block_size - size of the blocks to use

2629:    Level: intermediate

2631:    Notes:
2632:    If the nnz parameter is given then the nz parameter is ignored

2634:    A nonzero block is any block that as 1 or more nonzeros in it

2636:    The block AIJ format is fully compatible with standard Fortran 77
2637:    storage.  That is, the stored row and column indices can begin at
2638:    either one (as in Fortran) or zero.  See the users' manual for details.

2640:    Specify the preallocated storage with either nz or nnz (not both).
2641:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
2642:    allocation.  For additional details, see the users manual chapter on
2643:    matrices.

2645: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2646: @*/
2647: PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2648: {
2650: 
2652:   MatCreate(comm,m,n,m,n,A);
2653:   MatSetType(*A,MATSEQBAIJ);
2654:   MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);
2655:   return(0);
2656: }

2660: /*@C
2661:    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2662:    per row in the matrix. For good matrix assembly performance the
2663:    user should preallocate the matrix storage by setting the parameter nz
2664:    (or the array nnz).  By setting these parameters accurately, performance
2665:    during matrix assembly can be increased by more than a factor of 50.

2667:    Collective on MPI_Comm

2669:    Input Parameters:
2670: +  A - the matrix
2671: .  bs - size of block
2672: .  nz - number of block nonzeros per block row (same for all rows)
2673: -  nnz - array containing the number of block nonzeros in the various block rows 
2674:          (possibly different for each block row) or PETSC_NULL

2676:    Options Database Keys:
2677: .   -mat_no_unroll - uses code that does not unroll the loops in the 
2678:                      block calculations (much slower)
2679: .    -mat_block_size - size of the blocks to use

2681:    Level: intermediate

2683:    Notes:
2684:    If the nnz parameter is given then the nz parameter is ignored

2686:    The block AIJ format is fully compatible with standard Fortran 77
2687:    storage.  That is, the stored row and column indices can begin at
2688:    either one (as in Fortran) or zero.  See the users' manual for details.

2690:    Specify the preallocated storage with either nz or nnz (not both).
2691:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
2692:    allocation.  For additional details, see the users manual chapter on
2693:    matrices.

2695: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2696: @*/
2697: PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2698: {
2699:   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);

2702:   PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);
2703:   if (f) {
2704:     (*f)(B,bs,nz,nnz);
2705:   }
2706:   return(0);
2707: }