Actual source code: baij.c

  1: /*$Id: baij.c,v 1.225 2001/04/10 19:35:34 bsmith Exp $*/

  3: /*
  4:     Defines the basic matrix operations for the BAIJ (compressed row)
  5:   matrix storage format.
  6: */
 7:  #include petscsys.h
 8:  #include src/mat/impls/baij/seq/baij.h
 9:  #include src/vec/vecimpl.h
 10:  #include src/inline/spops.h

 12: /*  UGLY, ugly, ugly
 13:    When MatScalar == Scalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 
 14:    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 
 15:    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
 16:    converts the entries into single precision and then calls ..._MatScalar() to put them
 17:    into the single precision data structures.
 18: */
 19: #if defined(PETSC_USE_MAT_SINGLE)
 20: EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
 21: #else
 22: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
 23: #endif

 25: #define CHUNKSIZE  10

 27: /*
 28:      Checks for missing diagonals
 29: */
 30: int MatMissingDiagonal_SeqBAIJ(Mat A)
 31: {
 32:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
 33:   int         *diag,*jj = a->j,i,ierr;

 36:   MatMarkDiagonal_SeqBAIJ(A);
 37:   diag = a->diag;
 38:   for (i=0; i<a->mbs; i++) {
 39:     if (jj[diag[i]] != i) {
 40:       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
 41:     }
 42:   }
 43:   return(0);
 44: }

 46: int MatMarkDiagonal_SeqBAIJ(Mat A)
 47: {
 48:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
 49:   int         i,j,*diag,m = a->mbs,ierr;

 52:   if (a->diag) return(0);

 54:   PetscMalloc((m+1)*sizeof(int),&diag);
 55:   PetscLogObjectMemory(A,(m+1)*sizeof(int));
 56:   for (i=0; i<m; i++) {
 57:     diag[i] = a->i[i+1];
 58:     for (j=a->i[i]; j<a->i[i+1]; j++) {
 59:       if (a->j[j] == i) {
 60:         diag[i] = j;
 61:         break;
 62:       }
 63:     }
 64:   }
 65:   a->diag = diag;
 66:   return(0);
 67: }


 70: EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);

 72: static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
 73: {
 74:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
 75:   int         ierr,n = a->mbs,i;

 78:   *nn = n;
 79:   if (!ia) return(0);
 80:   if (symmetric) {
 81:     MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);
 82:   } else if (oshift == 1) {
 83:     /* temporarily add 1 to i and j indices */
 84:     int nz = a->i[n];
 85:     for (i=0; i<nz; i++) a->j[i]++;
 86:     for (i=0; i<n+1; i++) a->i[i]++;
 87:     *ia = a->i; *ja = a->j;
 88:   } else {
 89:     *ia = a->i; *ja = a->j;
 90:   }

 92:   return(0);
 93: }

 95: static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
 96: {
 97:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
 98:   int         i,n = a->mbs,ierr;

101:   if (!ia) return(0);
102:   if (symmetric) {
103:     PetscFree(*ia);
104:     PetscFree(*ja);
105:   } else if (oshift == 1) {
106:     int nz = a->i[n]-1;
107:     for (i=0; i<nz; i++) a->j[i]--;
108:     for (i=0; i<n+1; i++) a->i[i]--;
109:   }
110:   return(0);
111: }

113: int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
114: {
115:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;

118:   *bs = baij->bs;
119:   return(0);
120: }


123: int MatDestroy_SeqBAIJ(Mat A)
124: {
125:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
126:   int         ierr;

129: #if defined(PETSC_USE_LOG)
130:   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
131: #endif
132:   PetscFree(a->a);
133:   if (!a->singlemalloc) {
134:     PetscFree(a->i);
135:     PetscFree(a->j);
136:   }
137:   if (a->row) {
138:     ISDestroy(a->row);
139:   }
140:   if (a->col) {
141:     ISDestroy(a->col);
142:   }
143:   if (a->diag) {PetscFree(a->diag);}
144:   if (a->ilen) {PetscFree(a->ilen);}
145:   if (a->imax) {PetscFree(a->imax);}
146:   if (a->solve_work) {PetscFree(a->solve_work);}
147:   if (a->mult_work) {PetscFree(a->mult_work);}
148:   if (a->icol) {ISDestroy(a->icol);}
149:   if (a->saved_values) {PetscFree(a->saved_values);}
150: #if defined(PETSC_USE_MAT_SINGLE)
151:   if (a->setvaluescopy) {PetscFree(a->setvaluescopy);}
152: #endif
153:   PetscFree(a);
154:   return(0);
155: }

157: int MatSetOption_SeqBAIJ(Mat A,MatOption op)
158: {
159:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;

162:   if      (op == MAT_ROW_ORIENTED)                 a->roworiented    = PETSC_TRUE;
163:   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented    = PETSC_FALSE;
164:   else if (op == MAT_COLUMNS_SORTED)               a->sorted         = PETSC_TRUE;
165:   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted         = PETSC_FALSE;
166:   else if (op == MAT_KEEP_ZEROED_ROWS)             a->keepzeroedrows = PETSC_TRUE;
167:   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew          = 1;
168:   else if (op == MAT_NEW_NONZERO_LOCATION_ERR)     a->nonew          = -1;
169:   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR)   a->nonew          = -2;
170:   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew          = 0;
171:   else if (op == MAT_ROWS_SORTED ||
172:            op == MAT_ROWS_UNSORTED ||
173:            op == MAT_YES_NEW_DIAGONALS ||
174:            op == MAT_IGNORE_OFF_PROC_ENTRIES ||
175:            op == MAT_USE_HASH_TABLE) {
176:     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignoredn");
177:   } else if (op == MAT_NO_NEW_DIAGONALS) {
178:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
179:   } else {
180:     SETERRQ(PETSC_ERR_SUP,"unknown option");
181:   }
182:   return(0);
183: }

185: int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
186: {
188:   if (m) *m = 0;
189:   if (n) *n = A->m;
190:   return(0);
191: }

193: int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
194: {
195:   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
196:   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
197:   MatScalar    *aa,*aa_i;
198:   Scalar       *v_i;

201:   bs  = a->bs;
202:   ai  = a->i;
203:   aj  = a->j;
204:   aa  = a->a;
205:   bs2 = a->bs2;
206: 
207:   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
208: 
209:   bn  = row/bs;   /* Block number */
210:   bp  = row % bs; /* Block Position */
211:   M   = ai[bn+1] - ai[bn];
212:   *nz = bs*M;
213: 
214:   if (v) {
215:     *v = 0;
216:     if (*nz) {
217:       PetscMalloc((*nz)*sizeof(Scalar),v);
218:       for (i=0; i<M; i++) { /* for each block in the block row */
219:         v_i  = *v + i*bs;
220:         aa_i = aa + bs2*(ai[bn] + i);
221:         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
222:       }
223:     }
224:   }

226:   if (idx) {
227:     *idx = 0;
228:     if (*nz) {
229:       PetscMalloc((*nz)*sizeof(int),idx);
230:       for (i=0; i<M; i++) { /* for each block in the block row */
231:         idx_i = *idx + i*bs;
232:         itmp  = bs*aj[ai[bn] + i];
233:         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
234:       }
235:     }
236:   }
237:   return(0);
238: }

240: int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
241: {

245:   if (idx) {if (*idx) {PetscFree(*idx);}}
246:   if (v)   {if (*v)   {PetscFree(*v);}}
247:   return(0);
248: }

250: int MatTranspose_SeqBAIJ(Mat A,Mat *B)
251: {
252:   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
253:   Mat         C;
254:   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
255:   int         *rows,*cols,bs2=a->bs2;
256:   Scalar      *array;

259:   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
260:   PetscMalloc((1+nbs)*sizeof(int),&col);
261:   PetscMemzero(col,(1+nbs)*sizeof(int));

263: #if defined(PETSC_USE_MAT_SINGLE)
264:   PetscMalloc(a->bs2*a->nz*sizeof(Scalar),&array);
265:   for (i=0; i<a->bs2*a->nz; i++) array[i] = (Scalar)a->a[i];
266: #else
267:   array = a->a;
268: #endif

270:   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
271:   MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);
272:   PetscFree(col);
273:   PetscMalloc(2*bs*sizeof(int),&rows);
274:   cols = rows + bs;
275:   for (i=0; i<mbs; i++) {
276:     cols[0] = i*bs;
277:     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
278:     len = ai[i+1] - ai[i];
279:     for (j=0; j<len; j++) {
280:       rows[0] = (*aj++)*bs;
281:       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
282:       MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
283:       array += bs2;
284:     }
285:   }
286:   PetscFree(rows);
287: #if defined(PETSC_USE_MAT_SINGLE)
288:   PetscFree(array);
289: #endif
290: 
291:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
292:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
293: 
294:   if (B) {
295:     *B = C;
296:   } else {
297:     MatHeaderCopy(A,C);
298:   }
299:   return(0);
300: }

302: static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
303: {
304:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
305:   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
306:   Scalar      *aa;
307:   FILE        *file;

310:   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);
311:   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);
312:   col_lens[0] = MAT_COOKIE;

314:   col_lens[1] = A->m;
315:   col_lens[2] = A->n;
316:   col_lens[3] = a->nz*bs2;

318:   /* store lengths of each row and write (including header) to file */
319:   count = 0;
320:   for (i=0; i<a->mbs; i++) {
321:     for (j=0; j<bs; j++) {
322:       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
323:     }
324:   }
325:   PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);
326:   PetscFree(col_lens);

328:   /* store column indices (zero start index) */
329:   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);
330:   count = 0;
331:   for (i=0; i<a->mbs; i++) {
332:     for (j=0; j<bs; j++) {
333:       for (k=a->i[i]; k<a->i[i+1]; k++) {
334:         for (l=0; l<bs; l++) {
335:           jj[count++] = bs*a->j[k] + l;
336:         }
337:       }
338:     }
339:   }
340:   PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);
341:   PetscFree(jj);

343:   /* store nonzero values */
344:   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(Scalar),&aa);
345:   count = 0;
346:   for (i=0; i<a->mbs; i++) {
347:     for (j=0; j<bs; j++) {
348:       for (k=a->i[i]; k<a->i[i+1]; k++) {
349:         for (l=0; l<bs; l++) {
350:           aa[count++] = a->a[bs2*k + l*bs + j];
351:         }
352:       }
353:     }
354:   }
355:   PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);
356:   PetscFree(aa);

358:   PetscViewerBinaryGetInfoPointer(viewer,&file);
359:   if (file) {
360:     fprintf(file,"-matload_block_size %dn",a->bs);
361:   }
362:   return(0);
363: }

365: static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
366: {
367:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
368:   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
369:   PetscViewerFormat format;

372:   PetscViewerGetFormat(viewer,&format);
373:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
374:     PetscViewerASCIIPrintf(viewer,"  block size is %dn",bs);
375:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
376:     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
377:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
378:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
379:     for (i=0; i<a->mbs; i++) {
380:       for (j=0; j<bs; j++) {
381:         PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
382:         for (k=a->i[i]; k<a->i[i+1]; k++) {
383:           for (l=0; l<bs; l++) {
384: #if defined(PETSC_USE_COMPLEX)
385:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
386:               PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l,
387:                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
388:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
389:               PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l,
390:                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
391:             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
392:               PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
393:             }
394: #else
395:             if (a->a[bs2*k + l*bs + j] != 0.0) {
396:               PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
397:             }
398: #endif
399:           }
400:         }
401:         PetscViewerASCIIPrintf(viewer,"n");
402:       }
403:     }
404:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
405:   } else {
406:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
407:     for (i=0; i<a->mbs; i++) {
408:       for (j=0; j<bs; j++) {
409:         PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
410:         for (k=a->i[i]; k<a->i[i+1]; k++) {
411:           for (l=0; l<bs; l++) {
412: #if defined(PETSC_USE_COMPLEX)
413:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
414:               PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
415:                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
416:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
417:               PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
418:                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
419:             } else {
420:               PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
421:             }
422: #else
423:             PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
424: #endif
425:           }
426:         }
427:         PetscViewerASCIIPrintf(viewer,"n");
428:       }
429:     }
430:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
431:   }
432:   PetscViewerFlush(viewer);
433:   return(0);
434: }

436: static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
437: {
438:   Mat          A = (Mat) Aa;
439:   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
440:   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
441:   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
442:   MatScalar    *aa;
443:   PetscViewer  viewer;


447:   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
448:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);

450:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

452:   /* loop over matrix elements drawing boxes */
453:   color = PETSC_DRAW_BLUE;
454:   for (i=0,row=0; i<mbs; i++,row+=bs) {
455:     for (j=a->i[i]; j<a->i[i+1]; j++) {
456:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
457:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
458:       aa = a->a + j*bs2;
459:       for (k=0; k<bs; k++) {
460:         for (l=0; l<bs; l++) {
461:           if (PetscRealPart(*aa++) >=  0.) continue;
462:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
463:         }
464:       }
465:     }
466:   }
467:   color = PETSC_DRAW_CYAN;
468:   for (i=0,row=0; i<mbs; i++,row+=bs) {
469:     for (j=a->i[i]; j<a->i[i+1]; j++) {
470:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
471:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
472:       aa = a->a + j*bs2;
473:       for (k=0; k<bs; k++) {
474:         for (l=0; l<bs; l++) {
475:           if (PetscRealPart(*aa++) != 0.) continue;
476:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
477:         }
478:       }
479:     }
480:   }

482:   color = PETSC_DRAW_RED;
483:   for (i=0,row=0; i<mbs; i++,row+=bs) {
484:     for (j=a->i[i]; j<a->i[i+1]; j++) {
485:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
486:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
487:       aa = a->a + j*bs2;
488:       for (k=0; k<bs; k++) {
489:         for (l=0; l<bs; l++) {
490:           if (PetscRealPart(*aa++) <= 0.) continue;
491:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
492:         }
493:       }
494:     }
495:   }
496:   return(0);
497: }

499: static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
500: {
501:   int          ierr;
502:   PetscReal    xl,yl,xr,yr,w,h;
503:   PetscDraw    draw;
504:   PetscTruth   isnull;


508:   PetscViewerDrawGetDraw(viewer,0,&draw);
509:   PetscDrawIsNull(draw,&isnull); if (isnull) return(0);

511:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
512:   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
513:   xr += w;    yr += h;  xl = -w;     yl = -h;
514:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
515:   PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
516:   PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
517:   return(0);
518: }

520: int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
521: {
522:   int        ierr;
523:   PetscTruth issocket,isascii,isbinary,isdraw;

526:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
527:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
528:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
529:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
530:   if (issocket) {
531:     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
532:   } else if (isascii){
533:     MatView_SeqBAIJ_ASCII(A,viewer);
534:   } else if (isbinary) {
535:     MatView_SeqBAIJ_Binary(A,viewer);
536:   } else if (isdraw) {
537:     MatView_SeqBAIJ_Draw(A,viewer);
538:   } else {
539:     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
540:   }
541:   return(0);
542: }


545: int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
546: {
547:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
548:   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
549:   int        *ai = a->i,*ailen = a->ilen;
550:   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
551:   MatScalar  *ap,*aa = a->a,zero = 0.0;

554:   for (k=0; k<m; k++) { /* loop over rows */
555:     row  = im[k]; brow = row/bs;
556:     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
557:     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
558:     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
559:     nrow = ailen[brow];
560:     for (l=0; l<n; l++) { /* loop over columns */
561:       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
562:       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
563:       col  = in[l] ;
564:       bcol = col/bs;
565:       cidx = col%bs;
566:       ridx = row%bs;
567:       high = nrow;
568:       low  = 0; /* assume unsorted */
569:       while (high-low > 5) {
570:         t = (low+high)/2;
571:         if (rp[t] > bcol) high = t;
572:         else             low  = t;
573:       }
574:       for (i=low; i<high; i++) {
575:         if (rp[i] > bcol) break;
576:         if (rp[i] == bcol) {
577:           *v++ = ap[bs2*i+bs*cidx+ridx];
578:           goto finished;
579:         }
580:       }
581:       *v++ = zero;
582:       finished:;
583:     }
584:   }
585:   return(0);
586: }

588: #if defined(PETSC_USE_MAT_SINGLE)
589: int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
590: {
591:   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
592:   int         ierr,i,N = m*n*b->bs2;
593:   MatScalar   *vsingle;

596:   if (N > b->setvalueslen) {
597:     if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
598:     PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
599:     b->setvalueslen  = N;
600:   }
601:   vsingle = b->setvaluescopy;
602:   for (i=0; i<N; i++) {
603:     vsingle[i] = v[i];
604:   }
605:   MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
606:   return(0);
607: }
608: #endif


611: int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
612: {
613:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
614:   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
615:   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
616:   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
617:   PetscTruth  roworiented=a->roworiented;
618:   MatScalar   *value = v,*ap,*aa = a->a,*bap;

621:   if (roworiented) {
622:     stepval = (n-1)*bs;
623:   } else {
624:     stepval = (m-1)*bs;
625:   }
626:   for (k=0; k<m; k++) { /* loop over added rows */
627:     row  = im[k];
628:     if (row < 0) continue;
629: #if defined(PETSC_USE_BOPT_g)  
630:     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
631: #endif
632:     rp   = aj + ai[row];
633:     ap   = aa + bs2*ai[row];
634:     rmax = imax[row];
635:     nrow = ailen[row];
636:     low  = 0;
637:     for (l=0; l<n; l++) { /* loop over added columns */
638:       if (in[l] < 0) continue;
639: #if defined(PETSC_USE_BOPT_g)  
640:       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
641: #endif
642:       col = in[l];
643:       if (roworiented) {
644:         value = v + k*(stepval+bs)*bs + l*bs;
645:       } else {
646:         value = v + l*(stepval+bs)*bs + k*bs;
647:       }
648:       if (!sorted) low = 0; high = nrow;
649:       while (high-low > 7) {
650:         t = (low+high)/2;
651:         if (rp[t] > col) high = t;
652:         else             low  = t;
653:       }
654:       for (i=low; i<high; i++) {
655:         if (rp[i] > col) break;
656:         if (rp[i] == col) {
657:           bap  = ap +  bs2*i;
658:           if (roworiented) {
659:             if (is == ADD_VALUES) {
660:               for (ii=0; ii<bs; ii++,value+=stepval) {
661:                 for (jj=ii; jj<bs2; jj+=bs) {
662:                   bap[jj] += *value++;
663:                 }
664:               }
665:             } else {
666:               for (ii=0; ii<bs; ii++,value+=stepval) {
667:                 for (jj=ii; jj<bs2; jj+=bs) {
668:                   bap[jj] = *value++;
669:                 }
670:               }
671:             }
672:           } else {
673:             if (is == ADD_VALUES) {
674:               for (ii=0; ii<bs; ii++,value+=stepval) {
675:                 for (jj=0; jj<bs; jj++) {
676:                   *bap++ += *value++;
677:                 }
678:               }
679:             } else {
680:               for (ii=0; ii<bs; ii++,value+=stepval) {
681:                 for (jj=0; jj<bs; jj++) {
682:                   *bap++  = *value++;
683:                 }
684:               }
685:             }
686:           }
687:           goto noinsert2;
688:         }
689:       }
690:       if (nonew == 1) goto noinsert2;
691:       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
692:       if (nrow >= rmax) {
693:         /* there is no extra room in row, therefore enlarge */
694:         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
695:         MatScalar *new_a;

697:         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");

699:         /* malloc new storage space */
700:         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
701:         ierr    = PetscMalloc(len,&new_a);
702:         new_j   = (int*)(new_a + bs2*new_nz);
703:         new_i   = new_j + new_nz;

705:         /* copy over old data into new slots */
706:         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
707:         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
708:         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
709:         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
710:         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));
711:         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));
712:         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
713:         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));
714:         /* free up old matrix storage */
715:         PetscFree(a->a);
716:         if (!a->singlemalloc) {
717:           PetscFree(a->i);
718:           PetscFree(a->j);
719:         }
720:         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
721:         a->singlemalloc = PETSC_TRUE;

723:         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
724:         rmax = imax[row] = imax[row] + CHUNKSIZE;
725:         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
726:         a->maxnz += bs2*CHUNKSIZE;
727:         a->reallocs++;
728:         a->nz++;
729:       }
730:       N = nrow++ - 1;
731:       /* shift up all the later entries in this row */
732:       for (ii=N; ii>=i; ii--) {
733:         rp[ii+1] = rp[ii];
734:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
735:       }
736:       if (N >= i) {
737:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
738:       }
739:       rp[i] = col;
740:       bap   = ap +  bs2*i;
741:       if (roworiented) {
742:         for (ii=0; ii<bs; ii++,value+=stepval) {
743:           for (jj=ii; jj<bs2; jj+=bs) {
744:             bap[jj] = *value++;
745:           }
746:         }
747:       } else {
748:         for (ii=0; ii<bs; ii++,value+=stepval) {
749:           for (jj=0; jj<bs; jj++) {
750:             *bap++  = *value++;
751:           }
752:         }
753:       }
754:       noinsert2:;
755:       low = i;
756:     }
757:     ailen[row] = nrow;
758:   }
759:   return(0);
760: }


763: int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
764: {
765:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
766:   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
767:   int        m = A->m,*ip,N,*ailen = a->ilen;
768:   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
769:   MatScalar  *aa = a->a,*ap;

772:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

774:   if (m) rmax = ailen[0];
775:   for (i=1; i<mbs; i++) {
776:     /* move each row back by the amount of empty slots (fshift) before it*/
777:     fshift += imax[i-1] - ailen[i-1];
778:     rmax   = PetscMax(rmax,ailen[i]);
779:     if (fshift) {
780:       ip = aj + ai[i]; ap = aa + bs2*ai[i];
781:       N = ailen[i];
782:       for (j=0; j<N; j++) {
783:         ip[j-fshift] = ip[j];
784:         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
785:       }
786:     }
787:     ai[i] = ai[i-1] + ailen[i-1];
788:   }
789:   if (mbs) {
790:     fshift += imax[mbs-1] - ailen[mbs-1];
791:     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
792:   }
793:   /* reset ilen and imax for each row */
794:   for (i=0; i<mbs; i++) {
795:     ailen[i] = imax[i] = ai[i+1] - ai[i];
796:   }
797:   a->nz = ai[mbs];

799:   /* diagonals may have moved, so kill the diagonal pointers */
800:   if (fshift && a->diag) {
801:     PetscFree(a->diag);
802:     PetscLogObjectMemory(A,-(m+1)*sizeof(int));
803:     a->diag = 0;
804:   }
805:   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d usedn",
806:            m,A->n,a->bs,fshift*bs2,a->nz*bs2);
807:   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %dn",
808:            a->reallocs);
809:   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %dn",rmax);
810:   a->reallocs          = 0;
811:   A->info.nz_unneeded  = (PetscReal)fshift*bs2;

813:   return(0);
814: }



818: /* 
819:    This function returns an array of flags which indicate the locations of contiguous
820:    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
821:    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
822:    Assume: sizes should be long enough to hold all the values.
823: */
824: static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
825: {
826:   int        i,j,k,row;
827:   PetscTruth flg;

830:   for (i=0,j=0; i<n; j++) {
831:     row = idx[i];
832:     if (row%bs!=0) { /* Not the begining of a block */
833:       sizes[j] = 1;
834:       i++;
835:     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
836:       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
837:       i++;
838:     } else { /* Begining of the block, so check if the complete block exists */
839:       flg = PETSC_TRUE;
840:       for (k=1; k<bs; k++) {
841:         if (row+k != idx[i+k]) { /* break in the block */
842:           flg = PETSC_FALSE;
843:           break;
844:         }
845:       }
846:       if (flg == PETSC_TRUE) { /* No break in the bs */
847:         sizes[j] = bs;
848:         i+= bs;
849:       } else {
850:         sizes[j] = 1;
851:         i++;
852:       }
853:     }
854:   }
855:   *bs_max = j;
856:   return(0);
857: }
858: 
859: int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag)
860: {
861:   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
862:   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
863:   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
864:   Scalar      zero = 0.0;
865:   MatScalar   *aa;

868:   /* Make a copy of the IS and  sort it */
869:   ISGetLocalSize(is,&is_n);
870:   ISGetIndices(is,&is_idx);

872:   /* allocate memory for rows,sizes */
873:   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);
874:   sizes = rows + is_n;

876:   /* copy IS values to rows, and sort them */
877:   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
878:   PetscSortInt(is_n,rows);
879:   if (baij->keepzeroedrows) {
880:     for (i=0; i<is_n; i++) { sizes[i] = 1; }
881:     bs_max = is_n;
882:   } else {
883:     MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
884:   }
885:   ISRestoreIndices(is,&is_idx);

887:   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
888:     row   = rows[j];
889:     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
890:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
891:     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
892:     if (sizes[i] == bs && !baij->keepzeroedrows) {
893:       if (diag) {
894:         if (baij->ilen[row/bs] > 0) {
895:           baij->ilen[row/bs]       = 1;
896:           baij->j[baij->i[row/bs]] = row/bs;
897:           PetscMemzero(aa,count*bs*sizeof(MatScalar));
898:         }
899:         /* Now insert all the diagonal values for this bs */
900:         for (k=0; k<bs; k++) {
901:           (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);
902:         }
903:       } else { /* (!diag) */
904:         baij->ilen[row/bs] = 0;
905:       } /* end (!diag) */
906:     } else { /* (sizes[i] != bs) */
907: #if defined (PETSC_USE_DEBUG)
908:       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
909: #endif
910:       for (k=0; k<count; k++) {
911:         aa[0] =  zero;
912:         aa    += bs;
913:       }
914:       if (diag) {
915:         (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);
916:       }
917:     }
918:   }

920:   PetscFree(rows);
921:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
922:   return(0);
923: }

925: int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
926: {
927:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
928:   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
929:   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
930:   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
931:   int         ridx,cidx,bs2=a->bs2,ierr;
932:   PetscTruth  roworiented=a->roworiented;
933:   MatScalar   *ap,value,*aa=a->a,*bap;

936:   for (k=0; k<m; k++) { /* loop over added rows */
937:     row  = im[k]; brow = row/bs;
938:     if (row < 0) continue;
939: #if defined(PETSC_USE_BOPT_g)  
940:     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
941: #endif
942:     rp   = aj + ai[brow];
943:     ap   = aa + bs2*ai[brow];
944:     rmax = imax[brow];
945:     nrow = ailen[brow];
946:     low  = 0;
947:     for (l=0; l<n; l++) { /* loop over added columns */
948:       if (in[l] < 0) continue;
949: #if defined(PETSC_USE_BOPT_g)  
950:       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
951: #endif
952:       col = in[l]; bcol = col/bs;
953:       ridx = row % bs; cidx = col % bs;
954:       if (roworiented) {
955:         value = v[l + k*n];
956:       } else {
957:         value = v[k + l*m];
958:       }
959:       if (!sorted) low = 0; high = nrow;
960:       while (high-low > 7) {
961:         t = (low+high)/2;
962:         if (rp[t] > bcol) high = t;
963:         else              low  = t;
964:       }
965:       for (i=low; i<high; i++) {
966:         if (rp[i] > bcol) break;
967:         if (rp[i] == bcol) {
968:           bap  = ap +  bs2*i + bs*cidx + ridx;
969:           if (is == ADD_VALUES) *bap += value;
970:           else                  *bap  = value;
971:           goto noinsert1;
972:         }
973:       }
974:       if (nonew == 1) goto noinsert1;
975:       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
976:       if (nrow >= rmax) {
977:         /* there is no extra room in row, therefore enlarge */
978:         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
979:         MatScalar *new_a;

981:         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");

983:         /* Malloc new storage space */
984:         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
985:         ierr    = PetscMalloc(len,&new_a);
986:         new_j   = (int*)(new_a + bs2*new_nz);
987:         new_i   = new_j + new_nz;

989:         /* copy over old data into new slots */
990:         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
991:         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
992:         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
993:         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
994:         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));
995:         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));
996:         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
997:         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));
998:         /* free up old matrix storage */
999:         PetscFree(a->a);
1000:         if (!a->singlemalloc) {
1001:           PetscFree(a->i);
1002:           PetscFree(a->j);
1003:         }
1004:         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1005:         a->singlemalloc = PETSC_TRUE;

1007:         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1008:         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1009:         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1010:         a->maxnz += bs2*CHUNKSIZE;
1011:         a->reallocs++;
1012:         a->nz++;
1013:       }
1014:       N = nrow++ - 1;
1015:       /* shift up all the later entries in this row */
1016:       for (ii=N; ii>=i; ii--) {
1017:         rp[ii+1] = rp[ii];
1018:         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1019:       }
1020:       if (N>=i) {
1021:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1022:       }
1023:       rp[i]                      = bcol;
1024:       ap[bs2*i + bs*cidx + ridx] = value;
1025:       noinsert1:;
1026:       low = i;
1027:     }
1028:     ailen[brow] = nrow;
1029:   }
1030:   return(0);
1031: }

1033: EXTERN int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,MatLUInfo*,Mat*);
1034: EXTERN int MatLUFactor_SeqBAIJ(Mat,IS,IS,MatLUInfo*);
1035: EXTERN int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1036: EXTERN int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1037: EXTERN int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
1038: EXTERN int MatMultTranspose_SeqBAIJ(Mat,Vec,Vec);
1039: EXTERN int MatMultTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
1040: EXTERN int MatScale_SeqBAIJ(Scalar*,Mat);
1041: EXTERN int MatNorm_SeqBAIJ(Mat,NormType,PetscReal *);
1042: EXTERN int MatEqual_SeqBAIJ(Mat,Mat,PetscTruth*);
1043: EXTERN int MatGetDiagonal_SeqBAIJ(Mat,Vec);
1044: EXTERN int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
1045: EXTERN int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
1046: EXTERN int MatZeroEntries_SeqBAIJ(Mat);

1048: EXTERN int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1049: EXTERN int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1050: EXTERN int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1051: EXTERN int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1052: EXTERN int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1053: EXTERN int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1054: EXTERN int MatSolve_SeqBAIJ_6(Mat,Vec,Vec);
1055: EXTERN int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1056: EXTERN int MatSolveTranspose_SeqBAIJ_7(Mat,Vec,Vec);
1057: EXTERN int MatSolveTranspose_SeqBAIJ_6(Mat,Vec,Vec);
1058: EXTERN int MatSolveTranspose_SeqBAIJ_5(Mat,Vec,Vec);
1059: EXTERN int MatSolveTranspose_SeqBAIJ_4(Mat,Vec,Vec);
1060: EXTERN int MatSolveTranspose_SeqBAIJ_3(Mat,Vec,Vec);
1061: EXTERN int MatSolveTranspose_SeqBAIJ_2(Mat,Vec,Vec);
1062: EXTERN int MatSolveTranspose_SeqBAIJ_1(Mat,Vec,Vec);

1064: EXTERN int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1065: EXTERN int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1066: EXTERN int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1067: EXTERN int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1068: EXTERN int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1069: EXTERN int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1070: EXTERN int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*);

1072: EXTERN int MatMult_SeqBAIJ_1(Mat,Vec,Vec);
1073: EXTERN int MatMult_SeqBAIJ_2(Mat,Vec,Vec);
1074: EXTERN int MatMult_SeqBAIJ_3(Mat,Vec,Vec);
1075: EXTERN int MatMult_SeqBAIJ_4(Mat,Vec,Vec);
1076: EXTERN int MatMult_SeqBAIJ_5(Mat,Vec,Vec);
1077: EXTERN int MatMult_SeqBAIJ_6(Mat,Vec,Vec);
1078: EXTERN int MatMult_SeqBAIJ_7(Mat,Vec,Vec);
1079: EXTERN int MatMult_SeqBAIJ_N(Mat,Vec,Vec);

1081: EXTERN int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
1082: EXTERN int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
1083: EXTERN int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
1084: EXTERN int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
1085: EXTERN int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
1086: EXTERN int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec);
1087: EXTERN int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
1088: EXTERN int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);

1090: int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1091: {
1092:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1093:   Mat         outA;
1094:   int         ierr;
1095:   PetscTruth  row_identity,col_identity;

1098:   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1099:   ISIdentity(row,&row_identity);
1100:   ISIdentity(col,&col_identity);
1101:   if (!row_identity || !col_identity) {
1102:     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1103:   }

1105:   outA          = inA;
1106:   inA->factor   = FACTOR_LU;

1108:   if (!a->diag) {
1109:     MatMarkDiagonal_SeqBAIJ(inA);
1110:   }
1111:   /*
1112:       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 
1113:       for ILU(0) factorization with natural ordering
1114:   */
1115:   switch (a->bs) {
1116:   case 1:
1117:     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1118:     inA->ops->solve           = MatSolve_SeqBAIJ_1_NaturalOrdering;
1119:     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering;
1120:     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=1n");
1121:   case 2:
1122:     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
1123:     inA->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering;
1124:     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering;
1125:     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2n");
1126:     break;
1127:   case 3:
1128:     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
1129:     inA->ops->solve           = MatSolve_SeqBAIJ_3_NaturalOrdering;
1130:     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_3_NaturalOrdering;
1131:     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3n");
1132:     break;
1133:   case 4:
1134:     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1135:     inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1136:     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_4_NaturalOrdering;
1137:     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4n");
1138:     break;
1139:   case 5:
1140:     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1141:     inA->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
1142:     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_5_NaturalOrdering;
1143:     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5n");
1144:     break;
1145:   case 6:
1146:     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
1147:     inA->ops->solve           = MatSolve_SeqBAIJ_6_NaturalOrdering;
1148:     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_6_NaturalOrdering;
1149:     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6n");
1150:     break;
1151:   case 7:
1152:     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
1153:     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_7_NaturalOrdering;
1154:     inA->ops->solve           = MatSolve_SeqBAIJ_7_NaturalOrdering;
1155:     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7n");
1156:     break;
1157:   default:
1158:     a->row        = row;
1159:     a->col        = col;
1160:     ierr          = PetscObjectReference((PetscObject)row);
1161:     ierr          = PetscObjectReference((PetscObject)col);

1163:     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1164:     ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
1165:     PetscLogObjectParent(inA,a->icol);

1167:     if (!a->solve_work) {
1168:       PetscMalloc((inA->m+a->bs)*sizeof(Scalar),&a->solve_work);
1169:       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(Scalar));
1170:     }
1171:   }

1173:   MatLUFactorNumeric(inA,&outA);

1175:   return(0);
1176: }
1177: int MatPrintHelp_SeqBAIJ(Mat A)
1178: {
1179:   static PetscTruth called = PETSC_FALSE;
1180:   MPI_Comm          comm = A->comm;
1181:   int               ierr;

1184:   if (called) {return(0);} else called = PETSC_TRUE;
1185:   (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):n");
1186:   (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>n");
1187:   return(0);
1188: }

1190: EXTERN_C_BEGIN
1191: int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1192: {
1193:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1194:   int         i,nz,n;

1197:   nz = baij->maxnz;
1198:   n  = mat->n;
1199:   for (i=0; i<nz; i++) {
1200:     baij->j[i] = indices[i];
1201:   }
1202:   baij->nz = nz;
1203:   for (i=0; i<n; i++) {
1204:     baij->ilen[i] = baij->imax[i];
1205:   }

1207:   return(0);
1208: }
1209: EXTERN_C_END

1211: /*@
1212:     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1213:        in the matrix.

1215:   Input Parameters:
1216: +  mat - the SeqBAIJ matrix
1217: -  indices - the column indices

1219:   Level: advanced

1221:   Notes:
1222:     This can be called if you have precomputed the nonzero structure of the 
1223:   matrix and want to provide it to the matrix object to improve the performance
1224:   of the MatSetValues() operation.

1226:     You MUST have set the correct numbers of nonzeros per row in the call to 
1227:   MatCreateSeqBAIJ().

1229:     MUST be called before any calls to MatSetValues();

1231: @*/
1232: int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1233: {
1234:   int ierr,(*f)(Mat,int *);

1238:   PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)())&f);
1239:   if (f) {
1240:     (*f)(mat,indices);
1241:   } else {
1242:     SETERRQ(1,"Wrong type of matrix to set column indices");
1243:   }
1244:   return(0);
1245: }

1247: int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1248: {
1249:   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1250:   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1251:   PetscReal    atmp;
1252:   Scalar       *x,zero = 0.0;
1253:   MatScalar    *aa;
1254:   int          ncols,brow,krow,kcol;

1257:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1258:   bs   = a->bs;
1259:   aa   = a->a;
1260:   ai   = a->i;
1261:   aj   = a->j;
1262:   mbs = a->mbs;

1264:   VecSet(&zero,v);
1265:   VecGetArray(v,&x);
1266:   VecGetLocalSize(v,&n);
1267:   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1268:   for (i=0; i<mbs; i++) {
1269:     ncols = ai[1] - ai[0]; ai++;
1270:     brow  = bs*i;
1271:     for (j=0; j<ncols; j++){
1272:       /* bcol = bs*(*aj); */
1273:       for (kcol=0; kcol<bs; kcol++){
1274:         for (krow=0; krow<bs; krow++){
1275:           atmp = PetscAbsScalar(*aa); aa++;
1276:           row = brow + krow;    /* row index */
1277:           /* printf("val[%d,%d]: %gn",row,bcol+kcol,atmp); */
1278:           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1279:         }
1280:       }
1281:       aj++;
1282:     }
1283:   }
1284:   VecRestoreArray(v,&x);
1285:   return(0);
1286: }

1288: int MatSetUpPreallocation_SeqBAIJ(Mat A)
1289: {
1290:   int        ierr;

1293:    MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);
1294:   return(0);
1295: }

1297: int MatGetArray_SeqBAIJ(Mat A,Scalar **array)
1298: {
1299:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1301:   *array = a->a;
1302:   return(0);
1303: }

1305: int MatRestoreArray_SeqBAIJ(Mat A,Scalar **array)
1306: {
1308:   return(0);
1309: }

1311: /* -------------------------------------------------------------------*/
1312: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1313:        MatGetRow_SeqBAIJ,
1314:        MatRestoreRow_SeqBAIJ,
1315:        MatMult_SeqBAIJ_N,
1316:        MatMultAdd_SeqBAIJ_N,
1317:        MatMultTranspose_SeqBAIJ,
1318:        MatMultTransposeAdd_SeqBAIJ,
1319:        MatSolve_SeqBAIJ_N,
1320:        0,
1321:        0,
1322:        0,
1323:        MatLUFactor_SeqBAIJ,
1324:        0,
1325:        0,
1326:        MatTranspose_SeqBAIJ,
1327:        MatGetInfo_SeqBAIJ,
1328:        MatEqual_SeqBAIJ,
1329:        MatGetDiagonal_SeqBAIJ,
1330:        MatDiagonalScale_SeqBAIJ,
1331:        MatNorm_SeqBAIJ,
1332:        0,
1333:        MatAssemblyEnd_SeqBAIJ,
1334:        0,
1335:        MatSetOption_SeqBAIJ,
1336:        MatZeroEntries_SeqBAIJ,
1337:        MatZeroRows_SeqBAIJ,
1338:        MatLUFactorSymbolic_SeqBAIJ,
1339:        MatLUFactorNumeric_SeqBAIJ_N,
1340:        0,
1341:        0,
1342:        MatSetUpPreallocation_SeqBAIJ,
1343:        0,
1344:        MatGetOwnershipRange_SeqBAIJ,
1345:        MatILUFactorSymbolic_SeqBAIJ,
1346:        0,
1347:        MatGetArray_SeqBAIJ,
1348:        MatRestoreArray_SeqBAIJ,
1349:        MatDuplicate_SeqBAIJ,
1350:        0,
1351:        0,
1352:        MatILUFactor_SeqBAIJ,
1353:        0,
1354:        0,
1355:        MatGetSubMatrices_SeqBAIJ,
1356:        MatIncreaseOverlap_SeqBAIJ,
1357:        MatGetValues_SeqBAIJ,
1358:        0,
1359:        MatPrintHelp_SeqBAIJ,
1360:        MatScale_SeqBAIJ,
1361:        0,
1362:        0,
1363:        0,
1364:        MatGetBlockSize_SeqBAIJ,
1365:        MatGetRowIJ_SeqBAIJ,
1366:        MatRestoreRowIJ_SeqBAIJ,
1367:        0,
1368:        0,
1369:        0,
1370:        0,
1371:        0,
1372:        0,
1373:        MatSetValuesBlocked_SeqBAIJ,
1374:        MatGetSubMatrix_SeqBAIJ,
1375:        MatDestroy_SeqBAIJ,
1376:        MatView_SeqBAIJ,
1377:        MatGetMaps_Petsc,
1378:        0,
1379:        0,
1380:        0,
1381:        0,
1382:        0,
1383:        0,
1384:        MatGetRowMax_SeqBAIJ,
1385:        MatConvert_Basic};

1387: EXTERN_C_BEGIN
1388: int MatStoreValues_SeqBAIJ(Mat mat)
1389: {
1390:   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1391:   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1392:   int         ierr;

1395:   if (aij->nonew != 1) {
1396:     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1397:   }

1399:   /* allocate space for values if not already there */
1400:   if (!aij->saved_values) {
1401:     PetscMalloc((nz+1)*sizeof(Scalar),&aij->saved_values);
1402:   }

1404:   /* copy values over */
1405:   PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));
1406:   return(0);
1407: }
1408: EXTERN_C_END

1410: EXTERN_C_BEGIN
1411: int MatRetrieveValues_SeqBAIJ(Mat mat)
1412: {
1413:   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1414:   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;

1417:   if (aij->nonew != 1) {
1418:     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1419:   }
1420:   if (!aij->saved_values) {
1421:     SETERRQ(1,"Must call MatStoreValues(A);first");
1422:   }

1424:   /* copy values over */
1425:   PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));
1426:   return(0);
1427: }
1428: EXTERN_C_END

1430: EXTERN_C_BEGIN
1431: extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1432: EXTERN_C_END

1434: EXTERN_C_BEGIN
1435: int MatCreate_SeqBAIJ(Mat B)
1436: {
1437:   int         ierr,size;
1438:   Mat_SeqBAIJ *b;

1441:   MPI_Comm_size(B->comm,&size);
1442:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");

1444:   B->m = B->M = PetscMax(B->m,B->M);
1445:   B->n = B->N = PetscMax(B->n,B->N);
1446:   ierr    = PetscNew(Mat_SeqBAIJ,&b);
1447:   B->data = (void*)b;
1448:   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1449:   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1450:   B->factor           = 0;
1451:   B->lupivotthreshold = 1.0;
1452:   B->mapping          = 0;
1453:   b->row              = 0;
1454:   b->col              = 0;
1455:   b->icol             = 0;
1456:   b->reallocs         = 0;
1457:   b->saved_values     = 0;
1458: #if defined(PETSC_USE_MAT_SINGLE)
1459:   b->setvalueslen     = 0;
1460:   b->setvaluescopy    = PETSC_NULL;
1461: #endif
1462: 
1463:   MapCreateMPI(B->comm,B->m,B->m,&B->rmap);
1464:   MapCreateMPI(B->comm,B->n,B->n,&B->cmap);

1466:   b->sorted           = PETSC_FALSE;
1467:   b->roworiented      = PETSC_TRUE;
1468:   b->nonew            = 0;
1469:   b->diag             = 0;
1470:   b->solve_work       = 0;
1471:   b->mult_work        = 0;
1472:   b->spptr            = 0;
1473:   B->info.nz_unneeded = (PetscReal)b->maxnz;
1474:   b->keepzeroedrows   = PETSC_FALSE;

1476:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1477:                                      "MatStoreValues_SeqBAIJ",
1478:                                       MatStoreValues_SeqBAIJ);
1479:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1480:                                      "MatRetrieveValues_SeqBAIJ",
1481:                                       MatRetrieveValues_SeqBAIJ);
1482:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1483:                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1484:                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);
1485:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1486:                                      "MatConvert_SeqBAIJ_SeqAIJ",
1487:                                       MatConvert_SeqBAIJ_SeqAIJ);
1488:   return(0);
1489: }
1490: EXTERN_C_END

1492: int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1493: {
1494:   Mat         C;
1495:   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1496:   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;

1499:   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");

1501:   *B = 0;
1502:   MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);
1503:   MatSetType(C,MATSEQBAIJ);
1504:   c    = (Mat_SeqBAIJ*)C->data;

1506:   c->bs         = a->bs;
1507:   c->bs2        = a->bs2;
1508:   c->mbs        = a->mbs;
1509:   c->nbs        = a->nbs;
1510:   PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));

1512:   PetscMalloc((mbs+1)*sizeof(int),&c->imax);
1513:   PetscMalloc((mbs+1)*sizeof(int),&c->ilen);
1514:   for (i=0; i<mbs; i++) {
1515:     c->imax[i] = a->imax[i];
1516:     c->ilen[i] = a->ilen[i];
1517:   }

1519:   /* allocate the matrix space */
1520:   c->singlemalloc = PETSC_TRUE;
1521:   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1522:   PetscMalloc(len,&c->a);
1523:   c->j = (int*)(c->a + nz*bs2);
1524:   c->i = c->j + nz;
1525:   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1526:   if (mbs > 0) {
1527:     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1528:     if (cpvalues == MAT_COPY_VALUES) {
1529:       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
1530:     } else {
1531:       PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
1532:     }
1533:   }

1535:   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1536:   c->sorted      = a->sorted;
1537:   c->roworiented = a->roworiented;
1538:   c->nonew       = a->nonew;

1540:   if (a->diag) {
1541:     PetscMalloc((mbs+1)*sizeof(int),&c->diag);
1542:     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1543:     for (i=0; i<mbs; i++) {
1544:       c->diag[i] = a->diag[i];
1545:     }
1546:   } else c->diag        = 0;
1547:   c->nz                 = a->nz;
1548:   c->maxnz              = a->maxnz;
1549:   c->solve_work         = 0;
1550:   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1551:   c->mult_work          = 0;
1552:   C->preallocated       = PETSC_TRUE;
1553:   C->assembled          = PETSC_TRUE;
1554:   *B = C;
1555:   PetscFListDuplicate(A->qlist,&C->qlist);
1556:   return(0);
1557: }

1559: EXTERN_C_BEGIN
1560: int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1561: {
1562:   Mat_SeqBAIJ  *a;
1563:   Mat          B;
1564:   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1565:   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1566:   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1567:   int          *masked,nmask,tmp,bs2,ishift;
1568:   Scalar       *aa;
1569:   MPI_Comm     comm = ((PetscObject)viewer)->comm;

1572:   PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
1573:   bs2  = bs*bs;

1575:   MPI_Comm_size(comm,&size);
1576:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1577:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1578:   PetscBinaryRead(fd,header,4,PETSC_INT);
1579:   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1580:   M = header[1]; N = header[2]; nz = header[3];

1582:   if (header[3] < 0) {
1583:     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1584:   }

1586:   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

1588:   /* 
1589:      This code adds extra rows to make sure the number of rows is 
1590:     divisible by the blocksize
1591:   */
1592:   mbs        = M/bs;
1593:   extra_rows = bs - M + bs*(mbs);
1594:   if (extra_rows == bs) extra_rows = 0;
1595:   else                  mbs++;
1596:   if (extra_rows) {
1597:     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksizen");
1598:   }

1600:   /* read in row lengths */
1601:   PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
1602:   PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1603:   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;

1605:   /* read in column indices */
1606:   PetscMalloc((nz+extra_rows)*sizeof(int),&jj);
1607:   PetscBinaryRead(fd,jj,nz,PETSC_INT);
1608:   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;

1610:   /* loop over row lengths determining block row lengths */
1611:   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);
1612:   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));
1613:   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);
1614:   ierr     = PetscMemzero(mask,mbs*sizeof(int));
1615:   masked   = mask + mbs;
1616:   rowcount = 0; nzcount = 0;
1617:   for (i=0; i<mbs; i++) {
1618:     nmask = 0;
1619:     for (j=0; j<bs; j++) {
1620:       kmax = rowlengths[rowcount];
1621:       for (k=0; k<kmax; k++) {
1622:         tmp = jj[nzcount++]/bs;
1623:         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1624:       }
1625:       rowcount++;
1626:     }
1627:     browlengths[i] += nmask;
1628:     /* zero out the mask elements we set */
1629:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1630:   }

1632:   /* create our matrix */
1633:   MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1634:   B = *A;
1635:   a = (Mat_SeqBAIJ*)B->data;

1637:   /* set matrix "i" values */
1638:   a->i[0] = 0;
1639:   for (i=1; i<= mbs; i++) {
1640:     a->i[i]      = a->i[i-1] + browlengths[i-1];
1641:     a->ilen[i-1] = browlengths[i-1];
1642:   }
1643:   a->nz         = 0;
1644:   for (i=0; i<mbs; i++) a->nz += browlengths[i];

1646:   /* read in nonzero values */
1647:   PetscMalloc((nz+extra_rows)*sizeof(Scalar),&aa);
1648:   PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
1649:   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;

1651:   /* set "a" and "j" values into matrix */
1652:   nzcount = 0; jcount = 0;
1653:   for (i=0; i<mbs; i++) {
1654:     nzcountb = nzcount;
1655:     nmask    = 0;
1656:     for (j=0; j<bs; j++) {
1657:       kmax = rowlengths[i*bs+j];
1658:       for (k=0; k<kmax; k++) {
1659:         tmp = jj[nzcount++]/bs;
1660:         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1661:       }
1662:     }
1663:     /* sort the masked values */
1664:     PetscSortInt(nmask,masked);

1666:     /* set "j" values into matrix */
1667:     maskcount = 1;
1668:     for (j=0; j<nmask; j++) {
1669:       a->j[jcount++]  = masked[j];
1670:       mask[masked[j]] = maskcount++;
1671:     }
1672:     /* set "a" values into matrix */
1673:     ishift = bs2*a->i[i];
1674:     for (j=0; j<bs; j++) {
1675:       kmax = rowlengths[i*bs+j];
1676:       for (k=0; k<kmax; k++) {
1677:         tmp       = jj[nzcountb]/bs ;
1678:         block     = mask[tmp] - 1;
1679:         point     = jj[nzcountb] - bs*tmp;
1680:         idx       = ishift + bs2*block + j + bs*point;
1681:         a->a[idx] = (MatScalar)aa[nzcountb++];
1682:       }
1683:     }
1684:     /* zero out the mask elements we set */
1685:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1686:   }
1687:   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");

1689:   PetscFree(rowlengths);
1690:   PetscFree(browlengths);
1691:   PetscFree(aa);
1692:   PetscFree(jj);
1693:   PetscFree(mask);

1695:   B->assembled = PETSC_TRUE;
1696: 
1697:   MatView_Private(B);
1698:   return(0);
1699: }
1700: EXTERN_C_END

1702: /*@C
1703:    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1704:    compressed row) format.  For good matrix assembly performance the
1705:    user should preallocate the matrix storage by setting the parameter nz
1706:    (or the array nnz).  By setting these parameters accurately, performance
1707:    during matrix assembly can be increased by more than a factor of 50.

1709:    Collective on MPI_Comm

1711:    Input Parameters:
1712: +  comm - MPI communicator, set to PETSC_COMM_SELF
1713: .  bs - size of block
1714: .  m - number of rows
1715: .  n - number of columns
1716: .  nz - number of nonzero blocks  per block row (same for all rows)
1717: -  nnz - array containing the number of nonzero blocks in the various block rows 
1718:          (possibly different for each block row) or PETSC_NULL

1720:    Output Parameter:
1721: .  A - the matrix 

1723:    Options Database Keys:
1724: .   -mat_no_unroll - uses code that does not unroll the loops in the 
1725:                      block calculations (much slower)
1726: .    -mat_block_size - size of the blocks to use

1728:    Level: intermediate

1730:    Notes:
1731:    A nonzero block is any block that as 1 or more nonzeros in it

1733:    The block AIJ format is fully compatible with standard Fortran 77
1734:    storage.  That is, the stored row and column indices can begin at
1735:    either one (as in Fortran) or zero.  See the users' manual for details.

1737:    Specify the preallocated storage with either nz or nnz (not both).
1738:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
1739:    allocation.  For additional details, see the users manual chapter on
1740:    matrices.

1742: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1743: @*/
1744: int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1745: {
1747: 
1749:   MatCreate(comm,m,n,m,n,A);
1750:   MatSetType(*A,MATSEQBAIJ);
1751:   MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);
1752:   return(0);
1753: }

1755: /*@C
1756:    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1757:    per row in the matrix. For good matrix assembly performance the
1758:    user should preallocate the matrix storage by setting the parameter nz
1759:    (or the array nnz).  By setting these parameters accurately, performance
1760:    during matrix assembly can be increased by more than a factor of 50.

1762:    Collective on MPI_Comm

1764:    Input Parameters:
1765: +  A - the matrix
1766: .  bs - size of block
1767: .  nz - number of block nonzeros per block row (same for all rows)
1768: -  nnz - array containing the number of block nonzeros in the various block rows 
1769:          (possibly different for each block row) or PETSC_NULL

1771:    Options Database Keys:
1772: .   -mat_no_unroll - uses code that does not unroll the loops in the 
1773:                      block calculations (much slower)
1774: .    -mat_block_size - size of the blocks to use

1776:    Level: intermediate

1778:    Notes:
1779:    The block AIJ format is fully compatible with standard Fortran 77
1780:    storage.  That is, the stored row and column indices can begin at
1781:    either one (as in Fortran) or zero.  See the users' manual for details.

1783:    Specify the preallocated storage with either nz or nnz (not both).
1784:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
1785:    allocation.  For additional details, see the users manual chapter on
1786:    matrices.

1788: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1789: @*/
1790: int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1791: {
1792:   Mat_SeqBAIJ *b;
1793:   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1794:   PetscTruth  flg;

1797:   PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);
1798:   if (!flg) return(0);

1800:   B->preallocated = PETSC_TRUE;
1801:   PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);
1802:   if (nnz && newbs != bs) {
1803:     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1804:   }
1805:   bs   = newbs;

1807:   mbs  = B->m/bs;
1808:   nbs  = B->n/bs;
1809:   bs2  = bs*bs;

1811:   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1812:     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1813:   }

1815:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1816:   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1817:   if (nnz) {
1818:     for (i=0; i<mbs; i++) {
1819:       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1820:     }
1821:   }

1823:   b       = (Mat_SeqBAIJ*)B->data;
1824:   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);
1825:   if (!flg) {
1826:     switch (bs) {
1827:     case 1:
1828:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1829:       B->ops->solve           = MatSolve_SeqBAIJ_1;
1830:       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_1;
1831:       B->ops->mult            = MatMult_SeqBAIJ_1;
1832:       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1833:       break;
1834:     case 2:
1835:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1836:       B->ops->solve           = MatSolve_SeqBAIJ_2;
1837:       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_2;
1838:       B->ops->mult            = MatMult_SeqBAIJ_2;
1839:       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1840:       break;
1841:     case 3:
1842:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1843:       B->ops->solve           = MatSolve_SeqBAIJ_3;
1844:       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_3;
1845:       B->ops->mult            = MatMult_SeqBAIJ_3;
1846:       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1847:       break;
1848:     case 4:
1849:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1850:       B->ops->solve           = MatSolve_SeqBAIJ_4;
1851:       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_4;
1852:       B->ops->mult            = MatMult_SeqBAIJ_4;
1853:       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1854:       break;
1855:     case 5:
1856:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1857:       B->ops->solve           = MatSolve_SeqBAIJ_5;
1858:       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_5;
1859:       B->ops->mult            = MatMult_SeqBAIJ_5;
1860:       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1861:       break;
1862:     case 6:
1863:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1864:       B->ops->solve           = MatSolve_SeqBAIJ_6;
1865:       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_6;
1866:       B->ops->mult            = MatMult_SeqBAIJ_6;
1867:       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1868:       break;
1869:     case 7:
1870:       B->ops->mult            = MatMult_SeqBAIJ_7;
1871:       B->ops->solve           = MatSolve_SeqBAIJ_7;
1872:       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_7;
1873:       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1874:       break;
1875:     default:
1876:       B->ops->mult            = MatMult_SeqBAIJ_N;
1877:       B->ops->solve           = MatSolve_SeqBAIJ_N;
1878:       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1879:       break;
1880:     }
1881:   }
1882:   b->bs      = bs;
1883:   b->mbs     = mbs;
1884:   b->nbs     = nbs;
1885:   PetscMalloc((mbs+1)*sizeof(int),&b->imax);
1886:   if (!nnz) {
1887:     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1888:     else if (nz <= 0)        nz = 1;
1889:     for (i=0; i<mbs; i++) b->imax[i] = nz;
1890:     nz = nz*mbs;
1891:   } else {
1892:     nz = 0;
1893:     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1894:   }

1896:   /* allocate the matrix space */
1897:   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1898:   ierr  = PetscMalloc(len,&b->a);
1899:   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
1900:   b->j  = (int*)(b->a + nz*bs2);
1901:   PetscMemzero(b->j,nz*sizeof(int));
1902:   b->i  = b->j + nz;
1903:   b->singlemalloc = PETSC_TRUE;

1905:   b->i[0] = 0;
1906:   for (i=1; i<mbs+1; i++) {
1907:     b->i[i] = b->i[i-1] + b->imax[i-1];
1908:   }

1910:   /* b->ilen will count nonzeros in each block row so far. */
1911:   PetscMalloc((mbs+1)*sizeof(int),&b->ilen);
1912:   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1913:   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}

1915:   b->bs               = bs;
1916:   b->bs2              = bs2;
1917:   b->mbs              = mbs;
1918:   b->nz               = 0;
1919:   b->maxnz            = nz*bs2;
1920:   B->info.nz_unneeded = (PetscReal)b->maxnz;
1921:   return(0);
1922: }