Actual source code: baij.c

  1: /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/

  3: /*
  4:     Defines the basic matrix operations for the BAIJ (compressed row)
  5:   matrix storage format.
  6: */
 7:  #include src/mat/impls/baij/seq/baij.h
 8:  #include src/vec/vecimpl.h
 9:  #include src/inline/spops.h
 10:  #include petscsys.h

 12: /*  UGLY, ugly, ugly
 13:    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 
 14:    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 
 15:    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
 16:    converts the entries into single precision and then calls ..._MatScalar() to put them
 17:    into the single precision data structures.
 18: */
 19: #if defined(PETSC_USE_MAT_SINGLE)
 20: EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
 21: #else
 22: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
 23: #endif

 25: #define CHUNKSIZE  10

 27: /*
 28:      Checks for missing diagonals
 29: */
 30: int MatMissingDiagonal_SeqBAIJ(Mat A)
 31: {
 32:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
 33:   int         *diag,*jj = a->j,i,ierr;

 36:   MatMarkDiagonal_SeqBAIJ(A);
 37:   diag = a->diag;
 38:   for (i=0; i<a->mbs; i++) {
 39:     if (jj[diag[i]] != i) {
 40:       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
 41:     }
 42:   }
 43:   return(0);
 44: }

 46: int MatMarkDiagonal_SeqBAIJ(Mat A)
 47: {
 48:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
 49:   int         i,j,*diag,m = a->mbs,ierr;

 52:   if (a->diag) return(0);

 54:   PetscMalloc((m+1)*sizeof(int),&diag);
 55:   PetscLogObjectMemory(A,(m+1)*sizeof(int));
 56:   for (i=0; i<m; i++) {
 57:     diag[i] = a->i[i+1];
 58:     for (j=a->i[i]; j<a->i[i+1]; j++) {
 59:       if (a->j[j] == i) {
 60:         diag[i] = j;
 61:         break;
 62:       }
 63:     }
 64:   }
 65:   a->diag = diag;
 66:   return(0);
 67: }


 70: EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);

 72: static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
 73: {
 74:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
 75:   int         ierr,n = a->mbs,i;

 78:   *nn = n;
 79:   if (!ia) return(0);
 80:   if (symmetric) {
 81:     MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);
 82:   } else if (oshift == 1) {
 83:     /* temporarily add 1 to i and j indices */
 84:     int nz = a->i[n];
 85:     for (i=0; i<nz; i++) a->j[i]++;
 86:     for (i=0; i<n+1; i++) a->i[i]++;
 87:     *ia = a->i; *ja = a->j;
 88:   } else {
 89:     *ia = a->i; *ja = a->j;
 90:   }

 92:   return(0);
 93: }

 95: static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
 96: {
 97:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
 98:   int         i,n = a->mbs,ierr;

101:   if (!ia) return(0);
102:   if (symmetric) {
103:     PetscFree(*ia);
104:     PetscFree(*ja);
105:   } else if (oshift == 1) {
106:     int nz = a->i[n]-1;
107:     for (i=0; i<nz; i++) a->j[i]--;
108:     for (i=0; i<n+1; i++) a->i[i]--;
109:   }
110:   return(0);
111: }

113: int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
114: {
115:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;

118:   *bs = baij->bs;
119:   return(0);
120: }


123: int MatDestroy_SeqBAIJ(Mat A)
124: {
125:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
126:   int         ierr;

129: #if defined(PETSC_USE_LOG)
130:   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
131: #endif
132:   PetscFree(a->a);
133:   if (!a->singlemalloc) {
134:     PetscFree(a->i);
135:     PetscFree(a->j);
136:   }
137:   if (a->row) {
138:     ISDestroy(a->row);
139:   }
140:   if (a->col) {
141:     ISDestroy(a->col);
142:   }
143:   if (a->diag) {PetscFree(a->diag);}
144:   if (a->ilen) {PetscFree(a->ilen);}
145:   if (a->imax) {PetscFree(a->imax);}
146:   if (a->solve_work) {PetscFree(a->solve_work);}
147:   if (a->mult_work) {PetscFree(a->mult_work);}
148:   if (a->icol) {ISDestroy(a->icol);}
149:   if (a->saved_values) {PetscFree(a->saved_values);}
150: #if defined(PETSC_USE_MAT_SINGLE)
151:   if (a->setvaluescopy) {PetscFree(a->setvaluescopy);}
152: #endif
153:   PetscFree(a);
154:   return(0);
155: }

157: int MatSetOption_SeqBAIJ(Mat A,MatOption op)
158: {
159:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;

162:   switch (op) {
163:   case MAT_ROW_ORIENTED:
164:     a->roworiented    = PETSC_TRUE;
165:     break;
166:   case MAT_COLUMN_ORIENTED:
167:     a->roworiented    = PETSC_FALSE;
168:     break;
169:   case MAT_COLUMNS_SORTED:
170:     a->sorted         = PETSC_TRUE;
171:     break;
172:   case MAT_COLUMNS_UNSORTED:
173:     a->sorted         = PETSC_FALSE;
174:     break;
175:   case MAT_KEEP_ZEROED_ROWS:
176:     a->keepzeroedrows = PETSC_TRUE;
177:     break;
178:   case MAT_NO_NEW_NONZERO_LOCATIONS:
179:     a->nonew          = 1;
180:     break;
181:   case MAT_NEW_NONZERO_LOCATION_ERR:
182:     a->nonew          = -1;
183:     break;
184:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
185:     a->nonew          = -2;
186:     break;
187:   case MAT_YES_NEW_NONZERO_LOCATIONS:
188:     a->nonew          = 0;
189:     break;
190:   case MAT_ROWS_SORTED:
191:   case MAT_ROWS_UNSORTED:
192:   case MAT_YES_NEW_DIAGONALS:
193:   case MAT_IGNORE_OFF_PROC_ENTRIES:
194:   case MAT_USE_HASH_TABLE:
195:     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignoredn");
196:     break;
197:   case MAT_NO_NEW_DIAGONALS:
198:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
199:   case MAT_USE_SINGLE_PRECISION_SOLVES:
200:     if (a->bs==4) {
201:       PetscTruth sse_enabled_local,sse_enabled_global;
202:       int        ierr;

204:       sse_enabled_local  = PETSC_FALSE;
205:       sse_enabled_global = PETSC_FALSE;

207:       PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);
208: #if defined(PETSC_HAVE_SSE)
209:       if (sse_enabled_local) {
210:           a->single_precision_solves = PETSC_TRUE;
211:           A->ops->solve              = MatSolve_SeqBAIJ_Update;
212:           A->ops->solvetranspose     = MatSolveTranspose_SeqBAIJ_Update;
213:           PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES setn");
214:           break;
215:       } else {
216:         PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignoredn");
217:       }
218: #else
219:       PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignoredn");
220: #endif
221:     }
222:     break;
223:   default:
224:     SETERRQ(PETSC_ERR_SUP,"unknown option");
225:   }
226:   return(0);
227: }

229: int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
230: {
231:   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
232:   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
233:   MatScalar    *aa,*aa_i;
234:   PetscScalar  *v_i;

237:   bs  = a->bs;
238:   ai  = a->i;
239:   aj  = a->j;
240:   aa  = a->a;
241:   bs2 = a->bs2;
242: 
243:   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
244: 
245:   bn  = row/bs;   /* Block number */
246:   bp  = row % bs; /* Block Position */
247:   M   = ai[bn+1] - ai[bn];
248:   *nz = bs*M;
249: 
250:   if (v) {
251:     *v = 0;
252:     if (*nz) {
253:       PetscMalloc((*nz)*sizeof(PetscScalar),v);
254:       for (i=0; i<M; i++) { /* for each block in the block row */
255:         v_i  = *v + i*bs;
256:         aa_i = aa + bs2*(ai[bn] + i);
257:         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
258:       }
259:     }
260:   }

262:   if (idx) {
263:     *idx = 0;
264:     if (*nz) {
265:       PetscMalloc((*nz)*sizeof(int),idx);
266:       for (i=0; i<M; i++) { /* for each block in the block row */
267:         idx_i = *idx + i*bs;
268:         itmp  = bs*aj[ai[bn] + i];
269:         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
270:       }
271:     }
272:   }
273:   return(0);
274: }

276: int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
277: {

281:   if (idx) {if (*idx) {PetscFree(*idx);}}
282:   if (v)   {if (*v)   {PetscFree(*v);}}
283:   return(0);
284: }

286: int MatTranspose_SeqBAIJ(Mat A,Mat *B)
287: {
288:   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
289:   Mat         C;
290:   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
291:   int         *rows,*cols,bs2=a->bs2;
292:   PetscScalar *array;

295:   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
296:   PetscMalloc((1+nbs)*sizeof(int),&col);
297:   PetscMemzero(col,(1+nbs)*sizeof(int));

299: #if defined(PETSC_USE_MAT_SINGLE)
300:   PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);
301:   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
302: #else
303:   array = a->a;
304: #endif

306:   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
307:   MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);
308:   PetscFree(col);
309:   PetscMalloc(2*bs*sizeof(int),&rows);
310:   cols = rows + bs;
311:   for (i=0; i<mbs; i++) {
312:     cols[0] = i*bs;
313:     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
314:     len = ai[i+1] - ai[i];
315:     for (j=0; j<len; j++) {
316:       rows[0] = (*aj++)*bs;
317:       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
318:       MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
319:       array += bs2;
320:     }
321:   }
322:   PetscFree(rows);
323: #if defined(PETSC_USE_MAT_SINGLE)
324:   PetscFree(array);
325: #endif
326: 
327:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
328:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
329: 
330:   if (B) {
331:     *B = C;
332:   } else {
333:     MatHeaderCopy(A,C);
334:   }
335:   return(0);
336: }

338: static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
339: {
340:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
341:   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
342:   PetscScalar *aa;
343:   FILE        *file;

346:   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);
347:   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);
348:   col_lens[0] = MAT_FILE_COOKIE;

350:   col_lens[1] = A->m;
351:   col_lens[2] = A->n;
352:   col_lens[3] = a->nz*bs2;

354:   /* store lengths of each row and write (including header) to file */
355:   count = 0;
356:   for (i=0; i<a->mbs; i++) {
357:     for (j=0; j<bs; j++) {
358:       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
359:     }
360:   }
361:   PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);
362:   PetscFree(col_lens);

364:   /* store column indices (zero start index) */
365:   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);
366:   count = 0;
367:   for (i=0; i<a->mbs; i++) {
368:     for (j=0; j<bs; j++) {
369:       for (k=a->i[i]; k<a->i[i+1]; k++) {
370:         for (l=0; l<bs; l++) {
371:           jj[count++] = bs*a->j[k] + l;
372:         }
373:       }
374:     }
375:   }
376:   PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);
377:   PetscFree(jj);

379:   /* store nonzero values */
380:   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
381:   count = 0;
382:   for (i=0; i<a->mbs; i++) {
383:     for (j=0; j<bs; j++) {
384:       for (k=a->i[i]; k<a->i[i+1]; k++) {
385:         for (l=0; l<bs; l++) {
386:           aa[count++] = a->a[bs2*k + l*bs + j];
387:         }
388:       }
389:     }
390:   }
391:   PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);
392:   PetscFree(aa);

394:   PetscViewerBinaryGetInfoPointer(viewer,&file);
395:   if (file) {
396:     fprintf(file,"-matload_block_size %dn",a->bs);
397:   }
398:   return(0);
399: }

401: static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
402: {
403:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
404:   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
405:   PetscViewerFormat format;

408:   PetscViewerGetFormat(viewer,&format);
409:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
410:     PetscViewerASCIIPrintf(viewer,"  block size is %dn",bs);
411:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
412:     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
413:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
414:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
415:     for (i=0; i<a->mbs; i++) {
416:       for (j=0; j<bs; j++) {
417:         PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
418:         for (k=a->i[i]; k<a->i[i+1]; k++) {
419:           for (l=0; l<bs; l++) {
420: #if defined(PETSC_USE_COMPLEX)
421:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
422:               PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l,
423:                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
424:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
425:               PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l,
426:                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
427:             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
428:               PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
429:             }
430: #else
431:             if (a->a[bs2*k + l*bs + j] != 0.0) {
432:               PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
433:             }
434: #endif
435:           }
436:         }
437:         PetscViewerASCIIPrintf(viewer,"n");
438:       }
439:     }
440:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
441:   } else {
442:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
443:     for (i=0; i<a->mbs; i++) {
444:       for (j=0; j<bs; j++) {
445:         PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
446:         for (k=a->i[i]; k<a->i[i+1]; k++) {
447:           for (l=0; l<bs; l++) {
448: #if defined(PETSC_USE_COMPLEX)
449:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
450:               PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
451:                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
452:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
453:               PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
454:                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
455:             } else {
456:               PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
457:             }
458: #else
459:             PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
460: #endif
461:           }
462:         }
463:         PetscViewerASCIIPrintf(viewer,"n");
464:       }
465:     }
466:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
467:   }
468:   PetscViewerFlush(viewer);
469:   return(0);
470: }

472: static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
473: {
474:   Mat          A = (Mat) Aa;
475:   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
476:   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
477:   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
478:   MatScalar    *aa;
479:   PetscViewer  viewer;


483:   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
484:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);

486:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

488:   /* loop over matrix elements drawing boxes */
489:   color = PETSC_DRAW_BLUE;
490:   for (i=0,row=0; i<mbs; i++,row+=bs) {
491:     for (j=a->i[i]; j<a->i[i+1]; j++) {
492:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
493:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
494:       aa = a->a + j*bs2;
495:       for (k=0; k<bs; k++) {
496:         for (l=0; l<bs; l++) {
497:           if (PetscRealPart(*aa++) >=  0.) continue;
498:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
499:         }
500:       }
501:     }
502:   }
503:   color = PETSC_DRAW_CYAN;
504:   for (i=0,row=0; i<mbs; i++,row+=bs) {
505:     for (j=a->i[i]; j<a->i[i+1]; j++) {
506:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
507:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
508:       aa = a->a + j*bs2;
509:       for (k=0; k<bs; k++) {
510:         for (l=0; l<bs; l++) {
511:           if (PetscRealPart(*aa++) != 0.) continue;
512:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
513:         }
514:       }
515:     }
516:   }

518:   color = PETSC_DRAW_RED;
519:   for (i=0,row=0; i<mbs; i++,row+=bs) {
520:     for (j=a->i[i]; j<a->i[i+1]; j++) {
521:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
522:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
523:       aa = a->a + j*bs2;
524:       for (k=0; k<bs; k++) {
525:         for (l=0; l<bs; l++) {
526:           if (PetscRealPart(*aa++) <= 0.) continue;
527:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
528:         }
529:       }
530:     }
531:   }
532:   return(0);
533: }

535: static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
536: {
537:   int          ierr;
538:   PetscReal    xl,yl,xr,yr,w,h;
539:   PetscDraw    draw;
540:   PetscTruth   isnull;


544:   PetscViewerDrawGetDraw(viewer,0,&draw);
545:   PetscDrawIsNull(draw,&isnull); if (isnull) return(0);

547:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
548:   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
549:   xr += w;    yr += h;  xl = -w;     yl = -h;
550:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
551:   PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
552:   PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
553:   return(0);
554: }

556: int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
557: {
558:   int        ierr;
559:   PetscTruth issocket,isascii,isbinary,isdraw;

562:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
563:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
564:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
565:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
566:   if (issocket) {
567:     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
568:   } else if (isascii){
569:     MatView_SeqBAIJ_ASCII(A,viewer);
570:   } else if (isbinary) {
571:     MatView_SeqBAIJ_Binary(A,viewer);
572:   } else if (isdraw) {
573:     MatView_SeqBAIJ_Draw(A,viewer);
574:   } else {
575:     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
576:   }
577:   return(0);
578: }


581: int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
582: {
583:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
584:   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
585:   int        *ai = a->i,*ailen = a->ilen;
586:   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
587:   MatScalar  *ap,*aa = a->a,zero = 0.0;

590:   for (k=0; k<m; k++) { /* loop over rows */
591:     row  = im[k]; brow = row/bs;
592:     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
593:     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
594:     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
595:     nrow = ailen[brow];
596:     for (l=0; l<n; l++) { /* loop over columns */
597:       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
598:       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
599:       col  = in[l] ;
600:       bcol = col/bs;
601:       cidx = col%bs;
602:       ridx = row%bs;
603:       high = nrow;
604:       low  = 0; /* assume unsorted */
605:       while (high-low > 5) {
606:         t = (low+high)/2;
607:         if (rp[t] > bcol) high = t;
608:         else             low  = t;
609:       }
610:       for (i=low; i<high; i++) {
611:         if (rp[i] > bcol) break;
612:         if (rp[i] == bcol) {
613:           *v++ = ap[bs2*i+bs*cidx+ridx];
614:           goto finished;
615:         }
616:       }
617:       *v++ = zero;
618:       finished:;
619:     }
620:   }
621:   return(0);
622: }

624: #if defined(PETSC_USE_MAT_SINGLE)
625: int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
626: {
627:   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
628:   int         ierr,i,N = m*n*b->bs2;
629:   MatScalar   *vsingle;

632:   if (N > b->setvalueslen) {
633:     if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
634:     PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
635:     b->setvalueslen  = N;
636:   }
637:   vsingle = b->setvaluescopy;
638:   for (i=0; i<N; i++) {
639:     vsingle[i] = v[i];
640:   }
641:   MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
642:   return(0);
643: }
644: #endif


647: int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
648: {
649:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
650:   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
651:   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
652:   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
653:   PetscTruth  roworiented=a->roworiented;
654:   MatScalar   *value = v,*ap,*aa = a->a,*bap;

657:   if (roworiented) {
658:     stepval = (n-1)*bs;
659:   } else {
660:     stepval = (m-1)*bs;
661:   }
662:   for (k=0; k<m; k++) { /* loop over added rows */
663:     row  = im[k];
664:     if (row < 0) continue;
665: #if defined(PETSC_USE_BOPT_g)  
666:     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
667: #endif
668:     rp   = aj + ai[row];
669:     ap   = aa + bs2*ai[row];
670:     rmax = imax[row];
671:     nrow = ailen[row];
672:     low  = 0;
673:     for (l=0; l<n; l++) { /* loop over added columns */
674:       if (in[l] < 0) continue;
675: #if defined(PETSC_USE_BOPT_g)  
676:       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
677: #endif
678:       col = in[l];
679:       if (roworiented) {
680:         value = v + k*(stepval+bs)*bs + l*bs;
681:       } else {
682:         value = v + l*(stepval+bs)*bs + k*bs;
683:       }
684:       if (!sorted) low = 0; high = nrow;
685:       while (high-low > 7) {
686:         t = (low+high)/2;
687:         if (rp[t] > col) high = t;
688:         else             low  = t;
689:       }
690:       for (i=low; i<high; i++) {
691:         if (rp[i] > col) break;
692:         if (rp[i] == col) {
693:           bap  = ap +  bs2*i;
694:           if (roworiented) {
695:             if (is == ADD_VALUES) {
696:               for (ii=0; ii<bs; ii++,value+=stepval) {
697:                 for (jj=ii; jj<bs2; jj+=bs) {
698:                   bap[jj] += *value++;
699:                 }
700:               }
701:             } else {
702:               for (ii=0; ii<bs; ii++,value+=stepval) {
703:                 for (jj=ii; jj<bs2; jj+=bs) {
704:                   bap[jj] = *value++;
705:                 }
706:               }
707:             }
708:           } else {
709:             if (is == ADD_VALUES) {
710:               for (ii=0; ii<bs; ii++,value+=stepval) {
711:                 for (jj=0; jj<bs; jj++) {
712:                   *bap++ += *value++;
713:                 }
714:               }
715:             } else {
716:               for (ii=0; ii<bs; ii++,value+=stepval) {
717:                 for (jj=0; jj<bs; jj++) {
718:                   *bap++  = *value++;
719:                 }
720:               }
721:             }
722:           }
723:           goto noinsert2;
724:         }
725:       }
726:       if (nonew == 1) goto noinsert2;
727:       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
728:       if (nrow >= rmax) {
729:         /* there is no extra room in row, therefore enlarge */
730:         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
731:         MatScalar *new_a;

733:         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");

735:         /* malloc new storage space */
736:         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
737:         ierr    = PetscMalloc(len,&new_a);
738:         new_j   = (int*)(new_a + bs2*new_nz);
739:         new_i   = new_j + new_nz;

741:         /* copy over old data into new slots */
742:         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
743:         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
744:         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
745:         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
746:         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));
747:         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));
748:         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
749:         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));
750:         /* free up old matrix storage */
751:         PetscFree(a->a);
752:         if (!a->singlemalloc) {
753:           PetscFree(a->i);
754:           PetscFree(a->j);
755:         }
756:         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
757:         a->singlemalloc = PETSC_TRUE;

759:         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
760:         rmax = imax[row] = imax[row] + CHUNKSIZE;
761:         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
762:         a->maxnz += bs2*CHUNKSIZE;
763:         a->reallocs++;
764:         a->nz++;
765:       }
766:       N = nrow++ - 1;
767:       /* shift up all the later entries in this row */
768:       for (ii=N; ii>=i; ii--) {
769:         rp[ii+1] = rp[ii];
770:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
771:       }
772:       if (N >= i) {
773:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
774:       }
775:       rp[i] = col;
776:       bap   = ap +  bs2*i;
777:       if (roworiented) {
778:         for (ii=0; ii<bs; ii++,value+=stepval) {
779:           for (jj=ii; jj<bs2; jj+=bs) {
780:             bap[jj] = *value++;
781:           }
782:         }
783:       } else {
784:         for (ii=0; ii<bs; ii++,value+=stepval) {
785:           for (jj=0; jj<bs; jj++) {
786:             *bap++  = *value++;
787:           }
788:         }
789:       }
790:       noinsert2:;
791:       low = i;
792:     }
793:     ailen[row] = nrow;
794:   }
795:   return(0);
796: }


799: int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
800: {
801:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
802:   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
803:   int        m = A->m,*ip,N,*ailen = a->ilen;
804:   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
805:   MatScalar  *aa = a->a,*ap;

808:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

810:   if (m) rmax = ailen[0];
811:   for (i=1; i<mbs; i++) {
812:     /* move each row back by the amount of empty slots (fshift) before it*/
813:     fshift += imax[i-1] - ailen[i-1];
814:     rmax   = PetscMax(rmax,ailen[i]);
815:     if (fshift) {
816:       ip = aj + ai[i]; ap = aa + bs2*ai[i];
817:       N = ailen[i];
818:       for (j=0; j<N; j++) {
819:         ip[j-fshift] = ip[j];
820:         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
821:       }
822:     }
823:     ai[i] = ai[i-1] + ailen[i-1];
824:   }
825:   if (mbs) {
826:     fshift += imax[mbs-1] - ailen[mbs-1];
827:     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
828:   }
829:   /* reset ilen and imax for each row */
830:   for (i=0; i<mbs; i++) {
831:     ailen[i] = imax[i] = ai[i+1] - ai[i];
832:   }
833:   a->nz = ai[mbs];

835:   /* diagonals may have moved, so kill the diagonal pointers */
836:   if (fshift && a->diag) {
837:     PetscFree(a->diag);
838:     PetscLogObjectMemory(A,-(m+1)*sizeof(int));
839:     a->diag = 0;
840:   }
841:   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d usedn",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
842:   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %dn",a->reallocs);
843:   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %dn",rmax);
844:   a->reallocs          = 0;
845:   A->info.nz_unneeded  = (PetscReal)fshift*bs2;

847:   return(0);
848: }



852: /* 
853:    This function returns an array of flags which indicate the locations of contiguous
854:    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
855:    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
856:    Assume: sizes should be long enough to hold all the values.
857: */
858: static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
859: {
860:   int        i,j,k,row;
861:   PetscTruth flg;

864:   for (i=0,j=0; i<n; j++) {
865:     row = idx[i];
866:     if (row%bs!=0) { /* Not the begining of a block */
867:       sizes[j] = 1;
868:       i++;
869:     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
870:       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
871:       i++;
872:     } else { /* Begining of the block, so check if the complete block exists */
873:       flg = PETSC_TRUE;
874:       for (k=1; k<bs; k++) {
875:         if (row+k != idx[i+k]) { /* break in the block */
876:           flg = PETSC_FALSE;
877:           break;
878:         }
879:       }
880:       if (flg == PETSC_TRUE) { /* No break in the bs */
881:         sizes[j] = bs;
882:         i+= bs;
883:       } else {
884:         sizes[j] = 1;
885:         i++;
886:       }
887:     }
888:   }
889:   *bs_max = j;
890:   return(0);
891: }
892: 
893: int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag)
894: {
895:   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
896:   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
897:   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
898:   PetscScalar zero = 0.0;
899:   MatScalar   *aa;

902:   /* Make a copy of the IS and  sort it */
903:   ISGetLocalSize(is,&is_n);
904:   ISGetIndices(is,&is_idx);

906:   /* allocate memory for rows,sizes */
907:   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);
908:   sizes = rows + is_n;

910:   /* copy IS values to rows, and sort them */
911:   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
912:   PetscSortInt(is_n,rows);
913:   if (baij->keepzeroedrows) {
914:     for (i=0; i<is_n; i++) { sizes[i] = 1; }
915:     bs_max = is_n;
916:   } else {
917:     MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
918:   }
919:   ISRestoreIndices(is,&is_idx);

921:   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
922:     row   = rows[j];
923:     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
924:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
925:     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
926:     if (sizes[i] == bs && !baij->keepzeroedrows) {
927:       if (diag) {
928:         if (baij->ilen[row/bs] > 0) {
929:           baij->ilen[row/bs]       = 1;
930:           baij->j[baij->i[row/bs]] = row/bs;
931:           PetscMemzero(aa,count*bs*sizeof(MatScalar));
932:         }
933:         /* Now insert all the diagonal values for this bs */
934:         for (k=0; k<bs; k++) {
935:           (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);
936:         }
937:       } else { /* (!diag) */
938:         baij->ilen[row/bs] = 0;
939:       } /* end (!diag) */
940:     } else { /* (sizes[i] != bs) */
941: #if defined (PETSC_USE_DEBUG)
942:       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
943: #endif
944:       for (k=0; k<count; k++) {
945:         aa[0] =  zero;
946:         aa    += bs;
947:       }
948:       if (diag) {
949:         (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);
950:       }
951:     }
952:   }

954:   PetscFree(rows);
955:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
956:   return(0);
957: }

959: int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
960: {
961:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
962:   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
963:   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
964:   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
965:   int         ridx,cidx,bs2=a->bs2,ierr;
966:   PetscTruth  roworiented=a->roworiented;
967:   MatScalar   *ap,value,*aa=a->a,*bap;

970:   for (k=0; k<m; k++) { /* loop over added rows */
971:     row  = im[k]; brow = row/bs;
972:     if (row < 0) continue;
973: #if defined(PETSC_USE_BOPT_g)  
974:     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
975: #endif
976:     rp   = aj + ai[brow];
977:     ap   = aa + bs2*ai[brow];
978:     rmax = imax[brow];
979:     nrow = ailen[brow];
980:     low  = 0;
981:     for (l=0; l<n; l++) { /* loop over added columns */
982:       if (in[l] < 0) continue;
983: #if defined(PETSC_USE_BOPT_g)  
984:       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
985: #endif
986:       col = in[l]; bcol = col/bs;
987:       ridx = row % bs; cidx = col % bs;
988:       if (roworiented) {
989:         value = v[l + k*n];
990:       } else {
991:         value = v[k + l*m];
992:       }
993:       if (!sorted) low = 0; high = nrow;
994:       while (high-low > 7) {
995:         t = (low+high)/2;
996:         if (rp[t] > bcol) high = t;
997:         else              low  = t;
998:       }
999:       for (i=low; i<high; i++) {
1000:         if (rp[i] > bcol) break;
1001:         if (rp[i] == bcol) {
1002:           bap  = ap +  bs2*i + bs*cidx + ridx;
1003:           if (is == ADD_VALUES) *bap += value;
1004:           else                  *bap  = value;
1005:           goto noinsert1;
1006:         }
1007:       }
1008:       if (nonew == 1) goto noinsert1;
1009:       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1010:       if (nrow >= rmax) {
1011:         /* there is no extra room in row, therefore enlarge */
1012:         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1013:         MatScalar *new_a;

1015:         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");

1017:         /* Malloc new storage space */
1018:         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1019:         ierr    = PetscMalloc(len,&new_a);
1020:         new_j   = (int*)(new_a + bs2*new_nz);
1021:         new_i   = new_j + new_nz;

1023:         /* copy over old data into new slots */
1024:         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1025:         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1026:         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
1027:         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1028:         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));
1029:         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));
1030:         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
1031:         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));
1032:         /* free up old matrix storage */
1033:         PetscFree(a->a);
1034:         if (!a->singlemalloc) {
1035:           PetscFree(a->i);
1036:           PetscFree(a->j);
1037:         }
1038:         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1039:         a->singlemalloc = PETSC_TRUE;

1041:         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1042:         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1043:         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1044:         a->maxnz += bs2*CHUNKSIZE;
1045:         a->reallocs++;
1046:         a->nz++;
1047:       }
1048:       N = nrow++ - 1;
1049:       /* shift up all the later entries in this row */
1050:       for (ii=N; ii>=i; ii--) {
1051:         rp[ii+1] = rp[ii];
1052:         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1053:       }
1054:       if (N>=i) {
1055:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1056:       }
1057:       rp[i]                      = bcol;
1058:       ap[bs2*i + bs*cidx + ridx] = value;
1059:       noinsert1:;
1060:       low = i;
1061:     }
1062:     ailen[brow] = nrow;
1063:   }
1064:   return(0);
1065: }


1068: int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1069: {
1070:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1071:   Mat         outA;
1072:   int         ierr;
1073:   PetscTruth  row_identity,col_identity;

1076:   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1077:   ISIdentity(row,&row_identity);
1078:   ISIdentity(col,&col_identity);
1079:   if (!row_identity || !col_identity) {
1080:     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1081:   }

1083:   outA          = inA;
1084:   inA->factor   = FACTOR_LU;

1086:   if (!a->diag) {
1087:     MatMarkDiagonal_SeqBAIJ(inA);
1088:   }

1090:   a->row        = row;
1091:   a->col        = col;
1092:   ierr          = PetscObjectReference((PetscObject)row);
1093:   ierr          = PetscObjectReference((PetscObject)col);
1094: 
1095:   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1096:   ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
1097:   PetscLogObjectParent(inA,a->icol);
1098: 
1099:   /*
1100:       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 
1101:       for ILU(0) factorization with natural ordering
1102:   */
1103:   if (a->bs < 8) {
1104:     MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);
1105:   } else {
1106:     if (!a->solve_work) {
1107:       PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);
1108:       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1109:     }
1110:   }

1112:   MatLUFactorNumeric(inA,&outA);

1114:   return(0);
1115: }
1116: int MatPrintHelp_SeqBAIJ(Mat A)
1117: {
1118:   static PetscTruth called = PETSC_FALSE;
1119:   MPI_Comm          comm = A->comm;
1120:   int               ierr;

1123:   if (called) {return(0);} else called = PETSC_TRUE;
1124:   (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):n");
1125:   (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>n");
1126:   return(0);
1127: }

1129: EXTERN_C_BEGIN
1130: int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1131: {
1132:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1133:   int         i,nz,nbs;

1136:   nz  = baij->maxnz/baij->bs2;
1137:   nbs = baij->nbs;
1138:   for (i=0; i<nz; i++) {
1139:     baij->j[i] = indices[i];
1140:   }
1141:   baij->nz = nz;
1142:   for (i=0; i<nbs; i++) {
1143:     baij->ilen[i] = baij->imax[i];
1144:   }

1146:   return(0);
1147: }
1148: EXTERN_C_END

1150: /*@
1151:     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1152:        in the matrix.

1154:   Input Parameters:
1155: +  mat - the SeqBAIJ matrix
1156: -  indices - the column indices

1158:   Level: advanced

1160:   Notes:
1161:     This can be called if you have precomputed the nonzero structure of the 
1162:   matrix and want to provide it to the matrix object to improve the performance
1163:   of the MatSetValues() operation.

1165:     You MUST have set the correct numbers of nonzeros per row in the call to 
1166:   MatCreateSeqBAIJ().

1168:     MUST be called before any calls to MatSetValues();

1170: @*/
1171: int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1172: {
1173:   int ierr,(*f)(Mat,int *);

1177:   PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
1178:   if (f) {
1179:     (*f)(mat,indices);
1180:   } else {
1181:     SETERRQ(1,"Wrong type of matrix to set column indices");
1182:   }
1183:   return(0);
1184: }

1186: int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1187: {
1188:   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1189:   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1190:   PetscReal    atmp;
1191:   PetscScalar  *x,zero = 0.0;
1192:   MatScalar    *aa;
1193:   int          ncols,brow,krow,kcol;

1196:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1197:   bs   = a->bs;
1198:   aa   = a->a;
1199:   ai   = a->i;
1200:   aj   = a->j;
1201:   mbs = a->mbs;

1203:   VecSet(&zero,v);
1204:   VecGetArray(v,&x);
1205:   VecGetLocalSize(v,&n);
1206:   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1207:   for (i=0; i<mbs; i++) {
1208:     ncols = ai[1] - ai[0]; ai++;
1209:     brow  = bs*i;
1210:     for (j=0; j<ncols; j++){
1211:       /* bcol = bs*(*aj); */
1212:       for (kcol=0; kcol<bs; kcol++){
1213:         for (krow=0; krow<bs; krow++){
1214:           atmp = PetscAbsScalar(*aa); aa++;
1215:           row = brow + krow;    /* row index */
1216:           /* printf("val[%d,%d]: %gn",row,bcol+kcol,atmp); */
1217:           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1218:         }
1219:       }
1220:       aj++;
1221:     }
1222:   }
1223:   VecRestoreArray(v,&x);
1224:   return(0);
1225: }

1227: int MatSetUpPreallocation_SeqBAIJ(Mat A)
1228: {
1229:   int        ierr;

1232:    MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);
1233:   return(0);
1234: }

1236: int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array)
1237: {
1238:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1240:   *array = a->a;
1241:   return(0);
1242: }

1244: int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array)
1245: {
1247:   return(0);
1248: }

1250: /* -------------------------------------------------------------------*/
1251: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1252:        MatGetRow_SeqBAIJ,
1253:        MatRestoreRow_SeqBAIJ,
1254:        MatMult_SeqBAIJ_N,
1255:        MatMultAdd_SeqBAIJ_N,
1256:        MatMultTranspose_SeqBAIJ,
1257:        MatMultTransposeAdd_SeqBAIJ,
1258:        MatSolve_SeqBAIJ_N,
1259:        0,
1260:        0,
1261:        0,
1262:        MatLUFactor_SeqBAIJ,
1263:        0,
1264:        0,
1265:        MatTranspose_SeqBAIJ,
1266:        MatGetInfo_SeqBAIJ,
1267:        MatEqual_SeqBAIJ,
1268:        MatGetDiagonal_SeqBAIJ,
1269:        MatDiagonalScale_SeqBAIJ,
1270:        MatNorm_SeqBAIJ,
1271:        0,
1272:        MatAssemblyEnd_SeqBAIJ,
1273:        0,
1274:        MatSetOption_SeqBAIJ,
1275:        MatZeroEntries_SeqBAIJ,
1276:        MatZeroRows_SeqBAIJ,
1277:        MatLUFactorSymbolic_SeqBAIJ,
1278:        MatLUFactorNumeric_SeqBAIJ_N,
1279:        0,
1280:        0,
1281:        MatSetUpPreallocation_SeqBAIJ,
1282:        MatILUFactorSymbolic_SeqBAIJ,
1283:        0,
1284:        MatGetArray_SeqBAIJ,
1285:        MatRestoreArray_SeqBAIJ,
1286:        MatDuplicate_SeqBAIJ,
1287:        0,
1288:        0,
1289:        MatILUFactor_SeqBAIJ,
1290:        0,
1291:        0,
1292:        MatGetSubMatrices_SeqBAIJ,
1293:        MatIncreaseOverlap_SeqBAIJ,
1294:        MatGetValues_SeqBAIJ,
1295:        0,
1296:        MatPrintHelp_SeqBAIJ,
1297:        MatScale_SeqBAIJ,
1298:        0,
1299:        0,
1300:        0,
1301:        MatGetBlockSize_SeqBAIJ,
1302:        MatGetRowIJ_SeqBAIJ,
1303:        MatRestoreRowIJ_SeqBAIJ,
1304:        0,
1305:        0,
1306:        0,
1307:        0,
1308:        0,
1309:        0,
1310:        MatSetValuesBlocked_SeqBAIJ,
1311:        MatGetSubMatrix_SeqBAIJ,
1312:        MatDestroy_SeqBAIJ,
1313:        MatView_SeqBAIJ,
1314:        MatGetPetscMaps_Petsc,
1315:        0,
1316:        0,
1317:        0,
1318:        0,
1319:        0,
1320:        0,
1321:        MatGetRowMax_SeqBAIJ,
1322:        MatConvert_Basic};

1324: EXTERN_C_BEGIN
1325: int MatStoreValues_SeqBAIJ(Mat mat)
1326: {
1327:   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1328:   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1329:   int         ierr;

1332:   if (aij->nonew != 1) {
1333:     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1334:   }

1336:   /* allocate space for values if not already there */
1337:   if (!aij->saved_values) {
1338:     PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
1339:   }

1341:   /* copy values over */
1342:   PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
1343:   return(0);
1344: }
1345: EXTERN_C_END

1347: EXTERN_C_BEGIN
1348: int MatRetrieveValues_SeqBAIJ(Mat mat)
1349: {
1350:   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1351:   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;

1354:   if (aij->nonew != 1) {
1355:     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1356:   }
1357:   if (!aij->saved_values) {
1358:     SETERRQ(1,"Must call MatStoreValues(A);first");
1359:   }

1361:   /* copy values over */
1362:   PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
1363:   return(0);
1364: }
1365: EXTERN_C_END

1367: EXTERN_C_BEGIN
1368: extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1369: EXTERN_C_END

1371: EXTERN_C_BEGIN
1372: int MatCreate_SeqBAIJ(Mat B)
1373: {
1374:   int         ierr,size;
1375:   Mat_SeqBAIJ *b;

1378:   MPI_Comm_size(B->comm,&size);
1379:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");

1381:   B->m = B->M = PetscMax(B->m,B->M);
1382:   B->n = B->N = PetscMax(B->n,B->N);
1383:   ierr    = PetscNew(Mat_SeqBAIJ,&b);
1384:   B->data = (void*)b;
1385:   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1386:   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1387:   B->factor           = 0;
1388:   B->lupivotthreshold = 1.0;
1389:   B->mapping          = 0;
1390:   b->row              = 0;
1391:   b->col              = 0;
1392:   b->icol             = 0;
1393:   b->reallocs         = 0;
1394:   b->saved_values     = 0;
1395: #if defined(PETSC_USE_MAT_SINGLE)
1396:   b->setvalueslen     = 0;
1397:   b->setvaluescopy    = PETSC_NULL;
1398: #endif
1399:   b->single_precision_solves = PETSC_FALSE;

1401:   PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);
1402:   PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);

1404:   b->sorted           = PETSC_FALSE;
1405:   b->roworiented      = PETSC_TRUE;
1406:   b->nonew            = 0;
1407:   b->diag             = 0;
1408:   b->solve_work       = 0;
1409:   b->mult_work        = 0;
1410:   b->spptr            = 0;
1411:   B->info.nz_unneeded = (PetscReal)b->maxnz;
1412:   b->keepzeroedrows   = PETSC_FALSE;

1414:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1415:                                      "MatStoreValues_SeqBAIJ",
1416:                                       MatStoreValues_SeqBAIJ);
1417:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1418:                                      "MatRetrieveValues_SeqBAIJ",
1419:                                       MatRetrieveValues_SeqBAIJ);
1420:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1421:                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1422:                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);
1423:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1424:                                      "MatConvert_SeqBAIJ_SeqAIJ",
1425:                                       MatConvert_SeqBAIJ_SeqAIJ);
1426:   return(0);
1427: }
1428: EXTERN_C_END

1430: int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1431: {
1432:   Mat         C;
1433:   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1434:   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;

1437:   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");

1439:   *B = 0;
1440:   MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);
1441:   MatSetType(C,MATSEQBAIJ);
1442:   c    = (Mat_SeqBAIJ*)C->data;

1444:   c->bs         = a->bs;
1445:   c->bs2        = a->bs2;
1446:   c->mbs        = a->mbs;
1447:   c->nbs        = a->nbs;
1448:   PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));

1450:   PetscMalloc((mbs+1)*sizeof(int),&c->imax);
1451:   PetscMalloc((mbs+1)*sizeof(int),&c->ilen);
1452:   for (i=0; i<mbs; i++) {
1453:     c->imax[i] = a->imax[i];
1454:     c->ilen[i] = a->ilen[i];
1455:   }

1457:   /* allocate the matrix space */
1458:   c->singlemalloc = PETSC_TRUE;
1459:   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1460:   PetscMalloc(len,&c->a);
1461:   c->j = (int*)(c->a + nz*bs2);
1462:   c->i = c->j + nz;
1463:   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1464:   if (mbs > 0) {
1465:     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1466:     if (cpvalues == MAT_COPY_VALUES) {
1467:       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
1468:     } else {
1469:       PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
1470:     }
1471:   }

1473:   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1474:   c->sorted      = a->sorted;
1475:   c->roworiented = a->roworiented;
1476:   c->nonew       = a->nonew;

1478:   if (a->diag) {
1479:     PetscMalloc((mbs+1)*sizeof(int),&c->diag);
1480:     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1481:     for (i=0; i<mbs; i++) {
1482:       c->diag[i] = a->diag[i];
1483:     }
1484:   } else c->diag        = 0;
1485:   c->nz                 = a->nz;
1486:   c->maxnz              = a->maxnz;
1487:   c->solve_work         = 0;
1488:   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1489:   c->mult_work          = 0;
1490:   C->preallocated       = PETSC_TRUE;
1491:   C->assembled          = PETSC_TRUE;
1492:   *B = C;
1493:   PetscFListDuplicate(A->qlist,&C->qlist);
1494:   return(0);
1495: }

1497: EXTERN_C_BEGIN
1498: int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1499: {
1500:   Mat_SeqBAIJ  *a;
1501:   Mat          B;
1502:   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1503:   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1504:   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1505:   int          *masked,nmask,tmp,bs2,ishift;
1506:   PetscScalar  *aa;
1507:   MPI_Comm     comm = ((PetscObject)viewer)->comm;

1510:   PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
1511:   bs2  = bs*bs;

1513:   MPI_Comm_size(comm,&size);
1514:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1515:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1516:   PetscBinaryRead(fd,header,4,PETSC_INT);
1517:   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1518:   M = header[1]; N = header[2]; nz = header[3];

1520:   if (header[3] < 0) {
1521:     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1522:   }

1524:   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

1526:   /* 
1527:      This code adds extra rows to make sure the number of rows is 
1528:     divisible by the blocksize
1529:   */
1530:   mbs        = M/bs;
1531:   extra_rows = bs - M + bs*(mbs);
1532:   if (extra_rows == bs) extra_rows = 0;
1533:   else                  mbs++;
1534:   if (extra_rows) {
1535:     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksizen");
1536:   }

1538:   /* read in row lengths */
1539:   PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
1540:   PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1541:   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;

1543:   /* read in column indices */
1544:   PetscMalloc((nz+extra_rows)*sizeof(int),&jj);
1545:   PetscBinaryRead(fd,jj,nz,PETSC_INT);
1546:   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;

1548:   /* loop over row lengths determining block row lengths */
1549:   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);
1550:   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));
1551:   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);
1552:   ierr     = PetscMemzero(mask,mbs*sizeof(int));
1553:   masked   = mask + mbs;
1554:   rowcount = 0; nzcount = 0;
1555:   for (i=0; i<mbs; i++) {
1556:     nmask = 0;
1557:     for (j=0; j<bs; j++) {
1558:       kmax = rowlengths[rowcount];
1559:       for (k=0; k<kmax; k++) {
1560:         tmp = jj[nzcount++]/bs;
1561:         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1562:       }
1563:       rowcount++;
1564:     }
1565:     browlengths[i] += nmask;
1566:     /* zero out the mask elements we set */
1567:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1568:   }

1570:   /* create our matrix */
1571:   MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1572:   B = *A;
1573:   a = (Mat_SeqBAIJ*)B->data;

1575:   /* set matrix "i" values */
1576:   a->i[0] = 0;
1577:   for (i=1; i<= mbs; i++) {
1578:     a->i[i]      = a->i[i-1] + browlengths[i-1];
1579:     a->ilen[i-1] = browlengths[i-1];
1580:   }
1581:   a->nz         = 0;
1582:   for (i=0; i<mbs; i++) a->nz += browlengths[i];

1584:   /* read in nonzero values */
1585:   PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
1586:   PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
1587:   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;

1589:   /* set "a" and "j" values into matrix */
1590:   nzcount = 0; jcount = 0;
1591:   for (i=0; i<mbs; i++) {
1592:     nzcountb = nzcount;
1593:     nmask    = 0;
1594:     for (j=0; j<bs; j++) {
1595:       kmax = rowlengths[i*bs+j];
1596:       for (k=0; k<kmax; k++) {
1597:         tmp = jj[nzcount++]/bs;
1598:         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1599:       }
1600:     }
1601:     /* sort the masked values */
1602:     PetscSortInt(nmask,masked);

1604:     /* set "j" values into matrix */
1605:     maskcount = 1;
1606:     for (j=0; j<nmask; j++) {
1607:       a->j[jcount++]  = masked[j];
1608:       mask[masked[j]] = maskcount++;
1609:     }
1610:     /* set "a" values into matrix */
1611:     ishift = bs2*a->i[i];
1612:     for (j=0; j<bs; j++) {
1613:       kmax = rowlengths[i*bs+j];
1614:       for (k=0; k<kmax; k++) {
1615:         tmp       = jj[nzcountb]/bs ;
1616:         block     = mask[tmp] - 1;
1617:         point     = jj[nzcountb] - bs*tmp;
1618:         idx       = ishift + bs2*block + j + bs*point;
1619:         a->a[idx] = (MatScalar)aa[nzcountb++];
1620:       }
1621:     }
1622:     /* zero out the mask elements we set */
1623:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1624:   }
1625:   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");

1627:   PetscFree(rowlengths);
1628:   PetscFree(browlengths);
1629:   PetscFree(aa);
1630:   PetscFree(jj);
1631:   PetscFree(mask);

1633:   B->assembled = PETSC_TRUE;
1634: 
1635:   MatView_Private(B);
1636:   return(0);
1637: }
1638: EXTERN_C_END

1640: /*@C
1641:    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1642:    compressed row) format.  For good matrix assembly performance the
1643:    user should preallocate the matrix storage by setting the parameter nz
1644:    (or the array nnz).  By setting these parameters accurately, performance
1645:    during matrix assembly can be increased by more than a factor of 50.

1647:    Collective on MPI_Comm

1649:    Input Parameters:
1650: +  comm - MPI communicator, set to PETSC_COMM_SELF
1651: .  bs - size of block
1652: .  m - number of rows
1653: .  n - number of columns
1654: .  nz - number of nonzero blocks  per block row (same for all rows)
1655: -  nnz - array containing the number of nonzero blocks in the various block rows 
1656:          (possibly different for each block row) or PETSC_NULL

1658:    Output Parameter:
1659: .  A - the matrix 

1661:    Options Database Keys:
1662: .   -mat_no_unroll - uses code that does not unroll the loops in the 
1663:                      block calculations (much slower)
1664: .    -mat_block_size - size of the blocks to use

1666:    Level: intermediate

1668:    Notes:
1669:    A nonzero block is any block that as 1 or more nonzeros in it

1671:    The block AIJ format is fully compatible with standard Fortran 77
1672:    storage.  That is, the stored row and column indices can begin at
1673:    either one (as in Fortran) or zero.  See the users' manual for details.

1675:    Specify the preallocated storage with either nz or nnz (not both).
1676:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
1677:    allocation.  For additional details, see the users manual chapter on
1678:    matrices.

1680: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1681: @*/
1682: int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1683: {
1685: 
1687:   MatCreate(comm,m,n,m,n,A);
1688:   MatSetType(*A,MATSEQBAIJ);
1689:   MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);
1690:   return(0);
1691: }

1693: /*@C
1694:    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1695:    per row in the matrix. For good matrix assembly performance the
1696:    user should preallocate the matrix storage by setting the parameter nz
1697:    (or the array nnz).  By setting these parameters accurately, performance
1698:    during matrix assembly can be increased by more than a factor of 50.

1700:    Collective on MPI_Comm

1702:    Input Parameters:
1703: +  A - the matrix
1704: .  bs - size of block
1705: .  nz - number of block nonzeros per block row (same for all rows)
1706: -  nnz - array containing the number of block nonzeros in the various block rows 
1707:          (possibly different for each block row) or PETSC_NULL

1709:    Options Database Keys:
1710: .   -mat_no_unroll - uses code that does not unroll the loops in the 
1711:                      block calculations (much slower)
1712: .    -mat_block_size - size of the blocks to use

1714:    Level: intermediate

1716:    Notes:
1717:    The block AIJ format is fully compatible with standard Fortran 77
1718:    storage.  That is, the stored row and column indices can begin at
1719:    either one (as in Fortran) or zero.  See the users' manual for details.

1721:    Specify the preallocated storage with either nz or nnz (not both).
1722:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
1723:    allocation.  For additional details, see the users manual chapter on
1724:    matrices.

1726: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1727: @*/
1728: int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1729: {
1730:   Mat_SeqBAIJ *b;
1731:   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1732:   PetscTruth  flg;

1735:   PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);
1736:   if (!flg) return(0);

1738:   B->preallocated = PETSC_TRUE;
1739:   PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);
1740:   if (nnz && newbs != bs) {
1741:     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1742:   }
1743:   bs   = newbs;

1745:   mbs  = B->m/bs;
1746:   nbs  = B->n/bs;
1747:   bs2  = bs*bs;

1749:   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1750:     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1751:   }

1753:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1754:   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1755:   if (nnz) {
1756:     for (i=0; i<mbs; i++) {
1757:       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1758:       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1759:     }
1760:   }

1762:   b       = (Mat_SeqBAIJ*)B->data;
1763:   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);
1764:   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1765:   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1766:   if (!flg) {
1767:     switch (bs) {
1768:     case 1:
1769:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1770:       B->ops->mult            = MatMult_SeqBAIJ_1;
1771:       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1772:       break;
1773:     case 2:
1774:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1775:       B->ops->mult            = MatMult_SeqBAIJ_2;
1776:       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1777:       break;
1778:     case 3:
1779:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1780:       B->ops->mult            = MatMult_SeqBAIJ_3;
1781:       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1782:       break;
1783:     case 4:
1784:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1785:       B->ops->mult            = MatMult_SeqBAIJ_4;
1786:       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1787:       break;
1788:     case 5:
1789:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1790:       B->ops->mult            = MatMult_SeqBAIJ_5;
1791:       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1792:       break;
1793:     case 6:
1794:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1795:       B->ops->mult            = MatMult_SeqBAIJ_6;
1796:       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1797:       break;
1798:     case 7:
1799:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1800:       B->ops->mult            = MatMult_SeqBAIJ_7;
1801:       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1802:       break;
1803:     default:
1804:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1805:       B->ops->mult            = MatMult_SeqBAIJ_N;
1806:       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1807:       break;
1808:     }
1809:   }
1810:   b->bs      = bs;
1811:   b->mbs     = mbs;
1812:   b->nbs     = nbs;
1813:   PetscMalloc((mbs+1)*sizeof(int),&b->imax);
1814:   if (!nnz) {
1815:     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1816:     else if (nz <= 0)        nz = 1;
1817:     for (i=0; i<mbs; i++) b->imax[i] = nz;
1818:     nz = nz*mbs;
1819:   } else {
1820:     nz = 0;
1821:     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1822:   }

1824:   /* allocate the matrix space */
1825:   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1826:   ierr  = PetscMalloc(len,&b->a);
1827:   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
1828:   b->j  = (int*)(b->a + nz*bs2);
1829:   PetscMemzero(b->j,nz*sizeof(int));
1830:   b->i  = b->j + nz;
1831:   b->singlemalloc = PETSC_TRUE;

1833:   b->i[0] = 0;
1834:   for (i=1; i<mbs+1; i++) {
1835:     b->i[i] = b->i[i-1] + b->imax[i-1];
1836:   }

1838:   /* b->ilen will count nonzeros in each block row so far. */
1839:   PetscMalloc((mbs+1)*sizeof(int),&b->ilen);
1840:   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1841:   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}

1843:   b->bs               = bs;
1844:   b->bs2              = bs2;
1845:   b->mbs              = mbs;
1846:   b->nz               = 0;
1847:   b->maxnz            = nz*bs2;
1848:   B->info.nz_unneeded = (PetscReal)b->maxnz;
1849:   return(0);
1850: }