Actual source code: baij.c

  1: /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/

  3: /*
  4:     Defines the basic matrix operations for the BAIJ (compressed row)
  5:   matrix storage format.
  6: */
 7:  #include src/mat/impls/baij/seq/baij.h
 8:  #include src/vec/vecimpl.h
 9:  #include src/inline/spops.h
 10:  #include petscsys.h

 12: /*  UGLY, ugly, ugly
 13:    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 
 14:    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 
 15:    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
 16:    converts the entries into single precision and then calls ..._MatScalar() to put them
 17:    into the single precision data structures.
 18: */
 19: #if defined(PETSC_USE_MAT_SINGLE)
 20: EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
 21: #else
 22: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
 23: #endif
 24: #if defined(PETSC_HAVE_DSCPACK)
 25: EXTERN int MatUseDSCPACK_MPIBAIJ(Mat);
 26: #endif

 28: #define CHUNKSIZE  10

 30: /*
 31:      Checks for missing diagonals
 32: */
 33: int MatMissingDiagonal_SeqBAIJ(Mat A)
 34: {
 35:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
 36:   int         *diag,*jj = a->j,i,ierr;

 39:   MatMarkDiagonal_SeqBAIJ(A);
 40:   diag = a->diag;
 41:   for (i=0; i<a->mbs; i++) {
 42:     if (jj[diag[i]] != i) {
 43:       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
 44:     }
 45:   }
 46:   return(0);
 47: }

 49: int MatMarkDiagonal_SeqBAIJ(Mat A)
 50: {
 51:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
 52:   int         i,j,*diag,m = a->mbs,ierr;

 55:   if (a->diag) return(0);

 57:   PetscMalloc((m+1)*sizeof(int),&diag);
 58:   PetscLogObjectMemory(A,(m+1)*sizeof(int));
 59:   for (i=0; i<m; i++) {
 60:     diag[i] = a->i[i+1];
 61:     for (j=a->i[i]; j<a->i[i+1]; j++) {
 62:       if (a->j[j] == i) {
 63:         diag[i] = j;
 64:         break;
 65:       }
 66:     }
 67:   }
 68:   a->diag = diag;
 69:   return(0);
 70: }


 73: EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);

 75: static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
 76: {
 77:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
 78:   int         ierr,n = a->mbs,i;

 81:   *nn = n;
 82:   if (!ia) return(0);
 83:   if (symmetric) {
 84:     MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);
 85:   } else if (oshift == 1) {
 86:     /* temporarily add 1 to i and j indices */
 87:     int nz = a->i[n];
 88:     for (i=0; i<nz; i++) a->j[i]++;
 89:     for (i=0; i<n+1; i++) a->i[i]++;
 90:     *ia = a->i; *ja = a->j;
 91:   } else {
 92:     *ia = a->i; *ja = a->j;
 93:   }

 95:   return(0);
 96: }

 98: static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
 99: {
100:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
101:   int         i,n = a->mbs,ierr;

104:   if (!ia) return(0);
105:   if (symmetric) {
106:     PetscFree(*ia);
107:     PetscFree(*ja);
108:   } else if (oshift == 1) {
109:     int nz = a->i[n]-1;
110:     for (i=0; i<nz; i++) a->j[i]--;
111:     for (i=0; i<n+1; i++) a->i[i]--;
112:   }
113:   return(0);
114: }

116: int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
117: {
118:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;

121:   *bs = baij->bs;
122:   return(0);
123: }


126: int MatDestroy_SeqBAIJ(Mat A)
127: {
128:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
129:   int         ierr;

132: #if defined(PETSC_USE_LOG)
133:   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
134: #endif
135:   PetscFree(a->a);
136:   if (!a->singlemalloc) {
137:     PetscFree(a->i);
138:     PetscFree(a->j);
139:   }
140:   if (a->row) {
141:     ISDestroy(a->row);
142:   }
143:   if (a->col) {
144:     ISDestroy(a->col);
145:   }
146:   if (a->diag) {PetscFree(a->diag);}
147:   if (a->ilen) {PetscFree(a->ilen);}
148:   if (a->imax) {PetscFree(a->imax);}
149:   if (a->solve_work) {PetscFree(a->solve_work);}
150:   if (a->mult_work) {PetscFree(a->mult_work);}
151:   if (a->icol) {ISDestroy(a->icol);}
152:   if (a->saved_values) {PetscFree(a->saved_values);}
153: #if defined(PETSC_USE_MAT_SINGLE)
154:   if (a->setvaluescopy) {PetscFree(a->setvaluescopy);}
155: #endif
156:   PetscFree(a);
157:   return(0);
158: }

160: int MatSetOption_SeqBAIJ(Mat A,MatOption op)
161: {
162:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;

165:   switch (op) {
166:   case MAT_ROW_ORIENTED:
167:     a->roworiented    = PETSC_TRUE;
168:     break;
169:   case MAT_COLUMN_ORIENTED:
170:     a->roworiented    = PETSC_FALSE;
171:     break;
172:   case MAT_COLUMNS_SORTED:
173:     a->sorted         = PETSC_TRUE;
174:     break;
175:   case MAT_COLUMNS_UNSORTED:
176:     a->sorted         = PETSC_FALSE;
177:     break;
178:   case MAT_KEEP_ZEROED_ROWS:
179:     a->keepzeroedrows = PETSC_TRUE;
180:     break;
181:   case MAT_NO_NEW_NONZERO_LOCATIONS:
182:     a->nonew          = 1;
183:     break;
184:   case MAT_NEW_NONZERO_LOCATION_ERR:
185:     a->nonew          = -1;
186:     break;
187:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
188:     a->nonew          = -2;
189:     break;
190:   case MAT_YES_NEW_NONZERO_LOCATIONS:
191:     a->nonew          = 0;
192:     break;
193:   case MAT_ROWS_SORTED:
194:   case MAT_ROWS_UNSORTED:
195:   case MAT_YES_NEW_DIAGONALS:
196:   case MAT_IGNORE_OFF_PROC_ENTRIES:
197:   case MAT_USE_HASH_TABLE:
198:     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignoredn");
199:     break;
200:   case MAT_NO_NEW_DIAGONALS:
201:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
202:   case MAT_USE_SINGLE_PRECISION_SOLVES:
203:     if (a->bs==4) {
204:       PetscTruth sse_enabled_local,sse_enabled_global;
205:       int        ierr;

207:       sse_enabled_local  = PETSC_FALSE;
208:       sse_enabled_global = PETSC_FALSE;

210:       PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);
211: #if defined(PETSC_HAVE_SSE)
212:       if (sse_enabled_local) {
213:           a->single_precision_solves = PETSC_TRUE;
214:           A->ops->solve              = MatSolve_SeqBAIJ_Update;
215:           A->ops->solvetranspose     = MatSolveTranspose_SeqBAIJ_Update;
216:           PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES setn");
217:           break;
218:       } else {
219:         PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignoredn");
220:       }
221: #else
222:       PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignoredn");
223: #endif
224:     }
225:     break;
226:   default:
227:     SETERRQ(PETSC_ERR_SUP,"unknown option");
228:   }
229:   return(0);
230: }

232: int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
233: {
234:   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
235:   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
236:   MatScalar    *aa,*aa_i;
237:   PetscScalar  *v_i;

240:   bs  = a->bs;
241:   ai  = a->i;
242:   aj  = a->j;
243:   aa  = a->a;
244:   bs2 = a->bs2;
245: 
246:   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
247: 
248:   bn  = row/bs;   /* Block number */
249:   bp  = row % bs; /* Block Position */
250:   M   = ai[bn+1] - ai[bn];
251:   *nz = bs*M;
252: 
253:   if (v) {
254:     *v = 0;
255:     if (*nz) {
256:       PetscMalloc((*nz)*sizeof(PetscScalar),v);
257:       for (i=0; i<M; i++) { /* for each block in the block row */
258:         v_i  = *v + i*bs;
259:         aa_i = aa + bs2*(ai[bn] + i);
260:         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
261:       }
262:     }
263:   }

265:   if (idx) {
266:     *idx = 0;
267:     if (*nz) {
268:       PetscMalloc((*nz)*sizeof(int),idx);
269:       for (i=0; i<M; i++) { /* for each block in the block row */
270:         idx_i = *idx + i*bs;
271:         itmp  = bs*aj[ai[bn] + i];
272:         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
273:       }
274:     }
275:   }
276:   return(0);
277: }

279: int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
280: {

284:   if (idx) {if (*idx) {PetscFree(*idx);}}
285:   if (v)   {if (*v)   {PetscFree(*v);}}
286:   return(0);
287: }

289: int MatTranspose_SeqBAIJ(Mat A,Mat *B)
290: {
291:   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
292:   Mat         C;
293:   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
294:   int         *rows,*cols,bs2=a->bs2;
295:   PetscScalar *array;

298:   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
299:   PetscMalloc((1+nbs)*sizeof(int),&col);
300:   PetscMemzero(col,(1+nbs)*sizeof(int));

302: #if defined(PETSC_USE_MAT_SINGLE)
303:   PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);
304:   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
305: #else
306:   array = a->a;
307: #endif

309:   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
310:   MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);
311:   PetscFree(col);
312:   PetscMalloc(2*bs*sizeof(int),&rows);
313:   cols = rows + bs;
314:   for (i=0; i<mbs; i++) {
315:     cols[0] = i*bs;
316:     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
317:     len = ai[i+1] - ai[i];
318:     for (j=0; j<len; j++) {
319:       rows[0] = (*aj++)*bs;
320:       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
321:       MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
322:       array += bs2;
323:     }
324:   }
325:   PetscFree(rows);
326: #if defined(PETSC_USE_MAT_SINGLE)
327:   PetscFree(array);
328: #endif
329: 
330:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
331:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
332: 
333:   if (B) {
334:     *B = C;
335:   } else {
336:     MatHeaderCopy(A,C);
337:   }
338:   return(0);
339: }

341: static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
342: {
343:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
344:   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
345:   PetscScalar *aa;
346:   FILE        *file;

349:   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);
350:   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);
351:   col_lens[0] = MAT_FILE_COOKIE;

353:   col_lens[1] = A->m;
354:   col_lens[2] = A->n;
355:   col_lens[3] = a->nz*bs2;

357:   /* store lengths of each row and write (including header) to file */
358:   count = 0;
359:   for (i=0; i<a->mbs; i++) {
360:     for (j=0; j<bs; j++) {
361:       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
362:     }
363:   }
364:   PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);
365:   PetscFree(col_lens);

367:   /* store column indices (zero start index) */
368:   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);
369:   count = 0;
370:   for (i=0; i<a->mbs; i++) {
371:     for (j=0; j<bs; j++) {
372:       for (k=a->i[i]; k<a->i[i+1]; k++) {
373:         for (l=0; l<bs; l++) {
374:           jj[count++] = bs*a->j[k] + l;
375:         }
376:       }
377:     }
378:   }
379:   PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);
380:   PetscFree(jj);

382:   /* store nonzero values */
383:   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
384:   count = 0;
385:   for (i=0; i<a->mbs; i++) {
386:     for (j=0; j<bs; j++) {
387:       for (k=a->i[i]; k<a->i[i+1]; k++) {
388:         for (l=0; l<bs; l++) {
389:           aa[count++] = a->a[bs2*k + l*bs + j];
390:         }
391:       }
392:     }
393:   }
394:   PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);
395:   PetscFree(aa);

397:   PetscViewerBinaryGetInfoPointer(viewer,&file);
398:   if (file) {
399:     fprintf(file,"-matload_block_size %dn",a->bs);
400:   }
401:   return(0);
402: }

404: extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer);

406: static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
407: {
408:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
409:   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
410:   PetscViewerFormat format;

413:   PetscViewerGetFormat(viewer,&format);
414:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
415:     PetscViewerASCIIPrintf(viewer,"  block size is %dn",bs);
416:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
417:     Mat aij;
418:     MatConvert(A,MATSEQAIJ,&aij);
419:     MatView(aij,viewer);
420:     MatDestroy(aij);
421:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
422: #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
423:      MatMPIBAIJFactorInfo_DSCPACK(A,viewer);
424: #endif
425:      return(0);
426:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
427:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
428:     for (i=0; i<a->mbs; i++) {
429:       for (j=0; j<bs; j++) {
430:         PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
431:         for (k=a->i[i]; k<a->i[i+1]; k++) {
432:           for (l=0; l<bs; l++) {
433: #if defined(PETSC_USE_COMPLEX)
434:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
435:               PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
436:                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
437:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
438:               PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
439:                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
440:             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
441:               PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
442:             }
443: #else
444:             if (a->a[bs2*k + l*bs + j] != 0.0) {
445:               PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
446:             }
447: #endif
448:           }
449:         }
450:         PetscViewerASCIIPrintf(viewer,"n");
451:       }
452:     }
453:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
454:   } else {
455:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
456:     for (i=0; i<a->mbs; i++) {
457:       for (j=0; j<bs; j++) {
458:         PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
459:         for (k=a->i[i]; k<a->i[i+1]; k++) {
460:           for (l=0; l<bs; l++) {
461: #if defined(PETSC_USE_COMPLEX)
462:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
463:               PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
464:                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
465:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
466:               PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
467:                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
468:             } else {
469:               PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
470:             }
471: #else
472:             PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
473: #endif
474:           }
475:         }
476:         PetscViewerASCIIPrintf(viewer,"n");
477:       }
478:     }
479:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
480:   }
481:   PetscViewerFlush(viewer);
482:   return(0);
483: }

485: static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
486: {
487:   Mat          A = (Mat) Aa;
488:   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
489:   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
490:   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
491:   MatScalar    *aa;
492:   PetscViewer  viewer;


496:   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
497:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);

499:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

501:   /* loop over matrix elements drawing boxes */
502:   color = PETSC_DRAW_BLUE;
503:   for (i=0,row=0; i<mbs; i++,row+=bs) {
504:     for (j=a->i[i]; j<a->i[i+1]; j++) {
505:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
506:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
507:       aa = a->a + j*bs2;
508:       for (k=0; k<bs; k++) {
509:         for (l=0; l<bs; l++) {
510:           if (PetscRealPart(*aa++) >=  0.) continue;
511:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
512:         }
513:       }
514:     }
515:   }
516:   color = PETSC_DRAW_CYAN;
517:   for (i=0,row=0; i<mbs; i++,row+=bs) {
518:     for (j=a->i[i]; j<a->i[i+1]; j++) {
519:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
520:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
521:       aa = a->a + j*bs2;
522:       for (k=0; k<bs; k++) {
523:         for (l=0; l<bs; l++) {
524:           if (PetscRealPart(*aa++) != 0.) continue;
525:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
526:         }
527:       }
528:     }
529:   }

531:   color = PETSC_DRAW_RED;
532:   for (i=0,row=0; i<mbs; i++,row+=bs) {
533:     for (j=a->i[i]; j<a->i[i+1]; j++) {
534:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
535:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
536:       aa = a->a + j*bs2;
537:       for (k=0; k<bs; k++) {
538:         for (l=0; l<bs; l++) {
539:           if (PetscRealPart(*aa++) <= 0.) continue;
540:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
541:         }
542:       }
543:     }
544:   }
545:   return(0);
546: }

548: static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
549: {
550:   int          ierr;
551:   PetscReal    xl,yl,xr,yr,w,h;
552:   PetscDraw    draw;
553:   PetscTruth   isnull;


557:   PetscViewerDrawGetDraw(viewer,0,&draw);
558:   PetscDrawIsNull(draw,&isnull); if (isnull) return(0);

560:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
561:   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
562:   xr += w;    yr += h;  xl = -w;     yl = -h;
563:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
564:   PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
565:   PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
566:   return(0);
567: }

569: int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
570: {
571:   int        ierr;
572:   PetscTruth issocket,isascii,isbinary,isdraw;

575:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
576:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
577:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
578:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
579:   if (issocket) {
580:     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
581:   } else if (isascii){
582:     MatView_SeqBAIJ_ASCII(A,viewer);
583:   } else if (isbinary) {
584:     MatView_SeqBAIJ_Binary(A,viewer);
585:   } else if (isdraw) {
586:     MatView_SeqBAIJ_Draw(A,viewer);
587:   } else {
588:     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
589:   }
590:   return(0);
591: }


594: int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
595: {
596:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
597:   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
598:   int        *ai = a->i,*ailen = a->ilen;
599:   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
600:   MatScalar  *ap,*aa = a->a,zero = 0.0;

603:   for (k=0; k<m; k++) { /* loop over rows */
604:     row  = im[k]; brow = row/bs;
605:     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
606:     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
607:     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
608:     nrow = ailen[brow];
609:     for (l=0; l<n; l++) { /* loop over columns */
610:       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
611:       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
612:       col  = in[l] ;
613:       bcol = col/bs;
614:       cidx = col%bs;
615:       ridx = row%bs;
616:       high = nrow;
617:       low  = 0; /* assume unsorted */
618:       while (high-low > 5) {
619:         t = (low+high)/2;
620:         if (rp[t] > bcol) high = t;
621:         else             low  = t;
622:       }
623:       for (i=low; i<high; i++) {
624:         if (rp[i] > bcol) break;
625:         if (rp[i] == bcol) {
626:           *v++ = ap[bs2*i+bs*cidx+ridx];
627:           goto finished;
628:         }
629:       }
630:       *v++ = zero;
631:       finished:;
632:     }
633:   }
634:   return(0);
635: }

637: #if defined(PETSC_USE_MAT_SINGLE)
638: int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
639: {
640:   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
641:   int         ierr,i,N = m*n*b->bs2;
642:   MatScalar   *vsingle;

645:   if (N > b->setvalueslen) {
646:     if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
647:     PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
648:     b->setvalueslen  = N;
649:   }
650:   vsingle = b->setvaluescopy;
651:   for (i=0; i<N; i++) {
652:     vsingle[i] = v[i];
653:   }
654:   MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
655:   return(0);
656: }
657: #endif


660: int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
661: {
662:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
663:   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
664:   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
665:   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
666:   PetscTruth  roworiented=a->roworiented;
667:   MatScalar   *value = v,*ap,*aa = a->a,*bap;

670:   if (roworiented) {
671:     stepval = (n-1)*bs;
672:   } else {
673:     stepval = (m-1)*bs;
674:   }
675:   for (k=0; k<m; k++) { /* loop over added rows */
676:     row  = im[k];
677:     if (row < 0) continue;
678: #if defined(PETSC_USE_BOPT_g)  
679:     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
680: #endif
681:     rp   = aj + ai[row];
682:     ap   = aa + bs2*ai[row];
683:     rmax = imax[row];
684:     nrow = ailen[row];
685:     low  = 0;
686:     for (l=0; l<n; l++) { /* loop over added columns */
687:       if (in[l] < 0) continue;
688: #if defined(PETSC_USE_BOPT_g)  
689:       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
690: #endif
691:       col = in[l];
692:       if (roworiented) {
693:         value = v + k*(stepval+bs)*bs + l*bs;
694:       } else {
695:         value = v + l*(stepval+bs)*bs + k*bs;
696:       }
697:       if (!sorted) low = 0; high = nrow;
698:       while (high-low > 7) {
699:         t = (low+high)/2;
700:         if (rp[t] > col) high = t;
701:         else             low  = t;
702:       }
703:       for (i=low; i<high; i++) {
704:         if (rp[i] > col) break;
705:         if (rp[i] == col) {
706:           bap  = ap +  bs2*i;
707:           if (roworiented) {
708:             if (is == ADD_VALUES) {
709:               for (ii=0; ii<bs; ii++,value+=stepval) {
710:                 for (jj=ii; jj<bs2; jj+=bs) {
711:                   bap[jj] += *value++;
712:                 }
713:               }
714:             } else {
715:               for (ii=0; ii<bs; ii++,value+=stepval) {
716:                 for (jj=ii; jj<bs2; jj+=bs) {
717:                   bap[jj] = *value++;
718:                 }
719:               }
720:             }
721:           } else {
722:             if (is == ADD_VALUES) {
723:               for (ii=0; ii<bs; ii++,value+=stepval) {
724:                 for (jj=0; jj<bs; jj++) {
725:                   *bap++ += *value++;
726:                 }
727:               }
728:             } else {
729:               for (ii=0; ii<bs; ii++,value+=stepval) {
730:                 for (jj=0; jj<bs; jj++) {
731:                   *bap++  = *value++;
732:                 }
733:               }
734:             }
735:           }
736:           goto noinsert2;
737:         }
738:       }
739:       if (nonew == 1) goto noinsert2;
740:       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
741:       if (nrow >= rmax) {
742:         /* there is no extra room in row, therefore enlarge */
743:         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
744:         MatScalar *new_a;

746:         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");

748:         /* malloc new storage space */
749:         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
750:         ierr    = PetscMalloc(len,&new_a);
751:         new_j   = (int*)(new_a + bs2*new_nz);
752:         new_i   = new_j + new_nz;

754:         /* copy over old data into new slots */
755:         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
756:         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
757:         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
758:         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
759:         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));
760:         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));
761:         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
762:         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));
763:         /* free up old matrix storage */
764:         PetscFree(a->a);
765:         if (!a->singlemalloc) {
766:           PetscFree(a->i);
767:           PetscFree(a->j);
768:         }
769:         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
770:         a->singlemalloc = PETSC_TRUE;

772:         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
773:         rmax = imax[row] = imax[row] + CHUNKSIZE;
774:         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
775:         a->maxnz += bs2*CHUNKSIZE;
776:         a->reallocs++;
777:         a->nz++;
778:       }
779:       N = nrow++ - 1;
780:       /* shift up all the later entries in this row */
781:       for (ii=N; ii>=i; ii--) {
782:         rp[ii+1] = rp[ii];
783:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
784:       }
785:       if (N >= i) {
786:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
787:       }
788:       rp[i] = col;
789:       bap   = ap +  bs2*i;
790:       if (roworiented) {
791:         for (ii=0; ii<bs; ii++,value+=stepval) {
792:           for (jj=ii; jj<bs2; jj+=bs) {
793:             bap[jj] = *value++;
794:           }
795:         }
796:       } else {
797:         for (ii=0; ii<bs; ii++,value+=stepval) {
798:           for (jj=0; jj<bs; jj++) {
799:             *bap++  = *value++;
800:           }
801:         }
802:       }
803:       noinsert2:;
804:       low = i;
805:     }
806:     ailen[row] = nrow;
807:   }
808:   return(0);
809: }

811: int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
812: {
813:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
814:   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
815:   int        m = A->m,*ip,N,*ailen = a->ilen;
816:   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
817:   MatScalar  *aa = a->a,*ap;
818: #if defined(PETSC_HAVE_DSCPACK)
819:   PetscTruth   flag;
820: #endif

823:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

825:   if (m) rmax = ailen[0];
826:   for (i=1; i<mbs; i++) {
827:     /* move each row back by the amount of empty slots (fshift) before it*/
828:     fshift += imax[i-1] - ailen[i-1];
829:     rmax   = PetscMax(rmax,ailen[i]);
830:     if (fshift) {
831:       ip = aj + ai[i]; ap = aa + bs2*ai[i];
832:       N = ailen[i];
833:       for (j=0; j<N; j++) {
834:         ip[j-fshift] = ip[j];
835:         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
836:       }
837:     }
838:     ai[i] = ai[i-1] + ailen[i-1];
839:   }
840:   if (mbs) {
841:     fshift += imax[mbs-1] - ailen[mbs-1];
842:     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
843:   }
844:   /* reset ilen and imax for each row */
845:   for (i=0; i<mbs; i++) {
846:     ailen[i] = imax[i] = ai[i+1] - ai[i];
847:   }
848:   a->nz = ai[mbs];

850:   /* diagonals may have moved, so kill the diagonal pointers */
851:   if (fshift && a->diag) {
852:     PetscFree(a->diag);
853:     PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
854:     a->diag = 0;
855:   }
856:   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d usedn",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
857:   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %dn",a->reallocs);
858:   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %dn",rmax);
859:   a->reallocs          = 0;
860:   A->info.nz_unneeded  = (PetscReal)fshift*bs2;

862: #if defined(PETSC_HAVE_DSCPACK)
863:   PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);
864:   if (flag) { MatUseDSCPACK_MPIBAIJ(A); }
865: #endif

867:   return(0);
868: }



872: /* 
873:    This function returns an array of flags which indicate the locations of contiguous
874:    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
875:    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
876:    Assume: sizes should be long enough to hold all the values.
877: */
878: static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
879: {
880:   int        i,j,k,row;
881:   PetscTruth flg;

884:   for (i=0,j=0; i<n; j++) {
885:     row = idx[i];
886:     if (row%bs!=0) { /* Not the begining of a block */
887:       sizes[j] = 1;
888:       i++;
889:     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
890:       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
891:       i++;
892:     } else { /* Begining of the block, so check if the complete block exists */
893:       flg = PETSC_TRUE;
894:       for (k=1; k<bs; k++) {
895:         if (row+k != idx[i+k]) { /* break in the block */
896:           flg = PETSC_FALSE;
897:           break;
898:         }
899:       }
900:       if (flg == PETSC_TRUE) { /* No break in the bs */
901:         sizes[j] = bs;
902:         i+= bs;
903:       } else {
904:         sizes[j] = 1;
905:         i++;
906:       }
907:     }
908:   }
909:   *bs_max = j;
910:   return(0);
911: }
912: 
913: int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag)
914: {
915:   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
916:   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
917:   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
918:   PetscScalar zero = 0.0;
919:   MatScalar   *aa;

922:   /* Make a copy of the IS and  sort it */
923:   ISGetLocalSize(is,&is_n);
924:   ISGetIndices(is,&is_idx);

926:   /* allocate memory for rows,sizes */
927:   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);
928:   sizes = rows + is_n;

930:   /* copy IS values to rows, and sort them */
931:   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
932:   PetscSortInt(is_n,rows);
933:   if (baij->keepzeroedrows) {
934:     for (i=0; i<is_n; i++) { sizes[i] = 1; }
935:     bs_max = is_n;
936:   } else {
937:     MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
938:   }
939:   ISRestoreIndices(is,&is_idx);

941:   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
942:     row   = rows[j];
943:     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
944:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
945:     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
946:     if (sizes[i] == bs && !baij->keepzeroedrows) {
947:       if (diag) {
948:         if (baij->ilen[row/bs] > 0) {
949:           baij->ilen[row/bs]       = 1;
950:           baij->j[baij->i[row/bs]] = row/bs;
951:           PetscMemzero(aa,count*bs*sizeof(MatScalar));
952:         }
953:         /* Now insert all the diagonal values for this bs */
954:         for (k=0; k<bs; k++) {
955:           (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);
956:         }
957:       } else { /* (!diag) */
958:         baij->ilen[row/bs] = 0;
959:       } /* end (!diag) */
960:     } else { /* (sizes[i] != bs) */
961: #if defined (PETSC_USE_DEBUG)
962:       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
963: #endif
964:       for (k=0; k<count; k++) {
965:         aa[0] =  zero;
966:         aa    += bs;
967:       }
968:       if (diag) {
969:         (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);
970:       }
971:     }
972:   }

974:   PetscFree(rows);
975:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
976:   return(0);
977: }

979: int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
980: {
981:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
982:   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
983:   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
984:   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
985:   int         ridx,cidx,bs2=a->bs2,ierr;
986:   PetscTruth  roworiented=a->roworiented;
987:   MatScalar   *ap,value,*aa=a->a,*bap;

990:   for (k=0; k<m; k++) { /* loop over added rows */
991:     row  = im[k]; brow = row/bs;
992:     if (row < 0) continue;
993: #if defined(PETSC_USE_BOPT_g)  
994:     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
995: #endif
996:     rp   = aj + ai[brow];
997:     ap   = aa + bs2*ai[brow];
998:     rmax = imax[brow];
999:     nrow = ailen[brow];
1000:     low  = 0;
1001:     for (l=0; l<n; l++) { /* loop over added columns */
1002:       if (in[l] < 0) continue;
1003: #if defined(PETSC_USE_BOPT_g)  
1004:       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
1005: #endif
1006:       col = in[l]; bcol = col/bs;
1007:       ridx = row % bs; cidx = col % bs;
1008:       if (roworiented) {
1009:         value = v[l + k*n];
1010:       } else {
1011:         value = v[k + l*m];
1012:       }
1013:       if (!sorted) low = 0; high = nrow;
1014:       while (high-low > 7) {
1015:         t = (low+high)/2;
1016:         if (rp[t] > bcol) high = t;
1017:         else              low  = t;
1018:       }
1019:       for (i=low; i<high; i++) {
1020:         if (rp[i] > bcol) break;
1021:         if (rp[i] == bcol) {
1022:           bap  = ap +  bs2*i + bs*cidx + ridx;
1023:           if (is == ADD_VALUES) *bap += value;
1024:           else                  *bap  = value;
1025:           goto noinsert1;
1026:         }
1027:       }
1028:       if (nonew == 1) goto noinsert1;
1029:       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1030:       if (nrow >= rmax) {
1031:         /* there is no extra room in row, therefore enlarge */
1032:         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1033:         MatScalar *new_a;

1035:         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");

1037:         /* Malloc new storage space */
1038:         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1039:         ierr    = PetscMalloc(len,&new_a);
1040:         new_j   = (int*)(new_a + bs2*new_nz);
1041:         new_i   = new_j + new_nz;

1043:         /* copy over old data into new slots */
1044:         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1045:         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1046:         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
1047:         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1048:         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));
1049:         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));
1050:         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
1051:         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));
1052:         /* free up old matrix storage */
1053:         PetscFree(a->a);
1054:         if (!a->singlemalloc) {
1055:           PetscFree(a->i);
1056:           PetscFree(a->j);
1057:         }
1058:         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1059:         a->singlemalloc = PETSC_TRUE;

1061:         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1062:         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1063:         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1064:         a->maxnz += bs2*CHUNKSIZE;
1065:         a->reallocs++;
1066:         a->nz++;
1067:       }
1068:       N = nrow++ - 1;
1069:       /* shift up all the later entries in this row */
1070:       for (ii=N; ii>=i; ii--) {
1071:         rp[ii+1] = rp[ii];
1072:         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1073:       }
1074:       if (N>=i) {
1075:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1076:       }
1077:       rp[i]                      = bcol;
1078:       ap[bs2*i + bs*cidx + ridx] = value;
1079:       noinsert1:;
1080:       low = i;
1081:     }
1082:     ailen[brow] = nrow;
1083:   }
1084:   return(0);
1085: }


1088: int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1089: {
1090:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1091:   Mat         outA;
1092:   int         ierr;
1093:   PetscTruth  row_identity,col_identity;

1096:   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1097:   ISIdentity(row,&row_identity);
1098:   ISIdentity(col,&col_identity);
1099:   if (!row_identity || !col_identity) {
1100:     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1101:   }

1103:   outA          = inA;
1104:   inA->factor   = FACTOR_LU;

1106:   if (!a->diag) {
1107:     MatMarkDiagonal_SeqBAIJ(inA);
1108:   }

1110:   a->row        = row;
1111:   a->col        = col;
1112:   ierr          = PetscObjectReference((PetscObject)row);
1113:   ierr          = PetscObjectReference((PetscObject)col);
1114: 
1115:   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1116:   ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
1117:   PetscLogObjectParent(inA,a->icol);
1118: 
1119:   /*
1120:       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 
1121:       for ILU(0) factorization with natural ordering
1122:   */
1123:   if (a->bs < 8) {
1124:     MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);
1125:   } else {
1126:     if (!a->solve_work) {
1127:       PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);
1128:       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1129:     }
1130:   }

1132:   MatLUFactorNumeric(inA,&outA);

1134:   return(0);
1135: }
1136: int MatPrintHelp_SeqBAIJ(Mat A)
1137: {
1138:   static PetscTruth called = PETSC_FALSE;
1139:   MPI_Comm          comm = A->comm;
1140:   int               ierr;

1143:   if (called) {return(0);} else called = PETSC_TRUE;
1144:   (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):n");
1145:   (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>n");
1146:   return(0);
1147: }

1149: EXTERN_C_BEGIN
1150: int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1151: {
1152:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1153:   int         i,nz,nbs;

1156:   nz  = baij->maxnz/baij->bs2;
1157:   nbs = baij->nbs;
1158:   for (i=0; i<nz; i++) {
1159:     baij->j[i] = indices[i];
1160:   }
1161:   baij->nz = nz;
1162:   for (i=0; i<nbs; i++) {
1163:     baij->ilen[i] = baij->imax[i];
1164:   }

1166:   return(0);
1167: }
1168: EXTERN_C_END

1170: /*@
1171:     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1172:        in the matrix.

1174:   Input Parameters:
1175: +  mat - the SeqBAIJ matrix
1176: -  indices - the column indices

1178:   Level: advanced

1180:   Notes:
1181:     This can be called if you have precomputed the nonzero structure of the 
1182:   matrix and want to provide it to the matrix object to improve the performance
1183:   of the MatSetValues() operation.

1185:     You MUST have set the correct numbers of nonzeros per row in the call to 
1186:   MatCreateSeqBAIJ().

1188:     MUST be called before any calls to MatSetValues();

1190: @*/
1191: int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1192: {
1193:   int ierr,(*f)(Mat,int *);

1197:   PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
1198:   if (f) {
1199:     (*f)(mat,indices);
1200:   } else {
1201:     SETERRQ(1,"Wrong type of matrix to set column indices");
1202:   }
1203:   return(0);
1204: }

1206: int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1207: {
1208:   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1209:   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1210:   PetscReal    atmp;
1211:   PetscScalar  *x,zero = 0.0;
1212:   MatScalar    *aa;
1213:   int          ncols,brow,krow,kcol;

1216:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1217:   bs   = a->bs;
1218:   aa   = a->a;
1219:   ai   = a->i;
1220:   aj   = a->j;
1221:   mbs = a->mbs;

1223:   VecSet(&zero,v);
1224:   VecGetArray(v,&x);
1225:   VecGetLocalSize(v,&n);
1226:   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1227:   for (i=0; i<mbs; i++) {
1228:     ncols = ai[1] - ai[0]; ai++;
1229:     brow  = bs*i;
1230:     for (j=0; j<ncols; j++){
1231:       /* bcol = bs*(*aj); */
1232:       for (kcol=0; kcol<bs; kcol++){
1233:         for (krow=0; krow<bs; krow++){
1234:           atmp = PetscAbsScalar(*aa); aa++;
1235:           row = brow + krow;    /* row index */
1236:           /* printf("val[%d,%d]: %gn",row,bcol+kcol,atmp); */
1237:           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1238:         }
1239:       }
1240:       aj++;
1241:     }
1242:   }
1243:   VecRestoreArray(v,&x);
1244:   return(0);
1245: }

1247: int MatSetUpPreallocation_SeqBAIJ(Mat A)
1248: {
1249:   int        ierr;

1252:    MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);
1253:   return(0);
1254: }

1256: int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array)
1257: {
1258:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1260:   *array = a->a;
1261:   return(0);
1262: }

1264: int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array)
1265: {
1267:   return(0);
1268: }

1270: /* -------------------------------------------------------------------*/
1271: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1272:        MatGetRow_SeqBAIJ,
1273:        MatRestoreRow_SeqBAIJ,
1274:        MatMult_SeqBAIJ_N,
1275:        MatMultAdd_SeqBAIJ_N,
1276:        MatMultTranspose_SeqBAIJ,
1277:        MatMultTransposeAdd_SeqBAIJ,
1278:        MatSolve_SeqBAIJ_N,
1279:        0,
1280:        0,
1281:        0,
1282:        MatLUFactor_SeqBAIJ,
1283:        0,
1284:        0,
1285:        MatTranspose_SeqBAIJ,
1286:        MatGetInfo_SeqBAIJ,
1287:        MatEqual_SeqBAIJ,
1288:        MatGetDiagonal_SeqBAIJ,
1289:        MatDiagonalScale_SeqBAIJ,
1290:        MatNorm_SeqBAIJ,
1291:        0,
1292:        MatAssemblyEnd_SeqBAIJ,
1293:        0,
1294:        MatSetOption_SeqBAIJ,
1295:        MatZeroEntries_SeqBAIJ,
1296:        MatZeroRows_SeqBAIJ,
1297:        MatLUFactorSymbolic_SeqBAIJ,
1298:        MatLUFactorNumeric_SeqBAIJ_N,
1299:        0,
1300:        0,
1301:        MatSetUpPreallocation_SeqBAIJ,
1302:        MatILUFactorSymbolic_SeqBAIJ,
1303:        0,
1304:        MatGetArray_SeqBAIJ,
1305:        MatRestoreArray_SeqBAIJ,
1306:        MatDuplicate_SeqBAIJ,
1307:        0,
1308:        0,
1309:        MatILUFactor_SeqBAIJ,
1310:        0,
1311:        0,
1312:        MatGetSubMatrices_SeqBAIJ,
1313:        MatIncreaseOverlap_SeqBAIJ,
1314:        MatGetValues_SeqBAIJ,
1315:        0,
1316:        MatPrintHelp_SeqBAIJ,
1317:        MatScale_SeqBAIJ,
1318:        0,
1319:        0,
1320:        0,
1321:        MatGetBlockSize_SeqBAIJ,
1322:        MatGetRowIJ_SeqBAIJ,
1323:        MatRestoreRowIJ_SeqBAIJ,
1324:        0,
1325:        0,
1326:        0,
1327:        0,
1328:        0,
1329:        0,
1330:        MatSetValuesBlocked_SeqBAIJ,
1331:        MatGetSubMatrix_SeqBAIJ,
1332:        MatDestroy_SeqBAIJ,
1333:        MatView_SeqBAIJ,
1334:        MatGetPetscMaps_Petsc,
1335:        0,
1336:        0,
1337:        0,
1338:        0,
1339:        0,
1340:        0,
1341:        MatGetRowMax_SeqBAIJ,
1342:        MatConvert_Basic};

1344: EXTERN_C_BEGIN
1345: int MatStoreValues_SeqBAIJ(Mat mat)
1346: {
1347:   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1348:   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1349:   int         ierr;

1352:   if (aij->nonew != 1) {
1353:     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1354:   }

1356:   /* allocate space for values if not already there */
1357:   if (!aij->saved_values) {
1358:     PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
1359:   }

1361:   /* copy values over */
1362:   PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
1363:   return(0);
1364: }
1365: EXTERN_C_END

1367: EXTERN_C_BEGIN
1368: int MatRetrieveValues_SeqBAIJ(Mat mat)
1369: {
1370:   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1371:   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;

1374:   if (aij->nonew != 1) {
1375:     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1376:   }
1377:   if (!aij->saved_values) {
1378:     SETERRQ(1,"Must call MatStoreValues(A);first");
1379:   }

1381:   /* copy values over */
1382:   PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
1383:   return(0);
1384: }
1385: EXTERN_C_END

1387: EXTERN_C_BEGIN
1388: extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1389: EXTERN_C_END

1391: EXTERN_C_BEGIN
1392: int MatCreate_SeqBAIJ(Mat B)
1393: {
1394:   int         ierr,size;
1395:   Mat_SeqBAIJ *b;

1398:   MPI_Comm_size(B->comm,&size);
1399:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");

1401:   B->m = B->M = PetscMax(B->m,B->M);
1402:   B->n = B->N = PetscMax(B->n,B->N);
1403:   ierr    = PetscNew(Mat_SeqBAIJ,&b);
1404:   B->data = (void*)b;
1405:   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1406:   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1407:   B->factor           = 0;
1408:   B->lupivotthreshold = 1.0;
1409:   B->mapping          = 0;
1410:   b->row              = 0;
1411:   b->col              = 0;
1412:   b->icol             = 0;
1413:   b->reallocs         = 0;
1414:   b->saved_values     = 0;
1415: #if defined(PETSC_USE_MAT_SINGLE)
1416:   b->setvalueslen     = 0;
1417:   b->setvaluescopy    = PETSC_NULL;
1418: #endif
1419:   b->single_precision_solves = PETSC_FALSE;

1421:   PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);
1422:   PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);

1424:   b->sorted           = PETSC_FALSE;
1425:   b->roworiented      = PETSC_TRUE;
1426:   b->nonew            = 0;
1427:   b->diag             = 0;
1428:   b->solve_work       = 0;
1429:   b->mult_work        = 0;
1430:   B->spptr            = 0;
1431:   B->info.nz_unneeded = (PetscReal)b->maxnz;
1432:   b->keepzeroedrows   = PETSC_FALSE;

1434:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1435:                                      "MatStoreValues_SeqBAIJ",
1436:                                       MatStoreValues_SeqBAIJ);
1437:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1438:                                      "MatRetrieveValues_SeqBAIJ",
1439:                                       MatRetrieveValues_SeqBAIJ);
1440:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1441:                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1442:                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);
1443:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1444:                                      "MatConvert_SeqBAIJ_SeqAIJ",
1445:                                       MatConvert_SeqBAIJ_SeqAIJ);
1446:   return(0);
1447: }
1448: EXTERN_C_END

1450: int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1451: {
1452:   Mat         C;
1453:   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1454:   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;

1457:   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");

1459:   *B = 0;
1460:   MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);
1461:   MatSetType(C,MATSEQBAIJ);
1462:   c    = (Mat_SeqBAIJ*)C->data;

1464:   c->bs         = a->bs;
1465:   c->bs2        = a->bs2;
1466:   c->mbs        = a->mbs;
1467:   c->nbs        = a->nbs;
1468:   PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));

1470:   PetscMalloc((mbs+1)*sizeof(int),&c->imax);
1471:   PetscMalloc((mbs+1)*sizeof(int),&c->ilen);
1472:   for (i=0; i<mbs; i++) {
1473:     c->imax[i] = a->imax[i];
1474:     c->ilen[i] = a->ilen[i];
1475:   }

1477:   /* allocate the matrix space */
1478:   c->singlemalloc = PETSC_TRUE;
1479:   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1480:   PetscMalloc(len,&c->a);
1481:   c->j = (int*)(c->a + nz*bs2);
1482:   c->i = c->j + nz;
1483:   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1484:   if (mbs > 0) {
1485:     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1486:     if (cpvalues == MAT_COPY_VALUES) {
1487:       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
1488:     } else {
1489:       PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
1490:     }
1491:   }

1493:   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1494:   c->sorted      = a->sorted;
1495:   c->roworiented = a->roworiented;
1496:   c->nonew       = a->nonew;

1498:   if (a->diag) {
1499:     PetscMalloc((mbs+1)*sizeof(int),&c->diag);
1500:     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1501:     for (i=0; i<mbs; i++) {
1502:       c->diag[i] = a->diag[i];
1503:     }
1504:   } else c->diag        = 0;
1505:   c->nz                 = a->nz;
1506:   c->maxnz              = a->maxnz;
1507:   c->solve_work         = 0;
1508:   C->spptr              = 0;     /* Dangerous -I'm throwing away a->spptr */
1509:   c->mult_work          = 0;
1510:   C->preallocated       = PETSC_TRUE;
1511:   C->assembled          = PETSC_TRUE;
1512:   *B = C;
1513:   PetscFListDuplicate(A->qlist,&C->qlist);
1514:   return(0);
1515: }

1517: EXTERN_C_BEGIN
1518: int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1519: {
1520:   Mat_SeqBAIJ  *a;
1521:   Mat          B;
1522:   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1523:   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1524:   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1525:   int          *masked,nmask,tmp,bs2,ishift;
1526:   PetscScalar  *aa;
1527:   MPI_Comm     comm = ((PetscObject)viewer)->comm;

1530:   PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
1531:   bs2  = bs*bs;

1533:   MPI_Comm_size(comm,&size);
1534:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1535:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1536:   PetscBinaryRead(fd,header,4,PETSC_INT);
1537:   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1538:   M = header[1]; N = header[2]; nz = header[3];

1540:   if (header[3] < 0) {
1541:     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1542:   }

1544:   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

1546:   /* 
1547:      This code adds extra rows to make sure the number of rows is 
1548:     divisible by the blocksize
1549:   */
1550:   mbs        = M/bs;
1551:   extra_rows = bs - M + bs*(mbs);
1552:   if (extra_rows == bs) extra_rows = 0;
1553:   else                  mbs++;
1554:   if (extra_rows) {
1555:     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksizen");
1556:   }

1558:   /* read in row lengths */
1559:   PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
1560:   PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1561:   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;

1563:   /* read in column indices */
1564:   PetscMalloc((nz+extra_rows)*sizeof(int),&jj);
1565:   PetscBinaryRead(fd,jj,nz,PETSC_INT);
1566:   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;

1568:   /* loop over row lengths determining block row lengths */
1569:   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);
1570:   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));
1571:   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);
1572:   ierr     = PetscMemzero(mask,mbs*sizeof(int));
1573:   masked   = mask + mbs;
1574:   rowcount = 0; nzcount = 0;
1575:   for (i=0; i<mbs; i++) {
1576:     nmask = 0;
1577:     for (j=0; j<bs; j++) {
1578:       kmax = rowlengths[rowcount];
1579:       for (k=0; k<kmax; k++) {
1580:         tmp = jj[nzcount++]/bs;
1581:         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1582:       }
1583:       rowcount++;
1584:     }
1585:     browlengths[i] += nmask;
1586:     /* zero out the mask elements we set */
1587:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1588:   }

1590:   /* create our matrix */
1591:   MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1592:   B = *A;
1593:   a = (Mat_SeqBAIJ*)B->data;

1595:   /* set matrix "i" values */
1596:   a->i[0] = 0;
1597:   for (i=1; i<= mbs; i++) {
1598:     a->i[i]      = a->i[i-1] + browlengths[i-1];
1599:     a->ilen[i-1] = browlengths[i-1];
1600:   }
1601:   a->nz         = 0;
1602:   for (i=0; i<mbs; i++) a->nz += browlengths[i];

1604:   /* read in nonzero values */
1605:   PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
1606:   PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
1607:   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;

1609:   /* set "a" and "j" values into matrix */
1610:   nzcount = 0; jcount = 0;
1611:   for (i=0; i<mbs; i++) {
1612:     nzcountb = nzcount;
1613:     nmask    = 0;
1614:     for (j=0; j<bs; j++) {
1615:       kmax = rowlengths[i*bs+j];
1616:       for (k=0; k<kmax; k++) {
1617:         tmp = jj[nzcount++]/bs;
1618:         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1619:       }
1620:     }
1621:     /* sort the masked values */
1622:     PetscSortInt(nmask,masked);

1624:     /* set "j" values into matrix */
1625:     maskcount = 1;
1626:     for (j=0; j<nmask; j++) {
1627:       a->j[jcount++]  = masked[j];
1628:       mask[masked[j]] = maskcount++;
1629:     }
1630:     /* set "a" values into matrix */
1631:     ishift = bs2*a->i[i];
1632:     for (j=0; j<bs; j++) {
1633:       kmax = rowlengths[i*bs+j];
1634:       for (k=0; k<kmax; k++) {
1635:         tmp       = jj[nzcountb]/bs ;
1636:         block     = mask[tmp] - 1;
1637:         point     = jj[nzcountb] - bs*tmp;
1638:         idx       = ishift + bs2*block + j + bs*point;
1639:         a->a[idx] = (MatScalar)aa[nzcountb++];
1640:       }
1641:     }
1642:     /* zero out the mask elements we set */
1643:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1644:   }
1645:   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");

1647:   PetscFree(rowlengths);
1648:   PetscFree(browlengths);
1649:   PetscFree(aa);
1650:   PetscFree(jj);
1651:   PetscFree(mask);

1653:   B->assembled = PETSC_TRUE;
1654: 
1655:   MatView_Private(B);
1656:   return(0);
1657: }
1658: EXTERN_C_END

1660: /*@C
1661:    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1662:    compressed row) format.  For good matrix assembly performance the
1663:    user should preallocate the matrix storage by setting the parameter nz
1664:    (or the array nnz).  By setting these parameters accurately, performance
1665:    during matrix assembly can be increased by more than a factor of 50.

1667:    Collective on MPI_Comm

1669:    Input Parameters:
1670: +  comm - MPI communicator, set to PETSC_COMM_SELF
1671: .  bs - size of block
1672: .  m - number of rows
1673: .  n - number of columns
1674: .  nz - number of nonzero blocks  per block row (same for all rows)
1675: -  nnz - array containing the number of nonzero blocks in the various block rows 
1676:          (possibly different for each block row) or PETSC_NULL

1678:    Output Parameter:
1679: .  A - the matrix 

1681:    Options Database Keys:
1682: .   -mat_no_unroll - uses code that does not unroll the loops in the 
1683:                      block calculations (much slower)
1684: .    -mat_block_size - size of the blocks to use

1686:    Level: intermediate

1688:    Notes:
1689:    A nonzero block is any block that as 1 or more nonzeros in it

1691:    The block AIJ format is fully compatible with standard Fortran 77
1692:    storage.  That is, the stored row and column indices can begin at
1693:    either one (as in Fortran) or zero.  See the users' manual for details.

1695:    Specify the preallocated storage with either nz or nnz (not both).
1696:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
1697:    allocation.  For additional details, see the users manual chapter on
1698:    matrices.

1700: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1701: @*/
1702: int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1703: {
1705: 
1707:   MatCreate(comm,m,n,m,n,A);
1708:   MatSetType(*A,MATSEQBAIJ);
1709:   MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);
1710:   return(0);
1711: }

1713: /*@C
1714:    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1715:    per row in the matrix. For good matrix assembly performance the
1716:    user should preallocate the matrix storage by setting the parameter nz
1717:    (or the array nnz).  By setting these parameters accurately, performance
1718:    during matrix assembly can be increased by more than a factor of 50.

1720:    Collective on MPI_Comm

1722:    Input Parameters:
1723: +  A - the matrix
1724: .  bs - size of block
1725: .  nz - number of block nonzeros per block row (same for all rows)
1726: -  nnz - array containing the number of block nonzeros in the various block rows 
1727:          (possibly different for each block row) or PETSC_NULL

1729:    Options Database Keys:
1730: .   -mat_no_unroll - uses code that does not unroll the loops in the 
1731:                      block calculations (much slower)
1732: .    -mat_block_size - size of the blocks to use

1734:    Level: intermediate

1736:    Notes:
1737:    The block AIJ format is fully compatible with standard Fortran 77
1738:    storage.  That is, the stored row and column indices can begin at
1739:    either one (as in Fortran) or zero.  See the users' manual for details.

1741:    Specify the preallocated storage with either nz or nnz (not both).
1742:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
1743:    allocation.  For additional details, see the users manual chapter on
1744:    matrices.

1746: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1747: @*/
1748: int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1749: {
1750:   Mat_SeqBAIJ *b;
1751:   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1752:   PetscTruth  flg;

1755:   PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);
1756:   if (!flg) return(0);

1758:   B->preallocated = PETSC_TRUE;
1759:   PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);
1760:   if (nnz && newbs != bs) {
1761:     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1762:   }
1763:   bs   = newbs;

1765:   mbs  = B->m/bs;
1766:   nbs  = B->n/bs;
1767:   bs2  = bs*bs;

1769:   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1770:     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1771:   }

1773:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1774:   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1775:   if (nnz) {
1776:     for (i=0; i<mbs; i++) {
1777:       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1778:       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1779:     }
1780:   }

1782:   b       = (Mat_SeqBAIJ*)B->data;
1783:   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);
1784:   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1785:   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1786:   if (!flg) {
1787:     switch (bs) {
1788:     case 1:
1789:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1790:       B->ops->mult            = MatMult_SeqBAIJ_1;
1791:       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1792:       break;
1793:     case 2:
1794:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1795:       B->ops->mult            = MatMult_SeqBAIJ_2;
1796:       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1797:       break;
1798:     case 3:
1799:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1800:       B->ops->mult            = MatMult_SeqBAIJ_3;
1801:       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1802:       break;
1803:     case 4:
1804:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1805:       B->ops->mult            = MatMult_SeqBAIJ_4;
1806:       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1807:       break;
1808:     case 5:
1809:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1810:       B->ops->mult            = MatMult_SeqBAIJ_5;
1811:       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1812:       break;
1813:     case 6:
1814:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1815:       B->ops->mult            = MatMult_SeqBAIJ_6;
1816:       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1817:       break;
1818:     case 7:
1819:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1820:       B->ops->mult            = MatMult_SeqBAIJ_7;
1821:       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1822:       break;
1823:     default:
1824:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1825:       B->ops->mult            = MatMult_SeqBAIJ_N;
1826:       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1827:       break;
1828:     }
1829:   }
1830:   b->bs      = bs;
1831:   b->mbs     = mbs;
1832:   b->nbs     = nbs;
1833:   PetscMalloc((mbs+1)*sizeof(int),&b->imax);
1834:   if (!nnz) {
1835:     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1836:     else if (nz <= 0)        nz = 1;
1837:     for (i=0; i<mbs; i++) b->imax[i] = nz;
1838:     nz = nz*mbs;
1839:   } else {
1840:     nz = 0;
1841:     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1842:   }

1844:   /* allocate the matrix space */
1845:   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1846:   ierr  = PetscMalloc(len,&b->a);
1847:   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
1848:   b->j  = (int*)(b->a + nz*bs2);
1849:   PetscMemzero(b->j,nz*sizeof(int));
1850:   b->i  = b->j + nz;
1851:   b->singlemalloc = PETSC_TRUE;

1853:   b->i[0] = 0;
1854:   for (i=1; i<mbs+1; i++) {
1855:     b->i[i] = b->i[i-1] + b->imax[i-1];
1856:   }

1858:   /* b->ilen will count nonzeros in each block row so far. */
1859:   PetscMalloc((mbs+1)*sizeof(int),&b->ilen);
1860:   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1861:   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}

1863:   b->bs               = bs;
1864:   b->bs2              = bs2;
1865:   b->mbs              = mbs;
1866:   b->nz               = 0;
1867:   b->maxnz            = nz*bs2;
1868:   B->info.nz_unneeded = (PetscReal)b->maxnz;
1869:   return(0);
1870: }