Actual source code: baij.c

  1: /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/

  3: /*
  4:     Defines the basic matrix operations for the BAIJ (compressed row)
  5:   matrix storage format.
  6: */
 7:  #include src/mat/impls/baij/seq/baij.h
 8:  #include src/vec/vecimpl.h
 9:  #include src/inline/spops.h
 10:  #include petscsys.h

 12: /*
 13:     Special version for Fun3d sequential benchmark
 14: */
 15: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 16: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
 17: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 18: #define matsetvaluesblocked4_ matsetvaluesblocked4
 19: #endif

 21: EXTERN_C_BEGIN
 24: void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[])
 25: {
 26:   Mat               A = *AA;
 27:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
 28:   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
 29:   int               *ai=a->i,*ailen=a->ilen;
 30:   int               *aj=a->j,stepval;
 31:   const PetscScalar *value = v;
 32:   MatScalar         *ap,*aa = a->a,*bap;

 35:   stepval = (n-1)*4;
 36:   for (k=0; k<m; k++) { /* loop over added rows */
 37:     row  = im[k];
 38:     rp   = aj + ai[row];
 39:     ap   = aa + 16*ai[row];
 40:     nrow = ailen[row];
 41:     low  = 0;
 42:     for (l=0; l<n; l++) { /* loop over added columns */
 43:       col = in[l];
 44:       value = v + k*(stepval+4)*4 + l*4;
 45:       low = 0; high = nrow;
 46:       while (high-low > 7) {
 47:         t = (low+high)/2;
 48:         if (rp[t] > col) high = t;
 49:         else             low  = t;
 50:       }
 51:       for (i=low; i<high; i++) {
 52:         if (rp[i] > col) break;
 53:         if (rp[i] == col) {
 54:           bap  = ap +  16*i;
 55:           for (ii=0; ii<4; ii++,value+=stepval) {
 56:             for (jj=ii; jj<16; jj+=4) {
 57:               bap[jj] += *value++;
 58:             }
 59:           }
 60:           goto noinsert2;
 61:         }
 62:       }
 63:       N = nrow++ - 1;
 64:       /* shift up all the later entries in this row */
 65:       for (ii=N; ii>=i; ii--) {
 66:         rp[ii+1] = rp[ii];
 67:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
 68:       }
 69:       if (N >= i) {
 70:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
 71:       }
 72:       rp[i] = col;
 73:       bap   = ap +  16*i;
 74:       for (ii=0; ii<4; ii++,value+=stepval) {
 75:         for (jj=ii; jj<16; jj+=4) {
 76:           bap[jj] = *value++;
 77:         }
 78:       }
 79:       noinsert2:;
 80:       low = i;
 81:     }
 82:     ailen[row] = nrow;
 83:   }
 84: }
 85: EXTERN_C_END

 87: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 88: #define matsetvalues4_ MATSETVALUES4
 89: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 90: #define matsetvalues4_ matsetvalues4
 91: #endif

 93: EXTERN_C_BEGIN
 96: void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
 97: {
 98:   Mat         A = *AA;
 99:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
100:   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
101:   int         *ai=a->i,*ailen=a->ilen;
102:   int         *aj=a->j,brow,bcol;
103:   int         ridx,cidx;
104:   MatScalar   *ap,value,*aa=a->a,*bap;
105: 
107:   for (k=0; k<m; k++) { /* loop over added rows */
108:     row  = im[k]; brow = row/4;
109:     rp   = aj + ai[brow];
110:     ap   = aa + 16*ai[brow];
111:     nrow = ailen[brow];
112:     low  = 0;
113:     for (l=0; l<n; l++) { /* loop over added columns */
114:       col = in[l]; bcol = col/4;
115:       ridx = row % 4; cidx = col % 4;
116:       value = v[l + k*n];
117:       low = 0; high = nrow;
118:       while (high-low > 7) {
119:         t = (low+high)/2;
120:         if (rp[t] > bcol) high = t;
121:         else              low  = t;
122:       }
123:       for (i=low; i<high; i++) {
124:         if (rp[i] > bcol) break;
125:         if (rp[i] == bcol) {
126:           bap  = ap +  16*i + 4*cidx + ridx;
127:           *bap += value;
128:           goto noinsert1;
129:         }
130:       }
131:       N = nrow++ - 1;
132:       /* shift up all the later entries in this row */
133:       for (ii=N; ii>=i; ii--) {
134:         rp[ii+1] = rp[ii];
135:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
136:       }
137:       if (N>=i) {
138:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
139:       }
140:       rp[i]                    = bcol;
141:       ap[16*i + 4*cidx + ridx] = value;
142:       noinsert1:;
143:       low = i;
144:     }
145:     ailen[brow] = nrow;
146:   }
147: }
148: EXTERN_C_END

150: /*  UGLY, ugly, ugly
151:    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 
152:    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 
153:    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
154:    converts the entries into single precision and then calls ..._MatScalar() to put them
155:    into the single precision data structures.
156: */
157: #if defined(PETSC_USE_MAT_SINGLE)
158: EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
159: #else
160: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
161: #endif

163: #define CHUNKSIZE  10

165: /*
166:      Checks for missing diagonals
167: */
170: int MatMissingDiagonal_SeqBAIJ(Mat A)
171: {
172:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
173:   int         *diag,*jj = a->j,i,ierr;

176:   MatMarkDiagonal_SeqBAIJ(A);
177:   diag = a->diag;
178:   for (i=0; i<a->mbs; i++) {
179:     if (jj[diag[i]] != i) {
180:       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
181:     }
182:   }
183:   return(0);
184: }

188: int MatMarkDiagonal_SeqBAIJ(Mat A)
189: {
190:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
191:   int         i,j,*diag,m = a->mbs,ierr;

194:   if (a->diag) return(0);

196:   PetscMalloc((m+1)*sizeof(int),&diag);
197:   PetscLogObjectMemory(A,(m+1)*sizeof(int));
198:   for (i=0; i<m; i++) {
199:     diag[i] = a->i[i+1];
200:     for (j=a->i[i]; j<a->i[i+1]; j++) {
201:       if (a->j[j] == i) {
202:         diag[i] = j;
203:         break;
204:       }
205:     }
206:   }
207:   a->diag = diag;
208:   return(0);
209: }


212: EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);

216: static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
217: {
218:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
219:   int         ierr,n = a->mbs,i;

222:   *nn = n;
223:   if (!ia) return(0);
224:   if (symmetric) {
225:     MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);
226:   } else if (oshift == 1) {
227:     /* temporarily add 1 to i and j indices */
228:     int nz = a->i[n];
229:     for (i=0; i<nz; i++) a->j[i]++;
230:     for (i=0; i<n+1; i++) a->i[i]++;
231:     *ia = a->i; *ja = a->j;
232:   } else {
233:     *ia = a->i; *ja = a->j;
234:   }

236:   return(0);
237: }

241: static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
242: {
243:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
244:   int         i,n = a->mbs,ierr;

247:   if (!ia) return(0);
248:   if (symmetric) {
249:     PetscFree(*ia);
250:     PetscFree(*ja);
251:   } else if (oshift == 1) {
252:     int nz = a->i[n]-1;
253:     for (i=0; i<nz; i++) a->j[i]--;
254:     for (i=0; i<n+1; i++) a->i[i]--;
255:   }
256:   return(0);
257: }

261: int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
262: {
263:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;

266:   *bs = baij->bs;
267:   return(0);
268: }


273: int MatDestroy_SeqBAIJ(Mat A)
274: {
275:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
276:   int         ierr;

279: #if defined(PETSC_USE_LOG)
280:   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
281: #endif
282:   PetscFree(a->a);
283:   if (!a->singlemalloc) {
284:     PetscFree(a->i);
285:     PetscFree(a->j);
286:   }
287:   if (a->row) {
288:     ISDestroy(a->row);
289:   }
290:   if (a->col) {
291:     ISDestroy(a->col);
292:   }
293:   if (a->diag) {PetscFree(a->diag);}
294:   if (a->ilen) {PetscFree(a->ilen);}
295:   if (a->imax) {PetscFree(a->imax);}
296:   if (a->solve_work) {PetscFree(a->solve_work);}
297:   if (a->mult_work) {PetscFree(a->mult_work);}
298:   if (a->icol) {ISDestroy(a->icol);}
299:   if (a->saved_values) {PetscFree(a->saved_values);}
300: #if defined(PETSC_USE_MAT_SINGLE)
301:   if (a->setvaluescopy) {PetscFree(a->setvaluescopy);}
302: #endif
303:   if (a->xtoy) {PetscFree(a->xtoy);}

305:   PetscFree(a);
306:   return(0);
307: }

311: int MatSetOption_SeqBAIJ(Mat A,MatOption op)
312: {
313:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;

316:   switch (op) {
317:   case MAT_ROW_ORIENTED:
318:     a->roworiented    = PETSC_TRUE;
319:     break;
320:   case MAT_COLUMN_ORIENTED:
321:     a->roworiented    = PETSC_FALSE;
322:     break;
323:   case MAT_COLUMNS_SORTED:
324:     a->sorted         = PETSC_TRUE;
325:     break;
326:   case MAT_COLUMNS_UNSORTED:
327:     a->sorted         = PETSC_FALSE;
328:     break;
329:   case MAT_KEEP_ZEROED_ROWS:
330:     a->keepzeroedrows = PETSC_TRUE;
331:     break;
332:   case MAT_NO_NEW_NONZERO_LOCATIONS:
333:     a->nonew          = 1;
334:     break;
335:   case MAT_NEW_NONZERO_LOCATION_ERR:
336:     a->nonew          = -1;
337:     break;
338:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
339:     a->nonew          = -2;
340:     break;
341:   case MAT_YES_NEW_NONZERO_LOCATIONS:
342:     a->nonew          = 0;
343:     break;
344:   case MAT_ROWS_SORTED:
345:   case MAT_ROWS_UNSORTED:
346:   case MAT_YES_NEW_DIAGONALS:
347:   case MAT_IGNORE_OFF_PROC_ENTRIES:
348:   case MAT_USE_HASH_TABLE:
349:     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
350:     break;
351:   case MAT_NO_NEW_DIAGONALS:
352:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
353:   default:
354:     SETERRQ(PETSC_ERR_SUP,"unknown option");
355:   }
356:   return(0);
357: }

361: int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
362: {
363:   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
364:   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
365:   MatScalar    *aa,*aa_i;
366:   PetscScalar  *v_i;

369:   bs  = a->bs;
370:   ai  = a->i;
371:   aj  = a->j;
372:   aa  = a->a;
373:   bs2 = a->bs2;
374: 
375:   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row);
376: 
377:   bn  = row/bs;   /* Block number */
378:   bp  = row % bs; /* Block Position */
379:   M   = ai[bn+1] - ai[bn];
380:   *nz = bs*M;
381: 
382:   if (v) {
383:     *v = 0;
384:     if (*nz) {
385:       PetscMalloc((*nz)*sizeof(PetscScalar),v);
386:       for (i=0; i<M; i++) { /* for each block in the block row */
387:         v_i  = *v + i*bs;
388:         aa_i = aa + bs2*(ai[bn] + i);
389:         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
390:       }
391:     }
392:   }

394:   if (idx) {
395:     *idx = 0;
396:     if (*nz) {
397:       PetscMalloc((*nz)*sizeof(int),idx);
398:       for (i=0; i<M; i++) { /* for each block in the block row */
399:         idx_i = *idx + i*bs;
400:         itmp  = bs*aj[ai[bn] + i];
401:         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
402:       }
403:     }
404:   }
405:   return(0);
406: }

410: int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
411: {

415:   if (idx) {if (*idx) {PetscFree(*idx);}}
416:   if (v)   {if (*v)   {PetscFree(*v);}}
417:   return(0);
418: }

422: int MatTranspose_SeqBAIJ(Mat A,Mat *B)
423: {
424:   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
425:   Mat         C;
426:   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
427:   int         *rows,*cols,bs2=a->bs2;
428:   PetscScalar *array;

431:   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
432:   PetscMalloc((1+nbs)*sizeof(int),&col);
433:   PetscMemzero(col,(1+nbs)*sizeof(int));

435: #if defined(PETSC_USE_MAT_SINGLE)
436:   PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);
437:   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
438: #else
439:   array = a->a;
440: #endif

442:   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
443:   MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);
444:   PetscFree(col);
445:   PetscMalloc(2*bs*sizeof(int),&rows);
446:   cols = rows + bs;
447:   for (i=0; i<mbs; i++) {
448:     cols[0] = i*bs;
449:     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
450:     len = ai[i+1] - ai[i];
451:     for (j=0; j<len; j++) {
452:       rows[0] = (*aj++)*bs;
453:       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
454:       MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
455:       array += bs2;
456:     }
457:   }
458:   PetscFree(rows);
459: #if defined(PETSC_USE_MAT_SINGLE)
460:   PetscFree(array);
461: #endif
462: 
463:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
464:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
465: 
466:   if (B) {
467:     *B = C;
468:   } else {
469:     MatHeaderCopy(A,C);
470:   }
471:   return(0);
472: }

476: static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
477: {
478:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
479:   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
480:   PetscScalar *aa;
481:   FILE        *file;

484:   PetscViewerBinaryGetDescriptor(viewer,&fd);
485:   PetscMalloc((4+A->m)*sizeof(int),&col_lens);
486:   col_lens[0] = MAT_FILE_COOKIE;

488:   col_lens[1] = A->m;
489:   col_lens[2] = A->n;
490:   col_lens[3] = a->nz*bs2;

492:   /* store lengths of each row and write (including header) to file */
493:   count = 0;
494:   for (i=0; i<a->mbs; i++) {
495:     for (j=0; j<bs; j++) {
496:       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
497:     }
498:   }
499:   PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);
500:   PetscFree(col_lens);

502:   /* store column indices (zero start index) */
503:   PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);
504:   count = 0;
505:   for (i=0; i<a->mbs; i++) {
506:     for (j=0; j<bs; j++) {
507:       for (k=a->i[i]; k<a->i[i+1]; k++) {
508:         for (l=0; l<bs; l++) {
509:           jj[count++] = bs*a->j[k] + l;
510:         }
511:       }
512:     }
513:   }
514:   PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);
515:   PetscFree(jj);

517:   /* store nonzero values */
518:   PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
519:   count = 0;
520:   for (i=0; i<a->mbs; i++) {
521:     for (j=0; j<bs; j++) {
522:       for (k=a->i[i]; k<a->i[i+1]; k++) {
523:         for (l=0; l<bs; l++) {
524:           aa[count++] = a->a[bs2*k + l*bs + j];
525:         }
526:       }
527:     }
528:   }
529:   PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);
530:   PetscFree(aa);

532:   PetscViewerBinaryGetInfoPointer(viewer,&file);
533:   if (file) {
534:     fprintf(file,"-matload_block_size %d\n",a->bs);
535:   }
536:   return(0);
537: }

541: static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
542: {
543:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
544:   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
545:   PetscViewerFormat format;

548:   PetscViewerGetFormat(viewer,&format);
549:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
550:     PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);
551:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
552:     Mat aij;
553:     MatConvert(A,MATSEQAIJ,&aij);
554:     MatView(aij,viewer);
555:     MatDestroy(aij);
556:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
557:      return(0);
558:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
559:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
560:     for (i=0; i<a->mbs; i++) {
561:       for (j=0; j<bs; j++) {
562:         PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
563:         for (k=a->i[i]; k<a->i[i+1]; k++) {
564:           for (l=0; l<bs; l++) {
565: #if defined(PETSC_USE_COMPLEX)
566:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
567:               PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
568:                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
569:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
570:               PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
571:                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
572:             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
573:               PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
574:             }
575: #else
576:             if (a->a[bs2*k + l*bs + j] != 0.0) {
577:               PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
578:             }
579: #endif
580:           }
581:         }
582:         PetscViewerASCIIPrintf(viewer,"\n");
583:       }
584:     }
585:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
586:   } else {
587:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
588:     for (i=0; i<a->mbs; i++) {
589:       for (j=0; j<bs; j++) {
590:         PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
591:         for (k=a->i[i]; k<a->i[i+1]; k++) {
592:           for (l=0; l<bs; l++) {
593: #if defined(PETSC_USE_COMPLEX)
594:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
595:               PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
596:                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
597:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
598:               PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
599:                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
600:             } else {
601:               PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
602:             }
603: #else
604:             PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
605: #endif
606:           }
607:         }
608:         PetscViewerASCIIPrintf(viewer,"\n");
609:       }
610:     }
611:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
612:   }
613:   PetscViewerFlush(viewer);
614:   return(0);
615: }

619: static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
620: {
621:   Mat          A = (Mat) Aa;
622:   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
623:   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
624:   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
625:   MatScalar    *aa;
626:   PetscViewer  viewer;


630:   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
631:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);

633:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

635:   /* loop over matrix elements drawing boxes */
636:   color = PETSC_DRAW_BLUE;
637:   for (i=0,row=0; i<mbs; i++,row+=bs) {
638:     for (j=a->i[i]; j<a->i[i+1]; j++) {
639:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
640:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
641:       aa = a->a + j*bs2;
642:       for (k=0; k<bs; k++) {
643:         for (l=0; l<bs; l++) {
644:           if (PetscRealPart(*aa++) >=  0.) continue;
645:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
646:         }
647:       }
648:     }
649:   }
650:   color = PETSC_DRAW_CYAN;
651:   for (i=0,row=0; i<mbs; i++,row+=bs) {
652:     for (j=a->i[i]; j<a->i[i+1]; j++) {
653:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
654:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
655:       aa = a->a + j*bs2;
656:       for (k=0; k<bs; k++) {
657:         for (l=0; l<bs; l++) {
658:           if (PetscRealPart(*aa++) != 0.) continue;
659:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
660:         }
661:       }
662:     }
663:   }

665:   color = PETSC_DRAW_RED;
666:   for (i=0,row=0; i<mbs; i++,row+=bs) {
667:     for (j=a->i[i]; j<a->i[i+1]; j++) {
668:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
669:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
670:       aa = a->a + j*bs2;
671:       for (k=0; k<bs; k++) {
672:         for (l=0; l<bs; l++) {
673:           if (PetscRealPart(*aa++) <= 0.) continue;
674:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
675:         }
676:       }
677:     }
678:   }
679:   return(0);
680: }

684: static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
685: {
686:   int          ierr;
687:   PetscReal    xl,yl,xr,yr,w,h;
688:   PetscDraw    draw;
689:   PetscTruth   isnull;


693:   PetscViewerDrawGetDraw(viewer,0,&draw);
694:   PetscDrawIsNull(draw,&isnull); if (isnull) return(0);

696:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
697:   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
698:   xr += w;    yr += h;  xl = -w;     yl = -h;
699:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
700:   PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
701:   PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
702:   return(0);
703: }

707: int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
708: {
709:   int        ierr;
710:   PetscTruth issocket,isascii,isbinary,isdraw;

713:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
714:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
715:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
716:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
717:   if (issocket) {
718:     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
719:   } else if (isascii){
720:     MatView_SeqBAIJ_ASCII(A,viewer);
721:   } else if (isbinary) {
722:     MatView_SeqBAIJ_Binary(A,viewer);
723:   } else if (isdraw) {
724:     MatView_SeqBAIJ_Draw(A,viewer);
725:   } else {
726:     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
727:   }
728:   return(0);
729: }


734: int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
735: {
736:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
737:   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
738:   int        *ai = a->i,*ailen = a->ilen;
739:   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
740:   MatScalar  *ap,*aa = a->a,zero = 0.0;

743:   for (k=0; k<m; k++) { /* loop over rows */
744:     row  = im[k]; brow = row/bs;
745:     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
746:     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row);
747:     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
748:     nrow = ailen[brow];
749:     for (l=0; l<n; l++) { /* loop over columns */
750:       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
751:       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]);
752:       col  = in[l] ;
753:       bcol = col/bs;
754:       cidx = col%bs;
755:       ridx = row%bs;
756:       high = nrow;
757:       low  = 0; /* assume unsorted */
758:       while (high-low > 5) {
759:         t = (low+high)/2;
760:         if (rp[t] > bcol) high = t;
761:         else             low  = t;
762:       }
763:       for (i=low; i<high; i++) {
764:         if (rp[i] > bcol) break;
765:         if (rp[i] == bcol) {
766:           *v++ = ap[bs2*i+bs*cidx+ridx];
767:           goto finished;
768:         }
769:       }
770:       *v++ = zero;
771:       finished:;
772:     }
773:   }
774:   return(0);
775: }

777: #if defined(PETSC_USE_MAT_SINGLE)
780: int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
781: {
782:   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
783:   int         ierr,i,N = m*n*b->bs2;
784:   MatScalar   *vsingle;

787:   if (N > b->setvalueslen) {
788:     if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
789:     PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
790:     b->setvalueslen  = N;
791:   }
792:   vsingle = b->setvaluescopy;
793:   for (i=0; i<N; i++) {
794:     vsingle[i] = v[i];
795:   }
796:   MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
797:   return(0);
798: }
799: #endif


804: int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is)
805: {
806:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
807:   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
808:   int               *imax=a->imax,*ai=a->i,*ailen=a->ilen;
809:   int               *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
810:   PetscTruth        roworiented=a->roworiented;
811:   const MatScalar   *value = v;
812:   MatScalar         *ap,*aa = a->a,*bap;

815:   if (roworiented) {
816:     stepval = (n-1)*bs;
817:   } else {
818:     stepval = (m-1)*bs;
819:   }
820:   for (k=0; k<m; k++) { /* loop over added rows */
821:     row  = im[k];
822:     if (row < 0) continue;
823: #if defined(PETSC_USE_BOPT_g)  
824:     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
825: #endif
826:     rp   = aj + ai[row];
827:     ap   = aa + bs2*ai[row];
828:     rmax = imax[row];
829:     nrow = ailen[row];
830:     low  = 0;
831:     for (l=0; l<n; l++) { /* loop over added columns */
832:       if (in[l] < 0) continue;
833: #if defined(PETSC_USE_BOPT_g)  
834:       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1);
835: #endif
836:       col = in[l];
837:       if (roworiented) {
838:         value = v + k*(stepval+bs)*bs + l*bs;
839:       } else {
840:         value = v + l*(stepval+bs)*bs + k*bs;
841:       }
842:       if (!sorted) low = 0; high = nrow;
843:       while (high-low > 7) {
844:         t = (low+high)/2;
845:         if (rp[t] > col) high = t;
846:         else             low  = t;
847:       }
848:       for (i=low; i<high; i++) {
849:         if (rp[i] > col) break;
850:         if (rp[i] == col) {
851:           bap  = ap +  bs2*i;
852:           if (roworiented) {
853:             if (is == ADD_VALUES) {
854:               for (ii=0; ii<bs; ii++,value+=stepval) {
855:                 for (jj=ii; jj<bs2; jj+=bs) {
856:                   bap[jj] += *value++;
857:                 }
858:               }
859:             } else {
860:               for (ii=0; ii<bs; ii++,value+=stepval) {
861:                 for (jj=ii; jj<bs2; jj+=bs) {
862:                   bap[jj] = *value++;
863:                 }
864:               }
865:             }
866:           } else {
867:             if (is == ADD_VALUES) {
868:               for (ii=0; ii<bs; ii++,value+=stepval) {
869:                 for (jj=0; jj<bs; jj++) {
870:                   *bap++ += *value++;
871:                 }
872:               }
873:             } else {
874:               for (ii=0; ii<bs; ii++,value+=stepval) {
875:                 for (jj=0; jj<bs; jj++) {
876:                   *bap++  = *value++;
877:                 }
878:               }
879:             }
880:           }
881:           goto noinsert2;
882:         }
883:       }
884:       if (nonew == 1) goto noinsert2;
885:       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
886:       if (nrow >= rmax) {
887:         /* there is no extra room in row, therefore enlarge */
888:         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
889:         MatScalar *new_a;

891:         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);

893:         /* malloc new storage space */
894:         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
895:         PetscMalloc(len,&new_a);
896:         new_j   = (int*)(new_a + bs2*new_nz);
897:         new_i   = new_j + new_nz;

899:         /* copy over old data into new slots */
900:         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
901:         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
902:         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
903:         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
904:         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));
905:         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));
906:         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
907:         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));
908:         /* free up old matrix storage */
909:         PetscFree(a->a);
910:         if (!a->singlemalloc) {
911:           PetscFree(a->i);
912:           PetscFree(a->j);
913:         }
914:         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
915:         a->singlemalloc = PETSC_TRUE;

917:         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
918:         rmax = imax[row] = imax[row] + CHUNKSIZE;
919:         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
920:         a->maxnz += bs2*CHUNKSIZE;
921:         a->reallocs++;
922:         a->nz++;
923:       }
924:       N = nrow++ - 1;
925:       /* shift up all the later entries in this row */
926:       for (ii=N; ii>=i; ii--) {
927:         rp[ii+1] = rp[ii];
928:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
929:       }
930:       if (N >= i) {
931:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
932:       }
933:       rp[i] = col;
934:       bap   = ap +  bs2*i;
935:       if (roworiented) {
936:         for (ii=0; ii<bs; ii++,value+=stepval) {
937:           for (jj=ii; jj<bs2; jj+=bs) {
938:             bap[jj] = *value++;
939:           }
940:         }
941:       } else {
942:         for (ii=0; ii<bs; ii++,value+=stepval) {
943:           for (jj=0; jj<bs; jj++) {
944:             *bap++  = *value++;
945:           }
946:         }
947:       }
948:       noinsert2:;
949:       low = i;
950:     }
951:     ailen[row] = nrow;
952:   }
953:   return(0);
954: }

958: int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
959: {
960:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
961:   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
962:   int        m = A->m,*ip,N,*ailen = a->ilen;
963:   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
964:   MatScalar  *aa = a->a,*ap;

967:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

969:   if (m) rmax = ailen[0];
970:   for (i=1; i<mbs; i++) {
971:     /* move each row back by the amount of empty slots (fshift) before it*/
972:     fshift += imax[i-1] - ailen[i-1];
973:     rmax   = PetscMax(rmax,ailen[i]);
974:     if (fshift) {
975:       ip = aj + ai[i]; ap = aa + bs2*ai[i];
976:       N = ailen[i];
977:       for (j=0; j<N; j++) {
978:         ip[j-fshift] = ip[j];
979:         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
980:       }
981:     }
982:     ai[i] = ai[i-1] + ailen[i-1];
983:   }
984:   if (mbs) {
985:     fshift += imax[mbs-1] - ailen[mbs-1];
986:     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
987:   }
988:   /* reset ilen and imax for each row */
989:   for (i=0; i<mbs; i++) {
990:     ailen[i] = imax[i] = ai[i+1] - ai[i];
991:   }
992:   a->nz = ai[mbs];

994:   /* diagonals may have moved, so kill the diagonal pointers */
995:   if (fshift && a->diag) {
996:     PetscFree(a->diag);
997:     PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
998:     a->diag = 0;
999:   }
1000:   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
1001:   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
1002:   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
1003:   a->reallocs          = 0;
1004:   A->info.nz_unneeded  = (PetscReal)fshift*bs2;

1006:   return(0);
1007: }



1011: /* 
1012:    This function returns an array of flags which indicate the locations of contiguous
1013:    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1014:    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1015:    Assume: sizes should be long enough to hold all the values.
1016: */
1019: static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
1020: {
1021:   int        i,j,k,row;
1022:   PetscTruth flg;

1025:   for (i=0,j=0; i<n; j++) {
1026:     row = idx[i];
1027:     if (row%bs!=0) { /* Not the begining of a block */
1028:       sizes[j] = 1;
1029:       i++;
1030:     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1031:       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1032:       i++;
1033:     } else { /* Begining of the block, so check if the complete block exists */
1034:       flg = PETSC_TRUE;
1035:       for (k=1; k<bs; k++) {
1036:         if (row+k != idx[i+k]) { /* break in the block */
1037:           flg = PETSC_FALSE;
1038:           break;
1039:         }
1040:       }
1041:       if (flg == PETSC_TRUE) { /* No break in the bs */
1042:         sizes[j] = bs;
1043:         i+= bs;
1044:       } else {
1045:         sizes[j] = 1;
1046:         i++;
1047:       }
1048:     }
1049:   }
1050:   *bs_max = j;
1051:   return(0);
1052: }
1053: 
1056: int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1057: {
1058:   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1059:   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
1060:   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
1061:   PetscScalar zero = 0.0;
1062:   MatScalar   *aa;

1065:   /* Make a copy of the IS and  sort it */
1066:   ISGetLocalSize(is,&is_n);
1067:   ISGetIndices(is,&is_idx);

1069:   /* allocate memory for rows,sizes */
1070:   PetscMalloc((3*is_n+1)*sizeof(int),&rows);
1071:   sizes = rows + is_n;

1073:   /* copy IS values to rows, and sort them */
1074:   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1075:   PetscSortInt(is_n,rows);
1076:   if (baij->keepzeroedrows) {
1077:     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1078:     bs_max = is_n;
1079:   } else {
1080:     MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
1081:   }
1082:   ISRestoreIndices(is,&is_idx);

1084:   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1085:     row   = rows[j];
1086:     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
1087:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1088:     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1089:     if (sizes[i] == bs && !baij->keepzeroedrows) {
1090:       if (diag) {
1091:         if (baij->ilen[row/bs] > 0) {
1092:           baij->ilen[row/bs]       = 1;
1093:           baij->j[baij->i[row/bs]] = row/bs;
1094:           PetscMemzero(aa,count*bs*sizeof(MatScalar));
1095:         }
1096:         /* Now insert all the diagonal values for this bs */
1097:         for (k=0; k<bs; k++) {
1098:           (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);
1099:         }
1100:       } else { /* (!diag) */
1101:         baij->ilen[row/bs] = 0;
1102:       } /* end (!diag) */
1103:     } else { /* (sizes[i] != bs) */
1104: #if defined (PETSC_USE_DEBUG)
1105:       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
1106: #endif
1107:       for (k=0; k<count; k++) {
1108:         aa[0] =  zero;
1109:         aa    += bs;
1110:       }
1111:       if (diag) {
1112:         (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);
1113:       }
1114:     }
1115:   }

1117:   PetscFree(rows);
1118:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
1119:   return(0);
1120: }

1124: int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
1125: {
1126:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1127:   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1128:   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1129:   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1130:   int         ridx,cidx,bs2=a->bs2,ierr;
1131:   PetscTruth  roworiented=a->roworiented;
1132:   MatScalar   *ap,value,*aa=a->a,*bap;

1135:   for (k=0; k<m; k++) { /* loop over added rows */
1136:     row  = im[k]; brow = row/bs;
1137:     if (row < 0) continue;
1138: #if defined(PETSC_USE_BOPT_g)  
1139:     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
1140: #endif
1141:     rp   = aj + ai[brow];
1142:     ap   = aa + bs2*ai[brow];
1143:     rmax = imax[brow];
1144:     nrow = ailen[brow];
1145:     low  = 0;
1146:     for (l=0; l<n; l++) { /* loop over added columns */
1147:       if (in[l] < 0) continue;
1148: #if defined(PETSC_USE_BOPT_g)  
1149:       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
1150: #endif
1151:       col = in[l]; bcol = col/bs;
1152:       ridx = row % bs; cidx = col % bs;
1153:       if (roworiented) {
1154:         value = v[l + k*n];
1155:       } else {
1156:         value = v[k + l*m];
1157:       }
1158:       if (!sorted) low = 0; high = nrow;
1159:       while (high-low > 7) {
1160:         t = (low+high)/2;
1161:         if (rp[t] > bcol) high = t;
1162:         else              low  = t;
1163:       }
1164:       for (i=low; i<high; i++) {
1165:         if (rp[i] > bcol) break;
1166:         if (rp[i] == bcol) {
1167:           bap  = ap +  bs2*i + bs*cidx + ridx;
1168:           if (is == ADD_VALUES) *bap += value;
1169:           else                  *bap  = value;
1170:           goto noinsert1;
1171:         }
1172:       }
1173:       if (nonew == 1) goto noinsert1;
1174:       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1175:       if (nrow >= rmax) {
1176:         /* there is no extra room in row, therefore enlarge */
1177:         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1178:         MatScalar *new_a;

1180:         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);

1182:         /* Malloc new storage space */
1183:         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1184:         PetscMalloc(len,&new_a);
1185:         new_j   = (int*)(new_a + bs2*new_nz);
1186:         new_i   = new_j + new_nz;

1188:         /* copy over old data into new slots */
1189:         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1190:         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1191:         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
1192:         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1193:         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));
1194:         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));
1195:         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
1196:         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));
1197:         /* free up old matrix storage */
1198:         PetscFree(a->a);
1199:         if (!a->singlemalloc) {
1200:           PetscFree(a->i);
1201:           PetscFree(a->j);
1202:         }
1203:         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1204:         a->singlemalloc = PETSC_TRUE;

1206:         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1207:         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1208:         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1209:         a->maxnz += bs2*CHUNKSIZE;
1210:         a->reallocs++;
1211:         a->nz++;
1212:       }
1213:       N = nrow++ - 1;
1214:       /* shift up all the later entries in this row */
1215:       for (ii=N; ii>=i; ii--) {
1216:         rp[ii+1] = rp[ii];
1217:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1218:       }
1219:       if (N>=i) {
1220:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1221:       }
1222:       rp[i]                      = bcol;
1223:       ap[bs2*i + bs*cidx + ridx] = value;
1224:       noinsert1:;
1225:       low = i;
1226:     }
1227:     ailen[brow] = nrow;
1228:   }
1229:   return(0);
1230: }


1235: int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1236: {
1237:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1238:   Mat         outA;
1239:   int         ierr;
1240:   PetscTruth  row_identity,col_identity;

1243:   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1244:   ISIdentity(row,&row_identity);
1245:   ISIdentity(col,&col_identity);
1246:   if (!row_identity || !col_identity) {
1247:     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1248:   }

1250:   outA          = inA;
1251:   inA->factor   = FACTOR_LU;

1253:   if (!a->diag) {
1254:     MatMarkDiagonal_SeqBAIJ(inA);
1255:   }

1257:   a->row        = row;
1258:   a->col        = col;
1259:   PetscObjectReference((PetscObject)row);
1260:   PetscObjectReference((PetscObject)col);
1261: 
1262:   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1263:   ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
1264:   PetscLogObjectParent(inA,a->icol);
1265: 
1266:   /*
1267:       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 
1268:       for ILU(0) factorization with natural ordering
1269:   */
1270:   if (a->bs < 8) {
1271:     MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);
1272:   } else {
1273:     if (!a->solve_work) {
1274:       PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);
1275:       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1276:     }
1277:   }

1279:   MatLUFactorNumeric(inA,&outA);

1281:   return(0);
1282: }
1285: int MatPrintHelp_SeqBAIJ(Mat A)
1286: {
1287:   static PetscTruth called = PETSC_FALSE;
1288:   MPI_Comm          comm = A->comm;
1289:   int               ierr;

1292:   if (called) {return(0);} else called = PETSC_TRUE;
1293:   (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1294:   (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");
1295:   return(0);
1296: }

1298: EXTERN_C_BEGIN
1301: int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1302: {
1303:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1304:   int         i,nz,nbs;

1307:   nz  = baij->maxnz/baij->bs2;
1308:   nbs = baij->nbs;
1309:   for (i=0; i<nz; i++) {
1310:     baij->j[i] = indices[i];
1311:   }
1312:   baij->nz = nz;
1313:   for (i=0; i<nbs; i++) {
1314:     baij->ilen[i] = baij->imax[i];
1315:   }

1317:   return(0);
1318: }
1319: EXTERN_C_END

1323: /*@
1324:     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1325:        in the matrix.

1327:   Input Parameters:
1328: +  mat - the SeqBAIJ matrix
1329: -  indices - the column indices

1331:   Level: advanced

1333:   Notes:
1334:     This can be called if you have precomputed the nonzero structure of the 
1335:   matrix and want to provide it to the matrix object to improve the performance
1336:   of the MatSetValues() operation.

1338:     You MUST have set the correct numbers of nonzeros per row in the call to 
1339:   MatCreateSeqBAIJ().

1341:     MUST be called before any calls to MatSetValues();

1343: @*/
1344: int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1345: {
1346:   int ierr,(*f)(Mat,int *);

1350:   PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
1351:   if (f) {
1352:     (*f)(mat,indices);
1353:   } else {
1354:     SETERRQ(1,"Wrong type of matrix to set column indices");
1355:   }
1356:   return(0);
1357: }

1361: int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1362: {
1363:   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1364:   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1365:   PetscReal    atmp;
1366:   PetscScalar  *x,zero = 0.0;
1367:   MatScalar    *aa;
1368:   int          ncols,brow,krow,kcol;

1371:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1372:   bs   = a->bs;
1373:   aa   = a->a;
1374:   ai   = a->i;
1375:   aj   = a->j;
1376:   mbs = a->mbs;

1378:   VecSet(&zero,v);
1379:   VecGetArray(v,&x);
1380:   VecGetLocalSize(v,&n);
1381:   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1382:   for (i=0; i<mbs; i++) {
1383:     ncols = ai[1] - ai[0]; ai++;
1384:     brow  = bs*i;
1385:     for (j=0; j<ncols; j++){
1386:       /* bcol = bs*(*aj); */
1387:       for (kcol=0; kcol<bs; kcol++){
1388:         for (krow=0; krow<bs; krow++){
1389:           atmp = PetscAbsScalar(*aa); aa++;
1390:           row = brow + krow;    /* row index */
1391:           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1392:           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1393:         }
1394:       }
1395:       aj++;
1396:     }
1397:   }
1398:   VecRestoreArray(v,&x);
1399:   return(0);
1400: }

1404: int MatSetUpPreallocation_SeqBAIJ(Mat A)
1405: {
1406:   int        ierr;

1409:    MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);
1410:   return(0);
1411: }

1415: int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1416: {
1417:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1419:   *array = a->a;
1420:   return(0);
1421: }

1425: int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1426: {
1428:   return(0);
1429: }

1431:  #include petscblaslapack.h
1434: int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1435: {
1436:   Mat_SeqBAIJ  *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1437:   int          ierr,one=1,i,bs=y->bs,j,bs2;

1440:   if (str == SAME_NONZERO_PATTERN) {
1441:     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1442:   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1443:     if (y->xtoy && y->XtoY != X) {
1444:       PetscFree(y->xtoy);
1445:       MatDestroy(y->XtoY);
1446:     }
1447:     if (!y->xtoy) { /* get xtoy */
1448:       MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);
1449:       y->XtoY = X;
1450:     }
1451:     bs2 = bs*bs;
1452:     for (i=0; i<x->nz; i++) {
1453:       j = 0;
1454:       while (j < bs2){
1455:         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1456:         j++;
1457:       }
1458:     }
1459:     PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1460:   } else {
1461:     MatAXPY_Basic(a,X,Y,str);
1462:   }
1463:   return(0);
1464: }

1466: /* -------------------------------------------------------------------*/
1467: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1468:        MatGetRow_SeqBAIJ,
1469:        MatRestoreRow_SeqBAIJ,
1470:        MatMult_SeqBAIJ_N,
1471: /* 4*/ MatMultAdd_SeqBAIJ_N,
1472:        MatMultTranspose_SeqBAIJ,
1473:        MatMultTransposeAdd_SeqBAIJ,
1474:        MatSolve_SeqBAIJ_N,
1475:        0,
1476:        0,
1477: /*10*/ 0,
1478:        MatLUFactor_SeqBAIJ,
1479:        0,
1480:        0,
1481:        MatTranspose_SeqBAIJ,
1482: /*15*/ MatGetInfo_SeqBAIJ,
1483:        MatEqual_SeqBAIJ,
1484:        MatGetDiagonal_SeqBAIJ,
1485:        MatDiagonalScale_SeqBAIJ,
1486:        MatNorm_SeqBAIJ,
1487: /*20*/ 0,
1488:        MatAssemblyEnd_SeqBAIJ,
1489:        0,
1490:        MatSetOption_SeqBAIJ,
1491:        MatZeroEntries_SeqBAIJ,
1492: /*25*/ MatZeroRows_SeqBAIJ,
1493:        MatLUFactorSymbolic_SeqBAIJ,
1494:        MatLUFactorNumeric_SeqBAIJ_N,
1495:        0,
1496:        0,
1497: /*30*/ MatSetUpPreallocation_SeqBAIJ,
1498:        MatILUFactorSymbolic_SeqBAIJ,
1499:        0,
1500:        MatGetArray_SeqBAIJ,
1501:        MatRestoreArray_SeqBAIJ,
1502: /*35*/ MatDuplicate_SeqBAIJ,
1503:        0,
1504:        0,
1505:        MatILUFactor_SeqBAIJ,
1506:        0,
1507: /*40*/ MatAXPY_SeqBAIJ,
1508:        MatGetSubMatrices_SeqBAIJ,
1509:        MatIncreaseOverlap_SeqBAIJ,
1510:        MatGetValues_SeqBAIJ,
1511:        0,
1512: /*45*/ MatPrintHelp_SeqBAIJ,
1513:        MatScale_SeqBAIJ,
1514:        0,
1515:        0,
1516:        0,
1517: /*50*/ MatGetBlockSize_SeqBAIJ,
1518:        MatGetRowIJ_SeqBAIJ,
1519:        MatRestoreRowIJ_SeqBAIJ,
1520:        0,
1521:        0,
1522: /*55*/ 0,
1523:        0,
1524:        0,
1525:        0,
1526:        MatSetValuesBlocked_SeqBAIJ,
1527: /*60*/ MatGetSubMatrix_SeqBAIJ,
1528:        MatDestroy_SeqBAIJ,
1529:        MatView_SeqBAIJ,
1530:        MatGetPetscMaps_Petsc,
1531:        0,
1532: /*65*/ 0,
1533:        0,
1534:        0,
1535:        0,
1536:        0,
1537: /*70*/ MatGetRowMax_SeqBAIJ,
1538:        MatConvert_Basic,
1539:        0,
1540:        0,
1541:        0,
1542: /*75*/ 0,
1543:        0,
1544:        0,
1545:        0,
1546:        0,
1547: /*80*/ 0,
1548:        0,
1549:        0,
1550:        0,
1551:        0,
1552: /*85*/ MatLoad_SeqBAIJ
1553: };

1555: EXTERN_C_BEGIN
1558: int MatStoreValues_SeqBAIJ(Mat mat)
1559: {
1560:   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1561:   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1562:   int         ierr;

1565:   if (aij->nonew != 1) {
1566:     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1567:   }

1569:   /* allocate space for values if not already there */
1570:   if (!aij->saved_values) {
1571:     PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
1572:   }

1574:   /* copy values over */
1575:   PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
1576:   return(0);
1577: }
1578: EXTERN_C_END

1580: EXTERN_C_BEGIN
1583: int MatRetrieveValues_SeqBAIJ(Mat mat)
1584: {
1585:   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1586:   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;

1589:   if (aij->nonew != 1) {
1590:     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1591:   }
1592:   if (!aij->saved_values) {
1593:     SETERRQ(1,"Must call MatStoreValues(A);first");
1594:   }

1596:   /* copy values over */
1597:   PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
1598:   return(0);
1599: }
1600: EXTERN_C_END

1602: EXTERN_C_BEGIN
1603: extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1604: EXTERN_C_END

1606: EXTERN_C_BEGIN
1609: int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz)
1610: {
1611:   Mat_SeqBAIJ *b;
1612:   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1613:   PetscTruth  flg;


1617:   B->preallocated = PETSC_TRUE;
1618:   PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);
1619:   if (nnz && newbs != bs) {
1620:     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1621:   }
1622:   bs   = newbs;

1624:   mbs  = B->m/bs;
1625:   nbs  = B->n/bs;
1626:   bs2  = bs*bs;

1628:   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1629:     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1630:   }

1632:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1633:   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1634:   if (nnz) {
1635:     for (i=0; i<mbs; i++) {
1636:       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1637:       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1638:     }
1639:   }

1641:   b       = (Mat_SeqBAIJ*)B->data;
1642:   PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);
1643:   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1644:   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1645:   if (!flg) {
1646:     switch (bs) {
1647:     case 1:
1648:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1649:       B->ops->mult            = MatMult_SeqBAIJ_1;
1650:       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1651:       break;
1652:     case 2:
1653:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1654:       B->ops->mult            = MatMult_SeqBAIJ_2;
1655:       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1656:       break;
1657:     case 3:
1658:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1659:       B->ops->mult            = MatMult_SeqBAIJ_3;
1660:       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1661:       break;
1662:     case 4:
1663:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1664:       B->ops->mult            = MatMult_SeqBAIJ_4;
1665:       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1666:       break;
1667:     case 5:
1668:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1669:       B->ops->mult            = MatMult_SeqBAIJ_5;
1670:       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1671:       break;
1672:     case 6:
1673:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1674:       B->ops->mult            = MatMult_SeqBAIJ_6;
1675:       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1676:       break;
1677:     case 7:
1678:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1679:       B->ops->mult            = MatMult_SeqBAIJ_7;
1680:       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1681:       break;
1682:     default:
1683:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1684:       B->ops->mult            = MatMult_SeqBAIJ_N;
1685:       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1686:       break;
1687:     }
1688:   }
1689:   b->bs      = bs;
1690:   b->mbs     = mbs;
1691:   b->nbs     = nbs;
1692:   PetscMalloc((mbs+1)*sizeof(int),&b->imax);
1693:   if (!nnz) {
1694:     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1695:     else if (nz <= 0)        nz = 1;
1696:     for (i=0; i<mbs; i++) b->imax[i] = nz;
1697:     nz = nz*mbs;
1698:   } else {
1699:     nz = 0;
1700:     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1701:   }

1703:   /* allocate the matrix space */
1704:   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1705:   PetscMalloc(len,&b->a);
1706:   PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
1707:   b->j  = (int*)(b->a + nz*bs2);
1708:   PetscMemzero(b->j,nz*sizeof(int));
1709:   b->i  = b->j + nz;
1710:   b->singlemalloc = PETSC_TRUE;

1712:   b->i[0] = 0;
1713:   for (i=1; i<mbs+1; i++) {
1714:     b->i[i] = b->i[i-1] + b->imax[i-1];
1715:   }

1717:   /* b->ilen will count nonzeros in each block row so far. */
1718:   PetscMalloc((mbs+1)*sizeof(int),&b->ilen);
1719:   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1720:   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}

1722:   b->bs               = bs;
1723:   b->bs2              = bs2;
1724:   b->mbs              = mbs;
1725:   b->nz               = 0;
1726:   b->maxnz            = nz*bs2;
1727:   B->info.nz_unneeded = (PetscReal)b->maxnz;
1728:   return(0);
1729: }
1730: EXTERN_C_END

1732: /*MC
1733:    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 
1734:    block sparse compressed row format.

1736:    Options Database Keys:
1737: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()

1739:   Level: beginner

1741: .seealso: MatCreateSeqBAIJ
1742: M*/

1744: EXTERN_C_BEGIN
1747: int MatCreate_SeqBAIJ(Mat B)
1748: {
1749:   int         ierr,size;
1750:   Mat_SeqBAIJ *b;

1753:   MPI_Comm_size(B->comm,&size);
1754:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");

1756:   B->m = B->M = PetscMax(B->m,B->M);
1757:   B->n = B->N = PetscMax(B->n,B->N);
1758:   PetscNew(Mat_SeqBAIJ,&b);
1759:   B->data = (void*)b;
1760:   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1761:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1762:   B->factor           = 0;
1763:   B->lupivotthreshold = 1.0;
1764:   B->mapping          = 0;
1765:   b->row              = 0;
1766:   b->col              = 0;
1767:   b->icol             = 0;
1768:   b->reallocs         = 0;
1769:   b->saved_values     = 0;
1770: #if defined(PETSC_USE_MAT_SINGLE)
1771:   b->setvalueslen     = 0;
1772:   b->setvaluescopy    = PETSC_NULL;
1773: #endif

1775:   PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);
1776:   PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);

1778:   b->sorted           = PETSC_FALSE;
1779:   b->roworiented      = PETSC_TRUE;
1780:   b->nonew            = 0;
1781:   b->diag             = 0;
1782:   b->solve_work       = 0;
1783:   b->mult_work        = 0;
1784:   B->spptr            = 0;
1785:   B->info.nz_unneeded = (PetscReal)b->maxnz;
1786:   b->keepzeroedrows   = PETSC_FALSE;
1787:   b->xtoy              = 0;
1788:   b->XtoY              = 0;

1790:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1791:                                      "MatStoreValues_SeqBAIJ",
1792:                                       MatStoreValues_SeqBAIJ);
1793:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1794:                                      "MatRetrieveValues_SeqBAIJ",
1795:                                       MatRetrieveValues_SeqBAIJ);
1796:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1797:                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1798:                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);
1799:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
1800:                                      "MatConvert_SeqBAIJ_SeqAIJ",
1801:                                       MatConvert_SeqBAIJ_SeqAIJ);
1802:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
1803:                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
1804:                                       MatSeqBAIJSetPreallocation_SeqBAIJ);
1805:   return(0);
1806: }
1807: EXTERN_C_END

1811: int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1812: {
1813:   Mat         C;
1814:   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1815:   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;

1818:   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");

1820:   *B = 0;
1821:   MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);
1822:   MatSetType(C,MATSEQBAIJ);
1823:   c    = (Mat_SeqBAIJ*)C->data;

1825:   c->bs         = a->bs;
1826:   c->bs2        = a->bs2;
1827:   c->mbs        = a->mbs;
1828:   c->nbs        = a->nbs;
1829:   PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));

1831:   PetscMalloc((mbs+1)*sizeof(int),&c->imax);
1832:   PetscMalloc((mbs+1)*sizeof(int),&c->ilen);
1833:   for (i=0; i<mbs; i++) {
1834:     c->imax[i] = a->imax[i];
1835:     c->ilen[i] = a->ilen[i];
1836:   }

1838:   /* allocate the matrix space */
1839:   c->singlemalloc = PETSC_TRUE;
1840:   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1841:   PetscMalloc(len,&c->a);
1842:   c->j = (int*)(c->a + nz*bs2);
1843:   c->i = c->j + nz;
1844:   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1845:   if (mbs > 0) {
1846:     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1847:     if (cpvalues == MAT_COPY_VALUES) {
1848:       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
1849:     } else {
1850:       PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
1851:     }
1852:   }

1854:   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1855:   c->sorted      = a->sorted;
1856:   c->roworiented = a->roworiented;
1857:   c->nonew       = a->nonew;

1859:   if (a->diag) {
1860:     PetscMalloc((mbs+1)*sizeof(int),&c->diag);
1861:     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1862:     for (i=0; i<mbs; i++) {
1863:       c->diag[i] = a->diag[i];
1864:     }
1865:   } else c->diag        = 0;
1866:   c->nz                 = a->nz;
1867:   c->maxnz              = a->maxnz;
1868:   c->solve_work         = 0;
1869:   C->spptr              = 0;     /* Dangerous -I'm throwing away a->spptr */
1870:   c->mult_work          = 0;
1871:   C->preallocated       = PETSC_TRUE;
1872:   C->assembled          = PETSC_TRUE;
1873:   *B = C;
1874:   PetscFListDuplicate(A->qlist,&C->qlist);
1875:   return(0);
1876: }

1880: int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1881: {
1882:   Mat_SeqBAIJ  *a;
1883:   Mat          B;
1884:   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1885:   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1886:   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1887:   int          *masked,nmask,tmp,bs2,ishift;
1888:   PetscScalar  *aa;
1889:   MPI_Comm     comm = ((PetscObject)viewer)->comm;

1892:   PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
1893:   bs2  = bs*bs;

1895:   MPI_Comm_size(comm,&size);
1896:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1897:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1898:   PetscBinaryRead(fd,header,4,PETSC_INT);
1899:   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1900:   M = header[1]; N = header[2]; nz = header[3];

1902:   if (header[3] < 0) {
1903:     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1904:   }

1906:   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

1908:   /* 
1909:      This code adds extra rows to make sure the number of rows is 
1910:     divisible by the blocksize
1911:   */
1912:   mbs        = M/bs;
1913:   extra_rows = bs - M + bs*(mbs);
1914:   if (extra_rows == bs) extra_rows = 0;
1915:   else                  mbs++;
1916:   if (extra_rows) {
1917:     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1918:   }

1920:   /* read in row lengths */
1921:   PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
1922:   PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1923:   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;

1925:   /* read in column indices */
1926:   PetscMalloc((nz+extra_rows)*sizeof(int),&jj);
1927:   PetscBinaryRead(fd,jj,nz,PETSC_INT);
1928:   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;

1930:   /* loop over row lengths determining block row lengths */
1931:   PetscMalloc(mbs*sizeof(int),&browlengths);
1932:   PetscMemzero(browlengths,mbs*sizeof(int));
1933:   PetscMalloc(2*mbs*sizeof(int),&mask);
1934:   PetscMemzero(mask,mbs*sizeof(int));
1935:   masked   = mask + mbs;
1936:   rowcount = 0; nzcount = 0;
1937:   for (i=0; i<mbs; i++) {
1938:     nmask = 0;
1939:     for (j=0; j<bs; j++) {
1940:       kmax = rowlengths[rowcount];
1941:       for (k=0; k<kmax; k++) {
1942:         tmp = jj[nzcount++]/bs;
1943:         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1944:       }
1945:       rowcount++;
1946:     }
1947:     browlengths[i] += nmask;
1948:     /* zero out the mask elements we set */
1949:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1950:   }

1952:   /* create our matrix */
1953:   MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
1954:   MatSetType(B,type);
1955:   MatSeqBAIJSetPreallocation(B,bs,0,browlengths);
1956:   a = (Mat_SeqBAIJ*)B->data;

1958:   /* set matrix "i" values */
1959:   a->i[0] = 0;
1960:   for (i=1; i<= mbs; i++) {
1961:     a->i[i]      = a->i[i-1] + browlengths[i-1];
1962:     a->ilen[i-1] = browlengths[i-1];
1963:   }
1964:   a->nz         = 0;
1965:   for (i=0; i<mbs; i++) a->nz += browlengths[i];

1967:   /* read in nonzero values */
1968:   PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
1969:   PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
1970:   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;

1972:   /* set "a" and "j" values into matrix */
1973:   nzcount = 0; jcount = 0;
1974:   for (i=0; i<mbs; i++) {
1975:     nzcountb = nzcount;
1976:     nmask    = 0;
1977:     for (j=0; j<bs; j++) {
1978:       kmax = rowlengths[i*bs+j];
1979:       for (k=0; k<kmax; k++) {
1980:         tmp = jj[nzcount++]/bs;
1981:         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1982:       }
1983:     }
1984:     /* sort the masked values */
1985:     PetscSortInt(nmask,masked);

1987:     /* set "j" values into matrix */
1988:     maskcount = 1;
1989:     for (j=0; j<nmask; j++) {
1990:       a->j[jcount++]  = masked[j];
1991:       mask[masked[j]] = maskcount++;
1992:     }
1993:     /* set "a" values into matrix */
1994:     ishift = bs2*a->i[i];
1995:     for (j=0; j<bs; j++) {
1996:       kmax = rowlengths[i*bs+j];
1997:       for (k=0; k<kmax; k++) {
1998:         tmp       = jj[nzcountb]/bs ;
1999:         block     = mask[tmp] - 1;
2000:         point     = jj[nzcountb] - bs*tmp;
2001:         idx       = ishift + bs2*block + j + bs*point;
2002:         a->a[idx] = (MatScalar)aa[nzcountb++];
2003:       }
2004:     }
2005:     /* zero out the mask elements we set */
2006:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2007:   }
2008:   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");

2010:   PetscFree(rowlengths);
2011:   PetscFree(browlengths);
2012:   PetscFree(aa);
2013:   PetscFree(jj);
2014:   PetscFree(mask);

2016:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2017:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2018:   MatView_Private(B);

2020:   *A = B;
2021:   return(0);
2022: }

2026: /*@C
2027:    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2028:    compressed row) format.  For good matrix assembly performance the
2029:    user should preallocate the matrix storage by setting the parameter nz
2030:    (or the array nnz).  By setting these parameters accurately, performance
2031:    during matrix assembly can be increased by more than a factor of 50.

2033:    Collective on MPI_Comm

2035:    Input Parameters:
2036: +  comm - MPI communicator, set to PETSC_COMM_SELF
2037: .  bs - size of block
2038: .  m - number of rows
2039: .  n - number of columns
2040: .  nz - number of nonzero blocks  per block row (same for all rows)
2041: -  nnz - array containing the number of nonzero blocks in the various block rows 
2042:          (possibly different for each block row) or PETSC_NULL

2044:    Output Parameter:
2045: .  A - the matrix 

2047:    Options Database Keys:
2048: .   -mat_no_unroll - uses code that does not unroll the loops in the 
2049:                      block calculations (much slower)
2050: .    -mat_block_size - size of the blocks to use

2052:    Level: intermediate

2054:    Notes:
2055:    A nonzero block is any block that as 1 or more nonzeros in it

2057:    The block AIJ format is fully compatible with standard Fortran 77
2058:    storage.  That is, the stored row and column indices can begin at
2059:    either one (as in Fortran) or zero.  See the users' manual for details.

2061:    Specify the preallocated storage with either nz or nnz (not both).
2062:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
2063:    allocation.  For additional details, see the users manual chapter on
2064:    matrices.

2066: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2067: @*/
2068: int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
2069: {
2071: 
2073:   MatCreate(comm,m,n,m,n,A);
2074:   MatSetType(*A,MATSEQBAIJ);
2075:   MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);
2076:   return(0);
2077: }

2081: /*@C
2082:    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2083:    per row in the matrix. For good matrix assembly performance the
2084:    user should preallocate the matrix storage by setting the parameter nz
2085:    (or the array nnz).  By setting these parameters accurately, performance
2086:    during matrix assembly can be increased by more than a factor of 50.

2088:    Collective on MPI_Comm

2090:    Input Parameters:
2091: +  A - the matrix
2092: .  bs - size of block
2093: .  nz - number of block nonzeros per block row (same for all rows)
2094: -  nnz - array containing the number of block nonzeros in the various block rows 
2095:          (possibly different for each block row) or PETSC_NULL

2097:    Options Database Keys:
2098: .   -mat_no_unroll - uses code that does not unroll the loops in the 
2099:                      block calculations (much slower)
2100: .    -mat_block_size - size of the blocks to use

2102:    Level: intermediate

2104:    Notes:
2105:    The block AIJ format is fully compatible with standard Fortran 77
2106:    storage.  That is, the stored row and column indices can begin at
2107:    either one (as in Fortran) or zero.  See the users' manual for details.

2109:    Specify the preallocated storage with either nz or nnz (not both).
2110:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
2111:    allocation.  For additional details, see the users manual chapter on
2112:    matrices.

2114: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2115: @*/
2116: int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[])
2117: {
2118:   int ierr,(*f)(Mat,int,int,const int[]);

2121:   PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);
2122:   if (f) {
2123:     (*f)(B,bs,nz,nnz);
2124:   }
2125:   return(0);
2126: }