Actual source code: mpisbaij.c

  1: /*$Id: mpisbaij.c,v 1.52 2001/04/09 15:17:38 bsmith Exp $*/

  3: #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
  4: #include "src/vec/vecimpl.h"
  5: #include "mpisbaij.h"
  6: #include "src/mat/impls/sbaij/seq/sbaij.h"

  8: extern int MatSetUpMultiply_MPISBAIJ(Mat);
  9: extern int DisAssemble_MPISBAIJ(Mat);
 10: extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int);
 11: extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
 12: extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *);
 13: extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
 14: extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
 15: extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**);
 16: extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**);
 17: extern int MatPrintHelp_SeqSBAIJ(Mat);
 18: extern int MatZeroRows_SeqSBAIJ(Mat,IS,Scalar*);
 19: extern int MatZeroRows_SeqBAIJ(Mat,IS,Scalar *);
 20: extern int MatGetRowMax_MPISBAIJ(Mat,Vec);

 22: /*  UGLY, ugly, ugly
 23:    When MatScalar == Scalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does 
 24:    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 
 25:    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
 26:    converts the entries into single precision and then calls ..._MatScalar() to put them
 27:    into the single precision data structures.
 28: */
 29: #if defined(PETSC_USE_MAT_SINGLE)
 30: extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
 31: extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
 32: extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
 33: extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
 34: extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
 35: #else
 36: #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
 37: #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
 38: #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
 39: #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
 40: #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
 41: #endif

 43: EXTERN_C_BEGIN
 44: int MatStoreValues_MPISBAIJ(Mat mat)
 45: {
 46:   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
 47:   int          ierr;

 50:   MatStoreValues(aij->A);
 51:   MatStoreValues(aij->B);
 52:   return(0);
 53: }
 54: EXTERN_C_END

 56: EXTERN_C_BEGIN
 57: int MatRetrieveValues_MPISBAIJ(Mat mat)
 58: {
 59:   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
 60:   int          ierr;

 63:   MatRetrieveValues(aij->A);
 64:   MatRetrieveValues(aij->B);
 65:   return(0);
 66: }
 67: EXTERN_C_END

 69: /* 
 70:      Local utility routine that creates a mapping from the global column 
 71:    number to the local number in the off-diagonal part of the local 
 72:    storage of the matrix.  This is done in a non-scalable way since the 
 73:    length of colmap equals the global matrix length. 
 74: */
 75: static int CreateColmap_MPISBAIJ_Private(Mat mat)
 76: {
 78:   SETERRQ(1,"Function not yet written for SBAIJ format");
 79:   /* return(0); */
 80: }

 82: #define CHUNKSIZE  10

 84: #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
 85: { \
 86:  \
 87:     brow = row/bs;  \
 88:     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
 89:     rmax = aimax[brow]; nrow = ailen[brow]; \
 90:       bcol = col/bs; \
 91:       ridx = row % bs; cidx = col % bs; \
 92:       low = 0; high = nrow; \
 93:       while (high-low > 3) { \
 94:         t = (low+high)/2; \
 95:         if (rp[t] > bcol) high = t; \
 96:         else              low  = t; \
 97:       } \
 98:       for (_i=low; _i<high; _i++) { \
 99:         if (rp[_i] > bcol) break; \
100:         if (rp[_i] == bcol) { \
101:           bap  = ap +  bs2*_i + bs*cidx + ridx; \
102:           if (addv == ADD_VALUES) *bap += value;  \
103:           else                    *bap  = value;  \
104:           goto a_noinsert; \
105:         } \
106:       } \
107:       if (a->nonew == 1) goto a_noinsert; \
108:       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
109:       if (nrow >= rmax) { \
110:         /* there is no extra room in row, therefore enlarge */ \
111:         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
112:         MatScalar *new_a; \
113:  \
114:         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
115:  \
116:         /* malloc new storage space */ \
117:         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
118:         ierr  = PetscMalloc(len,&new_a); \
119:         new_j = (int*)(new_a + bs2*new_nz); \
120:         new_i = new_j + new_nz; \
121:  \
122:         /* copy over old data into new slots */ \
123:         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
124:         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
125:         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
126:         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
127:         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int)); \
128:         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar)); \
129:         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar)); \
130:         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
131:                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar)); \
132:         /* free up old matrix storage */ \
133:         PetscFree(a->a); \
134:         if (!a->singlemalloc) { \
135:           PetscFree(a->i); \
136:           PetscFree(a->j); \
137:         } \
138:         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
139:         a->singlemalloc = PETSC_TRUE; \
140:  \
141:         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
142:         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
143:         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
144:         a->s_maxnz += bs2*CHUNKSIZE; \
145:         a->reallocs++; \
146:         a->s_nz++; \
147:       } \
148:       N = nrow++ - 1;  \
149:       /* shift up all the later entries in this row */ \
150:       for (ii=N; ii>=_i; ii--) { \
151:         rp[ii+1] = rp[ii]; \
152:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); \
153:       } \
154:       if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar)); }  \
155:       rp[_i]                      = bcol;  \
156:       ap[bs2*_i + bs*cidx + ridx] = value;  \
157:       a_noinsert:; \
158:     ailen[brow] = nrow; \
159: }
160: #ifndef MatSetValues_SeqBAIJ_B_Private
161: #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
162: { \
163:     brow = row/bs;  \
164:     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
165:     rmax = bimax[brow]; nrow = bilen[brow]; \
166:       bcol = col/bs; \
167:       ridx = row % bs; cidx = col % bs; \
168:       low = 0; high = nrow; \
169:       while (high-low > 3) { \
170:         t = (low+high)/2; \
171:         if (rp[t] > bcol) high = t; \
172:         else              low  = t; \
173:       } \
174:       for (_i=low; _i<high; _i++) { \
175:         if (rp[_i] > bcol) break; \
176:         if (rp[_i] == bcol) { \
177:           bap  = ap +  bs2*_i + bs*cidx + ridx; \
178:           if (addv == ADD_VALUES) *bap += value;  \
179:           else                    *bap  = value;  \
180:           goto b_noinsert; \
181:         } \
182:       } \
183:       if (b->nonew == 1) goto b_noinsert; \
184:       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
185:       if (nrow >= rmax) { \
186:         /* there is no extra room in row, therefore enlarge */ \
187:         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
188:         MatScalar *new_a; \
189:  \
190:         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
191:  \
192:         /* malloc new storage space */ \
193:         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
194:         ierr  = PetscMalloc(len,&new_a); \
195:         new_j = (int*)(new_a + bs2*new_nz); \
196:         new_i = new_j + new_nz; \
197:  \
198:         /* copy over old data into new slots */ \
199:         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
200:         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
201:         PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
202:         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
203:         PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int)); \
204:         PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar)); \
205:         PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar)); \
206:         PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
207:                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar)); \
208:         /* free up old matrix storage */ \
209:         PetscFree(b->a); \
210:         if (!b->singlemalloc) { \
211:           PetscFree(b->i); \
212:           PetscFree(b->j); \
213:         } \
214:         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
215:         b->singlemalloc = PETSC_TRUE; \
216:  \
217:         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
218:         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
219:         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
220:         b->maxnz += bs2*CHUNKSIZE; \
221:         b->reallocs++; \
222:         b->nz++; \
223:       } \
224:       N = nrow++ - 1;  \
225:       /* shift up all the later entries in this row */ \
226:       for (ii=N; ii>=_i; ii--) { \
227:         rp[ii+1] = rp[ii]; \
228:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); \
229:       } \
230:       if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));}  \
231:       rp[_i]                      = bcol;  \
232:       ap[bs2*_i + bs*cidx + ridx] = value;  \
233:       b_noinsert:; \
234:     bilen[brow] = nrow; \
235: }
236: #endif

238: #if defined(PETSC_USE_MAT_SINGLE)
239: int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
240: {
241:   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
242:   int          ierr,i,N = m*n;
243:   MatScalar    *vsingle;

246:   if (N > b->setvalueslen) {
247:     if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
248:     PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
249:     b->setvalueslen  = N;
250:   }
251:   vsingle = b->setvaluescopy;

253:   for (i=0; i<N; i++) {
254:     vsingle[i] = v[i];
255:   }
256:   MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
257:   return(0);
258: }

260: int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
261: {
262:   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
263:   int         ierr,i,N = m*n*b->bs2;
264:   MatScalar   *vsingle;

267:   if (N > b->setvalueslen) {
268:     if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
269:     PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
270:     b->setvalueslen  = N;
271:   }
272:   vsingle = b->setvaluescopy;
273:   for (i=0; i<N; i++) {
274:     vsingle[i] = v[i];
275:   }
276:   MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
277:   return(0);
278: }

280: int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
281: {
282:   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
283:   int         ierr,i,N = m*n;
284:   MatScalar   *vsingle;

287:   SETERRQ(1,"Function not yet written for SBAIJ format");
288:   /* return(0); */
289: }

291: int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
292: {
293:   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
294:   int         ierr,i,N = m*n*b->bs2;
295:   MatScalar   *vsingle;

298:   SETERRQ(1,"Function not yet written for SBAIJ format");
299:   /* return(0); */
300: }
301: #endif

303: /* Only add/insert a(i,j) with i<=j (blocks). 
304:    Any a(i,j) with i>j input by the user is ignored. 
305: */
306: int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
307: {
308:   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
309:   MatScalar    value;
310:   PetscTruth   roworiented = baij->roworiented;
311:   int          ierr,i,j,row,col;
312:   int          rstart_orig=baij->rstart_bs;
313:   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
314:   int          cend_orig=baij->cend_bs,bs=baij->bs;

316:   /* Some Variables required in the macro */
317:   Mat          A = baij->A;
318:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
319:   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
320:   MatScalar    *aa=a->a;

322:   Mat          B = baij->B;
323:   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
324:   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
325:   MatScalar    *ba=b->a;

327:   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
328:   int          low,high,t,ridx,cidx,bs2=a->bs2;
329:   MatScalar    *ap,*bap;

331:   /* for stash */
332:   int          n_loc, *in_loc=0;
333:   MatScalar    *v_loc=0;


337:   if(!baij->donotstash){
338:     PetscMalloc(n*sizeof(int),&in_loc);
339:     PetscMalloc(n*sizeof(MatScalar),&v_loc);
340:   }

342:   for (i=0; i<m; i++) {
343:     if (im[i] < 0) continue;
344: #if defined(PETSC_USE_BOPT_g)
345:     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
346: #endif
347:     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
348:       row = im[i] - rstart_orig;              /* local row index */
349:       for (j=0; j<n; j++) {
350:         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
351:         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
352:           col = in[j] - cstart_orig;          /* local col index */
353:           brow = row/bs; bcol = col/bs;
354:           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
355:           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
356:           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
357:           /* MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv); */
358:         } else if (in[j] < 0) continue;
359: #if defined(PETSC_USE_BOPT_g)
360:         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");}
361: #endif
362:         else {  /* off-diag entry (B) */
363:           if (mat->was_assembled) {
364:             if (!baij->colmap) {
365:               CreateColmap_MPISBAIJ_Private(mat);
366:             }
367: #if defined (PETSC_USE_CTABLE)
368:             PetscTableFind(baij->colmap,in[j]/bs + 1,&col);
369:             col  = col - 1 + in[j]%bs;
370: #else
371:             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
372: #endif
373:             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
374:               DisAssemble_MPISBAIJ(mat);
375:               col =  in[j];
376:               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
377:               B = baij->B;
378:               b = (Mat_SeqBAIJ*)(B)->data;
379:               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
380:               ba=b->a;
381:             }
382:           } else col = in[j];
383:           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
384:           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
385:           /* MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv); */
386:         }
387:       }
388:     } else {  /* off processor entry */
389:       if (!baij->donotstash) {
390:         n_loc = 0;
391:         for (j=0; j<n; j++){
392:           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
393:           in_loc[n_loc] = in[j];
394:           if (roworiented) {
395:             v_loc[n_loc] = v[i*n+j];
396:           } else {
397:             v_loc[n_loc] = v[j*m+i];
398:           }
399:           n_loc++;
400:         }
401:         MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);
402:       }
403:     }
404:   }

406:   if(!baij->donotstash){
407:     PetscFree(in_loc);
408:     PetscFree(v_loc);
409:   }
410:   return(0);
411: }

413: int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
414: {
416:   SETERRQ(1,"Function not yet written for SBAIJ format");
417:   /* return(0); */
418: }

420: #define HASH_KEY 0.6180339887
421: #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
422: /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
423: /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
424: int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
425: {
427:   SETERRQ(1,"Function not yet written for SBAIJ format");
428:   /* return(0); */
429: }

431: int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
432: {
434:   SETERRQ(1,"Function not yet written for SBAIJ format");
435:   /* return(0); */
436: }

438: int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
439: {
440:   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
441:   int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
442:   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

445:   for (i=0; i<m; i++) {
446:     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
447:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
448:     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
449:       row = idxm[i] - bsrstart;
450:       for (j=0; j<n; j++) {
451:         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
452:         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
453:         if (idxn[j] >= bscstart && idxn[j] < bscend){
454:           col = idxn[j] - bscstart;
455:           MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);
456:         } else {
457:           if (!baij->colmap) {
458:             CreateColmap_MPISBAIJ_Private(mat);
459:           }
460: #if defined (PETSC_USE_CTABLE)
461:           PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);
462:           data --;
463: #else
464:           data = baij->colmap[idxn[j]/bs]-1;
465: #endif
466:           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
467:           else {
468:             col  = data + idxn[j]%bs;
469:             MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);
470:           }
471:         }
472:       }
473:     } else {
474:       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
475:     }
476:   }
477:  return(0);
478: }

480: int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
481: {
482:   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
483:   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
484:   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
485:   int        ierr;
486:   PetscReal  sum[2],*lnorm2;

489:   if (baij->size == 1) {
490:      MatNorm(baij->A,type,norm);
491:   } else {
492:     if (type == NORM_FROBENIUS) {
493:       PetscMalloc(2*sizeof(double),&lnorm2);
494:        MatNorm(baij->A,type,lnorm2);
495:       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
496:        MatNorm(baij->B,type,lnorm2);
497:       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
498:       /*
499:       MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
500:       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
501:       */
502:       MPI_Allreduce(lnorm2,&sum,2,MPI_DOUBLE,MPI_SUM,mat->comm);
503:       /*
504:       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
505:       PetscSynchronizedFlush(PETSC_COMM_WORLD); */
506: 
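      /* the off-diagonal block B is stored only once for the symmetric matrix,
         so its squared Frobenius norm is counted twice in the sum below */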
507:       *norm = sqrt(sum[0] + 2*sum[1]);
508:       PetscFree(lnorm2);
509:     } else {
510:       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
511:     }
512:   }
513:   return(0);
514: }

516: /*
517:   Creates the hash table and fills in its entries. 
518:   This table is created only once. 
519:   If new entries need to be added to the matrix
520:   then the hash table has to be destroyed and
521:   recreated.
522: */
523: int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
524: {
526:   SETERRQ(1,"Function not yet written for SBAIJ format");
527:   /* return(0); */
528: }

530: int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
531: {
532:   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
533:   int         ierr,nstash,reallocs;
534:   InsertMode  addv;

537:   if (baij->donotstash) {
538:     return(0);
539:   }

541:   /* make sure all processors are either in INSERTMODE or ADDMODE */
542:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);
543:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
544:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
545:   }
546:   mat->insertmode = addv; /* in case this processor had no cache */

548:   MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);
549:   MatStashScatterBegin_Private(&mat->bstash,baij->rowners);
550:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
551:   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
552:   MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);
553:   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
554:   return(0);
555: }

557: int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
558: {
559:   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
560:   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
561:   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
562:   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
563:   int         *row,*col,other_disassembled;
564:   PetscTruth  r1,r2,r3;
565:   MatScalar   *val;
566:   InsertMode  addv = mat->insertmode;
567:   /* int         rank;*/

570:   /* remove 2 lines below later */
571:   /*MPI_Comm_rank(PETSC_COMM_WORLD, &rank); */

573:   if (!baij->donotstash) {
574:     while (1) {
575:       MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
576:       /*
577:       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
578:       PetscSynchronizedFlush(PETSC_COMM_WORLD); 
579:       */
580:       if (!flg) break;

582:       for (i=0; i<n;) {
583:         /* Now identify the consecutive vals belonging to the same row */
584:         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
585:         if (j < n) ncols = j-i;
586:         else       ncols = n-i;
587:         /* Now assemble all these values with a single function call */
588:         MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);
589:         i = j;
590:       }
591:     }
592:     MatStashScatterEnd_Private(&mat->stash);
593:     /* Now process the block-stash. Since the values are stashed column-oriented,
594:        temporarily set the roworiented flags to PETSC_FALSE, and after MatSetValues() 
595:        restore the original flags */
596:     r1 = baij->roworiented;
597:     r2 = a->roworiented;
598:     r3 = b->roworiented;
599:     baij->roworiented = PETSC_FALSE;
600:     a->roworiented    = PETSC_FALSE;
601:     b->roworiented    = PETSC_FALSE;
602:     while (1) {
603:       MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);
604:       if (!flg) break;
605: 
606:       for (i=0; i<n;) {
607:         /* Now identify the consecutive vals belonging to the same row */
608:         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
609:         if (j < n) ncols = j-i;
610:         else       ncols = n-i;
611:         MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);
612:         i = j;
613:       }
614:     }
615:     MatStashScatterEnd_Private(&mat->bstash);
616:     baij->roworiented = r1;
617:     a->roworiented    = r2;
618:     b->roworiented    = r3;
619:   }

621:   MatAssemblyBegin(baij->A,mode);
622:   MatAssemblyEnd(baij->A,mode);

624:   /* determine if any processor has disassembled; if so, we must 
625:      also disassemble ourselves, in order that we may reassemble. */
626:   /*
627:      if nonzero structure of submatrix B cannot change then we know that
628:      no processor disassembled thus we can skip this stuff
629:   */
630:   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
631:     MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
632:     if (mat->was_assembled && !other_disassembled) {
633:       DisAssemble_MPISBAIJ(mat);
634:     }
635:   }

637:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
638:     MatSetUpMultiply_MPISBAIJ(mat);
639:   }
640:   MatAssemblyBegin(baij->B,mode);
641:   MatAssemblyEnd(baij->B,mode);
642: 
643: #if defined(PETSC_USE_BOPT_g)
644:   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
645:     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
646:     baij->ht_total_ct  = 0;
647:     baij->ht_insert_ct = 0;
648:   }
649: #endif
650:   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
651:     MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);
652:     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
653:     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
654:   }

656:   if (baij->rowvalues) {
657:     PetscFree(baij->rowvalues);
658:     baij->rowvalues = 0;
659:   }
660:   return(0);
661: }

663: static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
664: {
665:   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
666:   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
667:   PetscTruth        isascii,isdraw;
668:   PetscViewer       sviewer;
669:   PetscViewerFormat format;

672:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
673:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
674:   if (isascii) {
675:     PetscViewerGetFormat(viewer,&format);
676:     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
677:       MatInfo info;
678:       MPI_Comm_rank(mat->comm,&rank);
679:       MatGetInfo(mat,MAT_LOCAL,&info);
680:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
681:               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
682:               baij->bs,(int)info.memory);
683:       MatGetInfo(baij->A,MAT_LOCAL,&info);
684:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d\n",rank,(int)info.nz_used*bs);
685:       MatGetInfo(baij->B,MAT_LOCAL,&info);
686:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d\n",rank,(int)info.nz_used*bs);
687:       PetscViewerFlush(viewer);
688:       VecScatterView(baij->Mvctx,viewer);
689:       return(0);
690:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
691:       PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);
692:       return(0);
693:     }
694:   }

696:   if (isdraw) {
697:     PetscDraw       draw;
698:     PetscTruth isnull;
699:     PetscViewerDrawGetDraw(viewer,0,&draw);
700:     PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
701:   }

703:   if (size == 1) {
704:     MatView(baij->A,viewer);
705:   } else {
706:     /* assemble the entire matrix onto first processor. */
707:     Mat         A;
708:     Mat_SeqSBAIJ *Aloc;
709:     Mat_SeqBAIJ *Bloc;
710:     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
711:     MatScalar   *a;

713:     if (!rank) {
714:       MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
715:     } else {
716:       MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
717:     }
718:     PetscLogObjectParent(mat,A);

720:     /* copy over the A part */
721:     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
722:     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
723:     ierr  = PetscMalloc(bs*sizeof(int),&rvals);

725:     for (i=0; i<mbs; i++) {
726:       rvals[0] = bs*(baij->rstart + i);
727:       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
728:       for (j=ai[i]; j<ai[i+1]; j++) {
729:         col = (baij->cstart+aj[j])*bs;
730:         for (k=0; k<bs; k++) {
731:           MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);
732:           col++; a += bs;
733:         }
734:       }
735:     }
736:     /* copy over the B part */
737:     Bloc = (Mat_SeqBAIJ*)baij->B->data;
738:     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
739:     for (i=0; i<mbs; i++) {
740:       rvals[0] = bs*(baij->rstart + i);
741:       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
742:       for (j=ai[i]; j<ai[i+1]; j++) {
743:         col = baij->garray[aj[j]]*bs;
744:         for (k=0; k<bs; k++) {
745:           MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);
746:           col++; a += bs;
747:         }
748:       }
749:     }
750:     PetscFree(rvals);
751:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
752:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
753:     /* 
754:        Everyone has to call to draw the matrix since the graphics waits are
755:        synchronized across all processors that share the PetscDraw object
756:     */
757:     PetscViewerGetSingleton(viewer,&sviewer);
758:     if (!rank) {
759:       MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);
760:     }
761:     PetscViewerRestoreSingleton(viewer,&sviewer);
762:     MatDestroy(A);
763:   }
764:   return(0);
765: }

767: int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
768: {
769:   int        ierr;
770:   PetscTruth isascii,isdraw,issocket,isbinary;

773:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
774:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
775:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
776:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
777:   if (isascii || isdraw || issocket || isbinary) {
778:     MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);
779:   } else {
780:     SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
781:   }
782:   return(0);
783: }

785: int MatDestroy_MPISBAIJ(Mat mat)
786: {
787:   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
788:   int         ierr;

791: #if defined(PETSC_USE_LOG)
792:   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
793: #endif
794:   MatStashDestroy_Private(&mat->stash);
795:   MatStashDestroy_Private(&mat->bstash);
796:   PetscFree(baij->rowners);
797:   MatDestroy(baij->A);
798:   MatDestroy(baij->B);
799: #if defined (PETSC_USE_CTABLE)
800:   if (baij->colmap) {PetscTableDelete(baij->colmap);}
801: #else
802:   if (baij->colmap) {PetscFree(baij->colmap);}
803: #endif
804:   if (baij->garray) {PetscFree(baij->garray);}
805:   if (baij->lvec)   {VecDestroy(baij->lvec);}
806:   if (baij->Mvctx)  {VecScatterDestroy(baij->Mvctx);}
807:   if (baij->rowvalues) {PetscFree(baij->rowvalues);}
808:   if (baij->barray) {PetscFree(baij->barray);}
809:   if (baij->hd) {PetscFree(baij->hd);}
810: #if defined(PETSC_USE_MAT_SINGLE)
811:   if (baij->setvaluescopy) {PetscFree(baij->setvaluescopy);}
812: #endif
813:   PetscFree(baij);
814:   return(0);
815: }

817: int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
818: {
819:   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
820:   int         ierr,nt;

823:   VecGetLocalSize(xx,&nt);
824:   if (nt != A->n) {
825:     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
826:   }
827:   VecGetLocalSize(yy,&nt);
828:   if (nt != A->m) {
829:     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
830:   }

832:   VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
833:   /* do diagonal part */
834:   (*a->A->ops->mult)(a->A,xx,yy);
835:   /* do superdiagonal part */
836:   VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
837:   (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
838:   /* do subdiagonal part */
839:   (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
840:   VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
841:   VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);

843:   return(0);
844: }

846: int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
847: {
848:   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
849:   int        ierr;

852:   VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
853:   /* do diagonal part */
854:   (*a->A->ops->multadd)(a->A,xx,yy,zz);
855:   /* do superdiagonal part */
856:   VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
857:   (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);

859:   /* do subdiagonal part */
860:   (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
861:   VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
862:   VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);

864:   return(0);
865: }

867: int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
868: {
870:   SETERRQ(1,"Matrix is symmetric. Call MatMult().");
871:   /* return(0); */
872: }

874: int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
875: {
877:   SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
878:   /* return(0); */
879: }

881: /*
882:   This only works correctly for square matrices where the subblock A->A is the 
883:    diagonal block
884: */
885: int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
886: {
887:   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
888:   int         ierr;

891:   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
892:   MatGetDiagonal(a->A,v);
893:   return(0);
894: }

896: int MatScale_MPISBAIJ(Scalar *aa,Mat A)
897: {
898:   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
899:   int         ierr;

902:   MatScale(aa,a->A);
903:   MatScale(aa,a->B);
904:   return(0);
905: }

907: int MatGetOwnershipRange_MPISBAIJ(Mat matin,int *m,int *n)
908: {
909:   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;

912:   if (m) *m = mat->rstart*mat->bs;
913:   if (n) *n = mat->rend*mat->bs;
914:   return(0);
915: }

917: int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
918: {
919:   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
920:   Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
921:   int        bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
922:   int        nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
923:   int        *cmap,*idx_p,cstart = mat->cstart;

926:   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
927:   mat->getrowactive = PETSC_TRUE;

929:   if (!mat->rowvalues && (idx || v)) {
930:     /*
931:         allocate enough space to hold information from the longest row.
932:     */
933:     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
934:     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
935:     int     max = 1,mbs = mat->mbs,tmp;
936:     for (i=0; i<mbs; i++) {
937:       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
938:       if (max < tmp) { max = tmp; }
939:     }
940:     PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)),&mat->rowvalues);
941:     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
942:   }
943: 
944:   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
945:   lrow = row - brstart;  /* local row index */

947:   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
948:   if (!v)   {pvA = 0; pvB = 0;}
949:   if (!idx) {pcA = 0; if (!v) pcB = 0;}
950:   (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
951:   (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
952:   nztot = nzA + nzB;

954:   cmap  = mat->garray;
955:   if (v  || idx) {
956:     if (nztot) {
957:       /* Sort by increasing column numbers, assuming A and B already sorted */
958:       int imark = -1;
959:       if (v) {
960:         *v = v_p = mat->rowvalues;
961:         for (i=0; i<nzB; i++) {
962:           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
963:           else break;
964:         }
965:         imark = i;
966:         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
967:         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
968:       }
969:       if (idx) {
970:         *idx = idx_p = mat->rowindices;
971:         if (imark > -1) {
972:           for (i=0; i<imark; i++) {
973:             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
974:           }
975:         } else {
976:           for (i=0; i<nzB; i++) {
977:             if (cmap[cworkB[i]/bs] < cstart)
978:               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
979:             else break;
980:           }
981:           imark = i;
982:         }
983:         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
984:         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
985:       }
986:     } else {
987:       if (idx) *idx = 0;
988:       if (v)   *v   = 0;
989:     }
990:   }
991:   *nz = nztot;
992:   (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
993:   (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
994:   return(0);
995: }

997: int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
998: {
999:   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

1002:   if (baij->getrowactive == PETSC_FALSE) {
1003:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1004:   }
1005:   baij->getrowactive = PETSC_FALSE;
1006:   return(0);
1007: }

1009: int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1010: {
1011:   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

1014:   *bs = baij->bs;
1015:   return(0);
1016: }

1018: int MatZeroEntries_MPISBAIJ(Mat A)
1019: {
1020:   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1021:   int         ierr;

1024:   MatZeroEntries(l->A);
1025:   MatZeroEntries(l->B);
1026:   return(0);
1027: }

1029: int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1030: {
1031:   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1032:   Mat         A = a->A,B = a->B;
1033:   int         ierr;
1034:   PetscReal   isend[5],irecv[5];

1037:   info->block_size     = (double)a->bs;
1038:   MatGetInfo(A,MAT_LOCAL,info);
1039:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1040:   isend[3] = info->memory;  isend[4] = info->mallocs;
1041:   MatGetInfo(B,MAT_LOCAL,info);
1042:   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1043:   isend[3] += info->memory;  isend[4] += info->mallocs;
1044:   if (flag == MAT_LOCAL) {
1045:     info->nz_used      = isend[0];
1046:     info->nz_allocated = isend[1];
1047:     info->nz_unneeded  = isend[2];
1048:     info->memory       = isend[3];
1049:     info->mallocs      = isend[4];
1050:   } else if (flag == MAT_GLOBAL_MAX) {
1051:     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
1052:     info->nz_used      = irecv[0];
1053:     info->nz_allocated = irecv[1];
1054:     info->nz_unneeded  = irecv[2];
1055:     info->memory       = irecv[3];
1056:     info->mallocs      = irecv[4];
1057:   } else if (flag == MAT_GLOBAL_SUM) {
1058:     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
1059:     info->nz_used      = irecv[0];
1060:     info->nz_allocated = irecv[1];
1061:     info->nz_unneeded  = irecv[2];
1062:     info->memory       = irecv[3];
1063:     info->mallocs      = irecv[4];
1064:   } else {
1065:     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1066:   }
1067:   info->rows_global       = (double)matin->M;
1068:   info->columns_global    = (double)matin->N;
1069:   info->rows_local        = (double)matin->m;
1070:   info->columns_local     = (double)matin->N;
1071:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1072:   info->fill_ratio_needed = 0;
1073:   info->factor_mallocs    = 0;
1074:   return(0);
1075: }

1077: int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1078: {
1079:   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1080:   int         ierr;

1083:   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1084:       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1085:       op == MAT_COLUMNS_UNSORTED ||
1086:       op == MAT_COLUMNS_SORTED ||
1087:       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1088:       op == MAT_KEEP_ZEROED_ROWS ||
1089:       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1090:         MatSetOption(a->A,op);
1091:         MatSetOption(a->B,op);
1092:   } else if (op == MAT_ROW_ORIENTED) {
1093:         a->roworiented = PETSC_TRUE;
1094:         MatSetOption(a->A,op);
1095:         MatSetOption(a->B,op);
1096:   } else if (op == MAT_ROWS_SORTED ||
1097:              op == MAT_ROWS_UNSORTED ||
1098:              op == MAT_YES_NEW_DIAGONALS ||
1099:              op == MAT_USE_HASH_TABLE) {
1100:     PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
1101:   } else if (op == MAT_COLUMN_ORIENTED) {
1102:     a->roworiented = PETSC_FALSE;
1103:     MatSetOption(a->A,op);
1104:     MatSetOption(a->B,op);
1105:   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1106:     a->donotstash = PETSC_TRUE;
1107:   } else if (op == MAT_NO_NEW_DIAGONALS) {
1108:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1109:   } else if (op == MAT_USE_HASH_TABLE) {
1110:     a->ht_flag = PETSC_TRUE;
1111:   } else {
1112:     SETERRQ(PETSC_ERR_SUP,"unknown option");
1113:   }
1114:   return(0);
1115: }

1117: int MatTranspose_MPISBAIJ(Mat A,Mat *matout)
1118: {
1120:   SETERRQ(1,"Matrix is symmetric. MatTranspose() should not be called");
1121:   /* return(0); */
1122: }

1124: int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1125: {
1126:   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1127:   Mat         a = baij->A,b = baij->B;
1128:   int         ierr,s1,s2,s3;

1131:   if (ll != rr) {
1132:     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1133:   }
1134:   MatGetLocalSize(mat,&s2,&s3);
1135:   if (rr) {
1136:     VecGetLocalSize(rr,&s1);
1137:     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1138:     /* Overlap communication with computation. */
1139:     VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);
1140:     /*} if (ll) { */
1141:     VecGetLocalSize(ll,&s1);
1142:     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1143:     (*b->ops->diagonalscale)(b,ll,PETSC_NULL);
1144:     /* } */
1145:   /* scale  the diagonal block */
1146:   (*a->ops->diagonalscale)(a,ll,rr);

1148:   /* if (rr) { */
1149:     /* Do a scatter end and then right scale the off-diagonal block */
1150:     VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);
1151:     (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);
1152:   }
1153: 
1154:   return(0);
1155: }

1157: int MatZeroRows_MPISBAIJ(Mat A,IS is,Scalar *diag)
1158: {
1159:   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1160:   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1161:   int            *procs,*nprocs,j,idx,nsends,*work,row;
1162:   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1163:   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1164:   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1165:   MPI_Comm       comm = A->comm;
1166:   MPI_Request    *send_waits,*recv_waits;
1167:   MPI_Status     recv_status,*send_status;
1168:   IS             istmp;
1169:   PetscTruth     found;

1172:   ISGetSize(is,&N);
1173:   ISGetIndices(is,&rows);
1174: 
1175:   /*  first count number of contributors to each processor */
1176:   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);
1177:   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));
1178:   procs = nprocs + size;
1179:   ierr  = PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
1180:   for (i=0; i<N; i++) {
1181:     idx   = rows[i];
1182:     found = PETSC_FALSE;
1183:     for (j=0; j<size; j++) {
1184:       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1185:         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
1186:       }
1187:     }
1188:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1189:   }
1190:   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
1191: 
1192:   /* inform other processors of number of messages and max length*/
1193:   ierr   = PetscMalloc(2*size*sizeof(int),&work);
1194:   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);
1195:   nmax   = work[rank];
1196:   nrecvs = work[size+rank];
1197:   ierr   = PetscFree(work);
1198: 
1199:   /* post receives:   */
1200:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
1201:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
1202:   for (i=0; i<nrecvs; i++) {
1203:     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
1204:   }
1205: 
1206:   /* do sends:
1207:      1) starts[i] gives the starting index in svalues for stuff going to 
1208:      the ith processor
1209:   */
1210:   PetscMalloc((N+1)*sizeof(int),&svalues);
1211:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
1212:   PetscMalloc((size+1)*sizeof(int),&starts);
1213:   starts[0]  = 0;
1214:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1215:   for (i=0; i<N; i++) {
1216:     svalues[starts[owner[i]]++] = rows[i];
1217:   }
1218:   ISRestoreIndices(is,&rows);
1219: 
1220:   starts[0] = 0;
1221:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1222:   count = 0;
1223:   for (i=0; i<size; i++) {
1224:     if (procs[i]) {
1225:       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
1226:     }
1227:   }
1228:   PetscFree(starts);

1230:   base = owners[rank]*bs;
1231: 
1232:   /*  wait on receives */
1233:   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
1234:   source = lens + nrecvs;
1235:   count  = nrecvs; slen = 0;
1236:   while (count) {
1237:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1238:     /* unpack receives into our local space */
1239:     MPI_Get_count(&recv_status,MPI_INT,&n);
1240:     source[imdex]  = recv_status.MPI_SOURCE;
1241:     lens[imdex]    = n;
1242:     slen          += n;
1243:     count--;
1244:   }
1245:   PetscFree(recv_waits);
1246: 
1247:   /* move the data into the send scatter */
1248:   PetscMalloc((slen+1)*sizeof(int),&lrows);
1249:   count = 0;
1250:   for (i=0; i<nrecvs; i++) {
1251:     values = rvalues + i*nmax;
1252:     for (j=0; j<lens[i]; j++) {
1253:       lrows[count++] = values[j] - base;
1254:     }
1255:   }
1256:   PetscFree(rvalues);
1257:   PetscFree(lens);
1258:   PetscFree(owner);
1259:   PetscFree(nprocs);
1260: 
1261:   /* actually zap the local rows */
1262:   ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
1263:   PetscLogObjectParent(A,istmp);

1265:   /*
1266:         Zero the required rows. If the "diagonal block" of the matrix
1267:      is square and the user wishes to set the diagonal we use separate
1268:      code so that MatSetValues() is not called for each diagonal entry, allocating
1269:      new memory, thus causing lots of mallocs and slowing things down.

1271:        Contributed by: Mathew Knepley
1272:   */
1273:   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1274:   MatZeroRows_SeqBAIJ(l->B,istmp,0);
1275:   if (diag && (l->A->M == l->A->N)) {
1276:     MatZeroRows_SeqSBAIJ(l->A,istmp,diag);
1277:   } else if (diag) {
1278:     MatZeroRows_SeqSBAIJ(l->A,istmp,0);
1279:     if (((Mat_SeqSBAIJ*)l->A->data)->nonew) {
1280:       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1281: MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1282:     }
1283:     for (i=0; i<slen; i++) {
1284:       row  = lrows[i] + rstart_bs;
1285:       MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
1286:     }
1287:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1288:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1289:   } else {
1290:     MatZeroRows_SeqSBAIJ(l->A,istmp,0);
1291:   }

1293:   ISDestroy(istmp);
1294:   PetscFree(lrows);

1296:   /* wait on sends */
1297:   if (nsends) {
1298:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1299:     ierr        = MPI_Waitall(nsends,send_waits,send_status);
1300:     ierr        = PetscFree(send_status);
1301:   }
1302:   PetscFree(send_waits);
1303:   PetscFree(svalues);

1305:   return(0);
1306: }

1308: int MatPrintHelp_MPISBAIJ(Mat A)
1309: {
1310:   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1311:   MPI_Comm    comm = A->comm;
1312:   static int  called = 0;
1313:   int         ierr;

1316:   if (!a->rank) {
1317:     MatPrintHelp_SeqSBAIJ(a->A);
1318:   }
1319:   if (called) {return(0);} else called = 1;
1320:   (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");
1321:   (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
1322:   return(0);
1323: }

1325: int MatSetUnfactored_MPISBAIJ(Mat A)
1326: {
1327:   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1328:   int         ierr;

1331:   MatSetUnfactored(a->A);
1332:   return(0);
1333: }

1335: static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);

1337: int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1338: {
1339:   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1340:   Mat         a,b,c,d;
1341:   PetscTruth  flg;
1342:   int         ierr;

1345:   PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg);
1346:   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1347:   a = matA->A; b = matA->B;
1348:   c = matB->A; d = matB->B;

1350:   MatEqual(a,c,&flg);
1351:   if (flg == PETSC_TRUE) {
1352:     MatEqual(b,d,&flg);
1353:   }
1354:   MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);
1355:   return(0);
1356: }

1358: int MatSetUpPreallocation_MPISBAIJ(Mat A)
1359: {
1360:   int        ierr;

1363:   MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1364:   return(0);
1365: }
1366: /* -------------------------------------------------------------------*/
1367: static struct _MatOps MatOps_Values = {
1368:   MatSetValues_MPISBAIJ,
1369:   MatGetRow_MPISBAIJ,
1370:   MatRestoreRow_MPISBAIJ,
1371:   MatMult_MPISBAIJ,
1372:   MatMultAdd_MPISBAIJ,
1373:   MatMultTranspose_MPISBAIJ,
1374:   MatMultTransposeAdd_MPISBAIJ,
1375:   0,
1376:   0,
1377:   0,
1378:   0,
1379:   0,
1380:   0,
1381:   0,
1382:   MatTranspose_MPISBAIJ,
1383:   MatGetInfo_MPISBAIJ,
1384:   MatEqual_MPISBAIJ,
1385:   MatGetDiagonal_MPISBAIJ,
1386:   MatDiagonalScale_MPISBAIJ,
1387:   MatNorm_MPISBAIJ,
1388:   MatAssemblyBegin_MPISBAIJ,
1389:   MatAssemblyEnd_MPISBAIJ,
1390:   0,
1391:   MatSetOption_MPISBAIJ,
1392:   MatZeroEntries_MPISBAIJ,
1393:   MatZeroRows_MPISBAIJ,
1394:   0,
1395:   0,
1396:   0,
1397:   0,
1398:   MatSetUpPreallocation_MPISBAIJ,
1399:   0,
1400:   MatGetOwnershipRange_MPISBAIJ,
1401:   0,
1402:   0,
1403:   0,
1404:   0,
1405:   MatDuplicate_MPISBAIJ,
1406:   0,
1407:   0,
1408:   0,
1409:   0,
1410:   0,
1411:   MatGetSubMatrices_MPISBAIJ,
1412:   MatIncreaseOverlap_MPISBAIJ,
1413:   MatGetValues_MPISBAIJ,
1414:   0,
1415:   MatPrintHelp_MPISBAIJ,
1416:   MatScale_MPISBAIJ,
1417:   0,
1418:   0,
1419:   0,
1420:   MatGetBlockSize_MPISBAIJ,
1421:   0,
1422:   0,
1423:   0,
1424:   0,
1425:   0,
1426:   0,
1427:   MatSetUnfactored_MPISBAIJ,
1428:   0,
1429:   MatSetValuesBlocked_MPISBAIJ,
1430:   0,
1431:   0,
1432:   0,
1433:   MatGetMaps_Petsc,
1434:   0,
1435:   0,
1436:   0,
1437:   0,
1438:   0,
1439:   0,
1440:   MatGetRowMax_MPISBAIJ};


1443: EXTERN_C_BEGIN
1444: int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1445: {
1447:   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1448:   *iscopy = PETSC_FALSE;
1449:   return(0);
1450: }
1451: EXTERN_C_END

1453: EXTERN_C_BEGIN
1454: int MatCreate_MPISBAIJ(Mat B)
1455: {
1456:   Mat_MPISBAIJ *b;
1457:   int          ierr;
1458:   PetscTruth   flg;


1462:   ierr    = PetscNew(Mat_MPISBAIJ,&b);
1463:   B->data = (void*)b;
1464:   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));
1465:   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));

1467:   B->ops->destroy    = MatDestroy_MPISBAIJ;
1468:   B->ops->view       = MatView_MPISBAIJ;
1469:   B->mapping    = 0;
1470:   B->factor     = 0;
1471:   B->assembled  = PETSC_FALSE;

1473:   B->insertmode = NOT_SET_VALUES;
1474:   MPI_Comm_rank(B->comm,&b->rank);
1475:   MPI_Comm_size(B->comm,&b->size);

1477:   /* build local table of row and column ownerships */
1478:   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);
1479:   b->cowners    = b->rowners + b->size + 2;
1480:   b->rowners_bs = b->cowners + b->size + 2;
1481:   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));

1483:   /* build cache for off array entries formed */
1484:   MatStashCreate_Private(B->comm,1,&B->stash);
1485:   b->donotstash  = PETSC_FALSE;
1486:   b->colmap      = PETSC_NULL;
1487:   b->garray      = PETSC_NULL;
1488:   b->roworiented = PETSC_TRUE;

1490: #if defined(PETSC_USE_MAT_SINGLE)
1491:   /* stuff for MatSetValues_XXX in single precision */
1492:   b->setvalueslen     = 0;
1493:   b->setvaluescopy    = PETSC_NULL;
1494: #endif

1496:   /* stuff used in block assembly */
1497:   b->barray       = 0;

1499:   /* stuff used for matrix vector multiply */
1500:   b->lvec         = 0;
1501:   b->Mvctx        = 0;

1503:   /* stuff for MatGetRow() */
1504:   b->rowindices   = 0;
1505:   b->rowvalues    = 0;
1506:   b->getrowactive = PETSC_FALSE;

1508:   /* hash table stuff */
1509:   b->ht           = 0;
1510:   b->hd           = 0;
1511:   b->ht_size      = 0;
1512:   b->ht_flag      = PETSC_FALSE;
1513:   b->ht_fact      = 0;
1514:   b->ht_total_ct  = 0;
1515:   b->ht_insert_ct = 0;

1517:   PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);
1518:   if (flg) {
1519:     double fact = 1.39;
1520:     MatSetOption(B,MAT_USE_HASH_TABLE);
1521:     PetscOptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);
1522:     if (fact <= 1.0) fact = 1.39;
1523:     MatMPIBAIJSetHashTableFactor(B,fact);
1524:     PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1525:   }
1526:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1527:                                      "MatStoreValues_MPISBAIJ",
1528:                                      MatStoreValues_MPISBAIJ);
1529:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1530:                                      "MatRetrieveValues_MPISBAIJ",
1531:                                      MatRetrieveValues_MPISBAIJ);
1532:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1533:                                      "MatGetDiagonalBlock_MPISBAIJ",
1534:                                      MatGetDiagonalBlock_MPISBAIJ);
1535:   return(0);
1536: }
1537: EXTERN_C_END

1539: /*@C
1540:    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1541:    the user should preallocate the matrix storage by setting the parameters 
1542:    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1543:    performance can be increased by more than a factor of 50.

1545:    Collective on Mat

1547:    Input Parameters:
1548: +  A - the matrix 
1549: .  bs   - size of blocks
1550: .  d_nz  - number of block nonzeros per block row in diagonal portion of local 
1551:            submatrix  (same for all local rows)
1552: .  d_nnz - array containing the number of block nonzeros in the various block rows 
1553:            in the diagonal portion of the local submatrix (possibly different for each
1554:            block row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1555: .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1556:            submatrix (same for all local rows).
1557: -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1558:            off-diagonal portion of the local submatrix (possibly different for
1559:            each block row) or PETSC_NULL.


1562:    Options Database Keys:
1563: .   -mat_no_unroll - uses code that does not unroll the loops in the 
1564:                      block calculations (much slower)
1565: .   -mat_block_size - size of the blocks to use

1567:    Notes:

1569:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1570:    then it must be used on all processors that share the object for that argument.

1572:    Storage Information:
1573:    For a square global matrix we define each processor's diagonal portion 
1574:    to be its local rows and the corresponding columns (a square submatrix);  
1575:    each processor's off-diagonal portion encompasses the remainder of the
1576:    local matrix (a rectangular submatrix). 

1578:    The user can specify preallocated storage for the diagonal part of
1579:    the local submatrix with either d_nz or d_nnz (not both).  Set 
1580:    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1581:    memory allocation.  Likewise, specify preallocated storage for the
1582:    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

1584:    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1585:    the figure below we depict these three local rows and all columns (0-11).

1587: .vb
1588:            0 1 2 3 4 5 6 7 8 9 10 11
1589:           -------------------
1590:    row 3  |  o o o d d d o o o o o o
1591:    row 4  |  o o o d d d o o o o o o
1592:    row 5  |  o o o d d d o o o o o o
1593:           -------------------
1594: .ve
1595:   
1596:    Thus, any entries in the d locations are stored in the d (diagonal) 
1597:    submatrix, and any entries in the o locations are stored in the
1598:    o (off-diagonal) submatrix.  Note that the d submatrix is stored in the
1599:    MATSEQSBAIJ format and the o submatrix in the MATSEQBAIJ format, both using compressed row storage.

1601:    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1602:    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1603:    In general, for PDE problems in which most nonzeros are near the diagonal,
1604:    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1605:    or you will get TERRIBLE performance; see the users' manual chapter on
1606:    matrices.
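
   Example usage (an illustrative sketch only, with error checking omitted and
   the per-block-row counts made up for illustration):

.vb
      Mat A;
      int bs = 2, m = 6, d_nnz[3] = {2,1,1}, o_nnz[3] = {1,1,0};

      MatCreate(PETSC_COMM_WORLD,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);
      MatSetType(A,MATMPISBAIJ);
      MatMPISBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);
.ve
   Alternatively, a single estimate per block row may be given in place of the
   arrays, e.g. MatMPISBAIJSetPreallocation(A,bs,2,PETSC_NULL,1,PETSC_NULL).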

1608:    Level: intermediate

1610: .keywords: matrix, block, aij, compressed row, sparse, parallel

1612: .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1613: @*/

1615: int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1616: {
1617:   Mat_MPISBAIJ *b;
1618:   int          ierr,i,mbs,Mbs;
1619:   PetscTruth   flg2;

1622:   PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg2);
1623:   if (!flg2) return(0);

1625:   PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);

1627:   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1628:   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1629:   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1630:   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1631:   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1632:   if (d_nnz) {
1633:     for (i=0; i<B->m/bs; i++) {
1634:       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1635:     }
1636:   }
1637:   if (o_nnz) {
1638:     for (i=0; i<B->m/bs; i++) {
1639:       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1640:     }
1641:   }
1642:   B->preallocated = PETSC_TRUE;
1643:   PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);
1644:   PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);
1645:   MapCreateMPI(B->comm,B->m,B->M,&B->rmap);
1646:   MapCreateMPI(B->comm,B->n,B->N,&B->cmap);

1648:   b   = (Mat_MPISBAIJ*)B->data;
1649:   mbs = B->m/bs;
1650:   Mbs = B->M/bs;
1651:   if (mbs*bs != B->m) {
1652:     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs);
1653:   }

1655:   b->bs  = bs;
1656:   b->bs2 = bs*bs;
1657:   b->mbs = mbs;
1658:   b->nbs = mbs;
1659:   b->Mbs = Mbs;
1660:   b->Nbs = Mbs;

1662:   MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);
1663:   b->rowners[0]    = 0;
1664:   for (i=2; i<=b->size; i++) {
1665:     b->rowners[i] += b->rowners[i-1];
1666:   }
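  /*
     Note: after this prefix sum, rowners[i] is the first block row owned by
     process i and rowners[b->size] is the total number of block rows, so
     rstart/rend below give the local block-row ownership range.
  */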
1667:   b->rstart    = b->rowners[b->rank];
1668:   b->rend      = b->rowners[b->rank+1];
1669:   b->cstart    = b->rstart;
1670:   b->cend      = b->rend;
1671:   for (i=0; i<=b->size; i++) {
1672:     b->rowners_bs[i] = b->rowners[i]*bs;
1673:   }
1674:   b->rstart_bs = b->rstart*bs;
1675:   b->rend_bs   = b->rend*bs;
1676: 
1677:   b->cstart_bs = b->cstart*bs;
1678:   b->cend_bs   = b->cend*bs;
1679: 

1681:   MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);
1682:   PetscLogObjectParent(B,b->A);
1683:   MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);
1684:   PetscLogObjectParent(B,b->B);

1686:   /* build cache for off array entries formed */
1687:   MatStashCreate_Private(B->comm,bs,&B->bstash);

1689:   return(0);
1690: }

1692: /*@C
1693:    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1694:    (block compressed row).  For good matrix assembly performance
1695:    the user should preallocate the matrix storage by setting the parameters 
1696:    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1697:    performance can be increased by more than a factor of 50.

1699:    Collective on MPI_Comm

1701:    Input Parameters:
1702: +  comm - MPI communicator
1703: .  bs   - size of block
1704: .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1705:            This value should be the same as the local size used in creating the 
1706:            y vector for the matrix-vector product y = Ax.
1707: .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1708:            This value should be the same as the local size used in creating the 
1709:            x vector for the matrix-vector product y = Ax.
1710: .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
1711: .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
1712: .  d_nz  - number of block nonzeros per block row in diagonal portion of local 
1713:            submatrix  (same for all local rows)
1714: .  d_nnz - array containing the number of block nonzeros in the various block rows 
1715:            in the diagonal portion of the local submatrix (possibly different for each
1716:            block row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1717: .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1718:            submatrix (same for all local rows).
1719: -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1720:            off-diagonal portion of the local submatrix (possibly different for
1721:            each block row) or PETSC_NULL.

1723:    Output Parameter:
1724: .  A - the matrix 

1726:    Options Database Keys:
1727: .   -mat_no_unroll - uses code that does not unroll the loops in the 
1728:                      block calculations (much slower)
1729: .   -mat_block_size - size of the blocks to use
1730: .   -mat_mpi - use the parallel matrix data structures even on one processor 
1731:                (defaults to using SeqSBAIJ format on one processor)

1733:    Notes:
1734:    The user MUST specify either the local or global matrix dimensions
1735:    (possibly both).

1737:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1738:    then it must be used on all processors that share the object for that argument.

1740:    Storage Information:
1741:    For a square global matrix we define each processor's diagonal portion 
1742:    to be its local rows and the corresponding columns (a square submatrix);  
1743:    each processor's off-diagonal portion encompasses the remainder of the
1744:    local matrix (a rectangular submatrix). 

1746:    The user can specify preallocated storage for the diagonal part of
1747:    the local submatrix with either d_nz or d_nnz (not both).  Set 
1748:    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1749:    memory allocation.  Likewise, specify preallocated storage for the
1750:    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

1752:    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1753:    the figure below we depict these three local rows and all columns (0-11).

1755: .vb
1756:            0 1 2 3 4 5 6 7 8 9 10 11
1757:           -------------------
1758:    row 3  |  o o o d d d o o o o o o
1759:    row 4  |  o o o d d d o o o o o o
1760:    row 5  |  o o o d d d o o o o o o
1761:           -------------------
1762: .ve
1763:   
1764:    Thus, any entries in the d locations are stored in the d (diagonal) 
1765:    submatrix, and any entries in the o locations are stored in the
1766:    o (off-diagonal) submatrix.  Note that the d submatrix is stored in the
1767:    MATSEQSBAIJ format and the o submatrix in the MATSEQBAIJ format, both using compressed row storage.

1769:    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1770:    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1771:    In general, for PDE problems in which most nonzeros are near the diagonal,
1772:    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1773:    or you will get TERRIBLE performance; see the users' manual chapter on
1774:    matrices.
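
   Example usage (an illustrative sketch only, with error checking omitted and
   the preallocation estimates made up for illustration):

.vb
      Mat A;
      int bs = 2, m = 6;

      MatCreateMPISBAIJ(PETSC_COMM_WORLD,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,
                        3,PETSC_NULL,1,PETSC_NULL,&A);
.ve
   Exact per-block-row counts can be supplied through the d_nnz and o_nnz arrays
   instead of the scalar estimates, as described above.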

1776:    Level: intermediate

1778: .keywords: matrix, block, aij, compressed row, sparse, parallel

1780: .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1781: @*/

1783: int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1784: {
1785:   int ierr,size;

1788:   MatCreate(comm,m,n,M,N,A);
1789:   MPI_Comm_size(comm,&size);
1790:   if (size > 1) {
1791:     MatSetType(*A,MATMPISBAIJ);
1792:     MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);
1793:   } else {
1794:     MatSetType(*A,MATSEQSBAIJ);
1795:     MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);
1796:   }
1797:   return(0);
1798: }


1801: static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1802: {
1803:   Mat          mat;
1804:   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1805:   int          ierr,len=0;

1808:   *newmat       = 0;
1809:   MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);
1810:   MatSetType(mat,MATMPISBAIJ);
1811:   mat->preallocated = PETSC_TRUE;
1812:   a = (Mat_MPISBAIJ*)mat->data;
1813:   a->bs  = oldmat->bs;
1814:   a->bs2 = oldmat->bs2;
1815:   a->mbs = oldmat->mbs;
1816:   a->nbs = oldmat->nbs;
1817:   a->Mbs = oldmat->Mbs;
1818:   a->Nbs = oldmat->Nbs;
1819: 
1820:   a->rstart       = oldmat->rstart;
1821:   a->rend         = oldmat->rend;
1822:   a->cstart       = oldmat->cstart;
1823:   a->cend         = oldmat->cend;
1824:   a->size         = oldmat->size;
1825:   a->rank         = oldmat->rank;
1826:   a->donotstash   = oldmat->donotstash;
1827:   a->roworiented  = oldmat->roworiented;
1828:   a->rowindices   = 0;
1829:   a->rowvalues    = 0;
1830:   a->getrowactive = PETSC_FALSE;
1831:   a->barray       = 0;
1832:   a->rstart_bs    = oldmat->rstart_bs;
1833:   a->rend_bs      = oldmat->rend_bs;
1834:   a->cstart_bs    = oldmat->cstart_bs;
1835:   a->cend_bs      = oldmat->cend_bs;

1837:   /* hash table stuff */
1838:   a->ht           = 0;
1839:   a->hd           = 0;
1840:   a->ht_size      = 0;
1841:   a->ht_flag      = oldmat->ht_flag;
1842:   a->ht_fact      = oldmat->ht_fact;
1843:   a->ht_total_ct  = 0;
1844:   a->ht_insert_ct = 0;

1846:   PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);
1847:   PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1848:   a->cowners    = a->rowners + a->size + 2;
1849:   a->rowners_bs = a->cowners + a->size + 2;
1850:   PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));
1851:   MatStashCreate_Private(matin->comm,1,&mat->stash);
1852:   MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);
1853:   if (oldmat->colmap) {
1854: #if defined (PETSC_USE_CTABLE)
1855:     PetscTableCreateCopy(oldmat->colmap,&a->colmap);
1856: #else
1857:     PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);
1858:     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
1859:     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
1860: #endif
1861:   } else a->colmap = 0;
1862:   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
1863:     PetscMalloc(len*sizeof(int),&a->garray);
1864:     PetscLogObjectMemory(mat,len*sizeof(int));
1865:     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1866:   } else a->garray = 0;
1867: 
1868:   VecDuplicate(oldmat->lvec,&a->lvec);
1869:   PetscLogObjectParent(mat,a->lvec);
1870:   VecScatterCopy(oldmat->Mvctx,&a->Mvctx);

1872:   PetscLogObjectParent(mat,a->Mvctx);
1873:   MatDuplicate(oldmat->A,cpvalues,&a->A);
1874:   PetscLogObjectParent(mat,a->A);
1875:   MatDuplicate(oldmat->B,cpvalues,&a->B);
1876:   PetscLogObjectParent(mat,a->B);
1877:   PetscFListDuplicate(matin->qlist,&mat->qlist);
1878:   *newmat = mat;
1879:   return(0);
1880: }

1882: #include "petscsys.h"

1884: EXTERN_C_BEGIN
1885: int MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1886: {
1887:   Mat          A;
1888:   int          i,nz,ierr,j,rstart,rend,fd;
1889:   Scalar       *vals,*buf;
1890:   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1891:   MPI_Status   status;
1892:   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
1893:   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
1894:   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
1895:   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
1896:   int          dcount,kmax,k,nzcount,tmp;
1897: 
1899:   PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);

1901:   MPI_Comm_size(comm,&size);
1902:   MPI_Comm_rank(comm,&rank);
1903:   if (!rank) {
1904:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1905:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1906:     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1907:     if (header[3] < 0) {
1908:       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
1909:     }
1910:   }

1912:   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1913:   M = header[1]; N = header[2];

1915:   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

1917:   /* 
1918:      This code adds extra rows to make sure the number of rows is 
1919:      divisible by the blocksize
1920:   */
1921:   Mbs        = M/bs;
1922:   extra_rows = bs - M + bs*(Mbs);
1923:   if (extra_rows == bs) extra_rows = 0;
1924:   else                  Mbs++;
1925:   if (extra_rows && !rank) {
1926:     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
1927:   }
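  /*
     Illustration (values chosen only as an example): for M = 10 and bs = 3,
     Mbs = 10/3 = 3 and extra_rows = bs - M + bs*Mbs = 3 - 10 + 9 = 2, so Mbs
     becomes 4 and the matrix is padded to 12 rows; each padded row later
     receives a single unit entry on the diagonal.
  */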

1929:   /* determine ownership of all rows */
1930:   mbs        = Mbs/size + ((Mbs % size) > rank);
1931:   m          = mbs*bs;
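  /*
     The formula above gives each of the first (Mbs % size) processes one extra
     block row; for example, Mbs = 10 on size = 3 processes yields mbs = 4, 3, 3
     on ranks 0, 1 and 2.
  */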
1932:   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);
1933:   browners   = rowners + size + 1;
1934:   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1935:   rowners[0] = 0;
1936:   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
1937:   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
1938:   rstart = rowners[rank];
1939:   rend   = rowners[rank+1];
1940: 
1941:   /* distribute row lengths to all processors */
1942:   PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);
1943:   if (!rank) {
1944:     PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
1945:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1946:     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1947:     PetscMalloc(size*sizeof(int),&sndcounts);
1948:     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
1949:     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
1950:     PetscFree(sndcounts);
1951:   } else {
1952:     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
1953:   }
1954: 
1955:   if (!rank) {   /* procs[0] */
1956:     /* calculate the number of nonzeros on each processor */
1957:     PetscMalloc(size*sizeof(int),&procsnz);
1958:     PetscMemzero(procsnz,size*sizeof(int));
1959:     for (i=0; i<size; i++) {
1960:       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
1961:         procsnz[i] += rowlengths[j];
1962:       }
1963:     }
1964:     PetscFree(rowlengths);
1965: 
1966:     /* determine max buffer needed and allocate it */
1967:     maxnz = 0;
1968:     for (i=0; i<size; i++) {
1969:       maxnz = PetscMax(maxnz,procsnz[i]);
1970:     }
1971:     PetscMalloc(maxnz*sizeof(int),&cols);

1973:     /* read in my part of the matrix column indices  */
1974:     nz     = procsnz[0];
1975:     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);
1976:     mycols = ibuf;
1977:     if (size == 1)  nz -= extra_rows;
1978:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1979:     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

1981:     /* read in the column indices for all other processors (except the last) and ship them off */
1982:     for (i=1; i<size-1; i++) {
1983:       nz   = procsnz[i];
1984:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1985:       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1986:     }
1987:     /* read in the stuff for the last proc */
1988:     if (size != 1) {
1989:       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
1990:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1991:       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
1992:       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
1993:     }
1994:     PetscFree(cols);
1995:   } else {  /* procs[i], i>0 */
1996:     /* determine buffer space needed for message */
1997:     nz = 0;
1998:     for (i=0; i<m; i++) {
1999:       nz += locrowlens[i];
2000:     }
2001:     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);
2002:     mycols = ibuf;
2003:     /* receive message of column indices*/
2004:     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
2005:     MPI_Get_count(&status,MPI_INT,&maxnz);
2006:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2007:   }

2009:   /* loop over local rows, determining number of off diagonal entries */
2010:   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);
2011:   odlens   = dlens + (rend-rstart);
2012:   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);
2013:   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));
2014:   masked1  = mask    + Mbs;
2015:   masked2  = masked1 + Mbs;
2016:   rowcount = 0; nzcount = 0;
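  /*
     The mask array marks block columns already counted for the current block
     row; masked1/masked2 remember which entries were set so that only those
     are cleared afterwards, keeping this pass proportional to the number of
     nonzeros rather than to Mbs per block row.
  */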
2017:   for (i=0; i<mbs; i++) {
2018:     dcount  = 0;
2019:     odcount = 0;
2020:     for (j=0; j<bs; j++) {
2021:       kmax = locrowlens[rowcount];
2022:       for (k=0; k<kmax; k++) {
2023:         tmp = mycols[nzcount++]/bs; /* block col. index */
2024:         if (!mask[tmp]) {
2025:           mask[tmp] = 1;
2026:           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2027:           else masked1[dcount++] = tmp; /* entry in diag portion */
2028:         }
2029:       }
2030:       rowcount++;
2031:     }
2032: 
2033:     dlens[i]  = dcount;  /* d_nnz[i] */
2034:     odlens[i] = odcount; /* o_nnz[i] */

2036:     /* zero out the mask elements we set */
2037:     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2038:     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2039:   }
2040: 
2041:   /* create our matrix */
2042:   MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat);
2043: 
2044:   A = *newmat;
2045:   MatSetOption(A,MAT_COLUMNS_SORTED);
2046: 
2047:   if (!rank) {
2048:     PetscMalloc(maxnz*sizeof(Scalar),&buf);
2049:     /* read in my part of the matrix numerical values  */
2050:     nz = procsnz[0];
2051:     vals = buf;
2052:     mycols = ibuf;
2053:     if (size == 1)  nz -= extra_rows;
2054:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2055:     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

2057:     /* insert into matrix */
2058:     jj      = rstart*bs;
2059:     for (i=0; i<m; i++) {
2060:       MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
2061:       mycols += locrowlens[i];
2062:       vals   += locrowlens[i];
2063:       jj++;
2064:     }

2066:     /* read in other processors (except the last one) and ship out */
2067:     for (i=1; i<size-1; i++) {
2068:       nz   = procsnz[i];
2069:       vals = buf;
2070:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2071:       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
2072:     }
2073:     /* the last proc */
2074:     if (size != 1){
2075:       nz   = procsnz[i] - extra_rows;
2076:       vals = buf;
2077:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2078:       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2079:       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
2080:     }
2081:     PetscFree(procsnz);

2083:   } else {
2084:     /* receive numeric values */
2085:     PetscMalloc(nz*sizeof(Scalar),&buf);

2087:     /* receive message of values*/
2088:     vals   = buf;
2089:     mycols = ibuf;
2090:     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
2091:     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
2092:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

2094:     /* insert into matrix */
2095:     jj      = rstart*bs;
2096:     for (i=0; i<m; i++) {
2097:       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
2098:       mycols += locrowlens[i];
2099:       vals   += locrowlens[i];
2100:       jj++;
2101:     }
2102:   }

2104:   PetscFree(locrowlens);
2105:   PetscFree(buf);
2106:   PetscFree(ibuf);
2107:   PetscFree(rowners);
2108:   PetscFree(dlens);
2109:   PetscFree(mask);
2110:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
2111:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
2112:   return(0);
2113: }
2114: EXTERN_C_END

2116: /*@
2117:    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

2119:    Input Parameters:
2120: .  mat  - the matrix
2121: .  fact - factor

2123:    Collective on Mat

2125:    Level: advanced

2127:   Notes:
2128:    This can also be set by the command line option: -mat_use_hash_table fact
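
   For example (the value 1.6 is illustrative only), the runtime option
.vb
      -mat_use_hash_table 1.6
.ve
   enables the hash table and sets the factor to 1.6; values of 1.0 or less fall
   back to the default factor of 1.39.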

2130: .keywords: matrix, hashtable, factor, HT

2132: .seealso: MatSetOption()
2133: @*/
2134: int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2135: {
2137:   SETERRQ(1,"Function not yet written for SBAIJ format");
2138:   /* return(0); */
2139: }

2141: int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2142: {
2143:   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2144:   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2145:   PetscReal    atmp;
2146:   double       *work,*svalues,*rvalues;
2147:   int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2148:   int          rank,size,*rowners_bs,dest,count,source;
2149:   Scalar       *va;
2150:   MatScalar    *ba;
2151:   MPI_Status   stat;
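
  /*
     Summary of the algorithm below: the row maxima of the local diagonal block
     a->A are computed first; every process except the last then scans its
     off-diagonal block a->B, updating its own row maxima and accumulating
     column maxima in the work array.  Because the matrix is symmetric, each
     column maximum is also a row-maximum contribution for the process owning
     that column, so the relevant pieces of work are sent to the higher-ranked
     owners and merged into their result vector v.
  */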

2154:   MatGetRowMax(a->A,v);
2155:   VecGetArray(v,&va);

2157:   MPI_Comm_size(PETSC_COMM_WORLD,&size);
2158:   MPI_Comm_rank(PETSC_COMM_WORLD,&rank);

2160:   bs   = a->bs;
2161:   mbs  = a->mbs;
2162:   Mbs  = a->Mbs;
2163:   ba   = b->a;
2164:   bi   = b->i;
2165:   bj   = b->j;
2166:   /*
2167:   PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs); 
2168:   PetscSynchronizedFlush(PETSC_COMM_WORLD);
2169:   */

2171:   /* find ownerships */
2172:   rowners_bs = a->rowners_bs;
2173:   /*
2174:   if (!rank){
2175:     for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]); 
2176:   }
2177:   */

2179:   /* each proc creates an array to be distributed */
2180:   PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);
2181:   PetscMemzero(work,bs*Mbs*sizeof(PetscReal));

2183:   /* row_max for B */
2184:   if (rank != size-1){
2185:     for (i=0; i<mbs; i++) {
2186:       ncols = bi[1] - bi[0]; bi++;
2187:       brow  = bs*i;
2188:       for (j=0; j<ncols; j++){
2189:         bcol = bs*(*bj);
2190:         for (kcol=0; kcol<bs; kcol++){
2191:           col = bcol + kcol;                 /* local col index */
2192:           col += rowners_bs[rank+1];      /* global col index */
2193:           /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */
2194:           for (krow=0; krow<bs; krow++){
2195:             atmp = PetscAbsScalar(*ba); ba++;
2196:             row = brow + krow;    /* local row index */
2197:             /* printf("val[%d,%d]: %g\n",row,col,atmp); */
2198:             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2199:             if (work[col] < atmp) work[col] = atmp;
2200:           }
2201:         }
2202:         bj++;
2203:       }
2204:     }
2205:     /*
2206:       PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank);
2207:       for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]);
2208:       PetscPrintf(PETSC_COMM_SELF,"\n");
2209:       */

2211:     /* send values to its owners */
2212:     for (dest=rank+1; dest<size; dest++){
2213:       svalues = work + rowners_bs[dest];
2214:       count = rowners_bs[dest+1]-rowners_bs[dest];
2215:       MPI_Send(svalues,count,MPI_DOUBLE,dest,rank,PETSC_COMM_WORLD);
2216:       /*
2217:       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]); 
2218:       PetscSynchronizedFlush(PETSC_COMM_WORLD);
2219:       */
2220:     }
2221:   }
2222: 
2223:   /* receive values */
2224:   if (rank){
2225:     rvalues = work;
2226:     count = rowners_bs[rank+1]-rowners_bs[rank];
2227:     for (source=0; source<rank; source++){
2228:       MPI_Recv(rvalues,count,MPI_DOUBLE,MPI_ANY_SOURCE,MPI_ANY_TAG,PETSC_COMM_WORLD,&stat);
2229:       /* process values */
2230:       for (i=0; i<count; i++){
2231:         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2232:       }
2233:       /*
2234:       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]);  
2235:       PetscSynchronizedFlush(PETSC_COMM_WORLD);
2236:       */
2237:     }
2238:   }

2240:   VecRestoreArray(v,&va);
2241:   PetscFree(work);
2242:   return(0);
2243: }