Actual source code: mpibdiag.c

  1: /*$Id: mpibdiag.c,v 1.198 2001/03/23 23:22:05 balay Exp $*/
  2: /*
  3:    The basic matrix operations for the block diagonal parallel
  4:    matrices.
  5: */
 6:  #include "src/mat/impls/bdiag/mpi/mpibdiag.h"
 7:  #include "src/vec/vecimpl.h"

  9: int MatSetValues_MPIBDiag(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
 10: {
 11:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
 12:   int          ierr,i,j,row,rstart = mbd->rstart,rend = mbd->rend;
 13:   PetscTruth   roworiented = mbd->roworiented;

 16:   for (i=0; i<m; i++) {
 17:     if (idxm[i] < 0) continue;
 18:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 19:     if (idxm[i] >= rstart && idxm[i] < rend) {
 20:       row = idxm[i] - rstart;
 21:       for (j=0; j<n; j++) {
 22:         if (idxn[j] < 0) continue;
 23:         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 24:         if (roworiented) {
 25:           MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j,addv);
 26:         } else {
 27:           MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i+j*m,addv);
 28:         }
 29:       }
 30:     } else {
 31:       if (!mbd->donotstash) {
 32:         if (roworiented) {
 33:           MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
 34:         } else {
 35:           MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
 36:         }
 37:       }
 38:     }
 39:   }
 40:   return(0);
 41: }

 43: int MatGetValues_MPIBDiag(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
 44: {
 45:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
 46:   int          ierr,i,j,row,rstart = mbd->rstart,rend = mbd->rend;

 49:   for (i=0; i<m; i++) {
 50:     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
 51:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 52:     if (idxm[i] >= rstart && idxm[i] < rend) {
 53:       row = idxm[i] - rstart;
 54:       for (j=0; j<n; j++) {
 55:         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
 56:         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 57:         MatGetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j);
 58:       }
 59:     } else {
 60:       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
 61:     }
 62:   }
 63:   return(0);
 64: }

 66: int MatAssemblyBegin_MPIBDiag(Mat mat,MatAssemblyType mode)
 67: {
 68:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
 69:   MPI_Comm     comm = mat->comm;
 70:   int          ierr,nstash,reallocs;
 71:   InsertMode   addv;

 74:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
 75:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
 76:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
 77:   }
 78:   mat->insertmode = addv; /* in case this processor had no cache */
 79:   MatStashScatterBegin_Private(&mat->stash,mbd->rowners);
 80:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
 81:   PetscLogInfo(0,"MatAssemblyBegin_MPIBDiag:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
 82:   return(0);
 83: }
 84: EXTERN int MatSetUpMultiply_MPIBDiag(Mat);

 86: int MatAssemblyEnd_MPIBDiag(Mat mat,MatAssemblyType mode)
 87: {
 88:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
 89:   Mat_SeqBDiag *mlocal;
 90:   int          i,n,*row,*col;
 91:   int          *tmp1,*tmp2,ierr,len,ict,Mblock,Nblock,flg,j,rstart,ncols;
 92:   Scalar       *val;
 93:   InsertMode   addv = mat->insertmode;


 97:   while (1) {
 98:     MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
 99:     if (!flg) break;
100: 
101:     for (i=0; i<n;) {
102:       /* Now identify the consecutive vals belonging to the same row */
103:       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
104:       if (j < n) ncols = j-i;
105:       else       ncols = n-i;
106:       /* Now assemble all these values with a single function call */
107:       MatSetValues_MPIBDiag(mat,1,row+i,ncols,col+i,val+i,addv);
108:       i = j;
109:     }
110:   }
111:   MatStashScatterEnd_Private(&mat->stash);

113:   MatAssemblyBegin(mbd->A,mode);
114:   MatAssemblyEnd(mbd->A,mode);

116:   /* Fix main diagonal location and determine global diagonals */
117:   mlocal         = (Mat_SeqBDiag*)mbd->A->data;
118:   Mblock         = mat->M/mlocal->bs; Nblock = mat->N/mlocal->bs;
119:   len            = Mblock + Nblock + 1; /* add 1 to prevent 0 malloc */
120:   ierr           = PetscMalloc(2*len*sizeof(int),&tmp1);
121:   tmp2           = tmp1 + len;
122:   ierr           = PetscMemzero(tmp1,2*len*sizeof(int));
123:   mlocal->mainbd = -1;
124:   for (i=0; i<mlocal->nd; i++) {
125:     if (mlocal->diag[i] + mbd->brstart == 0) mlocal->mainbd = i;
126:     tmp1[mlocal->diag[i] + mbd->brstart + Mblock] = 1;
127:   }
128:   MPI_Allreduce(tmp1,tmp2,len,MPI_INT,MPI_SUM,mat->comm);
129:   ict  = 0;
130:   for (i=0; i<len; i++) {
131:     if (tmp2[i]) {
132:       mbd->gdiag[ict] = i - Mblock;
133:       ict++;
134:     }
135:   }
136:   mbd->gnd = ict;
137:   PetscFree(tmp1);

139:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
140:     MatSetUpMultiply_MPIBDiag(mat);
141:   }
142:   return(0);
143: }

145: int MatGetBlockSize_MPIBDiag(Mat mat,int *bs)
146: {
147:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
148:   Mat_SeqBDiag *dmat = (Mat_SeqBDiag*)mbd->A->data;

151:   *bs = dmat->bs;
152:   return(0);
153: }

155: int MatZeroEntries_MPIBDiag(Mat A)
156: {
157:   Mat_MPIBDiag *l = (Mat_MPIBDiag*)A->data;
158:   int          ierr;

161:   MatZeroEntries(l->A);
162:   return(0);
163: }

 165: /* again this uses the same basic strategy as in the assembly and 
 166:    scatter create routines; we should try to do it systematically 
 167:    if we can figure out the proper level of generality. */

169: /* the code does not do the diagonal entries correctly unless the 
 170:    matrix is square and the column and row ownerships are identical.
171:    This is a BUG. The only way to fix it seems to be to access 
172:    aij->A and aij->B directly and not through the MatZeroRows() 
173:    routine. 
174: */

176: int MatZeroRows_MPIBDiag(Mat A,IS is,Scalar *diag)
177: {
178:   Mat_MPIBDiag   *l = (Mat_MPIBDiag*)A->data;
179:   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
180:   int            *procs,*nprocs,j,idx,nsends,*work;
181:   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
182:   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
183:   int            *lens,imdex,*lrows,*values;
184:   MPI_Comm       comm = A->comm;
185:   MPI_Request    *send_waits,*recv_waits;
186:   MPI_Status     recv_status,*send_status;
187:   IS             istmp;
188:   PetscTruth     found;

191:   ISGetLocalSize(is,&N);
192:   ISGetIndices(is,&rows);

194:   /*  first count number of contributors to each processor */
195:   ierr   = PetscMalloc(2*size*sizeof(int),&nprocs);
196:   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));
197:   procs  = nprocs + size;
198:   ierr   = PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
199:   for (i=0; i<N; i++) {
200:     idx = rows[i];
201:     found = PETSC_FALSE;
202:     for (j=0; j<size; j++) {
203:       if (idx >= owners[j] && idx < owners[j+1]) {
204:         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
205:       }
206:     }
207:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"row out of range");
208:   }
209:   nsends = 0;  for (i=0; i<size; i++) {nsends += procs[i];}

211:   /* inform other processors of number of messages and max length*/
212:   ierr   = PetscMalloc(2*size*sizeof(int),&work);
213:   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);
214:   nmax   = work[rank];
215:   nrecvs = work[size+rank];
216:   ierr   = PetscFree(work);

218:   /* post receives:   */
219:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
220:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
221:   for (i=0; i<nrecvs; i++) {
222:     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
223:   }

225:   /* do sends:
226:       1) starts[i] gives the starting index in svalues for stuff going to 
227:          the ith processor
228:   */
229:   PetscMalloc((N+1)*sizeof(int),&svalues);
230:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
231:   PetscMalloc((size+1)*sizeof(int),&starts);
232:   starts[0] = 0;
233:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
234:   for (i=0; i<N; i++) {
235:     svalues[starts[owner[i]]++] = rows[i];
236:   }
237:   ISRestoreIndices(is,&rows);

239:   starts[0] = 0;
240:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
241:   count = 0;
242:   for (i=0; i<size; i++) {
243:     if (procs[i]) {
244:       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
245:     }
246:   }
247:   PetscFree(starts);

249:   base = owners[rank];

251:   /*  wait on receives */
252:   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
253:   source = lens + nrecvs;
254:   count  = nrecvs;
255:   slen   = 0;
256:   while (count) {
257:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
258:     /* unpack receives into our local space */
259:     MPI_Get_count(&recv_status,MPI_INT,&n);
260:     source[imdex]  = recv_status.MPI_SOURCE;
261:     lens[imdex]  = n;
262:     slen += n;
263:     count--;
264:   }
265:   PetscFree(recv_waits);
266: 
267:   /* move the data into the send scatter */
268:   ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);
269:   count = 0;
270:   for (i=0; i<nrecvs; i++) {
271:     values = rvalues + i*nmax;
272:     for (j=0; j<lens[i]; j++) {
273:       lrows[count++] = values[j] - base;
274:     }
275:   }
276:   PetscFree(rvalues);
277:   PetscFree(lens);
278:   PetscFree(owner);
279:   PetscFree(nprocs);
280: 
281:   /* actually zap the local rows */
282:   ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
283:   PetscLogObjectParent(A,istmp);
284:   PetscFree(lrows);
285:   MatZeroRows(l->A,istmp,diag);
286:   ISDestroy(istmp);

288:   /* wait on sends */
289:   if (nsends) {
290:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
291:     MPI_Waitall(nsends,send_waits,send_status);
292:     PetscFree(send_status);
293:   }
294:   PetscFree(send_waits);
295:   PetscFree(svalues);

297:   return(0);
298: }

300: int MatMult_MPIBDiag(Mat mat,Vec xx,Vec yy)
301: {
302:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
303:   int          ierr;

306:   VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
307:   VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
308:   (*mbd->A->ops->mult)(mbd->A,mbd->lvec,yy);
309:   return(0);
310: }

312: int MatMultAdd_MPIBDiag(Mat mat,Vec xx,Vec yy,Vec zz)
313: {
314:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
315:   int          ierr;

318:   VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
319:   VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
320:   (*mbd->A->ops->multadd)(mbd->A,mbd->lvec,yy,zz);
321:   return(0);
322: }

324: int MatMultTranspose_MPIBDiag(Mat A,Vec xx,Vec yy)
325: {
326:   Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
327:   int          ierr;
328:   Scalar       zero = 0.0;

331:   VecSet(&zero,yy);
332:   (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
333:   VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
334:   VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
335:   return(0);
336: }

338: int MatMultTransposeAdd_MPIBDiag(Mat A,Vec xx,Vec yy,Vec zz)
339: {
340:   Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
341:   int          ierr;

344:   VecCopy(yy,zz);
345:   (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
346:   VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
347:   VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
348:   return(0);
349: }

351: int MatGetInfo_MPIBDiag(Mat matin,MatInfoType flag,MatInfo *info)
352: {
353:   Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
354:   Mat_SeqBDiag *dmat = (Mat_SeqBDiag*)mat->A->data;
355:   int          ierr;
356:   PetscReal    isend[5],irecv[5];

359:   info->block_size     = (PetscReal)dmat->bs;
360:   MatGetInfo(mat->A,MAT_LOCAL,info);
361:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
362:   isend[3] = info->memory;  isend[4] = info->mallocs;
363:   if (flag == MAT_LOCAL) {
364:     info->nz_used      = isend[0];
365:     info->nz_allocated = isend[1];
366:     info->nz_unneeded  = isend[2];
367:     info->memory       = isend[3];
368:     info->mallocs      = isend[4];
369:   } else if (flag == MAT_GLOBAL_MAX) {
370:     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
371:     info->nz_used      = irecv[0];
372:     info->nz_allocated = irecv[1];
373:     info->nz_unneeded  = irecv[2];
374:     info->memory       = irecv[3];
375:     info->mallocs      = irecv[4];
376:   } else if (flag == MAT_GLOBAL_SUM) {
377:     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
378:     info->nz_used      = irecv[0];
379:     info->nz_allocated = irecv[1];
380:     info->nz_unneeded  = irecv[2];
381:     info->memory       = irecv[3];
382:     info->mallocs      = irecv[4];
383:   }
384:   info->rows_global    = (double)matin->M;
385:   info->columns_global = (double)matin->N;
386:   info->rows_local     = (double)matin->m;
387:   info->columns_local  = (double)matin->N;
388:   return(0);
389: }

391: int MatGetDiagonal_MPIBDiag(Mat mat,Vec v)
392: {
393:   int          ierr;
394:   Mat_MPIBDiag *A = (Mat_MPIBDiag*)mat->data;

397:   MatGetDiagonal(A->A,v);
398:   return(0);
399: }

401: int MatDestroy_MPIBDiag(Mat mat)
402: {
403:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
404:   int          ierr;
405: #if defined(PETSC_USE_LOG)
406:   Mat_SeqBDiag *ms = (Mat_SeqBDiag*)mbd->A->data;

409:   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d, BSize=%d, NDiag=%d",mat->M,mat->N,ms->bs,ms->nd);
410: #else
412: #endif
413:   MatStashDestroy_Private(&mat->stash);
414:   PetscFree(mbd->rowners);
415:   PetscFree(mbd->gdiag);
416:   MatDestroy(mbd->A);
417:   if (mbd->lvec) {VecDestroy(mbd->lvec);}
418:   if (mbd->Mvctx) {VecScatterDestroy(mbd->Mvctx);}
419:   PetscFree(mbd);
420:   return(0);
421: }


424: static int MatView_MPIBDiag_Binary(Mat mat,PetscViewer viewer)
425: {
426:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
427:   int          ierr;

430:   if (mbd->size == 1) {
431:     MatView(mbd->A,viewer);
432:   } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
433:   return(0);
434: }

436: static int MatView_MPIBDiag_ASCIIorDraw(Mat mat,PetscViewer viewer)
437: {
438:   Mat_MPIBDiag      *mbd = (Mat_MPIBDiag*)mat->data;
439:   Mat_SeqBDiag      *dmat = (Mat_SeqBDiag*)mbd->A->data;
440:   int               ierr,i,size = mbd->size,rank = mbd->rank;
441:   PetscTruth        isascii,isdraw;
442:   PetscViewer       sviewer;
443:   PetscViewerFormat format;

446:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
447:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
448:   if (isascii) {
449:     PetscViewerGetFormat(viewer,&format);
450:     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
451:       int nline = PetscMin(10,mbd->gnd),k,nk,np;
 452:       PetscViewerASCIIPrintf(viewer,"  block size=%d, total number of diagonals=%d\n",dmat->bs,mbd->gnd);
453:       nk = (mbd->gnd-1)/nline + 1;
454:       for (k=0; k<nk; k++) {
455:         PetscViewerASCIIPrintf(viewer,"  global diag numbers:");
456:         np = PetscMin(nline,mbd->gnd - nline*k);
457:         for (i=0; i<np; i++) {
458:           PetscViewerASCIIPrintf(viewer,"  %d",mbd->gdiag[i+nline*k]);
459:         }
 460:         PetscViewerASCIIPrintf(viewer,"\n");
461:       }
462:       if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
463:         MatInfo info;
464:         MPI_Comm_rank(mat->comm,&rank);
465:         MatGetInfo(mat,MAT_LOCAL,&info);
 466:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
467:             (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
468:         PetscViewerFlush(viewer);
469:         VecScatterView(mbd->Mvctx,viewer);
470:       }
471:       return(0);
472:     }
473:   }

475:   if (isdraw) {
476:     PetscDraw       draw;
477:     PetscTruth isnull;
478:     PetscViewerDrawGetDraw(viewer,0,&draw);
479:     PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
480:   }

482:   if (size == 1) {
483:     MatView(mbd->A,viewer);
484:   } else {
485:     /* assemble the entire matrix onto first processor. */
486:     Mat          A;
487:     int          M = mat->M,N = mat->N,m,row,nz,*cols;
488:     Scalar       *vals;
489:     Mat_SeqBDiag *Ambd = (Mat_SeqBDiag*)mbd->A->data;

491:     if (!rank) {
492:       MatCreateMPIBDiag(mat->comm,M,M,N,mbd->gnd,Ambd->bs,mbd->gdiag,PETSC_NULL,&A);
493:     } else {
494:       MatCreateMPIBDiag(mat->comm,0,M,N,0,Ambd->bs,PETSC_NULL,PETSC_NULL,&A);
495:     }
496:     PetscLogObjectParent(mat,A);

498:     /* Copy the matrix ... This isn't the most efficient means,
499:        but it's quick for now */
500:     row = mbd->rstart;
501:     m = mbd->A->m;
502:     for (i=0; i<m; i++) {
503:       MatGetRow(mat,row,&nz,&cols,&vals);
504:       MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);
505:       MatRestoreRow(mat,row,&nz,&cols,&vals);
506:       row++;
507:     }
508:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
509:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
510:     PetscViewerGetSingleton(viewer,&sviewer);
511:     if (!rank) {
512:       MatView(((Mat_MPIBDiag*)(A->data))->A,sviewer);
513:     }
514:     PetscViewerRestoreSingleton(viewer,&sviewer);
515:     PetscViewerFlush(viewer);
516:     MatDestroy(A);
517:   }
518:   return(0);
519: }

521: int MatView_MPIBDiag(Mat mat,PetscViewer viewer)
522: {
523:   int        ierr;
524:   PetscTruth isascii,isdraw,isbinary;

527:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
528:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
529:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
530:   if (isascii || isdraw) {
531:     MatView_MPIBDiag_ASCIIorDraw(mat,viewer);
532:   } else if (isbinary) {
533:     MatView_MPIBDiag_Binary(mat,viewer);
534:   } else {
535:     SETERRQ1(1,"Viewer type %s not supported by MPIBdiag matrices",((PetscObject)viewer)->type_name);
536:   }
537:   return(0);
538: }

540: int MatSetOption_MPIBDiag(Mat A,MatOption op)
541: {
542:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)A->data;
543:   int          ierr;

545:   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
546:       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
547:       op == MAT_NEW_NONZERO_LOCATION_ERR ||
548:       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
549:       op == MAT_NO_NEW_DIAGONALS ||
550:       op == MAT_YES_NEW_DIAGONALS) {
551:         MatSetOption(mbd->A,op);
552:   } else if (op == MAT_ROW_ORIENTED) {
553:     mbd->roworiented = PETSC_TRUE;
554:     MatSetOption(mbd->A,op);
555:   } else if (op == MAT_COLUMN_ORIENTED) {
556:     mbd->roworiented = PETSC_FALSE;
557:     MatSetOption(mbd->A,op);
558:   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
559:     mbd->donotstash = PETSC_TRUE;
560:   } else if (op == MAT_ROWS_SORTED ||
561:              op == MAT_ROWS_UNSORTED ||
562:              op == MAT_COLUMNS_SORTED ||
563:              op == MAT_COLUMNS_UNSORTED ||
564:              op == MAT_YES_NEW_DIAGONALS ||
565:              op == MAT_USE_HASH_TABLE) {
 566:     PetscLogInfo(A,"MatSetOption_MPIBDiag:Option ignored\n");
567:   } else {
568:     SETERRQ(PETSC_ERR_SUP,"unknown option");
569:   }
570:   return(0);
571: }

573: int MatGetOwnershipRange_MPIBDiag(Mat matin,int *m,int *n)
574: {
575:   Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;

578:   if (m) *m = mat->rstart;
579:   if (n) *n = mat->rend;
580:   return(0);
581: }

583: int MatGetRow_MPIBDiag(Mat matin,int row,int *nz,int **idx,Scalar **v)
584: {
585:   Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
586:   int          lrow,ierr;

589:   if (row < mat->rstart || row >= mat->rend) SETERRQ(PETSC_ERR_SUP,"only for local rows")
590:   lrow = row - mat->rstart;
591:   MatGetRow(mat->A,lrow,nz,idx,v);
592:   return(0);
593: }

595: int MatRestoreRow_MPIBDiag(Mat matin,int row,int *nz,int **idx,
596:                                   Scalar **v)
597: {
598:   Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
599:   int          lrow,ierr;

602:   lrow = row - mat->rstart;
603:   MatRestoreRow(mat->A,lrow,nz,idx,v);
604:   return(0);
605: }


608: int MatNorm_MPIBDiag(Mat A,NormType type,PetscReal *norm)
609: {
610:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)A->data;
611:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)mbd->A->data;
612:   PetscReal    sum = 0.0;
613:   int          ierr,d,i,nd = a->nd,bs = a->bs,len;
614:   Scalar       *dv;

617:   if (type == NORM_FROBENIUS) {
618:     for (d=0; d<nd; d++) {
619:       dv   = a->diagv[d];
620:       len  = a->bdlen[d]*bs*bs;
621:       for (i=0; i<len; i++) {
622: #if defined(PETSC_USE_COMPLEX)
623:         sum += PetscRealPart(PetscConj(dv[i])*dv[i]);
624: #else
625:         sum += dv[i]*dv[i];
626: #endif
627:       }
628:     }
629:     MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
630:     *norm = sqrt(*norm);
631:     PetscLogFlops(2*A->n*A->m);
632:   } else if (type == NORM_1) { /* max column norm */
633:     PetscReal *tmp,*tmp2;
634:     int    j;
635:     PetscMalloc((mbd->A->n+1)*sizeof(PetscReal),&tmp);
636:     PetscMalloc((mbd->A->n+1)*sizeof(PetscReal),&tmp2);
637:     MatNorm_SeqBDiag_Columns(mbd->A,tmp,mbd->A->n);
638:     *norm = 0.0;
639:     MPI_Allreduce(tmp,tmp2,mbd->A->n,MPI_DOUBLE,MPI_SUM,A->comm);
640:     for (j=0; j<mbd->A->n; j++) {
641:       if (tmp2[j] > *norm) *norm = tmp2[j];
642:     }
643:     PetscFree(tmp);
644:     PetscFree(tmp2);
645:   } else if (type == NORM_INFINITY) { /* max row norm */
646:     PetscReal normtemp;
647:     MatNorm(mbd->A,type,&normtemp);
648:     MPI_Allreduce(&normtemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
649:   }
650:   return(0);
651: }

653: EXTERN int MatPrintHelp_SeqBDiag(Mat);
654: int MatPrintHelp_MPIBDiag(Mat A)
655: {
656:   Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
657:   int          ierr;

660:   if (!a->rank) {
661:     MatPrintHelp_SeqBDiag(a->A);
662:   }
663:   return(0);
664: }

666: EXTERN int MatScale_SeqBDiag(Scalar*,Mat);
667: int MatScale_MPIBDiag(Scalar *alpha,Mat A)
668: {
669:   int          ierr;
670:   Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;

673:   MatScale_SeqBDiag(alpha,a->A);
674:   return(0);
675: }

677: int MatSetUpPreallocation_MPIBDiag(Mat A)
678: {
679:   int        ierr;

682:    MatMPIBDiagSetPreallocation(A,PETSC_DEFAULT,PETSC_DEFAULT,0,0);
683:   return(0);
684: }

686: /* -------------------------------------------------------------------*/

688: static struct _MatOps MatOps_Values = {MatSetValues_MPIBDiag,
689:        MatGetRow_MPIBDiag,
690:        MatRestoreRow_MPIBDiag,
691:        MatMult_MPIBDiag,
692:        MatMultAdd_MPIBDiag,
693:        MatMultTranspose_MPIBDiag,
694:        MatMultTransposeAdd_MPIBDiag,
695:        0,
696:        0,
697:        0,
698:        0,
699:        0,
700:        0,
701:        0,
702:        0,
703:        MatGetInfo_MPIBDiag,0,
704:        MatGetDiagonal_MPIBDiag,
705:        0,
706:        MatNorm_MPIBDiag,
707:        MatAssemblyBegin_MPIBDiag,
708:        MatAssemblyEnd_MPIBDiag,
709:        0,
710:        MatSetOption_MPIBDiag,
711:        MatZeroEntries_MPIBDiag,
712:        MatZeroRows_MPIBDiag,
713:        0,
714:        0,
715:        0,
716:        0,
717:        MatSetUpPreallocation_MPIBDiag,
718:        0,
719:        MatGetOwnershipRange_MPIBDiag,
720:        0,
721:        0,
722:        0,
723:        0,
724:        0,
725:        0,
726:        0,
727:        0,
728:        0,
729:        0,
730:        0,
731:        0,
732:        MatGetValues_MPIBDiag,
733:        0,
734:        MatPrintHelp_MPIBDiag,
735:        MatScale_MPIBDiag,
736:        0,
737:        0,
738:        0,
739:        MatGetBlockSize_MPIBDiag,
740:        0,
741:        0,
742:        0,
743:        0,
744:        0,
745:        0,
746:        0,
747:        0,
748:        0,
749:        0,
750:        MatDestroy_MPIBDiag,
751:        MatView_MPIBDiag,
752:        MatGetMaps_Petsc};

754: EXTERN_C_BEGIN
755: int MatGetDiagonalBlock_MPIBDiag(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
756: {
757:   Mat_MPIBDiag *matin = (Mat_MPIBDiag *)A->data;
758:   int          ierr,lrows,lcols,rstart,rend;
759:   IS           localc,localr;

762:   MatGetLocalSize(A,&lrows,&lcols);
763:   MatGetOwnershipRange(A,&rstart,&rend);
764:   ISCreateStride(PETSC_COMM_SELF,lrows,rstart,1,&localc);
765:   ISCreateStride(PETSC_COMM_SELF,lrows,0,1,&localr);
766:   MatGetSubMatrix(matin->A,localr,localc,PETSC_DECIDE,reuse,a);
767:   ISDestroy(localr);
768:   ISDestroy(localc);

770:   *iscopy = PETSC_TRUE;
771:   return(0);
772: }
773: EXTERN_C_END

775: EXTERN_C_BEGIN
776: int MatCreate_MPIBDiag(Mat B)
777: {
778:   Mat_MPIBDiag *b;
779:   int          ierr;

782:   ierr            = PetscNew(Mat_MPIBDiag,&b);
783:   B->data         = (void*)b;
784:   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
785:   B->factor       = 0;
786:   B->mapping      = 0;

788:   B->insertmode = NOT_SET_VALUES;
789:   MPI_Comm_rank(B->comm,&b->rank);
790:   MPI_Comm_size(B->comm,&b->size);

792:   /* build local table of row ownerships */
793:   PetscMalloc((b->size+2)*sizeof(int),&b->rowners);

795:   /* build cache for off array entries formed */
796:   MatStashCreate_Private(B->comm,1,&B->stash);
797:   b->donotstash = PETSC_FALSE;

799:   /* stuff used for matrix-vector multiply */
800:   b->lvec        = 0;
801:   b->Mvctx       = 0;

803:   /* used for MatSetValues() input */
804:   b->roworiented = PETSC_TRUE;

806:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
807:                                      "MatGetDiagonalBlock_MPIBDiag",
808:                                       MatGetDiagonalBlock_MPIBDiag);
809:   return(0);
810: }
811: EXTERN_C_END

813: /*@C
 814:    MatMPIBDiagSetPreallocation - Sets the block diagonal storage information for a sparse parallel matrix in MPIBDiag format.

816:    Collective on Mat

818:    Input Parameters:
819: +  A - the matrix 
820: .  nd - number of block diagonals (global) (optional)
 821: .  bs - each element of a diagonal is a bs x bs dense matrix
822: .  diag - optional array of block diagonal numbers (length nd).
823:    For a matrix element A[i,j], where i=row and j=column, the
824:    diagonal number is
825: $     diag = i/bs - j/bs  (integer division)
826:    Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as 
827:    needed (expensive).
828: -  diagv  - pointer to actual diagonals (in same order as diag array), 
829:    if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
830:    to control memory allocation.


833:    Options Database Keys:
834: .  -mat_block_size <bs> - Sets blocksize
835: .  -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers

837:    Notes:
 838:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
 839:    then it must be used on all processors that share the object for that argument.

841:    The parallel matrix is partitioned across the processors by rows, where
842:    each local rectangular matrix is stored in the uniprocessor block 
843:    diagonal format.  See the users manual for further details.

845:    The user MUST specify either the local or global numbers of rows
846:    (possibly both).

848:    The case bs=1 (conventional diagonal storage) is implemented as
849:    a special case.

851:    Fortran Notes:
852:    Fortran programmers cannot set diagv; this variable is ignored.

854:    Level: intermediate

856: .keywords: matrix, block, diagonal, parallel, sparse

858: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
859: @*/
860: int MatMPIBDiagSetPreallocation(Mat B,int nd,int bs,int *diag,Scalar **diagv)
861: {
862:   Mat_MPIBDiag *b;
863:   int          ierr,i,k,*ldiag,len,nd2;
864:   Scalar       **ldiagv = 0;
865:   PetscTruth   flg2;

868:   PetscTypeCompare((PetscObject)B,MATMPIBDIAG,&flg2);
869:   if (!flg2) return(0);
870:   B->preallocated = PETSC_TRUE;
871:   if (bs == PETSC_DEFAULT) bs = 1;
872:   if (nd == PETSC_DEFAULT) nd = 0;
873:   PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);
874:   PetscOptionsGetInt(PETSC_NULL,"-mat_bdiag_ndiag",&nd,PETSC_NULL);
875:   PetscOptionsHasName(PETSC_NULL,"-mat_bdiag_diags",&flg2);
876:   if (nd && !diag) {
877:     PetscMalloc(nd*sizeof(int),&diag);
878:     nd2  = nd;
879:     PetscOptionsGetIntArray(PETSC_NULL,"-mat_bdiag_dvals",diag,&nd2,PETSC_NULL);
880:     if (nd2 != nd) {
881:       SETERRQ(PETSC_ERR_ARG_INCOMP,"Incompatible number of diags and diagonal vals");
882:     }
883:   } else if (flg2) {
884:     SETERRQ(PETSC_ERR_ARG_WRONG,"Must specify number of diagonals with -mat_bdiag_ndiag");
885:   }

887:   if (bs <= 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Blocksize must be positive");

889:   PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);
890:   B->n = B->N = PetscMax(B->n,B->N);

892:   if ((B->N%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad column number");
893:   if ((B->m%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad local row number");
894:   if ((B->M%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad global row number");

896:   /* the information in the maps duplicates the information computed below, eventually 
897:      we should remove the duplicate information that is not contained in the maps */
898:   MapCreateMPI(B->comm,B->m,B->M,&B->rmap);
899:   MapCreateMPI(B->comm,B->m,B->M,&B->cmap);


902:   b          = (Mat_MPIBDiag*)B->data;
903:   b->gnd     = nd;

905:   ierr          = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);
906:   b->rowners[0] = 0;
907:   for (i=2; i<=b->size; i++) {
908:     b->rowners[i] += b->rowners[i-1];
909:   }
910:   b->rstart  = b->rowners[b->rank];
911:   b->rend    = b->rowners[b->rank+1];
912:   b->brstart = (b->rstart)/bs;
913:   b->brend   = (b->rend)/bs;


916:   /* Determine local diagonals; for now, assume global rows = global cols */
917:   /* These are sorted in MatCreateSeqBDiag */
918:   PetscMalloc((nd+1)*sizeof(int),&ldiag);
919:   len  = B->M/bs + B->N/bs + 1;
920:   PetscMalloc(len*sizeof(int),&b->gdiag);
921:   k    = 0;
922:   PetscLogObjectMemory(B,(nd+1)*sizeof(int) + (b->size+2)*sizeof(int)
923:                         + sizeof(struct _p_Mat) + sizeof(Mat_MPIBDiag));
924:   if (diagv) {
925:     PetscMalloc((nd+1)*sizeof(Scalar*),&ldiagv);
926:   }
927:   for (i=0; i<nd; i++) {
928:     b->gdiag[i] = diag[i];
929:     if (diag[i] > 0) { /* lower triangular */
930:       if (diag[i] < b->brend) {
931:         ldiag[k] = diag[i] - b->brstart;
932:         if (diagv) ldiagv[k] = diagv[i];
933:         k++;
934:       }
935:     } else { /* upper triangular */
936:       if (B->M/bs - diag[i] > B->N/bs) {
937:         if (B->M/bs + diag[i] > b->brstart) {
938:           ldiag[k] = diag[i] - b->brstart;
939:           if (diagv) ldiagv[k] = diagv[i];
940:           k++;
941:         }
942:       } else {
943:         if (B->M/bs > b->brstart) {
944:           ldiag[k] = diag[i] - b->brstart;
945:           if (diagv) ldiagv[k] = diagv[i];
946:           k++;
947:         }
948:       }
949:     }
950:   }

952:   /* Form local matrix */
953:   MatCreateSeqBDiag(PETSC_COMM_SELF,B->m,B->n,k,bs,ldiag,ldiagv,&b->A);
954:   PetscLogObjectParent(B,b->A);
955:   PetscFree(ldiag);
956:   if (ldiagv) {PetscFree(ldiagv);}

958:   return(0);
959: }
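
/*
     Illustrative usage sketch (not part of the original source): the expected
     calling sequence for MatMPIBDiagSetPreallocation().  The global size M,
     block size bs, and block diagonal numbers used below are hypothetical.

        Mat A;
        int M = 128, bs = 4;
        int diag[3] = {-1,0,1};            block super-, main and sub-diagonal

        MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,M,&A);
        MatSetType(A,MATMPIBDIAG);
        MatMPIBDiagSetPreallocation(A,3,bs,diag,PETSC_NULL);
        ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
        MatDestroy(A);
*/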

961: /*@C
962:    MatCreateMPIBDiag - Creates a sparse parallel matrix in MPIBDiag format.

964:    Collective on MPI_Comm

966:    Input Parameters:
967: +  comm - MPI communicator
 968: .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
 969: .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
970: .  N - number of columns (local and global)
971: .  nd - number of block diagonals (global) (optional)
 972: .  bs - each element of a diagonal is a bs x bs dense matrix
973: .  diag - optional array of block diagonal numbers (length nd).
974:    For a matrix element A[i,j], where i=row and j=column, the
975:    diagonal number is
976: $     diag = i/bs - j/bs  (integer division)
977:    Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as 
978:    needed (expensive).
979: -  diagv  - pointer to actual diagonals (in same order as diag array), 
980:    if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
981:    to control memory allocation.

983:    Output Parameter:
984: .  A - the matrix 

986:    Options Database Keys:
987: .  -mat_block_size <bs> - Sets blocksize
988: .  -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers

990:    Notes:
 991:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
 992:    then it must be used on all processors that share the object for that argument.

994:    The parallel matrix is partitioned across the processors by rows, where
995:    each local rectangular matrix is stored in the uniprocessor block 
996:    diagonal format.  See the users manual for further details.

998:    The user MUST specify either the local or global numbers of rows
999:    (possibly both).

1001:    The case bs=1 (conventional diagonal storage) is implemented as
1002:    a special case.

1004:    Fortran Notes:
1005:    Fortran programmers cannot set diagv; this variable is ignored.

1007:    Level: intermediate

1009: .keywords: matrix, block, diagonal, parallel, sparse

1011: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
1012: @*/
1013: int MatCreateMPIBDiag(MPI_Comm comm,int m,int M,int N,int nd,int bs,int *diag,Scalar **diagv,Mat *A)
1014: {
1015:   int ierr,size;

1018:   MatCreate(comm,m,m,M,N,A);
1019:   MPI_Comm_size(comm,&size);
1020:   if (size > 1) {
1021:     MatSetType(*A,MATMPIBDIAG);
1022:     MatMPIBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1023:   } else {
1024:     MatSetType(*A,MATSEQBDIAG);
1025:     MatSeqBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1026:   }
1027:   return(0);
1028: }
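
/*
     Illustrative usage sketch (not part of the original source): creating and
     filling a parallel tridiagonal matrix with MatCreateMPIBDiag().  With bs=1
     the diagonal number of element A[i,j] is simply i-j, so the superdiagonal
     is -1, the main diagonal 0, and the subdiagonal +1.  The global size M and
     the stencil values are hypothetical.

        Mat    A;
        int    i,rstart,rend,col[3],M = 100,diag[3] = {-1,0,1};
        Scalar v[3] = {-1.0,2.0,-1.0};

        MatCreateMPIBDiag(PETSC_COMM_WORLD,PETSC_DECIDE,M,M,3,1,diag,PETSC_NULL,&A);
        MatGetOwnershipRange(A,&rstart,&rend);
        for (i=rstart; i<rend; i++) {
          if (i == 0 || i == M-1) continue;      boundary rows omitted for brevity
          col[0] = i-1; col[1] = i; col[2] = i+1;
          MatSetValues(A,1,&i,3,col,v,INSERT_VALUES);
        }
        MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
        MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
        MatDestroy(A);
*/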

1030: /*@C
1031:    MatBDiagGetData - Gets the data for the block diagonal matrix format.
1032:    For the parallel case, this returns information for the local submatrix.

1034:    Input Parameters:
1035: .  mat - the matrix, stored in block diagonal format.

1037:    Not Collective

1039:    Output Parameters:
 1042: +  nd - number of block diagonals
 1043: .  bs - each element of a diagonal is a bs x bs dense matrix
 1044: .  bdlen - array of total block lengths of the block diagonals
 1045: .  diag - array of block diagonal numbers (length nd).
 1046:    For a matrix element A[i,j], where i=row and j=column, the
 1047:    diagonal number is
 1048: $     diag = i/bs - j/bs  (integer division)
 1051: -  diagv - pointer to the actual diagonals (in same order as the diag array)

1053:    Level: advanced

1055:    Notes:
1056:    See the users manual for further details regarding this storage format.

1058: .keywords: matrix, block, diagonal, get, data

1060: .seealso: MatCreateSeqBDiag(), MatCreateMPIBDiag()
1061: @*/
1062: int MatBDiagGetData(Mat mat,int *nd,int *bs,int **diag,int **bdlen,Scalar ***diagv)
1063: {
1064:   Mat_MPIBDiag *pdmat;
1065:   Mat_SeqBDiag *dmat = 0;
1066:   PetscTruth   isseq,ismpi;
1067:   int          ierr;

1071:   PetscTypeCompare((PetscObject)mat,MATSEQBDIAG,&isseq);
1072:   PetscTypeCompare((PetscObject)mat,MATMPIBDIAG,&ismpi);
1073:   if (isseq) {
1074:     dmat = (Mat_SeqBDiag*)mat->data;
1075:   } else if (ismpi) {
1076:     pdmat = (Mat_MPIBDiag*)mat->data;
1077:     dmat = (Mat_SeqBDiag*)pdmat->A->data;
1078:   } else SETERRQ(PETSC_ERR_SUP,"Valid only for MATSEQBDIAG and MATMPIBDIAG formats");
1079:   *nd    = dmat->nd;
1080:   *bs    = dmat->bs;
1081:   *diag  = dmat->diag;
1082:   *bdlen = dmat->bdlen;
1083:   *diagv = dmat->diagv;
1084:   return(0);
1085: }
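
/*
     Illustrative usage sketch (not part of the original source): inspecting the
     (local) block diagonal storage of an assembled matrix with
     MatBDiagGetData().  The variable names used here are hypothetical.

        int    i,nd,bs,*diag,*bdlen;
        Scalar **diagv;

        MatBDiagGetData(A,&nd,&bs,&diag,&bdlen,&diagv);
        for (i=0; i<nd; i++) {
          PetscPrintf(PETSC_COMM_SELF,"diagonal %d: number %d, block length %d\n",i,diag[i],bdlen[i]);
        }
*/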

 1087:  #include "petscsys.h"

1089: EXTERN_C_BEGIN
1090: int MatLoad_MPIBDiag(PetscViewer viewer,MatType type,Mat *newmat)
1091: {
1092:   Mat          A;
1093:   Scalar       *vals,*svals;
1094:   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1095:   MPI_Status   status;
1096:   int          bs,i,nz,ierr,j,rstart,rend,fd,*rowners,maxnz,*cols;
1097:   int          header[4],rank,size,*rowlengths = 0,M,N,m,Mbs;
1098:   int          *ourlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*smycols;
1099:   int          tag = ((PetscObject)viewer)->tag,extra_rows;

1102:   MPI_Comm_size(comm,&size);
1103:   MPI_Comm_rank(comm,&rank);
1104:   if (!rank) {
1105:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1106:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1107:     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1108:     if (header[3] < 0) {
 1109:       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBDiag");
1110:     }
1111:   }
1112:   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1113:   M = header[1]; N = header[2];

1115:   bs = 1;   /* uses a block size of 1 by default; */
1116:   PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);

1118:   /* 
1119:      This code adds extra rows to make sure the number of rows is 
1120:      divisible by the blocksize
1121:   */
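  /* For example (illustrative numbers only): M=10 with bs=4 gives Mbs=2 and
     extra_rows = 4 - 10 + 8 = 2, so Mbs is bumped to 3 and two padding rows,
     each with a single 1.0 on the diagonal, are appended below. */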
1122:   Mbs        = M/bs;
1123:   extra_rows = bs - M + bs*(Mbs);
1124:   if (extra_rows == bs) extra_rows = 0;
1125:   else                  Mbs++;
1126:   if (extra_rows && !rank) {
 1127:     PetscLogInfo(0,"MatLoad_MPIBDiag:Padding loaded matrix to match blocksize\n");
1128:   }

1130:   /* determine ownership of all rows */
1131:   m          = bs*(Mbs/size + ((Mbs % size) > rank));
1132:   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);
1133:   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1134:   rowners[0] = 0;
1135:   for (i=2; i<=size; i++) {
1136:     rowners[i] += rowners[i-1];
1137:   }
1138:   rstart = rowners[rank];
1139:   rend   = rowners[rank+1];

1141:   /* distribute row lengths to all processors */
1142:   PetscMalloc((rend-rstart)*sizeof(int),&ourlens);
1143:   if (!rank) {
1144:     PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
1145:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1146:     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1147:     PetscMalloc(size*sizeof(int),&sndcounts);
1148:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1149:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1150:     PetscFree(sndcounts);
1151:   } else {
1152:     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1153:   }

1155:   if (!rank) {
1156:     /* calculate the number of nonzeros on each processor */
1157:     PetscMalloc(size*sizeof(int),&procsnz);
1158:     PetscMemzero(procsnz,size*sizeof(int));
1159:     for (i=0; i<size; i++) {
1160:       for (j=rowners[i]; j<rowners[i+1]; j++) {
1161:         procsnz[i] += rowlengths[j];
1162:       }
1163:     }
1164:     PetscFree(rowlengths);

1166:     /* determine max buffer needed and allocate it */
1167:     maxnz = 0;
1168:     for (i=0; i<size; i++) {
1169:       maxnz = PetscMax(maxnz,procsnz[i]);
1170:     }
1171:     PetscMalloc(maxnz*sizeof(int),&cols);

1173:     /* read in my part of the matrix column indices  */
1174:     nz   = procsnz[0];
1175:     PetscMalloc(nz*sizeof(int),&mycols);
1176:     if (size == 1)  nz -= extra_rows;
1177:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1178:     if (size == 1)  for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }

 1180:     /* read in everyone else's portion and ship it off */
1181:     for (i=1; i<size-1; i++) {
1182:       nz   = procsnz[i];
1183:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1184:       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1185:     }
1186:     /* read in the stuff for the last proc */
1187:     if (size != 1) {
1188:       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
1189:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1190:       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
1191:       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
1192:     }
1193:     PetscFree(cols);
1194:   } else {
1195:     /* determine buffer space needed for message */
1196:     nz = 0;
1197:     for (i=0; i<m; i++) {
1198:       nz += ourlens[i];
1199:     }
1200:     PetscMalloc(nz*sizeof(int),&mycols);

1202:     /* receive message of column indices*/
1203:     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1204:     MPI_Get_count(&status,MPI_INT,&maxnz);
1205:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1206:   }

1208:   MatCreateMPIBDiag(comm,m,M+extra_rows,N+extra_rows,PETSC_NULL,bs,PETSC_NULL,PETSC_NULL,newmat);
1209:   A = *newmat;

1211:   if (!rank) {
1212:     PetscMalloc(maxnz*sizeof(Scalar),&vals);

1214:     /* read in my part of the matrix numerical values  */
1215:     nz = procsnz[0];
1216:     if (size == 1)  nz -= extra_rows;
1217:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1218:     if (size == 1)  for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

1220:     /* insert into matrix */
1221:     jj      = rstart;
1222:     smycols = mycols;
1223:     svals   = vals;
1224:     for (i=0; i<m; i++) {
1225:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1226:       smycols += ourlens[i];
1227:       svals   += ourlens[i];
1228:       jj++;
1229:     }

1231:     /* read in other processors (except the last one) and ship out */
1232:     for (i=1; i<size-1; i++) {
1233:       nz   = procsnz[i];
1234:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1235:       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1236:     }
1237:     /* the last proc */
1238:     if (size != 1){
1239:       nz   = procsnz[i] - extra_rows;
1240:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1241:       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
1242:       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
1243:     }
1244:     PetscFree(procsnz);
1245:   } else {
1246:     /* receive numeric values */
1247:     PetscMalloc(nz*sizeof(Scalar),&vals);

1249:     /* receive message of values*/
1250:     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1251:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1252:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

1254:     /* insert into matrix */
1255:     jj      = rstart;
1256:     smycols = mycols;
1257:     svals   = vals;
1258:     for (i=0; i<m; i++) {
1259:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1260:       smycols += ourlens[i];
1261:       svals   += ourlens[i];
1262:       jj++;
1263:     }
1264:   }
1265:   PetscFree(ourlens);
1266:   PetscFree(vals);
1267:   PetscFree(mycols);
1268:   PetscFree(rowners);

1270:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1271:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1272:   return(0);
1273: }
1274: EXTERN_C_END