Actual source code: mpidense.c

  1: /*$Id: mpidense.c,v 1.153 2001/03/23 23:21:49 balay Exp $*/

  3: /*
  4:    Basic functions for basic parallel dense matrices.
  5: */
  6: 
  7: #include "src/mat/impls/dense/mpi/mpidense.h"
  8: #include "src/vec/vecimpl.h"

 10: EXTERN_C_BEGIN
 11: int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
 12: {
 13:   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
 14:   int          m = A->m,rstart = mdn->rstart,ierr;
 15:   Scalar       *array;
 16:   MPI_Comm     comm;

 19:   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

 21:   /* The reuse aspect is not implemented efficiently */
 22:   if (reuse) { MatDestroy(*B);}

 24:   PetscObjectGetComm((PetscObject)(mdn->A),&comm);
 25:   MatGetArray(mdn->A,&array);
 26:   MatCreateSeqDense(comm,m,m,array+m*rstart,B);
 27:   MatRestoreArray(mdn->A,&array);
 28:   MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
 29:   MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
 30: 
 31:   *iscopy = PETSC_TRUE;
 32:   return(0);
 33: }
 34: EXTERN_C_END

 36: EXTERN int MatSetUpMultiply_MPIDense(Mat);

 38: int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
 39: {
 40:   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
 41:   int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
 42:   PetscTruth   roworiented = A->roworiented;

 45:   for (i=0; i<m; i++) {
 46:     if (idxm[i] < 0) continue;
 47:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 48:     if (idxm[i] >= rstart && idxm[i] < rend) {
 49:       row = idxm[i] - rstart;
 50:       if (roworiented) {
 51:         MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
 52:       } else {
 53:         for (j=0; j<n; j++) {
 54:           if (idxn[j] < 0) continue;
 55:           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 56:           MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
 57:         }
 58:       }
 59:     } else {
 60:       if (!A->donotstash) {
 61:         if (roworiented) {
 62:           MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
 63:         } else {
 64:           MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
 65:         }
 66:       }
 67:     }
 68:   }
 69:   return(0);
 70: }

 72: int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
 73: {
 74:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
 75:   int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

 78:   for (i=0; i<m; i++) {
 79:     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
 80:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 81:     if (idxm[i] >= rstart && idxm[i] < rend) {
 82:       row = idxm[i] - rstart;
 83:       for (j=0; j<n; j++) {
 84:         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
 85:         if (idxn[j] >= mat->N) {
 86:           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 87:         }
 88:         MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
 89:       }
 90:     } else {
 91:       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
 92:     }
 93:   }
 94:   return(0);
 95: }

 97: int MatGetArray_MPIDense(Mat A,Scalar **array)
 98: {
 99:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
100:   int          ierr;

103:   MatGetArray(a->A,array);
104:   return(0);
105: }

107: static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
108: {
109:   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
110:   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
111:   int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
112:   Scalar       *av,*bv,*v = lmat->v;
113:   Mat          newmat;

116:   ISGetIndices(isrow,&irow);
117:   ISGetIndices(iscol,&icol);
118:   ISGetLocalSize(isrow,&nrows);
119:   ISGetLocalSize(iscol,&ncols);

121:   /* No parallel redistribution currently supported! Should really check each index set
122:      to confirm that it is OK.  ... Currently supports only a submatrix with the same
123:      partitioning as the original matrix! */

125:   MatGetLocalSize(A,&nlrows,&nlcols);
126:   MatGetOwnershipRange(A,&rstart,&rend);
127: 
128:   /* Check submatrix call */
129:   if (scall == MAT_REUSE_MATRIX) {
130:     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
131:     /* Really need to test row and column sizes! */
132:     newmat = *B;
133:   } else {
134:     /* Create and fill new matrix */
135:     MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);
136:   }

138:   /* Now extract the data pointers and do the copy, column at a time */
139:   newmatd = (Mat_MPIDense*)newmat->data;
140:   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
141: 
142:   for (i=0; i<ncols; i++) {
143:     av = v + nlrows*icol[i];
144:     for (j=0; j<nrows; j++) {
145:       *bv++ = av[irow[j] - rstart];
146:     }
147:   }

149:   /* Assemble the matrices so that the correct flags are set */
150:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
151:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);

153:   /* Free work space */
154:   ISRestoreIndices(isrow,&irow);
155:   ISRestoreIndices(iscol,&icol);
156:   *B = newmat;
157:   return(0);
158: }

160: int MatRestoreArray_MPIDense(Mat A,Scalar **array)
161: {
163:   return(0);
164: }

166: int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
167: {
168:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
169:   MPI_Comm     comm = mat->comm;
170:   int          ierr,nstash,reallocs;
171:   InsertMode   addv;

174:   /* make sure all processors are either in INSERTMODE or ADDMODE */
175:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
176:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
177:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
178:   }
179:   mat->insertmode = addv; /* in case this processor had no cache */

181:   MatStashScatterBegin_Private(&mat->stash,mdn->rowners);
182:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
183:   PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
184:   return(0);
185: }

187: int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
188: {
189:   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
190:   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
191:   Scalar       *val;
192:   InsertMode   addv=mat->insertmode;

195:   /*  wait on receives */
196:   while (1) {
197:     MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
198:     if (!flg) break;
199: 
200:     for (i=0; i<n;) {
201:       /* Now identify the consecutive vals belonging to the same row */
202:       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
203:       if (j < n) ncols = j-i;
204:       else       ncols = n-i;
205:       /* Now assemble all these values with a single function call */
206:       MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);
207:       i = j;
208:     }
209:   }
210:   MatStashScatterEnd_Private(&mat->stash);
211: 
212:   MatAssemblyBegin(mdn->A,mode);
213:   MatAssemblyEnd(mdn->A,mode);

215:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
216:     MatSetUpMultiply_MPIDense(mat);
217:   }
218:   return(0);
219: }

221: int MatZeroEntries_MPIDense(Mat A)
222: {
223:   int          ierr;
224:   Mat_MPIDense *l = (Mat_MPIDense*)A->data;

227:   MatZeroEntries(l->A);
228:   return(0);
229: }

231: int MatGetBlockSize_MPIDense(Mat A,int *bs)
232: {
234:   *bs = 1;
235:   return(0);
236: }

238: /* the code does not do the diagonal entries correctly unless the 
239:    matrix is square and the column and row ownerships are identical.
240:    This is a BUG. The only way to fix it seems to be to access 
241:    mdn->A and mdn->B directly and not through the MatZeroRows() 
242:    routine. 
243: */
244: int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
245: {
246:   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
247:   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
248:   int            *procs,*nprocs,j,idx,nsends,*work;
249:   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
250:   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
251:   int            *lens,imdex,*lrows,*values;
252:   MPI_Comm       comm = A->comm;
253:   MPI_Request    *send_waits,*recv_waits;
254:   MPI_Status     recv_status,*send_status;
255:   IS             istmp;
256:   PetscTruth     found;

259:   ISGetLocalSize(is,&N);
260:   ISGetIndices(is,&rows);

262:   /*  first count number of contributors to each processor */
263:   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);
264:   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));
265:   procs = nprocs + size;
266:   ierr  = PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
267:   for (i=0; i<N; i++) {
268:     idx = rows[i];
269:     found = PETSC_FALSE;
270:     for (j=0; j<size; j++) {
271:       if (idx >= owners[j] && idx < owners[j+1]) {
272:         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
273:       }
274:     }
275:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
276:   }
277:   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

279:   /* inform other processors of number of messages and max length*/
280:   ierr   = PetscMalloc(2*size*sizeof(int),&work);
281:   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);
282:   nmax   = work[rank];
283:   nrecvs = work[size+rank];
284:   ierr   = PetscFree(work);

286:   /* post receives:   */
287:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
288:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
289:   for (i=0; i<nrecvs; i++) {
290:     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
291:   }

293:   /* do sends:
294:       1) starts[i] gives the starting index in svalues for stuff going to 
295:          the ith processor
296:   */
297:   PetscMalloc((N+1)*sizeof(int),&svalues);
298:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
299:   PetscMalloc((size+1)*sizeof(int),&starts);
300:   starts[0]  = 0;
301:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
302:   for (i=0; i<N; i++) {
303:     svalues[starts[owner[i]]++] = rows[i];
304:   }
305:   ISRestoreIndices(is,&rows);

307:   starts[0] = 0;
308:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
309:   count = 0;
310:   for (i=0; i<size; i++) {
311:     if (procs[i]) {
312:       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
313:     }
314:   }
315:   PetscFree(starts);

317:   base = owners[rank];

319:   /*  wait on receives */
320:   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
321:   source = lens + nrecvs;
322:   count  = nrecvs; slen = 0;
323:   while (count) {
324:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
325:     /* unpack receives into our local space */
326:     MPI_Get_count(&recv_status,MPI_INT,&n);
327:     source[imdex]  = recv_status.MPI_SOURCE;
328:     lens[imdex]  = n;
329:     slen += n;
330:     count--;
331:   }
332:   PetscFree(recv_waits);
333: 
334:   /* move the data into the send scatter */
335:   PetscMalloc((slen+1)*sizeof(int),&lrows);
336:   count = 0;
337:   for (i=0; i<nrecvs; i++) {
338:     values = rvalues + i*nmax;
339:     for (j=0; j<lens[i]; j++) {
340:       lrows[count++] = values[j] - base;
341:     }
342:   }
343:   PetscFree(rvalues);
344:   PetscFree(lens);
345:   PetscFree(owner);
346:   PetscFree(nprocs);
347: 
348:   /* actually zap the local rows */
349:   ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
350:   PetscLogObjectParent(A,istmp);
351:   PetscFree(lrows);
352:   MatZeroRows(l->A,istmp,diag);
353:   ISDestroy(istmp);

355:   /* wait on sends */
356:   if (nsends) {
357:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
358:     MPI_Waitall(nsends,send_waits,send_status);
359:     PetscFree(send_status);
360:   }
361:   PetscFree(send_waits);
362:   PetscFree(svalues);

364:   return(0);
365: }

367: int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
368: {
369:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
370:   int          ierr;

373:   VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
374:   VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
375:   MatMult_SeqDense(mdn->A,mdn->lvec,yy);
376:   return(0);
377: }

379: int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
380: {
381:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
382:   int          ierr;

385:   VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
386:   VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
387:   MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
388:   return(0);
389: }

391: int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
392: {
393:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
394:   int          ierr;
395:   Scalar       zero = 0.0;

398:   VecSet(&zero,yy);
399:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
400:   VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
401:   VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
402:   return(0);
403: }

405: int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
406: {
407:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
408:   int          ierr;

411:   VecCopy(yy,zz);
412:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
413:   VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
414:   VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
415:   return(0);
416: }

418: int MatGetDiagonal_MPIDense(Mat A,Vec v)
419: {
420:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
421:   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
422:   int          ierr,len,i,n,m = A->m,radd;
423:   Scalar       *x,zero = 0.0;
424: 
426:   VecSet(&zero,v);
427:   VecGetArray(v,&x);
428:   VecGetSize(v,&n);
429:   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
430:   len  = PetscMin(a->A->m,a->A->n);
431:   radd = a->rstart*m;
432:   for (i=0; i<len; i++) {
433:     x[i] = aloc->v[radd + i*m + i];
434:   }
435:   VecRestoreArray(v,&x);
436:   return(0);
437: }

439: int MatDestroy_MPIDense(Mat mat)
440: {
441:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
442:   int          ierr;


446: #if defined(PETSC_USE_LOG)
447:   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
448: #endif
449:   MatStashDestroy_Private(&mat->stash);
450:   PetscFree(mdn->rowners);
451:   MatDestroy(mdn->A);
452:   if (mdn->lvec)   VecDestroy(mdn->lvec);
453:   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
454:   if (mdn->factor) {
455:     if (mdn->factor->temp)   {PetscFree(mdn->factor->temp);}
456:     if (mdn->factor->tag)    {PetscFree(mdn->factor->tag);}
457:     if (mdn->factor->pivots) {PetscFree(mdn->factor->pivots);}
458:     PetscFree(mdn->factor);
459:   }
460:   PetscFree(mdn);
461:   return(0);
462: }

464: static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
465: {
466:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
467:   int          ierr;

470:   if (mdn->size == 1) {
471:     MatView(mdn->A,viewer);
472:   }
473:   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
474:   return(0);
475: }

477: static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
478: {
479:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
480:   int               ierr,size = mdn->size,rank = mdn->rank;
481:   PetscViewerType   vtype;
482:   PetscTruth        isascii,isdraw;
483:   PetscViewer       sviewer;
484:   PetscViewerFormat format;

487:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
488:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
489:   if (isascii) {
490:     PetscViewerGetType(viewer,&vtype);
491:     PetscViewerGetFormat(viewer,&format);
492:     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
493:       MatInfo info;
494:       MatGetInfo(mat,MAT_LOCAL,&info);
495:       PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
496:                    (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
497:       PetscViewerFlush(viewer);
498:       VecScatterView(mdn->Mvctx,viewer);
499:       return(0);
500:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
501:       return(0);
502:     }
503:   } else if (isdraw) {
504:     PetscDraw       draw;
505:     PetscTruth isnull;

507:     PetscViewerDrawGetDraw(viewer,0,&draw);
508:     PetscDrawIsNull(draw,&isnull);
509:     if (isnull) return(0);
510:   }

512:   if (size == 1) {
513:     MatView(mdn->A,viewer);
514:   } else {
515:     /* assemble the entire matrix onto first processor. */
516:     Mat          A;
517:     int          M = mat->M,N = mat->N,m,row,i,nz,*cols;
518:     Scalar       *vals;

520:     if (!rank) {
521:       MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);
522:     } else {
523:       MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);
524:     }
525:     PetscLogObjectParent(mat,A);

527:     /* Copy the matrix ... This isn't the most efficient means,
528:        but it's quick for now */
529:     row = mdn->rstart; m = mdn->A->m;
530:     for (i=0; i<m; i++) {
531:       MatGetRow(mat,row,&nz,&cols,&vals);
532:       MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);
533:       MatRestoreRow(mat,row,&nz,&cols,&vals);
534:       row++;
535:     }

537:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
538:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
539:     PetscViewerGetSingleton(viewer,&sviewer);
540:     if (!rank) {
541:       MatView(((Mat_MPIDense*)(A->data))->A,sviewer);
542:     }
543:     PetscViewerRestoreSingleton(viewer,&sviewer);
544:     PetscViewerFlush(viewer);
545:     MatDestroy(A);
546:   }
547:   return(0);
548: }

550: int MatView_MPIDense(Mat mat,PetscViewer viewer)
551: {
552:   int        ierr;
553:   PetscTruth isascii,isbinary,isdraw,issocket;
554: 
556: 
557:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
558:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
559:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
560:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);

562:   if (isascii || issocket || isdraw) {
563:     MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
564:   } else if (isbinary) {
565:     MatView_MPIDense_Binary(mat,viewer);
566:   } else {
567:     SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
568:   }
569:   return(0);
570: }

572: int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
573: {
574:   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
575:   Mat          mdn = mat->A;
576:   int          ierr;
577:   PetscReal    isend[5],irecv[5];

580:   info->rows_global    = (double)A->M;
581:   info->columns_global = (double)A->N;
582:   info->rows_local     = (double)A->m;
583:   info->columns_local  = (double)A->N;
584:   info->block_size     = 1.0;
585:   MatGetInfo(mdn,MAT_LOCAL,info);
586:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
587:   isend[3] = info->memory;  isend[4] = info->mallocs;
588:   if (flag == MAT_LOCAL) {
589:     info->nz_used      = isend[0];
590:     info->nz_allocated = isend[1];
591:     info->nz_unneeded  = isend[2];
592:     info->memory       = isend[3];
593:     info->mallocs      = isend[4];
594:   } else if (flag == MAT_GLOBAL_MAX) {
595:     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);
596:     info->nz_used      = irecv[0];
597:     info->nz_allocated = irecv[1];
598:     info->nz_unneeded  = irecv[2];
599:     info->memory       = irecv[3];
600:     info->mallocs      = irecv[4];
601:   } else if (flag == MAT_GLOBAL_SUM) {
602:     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);
603:     info->nz_used      = irecv[0];
604:     info->nz_allocated = irecv[1];
605:     info->nz_unneeded  = irecv[2];
606:     info->memory       = irecv[3];
607:     info->mallocs      = irecv[4];
608:   }
609:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
610:   info->fill_ratio_needed = 0;
611:   info->factor_mallocs    = 0;
612:   return(0);
613: }

615: int MatSetOption_MPIDense(Mat A,MatOption op)
616: {
617:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
618:   int          ierr;

621:   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
622:       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
623:       op == MAT_NEW_NONZERO_LOCATION_ERR ||
624:       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
625:       op == MAT_COLUMNS_SORTED ||
626:       op == MAT_COLUMNS_UNSORTED) {
627:         MatSetOption(a->A,op);
628:   } else if (op == MAT_ROW_ORIENTED) {
629:         a->roworiented = PETSC_TRUE;
630:         MatSetOption(a->A,op);
631:   } else if (op == MAT_ROWS_SORTED ||
632:              op == MAT_ROWS_UNSORTED ||
633:              op == MAT_YES_NEW_DIAGONALS ||
634:              op == MAT_USE_HASH_TABLE) {
635:     PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
636:   } else if (op == MAT_COLUMN_ORIENTED) {
637:     a->roworiented = PETSC_FALSE;
638:     MatSetOption(a->A,op);
639:   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
640:     a->donotstash = PETSC_TRUE;
641:   } else if (op == MAT_NO_NEW_DIAGONALS) {
642:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
643:   } else {
644:     SETERRQ(PETSC_ERR_SUP,"unknown option");
645:   }
646:   return(0);
647: }

649: int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
650: {
651:   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;

654:   if (m) *m = mat->rstart;
655:   if (n) *n = mat->rend;
656:   return(0);
657: }

659: int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
660: {
661:   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
662:   int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;

665:   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows")
666:   lrow = row - rstart;
667:   MatGetRow(mat->A,lrow,nz,idx,v);
668:   return(0);
669: }

671: int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
672: {

676:   if (idx) {PetscFree(*idx);}
677:   if (v) {PetscFree(*v);}
678:   return(0);
679: }

681: int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
682: {
683:   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
684:   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
685:   Scalar       *l,*r,x,*v;
686:   int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;

689:   MatGetLocalSize(A,&s2,&s3);
690:   if (ll) {
691:     VecGetLocalSize(ll,&s2a);
692:     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
693:     VecGetArray(ll,&l);
694:     for (i=0; i<m; i++) {
695:       x = l[i];
696:       v = mat->v + i;
697:       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
698:     }
699:     VecRestoreArray(ll,&l);
700:     PetscLogFlops(n*m);
701:   }
702:   if (rr) {
703:     VecGetSize(rr,&s3a);
704:     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
705:     VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
706:     VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
707:     VecGetArray(mdn->lvec,&r);
708:     for (i=0; i<n; i++) {
709:       x = r[i];
710:       v = mat->v + i*m;
711:       for (j=0; j<m; j++) { (*v++) *= x;}
712:     }
713:     VecRestoreArray(mdn->lvec,&r);
714:     PetscLogFlops(n*m);
715:   }
716:   return(0);
717: }

719: int MatNorm_MPIDense(Mat A,NormType type,PetscReal *norm)
720: {
721:   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
722:   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
723:   int          ierr,i,j;
724:   PetscReal    sum = 0.0;
725:   Scalar       *v = mat->v;

728:   if (mdn->size == 1) {
729:      MatNorm(mdn->A,type,norm);
730:   } else {
731:     if (type == NORM_FROBENIUS) {
732:       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
733: #if defined(PETSC_USE_COMPLEX)
734:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
735: #else
736:         sum += (*v)*(*v); v++;
737: #endif
738:       }
739:       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
740:       *norm = sqrt(*norm);
741:       PetscLogFlops(2*mdn->A->n*mdn->A->m);
742:     } else if (type == NORM_1) {
743:       PetscReal *tmp,*tmp2;
744:       PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);
745:       tmp2 = tmp + A->N;
746:       PetscMemzero(tmp,2*A->N*sizeof(PetscReal));
747:       *norm = 0.0;
748:       v = mat->v;
749:       for (j=0; j<mdn->A->n; j++) {
750:         for (i=0; i<mdn->A->m; i++) {
751:           tmp[j] += PetscAbsScalar(*v);  v++;
752:         }
753:       }
754:       MPI_Allreduce(tmp,tmp2,A->N,MPI_DOUBLE,MPI_SUM,A->comm);
755:       for (j=0; j<A->N; j++) {
756:         if (tmp2[j] > *norm) *norm = tmp2[j];
757:       }
758:       PetscFree(tmp);
759:       PetscLogFlops(A->n*A->m);
760:     } else if (type == NORM_INFINITY) { /* max row norm */
761:       PetscReal ntemp;
762:       MatNorm(mdn->A,type,&ntemp);
763:       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
764:     } else {
765:       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
766:     }
767:   }
768:   return(0);
769: }

771: int MatTranspose_MPIDense(Mat A,Mat *matout)
772: {
773:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
774:   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
775:   Mat          B;
776:   int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
777:   int          j,i,ierr;
778:   Scalar       *v;

781:   if (!matout && M != N) {
782:     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
783:   }
784:   MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);

786:   m = a->A->m; n = a->A->n; v = Aloc->v;
787:   PetscMalloc(n*sizeof(int),&rwork);
788:   for (j=0; j<n; j++) {
789:     for (i=0; i<m; i++) rwork[i] = rstart + i;
790:     MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
791:     v   += m;
792:   }
793:   PetscFree(rwork);
794:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
795:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
796:   if (matout) {
797:     *matout = B;
798:   } else {
799:     MatHeaderCopy(A,B);
800:   }
801:   return(0);
802: }

804: #include "petscblaslapack.h"
805: int MatScale_MPIDense(Scalar *alpha,Mat inA)
806: {
807:   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
808:   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
809:   int          one = 1,nz;

812:   nz = inA->m*inA->N;
813:   BLscal_(&nz,alpha,a->v,&one);
814:   PetscLogFlops(nz);
815:   return(0);
816: }

818: static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
819: EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

821: int MatSetUpPreallocation_MPIDense(Mat A)
822: {
823:   int        ierr;

826:   MatMPIDenseSetPreallocation(A,0);
827:   return(0);
828: }

830: /* -------------------------------------------------------------------*/
831: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
832:        MatGetRow_MPIDense,
833:        MatRestoreRow_MPIDense,
834:        MatMult_MPIDense,
835:        MatMultAdd_MPIDense,
836:        MatMultTranspose_MPIDense,
837:        MatMultTransposeAdd_MPIDense,
838:        0,
839:        0,
840:        0,
841:        0,
842:        0,
843:        0,
844:        0,
845:        MatTranspose_MPIDense,
846:        MatGetInfo_MPIDense,0,
847:        MatGetDiagonal_MPIDense,
848:        MatDiagonalScale_MPIDense,
849:        MatNorm_MPIDense,
850:        MatAssemblyBegin_MPIDense,
851:        MatAssemblyEnd_MPIDense,
852:        0,
853:        MatSetOption_MPIDense,
854:        MatZeroEntries_MPIDense,
855:        MatZeroRows_MPIDense,
856:        0,
857:        0,
858:        0,
859:        0,
860:        MatSetUpPreallocation_MPIDense,
861:        0,
862:        MatGetOwnershipRange_MPIDense,
863:        0,
864:        0,
865:        MatGetArray_MPIDense,
866:        MatRestoreArray_MPIDense,
867:        MatDuplicate_MPIDense,
868:        0,
869:        0,
870:        0,
871:        0,
872:        0,
873:        MatGetSubMatrices_MPIDense,
874:        0,
875:        MatGetValues_MPIDense,
876:        0,
877:        0,
878:        MatScale_MPIDense,
879:        0,
880:        0,
881:        0,
882:        MatGetBlockSize_MPIDense,
883:        0,
884:        0,
885:        0,
886:        0,
887:        0,
888:        0,
889:        0,
890:        0,
891:        0,
892:        MatGetSubMatrix_MPIDense,
893:        MatDestroy_MPIDense,
894:        MatView_MPIDense,
895:        MatGetMaps_Petsc};

897: EXTERN_C_BEGIN
898: int MatCreate_MPIDense(Mat mat)
899: {
900:   Mat_MPIDense *a;
901:   int          ierr,i;

904:   ierr              = PetscNew(Mat_MPIDense,&a);
905:   mat->data         = (void*)a;
906:   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
907:   mat->factor       = 0;
908:   mat->mapping      = 0;

910:   a->factor       = 0;
911:   mat->insertmode = NOT_SET_VALUES;
912:   MPI_Comm_rank(mat->comm,&a->rank);
913:   MPI_Comm_size(mat->comm,&a->size);

915:   PetscSplitOwnership(mat->comm,&mat->m,&mat->M);
916:   PetscSplitOwnership(mat->comm,&mat->n,&mat->N);
917:   a->nvec = mat->n;

919:   /* the information in the maps duplicates the information computed below, eventually 
920:      we should remove the duplicate information that is not contained in the maps */
921:   MapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);
922:   MapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);

924:   /* build local table of row and column ownerships */
925:   ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);
926:   a->cowners = a->rowners + a->size + 1;
927:   PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
928:   MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);
929:   a->rowners[0] = 0;
930:   for (i=2; i<=a->size; i++) {
931:     a->rowners[i] += a->rowners[i-1];
932:   }
933:   a->rstart = a->rowners[a->rank];
934:   a->rend   = a->rowners[a->rank+1];
935:   ierr      = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);
936:   a->cowners[0] = 0;
937:   for (i=2; i<=a->size; i++) {
938:     a->cowners[i] += a->cowners[i-1];
939:   }

941:   /* build cache for off array entries formed */
942:   a->donotstash = PETSC_FALSE;
943:   MatStashCreate_Private(mat->comm,1,&mat->stash);

945:   /* stuff used for matrix vector multiply */
946:   a->lvec        = 0;
947:   a->Mvctx       = 0;
948:   a->roworiented = PETSC_TRUE;

950:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
951:                                      "MatGetDiagonalBlock_MPIDense",
952:                                      MatGetDiagonalBlock_MPIDense);
953:   return(0);
954: }
955: EXTERN_C_END

957: /*@C
958:    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

960:    Not collective

962:    Input Parameters:
963: .  A - the matrix
964: -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
965:    to control all matrix memory allocation.

967:    Notes:
968:    The dense format is fully compatible with standard Fortran 77
969:    storage by columns.

971:    The data input variable is intended primarily for Fortran programmers
972:    who wish to allocate their own matrix memory space.  Most users should
973:    set data=PETSC_NULL.

975:    Level: intermediate

977: .keywords: matrix,dense, parallel

979: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
980: @*/
981: int MatMPIDenseSetPreallocation(Mat mat,Scalar *data)
982: {
983:   Mat_MPIDense *a;
984:   int          ierr;
985:   PetscTruth   flg2;

988:   PetscTypeCompare((PetscObject)mat,MATMPIDENSE,&flg2);
989:   if (!flg2) return(0);
990:   mat->preallocated = PETSC_TRUE;
991:   /* Note:  For now, when data is specified above, this assumes the user correctly
992:    allocates the local dense storage space.  We should add error checking. */

994:   a    = (Mat_MPIDense*)mat->data;
995:   MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);
996:   PetscLogObjectParent(mat,a->A);
997:   return(0);
998: }
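
/*
   Editor's note: a minimal usage sketch, not part of mpidense.c. It shows one way
   MatMPIDenseSetPreallocation() might be reached through routines that appear in
   this file (MatCreate(), MatSetType()). M and N are placeholder global sizes,
   passing PETSC_NULL lets PETSc allocate the local column-major storage, and
   error checking is omitted to match the style of this listing.

      Mat B;
      int ierr;

      ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&B);
      ierr = MatSetType(B,MATMPIDENSE);
      ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);
      ierr = MatDestroy(B);
*/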

1000: /*@C
1001:    MatCreateMPIDense - Creates a sparse parallel matrix in dense format.

1003:    Collective on MPI_Comm

1005:    Input Parameters:
1006: +  comm - MPI communicator
1007: .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1008: .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1009: .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1010: .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1011: -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1012:    to control all matrix memory allocation.

1014:    Output Parameter:
1015: .  A - the matrix

1017:    Notes:
1018:    The dense format is fully compatible with standard Fortran 77
1019:    storage by columns.

1021:    The data input variable is intended primarily for Fortran programmers
1022:    who wish to allocate their own matrix memory space.  Most users should
1023:    set data=PETSC_NULL.

1025:    The user MUST specify either the local or global matrix dimensions
1026:    (possibly both).

1028:    Level: intermediate

1030: .keywords: matrix,dense, parallel

1032: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1033: @*/
1034: int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
1035: {
1036:   int ierr,size;

1039:   MatCreate(comm,m,n,M,N,A);
1040:   MPI_Comm_size(comm,&size);
1041:   if (size > 1) {
1042:     MatSetType(*A,MATMPIDENSE);
1043:     MatMPIDenseSetPreallocation(*A,data);
1044:   } else {
1045:     MatSetType(*A,MATSEQDENSE);
1046:     MatSeqDenseSetPreallocation(*A,data);
1047:   }
1048:   return(0);
1049: }
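
/*
   Editor's note: a minimal calling sketch for MatCreateMPIDense(), not part of
   mpidense.c. It follows the man page above; the communicator, the global sizes
   M and N, and the single entry inserted at (0,0) are placeholders, and error
   checking is omitted to match the style of this listing.

      Mat    A;
      int    ierr,row = 0,col = 0;
      Scalar one = 1.0;

      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);
      ierr = MatSetValues(A,1,&row,1,&col,&one,INSERT_VALUES);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
      ierr = MatDestroy(A);
*/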

1051: static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1052: {
1053:   Mat          mat;
1054:   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1055:   int          ierr;

1058:   *newmat       = 0;
1059:   MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);
1060:   MatSetType(mat,MATMPIDENSE);
1061:   ierr              = PetscNew(Mat_MPIDense,&a);
1062:   mat->data         = (void*)a;
1063:   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1064:   mat->factor       = A->factor;
1065:   mat->assembled    = PETSC_TRUE;
1066:   mat->preallocated = PETSC_TRUE;

1068:   a->rstart       = oldmat->rstart;
1069:   a->rend         = oldmat->rend;
1070:   a->size         = oldmat->size;
1071:   a->rank         = oldmat->rank;
1072:   mat->insertmode = NOT_SET_VALUES;
1073:   a->nvec         = oldmat->nvec;
1074:   a->donotstash   = oldmat->donotstash;
1075:   ierr            = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);
1076:   PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1077:   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1078:   MatStashCreate_Private(A->comm,1,&mat->stash);

1080:   MatSetUpMultiply_MPIDense(mat);
1081:   MatDuplicate(oldmat->A,cpvalues,&a->A);
1082:   PetscLogObjectParent(mat,a->A);
1083:   *newmat = mat;
1084:   return(0);
1085: }

1087: #include "petscsys.h"

1089: int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
1090: {
1091:   int        *rowners,i,size,rank,m,ierr,nz,j;
1092:   Scalar     *array,*vals,*vals_ptr;
1093:   MPI_Status status;

1096:   MPI_Comm_rank(comm,&rank);
1097:   MPI_Comm_size(comm,&size);

1099:   /* determine ownership of all rows */
1100:   m          = M/size + ((M % size) > rank);
1101:   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);
1102:   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1103:   rowners[0] = 0;
1104:   for (i=2; i<=size; i++) {
1105:     rowners[i] += rowners[i-1];
1106:   }

1108:   MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);
1109:   MatGetArray(*newmat,&array);

1111:   if (!rank) {
1112:     PetscMalloc(m*N*sizeof(Scalar),&vals);

1114:     /* read in my part of the matrix numerical values  */
1115:     PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);
1116: 
1117:     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1118:     vals_ptr = vals;
1119:     for (i=0; i<m; i++) {
1120:       for (j=0; j<N; j++) {
1121:         array[i + j*m] = *vals_ptr++;
1122:       }
1123:     }

1125:     /* read in the other processors' parts and ship them out */
1126:     for (i=1; i<size; i++) {
1127:       nz   = (rowners[i+1] - rowners[i])*N;
1128:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1129:       MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
1130:     }
1131:   } else {
1132:     /* receive numeric values */
1133:     PetscMalloc(m*N*sizeof(Scalar),&vals);

1135:     /* receive message of values*/
1136:     MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);

1138:     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1139:     vals_ptr = vals;
1140:     for (i=0; i<m; i++) {
1141:       for (j=0; j<N; j++) {
1142:         array[i + j*m] = *vals_ptr++;
1143:       }
1144:     }
1145:   }
1146:   PetscFree(rowners);
1147:   PetscFree(vals);
1148:   MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);
1149:   MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);
1150:   return(0);
1151: }

1153: EXTERN_C_BEGIN
1154: int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
1155: {
1156:   Mat          A;
1157:   Scalar       *vals,*svals;
1158:   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1159:   MPI_Status   status;
1160:   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1161:   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1162:   int          tag = ((PetscObject)viewer)->tag;
1163:   int          i,nz,ierr,j,rstart,rend,fd;

1166:   MPI_Comm_size(comm,&size);
1167:   MPI_Comm_rank(comm,&rank);
1168:   if (!rank) {
1169:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1170:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1171:     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1172:   }

1174:   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1175:   M = header[1]; N = header[2]; nz = header[3];

1177:   /*
1178:        Handle case where matrix is stored on disk as a dense matrix 
1179:   */
1180:   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1181:     MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
1182:     return(0);
1183:   }

1185:   /* determine ownership of all rows */
1186:   m          = M/size + ((M % size) > rank);
1187:   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);
1188:   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1189:   rowners[0] = 0;
1190:   for (i=2; i<=size; i++) {
1191:     rowners[i] += rowners[i-1];
1192:   }
1193:   rstart = rowners[rank];
1194:   rend   = rowners[rank+1];

1196:   /* distribute row lengths to all processors */
1197:   ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);
1198:   offlens = ourlens + (rend-rstart);
1199:   if (!rank) {
1200:     PetscMalloc(M*sizeof(int),&rowlengths);
1201:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1202:     PetscMalloc(size*sizeof(int),&sndcounts);
1203:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1204:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1205:     PetscFree(sndcounts);
1206:   } else {
1207:     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1208:   }

1210:   if (!rank) {
1211:     /* calculate the number of nonzeros on each processor */
1212:     PetscMalloc(size*sizeof(int),&procsnz);
1213:     PetscMemzero(procsnz,size*sizeof(int));
1214:     for (i=0; i<size; i++) {
1215:       for (j=rowners[i]; j< rowners[i+1]; j++) {
1216:         procsnz[i] += rowlengths[j];
1217:       }
1218:     }
1219:     PetscFree(rowlengths);

1221:     /* determine max buffer needed and allocate it */
1222:     maxnz = 0;
1223:     for (i=0; i<size; i++) {
1224:       maxnz = PetscMax(maxnz,procsnz[i]);
1225:     }
1226:     PetscMalloc(maxnz*sizeof(int),&cols);

1228:     /* read in my part of the matrix column indices  */
1229:     nz = procsnz[0];
1230:     PetscMalloc(nz*sizeof(int),&mycols);
1231:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);

1233:     /* read in everyone else's part and ship it off */
1234:     for (i=1; i<size; i++) {
1235:       nz   = procsnz[i];
1236:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1237:       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1238:     }
1239:     PetscFree(cols);
1240:   } else {
1241:     /* determine buffer space needed for message */
1242:     nz = 0;
1243:     for (i=0; i<m; i++) {
1244:       nz += ourlens[i];
1245:     }
1246:     PetscMalloc(nz*sizeof(int),&mycols);

1248:     /* receive message of column indices*/
1249:     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1250:     MPI_Get_count(&status,MPI_INT,&maxnz);
1251:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1252:   }

1254:   /* loop over local rows, determining number of off diagonal entries */
1255:   PetscMemzero(offlens,m*sizeof(int));
1256:   jj = 0;
1257:   for (i=0; i<m; i++) {
1258:     for (j=0; j<ourlens[i]; j++) {
1259:       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1260:       jj++;
1261:     }
1262:   }

1264:   /* create our matrix */
1265:   for (i=0; i<m; i++) {
1266:     ourlens[i] -= offlens[i];
1267:   }
1268:   MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);
1269:   A = *newmat;
1270:   for (i=0; i<m; i++) {
1271:     ourlens[i] += offlens[i];
1272:   }

1274:   if (!rank) {
1275:     PetscMalloc(maxnz*sizeof(Scalar),&vals);

1277:     /* read in my part of the matrix numerical values  */
1278:     nz = procsnz[0];
1279:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1280: 
1281:     /* insert into matrix */
1282:     jj      = rstart;
1283:     smycols = mycols;
1284:     svals   = vals;
1285:     for (i=0; i<m; i++) {
1286:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1287:       smycols += ourlens[i];
1288:       svals   += ourlens[i];
1289:       jj++;
1290:     }

1292:     /* read in the other processors' parts and ship them out */
1293:     for (i=1; i<size; i++) {
1294:       nz   = procsnz[i];
1295:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1296:       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1297:     }
1298:     PetscFree(procsnz);
1299:   } else {
1300:     /* receive numeric values */
1301:     PetscMalloc(nz*sizeof(Scalar),&vals);

1303:     /* receive message of values*/
1304:     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1305:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1306:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

1308:     /* insert into matrix */
1309:     jj      = rstart;
1310:     smycols = mycols;
1311:     svals   = vals;
1312:     for (i=0; i<m; i++) {
1313:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1314:       smycols += ourlens[i];
1315:       svals   += ourlens[i];
1316:       jj++;
1317:     }
1318:   }
1319:   PetscFree(ourlens);
1320:   PetscFree(vals);
1321:   PetscFree(mycols);
1322:   PetscFree(rowners);

1324:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1325:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1326:   return(0);
1327: }
1328: EXTERN_C_END