Actual source code: mpidense.c

petsc-dev 2014-02-02
  2: /*
  3:    Basic functions for parallel dense matrices.
  4: */


  7: #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
  8: #include <../src/mat/impls/aij/mpi/mpiaij.h>

 12: /*@

 14:      MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential matrix that stores
 15:              the local portion of the operator; for a MATSEQDENSE matrix, returns the matrix itself.

 17:     Input Parameter:
 18: .      A - the Seq or MPI dense matrix

 20:     Output Parameter:
 21: .      B - the inner matrix

 23:     Level: intermediate

 25: @*/
 26: PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
 27: {
 28:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
 30:   PetscBool      flg;

 33:   PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);
 34:   if (flg) *B = mat->A;
 35:   else *B = A;
 36:   return(0);
 37: }
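
The routine above lets callers manipulate the per-process dense block without branching on the matrix type. The following is an illustrative sketch only (the helper name is invented and is not part of mpidense.c), assuming petscmat.h has been included:

static PetscErrorCode ExampleUseLocalMatrix(Mat A)
{
  Mat      Aloc;
  PetscInt mloc,nloc;

  /* Aloc is A itself for MATSEQDENSE, or the per-process MATSEQDENSE block for MATMPIDENSE */
  MatDenseGetLocalMatrix(A,&Aloc);
  MatGetSize(Aloc,&mloc,&nloc);
  PetscPrintf(PETSC_COMM_SELF,"local block is %D by %D\n",mloc,nloc);
  /* Aloc is owned by A; do not destroy it */
  return(0);
}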

 41: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
 42: {
 43:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
 45:   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

 48:   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
 49:   lrow = row - rstart;
 50:   MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);
 51:   return(0);
 52: }

 56: PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
 57: {

 61:   if (idx) {PetscFree(*idx);}
 62:   if (v) {PetscFree(*v);}
 63:   return(0);
 64: }

 68: PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
 69: {
 70:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
 72:   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
 73:   PetscScalar    *array;
 74:   MPI_Comm       comm;
 75:   Mat            B;

 78:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

 80:   PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);
 81:   if (!B) {
 82:     PetscObjectGetComm((PetscObject)(mdn->A),&comm);
 83:     MatCreate(comm,&B);
 84:     MatSetSizes(B,m,m,m,m);
 85:     MatSetType(B,((PetscObject)mdn->A)->type_name);
 86:     MatDenseGetArray(mdn->A,&array);
 87:     MatSeqDenseSetPreallocation(B,array+m*rstart);
 88:     MatDenseRestoreArray(mdn->A,&array);
 89:     MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
 90:     MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
 91:     PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);
 92:     *a   = B;
 93:     MatDestroy(&B);
 94:   } else *a = B;
 95:   return(0);
 96: }

100: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
101: {
102:   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
104:   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
105:   PetscBool      roworiented = A->roworiented;

109:   for (i=0; i<m; i++) {
110:     if (idxm[i] < 0) continue;
111:     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
112:     if (idxm[i] >= rstart && idxm[i] < rend) {
113:       row = idxm[i] - rstart;
114:       if (roworiented) {
115:         MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
116:       } else {
117:         for (j=0; j<n; j++) {
118:           if (idxn[j] < 0) continue;
119:           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
120:           MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
121:         }
122:       }
123:     } else if (!A->donotstash) {
124:       mat->assembled = PETSC_FALSE;
125:       if (roworiented) {
126:         MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);
127:       } else {
128:         MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);
129:       }
130:     }
131:   }
132:   return(0);
133: }

137: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
138: {
139:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
141:   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

144:   for (i=0; i<m; i++) {
145:     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
146:     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
147:     if (idxm[i] >= rstart && idxm[i] < rend) {
148:       row = idxm[i] - rstart;
149:       for (j=0; j<n; j++) {
150:         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
151:         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
152:         MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
153:       }
154:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
155:   }
156:   return(0);
157: }

161: PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
162: {
163:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

167:   MatDenseGetArray(a->A,array);
168:   return(0);
169: }

173: static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
174: {
175:   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
176:   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
178:   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
179:   const PetscInt *irow,*icol;
180:   PetscScalar    *av,*bv,*v = lmat->v;
181:   Mat            newmat;
182:   IS             iscol_local;

185:   ISAllGather(iscol,&iscol_local);
186:   ISGetIndices(isrow,&irow);
187:   ISGetIndices(iscol_local,&icol);
188:   ISGetLocalSize(isrow,&nrows);
189:   ISGetLocalSize(iscol,&ncols);
190:   ISGetSize(iscol,&Ncols); /* global number of columns, size of iscol_local */

 192:  /* No parallel redistribution is currently supported! We should really check each index set
 193:      to confirm that it is OK.  ... Currently only a submatrix with the same row partitioning as
 194:      the original matrix is supported! */

196:   MatGetLocalSize(A,&nlrows,&nlcols);
197:   MatGetOwnershipRange(A,&rstart,&rend);

199:   /* Check submatrix call */
200:   if (scall == MAT_REUSE_MATRIX) {
201:     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
202:     /* Really need to test rows and column sizes! */
203:     newmat = *B;
204:   } else {
205:     /* Create and fill new matrix */
206:     MatCreate(PetscObjectComm((PetscObject)A),&newmat);
207:     MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);
208:     MatSetType(newmat,((PetscObject)A)->type_name);
209:     MatMPIDenseSetPreallocation(newmat,NULL);
210:   }

212:   /* Now extract the data pointers and do the copy, column at a time */
213:   newmatd = (Mat_MPIDense*)newmat->data;
214:   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

216:   for (i=0; i<Ncols; i++) {
217:     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
218:     for (j=0; j<nrows; j++) {
219:       *bv++ = av[irow[j] - rstart];
220:     }
221:   }

223:   /* Assemble the matrices so that the correct flags are set */
224:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
225:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);

227:   /* Free work space */
228:   ISRestoreIndices(isrow,&irow);
229:   ISRestoreIndices(iscol_local,&icol);
230:   ISDestroy(&iscol_local);
231:   *B   = newmat;
232:   return(0);
233: }

237: PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
238: {
239:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

243:   MatDenseRestoreArray(a->A,array);
244:   return(0);
245: }

249: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
250: {
251:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
252:   MPI_Comm       comm;
254:   PetscInt       nstash,reallocs;
255:   InsertMode     addv;

258:   PetscObjectGetComm((PetscObject)mat,&comm);
259:   /* make sure all processors are either in INSERTMODE or ADDMODE */
260:   MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);
261:   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
262:   mat->insertmode = addv; /* in case this processor had no cache */

264:   MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
265:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
266:   PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
267:   return(0);
268: }

272: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
273: {
274:   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
276:   PetscInt       i,*row,*col,flg,j,rstart,ncols;
277:   PetscMPIInt    n;
278:   PetscScalar    *val;
279:   InsertMode     addv=mat->insertmode;

282:   /*  wait on receives */
283:   while (1) {
284:     MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
285:     if (!flg) break;

287:     for (i=0; i<n;) {
288:       /* Now identify the consecutive vals belonging to the same row */
289:       for (j=i,rstart=row[j]; j<n; j++) {
290:         if (row[j] != rstart) break;
291:       }
292:       if (j < n) ncols = j-i;
293:       else       ncols = n-i;
294:       /* Now assemble all these values with a single function call */
295:       MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);
296:       i    = j;
297:     }
298:   }
299:   MatStashScatterEnd_Private(&mat->stash);

301:   MatAssemblyBegin(mdn->A,mode);
302:   MatAssemblyEnd(mdn->A,mode);

304:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
305:     MatSetUpMultiply_MPIDense(mat);
306:   }
307:   return(0);
308: }

312: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
313: {
315:   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

318:   MatZeroEntries(l->A);
319:   return(0);
320: }

322: /* the code does not do the diagonal entries correctly unless the
323:    matrix is square and the column and row ownerships are identical.
324:    This is a BUG. The only way to fix it seems to be to access
325:    mdn->A and mdn->B directly and not through the MatZeroRows()
326:    routine.
327: */
330: PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
331: {
332:   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
333:   PetscErrorCode    ierr;
334:   PetscInt          i,*owners = A->rmap->range;
335:   PetscInt          *sizes,j,idx,nsends;
336:   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
337:   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
338:   PetscInt          *lens,*lrows,*values;
339:   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
340:   MPI_Comm          comm;
341:   MPI_Request       *send_waits,*recv_waits;
342:   MPI_Status        recv_status,*send_status;
343:   PetscBool         found;
344:   const PetscScalar *xx;
345:   PetscScalar       *bb;

348:   PetscObjectGetComm((PetscObject)A,&comm);
349:   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
350:   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
351:   /*  first count number of contributors to each processor */
352:   PetscCalloc1(2*size,&sizes);
353:   PetscMalloc1(N+1,&owner);  /* see note*/
354:   for (i=0; i<N; i++) {
355:     idx   = rows[i];
356:     found = PETSC_FALSE;
357:     for (j=0; j<size; j++) {
358:       if (idx >= owners[j] && idx < owners[j+1]) {
359:         sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
360:       }
361:     }
362:     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
363:   }
364:   nsends = 0;
365:   for (i=0; i<size; i++) nsends += sizes[2*i+1];

367:   /* inform other processors of number of messages and max length*/
368:   PetscMaxSum(comm,sizes,&nmax,&nrecvs);

370:   /* post receives:   */
371:   PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);
372:   PetscMalloc1((nrecvs+1),&recv_waits);
373:   for (i=0; i<nrecvs; i++) {
374:     MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
375:   }

377:   /* do sends:
378:       1) starts[i] gives the starting index in svalues for stuff going to
379:          the ith processor
380:   */
381:   PetscMalloc1((N+1),&svalues);
382:   PetscMalloc1((nsends+1),&send_waits);
383:   PetscMalloc1((size+1),&starts);

385:   starts[0] = 0;
386:   for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
387:   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

389:   starts[0] = 0;
390:   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
391:   count = 0;
392:   for (i=0; i<size; i++) {
393:     if (sizes[2*i+1]) {
394:       MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
395:     }
396:   }
397:   PetscFree(starts);

399:   base = owners[rank];

401:   /*  wait on receives */
402:   PetscMalloc2(nrecvs,&lens,nrecvs,&source);
403:   count = nrecvs;
404:   slen  = 0;
405:   while (count) {
406:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
407:     /* unpack receives into our local space */
408:     MPI_Get_count(&recv_status,MPIU_INT,&n);

410:     source[imdex] = recv_status.MPI_SOURCE;
411:     lens[imdex]   = n;
412:     slen += n;
413:     count--;
414:   }
415:   PetscFree(recv_waits);

417:   /* move the data into the send scatter */
418:   PetscMalloc1((slen+1),&lrows);
419:   count = 0;
420:   for (i=0; i<nrecvs; i++) {
421:     values = rvalues + i*nmax;
422:     for (j=0; j<lens[i]; j++) {
423:       lrows[count++] = values[j] - base;
424:     }
425:   }
426:   PetscFree(rvalues);
427:   PetscFree2(lens,source);
428:   PetscFree(owner);
429:   PetscFree(sizes);

431:   /* fix right hand side if needed */
432:   if (x && b) {
433:     VecGetArrayRead(x,&xx);
434:     VecGetArray(b,&bb);
435:     for (i=0; i<slen; i++) {
436:       bb[lrows[i]] = diag*xx[lrows[i]];
437:     }
438:     VecRestoreArrayRead(x,&xx);
439:     VecRestoreArray(b,&bb);
440:   }

442:   /* actually zap the local rows */
443:   MatZeroRows(l->A,slen,lrows,0.0,0,0);
444:   if (diag != 0.0) {
445:     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
446:     PetscInt     m   = ll->lda, i;

448:     for (i=0; i<slen; i++) {
449:       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
450:     }
451:   }
452:   PetscFree(lrows);

454:   /* wait on sends */
455:   if (nsends) {
456:     PetscMalloc1(nsends,&send_status);
457:     MPI_Waitall(nsends,send_waits,send_status);
458:     PetscFree(send_status);
459:   }
460:   PetscFree(send_waits);
461:   PetscFree(svalues);
462:   return(0);
463: }

467: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
468: {
469:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

473:   VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
474:   VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
475:   MatMult_SeqDense(mdn->A,mdn->lvec,yy);
476:   return(0);
477: }

481: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
482: {
483:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

487:   VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
488:   VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
489:   MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
490:   return(0);
491: }

495: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
496: {
497:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
499:   PetscScalar    zero = 0.0;

502:   VecSet(yy,zero);
503:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
504:   VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
505:   VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
506:   return(0);
507: }

511: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
512: {
513:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

517:   VecCopy(yy,zz);
518:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
519:   VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
520:   VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
521:   return(0);
522: }

526: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
527: {
528:   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
529:   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
531:   PetscInt       len,i,n,m = A->rmap->n,radd;
532:   PetscScalar    *x,zero = 0.0;

535:   VecSet(v,zero);
536:   VecGetArray(v,&x);
537:   VecGetSize(v,&n);
538:   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
539:   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
540:   radd = A->rmap->rstart*m;
541:   for (i=0; i<len; i++) {
542:     x[i] = aloc->v[radd + i*m + i];
543:   }
544:   VecRestoreArray(v,&x);
545:   return(0);
546: }

550: PetscErrorCode MatDestroy_MPIDense(Mat mat)
551: {
552:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

556: #if defined(PETSC_USE_LOG)
557:   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
558: #endif
559:   MatStashDestroy_Private(&mat->stash);
560:   MatDestroy(&mdn->A);
561:   VecDestroy(&mdn->lvec);
562:   VecScatterDestroy(&mdn->Mvctx);

564:   PetscFree(mat->data);
565:   PetscObjectChangeTypeName((PetscObject)mat,0);
566:   PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);
567:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);
568:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);
569:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);
570:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);
571:   return(0);
572: }

576: static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
577: {
578:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
579:   PetscErrorCode    ierr;
580:   PetscViewerFormat format;
581:   int               fd;
582:   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
583:   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
584:   PetscScalar       *work,*v,*vv;
585:   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

588:   if (mdn->size == 1) {
589:     MatView(mdn->A,viewer);
590:   } else {
591:     PetscViewerBinaryGetDescriptor(viewer,&fd);
592:     MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
593:     MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);

595:     PetscViewerGetFormat(viewer,&format);
596:     if (format == PETSC_VIEWER_NATIVE) {

598:       if (!rank) {
599:         /* store the matrix as a dense matrix */
600:         header[0] = MAT_FILE_CLASSID;
601:         header[1] = mat->rmap->N;
602:         header[2] = N;
603:         header[3] = MATRIX_BINARY_FORMAT_DENSE;
604:         PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);

606:         /* get largest work array needed for transposing array */
607:         mmax = mat->rmap->n;
608:         for (i=1; i<size; i++) {
609:           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
610:         }
611:         PetscMalloc1(mmax*N,&work);

613:         /* write out local array, by rows */
614:         m = mat->rmap->n;
615:         v = a->v;
616:         for (j=0; j<N; j++) {
617:           for (i=0; i<m; i++) {
618:             work[j + i*N] = *v++;
619:           }
620:         }
621:         PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
622:         /* get largest work array to receive messages from other processes, excludes process zero */
623:         mmax = 0;
624:         for (i=1; i<size; i++) {
625:           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
626:         }
627:         PetscMalloc1(mmax*N,&vv);
628:         for (k = 1; k < size; k++) {
629:           v    = vv;
630:           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
631:           MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));

633:           for (j = 0; j < N; j++) {
634:             for (i = 0; i < m; i++) {
635:               work[j + i*N] = *v++;
636:             }
637:           }
638:           PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
639:         }
640:         PetscFree(work);
641:         PetscFree(vv);
642:       } else {
643:         MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));
644:       }
645:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
646:   }
647:   return(0);
648: }

650: #include <petscdraw.h>
653: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
654: {
655:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
656:   PetscErrorCode    ierr;
657:   PetscMPIInt       size = mdn->size,rank = mdn->rank;
658:   PetscViewerType   vtype;
659:   PetscBool         iascii,isdraw;
660:   PetscViewer       sviewer;
661:   PetscViewerFormat format;

664:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
665:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
666:   if (iascii) {
667:     PetscViewerGetType(viewer,&vtype);
668:     PetscViewerGetFormat(viewer,&format);
669:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
670:       MatInfo info;
671:       MatGetInfo(mat,MAT_LOCAL,&info);
672:       PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
673:       PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
674:       PetscViewerFlush(viewer);
675:       PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
676:       VecScatterView(mdn->Mvctx,viewer);
677:       return(0);
678:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
679:       return(0);
680:     }
681:   } else if (isdraw) {
682:     PetscDraw draw;
683:     PetscBool isnull;

685:     PetscViewerDrawGetDraw(viewer,0,&draw);
686:     PetscDrawIsNull(draw,&isnull);
687:     if (isnull) return(0);
688:   }

690:   if (size == 1) {
691:     MatView(mdn->A,viewer);
692:   } else {
693:     /* assemble the entire matrix onto first processor. */
694:     Mat         A;
695:     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
696:     PetscInt    *cols;
697:     PetscScalar *vals;

699:     MatCreate(PetscObjectComm((PetscObject)mat),&A);
700:     if (!rank) {
701:       MatSetSizes(A,M,N,M,N);
702:     } else {
703:       MatSetSizes(A,0,0,M,N);
704:     }
705:     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
706:     MatSetType(A,MATMPIDENSE);
707:     MatMPIDenseSetPreallocation(A,NULL);
708:     PetscLogObjectParent((PetscObject)mat,(PetscObject)A);

710:     /* Copy the matrix ... This isn't the most efficient means,
711:        but it's quick for now */
712:     A->insertmode = INSERT_VALUES;

714:     row = mat->rmap->rstart;
715:     m   = mdn->A->rmap->n;
716:     for (i=0; i<m; i++) {
717:       MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
718:       MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
719:       MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
720:       row++;
721:     }

723:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
724:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
725:     PetscViewerGetSingleton(viewer,&sviewer);
726:     if (!rank) {
727:       PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);
728:       /* Set the type name to MATMPIDENSE so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII() */
729:       PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE);
730:       MatView(((Mat_MPIDense*)(A->data))->A,sviewer);
731:     }
732:     PetscViewerRestoreSingleton(viewer,&sviewer);
733:     PetscViewerFlush(viewer);
734:     MatDestroy(&A);
735:   }
736:   return(0);
737: }

741: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
742: {
744:   PetscBool      iascii,isbinary,isdraw,issocket;

747:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
748:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
749:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
750:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);

752:   if (iascii || issocket || isdraw) {
753:     MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
754:   } else if (isbinary) {
755:     MatView_MPIDense_Binary(mat,viewer);
756:   }
757:   return(0);
758: }

762: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
763: {
764:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
765:   Mat            mdn  = mat->A;
767:   PetscReal      isend[5],irecv[5];

770:   info->block_size = 1.0;

772:   MatGetInfo(mdn,MAT_LOCAL,info);

774:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
775:   isend[3] = info->memory;  isend[4] = info->mallocs;
776:   if (flag == MAT_LOCAL) {
777:     info->nz_used      = isend[0];
778:     info->nz_allocated = isend[1];
779:     info->nz_unneeded  = isend[2];
780:     info->memory       = isend[3];
781:     info->mallocs      = isend[4];
782:   } else if (flag == MAT_GLOBAL_MAX) {
783:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));

785:     info->nz_used      = irecv[0];
786:     info->nz_allocated = irecv[1];
787:     info->nz_unneeded  = irecv[2];
788:     info->memory       = irecv[3];
789:     info->mallocs      = irecv[4];
790:   } else if (flag == MAT_GLOBAL_SUM) {
791:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));

793:     info->nz_used      = irecv[0];
794:     info->nz_allocated = irecv[1];
795:     info->nz_unneeded  = irecv[2];
796:     info->memory       = irecv[3];
797:     info->mallocs      = irecv[4];
798:   }
799:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
800:   info->fill_ratio_needed = 0;
801:   info->factor_mallocs    = 0;
802:   return(0);
803: }

807: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
808: {
809:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

813:   switch (op) {
814:   case MAT_NEW_NONZERO_LOCATIONS:
815:   case MAT_NEW_NONZERO_LOCATION_ERR:
816:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
817:     MatSetOption(a->A,op,flg);
818:     break;
819:   case MAT_ROW_ORIENTED:
820:     a->roworiented = flg;

822:     MatSetOption(a->A,op,flg);
823:     break;
824:   case MAT_NEW_DIAGONALS:
825:   case MAT_KEEP_NONZERO_PATTERN:
826:   case MAT_USE_HASH_TABLE:
827:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
828:     break;
829:   case MAT_IGNORE_OFF_PROC_ENTRIES:
830:     a->donotstash = flg;
831:     break;
832:   case MAT_SYMMETRIC:
833:   case MAT_STRUCTURALLY_SYMMETRIC:
834:   case MAT_HERMITIAN:
835:   case MAT_SYMMETRY_ETERNAL:
836:   case MAT_IGNORE_LOWER_TRIANGULAR:
837:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
838:     break;
839:   default:
840:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
841:   }
842:   return(0);
843: }


848: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
849: {
850:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
851:   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
852:   PetscScalar    *l,*r,x,*v;
854:   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

857:   MatGetLocalSize(A,&s2,&s3);
858:   if (ll) {
859:     VecGetLocalSize(ll,&s2a);
860:     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
861:     VecGetArray(ll,&l);
862:     for (i=0; i<m; i++) {
863:       x = l[i];
864:       v = mat->v + i;
865:       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
866:     }
867:     VecRestoreArray(ll,&l);
868:     PetscLogFlops(n*m);
869:   }
870:   if (rr) {
871:     VecGetLocalSize(rr,&s3a);
872:     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
873:     VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
874:     VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
875:     VecGetArray(mdn->lvec,&r);
876:     for (i=0; i<n; i++) {
877:       x = r[i];
878:       v = mat->v + i*m;
879:       for (j=0; j<m; j++) (*v++) *= x;
880:     }
881:     VecRestoreArray(mdn->lvec,&r);
882:     PetscLogFlops(n*m);
883:   }
884:   return(0);
885: }

889: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
890: {
891:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
892:   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
894:   PetscInt       i,j;
895:   PetscReal      sum = 0.0;
896:   PetscScalar    *v  = mat->v;

899:   if (mdn->size == 1) {
900:      MatNorm(mdn->A,type,nrm);
901:   } else {
902:     if (type == NORM_FROBENIUS) {
903:       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
904:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
905:       }
906:       MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
907:       *nrm = PetscSqrtReal(*nrm);
908:       PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);
909:     } else if (type == NORM_1) {
910:       PetscReal *tmp,*tmp2;
911:       PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);
912:       PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));
913:       PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));
914:       *nrm = 0.0;
915:       v    = mat->v;
916:       for (j=0; j<mdn->A->cmap->n; j++) {
917:         for (i=0; i<mdn->A->rmap->n; i++) {
918:           tmp[j] += PetscAbsScalar(*v);  v++;
919:         }
920:       }
921:       MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
922:       for (j=0; j<A->cmap->N; j++) {
923:         if (tmp2[j] > *nrm) *nrm = tmp2[j];
924:       }
925:       PetscFree2(tmp,tmp2);
926:       PetscLogFlops(A->cmap->n*A->rmap->n);
927:     } else if (type == NORM_INFINITY) { /* max row norm */
928:       PetscReal ntemp;
929:       MatNorm(mdn->A,type,&ntemp);
930:       MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
931:     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
932:   }
933:   return(0);
934: }

938: PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
939: {
940:   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
941:   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
942:   Mat            B;
943:   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
945:   PetscInt       j,i;
946:   PetscScalar    *v;

949:   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
950:   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
951:     MatCreate(PetscObjectComm((PetscObject)A),&B);
952:     MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
953:     MatSetType(B,((PetscObject)A)->type_name);
954:     MatMPIDenseSetPreallocation(B,NULL);
955:   } else {
956:     B = *matout;
957:   }

959:   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
960:   PetscMalloc1(m,&rwork);
961:   for (i=0; i<m; i++) rwork[i] = rstart + i;
962:   for (j=0; j<n; j++) {
963:     MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
964:     v   += m;
965:   }
966:   PetscFree(rwork);
967:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
968:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
969:   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
970:     *matout = B;
971:   } else {
972:     MatHeaderMerge(A,B);
973:   }
974:   return(0);
975: }


978: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
979: extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

983: PetscErrorCode MatSetUp_MPIDense(Mat A)
984: {

988:    MatMPIDenseSetPreallocation(A,0);
989:   return(0);
990: }

994: PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
995: {
997:   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

1000:   MatAXPY(A->A,alpha,B->A,str);
1001:   return(0);
1002: }

1006: PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1007: {
1008:   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;

1012:   MatConjugate(a->A);
1013:   return(0);
1014: }

1018: PetscErrorCode MatRealPart_MPIDense(Mat A)
1019: {
1020:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

1024:   MatRealPart(a->A);
1025:   return(0);
1026: }

1030: PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1031: {
1032:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

1036:   MatImaginaryPart(a->A);
1037:   return(0);
1038: }

1040: extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1043: PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1044: {
1046:   PetscInt       i,n;
1047:   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1048:   PetscReal      *work;

1051:   MatGetSize(A,NULL,&n);
1052:   PetscMalloc1(n,&work);
1053:   MatGetColumnNorms_SeqDense(a->A,type,work);
1054:   if (type == NORM_2) {
1055:     for (i=0; i<n; i++) work[i] *= work[i];
1056:   }
1057:   if (type == NORM_INFINITY) {
1058:     MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);
1059:   } else {
1060:     MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);
1061:   }
1062:   PetscFree(work);
1063:   if (type == NORM_2) {
1064:     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1065:   }
1066:   return(0);
1067: }

1071: static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1072: {
1073:   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1075:   PetscScalar    *a;
1076:   PetscInt       m,n,i;

1079:   MatGetSize(d->A,&m,&n);
1080:   MatDenseGetArray(d->A,&a);
1081:   for (i=0; i<m*n; i++) {
1082:     PetscRandomGetValue(rctx,a+i);
1083:   }
1084:   MatDenseRestoreArray(d->A,&a);
1085:   return(0);
1086: }

1088: /* -------------------------------------------------------------------*/
1089: static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1090:                                         MatGetRow_MPIDense,
1091:                                         MatRestoreRow_MPIDense,
1092:                                         MatMult_MPIDense,
1093:                                 /*  4*/ MatMultAdd_MPIDense,
1094:                                         MatMultTranspose_MPIDense,
1095:                                         MatMultTransposeAdd_MPIDense,
1096:                                         0,
1097:                                         0,
1098:                                         0,
1099:                                 /* 10*/ 0,
1100:                                         0,
1101:                                         0,
1102:                                         0,
1103:                                         MatTranspose_MPIDense,
1104:                                 /* 15*/ MatGetInfo_MPIDense,
1105:                                         MatEqual_MPIDense,
1106:                                         MatGetDiagonal_MPIDense,
1107:                                         MatDiagonalScale_MPIDense,
1108:                                         MatNorm_MPIDense,
1109:                                 /* 20*/ MatAssemblyBegin_MPIDense,
1110:                                         MatAssemblyEnd_MPIDense,
1111:                                         MatSetOption_MPIDense,
1112:                                         MatZeroEntries_MPIDense,
1113:                                 /* 24*/ MatZeroRows_MPIDense,
1114:                                         0,
1115:                                         0,
1116:                                         0,
1117:                                         0,
1118:                                 /* 29*/ MatSetUp_MPIDense,
1119:                                         0,
1120:                                         0,
1121:                                         0,
1122:                                         0,
1123:                                 /* 34*/ MatDuplicate_MPIDense,
1124:                                         0,
1125:                                         0,
1126:                                         0,
1127:                                         0,
1128:                                 /* 39*/ MatAXPY_MPIDense,
1129:                                         MatGetSubMatrices_MPIDense,
1130:                                         0,
1131:                                         MatGetValues_MPIDense,
1132:                                         0,
1133:                                 /* 44*/ 0,
1134:                                         MatScale_MPIDense,
1135:                                         0,
1136:                                         0,
1137:                                         0,
1138:                                 /* 49*/ MatSetRandom_MPIDense,
1139:                                         0,
1140:                                         0,
1141:                                         0,
1142:                                         0,
1143:                                 /* 54*/ 0,
1144:                                         0,
1145:                                         0,
1146:                                         0,
1147:                                         0,
1148:                                 /* 59*/ MatGetSubMatrix_MPIDense,
1149:                                         MatDestroy_MPIDense,
1150:                                         MatView_MPIDense,
1151:                                         0,
1152:                                         0,
1153:                                 /* 64*/ 0,
1154:                                         0,
1155:                                         0,
1156:                                         0,
1157:                                         0,
1158:                                 /* 69*/ 0,
1159:                                         0,
1160:                                         0,
1161:                                         0,
1162:                                         0,
1163:                                 /* 74*/ 0,
1164:                                         0,
1165:                                         0,
1166:                                         0,
1167:                                         0,
1168:                                 /* 79*/ 0,
1169:                                         0,
1170:                                         0,
1171:                                         0,
1172:                                 /* 83*/ MatLoad_MPIDense,
1173:                                         0,
1174:                                         0,
1175:                                         0,
1176:                                         0,
1177:                                         0,
1178:                                 /* 89*/
1179:                                         0,
1180:                                         0,
1181:                                         0,
1182:                                         0,
1183:                                         0,
1184:                                 /* 94*/ 0,
1185:                                         0,
1186:                                         0,
1187:                                         0,
1188:                                         0,
1189:                                 /* 99*/ 0,
1190:                                         0,
1191:                                         0,
1192:                                         MatConjugate_MPIDense,
1193:                                         0,
1194:                                 /*104*/ 0,
1195:                                         MatRealPart_MPIDense,
1196:                                         MatImaginaryPart_MPIDense,
1197:                                         0,
1198:                                         0,
1199:                                 /*109*/ 0,
1200:                                         0,
1201:                                         0,
1202:                                         0,
1203:                                         0,
1204:                                 /*114*/ 0,
1205:                                         0,
1206:                                         0,
1207:                                         0,
1208:                                         0,
1209:                                 /*119*/ 0,
1210:                                         0,
1211:                                         0,
1212:                                         0,
1213:                                         0,
1214:                                 /*124*/ 0,
1215:                                         MatGetColumnNorms_MPIDense,
1216:                                         0,
1217:                                         0,
1218:                                         0,
1219:                                 /*129*/ 0,
1220:                                         0,
1221:                                         0,
1222:                                         0,
1223:                                         0,
1224:                                 /*134*/ 0,
1225:                                         0,
1226:                                         0,
1227:                                         0,
1228:                                         0,
1229:                                 /*139*/ 0,
1230:                                         0,
1231:                                         0
1232: };

1236: PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1237: {
1238:   Mat_MPIDense   *a;

1242:   mat->preallocated = PETSC_TRUE;
1243:   /* Note:  For now, when data is specified above, this assumes the user correctly
1244:    allocates the local dense storage space.  We should add error checking. */

1246:   a       = (Mat_MPIDense*)mat->data;
1247:   PetscLayoutSetUp(mat->rmap);
1248:   PetscLayoutSetUp(mat->cmap);
1249:   a->nvec = mat->cmap->n;

1251:   MatCreate(PETSC_COMM_SELF,&a->A);
1252:   MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);
1253:   MatSetType(a->A,MATSEQDENSE);
1254:   MatSeqDenseSetPreallocation(a->A,data);
1255:   PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
1256:   return(0);
1257: }

1261: PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1262: {
1263:   Mat_MPIDense   *a;

1267:   PetscNewLog(mat,&a);
1268:   mat->data = (void*)a;
1269:   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));

1271:   mat->insertmode = NOT_SET_VALUES;
1272:   MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);
1273:   MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);

 1275:   /* build cache (stash) for off-process entries generated during MatSetValues() */
1276:   a->donotstash = PETSC_FALSE;

1278:   MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);

1280:   /* stuff used for matrix vector multiply */
1281:   a->lvec        = 0;
1282:   a->Mvctx       = 0;
1283:   a->roworiented = PETSC_TRUE;

1285:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);
1286:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);

1288:   PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);
1289:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);
1290:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);
1291:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);
1292:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);

1294:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);
1295:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);
1296:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);
1297:   PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);
1298:   return(0);
1299: }

1301: /*MC
1302:    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

1304:    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1305:    and MATMPIDENSE otherwise.

1307:    Options Database Keys:
1308: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

1310:   Level: beginner


1313: .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1314: M*/
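
As an illustration of the manual page above (a sketch, not code from this file; the function name is hypothetical), a matrix of type MATDENSE resolves to MATSEQDENSE or MATMPIDENSE depending on the communicator size and can still be overridden from the options database:

static PetscErrorCode ExampleCreateDenseFromOptions(MPI_Comm comm,PetscInt M,PetscInt N,Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,M,N);
  MatSetType(*A,MATDENSE);        /* MATSEQDENSE on one process, MATMPIDENSE otherwise */
  MatSetFromOptions(*A);          /* honors -mat_type dense (or any other type) */
  MatSetUp(*A);                   /* allocate PETSc-managed storage */
  return(0);
}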

1318: /*@C
1319:    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

1321:    Not collective

1323:    Input Parameters:
1324: .  A - the matrix
1325: -  data - optional location of matrix data.  Set data=NULL for PETSc
1326:    to control all matrix memory allocation.

1328:    Notes:
1329:    The dense format is fully compatible with standard Fortran 77
1330:    storage by columns.

1332:    The data input variable is intended primarily for Fortran programmers
1333:    who wish to allocate their own matrix memory space.  Most users should
1334:    set data=NULL.

1336:    Level: intermediate

1338: .keywords: matrix,dense, parallel

1340: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1341: @*/
1342: PetscErrorCode  MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1343: {

1347:   PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(mat,data));
1348:   return(0);
1349: }
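
For instance (an illustrative sketch under the assumptions stated in the manual page above; the helper name is invented), passing data = NULL lets PETSc allocate the column-major local array of size (local rows) x (global columns):

static PetscErrorCode ExamplePreallocateMPIDense(MPI_Comm comm,PetscInt m,PetscInt N,Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,m,PETSC_DECIDE,PETSC_DECIDE,N);  /* m local rows, N global columns */
  MatSetType(*A,MATMPIDENSE);
  MatMPIDenseSetPreallocation(*A,NULL);           /* PETSc manages the m*N local storage */
  return(0);
}

A user-supplied, Fortran-ordered array of length m*N could be passed instead of NULL, in which case the caller keeps ownership of that memory.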

1353: /*@C
1354:    MatCreateDense - Creates a parallel matrix in dense format.

1356:    Collective on MPI_Comm

1358:    Input Parameters:
1359: +  comm - MPI communicator
1360: .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1361: .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1362: .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1363: .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1364: -  data - optional location of matrix data.  Set data=NULL (NULL_SCALAR for Fortran users) for PETSc
1365:    to control all matrix memory allocation.

1367:    Output Parameter:
1368: .  A - the matrix

1370:    Notes:
1371:    The dense format is fully compatible with standard Fortran 77
1372:    storage by columns.

1374:    The data input variable is intended primarily for Fortran programmers
1375:    who wish to allocate their own matrix memory space.  Most users should
1376:    set data=NULL (NULL_SCALAR for Fortran users).

1378:    The user MUST specify either the local or global matrix dimensions
1379:    (possibly both).

1381:    Level: intermediate

1383: .keywords: matrix,dense, parallel

1385: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1386: @*/
1387: PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1388: {
1390:   PetscMPIInt    size;

1393:   MatCreate(comm,A);
1394:   MatSetSizes(*A,m,n,M,N);
1395:   MPI_Comm_size(comm,&size);
1396:   if (size > 1) {
1397:     MatSetType(*A,MATMPIDENSE);
1398:     MatMPIDenseSetPreallocation(*A,data);
1399:   } else {
1400:     MatSetType(*A,MATSEQDENSE);
1401:     MatSeqDenseSetPreallocation(*A,data);
1402:   }
1403:   return(0);
1404: }
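
A short usage sketch (hypothetical helper, not part of this file) that creates a dense matrix with PETSc-managed storage, fills the locally owned rows, and assembles it:

static PetscErrorCode ExampleFillDense(MPI_Comm comm,PetscInt M,PetscInt N,Mat *A)
{
  PetscInt i,j,rstart,rend;

  MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,A);
  MatGetOwnershipRange(*A,&rstart,&rend);
  for (i=rstart; i<rend; i++) {
    for (j=0; j<N; j++) {
      MatSetValue(*A,i,j,(PetscScalar)(i+j),INSERT_VALUES);  /* arbitrary test values */
    }
  }
  MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);
  return(0);
}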

1408: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1409: {
1410:   Mat            mat;
1411:   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;

1415:   *newmat = 0;
1416:   MatCreate(PetscObjectComm((PetscObject)A),&mat);
1417:   MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1418:   MatSetType(mat,((PetscObject)A)->type_name);
1419:   a       = (Mat_MPIDense*)mat->data;
1420:   PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));

1422:   mat->factortype   = A->factortype;
1423:   mat->assembled    = PETSC_TRUE;
1424:   mat->preallocated = PETSC_TRUE;

1426:   a->size         = oldmat->size;
1427:   a->rank         = oldmat->rank;
1428:   mat->insertmode = NOT_SET_VALUES;
1429:   a->nvec         = oldmat->nvec;
1430:   a->donotstash   = oldmat->donotstash;

1432:   PetscLayoutReference(A->rmap,&mat->rmap);
1433:   PetscLayoutReference(A->cmap,&mat->cmap);

1435:   MatSetUpMultiply_MPIDense(mat);
1436:   MatDuplicate(oldmat->A,cpvalues,&a->A);
1437:   PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);

1439:   *newmat = mat;
1440:   return(0);
1441: }

1445: PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
1446: {
1448:   PetscMPIInt    rank,size;
1449:   PetscInt       *rowners,i,m,nz,j;
1450:   PetscScalar    *array,*vals,*vals_ptr;

1453:   MPI_Comm_rank(comm,&rank);
1454:   MPI_Comm_size(comm,&size);

1456:   /* determine ownership of all rows */
1457:   if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
1458:   else m = newmat->rmap->n;
1459:   PetscMalloc1((size+2),&rowners);
1460:   MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
1461:   rowners[0] = 0;
1462:   for (i=2; i<=size; i++) {
1463:     rowners[i] += rowners[i-1];
1464:   }

1466:   if (!sizesset) {
1467:     MatSetSizes(newmat,m,PETSC_DECIDE,M,N);
1468:   }
1469:   MatMPIDenseSetPreallocation(newmat,NULL);
1470:   MatDenseGetArray(newmat,&array);

1472:   if (!rank) {
1473:     PetscMalloc1(m*N,&vals);

1475:     /* read in my part of the matrix numerical values  */
1476:     PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);

1478:     /* insert into matrix by row (this is why we cannot read directly into the array) */
1479:     vals_ptr = vals;
1480:     for (i=0; i<m; i++) {
1481:       for (j=0; j<N; j++) {
1482:         array[i + j*m] = *vals_ptr++;
1483:       }
1484:     }

1486:     /* read in other processors and ship out */
1487:     for (i=1; i<size; i++) {
1488:       nz   = (rowners[i+1] - rowners[i])*N;
1489:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1490:       MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);
1491:     }
1492:   } else {
1493:     /* receive numeric values */
1494:     PetscMalloc1(m*N,&vals);

1496:     /* receive message of values*/
1497:     MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);

1499:     /* insert into matrix by row (this is why we cannot read directly into the array) */
1500:     vals_ptr = vals;
1501:     for (i=0; i<m; i++) {
1502:       for (j=0; j<N; j++) {
1503:         array[i + j*m] = *vals_ptr++;
1504:       }
1505:     }
1506:   }
1507:   MatDenseRestoreArray(newmat,&array);
1508:   PetscFree(rowners);
1509:   PetscFree(vals);
1510:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1511:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1512:   return(0);
1513: }

1517: PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1518: {
1519:   PetscScalar    *vals,*svals;
1520:   MPI_Comm       comm;
1521:   MPI_Status     status;
1522:   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1523:   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1524:   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1525:   PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
1526:   int            fd;

1530:   PetscObjectGetComm((PetscObject)viewer,&comm);
1531:   MPI_Comm_size(comm,&size);
1532:   MPI_Comm_rank(comm,&rank);
1533:   if (!rank) {
1534:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1535:     PetscBinaryRead(fd,(char*)header,4,PETSC_INT);
1536:     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1537:   }
1538:   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

1540:   MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1541:   M    = header[1]; N = header[2]; nz = header[3];

1543:   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1544:   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
1545:   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

1547:   /* If global sizes are set, check if they are consistent with that given in the file */
1548:   if (sizesset) {
1549:     MatGetSize(newmat,&grows,&gcols);
1550:   }
1551:   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
1552:   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);

1554:   /*
1555:        Handle case where matrix is stored on disk as a dense matrix
1556:   */
1557:   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1558:     MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);
1559:     return(0);
1560:   }

1562:   /* determine ownership of all rows */
1563:   if (newmat->rmap->n < 0) {
1564:     PetscMPIIntCast(M/size + ((M % size) > rank),&m);
1565:   } else {
1566:     PetscMPIIntCast(newmat->rmap->n,&m);
1567:   }
1568:   PetscMalloc1((size+2),&rowners);
1569:   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1570:   rowners[0] = 0;
1571:   for (i=2; i<=size; i++) {
1572:     rowners[i] += rowners[i-1];
1573:   }
1574:   rstart = rowners[rank];
1575:   rend   = rowners[rank+1];

1577:   /* distribute row lengths to all processors */
1578:   PetscMalloc2(rend-rstart,&ourlens,rend-rstart,&offlens);
1579:   if (!rank) {
1580:     PetscMalloc1(M,&rowlengths);
1581:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1582:     PetscMalloc1(size,&sndcounts);
1583:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1584:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1585:     PetscFree(sndcounts);
1586:   } else {
1587:     MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1588:   }

1590:   if (!rank) {
1591:     /* calculate the number of nonzeros on each processor */
1592:     PetscMalloc1(size,&procsnz);
1593:     PetscMemzero(procsnz,size*sizeof(PetscInt));
1594:     for (i=0; i<size; i++) {
1595:       for (j=rowners[i]; j< rowners[i+1]; j++) {
1596:         procsnz[i] += rowlengths[j];
1597:       }
1598:     }
1599:     PetscFree(rowlengths);

1601:     /* determine max buffer needed and allocate it */
1602:     maxnz = 0;
1603:     for (i=0; i<size; i++) {
1604:       maxnz = PetscMax(maxnz,procsnz[i]);
1605:     }
1606:     PetscMalloc1(maxnz,&cols);

1608:     /* read in my part of the matrix column indices  */
1609:     nz   = procsnz[0];
1610:     PetscMalloc1(nz,&mycols);
1611:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);

1613:     /* read in everyone else's part and ship it off */
1614:     for (i=1; i<size; i++) {
1615:       nz   = procsnz[i];
1616:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1617:       MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1618:     }
1619:     PetscFree(cols);
1620:   } else {
1621:     /* determine buffer space needed for message */
1622:     nz = 0;
1623:     for (i=0; i<m; i++) {
1624:       nz += ourlens[i];
1625:     }
1626:     PetscMalloc1((nz+1),&mycols);

1628:     /* receive message of column indices*/
1629:     MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1630:     MPI_Get_count(&status,MPIU_INT,&maxnz);
1631:     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1632:   }

1634:   /* loop over local rows, determining number of off diagonal entries */
1635:   PetscMemzero(offlens,m*sizeof(PetscInt));
1636:   jj   = 0;
1637:   for (i=0; i<m; i++) {
1638:     for (j=0; j<ourlens[i]; j++) {
1639:       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1640:       jj++;
1641:     }
1642:   }

1644:   /* create our matrix */
1645:   for (i=0; i<m; i++) ourlens[i] -= offlens[i];

1647:   if (!sizesset) {
1648:     MatSetSizes(newmat,m,PETSC_DECIDE,M,N);
1649:   }
1650:   MatMPIDenseSetPreallocation(newmat,NULL);
1651:   for (i=0; i<m; i++) ourlens[i] += offlens[i];

1653:   if (!rank) {
1654:     PetscMalloc1(maxnz,&vals);

1656:     /* read in my part of the matrix numerical values  */
1657:     nz   = procsnz[0];
1658:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);

1660:     /* insert into matrix */
1661:     jj      = rstart;
1662:     smycols = mycols;
1663:     svals   = vals;
1664:     for (i=0; i<m; i++) {
1665:       MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1666:       smycols += ourlens[i];
1667:       svals   += ourlens[i];
1668:       jj++;
1669:     }

1671:     /* read in other processors and ship out */
1672:     for (i=1; i<size; i++) {
1673:       nz   = procsnz[i];
1674:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1675:       MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);
1676:     }
1677:     PetscFree(procsnz);
1678:   } else {
1679:     /* receive numeric values */
1680:     PetscMalloc1((nz+1),&vals);

1682:     /* receive message of values*/
1683:     MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);
1684:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1685:     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

1687:     /* insert into matrix */
1688:     jj      = rstart;
1689:     smycols = mycols;
1690:     svals   = vals;
1691:     for (i=0; i<m; i++) {
1692:       MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1693:       smycols += ourlens[i];
1694:       svals   += ourlens[i];
1695:       jj++;
1696:     }
1697:   }
1698:   PetscFree2(ourlens,offlens);
1699:   PetscFree(vals);
1700:   PetscFree(mycols);
1701:   PetscFree(rowners);

1703:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1704:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1705:   return(0);
1706: }

1710: PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1711: {
1712:   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1713:   Mat            a,b;
1714:   PetscBool      flg;

1718:   a    = matA->A;
1719:   b    = matB->A;
1720:   MatEqual(a,b,&flg);
1721:   MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
1722:   return(0);
1723: }