Actual source code: mpidense.c
1: /*
2:    Basic functions for parallel dense matrices.
3: */
4:
5:  #include "src/mat/impls/dense/mpi/mpidense.h"
9: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
10: {
11: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
13: PetscInt lrow,rstart = mat->rstart,rend = mat->rend;
16:   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
17: lrow = row - rstart;
18: MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);
19: return(0);
20: }
24: PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
25: {
29: if (idx) {PetscFree(*idx);}
30: if (v) {PetscFree(*v);}
31: return(0);
32: }
37: PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
38: {
39: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
41: PetscInt m = A->m,rstart = mdn->rstart;
42: PetscScalar *array;
43: MPI_Comm comm;
46: if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
48: /* The reuse aspect is not implemented efficiently */
49: if (reuse) { MatDestroy(*B);}
51: PetscObjectGetComm((PetscObject)(mdn->A),&comm);
52: MatGetArray(mdn->A,&array);
53: MatCreate(comm,m,m,m,m,B);
54: MatSetType(*B,mdn->A->type_name);
55: MatSeqDenseSetPreallocation(*B,array+m*rstart);
56: MatRestoreArray(mdn->A,&array);
57: MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
58: MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
59:
60: *iscopy = PETSC_TRUE;
61: return(0);
62: }
67: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
68: {
69: Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
71: PetscInt i,j,rstart = A->rstart,rend = A->rend,row;
72: PetscTruth roworiented = A->roworiented;
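  /* v holds the m-by-n block of values: row-major (row i starts at v+i*n) when
     roworiented is true, column-major (entry (i,j) at v[i+j*m]) otherwise */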
75: for (i=0; i<m; i++) {
76: if (idxm[i] < 0) continue;
77: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
78: if (idxm[i] >= rstart && idxm[i] < rend) {
79: row = idxm[i] - rstart;
80: if (roworiented) {
81: MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
82: } else {
83: for (j=0; j<n; j++) {
84: if (idxn[j] < 0) continue;
85: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
86: MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
87: }
88: }
89: } else {
90: if (!A->donotstash) {
91: if (roworiented) {
92: MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
93: } else {
94: MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
95: }
96: }
97: }
98: }
99: return(0);
100: }
104: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
105: {
106: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
108: PetscInt i,j,rstart = mdn->rstart,rend = mdn->rend,row;
111: for (i=0; i<m; i++) {
112: if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
113: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
114: if (idxm[i] >= rstart && idxm[i] < rend) {
115: row = idxm[i] - rstart;
116: for (j=0; j<n; j++) {
117: if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
118: if (idxn[j] >= mat->N) {
119: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
120: }
121: MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
122: }
123: } else {
124: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
125: }
126: }
127: return(0);
128: }
132: PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
133: {
134: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
138: MatGetArray(a->A,array);
139: return(0);
140: }
144: static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
145: {
146: Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
147: Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
149: PetscInt i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
150: PetscScalar *av,*bv,*v = lmat->v;
151: Mat newmat;
154: ISGetIndices(isrow,&irow);
155: ISGetIndices(iscol,&icol);
156: ISGetLocalSize(isrow,&nrows);
157: ISGetLocalSize(iscol,&ncols);
159:   /* No parallel redistribution currently supported! Should really check each index set
160:      to confirm that it is OK. ... Currently supports only a submatrix with the same
161:      partitioning as the original matrix! */
163: MatGetLocalSize(A,&nlrows,&nlcols);
164: MatGetOwnershipRange(A,&rstart,&rend);
165:
166: /* Check submatrix call */
167: if (scall == MAT_REUSE_MATRIX) {
168: /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
169: /* Really need to test rows and column sizes! */
170: newmat = *B;
171: } else {
172: /* Create and fill new matrix */
173: MatCreate(A->comm,nrows,cs,PETSC_DECIDE,ncols,&newmat);
174: MatSetType(newmat,A->type_name);
175: MatMPIDenseSetPreallocation(newmat,PETSC_NULL);
176: }
178: /* Now extract the data pointers and do the copy, column at a time */
179: newmatd = (Mat_MPIDense*)newmat->data;
180: bv = ((Mat_SeqDense *)newmatd->A->data)->v;
181:
182: for (i=0; i<ncols; i++) {
183: av = v + nlrows*icol[i];
184: for (j=0; j<nrows; j++) {
185: *bv++ = av[irow[j] - rstart];
186: }
187: }
189: /* Assemble the matrices so that the correct flags are set */
190: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
191: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
193: /* Free work space */
194: ISRestoreIndices(isrow,&irow);
195: ISRestoreIndices(iscol,&icol);
196: *B = newmat;
197: return(0);
198: }
202: PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
203: {
205: return(0);
206: }
210: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
211: {
212: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
213: MPI_Comm comm = mat->comm;
215: PetscInt nstash,reallocs;
216: InsertMode addv;
219: /* make sure all processors are either in INSERTMODE or ADDMODE */
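  /* (the bitwise OR below works because INSERT_VALUES and ADD_VALUES are distinct
     nonzero bits, so any mixing across processes shows up as ADD_VALUES|INSERT_VALUES) */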
220: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
221: if (addv == (ADD_VALUES|INSERT_VALUES)) {
222: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
223: }
224: mat->insertmode = addv; /* in case this processor had no cache */
226: MatStashScatterBegin_Private(&mat->stash,mdn->rowners);
227: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
228: PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
229: return(0);
230: }
234: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
235: {
236: Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
237: PetscErrorCode ierr;
238: PetscInt i,*row,*col,flg,j,rstart,ncols;
239: PetscMPIInt n;
240: PetscScalar *val;
241: InsertMode addv=mat->insertmode;
244: /* wait on receives */
245: while (1) {
246: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
247: if (!flg) break;
248:
249: for (i=0; i<n;) {
250: /* Now identify the consecutive vals belonging to the same row */
251: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
252: if (j < n) ncols = j-i;
253: else ncols = n-i;
254: /* Now assemble all these values with a single function call */
255: MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);
256: i = j;
257: }
258: }
259: MatStashScatterEnd_Private(&mat->stash);
260:
261: MatAssemblyBegin(mdn->A,mode);
262: MatAssemblyEnd(mdn->A,mode);
264: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
265: MatSetUpMultiply_MPIDense(mat);
266: }
267: return(0);
268: }
272: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
273: {
275: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
278: MatZeroEntries(l->A);
279: return(0);
280: }
282: /* the code does not do the diagonal entries correctly unless the
283:    matrix is square and the column and row ownerships are identical.
284: This is a BUG. The only way to fix it seems to be to access
285: mdn->A and mdn->B directly and not through the MatZeroRows()
286: routine.
287: */
290: PetscErrorCode MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
291: {
292: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
294: PetscInt i,N,*rows,*owners = l->rowners;
295: PetscInt *nprocs,j,idx,nsends;
296: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
297: PetscInt *rvalues,tag = A->tag,count,base,slen,*source;
298: PetscInt *lens,*lrows,*values;
299: PetscMPIInt n,imdex,rank = l->rank,size = l->size;
300: MPI_Comm comm = A->comm;
301: MPI_Request *send_waits,*recv_waits;
302: MPI_Status recv_status,*send_status;
303: IS istmp;
304: PetscTruth found;
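  /* Overview: each process determines the owner of every row it wants zeroed,
     exchanges the counts with PetscMaxSum(), ships the row indices to their owners
     with nonblocking sends/receives, and finally zeros the rows it received by
     calling MatZeroRows() on its local sequential block. */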
307: ISGetLocalSize(is,&N);
308: ISGetIndices(is,&rows);
310: /* first count number of contributors to each processor */
311: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
312: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
313: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
314: for (i=0; i<N; i++) {
315: idx = rows[i];
316: found = PETSC_FALSE;
317: for (j=0; j<size; j++) {
318: if (idx >= owners[j] && idx < owners[j+1]) {
319: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
320: }
321: }
322: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
323: }
324: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
326: /* inform other processors of number of messages and max length*/
327: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
329: /* post receives: */
330: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
331: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
332: for (i=0; i<nrecvs; i++) {
333: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
334: }
336: /* do sends:
337: 1) starts[i] gives the starting index in svalues for stuff going to
338: the ith processor
339: */
340: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
341: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
342: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
343: starts[0] = 0;
344: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
345: for (i=0; i<N; i++) {
346: svalues[starts[owner[i]]++] = rows[i];
347: }
348: ISRestoreIndices(is,&rows);
350: starts[0] = 0;
351: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
352: count = 0;
353: for (i=0; i<size; i++) {
354: if (nprocs[2*i+1]) {
355: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
356: }
357: }
358: PetscFree(starts);
360: base = owners[rank];
362: /* wait on receives */
363: PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);
364: source = lens + nrecvs;
365: count = nrecvs; slen = 0;
366: while (count) {
367: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
368: /* unpack receives into our local space */
369: MPI_Get_count(&recv_status,MPIU_INT,&n);
370: source[imdex] = recv_status.MPI_SOURCE;
371: lens[imdex] = n;
372: slen += n;
373: count--;
374: }
375: PetscFree(recv_waits);
376:
377: /* move the data into the send scatter */
378: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
379: count = 0;
380: for (i=0; i<nrecvs; i++) {
381: values = rvalues + i*nmax;
382: for (j=0; j<lens[i]; j++) {
383: lrows[count++] = values[j] - base;
384: }
385: }
386: PetscFree(rvalues);
387: PetscFree(lens);
388: PetscFree(owner);
389: PetscFree(nprocs);
390:
391: /* actually zap the local rows */
392: ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
393: PetscLogObjectParent(A,istmp);
394: PetscFree(lrows);
395: MatZeroRows(l->A,istmp,diag);
396: ISDestroy(istmp);
398: /* wait on sends */
399: if (nsends) {
400: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
401: MPI_Waitall(nsends,send_waits,send_status);
402: PetscFree(send_status);
403: }
404: PetscFree(send_waits);
405: PetscFree(svalues);
407: return(0);
408: }
412: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
413: {
414: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
418: VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
419: VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
420: MatMult_SeqDense(mdn->A,mdn->lvec,yy);
421: return(0);
422: }
426: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
427: {
428: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
432: VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
433: VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
434: MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
435: return(0);
436: }
440: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
441: {
442: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
444: PetscScalar zero = 0.0;
447: VecSet(&zero,yy);
448: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
449: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
450: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
451: return(0);
452: }
456: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
457: {
458: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
462: VecCopy(yy,zz);
463: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
464: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
465: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
466: return(0);
467: }
471: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
472: {
473: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
474: Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
476: PetscInt len,i,n,m = A->m,radd;
477: PetscScalar *x,zero = 0.0;
478:
480: VecSet(&zero,v);
481: VecGetArray(v,&x);
482: VecGetSize(v,&n);
483: if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
484: len = PetscMin(a->A->m,a->A->n);
485: radd = a->rstart*m;
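  /* the local array is column-major with leading dimension m, so the diagonal entry
     of global row rstart+i sits at offset i + (rstart+i)*m = radd + i*m + i */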
486: for (i=0; i<len; i++) {
487: x[i] = aloc->v[radd + i*m + i];
488: }
489: VecRestoreArray(v,&x);
490: return(0);
491: }
495: PetscErrorCode MatDestroy_MPIDense(Mat mat)
496: {
497: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
502: #if defined(PETSC_USE_LOG)
503: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
504: #endif
505: MatStashDestroy_Private(&mat->stash);
506: PetscFree(mdn->rowners);
507: MatDestroy(mdn->A);
508: if (mdn->lvec) VecDestroy(mdn->lvec);
509: if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
510: if (mdn->factor) {
511: if (mdn->factor->temp) {PetscFree(mdn->factor->temp);}
512: if (mdn->factor->tag) {PetscFree(mdn->factor->tag);}
513: if (mdn->factor->pivots) {PetscFree(mdn->factor->pivots);}
514: PetscFree(mdn->factor);
515: }
516: PetscFree(mdn);
517: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
518: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);
519: return(0);
520: }
524: static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
525: {
526: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
530: if (mdn->size == 1) {
531: MatView(mdn->A,viewer);
532: }
533: else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
534: return(0);
535: }
539: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
540: {
541: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
542: PetscErrorCode ierr;
543: PetscMPIInt size = mdn->size,rank = mdn->rank;
544: PetscViewerType vtype;
545: PetscTruth iascii,isdraw;
546: PetscViewer sviewer;
547: PetscViewerFormat format;
550: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
551: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
552: if (iascii) {
553: PetscViewerGetType(viewer,&vtype);
554: PetscViewerGetFormat(viewer,&format);
555: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
556: MatInfo info;
557: MatGetInfo(mat,MAT_LOCAL,&info);
558: PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->m,
559: (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
560: PetscViewerFlush(viewer);
561: VecScatterView(mdn->Mvctx,viewer);
562: return(0);
563: } else if (format == PETSC_VIEWER_ASCII_INFO) {
564: return(0);
565: }
566: } else if (isdraw) {
567: PetscDraw draw;
568: PetscTruth isnull;
570: PetscViewerDrawGetDraw(viewer,0,&draw);
571: PetscDrawIsNull(draw,&isnull);
572: if (isnull) return(0);
573: }
575: if (size == 1) {
576: MatView(mdn->A,viewer);
577: } else {
578: /* assemble the entire matrix onto first processor. */
579: Mat A;
580: PetscInt M = mat->M,N = mat->N,m,row,i,nz;
581: PetscInt *cols;
582: PetscScalar *vals;
584: if (!rank) {
585: MatCreate(mat->comm,M,N,M,N,&A);
586: } else {
587: MatCreate(mat->comm,0,0,M,N,&A);
588: }
589: /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
590: MatSetType(A,MATMPIDENSE);
591: MatMPIDenseSetPreallocation(A,PETSC_NULL);
592: PetscLogObjectParent(mat,A);
594: /* Copy the matrix ... This isn't the most efficient means,
595: but it's quick for now */
596: A->insertmode = INSERT_VALUES;
597: row = mdn->rstart; m = mdn->A->m;
598: for (i=0; i<m; i++) {
599: MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
600: MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
601: MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
602: row++;
603: }
605: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
606: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
607: PetscViewerGetSingleton(viewer,&sviewer);
608: if (!rank) {
609: MatView(((Mat_MPIDense*)(A->data))->A,sviewer);
610: }
611: PetscViewerRestoreSingleton(viewer,&sviewer);
612: PetscViewerFlush(viewer);
613: MatDestroy(A);
614: }
615: return(0);
616: }
620: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
621: {
623: PetscTruth iascii,isbinary,isdraw,issocket;
624:
626:
627: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
628: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
629: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
630: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
632: if (iascii || issocket || isdraw) {
633: MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
634: } else if (isbinary) {
635: MatView_MPIDense_Binary(mat,viewer);
636: } else {
637: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
638: }
639: return(0);
640: }
644: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
645: {
646: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
647: Mat mdn = mat->A;
649: PetscReal isend[5],irecv[5];
652: info->rows_global = (double)A->M;
653: info->columns_global = (double)A->N;
654: info->rows_local = (double)A->m;
655: info->columns_local = (double)A->N;
656: info->block_size = 1.0;
657: MatGetInfo(mdn,MAT_LOCAL,info);
658: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
659: isend[3] = info->memory; isend[4] = info->mallocs;
660: if (flag == MAT_LOCAL) {
661: info->nz_used = isend[0];
662: info->nz_allocated = isend[1];
663: info->nz_unneeded = isend[2];
664: info->memory = isend[3];
665: info->mallocs = isend[4];
666: } else if (flag == MAT_GLOBAL_MAX) {
667: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);
668: info->nz_used = irecv[0];
669: info->nz_allocated = irecv[1];
670: info->nz_unneeded = irecv[2];
671: info->memory = irecv[3];
672: info->mallocs = irecv[4];
673: } else if (flag == MAT_GLOBAL_SUM) {
674: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);
675: info->nz_used = irecv[0];
676: info->nz_allocated = irecv[1];
677: info->nz_unneeded = irecv[2];
678: info->memory = irecv[3];
679: info->mallocs = irecv[4];
680: }
681: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
682: info->fill_ratio_needed = 0;
683: info->factor_mallocs = 0;
684: return(0);
685: }
689: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
690: {
691: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
695: switch (op) {
696: case MAT_NO_NEW_NONZERO_LOCATIONS:
697: case MAT_YES_NEW_NONZERO_LOCATIONS:
698: case MAT_NEW_NONZERO_LOCATION_ERR:
699: case MAT_NEW_NONZERO_ALLOCATION_ERR:
700: case MAT_COLUMNS_SORTED:
701: case MAT_COLUMNS_UNSORTED:
702: MatSetOption(a->A,op);
703: break;
704: case MAT_ROW_ORIENTED:
705: a->roworiented = PETSC_TRUE;
706: MatSetOption(a->A,op);
707: break;
708: case MAT_ROWS_SORTED:
709: case MAT_ROWS_UNSORTED:
710: case MAT_YES_NEW_DIAGONALS:
711: case MAT_USE_HASH_TABLE:
712: PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
713: break;
714: case MAT_COLUMN_ORIENTED:
715: a->roworiented = PETSC_FALSE;
716: MatSetOption(a->A,op);
717: break;
718: case MAT_IGNORE_OFF_PROC_ENTRIES:
719: a->donotstash = PETSC_TRUE;
720: break;
721: case MAT_NO_NEW_DIAGONALS:
722: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
723: case MAT_SYMMETRIC:
724: case MAT_STRUCTURALLY_SYMMETRIC:
725: case MAT_NOT_SYMMETRIC:
726: case MAT_NOT_STRUCTURALLY_SYMMETRIC:
727: case MAT_HERMITIAN:
728: case MAT_NOT_HERMITIAN:
729: case MAT_SYMMETRY_ETERNAL:
730: case MAT_NOT_SYMMETRY_ETERNAL:
731: break;
732: default:
733: SETERRQ(PETSC_ERR_SUP,"unknown option");
734: }
735: return(0);
736: }
741: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
742: {
743: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
744: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
745: PetscScalar *l,*r,x,*v;
747: PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
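  /* mat->v is column-major with leading dimension m: scaling row i walks v with
     stride m, while scaling column i touches the contiguous block starting at v+i*m */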
750: MatGetLocalSize(A,&s2,&s3);
751: if (ll) {
752: VecGetLocalSize(ll,&s2a);
753: if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
754: VecGetArray(ll,&l);
755: for (i=0; i<m; i++) {
756: x = l[i];
757: v = mat->v + i;
758: for (j=0; j<n; j++) { (*v) *= x; v+= m;}
759: }
760: VecRestoreArray(ll,&l);
761: PetscLogFlops(n*m);
762: }
763: if (rr) {
764:     VecGetLocalSize(rr,&s3a);
765: if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
766: VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
767: VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
768: VecGetArray(mdn->lvec,&r);
769: for (i=0; i<n; i++) {
770: x = r[i];
771: v = mat->v + i*m;
772: for (j=0; j<m; j++) { (*v++) *= x;}
773: }
774: VecRestoreArray(mdn->lvec,&r);
775: PetscLogFlops(n*m);
776: }
777: return(0);
778: }
782: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
783: {
784: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
785: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
787: PetscInt i,j;
788: PetscReal sum = 0.0;
789: PetscScalar *v = mat->v;
792: if (mdn->size == 1) {
793: MatNorm(mdn->A,type,nrm);
794: } else {
795: if (type == NORM_FROBENIUS) {
796: for (i=0; i<mdn->A->n*mdn->A->m; i++) {
797: #if defined(PETSC_USE_COMPLEX)
798: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
799: #else
800: sum += (*v)*(*v); v++;
801: #endif
802: }
803: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);
804: *nrm = sqrt(*nrm);
805: PetscLogFlops(2*mdn->A->n*mdn->A->m);
806: } else if (type == NORM_1) {
807: PetscReal *tmp,*tmp2;
808: PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);
809: tmp2 = tmp + A->N;
810: PetscMemzero(tmp,2*A->N*sizeof(PetscReal));
811: *nrm = 0.0;
812: v = mat->v;
813: for (j=0; j<mdn->A->n; j++) {
814: for (i=0; i<mdn->A->m; i++) {
815: tmp[j] += PetscAbsScalar(*v); v++;
816: }
817: }
818: MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);
819: for (j=0; j<A->N; j++) {
820: if (tmp2[j] > *nrm) *nrm = tmp2[j];
821: }
822: PetscFree(tmp);
823: PetscLogFlops(A->n*A->m);
824: } else if (type == NORM_INFINITY) { /* max row norm */
825: PetscReal ntemp;
826: MatNorm(mdn->A,type,&ntemp);
827: MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);
828: } else {
829: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
830: }
831: }
832: return(0);
833: }
837: PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
838: {
839: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
840: Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
841: Mat B;
842: PetscInt M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
844: PetscInt j,i;
845: PetscScalar *v;
848: if (!matout && M != N) {
849: SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
850: }
851: MatCreate(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,&B);
852: MatSetType(B,A->type_name);
853: MatMPIDenseSetPreallocation(B,PETSC_NULL);
855: m = a->A->m; n = a->A->n; v = Aloc->v;
856:   PetscMalloc(m*sizeof(PetscInt),&rwork);
857:   for (i=0; i<m; i++) rwork[i] = rstart + i;
858:   for (j=0; j<n; j++) {
859: MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
860: v += m;
861: }
862: PetscFree(rwork);
863: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
864: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
865: if (matout) {
866: *matout = B;
867: } else {
868: MatHeaderCopy(A,B);
869: }
870: return(0);
871: }
873: #include "petscblaslapack.h"
876: PetscErrorCode MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
877: {
878: Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
879: Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
880: PetscBLASInt one = 1,nz = (PetscBLASInt)inA->m*inA->N;
883: BLscal_(&nz,(PetscScalar*)alpha,a->v,&one);
884: PetscLogFlops(nz);
885: return(0);
886: }
888: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
892: PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
893: {
897: MatMPIDenseSetPreallocation(A,0);
898: return(0);
899: }
901: /* -------------------------------------------------------------------*/
902: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
903: MatGetRow_MPIDense,
904: MatRestoreRow_MPIDense,
905: MatMult_MPIDense,
906: /* 4*/ MatMultAdd_MPIDense,
907: MatMultTranspose_MPIDense,
908: MatMultTransposeAdd_MPIDense,
909: 0,
910: 0,
911: 0,
912: /*10*/ 0,
913: 0,
914: 0,
915: 0,
916: MatTranspose_MPIDense,
917: /*15*/ MatGetInfo_MPIDense,
918: 0,
919: MatGetDiagonal_MPIDense,
920: MatDiagonalScale_MPIDense,
921: MatNorm_MPIDense,
922: /*20*/ MatAssemblyBegin_MPIDense,
923: MatAssemblyEnd_MPIDense,
924: 0,
925: MatSetOption_MPIDense,
926: MatZeroEntries_MPIDense,
927: /*25*/ MatZeroRows_MPIDense,
928: 0,
929: 0,
930: 0,
931: 0,
932: /*30*/ MatSetUpPreallocation_MPIDense,
933: 0,
934: 0,
935: MatGetArray_MPIDense,
936: MatRestoreArray_MPIDense,
937: /*35*/ MatDuplicate_MPIDense,
938: 0,
939: 0,
940: 0,
941: 0,
942: /*40*/ 0,
943: MatGetSubMatrices_MPIDense,
944: 0,
945: MatGetValues_MPIDense,
946: 0,
947: /*45*/ 0,
948: MatScale_MPIDense,
949: 0,
950: 0,
951: 0,
952: /*50*/ 0,
953: 0,
954: 0,
955: 0,
956: 0,
957: /*55*/ 0,
958: 0,
959: 0,
960: 0,
961: 0,
962: /*60*/ MatGetSubMatrix_MPIDense,
963: MatDestroy_MPIDense,
964: MatView_MPIDense,
965: MatGetPetscMaps_Petsc,
966: 0,
967: /*65*/ 0,
968: 0,
969: 0,
970: 0,
971: 0,
972: /*70*/ 0,
973: 0,
974: 0,
975: 0,
976: 0,
977: /*75*/ 0,
978: 0,
979: 0,
980: 0,
981: 0,
982: /*80*/ 0,
983: 0,
984: 0,
985: 0,
986: /*84*/ MatLoad_MPIDense,
987: 0,
988: 0,
989: 0,
990: 0,
991: 0,
992: /*90*/ 0,
993: 0,
994: 0,
995: 0,
996: 0,
997: /*95*/ 0,
998: 0,
999: 0,
1000: 0};
1005: PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1006: {
1007: Mat_MPIDense *a;
1011: mat->preallocated = PETSC_TRUE;
1012: /* Note: For now, when data is specified above, this assumes the user correctly
1013: allocates the local dense storage space. We should add error checking. */
1015: a = (Mat_MPIDense*)mat->data;
1016: MatCreate(PETSC_COMM_SELF,mat->m,mat->N,mat->m,mat->N,&a->A);
1017: MatSetType(a->A,MATSEQDENSE);
1018: MatSeqDenseSetPreallocation(a->A,data);
1019: PetscLogObjectParent(mat,a->A);
1020: return(0);
1021: }
1024: /*MC
1025: MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1027: Options Database Keys:
1028: . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1030: Level: beginner
1032: .seealso: MatCreateMPIDense
1033: M*/
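/*
   Example usage (an illustrative sketch only; comm, M and N stand for values
   supplied by the caller):

     Mat A;
     MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);
     MatSetType(A,MATMPIDENSE);
     MatMPIDenseSetPreallocation(A,PETSC_NULL);

   or equivalently use -mat_type mpidense together with MatSetFromOptions().
*/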
1038: PetscErrorCode MatCreate_MPIDense(Mat mat)
1039: {
1040: Mat_MPIDense *a;
1042: PetscInt i;
1045: PetscNew(Mat_MPIDense,&a);
1046: mat->data = (void*)a;
1047: PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1048: mat->factor = 0;
1049: mat->mapping = 0;
1051: a->factor = 0;
1052: mat->insertmode = NOT_SET_VALUES;
1053: MPI_Comm_rank(mat->comm,&a->rank);
1054: MPI_Comm_size(mat->comm,&a->size);
1056: PetscSplitOwnership(mat->comm,&mat->m,&mat->M);
1057: PetscSplitOwnership(mat->comm,&mat->n,&mat->N);
1058: a->nvec = mat->n;
1060:   /* the information in the maps duplicates the information computed below; eventually
1061:      we should remove the duplicated information that is not contained in the maps */
1062: PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);
1063: PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);
1065: /* build local table of row and column ownerships */
1066: PetscMalloc(2*(a->size+2)*sizeof(PetscInt),&a->rowners);
1067: a->cowners = a->rowners + a->size + 1;
1068: PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1069: MPI_Allgather(&mat->m,1,MPIU_INT,a->rowners+1,1,MPIU_INT,mat->comm);
1070: a->rowners[0] = 0;
1071: for (i=2; i<=a->size; i++) {
1072: a->rowners[i] += a->rowners[i-1];
1073: }
1074: a->rstart = a->rowners[a->rank];
1075: a->rend = a->rowners[a->rank+1];
1076: MPI_Allgather(&mat->n,1,MPIU_INT,a->cowners+1,1,MPIU_INT,mat->comm);
1077: a->cowners[0] = 0;
1078: for (i=2; i<=a->size; i++) {
1079: a->cowners[i] += a->cowners[i-1];
1080: }
1082:   /* build cache (stash) for entries destined for other processes */
1083: a->donotstash = PETSC_FALSE;
1084: MatStashCreate_Private(mat->comm,1,&mat->stash);
1086: /* stuff used for matrix vector multiply */
1087: a->lvec = 0;
1088: a->Mvctx = 0;
1089: a->roworiented = PETSC_TRUE;
1091: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1092: "MatGetDiagonalBlock_MPIDense",
1093: MatGetDiagonalBlock_MPIDense);
1094: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1095: "MatMPIDenseSetPreallocation_MPIDense",
1096: MatMPIDenseSetPreallocation_MPIDense);
1097: return(0);
1098: }
1101: /*MC
1102: MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1104: This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1105: and MATMPIDENSE otherwise.
1107: Options Database Keys:
1108: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1110: Level: beginner
1112: .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1113: M*/
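/*
   Example usage (an illustrative sketch; comm, M and N stand for caller-supplied
   values). As MatCreate_Dense() below shows, the concrete type is chosen by the
   size of the communicator:

     Mat A;
     MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);
     MatSetType(A,MATDENSE);    (becomes MATSEQDENSE on one process, MATMPIDENSE otherwise)
*/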
1118: PetscErrorCode MatCreate_Dense(Mat A)
1119: {
1121: PetscMPIInt size;
1124: PetscObjectChangeTypeName((PetscObject)A,MATDENSE);
1125: MPI_Comm_size(A->comm,&size);
1126: if (size == 1) {
1127: MatSetType(A,MATSEQDENSE);
1128: } else {
1129: MatSetType(A,MATMPIDENSE);
1130: }
1131: return(0);
1132: }
1137: /*@C
1138: MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1140: Not collective
1142: Input Parameters:
1143: +  A - the matrix
1144: - data - optional location of matrix data. Set data=PETSC_NULL for PETSc
1145: to control all matrix memory allocation.
1147: Notes:
1148: The dense format is fully compatible with standard Fortran 77
1149: storage by columns.
1151: The data input variable is intended primarily for Fortran programmers
1152: who wish to allocate their own matrix memory space. Most users should
1153: set data=PETSC_NULL.
1155: Level: intermediate
1157: .keywords: matrix,dense, parallel
1159: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1160: @*/
1161: PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1162: {
1163: PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1166: PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);
1167: if (f) {
1168: (*f)(mat,data);
1169: }
1170: return(0);
1171: }
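/*
   Example usage (an illustrative sketch; mydata is a hypothetical caller-allocated
   column-major array holding the local m-by-N block created above):

     MatCreate(comm,m,n,M,N,&A);
     MatSetType(A,MATMPIDENSE);
     MatMPIDenseSetPreallocation(A,PETSC_NULL);      (PETSc allocates the storage)
   or
     MatMPIDenseSetPreallocation(A,mydata);          (use caller-supplied storage)
*/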
1175: /*@C
1176:    MatCreateMPIDense - Creates a parallel matrix in dense format.
1178: Collective on MPI_Comm
1180: Input Parameters:
1181: + comm - MPI communicator
1182: .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1183: .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1184: .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1185: .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1186: - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1187: to control all matrix memory allocation.
1189: Output Parameter:
1190: . A - the matrix
1192: Notes:
1193: The dense format is fully compatible with standard Fortran 77
1194: storage by columns.
1196: The data input variable is intended primarily for Fortran programmers
1197: who wish to allocate their own matrix memory space. Most users should
1198: set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1200: The user MUST specify either the local or global matrix dimensions
1201: (possibly both).
1203: Level: intermediate
1205: .keywords: matrix,dense, parallel
1207: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1208: @*/
1209: PetscErrorCode MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1210: {
1212: PetscMPIInt size;
1215: MatCreate(comm,m,n,M,N,A);
1216: MPI_Comm_size(comm,&size);
1217: if (size > 1) {
1218: MatSetType(*A,MATMPIDENSE);
1219: MatMPIDenseSetPreallocation(*A,data);
1220: } else {
1221: MatSetType(*A,MATSEQDENSE);
1222: MatSeqDenseSetPreallocation(*A,data);
1223: }
1224: return(0);
1225: }
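/*
   Example usage (an illustrative sketch; comm, m and N stand for caller-supplied
   values):

     Mat A;
     MatCreateMPIDense(comm,m,PETSC_DECIDE,PETSC_DECIDE,N,PETSC_NULL,&A);
     ... set entries with MatSetValues() ...
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/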
1229: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1230: {
1231: Mat mat;
1232: Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1236: *newmat = 0;
1237: MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);
1238: MatSetType(mat,A->type_name);
1239: PetscNew(Mat_MPIDense,&a);
1240: mat->data = (void*)a;
1241: PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1242: mat->factor = A->factor;
1243: mat->assembled = PETSC_TRUE;
1244: mat->preallocated = PETSC_TRUE;
1246: a->rstart = oldmat->rstart;
1247: a->rend = oldmat->rend;
1248: a->size = oldmat->size;
1249: a->rank = oldmat->rank;
1250: mat->insertmode = NOT_SET_VALUES;
1251: a->nvec = oldmat->nvec;
1252: a->donotstash = oldmat->donotstash;
1253: PetscMalloc((a->size+1)*sizeof(PetscInt),&a->rowners);
1254: PetscLogObjectMemory(mat,(a->size+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1255: PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(PetscInt));
1256: MatStashCreate_Private(A->comm,1,&mat->stash);
1258: MatSetUpMultiply_MPIDense(mat);
1259: MatDuplicate(oldmat->A,cpvalues,&a->A);
1260: PetscLogObjectParent(mat,a->A);
1261: *newmat = mat;
1262: return(0);
1263: }
1265: #include "petscsys.h"
1269: PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,const MatType type,Mat *newmat)
1270: {
1272: PetscMPIInt rank,size;
1273: PetscInt *rowners,i,m,nz,j;
1274: PetscScalar *array,*vals,*vals_ptr;
1275: MPI_Status status;
1278: MPI_Comm_rank(comm,&rank);
1279: MPI_Comm_size(comm,&size);
1281: /* determine ownership of all rows */
1282: m = M/size + ((M % size) > rank);
1283: PetscMalloc((size+2)*sizeof(PetscInt),&rowners);
1284: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
1285: rowners[0] = 0;
1286: for (i=2; i<=size; i++) {
1287: rowners[i] += rowners[i-1];
1288: }
1290: MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);
1291: MatSetType(*newmat,type);
1292: MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);
1293: MatGetArray(*newmat,&array);
1295: if (!rank) {
1296: PetscMalloc(m*N*sizeof(PetscScalar),&vals);
1298: /* read in my part of the matrix numerical values */
1299: PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);
1300:
1301:     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1302: vals_ptr = vals;
1303: for (i=0; i<m; i++) {
1304: for (j=0; j<N; j++) {
1305: array[i + j*m] = *vals_ptr++;
1306: }
1307: }
1309:     /* read in the other processors' parts and ship them out */
1310: for (i=1; i<size; i++) {
1311: nz = (rowners[i+1] - rowners[i])*N;
1312: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1313: MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
1314: }
1315: } else {
1316: /* receive numeric values */
1317: PetscMalloc(m*N*sizeof(PetscScalar),&vals);
1319: /* receive message of values*/
1320: MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);
1322:     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1323: vals_ptr = vals;
1324: for (i=0; i<m; i++) {
1325: for (j=0; j<N; j++) {
1326: array[i + j*m] = *vals_ptr++;
1327: }
1328: }
1329: }
1330: PetscFree(rowners);
1331: PetscFree(vals);
1332: MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);
1333: MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);
1334: return(0);
1335: }
1339: PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1340: {
1341: Mat A;
1342: PetscScalar *vals,*svals;
1343: MPI_Comm comm = ((PetscObject)viewer)->comm;
1344: MPI_Status status;
1345: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1346: PetscInt header[4],*rowlengths = 0,M,N,*cols;
1347: PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1348: PetscInt i,nz,j,rstart,rend;
1349: int fd;
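  /* Overview: process 0 reads the header and row lengths and scatters the lengths;
     it then reads each process's column indices and values in turn and ships them
     off, while every process inserts its own rows with MatSetValues() and assembles. */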
1353: MPI_Comm_size(comm,&size);
1354: MPI_Comm_rank(comm,&rank);
1355: if (!rank) {
1356: PetscViewerBinaryGetDescriptor(viewer,&fd);
1357: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1358: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1359: }
1361: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1362: M = header[1]; N = header[2]; nz = header[3];
1364: /*
1365: Handle case where matrix is stored on disk as a dense matrix
1366: */
1367: if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1368: MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);
1369: return(0);
1370: }
1372: /* determine ownership of all rows */
1373: m = M/size + ((M % size) > rank);
1374: PetscMalloc((size+2)*sizeof(PetscInt),&rowners);
1375: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1376: rowners[0] = 0;
1377: for (i=2; i<=size; i++) {
1378: rowners[i] += rowners[i-1];
1379: }
1380: rstart = rowners[rank];
1381: rend = rowners[rank+1];
1383: /* distribute row lengths to all processors */
1384: PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);
1385: offlens = ourlens + (rend-rstart);
1386: if (!rank) {
1387: PetscMalloc(M*sizeof(PetscInt),&rowlengths);
1388: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1389: PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);
1390: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1391: MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1392: PetscFree(sndcounts);
1393: } else {
1394: MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1395: }
1397: if (!rank) {
1398: /* calculate the number of nonzeros on each processor */
1399: PetscMalloc(size*sizeof(PetscInt),&procsnz);
1400: PetscMemzero(procsnz,size*sizeof(PetscInt));
1401: for (i=0; i<size; i++) {
1402: for (j=rowners[i]; j< rowners[i+1]; j++) {
1403: procsnz[i] += rowlengths[j];
1404: }
1405: }
1406: PetscFree(rowlengths);
1408: /* determine max buffer needed and allocate it */
1409: maxnz = 0;
1410: for (i=0; i<size; i++) {
1411: maxnz = PetscMax(maxnz,procsnz[i]);
1412: }
1413: PetscMalloc(maxnz*sizeof(PetscInt),&cols);
1415: /* read in my part of the matrix column indices */
1416: nz = procsnz[0];
1417: PetscMalloc(nz*sizeof(PetscInt),&mycols);
1418: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1420:     /* read in everyone else's part and ship it off */
1421: for (i=1; i<size; i++) {
1422: nz = procsnz[i];
1423: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1424: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1425: }
1426: PetscFree(cols);
1427: } else {
1428: /* determine buffer space needed for message */
1429: nz = 0;
1430: for (i=0; i<m; i++) {
1431: nz += ourlens[i];
1432: }
1433: PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);
1435: /* receive message of column indices*/
1436: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1437: MPI_Get_count(&status,MPIU_INT,&maxnz);
1438: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1439: }
1441: /* loop over local rows, determining number of off diagonal entries */
1442: PetscMemzero(offlens,m*sizeof(PetscInt));
1443: jj = 0;
1444: for (i=0; i<m; i++) {
1445: for (j=0; j<ourlens[i]; j++) {
1446: if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1447: jj++;
1448: }
1449: }
1451: /* create our matrix */
1452: for (i=0; i<m; i++) {
1453: ourlens[i] -= offlens[i];
1454: }
1455: MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);
1456: MatSetType(*newmat,type);
1457: MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);
1458: A = *newmat;
1459: for (i=0; i<m; i++) {
1460: ourlens[i] += offlens[i];
1461: }
1463: if (!rank) {
1464: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
1466: /* read in my part of the matrix numerical values */
1467: nz = procsnz[0];
1468: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1469:
1470: /* insert into matrix */
1471: jj = rstart;
1472: smycols = mycols;
1473: svals = vals;
1474: for (i=0; i<m; i++) {
1475: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1476: smycols += ourlens[i];
1477: svals += ourlens[i];
1478: jj++;
1479: }
1481:     /* read in the other processors' parts and ship them out */
1482: for (i=1; i<size; i++) {
1483: nz = procsnz[i];
1484: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1485: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1486: }
1487: PetscFree(procsnz);
1488: } else {
1489: /* receive numeric values */
1490: PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
1492: /* receive message of values*/
1493: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1494: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1495: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1497: /* insert into matrix */
1498: jj = rstart;
1499: smycols = mycols;
1500: svals = vals;
1501: for (i=0; i<m; i++) {
1502: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1503: smycols += ourlens[i];
1504: svals += ourlens[i];
1505: jj++;
1506: }
1507: }
1508: PetscFree(ourlens);
1509: PetscFree(vals);
1510: PetscFree(mycols);
1511: PetscFree(rowners);
1513: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1514: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1515: return(0);
1516: }