Actual source code: mpibdiag.c
1: /*
2: The basic matrix operations for block diagonal parallel
3: matrices.
4: */
5: #include "src/mat/impls/bdiag/mpi/mpibdiag.h"
9: PetscErrorCode MatSetValues_MPIBDiag(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
10: {
11: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
13: PetscInt i,j,row,rstart = mbd->rstart,rend = mbd->rend;
14: PetscTruth roworiented = mbd->roworiented;
17: for (i=0; i<m; i++) {
18: if (idxm[i] < 0) continue;
19: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
20: if (idxm[i] >= rstart && idxm[i] < rend) {
21: row = idxm[i] - rstart;
22: for (j=0; j<n; j++) {
23: if (idxn[j] < 0) continue;
24: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
25: if (roworiented) {
26: MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j,addv);
27: } else {
28: MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i+j*m,addv);
29: }
30: }
31: } else {
32: if (!mbd->donotstash) {
33: if (roworiented) {
34: MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
35: } else {
36: MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
37: }
38: }
39: }
40: }
41: return(0);
42: }
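/*
   Usage sketch for the routine above (indices and values are illustrative):
   any process may set entries in any global row; entries whose row lies
   outside the local range [rstart,rend) are placed in the stash and routed
   to their owner during assembly, unless MAT_IGNORE_OFF_PROC_ENTRIES is set.

      PetscInt    row = 0, cols[2] = {0,1};
      PetscScalar vals[2] = {1.0,2.0};
      MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES);
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/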
46: PetscErrorCode MatGetValues_MPIBDiag(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
47: {
48: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
50: PetscInt i,j,row,rstart = mbd->rstart,rend = mbd->rend;
53: for (i=0; i<m; i++) {
54: if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
55: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
56: if (idxm[i] >= rstart && idxm[i] < rend) {
57: row = idxm[i] - rstart;
58: for (j=0; j<n; j++) {
59: if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
60: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
61: MatGetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j);
62: }
63: } else {
64: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
65: }
66: }
67: return(0);
68: }
72: PetscErrorCode MatAssemblyBegin_MPIBDiag(Mat mat,MatAssemblyType mode)
73: {
74: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
75: MPI_Comm comm = mat->comm;
77: PetscInt nstash,reallocs;
78: InsertMode addv;
81: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
82: if (addv == (ADD_VALUES|INSERT_VALUES)) {
83: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
84: }
85: mat->insertmode = addv; /* in case this processor had no cache */
86: MatStashScatterBegin_Private(&mat->stash,mbd->rowners);
87: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
88: PetscLogInfo(0,"MatAssemblyBegin_MPIBDiag:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
89: return(0);
90: }
94: PetscErrorCode MatAssemblyEnd_MPIBDiag(Mat mat,MatAssemblyType mode)
95: {
96: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
97: Mat_SeqBDiag *mlocal;
99: PetscMPIInt n;
100: PetscInt i,*row,*col;
101: PetscInt *tmp1,*tmp2,len,ict,Mblock,Nblock,flg,j,rstart,ncols;
102: PetscScalar *val;
103: InsertMode addv = mat->insertmode;
107: while (1) {
108: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
109: if (!flg) break;
110:
111: for (i=0; i<n;) {
112: /* Now identify the consecutive vals belonging to the same row */
113: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
114: if (j < n) ncols = j-i;
115: else ncols = n-i;
116: /* Now assemble all these values with a single function call */
117: MatSetValues_MPIBDiag(mat,1,row+i,ncols,col+i,val+i,addv);
118: i = j;
119: }
120: }
121: MatStashScatterEnd_Private(&mat->stash);
123: MatAssemblyBegin(mbd->A,mode);
124: MatAssemblyEnd(mbd->A,mode);
126: /* Fix main diagonal location and determine global diagonals */
127: mlocal = (Mat_SeqBDiag*)mbd->A->data;
128: Mblock = mat->M/mat->bs; Nblock = mat->N/mat->bs;
129: len = Mblock + Nblock + 1; /* add 1 to prevent 0 malloc */
130: PetscMalloc(2*len*sizeof(PetscInt),&tmp1);
131: tmp2 = tmp1 + len;
132: PetscMemzero(tmp1,2*len*sizeof(PetscInt));
133: mlocal->mainbd = -1;
134: for (i=0; i<mlocal->nd; i++) {
135: if (mlocal->diag[i] + mbd->brstart == 0) mlocal->mainbd = i;
136: tmp1[mlocal->diag[i] + mbd->brstart + Mblock] = 1;
137: }
138: MPI_Allreduce(tmp1,tmp2,len,MPIU_INT,MPI_SUM,mat->comm);
139: ict = 0;
140: for (i=0; i<len; i++) {
141: if (tmp2[i]) {
142: mbd->gdiag[ict] = i - Mblock;
143: ict++;
144: }
145: }
146: mbd->gnd = ict;
147: PetscFree(tmp1);
149: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
150: MatSetUpMultiply_MPIBDiag(mat);
151: }
152: return(0);
153: }
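/*
   Worked example of the diagonal bookkeeping above: a locally stored diagonal
   d has global block-diagonal number d + brstart, and it is flagged at index
   (d + brstart + Mblock) of tmp1, whose length Mblock+Nblock+1 covers every
   possible diagonal number.  For instance, with Mblock = Nblock = 4 and
   brstart = 2, a local diagonal -2 is the global main diagonal 0 and sets
   tmp1[4]; the MPI_SUM reduction merges the per-process sets, and each global
   diagonal is recovered as (index - Mblock) when filling gdiag[].
*/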
157: PetscErrorCode MatZeroEntries_MPIBDiag(Mat A)
158: {
159: Mat_MPIBDiag *l = (Mat_MPIBDiag*)A->data;
163: MatZeroEntries(l->A);
164: return(0);
165: }
167: /* Again this uses the same basic strategy as the assembly and
168: scatter creation routines; we should try to do it systematically
169: if we can figure out the proper level of generality. */
171: /* The code does not handle the diagonal entries correctly unless the
172: matrix is square and the column and row ownerships are identical.
173: This is a BUG. The only way to fix it seems to be to access
174: the local matrix directly and not go through the MatZeroRows()
175: routine.
176: */
180: PetscErrorCode MatZeroRows_MPIBDiag(Mat A,IS is,const PetscScalar *diag)
181: {
182: Mat_MPIBDiag *l = (Mat_MPIBDiag*)A->data;
184: PetscMPIInt n,imdex,size = l->size,rank = l->rank,tag = A->tag;
185: PetscInt i,N,*rows,*owners = l->rowners;
186: PetscInt *nprocs,j,idx,nsends;
187: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
188: PetscInt *rvalues,count,base,slen,*source;
189: PetscInt *lens,*lrows,*values;
190: MPI_Comm comm = A->comm;
191: MPI_Request *send_waits,*recv_waits;
192: MPI_Status recv_status,*send_status;
193: IS istmp;
194: PetscTruth found;
197: ISGetLocalSize(is,&N);
198: ISGetIndices(is,&rows);
200: /* first count number of contributors to each processor */
201: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
202: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
203: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
204: for (i=0; i<N; i++) {
205: idx = rows[i];
206: found = PETSC_FALSE;
207: for (j=0; j<size; j++) {
208: if (idx >= owners[j] && idx < owners[j+1]) {
209: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
210: }
211: }
212: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"row out of range");
213: }
214: nsends = 0; for (i=0; i<size; i++) {nsends += nprocs[2*i+1];}
216: /* inform other processors of number of messages and max length*/
217: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
219: /* post receives: */
220: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
221: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
222: for (i=0; i<nrecvs; i++) {
223: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
224: }
226: /* do sends:
227: 1) starts[i] gives the starting index in svalues for stuff going to
228: the ith processor
229: */
230: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
231: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
232: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
233: starts[0] = 0;
234: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
235: for (i=0; i<N; i++) {
236: svalues[starts[owner[i]]++] = rows[i];
237: }
238: ISRestoreIndices(is,&rows);
240: starts[0] = 0;
241: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
242: count = 0;
243: for (i=0; i<size; i++) {
244: if (nprocs[2*i+1]) {
245: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
246: }
247: }
248: PetscFree(starts);
250: base = owners[rank];
252: /* wait on receives */
253: PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);
254: source = lens + nrecvs;
255: count = nrecvs;
256: slen = 0;
257: while (count) {
258: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
259: /* unpack receives into our local space */
260: MPI_Get_count(&recv_status,MPIU_INT,&n);
261: source[imdex] = recv_status.MPI_SOURCE;
262: lens[imdex] = n;
263: slen += n;
264: count--;
265: }
266: PetscFree(recv_waits);
267:
268: /* move the data into the send scatter */
269: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
270: count = 0;
271: for (i=0; i<nrecvs; i++) {
272: values = rvalues + i*nmax;
273: for (j=0; j<lens[i]; j++) {
274: lrows[count++] = values[j] - base;
275: }
276: }
277: PetscFree(rvalues);
278: PetscFree(lens);
279: PetscFree(owner);
280: PetscFree(nprocs);
281:
282: /* actually zap the local rows */
283: ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
284: PetscLogObjectParent(A,istmp);
285: PetscFree(lrows);
286: MatZeroRows(l->A,istmp,diag);
287: ISDestroy(istmp);
289: /* wait on sends */
290: if (nsends) {
291: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
292: MPI_Waitall(nsends,send_waits,send_status);
293: PetscFree(send_status);
294: }
295: PetscFree(send_waits);
296: PetscFree(svalues);
298: return(0);
299: }
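/*
   Usage sketch for the routine above (row numbers are illustrative): each
   process passes the global rows it wants zeroed, and the rows are routed to
   their owners before the local MatZeroRows() is applied.  As the comment
   before this routine warns, the diagonal entry is only handled correctly for
   square matrices whose row and column ownerships coincide.

      IS          is;
      PetscInt    rows[2] = {0,5};
      PetscScalar one = 1.0;
      ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);
      MatZeroRows(A,is,&one);
      ISDestroy(is);
*/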
303: PetscErrorCode MatMult_MPIBDiag(Mat mat,Vec xx,Vec yy)
304: {
305: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
309: VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
310: VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
311: (*mbd->A->ops->mult)(mbd->A,mbd->lvec,yy);
312: return(0);
313: }
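/*
   The product above scatters the input vector into the local work vector
   lvec and applies the local matrix, which is stored with the full global
   column width (see the preallocation routine below).  Minimal usage sketch;
   the local sizes m and n are assumed to match the row and column layout of A:

      Vec x,y;
      VecCreateMPI(comm,n,N,&x);
      VecCreateMPI(comm,m,M,&y);
      MatMult(A,x,y);
*/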
317: PetscErrorCode MatMultAdd_MPIBDiag(Mat mat,Vec xx,Vec yy,Vec zz)
318: {
319: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
323: VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
324: VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
325: (*mbd->A->ops->multadd)(mbd->A,mbd->lvec,yy,zz);
326: return(0);
327: }
331: PetscErrorCode MatMultTranspose_MPIBDiag(Mat A,Vec xx,Vec yy)
332: {
333: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
335: PetscScalar zero = 0.0;
338: VecSet(&zero,yy);
339: (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
340: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
341: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
342: return(0);
343: }
347: PetscErrorCode MatMultTransposeAdd_MPIBDiag(Mat A,Vec xx,Vec yy,Vec zz)
348: {
349: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
353: VecCopy(yy,zz);
354: (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
355: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
356: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
357: return(0);
358: }
362: PetscErrorCode MatGetInfo_MPIBDiag(Mat matin,MatInfoType flag,MatInfo *info)
363: {
364: Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
366: PetscReal isend[5],irecv[5];
369: info->block_size = (PetscReal)mat->A->bs;
370: MatGetInfo(mat->A,MAT_LOCAL,info);
371: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
372: isend[3] = info->memory; isend[4] = info->mallocs;
373: if (flag == MAT_LOCAL) {
374: info->nz_used = isend[0];
375: info->nz_allocated = isend[1];
376: info->nz_unneeded = isend[2];
377: info->memory = isend[3];
378: info->mallocs = isend[4];
379: } else if (flag == MAT_GLOBAL_MAX) {
380: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
381: info->nz_used = irecv[0];
382: info->nz_allocated = irecv[1];
383: info->nz_unneeded = irecv[2];
384: info->memory = irecv[3];
385: info->mallocs = irecv[4];
386: } else if (flag == MAT_GLOBAL_SUM) {
387: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
388: info->nz_used = irecv[0];
389: info->nz_allocated = irecv[1];
390: info->nz_unneeded = irecv[2];
391: info->memory = irecv[3];
392: info->mallocs = irecv[4];
393: }
394: info->rows_global = (double)matin->M;
395: info->columns_global = (double)matin->N;
396: info->rows_local = (double)matin->m;
397: info->columns_local = (double)matin->N;
398: return(0);
399: }
403: PetscErrorCode MatGetDiagonal_MPIBDiag(Mat mat,Vec v)
404: {
406: Mat_MPIBDiag *A = (Mat_MPIBDiag*)mat->data;
409: MatGetDiagonal(A->A,v);
410: return(0);
411: }
415: PetscErrorCode MatDestroy_MPIBDiag(Mat mat)
416: {
417: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
419: #if defined(PETSC_USE_LOG)
420: Mat_SeqBDiag *ms = (Mat_SeqBDiag*)mbd->A->data;
423: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D, BSize=%D, NDiag=%D",mat->M,mat->N,mat->bs,ms->nd);
424: #else
426: #endif
427: MatStashDestroy_Private(&mat->stash);
428: PetscFree(mbd->rowners);
429: PetscFree(mbd->gdiag);
430: MatDestroy(mbd->A);
431: if (mbd->lvec) {VecDestroy(mbd->lvec);}
432: if (mbd->Mvctx) {VecScatterDestroy(mbd->Mvctx);}
433: PetscFree(mbd);
434: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
435: PetscObjectComposeFunction((PetscObject)mat,"MatMPIBDiagSetPreallocation_C","",PETSC_NULL);
436: return(0);
437: }
442: static PetscErrorCode MatView_MPIBDiag_Binary(Mat mat,PetscViewer viewer)
443: {
444: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
448: if (mbd->size == 1) {
449: MatView(mbd->A,viewer);
450: } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
451: return(0);
452: }
456: static PetscErrorCode MatView_MPIBDiag_ASCIIorDraw(Mat mat,PetscViewer viewer)
457: {
458: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
459: PetscErrorCode ierr;
460: PetscMPIInt size = mbd->size,rank = mbd->rank;
461: PetscInt i;
462: PetscTruth iascii,isdraw;
463: PetscViewer sviewer;
464: PetscViewerFormat format;
467: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
468: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
469: if (iascii) {
470: PetscViewerGetFormat(viewer,&format);
471: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
472: PetscInt nline = PetscMin(10,mbd->gnd),k,nk,np;
473: PetscViewerASCIIPrintf(viewer," block size=%D, total number of diagonals=%D\n",mat->bs,mbd->gnd);
474: nk = (mbd->gnd-1)/nline + 1;
475: for (k=0; k<nk; k++) {
476: PetscViewerASCIIPrintf(viewer," global diag numbers:");
477: np = PetscMin(nline,mbd->gnd - nline*k);
478: for (i=0; i<np; i++) {
479: PetscViewerASCIIPrintf(viewer," %D",mbd->gdiag[i+nline*k]);
480: }
481: PetscViewerASCIIPrintf(viewer,"\n");
482: }
483: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
484: MatInfo info;
485: MPI_Comm_rank(mat->comm,&rank);
486: MatGetInfo(mat,MAT_LOCAL,&info);
487: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->m,
488: (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
489: PetscViewerFlush(viewer);
490: VecScatterView(mbd->Mvctx,viewer);
491: }
492: return(0);
493: }
494: }
496: if (isdraw) {
497: PetscDraw draw;
498: PetscTruth isnull;
499: PetscViewerDrawGetDraw(viewer,0,&draw);
500: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
501: }
503: if (size == 1) {
504: MatView(mbd->A,viewer);
505: } else {
506: /* assemble the entire matrix onto first processor. */
507: Mat A;
508: PetscInt M = mat->M,N = mat->N,m,row,nz,*cols;
509: PetscScalar *vals;
511: /* Here we are constructing a temporary matrix, so we will explicitly set the type to MPIBDiag */
512: if (!rank) {
513: MatCreate(mat->comm,M,M,M,N,&A);
514: MatSetType(A,MATMPIBDIAG);
515: MatMPIBDiagSetPreallocation(A,mbd->gnd,mbd->A->bs,mbd->gdiag,PETSC_NULL);
516: } else {
517: MatCreate(mat->comm,0,0,M,N,&A);
518: MatSetType(A,MATMPIBDIAG);
519: MatMPIBDiagSetPreallocation(A,0,mbd->A->bs,PETSC_NULL,PETSC_NULL);
520: }
521: PetscLogObjectParent(mat,A);
523: /* Copy the matrix ... This isn't the most efficient means,
524: but it's quick for now */
525: row = mbd->rstart;
526: m = mbd->A->m;
527: for (i=0; i<m; i++) {
528: MatGetRow_MPIBDiag(mat,row,&nz,&cols,&vals);
529: MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);
530: MatRestoreRow_MPIBDiag(mat,row,&nz,&cols,&vals);
531: row++;
532: }
533: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
534: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
535: PetscViewerGetSingleton(viewer,&sviewer);
536: if (!rank) {
537: MatView(((Mat_MPIBDiag*)(A->data))->A,sviewer);
538: }
539: PetscViewerRestoreSingleton(viewer,&sviewer);
540: PetscViewerFlush(viewer);
541: MatDestroy(A);
542: }
543: return(0);
544: }
548: PetscErrorCode MatView_MPIBDiag(Mat mat,PetscViewer viewer)
549: {
551: PetscTruth iascii,isdraw,isbinary;
554: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
555: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
556: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
557: if (iascii || isdraw) {
558: MatView_MPIBDiag_ASCIIorDraw(mat,viewer);
559: } else if (isbinary) {
560: MatView_MPIBDiag_Binary(mat,viewer);
561: } else {
562: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBdiag matrices",((PetscObject)viewer)->type_name);
563: }
564: return(0);
565: }
569: PetscErrorCode MatSetOption_MPIBDiag(Mat A,MatOption op)
570: {
571: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)A->data;
574: switch (op) {
575: case MAT_NO_NEW_NONZERO_LOCATIONS:
576: case MAT_YES_NEW_NONZERO_LOCATIONS:
577: case MAT_NEW_NONZERO_LOCATION_ERR:
578: case MAT_NEW_NONZERO_ALLOCATION_ERR:
579: case MAT_NO_NEW_DIAGONALS:
580: case MAT_YES_NEW_DIAGONALS:
581: MatSetOption(mbd->A,op);
582: break;
583: case MAT_ROW_ORIENTED:
584: mbd->roworiented = PETSC_TRUE;
585: MatSetOption(mbd->A,op);
586: break;
587: case MAT_COLUMN_ORIENTED:
588: mbd->roworiented = PETSC_FALSE;
589: MatSetOption(mbd->A,op);
590: break;
591: case MAT_IGNORE_OFF_PROC_ENTRIES:
592: mbd->donotstash = PETSC_TRUE;
593: break;
594: case MAT_ROWS_SORTED:
595: case MAT_ROWS_UNSORTED:
596: case MAT_COLUMNS_SORTED:
597: case MAT_COLUMNS_UNSORTED:
598: PetscLogInfo(A,"MatSetOption_MPIBDiag:Option ignored\n");
599: break;
600: case MAT_SYMMETRIC:
601: case MAT_STRUCTURALLY_SYMMETRIC:
602: case MAT_NOT_SYMMETRIC:
603: case MAT_NOT_STRUCTURALLY_SYMMETRIC:
604: case MAT_HERMITIAN:
605: case MAT_NOT_HERMITIAN:
606: case MAT_SYMMETRY_ETERNAL:
607: case MAT_NOT_SYMMETRY_ETERNAL:
608: break;
609: default:
610: SETERRQ(PETSC_ERR_SUP,"unknown option");
611: }
612: return(0);
613: }
618: PetscErrorCode MatGetRow_MPIBDiag(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
619: {
620: Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
622: PetscInt lrow;
625: if (row < mat->rstart || row >= mat->rend) SETERRQ(PETSC_ERR_SUP,"only for local rows");
626: lrow = row - mat->rstart;
627: MatGetRow_SeqBDiag(mat->A,lrow,nz,idx,v);
628: return(0);
629: }
633: PetscErrorCode MatRestoreRow_MPIBDiag(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
634: {
635: Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
637: PetscInt lrow;
640: lrow = row - mat->rstart;
641: MatRestoreRow_SeqBDiag(mat->A,lrow,nz,idx,v);
642: return(0);
643: }
648: PetscErrorCode MatNorm_MPIBDiag(Mat A,NormType type,PetscReal *nrm)
649: {
650: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)A->data;
651: Mat_SeqBDiag *a = (Mat_SeqBDiag*)mbd->A->data;
652: PetscReal sum = 0.0;
654: PetscInt d,i,nd = a->nd,bs = A->bs,len;
655: PetscScalar *dv;
658: if (type == NORM_FROBENIUS) {
659: for (d=0; d<nd; d++) {
660: dv = a->diagv[d];
661: len = a->bdlen[d]*bs*bs;
662: for (i=0; i<len; i++) {
663: #if defined(PETSC_USE_COMPLEX)
664: sum += PetscRealPart(PetscConj(dv[i])*dv[i]);
665: #else
666: sum += dv[i]*dv[i];
667: #endif
668: }
669: }
670: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);
671: *nrm = sqrt(*nrm);
672: PetscLogFlops(2*A->n*A->m);
673: } else if (type == NORM_1) { /* max column norm */
674: PetscReal *tmp,*tmp2;
675: PetscInt j;
676: PetscMalloc((mbd->A->n+1)*sizeof(PetscReal),&tmp);
677: PetscMalloc((mbd->A->n+1)*sizeof(PetscReal),&tmp2);
678: MatNorm_SeqBDiag_Columns(mbd->A,tmp,mbd->A->n);
679: *nrm = 0.0;
680: MPI_Allreduce(tmp,tmp2,mbd->A->n,MPIU_REAL,MPI_SUM,A->comm);
681: for (j=0; j<mbd->A->n; j++) {
682: if (tmp2[j] > *nrm) *nrm = tmp2[j];
683: }
684: PetscFree(tmp);
685: PetscFree(tmp2);
686: } else if (type == NORM_INFINITY) { /* max row norm */
687: PetscReal normtemp;
688: MatNorm(mbd->A,type,&normtemp);
689: MPI_Allreduce(&normtemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);
690: }
691: return(0);
692: }
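/*
   In formula form, the Frobenius branch above computes

      nrm = sqrt( sum_p sum_{entries stored on process p} |a_ij|^2 ),

   each process summing over every entry of every diagonal block it stores and
   the partial sums being combined with MPI_SUM before the square root.  The
   1-norm reduces per-column sums with MPI_SUM and takes their maximum; the
   infinity norm reduces the local row-sum maxima with MPI_MAX.
*/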
696: PetscErrorCode MatPrintHelp_MPIBDiag(Mat A)
697: {
698: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
702: if (!a->rank) {
703: MatPrintHelp_SeqBDiag(a->A);
704: }
705: return(0);
706: }
710: PetscErrorCode MatScale_MPIBDiag(const PetscScalar *alpha,Mat A)
711: {
713: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
716: MatScale_SeqBDiag(alpha,a->A);
717: return(0);
718: }
722: PetscErrorCode MatSetUpPreallocation_MPIBDiag(Mat A)
723: {
727: MatMPIBDiagSetPreallocation(A,PETSC_DEFAULT,PETSC_DEFAULT,0,0);
728: return(0);
729: }
731: /* -------------------------------------------------------------------*/
733: static struct _MatOps MatOps_Values = {MatSetValues_MPIBDiag,
734: MatGetRow_MPIBDiag,
735: MatRestoreRow_MPIBDiag,
736: MatMult_MPIBDiag,
737: /* 4*/ MatMultAdd_MPIBDiag,
738: MatMultTranspose_MPIBDiag,
739: MatMultTransposeAdd_MPIBDiag,
740: 0,
741: 0,
742: 0,
743: /*10*/ 0,
744: 0,
745: 0,
746: 0,
747: 0,
748: /*15*/ MatGetInfo_MPIBDiag,
749: 0,
750: MatGetDiagonal_MPIBDiag,
751: 0,
752: MatNorm_MPIBDiag,
753: /*20*/ MatAssemblyBegin_MPIBDiag,
754: MatAssemblyEnd_MPIBDiag,
755: 0,
756: MatSetOption_MPIBDiag,
757: MatZeroEntries_MPIBDiag,
758: /*25*/ MatZeroRows_MPIBDiag,
759: 0,
760: 0,
761: 0,
762: 0,
763: /*30*/ MatSetUpPreallocation_MPIBDiag,
764: 0,
765: 0,
766: 0,
767: 0,
768: /*35*/ 0,
769: 0,
770: 0,
771: 0,
772: 0,
773: /*40*/ 0,
774: 0,
775: 0,
776: MatGetValues_MPIBDiag,
777: 0,
778: /*45*/ MatPrintHelp_MPIBDiag,
779: MatScale_MPIBDiag,
780: 0,
781: 0,
782: 0,
783: /*50*/ 0,
784: 0,
785: 0,
786: 0,
787: 0,
788: /*55*/ 0,
789: 0,
790: 0,
791: 0,
792: 0,
793: /*60*/ 0,
794: MatDestroy_MPIBDiag,
795: MatView_MPIBDiag,
796: MatGetPetscMaps_Petsc,
797: 0,
798: /*65*/ 0,
799: 0,
800: 0,
801: 0,
802: 0,
803: /*70*/ 0,
804: 0,
805: 0,
806: 0,
807: 0,
808: /*75*/ 0,
809: 0,
810: 0,
811: 0,
812: 0,
813: /*80*/ 0,
814: 0,
815: 0,
816: 0,
817: MatLoad_MPIBDiag,
818: /*85*/ 0,
819: 0,
820: 0,
821: 0,
822: 0,
823: /*90*/ 0,
824: 0,
825: 0,
826: 0,
827: 0,
828: /*95*/ 0,
829: 0,
830: 0,
831: 0};
836: PetscErrorCode MatGetDiagonalBlock_MPIBDiag(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
837: {
838: Mat_MPIBDiag *matin = (Mat_MPIBDiag *)A->data;
840: PetscInt lrows,lcols,rstart,rend;
841: IS localc,localr;
844: MatGetLocalSize(A,&lrows,&lcols);
845: MatGetOwnershipRange(A,&rstart,&rend);
846: ISCreateStride(PETSC_COMM_SELF,lrows,rstart,1,&localc);
847: ISCreateStride(PETSC_COMM_SELF,lrows,0,1,&localr);
848: MatGetSubMatrix(matin->A,localr,localc,PETSC_DECIDE,reuse,a);
849: ISDestroy(localr);
850: ISDestroy(localc);
852: *iscopy = PETSC_TRUE;
853: return(0);
854: }
860: PetscErrorCode MatMPIBDiagSetPreallocation_MPIBDiag(Mat B,PetscInt nd,PetscInt bs,PetscInt *diag,PetscScalar **diagv)
861: {
862: Mat_MPIBDiag *b;
864: PetscInt i,k,*ldiag,len,nd2;
865: PetscScalar **ldiagv = 0;
866: PetscTruth flg2;
869: B->preallocated = PETSC_TRUE;
870: if (bs == PETSC_DEFAULT) bs = 1;
871: if (nd == PETSC_DEFAULT) nd = 0;
872: PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);
873: PetscOptionsGetInt(PETSC_NULL,"-mat_bdiag_ndiag",&nd,PETSC_NULL);
874: PetscOptionsHasName(PETSC_NULL,"-mat_bdiag_diags",&flg2);
875: if (nd && !diag) {
876: PetscMalloc(nd*sizeof(PetscInt),&diag);
877: nd2 = nd;
878: PetscOptionsGetIntArray(PETSC_NULL,"-mat_bdiag_dvals",diag,&nd2,PETSC_NULL);
879: if (nd2 != nd) {
880: SETERRQ(PETSC_ERR_ARG_INCOMP,"Incompatible number of diags and diagonal vals");
881: }
882: } else if (flg2) {
883: SETERRQ(PETSC_ERR_ARG_WRONG,"Must specify number of diagonals with -mat_bdiag_ndiag");
884: }
886: if (bs <= 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Blocksize must be positive");
888: PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);
889: PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);
891: if ((B->N%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad column number");
892: if ((B->m%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad local row number");
893: if ((B->M%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad global row number");
894: B->bs = bs;
896: /* The information in the maps duplicates the information computed below; eventually
897: we should remove the duplicate information that is not contained in the maps */
898: PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
899: PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);
902: b = (Mat_MPIBDiag*)B->data;
903: b->gnd = nd;
905: MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);
906: b->rowners[0] = 0;
907: for (i=2; i<=b->size; i++) {
908: b->rowners[i] += b->rowners[i-1];
909: }
910: b->rstart = b->rowners[b->rank];
911: b->rend = b->rowners[b->rank+1];
912: b->brstart = (b->rstart)/bs;
913: b->brend = (b->rend)/bs;
916: /* Determine local diagonals; for now, assume global rows = global cols */
917: /* These are sorted in MatCreateSeqBDiag */
918: PetscMalloc((nd+1)*sizeof(PetscInt),&ldiag);
919: len = B->M/bs + B->N/bs + 1;
920: PetscMalloc(len*sizeof(PetscInt),&b->gdiag);
921: k = 0;
922: PetscLogObjectMemory(B,(nd+1)*sizeof(PetscInt) + (b->size+2)*sizeof(PetscInt)
923: + sizeof(struct _p_Mat) + sizeof(Mat_MPIBDiag));
924: if (diagv) {
925: PetscMalloc((nd+1)*sizeof(PetscScalar*),&ldiagv);
926: }
927: for (i=0; i<nd; i++) {
928: b->gdiag[i] = diag[i];
929: if (diag[i] > 0) { /* lower triangular */
930: if (diag[i] < b->brend) {
931: ldiag[k] = diag[i] - b->brstart;
932: if (diagv) ldiagv[k] = diagv[i];
933: k++;
934: }
935: } else { /* upper triangular */
936: if (B->M/bs - diag[i] > B->N/bs) {
937: if (B->M/bs + diag[i] > b->brstart) {
938: ldiag[k] = diag[i] - b->brstart;
939: if (diagv) ldiagv[k] = diagv[i];
940: k++;
941: }
942: } else {
943: if (B->M/bs > b->brstart) {
944: ldiag[k] = diag[i] - b->brstart;
945: if (diagv) ldiagv[k] = diagv[i];
946: k++;
947: }
948: }
949: }
950: }
952: /* Form local matrix */
953: MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->A);
954: MatSetType(b->A,MATSEQBDIAG);
955: MatSeqBDiagSetPreallocation(b->A,k,bs,ldiag,ldiagv);
956: PetscLogObjectParent(B,b->A);
957: PetscFree(ldiag);
958: if (ldiagv) {PetscFree(ldiagv);}
960: return(0);
961: }
964: /*MC
965: MATMPIBDIAG - MATMPIBDIAG = "mpibdiag" - A matrix type to be used for distributed block diagonal matrices.
967: Options Database Keys:
968: . -mat_type mpibdiag - sets the matrix type to "mpibdiag" during a call to MatSetFromOptions()
970: Level: beginner
972: .seealso: MatCreateMPIBDiag()
973: M*/
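/*
   Construction sketch for this type (the same sequence MatCreateMPIBDiag()
   below performs); nd, bs and diag[] are user-chosen values:

      MatCreate(comm,m,m,M,N,&A);
      MatSetType(A,MATMPIBDIAG);
      MatMPIBDiagSetPreallocation(A,nd,bs,diag,PETSC_NULL);

   Alternatively, give -mat_type mpibdiag on the command line and call
   MatSetFromOptions() in place of MatSetType().
*/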
978: PetscErrorCode MatCreate_MPIBDiag(Mat B)
979: {
980: Mat_MPIBDiag *b;
984: PetscNew(Mat_MPIBDiag,&b);
985: B->data = (void*)b;
986: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
987: B->factor = 0;
988: B->mapping = 0;
990: B->insertmode = NOT_SET_VALUES;
991: MPI_Comm_rank(B->comm,&b->rank);
992: MPI_Comm_size(B->comm,&b->size);
994: /* build local table of row ownerships */
995: PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rowners);
997: /* build cache for off array entries formed */
998: MatStashCreate_Private(B->comm,1,&B->stash);
999: b->donotstash = PETSC_FALSE;
1001: /* stuff used for matrix-vector multiply */
1002: b->lvec = 0;
1003: b->Mvctx = 0;
1005: /* used for MatSetValues() input */
1006: b->roworiented = PETSC_TRUE;
1008: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1009: "MatGetDiagonalBlock_MPIBDiag",
1010: MatGetDiagonalBlock_MPIBDiag);
1011: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBDiagSetPreallocation_C",
1012: "MatMPIBDiagSetPreallocation_MPIBDiag",
1013: MatMPIBDiagSetPreallocation_MPIBDiag);
1014: return(0);
1015: }
1018: /*MC
1019: MATBDIAG - MATBDIAG = "bdiag" - A matrix type to be used for block diagonal matrices.
1021: This matrix type is identical to MATSEQBDIAG when constructed with a single process communicator,
1022: and MATMPIBDIAG otherwise.
1024: Options Database Keys:
1025: . -mat_type bdiag - sets the matrix type to "bdiag" during a call to MatSetFromOptions()
1027: Level: beginner
1029: .seealso: MatCreateMPIBDiag(), MATSEQBDIAG, MATMPIBDIAG
1030: M*/
1035: PetscErrorCode MatCreate_BDiag(Mat A)
1036: {
1038: PetscMPIInt size;
1041: PetscObjectChangeTypeName((PetscObject)A,MATBDIAG);
1042: MPI_Comm_size(A->comm,&size);
1043: if (size == 1) {
1044: MatSetType(A,MATSEQBDIAG);
1045: } else {
1046: MatSetType(A,MATMPIBDIAG);
1047: }
1048: return(0);
1049: }
1054: /*@C
1055: MatMPIBDiagSetPreallocation - Sets the block diagonal storage information and preallocation for a matrix of type MATMPIBDIAG.
1057: Collective on Mat
1059: Input Parameters:
1060: + A - the matrix
1061: . nd - number of block diagonals (global) (optional)
1062: . bs - each element of a diagonal is a bs x bs dense matrix
1063: . diag - optional array of block diagonal numbers (length nd).
1064: For a matrix element A[i,j], where i=row and j=column, the
1065: diagonal number is
1066: $ diag = i/bs - j/bs (integer division)
1067: Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as
1068: needed (expensive).
1069: - diagv - pointer to actual diagonals (in same order as diag array),
1070: if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
1071: to control memory allocation.
1074: Options Database Keys:
1075: + -mat_block_size <bs> - Sets blocksize
1076: - -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers
1078: Notes:
1079: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1080: then it must be used on all processors that share the object for that argument.
1082: The parallel matrix is partitioned across the processors by rows, where
1083: each local rectangular matrix is stored in the uniprocessor block
1084: diagonal format. See the users manual for further details.
1086: The user MUST specify either the local or global numbers of rows
1087: (possibly both).
1089: The case bs=1 (conventional diagonal storage) is implemented as
1090: a special case.
1092: Fortran Notes:
1093: Fortran programmers cannot set diagv; this variable is ignored.
1095: Level: intermediate
1097: .keywords: matrix, block, diagonal, parallel, sparse
1099: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
1100: @*/
1101: PetscErrorCode MatMPIBDiagSetPreallocation(Mat B,PetscInt nd,PetscInt bs,const PetscInt diag[],PetscScalar *diagv[])
1102: {
1103: PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscScalar*[]);
1106: PetscObjectQueryFunction((PetscObject)B,"MatMPIBDiagSetPreallocation_C",(void (**)(void))&f);
1107: if (f) {
1108: (*f)(B,nd,bs,diag,diagv);
1109: }
1110: return(0);
1111: }
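/*
   Preallocation sketch: a block tridiagonal matrix with block size 2 uses the
   three block diagonals 0, 1 and -1 (diag = i/bs - j/bs), so

      PetscInt diag[3] = {0,1,-1};
      MatMPIBDiagSetPreallocation(B,3,2,diag,PETSC_NULL);

   lets PETSc allocate the diagonal storage itself; alternatively, user-allocated
   diagonal arrays can be passed through the diagv argument (not available from
   Fortran, as noted above).
*/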
1115: /*@C
1116: MatCreateMPIBDiag - Creates a sparse parallel matrix in MPIBDiag format.
1118: Collective on MPI_Comm
1120: Input Parameters:
1121: + comm - MPI communicator
1122: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1123: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1124: . N - number of columns (local and global)
1125: . nd - number of block diagonals (global) (optional)
1126: . bs - each element of a diagonal is a bs x bs dense matrix
1127: . diag - optional array of block diagonal numbers (length nd).
1128: For a matrix element A[i,j], where i=row and j=column, the
1129: diagonal number is
1130: $ diag = i/bs - j/bs (integer division)
1131: Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as
1132: needed (expensive).
1133: - diagv - pointer to actual diagonals (in same order as diag array),
1134: if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
1135: to control memory allocation.
1137: Output Parameter:
1138: . A - the matrix
1140: Options Database Keys:
1141: + -mat_block_size <bs> - Sets blocksize
1142: - -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers
1144: Notes:
1145: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1146: then it must be used on all processors that share the object for that argument.
1148: The parallel matrix is partitioned across the processors by rows, where
1149: each local rectangular matrix is stored in the uniprocessor block
1150: diagonal format. See the users manual for further details.
1152: The user MUST specify either the local or global numbers of rows
1153: (possibly both).
1155: The case bs=1 (conventional diagonal storage) is implemented as
1156: a special case.
1158: Fortran Notes:
1159: Fortran programmers cannot set diagv; this variable is ignored.
1161: Level: intermediate
1163: .keywords: matrix, block, diagonal, parallel, sparse
1165: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
1166: @*/
1167: PetscErrorCode MatCreateMPIBDiag(MPI_Comm comm,PetscInt m,PetscInt M,PetscInt N,PetscInt nd,PetscInt bs,const PetscInt diag[],PetscScalar *diagv[],Mat *A)
1168: {
1170: PetscMPIInt size;
1173: MatCreate(comm,m,m,M,N,A);
1174: MPI_Comm_size(comm,&size);
1175: if (size > 1) {
1176: MatSetType(*A,MATMPIBDIAG);
1177: MatMPIBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1178: } else {
1179: MatSetType(*A,MATSEQBDIAG);
1180: MatSeqBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1181: }
1182: return(0);
1183: }
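/*
   Convenience-routine sketch: with bs = 1 (conventional diagonal storage), a
   tridiagonal matrix that is M x M globally and row-distributed by PETSc can
   be created as

      PetscInt diag[3] = {0,1,-1};
      Mat      A;
      MatCreateMPIBDiag(comm,PETSC_DECIDE,M,M,3,1,diag,PETSC_NULL,&A);

   followed by MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() as usual.
*/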
1187: /*@C
1188: MatBDiagGetData - Gets the data for the block diagonal matrix format.
1189: For the parallel case, this returns information for the local submatrix.
1191: Input Parameters:
1192: . mat - the matrix, stored in block diagonal format.
1194: Not Collective
1196: Output Parameters:
1197: + nd - number of block diagonals
1198: . bs - each element of a diagonal is a bs x bs dense matrix
1199: . bdlen - array of total block lengths of block diagonals
1200: . diag - array of block diagonal numbers (length nd).
1201: For a matrix element A[i,j], where i=row and j=column, the
1202: diagonal number is
1203: $ diag = i/bs - j/bs (integer division)
1204: - diagv - pointer to the actual diagonals (in the same order as the diag array)
1210: Level: advanced
1212: Notes:
1213: See the users manual for further details regarding this storage format.
1215: .keywords: matrix, block, diagonal, get, data
1217: .seealso: MatCreateSeqBDiag(), MatCreateMPIBDiag()
1218: @*/
1219: PetscErrorCode MatBDiagGetData(Mat mat,PetscInt *nd,PetscInt *bs,PetscInt *diag[],PetscInt *bdlen[],PetscScalar ***diagv)
1220: {
1221: Mat_MPIBDiag *pdmat;
1222: Mat_SeqBDiag *dmat = 0;
1223: PetscTruth isseq,ismpi;
1228: PetscTypeCompare((PetscObject)mat,MATSEQBDIAG,&isseq);
1229: PetscTypeCompare((PetscObject)mat,MATMPIBDIAG,&ismpi);
1230: if (isseq) {
1231: dmat = (Mat_SeqBDiag*)mat->data;
1232: } else if (ismpi) {
1233: pdmat = (Mat_MPIBDiag*)mat->data;
1234: dmat = (Mat_SeqBDiag*)pdmat->A->data;
1235: } else SETERRQ(PETSC_ERR_SUP,"Valid only for MATSEQBDIAG and MATMPIBDIAG formats");
1236: *nd = dmat->nd;
1237: *bs = mat->bs;
1238: *diag = dmat->diag;
1239: *bdlen = dmat->bdlen;
1240: *diagv = dmat->diagv;
1241: return(0);
1242: }
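/*
   Query sketch for the routine above; the returned arrays point at data owned
   by the matrix (an assumption consistent with the routine handing out internal
   pointers), so they should not be freed by the caller:

      PetscInt    nd,bs,*diag,*bdlen;
      PetscScalar **diagv;
      MatBDiagGetData(mat,&nd,&bs,&diag,&bdlen,&diagv);
*/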
1244: #include "petscsys.h"
1248: PetscErrorCode MatLoad_MPIBDiag(PetscViewer viewer,const MatType type,Mat *newmat)
1249: {
1250: Mat A;
1251: PetscScalar *vals,*svals;
1252: MPI_Comm comm = ((PetscObject)viewer)->comm;
1253: MPI_Status status;
1255: int fd;
1256: PetscMPIInt tag = ((PetscObject)viewer)->tag,rank,size,*sndcounts = 0,*rowners,maxnz,mm;
1257: PetscInt bs,i,nz,j,rstart,rend,*cols;
1258: PetscInt header[4],*rowlengths = 0,M,N,m,Mbs;
1259: PetscInt *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1260: PetscInt extra_rows;
1263: MPI_Comm_size(comm,&size);
1264: MPI_Comm_rank(comm,&rank);
1265: if (!rank) {
1266: PetscViewerBinaryGetDescriptor(viewer,&fd);
1267: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1268: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1269: if (header[3] < 0) {
1270: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBDiag");
1271: }
1272: }
1273: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1274: M = header[1]; N = header[2];
1276: bs = 1; /* uses a block size of 1 by default; */
1277: PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
1279: /*
1280: This code adds extra rows to make sure the number of rows is
1281: divisible by the blocksize
1282: */
1283: Mbs = M/bs;
1284: extra_rows = bs - M + bs*(Mbs);
1285: if (extra_rows == bs) extra_rows = 0;
1286: else Mbs++;
1287: if (extra_rows && !rank) {
1288: PetscLogInfo(0,"MatLoad_MPIBDiag:Padding loaded matrix to match blocksize\n");
1289: }
1291: /* determine ownership of all rows */
1292: m = bs*(Mbs/size + ((Mbs % size) > rank));
1293: PetscMalloc((size+2)*sizeof(PetscInt),&rowners);
1294: mm = (PetscMPIInt)m;
1295: MPI_Allgather(&mm,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1296: rowners[0] = 0;
1297: for (i=2; i<=size; i++) {
1298: rowners[i] += rowners[i-1];
1299: }
1300: rstart = rowners[rank];
1301: rend = rowners[rank+1];
1303: /* distribute row lengths to all processors */
1304: PetscMalloc((rend-rstart)*sizeof(PetscInt),&ourlens);
1305: if (!rank) {
1306: PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
1307: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1308: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1309: PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);
1310: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1311: MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1312: PetscFree(sndcounts);
1313: } else {
1314: MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1315: }
1317: if (!rank) {
1318: /* calculate the number of nonzeros on each processor */
1319: PetscMalloc(size*sizeof(PetscInt),&procsnz);
1320: PetscMemzero(procsnz,size*sizeof(PetscInt));
1321: for (i=0; i<size; i++) {
1322: for (j=rowners[i]; j<rowners[i+1]; j++) {
1323: procsnz[i] += rowlengths[j];
1324: }
1325: }
1326: PetscFree(rowlengths);
1328: /* determine max buffer needed and allocate it */
1329: maxnz = 0;
1330: for (i=0; i<size; i++) {
1331: maxnz = PetscMax(maxnz,procsnz[i]);
1332: }
1333: PetscMalloc(maxnz*sizeof(PetscInt),&cols);
1335: /* read in my part of the matrix column indices */
1336: nz = procsnz[0];
1337: PetscMalloc(nz*sizeof(PetscInt),&mycols);
1338: if (size == 1) nz -= extra_rows;
1339: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1340: if (size == 1) for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }
1342: /* read in everyone else's and ship it off */
1343: for (i=1; i<size-1; i++) {
1344: nz = procsnz[i];
1345: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1346: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1347: }
1348: /* read in the stuff for the last proc */
1349: if (size != 1) {
1350: nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
1351: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1352: for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
1353: MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);
1354: }
1355: PetscFree(cols);
1356: } else {
1357: /* determine buffer space needed for message */
1358: nz = 0;
1359: for (i=0; i<m; i++) {
1360: nz += ourlens[i];
1361: }
1362: PetscMalloc(nz*sizeof(PetscInt),&mycols);
1364: /* receive message of column indices*/
1365: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1366: MPI_Get_count(&status,MPIU_INT,&maxnz);
1367: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1368: }
1370: MatCreate(comm,m,m,M+extra_rows,N+extra_rows,newmat);
1371: MatSetType(*newmat,type);
1372: MatMPIBDiagSetPreallocation(*newmat,0,bs,PETSC_NULL,PETSC_NULL);
1373: A = *newmat;
1375: if (!rank) {
1376: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
1378: /* read in my part of the matrix numerical values */
1379: nz = procsnz[0];
1380: if (size == 1) nz -= extra_rows;
1381: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1382: if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }
1384: /* insert into matrix */
1385: jj = rstart;
1386: smycols = mycols;
1387: svals = vals;
1388: for (i=0; i<m; i++) {
1389: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1390: smycols += ourlens[i];
1391: svals += ourlens[i];
1392: jj++;
1393: }
1395: /* read in other processors (except the last one) and ship out */
1396: for (i=1; i<size-1; i++) {
1397: nz = procsnz[i];
1398: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1399: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1400: }
1401: /* the last proc */
1402: if (size != 1){
1403: nz = procsnz[i] - extra_rows;
1404: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1405: for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
1406: MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
1407: }
1408: PetscFree(procsnz);
1409: } else {
1410: /* receive numeric values */
1411: PetscMalloc(nz*sizeof(PetscScalar),&vals);
1413: /* receive message of values*/
1414: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1415: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1416: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1418: /* insert into matrix */
1419: jj = rstart;
1420: smycols = mycols;
1421: svals = vals;
1422: for (i=0; i<m; i++) {
1423: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1424: smycols += ourlens[i];
1425: svals += ourlens[i];
1426: jj++;
1427: }
1428: }
1429: PetscFree(ourlens);
1430: PetscFree(vals);
1431: PetscFree(mycols);
1432: PetscFree(rowners);
1434: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1435: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1436: return(0);
1437: }
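/*
   Loading sketch: given a binary viewer (for example one obtained from
   PetscViewerBinaryOpen()), a matrix written by MatView() can be read back in
   this format with

      Mat A;
      MatLoad(viewer,MATMPIBDIAG,&A);

   The option -matload_block_size <bs> selects the block size used while
   loading; if the global row count is not divisible by bs, the matrix is
   padded with identity rows as done above.
*/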