Actual source code: mpirowbs.c
2: #include "src/mat/impls/rowbs/mpi/mpirowbs.h"
4: #define CHUNCKSIZE_LOCAL 10
8: static PetscErrorCode MatFreeRowbs_Private(Mat A,int n,int *i,PetscScalar *v)
9: {
13: if (v) {
14: #if defined(PETSC_USE_LOG)
15: int len = -n*(sizeof(int)+sizeof(PetscScalar));
16: #endif
17: PetscFree(v);
18: PetscLogObjectMemory(A,len);
19: }
20: return(0);
21: }
25: static PetscErrorCode MatMallocRowbs_Private(Mat A,int n,int **i,PetscScalar **v)
26: {
28: int len;
31: if (!n) {
32: *i = 0; *v = 0;
33: } else {
34: len = n*(sizeof(int) + sizeof(PetscScalar));
35: PetscMalloc(len,v);
36: PetscLogObjectMemory(A,len);
37: *i = (int*)(*v + n);
38: }
39: return(0);
40: }
44: PetscErrorCode MatScale_MPIRowbs(const PetscScalar *alphain,Mat inA)
45: {
46: Mat_MPIRowbs *a = (Mat_MPIRowbs*)inA->data;
47: BSspmat *A = a->A;
48: BSsprow *vs;
49: PetscScalar *ap,alpha = *alphain;
50: int i,m = inA->m,nrow,j;
53: for (i=0; i<m; i++) {
54: vs = A->rows[i];
55: nrow = vs->length;
56: ap = vs->nz;
57: for (j=0; j<nrow; j++) {
58: ap[j] *= alpha;
59: }
60: }
61: PetscLogFlops(a->nz);
62: return(0);
63: }
65: /* ----------------------------------------------------------------- */
68: static PetscErrorCode MatCreateMPIRowbs_local(Mat A,int nz,const int nnz[])
69: {
70: Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)A->data;
72: int i,len,m = A->m,*tnnz;
73: BSspmat *bsmat;
74: BSsprow *vs;
77: PetscMalloc((m+1)*sizeof(int),&tnnz);
78: if (!nnz) {
79: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
80: if (nz <= 0) nz = 1;
81: for (i=0; i<m; i++) tnnz[i] = nz;
82: nz = nz*m;
83: } else {
84: nz = 0;
85: for (i=0; i<m; i++) {
86: if (nnz[i] <= 0) tnnz[i] = 1;
87: else tnnz[i] = nnz[i];
88: nz += tnnz[i];
89: }
90: }
92: /* Allocate BlockSolve matrix context */
93: PetscNew(BSspmat,&bsif->A);
94: bsmat = bsif->A;
95: BSset_mat_icc_storage(bsmat,PETSC_FALSE);
96: BSset_mat_symmetric(bsmat,PETSC_FALSE);
97: len = m*(sizeof(BSsprow*)+ sizeof(BSsprow)) + 1;
98: PetscMalloc(len,&bsmat->rows);
99: bsmat->num_rows = m;
100: bsmat->global_num_rows = A->M;
101: bsmat->map = bsif->bsmap;
102: vs = (BSsprow*)(bsmat->rows + m);
103: for (i=0; i<m; i++) {
104: bsmat->rows[i] = vs;
105: bsif->imax[i] = tnnz[i];
106: vs->diag_ind = -1;
107: MatMallocRowbs_Private(A,tnnz[i],&(vs->col),&(vs->nz));
108: /* put zero on diagonal */
109: /*vs->length = 1;
110: vs->col[0] = i + bsif->rstart;
111: vs->nz[0] = 0.0;*/
112: vs->length = 0;
113: vs++;
114: }
115: PetscLogObjectMemory(A,sizeof(BSspmat) + len);
116: bsif->nz = 0;
117: bsif->maxnz = nz;
118: bsif->sorted = 0;
119: bsif->roworiented = PETSC_TRUE;
120: bsif->nonew = 0;
121: bsif->bs_color_single = 0;
123: PetscFree(tnnz);
124: return(0);
125: }
129: static PetscErrorCode MatSetValues_MPIRowbs_local(Mat AA,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
130: {
131: Mat_MPIRowbs *mat = (Mat_MPIRowbs*)AA->data;
132: BSspmat *A = mat->A;
133: BSsprow *vs;
135: int *rp,k,a,b,t,ii,row,nrow,i,col,l,rmax;
136: int *imax = mat->imax,nonew = mat->nonew,sorted = mat->sorted;
137: PetscScalar *ap,value;
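  /*
     Each row is kept sorted by column index: a coarse binary search narrows the
     window [a,b), a short linear scan finds the exact slot, and an existing entry is
     either overwritten or summed depending on addv.  When a row runs out of room its
     storage is reallocated with CHUNCKSIZE_LOCAL extra slots and the new entry is
     spliced in during the copy.
  */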
140: for (k=0; k<m; k++) { /* loop over added rows */
141: row = im[k];
142: if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
143: if (row >= AA->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,AA->m-1);
144: vs = A->rows[row];
145: ap = vs->nz; rp = vs->col;
146: rmax = imax[row]; nrow = vs->length;
147: a = 0;
148: for (l=0; l<n; l++) { /* loop over added columns */
149: if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative col: %d",in[l]);
150: if (in[l] >= AA->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],AA->N-1);
151: col = in[l]; value = *v++;
152: if (!sorted) a = 0; b = nrow;
153: while (b-a > 5) {
154: t = (b+a)/2;
155: if (rp[t] > col) b = t;
156: else a = t;
157: }
158: for (i=a; i<b; i++) {
159: if (rp[i] > col) break;
160: if (rp[i] == col) {
161: if (addv == ADD_VALUES) ap[i] += value;
162: else ap[i] = value;
163: goto noinsert;
164: }
165: }
166: if (nonew) goto noinsert;
167: if (nrow >= rmax) {
168: /* there is no extra room in row, therefore enlarge */
169: int *itemp,*iout,*iin = vs->col;
170: PetscScalar *vout,*vin = vs->nz,*vtemp;
172: /* malloc new storage space */
173: imax[row] += CHUNCKSIZE_LOCAL;
174: MatMallocRowbs_Private(AA,imax[row],&itemp,&vtemp);
175: vout = vtemp; iout = itemp;
176: for (ii=0; ii<i; ii++) {
177: vout[ii] = vin[ii];
178: iout[ii] = iin[ii];
179: }
180: vout[i] = value;
181: iout[i] = col;
182: for (ii=i+1; ii<=nrow; ii++) {
183: vout[ii] = vin[ii-1];
184: iout[ii] = iin[ii-1];
185: }
186: /* free old row storage */
187: if (rmax > 0) {
188: MatFreeRowbs_Private(AA,rmax,vs->col,vs->nz);
189: }
190: vs->col = iout; vs->nz = vout;
191: rmax = imax[row];
192: mat->maxnz += CHUNCKSIZE_LOCAL;
193: mat->reallocs++;
194: } else {
195: /* shift higher columns over to make room for newie */
196: for (ii=nrow-1; ii>=i; ii--) {
197: rp[ii+1] = rp[ii];
198: ap[ii+1] = ap[ii];
199: }
200: rp[i] = col;
201: ap[i] = value;
202: }
203: nrow++;
204: mat->nz++;
205: AA->same_nonzero = PETSC_FALSE;
206: noinsert:;
207: a = i + 1;
208: }
209: vs->length = nrow;
210: }
211: return(0);
212: }
217: static PetscErrorCode MatAssemblyBegin_MPIRowbs_local(Mat A,MatAssemblyType mode)
218: {
220: return(0);
221: }
225: static PetscErrorCode MatAssemblyEnd_MPIRowbs_local(Mat AA,MatAssemblyType mode)
226: {
227: Mat_MPIRowbs *a = (Mat_MPIRowbs*)AA->data;
228: BSspmat *A = a->A;
229: BSsprow *vs;
230: int i,j,rstart = a->rstart;
233: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
235: /* Mark location of diagonal */
236: for (i=0; i<AA->m; i++) {
237: vs = A->rows[i];
238: for (j=0; j<vs->length; j++) {
239: if (vs->col[j] == i + rstart) {
240: vs->diag_ind = j;
241: break;
242: }
243: }
244: if (vs->diag_ind == -1) {
245: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"no diagonal entry");
246: }
247: }
248: return(0);
249: }
253: static PetscErrorCode MatZeroRows_MPIRowbs_local(Mat A,IS is,const PetscScalar *diag)
254: {
255: Mat_MPIRowbs *a = (Mat_MPIRowbs*)A->data;
256: BSspmat *l = a->A;
258: int i,N,*rz,m = A->m - 1,col,base=a->rowners[a->rank];
261: ISGetLocalSize(is,&N);
262: ISGetIndices(is,&rz);
263: if (a->keepzeroedrows) {
264: for (i=0; i<N; i++) {
265: if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"row out of range");
266: PetscMemzero(l->rows[rz[i]]->nz,l->rows[rz[i]]->length*sizeof(PetscScalar));
267: if (diag) {
268: col=rz[i]+base;
269: MatSetValues_MPIRowbs_local(A,1,&rz[i],1,&col,diag,INSERT_VALUES);
270: }
271: }
272: } else {
273: if (diag) {
274: for (i=0; i<N; i++) {
275: if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Out of range");
276: if (l->rows[rz[i]]->length > 0) { /* in case row was completely empty */
277: l->rows[rz[i]]->length = 1;
278: l->rows[rz[i]]->nz[0] = *diag;
279: l->rows[rz[i]]->col[0] = a->rstart + rz[i];
280: } else {
281: col=rz[i]+base;
282: MatSetValues_MPIRowbs_local(A,1,&rz[i],1,&col,diag,INSERT_VALUES);
283: }
284: }
285: } else {
286: for (i=0; i<N; i++) {
287: if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Out of range");
288: l->rows[rz[i]]->length = 0;
289: }
290: }
291: A->same_nonzero = PETSC_FALSE;
292: }
293: ISRestoreIndices(is,&rz);
294: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
295: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
296: return(0);
297: }
301: static PetscErrorCode MatNorm_MPIRowbs_local(Mat A,NormType type,PetscReal *norm)
302: {
303: Mat_MPIRowbs *mat = (Mat_MPIRowbs*)A->data;
304: BSsprow *vs,**rs;
305: PetscScalar *xv;
306: PetscReal sum = 0.0;
308: int *xi,nz,i,j;
311: rs = mat->A->rows;
312: if (type == NORM_FROBENIUS) {
313: for (i=0; i<A->m; i++) {
314: vs = *rs++;
315: nz = vs->length;
316: xv = vs->nz;
317: while (nz--) {
318: #if defined(PETSC_USE_COMPLEX)
319: sum += PetscRealPart(PetscConj(*xv)*(*xv)); xv++;
320: #else
321: sum += (*xv)*(*xv); xv++;
322: #endif
323: }
324: }
325: *norm = sqrt(sum);
326: } else if (type == NORM_1) { /* max column norm */
327: PetscReal *tmp;
328: PetscMalloc(A->n*sizeof(PetscReal),&tmp);
329: PetscMemzero(tmp,A->n*sizeof(PetscReal));
330: *norm = 0.0;
331: for (i=0; i<A->m; i++) {
332: vs = *rs++;
333: nz = vs->length;
334: xi = vs->col;
335: xv = vs->nz;
336: while (nz--) {
337: tmp[*xi] += PetscAbsScalar(*xv);
338: xi++; xv++;
339: }
340: }
341: for (j=0; j<A->n; j++) {
342: if (tmp[j] > *norm) *norm = tmp[j];
343: }
344: PetscFree(tmp);
345: } else if (type == NORM_INFINITY) { /* max row norm */
346: *norm = 0.0;
347: for (i=0; i<A->m; i++) {
348: vs = *rs++;
349: nz = vs->length;
350: xv = vs->nz;
351: sum = 0.0;
352: while (nz--) {
353: sum += PetscAbsScalar(*xv); xv++;
354: }
355: if (sum > *norm) *norm = sum;
356: }
357: } else {
358: SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
359: }
360: return(0);
361: }
363: /* ----------------------------------------------------------------- */
367: PetscErrorCode MatSetValues_MPIRowbs(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode av)
368: {
369: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
371: int i,j,row,col,rstart = a->rstart,rend = a->rend;
372: PetscTruth roworiented = a->roworiented;
375: /* Note: There's no need to "unscale" the matrix, since scaling is
376: confined to a->pA, and we're working with a->A here */
377: for (i=0; i<m; i++) {
378: if (im[i] < 0) continue;
379: if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
380: if (im[i] >= rstart && im[i] < rend) {
381: row = im[i] - rstart;
382: for (j=0; j<n; j++) {
383: if (in[j] < 0) continue;
384: if (in[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);
385: if (in[j] >= 0 && in[j] < mat->N){
386: col = in[j];
387: if (roworiented) {
388: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,v+i*n+j,av);
389: } else {
390: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,v+i+j*m,av);
391: }
392: } else {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid column");}
393: }
394: } else {
395: if (!a->donotstash) {
396: if (roworiented) {
397: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
398: } else {
399: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
400: }
401: }
402: }
403: }
404: return(0);
405: }
409: PetscErrorCode MatAssemblyBegin_MPIRowbs(Mat mat,MatAssemblyType mode)
410: {
411: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
412: MPI_Comm comm = mat->comm;
414: int nstash,reallocs;
415: InsertMode addv;
418: /* Note: There's no need to "unscale" the matrix, since scaling is
419: confined to a->pA, and we're working with a->A here */
421: /* make sure all processors are either in INSERTMODE or ADDMODE */
422: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
423: if (addv == (ADD_VALUES|INSERT_VALUES)) {
424: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some procs inserted; others added");
425: }
426: mat->insertmode = addv; /* in case this processor had no cache */
428: MatStashScatterBegin_Private(&mat->stash,a->rowners);
429: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
430: PetscLogInfo(0,"MatAssemblyBegin_MPIRowbs:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
431: return(0);
432: }
436: static PetscErrorCode MatView_MPIRowbs_ASCII(Mat mat,PetscViewer viewer)
437: {
438: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
440: int i,j;
441: PetscTruth iascii;
442: BSspmat *A = a->A;
443: BSsprow **rs = A->rows;
444: PetscViewerFormat format;
447: PetscViewerGetFormat(viewer,&format);
448: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
450: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
451: int ind_l,ind_g,clq_l,clq_g,color;
452: ind_l = BSlocal_num_inodes(a->pA);CHKERRBS(0);
453: ind_g = BSglobal_num_inodes(a->pA);CHKERRBS(0);
454: clq_l = BSlocal_num_cliques(a->pA);CHKERRBS(0);
455: clq_g = BSglobal_num_cliques(a->pA);CHKERRBS(0);
456: color = BSnum_colors(a->pA);CHKERRBS(0);
457: PetscViewerASCIIPrintf(viewer," %d global inode(s), %d global clique(s), %d color(s)\n",ind_g,clq_g,color);
458: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d local inode(s), %d local clique(s)\n",a->rank,ind_l,clq_l);
459: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
460: for (i=0; i<A->num_rows; i++) {
461: PetscViewerASCIISynchronizedPrintf(viewer,"row %d:",i+a->rstart);
462: for (j=0; j<rs[i]->length; j++) {
463: if (rs[i]->nz[j]) {PetscViewerASCIISynchronizedPrintf(viewer," %d %g ",rs[i]->col[j],rs[i]->nz[j]);}
464: }
465: PetscViewerASCIISynchronizedPrintf(viewer,"\n");
466: }
467: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
468: SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
469: } else {
470: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
471: for (i=0; i<A->num_rows; i++) {
472: PetscViewerASCIISynchronizedPrintf(viewer,"row %d:",i+a->rstart);
473: for (j=0; j<rs[i]->length; j++) {
474: PetscViewerASCIISynchronizedPrintf(viewer," %d %g ",rs[i]->col[j],rs[i]->nz[j]);
475: }
476: PetscViewerASCIISynchronizedPrintf(viewer,"\n");
477: }
478: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
479: }
480: PetscViewerFlush(viewer);
481: return(0);
482: }
486: static PetscErrorCode MatView_MPIRowbs_Binary(Mat mat,PetscViewer viewer)
487: {
488: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
490: PetscMPIInt rank,size;
491: PetscInt i,M,m,*sbuff,*rowlengths;
492: PetscInt *recvcts,*recvdisp,fd,*cols,maxnz,nz,j;
493: BSspmat *A = a->A;
494: BSsprow **rs = A->rows;
495: MPI_Comm comm = mat->comm;
496: MPI_Status status;
497: PetscScalar *vals;
498: MatInfo info;
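  /*
     Rank 0 gathers all the row lengths, writes the binary header and the lengths, and
     then writes the column indices and values: first its own, then one message at a
     time received from each of the other ranks.  Every other rank simply packs its
     column indices and values and sends them to rank 0.
  */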
501: MPI_Comm_size(comm,&size);
502: MPI_Comm_rank(comm,&rank);
504: M = mat->M; m = mat->m;
505: /* First gather together on the first processor the lengths of
506: each row, and write them out to the file */
507: PetscMalloc(m*sizeof(int),&sbuff);
508: for (i=0; i<A->num_rows; i++) {
509: sbuff[i] = rs[i]->length;
510: }
511: MatGetInfo(mat,MAT_GLOBAL_SUM,&info);
512: if (!rank) {
513: PetscViewerBinaryGetDescriptor(viewer,&fd);
514: PetscMalloc((4+M)*sizeof(int),&rowlengths);
515: PetscMalloc(size*sizeof(int),&recvcts);
516: recvdisp = a->rowners;
517: for (i=0; i<size; i++) {
518: recvcts[i] = recvdisp[i+1] - recvdisp[i];
519: }
520: /* first four elements of rowlength are the header */
521: rowlengths[0] = mat->cookie;
522: rowlengths[1] = mat->M;
523: rowlengths[2] = mat->N;
524: rowlengths[3] = (int)info.nz_used;
525: MPI_Gatherv(sbuff,m,MPI_INT,rowlengths+4,recvcts,recvdisp,MPI_INT,0,comm);
526: PetscFree(sbuff);
527: PetscBinaryWrite(fd,rowlengths,4+M,PETSC_INT,PETSC_FALSE);
528: /* count the number of nonzeros on each processor */
529: PetscMemzero(recvcts,size*sizeof(int));
530: for (i=0; i<size; i++) {
531: for (j=recvdisp[i]; j<recvdisp[i+1]; j++) {
532: recvcts[i] += rowlengths[j+4];
533: }
534: }
535: /* allocate buffer long enough to hold largest one */
536: maxnz = 0;
537: for (i=0; i<size; i++) {
538: maxnz = PetscMax(maxnz,recvcts[i]);
539: }
540: PetscFree(rowlengths);
541: PetscFree(recvcts);
542: PetscMalloc(maxnz*sizeof(int),&cols);
544: /* binary store column indices for 0th processor */
545: nz = 0;
546: for (i=0; i<A->num_rows; i++) {
547: for (j=0; j<rs[i]->length; j++) {
548: cols[nz++] = rs[i]->col[j];
549: }
550: }
551: PetscBinaryWrite(fd,cols,nz,PETSC_INT,PETSC_FALSE);
553: /* receive and store column indices for all other processors */
554: for (i=1; i<size; i++) {
555: /* should tell processor that I am now ready and to begin the send */
556: MPI_Recv(cols,maxnz,MPI_INT,i,mat->tag,comm,&status);
557: MPI_Get_count(&status,MPI_INT,&nz);
558: PetscBinaryWrite(fd,cols,nz,PETSC_INT,PETSC_FALSE);
559: }
560: PetscFree(cols);
561: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
563: /* binary store values for 0th processor */
564: nz = 0;
565: for (i=0; i<A->num_rows; i++) {
566: for (j=0; j<rs[i]->length; j++) {
567: vals[nz++] = rs[i]->nz[j];
568: }
569: }
570: PetscBinaryWrite(fd,vals,nz,PETSC_SCALAR,PETSC_FALSE);
572: /* receive and store nonzeros for all other processors */
573: for (i=1; i<size; i++) {
574: /* should tell processor that I am now ready and to begin the send */
575: MPI_Recv(vals,maxnz,MPIU_SCALAR,i,mat->tag,comm,&status);
576: MPI_Get_count(&status,MPIU_SCALAR,&nz);
577: PetscBinaryWrite(fd,vals,nz,PETSC_SCALAR,PETSC_FALSE);
578: }
579: PetscFree(vals);
580: } else {
581: MPI_Gatherv(sbuff,m,MPI_INT,0,0,0,MPI_INT,0,comm);
582: PetscFree(sbuff);
584: /* count local nonzeros */
585: nz = 0;
586: for (i=0; i<A->num_rows; i++) {
587: for (j=0; j<rs[i]->length; j++) {
588: nz++;
589: }
590: }
591: /* copy into buffer column indices */
592: PetscMalloc(nz*sizeof(int),&cols);
593: nz = 0;
594: for (i=0; i<A->num_rows; i++) {
595: for (j=0; j<rs[i]->length; j++) {
596: cols[nz++] = rs[i]->col[j];
597: }
598: }
599: /* send */ /* should wait until processor zero tells me to go */
600: MPI_Send(cols,nz,MPI_INT,0,mat->tag,comm);
601: PetscFree(cols);
603: /* copy into buffer column values */
604: PetscMalloc(nz*sizeof(PetscScalar),&vals);
605: nz = 0;
606: for (i=0; i<A->num_rows; i++) {
607: for (j=0; j<rs[i]->length; j++) {
608: vals[nz++] = rs[i]->nz[j];
609: }
610: }
611: /* send */ /* should wait until processor zero tells me to go */
612: MPI_Send(vals,nz,MPIU_SCALAR,0,mat->tag,comm);
613: PetscFree(vals);
614: }
616: return(0);
617: }
621: PetscErrorCode MatView_MPIRowbs(Mat mat,PetscViewer viewer)
622: {
623: Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)mat->data;
625: PetscTruth iascii,isbinary;
628: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
629: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
630: if (!bsif->blocksolveassembly) {
631: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
632: }
633: if (iascii) {
634: MatView_MPIRowbs_ASCII(mat,viewer);
635: } else if (isbinary) {
636: MatView_MPIRowbs_Binary(mat,viewer);
637: } else {
638: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIRowbs matrices",((PetscObject)viewer)->type_name);
639: }
640: return(0);
641: }
642:
645: static PetscErrorCode MatAssemblyEnd_MPIRowbs_MakeSymmetric(Mat mat)
646: {
647: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
648: BSspmat *A = a->A;
649: BSsprow *vs;
650: int size,rank,M,rstart,tag,i,j,*rtable,*w1,*w3,*w4,len,proc,nrqs;
651: int msz,*pa,bsz,nrqr,**rbuf1,**sbuf1,**ptr,*tmp,*ctr,col,idx,row;
653: int ctr_j,*sbuf1_j,k;
654: PetscScalar val=0.0;
655: MPI_Comm comm;
656: MPI_Request *s_waits1,*r_waits1;
657: MPI_Status *s_status,*r_status;
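  /*
     BlockSolve requires a structurally symmetric nonzero pattern.  For every local
     entry (i,j) whose column j is owned by another process, the column index is sent
     to that process, which then inserts an explicit zero at (j,i); entries whose
     mirror location is local are inserted immediately via
     MatSetValues_MPIRowbs_local() with a zero value.
  */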
660: comm = mat->comm;
661: tag = mat->tag;
662: size = a->size;
663: rank = a->rank;
664: M = mat->M;
665: rstart = a->rstart;
667: PetscMalloc(M*sizeof(int),&rtable);
668: /* Create hash table for the mapping :row -> proc */
669: for (i=0,j=0; i<size; i++) {
670: len = a->rowners[i+1];
671: for (; j<len; j++) {
672: rtable[j] = i;
673: }
674: }
676: /* Evaluate communication - mesg to whom, length of mesg, and buffer space
677: required. Based on this, buffers are allocated, and data copied into them. */
678: PetscMalloc(size*4*sizeof(int),&w1);/* mesg size */
679: w3 = w1 + 2*size; /* no of IS that needs to be sent to proc i */
680: w4 = w3 + size; /* temp work space used in determining w1, w3 */
681: PetscMemzero(w1,size*3*sizeof(int)); /* initialize work vector */
683: for (i=0; i<mat->m; i++) {
684: PetscMemzero(w4,size*sizeof(int)); /* initialize work vector */
685: vs = A->rows[i];
686: for (j=0; j<vs->length; j++) {
687: proc = rtable[vs->col[j]];
688: w4[proc]++;
689: }
690: for (j=0; j<size; j++) {
691: if (w4[j]) { w1[2*j] += w4[j]; w3[j]++;}
692: }
693: }
694:
695: nrqs = 0; /* number of outgoing messages */
696: msz = 0; /* total mesg length (for all procs) */
697: w1[2*rank] = 0; /* no mesg sent to itself */
698: w3[rank] = 0;
699: for (i=0; i<size; i++) {
700: if (w1[2*i]) {w1[2*i+1] = 1; nrqs++;} /* there exists a message to proc i */
701: }
702: /* pa - is list of processors to communicate with */
703: PetscMalloc((nrqs+1)*sizeof(int),&pa);
704: for (i=0,j=0; i<size; i++) {
705: if (w1[2*i]) {pa[j] = i; j++;}
706: }
708: /* Each message would have a header = 1 + 2*(no of ROWS) + data */
709: for (i=0; i<nrqs; i++) {
710: j = pa[i];
711: w1[2*j] += w1[2*j+1] + 2*w3[j];
712: msz += w1[2*j];
713: }
714:
715: /* Do a global reduction to determine how many messages to expect */
716: PetscMaxSum(comm,w1,&bsz,&nrqr);
718: /* Allocate memory for recv buffers . Prob none if nrqr = 0 ???? */
719: len = (nrqr+1)*sizeof(int*) + nrqr*bsz*sizeof(int);
720: PetscMalloc(len,&rbuf1);
721: rbuf1[0] = (int*)(rbuf1 + nrqr);
722: for (i=1; i<nrqr; ++i) rbuf1[i] = rbuf1[i-1] + bsz;
724: /* Post the receives */
725: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&r_waits1);
726: for (i=0; i<nrqr; ++i){
727: MPI_Irecv(rbuf1[i],bsz,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits1+i);
728: }
729:
730: /* Allocate Memory for outgoing messages */
731: len = 2*size*sizeof(int*) + (size+msz)*sizeof(int);
732: PetscMalloc(len,&sbuf1);
733: ptr = sbuf1 + size; /* Pointers to the data in outgoing buffers */
734: PetscMemzero(sbuf1,2*size*sizeof(int*));
735: tmp = (int*)(sbuf1 + 2*size);
736: ctr = tmp + msz;
738: {
739: int *iptr = tmp,ict = 0;
740: for (i=0; i<nrqs; i++) {
741: j = pa[i];
742: iptr += ict;
743: sbuf1[j] = iptr;
744: ict = w1[2*j];
745: }
746: }
748: /* Form the outgoing messages */
749: /* Clean up the header space */
750: for (i=0; i<nrqs; i++) {
751: j = pa[i];
752: sbuf1[j][0] = 0;
753: PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(int));
754: ptr[j] = sbuf1[j] + 2*w3[j] + 1;
755: }
757: /* Parse the matrix and copy the data into sbuf1 */
758: for (i=0; i<mat->m; i++) {
759: PetscMemzero(ctr,size*sizeof(int));
760: vs = A->rows[i];
761: for (j=0; j<vs->length; j++) {
762: col = vs->col[j];
763: proc = rtable[col];
764: if (proc != rank) { /* copy to the outgoing buffer */
765: ctr[proc]++;
766: *ptr[proc] = col;
767: ptr[proc]++;
768: } else {
769: row = col - rstart;
770: col = i + rstart;
771: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
772: }
773: }
774: /* Update the headers for the current row */
775: for (j=0; j<size; j++) { /* Can Optimise this loop by using pa[] */
776: if ((ctr_j = ctr[j])) {
777: sbuf1_j = sbuf1[j];
778: k = ++sbuf1_j[0];
779: sbuf1_j[2*k] = ctr_j;
780: sbuf1_j[2*k-1] = i + rstart;
781: }
782: }
783: }
784: /* Check Validity of the outgoing messages */
785: {
786: int sum;
787: for (i=0 ; i<nrqs ; i++) {
788: j = pa[i];
789: if (w3[j] != sbuf1[j][0]) {SETERRQ(PETSC_ERR_PLIB,"Blew it! Header[1] mismatch!\n"); }
790: }
792: for (i=0 ; i<nrqs ; i++) {
793: j = pa[i];
794: sum = 1;
795: for (k = 1; k <= w3[j]; k++) sum += sbuf1[j][2*k]+2;
796: if (sum != w1[2*j]) { SETERRQ(PETSC_ERR_PLIB,"Blew it! Header[2-n] mismatch!\n"); }
797: }
798: }
799:
800: /* Now post the sends */
801: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
802: for (i=0; i<nrqs; ++i) {
803: j = pa[i];
804: MPI_Isend(sbuf1[j],w1[2*j],MPI_INT,j,tag,comm,s_waits1+i);
805: }
806:
807: /* Receive messages*/
808: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status);
809: for (i=0; i<nrqr; ++i) {
810: MPI_Waitany(nrqr,r_waits1,&idx,r_status+i);
811: /* Process the Message */
812: {
813: int *rbuf1_i,n_row,ct1;
815: rbuf1_i = rbuf1[idx];
816: n_row = rbuf1_i[0];
817: ct1 = 2*n_row+1;
818: val = 0.0;
819: /* Optimise this later */
820: for (j=1; j<=n_row; j++) {
821: col = rbuf1_i[2*j-1];
822: for (k=0; k<rbuf1_i[2*j]; k++,ct1++) {
823: row = rbuf1_i[ct1] - rstart;
824: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
825: }
826: }
827: }
828: }
830: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status);
831: MPI_Waitall(nrqs,s_waits1,s_status);
833: PetscFree(rtable);
834: PetscFree(w1);
835: PetscFree(pa);
836: PetscFree(rbuf1);
837: PetscFree(sbuf1);
838: PetscFree(r_waits1);
839: PetscFree(s_waits1);
840: PetscFree(r_status);
841: PetscFree(s_status);
842: return(0);
843: }
845: /*
846: This does the BlockSolve portion of the matrix assembly.
847: It is provided in a separate routine so that users can
848: operate on the matrix (using MatScale(), MatShift() etc.) after
849: the matrix has been assembled but before BlockSolve has sucked it
850: in and devoured it.
851: */
854: PetscErrorCode MatAssemblyEnd_MPIRowbs_ForBlockSolve(Mat mat)
855: {
856: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
858: int ldim,low,high,i;
859: PetscScalar *diag;
862: if ((mat->was_assembled) && (!mat->same_nonzero)) { /* Free the old info */
863: if (a->pA) {BSfree_par_mat(a->pA);CHKERRBS(0);}
864: if (a->comm_pA) {BSfree_comm(a->comm_pA);CHKERRBS(0);}
865: }
867: if ((!mat->same_nonzero) || (!mat->was_assembled)) {
868: /* Indicates bypassing cliques in coloring */
869: if (a->bs_color_single) {
870: BSctx_set_si(a->procinfo,100);
871: }
872: /* Form permuted matrix for efficient parallel execution */
873: a->pA = BSmain_perm(a->procinfo,a->A);CHKERRBS(0);
874: /* Set up the communication */
875: a->comm_pA = BSsetup_forward(a->pA,a->procinfo);CHKERRBS(0);
876: } else {
877: /* Repermute the matrix */
878: BSmain_reperm(a->procinfo,a->A,a->pA);CHKERRBS(0);
879: }
881: /* Symmetrically scale the matrix by the diagonal */
882: BSscale_diag(a->pA,a->pA->diag,a->procinfo);CHKERRBS(0);
884: /* Store inverse of square root of permuted diagonal scaling matrix */
885: VecGetLocalSize(a->diag,&ldim);
886: VecGetOwnershipRange(a->diag,&low,&high);
887: VecGetArray(a->diag,&diag);
888: for (i=0; i<ldim; i++) {
889: if (a->pA->scale_diag[i] != 0.0) {
890: diag[i] = 1.0/sqrt(PetscAbsScalar(a->pA->scale_diag[i]));
891: } else {
892: diag[i] = 1.0;
893: }
894: }
895: VecRestoreArray(a->diag,&diag);
896: a->assembled_icc_storage = a->A->icc_storage;
897: a->blocksolveassembly = 1;
898: mat->was_assembled = PETSC_TRUE;
899: mat->same_nonzero = PETSC_TRUE;
900: PetscLogInfo(mat,"MatAssemblyEnd_MPIRowbs_ForBlockSolve:Completed BlockSolve95 matrix assembly\n");
901: return(0);
902: }
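/*
   A minimal usage sketch of the window this routine leaves open (alpha, x and y are
   placeholder variables; MatScale() takes the pointer-to-scalar calling sequence used
   elsewhere in this file):

      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
      MatScale(&alpha,A);
      MatMult(A,x,y);

   MatScale() still operates on a->A; the first MatMult() or factorization that needs
   BlockSolve then calls MatAssemblyEnd_MPIRowbs_ForBlockSolve().
*/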
906: PetscErrorCode MatAssemblyEnd_MPIRowbs(Mat mat,MatAssemblyType mode)
907: {
908: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
910: int i,n,row,col,*rows,*cols,rstart,nzcount,flg,j,ncols;
911: PetscScalar *vals,val;
912: InsertMode addv = mat->insertmode;
915: while (1) {
916: MatStashScatterGetMesg_Private(&mat->stash,&n,&rows,&cols,&vals,&flg);
917: if (!flg) break;
918:
919: for (i=0; i<n;) {
920: /* Now identify the consecutive vals belonging to the same row */
921: for (j=i,rstart=rows[j]; j<n; j++) { if (rows[j] != rstart) break; }
922: if (j < n) ncols = j-i;
923: else ncols = n-i;
924: /* Now assemble all these values with a single function call */
925: MatSetValues_MPIRowbs(mat,1,rows+i,ncols,cols+i,vals+i,addv);
926: i = j;
927: }
928: }
929: MatStashScatterEnd_Private(&mat->stash);
931: rstart = a->rstart;
932: nzcount = a->nz; /* This is the number of nonzeros entered by the user */
933: /* BlockSolve requires that the matrix is structurally symmetric */
934: if (mode == MAT_FINAL_ASSEMBLY && !mat->structurally_symmetric) {
935: MatAssemblyEnd_MPIRowbs_MakeSymmetric(mat);
936: }
937:
938: /* BlockSolve requires that all the diagonal elements are set */
939: val = 0.0;
940: for (i=0; i<mat->m; i++) {
941: row = i; col = i + rstart;
942: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
943: }
944:
945: MatAssemblyBegin_MPIRowbs_local(mat,mode);
946: MatAssemblyEnd_MPIRowbs_local(mat,mode);
947:
948: a->blocksolveassembly = 0;
949: PetscLogInfo(mat,"MatAssemblyEnd_MPIRowbs:Matrix size: %d X %d; storage space: %d unneeded,%d used\n",mat->m,mat->n,a->maxnz-a->nz,a->nz);
950: PetscLogInfo(mat,"MatAssemblyEnd_MPIRowbs: User entered %d nonzeros, PETSc added %d\n",nzcount,a->nz-nzcount);
951: PetscLogInfo(mat,"MatAssemblyEnd_MPIRowbs:Number of mallocs during MatSetValues is %d\n",a->reallocs);
952: return(0);
953: }
957: PetscErrorCode MatZeroEntries_MPIRowbs(Mat mat)
958: {
959: Mat_MPIRowbs *l = (Mat_MPIRowbs*)mat->data;
960: BSspmat *A = l->A;
961: BSsprow *vs;
962: int i,j;
965: for (i=0; i <mat->m; i++) {
966: vs = A->rows[i];
967: for (j=0; j< vs->length; j++) vs->nz[j] = 0.0;
968: }
969: return(0);
970: }
972: /* the code does not do the diagonal entries correctly unless the
973: matrix is square and the column and row ownerships are identical.
974: This is a BUG.
975: */
979: PetscErrorCode MatZeroRows_MPIRowbs(Mat A,IS is,const PetscScalar *diag)
980: {
981: Mat_MPIRowbs *l = (Mat_MPIRowbs*)A->data;
983: int i,N,*rows,*owners = l->rowners,size = l->size;
984: int *nprocs,j,idx,nsends;
985: int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
986: int *rvalues,tag = A->tag,count,base,slen,n,*source;
987: int *lens,imdex,*lrows,*values;
988: MPI_Comm comm = A->comm;
989: MPI_Request *send_waits,*recv_waits;
990: MPI_Status recv_status,*send_status;
991: IS istmp;
992: PetscTruth found;
995: ISGetLocalSize(is,&N);
996: ISGetIndices(is,&rows);
998: /* first count number of contributors to each processor */
999: PetscMalloc(2*size*sizeof(int),&nprocs);
1000: PetscMemzero(nprocs,2*size*sizeof(int));
1001: PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
1002: for (i=0; i<N; i++) {
1003: idx = rows[i];
1004: found = PETSC_FALSE;
1005: for (j=0; j<size; j++) {
1006: if (idx >= owners[j] && idx < owners[j+1]) {
1007: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
1008: }
1009: }
1010: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
1011: }
1012: nsends = 0; for (i=0; i<size; i++) {nsends += nprocs[2*i+1];}
1014: /* inform other processors of number of messages and max length*/
1015: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
1017: /* post receives: */
1018: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
1019: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
1020: for (i=0; i<nrecvs; i++) {
1021: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
1022: }
1024: /* do sends:
1025: 1) starts[i] gives the starting index in svalues for stuff going to
1026: the ith processor
1027: */
1028: PetscMalloc((N+1)*sizeof(int),&svalues);
1029: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
1030: PetscMalloc((size+1)*sizeof(int),&starts);
1031: starts[0] = 0;
1032: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1033: for (i=0; i<N; i++) {
1034: svalues[starts[owner[i]]++] = rows[i];
1035: }
1036: ISRestoreIndices(is,&rows);
1038: starts[0] = 0;
1039: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1040: count = 0;
1041: for (i=0; i<size; i++) {
1042: if (nprocs[2*i+1]) {
1043: MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
1044: }
1045: }
1046: PetscFree(starts);
1048: base = owners[rank];
1050: /* wait on receives */
1051: PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
1052: source = lens + nrecvs;
1053: count = nrecvs; slen = 0;
1054: while (count) {
1055: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1056: /* unpack receives into our local space */
1057: MPI_Get_count(&recv_status,MPI_INT,&n);
1058: source[imdex] = recv_status.MPI_SOURCE;
1059: lens[imdex] = n;
1060: slen += n;
1061: count--;
1062: }
1063: PetscFree(recv_waits);
1064:
1065: /* move the data into the send scatter */
1066: PetscMalloc((slen+1)*sizeof(int),&lrows);
1067: count = 0;
1068: for (i=0; i<nrecvs; i++) {
1069: values = rvalues + i*nmax;
1070: for (j=0; j<lens[i]; j++) {
1071: lrows[count++] = values[j] - base;
1072: }
1073: }
1074: PetscFree(rvalues);
1075: PetscFree(lens);
1076: PetscFree(owner);
1077: PetscFree(nprocs);
1078:
1079: /* actually zap the local rows */
1080: ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
1081: PetscLogObjectParent(A,istmp);
1082: PetscFree(lrows);
1083: MatZeroRows_MPIRowbs_local(A,istmp,diag);
1084: ISDestroy(istmp);
1086: /* wait on sends */
1087: if (nsends) {
1088: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1089: MPI_Waitall(nsends,send_waits,send_status);
1090: PetscFree(send_status);
1091: }
1092: PetscFree(send_waits);
1093: PetscFree(svalues);
1095: return(0);
1096: }
1100: PetscErrorCode MatNorm_MPIRowbs(Mat mat,NormType type,PetscReal *norm)
1101: {
1102: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1103: BSsprow *vs,**rs;
1104: PetscScalar *xv;
1105: PetscReal sum = 0.0;
1107: int *xi,nz,i,j;
1110: if (a->size == 1) {
1111: MatNorm_MPIRowbs_local(mat,type,norm);
1112: } else {
1113: rs = a->A->rows;
1114: if (type == NORM_FROBENIUS) {
1115: for (i=0; i<mat->m; i++) {
1116: vs = *rs++;
1117: nz = vs->length;
1118: xv = vs->nz;
1119: while (nz--) {
1120: #if defined(PETSC_USE_COMPLEX)
1121: sum += PetscRealPart(PetscConj(*xv)*(*xv)); xv++;
1122: #else
1123: sum += (*xv)*(*xv); xv++;
1124: #endif
1125: }
1126: }
1127: MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);
1128: *norm = sqrt(*norm);
1129: } else if (type == NORM_1) { /* max column norm */
1130: PetscReal *tmp,*tmp2;
1131: PetscMalloc(mat->n*sizeof(PetscReal),&tmp);
1132: PetscMalloc(mat->n*sizeof(PetscReal),&tmp2);
1133: PetscMemzero(tmp,mat->n*sizeof(PetscReal));
1134: *norm = 0.0;
1135: for (i=0; i<mat->m; i++) {
1136: vs = *rs++;
1137: nz = vs->length;
1138: xi = vs->col;
1139: xv = vs->nz;
1140: while (nz--) {
1141: tmp[*xi] += PetscAbsScalar(*xv);
1142: xi++; xv++;
1143: }
1144: }
1145: MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);
1146: for (j=0; j<mat->n; j++) {
1147: if (tmp2[j] > *norm) *norm = tmp2[j];
1148: }
1149: PetscFree(tmp);
1150: PetscFree(tmp2);
1151: } else if (type == NORM_INFINITY) { /* max row norm */
1152: PetscReal ntemp = 0.0;
1153: for (i=0; i<mat->m; i++) {
1154: vs = *rs++;
1155: nz = vs->length;
1156: xv = vs->nz;
1157: sum = 0.0;
1158: while (nz--) {
1159: sum += PetscAbsScalar(*xv); xv++;
1160: }
1161: if (sum > ntemp) ntemp = sum;
1162: }
1163: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);
1164: } else {
1165: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1166: }
1167: }
1168: return(0);
1169: }
1173: PetscErrorCode MatMult_MPIRowbs(Mat mat,Vec xx,Vec yy)
1174: {
1175: Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)mat->data;
1176: BSprocinfo *bspinfo = bsif->procinfo;
1177: PetscScalar *xxa,*xworka,*yya;
1181: if (!bsif->blocksolveassembly) {
1182: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1183: }
1185: /* Permute and apply diagonal scaling: [ xwork = D^{1/2} * x ] */
1186: if (!bsif->vecs_permscale) {
1187: VecGetArray(bsif->xwork,&xworka);
1188: VecGetArray(xx,&xxa);
1189: BSperm_dvec(xxa,xworka,bsif->pA->perm);CHKERRBS(0);
1190: VecRestoreArray(bsif->xwork,&xworka);
1191: VecRestoreArray(xx,&xxa);
1192: VecPointwiseDivide(bsif->xwork,bsif->diag,xx);
1193: }
1195: VecGetArray(xx,&xxa);
1196: VecGetArray(yy,&yya);
1197: /* Do lower triangular multiplication: [ y = L * xwork ] */
1198: if (bspinfo->single) {
1199: BSforward1(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1200: } else {
1201: BSforward(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1202: }
1203:
1204: /* Do upper triangular multiplication: [ y = y + L^{T} * xwork ] */
1205: if (mat->symmetric) {
1206: if (bspinfo->single){
1207: BSbackward1(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1208: } else {
1209: BSbackward(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1210: }
1211: }
1212: /* not needed for ILU version since forward does it all */
1213: VecRestoreArray(xx,&xxa);
1214: VecRestoreArray(yy,&yya);
1216: /* Apply diagonal scaling to vector: [ y = D^{1/2} * y ] */
1217: if (!bsif->vecs_permscale) {
1218: VecGetArray(bsif->xwork,&xworka);
1219: VecGetArray(xx,&xxa);
1220: BSiperm_dvec(xworka,xxa,bsif->pA->perm);CHKERRBS(0);
1221: VecRestoreArray(bsif->xwork,&xworka);
1222: VecRestoreArray(xx,&xxa);
1223: VecPointwiseDivide(yy,bsif->diag,bsif->xwork);
1224: VecGetArray(bsif->xwork,&xworka);
1225: VecGetArray(yy,&yya);
1226: BSiperm_dvec(xworka,yya,bsif->pA->perm);CHKERRBS(0);
1227: VecRestoreArray(bsif->xwork,&xworka);
1228: VecRestoreArray(yy,&yya);
1229: }
1230: PetscLogFlops(2*bsif->nz - mat->m);
1232: return(0);
1233: }
1237: PetscErrorCode MatMultAdd_MPIRowbs(Mat mat,Vec xx,Vec yy,Vec zz)
1238: {
1240: PetscScalar one = 1.0;
1243: (*mat->ops->mult)(mat,xx,zz);
1244: VecAXPY(&one,yy,zz);
1245: return(0);
1246: }
1250: PetscErrorCode MatGetInfo_MPIRowbs(Mat A,MatInfoType flag,MatInfo *info)
1251: {
1252: Mat_MPIRowbs *mat = (Mat_MPIRowbs*)A->data;
1253: PetscReal isend[5],irecv[5];
1257: info->rows_global = (double)A->M;
1258: info->columns_global = (double)A->N;
1259: info->rows_local = (double)A->m;
1260: info->columns_local = (double)A->N;
1261: info->block_size = 1.0;
1262: info->mallocs = (double)mat->reallocs;
1263: isend[0] = mat->nz; isend[1] = mat->maxnz; isend[2] = mat->maxnz - mat->nz;
1264: isend[3] = A->mem; isend[4] = info->mallocs;
1266: if (flag == MAT_LOCAL) {
1267: info->nz_used = isend[0];
1268: info->nz_allocated = isend[1];
1269: info->nz_unneeded = isend[2];
1270: info->memory = isend[3];
1271: info->mallocs = isend[4];
1272: } else if (flag == MAT_GLOBAL_MAX) {
1273: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);
1274: info->nz_used = irecv[0];
1275: info->nz_allocated = irecv[1];
1276: info->nz_unneeded = irecv[2];
1277: info->memory = irecv[3];
1278: info->mallocs = irecv[4];
1279: } else if (flag == MAT_GLOBAL_SUM) {
1280: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);
1281: info->nz_used = irecv[0];
1282: info->nz_allocated = irecv[1];
1283: info->nz_unneeded = irecv[2];
1284: info->memory = irecv[3];
1285: info->mallocs = irecv[4];
1286: }
1287: return(0);
1288: }
1292: PetscErrorCode MatGetDiagonal_MPIRowbs(Mat mat,Vec v)
1293: {
1294: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1295: BSsprow **rs = a->A->rows;
1297: int i,n;
1298: PetscScalar *x,zero = 0.0;
1301: if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1302: if (!a->blocksolveassembly) {
1303: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1304: }
1306: VecSet(&zero,v);
1307: VecGetLocalSize(v,&n);
1308: if (n != mat->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
1309: VecGetArray(v,&x);
1310: for (i=0; i<mat->m; i++) {
1311: x[i] = rs[i]->nz[rs[i]->diag_ind];
1312: }
1313: VecRestoreArray(v,&x);
1314: return(0);
1315: }
1319: PetscErrorCode MatDestroy_MPIRowbs(Mat mat)
1320: {
1321: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1322: BSspmat *A = a->A;
1323: BSsprow *vs;
1325: int i;
1328: #if defined(PETSC_USE_LOG)
1329: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
1330: #endif
1331: PetscFree(a->rowners);
1332: MatStashDestroy_Private(&mat->stash);
1333: if (a->bsmap) {
1334: if (a->bsmap->vlocal2global) {PetscFree(a->bsmap->vlocal2global);}
1335: if (a->bsmap->vglobal2local) {PetscFree(a->bsmap->vglobal2local);}
1336: if (a->bsmap->vglobal2proc) (*a->bsmap->free_g2p)(a->bsmap->vglobal2proc);
1337: PetscFree(a->bsmap);
1338: }
1340: if (A) {
1341: for (i=0; i<mat->m; i++) {
1342: vs = A->rows[i];
1343: MatFreeRowbs_Private(mat,vs->length,vs->col,vs->nz);
1344: }
1345: /* Note: A->map = a->bsmap is freed above */
1346: PetscFree(A->rows);
1347: PetscFree(A);
1348: }
1349: if (a->procinfo) {BSfree_ctx(a->procinfo);CHKERRBS(0);}
1350: if (a->diag) {VecDestroy(a->diag);}
1351: if (a->xwork) {VecDestroy(a->xwork);}
1352: if (a->pA) {BSfree_par_mat(a->pA);CHKERRBS(0);}
1353: if (a->fpA) {BSfree_copy_par_mat(a->fpA);CHKERRBS(0);}
1354: if (a->comm_pA) {BSfree_comm(a->comm_pA);CHKERRBS(0);}
1355: if (a->comm_fpA) {BSfree_comm(a->comm_fpA);CHKERRBS(0);}
1356: if (a->imax) {PetscFree(a->imax);}
1357: MPI_Comm_free(&(a->comm_mpirowbs));
1358: PetscFree(a);
1359: PetscObjectComposeFunction((PetscObject)mat,"MatMPIRowbsSetPreallocation_C","",PETSC_NULL);
1360: return(0);
1361: }
1365: PetscErrorCode MatSetOption_MPIRowbs(Mat A,MatOption op)
1366: {
1367: Mat_MPIRowbs *a = (Mat_MPIRowbs*)A->data;
1370: switch (op) {
1371: case MAT_ROW_ORIENTED:
1372: a->roworiented = PETSC_TRUE;
1373: break;
1374: case MAT_COLUMN_ORIENTED:
1375: a->roworiented = PETSC_FALSE;
1376: break;
1377: case MAT_COLUMNS_SORTED:
1378: a->sorted = 1;
1379: break;
1380: case MAT_COLUMNS_UNSORTED:
1381: a->sorted = 0;
1382: break;
1383: case MAT_NO_NEW_NONZERO_LOCATIONS:
1384: a->nonew = 1;
1385: break;
1386: case MAT_YES_NEW_NONZERO_LOCATIONS:
1387: a->nonew = 0;
1388: break;
1389: case MAT_DO_NOT_USE_INODES:
1390: a->bs_color_single = 1;
1391: break;
1392: case MAT_YES_NEW_DIAGONALS:
1393: case MAT_ROWS_SORTED:
1394: case MAT_NEW_NONZERO_LOCATION_ERR:
1395: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1396: case MAT_ROWS_UNSORTED:
1397: case MAT_USE_HASH_TABLE:
1398: PetscLogInfo(A,"MatSetOption_MPIRowbs:Option ignored\n");
1399: break;
1400: case MAT_IGNORE_OFF_PROC_ENTRIES:
1401: a->donotstash = PETSC_TRUE;
1402: break;
1403: case MAT_NO_NEW_DIAGONALS:
1404: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1405: break;
1406: case MAT_KEEP_ZEROED_ROWS:
1407: a->keepzeroedrows = PETSC_TRUE;
1408: break;
1409: case MAT_SYMMETRIC:
1410: BSset_mat_symmetric(a->A,PETSC_TRUE);CHKERRBS(0);
1411: break;
1412: case MAT_STRUCTURALLY_SYMMETRIC:
1413: case MAT_NOT_SYMMETRIC:
1414: case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1415: case MAT_HERMITIAN:
1416: case MAT_NOT_HERMITIAN:
1417: case MAT_SYMMETRY_ETERNAL:
1418: case MAT_NOT_SYMMETRY_ETERNAL:
1419: break;
1420: default:
1421: SETERRQ(PETSC_ERR_SUP,"unknown option");
1422: break;
1423: }
1424: return(0);
1425: }
1429: PetscErrorCode MatGetRow_MPIRowbs(Mat AA,int row,int *nz,int **idx,PetscScalar **v)
1430: {
1431: Mat_MPIRowbs *mat = (Mat_MPIRowbs*)AA->data;
1432: BSspmat *A = mat->A;
1433: BSsprow *rs;
1434:
1436: if (row < mat->rstart || row >= mat->rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1438: rs = A->rows[row - mat->rstart];
1439: *nz = rs->length;
1440: if (v) *v = rs->nz;
1441: if (idx) *idx = rs->col;
1442: return(0);
1443: }
1447: PetscErrorCode MatRestoreRow_MPIRowbs(Mat A,int row,int *nz,int **idx,PetscScalar **v)
1448: {
1450: return(0);
1451: }
1453: /* ------------------------------------------------------------------ */
1457: PetscErrorCode MatPrintHelp_MPIRowbs(Mat A)
1458: {
1459: static PetscTruth called = PETSC_FALSE;
1460: MPI_Comm comm = A->comm;
1464: if (called) {return(0);} else called = PETSC_TRUE;
1465: (*PetscHelpPrintf)(comm," Options for MATMPIROWBS matrix format (needed for BlockSolve):\n");
1466: (*PetscHelpPrintf)(comm," -mat_rowbs_no_inode - Do not use inodes\n");
1467: return(0);
1468: }
1472: PetscErrorCode MatSetUpPreallocation_MPIRowbs(Mat A)
1473: {
1477: MatMPIRowbsSetPreallocation(A,PETSC_DEFAULT,0);
1478: return(0);
1479: }
1481: /* -------------------------------------------------------------------*/
1482: static struct _MatOps MatOps_Values = {MatSetValues_MPIRowbs,
1483: MatGetRow_MPIRowbs,
1484: MatRestoreRow_MPIRowbs,
1485: MatMult_MPIRowbs,
1486: /* 4*/ MatMultAdd_MPIRowbs,
1487: MatMult_MPIRowbs,
1488: MatMultAdd_MPIRowbs,
1489: MatSolve_MPIRowbs,
1490: 0,
1491: 0,
1492: /*10*/ 0,
1493: 0,
1494: 0,
1495: 0,
1496: 0,
1497: /*15*/ MatGetInfo_MPIRowbs,
1498: 0,
1499: MatGetDiagonal_MPIRowbs,
1500: 0,
1501: MatNorm_MPIRowbs,
1502: /*20*/ MatAssemblyBegin_MPIRowbs,
1503: MatAssemblyEnd_MPIRowbs,
1504: 0,
1505: MatSetOption_MPIRowbs,
1506: MatZeroEntries_MPIRowbs,
1507: /*25*/ MatZeroRows_MPIRowbs,
1508: 0,
1509: MatLUFactorNumeric_MPIRowbs,
1510: 0,
1511: MatCholeskyFactorNumeric_MPIRowbs,
1512: /*30*/ MatSetUpPreallocation_MPIRowbs,
1513: MatILUFactorSymbolic_MPIRowbs,
1514: MatIncompleteCholeskyFactorSymbolic_MPIRowbs,
1515: 0,
1516: 0,
1517: /*35*/ 0,
1518: MatForwardSolve_MPIRowbs,
1519: MatBackwardSolve_MPIRowbs,
1520: 0,
1521: 0,
1522: /*40*/ 0,
1523: MatGetSubMatrices_MPIRowbs,
1524: 0,
1525: 0,
1526: 0,
1527: /*45*/ MatPrintHelp_MPIRowbs,
1528: MatScale_MPIRowbs,
1529: 0,
1530: 0,
1531: 0,
1532: /*50*/ 0,
1533: 0,
1534: 0,
1535: 0,
1536: 0,
1537: /*55*/ 0,
1538: 0,
1539: 0,
1540: 0,
1541: 0,
1542: /*60*/ MatGetSubMatrix_MPIRowbs,
1543: MatDestroy_MPIRowbs,
1544: MatView_MPIRowbs,
1545: MatGetPetscMaps_Petsc,
1546: MatUseScaledForm_MPIRowbs,
1547: /*65*/ MatScaleSystem_MPIRowbs,
1548: MatUnScaleSystem_MPIRowbs,
1549: 0,
1550: 0,
1551: 0,
1552: /*70*/ 0,
1553: 0,
1554: 0,
1555: 0,
1556: 0,
1557: /*75*/ 0,
1558: 0,
1559: 0,
1560: 0,
1561: 0,
1562: /*80*/ 0,
1563: 0,
1564: 0,
1565: 0,
1566: MatLoad_MPIRowbs,
1567: /*85*/ 0,
1568: 0,
1569: 0,
1570: 0,
1571: 0,
1572: /*90*/ 0,
1573: 0,
1574: 0,
1575: 0,
1576: 0,
1577: /*95*/ 0,
1578: 0,
1579: 0,
1580: 0};
1582: /* ------------------------------------------------------------------- */
1587: PetscErrorCode MatMPIRowbsSetPreallocation_MPIRowbs(Mat mat,int nz,const int nnz[])
1588: {
1592: mat->preallocated = PETSC_TRUE;
1593: MatCreateMPIRowbs_local(mat,nz,nnz);
1594: return(0);
1595: }
1598: /*MC
1599: MATMPIROWBS - MATMPIROWBS = "mpirowbs" - A matrix type providing ILU and ICC for distributed sparse matrices for use
1600: with the external package BlockSolve95. If BlockSolve95 is installed (see the manual for instructions
1601: on how to declare the existence of external packages), a matrix type can be constructed which invokes
1602: BlockSolve95 preconditioners and solvers.
1604: Options Database Keys:
1605: . -mat_type mpirowbs - sets the matrix type to "mpirowbs" during a call to MatSetFromOptions()
1607: Level: beginner
1609: .seealso: MatCreateMPIRowbs
1610: M*/
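/*
   A minimal creation sketch, following the generic calling sequence used by
   MatLoad_MPIRowbs() below; comm, m and M are placeholders supplied by the caller:

      Mat A;
      MatCreate(comm,m,m,M,M,&A);
      MatSetType(A,MATMPIROWBS);
      MatMPIRowbsSetPreallocation(A,5,0);
*/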
1615: PetscErrorCode MatCreate_MPIRowbs(Mat A)
1616: {
1617: Mat_MPIRowbs *a;
1618: BSmapping *bsmap;
1619: BSoff_map *bsoff;
1621: int i,*offset,m,M;
1622: PetscTruth flg1,flg2,flg3;
1623: BSprocinfo *bspinfo;
1624: MPI_Comm comm;
1625:
1627: comm = A->comm;
1628: m = A->m;
1629: M = A->M;
1631: PetscNew(Mat_MPIRowbs,&a);
1632: A->data = (void*)a;
1633: PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));
1634: A->factor = 0;
1635: A->mapping = 0;
1636: a->vecs_permscale = PETSC_FALSE;
1637: A->insertmode = NOT_SET_VALUES;
1638: a->blocksolveassembly = 0;
1639: a->keepzeroedrows = PETSC_FALSE;
1641: MPI_Comm_rank(comm,&a->rank);
1642: MPI_Comm_size(comm,&a->size);
1644: PetscSplitOwnership(comm,&m,&M);
1646: A->N = M;
1647: A->M = M;
1648: A->m = m;
1649: A->n = A->N; /* each row stores all columns */
1650: PetscMalloc((A->m+1)*sizeof(int),&a->imax);
1651: a->reallocs = 0;
1653: /* the information in the maps duplicates the information computed below, eventually
1654: we should remove the duplicate information that is not contained in the maps */
1655: PetscMapCreateMPI(comm,m,M,&A->rmap);
1656: PetscMapCreateMPI(comm,m,M,&A->cmap);
1658: /* build local table of row ownerships */
1659: PetscMalloc((a->size+2)*sizeof(int),&a->rowners);
1660: MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1661: a->rowners[0] = 0;
1662: for (i=2; i<=a->size; i++) {
1663: a->rowners[i] += a->rowners[i-1];
1664: }
1665: a->rstart = a->rowners[a->rank];
1666: a->rend = a->rowners[a->rank+1];
1667: PetscLogObjectMemory(A,(A->m+a->size+3)*sizeof(int));
1669: /* build cache for off array entries formed */
1670: MatStashCreate_Private(A->comm,1,&A->stash);
1671: a->donotstash = PETSC_FALSE;
1673: /* Initialize BlockSolve information */
1674: a->A = 0;
1675: a->pA = 0;
1676: a->comm_pA = 0;
1677: a->fpA = 0;
1678: a->comm_fpA = 0;
1679: a->alpha = 1.0;
1680: a->0;
1681: a->failures = 0;
1682: MPI_Comm_dup(A->comm,&(a->comm_mpirowbs));
1683: VecCreateMPI(A->comm,A->m,A->M,&(a->diag));
1684: VecDuplicate(a->diag,&(a->xwork));
1685: PetscLogObjectParent(A,a->diag); PetscLogObjectParent(A,a->xwork);
1686: PetscLogObjectMemory(A,(A->m+1)*sizeof(PetscScalar));
1687: bspinfo = BScreate_ctx();CHKERRBS(0);
1688: a->procinfo = bspinfo;
1689: BSctx_set_id(bspinfo,a->rank);CHKERRBS(0);
1690: BSctx_set_np(bspinfo,a->size);CHKERRBS(0);
1691: BSctx_set_ps(bspinfo,a->comm_mpirowbs);CHKERRBS(0);
1692: BSctx_set_cs(bspinfo,INT_MAX);CHKERRBS(0);
1693: BSctx_set_is(bspinfo,INT_MAX);CHKERRBS(0);
1694: BSctx_set_ct(bspinfo,IDO);CHKERRBS(0);
1695: #if defined(PETSC_USE_DEBUG)
1696: BSctx_set_err(bspinfo,1);CHKERRBS(0); /* BS error checking */
1697: #endif
1698: BSctx_set_rt(bspinfo,1);CHKERRBS(0);
1699: PetscOptionsHasName(PETSC_NULL,"-log_info",&flg1);
1700: if (flg1) {
1701: BSctx_set_pr(bspinfo,1);CHKERRBS(0);
1702: }
1703: PetscOptionsHasName(PETSC_NULL,"-pc_ilu_factorpointwise",&flg1);
1704: PetscOptionsHasName(PETSC_NULL,"-pc_icc_factorpointwise",&flg2);
1705: PetscOptionsHasName(PETSC_NULL,"-mat_rowbs_no_inode",&flg3);
1706: if (flg1 || flg2 || flg3) {
1707: BSctx_set_si(bspinfo,1);CHKERRBS(0);
1708: } else {
1709: BSctx_set_si(bspinfo,0);CHKERRBS(0);
1710: }
1711: #if defined(PETSC_USE_LOG)
1712: MLOG_INIT(); /* Initialize logging */
1713: #endif
1715: /* Compute global offsets */
1716: offset = &a->rstart;
1718: PetscNew(BSmapping,&a->bsmap);
1719: PetscLogObjectMemory(A,sizeof(BSmapping));
1720: bsmap = a->bsmap;
1721: PetscMalloc(sizeof(int),&bsmap->vlocal2global);
1722: *((int*)bsmap->vlocal2global) = (*offset);
1723: bsmap->flocal2global = BSloc2glob;
1724: bsmap->free_l2g = 0;
1725: PetscMalloc(sizeof(int),&bsmap->vglobal2local);
1726: *((int*)bsmap->vglobal2local) = (*offset);
1727: bsmap->fglobal2local = BSglob2loc;
1728: bsmap->free_g2l = 0;
1729: bsoff = BSmake_off_map(*offset,bspinfo,A->M);
1730: bsmap->vglobal2proc = (void*)bsoff;
1731: bsmap->fglobal2proc = BSglob2proc;
1732: bsmap->free_g2p = (void(*)(void*)) BSfree_off_map;
1733: PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMPIRowbsSetPreallocation_C",
1734: "MatMPIRowbsSetPreallocation_MPIRowbs",
1735: MatMPIRowbsSetPreallocation_MPIRowbs);
1736: return(0);
1737: }
1742: /* @
1743: MatMPIRowbsSetPreallocation - Sets the number of expected nonzeros
1744: per row in the matrix.
1746: Input Parameter:
1747: + mat - matrix
1748: . nz - maximum expected for any row
1749: - nnz - number expected in each row
1751: Note:
1752: This routine is valid only for matrices stored in the MATMPIROWBS
1753: format.
1754: @ */
1755: PetscErrorCode MatMPIRowbsSetPreallocation(Mat mat,int nz,const int nnz[])
1756: {
1757: PetscErrorCode ierr,(*f)(Mat,int,const int[]);
1760: PetscObjectQueryFunction((PetscObject)mat,"MatMPIRowbsSetPreallocation_C",(void (**)(void))&f);
1761: if (f) {
1762: (*f)(mat,nz,nnz);
1763: }
1764: return(0);
1765: }
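/*
   A short sketch of the per-row form of MatMPIRowbsSetPreallocation(), assuming a
   MATMPIROWBS matrix A with three local rows; the nnz values are purely illustrative:

      int nnz[3] = {3,5,2};
      MatMPIRowbsSetPreallocation(A,0,nnz);
*/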
1767: /* --------------- extra BlockSolve-specific routines -------------- */
1770: /* @
1771: MatGetBSProcinfo - Gets the BlockSolve BSprocinfo context, which the
1772: user can then manipulate to alter the default parameters.
1774: Input Parameter:
1775: mat - matrix
1777: Output Parameter:
1778: procinfo - processor information context
1780: Note:
1781: This routine is valid only for matrices stored in the MATMPIROWBS
1782: format.
1783: @ */
1784: PetscErrorCode MatGetBSProcinfo(Mat mat,BSprocinfo *procinfo)
1785: {
1786: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1787: PetscTruth ismpirowbs;
1791: PetscTypeCompare((PetscObject)mat,MATMPIROWBS,&ismpirowbs);
1792: if (!ismpirowbs) SETERRQ(PETSC_ERR_ARG_WRONG,"For MATMPIROWBS matrix type");
1793: procinfo = a->procinfo;
1794: return(0);
1795: }
1799: PetscErrorCode MatLoad_MPIRowbs(PetscViewer viewer,const MatType type,Mat *newmat)
1800: {
1801: Mat_MPIRowbs *a;
1802: BSspmat *A;
1803: BSsprow **rs;
1804: Mat mat;
1806: int i,nz,j,rstart,rend,fd,*ourlens,*sndcounts = 0,*procsnz;
1807: int header[4],rank,size,*rowlengths = 0,M,m,*rowners,maxnz,*cols;
1808: PetscScalar *vals;
1809: MPI_Comm comm = ((PetscObject)viewer)->comm;
1810: MPI_Status status;
1813: MPI_Comm_size(comm,&size);
1814: MPI_Comm_rank(comm,&rank);
1815: if (!rank) {
1816: PetscViewerBinaryGetDescriptor(viewer,&fd);
1817: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1818: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not matrix object");
1819: if (header[3] < 0) {
1820: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIRowbs");
1821: }
1822: }
1824: MPI_Bcast(header+1,3,MPI_INT,0,comm);
1825: M = header[1];
1826: /* determine ownership of all rows */
1827: m = M/size + ((M % size) > rank);
1828: PetscMalloc((size+2)*sizeof(int),&rowners);
1829: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1830: rowners[0] = 0;
1831: for (i=2; i<=size; i++) {
1832: rowners[i] += rowners[i-1];
1833: }
1834: rstart = rowners[rank];
1835: rend = rowners[rank+1];
1837: /* distribute row lengths to all processors */
1838: PetscMalloc((rend-rstart)*sizeof(int),&ourlens);
1839: if (!rank) {
1840: PetscMalloc(M*sizeof(int),&rowlengths);
1841: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1842: PetscMalloc(size*sizeof(int),&sndcounts);
1843: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1844: MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1845: PetscFree(sndcounts);
1846: } else {
1847: MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1848: }
1850: /* create our matrix */
1851: MatCreate(comm,m,m,M,M,newmat);
1852: MatSetType(*newmat,type);
1853: MatMPIRowbsSetPreallocation(*newmat,0,ourlens);
1854: mat = *newmat;
1855: PetscFree(ourlens);
1857: a = (Mat_MPIRowbs*)mat->data;
1858: A = a->A;
1859: rs = A->rows;
1861: if (!rank) {
1862: /* calculate the number of nonzeros on each processor */
1863: PetscMalloc(size*sizeof(int),&procsnz);
1864: PetscMemzero(procsnz,size*sizeof(int));
1865: for (i=0; i<size; i++) {
1866: for (j=rowners[i]; j< rowners[i+1]; j++) {
1867: procsnz[i] += rowlengths[j];
1868: }
1869: }
1870: PetscFree(rowlengths);
1872: /* determine max buffer needed and allocate it */
1873: maxnz = 0;
1874: for (i=0; i<size; i++) {
1875: maxnz = PetscMax(maxnz,procsnz[i]);
1876: }
1877: PetscMalloc(maxnz*sizeof(int),&cols);
1879: /* read in my part of the matrix column indices */
1880: nz = procsnz[0];
1881: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1882:
1883: /* insert it into my part of matrix */
1884: nz = 0;
1885: for (i=0; i<A->num_rows; i++) {
1886: for (j=0; j<a->imax[i]; j++) {
1887: rs[i]->col[j] = cols[nz++];
1888: }
1889: rs[i]->length = a->imax[i];
1890: }
1891: /* read in parts for all other processors */
1892: for (i=1; i<size; i++) {
1893: nz = procsnz[i];
1894: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1895: MPI_Send(cols,nz,MPI_INT,i,mat->tag,comm);
1896: }
1897: PetscFree(cols);
1898: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
1900: /* read in my part of the matrix numerical values */
1901: nz = procsnz[0];
1902: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1903:
1904: /* insert it into my part of matrix */
1905: nz = 0;
1906: for (i=0; i<A->num_rows; i++) {
1907: for (j=0; j<a->imax[i]; j++) {
1908: rs[i]->nz[j] = vals[nz++];
1909: }
1910: }
1911: /* read in parts for all other processors */
1912: for (i=1; i<size; i++) {
1913: nz = procsnz[i];
1914: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1915: MPI_Send(vals,nz,MPIU_SCALAR,i,mat->tag,comm);
1916: }
1917: PetscFree(vals);
1918: PetscFree(procsnz);
1919: } else {
1920: /* determine buffer space needed for message */
1921: nz = 0;
1922: for (i=0; i<A->num_rows; i++) {
1923: nz += a->imax[i];
1924: }
1925: PetscMalloc(nz*sizeof(int),&cols);
1927: /* receive message of column indices*/
1928: MPI_Recv(cols,nz,MPI_INT,0,mat->tag,comm,&status);
1929: MPI_Get_count(&status,MPI_INT,&maxnz);
1930: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong");
1932: /* insert it into my part of matrix */
1933: nz = 0;
1934: for (i=0; i<A->num_rows; i++) {
1935: for (j=0; j<a->imax[i]; j++) {
1936: rs[i]->col[j] = cols[nz++];
1937: }
1938: rs[i]->length = a->imax[i];
1939: }
1940: PetscFree(cols);
1941: PetscMalloc(nz*sizeof(PetscScalar),&vals);
1943: /* receive message of values*/
1944: MPI_Recv(vals,nz,MPIU_SCALAR,0,mat->tag,comm,&status);
1945: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1946: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong");
1948: /* insert it into my part of matrix */
1949: nz = 0;
1950: for (i=0; i<A->num_rows; i++) {
1951: for (j=0; j<a->imax[i]; j++) {
1952: rs[i]->nz[j] = vals[nz++];
1953: }
1954: rs[i]->length = a->imax[i];
1955: }
1956: PetscFree(vals);
1957: }
1958: PetscFree(rowners);
1959: a->nz = a->maxnz;
1960: MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
1961: MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
1962: return(0);
1963: }
1965: /*
1966: Special destroy and view routines for factored matrices
1967: */
1970: static PetscErrorCode MatDestroy_MPIRowbs_Factored(Mat mat)
1971: {
1973: #if defined(PETSC_USE_LOG)
1974: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
1975: #endif
1976: return(0);
1977: }
1981: static PetscErrorCode MatView_MPIRowbs_Factored(Mat mat,PetscViewer viewer)
1982: {
1986: MatView((Mat) mat->data,viewer);
1987: return(0);
1988: }
1992: PetscErrorCode MatIncompleteCholeskyFactorSymbolic_MPIRowbs(Mat mat,IS isrow,MatFactorInfo *info,Mat *newfact)
1993: {
1994: /* Note: f is not currently used in BlockSolve */
1995: Mat newmat;
1996: Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
1998: PetscTruth idn;
2001: if (isrow) {
2002: ISIdentity(isrow,&idn);
2003: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
2004: }
2006: if (!mat->symmetric) {
2007: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"To use incomplete Cholesky \n\
2008: preconditioning with a MATMPIROWBS matrix you must declare it to be \n\
2009: symmetric using the option MatSetOption(A,MAT_SYMMETRIC)");
2010: }
2012: /* If the icc_storage flag wasn't set before the last blocksolveassembly, */
2013: /* we must completely redo the assembly as a different storage format is required. */
2014: if (mbs->blocksolveassembly && !mbs->assembled_icc_storage) {
2015: mat->same_nonzero = PETSC_FALSE;
2016: mbs->blocksolveassembly = 0;
2017: }
2019: if (!mbs->blocksolveassembly) {
2020: BSset_mat_icc_storage(mbs->A,PETSC_TRUE);CHKERRBS(0);
2021: BSset_mat_symmetric(mbs->A,PETSC_TRUE);CHKERRBS(0);
2022: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
2023: }
2025: /* Copy permuted matrix */
2026: if (mbs->fpA) {BSfree_copy_par_mat(mbs->fpA);CHKERRBS(0);}
2027: mbs->fpA = BScopy_par_mat(mbs->pA);CHKERRBS(0);
2029: /* Set up the communication for factorization */
2030: if (mbs->comm_fpA) {BSfree_comm(mbs->comm_fpA);CHKERRBS(0);}
2031: mbs->comm_fpA = BSsetup_factor(mbs->fpA,mbs->procinfo);CHKERRBS(0);
2033: /*
2034: Create a new Mat structure to hold the "factored" matrix,
2035:      note that this merely contains a pointer to the original matrix, since
2036: the original matrix contains the factor information.
2037: */
2038: PetscHeaderCreate(newmat,_p_Mat,struct _MatOps,MAT_COOKIE,-1,"Mat",mat->comm,MatDestroy,MatView);
2039: PetscLogObjectCreate(newmat);
2040: PetscLogObjectMemory(newmat,sizeof(struct _p_Mat));
2042: newmat->data = (void*)mat;
2043: PetscMemcpy(newmat->ops,&MatOps_Values,sizeof(struct _MatOps));
2044: newmat->ops->destroy = MatDestroy_MPIRowbs_Factored;
2045: newmat->ops->view = MatView_MPIRowbs_Factored;
2046: newmat->factor = 1;
2047: newmat->preallocated = PETSC_TRUE;
2048: newmat->M = mat->M;
2049: newmat->N = mat->N;
2050: newmat->m = mat->m;
2051: newmat->n = mat->n;
2052: PetscStrallocpy(MATMPIROWBS,&newmat->type_name);
2054: *newfact = newmat;
2055: return(0);
2056: }
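/*
   Illustrative sketch (not part of the original file): how a caller prepares a
   MATMPIROWBS matrix so that the incomplete Cholesky symbolic routine above
   succeeds.  The matrix must be declared symmetric BEFORE assembly, otherwise
   the PETSC_ERR_ARG_WRONGSTATE error above is raised; the helper name is
   illustrative only.
*/
#if 0
static PetscErrorCode ExampleICCSetup(Mat A)
{
  MatSetOption(A,MAT_SYMMETRIC);          /* required so icc storage can be used */
  MatSetOption(A,MAT_SYMMETRY_ETERNAL);   /* as recommended in the MatCreateMPIRowbs() notes below */
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  /* the symbolic/numeric ICC factorization is then normally triggered through a
     preconditioner rather than by calling the routine above directly */
  return(0);
}
#endif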
2060: PetscErrorCode MatILUFactorSymbolic_MPIRowbs(Mat mat,IS isrow,IS iscol,MatFactorInfo* info,Mat *newfact)
2061: {
2062: Mat newmat;
2063: Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
2065: PetscTruth idn;
2068: if (info->levels) SETERRQ(PETSC_ERR_SUP,"Blocksolve ILU only supports 0 fill");
2069: if (isrow) {
2070: ISIdentity(isrow,&idn);
2071: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
2072: }
2073: if (iscol) {
2074: ISIdentity(iscol,&idn);
2075: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
2076: }
2078: if (!mbs->blocksolveassembly) {
2079: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
2080: }
2081:
2082: /* if (mat->symmetric) { */
2083: /* SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"To use ILU preconditioner with \n\ */
2084: /* MatCreateMPIRowbs() matrix you CANNOT declare it to be a symmetric matrix\n\ */
2085: /* using the option MatSetOption(A,MAT_SYMMETRIC)"); */
2086: /* } */
2088: /* Copy permuted matrix */
2089: if (mbs->fpA) {BSfree_copy_par_mat(mbs->fpA);CHKERRBS(0);}
2090: mbs->fpA = BScopy_par_mat(mbs->pA);CHKERRBS(0);
2092: /* Set up the communication for factorization */
2093: if (mbs->comm_fpA) {BSfree_comm(mbs->comm_fpA);CHKERRBS(0);}
2094: mbs->comm_fpA = BSsetup_factor(mbs->fpA,mbs->procinfo);CHKERRBS(0);
2096: /*
2097: Create a new Mat structure to hold the "factored" matrix,
2098:      note that this merely contains a pointer to the original matrix, since
2099: the original matrix contains the factor information.
2100: */
2101: PetscHeaderCreate(newmat,_p_Mat,struct _MatOps,MAT_COOKIE,-1,"Mat",mat->comm,MatDestroy,MatView);
2102: PetscLogObjectCreate(newmat);
2103: PetscLogObjectMemory(newmat,sizeof(struct _p_Mat));
2105: newmat->data = (void*)mat;
2106: PetscMemcpy(newmat->ops,&MatOps_Values,sizeof(struct _MatOps));
2107: newmat->ops->destroy = MatDestroy_MPIRowbs_Factored;
2108: newmat->ops->view = MatView_MPIRowbs_Factored;
2109: newmat->factor = 1;
2110: newmat->preallocated = PETSC_TRUE;
2111: newmat->M = mat->M;
2112: newmat->N = mat->N;
2113: newmat->m = mat->m;
2114: newmat->n = mat->n;
2115: PetscStrallocpy(MATMPIROWBS,&newmat->type_name);
2117: *newfact = newmat;
2118: return(0);
2119: }
2123: PetscErrorCode MatMPIRowbsGetColor(Mat mat,ISColoring *coloring)
2124: {
2130: if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
2131: ISColoringCreate(mat->comm,mat->m,0,coloring);
2133: return(0);
2134: }
2138: /*@C
2139: MatCreateMPIRowbs - Creates a sparse parallel matrix in the MATMPIROWBS
2140: format. This format is intended primarily as an interface for BlockSolve95.
2142: Collective on MPI_Comm
2144: Input Parameters:
2145: + comm - MPI communicator
2146: .  m - number of local rows (or PETSC_DECIDE to have it calculated)
2147: .  M - number of global rows (or PETSC_DECIDE to have it calculated)
2148: . nz - number of nonzeros per row (same for all local rows)
2149: - nnz - number of nonzeros per row (possibly different for each row).
2151: Output Parameter:
2152: . newA - the matrix
2154: Notes:
2155: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2156:    then it must be used on all processors that share the object for that argument.
2158: The user MUST specify either the local or global matrix dimensions
2159: (possibly both).
2161: Specify the preallocated storage with either nz or nnz (not both). Set
2162: nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2163: allocation.
2165: Notes:
2166: By default, the matrix is assumed to be nonsymmetric; the user can
2167: take advantage of special optimizations for symmetric matrices by calling
2168: $ MatSetOption(mat,MAT_SYMMETRIC)
2169: $ MatSetOption(mat,MAT_SYMMETRY_ETERNAL)
2170: BEFORE calling the routine MatAssemblyBegin().
2172: Internally, the MATMPIROWBS format inserts zero elements to the
2173: matrix if necessary, so that nonsymmetric matrices are considered
2174: to be symmetric in terms of their sparsity structure; this format
2175: is required for use of the parallel communication routines within
2176: BlockSolve95. In particular, if the matrix element A[i,j] exists,
2177: then PETSc will internally allocate a 0 value for the element
2178: A[j,i] during MatAssemblyEnd() if the user has not already set
2179: a value for the matrix element A[j,i].
2181: Options Database Keys:
2182: . -mat_rowbs_no_inode - Do not use inodes.
2184: Level: intermediate
2185:
2186: .keywords: matrix, row, symmetric, sparse, parallel, BlockSolve
2188: .seealso: MatCreate(), MatSetValues()
2189: @*/
2190: PetscErrorCode MatCreateMPIRowbs(MPI_Comm comm,int m,int M,int nz,const int nnz[],Mat *newA)
2191: {
2193:
2195: MatCreate(comm,m,m,M,M,newA);
2196: MatSetType(*newA,MATMPIROWBS);
2197: MatMPIRowbsSetPreallocation(*newA,nz,nnz);
2198: return(0);
2199: }
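/*
   Illustrative usage sketch (not part of the original file): create a
   MATMPIROWBS matrix with 3 preallocated nonzeros per row, insert one value
   with global indices, and assemble.  The helper name and the chosen sizes are
   illustrative only.
*/
#if 0
static PetscErrorCode ExampleCreateRowbs(MPI_Comm comm,int m,int M)
{
  Mat         A;
  int         i = 0,j = 0;
  PetscScalar v = 1.0;

  MatCreateMPIRowbs(comm,m,M,3,PETSC_NULL,&A);
  MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);   /* set A[0][0] = 1.0 */
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  MatDestroy(A);
  return(0);
}
#endif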
2202: /* -------------------------------------------------------------------------*/
2204: #include src/mat/impls/aij/seq/aij.h
2205: #include src/mat/impls/aij/mpi/mpiaij.h
2209: PetscErrorCode MatGetSubMatrices_MPIRowbs(Mat C,int ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
2210: {
2212: int nmax,nstages_local,nstages,i,pos,max_no;
2216: /* Allocate memory to hold all the submatrices */
2217: if (scall != MAT_REUSE_MATRIX) {
2218: PetscMalloc((ismax+1)*sizeof(Mat),submat);
2219: }
2220:
2221: /* Determine the number of stages through which submatrices are done */
2222: nmax = 20*1000000 / (C->N * sizeof(int));
2223: if (!nmax) nmax = 1;
2224: nstages_local = ismax/nmax + ((ismax % nmax)?1:0);
2226: /* Make sure every processor loops through the nstages */
2227: MPI_Allreduce(&nstages_local,&nstages,1,MPI_INT,MPI_MAX,C->comm);
2229: for (i=0,pos=0; i<nstages; i++) {
2230: if (pos+nmax <= ismax) max_no = nmax;
2231: else if (pos == ismax) max_no = 0;
2232: else max_no = ismax-pos;
2233: MatGetSubMatrices_MPIRowbs_Local(C,max_no,isrow+pos,iscol+pos,scall,*submat+pos);
2234: pos += max_no;
2235: }
2236: return(0);
2237: }
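/*
   Illustrative sketch (not part of the original file): extracting sequential
   submatrices from a MATMPIROWBS matrix through MatGetSubMatrices(), which
   dispatches to the staged routine above.  ISCreateStride() is used so the row
   index set is sorted, as required by the local routine; the helper name and
   sizes are illustrative only, and the destroy calls follow the PETSc 2.x-era
   conventions used elsewhere in this file.
*/
#if 0
static PetscErrorCode ExampleGetSubMatrices(Mat C)
{
  IS  isrow,iscol;
  Mat *submat;

  ISCreateStride(PETSC_COMM_SELF,5,0,1,&isrow);   /* rows 0..4, sorted */
  ISCreateStride(PETSC_COMM_SELF,5,0,1,&iscol);   /* cols 0..4, sorted */
  MatGetSubMatrices(C,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&submat);
  /* submat[0] is a SeqAIJ matrix on each process */
  MatDestroyMatrices(1,&submat);
  ISDestroy(isrow);
  ISDestroy(iscol);
  return(0);
}
#endif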
2238: /* -------------------------------------------------------------------------*/
2239: /* for now MatGetSubMatrices_MPIRowbs_Local get MPIAij submatrices of input
2240: matrix and preservs zeroes from structural symetry
2241: */
2244: PetscErrorCode MatGetSubMatrices_MPIRowbs_Local(Mat C,int ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats)
2245: {
2246: Mat_MPIRowbs *c = (Mat_MPIRowbs *)(C->data);
2247: BSspmat *A = c->A;
2248: Mat_SeqAIJ *mat;
2250: int **irow,**icol,*nrow,*ncol,*w1,*w2,*w3,*w4,*rtable,start,end,size;
2251: int **sbuf1,**sbuf2,rank,m,i,j,k,l,ct1,ct2,**rbuf1,row,proc;
2252: int nrqs,msz,**ptr,idx,*req_size,*ctr,*pa,*tmp,tcol,nrqr;
2253: int **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2,**rmap;
2254: int **cmap,**lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax,*irow_i;
2255: int len,ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*cmap_i,*lens_i;
2256: int *rmap_i,tag0,tag1,tag2,tag3;
2257: MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
2258: MPI_Request *r_waits4,*s_waits3,*s_waits4;
2259: MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
2260: MPI_Status *r_status3,*r_status4,*s_status4;
2261: MPI_Comm comm;
2262: FLOAT **rbuf4,**sbuf_aa,*vals,*sbuf_aa_i;
2263: PetscScalar *mat_a;
2264: PetscTruth sorted;
2265: int *onodes1,*olengths1;
2268: comm = C->comm;
2269: tag0 = C->tag;
2270: size = c->size;
2271: rank = c->rank;
2272: m = C->M;
2273:
2274: /* Get some new tags to keep the communication clean */
2275: PetscObjectGetNewTag((PetscObject)C,&tag1);
2276: PetscObjectGetNewTag((PetscObject)C,&tag2);
2277: PetscObjectGetNewTag((PetscObject)C,&tag3);
2279: /* Check if the col indices are sorted */
2280: for (i=0; i<ismax; i++) {
2281: ISSorted(isrow[i],&sorted);
2282: if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted");
2283: ISSorted(iscol[i],&sorted);
2284: /* if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted"); */
2285: }
2287: len = (2*ismax+1)*(sizeof(int*)+ sizeof(int)) + (m+1)*sizeof(int);
2288: PetscMalloc(len,&irow);
2289: icol = irow + ismax;
2290: nrow = (int*)(icol + ismax);
2291: ncol = nrow + ismax;
2292: rtable = ncol + ismax;
2294: for (i=0; i<ismax; i++) {
2295: ISGetIndices(isrow[i],&irow[i]);
2296: ISGetIndices(iscol[i],&icol[i]);
2297: ISGetLocalSize(isrow[i],&nrow[i]);
2298: ISGetLocalSize(iscol[i],&ncol[i]);
2299: }
2301: /* Create hash table for the mapping :row -> proc*/
2302: for (i=0,j=0; i<size; i++) {
2303: jmax = c->rowners[i+1];
2304: for (; j<jmax; j++) {
2305: rtable[j] = i;
2306: }
2307: }
2309: /* evaluate communication - mesg to who, length of mesg, and buffer space
2310: required. Based on this, buffers are allocated, and data copied into them*/
2311: PetscMalloc(size*4*sizeof(int),&w1); /* mesg size */
2312: w2 = w1 + size; /* if w2[i] marked, then a message to proc i*/
2313: w3 = w2 + size; /* no of IS that needs to be sent to proc i */
2314: w4 = w3 + size; /* temp work space used in determining w1, w2, w3 */
2315: PetscMemzero(w1,size*3*sizeof(int)); /* initialize work vector*/
2316: for (i=0; i<ismax; i++) {
2317: PetscMemzero(w4,size*sizeof(int)); /* initialize work vector*/
2318: jmax = nrow[i];
2319: irow_i = irow[i];
2320: for (j=0; j<jmax; j++) {
2321: row = irow_i[j];
2322: proc = rtable[row];
2323: w4[proc]++;
2324: }
2325: for (j=0; j<size; j++) {
2326: if (w4[j]) { w1[j] += w4[j]; w3[j]++;}
2327: }
2328: }
2329:
2330: nrqs = 0; /* no of outgoing messages */
2331: msz = 0; /* total mesg length (for all procs) */
2332: w1[rank] = 0; /* no mesg sent to self */
2333: w3[rank] = 0;
2334: for (i=0; i<size; i++) {
2335: if (w1[i]) { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
2336: }
2337: PetscMalloc((nrqs+1)*sizeof(int),&pa); /*(proc -array)*/
2338: for (i=0,j=0; i<size; i++) {
2339: if (w1[i]) { pa[j] = i; j++; }
2340: }
2342: /* Each message would have a header = 1 + 2*(no of IS) + data */
2343: for (i=0; i<nrqs; i++) {
2344: j = pa[i];
2345: w1[j] += w2[j] + 2* w3[j];
2346: msz += w1[j];
2347: }
2349:   /* Determine the number of messages to expect, their lengths, and their from-ids */
2350: PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
2351: PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);
2353: /* Now post the Irecvs corresponding to these messages */
2354: PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);
2355:
2356: PetscFree(onodes1);
2357: PetscFree(olengths1);
2358:
2359: /* Allocate Memory for outgoing messages */
2360: len = 2*size*sizeof(int*) + 2*msz*sizeof(int) + size*sizeof(int);
2361: PetscMalloc(len,&sbuf1);
2362: ptr = sbuf1 + size; /* Pointers to the data in outgoing buffers */
2363: PetscMemzero(sbuf1,2*size*sizeof(int*));
2364: /* allocate memory for outgoing data + buf to receive the first reply */
2365: tmp = (int*)(ptr + size);
2366: ctr = tmp + 2*msz;
2368: {
2369: int *iptr = tmp,ict = 0;
2370: for (i=0; i<nrqs; i++) {
2371: j = pa[i];
2372: iptr += ict;
2373: sbuf1[j] = iptr;
2374: ict = w1[j];
2375: }
2376: }
2378: /* Form the outgoing messages */
2379: /* Initialize the header space */
2380: for (i=0; i<nrqs; i++) {
2381: j = pa[i];
2382: sbuf1[j][0] = 0;
2383: PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(int));
2384: ptr[j] = sbuf1[j] + 2*w3[j] + 1;
2385: }
2386:
2387: /* Parse the isrow and copy data into outbuf */
2388: for (i=0; i<ismax; i++) {
2389: PetscMemzero(ctr,size*sizeof(int));
2390: irow_i = irow[i];
2391: jmax = nrow[i];
2392: for (j=0; j<jmax; j++) { /* parse the indices of each IS */
2393: row = irow_i[j];
2394: proc = rtable[row];
2395: if (proc != rank) { /* copy to the outgoing buf*/
2396: ctr[proc]++;
2397: *ptr[proc] = row;
2398: ptr[proc]++;
2399: }
2400: }
2401: /* Update the headers for the current IS */
2402: for (j=0; j<size; j++) { /* Can Optimise this loop too */
2403: if ((ctr_j = ctr[j])) {
2404: sbuf1_j = sbuf1[j];
2405: k = ++sbuf1_j[0];
2406: sbuf1_j[2*k] = ctr_j;
2407: sbuf1_j[2*k-1] = i;
2408: }
2409: }
2410: }
2412: /* Now post the sends */
2413: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
2414: for (i=0; i<nrqs; ++i) {
2415: j = pa[i];
2416: MPI_Isend(sbuf1[j],w1[j],MPI_INT,j,tag0,comm,s_waits1+i);
2417: }
2419: /* Post Receives to capture the buffer size */
2420: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);
2421: PetscMalloc((nrqs+1)*sizeof(int*),&rbuf2);
2422: rbuf2[0] = tmp + msz;
2423: for (i=1; i<nrqs; ++i) {
2424: rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
2425: }
2426: for (i=0; i<nrqs; ++i) {
2427: j = pa[i];
2428: MPI_Irecv(rbuf2[i],w1[j],MPI_INT,j,tag1,comm,r_waits2+i);
2429: }
2431: /* Send to other procs the buf size they should allocate */
2432:
2434: /* Receive messages*/
2435: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
2436: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);
2437: len = 2*nrqr*sizeof(int) + (nrqr+1)*sizeof(int*);
2438: PetscMalloc(len,&sbuf2);
2439: req_size = (int*)(sbuf2 + nrqr);
2440: req_source = req_size + nrqr;
2441:
2442: {
2443: BSsprow **sAi = A->rows;
2444: int id,rstart = c->rstart;
2445: int *sbuf2_i;
2447: for (i=0; i<nrqr; ++i) {
2448: MPI_Waitany(nrqr,r_waits1,&idx,r_status1+i);
2449: req_size[idx] = 0;
2450: rbuf1_i = rbuf1[idx];
2451: start = 2*rbuf1_i[0] + 1;
2452: MPI_Get_count(r_status1+i,MPI_INT,&end);
2453: PetscMalloc((end+1)*sizeof(int),&sbuf2[idx]);
2454: sbuf2_i = sbuf2[idx];
2455: for (j=start; j<end; j++) {
2456: id = rbuf1_i[j] - rstart;
2457: ncols = (sAi[id])->length;
2458: sbuf2_i[j] = ncols;
2459: req_size[idx] += ncols;
2460: }
2461: req_source[idx] = r_status1[i].MPI_SOURCE;
2462: /* form the header */
2463: sbuf2_i[0] = req_size[idx];
2464: for (j=1; j<start; j++) { sbuf2_i[j] = rbuf1_i[j]; }
2465: MPI_Isend(sbuf2_i,end,MPI_INT,req_source[idx],tag1,comm,s_waits2+i);
2466: }
2467: }
2468: PetscFree(r_status1);
2469: PetscFree(r_waits1);
2471: /* recv buffer sizes */
2472: /* Receive messages*/
2473:
2474: PetscMalloc((nrqs+1)*sizeof(int*),&rbuf3);
2475: PetscMalloc((nrqs+1)*sizeof(FLOAT *),&rbuf4);
2476: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);
2477: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);
2478: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);
2480: for (i=0; i<nrqs; ++i) {
2481: MPI_Waitany(nrqs,r_waits2,&idx,r_status2+i);
2482: PetscMalloc((rbuf2[idx][0]+1)*sizeof(int),&rbuf3[idx]);
2483: PetscMalloc((rbuf2[idx][0]+1)*sizeof(FLOAT),&rbuf4[idx]);
2484: MPI_Irecv(rbuf3[idx],rbuf2[idx][0],MPI_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idx);
2485: MPI_Irecv(rbuf4[idx],rbuf2[idx][0],MPIU_SCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idx);
2486: }
2487: PetscFree(r_status2);
2488: PetscFree(r_waits2);
2489:
2490: /* Wait on sends1 and sends2 */
2491: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);
2492: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);
2494: MPI_Waitall(nrqs,s_waits1,s_status1);
2495: MPI_Waitall(nrqr,s_waits2,s_status2);
2496: PetscFree(s_status1);
2497: PetscFree(s_status2);
2498: PetscFree(s_waits1);
2499: PetscFree(s_waits2);
2501: /* Now allocate buffers for a->j, and send them off */
2502: PetscMalloc((nrqr+1)*sizeof(int*),&sbuf_aj);
2503: for (i=0,j=0; i<nrqr; i++) j += req_size[i];
2504: PetscMalloc((j+1)*sizeof(int),&sbuf_aj[0]);
2505: for (i=1; i<nrqr; i++) sbuf_aj[i] = sbuf_aj[i-1] + req_size[i-1];
2506:
2507: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);
2508: {
2509: BSsprow *brow;
2510: int *Acol;
2511: int rstart = c->rstart;
2513: for (i=0; i<nrqr; i++) {
2514: rbuf1_i = rbuf1[i];
2515: sbuf_aj_i = sbuf_aj[i];
2516: ct1 = 2*rbuf1_i[0] + 1;
2517: ct2 = 0;
2518: for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
2519: kmax = rbuf1[i][2*j];
2520: for (k=0; k<kmax; k++,ct1++) {
2521: brow = A->rows[rbuf1_i[ct1] - rstart];
2522: ncols = brow->length;
2523: Acol = brow->col;
2524: /* load the column indices for this row into cols*/
2525: cols = sbuf_aj_i + ct2;
2526: PetscMemcpy(cols,Acol,ncols*sizeof(int));
2527: /*for (l=0; l<ncols;l++) cols[l]=Acol[l]; */ /* How is it with
2528: mappings?? */
2529: ct2 += ncols;
2530: }
2531: }
2532: MPI_Isend(sbuf_aj_i,req_size[i],MPI_INT,req_source[i],tag2,comm,s_waits3+i);
2533: }
2534: }
2535: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);
2536: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);
2538: /* Allocate buffers for a->a, and send them off */
2539: PetscMalloc((nrqr+1)*sizeof(FLOAT*),&sbuf_aa);
2540: for (i=0,j=0; i<nrqr; i++) j += req_size[i];
2541: PetscMalloc((j+1)*sizeof(FLOAT),&sbuf_aa[0]);
2542: for (i=1; i<nrqr; i++) sbuf_aa[i] = sbuf_aa[i-1] + req_size[i-1];
2543:
2544: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);
2545: {
2546: BSsprow *brow;
2547: FLOAT *Aval;
2548: int rstart = c->rstart;
2549:
2550: for (i=0; i<nrqr; i++) {
2551: rbuf1_i = rbuf1[i];
2552: sbuf_aa_i = sbuf_aa[i];
2553: ct1 = 2*rbuf1_i[0]+1;
2554: ct2 = 0;
2555: for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
2556: kmax = rbuf1_i[2*j];
2557: for (k=0; k<kmax; k++,ct1++) {
2558: brow = A->rows[rbuf1_i[ct1] - rstart];
2559: ncols = brow->length;
2560: Aval = brow->nz;
2561: /* load the column values for this row into vals*/
2562: vals = sbuf_aa_i+ct2;
2563: PetscMemcpy(vals,Aval,ncols*sizeof(FLOAT));
2564: ct2 += ncols;
2565: }
2566: }
2567: MPI_Isend(sbuf_aa_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);
2568: }
2569: }
2570: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);
2571: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);
2572: PetscFree(rbuf1);
2574: /* Form the matrix */
2575: /* create col map */
2576: {
2577: int *icol_i;
2578:
2579: len = (1+ismax)*sizeof(int*)+ ismax*C->N*sizeof(int);
2580: PetscMalloc(len,&cmap);
2581: cmap[0] = (int*)(cmap + ismax);
2582: PetscMemzero(cmap[0],(1+ismax*C->N)*sizeof(int));
2583: for (i=1; i<ismax; i++) { cmap[i] = cmap[i-1] + C->N; }
2584: for (i=0; i<ismax; i++) {
2585: jmax = ncol[i];
2586: icol_i = icol[i];
2587: cmap_i = cmap[i];
2588: for (j=0; j<jmax; j++) {
2589: cmap_i[icol_i[j]] = j+1;
2590: }
2591: }
2592: }
2594: /* Create lens which is required for MatCreate... */
2595: for (i=0,j=0; i<ismax; i++) { j += nrow[i]; }
2596: len = (1+ismax)*sizeof(int*)+ j*sizeof(int);
2597: PetscMalloc(len,&lens);
2598: lens[0] = (int*)(lens + ismax);
2599: PetscMemzero(lens[0],j*sizeof(int));
2600: for (i=1; i<ismax; i++) { lens[i] = lens[i-1] + nrow[i-1]; }
2601:
2602: /* Update lens from local data */
2603: { BSsprow *Arow;
2604: for (i=0; i<ismax; i++) {
2605: jmax = nrow[i];
2606: cmap_i = cmap[i];
2607: irow_i = irow[i];
2608: lens_i = lens[i];
2609: for (j=0; j<jmax; j++) {
2610: row = irow_i[j];
2611: proc = rtable[row];
2612: if (proc == rank) {
2613: Arow=A->rows[row-c->rstart];
2614: ncols=Arow->length;
2615: cols=Arow->col;
2616: for (k=0; k<ncols; k++) {
2617: if (cmap_i[cols[k]]) { lens_i[j]++;}
2618: }
2619: }
2620: }
2621: }
2622: }
2623:
2624: /* Create row map*/
2625: len = (1+ismax)*sizeof(int*)+ ismax*C->M*sizeof(int);
2626: PetscMalloc(len,&rmap);
2627: rmap[0] = (int*)(rmap + ismax);
2628: PetscMemzero(rmap[0],ismax*C->M*sizeof(int));
2629: for (i=1; i<ismax; i++) { rmap[i] = rmap[i-1] + C->M;}
2630: for (i=0; i<ismax; i++) {
2631: rmap_i = rmap[i];
2632: irow_i = irow[i];
2633: jmax = nrow[i];
2634: for (j=0; j<jmax; j++) {
2635: rmap_i[irow_i[j]] = j;
2636: }
2637: }
2638:
2639: /* Update lens from offproc data */
2640: {
2641: int *rbuf2_i,*rbuf3_i,*sbuf1_i;
2643: for (tmp2=0; tmp2<nrqs; tmp2++) {
2644: MPI_Waitany(nrqs,r_waits3,&i,r_status3+tmp2);
2645: idx = pa[i];
2646: sbuf1_i = sbuf1[idx];
2647: jmax = sbuf1_i[0];
2648: ct1 = 2*jmax+1;
2649: ct2 = 0;
2650: rbuf2_i = rbuf2[i];
2651: rbuf3_i = rbuf3[i];
2652: for (j=1; j<=jmax; j++) {
2653: is_no = sbuf1_i[2*j-1];
2654: max1 = sbuf1_i[2*j];
2655: lens_i = lens[is_no];
2656: cmap_i = cmap[is_no];
2657: rmap_i = rmap[is_no];
2658: for (k=0; k<max1; k++,ct1++) {
2659:         row = rmap_i[sbuf1_i[ct1]]; /* the row index in the new matrix */
2660: max2 = rbuf2_i[ct1];
2661: for (l=0; l<max2; l++,ct2++) {
2662: if (cmap_i[rbuf3_i[ct2]]) {
2663: lens_i[row]++;
2664: }
2665: }
2666: }
2667: }
2668: }
2669: }
2670: PetscFree(r_status3);
2671: PetscFree(r_waits3);
2672: MPI_Waitall(nrqr,s_waits3,s_status3);
2673: PetscFree(s_status3);
2674: PetscFree(s_waits3);
2676: /* Create the submatrices */
2677: if (scall == MAT_REUSE_MATRIX) {
2678: PetscTruth same;
2679:
2680: /*
2681:        Assumes the new rows are the same length as the old rows, hence a bug!
2682: */
2683: for (i=0; i<ismax; i++) {
2684: PetscTypeCompare((PetscObject)(submats[i]),MATSEQAIJ,&same);
2685: if (same == PETSC_FALSE) {
2686: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong type");
2687: }
2688: mat = (Mat_SeqAIJ*)(submats[i]->data);
2689: if ((submats[i]->m != nrow[i]) || (submats[i]->n != ncol[i])) {
2690: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
2691: }
2692: PetscMemcmp(mat->ilen,lens[i],submats[i]->m*sizeof(int),&same);
2693: if (same == PETSC_FALSE) {
2694: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
2695: }
2696:       /* Initialize the matrix as if empty */
2697: PetscMemzero(mat->ilen,submats[i]->m*sizeof(int));
2698: submats[i]->factor = C->factor;
2699: }
2700: } else {
2701: for (i=0; i<ismax; i++) {
2702: /* Here we want to explicitly generate SeqAIJ matrices */
2703: MatCreate(PETSC_COMM_SELF,nrow[i],ncol[i],nrow[i],ncol[i],submats+i);
2704: MatSetType(submats[i],MATSEQAIJ);
2705: MatSeqAIJSetPreallocation(submats[i],0,lens[i]);
2706: }
2707: }
2709: /* Assemble the matrices */
2710: /* First assemble the local rows */
2711: {
2712: int ilen_row,*imat_ilen,*imat_j,*imat_i,old_row;
2713: PetscScalar *imat_a;
2714: BSsprow *Arow;
2715:
2716: for (i=0; i<ismax; i++) {
2717: mat = (Mat_SeqAIJ*)submats[i]->data;
2718: imat_ilen = mat->ilen;
2719: imat_j = mat->j;
2720: imat_i = mat->i;
2721: imat_a = mat->a;
2722: cmap_i = cmap[i];
2723: rmap_i = rmap[i];
2724: irow_i = irow[i];
2725: jmax = nrow[i];
2726: for (j=0; j<jmax; j++) {
2727: row = irow_i[j];
2728: proc = rtable[row];
2729: if (proc == rank) {
2730: old_row = row;
2731: row = rmap_i[row];
2732: ilen_row = imat_ilen[row];
2733:
2734: Arow=A->rows[old_row-c->rstart];
2735: ncols=Arow->length;
2736: cols=Arow->col;
2737: vals=Arow->nz;
2738:
2739: mat_i = imat_i[row];
2740: mat_a = imat_a + mat_i;
2741: mat_j = imat_j + mat_i;
2742: for (k=0; k<ncols; k++) {
2743: if ((tcol = cmap_i[cols[k]])) {
2744: *mat_j++ = tcol - 1;
2745: *mat_a++ = (PetscScalar)vals[k];
2746: ilen_row++;
2747: }
2748: }
2749: imat_ilen[row] = ilen_row;
2750: }
2751: }
2752: }
2753: }
2755: /* Now assemble the off proc rows*/
2756: {
2757: int *sbuf1_i,*rbuf2_i,*rbuf3_i,*imat_ilen,ilen;
2758: int *imat_j,*imat_i;
2759: PetscScalar *imat_a;
2760: FLOAT *rbuf4_i;
2761:
2762: for (tmp2=0; tmp2<nrqs; tmp2++) {
2763: MPI_Waitany(nrqs,r_waits4,&i,r_status4+tmp2);
2764: idx = pa[i];
2765: sbuf1_i = sbuf1[idx];
2766: jmax = sbuf1_i[0];
2767: ct1 = 2*jmax + 1;
2768: ct2 = 0;
2769: rbuf2_i = rbuf2[i];
2770: rbuf3_i = rbuf3[i];
2771: rbuf4_i = rbuf4[i];
2772: for (j=1; j<=jmax; j++) {
2773: is_no = sbuf1_i[2*j-1];
2774: rmap_i = rmap[is_no];
2775: cmap_i = cmap[is_no];
2776: mat = (Mat_SeqAIJ*)submats[is_no]->data;
2777: imat_ilen = mat->ilen;
2778: imat_j = mat->j;
2779: imat_i = mat->i;
2780: imat_a = mat->a;
2781: max1 = sbuf1_i[2*j];
2782: for (k=0; k<max1; k++,ct1++) {
2783: row = sbuf1_i[ct1];
2784: row = rmap_i[row];
2785: ilen = imat_ilen[row];
2786: mat_i = imat_i[row];
2787: mat_a = imat_a + mat_i;
2788: mat_j = imat_j + mat_i;
2789: max2 = rbuf2_i[ct1];
2790: for (l=0; l<max2; l++,ct2++) {
2791: if ((tcol = cmap_i[rbuf3_i[ct2]])) {
2792: *mat_j++ = tcol - 1;
2793: *mat_a++ = (PetscScalar)rbuf4_i[ct2];
2794: ilen++;
2795: }
2796: }
2797: imat_ilen[row] = ilen;
2798: }
2799: }
2800: }
2801: }
2802: PetscFree(r_status4);
2803: PetscFree(r_waits4);
2804: MPI_Waitall(nrqr,s_waits4,s_status4);
2805: PetscFree(s_waits4);
2806: PetscFree(s_status4);
2808: /* Restore the indices */
2809: for (i=0; i<ismax; i++) {
2810: ISRestoreIndices(isrow[i],irow+i);
2811: ISRestoreIndices(iscol[i],icol+i);
2812: }
2814: /* Destroy allocated memory */
2815: PetscFree(irow);
2816: PetscFree(w1);
2817: PetscFree(pa);
2819: PetscFree(sbuf1);
2820: PetscFree(rbuf2);
2821: for (i=0; i<nrqr; ++i) {
2822: PetscFree(sbuf2[i]);
2823: }
2824: for (i=0; i<nrqs; ++i) {
2825: PetscFree(rbuf3[i]);
2826: PetscFree(rbuf4[i]);
2827: }
2829: PetscFree(sbuf2);
2830: PetscFree(rbuf3);
2831: PetscFree(rbuf4);
2832: PetscFree(sbuf_aj[0]);
2833: PetscFree(sbuf_aj);
2834: PetscFree(sbuf_aa[0]);
2835: PetscFree(sbuf_aa);
2836:
2837: PetscFree(cmap);
2838: PetscFree(rmap);
2839: PetscFree(lens);
2841: for (i=0; i<ismax; i++) {
2842: MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);
2843: MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);
2844: }
2845: return(0);
2846: }
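/*
   Illustrative note (not part of the original file): the cmap arrays built in
   the routine above store "new column index + 1", so a stored value of 0 means
   the column is not selected by the iscol index set.  A minimal sketch of that
   lookup convention, with an illustrative helper name:
*/
#if 0
static void ExampleCmapLookup(const int *cmap,const int *cols,int ncols)
{
  int k,tcol;

  for (k=0; k<ncols; k++) {
    if ((tcol = cmap[cols[k]])) {  /* nonzero => column belongs to the submatrix */
      /* the new 0-based column index is tcol - 1 */
    }
  }
}
#endif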
2848: /*
2849:   can be optimized by sending only the nonzeros selected by the iscol IS -
2850:   i.e. prebuild the submatrix on the sending side, including the A,B partitioning
2851: */
2854: #include src/vec/is/impls/general/general.h
2855: PetscErrorCode MatGetSubMatrix_MPIRowbs(Mat C,IS isrow,IS iscol,int csize,MatReuse scall,Mat *submat)
2856: {
2857: Mat_MPIRowbs *c = (Mat_MPIRowbs*)C->data;
2858: BSspmat *A = c->A;
2859: BSsprow *Arow;
2860:   Mat_SeqAIJ   *matA,*matB; /* on proc, off proc parts of submat */
2861: Mat_MPIAIJ *mat; /* submat->data */
2863: int *irow,*icol,nrow,ncol,*rtable,size,rank,tag0,tag1,tag2,tag3;
2864: int *w1,*w2,*pa,nrqs,nrqr,msz,row_t;
2865: int i,j,k,l,len,jmax,proc,idx;
2866: int **sbuf1,**sbuf2,**rbuf1,**rbuf2,*req_size,**sbuf3,**rbuf3;
2867: FLOAT **rbuf4,**sbuf4; /* FLOAT is from Block Solve 95 library */
2869: int *cmap,*rmap,nlocal,*o_nz,*d_nz,cstart,cend;
2870: int *req_source;
2871: int ncols_t;
2872:
2873:
2874: MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
2875: MPI_Request *r_waits4,*s_waits3,*s_waits4;
2876:
2877: MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
2878: MPI_Status *r_status3,*r_status4,*s_status4;
2879: MPI_Comm comm;
2883: comm = C->comm;
2884: tag0 = C->tag;
2885: size = c->size;
2886: rank = c->rank;
2888: if (size==1) {
2889: if (scall == MAT_REUSE_MATRIX) {
2890: ierr=MatGetSubMatrices(C,1,&isrow,&iscol,MAT_REUSE_MATRIX,&submat);
2891: return(0);
2892: } else {
2893: Mat *newsubmat;
2894:
2895: ierr=MatGetSubMatrices(C,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&newsubmat);
2896: *submat=*newsubmat;
2897: ierr=PetscFree(newsubmat);
2898: return(0);
2899: }
2900: }
2901:
2902: /* Get some new tags to keep the communication clean */
2903: PetscObjectGetNewTag((PetscObject)C,&tag1);
2904: PetscObjectGetNewTag((PetscObject)C,&tag2);
2905: PetscObjectGetNewTag((PetscObject)C,&tag3);
2907: /* Check if the col indices are sorted */
2908: {PetscTruth sorted;
2909: ISSorted(isrow,&sorted);
2910: if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted");
2911: ISSorted(iscol,&sorted);
2912: if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted");
2913: }
2914:
2915: ISGetIndices(isrow,&irow);
2916: ISGetIndices(iscol,&icol);
2917: ISGetLocalSize(isrow,&nrow);
2918: ISGetLocalSize(iscol,&ncol);
2919:
2920: if (!isrow) SETERRQ(PETSC_ERR_ARG_SIZ,"Empty ISrow");
2921: if (!iscol) SETERRQ(PETSC_ERR_ARG_SIZ,"Empty IScol");
2922:
2923:
2924: len = (C->M+1)*sizeof(int);
2925: PetscMalloc(len,&rtable);
2926: /* Create hash table for the mapping :row -> proc*/
2927: for (i=0,j=0; i<size; i++) {
2928: jmax = c->rowners[i+1];
2929: for (; j<jmax; j++) {
2930: rtable[j] = i;
2931: }
2932: }
2934: /* evaluate communication - mesg to who, length of mesg, and buffer space
2935: required. Based on this, buffers are allocated, and data copied into them*/
2936: PetscMalloc(size*2*sizeof(int),&w1); /* mesg size */
2937: w2 = w1 + size; /* if w2[i] marked, then a message to proc i*/
2938: PetscMemzero(w1,size*2*sizeof(int)); /* initialize work vector*/
2939: for (j=0; j<nrow; j++) {
2940: row_t = irow[j];
2941: proc = rtable[row_t];
2942: w1[proc]++;
2943: }
2944: nrqs = 0; /* no of outgoing messages */
2945: msz = 0; /* total mesg length (for all procs) */
2946: w1[rank] = 0; /* no mesg sent to self */
2947: for (i=0; i<size; i++) {
2948: if (w1[i]) { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
2949: }
2950:
2951: PetscMalloc((nrqs+1)*sizeof(int),&pa); /*(proc -array)*/
2952: for (i=0,j=0; i<size; i++) {
2953: if (w1[i]) {
2954: pa[j++] = i;
2955: w1[i]++; /* header for return data */
2956: msz+=w1[i];
2957: }
2958: }
2959:
2960: {int *onodes1,*olengths1;
2961:   /* Determine the number of messages to expect, their lengths, and their from-ids */
2962: PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
2963: PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);
2964: /* Now post the Irecvs corresponding to these messages */
2965: PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);
2966: PetscFree(onodes1);
2967: PetscFree(olengths1);
2968: }
2969:
2970: { int **ptr,*iptr,*tmp;
2971: /* Allocate Memory for outgoing messages */
2972: len = 2*size*sizeof(int*) + msz*sizeof(int);
2973: PetscMalloc(len,&sbuf1);
2974: ptr = sbuf1 + size; /* Pointers to the data in outgoing buffers */
2975: PetscMemzero(sbuf1,2*size*sizeof(int*));
2976: /* allocate memory for outgoing data + buf to receive the first reply */
2977: tmp = (int*)(ptr + size);
2979: for (i=0,iptr=tmp; i<nrqs; i++) {
2980: j = pa[i];
2981: sbuf1[j] = iptr;
2982: iptr += w1[j];
2983: }
2985: /* Form the outgoing messages */
2986: for (i=0; i<nrqs; i++) {
2987: j = pa[i];
2988: sbuf1[j][0] = 0; /*header */
2989: ptr[j] = sbuf1[j] + 1;
2990: }
2991:
2992: /* Parse the isrow and copy data into outbuf */
2993: for (j=0; j<nrow; j++) {
2994: row_t = irow[j];
2995: proc = rtable[row_t];
2996: if (proc != rank) { /* copy to the outgoing buf*/
2997: sbuf1[proc][0]++;
2998: *ptr[proc] = row_t;
2999: ptr[proc]++;
3000: }
3001: }
3002: } /* block */
3004: /* Now post the sends */
3005:
3006:   /* structure of sbuf1[i]/rbuf1[i]: 1 (number of requested rows) followed by the
3007:    * indices of the requested rows */
3009: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
3010: for (i=0; i<nrqs; ++i) {
3011: j = pa[i];
3012: MPI_Isend(sbuf1[j],w1[j],MPI_INT,j,tag0,comm,s_waits1+i);
3013: }
3015: /* Post Receives to capture the buffer size */
3016: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);
3017: PetscMalloc((nrqs+1)*sizeof(int*),&rbuf2);
3018: PetscMalloc(msz*sizeof(int)+1,&(rbuf2[0]));
3019: for (i=1; i<nrqs; ++i) {
3020: rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
3021: }
3022: for (i=0; i<nrqs; ++i) {
3023: j = pa[i];
3024: MPI_Irecv(rbuf2[i],w1[j],MPI_INT,j,tag1,comm,r_waits2+i);
3025: }
3027: /* Send to other procs the buf size they should allocate */
3028:   /* structure of sbuf2[i]/rbuf2[i]: 1 (total size to allocate) + one entry per
3029:    * requested row (that row's size) */
3031: /* Receive messages*/
3032: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
3033: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);
3034: len = 2*nrqr*sizeof(int) + (nrqr+1)*sizeof(int*);
3035: PetscMalloc(len,&sbuf2);
3036: req_size = (int*)(sbuf2 + nrqr);
3037: req_source = req_size + nrqr;
3038:
3039: {
3040: BSsprow **sAi = A->rows;
3041: int id,rstart = c->rstart;
3042: int *sbuf2_i,*rbuf1_i,end;
3044: for (i=0; i<nrqr; ++i) {
3045: MPI_Waitany(nrqr,r_waits1,&idx,r_status1+i);
3046: req_size[idx] = 0;
3047: rbuf1_i = rbuf1[idx];
3048: MPI_Get_count(r_status1+i,MPI_INT,&end);
3049: PetscMalloc((end+1)*sizeof(int),&sbuf2[idx]);
3050: sbuf2_i = sbuf2[idx];
3051: for (j=1; j<end; j++) {
3052: id = rbuf1_i[j] - rstart;
3053: ncols_t = (sAi[id])->length;
3054: sbuf2_i[j] = ncols_t;
3055: req_size[idx] += ncols_t;
3056: }
3057: req_source[idx] = r_status1[i].MPI_SOURCE;
3058: /* form the header */
3059: sbuf2_i[0] = req_size[idx];
3060: MPI_Isend(sbuf2_i,end,MPI_INT,req_source[idx],tag1,comm,s_waits2+i);
3061: }
3062: }
3063: PetscFree(r_status1);
3064: PetscFree(r_waits1);
3066: /* recv buffer sizes */
3067: /* Receive messages*/
3068:
3069: PetscMalloc((nrqs+1)*sizeof(int*),&rbuf3);
3070: PetscMalloc((nrqs+1)*sizeof(FLOAT*),&rbuf4);
3071: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);
3072: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);
3073: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);
3075: for (i=0; i<nrqs; ++i) {
3076: MPI_Waitany(nrqs,r_waits2,&idx,r_status2+i);
3077: PetscMalloc((rbuf2[idx][0]+1)*sizeof(int),&rbuf3[idx]);
3078: PetscMalloc((rbuf2[idx][0]+1)*sizeof(FLOAT),&rbuf4[idx]);
3079: MPI_Irecv(rbuf3[idx],rbuf2[idx][0],MPI_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idx);
3080: MPI_Irecv(rbuf4[idx],rbuf2[idx][0],MPIU_SCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idx);
3081: }
3082: PetscFree(r_status2);
3083: PetscFree(r_waits2);
3084:
3085: /* Wait on sends1 and sends2 */
3086: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);
3087: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);
3089: MPI_Waitall(nrqs,s_waits1,s_status1);
3090: MPI_Waitall(nrqr,s_waits2,s_status2);
3091: PetscFree(s_status1);
3092: PetscFree(s_status2);
3093: PetscFree(s_waits1);
3094: PetscFree(s_waits2);
3096: /* Now allocate buffers for a->j, and send them off */
3097:   /* structure of sbuf3[i]/rbuf3[i],sbuf4[i]/rbuf4[i]: req_size[i] entries (cols resp.
3098:    * vals of all requested rows; row sizes were sent in rbuf2; vals are of FLOAT type) */
3099:
3100: PetscMalloc((nrqr+1)*sizeof(int*),&sbuf3);
3101: for (i=0,j=0; i<nrqr; i++) j += req_size[i];
3102: PetscMalloc((j+1)*sizeof(int),&sbuf3[0]);
3103: for (i=1; i<nrqr; i++) sbuf3[i] = sbuf3[i-1] + req_size[i-1];
3104:
3105: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);
3106: {
3107: int *Acol,*rbuf1_i,*sbuf3_i,rqrow,noutcols,kmax,*cols,ncols;
3108: int rstart = c->rstart;
3110: for (i=0; i<nrqr; i++) {
3111: rbuf1_i = rbuf1[i];
3112: sbuf3_i = sbuf3[i];
3113: noutcols = 0;
3114: kmax = rbuf1_i[0]; /* num. of req. rows */
3115: for (k=0,rqrow=1; k<kmax; k++,rqrow++) {
3116: Arow = A->rows[rbuf1_i[rqrow] - rstart];
3117: ncols = Arow->length;
3118: Acol = Arow->col;
3119: /* load the column indices for this row into cols*/
3120: cols = sbuf3_i + noutcols;
3121: PetscMemcpy(cols,Acol,ncols*sizeof(int));
3122: /*for (l=0; l<ncols;l++) cols[l]=Acol[l]; */ /* How is it with mappings?? */
3123: noutcols += ncols;
3124: }
3125: MPI_Isend(sbuf3_i,req_size[i],MPI_INT,req_source[i],tag2,comm,s_waits3+i);
3126: }
3127: }
3128: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);
3129: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);
3131: /* Allocate buffers for a->a, and send them off */
3132:   /* can be optimized by merging with the previous block */
3133: PetscMalloc((nrqr+1)*sizeof(FLOAT*),&sbuf4);
3134: for (i=0,j=0; i<nrqr; i++) j += req_size[i];
3135: PetscMalloc((j+1)*sizeof(FLOAT),&sbuf4[0]);
3136: for (i=1; i<nrqr; i++) sbuf4[i] = sbuf4[i-1] + req_size[i-1];
3137:
3138: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);
3139: {
3140: FLOAT *Aval,*vals,*sbuf4_i;
3141: int rstart = c->rstart,*rbuf1_i,rqrow,noutvals,kmax,ncols;
3142:
3143:
3144: for (i=0; i<nrqr; i++) {
3145: rbuf1_i = rbuf1[i];
3146: sbuf4_i = sbuf4[i];
3147: rqrow = 1;
3148: noutvals = 0;
3149: kmax = rbuf1_i[0]; /* num of req. rows */
3150: for (k=0; k<kmax; k++,rqrow++) {
3151: Arow = A->rows[rbuf1_i[rqrow] - rstart];
3152: ncols = Arow->length;
3153: Aval = Arow->nz;
3154: /* load the column values for this row into vals*/
3155: vals = sbuf4_i+noutvals;
3156: PetscMemcpy(vals,Aval,ncols*sizeof(FLOAT));
3157: noutvals += ncols;
3158: }
3159: MPI_Isend(sbuf4_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);
3160: }
3161: }
3162: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);
3163: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);
3164: PetscFree(rbuf1);
3166: /* Form the matrix */
3168: /* create col map */
3169: len = C->N*sizeof(int)+1;
3170: PetscMalloc(len,&cmap);
3171: PetscMemzero(cmap,C->N*sizeof(int));
3172: for (j=0; j<ncol; j++) {
3173: cmap[icol[j]] = j+1;
3174: }
3175:
3176:   /* Create row map; a global row map may eventually be needed, but a local one suffices here */
3177: len = C->M*sizeof(int)+1;
3178: PetscMalloc(len,&rmap);
3179: PetscMemzero(rmap,C->M*sizeof(int));
3180: for (j=0; j<nrow; j++) {
3181: rmap[irow[j]] = j;
3182: }
3184: /*
3185: Determine the number of non-zeros in the diagonal and off-diagonal
3186: portions of the matrix in order to do correct preallocation
3187: */
3189: /* first get start and end of "diagonal" columns */
3190: if (csize == PETSC_DECIDE) {
3191: nlocal = ncol/size + ((ncol % size) > rank);
3192: } else {
3193: nlocal = csize;
3194: }
3195: {
3196: int ncols,*cols,olen,dlen,thecol;
3197: int *rbuf2_i,*rbuf3_i,*sbuf1_i,row,kmax,cidx;
3198:
3199: MPI_Scan(&nlocal,&cend,1,MPI_INT,MPI_SUM,comm);
3200: cstart = cend - nlocal;
3201: if (rank == size - 1 && cend != ncol) {
3202: SETERRQ(PETSC_ERR_ARG_SIZ,"Local column sizes do not add up to total number of columns");
3203: }
3205: PetscMalloc((2*nrow+1)*sizeof(int),&d_nz);
3206: o_nz = d_nz + nrow;
3207:
3208: /* Update lens from local data */
3209: for (j=0; j<nrow; j++) {
3210: row = irow[j];
3211: proc = rtable[row];
3212: if (proc == rank) {
3213: Arow=A->rows[row-c->rstart];
3214: ncols=Arow->length;
3215: cols=Arow->col;
3216: olen=dlen=0;
3217: for (k=0; k<ncols; k++) {
3218: if ((thecol=cmap[cols[k]])) {
3219:           if (cstart<thecol && thecol<=cend) dlen++; /* thecol is 1-based */
3220: else olen++;
3221: }
3222: }
3223: o_nz[j]=olen;
3224: d_nz[j]=dlen;
3225: } else d_nz[j]=o_nz[j]=0;
3226: }
3227: /* Update lens from offproc data and done waits */
3228:     /* this will be much simpler once only the appropriate columns are sent */
3229: for (j=0; j<nrqs;j++) {
3230: MPI_Waitany(nrqs,r_waits3,&i,r_status3+j);
3231: proc = pa[i];
3232: sbuf1_i = sbuf1[proc];
3233: cidx = 0;
3234: rbuf2_i = rbuf2[i];
3235: rbuf3_i = rbuf3[i];
3236: kmax = sbuf1_i[0]; /*num of rq. rows*/
3237: for (k=1; k<=kmax; k++) {
3238:         row = rmap[sbuf1_i[k]]; /* the row index in the new matrix */
3239: for (l=0; l<rbuf2_i[k]; l++,cidx++) {
3240: if ((thecol=cmap[rbuf3_i[cidx]])) {
3241:
3242:             if (cstart<thecol && thecol<=cend) d_nz[row]++; /* thecol is 1-based */
3243: else o_nz[row]++;
3244: }
3245: }
3246: }
3247: }
3248: }
3249: PetscFree(r_status3);
3250: PetscFree(r_waits3);
3251: MPI_Waitall(nrqr,s_waits3,s_status3);
3252: PetscFree(s_status3);
3253: PetscFree(s_waits3);
3255: if (scall == MAT_INITIAL_MATRIX) {
3256: MatCreate(comm,nrow,nlocal,PETSC_DECIDE,ncol,submat);
3257: MatSetType(*submat,C->type_name);
3258: MatMPIAIJSetPreallocation(*submat,0,d_nz,0,o_nz);
3259: mat=(Mat_MPIAIJ *)((*submat)->data);
3260: matA=(Mat_SeqAIJ *)(mat->A->data);
3261: matB=(Mat_SeqAIJ *)(mat->B->data);
3262:
3263: } else {
3264: PetscTruth same;
3265:     /* The following checks could optionally be dropped for debugged user
3266:      * programs, but there is no PETSc option to switch off such safety
3267:      * tests; similarly the counting of o_nz,d_nz could be dropped in the
3268:      * REUSE case */
3269:
3270: PetscTypeCompare((PetscObject)(*submat),MATMPIAIJ,&same);
3271: if (same == PETSC_FALSE) {
3272: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong type");
3273: }
3274: if (((*submat)->m != nrow) || ((*submat)->N != ncol)) {
3275: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
3276: }
3277: mat=(Mat_MPIAIJ *)((*submat)->data);
3278: matA=(Mat_SeqAIJ *)(mat->A->data);
3279: matB=(Mat_SeqAIJ *)(mat->B->data);
3280: PetscMemcmp(matA->ilen,d_nz,nrow*sizeof(int),&same);
3281: if (same == PETSC_FALSE) {
3282: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
3283: }
3284: PetscMemcmp(matB->ilen,o_nz,nrow*sizeof(int),&same);
3285: if (same == PETSC_FALSE) {
3286: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
3287: }
3288:     /* Initialize the matrix as if empty */
3289: PetscMemzero(matA->ilen,nrow*sizeof(int));
3290: PetscMemzero(matB->ilen,nrow*sizeof(int));
3291:     /* Perhaps MatZeroEntries would be better - check exactly what it does - all
3292:      * possibly stale information must be deleted */
3293:     /*submats[i]->factor = C->factor; !!! ??? if the factor is to stay the same then
3294:      * some factor information must be copied - where is it kept? */
3295: (*submat)->was_assembled=PETSC_FALSE;
3296: (*submat)->assembled=PETSC_FALSE;
3297:
3298: }
3299: PetscFree(d_nz);
3301: /* Assemble the matrix */
3302: /* First assemble from local rows */
3303: {
3304: int i_row,oldrow,row,ncols,*cols,*matA_j,*matB_j,ilenA,ilenB,tcol;
3305: FLOAT *vals;
3306: PetscScalar *matA_a,*matB_a;
3307:
3308: for (j=0; j<nrow; j++) {
3309: oldrow = irow[j];
3310: proc = rtable[oldrow];
3311: if (proc == rank) {
3312: row = rmap[oldrow];
3313:
3314: Arow = A->rows[oldrow-c->rstart];
3315: ncols = Arow->length;
3316: cols = Arow->col;
3317: vals = Arow->nz;
3318:
3319: i_row = matA->i[row];
3320: matA_a = matA->a + i_row;
3321: matA_j = matA->j + i_row;
3322: i_row = matB->i[row];
3323: matB_a = matB->a + i_row;
3324: matB_j = matB->j + i_row;
3325: for (k=0,ilenA=0,ilenB=0; k<ncols; k++) {
3326: if ((tcol = cmap[cols[k]])) {
3327: if (tcol<=cstart) {
3328: *matB_j++ = tcol-1;
3329: *matB_a++ = vals[k];
3330: ilenB++;
3331: } else if (tcol<=cend) {
3332: *matA_j++ = (tcol-1)-cstart;
3333: *matA_a++ = (PetscScalar)(vals[k]);
3334: ilenA++;
3335: } else {
3336: *matB_j++ = tcol-1;
3337: *matB_a++ = vals[k];
3338: ilenB++;
3339: }
3340: }
3341: }
3342: matA->ilen[row]=ilenA;
3343: matB->ilen[row]=ilenB;
3344:
3345: }
3346: }
3347: }
3349: /* Now assemble the off proc rows*/
3350: {
3351: int *sbuf1_i,*rbuf2_i,*rbuf3_i,cidx,kmax,row,i_row;
3352: int *matA_j,*matB_j,lmax,tcol,ilenA,ilenB;
3353: PetscScalar *matA_a,*matB_a;
3354: FLOAT *rbuf4_i;
3356: for (j=0; j<nrqs; j++) {
3357: MPI_Waitany(nrqs,r_waits4,&i,r_status4+j);
3358: proc = pa[i];
3359: sbuf1_i = sbuf1[proc];
3360:
3361: cidx = 0;
3362: rbuf2_i = rbuf2[i];
3363: rbuf3_i = rbuf3[i];
3364: rbuf4_i = rbuf4[i];
3365: kmax = sbuf1_i[0];
3366: for (k=1; k<=kmax; k++) {
3367: row = rmap[sbuf1_i[k]];
3368:
3369: i_row = matA->i[row];
3370: matA_a = matA->a + i_row;
3371: matA_j = matA->j + i_row;
3372: i_row = matB->i[row];
3373: matB_a = matB->a + i_row;
3374: matB_j = matB->j + i_row;
3375:
3376: lmax = rbuf2_i[k];
3377: for (l=0,ilenA=0,ilenB=0; l<lmax; l++,cidx++) {
3378: if ((tcol = cmap[rbuf3_i[cidx]])) {
3379: if (tcol<=cstart) {
3380: *matB_j++ = tcol-1;
3381:               *matB_a++ = (PetscScalar)(rbuf4_i[cidx]);
3382: ilenB++;
3383: } else if (tcol<=cend) {
3384: *matA_j++ = (tcol-1)-cstart;
3385: *matA_a++ = (PetscScalar)(rbuf4_i[cidx]);
3386: ilenA++;
3387: } else {
3388: *matB_j++ = tcol-1;
3389: *matB_a++ = (PetscScalar)(rbuf4_i[cidx]);
3390: ilenB++;
3391: }
3392: }
3393: }
3394: matA->ilen[row]=ilenA;
3395: matB->ilen[row]=ilenB;
3396: }
3397: }
3398: }
3400: PetscFree(r_status4);
3401: PetscFree(r_waits4);
3402: MPI_Waitall(nrqr,s_waits4,s_status4);
3403: PetscFree(s_waits4);
3404: PetscFree(s_status4);
3406: /* Restore the indices */
3407: ISRestoreIndices(isrow,&irow);
3408: ISRestoreIndices(iscol,&icol);
3410: /* Destroy allocated memory */
3411: PetscFree(rtable);
3412: PetscFree(w1);
3413: PetscFree(pa);
3415: PetscFree(sbuf1);
3416: PetscFree(rbuf2[0]);
3417: PetscFree(rbuf2);
3418: for (i=0; i<nrqr; ++i) {
3419: PetscFree(sbuf2[i]);
3420: }
3421: for (i=0; i<nrqs; ++i) {
3422: PetscFree(rbuf3[i]);
3423: PetscFree(rbuf4[i]);
3424: }
3426: PetscFree(sbuf2);
3427: PetscFree(rbuf3);
3428: PetscFree(rbuf4);
3429: PetscFree(sbuf3[0]);
3430: PetscFree(sbuf3);
3431: PetscFree(sbuf4[0]);
3432: PetscFree(sbuf4);
3433:
3434: PetscFree(cmap);
3435: PetscFree(rmap);
3438: MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);
3439: MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);
3442: return(0);
3443: }
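/*
   Illustrative sketch (not part of the original file): extracting a parallel
   submatrix through the generic MatGetSubMatrix() interface, which for a
   MATMPIROWBS matrix ends up in the routine above.  It is assumed here that the
   public call takes the same (mat,isrow,iscol,csize,scall,&submat) argument
   list as the implementation above; both index sets must be sorted.  The helper
   name is illustrative only.
*/
#if 0
static PetscErrorCode ExampleGetSubMatrix(Mat C,IS isrow,IS iscol)
{
  Mat S;

  MatGetSubMatrix(C,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&S);
  /* S holds the requested rows/columns, distributed over the same communicator as C */
  MatDestroy(S);
  return(0);
}
#endif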