Actual source code: mpirowbs.c

 2:  #include "src/mat/impls/rowbs/mpi/mpirowbs.h"

  4: #define CHUNCKSIZE_LOCAL   10

  8: static PetscErrorCode MatFreeRowbs_Private(Mat A,int n,int *i,PetscScalar *v)
  9: {

 13:   if (v) {
 14: #if defined(PETSC_USE_LOG)
 15:     int len = -n*(sizeof(int)+sizeof(PetscScalar));
 16: #endif
 17:     PetscFree(v);
 18:     PetscLogObjectMemory(A,len);
 19:   }
 20:   return(0);
 21: }

 25: static PetscErrorCode MatMallocRowbs_Private(Mat A,int n,int **i,PetscScalar **v)
 26: {
 28:   int len;

 31:   if (!n) {
 32:     *i = 0; *v = 0;
 33:   } else {
 34:     len = n*(sizeof(int) + sizeof(PetscScalar));
 35:     PetscMalloc(len,v);
 36:     PetscLogObjectMemory(A,len);
 37:     *i = (int*)(*v + n);   /* the n column indices are stored right after the n values in the same block */
 38:   }
 39:   return(0);
 40: }

 44: PetscErrorCode MatScale_MPIRowbs(const PetscScalar *alphain,Mat inA)
 45: {
 46:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)inA->data;
 47:   BSspmat      *A = a->A;
 48:   BSsprow      *vs;
 49:   PetscScalar  *ap,alpha = *alphain;
 50:   int          i,m = inA->m,nrow,j;

 53:   for (i=0; i<m; i++) {
 54:     vs   = A->rows[i];
 55:     nrow = vs->length;
 56:     ap   = vs->nz;
 57:     for (j=0; j<nrow; j++) {
 58:       ap[j] *= alpha;
 59:     }
 60:   }
 61:   PetscLogFlops(a->nz);
 62:   return(0);
 63: }

 65: /* ----------------------------------------------------------------- */
 68: static PetscErrorCode MatCreateMPIRowbs_local(Mat A,int nz,const int nnz[])
 69: {
 70:   Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)A->data;
 72:   int   i,len,m = A->m,*tnnz;
 73:   BSspmat      *bsmat;
 74:   BSsprow      *vs;

 77:   PetscMalloc((m+1)*sizeof(int),&tnnz);
 78:   if (!nnz) {
 79:     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
 80:     if (nz <= 0)             nz = 1;
 81:     for (i=0; i<m; i++) tnnz[i] = nz;
 82:     nz      = nz*m;
 83:   } else {
 84:     nz = 0;
 85:     for (i=0; i<m; i++) {
 86:       if (nnz[i] <= 0) tnnz[i] = 1;
 87:       else             tnnz[i] = nnz[i];
 88:       nz += tnnz[i];
 89:     }
 90:   }

 92:   /* Allocate BlockSolve matrix context */
 93:   PetscNew(BSspmat,&bsif->A);
 94:   bsmat = bsif->A;
 95:   BSset_mat_icc_storage(bsmat,PETSC_FALSE);
 96:   BSset_mat_symmetric(bsmat,PETSC_FALSE);
 97:   len                    = m*(sizeof(BSsprow*)+ sizeof(BSsprow)) + 1;
 98:   PetscMalloc(len,&bsmat->rows);
 99:   bsmat->num_rows        = m;
100:   bsmat->global_num_rows = A->M;
101:   bsmat->map             = bsif->bsmap;
102:   vs                     = (BSsprow*)(bsmat->rows + m);
103:   for (i=0; i<m; i++) {
104:     bsmat->rows[i]  = vs;
105:     bsif->imax[i]   = tnnz[i];
106:     vs->diag_ind    = -1;
107:     MatMallocRowbs_Private(A,tnnz[i],&(vs->col),&(vs->nz));
108:     /* put zero on diagonal */
109:     /*vs->length            = 1;
110:     vs->col[0]      = i + bsif->rstart;
111:     vs->nz[0]       = 0.0;*/
112:     vs->length = 0;
113:     vs++;
114:   }
115:   PetscLogObjectMemory(A,sizeof(BSspmat) + len);
116:   bsif->nz               = 0;
117:   bsif->maxnz            = nz;
118:   bsif->sorted           = 0;
119:   bsif->roworiented      = PETSC_TRUE;
120:   bsif->nonew            = 0;
121:   bsif->bs_color_single  = 0;

123:   PetscFree(tnnz);
124:   return(0);
125: }

129: static PetscErrorCode MatSetValues_MPIRowbs_local(Mat AA,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
130: {
131:   Mat_MPIRowbs *mat = (Mat_MPIRowbs*)AA->data;
132:   BSspmat      *A = mat->A;
133:   BSsprow      *vs;
135:   int          *rp,k,a,b,t,ii,row,nrow,i,col,l,rmax;
136:   int          *imax = mat->imax,nonew = mat->nonew,sorted = mat->sorted;
137:   PetscScalar  *ap,value;

140:   for (k=0; k<m; k++) { /* loop over added rows */
141:     row = im[k];
142:     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
143:     if (row >= AA->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,AA->m-1);
144:     vs   = A->rows[row];
145:     ap   = vs->nz; rp = vs->col;
146:     rmax = imax[row]; nrow = vs->length;
147:     a    = 0;
148:     for (l=0; l<n; l++) { /* loop over added columns */
149:       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative col: %d",in[l]);
150:       if (in[l] >= AA->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],AA->N-1);
151:       col = in[l]; value = *v++;
 152:       if (!sorted) a = 0; b = nrow;  /* b is always reset; when the columns are sorted, a carries over from the previous insertion */
153:       while (b-a > 5) {
154:         t = (b+a)/2;
155:         if (rp[t] > col) b = t;
156:         else             a = t;
157:       }
158:       for (i=a; i<b; i++) {
159:         if (rp[i] > col) break;
160:         if (rp[i] == col) {
161:           if (addv == ADD_VALUES) ap[i] += value;
162:           else                    ap[i] = value;
163:           goto noinsert;
164:         }
165:       }
166:       if (nonew) goto noinsert;
167:       if (nrow >= rmax) {
168:         /* there is no extra room in row, therefore enlarge */
169:         int    *itemp,*iout,*iin = vs->col;
170:         PetscScalar *vout,*vin = vs->nz,*vtemp;

172:         /* malloc new storage space */
173:         imax[row] += CHUNCKSIZE_LOCAL;
174:         MatMallocRowbs_Private(AA,imax[row],&itemp,&vtemp);
175:         vout = vtemp; iout = itemp;
176:         for (ii=0; ii<i; ii++) {
177:           vout[ii] = vin[ii];
178:           iout[ii] = iin[ii];
179:         }
180:         vout[i] = value;
181:         iout[i] = col;
182:         for (ii=i+1; ii<=nrow; ii++) {
183:           vout[ii] = vin[ii-1];
184:           iout[ii] = iin[ii-1];
185:         }
186:         /* free old row storage */
187:         if (rmax > 0) {
188:           MatFreeRowbs_Private(AA,rmax,vs->col,vs->nz);
189:         }
 190:         vs->col = iout; vs->nz = vout; rp = iout; ap = vout; /* keep the local aliases pointing at the new storage */
191:         rmax              =  imax[row];
192:         mat->maxnz        += CHUNCKSIZE_LOCAL;
193:         mat->reallocs++;
194:       } else {
 195:         /* shift higher columns over to make room for the new entry */
196:         for (ii=nrow-1; ii>=i; ii--) {
197:           rp[ii+1] = rp[ii];
198:           ap[ii+1] = ap[ii];
199:         }
200:         rp[i] = col;
201:         ap[i] = value;
202:       }
203:       nrow++;
204:       mat->nz++;
205:       AA->same_nonzero = PETSC_FALSE;
206:       noinsert:;
207:       a = i + 1;
208:     }
209:     vs->length = nrow;
210:   }
211:   return(0);
212: }


217: static PetscErrorCode MatAssemblyBegin_MPIRowbs_local(Mat A,MatAssemblyType mode)
218: {
220:   return(0);
221: }

225: static PetscErrorCode MatAssemblyEnd_MPIRowbs_local(Mat AA,MatAssemblyType mode)
226: {
227:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)AA->data;
228:   BSspmat      *A = a->A;
229:   BSsprow      *vs;
230:   int          i,j,rstart = a->rstart;

233:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

235:   /* Mark location of diagonal */
236:   for (i=0; i<AA->m; i++) {
237:     vs = A->rows[i];
238:     for (j=0; j<vs->length; j++) {
239:       if (vs->col[j] == i + rstart) {
240:         vs->diag_ind = j;
241:         break;
242:       }
243:     }
244:     if (vs->diag_ind == -1) {
245:       SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"no diagonal entry");
246:     }
247:   }
248:   return(0);
249: }

253: static PetscErrorCode MatZeroRows_MPIRowbs_local(Mat A,IS is,const PetscScalar *diag)
254: {
255:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)A->data;
256:   BSspmat      *l = a->A;
258:   int          i,N,*rz,m = A->m - 1,col,base=a->rowners[a->rank];

261:   ISGetLocalSize(is,&N);
262:   ISGetIndices(is,&rz);
263:   if (a->keepzeroedrows) {
264:     for (i=0; i<N; i++) {
265:       if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"row out of range");
266:       PetscMemzero(l->rows[rz[i]]->nz,l->rows[rz[i]]->length*sizeof(PetscScalar));
267:       if (diag) {
268:         col=rz[i]+base;
269:         MatSetValues_MPIRowbs_local(A,1,&rz[i],1,&col,diag,INSERT_VALUES);
270:       }
271:     }
272:   } else {
273:     if (diag) {
274:       for (i=0; i<N; i++) {
275:         if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Out of range");
276:         if (l->rows[rz[i]]->length > 0) { /* in case row was completely empty */
277:           l->rows[rz[i]]->length = 1;
278:           l->rows[rz[i]]->nz[0]  = *diag;
279:           l->rows[rz[i]]->col[0] = a->rstart + rz[i];
280:         } else {
281:           col=rz[i]+base;
282:           MatSetValues_MPIRowbs_local(A,1,&rz[i],1,&col,diag,INSERT_VALUES);
283:         }
284:       }
285:     } else {
286:       for (i=0; i<N; i++) {
287:         if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Out of range");
288:         l->rows[rz[i]]->length = 0;
289:       }
290:     }
291:     A->same_nonzero = PETSC_FALSE;
292:   }
293:   ISRestoreIndices(is,&rz);
294:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
295:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
296:   return(0);
297: }

301: static PetscErrorCode MatNorm_MPIRowbs_local(Mat A,NormType type,PetscReal *norm)
302: {
303:   Mat_MPIRowbs *mat = (Mat_MPIRowbs*)A->data;
304:   BSsprow      *vs,**rs;
305:   PetscScalar  *xv;
306:   PetscReal    sum = 0.0;
308:   int          *xi,nz,i,j;

311:   rs = mat->A->rows;
312:   if (type == NORM_FROBENIUS) {
313:     for (i=0; i<A->m; i++) {
314:       vs = *rs++;
315:       nz = vs->length;
316:       xv = vs->nz;
317:       while (nz--) {
318: #if defined(PETSC_USE_COMPLEX)
319:         sum += PetscRealPart(PetscConj(*xv)*(*xv)); xv++;
320: #else
321:         sum += (*xv)*(*xv); xv++;
322: #endif
323:       }
324:     }
325:     *norm = sqrt(sum);
326:   } else if (type == NORM_1) { /* max column norm */
327:     PetscReal *tmp;
328:     PetscMalloc(A->n*sizeof(PetscReal),&tmp);
329:     PetscMemzero(tmp,A->n*sizeof(PetscReal));
330:     *norm = 0.0;
331:     for (i=0; i<A->m; i++) {
332:       vs = *rs++;
333:       nz = vs->length;
334:       xi = vs->col;
335:       xv = vs->nz;
336:       while (nz--) {
337:         tmp[*xi] += PetscAbsScalar(*xv);
338:         xi++; xv++;
339:       }
340:     }
341:     for (j=0; j<A->n; j++) {
342:       if (tmp[j] > *norm) *norm = tmp[j];
343:     }
344:     PetscFree(tmp);
345:   } else if (type == NORM_INFINITY) { /* max row norm */
346:     *norm = 0.0;
347:     for (i=0; i<A->m; i++) {
348:       vs = *rs++;
349:       nz = vs->length;
350:       xv = vs->nz;
351:       sum = 0.0;
352:       while (nz--) {
353:         sum += PetscAbsScalar(*xv); xv++;
354:       }
355:       if (sum > *norm) *norm = sum;
356:     }
357:   } else {
358:     SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
359:   }
360:   return(0);
361: }

363: /* ----------------------------------------------------------------- */

367: PetscErrorCode MatSetValues_MPIRowbs(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode av)
368: {
369:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
371:   int   i,j,row,col,rstart = a->rstart,rend = a->rend;
372:   PetscTruth   roworiented = a->roworiented;

375:   /* Note:  There's no need to "unscale" the matrix, since scaling is
376:      confined to a->pA, and we're working with a->A here */
377:   for (i=0; i<m; i++) {
378:     if (im[i] < 0) continue;
379:     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
380:     if (im[i] >= rstart && im[i] < rend) {
381:       row = im[i] - rstart;
382:       for (j=0; j<n; j++) {
383:         if (in[j] < 0) continue;
384:         if (in[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);
385:         if (in[j] >= 0 && in[j] < mat->N){
386:           col = in[j];
387:           if (roworiented) {
388:             MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,v+i*n+j,av);
389:           } else {
390:             MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,v+i+j*m,av);
391:           }
392:         } else {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid column");}
393:       }
394:     } else {
395:       if (!a->donotstash) {
396:         if (roworiented) {
397:           MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
398:         } else {
399:           MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
400:         }
401:       }
402:     }
403:   }
404:   return(0);
405: }

409: PetscErrorCode MatAssemblyBegin_MPIRowbs(Mat mat,MatAssemblyType mode)
410: {
411:   Mat_MPIRowbs  *a = (Mat_MPIRowbs*)mat->data;
412:   MPI_Comm      comm = mat->comm;
414:   int         nstash,reallocs;
415:   InsertMode    addv;

418:   /* Note:  There's no need to "unscale" the matrix, since scaling is
419:             confined to a->pA, and we're working with a->A here */

421:   /* make sure all processors are either in INSERTMODE or ADDMODE */
422:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
423:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
424:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some procs inserted; others added");
425:   }
426:   mat->insertmode = addv; /* in case this processor had no cache */

428:   MatStashScatterBegin_Private(&mat->stash,a->rowners);
429:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
430:   PetscLogInfo(0,"MatAssemblyBegin_MPIRowbs:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
431:   return(0);
432: }

436: static PetscErrorCode MatView_MPIRowbs_ASCII(Mat mat,PetscViewer viewer)
437: {
438:   Mat_MPIRowbs      *a = (Mat_MPIRowbs*)mat->data;
440:   int               i,j;
441:   PetscTruth        iascii;
442:   BSspmat           *A = a->A;
443:   BSsprow           **rs = A->rows;
444:   PetscViewerFormat format;

447:   PetscViewerGetFormat(viewer,&format);
448:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);

450:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
451:     int ind_l,ind_g,clq_l,clq_g,color;
452:     ind_l = BSlocal_num_inodes(a->pA);CHKERRBS(0);
453:     ind_g = BSglobal_num_inodes(a->pA);CHKERRBS(0);
454:     clq_l = BSlocal_num_cliques(a->pA);CHKERRBS(0);
455:     clq_g = BSglobal_num_cliques(a->pA);CHKERRBS(0);
456:     color = BSnum_colors(a->pA);CHKERRBS(0);
457:     PetscViewerASCIIPrintf(viewer,"  %d global inode(s), %d global clique(s), %d color(s)\n",ind_g,clq_g,color);
458:     PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d local inode(s), %d local clique(s)\n",a->rank,ind_l,clq_l);
459:   } else  if (format == PETSC_VIEWER_ASCII_COMMON) {
460:     for (i=0; i<A->num_rows; i++) {
461:       PetscViewerASCIISynchronizedPrintf(viewer,"row %d:",i+a->rstart);
462:       for (j=0; j<rs[i]->length; j++) {
463:         if (rs[i]->nz[j]) {PetscViewerASCIISynchronizedPrintf(viewer," %d %g ",rs[i]->col[j],rs[i]->nz[j]);}
464:       }
465:       PetscViewerASCIISynchronizedPrintf(viewer,"\n");
466:     }
467:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
468:     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
469:   } else {
470:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
471:     for (i=0; i<A->num_rows; i++) {
472:       PetscViewerASCIISynchronizedPrintf(viewer,"row %d:",i+a->rstart);
473:       for (j=0; j<rs[i]->length; j++) {
474:         PetscViewerASCIISynchronizedPrintf(viewer," %d %g ",rs[i]->col[j],rs[i]->nz[j]);
475:       }
476:       PetscViewerASCIISynchronizedPrintf(viewer,"\n");
477:     }
478:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
479:   }
480:   PetscViewerFlush(viewer);
481:   return(0);
482: }

486: static PetscErrorCode MatView_MPIRowbs_Binary(Mat mat,PetscViewer viewer)
487: {
488:   Mat_MPIRowbs   *a = (Mat_MPIRowbs*)mat->data;
490:   PetscMPIInt    rank,size;
491:   PetscInt       i,M,m,*sbuff,*rowlengths;
492:   PetscInt       *recvcts,*recvdisp,fd,*cols,maxnz,nz,j;
493:   BSspmat        *A = a->A;
494:   BSsprow        **rs = A->rows;
495:   MPI_Comm       comm = mat->comm;
496:   MPI_Status     status;
497:   PetscScalar    *vals;
498:   MatInfo        info;

501:   MPI_Comm_size(comm,&size);
502:   MPI_Comm_rank(comm,&rank);

504:   M = mat->M; m = mat->m;
505:   /* First gather together on the first processor the lengths of 
506:      each row, and write them out to the file */
507:   PetscMalloc(m*sizeof(int),&sbuff);
508:   for (i=0; i<A->num_rows; i++) {
509:     sbuff[i] = rs[i]->length;
510:   }
511:   MatGetInfo(mat,MAT_GLOBAL_SUM,&info);
512:   if (!rank) {
513:     PetscViewerBinaryGetDescriptor(viewer,&fd);
514:     PetscMalloc((4+M)*sizeof(int),&rowlengths);
515:     PetscMalloc(size*sizeof(int),&recvcts);
516:     recvdisp = a->rowners;
517:     for (i=0; i<size; i++) {
518:       recvcts[i] = recvdisp[i+1] - recvdisp[i];
519:     }
 520:     /* first four elements of rowlengths are the header */
521:     rowlengths[0] = mat->cookie;
522:     rowlengths[1] = mat->M;
523:     rowlengths[2] = mat->N;
524:     rowlengths[3] = (int)info.nz_used;
525:     MPI_Gatherv(sbuff,m,MPI_INT,rowlengths+4,recvcts,recvdisp,MPI_INT,0,comm);
526:     PetscFree(sbuff);
527:     PetscBinaryWrite(fd,rowlengths,4+M,PETSC_INT,PETSC_FALSE);
528:     /* count the number of nonzeros on each processor */
529:     PetscMemzero(recvcts,size*sizeof(int));
530:     for (i=0; i<size; i++) {
531:       for (j=recvdisp[i]; j<recvdisp[i+1]; j++) {
 532:         recvcts[i] += rowlengths[j+4];  /* the row lengths start after the 4-entry header */
533:       }
534:     }
535:     /* allocate buffer long enough to hold largest one */
536:     maxnz = 0;
537:     for (i=0; i<size; i++) {
538:       maxnz = PetscMax(maxnz,recvcts[i]);
539:     }
540:     PetscFree(rowlengths);
541:     PetscFree(recvcts);
542:     PetscMalloc(maxnz*sizeof(int),&cols);

544:     /* binary store column indices for 0th processor */
545:     nz = 0;
546:     for (i=0; i<A->num_rows; i++) {
547:       for (j=0; j<rs[i]->length; j++) {
548:         cols[nz++] = rs[i]->col[j];
549:       }
550:     }
551:     PetscBinaryWrite(fd,cols,nz,PETSC_INT,PETSC_FALSE);

553:     /* receive and store column indices for all other processors */
554:     for (i=1; i<size; i++) {
555:       /* should tell processor that I am now ready and to begin the send */
556:       MPI_Recv(cols,maxnz,MPI_INT,i,mat->tag,comm,&status);
557:       MPI_Get_count(&status,MPI_INT,&nz);
558:       PetscBinaryWrite(fd,cols,nz,PETSC_INT,PETSC_FALSE);
559:     }
560:     PetscFree(cols);
561:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

563:     /* binary store values for 0th processor */
564:     nz = 0;
565:     for (i=0; i<A->num_rows; i++) {
566:       for (j=0; j<rs[i]->length; j++) {
567:         vals[nz++] = rs[i]->nz[j];
568:       }
569:     }
570:     PetscBinaryWrite(fd,vals,nz,PETSC_SCALAR,PETSC_FALSE);

572:     /* receive and store nonzeros for all other processors */
573:     for (i=1; i<size; i++) {
574:       /* should tell processor that I am now ready and to begin the send */
575:       MPI_Recv(vals,maxnz,MPIU_SCALAR,i,mat->tag,comm,&status);
576:       MPI_Get_count(&status,MPIU_SCALAR,&nz);
577:       PetscBinaryWrite(fd,vals,nz,PETSC_SCALAR,PETSC_FALSE);
578:     }
579:     PetscFree(vals);
580:   } else {
581:     MPI_Gatherv(sbuff,m,MPI_INT,0,0,0,MPI_INT,0,comm);
582:     PetscFree(sbuff);

584:     /* count local nonzeros */
585:     nz = 0;
586:     for (i=0; i<A->num_rows; i++) {
587:       for (j=0; j<rs[i]->length; j++) {
588:         nz++;
589:       }
590:     }
 591:     /* copy column indices into the send buffer */
592:     PetscMalloc(nz*sizeof(int),&cols);
593:     nz = 0;
594:     for (i=0; i<A->num_rows; i++) {
595:       for (j=0; j<rs[i]->length; j++) {
596:         cols[nz++] = rs[i]->col[j];
597:       }
598:     }
599:     /* send */  /* should wait until processor zero tells me to go */
600:     MPI_Send(cols,nz,MPI_INT,0,mat->tag,comm);
601:     PetscFree(cols);

 603:     /* copy values into the send buffer */
604:     PetscMalloc(nz*sizeof(PetscScalar),&vals);
605:     nz   = 0;
606:     for (i=0; i<A->num_rows; i++) {
607:       for (j=0; j<rs[i]->length; j++) {
608:         vals[nz++] = rs[i]->nz[j];
609:       }
610:     }
611:     /* send */  /* should wait until processor zero tells me to go */
612:     MPI_Send(vals,nz,MPIU_SCALAR,0,mat->tag,comm);
613:     PetscFree(vals);
614:   }

616:   return(0);
617: }

621: PetscErrorCode MatView_MPIRowbs(Mat mat,PetscViewer viewer)
622: {
623:   Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)mat->data;
625:   PetscTruth   iascii,isbinary;

628:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
629:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
630:   if (!bsif->blocksolveassembly) {
631:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
632:   }
633:   if (iascii) {
634:     MatView_MPIRowbs_ASCII(mat,viewer);
635:   } else if (isbinary) {
636:     MatView_MPIRowbs_Binary(mat,viewer);
637:   } else {
638:     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIRowbs matrices",((PetscObject)viewer)->type_name);
639:   }
640:   return(0);
641: }
642: 
645: static PetscErrorCode MatAssemblyEnd_MPIRowbs_MakeSymmetric(Mat mat)
646: {
647:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
648:   BSspmat      *A = a->A;
649:   BSsprow      *vs;
650:   int          size,rank,M,rstart,tag,i,j,*rtable,*w1,*w3,*w4,len,proc,nrqs;
651:   int          msz,*pa,bsz,nrqr,**rbuf1,**sbuf1,**ptr,*tmp,*ctr,col,idx,row;
653:   int          ctr_j,*sbuf1_j,k;
654:   PetscScalar  val=0.0;
655:   MPI_Comm     comm;
656:   MPI_Request  *s_waits1,*r_waits1;
657:   MPI_Status   *s_status,*r_status;

660:   comm   = mat->comm;
661:   tag    = mat->tag;
662:   size   = a->size;
663:   rank   = a->rank;
664:   M      = mat->M;
665:   rstart = a->rstart;

667:   PetscMalloc(M*sizeof(int),&rtable);
 668:   /* Create lookup table for the mapping: row -> proc */
669:   for (i=0,j=0; i<size; i++) {
670:     len = a->rowners[i+1];
671:     for (; j<len; j++) {
672:       rtable[j] = i;
673:     }
674:   }

676:   /* Evaluate communication - mesg to whom, length of mesg, and buffer space
677:      required. Based on this, buffers are allocated, and data copied into them. */
678:   PetscMalloc(size*4*sizeof(int),&w1);/*  mesg size */
 679:   w3   = w1 + 2*size;       /* no of rows that need to be sent to proc i */
680:   w4   = w3 + size;       /* temp work space used in determining w1,  w3 */
681:   PetscMemzero(w1,size*3*sizeof(int)); /* initialize work vector */

683:   for (i=0;  i<mat->m; i++) {
684:     PetscMemzero(w4,size*sizeof(int)); /* initialize work vector */
685:     vs = A->rows[i];
686:     for (j=0; j<vs->length; j++) {
687:       proc = rtable[vs->col[j]];
688:       w4[proc]++;
689:     }
690:     for (j=0; j<size; j++) {
691:       if (w4[j]) { w1[2*j] += w4[j]; w3[j]++;}
692:     }
693:   }
694: 
695:   nrqs       = 0;              /* number of outgoing messages */
 696:   msz        = 0;              /* total mesg length (for all procs) */
697:   w1[2*rank] = 0;              /* no mesg sent to itself */
698:   w3[rank]   = 0;
699:   for (i=0; i<size; i++) {
700:     if (w1[2*i])  {w1[2*i+1] = 1; nrqs++;} /* there exists a message to proc i */
701:   }
 702:   /* pa is the list of processors to communicate with */
703:   PetscMalloc((nrqs+1)*sizeof(int),&pa);
704:   for (i=0,j=0; i<size; i++) {
705:     if (w1[2*i]) {pa[j] = i; j++;}
706:   }

708:   /* Each message would have a header = 1 + 2*(no of ROWS) + data */
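       /* For example (illustrative values only): a message to proc j that reports two of this
          process's rows, global row 3 with 2 columns owned by proc j and global row 7 with 1,
          would be laid out as
              [ 2,  3, 2,  7, 1,  c1, c2,  c3 ]
          where c1,c2 are row 3's global column indices and c3 is row 7's, i.e. a header of
          1 + 2*(no of ROWS) entries followed by the column data. */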
709:   for (i=0; i<nrqs; i++) {
710:     j       = pa[i];
711:     w1[2*j] += w1[2*j+1] + 2*w3[j];
712:     msz     += w1[2*j];
713:   }
714: 
715:   /* Do a global reduction to determine how many messages to expect */
716:   PetscMaxSum(comm,w1,&bsz,&nrqr);

 718:   /* Allocate memory for recv buffers (there may be none if nrqr = 0) */
719:   len      = (nrqr+1)*sizeof(int*) + nrqr*bsz*sizeof(int);
720:   PetscMalloc(len,&rbuf1);
721:   rbuf1[0] = (int*)(rbuf1 + nrqr);
722:   for (i=1; i<nrqr; ++i) rbuf1[i] = rbuf1[i-1] + bsz;

724:   /* Post the receives */
725:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&r_waits1);
726:   for (i=0; i<nrqr; ++i){
727:     MPI_Irecv(rbuf1[i],bsz,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits1+i);
728:   }
729: 
730:   /* Allocate Memory for outgoing messages */
731:   len   = 2*size*sizeof(int*) + (size+msz)*sizeof(int);
732:   PetscMalloc(len,&sbuf1);
733:   ptr   = sbuf1 + size;     /* Pointers to the data in outgoing buffers */
734:   PetscMemzero(sbuf1,2*size*sizeof(int*));
735:   tmp   = (int*)(sbuf1 + 2*size);
736:   ctr   = tmp + msz;

738:   {
739:     int *iptr = tmp,ict  = 0;
740:     for (i=0; i<nrqs; i++) {
741:       j        = pa[i];
742:       iptr    += ict;
743:       sbuf1[j] = iptr;
744:       ict      = w1[2*j];
745:     }
746:   }

748:   /* Form the outgoing messages */
749:   /* Clean up the header space */
750:   for (i=0; i<nrqs; i++) {
751:     j           = pa[i];
752:     sbuf1[j][0] = 0;
753:     PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(int));
754:     ptr[j]      = sbuf1[j] + 2*w3[j] + 1;
755:   }

757:   /* Parse the matrix and copy the data into sbuf1 */
758:   for (i=0; i<mat->m; i++) {
759:     PetscMemzero(ctr,size*sizeof(int));
760:     vs = A->rows[i];
761:     for (j=0; j<vs->length; j++) {
762:       col  = vs->col[j];
763:       proc = rtable[col];
764:       if (proc != rank) { /* copy to the outgoing buffer */
765:         ctr[proc]++;
766:           *ptr[proc] = col;
767:           ptr[proc]++;
768:       } else {
769:         row = col - rstart;
770:         col = i + rstart;
771:         MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
772:       }
773:     }
774:     /* Update the headers for the current row */
775:     for (j=0; j<size; j++) { /* Can Optimise this loop by using pa[] */
776:       if ((ctr_j = ctr[j])) {
777:         sbuf1_j        = sbuf1[j];
778:         k               = ++sbuf1_j[0];
779:         sbuf1_j[2*k]   = ctr_j;
780:         sbuf1_j[2*k-1] = i + rstart;
781:       }
782:     }
783:   }
784:    /* Check Validity of the outgoing messages */
785:   {
786:     int sum;
787:     for (i=0 ; i<nrqs ; i++) {
788:       j = pa[i];
789:       if (w3[j] != sbuf1[j][0]) {SETERRQ(PETSC_ERR_PLIB,"Blew it! Header[1] mismatch!\n"); }
790:     }

792:     for (i=0 ; i<nrqs ; i++) {
793:       j = pa[i];
794:       sum = 1;
795:       for (k = 1; k <= w3[j]; k++) sum += sbuf1[j][2*k]+2;
796:       if (sum != w1[2*j]) { SETERRQ(PETSC_ERR_PLIB,"Blew it! Header[2-n] mismatch!\n"); }
797:     }
798:   }
799: 
800:   /* Now post the sends */
801:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
802:   for (i=0; i<nrqs; ++i) {
803:     j    = pa[i];
804:     MPI_Isend(sbuf1[j],w1[2*j],MPI_INT,j,tag,comm,s_waits1+i);
805:   }
806: 
807:   /* Receive messages*/
808:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status);
809:   for (i=0; i<nrqr; ++i) {
810:     MPI_Waitany(nrqr,r_waits1,&idx,r_status+i);
811:     /* Process the Message */
812:     {
813:       int    *rbuf1_i,n_row,ct1;

815:       rbuf1_i = rbuf1[idx];
816:       n_row   = rbuf1_i[0];
817:       ct1     = 2*n_row+1;
818:       val     = 0.0;
819:       /* Optimise this later */
820:       for (j=1; j<=n_row; j++) {
821:         col = rbuf1_i[2*j-1];
822:         for (k=0; k<rbuf1_i[2*j]; k++,ct1++) {
823:           row = rbuf1_i[ct1] - rstart;
824:           MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
825:         }
826:       }
827:     }
828:   }

830:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status);
831:   MPI_Waitall(nrqs,s_waits1,s_status);

833:   PetscFree(rtable);
834:   PetscFree(w1);
835:   PetscFree(pa);
836:   PetscFree(rbuf1);
837:   PetscFree(sbuf1);
838:   PetscFree(r_waits1);
839:   PetscFree(s_waits1);
840:   PetscFree(r_status);
841:   PetscFree(s_status);
842:   return(0);
843: }

845: /*
846:      This does the BlockSolve portion of the matrix assembly.
 847:    It is provided in a separate routine so that users can
848:    operate on the matrix (using MatScale(), MatShift() etc.) after 
849:    the matrix has been assembled but before BlockSolve has sucked it
850:    in and devoured it.
851: */
854: PetscErrorCode MatAssemblyEnd_MPIRowbs_ForBlockSolve(Mat mat)
855: {
856:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
858:   int          ldim,low,high,i;
859:   PetscScalar  *diag;

862:   if ((mat->was_assembled) && (!mat->same_nonzero)) {  /* Free the old info */
863:     if (a->pA)       {BSfree_par_mat(a->pA);CHKERRBS(0);}
864:     if (a->comm_pA)  {BSfree_comm(a->comm_pA);CHKERRBS(0);}
865:   }

867:   if ((!mat->same_nonzero) || (!mat->was_assembled)) {
868:     /* Indicates bypassing cliques in coloring */
869:     if (a->bs_color_single) {
870:       BSctx_set_si(a->procinfo,100);
871:     }
872:     /* Form permuted matrix for efficient parallel execution */
873:     a->pA = BSmain_perm(a->procinfo,a->A);CHKERRBS(0);
874:     /* Set up the communication */
875:     a->comm_pA = BSsetup_forward(a->pA,a->procinfo);CHKERRBS(0);
876:   } else {
877:     /* Repermute the matrix */
878:     BSmain_reperm(a->procinfo,a->A,a->pA);CHKERRBS(0);
879:   }

881:   /* Symmetrically scale the matrix by the diagonal */
882:   BSscale_diag(a->pA,a->pA->diag,a->procinfo);CHKERRBS(0);

884:   /* Store inverse of square root of permuted diagonal scaling matrix */
885:   VecGetLocalSize(a->diag,&ldim);
886:   VecGetOwnershipRange(a->diag,&low,&high);
887:   VecGetArray(a->diag,&diag);
888:   for (i=0; i<ldim; i++) {
889:     if (a->pA->scale_diag[i] != 0.0) {
890:       diag[i] = 1.0/sqrt(PetscAbsScalar(a->pA->scale_diag[i]));
891:     } else {
892:       diag[i] = 1.0;
893:     }
894:   }
895:   VecRestoreArray(a->diag,&diag);
896:   a->assembled_icc_storage = a->A->icc_storage;
897:   a->blocksolveassembly = 1;
898:   mat->was_assembled    = PETSC_TRUE;
899:   mat->same_nonzero     = PETSC_TRUE;
900:   PetscLogInfo(mat,"MatAssemblyEnd_MPIRowbs_ForBlockSolve:Completed BlockSolve95 matrix assembly\n");
901:   return(0);
902: }
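
/*
   A minimal sketch of the calling pattern described in the comment before this routine
   (illustrative only; A and two are placeholders and error checking is omitted, as it is
   throughout this listing).  Note that MatMult(), MatView() and MatGetDiagonal() call this
   routine automatically if the BlockSolve assembly has not been done yet.

      Mat         A;        -- a MATMPIROWBS matrix whose values have been set
      PetscScalar two = 2.0;

      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);      -- PETSc-side assembly only
      MatScale(&two,A);                          -- still allowed; BlockSolve has not taken over yet
      MatAssemblyEnd_MPIRowbs_ForBlockSolve(A);  -- hand the matrix to BlockSolve95
*/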

906: PetscErrorCode MatAssemblyEnd_MPIRowbs(Mat mat,MatAssemblyType mode)
907: {
908:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
910:   int          i,n,row,col,*rows,*cols,rstart,nzcount,flg,j,ncols;
911:   PetscScalar  *vals,val;
912:   InsertMode   addv = mat->insertmode;

915:   while (1) {
916:     MatStashScatterGetMesg_Private(&mat->stash,&n,&rows,&cols,&vals,&flg);
917:     if (!flg) break;
918: 
919:     for (i=0; i<n;) {
920:       /* Now identify the consecutive vals belonging to the same row */
921:       for (j=i,rstart=rows[j]; j<n; j++) { if (rows[j] != rstart) break; }
922:       if (j < n) ncols = j-i;
923:       else       ncols = n-i;
924:       /* Now assemble all these values with a single function call */
925:       MatSetValues_MPIRowbs(mat,1,rows+i,ncols,cols+i,vals+i,addv);
926:       i = j;
927:     }
928:   }
929:   MatStashScatterEnd_Private(&mat->stash);

931:   rstart = a->rstart;
932:   nzcount = a->nz; /* This is the number of nonzeros entered by the user */
933:   /* BlockSolve requires that the matrix is structurally symmetric */
934:   if (mode == MAT_FINAL_ASSEMBLY && !mat->structurally_symmetric) {
935:     MatAssemblyEnd_MPIRowbs_MakeSymmetric(mat);
936:   }
937: 
938:   /* BlockSolve requires that all the diagonal elements are set */
939:   val  = 0.0;
940:   for (i=0; i<mat->m; i++) {
941:     row = i; col = i + rstart;
942:     MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
943:   }
944: 
945:   MatAssemblyBegin_MPIRowbs_local(mat,mode);
946:   MatAssemblyEnd_MPIRowbs_local(mat,mode);
947: 
948:   a->blocksolveassembly = 0;
949:   PetscLogInfo(mat,"MatAssemblyEnd_MPIRowbs:Matrix size: %d X %d; storage space: %d unneeded,%d used\n",mat->m,mat->n,a->maxnz-a->nz,a->nz);
950:   PetscLogInfo(mat,"MatAssemblyEnd_MPIRowbs: User entered %d nonzeros, PETSc added %d\n",nzcount,a->nz-nzcount);
951:   PetscLogInfo(mat,"MatAssemblyEnd_MPIRowbs:Number of mallocs during MatSetValues is %d\n",a->reallocs);
952:   return(0);
953: }

957: PetscErrorCode MatZeroEntries_MPIRowbs(Mat mat)
958: {
959:   Mat_MPIRowbs *l = (Mat_MPIRowbs*)mat->data;
960:   BSspmat      *A = l->A;
961:   BSsprow      *vs;
962:   int          i,j;

965:   for (i=0; i <mat->m; i++) {
966:     vs = A->rows[i];
967:     for (j=0; j< vs->length; j++) vs->nz[j] = 0.0;
968:   }
969:   return(0);
970: }

972: /* the code does not do the diagonal entries correctly unless the 
 973:    matrix is square and the column and row ownerships are identical.
974:    This is a BUG.
975: */

979: PetscErrorCode MatZeroRows_MPIRowbs(Mat A,IS is,const PetscScalar *diag)
980: {
981:   Mat_MPIRowbs   *l = (Mat_MPIRowbs*)A->data;
983:   int            i,N,*rows,*owners = l->rowners,size = l->size;
984:   int            *nprocs,j,idx,nsends;
985:   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
986:   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
987:   int            *lens,imdex,*lrows,*values;
988:   MPI_Comm       comm = A->comm;
989:   MPI_Request    *send_waits,*recv_waits;
990:   MPI_Status     recv_status,*send_status;
991:   IS             istmp;
992:   PetscTruth     found;

995:   ISGetLocalSize(is,&N);
996:   ISGetIndices(is,&rows);

998:   /*  first count number of contributors to each processor */
999:   PetscMalloc(2*size*sizeof(int),&nprocs);
1000:   PetscMemzero(nprocs,2*size*sizeof(int));
1001:   PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
1002:   for (i=0; i<N; i++) {
1003:     idx   = rows[i];
1004:     found = PETSC_FALSE;
1005:     for (j=0; j<size; j++) {
1006:       if (idx >= owners[j] && idx < owners[j+1]) {
1007:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
1008:       }
1009:     }
1010:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
1011:   }
1012:   nsends = 0;  for (i=0; i<size; i++) {nsends += nprocs[2*i+1];}

1014:   /* inform other processors of number of messages and max length*/
1015:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

1017:   /* post receives:   */
1018:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
1019:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
1020:   for (i=0; i<nrecvs; i++) {
1021:     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
1022:   }

1024:   /* do sends:
1025:       1) starts[i] gives the starting index in svalues for stuff going to 
1026:          the ith processor
1027:   */
1028:   PetscMalloc((N+1)*sizeof(int),&svalues);
1029:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
1030:   PetscMalloc((size+1)*sizeof(int),&starts);
1031:   starts[0] = 0;
1032:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1033:   for (i=0; i<N; i++) {
1034:     svalues[starts[owner[i]]++] = rows[i];
1035:   }
1036:   ISRestoreIndices(is,&rows);

1038:   starts[0] = 0;
1039:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1040:   count = 0;
1041:   for (i=0; i<size; i++) {
1042:     if (nprocs[2*i+1]) {
1043:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
1044:     }
1045:   }
1046:   PetscFree(starts);

1048:   base = owners[rank];

1050:   /*  wait on receives */
1051:   PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
1052:   source = lens + nrecvs;
1053:   count = nrecvs; slen = 0;
1054:   while (count) {
1055:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1056:     /* unpack receives into our local space */
1057:     MPI_Get_count(&recv_status,MPI_INT,&n);
1058:     source[imdex]  = recv_status.MPI_SOURCE;
1059:     lens[imdex]    = n;
1060:     slen           += n;
1061:     count--;
1062:   }
1063:   PetscFree(recv_waits);
1064: 
1065:   /* move the data into the send scatter */
1066:   PetscMalloc((slen+1)*sizeof(int),&lrows);
1067:   count = 0;
1068:   for (i=0; i<nrecvs; i++) {
1069:     values = rvalues + i*nmax;
1070:     for (j=0; j<lens[i]; j++) {
1071:       lrows[count++] = values[j] - base;
1072:     }
1073:   }
1074:   PetscFree(rvalues);
1075:   PetscFree(lens);
1076:   PetscFree(owner);
1077:   PetscFree(nprocs);
1078: 
1079:   /* actually zap the local rows */
1080:   ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
1081:   PetscLogObjectParent(A,istmp);
1082:   PetscFree(lrows);
1083:   MatZeroRows_MPIRowbs_local(A,istmp,diag);
1084:   ISDestroy(istmp);

1086:   /* wait on sends */
1087:   if (nsends) {
1088:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1089:     MPI_Waitall(nsends,send_waits,send_status);
1090:     PetscFree(send_status);
1091:   }
1092:   PetscFree(send_waits);
1093:   PetscFree(svalues);

1095:   return(0);
1096: }

1100: PetscErrorCode MatNorm_MPIRowbs(Mat mat,NormType type,PetscReal *norm)
1101: {
1102:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1103:   BSsprow      *vs,**rs;
1104:   PetscScalar  *xv;
1105:   PetscReal    sum = 0.0;
1107:   int          *xi,nz,i,j;

1110:   if (a->size == 1) {
1111:     MatNorm_MPIRowbs_local(mat,type,norm);
1112:   } else {
1113:     rs = a->A->rows;
1114:     if (type == NORM_FROBENIUS) {
1115:       for (i=0; i<mat->m; i++) {
1116:         vs = *rs++;
1117:         nz = vs->length;
1118:         xv = vs->nz;
1119:         while (nz--) {
1120: #if defined(PETSC_USE_COMPLEX)
1121:           sum += PetscRealPart(PetscConj(*xv)*(*xv)); xv++;
1122: #else
1123:           sum += (*xv)*(*xv); xv++;
1124: #endif
1125:         }
1126:       }
1127:       MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);
1128:       *norm = sqrt(*norm);
1129:     } else if (type == NORM_1) { /* max column norm */
1130:       PetscReal *tmp,*tmp2;
1131:       PetscMalloc(mat->n*sizeof(PetscReal),&tmp);
1132:       PetscMalloc(mat->n*sizeof(PetscReal),&tmp2);
1133:       PetscMemzero(tmp,mat->n*sizeof(PetscReal));
1134:       *norm = 0.0;
1135:       for (i=0; i<mat->m; i++) {
1136:         vs = *rs++;
1137:         nz = vs->length;
1138:         xi = vs->col;
1139:         xv = vs->nz;
1140:         while (nz--) {
1141:           tmp[*xi] += PetscAbsScalar(*xv);
1142:           xi++; xv++;
1143:         }
1144:       }
1145:       MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);
1146:       for (j=0; j<mat->n; j++) {
1147:         if (tmp2[j] > *norm) *norm = tmp2[j];
1148:       }
1149:       PetscFree(tmp);
1150:       PetscFree(tmp2);
1151:     } else if (type == NORM_INFINITY) { /* max row norm */
1152:       PetscReal ntemp = 0.0;
1153:       for (i=0; i<mat->m; i++) {
1154:         vs = *rs++;
1155:         nz = vs->length;
1156:         xv = vs->nz;
1157:         sum = 0.0;
1158:         while (nz--) {
1159:           sum += PetscAbsScalar(*xv); xv++;
1160:         }
1161:         if (sum > ntemp) ntemp = sum;
1162:       }
1163:       MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);
1164:     } else {
1165:       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1166:     }
1167:   }
1168:   return(0);
1169: }

1173: PetscErrorCode MatMult_MPIRowbs(Mat mat,Vec xx,Vec yy)
1174: {
1175:   Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)mat->data;
1176:   BSprocinfo   *bspinfo = bsif->procinfo;
1177:   PetscScalar  *xxa,*xworka,*yya;

1181:   if (!bsif->blocksolveassembly) {
1182:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1183:   }

1185:   /* Permute and apply diagonal scaling:  [ xwork = D^{1/2} * x ] */
1186:   if (!bsif->vecs_permscale) {
1187:     VecGetArray(bsif->xwork,&xworka);
1188:     VecGetArray(xx,&xxa);
1189:     BSperm_dvec(xxa,xworka,bsif->pA->perm);CHKERRBS(0);
1190:     VecRestoreArray(bsif->xwork,&xworka);
1191:     VecRestoreArray(xx,&xxa);
1192:     VecPointwiseDivide(bsif->xwork,bsif->diag,xx);
1193:   }

1195:   VecGetArray(xx,&xxa);
1196:   VecGetArray(yy,&yya);
1197:   /* Do lower triangular multiplication:  [ y = L * xwork ] */
1198:   if (bspinfo->single) {
1199:     BSforward1(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1200:   }  else {
1201:     BSforward(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1202:   }
1203: 
1204:   /* Do upper triangular multiplication:  [ y = y + L^{T} * xwork ] */
1205:   if (mat->symmetric) {
1206:     if (bspinfo->single){
1207:       BSbackward1(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1208:     } else {
1209:       BSbackward(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1210:     }
1211:   }
1212:   /* not needed for ILU version since forward does it all */
1213:   VecRestoreArray(xx,&xxa);
1214:   VecRestoreArray(yy,&yya);

1216:   /* Apply diagonal scaling to vector:  [  y = D^{1/2} * y ] */
1217:   if (!bsif->vecs_permscale) {
1218:     VecGetArray(bsif->xwork,&xworka);
1219:     VecGetArray(xx,&xxa);
1220:     BSiperm_dvec(xworka,xxa,bsif->pA->perm);CHKERRBS(0);
1221:     VecRestoreArray(bsif->xwork,&xworka);
1222:     VecRestoreArray(xx,&xxa);
1223:     VecPointwiseDivide(yy,bsif->diag,bsif->xwork);
1224:     VecGetArray(bsif->xwork,&xworka);
1225:     VecGetArray(yy,&yya);
1226:     BSiperm_dvec(xworka,yya,bsif->pA->perm);CHKERRBS(0);
1227:     VecRestoreArray(bsif->xwork,&xworka);
1228:     VecRestoreArray(yy,&yya);
1229:   }
1230:   PetscLogFlops(2*bsif->nz - mat->m);

1232:   return(0);
1233: }

1237: PetscErrorCode MatMultAdd_MPIRowbs(Mat mat,Vec xx,Vec yy,Vec zz)
1238: {
1240:   PetscScalar  one = 1.0;

1243:   (*mat->ops->mult)(mat,xx,zz);
1244:   VecAXPY(&one,yy,zz);
1245:   return(0);
1246: }

1250: PetscErrorCode MatGetInfo_MPIRowbs(Mat A,MatInfoType flag,MatInfo *info)
1251: {
1252:   Mat_MPIRowbs *mat = (Mat_MPIRowbs*)A->data;
1253:   PetscReal    isend[5],irecv[5];

1257:   info->rows_global    = (double)A->M;
1258:   info->columns_global = (double)A->N;
1259:   info->rows_local     = (double)A->m;
1260:   info->columns_local  = (double)A->N;
1261:   info->block_size     = 1.0;
1262:   info->mallocs        = (double)mat->reallocs;
1263:   isend[0] = mat->nz; isend[1] = mat->maxnz; isend[2] =  mat->maxnz -  mat->nz;
1264:   isend[3] = A->mem;  isend[4] = info->mallocs;

1266:   if (flag == MAT_LOCAL) {
1267:     info->nz_used      = isend[0];
1268:     info->nz_allocated = isend[1];
1269:     info->nz_unneeded  = isend[2];
1270:     info->memory       = isend[3];
1271:     info->mallocs      = isend[4];
1272:   } else if (flag == MAT_GLOBAL_MAX) {
1273:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);
1274:     info->nz_used      = irecv[0];
1275:     info->nz_allocated = irecv[1];
1276:     info->nz_unneeded  = irecv[2];
1277:     info->memory       = irecv[3];
1278:     info->mallocs      = irecv[4];
1279:   } else if (flag == MAT_GLOBAL_SUM) {
1280:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);
1281:     info->nz_used      = irecv[0];
1282:     info->nz_allocated = irecv[1];
1283:     info->nz_unneeded  = irecv[2];
1284:     info->memory       = irecv[3];
1285:     info->mallocs      = irecv[4];
1286:   }
1287:   return(0);
1288: }

1292: PetscErrorCode MatGetDiagonal_MPIRowbs(Mat mat,Vec v)
1293: {
1294:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1295:   BSsprow      **rs = a->A->rows;
1297:   int          i,n;
1298:   PetscScalar  *x,zero = 0.0;

1301:   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1302:   if (!a->blocksolveassembly) {
1303:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1304:   }

1306:   VecSet(&zero,v);
1307:   VecGetLocalSize(v,&n);
1308:   if (n != mat->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
1309:   VecGetArray(v,&x);
1310:   for (i=0; i<mat->m; i++) {
1311:     x[i] = rs[i]->nz[rs[i]->diag_ind];
1312:   }
1313:   VecRestoreArray(v,&x);
1314:   return(0);
1315: }

1319: PetscErrorCode MatDestroy_MPIRowbs(Mat mat)
1320: {
1321:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1322:   BSspmat      *A = a->A;
1323:   BSsprow      *vs;
1325:   int          i;

1328: #if defined(PETSC_USE_LOG)
1329:   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
1330: #endif
1331:   PetscFree(a->rowners);
1332:   MatStashDestroy_Private(&mat->stash);
1333:   if (a->bsmap) {
1334:     if (a->bsmap->vlocal2global) {PetscFree(a->bsmap->vlocal2global);}
1335:     if (a->bsmap->vglobal2local) {PetscFree(a->bsmap->vglobal2local);}
1336:     if (a->bsmap->vglobal2proc)  (*a->bsmap->free_g2p)(a->bsmap->vglobal2proc);
1337:     PetscFree(a->bsmap);
1338:   }

1340:   if (A) {
1341:     for (i=0; i<mat->m; i++) {
1342:       vs = A->rows[i];
1343:       MatFreeRowbs_Private(mat,vs->length,vs->col,vs->nz);
1344:     }
1345:     /* Note: A->map = a->bsmap is freed above */
1346:     PetscFree(A->rows);
1347:     PetscFree(A);
1348:   }
1349:   if (a->procinfo) {BSfree_ctx(a->procinfo);CHKERRBS(0);}
1350:   if (a->diag)     {VecDestroy(a->diag);}
1351:   if (a->xwork)    {VecDestroy(a->xwork);}
1352:   if (a->pA)       {BSfree_par_mat(a->pA);CHKERRBS(0);}
1353:   if (a->fpA)      {BSfree_copy_par_mat(a->fpA);CHKERRBS(0);}
1354:   if (a->comm_pA)  {BSfree_comm(a->comm_pA);CHKERRBS(0);}
1355:   if (a->comm_fpA) {BSfree_comm(a->comm_fpA);CHKERRBS(0);}
1356:   if (a->imax)     {PetscFree(a->imax);}
1357:   MPI_Comm_free(&(a->comm_mpirowbs));
1358:   PetscFree(a);
1359:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIRowbsSetPreallocation_C","",PETSC_NULL);
1360:   return(0);
1361: }

1365: PetscErrorCode MatSetOption_MPIRowbs(Mat A,MatOption op)
1366: {
1367:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)A->data;

1370:   switch (op) {
1371:   case MAT_ROW_ORIENTED:
1372:     a->roworiented = PETSC_TRUE;
1373:     break;
1374:   case MAT_COLUMN_ORIENTED:
1375:     a->roworiented = PETSC_FALSE;
1376:     break;
1377:   case MAT_COLUMNS_SORTED:
1378:     a->sorted      = 1;
1379:     break;
1380:   case MAT_COLUMNS_UNSORTED:
1381:     a->sorted      = 0;
1382:     break;
1383:   case MAT_NO_NEW_NONZERO_LOCATIONS:
1384:     a->nonew       = 1;
1385:     break;
1386:   case MAT_YES_NEW_NONZERO_LOCATIONS:
1387:     a->nonew       = 0;
1388:     break;
1389:   case MAT_DO_NOT_USE_INODES:
1390:     a->bs_color_single = 1;
1391:     break;
1392:   case MAT_YES_NEW_DIAGONALS:
1393:   case MAT_ROWS_SORTED:
1394:   case MAT_NEW_NONZERO_LOCATION_ERR:
1395:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1396:   case MAT_ROWS_UNSORTED:
1397:   case MAT_USE_HASH_TABLE:
1398:     PetscLogInfo(A,"MatSetOption_MPIRowbs:Option ignored\n");
1399:     break;
1400:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1401:     a->donotstash = PETSC_TRUE;
1402:     break;
1403:   case MAT_NO_NEW_DIAGONALS:
1404:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1405:     break;
1406:   case MAT_KEEP_ZEROED_ROWS:
1407:     a->keepzeroedrows    = PETSC_TRUE;
1408:     break;
1409:   case MAT_SYMMETRIC:
1410:     BSset_mat_symmetric(a->A,PETSC_TRUE);CHKERRBS(0);
1411:     break;
1412:   case MAT_STRUCTURALLY_SYMMETRIC:
1413:   case MAT_NOT_SYMMETRIC:
1414:   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1415:   case MAT_HERMITIAN:
1416:   case MAT_NOT_HERMITIAN:
1417:   case MAT_SYMMETRY_ETERNAL:
1418:   case MAT_NOT_SYMMETRY_ETERNAL:
1419:     break;
1420:   default:
1421:     SETERRQ(PETSC_ERR_SUP,"unknown option");
1422:     break;
1423:   }
1424:   return(0);
1425: }

1429: PetscErrorCode MatGetRow_MPIRowbs(Mat AA,int row,int *nz,int **idx,PetscScalar **v)
1430: {
1431:   Mat_MPIRowbs *mat = (Mat_MPIRowbs*)AA->data;
1432:   BSspmat      *A = mat->A;
1433:   BSsprow      *rs;
1434: 
1436:   if (row < mat->rstart || row >= mat->rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");

1438:   rs  = A->rows[row - mat->rstart];
1439:   *nz = rs->length;
1440:   if (v)   *v   = rs->nz;
1441:   if (idx) *idx = rs->col;
1442:   return(0);
1443: }

1447: PetscErrorCode MatRestoreRow_MPIRowbs(Mat A,int row,int *nz,int **idx,PetscScalar **v)
1448: {
1450:   return(0);
1451: }

1453: /* ------------------------------------------------------------------ */

1457: PetscErrorCode MatPrintHelp_MPIRowbs(Mat A)
1458: {
1459:   static PetscTruth called = PETSC_FALSE;
1460:   MPI_Comm          comm = A->comm;

1464:   if (called) {return(0);} else called = PETSC_TRUE;
1465:   (*PetscHelpPrintf)(comm," Options for MATMPIROWBS matrix format (needed for BlockSolve):\n");
1466:   (*PetscHelpPrintf)(comm,"  -mat_rowbs_no_inode  - Do not use inodes\n");
1467:   return(0);
1468: }

1472: PetscErrorCode MatSetUpPreallocation_MPIRowbs(Mat A)
1473: {

1477:   MatMPIRowbsSetPreallocation(A,PETSC_DEFAULT,0);
1478:   return(0);
1479: }

1481: /* -------------------------------------------------------------------*/
1482: static struct _MatOps MatOps_Values = {MatSetValues_MPIRowbs,
1483:        MatGetRow_MPIRowbs,
1484:        MatRestoreRow_MPIRowbs,
1485:        MatMult_MPIRowbs,
1486: /* 4*/ MatMultAdd_MPIRowbs,
1487:        MatMult_MPIRowbs,
1488:        MatMultAdd_MPIRowbs,
1489:        MatSolve_MPIRowbs,
1490:        0,
1491:        0,
1492: /*10*/ 0,
1493:        0,
1494:        0,
1495:        0,
1496:        0,
1497: /*15*/ MatGetInfo_MPIRowbs,
1498:        0,
1499:        MatGetDiagonal_MPIRowbs,
1500:        0,
1501:        MatNorm_MPIRowbs,
1502: /*20*/ MatAssemblyBegin_MPIRowbs,
1503:        MatAssemblyEnd_MPIRowbs,
1504:        0,
1505:        MatSetOption_MPIRowbs,
1506:        MatZeroEntries_MPIRowbs,
1507: /*25*/ MatZeroRows_MPIRowbs,
1508:        0,
1509:        MatLUFactorNumeric_MPIRowbs,
1510:        0,
1511:        MatCholeskyFactorNumeric_MPIRowbs,
1512: /*30*/ MatSetUpPreallocation_MPIRowbs,
1513:        MatILUFactorSymbolic_MPIRowbs,
1514:        MatIncompleteCholeskyFactorSymbolic_MPIRowbs,
1515:        0,
1516:        0,
1517: /*35*/ 0,
1518:        MatForwardSolve_MPIRowbs,
1519:        MatBackwardSolve_MPIRowbs,
1520:        0,
1521:        0,
1522: /*40*/ 0,
1523:        MatGetSubMatrices_MPIRowbs,
1524:        0,
1525:        0,
1526:        0,
1527: /*45*/ MatPrintHelp_MPIRowbs,
1528:        MatScale_MPIRowbs,
1529:        0,
1530:        0,
1531:        0,
1532: /*50*/ 0,
1533:        0,
1534:        0,
1535:        0,
1536:        0,
1537: /*55*/ 0,
1538:        0,
1539:        0,
1540:        0,
1541:        0,
1542: /*60*/ MatGetSubMatrix_MPIRowbs,
1543:        MatDestroy_MPIRowbs,
1544:        MatView_MPIRowbs,
1545:        MatGetPetscMaps_Petsc,
1546:        MatUseScaledForm_MPIRowbs,
1547: /*65*/ MatScaleSystem_MPIRowbs,
1548:        MatUnScaleSystem_MPIRowbs,
1549:        0,
1550:        0,
1551:        0,
1552: /*70*/ 0,
1553:        0,
1554:        0,
1555:        0,
1556:        0,
1557: /*75*/ 0,
1558:        0,
1559:        0,
1560:        0,
1561:        0,
1562: /*80*/ 0,
1563:        0,
1564:        0,
1565:        0,
1566:        MatLoad_MPIRowbs,
1567: /*85*/ 0,
1568:        0,
1569:        0,
1570:        0,
1571:        0,
1572: /*90*/ 0,
1573:        0,
1574:        0,
1575:        0,
1576:        0,
1577: /*95*/ 0,
1578:        0,
1579:        0,
1580:        0};

1582: /* ------------------------------------------------------------------- */

1587: PetscErrorCode MatMPIRowbsSetPreallocation_MPIRowbs(Mat mat,int nz,const int nnz[])
1588: {

1592:   mat->preallocated = PETSC_TRUE;
1593:   MatCreateMPIRowbs_local(mat,nz,nnz);
1594:   return(0);
1595: }

1598: /*MC
1599:    MATMPIROWBS - MATMPIROWBS = "mpirowbs" - A matrix type providing ILU and ICC for distributed sparse matrices for use
1600:    with the external package BlockSolve95.  If BlockSolve95 is installed (see the manual for instructions
1601:    on how to declare the existence of external packages), a matrix type can be constructed which invokes
1602:    BlockSolve95 preconditioners and solvers. 

1604:    Options Database Keys:
1605: . -mat_type mpirowbs - sets the matrix type to "mpirowbs" during a call to MatSetFromOptions()

1607:   Level: beginner

1609: .seealso: MatCreateMPIRowbs
1610: M*/
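
/*
   A minimal usage sketch for this matrix type (illustrative only; the sizes are placeholders
   and error checking is omitted, as it is throughout this listing):

      Mat A;
      MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,&A);
      MatSetType(A,MATMPIROWBS);                   -- or -mat_type mpirowbs with MatSetFromOptions()
      MatMPIRowbsSetPreallocation(A,5,PETSC_NULL);
      -- ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), solve ...
*/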

1615: PetscErrorCode MatCreate_MPIRowbs(Mat A)
1616: {
1617:   Mat_MPIRowbs *a;
1618:   BSmapping    *bsmap;
1619:   BSoff_map    *bsoff;
1621:   int          i,*offset,m,M;
1622:   PetscTruth   flg1,flg2,flg3;
1623:   BSprocinfo   *bspinfo;
1624:   MPI_Comm     comm;
1625: 
1627:   comm = A->comm;
1628:   m    = A->m;
1629:   M    = A->M;

1631:   PetscNew(Mat_MPIRowbs,&a);
1632:   A->data               = (void*)a;
1633:   PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));
1634:   A->factor             = 0;
1635:   A->mapping            = 0;
1636:   a->vecs_permscale     = PETSC_FALSE;
1637:   A->insertmode         = NOT_SET_VALUES;
1638:   a->blocksolveassembly = 0;
1639:   a->keepzeroedrows     = PETSC_FALSE;

1641:   MPI_Comm_rank(comm,&a->rank);
1642:   MPI_Comm_size(comm,&a->size);

1644:   PetscSplitOwnership(comm,&m,&M);

1646:   A->N = M;
1647:   A->M = M;
1648:   A->m = m;
1649:   A->n = A->N;  /* each row stores all columns */
1650:   PetscMalloc((A->m+1)*sizeof(int),&a->imax);
1651:   a->reallocs                      = 0;

1653:   /* the information in the maps duplicates the information computed below, eventually 
1654:      we should remove the duplicate information that is not contained in the maps */
1655:   PetscMapCreateMPI(comm,m,M,&A->rmap);
1656:   PetscMapCreateMPI(comm,m,M,&A->cmap);

1658:   /* build local table of row ownerships */
1659:   PetscMalloc((a->size+2)*sizeof(int),&a->rowners);
1660:   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1661:   a->rowners[0] = 0;
1662:   for (i=2; i<=a->size; i++) {
1663:     a->rowners[i] += a->rowners[i-1];
1664:   }
1665:   a->rstart = a->rowners[a->rank];
1666:   a->rend   = a->rowners[a->rank+1];
1667:   PetscLogObjectMemory(A,(A->m+a->size+3)*sizeof(int));

1669:   /* build cache for off array entries formed */
1670:   MatStashCreate_Private(A->comm,1,&A->stash);
1671:   a->donotstash = PETSC_FALSE;

1673:   /* Initialize BlockSolve information */
1674:   a->A        = 0;
1675:   a->pA       = 0;
1676:   a->comm_pA  = 0;
1677:   a->fpA      = 0;
1678:   a->comm_fpA = 0;
1679:   a->alpha    = 1.0;
1680:   a->ierr     = 0;
1681:   a->failures = 0;
1682:   MPI_Comm_dup(A->comm,&(a->comm_mpirowbs));
1683:   VecCreateMPI(A->comm,A->m,A->M,&(a->diag));
1684:   VecDuplicate(a->diag,&(a->xwork));
1685:   PetscLogObjectParent(A,a->diag);  PetscLogObjectParent(A,a->xwork);
1686:   PetscLogObjectMemory(A,(A->m+1)*sizeof(PetscScalar));
1687:   bspinfo = BScreate_ctx();CHKERRBS(0);
1688:   a->procinfo = bspinfo;
1689:   BSctx_set_id(bspinfo,a->rank);CHKERRBS(0);
1690:   BSctx_set_np(bspinfo,a->size);CHKERRBS(0);
1691:   BSctx_set_ps(bspinfo,a->comm_mpirowbs);CHKERRBS(0);
1692:   BSctx_set_cs(bspinfo,INT_MAX);CHKERRBS(0);
1693:   BSctx_set_is(bspinfo,INT_MAX);CHKERRBS(0);
1694:   BSctx_set_ct(bspinfo,IDO);CHKERRBS(0);
1695: #if defined(PETSC_USE_DEBUG)
1696:   BSctx_set_err(bspinfo,1);CHKERRBS(0);  /* BS error checking */
1697: #endif
1698:   BSctx_set_rt(bspinfo,1);CHKERRBS(0);
1699:   PetscOptionsHasName(PETSC_NULL,"-log_info",&flg1);
1700:   if (flg1) {
1701:     BSctx_set_pr(bspinfo,1);CHKERRBS(0);
1702:   }
1703:   PetscOptionsHasName(PETSC_NULL,"-pc_ilu_factorpointwise",&flg1);
1704:   PetscOptionsHasName(PETSC_NULL,"-pc_icc_factorpointwise",&flg2);
1705:   PetscOptionsHasName(PETSC_NULL,"-mat_rowbs_no_inode",&flg3);
1706:   if (flg1 || flg2 || flg3) {
1707:     BSctx_set_si(bspinfo,1);CHKERRBS(0);
1708:   } else {
1709:     BSctx_set_si(bspinfo,0);CHKERRBS(0);
1710:   }
1711: #if defined(PETSC_USE_LOG)
1712:   MLOG_INIT();  /* Initialize logging */
1713: #endif

1715:   /* Compute global offsets */
1716:   offset = &a->rstart;

1718:   PetscNew(BSmapping,&a->bsmap);
1719:   PetscLogObjectMemory(A,sizeof(BSmapping));
1720:   bsmap = a->bsmap;
1721:   PetscMalloc(sizeof(int),&bsmap->vlocal2global);
1722:   *((int*)bsmap->vlocal2global) = (*offset);
1723:   bsmap->flocal2global = BSloc2glob;
1724:   bsmap->free_l2g      = 0;
1725:   PetscMalloc(sizeof(int),&bsmap->vglobal2local);
1726:   *((int*)bsmap->vglobal2local) = (*offset);
1727:   bsmap->fglobal2local = BSglob2loc;
1728:   bsmap->free_g2l      = 0;
1729:   bsoff                = BSmake_off_map(*offset,bspinfo,A->M);
1730:   bsmap->vglobal2proc  = (void*)bsoff;
1731:   bsmap->fglobal2proc  = BSglob2proc;
1732:   bsmap->free_g2p      = (void(*)(void*))BSfree_off_map;
1733:   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMPIRowbsSetPreallocation_C",
1734:                                     "MatMPIRowbsSetPreallocation_MPIRowbs",
1735:                                      MatMPIRowbsSetPreallocation_MPIRowbs);
1736:   return(0);
1737: }

1742: /* @
1743:   MatMPIRowbsSetPreallocation - Sets the number of expected nonzeros 
1744:   per row in the matrix.

1746:   Input Parameters:
1747: +  mat - matrix
1748: .  nz - maximum expected for any row
1749: -  nnz - number expected in each row

1751:   Note:
1752:   This routine is valid only for matrices stored in the MATMPIROWBS
1753:   format.
1754: @ */
1755: PetscErrorCode MatMPIRowbsSetPreallocation(Mat mat,int nz,const int nnz[])
1756: {
1757:   PetscErrorCode ierr,(*f)(Mat,int,const int[]);

1760:   PetscObjectQueryFunction((PetscObject)mat,"MatMPIRowbsSetPreallocation_C",(void (**)(void))&f);
1761:   if (f) {
1762:     (*f)(mat,nz,nnz);
1763:   }
1764:   return(0);
1765: }

1767: /* --------------- extra BlockSolve-specific routines -------------- */
1770: /* @
1771:   MatGetBSProcinfo - Gets the BlockSolve BSprocinfo context, which the
1772:   user can then manipulate to alter the default parameters.

1774:   Input Parameter:
1775:   mat - matrix

1777:   Output Parameter:
1778:   procinfo - processor information context

1780:   Note:
1781:   This routine is valid only for matrices stored in the MATMPIROWBS
1782:   format.
1783: @ */
1784: PetscErrorCode MatGetBSProcinfo(Mat mat,BSprocinfo *procinfo)
1785: {
1786:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1787:   PetscTruth   ismpirowbs;

1791:   PetscTypeCompare((PetscObject)mat,MATMPIROWBS,&ismpirowbs);
1792:   if (!ismpirowbs) SETERRQ(PETSC_ERR_ARG_WRONG,"For MATMPIROWBS matrix type");
1793:   procinfo = a->procinfo;
1794:   return(0);
1795: }

1799: PetscErrorCode MatLoad_MPIRowbs(PetscViewer viewer,const MatType type,Mat *newmat)
1800: {
1801:   Mat_MPIRowbs *a;
1802:   BSspmat      *A;
1803:   BSsprow      **rs;
1804:   Mat          mat;
1806:   int          i,nz,j,rstart,rend,fd,*ourlens,*sndcounts = 0,*procsnz;
1807:   int          header[4],rank,size,*rowlengths = 0,M,m,*rowners,maxnz,*cols;
1808:   PetscScalar  *vals;
1809:   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1810:   MPI_Status   status;

1813:   MPI_Comm_size(comm,&size);
1814:   MPI_Comm_rank(comm,&rank);
1815:   if (!rank) {
1816:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1817:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1818:     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not matrix object");
1819:     if (header[3] < 0) {
1820:       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIRowbs");
1821:     }
1822:   }

1824:   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1825:   M = header[1];
1826:   /* determine ownership of all rows */
1827:   m          = M/size + ((M % size) > rank);
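  /* e.g. M = 10 rows on size = 3 processes gives local sizes 4,3,3 (the first
     M % size ranks each get one extra row) */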
1828:   PetscMalloc((size+2)*sizeof(int),&rowners);
1829:   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1830:   rowners[0] = 0;
1831:   for (i=2; i<=size; i++) {
1832:     rowners[i] += rowners[i-1];
1833:   }
1834:   rstart = rowners[rank];
1835:   rend   = rowners[rank+1];

1837:   /* distribute row lengths to all processors */
1838:   PetscMalloc((rend-rstart)*sizeof(int),&ourlens);
1839:   if (!rank) {
1840:     PetscMalloc(M*sizeof(int),&rowlengths);
1841:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1842:     PetscMalloc(size*sizeof(int),&sndcounts);
1843:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1844:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1845:     PetscFree(sndcounts);
1846:   } else {
1847:     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1848:   }

1850:   /* create our matrix */
1851:   MatCreate(comm,m,m,M,M,newmat);
1852:   MatSetType(*newmat,type);
1853:   MatMPIRowbsSetPreallocation(*newmat,0,ourlens);
1854:   mat = *newmat;
1855:   PetscFree(ourlens);

1857:   a = (Mat_MPIRowbs*)mat->data;
1858:   A = a->A;
1859:   rs = A->rows;

1861:   if (!rank) {
1862:     /* calculate the number of nonzeros on each processor */
1863:     PetscMalloc(size*sizeof(int),&procsnz);
1864:     PetscMemzero(procsnz,size*sizeof(int));
1865:     for (i=0; i<size; i++) {
1866:       for (j=rowners[i]; j< rowners[i+1]; j++) {
1867:         procsnz[i] += rowlengths[j];
1868:       }
1869:     }
1870:     PetscFree(rowlengths);

1872:     /* determine max buffer needed and allocate it */
1873:     maxnz = 0;
1874:     for (i=0; i<size; i++) {
1875:       maxnz = PetscMax(maxnz,procsnz[i]);
1876:     }
1877:     PetscMalloc(maxnz*sizeof(int),&cols);

1879:     /* read in my part of the matrix column indices  */
1880:     nz = procsnz[0];
1881:     PetscBinaryRead(fd,cols,nz,PETSC_INT);
1882: 
1883:     /* insert it into my part of matrix */
1884:     nz = 0;
1885:     for (i=0; i<A->num_rows; i++) {
1886:       for (j=0; j<a->imax[i]; j++) {
1887:         rs[i]->col[j] = cols[nz++];
1888:       }
1889:       rs[i]->length = a->imax[i];
1890:     }
1891:     /* read in parts for all other processors */
1892:     for (i=1; i<size; i++) {
1893:       nz   = procsnz[i];
1894:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1895:       MPI_Send(cols,nz,MPI_INT,i,mat->tag,comm);
1896:     }
1897:     PetscFree(cols);
1898:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

1900:     /* read in my part of the matrix numerical values  */
1901:     nz   = procsnz[0];
1902:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1903: 
1904:     /* insert it into my part of matrix */
1905:     nz = 0;
1906:     for (i=0; i<A->num_rows; i++) {
1907:       for (j=0; j<a->imax[i]; j++) {
1908:         rs[i]->nz[j] = vals[nz++];
1909:       }
1910:     }
1911:     /* read in parts for all other processors */
1912:     for (i=1; i<size; i++) {
1913:       nz   = procsnz[i];
1914:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1915:       MPI_Send(vals,nz,MPIU_SCALAR,i,mat->tag,comm);
1916:     }
1917:     PetscFree(vals);
1918:     PetscFree(procsnz);
1919:   } else {
1920:     /* determine buffer space needed for message */
1921:     nz = 0;
1922:     for (i=0; i<A->num_rows; i++) {
1923:       nz += a->imax[i];
1924:     }
1925:     PetscMalloc(nz*sizeof(int),&cols);

1927:     /* receive message of column indices*/
1928:     MPI_Recv(cols,nz,MPI_INT,0,mat->tag,comm,&status);
1929:     MPI_Get_count(&status,MPI_INT,&maxnz);
1930:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong");

1932:     /* insert it into my part of matrix */
1933:     nz = 0;
1934:     for (i=0; i<A->num_rows; i++) {
1935:       for (j=0; j<a->imax[i]; j++) {
1936:         rs[i]->col[j] = cols[nz++];
1937:       }
1938:       rs[i]->length = a->imax[i];
1939:     }
1940:     PetscFree(cols);
1941:     PetscMalloc(nz*sizeof(PetscScalar),&vals);

1943:     /* receive message of values*/
1944:     MPI_Recv(vals,nz,MPIU_SCALAR,0,mat->tag,comm,&status);
1945:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1946:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong");

1948:     /* insert it into my part of matrix */
1949:     nz = 0;
1950:     for (i=0; i<A->num_rows; i++) {
1951:       for (j=0; j<a->imax[i]; j++) {
1952:         rs[i]->nz[j] = vals[nz++];
1953:       }
1954:       rs[i]->length = a->imax[i];
1955:     }
1956:     PetscFree(vals);
1957:   }
1958:   PetscFree(rowners);
1959:   a->nz = a->maxnz;
1960:   MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
1961:   MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
1962:   return(0);
1963: }

1965: /* 
1966:     Special destroy and view routines for factored matrices 
1967: */
1970: static PetscErrorCode MatDestroy_MPIRowbs_Factored(Mat mat)
1971: {
1973: #if defined(PETSC_USE_LOG)
1974:   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
1975: #endif
1976:   return(0);
1977: }

1981: static PetscErrorCode MatView_MPIRowbs_Factored(Mat mat,PetscViewer viewer)
1982: {

1986:   MatView((Mat) mat->data,viewer);
1987:   return(0);
1988: }

1992: PetscErrorCode MatIncompleteCholeskyFactorSymbolic_MPIRowbs(Mat mat,IS isrow,MatFactorInfo *info,Mat *newfact)
1993: {
1994:   /* Note:  f is not currently used in BlockSolve */
1995:   Mat          newmat;
1996:   Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
1998:   PetscTruth   idn;

2001:   if (isrow) {
2002:     ISIdentity(isrow,&idn);
2003:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
2004:   }

2006:   if (!mat->symmetric) {
2007:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"To use incomplete Cholesky \n\
2008:         preconditioning with a MATMPIROWBS matrix you must declare it to be \n\
2009:         symmetric using the option MatSetOption(A,MAT_SYMMETRIC)");
2010:   }

2012:   /* If the icc_storage flag wasn't set before the last blocksolveassembly,          */
2013:   /* we must completely redo the assembly as a different storage format is required. */
2014:   if (mbs->blocksolveassembly && !mbs->assembled_icc_storage) {
2015:     mat->same_nonzero       = PETSC_FALSE;
2016:     mbs->blocksolveassembly = 0;
2017:   }

2019:   if (!mbs->blocksolveassembly) {
2020:     BSset_mat_icc_storage(mbs->A,PETSC_TRUE);CHKERRBS(0);
2021:     BSset_mat_symmetric(mbs->A,PETSC_TRUE);CHKERRBS(0);
2022:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
2023:   }

2025:   /* Copy permuted matrix */
2026:   if (mbs->fpA) {BSfree_copy_par_mat(mbs->fpA);CHKERRBS(0);}
2027:   mbs->fpA = BScopy_par_mat(mbs->pA);CHKERRBS(0);

2029:   /* Set up the communication for factorization */
2030:   if (mbs->comm_fpA) {BSfree_comm(mbs->comm_fpA);CHKERRBS(0);}
2031:   mbs->comm_fpA = BSsetup_factor(mbs->fpA,mbs->procinfo);CHKERRBS(0);

2033:   /* 
2034:       Create a new Mat structure to hold the "factored" matrix; note that this
2035:     merely contains a pointer to the original matrix, since the original
2036:     matrix contains the factor information.
2037:   */
2038:   PetscHeaderCreate(newmat,_p_Mat,struct _MatOps,MAT_COOKIE,-1,"Mat",mat->comm,MatDestroy,MatView);
2039:   PetscLogObjectCreate(newmat);
2040:   PetscLogObjectMemory(newmat,sizeof(struct _p_Mat));

2042:   newmat->data         = (void*)mat;
2043:   PetscMemcpy(newmat->ops,&MatOps_Values,sizeof(struct _MatOps));
2044:   newmat->ops->destroy = MatDestroy_MPIRowbs_Factored;
2045:   newmat->ops->view    = MatView_MPIRowbs_Factored;
2046:   newmat->factor       = 1;
2047:   newmat->preallocated = PETSC_TRUE;
2048:   newmat->M            = mat->M;
2049:   newmat->N            = mat->N;
2050:   newmat->m            = mat->m;
2051:   newmat->n            = mat->n;
2052:   PetscStrallocpy(MATMPIROWBS,&newmat->type_name);

2054:   *newfact = newmat;
2055:   return(0);
2056: }

2060: PetscErrorCode MatILUFactorSymbolic_MPIRowbs(Mat mat,IS isrow,IS iscol,MatFactorInfo* info,Mat *newfact)
2061: {
2062:   Mat          newmat;
2063:   Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
2065:   PetscTruth   idn;

2068:   if (info->levels) SETERRQ(PETSC_ERR_SUP,"Blocksolve ILU only supports 0 fill");
2069:   if (isrow) {
2070:     ISIdentity(isrow,&idn);
2071:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
2072:   }
2073:   if (iscol) {
2074:     ISIdentity(iscol,&idn);
2075:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
2076:   }

2078:   if (!mbs->blocksolveassembly) {
2079:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
2080:   }
2081: 
2082: /*   if (mat->symmetric) { */
2083: /*     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"To use ILU preconditioner with \n\ */
2084: /*         MatCreateMPIRowbs() matrix you CANNOT declare it to be a symmetric matrix\n\ */
2085: /*         using the option MatSetOption(A,MAT_SYMMETRIC)"); */
2086: /*   } */

2088:   /* Copy permuted matrix */
2089:   if (mbs->fpA) {BSfree_copy_par_mat(mbs->fpA);CHKERRBS(0);}
2090:   mbs->fpA = BScopy_par_mat(mbs->pA);CHKERRBS(0);

2092:   /* Set up the communication for factorization */
2093:   if (mbs->comm_fpA) {BSfree_comm(mbs->comm_fpA);CHKERRBS(0);}
2094:   mbs->comm_fpA = BSsetup_factor(mbs->fpA,mbs->procinfo);CHKERRBS(0);

2096:   /* 
2097:       Create a new Mat structure to hold the "factored" matrix; note that this
2098:     merely contains a pointer to the original matrix, since the original
2099:     matrix contains the factor information.
2100:   */
2101:   PetscHeaderCreate(newmat,_p_Mat,struct _MatOps,MAT_COOKIE,-1,"Mat",mat->comm,MatDestroy,MatView);
2102:   PetscLogObjectCreate(newmat);
2103:   PetscLogObjectMemory(newmat,sizeof(struct _p_Mat));

2105:   newmat->data         = (void*)mat;
2106:   PetscMemcpy(newmat->ops,&MatOps_Values,sizeof(struct _MatOps));
2107:   newmat->ops->destroy = MatDestroy_MPIRowbs_Factored;
2108:   newmat->ops->view    = MatView_MPIRowbs_Factored;
2109:   newmat->factor       = 1;
2110:   newmat->preallocated = PETSC_TRUE;
2111:   newmat->M            = mat->M;
2112:   newmat->N            = mat->N;
2113:   newmat->m            = mat->m;
2114:   newmat->n            = mat->n;
2115:   PetscStrallocpy(MATMPIROWBS,&newmat->type_name);

2117:   *newfact = newmat;
2118:   return(0);
2119: }

2123: PetscErrorCode MatMPIRowbsGetColor(Mat mat,ISColoring *coloring)
2124: {

2130:   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
2131:   ISColoringCreate(mat->comm,mat->m,0,coloring);

2133:   return(0);
2134: }

2138: /*@C
2139:    MatCreateMPIRowbs - Creates a sparse parallel matrix in the MATMPIROWBS
2140:    format.  This format is intended primarily as an interface for BlockSolve95.

2142:    Collective on MPI_Comm

2144:    Input Parameters:
2145: +  comm - MPI communicator
2146: .  m - number of local rows (or PETSC_DECIDE to have calculated)
2147: .  M - number of global rows (or PETSC_DECIDE to have calculated)
2148: .  nz - number of nonzeros per row (same for all local rows)
2149: -  nnz - number of nonzeros per row (possibly different for each row).

2151:    Output Parameter:
2152: .  newA - the matrix 

2154:    Notes:
2155:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor,
2156:    then it must be used on all processors that share the object for that argument.

2158:    The user MUST specify either the local or global matrix dimensions
2159:    (possibly both).

2161:    Specify the preallocated storage with either nz or nnz (not both).  Set 
2162:    nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
2163:    allocation.

2165:    Notes:
2166:    By default, the matrix is assumed to be nonsymmetric; the user can
2167:    take advantage of special optimizations for symmetric matrices by calling
2168: $     MatSetOption(mat,MAT_SYMMETRIC)
2169: $     MatSetOption(mat,MAT_SYMMETRY_ETERNAL)
2170:    BEFORE calling the routine MatAssemblyBegin().

2172:    Internally, the MATMPIROWBS format inserts zero elements to the
2173:    matrix if necessary, so that nonsymmetric matrices are considered
2174:    to be symmetric in terms of their sparsity structure; this format
2175:    is required for use of the parallel communication routines within
2176:    BlockSolve95. In particular, if the matrix element A[i,j] exists,
2177:    then PETSc will internally allocate a 0 value for the element
2178:    A[j,i] during MatAssemblyEnd() if the user has not already set
2179:    a value for the matrix element A[j,i].

2181:    Options Database Keys:
2182: .  -mat_rowbs_no_inode - Do not use inodes.

2184:    Level: intermediate
2185:   
2186: .keywords: matrix, row, symmetric, sparse, parallel, BlockSolve

2188: .seealso: MatCreate(), MatSetValues()
2189: @*/
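
/*
   Illustrative usage sketch (not part of the original source; the sizes, the
   preallocation value, and the use of MAT_SYMMETRIC are assumptions chosen
   only for the example):

     Mat A;
     MatCreateMPIRowbs(PETSC_COMM_WORLD,PETSC_DECIDE,100,5,PETSC_NULL,&A);
     MatSetOption(A,MAT_SYMMETRIC);
     ... calls to MatSetValues() ...
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/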
2190: PetscErrorCode MatCreateMPIRowbs(MPI_Comm comm,int m,int M,int nz,const int nnz[],Mat *newA)
2191: {
2193: 
2195:   MatCreate(comm,m,m,M,M,newA);
2196:   MatSetType(*newA,MATMPIROWBS);
2197:   MatMPIRowbsSetPreallocation(*newA,nz,nnz);
2198:   return(0);
2199: }


2202: /* -------------------------------------------------------------------------*/

2204:  #include src/mat/impls/aij/seq/aij.h
2205:  #include src/mat/impls/aij/mpi/mpiaij.h

2209: PetscErrorCode MatGetSubMatrices_MPIRowbs(Mat C,int ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
2210: {
2212:   int         nmax,nstages_local,nstages,i,pos,max_no;


2216:   /* Allocate memory to hold all the submatrices */
2217:   if (scall != MAT_REUSE_MATRIX) {
2218:     PetscMalloc((ismax+1)*sizeof(Mat),submat);
2219:   }
2220: 
2221:   /* Determine the number of stages through which submatrices are done */
2222:   nmax          = 20*1000000 / (C->N * sizeof(int));
2223:   if (!nmax) nmax = 1;
2224:   nstages_local = ismax/nmax + ((ismax % nmax)?1:0);
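  /* For example, with C->N = 1000000 global columns and 4-byte ints this gives
     nmax = 20000000/(1000000*4) = 5 index sets per stage, so ismax = 12 index
     sets would be processed in nstages_local = 3 stages (values are hypothetical). */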

2226:   /* Make sure every processor loops through the nstages */
2227:   MPI_Allreduce(&nstages_local,&nstages,1,MPI_INT,MPI_MAX,C->comm);

2229:   for (i=0,pos=0; i<nstages; i++) {
2230:     if (pos+nmax <= ismax) max_no = nmax;
2231:     else if (pos == ismax) max_no = 0;
2232:     else                   max_no = ismax-pos;
2233:     MatGetSubMatrices_MPIRowbs_Local(C,max_no,isrow+pos,iscol+pos,scall,*submat+pos);
2234:     pos += max_no;
2235:   }
2236:   return(0);
2237: }
2238: /* -------------------------------------------------------------------------*/
2239: /* for now MatGetSubMatrices_MPIRowbs_Local returns SeqAIJ submatrices of the input
2240:    matrix and preserves the zeros that come from structural symmetry
2241:  */
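
/*
   Communication pattern of the routine below (summary of the code that follows):
     tag0 - each process sends the owning processes the global numbers of the rows it needs
     tag1 - the owners reply with the length of each requested row and the total buffer size
     tag2 - the owners send the column indices of the requested rows
     tag3 - the owners send the numerical values (FLOAT) of the requested rows
   Rows owned locally are copied directly; the received data is merged in afterwards.
*/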
2244: PetscErrorCode MatGetSubMatrices_MPIRowbs_Local(Mat C,int ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats)
2245: {
2246:   Mat_MPIRowbs  *c = (Mat_MPIRowbs *)(C->data);
2247:   BSspmat       *A = c->A;
2248:   Mat_SeqAIJ    *mat;
2250:   int         **irow,**icol,*nrow,*ncol,*w1,*w2,*w3,*w4,*rtable,start,end,size;
2251:   int         **sbuf1,**sbuf2,rank,m,i,j,k,l,ct1,ct2,**rbuf1,row,proc;
2252:   int         nrqs,msz,**ptr,idx,*req_size,*ctr,*pa,*tmp,tcol,nrqr;
2253:   int         **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2,**rmap;
2254:   int         **cmap,**lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax,*irow_i;
2255:   int         len,ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*cmap_i,*lens_i;
2256:   int         *rmap_i,tag0,tag1,tag2,tag3;
2257:   MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
2258:   MPI_Request *r_waits4,*s_waits3,*s_waits4;
2259:   MPI_Status  *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
2260:   MPI_Status  *r_status3,*r_status4,*s_status4;
2261:   MPI_Comm    comm;
2262:   FLOAT       **rbuf4,**sbuf_aa,*vals,*sbuf_aa_i;
2263:   PetscScalar *mat_a;
2264:   PetscTruth  sorted;
2265:   int         *onodes1,*olengths1;

2268:   comm   = C->comm;
2269:   tag0   = C->tag;
2270:   size   = c->size;
2271:   rank   = c->rank;
2272:   m      = C->M;
2273: 
2274:   /* Get some new tags to keep the communication clean */
2275:   PetscObjectGetNewTag((PetscObject)C,&tag1);
2276:   PetscObjectGetNewTag((PetscObject)C,&tag2);
2277:   PetscObjectGetNewTag((PetscObject)C,&tag3);

2279:     /* Check if the col indices are sorted */
2280:   for (i=0; i<ismax; i++) {
2281:     ISSorted(isrow[i],&sorted);
2282:     if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted");
2283:     ISSorted(iscol[i],&sorted);
2284:     /*    if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted"); */
2285:   }

2287:   len    = (2*ismax+1)*(sizeof(int*)+ sizeof(int)) + (m+1)*sizeof(int);
2288:   PetscMalloc(len,&irow);
2289:   icol   = irow + ismax;
2290:   nrow   = (int*)(icol + ismax);
2291:   ncol   = nrow + ismax;
2292:   rtable = ncol + ismax;

2294:   for (i=0; i<ismax; i++) {
2295:     ISGetIndices(isrow[i],&irow[i]);
2296:     ISGetIndices(iscol[i],&icol[i]);
2297:     ISGetLocalSize(isrow[i],&nrow[i]);
2298:     ISGetLocalSize(iscol[i],&ncol[i]);
2299:   }

2301:   /* Create hash table for the mapping :row -> proc*/
2302:   for (i=0,j=0; i<size; i++) {
2303:     jmax = c->rowners[i+1];
2304:     for (; j<jmax; j++) {
2305:       rtable[j] = i;
2306:     }
2307:   }

2309:   /* evaluate communication - mesg to who, length of mesg, and buffer space
2310:      required. Based on this, buffers are allocated, and data copied into them*/
2311:   PetscMalloc(size*4*sizeof(int),&w1); /* mesg size */
2312:   w2     = w1 + size;      /* if w2[i] marked, then a message to proc i*/
2313:   w3     = w2 + size;      /* no of IS that needs to be sent to proc i */
2314:   w4     = w3 + size;      /* temp work space used in determining w1, w2, w3 */
2315:   PetscMemzero(w1,size*3*sizeof(int)); /* initialize work vector*/
2316:   for (i=0; i<ismax; i++) {
2317:     PetscMemzero(w4,size*sizeof(int)); /* initialize work vector*/
2318:     jmax   = nrow[i];
2319:     irow_i = irow[i];
2320:     for (j=0; j<jmax; j++) {
2321:       row  = irow_i[j];
2322:       proc = rtable[row];
2323:       w4[proc]++;
2324:     }
2325:     for (j=0; j<size; j++) {
2326:       if (w4[j]) { w1[j] += w4[j];  w3[j]++;}
2327:     }
2328:   }
2329: 
2330:   nrqs     = 0;              /* no of outgoing messages */
2331:   msz      = 0;              /* total mesg length (for all procs) */
2332:   w1[rank] = 0;              /* no mesg sent to self */
2333:   w3[rank] = 0;
2334:   for (i=0; i<size; i++) {
2335:     if (w1[i])  { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
2336:   }
2337:   PetscMalloc((nrqs+1)*sizeof(int),&pa); /*(proc -array)*/
2338:   for (i=0,j=0; i<size; i++) {
2339:     if (w1[i]) { pa[j] = i; j++; }
2340:   }

2342:   /* Each message would have a header = 1 + 2*(no of IS) + data */
2343:   for (i=0; i<nrqs; i++) {
2344:     j     = pa[i];
2345:     w1[j] += w2[j] + 2* w3[j];
2346:     msz   += w1[j];
2347:   }
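
  /* Layout of each outgoing request buffer sbuf1[j] (built below):
     sbuf1[j][0]                       - number of index sets that need rows from proc j
     sbuf1[j][2*k-1] and sbuf1[j][2*k] - for the k-th such set: its index and its row count
     remaining entries                 - the requested global row numbers themselves        */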

2349:   /* Determine the number of messages to expect, their lengths, and their from-ids */
2350:   PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
2351:   PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);

2353:   /* Now post the Irecvs corresponding to these messages */
2354:   PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);
2355: 
2356:   PetscFree(onodes1);
2357:   PetscFree(olengths1);
2358: 
2359:   /* Allocate Memory for outgoing messages */
2360:   len      = 2*size*sizeof(int*) + 2*msz*sizeof(int) + size*sizeof(int);
2361:   PetscMalloc(len,&sbuf1);
2362:   ptr      = sbuf1 + size;   /* Pointers to the data in outgoing buffers */
2363:   PetscMemzero(sbuf1,2*size*sizeof(int*));
2364:   /* allocate memory for outgoing data + buf to receive the first reply */
2365:   tmp      = (int*)(ptr + size);
2366:   ctr      = tmp + 2*msz;

2368:   {
2369:     int *iptr = tmp,ict = 0;
2370:     for (i=0; i<nrqs; i++) {
2371:       j         = pa[i];
2372:       iptr     += ict;
2373:       sbuf1[j]  = iptr;
2374:       ict       = w1[j];
2375:     }
2376:   }

2378:   /* Form the outgoing messages */
2379:   /* Initialize the header space */
2380:   for (i=0; i<nrqs; i++) {
2381:     j           = pa[i];
2382:     sbuf1[j][0] = 0;
2383:     PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(int));
2384:     ptr[j]      = sbuf1[j] + 2*w3[j] + 1;
2385:   }
2386: 
2387:   /* Parse the isrow and copy data into outbuf */
2388:   for (i=0; i<ismax; i++) {
2389:     PetscMemzero(ctr,size*sizeof(int));
2390:     irow_i = irow[i];
2391:     jmax   = nrow[i];
2392:     for (j=0; j<jmax; j++) {  /* parse the indices of each IS */
2393:       row  = irow_i[j];
2394:       proc = rtable[row];
2395:       if (proc != rank) { /* copy to the outgoing buf*/
2396:         ctr[proc]++;
2397:         *ptr[proc] = row;
2398:         ptr[proc]++;
2399:       }
2400:     }
2401:     /* Update the headers for the current IS */
2402:     for (j=0; j<size; j++) { /* Can Optimise this loop too */
2403:       if ((ctr_j = ctr[j])) {
2404:         sbuf1_j        = sbuf1[j];
2405:         k              = ++sbuf1_j[0];
2406:         sbuf1_j[2*k]   = ctr_j;
2407:         sbuf1_j[2*k-1] = i;
2408:       }
2409:     }
2410:   }

2412:   /*  Now  post the sends */
2413:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
2414:   for (i=0; i<nrqs; ++i) {
2415:     j    = pa[i];
2416:     MPI_Isend(sbuf1[j],w1[j],MPI_INT,j,tag0,comm,s_waits1+i);
2417:   }

2419:   /* Post Receives to capture the buffer size */
2420:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);
2421:   PetscMalloc((nrqs+1)*sizeof(int*),&rbuf2);
2422:   rbuf2[0] = tmp + msz;
2423:   for (i=1; i<nrqs; ++i) {
2424:     rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
2425:   }
2426:   for (i=0; i<nrqs; ++i) {
2427:     j    = pa[i];
2428:     MPI_Irecv(rbuf2[i],w1[j],MPI_INT,j,tag1,comm,r_waits2+i);
2429:   }

2431:   /* Send to other procs the buf size they should allocate */
2432: 

2434:   /* Receive messages*/
2435:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
2436:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);
2437:   len         = 2*nrqr*sizeof(int) + (nrqr+1)*sizeof(int*);
2438:   PetscMalloc(len,&sbuf2);
2439:   req_size    = (int*)(sbuf2 + nrqr);
2440:   req_source  = req_size + nrqr;
2441: 
2442:   {
2443:     BSsprow    **sAi = A->rows;
2444:     int        id,rstart = c->rstart;
2445:     int        *sbuf2_i;

2447:     for (i=0; i<nrqr; ++i) {
2448:       MPI_Waitany(nrqr,r_waits1,&idx,r_status1+i);
2449:       req_size[idx]   = 0;
2450:       rbuf1_i         = rbuf1[idx];
2451:       start           = 2*rbuf1_i[0] + 1;
2452:       MPI_Get_count(r_status1+i,MPI_INT,&end);
2453:       PetscMalloc((end+1)*sizeof(int),&sbuf2[idx]);
2454:       sbuf2_i         = sbuf2[idx];
2455:       for (j=start; j<end; j++) {
2456:         id               = rbuf1_i[j] - rstart;
2457:         ncols            = (sAi[id])->length;
2458:         sbuf2_i[j]       = ncols;
2459:         req_size[idx]   += ncols;
2460:       }
2461:       req_source[idx] = r_status1[i].MPI_SOURCE;
2462:       /* form the header */
2463:       sbuf2_i[0]   = req_size[idx];
2464:       for (j=1; j<start; j++) { sbuf2_i[j] = rbuf1_i[j]; }
2465:       MPI_Isend(sbuf2_i,end,MPI_INT,req_source[idx],tag1,comm,s_waits2+i);
2466:     }
2467:   }
2468:   PetscFree(r_status1);
2469:   PetscFree(r_waits1);

2471:   /*  recv buffer sizes */
2472:   /* Receive messages*/
2473: 
2474:   PetscMalloc((nrqs+1)*sizeof(int*),&rbuf3);
2475:   PetscMalloc((nrqs+1)*sizeof(FLOAT *),&rbuf4);
2476:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);
2477:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);
2478:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);

2480:   for (i=0; i<nrqs; ++i) {
2481:     MPI_Waitany(nrqs,r_waits2,&idx,r_status2+i);
2482:     PetscMalloc((rbuf2[idx][0]+1)*sizeof(int),&rbuf3[idx]);
2483:     PetscMalloc((rbuf2[idx][0]+1)*sizeof(FLOAT),&rbuf4[idx]);
2484:     MPI_Irecv(rbuf3[idx],rbuf2[idx][0],MPI_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idx);
2485:     MPI_Irecv(rbuf4[idx],rbuf2[idx][0],MPIU_SCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idx);
2486:   }
2487:   PetscFree(r_status2);
2488:   PetscFree(r_waits2);
2489: 
2490:   /* Wait on sends1 and sends2 */
2491:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);
2492:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);

2494:   MPI_Waitall(nrqs,s_waits1,s_status1);
2495:   MPI_Waitall(nrqr,s_waits2,s_status2);
2496:   PetscFree(s_status1);
2497:   PetscFree(s_status2);
2498:   PetscFree(s_waits1);
2499:   PetscFree(s_waits2);

2501:   /* Now allocate buffers for a->j, and send them off */
2502:   PetscMalloc((nrqr+1)*sizeof(int*),&sbuf_aj);
2503:   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
2504:   PetscMalloc((j+1)*sizeof(int),&sbuf_aj[0]);
2505:   for (i=1; i<nrqr; i++)  sbuf_aj[i] = sbuf_aj[i-1] + req_size[i-1];
2506: 
2507:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);
2508:   {
2509:     BSsprow *brow;
2510:     int *Acol;
2511:     int rstart = c->rstart;

2513:     for (i=0; i<nrqr; i++) {
2514:       rbuf1_i   = rbuf1[i];
2515:       sbuf_aj_i = sbuf_aj[i];
2516:       ct1       = 2*rbuf1_i[0] + 1;
2517:       ct2       = 0;
2518:       for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
2519:         kmax = rbuf1[i][2*j];
2520:         for (k=0; k<kmax; k++,ct1++) {
2521:           brow   = A->rows[rbuf1_i[ct1] - rstart];
2522:           ncols  = brow->length;
2523:           Acol   = brow->col;
2524:           /* load the column indices for this row into cols*/
2525:           cols  = sbuf_aj_i + ct2;
2526:           PetscMemcpy(cols,Acol,ncols*sizeof(int));
2527:           /* for (l=0; l<ncols; l++) cols[l] = Acol[l]; */
2528:           /* how does this interact with the mappings? */
2529:           ct2 += ncols;
2530:         }
2531:       }
2532:       MPI_Isend(sbuf_aj_i,req_size[i],MPI_INT,req_source[i],tag2,comm,s_waits3+i);
2533:     }
2534:   }
2535:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);
2536:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);

2538:   /* Allocate buffers for a->a, and send them off */
2539:   PetscMalloc((nrqr+1)*sizeof(FLOAT*),&sbuf_aa);
2540:   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
2541:   PetscMalloc((j+1)*sizeof(FLOAT),&sbuf_aa[0]);
2542:   for (i=1; i<nrqr; i++)  sbuf_aa[i] = sbuf_aa[i-1] + req_size[i-1];
2543: 
2544:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);
2545:   {
2546:     BSsprow *brow;
2547:     FLOAT *Aval;
2548:     int rstart = c->rstart;
2549: 
2550:     for (i=0; i<nrqr; i++) {
2551:       rbuf1_i   = rbuf1[i];
2552:       sbuf_aa_i = sbuf_aa[i];
2553:       ct1       = 2*rbuf1_i[0]+1;
2554:       ct2       = 0;
2555:       for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
2556:         kmax = rbuf1_i[2*j];
2557:         for (k=0; k<kmax; k++,ct1++) {
2558:           brow  = A->rows[rbuf1_i[ct1] - rstart];
2559:           ncols = brow->length;
2560:           Aval  = brow->nz;
2561:           /* load the column values for this row into vals*/
2562:           vals  = sbuf_aa_i+ct2;
2563:           PetscMemcpy(vals,Aval,ncols*sizeof(FLOAT));
2564:           ct2 += ncols;
2565:         }
2566:       }
2567:       MPI_Isend(sbuf_aa_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);
2568:     }
2569:   }
2570:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);
2571:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);
2572:   PetscFree(rbuf1);

2574:   /* Form the matrix */
2575:   /* create col map */
2576:   {
2577:     int *icol_i;
2578: 
2579:     len     = (1+ismax)*sizeof(int*)+ ismax*C->N*sizeof(int);
2580:     PetscMalloc(len,&cmap);
2581:     cmap[0] = (int*)(cmap + ismax);
2582:     PetscMemzero(cmap[0],(1+ismax*C->N)*sizeof(int));
2583:     for (i=1; i<ismax; i++) { cmap[i] = cmap[i-1] + C->N; }
2584:     for (i=0; i<ismax; i++) {
2585:       jmax   = ncol[i];
2586:       icol_i = icol[i];
2587:       cmap_i = cmap[i];
2588:       for (j=0; j<jmax; j++) {
2589:         cmap_i[icol_i[j]] = j+1;
2590:       }
2591:     }
2592:   }

2594:   /* Create lens which is required for MatCreate... */
2595:   for (i=0,j=0; i<ismax; i++) { j += nrow[i]; }
2596:   len     = (1+ismax)*sizeof(int*)+ j*sizeof(int);
2597:   PetscMalloc(len,&lens);
2598:   lens[0] = (int*)(lens + ismax);
2599:   PetscMemzero(lens[0],j*sizeof(int));
2600:   for (i=1; i<ismax; i++) { lens[i] = lens[i-1] + nrow[i-1]; }
2601: 
2602:   /* Update lens from local data */
2603:   { BSsprow *Arow;
2604:     for (i=0; i<ismax; i++) {
2605:       jmax   = nrow[i];
2606:       cmap_i = cmap[i];
2607:       irow_i = irow[i];
2608:       lens_i = lens[i];
2609:       for (j=0; j<jmax; j++) {
2610:         row  = irow_i[j];
2611:         proc = rtable[row];
2612:         if (proc == rank) {
2613:           Arow=A->rows[row-c->rstart];
2614:           ncols=Arow->length;
2615:           cols=Arow->col;
2616:           for (k=0; k<ncols; k++) {
2617:             if (cmap_i[cols[k]]) { lens_i[j]++;}
2618:           }
2619:         }
2620:       }
2621:     }
2622:   }
2623: 
2624:   /* Create row map*/
2625:   len     = (1+ismax)*sizeof(int*)+ ismax*C->M*sizeof(int);
2626:   PetscMalloc(len,&rmap);
2627:   rmap[0] = (int*)(rmap + ismax);
2628:   PetscMemzero(rmap[0],ismax*C->M*sizeof(int));
2629:   for (i=1; i<ismax; i++) { rmap[i] = rmap[i-1] + C->M;}
2630:   for (i=0; i<ismax; i++) {
2631:     rmap_i = rmap[i];
2632:     irow_i = irow[i];
2633:     jmax   = nrow[i];
2634:     for (j=0; j<jmax; j++) {
2635:       rmap_i[irow_i[j]] = j;
2636:     }
2637:   }
2638: 
2639:   /* Update lens from offproc data */
2640:   {
2641:     int *rbuf2_i,*rbuf3_i,*sbuf1_i;

2643:     for (tmp2=0; tmp2<nrqs; tmp2++) {
2644:       MPI_Waitany(nrqs,r_waits3,&i,r_status3+tmp2);
2645:       idx     = pa[i];
2646:       sbuf1_i = sbuf1[idx];
2647:       jmax    = sbuf1_i[0];
2648:       ct1     = 2*jmax+1;
2649:       ct2     = 0;
2650:       rbuf2_i = rbuf2[i];
2651:       rbuf3_i = rbuf3[i];
2652:       for (j=1; j<=jmax; j++) {
2653:         is_no   = sbuf1_i[2*j-1];
2654:         max1    = sbuf1_i[2*j];
2655:         lens_i  = lens[is_no];
2656:         cmap_i  = cmap[is_no];
2657:         rmap_i  = rmap[is_no];
2658:         for (k=0; k<max1; k++,ct1++) {
2659:           row  = rmap_i[sbuf1_i[ct1]]; /* the val in the new matrix to be */
2660:           max2 = rbuf2_i[ct1];
2661:           for (l=0; l<max2; l++,ct2++) {
2662:             if (cmap_i[rbuf3_i[ct2]]) {
2663:               lens_i[row]++;
2664:             }
2665:           }
2666:         }
2667:       }
2668:     }
2669:   }
2670:   PetscFree(r_status3);
2671:   PetscFree(r_waits3);
2672:   MPI_Waitall(nrqr,s_waits3,s_status3);
2673:   PetscFree(s_status3);
2674:   PetscFree(s_waits3);

2676:   /* Create the submatrices */
2677:   if (scall == MAT_REUSE_MATRIX) {
2678:     PetscTruth same;
2679: 
2680:     /*
2681:         Assumes the new rows are the same length as the old rows - hence a bug!
2682:     */
2683:     for (i=0; i<ismax; i++) {
2684:       PetscTypeCompare((PetscObject)(submats[i]),MATSEQAIJ,&same);
2685:       if (same == PETSC_FALSE) {
2686:         SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong type");
2687:       }
2688:       mat = (Mat_SeqAIJ*)(submats[i]->data);
2689:       if ((submats[i]->m != nrow[i]) || (submats[i]->n != ncol[i])) {
2690:         SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
2691:       }
2692:       PetscMemcmp(mat->ilen,lens[i],submats[i]->m*sizeof(int),&same);
2693:       if (same == PETSC_FALSE) {
2694:         SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
2695:       }
2696:       /* Initial matrix as if empty */
2697:       PetscMemzero(mat->ilen,submats[i]->m*sizeof(int));
2698:       submats[i]->factor = C->factor;
2699:     }
2700:   } else {
2701:     for (i=0; i<ismax; i++) {
2702:       /* Here we want to explicitly generate SeqAIJ matrices */
2703:       MatCreate(PETSC_COMM_SELF,nrow[i],ncol[i],nrow[i],ncol[i],submats+i);
2704:       MatSetType(submats[i],MATSEQAIJ);
2705:       MatSeqAIJSetPreallocation(submats[i],0,lens[i]);
2706:     }
2707:   }

2709:   /* Assemble the matrices */
2710:   /* First assemble the local rows */
2711:   {
2712:     int    ilen_row,*imat_ilen,*imat_j,*imat_i,old_row;
2713:     PetscScalar *imat_a;
2714:     BSsprow *Arow;
2715: 
2716:     for (i=0; i<ismax; i++) {
2717:       mat       = (Mat_SeqAIJ*)submats[i]->data;
2718:       imat_ilen = mat->ilen;
2719:       imat_j    = mat->j;
2720:       imat_i    = mat->i;
2721:       imat_a    = mat->a;
2722:       cmap_i    = cmap[i];
2723:       rmap_i    = rmap[i];
2724:       irow_i    = irow[i];
2725:       jmax      = nrow[i];
2726:       for (j=0; j<jmax; j++) {
2727:         row      = irow_i[j];
2728:         proc     = rtable[row];
2729:         if (proc == rank) {
2730:           old_row  = row;
2731:           row      = rmap_i[row];
2732:           ilen_row = imat_ilen[row];
2733: 
2734:           Arow=A->rows[old_row-c->rstart];
2735:           ncols=Arow->length;
2736:           cols=Arow->col;
2737:           vals=Arow->nz;
2738: 
2739:           mat_i    = imat_i[row];
2740:           mat_a    = imat_a + mat_i;
2741:           mat_j    = imat_j + mat_i;
2742:           for (k=0; k<ncols; k++) {
2743:             if ((tcol = cmap_i[cols[k]])) {
2744:               *mat_j++ = tcol - 1;
2745:               *mat_a++ = (PetscScalar)vals[k];
2746:               ilen_row++;
2747:             }
2748:           }
2749:           imat_ilen[row] = ilen_row;
2750:         }
2751:       }
2752:     }
2753:   }

2755:   /*   Now assemble the off proc rows*/
2756:   {
2757:     int    *sbuf1_i,*rbuf2_i,*rbuf3_i,*imat_ilen,ilen;
2758:     int    *imat_j,*imat_i;
2759:     PetscScalar *imat_a;
2760:     FLOAT *rbuf4_i;
2761: 
2762:     for (tmp2=0; tmp2<nrqs; tmp2++) {
2763:       MPI_Waitany(nrqs,r_waits4,&i,r_status4+tmp2);
2764:       idx     = pa[i];
2765:       sbuf1_i = sbuf1[idx];
2766:       jmax    = sbuf1_i[0];
2767:       ct1     = 2*jmax + 1;
2768:       ct2     = 0;
2769:       rbuf2_i = rbuf2[i];
2770:       rbuf3_i = rbuf3[i];
2771:       rbuf4_i = rbuf4[i];
2772:       for (j=1; j<=jmax; j++) {
2773:         is_no     = sbuf1_i[2*j-1];
2774:         rmap_i    = rmap[is_no];
2775:         cmap_i    = cmap[is_no];
2776:         mat       = (Mat_SeqAIJ*)submats[is_no]->data;
2777:         imat_ilen = mat->ilen;
2778:         imat_j    = mat->j;
2779:         imat_i    = mat->i;
2780:         imat_a    = mat->a;
2781:         max1      = sbuf1_i[2*j];
2782:         for (k=0; k<max1; k++,ct1++) {
2783:           row   = sbuf1_i[ct1];
2784:           row   = rmap_i[row];
2785:           ilen  = imat_ilen[row];
2786:           mat_i = imat_i[row];
2787:           mat_a = imat_a + mat_i;
2788:           mat_j = imat_j + mat_i;
2789:           max2 = rbuf2_i[ct1];
2790:           for (l=0; l<max2; l++,ct2++) {
2791:             if ((tcol = cmap_i[rbuf3_i[ct2]])) {
2792:               *mat_j++ = tcol - 1;
2793:               *mat_a++ = (PetscScalar)rbuf4_i[ct2];
2794:               ilen++;
2795:             }
2796:           }
2797:           imat_ilen[row] = ilen;
2798:         }
2799:       }
2800:     }
2801:   }
2802:   PetscFree(r_status4);
2803:   PetscFree(r_waits4);
2804:   MPI_Waitall(nrqr,s_waits4,s_status4);
2805:   PetscFree(s_waits4);
2806:   PetscFree(s_status4);

2808:   /* Restore the indices */
2809:   for (i=0; i<ismax; i++) {
2810:     ISRestoreIndices(isrow[i],irow+i);
2811:     ISRestoreIndices(iscol[i],icol+i);
2812:   }

2814:   /* Destroy allocated memory */
2815:   PetscFree(irow);
2816:   PetscFree(w1);
2817:   PetscFree(pa);

2819:   PetscFree(sbuf1);
2820:   PetscFree(rbuf2);
2821:   for (i=0; i<nrqr; ++i) {
2822:     PetscFree(sbuf2[i]);
2823:   }
2824:   for (i=0; i<nrqs; ++i) {
2825:     PetscFree(rbuf3[i]);
2826:     PetscFree(rbuf4[i]);
2827:   }

2829:   PetscFree(sbuf2);
2830:   PetscFree(rbuf3);
2831:   PetscFree(rbuf4);
2832:   PetscFree(sbuf_aj[0]);
2833:   PetscFree(sbuf_aj);
2834:   PetscFree(sbuf_aa[0]);
2835:   PetscFree(sbuf_aa);
2836: 
2837:   PetscFree(cmap);
2838:   PetscFree(rmap);
2839:   PetscFree(lens);

2841:   for (i=0; i<ismax; i++) {
2842:     MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);
2843:     MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);
2844:   }
2845:   return(0);
2846: }

2848: /*
2849:   can be optimized by sending only the nonzeros whose columns lie in the iscol IS -
2850:   i.e. by prebuilding the submatrix, including the A,B partitioning, on the sending side
2851:   */
2854:  #include src/vec/is/impls/general/general.h
2855: PetscErrorCode MatGetSubMatrix_MPIRowbs(Mat C,IS isrow,IS iscol,int csize,MatReuse scall,Mat *submat)
2856: {
2857:   Mat_MPIRowbs  *c = (Mat_MPIRowbs*)C->data;
2858:   BSspmat       *A = c->A;
2859:   BSsprow *Arow;
2860:   Mat_SeqAIJ    *matA,*matB; /* on-proc and off-proc parts of submat */
2861:   Mat_MPIAIJ    *mat;  /* submat->data */
2863:   int    *irow,*icol,nrow,ncol,*rtable,size,rank,tag0,tag1,tag2,tag3;
2864:   int    *w1,*w2,*pa,nrqs,nrqr,msz,row_t;
2865:   int    i,j,k,l,len,jmax,proc,idx;
2866:   int    **sbuf1,**sbuf2,**rbuf1,**rbuf2,*req_size,**sbuf3,**rbuf3;
2867:   FLOAT  **rbuf4,**sbuf4; /* FLOAT is from Block Solve 95 library */

2869:   int    *cmap,*rmap,nlocal,*o_nz,*d_nz,cstart,cend;
2870:   int    *req_source;
2871:   int    ncols_t;
2872: 
2873: 
2874:   MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
2875:   MPI_Request *r_waits4,*s_waits3,*s_waits4;
2876: 
2877:   MPI_Status  *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
2878:   MPI_Status  *r_status3,*r_status4,*s_status4;
2879:   MPI_Comm    comm;


2883:   comm   = C->comm;
2884:   tag0   = C->tag;
2885:   size   = c->size;
2886:   rank   = c->rank;

2888:   if (size==1) {
2889:     if (scall == MAT_REUSE_MATRIX) {
2890:       ierr=MatGetSubMatrices(C,1,&isrow,&iscol,MAT_REUSE_MATRIX,&submat);
2891:       return(0);
2892:     } else {
2893:       Mat *newsubmat;
2894: 
2895:       ierr=MatGetSubMatrices(C,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&newsubmat);
2896:       *submat=*newsubmat;
2897:       ierr=PetscFree(newsubmat);
2898:       return(0);
2899:     }
2900:   }
2901: 
2902:   /* Get some new tags to keep the communication clean */
2903:   PetscObjectGetNewTag((PetscObject)C,&tag1);
2904:   PetscObjectGetNewTag((PetscObject)C,&tag2);
2905:   PetscObjectGetNewTag((PetscObject)C,&tag3);

2907:   /* Check if the col indices are sorted */
2908:   {PetscTruth sorted;
2909:   ISSorted(isrow,&sorted);
2910:   if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted");
2911:   ISSorted(iscol,&sorted);
2912:   if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted");
2913:   }
2914: 
2915:   ISGetIndices(isrow,&irow);
2916:   ISGetIndices(iscol,&icol);
2917:   ISGetLocalSize(isrow,&nrow);
2918:   ISGetLocalSize(iscol,&ncol);
2919: 
2920:   if (!isrow) SETERRQ(PETSC_ERR_ARG_SIZ,"Empty ISrow");
2921:   if (!iscol) SETERRQ(PETSC_ERR_ARG_SIZ,"Empty IScol");
2922: 
2923: 
2924:   len    = (C->M+1)*sizeof(int);
2925:   PetscMalloc(len,&rtable);
2926:   /* Create hash table for the mapping :row -> proc*/
2927:   for (i=0,j=0; i<size; i++) {
2928:     jmax = c->rowners[i+1];
2929:     for (; j<jmax; j++) {
2930:       rtable[j] = i;
2931:     }
2932:   }

2934:   /* evaluate communication - mesg to who, length of mesg, and buffer space
2935:      required. Based on this, buffers are allocated, and data copied into them*/
2936:   PetscMalloc(size*2*sizeof(int),&w1); /* mesg size */
2937:   w2     = w1 + size;      /* if w2[i] marked, then a message to proc i*/
2938:   PetscMemzero(w1,size*2*sizeof(int)); /* initialize work vector*/
2939:   for (j=0; j<nrow; j++) {
2940:     row_t  = irow[j];
2941:     proc   = rtable[row_t];
2942:     w1[proc]++;
2943:   }
2944:   nrqs     = 0;              /* no of outgoing messages */
2945:   msz      = 0;              /* total mesg length (for all procs) */
2946:   w1[rank] = 0;              /* no mesg sent to self */
2947:   for (i=0; i<size; i++) {
2948:     if (w1[i])  { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
2949:   }
2950: 
2951:   PetscMalloc((nrqs+1)*sizeof(int),&pa); /*(proc -array)*/
2952:   for (i=0,j=0; i<size; i++) {
2953:     if (w1[i]) {
2954:       pa[j++] = i;
2955:       w1[i]++;  /* header for return data */
2956:       msz+=w1[i];
2957:     }
2958:   }
2959: 
2960:   {int  *onodes1,*olengths1;
2961:   /* Determine the number of messages to expect, their lengths, and their from-ids */
2962:   PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
2963:   PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);
2964:   /* Now post the Irecvs corresponding to these messages */
2965:   PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);
2966:   PetscFree(onodes1);
2967:   PetscFree(olengths1);
2968:   }
2969: 
2970: { int **ptr,*iptr,*tmp;
2971:   /* Allocate Memory for outgoing messages */
2972:   len      = 2*size*sizeof(int*) + msz*sizeof(int);
2973:   PetscMalloc(len,&sbuf1);
2974:   ptr      = sbuf1 + size;   /* Pointers to the data in outgoing buffers */
2975:   PetscMemzero(sbuf1,2*size*sizeof(int*));
2976:   /* allocate memory for outgoing data + buf to receive the first reply */
2977:   tmp      = (int*)(ptr + size);

2979:   for (i=0,iptr=tmp; i<nrqs; i++) {
2980:     j         = pa[i];
2981:     sbuf1[j]  = iptr;
2982:     iptr     += w1[j];
2983:   }

2985:   /* Form the outgoing messages */
2986:   for (i=0; i<nrqs; i++) {
2987:     j           = pa[i];
2988:     sbuf1[j][0] = 0;   /*header */
2989:     ptr[j]      = sbuf1[j] + 1;
2990:   }
2991: 
2992:   /* Parse the isrow and copy data into outbuf */
2993:   for (j=0; j<nrow; j++) {
2994:     row_t  = irow[j];
2995:     proc = rtable[row_t];
2996:     if (proc != rank) { /* copy to the outgoing buf*/
2997:       sbuf1[proc][0]++;
2998:       *ptr[proc] = row_t;
2999:       ptr[proc]++;
3000:     }
3001:   }
3002: } /* block */

3004:   /*  Now  post the sends */
3005: 
3006:   /* structure of sbuf1[i]/rbuf1[i]: 1 entry (the number of requested rows) followed by
3007:    * the global numbers of the requested (non-local) rows */

3009:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
3010:   for (i=0; i<nrqs; ++i) {
3011:     j    = pa[i];
3012:     MPI_Isend(sbuf1[j],w1[j],MPI_INT,j,tag0,comm,s_waits1+i);
3013:   }

3015:   /* Post Receives to capture the buffer size */
3016:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);
3017:   PetscMalloc((nrqs+1)*sizeof(int*),&rbuf2);
3018:   PetscMalloc(msz*sizeof(int)+1,&(rbuf2[0]));
3019:   for (i=1; i<nrqs; ++i) {
3020:     rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
3021:   }
3022:   for (i=0; i<nrqs; ++i) {
3023:     j    = pa[i];
3024:     MPI_Irecv(rbuf2[i],w1[j],MPI_INT,j,tag1,comm,r_waits2+i);
3025:   }

3027:   /* Send to other procs the buf size they should allocate */
3028:   /* structure of sbuf2[i]/rbuf2[i]: 1 entry (total size to allocate) followed by one
3029:    * entry per requested row (the row sizes) */

3031:   /* Receive messages*/
3032:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
3033:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);
3034:   len         = 2*nrqr*sizeof(int) + (nrqr+1)*sizeof(int*);
3035:   PetscMalloc(len,&sbuf2);
3036:   req_size    = (int*)(sbuf2 + nrqr);
3037:   req_source  = req_size + nrqr;
3038: 
3039:   {
3040:     BSsprow    **sAi = A->rows;
3041:     int        id,rstart = c->rstart;
3042:     int        *sbuf2_i,*rbuf1_i,end;

3044:     for (i=0; i<nrqr; ++i) {
3045:       MPI_Waitany(nrqr,r_waits1,&idx,r_status1+i);
3046:       req_size[idx]   = 0;
3047:       rbuf1_i         = rbuf1[idx];
3048:       MPI_Get_count(r_status1+i,MPI_INT,&end);
3049:       PetscMalloc((end+1)*sizeof(int),&sbuf2[idx]);
3050:       sbuf2_i         = sbuf2[idx];
3051:       for (j=1; j<end; j++) {
3052:         id               = rbuf1_i[j] - rstart;
3053:         ncols_t          = (sAi[id])->length;
3054:         sbuf2_i[j]       = ncols_t;
3055:         req_size[idx]   += ncols_t;
3056:       }
3057:       req_source[idx] = r_status1[i].MPI_SOURCE;
3058:       /* form the header */
3059:       sbuf2_i[0]   = req_size[idx];
3060:       MPI_Isend(sbuf2_i,end,MPI_INT,req_source[idx],tag1,comm,s_waits2+i);
3061:     }
3062:   }
3063:   PetscFree(r_status1);
3064:   PetscFree(r_waits1);

3066:   /*  recv buffer sizes */
3067:   /* Receive messages*/
3068: 
3069:   PetscMalloc((nrqs+1)*sizeof(int*),&rbuf3);
3070:   PetscMalloc((nrqs+1)*sizeof(FLOAT*),&rbuf4);
3071:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);
3072:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);
3073:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);

3075:   for (i=0; i<nrqs; ++i) {
3076:     MPI_Waitany(nrqs,r_waits2,&idx,r_status2+i);
3077:     PetscMalloc((rbuf2[idx][0]+1)*sizeof(int),&rbuf3[idx]);
3078:     PetscMalloc((rbuf2[idx][0]+1)*sizeof(FLOAT),&rbuf4[idx]);
3079:     MPI_Irecv(rbuf3[idx],rbuf2[idx][0],MPI_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idx);
3080:     MPI_Irecv(rbuf4[idx],rbuf2[idx][0],MPIU_SCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idx);
3081:   }
3082:   PetscFree(r_status2);
3083:   PetscFree(r_waits2);
3084: 
3085:   /* Wait on sends1 and sends2 */
3086:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);
3087:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);

3089:   MPI_Waitall(nrqs,s_waits1,s_status1);
3090:   MPI_Waitall(nrqr,s_waits2,s_status2);
3091:   PetscFree(s_status1);
3092:   PetscFree(s_status2);
3093:   PetscFree(s_waits1);
3094:   PetscFree(s_waits2);

3096:   /* Now allocate buffers for a->j, and send them off */
3097:   /* structure of sbuf3[i]/rbuf3[i] and sbuf4[i]/rbuf4[i]: req_size[i] entries (the columns
3098:    * resp. values of all requested rows; the row sizes were sent in rbuf2; values are of FLOAT type) */
3099: 
3100:   PetscMalloc((nrqr+1)*sizeof(int*),&sbuf3);
3101:   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
3102:   PetscMalloc((j+1)*sizeof(int),&sbuf3[0]);
3103:   for (i=1; i<nrqr; i++)  sbuf3[i] = sbuf3[i-1] + req_size[i-1];
3104: 
3105:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);
3106:   {
3107:     int *Acol,*rbuf1_i,*sbuf3_i,rqrow,noutcols,kmax,*cols,ncols;
3108:     int rstart = c->rstart;

3110:     for (i=0; i<nrqr; i++) {
3111:       rbuf1_i   = rbuf1[i];
3112:       sbuf3_i   = sbuf3[i];
3113:       noutcols  = 0;
3114:       kmax = rbuf1_i[0];  /* num. of req. rows */
3115:       for (k=0,rqrow=1; k<kmax; k++,rqrow++) {
3116:         Arow    = A->rows[rbuf1_i[rqrow] - rstart];
3117:         ncols  = Arow->length;
3118:         Acol   = Arow->col;
3119:         /* load the column indices for this row into cols*/
3120:         cols  = sbuf3_i + noutcols;
3121:         PetscMemcpy(cols,Acol,ncols*sizeof(int));
3122:         /* for (l=0; l<ncols; l++) cols[l] = Acol[l]; */ /* how does this interact with the mappings? */
3123:         noutcols += ncols;
3124:       }
3125:       MPI_Isend(sbuf3_i,req_size[i],MPI_INT,req_source[i],tag2,comm,s_waits3+i);
3126:     }
3127:   }
3128:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);
3129:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);

3131:   /* Allocate buffers for a->a, and send them off */
3132:   /* can be optimized by merging with the previous block */
3133:   PetscMalloc((nrqr+1)*sizeof(FLOAT*),&sbuf4);
3134:   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
3135:   PetscMalloc((j+1)*sizeof(FLOAT),&sbuf4[0]);
3136:   for (i=1; i<nrqr; i++)  sbuf4[i] = sbuf4[i-1] + req_size[i-1];
3137: 
3138:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);
3139:   {
3140:     FLOAT *Aval,*vals,*sbuf4_i;
3141:     int rstart = c->rstart,*rbuf1_i,rqrow,noutvals,kmax,ncols;
3142: 
3143: 
3144:     for (i=0; i<nrqr; i++) {
3145:       rbuf1_i   = rbuf1[i];
3146:       sbuf4_i   = sbuf4[i];
3147:       rqrow     = 1;
3148:       noutvals  = 0;
3149:       kmax      = rbuf1_i[0];  /* num of req. rows */
3150:       for (k=0; k<kmax; k++,rqrow++) {
3151:         Arow    = A->rows[rbuf1_i[rqrow] - rstart];
3152:         ncols  = Arow->length;
3153:         Aval = Arow->nz;
3154:         /* load the column values for this row into vals*/
3155:         vals  = sbuf4_i+noutvals;
3156:         PetscMemcpy(vals,Aval,ncols*sizeof(FLOAT));
3157:         noutvals += ncols;
3158:       }
3159:       MPI_Isend(sbuf4_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);
3160:     }
3161:   }
3162:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);
3163:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);
3164:   PetscFree(rbuf1);

3166:   /* Form the matrix */

3168:   /* create col map */
3169:   len     = C->N*sizeof(int)+1;
3170:   PetscMalloc(len,&cmap);
3171:   PetscMemzero(cmap,C->N*sizeof(int));
3172:   for (j=0; j<ncol; j++) {
3173:       cmap[icol[j]] = j+1;
3174:   }
3175: 
3176:   /* Create row map; a global row map may eventually be needed, but this one is local */
3177:   len     = C->M*sizeof(int)+1;
3178:   PetscMalloc(len,&rmap);
3179:   PetscMemzero(rmap,C->M*sizeof(int));
3180:   for (j=0; j<nrow; j++) {
3181:     rmap[irow[j]] = j;
3182:   }

3184:   /*
3185:      Determine the number of non-zeros in the diagonal and off-diagonal 
3186:      portions of the matrix in order to do correct preallocation
3187:    */

3189:   /* first get start and end of "diagonal" columns */
3190:   if (csize == PETSC_DECIDE) {
3191:     nlocal = ncol/size + ((ncol % size) > rank);
3192:   } else {
3193:     nlocal = csize;
3194:   }
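  /* nlocal is the number of submatrix columns this process will own; the MPI_Scan
     below converts these counts into the local column ownership range, which is what
     separates the "diagonal" block (matA, counted in d_nz) from the "off-diagonal"
     block (matB, counted in o_nz). */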
3195:   {
3196:     int ncols,*cols,olen,dlen,thecol;
3197:     int *rbuf2_i,*rbuf3_i,*sbuf1_i,row,kmax,cidx;
3198: 
3199:     MPI_Scan(&nlocal,&cend,1,MPI_INT,MPI_SUM,comm);
3200:     cstart = cend - nlocal;
3201:     if (rank == size - 1 && cend != ncol) {
3202:       SETERRQ(PETSC_ERR_ARG_SIZ,"Local column sizes do not add up to total number of columns");
3203:     }

3205:     PetscMalloc((2*nrow+1)*sizeof(int),&d_nz);
3206:     o_nz = d_nz + nrow;
3207: 
3208:     /* Update lens from local data */
3209:     for (j=0; j<nrow; j++) {
3210:       row  = irow[j];
3211:       proc = rtable[row];
3212:       if (proc == rank) {
3213:         Arow=A->rows[row-c->rstart];
3214:         ncols=Arow->length;
3215:         cols=Arow->col;
3216:         olen=dlen=0;
3217:         for (k=0; k<ncols; k++) {
3218:           if ((thecol=cmap[cols[k]])) {
3219:             if (cstart<thecol && thecol<=cend) dlen++; /* thecol is from 1 */
3220:             else olen++;
3221:           }
3222:         }
3223:         o_nz[j]=olen;
3224:         d_nz[j]=dlen;
3225:       } else d_nz[j]=o_nz[j]=0;
3226:     }
3227:     /* Update lens from offproc data and done waits */
3228:     /* this will be much simpler once only the appropriate columns are sent */
3229:     for (j=0; j<nrqs;j++) {
3230:       MPI_Waitany(nrqs,r_waits3,&i,r_status3+j);
3231:       proc   = pa[i];
3232:       sbuf1_i = sbuf1[proc];
3233:       cidx    = 0;
3234:       rbuf2_i = rbuf2[i];
3235:       rbuf3_i = rbuf3[i];
3236:       kmax    = sbuf1_i[0]; /*num of rq. rows*/
3237:       for (k=1; k<=kmax; k++) {
3238:         row  = rmap[sbuf1_i[k]]; /* the val in the new matrix to be */
3239:         for (l=0; l<rbuf2_i[k]; l++,cidx++) {
3240:           if ((thecol=cmap[rbuf3_i[cidx]])) {
3241: 
3242:             if (cstart<thecol && thecol<=cend) d_nz[row]++; /* thecol is from 1 */
3243:             else o_nz[row]++;
3244:           }
3245:         }
3246:       }
3247:     }
3248:   }
3249:   PetscFree(r_status3);
3250:   PetscFree(r_waits3);
3251:   MPI_Waitall(nrqr,s_waits3,s_status3);
3252:   PetscFree(s_status3);
3253:   PetscFree(s_waits3);

3255:   if (scall ==  MAT_INITIAL_MATRIX) {
3256:     MatCreate(comm,nrow,nlocal,PETSC_DECIDE,ncol,submat);
3257:     MatSetType(*submat,C->type_name);
3258:     MatMPIAIJSetPreallocation(*submat,0,d_nz,0,o_nz);
3259:     mat=(Mat_MPIAIJ *)((*submat)->data);
3260:     matA=(Mat_SeqAIJ *)(mat->A->data);
3261:     matB=(Mat_SeqAIJ *)(mat->B->data);
3262: 
3263:   } else {
3264:     PetscTruth same;
3265:     /* The following checks could optionally be dropped for debugged user
3266:      * programs, but no PETSc option is known that switches off such safety
3267:      * tests; likewise the counting of o_nz,d_nz could be dropped for the
3268:      * REUSE case */
3269: 
3270:     PetscTypeCompare((PetscObject)(*submat),MATMPIAIJ,&same);
3271:     if (same == PETSC_FALSE) {
3272:       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong type");
3273:     }
3274:     if (((*submat)->m != nrow) || ((*submat)->N != ncol)) {
3275:         SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
3276:     }
3277:     mat=(Mat_MPIAIJ *)((*submat)->data);
3278:     matA=(Mat_SeqAIJ *)(mat->A->data);
3279:     matB=(Mat_SeqAIJ *)(mat->B->data);
3280:     PetscMemcmp(matA->ilen,d_nz,nrow*sizeof(int),&same);
3281:     if (same == PETSC_FALSE) {
3282:       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
3283:     }
3284:     PetscMemcmp(matB->ilen,o_nz,nrow*sizeof(int),&same);
3285:     if (same == PETSC_FALSE) {
3286:       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
3287:     }
3288:   /* Initial matrix as if empty */
3289:     PetscMemzero(matA->ilen,nrow*sizeof(int));
3290:     PetscMemzero(matB->ilen,nrow*sizeof(int));
3291:     /* Perhaps MatZeroEntries would be better - check exactly what it does; all
3292:      * potentially stale information must be deleted */
3293:     /* submats[i]->factor = C->factor; ??? if the factor is to stay the same then some
3294:      * factor information must be copied - but where is it kept? */
3295:     (*submat)->was_assembled=PETSC_FALSE;
3296:     (*submat)->assembled=PETSC_FALSE;
3297: 
3298:   }
3299:   PetscFree(d_nz);

3301:   /* Assemble the matrix */
3302:   /* First assemble from local rows */
3303:   {
3304:     int    i_row,oldrow,row,ncols,*cols,*matA_j,*matB_j,ilenA,ilenB,tcol;
3305:     FLOAT  *vals;
3306:     PetscScalar *matA_a,*matB_a;
3307: 
3308:     for (j=0; j<nrow; j++) {
3309:       oldrow = irow[j];
3310:       proc   = rtable[oldrow];
3311:       if (proc == rank) {
3312:         row  = rmap[oldrow];
3313: 
3314:         Arow  = A->rows[oldrow-c->rstart];
3315:         ncols = Arow->length;
3316:         cols  = Arow->col;
3317:         vals  = Arow->nz;
3318: 
3319:         i_row   = matA->i[row];
3320:         matA_a = matA->a + i_row;
3321:         matA_j = matA->j + i_row;
3322:         i_row   = matB->i[row];
3323:         matB_a = matB->a + i_row;
3324:         matB_j = matB->j + i_row;
3325:         for (k=0,ilenA=0,ilenB=0; k<ncols; k++) {
3326:           if ((tcol = cmap[cols[k]])) {
3327:             if (tcol<=cstart) {
3328:               *matB_j++ = tcol-1;
3329:               *matB_a++ = vals[k];
3330:               ilenB++;
3331:             } else if (tcol<=cend) {
3332:               *matA_j++ = (tcol-1)-cstart;
3333:               *matA_a++ = (PetscScalar)(vals[k]);
3334:               ilenA++;
3335:             } else {
3336:               *matB_j++ = tcol-1;
3337:               *matB_a++ = vals[k];
3338:               ilenB++;
3339:             }
3340:           }
3341:         }
3342:         matA->ilen[row]=ilenA;
3343:         matB->ilen[row]=ilenB;
3344: 
3345:       }
3346:     }
3347:   }

3349:   /*   Now assemble the off proc rows*/
3350:   {
3351:     int  *sbuf1_i,*rbuf2_i,*rbuf3_i,cidx,kmax,row,i_row;
3352:     int  *matA_j,*matB_j,lmax,tcol,ilenA,ilenB;
3353:     PetscScalar *matA_a,*matB_a;
3354:     FLOAT *rbuf4_i;

3356:     for (j=0; j<nrqs; j++) {
3357:       MPI_Waitany(nrqs,r_waits4,&i,r_status4+j);
3358:       proc   = pa[i];
3359:       sbuf1_i = sbuf1[proc];
3360: 
3361:       cidx    = 0;
3362:       rbuf2_i = rbuf2[i];
3363:       rbuf3_i = rbuf3[i];
3364:       rbuf4_i = rbuf4[i];
3365:       kmax    = sbuf1_i[0];
3366:       for (k=1; k<=kmax; k++) {
3367:         row = rmap[sbuf1_i[k]];
3368: 
3369:         i_row  = matA->i[row];
3370:         matA_a = matA->a + i_row;
3371:         matA_j = matA->j + i_row;
3372:         i_row  = matB->i[row];
3373:         matB_a = matB->a + i_row;
3374:         matB_j = matB->j + i_row;
3375: 
3376:         lmax = rbuf2_i[k];
3377:         for (l=0,ilenA=0,ilenB=0; l<lmax; l++,cidx++) {
3378:           if ((tcol = cmap[rbuf3_i[cidx]])) {
3379:             if (tcol<=cstart) {
3380:               *matB_j++ = tcol-1;
3381:               *matB_a++ = (PetscScalar)(rbuf4_i[cidx]);
3382:               ilenB++;
3383:             } else if (tcol<=cend) {
3384:               *matA_j++ = (tcol-1)-cstart;
3385:               *matA_a++ = (PetscScalar)(rbuf4_i[cidx]);
3386:               ilenA++;
3387:             } else {
3388:               *matB_j++ = tcol-1;
3389:               *matB_a++ = (PetscScalar)(rbuf4_i[cidx]);
3390:               ilenB++;
3391:             }
3392:           }
3393:         }
3394:         matA->ilen[row]=ilenA;
3395:         matB->ilen[row]=ilenB;
3396:       }
3397:     }
3398:   }

3400:   PetscFree(r_status4);
3401:   PetscFree(r_waits4);
3402:   MPI_Waitall(nrqr,s_waits4,s_status4);
3403:   PetscFree(s_waits4);
3404:   PetscFree(s_status4);

3406:   /* Restore the indices */
3407:   ISRestoreIndices(isrow,&irow);
3408:   ISRestoreIndices(iscol,&icol);

3410:   /* Destroy allocated memory */
3411:   PetscFree(rtable);
3412:   PetscFree(w1);
3413:   PetscFree(pa);

3415:   PetscFree(sbuf1);
3416:   PetscFree(rbuf2[0]);
3417:   PetscFree(rbuf2);
3418:   for (i=0; i<nrqr; ++i) {
3419:     PetscFree(sbuf2[i]);
3420:   }
3421:   for (i=0; i<nrqs; ++i) {
3422:     PetscFree(rbuf3[i]);
3423:     PetscFree(rbuf4[i]);
3424:   }

3426:   PetscFree(sbuf2);
3427:   PetscFree(rbuf3);
3428:   PetscFree(rbuf4);
3429:   PetscFree(sbuf3[0]);
3430:   PetscFree(sbuf3);
3431:   PetscFree(sbuf4[0]);
3432:   PetscFree(sbuf4);
3433: 
3434:   PetscFree(cmap);
3435:   PetscFree(rmap);


3438:   MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);
3439:   MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);


3442:   return(0);
3443: }