Actual source code: vpscat.c

  1: /*
  2:     Defines parallel vector scatters.
  3: */

 5:  #include "src/vec/is/isimpl.h"
 6:  #include "vecimpl.h"
 7:  #include "src/vec/impls/dvecimpl.h"
 8:  #include "src/vec/impls/mpi/pvecimpl.h"
 9:  #include "petscsys.h"
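
/*
   Note on data layout: each VecScatter_MPI_General keeps its message
   bookkeeping in one contiguous allocation (see the copy routines below),
   roughly

       values   : bs*starts[n] PetscScalars  - packed send or receive buffer
       requests : n            MPI_Requests  - one per neighbor process
       indices  : starts[n]    PetscInts     - local indices to pack/unpack
       starts   : n+1          PetscInts     - offset of each message in values/indices
       procs    : n            PetscMPIInts  - ranks of the neighbor processes

   laid out one after another starting at values.
*/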

 13: PetscErrorCode VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
 14: {
 15:   VecScatter_MPI_General *to=(VecScatter_MPI_General*)ctx->todata;
 16:   VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
 17:   PetscErrorCode         ierr;
 18:   PetscInt               i;
 19:   PetscMPIInt            rank;
 20:   PetscViewerFormat      format;
 21:   PetscTruth             iascii;

 24:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
 25:   if (iascii) {
 26:     MPI_Comm_rank(ctx->comm,&rank);
 27:     PetscViewerGetFormat(viewer,&format);
 28:     if (format ==  PETSC_VIEWER_ASCII_INFO) {
 29:       PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;

 31:       MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,ctx->comm);
 32:       MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,ctx->comm);
 33:       itmp = to->starts[to->n+1];
 34:       MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,ctx->comm);
 35:       itmp = from->starts[from->n+1];
 36:       MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,ctx->comm);
 37:       MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,ctx->comm);

 39:       PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
 40:       PetscViewerASCIIPrintf(viewer,"  Maximum number sends %D\n",nsend_max);
 41:       PetscViewerASCIIPrintf(viewer,"  Maximum number receives %D\n",nrecv_max);
 42:       PetscViewerASCIIPrintf(viewer,"  Maximum data sent %D\n",(PetscInt)(lensend_max*to->bs*sizeof(PetscScalar)));
 43:       PetscViewerASCIIPrintf(viewer,"  Maximum data received %D\n",(PetscInt)(lenrecv_max*to->bs*sizeof(PetscScalar)));
 44:       PetscViewerASCIIPrintf(viewer,"  Total data sent %D\n",(PetscInt)(alldata*to->bs*sizeof(PetscScalar)));

 46:     } else {
 47:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
 48:       if (to->n) {
 49:         for (i=0; i<to->n; i++){
 50:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d]   %D length = %D to whom %D\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
 51:         }
 52:         PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
 53:         for (i=0; i<to->starts[to->n]; i++){
 54:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d]%D \n",rank,to->indices[i]);
 55:         }
 56:       }

 58:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d]Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
 59:       if (from->n) {
 60:         for (i=0; i<from->n; i++){
 61:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %D\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
 62:         }

 64:         PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
 65:         for (i=0; i<from->starts[from->n]; i++){
 66:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d]%D \n",rank,from->indices[i]);
 67:         }
 68:       }
 69:       if (to->local.n) {
 70:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d]Indices for local part of scatter\n",rank);
 71:         for (i=0; i<to->local.n; i++){
 72:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d]From %D to %D \n",rank,from->local.slots[i],to->local.slots[i]);
 73:         }
 74:       }

 76:       PetscViewerFlush(viewer);
 77:     }
 78:   } else {
 79:     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for this scatter",((PetscObject)viewer)->type_name);
 80:   }
 81:   return(0);
 82: }

 84: /* -----------------------------------------------------------------------------------*/
 85: /*
 86:       The next routine determines which part of the local scatter is an
 87:   exact copy of values into their current locations. We check this here so
 88:   that we know we need not perform that portion of the scatter.
 89: */
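
/*
   For example, with to_slots = {3,7,9} and from_slots = {3,8,9} only the middle
   entry actually moves, so n_nonmatching is 1 and the nonmatching slot arrays
   hold just {7} and {8}; the identity copies 3->3 and 9->9 are skipped.
*/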
 92: PetscErrorCode VecScatterLocalOptimize_Private(VecScatter_Seq_General *gen_to,VecScatter_Seq_General *gen_from)
 93: {
 94:   PetscInt       n = gen_to->n,n_nonmatching = 0,i,*to_slots = gen_to->slots,*from_slots = gen_from->slots;
 96:   PetscInt       *nto_slots,*nfrom_slots,j = 0;
 97: 
 99:   for (i=0; i<n; i++) {
100:     if (to_slots[i] != from_slots[i]) n_nonmatching++;
101:   }

103:   if (!n_nonmatching) {
104:     gen_to->nonmatching_computed = PETSC_TRUE;
105:     gen_to->n_nonmatching        = gen_from->n_nonmatching = 0;
106:     PetscLogInfo(0,"VecScatterLocalOptimize_Private:Reduced %D to 0\n", n);
107:   } else if (n_nonmatching == n) {
108:     gen_to->nonmatching_computed = PETSC_FALSE;
109:     PetscLogInfo(0,"VecScatterLocalOptimize_Private:All values non-matching\n");
110:   } else {
111:     gen_to->nonmatching_computed= PETSC_TRUE;
112:     gen_to->n_nonmatching       = gen_from->n_nonmatching = n_nonmatching;
113:     PetscMalloc2(n_nonmatching,PetscInt,&nto_slots,n_nonmatching,PetscInt,&nfrom_slots);
114:     gen_to->slots_nonmatching   = nto_slots;
115:     gen_from->slots_nonmatching = nfrom_slots;
116:     for (i=0; i<n; i++) {
117:       if (to_slots[i] != from_slots[i]) {
118:         nto_slots[j]   = to_slots[i];
119:         nfrom_slots[j] = from_slots[i];
120:         j++;
121:       }
122:     }
123:     PetscLogInfo(0,"VecScatterLocalOptimize_Private:Reduced %D to %D\n",n,n_nonmatching);
124:   }
125:   return(0);
126: }

128: /* --------------------------------------------------------------------------------------*/
131: PetscErrorCode VecScatterCopy_PtoP(VecScatter in,VecScatter out)
132: {
133:   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
134:   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
135:   PetscErrorCode         ierr;
136:   PetscInt               len,ny;

139:   out->postrecvs = in->postrecvs;
140:   out->begin     = in->begin;
141:   out->end       = in->end;
142:   out->copy      = in->copy;
143:   out->destroy   = in->destroy;
144:   out->view      = in->view;

146:   /* allocate entire send scatter context */
147:   PetscNew(VecScatter_MPI_General,&out_to);
148:   PetscNew(VecScatter_MPI_General,&out_from);
149:   PetscMemzero(out_from,sizeof(VecScatter_MPI_General));
150:   PetscMemzero(out_to,sizeof(VecScatter_MPI_General));

152:   ny                = in_to->starts[in_to->n];
153:   len               = ny*(sizeof(PetscInt) + sizeof(PetscScalar)) + (in_to->n+1)*sizeof(PetscInt) +
154:                      (in_to->n)*(sizeof(PetscInt) + sizeof(MPI_Request));
155:   out_to->n         = in_to->n;
156:   out_to->type      = in_to->type;
157:   out_to->sendfirst = in_to->sendfirst;
158:   PetscMalloc(len,&out_to->values);
159:   out_to->requests  = (MPI_Request*)(out_to->values + ny);
160:   out_to->indices   = (PetscInt*)(out_to->requests + out_to->n);
161:   out_to->starts    = (PetscInt*)(out_to->indices + ny);
162:   out_to->procs     = (PetscMPIInt*)(out_to->starts + out_to->n + 1);

164:   PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
165:   PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
 166:   PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));

168:   PetscMalloc(2*(PetscMax(in_to->n,in_from->n)+1)*sizeof(MPI_Status),&out_to->sstatus);
 169:   out_to->rstatus = out_to->sstatus + PetscMax(in_to->n,in_from->n) + 1;

171:   out->todata      = (void*)out_to;
172:   out_to->local.n  = in_to->local.n;
173:   out_to->local.nonmatching_computed = PETSC_FALSE;
174:   out_to->local.n_nonmatching        = 0;
175:   out_to->local.slots_nonmatching    = 0;
176:   if (in_to->local.n) {
177:     PetscMalloc2(in_to->local.n,PetscInt,&out_to->local.slots,in_from->local.n,PetscInt,&out_from->local.slots);
178:     PetscMemcpy(out_to->local.slots,in_to->local.slots,in_to->local.n*sizeof(PetscInt));
179:     PetscMemcpy(out_from->local.slots,in_from->local.slots,in_from->local.n*sizeof(PetscInt));
180:   } else {
181:     out_to->local.slots   = 0;
182:     out_from->local.slots = 0;
183:   }

185:   /* allocate entire receive context */
186:   out_from->type      = in_from->type;
187:   ny                  = in_from->starts[in_from->n];
188:   len                 = ny*(sizeof(PetscInt) + sizeof(PetscScalar)) + (in_from->n+1)*sizeof(PetscInt) +
189:                        (in_from->n)*(sizeof(PetscInt) + sizeof(MPI_Request));
190:   out_from->n         = in_from->n;
191:   out_from->sendfirst = in_from->sendfirst;
192:   PetscMalloc(len,&out_from->values);
193:   out_from->requests  = (MPI_Request*)(out_from->values + ny);
194:   out_from->indices   = (PetscInt*)(out_from->requests + out_from->n);
195:   out_from->starts    = (PetscInt*)(out_from->indices + ny);
196:   out_from->procs     = (PetscMPIInt*)(out_from->starts + out_from->n + 1);
197:   PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
198:   PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
 199:   PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
200:   out->fromdata       = (void*)out_from;
201:   out_from->local.n   = in_from->local.n;
202:   out_from->local.nonmatching_computed = PETSC_FALSE;
203:   out_from->local.n_nonmatching        = 0;
204:   out_from->local.slots_nonmatching    = 0;
205:   return(0);
206: }

208: /* -------------------------------------------------------------------------------------*/
211: PetscErrorCode VecScatterDestroy_PtoP(VecScatter ctx)
212: {
213:   VecScatter_MPI_General *gen_to   = (VecScatter_MPI_General*)ctx->todata;
214:   VecScatter_MPI_General *gen_from = (VecScatter_MPI_General*)ctx->fromdata;
215:   PetscErrorCode         ierr;

218:   CHKMEMQ;
219:   if (gen_to->local.slots)             {PetscFree2(gen_to->local.slots,gen_from->local.slots);}
220:   if (gen_to->local.slots_nonmatching) {PetscFree2(gen_to->local.slots_nonmatching,gen_from->local.slots_nonmatching);}
221:   PetscFree(gen_to->sstatus);
222:   PetscFree(gen_to->values);
223:   PetscFree(gen_from->values);
224:   PetscFree(gen_from);
225:   PetscFree(gen_to);
226:   PetscHeaderDestroy(ctx);
227:   return(0);
228: }

230: /* --------------------------------------------------------------------------------------*/
231: /*
 232:      Even though the next routines are written for parallel
 233:   vectors, either xin or yin (but not both) may be a sequential (Seq)
 234:   vector, one per process.
 235:   
 236:      gen_from indices indicate where arriving data is stored;
 237:      gen_to   indices indicate where departing data came from.
 238:      The naming can be VERY confusing.

240: */
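
/*
     Concretely, in a forward scatter the i-th outgoing message is packed as

        svalues[sstarts[i]+j] = xv[ to->indices[sstarts[i]+j] ]

     and the i-th incoming message is unpacked as

        yv[ from->indices[rstarts[i]+j] ] = rvalues[rstarts[i]+j]

     (see VecScatterBegin_PtoP() and VecScatterEnd_PtoP() below for the bs = 1 case).
*/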
243: PetscErrorCode VecScatterBegin_PtoP(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
244: {
245:   VecScatter_MPI_General *gen_to,*gen_from;
246:   MPI_Comm               comm = ctx->comm;
247:   PetscScalar            *xv,*yv,*val,*rvalues,*svalues;
248:   MPI_Request            *rwaits,*swaits;
249:   PetscInt               i,j,*indices,*rstarts,*sstarts;
250:   PetscMPIInt            tag = ctx->tag,*rprocs,*sprocs;
251:   PetscErrorCode         ierr;
252:   PetscInt               nrecvs,nsends,iend;

255:   CHKMEMQ;
256:   VecGetArray(xin,&xv);
257:   if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
258:   if (mode & SCATTER_REVERSE){
259:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
260:     gen_from = (VecScatter_MPI_General*)ctx->todata;
261:   } else {
262:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
263:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
264:   }
265:   rvalues  = gen_from->values;
266:   svalues  = gen_to->values;
267:   nrecvs   = gen_from->n;
268:   nsends   = gen_to->n;
269:   rwaits   = gen_from->requests;
270:   swaits   = gen_to->requests;
271:   indices  = gen_to->indices;
272:   rstarts  = gen_from->starts;
273:   sstarts  = gen_to->starts;
274:   rprocs   = gen_from->procs;
275:   sprocs   = gen_to->procs;

277:   if (!(mode & SCATTER_LOCAL)) {

279:     if (gen_to->sendfirst) {
280:       /* do sends:  */
281:       for (i=0; i<nsends; i++) {
282:         val  = svalues + sstarts[i];
283:         iend = sstarts[i+1]-sstarts[i];
284:         /* pack the message */
285:         for (j=0; j<iend; j++) {
286:           val[j] = xv[*indices++];
287:         }
288:         MPI_Isend(val,iend,MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
289:       }
290:     }
291: 
292:     /* post receives:   */
293:     for (i=0; i<nrecvs; i++) {
294:       MPI_Irecv(rvalues+rstarts[i],rstarts[i+1]-rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
295:     }

297:     if (!gen_to->sendfirst) {
298:       /* do sends:  */
299:       for (i=0; i<nsends; i++) {
300:         val  = svalues + sstarts[i];
301:         iend = sstarts[i+1]-sstarts[i];
302:         /* pack the message */
303:         for (j=0; j<iend; j++) {
304:           val[j] = xv[*indices++];
305:         }
306:         MPI_Isend(val,iend,MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
307:       }
308:     }
309:   }

311:   /* take care of local scatters */
312:   if (gen_to->local.n && addv == INSERT_VALUES) {
313:     if (yv == xv && !gen_to->local.nonmatching_computed) {
314:       VecScatterLocalOptimize_Private(&gen_to->local,&gen_from->local);
315:     }
316:     if (gen_to->local.is_copy) {
317:       PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
318:     } else if (yv != xv || !gen_to->local.nonmatching_computed) {
319:       PetscInt *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
320:       PetscInt n       = gen_to->local.n;
321:       for (i=0; i<n; i++) {yv[fslots[i]] = xv[tslots[i]];}
322:     } else {
323:       /* 
 324:         In this case the values are being copied into their current locations, so those entries can be skipped
325:       */
326:       PetscInt *tslots = gen_to->local.slots_nonmatching,*fslots = gen_from->local.slots_nonmatching;
327:       PetscInt n       = gen_to->local.n_nonmatching;
328:       for (i=0; i<n; i++) {yv[fslots[i]] = xv[tslots[i]];}
329:     }
330:   } else if (gen_to->local.n) {
331:     PetscInt *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
332:     PetscInt n = gen_to->local.n;
333:     if (addv == ADD_VALUES) {
334:       for (i=0; i<n; i++) {yv[fslots[i]] += xv[tslots[i]];}
335: #if !defined(PETSC_USE_COMPLEX)
336:     } else if (addv == MAX_VALUES) {
337:       for (i=0; i<n; i++) {yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[tslots[i]]);}
338: #endif
339:     } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
340:   }

342:   VecRestoreArray(xin,&xv);
343:   if (xin != yin) {VecRestoreArray(yin,&yv);}
344:   CHKMEMQ;
345:   return(0);
346: }

348: /* --------------------------------------------------------------------------------------*/
351: PetscErrorCode VecScatterEnd_PtoP(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
352: {
353:   VecScatter_MPI_General *gen_to,*gen_from;
354:   PetscScalar            *rvalues,*yv,*val;
355:   PetscErrorCode         ierr;
356:   PetscInt               nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices;
357:   PetscMPIInt            imdex;
358:   MPI_Request            *rwaits,*swaits;
359:   MPI_Status             rstatus,*sstatus;

362:   CHKMEMQ;
363:   if (mode & SCATTER_LOCAL) return(0);
364:   VecGetArray(yin,&yv);

366:   if (mode & SCATTER_REVERSE){
367:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
368:     gen_from = (VecScatter_MPI_General*)ctx->todata;
369:     sstatus  = gen_from->sstatus;
370:   } else {
371:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
372:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
373:     sstatus  = gen_to->sstatus;
374:   }
375:   rvalues  = gen_from->values;
376:   nrecvs   = gen_from->n;
377:   nsends   = gen_to->n;
378:   rwaits   = gen_from->requests;
379:   swaits   = gen_to->requests;
380:   indices  = gen_from->indices;
381:   rstarts  = gen_from->starts;

383:   /*  wait on receives */
384:   count = nrecvs;
385:   while (count) {
386:     MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
387:     /* unpack receives into our local space */
388:     val      = rvalues + rstarts[imdex];
389:     n        = rstarts[imdex+1]-rstarts[imdex];
390:     lindices = indices + rstarts[imdex];
391:     if (addv == INSERT_VALUES) {
392:       for (i=0; i<n; i++) {
393:         yv[lindices[i]] = *val++;
394:       }
395:     } else if (addv == ADD_VALUES) {
396:       for (i=0; i<n; i++) {
397:         yv[lindices[i]] += *val++;
398:       }
399: #if !defined(PETSC_USE_COMPLEX)
400:     } else if (addv == MAX_VALUES) {
401:       for (i=0; i<n; i++) {
402:         yv[lindices[i]] = PetscMax(yv[lindices[i]],*val); val++;
403:       }
404: #endif
405:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
406:     count--;
407:   }

409:   /* wait on sends */
410:   if (nsends) {
411:     MPI_Waitall(nsends,swaits,sstatus);
412:   }
413:   VecRestoreArray(yin,&yv);
414:   CHKMEMQ;
415:   return(0);
416: }
417: /* ==========================================================================================*/
418: /*
 419:     Special scatters for fixed block sizes. These provide better performance
 420:     because the local copying, packing, and unpacking are done with loop
 421:     unrolling to the size of the block.

 423:     They also use MPI persistent sends and receives, which (at least in theory)
 424:     allow MPI to optimize repeated sends and receives of the same type.
425: */
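
/*
    In outline, the persistent-request pattern used below is

       MPI_Send_init(buf,count,MPIU_SCALAR,dest,tag,comm,&req);   - set up once
       ...
       MPI_Start(&req);        or MPI_Startall(n,reqs);           - every scatter
       MPI_Wait(&req,&status); or MPI_Waitall(n,reqs,stats);
       ...
       MPI_Request_free(&req);                                    - when done with the scatter

    so the message envelope is constructed a single time and reused.
    The MPI_Startall_irecv(), MPI_Startall_isend() and MPI_Start_isend() calls
    below are PETSc convenience macros around MPI_Start()/MPI_Startall(), not
    raw MPI routines.
*/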

427: /*
 428:     This is for use with the "ready-receiver" mode. In theory, on some
 429:     machines it could lead to better performance; in practice we have never
 430:     seen it do so. It is enabled with the -vecscatter_rr flag.
431: */
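
/*
    Recall that a ready send (MPI_Rsend_init() below) is only correct if the
    matching receive has already been posted, which is why this routine exists:
    it starts all of the persistent receives before any sends are begun.
*/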
434: PetscErrorCode VecScatterPostRecvs_PtoP_X(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
435: {
436:   VecScatter_MPI_General *gen_from = (VecScatter_MPI_General*)ctx->fromdata;

439:   MPI_Startall_irecv(gen_from->starts[gen_from->n]*gen_from->bs,gen_from->n,gen_from->requests);
440:   return(0);
441: }

443: /* --------------------------------------------------------------------------------------*/
444: /*
 445:     Special optimization to see if the local part of the scatter is actually
 446:     a contiguous copy; if so, the scatter routines use a single PetscMemcpy() instead of an indexed loop.
447:  
448: */
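
/*
   For example, with bs = 1, to_slots = {10,11,12} and from_slots = {40,41,42}
   both sides are contiguous runs, so the local scatter becomes a single
   PetscMemcpy() of 3*sizeof(PetscScalar) bytes from offset 10 of x to offset 40 of y.
*/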
451: PetscErrorCode VecScatterLocalOptimizeCopy_Private(VecScatter_Seq_General *gen_to,VecScatter_Seq_General *gen_from,PetscInt bs)
452: {
453:   PetscInt n = gen_to->n,i,*to_slots = gen_to->slots,*from_slots = gen_from->slots;
454:   PetscInt to_start,from_start;
455: 
457:   to_start   = to_slots[0];
458:   from_start = from_slots[0];

460:   for (i=1; i<n; i++) {
461:     to_start   += bs;
462:     from_start += bs;
463:     if (to_slots[i]   != to_start)   return(0);
464:     if (from_slots[i] != from_start) return(0);
465:   }
466:   gen_to->is_copy       = PETSC_TRUE;
467:   gen_to->copy_start    = to_slots[0];
468:   gen_to->copy_length   = bs*sizeof(PetscScalar)*n;
469:   gen_from->is_copy     = PETSC_TRUE;
470:   gen_from->copy_start  = from_slots[0];
471:   gen_from->copy_length = bs*sizeof(PetscScalar)*n;

473:   PetscLogInfo(0,"VecScatterLocalOptimizeCopy_Private:Local scatter is a copy, optimizing for it\n");

475:   return(0);
476: }

478: /* --------------------------------------------------------------------------------------*/

482: PetscErrorCode VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
483: {
484:   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
485:   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
486:   PetscErrorCode         ierr;
487:   PetscInt               len,ny,bs = in_from->bs;

490:   out->postrecvs = in->postrecvs;
491:   out->begin     = in->begin;
492:   out->end       = in->end;
493:   out->copy      = in->copy;
494:   out->destroy   = in->destroy;
495:   out->view      = in->view;

497:   /* allocate entire send scatter context */
498:   PetscNew(VecScatter_MPI_General,&out_to);
499:   PetscMemzero(out_to,sizeof(VecScatter_MPI_General));
500:   PetscNew(VecScatter_MPI_General,&out_from);
501:   PetscMemzero(out_from,sizeof(VecScatter_MPI_General));

503:   ny                = in_to->starts[in_to->n];
504:   len               = ny*(sizeof(PetscInt) + bs*sizeof(PetscScalar)) + (in_to->n+1)*sizeof(PetscInt) +
505:                      (in_to->n)*(sizeof(PetscInt) + sizeof(MPI_Request));
506:   out_to->n         = in_to->n;
507:   out_to->type      = in_to->type;
508:   out_to->sendfirst = in_to->sendfirst;

510:   PetscMalloc(len,&out_to->values);
511:   out_to->requests  = (MPI_Request*)(out_to->values + bs*ny);
512:   out_to->indices   = (PetscInt*)(out_to->requests + out_to->n);
513:   out_to->starts    = (PetscInt*)(out_to->indices + ny);
514:   out_to->procs     = (PetscMPIInt*)(out_to->starts + out_to->n + 1);
515:   PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
516:   PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
517:   PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
518:   PetscMalloc(2*(PetscMax(in_to->n,in_from->n)+1)*sizeof(MPI_Status),&out_to->sstatus);
519:   out_to->rstatus   =  out_to->sstatus + PetscMax(in_to->n,in_from->n) + 1;
520: 
521:   out->todata       = (void*)out_to;
522:   out_to->local.n   = in_to->local.n;
523:   out_to->local.nonmatching_computed = PETSC_FALSE;
524:   out_to->local.n_nonmatching        = 0;
525:   out_to->local.slots_nonmatching    = 0;
526:   if (in_to->local.n) {
527:     PetscMalloc2(in_to->local.n,PetscInt,&out_to->local.slots,in_from->local.n,PetscInt,&out_from->local.slots);
528:     PetscMemcpy(out_to->local.slots,in_to->local.slots,in_to->local.n*sizeof(PetscInt));
529:     PetscMemcpy(out_from->local.slots,in_from->local.slots,in_from->local.n*sizeof(PetscInt));
530:   } else {
531:     out_to->local.slots   = 0;
532:     out_from->local.slots = 0;
533:   }

535:   /* allocate entire receive context */
536:   out_from->type      = in_from->type;
537:   ny                  = in_from->starts[in_from->n];
538:   len                 = ny*(sizeof(PetscInt) + bs*sizeof(PetscScalar)) + (in_from->n+1)*sizeof(PetscInt) +
539:                        (in_from->n)*(sizeof(PetscInt) + sizeof(MPI_Request));
540:   out_from->n         = in_from->n;
541:   out_from->sendfirst = in_from->sendfirst;
542:   PetscMalloc(len,&out_from->values);
543:   out_from->requests  = (MPI_Request*)(out_from->values + bs*ny);
544:   out_from->indices   = (PetscInt*)(out_from->requests + out_from->n);
545:   out_from->starts    = (PetscInt*)(out_from->indices + ny);
546:   out_from->procs     = (PetscMPIInt*)(out_from->starts + out_from->n + 1);
547:   PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
548:   PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
549:   PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
550:   out->fromdata       = (void*)out_from;
551:   out_from->local.n   = in_from->local.n;
552:   out_from->local.nonmatching_computed = PETSC_FALSE;
553:   out_from->local.n_nonmatching        = 0;
554:   out_from->local.slots_nonmatching    = 0;

556:   /* 
 557:       set up the persistent request arrays built with MPI_Send_init() and MPI_Recv_init()
558:   */
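  /*
      Four sets of persistent requests are built here:
        out_from->requests     : receives into the receive buffer  (forward scatter)
        out_to->requests       : sends from the send buffer        (forward scatter)
        out_from->rev_requests : sends from the receive buffer     (reverse scatter)
        out_to->rev_requests   : receives into the send buffer     (reverse scatter)
  */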
559:   {
560:     PetscMPIInt tag;
561:     MPI_Comm    comm;
562:     PetscInt    *sstarts = out_to->starts,  *rstarts = out_from->starts;
563:     PetscMPIInt *sprocs  = out_to->procs,   *rprocs  = out_from->procs;
564:     PetscInt    i;
565:     PetscTruth  flg;
566:     MPI_Request *swaits  = out_to->requests,*rwaits  = out_from->requests;
567:     MPI_Request *rev_swaits,*rev_rwaits;
568:     PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;

570:     PetscMalloc2(in_to->n,MPI_Request,&out_to->rev_requests,in_from->n,MPI_Request,&out_from->rev_requests);

572:     rev_rwaits = out_to->rev_requests;
573:     rev_swaits = out_from->rev_requests;

575:     out_from->bs = out_to->bs = bs;
576:     tag     = out->tag;
577:     comm    = out->comm;

579:     /* Register the receives that you will use later (sends for scatter reverse) */
580:     for (i=0; i<out_from->n; i++) {
581:       MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
582:       MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
583:     }

585:     PetscOptionsHasName(PETSC_NULL,"-vecscatter_rr",&flg);
586:     if (flg) {
587:       out->postrecvs               = VecScatterPostRecvs_PtoP_X;
588:       out_to->use_readyreceiver    = PETSC_TRUE;
589:       out_from->use_readyreceiver  = PETSC_TRUE;
590:       for (i=0; i<out_to->n; i++) {
591:         MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
592:       }
593:       PetscLogInfo(0,"VecScatterCopy_PtoP_X:Using VecScatter ready receiver mode\n");
594:     } else {
595:       out->postrecvs               = 0;
596:       out_to->use_readyreceiver    = PETSC_FALSE;
597:       out_from->use_readyreceiver  = PETSC_FALSE;
598:       flg                          = PETSC_FALSE;
599:       PetscOptionsHasName(PETSC_NULL,"-vecscatter_ssend",&flg);
600:       if (flg) {
601:         PetscLogInfo(0,"VecScatterCopy_PtoP_X:Using VecScatter Ssend mode\n");
602:       }
603:       for (i=0; i<out_to->n; i++) {
604:         if (!flg) {
605:           MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
606:         } else {
607:           MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
608:         }
609:       }
610:     }
611:     /* Register receives for scatter reverse */
612:     for (i=0; i<out_to->n; i++) {
613:       MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
614:     }
615:   }

617:   return(0);
618: }

620: /* --------------------------------------------------------------------------------------*/

624: PetscErrorCode VecScatterBegin_PtoP_12(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
625: {
626:   VecScatter_MPI_General *gen_to,*gen_from;
627:   PetscScalar            *xv,*yv,*val,*svalues;
628:   MPI_Request            *rwaits,*swaits;
629:   PetscInt               *indices,*sstarts,iend,i,j,nrecvs,nsends,idx,len;
630:   PetscErrorCode         ierr;

633:   VecGetArray(xin,&xv);
634:   if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}

636:   if (mode & SCATTER_REVERSE) {
637:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
638:     gen_from = (VecScatter_MPI_General*)ctx->todata;
639:     rwaits   = gen_from->rev_requests;
640:     swaits   = gen_to->rev_requests;
641:   } else {
642:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
643:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
644:     rwaits   = gen_from->requests;
645:     swaits   = gen_to->requests;
646:   }
647:   svalues  = gen_to->values;
648:   nrecvs   = gen_from->n;
649:   nsends   = gen_to->n;
650:   indices  = gen_to->indices;
651:   sstarts  = gen_to->starts;

653:   if (!(mode & SCATTER_LOCAL)) {

655:     if (!gen_from->use_readyreceiver && !gen_to->sendfirst) {
656:       /* post receives since they were not posted in VecScatterPostRecvs()   */
657:       MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
658:     }
659:     if (ctx->packtogether) {
660:       /* this version packs all the messages together and sends, when -vecscatter_packtogether used */
661:       len  = 12*sstarts[nsends];
662:       val  = svalues;
663:       for (i=0; i<len; i += 12) {
664:         idx     = *indices++;
665:         val[0]  = xv[idx];
666:         val[1]  = xv[idx+1];
667:         val[2]  = xv[idx+2];
668:         val[3]  = xv[idx+3];
669:         val[4]  = xv[idx+4];
670:         val[5]  = xv[idx+5];
671:         val[6]  = xv[idx+6];
672:         val[7]  = xv[idx+7];
673:         val[8]  = xv[idx+8];
674:         val[9]  = xv[idx+9];
675:         val[10] = xv[idx+10];
676:         val[11] = xv[idx+11];
677:         val    += 12;
678:       }
679:       MPI_Startall_isend(len,nsends,swaits);
680:     } else {
681:       /* this version packs and sends one at a time */
682:       val  = svalues;
683:       for (i=0; i<nsends; i++) {
684:         iend = sstarts[i+1]-sstarts[i];

686:         for (j=0; j<iend; j++) {
687:           idx     = *indices++;
688:           val[0]  = xv[idx];
689:           val[1]  = xv[idx+1];
690:           val[2]  = xv[idx+2];
691:           val[3]  = xv[idx+3];
692:           val[4]  = xv[idx+4];
693:           val[5]  = xv[idx+5];
694:           val[6]  = xv[idx+6];
695:           val[7]  = xv[idx+7];
696:           val[8]  = xv[idx+8];
697:           val[9]  = xv[idx+9];
698:           val[10] = xv[idx+10];
699:           val[11] = xv[idx+11];
700:           val    += 12;
701:         }
702:         MPI_Start_isend(12*iend,swaits+i);
703:       }
704:     }

706:     if (!gen_from->use_readyreceiver && gen_to->sendfirst) {
707:       /* post receives since they were not posted in VecScatterPostRecvs()   */
708:       MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
709:     }
710:   }

712:   /* take care of local scatters */
713:   if (gen_to->local.n) {
714:     PetscInt *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
715:     PetscInt n       = gen_to->local.n,il,ir;
716:     if (addv == INSERT_VALUES) {
717:       if (gen_to->local.is_copy) {
718:         PetscMemcpy(yv+gen_from->local.copy_start,xv+gen_to->local.copy_start,gen_to->local.copy_length);
719:       } else {
720:         for (i=0; i<n; i++) {
721:           il = fslots[i]; ir = tslots[i];
722:           yv[il]    = xv[ir];
723:           yv[il+1]  = xv[ir+1];
724:           yv[il+2]  = xv[ir+2];
725:           yv[il+3]  = xv[ir+3];
726:           yv[il+4]  = xv[ir+4];
727:           yv[il+5]  = xv[ir+5];
728:           yv[il+6]  = xv[ir+6];
729:           yv[il+7]  = xv[ir+7];
730:           yv[il+8]  = xv[ir+8];
731:           yv[il+9]  = xv[ir+9];
732:           yv[il+10] = xv[ir+10];
733:           yv[il+11] = xv[ir+11];
734:         }
735:       }
736:     }  else if (addv == ADD_VALUES) {
737:       for (i=0; i<n; i++) {
738:         il = fslots[i]; ir = tslots[i];
739:         yv[il]    += xv[ir];
740:         yv[il+1]  += xv[ir+1];
741:         yv[il+2]  += xv[ir+2];
742:         yv[il+3]  += xv[ir+3];
743:         yv[il+4]  += xv[ir+4];
744:         yv[il+5]  += xv[ir+5];
745:         yv[il+6]  += xv[ir+6];
746:         yv[il+7]  += xv[ir+7];
747:         yv[il+8]  += xv[ir+8];
748:         yv[il+9]  += xv[ir+9];
749:         yv[il+10] += xv[ir+10];
750:         yv[il+11] += xv[ir+11];
751:       }
752: #if !defined(PETSC_USE_COMPLEX)
753:     }  else if (addv == MAX_VALUES) {
754:       for (i=0; i<n; i++) {
755:         il = fslots[i]; ir = tslots[i];
756:         yv[il]    = PetscMax(yv[il],xv[ir]);
757:         yv[il+1]  = PetscMax(yv[il+1],xv[ir+1]);
758:         yv[il+2]  = PetscMax(yv[il+2],xv[ir+2]);
759:         yv[il+3]  = PetscMax(yv[il+3],xv[ir+3]);
760:         yv[il+4]  = PetscMax(yv[il+4],xv[ir+4]);
761:         yv[il+5]  = PetscMax(yv[il+5],xv[ir+5]);
762:         yv[il+6]  = PetscMax(yv[il+6],xv[ir+6]);
763:         yv[il+7]  = PetscMax(yv[il+7],xv[ir+7]);
764:         yv[il+8]  = PetscMax(yv[il+8],xv[ir+8]);
765:         yv[il+9]  = PetscMax(yv[il+9],xv[ir+9]);
766:         yv[il+10] = PetscMax(yv[il+10],xv[ir+10]);
767:         yv[il+11] = PetscMax(yv[il+11],xv[ir+11]);
768:       }
769: #endif
770:     } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
771:   }
772:   VecRestoreArray(xin,&xv);
773:   if (xin != yin) {VecRestoreArray(yin,&yv);}
774:   return(0);
775: }

777: /* --------------------------------------------------------------------------------------*/

781: PetscErrorCode VecScatterEnd_PtoP_12(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
782: {
783:   VecScatter_MPI_General *gen_to,*gen_from;
784:   PetscScalar            *rvalues,*yv,*val;
785:   PetscErrorCode         ierr;
786:   PetscMPIInt            imdex;
787:   PetscInt               nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
788:   MPI_Request            *rwaits,*swaits;
789:   MPI_Status             *rstatus,*sstatus;

792:   if (mode & SCATTER_LOCAL) return(0);
793:   VecGetArray(yin,&yv);

795:   if (mode & SCATTER_REVERSE) {
796:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
797:     gen_from = (VecScatter_MPI_General*)ctx->todata;
798:     rwaits   = gen_from->rev_requests;
799:     swaits   = gen_to->rev_requests;
800:     sstatus  = gen_from->sstatus;
801:     rstatus  = gen_from->rstatus;
802:   } else {
803:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
804:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
805:     rwaits   = gen_from->requests;
806:     swaits   = gen_to->requests;
807:     sstatus  = gen_to->sstatus;
808:     rstatus  = gen_to->rstatus;
809:   }
810:   rvalues  = gen_from->values;
811:   nrecvs   = gen_from->n;
812:   nsends   = gen_to->n;
813:   indices  = gen_from->indices;
814:   rstarts  = gen_from->starts;

816:   /*  wait on receives */
817:   count = nrecvs;
818:   if (ctx->packtogether) { /* receive all messages, then unpack all, when -vecscatter_packtogether used */
819:     MPI_Waitall(nrecvs,rwaits,rstatus);
820:     n        = rstarts[count];
821:     val      = rvalues;
822:     lindices = indices;
823:     if (addv == INSERT_VALUES) {
824:       for (i=0; i<n; i++) {
825:         idx        = lindices[i];
826:         yv[idx]    = val[0];
827:         yv[idx+1]  = val[1];
828:         yv[idx+2]  = val[2];
829:         yv[idx+3]  = val[3];
830:         yv[idx+4]  = val[4];
831:         yv[idx+5]  = val[5];
832:         yv[idx+6]  = val[6];
833:         yv[idx+7]  = val[7];
834:         yv[idx+8]  = val[8];
835:         yv[idx+9]  = val[9];
836:         yv[idx+10] = val[10];
837:         yv[idx+11] = val[11];
838:         val       += 12;
839:       }
840:     } else if (addv == ADD_VALUES) {
841:       for (i=0; i<n; i++) {
842:         idx         = lindices[i];
843:         yv[idx]    += val[0];
844:         yv[idx+1]  += val[1];
845:         yv[idx+2]  += val[2];
846:         yv[idx+3]  += val[3];
847:         yv[idx+4]  += val[4];
848:         yv[idx+5]  += val[5];
849:         yv[idx+6]  += val[6];
850:         yv[idx+7]  += val[7];
851:         yv[idx+8]  += val[8];
852:         yv[idx+9]  += val[9];
853:         yv[idx+10] += val[10];
854:         yv[idx+11] += val[11];
855:         val        += 12;
856:       }
857: #if !defined(PETSC_USE_COMPLEX)
858:     } else if (addv == MAX_VALUES) {
859:       for (i=0; i<n; i++) {
860:         idx        = lindices[i];
861:         yv[idx]    = PetscMax(yv[idx],val[0]);
862:         yv[idx+1]  = PetscMax(yv[idx+1],val[1]);
863:         yv[idx+2]  = PetscMax(yv[idx+2],val[2]);
864:         yv[idx+3]  = PetscMax(yv[idx+3],val[3]);
865:         yv[idx+4]  = PetscMax(yv[idx+4],val[4]);
866:         yv[idx+5]  = PetscMax(yv[idx+5],val[5]);
867:         yv[idx+6]  = PetscMax(yv[idx+6],val[6]);
868:         yv[idx+7]  = PetscMax(yv[idx+7],val[7]);
869:         yv[idx+8]  = PetscMax(yv[idx+8],val[8]);
870:         yv[idx+9]  = PetscMax(yv[idx+9],val[9]);
871:         yv[idx+10] = PetscMax(yv[idx+10],val[10]);
872:         yv[idx+11] = PetscMax(yv[idx+11],val[11]);
873:         val       += 12;
874:       }
875: #endif
876:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
877:   } else { /* unpack each message as it arrives, default version */
878:     while (count) {
879:       MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus[0]);
880:       /* unpack receives into our local space */
881:       val      = rvalues + 12*rstarts[imdex];
882:       lindices = indices + rstarts[imdex];
883:       n        = rstarts[imdex+1] - rstarts[imdex];
884:       if (addv == INSERT_VALUES) {
885:         for (i=0; i<n; i++) {
886:           idx        = lindices[i];
887:           yv[idx]    = val[0];
888:           yv[idx+1]  = val[1];
889:           yv[idx+2]  = val[2];
890:           yv[idx+3]  = val[3];
891:           yv[idx+4]  = val[4];
892:           yv[idx+5]  = val[5];
893:           yv[idx+6]  = val[6];
894:           yv[idx+7]  = val[7];
895:           yv[idx+8]  = val[8];
896:           yv[idx+9]  = val[9];
897:           yv[idx+10] = val[10];
898:           yv[idx+11] = val[11];
899:           val       += 12;
900:         }
901:       } else if (addv == ADD_VALUES) {
902:         for (i=0; i<n; i++) {
903:           idx        = lindices[i];
904:           yv[idx]    += val[0];
905:           yv[idx+1]  += val[1];
906:           yv[idx+2]  += val[2];
907:           yv[idx+3]  += val[3];
908:           yv[idx+4]  += val[4];
909:           yv[idx+5]  += val[5];
910:           yv[idx+6]  += val[6];
911:           yv[idx+7]  += val[7];
912:           yv[idx+8]  += val[8];
913:           yv[idx+9]  += val[9];
914:           yv[idx+10] += val[10];
915:           yv[idx+11] += val[11];
916:           val        += 12;
917:         }
918: #if !defined(PETSC_USE_COMPLEX)
919:       } else if (addv == MAX_VALUES) {
920:         for (i=0; i<n; i++) {
921:           idx        = lindices[i];
922:           yv[idx]    = PetscMax(yv[idx],val[0]);
923:           yv[idx+1]  = PetscMax(yv[idx+1],val[1]);
924:           yv[idx+2]  = PetscMax(yv[idx+2],val[2]);
925:           yv[idx+3]  = PetscMax(yv[idx+3],val[3]);
926:           yv[idx+4]  = PetscMax(yv[idx+4],val[4]);
927:           yv[idx+5]  = PetscMax(yv[idx+5],val[5]);
928:           yv[idx+6]  = PetscMax(yv[idx+6],val[6]);
929:           yv[idx+7]  = PetscMax(yv[idx+7],val[7]);
930:           yv[idx+8]  = PetscMax(yv[idx+8],val[8]);
931:           yv[idx+9]  = PetscMax(yv[idx+9],val[9]);
932:           yv[idx+10] = PetscMax(yv[idx+10],val[10]);
933:           yv[idx+11] = PetscMax(yv[idx+11],val[11]);
934:           val        += 12;
935:         }
936: #endif
937:       }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
938:       count--;
939:     }
940:   }
941:   /* wait on sends */
942:   if (nsends) {
943:     MPI_Waitall(nsends,swaits,sstatus);
944:   }
945:   VecRestoreArray(yin,&yv);
946:   return(0);
947: }

949: /* --------------------------------------------------------------------------------------*/

953: PetscErrorCode VecScatterBegin_PtoP_6(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
954: {
955:   VecScatter_MPI_General *gen_to,*gen_from;
956:   PetscScalar            *xv,*yv,*val,*svalues;
957:   MPI_Request            *rwaits,*swaits;
958:   PetscErrorCode         ierr;
959:   PetscInt               i,*indices,*sstarts,iend,j,nrecvs,nsends,idx;

962:   VecGetArray(xin,&xv);
963:   if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
964:   if (mode & SCATTER_REVERSE) {
965:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
966:     gen_from = (VecScatter_MPI_General*)ctx->todata;
967:     rwaits   = gen_from->rev_requests;
968:     swaits   = gen_to->rev_requests;
969:   } else {
970:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
971:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
972:     rwaits   = gen_from->requests;
973:     swaits   = gen_to->requests;
974:   }
975:   svalues  = gen_to->values;
976:   nrecvs   = gen_from->n;
977:   nsends   = gen_to->n;
978:   indices  = gen_to->indices;
979:   sstarts  = gen_to->starts;

981:   if (!(mode & SCATTER_LOCAL)) {

983:     if (gen_to->sendfirst) {
984:       /* this version packs and sends one at a time */
985:       val  = svalues;
986:       for (i=0; i<nsends; i++) {
987:         iend = sstarts[i+1]-sstarts[i];

989:         for (j=0; j<iend; j++) {
990:           idx     = *indices++;
991:           val[0] = xv[idx];
992:           val[1] = xv[idx+1];
993:           val[2] = xv[idx+2];
994:           val[3] = xv[idx+3];
995:           val[4] = xv[idx+4];
996:           val[5] = xv[idx+5];
997:           val    += 6;
998:         }
999:         MPI_Start_isend(6*iend,swaits+i);
1000:       }
1001:     }

1003:     if (!gen_from->use_readyreceiver) {
1004:       /* post receives since they were not posted in VecScatterPostRecvs()   */
1005:       MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
1006:     }

1008:     if (!gen_to->sendfirst) {
1009:       /* this version packs all the messages together and sends */
1010:       /*
1011:       len  = 5*sstarts[nsends];
1012:       val  = svalues;
1013:       for (i=0; i<len; i += 5) {
1014:         idx     = *indices++;
1015:         val[0] = xv[idx];
1016:         val[1] = xv[idx+1];
1017:         val[2] = xv[idx+2];
1018:         val[3] = xv[idx+3];
1019:         val[4] = xv[idx+4];
1020:         val      += 5;
1021:       }
1022:       MPI_Startall_isend(len,nsends,swaits);
1023:       */

1025:       /* this version packs and sends one at a time */
1026:       val  = svalues;
1027:       for (i=0; i<nsends; i++) {
1028:         iend = sstarts[i+1]-sstarts[i];

1030:         for (j=0; j<iend; j++) {
1031:           idx     = *indices++;
1032:           val[0] = xv[idx];
1033:           val[1] = xv[idx+1];
1034:           val[2] = xv[idx+2];
1035:           val[3] = xv[idx+3];
1036:           val[4] = xv[idx+4];
1037:           val[5] = xv[idx+5];
1038:           val    += 6;
1039:         }
1040:         MPI_Start_isend(6*iend,swaits+i);
1041:       }
1042:     }
1043:   }

1045:   /* take care of local scatters */
1046:   if (gen_to->local.n) {
1047:     PetscInt *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
1048:     PetscInt n       = gen_to->local.n,il,ir;
1049:     if (addv == INSERT_VALUES) {
1050:       if (gen_to->local.is_copy) {
1051:         PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
1052:       } else {
1053:         for (i=0; i<n; i++) {
1054:           il = fslots[i]; ir = tslots[i];
1055:           yv[il]   = xv[ir];
1056:           yv[il+1] = xv[ir+1];
1057:           yv[il+2] = xv[ir+2];
1058:           yv[il+3] = xv[ir+3];
1059:           yv[il+4] = xv[ir+4];
1060:           yv[il+5] = xv[ir+5];
1061:         }
1062:       }
1063:     }  else if (addv == ADD_VALUES) {
1064:       for (i=0; i<n; i++) {
1065:         il = fslots[i]; ir = tslots[i];
1066:         yv[il]   += xv[ir];
1067:         yv[il+1] += xv[ir+1];
1068:         yv[il+2] += xv[ir+2];
1069:         yv[il+3] += xv[ir+3];
1070:         yv[il+4] += xv[ir+4];
1071:         yv[il+5] += xv[ir+5];
1072:       }
1073: #if !defined(PETSC_USE_COMPLEX)
1074:     }  else if (addv == MAX_VALUES) {
1075:       for (i=0; i<n; i++) {
1076:         il = fslots[i]; ir = tslots[i];
1077:         yv[il]   = PetscMax(yv[il],xv[ir]);
1078:         yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1079:         yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
1080:         yv[il+3] = PetscMax(yv[il+3],xv[ir+3]);
1081:         yv[il+4] = PetscMax(yv[il+4],xv[ir+4]);
1082:         yv[il+5] = PetscMax(yv[il+5],xv[ir+5]);
1083:       }
1084: #endif
1085:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1086:   }
1087:   VecRestoreArray(xin,&xv);
1088:   if (xin != yin) {VecRestoreArray(yin,&yv);}
1089:   return(0);
1090: }

1092: /* --------------------------------------------------------------------------------------*/

1096: PetscErrorCode VecScatterEnd_PtoP_6(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1097: {
1098:   VecScatter_MPI_General *gen_to,*gen_from;
1099:   PetscScalar            *rvalues,*yv,*val;
1100:   PetscErrorCode         ierr;
1101:   PetscInt               nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
1102:   PetscMPIInt            imdex;
1103:   MPI_Request            *rwaits,*swaits;
1104:   MPI_Status             rstatus,*sstatus;

1107:   if (mode & SCATTER_LOCAL) return(0);
1108:   VecGetArray(yin,&yv);

1110:   if (mode & SCATTER_REVERSE) {
1111:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
1112:     gen_from = (VecScatter_MPI_General*)ctx->todata;
1113:     rwaits   = gen_from->rev_requests;
1114:     swaits   = gen_to->rev_requests;
1115:     sstatus  = gen_from->sstatus;
1116:   } else {
1117:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
1118:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1119:     rwaits   = gen_from->requests;
1120:     swaits   = gen_to->requests;
1121:     sstatus  = gen_to->sstatus;
1122:   }
1123:   rvalues  = gen_from->values;
1124:   nrecvs   = gen_from->n;
1125:   nsends   = gen_to->n;
1126:   indices  = gen_from->indices;
1127:   rstarts  = gen_from->starts;

1129:   /*  wait on receives */
1130:   count = nrecvs;
1131:   while (count) {
1132:     MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
1133:     /* unpack receives into our local space */
1134:     val      = rvalues + 6*rstarts[imdex];
1135:     lindices = indices + rstarts[imdex];
1136:     n        = rstarts[imdex+1] - rstarts[imdex];
1137:     if (addv == INSERT_VALUES) {
1138:       for (i=0; i<n; i++) {
1139:         idx       = lindices[i];
1140:         yv[idx]   = val[0];
1141:         yv[idx+1] = val[1];
1142:         yv[idx+2] = val[2];
1143:         yv[idx+3] = val[3];
1144:         yv[idx+4] = val[4];
1145:         yv[idx+5] = val[5];
1146:         val      += 6;
1147:       }
1148:     } else if (addv == ADD_VALUES) {
1149:       for (i=0; i<n; i++) {
1150:         idx       = lindices[i];
1151:         yv[idx]   += val[0];
1152:         yv[idx+1] += val[1];
1153:         yv[idx+2] += val[2];
1154:         yv[idx+3] += val[3];
1155:         yv[idx+4] += val[4];
1156:         yv[idx+5] += val[5];
1157:         val       += 6;
1158:       }
1159: #if !defined(PETSC_USE_COMPLEX)
1160:     } else if (addv == MAX_VALUES) {
1161:       for (i=0; i<n; i++) {
1162:         idx       = lindices[i];
1163:         yv[idx]   = PetscMax(yv[idx],val[0]);
1164:         yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1165:         yv[idx+2] = PetscMax(yv[idx+2],val[2]);
1166:         yv[idx+3] = PetscMax(yv[idx+3],val[3]);
1167:         yv[idx+4] = PetscMax(yv[idx+4],val[4]);
1168:         yv[idx+5] = PetscMax(yv[idx+5],val[5]);
1169:         val       += 6;
1170:       }
1171: #endif
1172:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1173:     count--;
1174:   }
1175:   /* wait on sends */
1176:   if (nsends) {
1177:     MPI_Waitall(nsends,swaits,sstatus);
1178:   }
1179:   VecRestoreArray(yin,&yv);
1180:   return(0);
1181: }

1183: /* --------------------------------------------------------------------------------------*/

1187: PetscErrorCode VecScatterBegin_PtoP_5(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1188: {
1189:   VecScatter_MPI_General *gen_to,*gen_from;
1190:   PetscScalar            *xv,*yv,*val,*svalues;
1191:   MPI_Request            *rwaits,*swaits;
1192:   PetscErrorCode         ierr;
1193:   PetscInt               i,*indices,*sstarts,iend,j,nrecvs,nsends,idx;

1196:   VecGetArray(xin,&xv);
1197:   if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
1198:   if (mode & SCATTER_REVERSE) {
1199:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
1200:     gen_from = (VecScatter_MPI_General*)ctx->todata;
1201:     rwaits   = gen_from->rev_requests;
1202:     swaits   = gen_to->rev_requests;
1203:   } else {
1204:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
1205:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1206:     rwaits   = gen_from->requests;
1207:     swaits   = gen_to->requests;
1208:   }
1209:   svalues  = gen_to->values;
1210:   nrecvs   = gen_from->n;
1211:   nsends   = gen_to->n;
1212:   indices  = gen_to->indices;
1213:   sstarts  = gen_to->starts;

1215:   if (!(mode & SCATTER_LOCAL)) {

1217:     if (gen_to->sendfirst) {
1218:       /* this version packs and sends one at a time */
1219:       val  = svalues;
1220:       for (i=0; i<nsends; i++) {
1221:         iend = sstarts[i+1]-sstarts[i];

1223:         for (j=0; j<iend; j++) {
1224:           idx     = *indices++;
1225:           val[0] = xv[idx];
1226:           val[1] = xv[idx+1];
1227:           val[2] = xv[idx+2];
1228:           val[3] = xv[idx+3];
1229:           val[4] = xv[idx+4];
1230:           val    += 5;
1231:         }
1232:         MPI_Start_isend(5*iend,swaits+i);
1233:       }
1234:     }

1236:     if (!gen_from->use_readyreceiver) {
1237:       /* post receives since they were not posted in VecScatterPostRecvs()   */
1238:       MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
1239:     }

1241:     if (!gen_to->sendfirst) {
1242:       /* this version packs all the messages together and sends */
1243:       /*
1244:       len  = 5*sstarts[nsends];
1245:       val  = svalues;
1246:       for (i=0; i<len; i += 5) {
1247:         idx     = *indices++;
1248:         val[0] = xv[idx];
1249:         val[1] = xv[idx+1];
1250:         val[2] = xv[idx+2];
1251:         val[3] = xv[idx+3];
1252:         val[4] = xv[idx+4];
1253:         val      += 5;
1254:       }
1255:       MPI_Startall_isend(len,nsends,swaits);
1256:       */

1258:       /* this version packs and sends one at a time */
1259:       val  = svalues;
1260:       for (i=0; i<nsends; i++) {
1261:         iend = sstarts[i+1]-sstarts[i];

1263:         for (j=0; j<iend; j++) {
1264:           idx     = *indices++;
1265:           val[0] = xv[idx];
1266:           val[1] = xv[idx+1];
1267:           val[2] = xv[idx+2];
1268:           val[3] = xv[idx+3];
1269:           val[4] = xv[idx+4];
1270:           val    += 5;
1271:         }
1272:         MPI_Start_isend(5*iend,swaits+i);
1273:       }
1274:     }
1275:   }

1277:   /* take care of local scatters */
1278:   if (gen_to->local.n) {
1279:     PetscInt *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
1280:     PetscInt n       = gen_to->local.n,il,ir;
1281:     if (addv == INSERT_VALUES) {
1282:       if (gen_to->local.is_copy) {
1283:         PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
1284:       } else {
1285:         for (i=0; i<n; i++) {
1286:           il = fslots[i]; ir = tslots[i];
1287:           yv[il]   = xv[ir];
1288:           yv[il+1] = xv[ir+1];
1289:           yv[il+2] = xv[ir+2];
1290:           yv[il+3] = xv[ir+3];
1291:           yv[il+4] = xv[ir+4];
1292:         }
1293:       }
1294:     }  else if (addv == ADD_VALUES) {
1295:       for (i=0; i<n; i++) {
1296:         il = fslots[i]; ir = tslots[i];
1297:         yv[il]   += xv[ir];
1298:         yv[il+1] += xv[ir+1];
1299:         yv[il+2] += xv[ir+2];
1300:         yv[il+3] += xv[ir+3];
1301:         yv[il+4] += xv[ir+4];
1302:       }
1303: #if !defined(PETSC_USE_COMPLEX)
1304:     }  else if (addv == MAX_VALUES) {
1305:       for (i=0; i<n; i++) {
1306:         il = fslots[i]; ir = tslots[i];
1307:         yv[il]   = PetscMax(yv[il],xv[ir]);
1308:         yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1309:         yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
1310:         yv[il+3] = PetscMax(yv[il+3],xv[ir+3]);
1311:         yv[il+4] = PetscMax(yv[il+4],xv[ir+4]);
1312:       }
1313: #endif
1314:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1315:   }
1316:   VecRestoreArray(xin,&xv);
1317:   if (xin != yin) {VecRestoreArray(yin,&yv);}
1318:   return(0);
1319: }

1321: /* --------------------------------------------------------------------------------------*/

1325: PetscErrorCode VecScatterEnd_PtoP_5(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1326: {
1327:   VecScatter_MPI_General *gen_to,*gen_from;
1328:   PetscScalar            *rvalues,*yv,*val;
1329:   PetscErrorCode         ierr;
1330:   PetscInt               nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
1331:   PetscMPIInt            imdex;
1332:   MPI_Request            *rwaits,*swaits;
1333:   MPI_Status             rstatus,*sstatus;

1336:   if (mode & SCATTER_LOCAL) return(0);
1337:   VecGetArray(yin,&yv);

1339:   if (mode & SCATTER_REVERSE) {
1340:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
1341:     gen_from = (VecScatter_MPI_General*)ctx->todata;
1342:     rwaits   = gen_from->rev_requests;
1343:     swaits   = gen_to->rev_requests;
1344:     sstatus  = gen_from->sstatus;
1345:   } else {
1346:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
1347:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1348:     rwaits   = gen_from->requests;
1349:     swaits   = gen_to->requests;
1350:     sstatus  = gen_to->sstatus;
1351:   }
1352:   rvalues  = gen_from->values;
1353:   nrecvs   = gen_from->n;
1354:   nsends   = gen_to->n;
1355:   indices  = gen_from->indices;
1356:   rstarts  = gen_from->starts;

1358:   /*  wait on receives */
1359:   count = nrecvs;
1360:   while (count) {
1361:     MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
1362:     /* unpack receives into our local space */
1363:     val      = rvalues + 5*rstarts[imdex];
1364:     lindices = indices + rstarts[imdex];
1365:     n        = rstarts[imdex+1] - rstarts[imdex];
1366:     if (addv == INSERT_VALUES) {
1367:       for (i=0; i<n; i++) {
1368:         idx       = lindices[i];
1369:         yv[idx]   = val[0];
1370:         yv[idx+1] = val[1];
1371:         yv[idx+2] = val[2];
1372:         yv[idx+3] = val[3];
1373:         yv[idx+4] = val[4];
1374:         val      += 5;
1375:       }
1376:     } else if (addv == ADD_VALUES) {
1377:       for (i=0; i<n; i++) {
1378:         idx       = lindices[i];
1379:         yv[idx]   += val[0];
1380:         yv[idx+1] += val[1];
1381:         yv[idx+2] += val[2];
1382:         yv[idx+3] += val[3];
1383:         yv[idx+4] += val[4];
1384:         val       += 5;
1385:       }
1386: #if !defined(PETSC_USE_COMPLEX)
1387:     } else if (addv == MAX_VALUES) {
1388:       for (i=0; i<n; i++) {
1389:         idx       = lindices[i];
1390:         yv[idx]   = PetscMax(yv[idx],val[0]);
1391:         yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1392:         yv[idx+2] = PetscMax(yv[idx+2],val[2]);
1393:         yv[idx+3] = PetscMax(yv[idx+3],val[3]);
1394:         yv[idx+4] = PetscMax(yv[idx+4],val[4]);
1395:         val       += 5;
1396:       }
1397: #endif
1398:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1399:     count--;
1400:   }
1401:   /* wait on sends */
1402:   if (nsends) {
1403:     MPI_Waitall(nsends,swaits,sstatus);
1404:   }
1405:   VecRestoreArray(yin,&yv);
1406:   return(0);
1407: }

1409: /* --------------------------------------------------------------------------------------*/

1413: PetscErrorCode VecScatterBegin_PtoP_4(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1414: {
1415:   VecScatter_MPI_General *gen_to,*gen_from;
1416:   PetscScalar            *xv,*yv,*val,*svalues;
1417:   MPI_Request            *rwaits,*swaits;
1418:   PetscInt               *indices,*sstarts,iend,i,j,nrecvs,nsends,idx,len;
1419:   PetscErrorCode         ierr;

1422:   VecGetArray(xin,&xv);
1423:   if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}

1425:   if (mode & SCATTER_REVERSE) {
1426:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
1427:     gen_from = (VecScatter_MPI_General*)ctx->todata;
1428:     rwaits   = gen_from->rev_requests;
1429:     swaits   = gen_to->rev_requests;
1430:   } else {
1431:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
1432:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1433:     rwaits   = gen_from->requests;
1434:     swaits   = gen_to->requests;
1435:   }
1436:   svalues  = gen_to->values;
1437:   nrecvs   = gen_from->n;
1438:   nsends   = gen_to->n;
1439:   indices  = gen_to->indices;
1440:   sstarts  = gen_to->starts;

1442:   if (!(mode & SCATTER_LOCAL)) {

1444:     if (!gen_from->use_readyreceiver && !gen_to->sendfirst) {
1445:       /* post receives since they were not posted in VecScatterPostRecvs()   */
1446:       MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
1447:     }

1449:     if (ctx->packtogether) {
1450:       /* this version packs all the messages together and sends, when -vecscatter_packtogether used */
1451:       len  = 4*sstarts[nsends];
1452:       val  = svalues;
1453:       for (i=0; i<len; i += 4) {
1454:         idx    = *indices++;
1455:         val[0] = xv[idx];
1456:         val[1] = xv[idx+1];
1457:         val[2] = xv[idx+2];
1458:         val[3] = xv[idx+3];
1459:         val    += 4;
1460:       }
1461:       MPI_Startall_isend(len,nsends,swaits);
1462:     } else {
1463:       /* this version packs and sends one message at a time; this is the default */
1464:       val  = svalues;
1465:       for (i=0; i<nsends; i++) {
1466:         iend = sstarts[i+1]-sstarts[i];

1468:         for (j=0; j<iend; j++) {
1469:           idx     = *indices++;
1470:           val[0] = xv[idx];
1471:           val[1] = xv[idx+1];
1472:           val[2] = xv[idx+2];
1473:           val[3] = xv[idx+3];
1474:           val    += 4;
1475:         }
1476:         MPI_Start_isend(4*iend,swaits+i);
1477:       }
1478:     }

1480:     if (!gen_from->use_readyreceiver && gen_to->sendfirst) {
1481:       /* post receives since they were not posted in VecScatterPostRecvs()   */
1482:       MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
1483:     }
1484:   }

1486:   /* take care of local scatters */
1487:   if (gen_to->local.n) {
1488:     PetscInt *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
1489:     PetscInt n       = gen_to->local.n,il,ir;
1490:     if (addv == INSERT_VALUES) {
1491:       if (gen_to->local.is_copy) {
1492:         PetscMemcpy(yv+gen_from->local.copy_start,xv+gen_to->local.copy_start,gen_to->local.copy_length);
1493:       } else {
1494:         for (i=0; i<n; i++) {
1495:           il = fslots[i]; ir = tslots[i];
1496:           yv[il]   = xv[ir];
1497:           yv[il+1] = xv[ir+1];
1498:           yv[il+2] = xv[ir+2];
1499:           yv[il+3] = xv[ir+3];
1500:         }
1501:       }
1502:     }  else if (addv == ADD_VALUES) {
1503:       for (i=0; i<n; i++) {
1504:         il = fslots[i]; ir = tslots[i];
1505:         yv[il]   += xv[ir];
1506:         yv[il+1] += xv[ir+1];
1507:         yv[il+2] += xv[ir+2];
1508:         yv[il+3] += xv[ir+3];
1509:       }
1510: #if !defined(PETSC_USE_COMPLEX)
1511:     }  else if (addv == MAX_VALUES) {
1512:       for (i=0; i<n; i++) {
1513:         il = fslots[i]; ir = tslots[i];
1514:         yv[il]   = PetscMax(yv[il],xv[ir]);
1515:         yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1516:         yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
1517:         yv[il+3] = PetscMax(yv[il+3],xv[ir+3]);
1518:       }
1519: #endif
1520:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1521:   }
1522:   VecRestoreArray(xin,&xv);
1523:   if (xin != yin) {VecRestoreArray(yin,&yv);}
1524:   return(0);
1525: }
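
/*
   Editorial sketch, not part of the original vpscat.c: the INSERT_VALUES
   branch of the local scatter above short-circuits to a single PetscMemcpy
   when local.is_copy is set by VecScatterLocalOptimizeCopy_Private() (called
   at the end of VecScatterCreate_PtoS below).  The helper below, with an
   illustrative name and signature, shows the kind of contiguity test that
   makes the memcpy legal: both slot lists must be one contiguous run.
*/
#if 0
static PetscTruth LocalScatterIsCopy(PetscInt n,PetscInt bs,
                                     const PetscInt *to_slots,const PetscInt *from_slots)
{
  PetscInt i;
  for (i=1; i<n; i++) {
    if (to_slots[i]   != to_slots[0]   + bs*i) return PETSC_FALSE;
    if (from_slots[i] != from_slots[0] + bs*i) return PETSC_FALSE;
  }
  return PETSC_TRUE;   /* whole local part is one block copy of n*bs scalars */
}
#endif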

1527: /* --------------------------------------------------------------------------------------*/

1531: PetscErrorCode VecScatterEnd_PtoP_4(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1532: {
1533:   VecScatter_MPI_General *gen_to,*gen_from;
1534:   PetscScalar            *rvalues,*yv,*val;
1535:   PetscErrorCode         ierr;
1536:   PetscInt               nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
1537:   PetscMPIInt            imdex;
1538:   MPI_Request            *rwaits,*swaits;
1539:   MPI_Status             *rstatus,*sstatus;

1542:   if (mode & SCATTER_LOCAL) return(0);
1543:   VecGetArray(yin,&yv);

1545:   if (mode & SCATTER_REVERSE) {
1546:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
1547:     gen_from = (VecScatter_MPI_General*)ctx->todata;
1548:     rwaits   = gen_from->rev_requests;
1549:     swaits   = gen_to->rev_requests;
1550:     sstatus  = gen_from->sstatus;
1551:     rstatus  = gen_from->rstatus;
1552:   } else {
1553:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
1554:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1555:     rwaits   = gen_from->requests;
1556:     swaits   = gen_to->requests;
1557:     sstatus  = gen_to->sstatus;
1558:     rstatus  = gen_to->rstatus;
1559:   }
1560:   rvalues  = gen_from->values;
1561:   nrecvs   = gen_from->n;
1562:   nsends   = gen_to->n;
1563:   indices  = gen_from->indices;
1564:   rstarts  = gen_from->starts;

1566:   /*  wait on receives */
1567:   count = nrecvs;
1568:   if (ctx->packtogether) { /* receive all messages, then unpack all, when -vecscatter_packtogether used */
1569:     MPI_Waitall(nrecvs,rwaits,rstatus);
1570:     n        = rstarts[count];
1571:     val      = rvalues;
1572:     lindices = indices;
1573:     if (addv == INSERT_VALUES) {
1574:       for (i=0; i<n; i++) {
1575:         idx       = lindices[i];
1576:         yv[idx]   = val[0];
1577:         yv[idx+1] = val[1];
1578:         yv[idx+2] = val[2];
1579:         yv[idx+3] = val[3];
1580:         val      += 4;
1581:       }
1582:     } else if (addv == ADD_VALUES) {
1583:       for (i=0; i<n; i++) {
1584:         idx       = lindices[i];
1585:         yv[idx]   += val[0];
1586:         yv[idx+1] += val[1];
1587:         yv[idx+2] += val[2];
1588:         yv[idx+3] += val[3];
1589:         val       += 4;
1590:       }
1591: #if !defined(PETSC_USE_COMPLEX)
1592:     } else if (addv == MAX_VALUES) {
1593:       for (i=0; i<n; i++) {
1594:         idx       = lindices[i];
1595:         yv[idx]   = PetscMax(yv[idx],val[0]);
1596:         yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1597:         yv[idx+2] = PetscMax(yv[idx+2],val[2]);
1598:         yv[idx+3] = PetscMax(yv[idx+3],val[3]);
1599:         val       += 4;
1600:       }
1601: #endif
1602:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1603:   } else { /* unpack each message as it arrives, default version */
1604:     while (count) {
1605:       MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus[0]);
1606:       /* unpack receives into our local space */
1607:       val      = rvalues + 4*rstarts[imdex];
1608:       lindices = indices + rstarts[imdex];
1609:       n        = rstarts[imdex+1] - rstarts[imdex];
1610:       if (addv == INSERT_VALUES) {
1611:         for (i=0; i<n; i++) {
1612:           idx       = lindices[i];
1613:           yv[idx]   = val[0];
1614:           yv[idx+1] = val[1];
1615:           yv[idx+2] = val[2];
1616:           yv[idx+3] = val[3];
1617:           val      += 4;
1618:         }
1619:       } else if (addv == ADD_VALUES) {
1620:         for (i=0; i<n; i++) {
1621:           idx       = lindices[i];
1622:           yv[idx]   += val[0];
1623:           yv[idx+1] += val[1];
1624:           yv[idx+2] += val[2];
1625:           yv[idx+3] += val[3];
1626:           val       += 4;
1627:         }
1628: #if !defined(PETSC_USE_COMPLEX)
1629:       } else if (addv == MAX_VALUES) {
1630:         for (i=0; i<n; i++) {
1631:           idx       = lindices[i];
1632:           yv[idx]   = PetscMax(yv[idx],val[0]);
1633:           yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1634:           yv[idx+2] = PetscMax(yv[idx+2],val[2]);
1635:           yv[idx+3] = PetscMax(yv[idx+3],val[3]);
1636:           val       += 4;
1637:         }
1638: #endif
1639:       }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1640:       count--;
1641:     }
1642:   }

1644:   /* wait on sends */
1645:   if (nsends) {
1646:     MPI_Waitall(nsends,swaits,sstatus);
1647:   }
1648:   VecRestoreArray(yin,&yv);
1649:   return(0);
1650: }
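
/*
   Editorial sketch, not part of the original vpscat.c: VecScatterEnd_PtoP_4
   above chooses between two completion strategies, selected at runtime by
   -vecscatter_packtogether.  Stripped of the packing details, the choice is
   the one below; the helper name and parameters are illustrative only.
*/
#if 0
static void WaitAndUnpack(PetscTruth packtogether,PetscMPIInt nrecvs,
                          MPI_Request *rwaits,MPI_Status *rstatus)
{
  PetscMPIInt imdex,count;
  if (packtogether) {
    MPI_Waitall(nrecvs,rwaits,rstatus);          /* wait for every message ...      */
    /* ... then unpack the entire receive buffer in one pass                        */
  } else {
    count = nrecvs;
    while (count--) {
      MPI_Waitany(nrecvs,rwaits,&imdex,rstatus); /* take whichever arrives first    */
      /* ... unpack only message imdex, overlapping unpacking with later arrivals   */
    }
  }
}
#endif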

1652: /* --------------------------------------------------------------------------------------*/

1656: PetscErrorCode VecScatterBegin_PtoP_3(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1657: {
1658:   VecScatter_MPI_General *gen_to,*gen_from;
1659:   PetscScalar            *xv,*yv,*val,*svalues;
1660:   MPI_Request            *rwaits,*swaits;
1661:   PetscErrorCode         ierr;
1662:   PetscInt               i,*indices,*sstarts,iend,j,nrecvs,nsends,idx;

1665:   VecGetArray(xin,&xv);
1666:   if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}

1668:   if (mode & SCATTER_REVERSE) {
1669:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
1670:     gen_from = (VecScatter_MPI_General*)ctx->todata;
1671:     rwaits   = gen_from->rev_requests;
1672:     swaits   = gen_to->rev_requests;
1673:   } else {
1674:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
1675:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1676:     rwaits   = gen_from->requests;
1677:     swaits   = gen_to->requests;
1678:   }
1679:   svalues  = gen_to->values;
1680:   nrecvs   = gen_from->n;
1681:   nsends   = gen_to->n;
1682:   indices  = gen_to->indices;
1683:   sstarts  = gen_to->starts;

1685:   if (!(mode & SCATTER_LOCAL)) {

1687:     if (gen_to->sendfirst) {
1688:       /* this version packs and sends one at a time */
1689:       val  = svalues;
1690:       for (i=0; i<nsends; i++) {
1691:         iend = sstarts[i+1]-sstarts[i];

1693:         for (j=0; j<iend; j++) {
1694:           idx     = *indices++;
1695:           val[0] = xv[idx];
1696:           val[1] = xv[idx+1];
1697:           val[2] = xv[idx+2];
1698:           val    += 3;
1699:         }
1700:         MPI_Start_isend(3*iend,swaits+i);
1701:       }
1702:     }

1704:     if (!gen_from->use_readyreceiver) {
1705:       /* post receives since they were not posted in VecScatterPostRecvs()   */
1706:       MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
1707:     }

1709:     if (!gen_to->sendfirst) {
1710:       /* this version packs all the messages together and sends */
1711:       /*
1712:       len  = 3*sstarts[nsends];
1713:       val  = svalues;
1714:       for (i=0; i<len; i += 3) {
1715:         idx     = *indices++;
1716:         val[0] = xv[idx];
1717:         val[1] = xv[idx+1];
1718:         val[2] = xv[idx+2];
1719:         val      += 3;
1720:       }
1721:       MPI_Startall_isend(len,nsends,swaits);
1722:       */

1724:       /* this version packs and sends one at a time */
1725:       val  = svalues;
1726:       for (i=0; i<nsends; i++) {
1727:         iend = sstarts[i+1]-sstarts[i];

1729:         for (j=0; j<iend; j++) {
1730:           idx     = *indices++;
1731:           val[0] = xv[idx];
1732:           val[1] = xv[idx+1];
1733:           val[2] = xv[idx+2];
1734:           val    += 3;
1735:         }
1736:         MPI_Start_isend(3*iend,swaits+i);
1737:       }
1738:     }
1739:   }

1741:   /* take care of local scatters */
1742:   if (gen_to->local.n) {
1743:     PetscInt *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
1744:     PetscInt n       = gen_to->local.n,il,ir;
1745:     if (addv == INSERT_VALUES) {
1746:       if (gen_to->local.is_copy) {
1747:         PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
1748:       } else {
1749:         for (i=0; i<n; i++) {
1750:           il = fslots[i]; ir = tslots[i];
1751:           yv[il]   = xv[ir];
1752:           yv[il+1] = xv[ir+1];
1753:           yv[il+2] = xv[ir+2];
1754:         }
1755:       }
1756:     }  else if (addv == ADD_VALUES) {
1757:       for (i=0; i<n; i++) {
1758:         il = fslots[i]; ir = tslots[i];
1759:         yv[il]   += xv[ir];
1760:         yv[il+1] += xv[ir+1];
1761:         yv[il+2] += xv[ir+2];
1762:       }
1763: #if !defined(PETSC_USE_COMPLEX)
1764:     }  else if (addv == MAX_VALUES) {
1765:       for (i=0; i<n; i++) {
1766:         il = fslots[i]; ir = tslots[i];
1767:         yv[il]   = PetscMax(yv[il],xv[ir]);
1768:         yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1769:         yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
1770:       }
1771: #endif
1772:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1773:   }
1774:   VecRestoreArray(xin,&xv);
1775:   if (xin != yin) {VecRestoreArray(yin,&yv);}
1776:   return(0);
1777: }

1779: /* --------------------------------------------------------------------------------------*/

1783: PetscErrorCode VecScatterEnd_PtoP_3(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1784: {
1785:   VecScatter_MPI_General *gen_to,*gen_from;
1786:   PetscScalar            *rvalues,*yv,*val;
1787:   PetscErrorCode         ierr;
1788:   PetscInt               nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
1789:   PetscMPIInt            imdex;
1790:   MPI_Request            *rwaits,*swaits;
1791:   MPI_Status             rstatus,*sstatus;

1794:   if (mode & SCATTER_LOCAL) return(0);
1795:   VecGetArray(yin,&yv);

1797:   if (mode & SCATTER_REVERSE) {
1798:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
1799:     gen_from = (VecScatter_MPI_General*)ctx->todata;
1800:     rwaits   = gen_from->rev_requests;
1801:     swaits   = gen_to->rev_requests;
1802:     sstatus  = gen_from->sstatus;
1803:   } else {
1804:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
1805:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1806:     rwaits   = gen_from->requests;
1807:     swaits   = gen_to->requests;
1808:     sstatus  = gen_to->sstatus;
1809:   }
1810:   rvalues  = gen_from->values;
1811:   nrecvs   = gen_from->n;
1812:   nsends   = gen_to->n;
1813:   indices  = gen_from->indices;
1814:   rstarts  = gen_from->starts;

1816:   /*  wait on receives */
1817:   count = nrecvs;
1818:   while (count) {
1819:     MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
1820:     /* unpack receives into our local space */
1821:     val      = rvalues + 3*rstarts[imdex];
1822:     lindices = indices + rstarts[imdex];
1823:     n        = rstarts[imdex+1] - rstarts[imdex];
1824:     if (addv == INSERT_VALUES) {
1825:       for (i=0; i<n; i++) {
1826:         idx       = lindices[i];
1827:         yv[idx]   = val[0];
1828:         yv[idx+1] = val[1];
1829:         yv[idx+2] = val[2];
1830:         val      += 3;
1831:       }
1832:     } else if (addv == ADD_VALUES) {
1833:       for (i=0; i<n; i++) {
1834:         idx       = lindices[i];
1835:         yv[idx]   += val[0];
1836:         yv[idx+1] += val[1];
1837:         yv[idx+2] += val[2];
1838:         val       += 3;
1839:       }
1840: #if !defined(PETSC_USE_COMPLEX)
1841:     } else if (addv == MAX_VALUES) {
1842:       for (i=0; i<n; i++) {
1843:         idx       = lindices[i];
1844:         yv[idx]   = PetscMax(yv[idx],val[0]);
1845:         yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1846:         yv[idx+2] = PetscMax(yv[idx+2],val[2]);
1847:         val       += 3;
1848:       }
1849: #endif
1850:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1851:     count--;
1852:   }
1853:   /* wait on sends */
1854:   if (nsends) {
1855:     MPI_Waitall(nsends,swaits,sstatus);
1856:   }
1857:   VecRestoreArray(yin,&yv);
1858:   return(0);
1859: }
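
/*
   Editorial sketch, not part of the original vpscat.c: the Begin/End pairs
   in this file never call MPI_Isend/MPI_Irecv directly.  The requests are
   persistent ones, registered once with MPI_Send_init()/MPI_Recv_init() in
   VecScatterCreate_PtoS() below and merely (re)started here through the
   MPI_Start_isend/MPI_Startall_irecv wrappers.  The standalone exchange
   below, with made-up names and a single partner, shows that lifecycle.
*/
#if 0
static void PersistentExchange(MPI_Comm comm,int partner,int tag,
                               double *sendbuf,double *recvbuf,int len,int ntimes)
{
  MPI_Request req[2];
  MPI_Status  stat[2];
  int         k;

  MPI_Send_init(sendbuf,len,MPI_DOUBLE,partner,tag,comm,&req[0]); /* register once   */
  MPI_Recv_init(recvbuf,len,MPI_DOUBLE,partner,tag,comm,&req[1]);
  for (k=0; k<ntimes; k++) {
    MPI_Startall(2,req);        /* restart the pattern each time (VecScatterBegin)   */
    MPI_Waitall(2,req,stat);    /* complete it                    (VecScatterEnd)    */
  }
  MPI_Request_free(&req[0]);    /* released once, cf. VecScatterDestroy_PtoP_X below */
  MPI_Request_free(&req[1]);
}
#endif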

1861: /* --------------------------------------------------------------------------------------*/

1865: PetscErrorCode VecScatterBegin_PtoP_2(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1866: {
1867:   VecScatter_MPI_General *gen_to,*gen_from;
1868:   PetscScalar            *xv,*yv,*val,*svalues;
1869:   MPI_Request            *rwaits,*swaits;
1870:   PetscErrorCode         ierr;
1871:   PetscInt               i,*indices,*sstarts,iend,j,nrecvs,nsends,idx;

1874:   VecGetArray(xin,&xv);
1875:   if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
1876:   if (mode & SCATTER_REVERSE) {
1877:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
1878:     gen_from = (VecScatter_MPI_General*)ctx->todata;
1879:     rwaits   = gen_from->rev_requests;
1880:     swaits   = gen_to->rev_requests;
1881:   } else {
1882:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
1883:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1884:     rwaits   = gen_from->requests;
1885:     swaits   = gen_to->requests;
1886:   }
1887:   svalues  = gen_to->values;
1888:   nrecvs   = gen_from->n;
1889:   nsends   = gen_to->n;
1890:   indices  = gen_to->indices;
1891:   sstarts  = gen_to->starts;

1893:   if (!(mode & SCATTER_LOCAL)) {

1895:     if (gen_to->sendfirst) {
1896:       /* this version packs and sends one at a time */
1897:       val  = svalues;
1898:       for (i=0; i<nsends; i++) {
1899:         iend = sstarts[i+1]-sstarts[i];

1901:         for (j=0; j<iend; j++) {
1902:           idx     = *indices++;
1903:           val[0] = xv[idx];
1904:           val[1] = xv[idx+1];
1905:           val    += 2;
1906:         }
1907:         MPI_Start_isend(2*iend,swaits+i);
1908:       }
1909:     }

1911:     if (!gen_from->use_readyreceiver) {
1912:       /* post receives since they were not posted in VecScatterPostRecvs()   */
1913:       MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
1914:     }

1916:     if (!gen_to->sendfirst) {
1917:       /* this version packs all the messages together and sends */
1918:       /*
1919:       len  = 2*sstarts[nsends];
1920:       val  = svalues;
1921:       for (i=0; i<len; i += 2) {
1922:         idx     = *indices++;
1923:         val[0] = xv[idx];
1924:         val[1] = xv[idx+1];
1925:         val      += 2;
1926:       }
1927:       MPI_Startall_isend(len,nsends,swaits);
1928:       */

1930:       /* this version packs and sends one at a time */
1931:       val  = svalues;
1932:       for (i=0; i<nsends; i++) {
1933:         iend = sstarts[i+1]-sstarts[i];

1935:         for (j=0; j<iend; j++) {
1936:           idx     = *indices++;
1937:           val[0] = xv[idx];
1938:           val[1] = xv[idx+1];
1939:           val    += 2;
1940:         }
1941:         MPI_Start_isend(2*iend,swaits+i);
1942:       }
1943:     }
1944:   }

1946:   /* take care of local scatters */
1947:   if (gen_to->local.n) {
1948:     PetscInt *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
1949:     PetscInt n       = gen_to->local.n,il,ir;
1950:     if (addv == INSERT_VALUES) {
1951:       if (gen_to->local.is_copy) {
1952:         PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
1953:       } else {
1954:         for (i=0; i<n; i++) {
1955:           il = fslots[i]; ir = tslots[i];
1956:           yv[il]   = xv[ir];
1957:           yv[il+1] = xv[ir+1];
1958:         }
1959:       }
1960:     }  else if (addv == ADD_VALUES) {
1961:       for (i=0; i<n; i++) {
1962:         il = fslots[i]; ir = tslots[i];
1963:         yv[il]   += xv[ir];
1964:         yv[il+1] += xv[ir+1];
1965:       }
1966: #if !defined(PETSC_USE_COMPLEX)
1967:     }  else if (addv == MAX_VALUES) {
1968:       for (i=0; i<n; i++) {
1969:         il = fslots[i]; ir = tslots[i];
1970:         yv[il]   = PetscMax(yv[il],xv[ir]);
1971:         yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1972:       }
1973: #endif
1974:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1975:   }
1976:   VecRestoreArray(xin,&xv);
1977:   if (xin != yin) {VecRestoreArray(yin,&yv);}
1978:   return(0);
1979: }

1981: /* --------------------------------------------------------------------------------------*/

1985: PetscErrorCode VecScatterEnd_PtoP_2(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1986: {
1987:   VecScatter_MPI_General *gen_to,*gen_from;
1988:   PetscScalar            *rvalues,*yv,*val;
1989:   PetscErrorCode         ierr;
1990:   PetscInt               nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
1991:   PetscMPIInt            imdex;
1992:   MPI_Request            *rwaits,*swaits;
1993:   MPI_Status             rstatus,*sstatus;

1996:   if (mode & SCATTER_LOCAL) return(0);
1997:   VecGetArray(yin,&yv);

1999:   if (mode & SCATTER_REVERSE) {
2000:     gen_to   = (VecScatter_MPI_General*)ctx->fromdata;
2001:     gen_from = (VecScatter_MPI_General*)ctx->todata;
2002:     rwaits   = gen_from->rev_requests;
2003:     swaits   = gen_to->rev_requests;
2004:     sstatus  = gen_from->sstatus;
2005:   } else {
2006:     gen_to   = (VecScatter_MPI_General*)ctx->todata;
2007:     gen_from = (VecScatter_MPI_General*)ctx->fromdata;
2008:     rwaits   = gen_from->requests;
2009:     swaits   = gen_to->requests;
2010:     sstatus  = gen_to->sstatus;
2011:   }
2012:   rvalues  = gen_from->values;
2013:   nrecvs   = gen_from->n;
2014:   nsends   = gen_to->n;
2015:   indices  = gen_from->indices;
2016:   rstarts  = gen_from->starts;

2018:   /*  wait on receives */
2019:   count = nrecvs;
2020:   while (count) {
2021:     MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
2022:     /* unpack receives into our local space */
2023:     val      = rvalues + 2*rstarts[imdex];
2024:     lindices = indices + rstarts[imdex];
2025:     n        = rstarts[imdex+1] - rstarts[imdex];
2026:     if (addv == INSERT_VALUES) {
2027:       for (i=0; i<n; i++) {
2028:         idx       = lindices[i];
2029:         yv[idx]   = val[0];
2030:         yv[idx+1] = val[1];
2031:         val      += 2;
2032:       }
2033:     } else if (addv == ADD_VALUES) {
2034:       for (i=0; i<n; i++) {
2035:         idx       = lindices[i];
2036:         yv[idx]   += val[0];
2037:         yv[idx+1] += val[1];
2038:         val       += 2;
2039:       }
2040: #if !defined(PETSC_USE_COMPLEX)
2041:     } else if (addv == MAX_VALUES) {
2042:       for (i=0; i<n; i++) {
2043:         idx       = lindices[i];
2044:         yv[idx]   = PetscMax(yv[idx],val[0]);
2045:         yv[idx+1] = PetscMax(yv[idx+1],val[1]);
2046:         val       += 2;
2047:       }
2048: #endif
2049:     }  else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
2050:     count--;
2051:   }
2052:   /* wait on sends */
2053:   if (nsends) {
2054:     MPI_Waitall(nsends,swaits,sstatus);
2055:   }
2056:   VecRestoreArray(yin,&yv);
2057:   return(0);
2058: }
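
/*
   Editorial sketch, not part of the original vpscat.c: every Begin/End pair
   above swaps todata/fromdata and switches to rev_requests when
   SCATTER_REVERSE is set, so one context also runs the transposed
   communication (typically accumulating ghost contributions back).  The
   usage below is illustrative; in this generation of the library the two
   vector arguments are passed in reversed roles for the reverse pass, as
   suggested by the way xin/yin are used above; consult the VecScatterBegin
   manual page of the release actually in use.
*/
#if 0
static PetscErrorCode ForwardThenReverse(Vec global,Vec local,VecScatter scat)
{
  VecScatterBegin(global,local,INSERT_VALUES,SCATTER_FORWARD,scat);
  VecScatterEnd(global,local,INSERT_VALUES,SCATTER_FORWARD,scat);
  /* ... operate on the local form ... */
  VecScatterBegin(local,global,ADD_VALUES,SCATTER_REVERSE,scat);
  VecScatterEnd(local,global,ADD_VALUES,SCATTER_REVERSE,scat);
  return 0;
}
#endif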

2060: /* ---------------------------------------------------------------------------------*/

2064: PetscErrorCode VecScatterDestroy_PtoP_X(VecScatter ctx)
2065: {
2066:   VecScatter_MPI_General *gen_to   = (VecScatter_MPI_General*)ctx->todata;
2067:   VecScatter_MPI_General *gen_from = (VecScatter_MPI_General*)ctx->fromdata;
2068:   PetscErrorCode         ierr;
2069:   PetscInt               i;

2072:   if (gen_to->use_readyreceiver) {
2073:     /*
2074:        Since the receives have already been posted (ready-receiver mode) we must
2075:        cancel them before freeing the requests
2076:     */
2077:     for (i=0; i<gen_from->n; i++) {
2078:       MPI_Cancel(gen_from->requests+i);
2079:     }
2080:   }

2082:   if (gen_to->local.slots)              {PetscFree2(gen_to->local.slots,gen_from->local.slots);}
2083:   if (gen_to->local.slots_nonmatching)  {PetscFree2(gen_to->local.slots_nonmatching,gen_from->local.slots_nonmatching);}

2085:   /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
2086:   /* 
2087:      IBM's PE version of MPI has a bug where freeing these requests corrupts later
2088:      message passing.
2089:   */
2090: #if !defined(PETSC_HAVE_BROKEN_REQUEST_FREE)
2091:   for (i=0; i<gen_to->n; i++) {
2092:     MPI_Request_free(gen_to->requests + i);
2093:     MPI_Request_free(gen_to->rev_requests + i);
2094:   }

2096:   /*
2097:       MPICH could not properly cancel requests, so in ready-receiver mode we
2098:     cannot free the requests. This may be fixed by now; if not, wrap the following
2099:     code in an if (!gen_to->use_readyreceiver) { ... } block.
2100:   */
2101:   for (i=0; i<gen_from->n; i++) {
2102:     MPI_Request_free(gen_from->requests + i);
2103:     MPI_Request_free(gen_from->rev_requests + i);
2104:   }
2105: #endif
2106: 
2107:   PetscFree(gen_to->sstatus);
2108:   PetscFree(gen_to->values);
2109:   PetscFree2(gen_to->rev_requests,gen_from->rev_requests);
2110:   PetscFree(gen_from->values);
2111:   PetscFree(gen_to);
2112:   PetscFree(gen_from);
2113:   PetscHeaderDestroy(ctx);
2114:   return(0);
2115: }
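
/*
   Editorial sketch, not part of the original vpscat.c: in ready-receiver
   mode the receives are already started when the context is destroyed, so
   the routine above cancels them before the requests are freed.  The helper
   below, illustrative only, shows a conservatively ordered cancel/complete/
   free sequence for a single pre-started persistent receive in plain MPI.
*/
#if 0
static void CancelPostedReceive(MPI_Request *req)
{
  MPI_Status stat;
  MPI_Cancel(req);         /* ask MPI to abandon the pre-posted receive      */
  MPI_Wait(req,&stat);     /* let the cancellation (or the message) complete */
  MPI_Request_free(req);   /* now the persistent request can be released     */
}
#endif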

2117: /* ==========================================================================================*/

2119: /*              create parallel to sequential scatter context                           */
2120: /*
2121:    bs indicates how many elements there are in each block. Normally
2122:    this would be 1.
2123: */
2126: PetscErrorCode VecScatterCreate_PtoS(PetscInt nx,PetscInt *inidx,PetscInt ny,PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2127: {
2128:   VecScatter_MPI_General *from,*to;
2129:   PetscErrorCode         ierr;
2130:   PetscMPIInt            size,rank,imdex,tag,n;
2131:   PetscInt               *source,*lens,*owners;
2132:   PetscInt               *lowner,*start,lengthy;
2133:   PetscInt               *nprocs,i,j,idx,nsends,nrecvs;
2134:   PetscInt               *owner,*starts,count,slen;
2135:   PetscInt               *rvalues,*svalues,base,nmax,*values,len,*indx,nprocslocal;
2136:   MPI_Comm               comm;
2137:   MPI_Request            *send_waits,*recv_waits;
2138:   MPI_Status             recv_status,*send_status;
2139:   PetscMap               map;
2140:   PetscTruth             found;
2141: 
2143:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
2144:   PetscObjectGetComm((PetscObject)xin,&comm);
2145:   MPI_Comm_rank(comm,&rank);
2146:   MPI_Comm_size(comm,&size);
2147:   VecGetPetscMap(xin,&map);
2148:   PetscMapGetGlobalRange(map,&owners);
2149:   VecGetSize(yin,&lengthy);

2151:   /*  first count number of contributors to each processor */
2152:   PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
2153:   PetscMemzero(nprocs,2*size*sizeof(PetscInt));
2154:   PetscMalloc(nx*sizeof(PetscInt),&owner);
2155:   for (i=0; i<nx; i++) {
2156:     idx = inidx[i];
2157:     found = PETSC_FALSE;
2158:     for (j=0; j<size; j++) {
2159:       if (idx >= owners[j] && idx < owners[j+1]) {
2160:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
2161:       }
2162:     }
2163:     if (!found) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2164:   }
2165:   nprocslocal    = nprocs[2*rank];
2166:   nprocs[2*rank] = nprocs[2*rank+1] = 0;
2167:   nsends         = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

2169:   /* inform other processors of number of messages and max length*/
2170:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

2172:   /* post receives:   */
2173:   PetscMalloc(nrecvs*(nmax+1)*sizeof(PetscInt),&rvalues);
2174:   PetscMalloc(2*nrecvs*sizeof(PetscInt),&lens);
2175:   source = lens + nrecvs;
2176:   PetscMalloc(nrecvs*sizeof(MPI_Request),&recv_waits);
2177:   for (i=0; i<nrecvs; i++) {
2178:     MPI_Irecv((rvalues+nmax*i),nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
2179:   }

2181:   /* do sends:
2182:       1) starts[i] gives the starting index in svalues for stuff going to 
2183:          the ith processor
2184:   */
2185:   PetscMalloc(nx*sizeof(PetscInt),&svalues);
2186:   PetscMalloc(nsends*sizeof(MPI_Request),&send_waits);
2187:   PetscMalloc((size+1)*sizeof(PetscInt),&starts);
2188:   starts[0]  = 0;
2189:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
2190:   for (i=0; i<nx; i++) {
2191:     if (owner[i] != rank) {
2192:       svalues[starts[owner[i]]++] = inidx[i];
2193:     }
2194:   }

2196:   starts[0] = 0;
2197:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
2198:   count = 0;
2199:   for (i=0; i<size; i++) {
2200:     if (nprocs[2*i+1]) {
2201:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
2202:     }
2203:   }
2204:   PetscFree(starts);

2206:   /*  wait on receives */
2207:   count  = nrecvs;
2208:   slen   = 0;
2209:   while (count) {
2210:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2211:     /* unpack receives into our local space */
2212:     MPI_Get_count(&recv_status,MPIU_INT,&n);
2213:     source[imdex]  = recv_status.MPI_SOURCE;
2214:     lens[imdex]    = n;
2215:     slen          += n;
2216:     count--;
2217:   }
2218: 
2219:   /* allocate entire send scatter context */
2220:   PetscNew(VecScatter_MPI_General,&to);
2221:   PetscMemzero(to,sizeof(VecScatter_MPI_General));
2222:   PetscOptionsHasName(PETSC_NULL,"-vecscatter_sendfirst",&to->sendfirst);
2223:   len = slen*sizeof(PetscInt) + bs*slen*sizeof(PetscScalar) + (nrecvs+1)*sizeof(PetscInt) +
2224:         nrecvs*(sizeof(PetscInt) + sizeof(MPI_Request));
2225:   to->n         = nrecvs;
2226:   PetscMalloc(len,&to->values);
2227:   to->requests  = (MPI_Request*)(to->values + bs*slen);
2228:   to->indices   = (PetscInt*)(to->requests + nrecvs);
2229:   to->starts    = (PetscInt*)(to->indices + slen);
2230:   to->procs     = (PetscMPIInt*)(to->starts + nrecvs + 1);
2231:   PetscMalloc(2*(PetscMax(nrecvs,nsends)+1)*sizeof(MPI_Status),&to->sstatus);
2232:   to->rstatus   = to->sstatus + PetscMax(nrecvs,nsends) + 1;
2233:   ctx->todata   = (void*)to;
2234:   to->starts[0] = 0;

2236:   if (nrecvs) {
2237:     PetscMalloc(nrecvs*sizeof(PetscInt),&indx);
2238:     for (i=0; i<nrecvs; i++) indx[i] = i;
2239:     PetscSortIntWithPermutation(nrecvs,source,indx);

2241:     /* move the data into the send scatter */
2242:     base = owners[rank];
2243:     for (i=0; i<nrecvs; i++) {
2244:       to->starts[i+1] = to->starts[i] + lens[indx[i]];
2245:       to->procs[i]    = source[indx[i]];
2246:       values = rvalues + indx[i]*nmax;
2247:       for (j=0; j<lens[indx[i]]; j++) {
2248:         to->indices[to->starts[i] + j] = values[j] - base;
2249:       }
2250:     }
2251:     PetscFree(indx);
2252:   }
2253:   PetscFree(rvalues);
2254:   PetscFree(lens);
2255:   PetscFree(recv_waits);
2256: 
2257:   /* allocate entire receive scatter context */
2258:   PetscNew(VecScatter_MPI_General,&from);
2259:   PetscMemzero(from,sizeof(VecScatter_MPI_General));
2260:   PetscOptionsHasName(PETSC_NULL,"-vecscatter_sendfirst",&from->sendfirst);
2261:   len = ny*sizeof(PetscInt) + ny*bs*sizeof(PetscScalar) + (nsends+1)*sizeof(PetscInt) +
2262:         nsends*(sizeof(PetscInt) + sizeof(MPI_Request));
2263:   from->n        = nsends;
2264:   PetscMalloc(len,&from->values);
2265:   from->requests = (MPI_Request*)(from->values + bs*ny);
2266:   from->indices  = (PetscInt*)(from->requests + nsends);
2267:   from->starts   = (PetscInt*)(from->indices + ny);
2268:   from->procs    = (PetscMPIInt*)(from->starts + nsends + 1);
2269:   ctx->fromdata  = (void*)from;

2271:   /* move data into receive scatter */
2272:   PetscMalloc((size+nsends+1)*sizeof(PetscInt),&lowner);
2273:   start = lowner + size;
2274:   count = 0; from->starts[0] = start[0] = 0;
2275:   for (i=0; i<size; i++) {
2276:     if (nprocs[2*i+1]) {
2277:       lowner[i]            = count;
2278:       from->procs[count++] = i;
2279:       from->starts[count]  = start[count] = start[count-1] + nprocs[2*i];
2280:     }
2281:   }
2282:   for (i=0; i<nx; i++) {
2283:     if (owner[i] != rank) {
2284:       from->indices[start[lowner[owner[i]]]++] = inidy[i];
2285:       if (inidy[i] >= lengthy) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2286:     }
2287:   }
2288:   PetscFree(lowner);
2289:   PetscFree(owner);
2290:   PetscFree(nprocs);
2291: 
2292:   /* wait on sends */
2293:   if (nsends) {
2294:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
2295:     MPI_Waitall(nsends,send_waits,send_status);
2296:     PetscFree(send_status);
2297:   }
2298:   PetscFree(send_waits);
2299:   PetscFree(svalues);

2301:   if (nprocslocal) {
2302:     PetscInt nt = from->local.n = to->local.n = nprocslocal;
2303:     /* we have a scatter to ourselves */
2304:     PetscMalloc2(nt,PetscInt,&to->local.slots,nt,PetscInt,&from->local.slots);
2305:     nt   = 0;
2306:     for (i=0; i<nx; i++) {
2307:       idx = inidx[i];
2308:       if (idx >= owners[rank] && idx < owners[rank+1]) {
2309:         to->local.slots[nt]     = idx - owners[rank];
2310:         from->local.slots[nt++] = inidy[i];
2311:         if (inidy[i] >= lengthy) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2312:       }
2313:     }
2314:   } else {
2315:     from->local.n     = 0;
2316:     from->local.slots = 0;
2317:     to->local.n       = 0;
2318:     to->local.slots   = 0;
2319:   }
2320:   from->local.nonmatching_computed = PETSC_FALSE;
2321:   from->local.n_nonmatching        = 0;
2322:   from->local.slots_nonmatching    = 0;
2323:   to->local.nonmatching_computed   = PETSC_FALSE;
2324:   to->local.n_nonmatching          = 0;
2325:   to->local.slots_nonmatching      = 0;

2327:   to->type   = VEC_SCATTER_MPI_GENERAL;
2328:   from->type = VEC_SCATTER_MPI_GENERAL;

2330:   from->bs = bs;
2331:   to->bs   = bs;
2332:   if (bs > 1) {
2333:     PetscTruth  flg,flgs = PETSC_FALSE;
2334:     PetscInt    *sstarts = to->starts,  *rstarts = from->starts;
2335:     PetscMPIInt *sprocs  = to->procs,   *rprocs  = from->procs;
2336:     MPI_Request *swaits  = to->requests,*rwaits  = from->requests;
2337:     MPI_Request *rev_swaits,*rev_rwaits;
2338:     PetscScalar *Ssvalues = to->values, *Srvalues = from->values;

2340:     ctx->destroy   = VecScatterDestroy_PtoP_X;
2341:     ctx->copy      = VecScatterCopy_PtoP_X;
2342:     ctx->view      = VecScatterView_MPI;
2343: 
2344:     tag      = ctx->tag;
2345:     comm     = ctx->comm;

2347:     /* allocate additional wait variables for the "reverse" scatter */
2348:     PetscMalloc2(nrecvs,MPI_Request,&rev_rwaits,nsends,MPI_Request,&rev_swaits);
2349:     to->rev_requests   = rev_rwaits;
2350:     from->rev_requests = rev_swaits;

2352:     /* Register the receives that you will use later (sends for scatter reverse) */
2353:     PetscOptionsHasName(PETSC_NULL,"-vecscatter_ssend",&flgs);
2354:     if (flgs) {
2355:       PetscLogInfo(0,"VecScatterCreate_PtoS:Using VecScatter Ssend mode\n");
2356:     }
2357:     for (i=0; i<from->n; i++) {
2358:       MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
2359:       if (!flgs) {
2360:         MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
2361:       } else {
2362:         MPI_Ssend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
2363:       }
2364:     }

2366:     PetscOptionsHasName(PETSC_NULL,"-vecscatter_rr",&flg);
2367:     if (flg) {
2368:       ctx->postrecvs           = VecScatterPostRecvs_PtoP_X;
2369:       to->use_readyreceiver    = PETSC_TRUE;
2370:       from->use_readyreceiver  = PETSC_TRUE;
2371:       for (i=0; i<to->n; i++) {
2372:         MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2373:       }
2374:       PetscLogInfo(0,"VecScatterCreate_PtoS:Using VecScatter ready receiver mode\n");
2375:     } else {
2376:       ctx->postrecvs           = 0;
2377:       to->use_readyreceiver    = PETSC_FALSE;
2378:       from->use_readyreceiver  = PETSC_FALSE;
2379:       for (i=0; i<to->n; i++) {
2380:         if (!flgs) {
2381:           MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2382:         } else {
2383:           MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2384:         }
2385:       }
2386:     }
2387:     /* Register receives for scatter reverse */
2388:     for (i=0; i<to->n; i++) {
2389:       MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
2390:     }

2392:     PetscLogInfo(0,"VecScatterCreate_PtoS:Using blocksize %D scatter\n",bs);
2393:     switch (bs) {
2394:     case 12:
2395:       ctx->begin     = VecScatterBegin_PtoP_12;
2396:       ctx->end       = VecScatterEnd_PtoP_12;
2397:       break;
2398:     case 6:
2399:       ctx->begin     = VecScatterBegin_PtoP_6;
2400:       ctx->end       = VecScatterEnd_PtoP_6;
2401:       break;
2402:     case 5:
2403:       ctx->begin     = VecScatterBegin_PtoP_5;
2404:       ctx->end       = VecScatterEnd_PtoP_5;
2405:       break;
2406:     case 4:
2407:       ctx->begin     = VecScatterBegin_PtoP_4;
2408:       ctx->end       = VecScatterEnd_PtoP_4;
2409:       break;
2410:     case 3:
2411:       ctx->begin     = VecScatterBegin_PtoP_3;
2412:       ctx->end       = VecScatterEnd_PtoP_3;
2413:       break;
2414:     case 2:
2415:       ctx->begin     = VecScatterBegin_PtoP_2;
2416:       ctx->end       = VecScatterEnd_PtoP_2;
2417:       break;
2418:     default:
2419:       SETERRQ(PETSC_ERR_SUP,"Blocksize not supported");
2420:     }
2421:   } else {
2422:     ctx->postrecvs = 0;
2423:     ctx->destroy   = VecScatterDestroy_PtoP;
2424:     ctx->begin     = VecScatterBegin_PtoP;
2425:     ctx->end       = VecScatterEnd_PtoP;
2426:     ctx->copy      = VecScatterCopy_PtoP;
2427:     ctx->view      = VecScatterView_MPI;
2428:   }

2430:   /* Check if the local scatter is actually a copy; important special case */
2431:   if (nprocslocal) {
2432:     VecScatterLocalOptimizeCopy_Private(&to->local,&from->local,bs);
2433:   }

2435:   return(0);
2436: }
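
/*
   Editorial sketch, not part of the original vpscat.c: a typical way the
   machinery built by VecScatterCreate_PtoS() is driven from user code.
   The routine name, sizes and index sets are made up, and the argument
   order of VecScatterBegin/VecScatterEnd follows this generation of the
   library (later releases reorder the arguments).  Error checking omitted.
*/
#if 0
static PetscErrorCode GatherFirstN(Vec x,PetscInt n,Vec *y)
{
  IS         ix;
  VecScatter scat;

  VecCreateSeq(PETSC_COMM_SELF,n,y);                 /* local destination       */
  ISCreateStride(PETSC_COMM_SELF,n,0,1,&ix);         /* entries 0..n-1          */
  VecScatterCreate(x,ix,*y,ix,&scat);                /* selects a routine above */
  VecScatterBegin(x,*y,INSERT_VALUES,SCATTER_FORWARD,scat);
  VecScatterEnd(x,*y,INSERT_VALUES,SCATTER_FORWARD,scat);
  VecScatterDestroy(scat);
  ISDestroy(ix);
  return 0;
}
#endif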

2438: /* ------------------------------------------------------------------------------------*/
2439: /*
2440:          Scatter from local Seq vectors to a parallel vector. 
2441: */
2444: PetscErrorCode VecScatterCreate_StoP(PetscInt nx,PetscInt *inidx,PetscInt ny,PetscInt *inidy,Vec yin,VecScatter ctx)
2445: {
2446:   VecScatter_MPI_General *from,*to;
2447:   PetscInt               *source,nprocslocal,*lens,*owners = yin->map->range;
2448:   PetscMPIInt            rank = yin->stash.rank,size = yin->stash.size,tag,imdex,n;
2449:   PetscErrorCode         ierr;
2450:   PetscInt               *lowner,*start;
2451:   PetscInt               *nprocs,i,j,idx,nsends,nrecvs;
2452:   PetscInt               *owner,*starts,count,slen;
2453:   PetscInt               *rvalues,*svalues,base,nmax,*values,len;
2454:   PetscTruth             found;
2455:   MPI_Comm               comm = yin->comm;
2456:   MPI_Request            *send_waits,*recv_waits;
2457:   MPI_Status             recv_status,*send_status;

2460:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
2461:   PetscMalloc5(2*size,PetscInt,&nprocs,nx,PetscInt,&owner,size,PetscInt,&lowner,size,PetscInt,&start,size+1,PetscInt,&starts);

2463:   /*  count number of contributors to each processor */
2464:   PetscMemzero(nprocs,2*size*sizeof(PetscInt));
2465:   for (i=0; i<nx; i++) {
2466:     idx   = inidy[i];
2467:     found = PETSC_FALSE;
2468:     for (j=0; j<size; j++) {
2469:       if (idx >= owners[j] && idx < owners[j+1]) {
2470:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
2471:       }
2472:     }
2473:     if (!found) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2474:   }
2475:   nprocslocal    = nprocs[2*rank];
2476:   nprocs[2*rank] = nprocs[2*rank+1] = 0;
2477:   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

2479:   /* inform other processors of number of messages and max length*/
2480:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

2482:   /* post receives:   */
2483:   PetscMalloc6(nrecvs*nmax,PetscInt,&rvalues,nrecvs,MPI_Request,&recv_waits,nx,PetscInt,&svalues,nsends,MPI_Request,&send_waits,nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);

2485:   for (i=0; i<nrecvs; i++) {
2486:     MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
2487:   }

2489:   /* do sends:
2490:       1) starts[i] gives the starting index in svalues for stuff going to 
2491:          the ith processor
2492:   */

2494:   starts[0]  = 0;
2495:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
2496:   for (i=0; i<nx; i++) {
2497:     if (owner[i] != rank) {
2498:       svalues[starts[owner[i]]++] = inidy[i];
2499:     }
2500:   }

2502:   /* reset starts because it was overwritten above */
2503:   starts[0]  = 0;
2504:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
2505:   count = 0;
2506:   for (i=0; i<size; i++) {
2507:     if (nprocs[2*i+1]) {
2508:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count);
2509:       count++;
2510:     }
2511:   }

2513:   /* allocate entire send scatter context */
2514:   PetscNew(VecScatter_MPI_General,&to);
2515:   PetscMemzero(to,sizeof(VecScatter_MPI_General));
2516:   PetscOptionsHasName(PETSC_NULL,"-vecscatter_sendfirst",&to->sendfirst);
2517:   len  = ny*(sizeof(PetscInt) + sizeof(PetscScalar)) + (nsends+1)*sizeof(PetscInt) +
2518:         nsends*(sizeof(PetscInt) + sizeof(MPI_Request));
2519:   to->n        = nsends;
2520:   PetscMalloc(len,&to->values);
2521:   to->requests = (MPI_Request*)(to->values + ny);
2522:   to->indices  = (PetscInt*)(to->requests + nsends);
2523:   to->starts   = (PetscInt*)(to->indices + ny);
2524:   to->procs    = (PetscMPIInt*)(to->starts + nsends + 1);
2525:   PetscMalloc((PetscMax(nsends,nrecvs) + 1)*sizeof(MPI_Status),&to->sstatus);
2526:   to->rstatus  = to->sstatus + PetscMax(nsends,nrecvs) + 1;
2527:   ctx->todata  = (void*)to;

2529:   /* move data into send scatter context */
2530:   count         = 0;
2531:   to->starts[0] = start[0] = 0;
2532:   for (i=0; i<size; i++) {
2533:     if (nprocs[2*i+1]) {
2534:       lowner[i]          = count;
2535:       to->procs[count++] = i;
2536:       to->starts[count]  = start[count] = start[count-1] + nprocs[2*i];
2537:     }
2538:   }
2539:   for (i=0; i<nx; i++) {
2540:     if (owner[i] != rank) {
2541:       to->indices[start[lowner[owner[i]]]++] = inidx[i];
2542:     }
2543:   }
2544:   PetscFree5(nprocs,owner,lowner,start,starts);

2546:   /*  wait on receives */
2547:   count = nrecvs;
2548:   slen  = 0;
2549:   while (count) {
2550:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2551:     /* unpack receives into our local space */
2552:     MPI_Get_count(&recv_status,MPIU_INT,&n);
2553:     source[imdex]  = recv_status.MPI_SOURCE;
2554:     lens[imdex]    = n;
2555:     slen          += n;
2556:     count--;
2557:   }
2558: 
2559:   /* allocate entire receive scatter context */
2560:   PetscNew(VecScatter_MPI_General,&from);
2561:   PetscMemzero(from,sizeof(VecScatter_MPI_General));
2562:   PetscOptionsHasName(PETSC_NULL,"-vecscatter_sendfirst",&from->sendfirst);
2563:   len  = slen*(sizeof(PetscInt) + sizeof(PetscScalar)) + (nrecvs+1)*sizeof(PetscInt) +
2564:           nrecvs*(sizeof(PetscInt) + sizeof(MPI_Request));
2565:   from->n        = nrecvs;
2566:   PetscMalloc(len,&from->values);
2567:   from->requests = (MPI_Request*)(from->values + slen);
2568:   from->indices  = (PetscInt*)(from->requests + nrecvs);
2569:   from->starts   = (PetscInt*)(from->indices + slen);
2570:   from->procs    = (PetscMPIInt*)(from->starts + nrecvs + 1);
2571:   ctx->fromdata  = (void*)from;

2573:   /* move the data into the receive scatter context*/
2574:   base            = owners[rank];
2575:   from->starts[0] = 0;
2576:   for (i=0; i<nrecvs; i++) {
2577:     from->starts[i+1] = from->starts[i] + lens[i];
2578:     from->procs[i]    = source[i];
2579:     values            = rvalues + i*nmax;
2580:     for (j=0; j<lens[i]; j++) {
2581:       from->indices[from->starts[i] + j] = values[j] - base;
2582:     }
2583:   }
2584: 
2585:   /* wait on sends */
2586:   if (nsends) {
2587:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
2588:     MPI_Waitall(nsends,send_waits,send_status);
2589:     PetscFree(send_status);
2590:   }
2591:   PetscFree6(rvalues,recv_waits,svalues,send_waits,lens,source);

2593:   if (nprocslocal) {
2594:     PetscInt nt = from->local.n = to->local.n = nprocslocal;
2595:     /* we have a scatter to ourselves */
2596:     PetscMalloc2(nt,PetscInt,&to->local.slots,nt,PetscInt,&from->local.slots);
2597:     nt   = 0;
2598:     for (i=0; i<ny; i++) {
2599:       idx = inidy[i];
2600:       if (idx >= owners[rank] && idx < owners[rank+1]) {
2601:         from->local.slots[nt] = idx - owners[rank];
2602:         to->local.slots[nt++] = inidx[i];
2603:       }
2604:     }
2605:   } else {
2606:     from->local.n     = 0;
2607:     from->local.slots = 0;
2608:     to->local.n       = 0;
2609:     to->local.slots   = 0;

2611:   }
2612:   from->local.nonmatching_computed = PETSC_FALSE;
2613:   from->local.n_nonmatching        = 0;
2614:   from->local.slots_nonmatching    = 0;
2615:   to->local.nonmatching_computed   = PETSC_FALSE;
2616:   to->local.n_nonmatching          = 0;
2617:   to->local.slots_nonmatching      = 0;

2619:   to->type   = VEC_SCATTER_MPI_GENERAL;
2620:   from->type = VEC_SCATTER_MPI_GENERAL;

2622:   ctx->destroy    = VecScatterDestroy_PtoP;
2623:   ctx->postrecvs  = 0;
2624:   ctx->begin      = VecScatterBegin_PtoP;
2625:   ctx->end        = VecScatterEnd_PtoP;
2626:   ctx->copy       = 0;
2627:   ctx->view       = VecScatterView_MPI;

2629:   to->bs   = 1;
2630:   from->bs = 1;
2631:   return(0);
2632: }
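
/*
   Editorial sketch, not part of the original vpscat.c: the three Create
   routines in this file share one counting idiom.  For each destination j,
   nprocs[2*j] accumulates how many indices go to j and nprocs[2*j+1] is set
   to 1 if any do; PetscMaxSum() is then used, as above, to tell every
   process the largest message it will receive (nmax) and how many messages
   it will receive (nrecvs), so receives can be posted with MPI_ANY_SOURCE
   and a common buffer size.  The helper name and signature are illustrative.
*/
#if 0
static PetscErrorCode CountMessages(MPI_Comm comm,PetscInt nx,const PetscInt *idx,
                                    const PetscInt *owners,PetscMPIInt size,
                                    PetscInt *nmax,PetscInt *nrecvs)
{
  PetscInt *nprocs,i,j;

  PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
  PetscMemzero(nprocs,2*size*sizeof(PetscInt));
  for (i=0; i<nx; i++) {
    for (j=0; j<size; j++) {
      if (idx[i] >= owners[j] && idx[i] < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; break;     /* count and flag owner j */
      }
    }
  }
  PetscMaxSum(comm,nprocs,nmax,nrecvs);              /* one reduction for both */
  PetscFree(nprocs);
  return 0;
}
#endif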

2634: /* ---------------------------------------------------------------------------------*/
2637: PetscErrorCode VecScatterCreate_PtoP(PetscInt nx,PetscInt *inidx,PetscInt ny,PetscInt *inidy,Vec xin,Vec yin,VecScatter ctx)
2638: {
2640:   PetscMPIInt    size,rank,tag,imdex,n;
2641:   PetscInt       *lens,*owners = xin->map->range;
2642:   PetscInt       *nprocs,i,j,idx,nsends,nrecvs,*local_inidx,*local_inidy;
2643:   PetscInt       *owner,*starts,count,slen;
2644:   PetscInt       *rvalues,*svalues,base,nmax,*values;
2645:   MPI_Comm       comm;
2646:   MPI_Request    *send_waits,*recv_waits;
2647:   MPI_Status     recv_status,*send_status;
2648:   PetscTruth     duplicate = PETSC_FALSE,found;

2651:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
2652:   PetscObjectGetComm((PetscObject)xin,&comm);
2653:   MPI_Comm_size(comm,&size);
2654:   MPI_Comm_rank(comm,&rank);
2655:   if (size == 1) {
2656:     VecScatterCreate_StoP(nx,inidx,ny,inidy,yin,ctx);
2657:     return(0);
2658:   }

2660:   /*
2661:      Each processor ships off its inidx[j] and inidy[j] to the appropriate processor;
2662:      each then calls VecScatterCreate_StoP() with the received indices
2663:   */
2664:   /*  first count number of contributors to each processor */
2665:   PetscMalloc3(2*size,PetscInt,&nprocs,nx,PetscInt,&owner,(size+1),PetscInt,&starts);
2666:   PetscMemzero(nprocs,2*size*sizeof(PetscInt));
2667:   for (i=0; i<nx; i++) {
2668:     idx   = inidx[i];
2669:     found = PETSC_FALSE;
2670:     for (j=0; j<size; j++) {
2671:       if (idx >= owners[j] && idx < owners[j+1]) {
2672:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
2673:       }
2674:     }
2675:     if (!found) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2676:   }
2677:   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

2679:   /* inform other processors of number of messages and max length*/
2680:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

2682:   /* post receives:   */
2683:   PetscMalloc6(2*nrecvs*nmax,PetscInt,&rvalues,2*nx,PetscInt,&svalues,2*nrecvs,PetscInt,&lens,nrecvs,MPI_Request,&recv_waits,nsends,MPI_Request,&send_waits,nsends,MPI_Status,&send_status);

2685:   for (i=0; i<nrecvs; i++) {
2686:     MPI_Irecv(rvalues+2*nmax*i,2*nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
2687:   }

2689:   /* do sends:
2690:       1) starts[i] gives the starting index in svalues for stuff going to 
2691:          the ith processor
2692:   */
2693:   starts[0]= 0;
2694:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
2695:   for (i=0; i<nx; i++) {
2696:     svalues[2*starts[owner[i]]]       = inidx[i];
2697:     svalues[1 + 2*starts[owner[i]]++] = inidy[i];
2698:   }

2700:   starts[0] = 0;
2701:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
2702:   count = 0;
2703:   for (i=0; i<size; i++) {
2704:     if (nprocs[2*i+1]) {
2705:       MPI_Isend(svalues+2*starts[i],2*nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count);
2706:       count++;
2707:     }
2708:   }
2709:   PetscFree3(nprocs,owner,starts);

2711:   /*  wait on receives */
2712:   count = nrecvs;
2713:   slen  = 0;
2714:   while (count) {
2715:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2716:     /* unpack receives into our local space */
2717:     MPI_Get_count(&recv_status,MPIU_INT,&n);
2718:     lens[imdex]  =  n/2;
2719:     slen         += n/2;
2720:     count--;
2721:   }
2722: 
2723:   PetscMalloc2(slen,PetscInt,&local_inidx,slen,PetscInt,&local_inidy);
2724:   base  = owners[rank];
2725:   count = 0;
2726:   for (i=0; i<nrecvs; i++) {
2727:     values = rvalues + 2*i*nmax;
2728:     for (j=0; j<lens[i]; j++) {
2729:       local_inidx[count]   = values[2*j] - base;
2730:       local_inidy[count++] = values[2*j+1];
2731:     }
2732:   }

2734:   /* wait on sends */
2735:   if (nsends) {
2736:     MPI_Waitall(nsends,send_waits,send_status);
2737:   }
2738:   PetscFree6(rvalues,svalues,lens,recv_waits,send_waits,send_status);

2740:   /*
2741:      should sort and remove duplicates from local_inidx and local_inidy
2742:   */

2744: #if defined(do_it_slow)
2745:   /* sort on the from index */
2746:   PetscSortIntWithArray(slen,local_inidx,local_inidy);
2747:   start = 0;
2748:   while (start < slen) {
2749:     count = start+1;
2750:     last  = local_inidx[start];
2751:     while (count < slen && last == local_inidx[count]) count++;
2752:     if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2753:       /* sort on to index */
2754:       PetscSortInt(count-start,local_inidy+start);
2755:     }
2756:     /* remove duplicates; not the most efficient way, but probably good enough */
2757:     i = start;
2758:     while (i < count-1) {
2759:       if (local_inidy[i] != local_inidy[i+1]) {
2760:         i++;
2761:       } else { /* found a duplicate */
2762:         duplicate = PETSC_TRUE;
2763:         for (j=i; j<slen-1; j++) {
2764:           local_inidx[j] = local_inidx[j+1];
2765:           local_inidy[j] = local_inidy[j+1];
2766:         }
2767:         slen--;
2768:         count--;
2769:       }
2770:     }
2771:     start = count;
2772:   }
2773: #endif
2774:   if (duplicate) {
2775:     PetscLogInfo(0,"VecScatterCreate_PtoP:Duplicate to/from indices passed in VecScatterCreate(); they are ignored\n");
2776:   }
2777:   VecScatterCreate_StoP(slen,local_inidx,slen,local_inidy,yin,ctx);
2778:   PetscFree2(local_inidx,local_inidy);
2779:   return(0);
2780: }
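
/*
   Editorial sketch, not part of the original vpscat.c: the comment above
   notes that duplicate (from,to) pairs should be removed, and the disabled
   do_it_slow block outlines one way.  The helper below, illustrative only,
   expresses the same idea as a standalone routine: sort on the from index,
   order the to indices inside each run, and keep one copy of each pair.
*/
#if 0
static PetscInt RemoveDuplicatePairs(PetscInt n,PetscInt *inidx,PetscInt *inidy)
{
  PetscInt i,start,end,m = 0;

  PetscSortIntWithArray(n,inidx,inidy);           /* group equal from indices     */
  for (start=0; start<n; start=end) {
    for (end=start+1; end<n && inidx[end] == inidx[start]; end++) {}
    PetscSortInt(end-start,inidy+start);          /* order to indices in this run */
    for (i=start; i<end; i++) {
      if (i == start || inidy[i] != inidy[i-1]) { /* keep one copy of each pair   */
        inidx[m] = inidx[i]; inidy[m] = inidy[i]; m++;
      }
    }
  }
  return m;                                       /* number of unique pairs kept  */
}
#endif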