Actual source code: vpscat.c
petsc-dev 2014-02-02
2: /*
3: Defines parallel vector scatters.
4: */
6: #include <../src/vec/vec/impls/dvecimpl.h> /*I "petscvec.h" I*/
7: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
11: PetscErrorCode VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
12: {
13: VecScatter_MPI_General *to   = (VecScatter_MPI_General*)ctx->todata;
14: VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
15: PetscErrorCode ierr;
16: PetscInt i;
17: PetscMPIInt rank;
18: PetscViewerFormat format;
19: PetscBool iascii;
22: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
23: if (iascii) {
24: MPI_Comm_rank(PetscObjectComm((PetscObject)ctx),&rank);
25: PetscViewerGetFormat(viewer,&format);
26: if (format == PETSC_VIEWER_ASCII_INFO) {
27: PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;
29: MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
30: MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
31: itmp = to->starts[to->n];
32: MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
33: itmp = from->starts[from->n];
34: MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
35: MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)ctx));
37: PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
38: PetscViewerASCIIPrintf(viewer," Maximum number sends %D\n",nsend_max);
39: PetscViewerASCIIPrintf(viewer," Maximum number receives %D\n",nrecv_max);
40: PetscViewerASCIIPrintf(viewer," Maximum data sent %D\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));
41: PetscViewerASCIIPrintf(viewer," Maximum data received %D\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));
42: PetscViewerASCIIPrintf(viewer," Total data sent %D\n",(int)(alldata*to->bs*sizeof(PetscScalar)));
44: } else {
45: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
46: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
47: if (to->n) {
48: for (i=0; i<to->n; i++) {
49: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length = %D to whom %d\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
50: }
51: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
52: for (i=0; i<to->starts[to->n]; i++) {
53: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,to->indices[i]);
54: }
55: }
57: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
58: if (from->n) {
59: for (i=0; i<from->n; i++) {
60: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %d\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
61: }
63: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
64: for (i=0; i<from->starts[from->n]; i++) {
65: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,from->indices[i]);
66: }
67: }
68: if (to->local.n) {
69: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Indices for local part of scatter\n",rank);
70: for (i=0; i<to->local.n; i++) {
71: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] From %D to %D \n",rank,from->local.vslots[i],to->local.vslots[i]);
72: }
73: }
75: PetscViewerFlush(viewer);
76: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
77: }
78: }
79: return(0);
80: }
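To see the summary branch above in action, a scatter can be viewed with the ASCII_INFO format. A minimal usage sketch (not part of this file; assumes ctx is an existing VecScatter and ierr a declared PetscErrorCode):

  PetscViewer viewer = PETSC_VIEWER_STDOUT_WORLD;            /* world stdout viewer */
  ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
  ierr = VecScatterView(ctx,viewer);CHKERRQ(ierr);           /* prints the statistics computed above */
  ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);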
82: /* -----------------------------------------------------------------------------------*/
83: /*
84: The next routine determines which part of the local scatter is an
85: exact copy of values into their current locations. We check this here so
86: that we know we need not perform that portion of the scatter when a vector is
87: scattering to itself with INSERT_VALUES.
89: This is currently not used, but it would speed up, for example, DMLocalToLocalBegin/End().
91: */
94: PetscErrorCode VecScatterLocalOptimize_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
95: {
96: PetscInt n = to->n,n_nonmatching = 0,i,*to_slots = to->vslots,*from_slots = from->vslots;
98: PetscInt *nto_slots,*nfrom_slots,j = 0;
101: for (i=0; i<n; i++) {
102: if (to_slots[i] != from_slots[i]) n_nonmatching++;
103: }
105: if (!n_nonmatching) {
106: to->nonmatching_computed = PETSC_TRUE;
107: to->n_nonmatching = from->n_nonmatching = 0;
109: PetscInfo1(scatter,"Reduced %D to 0\n", n);
110: } else if (n_nonmatching == n) {
111: to->nonmatching_computed = PETSC_FALSE;
113: PetscInfo(scatter,"All values non-matching\n");
114: } else {
115: to->nonmatching_computed = PETSC_TRUE;
116: to->n_nonmatching = from->n_nonmatching = n_nonmatching;
118: PetscMalloc1(n_nonmatching,&nto_slots);
119: PetscMalloc1(n_nonmatching,&nfrom_slots);
121: to->slots_nonmatching = nto_slots;
122: from->slots_nonmatching = nfrom_slots;
123: for (i=0; i<n; i++) {
124: if (to_slots[i] != from_slots[i]) {
125: nto_slots[j] = to_slots[i];
126: nfrom_slots[j] = from_slots[i];
127: j++;
128: }
129: }
130: PetscInfo2(scatter,"Reduced %D to %D\n",n,n_nonmatching);
131: }
132: return(0);
133: }
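As the comment above notes, this optimization is not currently exploited. A hypothetical sketch of how the computed slot lists could be used when a vector scatters to itself with INSERT_VALUES (v stands for the local array of the vector; src and dst for whichever of the two Seq_General structs hold the source and destination slots for the caller; i is a PetscInt):

  PetscInt i;
  if (dst->nonmatching_computed) {
    /* only the entries whose source and destination slots differ need to move */
    for (i=0; i<dst->n_nonmatching; i++) v[dst->slots_nonmatching[i]] = v[src->slots_nonmatching[i]];
  } else {
    for (i=0; i<dst->n; i++) v[dst->vslots[i]] = v[src->vslots[i]];
  }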
135: /* --------------------------------------------------------------------------------------*/
137: /* -------------------------------------------------------------------------------------*/
140: PetscErrorCode VecScatterDestroy_PtoP(VecScatter ctx)
141: {
142: VecScatter_MPI_General *to = (VecScatter_MPI_General*)ctx->todata;
143: VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
144: PetscErrorCode ierr;
145: PetscInt i;
148: if (to->use_readyreceiver) {
149: /*
150: Since we have already posted sends we must cancel them before freeing
151: the requests
152: */
153: for (i=0; i<from->n; i++) {
154: MPI_Cancel(from->requests+i);
155: }
156: for (i=0; i<to->n; i++) {
157: MPI_Cancel(to->rev_requests+i);
158: }
159: MPI_Waitall(from->n,from->requests,to->rstatus);
160: MPI_Waitall(to->n,to->rev_requests,to->rstatus);
161: }
163: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
164: if (to->use_alltoallw) {
165: PetscFree3(to->wcounts,to->wdispls,to->types);
166: PetscFree3(from->wcounts,from->wdispls,from->types);
167: }
168: #endif
170: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
171: if (to->use_window) {
172: MPI_Win_free(&from->window);
173: MPI_Win_free(&to->window);
174: PetscFree(from->winstarts);
175: PetscFree(to->winstarts);
176: }
177: #endif
179: if (to->use_alltoallv) {
180: PetscFree2(to->counts,to->displs);
181: PetscFree2(from->counts,from->displs);
182: }
184: /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
185: /*
186: IBM's PE version of MPI has a bug where freeing these requests corrupts later
187: message passing.
188: */
189: #if !defined(PETSC_HAVE_BROKEN_REQUEST_FREE)
190: if (!to->use_alltoallv && !to->use_window) { /* currently the to->requests etc are ALWAYS allocated even if not used */
191: if (to->requests) {
192: for (i=0; i<to->n; i++) {
193: MPI_Request_free(to->requests + i);
194: }
195: }
196: if (to->rev_requests) {
197: for (i=0; i<to->n; i++) {
198: MPI_Request_free(to->rev_requests + i);
199: }
200: }
201: }
202: /*
203: MPICH could not properly cancel requests, so with ready-receiver mode we
204: cannot free the requests. This may be fixed now; if not, put the following
205: code inside an if (!to->use_readyreceiver) { block.
206: */
207: if (!to->use_alltoallv && !to->use_window) { /* currently the from->requests etc are ALWAYS allocated even if not used */
208: if (from->requests) {
209: for (i=0; i<from->n; i++) {
210: MPI_Request_free(from->requests + i);
211: }
212: }
214: if (from->rev_requests) {
215: for (i=0; i<from->n; i++) {
216: MPI_Request_free(from->rev_requests + i);
217: }
218: }
219: }
220: #endif
222: PetscFree(to->local.vslots);
223: PetscFree(from->local.vslots);
224: PetscFree2(to->counts,to->displs);
225: PetscFree2(from->counts,from->displs);
226: PetscFree(to->local.slots_nonmatching);
227: PetscFree(from->local.slots_nonmatching);
228: PetscFree(to->rev_requests);
229: PetscFree(from->rev_requests);
230: PetscFree(to->requests);
231: PetscFree(from->requests);
232: PetscFree4(to->values,to->indices,to->starts,to->procs);
233: PetscFree2(to->sstatus,to->rstatus);
234: PetscFree4(from->values,from->indices,from->starts,from->procs);
235: PetscFree(from);
236: PetscFree(to);
237: return(0);
238: }
242: /* --------------------------------------------------------------------------------------*/
243: /*
244: Special optimization to see if the local part of the scatter is actually
245: a contiguous copy; if it is, the scatter routines call PetscMemcpy() instead of an indexed loop.
247: */
250: PetscErrorCode VecScatterLocalOptimizeCopy_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from,PetscInt bs)
251: {
252: PetscInt n = to->n,i,*to_slots = to->vslots,*from_slots = from->vslots;
253: PetscInt to_start,from_start;
257: to_start = to_slots[0];
258: from_start = from_slots[0];
260: for (i=1; i<n; i++) {
261: to_start += bs;
262: from_start += bs;
263: if (to_slots[i] != to_start) return(0);
264: if (from_slots[i] != from_start) return(0);
265: }
266: to->is_copy = PETSC_TRUE;
267: to->copy_start = to_slots[0];
268: to->copy_length = bs*sizeof(PetscScalar)*n;
269: from->is_copy = PETSC_TRUE;
270: from->copy_start = from_slots[0];
271: from->copy_length = bs*sizeof(PetscScalar)*n;
272: PetscInfo(scatter,"Local scatter is a copy, optimizing for it\n");
273: return(0);
274: }
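For illustration, a hedged sketch of how the begin/end routines can exploit the flag set above (not the literal library code; xv/yv are assumed to be the local source and destination arrays, sfrom/sto the Seq_General structs describing the source and destination slots):

  if (sto->is_copy && addv == INSERT_VALUES) {
    /* the whole local part is one contiguous block: a single memcpy suffices */
    ierr = PetscMemcpy(yv + sto->copy_start,xv + sfrom->copy_start,sto->copy_length);CHKERRQ(ierr);
  } else {
    /* fall back to the indexed Scatter_1() loop defined later in this file */
    ierr = Scatter_1(sto->n,sfrom->vslots,xv,sto->vslots,yv,addv);CHKERRQ(ierr);
  }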
276: /* --------------------------------------------------------------------------------------*/
280: PetscErrorCode VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
281: {
282: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
283: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
284: PetscErrorCode ierr;
285: PetscInt ny,bs = in_from->bs;
288: out->begin = in->begin;
289: out->end = in->end;
290: out->copy = in->copy;
291: out->destroy = in->destroy;
292: out->view = in->view;
294: /* allocate entire send scatter context */
295: PetscNewLog(out,&out_to);
296: PetscNewLog(out,&out_from);
298: ny = in_to->starts[in_to->n];
299: out_to->n = in_to->n;
300: out_to->type = in_to->type;
301: out_to->sendfirst = in_to->sendfirst;
303: PetscMalloc1(out_to->n,&out_to->requests);
304: PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
305: PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
306: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
307: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
308: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
310: out->todata = (void*)out_to;
311: out_to->local.n = in_to->local.n;
312: out_to->local.nonmatching_computed = PETSC_FALSE;
313: out_to->local.n_nonmatching = 0;
314: out_to->local.slots_nonmatching = 0;
315: if (in_to->local.n) {
316: PetscMalloc1(in_to->local.n,&out_to->local.vslots);
317: PetscMalloc1(in_from->local.n,&out_from->local.vslots);
318: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
319: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
320: } else {
321: out_to->local.vslots = 0;
322: out_from->local.vslots = 0;
323: }
325: /* allocate entire receive context */
326: out_from->type = in_from->type;
327: ny = in_from->starts[in_from->n];
328: out_from->n = in_from->n;
329: out_from->sendfirst = in_from->sendfirst;
331: PetscMalloc1(out_from->n,&out_from->requests);
332: PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
333: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
334: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
335: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
337: out->fromdata = (void*)out_from;
338: out_from->local.n = in_from->local.n;
339: out_from->local.nonmatching_computed = PETSC_FALSE;
340: out_from->local.n_nonmatching = 0;
341: out_from->local.slots_nonmatching = 0;
343: /*
344: set up the request arrays for use with MPI_Send_init() and MPI_Recv_init()
345: */
346: {
347: PetscMPIInt tag;
348: MPI_Comm comm;
349: PetscInt *sstarts = out_to->starts, *rstarts = out_from->starts;
350: PetscMPIInt *sprocs = out_to->procs, *rprocs = out_from->procs;
351: PetscInt i;
352: PetscBool flg;
353: MPI_Request *swaits = out_to->requests,*rwaits = out_from->requests;
354: MPI_Request *rev_swaits,*rev_rwaits;
355: PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;
357: PetscMalloc1(in_to->n,&out_to->rev_requests);
358: PetscMalloc1(in_from->n,&out_from->rev_requests);
360: rev_rwaits = out_to->rev_requests;
361: rev_swaits = out_from->rev_requests;
363: out_from->bs = out_to->bs = bs;
364: tag = ((PetscObject)out)->tag;
365: PetscObjectGetComm((PetscObject)out,&comm);
367: /* Register the receives that you will use later (sends for scatter reverse) */
368: for (i=0; i<out_from->n; i++) {
369: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
370: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
371: }
373: flg = PETSC_FALSE;
374: PetscOptionsGetBool(NULL,"-vecscatter_rsend",&flg,NULL);
375: if (flg) {
376: out_to->use_readyreceiver = PETSC_TRUE;
377: out_from->use_readyreceiver = PETSC_TRUE;
378: for (i=0; i<out_to->n; i++) {
379: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
380: }
381: if (out_from->n) {MPI_Startall_irecv(out_from->starts[out_from->n]*out_from->bs,out_from->n,out_from->requests);}
382: MPI_Barrier(comm);
383: PetscInfo(in,"Using VecScatter ready receiver mode\n");
384: } else {
385: out_to->use_readyreceiver = PETSC_FALSE;
386: out_from->use_readyreceiver = PETSC_FALSE;
387: flg = PETSC_FALSE;
388: PetscOptionsGetBool(NULL,"-vecscatter_ssend",&flg,NULL);
389: if (flg) {
390: PetscInfo(in,"Using VecScatter Ssend mode\n");
391: }
392: for (i=0; i<out_to->n; i++) {
393: if (!flg) {
394: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
395: } else {
396: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
397: }
398: }
399: }
400: /* Register receives for scatter reverse */
401: for (i=0; i<out_to->n; i++) {
402: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
403: }
404: }
405: return(0);
406: }
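The copy routine above mostly re-registers persistent MPI requests. For background, the persistent-request life cycle it relies on looks like the following generic MPI sketch (dest, tag, comm, and niter are assumed to be defined by the caller):

  MPI_Request req;
  double      buf[16];
  int         iter;
  MPI_Send_init(buf,16,MPI_DOUBLE,dest,tag,comm,&req);  /* create the request once        */
  for (iter=0; iter<niter; iter++) {
    /* ... fill buf ... */
    MPI_Start(&req);                                     /* start each reuse               */
    MPI_Wait(&req,MPI_STATUS_IGNORE);                    /* complete before refilling buf  */
  }
  MPI_Request_free(&req);                                /* free once at the end           */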
410: PetscErrorCode VecScatterCopy_PtoP_AllToAll(VecScatter in,VecScatter out)
411: {
412: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
413: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
414: PetscErrorCode ierr;
415: PetscInt ny,bs = in_from->bs;
416: PetscMPIInt size;
419: MPI_Comm_size(PetscObjectComm((PetscObject)in),&size);
421: out->begin = in->begin;
422: out->end = in->end;
423: out->copy = in->copy;
424: out->destroy = in->destroy;
425: out->view = in->view;
427: /* allocate entire send scatter context */
428: PetscNewLog(out,&out_to);
429: PetscNewLog(out,&out_from);
431: ny = in_to->starts[in_to->n];
432: out_to->n = in_to->n;
433: out_to->type = in_to->type;
434: out_to->sendfirst = in_to->sendfirst;
436: PetscMalloc1(out_to->n,&out_to->requests);
437: PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
438: PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
439: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
440: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
441: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
443: out->todata = (void*)out_to;
444: out_to->local.n = in_to->local.n;
445: out_to->local.nonmatching_computed = PETSC_FALSE;
446: out_to->local.n_nonmatching = 0;
447: out_to->local.slots_nonmatching = 0;
448: if (in_to->local.n) {
449: PetscMalloc1(in_to->local.n,&out_to->local.vslots);
450: PetscMalloc1(in_from->local.n,&out_from->local.vslots);
451: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
452: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
453: } else {
454: out_to->local.vslots = 0;
455: out_from->local.vslots = 0;
456: }
458: /* allocate entire receive context */
459: out_from->type = in_from->type;
460: ny = in_from->starts[in_from->n];
461: out_from->n = in_from->n;
462: out_from->sendfirst = in_from->sendfirst;
464: PetscMalloc1(out_from->n,&out_from->requests);
465: PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
466: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
467: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
468: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
470: out->fromdata = (void*)out_from;
471: out_from->local.n = in_from->local.n;
472: out_from->local.nonmatching_computed = PETSC_FALSE;
473: out_from->local.n_nonmatching = 0;
474: out_from->local.slots_nonmatching = 0;
476: out_to->use_alltoallv = out_from->use_alltoallv = PETSC_TRUE;
478: PetscMalloc2(size,&out_to->counts,size,&out_to->displs);
479: PetscMemcpy(out_to->counts,in_to->counts,size*sizeof(PetscMPIInt));
480: PetscMemcpy(out_to->displs,in_to->displs,size*sizeof(PetscMPIInt));
482: PetscMalloc2(size,&out_from->counts,size,&out_from->displs);
483: PetscMemcpy(out_from->counts,in_from->counts,size*sizeof(PetscMPIInt));
484: PetscMemcpy(out_from->displs,in_from->displs,size*sizeof(PetscMPIInt));
485: return(0);
486: }
487: /* --------------------------------------------------------------------------------------------------
488: Packs message data into the send buffers and unpacks it from the receive buffers.
490: These routines could be generated automatically.
492: Fortran kernels etc. could be used instead.
493: */
494: PETSC_STATIC_INLINE void Pack_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
495: {
496: PetscInt i;
497: for (i=0; i<n; i++) y[i] = x[indicesx[i]];
498: }
502: PETSC_STATIC_INLINE PetscErrorCode UnPack_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
503: {
504: PetscInt i;
507: switch (addv) {
508: case INSERT_VALUES:
509: case INSERT_ALL_VALUES:
510: for (i=0; i<n; i++) y[indicesy[i]] = x[i];
511: break;
512: case ADD_VALUES:
513: case ADD_ALL_VALUES:
514: for (i=0; i<n; i++) y[indicesy[i]] += x[i];
515: break;
516: #if !defined(PETSC_USE_COMPLEX)
517: case MAX_VALUES:
518: for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
519: #else
520: case MAX_VALUES:
521: #endif
522: case NOT_SET_VALUES:
523: break;
524: default:
525: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
526: }
527: return(0);
528: }
532: PETSC_STATIC_INLINE PetscErrorCode Scatter_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
533: {
534: PetscInt i;
537: switch (addv) {
538: case INSERT_VALUES:
539: case INSERT_ALL_VALUES:
540: for (i=0; i<n; i++) y[indicesy[i]] = x[indicesx[i]];
541: break;
542: case ADD_VALUES:
543: case ADD_ALL_VALUES:
544: for (i=0; i<n; i++) y[indicesy[i]] += x[indicesx[i]];
545: break;
546: #if !defined(PETSC_USE_COMPLEX)
547: case MAX_VALUES:
548: for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
549: #else
550: case MAX_VALUES:
551: #endif
552: case NOT_SET_VALUES:
553: break;
554: default:
555: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
556: }
557: return(0);
558: }
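These bs = 1 kernels are used by the generated begin/end routines roughly as follows (a simplified sketch, not the literal generated code, which also handles the sendfirst, alltoall, and window variants; xv/yv are the local vector arrays, i is a PetscInt, idx a PetscMPIInt, and the persistent requests were created with MPI_Send_init()/MPI_Recv_init() as elsewhere in this file):

  for (i=0; i<from->n; i++) {        /* post all receives first */
    ierr = MPI_Start(from->requests+i);CHKERRQ(ierr);
  }
  for (i=0; i<to->n; i++) {          /* pack each message, then start its send */
    Pack_1(to->starts[i+1]-to->starts[i],to->indices+to->starts[i],xv,to->values+to->starts[i]);
    ierr = MPI_Start(to->requests+i);CHKERRQ(ierr);
  }
  /* ... later, in the end routine ... */
  for (i=0; i<from->n; i++) {        /* unpack each receive as it completes */
    ierr = MPI_Waitany(from->n,from->requests,&idx,MPI_STATUS_IGNORE);CHKERRQ(ierr);
    ierr = UnPack_1(from->starts[idx+1]-from->starts[idx],from->values+from->starts[idx],from->indices+from->starts[idx],yv,addv);CHKERRQ(ierr);
  }
  ierr = MPI_Waitall(to->n,to->requests,to->sstatus);CHKERRQ(ierr);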
560: /* ----------------------------------------------------------------------------------------------- */
561: PETSC_STATIC_INLINE void Pack_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
562: {
563: PetscInt i,idx;
565: for (i=0; i<n; i++) {
566: idx = *indicesx++;
567: y[0] = x[idx];
568: y[1] = x[idx+1];
569: y += 2;
570: }
571: }
575: PETSC_STATIC_INLINE PetscErrorCode UnPack_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
576: {
577: PetscInt i,idy;
580: switch (addv) {
581: case INSERT_VALUES:
582: case INSERT_ALL_VALUES:
583: for (i=0; i<n; i++) {
584: idy = *indicesy++;
585: y[idy] = x[0];
586: y[idy+1] = x[1];
587: x += 2;
588: }
589: break;
590: case ADD_VALUES:
591: case ADD_ALL_VALUES:
592: for (i=0; i<n; i++) {
593: idy = *indicesy++;
594: y[idy] += x[0];
595: y[idy+1] += x[1];
596: x += 2;
597: }
598: break;
599: #if !defined(PETSC_USE_COMPLEX)
600: case MAX_VALUES:
601: for (i=0; i<n; i++) {
602: idy = *indicesy++;
603: y[idy] = PetscMax(y[idy],x[0]);
604: y[idy+1] = PetscMax(y[idy+1],x[1]);
605: x += 2;
606: }
607: #else
608: case MAX_VALUES:
609: #endif
610: case NOT_SET_VALUES:
611: break;
612: default:
613: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
614: }
615: return(0);
616: }
620: PETSC_STATIC_INLINE PetscErrorCode Scatter_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
621: {
622: PetscInt i,idx,idy;
625: switch (addv) {
626: case INSERT_VALUES:
627: case INSERT_ALL_VALUES:
628: for (i=0; i<n; i++) {
629: idx = *indicesx++;
630: idy = *indicesy++;
631: y[idy] = x[idx];
632: y[idy+1] = x[idx+1];
633: }
634: break;
635: case ADD_VALUES:
636: case ADD_ALL_VALUES:
637: for (i=0; i<n; i++) {
638: idx = *indicesx++;
639: idy = *indicesy++;
640: y[idy] += x[idx];
641: y[idy+1] += x[idx+1];
642: }
643: break;
644: #if !defined(PETSC_USE_COMPLEX)
645: case MAX_VALUES:
646: for (i=0; i<n; i++) {
647: idx = *indicesx++;
648: idy = *indicesy++;
649: y[idy] = PetscMax(y[idy],x[idx]);
650: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
651: }
652: #else
653: case MAX_VALUES:
654: #endif
655: case NOT_SET_VALUES:
656: break;
657: default:
658: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
659: }
660: return(0);
661: }
662: /* ----------------------------------------------------------------------------------------------- */
663: PETSC_STATIC_INLINE void Pack_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
664: {
665: PetscInt i,idx;
667: for (i=0; i<n; i++) {
668: idx = *indicesx++;
669: y[0] = x[idx];
670: y[1] = x[idx+1];
671: y[2] = x[idx+2];
672: y += 3;
673: }
674: }
677: PETSC_STATIC_INLINE PetscErrorCode UnPack_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
678: {
679: PetscInt i,idy;
682: switch (addv) {
683: case INSERT_VALUES:
684: case INSERT_ALL_VALUES:
685: for (i=0; i<n; i++) {
686: idy = *indicesy++;
687: y[idy] = x[0];
688: y[idy+1] = x[1];
689: y[idy+2] = x[2];
690: x += 3;
691: }
692: break;
693: case ADD_VALUES:
694: case ADD_ALL_VALUES:
695: for (i=0; i<n; i++) {
696: idy = *indicesy++;
697: y[idy] += x[0];
698: y[idy+1] += x[1];
699: y[idy+2] += x[2];
700: x += 3;
701: }
702: break;
703: #if !defined(PETSC_USE_COMPLEX)
704: case MAX_VALUES:
705: for (i=0; i<n; i++) {
706: idy = *indicesy++;
707: y[idy] = PetscMax(y[idy],x[0]);
708: y[idy+1] = PetscMax(y[idy+1],x[1]);
709: y[idy+2] = PetscMax(y[idy+2],x[2]);
710: x += 3;
711: }
712: #else
713: case MAX_VALUES:
714: #endif
715: case NOT_SET_VALUES:
716: break;
717: default:
718: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
719: }
720: return(0);
721: }
725: PETSC_STATIC_INLINE PetscErrorCode Scatter_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
726: {
727: PetscInt i,idx,idy;
730: switch (addv) {
731: case INSERT_VALUES:
732: case INSERT_ALL_VALUES:
733: for (i=0; i<n; i++) {
734: idx = *indicesx++;
735: idy = *indicesy++;
736: y[idy] = x[idx];
737: y[idy+1] = x[idx+1];
738: y[idy+2] = x[idx+2];
739: }
740: break;
741: case ADD_VALUES:
742: case ADD_ALL_VALUES:
743: for (i=0; i<n; i++) {
744: idx = *indicesx++;
745: idy = *indicesy++;
746: y[idy] += x[idx];
747: y[idy+1] += x[idx+1];
748: y[idy+2] += x[idx+2];
749: }
750: break;
751: #if !defined(PETSC_USE_COMPLEX)
752: case MAX_VALUES:
753: for (i=0; i<n; i++) {
754: idx = *indicesx++;
755: idy = *indicesy++;
756: y[idy] = PetscMax(y[idy],x[idx]);
757: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
758: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
759: }
760: #else
761: case MAX_VALUES:
762: #endif
763: case NOT_SET_VALUES:
764: break;
765: default:
766: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
767: }
768: return(0);
769: }
770: /* ----------------------------------------------------------------------------------------------- */
771: PETSC_STATIC_INLINE void Pack_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
772: {
773: PetscInt i,idx;
775: for (i=0; i<n; i++) {
776: idx = *indicesx++;
777: y[0] = x[idx];
778: y[1] = x[idx+1];
779: y[2] = x[idx+2];
780: y[3] = x[idx+3];
781: y += 4;
782: }
783: }
786: PETSC_STATIC_INLINE PetscErrorCode UnPack_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
787: {
788: PetscInt i,idy;
791: switch (addv) {
792: case INSERT_VALUES:
793: case INSERT_ALL_VALUES:
794: for (i=0; i<n; i++) {
795: idy = *indicesy++;
796: y[idy] = x[0];
797: y[idy+1] = x[1];
798: y[idy+2] = x[2];
799: y[idy+3] = x[3];
800: x += 4;
801: }
802: break;
803: case ADD_VALUES:
804: case ADD_ALL_VALUES:
805: for (i=0; i<n; i++) {
806: idy = *indicesy++;
807: y[idy] += x[0];
808: y[idy+1] += x[1];
809: y[idy+2] += x[2];
810: y[idy+3] += x[3];
811: x += 4;
812: }
813: break;
814: #if !defined(PETSC_USE_COMPLEX)
815: case MAX_VALUES:
816: for (i=0; i<n; i++) {
817: idy = *indicesy++;
818: y[idy] = PetscMax(y[idy],x[0]);
819: y[idy+1] = PetscMax(y[idy+1],x[1]);
820: y[idy+2] = PetscMax(y[idy+2],x[2]);
821: y[idy+3] = PetscMax(y[idy+3],x[3]);
822: x += 4;
823: }
824: #else
825: case MAX_VALUES:
826: #endif
827: case NOT_SET_VALUES:
828: break;
829: default:
830: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
831: }
832: return(0);
833: }
837: PETSC_STATIC_INLINE PetscErrorCode Scatter_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
838: {
839: PetscInt i,idx,idy;
842: switch (addv) {
843: case INSERT_VALUES:
844: case INSERT_ALL_VALUES:
845: for (i=0; i<n; i++) {
846: idx = *indicesx++;
847: idy = *indicesy++;
848: y[idy] = x[idx];
849: y[idy+1] = x[idx+1];
850: y[idy+2] = x[idx+2];
851: y[idy+3] = x[idx+3];
852: }
853: break;
854: case ADD_VALUES:
855: case ADD_ALL_VALUES:
856: for (i=0; i<n; i++) {
857: idx = *indicesx++;
858: idy = *indicesy++;
859: y[idy] += x[idx];
860: y[idy+1] += x[idx+1];
861: y[idy+2] += x[idx+2];
862: y[idy+3] += x[idx+3];
863: }
864: break;
865: #if !defined(PETSC_USE_COMPLEX)
866: case MAX_VALUES:
867: for (i=0; i<n; i++) {
868: idx = *indicesx++;
869: idy = *indicesy++;
870: y[idy] = PetscMax(y[idy],x[idx]);
871: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
872: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
873: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
874: }
875: #else
876: case MAX_VALUES:
877: #endif
878: case NOT_SET_VALUES:
879: break;
880: default:
881: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
882: }
883: return(0);
884: }
885: /* ----------------------------------------------------------------------------------------------- */
886: PETSC_STATIC_INLINE void Pack_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
887: {
888: PetscInt i,idx;
890: for (i=0; i<n; i++) {
891: idx = *indicesx++;
892: y[0] = x[idx];
893: y[1] = x[idx+1];
894: y[2] = x[idx+2];
895: y[3] = x[idx+3];
896: y[4] = x[idx+4];
897: y += 5;
898: }
899: }
903: PETSC_STATIC_INLINE PetscErrorCode UnPack_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
904: {
905: PetscInt i,idy;
908: switch (addv) {
909: case INSERT_VALUES:
910: case INSERT_ALL_VALUES:
911: for (i=0; i<n; i++) {
912: idy = *indicesy++;
913: y[idy] = x[0];
914: y[idy+1] = x[1];
915: y[idy+2] = x[2];
916: y[idy+3] = x[3];
917: y[idy+4] = x[4];
918: x += 5;
919: }
920: break;
921: case ADD_VALUES:
922: case ADD_ALL_VALUES:
923: for (i=0; i<n; i++) {
924: idy = *indicesy++;
925: y[idy] += x[0];
926: y[idy+1] += x[1];
927: y[idy+2] += x[2];
928: y[idy+3] += x[3];
929: y[idy+4] += x[4];
930: x += 5;
931: }
932: break;
933: #if !defined(PETSC_USE_COMPLEX)
934: case MAX_VALUES:
935: for (i=0; i<n; i++) {
936: idy = *indicesy++;
937: y[idy] = PetscMax(y[idy],x[0]);
938: y[idy+1] = PetscMax(y[idy+1],x[1]);
939: y[idy+2] = PetscMax(y[idy+2],x[2]);
940: y[idy+3] = PetscMax(y[idy+3],x[3]);
941: y[idy+4] = PetscMax(y[idy+4],x[4]);
942: x += 5;
943: }
944: #else
945: case MAX_VALUES:
946: #endif
947: case NOT_SET_VALUES:
948: break;
949: default:
950: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
951: }
952: return(0);
953: }
957: PETSC_STATIC_INLINE PetscErrorCode Scatter_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
958: {
959: PetscInt i,idx,idy;
962: switch (addv) {
963: case INSERT_VALUES:
964: case INSERT_ALL_VALUES:
965: for (i=0; i<n; i++) {
966: idx = *indicesx++;
967: idy = *indicesy++;
968: y[idy] = x[idx];
969: y[idy+1] = x[idx+1];
970: y[idy+2] = x[idx+2];
971: y[idy+3] = x[idx+3];
972: y[idy+4] = x[idx+4];
973: }
974: break;
975: case ADD_VALUES:
976: case ADD_ALL_VALUES:
977: for (i=0; i<n; i++) {
978: idx = *indicesx++;
979: idy = *indicesy++;
980: y[idy] += x[idx];
981: y[idy+1] += x[idx+1];
982: y[idy+2] += x[idx+2];
983: y[idy+3] += x[idx+3];
984: y[idy+4] += x[idx+4];
985: }
986: break;
987: #if !defined(PETSC_USE_COMPLEX)
988: case MAX_VALUES:
989: for (i=0; i<n; i++) {
990: idx = *indicesx++;
991: idy = *indicesy++;
992: y[idy] = PetscMax(y[idy],x[idx]);
993: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
994: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
995: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
996: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
997: }
998: #else
999: case MAX_VALUES:
1000: #endif
1001: case NOT_SET_VALUES:
1002: break;
1003: default:
1004: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1005: }
1006: return(0);
1007: }
1008: /* ----------------------------------------------------------------------------------------------- */
1009: PETSC_STATIC_INLINE void Pack_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1010: {
1011: PetscInt i,idx;
1013: for (i=0; i<n; i++) {
1014: idx = *indicesx++;
1015: y[0] = x[idx];
1016: y[1] = x[idx+1];
1017: y[2] = x[idx+2];
1018: y[3] = x[idx+3];
1019: y[4] = x[idx+4];
1020: y[5] = x[idx+5];
1021: y += 6;
1022: }
1023: }
1027: PETSC_STATIC_INLINE PetscErrorCode UnPack_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1028: {
1029: PetscInt i,idy;
1032: switch (addv) {
1033: case INSERT_VALUES:
1034: case INSERT_ALL_VALUES:
1035: for (i=0; i<n; i++) {
1036: idy = *indicesy++;
1037: y[idy] = x[0];
1038: y[idy+1] = x[1];
1039: y[idy+2] = x[2];
1040: y[idy+3] = x[3];
1041: y[idy+4] = x[4];
1042: y[idy+5] = x[5];
1043: x += 6;
1044: }
1045: break;
1046: case ADD_VALUES:
1047: case ADD_ALL_VALUES:
1048: for (i=0; i<n; i++) {
1049: idy = *indicesy++;
1050: y[idy] += x[0];
1051: y[idy+1] += x[1];
1052: y[idy+2] += x[2];
1053: y[idy+3] += x[3];
1054: y[idy+4] += x[4];
1055: y[idy+5] += x[5];
1056: x += 6;
1057: }
1058: break;
1059: #if !defined(PETSC_USE_COMPLEX)
1060: case MAX_VALUES:
1061: for (i=0; i<n; i++) {
1062: idy = *indicesy++;
1063: y[idy] = PetscMax(y[idy],x[0]);
1064: y[idy+1] = PetscMax(y[idy+1],x[1]);
1065: y[idy+2] = PetscMax(y[idy+2],x[2]);
1066: y[idy+3] = PetscMax(y[idy+3],x[3]);
1067: y[idy+4] = PetscMax(y[idy+4],x[4]);
1068: y[idy+5] = PetscMax(y[idy+5],x[5]);
1069: x += 6;
1070: }
1071: #else
1072: case MAX_VALUES:
1073: #endif
1074: case NOT_SET_VALUES:
1075: break;
1076: default:
1077: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1078: }
1079: return(0);
1080: }
1084: PETSC_STATIC_INLINE PetscErrorCode Scatter_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1085: {
1086: PetscInt i,idx,idy;
1089: switch (addv) {
1090: case INSERT_VALUES:
1091: case INSERT_ALL_VALUES:
1092: for (i=0; i<n; i++) {
1093: idx = *indicesx++;
1094: idy = *indicesy++;
1095: y[idy] = x[idx];
1096: y[idy+1] = x[idx+1];
1097: y[idy+2] = x[idx+2];
1098: y[idy+3] = x[idx+3];
1099: y[idy+4] = x[idx+4];
1100: y[idy+5] = x[idx+5];
1101: }
1102: break;
1103: case ADD_VALUES:
1104: case ADD_ALL_VALUES:
1105: for (i=0; i<n; i++) {
1106: idx = *indicesx++;
1107: idy = *indicesy++;
1108: y[idy] += x[idx];
1109: y[idy+1] += x[idx+1];
1110: y[idy+2] += x[idx+2];
1111: y[idy+3] += x[idx+3];
1112: y[idy+4] += x[idx+4];
1113: y[idy+5] += x[idx+5];
1114: }
1115: break;
1116: #if !defined(PETSC_USE_COMPLEX)
1117: case MAX_VALUES:
1118: for (i=0; i<n; i++) {
1119: idx = *indicesx++;
1120: idy = *indicesy++;
1121: y[idy] = PetscMax(y[idy],x[idx]);
1122: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1123: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1124: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1125: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1126: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1127: }
1128: #else
1129: case MAX_VALUES:
1130: #endif
1131: case NOT_SET_VALUES:
1132: break;
1133: default:
1134: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1135: }
1136: return(0);
1137: }
1138: /* ----------------------------------------------------------------------------------------------- */
1139: PETSC_STATIC_INLINE void Pack_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1140: {
1141: PetscInt i,idx;
1143: for (i=0; i<n; i++) {
1144: idx = *indicesx++;
1145: y[0] = x[idx];
1146: y[1] = x[idx+1];
1147: y[2] = x[idx+2];
1148: y[3] = x[idx+3];
1149: y[4] = x[idx+4];
1150: y[5] = x[idx+5];
1151: y[6] = x[idx+6];
1152: y += 7;
1153: }
1154: }
1158: PETSC_STATIC_INLINE PetscErrorCode UnPack_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1159: {
1160: PetscInt i,idy;
1163: switch (addv) {
1164: case INSERT_VALUES:
1165: case INSERT_ALL_VALUES:
1166: for (i=0; i<n; i++) {
1167: idy = *indicesy++;
1168: y[idy] = x[0];
1169: y[idy+1] = x[1];
1170: y[idy+2] = x[2];
1171: y[idy+3] = x[3];
1172: y[idy+4] = x[4];
1173: y[idy+5] = x[5];
1174: y[idy+6] = x[6];
1175: x += 7;
1176: }
1177: break;
1178: case ADD_VALUES:
1179: case ADD_ALL_VALUES:
1180: for (i=0; i<n; i++) {
1181: idy = *indicesy++;
1182: y[idy] += x[0];
1183: y[idy+1] += x[1];
1184: y[idy+2] += x[2];
1185: y[idy+3] += x[3];
1186: y[idy+4] += x[4];
1187: y[idy+5] += x[5];
1188: y[idy+6] += x[6];
1189: x += 7;
1190: }
1191: break;
1192: #if !defined(PETSC_USE_COMPLEX)
1193: case MAX_VALUES:
1194: for (i=0; i<n; i++) {
1195: idy = *indicesy++;
1196: y[idy] = PetscMax(y[idy],x[0]);
1197: y[idy+1] = PetscMax(y[idy+1],x[1]);
1198: y[idy+2] = PetscMax(y[idy+2],x[2]);
1199: y[idy+3] = PetscMax(y[idy+3],x[3]);
1200: y[idy+4] = PetscMax(y[idy+4],x[4]);
1201: y[idy+5] = PetscMax(y[idy+5],x[5]);
1202: y[idy+6] = PetscMax(y[idy+6],x[6]);
1203: x += 7;
1204: }
1205: #else
1206: case MAX_VALUES:
1207: #endif
1208: case NOT_SET_VALUES:
1209: break;
1210: default:
1211: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1212: }
1213: return(0);
1214: }
1218: PETSC_STATIC_INLINE PetscErrorCode Scatter_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1219: {
1220: PetscInt i,idx,idy;
1223: switch (addv) {
1224: case INSERT_VALUES:
1225: case INSERT_ALL_VALUES:
1226: for (i=0; i<n; i++) {
1227: idx = *indicesx++;
1228: idy = *indicesy++;
1229: y[idy] = x[idx];
1230: y[idy+1] = x[idx+1];
1231: y[idy+2] = x[idx+2];
1232: y[idy+3] = x[idx+3];
1233: y[idy+4] = x[idx+4];
1234: y[idy+5] = x[idx+5];
1235: y[idy+6] = x[idx+6];
1236: }
1237: break;
1238: case ADD_VALUES:
1239: case ADD_ALL_VALUES:
1240: for (i=0; i<n; i++) {
1241: idx = *indicesx++;
1242: idy = *indicesy++;
1243: y[idy] += x[idx];
1244: y[idy+1] += x[idx+1];
1245: y[idy+2] += x[idx+2];
1246: y[idy+3] += x[idx+3];
1247: y[idy+4] += x[idx+4];
1248: y[idy+5] += x[idx+5];
1249: y[idy+6] += x[idx+6];
1250: }
1251: break;
1252: #if !defined(PETSC_USE_COMPLEX)
1253: case MAX_VALUES:
1254: for (i=0; i<n; i++) {
1255: idx = *indicesx++;
1256: idy = *indicesy++;
1257: y[idy] = PetscMax(y[idy],x[idx]);
1258: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1259: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1260: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1261: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1262: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1263: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1264: }
1265: #else
1266: case MAX_VALUES:
1267: #endif
1268: case NOT_SET_VALUES:
1269: break;
1270: default:
1271: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1272: }
1273: return(0);
1274: }
1275: /* ----------------------------------------------------------------------------------------------- */
1276: PETSC_STATIC_INLINE void Pack_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1277: {
1278: PetscInt i,idx;
1280: for (i=0; i<n; i++) {
1281: idx = *indicesx++;
1282: y[0] = x[idx];
1283: y[1] = x[idx+1];
1284: y[2] = x[idx+2];
1285: y[3] = x[idx+3];
1286: y[4] = x[idx+4];
1287: y[5] = x[idx+5];
1288: y[6] = x[idx+6];
1289: y[7] = x[idx+7];
1290: y += 8;
1291: }
1292: }
1296: PETSC_STATIC_INLINE PetscErrorCode UnPack_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1297: {
1298: PetscInt i,idy;
1301: switch (addv) {
1302: case INSERT_VALUES:
1303: case INSERT_ALL_VALUES:
1304: for (i=0; i<n; i++) {
1305: idy = *indicesy++;
1306: y[idy] = x[0];
1307: y[idy+1] = x[1];
1308: y[idy+2] = x[2];
1309: y[idy+3] = x[3];
1310: y[idy+4] = x[4];
1311: y[idy+5] = x[5];
1312: y[idy+6] = x[6];
1313: y[idy+7] = x[7];
1314: x += 8;
1315: }
1316: break;
1317: case ADD_VALUES:
1318: case ADD_ALL_VALUES:
1319: for (i=0; i<n; i++) {
1320: idy = *indicesy++;
1321: y[idy] += x[0];
1322: y[idy+1] += x[1];
1323: y[idy+2] += x[2];
1324: y[idy+3] += x[3];
1325: y[idy+4] += x[4];
1326: y[idy+5] += x[5];
1327: y[idy+6] += x[6];
1328: y[idy+7] += x[7];
1329: x += 8;
1330: }
1331: break;
1332: #if !defined(PETSC_USE_COMPLEX)
1333: case MAX_VALUES:
1334: for (i=0; i<n; i++) {
1335: idy = *indicesy++;
1336: y[idy] = PetscMax(y[idy],x[0]);
1337: y[idy+1] = PetscMax(y[idy+1],x[1]);
1338: y[idy+2] = PetscMax(y[idy+2],x[2]);
1339: y[idy+3] = PetscMax(y[idy+3],x[3]);
1340: y[idy+4] = PetscMax(y[idy+4],x[4]);
1341: y[idy+5] = PetscMax(y[idy+5],x[5]);
1342: y[idy+6] = PetscMax(y[idy+6],x[6]);
1343: y[idy+7] = PetscMax(y[idy+7],x[7]);
1344: x += 8;
1345: }
1346: #else
1347: case MAX_VALUES:
1348: #endif
1349: case NOT_SET_VALUES:
1350: break;
1351: default:
1352: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1353: }
1354: return(0);
1355: }
1359: PETSC_STATIC_INLINE PetscErrorCode Scatter_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1360: {
1361: PetscInt i,idx,idy;
1364: switch (addv) {
1365: case INSERT_VALUES:
1366: case INSERT_ALL_VALUES:
1367: for (i=0; i<n; i++) {
1368: idx = *indicesx++;
1369: idy = *indicesy++;
1370: y[idy] = x[idx];
1371: y[idy+1] = x[idx+1];
1372: y[idy+2] = x[idx+2];
1373: y[idy+3] = x[idx+3];
1374: y[idy+4] = x[idx+4];
1375: y[idy+5] = x[idx+5];
1376: y[idy+6] = x[idx+6];
1377: y[idy+7] = x[idx+7];
1378: }
1379: break;
1380: case ADD_VALUES:
1381: case ADD_ALL_VALUES:
1382: for (i=0; i<n; i++) {
1383: idx = *indicesx++;
1384: idy = *indicesy++;
1385: y[idy] += x[idx];
1386: y[idy+1] += x[idx+1];
1387: y[idy+2] += x[idx+2];
1388: y[idy+3] += x[idx+3];
1389: y[idy+4] += x[idx+4];
1390: y[idy+5] += x[idx+5];
1391: y[idy+6] += x[idx+6];
1392: y[idy+7] += x[idx+7];
1393: }
1394: break;
1395: #if !defined(PETSC_USE_COMPLEX)
1396: case MAX_VALUES:
1397: for (i=0; i<n; i++) {
1398: idx = *indicesx++;
1399: idy = *indicesy++;
1400: y[idy] = PetscMax(y[idy],x[idx]);
1401: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1402: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1403: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1404: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1405: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1406: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1407: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1408: }
1409: #else
1410: case MAX_VALUES:
1411: #endif
1412: case NOT_SET_VALUES:
1413: break;
1414: default:
1415: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1416: }
1417: return(0);
1418: }
1420: /* ----------------------------------------------------------------------------------------------- */
1421: PETSC_STATIC_INLINE void Pack_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1422: {
1423: PetscInt i,idx;
1425: for (i=0; i<n; i++) {
1426: idx = *indicesx++;
1427: y[0] = x[idx];
1428: y[1] = x[idx+1];
1429: y[2] = x[idx+2];
1430: y[3] = x[idx+3];
1431: y[4] = x[idx+4];
1432: y[5] = x[idx+5];
1433: y[6] = x[idx+6];
1434: y[7] = x[idx+7];
1435: y[8] = x[idx+8];
1436: y[9] = x[idx+9];
1437: y[10] = x[idx+10];
1438: y[11] = x[idx+11];
1439: y += 12;
1440: }
1441: }
1445: PETSC_STATIC_INLINE PetscErrorCode UnPack_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1446: {
1447: PetscInt i,idy;
1450: switch (addv) {
1451: case INSERT_VALUES:
1452: case INSERT_ALL_VALUES:
1453: for (i=0; i<n; i++) {
1454: idy = *indicesy++;
1455: y[idy] = x[0];
1456: y[idy+1] = x[1];
1457: y[idy+2] = x[2];
1458: y[idy+3] = x[3];
1459: y[idy+4] = x[4];
1460: y[idy+5] = x[5];
1461: y[idy+6] = x[6];
1462: y[idy+7] = x[7];
1463: y[idy+8] = x[8];
1464: y[idy+9] = x[9];
1465: y[idy+10] = x[10];
1466: y[idy+11] = x[11];
1467: x += 12;
1468: }
1469: break;
1470: case ADD_VALUES:
1471: case ADD_ALL_VALUES:
1472: for (i=0; i<n; i++) {
1473: idy = *indicesy++;
1474: y[idy] += x[0];
1475: y[idy+1] += x[1];
1476: y[idy+2] += x[2];
1477: y[idy+3] += x[3];
1478: y[idy+4] += x[4];
1479: y[idy+5] += x[5];
1480: y[idy+6] += x[6];
1481: y[idy+7] += x[7];
1482: y[idy+8] += x[8];
1483: y[idy+9] += x[9];
1484: y[idy+10] += x[10];
1485: y[idy+11] += x[11];
1486: x += 12;
1487: }
1488: break;
1489: #if !defined(PETSC_USE_COMPLEX)
1490: case MAX_VALUES:
1491: for (i=0; i<n; i++) {
1492: idy = *indicesy++;
1493: y[idy] = PetscMax(y[idy],x[0]);
1494: y[idy+1] = PetscMax(y[idy+1],x[1]);
1495: y[idy+2] = PetscMax(y[idy+2],x[2]);
1496: y[idy+3] = PetscMax(y[idy+3],x[3]);
1497: y[idy+4] = PetscMax(y[idy+4],x[4]);
1498: y[idy+5] = PetscMax(y[idy+5],x[5]);
1499: y[idy+6] = PetscMax(y[idy+6],x[6]);
1500: y[idy+7] = PetscMax(y[idy+7],x[7]);
1501: y[idy+8] = PetscMax(y[idy+8],x[8]);
1502: y[idy+9] = PetscMax(y[idy+9],x[9]);
1503: y[idy+10] = PetscMax(y[idy+10],x[10]);
1504: y[idy+11] = PetscMax(y[idy+11],x[11]);
1505: x += 12;
1506: }
1507: #else
1508: case MAX_VALUES:
1509: #endif
1510: case NOT_SET_VALUES:
1511: break;
1512: default:
1513: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1514: }
1515: return(0);
1516: }
1520: PETSC_STATIC_INLINE PetscErrorCode Scatter_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1521: {
1522: PetscInt i,idx,idy;
1525: switch (addv) {
1526: case INSERT_VALUES:
1527: case INSERT_ALL_VALUES:
1528: for (i=0; i<n; i++) {
1529: idx = *indicesx++;
1530: idy = *indicesy++;
1531: y[idy] = x[idx];
1532: y[idy+1] = x[idx+1];
1533: y[idy+2] = x[idx+2];
1534: y[idy+3] = x[idx+3];
1535: y[idy+4] = x[idx+4];
1536: y[idy+5] = x[idx+5];
1537: y[idy+6] = x[idx+6];
1538: y[idy+7] = x[idx+7];
1539: y[idy+8] = x[idx+8];
1540: y[idy+9] = x[idx+9];
1541: y[idy+10] = x[idx+10];
1542: y[idy+11] = x[idx+11];
1543: }
1544: break;
1545: case ADD_VALUES:
1546: case ADD_ALL_VALUES:
1547: for (i=0; i<n; i++) {
1548: idx = *indicesx++;
1549: idy = *indicesy++;
1550: y[idy] += x[idx];
1551: y[idy+1] += x[idx+1];
1552: y[idy+2] += x[idx+2];
1553: y[idy+3] += x[idx+3];
1554: y[idy+4] += x[idx+4];
1555: y[idy+5] += x[idx+5];
1556: y[idy+6] += x[idx+6];
1557: y[idy+7] += x[idx+7];
1558: y[idy+8] += x[idx+8];
1559: y[idy+9] += x[idx+9];
1560: y[idy+10] += x[idx+10];
1561: y[idy+11] += x[idx+11];
1562: }
1563: break;
1564: #if !defined(PETSC_USE_COMPLEX)
1565: case MAX_VALUES:
1566: for (i=0; i<n; i++) {
1567: idx = *indicesx++;
1568: idy = *indicesy++;
1569: y[idy] = PetscMax(y[idy],x[idx]);
1570: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1571: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1572: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1573: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1574: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1575: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1576: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1577: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1578: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1579: y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1580: y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
1581: }
1582: #else
1583: case MAX_VALUES:
1584: #endif
1585: case NOT_SET_VALUES:
1586: break;
1587: default:
1588: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1589: }
1590: return(0);
1591: }
1593: /* Create the VecScatterBegin/End_P for our chosen block sizes */
1594: #define BS 1
1595: #include <../src/vec/vec/utils/vpscat.h>
1596: #define BS 2
1597: #include <../src/vec/vec/utils/vpscat.h>
1598: #define BS 3
1599: #include <../src/vec/vec/utils/vpscat.h>
1600: #define BS 4
1601: #include <../src/vec/vec/utils/vpscat.h>
1602: #define BS 5
1603: #include <../src/vec/vec/utils/vpscat.h>
1604: #define BS 6
1605: #include <../src/vec/vec/utils/vpscat.h>
1606: #define BS 7
1607: #include <../src/vec/vec/utils/vpscat.h>
1608: #define BS 8
1609: #include <../src/vec/vec/utils/vpscat.h>
1610: #define BS 12
1611: #include <../src/vec/vec/utils/vpscat.h>
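Each #define BS / #include pair above instantiates the begin/end routines for one block size. The mechanism is the classic include-as-template trick sketched below; this is illustrative only, with a hypothetical CONCAT helper, since the real vpscat.h uses its own paste macros:

  /* hypothetical vpscat.h-style header body */
  #define CONCAT_(a,b) a##b
  #define CONCAT(a,b)  CONCAT_(a,b)

  PetscErrorCode CONCAT(VecScatterBegin_,BS)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
  {
    /* ... body uses the compile-time constant BS as the block size ... */
    return 0;
  }

  #undef BS   /* so the next #define BS n / #include pair starts clean */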
1613: /* ==========================================================================================*/
1615: /* create parallel to sequential scatter context */
1617: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General*,VecScatter_MPI_General*,VecScatter);
1621: /*@
1622: VecScatterCreateLocal - Creates a VecScatter from a list of messages it must send and receive.
1624: Collective on VecScatter
1626: Input Parameters:
1627: + ctx - the VecScatter, obtained with VecScatterCreateEmpty()
1628: . nsends - number of messages this process will send
1629: . sendSizes - number of entries in each message sent
1630: . sendProcs - ranks of the processes this process sends to
1631: . sendIdx - indices where the sent entries are obtained from (in local, on-process numbering); this is one long array of size \sum_{i=0,i<nsends} sendSizes[i]
1632: . nrecvs - number of receives to expect
1633: . recvSizes - number of entries in each message received
1634: . recvProcs - ranks of the processes that send to this process
1635: . recvIdx - indices where the received entries are to be put (in local, on-process numbering); this is one long array of size \sum_{i=0,i<nrecvs} recvSizes[i]
1636: - bs - size of block
1638: Notes: sendSizes[] and recvSizes[] cannot have any 0 entries. If you want to support having 0 entries you need
1639: to change the code below to "compress out" the sendProcs[] and recvProcs[] entries that have 0 entries.
1641: This probably does not handle sends to self properly; those entries should be removed from the counts
1642: that are used in allocating space inside the from struct.
1644: Level: intermediate
1646: @*/
1647: PetscErrorCode VecScatterCreateLocal(VecScatter ctx,PetscInt nsends,const PetscInt sendSizes[],const PetscInt sendProcs[],const PetscInt sendIdx[],PetscInt nrecvs,const PetscInt recvSizes[],const PetscInt recvProcs[],const PetscInt recvIdx[],PetscInt bs)
1648: {
1649: VecScatter_MPI_General *from, *to;
1650: PetscInt sendSize, recvSize;
1651: PetscInt n, i;
1652: PetscErrorCode ierr;
1654: /* allocate entire send scatter context */
1655: PetscNewLog(ctx,&to);
1656: to->n = nsends;
1657: for (n = 0, sendSize = 0; n < to->n; n++) sendSize += sendSizes[n];
1659: PetscMalloc1(to->n,&to->requests);
1660: PetscMalloc4(bs*sendSize,&to->values,sendSize,&to->indices,to->n+1,&to->starts,to->n,&to->procs);
1661: PetscMalloc2(PetscMax(to->n,nrecvs),&to->sstatus,PetscMax(to->n,nrecvs),&to->rstatus);
1663: to->starts[0] = 0;
1664: for (n = 0; n < to->n; n++) {
1665: if (sendSizes[n] <=0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"sendSizes[n=%D] = %D cannot be less than 1",n,sendSizes[n]);
1666: to->starts[n+1] = to->starts[n] + sendSizes[n];
1667: to->procs[n] = sendProcs[n];
1668: for (i = to->starts[n]; i < to->starts[n]+sendSizes[n]; i++) to->indices[i] = sendIdx[i];
1669: }
1670: ctx->todata = (void*) to;
1672: /* allocate entire receive scatter context */
1673: PetscNewLog(ctx,&from);
1674: from->n = nrecvs;
1675: for (n = 0, recvSize = 0; n < from->n; n++) recvSize += recvSizes[n];
1677: PetscMalloc1(from->n,&from->requests);
1678: PetscMalloc4(bs*recvSize,&from->values,recvSize,&from->indices,from->n+1,&from->starts,from->n,&from->procs);
1680: from->starts[0] = 0;
1681: for (n = 0; n < from->n; n++) {
1682: if (recvSizes[n] <=0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"recvSizes[n=%D] = %D cannot be less than 1",n,recvSizes[n]);
1683: from->starts[n+1] = from->starts[n] + recvSizes[n];
1684: from->procs[n] = recvProcs[n];
1685: for (i = from->starts[n]; i < from->starts[n]+recvSizes[n]; i++) from->indices[i] = recvIdx[i];
1686: }
1687: ctx->fromdata = (void*)from;
1689: /* No local scatter optimization */
1690: from->local.n = 0;
1691: from->local.vslots = 0;
1692: to->local.n = 0;
1693: to->local.vslots = 0;
1694: from->local.nonmatching_computed = PETSC_FALSE;
1695: from->local.n_nonmatching = 0;
1696: from->local.slots_nonmatching = 0;
1697: to->local.nonmatching_computed = PETSC_FALSE;
1698: to->local.n_nonmatching = 0;
1699: to->local.slots_nonmatching = 0;
1701: from->type = VEC_SCATTER_MPI_GENERAL;
1702: to->type = VEC_SCATTER_MPI_GENERAL;
1703: from->bs = bs;
1704: to->bs = bs;
1705: VecScatterCreateCommon_PtoS(from, to, ctx);
1707: /* mark lengths as negative so VecScatterBegin() will not check the local vector lengths */
1708: ctx->from_n = ctx->to_n = -1;
1709: return(0);
1710: }
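A usage sketch for the routine above (assumptions: exactly two MPI ranks, each sending its first two local entries to the other and receiving them into local slots 2 and 3; rank and ierr are assumed to be defined):

  PetscInt   sendSizes[1] = {2},recvSizes[1] = {2};
  PetscInt   sendProcs[1],recvProcs[1];
  PetscInt   sendIdx[2]   = {0,1},recvIdx[2] = {2,3};
  VecScatter ctx;

  sendProcs[0] = recvProcs[0] = 1 - rank;             /* the other rank */
  ierr = VecScatterCreateEmpty(PETSC_COMM_WORLD,&ctx);CHKERRQ(ierr);
  ierr = VecScatterCreateLocal(ctx,1,sendSizes,sendProcs,sendIdx,1,recvSizes,recvProcs,recvIdx,1);CHKERRQ(ierr);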
1712: /*
1713: bs indicates how many elements there are in each block. Normally this would be 1.
1715: This routine contains a check that PetscMPIInt can handle the sizes needed.
1716: */
1719: PetscErrorCode VecScatterCreate_PtoS(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
1720: {
1721: VecScatter_MPI_General *from,*to;
1722: PetscMPIInt size,rank,imdex,tag,n;
1723: PetscInt *source = NULL,*owners = NULL;
1724: PetscInt *lowner = NULL,*start = NULL,lengthy,lengthx;
1725: PetscMPIInt *nprocs = NULL,nrecvs;
1726: PetscInt i,j,idx,nsends;
1727: PetscInt *owner = NULL,*starts = NULL,count,slen;
1728: PetscInt *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
1729: PetscMPIInt *onodes1,*olengths1;
1730: MPI_Comm comm;
1731: MPI_Request *send_waits = NULL,*recv_waits = NULL;
1732: MPI_Status recv_status,*send_status;
1733: PetscErrorCode ierr;
1736: PetscObjectGetNewTag((PetscObject)ctx,&tag);
1737: PetscObjectGetComm((PetscObject)xin,&comm);
1738: MPI_Comm_rank(comm,&rank);
1739: MPI_Comm_size(comm,&size);
1740: owners = xin->map->range;
1741: VecGetSize(yin,&lengthy);
1742: VecGetSize(xin,&lengthx);
1744: /* first count number of contributors to each processor */
1745: PetscMalloc2(size,&nprocs,nx,&owner);
1746: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
1748: j = 0;
1749: nsends = 0;
1750: for (i=0; i<nx; i++) {
1751: idx = bs*inidx[i];
1752: if (idx < owners[j]) j = 0;
1753: for (; j<size; j++) {
1754: if (idx < owners[j+1]) {
1755: if (!nprocs[j]++) nsends++;
1756: owner[i] = j;
1757: break;
1758: }
1759: }
1760: if (j == size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ith %D block entry %D not owned by any process, upper bound %D",i,idx,owners[size]);
1761: }
1763: nprocslocal = nprocs[rank];
1764: nprocs[rank] = 0;
1765: if (nprocslocal) nsends--;
1766: /* inform other processors of number of messages and max length*/
1767: PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
1768: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
1769: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
1770: recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
1772: /* post receives: */
1773: PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits);
1774: count = 0;
1775: for (i=0; i<nrecvs; i++) {
1776: MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
1777: count += olengths1[i];
1778: }
1780: /* do sends:
1781: 1) starts[i] gives the starting index in svalues for the entries going to
1782: the ith processor
1783: */
1784: PetscMalloc3(nx,&svalues,nsends,&send_waits,size+1,&starts);
1786: starts[0] = 0;
1787: for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
1788: for (i=0; i<nx; i++) {
1789: if (owner[i] != rank) svalues[starts[owner[i]]++] = bs*inidx[i];
1790: }
1791: starts[0] = 0;
1792: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
1793: count = 0;
1794: for (i=0; i<size; i++) {
1795: if (nprocs[i]) {
1796: MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);
1797: }
1798: }
1800: /* wait on receives */
1801: count = nrecvs;
1802: slen = 0;
1803: while (count) {
1804: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1805: /* unpack receives into our local space */
1806: MPI_Get_count(&recv_status,MPIU_INT,&n);
1807: slen += n;
1808: count--;
1809: }
1811: if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal);
1813: /* allocate entire send scatter context */
1814: PetscNewLog(ctx,&to);
1815: to->n = nrecvs;
1817: PetscMalloc1(nrecvs,&to->requests);
1818: PetscMalloc4(bs*slen,&to->values,slen,&to->indices,nrecvs+1,&to->starts,nrecvs,&to->procs);
1819: PetscMalloc2(PetscMax(to->n,nsends),&to->sstatus,PetscMax(to->n,nsends),&to->rstatus);
1821: ctx->todata = (void*)to;
1822: to->starts[0] = 0;
1824: if (nrecvs) {
1825: /* move the data into the send scatter */
1826: base = owners[rank];
1827: rsvalues = rvalues;
1828: for (i=0; i<nrecvs; i++) {
1829: to->starts[i+1] = to->starts[i] + olengths1[i];
1830: to->procs[i] = onodes1[i];
1831: values = rsvalues;
1832: rsvalues += olengths1[i];
1833: for (j=0; j<olengths1[i]; j++) to->indices[to->starts[i] + j] = values[j] - base;
1834: }
1835: }
1836: PetscFree(olengths1);
1837: PetscFree(onodes1);
1838: PetscFree3(rvalues,source,recv_waits);
1840: /* allocate entire receive scatter context */
1841: PetscNewLog(ctx,&from);
1842: from->n = nsends;
1844: PetscMalloc1(nsends,&from->requests);
1845: PetscMalloc4((ny-nprocslocal)*bs,&from->values,ny-nprocslocal,&from->indices,nsends+1,&from->starts,from->n,&from->procs);
1846: ctx->fromdata = (void*)from;
1848: /* move data into receive scatter */
1849: PetscMalloc2(size,&lowner,nsends+1,&start);
1850: count = 0; from->starts[0] = start[0] = 0;
1851: for (i=0; i<size; i++) {
1852: if (nprocs[i]) {
1853: lowner[i] = count;
1854: from->procs[count++] = i;
1855: from->starts[count] = start[count] = start[count-1] + nprocs[i];
1856: }
1857: }
1859: for (i=0; i<nx; i++) {
1860: if (owner[i] != rank) {
1861: from->indices[start[lowner[owner[i]]]++] = bs*inidy[i];
1862: if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
1863: }
1864: }
1865: PetscFree2(lowner,start);
1866: PetscFree2(nprocs,owner);
1868: /* wait on sends */
1869: if (nsends) {
1870: PetscMalloc1(nsends,&send_status);
1871: MPI_Waitall(nsends,send_waits,send_status);
1872: PetscFree(send_status);
1873: }
1874: PetscFree3(svalues,send_waits,starts);
1876: if (nprocslocal) {
1877: PetscInt nt = from->local.n = to->local.n = nprocslocal;
1878: /* we have a scatter to ourselves */
1879: PetscMalloc1(nt,&to->local.vslots);
1880: PetscMalloc1(nt,&from->local.vslots);
1881: nt = 0;
1882: for (i=0; i<nx; i++) {
1883: idx = bs*inidx[i];
1884: if (idx >= owners[rank] && idx < owners[rank+1]) {
1885: to->local.vslots[nt] = idx - owners[rank];
1886: from->local.vslots[nt++] = bs*inidy[i];
1887: if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
1888: }
1889: }
1890: } else {
1891: from->local.n = 0;
1892: from->local.vslots = 0;
1893: to->local.n = 0;
1894: to->local.vslots = 0;
1895: }
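/*
   Illustrative note (not part of the original source): the local blocks describe the
   self-to-self part of the scatter, which needs no MPI traffic. As filled above,
   to->local.vslots[k] is the offset of the k-th locally owned source entry
   (idx - owners[rank]) and from->local.vslots[k] is the corresponding block-scaled
   destination index bs*inidy[i]; the two arrays are walked in lockstep when the
   local copy is performed.
*/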
1897: from->local.nonmatching_computed = PETSC_FALSE;
1898: from->local.n_nonmatching = 0;
1899: from->local.slots_nonmatching = 0;
1900: to->local.nonmatching_computed = PETSC_FALSE;
1901: to->local.n_nonmatching = 0;
1902: to->local.slots_nonmatching = 0;
1904: from->type = VEC_SCATTER_MPI_GENERAL;
1905: to->type = VEC_SCATTER_MPI_GENERAL;
1906: from->bs = bs;
1907: to->bs = bs;
1909: VecScatterCreateCommon_PtoS(from,to,ctx);
1910: return(0);
1911: }
1913: /*
1914: bs indicates how many elements there are in each block. Normally this would be 1.
1915: */
1918: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
1919: {
1920: MPI_Comm comm;
1921: PetscMPIInt tag = ((PetscObject)ctx)->tag, tagr;
1922: PetscInt bs = to->bs;
1923: PetscMPIInt size;
1924: PetscInt i, n;
1928: PetscObjectGetComm((PetscObject)ctx,&comm);
1929: PetscObjectGetNewTag((PetscObject)ctx,&tagr);
1930: ctx->destroy = VecScatterDestroy_PtoP;
1932: ctx->reproduce = PETSC_FALSE;
1933: to->sendfirst = PETSC_FALSE;
1934: PetscOptionsGetBool(NULL,"-vecscatter_reproduce",&ctx->reproduce,NULL);
1935: PetscOptionsGetBool(NULL,"-vecscatter_sendfirst",&to->sendfirst,NULL);
1936: from->sendfirst = to->sendfirst;
1938: MPI_Comm_size(comm,&size);
1939: /* check if the receives are ALL going into contiguous locations; if so can skip indexing */
1940: to->contiq = PETSC_FALSE;
1941: n = from->starts[from->n];
1942: from->contiq = PETSC_TRUE;
1943: for (i=1; i<n; i++) {
1944: if (from->indices[i] != from->indices[i-1] + bs) {
1945: from->contiq = PETSC_FALSE;
1946: break;
1947: }
1948: }
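/*
   Illustrative note (not part of the original source): with bs=2, received indices
   {10,12,14} satisfy indices[i] == indices[i-1]+bs for every i and from->contiq stays
   PETSC_TRUE, so indexing can be skipped when unpacking; {10,14,16} fails the test at
   i=1 and leaves from->contiq PETSC_FALSE.
*/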
1950: to->use_alltoallv = PETSC_FALSE;
1951: PetscOptionsGetBool(NULL,"-vecscatter_alltoall",&to->use_alltoallv,NULL);
1952: from->use_alltoallv = to->use_alltoallv;
1953: if (from->use_alltoallv) PetscInfo(ctx,"Using MPI_Alltoallv() for scatter\n");
1954: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
1955: if (to->use_alltoallv) {
1956: to->use_alltoallw = PETSC_FALSE;
1957: PetscOptionsGetBool(NULL,"-vecscatter_nopack",&to->use_alltoallw,NULL);
1958: }
1959: from->use_alltoallw = to->use_alltoallw;
1960: if (from->use_alltoallw) PetscInfo(ctx,"Using MPI_Alltoallw() for scatter\n");
1961: #endif
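/*
   Usage sketch (not part of the original source; the executable name is hypothetical):
   these code paths are selected at run time via options, e.g.
       mpiexec -n 8 ./app -vecscatter_alltoall
   chooses the MPI_Alltoallv() path, and adding -vecscatter_nopack chooses the
   MPI_Alltoallw() variant when it is available.
*/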
1963: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
1964: to->use_window = PETSC_FALSE;
1965: PetscOptionsGetBool(NULL,"-vecscatter_window",&to->use_window,NULL);
1966: from->use_window = to->use_window;
1967: #endif
1969: if (to->use_alltoallv) {
1971: PetscMalloc2(size,&to->counts,size,&to->displs);
1972: PetscMemzero(to->counts,size*sizeof(PetscMPIInt));
1973: for (i=0; i<to->n; i++) to->counts[to->procs[i]] = bs*(to->starts[i+1] - to->starts[i]);
1975: to->displs[0] = 0;
1976: for (i=1; i<size; i++) to->displs[i] = to->displs[i-1] + to->counts[i-1];
1978: PetscMalloc2(size,&from->counts,size,&from->displs);
1979: PetscMemzero(from->counts,size*sizeof(PetscMPIInt));
1980: for (i=0; i<from->n; i++) from->counts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
1981: from->displs[0] = 0;
1982: for (i=1; i<size; i++) from->displs[i] = from->displs[i-1] + from->counts[i-1];
1984: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
1985: if (to->use_alltoallw) {
1986: PetscMPIInt mpibs, mpilen;
1988: ctx->packtogether = PETSC_FALSE;
1989: PetscMPIIntCast(bs,&mpibs);
1990: PetscMalloc3(size,&to->wcounts,size,&to->wdispls,size,&to->types);
1991: PetscMemzero(to->wcounts,size*sizeof(PetscMPIInt));
1992: PetscMemzero(to->wdispls,size*sizeof(PetscMPIInt));
1993: for (i=0; i<size; i++) to->types[i] = MPIU_SCALAR;
1995: for (i=0; i<to->n; i++) {
1996: to->wcounts[to->procs[i]] = 1;
1997: PetscMPIIntCast(to->starts[i+1]-to->starts[i],&mpilen);
1998: MPI_Type_create_indexed_block(mpilen,mpibs,to->indices+to->starts[i],MPIU_SCALAR,to->types+to->procs[i]);
1999: MPI_Type_commit(to->types+to->procs[i]);
2000: }
2001: PetscMalloc3(size,&from->wcounts,size,&from->wdispls,size,&from->types);
2002: PetscMemzero(from->wcounts,size*sizeof(PetscMPIInt));
2003: PetscMemzero(from->wdispls,size*sizeof(PetscMPIInt));
2004: for (i=0; i<size; i++) from->types[i] = MPIU_SCALAR;
2006: if (from->contiq) {
2007: PetscInfo(ctx,"Scattered vector entries are stored contiguously, taking advantage of this with -vecscatter_alltoall\n");
2008: for (i=0; i<from->n; i++) from->wcounts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
2010: if (from->n) from->wdispls[from->procs[0]] = sizeof(PetscScalar)*from->indices[0];
2011: for (i=1; i<from->n; i++) from->wdispls[from->procs[i]] = from->wdispls[from->procs[i-1]] + sizeof(PetscScalar)*from->wcounts[from->procs[i-1]];
2013: } else {
2014: for (i=0; i<from->n; i++) {
2015: from->wcounts[from->procs[i]] = 1;
2016: PetscMPIIntCast(from->starts[i+1]-from->starts[i],&mpilen);
2017: MPI_Type_create_indexed_block(mpilen,mpibs,from->indices+from->starts[i],MPIU_SCALAR,from->types+from->procs[i]);
2018: MPI_Type_commit(from->types+from->procs[i]);
2019: }
2020: }
2021: } else ctx->copy = VecScatterCopy_PtoP_AllToAll;
2023: #else
2024: to->use_alltoallw = PETSC_FALSE;
2025: from->use_alltoallw = PETSC_FALSE;
2026: ctx->copy = VecScatterCopy_PtoP_AllToAll;
2027: #endif
2028: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
2029: } else if (to->use_window) {
2030: PetscMPIInt temptag,winsize;
2031: MPI_Request *request;
2032: MPI_Status *status;
2034: PetscObjectGetNewTag((PetscObject)ctx,&temptag);
2035: winsize = (to->n ? to->starts[to->n] : 0)*bs*sizeof(PetscScalar);
2036: MPI_Win_create(to->values ? to->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&to->window);
2037: PetscMalloc1(to->n,&to->winstarts);
2038: PetscMalloc2(to->n,&request,to->n,&status);
2039: for (i=0; i<to->n; i++) {
2040: MPI_Irecv(to->winstarts+i,1,MPIU_INT,to->procs[i],temptag,comm,request+i);
2041: }
2042: for (i=0; i<from->n; i++) {
2043: MPI_Send(from->starts+i,1,MPIU_INT,from->procs[i],temptag,comm);
2044: }
2045: MPI_Waitall(to->n,request,status);
2046: PetscFree2(request,status);
2048: winsize = (from->n ? from->starts[from->n] : 0)*bs*sizeof(PetscScalar);
2049: MPI_Win_create(from->values ? from->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&from->window);
2050: PetscMalloc1(from->n,&from->winstarts);
2051: PetscMalloc2(from->n,&request,from->n,&status);
2052: for (i=0; i<from->n; i++) {
2053: MPI_Irecv(from->winstarts+i,1,MPIU_INT,from->procs[i],temptag,comm,request+i);
2054: }
2055: for (i=0; i<to->n; i++) {
2056: MPI_Send(to->starts+i,1,MPIU_INT,to->procs[i],temptag,comm);
2057: }
2058: MPI_Waitall(from->n,request,status);
2059: PetscFree2(request,status);
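/*
   Illustrative note (not part of the original source): the handshake above lets every
   process learn, for each neighbor, the offset at which its block begins inside that
   neighbor's exposed window; those offsets are kept in to->winstarts/from->winstarts
   and are used as the target displacements of the one-sided transfers issued when the
   scatter is executed.
*/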
2060: #endif
2061: } else {
2062: PetscBool use_rsend = PETSC_FALSE, use_ssend = PETSC_FALSE;
2063: PetscInt *sstarts = to->starts, *rstarts = from->starts;
2064: PetscMPIInt *sprocs = to->procs, *rprocs = from->procs;
2065: MPI_Request *swaits = to->requests,*rwaits = from->requests;
2066: MPI_Request *rev_swaits,*rev_rwaits;
2067: PetscScalar *Ssvalues = to->values, *Srvalues = from->values;
2069: /* allocate additional wait variables for the "reverse" scatter */
2070: PetscMalloc1(to->n,&rev_rwaits);
2071: PetscMalloc1(from->n,&rev_swaits);
2072: to->rev_requests = rev_rwaits;
2073: from->rev_requests = rev_swaits;
2075: /* Choose the send mode, then register the persistent sends posted on the receive buffer (these serve as the sends of the reverse scatter) */
2076: PetscOptionsGetBool(NULL,"-vecscatter_rsend",&use_rsend,NULL);
2077: PetscOptionsGetBool(NULL,"-vecscatter_ssend",&use_ssend,NULL);
2078: if (use_rsend) {
2079: PetscInfo(ctx,"Using VecScatter ready receiver mode\n");
2080: to->use_readyreceiver = PETSC_TRUE;
2081: from->use_readyreceiver = PETSC_TRUE;
2082: } else {
2083: to->use_readyreceiver = PETSC_FALSE;
2084: from->use_readyreceiver = PETSC_FALSE;
2085: }
2086: if (use_ssend) {
2087: PetscInfo(ctx,"Using VecScatter Ssend mode\n");
2088: }
2090: for (i=0; i<from->n; i++) {
2091: if (use_rsend) {
2092: MPI_Rsend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2093: } else if (use_ssend) {
2094: MPI_Ssend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2095: } else {
2096: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2097: }
2098: }
2100: for (i=0; i<to->n; i++) {
2101: if (use_rsend) {
2102: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2103: } else if (use_ssend) {
2104: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2105: } else {
2106: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2107: }
2108: }
2109: /* Register receives for scatter and reverse */
2110: for (i=0; i<from->n; i++) {
2111: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
2112: }
2113: for (i=0; i<to->n; i++) {
2114: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);
2115: }
2116: if (use_rsend) {
2117: if (to->n) {MPI_Startall_irecv(to->starts[to->n]*to->bs,to->n,to->rev_requests);}
2118: if (from->n) {MPI_Startall_irecv(from->starts[from->n]*from->bs,from->n,from->requests);}
2119: MPI_Barrier(comm);
2120: }
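/*
   Illustrative note (not part of the original source): MPI_Send_init/MPI_Ssend_init/
   MPI_Rsend_init/MPI_Recv_init only create persistent requests; no data moves here.
   The requests are started each time the scatter runs (e.g. via MPI_Startall). Ready
   mode (-vecscatter_rsend) requires the matching receives to be posted before any
   MPI_Rsend starts, which is why the receive requests are pre-started above and the
   barrier ensures every rank has done so before the first scatter begins.
*/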
2122: ctx->copy = VecScatterCopy_PtoP_X;
2123: }
2124: PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);
2126: #if defined(PETSC_USE_DEBUG)
2127: MPI_Allreduce(&bs,&i,1,MPIU_INT,MPI_MIN,PetscObjectComm((PetscObject)ctx));
2128: MPI_Allreduce(&bs,&n,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)ctx));
2129: if (bs!=i || bs!=n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Block size %D != %D or %D",bs,i,n);
2130: #endif
2132: switch (bs) {
2133: case 12:
2134: ctx->begin = VecScatterBegin_12;
2135: ctx->end = VecScatterEnd_12;
2136: break;
2137: case 8:
2138: ctx->begin = VecScatterBegin_8;
2139: ctx->end = VecScatterEnd_8;
2140: break;
2141: case 7:
2142: ctx->begin = VecScatterBegin_7;
2143: ctx->end = VecScatterEnd_7;
2144: break;
2145: case 6:
2146: ctx->begin = VecScatterBegin_6;
2147: ctx->end = VecScatterEnd_6;
2148: break;
2149: case 5:
2150: ctx->begin = VecScatterBegin_5;
2151: ctx->end = VecScatterEnd_5;
2152: break;
2153: case 4:
2154: ctx->begin = VecScatterBegin_4;
2155: ctx->end = VecScatterEnd_4;
2156: break;
2157: case 3:
2158: ctx->begin = VecScatterBegin_3;
2159: ctx->end = VecScatterEnd_3;
2160: break;
2161: case 2:
2162: ctx->begin = VecScatterBegin_2;
2163: ctx->end = VecScatterEnd_2;
2164: break;
2165: case 1:
2166: ctx->begin = VecScatterBegin_1;
2167: ctx->end = VecScatterEnd_1;
2168: break;
2169: default:
2170: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Blocksize not supported");
2171: }
2172: ctx->view = VecScatterView_MPI;
2173: /* Check if the local scatter is actually a copy; important special case */
2174: if (to->local.n) {
2175: VecScatterLocalOptimizeCopy_Private(ctx,&to->local,&from->local,bs);
2176: }
2177: return(0);
2178: }
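/*
   Usage sketch (not part of the original source): the routines above are reached
   through the public VecScatterCreate() interface. Assuming a parallel Vec x, a
   sequential Vec y, and index sets ix,iy of equal length, a caller might do

     VecScatter scat;
     ierr = VecScatterCreate(x,ix,y,iy,&scat);CHKERRQ(ierr);
     ierr = VecScatterBegin(scat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = VecScatterEnd(scat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = VecScatterDestroy(&scat);CHKERRQ(ierr);

   which for this combination of vectors ends up in the PtoS creation code in this file.
*/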
2182: /* ------------------------------------------------------------------------------------*/
2183: /*
2184: Scatter from local Seq vectors to a parallel vector.
2185: Reverses the order of the arguments, calls VecScatterCreate_PtoS() then
2186: reverses the result.
2187: */
2190: PetscErrorCode VecScatterCreate_StoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2191: {
2192: PetscErrorCode ierr;
2193: MPI_Request *waits;
2194: VecScatter_MPI_General *to,*from;
2197: VecScatterCreate_PtoS(ny,inidy,nx,inidx,yin,xin,bs,ctx);
2198: to = (VecScatter_MPI_General*)ctx->fromdata;
2199: from = (VecScatter_MPI_General*)ctx->todata;
2200: ctx->todata = (void*)to;
2201: ctx->fromdata = (void*)from;
2202: /* these two are special; they are ALWAYS stored in the to struct */
2203: to->sstatus = from->sstatus;
2204: to->rstatus = from->rstatus;
2206: from->sstatus = 0;
2207: from->rstatus = 0;
2209: waits = from->rev_requests;
2210: from->rev_requests = from->requests;
2211: from->requests = waits;
2212: waits = to->rev_requests;
2213: to->rev_requests = to->requests;
2214: to->requests = waits;
2215: return(0);
2216: }
2218: /* ---------------------------------------------------------------------------------*/
2221: PetscErrorCode VecScatterCreate_PtoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2222: {
2224: PetscMPIInt size,rank,tag,imdex,n;
2225: PetscInt *owners = xin->map->range;
2226: PetscMPIInt *nprocs = NULL;
2227: PetscInt i,j,idx,nsends,*local_inidx = NULL,*local_inidy = NULL;
2228: PetscInt *owner = NULL,*starts = NULL,count,slen;
2229: PetscInt *rvalues = NULL,*svalues = NULL,base,*values = NULL,*rsvalues,recvtotal,lastidx;
2230: PetscMPIInt *onodes1,*olengths1,nrecvs;
2231: MPI_Comm comm;
2232: MPI_Request *send_waits = NULL,*recv_waits = NULL;
2233: MPI_Status recv_status,*send_status = NULL;
2234: PetscBool duplicate = PETSC_FALSE;
2235: #if defined(PETSC_USE_DEBUG)
2236: PetscBool found = PETSC_FALSE;
2237: #endif
2240: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2241: PetscObjectGetComm((PetscObject)xin,&comm);
2242: MPI_Comm_size(comm,&size);
2243: MPI_Comm_rank(comm,&rank);
2244: if (size == 1) {
2245: VecScatterCreate_StoP(nx,inidx,ny,inidy,xin,yin,bs,ctx);
2246: return(0);
2247: }
2249: /*
2250: Each processor ships off its inidx[j] and inidy[j] to the appropriate processor
2251: The processors then call VecScatterCreate_StoP() on the redistributed indices
2252: */
2253: /* first count number of contributors to each processor */
2254: PetscMalloc3(size,&nprocs,nx,&owner,(size+1),&starts);
2255: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
2257: lastidx = -1;
2258: j = 0;
2259: for (i=0; i<nx; i++) {
2260: /* if indices are NOT locally sorted, need to start search at the beginning */
2261: if (lastidx > (idx = bs*inidx[i])) j = 0;
2262: lastidx = idx;
2263: for (; j<size; j++) {
2264: if (idx >= owners[j] && idx < owners[j+1]) {
2265: nprocs[j]++;
2266: owner[i] = j;
2267: #if defined(PETSC_USE_DEBUG)
2268: found = PETSC_TRUE;
2269: #endif
2270: break;
2271: }
2272: }
2273: #if defined(PETSC_USE_DEBUG)
2274: if (!found) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2275: found = PETSC_FALSE;
2276: #endif
2277: }
2278: nsends = 0;
2279: for (i=0; i<size; i++) nsends += (nprocs[i] > 0);
2281: /* inform other processors of the number of messages and the message lengths */
2282: PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2283: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2284: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2285: recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
2287: /* post receives: */
2288: PetscMalloc5(2*recvtotal,&rvalues,2*nx,&svalues,nrecvs,&recv_waits,nsends,&send_waits,nsends,&send_status);
2290: count = 0;
2291: for (i=0; i<nrecvs; i++) {
2292: MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2293: count += olengths1[i];
2294: }
2295: PetscFree(onodes1);
2297: /* do sends:
2298: starts[i] gives the starting index in svalues for the index pairs going to
2299: the ith processor
2300: */
2301: starts[0]= 0;
2302: for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2303: for (i=0; i<nx; i++) {
2304: svalues[2*starts[owner[i]]] = bs*inidx[i];
2305: svalues[1 + 2*starts[owner[i]]++] = bs*inidy[i];
2306: }
2308: starts[0] = 0;
2309: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2310: count = 0;
2311: for (i=0; i<size; i++) {
2312: if (nprocs[i]) {
2313: MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);
2314: count++;
2315: }
2316: }
2317: PetscFree3(nprocs,owner,starts);
2319: /* wait on receives */
2320: count = nrecvs;
2321: slen = 0;
2322: while (count) {
2323: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2324: /* count the received (from,to) index pairs; the data in rvalues is unpacked below */
2325: MPI_Get_count(&recv_status,MPIU_INT,&n);
2326: slen += n/2;
2327: count--;
2328: }
2329: if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);
2331: PetscMalloc2(slen,&local_inidx,slen,&local_inidy);
2332: base = owners[rank];
2333: count = 0;
2334: rsvalues = rvalues;
2335: for (i=0; i<nrecvs; i++) {
2336: values = rsvalues;
2337: rsvalues += 2*olengths1[i];
2338: for (j=0; j<olengths1[i]; j++) {
2339: local_inidx[count] = values[2*j] - base;
2340: local_inidy[count++] = values[2*j+1];
2341: }
2342: }
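/*
   Illustrative note (not part of the original source): each received message
   interleaves (from,to) index pairs, so values[2*j] is the block-scaled source index
   (converted to a local offset by subtracting base=owners[rank]) and values[2*j+1] is
   the block-scaled destination index. A message {8,3,9,7} arriving with base=8 unpacks
   to local_inidx={0,1}, local_inidy={3,7}.
*/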
2343: PetscFree(olengths1);
2345: /* wait on sends */
2346: if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
2347: PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);
2349: /*
2350: should sort and remove duplicates from local_inidx,local_inidy
2351: */
2353: #if defined(do_it_slow)
2354: /* sort on the from index */
2355: PetscSortIntWithArray(slen,local_inidx,local_inidy);
2356: start = 0;
2357: while (start < slen) {
2358: count = start+1;
2359: last = local_inidx[start];
2360: while (count < slen && last == local_inidx[count]) count++;
2361: if (count > start + 1) { /* found 2 or more of the same local_inidx[] in a row */
2362: /* sort on to index */
2363: PetscSortInt(count-start,local_inidy+start);
2364: }
2365: /* remove duplicates; not the most efficient way, but probably good enough */
2366: i = start;
2367: while (i < count-1) {
2368: if (local_inidy[i] != local_inidy[i+1]) i++;
2369: else { /* found a duplicate */
2370: duplicate = PETSC_TRUE;
2371: for (j=i; j<slen-1; j++) {
2372: local_inidx[j] = local_inidx[j+1];
2373: local_inidy[j] = local_inidy[j+1];
2374: }
2375: slen--;
2376: count--;
2377: }
2378: }
2379: start = count;
2380: }
2381: #endif
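/*
   Note (not part of the original source): the sort-and-deduplicate block above is
   compiled only when do_it_slow is defined, which it is not, so duplicate remains
   PETSC_FALSE and the PetscInfo() message below never fires; the block also uses
   variables (start, last) that this function does not declare, so it would need small
   fixes before it could be enabled.
*/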
2382: if (duplicate) {
2383: PetscInfo(ctx,"Duplicate from-to indices passed in VecScatterCreate(); they are ignored\n");
2384: }
2385: VecScatterCreate_StoP(slen,local_inidx,slen,local_inidy,xin,yin,bs,ctx);
2386: PetscFree2(local_inidx,local_inidy);
2387: return(0);
2388: }