Actual source code: vpscat.c
1: #define PETSCVEC_DLL
3: /*
4: Defines parallel vector scatters.
5: */
7: #include "private/isimpl.h"
8: #include "private/vecimpl.h"
9: #include "../src/vec/vec/impls/dvecimpl.h"
10: #include "../src/vec/vec/impls/mpi/pvecimpl.h"
14: PetscErrorCode VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
15: {
16: VecScatter_MPI_General *to=(VecScatter_MPI_General*)ctx->todata;
17: VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
18: PetscErrorCode ierr;
19: PetscInt i;
20: PetscMPIInt rank;
21: PetscViewerFormat format;
22: PetscTruth iascii;
25: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
26: if (iascii) {
27: MPI_Comm_rank(((PetscObject)ctx)->comm,&rank);
28: PetscViewerGetFormat(viewer,&format);
29: if (format == PETSC_VIEWER_ASCII_INFO) {
30: PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;
32: MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
33: MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
34: itmp = to->starts[to->n];
35: MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
36: itmp = from->starts[from->n];
37: MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
38: MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,((PetscObject)ctx)->comm);
40: PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
41: PetscViewerASCIIPrintf(viewer," Maximum number sends %D\n",nsend_max);
42: PetscViewerASCIIPrintf(viewer," Maximum number receives %D\n",nrecv_max);
43: PetscViewerASCIIPrintf(viewer," Maximum data sent %D\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));
44: PetscViewerASCIIPrintf(viewer," Maximum data received %D\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));
45: PetscViewerASCIIPrintf(viewer," Total data sent %D\n",(int)(alldata*to->bs*sizeof(PetscScalar)));
47: } else {
48: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
49: if (to->n) {
50: for (i=0; i<to->n; i++){
51: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length = %D to whom %D\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
52: }
53: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
54: for (i=0; i<to->starts[to->n]; i++){
55: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]%D \n",rank,to->indices[i]);
56: }
57: }
59: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
60: if (from->n) {
61: for (i=0; i<from->n; i++){
62: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %D\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
63: }
65: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
66: for (i=0; i<from->starts[from->n]; i++){
67: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]%D \n",rank,from->indices[i]);
68: }
69: }
70: if (to->local.n) {
71: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]Indices for local part of scatter\n",rank);
72: for (i=0; i<to->local.n; i++){
73: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]From %D to %D \n",rank,from->local.vslots[i],to->local.vslots[i]);
74: }
75: }
77: PetscViewerFlush(viewer);
78: }
79: } else {
80: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for this scatter",((PetscObject)viewer)->type_name);
81: }
82: return(0);
83: }
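/*
   Editor's note (not part of the original source): the statistics branch above is taken only when
   the viewer format is PETSC_VIEWER_ASCII_INFO. A minimal sketch of how one might trigger it from
   application code, with error checking omitted and ctx standing for any MPI VecScatter:

       PetscViewer viewer = PETSC_VIEWER_STDOUT_WORLD;
       PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_INFO);    selects the statistics branch
       VecScatterView(ctx,viewer);                              dispatches to VecScatterView_MPI()
*/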
85: /* -----------------------------------------------------------------------------------*/
86: /*
87: The next routine determines which portion of the local part of the scatter is an
88: exact copy of values into their current location. We check this here and
89: then know that we need not perform that portion of the scatter when the vector is
90: scattering to itself with INSERT_VALUES.
92: This is currently not used, but it would speed up, for example, DALocalToLocalBegin/End().
94: */
97: PetscErrorCode VecScatterLocalOptimize_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
98: {
99: PetscInt n = to->n,n_nonmatching = 0,i,*to_slots = to->vslots,*from_slots = from->vslots;
101: PetscInt *nto_slots,*nfrom_slots,j = 0;
102:
104: for (i=0; i<n; i++) {
105: if (to_slots[i] != from_slots[i]) n_nonmatching++;
106: }
108: if (!n_nonmatching) {
109: to->nonmatching_computed = PETSC_TRUE;
110: to->n_nonmatching = from->n_nonmatching = 0;
111: PetscInfo1(scatter,"Reduced %D to 0\n", n);
112: } else if (n_nonmatching == n) {
113: to->nonmatching_computed = PETSC_FALSE;
114: PetscInfo(scatter,"All values non-matching\n");
115: } else {
116: to->nonmatching_computed= PETSC_TRUE;
117: to->n_nonmatching = from->n_nonmatching = n_nonmatching;
118: PetscMalloc(n_nonmatching*sizeof(PetscInt),&nto_slots);
119: PetscMalloc(n_nonmatching*sizeof(PetscInt),&nfrom_slots);
120: to->slots_nonmatching = nto_slots;
121: from->slots_nonmatching = nfrom_slots;
122: for (i=0; i<n; i++) {
123: if (to_slots[i] != from_slots[i]) {
124: nto_slots[j] = to_slots[i];
125: nfrom_slots[j] = from_slots[i];
126: j++;
127: }
128: }
129: PetscInfo2(scatter,"Reduced %D to %D\n",n,n_nonmatching);
130: }
131: return(0);
132: }
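/*
   Editor's sketch (not part of the original source): the slots_nonmatching arrays computed above
   would be consumed roughly as follows during a scatter-to-self with INSERT_VALUES, where xv and
   yv stand for the raw arrays of the (identical) source and destination vectors:

       if (to->nonmatching_computed) {
         PetscInt k;
         for (k=0; k<to->n_nonmatching; k++) {
           yv[from->slots_nonmatching[k]] = xv[to->slots_nonmatching[k]];
         }
       }

   The matching slots already hold the correct values, so they are skipped entirely.
*/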
134: /* --------------------------------------------------------------------------------------*/
136: /* -------------------------------------------------------------------------------------*/
139: PetscErrorCode VecScatterDestroy_PtoP(VecScatter ctx)
140: {
141: VecScatter_MPI_General *to = (VecScatter_MPI_General*)ctx->todata;
142: VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
143: PetscErrorCode ierr;
144: PetscInt i;
147: if (to->use_readyreceiver) {
148: /*
149: Since we have already posted sends we must cancel them before freeing
150: the requests
151: */
152: for (i=0; i<from->n; i++) {
153: MPI_Cancel(from->requests+i);
154: }
155: for (i=0; i<to->n; i++) {
156: MPI_Cancel(to->rev_requests+i);
157: }
158: }
160: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
161: if (to->use_alltoallw) {
162: PetscFree3(to->wcounts,to->wdispls,to->types);
163: PetscFree3(from->wcounts,from->wdispls,from->types);
164: }
165: #endif
167: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
168: if (to->use_window) {
169: MPI_Win_free(&from->window);
170: MPI_Win_free(&to->window);
171: }
172: #endif
174: if (to->use_alltoallv) {
175: PetscFree2(to->counts,to->displs);
176: PetscFree2(from->counts,from->displs);
177: }
179: /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
180: /*
181: IBM's PE version of MPI has a bug where freeing these requests will corrupt later
182: message passing.
183: */
184: #if !defined(PETSC_HAVE_BROKEN_REQUEST_FREE)
185: if (!to->use_alltoallv && !to->use_window) { /* currently the to->requests etc are ALWAYS allocated even if not used */
186: if (to->requests) {
187: for (i=0; i<to->n; i++) {
188: MPI_Request_free(to->requests + i);
189: }
190: }
191: if (to->rev_requests) {
192: for (i=0; i<to->n; i++) {
193: MPI_Request_free(to->rev_requests + i);
194: }
195: }
196: }
197: /*
198: MPICH could not properly cancel requests, so with ready-receiver mode we
199: cannot free the requests. That may be fixed now; if not, put the following
200: code inside an if (!to->use_readyreceiver) { } block.
201: */
202: if (!to->use_alltoallv && !to->use_window) { /* currently the from->requests etc are ALWAYS allocated even if not used */
203: if (from->requests) {
204: for (i=0; i<from->n; i++) {
205: MPI_Request_free(from->requests + i);
206: }
207: }
209: if (from->rev_requests) {
210: for (i=0; i<from->n; i++) {
211: MPI_Request_free(from->rev_requests + i);
212: }
213: }
214: }
215: #endif
217: PetscFree(to->local.vslots);
218: PetscFree(from->local.vslots);
219: PetscFree2(to->counts,to->displs);
220: PetscFree2(from->counts,from->displs);
221: PetscFree(to->local.slots_nonmatching);
222: PetscFree(from->local.slots_nonmatching);
223: PetscFree(to->rev_requests);
224: PetscFree(from->rev_requests);
225: PetscFree(to->requests);
226: PetscFree(from->requests);
227: PetscFree4(to->values,to->indices,to->starts,to->procs);
228: PetscFree2(to->sstatus,to->rstatus);
229: PetscFree4(from->values,from->indices,from->starts,from->procs);
230: PetscFree(from);
231: PetscFree(to);
232: PetscHeaderDestroy(ctx);
233: return(0);
234: }
238: /* --------------------------------------------------------------------------------------*/
239: /*
240: Special optimization to check whether the local part of the scatter is actually
241: a contiguous copy; if so, the scatter routines call PetscMemcpy() instead of scattering entry by entry.
242:
243: */
246: PetscErrorCode VecScatterLocalOptimizeCopy_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from,PetscInt bs)
247: {
248: PetscInt n = to->n,i,*to_slots = to->vslots,*from_slots = from->vslots;
249: PetscInt to_start,from_start;
253: to_start = to_slots[0];
254: from_start = from_slots[0];
256: for (i=1; i<n; i++) {
257: to_start += bs;
258: from_start += bs;
259: if (to_slots[i] != to_start) return(0);
260: if (from_slots[i] != from_start) return(0);
261: }
262: to->is_copy = PETSC_TRUE;
263: to->copy_start = to_slots[0];
264: to->copy_length = bs*sizeof(PetscScalar)*n;
265: from->is_copy = PETSC_TRUE;
266: from->copy_start = from_slots[0];
267: from->copy_length = bs*sizeof(PetscScalar)*n;
268: PetscInfo(scatter,"Local scatter is a copy, optimizing for it\n");
269: return(0);
270: }
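/*
   Editor's sketch (not part of the original source): once is_copy has been set above, the local
   part of a forward scatter with INSERT_VALUES can be performed as a single memory copy in
   vpscat.h, roughly

       PetscMemcpy(yv + from->copy_start, xv + to->copy_start, to->copy_length);

   where xv and yv stand for the raw source and destination vector arrays.
*/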
272: /* --------------------------------------------------------------------------------------*/
276: PetscErrorCode VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
277: {
278: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
279: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
280: PetscErrorCode ierr;
281: PetscInt ny,bs = in_from->bs;
284: out->begin = in->begin;
285: out->end = in->end;
286: out->copy = in->copy;
287: out->destroy = in->destroy;
288: out->view = in->view;
290: /* allocate entire send scatter context */
291: PetscNewLog(out,VecScatter_MPI_General,&out_to);
292: PetscNewLog(out,VecScatter_MPI_General,&out_from);
294: ny = in_to->starts[in_to->n];
295: out_to->n = in_to->n;
296: out_to->type = in_to->type;
297: out_to->sendfirst = in_to->sendfirst;
299: PetscMalloc(out_to->n*sizeof(MPI_Request),&out_to->requests);
300: PetscMalloc4(bs*ny,PetscScalar,&out_to->values,ny,PetscInt,&out_to->indices,out_to->n+1,PetscInt,&out_to->starts,out_to->n,PetscMPIInt,&out_to->procs);
301: PetscMalloc2(PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->sstatus,PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->rstatus);
302: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
303: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
304: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
305:
306: out->todata = (void*)out_to;
307: out_to->local.n = in_to->local.n;
308: out_to->local.nonmatching_computed = PETSC_FALSE;
309: out_to->local.n_nonmatching = 0;
310: out_to->local.slots_nonmatching = 0;
311: if (in_to->local.n) {
312: PetscMalloc(in_to->local.n*sizeof(PetscInt),&out_to->local.vslots);
313: PetscMalloc(in_from->local.n*sizeof(PetscInt),&out_from->local.vslots);
314: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
315: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
316: } else {
317: out_to->local.vslots = 0;
318: out_from->local.vslots = 0;
319: }
321: /* allocate entire receive context */
322: out_from->type = in_from->type;
323: ny = in_from->starts[in_from->n];
324: out_from->n = in_from->n;
325: out_from->sendfirst = in_from->sendfirst;
327: PetscMalloc(out_from->n*sizeof(MPI_Request),&out_from->requests);
328: PetscMalloc4(ny*bs,PetscScalar,&out_from->values,ny,PetscInt,&out_from->indices,out_from->n+1,PetscInt,&out_from->starts,out_from->n,PetscMPIInt,&out_from->procs);
329: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
330: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
331: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
332: out->fromdata = (void*)out_from;
333: out_from->local.n = in_from->local.n;
334: out_from->local.nonmatching_computed = PETSC_FALSE;
335: out_from->local.n_nonmatching = 0;
336: out_from->local.slots_nonmatching = 0;
338: /*
339: set up the request arrays for use with MPI_Send_init() and MPI_Recv_init()
340: */
341: {
342: PetscMPIInt tag;
343: MPI_Comm comm;
344: PetscInt *sstarts = out_to->starts, *rstarts = out_from->starts;
345: PetscMPIInt *sprocs = out_to->procs, *rprocs = out_from->procs;
346: PetscInt i;
347: PetscTruth flg;
348: MPI_Request *swaits = out_to->requests,*rwaits = out_from->requests;
349: MPI_Request *rev_swaits,*rev_rwaits;
350: PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;
352: PetscMalloc(in_to->n*sizeof(MPI_Request),&out_to->rev_requests);
353: PetscMalloc(in_from->n*sizeof(MPI_Request),&out_from->rev_requests);
355: rev_rwaits = out_to->rev_requests;
356: rev_swaits = out_from->rev_requests;
358: out_from->bs = out_to->bs = bs;
359: tag = ((PetscObject)out)->tag;
360: comm = ((PetscObject)out)->comm;
362: /* Register the receives that you will use later (sends for scatter reverse) */
363: for (i=0; i<out_from->n; i++) {
364: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
365: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
366: }
368: flg = PETSC_FALSE;
369: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_rsend",&flg,PETSC_NULL);
370: if (flg) {
371: out_to->use_readyreceiver = PETSC_TRUE;
372: out_from->use_readyreceiver = PETSC_TRUE;
373: for (i=0; i<out_to->n; i++) {
374: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
375: }
376: if (out_from->n) {MPI_Startall_irecv(out_from->starts[out_from->n]*out_from->bs,out_from->n,out_from->requests);}
377: MPI_Barrier(comm);
378: PetscInfo(in,"Using VecScatter ready receiver mode\n");
379: } else {
380: out_to->use_readyreceiver = PETSC_FALSE;
381: out_from->use_readyreceiver = PETSC_FALSE;
382: flg = PETSC_FALSE;
383: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_ssend",&flg,PETSC_NULL);
384: if (flg) {
385: PetscInfo(in,"Using VecScatter Ssend mode\n");
386: }
387: for (i=0; i<out_to->n; i++) {
388: if (!flg) {
389: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
390: } else {
391: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
392: }
393: }
394: }
395: /* Register receives for scatter reverse */
396: for (i=0; i<out_to->n; i++) {
397: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
398: }
399: }
401: return(0);
402: }
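/*
   Editor's note (not part of the original source): the MPI_Send_init()/MPI_Recv_init() calls above
   create persistent requests, so each subsequent scatter only has to start and complete them.
   Roughly:

       MPI_Startall(out_from->n,out_from->requests);                  start persistent receives
       MPI_Startall(out_to->n,out_to->requests);                      start persistent sends
       MPI_Waitall(out_to->n,out_to->requests,out_to->sstatus);       complete the sends
       MPI_Waitall(out_from->n,out_from->requests,out_to->rstatus);   complete the receives

   The actual begin/end logic, including packing and unpacking, lives in vpscat.h.
*/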
406: PetscErrorCode VecScatterCopy_PtoP_AllToAll(VecScatter in,VecScatter out)
407: {
408: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
409: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
410: PetscErrorCode ierr;
411: PetscInt ny,bs = in_from->bs;
412: PetscMPIInt size;
415: MPI_Comm_size(((PetscObject)in)->comm,&size);
416: out->begin = in->begin;
417: out->end = in->end;
418: out->copy = in->copy;
419: out->destroy = in->destroy;
420: out->view = in->view;
422: /* allocate entire send scatter context */
423: PetscNewLog(out,VecScatter_MPI_General,&out_to);
424: PetscNewLog(out,VecScatter_MPI_General,&out_from);
426: ny = in_to->starts[in_to->n];
427: out_to->n = in_to->n;
428: out_to->type = in_to->type;
429: out_to->sendfirst = in_to->sendfirst;
431: PetscMalloc(out_to->n*sizeof(MPI_Request),&out_to->requests);
432: PetscMalloc4(bs*ny,PetscScalar,&out_to->values,ny,PetscInt,&out_to->indices,out_to->n+1,PetscInt,&out_to->starts,out_to->n,PetscMPIInt,&out_to->procs);
433: PetscMalloc2(PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->sstatus,PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->rstatus);
434: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
435: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
436: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
437:
438: out->todata = (void*)out_to;
439: out_to->local.n = in_to->local.n;
440: out_to->local.nonmatching_computed = PETSC_FALSE;
441: out_to->local.n_nonmatching = 0;
442: out_to->local.slots_nonmatching = 0;
443: if (in_to->local.n) {
444: PetscMalloc(in_to->local.n*sizeof(PetscInt),&out_to->local.vslots);
445: PetscMalloc(in_from->local.n*sizeof(PetscInt),&out_from->local.vslots);
446: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
447: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
448: } else {
449: out_to->local.vslots = 0;
450: out_from->local.vslots = 0;
451: }
453: /* allocate entire receive context */
454: out_from->type = in_from->type;
455: ny = in_from->starts[in_from->n];
456: out_from->n = in_from->n;
457: out_from->sendfirst = in_from->sendfirst;
459: PetscMalloc(out_from->n*sizeof(MPI_Request),&out_from->requests);
460: PetscMalloc4(ny*bs,PetscScalar,&out_from->values,ny,PetscInt,&out_from->indices,out_from->n+1,PetscInt,&out_from->starts,out_from->n,PetscMPIInt,&out_from->procs);
461: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
462: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
463: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
464: out->fromdata = (void*)out_from;
465: out_from->local.n = in_from->local.n;
466: out_from->local.nonmatching_computed = PETSC_FALSE;
467: out_from->local.n_nonmatching = 0;
468: out_from->local.slots_nonmatching = 0;
470: out_to->use_alltoallv = out_from->use_alltoallv = PETSC_TRUE;
472: PetscMalloc2(size,PetscMPIInt,&out_to->counts,size,PetscMPIInt,&out_to->displs);
473: PetscMemcpy(out_to->counts,in_to->counts,size*sizeof(PetscMPIInt));
474: PetscMemcpy(out_to->displs,in_to->displs,size*sizeof(PetscMPIInt));
476: PetscMalloc2(size,PetscMPIInt,&out_from->counts,size,PetscMPIInt,&out_from->displs);
477: PetscMemcpy(out_from->counts,in_from->counts,size*sizeof(PetscMPIInt));
478: PetscMemcpy(out_from->displs,in_from->displs,size*sizeof(PetscMPIInt));
479: return(0);
480: }
481: /* --------------------------------------------------------------------------------------------------
482: Packs the message data into send buffers and unpacks it from receive buffers.
484: These routines could be generated automatically.
486: Fortran kernels etc. could be used.
487: */
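/*
   Editor's sketch (not part of the original source): each Pack_<bs> routine below is the fixed-
   block-size unrolling of the following generic gather (Pack_bs is a hypothetical name used only
   for illustration):

       PETSC_STATIC_INLINE void Pack_bs(PetscInt n,PetscInt bs,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
       {
         PetscInt i,j,idx;
         for (i=0; i<n; i++) {
           idx = *indicesx++;
           for (j=0; j<bs; j++) y[j] = x[idx+j];
           y += bs;
         }
       }

   UnPack_<bs> and Scatter_<bs> follow the same pattern for the receive and local cases.
*/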
488: PETSC_STATIC_INLINE void Pack_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
489: {
490: PetscInt i;
491: for (i=0; i<n; i++) {
492: y[i] = x[indicesx[i]];
493: }
494: }
495: PETSC_STATIC_INLINE void UnPack_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
496: {
497: PetscInt i;
498: switch (addv) {
499: case INSERT_VALUES:
500: for (i=0; i<n; i++) {
501: y[indicesy[i]] = x[i];
502: }
503: break;
504: case ADD_VALUES:
505: for (i=0; i<n; i++) {
506: y[indicesy[i]] += x[i];
507: }
508: break;
509: #if !defined(PETSC_USE_COMPLEX)
510: case MAX_VALUES:
511: for (i=0; i<n; i++) {
512: y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
513: }
514: #else
515: case MAX_VALUES:
516: #endif
517: case NOT_SET_VALUES:
518: break;
519: }
520: }
522: PETSC_STATIC_INLINE void Scatter_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
523: {
524: PetscInt i;
525: switch (addv) {
526: case INSERT_VALUES:
527: for (i=0; i<n; i++) {
528: y[indicesy[i]] = x[indicesx[i]];
529: }
530: break;
531: case ADD_VALUES:
532: for (i=0; i<n; i++) {
533: y[indicesy[i]] += x[indicesx[i]];
534: }
535: break;
536: #if !defined(PETSC_USE_COMPLEX)
537: case MAX_VALUES:
538: for (i=0; i<n; i++) {
539: y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
540: }
541: #else
542: case MAX_VALUES:
543: #endif
544: case NOT_SET_VALUES:
545: break;
546: }
547: }
549: /* ----------------------------------------------------------------------------------------------- */
550: PETSC_STATIC_INLINE void Pack_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
551: {
552: PetscInt i,idx;
554: for (i=0; i<n; i++) {
555: idx = *indicesx++;
556: y[0] = x[idx];
557: y[1] = x[idx+1];
558: y += 2;
559: }
560: }
561: PETSC_STATIC_INLINE void UnPack_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
562: {
563: PetscInt i,idy;
565: switch (addv) {
566: case INSERT_VALUES:
567: for (i=0; i<n; i++) {
568: idy = *indicesy++;
569: y[idy] = x[0];
570: y[idy+1] = x[1];
571: x += 2;
572: }
573: break;
574: case ADD_VALUES:
575: for (i=0; i<n; i++) {
576: idy = *indicesy++;
577: y[idy] += x[0];
578: y[idy+1] += x[1];
579: x += 2;
580: }
581: break;
582: #if !defined(PETSC_USE_COMPLEX)
583: case MAX_VALUES:
584: for (i=0; i<n; i++) {
585: idy = *indicesy++;
586: y[idy] = PetscMax(y[idy],x[0]);
587: y[idy+1] = PetscMax(y[idy+1],x[1]);
588: x += 2;
589: }
590: #else
591: case MAX_VALUES:
592: #endif
593: case NOT_SET_VALUES:
594: break;
595: }
596: }
598: PETSC_STATIC_INLINE void Scatter_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
599: {
600: PetscInt i,idx,idy;
602: switch (addv) {
603: case INSERT_VALUES:
604: for (i=0; i<n; i++) {
605: idx = *indicesx++;
606: idy = *indicesy++;
607: y[idy] = x[idx];
608: y[idy+1] = x[idx+1];
609: }
610: break;
611: case ADD_VALUES:
612: for (i=0; i<n; i++) {
613: idx = *indicesx++;
614: idy = *indicesy++;
615: y[idy] += x[idx];
616: y[idy+1] += x[idx+1];
617: }
618: break;
619: #if !defined(PETSC_USE_COMPLEX)
620: case MAX_VALUES:
621: for (i=0; i<n; i++) {
622: idx = *indicesx++;
623: idy = *indicesy++;
624: y[idy] = PetscMax(y[idy],x[idx]);
625: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
626: }
627: #else
628: case MAX_VALUES:
629: #endif
630: case NOT_SET_VALUES:
631: break;
632: }
633: }
634: /* ----------------------------------------------------------------------------------------------- */
635: PETSC_STATIC_INLINE void Pack_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
636: {
637: PetscInt i,idx;
639: for (i=0; i<n; i++) {
640: idx = *indicesx++;
641: y[0] = x[idx];
642: y[1] = x[idx+1];
643: y[2] = x[idx+2];
644: y += 3;
645: }
646: }
647: PETSC_STATIC_INLINE void UnPack_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
648: {
649: PetscInt i,idy;
651: switch (addv) {
652: case INSERT_VALUES:
653: for (i=0; i<n; i++) {
654: idy = *indicesy++;
655: y[idy] = x[0];
656: y[idy+1] = x[1];
657: y[idy+2] = x[2];
658: x += 3;
659: }
660: break;
661: case ADD_VALUES:
662: for (i=0; i<n; i++) {
663: idy = *indicesy++;
664: y[idy] += x[0];
665: y[idy+1] += x[1];
666: y[idy+2] += x[2];
667: x += 3;
668: }
669: break;
670: #if !defined(PETSC_USE_COMPLEX)
671: case MAX_VALUES:
672: for (i=0; i<n; i++) {
673: idy = *indicesy++;
674: y[idy] = PetscMax(y[idy],x[0]);
675: y[idy+1] = PetscMax(y[idy+1],x[1]);
676: y[idy+2] = PetscMax(y[idy+2],x[2]);
677: x += 3;
678: }
679: #else
680: case MAX_VALUES:
681: #endif
682: case NOT_SET_VALUES:
683: break;
684: }
685: }
687: PETSC_STATIC_INLINE void Scatter_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
688: {
689: PetscInt i,idx,idy;
691: switch (addv) {
692: case INSERT_VALUES:
693: for (i=0; i<n; i++) {
694: idx = *indicesx++;
695: idy = *indicesy++;
696: y[idy] = x[idx];
697: y[idy+1] = x[idx+1];
698: y[idy+2] = x[idx+2];
699: }
700: break;
701: case ADD_VALUES:
702: for (i=0; i<n; i++) {
703: idx = *indicesx++;
704: idy = *indicesy++;
705: y[idy] += x[idx];
706: y[idy+1] += x[idx+1];
707: y[idy+2] += x[idx+2];
708: }
709: break;
710: #if !defined(PETSC_USE_COMPLEX)
711: case MAX_VALUES:
712: for (i=0; i<n; i++) {
713: idx = *indicesx++;
714: idy = *indicesy++;
715: y[idy] = PetscMax(y[idy],x[idx]);
716: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
717: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
718: }
719: #else
720: case MAX_VALUES:
721: #endif
722: case NOT_SET_VALUES:
723: break;
724: }
725: }
726: /* ----------------------------------------------------------------------------------------------- */
727: PETSC_STATIC_INLINE void Pack_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
728: {
729: PetscInt i,idx;
731: for (i=0; i<n; i++) {
732: idx = *indicesx++;
733: y[0] = x[idx];
734: y[1] = x[idx+1];
735: y[2] = x[idx+2];
736: y[3] = x[idx+3];
737: y += 4;
738: }
739: }
740: PETSC_STATIC_INLINE void UnPack_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
741: {
742: PetscInt i,idy;
744: switch (addv) {
745: case INSERT_VALUES:
746: for (i=0; i<n; i++) {
747: idy = *indicesy++;
748: y[idy] = x[0];
749: y[idy+1] = x[1];
750: y[idy+2] = x[2];
751: y[idy+3] = x[3];
752: x += 4;
753: }
754: break;
755: case ADD_VALUES:
756: for (i=0; i<n; i++) {
757: idy = *indicesy++;
758: y[idy] += x[0];
759: y[idy+1] += x[1];
760: y[idy+2] += x[2];
761: y[idy+3] += x[3];
762: x += 4;
763: }
764: break;
765: #if !defined(PETSC_USE_COMPLEX)
766: case MAX_VALUES:
767: for (i=0; i<n; i++) {
768: idy = *indicesy++;
769: y[idy] = PetscMax(y[idy],x[0]);
770: y[idy+1] = PetscMax(y[idy+1],x[1]);
771: y[idy+2] = PetscMax(y[idy+2],x[2]);
772: y[idy+3] = PetscMax(y[idy+3],x[3]);
773: x += 4;
774: }
775: #else
776: case MAX_VALUES:
777: #endif
778: case NOT_SET_VALUES:
779: break;
780: }
781: }
783: PETSC_STATIC_INLINE void Scatter_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
784: {
785: PetscInt i,idx,idy;
787: switch (addv) {
788: case INSERT_VALUES:
789: for (i=0; i<n; i++) {
790: idx = *indicesx++;
791: idy = *indicesy++;
792: y[idy] = x[idx];
793: y[idy+1] = x[idx+1];
794: y[idy+2] = x[idx+2];
795: y[idy+3] = x[idx+3];
796: }
797: break;
798: case ADD_VALUES:
799: for (i=0; i<n; i++) {
800: idx = *indicesx++;
801: idy = *indicesy++;
802: y[idy] += x[idx];
803: y[idy+1] += x[idx+1];
804: y[idy+2] += x[idx+2];
805: y[idy+3] += x[idx+3];
806: }
807: break;
808: #if !defined(PETSC_USE_COMPLEX)
809: case MAX_VALUES:
810: for (i=0; i<n; i++) {
811: idx = *indicesx++;
812: idy = *indicesy++;
813: y[idy] = PetscMax(y[idy],x[idx]);
814: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
815: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
816: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
817: }
818: #else
819: case MAX_VALUES:
820: #endif
821: case NOT_SET_VALUES:
822: break;
823: }
824: }
825: /* ----------------------------------------------------------------------------------------------- */
826: PETSC_STATIC_INLINE void Pack_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
827: {
828: PetscInt i,idx;
830: for (i=0; i<n; i++) {
831: idx = *indicesx++;
832: y[0] = x[idx];
833: y[1] = x[idx+1];
834: y[2] = x[idx+2];
835: y[3] = x[idx+3];
836: y[4] = x[idx+4];
837: y += 5;
838: }
839: }
840: PETSC_STATIC_INLINE void UnPack_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
841: {
842: PetscInt i,idy;
844: switch (addv) {
845: case INSERT_VALUES:
846: for (i=0; i<n; i++) {
847: idy = *indicesy++;
848: y[idy] = x[0];
849: y[idy+1] = x[1];
850: y[idy+2] = x[2];
851: y[idy+3] = x[3];
852: y[idy+4] = x[4];
853: x += 5;
854: }
855: break;
856: case ADD_VALUES:
857: for (i=0; i<n; i++) {
858: idy = *indicesy++;
859: y[idy] += x[0];
860: y[idy+1] += x[1];
861: y[idy+2] += x[2];
862: y[idy+3] += x[3];
863: y[idy+4] += x[4];
864: x += 5;
865: }
866: break;
867: #if !defined(PETSC_USE_COMPLEX)
868: case MAX_VALUES:
869: for (i=0; i<n; i++) {
870: idy = *indicesy++;
871: y[idy] = PetscMax(y[idy],x[0]);
872: y[idy+1] = PetscMax(y[idy+1],x[1]);
873: y[idy+2] = PetscMax(y[idy+2],x[2]);
874: y[idy+3] = PetscMax(y[idy+3],x[3]);
875: y[idy+4] = PetscMax(y[idy+4],x[4]);
876: x += 5;
877: }
878: #else
879: case MAX_VALUES:
880: #endif
881: case NOT_SET_VALUES:
882: break;
883: }
884: }
886: PETSC_STATIC_INLINE void Scatter_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
887: {
888: PetscInt i,idx,idy;
890: switch (addv) {
891: case INSERT_VALUES:
892: for (i=0; i<n; i++) {
893: idx = *indicesx++;
894: idy = *indicesy++;
895: y[idy] = x[idx];
896: y[idy+1] = x[idx+1];
897: y[idy+2] = x[idx+2];
898: y[idy+3] = x[idx+3];
899: y[idy+4] = x[idx+4];
900: }
901: break;
902: case ADD_VALUES:
903: for (i=0; i<n; i++) {
904: idx = *indicesx++;
905: idy = *indicesy++;
906: y[idy] += x[idx];
907: y[idy+1] += x[idx+1];
908: y[idy+2] += x[idx+2];
909: y[idy+3] += x[idx+3];
910: y[idy+4] += x[idx+4];
911: }
912: break;
913: #if !defined(PETSC_USE_COMPLEX)
914: case MAX_VALUES:
915: for (i=0; i<n; i++) {
916: idx = *indicesx++;
917: idy = *indicesy++;
918: y[idy] = PetscMax(y[idy],x[idx]);
919: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
920: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
921: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
922: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
923: }
924: #else
925: case MAX_VALUES:
926: #endif
927: case NOT_SET_VALUES:
928: break;
929: }
930: }
931: /* ----------------------------------------------------------------------------------------------- */
932: PETSC_STATIC_INLINE void Pack_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
933: {
934: PetscInt i,idx;
936: for (i=0; i<n; i++) {
937: idx = *indicesx++;
938: y[0] = x[idx];
939: y[1] = x[idx+1];
940: y[2] = x[idx+2];
941: y[3] = x[idx+3];
942: y[4] = x[idx+4];
943: y[5] = x[idx+5];
944: y += 6;
945: }
946: }
947: PETSC_STATIC_INLINE void UnPack_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
948: {
949: PetscInt i,idy;
951: switch (addv) {
952: case INSERT_VALUES:
953: for (i=0; i<n; i++) {
954: idy = *indicesy++;
955: y[idy] = x[0];
956: y[idy+1] = x[1];
957: y[idy+2] = x[2];
958: y[idy+3] = x[3];
959: y[idy+4] = x[4];
960: y[idy+5] = x[5];
961: x += 6;
962: }
963: break;
964: case ADD_VALUES:
965: for (i=0; i<n; i++) {
966: idy = *indicesy++;
967: y[idy] += x[0];
968: y[idy+1] += x[1];
969: y[idy+2] += x[2];
970: y[idy+3] += x[3];
971: y[idy+4] += x[4];
972: y[idy+5] += x[5];
973: x += 6;
974: }
975: break;
976: #if !defined(PETSC_USE_COMPLEX)
977: case MAX_VALUES:
978: for (i=0; i<n; i++) {
979: idy = *indicesy++;
980: y[idy] = PetscMax(y[idy],x[0]);
981: y[idy+1] = PetscMax(y[idy+1],x[1]);
982: y[idy+2] = PetscMax(y[idy+2],x[2]);
983: y[idy+3] = PetscMax(y[idy+3],x[3]);
984: y[idy+4] = PetscMax(y[idy+4],x[4]);
985: y[idy+5] = PetscMax(y[idy+5],x[5]);
986: x += 6;
987: }
988: #else
989: case MAX_VALUES:
990: #endif
991: case NOT_SET_VALUES:
992: break;
993: }
994: }
996: PETSC_STATIC_INLINE void Scatter_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
997: {
998: PetscInt i,idx,idy;
1000: switch (addv) {
1001: case INSERT_VALUES:
1002: for (i=0; i<n; i++) {
1003: idx = *indicesx++;
1004: idy = *indicesy++;
1005: y[idy] = x[idx];
1006: y[idy+1] = x[idx+1];
1007: y[idy+2] = x[idx+2];
1008: y[idy+3] = x[idx+3];
1009: y[idy+4] = x[idx+4];
1010: y[idy+5] = x[idx+5];
1011: }
1012: break;
1013: case ADD_VALUES:
1014: for (i=0; i<n; i++) {
1015: idx = *indicesx++;
1016: idy = *indicesy++;
1017: y[idy] += x[idx];
1018: y[idy+1] += x[idx+1];
1019: y[idy+2] += x[idx+2];
1020: y[idy+3] += x[idx+3];
1021: y[idy+4] += x[idx+4];
1022: y[idy+5] += x[idx+5];
1023: }
1024: break;
1025: #if !defined(PETSC_USE_COMPLEX)
1026: case MAX_VALUES:
1027: for (i=0; i<n; i++) {
1028: idx = *indicesx++;
1029: idy = *indicesy++;
1030: y[idy] = PetscMax(y[idy],x[idx]);
1031: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1032: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1033: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1034: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1035: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1036: }
1037: #else
1038: case MAX_VALUES:
1039: #endif
1040: case NOT_SET_VALUES:
1041: break;
1042: }
1043: }
1044: /* ----------------------------------------------------------------------------------------------- */
1045: PETSC_STATIC_INLINE void Pack_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1046: {
1047: PetscInt i,idx;
1049: for (i=0; i<n; i++) {
1050: idx = *indicesx++;
1051: y[0] = x[idx];
1052: y[1] = x[idx+1];
1053: y[2] = x[idx+2];
1054: y[3] = x[idx+3];
1055: y[4] = x[idx+4];
1056: y[5] = x[idx+5];
1057: y[6] = x[idx+6];
1058: y += 7;
1059: }
1060: }
1061: PETSC_STATIC_INLINE void UnPack_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1062: {
1063: PetscInt i,idy;
1065: switch (addv) {
1066: case INSERT_VALUES:
1067: for (i=0; i<n; i++) {
1068: idy = *indicesy++;
1069: y[idy] = x[0];
1070: y[idy+1] = x[1];
1071: y[idy+2] = x[2];
1072: y[idy+3] = x[3];
1073: y[idy+4] = x[4];
1074: y[idy+5] = x[5];
1075: y[idy+6] = x[6];
1076: x += 7;
1077: }
1078: break;
1079: case ADD_VALUES:
1080: for (i=0; i<n; i++) {
1081: idy = *indicesy++;
1082: y[idy] += x[0];
1083: y[idy+1] += x[1];
1084: y[idy+2] += x[2];
1085: y[idy+3] += x[3];
1086: y[idy+4] += x[4];
1087: y[idy+5] += x[5];
1088: y[idy+6] += x[6];
1089: x += 7;
1090: }
1091: break;
1092: #if !defined(PETSC_USE_COMPLEX)
1093: case MAX_VALUES:
1094: for (i=0; i<n; i++) {
1095: idy = *indicesy++;
1096: y[idy] = PetscMax(y[idy],x[0]);
1097: y[idy+1] = PetscMax(y[idy+1],x[1]);
1098: y[idy+2] = PetscMax(y[idy+2],x[2]);
1099: y[idy+3] = PetscMax(y[idy+3],x[3]);
1100: y[idy+4] = PetscMax(y[idy+4],x[4]);
1101: y[idy+5] = PetscMax(y[idy+5],x[5]);
1102: y[idy+6] = PetscMax(y[idy+6],x[6]);
1103: x += 7;
1104: }
1105: #else
1106: case MAX_VALUES:
1107: #endif
1108: case NOT_SET_VALUES:
1109: break;
1110: }
1111: }
1113: PETSC_STATIC_INLINE void Scatter_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1114: {
1115: PetscInt i,idx,idy;
1117: switch (addv) {
1118: case INSERT_VALUES:
1119: for (i=0; i<n; i++) {
1120: idx = *indicesx++;
1121: idy = *indicesy++;
1122: y[idy] = x[idx];
1123: y[idy+1] = x[idx+1];
1124: y[idy+2] = x[idx+2];
1125: y[idy+3] = x[idx+3];
1126: y[idy+4] = x[idx+4];
1127: y[idy+5] = x[idx+5];
1128: y[idy+6] = x[idx+6];
1129: }
1130: break;
1131: case ADD_VALUES:
1132: for (i=0; i<n; i++) {
1133: idx = *indicesx++;
1134: idy = *indicesy++;
1135: y[idy] += x[idx];
1136: y[idy+1] += x[idx+1];
1137: y[idy+2] += x[idx+2];
1138: y[idy+3] += x[idx+3];
1139: y[idy+4] += x[idx+4];
1140: y[idy+5] += x[idx+5];
1141: y[idy+6] += x[idx+6];
1142: }
1143: break;
1144: #if !defined(PETSC_USE_COMPLEX)
1145: case MAX_VALUES:
1146: for (i=0; i<n; i++) {
1147: idx = *indicesx++;
1148: idy = *indicesy++;
1149: y[idy] = PetscMax(y[idy],x[idx]);
1150: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1151: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1152: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1153: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1154: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1155: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1156: }
1157: #else
1158: case MAX_VALUES:
1159: #endif
1160: case NOT_SET_VALUES:
1161: break;
1162: }
1163: }
1164: /* ----------------------------------------------------------------------------------------------- */
1165: PETSC_STATIC_INLINE void Pack_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1166: {
1167: PetscInt i,idx;
1169: for (i=0; i<n; i++) {
1170: idx = *indicesx++;
1171: y[0] = x[idx];
1172: y[1] = x[idx+1];
1173: y[2] = x[idx+2];
1174: y[3] = x[idx+3];
1175: y[4] = x[idx+4];
1176: y[5] = x[idx+5];
1177: y[6] = x[idx+6];
1178: y[7] = x[idx+7];
1179: y += 8;
1180: }
1181: }
1182: PETSC_STATIC_INLINE void UnPack_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1183: {
1184: PetscInt i,idy;
1186: switch (addv) {
1187: case INSERT_VALUES:
1188: for (i=0; i<n; i++) {
1189: idy = *indicesy++;
1190: y[idy] = x[0];
1191: y[idy+1] = x[1];
1192: y[idy+2] = x[2];
1193: y[idy+3] = x[3];
1194: y[idy+4] = x[4];
1195: y[idy+5] = x[5];
1196: y[idy+6] = x[6];
1197: y[idy+7] = x[7];
1198: x += 8;
1199: }
1200: break;
1201: case ADD_VALUES:
1202: for (i=0; i<n; i++) {
1203: idy = *indicesy++;
1204: y[idy] += x[0];
1205: y[idy+1] += x[1];
1206: y[idy+2] += x[2];
1207: y[idy+3] += x[3];
1208: y[idy+4] += x[4];
1209: y[idy+5] += x[5];
1210: y[idy+6] += x[6];
1211: y[idy+7] += x[7];
1212: x += 8;
1213: }
1214: break;
1215: #if !defined(PETSC_USE_COMPLEX)
1216: case MAX_VALUES:
1217: for (i=0; i<n; i++) {
1218: idy = *indicesy++;
1219: y[idy] = PetscMax(y[idy],x[0]);
1220: y[idy+1] = PetscMax(y[idy+1],x[1]);
1221: y[idy+2] = PetscMax(y[idy+2],x[2]);
1222: y[idy+3] = PetscMax(y[idy+3],x[3]);
1223: y[idy+4] = PetscMax(y[idy+4],x[4]);
1224: y[idy+5] = PetscMax(y[idy+5],x[5]);
1225: y[idy+6] = PetscMax(y[idy+6],x[6]);
1226: y[idy+7] = PetscMax(y[idy+7],x[7]);
1227: x += 8;
1228: }
1229: #else
1230: case MAX_VALUES:
1231: #endif
1232: case NOT_SET_VALUES:
1233: break;
1234: }
1235: }
1237: PETSC_STATIC_INLINE void Scatter_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1238: {
1239: PetscInt i,idx,idy;
1241: switch (addv) {
1242: case INSERT_VALUES:
1243: for (i=0; i<n; i++) {
1244: idx = *indicesx++;
1245: idy = *indicesy++;
1246: y[idy] = x[idx];
1247: y[idy+1] = x[idx+1];
1248: y[idy+2] = x[idx+2];
1249: y[idy+3] = x[idx+3];
1250: y[idy+4] = x[idx+4];
1251: y[idy+5] = x[idx+5];
1252: y[idy+6] = x[idx+6];
1253: y[idy+7] = x[idx+7];
1254: }
1255: break;
1256: case ADD_VALUES:
1257: for (i=0; i<n; i++) {
1258: idx = *indicesx++;
1259: idy = *indicesy++;
1260: y[idy] += x[idx];
1261: y[idy+1] += x[idx+1];
1262: y[idy+2] += x[idx+2];
1263: y[idy+3] += x[idx+3];
1264: y[idy+4] += x[idx+4];
1265: y[idy+5] += x[idx+5];
1266: y[idy+6] += x[idx+6];
1267: y[idy+7] += x[idx+7];
1268: }
1269: break;
1270: #if !defined(PETSC_USE_COMPLEX)
1271: case MAX_VALUES:
1272: for (i=0; i<n; i++) {
1273: idx = *indicesx++;
1274: idy = *indicesy++;
1275: y[idy] = PetscMax(y[idy],x[idx]);
1276: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1277: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1278: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1279: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1280: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1281: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1282: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1283: }
1284: #else
1285: case MAX_VALUES:
1286: #endif
1287: case NOT_SET_VALUES:
1288: break;
1289: }
1290: }
1292: /* ----------------------------------------------------------------------------------------------- */
1293: PETSC_STATIC_INLINE void Pack_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1294: {
1295: PetscInt i,idx;
1297: for (i=0; i<n; i++) {
1298: idx = *indicesx++;
1299: y[0] = x[idx];
1300: y[1] = x[idx+1];
1301: y[2] = x[idx+2];
1302: y[3] = x[idx+3];
1303: y[4] = x[idx+4];
1304: y[5] = x[idx+5];
1305: y[6] = x[idx+6];
1306: y[7] = x[idx+7];
1307: y[8] = x[idx+8];
1308: y[9] = x[idx+9];
1309: y[10] = x[idx+10];
1310: y[11] = x[idx+11];
1311: y += 12;
1312: }
1313: }
1314: PETSC_STATIC_INLINE void UnPack_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1315: {
1316: PetscInt i,idy;
1318: switch (addv) {
1319: case INSERT_VALUES:
1320: for (i=0; i<n; i++) {
1321: idy = *indicesy++;
1322: y[idy] = x[0];
1323: y[idy+1] = x[1];
1324: y[idy+2] = x[2];
1325: y[idy+3] = x[3];
1326: y[idy+4] = x[4];
1327: y[idy+5] = x[5];
1328: y[idy+6] = x[6];
1329: y[idy+7] = x[7];
1330: y[idy+8] = x[8];
1331: y[idy+9] = x[9];
1332: y[idy+10] = x[10];
1333: y[idy+11] = x[11];
1334: x += 12;
1335: }
1336: break;
1337: case ADD_VALUES:
1338: for (i=0; i<n; i++) {
1339: idy = *indicesy++;
1340: y[idy] += x[0];
1341: y[idy+1] += x[1];
1342: y[idy+2] += x[2];
1343: y[idy+3] += x[3];
1344: y[idy+4] += x[4];
1345: y[idy+5] += x[5];
1346: y[idy+6] += x[6];
1347: y[idy+7] += x[7];
1348: y[idy+8] += x[8];
1349: y[idy+9] += x[9];
1350: y[idy+10] += x[10];
1351: y[idy+11] += x[11];
1352: x += 12;
1353: }
1354: break;
1355: #if !defined(PETSC_USE_COMPLEX)
1356: case MAX_VALUES:
1357: for (i=0; i<n; i++) {
1358: idy = *indicesy++;
1359: y[idy] = PetscMax(y[idy],x[0]);
1360: y[idy+1] = PetscMax(y[idy+1],x[1]);
1361: y[idy+2] = PetscMax(y[idy+2],x[2]);
1362: y[idy+3] = PetscMax(y[idy+3],x[3]);
1363: y[idy+4] = PetscMax(y[idy+4],x[4]);
1364: y[idy+5] = PetscMax(y[idy+5],x[5]);
1365: y[idy+6] = PetscMax(y[idy+6],x[6]);
1366: y[idy+7] = PetscMax(y[idy+7],x[7]);
1367: y[idy+8] = PetscMax(y[idy+8],x[8]);
1368: y[idy+9] = PetscMax(y[idy+9],x[9]);
1369: y[idy+10] = PetscMax(y[idy+10],x[10]);
1370: y[idy+11] = PetscMax(y[idy+11],x[11]);
1371: x += 12;
1372: }
1373: #else
1374: case MAX_VALUES:
1375: #endif
1376: case NOT_SET_VALUES:
1377: break;
1378: }
1379: }
1381: PETSC_STATIC_INLINE void Scatter_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1382: {
1383: PetscInt i,idx,idy;
1385: switch (addv) {
1386: case INSERT_VALUES:
1387: for (i=0; i<n; i++) {
1388: idx = *indicesx++;
1389: idy = *indicesy++;
1390: y[idy] = x[idx];
1391: y[idy+1] = x[idx+1];
1392: y[idy+2] = x[idx+2];
1393: y[idy+3] = x[idx+3];
1394: y[idy+4] = x[idx+4];
1395: y[idy+5] = x[idx+5];
1396: y[idy+6] = x[idx+6];
1397: y[idy+7] = x[idx+7];
1398: y[idy+8] = x[idx+8];
1399: y[idy+9] = x[idx+9];
1400: y[idy+10] = x[idx+10];
1401: y[idy+11] = x[idx+11];
1402: }
1403: break;
1404: case ADD_VALUES:
1405: for (i=0; i<n; i++) {
1406: idx = *indicesx++;
1407: idy = *indicesy++;
1408: y[idy] += x[idx];
1409: y[idy+1] += x[idx+1];
1410: y[idy+2] += x[idx+2];
1411: y[idy+3] += x[idx+3];
1412: y[idy+4] += x[idx+4];
1413: y[idy+5] += x[idx+5];
1414: y[idy+6] += x[idx+6];
1415: y[idy+7] += x[idx+7];
1416: y[idy+8] += x[idx+8];
1417: y[idy+9] += x[idx+9];
1418: y[idy+10] += x[idx+10];
1419: y[idy+11] += x[idx+11];
1420: }
1421: break;
1422: #if !defined(PETSC_USE_COMPLEX)
1423: case MAX_VALUES:
1424: for (i=0; i<n; i++) {
1425: idx = *indicesx++;
1426: idy = *indicesy++;
1427: y[idy] = PetscMax(y[idy],x[idx]);
1428: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1429: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1430: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1431: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1432: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1433: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1434: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1435: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1436: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1437: y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1438: y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
1439: }
1440: #else
1441: case MAX_VALUES:
1442: #endif
1443: case NOT_SET_VALUES:
1444: break;
1445: }
1446: }
1448: /* Create the VecScatterBegin/End_P for our chosen block sizes */
1449: #define BS 1
1450: #include "../src/vec/vec/utils/vpscat.h"
1451: #define BS 2
1452: #include "../src/vec/vec/utils/vpscat.h"
1453: #define BS 3
1454: #include "../src/vec/vec/utils/vpscat.h"
1455: #define BS 4
1456: #include "../src/vec/vec/utils/vpscat.h"
1457: #define BS 5
1458: #include "../src/vec/vec/utils/vpscat.h"
1459: #define BS 6
1460: #include "../src/vec/vec/utils/vpscat.h"
1461: #define BS 7
1462: #include "../src/vec/vec/utils/vpscat.h"
1463: #define BS 8
1464: #include "../src/vec/vec/utils/vpscat.h"
1465: #define BS 12
1466: #include "../src/vec/vec/utils/vpscat.h"
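/*
   Editor's note (not part of the original source): vpscat.h is written to be included once per
   block size, with BS defined just before each inclusion. A schematic of the pattern it relies on
   (the real header is more involved, and PETSCMAP1 here is only illustrative):

       #define PETSCMAP1_a(a,b)  a ## _ ## b
       #define PETSCMAP1_b(a,b)  PETSCMAP1_a(a,b)
       #define PETSCMAP1(a)      PETSCMAP1_b(a,BS)

       PetscErrorCode PETSCMAP1(VecScatterBegin)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
       {
         ... calls PETSCMAP1(Pack), PETSCMAP1(UnPack) and PETSCMAP1(Scatter),
             which expand to Pack_1, Pack_2, ... for the corresponding BS ...
       }

       #undef BS

   so that each inclusion above generates VecScatterBegin_1(), VecScatterBegin_2(), and so on.
*/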
1468: /* ==========================================================================================*/
1470: /* create parallel to sequential scatter context */
1472: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General *,VecScatter_MPI_General *,VecScatter);
1476: /*@
1477: VecScatterCreateLocal - Creates a VecScatter from a list of messages it must send and receive.
1479: Collective on VecScatter
1481: Input Parameters:
1482: + ctx - VecScatter object obtained with VecScatterCreateEmpty()
1483: . nsends - number of messages this process will send
1484: . sendSizes - number of entries in each message sent
1485: . sendProcs - ranks of the processes the messages are sent to
1486: . sendIdx - indices where the sent entries are obtained from (in local, on-process numbering); this is one long array of size \sum_{i=0,i<nsends} sendSizes[i]
1487: . nrecvs - number of receives to expect
1488: . recvSizes - number of entries in each message received
1489: . recvProcs - ranks of the processes that are sending to me
1490: . recvIdx - indices where the received entries are to be put (in local, on-process numbering); this is one long array of size \sum_{i=0,i<nrecvs} recvSizes[i]
1491: - bs - size of each block
1493: Notes: sendSizes[] and recvSizes[] cannot have any 0 entries. If you want to support having 0 entries you need
1494: to change the code below to "compress out" the sendProcs[] and recvProcs[] entries that have 0 entries.
1496: Probably does not handle sends to self properly; those should be removed from the counts that are used
1497: in allocating space inside the from struct.
1499: Level: intermediate
1501: @*/
1502: PetscErrorCode VecScatterCreateLocal(VecScatter ctx,PetscInt nsends,const PetscInt sendSizes[],const PetscInt sendProcs[],const PetscInt sendIdx[],PetscInt nrecvs,const PetscInt recvSizes[],const PetscInt recvProcs[],const PetscInt recvIdx[],PetscInt bs)
1503: {
1504: VecScatter_MPI_General *from, *to;
1505: PetscInt sendSize, recvSize;
1506: PetscInt n, i;
1507: PetscErrorCode ierr;
1509: /* allocate entire send scatter context */
1510: PetscNewLog(ctx,VecScatter_MPI_General,&to);
1511: to->n = nsends;
1512: for(n = 0, sendSize = 0; n < to->n; n++) {sendSize += sendSizes[n];}
1513: PetscMalloc(to->n*sizeof(MPI_Request),&to->requests);
1514: PetscMalloc4(bs*sendSize,PetscScalar,&to->values,sendSize,PetscInt,&to->indices,to->n+1,PetscInt,&to->starts,to->n,PetscMPIInt,&to->procs);
1515: PetscMalloc2(PetscMax(to->n,nrecvs),MPI_Status,&to->sstatus,PetscMax(to->n,nrecvs),MPI_Status,&to->rstatus);
1516: to->starts[0] = 0;
1517: for(n = 0; n < to->n; n++) {
1518: if (sendSizes[n] <=0 ) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"sendSizes[n=%D] = %D cannot be less than 1",n,sendSizes[n]);
1519: to->starts[n+1] = to->starts[n] + sendSizes[n];
1520: to->procs[n] = sendProcs[n];
1521: for(i = to->starts[n]; i < to->starts[n]+sendSizes[n]; i++) {
1522: to->indices[i] = sendIdx[i];
1523: }
1524: }
1525: ctx->todata = (void *) to;
1527: /* allocate entire receive scatter context */
1528: PetscNewLog(ctx,VecScatter_MPI_General,&from);
1529: from->n = nrecvs;
1530: for(n = 0, recvSize = 0; n < from->n; n++) {recvSize += recvSizes[n];}
1531: PetscMalloc(from->n*sizeof(MPI_Request),&from->requests);
1532: PetscMalloc4(bs*recvSize,PetscScalar,&from->values,recvSize,PetscInt,&from->indices,from->n+1,PetscInt,&from->starts,from->n,PetscMPIInt,&from->procs);
1533: from->starts[0] = 0;
1534: for(n = 0; n < from->n; n++) {
1535: if (recvSizes[n] <=0 ) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"recvSizes[n=%D] = %D cannot be less than 1",n,recvSizes[n]);
1536: from->starts[n+1] = from->starts[n] + recvSizes[n];
1537: from->procs[n] = recvProcs[n];
1538: for(i = from->starts[n]; i < from->starts[n]+recvSizes[n]; i++) {
1539: from->indices[i] = recvIdx[i];
1540: }
1541: }
1542: ctx->fromdata = (void *)from;
1544: /* No local scatter optimization */
1545: from->local.n = 0;
1546: from->local.vslots = 0;
1547: to->local.n = 0;
1548: to->local.vslots = 0;
1549: from->local.nonmatching_computed = PETSC_FALSE;
1550: from->local.n_nonmatching = 0;
1551: from->local.slots_nonmatching = 0;
1552: to->local.nonmatching_computed = PETSC_FALSE;
1553: to->local.n_nonmatching = 0;
1554: to->local.slots_nonmatching = 0;
1556: from->type = VEC_SCATTER_MPI_GENERAL;
1557: to->type = VEC_SCATTER_MPI_GENERAL;
1558: from->bs = bs;
1559: to->bs = bs;
1560: VecScatterCreateCommon_PtoS(from, to, ctx);
1561: return(0);
1562: }
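/*
   Editor's sketch (not part of the original source): a hypothetical call with one send and one
   receive message of two entries each and block size 1; the ranks, sizes and indices below are
   made up purely for illustration and error checking is omitted:

       VecScatter     ctx;
       const PetscInt sendSizes[1] = {2}, sendProcs[1] = {1}, sendIdx[2] = {0,3};
       const PetscInt recvSizes[1] = {2}, recvProcs[1] = {1}, recvIdx[2] = {5,7};

       VecScatterCreateEmpty(PETSC_COMM_WORLD,&ctx);
       VecScatterCreateLocal(ctx,1,sendSizes,sendProcs,sendIdx,1,recvSizes,recvProcs,recvIdx,1);
*/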
1564: /*
1565: bs indicates how many elements there are in each block. Normally this would be 1.
1567: Contains a check that PetscMPIInt can handle the sizes needed.
1568: */
1571: PetscErrorCode VecScatterCreate_PtoS(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
1572: {
1573: VecScatter_MPI_General *from,*to;
1574: PetscMPIInt size,rank,imdex,tag,n;
1575: PetscInt *source = PETSC_NULL,*owners = PETSC_NULL;
1576: PetscInt *lowner = PETSC_NULL,*start = PETSC_NULL,lengthy,lengthx;
1577: PetscMPIInt *nprocs = PETSC_NULL,nrecvs;
1578: PetscInt i,j,idx,nsends;
1579: PetscInt *owner = PETSC_NULL,*starts = PETSC_NULL,count,slen;
1580: PetscInt *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
1581: PetscMPIInt *onodes1,*olengths1;
1582: MPI_Comm comm;
1583: MPI_Request *send_waits = PETSC_NULL,*recv_waits = PETSC_NULL;
1584: MPI_Status recv_status,*send_status;
1585: PetscErrorCode ierr;
1588: PetscObjectGetNewTag((PetscObject)ctx,&tag);
1589: PetscObjectGetComm((PetscObject)xin,&comm);
1590: MPI_Comm_rank(comm,&rank);
1591: MPI_Comm_size(comm,&size);
1592: owners = xin->map->range;
1593: VecGetSize(yin,&lengthy);
1594: VecGetSize(xin,&lengthx);
1596: /* first count number of contributors to each processor */
1597: PetscMalloc2(size,PetscMPIInt,&nprocs,nx,PetscInt,&owner);
1598: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
1599: j = 0;
1600: nsends = 0;
1601: for (i=0; i<nx; i++) {
1602: idx = inidx[i];
1603: if (idx < owners[j]) j = 0;
1604: for (; j<size; j++) {
1605: if (idx < owners[j+1]) {
1606: if (!nprocs[j]++) nsends++;
1607: owner[i] = j;
1608: break;
1609: }
1610: }
1611: }
1612: nprocslocal = nprocs[rank];
1613: nprocs[rank] = 0;
1614: if (nprocslocal) nsends--;
1615: /* inform other processors of number of messages and max length*/
1616: PetscGatherNumberOfMessages(comm,PETSC_NULL,nprocs,&nrecvs);
1617: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
1618: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
1619: recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
1621: /* post receives: */
1622: PetscMalloc3(recvtotal,PetscInt,&rvalues,nrecvs,PetscInt,&source,nrecvs,MPI_Request,&recv_waits);
1623: count = 0;
1624: for (i=0; i<nrecvs; i++) {
1625: MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
1626: count += olengths1[i];
1627: }
1629: /* do sends:
1630: 1) starts[i] gives the starting index in svalues for stuff going to
1631: the ith processor
1632: */
1633: PetscMalloc3(nx,PetscInt,&svalues,nsends,MPI_Request,&send_waits,size+1,PetscInt,&starts);
1634: starts[0] = 0;
1635: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1636: for (i=0; i<nx; i++) {
1637: if (owner[i] != rank) {
1638: svalues[starts[owner[i]]++] = inidx[i];
1639: }
1640: }
1642: starts[0] = 0;
1643: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1644: count = 0;
1645: for (i=0; i<size; i++) {
1646: if (nprocs[i]) {
1647: MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);
1648: }
1649: }
1651: /* wait on receives */
1652: count = nrecvs;
1653: slen = 0;
1654: while (count) {
1655: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1656: /* unpack receives into our local space */
1657: MPI_Get_count(&recv_status,MPIU_INT,&n);
1658: slen += n;
1659: count--;
1660: }
1662: if (slen != recvtotal) SETERRQ2(PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal);
1663:
1664: /* allocate entire send scatter context */
1665: PetscNewLog(ctx,VecScatter_MPI_General,&to);
1666: to->n = nrecvs;
1667: PetscMalloc(nrecvs*sizeof(MPI_Request),&to->requests);
1668: PetscMalloc4(bs*slen,PetscScalar,&to->values,slen,PetscInt,&to->indices,nrecvs+1,PetscInt,&to->starts,nrecvs,PetscMPIInt,&to->procs);
1669: PetscMalloc2(PetscMax(to->n,nsends),MPI_Status,&to->sstatus,PetscMax(to->n,nsends),MPI_Status,&to->rstatus);
1670: ctx->todata = (void*)to;
1671: to->starts[0] = 0;
1673: if (nrecvs) {
1675: /* move the data into the send scatter */
1676: base = owners[rank];
1677: rsvalues = rvalues;
1678: for (i=0; i<nrecvs; i++) {
1679: to->starts[i+1] = to->starts[i] + olengths1[i];
1680: to->procs[i] = onodes1[i];
1681: values = rsvalues;
1682: rsvalues += olengths1[i];
1683: for (j=0; j<olengths1[i]; j++) {
1684: to->indices[to->starts[i] + j] = values[j] - base;
1685: }
1686: }
1687: }
1688: PetscFree(olengths1);
1689: PetscFree(onodes1);
1690: PetscFree3(rvalues,source,recv_waits);
1692: /* allocate entire receive scatter context */
1693: PetscNewLog(ctx,VecScatter_MPI_General,&from);
1694: from->n = nsends;
1696: PetscMalloc(nsends*sizeof(MPI_Request),&from->requests);
1697: PetscMalloc4((ny-nprocslocal)*bs,PetscScalar,&from->values,ny-nprocslocal,PetscInt,&from->indices,nsends+1,PetscInt,&from->starts,from->n,PetscMPIInt,&from->procs);
1698: ctx->fromdata = (void*)from;
1700: /* move data into receive scatter */
1701: PetscMalloc2(size,PetscInt,&lowner,nsends+1,PetscInt,&start);
1702: count = 0; from->starts[0] = start[0] = 0;
1703: for (i=0; i<size; i++) {
1704: if (nprocs[i]) {
1705: lowner[i] = count;
1706: from->procs[count++] = i;
1707: from->starts[count] = start[count] = start[count-1] + nprocs[i];
1708: }
1709: }
1711: for (i=0; i<nx; i++) {
1712: if (owner[i] != rank) {
1713: from->indices[start[lowner[owner[i]]]++] = inidy[i];
1714: if (inidy[i] >= lengthy) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
1715: }
1716: }
1717: PetscFree2(lowner,start);
1718: PetscFree2(nprocs,owner);
1719:
1720: /* wait on sends */
1721: if (nsends) {
1722: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1723: MPI_Waitall(nsends,send_waits,send_status);
1724: PetscFree(send_status);
1725: }
1726: PetscFree3(svalues,send_waits,starts);
1728: if (nprocslocal) {
1729: PetscInt nt = from->local.n = to->local.n = nprocslocal;
1730: /* we have a scatter to ourselves */
1731: PetscMalloc(nt*sizeof(PetscInt),&to->local.vslots);
1732: PetscMalloc(nt*sizeof(PetscInt),&from->local.vslots);
1733: nt = 0;
1734: for (i=0; i<nx; i++) {
1735: idx = inidx[i];
1736: if (idx >= owners[rank] && idx < owners[rank+1]) {
1737: to->local.vslots[nt] = idx - owners[rank];
1738: from->local.vslots[nt++] = inidy[i];
1739: if (inidy[i] >= lengthy) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
1740: }
1741: }
1742: } else {
1743: from->local.n = 0;
1744: from->local.vslots = 0;
1745: to->local.n = 0;
1746: to->local.vslots = 0;
1747: }
1749: from->local.nonmatching_computed = PETSC_FALSE;
1750: from->local.n_nonmatching = 0;
1751: from->local.slots_nonmatching = 0;
1752: to->local.nonmatching_computed = PETSC_FALSE;
1753: to->local.n_nonmatching = 0;
1754: to->local.slots_nonmatching = 0;
1756: from->type = VEC_SCATTER_MPI_GENERAL;
1757: to->type = VEC_SCATTER_MPI_GENERAL;
1758: from->bs = bs;
1759: to->bs = bs;
1760: VecScatterCreateCommon_PtoS(from,to,ctx);
1761: return(0);
1762: }
1764: /*
1765: bs indicates how many elements there are in each block. Normally this would be 1.
1766: */
1769: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
1770: {
1771: MPI_Comm comm = ((PetscObject)ctx)->comm;
1772: PetscMPIInt tag = ((PetscObject)ctx)->tag, tagr;
1773: PetscInt bs = to->bs;
1774: PetscMPIInt size;
1775: PetscInt i, n;
1777:
1779: PetscObjectGetNewTag((PetscObject)ctx,&tagr);
1780: ctx->destroy = VecScatterDestroy_PtoP;
1782: ctx->reproduce = PETSC_FALSE;
1783: to->sendfirst = PETSC_FALSE;
1784: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_reproduce",&ctx->reproduce,PETSC_NULL);
1785: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_sendfirst",&to->sendfirst,PETSC_NULL);
1786: from->sendfirst = to->sendfirst;
1788: MPI_Comm_size(comm,&size);
1789: /* check if the receives are ALL going into contiguous locations; if so can skip indexing */
1790: to->contiq = PETSC_FALSE;
1791: n = from->starts[from->n];
1792: from->contiq = PETSC_TRUE;
1793: for (i=1; i<n; i++) {
1794: if (from->indices[i] != from->indices[i-1] + bs) {
1795: from->contiq = PETSC_FALSE;
1796: break;
1797: }
1798: }
1800: to->use_alltoallv = PETSC_FALSE;
1801: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_alltoall",&to->use_alltoallv,PETSC_NULL);
1802: from->use_alltoallv = to->use_alltoallv;
1803: if (from->use_alltoallv) PetscInfo(ctx,"Using MPI_Alltoallv() for scatter\n");
1804: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
1805: if (to->use_alltoallv) {
1806: to->use_alltoallw = PETSC_FALSE;
1807: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_nopack",&to->use_alltoallw,PETSC_NULL);
1808: }
1809: from->use_alltoallw = to->use_alltoallw;
1810: if (from->use_alltoallw) PetscInfo(ctx,"Using MPI_Alltoallw() for scatter\n");
1811: #endif
1813: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
1814: to->use_window = PETSC_FALSE;
1815: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_window",&to->use_window,PETSC_NULL);
1816: from->use_window = to->use_window;
1817: #endif
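  /*
     The communication strategy has now been chosen from the runtime options:
       -vecscatter_alltoall   use MPI_Alltoallv() instead of persistent point-to-point messages
       -vecscatter_nopack     with alltoall, use MPI_Alltoallw() and indexed datatypes so no packing buffers are needed
       -vecscatter_window     use MPI one-sided windows (created below with MPI_Win_create())
     otherwise persistent sends/receives are registered below, optionally as ready or
     synchronous sends via -vecscatter_rsend / -vecscatter_ssend.
  */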
1819: if (to->use_alltoallv) {
1821: PetscMalloc2(size,PetscMPIInt,&to->counts,size,PetscMPIInt,&to->displs);
1822: PetscMemzero(to->counts,size*sizeof(PetscMPIInt));
1823: for (i=0; i<to->n; i++) {
1824: to->counts[to->procs[i]] = bs*(to->starts[i+1] - to->starts[i]);
1825: }
1826: to->displs[0] = 0;
1827: for (i=1; i<size; i++) {
1828: to->displs[i] = to->displs[i-1] + to->counts[i-1];
1829: }
1831: PetscMalloc2(size,PetscMPIInt,&from->counts,size,PetscMPIInt,&from->displs);
1832: PetscMemzero(from->counts,size*sizeof(PetscMPIInt));
1833: for (i=0; i<from->n; i++) {
1834: from->counts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
1835: }
1836: from->displs[0] = 0;
1837: for (i=1; i<size; i++) {
1838: from->displs[i] = from->displs[i-1] + from->counts[i-1];
1839: }
1840: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
1841: if (to->use_alltoallw) {
1842: PetscMPIInt mpibs = PetscMPIIntCast(bs), mpilen;
1843: ctx->packtogether = PETSC_FALSE;
1844: PetscMalloc3(size,PetscMPIInt,&to->wcounts,size,PetscMPIInt,&to->wdispls,size,MPI_Datatype,&to->types);
1845: PetscMemzero(to->wcounts,size*sizeof(PetscMPIInt));
1846: PetscMemzero(to->wdispls,size*sizeof(PetscMPIInt));
1847: for (i=0; i<size; i++) {
1848: to->types[i] = MPIU_SCALAR;
1849: }
1851: for (i=0; i<to->n; i++) {
1852: to->wcounts[to->procs[i]] = 1;
1853: mpilen = PetscMPIIntCast(to->starts[i+1]-to->starts[i]);
1854: MPI_Type_create_indexed_block(mpilen,mpibs,to->indices+to->starts[i],MPIU_SCALAR,to->types+to->procs[i]);
1855: MPI_Type_commit(to->types+to->procs[i]);
1856: }
1857: PetscMalloc3(size,PetscMPIInt,&from->wcounts,size,PetscMPIInt,&from->wdispls,size,MPI_Datatype,&from->types);
1858: PetscMemzero(from->wcounts,size*sizeof(PetscMPIInt));
1859: PetscMemzero(from->wdispls,size*sizeof(PetscMPIInt));
1860: for (i=0; i<size; i++) {
1861: from->types[i] = MPIU_SCALAR;
1862: }
1863: if (from->contiq) {
1864: PetscInfo(ctx,"Scattered vector entries are stored contiguously, taking advantage of this with -vecscatter_alltoall\n");
1865: for (i=0; i<from->n; i++) {
1866: from->wcounts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
1867: }
1868: if (from->n) from->wdispls[from->procs[0]] = sizeof(PetscScalar)*from->indices[0];
1869: for (i=1; i<from->n; i++) {
1870: from->wdispls[from->procs[i]] = from->wdispls[from->procs[i-1]] + sizeof(PetscScalar)*from->wcounts[from->procs[i-1]];
1871: }
1872: } else {
1873: for (i=0; i<from->n; i++) {
1874: from->wcounts[from->procs[i]] = 1;
1875: mpilen = PetscMPIIntCast(from->starts[i+1]-from->starts[i]);
1876: MPI_Type_create_indexed_block(mpilen,mpibs,from->indices+from->starts[i],MPIU_SCALAR,from->types+from->procs[i]);
1877: MPI_Type_commit(from->types+from->procs[i]);
1878: }
1879: }
1880: } else {
1881: ctx->copy = VecScatterCopy_PtoP_AllToAll;
1882: }
1883: #else
1884: to->use_alltoallw = PETSC_FALSE;
1885: from->use_alltoallw = PETSC_FALSE;
1886: ctx->copy = VecScatterCopy_PtoP_AllToAll;
1887: #endif
1888: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
1889: } else if (to->use_window) {
1890: PetscMPIInt temptag,winsize;
1891: MPI_Request *request;
1892: MPI_Status *status;
1893:
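    /* Exchange the window offsets: each process sends from->starts[i] (where the data from
       process from->procs[i] begins in its receive area) to that process, which stores the
       reply in to->winstarts[i]; the later one-sided transfers use these as displacements
       into the remote window. The same handshake is then done for the reverse direction. */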
1894: PetscObjectGetNewTag((PetscObject)ctx,&temptag);
1895: winsize = (to->n ? to->starts[to->n] : 0)*sizeof(PetscScalar);
1896: MPI_Win_create(to->values ? to->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&to->window);
1897: PetscMalloc(to->n*sizeof(PetscInt),&to->winstarts);
1898: PetscMalloc2(to->n,MPI_Request,&request,to->n,MPI_Status,&status);
1899: for (i=0; i<to->n; i++) {
1900: MPI_Irecv(to->winstarts+i,1,MPIU_INT,to->procs[i],temptag,comm,request+i);
1901: }
1902: for (i=0; i<from->n; i++) {
1903: MPI_Send(from->starts+i,1,MPIU_INT,from->procs[i],temptag,comm);
1904: }
1905: MPI_Waitall(to->n,request,status);
1906: PetscFree2(request,status);
1908: winsize = (from->n ? from->starts[from->n] : 0)*sizeof(PetscScalar);
1909: MPI_Win_create(from->values ? from->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&from->window);
1910: PetscMalloc(from->n*sizeof(PetscInt),&from->winstarts);
1911: PetscMalloc2(from->n,MPI_Request,&request,from->n,MPI_Status,&status);
1912: for (i=0; i<from->n; i++) {
1913: MPI_Irecv(from->winstarts+i,1,MPIU_INT,from->procs[i],temptag,comm,request+i);
1914: }
1915: for (i=0; i<to->n; i++) {
1916: MPI_Send(to->starts+i,1,MPIU_INT,to->procs[i],temptag,comm);
1917: }
1918: MPI_Waitall(from->n,request,status);
1919: PetscFree2(request,status);
1920: #endif
1921: } else {
1922: PetscTruth use_rsend = PETSC_FALSE, use_ssend = PETSC_FALSE;
1923: PetscInt *sstarts = to->starts, *rstarts = from->starts;
1924: PetscMPIInt *sprocs = to->procs, *rprocs = from->procs;
1925: MPI_Request *swaits = to->requests,*rwaits = from->requests;
1926: MPI_Request *rev_swaits,*rev_rwaits;
1927: PetscScalar *Ssvalues = to->values, *Srvalues = from->values;
1929: /* allocate additional wait variables for the "reverse" scatter */
1930: PetscMalloc(to->n*sizeof(MPI_Request),&rev_rwaits);
1931: PetscMalloc(from->n*sizeof(MPI_Request),&rev_swaits);
1932: to->rev_requests = rev_rwaits;
1933: from->rev_requests = rev_swaits;
1935: /* Select the send mode (standard, ready, or synchronous) and register the persistent sends for both the forward scatter and the reverse scatter (which sends from the receive buffers) */
1936: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_rsend",&use_rsend,PETSC_NULL);
1937: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_ssend",&use_ssend,PETSC_NULL);
1938: if (use_rsend) {
1939: PetscInfo(ctx,"Using VecScatter ready receiver mode\n");
1940: to->use_readyreceiver = PETSC_TRUE;
1941: from->use_readyreceiver = PETSC_TRUE;
1942: } else {
1943: to->use_readyreceiver = PETSC_FALSE;
1944: from->use_readyreceiver = PETSC_FALSE;
1945: }
1946: if (use_ssend) {
1947: PetscInfo(ctx,"Using VecScatter Ssend mode\n");
1948: }
1950: for (i=0; i<from->n; i++) {
1951: if (use_rsend) {
1952: MPI_Rsend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
1953: } else if (use_ssend) {
1954: MPI_Ssend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
1955: } else {
1956: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
1957: }
1958: }
1960: for (i=0; i<to->n; i++) {
1961: if (use_rsend) {
1962: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
1963: } else if (use_ssend) {
1964: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
1965: } else {
1966: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
1967: }
1968: }
1969: /* Register receives for scatter and reverse */
1970: for (i=0; i<from->n; i++) {
1971: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
1972: }
1973: for (i=0; i<to->n; i++) {
1974: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);
1975: }
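    /* Ready-send mode requires the matching receives to be posted before any MPI_Rsend()
       is started, so start the persistent receives for both directions now and barrier so
       no process begins sending until every process has posted its receives. */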
1976: if (use_rsend) {
1977: if (to->n) {MPI_Startall_irecv(to->starts[to->n]*to->bs,to->n,to->rev_requests);}
1978: if (from->n) {MPI_Startall_irecv(from->starts[from->n]*from->bs,from->n,from->requests);}
1979: MPI_Barrier(comm);
1980: }
1982: ctx->copy = VecScatterCopy_PtoP_X;
1983: }
1984: PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);
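  /* Install begin/end routines specialized for the block size; only block sizes 1 through 8
     and 12 have specialized implementations, any other block size is rejected below. */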
1985: switch (bs) {
1986: case 12:
1987: ctx->begin = VecScatterBegin_12;
1988: ctx->end = VecScatterEnd_12;
1989: break;
1990: case 8:
1991: ctx->begin = VecScatterBegin_8;
1992: ctx->end = VecScatterEnd_8;
1993: break;
1994: case 7:
1995: ctx->begin = VecScatterBegin_7;
1996: ctx->end = VecScatterEnd_7;
1997: break;
1998: case 6:
1999: ctx->begin = VecScatterBegin_6;
2000: ctx->end = VecScatterEnd_6;
2001: break;
2002: case 5:
2003: ctx->begin = VecScatterBegin_5;
2004: ctx->end = VecScatterEnd_5;
2005: break;
2006: case 4:
2007: ctx->begin = VecScatterBegin_4;
2008: ctx->end = VecScatterEnd_4;
2009: break;
2010: case 3:
2011: ctx->begin = VecScatterBegin_3;
2012: ctx->end = VecScatterEnd_3;
2013: break;
2014: case 2:
2015: ctx->begin = VecScatterBegin_2;
2016: ctx->end = VecScatterEnd_2;
2017: break;
2018: case 1:
2019: ctx->begin = VecScatterBegin_1;
2020: ctx->end = VecScatterEnd_1;
2021: break;
2022: default:
2023: SETERRQ(PETSC_ERR_SUP,"Blocksize not supported");
2024: }
2025: ctx->view = VecScatterView_MPI;
2026: /* Check if the local scatter is actually a copy; important special case */
2027: if (to->local.n) {
2028: VecScatterLocalOptimizeCopy_Private(ctx,&to->local,&from->local,bs);
2029: }
2030: return(0);
2031: }
2035: /* ------------------------------------------------------------------------------------*/
2036: /*
2037: Scatter from local Seq vectors to a parallel vector.
2038: Reverses the order of the arguments, calls VecScatterCreate_PtoS() then
2039: reverses the result.
2040: */
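/*
   In addition to swapping the todata and fromdata pointers, the forward and reverse
   persistent request arrays are exchanged so that the reversed context starts the
   correct sends and receives.
*/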
2043: PetscErrorCode VecScatterCreate_StoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2044: {
2045: PetscErrorCode ierr;
2046: MPI_Request *waits;
2047: VecScatter_MPI_General *to,*from;
2050: VecScatterCreate_PtoS(ny,inidy,nx,inidx,yin,xin,bs,ctx);
2051: to = (VecScatter_MPI_General*)ctx->fromdata;
2052: from = (VecScatter_MPI_General*)ctx->todata;
2053: ctx->todata = (void*)to;
2054: ctx->fromdata = (void*)from;
2055: /* these two are special; they are ALWAYS stored in the "to" struct */
2056: to->sstatus = from->sstatus;
2057: to->rstatus = from->rstatus;
2059: from->sstatus = 0;
2060: from->rstatus = 0;
2062: waits = from->rev_requests;
2063: from->rev_requests = from->requests;
2064: from->requests = waits;
2065: waits = to->rev_requests;
2066: to->rev_requests = to->requests;
2067: to->requests = waits;
2068: return(0);
2069: }
2071: /* ---------------------------------------------------------------------------------*/
2074: PetscErrorCode VecScatterCreate_PtoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,VecScatter ctx)
2075: {
2077: PetscMPIInt size,rank,tag,imdex,n;
2078: PetscInt *owners = xin->map->range;
2079: PetscMPIInt *nprocs = PETSC_NULL;
2080: PetscInt i,j,idx,nsends,*local_inidx = PETSC_NULL,*local_inidy = PETSC_NULL;
2081: PetscInt *owner = PETSC_NULL,*starts = PETSC_NULL,count,slen;
2082: PetscInt *rvalues = PETSC_NULL,*svalues = PETSC_NULL,base,*values = PETSC_NULL,*rsvalues,recvtotal,lastidx;
2083: PetscMPIInt *onodes1,*olengths1,nrecvs;
2084: MPI_Comm comm;
2085: MPI_Request *send_waits = PETSC_NULL,*recv_waits = PETSC_NULL;
2086: MPI_Status recv_status,*send_status = PETSC_NULL;
2087: PetscTruth duplicate = PETSC_FALSE;
2088: #if defined(PETSC_USE_DEBUG)
2089: PetscTruth found = PETSC_FALSE;
2090: #endif
2093: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2094: PetscObjectGetComm((PetscObject)xin,&comm);
2095: MPI_Comm_size(comm,&size);
2096: MPI_Comm_rank(comm,&rank);
2097: if (size == 1) {
2098: VecScatterCreate_StoP(nx,inidx,ny,inidy,xin,yin,1,ctx);
2099: return(0);
2100: }
2102: /*
2103: Each processor ships its (inidx[j],inidy[j]) pairs to the processor that owns inidx[j];
2104: the owners then build the scatter by calling VecScatterCreate_StoP()
2105: */
2106: /* first count number of contributors to each processor */
2107: PetscMalloc3(size,PetscMPIInt,&nprocs,nx,PetscInt,&owner,(size+1),PetscInt,&starts);
2108: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
2109: lastidx = -1;
2110: j = 0;
2111: for (i=0; i<nx; i++) {
2112: /* if indices are NOT locally sorted, need to start search at the beginning */
2113: if (lastidx > (idx = inidx[i])) j = 0;
2114: lastidx = idx;
2115: for (; j<size; j++) {
2116: if (idx >= owners[j] && idx < owners[j+1]) {
2117: nprocs[j]++;
2118: owner[i] = j;
2119: #if defined(PETSC_USE_DEBUG)
2120: found = PETSC_TRUE;
2121: #endif
2122: break;
2123: }
2124: }
2125: #if defined(PETSC_USE_DEBUG)
2126: if (!found) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2127: found = PETSC_FALSE;
2128: #endif
2129: }
2130: nsends = 0; for (i=0; i<size; i++) { nsends += (nprocs[i] > 0);}
2132: /* inform other processors of the number of messages and their lengths */
2133: PetscGatherNumberOfMessages(comm,PETSC_NULL,nprocs,&nrecvs);
2134: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2135: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2136: recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
2138: /* post receives: */
2139: PetscMalloc5(2*recvtotal,PetscInt,&rvalues,2*nx,PetscInt,&svalues,nrecvs,MPI_Request,&recv_waits,nsends,MPI_Request,&send_waits,nsends,MPI_Status,&send_status);
2141: count = 0;
2142: for (i=0; i<nrecvs; i++) {
2143: MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2144: count += olengths1[i];
2145: }
2146: PetscFree(onodes1);
2148: /* do sends:
2149: starts[i] gives the starting index in svalues for the (inidx,inidy) pairs going to
2150: the ith processor
2151: */
2152: starts[0]= 0;
2153: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
2154: for (i=0; i<nx; i++) {
2155: svalues[2*starts[owner[i]]] = inidx[i];
2156: svalues[1 + 2*starts[owner[i]]++] = inidy[i];
2157: }
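  /* the packing loop above post-increments starts[], so rebuild it before posting the sends */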
2159: starts[0] = 0;
2160: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
2161: count = 0;
2162: for (i=0; i<size; i++) {
2163: if (nprocs[i]) {
2164: MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);
2165: count++;
2166: }
2167: }
2168: PetscFree3(nprocs,owner,starts);
2170: /* wait on receives */
2171: count = nrecvs;
2172: slen = 0;
2173: while (count) {
2174: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2175: /* unpack receives into our local space */
2176: MPI_Get_count(&recv_status,MPIU_INT,&n);
2177: slen += n/2;
2178: count--;
2179: }
2180: if (slen != recvtotal) SETERRQ2(PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);
2181:
2182: PetscMalloc2(slen,PetscInt,&local_inidx,slen,PetscInt,&local_inidy);
2183: base = owners[rank];
2184: count = 0;
2185: rsvalues = rvalues;
2186: for (i=0; i<nrecvs; i++) {
2187: values = rsvalues;
2188: rsvalues += 2*olengths1[i];
2189: for (j=0; j<olengths1[i]; j++) {
2190: local_inidx[count] = values[2*j] - base;
2191: local_inidy[count++] = values[2*j+1];
2192: }
2193: }
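  /* the received "from" indices are global indices into xin; subtracting base = owners[rank]
     converts them to indices local to this process before calling VecScatterCreate_StoP() */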
2194: PetscFree(olengths1);
2196: /* wait on sends */
2197: if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
2198: PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);
2200: /*
2201: should sort and remove duplicates from local_inidx,local_inidy
2202: */
2204: #if defined(do_it_slow)
2205: /* sort on the from index */
2206: PetscSortIntWithArray(slen,local_inidx,local_inidy);
2207: start = 0;
2208: while (start < slen) {
2209: count = start+1;
2210: last = local_inidx[start];
2211: while (count < slen && last == local_inidx[count]) count++;
2212: if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2213: /* sort on to index */
2214: PetscSortInt(count-start,local_inidy+start);
2215: }
2216: /* remove duplicates; not most efficient way, but probably good enough */
2217: i = start;
2218: while (i < count-1) {
2219: if (local_inidy[i] != local_inidy[i+1]) {
2220: i++;
2221: } else { /* found a duplicate */
2222: duplicate = PETSC_TRUE;
2223: for (j=i; j<slen-1; j++) {
2224: local_inidx[j] = local_inidx[j+1];
2225: local_inidy[j] = local_inidy[j+1];
2226: }
2227: slen--;
2228: count--;
2229: }
2230: }
2231: start = count;
2232: }
2233: #endif
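  /* the sort/duplicate-removal pass above is compiled only when do_it_slow is defined;
     otherwise duplicate remains PETSC_FALSE and the message below is never printed */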
2234: if (duplicate) {
2235: PetscInfo(ctx,"Duplicate from-to indices passed in VecScatterCreate(); they are ignored\n");
2236: }
2237: VecScatterCreate_StoP(slen,local_inidx,slen,local_inidy,xin,yin,1,ctx);
2238: PetscFree2(local_inidx,local_inidy);
2239: return(0);
2240: }