Actual source code: vpscat.c
1: /*
2: Defines parallel vector scatters.
3: */
5: #include "src/vec/is/isimpl.h"
6: #include "vecimpl.h"
7: #include "src/vec/impls/dvecimpl.h"
8: #include "src/vec/impls/mpi/pvecimpl.h"
9: #include "petscsys.h"
13: PetscErrorCode VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
14: {
15: VecScatter_MPI_General *to=(VecScatter_MPI_General*)ctx->todata;
16: VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
17: PetscErrorCode ierr;
18: PetscInt i;
19: PetscMPIInt rank;
20: PetscViewerFormat format;
21: PetscTruth iascii;
24: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
25: if (iascii) {
26: MPI_Comm_rank(ctx->comm,&rank);
27: PetscViewerGetFormat(viewer,&format);
28: if (format == PETSC_VIEWER_ASCII_INFO) {
29: PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;
31: MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,ctx->comm);
32: MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,ctx->comm);
33: itmp = to->starts[to->n+1];
34: MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,ctx->comm);
35: itmp = from->starts[from->n+1];
36: MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,ctx->comm);
37: MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,ctx->comm);
39: PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
40: PetscViewerASCIIPrintf(viewer," Maximum number sends %D\n",nsend_max);
41: PetscViewerASCIIPrintf(viewer," Maximum number receives %D\n",nrecv_max);
42: PetscViewerASCIIPrintf(viewer," Maximum data sent %D\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));
43: PetscViewerASCIIPrintf(viewer," Maximum data received %D\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));
44: PetscViewerASCIIPrintf(viewer," Total data sent %D\n",(int)(alldata*to->bs*sizeof(PetscScalar)));
46: } else {
47: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
48: if (to->n) {
49: for (i=0; i<to->n; i++){
50: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length = %D to whom %D\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
51: }
52: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
53: for (i=0; i<to->starts[to->n]; i++){
54: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]%D \n",rank,to->indices[i]);
55: }
56: }
58: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
59: if (from->n) {
60: for (i=0; i<from->n; i++){
61: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %D\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
62: }
64: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
65: for (i=0; i<from->starts[from->n]; i++){
66: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]%D \n",rank,from->indices[i]);
67: }
68: }
69: if (to->local.n) {
70: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]Indices for local part of scatter\n",rank);
71: for (i=0; i<to->local.n; i++){
72: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]From %D to %D \n",rank,from->local.slots[i],to->local.slots[i]);
73: }
74: }
76: PetscViewerFlush(viewer);
77: }
78: } else {
79: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for this scatter",((PetscObject)viewer)->type_name);
80: }
81: return(0);
82: }
84: /* -----------------------------------------------------------------------------------*/
85: /*
86: The next routine determines which entries of the local part of the scatter
87: copy values onto their current locations. We check this here and then
88: know that we need not perform that portion of the scatter.
89: */
92: PetscErrorCode VecScatterLocalOptimize_Private(VecScatter_Seq_General *gen_to,VecScatter_Seq_General *gen_from)
93: {
94: PetscInt n = gen_to->n,n_nonmatching = 0,i,*to_slots = gen_to->slots,*from_slots = gen_from->slots;
96: PetscInt *nto_slots,*nfrom_slots,j = 0;
97:
99: for (i=0; i<n; i++) {
100: if (to_slots[i] != from_slots[i]) n_nonmatching++;
101: }
103: if (!n_nonmatching) {
104: gen_to->nonmatching_computed = PETSC_TRUE;
105: gen_to->n_nonmatching = gen_from->n_nonmatching = 0;
106: PetscLogInfo(0,"VecScatterLocalOptimize_Private:Reduced %D to 0\n", n);
107: } else if (n_nonmatching == n) {
108: gen_to->nonmatching_computed = PETSC_FALSE;
109: PetscLogInfo(0,"VecScatterLocalOptimize_Private:All values non-matching\n");
110: } else {
111: gen_to->nonmatching_computed= PETSC_TRUE;
112: gen_to->n_nonmatching = gen_from->n_nonmatching = n_nonmatching;
113: PetscMalloc2(n_nonmatching,PetscInt,&nto_slots,n_nonmatching,PetscInt,&nfrom_slots);
114: gen_to->slots_nonmatching = nto_slots;
115: gen_from->slots_nonmatching = nfrom_slots;
116: for (i=0; i<n; i++) {
117: if (to_slots[i] != from_slots[i]) {
118: nto_slots[j] = to_slots[i];
119: nfrom_slots[j] = from_slots[i];
120: j++;
121: }
122: }
123: PetscLogInfo(0,"VecScatterLocalOptimize_Private:Reduced %D to %D\n",n,n_nonmatching);
124: }
125: return(0);
126: }
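/*
   Editorial illustration (not part of vpscat.c): a tiny standalone sketch of the
   filtering idea above, using hypothetical slot arrays. Only entries whose source
   and destination differ are kept, exactly as in the routine above.
*/
#include <stdio.h>

int main(void)
{
  int to_slots[]   = {3,7,9};     /* hypothetical local destinations */
  int from_slots[] = {3,7,12};    /* hypothetical local sources      */
  int n = 3,i,n_nonmatching = 0;

  for (i=0; i<n; i++) {
    if (to_slots[i] != from_slots[i]) n_nonmatching++;  /* this entry actually moves data */
  }
  printf("Reduced %d to %d\n",n,n_nonmatching);         /* mirrors the PetscLogInfo() message */
  return 0;
}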
128: /* --------------------------------------------------------------------------------------*/
131: PetscErrorCode VecScatterCopy_PtoP(VecScatter in,VecScatter out)
132: {
133: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
134: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
135: PetscErrorCode ierr;
136: PetscInt len,ny;
139: out->postrecvs = in->postrecvs;
140: out->begin = in->begin;
141: out->end = in->end;
142: out->copy = in->copy;
143: out->destroy = in->destroy;
144: out->view = in->view;
146: /* allocate entire send scatter context */
147: PetscNew(VecScatter_MPI_General,&out_to);
148: PetscNew(VecScatter_MPI_General,&out_from);
149: PetscMemzero(out_from,sizeof(VecScatter_MPI_General));
150: PetscMemzero(out_to,sizeof(VecScatter_MPI_General));
152: ny = in_to->starts[in_to->n];
153: len = ny*(sizeof(PetscInt) + sizeof(PetscScalar)) + (in_to->n+1)*sizeof(PetscInt) +
154: (in_to->n)*(sizeof(PetscInt) + sizeof(MPI_Request));
155: out_to->n = in_to->n;
156: out_to->type = in_to->type;
157: out_to->sendfirst = in_to->sendfirst;
158: PetscMalloc(len,&out_to->values);
159: out_to->requests = (MPI_Request*)(out_to->values + ny);
160: out_to->indices = (PetscInt*)(out_to->requests + out_to->n);
161: out_to->starts = (PetscInt*)(out_to->indices + ny);
162: out_to->procs = (PetscMPIInt*)(out_to->starts + out_to->n + 1);
164: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
165: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
166: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
168: PetscMalloc(2*(PetscMax(in_to->n,in_from->n)+1)*sizeof(MPI_Status),&out_to->sstatus);
169: out_to->rstatus = out_to->sstatus + PetscMax(in_to->n,in_from->n) + 1;
171: out->todata = (void*)out_to;
172: out_to->local.n = in_to->local.n;
173: out_to->local.nonmatching_computed = PETSC_FALSE;
174: out_to->local.n_nonmatching = 0;
175: out_to->local.slots_nonmatching = 0;
176: if (in_to->local.n) {
177: PetscMalloc2(in_to->local.n,PetscInt,&out_to->local.slots,in_from->local.n,PetscInt,&out_from->local.slots);
178: PetscMemcpy(out_to->local.slots,in_to->local.slots,in_to->local.n*sizeof(PetscInt));
179: PetscMemcpy(out_from->local.slots,in_from->local.slots,in_from->local.n*sizeof(PetscInt));
180: } else {
181: out_to->local.slots = 0;
182: out_from->local.slots = 0;
183: }
185: /* allocate entire receive context */
186: out_from->type = in_from->type;
187: ny = in_from->starts[in_from->n];
188: len = ny*(sizeof(PetscInt) + sizeof(PetscScalar)) + (in_from->n+1)*sizeof(PetscInt) +
189: (in_from->n)*(sizeof(PetscInt) + sizeof(MPI_Request));
190: out_from->n = in_from->n;
191: out_from->sendfirst = in_from->sendfirst;
192: PetscMalloc(len,&out_from->values);
193: out_from->requests = (MPI_Request*)(out_from->values + ny);
194: out_from->indices = (PetscInt*)(out_from->requests + out_from->n);
195: out_from->starts = (PetscInt*)(out_from->indices + ny);
196: out_from->procs = (PetscMPIInt*)(out_from->starts + out_from->n + 1);
197: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
198: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
199: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
200: out->fromdata = (void*)out_from;
201: out_from->local.n = in_from->local.n;
202: out_from->local.nonmatching_computed = PETSC_FALSE;
203: out_from->local.n_nonmatching = 0;
204: out_from->local.slots_nonmatching = 0;
205: return(0);
206: }
208: /* -------------------------------------------------------------------------------------*/
211: PetscErrorCode VecScatterDestroy_PtoP(VecScatter ctx)
212: {
213: VecScatter_MPI_General *gen_to = (VecScatter_MPI_General*)ctx->todata;
214: VecScatter_MPI_General *gen_from = (VecScatter_MPI_General*)ctx->fromdata;
215: PetscErrorCode ierr;
218: CHKMEMQ;
219: if (gen_to->local.slots) {PetscFree2(gen_to->local.slots,gen_from->local.slots);}
220: if (gen_to->local.slots_nonmatching) {PetscFree2(gen_to->local.slots_nonmatching,gen_from->local.slots_nonmatching);}
221: PetscFree(gen_to->sstatus);
222: PetscFree(gen_to->values);
223: PetscFree(gen_from->values);
224: PetscFree(gen_from);
225: PetscFree(gen_to);
226: PetscHeaderDestroy(ctx);
227: return(0);
228: }
230: /* --------------------------------------------------------------------------------------*/
231: /*
232: Even though the next routines are written for parallel
233: vectors, either xin or yin (but not both) may be a sequential (Seq)
234: vector, one per processor.
235:
236: gen_from indices indicate where arriving data is stashed;
237: gen_to indices indicate where departing data came from.
238: The naming can be VERY confusing.
240: */
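/*
   Editorial orientation sketch (not part of vpscat.c): these internal routines are reached
   through the public VecScatterBegin()/VecScatterEnd() interface. A typical forward scatter,
   using the PETSc 2.x calling sequence that matches the routines below and assuming x, ix,
   y, iy were created elsewhere, looks roughly like this (error checking omitted, as in the
   rest of this listing):
*/
PetscErrorCode SketchForwardScatter(Vec x,IS ix,Vec y,IS iy)
{
  VecScatter scat;

  VecScatterCreate(x,ix,y,iy,&scat);                        /* builds the to/from data used below     */
  VecScatterBegin(x,y,INSERT_VALUES,SCATTER_FORWARD,scat);  /* packs, posts messages, does local part */
  VecScatterEnd(x,y,INSERT_VALUES,SCATTER_FORWARD,scat);    /* waits on receives and unpacks into y   */
  VecScatterDestroy(scat);
  return(0);
}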
243: PetscErrorCode VecScatterBegin_PtoP(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
244: {
245: VecScatter_MPI_General *gen_to,*gen_from;
246: MPI_Comm comm = ctx->comm;
247: PetscScalar *xv,*yv,*val,*rvalues,*svalues;
248: MPI_Request *rwaits,*swaits;
249: PetscInt i,j,*indices,*rstarts,*sstarts;
250: PetscMPIInt tag = ctx->tag,*rprocs,*sprocs;
251: PetscErrorCode ierr;
252: PetscInt nrecvs,nsends,iend;
255: CHKMEMQ;
256: VecGetArray(xin,&xv);
257: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
258: if (mode & SCATTER_REVERSE){
259: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
260: gen_from = (VecScatter_MPI_General*)ctx->todata;
261: } else {
262: gen_to = (VecScatter_MPI_General*)ctx->todata;
263: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
264: }
265: rvalues = gen_from->values;
266: svalues = gen_to->values;
267: nrecvs = gen_from->n;
268: nsends = gen_to->n;
269: rwaits = gen_from->requests;
270: swaits = gen_to->requests;
271: indices = gen_to->indices;
272: rstarts = gen_from->starts;
273: sstarts = gen_to->starts;
274: rprocs = gen_from->procs;
275: sprocs = gen_to->procs;
277: if (!(mode & SCATTER_LOCAL)) {
279: if (gen_to->sendfirst) {
280: /* do sends: */
281: for (i=0; i<nsends; i++) {
282: val = svalues + sstarts[i];
283: iend = sstarts[i+1]-sstarts[i];
284: /* pack the message */
285: for (j=0; j<iend; j++) {
286: val[j] = xv[*indices++];
287: }
288: MPI_Isend(val,iend,MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
289: }
290: }
291:
292: /* post receives: */
293: for (i=0; i<nrecvs; i++) {
294: MPI_Irecv(rvalues+rstarts[i],rstarts[i+1]-rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
295: }
297: if (!gen_to->sendfirst) {
298: /* do sends: */
299: for (i=0; i<nsends; i++) {
300: val = svalues + sstarts[i];
301: iend = sstarts[i+1]-sstarts[i];
302: /* pack the message */
303: for (j=0; j<iend; j++) {
304: val[j] = xv[*indices++];
305: }
306: MPI_Isend(val,iend,MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
307: }
308: }
309: }
311: /* take care of local scatters */
312: if (gen_to->local.n && addv == INSERT_VALUES) {
313: if (yv == xv && !gen_to->local.nonmatching_computed) {
314: VecScatterLocalOptimize_Private(&gen_to->local,&gen_from->local);
315: }
316: if (gen_to->local.is_copy) {
317: PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
318: } else if (yv != xv || !gen_to->local.nonmatching_computed) {
319: PetscInt *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
320: PetscInt n = gen_to->local.n;
321: for (i=0; i<n; i++) {yv[fslots[i]] = xv[tslots[i]];}
322: } else {
323: /*
324: In this case, it is copying the values into their old locations, thus we can skip those
325: */
326: PetscInt *tslots = gen_to->local.slots_nonmatching,*fslots = gen_from->local.slots_nonmatching;
327: PetscInt n = gen_to->local.n_nonmatching;
328: for (i=0; i<n; i++) {yv[fslots[i]] = xv[tslots[i]];}
329: }
330: } else if (gen_to->local.n) {
331: PetscInt *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
332: PetscInt n = gen_to->local.n;
333: if (addv == ADD_VALUES) {
334: for (i=0; i<n; i++) {yv[fslots[i]] += xv[tslots[i]];}
335: #if !defined(PETSC_USE_COMPLEX)
336: } else if (addv == MAX_VALUES) {
337: for (i=0; i<n; i++) {yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[tslots[i]]);}
338: #endif
339: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
340: }
342: VecRestoreArray(xin,&xv);
343: if (xin != yin) {VecRestoreArray(yin,&yv);}
344: CHKMEMQ;
345: return(0);
346: }
348: /* --------------------------------------------------------------------------------------*/
351: PetscErrorCode VecScatterEnd_PtoP(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
352: {
353: VecScatter_MPI_General *gen_to,*gen_from;
354: PetscScalar *rvalues,*yv,*val;
355: PetscErrorCode ierr;
356: PetscInt nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices;
357: PetscMPIInt imdex;
358: MPI_Request *rwaits,*swaits;
359: MPI_Status rstatus,*sstatus;
362: CHKMEMQ;
363: if (mode & SCATTER_LOCAL) return(0);
364: VecGetArray(yin,&yv);
366: if (mode & SCATTER_REVERSE){
367: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
368: gen_from = (VecScatter_MPI_General*)ctx->todata;
369: sstatus = gen_from->sstatus;
370: } else {
371: gen_to = (VecScatter_MPI_General*)ctx->todata;
372: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
373: sstatus = gen_to->sstatus;
374: }
375: rvalues = gen_from->values;
376: nrecvs = gen_from->n;
377: nsends = gen_to->n;
378: rwaits = gen_from->requests;
379: swaits = gen_to->requests;
380: indices = gen_from->indices;
381: rstarts = gen_from->starts;
383: /* wait on receives */
384: count = nrecvs;
385: while (count) {
386: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
387: /* unpack receives into our local space */
388: val = rvalues + rstarts[imdex];
389: n = rstarts[imdex+1]-rstarts[imdex];
390: lindices = indices + rstarts[imdex];
391: if (addv == INSERT_VALUES) {
392: for (i=0; i<n; i++) {
393: yv[lindices[i]] = *val++;
394: }
395: } else if (addv == ADD_VALUES) {
396: for (i=0; i<n; i++) {
397: yv[lindices[i]] += *val++;
398: }
399: #if !defined(PETSC_USE_COMPLEX)
400: } else if (addv == MAX_VALUES) {
401: for (i=0; i<n; i++) {
402: yv[lindices[i]] = PetscMax(yv[lindices[i]],*val); val++;
403: }
404: #endif
405: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
406: count--;
407: }
409: /* wait on sends */
410: if (nsends) {
411: MPI_Waitall(nsends,swaits,sstatus);
412: }
413: VecRestoreArray(yin,&yv);
414: CHKMEMQ;
415: return(0);
416: }
417: /* ==========================================================================================*/
418: /*
419: Special scatters for fixed block sizes. These provide better performance
420: because the local copying, packing, and unpacking are unrolled to the
421: size of the block.
423: They also use MPI persistent sends and receives, which (at least in theory)
424: allow MPI to optimize repeated communication of the same type.
425: */
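/*
   Editorial aside (not part of vpscat.c): the persistent-request pattern referred to above,
   reduced to plain MPI for one fixed-size message per neighbor. The requests are created once
   and then restarted for every scatter, which is what lets MPI optimize the repeated transfer.
*/
#include <mpi.h>

void sketch_persistent_exchange(double *sendbuf,double *recvbuf,int count,int partner,MPI_Comm comm)
{
  MPI_Request req[2];
  int         iter;

  MPI_Send_init(sendbuf,count,MPI_DOUBLE,partner,0,comm,&req[0]);  /* set up once ...           */
  MPI_Recv_init(recvbuf,count,MPI_DOUBLE,partner,0,comm,&req[1]);
  for (iter=0; iter<10; iter++) {
    MPI_Startall(2,req);                                           /* ... restart every scatter */
    MPI_Waitall(2,req,MPI_STATUSES_IGNORE);                        /* requests remain reusable  */
  }
  MPI_Request_free(&req[0]);
  MPI_Request_free(&req[1]);
}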
427: /*
428: This is for use with the "ready-receiver" mode. In theory it could lead
429: to better performance on some machines, but in practice we have never
430: observed an improvement. Enabled with the -vecscatter_rr flag.
431: */
434: PetscErrorCode VecScatterPostRecvs_PtoP_X(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
435: {
436: VecScatter_MPI_General *gen_from = (VecScatter_MPI_General*)ctx->fromdata;
439: MPI_Startall_irecv(gen_from->starts[gen_from->n]*gen_from->bs,gen_from->n,gen_from->requests);
440: return(0);
441: }
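/*
   Editorial aside (not part of vpscat.c): "ready-receiver" relies on MPI ready-mode sends
   (MPI_Rsend_init() below in this file), which are only legal once the matching receive has
   been posted; that is exactly what the postrecvs routine above guarantees by starting the
   persistent receives ahead of time. A minimal sketch of the ordering constraint, with a
   barrier standing in for the scatter's own synchronization:
*/
#include <mpi.h>

void sketch_ready_mode(double *sbuf,double *rbuf,int n,int partner,MPI_Comm comm)
{
  MPI_Request rreq,sreq;

  MPI_Recv_init(rbuf,n,MPI_DOUBLE,partner,0,comm,&rreq);
  MPI_Rsend_init(sbuf,n,MPI_DOUBLE,partner,0,comm,&sreq);
  MPI_Start(&rreq);                       /* corresponds to VecScatterPostRecvs_PtoP_X() above */
  MPI_Barrier(comm);                      /* crude guarantee that every receive is posted      */
  MPI_Start(&sreq);                       /* only now is the ready-mode send legal             */
  MPI_Wait(&sreq,MPI_STATUS_IGNORE);
  MPI_Wait(&rreq,MPI_STATUS_IGNORE);
  MPI_Request_free(&rreq);
  MPI_Request_free(&sreq);
}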
443: /* --------------------------------------------------------------------------------------*/
444: /*
445: Special optimization to check whether the local part of the scatter is actually
446: a contiguous copy; if so, the scatter routines call PetscMemcpy() instead of an indexed loop.
447:
448: */
451: PetscErrorCode VecScatterLocalOptimizeCopy_Private(VecScatter_Seq_General *gen_to,VecScatter_Seq_General *gen_from,PetscInt bs)
452: {
453: PetscInt n = gen_to->n,i,*to_slots = gen_to->slots,*from_slots = gen_from->slots;
454: PetscInt to_start,from_start;
455:
457: to_start = to_slots[0];
458: from_start = from_slots[0];
460: for (i=1; i<n; i++) {
461: to_start += bs;
462: from_start += bs;
463: if (to_slots[i] != to_start) return(0);
464: if (from_slots[i] != from_start) return(0);
465: }
466: gen_to->is_copy = PETSC_TRUE;
467: gen_to->copy_start = to_slots[0];
468: gen_to->copy_length = bs*sizeof(PetscScalar)*n;
469: gen_from->is_copy = PETSC_TRUE;
470: gen_from->copy_start = from_slots[0];
471: gen_from->copy_length = bs*sizeof(PetscScalar)*n;
473: PetscLogInfo(0,"VecScatterLocalOptimizeCopy_Private:Local scatter is a copy, optimizing for it\n");
475: return(0);
476: }
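/*
   Editorial illustration (not part of vpscat.c): the contiguity test above succeeds exactly
   when both slot arrays advance by the block size bs at every step, e.g. with hypothetical
   values bs = 4, to_slots = {0,4,8}, from_slots = {96,100,104}; the element-by-element local
   loop then collapses to a single PetscMemcpy() of n*bs scalars. The same test as a small
   standalone helper:
*/
static int sketch_is_stride_copy(const int *to_slots,const int *from_slots,int n,int bs)
{
  int i;
  for (i=1; i<n; i++) {
    if (to_slots[i]   != to_slots[0]   + i*bs) return 0;  /* destination not contiguous  */
    if (from_slots[i] != from_slots[0] + i*bs) return 0;  /* source not contiguous       */
  }
  return 1;                                               /* local scatter is one memcpy */
}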
478: /* --------------------------------------------------------------------------------------*/
482: PetscErrorCode VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
483: {
484: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
485: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
486: PetscErrorCode ierr;
487: PetscInt len,ny,bs = in_from->bs;
490: out->postrecvs = in->postrecvs;
491: out->begin = in->begin;
492: out->end = in->end;
493: out->copy = in->copy;
494: out->destroy = in->destroy;
495: out->view = in->view;
497: /* allocate entire send scatter context */
498: PetscNew(VecScatter_MPI_General,&out_to);
499: PetscMemzero(out_to,sizeof(VecScatter_MPI_General));
500: PetscNew(VecScatter_MPI_General,&out_from);
501: PetscMemzero(out_from,sizeof(VecScatter_MPI_General));
503: ny = in_to->starts[in_to->n];
504: len = ny*(sizeof(PetscInt) + bs*sizeof(PetscScalar)) + (in_to->n+1)*sizeof(PetscInt) +
505: (in_to->n)*(sizeof(PetscInt) + sizeof(MPI_Request));
506: out_to->n = in_to->n;
507: out_to->type = in_to->type;
508: out_to->sendfirst = in_to->sendfirst;
510: PetscMalloc(len,&out_to->values);
511: out_to->requests = (MPI_Request*)(out_to->values + bs*ny);
512: out_to->indices = (PetscInt*)(out_to->requests + out_to->n);
513: out_to->starts = (PetscInt*)(out_to->indices + ny);
514: out_to->procs = (PetscMPIInt*)(out_to->starts + out_to->n + 1);
515: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
516: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
517: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
518: PetscMalloc(2*(PetscMax(in_to->n,in_from->n)+1)*sizeof(MPI_Status),&out_to->sstatus);
519: out_to->rstatus = out_to->sstatus + PetscMax(in_to->n,in_from->n) + 1;
520:
521: out->todata = (void*)out_to;
522: out_to->local.n = in_to->local.n;
523: out_to->local.nonmatching_computed = PETSC_FALSE;
524: out_to->local.n_nonmatching = 0;
525: out_to->local.slots_nonmatching = 0;
526: if (in_to->local.n) {
527: PetscMalloc2(in_to->local.n,PetscInt,&out_to->local.slots,in_from->local.n,PetscInt,&out_from->local.slots);
528: PetscMemcpy(out_to->local.slots,in_to->local.slots,in_to->local.n*sizeof(PetscInt));
529: PetscMemcpy(out_from->local.slots,in_from->local.slots,in_from->local.n*sizeof(PetscInt));
530: } else {
531: out_to->local.slots = 0;
532: out_from->local.slots = 0;
533: }
535: /* allocate entire receive context */
536: out_from->type = in_from->type;
537: ny = in_from->starts[in_from->n];
538: len = ny*(sizeof(PetscInt) + bs*sizeof(PetscScalar)) + (in_from->n+1)*sizeof(PetscInt) +
539: (in_from->n)*(sizeof(PetscInt) + sizeof(MPI_Request));
540: out_from->n = in_from->n;
541: out_from->sendfirst = in_from->sendfirst;
542: PetscMalloc(len,&out_from->values);
543: out_from->requests = (MPI_Request*)(out_from->values + bs*ny);
544: out_from->indices = (PetscInt*)(out_from->requests + out_from->n);
545: out_from->starts = (PetscInt*)(out_from->indices + ny);
546: out_from->procs = (PetscMPIInt*)(out_from->starts + out_from->n + 1);
547: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
548: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
549: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
550: out->fromdata = (void*)out_from;
551: out_from->local.n = in_from->local.n;
552: out_from->local.nonmatching_computed = PETSC_FALSE;
553: out_from->local.n_nonmatching = 0;
554: out_from->local.slots_nonmatching = 0;
556: /*
557: set up the persistent request arrays for use with MPI_Send_init() and MPI_Recv_init()
558: */
559: {
560: PetscMPIInt tag;
561: MPI_Comm comm;
562: PetscInt *sstarts = out_to->starts, *rstarts = out_from->starts;
563: PetscMPIInt *sprocs = out_to->procs, *rprocs = out_from->procs;
564: PetscInt i;
565: PetscTruth flg;
566: MPI_Request *swaits = out_to->requests,*rwaits = out_from->requests;
567: MPI_Request *rev_swaits,*rev_rwaits;
568: PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;
570: PetscMalloc2(in_to->n,MPI_Request,&out_to->rev_requests,in_from->n,MPI_Request,&out_from->rev_requests);
572: rev_rwaits = out_to->rev_requests;
573: rev_swaits = out_from->rev_requests;
575: out_from->bs = out_to->bs = bs;
576: tag = out->tag;
577: comm = out->comm;
579: /* Register the receives that you will use later (sends for scatter reverse) */
580: for (i=0; i<out_from->n; i++) {
581: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
582: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
583: }
585: PetscOptionsHasName(PETSC_NULL,"-vecscatter_rr",&flg);
586: if (flg) {
587: out->postrecvs = VecScatterPostRecvs_PtoP_X;
588: out_to->use_readyreceiver = PETSC_TRUE;
589: out_from->use_readyreceiver = PETSC_TRUE;
590: for (i=0; i<out_to->n; i++) {
591: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
592: }
593: PetscLogInfo(0,"VecScatterCopy_PtoP_X:Using VecScatter ready receiver mode\n");
594: } else {
595: out->postrecvs = 0;
596: out_to->use_readyreceiver = PETSC_FALSE;
597: out_from->use_readyreceiver = PETSC_FALSE;
598: flg = PETSC_FALSE;
599: PetscOptionsHasName(PETSC_NULL,"-vecscatter_ssend",&flg);
600: if (flg) {
601: PetscLogInfo(0,"VecScatterCopy_PtoP_X:Using VecScatter Ssend mode\n");
602: }
603: for (i=0; i<out_to->n; i++) {
604: if (!flg) {
605: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
606: } else {
607: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
608: }
609: }
610: }
611: /* Register receives for scatter reverse */
612: for (i=0; i<out_to->n; i++) {
613: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
614: }
615: }
617: return(0);
618: }
620: /* --------------------------------------------------------------------------------------*/
624: PetscErrorCode VecScatterBegin_PtoP_12(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
625: {
626: VecScatter_MPI_General *gen_to,*gen_from;
627: PetscScalar *xv,*yv,*val,*svalues;
628: MPI_Request *rwaits,*swaits;
629: PetscInt *indices,*sstarts,iend,i,j,nrecvs,nsends,idx,len;
630: PetscErrorCode ierr;
633: VecGetArray(xin,&xv);
634: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
636: if (mode & SCATTER_REVERSE) {
637: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
638: gen_from = (VecScatter_MPI_General*)ctx->todata;
639: rwaits = gen_from->rev_requests;
640: swaits = gen_to->rev_requests;
641: } else {
642: gen_to = (VecScatter_MPI_General*)ctx->todata;
643: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
644: rwaits = gen_from->requests;
645: swaits = gen_to->requests;
646: }
647: svalues = gen_to->values;
648: nrecvs = gen_from->n;
649: nsends = gen_to->n;
650: indices = gen_to->indices;
651: sstarts = gen_to->starts;
653: if (!(mode & SCATTER_LOCAL)) {
655: if (!gen_from->use_readyreceiver && !gen_to->sendfirst) {
656: /* post receives since they were not posted in VecScatterPostRecvs() */
657: MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
658: }
659: if (ctx->packtogether) {
660: /* this version packs all the messages together and sends, when -vecscatter_packtogether used */
661: len = 12*sstarts[nsends];
662: val = svalues;
663: for (i=0; i<len; i += 12) {
664: idx = *indices++;
665: val[0] = xv[idx];
666: val[1] = xv[idx+1];
667: val[2] = xv[idx+2];
668: val[3] = xv[idx+3];
669: val[4] = xv[idx+4];
670: val[5] = xv[idx+5];
671: val[6] = xv[idx+6];
672: val[7] = xv[idx+7];
673: val[8] = xv[idx+8];
674: val[9] = xv[idx+9];
675: val[10] = xv[idx+10];
676: val[11] = xv[idx+11];
677: val += 12;
678: }
679: MPI_Startall_isend(len,nsends,swaits);
680: } else {
681: /* this version packs and sends one at a time */
682: val = svalues;
683: for (i=0; i<nsends; i++) {
684: iend = sstarts[i+1]-sstarts[i];
686: for (j=0; j<iend; j++) {
687: idx = *indices++;
688: val[0] = xv[idx];
689: val[1] = xv[idx+1];
690: val[2] = xv[idx+2];
691: val[3] = xv[idx+3];
692: val[4] = xv[idx+4];
693: val[5] = xv[idx+5];
694: val[6] = xv[idx+6];
695: val[7] = xv[idx+7];
696: val[8] = xv[idx+8];
697: val[9] = xv[idx+9];
698: val[10] = xv[idx+10];
699: val[11] = xv[idx+11];
700: val += 12;
701: }
702: MPI_Start_isend(12*iend,swaits+i);
703: }
704: }
706: if (!gen_from->use_readyreceiver && gen_to->sendfirst) {
707: /* post receives since they were not posted in VecScatterPostRecvs() */
708: MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
709: }
710: }
712: /* take care of local scatters */
713: if (gen_to->local.n) {
714: PetscInt *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
715: PetscInt n = gen_to->local.n,il,ir;
716: if (addv == INSERT_VALUES) {
717: if (gen_to->local.is_copy) {
718: PetscMemcpy(yv+gen_from->local.copy_start,xv+gen_to->local.copy_start,gen_to->local.copy_length);
719: } else {
720: for (i=0; i<n; i++) {
721: il = fslots[i]; ir = tslots[i];
722: yv[il] = xv[ir];
723: yv[il+1] = xv[ir+1];
724: yv[il+2] = xv[ir+2];
725: yv[il+3] = xv[ir+3];
726: yv[il+4] = xv[ir+4];
727: yv[il+5] = xv[ir+5];
728: yv[il+6] = xv[ir+6];
729: yv[il+7] = xv[ir+7];
730: yv[il+8] = xv[ir+8];
731: yv[il+9] = xv[ir+9];
732: yv[il+10] = xv[ir+10];
733: yv[il+11] = xv[ir+11];
734: }
735: }
736: } else if (addv == ADD_VALUES) {
737: for (i=0; i<n; i++) {
738: il = fslots[i]; ir = tslots[i];
739: yv[il] += xv[ir];
740: yv[il+1] += xv[ir+1];
741: yv[il+2] += xv[ir+2];
742: yv[il+3] += xv[ir+3];
743: yv[il+4] += xv[ir+4];
744: yv[il+5] += xv[ir+5];
745: yv[il+6] += xv[ir+6];
746: yv[il+7] += xv[ir+7];
747: yv[il+8] += xv[ir+8];
748: yv[il+9] += xv[ir+9];
749: yv[il+10] += xv[ir+10];
750: yv[il+11] += xv[ir+11];
751: }
752: #if !defined(PETSC_USE_COMPLEX)
753: } else if (addv == MAX_VALUES) {
754: for (i=0; i<n; i++) {
755: il = fslots[i]; ir = tslots[i];
756: yv[il] = PetscMax(yv[il],xv[ir]);
757: yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
758: yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
759: yv[il+3] = PetscMax(yv[il+3],xv[ir+3]);
760: yv[il+4] = PetscMax(yv[il+4],xv[ir+4]);
761: yv[il+5] = PetscMax(yv[il+5],xv[ir+5]);
762: yv[il+6] = PetscMax(yv[il+6],xv[ir+6]);
763: yv[il+7] = PetscMax(yv[il+7],xv[ir+7]);
764: yv[il+8] = PetscMax(yv[il+8],xv[ir+8]);
765: yv[il+9] = PetscMax(yv[il+9],xv[ir+9]);
766: yv[il+10] = PetscMax(yv[il+10],xv[ir+10]);
767: yv[il+11] = PetscMax(yv[il+11],xv[ir+11]);
768: }
769: #endif
770: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
771: }
772: VecRestoreArray(xin,&xv);
773: if (xin != yin) {VecRestoreArray(yin,&yv);}
774: return(0);
775: }
777: /* --------------------------------------------------------------------------------------*/
781: PetscErrorCode VecScatterEnd_PtoP_12(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
782: {
783: VecScatter_MPI_General *gen_to,*gen_from;
784: PetscScalar *rvalues,*yv,*val;
785: PetscErrorCode ierr;
786: PetscMPIInt imdex;
787: PetscInt nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
788: MPI_Request *rwaits,*swaits;
789: MPI_Status *rstatus,*sstatus;
792: if (mode & SCATTER_LOCAL) return(0);
793: VecGetArray(yin,&yv);
795: if (mode & SCATTER_REVERSE) {
796: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
797: gen_from = (VecScatter_MPI_General*)ctx->todata;
798: rwaits = gen_from->rev_requests;
799: swaits = gen_to->rev_requests;
800: sstatus = gen_from->sstatus;
801: rstatus = gen_from->rstatus;
802: } else {
803: gen_to = (VecScatter_MPI_General*)ctx->todata;
804: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
805: rwaits = gen_from->requests;
806: swaits = gen_to->requests;
807: sstatus = gen_to->sstatus;
808: rstatus = gen_to->rstatus;
809: }
810: rvalues = gen_from->values;
811: nrecvs = gen_from->n;
812: nsends = gen_to->n;
813: indices = gen_from->indices;
814: rstarts = gen_from->starts;
816: /* wait on receives */
817: count = nrecvs;
818: if (ctx->packtogether) { /* receive all messages, then unpack all, when -vecscatter_packtogether used */
819: MPI_Waitall(nrecvs,rwaits,rstatus);
820: n = rstarts[count];
821: val = rvalues;
822: lindices = indices;
823: if (addv == INSERT_VALUES) {
824: for (i=0; i<n; i++) {
825: idx = lindices[i];
826: yv[idx] = val[0];
827: yv[idx+1] = val[1];
828: yv[idx+2] = val[2];
829: yv[idx+3] = val[3];
830: yv[idx+4] = val[4];
831: yv[idx+5] = val[5];
832: yv[idx+6] = val[6];
833: yv[idx+7] = val[7];
834: yv[idx+8] = val[8];
835: yv[idx+9] = val[9];
836: yv[idx+10] = val[10];
837: yv[idx+11] = val[11];
838: val += 12;
839: }
840: } else if (addv == ADD_VALUES) {
841: for (i=0; i<n; i++) {
842: idx = lindices[i];
843: yv[idx] += val[0];
844: yv[idx+1] += val[1];
845: yv[idx+2] += val[2];
846: yv[idx+3] += val[3];
847: yv[idx+4] += val[4];
848: yv[idx+5] += val[5];
849: yv[idx+6] += val[6];
850: yv[idx+7] += val[7];
851: yv[idx+8] += val[8];
852: yv[idx+9] += val[9];
853: yv[idx+10] += val[10];
854: yv[idx+11] += val[11];
855: val += 12;
856: }
857: #if !defined(PETSC_USE_COMPLEX)
858: } else if (addv == MAX_VALUES) {
859: for (i=0; i<n; i++) {
860: idx = lindices[i];
861: yv[idx] = PetscMax(yv[idx],val[0]);
862: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
863: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
864: yv[idx+3] = PetscMax(yv[idx+3],val[3]);
865: yv[idx+4] = PetscMax(yv[idx+4],val[4]);
866: yv[idx+5] = PetscMax(yv[idx+5],val[5]);
867: yv[idx+6] = PetscMax(yv[idx+6],val[6]);
868: yv[idx+7] = PetscMax(yv[idx+7],val[7]);
869: yv[idx+8] = PetscMax(yv[idx+8],val[8]);
870: yv[idx+9] = PetscMax(yv[idx+9],val[9]);
871: yv[idx+10] = PetscMax(yv[idx+10],val[10]);
872: yv[idx+11] = PetscMax(yv[idx+11],val[11]);
873: val += 12;
874: }
875: #endif
876: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
877: } else { /* unpack each message as it arrives, default version */
878: while (count) {
879: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus[0]);
880: /* unpack receives into our local space */
881: val = rvalues + 12*rstarts[imdex];
882: lindices = indices + rstarts[imdex];
883: n = rstarts[imdex+1] - rstarts[imdex];
884: if (addv == INSERT_VALUES) {
885: for (i=0; i<n; i++) {
886: idx = lindices[i];
887: yv[idx] = val[0];
888: yv[idx+1] = val[1];
889: yv[idx+2] = val[2];
890: yv[idx+3] = val[3];
891: yv[idx+4] = val[4];
892: yv[idx+5] = val[5];
893: yv[idx+6] = val[6];
894: yv[idx+7] = val[7];
895: yv[idx+8] = val[8];
896: yv[idx+9] = val[9];
897: yv[idx+10] = val[10];
898: yv[idx+11] = val[11];
899: val += 12;
900: }
901: } else if (addv == ADD_VALUES) {
902: for (i=0; i<n; i++) {
903: idx = lindices[i];
904: yv[idx] += val[0];
905: yv[idx+1] += val[1];
906: yv[idx+2] += val[2];
907: yv[idx+3] += val[3];
908: yv[idx+4] += val[4];
909: yv[idx+5] += val[5];
910: yv[idx+6] += val[6];
911: yv[idx+7] += val[7];
912: yv[idx+8] += val[8];
913: yv[idx+9] += val[9];
914: yv[idx+10] += val[10];
915: yv[idx+11] += val[11];
916: val += 12;
917: }
918: #if !defined(PETSC_USE_COMPLEX)
919: } else if (addv == MAX_VALUES) {
920: for (i=0; i<n; i++) {
921: idx = lindices[i];
922: yv[idx] = PetscMax(yv[idx],val[0]);
923: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
924: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
925: yv[idx+3] = PetscMax(yv[idx+3],val[3]);
926: yv[idx+4] = PetscMax(yv[idx+4],val[4]);
927: yv[idx+5] = PetscMax(yv[idx+5],val[5]);
928: yv[idx+6] = PetscMax(yv[idx+6],val[6]);
929: yv[idx+7] = PetscMax(yv[idx+7],val[7]);
930: yv[idx+8] = PetscMax(yv[idx+8],val[8]);
931: yv[idx+9] = PetscMax(yv[idx+9],val[9]);
932: yv[idx+10] = PetscMax(yv[idx+10],val[10]);
933: yv[idx+11] = PetscMax(yv[idx+11],val[11]);
934: val += 12;
935: }
936: #endif
937: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
938: count--;
939: }
940: }
941: /* wait on sends */
942: if (nsends) {
943: MPI_Waitall(nsends,swaits,sstatus);
944: }
945: VecRestoreArray(yin,&yv);
946: return(0);
947: }
949: /* --------------------------------------------------------------------------------------*/
953: PetscErrorCode VecScatterBegin_PtoP_6(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
954: {
955: VecScatter_MPI_General *gen_to,*gen_from;
956: PetscScalar *xv,*yv,*val,*svalues;
957: MPI_Request *rwaits,*swaits;
958: PetscErrorCode ierr;
959: PetscInt i,*indices,*sstarts,iend,j,nrecvs,nsends,idx;
962: VecGetArray(xin,&xv);
963: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
964: if (mode & SCATTER_REVERSE) {
965: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
966: gen_from = (VecScatter_MPI_General*)ctx->todata;
967: rwaits = gen_from->rev_requests;
968: swaits = gen_to->rev_requests;
969: } else {
970: gen_to = (VecScatter_MPI_General*)ctx->todata;
971: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
972: rwaits = gen_from->requests;
973: swaits = gen_to->requests;
974: }
975: svalues = gen_to->values;
976: nrecvs = gen_from->n;
977: nsends = gen_to->n;
978: indices = gen_to->indices;
979: sstarts = gen_to->starts;
981: if (!(mode & SCATTER_LOCAL)) {
983: if (gen_to->sendfirst) {
984: /* this version packs and sends one at a time */
985: val = svalues;
986: for (i=0; i<nsends; i++) {
987: iend = sstarts[i+1]-sstarts[i];
989: for (j=0; j<iend; j++) {
990: idx = *indices++;
991: val[0] = xv[idx];
992: val[1] = xv[idx+1];
993: val[2] = xv[idx+2];
994: val[3] = xv[idx+3];
995: val[4] = xv[idx+4];
996: val[5] = xv[idx+5];
997: val += 6;
998: }
999: MPI_Start_isend(6*iend,swaits+i);
1000: }
1001: }
1003: if (!gen_from->use_readyreceiver) {
1004: /* post receives since they were not posted in VecScatterPostRecvs() */
1005: MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
1006: }
1008: if (!gen_to->sendfirst) {
1009: /* this version packs all the messages together and sends */
1010: /*
1011: len = 6*sstarts[nsends];
1012: val = svalues;
1013: for (i=0; i<len; i += 6) {
1014: idx = *indices++;
1015: val[0] = xv[idx];
1016: val[1] = xv[idx+1];
1017: val[2] = xv[idx+2];
1018: val[3] = xv[idx+3];
1019: val[4] = xv[idx+4]; val[5] = xv[idx+5];
1020: val += 6;
1021: }
1022: MPI_Startall_isend(len,nsends,swaits);
1023: */
1025: /* this version packs and sends one at a time */
1026: val = svalues;
1027: for (i=0; i<nsends; i++) {
1028: iend = sstarts[i+1]-sstarts[i];
1030: for (j=0; j<iend; j++) {
1031: idx = *indices++;
1032: val[0] = xv[idx];
1033: val[1] = xv[idx+1];
1034: val[2] = xv[idx+2];
1035: val[3] = xv[idx+3];
1036: val[4] = xv[idx+4];
1037: val[5] = xv[idx+5];
1038: val += 6;
1039: }
1040: MPI_Start_isend(6*iend,swaits+i);
1041: }
1042: }
1043: }
1045: /* take care of local scatters */
1046: if (gen_to->local.n) {
1047: PetscInt *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
1048: PetscInt n = gen_to->local.n,il,ir;
1049: if (addv == INSERT_VALUES) {
1050: if (gen_to->local.is_copy) {
1051: PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
1052: } else {
1053: for (i=0; i<n; i++) {
1054: il = fslots[i]; ir = tslots[i];
1055: yv[il] = xv[ir];
1056: yv[il+1] = xv[ir+1];
1057: yv[il+2] = xv[ir+2];
1058: yv[il+3] = xv[ir+3];
1059: yv[il+4] = xv[ir+4];
1060: yv[il+5] = xv[ir+5];
1061: }
1062: }
1063: } else if (addv == ADD_VALUES) {
1064: for (i=0; i<n; i++) {
1065: il = fslots[i]; ir = tslots[i];
1066: yv[il] += xv[ir];
1067: yv[il+1] += xv[ir+1];
1068: yv[il+2] += xv[ir+2];
1069: yv[il+3] += xv[ir+3];
1070: yv[il+4] += xv[ir+4];
1071: yv[il+5] += xv[ir+5];
1072: }
1073: #if !defined(PETSC_USE_COMPLEX)
1074: } else if (addv == MAX_VALUES) {
1075: for (i=0; i<n; i++) {
1076: il = fslots[i]; ir = tslots[i];
1077: yv[il] = PetscMax(yv[il],xv[ir]);
1078: yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1079: yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
1080: yv[il+3] = PetscMax(yv[il+3],xv[ir+3]);
1081: yv[il+4] = PetscMax(yv[il+4],xv[ir+4]);
1082: yv[il+5] = PetscMax(yv[il+5],xv[ir+5]);
1083: }
1084: #endif
1085: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1086: }
1087: VecRestoreArray(xin,&xv);
1088: if (xin != yin) {VecRestoreArray(yin,&yv);}
1089: return(0);
1090: }
1092: /* --------------------------------------------------------------------------------------*/
1096: PetscErrorCode VecScatterEnd_PtoP_6(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1097: {
1098: VecScatter_MPI_General *gen_to,*gen_from;
1099: PetscScalar *rvalues,*yv,*val;
1100: PetscErrorCode ierr;
1101: PetscInt nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
1102: PetscMPIInt imdex;
1103: MPI_Request *rwaits,*swaits;
1104: MPI_Status rstatus,*sstatus;
1107: if (mode & SCATTER_LOCAL) return(0);
1108: VecGetArray(yin,&yv);
1110: if (mode & SCATTER_REVERSE) {
1111: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1112: gen_from = (VecScatter_MPI_General*)ctx->todata;
1113: rwaits = gen_from->rev_requests;
1114: swaits = gen_to->rev_requests;
1115: sstatus = gen_from->sstatus;
1116: } else {
1117: gen_to = (VecScatter_MPI_General*)ctx->todata;
1118: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1119: rwaits = gen_from->requests;
1120: swaits = gen_to->requests;
1121: sstatus = gen_to->sstatus;
1122: }
1123: rvalues = gen_from->values;
1124: nrecvs = gen_from->n;
1125: nsends = gen_to->n;
1126: indices = gen_from->indices;
1127: rstarts = gen_from->starts;
1129: /* wait on receives */
1130: count = nrecvs;
1131: while (count) {
1132: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
1133: /* unpack receives into our local space */
1134: val = rvalues + 6*rstarts[imdex];
1135: lindices = indices + rstarts[imdex];
1136: n = rstarts[imdex+1] - rstarts[imdex];
1137: if (addv == INSERT_VALUES) {
1138: for (i=0; i<n; i++) {
1139: idx = lindices[i];
1140: yv[idx] = val[0];
1141: yv[idx+1] = val[1];
1142: yv[idx+2] = val[2];
1143: yv[idx+3] = val[3];
1144: yv[idx+4] = val[4];
1145: yv[idx+5] = val[5];
1146: val += 6;
1147: }
1148: } else if (addv == ADD_VALUES) {
1149: for (i=0; i<n; i++) {
1150: idx = lindices[i];
1151: yv[idx] += val[0];
1152: yv[idx+1] += val[1];
1153: yv[idx+2] += val[2];
1154: yv[idx+3] += val[3];
1155: yv[idx+4] += val[4];
1156: yv[idx+5] += val[5];
1157: val += 6;
1158: }
1159: #if !defined(PETSC_USE_COMPLEX)
1160: } else if (addv == MAX_VALUES) {
1161: for (i=0; i<n; i++) {
1162: idx = lindices[i];
1163: yv[idx] = PetscMax(yv[idx],val[0]);
1164: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1165: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
1166: yv[idx+3] = PetscMax(yv[idx+3],val[3]);
1167: yv[idx+4] = PetscMax(yv[idx+4],val[4]);
1168: yv[idx+5] = PetscMax(yv[idx+5],val[5]);
1169: val += 6;
1170: }
1171: #endif
1172: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1173: count--;
1174: }
1175: /* wait on sends */
1176: if (nsends) {
1177: MPI_Waitall(nsends,swaits,sstatus);
1178: }
1179: VecRestoreArray(yin,&yv);
1180: return(0);
1181: }
1183: /* --------------------------------------------------------------------------------------*/
1187: PetscErrorCode VecScatterBegin_PtoP_5(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1188: {
1189: VecScatter_MPI_General *gen_to,*gen_from;
1190: PetscScalar *xv,*yv,*val,*svalues;
1191: MPI_Request *rwaits,*swaits;
1192: PetscErrorCode ierr;
1193: PetscInt i,*indices,*sstarts,iend,j,nrecvs,nsends,idx;
1196: VecGetArray(xin,&xv);
1197: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
1198: if (mode & SCATTER_REVERSE) {
1199: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1200: gen_from = (VecScatter_MPI_General*)ctx->todata;
1201: rwaits = gen_from->rev_requests;
1202: swaits = gen_to->rev_requests;
1203: } else {
1204: gen_to = (VecScatter_MPI_General*)ctx->todata;
1205: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1206: rwaits = gen_from->requests;
1207: swaits = gen_to->requests;
1208: }
1209: svalues = gen_to->values;
1210: nrecvs = gen_from->n;
1211: nsends = gen_to->n;
1212: indices = gen_to->indices;
1213: sstarts = gen_to->starts;
1215: if (!(mode & SCATTER_LOCAL)) {
1217: if (gen_to->sendfirst) {
1218: /* this version packs and sends one at a time */
1219: val = svalues;
1220: for (i=0; i<nsends; i++) {
1221: iend = sstarts[i+1]-sstarts[i];
1223: for (j=0; j<iend; j++) {
1224: idx = *indices++;
1225: val[0] = xv[idx];
1226: val[1] = xv[idx+1];
1227: val[2] = xv[idx+2];
1228: val[3] = xv[idx+3];
1229: val[4] = xv[idx+4];
1230: val += 5;
1231: }
1232: MPI_Start_isend(5*iend,swaits+i);
1233: }
1234: }
1236: if (!gen_from->use_readyreceiver) {
1237: /* post receives since they were not posted in VecScatterPostRecvs() */
1238: MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
1239: }
1241: if (!gen_to->sendfirst) {
1242: /* this version packs all the messages together and sends */
1243: /*
1244: len = 5*sstarts[nsends];
1245: val = svalues;
1246: for (i=0; i<len; i += 5) {
1247: idx = *indices++;
1248: val[0] = xv[idx];
1249: val[1] = xv[idx+1];
1250: val[2] = xv[idx+2];
1251: val[3] = xv[idx+3];
1252: val[4] = xv[idx+4];
1253: val += 5;
1254: }
1255: MPI_Startall_isend(len,nsends,swaits);
1256: */
1258: /* this version packs and sends one at a time */
1259: val = svalues;
1260: for (i=0; i<nsends; i++) {
1261: iend = sstarts[i+1]-sstarts[i];
1263: for (j=0; j<iend; j++) {
1264: idx = *indices++;
1265: val[0] = xv[idx];
1266: val[1] = xv[idx+1];
1267: val[2] = xv[idx+2];
1268: val[3] = xv[idx+3];
1269: val[4] = xv[idx+4];
1270: val += 5;
1271: }
1272: MPI_Start_isend(5*iend,swaits+i);
1273: }
1274: }
1275: }
1277: /* take care of local scatters */
1278: if (gen_to->local.n) {
1279: PetscInt *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
1280: PetscInt n = gen_to->local.n,il,ir;
1281: if (addv == INSERT_VALUES) {
1282: if (gen_to->local.is_copy) {
1283: PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
1284: } else {
1285: for (i=0; i<n; i++) {
1286: il = fslots[i]; ir = tslots[i];
1287: yv[il] = xv[ir];
1288: yv[il+1] = xv[ir+1];
1289: yv[il+2] = xv[ir+2];
1290: yv[il+3] = xv[ir+3];
1291: yv[il+4] = xv[ir+4];
1292: }
1293: }
1294: } else if (addv == ADD_VALUES) {
1295: for (i=0; i<n; i++) {
1296: il = fslots[i]; ir = tslots[i];
1297: yv[il] += xv[ir];
1298: yv[il+1] += xv[ir+1];
1299: yv[il+2] += xv[ir+2];
1300: yv[il+3] += xv[ir+3];
1301: yv[il+4] += xv[ir+4];
1302: }
1303: #if !defined(PETSC_USE_COMPLEX)
1304: } else if (addv == MAX_VALUES) {
1305: for (i=0; i<n; i++) {
1306: il = fslots[i]; ir = tslots[i];
1307: yv[il] = PetscMax(yv[il],xv[ir]);
1308: yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1309: yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
1310: yv[il+3] = PetscMax(yv[il+3],xv[ir+3]);
1311: yv[il+4] = PetscMax(yv[il+4],xv[ir+4]);
1312: }
1313: #endif
1314: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1315: }
1316: VecRestoreArray(xin,&xv);
1317: if (xin != yin) {VecRestoreArray(yin,&yv);}
1318: return(0);
1319: }
1321: /* --------------------------------------------------------------------------------------*/
1325: PetscErrorCode VecScatterEnd_PtoP_5(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1326: {
1327: VecScatter_MPI_General *gen_to,*gen_from;
1328: PetscScalar *rvalues,*yv,*val;
1329: PetscErrorCode ierr;
1330: PetscInt nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
1331: PetscMPIInt imdex;
1332: MPI_Request *rwaits,*swaits;
1333: MPI_Status rstatus,*sstatus;
1336: if (mode & SCATTER_LOCAL) return(0);
1337: VecGetArray(yin,&yv);
1339: if (mode & SCATTER_REVERSE) {
1340: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1341: gen_from = (VecScatter_MPI_General*)ctx->todata;
1342: rwaits = gen_from->rev_requests;
1343: swaits = gen_to->rev_requests;
1344: sstatus = gen_from->sstatus;
1345: } else {
1346: gen_to = (VecScatter_MPI_General*)ctx->todata;
1347: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1348: rwaits = gen_from->requests;
1349: swaits = gen_to->requests;
1350: sstatus = gen_to->sstatus;
1351: }
1352: rvalues = gen_from->values;
1353: nrecvs = gen_from->n;
1354: nsends = gen_to->n;
1355: indices = gen_from->indices;
1356: rstarts = gen_from->starts;
1358: /* wait on receives */
1359: count = nrecvs;
1360: while (count) {
1361: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
1362: /* unpack receives into our local space */
1363: val = rvalues + 5*rstarts[imdex];
1364: lindices = indices + rstarts[imdex];
1365: n = rstarts[imdex+1] - rstarts[imdex];
1366: if (addv == INSERT_VALUES) {
1367: for (i=0; i<n; i++) {
1368: idx = lindices[i];
1369: yv[idx] = val[0];
1370: yv[idx+1] = val[1];
1371: yv[idx+2] = val[2];
1372: yv[idx+3] = val[3];
1373: yv[idx+4] = val[4];
1374: val += 5;
1375: }
1376: } else if (addv == ADD_VALUES) {
1377: for (i=0; i<n; i++) {
1378: idx = lindices[i];
1379: yv[idx] += val[0];
1380: yv[idx+1] += val[1];
1381: yv[idx+2] += val[2];
1382: yv[idx+3] += val[3];
1383: yv[idx+4] += val[4];
1384: val += 5;
1385: }
1386: #if !defined(PETSC_USE_COMPLEX)
1387: } else if (addv == MAX_VALUES) {
1388: for (i=0; i<n; i++) {
1389: idx = lindices[i];
1390: yv[idx] = PetscMax(yv[idx],val[0]);
1391: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1392: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
1393: yv[idx+3] = PetscMax(yv[idx+3],val[3]);
1394: yv[idx+4] = PetscMax(yv[idx+4],val[4]);
1395: val += 5;
1396: }
1397: #endif
1398: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1399: count--;
1400: }
1401: /* wait on sends */
1402: if (nsends) {
1403: MPI_Waitall(nsends,swaits,sstatus);
1404: }
1405: VecRestoreArray(yin,&yv);
1406: return(0);
1407: }
1409: /* --------------------------------------------------------------------------------------*/
1413: PetscErrorCode VecScatterBegin_PtoP_4(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1414: {
1415: VecScatter_MPI_General *gen_to,*gen_from;
1416: PetscScalar *xv,*yv,*val,*svalues;
1417: MPI_Request *rwaits,*swaits;
1418: PetscInt *indices,*sstarts,iend,i,j,nrecvs,nsends,idx,len;
1419: PetscErrorCode ierr;
1422: VecGetArray(xin,&xv);
1423: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
1425: if (mode & SCATTER_REVERSE) {
1426: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1427: gen_from = (VecScatter_MPI_General*)ctx->todata;
1428: rwaits = gen_from->rev_requests;
1429: swaits = gen_to->rev_requests;
1430: } else {
1431: gen_to = (VecScatter_MPI_General*)ctx->todata;
1432: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1433: rwaits = gen_from->requests;
1434: swaits = gen_to->requests;
1435: }
1436: svalues = gen_to->values;
1437: nrecvs = gen_from->n;
1438: nsends = gen_to->n;
1439: indices = gen_to->indices;
1440: sstarts = gen_to->starts;
1442: if (!(mode & SCATTER_LOCAL)) {
1444: if (!gen_from->use_readyreceiver && !gen_to->sendfirst) {
1445: /* post receives since they were not posted in VecScatterPostRecvs() */
1446: MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
1447: }
1449: if (ctx->packtogether) {
1450: /* this version packs all the messages together and sends, when -vecscatter_packtogether used */
1451: len = 4*sstarts[nsends];
1452: val = svalues;
1453: for (i=0; i<len; i += 4) {
1454: idx = *indices++;
1455: val[0] = xv[idx];
1456: val[1] = xv[idx+1];
1457: val[2] = xv[idx+2];
1458: val[3] = xv[idx+3];
1459: val += 4;
1460: }
1461: MPI_Startall_isend(len,nsends,swaits);
1462: } else {
1463: /* this version packs and sends one at a time, default */
1464: val = svalues;
1465: for (i=0; i<nsends; i++) {
1466: iend = sstarts[i+1]-sstarts[i];
1468: for (j=0; j<iend; j++) {
1469: idx = *indices++;
1470: val[0] = xv[idx];
1471: val[1] = xv[idx+1];
1472: val[2] = xv[idx+2];
1473: val[3] = xv[idx+3];
1474: val += 4;
1475: }
1476: MPI_Start_isend(4*iend,swaits+i);
1477: }
1478: }
1480: if (!gen_from->use_readyreceiver && gen_to->sendfirst) {
1481: /* post receives since they were not posted in VecScatterPostRecvs() */
1482: MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
1483: }
1484: }
1486: /* take care of local scatters */
1487: if (gen_to->local.n) {
1488: PetscInt *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
1489: PetscInt n = gen_to->local.n,il,ir;
1490: if (addv == INSERT_VALUES) {
1491: if (gen_to->local.is_copy) {
1492: PetscMemcpy(yv+gen_from->local.copy_start,xv+gen_to->local.copy_start,gen_to->local.copy_length);
1493: } else {
1494: for (i=0; i<n; i++) {
1495: il = fslots[i]; ir = tslots[i];
1496: yv[il] = xv[ir];
1497: yv[il+1] = xv[ir+1];
1498: yv[il+2] = xv[ir+2];
1499: yv[il+3] = xv[ir+3];
1500: }
1501: }
1502: } else if (addv == ADD_VALUES) {
1503: for (i=0; i<n; i++) {
1504: il = fslots[i]; ir = tslots[i];
1505: yv[il] += xv[ir];
1506: yv[il+1] += xv[ir+1];
1507: yv[il+2] += xv[ir+2];
1508: yv[il+3] += xv[ir+3];
1509: }
1510: #if !defined(PETSC_USE_COMPLEX)
1511: } else if (addv == MAX_VALUES) {
1512: for (i=0; i<n; i++) {
1513: il = fslots[i]; ir = tslots[i];
1514: yv[il] = PetscMax(yv[il],xv[ir]);
1515: yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1516: yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
1517: yv[il+3] = PetscMax(yv[il+3],xv[ir+3]);
1518: }
1519: #endif
1520: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1521: }
1522: VecRestoreArray(xin,&xv);
1523: if (xin != yin) {VecRestoreArray(yin,&yv);}
1524: return(0);
1525: }
1527: /* --------------------------------------------------------------------------------------*/
1531: PetscErrorCode VecScatterEnd_PtoP_4(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1532: {
1533: VecScatter_MPI_General *gen_to,*gen_from;
1534: PetscScalar *rvalues,*yv,*val;
1535: PetscErrorCode ierr;
1536: PetscInt nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
1537: PetscMPIInt imdex;
1538: MPI_Request *rwaits,*swaits;
1539: MPI_Status *rstatus,*sstatus;
1542: if (mode & SCATTER_LOCAL) return(0);
1543: VecGetArray(yin,&yv);
1545: if (mode & SCATTER_REVERSE) {
1546: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1547: gen_from = (VecScatter_MPI_General*)ctx->todata;
1548: rwaits = gen_from->rev_requests;
1549: swaits = gen_to->rev_requests;
1550: sstatus = gen_from->sstatus;
1551: rstatus = gen_from->rstatus;
1552: } else {
1553: gen_to = (VecScatter_MPI_General*)ctx->todata;
1554: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1555: rwaits = gen_from->requests;
1556: swaits = gen_to->requests;
1557: sstatus = gen_to->sstatus;
1558: rstatus = gen_to->rstatus;
1559: }
1560: rvalues = gen_from->values;
1561: nrecvs = gen_from->n;
1562: nsends = gen_to->n;
1563: indices = gen_from->indices;
1564: rstarts = gen_from->starts;
1566: /* wait on receives */
1567: count = nrecvs;
1568: if (ctx->packtogether) { /* receive all messages, then unpack all, when -vecscatter_packtogether used */
1569: MPI_Waitall(nrecvs,rwaits,rstatus);
1570: n = rstarts[count];
1571: val = rvalues;
1572: lindices = indices;
1573: if (addv == INSERT_VALUES) {
1574: for (i=0; i<n; i++) {
1575: idx = lindices[i];
1576: yv[idx] = val[0];
1577: yv[idx+1] = val[1];
1578: yv[idx+2] = val[2];
1579: yv[idx+3] = val[3];
1580: val += 4;
1581: }
1582: } else if (addv == ADD_VALUES) {
1583: for (i=0; i<n; i++) {
1584: idx = lindices[i];
1585: yv[idx] += val[0];
1586: yv[idx+1] += val[1];
1587: yv[idx+2] += val[2];
1588: yv[idx+3] += val[3];
1589: val += 4;
1590: }
1591: #if !defined(PETSC_USE_COMPLEX)
1592: } else if (addv == MAX_VALUES) {
1593: for (i=0; i<n; i++) {
1594: idx = lindices[i];
1595: yv[idx] = PetscMax(yv[idx],val[0]);
1596: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1597: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
1598: yv[idx+3] = PetscMax(yv[idx+3],val[3]);
1599: val += 4;
1600: }
1601: #endif
1602: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1603: } else { /* unpack each message as it arrives, default version */
1604: while (count) {
1605: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus[0]);
1606: /* unpack receives into our local space */
1607: val = rvalues + 4*rstarts[imdex];
1608: lindices = indices + rstarts[imdex];
1609: n = rstarts[imdex+1] - rstarts[imdex];
1610: if (addv == INSERT_VALUES) {
1611: for (i=0; i<n; i++) {
1612: idx = lindices[i];
1613: yv[idx] = val[0];
1614: yv[idx+1] = val[1];
1615: yv[idx+2] = val[2];
1616: yv[idx+3] = val[3];
1617: val += 4;
1618: }
1619: } else if (addv == ADD_VALUES) {
1620: for (i=0; i<n; i++) {
1621: idx = lindices[i];
1622: yv[idx] += val[0];
1623: yv[idx+1] += val[1];
1624: yv[idx+2] += val[2];
1625: yv[idx+3] += val[3];
1626: val += 4;
1627: }
1628: #if !defined(PETSC_USE_COMPLEX)
1629: } else if (addv == MAX_VALUES) {
1630: for (i=0; i<n; i++) {
1631: idx = lindices[i];
1632: yv[idx] = PetscMax(yv[idx],val[0]);
1633: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1634: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
1635: yv[idx+3] = PetscMax(yv[idx+3],val[3]);
1636: val += 4;
1637: }
1638: #endif
1639: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1640: count--;
1641: }
1642: }
1644: /* wait on sends */
1645: if (nsends) {
1646: MPI_Waitall(nsends,swaits,sstatus);
1647: }
1648: VecRestoreArray(yin,&yv);
1649: return(0);
1650: }
1652: /* --------------------------------------------------------------------------------------*/
1656: PetscErrorCode VecScatterBegin_PtoP_3(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1657: {
1658: VecScatter_MPI_General *gen_to,*gen_from;
1659: PetscScalar *xv,*yv,*val,*svalues;
1660: MPI_Request *rwaits,*swaits;
1661: PetscErrorCode ierr;
1662: PetscInt i,*indices,*sstarts,iend,j,nrecvs,nsends,idx;
1665: VecGetArray(xin,&xv);
1666: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
1668: if (mode & SCATTER_REVERSE) {
1669: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1670: gen_from = (VecScatter_MPI_General*)ctx->todata;
1671: rwaits = gen_from->rev_requests;
1672: swaits = gen_to->rev_requests;
1673: } else {
1674: gen_to = (VecScatter_MPI_General*)ctx->todata;
1675: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1676: rwaits = gen_from->requests;
1677: swaits = gen_to->requests;
1678: }
1679: svalues = gen_to->values;
1680: nrecvs = gen_from->n;
1681: nsends = gen_to->n;
1682: indices = gen_to->indices;
1683: sstarts = gen_to->starts;
1685: if (!(mode & SCATTER_LOCAL)) {
1687: if (gen_to->sendfirst) {
1688: /* this version packs and sends one at a time */
1689: val = svalues;
1690: for (i=0; i<nsends; i++) {
1691: iend = sstarts[i+1]-sstarts[i];
1693: for (j=0; j<iend; j++) {
1694: idx = *indices++;
1695: val[0] = xv[idx];
1696: val[1] = xv[idx+1];
1697: val[2] = xv[idx+2];
1698: val += 3;
1699: }
1700: MPI_Start_isend(3*iend,swaits+i);
1701: }
1702: }
1704: if (!gen_from->use_readyreceiver) {
1705: /* post receives since they were not posted in VecScatterPostRecvs() */
1706: MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
1707: }
1709: if (!gen_to->sendfirst) {
1710: /* this version packs all the messages together and sends */
1711: /*
1712: len = 3*sstarts[nsends];
1713: val = svalues;
1714: for (i=0; i<len; i += 3) {
1715: idx = *indices++;
1716: val[0] = xv[idx];
1717: val[1] = xv[idx+1];
1718: val[2] = xv[idx+2];
1719: val += 3;
1720: }
1721: MPI_Startall_isend(len,nsends,swaits);
1722: */
1724: /* this version packs and sends one at a time */
1725: val = svalues;
1726: for (i=0; i<nsends; i++) {
1727: iend = sstarts[i+1]-sstarts[i];
1729: for (j=0; j<iend; j++) {
1730: idx = *indices++;
1731: val[0] = xv[idx];
1732: val[1] = xv[idx+1];
1733: val[2] = xv[idx+2];
1734: val += 3;
1735: }
1736: MPI_Start_isend(3*iend,swaits+i);
1737: }
1738: }
1739: }
1741: /* take care of local scatters */
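/* when the local slots form a contiguous run, the is_copy branch below
   replaces the index loop with a single PetscMemcpy() */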
1742: if (gen_to->local.n) {
1743: PetscInt *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
1744: PetscInt n = gen_to->local.n,il,ir;
1745: if (addv == INSERT_VALUES) {
1746: if (gen_to->local.is_copy) {
1747: PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
1748: } else {
1749: for (i=0; i<n; i++) {
1750: il = fslots[i]; ir = tslots[i];
1751: yv[il] = xv[ir];
1752: yv[il+1] = xv[ir+1];
1753: yv[il+2] = xv[ir+2];
1754: }
1755: }
1756: } else if (addv == ADD_VALUES) {
1757: for (i=0; i<n; i++) {
1758: il = fslots[i]; ir = tslots[i];
1759: yv[il] += xv[ir];
1760: yv[il+1] += xv[ir+1];
1761: yv[il+2] += xv[ir+2];
1762: }
1763: #if !defined(PETSC_USE_COMPLEX)
1764: } else if (addv == MAX_VALUES) {
1765: for (i=0; i<n; i++) {
1766: il = fslots[i]; ir = tslots[i];
1767: yv[il] = PetscMax(yv[il],xv[ir]);
1768: yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1769: yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
1770: }
1771: #endif
1772: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1773: }
1774: VecRestoreArray(xin,&xv);
1775: if (xin != yin) {VecRestoreArray(yin,&yv);}
1776: return(0);
1777: }
1779: /* --------------------------------------------------------------------------------------*/
1783: PetscErrorCode VecScatterEnd_PtoP_3(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1784: {
1785: VecScatter_MPI_General *gen_to,*gen_from;
1786: PetscScalar *rvalues,*yv,*val;
1787: PetscErrorCode ierr;
1788: PetscInt nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
1789: PetscMPIInt imdex;
1790: MPI_Request *rwaits,*swaits;
1791: MPI_Status rstatus,*sstatus;
1794: if (mode & SCATTER_LOCAL) return(0);
1795: VecGetArray(yin,&yv);
1797: if (mode & SCATTER_REVERSE) {
1798: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1799: gen_from = (VecScatter_MPI_General*)ctx->todata;
1800: rwaits = gen_from->rev_requests;
1801: swaits = gen_to->rev_requests;
1802: sstatus = gen_from->sstatus;
1803: } else {
1804: gen_to = (VecScatter_MPI_General*)ctx->todata;
1805: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1806: rwaits = gen_from->requests;
1807: swaits = gen_to->requests;
1808: sstatus = gen_to->sstatus;
1809: }
1810: rvalues = gen_from->values;
1811: nrecvs = gen_from->n;
1812: nsends = gen_to->n;
1813: indices = gen_from->indices;
1814: rstarts = gen_from->starts;
1816: /* wait on receives */
1817: count = nrecvs;
1818: while (count) {
1819: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
1820: /* unpack receives into our local space */
1821: val = rvalues + 3*rstarts[imdex];
1822: lindices = indices + rstarts[imdex];
1823: n = rstarts[imdex+1] - rstarts[imdex];
1824: if (addv == INSERT_VALUES) {
1825: for (i=0; i<n; i++) {
1826: idx = lindices[i];
1827: yv[idx] = val[0];
1828: yv[idx+1] = val[1];
1829: yv[idx+2] = val[2];
1830: val += 3;
1831: }
1832: } else if (addv == ADD_VALUES) {
1833: for (i=0; i<n; i++) {
1834: idx = lindices[i];
1835: yv[idx] += val[0];
1836: yv[idx+1] += val[1];
1837: yv[idx+2] += val[2];
1838: val += 3;
1839: }
1840: #if !defined(PETSC_USE_COMPLEX)
1841: } else if (addv == MAX_VALUES) {
1842: for (i=0; i<n; i++) {
1843: idx = lindices[i];
1844: yv[idx] = PetscMax(yv[idx],val[0]);
1845: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1846: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
1847: val += 3;
1848: }
1849: #endif
1850: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1851: count--;
1852: }
1853: /* wait on sends */
1854: if (nsends) {
1855: MPI_Waitall(nsends,swaits,sstatus);
1856: }
1857: VecRestoreArray(yin,&yv);
1858: return(0);
1859: }
1861: /* --------------------------------------------------------------------------------------*/
1865: PetscErrorCode VecScatterBegin_PtoP_2(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1866: {
1867: VecScatter_MPI_General *gen_to,*gen_from;
1868: PetscScalar *xv,*yv,*val,*svalues;
1869: MPI_Request *rwaits,*swaits;
1870: PetscErrorCode ierr;
1871: PetscInt i,*indices,*sstarts,iend,j,nrecvs,nsends,idx;
1874: VecGetArray(xin,&xv);
1875: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
1876: if (mode & SCATTER_REVERSE) {
1877: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1878: gen_from = (VecScatter_MPI_General*)ctx->todata;
1879: rwaits = gen_from->rev_requests;
1880: swaits = gen_to->rev_requests;
1881: } else {
1882: gen_to = (VecScatter_MPI_General*)ctx->todata;
1883: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1884: rwaits = gen_from->requests;
1885: swaits = gen_to->requests;
1886: }
1887: svalues = gen_to->values;
1888: nrecvs = gen_from->n;
1889: nsends = gen_to->n;
1890: indices = gen_to->indices;
1891: sstarts = gen_to->starts;
1893: if (!(mode & SCATTER_LOCAL)) {
1895: if (gen_to->sendfirst) {
1896: /* this version packs and sends one at a time */
1897: val = svalues;
1898: for (i=0; i<nsends; i++) {
1899: iend = sstarts[i+1]-sstarts[i];
1901: for (j=0; j<iend; j++) {
1902: idx = *indices++;
1903: val[0] = xv[idx];
1904: val[1] = xv[idx+1];
1905: val += 2;
1906: }
1907: MPI_Start_isend(2*iend,swaits+i);
1908: }
1909: }
1911: if (!gen_from->use_readyreceiver) {
1912: /* post receives since they were not posted in VecScatterPostRecvs() */
1913: MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
1914: }
1916: if (!gen_to->sendfirst) {
1917: /* this version packs all the messages together and sends */
1918: /*
1919: len = 2*sstarts[nsends];
1920: val = svalues;
1921: for (i=0; i<len; i += 2) {
1922: idx = *indices++;
1923: val[0] = xv[idx];
1924: val[1] = xv[idx+1];
1925: val += 2;
1926: }
1927: MPI_Startall_isend(len,nsends,swaits);
1928: */
1930: /* this version packs and sends one at a time */
1931: val = svalues;
1932: for (i=0; i<nsends; i++) {
1933: iend = sstarts[i+1]-sstarts[i];
1935: for (j=0; j<iend; j++) {
1936: idx = *indices++;
1937: val[0] = xv[idx];
1938: val[1] = xv[idx+1];
1939: val += 2;
1940: }
1941: MPI_Start_isend(2*iend,swaits+i);
1942: }
1943: }
1944: }
1946: /* take care of local scatters */
1947: if (gen_to->local.n) {
1948: PetscInt *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
1949: PetscInt n = gen_to->local.n,il,ir;
1950: if (addv == INSERT_VALUES) {
1951: if (gen_to->local.is_copy) {
1952: PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
1953: } else {
1954: for (i=0; i<n; i++) {
1955: il = fslots[i]; ir = tslots[i];
1956: yv[il] = xv[ir];
1957: yv[il+1] = xv[ir+1];
1958: }
1959: }
1960: } else if (addv == ADD_VALUES) {
1961: for (i=0; i<n; i++) {
1962: il = fslots[i]; ir = tslots[i];
1963: yv[il] += xv[ir];
1964: yv[il+1] += xv[ir+1];
1965: }
1966: #if !defined(PETSC_USE_COMPLEX)
1967: } else if (addv == MAX_VALUES) {
1968: for (i=0; i<n; i++) {
1969: il = fslots[i]; ir = tslots[i];
1970: yv[il] = PetscMax(yv[il],xv[ir]);
1971: yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1972: }
1973: #endif
1974: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
1975: }
1976: VecRestoreArray(xin,&xv);
1977: if (xin != yin) {VecRestoreArray(yin,&yv);}
1978: return(0);
1979: }
1981: /* --------------------------------------------------------------------------------------*/
1985: PetscErrorCode VecScatterEnd_PtoP_2(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1986: {
1987: VecScatter_MPI_General *gen_to,*gen_from;
1988: PetscScalar *rvalues,*yv,*val;
1989: PetscErrorCode ierr;
1990: PetscInt nrecvs,nsends,i,*indices,count,n,*rstarts,*lindices,idx;
1991: PetscMPIInt imdex;
1992: MPI_Request *rwaits,*swaits;
1993: MPI_Status rstatus,*sstatus;
1996: if (mode & SCATTER_LOCAL) return(0);
1997: VecGetArray(yin,&yv);
1999: if (mode & SCATTER_REVERSE) {
2000: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
2001: gen_from = (VecScatter_MPI_General*)ctx->todata;
2002: rwaits = gen_from->rev_requests;
2003: swaits = gen_to->rev_requests;
2004: sstatus = gen_from->sstatus;
2005: } else {
2006: gen_to = (VecScatter_MPI_General*)ctx->todata;
2007: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
2008: rwaits = gen_from->requests;
2009: swaits = gen_to->requests;
2010: sstatus = gen_to->sstatus;
2011: }
2012: rvalues = gen_from->values;
2013: nrecvs = gen_from->n;
2014: nsends = gen_to->n;
2015: indices = gen_from->indices;
2016: rstarts = gen_from->starts;
2018: /* wait on receives */
2019: count = nrecvs;
2020: while (count) {
2021: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
2022: /* unpack receives into our local space */
2023: val = rvalues + 2*rstarts[imdex];
2024: lindices = indices + rstarts[imdex];
2025: n = rstarts[imdex+1] - rstarts[imdex];
2026: if (addv == INSERT_VALUES) {
2027: for (i=0; i<n; i++) {
2028: idx = lindices[i];
2029: yv[idx] = val[0];
2030: yv[idx+1] = val[1];
2031: val += 2;
2032: }
2033: } else if (addv == ADD_VALUES) {
2034: for (i=0; i<n; i++) {
2035: idx = lindices[i];
2036: yv[idx] += val[0];
2037: yv[idx+1] += val[1];
2038: val += 2;
2039: }
2040: #if !defined(PETSC_USE_COMPLEX)
2041: } else if (addv == MAX_VALUES) {
2042: for (i=0; i<n; i++) {
2043: idx = lindices[i];
2044: yv[idx] = PetscMax(yv[idx],val[0]);
2045: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
2046: val += 2;
2047: }
2048: #endif
2049: } else {SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");}
2050: count--;
2051: }
2052: /* wait on sends */
2053: if (nsends) {
2054: MPI_Waitall(nsends,swaits,sstatus);
2055: }
2056: VecRestoreArray(yin,&yv);
2057: return(0);
2058: }
2060: /* ---------------------------------------------------------------------------------*/
2064: PetscErrorCode VecScatterDestroy_PtoP_X(VecScatter ctx)
2065: {
2066: VecScatter_MPI_General *gen_to = (VecScatter_MPI_General*)ctx->todata;
2067: VecScatter_MPI_General *gen_from = (VecScatter_MPI_General*)ctx->fromdata;
2068: PetscErrorCode ierr;
2069: PetscInt i;
2072: if (gen_to->use_readyreceiver) {
2073: /*
2074:       Since ready-receiver mode has already posted the receives (in
2075:       VecScatterPostRecvs()) we must cancel them before freeing the requests
2076: */
2077: for (i=0; i<gen_from->n; i++) {
2078: MPI_Cancel(gen_from->requests+i);
2079: }
2080: }
2082: if (gen_to->local.slots) {PetscFree2(gen_to->local.slots,gen_from->local.slots);}
2083: if (gen_to->local.slots_nonmatching) {PetscFree2(gen_to->local.slots_nonmatching,gen_from->local.slots_nonmatching);}
2085: /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
2086: /*
2087:      IBM's PE version of MPI has a bug where freeing these requests corrupts
2088:      subsequent message passing.
2089: */
2090: #if !defined(PETSC_HAVE_BROKEN_REQUEST_FREE)
2091: for (i=0; i<gen_to->n; i++) {
2092: MPI_Request_free(gen_to->requests + i);
2093: MPI_Request_free(gen_to->rev_requests + i);
2094: }
2096: /*
2097:      MPICH could not properly cancel requests, so in ready-receiver mode we could
2098:      not free them.  That may be fixed now; if not, wrap the loop below in
2099:      if (!gen_to->use_readyreceiver) { ... }
2100: */
2101: for (i=0; i<gen_from->n; i++) {
2102: MPI_Request_free(gen_from->requests + i);
2103: MPI_Request_free(gen_from->rev_requests + i);
2104: }
2105: #endif
2106:
2107: PetscFree(gen_to->sstatus);
2108: PetscFree(gen_to->values);
2109: PetscFree2(gen_to->rev_requests,gen_from->rev_requests);
2110: PetscFree(gen_from->values);
2111: PetscFree(gen_to);
2112: PetscFree(gen_from);
2113: PetscHeaderDestroy(ctx);
2114: return(0);
2115: }
2117: /* ==========================================================================================*/
2119: /* create parallel to sequential scatter context */
2120: /*
2121: bs indicates how many elements there are in each block. Normally
2122: this would be 1.
2123: */
2126: PetscErrorCode VecScatterCreate_PtoS(PetscInt nx,PetscInt *inidx,PetscInt ny,PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2127: {
2128: VecScatter_MPI_General *from,*to;
2129: PetscErrorCode ierr;
2130: PetscMPIInt size,rank,imdex,tag,n;
2131: PetscInt *source,*lens,*owners;
2132: PetscInt *lowner,*start,lengthy;
2133: PetscInt *nprocs,i,j,idx,nsends,nrecvs;
2134: PetscInt *owner,*starts,count,slen;
2135: PetscInt *rvalues,*svalues,base,nmax,*values,len,*indx,nprocslocal;
2136: MPI_Comm comm;
2137: MPI_Request *send_waits,*recv_waits;
2138: MPI_Status recv_status,*send_status;
2139: PetscMap map;
2140: PetscTruth found;
2141:
2143: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2144: PetscObjectGetComm((PetscObject)xin,&comm);
2145: MPI_Comm_rank(comm,&rank);
2146: MPI_Comm_size(comm,&size);
2147: VecGetPetscMap(xin,&map);
2148: PetscMapGetGlobalRange(map,&owners);
2149: VecGetSize(yin,&lengthy);
2151: /* first count number of contributors to each processor */
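  /* nprocs[2*j] counts how many of this process's indices are owned by process j;
     nprocs[2*j+1] is a 0/1 flag recording whether a message will be sent to j */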
2152: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
2153: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
2154: PetscMalloc(nx*sizeof(PetscInt),&owner);
2155: for (i=0; i<nx; i++) {
2156: idx = inidx[i];
2157: found = PETSC_FALSE;
2158: for (j=0; j<size; j++) {
2159: if (idx >= owners[j] && idx < owners[j+1]) {
2160: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
2161: }
2162: }
2163: if (!found) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2164: }
2165: nprocslocal = nprocs[2*rank];
2166: nprocs[2*rank] = nprocs[2*rank+1] = 0;
2167: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
2169: /* inform other processors of number of messages and max length*/
2170: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
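  /* nmax is the length of the longest message this process will receive and
     nrecvs is the number of processes that will send to it */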
2172: /* post receives: */
2173: PetscMalloc(nrecvs*(nmax+1)*sizeof(PetscInt),&rvalues);
2174: PetscMalloc(2*nrecvs*sizeof(PetscInt),&lens);
2175: source = lens + nrecvs;
2176: PetscMalloc(nrecvs*sizeof(MPI_Request),&recv_waits);
2177: for (i=0; i<nrecvs; i++) {
2178: MPI_Irecv((rvalues+nmax*i),nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
2179: }
2181: /* do sends:
2182:      1) starts[i] gives the starting index in svalues of the indices destined
2183:         for the ith processor
2184: */
2185: PetscMalloc(nx*sizeof(PetscInt),&svalues);
2186: PetscMalloc(nsends*sizeof(MPI_Request),&send_waits);
2187: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
2188: starts[0] = 0;
2189: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
2190: for (i=0; i<nx; i++) {
2191: if (owner[i] != rank) {
2192: svalues[starts[owner[i]]++] = inidx[i];
2193: }
2194: }
2196: starts[0] = 0;
2197: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
2198: count = 0;
2199: for (i=0; i<size; i++) {
2200: if (nprocs[2*i+1]) {
2201: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
2202: }
2203: }
2204: PetscFree(starts);
2206: /* wait on receives */
2207: count = nrecvs;
2208: slen = 0;
2209: while (count) {
2210: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2211: /* unpack receives into our local space */
2212: MPI_Get_count(&recv_status,MPIU_INT,&n);
2213: source[imdex] = recv_status.MPI_SOURCE;
2214: lens[imdex] = n;
2215: slen += n;
2216: count--;
2217: }
2218:
2219: /* allocate entire send scatter context */
2220: PetscNew(VecScatter_MPI_General,&to);
2221: PetscMemzero(to,sizeof(VecScatter_MPI_General));
2222: PetscOptionsHasName(PETSC_NULL,"-vecscatter_sendfirst",&to->sendfirst);
2223: len = slen*sizeof(PetscInt) + bs*slen*sizeof(PetscScalar) + (nrecvs+1)*sizeof(PetscInt) +
2224: nrecvs*(sizeof(PetscInt) + sizeof(MPI_Request));
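  /* one allocation holds the scalar message buffer, the MPI requests, the
     indices, the starts array, and the process list; the assignments below
     carve it into those pieces */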
2225: to->n = nrecvs;
2226: PetscMalloc(len,&to->values);
2227: to->requests = (MPI_Request*)(to->values + bs*slen);
2228: to->indices = (PetscInt*)(to->requests + nrecvs);
2229: to->starts = (PetscInt*)(to->indices + slen);
2230: to->procs = (PetscMPIInt*)(to->starts + nrecvs + 1);
2231: PetscMalloc(2*(PetscMax(nrecvs,nsends)+1)*sizeof(MPI_Status),&to->sstatus);
2232: to->rstatus = to->sstatus + PetscMax(nrecvs,nsends) + 1;
2233: ctx->todata = (void*)to;
2234: to->starts[0] = 0;
2236: if (nrecvs) {
2237: PetscMalloc(nrecvs*sizeof(PetscInt),&indx);
2238: for (i=0; i<nrecvs; i++) indx[i] = i;
2239: PetscSortIntWithPermutation(nrecvs,source,indx);
2241: /* move the data into the send scatter */
2242: base = owners[rank];
2243: for (i=0; i<nrecvs; i++) {
2244: to->starts[i+1] = to->starts[i] + lens[indx[i]];
2245: to->procs[i] = source[indx[i]];
2246: values = rvalues + indx[i]*nmax;
2247: for (j=0; j<lens[indx[i]]; j++) {
2248: to->indices[to->starts[i] + j] = values[j] - base;
2249: }
2250: }
2251: PetscFree(indx);
2252: }
2253: PetscFree(rvalues);
2254: PetscFree(lens);
2255: PetscFree(recv_waits);
2256:
2257: /* allocate entire receive scatter context */
2258: PetscNew(VecScatter_MPI_General,&from);
2259: PetscMemzero(from,sizeof(VecScatter_MPI_General));
2260: PetscOptionsHasName(PETSC_NULL,"-vecscatter_sendfirst",&from->sendfirst);
2261: len = ny*sizeof(PetscInt) + ny*bs*sizeof(PetscScalar) + (nsends+1)*sizeof(PetscInt) +
2262: nsends*(sizeof(PetscInt) + sizeof(MPI_Request));
2263: from->n = nsends;
2264: PetscMalloc(len,&from->values);
2265: from->requests = (MPI_Request*)(from->values + bs*ny);
2266: from->indices = (PetscInt*)(from->requests + nsends);
2267: from->starts = (PetscInt*)(from->indices + ny);
2268: from->procs = (PetscMPIInt*)(from->starts + nsends + 1);
2269: ctx->fromdata = (void*)from;
2271: /* move data into receive scatter */
2272: PetscMalloc((size+nsends+1)*sizeof(PetscInt),&lowner);
2273: start = lowner + size;
2274: count = 0; from->starts[0] = start[0] = 0;
2275: for (i=0; i<size; i++) {
2276: if (nprocs[2*i+1]) {
2277: lowner[i] = count;
2278: from->procs[count++] = i;
2279: from->starts[count] = start[count] = start[count-1] + nprocs[2*i];
2280: }
2281: }
2282: for (i=0; i<nx; i++) {
2283: if (owner[i] != rank) {
2284: from->indices[start[lowner[owner[i]]]++] = inidy[i];
2285: if (inidy[i] >= lengthy) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2286: }
2287: }
2288: PetscFree(lowner);
2289: PetscFree(owner);
2290: PetscFree(nprocs);
2291:
2292: /* wait on sends */
2293: if (nsends) {
2294: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
2295: MPI_Waitall(nsends,send_waits,send_status);
2296: PetscFree(send_status);
2297: }
2298: PetscFree(send_waits);
2299: PetscFree(svalues);
2301: if (nprocslocal) {
2302: PetscInt nt = from->local.n = to->local.n = nprocslocal;
2303: /* we have a scatter to ourselves */
2304: PetscMalloc2(nt,PetscInt,&to->local.slots,nt,PetscInt,&from->local.slots);
2305: nt = 0;
2306: for (i=0; i<nx; i++) {
2307: idx = inidx[i];
2308: if (idx >= owners[rank] && idx < owners[rank+1]) {
2309: to->local.slots[nt] = idx - owners[rank];
2310: from->local.slots[nt++] = inidy[i];
2311: if (inidy[i] >= lengthy) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2312: }
2313: }
2314: } else {
2315: from->local.n = 0;
2316: from->local.slots = 0;
2317: to->local.n = 0;
2318: to->local.slots = 0;
2319: }
2320: from->local.nonmatching_computed = PETSC_FALSE;
2321: from->local.n_nonmatching = 0;
2322: from->local.slots_nonmatching = 0;
2323: to->local.nonmatching_computed = PETSC_FALSE;
2324: to->local.n_nonmatching = 0;
2325: to->local.slots_nonmatching = 0;
2327: to->type = VEC_SCATTER_MPI_GENERAL;
2328: from->type = VEC_SCATTER_MPI_GENERAL;
2330: from->bs = bs;
2331: to->bs = bs;
2332: if (bs > 1) {
2333: PetscTruth flg,flgs = PETSC_FALSE;
2334: PetscInt *sstarts = to->starts, *rstarts = from->starts;
2335: PetscMPIInt *sprocs = to->procs, *rprocs = from->procs;
2336: MPI_Request *swaits = to->requests,*rwaits = from->requests;
2337: MPI_Request *rev_swaits,*rev_rwaits;
2338: PetscScalar *Ssvalues = to->values, *Srvalues = from->values;
2340: ctx->destroy = VecScatterDestroy_PtoP_X;
2341: ctx->copy = VecScatterCopy_PtoP_X;
2342: ctx->view = VecScatterView_MPI;
2343:
2344: tag = ctx->tag;
2345: comm = ctx->comm;
2347: /* allocate additional wait variables for the "reverse" scatter */
2348: PetscMalloc2(nrecvs,MPI_Request,&rev_rwaits,nsends,MPI_Request,&rev_swaits);
2349: to->rev_requests = rev_rwaits;
2350: from->rev_requests = rev_swaits;
2352: /* Register the receives that you will use later (sends for scatter reverse) */
2353: PetscOptionsHasName(PETSC_NULL,"-vecscatter_ssend",&flgs);
2354: if (flgs) {
2355: PetscLogInfo(0,"VecScatterCreate_PtoS:Using VecScatter Ssend mode\n");
2356: }
2357: for (i=0; i<from->n; i++) {
2358: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
2359: if (!flgs) {
2360: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
2361: } else {
2362: MPI_Ssend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
2363: }
2364: }
2366: PetscOptionsHasName(PETSC_NULL,"-vecscatter_rr",&flg);
2367: if (flg) {
2368: ctx->postrecvs = VecScatterPostRecvs_PtoP_X;
2369: to->use_readyreceiver = PETSC_TRUE;
2370: from->use_readyreceiver = PETSC_TRUE;
2371: for (i=0; i<to->n; i++) {
2372: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2373: }
2374: PetscLogInfo(0,"VecScatterCreate_PtoS:Using VecScatter ready receiver mode\n");
2375: } else {
2376: ctx->postrecvs = 0;
2377: to->use_readyreceiver = PETSC_FALSE;
2378: from->use_readyreceiver = PETSC_FALSE;
2379: for (i=0; i<to->n; i++) {
2380: if (!flgs) {
2381: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2382: } else {
2383: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2384: }
2385: }
2386: }
2387: /* Register receives for scatter reverse */
2388: for (i=0; i<to->n; i++) {
2389: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
2390: }
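  /* Runtime options consulted in this routine: -vecscatter_sendfirst packs and starts
     the sends before posting receives, -vecscatter_ssend registers MPI_Ssend_init()
     for the persistent sends, and -vecscatter_rr selects ready-receiver mode
     (MPI_Rsend_init() plus VecScatterPostRecvs_PtoP_X() to post the receives early) */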
2392: PetscLogInfo(0,"VecScatterCreate_PtoS:Using blocksize %D scatter\n",bs);
2393: switch (bs) {
2394: case 12:
2395: ctx->begin = VecScatterBegin_PtoP_12;
2396: ctx->end = VecScatterEnd_PtoP_12;
2397: break;
2398: case 6:
2399: ctx->begin = VecScatterBegin_PtoP_6;
2400: ctx->end = VecScatterEnd_PtoP_6;
2401: break;
2402: case 5:
2403: ctx->begin = VecScatterBegin_PtoP_5;
2404: ctx->end = VecScatterEnd_PtoP_5;
2405: break;
2406: case 4:
2407: ctx->begin = VecScatterBegin_PtoP_4;
2408: ctx->end = VecScatterEnd_PtoP_4;
2409: break;
2410: case 3:
2411: ctx->begin = VecScatterBegin_PtoP_3;
2412: ctx->end = VecScatterEnd_PtoP_3;
2413: break;
2414: case 2:
2415: ctx->begin = VecScatterBegin_PtoP_2;
2416: ctx->end = VecScatterEnd_PtoP_2;
2417: break;
2418: default:
2419: SETERRQ(PETSC_ERR_SUP,"Blocksize not supported");
2420: }
2421: } else {
2422: ctx->postrecvs = 0;
2423: ctx->destroy = VecScatterDestroy_PtoP;
2424: ctx->begin = VecScatterBegin_PtoP;
2425: ctx->end = VecScatterEnd_PtoP;
2426: ctx->copy = VecScatterCopy_PtoP;
2427: ctx->view = VecScatterView_MPI;
2428: }
2430: /* Check if the local scatter is actually a copy; important special case */
2431: if (nprocslocal) {
2432: VecScatterLocalOptimizeCopy_Private(&to->local,&from->local,bs);
2433: }
2435: return(0);
2436: }
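/*
   Usage sketch (not part of the original vpscat.c): one way a caller can reach
   the bs > 1 branch above.  Block index sets with matching block size (here 3)
   on both sides allow VecScatterCreate() to install the PtoP_3 Begin/End
   routines.  The calling sequences follow the PETSc 2.x-era interfaces assumed
   throughout this file and the details are hypothetical; error checking is
   omitted to match the style of this listing.
*/
static PetscErrorCode VecScatterBlockUsage_Sketch(Vec x,Vec y)
{
  IS         isx,isy;
  VecScatter sctx;
  PetscInt   blocks[2] = {0,2};    /* two block indices; see ISCreateBlock() */

  ISCreateBlock(PETSC_COMM_SELF,3,2,blocks,&isx);
  ISCreateBlock(PETSC_COMM_SELF,3,2,blocks,&isy);
  VecScatterCreate(x,isx,y,isy,&sctx);
  VecScatterBegin(x,y,INSERT_VALUES,SCATTER_FORWARD,sctx);   /* pack and start sends */
  VecScatterEnd(x,y,INSERT_VALUES,SCATTER_FORWARD,sctx);     /* wait and unpack */
  VecScatterDestroy(sctx);
  ISDestroy(isx);
  ISDestroy(isy);
  return 0;
}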
2438: /* ------------------------------------------------------------------------------------*/
2439: /*
2440: Scatter from local Seq vectors to a parallel vector.
2441: */
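/*
   Unlike VecScatterCreate_PtoS() above, this routine always uses block size 1 and
   installs the generic VecScatterBegin_PtoP()/VecScatterEnd_PtoP() kernels.
*/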
2444: PetscErrorCode VecScatterCreate_StoP(PetscInt nx,PetscInt *inidx,PetscInt ny,PetscInt *inidy,Vec yin,VecScatter ctx)
2445: {
2446: VecScatter_MPI_General *from,*to;
2447: PetscInt *source,nprocslocal,*lens,*owners = yin->map->range;
2448: PetscMPIInt rank = yin->stash.rank,size = yin->stash.size,tag,imdex,n;
2449: PetscErrorCode ierr;
2450: PetscInt *lowner,*start;
2451: PetscInt *nprocs,i,j,idx,nsends,nrecvs;
2452: PetscInt *owner,*starts,count,slen;
2453: PetscInt *rvalues,*svalues,base,nmax,*values,len;
2454: PetscTruth found;
2455: MPI_Comm comm = yin->comm;
2456: MPI_Request *send_waits,*recv_waits;
2457: MPI_Status recv_status,*send_status;
2460: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2461: PetscMalloc5(2*size,PetscInt,&nprocs,nx,PetscInt,&owner,size,PetscInt,&lowner,size,PetscInt,&start,size+1,PetscInt,&starts);
2463: /* count number of contributors to each processor */
2464: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
2465: for (i=0; i<nx; i++) {
2466: idx = inidy[i];
2467: found = PETSC_FALSE;
2468: for (j=0; j<size; j++) {
2469: if (idx >= owners[j] && idx < owners[j+1]) {
2470: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
2471: }
2472: }
2473: if (!found) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2474: }
2475: nprocslocal = nprocs[2*rank];
2476: nprocs[2*rank] = nprocs[2*rank+1] = 0;
2477: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
2479: /* inform other processors of number of messages and max length*/
2480: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
2482: /* post receives: */
2483: PetscMalloc6(nrecvs*nmax,PetscInt,&rvalues,nrecvs,MPI_Request,&recv_waits,nx,PetscInt,&svalues,nsends,MPI_Request,&send_waits,nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);
2485: for (i=0; i<nrecvs; i++) {
2486: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
2487: }
2489: /* do sends:
2490:      1) starts[i] gives the starting index in svalues of the indices destined
2491:         for the ith processor
2492: */
2494: starts[0] = 0;
2495: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
2496: for (i=0; i<nx; i++) {
2497: if (owner[i] != rank) {
2498: svalues[starts[owner[i]]++] = inidy[i];
2499: }
2500: }
2502: /* reset starts because the packing loop above advanced it */
2503: starts[0] = 0;
2504: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
2505: count = 0;
2506: for (i=0; i<size; i++) {
2507: if (nprocs[2*i+1]) {
2508: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count);
2509: count++;
2510: }
2511: }
2513: /* allocate entire send scatter context */
2514: PetscNew(VecScatter_MPI_General,&to);
2515: PetscMemzero(to,sizeof(VecScatter_MPI_General));
2516: PetscOptionsHasName(PETSC_NULL,"-vecscatter_sendfirst",&to->sendfirst);
2517: len = ny*(sizeof(PetscInt) + sizeof(PetscScalar)) + (nsends+1)*sizeof(PetscInt) +
2518: nsends*(sizeof(PetscInt) + sizeof(MPI_Request));
2519: to->n = nsends;
2520: PetscMalloc(len,&to->values);
2521: to->requests = (MPI_Request*)(to->values + ny);
2522: to->indices = (PetscInt*)(to->requests + nsends);
2523: to->starts = (PetscInt*)(to->indices + ny);
2524: to->procs = (PetscMPIInt*)(to->starts + nsends + 1);
2525: PetscMalloc(2*(PetscMax(nsends,nrecvs) + 1)*sizeof(MPI_Status),&to->sstatus); /* sstatus and rstatus share this allocation */
2526: to->rstatus = to->sstatus + PetscMax(nsends,nrecvs) + 1;
2527: ctx->todata = (void*)to;
2529: /* move data into send scatter context */
2530: count = 0;
2531: to->starts[0] = start[0] = 0;
2532: for (i=0; i<size; i++) {
2533: if (nprocs[2*i+1]) {
2534: lowner[i] = count;
2535: to->procs[count++] = i;
2536: to->starts[count] = start[count] = start[count-1] + nprocs[2*i];
2537: }
2538: }
2539: for (i=0; i<nx; i++) {
2540: if (owner[i] != rank) {
2541: to->indices[start[lowner[owner[i]]]++] = inidx[i];
2542: }
2543: }
2544: PetscFree5(nprocs,owner,lowner,start,starts);
2546: /* wait on receives */
2547: count = nrecvs;
2548: slen = 0;
2549: while (count) {
2550: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2551: /* unpack receives into our local space */
2552: MPI_Get_count(&recv_status,MPIU_INT,&n);
2553: source[imdex] = recv_status.MPI_SOURCE;
2554: lens[imdex] = n;
2555: slen += n;
2556: count--;
2557: }
2558:
2559: /* allocate entire receive scatter context */
2560: PetscNew(VecScatter_MPI_General,&from);
2561: PetscMemzero(from,sizeof(VecScatter_MPI_General));
2562: PetscOptionsHasName(PETSC_NULL,"-vecscatter_sendfirst",&from->sendfirst);
2563: len = slen*(sizeof(PetscInt) + sizeof(PetscScalar)) + (nrecvs+1)*sizeof(PetscInt) +
2564: nrecvs*(sizeof(PetscInt) + sizeof(MPI_Request));
2565: from->n = nrecvs;
2566: PetscMalloc(len,&from->values);
2567: from->requests = (MPI_Request*)(from->values + slen);
2568: from->indices = (PetscInt*)(from->requests + nrecvs);
2569: from->starts = (PetscInt*)(from->indices + slen);
2570: from->procs = (PetscMPIInt*)(from->starts + nrecvs + 1);
2571: ctx->fromdata = (void*)from;
2573: /* move the data into the receive scatter context */
2574: base = owners[rank];
2575: from->starts[0] = 0;
2576: for (i=0; i<nrecvs; i++) {
2577: from->starts[i+1] = from->starts[i] + lens[i];
2578: from->procs[i] = source[i];
2579: values = rvalues + i*nmax;
2580: for (j=0; j<lens[i]; j++) {
2581: from->indices[from->starts[i] + j] = values[j] - base;
2582: }
2583: }
2584:
2585: /* wait on sends */
2586: if (nsends) {
2587: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
2588: MPI_Waitall(nsends,send_waits,send_status);
2589: PetscFree(send_status);
2590: }
2591: PetscFree6(rvalues,recv_waits,svalues,send_waits,lens,source);
2593: if (nprocslocal) {
2594: PetscInt nt = from->local.n = to->local.n = nprocslocal;
2595: /* we have a scatter to ourselves */
2596: PetscMalloc2(nt,PetscInt,&to->local.slots,nt,PetscInt,&from->local.slots);
2597: nt = 0;
2598: for (i=0; i<ny; i++) {
2599: idx = inidy[i];
2600: if (idx >= owners[rank] && idx < owners[rank+1]) {
2601: from->local.slots[nt] = idx - owners[rank];
2602: to->local.slots[nt++] = inidx[i];
2603: }
2604: }
2605: } else {
2606: from->local.n = 0;
2607: from->local.slots = 0;
2608: to->local.n = 0;
2609: to->local.slots = 0;
2611: }
2612: from->local.nonmatching_computed = PETSC_FALSE;
2613: from->local.n_nonmatching = 0;
2614: from->local.slots_nonmatching = 0;
2615: to->local.nonmatching_computed = PETSC_FALSE;
2616: to->local.n_nonmatching = 0;
2617: to->local.slots_nonmatching = 0;
2619: to->type = VEC_SCATTER_MPI_GENERAL;
2620: from->type = VEC_SCATTER_MPI_GENERAL;
2622: ctx->destroy = VecScatterDestroy_PtoP;
2623: ctx->postrecvs = 0;
2624: ctx->begin = VecScatterBegin_PtoP;
2625: ctx->end = VecScatterEnd_PtoP;
2626: ctx->copy = 0;
2627: ctx->view = VecScatterView_MPI;
2629: to->bs = 1;
2630: from->bs = 1;
2631: return(0);
2632: }
2634: /* ---------------------------------------------------------------------------------*/
2637: PetscErrorCode VecScatterCreate_PtoP(PetscInt nx,PetscInt *inidx,PetscInt ny,PetscInt *inidy,Vec xin,Vec yin,VecScatter ctx)
2638: {
2640: PetscMPIInt size,rank,tag,imdex,n;
2641: PetscInt *lens,*owners = xin->map->range;
2642: PetscInt *nprocs,i,j,idx,nsends,nrecvs,*local_inidx,*local_inidy;
2643: PetscInt *owner,*starts,count,slen;
2644: PetscInt *rvalues,*svalues,base,nmax,*values;
2645: MPI_Comm comm;
2646: MPI_Request *send_waits,*recv_waits;
2647: MPI_Status recv_status,*send_status;
2648: PetscTruth duplicate = PETSC_FALSE,found;
2651: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2652: PetscObjectGetComm((PetscObject)xin,&comm);
2653: MPI_Comm_size(comm,&size);
2654: MPI_Comm_rank(comm,&rank);
2655: if (size == 1) {
2656: VecScatterCreate_StoP(nx,inidx,ny,inidy,yin,ctx);
2657: return(0);
2658: }
2660: /*
2661:      Each processor ships its inidx[j] and inidy[j] to the processor that owns inidx[j];
2662:      each processor then calls VecScatterCreate_StoP() on the pairs it receives.
2663: */
2664: /* first count number of contributors to each processor */
2665: PetscMalloc3(2*size,PetscInt,&nprocs,nx,PetscInt,&owner,(size+1),PetscInt,&starts);
2666: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
2667: for (i=0; i<nx; i++) {
2668: idx = inidx[i];
2669: found = PETSC_FALSE;
2670: for (j=0; j<size; j++) {
2671: if (idx >= owners[j] && idx < owners[j+1]) {
2672: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
2673: }
2674: }
2675: if (!found) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2676: }
2677: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
2679: /* inform other processors of number of messages and max length*/
2680: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
2682: /* post receives: */
2683: PetscMalloc6(2*nrecvs*nmax,PetscInt,&rvalues,2*nx,PetscInt,&svalues,2*nrecvs,PetscInt,&lens,nrecvs,MPI_Request,&recv_waits,nsends,MPI_Request,&send_waits,nsends,MPI_Status,&send_status);
2685: for (i=0; i<nrecvs; i++) {
2686: MPI_Irecv(rvalues+2*nmax*i,2*nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
2687: }
2689: /* do sends:
2690:      1) starts[i] gives the starting position (in pairs) in svalues of the
2691:         (from,to) index pairs destined for the ith processor
2692: */
2693: starts[0]= 0;
2694: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
2695: for (i=0; i<nx; i++) {
2696: svalues[2*starts[owner[i]]] = inidx[i];
2697: svalues[1 + 2*starts[owner[i]]++] = inidy[i];
2698: }
2700: starts[0] = 0;
2701: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
2702: count = 0;
2703: for (i=0; i<size; i++) {
2704: if (nprocs[2*i+1]) {
2705: MPI_Isend(svalues+2*starts[i],2*nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count);
2706: count++;
2707: }
2708: }
2709: PetscFree3(nprocs,owner,starts);
2711: /* wait on receives */
2712: count = nrecvs;
2713: slen = 0;
2714: while (count) {
2715: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2716: /* unpack receives into our local space */
2717: MPI_Get_count(&recv_status,MPIU_INT,&n);
2718: lens[imdex] = n/2;
2719: slen += n/2;
2720: count--;
2721: }
2722:
2723: PetscMalloc2(slen,PetscInt,&local_inidx,slen,PetscInt,&local_inidy);
2724: base = owners[rank];
2725: count = 0;
2726: for (i=0; i<nrecvs; i++) {
2727: values = rvalues + 2*i*nmax;
2728: for (j=0; j<lens[i]; j++) {
2729: local_inidx[count] = values[2*j] - base;
2730: local_inidy[count++] = values[2*j+1];
2731: }
2732: }
2734: /* wait on sends */
2735: if (nsends) {
2736: MPI_Waitall(nsends,send_waits,send_status);
2737: }
2738: PetscFree6(rvalues,svalues,lens,recv_waits,send_waits,send_status);
2740: /*
2741:      should sort and remove duplicates from local_inidx,local_inidy; the slow
2742:      version below is disabled (see the sketch after it)
2742: */
2744: #if defined(do_it_slow)
2745: /* sort on the from index */
2746: PetscSortIntWithArray(slen,local_inidx,local_inidy);
2747: start = 0;
2748: while (start < slen) {
2749: count = start+1;
2750: last = local_inidx[start];
2751: while (count < slen && last == local_inidx[count]) count++;
2752: if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2753: /* sort on to index */
2754: PetscSortInt(count-start,local_inidy+start);
2755: }
2756: /* remove duplicates; not most efficient way, but probably good enough */
2757: i = start;
2758: while (i < count-1) {
2759: if (local_inidy[i] != local_inidy[i+1]) {
2760: i++;
2761: } else { /* found a duplicate */
2762: duplicate = PETSC_TRUE;
2763: for (j=i; j<slen-1; j++) {
2764: local_inidx[j] = local_inidx[j+1];
2765: local_inidy[j] = local_inidy[j+1];
2766: }
2767: slen--;
2768: count--;
2769: }
2770: }
2771: start = count;
2772: }
2773: #endif
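  /*
     Illustrative sketch (not in the original source) of the duplicate removal that
     the disabled block above performs: once the pairs are sorted by local_inidx and,
     within equal local_inidx, by local_inidy, exact duplicates can be compressed in
     place.  Note the disabled block would also need PetscInt start,last declared.

       PetscInt m = 0;
       for (i=0; i<slen; i++) {
         if (!m || local_inidx[i] != local_inidx[m-1] || local_inidy[i] != local_inidy[m-1]) {
           local_inidx[m] = local_inidx[i];
           local_inidy[m] = local_inidy[i];
           m++;
         } else {
           duplicate = PETSC_TRUE;
         }
       }
       slen = m;
  */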
2774: if (duplicate) {
2775: PetscLogInfo(0,"VecScatterCreate_PtoP:Duplicate to from indices passed in VecScatterCreate(), they are ignored\n");
2776: }
2777: VecScatterCreate_StoP(slen,local_inidx,slen,local_inidy,yin,ctx);
2778: PetscFree2(local_inidx,local_inidy);
2779: return(0);
2780: }