Actual source code: sf.c
petsc-master 2020-08-25
1: #include <petsc/private/sfimpl.h>
2: #include <petsc/private/hashseti.h>
3: #include <petscctable.h>
5: #if defined(PETSC_USE_DEBUG)
6: # define PetscSFCheckGraphSet(sf,arg) do { \
7: if (PetscUnlikely(!(sf)->graphset)) \
8: SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
9: } while (0)
10: #else
11: # define PetscSFCheckGraphSet(sf,arg) do {} while (0)
12: #endif
14: const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};
16: /*@
17: PetscSFCreate - create a star forest communication context
19: Collective
21: Input Arguments:
22: . comm - communicator on which the star forest will operate
24: Output Arguments:
25: . sf - new star forest context
27: Options Database Keys:
28: + -sf_type basic - Use MPI persistent Isend/Irecv for communication (Default)
29: . -sf_type window - Use MPI-3 one-sided window for communication
30: - -sf_type neighbor - Use MPI-3 neighborhood collectives for communication
32: Level: intermediate
34: Notes:
35: When one knows the communication graph is one of the predefined graphs, such as MPI_Alltoall, MPI_Allgatherv, or
36: MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
37: SFs are optimized and have better performance than general SFs.
39: .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
40: @*/
41: PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
42: {
44: PetscSF b;
48: PetscSFInitializePackage();
50: PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);
52: b->nroots = -1;
53: b->nleaves = -1;
54: b->minleaf = PETSC_MAX_INT;
55: b->maxleaf = PETSC_MIN_INT;
56: b->nranks = -1;
57: b->rankorder = PETSC_TRUE;
58: b->ingroup = MPI_GROUP_NULL;
59: b->outgroup = MPI_GROUP_NULL;
60: b->graphset = PETSC_FALSE;
62: *sf = b;
63: return(0);
64: }
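/*
   A minimal sketch of the create/set-graph/set-up call sequence documented above,
   assuming a hypothetical ring layout with one root and one leaf per rank
   (error checking omitted for brevity):

     PetscSF     sf;
     PetscMPIInt rank,size;
     PetscSFNode remote;

     MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
     MPI_Comm_size(PETSC_COMM_WORLD,&size);
     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     remote.rank  = (rank+1)%size;    // my single leaf references root 0 on the next rank
     remote.index = 0;
     PetscSFSetGraph(sf,1,1,NULL,PETSC_COPY_VALUES,&remote,PETSC_COPY_VALUES);
     PetscSFSetFromOptions(sf);
     PetscSFSetUp(sf);
     ...
     PetscSFDestroy(&sf);
*/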
66: /*@
67: PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
69: Collective
71: Input Arguments:
72: . sf - star forest
74: Level: advanced
76: .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
77: @*/
78: PetscErrorCode PetscSFReset(PetscSF sf)
79: {
84: if (sf->ops->Reset) {(*sf->ops->Reset)(sf);}
85: sf->nroots = -1;
86: sf->nleaves = -1;
87: sf->minleaf = PETSC_MAX_INT;
88: sf->maxleaf = PETSC_MIN_INT;
89: sf->mine = NULL;
90: sf->remote = NULL;
91: sf->graphset = PETSC_FALSE;
92: PetscFree(sf->mine_alloc);
93: PetscFree(sf->remote_alloc);
94: sf->nranks = -1;
95: PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);
96: #if defined(PETSC_HAVE_CUDA)
97: {
98: PetscInt i;
99: for (i=0; i<2; i++) {if (sf->rmine_d[i]) {cudaError_t err = cudaFree(sf->rmine_d[i]);CHKERRCUDA(err);sf->rmine_d[i]=NULL;}}
100: }
101: #endif
102: sf->degreeknown = PETSC_FALSE;
103: PetscFree(sf->degree);
104: if (sf->ingroup != MPI_GROUP_NULL) {MPI_Group_free(&sf->ingroup);}
105: if (sf->outgroup != MPI_GROUP_NULL) {MPI_Group_free(&sf->outgroup);}
106: PetscSFDestroy(&sf->multi);
107: PetscLayoutDestroy(&sf->map);
108: sf->setupcalled = PETSC_FALSE;
109: return(0);
110: }
112: /*@C
113: PetscSFSetType - Set the PetscSF communication implementation
115: Collective on PetscSF
117: Input Parameters:
118: + sf - the PetscSF context
119: - type - a known method
121: Options Database Key:
122: . -sf_type <type> - Sets the method; use -help for a list
123: of available methods (for instance, window, basic, neighbor)
125: Notes:
126: See "include/petscsf.h" for available methods, for instance
127: + PETSCSFWINDOW - MPI-2/3 one-sided
128: - PETSCSFBASIC - basic implementation using MPI-1 two-sided
130: Level: intermediate
132: .seealso: PetscSFType, PetscSFCreate()
133: @*/
134: PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
135: {
136: PetscErrorCode ierr,(*r)(PetscSF);
137: PetscBool match;
143: PetscObjectTypeCompare((PetscObject)sf,type,&match);
144: if (match) return(0);
146: PetscFunctionListFind(PetscSFList,type,&r);
147: if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
148: /* Destroy the previous PetscSF implementation context */
149: if (sf->ops->Destroy) {(*(sf)->ops->Destroy)(sf);}
150: PetscMemzero(sf->ops,sizeof(*sf->ops));
151: PetscObjectChangeTypeName((PetscObject)sf,type);
152: (*r)(sf);
153: return(0);
154: }
156: /*@C
157: PetscSFGetType - Get the PetscSF communication implementation
159: Not Collective
161: Input Parameter:
162: . sf - the PetscSF context
164: Output Parameter:
165: . type - the PetscSF type name
167: Level: intermediate
169: .seealso: PetscSFSetType(), PetscSFCreate()
170: @*/
171: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
172: {
176: *type = ((PetscObject)sf)->type_name;
177: return(0);
178: }
180: /*@
181: PetscSFDestroy - destroy star forest
183: Collective
185: Input Arguments:
186: . sf - address of star forest
188: Level: intermediate
190: .seealso: PetscSFCreate(), PetscSFReset()
191: @*/
192: PetscErrorCode PetscSFDestroy(PetscSF *sf)
193: {
197: if (!*sf) return(0);
199: if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; return(0);}
200: PetscSFReset(*sf);
201: if ((*sf)->ops->Destroy) {(*(*sf)->ops->Destroy)(*sf);}
202: PetscHeaderDestroy(sf);
203: return(0);
204: }
206: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
207: {
208: PetscInt i, nleaves;
209: PetscMPIInt size;
210: const PetscInt *ilocal;
211: const PetscSFNode *iremote;
212: PetscErrorCode ierr;
215: if (!sf->graphset || !PetscDefined(USE_DEBUG)) return(0);
216: PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);
217: MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
218: for (i = 0; i < nleaves; i++) {
219: const PetscInt rank = iremote[i].rank;
220: const PetscInt remote = iremote[i].index;
221: const PetscInt leaf = ilocal ? ilocal[i] : i;
222: if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
223: if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
224: if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
225: }
226: return(0);
227: }
229: /*@
230: PetscSFSetUp - set up communication structures
232: Collective
234: Input Arguments:
235: . sf - star forest communication object
237: Level: beginner
239: .seealso: PetscSFSetFromOptions(), PetscSFSetType()
240: @*/
241: PetscErrorCode PetscSFSetUp(PetscSF sf)
242: {
247: PetscSFCheckGraphSet(sf,1);
248: if (sf->setupcalled) return(0);
249: PetscSFCheckGraphValid_Private(sf);
250: sf->use_gpu_aware_mpi = use_gpu_aware_mpi;
251: if (!((PetscObject)sf)->type_name) {PetscSFSetType(sf,PETSCSFBASIC);}
252: PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);
253: if (sf->ops->SetUp) {(*sf->ops->SetUp)(sf);}
254: PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);
255: sf->setupcalled = PETSC_TRUE;
256: return(0);
257: }
259: /*@
260: PetscSFSetFromOptions - set PetscSF options using the options database
262: Logically Collective
264: Input Arguments:
265: . sf - star forest
267: Options Database Keys:
268: + -sf_type - implementation type, see PetscSFSetType()
269: . -sf_rank_order - sort composite points for gathers and scatters in rank order; otherwise gathers are non-deterministic
270: . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default CUDA stream. SF will also
271: use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
272: If true, this option only works with -use_cuda_aware_mpi 1.
273: - -sf_use_stream_aware_mpi - Assume the underlying MPI is CUDA-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
274: If true, this option only works with -use_cuda_aware_mpi 1.
276: Level: intermediate
277: @*/
278: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
279: {
280: PetscSFType deft;
281: char type[256];
283: PetscBool flg;
287: PetscObjectOptionsBegin((PetscObject)sf);
288: deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
289: PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);
290: PetscSFSetType(sf,flg ? type : deft);
291: PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);
293: #if defined(PETSC_HAVE_CUDA)
294: sf->use_default_stream = PETSC_TRUE; /* The assumption is true for PETSc internal use of SF */
295: PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);
296: sf->use_stream_aware_mpi = PETSC_FALSE;
297: PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);
298: #endif
299: if (sf->ops->SetFromOptions) {(*sf->ops->SetFromOptions)(PetscOptionsObject,sf);}
300: PetscOptionsEnd();
301: return(0);
302: }
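/*
   For example, the keys listed above could be combined on the command line as in the
   following hypothetical invocation, which selects the neighborhood-collective
   implementation and disables rank ordering:

     ./app -sf_type neighbor -sf_rank_order 0
*/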
304: /*@
305: PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
307: Logically Collective
309: Input Arguments:
310: + sf - star forest
311: - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
313: Level: advanced
315: .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
316: @*/
317: PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
318: {
322: if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
323: sf->rankorder = flg;
324: return(0);
325: }
327: /*@
328: PetscSFSetGraph - Set a parallel star forest
330: Collective
332: Input Arguments:
333: + sf - star forest
334: . nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
335: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
336: . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
337: during setup in debug mode)
338: . localmode - copy mode for ilocal
339: . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
340: during setup in debug mode)
341: - remotemode - copy mode for iremote
343: Level: intermediate
345: Notes:
346: In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode
348: Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
349: encode contiguous storage. In that case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
350: needed.
352: Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
353: indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).
355: .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
356: @*/
357: PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
358: {
365: if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
366: if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);
368: if (sf->nroots >= 0) { /* Reset only if graph already set */
369: PetscSFReset(sf);
370: }
372: PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);
374: sf->nroots = nroots;
375: sf->nleaves = nleaves;
377: if (nleaves && ilocal) {
378: PetscInt i;
379: PetscInt minleaf = PETSC_MAX_INT;
380: PetscInt maxleaf = PETSC_MIN_INT;
381: int contiguous = 1;
382: for (i=0; i<nleaves; i++) {
383: minleaf = PetscMin(minleaf,ilocal[i]);
384: maxleaf = PetscMax(maxleaf,ilocal[i]);
385: contiguous &= (ilocal[i] == i);
386: }
387: sf->minleaf = minleaf;
388: sf->maxleaf = maxleaf;
389: if (contiguous) {
390: if (localmode == PETSC_OWN_POINTER) {
391: PetscFree(ilocal);
392: }
393: ilocal = NULL;
394: }
395: } else {
396: sf->minleaf = 0;
397: sf->maxleaf = nleaves - 1;
398: }
400: if (ilocal) {
401: switch (localmode) {
402: case PETSC_COPY_VALUES:
403: PetscMalloc1(nleaves,&sf->mine_alloc);
404: PetscArraycpy(sf->mine_alloc,ilocal,nleaves);
405: sf->mine = sf->mine_alloc;
406: break;
407: case PETSC_OWN_POINTER:
408: sf->mine_alloc = (PetscInt*)ilocal;
409: sf->mine = sf->mine_alloc;
410: break;
411: case PETSC_USE_POINTER:
412: sf->mine_alloc = NULL;
413: sf->mine = (PetscInt*)ilocal;
414: break;
415: default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
416: }
417: }
419: switch (remotemode) {
420: case PETSC_COPY_VALUES:
421: PetscMalloc1(nleaves,&sf->remote_alloc);
422: PetscArraycpy(sf->remote_alloc,iremote,nleaves);
423: sf->remote = sf->remote_alloc;
424: break;
425: case PETSC_OWN_POINTER:
426: sf->remote_alloc = (PetscSFNode*)iremote;
427: sf->remote = sf->remote_alloc;
428: break;
429: case PETSC_USE_POINTER:
430: sf->remote_alloc = NULL;
431: sf->remote = (PetscSFNode*)iremote;
432: break;
433: default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
434: }
436: PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);
437: sf->graphset = PETSC_TRUE;
438: return(0);
439: }
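/*
   A sketch of the copy-mode semantics documented above, assuming sf is an already created
   PetscSF and the caller builds ilocal/iremote with PetscMalloc1() (the sizes and targets
   below are hypothetical; error checking omitted):

     PetscInt    *ilocal;
     PetscSFNode *iremote;
     PetscInt     i,nroots = 4,nleaves = 2;

     PetscMalloc1(nleaves,&ilocal);
     PetscMalloc1(nleaves,&iremote);
     for (i=0; i<nleaves; i++) {
       ilocal[i]        = 2*i;   // leaves live at even offsets of the leafdata buffer
       iremote[i].rank  = 0;     // both leaves reference roots owned by rank 0
       iremote[i].index = i;
     }
     // With PETSC_OWN_POINTER the SF takes ownership and frees the arrays itself
     PetscSFSetGraph(sf,nroots,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);
*/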
441: /*@
442: PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern
444: Collective
446: Input Parameters:
447: + sf - The PetscSF
448: . map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
449: - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL
451: Notes:
452: It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x whose layout is map,
453: and let n and N be the local and global sizes of x respectively.
455: With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph such that a Bcast on it copies x to
456: sequential vectors y on all ranks.
458: With PETSCSF_PATTERN_GATHER, the routine creates a graph such that a Bcast on it copies x to a
459: sequential vector y on rank 0.
461: In the above cases, entries of x are roots and entries of y are leaves.
463: With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is the size of sf's communicator. The routine
464: creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
465: of rank j, with 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount equal to 1. This does
466: not mean one cannot send multiple items; one just needs to create a new MPI datatype for the multiple data
467: items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.
469: In this case, roots and leaves are symmetric.
471: Level: intermediate
472: @*/
473: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
474: {
475: MPI_Comm comm;
476: PetscInt n,N,res[2];
477: PetscMPIInt rank,size;
478: PetscSFType type;
482: PetscObjectGetComm((PetscObject)sf, &comm);
483: if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
484: MPI_Comm_rank(comm,&rank);
485: MPI_Comm_size(comm,&size);
487: if (pattern == PETSCSF_PATTERN_ALLTOALL) {
488: type = PETSCSFALLTOALL;
489: PetscLayoutCreate(comm,&sf->map);
490: PetscLayoutSetLocalSize(sf->map,size);
491: PetscLayoutSetSize(sf->map,((PetscInt)size)*size);
492: PetscLayoutSetUp(sf->map);
493: } else {
494: PetscLayoutGetLocalSize(map,&n);
495: PetscLayoutGetSize(map,&N);
496: res[0] = n;
497: res[1] = -n;
498: /* Check if n is the same over all ranks so that we can optimize it */
499: MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);
500: if (res[0] == -res[1]) { /* same n */
501: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
502: } else {
503: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
504: }
505: PetscLayoutReference(map,&sf->map);
506: }
507: PetscSFSetType(sf,type);
509: sf->pattern = pattern;
510: sf->mine = NULL; /* Contiguous */
512: /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
513: Also set other easy stuff.
514: */
515: if (pattern == PETSCSF_PATTERN_ALLGATHER) {
516: sf->nleaves = N;
517: sf->nroots = n;
518: sf->nranks = size;
519: sf->minleaf = 0;
520: sf->maxleaf = N - 1;
521: } else if (pattern == PETSCSF_PATTERN_GATHER) {
522: sf->nleaves = rank ? 0 : N;
523: sf->nroots = n;
524: sf->nranks = rank ? 0 : size;
525: sf->minleaf = 0;
526: sf->maxleaf = rank ? -1 : N - 1;
527: } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
528: sf->nleaves = size;
529: sf->nroots = size;
530: sf->nranks = size;
531: sf->minleaf = 0;
532: sf->maxleaf = size - 1;
533: }
534: sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
535: sf->graphset = PETSC_TRUE;
536: return(0);
537: }
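/*
   A sketch of the PETSCSF_PATTERN_ALLGATHER case described in the Notes above: an SF whose
   Bcast copies the local pieces of a (hypothetical) MPI vector x to every rank
   (the local size n is an assumption; error checking omitted):

     PetscSF     sf;
     PetscLayout map;
     PetscInt    n = 5;   // local size of x on this rank

     PetscLayoutCreate(PETSC_COMM_WORLD,&map);
     PetscLayoutSetLocalSize(map,n);
     PetscLayoutSetUp(map);
     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     PetscSFSetGraphWithPattern(sf,map,PETSCSF_PATTERN_ALLGATHER);
     // A subsequent PetscSFBcastBegin/End on sf acts like an MPI_Allgather(v) of the root data
     PetscLayoutDestroy(&map);
*/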
539: /*@
540: PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
542: Collective
544: Input Arguments:
546: . sf - star forest to invert
548: Output Arguments:
549: . isf - inverse of sf
550: Level: advanced
552: Notes:
553: All roots must have degree 1.
555: The local space may be a permutation, but cannot be sparse.
557: .seealso: PetscSFSetGraph()
558: @*/
559: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
560: {
562: PetscMPIInt rank;
563: PetscInt i,nroots,nleaves,maxlocal,count,*newilocal;
564: const PetscInt *ilocal;
565: PetscSFNode *roots,*leaves;
569: PetscSFCheckGraphSet(sf,1);
572: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);
573: maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
575: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
576: PetscMalloc2(nroots,&roots,maxlocal,&leaves);
577: for (i=0; i<maxlocal; i++) {
578: leaves[i].rank = rank;
579: leaves[i].index = i;
580: }
581: for (i=0; i <nroots; i++) {
582: roots[i].rank = -1;
583: roots[i].index = -1;
584: }
585: PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);
586: PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);
588: /* Check whether our leaves are sparse */
589: for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
590: if (count == nroots) newilocal = NULL;
591: else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
592: PetscMalloc1(count,&newilocal);
593: for (i=0,count=0; i<nroots; i++) {
594: if (roots[i].rank >= 0) {
595: newilocal[count] = i;
596: roots[count].rank = roots[i].rank;
597: roots[count].index = roots[i].index;
598: count++;
599: }
600: }
601: }
603: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);
604: PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);
605: PetscFree2(roots,leaves);
606: return(0);
607: }
609: /*@
610: PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph
612: Collective
614: Input Arguments:
615: + sf - communication object to duplicate
616: - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)
618: Output Arguments:
619: . newsf - new communication object
621: Level: beginner
623: .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
624: @*/
625: PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
626: {
627: PetscSFType type;
634: PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);
635: PetscSFGetType(sf,&type);
636: if (type) {PetscSFSetType(*newsf,type);}
637: if (opt == PETSCSF_DUPLICATE_GRAPH) {
638: PetscSFCheckGraphSet(sf,1);
639: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
640: PetscInt nroots,nleaves;
641: const PetscInt *ilocal;
642: const PetscSFNode *iremote;
643: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
644: PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);
645: } else {
646: PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);
647: }
648: }
649: if (sf->ops->Duplicate) {(*sf->ops->Duplicate)(sf,opt,*newsf);}
650: return(0);
651: }
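/*
   A sketch of duplicating an SF together with its graph, as documented above
   (sf is assumed to be an existing PetscSF with its graph set; error checking omitted):

     PetscSF sfdup;

     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_GRAPH,&sfdup);
     PetscSFSetUp(sfdup);
     // sfdup now has the same graph as sf and can be used independently
     PetscSFDestroy(&sfdup);
*/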
653: /*@C
654: PetscSFGetGraph - Get the graph specifying a parallel star forest
656: Not Collective
658: Input Arguments:
659: . sf - star forest
661: Output Arguments:
662: + nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
663: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
664: . ilocal - locations of leaves in leafdata buffers
665: - iremote - remote locations of root vertices for each leaf on the current process
667: Notes:
668: We are not currently requiring that the graph be set, thus nroots=-1 is returned if it has not been set yet.
670: When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
671: want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.
673: Level: intermediate
675: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
676: @*/
677: PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
678: {
683: if (sf->ops->GetGraph) {
684: (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);
685: } else {
686: if (nroots) *nroots = sf->nroots;
687: if (nleaves) *nleaves = sf->nleaves;
688: if (ilocal) *ilocal = sf->mine;
689: if (iremote) *iremote = sf->remote;
690: }
691: return(0);
692: }
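/*
   A sketch of inspecting the graph returned above; any output argument may be passed as
   NULL if it is not needed (error checking omitted):

     PetscInt           nroots,nleaves,i;
     const PetscInt    *ilocal;
     const PetscSFNode *iremote;

     PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
     for (i=0; i<nleaves; i++) {
       PetscInt leaf = ilocal ? ilocal[i] : i;   // NULL ilocal means contiguous leaf storage
       PetscPrintf(PETSC_COMM_SELF,"leaf %D <- (rank %D, root %D)\n",leaf,iremote[i].rank,iremote[i].index);
     }
*/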
694: /*@
695: PetscSFGetLeafRange - Get the active leaf ranges
697: Not Collective
699: Input Arguments:
700: . sf - star forest
702: Output Arguments:
703: + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
704: - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.
706: Level: developer
708: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
709: @*/
710: PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
711: {
714: PetscSFCheckGraphSet(sf,1);
715: if (minleaf) *minleaf = sf->minleaf;
716: if (maxleaf) *maxleaf = sf->maxleaf;
717: return(0);
718: }
720: /*@C
721: PetscSFViewFromOptions - View a PetscSF based on options in the options database
723: Collective on PetscSF
725: Input Parameters:
726: + A - the star forest
727: . obj - Optional object
728: - name - command line option
730: Level: intermediate
731: .seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
732: @*/
733: PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
734: {
739: PetscObjectViewFromOptions((PetscObject)A,obj,name);
740: return(0);
741: }
743: /*@C
744: PetscSFView - view a star forest
746: Collective
748: Input Arguments:
749: + sf - star forest
750: - viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
752: Level: beginner
754: .seealso: PetscSFCreate(), PetscSFSetGraph()
755: @*/
756: PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
757: {
758: PetscErrorCode ierr;
759: PetscBool iascii;
760: PetscViewerFormat format;
764: if (!viewer) {PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);}
767: if (sf->graphset) {PetscSFSetUp(sf);}
768: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
769: if (iascii) {
770: PetscMPIInt rank;
771: PetscInt ii,i,j;
773: PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);
774: PetscViewerASCIIPushTab(viewer);
775: if (sf->ops->View) {(*sf->ops->View)(sf,viewer);}
776: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
777: if (!sf->graphset) {
778: PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");
779: PetscViewerASCIIPopTab(viewer);
780: return(0);
781: }
782: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
783: PetscViewerASCIIPushSynchronized(viewer);
784: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);
785: for (i=0; i<sf->nleaves; i++) {
786: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);
787: }
788: PetscViewerFlush(viewer);
789: PetscViewerGetFormat(viewer,&format);
790: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
791: PetscMPIInt *tmpranks,*perm;
792: PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);
793: PetscArraycpy(tmpranks,sf->ranks,sf->nranks);
794: for (i=0; i<sf->nranks; i++) perm[i] = i;
795: PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);
796: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);
797: for (ii=0; ii<sf->nranks; ii++) {
798: i = perm[ii];
799: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);
800: for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
801: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);
802: }
803: }
804: PetscFree2(tmpranks,perm);
805: }
806: PetscViewerFlush(viewer);
807: PetscViewerASCIIPopSynchronized(viewer);
808: }
809: PetscViewerASCIIPopTab(viewer);
810: }
811: return(0);
812: }
814: /*@C
815: PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
817: Not Collective
819: Input Arguments:
820: . sf - star forest
822: Output Arguments:
823: + nranks - number of ranks referenced by local part
824: . ranks - array of ranks
825: . roffset - offset in rmine/rremote for each rank (length nranks+1)
826: . rmine - concatenated array holding local indices referencing each remote rank
827: - rremote - concatenated array holding remote indices referenced for each remote rank
829: Level: developer
831: .seealso: PetscSFGetLeafRanks()
832: @*/
833: PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
834: {
839: if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
840: if (sf->ops->GetRootRanks) {
841: (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);
842: } else {
843: /* The generic implementation */
844: if (nranks) *nranks = sf->nranks;
845: if (ranks) *ranks = sf->ranks;
846: if (roffset) *roffset = sf->roffset;
847: if (rmine) *rmine = sf->rmine;
848: if (rremote) *rremote = sf->rremote;
849: }
850: return(0);
851: }
853: /*@C
854: PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
856: Not Collective
858: Input Arguments:
859: . sf - star forest
861: Output Arguments:
862: + niranks - number of leaf ranks referencing roots on this process
863: . iranks - array of ranks
864: . ioffset - offset in irootloc for each rank (length niranks+1)
865: - irootloc - concatenated array holding local indices of roots referenced by each leaf rank
867: Level: developer
869: .seealso: PetscSFGetRootRanks()
870: @*/
871: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
872: {
877: if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
878: if (sf->ops->GetLeafRanks) {
879: (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);
880: } else {
881: PetscSFType type;
882: PetscSFGetType(sf,&type);
883: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
884: }
885: return(0);
886: }
888: static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
889: PetscInt i;
890: for (i=0; i<n; i++) {
891: if (needle == list[i]) return PETSC_TRUE;
892: }
893: return PETSC_FALSE;
894: }
896: /*@C
897: PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
899: Collective
901: Input Arguments:
902: + sf - PetscSF to set up; PetscSFSetGraph() must have been called
903: - dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)
905: Level: developer
907: .seealso: PetscSFGetRootRanks()
908: @*/
909: PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
910: {
911: PetscErrorCode ierr;
912: PetscTable table;
913: PetscTablePosition pos;
914: PetscMPIInt size,groupsize,*groupranks;
915: PetscInt *rcount,*ranks;
916: PetscInt i, irank = -1,orank = -1;
920: PetscSFCheckGraphSet(sf,1);
921: MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
922: PetscTableCreate(10,size,&table);
923: for (i=0; i<sf->nleaves; i++) {
924: /* Log 1-based rank */
925: PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);
926: }
927: PetscTableGetCount(table,&sf->nranks);
928: PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);
929: PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);
930: PetscTableGetHeadPosition(table,&pos);
931: for (i=0; i<sf->nranks; i++) {
932: PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);
933: ranks[i]--; /* Convert back to 0-based */
934: }
935: PetscTableDestroy(&table);
937: /* We expect that dgroup is reliably "small" while nranks could be large */
938: {
939: MPI_Group group = MPI_GROUP_NULL;
940: PetscMPIInt *dgroupranks;
941: MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
942: MPI_Group_size(dgroup,&groupsize);
943: PetscMalloc1(groupsize,&dgroupranks);
944: PetscMalloc1(groupsize,&groupranks);
945: for (i=0; i<groupsize; i++) dgroupranks[i] = i;
946: if (groupsize) {MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);}
947: MPI_Group_free(&group);
948: PetscFree(dgroupranks);
949: }
951: /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
952: for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
953: for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
954: if (InList(ranks[i],groupsize,groupranks)) break;
955: }
956: for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
957: if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
958: }
959: if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
960: PetscInt tmprank,tmpcount;
962: tmprank = ranks[i];
963: tmpcount = rcount[i];
964: ranks[i] = ranks[sf->ndranks];
965: rcount[i] = rcount[sf->ndranks];
966: ranks[sf->ndranks] = tmprank;
967: rcount[sf->ndranks] = tmpcount;
968: sf->ndranks++;
969: }
970: }
971: PetscFree(groupranks);
972: PetscSortIntWithArray(sf->ndranks,ranks,rcount);
973: PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);
974: sf->roffset[0] = 0;
975: for (i=0; i<sf->nranks; i++) {
976: PetscMPIIntCast(ranks[i],sf->ranks+i);
977: sf->roffset[i+1] = sf->roffset[i] + rcount[i];
978: rcount[i] = 0;
979: }
980: for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
981: /* short circuit */
982: if (orank != sf->remote[i].rank) {
983: /* Search for index of iremote[i].rank in sf->ranks */
984: PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);
985: if (irank < 0) {
986: PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);
987: if (irank >= 0) irank += sf->ndranks;
988: }
989: orank = sf->remote[i].rank;
990: }
991: if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
992: sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i;
993: sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
994: rcount[irank]++;
995: }
996: PetscFree2(rcount,ranks);
997: return(0);
998: }
1000: /*@C
1001: PetscSFGetGroups - gets incoming and outgoing process groups
1003: Collective
1005: Input Argument:
1006: . sf - star forest
1008: Output Arguments:
1009: + incoming - group of origin processes for incoming edges (leaves that reference my roots)
1010: - outgoing - group of destination processes for outgoing edges (roots that I reference)
1012: Level: developer
1014: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
1015: @*/
1016: PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
1017: {
1019: MPI_Group group = MPI_GROUP_NULL;
1022: if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
1023: if (sf->ingroup == MPI_GROUP_NULL) {
1024: PetscInt i;
1025: const PetscInt *indegree;
1026: PetscMPIInt rank,*outranks,*inranks;
1027: PetscSFNode *remote;
1028: PetscSF bgcount;
1030: /* Compute the number of incoming ranks */
1031: PetscMalloc1(sf->nranks,&remote);
1032: for (i=0; i<sf->nranks; i++) {
1033: remote[i].rank = sf->ranks[i];
1034: remote[i].index = 0;
1035: }
1036: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);
1037: PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1038: PetscSFComputeDegreeBegin(bgcount,&indegree);
1039: PetscSFComputeDegreeEnd(bgcount,&indegree);
1040: /* Enumerate the incoming ranks */
1041: PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);
1042: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1043: for (i=0; i<sf->nranks; i++) outranks[i] = rank;
1044: PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);
1045: PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);
1046: MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1047: MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);
1048: MPI_Group_free(&group);
1049: PetscFree2(inranks,outranks);
1050: PetscSFDestroy(&bgcount);
1051: }
1052: *incoming = sf->ingroup;
1054: if (sf->outgroup == MPI_GROUP_NULL) {
1055: MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1056: MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);
1057: MPI_Group_free(&group);
1058: }
1059: *outgoing = sf->outgroup;
1060: return(0);
1061: }
1063: /*@
1064: PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters
1066: Collective
1068: Input Argument:
1069: . sf - star forest that may contain roots with 0 or with more than 1 vertex
1071: Output Arguments:
1072: . multi - star forest with split roots, such that each root has degree exactly 1
1074: Level: developer
1076: Notes:
1078: In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1079: directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1080: edge, it is a candidate for future optimization that might involve its removal.
1082: .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1083: @*/
1084: PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
1085: {
1091: if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1092: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1093: *multi = sf->multi;
1094: return(0);
1095: }
1096: if (!sf->multi) {
1097: const PetscInt *indegree;
1098: PetscInt i,*inoffset,*outones,*outoffset,maxlocal;
1099: PetscSFNode *remote;
1100: maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
1101: PetscSFComputeDegreeBegin(sf,&indegree);
1102: PetscSFComputeDegreeEnd(sf,&indegree);
1103: PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);
1104: inoffset[0] = 0;
1105: for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
1106: for (i=0; i<maxlocal; i++) outones[i] = 1;
1107: PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1108: PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1109: for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1110: if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
1111: for (i=0; i<sf->nroots; i++) {
1112: if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
1113: }
1114: }
1115: PetscMalloc1(sf->nleaves,&remote);
1116: for (i=0; i<sf->nleaves; i++) {
1117: remote[i].rank = sf->remote[i].rank;
1118: remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1119: }
1120: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1121: PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1122: if (sf->rankorder) { /* Sort the ranks */
1123: PetscMPIInt rank;
1124: PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
1125: PetscSFNode *newremote;
1126: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1127: for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
1128: PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);
1129: for (i=0; i<maxlocal; i++) outranks[i] = rank;
1130: PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);
1131: PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);
1132: /* Sort the incoming ranks at each vertex, build the inverse map */
1133: for (i=0; i<sf->nroots; i++) {
1134: PetscInt j;
1135: for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
1136: PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);
1137: for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1138: }
1139: PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);
1140: PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);
1141: PetscMalloc1(sf->nleaves,&newremote);
1142: for (i=0; i<sf->nleaves; i++) {
1143: newremote[i].rank = sf->remote[i].rank;
1144: newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1145: }
1146: PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);
1147: PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);
1148: }
1149: PetscFree3(inoffset,outones,outoffset);
1150: }
1151: *multi = sf->multi;
1152: return(0);
1153: }
1155: /*@C
1156: PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices
1158: Collective
1160: Input Arguments:
1161: + sf - original star forest
1162: . nselected - number of selected roots on this process
1163: - selected - indices of the selected roots on this process
1165: Output Arguments:
1166: . esf - new star forest
1168: Level: advanced
1170: Note:
1171: To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1172: be done by calling PetscSFGetGraph().
1174: .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1175: @*/
1176: PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1177: {
1178: PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1179: const PetscInt *ilocal;
1180: signed char *rootdata,*leafdata,*leafmem;
1181: const PetscSFNode *iremote;
1182: PetscSFNode *new_iremote;
1183: MPI_Comm comm;
1184: PetscErrorCode ierr;
1188: PetscSFCheckGraphSet(sf,1);
1192: PetscSFSetUp(sf);
1194: PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);
1195: PetscObjectGetComm((PetscObject)sf,&comm);
1196: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
1198: if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1200: PetscBool dups;
1202: if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1203: for (i=0; i<nselected; i++)
1204: if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root index %D is out of [0,%D)",selected[i],nroots);
1205: }
1207: if (sf->ops->CreateEmbeddedSF) {
1208: (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);
1209: } else {
1210: /* A generic version of creating embedded sf */
1211: PetscSFGetLeafRange(sf,&minleaf,&maxleaf);
1212: maxlocal = maxleaf - minleaf + 1;
1213: PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);
1214: leafdata = leafmem - minleaf;
1215: /* Tag selected roots and bcast to leaves */
1216: for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1217: PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);
1218: PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);
1220: /* Build esf with leaves that are still connected */
1221: esf_nleaves = 0;
1222: for (i=0; i<nleaves; i++) {
1223: j = ilocal ? ilocal[i] : i;
1224: /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1225: with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1226: */
1227: esf_nleaves += (leafdata[j] ? 1 : 0);
1228: }
1229: PetscMalloc1(esf_nleaves,&new_ilocal);
1230: PetscMalloc1(esf_nleaves,&new_iremote);
1231: for (i=n=0; i<nleaves; i++) {
1232: j = ilocal ? ilocal[i] : i;
1233: if (leafdata[j]) {
1234: new_ilocal[n] = j;
1235: new_iremote[n].rank = iremote[i].rank;
1236: new_iremote[n].index = iremote[i].index;
1237: ++n;
1238: }
1239: }
1240: PetscSFCreate(comm,esf);
1241: PetscSFSetFromOptions(*esf);
1242: PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1243: PetscFree2(rootdata,leafmem);
1244: }
1245: PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);
1246: return(0);
1247: }
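/*
   A sketch of extracting the sub-SF that keeps only edges to a few selected roots, as
   documented above (the selected indices are hypothetical and assume nroots >= 4;
   error checking omitted):

     PetscSF  esf;
     PetscInt selected[2] = {0,3};   // keep only edges to roots 0 and 3 on this process

     PetscSFCreateEmbeddedSF(sf,2,selected,&esf);
     // Indices are not remapped: leaves of esf still use the original root numbering
     PetscSFDestroy(&esf);
*/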
1249: /*@C
1250: PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1252: Collective
1254: Input Arguments:
1255: + sf - original star forest
1256: . nselected - number of selected leaves on this process
1257: - selected - indices of the selected leaves on this process
1259: Output Arguments:
1260: . newsf - new star forest
1262: Level: advanced
1264: .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
1265: @*/
1266: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1267: {
1268: const PetscSFNode *iremote;
1269: PetscSFNode *new_iremote;
1270: const PetscInt *ilocal;
1271: PetscInt i,nroots,*leaves,*new_ilocal;
1272: MPI_Comm comm;
1273: PetscErrorCode ierr;
1277: PetscSFCheckGraphSet(sf,1);
1281: /* Remove duplicates from selected[] and put the results in leaves[] */
1282: PetscObjectGetComm((PetscObject)sf,&comm);
1283: PetscMalloc1(nselected,&leaves);
1284: PetscArraycpy(leaves,selected,nselected);
1285: PetscSortedRemoveDupsInt(&nselected,leaves);
1286: if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);
1288: /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1289: if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1290: (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);
1291: } else {
1292: PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);
1293: PetscMalloc1(nselected,&new_ilocal);
1294: PetscMalloc1(nselected,&new_iremote);
1295: for (i=0; i<nselected; ++i) {
1296: const PetscInt l = leaves[i];
1297: new_ilocal[i] = ilocal ? ilocal[l] : l;
1298: new_iremote[i].rank = iremote[l].rank;
1299: new_iremote[i].index = iremote[l].index;
1300: }
1301: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);
1302: PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1303: }
1304: PetscFree(leaves);
1305: return(0);
1306: }
1308: /*@C
1309: PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()
1311: Collective on PetscSF
1313: Input Arguments:
1314: + sf - star forest on which to communicate
1315: . unit - data type associated with each node
1316: . rootdata - buffer to broadcast
1317: - op - operation to use for reduction
1319: Output Arguments:
1320: . leafdata - buffer to be reduced with values from each leaf's respective root
1322: Level: intermediate
1324: .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd()
1325: @*/
1326: PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1327: {
1329: PetscMemType rootmtype,leafmtype;
1333: PetscSFSetUp(sf);
1334: PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);
1335: PetscGetMemType(rootdata,&rootmtype);
1336: PetscGetMemType(leafdata,&leafmtype);
1337: (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1338: PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);
1339: return(0);
1340: }
1342: /*@C
1343: PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()
1345: Collective
1347: Input Arguments:
1348: + sf - star forest
1349: . unit - data type
1350: . rootdata - buffer to broadcast
1351: - op - operation to use for reduction
1353: Output Arguments:
1354: . leafdata - buffer to be reduced with values from each leaf's respective root
1356: Level: intermediate
1358: .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1359: @*/
1360: PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1361: {
1366: PetscSFSetUp(sf);
1367: PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);
1368: (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);
1369: PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);
1370: return(0);
1371: }
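/*
   A sketch of a complete broadcast-and-reduce round using the begin/end pair above
   (NROOTS/NLEAVES are placeholder sizes supplied by the caller; error checking omitted):

     PetscInt rootdata[NROOTS];    // values owned at my roots
     PetscInt leafdata[NLEAVES];   // accumulates the value of each leaf's root

     PetscArrayzero(leafdata,NLEAVES);
     PetscSFBcastAndOpBegin(sf,MPIU_INT,rootdata,leafdata,MPI_SUM);
     // ... unrelated local work may overlap with the communication here ...
     PetscSFBcastAndOpEnd(sf,MPIU_INT,rootdata,leafdata,MPI_SUM);
     // each leafdata[i] now has the value of its root added to it
*/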
1373: /*@C
1374: PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1376: Collective
1378: Input Arguments:
1379: + sf - star forest
1380: . unit - data type
1381: . leafdata - values to reduce
1382: - op - reduction operation
1384: Output Arguments:
1385: . rootdata - result of reduction of values from all leaves of each root
1387: Level: intermediate
1389: .seealso: PetscSFBcastBegin()
1390: @*/
1391: PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1392: {
1394: PetscMemType rootmtype,leafmtype;
1398: PetscSFSetUp(sf);
1399: PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);
1400: PetscGetMemType(rootdata,&rootmtype);
1401: PetscGetMemType(leafdata,&leafmtype);
1402: (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1403: PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);
1404: return(0);
1405: }
1407: /*@C
1408: PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1410: Collective
1412: Input Arguments:
1413: + sf - star forest
1414: . unit - data type
1415: . leafdata - values to reduce
1416: - op - reduction operation
1418: Output Arguments:
1419: . rootdata - result of reduction of values from all leaves of each root
1421: Level: intermediate
1423: .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1424: @*/
1425: PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1426: {
1431: PetscSFSetUp(sf);
1432: PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);
1433: (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);
1434: PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);
1435: return(0);
1436: }
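/*
   A sketch of reducing leaf values into roots with the begin/end pair above
   (NLEAVES/NROOTS are placeholder sizes; error checking omitted):

     PetscInt leafdata[NLEAVES];   // one contribution per leaf, filled by the caller
     PetscInt rootdata[NROOTS];

     PetscArrayzero(rootdata,NROOTS);
     PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);
     PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);
     // rootdata[r] now holds the sum over all leaves, on any process, that reference root r
*/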
1438: /*@C
1439: PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1441: Collective
1443: Input Arguments:
1444: + sf - star forest
1445: . unit - data type
1446: . leafdata - leaf values to use in reduction
1447: - op - operation to use for reduction
1449: Output Arguments:
1450: + rootdata - root values to be updated, input state is seen by first process to perform an update
1451: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1453: Level: advanced
1455: Note:
1456: The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1457: might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1458: not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1459: integers.
1461: .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1462: @*/
1463: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1464: {
1466: PetscMemType rootmtype,leafmtype,leafupdatemtype;
1470: PetscSFSetUp(sf);
1471: PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1472: PetscGetMemType(rootdata,&rootmtype);
1473: PetscGetMemType(leafdata,&leafmtype);
1474: PetscGetMemType(leafupdate,&leafupdatemtype);
1475: if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1476: (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);
1477: PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1478: return(0);
1479: }
1481: /*@C
1482: PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1484: Collective
1486: Input Arguments:
1487: + sf - star forest
1488: . unit - data type
1489: . leafdata - leaf values to use in reduction
1490: - op - operation to use for reduction
1492: Output Arguments:
1493: + rootdata - root values to be updated, input state is seen by first process to perform an update
1494: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1496: Level: advanced
1498: .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1499: @*/
1500: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1501: {
1506: PetscSFSetUp(sf);
1507: PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1508: (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);
1509: PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1510: return(0);
1511: }
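/*
   A sketch of the fetch-and-add idiom used in PetscSFGetMultiSF() above: every leaf
   atomically adds 1 to a counter at its root and receives the counter's previous value,
   i.e. its slot number at that root (NROOTS/NLEAVES are placeholder sizes; error
   checking omitted):

     PetscInt i,rootcount[NROOTS],leafones[NLEAVES],leafoffset[NLEAVES];

     PetscArrayzero(rootcount,NROOTS);
     for (i=0; i<NLEAVES; i++) leafones[i] = 1;
     PetscSFFetchAndOpBegin(sf,MPIU_INT,rootcount,leafones,leafoffset,MPI_SUM);
     PetscSFFetchAndOpEnd(sf,MPIU_INT,rootcount,leafones,leafoffset,MPI_SUM);
     // rootcount[r] ends up equal to the degree of root r
*/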
1513: /*@C
1514: PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()
1516: Collective
1518: Input Arguments:
1519: . sf - star forest
1521: Output Arguments:
1522: . degree - degree of each root vertex
1524: Level: advanced
1526: Notes:
1527: The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1529: .seealso: PetscSFGatherBegin()
1530: @*/
1531: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1532: {
1537: PetscSFCheckGraphSet(sf,1);
1539: if (!sf->degreeknown) {
1540: PetscInt i, nroots = sf->nroots, maxlocal;
1541: if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1542: maxlocal = sf->maxleaf-sf->minleaf+1;
1543: PetscMalloc1(nroots,&sf->degree);
1544: PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1545: for (i=0; i<nroots; i++) sf->degree[i] = 0;
1546: for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1547: PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);
1548: }
1549: *degree = NULL;
1550: return(0);
1551: }
1553: /*@C
1554: PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()
1556: Collective
1558: Input Arguments:
1559: . sf - star forest
1561: Output Arguments:
1562: . degree - degree of each root vertex
1564: Level: developer
1566: Notes:
1567: The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1569: .seealso: PetscSFComputeDegreeBegin()
1570: @*/
1571: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
1572: {
1577: PetscSFCheckGraphSet(sf,1);
1579: if (!sf->degreeknown) {
1580: if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1581: PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);
1582: PetscFree(sf->degreetmp);
1583: sf->degreeknown = PETSC_TRUE;
1584: }
1585: *degree = sf->degree;
1586: return(0);
1587: }
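/*
   A sketch of the degree computation pair above (error checking omitted):

     const PetscInt *degree;

     PetscSFComputeDegreeBegin(sf,&degree);
     PetscSFComputeDegreeEnd(sf,&degree);
     // degree[r] is the number of leaves, over all processes, referencing root r;
     // the array is owned by sf and must not be freed by the caller
*/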
1590: /*@C
1591: PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1592: Each multi-root is assigned the index of the corresponding original root.
1594: Collective
1596: Input Arguments:
1597: + sf - star forest
1598: - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1600: Output Arguments:
1601: + nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1602: - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1604: Level: developer
1606: Notes:
1607: The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1609: .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1610: @*/
1611: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1612: {
1613: PetscSF msf;
1614: PetscInt i, j, k, nroots, nmroots;
1615: PetscErrorCode ierr;
1619: PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);
1623: PetscSFGetMultiSF(sf,&msf);
1624: PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);
1625: PetscMalloc1(nmroots, multiRootsOrigNumbering);
1626: for (i=0,j=0,k=0; i<nroots; i++) {
1627: if (!degree[i]) continue;
1628: for (j=0; j<degree[i]; j++,k++) {
1629: (*multiRootsOrigNumbering)[k] = i;
1630: }
1631: }
1632: if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
1633: if (nMultiRoots) *nMultiRoots = nmroots;
1634: return(0);
1635: }
1637: /*@C
1638: PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1640: Collective
1642: Input Arguments:
1643: + sf - star forest
1644: . unit - data type
1645: - leafdata - leaf data to gather to roots
1647: Output Argument:
1648: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1650: Level: intermediate
1652: .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1653: @*/
1654: PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1655: {
1657: PetscSF multi = NULL;
1661: PetscSFSetUp(sf);
1662: PetscSFGetMultiSF(sf,&multi);
1663: PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);
1664: return(0);
1665: }
1667: /*@C
1668: PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1670: Collective
1672: Input Arguments:
1673: + sf - star forest
1674: . unit - data type
1675: - leafdata - leaf data to gather to roots
1677: Output Argument:
1678: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1680: Level: intermediate
1682: .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1683: @*/
1684: PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1685: {
1687: PetscSF multi = NULL;
1691: PetscSFSetUp(sf);
1692: PetscSFGetMultiSF(sf,&multi);
1693: PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);
1694: return(0);
1695: }
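/*
   A sketch combining the degree computation with the gather pair above; the multi-root
   buffer must provide degree[r] consecutive slots for each root r (NLEAVES is a
   placeholder size; error checking omitted):

     const PetscInt *degree;
     PetscInt        i,nroots,nslots = 0,*multirootdata,leafdata[NLEAVES];

     PetscSFComputeDegreeBegin(sf,&degree);
     PetscSFComputeDegreeEnd(sf,&degree);
     PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
     for (i=0; i<nroots; i++) nslots += degree[i];
     PetscMalloc1(nslots,&multirootdata);
     PetscSFGatherBegin(sf,MPIU_INT,leafdata,multirootdata);
     PetscSFGatherEnd(sf,MPIU_INT,leafdata,multirootdata);
     PetscFree(multirootdata);
*/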
1697: /*@C
1698: PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1700: Collective
1702: Input Arguments:
1703: + sf - star forest
1704: . unit - data type
1705: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1707: Output Argument:
1708: . leafdata - leaf data to be updated with personal data from its respective root
1710: Level: intermediate
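
Example:
A minimal sketch; buffer names are illustrative and multirootdata/leafdata are assumed to be sized as described for PetscSFGatherBegin():
.vb
/* multirootdata holds one unit per multi-root (sum of the root degrees),
   leafdata holds one unit per leaf */
PetscSFScatterBegin(sf,MPIU_SCALAR,multirootdata,leafdata);
PetscSFScatterEnd(sf,MPIU_SCALAR,multirootdata,leafdata);
.ve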
1712: .seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin(), PetscSFScatterEnd()
1713: @*/
1714: PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1715: {
1717: PetscSF multi = NULL;
1721: PetscSFSetUp(sf);
1722: PetscSFGetMultiSF(sf,&multi);
1723: PetscSFBcastBegin(multi,unit,multirootdata,leafdata);
1724: return(0);
1725: }
1727: /*@C
1728: PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1730: Collective
1732: Input Arguments:
1733: + sf - star forest
1734: . unit - data type
1735: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1737: Output Argument:
1738: . leafdata - leaf data to be updated with personal data from its respective root
1740: Level: intermediate
1742: .seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd(), PetscSFScatterBegin()
1743: @*/
1744: PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1745: {
1747: PetscSF multi = NULL;
1751: PetscSFSetUp(sf);
1752: PetscSFGetMultiSF(sf,&multi);
1753: PetscSFBcastEnd(multi,unit,multirootdata,leafdata);
1754: return(0);
1755: }
1757: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1758: {
1759: PetscInt i, n, nleaves;
1760: const PetscInt *ilocal = NULL;
1761: PetscHSetI seen;
1762: PetscErrorCode ierr;
1765: if (!PetscDefined(USE_DEBUG)) return(0);
1766: PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);
1767: PetscHSetICreate(&seen);
1768: for (i = 0; i < nleaves; i++) {
1769: const PetscInt leaf = ilocal ? ilocal[i] : i;
1770: PetscHSetIAdd(seen,leaf);
1771: }
1772: PetscHSetIGetSize(seen,&n);
1773: if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1774: PetscHSetIDestroy(&seen);
1775: return(0);
1776: }
1778: /*@
1779: PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one, viewed from the top (roots) down to the leaves
1781: Input Parameters:
1782: + sfA - The first PetscSF
1783: - sfB - The second PetscSF
1785: Output Parameters:
1786: . sfBA - The composite SF
1788: Level: developer
1790: Notes:
1791: Currently, the two SFs must be defined on congruent communicators and they must be true star
1792: forests, i.e., no leaf is connected to more than one root.
1794: sfA's leaf space and sfB's root space may partially overlap. The composition builds
1795: a graph containing sfA's roots and sfB's leaves only where there is a path between them. Unconnected
1796: nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
1797: Bcast on sfA followed by a Bcast on sfB, restricted to the connected nodes.
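
Example:
A sketch of a typical call sequence; sfA, sfB and sfBA are illustrative names:
.vb
PetscSF sfA,sfB,sfBA;

/* ... create sfA and sfB and set their graphs, with sfA's leaves
   living in the same space as sfB's roots ... */
PetscSFCompose(sfA,sfB,&sfBA);
/* a Bcast on sfBA now acts like a Bcast on sfA followed by a Bcast on sfB */
PetscSFDestroy(&sfBA);
.ve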
1799: .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
1800: @*/
1801: PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1802: {
1803: PetscErrorCode ierr;
1804: const PetscSFNode *remotePointsA,*remotePointsB;
1805: PetscSFNode *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
1806: const PetscInt *localPointsA,*localPointsB;
1807: PetscInt *localPointsBA;
1808: PetscInt i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
1809: PetscBool denseB;
1813: PetscSFCheckGraphSet(sfA,1);
1815: PetscSFCheckGraphSet(sfB,2);
1818: PetscSFCheckLeavesUnique_Private(sfA);
1819: PetscSFCheckLeavesUnique_Private(sfB);
1821: PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);
1822: PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);
1823: if (localPointsA) {
1824: PetscMalloc1(numRootsB,&reorderedRemotePointsA);
1825: for (i=0; i<numRootsB; i++) {
1826: reorderedRemotePointsA[i].rank = -1;
1827: reorderedRemotePointsA[i].index = -1;
1828: }
1829: for (i=0; i<numLeavesA; i++) {
1830: if (localPointsA[i] >= numRootsB) continue;
1831: reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
1832: }
1833: remotePointsA = reorderedRemotePointsA;
1834: }
1835: PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);
1836: PetscMalloc1(maxleaf-minleaf+1,&leafdataB);
1837: PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);
1838: PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);
1839: PetscFree(reorderedRemotePointsA);
1841: denseB = (PetscBool)!localPointsB;
1842: for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1843: if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
1844: else numLeavesBA++;
1845: }
1846: if (denseB) {
1847: localPointsBA = NULL;
1848: remotePointsBA = leafdataB;
1849: } else {
1850: PetscMalloc1(numLeavesBA,&localPointsBA);
1851: PetscMalloc1(numLeavesBA,&remotePointsBA);
1852: for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1853: const PetscInt l = localPointsB ? localPointsB[i] : i;
1855: if (leafdataB[l-minleaf].rank == -1) continue;
1856: remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
1857: localPointsBA[numLeavesBA] = l;
1858: numLeavesBA++;
1859: }
1860: PetscFree(leafdataB);
1861: }
1862: PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
1863: PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
1864: return(0);
1865: }
1867: /*@
1868: PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one
1870: Input Parameters:
1871: + sfA - The first PetscSF
1872: - sfB - The second PetscSF
1874: Output Parameters:
1875: . sfBA - The composite SF.
1877: Level: developer
1879: Notes:
1880: Currently, the two SFs must be defined on congruent communicators and they must be true star
1881: forests, i.e., no leaf is connected to more than one root. Moreover, all roots of the
1882: second SF must have degree 1, i.e., no root of sfB has more than one leaf connected to it.
1884: sfA's leaf space and sfB's leaf space may partially overlap. The composition builds
1885: a graph containing sfA's roots and sfB's roots only where there is a path between them. Unconnected
1886: roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA followed
1887: by a Reduce on sfB, restricted to the connected roots.
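
Example:
A sketch of a typical call sequence; sfA, sfB and sfBA are illustrative names:
.vb
PetscSF sfA,sfB,sfBA;

/* ... create sfA and sfB and set their graphs, with their leaves living in
   the same space and every root of sfB having degree 1 ... */
PetscSFComposeInverse(sfA,sfB,&sfBA);
/* a Bcast on sfBA now acts like a Bcast on sfA followed by a Reduce on sfB */
PetscSFDestroy(&sfBA);
.ve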
1889: .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
1890: @*/
1891: PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1892: {
1893: PetscErrorCode ierr;
1894: const PetscSFNode *remotePointsA,*remotePointsB;
1895: PetscSFNode *remotePointsBA;
1896: const PetscInt *localPointsA,*localPointsB;
1897: PetscSFNode *reorderedRemotePointsA = NULL;
1898: PetscInt i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
1899: MPI_Op op;
1900: #if defined(PETSC_USE_64BIT_INDICES)
1901: PetscBool iswin;
1902: #endif
1906: PetscSFCheckGraphSet(sfA,1);
1908: PetscSFCheckGraphSet(sfB,2);
1911: PetscSFCheckLeavesUnique_Private(sfA);
1912: PetscSFCheckLeavesUnique_Private(sfB);
1914: PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);
1915: PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);
1917: /* TODO: Check that all roots of sfB have degree 1 */
1918: /* Once that check is implemented, we can replace MPI_MAXLOC
1919: with MPIU_REPLACE; when the degree condition holds, the two have the same effect.
1920: We use MPI_MAXLOC only so that this routine produces deterministic output when
1921: the root condition is not met.
1922: */
1923: op = MPI_MAXLOC;
1924: #if defined(PETSC_USE_64BIT_INDICES)
1925: /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
1926: PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);
1927: if (iswin) op = MPIU_REPLACE;
1928: #endif
1930: PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);
1931: PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);
1932: for (i=0; i<maxleaf - minleaf + 1; i++) {
1933: reorderedRemotePointsA[i].rank = -1;
1934: reorderedRemotePointsA[i].index = -1;
1935: }
1936: if (localPointsA) {
1937: for (i=0; i<numLeavesA; i++) {
1938: if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
1939: reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
1940: }
1941: } else {
1942: for (i=0; i<numLeavesA; i++) {
1943: if (i > maxleaf || i < minleaf) continue;
1944: reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
1945: }
1946: }
1948: PetscMalloc1(numRootsB,&localPointsBA);
1949: PetscMalloc1(numRootsB,&remotePointsBA);
1950: for (i=0; i<numRootsB; i++) {
1951: remotePointsBA[i].rank = -1;
1952: remotePointsBA[i].index = -1;
1953: }
1955: PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);
1956: PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);
1957: PetscFree(reorderedRemotePointsA);
1958: for (i=0,numLeavesBA=0; i<numRootsB; i++) {
1959: if (remotePointsBA[i].rank == -1) continue;
1960: remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
1961: remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
1962: localPointsBA[numLeavesBA] = i;
1963: numLeavesBA++;
1964: }
1965: PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
1966: PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
1967: return(0);
1968: }
1970: /*
1971: PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
1973: Input Parameters:
1974: . sf - The global PetscSF
1976: Output Parameters:
1977: . out - The local PetscSF
1978: */
1979: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
1980: {
1981: MPI_Comm comm;
1982: PetscMPIInt myrank;
1983: const PetscInt *ilocal;
1984: const PetscSFNode *iremote;
1985: PetscInt i,j,nroots,nleaves,lnleaves,*lilocal;
1986: PetscSFNode *liremote;
1987: PetscSF lsf;
1988: PetscErrorCode ierr;
1992: if (sf->ops->CreateLocalSF) {
1993: (*sf->ops->CreateLocalSF)(sf,out);
1994: } else {
1995: /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
1996: PetscObjectGetComm((PetscObject)sf,&comm);
1997: MPI_Comm_rank(comm,&myrank);
1999: /* Find out local edges and build a local SF */
2000: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
2001: for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2002: PetscMalloc1(lnleaves,&lilocal);
2003: PetscMalloc1(lnleaves,&liremote);
2005: for (i=j=0; i<nleaves; i++) {
2006: if (iremote[i].rank == (PetscInt)myrank) {
2007: lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2008: liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
2009: liremote[j].index = iremote[i].index;
2010: j++;
2011: }
2012: }
2013: PetscSFCreate(PETSC_COMM_SELF,&lsf);
2014: PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);
2015: PetscSFSetUp(lsf);
2016: *out = lsf;
2017: }
2018: return(0);
2019: }
2021: /* Similar to PetscSFBcastBegin/End, but only broadcasts to leaves on rank 0 */
2022: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2023: {
2024: PetscErrorCode ierr;
2025: PetscMemType rootmtype,leafmtype;
2029: PetscSFSetUp(sf);
2030: PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);
2031: PetscGetMemType(rootdata,&rootmtype);
2032: PetscGetMemType(leafdata,&leafmtype);
2033: if (sf->ops->BcastToZero) {
2034: (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);
2035: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2036: PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);
2037: return(0);
2038: }