Actual source code: sf.c
1: #include <petsc/private/sfimpl.h>
2: #include <petsc/private/hashseti.h>
3: #include <petsc/private/viewerimpl.h>
4: #include <petscctable.h>
6: #if defined(PETSC_HAVE_CUDA)
7: #include <cuda_runtime.h>
8: #endif
10: #if defined(PETSC_HAVE_HIP)
11: #include <hip/hip_runtime.h>
12: #endif
14: #if defined(PETSC_CLANG_STATIC_ANALYZER)
15: void PetscSFCheckGraphSet(PetscSF, int);
16: #else
17: #if defined(PETSC_USE_DEBUG)
18: #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PetscObjectComm((PetscObject)(sf)), PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
19: #else
20: #define PetscSFCheckGraphSet(sf, arg) \
21: do { \
22: } while (0)
23: #endif
24: #endif
26: const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
28: /*@
29: PetscSFCreate - create a star forest communication context
31: Collective
33: Input Parameter:
34: . comm - communicator on which the star forest will operate
36: Output Parameter:
37: . sf - new star forest context
39: Options Database Keys:
40: + -sf_type basic -Use MPI persistent Isend/Irecv for communication (Default)
41: . -sf_type window -Use MPI-3 one-sided window for communication
42: - -sf_type neighbor -Use MPI-3 neighborhood collectives for communication
44: Level: intermediate
46: Notes:
47: When one knows the communication graph is one of the predefined graphs, such as MPI_Alltoall, MPI_Allgatherv,
48: MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
49: SFs are optimized and have better performance than general SFs.
51: .seealso: `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
52: @*/
53: PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
54: {
55: PetscSF b;
58: PetscSFInitializePackage();
60: PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView);
62: b->nroots = -1;
63: b->nleaves = -1;
64: b->minleaf = PETSC_MAX_INT;
65: b->maxleaf = PETSC_MIN_INT;
66: b->nranks = -1;
67: b->rankorder = PETSC_TRUE;
68: b->ingroup = MPI_GROUP_NULL;
69: b->outgroup = MPI_GROUP_NULL;
70: b->graphset = PETSC_FALSE;
71: #if defined(PETSC_HAVE_DEVICE)
72: b->use_gpu_aware_mpi = use_gpu_aware_mpi;
73: b->use_stream_aware_mpi = PETSC_FALSE;
74: b->unknown_input_stream = PETSC_FALSE;
75: #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda */
76: b->backend = PETSCSF_BACKEND_KOKKOS;
77: #elif defined(PETSC_HAVE_CUDA)
78: b->backend = PETSCSF_BACKEND_CUDA;
79: #elif defined(PETSC_HAVE_HIP)
80: b->backend = PETSCSF_BACKEND_HIP;
81: #endif
83: #if defined(PETSC_HAVE_NVSHMEM)
84: b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */
85: b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
86: PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL);
87: PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL);
88: #endif
89: #endif
90: b->vscat.from_n = -1;
91: b->vscat.to_n = -1;
92: b->vscat.unit = MPIU_SCALAR;
93: *sf = b;
94: return 0;
95: }
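/* Usage sketch (illustrative, not part of sf.c): the typical lifecycle of a PetscSF, here with a
   trivial graph in which each of 4 local leaves points at the local root with the same index.
   Assumes #include <petscsf.h> and an initialized PETSc; the routine name is made up. */
static PetscErrorCode ExampleSFLifecycle(MPI_Comm comm)
{
  PetscSF     sf;
  PetscSFNode remote[4];
  PetscMPIInt rank;
  PetscInt    i;

  PetscFunctionBeginUser;
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  for (i = 0; i < 4; i++) {
    remote[i].rank  = rank; /* each leaf's root lives on this same process */
    remote[i].index = i;    /* ...at local root index i */
  }
  PetscCall(PetscSFCreate(comm, &sf));
  PetscCall(PetscSFSetFromOptions(sf)); /* honor -sf_type, -sf_backend, ... */
  PetscCall(PetscSFSetGraph(sf, 4, 4, NULL /* contiguous leaves */, PETSC_COPY_VALUES, remote, PETSC_COPY_VALUES));
  PetscCall(PetscSFSetUp(sf)); /* optional; otherwise done lazily by the first communication call */
  PetscCall(PetscSFDestroy(&sf));
  PetscFunctionReturn(0);
}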
97: /*@
98: PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
100: Collective
102: Input Parameter:
103: . sf - star forest
105: Level: advanced
107: .seealso: `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
108: @*/
109: PetscErrorCode PetscSFReset(PetscSF sf)
110: {
112: PetscTryTypeMethod(sf, Reset);
113: sf->nroots = -1;
114: sf->nleaves = -1;
115: sf->minleaf = PETSC_MAX_INT;
116: sf->maxleaf = PETSC_MIN_INT;
117: sf->mine = NULL;
118: sf->remote = NULL;
119: sf->graphset = PETSC_FALSE;
120: PetscFree(sf->mine_alloc);
121: PetscFree(sf->remote_alloc);
122: sf->nranks = -1;
123: PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote);
124: sf->degreeknown = PETSC_FALSE;
125: PetscFree(sf->degree);
126: if (sf->ingroup != MPI_GROUP_NULL) MPI_Group_free(&sf->ingroup);
127: if (sf->outgroup != MPI_GROUP_NULL) MPI_Group_free(&sf->outgroup);
128: if (sf->multi) sf->multi->multi = NULL;
129: PetscSFDestroy(&sf->multi);
130: PetscLayoutDestroy(&sf->map);
132: #if defined(PETSC_HAVE_DEVICE)
133: for (PetscInt i = 0; i < 2; i++) PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]);
134: #endif
136: sf->setupcalled = PETSC_FALSE;
137: return 0;
138: }
140: /*@C
141: PetscSFSetType - Set the PetscSF communication implementation
143: Collective on PetscSF
145: Input Parameters:
146: + sf - the PetscSF context
147: - type - a known method
149: Options Database Key:
150: . -sf_type <type> - Sets the method; use -help for a list
151: of available methods (for instance, window, basic, neighbor)
153: Notes:
154: See "include/petscsf.h" for available methods (for instance)
155: + PETSCSFWINDOW - MPI-2/3 one-sided
156: - PETSCSFBASIC - basic implementation using MPI-1 two-sided
158: Level: intermediate
160: .seealso: `PetscSFType`, `PetscSFCreate()`
161: @*/
162: PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
163: {
164: PetscBool match;
165: PetscErrorCode (*r)(PetscSF);
170: PetscObjectTypeCompare((PetscObject)sf, type, &match);
171: if (match) return 0;
173: PetscFunctionListFind(PetscSFList, type, &r);
174: PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
175: /* Destroy the previous PetscSF implementation context */
176: PetscTryTypeMethod(sf, Destroy);
177: PetscMemzero(sf->ops, sizeof(*sf->ops));
178: PetscObjectChangeTypeName((PetscObject)sf, type);
179: (*r)(sf);
180: return 0;
181: }
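/* Usage sketch (illustrative, not part of sf.c): selecting an implementation programmatically; this is
   equivalent to passing -sf_type window on the command line when PetscSFSetFromOptions() is called.
   Assumes #include <petscsf.h>; the routine name is made up. */
static PetscErrorCode ExampleUseWindowType(PetscSF sf)
{
  PetscFunctionBeginUser;
  PetscCall(PetscSFSetType(sf, PETSCSFWINDOW)); /* use the MPI one-sided window implementation */
  PetscFunctionReturn(0);
}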
183: /*@C
184: PetscSFGetType - Get the PetscSF communication implementation
186: Not Collective
188: Input Parameter:
189: . sf - the PetscSF context
191: Output Parameter:
192: . type - the PetscSF type name
194: Level: intermediate
196: .seealso: `PetscSFSetType()`, `PetscSFCreate()`
197: @*/
198: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
199: {
202: *type = ((PetscObject)sf)->type_name;
203: return 0;
204: }
206: /*@C
207: PetscSFDestroy - destroy star forest
209: Collective
211: Input Parameter:
212: . sf - address of star forest
214: Level: intermediate
216: .seealso: `PetscSFCreate()`, `PetscSFReset()`
217: @*/
218: PetscErrorCode PetscSFDestroy(PetscSF *sf)
219: {
220: if (!*sf) return 0;
222: if (--((PetscObject)(*sf))->refct > 0) {
223: *sf = NULL;
224: return 0;
225: }
226: PetscSFReset(*sf);
227: PetscTryTypeMethod((*sf), Destroy);
228: PetscSFDestroy(&(*sf)->vscat.lsf);
229: if ((*sf)->vscat.bs > 1) MPI_Type_free(&(*sf)->vscat.unit);
230: PetscHeaderDestroy(sf);
231: return 0;
232: }
234: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
235: {
236: PetscInt i, nleaves;
237: PetscMPIInt size;
238: const PetscInt *ilocal;
239: const PetscSFNode *iremote;
241: if (!sf->graphset || !PetscDefined(USE_DEBUG)) return 0;
242: PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote);
243: MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size);
244: for (i = 0; i < nleaves; i++) {
245: const PetscInt rank = iremote[i].rank;
246: const PetscInt remote = iremote[i].index;
247: const PetscInt leaf = ilocal ? ilocal[i] : i;
248: PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") in remote ranks array is invalid, should be in [0, %d)", rank, size);
249: PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") in remote indices array is invalid, should be >= 0", remote);
250: PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") in leaves array is invalid, should be >= 0", leaf);
251: }
252: return 0;
253: }
255: /*@
256: PetscSFSetUp - set up communication structures
258: Collective
260: Input Parameter:
261: . sf - star forest communication object
263: Level: beginner
265: .seealso: `PetscSFSetFromOptions()`, `PetscSFSetType()`
266: @*/
267: PetscErrorCode PetscSFSetUp(PetscSF sf)
268: {
270: PetscSFCheckGraphSet(sf, 1);
271: if (sf->setupcalled) return 0;
272: PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0);
273: PetscSFCheckGraphValid_Private(sf);
274: if (!((PetscObject)sf)->type_name) PetscSFSetType(sf, PETSCSFBASIC); /* Zero all sf->ops */
275: PetscTryTypeMethod(sf, SetUp);
276: #if defined(PETSC_HAVE_CUDA)
277: if (sf->backend == PETSCSF_BACKEND_CUDA) {
278: sf->ops->Malloc = PetscSFMalloc_CUDA;
279: sf->ops->Free = PetscSFFree_CUDA;
280: }
281: #endif
282: #if defined(PETSC_HAVE_HIP)
283: if (sf->backend == PETSCSF_BACKEND_HIP) {
284: sf->ops->Malloc = PetscSFMalloc_HIP;
285: sf->ops->Free = PetscSFFree_HIP;
286: }
287: #endif
290: #if defined(PETSC_HAVE_KOKKOS)
291: if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
292: sf->ops->Malloc = PetscSFMalloc_Kokkos;
293: sf->ops->Free = PetscSFFree_Kokkos;
294: }
295: #endif
296: PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0);
297: sf->setupcalled = PETSC_TRUE;
298: return 0;
299: }
301: /*@
302: PetscSFSetFromOptions - set PetscSF options using the options database
304: Logically Collective
306: Input Parameter:
307: . sf - star forest
309: Options Database Keys:
310: + -sf_type - implementation type, see PetscSFSetType()
311: . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
312: . -sf_unknown_input_stream - Assume the input root/leafdata is computed on streams unknown to SF, so SF will
313: synchronize streams itself before accessing the data (default: false).
314: If true, this option only works with -use_gpu_aware_mpi 1.
315: . -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
316: If true, this option only works with -use_gpu_aware_mpi 1.
318: - -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
319: On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
320: the only available backend is kokkos.
322: Level: intermediate
323: @*/
324: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
325: {
326: PetscSFType deft;
327: char type[256];
328: PetscBool flg;
331: PetscObjectOptionsBegin((PetscObject)sf);
332: deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
333: PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg);
334: PetscSFSetType(sf, flg ? type : deft);
335: PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL);
336: #if defined(PETSC_HAVE_DEVICE)
337: {
338: char backendstr[32] = {0};
339: PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
340: /* Change the defaults set in PetscSFCreate() with command line options */
341: PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL);
342: PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL);
343: PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set);
344: PetscStrcasecmp("cuda", backendstr, &isCuda);
345: PetscStrcasecmp("kokkos", backendstr, &isKokkos);
346: PetscStrcasecmp("hip", backendstr, &isHip);
347: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
348: if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
349: else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
350: else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
352: #elif defined(PETSC_HAVE_KOKKOS)
354: #endif
355: }
356: #endif
357: PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
358: PetscOptionsEnd();
359: return 0;
360: }
362: /*@
363: PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
365: Logically Collective
367: Input Parameters:
368: + sf - star forest
369: - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
371: Level: advanced
373: .seealso: `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
374: @*/
375: PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
376: {
380: sf->rankorder = flg;
381: return 0;
382: }
384: /*@C
385: PetscSFSetGraph - Set a parallel star forest
387: Collective
389: Input Parameters:
390: + sf - star forest
391: . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
392: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
393: . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
394: during setup in debug mode)
395: . localmode - copy mode for ilocal
396: . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
397: during setup in debug mode)
398: - remotemode - copy mode for iremote
400: Level: intermediate
402: Notes:
403: Leaf indices in ilocal must be unique, otherwise an error occurs.
405: Input arrays ilocal and iremote follow the PetscCopyMode semantics.
406: In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER,
407: PETSc might modify the respective array;
408: if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy().
409: Only when PETSC_COPY_VALUES is used is the respective array guaranteed to stay intact, so a const array can be passed (though a cast to non-const is needed).
411: Fortran Notes:
412: In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.
414: Developer Notes:
415: We sort leaves to check for duplicates and contiguity and to find minleaf/maxleaf.
416: This also makes it easy to compare the leaf sets of two SFs.
418: .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
419: @*/
420: PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode)
421: {
422: PetscBool unique, contiguous;
429: /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
430: * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
434: if (sf->nroots >= 0) { /* Reset only if graph already set */
435: PetscSFReset(sf);
436: }
438: PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0);
440: sf->nroots = nroots;
441: sf->nleaves = nleaves;
443: if (localmode == PETSC_COPY_VALUES && ilocal) {
444: PetscInt *tlocal = NULL;
446: PetscMalloc1(nleaves, &tlocal);
447: PetscArraycpy(tlocal, ilocal, nleaves);
448: ilocal = tlocal;
449: }
450: if (remotemode == PETSC_COPY_VALUES) {
451: PetscSFNode *tremote = NULL;
453: PetscMalloc1(nleaves, &tremote);
454: PetscArraycpy(tremote, iremote, nleaves);
455: iremote = tremote;
456: }
458: if (nleaves && ilocal) {
459: PetscSFNode work;
461: PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work);
462: PetscSortedCheckDupsInt(nleaves, ilocal, &unique);
463: unique = PetscNot(unique);
465: sf->minleaf = ilocal[0];
466: sf->maxleaf = ilocal[nleaves - 1];
467: contiguous = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
468: } else {
469: sf->minleaf = 0;
470: sf->maxleaf = nleaves - 1;
471: unique = PETSC_TRUE;
472: contiguous = PETSC_TRUE;
473: }
475: if (contiguous) {
476: if (localmode == PETSC_USE_POINTER) {
477: ilocal = NULL;
478: } else {
479: PetscFree(ilocal);
480: }
481: }
482: sf->mine = ilocal;
483: if (localmode == PETSC_USE_POINTER) {
484: sf->mine_alloc = NULL;
485: } else {
486: sf->mine_alloc = ilocal;
487: }
488: sf->remote = iremote;
489: if (remotemode == PETSC_USE_POINTER) {
490: sf->remote_alloc = NULL;
491: } else {
492: sf->remote_alloc = iremote;
493: }
494: PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0);
495: sf->graphset = PETSC_TRUE;
496: return 0;
497: }
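/* Usage sketch (illustrative, not part of sf.c) of PetscSFSetGraph(): each process owns two roots and
   attaches two leaves to the roots of the next process (a ring). ilocal is given explicitly to show
   non-contiguous leaf placement in a leafdata buffer of length at least 4. PETSC_COPY_VALUES means the
   stack arrays can be reused afterwards. Assumes #include <petscsf.h>; the routine name is made up. */
static PetscErrorCode ExampleRingGraph(MPI_Comm comm, PetscSF *sf)
{
  PetscMPIInt rank, size;
  PetscInt    ilocal[2];
  PetscSFNode iremote[2];

  PetscFunctionBeginUser;
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  ilocal[0]        = 1; /* first leaf lives at position 1 of the local leafdata buffer */
  ilocal[1]        = 3; /* second leaf lives at position 3 */
  iremote[0].rank  = (rank + 1) % size; /* both leaves reference roots on the next process */
  iremote[0].index = 0;
  iremote[1].rank  = (rank + 1) % size;
  iremote[1].index = 1;
  PetscCall(PetscSFCreate(comm, sf));
  PetscCall(PetscSFSetGraph(*sf, 2 /* nroots */, 2 /* nleaves */, ilocal, PETSC_COPY_VALUES, iremote, PETSC_COPY_VALUES));
  PetscFunctionReturn(0);
}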
499: /*@
500: PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern
502: Collective
504: Input Parameters:
505: + sf - The PetscSF
506: . map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
507: - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL
509: Notes:
510: It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
511: n and N are local and global sizes of x respectively.
513: With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph such that a Bcast on it copies x to a
514: sequential vector y on every rank.
516: With PETSCSF_PATTERN_GATHER, the routine creates a graph such that a Bcast on it copies x to a
517: sequential vector y on rank 0.
519: In the above cases, entries of x are roots and entries of y are leaves.
521: With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is the size of sf's communicator. The routine
522: creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
523: of rank j, with 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount equal to 1. This does
524: not mean one cannot send multiple items; one just needs to create an MPI datatype for the multiple data
525: items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.
527: In this case, roots and leaves are symmetric.
529: Level: intermediate
530: @*/
531: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
532: {
533: MPI_Comm comm;
534: PetscInt n, N, res[2];
535: PetscMPIInt rank, size;
536: PetscSFType type;
540: PetscObjectGetComm((PetscObject)sf, &comm);
542: MPI_Comm_rank(comm, &rank);
543: MPI_Comm_size(comm, &size);
545: if (pattern == PETSCSF_PATTERN_ALLTOALL) {
546: type = PETSCSFALLTOALL;
547: PetscLayoutCreate(comm, &sf->map);
548: PetscLayoutSetLocalSize(sf->map, size);
549: PetscLayoutSetSize(sf->map, ((PetscInt)size) * size);
550: PetscLayoutSetUp(sf->map);
551: } else {
552: PetscLayoutGetLocalSize(map, &n);
553: PetscLayoutGetSize(map, &N);
554: res[0] = n;
555: res[1] = -n;
556: /* Check if n are same over all ranks so that we can optimize it */
557: MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm);
558: if (res[0] == -res[1]) { /* same n */
559: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
560: } else {
561: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
562: }
563: PetscLayoutReference(map, &sf->map);
564: }
565: PetscSFSetType(sf, type);
567: sf->pattern = pattern;
568: sf->mine = NULL; /* Contiguous */
570: /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
571: Also set other easy stuff.
572: */
573: if (pattern == PETSCSF_PATTERN_ALLGATHER) {
574: sf->nleaves = N;
575: sf->nroots = n;
576: sf->nranks = size;
577: sf->minleaf = 0;
578: sf->maxleaf = N - 1;
579: } else if (pattern == PETSCSF_PATTERN_GATHER) {
580: sf->nleaves = rank ? 0 : N;
581: sf->nroots = n;
582: sf->nranks = rank ? 0 : size;
583: sf->minleaf = 0;
584: sf->maxleaf = rank ? -1 : N - 1;
585: } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
586: sf->nleaves = size;
587: sf->nroots = size;
588: sf->nranks = size;
589: sf->minleaf = 0;
590: sf->maxleaf = size - 1;
591: }
592: sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
593: sf->graphset = PETSC_TRUE;
594: return 0;
595: }
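/* Usage sketch (illustrative, not part of sf.c) of PetscSFSetGraphWithPattern(): an ALLGATHER pattern
   over a layout with 2 roots per process, so a Bcast on the resulting SF copies every root to every
   process, analogous to MPI_Allgather. Assumes #include <petscsf.h>; the routine name is made up. */
static PetscErrorCode ExampleAllgatherSF(MPI_Comm comm, PetscSF *sf)
{
  PetscLayout map;

  PetscFunctionBeginUser;
  PetscCall(PetscLayoutCreate(comm, &map));
  PetscCall(PetscLayoutSetLocalSize(map, 2)); /* 2 roots on each process */
  PetscCall(PetscLayoutSetUp(map));
  PetscCall(PetscSFCreate(comm, sf));
  PetscCall(PetscSFSetGraphWithPattern(*sf, map, PETSCSF_PATTERN_ALLGATHER));
  PetscCall(PetscLayoutDestroy(&map)); /* the SF keeps its own reference to the layout */
  PetscFunctionReturn(0);
}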
597: /*@
598: PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
600: Collective
602: Input Parameter:
603: . sf - star forest to invert
605: Output Parameter:
606: . isf - inverse of sf
608: Level: advanced
610: Notes:
611: All roots must have degree 1.
613: The local space may be a permutation, but cannot be sparse.
615: .seealso: `PetscSFSetGraph()`
616: @*/
617: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
618: {
619: PetscMPIInt rank;
620: PetscInt i, nroots, nleaves, maxlocal, count, *newilocal;
621: const PetscInt *ilocal;
622: PetscSFNode *roots, *leaves;
625: PetscSFCheckGraphSet(sf, 1);
628: PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL);
629: maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
631: MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank);
632: PetscMalloc2(nroots, &roots, maxlocal, &leaves);
633: for (i = 0; i < maxlocal; i++) {
634: leaves[i].rank = rank;
635: leaves[i].index = i;
636: }
637: for (i = 0; i < nroots; i++) {
638: roots[i].rank = -1;
639: roots[i].index = -1;
640: }
641: PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE);
642: PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE);
644: /* Check whether our leaves are sparse */
645: for (i = 0, count = 0; i < nroots; i++)
646: if (roots[i].rank >= 0) count++;
647: if (count == nroots) newilocal = NULL;
648: else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscMalloc1(count, &newilocal);
649: for (i = 0, count = 0; i < nroots; i++) {
650: if (roots[i].rank >= 0) {
651: newilocal[count] = i;
652: roots[count].rank = roots[i].rank;
653: roots[count].index = roots[i].index;
654: count++;
655: }
656: }
657: }
659: PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf);
660: PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES);
661: PetscFree2(roots, leaves);
662: return 0;
663: }
665: /*@
666: PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph
668: Collective
670: Input Parameters:
671: + sf - communication object to duplicate
672: - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)
674: Output Parameter:
675: . newsf - new communication object
677: Level: beginner
679: .seealso: `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
680: @*/
681: PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
682: {
683: PetscSFType type;
684: MPI_Datatype dtype = MPIU_SCALAR;
689: PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf);
690: PetscSFGetType(sf, &type);
691: if (type) PetscSFSetType(*newsf, type);
692: (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
693: if (opt == PETSCSF_DUPLICATE_GRAPH) {
694: PetscSFCheckGraphSet(sf, 1);
695: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
696: PetscInt nroots, nleaves;
697: const PetscInt *ilocal;
698: const PetscSFNode *iremote;
699: PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote);
700: PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES);
701: } else {
702: PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern);
703: }
704: }
705: /* Since oldtype is committed, so is newtype, according to MPI */
706: if (sf->vscat.bs > 1) MPI_Type_dup(sf->vscat.unit, &dtype);
707: (*newsf)->vscat.bs = sf->vscat.bs;
708: (*newsf)->vscat.unit = dtype;
709: (*newsf)->vscat.to_n = sf->vscat.to_n;
710: (*newsf)->vscat.from_n = sf->vscat.from_n;
711: /* Do not copy lsf. Build it on demand since it is rarely used */
713: #if defined(PETSC_HAVE_DEVICE)
714: (*newsf)->backend = sf->backend;
715: (*newsf)->unknown_input_stream = sf->unknown_input_stream;
716: (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi;
717: (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
718: #endif
719: PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
720: /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
721: return 0;
722: }
724: /*@C
725: PetscSFGetGraph - Get the graph specifying a parallel star forest
727: Not Collective
729: Input Parameter:
730: . sf - star forest
732: Output Parameters:
733: + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
734: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
735: . ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
736: - iremote - remote locations of root vertices for each leaf on the current process
738: Notes:
739: We are not currently requiring that the graph is set, thus nroots is returned as -1 if it has not been set yet
741: The returned ilocal/iremote might contain values in a different order than the ones passed to PetscSFSetGraph(),
742: see its manpage for details.
744: Fortran Notes:
745: The returned iremote array is a copy and must be deallocated after use. Consequently, if you
746: want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.
748: To check for a NULL ilocal use
749: $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
751: Level: intermediate
753: .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
754: @*/
755: PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote)
756: {
758: if (sf->ops->GetGraph) {
759: (sf->ops->GetGraph)(sf, nroots, nleaves, ilocal, iremote);
760: } else {
761: if (nroots) *nroots = sf->nroots;
762: if (nleaves) *nleaves = sf->nleaves;
763: if (ilocal) *ilocal = sf->mine;
764: if (iremote) *iremote = sf->remote;
765: }
766: return 0;
767: }
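/* Usage sketch (illustrative, not part of sf.c) of PetscSFGetGraph(): note that ilocal may be returned
   as NULL, meaning leaf i lives at position i of the leafdata buffer. Assumes #include <petscsf.h>;
   the routine name is made up. */
static PetscErrorCode ExamplePrintGraph(PetscSF sf)
{
  PetscInt           nroots, nleaves, i;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;

  PetscFunctionBeginUser;
  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
  for (i = 0; i < nleaves; i++) {
    const PetscInt leaf = ilocal ? ilocal[i] : i; /* handle contiguous leaf storage */
    PetscCall(PetscPrintf(PETSC_COMM_SELF, "leaf %" PetscInt_FMT " <- (rank %" PetscInt_FMT ", root %" PetscInt_FMT ")\n", leaf, iremote[i].rank, iremote[i].index));
  }
  PetscFunctionReturn(0);
}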
769: /*@
770: PetscSFGetLeafRange - Get the active leaf ranges
772: Not Collective
774: Input Parameter:
775: . sf - star forest
777: Output Parameters:
778: + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
779: - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.
781: Level: developer
783: .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
784: @*/
785: PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
786: {
788: PetscSFCheckGraphSet(sf, 1);
789: if (minleaf) *minleaf = sf->minleaf;
790: if (maxleaf) *maxleaf = sf->maxleaf;
791: return 0;
792: }
794: /*@C
795: PetscSFViewFromOptions - View a PetscSF based on options in the options database
797: Collective on PetscSF
799: Input Parameters:
800: + A - the star forest
801: . obj - Optional object
802: - name - command line option
804: Level: intermediate
805: .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
806: @*/
807: PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
808: {
810: PetscObjectViewFromOptions((PetscObject)A, obj, name);
811: return 0;
812: }
814: /*@C
815: PetscSFView - view a star forest
817: Collective
819: Input Parameters:
820: + sf - star forest
821: - viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
823: Level: beginner
825: .seealso: `PetscSFCreate()`, `PetscSFSetGraph()`
826: @*/
827: PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
828: {
829: PetscBool iascii;
830: PetscViewerFormat format;
833: if (!viewer) PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer);
836: if (sf->graphset) PetscSFSetUp(sf);
837: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
838: if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
839: PetscMPIInt rank;
840: PetscInt ii, i, j;
842: PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer);
843: PetscViewerASCIIPushTab(viewer);
844: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
845: if (!sf->graphset) {
846: PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n");
847: PetscViewerASCIIPopTab(viewer);
848: return 0;
849: }
850: MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank);
851: PetscViewerASCIIPushSynchronized(viewer);
852: PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks);
853: for (i = 0; i < sf->nleaves; i++) PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index);
854: PetscViewerFlush(viewer);
855: PetscViewerGetFormat(viewer, &format);
856: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
857: PetscMPIInt *tmpranks, *perm;
858: PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm);
859: PetscArraycpy(tmpranks, sf->ranks, sf->nranks);
860: for (i = 0; i < sf->nranks; i++) perm[i] = i;
861: PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm);
862: PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank);
863: for (ii = 0; ii < sf->nranks; ii++) {
864: i = perm[ii];
865: PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]);
866: for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]);
867: }
868: PetscFree2(tmpranks, perm);
869: }
870: PetscViewerFlush(viewer);
871: PetscViewerASCIIPopSynchronized(viewer);
872: }
873: PetscViewerASCIIPopTab(viewer);
874: }
875: PetscTryTypeMethod(sf, View, viewer);
876: return 0;
877: }
879: /*@C
880: PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
882: Not Collective
884: Input Parameter:
885: . sf - star forest
887: Output Parameters:
888: + nranks - number of ranks referenced by local part
889: . ranks - array of ranks
890: . roffset - offset in rmine/rremote for each rank (length nranks+1)
891: . rmine - concatenated array holding local indices referencing each remote rank
892: - rremote - concatenated array holding remote indices referenced for each remote rank
894: Level: developer
896: .seealso: `PetscSFGetLeafRanks()`
897: @*/
898: PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote)
899: {
902: if (sf->ops->GetRootRanks) {
903: (sf->ops->GetRootRanks)(sf, nranks, ranks, roffset, rmine, rremote);
904: } else {
905: /* The generic implementation */
906: if (nranks) *nranks = sf->nranks;
907: if (ranks) *ranks = sf->ranks;
908: if (roffset) *roffset = sf->roffset;
909: if (rmine) *rmine = sf->rmine;
910: if (rremote) *rremote = sf->rremote;
911: }
912: return 0;
913: }
915: /*@C
916: PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
918: Not Collective
920: Input Parameter:
921: . sf - star forest
923: Output Parameters:
924: + niranks - number of leaf ranks referencing roots on this process
925: . iranks - array of ranks
926: . ioffset - offset in irootloc for each rank (length niranks+1)
927: - irootloc - concatenated array holding local indices of roots referenced by each leaf rank
929: Level: developer
931: .seealso: `PetscSFGetRootRanks()`
932: @*/
933: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
934: {
937: if (sf->ops->GetLeafRanks) {
938: (sf->ops->GetLeafRanks)(sf, niranks, iranks, ioffset, irootloc);
939: } else {
940: PetscSFType type;
941: PetscSFGetType(sf, &type);
942: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
943: }
944: return 0;
945: }
947: static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
948: {
949: PetscInt i;
950: for (i = 0; i < n; i++) {
951: if (needle == list[i]) return PETSC_TRUE;
952: }
953: return PETSC_FALSE;
954: }
956: /*@C
957: PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
959: Collective
961: Input Parameters:
962: + sf - PetscSF to set up; PetscSFSetGraph() must have been called
963: - dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)
965: Level: developer
967: .seealso: `PetscSFGetRootRanks()`
968: @*/
969: PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
970: {
971: PetscTable table;
972: PetscTablePosition pos;
973: PetscMPIInt size, groupsize, *groupranks;
974: PetscInt *rcount, *ranks;
975: PetscInt i, irank = -1, orank = -1;
978: PetscSFCheckGraphSet(sf, 1);
979: MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size);
980: PetscTableCreate(10, size, &table);
981: for (i = 0; i < sf->nleaves; i++) {
982: /* Log 1-based rank */
983: PetscTableAdd(table, sf->remote[i].rank + 1, 1, ADD_VALUES);
984: }
985: PetscTableGetCount(table, &sf->nranks);
986: PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote);
987: PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks);
988: PetscTableGetHeadPosition(table, &pos);
989: for (i = 0; i < sf->nranks; i++) {
990: PetscTableGetNext(table, &pos, &ranks[i], &rcount[i]);
991: ranks[i]--; /* Convert back to 0-based */
992: }
993: PetscTableDestroy(&table);
995: /* We expect that dgroup is reliably "small" while nranks could be large */
996: {
997: MPI_Group group = MPI_GROUP_NULL;
998: PetscMPIInt *dgroupranks;
999: MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group);
1000: MPI_Group_size(dgroup, &groupsize);
1001: PetscMalloc1(groupsize, &dgroupranks);
1002: PetscMalloc1(groupsize, &groupranks);
1003: for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
1004: if (groupsize) MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks);
1005: MPI_Group_free(&group);
1006: PetscFree(dgroupranks);
1007: }
1009: /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1010: for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
1011: for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
1012: if (InList(ranks[i], groupsize, groupranks)) break;
1013: }
1014: for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1015: if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
1016: }
1017: if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1018: PetscInt tmprank, tmpcount;
1020: tmprank = ranks[i];
1021: tmpcount = rcount[i];
1022: ranks[i] = ranks[sf->ndranks];
1023: rcount[i] = rcount[sf->ndranks];
1024: ranks[sf->ndranks] = tmprank;
1025: rcount[sf->ndranks] = tmpcount;
1026: sf->ndranks++;
1027: }
1028: }
1029: PetscFree(groupranks);
1030: PetscSortIntWithArray(sf->ndranks, ranks, rcount);
1031: PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks);
1032: sf->roffset[0] = 0;
1033: for (i = 0; i < sf->nranks; i++) {
1034: PetscMPIIntCast(ranks[i], sf->ranks + i);
1035: sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
1036: rcount[i] = 0;
1037: }
1038: for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
1039: /* short circuit */
1040: if (orank != sf->remote[i].rank) {
1041: /* Search for index of iremote[i].rank in sf->ranks */
1042: PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank);
1043: if (irank < 0) {
1044: PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank);
1045: if (irank >= 0) irank += sf->ndranks;
1046: }
1047: orank = sf->remote[i].rank;
1048: }
1050: sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i;
1051: sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1052: rcount[irank]++;
1053: }
1054: PetscFree2(rcount, ranks);
1055: return 0;
1056: }
1058: /*@C
1059: PetscSFGetGroups - gets incoming and outgoing process groups
1061: Collective
1063: Input Parameter:
1064: . sf - star forest
1066: Output Parameters:
1067: + incoming - group of origin processes for incoming edges (leaves that reference my roots)
1068: - outgoing - group of destination processes for outgoing edges (roots that I reference)
1070: Level: developer
1072: .seealso: `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
1073: @*/
1074: PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
1075: {
1076: MPI_Group group = MPI_GROUP_NULL;
1079: if (sf->ingroup == MPI_GROUP_NULL) {
1080: PetscInt i;
1081: const PetscInt *indegree;
1082: PetscMPIInt rank, *outranks, *inranks;
1083: PetscSFNode *remote;
1084: PetscSF bgcount;
1086: /* Compute the number of incoming ranks */
1087: PetscMalloc1(sf->nranks, &remote);
1088: for (i = 0; i < sf->nranks; i++) {
1089: remote[i].rank = sf->ranks[i];
1090: remote[i].index = 0;
1091: }
1092: PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount);
1093: PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER);
1094: PetscSFComputeDegreeBegin(bgcount, &indegree);
1095: PetscSFComputeDegreeEnd(bgcount, &indegree);
1096: /* Enumerate the incoming ranks */
1097: PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks);
1098: MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank);
1099: for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
1100: PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks);
1101: PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks);
1102: MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group);
1103: MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup);
1104: MPI_Group_free(&group);
1105: PetscFree2(inranks, outranks);
1106: PetscSFDestroy(&bgcount);
1107: }
1108: *incoming = sf->ingroup;
1110: if (sf->outgroup == MPI_GROUP_NULL) {
1111: MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group);
1112: MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup);
1113: MPI_Group_free(&group);
1114: }
1115: *outgoing = sf->outgroup;
1116: return 0;
1117: }
1119: /*@
1120: PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters
1122: Collective
1124: Input Parameter:
1125: . sf - star forest that may contain roots with 0 or with more than 1 vertex
1127: Output Parameter:
1128: . multi - star forest with split roots, such that each root has degree exactly 1
1130: Level: developer
1132: Notes:
1134: In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1135: directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1136: edge, it is a candidate for future optimization that might involve its removal.
1138: .seealso: `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
1139: @*/
1140: PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
1141: {
1144: if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1145: PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi);
1146: *multi = sf->multi;
1147: sf->multi->multi = sf->multi;
1148: return 0;
1149: }
1150: if (!sf->multi) {
1151: const PetscInt *indegree;
1152: PetscInt i, *inoffset, *outones, *outoffset, maxlocal;
1153: PetscSFNode *remote;
1154: maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
1155: PetscSFComputeDegreeBegin(sf, &indegree);
1156: PetscSFComputeDegreeEnd(sf, &indegree);
1157: PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset);
1158: inoffset[0] = 0;
1159: for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
1160: for (i = 0; i < maxlocal; i++) outones[i] = 1;
1161: PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM);
1162: PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM);
1163: for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1164: if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
1165: for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
1166: }
1167: PetscMalloc1(sf->nleaves, &remote);
1168: for (i = 0; i < sf->nleaves; i++) {
1169: remote[i].rank = sf->remote[i].rank;
1170: remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1171: }
1172: PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi);
1173: sf->multi->multi = sf->multi;
1174: PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER);
1175: if (sf->rankorder) { /* Sort the ranks */
1176: PetscMPIInt rank;
1177: PetscInt *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
1178: PetscSFNode *newremote;
1179: MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank);
1180: for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
1181: PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset);
1182: for (i = 0; i < maxlocal; i++) outranks[i] = rank;
1183: PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE);
1184: PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE);
1185: /* Sort the incoming ranks at each vertex, build the inverse map */
1186: for (i = 0; i < sf->nroots; i++) {
1187: PetscInt j;
1188: for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
1189: PetscSortIntWithArray(indegree[i], inranks + inoffset[i], tmpoffset);
1190: for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1191: }
1192: PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE);
1193: PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE);
1194: PetscMalloc1(sf->nleaves, &newremote);
1195: for (i = 0; i < sf->nleaves; i++) {
1196: newremote[i].rank = sf->remote[i].rank;
1197: newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1198: }
1199: PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER);
1200: PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset);
1201: }
1202: PetscFree3(inoffset, outones, outoffset);
1203: }
1204: *multi = sf->multi;
1205: return 0;
1206: }
1208: /*@C
1209: PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices
1211: Collective
1213: Input Parameters:
1214: + sf - original star forest
1215: . nselected - number of selected roots on this process
1216: - selected - indices of the selected roots on this process
1218: Output Parameter:
1219: . esf - new star forest
1221: Level: advanced
1223: Note:
1224: To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1225: be done by calling PetscSFGetGraph().
1227: .seealso: `PetscSFSetGraph()`, `PetscSFGetGraph()`
1228: @*/
1229: PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
1230: {
1231: PetscInt i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1232: const PetscInt *ilocal;
1233: signed char *rootdata, *leafdata, *leafmem;
1234: const PetscSFNode *iremote;
1235: PetscSFNode *new_iremote;
1236: MPI_Comm comm;
1239: PetscSFCheckGraphSet(sf, 1);
1243: PetscSFSetUp(sf);
1244: PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0);
1245: PetscObjectGetComm((PetscObject)sf, &comm);
1246: PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote);
1248: if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1249: PetscBool dups;
1250: PetscCheckDupsInt(nselected, selected, &dups);
1251: PetscCheck(!dups, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "selected[] has duplicate entries");
1252: for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
1253: }
1255: if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1256: else {
1257: /* A generic version of creating embedded sf */
1258: PetscSFGetLeafRange(sf, &minleaf, &maxleaf);
1259: maxlocal = maxleaf - minleaf + 1;
1260: PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem);
1261: leafdata = leafmem - minleaf;
1262: /* Tag selected roots and bcast to leaves */
1263: for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
1264: PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE);
1265: PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE);
1267: /* Build esf with leaves that are still connected */
1268: esf_nleaves = 0;
1269: for (i = 0; i < nleaves; i++) {
1270: j = ilocal ? ilocal[i] : i;
1271: /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1272: with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1273: */
1274: esf_nleaves += (leafdata[j] ? 1 : 0);
1275: }
1276: PetscMalloc1(esf_nleaves, &new_ilocal);
1277: PetscMalloc1(esf_nleaves, &new_iremote);
1278: for (i = n = 0; i < nleaves; i++) {
1279: j = ilocal ? ilocal[i] : i;
1280: if (leafdata[j]) {
1281: new_ilocal[n] = j;
1282: new_iremote[n].rank = iremote[i].rank;
1283: new_iremote[n].index = iremote[i].index;
1284: ++n;
1285: }
1286: }
1287: PetscSFCreate(comm, esf);
1288: PetscSFSetFromOptions(*esf);
1289: PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER);
1290: PetscFree2(rootdata, leafmem);
1291: }
1292: PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0);
1293: return 0;
1294: }
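/* Usage sketch (illustrative, not part of sf.c) of PetscSFCreateEmbeddedRootSF(): keep only the edges
   whose roots have even local indices; leaf indices are not remapped, as stated above. Assumes
   #include <petscsf.h> and that the graph of sf has been set; the routine name is made up. */
static PetscErrorCode ExampleEmbedEvenRoots(PetscSF sf, PetscSF *esf)
{
  PetscInt nroots, nselected = 0, *selected, i;

  PetscFunctionBeginUser;
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  PetscCall(PetscMalloc1(nroots / 2 + 1, &selected)); /* enough room for the even local roots */
  for (i = 0; i < nroots; i += 2) selected[nselected++] = i;
  PetscCall(PetscSFCreateEmbeddedRootSF(sf, nselected, selected, esf));
  PetscCall(PetscFree(selected));
  PetscFunctionReturn(0);
}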
1296: /*@C
1297: PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1299: Collective
1301: Input Parameters:
1302: + sf - original star forest
1303: . nselected - number of selected leaves on this process
1304: - selected - indices of the selected leaves on this process
1306: Output Parameter:
1307: . newsf - new star forest
1309: Level: advanced
1311: .seealso: `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1312: @*/
1313: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
1314: {
1315: const PetscSFNode *iremote;
1316: PetscSFNode *new_iremote;
1317: const PetscInt *ilocal;
1318: PetscInt i, nroots, *leaves, *new_ilocal;
1319: MPI_Comm comm;
1322: PetscSFCheckGraphSet(sf, 1);
1326: /* Uniq selected[] and put results in leaves[] */
1327: PetscObjectGetComm((PetscObject)sf, &comm);
1328: PetscMalloc1(nselected, &leaves);
1329: PetscArraycpy(leaves, selected, nselected);
1330: PetscSortedRemoveDupsInt(&nselected, leaves);
1333: /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1334: if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1335: else {
1336: PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote);
1337: PetscMalloc1(nselected, &new_ilocal);
1338: PetscMalloc1(nselected, &new_iremote);
1339: for (i = 0; i < nselected; ++i) {
1340: const PetscInt l = leaves[i];
1341: new_ilocal[i] = ilocal ? ilocal[l] : l;
1342: new_iremote[i].rank = iremote[l].rank;
1343: new_iremote[i].index = iremote[l].index;
1344: }
1345: PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf);
1346: PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER);
1347: }
1348: PetscFree(leaves);
1349: return 0;
1350: }
1352: /*@C
1353: PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd()
1355: Collective on PetscSF
1357: Input Parameters:
1358: + sf - star forest on which to communicate
1359: . unit - data type associated with each node
1360: . rootdata - buffer to broadcast
1361: - op - operation to use for reduction
1363: Output Parameter:
1364: . leafdata - buffer to be reduced with values from each leaf's respective root
1366: Level: intermediate
1368: Notes:
1369: When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1370: are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1371: use PetscSFBcastWithMemTypeBegin() instead.
1372: .seealso: `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
1373: @*/
1374: PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1375: {
1376: PetscMemType rootmtype, leafmtype;
1379: PetscSFSetUp(sf);
1380: if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0);
1381: PetscGetMemType(rootdata, &rootmtype);
1382: PetscGetMemType(leafdata, &leafmtype);
1383: PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1384: if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0);
1385: return 0;
1386: }
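/* Usage sketch (illustrative, not part of sf.c): a complete broadcast, pairing PetscSFBcastBegin() with
   PetscSFBcastEnd(). MPI_REPLACE overwrites each leaf with its root's value; a true MPI_Op such as
   MPI_SUM would instead combine the incoming root value with the existing leaf value. Assumes
   #include <petscsf.h>, a set-up SF, and correctly sized host arrays; the routine name is made up. */
static PetscErrorCode ExampleBcast(PetscSF sf, const PetscInt *rootdata, PetscInt *leafdata)
{
  PetscFunctionBeginUser;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
  /* ... unrelated local work can overlap with the communication here ... */
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
  PetscFunctionReturn(0);
}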
1388: /*@C
1389: PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()
1391: Collective on PetscSF
1393: Input Parameters:
1394: + sf - star forest on which to communicate
1395: . unit - data type associated with each node
1396: . rootmtype - memory type of rootdata
1397: . rootdata - buffer to broadcast
1398: . leafmtype - memory type of leafdata
1399: - op - operation to use for reduction
1401: Output Parameter:
1402: . leafdata - buffer to be reduced with values from each leaf's respective root
1404: Level: intermediate
1406: .seealso: `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1407: @*/
1408: PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1409: {
1411: PetscSFSetUp(sf);
1412: if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0);
1413: PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1414: if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0);
1415: return 0;
1416: }
1418: /*@C
1419: PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()
1421: Collective
1423: Input Parameters:
1424: + sf - star forest
1425: . unit - data type
1426: . rootdata - buffer to broadcast
1427: - op - operation to use for reduction
1429: Output Parameter:
1430: . leafdata - buffer to be reduced with values from each leaf's respective root
1432: Level: intermediate
1434: .seealso: `PetscSFSetGraph()`, `PetscSFReduceEnd()`
1435: @*/
1436: PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1437: {
1439: if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0);
1440: PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
1441: if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0);
1442: return 0;
1443: }
1445: /*@C
1446: PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1448: Collective
1450: Input Parameters:
1451: + sf - star forest
1452: . unit - data type
1453: . leafdata - values to reduce
1454: - op - reduction operation
1456: Output Parameter:
1457: . rootdata - result of reduction of values from all leaves of each root
1459: Level: intermediate
1461: Notes:
1462: When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1463: are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1464: use PetscSFReduceWithMemTypeBegin() instead.
1466: .seealso: `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`
1467: @*/
1468: PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1469: {
1470: PetscMemType rootmtype, leafmtype;
1473: PetscSFSetUp(sf);
1474: if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0);
1475: PetscGetMemType(rootdata, &rootmtype);
1476: PetscGetMemType(leafdata, &leafmtype);
1477: (sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op);
1478: if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0);
1479: return 0;
1480: }
1482: /*@C
1483: PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()
1485: Collective
1487: Input Parameters:
1488: + sf - star forest
1489: . unit - data type
1490: . leafmtype - memory type of leafdata
1491: . leafdata - values to reduce
1492: . rootmtype - memory type of rootdata
1493: - op - reduction operation
1495: Output Parameter:
1496: . rootdata - result of reduction of values from all leaves of each root
1498: Level: intermediate
1500: .seealso: `PetscSFBcastBegin()`, `PetscSFReduceBegin()`
1501: @*/
1502: PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1503: {
1505: PetscSFSetUp(sf);
1506: if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0);
1507: (sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op);
1508: if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0);
1509: return 0;
1510: }
1512: /*@C
1513: PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1515: Collective
1517: Input Parameters:
1518: + sf - star forest
1519: . unit - data type
1520: . leafdata - values to reduce
1521: - op - reduction operation
1523: Output Parameter:
1524: . rootdata - result of reduction of values from all leaves of each root
1526: Level: intermediate
1528: .seealso: `PetscSFSetGraph()`, `PetscSFBcastEnd()`
1529: @*/
1530: PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1531: {
1533: if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0);
1534: PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
1535: if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0);
1536: return 0;
1537: }
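/* Usage sketch (illustrative, not part of sf.c): reducing leaf values onto their roots with MPI_SUM,
   i.e. the contribution of every leaf of a root is added into that root's entry of rootdata (for
   example, accumulating ghost updates onto owned points). Assumes #include <petscsf.h> and a set-up
   SF; the routine name is made up. */
static PetscErrorCode ExampleReduceSum(PetscSF sf, const PetscScalar *leafdata, PetscScalar *rootdata)
{
  PetscFunctionBeginUser;
  PetscCall(PetscSFReduceBegin(sf, MPIU_SCALAR, leafdata, rootdata, MPI_SUM));
  PetscCall(PetscSFReduceEnd(sf, MPIU_SCALAR, leafdata, rootdata, MPI_SUM));
  PetscFunctionReturn(0);
}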
1539: /*@C
1540: PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1542: Collective
1544: Input Parameters:
1545: + sf - star forest
1546: . unit - data type
1547: . leafdata - leaf values to use in reduction
1548: - op - operation to use for reduction
1550: Output Parameters:
1551: + rootdata - root values to be updated, input state is seen by first process to perform an update
1552: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1554: Level: advanced
1556: Note:
1557: The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1558: might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1559: not guaranteed across the whole vertex. Therefore, this function is typically used only with primitive types such as
1560: integers.
1562: .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1563: @*/
1564: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1565: {
1566: PetscMemType rootmtype, leafmtype, leafupdatemtype;
1569: PetscSFSetUp(sf);
1570: PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0);
1571: PetscGetMemType(rootdata, &rootmtype);
1572: PetscGetMemType(leafdata, &leafmtype);
1573: PetscGetMemType(leafupdate, &leafupdatemtype);
1575: PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1576: PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0);
1577: return 0;
1578: }
1580: /*@C
1581: PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1583: Collective
1585: Input Parameters:
1586: + sf - star forest
1587: . unit - data type
1588: . rootmtype - memory type of rootdata
1589: . leafmtype - memory type of leafdata
1590: . leafdata - leaf values to use in reduction
1591: . leafupdatemtype - memory type of leafupdate
1592: - op - operation to use for reduction
1594: Output Parameters:
1595: + rootdata - root values to be updated, input state is seen by first process to perform an update
1596: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1598: Level: advanced
1600: Note: See PetscSFFetchAndOpBegin() for more details.
1602: .seealso: `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1603: @*/
1604: PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
1605: {
1607: PetscSFSetUp(sf);
1608: PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0);
1610: PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1611: PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0);
1612: return 0;
1613: }
1615: /*@C
1616: PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1618: Collective
1620: Input Parameters:
1621: + sf - star forest
1622: . unit - data type
1623: . leafdata - leaf values to use in reduction
1624: - op - operation to use for reduction
1626: Output Parameters:
1627: + rootdata - root values to be updated, input state is seen by first process to perform an update
1628: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1630: Level: advanced
1632: .seealso: `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`
1633: @*/
1634: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1635: {
1637: PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0);
1638: PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
1639: PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0);
1640: return 0;
1641: }
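/* Illustrative usage sketch (not part of sf.c): the common fetch-and-add pattern, where every leaf
   atomically adds its increment to its root and receives the root's value immediately before its
   own update, e.g. to hand out unique slots. ExampleFetchAdd and its arguments are hypothetical. */
static PetscErrorCode ExampleFetchAdd(PetscSF sf, PetscInt *rootcounter, const PetscInt *leafincrement, PetscInt *leafoffset)
{
  PetscSFFetchAndOpBegin(sf, MPIU_INT, rootcounter, leafincrement, leafoffset, MPI_SUM);
  PetscSFFetchAndOpEnd(sf, MPIU_INT, rootcounter, leafincrement, leafoffset, MPI_SUM);
  /* leafoffset[i] now holds the value of leaf i's root just before this leaf's contribution was added */
  return 0;
}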
1643: /*@C
1644: PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()
1646: Collective
1648: Input Parameter:
1649: . sf - star forest
1651: Output Parameter:
1652: . degree - degree of each root vertex
1654: Level: advanced
1656: Notes:
1657: The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1659: .seealso: `PetscSFGatherBegin()`
1660: @*/
1661: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree)
1662: {
1664: PetscSFCheckGraphSet(sf, 1);
1666: if (!sf->degreeknown) {
1667: PetscInt i, nroots = sf->nroots, maxlocal;
1669: maxlocal = sf->maxleaf - sf->minleaf + 1;
1670: PetscMalloc1(nroots, &sf->degree);
1671: PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1672: for (i = 0; i < nroots; i++) sf->degree[i] = 0;
1673: for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
1674: PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM);
1675: }
1676: *degree = NULL;
1677: return 0;
1678: }
1680: /*@C
1681: PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()
1683: Collective
1685: Input Parameter:
1686: . sf - star forest
1688: Output Parameter:
1689: . degree - degree of each root vertex
1691: Level: developer
1693: Notes:
1694: The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1696: .seealso: `PetscSFComputeDegreeBegin()`
1697: @*/
1698: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree)
1699: {
1701: PetscSFCheckGraphSet(sf, 1);
1703: if (!sf->degreeknown) {
1705: PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM);
1706: PetscFree(sf->degreetmp);
1707: sf->degreeknown = PETSC_TRUE;
1708: }
1709: *degree = sf->degree;
1710: return 0;
1711: }
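/* Illustrative usage sketch (not part of sf.c): querying root degrees. The degree array is owned by
   the SF, so the caller must not PetscFree() it. ExamplePrintDegrees is a hypothetical helper. */
static PetscErrorCode ExamplePrintDegrees(PetscSF sf)
{
  const PetscInt *degree;
  PetscInt        i, nroots;

  PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);
  PetscSFComputeDegreeBegin(sf, &degree);
  PetscSFComputeDegreeEnd(sf, &degree);
  for (i = 0; i < nroots; i++) PetscPrintf(PETSC_COMM_SELF, "root %" PetscInt_FMT " has degree %" PetscInt_FMT "\n", i, degree[i]);
  return 0;
}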
1713: /*@C
1714: PetscSFComputeMultiRootOriginalNumbering - Returns the original numbering of multi-roots (roots of the multi-SF returned by PetscSFGetMultiSF()).
1715: Each multi-root is assigned the index of the corresponding original root.
1717: Collective
1719: Input Parameters:
1720: + sf - star forest
1721: - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1723: Output Parameters:
1724: + nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1725: - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1727: Level: developer
1729: Notes:
1730: The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1732: .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1733: @*/
1734: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1735: {
1736: PetscSF msf;
1737: PetscInt i, j, k, nroots, nmroots;
1740: PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);
1744: PetscSFGetMultiSF(sf, &msf);
1745: PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);
1746: PetscMalloc1(nmroots, multiRootsOrigNumbering);
1747: for (i = 0, j = 0, k = 0; i < nroots; i++) {
1748: if (!degree[i]) continue;
1749: for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1750: }
1752: if (nMultiRoots) *nMultiRoots = nmroots;
1753: return 0;
1754: }
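/* Illustrative usage sketch (not part of sf.c): recovering, for each multi-root of the multi-SF,
   the index of the original root it stems from. ExampleMultiRootOrigin is a hypothetical helper;
   unlike the degree array, the returned numbering must be freed with PetscFree(). */
static PetscErrorCode ExampleMultiRootOrigin(PetscSF sf)
{
  const PetscInt *degree;
  PetscInt       *orig, nmroots;

  PetscSFComputeDegreeBegin(sf, &degree);
  PetscSFComputeDegreeEnd(sf, &degree);
  PetscSFComputeMultiRootOriginalNumbering(sf, degree, &nmroots, &orig);
  /* ... use orig[0..nmroots-1] here ... */
  PetscFree(orig);
  return 0;
}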
1756: /*@C
1757: PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1759: Collective
1761: Input Parameters:
1762: + sf - star forest
1763: . unit - data type
1764: - leafdata - leaf data to gather to roots
1766: Output Parameter:
1767: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1769: Level: intermediate
1771: .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1772: @*/
1773: PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1774: {
1775: PetscSF multi = NULL;
1778: PetscSFSetUp(sf);
1779: PetscSFGetMultiSF(sf, &multi);
1780: PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE);
1781: return 0;
1782: }
1784: /*@C
1785: PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1787: Collective
1789: Input Parameters:
1790: + sf - star forest
1791: . unit - data type
1792: - leafdata - leaf data to gather to roots
1794: Output Parameter:
1795: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1797: Level: intermediate
1799: .seealso: `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1800: @*/
1801: PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1802: {
1803: PetscSF multi = NULL;
1806: PetscSFGetMultiSF(sf, &multi);
1807: PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE);
1808: return 0;
1809: }
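/* Illustrative usage sketch (not part of sf.c): gathering one PetscScalar per leaf into a
   multi-root buffer whose length equals the sum of the root degrees (obtainable via
   PetscSFComputeDegreeBegin()/End()). ExampleGather and its arguments are hypothetical. */
static PetscErrorCode ExampleGather(PetscSF sf, const PetscScalar *leafdata, PetscScalar *multirootdata)
{
  PetscSFGatherBegin(sf, MPIU_SCALAR, leafdata, multirootdata);
  PetscSFGatherEnd(sf, MPIU_SCALAR, leafdata, multirootdata);
  return 0;
}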
1811: /*@C
1812: PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1814: Collective
1816: Input Parameters:
1817: + sf - star forest
1818: . unit - data type
1819: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1821: Output Parameter:
1822: . leafdata - leaf data to be updated with personal data from each respective root
1824: Level: intermediate
1826: .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
1827: @*/
1828: PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1829: {
1830: PetscSF multi = NULL;
1833: PetscSFSetUp(sf);
1834: PetscSFGetMultiSF(sf, &multi);
1835: PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE);
1836: return 0;
1837: }
1839: /*@C
1840: PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1842: Collective
1844: Input Parameters:
1845: + sf - star forest
1846: . unit - data type
1847: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1849: Output Parameter:
1850: . leafdata - leaf data to be updated with personal data from each respective root
1852: Level: intermediate
1854: .seealso: `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
1855: @*/
1856: PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1857: {
1858: PetscSF multi = NULL;
1861: PetscSFGetMultiSF(sf, &multi);
1862: PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE);
1863: return 0;
1864: }
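/* Illustrative usage sketch (not part of sf.c): the counterpart of the gather above, sending one
   distinct unit from the multi-root buffer to each leaf. ExampleScatter is hypothetical. */
static PetscErrorCode ExampleScatter(PetscSF sf, const PetscScalar *multirootdata, PetscScalar *leafdata)
{
  PetscSFScatterBegin(sf, MPIU_SCALAR, multirootdata, leafdata);
  PetscSFScatterEnd(sf, MPIU_SCALAR, multirootdata, leafdata);
  return 0;
}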
1866: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1867: {
1868: PetscInt i, n, nleaves;
1869: const PetscInt *ilocal = NULL;
1870: PetscHSetI seen;
1872: if (PetscDefined(USE_DEBUG)) {
1873: PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL);
1874: PetscHSetICreate(&seen);
1875: for (i = 0; i < nleaves; i++) {
1876: const PetscInt leaf = ilocal ? ilocal[i] : i;
1877: PetscHSetIAdd(seen, leaf);
1878: }
1879: PetscHSetIGetSize(seen, &n);
1881: PetscHSetIDestroy(&seen);
1882: }
1883: return 0;
1884: }
1886: /*@
1887: PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view
1889: Input Parameters:
1890: + sfA - The first PetscSF
1891: - sfB - The second PetscSF
1893: Output Parameter:
1894: . sfBA - The composite SF
1896: Level: developer
1898: Notes:
1899: Currently, the two SFs must be defined on congruent communicators and they must be true star
1900: forests, i.e., the same leaf is not connected to different roots.
1902: sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
1903: a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
1904: nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
1905: Bcast on sfA, then a Bcast on sfB, on connected nodes.
1907: .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
1908: @*/
1909: PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
1910: {
1911: const PetscSFNode *remotePointsA, *remotePointsB;
1912: PetscSFNode *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
1913: const PetscInt *localPointsA, *localPointsB;
1914: PetscInt *localPointsBA;
1915: PetscInt i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
1916: PetscBool denseB;
1919: PetscSFCheckGraphSet(sfA, 1);
1921: PetscSFCheckGraphSet(sfB, 2);
1924: PetscSFCheckLeavesUnique_Private(sfA);
1925: PetscSFCheckLeavesUnique_Private(sfB);
1927: PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);
1928: PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);
1929: /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size */
1930: /* numRootsB; otherwise, garbage will be broadcast. */
1931: /* Example (comm size = 1): */
1932: /* sfA: 0 <- (0, 0) */
1933: /* sfB: 100 <- (0, 0) */
1934: /* 101 <- (0, 1) */
1935: /* Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target */
1936: /* of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would */
1937: /* receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on */
1938: /* remotePointsA; if not recast, point 101 would receive a garbage value. */
1939: PetscMalloc1(numRootsB, &reorderedRemotePointsA);
1940: for (i = 0; i < numRootsB; i++) {
1941: reorderedRemotePointsA[i].rank = -1;
1942: reorderedRemotePointsA[i].index = -1;
1943: }
1944: for (i = 0; i < numLeavesA; i++) {
1945: PetscInt localp = localPointsA ? localPointsA[i] : i;
1947: if (localp >= numRootsB) continue;
1948: reorderedRemotePointsA[localp] = remotePointsA[i];
1949: }
1950: remotePointsA = reorderedRemotePointsA;
1951: PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);
1952: PetscMalloc1(maxleaf - minleaf + 1, &leafdataB);
1953: for (i = 0; i < maxleaf - minleaf + 1; i++) {
1954: leafdataB[i].rank = -1;
1955: leafdataB[i].index = -1;
1956: }
1957: PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE);
1958: PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE);
1959: PetscFree(reorderedRemotePointsA);
1961: denseB = (PetscBool)!localPointsB;
1962: for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
1963: if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
1964: else numLeavesBA++;
1965: }
1966: if (denseB) {
1967: localPointsBA = NULL;
1968: remotePointsBA = leafdataB;
1969: } else {
1970: PetscMalloc1(numLeavesBA, &localPointsBA);
1971: PetscMalloc1(numLeavesBA, &remotePointsBA);
1972: for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
1973: const PetscInt l = localPointsB ? localPointsB[i] : i;
1975: if (leafdataB[l - minleaf].rank == -1) continue;
1976: remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
1977: localPointsBA[numLeavesBA] = l;
1978: numLeavesBA++;
1979: }
1980: PetscFree(leafdataB);
1981: }
1982: PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA);
1983: PetscSFSetFromOptions(*sfBA);
1984: PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER);
1985: return 0;
1986: }
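/* Illustrative usage sketch (not part of sf.c): a single Bcast on the composed SF behaves like a
   Bcast on sfA followed by a Bcast on sfB over the connected nodes. ExampleComposedBcast and its
   arguments are hypothetical; rootdataA lives on sfA's root space and leafdataB on sfB's leaf space. */
static PetscErrorCode ExampleComposedBcast(PetscSF sfA, PetscSF sfB, const PetscScalar *rootdataA, PetscScalar *leafdataB)
{
  PetscSF sfBA;

  PetscSFCompose(sfA, sfB, &sfBA);
  PetscSFBcastBegin(sfBA, MPIU_SCALAR, rootdataA, leafdataB, MPI_REPLACE);
  PetscSFBcastEnd(sfBA, MPIU_SCALAR, rootdataA, leafdataB, MPI_REPLACE);
  PetscSFDestroy(&sfBA);
  return 0;
}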
1988: /*@
1989: PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one
1991: Input Parameters:
1992: + sfA - The first PetscSF
1993: - sfB - The second PetscSF
1995: Output Parameter:
1996: . sfBA - The composite SF.
1998: Level: developer
2000: Notes:
2001: Currently, the two SFs must be defined on congruent communicators and they must be true star
2002: forests, i.e., the same leaf is not connected to different roots. Moreover, all roots of the
2003: second SF must have degree 1, i.e., no root is connected to more than one leaf.
2005: sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
2006: a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
2007: roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
2008: a Reduce on sfB, on connected roots.
2010: .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
2011: @*/
2012: PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2013: {
2014: const PetscSFNode *remotePointsA, *remotePointsB;
2015: PetscSFNode *remotePointsBA;
2016: const PetscInt *localPointsA, *localPointsB;
2017: PetscSFNode *reorderedRemotePointsA = NULL;
2018: PetscInt i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
2019: MPI_Op op;
2020: #if defined(PETSC_USE_64BIT_INDICES)
2021: PetscBool iswin;
2022: #endif
2025: PetscSFCheckGraphSet(sfA, 1);
2027: PetscSFCheckGraphSet(sfB, 2);
2030: PetscSFCheckLeavesUnique_Private(sfA);
2031: PetscSFCheckLeavesUnique_Private(sfB);
2033: PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);
2034: PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);
2036: /* TODO: Check roots of sfB have degree of 1 */
2037: /* Once we implement that check, we can replace MPI_MAXLOC
2038: with MPI_REPLACE; when every root of sfB has degree 1, the two operations have the same effect.
2039: We use MPI_MAXLOC only to obtain a deterministic output from this routine if
2040: the root condition is not met.
2041: */
2042: op = MPI_MAXLOC;
2043: #if defined(PETSC_USE_64BIT_INDICES)
2044: /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2045: PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin);
2046: if (iswin) op = MPI_REPLACE;
2047: #endif
2049: PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);
2050: PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA);
2051: for (i = 0; i < maxleaf - minleaf + 1; i++) {
2052: reorderedRemotePointsA[i].rank = -1;
2053: reorderedRemotePointsA[i].index = -1;
2054: }
2055: if (localPointsA) {
2056: for (i = 0; i < numLeavesA; i++) {
2057: if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2058: reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2059: }
2060: } else {
2061: for (i = 0; i < numLeavesA; i++) {
2062: if (i > maxleaf || i < minleaf) continue;
2063: reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2064: }
2065: }
2067: PetscMalloc1(numRootsB, &localPointsBA);
2068: PetscMalloc1(numRootsB, &remotePointsBA);
2069: for (i = 0; i < numRootsB; i++) {
2070: remotePointsBA[i].rank = -1;
2071: remotePointsBA[i].index = -1;
2072: }
2074: PetscSFReduceBegin(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op);
2075: PetscSFReduceEnd(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op);
2076: PetscFree(reorderedRemotePointsA);
2077: for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
2078: if (remotePointsBA[i].rank == -1) continue;
2079: remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
2080: remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2081: localPointsBA[numLeavesBA] = i;
2082: numLeavesBA++;
2083: }
2084: PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA);
2085: PetscSFSetFromOptions(*sfBA);
2086: PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER);
2087: return 0;
2088: }
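/* Illustrative usage sketch (not part of sf.c): with sfBA built by PetscSFComposeInverse(), a Bcast
   on sfBA carries data from sfA's roots to sfB's roots, matching a Bcast on sfA followed by a
   Reduce on sfB over the connected roots. ExampleComposeInverseBcast is hypothetical; rootdataB is
   assumed to be sized to sfB's root space. */
static PetscErrorCode ExampleComposeInverseBcast(PetscSF sfA, PetscSF sfB, const PetscScalar *rootdataA, PetscScalar *rootdataB)
{
  PetscSF sfBA;

  PetscSFComposeInverse(sfA, sfB, &sfBA);
  PetscSFBcastBegin(sfBA, MPIU_SCALAR, rootdataA, rootdataB, MPI_REPLACE);
  PetscSFBcastEnd(sfBA, MPIU_SCALAR, rootdataA, rootdataB, MPI_REPLACE);
  PetscSFDestroy(&sfBA);
  return 0;
}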
2090: /*
2091: PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
2093: Input Parameter:
2094: . sf - The global PetscSF
2096: Output Parameter:
2097: . out - The local PetscSF
2098: */
2099: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2100: {
2101: MPI_Comm comm;
2102: PetscMPIInt myrank;
2103: const PetscInt *ilocal;
2104: const PetscSFNode *iremote;
2105: PetscInt i, j, nroots, nleaves, lnleaves, *lilocal;
2106: PetscSFNode *liremote;
2107: PetscSF lsf;
2110: if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2111: else {
2112: /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2113: PetscObjectGetComm((PetscObject)sf, &comm);
2114: MPI_Comm_rank(comm, &myrank);
2116: /* Find out local edges and build a local SF */
2117: PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote);
2118: for (i = lnleaves = 0; i < nleaves; i++) {
2119: if (iremote[i].rank == (PetscInt)myrank) lnleaves++;
2120: }
2121: PetscMalloc1(lnleaves, &lilocal);
2122: PetscMalloc1(lnleaves, &liremote);
2124: for (i = j = 0; i < nleaves; i++) {
2125: if (iremote[i].rank == (PetscInt)myrank) {
2126: lilocal[j] = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2127: liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
2128: liremote[j].index = iremote[i].index;
2129: j++;
2130: }
2131: }
2132: PetscSFCreate(PETSC_COMM_SELF, &lsf);
2133: PetscSFSetFromOptions(lsf);
2134: PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER);
2135: PetscSFSetUp(lsf);
2136: *out = lsf;
2137: }
2138: return 0;
2139: }
2141: /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2142: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2143: {
2144: PetscMemType rootmtype, leafmtype;
2147: PetscSFSetUp(sf);
2148: PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0);
2149: PetscGetMemType(rootdata, &rootmtype);
2150: PetscGetMemType(leafdata, &leafmtype);
2151: PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
2152: PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0);
2153: return 0;
2154: }
2156: /*@
2157: PetscSFConcatenate - concatenate multiple SFs into one
2159: Input Parameters:
2160: + comm - the communicator
2161: . nsfs - the number of input PetscSFs
2162: . sfs - the array of input PetscSFs
2163: . shareRoots - flag indicating whether the roots of the input PetscSFs are taken as shared (PETSC_TRUE) or as separate and concatenated (PETSC_FALSE)
2164: - leafOffsets - the array of local leaf offsets, one for each input PetscSF, or NULL for contiguous storage
2166: Output Parameter:
2167: . newsf - The resulting PetscSF
2169: Level: developer
2171: Notes:
2172: The communicator of all SFs in sfs must be comm.
2174: The offsets in leafOffsets are added to the original leaf indices.
2176: If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
2177: In this case, NULL is also passed as ilocal to the resulting SF.
2179: If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
2180: In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2182: .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2183: @*/
2184: PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf)
2185: {
2186: PetscInt i, s, nLeaves, nRoots;
2187: PetscInt *leafArrayOffsets;
2188: PetscInt *ilocal_new;
2189: PetscSFNode *iremote_new;
2190: PetscInt *rootOffsets;
2191: PetscBool all_ilocal_null = PETSC_FALSE;
2192: PetscMPIInt rank;
2194: {
2195: PetscSF dummy; /* just to have a PetscObject on comm for input validation */
2197: PetscSFCreate(comm, &dummy);
2200: for (i = 0; i < nsfs; i++) {
2203: }
2207: PetscSFDestroy(&dummy);
2208: }
2209: if (!nsfs) {
2210: PetscSFCreate(comm, newsf);
2211: PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER);
2212: return 0;
2213: }
2214: MPI_Comm_rank(comm, &rank);
2216: PetscCalloc1(nsfs + 1, &rootOffsets);
2217: if (shareRoots) {
2218: PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL);
2219: if (PetscDefined(USE_DEBUG)) {
2220: for (s = 1; s < nsfs; s++) {
2221: PetscInt nr;
2223: PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL);
2225: }
2226: }
2227: } else {
2228: for (s = 0; s < nsfs; s++) {
2229: PetscInt nr;
2231: PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL);
2232: rootOffsets[s + 1] = rootOffsets[s] + nr;
2233: }
2234: nRoots = rootOffsets[nsfs];
2235: }
2237: /* Calculate leaf array offsets and automatic root offsets */
2238: PetscMalloc1(nsfs + 1, &leafArrayOffsets);
2239: leafArrayOffsets[0] = 0;
2240: for (s = 0; s < nsfs; s++) {
2241: PetscInt nl;
2243: PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL);
2244: leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2245: }
2246: nLeaves = leafArrayOffsets[nsfs];
2248: if (!leafOffsets) {
2249: all_ilocal_null = PETSC_TRUE;
2250: for (s = 0; s < nsfs; s++) {
2251: const PetscInt *ilocal;
2253: PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL);
2254: if (ilocal) {
2255: all_ilocal_null = PETSC_FALSE;
2256: break;
2257: }
2258: }
2260: }
2262: /* Renumber and concatenate local leaves */
2263: ilocal_new = NULL;
2264: if (!all_ilocal_null) {
2265: PetscMalloc1(nLeaves, &ilocal_new);
2266: for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2267: for (s = 0; s < nsfs; s++) {
2268: const PetscInt *ilocal;
2269: PetscInt *ilocal_l = &ilocal_new[leafArrayOffsets[s]];
2270: PetscInt i, nleaves_l;
2272: PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL);
2273: for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2274: }
2275: }
2277: /* Renumber and concatenate remote roots */
2278: PetscMalloc1(nLeaves, &iremote_new);
2279: for (i = 0; i < nLeaves; i++) {
2280: iremote_new[i].rank = -1;
2281: iremote_new[i].index = -1;
2282: }
2283: for (s = 0; s < nsfs; s++) {
2284: PetscInt i, nl, nr;
2285: PetscSF tmp_sf;
2286: const PetscSFNode *iremote;
2287: PetscSFNode *tmp_rootdata;
2288: PetscSFNode *tmp_leafdata = &iremote_new[leafArrayOffsets[s]];
2290: PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote);
2291: PetscSFCreate(comm, &tmp_sf);
2292: /* create helper SF with contiguous leaves */
2293: PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES);
2294: PetscSFSetUp(tmp_sf);
2295: PetscMalloc1(nr, &tmp_rootdata);
2296: for (i = 0; i < nr; i++) {
2297: tmp_rootdata[i].index = i + rootOffsets[s];
2298: tmp_rootdata[i].rank = (PetscInt)rank;
2299: }
2300: PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE);
2301: PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE);
2302: PetscSFDestroy(&tmp_sf);
2303: PetscFree(tmp_rootdata);
2304: }
2306: /* Build the new SF */
2307: PetscSFCreate(comm, newsf);
2308: PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER);
2309: PetscSFSetUp(*newsf);
2310: PetscFree(rootOffsets);
2311: PetscFree(leafArrayOffsets);
2312: return 0;
2313: }
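/* Illustrative usage sketch (not part of sf.c): concatenating two SFs that both use contiguous
   leaf storage (ilocal = NULL), so leafOffsets may be passed as NULL and roots are kept separate
   and concatenated. ExampleConcatenateTwo and its arguments are hypothetical. */
static PetscErrorCode ExampleConcatenateTwo(MPI_Comm comm, PetscSF sf0, PetscSF sf1, PetscSF *combined)
{
  PetscSF sfs[2];

  sfs[0] = sf0;
  sfs[1] = sf1;
  PetscSFConcatenate(comm, 2, sfs, PETSC_FALSE, NULL, combined);
  return 0;
}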