Actual source code: sf.c
1: #include <petsc/private/sfimpl.h>
2: #include <petsc/private/hashseti.h>
3: #include <petsc/private/viewerimpl.h>
4: #include <petsc/private/hashmapi.h>
6: #if defined(PETSC_HAVE_CUDA)
7: #include <cuda_runtime.h>
8: #endif
10: #if defined(PETSC_HAVE_HIP)
11: #include <hip/hip_runtime.h>
12: #endif
14: #if defined(PETSC_CLANG_STATIC_ANALYZER)
15: void PetscSFCheckGraphSet(PetscSF, int);
16: #else
17: #if defined(PETSC_USE_DEBUG)
18: #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME);
19: #else
20: #define PetscSFCheckGraphSet(sf, arg) \
21: do { \
22: } while (0)
23: #endif
24: #endif
26: const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
27: const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};
29: /*@
30: PetscSFCreate - create a star forest communication context
32: Collective
34: Input Parameter:
35: . comm - communicator on which the star forest will operate
37: Output Parameter:
38: . sf - new star forest context
40: Options Database Key:
41: . -sf_type type - value of type may be
42: .vb
43: basic    - Use MPI persistent Isend/Irecv for communication (Default)
44: window   - Use MPI-3 one-sided window for communication
45: neighbor - Use MPI-3 neighborhood collectives for communication
46: .ve
48: Level: intermediate
50: Note:
51: When one knows the communication graph is one of the predefined graphs, such as `MPI_Alltoall()`, `MPI_Allgatherv()`,
52: `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
53: `SF`s are optimized and have better performance than the general `SF`s.
55: .seealso: `PetscSF`, `PetscSFSetType()`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
56: @*/
57: PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
58: {
59: PetscSF b;
61: PetscFunctionBegin;
63: PetscCall(PetscSFInitializePackage());
65: PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));
67: b->nroots = -1;
68: b->nleaves = -1;
69: b->minleaf = PETSC_MAX_INT;
70: b->maxleaf = PETSC_MIN_INT;
71: b->nranks = -1;
72: b->rankorder = PETSC_TRUE;
73: b->ingroup = MPI_GROUP_NULL;
74: b->outgroup = MPI_GROUP_NULL;
75: b->graphset = PETSC_FALSE;
76: #if defined(PETSC_HAVE_DEVICE)
77: b->use_gpu_aware_mpi = use_gpu_aware_mpi;
78: b->use_stream_aware_mpi = PETSC_FALSE;
79: b->unknown_input_stream = PETSC_FALSE;
80: #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
81: b->backend = PETSCSF_BACKEND_KOKKOS;
82: #elif defined(PETSC_HAVE_CUDA)
83: b->backend = PETSCSF_BACKEND_CUDA;
84: #elif defined(PETSC_HAVE_HIP)
85: b->backend = PETSCSF_BACKEND_HIP;
86: #endif
88: #if defined(PETSC_HAVE_NVSHMEM)
89: b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */
90: b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
91: PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
92: PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
93: #endif
94: #endif
95: b->vscat.from_n = -1;
96: b->vscat.to_n = -1;
97: b->vscat.unit = MPIU_SCALAR;
98: *sf = b;
99: PetscFunctionReturn(PETSC_SUCCESS);
100: }
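/*
  A minimal usage sketch of the creation workflow documented above, assuming an existing
  communicator comm; a graph must still be provided with PetscSFSetGraph() or
  PetscSFSetGraphWithPattern() before the star forest can communicate.

    PetscSF sf;

    PetscCall(PetscSFCreate(comm, &sf));
    PetscCall(PetscSFSetFromOptions(sf)); // honors -sf_type basic|window|neighbor
    // ... set the graph, call PetscSFSetUp() and communication routines ...
    PetscCall(PetscSFDestroy(&sf));
*/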
102: /*@
103: PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
105: Collective
107: Input Parameter:
108: . sf - star forest
110: Level: advanced
112: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
113: @*/
114: PetscErrorCode PetscSFReset(PetscSF sf)
115: {
116: PetscFunctionBegin;
118: PetscTryTypeMethod(sf, Reset);
119: sf->nroots = -1;
120: sf->nleaves = -1;
121: sf->minleaf = PETSC_MAX_INT;
122: sf->maxleaf = PETSC_MIN_INT;
123: sf->mine = NULL;
124: sf->remote = NULL;
125: sf->graphset = PETSC_FALSE;
126: PetscCall(PetscFree(sf->mine_alloc));
127: PetscCall(PetscFree(sf->remote_alloc));
128: sf->nranks = -1;
129: PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
130: sf->degreeknown = PETSC_FALSE;
131: PetscCall(PetscFree(sf->degree));
132: if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
133: if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
134: if (sf->multi) sf->multi->multi = NULL;
135: PetscCall(PetscSFDestroy(&sf->multi));
136: PetscCall(PetscLayoutDestroy(&sf->map));
138: #if defined(PETSC_HAVE_DEVICE)
139: for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
140: #endif
142: sf->setupcalled = PETSC_FALSE;
143: PetscFunctionReturn(PETSC_SUCCESS);
144: }
146: /*@C
147: PetscSFSetType - Set the `PetscSF` communication implementation
149: Collective
151: Input Parameters:
152: + sf - the `PetscSF` context
153: - type - a known method
154: .vb
155: PETSCSFWINDOW - MPI-2/3 one-sided
156: PETSCSFBASIC - basic implementation using MPI-1 two-sided
157: .ve
159: Options Database Key:
160: . -sf_type <type> - Sets the method, for example `basic` or `window`; use -help for a list of available methods
162: Level: intermediate
164: Notes:
165: See `PetscSFType` for possible values
167: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`
168: @*/
169: PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
170: {
171: PetscBool match;
172: PetscErrorCode (*r)(PetscSF);
174: PetscFunctionBegin;
178: PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
179: if (match) PetscFunctionReturn(PETSC_SUCCESS);
181: PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
182: PetscCheck(r, PETSC_COMM_SELF, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
183: /* Destroy the previous PetscSF implementation context */
184: PetscTryTypeMethod(sf, Destroy);
185: PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
186: PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
187: PetscCall((*r)(sf));
188: PetscFunctionReturn(PETSC_SUCCESS);
189: }
191: /*@C
192: PetscSFGetType - Get the `PetscSF` communication implementation
194: Not Collective
196: Input Parameter:
197: . sf - the `PetscSF` context
199: Output Parameter:
200: . type - the `PetscSF` type name
202: Level: intermediate
204: .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
205: @*/
206: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
207: {
208: PetscFunctionBegin;
211: *type = ((PetscObject)sf)->type_name;
212: PetscFunctionReturn(PETSC_SUCCESS);
213: }
215: /*@C
216: PetscSFDestroy - destroy a star forest
218: Collective
220: Input Parameter:
221: . sf - address of star forest
223: Level: intermediate
225: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
226: @*/
227: PetscErrorCode PetscSFDestroy(PetscSF *sf)
228: {
229: PetscFunctionBegin;
230: if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
232: if (--((PetscObject)(*sf))->refct > 0) {
233: *sf = NULL;
234: PetscFunctionReturn(PETSC_SUCCESS);
235: }
236: PetscCall(PetscSFReset(*sf));
237: PetscTryTypeMethod((*sf), Destroy);
238: PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
239: if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
240: PetscCall(PetscHeaderDestroy(sf));
241: PetscFunctionReturn(PETSC_SUCCESS);
242: }
244: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
245: {
246: PetscInt i, nleaves;
247: PetscMPIInt size;
248: const PetscInt *ilocal;
249: const PetscSFNode *iremote;
251: PetscFunctionBegin;
252: if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
253: PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
254: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
255: for (i = 0; i < nleaves; i++) {
256: const PetscInt rank = iremote[i].rank;
257: const PetscInt remote = iremote[i].index;
258: const PetscInt leaf = ilocal ? ilocal[i] : i;
259: PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
260: PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
261: PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
262: }
263: PetscFunctionReturn(PETSC_SUCCESS);
264: }
266: /*@
267: PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication
269: Collective
271: Input Parameter:
272: . sf - star forest communication object
274: Level: beginner
276: .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
277: @*/
278: PetscErrorCode PetscSFSetUp(PetscSF sf)
279: {
280: PetscFunctionBegin;
282: PetscSFCheckGraphSet(sf, 1);
283: if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS);
284: PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
285: PetscCall(PetscSFCheckGraphValid_Private(sf));
286: if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
287: PetscTryTypeMethod(sf, SetUp);
288: #if defined(PETSC_HAVE_CUDA)
289: if (sf->backend == PETSCSF_BACKEND_CUDA) {
290: sf->ops->Malloc = PetscSFMalloc_CUDA;
291: sf->ops->Free = PetscSFFree_CUDA;
292: }
293: #endif
294: #if defined(PETSC_HAVE_HIP)
295: if (sf->backend == PETSCSF_BACKEND_HIP) {
296: sf->ops->Malloc = PetscSFMalloc_HIP;
297: sf->ops->Free = PetscSFFree_HIP;
298: }
299: #endif
301: #
302: #if defined(PETSC_HAVE_KOKKOS)
303: if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
304: sf->ops->Malloc = PetscSFMalloc_Kokkos;
305: sf->ops->Free = PetscSFFree_Kokkos;
306: }
307: #endif
308: PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
309: sf->setupcalled = PETSC_TRUE;
310: PetscFunctionReturn(PETSC_SUCCESS);
311: }
313: /*@
314: PetscSFSetFromOptions - set `PetscSF` options using the options database
316: Logically Collective
318: Input Parameter:
319: . sf - star forest
321: Options Database Keys:
322: + -sf_type - implementation type, see `PetscSFSetType()`
323: . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
324: . -sf_use_default_stream - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also
325: use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true).
326: If true, this option only works with `-use_gpu_aware_mpi 1`.
327: . -sf_use_stream_aware_mpi - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
328: If true, this option only works with `-use_gpu_aware_mpi 1`.
330: - -sf_backend cuda | hip | kokkos - Select the device backend SF uses. Currently `PetscSF` has these backends: cuda, hip and kokkos.
331:                                     On CUDA (HIP) devices, one can choose cuda (hip) or kokkos, with the default being kokkos. On other devices,
332:                                     the only available backend is kokkos.
334: Level: intermediate
336: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
337: @*/
338: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
339: {
340: PetscSFType deft;
341: char type[256];
342: PetscBool flg;
344: PetscFunctionBegin;
346: PetscObjectOptionsBegin((PetscObject)sf);
347: deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
348: PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
349: PetscCall(PetscSFSetType(sf, flg ? type : deft));
350: PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
351: #if defined(PETSC_HAVE_DEVICE)
352: {
353: char backendstr[32] = {0};
354: PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
355: /* Change the defaults set in PetscSFCreate() with command line options */
356: PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
357: PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
358: PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
359: PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
360: PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
361: PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
362: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
363: if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
364: else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
365: else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
366: else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
367: #elif defined(PETSC_HAVE_KOKKOS)
368: PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
369: #endif
370: }
371: #endif
372: PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
373: PetscOptionsEnd();
374: PetscFunctionReturn(PETSC_SUCCESS);
375: }
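/*
  A sketch of driving the options handled above programmatically rather than on the command
  line; the specific option values are illustrative only and must be set before
  PetscSFSetFromOptions() is called.

    PetscCall(PetscOptionsSetValue(NULL, "-sf_type", "window"));
    PetscCall(PetscOptionsSetValue(NULL, "-sf_rank_order", "1"));
    PetscCall(PetscSFSetFromOptions(sf));
*/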
377: /*@
378: PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
380: Logically Collective
382: Input Parameters:
383: + sf - star forest
384: - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic)
386: Level: advanced
388: .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
389: @*/
390: PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
391: {
392: PetscFunctionBegin;
395: PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
396: sf->rankorder = flg;
397: PetscFunctionReturn(PETSC_SUCCESS);
398: }
400: /*@C
401: PetscSFSetGraph - Set a parallel star forest
403: Collective
405: Input Parameters:
406: + sf - star forest
407: . nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
408: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
409: . ilocal - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced
410: during setup in debug mode)
411: . localmode - copy mode for `ilocal`
412: . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
413: during setup in debug mode)
414: - remotemode - copy mode for `iremote`
416: Level: intermediate
418: Notes:
419: Leaf indices in `ilocal` must be unique, otherwise an error occurs.
421: Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
422: In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
423: PETSc might modify the respective array;
424: if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
425: Only when `PETSC_COPY_VALUES` is used is the respective array guaranteed to stay intact; in that case a const array can be passed (but a cast to non-const is needed).
427: Fortran Note:
428: In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.
430: Developer Note:
431: We sort leaves to check for duplicates and contiguity and to find minleaf/maxleaf.
432: This also makes it easy to compare the leaf sets of two `PetscSF`s.
434: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
435: @*/
436: PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode)
437: {
438: PetscBool unique, contiguous;
440: PetscFunctionBegin;
444: PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
445: PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
446: /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
447: * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
448: PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
449: PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);
451: if (sf->nroots >= 0) { /* Reset only if graph already set */
452: PetscCall(PetscSFReset(sf));
453: }
455: PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));
457: sf->nroots = nroots;
458: sf->nleaves = nleaves;
460: if (localmode == PETSC_COPY_VALUES && ilocal) {
461: PetscInt *tlocal = NULL;
463: PetscCall(PetscMalloc1(nleaves, &tlocal));
464: PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
465: ilocal = tlocal;
466: }
467: if (remotemode == PETSC_COPY_VALUES) {
468: PetscSFNode *tremote = NULL;
470: PetscCall(PetscMalloc1(nleaves, &tremote));
471: PetscCall(PetscArraycpy(tremote, iremote, nleaves));
472: iremote = tremote;
473: }
475: if (nleaves && ilocal) {
476: PetscSFNode work;
478: PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
479: PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
480: unique = PetscNot(unique);
481: PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
482: sf->minleaf = ilocal[0];
483: sf->maxleaf = ilocal[nleaves - 1];
484: contiguous = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
485: } else {
486: sf->minleaf = 0;
487: sf->maxleaf = nleaves - 1;
488: unique = PETSC_TRUE;
489: contiguous = PETSC_TRUE;
490: }
492: if (contiguous) {
493: if (localmode == PETSC_USE_POINTER) {
494: ilocal = NULL;
495: } else {
496: PetscCall(PetscFree(ilocal));
497: }
498: }
499: sf->mine = ilocal;
500: if (localmode == PETSC_USE_POINTER) {
501: sf->mine_alloc = NULL;
502: } else {
503: sf->mine_alloc = ilocal;
504: }
505: sf->remote = iremote;
506: if (remotemode == PETSC_USE_POINTER) {
507: sf->remote_alloc = NULL;
508: } else {
509: sf->remote_alloc = iremote;
510: }
511: PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
512: sf->graphset = PETSC_TRUE;
513: PetscFunctionReturn(PETSC_SUCCESS);
514: }
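/*
  A small sketch of PetscSFSetGraph() usage: every rank owns two roots and attaches two
  leaves to the roots of the next rank (wrapping around), with contiguous leaf storage
  (ilocal == NULL). rank and size are assumed to come from MPI_Comm_rank()/MPI_Comm_size()
  on the star forest's communicator.

    PetscSFNode remote[2];

    remote[0].rank  = (rank + 1) % size;
    remote[0].index = 0;
    remote[1].rank  = (rank + 1) % size;
    remote[1].index = 1;
    PetscCall(PetscSFSetGraph(sf, 2, 2, NULL, PETSC_COPY_VALUES, remote, PETSC_COPY_VALUES));
    PetscCall(PetscSFSetUp(sf));
*/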
516: /*@
517: PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern
519: Collective
521: Input Parameters:
522: + sf - The `PetscSF`
523: . map - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`)
524: - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`
526: Level: intermediate
528: Notes:
529: It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`.
530: `n` and `N` are the local and global sizes of `x` respectively.
532: With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph such that, if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, `x` is copied to
533: sequential vectors `y` on all MPI processes.
535: With `PETSCSF_PATTERN_GATHER`, the routine creates a graph such that, if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, `x` is copied to a
536: sequential vector `y` on rank 0.
538: In the above cases, entries of `x` are roots and entries of `y` are leaves.
540: With `PETSCSF_PATTERN_ALLTOALL`, `map` is insignificant. Suppose NP is the size of `sf`'s communicator. The routine
541: creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
542: of rank j, with 0 <= i,j < NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount equal to 1. Note that this does
543: not mean one cannot send multiple items. One just needs to create a new MPI datatype for the multiple data
544: items with `MPI_Type_contiguous()` and use that as the `unit` argument in SF routines.
546: In this case, roots and leaves are symmetric.
548: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
549: @*/
550: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
551: {
552: MPI_Comm comm;
553: PetscInt n, N, res[2];
554: PetscMPIInt rank, size;
555: PetscSFType type;
557: PetscFunctionBegin;
560: PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
561: PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
562: PetscCallMPI(MPI_Comm_rank(comm, &rank));
563: PetscCallMPI(MPI_Comm_size(comm, &size));
565: if (pattern == PETSCSF_PATTERN_ALLTOALL) {
566: type = PETSCSFALLTOALL;
567: PetscCall(PetscLayoutCreate(comm, &sf->map));
568: PetscCall(PetscLayoutSetLocalSize(sf->map, size));
569: PetscCall(PetscLayoutSetSize(sf->map, ((PetscInt)size) * size));
570: PetscCall(PetscLayoutSetUp(sf->map));
571: } else {
572: PetscCall(PetscLayoutGetLocalSize(map, &n));
573: PetscCall(PetscLayoutGetSize(map, &N));
574: res[0] = n;
575: res[1] = -n;
576: /* Check if n is the same over all ranks so that we can optimize it */
577: PetscCall(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
578: if (res[0] == -res[1]) { /* same n */
579: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
580: } else {
581: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
582: }
583: PetscCall(PetscLayoutReference(map, &sf->map));
584: }
585: PetscCall(PetscSFSetType(sf, type));
587: sf->pattern = pattern;
588: sf->mine = NULL; /* Contiguous */
590: /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
591: Also set other easy stuff.
592: */
593: if (pattern == PETSCSF_PATTERN_ALLGATHER) {
594: sf->nleaves = N;
595: sf->nroots = n;
596: sf->nranks = size;
597: sf->minleaf = 0;
598: sf->maxleaf = N - 1;
599: } else if (pattern == PETSCSF_PATTERN_GATHER) {
600: sf->nleaves = rank ? 0 : N;
601: sf->nroots = n;
602: sf->nranks = rank ? 0 : size;
603: sf->minleaf = 0;
604: sf->maxleaf = rank ? -1 : N - 1;
605: } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
606: sf->nleaves = size;
607: sf->nroots = size;
608: sf->nranks = size;
609: sf->minleaf = 0;
610: sf->maxleaf = size - 1;
611: }
612: sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
613: sf->graphset = PETSC_TRUE;
614: PetscFunctionReturn(PETSC_SUCCESS);
615: }
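/*
  A sketch of the PETSCSF_PATTERN_ALLGATHER case described above, assuming each rank owns
  n roots and that rootdata/leafdata are caller-allocated arrays of PetscScalar (leafdata
  sized by the global number of roots). The layout can be destroyed after the call since
  the star forest keeps its own reference.

    PetscLayout map;

    PetscCall(PetscLayoutCreate(comm, &map));
    PetscCall(PetscLayoutSetLocalSize(map, n));
    PetscCall(PetscLayoutSetUp(map));
    PetscCall(PetscSFSetGraphWithPattern(sf, map, PETSCSF_PATTERN_ALLGATHER));
    PetscCall(PetscLayoutDestroy(&map));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
*/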
617: /*@
618: PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map
620: Collective
622: Input Parameter:
623: . sf - star forest to invert
625: Output Parameter:
626: . isf - inverse of `sf`
628: Level: advanced
630: Notes:
631: All roots must have degree 1.
633: The local space may be a permutation, but cannot be sparse.
635: .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
636: @*/
637: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
638: {
639: PetscMPIInt rank;
640: PetscInt i, nroots, nleaves, maxlocal, count, *newilocal;
641: const PetscInt *ilocal;
642: PetscSFNode *roots, *leaves;
644: PetscFunctionBegin;
646: PetscSFCheckGraphSet(sf, 1);
649: PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
650: maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
652: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
653: PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
654: for (i = 0; i < maxlocal; i++) {
655: leaves[i].rank = rank;
656: leaves[i].index = i;
657: }
658: for (i = 0; i < nroots; i++) {
659: roots[i].rank = -1;
660: roots[i].index = -1;
661: }
662: PetscCall(PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));
663: PetscCall(PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));
665: /* Check whether our leaves are sparse */
666: for (i = 0, count = 0; i < nroots; i++)
667: if (roots[i].rank >= 0) count++;
668: if (count == nroots) newilocal = NULL;
669: else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal));
670: for (i = 0, count = 0; i < nroots; i++) {
671: if (roots[i].rank >= 0) {
672: newilocal[count] = i;
673: roots[count].rank = roots[i].rank;
674: roots[count].index = roots[i].index;
675: count++;
676: }
677: }
678: }
680: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
681: PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
682: PetscCall(PetscFree2(roots, leaves));
683: PetscFunctionReturn(PETSC_SUCCESS);
684: }
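/*
  A sketch of inverting a star forest whose roots all have degree 1, as required above;
  on the inverse, what were roots become leaves and vice versa, so data can be moved in
  the opposite direction with the usual broadcast/reduce calls.

    PetscSF isf;

    PetscCall(PetscSFCreateInverseSF(sf, &isf));
    PetscCall(PetscSFSetUp(isf));
    // ... communicate from the old leaves back to the old roots ...
    PetscCall(PetscSFDestroy(&isf));
*/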
686: /*@
687: PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph
689: Collective
691: Input Parameters:
692: + sf - communication object to duplicate
693: - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)
695: Output Parameter:
696: . newsf - new communication object
698: Level: beginner
700: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
701: @*/
702: PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
703: {
704: PetscSFType type;
705: MPI_Datatype dtype = MPIU_SCALAR;
707: PetscFunctionBegin;
711: PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
712: PetscCall(PetscSFGetType(sf, &type));
713: if (type) PetscCall(PetscSFSetType(*newsf, type));
714: (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
715: if (opt == PETSCSF_DUPLICATE_GRAPH) {
716: PetscSFCheckGraphSet(sf, 1);
717: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
718: PetscInt nroots, nleaves;
719: const PetscInt *ilocal;
720: const PetscSFNode *iremote;
721: PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
722: PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
723: } else {
724: PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
725: }
726: }
727: /* Since oldtype is committed, so is newtype, according to MPI */
728: if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
729: (*newsf)->vscat.bs = sf->vscat.bs;
730: (*newsf)->vscat.unit = dtype;
731: (*newsf)->vscat.to_n = sf->vscat.to_n;
732: (*newsf)->vscat.from_n = sf->vscat.from_n;
733: /* Do not copy lsf. Build it on demand since it is rarely used */
735: #if defined(PETSC_HAVE_DEVICE)
736: (*newsf)->backend = sf->backend;
737: (*newsf)->unknown_input_stream = sf->unknown_input_stream;
738: (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi;
739: (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
740: #endif
741: PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
742: /* Don't do PetscSFSetUp() since the new sf's graph might not have been set. */
743: PetscFunctionReturn(PETSC_SUCCESS);
744: }
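/*
  A sketch of duplicating a star forest together with its graph; with
  PETSCSF_DUPLICATE_CONFONLY only the configuration (type, options) is copied and the
  graph must be set separately on the new object.

    PetscSF newsf;

    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_GRAPH, &newsf));
    PetscCall(PetscSFSetUp(newsf));
    PetscCall(PetscSFDestroy(&newsf));
*/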
746: /*@C
747: PetscSFGetGraph - Get the graph specifying a parallel star forest
749: Not Collective
751: Input Parameter:
752: . sf - star forest
754: Output Parameters:
755: + nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
756: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
757: . ilocal - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage)
758: - iremote - remote locations of root vertices for each leaf on the current process
760: Level: intermediate
762: Notes:
763: We do not currently require that the graph has been set, thus `nroots` is returned as -1 if it has not been set yet
765: The returned `ilocal` and `iremote` might contain values in a different order than the input ones in `PetscSFSetGraph()`
767: Fortran Notes:
768: The returned `iremote` array is a copy and must be deallocated after use. Consequently, if you
769: want to update the graph, you must call `PetscSFSetGraph()` after modifying the `iremote` array.
771: To check for a `NULL` `ilocal` use
772: $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
774: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
775: @*/
776: PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote)
777: {
778: PetscFunctionBegin;
780: if (sf->ops->GetGraph) {
781: PetscCall((sf->ops->GetGraph)(sf, nroots, nleaves, ilocal, iremote));
782: } else {
783: if (nroots) *nroots = sf->nroots;
784: if (nleaves) *nleaves = sf->nleaves;
785: if (ilocal) *ilocal = sf->mine;
786: if (iremote) *iremote = sf->remote;
787: }
788: PetscFunctionReturn(PETSC_SUCCESS);
789: }
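/*
  A sketch of inspecting a graph with PetscSFGetGraph(); a NULL ilocal means leaf i lives
  at location i of the leafdata buffer.

    PetscInt           nroots, nleaves, i;
    const PetscInt    *ilocal;
    const PetscSFNode *iremote;

    PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
    for (i = 0; i < nleaves; i++) {
      const PetscInt leaf = ilocal ? ilocal[i] : i;
      // leaf at location `leaf` is connected to root iremote[i].index on rank iremote[i].rank
    }
*/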
791: /*@
792: PetscSFGetLeafRange - Get the active leaf ranges
794: Not Collective
796: Input Parameter:
797: . sf - star forest
799: Output Parameters:
800: + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
801: - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.
803: Level: developer
805: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
806: @*/
807: PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
808: {
809: PetscFunctionBegin;
811: PetscSFCheckGraphSet(sf, 1);
812: if (minleaf) *minleaf = sf->minleaf;
813: if (maxleaf) *maxleaf = sf->maxleaf;
814: PetscFunctionReturn(PETSC_SUCCESS);
815: }
817: /*@C
818: PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database
820: Collective on A
822: Input Parameters:
823: + A - the star forest
824: . obj - Optional object that provides the prefix for the option names
825: - name - command line option
827: Level: intermediate
829: Note:
830: See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat`
832: .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
833: @*/
834: PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
835: {
836: PetscFunctionBegin;
838: PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
839: PetscFunctionReturn(PETSC_SUCCESS);
840: }
842: /*@C
843: PetscSFView - view a star forest
845: Collective
847: Input Parameters:
848: + sf - star forest
849: - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`
851: Level: beginner
853: .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
854: @*/
855: PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
856: {
857: PetscBool iascii;
858: PetscViewerFormat format;
860: PetscFunctionBegin;
862: if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
864: PetscCheckSameComm(sf, 1, viewer, 2);
865: if (sf->graphset) PetscCall(PetscSFSetUp(sf));
866: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
867: if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
868: PetscMPIInt rank;
869: PetscInt ii, i, j;
871: PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
872: PetscCall(PetscViewerASCIIPushTab(viewer));
873: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
874: if (!sf->graphset) {
875: PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
876: PetscCall(PetscViewerASCIIPopTab(viewer));
877: PetscFunctionReturn(PETSC_SUCCESS);
878: }
879: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
880: PetscCall(PetscViewerASCIIPushSynchronized(viewer));
881: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks));
882: for (i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
883: PetscCall(PetscViewerFlush(viewer));
884: PetscCall(PetscViewerGetFormat(viewer, &format));
885: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
886: PetscMPIInt *tmpranks, *perm;
887: PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
888: PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
889: for (i = 0; i < sf->nranks; i++) perm[i] = i;
890: PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
891: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
892: for (ii = 0; ii < sf->nranks; ii++) {
893: i = perm[ii];
894: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
895: for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
896: }
897: PetscCall(PetscFree2(tmpranks, perm));
898: }
899: PetscCall(PetscViewerFlush(viewer));
900: PetscCall(PetscViewerASCIIPopSynchronized(viewer));
901: }
902: PetscCall(PetscViewerASCIIPopTab(viewer));
903: }
904: PetscTryTypeMethod(sf, View, viewer);
905: PetscFunctionReturn(PETSC_SUCCESS);
906: }
908: /*@C
909: PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
911: Not Collective
913: Input Parameter:
914: . sf - star forest
916: Output Parameters:
917: + nranks - number of ranks referenced by local part
918: . ranks - [`nranks`] array of ranks
919: . roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank
920: . rmine - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank
921: - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank
923: Level: developer
925: .seealso: `PetscSF`, `PetscSFGetLeafRanks()`
926: @*/
927: PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote)
928: {
929: PetscFunctionBegin;
931: PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
932: if (sf->ops->GetRootRanks) {
933: PetscCall((sf->ops->GetRootRanks)(sf, nranks, ranks, roffset, rmine, rremote));
934: } else {
935: /* The generic implementation */
936: if (nranks) *nranks = sf->nranks;
937: if (ranks) *ranks = sf->ranks;
938: if (roffset) *roffset = sf->roffset;
939: if (rmine) *rmine = sf->rmine;
940: if (rremote) *rremote = sf->rremote;
941: }
942: PetscFunctionReturn(PETSC_SUCCESS);
943: }
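/*
  A sketch of walking the rank-grouped arrays returned above (valid only after
  PetscSFSetUp()): for each remote rank r, the leaves rmine[roffset[r]..roffset[r+1]) of
  this process reference the root indices rremote[roffset[r]..roffset[r+1]) on ranks[r].

    PetscInt           nranks, r, j;
    const PetscMPIInt *ranks;
    const PetscInt    *roffset, *rmine, *rremote;

    PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &roffset, &rmine, &rremote));
    for (r = 0; r < nranks; r++) {
      for (j = roffset[r]; j < roffset[r + 1]; j++) {
        // local leaf rmine[j] references remote root rremote[j] owned by ranks[r]
      }
    }
*/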
945: /*@C
946: PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
948: Not Collective
950: Input Parameter:
951: . sf - star forest
953: Output Parameters:
954: + niranks - number of leaf ranks referencing roots on this process
955: . iranks - [`niranks`] array of ranks
956: . ioffset - [`niranks`+1] offset in `irootloc` for each rank
957: - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank
959: Level: developer
961: .seealso: `PetscSF`, `PetscSFGetRootRanks()`
962: @*/
963: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
964: {
965: PetscFunctionBegin;
967: PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
968: if (sf->ops->GetLeafRanks) {
969: PetscCall((sf->ops->GetLeafRanks)(sf, niranks, iranks, ioffset, irootloc));
970: } else {
971: PetscSFType type;
972: PetscCall(PetscSFGetType(sf, &type));
973: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
974: }
975: PetscFunctionReturn(PETSC_SUCCESS);
976: }
978: static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
979: {
980: PetscInt i;
981: for (i = 0; i < n; i++) {
982: if (needle == list[i]) return PETSC_TRUE;
983: }
984: return PETSC_FALSE;
985: }
987: /*@C
988: PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations.
990: Collective
992: Input Parameters:
993: + sf - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
994: - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)
996: Level: developer
998: .seealso: `PetscSF`, `PetscSFGetRootRanks()`
999: @*/
1000: PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
1001: {
1002: PetscHMapI table;
1003: PetscHashIter pos;
1004: PetscMPIInt size, groupsize, *groupranks;
1005: PetscInt *rcount, *ranks;
1006: PetscInt i, irank = -1, orank = -1;
1008: PetscFunctionBegin;
1010: PetscSFCheckGraphSet(sf, 1);
1011: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
1012: PetscCall(PetscHMapICreateWithSize(10, &table));
1013: for (i = 0; i < sf->nleaves; i++) {
1014: /* Log 1-based rank */
1015: PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
1016: }
1017: PetscCall(PetscHMapIGetSize(table, &sf->nranks));
1018: PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
1019: PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
1020: PetscHashIterBegin(table, pos);
1021: for (i = 0; i < sf->nranks; i++) {
1022: PetscHashIterGetKey(table, pos, ranks[i]);
1023: PetscHashIterGetVal(table, pos, rcount[i]);
1024: PetscHashIterNext(table, pos);
1025: ranks[i]--; /* Convert back to 0-based */
1026: }
1027: PetscCall(PetscHMapIDestroy(&table));
1029: /* We expect that dgroup is reliably "small" while nranks could be large */
1030: {
1031: MPI_Group group = MPI_GROUP_NULL;
1032: PetscMPIInt *dgroupranks;
1033: PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1034: PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
1035: PetscCall(PetscMalloc1(groupsize, &dgroupranks));
1036: PetscCall(PetscMalloc1(groupsize, &groupranks));
1037: for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
1038: if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
1039: PetscCallMPI(MPI_Group_free(&group));
1040: PetscCall(PetscFree(dgroupranks));
1041: }
1043: /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1044: for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
1045: for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
1046: if (InList(ranks[i], groupsize, groupranks)) break;
1047: }
1048: for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1049: if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
1050: }
1051: if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1052: PetscInt tmprank, tmpcount;
1054: tmprank = ranks[i];
1055: tmpcount = rcount[i];
1056: ranks[i] = ranks[sf->ndranks];
1057: rcount[i] = rcount[sf->ndranks];
1058: ranks[sf->ndranks] = tmprank;
1059: rcount[sf->ndranks] = tmpcount;
1060: sf->ndranks++;
1061: }
1062: }
1063: PetscCall(PetscFree(groupranks));
1064: PetscCall(PetscSortIntWithArray(sf->ndranks, ranks, rcount));
1065: PetscCall(PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
1066: sf->roffset[0] = 0;
1067: for (i = 0; i < sf->nranks; i++) {
1068: PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
1069: sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
1070: rcount[i] = 0;
1071: }
1072: for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
1073: /* short circuit */
1074: if (orank != sf->remote[i].rank) {
1075: /* Search for index of iremote[i].rank in sf->ranks */
1076: PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank));
1077: if (irank < 0) {
1078: PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
1079: if (irank >= 0) irank += sf->ndranks;
1080: }
1081: orank = sf->remote[i].rank;
1082: }
1083: PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %" PetscInt_FMT " in array", sf->remote[i].rank);
1084: sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i;
1085: sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1086: rcount[irank]++;
1087: }
1088: PetscCall(PetscFree2(rcount, ranks));
1089: PetscFunctionReturn(PETSC_SUCCESS);
1090: }
1092: /*@C
1093: PetscSFGetGroups - gets incoming and outgoing process groups
1095: Collective
1097: Input Parameter:
1098: . sf - star forest
1100: Output Parameters:
1101: + incoming - group of origin processes for incoming edges (leaves that reference my roots)
1102: - outgoing - group of destination processes for outgoing edges (roots that I reference)
1104: Level: developer
1106: .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
1107: @*/
1108: PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
1109: {
1110: MPI_Group group = MPI_GROUP_NULL;
1112: PetscFunctionBegin;
1113: PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
1114: if (sf->ingroup == MPI_GROUP_NULL) {
1115: PetscInt i;
1116: const PetscInt *indegree;
1117: PetscMPIInt rank, *outranks, *inranks;
1118: PetscSFNode *remote;
1119: PetscSF bgcount;
1121: /* Compute the number of incoming ranks */
1122: PetscCall(PetscMalloc1(sf->nranks, &remote));
1123: for (i = 0; i < sf->nranks; i++) {
1124: remote[i].rank = sf->ranks[i];
1125: remote[i].index = 0;
1126: }
1127: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
1128: PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1129: PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
1130: PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
1131: /* Enumerate the incoming ranks */
1132: PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
1133: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1134: for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
1135: PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
1136: PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
1137: PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1138: PetscCallMPI(MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup));
1139: PetscCallMPI(MPI_Group_free(&group));
1140: PetscCall(PetscFree2(inranks, outranks));
1141: PetscCall(PetscSFDestroy(&bgcount));
1142: }
1143: *incoming = sf->ingroup;
1145: if (sf->outgroup == MPI_GROUP_NULL) {
1146: PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1147: PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
1148: PetscCallMPI(MPI_Group_free(&group));
1149: }
1150: *outgoing = sf->outgroup;
1151: PetscFunctionReturn(PETSC_SUCCESS);
1152: }
1154: /*@
1155: PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters
1157: Collective
1159: Input Parameter:
1160: . sf - star forest that may contain roots with 0 or with more than 1 vertex
1162: Output Parameter:
1163: . multi - star forest with split roots, such that each root has degree exactly 1
1165: Level: developer
1167: Note:
1168: In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
1169: directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1170: edge, it is a candidate for future optimization that might involve its removal.
1172: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
1173: @*/
1174: PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
1175: {
1176: PetscFunctionBegin;
1179: if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1180: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1181: *multi = sf->multi;
1182: sf->multi->multi = sf->multi;
1183: PetscFunctionReturn(PETSC_SUCCESS);
1184: }
1185: if (!sf->multi) {
1186: const PetscInt *indegree;
1187: PetscInt i, *inoffset, *outones, *outoffset, maxlocal;
1188: PetscSFNode *remote;
1189: maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
1190: PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
1191: PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
1192: PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
1193: inoffset[0] = 0;
1194: for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
1195: for (i = 0; i < maxlocal; i++) outones[i] = 1;
1196: PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1197: PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1198: for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1199: if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
1200: for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
1201: }
1202: PetscCall(PetscMalloc1(sf->nleaves, &remote));
1203: for (i = 0; i < sf->nleaves; i++) {
1204: remote[i].rank = sf->remote[i].rank;
1205: remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1206: }
1207: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1208: sf->multi->multi = sf->multi;
1209: PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1210: if (sf->rankorder) { /* Sort the ranks */
1211: PetscMPIInt rank;
1212: PetscInt *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
1213: PetscSFNode *newremote;
1214: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1215: for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
1216: PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
1217: for (i = 0; i < maxlocal; i++) outranks[i] = rank;
1218: PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1219: PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1220: /* Sort the incoming ranks at each vertex, build the inverse map */
1221: for (i = 0; i < sf->nroots; i++) {
1222: PetscInt j;
1223: for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
1224: PetscCall(PetscSortIntWithArray(indegree[i], inranks + inoffset[i], tmpoffset));
1225: for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1226: }
1227: PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1228: PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1229: PetscCall(PetscMalloc1(sf->nleaves, &newremote));
1230: for (i = 0; i < sf->nleaves; i++) {
1231: newremote[i].rank = sf->remote[i].rank;
1232: newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1233: }
1234: PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
1235: PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
1236: }
1237: PetscCall(PetscFree3(inoffset, outones, outoffset));
1238: }
1239: *multi = sf->multi;
1240: PetscFunctionReturn(PETSC_SUCCESS);
1241: }
1243: /*@C
1244: PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices
1246: Collective
1248: Input Parameters:
1249: + sf - original star forest
1250: . nselected - number of selected roots on this process
1251: - selected - indices of the selected roots on this process
1253: Output Parameter:
1254: . esf - new star forest
1256: Level: advanced
1258: Note:
1259: To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
1260: be done by calling `PetscSFGetGraph()`.
1262: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1263: @*/
1264: PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
1265: {
1266: PetscInt i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1267: const PetscInt *ilocal;
1268: signed char *rootdata, *leafdata, *leafmem;
1269: const PetscSFNode *iremote;
1270: PetscSFNode *new_iremote;
1271: MPI_Comm comm;
1273: PetscFunctionBegin;
1275: PetscSFCheckGraphSet(sf, 1);
1279: PetscCall(PetscSFSetUp(sf));
1280: PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
1281: PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1282: PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
1284: if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1285: PetscBool dups;
1286: PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
1287: PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
1288: for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
1289: }
1291: if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1292: else {
1293: /* A generic version of creating embedded sf */
1294: PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
1295: maxlocal = maxleaf - minleaf + 1;
1296: PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
1297: leafdata = leafmem - minleaf;
1298: /* Tag selected roots and bcast to leaves */
1299: for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
1300: PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1301: PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1303: /* Build esf with leaves that are still connected */
1304: esf_nleaves = 0;
1305: for (i = 0; i < nleaves; i++) {
1306: j = ilocal ? ilocal[i] : i;
1307: /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1308: with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1309: */
1310: esf_nleaves += (leafdata[j] ? 1 : 0);
1311: }
1312: PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
1313: PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
1314: for (i = n = 0; i < nleaves; i++) {
1315: j = ilocal ? ilocal[i] : i;
1316: if (leafdata[j]) {
1317: new_ilocal[n] = j;
1318: new_iremote[n].rank = iremote[i].rank;
1319: new_iremote[n].index = iremote[i].index;
1320: ++n;
1321: }
1322: }
1323: PetscCall(PetscSFCreate(comm, esf));
1324: PetscCall(PetscSFSetFromOptions(*esf));
1325: PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1326: PetscCall(PetscFree2(rootdata, leafmem));
1327: }
1328: PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
1329: PetscFunctionReturn(PETSC_SUCCESS);
1330: }
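/*
  A sketch of embedding on a subset of roots; indices are not remapped, so the leaves of
  esf keep their original local locations (obtainable with PetscSFGetGraph() as noted
  above). The contents of selected[] here are purely illustrative.

    const PetscInt selected[] = {0, 3};
    PetscSF        esf;

    PetscCall(PetscSFCreateEmbeddedRootSF(sf, 2, selected, &esf));
    PetscCall(PetscSFSetUp(esf));
    // ... communicate on esf; edges to unselected roots are gone ...
    PetscCall(PetscSFDestroy(&esf));
*/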
1332: /*@C
1333: PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices
1335: Collective
1337: Input Parameters:
1338: + sf - original star forest
1339: . nselected - number of selected leaves on this process
1340: - selected - indices of the selected leaves on this process
1342: Output Parameter:
1343: . newsf - new star forest
1345: Level: advanced
1347: .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1348: @*/
1349: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
1350: {
1351: const PetscSFNode *iremote;
1352: PetscSFNode *new_iremote;
1353: const PetscInt *ilocal;
1354: PetscInt i, nroots, *leaves, *new_ilocal;
1355: MPI_Comm comm;
1357: PetscFunctionBegin;
1359: PetscSFCheckGraphSet(sf, 1);
1363: /* Uniq selected[] and put results in leaves[] */
1364: PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1365: PetscCall(PetscMalloc1(nselected, &leaves));
1366: PetscCall(PetscArraycpy(leaves, selected, nselected));
1367: PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves));
1368: PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);
1370: /* Optimize the routine only when sf is set up and hence we can reuse sf's communication pattern */
1371: if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1372: else {
1373: PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
1374: PetscCall(PetscMalloc1(nselected, &new_ilocal));
1375: PetscCall(PetscMalloc1(nselected, &new_iremote));
1376: for (i = 0; i < nselected; ++i) {
1377: const PetscInt l = leaves[i];
1378: new_ilocal[i] = ilocal ? ilocal[l] : l;
1379: new_iremote[i].rank = iremote[l].rank;
1380: new_iremote[i].index = iremote[l].index;
1381: }
1382: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
1383: PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1384: }
1385: PetscCall(PetscFree(leaves));
1386: PetscFunctionReturn(PETSC_SUCCESS);
1387: }
1389: /*@C
1390: PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with a call to `PetscSFBcastEnd()`
1392: Collective
1394: Input Parameters:
1395: + sf - star forest on which to communicate
1396: . unit - data type associated with each node
1397: . rootdata - buffer to broadcast
1398: - op - operation to use for reduction
1400: Output Parameter:
1401: . leafdata - buffer to be reduced with values from each leaf's respective root
1403: Level: intermediate
1405: Note:
1406: When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1407: are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1408: use `PetscSFBcastWithMemTypeBegin()` instead.
1410: .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
1411: @*/
1412: PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1413: {
1414: PetscMemType rootmtype, leafmtype;
1416: PetscFunctionBegin;
1418: PetscCall(PetscSFSetUp(sf));
1419: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1420: PetscCall(PetscGetMemType(rootdata, &rootmtype));
1421: PetscCall(PetscGetMemType(leafdata, &leafmtype));
1422: PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1423: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1424: PetscFunctionReturn(PETSC_SUCCESS);
1425: }
1427: /*@C
1428: PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call
1429: to `PetscSFBcastEnd()`
1431: Collective
1433: Input Parameters:
1434: + sf - star forest on which to communicate
1435: . unit - data type associated with each node
1436: . rootmtype - memory type of rootdata
1437: . rootdata - buffer to broadcast
1438: . leafmtype - memory type of leafdata
1439: - op - operation to use for reduction
1441: Output Parameter:
1442: . leafdata - buffer to be reduced with values from each leaf's respective root
1444: Level: intermediate
1446: .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1447: @*/
1448: PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1449: {
1450: PetscFunctionBegin;
1452: PetscCall(PetscSFSetUp(sf));
1453: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1454: PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1455: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1456: PetscFunctionReturn(PETSC_SUCCESS);
1457: }
1459: /*@C
1460: PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`
1462: Collective
1464: Input Parameters:
1465: + sf - star forest
1466: . unit - data type
1467: . rootdata - buffer to broadcast
1468: - op - operation to use for reduction
1470: Output Parameter:
1471: . leafdata - buffer to be reduced with values from each leaf's respective root
1473: Level: intermediate
1475: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
1476: @*/
1477: PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1478: {
1479: PetscFunctionBegin;
1481: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
1482: PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
1483: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
1484: PetscFunctionReturn(PETSC_SUCCESS);
1485: }
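/* Editorial usage sketch (not part of the original sf.c): copy each root value to all of its
   leaves. Assumes `sf` is set up, `nroots`/`nleaves` are the local sizes, and the leaves use
   contiguous storage (ilocal == NULL); the names are illustrative.

     PetscScalar *rootdata, *leafdata;

     PetscCall(PetscMalloc2(nroots, &rootdata, nleaves, &leafdata));
     // ... fill rootdata ...
     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
     // ... unrelated local work may overlap with the communication here ...
     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
     PetscCall(PetscFree2(rootdata, leafdata));
*/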
1487: /*@C
1488: PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()`
1490: Collective
1492: Input Parameters:
1493: + sf - star forest
1494: . unit - data type
1495: . leafdata - values to reduce
1496: - op - reduction operation
1498: Output Parameter:
1499: . rootdata - result of reduction of values from all leaves of each root
1501: Level: intermediate
1503: Note:
1504: When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1505: are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1506: use `PetscSFReduceWithMemTypeBegin()` instead.
1508: .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`
1509: @*/
1510: PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1511: {
1512: PetscMemType rootmtype, leafmtype;
1514: PetscFunctionBegin;
1516: PetscCall(PetscSFSetUp(sf));
1517: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1518: PetscCall(PetscGetMemType(rootdata, &rootmtype));
1519: PetscCall(PetscGetMemType(leafdata, &leafmtype));
1520: PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1521: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1522: PetscFunctionReturn(PETSC_SUCCESS);
1523: }
1525: /*@C
1526: PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()`
1528: Collective
1530: Input Parameters:
1531: + sf - star forest
1532: . unit - data type
1533: . leafmtype - memory type of leafdata
1534: . leafdata - values to reduce
1535: . rootmtype - memory type of rootdata
1536: - op - reduction operation
1538: Output Parameter:
1539: . rootdata - result of reduction of values from all leaves of each root
1541: Level: intermediate
1543: .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()`
1544: @*/
1545: PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1546: {
1547: PetscFunctionBegin;
1549: PetscCall(PetscSFSetUp(sf));
1550: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1551: PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1552: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1553: PetscFunctionReturn(PETSC_SUCCESS);
1554: }
1556: /*@C
1557: PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()`
1559: Collective
1561: Input Parameters:
1562: + sf - star forest
1563: . unit - data type
1564: . leafdata - values to reduce
1565: - op - reduction operation
1567: Output Parameter:
1568: . rootdata - result of reduction of values from all leaves of each root
1570: Level: intermediate
1572: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()`
1573: @*/
1574: PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1575: {
1576: PetscFunctionBegin;
1578: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1579: PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
1580: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1581: PetscFunctionReturn(PETSC_SUCCESS);
1582: }
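/* Editorial usage sketch (not part of the original sf.c): sum the leaf values into their roots,
   e.g. to accumulate ghost contributions onto owned points. Assumes `sf` is set up and uses
   contiguous leaf storage (ilocal == NULL); the names are illustrative.

     PetscScalar *rootdata, *leafdata;

     PetscCall(PetscMalloc2(nroots, &rootdata, nleaves, &leafdata));
     PetscCall(PetscArrayzero(rootdata, nroots)); // reduction adds into the existing root values
     // ... fill leafdata ...
     PetscCall(PetscSFReduceBegin(sf, MPIU_SCALAR, leafdata, rootdata, MPIU_SUM));
     PetscCall(PetscSFReduceEnd(sf, MPIU_SCALAR, leafdata, rootdata, MPIU_SUM));
     PetscCall(PetscFree2(rootdata, leafdata));
*/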
1584: /*@C
1585: PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value,
1586: to be completed with `PetscSFFetchAndOpEnd()`
1588: Collective
1590: Input Parameters:
1591: + sf - star forest
1592: . unit - data type
1593: . leafdata - leaf values to use in reduction
1594: - op - operation to use for reduction
1596: Output Parameters:
1597: + rootdata - root values to be updated, input state is seen by first process to perform an update
1598: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1600: Level: advanced
1602: Note:
1603: The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1604: might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1605: not guaranteed across the whole vertex. Therefore, this function is typically used only with primitive types such as
1606: integers.
1608: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1609: @*/
1610: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1611: {
1612: PetscMemType rootmtype, leafmtype, leafupdatemtype;
1614: PetscFunctionBegin;
1616: PetscCall(PetscSFSetUp(sf));
1617: PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1618: PetscCall(PetscGetMemType(rootdata, &rootmtype));
1619: PetscCall(PetscGetMemType(leafdata, &leafmtype));
1620: PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
1621: PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1622: PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1623: PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1624: PetscFunctionReturn(PETSC_SUCCESS);
1625: }
1627: /*@C
1628: PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
1629: applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()`
1631: Collective
1633: Input Parameters:
1634: + sf - star forest
1635: . unit - data type
1636: . rootmtype - memory type of rootdata
1637: . leafmtype - memory type of leafdata
1638: . leafdata - leaf values to use in reduction
1639: . leafupdatemtype - memory type of leafupdate
1640: - op - operation to use for reduction
1642: Output Parameters:
1643: + rootdata - root values to be updated, input state is seen by first process to perform an update
1644: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1646: Level: advanced
1648: Note:
1649: See `PetscSFFetchAndOpBegin()` for more details.
1651: .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()`
1652: @*/
1653: PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
1654: {
1655: PetscFunctionBegin;
1657: PetscCall(PetscSFSetUp(sf));
1658: PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1659: PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1660: PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1661: PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1662: PetscFunctionReturn(PETSC_SUCCESS);
1663: }
1665: /*@C
1666: PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()`
1667: to fetch values from roots and update atomically by applying operation using my leaf value
1669: Collective
1671: Input Parameters:
1672: + sf - star forest
1673: . unit - data type
1674: . leafdata - leaf values to use in reduction
1675: - op - operation to use for reduction
1677: Output Parameters:
1678: + rootdata - root values to be updated, input state is seen by first process to perform an update
1679: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1681: Level: advanced
1683: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()`
1684: @*/
1685: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1686: {
1687: PetscFunctionBegin;
1689: PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1690: PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
1691: PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1692: PetscFunctionReturn(PETSC_SUCCESS);
1693: }
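/* Editorial usage sketch (not part of the original sf.c): fetch-and-add with MPI_SUM, letting
   each leaf atomically claim a slot in a counter kept at its root. Assumes `sf` is set up and
   uses contiguous leaf storage; the names are illustrative.

     PetscInt *rootcount, *leafones, *leafoffsets;

     PetscCall(PetscMalloc3(nroots, &rootcount, nleaves, &leafones, nleaves, &leafoffsets));
     for (PetscInt i = 0; i < nroots; i++) rootcount[i] = 0;
     for (PetscInt i = 0; i < nleaves; i++) leafones[i] = 1;
     PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, rootcount, leafones, leafoffsets, MPI_SUM));
     PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, rootcount, leafones, leafoffsets, MPI_SUM));
     // leafoffsets[i] now holds the root counter value seen just before this leaf's update,
     // and rootcount[r] ends up equal to the number of leaves attached to root r
     PetscCall(PetscFree3(rootcount, leafones, leafoffsets));
*/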
1695: /*@C
1696: PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()`
1698: Collective
1700: Input Parameter:
1701: . sf - star forest
1703: Output Parameter:
1704: . degree - degree of each root vertex
1706: Level: advanced
1708: Note:
1709: The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1711: .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
1712: @*/
1713: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree)
1714: {
1715: PetscFunctionBegin;
1717: PetscSFCheckGraphSet(sf, 1);
1719: if (!sf->degreeknown) {
1720: PetscInt i, nroots = sf->nroots, maxlocal;
1721: PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1722: maxlocal = sf->maxleaf - sf->minleaf + 1;
1723: PetscCall(PetscMalloc1(nroots, &sf->degree));
1724: PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1725: for (i = 0; i < nroots; i++) sf->degree[i] = 0;
1726: for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
1727: PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1728: }
1729: *degree = NULL;
1730: PetscFunctionReturn(PETSC_SUCCESS);
1731: }
1733: /*@C
1734: PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`
1736: Collective
1738: Input Parameter:
1739: . sf - star forest
1741: Output Parameter:
1742: . degree - degree of each root vertex
1744: Level: developer
1746: Note:
1747: The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1749: .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
1750: @*/
1751: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree)
1752: {
1753: PetscFunctionBegin;
1755: PetscSFCheckGraphSet(sf, 1);
1757: if (!sf->degreeknown) {
1758: PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1759: PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1760: PetscCall(PetscFree(sf->degreetmp));
1761: sf->degreeknown = PETSC_TRUE;
1762: }
1763: *degree = sf->degree;
1764: PetscFunctionReturn(PETSC_SUCCESS);
1765: }
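/* Editorial usage sketch (not part of the original sf.c): obtain the number of leaves attached
   to each local root. The returned array is owned by the PetscSF and must not be freed.

     const PetscInt *degree;

     PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
     PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
     // degree[r] is the number of leaves (possibly on other processes) pointing at root r
*/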
1767: /*@C
1768: PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
1769: Each multi-root is assigned the index of the corresponding original root.
1771: Collective
1773: Input Parameters:
1774: + sf - star forest
1775: - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`
1777: Output Parameters:
1778: + nMultiRoots - (optional) number of multi-roots (roots of multi-`PetscSF`)
1779: - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`
1781: Level: developer
1783: Note:
1784: The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed.
1786: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1787: @*/
1788: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1789: {
1790: PetscSF msf;
1791: PetscInt i, j, k, nroots, nmroots;
1793: PetscFunctionBegin;
1795: PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
1799: PetscCall(PetscSFGetMultiSF(sf, &msf));
1800: PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
1801: PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1802: for (i = 0, j = 0, k = 0; i < nroots; i++) {
1803: if (!degree[i]) continue;
1804: for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1805: }
1806: PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail");
1807: if (nMultiRoots) *nMultiRoots = nmroots;
1808: PetscFunctionReturn(PETSC_SUCCESS);
1809: }
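/* Editorial usage sketch (not part of the original sf.c): map multi-roots back to the original
   roots they expand. Assumes `degree` has been obtained with PetscSFComputeDegreeBegin()/End()
   as in the sketch above.

     PetscInt nmr, *morig;

     PetscCall(PetscSFComputeMultiRootOriginalNumbering(sf, degree, &nmr, &morig));
     // morig[k] is the original root index of multi-root k, for k = 0, ..., nmr-1
     PetscCall(PetscFree(morig));
*/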
1811: /*@C
1812: PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`
1814: Collective
1816: Input Parameters:
1817: + sf - star forest
1818: . unit - data type
1819: - leafdata - leaf data to gather to roots
1821: Output Parameter:
1822: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1824: Level: intermediate
1826: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1827: @*/
1828: PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1829: {
1830: PetscSF multi = NULL;
1832: PetscFunctionBegin;
1834: PetscCall(PetscSFSetUp(sf));
1835: PetscCall(PetscSFGetMultiSF(sf, &multi));
1836: PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1837: PetscFunctionReturn(PETSC_SUCCESS);
1838: }
1840: /*@C
1841: PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`
1843: Collective
1845: Input Parameters:
1846: + sf - star forest
1847: . unit - data type
1848: - leafdata - leaf data to gather to roots
1850: Output Parameter:
1851: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1853: Level: intermediate
1855: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1856: @*/
1857: PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1858: {
1859: PetscSF multi = NULL;
1861: PetscFunctionBegin;
1863: PetscCall(PetscSFGetMultiSF(sf, &multi));
1864: PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1865: PetscFunctionReturn(PETSC_SUCCESS);
1866: }
1868: /*@C
1869: PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`
1871: Collective
1873: Input Parameters:
1874: + sf - star forest
1875: . unit - data type
1876: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1878: Output Parameter:
1879: . leafdata - leaf data to be updated with personal data from each respective root
1881: Level: intermediate
1883: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
1884: @*/
1885: PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1886: {
1887: PetscSF multi = NULL;
1889: PetscFunctionBegin;
1891: PetscCall(PetscSFSetUp(sf));
1892: PetscCall(PetscSFGetMultiSF(sf, &multi));
1893: PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1894: PetscFunctionReturn(PETSC_SUCCESS);
1895: }
1897: /*@C
1898: PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`
1900: Collective
1902: Input Parameters:
1903: + sf - star forest
1904: . unit - data type
1905: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1907: Output Parameter:
1908: . leafdata - leaf data to be updated with personal data from each respective root
1910: Level: intermediate
1912: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
1913: @*/
1914: PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1915: {
1916: PetscSF multi = NULL;
1918: PetscFunctionBegin;
1920: PetscCall(PetscSFGetMultiSF(sf, &multi));
1921: PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1922: PetscFunctionReturn(PETSC_SUCCESS);
1923: }
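/* Editorial usage sketch (not part of the original sf.c): gather all leaf values into per-root
   slots sized by the root degree, then scatter one value back to each leaf. Assumes `sf` is set
   up, uses contiguous leaf storage, and `degree` was obtained with PetscSFComputeDegree*();
   the names are illustrative.

     PetscInt     ndegree = 0;
     PetscScalar *leafdata, *multirootdata;

     for (PetscInt i = 0; i < nroots; i++) ndegree += degree[i];
     PetscCall(PetscMalloc2(nleaves, &leafdata, ndegree, &multirootdata));
     // ... fill leafdata ...
     PetscCall(PetscSFGatherBegin(sf, MPIU_SCALAR, leafdata, multirootdata));
     PetscCall(PetscSFGatherEnd(sf, MPIU_SCALAR, leafdata, multirootdata));
     // ... process the gathered values per root ...
     PetscCall(PetscSFScatterBegin(sf, MPIU_SCALAR, multirootdata, leafdata));
     PetscCall(PetscSFScatterEnd(sf, MPIU_SCALAR, multirootdata, leafdata));
     PetscCall(PetscFree2(leafdata, multirootdata));
*/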
1925: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1926: {
1927: PetscInt i, n, nleaves;
1928: const PetscInt *ilocal = NULL;
1929: PetscHSetI seen;
1931: PetscFunctionBegin;
1932: if (PetscDefined(USE_DEBUG)) {
1933: PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
1934: PetscCall(PetscHSetICreate(&seen));
1935: for (i = 0; i < nleaves; i++) {
1936: const PetscInt leaf = ilocal ? ilocal[i] : i;
1937: PetscCall(PetscHSetIAdd(seen, leaf));
1938: }
1939: PetscCall(PetscHSetIGetSize(seen, &n));
1940: PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
1941: PetscCall(PetscHSetIDestroy(&seen));
1942: }
1943: PetscFunctionReturn(PETSC_SUCCESS);
1944: }
1946: /*@
1947: PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view
1949: Input Parameters:
1950: + sfA - The first `PetscSF`
1951: - sfB - The second `PetscSF`
1953: Output Parameter:
1954: . sfBA - The composite `PetscSF`
1956: Level: developer
1958: Notes:
1959: Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
1960: forests, i.e. the same leaf is not connected with different roots.
1962: `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds
1963: a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
1964: nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
1965: `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes.
1967: .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
1968: @*/
1969: PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
1970: {
1971: const PetscSFNode *remotePointsA, *remotePointsB;
1972: PetscSFNode *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
1973: const PetscInt *localPointsA, *localPointsB;
1974: PetscInt *localPointsBA;
1975: PetscInt i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
1976: PetscBool denseB;
1978: PetscFunctionBegin;
1980: PetscSFCheckGraphSet(sfA, 1);
1982: PetscSFCheckGraphSet(sfB, 2);
1983: PetscCheckSameComm(sfA, 1, sfB, 2);
1985: PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
1986: PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
1988: PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
1989: PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
1990: /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
1991: numRootsB; otherwise, garbage will be broadcasted.
1992: Example (comm size = 1):
1993: sfA: 0 <- (0, 0)
1994: sfB: 100 <- (0, 0)
1995: 101 <- (0, 1)
1996: Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target
1997: of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would
1998: receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
1999: remotePointsA; if not recast, point 101 would receive a garbage value. */
2000: PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
2001: for (i = 0; i < numRootsB; i++) {
2002: reorderedRemotePointsA[i].rank = -1;
2003: reorderedRemotePointsA[i].index = -1;
2004: }
2005: for (i = 0; i < numLeavesA; i++) {
2006: PetscInt localp = localPointsA ? localPointsA[i] : i;
2008: if (localp >= numRootsB) continue;
2009: reorderedRemotePointsA[localp] = remotePointsA[i];
2010: }
2011: remotePointsA = reorderedRemotePointsA;
2012: PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2013: PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
2014: for (i = 0; i < maxleaf - minleaf + 1; i++) {
2015: leafdataB[i].rank = -1;
2016: leafdataB[i].index = -1;
2017: }
2018: PetscCall(PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE));
2019: PetscCall(PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE));
2020: PetscCall(PetscFree(reorderedRemotePointsA));
2022: denseB = (PetscBool)!localPointsB;
2023: for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2024: if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
2025: else numLeavesBA++;
2026: }
2027: if (denseB) {
2028: localPointsBA = NULL;
2029: remotePointsBA = leafdataB;
2030: } else {
2031: PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
2032: PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
2033: for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2034: const PetscInt l = localPointsB ? localPointsB[i] : i;
2036: if (leafdataB[l - minleaf].rank == -1) continue;
2037: remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
2038: localPointsBA[numLeavesBA] = l;
2039: numLeavesBA++;
2040: }
2041: PetscCall(PetscFree(leafdataB));
2042: }
2043: PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2044: PetscCall(PetscSFSetFromOptions(*sfBA));
2045: PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2046: PetscFunctionReturn(PETSC_SUCCESS);
2047: }
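/* Editorial usage sketch (not part of the original sf.c): compose two star forests so that a
   broadcast on the result behaves like a broadcast on sfA followed by a broadcast on sfB (on
   the connected nodes). Both SFs are assumed to have their graphs set on congruent communicators.

     PetscSF sfBA;

     PetscCall(PetscSFCompose(sfA, sfB, &sfBA));
     PetscCall(PetscSFViewFromOptions(sfBA, NULL, "-sf_view")); // inspect the composite graph
     PetscCall(PetscSFDestroy(&sfBA));
*/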
2049: /*@
2050: PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one
2052: Input Parameters:
2053: + sfA - The first `PetscSF`
2054: - sfB - The second `PetscSF`
2056: Output Parameter:
2057: . sfBA - The composite `PetscSF`.
2059: Level: developer
2061: Notes:
2062: Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2063: forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
2064: second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected.
2066: `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds
2067: a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
2068: roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()`
2069: on `sfA`, then
2070: a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots.
2072: .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
2073: @*/
2074: PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2075: {
2076: const PetscSFNode *remotePointsA, *remotePointsB;
2077: PetscSFNode *remotePointsBA;
2078: const PetscInt *localPointsA, *localPointsB;
2079: PetscSFNode *reorderedRemotePointsA = NULL;
2080: PetscInt i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
2081: MPI_Op op;
2082: #if defined(PETSC_USE_64BIT_INDICES)
2083: PetscBool iswin;
2084: #endif
2086: PetscFunctionBegin;
2088: PetscSFCheckGraphSet(sfA, 1);
2090: PetscSFCheckGraphSet(sfB, 2);
2091: PetscCheckSameComm(sfA, 1, sfB, 2);
2093: PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2094: PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
2096: PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2097: PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2099: /* TODO: Check roots of sfB have degree of 1 */
2100: /* Once we implement it, we can replace the MPI_MAXLOC
2101: with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
2102: We use MPI_MAXLOC only to have a deterministic output from this routine if
2103: the root condition is not met.
2104: */
2105: op = MPI_MAXLOC;
2106: #if defined(PETSC_USE_64BIT_INDICES)
2107: /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2108: PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
2109: if (iswin) op = MPI_REPLACE;
2110: #endif
2112: PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2113: PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
2114: for (i = 0; i < maxleaf - minleaf + 1; i++) {
2115: reorderedRemotePointsA[i].rank = -1;
2116: reorderedRemotePointsA[i].index = -1;
2117: }
2118: if (localPointsA) {
2119: for (i = 0; i < numLeavesA; i++) {
2120: if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2121: reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2122: }
2123: } else {
2124: for (i = 0; i < numLeavesA; i++) {
2125: if (i > maxleaf || i < minleaf) continue;
2126: reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2127: }
2128: }
2130: PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
2131: PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
2132: for (i = 0; i < numRootsB; i++) {
2133: remotePointsBA[i].rank = -1;
2134: remotePointsBA[i].index = -1;
2135: }
2137: PetscCall(PetscSFReduceBegin(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op));
2138: PetscCall(PetscSFReduceEnd(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op));
2139: PetscCall(PetscFree(reorderedRemotePointsA));
2140: for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
2141: if (remotePointsBA[i].rank == -1) continue;
2142: remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
2143: remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2144: localPointsBA[numLeavesBA] = i;
2145: numLeavesBA++;
2146: }
2147: PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2148: PetscCall(PetscSFSetFromOptions(*sfBA));
2149: PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2150: PetscFunctionReturn(PETSC_SUCCESS);
2151: }
2153: /*
2154: PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`
2156: Input Parameter:
2157: . sf - The global `PetscSF`
2159: Output Parameter:
2160: . out - The local `PetscSF`
2162: .seealso: `PetscSF`, `PetscSFCreate()`
2163: */
2164: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2165: {
2166: MPI_Comm comm;
2167: PetscMPIInt myrank;
2168: const PetscInt *ilocal;
2169: const PetscSFNode *iremote;
2170: PetscInt i, j, nroots, nleaves, lnleaves, *lilocal;
2171: PetscSFNode *liremote;
2172: PetscSF lsf;
2174: PetscFunctionBegin;
2176: if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2177: else {
2178: /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2179: PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
2180: PetscCallMPI(MPI_Comm_rank(comm, &myrank));
2182: /* Find out local edges and build a local SF */
2183: PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
2184: for (i = lnleaves = 0; i < nleaves; i++) {
2185: if (iremote[i].rank == (PetscInt)myrank) lnleaves++;
2186: }
2187: PetscCall(PetscMalloc1(lnleaves, &lilocal));
2188: PetscCall(PetscMalloc1(lnleaves, &liremote));
2190: for (i = j = 0; i < nleaves; i++) {
2191: if (iremote[i].rank == (PetscInt)myrank) {
2192: lilocal[j] = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2193: liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
2194: liremote[j].index = iremote[i].index;
2195: j++;
2196: }
2197: }
2198: PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
2199: PetscCall(PetscSFSetFromOptions(lsf));
2200: PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
2201: PetscCall(PetscSFSetUp(lsf));
2202: *out = lsf;
2203: }
2204: PetscFunctionReturn(PETSC_SUCCESS);
2205: }
2207: /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2208: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2209: {
2210: PetscMemType rootmtype, leafmtype;
2212: PetscFunctionBegin;
2214: PetscCall(PetscSFSetUp(sf));
2215: PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
2216: PetscCall(PetscGetMemType(rootdata, &rootmtype));
2217: PetscCall(PetscGetMemType(leafdata, &leafmtype));
2218: PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
2219: PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
2220: PetscFunctionReturn(PETSC_SUCCESS);
2221: }
2223: /*@
2224: PetscSFConcatenate - concatenate multiple `PetscSF` into one
2226: Input Parameters:
2227: + comm - the communicator
2228: . nsfs - the number of input `PetscSF`
2229: . sfs - the array of input `PetscSF`
2230: . rootMode - the root mode specifying how roots are handled
2231: - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage
2233: Output Parameter:
2234: . newsf - The resulting `PetscSF`
2236: Level: advanced
2238: Notes:
2239: The communicator of all `PetscSF`s in `sfs` must be `comm`.
2241: Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.
2243: The offsets in `leafOffsets` are added to the original leaf indices.
2245: If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well.
2246: In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`.
2248: If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s.
2249: In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2251: All root modes retain the essential connectivity condition.
2252: If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`.
2253: Parameter `rootMode` controls how the input root spaces are combined.
2254: For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode)
2255: and is also the same in the output `PetscSF`.
2256: For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined.
2257: `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally;
2258: roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously.
2259: `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally;
2260: roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously;
2261: the original root ranks are ignored.
2262: For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`,
2263: the output `PetscSF`'s root layout is such that the local number of roots is the sum of the input `PetscSF`s' local numbers of roots on each rank,
2264: in order to keep the load balanced.
2265: However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks.
2267: Example:
2268: We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running
2269: .vb
2270: make -C $PETSC_DIR/src/vec/is/sf/tests ex18
2271: for m in {local,global,shared}; do
2272: mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view
2273: done
2274: .ve
2275: we generate two identical `PetscSF`s sf_0 and sf_1,
2276: .vb
2277: PetscSF Object: sf_0 2 MPI processes
2278: type: basic
2279: rank #leaves #roots
2280: [ 0] 4 2
2281: [ 1] 4 2
2282: leaves roots roots in global numbering
2283: ( 0, 0) <- ( 0, 0) = 0
2284: ( 0, 1) <- ( 0, 1) = 1
2285: ( 0, 2) <- ( 1, 0) = 2
2286: ( 0, 3) <- ( 1, 1) = 3
2287: ( 1, 0) <- ( 0, 0) = 0
2288: ( 1, 1) <- ( 0, 1) = 1
2289: ( 1, 2) <- ( 1, 0) = 2
2290: ( 1, 3) <- ( 1, 1) = 3
2291: .ve
2292: and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf:
2293: .vb
2294: rootMode = local:
2295: PetscSF Object: result_sf 2 MPI processes
2296: type: basic
2297: rank #leaves #roots
2298: [ 0] 8 4
2299: [ 1] 8 4
2300: leaves roots roots in global numbering
2301: ( 0, 0) <- ( 0, 0) = 0
2302: ( 0, 1) <- ( 0, 1) = 1
2303: ( 0, 2) <- ( 1, 0) = 4
2304: ( 0, 3) <- ( 1, 1) = 5
2305: ( 0, 4) <- ( 0, 2) = 2
2306: ( 0, 5) <- ( 0, 3) = 3
2307: ( 0, 6) <- ( 1, 2) = 6
2308: ( 0, 7) <- ( 1, 3) = 7
2309: ( 1, 0) <- ( 0, 0) = 0
2310: ( 1, 1) <- ( 0, 1) = 1
2311: ( 1, 2) <- ( 1, 0) = 4
2312: ( 1, 3) <- ( 1, 1) = 5
2313: ( 1, 4) <- ( 0, 2) = 2
2314: ( 1, 5) <- ( 0, 3) = 3
2315: ( 1, 6) <- ( 1, 2) = 6
2316: ( 1, 7) <- ( 1, 3) = 7
2318: rootMode = global:
2319: PetscSF Object: result_sf 2 MPI processes
2320: type: basic
2321: rank #leaves #roots
2322: [ 0] 8 4
2323: [ 1] 8 4
2324: leaves roots roots in global numbering
2325: ( 0, 0) <- ( 0, 0) = 0
2326: ( 0, 1) <- ( 0, 1) = 1
2327: ( 0, 2) <- ( 0, 2) = 2
2328: ( 0, 3) <- ( 0, 3) = 3
2329: ( 0, 4) <- ( 1, 0) = 4
2330: ( 0, 5) <- ( 1, 1) = 5
2331: ( 0, 6) <- ( 1, 2) = 6
2332: ( 0, 7) <- ( 1, 3) = 7
2333: ( 1, 0) <- ( 0, 0) = 0
2334: ( 1, 1) <- ( 0, 1) = 1
2335: ( 1, 2) <- ( 0, 2) = 2
2336: ( 1, 3) <- ( 0, 3) = 3
2337: ( 1, 4) <- ( 1, 0) = 4
2338: ( 1, 5) <- ( 1, 1) = 5
2339: ( 1, 6) <- ( 1, 2) = 6
2340: ( 1, 7) <- ( 1, 3) = 7
2342: rootMode = shared:
2343: PetscSF Object: result_sf 2 MPI processes
2344: type: basic
2345: rank #leaves #roots
2346: [ 0] 8 2
2347: [ 1] 8 2
2348: leaves roots roots in global numbering
2349: ( 0, 0) <- ( 0, 0) = 0
2350: ( 0, 1) <- ( 0, 1) = 1
2351: ( 0, 2) <- ( 1, 0) = 2
2352: ( 0, 3) <- ( 1, 1) = 3
2353: ( 0, 4) <- ( 0, 0) = 0
2354: ( 0, 5) <- ( 0, 1) = 1
2355: ( 0, 6) <- ( 1, 0) = 2
2356: ( 0, 7) <- ( 1, 1) = 3
2357: ( 1, 0) <- ( 0, 0) = 0
2358: ( 1, 1) <- ( 0, 1) = 1
2359: ( 1, 2) <- ( 1, 0) = 2
2360: ( 1, 3) <- ( 1, 1) = 3
2361: ( 1, 4) <- ( 0, 0) = 0
2362: ( 1, 5) <- ( 0, 1) = 1
2363: ( 1, 6) <- ( 1, 0) = 2
2364: ( 1, 7) <- ( 1, 1) = 3
2365: .ve
2367: .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode`
2368: @*/
2369: PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf)
2370: {
2371: PetscInt i, s, nLeaves, nRoots;
2372: PetscInt *leafArrayOffsets;
2373: PetscInt *ilocal_new;
2374: PetscSFNode *iremote_new;
2375: PetscBool all_ilocal_null = PETSC_FALSE;
2376: PetscLayout glayout = NULL;
2377: PetscInt *gremote = NULL;
2378: PetscMPIInt rank, size;
2380: PetscFunctionBegin;
2381: if (PetscDefined(USE_DEBUG)) {
2382: PetscSF dummy; /* just to have a PetscObject on comm for input validation */
2384: PetscCall(PetscSFCreate(comm, &dummy));
2387: for (i = 0; i < nsfs; i++) {
2389: PetscCheckSameComm(dummy, 1, sfs[i], 3);
2390: }
2394: PetscCall(PetscSFDestroy(&dummy));
2395: }
2396: if (!nsfs) {
2397: PetscCall(PetscSFCreate(comm, newsf));
2398: PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
2399: PetscFunctionReturn(PETSC_SUCCESS);
2400: }
2401: PetscCallMPI(MPI_Comm_rank(comm, &rank));
2402: PetscCallMPI(MPI_Comm_size(comm, &size));
2404: /* Calculate leaf array offsets */
2405: PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
2406: leafArrayOffsets[0] = 0;
2407: for (s = 0; s < nsfs; s++) {
2408: PetscInt nl;
2410: PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
2411: leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2412: }
2413: nLeaves = leafArrayOffsets[nsfs];
2415: /* Calculate number of roots */
2416: switch (rootMode) {
2417: case PETSCSF_CONCATENATE_ROOTMODE_SHARED: {
2418: PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
2419: if (PetscDefined(USE_DEBUG)) {
2420: for (s = 1; s < nsfs; s++) {
2421: PetscInt nr;
2423: PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
2424: PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots);
2425: }
2426: }
2427: } break;
2428: case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: {
2429: /* Calculate also global layout in this case */
2430: PetscInt *nls;
2431: PetscLayout *lts;
2432: PetscInt **inds;
2433: PetscInt j;
2434: PetscInt rootOffset = 0;
2436: PetscCall(PetscCalloc3(nsfs, &lts, nsfs, &nls, nsfs, &inds));
2437: PetscCall(PetscLayoutCreate(comm, &glayout));
2438: glayout->bs = 1;
2439: glayout->n = 0;
2440: glayout->N = 0;
2441: for (s = 0; s < nsfs; s++) {
2442: PetscCall(PetscSFGetGraphLayout(sfs[s], &lts[s], &nls[s], NULL, &inds[s]));
2443: glayout->n += lts[s]->n;
2444: glayout->N += lts[s]->N;
2445: }
2446: PetscCall(PetscLayoutSetUp(glayout));
2447: PetscCall(PetscMalloc1(nLeaves, &gremote));
2448: for (s = 0, j = 0; s < nsfs; s++) {
2449: for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset;
2450: rootOffset += lts[s]->N;
2451: PetscCall(PetscLayoutDestroy(&lts[s]));
2452: PetscCall(PetscFree(inds[s]));
2453: }
2454: PetscCall(PetscFree3(lts, nls, inds));
2455: nRoots = glayout->N;
2456: } break;
2457: case PETSCSF_CONCATENATE_ROOTMODE_LOCAL:
2458: /* nRoots calculated later in this case */
2459: break;
2460: default:
2461: SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode);
2462: }
2464: if (!leafOffsets) {
2465: all_ilocal_null = PETSC_TRUE;
2466: for (s = 0; s < nsfs; s++) {
2467: const PetscInt *ilocal;
2469: PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
2470: if (ilocal) {
2471: all_ilocal_null = PETSC_FALSE;
2472: break;
2473: }
2474: }
2475: PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
2476: }
2478: /* Renumber and concatenate local leaves */
2479: ilocal_new = NULL;
2480: if (!all_ilocal_null) {
2481: PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
2482: for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2483: for (s = 0; s < nsfs; s++) {
2484: const PetscInt *ilocal;
2485: PetscInt *ilocal_l = &ilocal_new[leafArrayOffsets[s]];
2486: PetscInt i, nleaves_l;
2488: PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
2489: for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2490: }
2491: }
2493: /* Renumber and concatenate remote roots */
2494: if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) {
2495: PetscInt rootOffset = 0;
2497: PetscCall(PetscMalloc1(nLeaves, &iremote_new));
2498: for (i = 0; i < nLeaves; i++) {
2499: iremote_new[i].rank = -1;
2500: iremote_new[i].index = -1;
2501: }
2502: for (s = 0; s < nsfs; s++) {
2503: PetscInt i, nl, nr;
2504: PetscSF tmp_sf;
2505: const PetscSFNode *iremote;
2506: PetscSFNode *tmp_rootdata;
2507: PetscSFNode *tmp_leafdata = &iremote_new[leafArrayOffsets[s]];
2509: PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
2510: PetscCall(PetscSFCreate(comm, &tmp_sf));
2511: /* create helper SF with contiguous leaves */
2512: PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
2513: PetscCall(PetscSFSetUp(tmp_sf));
2514: PetscCall(PetscMalloc1(nr, &tmp_rootdata));
2515: if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) {
2516: for (i = 0; i < nr; i++) {
2517: tmp_rootdata[i].index = i + rootOffset;
2518: tmp_rootdata[i].rank = (PetscInt)rank;
2519: }
2520: rootOffset += nr;
2521: } else {
2522: for (i = 0; i < nr; i++) {
2523: tmp_rootdata[i].index = i;
2524: tmp_rootdata[i].rank = (PetscInt)rank;
2525: }
2526: }
2527: PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2528: PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2529: PetscCall(PetscSFDestroy(&tmp_sf));
2530: PetscCall(PetscFree(tmp_rootdata));
2531: }
2532: if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above
2534: /* Build the new SF */
2535: PetscCall(PetscSFCreate(comm, newsf));
2536: PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
2537: } else {
2538: /* Build the new SF */
2539: PetscCall(PetscSFCreate(comm, newsf));
2540: PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote));
2541: }
2542: PetscCall(PetscSFSetUp(*newsf));
2543: PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view"));
2544: PetscCall(PetscLayoutDestroy(&glayout));
2545: PetscCall(PetscFree(gremote));
2546: PetscCall(PetscFree(leafArrayOffsets));
2547: PetscFunctionReturn(PETSC_SUCCESS);
2548: }