Actual source code: sf.c

  1: #include <petsc/private/sfimpl.h>
  2: #include <petsc/private/hashseti.h>
  3: #include <petsc/private/viewerimpl.h>
  4: #include <petsc/private/hashmapi.h>

  6: #if defined(PETSC_HAVE_CUDA)
  7:   #include <cuda_runtime.h>
  8: #endif

 10: #if defined(PETSC_HAVE_HIP)
 11:   #include <hip/hip_runtime.h>
 12: #endif

 14: #if defined(PETSC_CLANG_STATIC_ANALYZER)
 15: void PetscSFCheckGraphSet(PetscSF, int);
 16: #else
 17:   #if defined(PETSC_USE_DEBUG)
 18:     #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
 19:   #else
 20:     #define PetscSFCheckGraphSet(sf, arg) \
 21:       do { \
 22:       } while (0)
 23:   #endif
 24: #endif

 26: const char *const PetscSFDuplicateOptions[]     = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
 27: const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};

 29: /*@
 30:    PetscSFCreate - create a star forest communication context

 32:    Collective

 34:    Input Parameter:
 35: .  comm - communicator on which the star forest will operate

 37:    Output Parameter:
 38: .  sf - new star forest context

 40:    Options Database Key:
 41: .  -sf_type type - value of type may be
 42: .vb
 43:     basic     - Use MPI persistent Isend/Irecv for communication (Default)
 44:     window    - Use MPI-3 one-sided window for communication
 45:     neighbor  - Use MPI-3 neighborhood collectives for communication
 46: .ve

 48:    Level: intermediate

 50:    Note:
 51:    When one knows the communication graph is one of the predefined graphs, such as `MPI_Alltoall()`, `MPI_Allgatherv()`,
 52:    `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
 53:    `SF`s are optimized and have better performance than the general `SF`s.
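
   Example Usage:
   A minimal sketch of a typical call sequence; `nroots`, `nleaves`, and `iremote` are placeholders that the caller is assumed to provide (with `iremote` allocated by `PetscMalloc1()` since ownership is transferred here):
.vb
   PetscSF sf;
   PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
   PetscCall(PetscSFSetFromOptions(sf)); // honor -sf_type basic|window|neighbor
   PetscCall(PetscSFSetGraph(sf, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
   PetscCall(PetscSFSetUp(sf));
   // ... communicate with PetscSFBcastBegin()/PetscSFBcastEnd(), PetscSFReduceBegin()/PetscSFReduceEnd(), ...
   PetscCall(PetscSFDestroy(&sf));
.ve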

 55: .seealso: `PetscSF`, `PetscSFSetType()`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
 56: @*/
 57: PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
 58: {
 59:   PetscSF b;

 61:   PetscFunctionBegin;
 63:   PetscCall(PetscSFInitializePackage());

 65:   PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));

 67:   b->nroots    = -1;
 68:   b->nleaves   = -1;
 69:   b->minleaf   = PETSC_MAX_INT;
 70:   b->maxleaf   = PETSC_MIN_INT;
 71:   b->nranks    = -1;
 72:   b->rankorder = PETSC_TRUE;
 73:   b->ingroup   = MPI_GROUP_NULL;
 74:   b->outgroup  = MPI_GROUP_NULL;
 75:   b->graphset  = PETSC_FALSE;
 76: #if defined(PETSC_HAVE_DEVICE)
 77:   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
 78:   b->use_stream_aware_mpi = PETSC_FALSE;
 79:   b->unknown_input_stream = PETSC_FALSE;
 80:   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
 81:   b->backend = PETSCSF_BACKEND_KOKKOS;
 82:   #elif defined(PETSC_HAVE_CUDA)
 83:   b->backend = PETSCSF_BACKEND_CUDA;
 84:   #elif defined(PETSC_HAVE_HIP)
 85:   b->backend = PETSCSF_BACKEND_HIP;
 86:   #endif

 88:   #if defined(PETSC_HAVE_NVSHMEM)
 89:   b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
 90:   b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
 91:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
 92:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
 93:   #endif
 94: #endif
 95:   b->vscat.from_n = -1;
 96:   b->vscat.to_n   = -1;
 97:   b->vscat.unit   = MPIU_SCALAR;
 98:   *sf             = b;
 99:   PetscFunctionReturn(PETSC_SUCCESS);
100: }

102: /*@
103:    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

105:    Collective

107:    Input Parameter:
108: .  sf - star forest

110:    Level: advanced

112: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
113: @*/
114: PetscErrorCode PetscSFReset(PetscSF sf)
115: {
116:   PetscFunctionBegin;
118:   PetscTryTypeMethod(sf, Reset);
119:   sf->nroots   = -1;
120:   sf->nleaves  = -1;
121:   sf->minleaf  = PETSC_MAX_INT;
122:   sf->maxleaf  = PETSC_MIN_INT;
123:   sf->mine     = NULL;
124:   sf->remote   = NULL;
125:   sf->graphset = PETSC_FALSE;
126:   PetscCall(PetscFree(sf->mine_alloc));
127:   PetscCall(PetscFree(sf->remote_alloc));
128:   sf->nranks = -1;
129:   PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
130:   sf->degreeknown = PETSC_FALSE;
131:   PetscCall(PetscFree(sf->degree));
132:   if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
133:   if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
134:   if (sf->multi) sf->multi->multi = NULL;
135:   PetscCall(PetscSFDestroy(&sf->multi));
136:   PetscCall(PetscLayoutDestroy(&sf->map));

138: #if defined(PETSC_HAVE_DEVICE)
139:   for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
140: #endif

142:   sf->setupcalled = PETSC_FALSE;
143:   PetscFunctionReturn(PETSC_SUCCESS);
144: }

146: /*@C
147:    PetscSFSetType - Set the `PetscSF` communication implementation

149:    Collective

151:    Input Parameters:
152: +  sf - the `PetscSF` context
153: -  type - a known method
154: .vb
155:     PETSCSFWINDOW - MPI-2/3 one-sided
156:     PETSCSFBASIC - basic implementation using MPI-1 two-sided
157: .ve

159:    Options Database Key:
 160: .  -sf_type <type> - Sets the method; for example `basic` or `window`. Use -help for a list of available methods

162:   Level: intermediate

164:    Notes:
165:    See `PetscSFType` for possible values
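
   For example, to request the one-sided window implementation programmatically (a minimal sketch; the same effect is obtained with the option -sf_type window):
.vb
   PetscCall(PetscSFSetType(sf, PETSCSFWINDOW));
.ve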

167: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`
168: @*/
169: PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
170: {
171:   PetscBool match;
172:   PetscErrorCode (*r)(PetscSF);

174:   PetscFunctionBegin;

178:   PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
179:   if (match) PetscFunctionReturn(PETSC_SUCCESS);

181:   PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
182:   PetscCheck(r, PETSC_COMM_SELF, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
183:   /* Destroy the previous PetscSF implementation context */
184:   PetscTryTypeMethod(sf, Destroy);
185:   PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
186:   PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
187:   PetscCall((*r)(sf));
188:   PetscFunctionReturn(PETSC_SUCCESS);
189: }

191: /*@C
192:   PetscSFGetType - Get the `PetscSF` communication implementation

194:   Not Collective

196:   Input Parameter:
197: . sf  - the `PetscSF` context

199:   Output Parameter:
200: . type - the `PetscSF` type name

202:   Level: intermediate

204: .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
205: @*/
206: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
207: {
208:   PetscFunctionBegin;
211:   *type = ((PetscObject)sf)->type_name;
212:   PetscFunctionReturn(PETSC_SUCCESS);
213: }

215: /*@C
216:    PetscSFDestroy - destroy a star forest

218:    Collective

220:    Input Parameter:
221: .  sf - address of star forest

223:    Level: intermediate

225: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
226: @*/
227: PetscErrorCode PetscSFDestroy(PetscSF *sf)
228: {
229:   PetscFunctionBegin;
230:   if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
232:   if (--((PetscObject)(*sf))->refct > 0) {
233:     *sf = NULL;
234:     PetscFunctionReturn(PETSC_SUCCESS);
235:   }
236:   PetscCall(PetscSFReset(*sf));
237:   PetscTryTypeMethod((*sf), Destroy);
238:   PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
239:   if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
240:   PetscCall(PetscHeaderDestroy(sf));
241:   PetscFunctionReturn(PETSC_SUCCESS);
242: }

244: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
245: {
246:   PetscInt           i, nleaves;
247:   PetscMPIInt        size;
248:   const PetscInt    *ilocal;
249:   const PetscSFNode *iremote;

251:   PetscFunctionBegin;
252:   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
253:   PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
254:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
255:   for (i = 0; i < nleaves; i++) {
256:     const PetscInt rank   = iremote[i].rank;
257:     const PetscInt remote = iremote[i].index;
258:     const PetscInt leaf   = ilocal ? ilocal[i] : i;
259:     PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
260:     PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
261:     PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
262:   }
263:   PetscFunctionReturn(PETSC_SUCCESS);
264: }

266: /*@
 267:    PetscSFSetUp - set up communication structures for a `PetscSF`; after this is done the `PetscSF` may be used to perform communication

269:    Collective

271:    Input Parameter:
272: .  sf - star forest communication object

274:    Level: beginner

276: .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
277: @*/
278: PetscErrorCode PetscSFSetUp(PetscSF sf)
279: {
280:   PetscFunctionBegin;
282:   PetscSFCheckGraphSet(sf, 1);
283:   if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS);
284:   PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
285:   PetscCall(PetscSFCheckGraphValid_Private(sf));
286:   if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
287:   PetscTryTypeMethod(sf, SetUp);
288: #if defined(PETSC_HAVE_CUDA)
289:   if (sf->backend == PETSCSF_BACKEND_CUDA) {
290:     sf->ops->Malloc = PetscSFMalloc_CUDA;
291:     sf->ops->Free   = PetscSFFree_CUDA;
292:   }
293: #endif
294: #if defined(PETSC_HAVE_HIP)
295:   if (sf->backend == PETSCSF_BACKEND_HIP) {
296:     sf->ops->Malloc = PetscSFMalloc_HIP;
297:     sf->ops->Free   = PetscSFFree_HIP;
298:   }
299: #endif

302: #if defined(PETSC_HAVE_KOKKOS)
303:   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
304:     sf->ops->Malloc = PetscSFMalloc_Kokkos;
305:     sf->ops->Free   = PetscSFFree_Kokkos;
306:   }
307: #endif
308:   PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
309:   sf->setupcalled = PETSC_TRUE;
310:   PetscFunctionReturn(PETSC_SUCCESS);
311: }

313: /*@
314:    PetscSFSetFromOptions - set `PetscSF` options using the options database

316:    Logically Collective

318:    Input Parameter:
319: .  sf - star forest

321:    Options Database Keys:
322: +  -sf_type               - implementation type, see `PetscSFSetType()`
 323: .  -sf_rank_order         - sort composite points for gathers and scatters in rank order; otherwise gathers are non-deterministic
 324: .  -sf_unknown_input_stream - Assume the input root/leafdata was computed on streams unknown to `PetscSF`, so `PetscSF` must synchronize
 325:                             before accessing that data (default: false). When false, `PetscSF` assumes the data was computed on the default CUDA stream, which
 326:                             `PetscSF` also uses, so no stream synchronization is needed between `PetscSF` and its caller.
327: .  -sf_use_stream_aware_mpi  - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
328:                                If true, this option only works with `-use_gpu_aware_mpi 1`.

 330: -  -sf_backend cuda | hip | kokkos - Select the device backend SF uses. Currently `PetscSF` has these backends: cuda, hip, and kokkos.
 331:                               On CUDA (HIP) devices, one can choose cuda (hip) or kokkos, with the default being kokkos. On other devices,
 332:                               the only available backend is kokkos. See the example command line below.
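
   For example, a run on a GPU build with Kokkos enabled might use the following illustrative command line (./app stands in for the application executable):
.vb
   ./app -sf_type basic -sf_backend kokkos -use_gpu_aware_mpi 1
.ve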

334:    Level: intermediate

336: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
337: @*/
338: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
339: {
340:   PetscSFType deft;
341:   char        type[256];
342:   PetscBool   flg;

344:   PetscFunctionBegin;
346:   PetscObjectOptionsBegin((PetscObject)sf);
347:   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
348:   PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
349:   PetscCall(PetscSFSetType(sf, flg ? type : deft));
350:   PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
351: #if defined(PETSC_HAVE_DEVICE)
352:   {
353:     char      backendstr[32] = {0};
354:     PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
355:     /* Change the defaults set in PetscSFCreate() with command line options */
356:     PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
357:     PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
358:     PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
359:     PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
360:     PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
361:     PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
362:   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
363:     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
364:     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
365:     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
366:     else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
367:   #elif defined(PETSC_HAVE_KOKKOS)
368:     PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
369:   #endif
370:   }
371: #endif
372:   PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
373:   PetscOptionsEnd();
374:   PetscFunctionReturn(PETSC_SUCCESS);
375: }

377: /*@
378:    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

380:    Logically Collective

382:    Input Parameters:
383: +  sf - star forest
384: -  flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic)

386:    Level: advanced

388: .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
389: @*/
390: PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
391: {
392:   PetscFunctionBegin;
395:   PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
396:   sf->rankorder = flg;
397:   PetscFunctionReturn(PETSC_SUCCESS);
398: }

400: /*@C
401:    PetscSFSetGraph - Set a parallel star forest

403:    Collective

405:    Input Parameters:
406: +  sf - star forest
407: .  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
408: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
409: .  ilocal - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced
410: during setup in debug mode)
411: .  localmode - copy mode for `ilocal`
412: .  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
413: during setup in debug mode)
414: -  remotemode - copy mode for `iremote`

416:    Level: intermediate

418:    Notes:
419:    Leaf indices in `ilocal` must be unique, otherwise an error occurs.

421:    Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
422:    In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
423:    PETSc might modify the respective array;
424:    if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
 425:    Only if `PETSC_COPY_VALUES` is used is the respective array guaranteed to stay intact; in that case a const array can be passed (but a cast to non-const is needed).

427:    Fortran Note:
428:    In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.

430:    Developer Note:
 431:    We sort leaves to check for duplicates and contiguity and to find minleaf/maxleaf.
 432:    This also makes it easy to compare the leaf sets of two `PetscSF`s.
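
   Example Usage:
   A minimal sketch that, on each MPI process, attaches two contiguous leaves to roots 0 and 1 owned by the next process (with wrap-around); `rank` and `size` are assumed to have been obtained from the communicator of `sf`:
.vb
   PetscSFNode *iremote;
   PetscCall(PetscMalloc1(2, &iremote));
   for (PetscInt i = 0; i < 2; i++) {
     iremote[i].rank  = (rank + 1) % size; // process owning the root
     iremote[i].index = i;                 // root index on that process
   }
   PetscCall(PetscSFSetGraph(sf, 2, 2, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
.ve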

434: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
435: @*/
436: PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode)
437: {
438:   PetscBool unique, contiguous;

440:   PetscFunctionBegin;
444:   PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
445:   PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
446:   /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
447:    * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
448:   PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
449:   PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);

451:   if (sf->nroots >= 0) { /* Reset only if graph already set */
452:     PetscCall(PetscSFReset(sf));
453:   }

455:   PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));

457:   sf->nroots  = nroots;
458:   sf->nleaves = nleaves;

460:   if (localmode == PETSC_COPY_VALUES && ilocal) {
461:     PetscInt *tlocal = NULL;

463:     PetscCall(PetscMalloc1(nleaves, &tlocal));
464:     PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
465:     ilocal = tlocal;
466:   }
467:   if (remotemode == PETSC_COPY_VALUES) {
468:     PetscSFNode *tremote = NULL;

470:     PetscCall(PetscMalloc1(nleaves, &tremote));
471:     PetscCall(PetscArraycpy(tremote, iremote, nleaves));
472:     iremote = tremote;
473:   }

475:   if (nleaves && ilocal) {
476:     PetscSFNode work;

478:     PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
479:     PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
480:     unique = PetscNot(unique);
481:     PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
482:     sf->minleaf = ilocal[0];
483:     sf->maxleaf = ilocal[nleaves - 1];
484:     contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
485:   } else {
486:     sf->minleaf = 0;
487:     sf->maxleaf = nleaves - 1;
488:     unique      = PETSC_TRUE;
489:     contiguous  = PETSC_TRUE;
490:   }

492:   if (contiguous) {
493:     if (localmode == PETSC_USE_POINTER) {
494:       ilocal = NULL;
495:     } else {
496:       PetscCall(PetscFree(ilocal));
497:     }
498:   }
499:   sf->mine = ilocal;
500:   if (localmode == PETSC_USE_POINTER) {
501:     sf->mine_alloc = NULL;
502:   } else {
503:     sf->mine_alloc = ilocal;
504:   }
505:   sf->remote = iremote;
506:   if (remotemode == PETSC_USE_POINTER) {
507:     sf->remote_alloc = NULL;
508:   } else {
509:     sf->remote_alloc = iremote;
510:   }
511:   PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
512:   sf->graphset = PETSC_TRUE;
513:   PetscFunctionReturn(PETSC_SUCCESS);
514: }

516: /*@
517:   PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern

519:   Collective

521:   Input Parameters:
522: + sf      - The `PetscSF`
523: . map     - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`)
524: - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`

526:   Level: intermediate

528:   Notes:
529:   It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`.
530:   `n` and `N` are the local and global sizes of `x` respectively.

 532:   With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph such that, if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, `x` is copied to a
 533:   sequential vector `y` on every MPI process.

 535:   With `PETSCSF_PATTERN_GATHER`, the routine creates a graph such that, if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, `x` is copied to a
 536:   sequential vector `y` on rank 0.

 538:   In the above cases, entries of `x` are roots and entries of `y` are leaves.

 540:   With `PETSCSF_PATTERN_ALLTOALL`, `map` is insignificant. Suppose NP is the size of `sf`'s communicator. The routine
 541:   creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
 542:   of rank j, where 0 <= i, j < NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount equal to 1. This does not
 543:   mean one cannot send multiple items; one just needs to create an MPI datatype for the multiple data
 544:   items with `MPI_Type_contiguous()` and use that as the <unit> argument in SF routines, as sketched below.

546:   In this case, roots and leaves are symmetric.
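
  For example, to use the all-to-all pattern while exchanging a block of bs scalars per edge (a minimal sketch; sf, map, bs (an int), rootdata, and leafdata are assumed to be provided by the caller; map is documented above as insignificant for this pattern):
.vb
  MPI_Datatype unit;
  PetscCall(PetscSFSetGraphWithPattern(sf, map, PETSCSF_PATTERN_ALLTOALL));
  PetscCallMPI(MPI_Type_contiguous(bs, MPIU_SCALAR, &unit));
  PetscCallMPI(MPI_Type_commit(&unit));
  PetscCall(PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE));
  PetscCallMPI(MPI_Type_free(&unit));
.ve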

548: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
549:  @*/
550: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
551: {
552:   MPI_Comm    comm;
553:   PetscInt    n, N, res[2];
554:   PetscMPIInt rank, size;
555:   PetscSFType type;

557:   PetscFunctionBegin;
560:   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
561:   PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
562:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
563:   PetscCallMPI(MPI_Comm_size(comm, &size));

565:   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
566:     type = PETSCSFALLTOALL;
567:     PetscCall(PetscLayoutCreate(comm, &sf->map));
568:     PetscCall(PetscLayoutSetLocalSize(sf->map, size));
569:     PetscCall(PetscLayoutSetSize(sf->map, ((PetscInt)size) * size));
570:     PetscCall(PetscLayoutSetUp(sf->map));
571:   } else {
572:     PetscCall(PetscLayoutGetLocalSize(map, &n));
573:     PetscCall(PetscLayoutGetSize(map, &N));
574:     res[0] = n;
575:     res[1] = -n;
 576:     /* Check if n is the same over all ranks so that we can optimize it */
577:     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
578:     if (res[0] == -res[1]) { /* same n */
579:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
580:     } else {
581:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
582:     }
583:     PetscCall(PetscLayoutReference(map, &sf->map));
584:   }
585:   PetscCall(PetscSFSetType(sf, type));

587:   sf->pattern = pattern;
588:   sf->mine    = NULL; /* Contiguous */

590:   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
591:      Also set other easy stuff.
592:    */
593:   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
594:     sf->nleaves = N;
595:     sf->nroots  = n;
596:     sf->nranks  = size;
597:     sf->minleaf = 0;
598:     sf->maxleaf = N - 1;
599:   } else if (pattern == PETSCSF_PATTERN_GATHER) {
600:     sf->nleaves = rank ? 0 : N;
601:     sf->nroots  = n;
602:     sf->nranks  = rank ? 0 : size;
603:     sf->minleaf = 0;
604:     sf->maxleaf = rank ? -1 : N - 1;
605:   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
606:     sf->nleaves = size;
607:     sf->nroots  = size;
608:     sf->nranks  = size;
609:     sf->minleaf = 0;
610:     sf->maxleaf = size - 1;
611:   }
612:   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
613:   sf->graphset = PETSC_TRUE;
614:   PetscFunctionReturn(PETSC_SUCCESS);
615: }

617: /*@
618:    PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map

620:    Collective

622:    Input Parameter:
623: .  sf - star forest to invert

625:    Output Parameter:
626: .  isf - inverse of `sf`

628:    Level: advanced

630:    Notes:
631:    All roots must have degree 1.

633:    The local space may be a permutation, but cannot be sparse.
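
   For example (a minimal sketch), the inverse can be created and later released with:
.vb
   PetscSF isf;
   PetscCall(PetscSFCreateInverseSF(sf, &isf));
   // roots of isf correspond to the (dense) leaf space of sf, and leaves of isf point back to the roots of sf
   PetscCall(PetscSFDestroy(&isf));
.ve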

635: .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
636: @*/
637: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
638: {
639:   PetscMPIInt     rank;
640:   PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
641:   const PetscInt *ilocal;
642:   PetscSFNode    *roots, *leaves;

644:   PetscFunctionBegin;
646:   PetscSFCheckGraphSet(sf, 1);

649:   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
650:   maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */

652:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
653:   PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
654:   for (i = 0; i < maxlocal; i++) {
655:     leaves[i].rank  = rank;
656:     leaves[i].index = i;
657:   }
658:   for (i = 0; i < nroots; i++) {
659:     roots[i].rank  = -1;
660:     roots[i].index = -1;
661:   }
662:   PetscCall(PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));
663:   PetscCall(PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));

665:   /* Check whether our leaves are sparse */
666:   for (i = 0, count = 0; i < nroots; i++)
667:     if (roots[i].rank >= 0) count++;
668:   if (count == nroots) newilocal = NULL;
 669:   else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
          PetscCall(PetscMalloc1(count, &newilocal));
670:     for (i = 0, count = 0; i < nroots; i++) {
671:       if (roots[i].rank >= 0) {
672:         newilocal[count]   = i;
673:         roots[count].rank  = roots[i].rank;
674:         roots[count].index = roots[i].index;
675:         count++;
676:       }
677:     }
678:   }

680:   PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
681:   PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
682:   PetscCall(PetscFree2(roots, leaves));
683:   PetscFunctionReturn(PETSC_SUCCESS);
684: }

686: /*@
687:    PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph

689:    Collective

691:    Input Parameters:
692: +  sf - communication object to duplicate
693: -  opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)

695:    Output Parameter:
696: .  newsf - new communication object

698:    Level: beginner

700: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
701: @*/
702: PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
703: {
704:   PetscSFType  type;
705:   MPI_Datatype dtype = MPIU_SCALAR;

707:   PetscFunctionBegin;
711:   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
712:   PetscCall(PetscSFGetType(sf, &type));
713:   if (type) PetscCall(PetscSFSetType(*newsf, type));
714:   (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
715:   if (opt == PETSCSF_DUPLICATE_GRAPH) {
716:     PetscSFCheckGraphSet(sf, 1);
717:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
718:       PetscInt           nroots, nleaves;
719:       const PetscInt    *ilocal;
720:       const PetscSFNode *iremote;
721:       PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
722:       PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
723:     } else {
724:       PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
725:     }
726:   }
727:   /* Since oldtype is committed, so is newtype, according to MPI */
728:   if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
729:   (*newsf)->vscat.bs     = sf->vscat.bs;
730:   (*newsf)->vscat.unit   = dtype;
731:   (*newsf)->vscat.to_n   = sf->vscat.to_n;
732:   (*newsf)->vscat.from_n = sf->vscat.from_n;
733:   /* Do not copy lsf. Build it on demand since it is rarely used */

735: #if defined(PETSC_HAVE_DEVICE)
736:   (*newsf)->backend              = sf->backend;
737:   (*newsf)->unknown_input_stream = sf->unknown_input_stream;
738:   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
739:   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
740: #endif
741:   PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
 742:   /* Don't do PetscSFSetUp() since the new sf's graph might not have been set. */
743:   PetscFunctionReturn(PETSC_SUCCESS);
744: }

746: /*@C
747:    PetscSFGetGraph - Get the graph specifying a parallel star forest

749:    Not Collective

751:    Input Parameter:
752: .  sf - star forest

754:    Output Parameters:
755: +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
756: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
757: .  ilocal - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage)
758: -  iremote - remote locations of root vertices for each leaf on the current process

760:    Level: intermediate

762:    Notes:
 763:      We do not currently require that the graph be set, thus `nroots` is returned as -1 if it has not been set yet

 765:      The returned `ilocal` and `iremote` might contain values in a different order than the input ones in `PetscSFSetGraph()`
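
     For example, a sketch that visits every edge of the graph (recall that `ilocal` may be `NULL` when the leaves are contiguous):
.vb
   PetscInt           nroots, nleaves;
   const PetscInt    *ilocal;
   const PetscSFNode *iremote;
   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
   for (PetscInt i = 0; i < nleaves; i++) {
     PetscInt leaf = ilocal ? ilocal[i] : i; // location in the leafdata buffer
     // leaf references root iremote[i].index on process iremote[i].rank
   }
.ve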

767:    Fortran Notes:
768:      The returned `iremote` array is a copy and must be deallocated after use. Consequently, if you
769:      want to update the graph, you must call `PetscSFSetGraph()` after modifying the `iremote` array.

771:      To check for a `NULL` `ilocal` use
772: $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then

774: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
775: @*/
776: PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote)
777: {
778:   PetscFunctionBegin;
780:   if (sf->ops->GetGraph) {
781:     PetscCall((sf->ops->GetGraph)(sf, nroots, nleaves, ilocal, iremote));
782:   } else {
783:     if (nroots) *nroots = sf->nroots;
784:     if (nleaves) *nleaves = sf->nleaves;
785:     if (ilocal) *ilocal = sf->mine;
786:     if (iremote) *iremote = sf->remote;
787:   }
788:   PetscFunctionReturn(PETSC_SUCCESS);
789: }

791: /*@
792:    PetscSFGetLeafRange - Get the active leaf ranges

794:    Not Collective

796:    Input Parameter:
797: .  sf - star forest

799:    Output Parameters:
800: +  minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
801: -  maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.

803:    Level: developer

805: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
806: @*/
807: PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
808: {
809:   PetscFunctionBegin;
811:   PetscSFCheckGraphSet(sf, 1);
812:   if (minleaf) *minleaf = sf->minleaf;
813:   if (maxleaf) *maxleaf = sf->maxleaf;
814:   PetscFunctionReturn(PETSC_SUCCESS);
815: }

817: /*@C
818:    PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database

820:    Collective on A

822:    Input Parameters:
823: +  A - the star forest
824: .  obj - Optional object that provides the prefix for the option names
825: -  name - command line option

827:    Level: intermediate

829:   Note:
 830:   See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat` values

832: .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
833: @*/
834: PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
835: {
836:   PetscFunctionBegin;
838:   PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
839:   PetscFunctionReturn(PETSC_SUCCESS);
840: }

842: /*@C
843:    PetscSFView - view a star forest

845:    Collective

847:    Input Parameters:
848: +  sf - star forest
849: -  viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`

851:    Level: beginner

853: .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
854: @*/
855: PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
856: {
857:   PetscBool         iascii;
858:   PetscViewerFormat format;

860:   PetscFunctionBegin;
862:   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
864:   PetscCheckSameComm(sf, 1, viewer, 2);
865:   if (sf->graphset) PetscCall(PetscSFSetUp(sf));
866:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
867:   if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
868:     PetscMPIInt rank;
869:     PetscInt    ii, i, j;

871:     PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
872:     PetscCall(PetscViewerASCIIPushTab(viewer));
873:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
874:       if (!sf->graphset) {
875:         PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
876:         PetscCall(PetscViewerASCIIPopTab(viewer));
877:         PetscFunctionReturn(PETSC_SUCCESS);
878:       }
879:       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
880:       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
881:       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks));
882:       for (i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
883:       PetscCall(PetscViewerFlush(viewer));
884:       PetscCall(PetscViewerGetFormat(viewer, &format));
885:       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
886:         PetscMPIInt *tmpranks, *perm;
887:         PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
888:         PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
889:         for (i = 0; i < sf->nranks; i++) perm[i] = i;
890:         PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
891:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
892:         for (ii = 0; ii < sf->nranks; ii++) {
893:           i = perm[ii];
894:           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
895:           for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
896:         }
897:         PetscCall(PetscFree2(tmpranks, perm));
898:       }
899:       PetscCall(PetscViewerFlush(viewer));
900:       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
901:     }
902:     PetscCall(PetscViewerASCIIPopTab(viewer));
903:   }
904:   PetscTryTypeMethod(sf, View, viewer);
905:   PetscFunctionReturn(PETSC_SUCCESS);
906: }

908: /*@C
909:    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

911:    Not Collective

913:    Input Parameter:
914: .  sf - star forest

916:    Output Parameters:
917: +  nranks - number of ranks referenced by local part
918: .  ranks - [`nranks`] array of ranks
919: .  roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank
920: .  rmine - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank
921: -  rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank
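
   For example, the returned arrays can be traversed as follows (a minimal sketch; `PetscSFSetUp()` must already have been called on sf):
.vb
   PetscInt           nranks;
   const PetscMPIInt *ranks;
   const PetscInt    *roffset, *rmine, *rremote;
   PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &roffset, &rmine, &rremote));
   for (PetscInt r = 0; r < nranks; r++) {
     for (PetscInt j = roffset[r]; j < roffset[r + 1]; j++) {
       // local leaf rmine[j] references root rremote[j] on process ranks[r]
     }
   }
.ve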

923:    Level: developer

925: .seealso: `PetscSF`, `PetscSFGetLeafRanks()`
926: @*/
927: PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote)
928: {
929:   PetscFunctionBegin;
931:   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
932:   if (sf->ops->GetRootRanks) {
933:     PetscCall((sf->ops->GetRootRanks)(sf, nranks, ranks, roffset, rmine, rremote));
934:   } else {
935:     /* The generic implementation */
936:     if (nranks) *nranks = sf->nranks;
937:     if (ranks) *ranks = sf->ranks;
938:     if (roffset) *roffset = sf->roffset;
939:     if (rmine) *rmine = sf->rmine;
940:     if (rremote) *rremote = sf->rremote;
941:   }
942:   PetscFunctionReturn(PETSC_SUCCESS);
943: }

945: /*@C
946:    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

948:    Not Collective

950:    Input Parameter:
951: .  sf - star forest

953:    Output Parameters:
954: +  niranks - number of leaf ranks referencing roots on this process
955: .  iranks - [`niranks`] array of ranks
956: .  ioffset - [`niranks`+1] offset in `irootloc` for each rank
957: -  irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank

959:    Level: developer

961: .seealso: `PetscSF`, `PetscSFGetRootRanks()`
962: @*/
963: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
964: {
965:   PetscFunctionBegin;
967:   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
968:   if (sf->ops->GetLeafRanks) {
969:     PetscCall((sf->ops->GetLeafRanks)(sf, niranks, iranks, ioffset, irootloc));
970:   } else {
971:     PetscSFType type;
972:     PetscCall(PetscSFGetType(sf, &type));
973:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
974:   }
975:   PetscFunctionReturn(PETSC_SUCCESS);
976: }

978: static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
979: {
980:   PetscInt i;
981:   for (i = 0; i < n; i++) {
982:     if (needle == list[i]) return PETSC_TRUE;
983:   }
984:   return PETSC_FALSE;
985: }

987: /*@C
988:    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations.

990:    Collective

992:    Input Parameters:
993: +  sf - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
994: -  dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)

996:    Level: developer

998: .seealso: `PetscSF`, `PetscSFGetRootRanks()`
999: @*/
1000: PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
1001: {
1002:   PetscHMapI    table;
1003:   PetscHashIter pos;
1004:   PetscMPIInt   size, groupsize, *groupranks;
1005:   PetscInt     *rcount, *ranks;
1006:   PetscInt      i, irank = -1, orank = -1;

1008:   PetscFunctionBegin;
1010:   PetscSFCheckGraphSet(sf, 1);
1011:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
1012:   PetscCall(PetscHMapICreateWithSize(10, &table));
1013:   for (i = 0; i < sf->nleaves; i++) {
1014:     /* Log 1-based rank */
1015:     PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
1016:   }
1017:   PetscCall(PetscHMapIGetSize(table, &sf->nranks));
1018:   PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
1019:   PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
1020:   PetscHashIterBegin(table, pos);
1021:   for (i = 0; i < sf->nranks; i++) {
1022:     PetscHashIterGetKey(table, pos, ranks[i]);
1023:     PetscHashIterGetVal(table, pos, rcount[i]);
1024:     PetscHashIterNext(table, pos);
1025:     ranks[i]--; /* Convert back to 0-based */
1026:   }
1027:   PetscCall(PetscHMapIDestroy(&table));

1029:   /* We expect that dgroup is reliably "small" while nranks could be large */
1030:   {
1031:     MPI_Group    group = MPI_GROUP_NULL;
1032:     PetscMPIInt *dgroupranks;
1033:     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1034:     PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
1035:     PetscCall(PetscMalloc1(groupsize, &dgroupranks));
1036:     PetscCall(PetscMalloc1(groupsize, &groupranks));
1037:     for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
1038:     if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
1039:     PetscCallMPI(MPI_Group_free(&group));
1040:     PetscCall(PetscFree(dgroupranks));
1041:   }

1043:   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1044:   for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
1045:     for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
1046:       if (InList(ranks[i], groupsize, groupranks)) break;
1047:     }
1048:     for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1049:       if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
1050:     }
1051:     if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1052:       PetscInt tmprank, tmpcount;

1054:       tmprank             = ranks[i];
1055:       tmpcount            = rcount[i];
1056:       ranks[i]            = ranks[sf->ndranks];
1057:       rcount[i]           = rcount[sf->ndranks];
1058:       ranks[sf->ndranks]  = tmprank;
1059:       rcount[sf->ndranks] = tmpcount;
1060:       sf->ndranks++;
1061:     }
1062:   }
1063:   PetscCall(PetscFree(groupranks));
1064:   PetscCall(PetscSortIntWithArray(sf->ndranks, ranks, rcount));
1065:   PetscCall(PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
1066:   sf->roffset[0] = 0;
1067:   for (i = 0; i < sf->nranks; i++) {
1068:     PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
1069:     sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
1070:     rcount[i]          = 0;
1071:   }
1072:   for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
1073:     /* short circuit */
1074:     if (orank != sf->remote[i].rank) {
1075:       /* Search for index of iremote[i].rank in sf->ranks */
1076:       PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank));
1077:       if (irank < 0) {
1078:         PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
1079:         if (irank >= 0) irank += sf->ndranks;
1080:       }
1081:       orank = sf->remote[i].rank;
1082:     }
1083:     PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %" PetscInt_FMT " in array", sf->remote[i].rank);
1084:     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
1085:     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1086:     rcount[irank]++;
1087:   }
1088:   PetscCall(PetscFree2(rcount, ranks));
1089:   PetscFunctionReturn(PETSC_SUCCESS);
1090: }

1092: /*@C
1093:    PetscSFGetGroups - gets incoming and outgoing process groups

1095:    Collective

1097:    Input Parameter:
1098: .  sf - star forest

1100:    Output Parameters:
1101: +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
1102: -  outgoing - group of destination processes for outgoing edges (roots that I reference)

1104:    Level: developer

1106: .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
1107: @*/
1108: PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
1109: {
1110:   MPI_Group group = MPI_GROUP_NULL;

1112:   PetscFunctionBegin;
1113:   PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
1114:   if (sf->ingroup == MPI_GROUP_NULL) {
1115:     PetscInt        i;
1116:     const PetscInt *indegree;
1117:     PetscMPIInt     rank, *outranks, *inranks;
1118:     PetscSFNode    *remote;
1119:     PetscSF         bgcount;

1121:     /* Compute the number of incoming ranks */
1122:     PetscCall(PetscMalloc1(sf->nranks, &remote));
1123:     for (i = 0; i < sf->nranks; i++) {
1124:       remote[i].rank  = sf->ranks[i];
1125:       remote[i].index = 0;
1126:     }
1127:     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
1128:     PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1129:     PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
1130:     PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
1131:     /* Enumerate the incoming ranks */
1132:     PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
1133:     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1134:     for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
1135:     PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
1136:     PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
1137:     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1138:     PetscCallMPI(MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup));
1139:     PetscCallMPI(MPI_Group_free(&group));
1140:     PetscCall(PetscFree2(inranks, outranks));
1141:     PetscCall(PetscSFDestroy(&bgcount));
1142:   }
1143:   *incoming = sf->ingroup;

1145:   if (sf->outgroup == MPI_GROUP_NULL) {
1146:     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1147:     PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
1148:     PetscCallMPI(MPI_Group_free(&group));
1149:   }
1150:   *outgoing = sf->outgroup;
1151:   PetscFunctionReturn(PETSC_SUCCESS);
1152: }

1154: /*@
1155:    PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters

1157:    Collective

1159:    Input Parameter:
1160: .  sf - star forest that may contain roots with 0 or with more than 1 vertex

1162:    Output Parameter:
1163: .  multi - star forest with split roots, such that each root has degree exactly 1

1165:    Level: developer

1167:    Note:
1168:    In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
1169:    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1170:    edge, it is a candidate for future optimization that might involve its removal.

1172: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
1173: @*/
1174: PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
1175: {
1176:   PetscFunctionBegin;
1179:   if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1180:     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1181:     *multi           = sf->multi;
1182:     sf->multi->multi = sf->multi;
1183:     PetscFunctionReturn(PETSC_SUCCESS);
1184:   }
1185:   if (!sf->multi) {
1186:     const PetscInt *indegree;
1187:     PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
1188:     PetscSFNode    *remote;
1189:     maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
1190:     PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
1191:     PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
1192:     PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
1193:     inoffset[0] = 0;
1194:     for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
1195:     for (i = 0; i < maxlocal; i++) outones[i] = 1;
1196:     PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1197:     PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1198:     for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1199:     if (PetscDefined(USE_DEBUG)) {                               /* Check that the expected number of increments occurred */
1200:       for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
1201:     }
1202:     PetscCall(PetscMalloc1(sf->nleaves, &remote));
1203:     for (i = 0; i < sf->nleaves; i++) {
1204:       remote[i].rank  = sf->remote[i].rank;
1205:       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1206:     }
1207:     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1208:     sf->multi->multi = sf->multi;
1209:     PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1210:     if (sf->rankorder) { /* Sort the ranks */
1211:       PetscMPIInt  rank;
1212:       PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
1213:       PetscSFNode *newremote;
1214:       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1215:       for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
1216:       PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
1217:       for (i = 0; i < maxlocal; i++) outranks[i] = rank;
1218:       PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1219:       PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1220:       /* Sort the incoming ranks at each vertex, build the inverse map */
1221:       for (i = 0; i < sf->nroots; i++) {
1222:         PetscInt j;
1223:         for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
1224:         PetscCall(PetscSortIntWithArray(indegree[i], inranks + inoffset[i], tmpoffset));
1225:         for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1226:       }
1227:       PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1228:       PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1229:       PetscCall(PetscMalloc1(sf->nleaves, &newremote));
1230:       for (i = 0; i < sf->nleaves; i++) {
1231:         newremote[i].rank  = sf->remote[i].rank;
1232:         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1233:       }
1234:       PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
1235:       PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
1236:     }
1237:     PetscCall(PetscFree3(inoffset, outones, outoffset));
1238:   }
1239:   *multi = sf->multi;
1240:   PetscFunctionReturn(PETSC_SUCCESS);
1241: }

1243: /*@C
1244:    PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices

1246:    Collective

1248:    Input Parameters:
1249: +  sf - original star forest
1250: .  nselected  - number of selected roots on this process
1251: -  selected   - indices of the selected roots on this process

1253:    Output Parameter:
1254: .  esf - new star forest

1256:    Level: advanced

1258:    Note:
1259:    To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
1260:    be done by calling `PetscSFGetGraph()`, as sketched below.
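
   For example (a minimal sketch, assuming `selected` holds `nselected` valid root indices):
.vb
   PetscSF         esf;
   PetscInt        enleaves;
   const PetscInt *eilocal;
   PetscCall(PetscSFCreateEmbeddedRootSF(sf, nselected, selected, &esf));
   PetscCall(PetscSFGetGraph(esf, NULL, &enleaves, &eilocal, NULL));
   // eilocal (or 0..enleaves-1 when eilocal is NULL) are the leaves still participating
   PetscCall(PetscSFDestroy(&esf));
.ve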

1262: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1263: @*/
1264: PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
1265: {
1266:   PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1267:   const PetscInt    *ilocal;
1268:   signed char       *rootdata, *leafdata, *leafmem;
1269:   const PetscSFNode *iremote;
1270:   PetscSFNode       *new_iremote;
1271:   MPI_Comm           comm;

1273:   PetscFunctionBegin;
1275:   PetscSFCheckGraphSet(sf, 1);

1279:   PetscCall(PetscSFSetUp(sf));
1280:   PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
1281:   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1282:   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));

1284:   if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out-of-range indices */
1285:     PetscBool dups;
1286:     PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
1287:     PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
1288:     for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
1289:   }

1291:   if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1292:   else {
1293:     /* A generic version of creating embedded sf */
1294:     PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
1295:     maxlocal = maxleaf - minleaf + 1;
1296:     PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
1297:     leafdata = leafmem - minleaf;
1298:     /* Tag selected roots and bcast to leaves */
1299:     for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
1300:     PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1301:     PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));

1303:     /* Build esf with leaves that are still connected */
1304:     esf_nleaves = 0;
1305:     for (i = 0; i < nleaves; i++) {
1306:       j = ilocal ? ilocal[i] : i;
1307:       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1308:          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1309:       */
1310:       esf_nleaves += (leafdata[j] ? 1 : 0);
1311:     }
1312:     PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
1313:     PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
1314:     for (i = n = 0; i < nleaves; i++) {
1315:       j = ilocal ? ilocal[i] : i;
1316:       if (leafdata[j]) {
1317:         new_ilocal[n]        = j;
1318:         new_iremote[n].rank  = iremote[i].rank;
1319:         new_iremote[n].index = iremote[i].index;
1320:         ++n;
1321:       }
1322:     }
1323:     PetscCall(PetscSFCreate(comm, esf));
1324:     PetscCall(PetscSFSetFromOptions(*esf));
1325:     PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1326:     PetscCall(PetscFree2(rootdata, leafmem));
1327:   }
1328:   PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
1329:   PetscFunctionReturn(PETSC_SUCCESS);
1330: }
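
/* Usage sketch (editorial example, not from the PETSc sources): create an embedded SF that keeps
   only the edges reaching a chosen subset of roots, then query which leaves still participate via
   PetscSFGetGraph(). The star forest sf and the root indices in sel are assumptions for illustration.
.vb
  PetscSF         esf;
  PetscInt        nselected = 2, sel[2] = {0, 3}; // hypothetical local root indices
  PetscInt        esf_nleaves;
  const PetscInt *esf_ilocal;

  PetscCall(PetscSFCreateEmbeddedRootSF(sf, nselected, sel, &esf));
  PetscCall(PetscSFGetGraph(esf, NULL, &esf_nleaves, &esf_ilocal, NULL)); // leaves still participating
  PetscCall(PetscSFDestroy(&esf));
.ve
*/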

1332: /*@C
1333:   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`; does not remap indices

1335:   Collective

1337:   Input Parameters:
1338: + sf - original star forest
1339: . nselected  - number of selected leaves on this process
1340: - selected   - indices of the selected leaves on this process

1342:   Output Parameter:
1343: .  newsf - new star forest

1345:   Level: advanced

1347: .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1348: @*/
1349: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
1350: {
1351:   const PetscSFNode *iremote;
1352:   PetscSFNode       *new_iremote;
1353:   const PetscInt    *ilocal;
1354:   PetscInt           i, nroots, *leaves, *new_ilocal;
1355:   MPI_Comm           comm;

1357:   PetscFunctionBegin;
1359:   PetscSFCheckGraphSet(sf, 1);

1363:   /* Uniq selected[] and put results in leaves[] */
1364:   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1365:   PetscCall(PetscMalloc1(nselected, &leaves));
1366:   PetscCall(PetscArraycpy(leaves, selected, nselected));
1367:   PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves));
1368:   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);

1370:   /* Optimize the routine only when sf is set up, so that we can reuse sf's communication pattern */
1371:   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1372:   else {
1373:     PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
1374:     PetscCall(PetscMalloc1(nselected, &new_ilocal));
1375:     PetscCall(PetscMalloc1(nselected, &new_iremote));
1376:     for (i = 0; i < nselected; ++i) {
1377:       const PetscInt l     = leaves[i];
1378:       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1379:       new_iremote[i].rank  = iremote[l].rank;
1380:       new_iremote[i].index = iremote[l].index;
1381:     }
1382:     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
1383:     PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1384:   }
1385:   PetscCall(PetscFree(leaves));
1386:   PetscFunctionReturn(PETSC_SUCCESS);
1387: }
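
/* Usage sketch (editorial example, not from the PETSc sources): keep only the edges of selected
   leaves; leaf and root indices are not remapped, so data laid out for sf can be reused with the
   embedded SF. The star forest sf and the sorted leaf indices in sel are assumptions for illustration.
.vb
  PetscSF  lsf;
  PetscInt nselected = 3, sel[3] = {0, 1, 4}; // hypothetical local leaf indices, sorted

  PetscCall(PetscSFCreateEmbeddedLeafSF(sf, nselected, sel, &lsf));
  PetscCall(PetscSFDestroy(&lsf));
.ve
*/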

1389: /*@C
1390:    PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with a call to `PetscSFBcastEnd()`

1392:    Collective

1394:    Input Parameters:
1395: +  sf - star forest on which to communicate
1396: .  unit - data type associated with each node
1397: .  rootdata - buffer to broadcast
1398: -  op - operation to use for reduction

1400:    Output Parameter:
1401: .  leafdata - buffer to be reduced with values from each leaf's respective root

1403:    Level: intermediate

1405:    Note:
1406:     When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1407:     are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory types, you should
1408:     use `PetscSFBcastWithMemTypeBegin()` instead.

1410: .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
1411: @*/
1412: PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1413: {
1414:   PetscMemType rootmtype, leafmtype;

1416:   PetscFunctionBegin;
1418:   PetscCall(PetscSFSetUp(sf));
1419:   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1420:   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1421:   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1422:   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1423:   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1424:   PetscFunctionReturn(PETSC_SUCCESS);
1425: }

1427: /*@C
1428:    PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with a call
1429:    to `PetscSFBcastEnd()`

1431:    Collective

1433:    Input Parameters:
1434: +  sf - star forest on which to communicate
1435: .  unit - data type associated with each node
1436: .  rootmtype - memory type of rootdata
1437: .  rootdata - buffer to broadcast
1438: .  leafmtype - memory type of leafdata
1439: -  op - operation to use for reduction

1441:    Output Parameter:
1442: .  leafdata - buffer to be reduced with values from each leaf's respective root

1444:    Level: intermediate

1446: .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1447: @*/
1448: PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1449: {
1450:   PetscFunctionBegin;
1452:   PetscCall(PetscSFSetUp(sf));
1453:   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1454:   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1455:   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1456:   PetscFunctionReturn(PETSC_SUCCESS);
1457: }
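
/* Usage sketch (editorial example, not from the PETSc sources): when the caller already knows that
   both buffers live in host memory, the memory-type detection of PetscSFBcastBegin() can be skipped
   by stating the types explicitly. sf, rootdata, and leafdata are assumed to be set up elsewhere.
.vb
  PetscCall(PetscSFBcastWithMemTypeBegin(sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, rootdata, PETSC_MEMTYPE_HOST, leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
.ve
*/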

1459: /*@C
1460:    PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`

1462:    Collective

1464:    Input Parameters:
1465: +  sf - star forest
1466: .  unit - data type
1467: .  rootdata - buffer to broadcast
1468: -  op - operation to use for reduction

1470:    Output Parameter:
1471: .  leafdata - buffer to be reduced with values from each leaf's respective root

1473:    Level: intermediate

1475: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
1476: @*/
1477: PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1478: {
1479:   PetscFunctionBegin;
1481:   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
1482:   PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
1483:   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
1484:   PetscFunctionReturn(PETSC_SUCCESS);
1485: }
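
/* Usage sketch (editorial example, not from the PETSc sources): broadcast one PetscInt per root to
   all leaves, overwriting the leaf values. Assumes sf has nroots roots and nleaves contiguously
   stored leaves (ilocal = NULL); the buffer names are illustrative.
.vb
  PetscInt *rootdata, *leafdata;

  PetscCall(PetscMalloc2(nroots, &rootdata, nleaves, &leafdata));
  for (PetscInt i = 0; i < nroots; i++) rootdata[i] = 100 + i; // values owned at the roots
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
  // unrelated local work may overlap with the communication here
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscFree2(rootdata, leafdata));
.ve
*/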

1487: /*@C
1488:    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with a call to `PetscSFReduceEnd()`

1490:    Collective

1492:    Input Parameters:
1493: +  sf - star forest
1494: .  unit - data type
1495: .  leafdata - values to reduce
1496: -  op - reduction operation

1498:    Output Parameter:
1499: .  rootdata - result of reduction of values from all leaves of each root

1501:    Level: intermediate

1503:    Note:
1504:     When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1505:     are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory types, you should
1506:     use `PetscSFReduceWithMemTypeBegin()` instead.

1508: .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`
1509: @*/
1510: PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1511: {
1512:   PetscMemType rootmtype, leafmtype;

1514:   PetscFunctionBegin;
1516:   PetscCall(PetscSFSetUp(sf));
1517:   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1518:   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1519:   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1520:   PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1521:   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1522:   PetscFunctionReturn(PETSC_SUCCESS);
1523: }

1525: /*@C
1526:    PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with a call to `PetscSFReduceEnd()`

1528:    Collective

1530:    Input Parameters:
1531: +  sf - star forest
1532: .  unit - data type
1533: .  leafmtype - memory type of leafdata
1534: .  leafdata - values to reduce
1535: .  rootmtype - memory type of rootdata
1536: -  op - reduction operation

1538:    Output Parameter:
1539: .  rootdata - result of reduction of values from all leaves of each root

1541:    Level: intermediate

1543: .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()`
1544: @*/
1545: PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1546: {
1547:   PetscFunctionBegin;
1549:   PetscCall(PetscSFSetUp(sf));
1550:   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1551:   PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1552:   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1553:   PetscFunctionReturn(PETSC_SUCCESS);
1554: }

1556: /*@C
1557:    PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()`

1559:    Collective

1561:    Input Parameters:
1562: +  sf - star forest
1563: .  unit - data type
1564: .  leafdata - values to reduce
1565: -  op - reduction operation

1567:    Output Parameter:
1568: .  rootdata - result of reduction of values from all leaves of each root

1570:    Level: intermediate

1572: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()`
1573: @*/
1574: PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1575: {
1576:   PetscFunctionBegin;
1578:   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1579:   PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
1580:   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1581:   PetscFunctionReturn(PETSC_SUCCESS);
1582: }
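
/* Usage sketch (editorial example, not from the PETSc sources): sum the leaf contributions into the
   roots, e.g. to accumulate ghost values onto their owners. Assumes sf has nroots roots and nleaves
   contiguously stored leaves; the buffer names are illustrative.
.vb
  PetscInt *rootdata, *leafdata;

  PetscCall(PetscCalloc1(nroots, &rootdata)); // reduction target, initialized to zero
  PetscCall(PetscMalloc1(nleaves, &leafdata));
  for (PetscInt i = 0; i < nleaves; i++) leafdata[i] = 1; // each leaf contributes 1
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, leafdata, rootdata, MPI_SUM));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, leafdata, rootdata, MPI_SUM));
  // rootdata[r] now equals the number of leaves pointing at root r (its degree)
  PetscCall(PetscFree(rootdata));
  PetscCall(PetscFree(leafdata));
.ve
*/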

1584: /*@C
1585:    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value,
1586:    to be completed with `PetscSFFetchAndOpEnd()`

1588:    Collective

1590:    Input Parameters:
1591: +  sf - star forest
1592: .  unit - data type
1593: .  leafdata - leaf values to use in reduction
1594: -  op - operation to use for reduction

1596:    Output Parameters:
1597: +  rootdata - root values to be updated, input state is seen by first process to perform an update
1598: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1600:    Level: advanced

1602:    Note:
1603:    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1604:    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1605:    not guaranteed across the whole vertex. Therefore, this function is typically used only with primitive types such as
1606:    integers.

1608: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1609: @*/
1610: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1611: {
1612:   PetscMemType rootmtype, leafmtype, leafupdatemtype;

1614:   PetscFunctionBegin;
1616:   PetscCall(PetscSFSetUp(sf));
1617:   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1618:   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1619:   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1620:   PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
1621:   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1622:   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1623:   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1624:   PetscFunctionReturn(PETSC_SUCCESS);
1625: }

1627: /*@C
1628:    PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
1629:    applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()`

1631:    Collective

1633:    Input Parameters:
1634: +  sf - star forest
1635: .  unit - data type
1636: .  rootmtype - memory type of rootdata
1637: .  leafmtype - memory type of leafdata
1638: .  leafdata - leaf values to use in reduction
1639: .  leafupdatemtype - memory type of leafupdate
1640: -  op - operation to use for reduction

1642:    Output Parameters:
1643: +  rootdata - root values to be updated, input state is seen by first process to perform an update
1644: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1646:    Level: advanced

1648:    Note:
1649:    See `PetscSFFetchAndOpBegin()` for more details.

1651: .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()`
1652: @*/
1653: PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
1654: {
1655:   PetscFunctionBegin;
1657:   PetscCall(PetscSFSetUp(sf));
1658:   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1659:   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1660:   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1661:   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1662:   PetscFunctionReturn(PETSC_SUCCESS);
1663: }

1665: /*@C
1666:    PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()`
1667:    to fetch values from roots and update atomically by applying operation using my leaf value

1669:    Collective

1671:    Input Parameters:
1672: +  sf - star forest
1673: .  unit - data type
1674: .  leafdata - leaf values to use in reduction
1675: -  op - operation to use for reduction

1677:    Output Parameters:
1678: +  rootdata - root values to be updated, input state is seen by first process to perform an update
1679: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1681:    Level: advanced

1683: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()`
1684: @*/
1685: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1686: {
1687:   PetscFunctionBegin;
1689:   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1690:   PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
1691:   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1692:   PetscFunctionReturn(PETSC_SUCCESS);
1693: }
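
/* Usage sketch (editorial example, not from the PETSc sources): atomically add each leaf's value to
   its root while fetching the root's previous state, e.g. to hand every leaf a distinct slot at its
   root. Assumes sf has nroots roots and nleaves contiguously stored leaves; names are illustrative.
.vb
  PetscInt *rootdata, *leafdata, *leafupdate;

  PetscCall(PetscCalloc1(nroots, &rootdata)); // running counter at each root
  PetscCall(PetscMalloc2(nleaves, &leafdata, nleaves, &leafupdate));
  for (PetscInt i = 0; i < nleaves; i++) leafdata[i] = 1;
  PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM));
  PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM));
  // leafupdate[i] holds the counter value seen at leaf i's root just before its own update
  PetscCall(PetscFree(rootdata));
  PetscCall(PetscFree2(leafdata, leafupdate));
.ve
*/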

1695: /*@C
1696:    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()`

1698:    Collective

1700:    Input Parameter:
1701: .  sf - star forest

1703:    Output Parameter:
1704: .  degree - degree of each root vertex

1706:    Level: advanced

1708:    Note:
1709:    The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.

1711: .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
1712: @*/
1713: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree)
1714: {
1715:   PetscFunctionBegin;
1717:   PetscSFCheckGraphSet(sf, 1);
1719:   if (!sf->degreeknown) {
1720:     PetscInt i, nroots = sf->nroots, maxlocal;
1721:     PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1722:     maxlocal = sf->maxleaf - sf->minleaf + 1;
1723:     PetscCall(PetscMalloc1(nroots, &sf->degree));
1724:     PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1725:     for (i = 0; i < nroots; i++) sf->degree[i] = 0;
1726:     for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
1727:     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1728:   }
1729:   *degree = NULL;
1730:   PetscFunctionReturn(PETSC_SUCCESS);
1731: }

1733: /*@C
1734:    PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`

1736:    Collective

1738:    Input Parameter:
1739: .  sf - star forest

1741:    Output Parameter:
1742: .  degree - degree of each root vertex

1744:    Level: developer

1746:    Note:
1747:    The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.

1749: .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
1750: @*/
1751: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree)
1752: {
1753:   PetscFunctionBegin;
1755:   PetscSFCheckGraphSet(sf, 1);
1757:   if (!sf->degreeknown) {
1758:     PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1759:     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1760:     PetscCall(PetscFree(sf->degreetmp));
1761:     sf->degreeknown = PETSC_TRUE;
1762:   }
1763:   *degree = sf->degree;
1764:   PetscFunctionReturn(PETSC_SUCCESS);
1765: }
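
/* Usage sketch (editorial example, not from the PETSc sources): compute the degree of every root.
   The returned array is owned by the SF, so it must not be freed by the caller. sf is assumed to
   have its graph set.
.vb
  const PetscInt *degree;

  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  // degree[r] is the number of leaves connected to root r; do not PetscFree() it
.ve
*/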

1767: /*@C
1768:    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
1769:    Each multi-root is assigned the index of the corresponding original root.

1771:    Collective

1773:    Input Parameters:
1774: +  sf - star forest
1775: -  degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`

1777:    Output Parameters:
1778: +  nMultiRoots - (optional) number of multi-roots (roots of multi-`PetscSF`)
1779: -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`

1781:    Level: developer

1783:    Note:
1784:    The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed.

1786: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1787: @*/
1788: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1789: {
1790:   PetscSF  msf;
1791:   PetscInt i, j, k, nroots, nmroots;

1793:   PetscFunctionBegin;
1795:   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
1799:   PetscCall(PetscSFGetMultiSF(sf, &msf));
1800:   PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
1801:   PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1802:   for (i = 0, j = 0, k = 0; i < nroots; i++) {
1803:     if (!degree[i]) continue;
1804:     for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1805:   }
1806:   PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check failed");
1807:   if (nMultiRoots) *nMultiRoots = nmroots;
1808:   PetscFunctionReturn(PETSC_SUCCESS);
1809: }
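
/* Usage sketch (editorial example, not from the PETSc sources): map the multi-roots of the multi-SF
   back to the original root indices. The degree array comes from PetscSFComputeDegreeBegin()/End();
   sf is an assumed existing star forest.
.vb
  const PetscInt *degree;
  PetscInt        nmroots, *orig;

  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  PetscCall(PetscSFComputeMultiRootOriginalNumbering(sf, degree, &nmroots, &orig));
  // orig[k] is the original root index of multi-root k; the caller owns and frees orig
  PetscCall(PetscFree(orig));
.ve
*/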

1811: /*@C
1812:    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`

1814:    Collective

1816:    Input Parameters:
1817: +  sf - star forest
1818: .  unit - data type
1819: -  leafdata - leaf data to gather to roots

1821:    Output Parameter:
1822: .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

1824:    Level: intermediate

1826: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1827: @*/
1828: PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1829: {
1830:   PetscSF multi = NULL;

1832:   PetscFunctionBegin;
1834:   PetscCall(PetscSFSetUp(sf));
1835:   PetscCall(PetscSFGetMultiSF(sf, &multi));
1836:   PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1837:   PetscFunctionReturn(PETSC_SUCCESS);
1838: }

1840: /*@C
1841:    PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`

1843:    Collective

1845:    Input Parameters:
1846: +  sf - star forest
1847: .  unit - data type
1848: -  leafdata - leaf data to gather to roots

1850:    Output Parameter:
1851: .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

1853:    Level: intermediate

1855: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1856: @*/
1857: PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1858: {
1859:   PetscSF multi = NULL;

1861:   PetscFunctionBegin;
1863:   PetscCall(PetscSFGetMultiSF(sf, &multi));
1864:   PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1865:   PetscFunctionReturn(PETSC_SUCCESS);
1866: }
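
/* Usage sketch (editorial example, not from the PETSc sources): gather every leaf value into
   per-root slots; each root receives as many entries as its degree. Assumes sf has nroots roots and
   nleaves contiguously stored leaves; names are illustrative.
.vb
  const PetscInt *degree;
  PetscInt        nmulti = 0, *leafdata, *multirootdata;

  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  for (PetscInt r = 0; r < nroots; r++) nmulti += degree[r]; // total space needed at the roots
  PetscCall(PetscMalloc2(nleaves, &leafdata, nmulti, &multirootdata));
  for (PetscInt i = 0; i < nleaves; i++) leafdata[i] = i;
  PetscCall(PetscSFGatherBegin(sf, MPIU_INT, leafdata, multirootdata));
  PetscCall(PetscSFGatherEnd(sf, MPIU_INT, leafdata, multirootdata));
  PetscCall(PetscFree2(leafdata, multirootdata));
.ve
*/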

1868: /*@C
1869:    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`

1871:    Collective

1873:    Input Parameters:
1874: +  sf - star forest
1875: .  unit - data type
1876: -  multirootdata - root buffer to send to each leaf, one unit of data per leaf

1878:    Output Parameter:
1879: .  leafdata - leaf data to be updated with personal data from each respective root

1881:    Level: intermediate

1883: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
1884: @*/
1885: PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1886: {
1887:   PetscSF multi = NULL;

1889:   PetscFunctionBegin;
1891:   PetscCall(PetscSFSetUp(sf));
1892:   PetscCall(PetscSFGetMultiSF(sf, &multi));
1893:   PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1894:   PetscFunctionReturn(PETSC_SUCCESS);
1895: }

1897: /*@C
1898:    PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`

1900:    Collective

1902:    Input Parameters:
1903: +  sf - star forest
1904: .  unit - data type
1905: -  multirootdata - root buffer to send to each leaf, one unit of data per leaf

1907:    Output Parameter:
1908: .  leafdata - leaf data to be updated with personal data from each respective root

1910:    Level: intermediate

1912: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
1913: @*/
1914: PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1915: {
1916:   PetscSF multi = NULL;

1918:   PetscFunctionBegin;
1920:   PetscCall(PetscSFGetMultiSF(sf, &multi));
1921:   PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1922:   PetscFunctionReturn(PETSC_SUCCESS);
1923: }
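
/* Usage sketch (editorial example, not from the PETSc sources): the inverse of the gather sketch
   above; each root sends one personal unit to each of its leaves out of the multi-root buffer.
   Assumes multirootdata and leafdata are sized as in the PetscSFGatherBegin() sketch.
.vb
  PetscCall(PetscSFScatterBegin(sf, MPIU_INT, multirootdata, leafdata));
  PetscCall(PetscSFScatterEnd(sf, MPIU_INT, multirootdata, leafdata));
.ve
*/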

1925: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1926: {
1927:   PetscInt        i, n, nleaves;
1928:   const PetscInt *ilocal = NULL;
1929:   PetscHSetI      seen;

1931:   PetscFunctionBegin;
1932:   if (PetscDefined(USE_DEBUG)) {
1933:     PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
1934:     PetscCall(PetscHSetICreate(&seen));
1935:     for (i = 0; i < nleaves; i++) {
1936:       const PetscInt leaf = ilocal ? ilocal[i] : i;
1937:       PetscCall(PetscHSetIAdd(seen, leaf));
1938:     }
1939:     PetscCall(PetscHSetIGetSize(seen, &n));
1940:     PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
1941:     PetscCall(PetscHSetIDestroy(&seen));
1942:   }
1943:   PetscFunctionReturn(PETSC_SUCCESS);
1944: }

1946: /*@
1947:   PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view

1949:   Input Parameters:
1950: + sfA - The first `PetscSF`
1951: - sfB - The second `PetscSF`

1953:   Output Parameter:
1954: . sfBA - The composite `PetscSF`

1956:   Level: developer

1958:   Notes:
1959:   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
1960:   forests, i.e. the same leaf is not connected with different roots.

1962:   `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds
1963:   a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
1964:   nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
1965:   `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes.

1967: .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
1968: @*/
1969: PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
1970: {
1971:   const PetscSFNode *remotePointsA, *remotePointsB;
1972:   PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
1973:   const PetscInt    *localPointsA, *localPointsB;
1974:   PetscInt          *localPointsBA;
1975:   PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
1976:   PetscBool          denseB;

1978:   PetscFunctionBegin;
1980:   PetscSFCheckGraphSet(sfA, 1);
1982:   PetscSFCheckGraphSet(sfB, 2);
1983:   PetscCheckSameComm(sfA, 1, sfB, 2);
1985:   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
1986:   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

1988:   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
1989:   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
1990:   /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
1991:      numRootsB; otherwise, garbage will be broadcasted.
1992:      Example (comm size = 1):
1993:      sfA: 0 <- (0, 0)
1994:      sfB: 100 <- (0, 0)
1995:           101 <- (0, 1)
1996:      Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target
1997:      of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would
1998:      receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
1999:      remotePointsA; if not recast, point 101 would receive a garbage value.             */
2000:   PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
2001:   for (i = 0; i < numRootsB; i++) {
2002:     reorderedRemotePointsA[i].rank  = -1;
2003:     reorderedRemotePointsA[i].index = -1;
2004:   }
2005:   for (i = 0; i < numLeavesA; i++) {
2006:     PetscInt localp = localPointsA ? localPointsA[i] : i;

2008:     if (localp >= numRootsB) continue;
2009:     reorderedRemotePointsA[localp] = remotePointsA[i];
2010:   }
2011:   remotePointsA = reorderedRemotePointsA;
2012:   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2013:   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
2014:   for (i = 0; i < maxleaf - minleaf + 1; i++) {
2015:     leafdataB[i].rank  = -1;
2016:     leafdataB[i].index = -1;
2017:   }
2018:   PetscCall(PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE));
2019:   PetscCall(PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE));
2020:   PetscCall(PetscFree(reorderedRemotePointsA));

2022:   denseB = (PetscBool)!localPointsB;
2023:   for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2024:     if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
2025:     else numLeavesBA++;
2026:   }
2027:   if (denseB) {
2028:     localPointsBA  = NULL;
2029:     remotePointsBA = leafdataB;
2030:   } else {
2031:     PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
2032:     PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
2033:     for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2034:       const PetscInt l = localPointsB ? localPointsB[i] : i;

2036:       if (leafdataB[l - minleaf].rank == -1) continue;
2037:       remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
2038:       localPointsBA[numLeavesBA]  = l;
2039:       numLeavesBA++;
2040:     }
2041:     PetscCall(PetscFree(leafdataB));
2042:   }
2043:   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2044:   PetscCall(PetscSFSetFromOptions(*sfBA));
2045:   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2046:   PetscFunctionReturn(PETSC_SUCCESS);
2047: }
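
/* Usage sketch (editorial example, not from the PETSc sources): chain two star forests so that a
   broadcast on the composite is equivalent to broadcasting over sfA and then over sfB on connected
   nodes. sfA and sfB are assumed to be existing SFs on the same communicator with sfA's leaf space
   matching sfB's root space.
.vb
  PetscSF sfBA;

  PetscCall(PetscSFCompose(sfA, sfB, &sfBA));
  PetscCall(PetscSFViewFromOptions(sfBA, NULL, "-my_composed_sf_view")); // hypothetical option name
  PetscCall(PetscSFDestroy(&sfBA));
.ve
*/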

2049: /*@
2050:   PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one

2052:   Input Parameters:
2053: + sfA - The first `PetscSF`
2054: - sfB - The second `PetscSF`

2056:   Output Parameter:
2057: . sfBA - The composite `PetscSF`.

2059:   Level: developer

2061:   Notes:
2062:   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2063:   forests, i.e. the same leaf is not connected with different roots. Moreover, all roots of the
2064:   second `PetscSF` must have a degree of 1, i.e., no root has more than one leaf connected.

2066:   `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds
2067:   a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
2068:   roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()`
2069:   on `sfA`, then
2070:   a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots.

2072: .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
2073: @*/
2074: PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2075: {
2076:   const PetscSFNode *remotePointsA, *remotePointsB;
2077:   PetscSFNode       *remotePointsBA;
2078:   const PetscInt    *localPointsA, *localPointsB;
2079:   PetscSFNode       *reorderedRemotePointsA = NULL;
2080:   PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
2081:   MPI_Op             op;
2082: #if defined(PETSC_USE_64BIT_INDICES)
2083:   PetscBool iswin;
2084: #endif

2086:   PetscFunctionBegin;
2088:   PetscSFCheckGraphSet(sfA, 1);
2090:   PetscSFCheckGraphSet(sfB, 2);
2091:   PetscCheckSameComm(sfA, 1, sfB, 2);
2093:   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2094:   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

2096:   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2097:   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));

2099:   /* TODO: Check roots of sfB have degree of 1 */
2100:   /* Once we implement it, we can replace the MPI_MAXLOC
2101:      with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
2102:      We use MPI_MAXLOC only to have a deterministic output from this routine if
2103:      the root condition is not met.
2104:    */
2105:   op = MPI_MAXLOC;
2106: #if defined(PETSC_USE_64BIT_INDICES)
2107:   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2108:   PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
2109:   if (iswin) op = MPI_REPLACE;
2110: #endif

2112:   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2113:   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
2114:   for (i = 0; i < maxleaf - minleaf + 1; i++) {
2115:     reorderedRemotePointsA[i].rank  = -1;
2116:     reorderedRemotePointsA[i].index = -1;
2117:   }
2118:   if (localPointsA) {
2119:     for (i = 0; i < numLeavesA; i++) {
2120:       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2121:       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2122:     }
2123:   } else {
2124:     for (i = 0; i < numLeavesA; i++) {
2125:       if (i > maxleaf || i < minleaf) continue;
2126:       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2127:     }
2128:   }

2130:   PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
2131:   PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
2132:   for (i = 0; i < numRootsB; i++) {
2133:     remotePointsBA[i].rank  = -1;
2134:     remotePointsBA[i].index = -1;
2135:   }

2137:   PetscCall(PetscSFReduceBegin(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op));
2138:   PetscCall(PetscSFReduceEnd(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op));
2139:   PetscCall(PetscFree(reorderedRemotePointsA));
2140:   for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
2141:     if (remotePointsBA[i].rank == -1) continue;
2142:     remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
2143:     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2144:     localPointsBA[numLeavesBA]        = i;
2145:     numLeavesBA++;
2146:   }
2147:   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2148:   PetscCall(PetscSFSetFromOptions(*sfBA));
2149:   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2150:   PetscFunctionReturn(PETSC_SUCCESS);
2151: }
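
/* Usage sketch (editorial example, not from the PETSc sources): compose sfA with the inverse of
   sfB, yielding a map from sfA's roots to sfB's roots. sfA and sfB are assumed to be existing SFs
   on the same communicator, with every root of sfB having degree at most 1 as required above.
.vb
  PetscSF sfBA;

  PetscCall(PetscSFComposeInverse(sfA, sfB, &sfBA));
  PetscCall(PetscSFDestroy(&sfBA));
.ve
*/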

2153: /*
2154:   PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`

2156:   Input Parameters:
2157: . sf - The global `PetscSF`

2159:   Output Parameter:
2160: . out - The local `PetscSF`

2162: .seealso: `PetscSF`, `PetscSFCreate()`
2163:  */
2164: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2165: {
2166:   MPI_Comm           comm;
2167:   PetscMPIInt        myrank;
2168:   const PetscInt    *ilocal;
2169:   const PetscSFNode *iremote;
2170:   PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
2171:   PetscSFNode       *liremote;
2172:   PetscSF            lsf;

2174:   PetscFunctionBegin;
2176:   if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2177:   else {
2178:     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2179:     PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
2180:     PetscCallMPI(MPI_Comm_rank(comm, &myrank));

2182:     /* Find out local edges and build a local SF */
2183:     PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
2184:     for (i = lnleaves = 0; i < nleaves; i++) {
2185:       if (iremote[i].rank == (PetscInt)myrank) lnleaves++;
2186:     }
2187:     PetscCall(PetscMalloc1(lnleaves, &lilocal));
2188:     PetscCall(PetscMalloc1(lnleaves, &liremote));

2190:     for (i = j = 0; i < nleaves; i++) {
2191:       if (iremote[i].rank == (PetscInt)myrank) {
2192:         lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2193:         liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
2194:         liremote[j].index = iremote[i].index;
2195:         j++;
2196:       }
2197:     }
2198:     PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
2199:     PetscCall(PetscSFSetFromOptions(lsf));
2200:     PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
2201:     PetscCall(PetscSFSetUp(lsf));
2202:     *out = lsf;
2203:   }
2204:   PetscFunctionReturn(PETSC_SUCCESS);
2205: }

2207: /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2208: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2209: {
2210:   PetscMemType rootmtype, leafmtype;

2212:   PetscFunctionBegin;
2214:   PetscCall(PetscSFSetUp(sf));
2215:   PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
2216:   PetscCall(PetscGetMemType(rootdata, &rootmtype));
2217:   PetscCall(PetscGetMemType(leafdata, &leafmtype));
2218:   PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
2219:   PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
2220:   PetscFunctionReturn(PETSC_SUCCESS);
2221: }

2223: /*@
2224:   PetscSFConcatenate - concatenate multiple `PetscSF` into one

2226:   Input Parameters:
2227: + comm - the communicator
2228: . nsfs - the number of input `PetscSF`
2229: . sfs  - the array of input `PetscSF`
2230: . rootMode - the root mode specifying how roots are handled
2231: - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage

2233:   Output Parameter:
2234: . newsf - The resulting `PetscSF`

2236:   Level: advanced

2238:   Notes:
2239:   The communicator of all `PetscSF`s in `sfs` must be comm.

2241:   Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.

2243:   The offsets in `leafOffsets` are added to the original leaf indices.

2245:   If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well.
2246:   In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`.

2248:   If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s.
2249:   In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs).

2251:   All root modes retain the essential connectivity condition.
2252:   If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`.
2253:   Parameter `rootMode` controls how the input root spaces are combined.
2254:   For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode)
2255:   and is also the same in the output `PetscSF`.
2256:   For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined.
2257:   `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally;
2258:   roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously.
2259:   `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally;
2260:   roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously;
2261:   the original root ranks are ignored.
2262:   For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`,
2263:   the output `PetscSF`'s root layout is such that the local number of roots is the sum of the input `PetscSF`s' local numbers of roots on each rank,
2264:   to preserve load balancing.
2265:   However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks.

2267:    Example:
2268:    We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running
2269: .vb
2270:   make -C $PETSC_DIR/src/vec/is/sf/tests ex18
2271:   for m in {local,global,shared}; do
2272:     mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view
2273:   done
2274: .ve
2275:    we generate two identical `PetscSF`s sf_0 and sf_1,
2276: .vb
2277:   PetscSF Object: sf_0 2 MPI processes
2278:     type: basic
2279:     rank #leaves #roots
2280:     [ 0]       4      2
2281:     [ 1]       4      2
2282:     leaves      roots       roots in global numbering
2283:     ( 0,  0) <- ( 0,  0)  =   0
2284:     ( 0,  1) <- ( 0,  1)  =   1
2285:     ( 0,  2) <- ( 1,  0)  =   2
2286:     ( 0,  3) <- ( 1,  1)  =   3
2287:     ( 1,  0) <- ( 0,  0)  =   0
2288:     ( 1,  1) <- ( 0,  1)  =   1
2289:     ( 1,  2) <- ( 1,  0)  =   2
2290:     ( 1,  3) <- ( 1,  1)  =   3
2291: .ve
2292:    and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf:
2293: .vb
2294:   rootMode = local:
2295:   PetscSF Object: result_sf 2 MPI processes
2296:     type: basic
2297:     rank #leaves #roots
2298:     [ 0]       8      4
2299:     [ 1]       8      4
2300:     leaves      roots       roots in global numbering
2301:     ( 0,  0) <- ( 0,  0)  =   0
2302:     ( 0,  1) <- ( 0,  1)  =   1
2303:     ( 0,  2) <- ( 1,  0)  =   4
2304:     ( 0,  3) <- ( 1,  1)  =   5
2305:     ( 0,  4) <- ( 0,  2)  =   2
2306:     ( 0,  5) <- ( 0,  3)  =   3
2307:     ( 0,  6) <- ( 1,  2)  =   6
2308:     ( 0,  7) <- ( 1,  3)  =   7
2309:     ( 1,  0) <- ( 0,  0)  =   0
2310:     ( 1,  1) <- ( 0,  1)  =   1
2311:     ( 1,  2) <- ( 1,  0)  =   4
2312:     ( 1,  3) <- ( 1,  1)  =   5
2313:     ( 1,  4) <- ( 0,  2)  =   2
2314:     ( 1,  5) <- ( 0,  3)  =   3
2315:     ( 1,  6) <- ( 1,  2)  =   6
2316:     ( 1,  7) <- ( 1,  3)  =   7

2318:   rootMode = global:
2319:   PetscSF Object: result_sf 2 MPI processes
2320:     type: basic
2321:     rank #leaves #roots
2322:     [ 0]       8      4
2323:     [ 1]       8      4
2324:     leaves      roots       roots in global numbering
2325:     ( 0,  0) <- ( 0,  0)  =   0
2326:     ( 0,  1) <- ( 0,  1)  =   1
2327:     ( 0,  2) <- ( 0,  2)  =   2
2328:     ( 0,  3) <- ( 0,  3)  =   3
2329:     ( 0,  4) <- ( 1,  0)  =   4
2330:     ( 0,  5) <- ( 1,  1)  =   5
2331:     ( 0,  6) <- ( 1,  2)  =   6
2332:     ( 0,  7) <- ( 1,  3)  =   7
2333:     ( 1,  0) <- ( 0,  0)  =   0
2334:     ( 1,  1) <- ( 0,  1)  =   1
2335:     ( 1,  2) <- ( 0,  2)  =   2
2336:     ( 1,  3) <- ( 0,  3)  =   3
2337:     ( 1,  4) <- ( 1,  0)  =   4
2338:     ( 1,  5) <- ( 1,  1)  =   5
2339:     ( 1,  6) <- ( 1,  2)  =   6
2340:     ( 1,  7) <- ( 1,  3)  =   7

2342:   rootMode = shared:
2343:   PetscSF Object: result_sf 2 MPI processes
2344:     type: basic
2345:     rank #leaves #roots
2346:     [ 0]       8      2
2347:     [ 1]       8      2
2348:     leaves      roots       roots in global numbering
2349:     ( 0,  0) <- ( 0,  0)  =   0
2350:     ( 0,  1) <- ( 0,  1)  =   1
2351:     ( 0,  2) <- ( 1,  0)  =   2
2352:     ( 0,  3) <- ( 1,  1)  =   3
2353:     ( 0,  4) <- ( 0,  0)  =   0
2354:     ( 0,  5) <- ( 0,  1)  =   1
2355:     ( 0,  6) <- ( 1,  0)  =   2
2356:     ( 0,  7) <- ( 1,  1)  =   3
2357:     ( 1,  0) <- ( 0,  0)  =   0
2358:     ( 1,  1) <- ( 0,  1)  =   1
2359:     ( 1,  2) <- ( 1,  0)  =   2
2360:     ( 1,  3) <- ( 1,  1)  =   3
2361:     ( 1,  4) <- ( 0,  0)  =   0
2362:     ( 1,  5) <- ( 0,  1)  =   1
2363:     ( 1,  6) <- ( 1,  0)  =   2
2364:     ( 1,  7) <- ( 1,  1)  =   3
2365: .ve

2367: .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode`
2368: @*/
2369: PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf)
2370: {
2371:   PetscInt     i, s, nLeaves, nRoots;
2372:   PetscInt    *leafArrayOffsets;
2373:   PetscInt    *ilocal_new;
2374:   PetscSFNode *iremote_new;
2375:   PetscBool    all_ilocal_null = PETSC_FALSE;
2376:   PetscLayout  glayout         = NULL;
2377:   PetscInt    *gremote         = NULL;
2378:   PetscMPIInt  rank, size;

2380:   PetscFunctionBegin;
2381:   if (PetscDefined(USE_DEBUG)) {
2382:     PetscSF dummy; /* just to have a PetscObject on comm for input validation */

2384:     PetscCall(PetscSFCreate(comm, &dummy));
2387:     for (i = 0; i < nsfs; i++) {
2389:       PetscCheckSameComm(dummy, 1, sfs[i], 3);
2390:     }
2394:     PetscCall(PetscSFDestroy(&dummy));
2395:   }
2396:   if (!nsfs) {
2397:     PetscCall(PetscSFCreate(comm, newsf));
2398:     PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
2399:     PetscFunctionReturn(PETSC_SUCCESS);
2400:   }
2401:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2402:   PetscCallMPI(MPI_Comm_size(comm, &size));

2404:   /* Calculate leaf array offsets */
2405:   PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
2406:   leafArrayOffsets[0] = 0;
2407:   for (s = 0; s < nsfs; s++) {
2408:     PetscInt nl;

2410:     PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
2411:     leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2412:   }
2413:   nLeaves = leafArrayOffsets[nsfs];

2415:   /* Calculate number of roots */
2416:   switch (rootMode) {
2417:   case PETSCSF_CONCATENATE_ROOTMODE_SHARED: {
2418:     PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
2419:     if (PetscDefined(USE_DEBUG)) {
2420:       for (s = 1; s < nsfs; s++) {
2421:         PetscInt nr;

2423:         PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
2424:         PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots);
2425:       }
2426:     }
2427:   } break;
2428:   case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: {
2429:     /* Calculate also global layout in this case */
2430:     PetscInt    *nls;
2431:     PetscLayout *lts;
2432:     PetscInt   **inds;
2433:     PetscInt     j;
2434:     PetscInt     rootOffset = 0;

2436:     PetscCall(PetscCalloc3(nsfs, &lts, nsfs, &nls, nsfs, &inds));
2437:     PetscCall(PetscLayoutCreate(comm, &glayout));
2438:     glayout->bs = 1;
2439:     glayout->n  = 0;
2440:     glayout->N  = 0;
2441:     for (s = 0; s < nsfs; s++) {
2442:       PetscCall(PetscSFGetGraphLayout(sfs[s], &lts[s], &nls[s], NULL, &inds[s]));
2443:       glayout->n += lts[s]->n;
2444:       glayout->N += lts[s]->N;
2445:     }
2446:     PetscCall(PetscLayoutSetUp(glayout));
2447:     PetscCall(PetscMalloc1(nLeaves, &gremote));
2448:     for (s = 0, j = 0; s < nsfs; s++) {
2449:       for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset;
2450:       rootOffset += lts[s]->N;
2451:       PetscCall(PetscLayoutDestroy(&lts[s]));
2452:       PetscCall(PetscFree(inds[s]));
2453:     }
2454:     PetscCall(PetscFree3(lts, nls, inds));
2455:     nRoots = glayout->N;
2456:   } break;
2457:   case PETSCSF_CONCATENATE_ROOTMODE_LOCAL:
2458:     /* nRoots calculated later in this case */
2459:     break;
2460:   default:
2461:     SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode);
2462:   }

2464:   if (!leafOffsets) {
2465:     all_ilocal_null = PETSC_TRUE;
2466:     for (s = 0; s < nsfs; s++) {
2467:       const PetscInt *ilocal;

2469:       PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
2470:       if (ilocal) {
2471:         all_ilocal_null = PETSC_FALSE;
2472:         break;
2473:       }
2474:     }
2475:     PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
2476:   }

2478:   /* Renumber and concatenate local leaves */
2479:   ilocal_new = NULL;
2480:   if (!all_ilocal_null) {
2481:     PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
2482:     for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2483:     for (s = 0; s < nsfs; s++) {
2484:       const PetscInt *ilocal;
2485:       PetscInt       *ilocal_l = &ilocal_new[leafArrayOffsets[s]];
2486:       PetscInt        i, nleaves_l;

2488:       PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
2489:       for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2490:     }
2491:   }

2493:   /* Renumber and concatenate remote roots */
2494:   if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) {
2495:     PetscInt rootOffset = 0;

2497:     PetscCall(PetscMalloc1(nLeaves, &iremote_new));
2498:     for (i = 0; i < nLeaves; i++) {
2499:       iremote_new[i].rank  = -1;
2500:       iremote_new[i].index = -1;
2501:     }
2502:     for (s = 0; s < nsfs; s++) {
2503:       PetscInt           i, nl, nr;
2504:       PetscSF            tmp_sf;
2505:       const PetscSFNode *iremote;
2506:       PetscSFNode       *tmp_rootdata;
2507:       PetscSFNode       *tmp_leafdata = &iremote_new[leafArrayOffsets[s]];

2509:       PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
2510:       PetscCall(PetscSFCreate(comm, &tmp_sf));
2511:       /* create helper SF with contiguous leaves */
2512:       PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
2513:       PetscCall(PetscSFSetUp(tmp_sf));
2514:       PetscCall(PetscMalloc1(nr, &tmp_rootdata));
2515:       if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) {
2516:         for (i = 0; i < nr; i++) {
2517:           tmp_rootdata[i].index = i + rootOffset;
2518:           tmp_rootdata[i].rank  = (PetscInt)rank;
2519:         }
2520:         rootOffset += nr;
2521:       } else {
2522:         for (i = 0; i < nr; i++) {
2523:           tmp_rootdata[i].index = i;
2524:           tmp_rootdata[i].rank  = (PetscInt)rank;
2525:         }
2526:       }
2527:       PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2528:       PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2529:       PetscCall(PetscSFDestroy(&tmp_sf));
2530:       PetscCall(PetscFree(tmp_rootdata));
2531:     }
2532:     if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above

2534:     /* Build the new SF */
2535:     PetscCall(PetscSFCreate(comm, newsf));
2536:     PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
2537:   } else {
2538:     /* Build the new SF */
2539:     PetscCall(PetscSFCreate(comm, newsf));
2540:     PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote));
2541:   }
2542:   PetscCall(PetscSFSetUp(*newsf));
2543:   PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view"));
2544:   PetscCall(PetscLayoutDestroy(&glayout));
2545:   PetscCall(PetscFree(gremote));
2546:   PetscCall(PetscFree(leafArrayOffsets));
2547:   PetscFunctionReturn(PETSC_SUCCESS);
2548: }
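
/* Usage sketch (editorial example, not from the PETSc sources): concatenate two SFs that use
   contiguous leaf storage (ilocal = NULL), joining their root spaces locally as in the root-mode
   comparison above. sf0 and sf1 are assumed to be existing SFs on comm.
.vb
  PetscSF sfs[2] = {sf0, sf1}, newsf;

  PetscCall(PetscSFConcatenate(comm, 2, sfs, PETSCSF_CONCATENATE_ROOTMODE_LOCAL, NULL, &newsf));
  PetscCall(PetscSFDestroy(&newsf));
.ve
*/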