Actual source code: sfwindow.c

#include <petsc/private/sfimpl.h>

typedef struct _n_PetscSFDataLink *PetscSFDataLink;
typedef struct _n_PetscSFWinLink  *PetscSFWinLink;

typedef struct {
  PetscSFWindowSyncType   sync;   /* FENCE, LOCK, or ACTIVE synchronization */
  PetscSFDataLink         link;   /* List of MPI data types, lazily constructed for each data type */
  PetscSFWinLink          wins;   /* List of active windows */
  PetscSFWindowFlavorType flavor; /* Current PETSCSF_WINDOW_FLAVOR_ */
  PetscSF                 dynsf;
  MPI_Info                info;
} PetscSF_Window;

struct _n_PetscSFDataLink {
  MPI_Datatype    unit;
  MPI_Datatype   *mine;
  MPI_Datatype   *remote;
  PetscSFDataLink next;
};

struct _n_PetscSFWinLink {
  PetscBool               inuse;
  size_t                  bytes;
  void                   *addr;
  void                   *paddr;
  MPI_Win                 win;
  MPI_Request            *reqs;
  PetscSFWindowFlavorType flavor;
  MPI_Aint               *dyn_target_addr;
  PetscBool               epoch;
  PetscSFWinLink          next;
};

const char *const PetscSFWindowSyncTypes[]   = {"FENCE", "LOCK", "ACTIVE", "PetscSFWindowSyncType", "PETSCSF_WINDOW_SYNC_", NULL};
const char *const PetscSFWindowFlavorTypes[] = {"CREATE", "DYNAMIC", "ALLOCATE", "SHARED", "PetscSFWindowFlavorType", "PETSCSF_WINDOW_FLAVOR_", NULL};

/* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPI_Allreduce) */
static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op)
{
  if (*op == MPIU_SUM) *op = MPI_SUM;
  else if (*op == MPIU_MAX) *op = MPI_MAX;
  else if (*op == MPIU_MIN) *op = MPI_MIN;
  return 0;
}
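
/* Illustrative note (not from the original source): when PetscScalar is complex,
   MPIU_SUM is a user-defined MPI_Op, and MPI_Accumulate() accepts only predefined
   reduction operations, so callers translate the op first. A hypothetical use,
   where leafbuf/minetype/remotetype/win stand in for a caller's buffers and types:

     MPI_Op op = MPIU_SUM;
     PetscSFWindowOpTranslate(&op); // now the predefined MPI_SUM when equivalent
     MPI_Accumulate(leafbuf, 1, minetype, rank, 0, 1, remotetype, op, win);
*/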

/*@C
  PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank

  Not Collective

  Input Parameters:
+ sf   - star forest
- unit - data type for each node

  Output Parameters:
+ localtypes  - types describing part of local leaf buffer referencing each remote rank
- remotetypes - types describing part of remote root buffer referenced for each remote rank

  Level: developer

.seealso: `PetscSFSetGraph()`, `PetscSFView()`
@*/
static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf, MPI_Datatype unit, const MPI_Datatype **localtypes, const MPI_Datatype **remotetypes)
{
  PetscSF_Window    *w = (PetscSF_Window *)sf->data;
  PetscSFDataLink    link;
  PetscInt           i, nranks;
  const PetscInt    *roffset, *rmine, *rremote;
  const PetscMPIInt *ranks;

  /* Look for types in cache */
  for (link = w->link; link; link = link->next) {
    PetscBool match;
    MPIPetsc_Type_compare(unit, link->unit, &match);
    if (match) {
      *localtypes  = link->mine;
      *remotetypes = link->remote;
      return 0;
    }
  }

  /* Create new composite types for each send rank */
  PetscSFGetRootRanks(sf, &nranks, &ranks, &roffset, &rmine, &rremote);
  PetscNew(&link);
  MPI_Type_dup(unit, &link->unit);
  PetscMalloc2(nranks, &link->mine, nranks, &link->remote);
  for (i = 0; i < nranks; i++) {
    PetscInt     rcount = roffset[i + 1] - roffset[i];
    PetscMPIInt *rmine, *rremote;
#if !defined(PETSC_USE_64BIT_INDICES)
    rmine   = sf->rmine + sf->roffset[i];
    rremote = sf->rremote + sf->roffset[i];
#else
    PetscInt j;
    PetscMalloc2(rcount, &rmine, rcount, &rremote);
    for (j = 0; j < rcount; j++) {
      PetscMPIIntCast(sf->rmine[sf->roffset[i] + j], rmine + j);
      PetscMPIIntCast(sf->rremote[sf->roffset[i] + j], rremote + j);
    }
#endif
    MPI_Type_create_indexed_block(rcount, 1, rmine, link->unit, &link->mine[i]);
    MPI_Type_create_indexed_block(rcount, 1, rremote, link->unit, &link->remote[i]);
#if defined(PETSC_USE_64BIT_INDICES)
    PetscFree2(rmine, rremote);
#endif
    MPI_Type_commit(&link->mine[i]);
    MPI_Type_commit(&link->remote[i]);
  }
  link->next = w->link;
  w->link    = link;

  *localtypes  = link->mine;
  *remotetypes = link->remote;
  return 0;
}
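
/* Sketch (illustrative, not from the original source): for remote rank i,
   link->mine[i] is an indexed-block view of the local leaf buffer selecting the
   entries served by rank i, and link->remote[i] is the matching view of rank i's
   root buffer, so a single RMA call per rank moves all of that rank's entries:

     MPI_Get(leafdata, 1, mine[i], ranks[i], 0, 1, remote[i], win);
*/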

/*@C
  PetscSFWindowSetFlavorType - Set flavor type for MPI_Win creation

  Logically Collective

  Input Parameters:
+ sf     - star forest for communication
- flavor - flavor type

  Options Database Key:
. -sf_window_flavor <flavor> - sets the flavor type CREATE, DYNAMIC, ALLOCATE or SHARED (see PetscSFWindowFlavorType)

  Level: advanced

  Notes:
  Window reuse follows these rules:

  PETSCSF_WINDOW_FLAVOR_CREATE: creates a new window every time, uses MPI_Win_create

  PETSCSF_WINDOW_FLAVOR_DYNAMIC: uses MPI_Win_create_dynamic/MPI_Win_attach and tries to reuse windows by comparing the root array. Intended to be used on repeated applications of the same SF, e.g.
.vb
  for i=1 to K
    PetscSFOperationBegin(rootdata1,leafdata_whatever);
    PetscSFOperationEnd(rootdata1,leafdata_whatever);
    ...
    PetscSFOperationBegin(rootdataN,leafdata_whatever);
    PetscSFOperationEnd(rootdataN,leafdata_whatever);
  endfor
.ve
  The following pattern will instead raise an error
.vb
  PetscSFOperationBegin(rootdata1,leafdata_whatever);
  PetscSFOperationEnd(rootdata1,leafdata_whatever);
  PetscSFOperationBegin(rank ? rootdata1 : rootdata2,leafdata_whatever);
  PetscSFOperationEnd(rank ? rootdata1 : rootdata2,leafdata_whatever);
.ve

  PETSCSF_WINDOW_FLAVOR_ALLOCATE: uses MPI_Win_allocate, reuses any pre-existing window that fits the data and is not in use

  PETSCSF_WINDOW_FLAVOR_SHARED: uses MPI_Win_allocate_shared, reuse policy as for PETSCSF_WINDOW_FLAVOR_ALLOCATE

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowGetFlavorType()`
@*/
PetscErrorCode PetscSFWindowSetFlavorType(PetscSF sf, PetscSFWindowFlavorType flavor)
{
  PetscTryMethod(sf, "PetscSFWindowSetFlavorType_C", (PetscSF, PetscSFWindowFlavorType), (sf, flavor));
  return 0;
}
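
/* Usage sketch (illustrative, assuming an existing window-type PetscSF `sf`):

     PetscSFWindowSetFlavorType(sf, PETSCSF_WINDOW_FLAVOR_DYNAMIC);

   or equivalently from the command line: -sf_window_flavor dynamic
*/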

static PetscErrorCode PetscSFWindowSetFlavorType_Window(PetscSF sf, PetscSFWindowFlavorType flavor)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  w->flavor = flavor;
  return 0;
}

/*@C
  PetscSFWindowGetFlavorType - Get flavor type for PetscSF communication

  Logically Collective

  Input Parameter:
. sf - star forest for communication

  Output Parameter:
. flavor - flavor type

  Level: advanced

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowSetFlavorType()`
@*/
PetscErrorCode PetscSFWindowGetFlavorType(PetscSF sf, PetscSFWindowFlavorType *flavor)
{
  PetscUseMethod(sf, "PetscSFWindowGetFlavorType_C", (PetscSF, PetscSFWindowFlavorType *), (sf, flavor));
  return 0;
}

static PetscErrorCode PetscSFWindowGetFlavorType_Window(PetscSF sf, PetscSFWindowFlavorType *flavor)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  *flavor = w->flavor;
  return 0;
}

/*@C
  PetscSFWindowSetSyncType - Set synchronization type for PetscSF communication

  Logically Collective

  Input Parameters:
+ sf   - star forest for communication
- sync - synchronization type

  Options Database Key:
. -sf_window_sync <sync> - sets the synchronization type FENCE, LOCK, or ACTIVE (see PetscSFWindowSyncType)

  Level: advanced

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowGetSyncType()`
@*/
PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf, PetscSFWindowSyncType sync)
{
  PetscTryMethod(sf, "PetscSFWindowSetSyncType_C", (PetscSF, PetscSFWindowSyncType), (sf, sync));
  return 0;
}

static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf, PetscSFWindowSyncType sync)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  w->sync = sync;
  return 0;
}
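
/* Usage sketch (illustrative): select lock-based passive-target synchronization

     PetscSFWindowSetSyncType(sf, PETSCSF_WINDOW_SYNC_LOCK);

   or from the command line: -sf_window_sync lock
*/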

/*@C
  PetscSFWindowGetSyncType - Get synchronization type for PetscSF communication

  Logically Collective

  Input Parameter:
. sf - star forest for communication

  Output Parameter:
. sync - synchronization type

  Level: advanced

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowSetSyncType()`
@*/
PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf, PetscSFWindowSyncType *sync)
{
  PetscUseMethod(sf, "PetscSFWindowGetSyncType_C", (PetscSF, PetscSFWindowSyncType *), (sf, sync));
  return 0;
}

static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf, PetscSFWindowSyncType *sync)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  *sync = w->sync;
  return 0;
}

/*@C
  PetscSFWindowSetInfo - Set the MPI_Info handle that will be used for subsequent windows allocation

  Logically Collective

  Input Parameters:
+ sf   - star forest for communication
- info - MPI_Info handle

  Level: advanced

  Notes:
  The info handle is duplicated with a call to MPI_Info_dup() unless info = MPI_INFO_NULL.

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowGetInfo()`
@*/
PetscErrorCode PetscSFWindowSetInfo(PetscSF sf, MPI_Info info)
{
  PetscTryMethod(sf, "PetscSFWindowSetInfo_C", (PetscSF, MPI_Info), (sf, info));
  return 0;
}

static PetscErrorCode PetscSFWindowSetInfo_Window(PetscSF sf, MPI_Info info)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  if (w->info != MPI_INFO_NULL) MPI_Info_free(&w->info);
  if (info != MPI_INFO_NULL) MPI_Info_dup(info, &w->info);
  return 0;
}
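
/* Usage sketch (illustrative): pass window hints through an MPI_Info; since the
   handle is duplicated by PetscSFWindowSetInfo(), the caller may free it afterwards.

     MPI_Info info;
     MPI_Info_create(&info);
     MPI_Info_set(info, "no_locks", "true"); // standard MPI-3 window hint
     PetscSFWindowSetInfo(sf, info);
     MPI_Info_free(&info);
*/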

/*@C
  PetscSFWindowGetInfo - Get the MPI_Info handle used for windows allocation

  Logically Collective

  Input Parameter:
. sf - star forest for communication

  Output Parameter:
. info - MPI_Info handle

  Level: advanced

  Notes:
  If PetscSFWindowSetInfo() has not been called, this returns MPI_INFO_NULL.

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowSetInfo()`
@*/
PetscErrorCode PetscSFWindowGetInfo(PetscSF sf, MPI_Info *info)
{
  PetscUseMethod(sf, "PetscSFWindowGetInfo_C", (PetscSF, MPI_Info *), (sf, info));
  return 0;
}

static PetscErrorCode PetscSFWindowGetInfo_Window(PetscSF sf, MPI_Info *info)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  *info = w->info;
  return 0;
}

/*
  PetscSFGetWindow - Get a window for use with a given data type

  Collective on PetscSF

  Input Parameters:
+ sf          - star forest
. unit        - data type
. array       - array to be sent
. sync        - type of synchronization PetscSFWindowSyncType
. epoch       - PETSC_TRUE to acquire the window and start an epoch, PETSC_FALSE to just acquire the window
. fenceassert - assert parameter for call to MPI_Win_fence(), if sync == PETSCSF_WINDOW_SYNC_FENCE
. postassert  - assert parameter for call to MPI_Win_post(), if sync == PETSCSF_WINDOW_SYNC_ACTIVE
- startassert - assert parameter for call to MPI_Win_start(), if sync == PETSCSF_WINDOW_SYNC_ACTIVE

  Output Parameters:
+ target_disp - target_disp argument for RMA calls (significant for PETSCSF_WINDOW_FLAVOR_DYNAMIC only)
. reqs        - array of requests (significant for sync == PETSCSF_WINDOW_SYNC_LOCK only)
- win         - window

  Level: developer

.seealso: `PetscSFGetRootRanks()`, `PetscSFWindowGetDataTypes()`
*/
static PetscErrorCode PetscSFGetWindow(PetscSF sf, MPI_Datatype unit, void *array, PetscSFWindowSyncType sync, PetscBool epoch, PetscMPIInt fenceassert, PetscMPIInt postassert, PetscMPIInt startassert, const MPI_Aint **target_disp, MPI_Request **reqs, MPI_Win *win)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Aint        lb, lb_true, bytes, bytes_true;
  PetscSFWinLink  link;
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
  MPI_Aint winaddr;
  PetscInt nranks;
#endif
  PetscBool reuse = PETSC_FALSE, update = PETSC_FALSE;
  PetscBool dummy[2];
  MPI_Aint  wsize;

  MPI_Type_get_extent(unit, &lb, &bytes);
  MPI_Type_get_true_extent(unit, &lb_true, &bytes_true);

  if (w->flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
  for (link = w->wins; reuse && link; link = link->next) {
    PetscBool winok = PETSC_FALSE;
    if (w->flavor != link->flavor) continue;
    switch (w->flavor) {
    case PETSCSF_WINDOW_FLAVOR_DYNAMIC: /* check available matching array, error if in use (we additionally check that the matching condition is the same across processes) */
      if (array == link->addr) {
        if (PetscDefined(USE_DEBUG)) {
          dummy[0] = PETSC_TRUE;
          dummy[1] = PETSC_TRUE;
          MPI_Allreduce(MPI_IN_PLACE, dummy, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)sf));
          MPI_Allreduce(MPI_IN_PLACE, dummy + 1, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf));
          /* error if the matching condition differs across processes */
          if (dummy[0] != dummy[1]) SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PETSCSF_WINDOW_FLAVOR_DYNAMIC requires the same root data to be used by all processes");
        }
        winok       = PETSC_TRUE;
        link->paddr = array;
      } else if (PetscDefined(USE_DEBUG)) {
        dummy[0] = PETSC_FALSE;
        dummy[1] = PETSC_FALSE;
        MPI_Allreduce(MPI_IN_PLACE, dummy, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)sf));
        MPI_Allreduce(MPI_IN_PLACE, dummy + 1, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf));
        /* error if the matching condition differs across processes */
        if (dummy[0] != dummy[1]) SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PETSCSF_WINDOW_FLAVOR_DYNAMIC requires the same root data to be used by all processes");
      }
      break;
    case PETSCSF_WINDOW_FLAVOR_ALLOCATE: /* check available by matching size, allocate if in use */
    case PETSCSF_WINDOW_FLAVOR_SHARED:
      if (!link->inuse && bytes == (MPI_Aint)link->bytes) {
        update      = PETSC_TRUE;
        link->paddr = array;
        winok       = PETSC_TRUE;
      }
      break;
    default:
      SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for flavor %s", PetscSFWindowFlavorTypes[w->flavor]);
    }
    if (winok) {
      *win = link->win;
      PetscInfo(sf, "Reusing window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n", link->win, link->flavor, PetscObjectComm((PetscObject)sf));
      goto found;
    }
  }

  wsize = (MPI_Aint)bytes * sf->nroots;
  PetscNew(&link);
  link->bytes           = bytes;
  link->next            = w->wins;
  link->flavor          = w->flavor;
  link->dyn_target_addr = NULL;
  link->reqs            = NULL;
  w->wins               = link;
  if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
    PetscInt i;

    PetscMalloc1(sf->nranks, &link->reqs);
    for (i = 0; i < sf->nranks; i++) link->reqs[i] = MPI_REQUEST_NULL;
  }
  switch (w->flavor) {
  case PETSCSF_WINDOW_FLAVOR_CREATE:
    MPI_Win_create(array, wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->win);
    link->addr  = array;
    link->paddr = array;
    break;
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
  case PETSCSF_WINDOW_FLAVOR_DYNAMIC:
    MPI_Win_create_dynamic(w->info, PetscObjectComm((PetscObject)sf), &link->win);
  #if defined(PETSC_HAVE_OMPI_MAJOR_VERSION) /* some Open MPI versions do not support MPI_Win_attach(win,NULL,0); */
    MPI_Win_attach(link->win, wsize ? array : (void *)dummy, wsize);
  #else
    MPI_Win_attach(link->win, array, wsize);
  #endif
    link->addr  = array;
    link->paddr = array;

    PetscSFSetUp(w->dynsf);
    PetscSFGetRootRanks(w->dynsf, &nranks, NULL, NULL, NULL, NULL);
    PetscMalloc1(nranks, &link->dyn_target_addr);
    MPI_Get_address(array, &winaddr);
    PetscSFBcastBegin(w->dynsf, MPI_AINT, &winaddr, link->dyn_target_addr, MPI_REPLACE);
    PetscSFBcastEnd(w->dynsf, MPI_AINT, &winaddr, link->dyn_target_addr, MPI_REPLACE);
    break;
  case PETSCSF_WINDOW_FLAVOR_ALLOCATE:
    MPI_Win_allocate(wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->addr, &link->win);
    update      = PETSC_TRUE;
    link->paddr = array;
    break;
#endif
#if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
  case PETSCSF_WINDOW_FLAVOR_SHARED:
    MPI_Win_allocate_shared(wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->addr, &link->win);
    update      = PETSC_TRUE;
    link->paddr = array;
    break;
#endif
  default:
    SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for flavor %s", PetscSFWindowFlavorTypes[w->flavor]);
  }
  PetscInfo(sf, "New window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n", link->win, link->flavor, PetscObjectComm((PetscObject)sf));
  *win = link->win;

found:
  if (target_disp) *target_disp = link->dyn_target_addr;
  if (reqs) *reqs = link->reqs;
  if (update) { /* locks are needed for the "separate" memory model only; the fence guarantees memory synchronization */
    PetscMPIInt rank;

    MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank);
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, MPI_MODE_NOCHECK, *win);
    PetscMemcpy(link->addr, array, sf->nroots * bytes);
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
      MPI_Win_unlock(rank, *win);
      MPI_Win_fence(0, *win);
    }
  }
  link->inuse = PETSC_TRUE;
  link->epoch = epoch;
  if (epoch) {
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      MPI_Win_fence(fenceassert, *win);
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group   ingroup, outgroup;
      PetscMPIInt isize, osize;

      /* Open MPI 4.0.2 with btl=vader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         so we do not even issue the corresponding start and post calls.
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         start(outgroup) has a matching post(ingroup),
         and this is guaranteed by PetscSF. */
      PetscSFGetGroups(sf, &ingroup, &outgroup);
      MPI_Group_size(ingroup, &isize);
      MPI_Group_size(outgroup, &osize);
      if (isize) MPI_Win_post(ingroup, postassert, *win);
      if (osize) MPI_Win_start(outgroup, startassert, *win);
    } break;
    default:
      SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Unknown synchronization type");
    }
  }
  return 0;
}

/*
  PetscSFFindWindow - Finds a window that is already in use

  Not Collective

  Input Parameters:
+ sf    - star forest
. unit  - data type
- array - array with which the window is associated

  Output Parameters:
+ win  - window
- reqs - outstanding requests associated with the window

  Level: developer

.seealso: `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
*/
static PetscErrorCode PetscSFFindWindow(PetscSF sf, MPI_Datatype unit, const void *array, MPI_Win *win, MPI_Request **reqs)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  PetscSFWinLink  link;

  *win = MPI_WIN_NULL;
  for (link = w->wins; link; link = link->next) {
    if (array == link->paddr) {
      PetscInfo(sf, "Window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n", link->win, link->flavor, PetscObjectComm((PetscObject)sf));
      *win  = link->win;
      *reqs = link->reqs;
      return 0;
    }
  }
  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Requested window not in use");
}

/*
  PetscSFRestoreWindow - Restores a window obtained with PetscSFGetWindow()

  Collective

  Input Parameters:
+ sf          - star forest
. unit        - data type
. array       - array associated with window
. sync        - type of synchronization PetscSFWindowSyncType
. epoch       - close an epoch, must match argument to PetscSFGetWindow()
. fenceassert - assert parameter for call to MPI_Win_fence(), if sync == PETSCSF_WINDOW_SYNC_FENCE
. update      - whether the array must be updated from the window memory
- win         - window

  Level: developer

.seealso: `PetscSFFindWindow()`
*/
static PetscErrorCode PetscSFRestoreWindow(PetscSF sf, MPI_Datatype unit, void *array, PetscSFWindowSyncType sync, PetscBool epoch, PetscMPIInt fenceassert, PetscBool update, MPI_Win *win)
{
  PetscSF_Window         *w = (PetscSF_Window *)sf->data;
  PetscSFWinLink         *p, link;
  PetscBool               reuse = PETSC_FALSE;
  PetscSFWindowFlavorType flavor;
  void                   *laddr;
  size_t                  bytes;

  for (p = &w->wins; *p; p = &(*p)->next) {
    link = *p;
    if (*win == link->win) {
      if (epoch != link->epoch) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Restoring window without ending epoch");
      laddr  = link->addr;
      flavor = link->flavor;
      bytes  = link->bytes;
      if (flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
      else {
        *p     = link->next;
        update = PETSC_FALSE;
      } /* remove from list */
      goto found;
    }
  }
  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Requested window not in use");

found:
  PetscInfo(sf, "Window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n", link->win, link->flavor, PetscObjectComm((PetscObject)sf));
  if (epoch) {
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      MPI_Win_fence(fenceassert, *win);
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group   ingroup, outgroup;
      PetscMPIInt isize, osize;

      /* Open MPI 4.0.2 with btl=vader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         - each process that issues a call to MPI_Win_start issues a call to MPI_Win_complete
         - each process that issues a call to MPI_Win_post issues a call to MPI_Win_wait */
      PetscSFGetGroups(sf, &ingroup, &outgroup);
      MPI_Group_size(ingroup, &isize);
      MPI_Group_size(outgroup, &osize);
      if (osize) MPI_Win_complete(*win);
      if (isize) MPI_Win_wait(*win);
    } break;
    default:
      SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Unknown synchronization type");
    }
  }
  if (update) {
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) MPI_Win_fence(MPI_MODE_NOPUT | MPI_MODE_NOSUCCEED, *win);
    PetscMemcpy(array, laddr, sf->nroots * bytes);
  }
  link->epoch = PETSC_FALSE;
  link->inuse = PETSC_FALSE;
  link->paddr = NULL;
  if (!reuse) {
    PetscFree(link->dyn_target_addr);
    PetscFree(link->reqs);
    MPI_Win_free(&link->win);
    PetscFree(link);
    *win = MPI_WIN_NULL;
  }
  return 0;
}
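
/* Lifecycle sketch (illustrative): every PetscSFGetWindow() that opened an epoch is
   paired with a PetscSFRestoreWindow() that closes it, e.g. inside a broadcast:

     PetscSFGetWindow(sf, unit, rootdata, sync, PETSC_TRUE, fenceassert, postassert, startassert, &disp, &reqs, &win);
     // ... MPI_Get()/MPI_Accumulate() calls against win ...
     PetscSFRestoreWindow(sf, unit, rootdata, sync, PETSC_TRUE, fenceassert, update, &win);
*/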

static PetscErrorCode PetscSFSetUp_Window(PetscSF sf)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Group       ingroup, outgroup;

  PetscSFSetUpRanks(sf, MPI_GROUP_EMPTY);
  if (!w->dynsf) {
    PetscInt     i;
    PetscSFNode *remotes;

    PetscMalloc1(sf->nranks, &remotes);
    for (i = 0; i < sf->nranks; i++) {
      remotes[i].rank  = sf->ranks[i];
      remotes[i].index = 0;
    }
    PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &w->dynsf);
    PetscSFWindowSetFlavorType(w->dynsf, PETSCSF_WINDOW_FLAVOR_CREATE); /* break recursion */
    PetscSFSetGraph(w->dynsf, 1, sf->nranks, NULL, PETSC_OWN_POINTER, remotes, PETSC_OWN_POINTER);
  }
  switch (w->sync) {
  case PETSCSF_WINDOW_SYNC_ACTIVE:
    PetscSFGetGroups(sf, &ingroup, &outgroup);
  default:
    break;
  }
  return 0;
}
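
/* Note (illustrative, not from the original source): w->dynsf has one root per
   process and one leaf per remote root rank, so a single
   PetscSFBcastBegin/End(w->dynsf, MPI_AINT, &winaddr, dyn_target_addr, MPI_REPLACE)
   distributes each process's window base address to every process that targets it,
   as used by PETSCSF_WINDOW_FLAVOR_DYNAMIC in PetscSFGetWindow() above. */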

static PetscErrorCode PetscSFSetFromOptions_Window(PetscSF sf, PetscOptionItems *PetscOptionsObject)
{
  PetscSF_Window         *w      = (PetscSF_Window *)sf->data;
  PetscSFWindowFlavorType flavor = w->flavor;

  PetscOptionsHeadBegin(PetscOptionsObject, "PetscSF Window options");
  PetscOptionsEnum("-sf_window_sync", "synchronization type to use for PetscSF Window communication", "PetscSFWindowSetSyncType", PetscSFWindowSyncTypes, (PetscEnum)w->sync, (PetscEnum *)&w->sync, NULL);
  PetscOptionsEnum("-sf_window_flavor", "flavor to use for PetscSF Window creation", "PetscSFWindowSetFlavorType", PetscSFWindowFlavorTypes, (PetscEnum)flavor, (PetscEnum *)&flavor, NULL);
  PetscSFWindowSetFlavorType(sf, flavor);
  PetscOptionsHeadEnd();
  return 0;
}

static PetscErrorCode PetscSFReset_Window(PetscSF sf)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  PetscSFDataLink link, next;
  PetscSFWinLink  wlink, wnext;
  PetscInt        i;

  for (link = w->link; link; link = next) {
    next = link->next;
    MPI_Type_free(&link->unit);
    for (i = 0; i < sf->nranks; i++) {
      MPI_Type_free(&link->mine[i]);
      MPI_Type_free(&link->remote[i]);
    }
    PetscFree2(link->mine, link->remote);
    PetscFree(link);
  }
  w->link = NULL;
  for (wlink = w->wins; wlink; wlink = wnext) {
    wnext = wlink->next;

    PetscFree(wlink->dyn_target_addr);
    PetscFree(wlink->reqs);
    MPI_Win_free(&wlink->win);
    PetscFree(wlink);
  }
  w->wins = NULL;
  PetscSFDestroy(&w->dynsf);
  if (w->info != MPI_INFO_NULL) MPI_Info_free(&w->info);
  return 0;
}

static PetscErrorCode PetscSFDestroy_Window(PetscSF sf)
{
  PetscSFReset_Window(sf);
  PetscFree(sf->data);
  PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetSyncType_C", NULL);
  PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetSyncType_C", NULL);
  PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetFlavorType_C", NULL);
  PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetFlavorType_C", NULL);
  PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetInfo_C", NULL);
  PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetInfo_C", NULL);
  return 0;
}

static PetscErrorCode PetscSFView_Window(PetscSF sf, PetscViewer viewer)
{
  PetscSF_Window   *w = (PetscSF_Window *)sf->data;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscViewerGetFormat(viewer, &format);
  PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
  if (iascii) {
    PetscViewerASCIIPrintf(viewer, "  current flavor=%s synchronization=%s MultiSF sort=%s\n", PetscSFWindowFlavorTypes[w->flavor], PetscSFWindowSyncTypes[w->sync], sf->rankorder ? "rank-order" : "unordered");
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      if (w->info != MPI_INFO_NULL) {
        PetscMPIInt k, nkeys;
        char        key[MPI_MAX_INFO_KEY], value[MPI_MAX_INFO_VAL];

        MPI_Info_get_nkeys(w->info, &nkeys);
        PetscViewerASCIIPrintf(viewer, "  current info with %d keys. Ordered key-value pairs follow:\n", nkeys);
        for (k = 0; k < nkeys; k++) {
          PetscMPIInt flag;

          MPI_Info_get_nthkey(w->info, k, key);
          MPI_Info_get(w->info, key, MPI_MAX_INFO_VAL, value, &flag);
          PetscViewerASCIIPrintf(viewer, "    %s = %s\n", key, value);
        }
      } else {
        PetscViewerASCIIPrintf(viewer, "  current info=MPI_INFO_NULL\n");
      }
    }
  }
  return 0;
}

static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf, PetscSFDuplicateOption opt, PetscSF newsf)
{
  PetscSF_Window       *w = (PetscSF_Window *)sf->data;
  PetscSFWindowSyncType synctype;

  synctype = w->sync;
  /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */
  if (!sf->setupcalled) synctype = PETSCSF_WINDOW_SYNC_LOCK;
  PetscSFWindowSetSyncType(newsf, synctype);
  PetscSFWindowSetFlavorType(newsf, w->flavor);
  PetscSFWindowSetInfo(newsf, w->info);
  return 0;
}

static PetscErrorCode PetscSFBcastBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
{
  PetscSF_Window     *w = (PetscSF_Window *)sf->data;
  PetscInt            i, nranks;
  const PetscMPIInt  *ranks;
  const MPI_Aint     *target_disp;
  const MPI_Datatype *mine, *remote;
  MPI_Request        *reqs;
  MPI_Win             win;

  PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL);
  PetscSFWindowGetDataTypes(sf, unit, &mine, &remote);
  PetscSFGetWindow(sf, unit, (void *)rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE, MPI_MODE_NOPUT, 0, &target_disp, &reqs, &win);
  for (i = 0; i < nranks; i++) {
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
      MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], MPI_MODE_NOCHECK, win);
#if defined(PETSC_HAVE_MPI_RGET)
      MPI_Rget(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win, &reqs[i]);
#else
      MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win);
#endif
    } else {
      MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win);
    }
  }
  return 0;
}

PetscErrorCode PetscSFBcastEnd_Window(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Win         win;
  MPI_Request    *reqs = NULL;

  PetscSFFindWindow(sf, unit, rootdata, &win, &reqs);
  if (reqs) MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE);
  if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
    PetscInt           i, nranks;
    const PetscMPIInt *ranks;

    PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL);
    for (i = 0; i < nranks; i++) MPI_Win_unlock(ranks[i], win);
  }
  PetscSFRestoreWindow(sf, unit, (void *)rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSTORE | MPI_MODE_NOSUCCEED, PETSC_FALSE, &win);
  return 0;
}
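
/* Usage sketch (illustrative, assuming a window-type SF with a graph already set):

     PetscSFBcastBegin(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE);
     // overlap local computation here if desired
     PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE);
*/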

PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
{
  PetscSF_Window     *w = (PetscSF_Window *)sf->data;
  PetscInt            i, nranks;
  const PetscMPIInt  *ranks;
  const MPI_Aint     *target_disp;
  const MPI_Datatype *mine, *remote;
  MPI_Win             win;

  PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL);
  PetscSFWindowGetDataTypes(sf, unit, &mine, &remote);
  PetscSFWindowOpTranslate(&op);
  PetscSFGetWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPRECEDE, 0, 0, &target_disp, NULL, &win);
  for (i = 0; i < nranks; i++) {
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], MPI_MODE_NOCHECK, win);
    MPI_Accumulate((void *)leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win);
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) MPI_Win_unlock(ranks[i], win);
  }
  return 0;
}

static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Win         win;
  MPI_Request    *reqs = NULL;

  PetscSFFindWindow(sf, unit, rootdata, &win, &reqs);
  if (reqs) MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE);
  PetscSFRestoreWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSUCCEED, PETSC_TRUE, &win);
  return 0;
}
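
/* Usage sketch (illustrative): accumulate leaf values into roots with addition:

     PetscSFReduceBegin(sf, MPIU_SCALAR, leafdata, rootdata, MPIU_SUM);
     PetscSFReduceEnd(sf, MPIU_SCALAR, leafdata, rootdata, MPIU_SUM);

   Internally MPIU_SUM is translated by PetscSFWindowOpTranslate() to a predefined
   op before being handed to MPI_Accumulate(). */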

static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscInt            i, nranks;
  const PetscMPIInt  *ranks;
  const MPI_Datatype *mine, *remote;
  const MPI_Aint     *target_disp;
  MPI_Win             win;
  PetscSF_Window     *w = (PetscSF_Window *)sf->data;
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSFWindowFlavorType oldf;
#endif

  PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL);
  PetscSFWindowGetDataTypes(sf, unit, &mine, &remote);
  PetscSFWindowOpTranslate(&op);
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  /* FetchAndOp without MPI_Get_accumulate() requires locking.
     We create a new window every time so as not to interfere with a user-defined MPI_Info that may have set "no_locks"="true". */
  oldf      = w->flavor;
  w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
  PetscSFGetWindow(sf, unit, rootdata, PETSCSF_WINDOW_SYNC_LOCK, PETSC_FALSE, 0, 0, 0, &target_disp, NULL, &win);
#else
  PetscSFGetWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPRECEDE, 0, 0, &target_disp, NULL, &win);
#endif
  for (i = 0; i < nranks; i++) {
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, ranks[i], 0, win);
    MPI_Get(leafupdate, 1, mine[i], ranks[i], tdp, 1, remote[i], win);
    MPI_Accumulate((void *)leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win);
    MPI_Win_unlock(ranks[i], win);
#else
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], 0, win);
    MPI_Get_accumulate((void *)leafdata, 1, mine[i], leafupdate, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win);
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) MPI_Win_unlock(ranks[i], win);
#endif
  }
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  w->flavor = oldf;
#endif
  return 0;
}

static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  MPI_Win win;
#if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
#endif
  MPI_Request *reqs = NULL;

  PetscSFFindWindow(sf, unit, rootdata, &win, &reqs);
  if (reqs) MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE);
#if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSFRestoreWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSUCCEED, PETSC_TRUE, &win);
#else
  PetscSFRestoreWindow(sf, unit, rootdata, PETSCSF_WINDOW_SYNC_LOCK, PETSC_FALSE, 0, PETSC_TRUE, &win);
#endif
  return 0;
}
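
/* Usage sketch (illustrative): fetch the previous root values into leafupdate while
   combining leafdata into the roots, e.g. to hand out unique offsets:

     PetscSFFetchAndOpBegin(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM);
     PetscSFFetchAndOpEnd(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM);
*/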

PETSC_INTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf)
{
  PetscSF_Window *w;

  sf->ops->SetUp           = PetscSFSetUp_Window;
  sf->ops->SetFromOptions  = PetscSFSetFromOptions_Window;
  sf->ops->Reset           = PetscSFReset_Window;
  sf->ops->Destroy         = PetscSFDestroy_Window;
  sf->ops->View            = PetscSFView_Window;
  sf->ops->Duplicate       = PetscSFDuplicate_Window;
  sf->ops->BcastBegin      = PetscSFBcastBegin_Window;
  sf->ops->BcastEnd        = PetscSFBcastEnd_Window;
  sf->ops->ReduceBegin     = PetscSFReduceBegin_Window;
  sf->ops->ReduceEnd       = PetscSFReduceEnd_Window;
  sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window;
  sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Window;

  PetscNew(&w);
  sf->data  = (void *)w;
  w->sync   = PETSCSF_WINDOW_SYNC_FENCE;
  w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
  w->info   = MPI_INFO_NULL;

  PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetSyncType_C", PetscSFWindowSetSyncType_Window);
  PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetSyncType_C", PetscSFWindowGetSyncType_Window);
  PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetFlavorType_C", PetscSFWindowSetFlavorType_Window);
  PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetFlavorType_C", PetscSFWindowGetFlavorType_Window);
  PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetInfo_C", PetscSFWindowSetInfo_Window);
  PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetInfo_C", PetscSFWindowGetInfo_Window);

#if defined(OMPI_MAJOR_VERSION) && (OMPI_MAJOR_VERSION < 1 || (OMPI_MAJOR_VERSION == 1 && OMPI_MINOR_VERSION <= 6))
  {
    PetscBool ackbug = PETSC_FALSE;
    PetscOptionsGetBool(NULL, NULL, "-acknowledge_ompi_onesided_bug", &ackbug, NULL);
    if (ackbug) {
      PetscInfo(sf, "Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.\n");
    } else SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_LIB, "Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed");
  }
#endif
  return 0;
}
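
/* Usage sketch (illustrative): create a star forest that communicates through MPI
   one-sided windows:

     PetscSF sf;
     PetscSFCreate(PETSC_COMM_WORLD, &sf);
     PetscSFSetType(sf, PETSCSFWINDOW);
     PetscSFSetFromOptions(sf); // honors -sf_window_sync and -sf_window_flavor
     // PetscSFSetGraph(sf, ...); then Bcast/Reduce/FetchAndOp as usual
     PetscSFDestroy(&sf);
*/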