Actual source code: sfneighbor.c
#include <../src/vec/is/sf/impls/basic/sfpack.h>
#include <../src/vec/is/sf/impls/basic/sfbasic.h>

/* A convenience temporary type */
#if defined(PETSC_HAVE_MPI_LARGE_COUNT) && defined(PETSC_USE_64BIT_INDICES)
typedef PetscInt    PetscSFCount;
#else
typedef PetscMPIInt PetscSFCount;
#endif

typedef struct {
  SFBASICHEADER;
  MPI_Comm     comms[2];       /* Communicators with distributed topology in both directions */
  PetscBool    initialized[2]; /* Are the two communicators initialized? */
  PetscSFCount *rootdispls,*rootcounts,*leafdispls,*leafcounts; /* displs/counts for non-distinguished ranks */
  PetscMPIInt  *rootweights,*leafweights;
  PetscInt     rootdegree,leafdegree;
} PetscSF_Neighbor;

/*===================================================================================*/
/*              Internal utility routines                                            */
/*===================================================================================*/

static inline PetscErrorCode PetscLogMPIMessages(PetscInt nsend,PetscSFCount *sendcnts,MPI_Datatype sendtype,PetscInt nrecv,PetscSFCount *recvcnts,MPI_Datatype recvtype)
{
#if defined(PETSC_USE_LOG)
  petsc_isend_ct += (PetscLogDouble)nsend;
  petsc_irecv_ct += (PetscLogDouble)nrecv;

  if (sendtype != MPI_DATATYPE_NULL) {
    PetscMPIInt i,typesize;
    MPI_Type_size(sendtype,&typesize);
    for (i=0; i<nsend; i++) petsc_isend_len += (PetscLogDouble)(sendcnts[i]*typesize);
  }

  if (recvtype != MPI_DATATYPE_NULL) {
    PetscMPIInt i,typesize;
    MPI_Type_size(recvtype,&typesize);
    for (i=0; i<nrecv; i++) petsc_irecv_len += (PetscLogDouble)(recvcnts[i]*typesize);
  }
#endif
  return 0;
}

/* Get the communicator with distributed graph topology, which is not cheap to build so we do it on demand (instead of at PetscSFSetUp time) */
static PetscErrorCode PetscSFGetDistComm_Neighbor(PetscSF sf,PetscSFDirection direction,MPI_Comm *distcomm)
{
  PetscSF_Neighbor  *dat = (PetscSF_Neighbor*)sf->data;
  PetscInt          nrootranks,ndrootranks,nleafranks,ndleafranks;
  const PetscMPIInt *rootranks,*leafranks;
  MPI_Comm          comm;

  PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,NULL,NULL);      /* Which ranks will access my roots (I am a destination) */
  PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,NULL,NULL,NULL); /* My leaves will access whose roots (I am a source) */

  if (!dat->initialized[direction]) {
    const PetscMPIInt indegree  = nrootranks-ndrootranks,*sources      = rootranks+ndrootranks;
    const PetscMPIInt outdegree = nleafranks-ndleafranks,*destinations = leafranks+ndleafranks;
    MPI_Comm          *mycomm   = &dat->comms[direction];
    PetscObjectGetComm((PetscObject)sf,&comm);
    if (direction == PETSCSF_LEAF2ROOT) {
      MPI_Dist_graph_create_adjacent(comm,indegree,sources,dat->rootweights,outdegree,destinations,dat->leafweights,MPI_INFO_NULL,1/*reorder*/,mycomm);
    } else { /* PETSCSF_ROOT2LEAF, reverse src & dest */
      MPI_Dist_graph_create_adjacent(comm,outdegree,destinations,dat->leafweights,indegree,sources,dat->rootweights,MPI_INFO_NULL,1/*reorder*/,mycomm);
    }
    dat->initialized[direction] = PETSC_TRUE;
  }
  *distcomm = dat->comms[direction];
  return 0;
}
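/* A minimal, hypothetical sketch (not part of PETSc or of this build) of the plain-MPI pattern
   the routine above wraps: each rank declares its incoming/outgoing neighbors once with
   MPI_Dist_graph_create_adjacent(), then exchanges variable-sized blocks with
   MPI_Neighbor_alltoallv() on the resulting topology communicator. The function name and
   parameters below are illustrative assumptions; note the real code above passes the scaled
   rootweights/leafweights instead of MPI_UNWEIGHTED as reordering hints. */
#if 0
static void ExampleNeighborExchange(MPI_Comm comm,
                                    PetscMPIInt indegree,const PetscMPIInt *sources,       /* ranks I receive from */
                                    PetscMPIInt outdegree,const PetscMPIInt *destinations, /* ranks I send to */
                                    const double *sendbuf,const PetscMPIInt *sendcounts,const PetscMPIInt *sdispls, /* outdegree entries */
                                    double *recvbuf,const PetscMPIInt *recvcounts,const PetscMPIInt *rdispls)       /* indegree entries */
{
  MPI_Comm nbrcomm;
  /* Build the distributed graph communicator; MPI_UNWEIGHTED is the standard placeholder when no weights are supplied */
  MPI_Dist_graph_create_adjacent(comm,indegree,sources,MPI_UNWEIGHTED,outdegree,destinations,MPI_UNWEIGHTED,MPI_INFO_NULL,1/*reorder*/,&nbrcomm);
  /* Each rank sends sendcounts[k] doubles to its k-th destination and receives recvcounts[k] doubles from its k-th source */
  MPI_Neighbor_alltoallv(sendbuf,sendcounts,sdispls,MPI_DOUBLE,recvbuf,recvcounts,rdispls,MPI_DOUBLE,nbrcomm);
  MPI_Comm_free(&nbrcomm);
}
#endif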
/*===================================================================================*/
/*              Implementations of SF public APIs                                    */
/*===================================================================================*/

static PetscErrorCode PetscSFSetUp_Neighbor(PetscSF sf)
{
  PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data;
  PetscInt         i,j,nrootranks,ndrootranks,nleafranks,ndleafranks;
  const PetscInt   *rootoffset,*leafoffset;
  PetscMPIInt      m,n;

  /* SFNeighbor inherits from Basic */
  PetscSFSetUp_Basic(sf);
  /* SFNeighbor specific */
  sf->persistent = PETSC_FALSE;
  PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);
  PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);
  dat->rootdegree = m = (PetscMPIInt)(nrootranks-ndrootranks);
  dat->leafdegree = n = (PetscMPIInt)(nleafranks-ndleafranks);
  sf->nleafreqs   = 0;
  dat->nrootreqs  = 1;

  /* Only setup MPI displs/counts for non-distinguished ranks. Distinguished ranks use shared memory */
  PetscMalloc6(m,&dat->rootdispls,m,&dat->rootcounts,m,&dat->rootweights,n,&dat->leafdispls,n,&dat->leafcounts,n,&dat->leafweights);

#if defined(PETSC_HAVE_MPI_LARGE_COUNT) && defined(PETSC_USE_64BIT_INDICES)
  for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
    dat->rootdispls[j]  = rootoffset[i]-rootoffset[ndrootranks];
    dat->rootcounts[j]  = rootoffset[i+1]-rootoffset[i];
    dat->rootweights[j] = (PetscMPIInt)((PetscReal)dat->rootcounts[j]/(PetscReal)PETSC_MAX_INT*2147483647); /* Scale to range of PetscMPIInt */
  }

  for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
    dat->leafdispls[j]  = leafoffset[i]-leafoffset[ndleafranks];
    dat->leafcounts[j]  = leafoffset[i+1]-leafoffset[i];
    dat->leafweights[j] = (PetscMPIInt)((PetscReal)dat->leafcounts[j]/(PetscReal)PETSC_MAX_INT*2147483647);
  }
#else
  for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
    PetscMPIIntCast(rootoffset[i]-rootoffset[ndrootranks],&m); dat->rootdispls[j] = m;
    PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);         dat->rootcounts[j] = n;
    dat->rootweights[j] = n;
  }

  for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
    PetscMPIIntCast(leafoffset[i]-leafoffset[ndleafranks],&m); dat->leafdispls[j] = m;
    PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);         dat->leafcounts[j] = n;
    dat->leafweights[j] = n;
  }
#endif
  return 0;
}
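/* Worked example of the displs/counts computed above (numbers are illustrative only): with
   nrootranks = 3, ndrootranks = 1 and rootoffset = {0, 2, 5, 9}, the two non-distinguished
   root ranks get counts {3, 4} and displacements {0, 3}, i.e. they pack into one contiguous
   7-entry MPI buffer that excludes the distinguished rank served through shared memory. */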
static PetscErrorCode PetscSFReset_Neighbor(PetscSF sf)
{
  PetscInt         i;
  PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data;

  PetscFree6(dat->rootdispls,dat->rootcounts,dat->rootweights,dat->leafdispls,dat->leafcounts,dat->leafweights);
  for (i=0; i<2; i++) {
    if (dat->initialized[i]) {
      MPI_Comm_free(&dat->comms[i]);
      dat->initialized[i] = PETSC_FALSE;
    }
  }
  PetscSFReset_Basic(sf); /* Common part */
  return 0;
}

static PetscErrorCode PetscSFDestroy_Neighbor(PetscSF sf)
{
  PetscSFReset_Neighbor(sf);
  PetscFree(sf->data);
  return 0;
}

static PetscErrorCode PetscSFBcastBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscSFLink      link;
  PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data;
  MPI_Comm         distcomm = MPI_COMM_NULL;
  void             *rootbuf = NULL,*leafbuf = NULL;
  MPI_Request      *req;

  PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link);
  PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata);
  /* Do neighborhood alltoallv for remote ranks */
  PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */);
  PetscSFGetDistComm_Neighbor(sf,PETSCSF_ROOT2LEAF,&distcomm);
  PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,&rootbuf,&leafbuf,&req,NULL);
  PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_ROOT2LEAF);
  /* OpenMPI-3.0 ran into error with rootdegree = leafdegree = 0, so we skip the call in this case */
  if (dat->rootdegree || dat->leafdegree) {
    MPIU_Ineighbor_alltoallv(rootbuf,dat->rootcounts,dat->rootdispls,unit,leafbuf,dat->leafcounts,dat->leafdispls,unit,distcomm,req);
  }
  PetscLogMPIMessages(dat->rootdegree,dat->rootcounts,unit,dat->leafdegree,dat->leafcounts,unit);
  PetscSFLinkScatterLocal(sf,link,PETSCSF_ROOT2LEAF,(void*)rootdata,leafdata,op);
  return 0;
}
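/* The matching end routine is PetscSFBcastEnd_Basic (installed in PetscSFCreate_Neighbor below),
   which waits on the request started by MPIU_Ineighbor_alltoallv above and unpacks the received
   leaf buffer into leafdata. */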
static inline PetscErrorCode PetscSFLeafToRootBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op,PetscSFOperation sfop,PetscSFLink *out)
{
  PetscSFLink      link;
  PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data;
  MPI_Comm         distcomm = MPI_COMM_NULL;
  void             *rootbuf = NULL,*leafbuf = NULL;
  MPI_Request      *req = NULL;

  PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,sfop,&link);
  PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata);
  /* Do neighborhood alltoallv for remote ranks */
  PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */);
  PetscSFGetDistComm_Neighbor(sf,PETSCSF_LEAF2ROOT,&distcomm);
  PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_LEAF2ROOT,&rootbuf,&leafbuf,&req,NULL);
  PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_LEAF2ROOT);
  if (dat->rootdegree || dat->leafdegree) {
    MPIU_Ineighbor_alltoallv(leafbuf,dat->leafcounts,dat->leafdispls,unit,rootbuf,dat->rootcounts,dat->rootdispls,unit,distcomm,req);
  }
  PetscLogMPIMessages(dat->leafdegree,dat->leafcounts,unit,dat->rootdegree,dat->rootcounts,unit);
  *out = link;
  return 0;
}

static PetscErrorCode PetscSFReduceBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscSFLeafToRootBegin_Neighbor(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_REDUCE,&link);
  PetscSFLinkScatterLocal(sf,link,PETSCSF_LEAF2ROOT,rootdata,(void*)leafdata,op);
  return 0;
}

static PetscErrorCode PetscSFFetchAndOpBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscSFLeafToRootBegin_Neighbor(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_FETCH,&link);
  PetscSFLinkFetchAndOpLocal(sf,link,rootdata,leafdata,leafupdate,op);
  return 0;
}
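/* Finish a fetch-and-op: complete the leaf-to-root exchange started in FetchAndOpBegin, apply op
   to the remote roots while fetching their prior values, then broadcast those values back to the
   leaves with a blocking neighbor alltoallv and unpack them into leafupdate. */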
static PetscErrorCode PetscSFFetchAndOpEnd_Neighbor(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscSFLink      link = NULL;
  MPI_Comm         comm = MPI_COMM_NULL;
  PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data;
  void             *rootbuf = NULL,*leafbuf = NULL;

  PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);
  PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT);
  /* Process remote fetch-and-op */
  PetscSFLinkFetchAndOpRemote(sf,link,rootdata,op);
  /* Bcast the updated rootbuf back to leaves */
  PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */);
  PetscSFGetDistComm_Neighbor(sf,PETSCSF_ROOT2LEAF,&comm);
  PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,&rootbuf,&leafbuf,NULL,NULL);
  PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_ROOT2LEAF);
  if (dat->rootdegree || dat->leafdegree) {
    MPIU_Neighbor_alltoallv(rootbuf,dat->rootcounts,dat->rootdispls,unit,leafbuf,dat->leafcounts,dat->leafdispls,unit,comm);
  }
  PetscLogMPIMessages(dat->rootdegree,dat->rootcounts,unit,dat->leafdegree,dat->leafcounts,unit);
  PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_FALSE/* host2device after recving */);
  PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafupdate,MPI_REPLACE);
  PetscSFLinkReclaim(sf,&link);
  return 0;
}

PETSC_INTERN PetscErrorCode PetscSFCreate_Neighbor(PetscSF sf)
{
  PetscSF_Neighbor *dat;

  sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;
  sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
  sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
  sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
  sf->ops->View                 = PetscSFView_Basic;

  sf->ops->SetUp           = PetscSFSetUp_Neighbor;
  sf->ops->Reset           = PetscSFReset_Neighbor;
  sf->ops->Destroy         = PetscSFDestroy_Neighbor;
  sf->ops->BcastBegin      = PetscSFBcastBegin_Neighbor;
  sf->ops->ReduceBegin     = PetscSFReduceBegin_Neighbor;
  sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Neighbor;
  sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Neighbor;

  PetscNewLog(sf,&dat);
  sf->data = (void*)dat;
  return 0;
}
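/* Usage sketch (illustrative, not part of this file): an application opts into this
   implementation with
     PetscSFSetType(sf,PETSCSFNEIGHBOR);
   or the command-line option -sf_type neighbor; subsequent PetscSFBcastBegin/End,
   PetscSFReduceBegin/End and PetscSFFetchAndOpBegin/End calls then go through the MPI-3
   neighborhood collectives set up above. */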