Actual source code: sfimpl.h

petsc-master 2020-08-25
  1: #if !defined(PETSCSFIMPL_H)
  2: #define PETSCSFIMPL_H

  4:  #include <petscsf.h>
  5:  #include <petsc/private/petscimpl.h>
  6:  #include <petscviewer.h>

  8: #if defined(PETSC_HAVE_CUDA)
  9:   #include <cuda_runtime.h>
 10:  #include <petsc/private/cudavecimpl.h>
 11: #endif

 13: #if defined(PETSC_HAVE_HIP)
 14:   #include <hip/hip_runtime.h>
 15: #endif

 17: PETSC_EXTERN PetscLogEvent PETSCSF_SetGraph;
 18: PETSC_EXTERN PetscLogEvent PETSCSF_SetUp;
 19: PETSC_EXTERN PetscLogEvent PETSCSF_BcastBegin;
 20: PETSC_EXTERN PetscLogEvent PETSCSF_BcastEnd;
 21: PETSC_EXTERN PetscLogEvent PETSCSF_BcastAndOpBegin;
 22: PETSC_EXTERN PetscLogEvent PETSCSF_BcastAndOpEnd;
 23: PETSC_EXTERN PetscLogEvent PETSCSF_ReduceBegin;
 24: PETSC_EXTERN PetscLogEvent PETSCSF_ReduceEnd;
 25: PETSC_EXTERN PetscLogEvent PETSCSF_FetchAndOpBegin;
 26: PETSC_EXTERN PetscLogEvent PETSCSF_FetchAndOpEnd;
 27: PETSC_EXTERN PetscLogEvent PETSCSF_EmbedSF;
 28: PETSC_EXTERN PetscLogEvent PETSCSF_DistSect;
 29: PETSC_EXTERN PetscLogEvent PETSCSF_SectSF;
 30: PETSC_EXTERN PetscLogEvent PETSCSF_RemoteOff;
 31: PETSC_EXTERN PetscLogEvent PETSCSF_Pack;
 32: PETSC_EXTERN PetscLogEvent PETSCSF_Unpack;

 34: typedef enum {PETSCSF_ROOT2LEAF=0, PETSCSF_LEAF2ROOT} PetscSFDirection;
 35: typedef enum {PETSCSF_BCAST=0, PETSCSF_REDUCE, PETSCSF_FETCH} PetscSFOperation;
 36: typedef enum {PETSC_MEMTYPE_HOST=0, PETSC_MEMTYPE_DEVICE} PetscMemType;

 38: struct _PetscSFOps {
 39:   PetscErrorCode (*Reset)(PetscSF);
 40:   PetscErrorCode (*Destroy)(PetscSF);
 41:   PetscErrorCode (*SetUp)(PetscSF);
 42:   PetscErrorCode (*SetFromOptions)(PetscOptionItems*,PetscSF);
 43:   PetscErrorCode (*View)(PetscSF,PetscViewer);
 44:   PetscErrorCode (*Duplicate)(PetscSF,PetscSFDuplicateOption,PetscSF);
 45:   PetscErrorCode (*BcastAndOpBegin)(PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,void*,MPI_Op);
 46:   PetscErrorCode (*BcastAndOpEnd)  (PetscSF,MPI_Datatype,const void*,void*,MPI_Op);
 47:   PetscErrorCode (*ReduceBegin)    (PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,void*,MPI_Op);
 48:   PetscErrorCode (*ReduceEnd)      (PetscSF,MPI_Datatype,const void*,void*,MPI_Op);
 49:   PetscErrorCode (*FetchAndOpBegin)(PetscSF,MPI_Datatype,PetscMemType,void*,PetscMemType,const void*,void*,MPI_Op);
 50:   PetscErrorCode (*FetchAndOpEnd)  (PetscSF,MPI_Datatype,void*,const void*,void*,MPI_Op);
 51:   PetscErrorCode (*BcastToZero)    (PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,      void*); /* For internal use only */
 52:   PetscErrorCode (*GetRootRanks)(PetscSF,PetscInt*,const PetscMPIInt**,const PetscInt**,const PetscInt**,const PetscInt**);
 53:   PetscErrorCode (*GetLeafRanks)(PetscSF,PetscInt*,const PetscMPIInt**,const PetscInt**,const PetscInt**);
 54:   PetscErrorCode (*CreateLocalSF)(PetscSF,PetscSF*);
 55:   PetscErrorCode (*GetGraph)(PetscSF,PetscInt*,PetscInt*,const PetscInt**,const PetscSFNode**);
 56:   PetscErrorCode (*CreateEmbeddedSF)(PetscSF,PetscInt,const PetscInt*,PetscSF*);
 57:   PetscErrorCode (*CreateEmbeddedLeafSF)(PetscSF,PetscInt,const PetscInt*,PetscSF*);
 58: };
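Each PetscSF implementation supplies its own versions of the callbacks above. A minimal sketch of how a creation routine might wire up the table is shown below; the _MyImpl names are hypothetical, only a few slots are filled, and the prototypes simply mirror the corresponding slots in struct _PetscSFOps.

    #include <petsc/private/sfimpl.h>

    /* Hypothetical callbacks; prototypes only, matching the ops slots above */
    static PetscErrorCode PetscSFSetUp_MyImpl(PetscSF);
    static PetscErrorCode PetscSFBcastAndOpBegin_MyImpl(PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,void*,MPI_Op);
    static PetscErrorCode PetscSFBcastAndOpEnd_MyImpl(PetscSF,MPI_Datatype,const void*,void*,MPI_Op);

    PETSC_EXTERN PetscErrorCode PetscSFCreate_MyImpl(PetscSF sf)
    {
      sf->ops->SetUp           = PetscSFSetUp_MyImpl;           /* build the communication pattern */
      sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_MyImpl; /* root -> leaf communication */
      sf->ops->BcastAndOpEnd   = PetscSFBcastAndOpEnd_MyImpl;
      sf->data                 = NULL;                          /* an implementation would allocate its private struct here */
      return 0;
    }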

 60: typedef struct _n_PetscSFPackOpt *PetscSFPackOpt;

 62: struct _p_PetscSF {
 63:   PETSCHEADER(struct _PetscSFOps);
 64:   PetscInt        nroots;          /* Number of root vertices on current process (candidates for incoming edges) */
 65:   PetscInt        nleaves;         /* Number of leaf vertices on current process (this process specifies a root for each leaf) */
 66:   PetscInt        *mine;           /* Location of leaves in leafdata arrays provided to the communication routines */
 67:   PetscInt        *mine_alloc;
 68:   PetscInt        minleaf,maxleaf;
 69:   PetscSFNode     *remote;         /* Remote references to roots for each local leaf */
 70:   PetscSFNode     *remote_alloc;
 71:   PetscInt        nranks;          /* Number of ranks owning roots connected to my leaves */
 72:   PetscInt        ndranks;         /* Number of ranks in distinguished group holding roots connected to my leaves */
 73:   PetscMPIInt     *ranks;          /* List of ranks referenced by "remote" */
 74:   PetscInt        *roffset;        /* Array of length nranks+1, offset in rmine/rremote for each rank */
 75:   PetscInt        *rmine;          /* Concatenated array holding local indices referencing each remote rank */
 76:   PetscInt        *rmine_d[2];     /* A copy of rmine[local/remote] in device memory if needed */

 78:   /* Some results useful in packing by analyzing rmine[] */
 79:   PetscInt        leafbuflen[2];   /* Length (in unit) of leaf buffers, in layout of [PETSCSF_LOCAL/REMOTE] */
 80:   PetscBool       leafcontig[2];   /* True means indices in rmine[self part] or rmine[remote part] are contiguous, and they start from ... */
 81:   PetscInt        leafstart[2];    /* ... leafstart[0] and leafstart[1] respectively */
 82:   PetscSFPackOpt  leafpackopt[2];  /* Optimization plans to (un)pack leaves connected to remote roots, based on index patterns in rmine[]. NULL for no optimization */
 83:   PetscSFPackOpt  leafpackopt_d[2];/* Copy of leafpackopt[] on device if needed */
 84:   PetscBool       leafdups[2];     /* Indices in rmine[] for self(0)/remote(1) communication have duplicates? TRUE implies threads working on them in parallel may have a data race. */

 86:   PetscInt        nleafreqs;       /* Number of MPI requests for leaves */
 87:   PetscInt        *rremote;        /* Concatenated array holding remote indices referenced for each remote rank */
 88:   PetscBool       degreeknown;     /* The degree is currently known, so it does not have to be recomputed */
 89:   PetscInt        *degree;         /* Degree of each of my root vertices */
 90:   PetscInt        *degreetmp;      /* Temporary local array for computing degree */
 91:   PetscBool       rankorder;       /* Sort ranks for gather and scatter operations */
 92:   MPI_Group       ingroup;         /* Group of processes connected to my roots */
 93:   MPI_Group       outgroup;        /* Group of processes connected to my leaves */
 94:   PetscSF         multi;           /* Internal graph used to implement gather and scatter operations */
 95:   PetscBool       graphset;        /* Flag indicating that the graph has been set, required before calling communication routines */
 96:   PetscBool       setupcalled;     /* Type and communication structures have been set up */
 97:   PetscSFPattern  pattern;         /* Pattern of the graph */
 98:   PetscBool       persistent;      /* Does this SF use MPI persistent requests for communication */
 99:   PetscLayout     map;             /* Layout of leaves over all processes when building a patterned graph */
100:   PetscBool       use_default_stream;  /* If true, SF assumes root/leafdata are on the default stream on input and will also leave them there on output */
101:   PetscBool       use_gpu_aware_mpi;   /* If true, SF assumes it can pass GPU pointers to MPI */
102:   PetscBool       use_stream_aware_mpi;/* If true, SF assumes the underlying MPI is CUDA-stream aware, so we do not sync streams for send/recv buffers passed to MPI */
103: #if defined(PETSC_HAVE_CUDA)
104:   PetscInt        maxResidentThreadsPerGPU;
105: #endif
106:   void *data;                      /* Pointer to implementation */
107: };
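The roffset/rmine/rremote arrays are laid out per connected remote rank: for rank sf->ranks[r], the relevant entries are those in the half-open range [roffset[r], roffset[r+1]). A sketch of the usual traversal follows (hypothetical helper, assuming this private header is included and the graph has been set up):

    static PetscErrorCode InspectRankSegments_Example(PetscSF sf)
    {
      PetscInt r,i;
      for (r=0; r<sf->nranks; r++) {                    /* one segment per connected remote rank */
        for (i=sf->roffset[r]; i<sf->roffset[r+1]; i++) {
          PetscInt leafidx = sf->rmine[i];              /* local index into leafdata */
          PetscInt rootidx = sf->rremote[i];            /* index into rootdata owned by sf->ranks[r] */
          (void)leafidx; (void)rootidx;                 /* an implementation would pack/unpack here */
        }
      }
      return 0;
    }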

109: PETSC_EXTERN PetscBool PetscSFRegisterAllCalled;
110: PETSC_EXTERN PetscErrorCode PetscSFRegisterAll(void);

112: PETSC_INTERN PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF,PetscSF*);
113: PETSC_INTERN PetscErrorCode PetscSFBcastToZero_Private(PetscSF,MPI_Datatype,const void*,void*);

115: PETSC_EXTERN PetscErrorCode MPIPetsc_Type_unwrap(MPI_Datatype,MPI_Datatype*,PetscBool*);
116: PETSC_EXTERN PetscErrorCode MPIPetsc_Type_compare(MPI_Datatype,MPI_Datatype,PetscBool*);
117: PETSC_EXTERN PetscErrorCode MPIPetsc_Type_compare_contig(MPI_Datatype,MPI_Datatype,PetscInt*);

119: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
120: #define MPIU_Iscatter(a,b,c,d,e,f,g,h,req)     MPI_Iscatter(a,b,c,d,e,f,g,h,req)
121: #define MPIU_Iscatterv(a,b,c,d,e,f,g,h,i,req)  MPI_Iscatterv(a,b,c,d,e,f,g,h,i,req)
122: #define MPIU_Igather(a,b,c,d,e,f,g,h,req)      MPI_Igather(a,b,c,d,e,f,g,h,req)
123: #define MPIU_Igatherv(a,b,c,d,e,f,g,h,i,req)   MPI_Igatherv(a,b,c,d,e,f,g,h,i,req)
124: #define MPIU_Iallgather(a,b,c,d,e,f,g,req)     MPI_Iallgather(a,b,c,d,e,f,g,req)
125: #define MPIU_Iallgatherv(a,b,c,d,e,f,g,h,req)  MPI_Iallgatherv(a,b,c,d,e,f,g,h,req)
126: #define MPIU_Ialltoall(a,b,c,d,e,f,g,req)      MPI_Ialltoall(a,b,c,d,e,f,g,req)
127: #else
128: /* Ignore req, the MPI_Request argument, and use the blocking MPI collectives instead. One should initialize req
129:    to MPI_REQUEST_NULL so that MPI_Wait(req,status) is valid whether or not the call was blocking.
130:  */
131: #define MPIU_Iscatter(a,b,c,d,e,f,g,h,req)     MPI_Scatter(a,b,c,d,e,f,g,h)
132: #define MPIU_Iscatterv(a,b,c,d,e,f,g,h,i,req)  MPI_Scatterv(a,b,c,d,e,f,g,h,i)
133: #define MPIU_Igather(a,b,c,d,e,f,g,h,req)      MPI_Gather(a,b,c,d,e,f,g,h)
134: #define MPIU_Igatherv(a,b,c,d,e,f,g,h,i,req)   MPI_Gatherv(a,b,c,d,e,f,g,h,i)
135: #define MPIU_Iallgather(a,b,c,d,e,f,g,req)     MPI_Allgather(a,b,c,d,e,f,g)
136: #define MPIU_Iallgatherv(a,b,c,d,e,f,g,h,req)  MPI_Allgatherv(a,b,c,d,e,f,g,h)
137: #define MPIU_Ialltoall(a,b,c,d,e,f,g,req)      MPI_Alltoall(a,b,c,d,e,f,g)
138: #endif
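A usage sketch of these wrappers (hypothetical helper, not from PETSc): because the request is initialized to MPI_REQUEST_NULL, the MPI_Wait is harmless when PETSc falls back to the blocking collectives.

    static PetscErrorCode GatherRanks_Example(MPI_Comm comm)
    {
      PetscMPIInt    rank,size,*recv;
      MPI_Request    req = MPI_REQUEST_NULL;  /* safe to wait on even when the blocking branch ignored it */
      PetscErrorCode ierr;

      ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
      ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
      ierr = PetscMalloc1(size,&recv);CHKERRQ(ierr);
      ierr = MPIU_Iallgather(&rank,1,MPI_INT,recv,1,MPI_INT,comm,&req);CHKERRQ(ierr);
      ierr = MPI_Wait(&req,MPI_STATUS_IGNORE);CHKERRQ(ierr);
      ierr = PetscFree(recv);CHKERRQ(ierr);
      return 0;
    }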

140: PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type)
141: {
144:   *type = PETSC_MEMTYPE_HOST;
145: #if defined(PETSC_HAVE_CUDA)
146:   if (PetscCUDAInitialized && data) {
147:     cudaError_t                  cerr;
148:     struct cudaPointerAttributes attr;
149:     enum cudaMemoryType          mtype;
150:     cerr = cudaPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */
151:     cudaGetLastError(); /* Reset the last error */
152:     #if (CUDART_VERSION < 10000)
153:       mtype = attr.memoryType;
154:     #else
155:       mtype = attr.type;
156:     #endif
157:     if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) {
158:       *type = PETSC_MEMTYPE_DEVICE;
159:       return(0);
160:     }
161:   }
162: #endif

164: #if defined(PETSC_HAVE_HIP)
165:   if (PetscHIPInitialized && data) {
166:     hipError_t                   cerr;
167:     struct hipPointerAttribute_t attr;
168:     enum hipMemoryType           mtype;
169:     cerr = hipPointerGetAttributes(&attr,data);
170:     hipGetLastError(); /* Reset the last error */
171:     mtype = attr.memoryType;
172:     if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) {
173:       *type = PETSC_MEMTYPE_DEVICE;
174:       return(0);
175:     }
176:   }
177: #endif
178:   return(0);
179: }
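A usage sketch (the helper name and its rootdata argument are illustrative only): PetscGetMemType lets the SF pick a host or device pack path based on where the caller's array actually lives.

    static PetscErrorCode ChoosePackPath_Example(const void *rootdata)
    {
      PetscMemType   mtype;
      PetscErrorCode ierr;

      ierr = PetscGetMemType(rootdata,&mtype);CHKERRQ(ierr);
      if (mtype == PETSC_MEMTYPE_DEVICE) {
        /* rootdata is a device pointer: pack with a CUDA/HIP kernel, possibly hand it to GPU-aware MPI */
      } else {
        /* rootdata is a host pointer: pack on the CPU */
      }
      return 0;
    }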

181: PETSC_STATIC_INLINE PetscErrorCode PetscMallocWithMemType(PetscMemType mtype,size_t size,void** ptr)
182: {
184:   if (mtype == PETSC_MEMTYPE_HOST) {PetscErrorCode ierr = PetscMalloc(size,ptr);CHKERRQ(ierr);}
185: #if defined(PETSC_HAVE_CUDA)
186:   else if (mtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaMalloc(ptr,size);CHKERRCUDA(err);}
187: #elif defined(PETSC_HAVE_HIP)
188:   else if (mtype == PETSC_MEMTYPE_DEVICE) {hipError_t  err = hipMalloc(ptr,size);CHKERRQ(err==hipSuccess?0:PETSC_ERR_LIB);}
189: #endif
190:   else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong PetscMemType %d", (int)mtype);
191:   return(0);
192: }

194: PETSC_STATIC_INLINE PetscErrorCode PetscFreeWithMemType_Private(PetscMemType mtype,void* ptr)
195: {
197:   if (mtype == PETSC_MEMTYPE_HOST) {PetscErrorCode ierr = PetscFree(ptr);CHKERRQ(ierr);}
198: #if defined(PETSC_HAVE_CUDA)
199:   else if (mtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaFree(ptr);CHKERRCUDA(err);}
200: #elif defined(PETSC_HAVE_HIP)
201:   else if (mtype == PETSC_MEMTYPE_DEVICE) {hipError_t  err = hipFree(ptr);CHKERRQ(err==hipSuccess?0:PETSC_ERR_LIB);}
202: #endif
203:   else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong PetscMemType %d",(int)mtype);
204:   return(0);
205: }

207: /* Free memory and set ptr to NULL on success */
208: #define PetscFreeWithMemType(t,p) ((p) && (PetscFreeWithMemType_Private((t),(p)) || ((p)=NULL,0)))
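Taken together, a typical pattern (hypothetical helper for illustration) allocates scratch space in the same memory space as the user's data and releases it with the macro above, which also nulls the pointer on success.

    static PetscErrorCode ScratchBuffer_Example(const void *leafdata,size_t nbytes)
    {
      PetscMemType   mtype;
      void          *buf = NULL;
      PetscErrorCode ierr;

      ierr = PetscGetMemType(leafdata,&mtype);CHKERRQ(ierr);          /* host or device? */
      ierr = PetscMallocWithMemType(mtype,nbytes,&buf);CHKERRQ(ierr); /* allocate in the matching memory space */
      /* ... use buf, e.g. as a pack/unpack buffer ... */
      ierr = PetscFreeWithMemType(mtype,buf);CHKERRQ(ierr);           /* frees buf and sets it to NULL on success */
      return 0;
    }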

210: #endif