Actual source code: vecmpicupm.cu

#include "../vecmpicupm.hpp" /*I <petscvec.h> I*/

using namespace Petsc::vec::cupm::impl;

static constexpr auto VecMPI_CUDA = VecMPI_CUPM<::Petsc::device::cupm::DeviceType::CUDA>{};

/*MC
  VECCUDA - VECCUDA = "cuda" - A VECSEQCUDA on a single-process communicator, and VECMPICUDA
  otherwise.

  Options Database Key:
. -vec_type cuda - sets the vector type to VECCUDA during a call to VecSetFromOptions()

  Level: beginner

.seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateMPIWithArray(), VECSEQCUDA,
VECMPICUDA, VECSTANDARD, VecType, VecCreateMPI(), VecSetPinnedMemoryMin()
M*/

/*MC
  VECMPICUDA - VECMPICUDA = "mpicuda" - The basic parallel vector, modified to use CUDA

  Options Database Key:
. -vec_type mpicuda - sets the vector type to VECMPICUDA during a call to VecSetFromOptions()

  Level: beginner

.seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateMPIWithArray(), VECMPI,
VecType, VecCreateMPI(), VecSetPinnedMemoryMin()
M*/
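
/*
  A minimal usage sketch (hypothetical driver code, not part of this file) showing how the
  options database key above selects these types: with -vec_type cuda on the command line,
  VecSetFromOptions() picks VECSEQCUDA or VECMPICUDA based on the communicator size.

    Vec x;

    PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
    PetscCall(VecSetSizes(x, PETSC_DECIDE, 100));
    PetscCall(VecSetFromOptions(x)); // honors -vec_type cuda
    PetscCall(VecDestroy(&x));
*/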

PetscErrorCode VecCreate_CUDA(Vec v)
{
  PetscFunctionBegin;
  PetscCall(VecMPI_CUDA.Create_CUPM(v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode VecCreate_MPICUDA(Vec v)
{
  PetscFunctionBegin;
  PetscCall(VecMPI_CUDA.create(v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
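
/*
  The two constructors above are what VecSetType() dispatches to for the "cuda" and
  "mpicuda" type names. A hypothetical sketch of explicit type selection:

    Vec x;

    PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
    PetscCall(VecSetSizes(x, PETSC_DECIDE, 100));
    PetscCall(VecSetType(x, VECMPICUDA)); // invokes VecCreate_MPICUDA()
    PetscCall(VecDestroy(&x));
*/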

PetscErrorCode VecCUDAGetArrays_Private(Vec v, const PetscScalar **host_array, const PetscScalar **device_array, PetscOffloadMask *mask)
{
  PetscDeviceContext dctx;

  PetscFunctionBegin;
  PetscCall(PetscDeviceContextGetCurrentContextAssertType_Internal(&dctx, PETSC_DEVICE_CUDA));
  PetscCall(VecMPI_CUDA.GetArrays_CUPMBase(v, host_array, device_array, mask, dctx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
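
/*
  Illustrative (hypothetical) consumption of the arrays and offload mask returned by
  VecCUDAGetArrays_Private(): the mask records which copy of the data is current. As a
  PETSc-private helper, this would only be called from PETSc-internal code, assuming v
  is a CUDA vector:

    const PetscScalar *host, *device;
    PetscOffloadMask   mask;

    PetscCall(VecCUDAGetArrays_Private(v, &host, &device, &mask));
    if (mask == PETSC_OFFLOAD_CPU || mask == PETSC_OFFLOAD_BOTH) {
      // the host copy is up to date and may be read through host
    }
*/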

/*@
  VecCreateMPICUDA - Creates a standard, parallel, array-style vector for CUDA devices.

  Collective, Possibly Synchronous

  Input Parameters:
+ comm - the MPI communicator to use
. n    - local vector length (or PETSC_DECIDE to have it calculated if N is given)
- N    - global vector length (or PETSC_DETERMINE to have it calculated if n is given)

  Output Parameter:
. v - the vector

  Notes:
  Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the same type as an
  existing vector.

  This function may initialize PetscDevice, which may incur a device synchronization.

  Level: intermediate

.seealso: VecCreateMPICUDAWithArray(), VecCreateMPICUDAWithArrays(), VecCreateSeqCUDA(),
VecCreateSeq(), VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(),
VecCreateGhost(), VecCreateMPIWithArray(), VecCreateGhostWithArray(), VecMPISetGhost()
@*/
PetscErrorCode VecCreateMPICUDA(MPI_Comm comm, PetscInt n, PetscInt N, Vec *v)
{
  PetscFunctionBegin;
  PetscCall(VecMPI_CUDA.creatempicupm(comm, 0, n, N, v, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
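
/*
  A minimal usage sketch for VecCreateMPICUDA() (hypothetical, assuming PetscInitialize()
  has already been called):

    Vec x;

    PetscCall(VecCreateMPICUDA(PETSC_COMM_WORLD, PETSC_DECIDE, 100, &x));
    PetscCall(VecSet(x, 1.0)); // executed on the device for CUDA vectors
    PetscCall(VecDestroy(&x));
*/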

/*@C
  VecCreateMPICUDAWithArrays - Creates a parallel, array-style vector using CUDA, where the
  user provides the complete array space to store the vector values.

  Collective, Possibly Synchronous

  Input Parameters:
+ comm     - the MPI communicator to use
. bs       - block size, same meaning as VecSetBlockSize()
. n        - local vector length, cannot be PETSC_DECIDE
. N        - global vector length (or PETSC_DETERMINE to have it calculated)
. cpuarray - CPU memory where the vector elements are to be stored (or NULL)
- gpuarray - GPU memory where the vector elements are to be stored (or NULL)

  Output Parameter:
. v - the vector

  Notes:
  See VecCreateSeqCUDAWithArrays() for further discussion; this routine shares identical
  semantics.

  Level: intermediate

.seealso: VecCreateMPICUDA(), VecCreateSeqCUDAWithArrays(), VecCreateMPIWithArray(),
VecCreateSeqWithArray(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost(),
VecCreateMPI(), VecCreateGhostWithArray(), VecPlaceArray()
@*/
PetscErrorCode VecCreateMPICUDAWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar cpuarray[], const PetscScalar gpuarray[], Vec *v)
{
  PetscFunctionBegin;
  PetscCall(VecMPI_CUDA.creatempicupmwitharrays(comm, bs, n, N, cpuarray, gpuarray, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
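
/*
  A hypothetical usage sketch for VecCreateMPICUDAWithArrays(): the caller provides both
  host and device storage of at least n entries each; n must be the local length and
  cannot be PETSC_DECIDE. Error handling of the raw CUDA call uses PetscCallCUDA().

    const PetscInt n = 100;
    PetscScalar   *host, *device;
    Vec            x;

    PetscCall(PetscMalloc1(n, &host));
    PetscCallCUDA(cudaMalloc((void **)&device, n * sizeof(*device)));
    PetscCall(VecCreateMPICUDAWithArrays(PETSC_COMM_WORLD, 1, n, PETSC_DETERMINE, host, device, &x));
    PetscCall(VecSet(x, 1.0));
    PetscCall(VecDestroy(&x)); // PETSc does not free the user-provided arrays
    PetscCallCUDA(cudaFree(device));
    PetscCall(PetscFree(host));
*/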

/*@C
  VecCreateMPICUDAWithArray - Creates a parallel, array-style vector using CUDA, where the
  user provides the device array space to store the vector values.

  Collective

  Input Parameters:
+ comm     - the MPI communicator to use
. bs       - block size, same meaning as VecSetBlockSize()
. n        - local vector length, cannot be PETSC_DECIDE
. N        - global vector length (or PETSC_DETERMINE to have it calculated)
- gpuarray - the user-provided GPU array to store the vector values

  Output Parameter:
. v - the vector

  Notes:
  See VecCreateSeqCUDAWithArray() for further discussion; this routine shares identical
  semantics.

  Level: intermediate

.seealso: VecCreateMPICUDA(), VecCreateSeqCUDAWithArray(), VecCreateMPIWithArray(),
VecCreateSeqWithArray(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost(),
VecCreateMPI(), VecCreateGhostWithArray(), VecPlaceArray()
@*/
PetscErrorCode VecCreateMPICUDAWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar gpuarray[], Vec *v)
{
  PetscFunctionBegin;
  PetscCall(VecCreateMPICUDAWithArrays(comm, bs, n, N, nullptr, gpuarray, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
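
/*
  A hypothetical usage sketch for VecCreateMPICUDAWithArray(): an existing device buffer is
  wrapped as a PETSc vector; the device array remains owned by the caller, and host storage
  is allocated by PETSc on demand if needed.

    const PetscInt n = 100;
    PetscScalar   *device;
    Vec            x;

    PetscCallCUDA(cudaMalloc((void **)&device, n * sizeof(*device)));
    PetscCall(VecCreateMPICUDAWithArray(PETSC_COMM_WORLD, 1, n, PETSC_DETERMINE, device, &x));
    PetscCall(VecSet(x, 1.0));
    PetscCall(VecDestroy(&x)); // PETSc does not free the user-provided device array
    PetscCallCUDA(cudaFree(device));
*/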