Actual source code: vecmpicupm.cu
#include "../vecmpicupm.hpp" /*I <petscvec.h> I*/

using namespace Petsc::vec::cupm;
using Petsc::device::cupm::DeviceType;

static constexpr auto VecMPI_CUDA = impl::VecMPI_CUPM<DeviceType::CUDA>{};

/*MC
  VECCUDA - VECCUDA = "cuda" - A VECSEQCUDA on a single-process communicator, and VECMPICUDA
  otherwise.

  Options Database Keys:
. -vec_type cuda - sets the vector type to VECCUDA during a call to VecSetFromOptions()
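
  Example Usage:
  A minimal sketch (not part of the original manual page); run with -vec_type cuda so that
  VecSetFromOptions() selects this type:
.vb
  Vec v;

  PetscCall(VecCreate(PETSC_COMM_WORLD, &v));
  PetscCall(VecSetSizes(v, PETSC_DECIDE, 100));
  PetscCall(VecSetFromOptions(v));
.ve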

  Level: beginner

.seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateMPIWithArray(), VECSEQCUDA,
          VECMPICUDA, VECSTANDARD, VecType, VecCreateMPI(), VecSetPinnedMemoryMin()
M*/

/*MC
  VECMPICUDA - VECMPICUDA = "mpicuda" - The basic parallel vector, modified to use CUDA.

  Options Database Keys:
. -vec_type mpicuda - sets the vector type to VECMPICUDA during a call to VecSetFromOptions()
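
  Example Usage:
  A minimal sketch (not part of the original manual page); the type may also be selected
  programmatically instead of via the options database:
.vb
  Vec v;

  PetscCall(VecCreate(PETSC_COMM_WORLD, &v));
  PetscCall(VecSetSizes(v, PETSC_DECIDE, 100));
  PetscCall(VecSetType(v, VECMPICUDA));
.ve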

  Level: beginner

.seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateMPIWithArray(), VECMPI,
          VecType, VecCreateMPI(), VecSetPinnedMemoryMin()
M*/

// constructor for VECCUDA: yields a VECSEQCUDA on a single-process communicator and a
// VECMPICUDA otherwise (see the VECCUDA manual page above)
PetscErrorCode VecCreate_CUDA(Vec v)
{
  PetscFunctionBegin;
  PetscCall(VecMPI_CUDA.Create_CUPM(v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// constructor for VECMPICUDA, the parallel CUDA vector type
PetscErrorCode VecCreate_MPICUDA(Vec v)
{
  PetscFunctionBegin;
  PetscCall(VecMPI_CUDA.Create(v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
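
/*
  Neither constructor above is normally called directly; both are reached through
  VecSetType() (for example from VecSetFromOptions()). A minimal sketch, assuming a Vec
  whose sizes have already been set:

    PetscCall(VecSetType(v, VECMPICUDA)); // dispatches to VecCreate_MPICUDA()
*/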

// internal helper: fetch the raw host and device arrays of v along with its current
// offload mask, using the current CUDA device context
PetscErrorCode VecCUDAGetArrays_Private(Vec v, const PetscScalar **host_array, const PetscScalar **device_array, PetscOffloadMask *mask)
{
  PetscDeviceContext dctx;

  PetscFunctionBegin;

  PetscCall(PetscDeviceContextGetCurrentContextAssertType_Internal(&dctx, PETSC_DEVICE_CUDA));
  PetscCall(VecMPI_CUDA.GetArrays_CUPMBase(v, host_array, device_array, mask, dctx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
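
/*
  A hypothetical internal use of the helper above (sketch only, not from the original
  source): query where the up-to-date values of v currently reside.

    const PetscScalar *host, *device;
    PetscOffloadMask   mask;

    PetscCall(VecCUDAGetArrays_Private(v, &host, &device, &mask));
    if (mask == PETSC_OFFLOAD_GPU) {
      // the device array holds the current values
    }
*/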

/*@
  VecCreateMPICUDA - Creates a standard, parallel, array-style vector for CUDA devices.

  Collective, Possibly Synchronous

  Input Parameters:
+ comm - the MPI communicator to use
. n    - local vector length (or PETSC_DECIDE to have it calculated if N is given)
- N    - global vector length (or PETSC_DETERMINE to have it calculated if n is given)

  Output Parameter:
. v - the vector

  Notes:
  Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the same type as an
  existing vector.

  This function may initialize PetscDevice, which may incur a device synchronization.
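
  Example Usage:
  A minimal sketch (not part of the original manual page) that creates a vector with 100
  global entries, fills it, and destroys it:
.vb
  Vec v;

  PetscCall(VecCreateMPICUDA(PETSC_COMM_WORLD, PETSC_DECIDE, 100, &v));
  PetscCall(VecSet(v, 1.0));
  PetscCall(VecDestroy(&v));
.ve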

  Level: intermediate

.seealso: VecCreateMPICUDAWithArray(), VecCreateMPICUDAWithArrays(), VecCreateSeqCUDA(),
          VecCreateSeq(), VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(),
          VecCreateGhost(), VecCreateMPIWithArray(), VecCreateGhostWithArray(), VecMPISetGhost()
@*/
PetscErrorCode VecCreateMPICUDA(MPI_Comm comm, PetscInt n, PetscInt N, Vec *v)
{
  PetscFunctionBegin;

  PetscCall(VecCreateMPICUPMAsync<DeviceType::CUDA>(comm, n, N, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  VecCreateMPICUDAWithArrays - Creates a parallel, array-style vector using CUDA, where the
  user provides the complete array space to store the vector values.

  Collective, Possibly Synchronous

  Input Parameters:
+ comm     - the MPI communicator to use
. bs       - block size, same meaning as VecSetBlockSize()
. n        - local vector length, cannot be PETSC_DECIDE
. N        - global vector length (or PETSC_DECIDE to have it calculated)
. cpuarray - CPU memory where the vector elements are to be stored (or NULL)
- gpuarray - GPU memory where the vector elements are to be stored (or NULL)

  Output Parameter:
. v - the vector

  Notes:
  See VecCreateSeqCUDAWithArrays() for further discussion; this routine shares identical
  semantics.
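
  Example Usage:
  A minimal sketch (not part of the original manual page), assuming block size 1, two
  local entries per process, a host buffer on the stack, and a device buffer obtained
  from cudaMalloc(); the caller remains responsible for freeing both buffers:
.vb
  const PetscInt n           = 2;
  PetscScalar    cpuarray[2] = {1.0, 2.0};
  PetscScalar   *gpuarray;
  Vec            v;

  PetscCallCUDA(cudaMalloc((void **)&gpuarray, n * sizeof(*gpuarray)));
  PetscCall(VecCreateMPICUDAWithArrays(PETSC_COMM_WORLD, 1, n, PETSC_DECIDE, cpuarray, gpuarray, &v));
.ve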

  Level: intermediate

.seealso: VecCreateMPICUDA(), VecCreateSeqCUDAWithArrays(), VecCreateMPIWithArray(),
          VecCreateSeqWithArray(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost(),
          VecCreateMPI(), VecCreateGhostWithArray(), VecPlaceArray()
@*/
PetscErrorCode VecCreateMPICUDAWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar cpuarray[], const PetscScalar gpuarray[], Vec *v)
{
  PetscFunctionBegin;
  PetscCall(VecCreateMPICUPMWithArrays<DeviceType::CUDA>(comm, bs, n, N, cpuarray, gpuarray, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  VecCreateMPICUDAWithArray - Creates a parallel, array-style vector using CUDA, where the
  user provides the device array space to store the vector values.

  Collective

  Input Parameters:
+ comm     - the MPI communicator to use
. bs       - block size, same meaning as VecSetBlockSize()
. n        - local vector length, cannot be PETSC_DECIDE
. N        - global vector length (or PETSC_DECIDE to have it calculated)
- gpuarray - the user-provided GPU array to store the vector values

  Output Parameter:
. v - the vector

  Notes:
  See VecCreateSeqCUDAWithArray() for further discussion; this routine shares identical
  semantics.
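
  Example Usage:
  A minimal sketch (not part of the original manual page), assuming block size 1, two
  local entries per process, and a device buffer obtained from cudaMalloc(); the array
  is not freed when the vector is destroyed:
.vb
  const PetscInt n = 2;
  PetscScalar   *gpuarray;
  Vec            v;

  PetscCallCUDA(cudaMalloc((void **)&gpuarray, n * sizeof(*gpuarray)));
  PetscCall(VecCreateMPICUDAWithArray(PETSC_COMM_WORLD, 1, n, PETSC_DECIDE, gpuarray, &v));
.ve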

  Level: intermediate

.seealso: VecCreateMPICUDA(), VecCreateSeqCUDAWithArray(), VecCreateMPIWithArray(),
          VecCreateSeqWithArray(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost(),
          VecCreateMPI(), VecCreateGhostWithArray(), VecPlaceArray()
@*/
PetscErrorCode VecCreateMPICUDAWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar gpuarray[], Vec *v)
{
  PetscFunctionBegin;
  PetscCall(VecCreateMPICUPMWithArray<DeviceType::CUDA>(comm, bs, n, N, gpuarray, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}