Actual source code: cuspvecimpl.h
1: #ifndef __CUSPVECIMPL
4: #include <private/vecimpl.h>
5: #include <cublas.h>
6: #include <cusp/blas.h>
7: #include <thrust/device_vector.h>
8: #include <thrust/iterator/constant_iterator.h>
9: #include <thrust/transform.h>
10: #include <thrust/iterator/permutation_iterator.h>
12: #define CUSPARRAY cusp::array1d<PetscScalar,cusp::device_memory>
13: #define CUSPINTARRAYGPU cusp::array1d<PetscInt,cusp::device_memory>
14: #define CUSPINTARRAYCPU cusp::array1d<PetscInt,cusp::host_memory>
16: extern PetscErrorCode VecDotNorm2_SeqCUSP(Vec,Vec,PetscScalar *, PetscScalar *);
17: extern PetscErrorCode VecPointwiseDivide_SeqCUSP(Vec,Vec,Vec);
18: extern PetscErrorCode VecWAXPY_SeqCUSP(Vec,PetscScalar,Vec,Vec);
19: extern PetscErrorCode VecMDot_SeqCUSP(Vec,PetscInt,const Vec[],PetscScalar *);
20: extern PetscErrorCode VecSet_SeqCUSP(Vec,PetscScalar);
21: extern PetscErrorCode VecMAXPY_SeqCUSP(Vec,PetscInt,const PetscScalar *,Vec *);
22: extern PetscErrorCode VecAXPBYPCZ_SeqCUSP(Vec,PetscScalar,PetscScalar,PetscScalar,Vec,Vec);
23: extern PetscErrorCode VecPointwiseMult_SeqCUSP(Vec,Vec,Vec);
24: extern PetscErrorCode VecPlaceArray_SeqCUSP(Vec,const PetscScalar *);
25: extern PetscErrorCode VecResetArray_SeqCUSP(Vec);
26: extern PetscErrorCode VecReplaceArray_SeqCUSP(Vec,const PetscScalar *);
27: extern PetscErrorCode VecDot_SeqCUSP(Vec,Vec,PetscScalar *);
28: extern PetscErrorCode VecTDot_SeqCUSP(Vec,Vec,PetscScalar *);
29: extern PetscErrorCode VecScale_SeqCUSP(Vec,PetscScalar);
30: extern PetscErrorCode VecCopy_SeqCUSP(Vec,Vec);
31: extern PetscErrorCode VecSwap_SeqCUSP(Vec,Vec);
32: extern PetscErrorCode VecAXPY_SeqCUSP(Vec,PetscScalar,Vec);
33: extern PetscErrorCode VecAXPBY_SeqCUSP(Vec,PetscScalar,PetscScalar,Vec);
34: extern PetscErrorCode VecDuplicate_SeqCUSP(Vec,Vec *);
35: extern PetscErrorCode VecNorm_SeqCUSP(Vec,NormType,PetscReal*);
36: EXTERN_C_BEGIN
37: extern PetscErrorCode VecCreate_SeqCUSP(Vec);
38: EXTERN_C_END
39: extern PetscErrorCode VecView_Seq(Vec,PetscViewer);
40: extern PetscErrorCode VecDestroy_SeqCUSP(Vec);
41: extern PetscErrorCode VecAYPX_SeqCUSP(Vec,PetscScalar,Vec);
42: extern PetscErrorCode VecSetRandom_SeqCUSP(Vec,PetscRandom);
44: extern PetscErrorCode VecCUSPCopyToGPU_Public(Vec);
45: extern PetscErrorCode VecCUSPAllocateCheck_Public(Vec);
46: extern PetscErrorCode VecCUSPCopyToGPUSome_Public(Vec,CUSPINTARRAYCPU*,CUSPINTARRAYGPU*);
48: extern PetscBool synchronizeCUSP;
49: #define CHKERRCUSP(err) if (((int)err) != (int)CUBLAS_STATUS_SUCCESS) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error %d",err)
51: #define VecCUSPCastToRawPtr(x) thrust::raw_pointer_cast(&(x)[0])
53: #define WaitForGPU() synchronizeCUSP ? cudaThreadSynchronize() : 0
55: struct Vec_CUSP {
56: /* eventually we should probably move the GPU flag into here */
57: CUSPARRAY* GPUarray; /* this always holds the GPU data */
58: };
62: PETSC_STATIC_INLINE PetscErrorCode VecCUSPAllocateCheck(Vec v)
63: {
65: Vec_Seq *s = (Vec_Seq*)v->data;;
68: if (v->valid_GPU_array == PETSC_CUSP_UNALLOCATED){
69: try {
70: v->spptr = new Vec_CUSP;
71: ((Vec_CUSP*)v->spptr)->GPUarray = new CUSPARRAY;
72: ((Vec_CUSP*)v->spptr)->GPUarray->resize((PetscBLASInt)v->map->n);
73: WaitForGPU();
74: } catch(char* ex) {
75: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
76: }
77: if (s->array == 0){
78: v->valid_GPU_array = PETSC_CUSP_GPU;
79: } else{
80: v->valid_GPU_array = PETSC_CUSP_CPU;
81: }
82: }
83: return(0);
84: }
89: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
90: PETSC_STATIC_INLINE PetscErrorCode VecCUSPCopyToGPU(Vec v)
91: {
92: PetscBLASInt cn = v->map->n;
96: VecCUSPAllocateCheck(v);
97: if (v->valid_GPU_array == PETSC_CUSP_CPU){
98: PetscLogEventBegin(VEC_CUSPCopyToGPU,v,0,0,0);
99: try{
100: ((Vec_CUSP*)v->spptr)->GPUarray->assign(*(PetscScalar**)v->data,*(PetscScalar**)v->data + cn);
101: WaitForGPU();
102: } catch(char* ex) {
103: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
104: }
105: PetscLogEventEnd(VEC_CUSPCopyToGPU,v,0,0,0);
106: v->valid_GPU_array = PETSC_CUSP_BOTH;
107: }
108: return(0);
109: }
113: PETSC_STATIC_INLINE PetscErrorCode VecCUSPCopyToGPUSome(Vec v,CUSPINTARRAYCPU *indicesCPU,CUSPINTARRAYGPU *indicesGPU)
114: {
115: Vec_Seq *s = (Vec_Seq *)v->data;
119: VecCUSPAllocateCheck(v);
120: if (v->valid_GPU_array == PETSC_CUSP_CPU) {
121: PetscLogEventBegin(VEC_CUSPCopyToGPUSome,v,0,0,0);
122: thrust::copy(thrust::make_permutation_iterator(s->array,indicesCPU->begin()),
123: thrust::make_permutation_iterator(s->array,indicesCPU->end()),
124: thrust::make_permutation_iterator(((Vec_CUSP *)v->spptr)->GPUarray->begin(),indicesGPU->begin()));
125: PetscLogEventEnd(VEC_CUSPCopyToGPUSome,v,0,0,0);
126: }
127: v->valid_GPU_array = PETSC_CUSP_GPU;
128: return(0);
129: }
133: PETSC_STATIC_INLINE PetscErrorCode VecCUSPGetArrayReadWrite(Vec v, CUSPARRAY** a)
134: {
138: *a = 0;
139: VecCUSPCopyToGPU(v);
140: *a = ((Vec_CUSP *)v->spptr)->GPUarray;
141: return(0);
142: }
146: PETSC_STATIC_INLINE PetscErrorCode VecCUSPRestoreArrayReadWrite(Vec v, CUSPARRAY** a)
147: {
151: if (v->valid_GPU_array != PETSC_CUSP_UNALLOCATED){
152: v->valid_GPU_array = PETSC_CUSP_GPU;
153: }
154: PetscObjectStateIncrease((PetscObject)v);
155: return(0);
156: }
160: PETSC_STATIC_INLINE PetscErrorCode VecCUSPGetArrayRead(Vec v, CUSPARRAY** a)
161: {
165: *a = 0;
166: VecCUSPCopyToGPU(v);
167: *a = ((Vec_CUSP *)v->spptr)->GPUarray;
168: return(0);
169: }
173: PETSC_STATIC_INLINE PetscErrorCode VecCUSPRestoreArrayRead(Vec v, CUSPARRAY** a)
174: {
176: return(0);
177: }
181: PETSC_STATIC_INLINE PetscErrorCode VecCUSPGetArrayWrite(Vec v, CUSPARRAY** a)
182: {
186: *a = 0;
187: VecCUSPAllocateCheck(v);
188: *a = ((Vec_CUSP *)v->spptr)->GPUarray;
189: return(0);
190: }
194: PETSC_STATIC_INLINE PetscErrorCode VecCUSPRestoreArrayWrite(Vec v, CUSPARRAY** a)
195: {
199: if (v->valid_GPU_array != PETSC_CUSP_UNALLOCATED){
200: v->valid_GPU_array = PETSC_CUSP_GPU;
201: }
202: PetscObjectStateIncrease((PetscObject)v);
203: return(0);
204: }
205: #endif