Actual source code: cuspvecimpl.h

petsc-3.5.4 2015-05-23
Report Typos and Errors
4: #include <petsccusp.h> 5: #include <petsc-private/vecimpl.h> 7: #include <algorithm> 8: #include <vector> 9: #include <string> 11: #include <cublas.h> 12: #include <cusp/blas.h> 13: #include <thrust/host_vector.h> 14: #include <thrust/device_vector.h> 15: #include <thrust/iterator/constant_iterator.h> 16: #include <thrust/transform.h> 17: #include <thrust/iterator/permutation_iterator.h> 19: #define CUSPARRAY cusp::array1d<PetscScalar,cusp::device_memory> 20: #define CUSPARRAYCPU cusp::array1d<PetscScalar,cusp::host_memory> 21: #define CUSPINTARRAYGPU cusp::array1d<PetscInt,cusp::device_memory> 22: #define CUSPINTARRAYCPU cusp::array1d<PetscInt,cusp::host_memory> 24: PETSC_INTERN PetscErrorCode VecDotNorm2_SeqCUSP(Vec,Vec,PetscScalar*, PetscScalar*); 25: PETSC_INTERN PetscErrorCode VecPointwiseDivide_SeqCUSP(Vec,Vec,Vec); 26: PETSC_INTERN PetscErrorCode VecWAXPY_SeqCUSP(Vec,PetscScalar,Vec,Vec); 27: PETSC_INTERN PetscErrorCode VecMDot_SeqCUSP(Vec,PetscInt,const Vec[],PetscScalar*); 28: PETSC_INTERN PetscErrorCode VecSet_SeqCUSP(Vec,PetscScalar); 29: PETSC_INTERN PetscErrorCode VecMAXPY_SeqCUSP(Vec,PetscInt,const PetscScalar*,Vec*); 30: PETSC_INTERN PetscErrorCode VecAXPBYPCZ_SeqCUSP(Vec,PetscScalar,PetscScalar,PetscScalar,Vec,Vec); 31: PETSC_INTERN PetscErrorCode VecPointwiseMult_SeqCUSP(Vec,Vec,Vec); 32: PETSC_INTERN PetscErrorCode VecPlaceArray_SeqCUSP(Vec,const PetscScalar*); 33: PETSC_INTERN PetscErrorCode VecResetArray_SeqCUSP(Vec); 34: PETSC_INTERN PetscErrorCode VecReplaceArray_SeqCUSP(Vec,const PetscScalar*); 35: PETSC_INTERN PetscErrorCode VecDot_SeqCUSP(Vec,Vec,PetscScalar*); 36: PETSC_INTERN PetscErrorCode VecTDot_SeqCUSP(Vec,Vec,PetscScalar*); 37: PETSC_INTERN PetscErrorCode VecScale_SeqCUSP(Vec,PetscScalar); 38: PETSC_INTERN PetscErrorCode VecCopy_SeqCUSP(Vec,Vec); 39: PETSC_INTERN PetscErrorCode VecSwap_SeqCUSP(Vec,Vec); 40: PETSC_INTERN PetscErrorCode VecAXPY_SeqCUSP(Vec,PetscScalar,Vec); 41: PETSC_INTERN PetscErrorCode VecAXPBY_SeqCUSP(Vec,PetscScalar,PetscScalar,Vec); 42: PETSC_INTERN PetscErrorCode VecDuplicate_SeqCUSP(Vec,Vec*); 43: PETSC_INTERN PetscErrorCode VecNorm_SeqCUSP(Vec,NormType,PetscReal*); 44: PETSC_INTERN PetscErrorCode VecCUSPCopyToGPU(Vec); 45: PETSC_INTERN PetscErrorCode VecCUSPAllocateCheck(Vec); 46: PETSC_INTERN PetscErrorCode VecCUSPAllocateCheckHost(Vec); 47: PETSC_EXTERN PetscErrorCode VecCreate_SeqCUSP(Vec); 48: PETSC_INTERN PetscErrorCode VecView_Seq(Vec,PetscViewer); 49: PETSC_INTERN PetscErrorCode VecDestroy_SeqCUSP(Vec); 50: PETSC_INTERN PetscErrorCode VecAYPX_SeqCUSP(Vec,PetscScalar,Vec); 51: PETSC_INTERN PetscErrorCode VecSetRandom_SeqCUSP(Vec,PetscRandom); 53: PETSC_INTERN PetscErrorCode VecCUSPCopyToGPU_Public(Vec); 54: PETSC_INTERN PetscErrorCode VecCUSPAllocateCheck_Public(Vec); 56: #define CHKERRCUSP(err) if (((int)err) != (int)CUBLAS_STATUS_SUCCESS) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error %d",err) 58: #define VecCUSPCastToRawPtr(x) thrust::raw_pointer_cast(&(x)[0]) 60: #define WaitForGPU() PetscCUSPSynchronize ? cudaThreadSynchronize() : 0 62: struct Vec_CUSP { 63: CUSPARRAY *GPUarray; /* this always holds the GPU data */ 64: cudaStream_t stream; /* A stream for doing asynchronous data transfers */ 65: PetscBool hostDataRegisteredAsPageLocked; 66: }; 68: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesCreate_PtoP(PetscInt, PetscInt*,PetscInt, PetscInt*,PetscCUSPIndices*); 69: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesCreate_StoS(PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt*,PetscInt*,PetscCUSPIndices*); 70: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesDestroy(PetscCUSPIndices*); 71: PETSC_INTERN PetscErrorCode VecScatterCUSP_StoS(Vec,Vec,PetscCUSPIndices,InsertMode,ScatterMode); 73: typedef enum {VEC_SCATTER_CUSP_STOS, VEC_SCATTER_CUSP_PTOP} VecCUSPScatterType; 74: typedef enum {VEC_SCATTER_CUSP_GENERAL, VEC_SCATTER_CUSP_STRIDED} VecCUSPSequentialScatterMode; 76: struct _p_VecScatterCUSPIndices_PtoP { 77: PetscInt ns; 78: PetscInt sendLowestIndex; 79: PetscInt nr; 80: PetscInt recvLowestIndex; 81: }; 83: struct _p_VecScatterCUSPIndices_StoS { 84: /* from indices data */ 85: PetscInt *fslots; 86: PetscInt fromFirst; 87: PetscInt fromStep; 88: VecCUSPSequentialScatterMode fromMode; 90: /* to indices data */ 91: PetscInt *tslots; 92: PetscInt toFirst; 93: PetscInt toStep; 94: VecCUSPSequentialScatterMode toMode; 96: PetscInt n; 97: PetscInt MAX_BLOCKS; 98: PetscInt MAX_CORESIDENT_THREADS; 99: cudaStream_t stream; 100: }; 102: struct _p_PetscCUSPIndices { 103: void * scatter; 104: VecCUSPScatterType scatterType; 105: }; 107: #endif