Actual source code: vecviennacl.cxx
petsc-master 2020-08-25
1: /*
2: Implements the sequential ViennaCL vectors.
3: */
5: #include <petscconf.h>
6: #include <petsc/private/vecimpl.h>
7: #include <../src/vec/vec/impls/dvecimpl.h>
8: #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>
10: #include <vector>
12: #include "viennacl/linalg/inner_prod.hpp"
13: #include "viennacl/linalg/norm_1.hpp"
14: #include "viennacl/linalg/norm_2.hpp"
15: #include "viennacl/linalg/norm_inf.hpp"
17: #ifdef VIENNACL_WITH_OPENCL
18: #include "viennacl/ocl/backend.hpp"
19: #endif
22: PETSC_EXTERN PetscErrorCode VecViennaCLGetArray(Vec v, ViennaCLVector **a)
23: {
28: *a = 0;
29: VecViennaCLCopyToGPU(v);
30: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
31: ViennaCLWaitForGPU();
32: return(0);
33: }
35: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArray(Vec v, ViennaCLVector **a)
36: {
41: v->offloadmask = PETSC_OFFLOAD_GPU;
43: PetscObjectStateIncrease((PetscObject)v);
44: return(0);
45: }
47: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
48: {
53: *a = 0;
54: VecViennaCLCopyToGPU(v);
55: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
56: ViennaCLWaitForGPU();
57: return(0);
58: }
60: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
61: {
64: return(0);
65: }
67: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
68: {
73: *a = 0;
74: VecViennaCLAllocateCheck(v);
75: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
76: ViennaCLWaitForGPU();
77: return(0);
78: }
80: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
81: {
86: v->offloadmask = PETSC_OFFLOAD_GPU;
88: PetscObjectStateIncrease((PetscObject)v);
89: return(0);
90: }
94: PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
95: {
96: PetscErrorCode ierr;
97: char string[20];
98: PetscBool flg,flg_cuda,flg_opencl,flg_openmp;
101: /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
102: PetscOptionsGetString(NULL,NULL,"-viennacl_backend",string,sizeof(string),&flg);
103: if (flg) {
104: try {
105: PetscStrcasecmp(string,"cuda",&flg_cuda);
106: PetscStrcasecmp(string,"opencl",&flg_opencl);
107: PetscStrcasecmp(string,"openmp",&flg_openmp);
109: /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
110: if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
111: #if defined(PETSC_HAVE_CUDA)
112: else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
113: #endif
114: #if defined(PETSC_HAVE_OPENCL)
115: else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
116: #endif
117: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.\n", string);
118: } catch (std::exception const & ex) {
119: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
120: }
121: }
123: #if defined(PETSC_HAVE_OPENCL)
124: /* ViennaCL OpenCL device type configuration */
125: PetscOptionsGetString(NULL,NULL,"-viennacl_opencl_device_type",string,sizeof(string),&flg);
126: if (flg) {
127: try {
128: PetscStrcasecmp(string,"cpu",&flg);
129: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);
131: PetscStrcasecmp(string,"gpu",&flg);
132: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);
134: PetscStrcasecmp(string,"accelerator",&flg);
135: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
136: } catch (std::exception const & ex) {
137: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
138: }
139: }
140: #endif
142: /* Print available backends */
143: PetscOptionsHasName(NULL,NULL,"-viennacl_view",&flg);
144: if (flg) {
145: PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: ");
146: #if defined(PETSC_HAVE_CUDA)
147: PetscPrintf(PETSC_COMM_WORLD, "CUDA, ");
148: #endif
149: #if defined(PETSC_HAVE_OPENCL)
150: PetscPrintf(PETSC_COMM_WORLD, "OpenCL, ");
151: #endif
152: #if defined(PETSC_HAVE_OPENMP)
153: PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
154: #else
155: PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) ");
156: #endif
157: PetscPrintf(PETSC_COMM_WORLD, "\n");
159: /* Print selected backends */
160: PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend selected: ");
161: #if defined(PETSC_HAVE_CUDA)
162: if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) {
163: PetscPrintf(PETSC_COMM_WORLD, "CUDA ");
164: }
165: #endif
166: #if defined(PETSC_HAVE_OPENCL)
167: if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) {
168: PetscPrintf(PETSC_COMM_WORLD, "OpenCL ");
169: }
170: #endif
171: #if defined(PETSC_HAVE_OPENMP)
172: if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
173: PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
174: }
175: #else
176: if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
177: PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) ");
178: }
179: #endif
180: PetscPrintf(PETSC_COMM_WORLD, "\n");
181: }
182: return(0);
183: }
185: /*
186: Allocates space for the vector array on the Host if it does not exist.
187: Does NOT change the PetscViennaCLFlag for the vector
188: Does NOT zero the ViennaCL array
189: */
190: PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
191: {
193: PetscScalar *array;
194: Vec_Seq *s;
195: PetscInt n = v->map->n;
198: s = (Vec_Seq*)v->data;
199: VecViennaCLAllocateCheck(v);
200: if (s->array == 0) {
201: PetscMalloc1(n,&array);
202: PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));
203: s->array = array;
204: s->array_allocated = array;
205: }
206: return(0);
207: }
210: /*
211: Allocates space for the vector array on the GPU if it does not exist.
212: Does NOT change the PetscViennaCLFlag for the vector
213: Does NOT zero the ViennaCL array
215: */
216: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
217: {
219: if (!v->spptr) {
220: try {
221: v->spptr = new Vec_ViennaCL;
222: ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated = new ViennaCLVector((PetscBLASInt)v->map->n);
223: ((Vec_ViennaCL*)v->spptr)->GPUarray = ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated;
225: } catch(std::exception const & ex) {
226: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
227: }
228: }
229: return(0);
230: }
233: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
234: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
235: {
240: VecViennaCLAllocateCheck(v);
241: if (v->map->n > 0) {
242: if (v->offloadmask == PETSC_OFFLOAD_CPU) {
243: PetscLogEventBegin(VEC_ViennaCLCopyToGPU,v,0,0,0);
244: try {
245: ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
246: viennacl::fast_copy(*(PetscScalar**)v->data, *(PetscScalar**)v->data + v->map->n, vec->begin());
247: ViennaCLWaitForGPU();
248: } catch(std::exception const & ex) {
249: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
250: }
251: PetscLogCpuToGpu((v->map->n)*sizeof(PetscScalar));
252: PetscLogEventEnd(VEC_ViennaCLCopyToGPU,v,0,0,0);
253: v->offloadmask = PETSC_OFFLOAD_BOTH;
254: }
255: }
256: return(0);
257: }
261: /*
262: VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
263: */
264: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
265: {
270: VecViennaCLAllocateCheckHost(v);
271: if (v->offloadmask == PETSC_OFFLOAD_GPU) {
272: PetscLogEventBegin(VEC_ViennaCLCopyFromGPU,v,0,0,0);
273: try {
274: ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
275: viennacl::fast_copy(vec->begin(),vec->end(),*(PetscScalar**)v->data);
276: ViennaCLWaitForGPU();
277: } catch(std::exception const & ex) {
278: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
279: }
280: PetscLogGpuToCpu((v->map->n)*sizeof(PetscScalar));
281: PetscLogEventEnd(VEC_ViennaCLCopyFromGPU,v,0,0,0);
282: v->offloadmask = PETSC_OFFLOAD_BOTH;
283: }
284: return(0);
285: }
288: /* Copy on CPU */
289: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin,Vec yin)
290: {
291: PetscScalar *ya;
292: const PetscScalar *xa;
293: PetscErrorCode ierr;
296: VecViennaCLAllocateCheckHost(xin);
297: VecViennaCLAllocateCheckHost(yin);
298: if (xin != yin) {
299: VecGetArrayRead(xin,&xa);
300: VecGetArray(yin,&ya);
301: PetscArraycpy(ya,xa,xin->map->n);
302: VecRestoreArrayRead(xin,&xa);
303: VecRestoreArray(yin,&ya);
304: }
305: return(0);
306: }
308: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin,PetscRandom r)
309: {
311: PetscInt n = xin->map->n,i;
312: PetscScalar *xx;
315: VecGetArray(xin,&xx);
316: for (i=0; i<n; i++) {PetscRandomGetValue(r,&xx[i]);}
317: VecRestoreArray(xin,&xx);
318: return(0);
319: }
321: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
322: {
323: Vec_Seq *vs = (Vec_Seq*)v->data;
327: PetscObjectSAWsViewOff(v);
328: #if defined(PETSC_USE_LOG)
329: PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
330: #endif
331: if (vs->array_allocated) { PetscFree(vs->array_allocated); }
332: PetscFree(vs);
333: return(0);
334: }
336: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
337: {
338: Vec_Seq *v = (Vec_Seq*)vin->data;
341: v->array = v->unplacedarray;
342: v->unplacedarray = 0;
343: return(0);
344: }
347: /*MC
348: VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL
350: Options Database Keys:
351: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()
353: Level: beginner
355: .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateSeqWithArray(), VECMPI, VecType, VecCreateMPI(), VecCreateSeq()
356: M*/
359: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
360: {
361: const ViennaCLVector *xgpu;
362: ViennaCLVector *ygpu;
363: PetscErrorCode ierr;
366: VecViennaCLGetArrayRead(xin,&xgpu);
367: VecViennaCLGetArray(yin,&ygpu);
368: PetscLogGpuTimeBegin();
369: try {
370: if (alpha != 0.0 && xin->map->n > 0) {
371: *ygpu = *xgpu + alpha * *ygpu;
372: PetscLogGpuFlops(2.0*yin->map->n);
373: } else {
374: *ygpu = *xgpu;
375: }
376: ViennaCLWaitForGPU();
377: } catch(std::exception const & ex) {
378: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
379: }
380: PetscLogGpuTimeEnd();
381: VecViennaCLRestoreArrayRead(xin,&xgpu);
382: VecViennaCLRestoreArray(yin,&ygpu);
383: return(0);
384: }
387: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)
388: {
389: const ViennaCLVector *xgpu;
390: ViennaCLVector *ygpu;
391: PetscErrorCode ierr;
394: if (alpha != 0.0 && xin->map->n > 0) {
395: VecViennaCLGetArrayRead(xin,&xgpu);
396: VecViennaCLGetArray(yin,&ygpu);
397: PetscLogGpuTimeBegin();
398: try {
399: *ygpu += alpha * *xgpu;
400: ViennaCLWaitForGPU();
401: } catch(std::exception const & ex) {
402: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
403: }
404: PetscLogGpuTimeEnd();
405: VecViennaCLRestoreArrayRead(xin,&xgpu);
406: VecViennaCLRestoreArray(yin,&ygpu);
407: PetscLogGpuFlops(2.0*yin->map->n);
408: }
409: return(0);
410: }
413: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
414: {
415: const ViennaCLVector *xgpu,*ygpu;
416: ViennaCLVector *wgpu;
417: PetscErrorCode ierr;
420: if (xin->map->n > 0) {
421: VecViennaCLGetArrayRead(xin,&xgpu);
422: VecViennaCLGetArrayRead(yin,&ygpu);
423: VecViennaCLGetArrayWrite(win,&wgpu);
424: PetscLogGpuTimeBegin();
425: try {
426: *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
427: ViennaCLWaitForGPU();
428: } catch(std::exception const & ex) {
429: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
430: }
431: PetscLogGpuTimeEnd();
432: PetscLogGpuFlops(win->map->n);
433: VecViennaCLRestoreArrayRead(xin,&xgpu);
434: VecViennaCLRestoreArrayRead(yin,&ygpu);
435: VecViennaCLRestoreArrayWrite(win,&wgpu);
436: }
437: return(0);
438: }
441: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win,PetscScalar alpha,Vec xin, Vec yin)
442: {
443: const ViennaCLVector *xgpu,*ygpu;
444: ViennaCLVector *wgpu;
445: PetscErrorCode ierr;
448: if (alpha == 0.0 && xin->map->n > 0) {
449: VecCopy_SeqViennaCL(yin,win);
450: } else {
451: VecViennaCLGetArrayRead(xin,&xgpu);
452: VecViennaCLGetArrayRead(yin,&ygpu);
453: VecViennaCLGetArrayWrite(win,&wgpu);
454: PetscLogGpuTimeBegin();
455: if (alpha == 1.0) {
456: try {
457: *wgpu = *ygpu + *xgpu;
458: } catch(std::exception const & ex) {
459: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
460: }
461: PetscLogGpuFlops(win->map->n);
462: } else if (alpha == -1.0) {
463: try {
464: *wgpu = *ygpu - *xgpu;
465: } catch(std::exception const & ex) {
466: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
467: }
468: PetscLogGpuFlops(win->map->n);
469: } else {
470: try {
471: *wgpu = *ygpu + alpha * *xgpu;
472: } catch(std::exception const & ex) {
473: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
474: }
475: PetscLogGpuFlops(2*win->map->n);
476: }
477: ViennaCLWaitForGPU();
478: PetscLogGpuTimeEnd();
479: VecViennaCLRestoreArrayRead(xin,&xgpu);
480: VecViennaCLRestoreArrayRead(yin,&ygpu);
481: VecViennaCLRestoreArrayWrite(win,&wgpu);
482: }
483: return(0);
484: }
487: /*
488: * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
489: *
490: * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
491: * hence there is an iterated Section 1.5 Writing Application Codes with PETSc of these until the final result is obtained
492: */
493: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
494: {
496: PetscInt j;
499: for (j = 0; j < nv; ++j) {
500: if (j+1 < nv) {
501: VecAXPBYPCZ_SeqViennaCL(xin,alpha[j],alpha[j+1],1.0,y[j],y[j+1]);
502: ++j;
503: } else {
504: VecAXPY_SeqViennaCL(xin,alpha[j],y[j]);
505: }
506: }
507: ViennaCLWaitForGPU();
508: return(0);
509: }
512: PetscErrorCode VecDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
513: {
514: const ViennaCLVector *xgpu,*ygpu;
515: PetscErrorCode ierr;
518: if (xin->map->n > 0) {
519: VecViennaCLGetArrayRead(xin,&xgpu);
520: VecViennaCLGetArrayRead(yin,&ygpu);
521: PetscLogGpuTimeBegin();
522: try {
523: *z = viennacl::linalg::inner_prod(*xgpu,*ygpu);
524: } catch(std::exception const & ex) {
525: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
526: }
527: ViennaCLWaitForGPU();
528: PetscLogGpuTimeEnd();
529: if (xin->map->n >0) {
530: PetscLogGpuFlops(2.0*xin->map->n-1);
531: }
532: VecViennaCLRestoreArrayRead(xin,&xgpu);
533: VecViennaCLRestoreArrayRead(yin,&ygpu);
534: } else *z = 0.0;
535: return(0);
536: }
540: /*
541: * Operation z[j] = dot(x, y[j])
542: *
543: * We use an iterated Section 1.5 Writing Application Codes with PETSc of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
544: */
545: PetscErrorCode VecMDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
546: {
547: PetscErrorCode ierr;
548: PetscInt n = xin->map->n,i;
549: const ViennaCLVector *xgpu,*ygpu;
550: Vec *yyin = (Vec*)yin;
551: std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);
554: if (xin->map->n > 0) {
555: VecViennaCLGetArrayRead(xin,&xgpu);
556: for (i=0; i<nv; i++) {
557: VecViennaCLGetArrayRead(yyin[i],&ygpu);
558: ygpu_array[i] = ygpu;
559: }
560: PetscLogGpuTimeBegin();
561: viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
562: ViennaCLVector result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
563: viennacl::copy(result.begin(), result.end(), z);
564: for (i=0; i<nv; i++) {
565: VecViennaCLRestoreArrayRead(yyin[i],&ygpu);
566: }
567: ViennaCLWaitForGPU();
568: PetscLogGpuTimeEnd();
569: VecViennaCLRestoreArrayRead(xin,&xgpu);
570: PetscLogGpuFlops(PetscMax(nv*(2.0*n-1),0.0));
571: } else {
572: for (i=0; i<nv; i++) z[i] = 0.0;
573: }
574: return(0);
575: }
577: PetscErrorCode VecMTDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
578: {
582: /* Since complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
583: VecMDot_SeqViennaCL(xin,nv,yin,z);
584: ViennaCLWaitForGPU();
585: return(0);
586: }
589: PetscErrorCode VecSet_SeqViennaCL(Vec xin,PetscScalar alpha)
590: {
591: ViennaCLVector *xgpu;
595: if (xin->map->n > 0) {
596: VecViennaCLGetArrayWrite(xin,&xgpu);
597: PetscLogGpuTimeBegin();
598: try {
599: *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
600: ViennaCLWaitForGPU();
601: } catch(std::exception const & ex) {
602: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
603: }
604: PetscLogGpuTimeEnd();
605: VecViennaCLRestoreArrayWrite(xin,&xgpu);
606: }
607: return(0);
608: }
610: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
611: {
612: ViennaCLVector *xgpu;
616: if (alpha == 0.0 && xin->map->n > 0) {
617: VecSet_SeqViennaCL(xin,alpha);
618: PetscLogGpuFlops(xin->map->n);
619: } else if (alpha != 1.0 && xin->map->n > 0) {
620: VecViennaCLGetArray(xin,&xgpu);
621: PetscLogGpuTimeBegin();
622: try {
623: *xgpu *= alpha;
624: ViennaCLWaitForGPU();
625: } catch(std::exception const & ex) {
626: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
627: }
628: PetscLogGpuTimeEnd();
629: VecViennaCLRestoreArray(xin,&xgpu);
630: PetscLogGpuFlops(xin->map->n);
631: }
632: return(0);
633: }
636: PetscErrorCode VecTDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
637: {
641: /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
642: VecDot_SeqViennaCL(xin, yin, z);
643: ViennaCLWaitForGPU();
644: return(0);
645: }
648: PetscErrorCode VecCopy_SeqViennaCL(Vec xin,Vec yin)
649: {
650: const ViennaCLVector *xgpu;
651: ViennaCLVector *ygpu;
652: PetscErrorCode ierr;
655: if (xin != yin && xin->map->n > 0) {
656: if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
657: VecViennaCLGetArrayRead(xin,&xgpu);
658: VecViennaCLGetArrayWrite(yin,&ygpu);
659: PetscLogGpuTimeBegin();
660: try {
661: *ygpu = *xgpu;
662: ViennaCLWaitForGPU();
663: } catch(std::exception const & ex) {
664: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
665: }
666: PetscLogGpuTimeEnd();
667: VecViennaCLRestoreArrayRead(xin,&xgpu);
668: VecViennaCLRestoreArrayWrite(yin,&ygpu);
670: } else if (xin->offloadmask == PETSC_OFFLOAD_CPU) {
671: /* copy in CPU if we are on the CPU*/
672: VecCopy_SeqViennaCL_Private(xin,yin);
673: ViennaCLWaitForGPU();
674: } else if (xin->offloadmask == PETSC_OFFLOAD_BOTH) {
675: /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
676: if (yin->offloadmask == PETSC_OFFLOAD_CPU) {
677: /* copy in CPU */
678: VecCopy_SeqViennaCL_Private(xin,yin);
679: ViennaCLWaitForGPU();
680: } else if (yin->offloadmask == PETSC_OFFLOAD_GPU) {
681: /* copy in GPU */
682: VecViennaCLGetArrayRead(xin,&xgpu);
683: VecViennaCLGetArrayWrite(yin,&ygpu);
684: PetscLogGpuTimeBegin();
685: try {
686: *ygpu = *xgpu;
687: ViennaCLWaitForGPU();
688: } catch(std::exception const & ex) {
689: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
690: }
691: PetscLogGpuTimeEnd();
692: VecViennaCLRestoreArrayRead(xin,&xgpu);
693: VecViennaCLRestoreArrayWrite(yin,&ygpu);
694: } else if (yin->offloadmask == PETSC_OFFLOAD_BOTH) {
695: /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck
696: default to copy in GPU (this is an arbitrary choice) */
697: VecViennaCLGetArrayRead(xin,&xgpu);
698: VecViennaCLGetArrayWrite(yin,&ygpu);
699: PetscLogGpuTimeBegin();
700: try {
701: *ygpu = *xgpu;
702: ViennaCLWaitForGPU();
703: } catch(std::exception const & ex) {
704: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
705: }
706: PetscLogGpuTimeEnd();
707: VecViennaCLRestoreArrayRead(xin,&xgpu);
708: VecViennaCLRestoreArrayWrite(yin,&ygpu);
709: } else {
710: VecCopy_SeqViennaCL_Private(xin,yin);
711: ViennaCLWaitForGPU();
712: }
713: }
714: }
715: return(0);
716: }
719: PetscErrorCode VecSwap_SeqViennaCL(Vec xin,Vec yin)
720: {
722: ViennaCLVector *xgpu,*ygpu;
725: if (xin != yin && xin->map->n > 0) {
726: VecViennaCLGetArray(xin,&xgpu);
727: VecViennaCLGetArray(yin,&ygpu);
728: PetscLogGpuTimeBegin();
729: try {
730: viennacl::swap(*xgpu, *ygpu);
731: ViennaCLWaitForGPU();
732: } catch(std::exception const & ex) {
733: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
734: }
735: PetscLogGpuTimeEnd();
736: VecViennaCLRestoreArray(xin,&xgpu);
737: VecViennaCLRestoreArray(yin,&ygpu);
738: }
739: return(0);
740: }
743: // y = alpha * x + beta * y
744: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
745: {
746: PetscErrorCode ierr;
747: PetscScalar a = alpha,b = beta;
748: const ViennaCLVector *xgpu;
749: ViennaCLVector *ygpu;
752: if (a == 0.0 && xin->map->n > 0) {
753: VecScale_SeqViennaCL(yin,beta);
754: } else if (b == 1.0 && xin->map->n > 0) {
755: VecAXPY_SeqViennaCL(yin,alpha,xin);
756: } else if (a == 1.0 && xin->map->n > 0) {
757: VecAYPX_SeqViennaCL(yin,beta,xin);
758: } else if (b == 0.0 && xin->map->n > 0) {
759: VecViennaCLGetArrayRead(xin,&xgpu);
760: VecViennaCLGetArray(yin,&ygpu);
761: PetscLogGpuTimeBegin();
762: try {
763: *ygpu = *xgpu * alpha;
764: ViennaCLWaitForGPU();
765: } catch(std::exception const & ex) {
766: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
767: }
768: PetscLogGpuTimeEnd();
769: PetscLogGpuFlops(xin->map->n);
770: VecViennaCLRestoreArrayRead(xin,&xgpu);
771: VecViennaCLRestoreArray(yin,&ygpu);
772: } else if (xin->map->n > 0) {
773: VecViennaCLGetArrayRead(xin,&xgpu);
774: VecViennaCLGetArray(yin,&ygpu);
775: PetscLogGpuTimeBegin();
776: try {
777: *ygpu = *xgpu * alpha + *ygpu * beta;
778: ViennaCLWaitForGPU();
779: } catch(std::exception const & ex) {
780: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
781: }
782: PetscLogGpuTimeEnd();
783: VecViennaCLRestoreArrayRead(xin,&xgpu);
784: VecViennaCLRestoreArray(yin,&ygpu);
785: PetscLogGpuFlops(3.0*xin->map->n);
786: }
787: return(0);
788: }
791: /* operation z = alpha * x + beta *y + gamma *z*/
792: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
793: {
794: PetscErrorCode ierr;
795: PetscInt n = zin->map->n;
796: const ViennaCLVector *xgpu,*ygpu;
797: ViennaCLVector *zgpu;
800: VecViennaCLGetArrayRead(xin,&xgpu);
801: VecViennaCLGetArrayRead(yin,&ygpu);
802: VecViennaCLGetArray(zin,&zgpu);
803: if (alpha == 0.0 && xin->map->n > 0) {
804: PetscLogGpuTimeBegin();
805: try {
806: if (beta == 0.0) {
807: *zgpu = gamma * *zgpu;
808: ViennaCLWaitForGPU();
809: PetscLogGpuFlops(1.0*n);
810: } else if (gamma == 0.0) {
811: *zgpu = beta * *ygpu;
812: ViennaCLWaitForGPU();
813: PetscLogGpuFlops(1.0*n);
814: } else {
815: *zgpu = beta * *ygpu + gamma * *zgpu;
816: ViennaCLWaitForGPU();
817: PetscLogGpuFlops(3.0*n);
818: }
819: } catch(std::exception const & ex) {
820: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
821: }
822: PetscLogGpuTimeEnd();
823: PetscLogGpuFlops(3.0*n);
824: } else if (beta == 0.0 && xin->map->n > 0) {
825: PetscLogGpuTimeBegin();
826: try {
827: if (gamma == 0.0) {
828: *zgpu = alpha * *xgpu;
829: ViennaCLWaitForGPU();
830: PetscLogGpuFlops(1.0*n);
831: } else {
832: *zgpu = alpha * *xgpu + gamma * *zgpu;
833: ViennaCLWaitForGPU();
834: PetscLogGpuFlops(3.0*n);
835: }
836: } catch(std::exception const & ex) {
837: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
838: }
839: PetscLogGpuTimeEnd();
840: } else if (gamma == 0.0 && xin->map->n > 0) {
841: PetscLogGpuTimeBegin();
842: try {
843: *zgpu = alpha * *xgpu + beta * *ygpu;
844: ViennaCLWaitForGPU();
845: } catch(std::exception const & ex) {
846: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
847: }
848: PetscLogGpuTimeEnd();
849: PetscLogGpuFlops(3.0*n);
850: } else if (xin->map->n > 0) {
851: PetscLogGpuTimeBegin();
852: try {
853: /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
854: if (gamma != 1.0)
855: *zgpu *= gamma;
856: *zgpu += alpha * *xgpu + beta * *ygpu;
857: ViennaCLWaitForGPU();
858: } catch(std::exception const & ex) {
859: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
860: }
861: PetscLogGpuTimeEnd();
862: VecViennaCLRestoreArray(zin,&zgpu);
863: VecViennaCLRestoreArrayRead(xin,&xgpu);
864: VecViennaCLRestoreArrayRead(yin,&ygpu);
865: PetscLogGpuFlops(5.0*n);
866: }
867: return(0);
868: }
870: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win,Vec xin,Vec yin)
871: {
872: PetscErrorCode ierr;
873: PetscInt n = win->map->n;
874: const ViennaCLVector *xgpu,*ygpu;
875: ViennaCLVector *wgpu;
878: if (xin->map->n > 0) {
879: VecViennaCLGetArrayRead(xin,&xgpu);
880: VecViennaCLGetArrayRead(yin,&ygpu);
881: VecViennaCLGetArray(win,&wgpu);
882: PetscLogGpuTimeBegin();
883: try {
884: *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
885: ViennaCLWaitForGPU();
886: } catch(std::exception const & ex) {
887: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
888: }
889: PetscLogGpuTimeEnd();
890: VecViennaCLRestoreArrayRead(xin,&xgpu);
891: VecViennaCLRestoreArrayRead(yin,&ygpu);
892: VecViennaCLRestoreArray(win,&wgpu);
893: PetscLogGpuFlops(n);
894: }
895: return(0);
896: }
899: PetscErrorCode VecNorm_SeqViennaCL(Vec xin,NormType type,PetscReal *z)
900: {
901: PetscErrorCode ierr;
902: PetscInt n = xin->map->n;
903: PetscBLASInt bn;
904: const ViennaCLVector *xgpu;
907: if (xin->map->n > 0) {
908: PetscBLASIntCast(n,&bn);
909: VecViennaCLGetArrayRead(xin,&xgpu);
910: if (type == NORM_2 || type == NORM_FROBENIUS) {
911: PetscLogGpuTimeBegin();
912: try {
913: *z = viennacl::linalg::norm_2(*xgpu);
914: ViennaCLWaitForGPU();
915: } catch(std::exception const & ex) {
916: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
917: }
918: PetscLogGpuTimeEnd();
919: PetscLogGpuFlops(PetscMax(2.0*n-1,0.0));
920: } else if (type == NORM_INFINITY) {
921: PetscLogGpuTimeBegin();
922: try {
923: *z = viennacl::linalg::norm_inf(*xgpu);
924: ViennaCLWaitForGPU();
925: } catch(std::exception const & ex) {
926: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
927: }
928: PetscLogGpuTimeEnd();
929: } else if (type == NORM_1) {
930: PetscLogGpuTimeBegin();
931: try {
932: *z = viennacl::linalg::norm_1(*xgpu);
933: ViennaCLWaitForGPU();
934: } catch(std::exception const & ex) {
935: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
936: }
937: PetscLogGpuTimeEnd();
938: PetscLogGpuFlops(PetscMax(n-1.0,0.0));
939: } else if (type == NORM_1_AND_2) {
940: PetscLogGpuTimeBegin();
941: try {
942: *z = viennacl::linalg::norm_1(*xgpu);
943: *(z+1) = viennacl::linalg::norm_2(*xgpu);
944: ViennaCLWaitForGPU();
945: } catch(std::exception const & ex) {
946: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
947: }
948: PetscLogGpuTimeEnd();
949: PetscLogGpuFlops(PetscMax(2.0*n-1,0.0));
950: PetscLogGpuFlops(PetscMax(n-1.0,0.0));
951: }
952: VecViennaCLRestoreArrayRead(xin,&xgpu);
953: } else if (type == NORM_1_AND_2) {
954: *z = 0.0;
955: *(z+1) = 0.0;
956: } else *z = 0.0;
957: return(0);
958: }
961: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin,PetscRandom r)
962: {
966: VecSetRandom_SeqViennaCL_Private(xin,r);
967: xin->offloadmask = PETSC_OFFLOAD_CPU;
968: return(0);
969: }
971: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
972: {
977: VecViennaCLCopyFromGPU(vin);
978: VecResetArray_SeqViennaCL_Private(vin);
979: vin->offloadmask = PETSC_OFFLOAD_CPU;
980: return(0);
981: }
983: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
984: {
989: VecViennaCLCopyFromGPU(vin);
990: VecPlaceArray_Seq(vin,a);
991: vin->offloadmask = PETSC_OFFLOAD_CPU;
992: return(0);
993: }
995: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
996: {
1001: VecViennaCLCopyFromGPU(vin);
1002: VecReplaceArray_Seq(vin,a);
1003: vin->offloadmask = PETSC_OFFLOAD_CPU;
1004: return(0);
1005: }
1008: /*@C
1009: VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.
1011: Collective
1013: Input Parameter:
1014: + comm - the communicator, should be PETSC_COMM_SELF
1015: - n - the vector length
1017: Output Parameter:
1018: . V - the vector
1020: Notes:
1021: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
1022: same type as an existing vector.
1024: Level: intermediate
1026: .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
1027: @*/
1028: PetscErrorCode VecCreateSeqViennaCL(MPI_Comm comm,PetscInt n,Vec *v)
1029: {
1033: VecCreate(comm,v);
1034: VecSetSizes(*v,n,n);
1035: VecSetType(*v,VECSEQVIENNACL);
1036: return(0);
1037: }
1040: /*@C
1041: VecCreateSeqViennaCLWithArray - Creates a viennacl sequential array-style vector,
1042: where the user provides the array space to store the vector values.
1044: Collective
1046: Input Parameter:
1047: + comm - the communicator, should be PETSC_COMM_SELF
1048: . bs - the block size
1049: . n - the vector length
1050: - array - viennacl array where the vector elements are to be stored.
1052: Output Parameter:
1053: . V - the vector
1055: Notes:
1056: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
1057: same type as an existing vector.
1059: If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
1060: at a later stage to SET the array for storing the vector values.
1062: PETSc does NOT free the array when the vector is destroyed via VecDestroy().
1063: The user should not free the array until the vector is destroyed.
1065: Level: intermediate
1067: .seealso: VecCreateMPIViennaCLWithArray(), VecCreate(), VecDuplicate(), VecDuplicateVecs(),
1068: VecCreateGhost(), VecCreateSeq(), VecCUDAPlaceArray(), VecCreateSeqWithArray(),
1069: VecCreateMPIWithArray()
1070: @*/
1071: PETSC_EXTERN PetscErrorCode VecCreateSeqViennaCLWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,const ViennaCLVector* array,Vec *V)
1072: {
1074: PetscMPIInt size;
1077: VecCreate(comm,V);
1078: VecSetSizes(*V,n,n);
1079: VecSetBlockSize(*V,bs);
1080: MPI_Comm_size(comm,&size);
1081: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQ on more than one process");
1082: VecCreate_SeqViennaCL_Private(*V,array);
1083: return(0);
1084: }
1086: /*@C
1087: VecViennaCLPlaceArray - Replace the viennacl vector in a Vec with
1088: the one provided by the user. This is useful to avoid a copy.
1090: Not Collective
1092: Input Parameters:
1093: + vec - the vector
1094: - array - the ViennaCL vector
1096: Notes:
1097: You can return to the original viennacl vector with a call to
1098: VecViennaCLResetArray() It is not possible to use VecViennaCLPlaceArray()
1099: and VecPlaceArray() at the same time on the same vector.
1101: Level: intermediate
1103: .seealso: VecPlaceArray(), VecSetValues(), VecViennaCLResetArray(),
1104: VecCUDAPlaceArray(),
1106: @*/
1107: PETSC_EXTERN PetscErrorCode VecViennaCLPlaceArray(Vec vin,const ViennaCLVector* a)
1108: {
1113: VecViennaCLCopyToGPU(vin);
1114: if (((Vec_Seq*)vin->data)->unplacedarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"VecViennaCLPlaceArray()/VecPlaceArray() was already called on this vector, without a call to VecViennaCLResetArray()/VecResetArray()");
1115: ((Vec_Seq*)vin->data)->unplacedarray = (PetscScalar *) ((Vec_ViennaCL*)vin->spptr)->GPUarray; /* save previous GPU array so reset can bring it back */
1116: ((Vec_ViennaCL*)vin->spptr)->GPUarray = (ViennaCLVector*)a;
1117: vin->offloadmask = PETSC_OFFLOAD_GPU;
1118: PetscObjectStateIncrease((PetscObject)vin);
1119: return(0);
1120: }
1122: /*@C
1123: VecViennaCLResetArray - Resets a vector to use its default memory. Call this
1124: after the use of VecViennaCLPlaceArray().
1126: Not Collective
1128: Input Parameters:
1129: . vec - the vector
1131: Level: developer
1133: .seealso: VecViennaCLPlaceArray(), VecResetArray(), VecCUDAResetArray(), VecPlaceArray()
1134: @*/
1135: PETSC_EXTERN PetscErrorCode VecViennaCLResetArray(Vec vin)
1136: {
1141: VecViennaCLCopyToGPU(vin);
1142: ((Vec_ViennaCL*)vin->spptr)->GPUarray = (ViennaCLVector *) ((Vec_Seq*)vin->data)->unplacedarray;
1143: ((Vec_Seq*)vin->data)->unplacedarray = 0;
1144: vin->offloadmask = PETSC_OFFLOAD_GPU;
1145: PetscObjectStateIncrease((PetscObject)vin);
1146: return(0);
1147: }
1150: /* VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1151: *
1152: * Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
1153: */
1154: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1155: {
1156: PetscErrorCode ierr;
1159: VecDot_SeqViennaCL(s,t,dp);
1160: VecNorm_SeqViennaCL(t,NORM_2,nm);
1161: *nm *= *nm; //squared norm required
1162: return(0);
1163: }
1165: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win,Vec *V)
1166: {
1170: VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win),win->map->n,V);
1171: PetscLayoutReference(win->map,&(*V)->map);
1172: PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);
1173: PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);
1174: (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1175: return(0);
1176: }
1178: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1179: {
1183: try {
1184: if (v->spptr) {
1185: delete ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated;
1186: delete (Vec_ViennaCL*) v->spptr;
1187: }
1188: } catch(char *ex) {
1189: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
1190: }
1191: VecDestroy_SeqViennaCL_Private(v);
1192: return(0);
1193: }
1195: static PetscErrorCode VecBindToCPU_SeqAIJViennaCL(Vec V,PetscBool flg)
1196: {
1200: V->boundtocpu = flg;
1201: if (flg) {
1202: VecViennaCLCopyFromGPU(V);
1203: V->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
1204: V->ops->dot = VecDot_Seq;
1205: V->ops->norm = VecNorm_Seq;
1206: V->ops->tdot = VecTDot_Seq;
1207: V->ops->scale = VecScale_Seq;
1208: V->ops->copy = VecCopy_Seq;
1209: V->ops->set = VecSet_Seq;
1210: V->ops->swap = VecSwap_Seq;
1211: V->ops->axpy = VecAXPY_Seq;
1212: V->ops->axpby = VecAXPBY_Seq;
1213: V->ops->axpbypcz = VecAXPBYPCZ_Seq;
1214: V->ops->pointwisemult = VecPointwiseMult_Seq;
1215: V->ops->pointwisedivide = VecPointwiseDivide_Seq;
1216: V->ops->setrandom = VecSetRandom_Seq;
1217: V->ops->dot_local = VecDot_Seq;
1218: V->ops->tdot_local = VecTDot_Seq;
1219: V->ops->norm_local = VecNorm_Seq;
1220: V->ops->mdot_local = VecMDot_Seq;
1221: V->ops->mtdot_local = VecMTDot_Seq;
1222: V->ops->maxpy = VecMAXPY_Seq;
1223: V->ops->mdot = VecMDot_Seq;
1224: V->ops->mtdot = VecMTDot_Seq;
1225: V->ops->aypx = VecAYPX_Seq;
1226: V->ops->waxpy = VecWAXPY_Seq;
1227: V->ops->dotnorm2 = NULL;
1228: V->ops->placearray = VecPlaceArray_Seq;
1229: V->ops->replacearray = VecReplaceArray_Seq;
1230: V->ops->resetarray = VecResetArray_Seq;
1231: V->ops->duplicate = VecDuplicate_Seq;
1232: } else {
1233: V->ops->dot = VecDot_SeqViennaCL;
1234: V->ops->norm = VecNorm_SeqViennaCL;
1235: V->ops->tdot = VecTDot_SeqViennaCL;
1236: V->ops->scale = VecScale_SeqViennaCL;
1237: V->ops->copy = VecCopy_SeqViennaCL;
1238: V->ops->set = VecSet_SeqViennaCL;
1239: V->ops->swap = VecSwap_SeqViennaCL;
1240: V->ops->axpy = VecAXPY_SeqViennaCL;
1241: V->ops->axpby = VecAXPBY_SeqViennaCL;
1242: V->ops->axpbypcz = VecAXPBYPCZ_SeqViennaCL;
1243: V->ops->pointwisemult = VecPointwiseMult_SeqViennaCL;
1244: V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1245: V->ops->setrandom = VecSetRandom_SeqViennaCL;
1246: V->ops->dot_local = VecDot_SeqViennaCL;
1247: V->ops->tdot_local = VecTDot_SeqViennaCL;
1248: V->ops->norm_local = VecNorm_SeqViennaCL;
1249: V->ops->mdot_local = VecMDot_SeqViennaCL;
1250: V->ops->mtdot_local = VecMTDot_SeqViennaCL;
1251: V->ops->maxpy = VecMAXPY_SeqViennaCL;
1252: V->ops->mdot = VecMDot_SeqViennaCL;
1253: V->ops->mtdot = VecMTDot_SeqViennaCL;
1254: V->ops->aypx = VecAYPX_SeqViennaCL;
1255: V->ops->waxpy = VecWAXPY_SeqViennaCL;
1256: V->ops->dotnorm2 = VecDotNorm2_SeqViennaCL;
1257: V->ops->placearray = VecPlaceArray_SeqViennaCL;
1258: V->ops->replacearray = VecReplaceArray_SeqViennaCL;
1259: V->ops->resetarray = VecResetArray_SeqViennaCL;
1260: V->ops->destroy = VecDestroy_SeqViennaCL;
1261: V->ops->duplicate = VecDuplicate_SeqViennaCL;
1262: }
1263: return(0);
1264: }
1266: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1267: {
1269: PetscMPIInt size;
1272: MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
1273: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQVIENNACL on more than one process");
1274: VecCreate_Seq_Private(V,0);
1275: PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);
1277: VecBindToCPU_SeqAIJViennaCL(V,PETSC_FALSE);
1278: V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1280: VecViennaCLAllocateCheck(V);
1281: VecViennaCLAllocateCheckHost(V);
1282: VecSet(V,0.0);
1283: VecSet_Seq(V,0.0);
1284: V->offloadmask = PETSC_OFFLOAD_BOTH;
1285: return(0);
1286: }
1288: /*@C
1289: VecViennaCLGetCLContext - Get the OpenCL context in which the Vec resides.
1291: Caller should cast (*ctx) to (const cl_context). Caller is responsible for
1292: invoking clReleaseContext().
1295: Input Parameters:
1296: . v - the vector
1298: Output Parameters:
1299: . ctx - pointer to the underlying CL context
1301: Level: intermediate
1303: .seealso: VecViennaCLGetCLQueue(), VecViennaCLGetCLMemRead()
1304: @*/
1305: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLContext(Vec v, PETSC_UINTPTR_T* ctx)
1306: {
1307: #if !defined(PETSC_HAVE_OPENCL)
1308: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get the associated cl_context.");
1309: #else
1315: const ViennaCLVector *v_vcl;
1316: VecViennaCLGetArrayRead(v, &v_vcl);
1317: try{
1318: viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1319: const cl_context ocl_ctx = vcl_ctx.handle().get();
1320: clRetainContext(ocl_ctx);
1321: *ctx = (PETSC_UINTPTR_T)(ocl_ctx);
1322: } catch (std::exception const & ex) {
1323: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1324: }
1326: return(0);
1327: #endif
1328: }
1330: /*@C
1331: VecViennaCLGetCLQueue - Get the OpenCL command queue to which all
1332: operations of the Vec are enqueued.
1334: Caller should cast (*queue) to (const cl_command_queue). Caller is
1335: responsible for invoking clReleaseCommandQueue().
1337: Input Parameters:
1338: . v - the vector
1340: Output Parameters:
1341: . ctx - pointer to the CL command queue
1343: Level: intermediate
1345: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemRead()
1346: @*/
1347: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLQueue(Vec v, PETSC_UINTPTR_T* queue)
1348: {
1349: #if !defined(PETSC_HAVE_OPENCL)
1350: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get the associated cl_command_queue.");
1351: #else
1356: const ViennaCLVector *v_vcl;
1357: VecViennaCLGetArrayRead(v, &v_vcl);
1358: try{
1359: viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1360: const viennacl::ocl::command_queue& vcl_queue = vcl_ctx.current_queue();
1361: const cl_command_queue ocl_queue = vcl_queue.handle().get();
1362: clRetainCommandQueue(ocl_queue);
1363: *queue = (PETSC_UINTPTR_T)(ocl_queue);
1364: } catch (std::exception const & ex) {
1365: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1366: }
1368: return(0);
1369: #endif
1370: }
1372: /*@C
1373: VecViennaCLGetCLMemRead - Provides access to the the CL buffer inside a Vec.
1375: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1376: invoking clReleaseMemObject().
1378: Input Parameters:
1379: . v - the vector
1381: Output Parameters:
1382: . mem - pointer to the device buffer
1384: Level: intermediate
1386: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemWrite()
1387: @*/
1388: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemRead(Vec v, PETSC_UINTPTR_T* mem)
1389: {
1390: #if !defined(PETSC_HAVE_OPENCL)
1391: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1392: #else
1397: const ViennaCLVector *v_vcl;
1398: VecViennaCLGetArrayRead(v, &v_vcl);
1399: try{
1400: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1401: clRetainMemObject(ocl_mem);
1402: *mem = (PETSC_UINTPTR_T)(ocl_mem);
1403: } catch (std::exception const & ex) {
1404: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1405: }
1406: return(0);
1407: #endif
1408: }
1410: /*@C
1411: VecViennaCLGetCLMemWrite - Provides access to the the CL buffer inside a Vec.
1413: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1414: invoking clReleaseMemObject().
1416: The device pointer has to be released by calling
1417: VecViennaCLRestoreCLMemWrite(). Upon restoring the vector data the data on
1418: the host will be marked as out of date. A subsequent access of the host data
1419: will thus incur a data transfer from the device to the host.
1421: Input Parameters:
1422: . v - the vector
1424: Output Parameters:
1425: . mem - pointer to the device buffer
1427: Level: intermediate
1429: .seealso: VecViennaCLGetCLContext(), VecViennaCLRestoreCLMemWrite()
1430: @*/
1431: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemWrite(Vec v, PETSC_UINTPTR_T* mem)
1432: {
1433: #if !defined(PETSC_HAVE_OPENCL)
1434: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1435: #else
1440: ViennaCLVector *v_vcl;
1441: VecViennaCLGetArrayWrite(v, &v_vcl);
1442: try{
1443: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1444: clRetainMemObject(ocl_mem);
1445: *mem = (PETSC_UINTPTR_T)(ocl_mem);
1446: } catch (std::exception const & ex) {
1447: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1448: }
1450: return(0);
1451: #endif
1452: }
1454: /*@C
1455: VecViennaCLRestoreCLMemWrite - Restores a CL buffer pointer previously
1456: acquired with VecViennaCLGetCLMemWrite().
1458: This marks the host data as out of date. Subsequent access to the
1459: vector data on the host side with for instance VecGetArray() incurs a
1460: data transfer.
1462: Input Parameters:
1463: . v - the vector
1465: Level: intermediate
1467: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemWrite()
1468: @*/
1469: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMemWrite(Vec v)
1470: {
1471: #if !defined(PETSC_HAVE_OPENCL)
1472: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1473: #else
1478: VecViennaCLRestoreArrayWrite(v, PETSC_NULL);
1480: return(0);
1481: #endif
1482: }
1485: /*@C
1486: VecViennaCLGetCLMem - Provides access to the the CL buffer inside a Vec.
1488: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1489: invoking clReleaseMemObject().
1491: The device pointer has to be released by calling VecViennaCLRestoreCLMem().
1492: Upon restoring the vector data the data on the host will be marked as out of
1493: date. A subsequent access of the host data will thus incur a data transfer
1494: from the device to the host.
1496: Input Parameters:
1497: . v - the vector
1499: Output Parameters:
1500: . mem - pointer to the device buffer
1502: Level: intermediate
1504: .seealso: VecViennaCLGetCLContext(), VecViennaCLRestoreCLMem()
1505: @*/
1506: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMem(Vec v, PETSC_UINTPTR_T* mem)
1507: {
1508: #if !defined(PETSC_HAVE_OPENCL)
1509: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1510: #else
1515: ViennaCLVector *v_vcl;
1516: VecViennaCLGetArray(v, &v_vcl);
1517: try{
1518: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1519: clRetainMemObject(ocl_mem);
1520: *mem = (PETSC_UINTPTR_T)(ocl_mem);
1521: } catch (std::exception const & ex) {
1522: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1523: }
1525: return(0);
1526: #endif
1527: }
1529: /*@C
1530: VecViennaCLRestoreCLMem - Restores a CL buffer pointer previously
1531: acquired with VecViennaCLGetCLMem().
1533: This marks the host data as out of date. Subsequent access to the vector
1534: data on the host side with for instance VecGetArray() incurs a data
1535: transfer.
1537: Input Parameters:
1538: . v - the vector
1540: Level: intermediate
1542: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMem()
1543: @*/
1544: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMem(Vec v)
1545: {
1546: #if !defined(PETSC_HAVE_OPENCL)
1547: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1548: #else
1553: VecViennaCLRestoreArray(v, PETSC_NULL);
1555: return(0);
1556: #endif
1557: }
1559: PetscErrorCode VecCreate_SeqViennaCL_Private(Vec V,const ViennaCLVector *array)
1560: {
1562: Vec_ViennaCL *vecviennacl;
1563: PetscMPIInt size;
1566: MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
1567: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQVIENNACL on more than one process");
1568: VecCreate_Seq_Private(V,0);
1569: PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);
1570: VecBindToCPU_SeqAIJViennaCL(V,PETSC_FALSE);
1571: V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1573: if (array) {
1574: if (!V->spptr)
1575: V->spptr = new Vec_ViennaCL;
1576: vecviennacl = (Vec_ViennaCL*)V->spptr;
1577: vecviennacl->GPUarray_allocated = 0;
1578: vecviennacl->GPUarray = (ViennaCLVector*)array;
1579: V->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1580: }
1582: return(0);
1583: }