Actual source code: vecviennacl.cxx
petsc-dev 2014-02-02
1: /*
2: Implements the sequential ViennaCL vectors.
3: */
5: #include <petscconf.h>
6: #include <petsc-private/vecimpl.h> /*I "petscvec.h" I*/
7: #include <../src/vec/vec/impls/dvecimpl.h>
8: #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>
10: #include "viennacl/linalg/inner_prod.hpp"
11: #include "viennacl/linalg/norm_1.hpp"
12: #include "viennacl/linalg/norm_2.hpp"
13: #include "viennacl/linalg/norm_inf.hpp"
14: #include "viennacl/ocl/backend.hpp"
19: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayReadWrite(Vec v, ViennaCLVector **a)
20: {
24: *a = 0;
25: VecViennaCLCopyToGPU(v);
26: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
27: ViennaCLWaitForGPU();
28: return(0);
29: }
33: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayReadWrite(Vec v, ViennaCLVector **a)
34: {
38: v->valid_GPU_array = PETSC_VIENNACL_GPU;
40: PetscObjectStateIncrease((PetscObject)v);
41: return(0);
42: }
46: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
47: {
51: *a = 0;
52: VecViennaCLCopyToGPU(v);
53: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
54: ViennaCLWaitForGPU();
55: return(0);
56: }
60: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
61: {
63: return(0);
64: }
68: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
69: {
73: *a = 0;
74: VecViennaCLAllocateCheck(v);
75: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
76: ViennaCLWaitForGPU();
77: return(0);
78: }
82: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
83: {
87: v->valid_GPU_array = PETSC_VIENNACL_GPU;
89: PetscObjectStateIncrease((PetscObject)v);
90: return(0);
91: }
97: PETSC_EXTERN PetscErrorCode PetscObjectSetFromOptions_ViennaCL(PetscObject obj)
98: {
99: PetscErrorCode ierr;
100: PetscBool flg;
103: PetscObjectOptionsBegin(obj);
105: PetscOptionsHasName(NULL,"-viennacl_device_cpu",&flg);
106: if (flg) {
107: try {
108: viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);
109: } catch (std::exception const & ex) {
110: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
111: }
112: }
113: PetscOptionsHasName(NULL,"-viennacl_device_gpu",&flg);
114: if (flg) {
115: try {
116: viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);
117: } catch (std::exception const & ex) {
118: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
119: }
120: }
121: PetscOptionsHasName(NULL,"-viennacl_device_accelerator",&flg);
122: if (flg) {
123: try {
124: viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
125: } catch (std::exception const & ex) {
126: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
127: }
128: }
130: PetscOptionsEnd();
131: return(0);
132: }
136: /*
137: Allocates space for the vector array on the Host if it does not exist.
138: Does NOT change the PetscViennaCLFlag for the vector
139: Does NOT zero the ViennaCL array
140: */
141: PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
142: {
144: PetscScalar *array;
145: Vec_Seq *s;
146: PetscInt n = v->map->n;
149: s = (Vec_Seq*)v->data;
150: VecViennaCLAllocateCheck(v);
151: if (s->array == 0) {
152: PetscMalloc1(n,&array);
153: PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));
154: s->array = array;
155: s->array_allocated = array;
156: }
157: return(0);
158: }
163: /*
164: Allocates space for the vector array on the GPU if it does not exist.
165: Does NOT change the PetscViennaCLFlag for the vector
166: Does NOT zero the ViennaCL array
168: */
169: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
170: {
172: int rank;
175: MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
176: // First allocate memory on the GPU if needed
177: if (!v->spptr) {
178: try {
179: PetscObjectSetFromOptions_ViennaCL((PetscObject)v);
180: v->spptr = new Vec_ViennaCL;
181: ((Vec_ViennaCL*)v->spptr)->GPUarray = new ViennaCLVector((PetscBLASInt)v->map->n);
183: } catch(std::exception const & ex) {
184: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
185: }
186: }
187: return(0);
188: }
193: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
194: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
195: {
199: VecViennaCLAllocateCheck(v);
200: if (v->map->n > 0) {
201: if (v->valid_GPU_array == PETSC_VIENNACL_CPU) {
202: PetscLogEventBegin(VEC_ViennaCLCopyToGPU,v,0,0,0);
203: try {
204: ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
205: viennacl::fast_copy(*(PetscScalar**)v->data, *(PetscScalar**)v->data + v->map->n, vec->begin());
206: ViennaCLWaitForGPU();
207: } catch(std::exception const & ex) {
208: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
209: }
210: PetscLogEventEnd(VEC_ViennaCLCopyToGPU,v,0,0,0);
211: v->valid_GPU_array = PETSC_VIENNACL_BOTH;
212: }
213: }
214: return(0);
215: }
221: /*
222: VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
223: */
224: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
225: {
229: VecViennaCLAllocateCheckHost(v);
230: if (v->valid_GPU_array == PETSC_VIENNACL_GPU) {
231: PetscLogEventBegin(VEC_ViennaCLCopyFromGPU,v,0,0,0);
232: try {
233: ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
234: viennacl::fast_copy(vec->begin(),vec->end(),*(PetscScalar**)v->data);
235: ViennaCLWaitForGPU();
236: } catch(std::exception const & ex) {
237: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
238: }
239: PetscLogEventEnd(VEC_ViennaCLCopyFromGPU,v,0,0,0);
240: v->valid_GPU_array = PETSC_VIENNACL_BOTH;
241: }
242: return(0);
243: }
246: /* Copy on CPU */
249: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin,Vec yin)
250: {
251: PetscScalar *ya;
252: const PetscScalar *xa;
253: PetscErrorCode ierr;
256: if (xin != yin) {
257: VecGetArrayRead(xin,&xa);
258: VecGetArray(yin,&ya);
259: PetscMemcpy(ya,xa,xin->map->n*sizeof(PetscScalar));
260: VecRestoreArrayRead(xin,&xa);
261: VecRestoreArray(yin,&ya);
262: }
263: return(0);
264: }
268: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin,PetscRandom r)
269: {
271: PetscInt n = xin->map->n,i;
272: PetscScalar *xx;
275: VecGetArray(xin,&xx);
276: for (i=0; i<n; i++) {PetscRandomGetValue(r,&xx[i]);}
277: VecRestoreArray(xin,&xx);
278: return(0);
279: }
283: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
284: {
285: Vec_Seq *vs = (Vec_Seq*)v->data;
289: PetscObjectSAWsViewOff(v);
290: #if defined(PETSC_USE_LOG)
291: PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
292: #endif
293: if (vs->array_allocated) PetscFree(vs->array_allocated);
294: PetscFree(vs);
295: return(0);
296: }
300: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
301: {
302: Vec_Seq *v = (Vec_Seq*)vin->data;
305: v->array = v->unplacedarray;
306: v->unplacedarray = 0;
307: return(0);
308: }
311: /*MC
312: VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL
314: Options Database Keys:
315: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()
317: Level: beginner
319: .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateSeqWithArray(), VECMPI, VecType, VecCreateMPI(), VecCreateSeq()
320: M*/
325: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
326: {
327: const ViennaCLVector *xgpu;
328: ViennaCLVector *ygpu;
329: PetscErrorCode ierr;
332: if (alpha != 0.0 && xin->map->n > 0) {
333: VecViennaCLGetArrayRead(xin,&xgpu);
334: VecViennaCLGetArrayReadWrite(yin,&ygpu);
335: try {
336: *ygpu = *xgpu + alpha * *ygpu;
337: ViennaCLWaitForGPU();
338: } catch(std::exception const & ex) {
339: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
340: }
341: VecViennaCLRestoreArrayRead(xin,&xgpu);
342: VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
343: PetscLogFlops(2.0*yin->map->n);
344: }
345: return(0);
346: }
351: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)
352: {
353: const ViennaCLVector *xgpu;
354: ViennaCLVector *ygpu;
355: PetscErrorCode ierr;
358: if (alpha != 0.0 && xin->map->n > 0) {
359: VecViennaCLGetArrayRead(xin,&xgpu);
360: VecViennaCLGetArrayReadWrite(yin,&ygpu);
361: try {
362: *ygpu += alpha * *xgpu;
363: ViennaCLWaitForGPU();
364: } catch(std::exception const & ex) {
365: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
366: }
367: VecViennaCLRestoreArrayRead(xin,&xgpu);
368: VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
369: PetscLogFlops(2.0*yin->map->n);
370: }
371: return(0);
372: }
377: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
378: {
379: const ViennaCLVector *xgpu,*ygpu;
380: ViennaCLVector *wgpu;
381: PetscErrorCode ierr;
384: if (xin->map->n > 0) {
385: VecViennaCLGetArrayRead(xin,&xgpu);
386: VecViennaCLGetArrayRead(yin,&ygpu);
387: VecViennaCLGetArrayWrite(win,&wgpu);
388: try {
389: *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
390: ViennaCLWaitForGPU();
391: } catch(std::exception const & ex) {
392: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
393: }
394: PetscLogFlops(win->map->n);
395: VecViennaCLRestoreArrayRead(xin,&xgpu);
396: VecViennaCLRestoreArrayRead(yin,&ygpu);
397: VecViennaCLRestoreArrayWrite(win,&wgpu);
398: }
399: return(0);
400: }
405: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win,PetscScalar alpha,Vec xin, Vec yin)
406: {
407: const ViennaCLVector *xgpu,*ygpu;
408: ViennaCLVector *wgpu;
409: PetscErrorCode ierr;
412: if (alpha == 0.0 && xin->map->n > 0) {
413: VecCopy_SeqViennaCL(yin,win);
414: } else {
415: VecViennaCLGetArrayRead(xin,&xgpu);
416: VecViennaCLGetArrayRead(yin,&ygpu);
417: VecViennaCLGetArrayWrite(win,&wgpu);
418: if (alpha == 1.0) {
419: try {
420: *wgpu = *ygpu + *xgpu;
421: } catch(std::exception const & ex) {
422: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
423: }
424: PetscLogFlops(win->map->n);
425: } else if (alpha == -1.0) {
426: try {
427: *wgpu = *ygpu - *xgpu;
428: } catch(std::exception const & ex) {
429: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
430: }
431: PetscLogFlops(win->map->n);
432: } else {
433: try {
434: *wgpu = *ygpu + alpha * *xgpu;
435: } catch(std::exception const & ex) {
436: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
437: }
438: PetscLogFlops(2*win->map->n);
439: }
440: ViennaCLWaitForGPU();
441: VecViennaCLRestoreArrayRead(xin,&xgpu);
442: VecViennaCLRestoreArrayRead(yin,&ygpu);
443: VecViennaCLRestoreArrayWrite(win,&wgpu);
444: }
445: return(0);
446: }
449: /*
450: * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
451: *
452: * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
453: * hence there is an iterated application of these until the final result is obtained
454: */
457: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
458: {
460: PetscInt j;
463: for (j = 0; j < nv; ++j) {
464: if (j+1 < nv) {
465: VecAXPBYPCZ_SeqViennaCL(xin,alpha[j],alpha[j+1],1.0,y[j],y[j+1]);
466: ++j;
467: } else {
468: VecAXPY_SeqViennaCL(xin,alpha[j],y[j]);
469: }
470: }
471: ViennaCLWaitForGPU();
472: return(0);
473: }
478: PetscErrorCode VecDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
479: {
480: const ViennaCLVector *xgpu,*ygpu;
481: PetscErrorCode ierr;
484: if (xin->map->n > 0) {
485: VecViennaCLGetArrayRead(xin,&xgpu);
486: VecViennaCLGetArrayRead(yin,&ygpu);
487: try {
488: *z = viennacl::linalg::inner_prod(*xgpu,*ygpu);
489: } catch(std::exception const & ex) {
490: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
491: }
492: if (xin->map->n >0) {
493: PetscLogFlops(2.0*xin->map->n-1);
494: }
495: ViennaCLWaitForGPU();
496: VecViennaCLRestoreArrayRead(xin,&xgpu);
497: VecViennaCLRestoreArrayRead(yin,&ygpu);
498: } else *z = 0.0;
499: return(0);
500: }
504: /*
505: * Operation z[j] = dot(x, y[j])
506: *
507: * We use an iterated application of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
508: */
511: PetscErrorCode VecMDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
512: {
513: PetscErrorCode ierr;
514: PetscInt n = xin->map->n,i;
515: const ViennaCLVector *xgpu,*ygpu;
516: Vec *yyin = (Vec*)yin;
519: if (xin->map->n > 0) {
520: VecViennaCLGetArrayRead(xin,&xgpu);
521: for (i=0; i<nv; i++) {
522: VecViennaCLGetArrayRead(yyin[i],&ygpu);
523: try {
524: z[i] = viennacl::linalg::inner_prod(*xgpu,*ygpu);
525: } catch(std::exception const & ex) {
526: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
527: }
528: VecViennaCLRestoreArrayRead(yyin[i],&ygpu);
529: }
531: ViennaCLWaitForGPU();
532: VecViennaCLRestoreArrayRead(xin,&xgpu);
533: PetscLogFlops(PetscMax(nv*(2.0*n-1),0.0));
534: } else {
535: for (i=0; i<nv; i++) z[i] = 0.0;
536: }
537: return(0);
538: }
544: PetscErrorCode VecSet_SeqViennaCL(Vec xin,PetscScalar alpha)
545: {
546: ViennaCLVector *xgpu;
550: if (xin->map->n > 0) {
551: VecViennaCLGetArrayWrite(xin,&xgpu);
552: try {
553: *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
554: ViennaCLWaitForGPU();
555: } catch(std::exception const & ex) {
556: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
557: }
558: VecViennaCLRestoreArrayWrite(xin,&xgpu);
559: }
560: return(0);
561: }
565: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
566: {
567: ViennaCLVector *xgpu;
571: if (alpha == 0.0 && xin->map->n > 0) {
572: VecSet_SeqViennaCL(xin,alpha);
573: PetscLogFlops(xin->map->n);
574: } else if (alpha != 1.0 && xin->map->n > 0) {
575: VecViennaCLGetArrayReadWrite(xin,&xgpu);
576: try {
577: *xgpu *= alpha;
578: ViennaCLWaitForGPU();
579: } catch(std::exception const & ex) {
580: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
581: }
582: VecViennaCLRestoreArrayReadWrite(xin,&xgpu);
583: PetscLogFlops(xin->map->n);
584: }
585: return(0);
586: }
591: PetscErrorCode VecTDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
592: {
596: /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
597: VecDot_SeqViennaCL(xin, yin, z);
598: ViennaCLWaitForGPU();
599: return(0);
600: }
605: PetscErrorCode VecCopy_SeqViennaCL(Vec xin,Vec yin)
606: {
607: const ViennaCLVector *xgpu;
608: ViennaCLVector *ygpu;
609: PetscErrorCode ierr;
612: if (xin != yin && xin->map->n > 0) {
613: if (xin->valid_GPU_array == PETSC_VIENNACL_GPU) {
614: VecViennaCLGetArrayRead(xin,&xgpu);
615: VecViennaCLGetArrayWrite(yin,&ygpu);
616: try {
617: *ygpu = *xgpu;
618: ViennaCLWaitForGPU();
619: } catch(std::exception const & ex) {
620: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
621: }
622: VecViennaCLRestoreArrayRead(xin,&xgpu);
623: VecViennaCLRestoreArrayWrite(yin,&ygpu);
625: } else if (xin->valid_GPU_array == PETSC_VIENNACL_CPU) {
626: /* copy in CPU if we are on the CPU*/
627: VecCopy_SeqViennaCL_Private(xin,yin);
628: ViennaCLWaitForGPU();
629: } else if (xin->valid_GPU_array == PETSC_VIENNACL_BOTH) {
630: /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
631: if (yin->valid_GPU_array == PETSC_VIENNACL_CPU) {
632: /* copy in CPU */
633: VecCopy_SeqViennaCL_Private(xin,yin);
634: ViennaCLWaitForGPU();
635: } else if (yin->valid_GPU_array == PETSC_VIENNACL_GPU) {
636: /* copy in GPU */
637: VecViennaCLGetArrayRead(xin,&xgpu);
638: VecViennaCLGetArrayWrite(yin,&ygpu);
639: try {
640: *ygpu = *xgpu;
641: ViennaCLWaitForGPU();
642: } catch(std::exception const & ex) {
643: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
644: }
645: VecViennaCLRestoreArrayRead(xin,&xgpu);
646: VecViennaCLRestoreArrayWrite(yin,&ygpu);
647: } else if (yin->valid_GPU_array == PETSC_VIENNACL_BOTH) {
648: /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck
649: default to copy in GPU (this is an arbitrary choice) */
650: VecViennaCLGetArrayRead(xin,&xgpu);
651: VecViennaCLGetArrayWrite(yin,&ygpu);
652: try {
653: *ygpu = *xgpu;
654: ViennaCLWaitForGPU();
655: } catch(std::exception const & ex) {
656: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
657: }
658: VecViennaCLRestoreArrayRead(xin,&xgpu);
659: VecViennaCLRestoreArrayWrite(yin,&ygpu);
660: } else {
661: VecCopy_SeqViennaCL_Private(xin,yin);
662: ViennaCLWaitForGPU();
663: }
664: }
665: }
666: return(0);
667: }
672: PetscErrorCode VecSwap_SeqViennaCL(Vec xin,Vec yin)
673: {
675: ViennaCLVector *xgpu,*ygpu;
678: if (xin != yin && xin->map->n > 0) {
679: VecViennaCLGetArrayReadWrite(xin,&xgpu);
680: VecViennaCLGetArrayReadWrite(yin,&ygpu);
682: try {
683: viennacl::swap(*xgpu, *ygpu);
684: ViennaCLWaitForGPU();
685: } catch(std::exception const & ex) {
686: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
687: }
688: VecViennaCLRestoreArrayReadWrite(xin,&xgpu);
689: VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
690: }
691: return(0);
692: }
695: // y = alpha * x + beta * y
698: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
699: {
700: PetscErrorCode ierr;
701: PetscScalar a = alpha,b = beta;
702: const ViennaCLVector *xgpu;
703: ViennaCLVector *ygpu;
706: if (a == 0.0 && xin->map->n > 0) {
707: VecScale_SeqViennaCL(yin,beta);
708: } else if (b == 1.0 && xin->map->n > 0) {
709: VecAXPY_SeqViennaCL(yin,alpha,xin);
710: } else if (a == 1.0 && xin->map->n > 0) {
711: VecAYPX_SeqViennaCL(yin,beta,xin);
712: } else if (b == 0.0 && xin->map->n > 0) {
713: VecViennaCLGetArrayRead(xin,&xgpu);
714: VecViennaCLGetArrayReadWrite(yin,&ygpu);
715: try {
716: *ygpu = *xgpu * alpha;
717: ViennaCLWaitForGPU();
718: } catch(std::exception const & ex) {
719: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
720: }
721: PetscLogFlops(xin->map->n);
722: VecViennaCLRestoreArrayRead(xin,&xgpu);
723: VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
724: } else if (xin->map->n > 0) {
725: VecViennaCLGetArrayRead(xin,&xgpu);
726: VecViennaCLGetArrayReadWrite(yin,&ygpu);
727: try {
728: *ygpu = *xgpu * alpha + *ygpu * beta;
729: ViennaCLWaitForGPU();
730: } catch(std::exception const & ex) {
731: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
732: }
733: VecViennaCLRestoreArrayRead(xin,&xgpu);
734: VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
735: PetscLogFlops(3.0*xin->map->n);
736: }
737: return(0);
738: }
741: /* operation z = alpha * x + beta *y + gamma *z*/
744: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
745: {
746: PetscErrorCode ierr;
747: PetscInt n = zin->map->n;
748: const ViennaCLVector *xgpu,*ygpu;
749: ViennaCLVector *zgpu;
752: VecViennaCLGetArrayRead(xin,&xgpu);
753: VecViennaCLGetArrayRead(yin,&ygpu);
754: VecViennaCLGetArrayReadWrite(zin,&zgpu);
755: if (alpha == 0.0 && xin->map->n > 0) {
756: try {
757: if (beta == 0.0) {
758: *zgpu = gamma * *zgpu;
759: ViennaCLWaitForGPU();
760: PetscLogFlops(1.0*n);
761: } else if (gamma == 0.0) {
762: *zgpu = beta * *ygpu;
763: ViennaCLWaitForGPU();
764: PetscLogFlops(1.0*n);
765: } else {
766: *zgpu = beta * *ygpu + gamma * *zgpu;
767: ViennaCLWaitForGPU();
768: PetscLogFlops(3.0*n);
769: }
770: } catch(std::exception const & ex) {
771: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
772: }
773: PetscLogFlops(3.0*n);
774: } else if (beta == 0.0 && xin->map->n > 0) {
775: try {
776: if (gamma == 0.0) {
777: *zgpu = alpha * *xgpu;
778: ViennaCLWaitForGPU();
779: PetscLogFlops(1.0*n);
780: } else {
781: *zgpu = alpha * *xgpu + gamma * *zgpu;
782: ViennaCLWaitForGPU();
783: PetscLogFlops(3.0*n);
784: }
785: } catch(std::exception const & ex) {
786: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
787: }
788: } else if (gamma == 0.0 && xin->map->n > 0) {
789: try {
790: *zgpu = alpha * *xgpu + beta * *ygpu;
791: ViennaCLWaitForGPU();
792: } catch(std::exception const & ex) {
793: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
794: }
795: PetscLogFlops(3.0*n);
796: } else if (xin->map->n > 0) {
797: try {
798: /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
799: if (gamma != 1.0)
800: *zgpu *= gamma;
801: *zgpu += alpha * *xgpu + beta * *ygpu;
802: ViennaCLWaitForGPU();
803: } catch(std::exception const & ex) {
804: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
805: }
806: VecViennaCLRestoreArrayReadWrite(zin,&zgpu);
807: VecViennaCLRestoreArrayRead(xin,&xgpu);
808: VecViennaCLRestoreArrayRead(yin,&ygpu);
809: PetscLogFlops(5.0*n);
810: }
811: return(0);
812: }
816: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win,Vec xin,Vec yin)
817: {
818: PetscErrorCode ierr;
819: PetscInt n = win->map->n;
820: const ViennaCLVector *xgpu,*ygpu;
821: ViennaCLVector *wgpu;
824: if (xin->map->n > 0) {
825: VecViennaCLGetArrayRead(xin,&xgpu);
826: VecViennaCLGetArrayRead(yin,&ygpu);
827: VecViennaCLGetArrayReadWrite(win,&wgpu);
828: try {
829: *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
830: ViennaCLWaitForGPU();
831: } catch(std::exception const & ex) {
832: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
833: }
834: VecViennaCLRestoreArrayRead(xin,&xgpu);
835: VecViennaCLRestoreArrayRead(yin,&ygpu);
836: VecViennaCLRestoreArrayReadWrite(win,&wgpu);
837: PetscLogFlops(n);
838: }
839: return(0);
840: }
845: PetscErrorCode VecNorm_SeqViennaCL(Vec xin,NormType type,PetscReal *z)
846: {
847: PetscErrorCode ierr;
848: PetscInt n = xin->map->n;
849: PetscBLASInt bn;
850: const ViennaCLVector *xgpu;
853: if (xin->map->n > 0) {
854: PetscBLASIntCast(n,&bn);
855: VecViennaCLGetArrayRead(xin,&xgpu);
856: if (type == NORM_2 || type == NORM_FROBENIUS) {
857: try {
858: *z = viennacl::linalg::norm_2(*xgpu);
859: ViennaCLWaitForGPU();
860: } catch(std::exception const & ex) {
861: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
862: }
863: PetscLogFlops(PetscMax(2.0*n-1,0.0));
864: } else if (type == NORM_INFINITY) {
865: VecViennaCLGetArrayRead(xin,&xgpu);
866: try {
867: *z = viennacl::linalg::norm_inf(*xgpu);
868: ViennaCLWaitForGPU();
869: } catch(std::exception const & ex) {
870: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
871: }
872: VecViennaCLRestoreArrayRead(xin,&xgpu);
873: } else if (type == NORM_1) {
874: try {
875: *z = viennacl::linalg::norm_1(*xgpu);
876: ViennaCLWaitForGPU();
877: } catch(std::exception const & ex) {
878: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
879: }
880: PetscLogFlops(PetscMax(n-1.0,0.0));
881: } else if (type == NORM_1_AND_2) {
882: try {
883: *z = viennacl::linalg::norm_1(*xgpu);
884: *(z+1) = viennacl::linalg::norm_2(*xgpu);
885: ViennaCLWaitForGPU();
886: } catch(std::exception const & ex) {
887: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
888: }
889: PetscLogFlops(PetscMax(2.0*n-1,0.0));
890: PetscLogFlops(PetscMax(n-1.0,0.0));
891: }
892: VecViennaCLRestoreArrayRead(xin,&xgpu);
893: } else if (type == NORM_1_AND_2) {
894: *z = 0.0;
895: *(z+1) = 0.0;
896: } else *z = 0.0;
897: return(0);
898: }
903: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin,PetscRandom r)
904: {
908: VecSetRandom_SeqViennaCL_Private(xin,r);
909: xin->valid_GPU_array = PETSC_VIENNACL_CPU;
910: return(0);
911: }
915: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
916: {
920: VecViennaCLCopyFromGPU(vin);
921: VecResetArray_SeqViennaCL_Private(vin);
922: vin->valid_GPU_array = PETSC_VIENNACL_CPU;
923: return(0);
924: }
928: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
929: {
933: VecViennaCLCopyFromGPU(vin);
934: VecPlaceArray_Seq(vin,a);
935: vin->valid_GPU_array = PETSC_VIENNACL_CPU;
936: return(0);
937: }
942: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
943: {
947: VecViennaCLCopyFromGPU(vin);
948: VecReplaceArray_Seq(vin,a);
949: vin->valid_GPU_array = PETSC_VIENNACL_CPU;
950: return(0);
951: }
956: /*@
957: VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.
959: Collective on MPI_Comm
961: Input Parameter:
962: + comm - the communicator, should be PETSC_COMM_SELF
963: - n - the vector length
965: Output Parameter:
966: . V - the vector
968: Notes:
969: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
970: same type as an existing vector.
972: Level: intermediate
974: Concepts: vectors^creating sequential
976: .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
977: @*/
978: PetscErrorCode VecCreateSeqViennaCL(MPI_Comm comm,PetscInt n,Vec *v)
979: {
983: VecCreate(comm,v);
984: VecSetSizes(*v,n,n);
985: VecSetType(*v,VECSEQVIENNACL);
986: return(0);
987: }
990: /* VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
991: *
992: * Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
993: */
996: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
997: {
998: PetscErrorCode ierr;
1001: VecDot_SeqViennaCL(s,t,dp);
1002: VecNorm_SeqViennaCL(t,NORM_2,nm);
1003: *nm *= *nm; //squared norm required
1004: return(0);
1005: }
1009: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win,Vec *V)
1010: {
1014: VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win),win->map->n,V);
1015: PetscLayoutReference(win->map,&(*V)->map);
1016: PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);
1017: PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);
1018: (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1019: return(0);
1020: }
1024: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1025: {
1029: try {
1030: if (v->spptr) {
1031: delete ((Vec_ViennaCL*)v->spptr)->GPUarray;
1032: delete (Vec_ViennaCL*) v->spptr;
1033: }
1034: } catch(char *ex) {
1035: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
1036: }
1037: VecDestroy_SeqViennaCL_Private(v);
1038: return(0);
1039: }
1044: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1045: {
1047: PetscMPIInt size;
1050: MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
1051: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQVIENNACL on more than one process");
1052: VecCreate_Seq_Private(V,0);
1053: PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);
1055: V->ops->dot = VecDot_SeqViennaCL;
1056: V->ops->norm = VecNorm_SeqViennaCL;
1057: V->ops->tdot = VecTDot_SeqViennaCL;
1058: V->ops->scale = VecScale_SeqViennaCL;
1059: V->ops->copy = VecCopy_SeqViennaCL;
1060: V->ops->set = VecSet_SeqViennaCL;
1061: V->ops->swap = VecSwap_SeqViennaCL;
1062: V->ops->axpy = VecAXPY_SeqViennaCL;
1063: V->ops->axpby = VecAXPBY_SeqViennaCL;
1064: V->ops->axpbypcz = VecAXPBYPCZ_SeqViennaCL;
1065: V->ops->pointwisemult = VecPointwiseMult_SeqViennaCL;
1066: V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1067: V->ops->setrandom = VecSetRandom_SeqViennaCL;
1068: V->ops->dot_local = VecDot_SeqViennaCL;
1069: V->ops->tdot_local = VecTDot_SeqViennaCL;
1070: V->ops->norm_local = VecNorm_SeqViennaCL;
1071: V->ops->mdot_local = VecMDot_SeqViennaCL;
1072: V->ops->maxpy = VecMAXPY_SeqViennaCL;
1073: V->ops->mdot = VecMDot_SeqViennaCL;
1074: V->ops->aypx = VecAYPX_SeqViennaCL;
1075: V->ops->waxpy = VecWAXPY_SeqViennaCL;
1076: V->ops->dotnorm2 = VecDotNorm2_SeqViennaCL;
1077: V->ops->placearray = VecPlaceArray_SeqViennaCL;
1078: V->ops->replacearray = VecReplaceArray_SeqViennaCL;
1079: V->ops->resetarray = VecResetArray_SeqViennaCL;
1080: V->ops->destroy = VecDestroy_SeqViennaCL;
1081: V->ops->duplicate = VecDuplicate_SeqViennaCL;
1083: VecViennaCLAllocateCheck(V);
1084: V->valid_GPU_array = PETSC_VIENNACL_GPU;
1085: VecSet(V,0.0);
1086: return(0);
1087: }