Actual source code: vecviennacl.cxx

petsc-dev 2014-02-02
Report Typos and Errors
  1: /*
  2:    Implements the sequential ViennaCL vectors.
  3: */

  5: #include <petscconf.h>
  6: #include <petsc-private/vecimpl.h>          /*I "petscvec.h" I*/
  7: #include <../src/vec/vec/impls/dvecimpl.h>
  8: #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>

 10: #include "viennacl/linalg/inner_prod.hpp"
 11: #include "viennacl/linalg/norm_1.hpp"
 12: #include "viennacl/linalg/norm_2.hpp"
 13: #include "viennacl/linalg/norm_inf.hpp"
 14: #include "viennacl/ocl/backend.hpp"


 19: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayReadWrite(Vec v, ViennaCLVector **a)
 20: {

 24:   *a   = 0;
 25:   VecViennaCLCopyToGPU(v);
 26:   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
 27:   ViennaCLWaitForGPU();
 28:   return(0);
 29: }

 33: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayReadWrite(Vec v, ViennaCLVector **a)
 34: {

 38:   v->valid_GPU_array = PETSC_VIENNACL_GPU;

 40:   PetscObjectStateIncrease((PetscObject)v);
 41:   return(0);
 42: }

 46: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
 47: {

 51:   *a   = 0;
 52:   VecViennaCLCopyToGPU(v);
 53:   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
 54:   ViennaCLWaitForGPU();
 55:   return(0);
 56: }

 60: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
 61: {
 63:   return(0);
 64: }

 68: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
 69: {

 73:   *a   = 0;
 74:   VecViennaCLAllocateCheck(v);
 75:   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
 76:   ViennaCLWaitForGPU();
 77:   return(0);
 78: }

 82: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
 83: {

 87:   v->valid_GPU_array = PETSC_VIENNACL_GPU;

 89:   PetscObjectStateIncrease((PetscObject)v);
 90:   return(0);
 91: }



 97: PETSC_EXTERN PetscErrorCode PetscObjectSetFromOptions_ViennaCL(PetscObject obj)
 98: {
 99:   PetscErrorCode       ierr;
100:   PetscBool            flg;

103:   PetscObjectOptionsBegin(obj);

105:   PetscOptionsHasName(NULL,"-viennacl_device_cpu",&flg);
106:   if (flg) {
107:     try {
108:       viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);
109:     } catch (std::exception const & ex) {
110:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
111:     }
112:   }
113:   PetscOptionsHasName(NULL,"-viennacl_device_gpu",&flg);
114:   if (flg) {
115:     try {
116:       viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);
117:     } catch (std::exception const & ex) {
118:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
119:     }
120:   }
121:   PetscOptionsHasName(NULL,"-viennacl_device_accelerator",&flg);
122:   if (flg) {
123:     try {
124:       viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
125:     } catch (std::exception const & ex) {
126:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
127:     }
128:   }

130:   PetscOptionsEnd();
131:   return(0);
132: }

136: /*
137:     Allocates space for the vector array on the Host if it does not exist.
138:     Does NOT change the PetscViennaCLFlag for the vector
139:     Does NOT zero the ViennaCL array
140:  */
141: PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
142: {
144:   PetscScalar    *array;
145:   Vec_Seq        *s;
146:   PetscInt       n = v->map->n;

149:   s    = (Vec_Seq*)v->data;
150:   VecViennaCLAllocateCheck(v);
151:   if (s->array == 0) {
152:     PetscMalloc1(n,&array);
153:     PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));
154:     s->array           = array;
155:     s->array_allocated = array;
156:   }
157:   return(0);
158: }


163: /*
164:     Allocates space for the vector array on the GPU if it does not exist.
165:     Does NOT change the PetscViennaCLFlag for the vector
166:     Does NOT zero the ViennaCL array

168:  */
169: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
170: {
172:   int            rank;

175:   MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
176:   // First allocate memory on the GPU if needed
177:   if (!v->spptr) {
178:     try {
179:       PetscObjectSetFromOptions_ViennaCL((PetscObject)v);
180:       v->spptr                            = new Vec_ViennaCL;
181:       ((Vec_ViennaCL*)v->spptr)->GPUarray = new ViennaCLVector((PetscBLASInt)v->map->n);

183:     } catch(std::exception const & ex) {
184:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
185:     }
186:   }
187:   return(0);
188: }


193: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
194: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
195: {

199:   VecViennaCLAllocateCheck(v);
200:   if (v->map->n > 0) {
201:     if (v->valid_GPU_array == PETSC_VIENNACL_CPU) {
202:       PetscLogEventBegin(VEC_ViennaCLCopyToGPU,v,0,0,0);
203:       try {
204:         ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
205:         viennacl::fast_copy(*(PetscScalar**)v->data, *(PetscScalar**)v->data + v->map->n, vec->begin());
206:         ViennaCLWaitForGPU();
207:       } catch(std::exception const & ex) {
208:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
209:       }
210:       PetscLogEventEnd(VEC_ViennaCLCopyToGPU,v,0,0,0);
211:       v->valid_GPU_array = PETSC_VIENNACL_BOTH;
212:     }
213:   }
214:   return(0);
215: }



221: /*
222:      VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
223: */
224: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
225: {

229:   VecViennaCLAllocateCheckHost(v);
230:   if (v->valid_GPU_array == PETSC_VIENNACL_GPU) {
231:     PetscLogEventBegin(VEC_ViennaCLCopyFromGPU,v,0,0,0);
232:     try {
233:       ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
234:       viennacl::fast_copy(vec->begin(),vec->end(),*(PetscScalar**)v->data);
235:       ViennaCLWaitForGPU();
236:     } catch(std::exception const & ex) {
237:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
238:     }
239:     PetscLogEventEnd(VEC_ViennaCLCopyFromGPU,v,0,0,0);
240:     v->valid_GPU_array = PETSC_VIENNACL_BOTH;
241:   }
242:   return(0);
243: }


246: /* Copy on CPU */
249: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin,Vec yin)
250: {
251:   PetscScalar       *ya;
252:   const PetscScalar *xa;
253:   PetscErrorCode    ierr;

256:   if (xin != yin) {
257:     VecGetArrayRead(xin,&xa);
258:     VecGetArray(yin,&ya);
259:     PetscMemcpy(ya,xa,xin->map->n*sizeof(PetscScalar));
260:     VecRestoreArrayRead(xin,&xa);
261:     VecRestoreArray(yin,&ya);
262:   }
263:   return(0);
264: }

268: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin,PetscRandom r)
269: {
271:   PetscInt       n = xin->map->n,i;
272:   PetscScalar    *xx;

275:   VecGetArray(xin,&xx);
276:   for (i=0; i<n; i++) {PetscRandomGetValue(r,&xx[i]);}
277:   VecRestoreArray(xin,&xx);
278:   return(0);
279: }

283: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
284: {
285:   Vec_Seq        *vs = (Vec_Seq*)v->data;

289:   PetscObjectSAWsViewOff(v);
290: #if defined(PETSC_USE_LOG)
291:   PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
292: #endif
293:   if (vs->array_allocated) PetscFree(vs->array_allocated);
294:   PetscFree(vs);
295:   return(0);
296: }

300: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
301: {
302:   Vec_Seq *v = (Vec_Seq*)vin->data;

305:   v->array         = v->unplacedarray;
306:   v->unplacedarray = 0;
307:   return(0);
308: }


311: /*MC
312:    VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL

314:    Options Database Keys:
315: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()

317:   Level: beginner

319: .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateSeqWithArray(), VECMPI, VecType, VecCreateMPI(), VecCreateSeq()
320: M*/


325: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
326: {
327:   const ViennaCLVector  *xgpu;
328:   ViennaCLVector        *ygpu;
329:   PetscErrorCode        ierr;

332:   if (alpha != 0.0 && xin->map->n > 0) {
333:     VecViennaCLGetArrayRead(xin,&xgpu);
334:     VecViennaCLGetArrayReadWrite(yin,&ygpu);
335:     try {
336:       *ygpu = *xgpu + alpha * *ygpu;
337:       ViennaCLWaitForGPU();
338:     } catch(std::exception const & ex) {
339:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
340:     }
341:     VecViennaCLRestoreArrayRead(xin,&xgpu);
342:     VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
343:     PetscLogFlops(2.0*yin->map->n);
344:   }
345:   return(0);
346: }


351: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)
352: {
353:   const ViennaCLVector  *xgpu;
354:   ViennaCLVector        *ygpu;
355:   PetscErrorCode        ierr;

358:   if (alpha != 0.0 && xin->map->n > 0) {
359:     VecViennaCLGetArrayRead(xin,&xgpu);
360:     VecViennaCLGetArrayReadWrite(yin,&ygpu);
361:     try {
362:       *ygpu += alpha * *xgpu;
363:       ViennaCLWaitForGPU();
364:     } catch(std::exception const & ex) {
365:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
366:     }
367:     VecViennaCLRestoreArrayRead(xin,&xgpu);
368:     VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
369:     PetscLogFlops(2.0*yin->map->n);
370:   }
371:   return(0);
372: }


377: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
378: {
379:   const ViennaCLVector  *xgpu,*ygpu;
380:   ViennaCLVector        *wgpu;
381:   PetscErrorCode        ierr;

384:   if (xin->map->n > 0) {
385:     VecViennaCLGetArrayRead(xin,&xgpu);
386:     VecViennaCLGetArrayRead(yin,&ygpu);
387:     VecViennaCLGetArrayWrite(win,&wgpu);
388:     try {
389:       *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
390:       ViennaCLWaitForGPU();
391:     } catch(std::exception const & ex) {
392:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
393:     }
394:     PetscLogFlops(win->map->n);
395:     VecViennaCLRestoreArrayRead(xin,&xgpu);
396:     VecViennaCLRestoreArrayRead(yin,&ygpu);
397:     VecViennaCLRestoreArrayWrite(win,&wgpu);
398:   }
399:   return(0);
400: }


405: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win,PetscScalar alpha,Vec xin, Vec yin)
406: {
407:   const ViennaCLVector  *xgpu,*ygpu;
408:   ViennaCLVector        *wgpu;
409:   PetscErrorCode        ierr;

412:   if (alpha == 0.0 && xin->map->n > 0) {
413:     VecCopy_SeqViennaCL(yin,win);
414:   } else {
415:     VecViennaCLGetArrayRead(xin,&xgpu);
416:     VecViennaCLGetArrayRead(yin,&ygpu);
417:     VecViennaCLGetArrayWrite(win,&wgpu);
418:     if (alpha == 1.0) {
419:       try {
420:         *wgpu = *ygpu + *xgpu;
421:       } catch(std::exception const & ex) {
422:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
423:       }
424:       PetscLogFlops(win->map->n);
425:     } else if (alpha == -1.0) {
426:       try {
427:         *wgpu = *ygpu - *xgpu;
428:       } catch(std::exception const & ex) {
429:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
430:       }
431:       PetscLogFlops(win->map->n);
432:     } else {
433:       try {
434:         *wgpu = *ygpu + alpha * *xgpu;
435:       } catch(std::exception const & ex) {
436:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
437:       }
438:       PetscLogFlops(2*win->map->n);
439:     }
440:     ViennaCLWaitForGPU();
441:     VecViennaCLRestoreArrayRead(xin,&xgpu);
442:     VecViennaCLRestoreArrayRead(yin,&ygpu);
443:     VecViennaCLRestoreArrayWrite(win,&wgpu);
444:   }
445:   return(0);
446: }


449: /*
450:  * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
451:  *
452:  * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
453:  * hence there is an iterated application of these until the final result is obtained
454:  */
457: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
458: {
460:   PetscInt       j;

463:   for (j = 0; j < nv; ++j) {
464:     if (j+1 < nv) {
465:       VecAXPBYPCZ_SeqViennaCL(xin,alpha[j],alpha[j+1],1.0,y[j],y[j+1]);
466:       ++j;
467:     } else {
468:       VecAXPY_SeqViennaCL(xin,alpha[j],y[j]);
469:     }
470:   }
471:   ViennaCLWaitForGPU();
472:   return(0);
473: }


478: PetscErrorCode VecDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
479: {
480:   const ViennaCLVector  *xgpu,*ygpu;
481:   PetscErrorCode        ierr;

484:   if (xin->map->n > 0) {
485:     VecViennaCLGetArrayRead(xin,&xgpu);
486:     VecViennaCLGetArrayRead(yin,&ygpu);
487:     try {
488:       *z = viennacl::linalg::inner_prod(*xgpu,*ygpu);
489:     } catch(std::exception const & ex) {
490:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
491:     }
492:     if (xin->map->n >0) {
493:       PetscLogFlops(2.0*xin->map->n-1);
494:     }
495:     ViennaCLWaitForGPU();
496:     VecViennaCLRestoreArrayRead(xin,&xgpu);
497:     VecViennaCLRestoreArrayRead(yin,&ygpu);
498:   } else *z = 0.0;
499:   return(0);
500: }



504: /*
505:  * Operation z[j] = dot(x, y[j])
506:  *
507:  * We use an iterated application of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
508:  */
511: PetscErrorCode VecMDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
512: {
513:   PetscErrorCode       ierr;
514:   PetscInt             n = xin->map->n,i;
515:   const ViennaCLVector *xgpu,*ygpu;
516:   Vec                  *yyin = (Vec*)yin;

519:   if (xin->map->n > 0) {
520:     VecViennaCLGetArrayRead(xin,&xgpu);
521:     for (i=0; i<nv; i++) {
522:       VecViennaCLGetArrayRead(yyin[i],&ygpu);
523:       try {
524:         z[i] = viennacl::linalg::inner_prod(*xgpu,*ygpu);
525:       } catch(std::exception const & ex) {
526:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
527:       }
528:       VecViennaCLRestoreArrayRead(yyin[i],&ygpu);
529:     }

531:     ViennaCLWaitForGPU();
532:     VecViennaCLRestoreArrayRead(xin,&xgpu);
533:     PetscLogFlops(PetscMax(nv*(2.0*n-1),0.0));
534:   } else {
535:     for (i=0; i<nv; i++) z[i] = 0.0;
536:   }
537:   return(0);
538: }



544: PetscErrorCode VecSet_SeqViennaCL(Vec xin,PetscScalar alpha)
545: {
546:   ViennaCLVector *xgpu;

550:   if (xin->map->n > 0) {
551:     VecViennaCLGetArrayWrite(xin,&xgpu);
552:     try {
553:       *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
554:       ViennaCLWaitForGPU();
555:     } catch(std::exception const & ex) {
556:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
557:     }
558:     VecViennaCLRestoreArrayWrite(xin,&xgpu);
559:   }
560:   return(0);
561: }

565: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
566: {
567:   ViennaCLVector *xgpu;

571:   if (alpha == 0.0 && xin->map->n > 0) {
572:     VecSet_SeqViennaCL(xin,alpha);
573:     PetscLogFlops(xin->map->n);
574:   } else if (alpha != 1.0 && xin->map->n > 0) {
575:     VecViennaCLGetArrayReadWrite(xin,&xgpu);
576:     try {
577:       *xgpu *= alpha;
578:       ViennaCLWaitForGPU();
579:     } catch(std::exception const & ex) {
580:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
581:     }
582:     VecViennaCLRestoreArrayReadWrite(xin,&xgpu);
583:     PetscLogFlops(xin->map->n);
584:   }
585:   return(0);
586: }


591: PetscErrorCode VecTDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
592: {

596:   /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
597:   VecDot_SeqViennaCL(xin, yin, z);
598:   ViennaCLWaitForGPU();
599:   return(0);
600: }


605: PetscErrorCode VecCopy_SeqViennaCL(Vec xin,Vec yin)
606: {
607:   const ViennaCLVector *xgpu;
608:   ViennaCLVector       *ygpu;
609:   PetscErrorCode       ierr;

612:   if (xin != yin && xin->map->n > 0) {
613:     if (xin->valid_GPU_array == PETSC_VIENNACL_GPU) {
614:       VecViennaCLGetArrayRead(xin,&xgpu);
615:       VecViennaCLGetArrayWrite(yin,&ygpu);
616:       try {
617:         *ygpu = *xgpu;
618:         ViennaCLWaitForGPU();
619:       } catch(std::exception const & ex) {
620:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
621:       }
622:       VecViennaCLRestoreArrayRead(xin,&xgpu);
623:       VecViennaCLRestoreArrayWrite(yin,&ygpu);

625:     } else if (xin->valid_GPU_array == PETSC_VIENNACL_CPU) {
626:       /* copy in CPU if we are on the CPU*/
627:       VecCopy_SeqViennaCL_Private(xin,yin);
628:       ViennaCLWaitForGPU();
629:     } else if (xin->valid_GPU_array == PETSC_VIENNACL_BOTH) {
630:       /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
631:       if (yin->valid_GPU_array == PETSC_VIENNACL_CPU) {
632:         /* copy in CPU */
633:         VecCopy_SeqViennaCL_Private(xin,yin);
634:         ViennaCLWaitForGPU();
635:       } else if (yin->valid_GPU_array == PETSC_VIENNACL_GPU) {
636:         /* copy in GPU */
637:         VecViennaCLGetArrayRead(xin,&xgpu);
638:         VecViennaCLGetArrayWrite(yin,&ygpu);
639:         try {
640:           *ygpu = *xgpu;
641:           ViennaCLWaitForGPU();
642:         } catch(std::exception const & ex) {
643:           SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
644:         }
645:         VecViennaCLRestoreArrayRead(xin,&xgpu);
646:         VecViennaCLRestoreArrayWrite(yin,&ygpu);
647:       } else if (yin->valid_GPU_array == PETSC_VIENNACL_BOTH) {
648:         /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck
649:            default to copy in GPU (this is an arbitrary choice) */
650:         VecViennaCLGetArrayRead(xin,&xgpu);
651:         VecViennaCLGetArrayWrite(yin,&ygpu);
652:         try {
653:           *ygpu = *xgpu;
654:           ViennaCLWaitForGPU();
655:         } catch(std::exception const & ex) {
656:           SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
657:         }
658:         VecViennaCLRestoreArrayRead(xin,&xgpu);
659:         VecViennaCLRestoreArrayWrite(yin,&ygpu);
660:       } else {
661:         VecCopy_SeqViennaCL_Private(xin,yin);
662:         ViennaCLWaitForGPU();
663:       }
664:     }
665:   }
666:   return(0);
667: }


672: PetscErrorCode VecSwap_SeqViennaCL(Vec xin,Vec yin)
673: {
675:   ViennaCLVector *xgpu,*ygpu;

678:   if (xin != yin && xin->map->n > 0) {
679:     VecViennaCLGetArrayReadWrite(xin,&xgpu);
680:     VecViennaCLGetArrayReadWrite(yin,&ygpu);

682:     try {
683:       viennacl::swap(*xgpu, *ygpu);
684:       ViennaCLWaitForGPU();
685:     } catch(std::exception const & ex) {
686:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
687:     }
688:     VecViennaCLRestoreArrayReadWrite(xin,&xgpu);
689:     VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
690:   }
691:   return(0);
692: }


695: // y = alpha * x + beta * y
698: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
699: {
700:   PetscErrorCode       ierr;
701:   PetscScalar          a = alpha,b = beta;
702:   const ViennaCLVector *xgpu;
703:   ViennaCLVector       *ygpu;

706:   if (a == 0.0 && xin->map->n > 0) {
707:     VecScale_SeqViennaCL(yin,beta);
708:   } else if (b == 1.0 && xin->map->n > 0) {
709:     VecAXPY_SeqViennaCL(yin,alpha,xin);
710:   } else if (a == 1.0 && xin->map->n > 0) {
711:     VecAYPX_SeqViennaCL(yin,beta,xin);
712:   } else if (b == 0.0 && xin->map->n > 0) {
713:     VecViennaCLGetArrayRead(xin,&xgpu);
714:     VecViennaCLGetArrayReadWrite(yin,&ygpu);
715:     try {
716:       *ygpu = *xgpu * alpha;
717:       ViennaCLWaitForGPU();
718:     } catch(std::exception const & ex) {
719:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
720:     }
721:     PetscLogFlops(xin->map->n);
722:     VecViennaCLRestoreArrayRead(xin,&xgpu);
723:     VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
724:   } else if (xin->map->n > 0) {
725:     VecViennaCLGetArrayRead(xin,&xgpu);
726:     VecViennaCLGetArrayReadWrite(yin,&ygpu);
727:     try {
728:       *ygpu = *xgpu * alpha + *ygpu * beta;
729:       ViennaCLWaitForGPU();
730:     } catch(std::exception const & ex) {
731:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
732:     }
733:     VecViennaCLRestoreArrayRead(xin,&xgpu);
734:     VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
735:     PetscLogFlops(3.0*xin->map->n);
736:   }
737:   return(0);
738: }


741: /* operation  z = alpha * x + beta *y + gamma *z*/
744: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
745: {
746:   PetscErrorCode       ierr;
747:   PetscInt             n = zin->map->n;
748:   const ViennaCLVector *xgpu,*ygpu;
749:   ViennaCLVector       *zgpu;

752:   VecViennaCLGetArrayRead(xin,&xgpu);
753:   VecViennaCLGetArrayRead(yin,&ygpu);
754:   VecViennaCLGetArrayReadWrite(zin,&zgpu);
755:   if (alpha == 0.0 && xin->map->n > 0) {
756:     try {
757:       if (beta == 0.0) {
758:         *zgpu = gamma * *zgpu;
759:         ViennaCLWaitForGPU();
760:         PetscLogFlops(1.0*n);
761:       } else if (gamma == 0.0) {
762:         *zgpu = beta * *ygpu;
763:         ViennaCLWaitForGPU();
764:         PetscLogFlops(1.0*n);
765:       } else {
766:         *zgpu = beta * *ygpu + gamma * *zgpu;
767:         ViennaCLWaitForGPU();
768:         PetscLogFlops(3.0*n);
769:       }
770:     } catch(std::exception const & ex) {
771:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
772:     }
773:     PetscLogFlops(3.0*n);
774:   } else if (beta == 0.0 && xin->map->n > 0) {
775:     try {
776:       if (gamma == 0.0) {
777:         *zgpu = alpha * *xgpu;
778:         ViennaCLWaitForGPU();
779:         PetscLogFlops(1.0*n);
780:       } else {
781:         *zgpu = alpha * *xgpu + gamma * *zgpu;
782:         ViennaCLWaitForGPU();
783:         PetscLogFlops(3.0*n);
784:       }
785:     } catch(std::exception const & ex) {
786:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
787:     }
788:   } else if (gamma == 0.0 && xin->map->n > 0) {
789:     try {
790:       *zgpu = alpha * *xgpu + beta * *ygpu;
791:       ViennaCLWaitForGPU();
792:     } catch(std::exception const & ex) {
793:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
794:     }
795:     PetscLogFlops(3.0*n);
796:   } else if (xin->map->n > 0) {
797:     try {
798:       /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
799:       if (gamma != 1.0)
800:         *zgpu *= gamma;
801:       *zgpu += alpha * *xgpu + beta * *ygpu;
802:       ViennaCLWaitForGPU();
803:     } catch(std::exception const & ex) {
804:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
805:     }
806:     VecViennaCLRestoreArrayReadWrite(zin,&zgpu);
807:     VecViennaCLRestoreArrayRead(xin,&xgpu);
808:     VecViennaCLRestoreArrayRead(yin,&ygpu);
809:     PetscLogFlops(5.0*n);
810:   }
811:   return(0);
812: }

816: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win,Vec xin,Vec yin)
817: {
818:   PetscErrorCode       ierr;
819:   PetscInt             n = win->map->n;
820:   const ViennaCLVector *xgpu,*ygpu;
821:   ViennaCLVector       *wgpu;

824:   if (xin->map->n > 0) {
825:     VecViennaCLGetArrayRead(xin,&xgpu);
826:     VecViennaCLGetArrayRead(yin,&ygpu);
827:     VecViennaCLGetArrayReadWrite(win,&wgpu);
828:     try {
829:       *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
830:       ViennaCLWaitForGPU();
831:     } catch(std::exception const & ex) {
832:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
833:     }
834:     VecViennaCLRestoreArrayRead(xin,&xgpu);
835:     VecViennaCLRestoreArrayRead(yin,&ygpu);
836:     VecViennaCLRestoreArrayReadWrite(win,&wgpu);
837:     PetscLogFlops(n);
838:   }
839:   return(0);
840: }


845: PetscErrorCode VecNorm_SeqViennaCL(Vec xin,NormType type,PetscReal *z)
846: {
847:   PetscErrorCode       ierr;
848:   PetscInt             n = xin->map->n;
849:   PetscBLASInt         bn;
850:   const ViennaCLVector *xgpu;

853:   if (xin->map->n > 0) {
854:     PetscBLASIntCast(n,&bn);
855:     VecViennaCLGetArrayRead(xin,&xgpu);
856:     if (type == NORM_2 || type == NORM_FROBENIUS) {
857:       try {
858:         *z = viennacl::linalg::norm_2(*xgpu);
859:         ViennaCLWaitForGPU();
860:       } catch(std::exception const & ex) {
861:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
862:       }
863:       PetscLogFlops(PetscMax(2.0*n-1,0.0));
864:     } else if (type == NORM_INFINITY) {
865:       VecViennaCLGetArrayRead(xin,&xgpu);
866:       try {
867:         *z = viennacl::linalg::norm_inf(*xgpu);
868:         ViennaCLWaitForGPU();
869:       } catch(std::exception const & ex) {
870:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
871:       }
872:       VecViennaCLRestoreArrayRead(xin,&xgpu);
873:     } else if (type == NORM_1) {
874:       try {
875:         *z = viennacl::linalg::norm_1(*xgpu);
876:         ViennaCLWaitForGPU();
877:       } catch(std::exception const & ex) {
878:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
879:       }
880:       PetscLogFlops(PetscMax(n-1.0,0.0));
881:     } else if (type == NORM_1_AND_2) {
882:       try {
883:         *z     = viennacl::linalg::norm_1(*xgpu);
884:         *(z+1) = viennacl::linalg::norm_2(*xgpu);
885:         ViennaCLWaitForGPU();
886:       } catch(std::exception const & ex) {
887:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
888:       }
889:       PetscLogFlops(PetscMax(2.0*n-1,0.0));
890:       PetscLogFlops(PetscMax(n-1.0,0.0));
891:     }
892:     VecViennaCLRestoreArrayRead(xin,&xgpu);
893:   } else if (type == NORM_1_AND_2) {
894:     *z      = 0.0;
895:     *(z+1)  = 0.0;
896:   } else *z = 0.0;
897:   return(0);
898: }


903: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin,PetscRandom r)
904: {

908:   VecSetRandom_SeqViennaCL_Private(xin,r);
909:   xin->valid_GPU_array = PETSC_VIENNACL_CPU;
910:   return(0);
911: }

915: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
916: {

920:   VecViennaCLCopyFromGPU(vin);
921:   VecResetArray_SeqViennaCL_Private(vin);
922:   vin->valid_GPU_array = PETSC_VIENNACL_CPU;
923:   return(0);
924: }

928: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
929: {

933:   VecViennaCLCopyFromGPU(vin);
934:   VecPlaceArray_Seq(vin,a);
935:   vin->valid_GPU_array = PETSC_VIENNACL_CPU;
936:   return(0);
937: }


942: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
943: {

947:   VecViennaCLCopyFromGPU(vin);
948:   VecReplaceArray_Seq(vin,a);
949:   vin->valid_GPU_array = PETSC_VIENNACL_CPU;
950:   return(0);
951: }


956: /*@
957:    VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.

959:    Collective on MPI_Comm

961:    Input Parameter:
962: +  comm - the communicator, should be PETSC_COMM_SELF
963: -  n - the vector length

965:    Output Parameter:
966: .  V - the vector

968:    Notes:
969:    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
970:    same type as an existing vector.

972:    Level: intermediate

974:    Concepts: vectors^creating sequential

976: .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
977: @*/
978: PetscErrorCode  VecCreateSeqViennaCL(MPI_Comm comm,PetscInt n,Vec *v)
979: {

983:   VecCreate(comm,v);
984:   VecSetSizes(*v,n,n);
985:   VecSetType(*v,VECSEQVIENNACL);
986:   return(0);
987: }


990: /*  VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
991:  *
992:  *  Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
993:  */
996: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
997: {
998:   PetscErrorCode                         ierr;

1001:   VecDot_SeqViennaCL(s,t,dp);
1002:   VecNorm_SeqViennaCL(t,NORM_2,nm);
1003:   *nm *= *nm; //squared norm required
1004:   return(0);
1005: }

1009: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win,Vec *V)
1010: {

1014:   VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win),win->map->n,V);
1015:   PetscLayoutReference(win->map,&(*V)->map);
1016:   PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);
1017:   PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);
1018:   (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1019:   return(0);
1020: }

1024: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1025: {

1029:   try {
1030:     if (v->spptr) {
1031:       delete ((Vec_ViennaCL*)v->spptr)->GPUarray;
1032:       delete (Vec_ViennaCL*) v->spptr;
1033:     }
1034:   } catch(char *ex) {
1035:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
1036:   }
1037:   VecDestroy_SeqViennaCL_Private(v);
1038:   return(0);
1039: }


1044: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1045: {
1047:   PetscMPIInt    size;

1050:   MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
1051:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQVIENNACL on more than one process");
1052:   VecCreate_Seq_Private(V,0);
1053:   PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);

1055:   V->ops->dot             = VecDot_SeqViennaCL;
1056:   V->ops->norm            = VecNorm_SeqViennaCL;
1057:   V->ops->tdot            = VecTDot_SeqViennaCL;
1058:   V->ops->scale           = VecScale_SeqViennaCL;
1059:   V->ops->copy            = VecCopy_SeqViennaCL;
1060:   V->ops->set             = VecSet_SeqViennaCL;
1061:   V->ops->swap            = VecSwap_SeqViennaCL;
1062:   V->ops->axpy            = VecAXPY_SeqViennaCL;
1063:   V->ops->axpby           = VecAXPBY_SeqViennaCL;
1064:   V->ops->axpbypcz        = VecAXPBYPCZ_SeqViennaCL;
1065:   V->ops->pointwisemult   = VecPointwiseMult_SeqViennaCL;
1066:   V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1067:   V->ops->setrandom       = VecSetRandom_SeqViennaCL;
1068:   V->ops->dot_local       = VecDot_SeqViennaCL;
1069:   V->ops->tdot_local      = VecTDot_SeqViennaCL;
1070:   V->ops->norm_local      = VecNorm_SeqViennaCL;
1071:   V->ops->mdot_local      = VecMDot_SeqViennaCL;
1072:   V->ops->maxpy           = VecMAXPY_SeqViennaCL;
1073:   V->ops->mdot            = VecMDot_SeqViennaCL;
1074:   V->ops->aypx            = VecAYPX_SeqViennaCL;
1075:   V->ops->waxpy           = VecWAXPY_SeqViennaCL;
1076:   V->ops->dotnorm2        = VecDotNorm2_SeqViennaCL;
1077:   V->ops->placearray      = VecPlaceArray_SeqViennaCL;
1078:   V->ops->replacearray    = VecReplaceArray_SeqViennaCL;
1079:   V->ops->resetarray      = VecResetArray_SeqViennaCL;
1080:   V->ops->destroy         = VecDestroy_SeqViennaCL;
1081:   V->ops->duplicate       = VecDuplicate_SeqViennaCL;

1083:   VecViennaCLAllocateCheck(V);
1084:   V->valid_GPU_array      = PETSC_VIENNACL_GPU;
1085:   VecSet(V,0.0);
1086:   return(0);
1087: }