Actual source code: sacusp.cu
petsc-dev 2014-02-02
2: /* -------------------------------------------------------------------- */
4: /*
5: Include files needed for the CUSP Smoothed Aggregation preconditioner:
6: pcimpl.h - private include file intended for use by all preconditioners
7: */
#include <petsc-private/pcimpl.h>   /*I "petscpc.h" I*/
#include <../src/mat/impls/aij/seq/aij.h>
#include <cusp/monitor.h>
#include <cusp/precond/smoothed_aggregation.h>
#include <../src/vec/vec/impls/dvecimpl.h>
#include <../src/mat/impls/aij/seq/seqcusp/cuspmatimpl.h>
#include <exception>
16: #define cuspsaprecond cusp::precond::smoothed_aggregation<PetscInt,PetscScalar,cusp::device_memory>
18: /*
19: Private context (data structure) for the SACUSP preconditioner.
20: */
21: typedef struct {
22: cuspsaprecond * SACUSP;
23: /*int cycles; */
24: } PC_SACUSP;
/*
   PCSACUSPSetCycles - Disabled. It stores the requested number of cycles in
   PC_SACUSP::cycles, but that member is itself commented out in the struct,
   so this routine must stay commented out until the member is restored.

static PetscErrorCode PCSACUSPSetCycles(PC pc, int n)
{
  PC_SACUSP *sac = (PC_SACUSP*)pc->data;

  sac->cycles = n;
  return(0);
}
*/
38: /* -------------------------------------------------------------------------- */
39: /*
40: PCSetUp_SACUSP - Prepares for the use of the SACUSP preconditioner
41: by setting data structures and options.
43: Input Parameter:
44: . pc - the preconditioner context
46: Application Interface Routine: PCSetUp()
48: Notes:
49: The interface routine PCSetUp() is not usually called directly by
50: the user, but instead is called by PCApply() if necessary.
51: */
54: static PetscErrorCode PCSetUp_SACUSP(PC pc)
55: {
56: PC_SACUSP *sa = (PC_SACUSP*)pc->data;
57: PetscBool flg = PETSC_FALSE;
59: #if !defined(PETSC_USE_COMPLEX)
60: // protect these in order to avoid compiler warnings. This preconditioner does
61: // not work for complex types.
62: Mat_SeqAIJCUSP *gpustruct;
63: #endif
66: PetscObjectTypeCompare((PetscObject)pc->pmat,MATSEQAIJCUSP,&flg);
67: if (!flg) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Currently only handles CUSP matrices");
68: if (pc->setupcalled != 0) {
69: try {
70: delete sa->SACUSP;
71: } catch(char *ex) {
72: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
73: }
74: }
75: try {
76: #if defined(PETSC_USE_COMPLEX)
77: sa->SACUSP = 0;CHKERRQ(1); /* TODO */
78: #else
79: MatCUSPCopyToGPU(pc->pmat);
80: gpustruct = (Mat_SeqAIJCUSP*)(pc->pmat->spptr);
81:
82: if (gpustruct->format==MAT_CUSP_ELL) {
83: CUSPMATRIXELL *mat = (CUSPMATRIXELL*)gpustruct->mat;
84: sa->SACUSP = new cuspsaprecond(*mat);
85: } else if (gpustruct->format==MAT_CUSP_DIA) {
86: CUSPMATRIXDIA *mat = (CUSPMATRIXDIA*)gpustruct->mat;
87: sa->SACUSP = new cuspsaprecond(*mat);
88: } else {
89: CUSPMATRIX *mat = (CUSPMATRIX*)gpustruct->mat;
90: sa->SACUSP = new cuspsaprecond(*mat);
91: }
92: #endif
94: } catch(char *ex) {
95: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
96: }
97: /*PetscOptionsInt("-pc_sacusp_cycles","Number of v-cycles to perform","PCSACUSPSetCycles",sa->cycles,
98: &sa->cycles,NULL);*/
99: return(0);
100: }
104: static PetscErrorCode PCApplyRichardson_SACUSP(PC pc, Vec b, Vec y, Vec w,PetscReal rtol, PetscReal abstol, PetscReal dtol, PetscInt its, PetscBool guesszero,PetscInt *outits,PCRichardsonConvergedReason *reason)
105: {
106: #if !defined(PETSC_USE_COMPLEX)
107: // protect these in order to avoid compiler warnings. This preconditioner does
108: // not work for complex types.
109: PC_SACUSP *sac = (PC_SACUSP*)pc->data;
110: #endif
112: CUSPARRAY *barray,*yarray;
115: /* how to incorporate dtol, guesszero, w?*/
117: VecCUSPGetArrayRead(b,&barray);
118: VecCUSPGetArrayReadWrite(y,&yarray);
119: cusp::default_monitor<PetscReal> monitor(*barray,its,rtol,abstol);
120: #if defined(PETSC_USE_COMPLEX)
121: CHKERRQ(1);
122: /* TODO */
123: #else
124: sac->SACUSP->solve(*barray,*yarray,monitor);
125: *outits = monitor.iteration_count();
126: if (monitor.converged()) *reason = PCRICHARDSON_CONVERGED_RTOL; /* how to discern between converging from RTOL or ATOL?*/
127: else *reason = PCRICHARDSON_CONVERGED_ITS;
128: #endif
129: PetscObjectStateIncrease((PetscObject)y);
130: VecCUSPRestoreArrayRead(b,&barray);
131: VecCUSPRestoreArrayReadWrite(y,&yarray);
132: return(0);
133: }
135: /* -------------------------------------------------------------------------- */
136: /*
137: PCApply_SACUSP - Applies the SACUSP preconditioner to a vector.
139: Input Parameters:
140: . pc - the preconditioner context
141: . x - input vector
143: Output Parameter:
144: . y - output vector
146: Application Interface Routine: PCApply()
147: */
150: static PetscErrorCode PCApply_SACUSP(PC pc,Vec x,Vec y)
151: {
152: PC_SACUSP *sac = (PC_SACUSP*)pc->data;
154: PetscBool flg1,flg2;
155: CUSPARRAY *xarray=NULL,*yarray=NULL;
158: /*how to apply a certain fixed number of iterations?*/
159: PetscObjectTypeCompare((PetscObject)x,VECSEQCUSP,&flg1);
160: PetscObjectTypeCompare((PetscObject)y,VECSEQCUSP,&flg2);
161: if (!(flg1 && flg2)) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP, "Currently only handles CUSP vectors");
162: if (!sac->SACUSP) {
163: PCSetUp_SACUSP(pc);
164: }
165: VecSet(y,0.0);
166: VecCUSPGetArrayRead(x,&xarray);
167: VecCUSPGetArrayWrite(y,&yarray);
168: try {
169: #if defined(PETSC_USE_COMPLEX)
171: #else
172: cusp::multiply(*sac->SACUSP,*xarray,*yarray);
173: #endif
174: } catch(char * ex) {
175: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
176: }
177: VecCUSPRestoreArrayRead(x,&xarray);
178: VecCUSPRestoreArrayWrite(y,&yarray);
179: PetscObjectStateIncrease((PetscObject)y);
180: return(0);
181: }
182: /* -------------------------------------------------------------------------- */
183: /*
184: PCDestroy_SACUSP - Destroys the private context for the SACUSP preconditioner
185: that was created with PCCreate_SACUSP().
187: Input Parameter:
188: . pc - the preconditioner context
190: Application Interface Routine: PCDestroy()
191: */
194: static PetscErrorCode PCDestroy_SACUSP(PC pc)
195: {
196: PC_SACUSP *sac = (PC_SACUSP*)pc->data;
200: if (sac->SACUSP) {
201: try {
202: delete sac->SACUSP;
203: } catch(char * ex) {
204: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
205: }
206: }
208: /*
209: Free the private data structure that was hanging off the PC
210: */
211: PetscFree(pc->data);
212: return(0);
213: }
217: static PetscErrorCode PCSetFromOptions_SACUSP(PC pc)
218: {
222: PetscOptionsHead("SACUSP options");
223: PetscOptionsTail();
224: return(0);
225: }
227: /* -------------------------------------------------------------------------- */
230: /*MC
231: PCSACUSP - A smoothed aggregation algorithm that runs on the Nvidia GPU.
234: http://research.nvidia.com/sites/default/files/publications/nvr-2011-002.pdf
236: Level: advanced
238: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC
240: M*/
244: PETSC_EXTERN PetscErrorCode PCCreate_SACUSP(PC pc)
245: {
246: PC_SACUSP *sac;
250: /*
251: Creates the private data structure for this preconditioner and
252: attach it to the PC object.
253: */
254: PetscNewLog(pc,&sac);
255: pc->data = (void*)sac;
257: /*
258: Initialize the pointer to zero
259: Initialize number of v-cycles to default (1)
260: */
261: sac->SACUSP = 0;
262: /*sac->cycles=1;*/
265: /*
266: Set the pointers for the functions that are provided above.
267: Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
268: are called, they will automatically call these functions. Note we
269: choose not to provide a couple of these functions since they are
270: not needed.
271: */
272: pc->ops->apply = PCApply_SACUSP;
273: pc->ops->applytranspose = 0;
274: pc->ops->setup = PCSetUp_SACUSP;
275: pc->ops->destroy = PCDestroy_SACUSP;
276: pc->ops->setfromoptions = PCSetFromOptions_SACUSP;
277: pc->ops->view = 0;
278: pc->ops->applyrichardson = PCApplyRichardson_SACUSP;
279: pc->ops->applysymmetricleft = 0;
280: pc->ops->applysymmetricright = 0;
281: return(0);
282: }