Actual source code: sacusp.cu
petsc-3.5.4 2015-05-23
2: /* -------------------------------------------------------------------- */
4: /*
5: Include files needed for the CUSP Smoothed Aggregation preconditioner:
6: pcimpl.h - private include file intended for use by all preconditioners
7: */
9: #include <petsc-private/pcimpl.h> /*I "petscpc.h" I*/
10: #include <../src/mat/impls/aij/seq/aij.h>
11: #include <cusp/monitor.h>
12: #include <cusp/version.h>
13: #if CUSP_VERSION >= 400
14: #include <cusp/precond/aggregation/smoothed_aggregation.h>
15: #define cuspsaprecond cusp::precond::aggregation::smoothed_aggregation<PetscInt,PetscScalar,cusp::device_memory>
16: #else
17: #include <cusp/precond/smoothed_aggregation.h>
18: #define cuspsaprecond cusp::precond::smoothed_aggregation<PetscInt,PetscScalar,cusp::device_memory>
19: #endif
20: #include <../src/vec/vec/impls/dvecimpl.h>
21: #include <../src/mat/impls/aij/seq/seqcusp/cuspmatimpl.h>
23: /*
24: Private context (data structure) for the SACUSP preconditioner.
25: */
26: typedef struct {
27: cuspsaprecond * SACUSP;
28: /*int cycles; */
29: } PC_SACUSP;
/*
   PCSACUSPSetCycles - (disabled) would store the number of v-cycles in the
   SACUSP context. It stays commented out because PC_SACUSP currently has no
   `cycles` member (that field is itself commented out in the struct).

static PetscErrorCode PCSACUSPSetCycles(PC pc, int n)
{
  PC_SACUSP *sac = (PC_SACUSP*)pc->data;

  sac->cycles = n;
  return(0);
}*/
43: /* -------------------------------------------------------------------------- */
44: /*
45: PCSetUp_SACUSP - Prepares for the use of the SACUSP preconditioner
46: by setting data structures and options.
48: Input Parameter:
49: . pc - the preconditioner context
51: Application Interface Routine: PCSetUp()
53: Notes:
54: The interface routine PCSetUp() is not usually called directly by
55: the user, but instead is called by PCApply() if necessary.
56: */
59: static PetscErrorCode PCSetUp_SACUSP(PC pc)
60: {
61: PC_SACUSP *sa = (PC_SACUSP*)pc->data;
62: PetscBool flg = PETSC_FALSE;
64: #if !defined(PETSC_USE_COMPLEX)
65: // protect these in order to avoid compiler warnings. This preconditioner does
66: // not work for complex types.
67: Mat_SeqAIJCUSP *gpustruct;
68: #endif
71: PetscObjectTypeCompare((PetscObject)pc->pmat,MATSEQAIJCUSP,&flg);
72: if (!flg) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Currently only handles CUSP matrices");
73: if (pc->setupcalled != 0) {
74: try {
75: delete sa->SACUSP;
76: } catch(char *ex) {
77: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
78: }
79: }
80: try {
81: #if defined(PETSC_USE_COMPLEX)
82: sa->SACUSP = 0;CHKERRQ(1); /* TODO */
83: #else
84: MatCUSPCopyToGPU(pc->pmat);
85: gpustruct = (Mat_SeqAIJCUSP*)(pc->pmat->spptr);
86:
87: if (gpustruct->format==MAT_CUSP_ELL) {
88: CUSPMATRIXELL *mat = (CUSPMATRIXELL*)gpustruct->mat;
89: sa->SACUSP = new cuspsaprecond(*mat);
90: } else if (gpustruct->format==MAT_CUSP_DIA) {
91: CUSPMATRIXDIA *mat = (CUSPMATRIXDIA*)gpustruct->mat;
92: sa->SACUSP = new cuspsaprecond(*mat);
93: } else {
94: CUSPMATRIX *mat = (CUSPMATRIX*)gpustruct->mat;
95: sa->SACUSP = new cuspsaprecond(*mat);
96: }
97: #endif
99: } catch(char *ex) {
100: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
101: }
102: /*PetscOptionsInt("-pc_sacusp_cycles","Number of v-cycles to perform","PCSACUSPSetCycles",sa->cycles,
103: &sa->cycles,NULL);*/
104: return(0);
105: }
109: static PetscErrorCode PCApplyRichardson_SACUSP(PC pc, Vec b, Vec y, Vec w,PetscReal rtol, PetscReal abstol, PetscReal dtol, PetscInt its, PetscBool guesszero,PetscInt *outits,PCRichardsonConvergedReason *reason)
110: {
111: #if !defined(PETSC_USE_COMPLEX)
112: // protect these in order to avoid compiler warnings. This preconditioner does
113: // not work for complex types.
114: PC_SACUSP *sac = (PC_SACUSP*)pc->data;
115: #endif
117: CUSPARRAY *barray,*yarray;
120: /* how to incorporate dtol, guesszero, w?*/
122: VecCUSPGetArrayRead(b,&barray);
123: VecCUSPGetArrayReadWrite(y,&yarray);
124: cusp::default_monitor<PetscReal> monitor(*barray,its,rtol,abstol);
125: #if defined(PETSC_USE_COMPLEX)
126: CHKERRQ(1);
127: /* TODO */
128: #else
129: sac->SACUSP->solve(*barray,*yarray,monitor);
130: *outits = monitor.iteration_count();
131: if (monitor.converged()) *reason = PCRICHARDSON_CONVERGED_RTOL; /* how to discern between converging from RTOL or ATOL?*/
132: else *reason = PCRICHARDSON_CONVERGED_ITS;
133: #endif
134: PetscObjectStateIncrease((PetscObject)y);
135: VecCUSPRestoreArrayRead(b,&barray);
136: VecCUSPRestoreArrayReadWrite(y,&yarray);
137: return(0);
138: }
140: /* -------------------------------------------------------------------------- */
141: /*
142: PCApply_SACUSP - Applies the SACUSP preconditioner to a vector.
144: Input Parameters:
145: . pc - the preconditioner context
146: . x - input vector
148: Output Parameter:
149: . y - output vector
151: Application Interface Routine: PCApply()
152: */
155: static PetscErrorCode PCApply_SACUSP(PC pc,Vec x,Vec y)
156: {
157: PC_SACUSP *sac = (PC_SACUSP*)pc->data;
159: PetscBool flg1,flg2;
160: CUSPARRAY *xarray=NULL,*yarray=NULL;
163: /*how to apply a certain fixed number of iterations?*/
164: PetscObjectTypeCompare((PetscObject)x,VECSEQCUSP,&flg1);
165: PetscObjectTypeCompare((PetscObject)y,VECSEQCUSP,&flg2);
166: if (!(flg1 && flg2)) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP, "Currently only handles CUSP vectors");
167: if (!sac->SACUSP) {
168: PCSetUp_SACUSP(pc);
169: }
170: VecSet(y,0.0);
171: VecCUSPGetArrayRead(x,&xarray);
172: VecCUSPGetArrayWrite(y,&yarray);
173: try {
174: #if defined(PETSC_USE_COMPLEX)
176: #else
177: cusp::multiply(*sac->SACUSP,*xarray,*yarray);
178: #endif
179: } catch(char * ex) {
180: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
181: }
182: VecCUSPRestoreArrayRead(x,&xarray);
183: VecCUSPRestoreArrayWrite(y,&yarray);
184: PetscObjectStateIncrease((PetscObject)y);
185: return(0);
186: }
187: /* -------------------------------------------------------------------------- */
188: /*
189: PCDestroy_SACUSP - Destroys the private context for the SACUSP preconditioner
190: that was created with PCCreate_SACUSP().
192: Input Parameter:
193: . pc - the preconditioner context
195: Application Interface Routine: PCDestroy()
196: */
199: static PetscErrorCode PCDestroy_SACUSP(PC pc)
200: {
201: PC_SACUSP *sac = (PC_SACUSP*)pc->data;
205: if (sac->SACUSP) {
206: try {
207: delete sac->SACUSP;
208: } catch(char * ex) {
209: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
210: }
211: }
213: /*
214: Free the private data structure that was hanging off the PC
215: */
216: PetscFree(pc->data);
217: return(0);
218: }
222: static PetscErrorCode PCSetFromOptions_SACUSP(PC pc)
223: {
227: PetscOptionsHead("SACUSP options");
228: PetscOptionsTail();
229: return(0);
230: }
232: /* -------------------------------------------------------------------------- */
235: /*MC
236: PCSACUSP - A smoothed aggregation algorithm that runs on the Nvidia GPU.
239: http://research.nvidia.com/sites/default/files/publications/nvr-2011-002.pdf
241: Level: advanced
243: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC
245: M*/
249: PETSC_EXTERN PetscErrorCode PCCreate_SACUSP(PC pc)
250: {
251: PC_SACUSP *sac;
255: /*
256: Creates the private data structure for this preconditioner and
257: attach it to the PC object.
258: */
259: PetscNewLog(pc,&sac);
260: pc->data = (void*)sac;
262: /*
263: Initialize the pointer to zero
264: Initialize number of v-cycles to default (1)
265: */
266: sac->SACUSP = 0;
267: /*sac->cycles=1;*/
270: /*
271: Set the pointers for the functions that are provided above.
272: Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
273: are called, they will automatically call these functions. Note we
274: choose not to provide a couple of these functions since they are
275: not needed.
276: */
277: pc->ops->apply = PCApply_SACUSP;
278: pc->ops->applytranspose = 0;
279: pc->ops->setup = PCSetUp_SACUSP;
280: pc->ops->destroy = PCDestroy_SACUSP;
281: pc->ops->setfromoptions = PCSetFromOptions_SACUSP;
282: pc->ops->view = 0;
283: pc->ops->applyrichardson = PCApplyRichardson_SACUSP;
284: pc->ops->applysymmetricleft = 0;
285: pc->ops->applysymmetricright = 0;
286: return(0);
287: }