Actual source code: openmp.c
1: #define PETSCKSP_DLL
3: #include "private/pcimpl.h"
4: #include "petscksp.h"
6: typedef struct {
7: MatStructure flag; /* pc->flag */
8: PetscInt setupcalled; /* pc->setupcalled */
9: PetscInt n;
10: MPI_Comm comm; /* local world used by this preconditioner */
11: KSP ksp; /* actual solver used across local world */
12: Mat mat; /* matrix in local world */
13: Mat gmat; /* matrix known only to process 0 in the local world */
14: Vec x,y,xdummy,ydummy;
15: VecScatter scatter;
16: PetscTruth nonzero_guess;
17: } PC_OpenMP;
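/*
   How the pieces below fit together (a sketch inferred from the calls in this file,
   not documented behaviour): the ordinary PC callbacks run only on the master process;
   each one hands this shared context to the whole local world with PetscOpenMPRun(),
   which invokes the given function on every process of that communicator. The name
   Work_MP in this sketch is illustrative only:

       static PetscErrorCode Work_MP(MPI_Comm comm,void *ctx)
       {
         PC_OpenMP *red = (PC_OpenMP*)ctx;
         ... collective work on comm, using the fields of red ...
         return(0);
       }

       PetscOpenMPRun(red->comm,Work_MP,red);    called from the master side
*/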
22: /*
23: Would like to have this simply call PCView() on the inner PC. The problem is
24: that the outer comm does not live on the inside, so that cannot be done. Instead,
25: handle the special case when the viewer is stdout by constructing a new viewer just
26: for this call.
27: */
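/*
   A minimal sketch of the alternative hinted at above: rather than reusing the shared
   stdout viewer, an ASCII viewer could be opened explicitly on the inner communicator
   just for this call (the variable name inner is illustrative only):

       PetscViewer inner;
       PetscViewerASCIIOpen(comm,"stdout",&inner);
       KSPView(red->ksp,inner);
       PetscViewerDestroy(inner);
*/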
29: static PetscErrorCode PCView_OpenMP_MP(MPI_Comm comm,void *ctx)
30: {
31: PC_OpenMP *red = (PC_OpenMP*)ctx;
33: PetscViewer viewer;
36: PetscViewerASCIIGetStdout(comm,&viewer);
37: PetscViewerASCIIPushTab(viewer); /* this is bogus in general */
38: KSPView(red->ksp,viewer);
39: PetscViewerASCIIPopTab(viewer);
40: return(0);
41: }
45: static PetscErrorCode PCView_OpenMP(PC pc,PetscViewer viewer)
46: {
47: PC_OpenMP *red = (PC_OpenMP*)pc->data;
48: PetscMPIInt size;
50: PetscTruth iascii;
55: MPI_Comm_size(red->comm,&size);
56: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
57: if (iascii) {
58: PetscViewerASCIIPrintf(viewer," Number of solver processes %d\n",size);
59: PetscViewerASCIIPrintf(viewer," Parallel sub-solver given next\n");
60: /* should only make the next call if the viewer is associated with stdout */
61: PetscOpenMPRun(red->comm,PCView_OpenMP_MP,red);
62: }
63: return(0);
64: }
66: #include "include/private/matimpl.h"
67: #include "private/vecimpl.h"
68: #include "src/mat/impls/aij/mpi/mpiaij.h"
69: #include "src/mat/impls/aij/seq/aij.h"
73: /*
74: Distributes a SeqAIJ matrix across a set of processes. Code taken from
75: MatLoad_MPIAIJ(); a horrible lack of reuse that should become a routine provided by each matrix type.
77: Only for square matrices.
78: */
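/*
   Usage sketch (hypothetical caller; dmat is an illustrative name): process 0 of comm
   holds the sequential matrix gmat, every process passes its local row count m.

       Mat dmat;
       MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);    first call builds dmat
       ... the numerical values of gmat change, same nonzero pattern ...
       MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);      later calls only move values
*/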
79: static PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
80: {
81: PetscMPIInt rank,size;
82: PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
84: Mat mat;
85: Mat_SeqAIJ *gmata;
86: PetscMPIInt tag;
87: MPI_Status status;
88: PetscTruth aij;
89: PetscScalar *gmataa,*ao,*ad,*gmataarestore=0;
92: CHKMEMQ;
93: MPI_Comm_rank(comm,&rank);
94: MPI_Comm_size(comm,&size);
95: if (!rank) {
96: PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);
97: if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
98: }
99: if (reuse == MAT_INITIAL_MATRIX) {
100: MatCreate(comm,&mat);
101: MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
102: MatSetType(mat,MATAIJ);
103: PetscMalloc((size+1)*sizeof(PetscInt),&rowners);
104: PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);
105: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
106: rowners[0] = 0;
107: for (i=2; i<=size; i++) {
108: rowners[i] += rowners[i-1];
109: }
110: rstart = rowners[rank];
111: rend = rowners[rank+1];
112: PetscObjectGetNewTag((PetscObject)mat,&tag);
113: if (!rank) {
114: gmata = (Mat_SeqAIJ*) gmat->data;
115: /* send row lengths to all processors */
116: for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
117: for (i=1; i<size; i++) {
118: MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
119: }
120: /* count the off-diagonal entries in each row, and those that fall to the left of the diagonal block */
121: PetscMemzero(olens,m*sizeof(PetscInt));
122: PetscMalloc(m*sizeof(PetscInt),&ld);
123: PetscMemzero(ld,m*sizeof(PetscInt));
124: jj = 0;
125: for (i=0; i<m; i++) {
126: for (j=0; j<dlens[i]; j++) {
127: if (gmata->j[jj] < rstart) ld[i]++;
128: if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
129: jj++;
130: }
131: }
132: /* send column indices to other processes */
133: for (i=1; i<size; i++) {
134: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
135: MPI_Send(&nz,1,MPIU_INT,i,tag,comm);
136: MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);
137: }
139: /* send numerical values to other processes */
140: for (i=1; i<size; i++) {
141: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
142: MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
143: }
144: gmataa = gmata->a;
145: gmataj = gmata->j;
147: } else {
148: /* receive row lengths */
149: MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);
150: /* receive column indices */
151: MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);
152: PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);
153: MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);
154: /* count the off-diagonal entries in each row, and those that fall to the left of the diagonal block */
155: PetscMemzero(olens,m*sizeof(PetscInt));
156: PetscMalloc(m*sizeof(PetscInt),&ld);
157: PetscMemzero(ld,m*sizeof(PetscInt));
158: jj = 0;
159: for (i=0; i<m; i++) {
160: for (j=0; j<dlens[i]; j++) {
161: if (gmataj[jj] < rstart) ld[i]++;
162: if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
163: jj++;
164: }
165: }
166: /* receive numerical values */
167: PetscMemzero(gmataa,nz*sizeof(PetscScalar));
168: MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
169: }
170: /* set preallocation: dlens holds the full row lengths, so subtract olens to get the diagonal-block lengths (restored below) */
171: for (i=0; i<m; i++) {
172: dlens[i] -= olens[i];
173: }
174: MatSeqAIJSetPreallocation(mat,0,dlens);
175: MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);
176:
177: for (i=0; i<m; i++) {
178: dlens[i] += olens[i];
179: }
180: cnt = 0;
181: for (i=0; i<m; i++) {
182: row = rstart + i;
183: MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);
184: cnt += dlens[i];
185: }
186: if (rank) {
187: PetscFree2(gmataa,gmataj);
188: }
189: PetscFree2(dlens,olens);
190: PetscFree(rowners);
191: ((Mat_MPIAIJ*)(mat->data))->ld = ld;
192: *inmat = mat;
193: } else { /* column indices are already set; only need to move over numerical values from process 0 */
194: Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
195: Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
196: mat = *inmat;
197: PetscObjectGetNewTag((PetscObject)mat,&tag);
198: if (!rank) {
199: /* send numerical values to other processes */
200: gmata = (Mat_SeqAIJ*) gmat->data;
201: MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);
202: gmataa = gmata->a;
203: for (i=1; i<size; i++) {
204: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
205: MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
206: }
207: nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
208: } else {
209: /* receive numerical values from process 0 */
210: nz = Ad->nz + Ao->nz;
211: PetscMalloc(nz*sizeof(PetscScalar),&gmataa); gmataarestore = gmataa;
212: MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
213: }
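/*
   The copy loops below rely on the per-row value layout produced by the initial
   distribution: for each row, first the ld[i] off-diagonal entries whose global column
   lies left of the diagonal block, then the diagonal-block entries, then the trailing
   off-diagonal entries; hence the special handling of the first and last rows.
*/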
214: /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
215: ld = ((Mat_MPIAIJ*)(mat->data))->ld;
216: ad = Ad->a;
217: ao = Ao->a;
218: if (mat->rmap.n) {
219: i = 0;
220: nz = ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
221: nz = Ad->i[i+1] - Ad->i[i]; PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
222: }
223: for (i=1; i<mat->rmap.n; i++) {
224: nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
225: nz = Ad->i[i+1] - Ad->i[i]; PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
226: }
227: i--;
228: if (mat->rmap.n) {
229: nz = Ao->i[i+1] - Ao->i[i] - ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
230: }
231: if (rank) {
232: PetscFree(gmataarestore);
233: }
234: }
235: MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
236: MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
237: CHKMEMQ;
238: return(0);
239: }
243: static PetscErrorCode PCApply_OpenMP_1(PC pc,Vec x,Vec y)
244: {
245: PC_OpenMP *red = (PC_OpenMP*)pc->data;
249: KSPSetInitialGuessNonzero(red->ksp,pc->nonzero_guess);
250: KSPSolve(red->ksp,x,y);
251: return(0);
252: }
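/*
   Runs on every process of the local world (dispatched from PCSetUp_OpenMP() via
   PetscOpenMPRun()). On first use it broadcasts the problem size from process 0,
   creates the distributed work vectors, the scatter to process 0, the distributed
   copy of the matrix and the inner KSP; on later calls it only refreshes the matrix,
   rebuilding it when the nonzero pattern has changed.
*/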
256: static PetscErrorCode PCSetUp_OpenMP_MP(MPI_Comm comm,void *ctx)
257: {
258: PC_OpenMP *red = (PC_OpenMP*)ctx;
260: PetscInt m;
261: MatReuse scal;
262: PetscMPIInt rank;
265: red->comm = comm;
266: MPI_Bcast(&red->setupcalled,1,MPIU_INT,0,comm);
267: MPI_Bcast(&red->flag,1,MPI_INT,0,comm);
268: if (!red->setupcalled) {
269: /* setup vector communication */
270: MPI_Bcast(&red->n,1,MPIU_INT,0,comm);
271: VecCreateMPI(comm,PETSC_DECIDE,red->n,&red->x);
272: VecCreateMPI(comm,PETSC_DECIDE,red->n,&red->y);
273: VecScatterCreateToZero(red->x,&red->scatter,&red->xdummy);
274: VecDuplicate(red->xdummy,&red->ydummy);
275: MPI_Comm_rank(comm,&rank);
276: if (!rank) {
277: VecDestroy(red->xdummy);
278: VecDestroy(red->ydummy);
279: }
280: scal = MAT_INITIAL_MATRIX;
281: } else {
282: if (red->flag == DIFFERENT_NONZERO_PATTERN) {
283: MatDestroy(red->mat);
284: scal = MAT_INITIAL_MATRIX;
285: CHKMEMQ;
286: } else {
287: scal = MAT_REUSE_MATRIX;
288: }
289: }
291: /* copy matrix out onto processes */
292: VecGetLocalSize(red->x,&m);
293: MatDistribute_MPIAIJ(comm,red->gmat,m,scal,&red->mat);
294: if (!red->setupcalled) {
295: /* create the solver */
296: KSPCreate(comm,&red->ksp);
297: KSPSetOptionsPrefix(red->ksp,"openmp_"); /* should really be appended to the outer PC's options prefix */
298: KSPSetOperators(red->ksp,red->mat,red->mat,red->flag);
299: KSPSetFromOptions(red->ksp);
300: } else {
301: KSPSetOperators(red->ksp,red->mat,red->mat,red->flag);
302: }
303: return(0);
304: }
308: static PetscErrorCode PCSetUp_OpenMP(PC pc)
309: {
310: PC_OpenMP *red = (PC_OpenMP*)pc->data;
312: PetscMPIInt size;
315: red->gmat = pc->mat;
316: red->flag = pc->flag;
317: red->setupcalled = pc->setupcalled;
319: MPI_Comm_size(red->comm,&size);
320: if (size == 1) { /* special case where a copy of the matrix is not needed */
321: if (!red->setupcalled) {
322: /* create the solver */
323: KSPCreate(((PetscObject)pc)->comm,&red->ksp);
324: KSPSetOptionsPrefix(red->ksp,"openmp_"); /* should really be appended to the outer PC's options prefix */
325: KSPSetOperators(red->ksp,red->gmat,red->gmat,red->flag);
326: KSPSetFromOptions(red->ksp);
327: } else {
328: KSPSetOperators(red->ksp,red->gmat,red->gmat,red->flag);
329: }
330: pc->ops->apply = PCApply_OpenMP_1;
331: return(0);
332: } else {
333: MatGetSize(pc->mat,&red->n,PETSC_IGNORE);
334: PetscOpenMPRun(red->comm,PCSetUp_OpenMP_MP,red);
335: }
336: return(0);
337: }
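/*
   Worker-side apply, dispatched from PCApply_OpenMP() below. The master stores the
   caller's vectors in xdummy/ydummy; here they are scattered from process 0 into the
   distributed vectors, the inner KSP solve is run, and the result is scattered back
   to process 0. nonzero_guess is broadcast so that all processes agree on whether
   ydummy carries an initial guess.
*/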
341: static PetscErrorCode PCApply_OpenMP_MP(MPI_Comm comm,void *ctx)
342: {
343: PC_OpenMP *red = (PC_OpenMP*)ctx;
347: VecScatterBegin(red->scatter,red->xdummy,red->x,INSERT_VALUES,SCATTER_REVERSE);
348: VecScatterEnd(red->scatter,red->xdummy,red->x,INSERT_VALUES,SCATTER_REVERSE);
349: MPI_Bcast(&red->nonzero_guess,1,MPIU_INT,0,red->comm);
350: if (red->nonzero_guess) {
351: VecScatterBegin(red->scatter,red->ydummy,red->y,INSERT_VALUES,SCATTER_REVERSE);
352: VecScatterEnd(red->scatter,red->ydummy,red->y,INSERT_VALUES,SCATTER_REVERSE);
353: }
354: KSPSetInitialGuessNonzero(red->ksp,red->nonzero_guess);
356: KSPSolve(red->ksp,red->x,red->y);
358: VecScatterBegin(red->scatter,red->y,red->ydummy,INSERT_VALUES,SCATTER_FORWARD);
359: VecScatterEnd(red->scatter,red->y,red->ydummy,INSERT_VALUES,SCATTER_FORWARD);
360: return(0);
361: }
365: static PetscErrorCode PCApply_OpenMP(PC pc,Vec x,Vec y)
366: {
367: PC_OpenMP *red = (PC_OpenMP*)pc->data;
371: red->xdummy = x;
372: red->ydummy = y;
373: red->nonzero_guess = pc->nonzero_guess;
374: PetscOpenMPRun(red->comm,PCApply_OpenMP_MP,red);
375: return(0);
376: }
380: static PetscErrorCode PCDestroy_OpenMP_MP(MPI_Comm comm,void *ctx)
381: {
382: PC_OpenMP *red = (PC_OpenMP*)ctx;
383: PetscMPIInt rank;
387: if (red->scatter) {VecScatterDestroy(red->scatter);}
388: if (red->x) {VecDestroy(red->x);}
389: if (red->y) {VecDestroy(red->y);}
390: if (red->ksp) {KSPDestroy(red->ksp);}
391: if (red->mat) {MatDestroy(red->mat);}
392: MPI_Comm_rank(comm,&rank);
393: if (rank) {
394: if (red->xdummy) {VecDestroy(red->xdummy);}
395: if (red->ydummy) {VecDestroy(red->ydummy);}
396: }
397: return(0);
398: }
402: static PetscErrorCode PCDestroy_OpenMP(PC pc)
403: {
404: PC_OpenMP *red = (PC_OpenMP*)pc->data;
408: PetscOpenMPRun(red->comm,PCDestroy_OpenMP_MP,red);
409: PetscOpenMPFree(red->comm,red);
410: return(0);
411: }
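/*
   Intentionally empty: the inner solver picks up its own options via
   KSPSetFromOptions() with the "openmp_" prefix during PCSetUp_OpenMP()/PCSetUp_OpenMP_MP().
*/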
415: static PetscErrorCode PCSetFromOptions_OpenMP(PC pc)
416: {
418: return(0);
419: }
422: /* -------------------------------------------------------------------------------------*/
423: /*MC
424: PCOPENMP - Runs a preconditioner for a single-process matrix across several MPI processes
426: $ This will usually be run with -pc_type openmp -ksp_type preonly
427: $ solver options are set with -openmp_ksp_... and -openmp_pc_...; for example,
428: $ -openmp_ksp_type cg would use CG as the Krylov method, or -openmp_ksp_monitor, or
429: $ -openmp_pc_type hypre -openmp_pc_hypre_type boomeramg
431: Always run with -ksp_view (or -snes_view) to see what solver is actually being used.
433: Currently the solver options INSIDE the OpenMP preconditioner can ONLY be set via the
434: options database.
436: Level: intermediate
438: See PetscOpenMPMerge() and PetscOpenMPSpawn() for two ways to start up MPI for use with this preconditioner.
440: .seealso: PCCreate(), PCSetType(), PCType (for list of available types)
442: M*/
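/*
   Example option set (for a hypothetical application; MPI must have been set up with
   PetscOpenMPMerge() or PetscOpenMPSpawn() as noted above):

       -pc_type openmp -ksp_type preonly \
           -openmp_ksp_type cg -openmp_pc_type hypre -openmp_pc_hypre_type boomeramg -ksp_view
*/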
448: PetscErrorCode PCCreate_OpenMP(PC pc)
449: {
451: PC_OpenMP *red;
452: PetscMPIInt size;
455: MPI_Comm_size(((PetscObject)pc)->comm,&size);
456: if (size > 1) SETERRQ(PETSC_ERR_ARG_SIZ,"OpenMP preconditioner only works for sequential solves");
457: /* cast the struct length to a PetscInt for easier MPI calls */
459: PetscOpenMPNew(PETSC_COMM_LOCAL_WORLD,(PetscInt)sizeof(PC_OpenMP),(void**)&red);
460: red->comm = PETSC_COMM_LOCAL_WORLD;
461: pc->data = (void*) red;
463: pc->ops->apply = PCApply_OpenMP;
464: pc->ops->destroy = PCDestroy_OpenMP;
465: pc->ops->setfromoptions = PCSetFromOptions_OpenMP;
466: pc->ops->setup = PCSetUp_OpenMP;
467: pc->ops->view = PCView_OpenMP;
468: return(0);
469: }