Actual source code: openmp.c

#define PETSCKSP_DLL

#include "private/pcimpl.h"
#include "petscksp.h"

typedef struct {
  MatStructure flag;                 /* pc->flag */
  PetscInt     setupcalled;          /* pc->setupcalled */
  PetscInt     n;
  MPI_Comm     comm;                 /* local world used by this preconditioner */
  KSP          ksp;                  /* actual solver used across local world */
  Mat          mat;                  /* matrix in local world */
  Mat          gmat;                 /* matrix known only to process 0 in the local world */
  Vec          x,y,xdummy,ydummy;
  VecScatter   scatter;
  PetscTruth   nonzero_guess;
} PC_OpenMP;


/*
    Would like to have this simply call PCView() on the inner PC. The problem is
  that the outer comm does not live on the inside so we cannot do this. Instead,
  handle the special case when the viewer is stdout: construct a new viewer just
  for this call.
*/

static PetscErrorCode PCView_OpenMP_MP(MPI_Comm comm,void *ctx)
{
  PC_OpenMP      *red = (PC_OpenMP*)ctx;
  PetscViewer    viewer;

  PetscViewerASCIIGetStdout(comm,&viewer);
  PetscViewerASCIIPushTab(viewer);         /* this is bogus in general */
  KSPView(red->ksp,viewer);
  PetscViewerASCIIPopTab(viewer);
  return(0);
}

static PetscErrorCode PCView_OpenMP(PC pc,PetscViewer viewer)
{
  PC_OpenMP      *red = (PC_OpenMP*)pc->data;
  PetscMPIInt    size;
  PetscTruth     iascii;

  MPI_Comm_size(red->comm,&size);
  PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
  if (iascii) {
    PetscViewerASCIIPrintf(viewer,"  Size of solver nodes %d\n",size);
    PetscViewerASCIIPrintf(viewer,"  Parallel sub-solver given next\n");
    /* should only make the next call if the viewer is associated with stdout */
    PetscOpenMPRun(red->comm,PCView_OpenMP_MP,red);
  }
  return(0);
}

#include "include/private/matimpl.h"
#include "private/vecimpl.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/mat/impls/aij/seq/aij.h"

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
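/*
    A minimal usage sketch (not part of the original source; lmat is a hypothetical name).
    It assumes gmat is a square MATSEQAIJ matrix that exists only on process 0 of comm and
    m is this process's share of the rows:

       Mat lmat;
       MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&lmat);   first call builds the parallel matrix
       MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&lmat);     later calls only resend the numerical values from process 0
*/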
static PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  PetscScalar    *gmataa,*ao,*ad,*gmataarestore=0;

  CHKMEMQ;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);
  if (!rank) {
    PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);
    if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    MatCreate(comm,&mat);
    MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
    MatSetType(mat,MATAIJ);
    PetscMalloc((size+1)*sizeof(PetscInt),&rowners);
    PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);
    MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    PetscObjectGetNewTag((PetscObject)mat,&tag);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
      }
      /* determine number of diagonal and off-diagonal entries */
      PetscMemzero(olens,m*sizeof(PetscInt));
      PetscMalloc(m*sizeof(PetscInt),&ld);
      PetscMemzero(ld,m*sizeof(PetscInt));
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        MPI_Send(&nz,1,MPIU_INT,i,tag,comm);
        MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);
      /* receive column indices */
      MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);
      PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);
      MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);
      /* determine number of diagonal and off-diagonal entries */
      PetscMemzero(olens,m*sizeof(PetscInt));
      PetscMalloc(m*sizeof(PetscInt),&ld);
      PetscMemzero(ld,m*sizeof(PetscInt));
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      PetscMemzero(gmataa,nz*sizeof(PetscScalar));
      MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    MatSeqAIJSetPreallocation(mat,0,dlens);
    MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt  = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);
      cnt += dlens[i];
    }
    if (rank) {
      PetscFree2(gmataa,gmataj);
    }
    PetscFree2(dlens,olens);
    PetscFree(rowners);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over the numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat   = *inmat;
    PetscObjectGetNewTag((PetscObject)mat,&tag);
    if (!rank) {
      /* send numerical values to other processes */
      gmata = (Mat_SeqAIJ*) gmat->data;
      MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
      }
      nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      PetscMalloc(nz*sizeof(PetscScalar),&gmataa); gmataarestore = gmataa;
      MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
    }
    /* transfer the numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap.n) {
      i  = 0;
      nz = ld[i];                                   PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap.n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap.n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
    }
    if (rank) {
      PetscFree(gmataarestore);
    }
  }
  MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
  CHKMEMQ;
  return(0);
}

static PetscErrorCode PCApply_OpenMP_1(PC pc,Vec x,Vec y)
{
  PC_OpenMP      *red = (PC_OpenMP*)pc->data;

  KSPSetInitialGuessNonzero(red->ksp,pc->nonzero_guess);
  KSPSolve(red->ksp,x,y);
  return(0);
}

static PetscErrorCode PCSetUp_OpenMP_MP(MPI_Comm comm,void *ctx)
{
  PC_OpenMP      *red = (PC_OpenMP*)ctx;
  PetscInt       m;
  MatReuse       scal;
  PetscMPIInt    rank;

  red->comm = comm;
  MPI_Bcast(&red->setupcalled,1,MPIU_INT,0,comm);
  MPI_Bcast(&red->flag,1,MPI_INT,0,comm);
  if (!red->setupcalled) {
    /* set up vector communication */
    MPI_Bcast(&red->n,1,MPIU_INT,0,comm);
    VecCreateMPI(comm,PETSC_DECIDE,red->n,&red->x);
    VecCreateMPI(comm,PETSC_DECIDE,red->n,&red->y);
    VecScatterCreateToZero(red->x,&red->scatter,&red->xdummy);
    VecDuplicate(red->xdummy,&red->ydummy);
    MPI_Comm_rank(comm,&rank);
    if (!rank) {
      VecDestroy(red->xdummy);
      VecDestroy(red->ydummy);
    }
    scal = MAT_INITIAL_MATRIX;
  } else {
    if (red->flag == DIFFERENT_NONZERO_PATTERN) {
      MatDestroy(red->mat);
      scal = MAT_INITIAL_MATRIX;
      CHKMEMQ;
    } else {
      scal = MAT_REUSE_MATRIX;
    }
  }

  /* copy the matrix out onto the processes */
  VecGetLocalSize(red->x,&m);
  MatDistribute_MPIAIJ(comm,red->gmat,m,scal,&red->mat);
  if (!red->setupcalled) {
    /* create the solver */
    KSPCreate(comm,&red->ksp);
    KSPSetOptionsPrefix(red->ksp,"openmp_"); /* should actually append the global PC prefix */
    KSPSetOperators(red->ksp,red->mat,red->mat,red->flag);
    KSPSetFromOptions(red->ksp);
  } else {
    KSPSetOperators(red->ksp,red->mat,red->mat,red->flag);
  }
  return(0);
}

static PetscErrorCode PCSetUp_OpenMP(PC pc)
{
  PC_OpenMP      *red = (PC_OpenMP*)pc->data;
  PetscMPIInt    size;

  red->gmat        = pc->mat;
  red->flag        = pc->flag;
  red->setupcalled = pc->setupcalled;

  MPI_Comm_size(red->comm,&size);
  if (size == 1) {  /* special case where a copy of the matrix is not needed */
    if (!red->setupcalled) {
      /* create the solver */
      KSPCreate(((PetscObject)pc)->comm,&red->ksp);
      KSPSetOptionsPrefix(red->ksp,"openmp_"); /* should actually append the global PC prefix */
      KSPSetOperators(red->ksp,red->gmat,red->gmat,red->flag);
      KSPSetFromOptions(red->ksp);
    } else {
      KSPSetOperators(red->ksp,red->gmat,red->gmat,red->flag);
    }
    pc->ops->apply = PCApply_OpenMP_1;
    return(0);
  } else {
    MatGetSize(pc->mat,&red->n,PETSC_IGNORE);
    PetscOpenMPRun(red->comm,PCSetUp_OpenMP_MP,red);
  }
  return(0);
}

static PetscErrorCode PCApply_OpenMP_MP(MPI_Comm comm,void *ctx)
{
  PC_OpenMP      *red = (PC_OpenMP*)ctx;

  VecScatterBegin(red->scatter,red->xdummy,red->x,INSERT_VALUES,SCATTER_REVERSE);
  VecScatterEnd(red->scatter,red->xdummy,red->x,INSERT_VALUES,SCATTER_REVERSE);
  MPI_Bcast(&red->nonzero_guess,1,MPIU_INT,0,red->comm);
  if (red->nonzero_guess) {
    VecScatterBegin(red->scatter,red->ydummy,red->y,INSERT_VALUES,SCATTER_REVERSE);
    VecScatterEnd(red->scatter,red->ydummy,red->y,INSERT_VALUES,SCATTER_REVERSE);
  }
  KSPSetInitialGuessNonzero(red->ksp,red->nonzero_guess);

  KSPSolve(red->ksp,red->x,red->y);

  VecScatterBegin(red->scatter,red->y,red->ydummy,INSERT_VALUES,SCATTER_FORWARD);
  VecScatterEnd(red->scatter,red->y,red->ydummy,INSERT_VALUES,SCATTER_FORWARD);
  return(0);
}

static PetscErrorCode PCApply_OpenMP(PC pc,Vec x,Vec y)
{
  PC_OpenMP      *red = (PC_OpenMP*)pc->data;

  red->xdummy        = x;
  red->ydummy        = y;
  red->nonzero_guess = pc->nonzero_guess;
  PetscOpenMPRun(red->comm,PCApply_OpenMP_MP,red);
  return(0);
}

static PetscErrorCode PCDestroy_OpenMP_MP(MPI_Comm comm,void *ctx)
{
  PC_OpenMP      *red = (PC_OpenMP*)ctx;
  PetscMPIInt    rank;

  if (red->scatter) {VecScatterDestroy(red->scatter);}
  if (red->x) {VecDestroy(red->x);}
  if (red->y) {VecDestroy(red->y);}
  if (red->ksp) {KSPDestroy(red->ksp);}
  if (red->mat) {MatDestroy(red->mat);}
  MPI_Comm_rank(comm,&rank);
  if (rank) {
    if (red->xdummy) {VecDestroy(red->xdummy);}
    if (red->ydummy) {VecDestroy(red->ydummy);}
  }
  return(0);
}

static PetscErrorCode PCDestroy_OpenMP(PC pc)
{
  PC_OpenMP      *red = (PC_OpenMP*)pc->data;

  PetscOpenMPRun(red->comm,PCDestroy_OpenMP_MP,red);
  PetscOpenMPFree(red->comm,red);
  return(0);
}

static PetscErrorCode PCSetFromOptions_OpenMP(PC pc)
{
  return(0);
}


/* -------------------------------------------------------------------------------------*/
/*MC
     PCOPENMP - Runs a preconditioner for a single-process matrix across several MPI processes

$     This will usually be run with -pc_type openmp -ksp_type preonly
$     Solver options are set with -openmp_ksp_... and -openmp_pc_...; for example,
$     -openmp_ksp_type cg would use cg as the Krylov method, or -openmp_ksp_monitor or
$     -openmp_pc_type hypre -openmp_pc_hypre_type boomeramg

       Always run with -ksp_view (or -snes_view) to see what solver is actually being used.

       Currently the solver options INSIDE the OpenMP preconditioner can ONLY be set via the
      options database.

   Level: intermediate

   See PetscOpenMPMerge() and PetscOpenMPSpawn() for two ways to start up MPI for use with this preconditioner.

.seealso:  PCCreate(), PCSetType(), PCType (for list of available types)

M*/
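/*
    A minimal usage sketch from application code (not part of the original source; ksp and pc
    are assumed to be an existing KSP and its PC on a one-process communicator, as this
    preconditioner requires):

       KSPGetPC(ksp,&pc);
       PCSetType(pc,PCOPENMP);
       KSPSetFromOptions(ksp);    the inner solver then picks up the -openmp_ksp_... and -openmp_pc_... options

    Equivalently, just pass -pc_type openmp on the command line as shown above.
*/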

PetscErrorCode  PCCreate_OpenMP(PC pc)
{
  PC_OpenMP      *red;
  PetscMPIInt    size;

  MPI_Comm_size(((PetscObject)pc)->comm,&size);
  if (size > 1) SETERRQ(PETSC_ERR_ARG_SIZ,"OpenMP preconditioner only works for sequential solves");
  /* cast the struct length to a PetscInt for easier MPI calls */

  PetscOpenMPNew(PETSC_COMM_LOCAL_WORLD,(PetscInt)sizeof(PC_OpenMP),(void**)&red);
  red->comm = PETSC_COMM_LOCAL_WORLD;
  pc->data  = (void*) red;

  pc->ops->apply          = PCApply_OpenMP;
  pc->ops->destroy        = PCDestroy_OpenMP;
  pc->ops->setfromoptions = PCSetFromOptions_OpenMP;
  pc->ops->setup          = PCSetUp_OpenMP;
  pc->ops->view           = PCView_OpenMP;
  return(0);
}