Actual source code: comb.c

  1: /*
  2:       Split phase global vector reductions with support for combining the
  3:    communication portion of several operations. Using MPI-1.1 support only

  5:       The idea for this and much of the initial code is contributed by 
  6:    Victor Eijkhout.

  8:        Usage:
  9:              VecDotBegin(Vec,Vec,PetscScalar *);
 10:              VecNormBegin(Vec,NormType,PetscReal *);
 11:              ....
 12:              VecDotEnd(Vec,Vec,PetscScalar *);
 13:              VecNormEnd(Vec,NormType,PetscReal *);

 15:        Limitations: 
 16:          - The order of the xxxEnd() functions MUST be in the same order
 17:            as the xxxBegin(). There is extensive error checking to try to 
 18:            insure that the user calls the routines in the correct order
 19: */

 21:  #include vecimpl.h

 23: #define STATE_BEGIN 0
 24: #define STATE_END   1

 26: #define REDUCE_SUM  0
 27: #define REDUCE_MAX  1
 28: #define REDUCE_MIN  2

 30: typedef struct {
 31:   MPI_Comm     comm;
 32:   PetscScalar  *lvalues;    /* this are the reduced values before call to MPI_Allreduce() */
 33:   PetscScalar  *gvalues;    /* values after call to MPI_Allreduce() */
 34:   void         **invecs;    /* for debugging only, vector/memory used with each op */
 35:   PetscInt     *reducetype; /* is particular value to be summed or maxed? */
 36:   PetscInt     state;       /* are we calling xxxBegin() or xxxEnd()? */
 37:   PetscInt     maxops;      /* total amount of space we have for requests */
 38:   PetscInt     numopsbegin; /* number of requests that have been queued in */
 39:   PetscInt     numopsend;   /* number of requests that have been gotten by user */
 40: } PetscSplitReduction;
 41: /*
 42:    Note: the lvalues and gvalues are twice as long as maxops, this is to allow the second half of
 43: the entries to have a flag indicating if they are REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN these are used by 
 44: the custom reduction operation that replaces MPI_SUM, MPI_MAX, or MPI_MIN in the case when a reduction involves
 45: some of each.
 46: */

 50: /*
 51:    PetscSplitReductionCreate - Creates a data structure to contain the queued information.
 52: */
 53: PetscErrorCode PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction **sr)
 54: {

 58:   PetscNew(PetscSplitReduction,sr);
 59:   (*sr)->numopsbegin = 0;
 60:   (*sr)->numopsend   = 0;
 61:   (*sr)->state       = STATE_BEGIN;
 62:   (*sr)->maxops      = 32;
 63:   PetscMalloc(2*32*sizeof(PetscScalar),&(*sr)->lvalues);
 64:   PetscMalloc(2*32*sizeof(PetscScalar),&(*sr)->gvalues);
 65:   PetscMalloc(32*sizeof(void*),&(*sr)->invecs);
 66:   (*sr)->comm        = comm;
 67:   PetscMalloc(32*sizeof(PetscInt),&(*sr)->reducetype);
 68:   return(0);
 69: }

 71: /*
 72:        This function is the MPI reduction operation used when there is 
 73:    a combination of sums and max in the reduction. The call below to 
 74:    MPI_Op_create() converts the function PetscSplitReduction_Local() to the 
 75:    MPI operator PetscSplitReduction_Op.
 76: */
 77: MPI_Op PetscSplitReduction_Op = 0;

 82: void PetscSplitReduction_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
 83: {
 84:   PetscScalar *xin = (PetscScalar *)in,*xout = (PetscScalar*)out;
 85:   PetscInt    i,count = (PetscInt)*cnt;

 88:   if (*datatype != MPIU_REAL) {
 89:     (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
 90:     MPI_Abort(MPI_COMM_WORLD,1);
 91:   }
 92: #if defined(PETSC_USE_COMPLEX)
 93:   count = count/2;
 94: #endif
 95:   count = count/2;
 96:   for (i=0; i<count; i++) {
 97:     if (((int)PetscRealPart(xin[count+i])) == REDUCE_SUM) { /* second half of xin[] is flags for reduction type */
 98:       xout[i] += xin[i];
 99:     } else if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_MAX) {
100:       xout[i] = PetscMax(*(PetscReal *)(xout+i),*(PetscReal *)(xin+i));
101:     } else if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_MIN) {
102:       xout[i] = PetscMin(*(PetscReal *)(xout+i),*(PetscReal *)(xin+i));
103:     } else {
104:       (*PetscErrorPrintf)("Reduction type input is not REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN");
105:       MPI_Abort(MPI_COMM_WORLD,1);
106:     }
107:   }
108:   PetscStackPop; /* since function returns void cannot use PetscFunctionReturn(); */
109:   return;
110: }

115: /*
116:    PetscSplitReductionApply - Actually do the communication required for a split phase reduction
117: */
118: PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
119: {
121:   PetscInt            i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
122:   PetscScalar    *lvalues = sr->lvalues,*gvalues = sr->gvalues;
123:   PetscInt            sum_flg = 0,max_flg = 0, min_flg = 0;
124:   MPI_Comm       comm = sr->comm;
125:   PetscMPIInt    size;

128:   if (sr->numopsend > 0) {
129:     SETERRQ(PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
130:   }

132:   PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,comm);
133:   MPI_Comm_size(sr->comm,&size);
134:   if (size == 1) {
135:     PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
136:   } else {
137:     /* determine if all reductions are sum, max, or min */
138:     for (i=0; i<numops; i++) {
139:       if (reducetype[i] == REDUCE_MAX) {
140:         max_flg = 1;
141:       } else if (reducetype[i] == REDUCE_SUM) {
142:         sum_flg = 1;
143:       } else if (reducetype[i] == REDUCE_MIN) {
144:         min_flg = 1;
145:       } else {
146:         SETERRQ(PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
147:       }
148:     }
149:     if (sum_flg + max_flg + min_flg > 1) {
150:       /* 
151:          after all the entires in lvalues we store the reducetype flags to indicate
152:          to the reduction operations what are sums and what are max
153:       */
154:       for (i=0; i<numops; i++) {
155:         lvalues[numops+i] = reducetype[i];
156:       }
157: #if defined(PETSC_USE_COMPLEX)
158:       MPI_Allreduce(lvalues,gvalues,2*2*numops,MPIU_REAL,PetscSplitReduction_Op,comm);
159: #else
160:       MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,PetscSplitReduction_Op,comm);
161: #endif
162:     } else if (max_flg) {
163: #if defined(PETSC_USE_COMPLEX)
164:       /* 
165:         complex case we max both the real and imaginary parts, the imaginary part
166:         is just ignored later
167:       */
168:       MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,MPI_MAX,comm);
169: #else
170:       MPI_Allreduce(lvalues,gvalues,numops,MPIU_REAL,MPI_MAX,comm);
171: #endif
172:     } else if (min_flg) {
173: #if defined(PETSC_USE_COMPLEX)
174:       /* 
175:         complex case we min both the real and imaginary parts, the imaginary part
176:         is just ignored later
177:       */
178:       MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,MPI_MIN,comm);
179: #else
180:       MPI_Allreduce(lvalues,gvalues,numops,MPIU_REAL,MPI_MIN,comm);
181: #endif
182:     } else {
183:       MPI_Allreduce(lvalues,gvalues,numops,MPIU_SCALAR,PetscSum_Op,comm);
184:     }
185:   }
186:   sr->state     = STATE_END;
187:   sr->numopsend = 0;
188:   PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,comm);
189:   return(0);
190: }


195: /*
196:    PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
197: */
198: PetscErrorCode PetscSplitReductionExtend(PetscSplitReduction *sr)
199: {
201:   PetscInt         maxops = sr->maxops,*reducetype = sr->reducetype;
202:   PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
203:   void        *invecs = sr->invecs;

206:   sr->maxops     = 2*maxops;
207:   PetscMalloc(2*2*maxops*sizeof(PetscScalar),&sr->lvalues);
208:   PetscMalloc(2*2*maxops*sizeof(PetscScalar),&sr->gvalues);
209:   PetscMalloc(2*maxops*sizeof(PetscInt),&sr->reducetype);
210:   PetscMalloc(2*maxops*sizeof(void*),&sr->invecs);
211:   PetscMemcpy(sr->lvalues,lvalues,maxops*sizeof(PetscScalar));
212:   PetscMemcpy(sr->gvalues,gvalues,maxops*sizeof(PetscScalar));
213:   PetscMemcpy(sr->reducetype,reducetype,maxops*sizeof(PetscInt));
214:   PetscMemcpy(sr->invecs,invecs,maxops*sizeof(void*));
215:   PetscFree(lvalues);
216:   PetscFree(gvalues);
217:   PetscFree(reducetype);
218:   PetscFree(invecs);
219:   return(0);
220: }

224: PetscErrorCode PetscSplitReductionDestroy(PetscSplitReduction *sr)
225: {

229:   PetscFree(sr->lvalues);
230:   PetscFree(sr->gvalues);
231:   PetscFree(sr->reducetype);
232:   PetscFree(sr->invecs);
233:   PetscFree(sr);
234:   return(0);
235: }

237: static PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;

242: /*
243:    Private routine to delete internal storage when a communicator is freed.
244:   This is called by MPI, not by users.

246:   The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
247:   it was MPI_Comm *comm.  
248: */
249: int Petsc_DelReduction(MPI_Comm comm,int keyval,void* attr_val,void* extra_state)
250: {

254:   PetscLogInfo(0,"Petsc_DelReduction:Deleting reduction data in an MPI_Comm %ld\n",(long)comm);
255:   PetscSplitReductionDestroy((PetscSplitReduction *)attr_val);
256:   return(0);
257: }

260: /*
261:      PetscSplitReductionGet - Gets the split reduction object from a 
262:         PETSc vector, creates if it does not exit.

264: */
267: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction **sr)
268: {
270:   PetscMPIInt    flag;

273:   if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
274:     /* 
275:        The calling sequence of the 2nd argument to this function changed
276:        between MPI Standard 1.0 and the revisions 1.1 Here we match the 
277:        new standard, if you are using an MPI implementation that uses 
278:        the older version you will get a warning message about the next line;
279:        it is only a warning message and should do no harm.
280:     */
281:     MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelReduction,&Petsc_Reduction_keyval,0);
282:   }
283:   MPI_Attr_get(comm,Petsc_Reduction_keyval,(void **)sr,&flag);
284:   if (!flag) {  /* doesn't exist yet so create it and put it in */
285:     PetscSplitReductionCreate(comm,sr);
286:     MPI_Attr_put(comm,Petsc_Reduction_keyval,*sr);
287:     PetscLogInfo(0,"PetscSplitReductionGet:Putting reduction data in an MPI_Comm %ld\n",(long)comm);
288:   }

290:   return(0);
291: }

293: /* ----------------------------------------------------------------------------------------------------*/

297: /*@
298:    VecDotBegin - Starts a split phase dot product computation.

300:    Input Parameters:
301: +   x - the first vector
302: .   y - the second vector
303: -   result - where the result will go (can be PETSC_NULL)

305:    Level: advanced

307:    Notes:
308:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

310: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
311:          VecTDotBegin(), VecTDotEnd()
312: @*/
313: PetscErrorCode VecDotBegin(Vec x,Vec y,PetscScalar *result)
314: {
316:   PetscSplitReduction *sr;
317:   MPI_Comm            comm;

320:   PetscObjectGetComm((PetscObject)x,&comm);
321:   PetscSplitReductionGet(comm,&sr);
322:   if (sr->state == STATE_END) {
323:     SETERRQ(PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
324:   }
325:   if (sr->numopsbegin >= sr->maxops) {
326:     PetscSplitReductionExtend(sr);
327:   }
328:   sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
329:   sr->invecs[sr->numopsbegin]     = (void*)x;
330:   if (!x->ops->dot_local) SETERRQ(PETSC_ERR_SUP,"Vector does not suppport local dots");
331:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
332:   (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
333:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
334:   return(0);
335: }

339: /*@
340:    VecDotEnd - Ends a split phase dot product computation.

342:    Input Parameters:
343: +  x - the first vector (can be PETSC_NULL)
344: .  y - the second vector (can be PETSC_NULL)
345: -  result - where the result will go

347:    Level: advanced

349:    Notes:
350:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

352: seealso: VecDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
353:          VecTDotBegin(),VecTDotEnd()

355: @*/
356: PetscErrorCode VecDotEnd(Vec x,Vec y,PetscScalar *result)
357: {
359:   PetscSplitReduction *sr;
360:   MPI_Comm            comm;

363:   PetscObjectGetComm((PetscObject)x,&comm);
364:   PetscSplitReductionGet(comm,&sr);
365: 
366:   if (sr->state != STATE_END) {
367:     /* this is the first call to VecxxxEnd() so do the communication */
368:     PetscSplitReductionApply(sr);
369:   }

371:   if (sr->numopsend >= sr->numopsbegin) {
372:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
373:   }
374:   if (x && (void*) x != sr->invecs[sr->numopsend]) {
375:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
376:   }
377:   if (sr->reducetype[sr->numopsend] != REDUCE_SUM) {
378:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
379:   }
380:   *result = sr->gvalues[sr->numopsend++];

382:   /*
383:      We are finished getting all the results so reset to no outstanding requests
384:   */
385:   if (sr->numopsend == sr->numopsbegin) {
386:     sr->state        = STATE_BEGIN;
387:     sr->numopsend    = 0;
388:     sr->numopsbegin  = 0;
389:   }
390:   return(0);
391: }

395: /*@
396:    VecTDotBegin - Starts a split phase transpose dot product computation.

398:    Input Parameters:
399: +  x - the first vector
400: .  y - the second vector
401: -  result - where the result will go (can be PETSC_NULL)

403:    Level: advanced

405:    Notes:
406:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

408: seealso: VecTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
409:          VecDotBegin(), VecDotEnd()

411: @*/
412: PetscErrorCode VecTDotBegin(Vec x,Vec y,PetscScalar *result)
413: {
415:   PetscSplitReduction *sr;
416:   MPI_Comm            comm;

419:   PetscObjectGetComm((PetscObject)x,&comm);
420:   PetscSplitReductionGet(comm,&sr);
421:   if (sr->state == STATE_END) {
422:     SETERRQ(PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
423:   }
424:   if (sr->numopsbegin >= sr->maxops) {
425:     PetscSplitReductionExtend(sr);
426:   }
427:   sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
428:   sr->invecs[sr->numopsbegin]     = (void*)x;
429:   if (!x->ops->tdot_local) SETERRQ(PETSC_ERR_SUP,"Vector does not suppport local dots");
430:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
431:   (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
432:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
433:   return(0);
434: }

438: /*@
439:    VecTDotEnd - Ends a split phase transpose dot product computation.

441:    Input Parameters:
442: +  x - the first vector (can be PETSC_NULL)
443: .  y - the second vector (can be PETSC_NULL)
444: -  result - where the result will go

446:    Level: advanced

448:    Notes:
449:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

451: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
452:          VecDotBegin(), VecDotEnd()
453: @*/
454: PetscErrorCode VecTDotEnd(Vec x,Vec y,PetscScalar *result)
455: {

459:   /*
460:       TDotEnd() is the same as DotEnd() so reuse the code
461:   */
462:   VecDotEnd(x,y,result);
463:   return(0);
464: }

466: /* -------------------------------------------------------------------------*/

470: /*@
471:    VecNormBegin - Starts a split phase norm computation.

473:    Input Parameters:
474: +  x - the first vector
475: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
476: -  result - where the result will go (can be PETSC_NULL)

478:    Level: advanced

480:    Notes:
481:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

483: .seealso: VecNormEnd(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd()

485: @*/
486: PetscErrorCode VecNormBegin(Vec x,NormType ntype,PetscReal *result)
487: {
489:   PetscSplitReduction *sr;
490:   PetscReal           lresult[2];
491:   MPI_Comm            comm;

494:   PetscObjectGetComm((PetscObject)x,&comm);
495:   PetscSplitReductionGet(comm,&sr);
496:   if (sr->state == STATE_END) {
497:     SETERRQ(PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
498:   }
499:   if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops-1 && ntype == NORM_1_AND_2)) {
500:     PetscSplitReductionExtend(sr);
501:   }
502: 
503:   sr->invecs[sr->numopsbegin]     = (void*)x;
504:   if (!x->ops->norm_local) SETERRQ(PETSC_ERR_SUP,"Vector does not support local norms");
505:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
506:   (*x->ops->norm_local)(x,ntype,lresult);
507:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
508:   if (ntype == NORM_2)         lresult[0]                = lresult[0]*lresult[0];
509:   if (ntype == NORM_1_AND_2)   lresult[1]                = lresult[1]*lresult[1];
510:   if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = REDUCE_MAX;
511:   else                   sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
512:   sr->lvalues[sr->numopsbegin++] = lresult[0];
513:   if (ntype == NORM_1_AND_2) {
514:     sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
515:     sr->lvalues[sr->numopsbegin++]  = lresult[1];
516:   }
517:   return(0);
518: }

522: /*@
523:    VecNormEnd - Ends a split phase norm computation.

525:    Input Parameters:
526: +  x - the first vector (can be PETSC_NULL)
527: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
528: -  result - where the result will go

530:    Level: advanced

532:    Notes:
533:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

535: .seealso: VecNormBegin(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd()

537: @*/
538: PetscErrorCode VecNormEnd(Vec x,NormType ntype,PetscReal *result)
539: {
540:   PetscErrorCode      ierr;
541:   PetscInt            type_id;
542:   PetscSplitReduction *sr;
543:   MPI_Comm            comm;

546:   VecNormComposedDataID(ntype,&type_id);

548:   PetscObjectGetComm((PetscObject)x,&comm);
549:   PetscSplitReductionGet(comm,&sr);
550: 
551:   if (sr->state != STATE_END) {
552:     /* this is the first call to VecxxxEnd() so do the communication */
553:     PetscSplitReductionApply(sr);
554:   }

556:   if (sr->numopsend >= sr->numopsbegin) {
557:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
558:   }
559:   if (x && (void*)x != sr->invecs[sr->numopsend]) {
560:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
561:   }
562:   if (sr->reducetype[sr->numopsend] != REDUCE_MAX && ntype == NORM_MAX) {
563:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
564:   }
565:   result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);

567:   if (ntype == NORM_2) {
568:     result[0] = sqrt(result[0]);
569:   } else if (ntype == NORM_1_AND_2) {
570:     result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
571:     result[1] = sqrt(result[1]);
572:   }
573:   if (ntype!=NORM_1_AND_2) {
574:     PetscObjectSetRealComposedData((PetscObject)x,type_id,result[0]);
575:   }

577:   if (sr->numopsend == sr->numopsbegin) {
578:     sr->state        = STATE_BEGIN;
579:     sr->numopsend    = 0;
580:     sr->numopsbegin  = 0;
581:   }
582:   return(0);
583: }

585: /*
586:    Possibly add

588:      PetscReductionSumBegin/End()
589:      PetscReductionMaxBegin/End()
590:      PetscReductionMinBegin/End()
591:    or have more like MPI with a single function with flag for Op? Like first better
592: */