Actual source code: comb.c
/*
      Split phase global vector reductions with support for combining the
   communication portion of several operations. Using MPI-1.1 support only

      The idea for this and much of the initial code is contributed by
   Victor Eijkhout.

       Usage:
             VecDotBegin(Vec,Vec,PetscScalar *);
             VecNormBegin(Vec,NormType,PetscReal *);
             ....
             VecDotEnd(Vec,Vec,PetscScalar *);
             VecNormEnd(Vec,NormType,PetscReal *);

       Limitations:
         - The xxxEnd() functions MUST be called in the same order
           as the xxxBegin() functions. There is extensive error checking to try to
           ensure that the user calls the routines in the correct order
*/
22: #include <petsc/private/vecimpl.h>
24: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf, void *recvbuf, PetscMPIInt count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request)
25: {
26: PetscFunctionBegin;
27: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
28: PetscCallMPI(MPI_Iallreduce(sendbuf, recvbuf, count, datatype, op, comm, request));
29: #else
30: PetscCall(MPIU_Allreduce(sendbuf, recvbuf, count, datatype, op, comm));
31: *request = MPI_REQUEST_NULL;
32: #endif
33: PetscFunctionReturn(PETSC_SUCCESS);
34: }
36: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *);
38: /*
39: PetscSplitReductionCreate - Creates a data structure to contain the queued information.
40: */
41: static PetscErrorCode PetscSplitReductionCreate(MPI_Comm comm, PetscSplitReduction **sr)
42: {
43: PetscFunctionBegin;
44: PetscCall(PetscNew(sr));
45: (*sr)->numopsbegin = 0;
46: (*sr)->numopsend = 0;
47: (*sr)->state = STATE_BEGIN;
48: #define MAXOPS 32
49: (*sr)->maxops = MAXOPS;
50: PetscCall(PetscMalloc6(MAXOPS, &(*sr)->lvalues, MAXOPS, &(*sr)->gvalues, MAXOPS, &(*sr)->invecs, MAXOPS, &(*sr)->reducetype, MAXOPS, &(*sr)->lvalues_mix, MAXOPS, &(*sr)->gvalues_mix));
51: #undef MAXOPS
52: (*sr)->comm = comm;
53: (*sr)->request = MPI_REQUEST_NULL;
54: (*sr)->mix = PETSC_FALSE;
55: (*sr)->async = PETSC_FALSE;
56: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
57: (*sr)->async = PETSC_TRUE; /* Enable by default */
58: #endif
59: /* always check for option; so that tests that run on systems without support don't warn about unhandled options */
60: PetscCall(PetscOptionsGetBool(NULL, NULL, "-splitreduction_async", &(*sr)->async, NULL));
61: PetscFunctionReturn(PETSC_SUCCESS);
62: }
64: /*
65: This function is the MPI reduction operation used when there is
66: a combination of sums and max in the reduction. The call below to
67: MPI_Op_create() converts the function PetscSplitReduction_Local() to the
68: MPI operator PetscSplitReduction_Op.
69: */
70: MPI_Op PetscSplitReduction_Op = 0;
72: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in, void *out, PetscMPIInt *cnt, MPI_Datatype *datatype)
73: {
74: struct PetscScalarInt {
75: PetscScalar v;
76: PetscInt i;
77: };
78: struct PetscScalarInt *xin = (struct PetscScalarInt *)in;
79: struct PetscScalarInt *xout = (struct PetscScalarInt *)out;
80: PetscInt i, count = (PetscInt)*cnt;
82: PetscFunctionBegin;
83: if (*datatype != MPIU_SCALAR_INT) {
84: PetscCallAbort(MPI_COMM_SELF, (*PetscErrorPrintf)("Can only handle MPIU_SCALAR_INT data types"));
85: PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
86: }
87: for (i = 0; i < count; i++) {
88: if (xin[i].i == PETSC_SR_REDUCE_SUM) xout[i].v += xin[i].v;
89: else if (xin[i].i == PETSC_SR_REDUCE_MAX) xout[i].v = PetscMax(PetscRealPart(xout[i].v), PetscRealPart(xin[i].v));
90: else if (xin[i].i == PETSC_SR_REDUCE_MIN) xout[i].v = PetscMin(PetscRealPart(xout[i].v), PetscRealPart(xin[i].v));
91: else {
92: PetscCallAbort(MPI_COMM_SELF, (*PetscErrorPrintf)("Reduction type input is not PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN"));
93: PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
94: }
95: }
96: PetscFunctionReturnVoid();
97: }
99: /*@
100: PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction
102: Collective but not synchronizing
104: Input Parameter:
105: comm - communicator on which split reduction has been queued
107: Level: advanced
109: Note:
110: Calling this function is optional when using split-mode reduction. On supporting hardware, calling this after all
111: VecXxxBegin() allows the reduction to make asynchronous progress before the result is needed (in VecXxxEnd()).
113: .seealso: `VecNormBegin()`, `VecNormEnd()`, `VecDotBegin()`, `VecDotEnd()`, `VecTDotBegin()`, `VecTDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`
114: @*/
115: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
116: {
117: PetscSplitReduction *sr;
119: PetscFunctionBegin;
120: PetscCall(PetscSplitReductionGet(comm, &sr));
121: PetscCheck(sr->numopsend <= 0, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot call this after VecxxxEnd() has been called");
122: if (sr->async) { /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
123: PetscInt i, numops = sr->numopsbegin, *reducetype = sr->reducetype;
124: PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
125: PetscInt sum_flg = 0, max_flg = 0, min_flg = 0;
126: MPI_Comm comm = sr->comm;
127: PetscMPIInt size, cmul = sizeof(PetscScalar) / sizeof(PetscReal);
129: PetscCall(PetscLogEventBegin(VEC_ReduceBegin, 0, 0, 0, 0));
130: PetscCallMPI(MPI_Comm_size(sr->comm, &size));
131: if (size == 1) {
132: PetscCall(PetscArraycpy(gvalues, lvalues, numops));
133: } else {
134: /* determine if all reductions are sum, max, or min */
135: for (i = 0; i < numops; i++) {
136: if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
137: else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
138: else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
139: else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
140: }
141: PetscCheck(sum_flg + max_flg + min_flg <= 1 || !sr->mix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
142: if (sum_flg + max_flg + min_flg > 1) {
143: sr->mix = PETSC_TRUE;
144: for (i = 0; i < numops; i++) {
145: sr->lvalues_mix[i].v = lvalues[i];
146: sr->lvalues_mix[i].i = reducetype[i];
147: }
148: PetscCall(MPIPetsc_Iallreduce(sr->lvalues_mix, sr->gvalues_mix, numops, MPIU_SCALAR_INT, PetscSplitReduction_Op, comm, &sr->request));
149: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
150: PetscCall(MPIPetsc_Iallreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MAX, comm, &sr->request));
151: } else if (min_flg) {
152: PetscCall(MPIPetsc_Iallreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MIN, comm, &sr->request));
153: } else {
154: PetscCall(MPIPetsc_Iallreduce(lvalues, gvalues, numops, MPIU_SCALAR, MPIU_SUM, comm, &sr->request));
155: }
156: }
157: sr->state = STATE_PENDING;
158: sr->numopsend = 0;
159: PetscCall(PetscLogEventEnd(VEC_ReduceBegin, 0, 0, 0, 0));
160: } else {
161: PetscCall(PetscSplitReductionApply(sr));
162: }
163: PetscFunctionReturn(PETSC_SUCCESS);
164: }
166: PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
167: {
168: PetscFunctionBegin;
169: switch (sr->state) {
170: case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
171: PetscCall(PetscSplitReductionApply(sr));
172: break;
173: case STATE_PENDING:
174: /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
175: PetscCall(PetscLogEventBegin(VEC_ReduceEnd, 0, 0, 0, 0));
176: if (sr->request != MPI_REQUEST_NULL) PetscCallMPI(MPI_Wait(&sr->request, MPI_STATUS_IGNORE));
177: sr->state = STATE_END;
178: if (sr->mix) {
179: PetscInt i;
180: for (i = 0; i < sr->numopsbegin; i++) sr->gvalues[i] = sr->gvalues_mix[i].v;
181: sr->mix = PETSC_FALSE;
182: }
183: PetscCall(PetscLogEventEnd(VEC_ReduceEnd, 0, 0, 0, 0));
184: break;
185: default:
186: break; /* everything is already done */
187: }
188: PetscFunctionReturn(PETSC_SUCCESS);
189: }
191: /*
192: PetscSplitReductionApply - Actually do the communication required for a split phase reduction
193: */
194: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
195: {
196: PetscInt i, numops = sr->numopsbegin, *reducetype = sr->reducetype;
197: PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
198: PetscInt sum_flg = 0, max_flg = 0, min_flg = 0;
199: MPI_Comm comm = sr->comm;
200: PetscMPIInt size, cmul = sizeof(PetscScalar) / sizeof(PetscReal);
202: PetscFunctionBegin;
203: PetscCheck(sr->numopsend <= 0, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot call this after VecxxxEnd() has been called");
204: PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0));
205: PetscCallMPI(MPI_Comm_size(sr->comm, &size));
206: if (size == 1) {
207: PetscCall(PetscArraycpy(gvalues, lvalues, numops));
208: } else {
209: /* determine if all reductions are sum, max, or min */
210: for (i = 0; i < numops; i++) {
211: if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
212: else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
213: else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
214: else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
215: }
216: if (sum_flg + max_flg + min_flg > 1) {
217: PetscCheck(!sr->mix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
218: for (i = 0; i < numops; i++) {
219: sr->lvalues_mix[i].v = lvalues[i];
220: sr->lvalues_mix[i].i = reducetype[i];
221: }
222: PetscCall(MPIU_Allreduce(sr->lvalues_mix, sr->gvalues_mix, numops, MPIU_SCALAR_INT, PetscSplitReduction_Op, comm));
223: for (i = 0; i < numops; i++) sr->gvalues[i] = sr->gvalues_mix[i].v;
224: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
225: PetscCall(MPIU_Allreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MAX, comm));
226: } else if (min_flg) {
227: PetscCall(MPIU_Allreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MIN, comm));
228: } else {
229: PetscCall(MPIU_Allreduce(lvalues, gvalues, numops, MPIU_SCALAR, MPIU_SUM, comm));
230: }
231: }
232: sr->state = STATE_END;
233: sr->numopsend = 0;
234: PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0));
235: PetscFunctionReturn(PETSC_SUCCESS);
236: }
238: /*
239: PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
240: */
241: PetscErrorCode PetscSplitReductionExtend(PetscSplitReduction *sr)
242: {
243: struct PetscScalarInt {
244: PetscScalar v;
245: PetscInt i;
246: };
247: PetscInt maxops = sr->maxops, *reducetype = sr->reducetype;
248: PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
249: struct PetscScalarInt *lvalues_mix = (struct PetscScalarInt *)sr->lvalues_mix;
250: struct PetscScalarInt *gvalues_mix = (struct PetscScalarInt *)sr->gvalues_mix;
251: void **invecs = sr->invecs;
253: PetscFunctionBegin;
254: sr->maxops = 2 * maxops;
255: PetscCall(PetscMalloc6(2 * maxops, &sr->lvalues, 2 * maxops, &sr->gvalues, 2 * maxops, &sr->reducetype, 2 * maxops, &sr->invecs, 2 * maxops, &sr->lvalues_mix, 2 * maxops, &sr->gvalues_mix));
256: PetscCall(PetscArraycpy(sr->lvalues, lvalues, maxops));
257: PetscCall(PetscArraycpy(sr->gvalues, gvalues, maxops));
258: PetscCall(PetscArraycpy(sr->reducetype, reducetype, maxops));
259: PetscCall(PetscArraycpy(sr->invecs, invecs, maxops));
260: PetscCall(PetscArraycpy(sr->lvalues_mix, lvalues_mix, maxops));
261: PetscCall(PetscArraycpy(sr->gvalues_mix, gvalues_mix, maxops));
262: PetscCall(PetscFree6(lvalues, gvalues, reducetype, invecs, lvalues_mix, gvalues_mix));
263: PetscFunctionReturn(PETSC_SUCCESS);
264: }
266: PetscErrorCode PetscSplitReductionDestroy(PetscSplitReduction *sr)
267: {
268: PetscFunctionBegin;
269: PetscCall(PetscFree6(sr->lvalues, sr->gvalues, sr->reducetype, sr->invecs, sr->lvalues_mix, sr->gvalues_mix));
270: PetscCall(PetscFree(sr));
271: PetscFunctionReturn(PETSC_SUCCESS);
272: }
274: PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;
276: /*
277: Private routine to delete internal storage when a communicator is freed.
278: This is called by MPI, not by users.
280: The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
281: it was MPI_Comm *comm.
282: */
283: PETSC_EXTERN PetscMPIInt MPIAPI Petsc_DelReduction(MPI_Comm comm, PetscMPIInt keyval, void *attr_val, void *extra_state)
284: {
285: PetscFunctionBegin;
286: PetscCallMPI(PetscInfo(0, "Deleting reduction data in an MPI_Comm %ld\n", (long)comm));
287: PetscCallMPI(PetscSplitReductionDestroy((PetscSplitReduction *)attr_val));
288: PetscFunctionReturn(PETSC_SUCCESS);
289: }
291: /*
292: PetscSplitReductionGet - Gets the split reduction object from a
293: PETSc vector, creates if it does not exit.
295: */
296: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm, PetscSplitReduction **sr)
297: {
298: PetscMPIInt flag;
300: PetscFunctionBegin;
301: if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
302: /*
303: The calling sequence of the 2nd argument to this function changed
304: between MPI Standard 1.0 and the revisions 1.1 Here we match the
305: new standard, if you are using an MPI implementation that uses
306: the older version you will get a warning message about the next line;
307: it is only a warning message and should do no harm.
308: */
309: PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelReduction, &Petsc_Reduction_keyval, NULL));
310: }
311: PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Reduction_keyval, (void **)sr, &flag));
312: if (!flag) { /* doesn't exist yet so create it and put it in */
313: PetscCall(PetscSplitReductionCreate(comm, sr));
314: PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Reduction_keyval, *sr));
315: PetscCall(PetscInfo(0, "Putting reduction data in an MPI_Comm %ld\n", (long)comm));
316: }
317: PetscFunctionReturn(PETSC_SUCCESS);
318: }
320: /* ----------------------------------------------------------------------------------------------------*/
322: /*@
323: VecDotBegin - Starts a split phase dot product computation.
325: Input Parameters:
326: + x - the first vector
327: . y - the second vector
328: - result - where the result will go (can be NULL)
330: Level: advanced
332: Notes:
333: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
335: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
336: VecTDotBegin(), VecTDotEnd(), PetscCommSplitReductionBegin()
337: @*/
338: PetscErrorCode VecDotBegin(Vec x, Vec y, PetscScalar *result)
339: {
340: PetscSplitReduction *sr;
341: MPI_Comm comm;
343: PetscFunctionBegin;
346: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
347: PetscCall(PetscSplitReductionGet(comm, &sr));
348: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
349: if (sr->numopsbegin >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
350: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
351: sr->invecs[sr->numopsbegin] = (void *)x;
352: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
353: PetscUseTypeMethod(x, dot_local, y, sr->lvalues + sr->numopsbegin++);
354: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
355: PetscFunctionReturn(PETSC_SUCCESS);
356: }
358: /*@
359: VecDotEnd - Ends a split phase dot product computation.
361: Input Parameters:
362: + x - the first vector (can be NULL)
363: . y - the second vector (can be NULL)
364: - result - where the result will go
366: Level: advanced
368: Notes:
369: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
371: .seealso: `VecDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
372: `VecTDotBegin()`, `VecTDotEnd()`, `PetscCommSplitReductionBegin()`
374: @*/
375: PetscErrorCode VecDotEnd(Vec x, Vec y, PetscScalar *result)
376: {
377: PetscSplitReduction *sr;
378: MPI_Comm comm;
380: PetscFunctionBegin;
381: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
382: PetscCall(PetscSplitReductionGet(comm, &sr));
383: PetscCall(PetscSplitReductionEnd(sr));
385: PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
386: PetscCheck(!x || (void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
387: PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_SUM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecDotEnd() on a reduction started with VecNormBegin()");
388: *result = sr->gvalues[sr->numopsend++];
390: /*
391: We are finished getting all the results so reset to no outstanding requests
392: */
393: if (sr->numopsend == sr->numopsbegin) {
394: sr->state = STATE_BEGIN;
395: sr->numopsend = 0;
396: sr->numopsbegin = 0;
397: sr->mix = PETSC_FALSE;
398: }
399: PetscFunctionReturn(PETSC_SUCCESS);
400: }
402: /*@
403: VecTDotBegin - Starts a split phase transpose dot product computation.
405: Input Parameters:
406: + x - the first vector
407: . y - the second vector
408: - result - where the result will go (can be NULL)
410: Level: advanced
412: Notes:
413: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
415: .seealso: `VecTDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
416: `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
418: @*/
419: PetscErrorCode VecTDotBegin(Vec x, Vec y, PetscScalar *result)
420: {
421: PetscSplitReduction *sr;
422: MPI_Comm comm;
424: PetscFunctionBegin;
425: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
426: PetscCall(PetscSplitReductionGet(comm, &sr));
427: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
428: if (sr->numopsbegin >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
429: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
430: sr->invecs[sr->numopsbegin] = (void *)x;
431: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
432: PetscUseTypeMethod(x, tdot_local, y, sr->lvalues + sr->numopsbegin++);
433: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
434: PetscFunctionReturn(PETSC_SUCCESS);
435: }
437: /*@
438: VecTDotEnd - Ends a split phase transpose dot product computation.
440: Input Parameters:
441: + x - the first vector (can be NULL)
442: . y - the second vector (can be NULL)
443: - result - where the result will go
445: Level: advanced
447: Notes:
448: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
450: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
451: VecDotBegin(), VecDotEnd()
452: @*/
453: PetscErrorCode VecTDotEnd(Vec x, Vec y, PetscScalar *result)
454: {
455: PetscFunctionBegin;
456: /*
457: TDotEnd() is the same as DotEnd() so reuse the code
458: */
459: PetscCall(VecDotEnd(x, y, result));
460: PetscFunctionReturn(PETSC_SUCCESS);
461: }
463: /* -------------------------------------------------------------------------*/
465: /*@
466: VecNormBegin - Starts a split phase norm computation.
468: Input Parameters:
469: + x - the first vector
470: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
471: - result - where the result will go (can be NULL)
473: Level: advanced
475: Notes:
476: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
478: .seealso: `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`, `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
480: @*/
481: PetscErrorCode VecNormBegin(Vec x, NormType ntype, PetscReal *result)
482: {
483: PetscSplitReduction *sr;
484: PetscReal lresult[2];
485: MPI_Comm comm;
487: PetscFunctionBegin;
489: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
490: PetscCall(PetscSplitReductionGet(comm, &sr));
491: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
492: if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops - 1 && ntype == NORM_1_AND_2)) PetscCall(PetscSplitReductionExtend(sr));
494: sr->invecs[sr->numopsbegin] = (void *)x;
495: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
496: PetscUseTypeMethod(x, norm_local, ntype, lresult);
497: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
498: if (ntype == NORM_2) lresult[0] = lresult[0] * lresult[0];
499: if (ntype == NORM_1_AND_2) lresult[1] = lresult[1] * lresult[1];
500: if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_MAX;
501: else sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
502: sr->lvalues[sr->numopsbegin++] = lresult[0];
503: if (ntype == NORM_1_AND_2) {
504: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
505: sr->lvalues[sr->numopsbegin++] = lresult[1];
506: }
507: PetscFunctionReturn(PETSC_SUCCESS);
508: }
510: /*@
511: VecNormEnd - Ends a split phase norm computation.
513: Input Parameters:
514: + x - the first vector
515: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
516: - result - where the result will go
518: Level: advanced
520: Notes:
521: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
523: The x vector is not allowed to be NULL, otherwise the vector would not have its correctly cached norm value
525: .seealso: `VecNormBegin()`, `VecNorm()`, `VecDot()`, `VecMDot()`, `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
527: @*/
528: PetscErrorCode VecNormEnd(Vec x, NormType ntype, PetscReal *result)
529: {
530: PetscSplitReduction *sr;
531: MPI_Comm comm;
533: PetscFunctionBegin;
535: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
536: PetscCall(PetscSplitReductionGet(comm, &sr));
537: PetscCall(PetscSplitReductionEnd(sr));
539: PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
540: PetscCheck((void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
541: PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_MAX || ntype != NORM_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
542: result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);
544: if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
545: else if (ntype == NORM_1_AND_2) {
546: result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
547: result[1] = PetscSqrtReal(result[1]);
548: }
549: if (ntype != NORM_1_AND_2) PetscCall(PetscObjectComposedDataSetReal((PetscObject)x, NormIds[ntype], result[0]));
551: if (sr->numopsend == sr->numopsbegin) {
552: sr->state = STATE_BEGIN;
553: sr->numopsend = 0;
554: sr->numopsbegin = 0;
555: }
556: PetscFunctionReturn(PETSC_SUCCESS);
557: }
559: /*
560: Possibly add
562: PetscReductionSumBegin/End()
563: PetscReductionMaxBegin/End()
564: PetscReductionMinBegin/End()
565: or have more like MPI with a single function with flag for Op? Like first better
566: */
568: /*@
569: VecMDotBegin - Starts a split phase multiple dot product computation.
571: Input Parameters:
572: + x - the first vector
573: . nv - number of vectors
574: . y - array of vectors
575: - result - where the result will go (can be NULL)
577: Level: advanced
579: Notes:
580: Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().
582: .seealso: `VecMDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
583: `VecTDotBegin()`, `VecTDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`, `PetscCommSplitReductionBegin()`
584: @*/
585: PetscErrorCode VecMDotBegin(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
586: {
587: PetscSplitReduction *sr;
588: MPI_Comm comm;
589: PetscInt i;
591: PetscFunctionBegin;
592: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
593: PetscCall(PetscSplitReductionGet(comm, &sr));
594: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
595: for (i = 0; i < nv; i++) {
596: if (sr->numopsbegin + i >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
597: sr->reducetype[sr->numopsbegin + i] = PETSC_SR_REDUCE_SUM;
598: sr->invecs[sr->numopsbegin + i] = (void *)x;
599: }
600: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
601: PetscUseTypeMethod(x, mdot_local, nv, y, sr->lvalues + sr->numopsbegin);
602: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
603: sr->numopsbegin += nv;
604: PetscFunctionReturn(PETSC_SUCCESS);
605: }
607: /*@
608: VecMDotEnd - Ends a split phase multiple dot product computation.
610: Input Parameters:
611: + x - the first vector (can be NULL)
612: . nv - number of vectors
613: - y - array of vectors (can be NULL)
615: Output Parameters:
616: . result - where the result will go
618: Level: advanced
620: Notes:
621: Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().
623: .seealso: `VecMDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
624: `VecTDotBegin()`, `VecTDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`, `PetscCommSplitReductionBegin()`
626: @*/
627: PetscErrorCode VecMDotEnd(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
628: {
629: PetscSplitReduction *sr;
630: MPI_Comm comm;
631: PetscInt i;
633: PetscFunctionBegin;
634: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
635: PetscCall(PetscSplitReductionGet(comm, &sr));
636: PetscCall(PetscSplitReductionEnd(sr));
638: PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
639: PetscCheck(!x || (void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
640: PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_SUM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecDotEnd() on a reduction started with VecNormBegin()");
641: for (i = 0; i < nv; i++) result[i] = sr->gvalues[sr->numopsend++];
643: /*
644: We are finished getting all the results so reset to no outstanding requests
645: */
646: if (sr->numopsend == sr->numopsbegin) {
647: sr->state = STATE_BEGIN;
648: sr->numopsend = 0;
649: sr->numopsbegin = 0;
650: }
651: PetscFunctionReturn(PETSC_SUCCESS);
652: }
654: /*@
655: VecMTDotBegin - Starts a split phase transpose multiple dot product computation.
657: Input Parameters:
658: + x - the first vector
659: . nv - number of vectors
660: . y - array of vectors
661: - result - where the result will go (can be NULL)
663: Level: advanced
665: Notes:
666: Each call to VecMTDotBegin() should be paired with a call to VecMTDotEnd().
668: .seealso: `VecMTDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
669: `VecDotBegin()`, `VecDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `PetscCommSplitReductionBegin()`
671: @*/
672: PetscErrorCode VecMTDotBegin(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
673: {
674: PetscSplitReduction *sr;
675: MPI_Comm comm;
676: PetscInt i;
678: PetscFunctionBegin;
679: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
680: PetscCall(PetscSplitReductionGet(comm, &sr));
681: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
682: for (i = 0; i < nv; i++) {
683: if (sr->numopsbegin + i >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
684: sr->reducetype[sr->numopsbegin + i] = PETSC_SR_REDUCE_SUM;
685: sr->invecs[sr->numopsbegin + i] = (void *)x;
686: }
687: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
688: PetscUseTypeMethod(x, mtdot_local, nv, y, sr->lvalues + sr->numopsbegin);
689: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
690: sr->numopsbegin += nv;
691: PetscFunctionReturn(PETSC_SUCCESS);
692: }
694: /*@
695: VecMTDotEnd - Ends a split phase transpose multiple dot product computation.
697: Input Parameters:
698: + x - the first vector (can be NULL)
699: . nv - number of vectors
700: - y - array of vectors (can be NULL)
702: Output Parameters:
703: . result - where the result will go
705: Level: advanced
707: Notes:
708: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
710: .seealso: `VecMTDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
711: `VecDotBegin()`, `VecDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `PetscCommSplitReductionBegin()`
712: @*/
713: PetscErrorCode VecMTDotEnd(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
714: {
715: PetscFunctionBegin;
716: /*
717: MTDotEnd() is the same as MDotEnd() so reuse the code
718: */
719: PetscCall(VecMDotEnd(x, nv, y, result));
720: PetscFunctionReturn(PETSC_SUCCESS);
721: }