Actual source code: fftw.c
2: /*
3: Provides an interface to the FFTW package.
4: Testing examples can be found in ~src/mat/tests
5: */
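/*
   A minimal usage sketch (not part of this file; assumes PETSc was configured with FFTW and
   complex scalars, and error checking via PetscCall() is omitted): create a MATFFTW matrix
   with MatCreateFFT(), obtain compatible vectors, and apply the forward and (unnormalized)
   backward transforms.

     Mat      A;
     Vec      x, y, z;
     PetscInt dim[2] = {8, 8};

     MatCreateFFT(PETSC_COMM_WORLD, 2, dim, MATFFTW, &A);
     MatCreateVecsFFTW(A, &x, &y, &z);   // x: forward input, y: forward output, z: backward output
     // ... fill x ...
     MatMult(A, x, y);                   // forward DFT of x into y
     MatMultTranspose(A, y, z);          // backward DFT of y into z (FFTW leaves it unnormalized)
     VecDestroy(&x);
     VecDestroy(&y);
     VecDestroy(&z);
     MatDestroy(&A);
*/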
7: #include <../src/mat/impls/fft/fft.h>
8: EXTERN_C_BEGIN
9: #if !PetscDefined(HAVE_MPIUNI)
10: #include <fftw3-mpi.h>
11: #else
12: #include <fftw3.h>
13: #endif
14: EXTERN_C_END
16: typedef struct {
17: ptrdiff_t ndim_fftw, *dim_fftw;
18: #if defined(PETSC_USE_64BIT_INDICES)
19: fftw_iodim64 *iodims;
20: #else
21: fftw_iodim *iodims;
22: #endif
23: PetscInt partial_dim;
24: fftw_plan p_forward, p_backward;
25: unsigned p_flag; /* planner flags: FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
26: PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of the arrays because an fftw plan should be
27: executed with the arrays for which the plan was created */
28: } Mat_FFTW;
30: extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec);
31: extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec);
32: extern PetscErrorCode MatDestroy_FFTW(Mat);
33: extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);
34: #if !PetscDefined(HAVE_MPIUNI)
35: extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec);
36: extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec);
37: extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
38: #endif
40: /*
41: MatMult_SeqFFTW performs the sequential forward DFT
42: Input parameters:
43: A - the matrix
44: x - the vector on which the forward DFT will be performed
46: Output parameter:
47: y - vector that stores the result of the forward DFT
48: */
49: PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
50: {
51: Mat_FFT *fft = (Mat_FFT *)A->data;
52: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
53: const PetscScalar *x_array;
54: PetscScalar *y_array;
55: #if defined(PETSC_USE_COMPLEX)
56: #if defined(PETSC_USE_64BIT_INDICES)
57: fftw_iodim64 *iodims;
58: #else
59: fftw_iodim *iodims;
60: #endif
61: PetscInt i;
62: #endif
63: PetscInt ndim = fft->ndim, *dim = fft->dim;
65: PetscFunctionBegin;
66: PetscCall(VecGetArrayRead(x, &x_array));
67: PetscCall(VecGetArray(y, &y_array));
68: if (!fftw->p_forward) { /* create a plan, then execute it */
69: switch (ndim) {
70: case 1:
71: #if defined(PETSC_USE_COMPLEX)
72: fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
73: #else
74: fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
75: #endif
76: break;
77: case 2:
78: #if defined(PETSC_USE_COMPLEX)
79: fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
80: #else
81: fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
82: #endif
83: break;
84: case 3:
85: #if defined(PETSC_USE_COMPLEX)
86: fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
87: #else
88: fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
89: #endif
90: break;
91: default:
92: #if defined(PETSC_USE_COMPLEX)
93: iodims = fftw->iodims;
94: #if defined(PETSC_USE_64BIT_INDICES)
95: if (ndim) {
96: iodims[ndim - 1].n = (ptrdiff_t)dim[ndim - 1];
97: iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
98: for (i = ndim - 2; i >= 0; --i) {
99: iodims[i].n = (ptrdiff_t)dim[i];
100: iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
101: }
102: }
103: fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
104: #else
105: if (ndim) {
106: iodims[ndim - 1].n = (int)dim[ndim - 1];
107: iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
108: for (i = ndim - 2; i >= 0; --i) {
109: iodims[i].n = (int)dim[i];
110: iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
111: }
112: }
113: fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
114: #endif
116: #else
117: fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
118: #endif
119: break;
120: }
121: fftw->finarray = (PetscScalar *)x_array;
122: fftw->foutarray = y_array;
123: /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten,
124: so planning should be done before x is initialized! See the FFTW manual, Sec. 2.1 or Sec. 4 */
125: fftw_execute(fftw->p_forward);
126: } else { /* use existing plan */
127: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
128: #if defined(PETSC_USE_COMPLEX)
129: fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
130: #else
131: fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
132: #endif
133: } else {
134: fftw_execute(fftw->p_forward);
135: }
136: }
137: PetscCall(VecRestoreArray(y, &y_array));
138: PetscCall(VecRestoreArrayRead(x, &x_array));
139: PetscFunctionReturn(PETSC_SUCCESS);
140: }
142: /* MatMultTranspose_SeqFFTW performs the sequential backward DFT
143: Input parameters:
144: A - the matrix
145: x - the vector on which the backward DFT will be performed
147: Output parameter:
148: y - vector that stores the result of the backward DFT
149: */
151: PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
152: {
153: Mat_FFT *fft = (Mat_FFT *)A->data;
154: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
155: const PetscScalar *x_array;
156: PetscScalar *y_array;
157: PetscInt ndim = fft->ndim, *dim = fft->dim;
158: #if defined(PETSC_USE_COMPLEX)
159: #if defined(PETSC_USE_64BIT_INDICES)
160: fftw_iodim64 *iodims = fftw->iodims;
161: #else
162: fftw_iodim *iodims = fftw->iodims;
163: #endif
164: #endif
166: PetscFunctionBegin;
167: PetscCall(VecGetArrayRead(x, &x_array));
168: PetscCall(VecGetArray(y, &y_array));
169: if (!fftw->p_backward) { /* create a plan, then execute it */
170: switch (ndim) {
171: case 1:
172: #if defined(PETSC_USE_COMPLEX)
173: fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
174: #else
175: fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
176: #endif
177: break;
178: case 2:
179: #if defined(PETSC_USE_COMPLEX)
180: fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
181: #else
182: fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
183: #endif
184: break;
185: case 3:
186: #if defined(PETSC_USE_COMPLEX)
187: fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
188: #else
189: fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
190: #endif
191: break;
192: default:
193: #if defined(PETSC_USE_COMPLEX)
194: #if defined(PETSC_USE_64BIT_INDICES)
195: fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
196: #else
197: fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
198: #endif
199: #else
200: fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
201: #endif
202: break;
203: }
204: fftw->binarray = (PetscScalar *)x_array;
205: fftw->boutarray = y_array;
206: fftw_execute(fftw->p_backward);
207: } else { /* use existing plan */
208: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
209: #if defined(PETSC_USE_COMPLEX)
210: fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
211: #else
212: fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
213: #endif
214: } else {
215: fftw_execute(fftw->p_backward);
216: }
217: }
218: PetscCall(VecRestoreArray(y, &y_array));
219: PetscCall(VecRestoreArrayRead(x, &x_array));
220: PetscFunctionReturn(PETSC_SUCCESS);
221: }
223: #if !PetscDefined(HAVE_MPIUNI)
224: /* MatMult_MPIFFTW performs the forward DFT in parallel
225: Input parameters:
226: A - the matrix
227: x - the vector on which the forward DFT will be performed
229: Output parameter:
230: y - vector that stores the result of the forward DFT
231: */
232: PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
233: {
234: Mat_FFT *fft = (Mat_FFT *)A->data;
235: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
236: const PetscScalar *x_array;
237: PetscScalar *y_array;
238: PetscInt ndim = fft->ndim, *dim = fft->dim;
239: MPI_Comm comm;
241: PetscFunctionBegin;
242: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
243: PetscCall(VecGetArrayRead(x, &x_array));
244: PetscCall(VecGetArray(y, &y_array));
245: if (!fftw->p_forward) { /* create a plan, then execute it */
246: switch (ndim) {
247: case 1:
248: #if defined(PETSC_USE_COMPLEX)
249: fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
250: #else
251: SETERRQ(comm, PETSC_ERR_SUP, "No support for real numbers yet");
252: #endif
253: break;
254: case 2:
255: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
256: fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
257: #else
258: fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
259: #endif
260: break;
261: case 3:
262: #if defined(PETSC_USE_COMPLEX)
263: fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
264: #else
265: fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
266: #endif
267: break;
268: default:
269: #if defined(PETSC_USE_COMPLEX)
270: fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
271: #else
272: fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
273: #endif
274: break;
275: }
276: fftw->finarray = (PetscScalar *)x_array;
277: fftw->foutarray = y_array;
278: /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten,
279: so planning should be done before x is initialized! See the FFTW manual, Sec. 2.1 or Sec. 4 */
280: fftw_execute(fftw->p_forward);
281: } else { /* use existing plan */
282: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
283: fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
284: } else {
285: fftw_execute(fftw->p_forward);
286: }
287: }
288: PetscCall(VecRestoreArray(y, &y_array));
289: PetscCall(VecRestoreArrayRead(x, &x_array));
290: PetscFunctionReturn(PETSC_SUCCESS);
291: }
293: /*
294: MatMultTranspose_MPIFFTW performs the parallel backward DFT
295: Input parameters:
296: A - the matrix
297: x - the vector on which the backward DFT will be performed
299: Output parameter:
300: y - vector that stores the result of the backward DFT
301: */
302: PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
303: {
304: Mat_FFT *fft = (Mat_FFT *)A->data;
305: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
306: const PetscScalar *x_array;
307: PetscScalar *y_array;
308: PetscInt ndim = fft->ndim, *dim = fft->dim;
309: MPI_Comm comm;
311: PetscFunctionBegin;
312: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
313: PetscCall(VecGetArrayRead(x, &x_array));
314: PetscCall(VecGetArray(y, &y_array));
315: if (!fftw->p_backward) { /* create a plan, then execute it */
316: switch (ndim) {
317: case 1:
318: #if defined(PETSC_USE_COMPLEX)
319: fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
320: #else
321: SETERRQ(comm, PETSC_ERR_SUP, "No support for real numbers yet");
322: #endif
323: break;
324: case 2:
325: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
326: fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
327: #else
328: fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
329: #endif
330: break;
331: case 3:
332: #if defined(PETSC_USE_COMPLEX)
333: fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
334: #else
335: fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
336: #endif
337: break;
338: default:
339: #if defined(PETSC_USE_COMPLEX)
340: fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
341: #else
342: fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
343: #endif
344: break;
345: }
346: fftw->binarray = (PetscScalar *)x_array;
347: fftw->boutarray = y_array;
348: fftw_execute(fftw->p_backward);
349: } else { /* use existing plan */
350: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
351: fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
352: } else {
353: fftw_execute(fftw->p_backward);
354: }
355: }
356: PetscCall(VecRestoreArray(y, &y_array));
357: PetscCall(VecRestoreArrayRead(x, &x_array));
358: PetscFunctionReturn(PETSC_SUCCESS);
359: }
360: #endif
362: PetscErrorCode MatDestroy_FFTW(Mat A)
363: {
364: Mat_FFT *fft = (Mat_FFT *)A->data;
365: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
367: PetscFunctionBegin;
368: fftw_destroy_plan(fftw->p_forward);
369: fftw_destroy_plan(fftw->p_backward);
370: if (fftw->iodims) free(fftw->iodims);
371: PetscCall(PetscFree(fftw->dim_fftw));
372: PetscCall(PetscFree(fft->data));
373: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
374: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
375: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
376: #if !PetscDefined(HAVE_MPIUNI)
377: fftw_mpi_cleanup();
378: #endif
379: PetscFunctionReturn(PETSC_SUCCESS);
380: }
382: #if !PetscDefined(HAVE_MPIUNI)
383: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
384: PetscErrorCode VecDestroy_MPIFFTW(Vec v)
385: {
386: PetscScalar *array;
388: PetscFunctionBegin;
389: PetscCall(VecGetArray(v, &array));
390: fftw_free((fftw_complex *)array);
391: PetscCall(VecRestoreArray(v, &array));
392: PetscCall(VecDestroy_MPI(v));
393: PetscFunctionReturn(PETSC_SUCCESS);
394: }
395: #endif
397: #if !PetscDefined(HAVE_MPIUNI)
398: static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new)
399: {
400: Mat A;
402: PetscFunctionBegin;
403: PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A));
404: PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL));
405: PetscFunctionReturn(PETSC_SUCCESS);
406: }
408: static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new)
409: {
410: Mat A;
412: PetscFunctionBegin;
413: PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A));
414: PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL));
415: PetscFunctionReturn(PETSC_SUCCESS);
416: }
418: static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
419: {
420: Mat A;
422: PetscFunctionBegin;
423: PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A));
424: PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new));
425: PetscFunctionReturn(PETSC_SUCCESS);
426: }
427: #endif
429: /*@
430: MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
431: parallel layout determined by `MATFFTW`
433: Collective
435: Input Parameter:
436: . A - the matrix
438: Output Parameters:
439: + x - (optional) input vector of forward FFTW
440: . y - (optional) output vector of forward FFTW
441: - z - (optional) output vector of backward FFTW
443: Options Database Key:
444: . -mat_fftw_plannerflags - set FFTW planner flags
446: Level: advanced
448: Notes:
449: The parallel layout of the output of the forward FFTW is always the same as the input
450: of the backward FFTW, but the parallel layout of the input vector of the forward
451: FFTW might not be the same as the output of the backward FFTW.
453: We need to provide enough space while doing a parallel real transform.
454: Extra zeros must be padded at the end of the last dimension. For this reason one needs to
455: invoke the fftw_mpi_local_size_transposed() routine, remembering to change the
456: last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
457: depends on whether the last dimension is even or odd. If the last dimension is even two
458: zeros are needed; if it is odd, only one zero is needed.
460: Lastly, one needs some scratch space at the end of the data set on each process. alloc_local
461: figures out how much space is needed, i.e., it computes the data plus scratch space for
462: each process and returns that.
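Example Usage:
A minimal sketch for a parallel real transform (hypothetical sizes; assumes real scalars and a
user-provided PETSc vector u in the natural, unpadded layout); with dim = {4,5} the last
dimension is padded from 5 to 2*(5/2+1) = 6 entries per row:
.vb
   Mat      A;
   Vec      x, y, z;
   PetscInt dim[2] = {4, 5};
   MatCreateFFT(PETSC_COMM_WORLD, 2, dim, MATFFTW, &A);
   MatCreateVecsFFTW(A, &x, &y, &z);  // x, y, z already include the required padding
   VecScatterPetscToFFTW(A, u, x);    // copy the unpadded u into the padded layout of x
   MatMult(A, x, y);                  // forward real-to-complex transform
.ve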
464: Developer Note:
465: How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically?
467: .seealso: [](chapter_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()`
468: @*/
469: PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z)
470: {
471: PetscFunctionBegin;
472: PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
473: PetscFunctionReturn(PETSC_SUCCESS);
474: }
476: PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout)
477: {
478: PetscMPIInt size, rank;
479: MPI_Comm comm;
480: Mat_FFT *fft = (Mat_FFT *)A->data;
482: PetscFunctionBegin;
485: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
487: PetscCallMPI(MPI_Comm_size(comm, &size));
488: PetscCallMPI(MPI_Comm_rank(comm, &rank));
489: if (size == 1) { /* sequential case */
490: #if defined(PETSC_USE_COMPLEX)
491: if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin));
492: if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout));
493: if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout));
494: #else
495: if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin));
496: if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout));
497: if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout));
498: #endif
499: #if !PetscDefined(HAVE_MPIUNI)
500: } else { /* parallel cases */
501: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
502: PetscInt ndim = fft->ndim, *dim = fft->dim;
503: ptrdiff_t alloc_local, local_n0, local_0_start;
504: ptrdiff_t local_n1;
505: fftw_complex *data_fout;
506: ptrdiff_t local_1_start;
507: #if defined(PETSC_USE_COMPLEX)
508: fftw_complex *data_fin, *data_bout;
509: #else
510: double *data_finr, *data_boutr;
511: PetscInt n1, N1;
512: ptrdiff_t temp;
513: #endif
515: switch (ndim) {
516: case 1:
517: #if !defined(PETSC_USE_COMPLEX)
518: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
519: #else
520: alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
521: if (fin) {
522: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
523: PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin));
524: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
525: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
526: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
527: }
528: if (fout) {
529: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
530: PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout));
531: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
532: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
533: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
534: }
535: alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
536: if (bout) {
537: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
538: PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout));
539: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
540: (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
541: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
542: }
543: break;
544: #endif
545: case 2:
546: #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no longer the product of the individual dimensions */
547: alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
548: N1 = 2 * dim[0] * (dim[1] / 2 + 1);
549: n1 = 2 * local_n0 * (dim[1] / 2 + 1);
550: if (fin) {
551: data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
552: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
553: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
554: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
555: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
556: }
557: if (fout) {
558: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
559: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout));
560: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
561: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
562: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
563: }
564: if (bout) {
565: data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
566: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
567: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
568: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
569: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
570: }
571: #else
572: /* Get local size */
573: alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
574: if (fin) {
575: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
576: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
577: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
578: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
579: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
580: }
581: if (fout) {
582: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
583: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
584: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
585: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
586: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
587: }
588: if (bout) {
589: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
590: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
591: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
592: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
593: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
594: }
595: #endif
596: break;
597: case 3:
598: #if !defined(PETSC_USE_COMPLEX)
599: alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
600: N1 = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1);
601: n1 = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1);
602: if (fin) {
603: data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
604: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
605: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
606: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
607: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
608: }
609: if (fout) {
610: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
611: PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout));
612: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
613: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
614: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
615: }
616: if (bout) {
617: data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
618: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
619: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
620: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
621: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
622: }
623: #else
624: alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
625: if (fin) {
626: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
627: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
628: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
629: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
630: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
631: }
632: if (fout) {
633: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
634: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
635: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
636: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
637: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
638: }
639: if (bout) {
640: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
641: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
642: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
643: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
644: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
645: }
646: #endif
647: break;
648: default:
649: #if !defined(PETSC_USE_COMPLEX)
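/* for a real transform, temporarily replace the last dimension n by n/2+1 to query the padded local sizes, then restore it below */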
650: temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
651: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
652: alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
653: N1 = 2 * fft->N * (PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw - 1]) / ((PetscInt)temp);
654: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
656: if (fin) {
657: data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
658: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin));
659: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
660: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
661: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
662: }
663: if (fout) {
664: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
665: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout));
666: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
667: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
668: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
669: }
670: if (bout) {
671: data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
672: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout));
673: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
674: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
675: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
676: }
677: #else
678: alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
679: if (fin) {
680: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
681: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
682: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
683: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
684: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
685: }
686: if (fout) {
687: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
688: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
689: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
690: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
691: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
692: }
693: if (bout) {
694: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
695: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
696: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
697: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
698: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
699: }
700: #endif
701: break;
702: }
703: /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
704: v->array_allocated=NULL. A regular replacearray call won't free the memory and would only cause
705: memory leaks. We void these pointers here to avoid accidental use.
706: */
707: if (fin) (*fin)->ops->replacearray = NULL;
708: if (fout) (*fout)->ops->replacearray = NULL;
709: if (bout) (*bout)->ops->replacearray = NULL;
710: #endif
711: }
712: PetscFunctionReturn(PETSC_SUCCESS);
713: }
715: /*@
716: VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.
718: Collective
720: Input Parameters:
721: + A - FFTW matrix
722: - x - the PETSc vector
724: Output Parameter:
725: . y - the FFTW vector
727: Level: intermediate
729: Note:
730: For a parallel real FFT, FFTW requires insertion of extra space at the end of the last dimension. This is required even when
731: one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
732: zeros. This routine does that job via a scatter operation.
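Example Usage:
A minimal sketch (assumes A is a MATFFTW matrix and u is a hypothetical PETSc vector in the
natural, unpadded layout):
.vb
   Vec x, y, z;
   MatCreateVecsFFTW(A, &x, &y, &z);
   VecScatterPetscToFFTW(A, u, x);  // insert the padding FFTW expects
   MatMult(A, x, y);                // forward transform
.ve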
734: .seealso: [](chapter_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
735: @*/
736: PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
737: {
738: PetscFunctionBegin;
739: PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
740: PetscFunctionReturn(PETSC_SUCCESS);
741: }
743: PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
744: {
745: MPI_Comm comm;
746: Mat_FFT *fft = (Mat_FFT *)A->data;
747: PetscInt low;
748: PetscMPIInt rank, size;
749: PetscInt vsize, vsize1;
750: VecScatter vecscat;
751: IS list1;
753: PetscFunctionBegin;
754: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
755: PetscCallMPI(MPI_Comm_size(comm, &size));
756: PetscCallMPI(MPI_Comm_rank(comm, &rank));
757: PetscCall(VecGetOwnershipRange(y, &low, NULL));
759: if (size == 1) {
760: PetscCall(VecGetSize(x, &vsize));
761: PetscCall(VecGetSize(y, &vsize1));
762: PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
763: PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
764: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
765: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
766: PetscCall(VecScatterDestroy(&vecscat));
767: PetscCall(ISDestroy(&list1));
768: #if !PetscDefined(HAVE_MPIUNI)
769: } else {
770: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
771: PetscInt ndim = fft->ndim, *dim = fft->dim;
772: ptrdiff_t local_n0, local_0_start;
773: ptrdiff_t local_n1, local_1_start;
774: IS list2;
775: #if !defined(PETSC_USE_COMPLEX)
776: PetscInt i, j, k, partial_dim;
777: PetscInt *indx1, *indx2, tempindx, tempindx1;
778: PetscInt NM;
779: ptrdiff_t temp;
780: #endif
782: switch (ndim) {
783: case 1:
784: #if defined(PETSC_USE_COMPLEX)
785: fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
787: PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1));
788: PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2));
789: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
790: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
791: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
792: PetscCall(VecScatterDestroy(&vecscat));
793: PetscCall(ISDestroy(&list1));
794: PetscCall(ISDestroy(&list2));
795: #else
796: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
797: #endif
798: break;
799: case 2:
800: #if defined(PETSC_USE_COMPLEX)
801: fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
803: PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
804: PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
805: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
806: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
807: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
808: PetscCall(VecScatterDestroy(&vecscat));
809: PetscCall(ISDestroy(&list1));
810: PetscCall(ISDestroy(&list2));
811: #else
812: fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
814: PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
815: PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));
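/* NM is the padded length of the last dimension (n+2 if n is even, n+1 if n is odd); indx1 indexes the natural layout of x, indx2 the corresponding padded FFTW layout of y */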
817: if (dim[1] % 2 == 0) {
818: NM = dim[1] + 2;
819: } else {
820: NM = dim[1] + 1;
821: }
822: for (i = 0; i < local_n0; i++) {
823: for (j = 0; j < dim[1]; j++) {
824: tempindx = i * dim[1] + j;
825: tempindx1 = i * NM + j;
827: indx1[tempindx] = local_0_start * dim[1] + tempindx;
828: indx2[tempindx] = low + tempindx1;
829: }
830: }
832: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
833: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));
835: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
836: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
837: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
838: PetscCall(VecScatterDestroy(&vecscat));
839: PetscCall(ISDestroy(&list1));
840: PetscCall(ISDestroy(&list2));
841: PetscCall(PetscFree(indx1));
842: PetscCall(PetscFree(indx2));
843: #endif
844: break;
846: case 3:
847: #if defined(PETSC_USE_COMPLEX)
848: fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
850: PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
851: PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
852: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
853: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
854: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
855: PetscCall(VecScatterDestroy(&vecscat));
856: PetscCall(ISDestroy(&list1));
857: PetscCall(ISDestroy(&list2));
858: #else
859: /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
860: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
861: fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
863: PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
864: PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));
866: if (dim[2] % 2 == 0) NM = dim[2] + 2;
867: else NM = dim[2] + 1;
869: for (i = 0; i < local_n0; i++) {
870: for (j = 0; j < dim[1]; j++) {
871: for (k = 0; k < dim[2]; k++) {
872: tempindx = i * dim[1] * dim[2] + j * dim[2] + k;
873: tempindx1 = i * dim[1] * NM + j * NM + k;
875: indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
876: indx2[tempindx] = low + tempindx1;
877: }
878: }
879: }
881: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
882: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
883: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
884: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
885: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
886: PetscCall(VecScatterDestroy(&vecscat));
887: PetscCall(ISDestroy(&list1));
888: PetscCall(ISDestroy(&list2));
889: PetscCall(PetscFree(indx1));
890: PetscCall(PetscFree(indx2));
891: #endif
892: break;
894: default:
895: #if defined(PETSC_USE_COMPLEX)
896: fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
898: PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
899: PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
900: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
901: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
902: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
903: PetscCall(VecScatterDestroy(&vecscat));
904: PetscCall(ISDestroy(&list1));
905: PetscCall(ISDestroy(&list2));
906: #else
907: /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
908: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
909: temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
911: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
913: fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
915: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
917: partial_dim = fftw->partial_dim;
919: PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
920: PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));
922: if (dim[ndim - 1] % 2 == 0) NM = 2;
923: else NM = 1;
925: j = low;
926: for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
927: indx1[i] = local_0_start * partial_dim + i;
928: indx2[i] = j;
929: if (k % dim[ndim - 1] == 0) j += NM;
930: j++;
931: }
932: PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
933: PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
934: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
935: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
936: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
937: PetscCall(VecScatterDestroy(&vecscat));
938: PetscCall(ISDestroy(&list1));
939: PetscCall(ISDestroy(&list2));
940: PetscCall(PetscFree(indx1));
941: PetscCall(PetscFree(indx2));
942: #endif
943: break;
944: }
945: #endif
946: }
947: PetscFunctionReturn(PETSC_SUCCESS);
948: }
950: /*@
951: VecScatterFFTWToPetsc - Converts a `MATFFTW` output vector to a PETSc vector.
953: Collective
955: Input Parameters:
956: + A - `MATFFTW` matrix
957: - x - FFTW vector
959: Output Parameter:
960: . y - PETSc vector
962: Level: intermediate
964: Note:
965: When doing a real transform, the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
966: `VecScatterFFTWToPetsc()` removes those extra zeros.
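Example Usage:
A minimal sketch (assumes A is a MATFFTW matrix, y and z came from MatCreateVecsFFTW(), and v
is a hypothetical PETSc vector in the natural, unpadded layout):
.vb
   MatMultTranspose(A, y, z);       // backward transform into the padded FFTW vector z
   VecScatterFFTWToPetsc(A, z, v);  // strip the extra zeros, leaving the natural layout in v
.ve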
968: .seealso: [](chapter_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
969: @*/
970: PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
971: {
972: PetscFunctionBegin;
973: PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
974: PetscFunctionReturn(PETSC_SUCCESS);
975: }
977: PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
978: {
979: MPI_Comm comm;
980: Mat_FFT *fft = (Mat_FFT *)A->data;
981: PetscInt low;
982: PetscMPIInt rank, size;
983: VecScatter vecscat;
984: IS list1;
986: PetscFunctionBegin;
987: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
988: PetscCallMPI(MPI_Comm_size(comm, &size));
989: PetscCallMPI(MPI_Comm_rank(comm, &rank));
990: PetscCall(VecGetOwnershipRange(x, &low, NULL));
992: if (size == 1) {
993: PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
994: PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
995: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
996: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
997: PetscCall(VecScatterDestroy(&vecscat));
998: PetscCall(ISDestroy(&list1));
1000: #if !PetscDefined(HAVE_MPIUNI)
1001: } else {
1002: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
1003: PetscInt ndim = fft->ndim, *dim = fft->dim;
1004: ptrdiff_t local_n0, local_0_start;
1005: ptrdiff_t local_n1, local_1_start;
1006: IS list2;
1007: #if !defined(PETSC_USE_COMPLEX)
1008: PetscInt i, j, k, partial_dim;
1009: PetscInt *indx1, *indx2, tempindx, tempindx1;
1010: PetscInt NM;
1011: ptrdiff_t temp;
1012: #endif
1013: switch (ndim) {
1014: case 1:
1015: #if defined(PETSC_USE_COMPLEX)
1016: fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
1018: PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1));
1019: PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2));
1020: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1021: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1022: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1023: PetscCall(VecScatterDestroy(&vecscat));
1024: PetscCall(ISDestroy(&list1));
1025: PetscCall(ISDestroy(&list2));
1026: #else
1027: SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
1028: #endif
1029: break;
1030: case 2:
1031: #if defined(PETSC_USE_COMPLEX)
1032: fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
1034: PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
1035: PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
1036: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1037: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1038: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1039: PetscCall(VecScatterDestroy(&vecscat));
1040: PetscCall(ISDestroy(&list1));
1041: PetscCall(ISDestroy(&list2));
1042: #else
1043: fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1045: PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
1046: PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));
1048: if (dim[1] % 2 == 0) NM = dim[1] + 2;
1049: else NM = dim[1] + 1;
1051: for (i = 0; i < local_n0; i++) {
1052: for (j = 0; j < dim[1]; j++) {
1053: tempindx = i * dim[1] + j;
1054: tempindx1 = i * NM + j;
1056: indx1[tempindx] = local_0_start * dim[1] + tempindx;
1057: indx2[tempindx] = low + tempindx1;
1058: }
1059: }
1061: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
1062: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));
1064: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1065: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1066: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1067: PetscCall(VecScatterDestroy(&vecscat));
1068: PetscCall(ISDestroy(&list1));
1069: PetscCall(ISDestroy(&list2));
1070: PetscCall(PetscFree(indx1));
1071: PetscCall(PetscFree(indx2));
1072: #endif
1073: break;
1074: case 3:
1075: #if defined(PETSC_USE_COMPLEX)
1076: fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
1078: PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
1079: PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
1080: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1081: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1082: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1083: PetscCall(VecScatterDestroy(&vecscat));
1084: PetscCall(ISDestroy(&list1));
1085: PetscCall(ISDestroy(&list2));
1086: #else
1087: fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1089: PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
1090: PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));
1092: if (dim[2] % 2 == 0) NM = dim[2] + 2;
1093: else NM = dim[2] + 1;
1095: for (i = 0; i < local_n0; i++) {
1096: for (j = 0; j < dim[1]; j++) {
1097: for (k = 0; k < dim[2]; k++) {
1098: tempindx = i * dim[1] * dim[2] + j * dim[2] + k;
1099: tempindx1 = i * dim[1] * NM + j * NM + k;
1101: indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
1102: indx2[tempindx] = low + tempindx1;
1103: }
1104: }
1105: }
1107: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
1108: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
1110: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1111: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1112: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1113: PetscCall(VecScatterDestroy(&vecscat));
1114: PetscCall(ISDestroy(&list1));
1115: PetscCall(ISDestroy(&list2));
1116: PetscCall(PetscFree(indx1));
1117: PetscCall(PetscFree(indx2));
1118: #endif
1119: break;
1120: default:
1121: #if defined(PETSC_USE_COMPLEX)
1122: fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
1124: PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
1125: PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
1126: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1127: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1128: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1129: PetscCall(VecScatterDestroy(&vecscat));
1130: PetscCall(ISDestroy(&list1));
1131: PetscCall(ISDestroy(&list2));
1132: #else
1133: temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
1135: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
1137: fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1139: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
1141: partial_dim = fftw->partial_dim;
1143: PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
1144: PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));
1146: if (dim[ndim - 1] % 2 == 0) NM = 2;
1147: else NM = 1;
1149: j = low;
1150: for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
1151: indx1[i] = local_0_start * partial_dim + i;
1152: indx2[i] = j;
1153: if (k % dim[ndim - 1] == 0) j += NM;
1154: j++;
1155: }
1156: PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
1157: PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
1159: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1160: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1161: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1162: PetscCall(VecScatterDestroy(&vecscat));
1163: PetscCall(ISDestroy(&list1));
1164: PetscCall(ISDestroy(&list2));
1165: PetscCall(PetscFree(indx1));
1166: PetscCall(PetscFree(indx2));
1167: #endif
1168: break;
1169: }
1170: #endif
1171: }
1172: PetscFunctionReturn(PETSC_SUCCESS);
1173: }
1175: PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1176: {
1177: MPI_Comm comm;
1178: Mat_FFT *fft = (Mat_FFT *)A->data;
1179: Mat_FFTW *fftw;
1180: PetscInt ndim = fft->ndim, *dim = fft->dim;
1181: const char *plans[] = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
1182: unsigned iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
1183: PetscBool flg;
1184: PetscInt p_flag, partial_dim = 1, ctr;
1185: PetscMPIInt size, rank;
1186: ptrdiff_t *pdim;
1187: #if !defined(PETSC_USE_COMPLEX)
1188: PetscInt tot_dim;
1189: #endif
1191: PetscFunctionBegin;
1192: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1193: PetscCallMPI(MPI_Comm_size(comm, &size));
1194: PetscCallMPI(MPI_Comm_rank(comm, &rank));
1196: #if !PetscDefined(HAVE_MPIUNI)
1197: fftw_mpi_init();
1198: #endif
1199: pdim = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
1200: pdim[0] = dim[0];
1201: #if !defined(PETSC_USE_COMPLEX)
1202: tot_dim = 2 * dim[0];
1203: #endif
1204: for (ctr = 1; ctr < ndim; ctr++) {
1205: partial_dim *= dim[ctr];
1206: pdim[ctr] = dim[ctr];
1207: #if !defined(PETSC_USE_COMPLEX)
1208: if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
1209: else tot_dim *= dim[ctr];
1210: #endif
1211: }
1213: if (size == 1) {
1214: #if defined(PETSC_USE_COMPLEX)
1215: PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
1216: fft->n = fft->N;
1217: #else
1218: PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
1219: fft->n = tot_dim;
1220: #endif
1221: #if !PetscDefined(HAVE_MPIUNI)
1222: } else {
1223: ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
1224: #if !defined(PETSC_USE_COMPLEX)
1225: ptrdiff_t temp;
1226: PetscInt N1;
1227: #endif
1229: switch (ndim) {
1230: case 1:
1231: #if !defined(PETSC_USE_COMPLEX)
1232: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
1233: #else
1234: fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
1235: fft->n = (PetscInt)local_n0;
1236: PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N));
1237: #endif
1238: break;
1239: case 2:
1240: #if defined(PETSC_USE_COMPLEX)
1241: fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
1242: fft->n = (PetscInt)local_n0 * dim[1];
1243: PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1244: #else
1245: fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1247: fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
1248: PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
1249: #endif
1250: break;
1251: case 3:
1252: #if defined(PETSC_USE_COMPLEX)
1253: fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
1255: fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
1256: PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1257: #else
1258: fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1260: fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
1261: PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
1262: #endif
1263: break;
1264: default:
1265: #if defined(PETSC_USE_COMPLEX)
1266: fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);
1268: fft->n = (PetscInt)local_n0 * partial_dim;
1269: PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1270: #else
1271: temp = pdim[ndim - 1];
1273: pdim[ndim - 1] = temp / 2 + 1;
1275: fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1277: fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp;
1278: N1 = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);
1280: pdim[ndim - 1] = temp;
1282: PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
1283: #endif
1284: break;
1285: }
1286: #endif
1287: }
1288: free(pdim);
1289: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
1290: PetscCall(PetscNew(&fftw));
1291: fft->data = (void *)fftw;
1293: fftw->ndim_fftw = (ptrdiff_t)ndim; /* this is the dimension of the FFT */
1294: fftw->partial_dim = partial_dim;
1296: PetscCall(PetscMalloc1(ndim, &(fftw->dim_fftw)));
1297: if (size == 1) {
1298: #if defined(PETSC_USE_64BIT_INDICES)
1299: fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
1300: #else
1301: fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
1302: #endif
1303: }
1305: for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];
1307: fftw->p_forward = NULL;
1308: fftw->p_backward = NULL;
1309: fftw->p_flag = FFTW_ESTIMATE;
1311: if (size == 1) {
1312: A->ops->mult = MatMult_SeqFFTW;
1313: A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1314: #if !PetscDefined(HAVE_MPIUNI)
1315: } else {
1316: A->ops->mult = MatMult_MPIFFTW;
1317: A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1318: #endif
1319: }
1320: fft->matdestroy = MatDestroy_FFTW;
1321: A->assembled = PETSC_TRUE;
1322: A->preallocated = PETSC_TRUE;
1324: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
1325: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
1326: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));
1328: /* get runtime options */
1329: PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
1330: PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
1331: if (flg) fftw->p_flag = iplans[p_flag];
1332: PetscOptionsEnd();
1333: PetscFunctionReturn(PETSC_SUCCESS);
1334: }