Actual source code: fftw.c


  2: /*
  3:     Provides an interface to the FFTW package.
  4:     Testing examples can be found in ~src/mat/tests
  5: */

  7: #include <../src/mat/impls/fft/fft.h>
  8: EXTERN_C_BEGIN
  9: #if !PetscDefined(HAVE_MPIUNI)
 10:   #include <fftw3-mpi.h>
 11: #else
 12:   #include <fftw3.h>
 13: #endif
 14: EXTERN_C_END

 16: typedef struct {
 17:   ptrdiff_t ndim_fftw, *dim_fftw;
 18: #if defined(PETSC_USE_64BIT_INDICES)
 19:   fftw_iodim64 *iodims;
 20: #else
 21:   fftw_iodim *iodims;
 22: #endif
 23:   PetscInt     partial_dim;
 24:   fftw_plan    p_forward, p_backward;
 25:   unsigned     p_flag;                                      /* planner flags: FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
 26:   PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of the arrays because an FFTW plan must be
 27:                                                             executed with the same arrays with which it was created */
 28: } Mat_FFTW;
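
/* A minimal sketch of the rule the cached arrays above enforce; fftw_plan_dft_1d(),
   fftw_execute() and fftw_execute_dft() are the standard FFTW API, the variables are
   illustrative:

     fftw_plan p = fftw_plan_dft_1d(n, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
     fftw_execute(p);                    // valid only with the arrays given at planning time
     fftw_execute_dft(p, newin, newout); // new-array execute for any other (aligned) arrays
*/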

 30: extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec);
 31: extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec);
 32: extern PetscErrorCode MatDestroy_FFTW(Mat);
 33: extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);
 34: #if !PetscDefined(HAVE_MPIUNI)
 35: extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec);
 36: extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec);
 37: extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
 38: #endif

 40: /*
 41:    MatMult_SeqFFTW performs the sequential forward DFT
 42:    Input parameters:
 43:      A - the matrix
 44:      x - the vector on which the forward DFT will be performed

 46:    Output parameter:
 47:      y - the vector that stores the result of the forward DFT
 48: */
 49: PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
 50: {
 51:   Mat_FFT           *fft  = (Mat_FFT *)A->data;
 52:   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
 53:   const PetscScalar *x_array;
 54:   PetscScalar       *y_array;
 55: #if defined(PETSC_USE_COMPLEX)
 56:   #if defined(PETSC_USE_64BIT_INDICES)
 57:   fftw_iodim64 *iodims;
 58:   #else
 59:   fftw_iodim *iodims;
 60:   #endif
 61:   PetscInt i;
 62: #endif
 63:   PetscInt ndim = fft->ndim, *dim = fft->dim;

 65:   PetscFunctionBegin;
 66:   PetscCall(VecGetArrayRead(x, &x_array));
 67:   PetscCall(VecGetArray(y, &y_array));
 68:   if (!fftw->p_forward) { /* create a plan, then execute it */
 69:     switch (ndim) {
 70:     case 1:
 71: #if defined(PETSC_USE_COMPLEX)
 72:       fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
 73: #else
 74:       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
 75: #endif
 76:       break;
 77:     case 2:
 78: #if defined(PETSC_USE_COMPLEX)
 79:       fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
 80: #else
 81:       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
 82: #endif
 83:       break;
 84:     case 3:
 85: #if defined(PETSC_USE_COMPLEX)
 86:       fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
 87: #else
 88:       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
 89: #endif
 90:       break;
 91:     default:
 92: #if defined(PETSC_USE_COMPLEX)
 93:       iodims = fftw->iodims;
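      /* build row-major strides for FFTW's guru interface: the last dimension is
         contiguous (stride 1) and each earlier dimension strides over the product
         of all later dimension sizes */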
 94:   #if defined(PETSC_USE_64BIT_INDICES)
 95:       if (ndim) {
 96:         iodims[ndim - 1].n  = (ptrdiff_t)dim[ndim - 1];
 97:         iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
 98:         for (i = ndim - 2; i >= 0; --i) {
 99:           iodims[i].n  = (ptrdiff_t)dim[i];
100:           iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
101:         }
102:       }
103:       fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
104:   #else
105:       if (ndim) {
106:         iodims[ndim - 1].n  = (int)dim[ndim - 1];
107:         iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
108:         for (i = ndim - 2; i >= 0; --i) {
109:           iodims[i].n  = (int)dim[i];
110:           iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
111:         }
112:       }
113:       fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
114:   #endif

116: #else
117:       fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
118: #endif
119:       break;
120:     }
121:     fftw->finarray  = (PetscScalar *)x_array;
122:     fftw->foutarray = y_array;
123:     /* Warning: if fftw->p_flag != FFTW_ESTIMATE, the data in the in/out arrays is overwritten,
124:                 so planning should be done before x is initialized! See FFTW manual sec. 2.1 or sec. 4 */
125:     fftw_execute(fftw->p_forward);
126:   } else {                                                         /* use existing plan */
127:     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
128: #if defined(PETSC_USE_COMPLEX)
129:       fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
130: #else
131:       fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
132: #endif
133:     } else {
134:       fftw_execute(fftw->p_forward);
135:     }
136:   }
137:   PetscCall(VecRestoreArray(y, &y_array));
138:   PetscCall(VecRestoreArrayRead(x, &x_array));
139:   PetscFunctionReturn(PETSC_SUCCESS);
140: }
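
/* A minimal usage sketch for the sequential case (a sketch only; MatCreateFFT(),
   MatCreateVecsFFTW() and MatMult() are the public entry points, dim/x/y/z are
   illustrative names):

     PetscInt dim[1] = {128};
     Mat      A;
     Vec      x, y, z;
     PetscCall(MatCreateFFT(PETSC_COMM_SELF, 1, dim, MATFFTW, &A));
     PetscCall(MatCreateVecsFFTW(A, &x, &y, &z));
     // ... fill x ...
     PetscCall(MatMult(A, x, y)); // y = forward DFT of x, dispatched to MatMult_SeqFFTW
*/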

142: /* MatMultTranspose_SeqFFTW performs the sequential backward DFT
143:    Input parameters:
144:      A - the matrix
145:      x - the vector on which the backward DFT will be performed

147:    Output parameter:
148:      y - the vector that stores the result of the backward DFT
149: */

151: PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
152: {
153:   Mat_FFT           *fft  = (Mat_FFT *)A->data;
154:   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
155:   const PetscScalar *x_array;
156:   PetscScalar       *y_array;
157:   PetscInt           ndim = fft->ndim, *dim = fft->dim;
158: #if defined(PETSC_USE_COMPLEX)
159:   #if defined(PETSC_USE_64BIT_INDICES)
160:   fftw_iodim64 *iodims = fftw->iodims;
161:   #else
162:   fftw_iodim *iodims = fftw->iodims;
163:   #endif
164: #endif

166:   PetscFunctionBegin;
167:   PetscCall(VecGetArrayRead(x, &x_array));
168:   PetscCall(VecGetArray(y, &y_array));
169:   if (!fftw->p_backward) { /* create a plan, then execute it */
170:     switch (ndim) {
171:     case 1:
172: #if defined(PETSC_USE_COMPLEX)
173:       fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
174: #else
175:       fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
176: #endif
177:       break;
178:     case 2:
179: #if defined(PETSC_USE_COMPLEX)
180:       fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
181: #else
182:       fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
183: #endif
184:       break;
185:     case 3:
186: #if defined(PETSC_USE_COMPLEX)
187:       fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
188: #else
189:       fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
190: #endif
191:       break;
192:     default:
193: #if defined(PETSC_USE_COMPLEX)
194:   #if defined(PETSC_USE_64BIT_INDICES)
195:       fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
196:   #else
197:       fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
198:   #endif
199: #else
200:       fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
201: #endif
202:       break;
203:     }
204:     fftw->binarray  = (PetscScalar *)x_array;
205:     fftw->boutarray = y_array;
206:     fftw_execute(fftw->p_backward);
207:   } else {                                                         /* use existing plan */
208:     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
209: #if defined(PETSC_USE_COMPLEX)
210:       fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
211: #else
212:       fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
213: #endif
214:     } else {
215:       fftw_execute(fftw->p_backward);
216:     }
217:   }
218:   PetscCall(VecRestoreArray(y, &y_array));
219:   PetscCall(VecRestoreArrayRead(x, &x_array));
220:   PetscFunctionReturn(PETSC_SUCCESS);
221: }
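
/* Note that FFTW transforms are unnormalized: applying MatMult() followed by
   MatMultTranspose() multiplies the data by N, the product of the dimensions.
   A minimal sketch of recovering the original vector (x, y, z illustrative):

     PetscCall(MatMult(A, x, y));          // forward DFT
     PetscCall(MatMultTranspose(A, y, z)); // backward DFT, z = N * x
     PetscCall(VecScale(z, 1.0 / N));      // undo FFTW's scaling
*/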

223: #if !PetscDefined(HAVE_MPIUNI)
224: /* MatMult_MPIFFTW performs the forward DFT in parallel
225:    Input parameters:
226:      A - the matrix
227:      x - the vector on which the forward DFT will be performed

229:    Output parameter:
230:      y - the vector that stores the result of the forward DFT
231: */
232: PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
233: {
234:   Mat_FFT           *fft  = (Mat_FFT *)A->data;
235:   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
236:   const PetscScalar *x_array;
237:   PetscScalar       *y_array;
238:   PetscInt           ndim = fft->ndim, *dim = fft->dim;
239:   MPI_Comm           comm;

241:   PetscFunctionBegin;
242:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
243:   PetscCall(VecGetArrayRead(x, &x_array));
244:   PetscCall(VecGetArray(y, &y_array));
245:   if (!fftw->p_forward) { /* create a plan, then execute it */
246:     switch (ndim) {
247:     case 1:
248:   #if defined(PETSC_USE_COMPLEX)
249:       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
250:   #else
251:       SETERRQ(comm, PETSC_ERR_SUP, "no support for parallel 1D real transforms yet");
252:   #endif
253:       break;
254:     case 2:
255:   #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
256:       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
257:   #else
258:       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
259:   #endif
260:       break;
261:     case 3:
262:   #if defined(PETSC_USE_COMPLEX)
263:       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
264:   #else
265:       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
266:   #endif
267:       break;
268:     default:
269:   #if defined(PETSC_USE_COMPLEX)
270:       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
271:   #else
272:       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
273:   #endif
274:       break;
275:     }
276:     fftw->finarray  = (PetscScalar *)x_array;
277:     fftw->foutarray = y_array;
278:     /* Warning: if fftw->p_flag != FFTW_ESTIMATE, the data in the in/out arrays is overwritten,
279:                 so planning should be done before x is initialized! See FFTW manual sec. 2.1 or sec. 4 */
280:     fftw_execute(fftw->p_forward);
281:   } else {                                                         /* use existing plan */
282:     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
283:       fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
284:     } else {
285:       fftw_execute(fftw->p_forward);
286:     }
287:   }
288:   PetscCall(VecRestoreArray(y, &y_array));
289:   PetscCall(VecRestoreArrayRead(x, &x_array));
290:   PetscFunctionReturn(PETSC_SUCCESS);
291: }

293: /*
294:    MatMultTranspose_MPIFFTW performs the parallel backward DFT
295:    Input parameters:
296:      A - the matrix
297:      x - the vector on which the backward DFT will be performed

299:    Output parameter:
300:      y - the vector that stores the result of the backward DFT
301: */
302: PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
303: {
304:   Mat_FFT           *fft  = (Mat_FFT *)A->data;
305:   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
306:   const PetscScalar *x_array;
307:   PetscScalar       *y_array;
308:   PetscInt           ndim = fft->ndim, *dim = fft->dim;
309:   MPI_Comm           comm;

311:   PetscFunctionBegin;
312:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
313:   PetscCall(VecGetArrayRead(x, &x_array));
314:   PetscCall(VecGetArray(y, &y_array));
315:   if (!fftw->p_backward) { /* create a plan, then execute it */
316:     switch (ndim) {
317:     case 1:
318:   #if defined(PETSC_USE_COMPLEX)
319:       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
320:   #else
321:       SETERRQ(comm, PETSC_ERR_SUP, "no support for parallel 1D real transforms yet");
322:   #endif
323:       break;
324:     case 2:
325:   #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
326:       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
327:   #else
328:       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
329:   #endif
330:       break;
331:     case 3:
332:   #if defined(PETSC_USE_COMPLEX)
333:       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
334:   #else
335:       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
336:   #endif
337:       break;
338:     default:
339:   #if defined(PETSC_USE_COMPLEX)
340:       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
341:   #else
342:       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
343:   #endif
344:       break;
345:     }
346:     fftw->binarray  = (PetscScalar *)x_array;
347:     fftw->boutarray = y_array;
348:     fftw_execute(fftw->p_backward);
349:   } else {                                                         /* use existing plan */
350:     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
351:       fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
352:     } else {
353:       fftw_execute(fftw->p_backward);
354:     }
355:   }
356:   PetscCall(VecRestoreArray(y, &y_array));
357:   PetscCall(VecRestoreArrayRead(x, &x_array));
358:   PetscFunctionReturn(PETSC_SUCCESS);
359: }
360: #endif

362: PetscErrorCode MatDestroy_FFTW(Mat A)
363: {
364:   Mat_FFT  *fft  = (Mat_FFT *)A->data;
365:   Mat_FFTW *fftw = (Mat_FFTW *)fft->data;

367:   PetscFunctionBegin;
368:   fftw_destroy_plan(fftw->p_forward);
369:   fftw_destroy_plan(fftw->p_backward);
370:   if (fftw->iodims) free(fftw->iodims);
371:   PetscCall(PetscFree(fftw->dim_fftw));
372:   PetscCall(PetscFree(fft->data));
373:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
374:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
375:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
376: #if !PetscDefined(HAVE_MPIUNI)
377:   fftw_mpi_cleanup();
378: #endif
379:   PetscFunctionReturn(PETSC_SUCCESS);
380: }

382: #if !PetscDefined(HAVE_MPIUNI)
383: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
384: PetscErrorCode VecDestroy_MPIFFTW(Vec v)
385: {
386:   PetscScalar *array;

388:   PetscFunctionBegin;
389:   PetscCall(VecGetArray(v, &array));
390:   fftw_free((fftw_complex *)array);
391:   PetscCall(VecRestoreArray(v, &array));
392:   PetscCall(VecDestroy_MPI(v));
393:   PetscFunctionReturn(PETSC_SUCCESS);
394: }
395: #endif

397: #if !PetscDefined(HAVE_MPIUNI)
398: static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new)
399: {
400:   Mat A;

402:   PetscFunctionBegin;
403:   PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A));
404:   PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL));
405:   PetscFunctionReturn(PETSC_SUCCESS);
406: }

408: static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new)
409: {
410:   Mat A;

412:   PetscFunctionBegin;
413:   PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A));
414:   PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL));
415:   PetscFunctionReturn(PETSC_SUCCESS);
416: }

418: static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
419: {
420:   Mat A;

422:   PetscFunctionBegin;
423:   PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A));
424:   PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new));
425:   PetscFunctionReturn(PETSC_SUCCESS);
426: }
427: #endif

429: /*@
430:    MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
431:      parallel layout determined by `MATFFTW`

433:    Collective

435:    Input Parameter:
436: .   A - the matrix

438:    Output Parameters:
439: +   x - (optional) input vector of forward FFTW
440: .   y - (optional) output vector of forward FFTW
441: -   z - (optional) output vector of backward FFTW

443:    Options Database Key:
444: .  -mat_fftw_plannerflags - set FFTW planner flags

446:   Level: advanced

448:   Notes:
449:   The parallel layout of the output of the forward FFTW is always the same as that of the
450:   input of the backward FFTW, but the parallel layout of the input vector of the forward
451:   FFTW might not be the same as that of the output of the backward FFTW.

453:   Enough space must be provided for parallel real transforms: extra zeros are padded at the
454:   end of the last dimension. For this reason one needs to invoke the
455:   fftw_mpi_local_size_transposed() routine, changing the last dimension from n to n/2+1 in
456:   that call. The number of zeros to pad depends on whether the last dimension is even or
457:   odd: two zeros are needed when it is even, one when it is odd.

459:   Lastly, each process needs some scratch space at the end of its data set. The alloc_local
460:   value returned by the local-size routines accounts for both the data and this scratch
461:   space on each process.

464:   Developer Note:
465:   How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically?

467: .seealso: [](chapter_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()`
468: @*/
469: PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z)
470: {
471:   PetscFunctionBegin;
472:   PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
473:   PetscFunctionReturn(PETSC_SUCCESS);
474: }
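
/* A minimal sketch of the intended call sequence (assuming A was created with
   MatCreateFFT(); x1 is an ordinary PETSc vector and the other names are illustrative):

     Vec x, y, z;
     PetscCall(MatCreateVecsFFTW(A, &x, &y, &z)); // vectors with the FFTW layout
     PetscCall(VecScatterPetscToFFTW(A, x1, x));  // pad/redistribute x1 into x
     PetscCall(MatMult(A, x, y));                 // forward transform
     PetscCall(MatMultTranspose(A, y, z));        // backward transform (unnormalized)
*/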

476: PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout)
477: {
478:   PetscMPIInt size, rank;
479:   MPI_Comm    comm;
480:   Mat_FFT    *fft = (Mat_FFT *)A->data;

482:   PetscFunctionBegin;
485:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));

487:   PetscCallMPI(MPI_Comm_size(comm, &size));
488:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
489:   if (size == 1) { /* sequential case */
490: #if defined(PETSC_USE_COMPLEX)
491:     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin));
492:     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout));
493:     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout));
494: #else
495:     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin));
496:     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout));
497:     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout));
498: #endif
499: #if !PetscDefined(HAVE_MPIUNI)
500:   } else { /* parallel cases */
501:     Mat_FFTW     *fftw = (Mat_FFTW *)fft->data;
502:     PetscInt      ndim = fft->ndim, *dim = fft->dim;
503:     ptrdiff_t     alloc_local, local_n0, local_0_start;
504:     ptrdiff_t     local_n1;
505:     fftw_complex *data_fout;
506:     ptrdiff_t     local_1_start;
507:   #if defined(PETSC_USE_COMPLEX)
508:     fftw_complex *data_fin, *data_bout;
509:   #else
510:     double   *data_finr, *data_boutr;
511:     PetscInt  n1, N1;
512:     ptrdiff_t temp;
513:   #endif

515:     switch (ndim) {
516:     case 1:
517:   #if !defined(PETSC_USE_COMPLEX)
518:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
519:   #else
520:       alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
521:       if (fin) {
522:         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
523:         PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin));
524:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
525:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
526:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
527:       }
528:       if (fout) {
529:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
530:         PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout));
531:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
532:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
533:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
534:       }
535:       alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
536:       if (bout) {
537:         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
538:         PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout));
539:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
540:         (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
541:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
542:       }
543:       break;
544:   #endif
545:     case 2:
546:   #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no longer the product of the individual dimensions */
547:       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
548:       N1          = 2 * dim[0] * (dim[1] / 2 + 1);
549:       n1          = 2 * local_n0 * (dim[1] / 2 + 1);
550:       if (fin) {
551:         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
552:         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
553:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
554:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
555:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
556:       }
557:       if (fout) {
558:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
559:         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout));
560:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
561:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
562:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
563:       }
564:       if (bout) {
565:         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
566:         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
567:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
568:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
569:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
570:       }
571:   #else
572:       /* Get local size */
573:       alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
574:       if (fin) {
575:         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
576:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
577:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
578:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
579:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
580:       }
581:       if (fout) {
582:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
583:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
584:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
585:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
586:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
587:       }
588:       if (bout) {
589:         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
590:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
591:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
592:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
593:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
594:       }
595:   #endif
596:       break;
597:     case 3:
598:   #if !defined(PETSC_USE_COMPLEX)
599:       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
600:       N1          = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1);
601:       n1          = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1);
602:       if (fin) {
603:         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
604:         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
605:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
606:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
607:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
608:       }
609:       if (fout) {
610:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
611:         PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout));
612:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
613:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
614:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
615:       }
616:       if (bout) {
617:         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
618:         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
619:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
620:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
621:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
622:       }
623:   #else
624:       alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
625:       if (fin) {
626:         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
627:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
628:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
629:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
630:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
631:       }
632:       if (fout) {
633:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
634:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
635:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
636:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
637:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
638:       }
639:       if (bout) {
640:         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
641:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
642:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
643:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
644:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
645:       }
646:   #endif
647:       break;
648:     default:
649:   #if !defined(PETSC_USE_COMPLEX)
650:       temp                                  = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
651:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
652:       alloc_local                           = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
653:       N1                                    = 2 * fft->N * (PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw - 1]) / ((PetscInt)temp);
654:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

656:       if (fin) {
657:         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
658:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin));
659:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
660:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
661:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
662:       }
663:       if (fout) {
664:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
665:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout));
666:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
667:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
668:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
669:       }
670:       if (bout) {
671:         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
672:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout));
673:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
674:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
675:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
676:       }
677:   #else
678:       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
679:       if (fin) {
680:         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
681:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
682:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
683:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
684:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
685:       }
686:       if (fout) {
687:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
688:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
689:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
690:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
691:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
692:       }
693:       if (bout) {
694:         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
695:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
696:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
697:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
698:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
699:       }
700:   #endif
701:       break;
702:     }
703:     /* fftw vectors have their data arrays allocated by fftw_malloc, so that v->array is set but
704:        v->array_allocated=NULL. A regular replacearray call won't free that memory and only causes
705:        memory leaks. We void these pointers here to avoid accidental use.
706:     */
707:     if (fin) (*fin)->ops->replacearray = NULL;
708:     if (fout) (*fout)->ops->replacearray = NULL;
709:     if (bout) (*bout)->ops->replacearray = NULL;
710: #endif
711:   }
712:   PetscFunctionReturn(PETSC_SUCCESS);
713: }

715: /*@
716:    VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.

718:    Collective

720:    Input Parameters:
721: +  A - FFTW matrix
722: -  x - the PETSc vector

724:    Output Parameter:
725: .  y - the FFTW vector

727:    Level: intermediate

729:    Note:
730:    For parallel real FFTs, FFTW requires extra space at the end of the last dimension, even when
731:    one is not doing an in-place transform. The last dimension must be enlarged to 2*(dim[last]/2+1) to
732:    accommodate these extra zeros. This routine does that job with a scatter operation.

734: .seealso: [](chapter_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
735: @*/
736: PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
737: {
738:   PetscFunctionBegin;
739:   PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
740:   PetscFunctionReturn(PETSC_SUCCESS);
741: }
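
/* A small worked example of the padding above (purely illustrative): for dim = {4, 4} with
   real scalars the padded row length is 2*(4/2+1) = 6, so each logical row of 4 values is
   followed by 2 zeros, and a logical index (i,j) maps to i*6 + j in the FFTW array; for
   dim = {4, 5} the row length is 2*(5/2+1) = 6 as well, leaving 1 zero per row. */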

743: PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
744: {
745:   MPI_Comm    comm;
746:   Mat_FFT    *fft = (Mat_FFT *)A->data;
747:   PetscInt    low;
748:   PetscMPIInt rank, size;
749:   PetscInt    vsize, vsize1;
750:   VecScatter  vecscat;
751:   IS          list1;

753:   PetscFunctionBegin;
754:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
755:   PetscCallMPI(MPI_Comm_size(comm, &size));
756:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
757:   PetscCall(VecGetOwnershipRange(y, &low, NULL));

759:   if (size == 1) {
760:     PetscCall(VecGetSize(x, &vsize));
761:     PetscCall(VecGetSize(y, &vsize1));
762:     PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
763:     PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
764:     PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
765:     PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
766:     PetscCall(VecScatterDestroy(&vecscat));
767:     PetscCall(ISDestroy(&list1));
768: #if !PetscDefined(HAVE_MPIUNI)
769:   } else {
770:     Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
771:     PetscInt  ndim = fft->ndim, *dim = fft->dim;
772:     ptrdiff_t local_n0, local_0_start;
773:     ptrdiff_t local_n1, local_1_start;
774:     IS        list2;
775:   #if !defined(PETSC_USE_COMPLEX)
776:     PetscInt  i, j, k, partial_dim;
777:     PetscInt *indx1, *indx2, tempindx, tempindx1;
778:     PetscInt  NM;
779:     ptrdiff_t temp;
780:   #endif

782:     switch (ndim) {
783:     case 1:
784:   #if defined(PETSC_USE_COMPLEX)
785:       fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);

787:       PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1));
788:       PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2));
789:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
790:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
791:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
792:       PetscCall(VecScatterDestroy(&vecscat));
793:       PetscCall(ISDestroy(&list1));
794:       PetscCall(ISDestroy(&list2));
795:   #else
796:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
797:   #endif
798:       break;
799:     case 2:
800:   #if defined(PETSC_USE_COMPLEX)
801:       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);

803:       PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
804:       PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
805:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
806:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
807:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
808:       PetscCall(VecScatterDestroy(&vecscat));
809:       PetscCall(ISDestroy(&list1));
810:       PetscCall(ISDestroy(&list2));
811:   #else
812:       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

814:       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
815:       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));
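
      /* NM = 2*(dim[1]/2+1) is the padded row length of the real FFTW array
         (dim[1]+2 when dim[1] is even, dim[1]+1 when it is odd) */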

817:       if (dim[1] % 2 == 0) {
818:         NM = dim[1] + 2;
819:       } else {
820:         NM = dim[1] + 1;
821:       }
822:       for (i = 0; i < local_n0; i++) {
823:         for (j = 0; j < dim[1]; j++) {
824:           tempindx  = i * dim[1] + j;
825:           tempindx1 = i * NM + j;

827:           indx1[tempindx] = local_0_start * dim[1] + tempindx;
828:           indx2[tempindx] = low + tempindx1;
829:         }
830:       }

832:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
833:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));

835:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
836:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
837:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
838:       PetscCall(VecScatterDestroy(&vecscat));
839:       PetscCall(ISDestroy(&list1));
840:       PetscCall(ISDestroy(&list2));
841:       PetscCall(PetscFree(indx1));
842:       PetscCall(PetscFree(indx2));
843:   #endif
844:       break;

846:     case 3:
847:   #if defined(PETSC_USE_COMPLEX)
848:       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

850:       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
851:       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
852:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
853:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
854:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
855:       PetscCall(VecScatterDestroy(&vecscat));
856:       PetscCall(ISDestroy(&list1));
857:       PetscCall(ISDestroy(&list2));
858:   #else
859:       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
860:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
861:       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

863:       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
864:       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));

866:       if (dim[2] % 2 == 0) NM = dim[2] + 2;
867:       else NM = dim[2] + 1;

869:       for (i = 0; i < local_n0; i++) {
870:         for (j = 0; j < dim[1]; j++) {
871:           for (k = 0; k < dim[2]; k++) {
872:             tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
873:             tempindx1 = i * dim[1] * NM + j * NM + k;

875:             indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
876:             indx2[tempindx] = low + tempindx1;
877:           }
878:         }
879:       }

881:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
882:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
883:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
884:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
885:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
886:       PetscCall(VecScatterDestroy(&vecscat));
887:       PetscCall(ISDestroy(&list1));
888:       PetscCall(ISDestroy(&list2));
889:       PetscCall(PetscFree(indx1));
890:       PetscCall(PetscFree(indx2));
891:   #endif
892:       break;

894:     default:
895:   #if defined(PETSC_USE_COMPLEX)
896:       fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);

898:       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
899:       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
900:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
901:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
902:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
903:       PetscCall(VecScatterDestroy(&vecscat));
904:       PetscCall(ISDestroy(&list1));
905:       PetscCall(ISDestroy(&list2));
906:   #else
907:       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
908:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
909:       temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];

911:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;

913:       fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

915:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

917:       partial_dim = fftw->partial_dim;

919:       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
920:       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));

922:       if (dim[ndim - 1] % 2 == 0) NM = 2;
923:       else NM = 1;

925:       j = low;
926:       for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
927:         indx1[i] = local_0_start * partial_dim + i;
928:         indx2[i] = j;
929:         if (k % dim[ndim - 1] == 0) j += NM;
930:         j++;
931:       }
932:       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
933:       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
934:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
935:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
936:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
937:       PetscCall(VecScatterDestroy(&vecscat));
938:       PetscCall(ISDestroy(&list1));
939:       PetscCall(ISDestroy(&list2));
940:       PetscCall(PetscFree(indx1));
941:       PetscCall(PetscFree(indx2));
942:   #endif
943:       break;
944:     }
945: #endif
946:   }
947:   PetscFunctionReturn(PETSC_SUCCESS);
948: }

950: /*@
951:    VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector.

953:    Collective

955:     Input Parameters:
956: +   A - `MATFFTW` matrix
957: -   x - FFTW vector

959:    Output Parameter:
960: .  y - the PETSc vector

962:    Level: intermediate

964:    Note:
965:    For real transforms, the FFTW output of the backward DFT contains extra zeros at the end of the last dimension;
966:    `VecScatterFFTWToPetsc()` removes those extra zeros.

968: .seealso: [](chapter_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
969: @*/
970: PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
971: {
972:   PetscFunctionBegin;
973:   PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
974:   PetscFunctionReturn(PETSC_SUCCESS);
975: }
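
/* A minimal round-trip sketch combining the two scatters (x1/x2 are ordinary PETSc vectors,
   N is the product of the dimensions; all other calls appear in this file):

     PetscCall(VecScatterPetscToFFTW(A, x1, x)); // pad into the FFTW layout
     PetscCall(MatMult(A, x, y));                // forward DFT
     PetscCall(MatMultTranspose(A, y, z));       // backward DFT (unnormalized)
     PetscCall(VecScatterFFTWToPetsc(A, z, x2)); // strip the padding; x2 = N * x1
     PetscCall(VecScale(x2, 1.0 / N));           // recover x1
*/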

977: PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
978: {
979:   MPI_Comm    comm;
980:   Mat_FFT    *fft = (Mat_FFT *)A->data;
981:   PetscInt    low;
982:   PetscMPIInt rank, size;
983:   VecScatter  vecscat;
984:   IS          list1;

986:   PetscFunctionBegin;
987:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
988:   PetscCallMPI(MPI_Comm_size(comm, &size));
989:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
990:   PetscCall(VecGetOwnershipRange(x, &low, NULL));

992:   if (size == 1) {
993:     PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
994:     PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
995:     PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
996:     PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
997:     PetscCall(VecScatterDestroy(&vecscat));
998:     PetscCall(ISDestroy(&list1));

1000: #if !PetscDefined(HAVE_MPIUNI)
1001:   } else {
1002:     Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
1003:     PetscInt  ndim = fft->ndim, *dim = fft->dim;
1004:     ptrdiff_t local_n0, local_0_start;
1005:     ptrdiff_t local_n1, local_1_start;
1006:     IS        list2;
1007:   #if !defined(PETSC_USE_COMPLEX)
1008:     PetscInt  i, j, k, partial_dim;
1009:     PetscInt *indx1, *indx2, tempindx, tempindx1;
1010:     PetscInt  NM;
1011:     ptrdiff_t temp;
1012:   #endif
1013:     switch (ndim) {
1014:     case 1:
1015:   #if defined(PETSC_USE_COMPLEX)
1016:       fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);

1018:       PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1));
1019:       PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2));
1020:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1021:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1022:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1023:       PetscCall(VecScatterDestroy(&vecscat));
1024:       PetscCall(ISDestroy(&list1));
1025:       PetscCall(ISDestroy(&list2));
1026:   #else
1027:       SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
1028:   #endif
1029:       break;
1030:     case 2:
1031:   #if defined(PETSC_USE_COMPLEX)
1032:       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);

1034:       PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
1035:       PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
1036:       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1037:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1038:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1039:       PetscCall(VecScatterDestroy(&vecscat));
1040:       PetscCall(ISDestroy(&list1));
1041:       PetscCall(ISDestroy(&list2));
1042:   #else
1043:       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1045:       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
1046:       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));

1048:       if (dim[1] % 2 == 0) NM = dim[1] + 2;
1049:       else NM = dim[1] + 1;

1051:       for (i = 0; i < local_n0; i++) {
1052:         for (j = 0; j < dim[1]; j++) {
1053:           tempindx  = i * dim[1] + j;
1054:           tempindx1 = i * NM + j;

1056:           indx1[tempindx] = local_0_start * dim[1] + tempindx;
1057:           indx2[tempindx] = low + tempindx1;
1058:         }
1059:       }

1061:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
1062:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));

1064:       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1065:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1066:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1067:       PetscCall(VecScatterDestroy(&vecscat));
1068:       PetscCall(ISDestroy(&list1));
1069:       PetscCall(ISDestroy(&list2));
1070:       PetscCall(PetscFree(indx1));
1071:       PetscCall(PetscFree(indx2));
1072:   #endif
1073:       break;
1074:     case 3:
1075:   #if defined(PETSC_USE_COMPLEX)
1076:       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

1078:       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
1079:       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
1080:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1081:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1082:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1083:       PetscCall(VecScatterDestroy(&vecscat));
1084:       PetscCall(ISDestroy(&list1));
1085:       PetscCall(ISDestroy(&list2));
1086:   #else
1087:       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1089:       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
1090:       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));

1092:       if (dim[2] % 2 == 0) NM = dim[2] + 2;
1093:       else NM = dim[2] + 1;

1095:       for (i = 0; i < local_n0; i++) {
1096:         for (j = 0; j < dim[1]; j++) {
1097:           for (k = 0; k < dim[2]; k++) {
1098:             tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
1099:             tempindx1 = i * dim[1] * NM + j * NM + k;

1101:             indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
1102:             indx2[tempindx] = low + tempindx1;
1103:           }
1104:         }
1105:       }

1107:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
1108:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));

1110:       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1111:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1112:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1113:       PetscCall(VecScatterDestroy(&vecscat));
1114:       PetscCall(ISDestroy(&list1));
1115:       PetscCall(ISDestroy(&list2));
1116:       PetscCall(PetscFree(indx1));
1117:       PetscCall(PetscFree(indx2));
1118:   #endif
1119:       break;
1120:     default:
1121:   #if defined(PETSC_USE_COMPLEX)
1122:       fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);

1124:       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
1125:       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
1126:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1127:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1128:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1129:       PetscCall(VecScatterDestroy(&vecscat));
1130:       PetscCall(ISDestroy(&list1));
1131:       PetscCall(ISDestroy(&list2));
1132:   #else
1133:       temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];

1135:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;

1137:       fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1139:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

1141:       partial_dim = fftw->partial_dim;

1143:       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
1144:       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));

1146:       if (dim[ndim - 1] % 2 == 0) NM = 2;
1147:       else NM = 1;

1149:       j = low;
1150:       for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
1151:         indx1[i] = local_0_start * partial_dim + i;
1152:         indx2[i] = j;
1153:         if (k % dim[ndim - 1] == 0) j += NM;
1154:         j++;
1155:       }
1156:       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
1157:       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));

1159:       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1160:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1161:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1162:       PetscCall(VecScatterDestroy(&vecscat));
1163:       PetscCall(ISDestroy(&list1));
1164:       PetscCall(ISDestroy(&list2));
1165:       PetscCall(PetscFree(indx1));
1166:       PetscCall(PetscFree(indx2));
1167:   #endif
1168:       break;
1169:     }
1170: #endif
1171:   }
1172:   PetscFunctionReturn(PETSC_SUCCESS);
1173: }

1175: PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1176: {
1177:   MPI_Comm    comm;
1178:   Mat_FFT    *fft = (Mat_FFT *)A->data;
1179:   Mat_FFTW   *fftw;
1180:   PetscInt    ndim = fft->ndim, *dim = fft->dim;
1181:   const char *plans[]  = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
1182:   unsigned    iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
1183:   PetscBool   flg;
1184:   PetscInt    p_flag, partial_dim = 1, ctr;
1185:   PetscMPIInt size, rank;
1186:   ptrdiff_t  *pdim;
1187: #if !defined(PETSC_USE_COMPLEX)
1188:   PetscInt tot_dim;
1189: #endif

1191:   PetscFunctionBegin;
1192:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1193:   PetscCallMPI(MPI_Comm_size(comm, &size));
1194:   PetscCallMPI(MPI_Comm_rank(comm, &rank));

1196: #if !PetscDefined(HAVE_MPIUNI)
1197:   fftw_mpi_init();
1198: #endif
1199:   pdim    = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
1200:   pdim[0] = dim[0];
1201: #if !defined(PETSC_USE_COMPLEX)
1202:   tot_dim = 2 * dim[0];
1203: #endif
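  /* partial_dim accumulates the product of all but the first dimension; for real scalars
     tot_dim becomes 2*dim[0]*...*dim[ndim-2]*(dim[ndim-1]/2+1), the padded r2c length */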
1204:   for (ctr = 1; ctr < ndim; ctr++) {
1205:     partial_dim *= dim[ctr];
1206:     pdim[ctr] = dim[ctr];
1207: #if !defined(PETSC_USE_COMPLEX)
1208:     if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
1209:     else tot_dim *= dim[ctr];
1210: #endif
1211:   }

1213:   if (size == 1) {
1214: #if defined(PETSC_USE_COMPLEX)
1215:     PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
1216:     fft->n = fft->N;
1217: #else
1218:     PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
1219:     fft->n       = tot_dim;
1220: #endif
1221: #if !PetscDefined(HAVE_MPIUNI)
1222:   } else {
1223:     ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
1224:   #if !defined(PETSC_USE_COMPLEX)
1225:     ptrdiff_t temp;
1226:     PetscInt  N1;
1227:   #endif

1229:     switch (ndim) {
1230:     case 1:
1231:   #if !defined(PETSC_USE_COMPLEX)
1232:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
1233:   #else
1234:       fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
1235:       fft->n = (PetscInt)local_n0;
1236:       PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N));
1237:   #endif
1238:       break;
1239:     case 2:
1240:   #if defined(PETSC_USE_COMPLEX)
1241:       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
1242:       fft->n = (PetscInt)local_n0 * dim[1];
1243:       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1244:   #else
1245:       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1247:       fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
1248:       PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
1249:   #endif
1250:       break;
1251:     case 3:
1252:   #if defined(PETSC_USE_COMPLEX)
1253:       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

1255:       fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
1256:       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1257:   #else
1258:       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1260:       fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
1261:       PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
1262:   #endif
1263:       break;
1264:     default:
1265:   #if defined(PETSC_USE_COMPLEX)
1266:       fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);

1268:       fft->n = (PetscInt)local_n0 * partial_dim;
1269:       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1270:   #else
1271:       temp = pdim[ndim - 1];

1273:       pdim[ndim - 1] = temp / 2 + 1;

1275:       fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1277:       fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp;
1278:       N1     = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);

1280:       pdim[ndim - 1] = temp;

1282:       PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
1283:   #endif
1284:       break;
1285:     }
1286: #endif
1287:   }
1288:   free(pdim);
1289:   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
1290:   PetscCall(PetscNew(&fftw));
1291:   fft->data = (void *)fftw;

1293:   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* the dimension of the FFT */
1294:   fftw->partial_dim = partial_dim;

1296:   PetscCall(PetscMalloc1(ndim, &(fftw->dim_fftw)));
1297:   if (size == 1) {
1298: #if defined(PETSC_USE_64BIT_INDICES)
1299:     fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
1300: #else
1301:     fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
1302: #endif
1303:   }

1305:   for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];

1307:   fftw->p_forward  = NULL;
1308:   fftw->p_backward = NULL;
1309:   fftw->p_flag     = FFTW_ESTIMATE;

1311:   if (size == 1) {
1312:     A->ops->mult          = MatMult_SeqFFTW;
1313:     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1314: #if !PetscDefined(HAVE_MPIUNI)
1315:   } else {
1316:     A->ops->mult          = MatMult_MPIFFTW;
1317:     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1318: #endif
1319:   }
1320:   fft->matdestroy = MatDestroy_FFTW;
1321:   A->assembled    = PETSC_TRUE;
1322:   A->preallocated = PETSC_TRUE;

1324:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
1325:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
1326:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));

1328:   /* get runtime options */
1329:   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
1330:   PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
1331:   if (flg) fftw->p_flag = iplans[p_flag];
1332:   PetscOptionsEnd();
1333:   PetscFunctionReturn(PETSC_SUCCESS);
1334: }
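
/* A minimal sketch of selecting the planner flag at runtime (the option name is the one
   registered above; the executable name is illustrative):

     ./app -mat_fftw_plannerflags FFTW_MEASURE
*/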