Actual source code: ex52.c
petsc-dev 2014-02-02
1: static const char help[] = "Testbed for FEM operations on the GPU.\n\n";
3: #include<petscdmplex.h>
4: #include<petscsnes.h>
6: #define NUM_FIELDS 1
7: PetscInt spatialDim = 0;
9: typedef enum {LAPLACIAN = 0, ELASTICITY} OpType;
11: typedef struct {
12: PetscFEM fem; /* REQUIRED to use DMPlexComputeResidualFEM() */
13: DM dm; /* The solution DM */
14: PetscInt debug; /* The debugging level */
15: PetscMPIInt rank; /* The process rank */
16: PetscMPIInt numProcs; /* The number of processes */
17: PetscInt dim; /* The topological mesh dimension */
18: PetscBool interpolate; /* Generate intermediate mesh elements */
19: PetscReal refinementLimit; /* The largest allowable cell volume */
20: PetscBool refinementUniform; /* Uniformly refine the mesh */
21: PetscInt refinementRounds; /* The number of uniform refinements */
22: char partitioner[2048]; /* The graph partitioner */
23: PetscBool computeFunction; /* The flag for computing a residual */
24: PetscBool computeJacobian; /* The flag for computing a Jacobian */
25: PetscBool gpu; /* The flag for GPU integration */
26: OpType op; /* The type of PDE operator (should use FFC/Ignition here) */
27: PetscBool showResidual, showJacobian;
28: PetscLogEvent createMeshEvent, residualEvent, residualBatchEvent, jacobianEvent, jacobianBatchEvent, integrateBatchCPUEvent, integrateBatchGPUEvent, integrateGPUOnlyEvent;
29: /* Element definition */
30: PetscFE fe[NUM_FIELDS];
31: PetscFE feAux[1];
32: void (*f0Funcs[NUM_FIELDS])(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar f0[]);
33: void (*f1Funcs[NUM_FIELDS])(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar f1[]);
34: void (*g0Funcs[NUM_FIELDS*NUM_FIELDS])(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar g0[]);
35: void (*g1Funcs[NUM_FIELDS*NUM_FIELDS])(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar g1[]);
36: void (*g2Funcs[NUM_FIELDS*NUM_FIELDS])(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar g2[]);
37: void (*g3Funcs[NUM_FIELDS*NUM_FIELDS])(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar g3[]);
38: void (**exactFuncs)(const PetscReal x[], PetscScalar *u, void *ctx);
39: } AppCtx;
41: void quadratic_2d(const PetscReal x[], PetscScalar u[], void *ctx)
42: {
43: u[0] = x[0]*x[0] + x[1]*x[1];
44: };
46: void quadratic_2d_elas(const PetscReal x[], PetscScalar u[], void *ctx)
47: {
48: u[0] = x[0]*x[0] + x[1]*x[1];
49: u[1] = x[0]*x[0] + x[1]*x[1];
50: };
52: void f0_lap(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar f0[])
53: {
54: f0[0] = 4.0;
55: }
57: /* gradU[comp*dim+d] = {u_x, u_y} or {u_x, u_y, u_z} */
58: void f1_lap(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar f1[])
59: {
60: PetscInt d;
61: for (d = 0; d < spatialDim; ++d) {f1[d] = a[0]*gradU[d];}
62: }
64: /* < \nabla v, \nabla u + {\nabla u}^T >
65: This just gives \nabla u, give the perdiagonal for the transpose */
66: void g3_lap(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar g3[])
67: {
68: PetscInt d;
69: for (d = 0; d < spatialDim; ++d) {g3[d*spatialDim+d] = 1.0;}
70: }
72: void f0_elas(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar f0[])
73: {
74: const PetscInt Ncomp = spatialDim;
75: PetscInt comp;
77: for (comp = 0; comp < Ncomp; ++comp) f0[comp] = 3.0;
78: }
80: /* gradU[comp*dim+d] = {u_x, u_y, v_x, v_y} or {u_x, u_y, u_z, v_x, v_y, v_z, w_x, w_y, w_z}
81: u[Ncomp] = {p} */
82: void f1_elas(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar f1[])
83: {
84: const PetscInt dim = spatialDim;
85: const PetscInt Ncomp = spatialDim;
86: PetscInt comp, d;
88: for (comp = 0; comp < Ncomp; ++comp) {
89: for (d = 0; d < dim; ++d) {
90: f1[comp*dim+d] = 0.5*(gradU[comp*dim+d] + gradU[d*dim+comp]);
91: }
92: f1[comp*dim+comp] -= u[Ncomp];
93: }
94: }
96: /* < \nabla v, \nabla u + {\nabla u}^T >
97: This just gives \nabla u, give the perdiagonal for the transpose */
98: void g3_elas(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar g3[])
99: {
100: const PetscInt dim = spatialDim;
101: const PetscInt Ncomp = spatialDim;
102: PetscInt compI, d;
104: for (compI = 0; compI < Ncomp; ++compI) {
105: for (d = 0; d < dim; ++d) {
106: g3[((compI*Ncomp+compI)*dim+d)*dim+d] = 1.0;
107: }
108: }
109: }
113: PetscErrorCode ProcessOptions(MPI_Comm comm, AppCtx *options)
114: {
115: const char *opTypes[2] = {"laplacian", "elasticity"};
116: PetscInt op;
120: options->debug = 0;
121: options->dim = 2;
122: options->interpolate = PETSC_FALSE;
123: options->refinementLimit = 0.0;
124: options->refinementUniform = PETSC_FALSE;
125: options->refinementRounds = 1;
126: options->computeFunction = PETSC_FALSE;
127: options->computeJacobian = PETSC_FALSE;
128: options->gpu = PETSC_FALSE;
129: options->op = LAPLACIAN;
130: options->showResidual = PETSC_TRUE;
131: options->showJacobian = PETSC_TRUE;
133: MPI_Comm_size(comm, &options->numProcs);
134: MPI_Comm_rank(comm, &options->rank);
135: PetscOptionsBegin(comm, "", "Bratu Problem Options", "DMPLEX");
136: PetscOptionsInt("-debug", "The debugging level", "ex52.c", options->debug, &options->debug, NULL);
137: PetscOptionsInt("-dim", "The topological mesh dimension", "ex52.c", options->dim, &options->dim, NULL);
138: spatialDim = options->dim;
139: PetscOptionsBool("-interpolate", "Generate intermediate mesh elements", "ex52.c", options->interpolate, &options->interpolate, NULL);
140: PetscOptionsReal("-refinement_limit", "The largest allowable cell volume", "ex52.c", options->refinementLimit, &options->refinementLimit, NULL);
141: PetscOptionsBool("-refinement_uniform", "Uniformly refine the mesh", "ex52.c", options->refinementUniform, &options->refinementUniform, NULL);
142: PetscOptionsInt("-refinement_rounds", "The number of uniform refinements", "ex52.c", options->refinementRounds, &options->refinementRounds, NULL);
143: PetscStrcpy(options->partitioner, "chaco");
144: PetscOptionsString("-partitioner", "The graph partitioner", "ex52.c", options->partitioner, options->partitioner, 2048, NULL);
145: PetscOptionsBool("-compute_function", "Compute the residual", "ex52.c", options->computeFunction, &options->computeFunction, NULL);
146: PetscOptionsBool("-compute_jacobian", "Compute the Jacobian", "ex52.c", options->computeJacobian, &options->computeJacobian, NULL);
147: PetscOptionsBool("-gpu", "Use the GPU for integration method", "ex52.c", options->gpu, &options->gpu, NULL);
149: op = options->op;
150: PetscOptionsEList("-op_type","Type of PDE operator","ex52.c",opTypes,2,opTypes[options->op],&op,NULL);
151: options->op = (OpType) op;
153: PetscOptionsBool("-show_residual", "Output the residual for verification", "ex52.c", options->showResidual, &options->showResidual, NULL);
154: PetscOptionsBool("-show_jacobian", "Output the Jacobian for verification", "ex52.c", options->showJacobian, &options->showJacobian, NULL);
155: PetscOptionsEnd();
157: PetscLogEventRegister("CreateMesh", DM_CLASSID, &options->createMeshEvent);
158: PetscLogEventRegister("Residual", SNES_CLASSID, &options->residualEvent);
159: PetscLogEventRegister("ResidualBatch", SNES_CLASSID, &options->residualBatchEvent);
160: PetscLogEventRegister("Jacobian", SNES_CLASSID, &options->jacobianEvent);
161: PetscLogEventRegister("JacobianBatch", SNES_CLASSID, &options->jacobianBatchEvent);
162: PetscLogEventRegister("IntegBatchCPU", SNES_CLASSID, &options->integrateBatchCPUEvent);
163: PetscLogEventRegister("IntegBatchGPU", SNES_CLASSID, &options->integrateBatchGPUEvent);
164: PetscLogEventRegister("IntegGPUOnly", SNES_CLASSID, &options->integrateGPUOnlyEvent);
165: return(0);
166: };
170: PetscErrorCode CreateMesh(MPI_Comm comm, AppCtx *user, DM *dm)
171: {
172: PetscInt dim = user->dim;
173: PetscBool interpolate = user->interpolate;
174: PetscReal refinementLimit = user->refinementLimit;
175: PetscBool refinementUniform = user->refinementUniform;
176: PetscInt refinementRounds = user->refinementRounds;
177: const char *partitioner = user->partitioner;
181: PetscLogEventBegin(user->createMeshEvent,0,0,0,0);
182: DMPlexCreateBoxMesh(comm, dim, interpolate, dm);
183: {
184: DM refinedMesh = NULL;
185: DM distributedMesh = NULL;
187: /* Refine mesh using a volume constraint */
188: DMPlexSetRefinementLimit(*dm, refinementLimit);
189: DMRefine(*dm, comm, &refinedMesh);
190: if (refinedMesh) {
191: DMDestroy(dm);
192: *dm = refinedMesh;
193: }
194: /* Distribute mesh over processes */
195: DMPlexDistribute(*dm, partitioner, 0, NULL, &distributedMesh);
196: if (distributedMesh) {
197: DMDestroy(dm);
198: *dm = distributedMesh;
199: }
200: /* Use regular refinement in parallel */
201: if (refinementUniform) {
202: PetscInt r;
204: DMPlexSetRefinementUniform(*dm, refinementUniform);
205: for (r = 0; r < refinementRounds; ++r) {
206: DMRefine(*dm, comm, &refinedMesh);
207: if (refinedMesh) {
208: DMDestroy(dm);
209: *dm = refinedMesh;
210: }
211: }
212: }
213: }
214: PetscObjectSetName((PetscObject) *dm, "Mesh");
215: DMSetFromOptions(*dm);
216: PetscLogEventEnd(user->createMeshEvent,0,0,0,0);
218: user->dm = *dm;
219: return(0);
220: }
224: PetscErrorCode SetupElement(DM dm, AppCtx *user)
225: {
226: const PetscInt dim = user->dim;
227: PetscFE fem;
228: PetscQuadrature q;
229: DM K;
230: PetscSpace P;
231: PetscDualSpace Q;
232: PetscInt order;
233: PetscErrorCode ierr;
236: /* Create space */
237: PetscSpaceCreate(PetscObjectComm((PetscObject) dm), &P);
238: PetscSpaceSetFromOptions(P);
239: PetscSpacePolynomialSetNumVariables(P, dim);
240: PetscSpaceSetUp(P);
241: PetscSpaceGetOrder(P, &order);
242: /* Create dual space */
243: PetscDualSpaceCreate(PetscObjectComm((PetscObject) dm), &Q);
244: PetscDualSpaceCreateReferenceCell(Q, dim, PETSC_TRUE, &K);
245: PetscDualSpaceSetDM(Q, K);
246: DMDestroy(&K);
247: PetscDualSpaceSetOrder(Q, order);
248: PetscDualSpaceSetFromOptions(Q);
249: PetscDualSpaceSetUp(Q);
250: /* Create element */
251: PetscFECreate(PetscObjectComm((PetscObject) dm), &fem);
252: PetscFESetFromOptions(fem);
253: PetscFESetBasisSpace(fem, P);
254: PetscFESetDualSpace(fem, Q);
255: PetscFESetNumComponents(fem, 1);
256: PetscSpaceDestroy(&P);
257: PetscDualSpaceDestroy(&Q);
258: /* Create quadrature */
259: PetscDTGaussJacobiQuadrature(dim, order, -1.0, 1.0, &q);
260: PetscFESetQuadrature(fem, q);
261: user->fe[0] = fem;
262: user->fem.fe = user->fe;
263: return(0);
264: }
268: PetscErrorCode SetupMaterialElement(DM dm, AppCtx *user)
269: {
270: const PetscInt dim = user->dim;
271: const char *prefix = "mat_";
272: PetscFE fem;
273: PetscQuadrature q;
274: DM K;
275: PetscSpace P;
276: PetscDualSpace Q;
277: PetscInt order;
278: PetscErrorCode ierr;
281: /* Create space */
282: PetscSpaceCreate(PetscObjectComm((PetscObject) dm), &P);
283: PetscObjectSetOptionsPrefix((PetscObject) P, prefix);
284: PetscSpaceSetFromOptions(P);
285: PetscSpacePolynomialSetNumVariables(P, dim);
286: PetscSpaceSetUp(P);
287: PetscSpaceGetOrder(P, &order);
288: /* Create dual space */
289: PetscDualSpaceCreate(PetscObjectComm((PetscObject) dm), &Q);
290: PetscObjectSetOptionsPrefix((PetscObject) Q, prefix);
291: PetscDualSpaceCreateReferenceCell(Q, dim, PETSC_TRUE, &K);
292: PetscDualSpaceSetDM(Q, K);
293: DMDestroy(&K);
294: PetscDualSpaceSetOrder(Q, order);
295: PetscDualSpaceSetFromOptions(Q);
296: PetscDualSpaceSetUp(Q);
297: /* Create element */
298: PetscFECreate(PetscObjectComm((PetscObject) dm), &fem);
299: PetscObjectSetOptionsPrefix((PetscObject) fem, prefix);
300: PetscFESetFromOptions(fem);
301: PetscFESetBasisSpace(fem, P);
302: PetscFESetDualSpace(fem, Q);
303: PetscFESetNumComponents(fem, 1);
304: PetscSpaceDestroy(&P);
305: PetscDualSpaceDestroy(&Q);
306: /* Create quadrature */
307: PetscDTGaussJacobiQuadrature(dim, PetscMax(order, 1), -1.0, 1.0, &q);
308: PetscFESetQuadrature(fem, q);
309: user->feAux[0] = fem;
310: user->fem.feAux = user->feAux;
311: return(0);
312: }
316: PetscErrorCode DestroyElement(AppCtx *user)
317: {
321: PetscFEDestroy(&user->fe[0]);
322: PetscFEDestroy(&user->feAux[0]);
323: return(0);
324: }
328: PetscErrorCode SetupSection(DM dm, AppCtx *user)
329: {
330: PetscSection section;
331: PetscInt dim = user->dim;
332: PetscInt numBC = 0;
333: PetscInt numComp[1];
334: const PetscInt *numDof;
335: PetscErrorCode ierr;
338: PetscFEGetNumComponents(user->fe[0], &numComp[0]);
339: PetscFEGetNumDof(user->fe[0], &numDof);
340: DMPlexCreateSection(dm, dim, 1, numComp, numDof, numBC, NULL, NULL, §ion);
341: DMSetDefaultSection(dm, section);
342: PetscSectionDestroy(§ion);
343: return(0);
344: }
348: PetscErrorCode SetupMaterial(DM dm, DM dmAux, AppCtx *user)
349: {
350: Vec epsilon;
354: DMCreateLocalVector(dmAux, &epsilon);
355: VecSet(epsilon, 1.0);
356: PetscObjectCompose((PetscObject) dm, "A", (PetscObject) epsilon);
357: VecDestroy(&epsilon);
358: return(0);
359: }
363: int main(int argc, char **argv)
364: {
365: DM dm, dmAux;
366: SNES snes;
367: AppCtx user;
368: PetscInt numComp;
371: PetscInitialize(&argc, &argv, NULL, help);
372: #if !defined(PETSC_HAVE_CUDA) && !defined(PETSC_HAVE_OPENCL)
373: SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_SUP, "This example requires CUDA or OpenCL support.");
374: #endif
375: ProcessOptions(PETSC_COMM_WORLD, &user);
376: SNESCreate(PETSC_COMM_WORLD, &snes);
377: CreateMesh(PETSC_COMM_WORLD, &user, &dm);
378: SNESSetDM(snes, dm);
380: SetupElement(user.dm, &user);
381: DMClone(user.dm, &dmAux);
382: PetscObjectCompose((PetscObject) dm, "dmAux", (PetscObject) dmAux);
383: SetupMaterialElement(dmAux, &user);
384: PetscFEGetNumComponents(user.fe[0], &numComp);
385: PetscMalloc(numComp * sizeof(void (*)(const PetscReal[], PetscScalar *, void *)), &user.exactFuncs);
386: switch (user.op) {
387: case LAPLACIAN:
388: user.f0Funcs[0] = f0_lap;
389: user.f1Funcs[0] = f1_lap;
390: user.g0Funcs[0] = NULL;
391: user.g1Funcs[0] = NULL;
392: user.g2Funcs[0] = NULL;
393: user.g3Funcs[0] = g3_lap;
394: user.exactFuncs[0] = quadratic_2d;
395: break;
396: case ELASTICITY:
397: user.f0Funcs[0] = f0_elas;
398: user.f1Funcs[0] = f1_elas;
399: user.g0Funcs[0] = NULL;
400: user.g1Funcs[0] = NULL;
401: user.g2Funcs[0] = NULL;
402: user.g3Funcs[0] = g3_elas;
403: user.exactFuncs[0] = quadratic_2d_elas;
404: break;
405: default:
406: SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid PDE operator %d", user.op);
407: }
408: user.fem.f0Funcs = user.f0Funcs;
409: user.fem.f1Funcs = user.f1Funcs;
410: user.fem.g0Funcs = user.g0Funcs;
411: user.fem.g1Funcs = user.g1Funcs;
412: user.fem.g2Funcs = user.g2Funcs;
413: user.fem.g3Funcs = user.g3Funcs;
414: user.fem.bcFuncs = user.exactFuncs;
415: user.fem.bcCtxs = NULL;
416: SetupSection(dm, &user);
417: SetupSection(dmAux, &user);
418: SetupMaterial(dm, dmAux, &user);
420: DMSNESSetFunctionLocal(dm, (PetscErrorCode (*)(DM,Vec,Vec,void*))DMPlexComputeResidualFEM,&user);
421: DMSNESSetJacobianLocal(dm, (PetscErrorCode (*)(DM,Vec,Mat,Mat,MatStructure*,void*))DMPlexComputeJacobianFEM,&user);
422: if (user.computeFunction) {
423: Vec X, F;
425: DMGetGlobalVector(dm, &X);
426: DMGetGlobalVector(dm, &F);
427: DMPlexProjectFunction(dm, user.fe, user.exactFuncs, NULL, INSERT_VALUES, X);
428: SNESComputeFunction(snes, X, F);
429: DMRestoreGlobalVector(dm, &X);
430: DMRestoreGlobalVector(dm, &F);
431: }
432: if (user.computeJacobian) {
433: Vec X;
434: Mat J;
435: MatStructure flag;
437: DMGetGlobalVector(dm, &X);
438: DMSetMatType(dm,MATAIJ);
439: DMCreateMatrix(dm, &J);
440: SNESComputeJacobian(snes, X, &J, &J, &flag);
441: MatDestroy(&J);
442: DMRestoreGlobalVector(dm, &X);
443: }
444: PetscFree(user.exactFuncs);
445: DestroyElement(&user);
446: DMDestroy(&dmAux);
447: DMDestroy(&dm);
448: SNESDestroy(&snes);
449: PetscFinalize();
450: return 0;
451: }