Actual source code: dtfe.c

petsc-dev 2014-02-02
Report Typos and Errors
  1: /* Basis Jet Tabulation

  3: We would like to tabulate the nodal basis functions and derivatives at a set of points, usually quadrature points. We
  4: follow here the derviation in http://www.math.ttu.edu/~kirby/papers/fiat-toms-2004.pdf. The nodal basis $\psi_i$ can
  5: be expressed in terms of a prime basis $\phi_i$ which can be stably evaluated. In PETSc, we will use the Legendre basis
  6: as a prime basis.

  8:   \psi_i = \sum_k \alpha_{ki} \phi_k

 10: Our nodal basis is defined in terms of the dual basis $n_j$

 12:   n_j \cdot \psi_i = \delta_{ji}

 14: and we may act on the first equation to obtain

 16:   n_j \cdot \psi_i = \sum_k \alpha_{ki} n_j \cdot \phi_k
 17:        \delta_{ji} = \sum_k \alpha_{ki} V_{jk}
 18:                  I = V \alpha

 20: so the coefficients of the nodal basis in the prime basis are

 22:    \alpha = V^{-1}

 24: We will define the dual basis vectors $n_j$ using a quadrature rule.

 26: Right now, we will just use the polynomial spaces P^k. I know some elements use the space of symmetric polynomials
 27: (I think Nedelec), but we will neglect this for now. Constraints in the space, e.g. Arnold-Winther elements, can
 28: be implemented exactly as in FIAT using functionals $L_j$.

 30: I will have to count the degrees correctly for the Legendre product when we are on simplices.

 32: We will have three objects:
 33:  - Space, P: this just need point evaluation I think
 34:  - Dual Space, P'+K: This looks like a set of functionals that can act on members of P, each n is defined by a Q
 35:  - FEM: This keeps {P, P', Q}
 36: */
 37: #include <petsc-private/petscfeimpl.h> /*I "petscfe.h" I*/
 38: #include <petscdmshell.h>
 39: #include <petscdmplex.h>
 40: #include <petscblaslapack.h>

 42: PetscClassId PETSCSPACE_CLASSID = 0;

 44: PetscFunctionList PetscSpaceList              = NULL;
 45: PetscBool         PetscSpaceRegisterAllCalled = PETSC_FALSE;

 49: /*@C
 50:   PetscSpaceRegister - Adds a new PetscSpace implementation

 52:   Not Collective

 54:   Input Parameters:
 55: + name        - The name of a new user-defined creation routine
 56: - create_func - The creation routine itself

 58:   Notes:
 59:   PetscSpaceRegister() may be called multiple times to add several user-defined PetscSpaces

 61:   Sample usage:
 62: .vb
 63:     PetscSpaceRegister("my_space", MyPetscSpaceCreate);
 64: .ve

 66:   Then, your PetscSpace type can be chosen with the procedural interface via
 67: .vb
 68:     PetscSpaceCreate(MPI_Comm, PetscSpace *);
 69:     PetscSpaceSetType(PetscSpace, "my_space");
 70: .ve
 71:    or at runtime via the option
 72: .vb
 73:     -petscspace_type my_space
 74: .ve

 76:   Level: advanced

 78: .keywords: PetscSpace, register
 79: .seealso: PetscSpaceRegisterAll(), PetscSpaceRegisterDestroy()

 81: @*/
 82: PetscErrorCode PetscSpaceRegister(const char sname[], PetscErrorCode (*function)(PetscSpace))
 83: {

 87:   PetscFunctionListAdd(&PetscSpaceList, sname, function);
 88:   return(0);
 89: }

 93: /*@C
 94:   PetscSpaceSetType - Builds a particular PetscSpace

 96:   Collective on PetscSpace

 98:   Input Parameters:
 99: + sp   - The PetscSpace object
100: - name - The kind of space

102:   Options Database Key:
103: . -petscspace_type <type> - Sets the PetscSpace type; use -help for a list of available types

105:   Level: intermediate

107: .keywords: PetscSpace, set, type
108: .seealso: PetscSpaceGetType(), PetscSpaceCreate()
109: @*/
110: PetscErrorCode PetscSpaceSetType(PetscSpace sp, PetscSpaceType name)
111: {
112:   PetscErrorCode (*r)(PetscSpace);
113:   PetscBool      match;

118:   PetscObjectTypeCompare((PetscObject) sp, name, &match);
119:   if (match) return(0);

121:   if (!PetscSpaceRegisterAllCalled) {PetscSpaceRegisterAll();}
122:   PetscFunctionListFind(PetscSpaceList, name, &r);
123:   if (!r) SETERRQ1(PetscObjectComm((PetscObject) sp), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown PetscSpace type: %s", name);

125:   if (sp->ops->destroy) {
126:     (*sp->ops->destroy)(sp);
127:     sp->ops->destroy = NULL;
128:   }
129:   (*r)(sp);
130:   PetscObjectChangeTypeName((PetscObject) sp, name);
131:   return(0);
132: }

136: /*@C
137:   PetscSpaceGetType - Gets the PetscSpace type name (as a string) from the object.

139:   Not Collective

141:   Input Parameter:
142: . dm  - The PetscSpace

144:   Output Parameter:
145: . name - The PetscSpace type name

147:   Level: intermediate

149: .keywords: PetscSpace, get, type, name
150: .seealso: PetscSpaceSetType(), PetscSpaceCreate()
151: @*/
152: PetscErrorCode PetscSpaceGetType(PetscSpace sp, PetscSpaceType *name)
153: {

159:   if (!PetscSpaceRegisterAllCalled) {
160:     PetscSpaceRegisterAll();
161:   }
162:   *name = ((PetscObject) sp)->type_name;
163:   return(0);
164: }

168: /*@C
169:   PetscSpaceView - Views a PetscSpace

171:   Collective on PetscSpace

173:   Input Parameter:
174: + sp - the PetscSpace object to view
175: - v  - the viewer

177:   Level: developer

179: .seealso PetscSpaceDestroy()
180: @*/
181: PetscErrorCode PetscSpaceView(PetscSpace sp, PetscViewer v)
182: {

187:   if (!v) {
188:     PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject) sp), &v);
189:   }
190:   if (sp->ops->view) {
191:     (*sp->ops->view)(sp, v);
192:   }
193:   return(0);
194: }

198: /*
199:   PetscSpaceViewFromOptions - Processes command line options to determine if/how a PetscSpace is to be viewed.

201:   Collective on PetscSpace

203:   Input Parameters:
204: + sp   - the PetscSpace
205: . prefix - prefix to use for viewing, or NULL to use prefix of 'rnd'
206: - optionname - option to activate viewing

208:   Level: intermediate

210: .keywords: PetscSpace, view, options, database
211: .seealso: VecViewFromOptions(), MatViewFromOptions()
212: */
213: PetscErrorCode PetscSpaceViewFromOptions(PetscSpace sp, const char prefix[], const char optionname[])
214: {
215:   PetscViewer       viewer;
216:   PetscViewerFormat format;
217:   PetscBool         flg;
218:   PetscErrorCode    ierr;

221:   if (prefix) {
222:     PetscOptionsGetViewer(PetscObjectComm((PetscObject) sp), prefix, optionname, &viewer, &format, &flg);
223:   } else {
224:     PetscOptionsGetViewer(PetscObjectComm((PetscObject) sp), ((PetscObject) sp)->prefix, optionname, &viewer, &format, &flg);
225:   }
226:   if (flg) {
227:     PetscViewerPushFormat(viewer, format);
228:     PetscSpaceView(sp, viewer);
229:     PetscViewerPopFormat(viewer);
230:     PetscViewerDestroy(&viewer);
231:   }
232:   return(0);
233: }

237: /*@
238:   PetscSpaceSetFromOptions - sets parameters in a PetscSpace from the options database

240:   Collective on PetscSpace

242:   Input Parameter:
243: . sp - the PetscSpace object to set options for

245:   Options Database:
246: . -petscspace_order the approximation order of the space

248:   Level: developer

250: .seealso PetscSpaceView()
251: @*/
252: PetscErrorCode PetscSpaceSetFromOptions(PetscSpace sp)
253: {
254:   const char    *defaultType;
255:   char           name[256];
256:   PetscBool      flg;

261:   if (!((PetscObject) sp)->type_name) {
262:     defaultType = PETSCSPACEPOLYNOMIAL;
263:   } else {
264:     defaultType = ((PetscObject) sp)->type_name;
265:   }
266:   if (!PetscSpaceRegisterAllCalled) {PetscSpaceRegisterAll();}

268:   PetscObjectOptionsBegin((PetscObject) sp);
269:   PetscOptionsFList("-petscspace_type", "Linear space", "PetscSpaceSetType", PetscSpaceList, defaultType, name, 256, &flg);
270:   if (flg) {
271:     PetscSpaceSetType(sp, name);
272:   } else if (!((PetscObject) sp)->type_name) {
273:     PetscSpaceSetType(sp, defaultType);
274:   }
275:   PetscOptionsInt("-petscspace_order", "The approximation order", "PetscSpaceSetOrder", sp->order, &sp->order, NULL);
276:   if (sp->ops->setfromoptions) {
277:     (*sp->ops->setfromoptions)(sp);
278:   }
279:   /* process any options handlers added with PetscObjectAddOptionsHandler() */
280:   PetscObjectProcessOptionsHandlers((PetscObject) sp);
281:   PetscOptionsEnd();
282:   PetscSpaceViewFromOptions(sp, NULL, "-petscspace_view");
283:   return(0);
284: }

288: /*@C
289:   PetscSpaceSetUp - Construct data structures for the PetscSpace

291:   Collective on PetscSpace

293:   Input Parameter:
294: . sp - the PetscSpace object to setup

296:   Level: developer

298: .seealso PetscSpaceView(), PetscSpaceDestroy()
299: @*/
300: PetscErrorCode PetscSpaceSetUp(PetscSpace sp)
301: {

306:   if (sp->ops->setup) {(*sp->ops->setup)(sp);}
307:   return(0);
308: }

312: /*@
313:   PetscSpaceDestroy - Destroys a PetscSpace object

315:   Collective on PetscSpace

317:   Input Parameter:
318: . sp - the PetscSpace object to destroy

320:   Level: developer

322: .seealso PetscSpaceView()
323: @*/
324: PetscErrorCode PetscSpaceDestroy(PetscSpace *sp)
325: {

329:   if (!*sp) return(0);

332:   if (--((PetscObject)(*sp))->refct > 0) {*sp = 0; return(0);}
333:   ((PetscObject) (*sp))->refct = 0;
334:   DMDestroy(&(*sp)->dm);

336:   (*(*sp)->ops->destroy)(*sp);
337:   PetscHeaderDestroy(sp);
338:   return(0);
339: }

343: /*@
344:   PetscSpaceCreate - Creates an empty PetscSpace object. The type can then be set with PetscSpaceSetType().

346:   Collective on MPI_Comm

348:   Input Parameter:
349: . comm - The communicator for the PetscSpace object

351:   Output Parameter:
352: . sp - The PetscSpace object

354:   Level: beginner

356: .seealso: PetscSpaceSetType(), PETSCSPACEPOLYNOMIAL
357: @*/
358: PetscErrorCode PetscSpaceCreate(MPI_Comm comm, PetscSpace *sp)
359: {
360:   PetscSpace     s;

365:   *sp = NULL;
366:   PetscFEInitializePackage();

368:   PetscHeaderCreate(s, _p_PetscSpace, struct _PetscSpaceOps, PETSCSPACE_CLASSID, "PetscSpace", "Linear Space", "PetscSpace", comm, PetscSpaceDestroy, PetscSpaceView);
369:   PetscMemzero(s->ops, sizeof(struct _PetscSpaceOps));

371:   s->order = 0;
372:   DMShellCreate(comm, &s->dm);

374:   *sp = s;
375:   return(0);
376: }

380: /* Dimension of the space, i.e. number of basis vectors */
381: PetscErrorCode PetscSpaceGetDimension(PetscSpace sp, PetscInt *dim)
382: {

388:   *dim = 0;
389:   if (sp->ops->getdimension) {(*sp->ops->getdimension)(sp, dim);}
390:   return(0);
391: }

395: PetscErrorCode PetscSpaceGetOrder(PetscSpace sp, PetscInt *order)
396: {
400:   *order = sp->order;
401:   return(0);
402: }

406: PetscErrorCode PetscSpaceSetOrder(PetscSpace sp, PetscInt order)
407: {
410:   sp->order = order;
411:   return(0);
412: }

416: PetscErrorCode PetscSpaceEvaluate(PetscSpace sp, PetscInt npoints, const PetscReal points[], PetscReal B[], PetscReal D[], PetscReal H[])
417: {

426:   if (sp->ops->evaluate) {(*sp->ops->evaluate)(sp, npoints, points, B, D, H);}
427:   return(0);
428: }

432: PetscErrorCode PetscSpaceSetFromOptions_Polynomial(PetscSpace sp)
433: {
434:   PetscSpace_Poly *poly = (PetscSpace_Poly *) sp->data;
435:   PetscErrorCode   ierr;

438:   PetscObjectOptionsBegin((PetscObject) sp);
439:   PetscOptionsInt("-petscspace_poly_num_variables", "The number of different variables, e.g. x and y", "PetscSpacePolynomialSetNumVariables", poly->numVariables, &poly->numVariables, NULL);
440:   PetscOptionsBool("-petscspace_poly_sym", "Use only symmetric polynomials", "PetscSpacePolynomialSetSymmetric", poly->symmetric, &poly->symmetric, NULL);
441:   PetscOptionsBool("-petscspace_poly_tensor", "Use the tensor product polynomials", "PetscSpacePolynomialSetTensor", poly->tensor, &poly->tensor, NULL);
442:   PetscOptionsEnd();
443:   return(0);
444: }

448: PetscErrorCode PetscSpacePolynomialView_Ascii(PetscSpace sp, PetscViewer viewer)
449: {
450:   PetscSpace_Poly  *poly = (PetscSpace_Poly *) sp->data;
451:   PetscViewerFormat format;
452:   PetscErrorCode    ierr;

455:   PetscViewerGetFormat(viewer, &format);
456:   if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
457:     PetscViewerASCIIPrintf(viewer, "Polynomial space in %d variables of order %d", poly->numVariables, sp->order);
458:   } else {
459:     PetscViewerASCIIPrintf(viewer, "Polynomial space in %d variables of order %d", poly->numVariables, sp->order);
460:   }
461:   return(0);
462: }

466: PetscErrorCode PetscSpaceView_Polynomial(PetscSpace sp, PetscViewer viewer)
467: {
468:   PetscBool      iascii;

474:   PetscObjectTypeCompare((PetscObject) viewer, PETSCVIEWERASCII, &iascii);
475:   if (iascii) {PetscSpacePolynomialView_Ascii(sp, viewer);}
476:   return(0);
477: }

481: PetscErrorCode PetscSpaceSetUp_Polynomial(PetscSpace sp)
482: {
483:   PetscSpace_Poly *poly    = (PetscSpace_Poly *) sp->data;
484:   PetscInt         ndegree = sp->order+1;
485:   PetscInt         deg;
486:   PetscErrorCode   ierr;

489:   PetscMalloc1(ndegree, &poly->degrees);
490:   for (deg = 0; deg < ndegree; ++deg) poly->degrees[deg] = deg;
491:   return(0);
492: }

496: PetscErrorCode PetscSpaceDestroy_Polynomial(PetscSpace sp)
497: {
498:   PetscSpace_Poly *poly = (PetscSpace_Poly *) sp->data;
499:   PetscErrorCode   ierr;

502:   PetscFree(poly->degrees);
503:   PetscFree(poly);
504:   return(0);
505: }

509: PetscErrorCode PetscSpaceGetDimension_Polynomial(PetscSpace sp, PetscInt *dim)
510: {
511:   PetscSpace_Poly *poly = (PetscSpace_Poly *) sp->data;
512:   PetscInt         deg  = sp->order;
513:   PetscInt         n    = poly->numVariables, i;
514:   PetscReal        D    = 1.0;

517:   if (poly->tensor) {
518:     *dim = 1;
519:     for (i = 0; i < n; ++i) *dim *= (deg+1);
520:   } else {
521:     for (i = 1; i <= n; ++i) {
522:       D *= ((PetscReal) (deg+i))/i;
523:     }
524:     *dim = (PetscInt) (D + 0.5);
525:   }
526:   return(0);
527: }

531: /*
532:   LatticePoint_Internal - Returns all tuples of size 'len' with nonnegative integers that sum up to 'sum'.

534:   Input Parameters:
535: + len - The length of the tuple
536: . sum - The sum of all entries in the tuple
537: - ind - The current multi-index of the tuple, initialized to the 0 tuple

539:   Output Parameter:
540: + ind - The multi-index of the tuple, -1 indicates the iteration has terminated
541: . tup - A tuple of len integers addig to sum

543:   Level: developer

545: .seealso: 
546: */
547: static PetscErrorCode LatticePoint_Internal(PetscInt len, PetscInt sum, PetscInt ind[], PetscInt tup[])
548: {
549:   PetscInt       i;

553:   if (len == 1) {
554:     ind[0] = -1;
555:     tup[0] = sum;
556:   } else if (sum == 0) {
557:     for (i = 0; i < len; ++i) {ind[0] = -1; tup[i] = 0;}
558:   } else {
559:     tup[0] = sum - ind[0];
560:     LatticePoint_Internal(len-1, ind[0], &ind[1], &tup[1]);
561:     if (ind[1] < 0) {
562:       if (ind[0] == sum) {ind[0] = -1;}
563:       else               {ind[1] = 0; ++ind[0];}
564:     }
565:   }
566:   return(0);
567: }

571: /*
572:   TensorPoint_Internal - Returns all tuples of size 'len' with nonnegative integers that are less than 'max'.

574:   Input Parameters:
575: + len - The length of the tuple
576: . max - The max for all entries in the tuple
577: - ind - The current multi-index of the tuple, initialized to the 0 tuple

579:   Output Parameter:
580: + ind - The multi-index of the tuple, -1 indicates the iteration has terminated
581: . tup - A tuple of len integers less than max

583:   Level: developer

585: .seealso: 
586: */
587: static PetscErrorCode TensorPoint_Internal(PetscInt len, PetscInt max, PetscInt ind[], PetscInt tup[])
588: {
589:   PetscInt       i;

593:   if (len == 1) {
594:     tup[0] = ind[0]++;
595:     ind[0] = ind[0] >= max ? -1 : ind[0];
596:   } else if (max == 0) {
597:     for (i = 0; i < len; ++i) {ind[0] = -1; tup[i] = 0;}
598:   } else {
599:     tup[0] = ind[0];
600:     TensorPoint_Internal(len-1, max, &ind[1], &tup[1]);
601:     if (ind[1] < 0) {
602:       if (ind[0] == max-1) {ind[0] = -1;}
603:       else                 {ind[1] = 0; ++ind[0];}
604:     }
605:   }
606:   return(0);
607: }

611: PetscErrorCode PetscSpaceEvaluate_Polynomial(PetscSpace sp, PetscInt npoints, const PetscReal points[], PetscReal B[], PetscReal D[], PetscReal H[])
612: {
613:   PetscSpace_Poly *poly    = (PetscSpace_Poly *) sp->data;
614:   DM               dm      = sp->dm;
615:   PetscInt         ndegree = sp->order+1;
616:   PetscInt        *degrees = poly->degrees;
617:   PetscInt         dim     = poly->numVariables;
618:   PetscReal       *lpoints, *tmp, *LB, *LD, *LH;
619:   PetscInt        *ind, *tup;
620:   PetscInt         pdim, d, der, i, p, deg, o;
621:   PetscErrorCode   ierr;

624:   PetscSpaceGetDimension(sp, &pdim);
625:   DMGetWorkArray(dm, npoints, PETSC_REAL, &lpoints);
626:   DMGetWorkArray(dm, npoints*ndegree*3, PETSC_REAL, &tmp);
627:   if (B) {DMGetWorkArray(dm, npoints*dim*ndegree, PETSC_REAL, &LB);}
628:   if (D) {DMGetWorkArray(dm, npoints*dim*ndegree, PETSC_REAL, &LD);}
629:   if (H) {DMGetWorkArray(dm, npoints*dim*ndegree, PETSC_REAL, &LH);}
630:   for (d = 0; d < dim; ++d) {
631:     for (p = 0; p < npoints; ++p) {
632:       lpoints[p] = points[p*dim+d];
633:     }
634:     PetscDTLegendreEval(npoints, lpoints, ndegree, degrees, tmp, &tmp[1*npoints*ndegree], &tmp[2*npoints*ndegree]);
635:     /* LB, LD, LH (ndegree * dim x npoints) */
636:     for (deg = 0; deg < ndegree; ++deg) {
637:       for (p = 0; p < npoints; ++p) {
638:         if (B) LB[(deg*dim + d)*npoints + p] = tmp[(0*npoints + p)*ndegree+deg];
639:         if (D) LD[(deg*dim + d)*npoints + p] = tmp[(1*npoints + p)*ndegree+deg];
640:         if (H) LH[(deg*dim + d)*npoints + p] = tmp[(2*npoints + p)*ndegree+deg];
641:       }
642:     }
643:   }
644:   /* Multiply by A (pdim x ndegree * dim) */
645:   PetscMalloc2(dim,&ind,dim,&tup);
646:   if (B) {
647:     /* B (npoints x pdim) */
648:     if (poly->tensor) {
649:       i = 0;
650:       PetscMemzero(ind, dim * sizeof(PetscInt));
651:       while (ind[0] >= 0) {
652:         TensorPoint_Internal(dim, sp->order+1, ind, tup);
653:         for (p = 0; p < npoints; ++p) {
654:           B[p*pdim + i] = 1.0;
655:           for (d = 0; d < dim; ++d) {
656:             B[p*pdim + i] *= LB[(tup[d]*dim + d)*npoints + p];
657:           }
658:         }
659:         ++i;
660:       }
661:     } else {
662:       i = 0;
663:       for (o = 0; o <= sp->order; ++o) {
664:         PetscMemzero(ind, dim * sizeof(PetscInt));
665:         while (ind[0] >= 0) {
666:           LatticePoint_Internal(dim, o, ind, tup);
667:           for (p = 0; p < npoints; ++p) {
668:             B[p*pdim + i] = 1.0;
669:             for (d = 0; d < dim; ++d) {
670:               B[p*pdim + i] *= LB[(tup[d]*dim + d)*npoints + p];
671:             }
672:           }
673:           ++i;
674:         }
675:       }
676:     }
677:   }
678:   if (D) {
679:     /* D (npoints x pdim x dim) */
680:     i = 0;
681:     for (o = 0; o <= sp->order; ++o) {
682:       PetscMemzero(ind, dim * sizeof(PetscInt));
683:       while (ind[0] >= 0) {
684:         LatticePoint_Internal(dim, o, ind, tup);
685:         for (p = 0; p < npoints; ++p) {
686:           for (der = 0; der < dim; ++der) {
687:             D[(p*pdim + i)*dim + der] = 1.0;
688:             for (d = 0; d < dim; ++d) {
689:               if (d == der) {
690:                 D[(p*pdim + i)*dim + der] *= LD[(tup[d]*dim + d)*npoints + p];
691:               } else {
692:                 D[(p*pdim + i)*dim + der] *= LB[(tup[d]*dim + d)*npoints + p];
693:               }
694:             }
695:           }
696:         }
697:         ++i;
698:       }
699:     }
700:   }
701:   if (H) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Too lazy to code second derivatives");
702:   PetscFree2(ind,tup);
703:   if (B) {DMRestoreWorkArray(dm, npoints*dim*ndegree, PETSC_REAL, &LB);}
704:   if (D) {DMRestoreWorkArray(dm, npoints*dim*ndegree, PETSC_REAL, &LD);}
705:   if (H) {DMRestoreWorkArray(dm, npoints*dim*ndegree, PETSC_REAL, &LH);}
706:   DMRestoreWorkArray(dm, npoints*ndegree*3, PETSC_REAL, &tmp);
707:   DMRestoreWorkArray(dm, npoints, PETSC_REAL, &lpoints);
708:   return(0);
709: }

713: PetscErrorCode PetscSpaceInitialize_Polynomial(PetscSpace sp)
714: {
716:   sp->ops->setfromoptions = PetscSpaceSetFromOptions_Polynomial;
717:   sp->ops->setup          = PetscSpaceSetUp_Polynomial;
718:   sp->ops->view           = PetscSpaceView_Polynomial;
719:   sp->ops->destroy        = PetscSpaceDestroy_Polynomial;
720:   sp->ops->getdimension   = PetscSpaceGetDimension_Polynomial;
721:   sp->ops->evaluate       = PetscSpaceEvaluate_Polynomial;
722:   return(0);
723: }

725: /*MC
726:   PETSCSPACEPOLYNOMIAL = "poly" - A PetscSpace object that encapsulates a polynomial space, e.g. P1 is the space of linear polynomials.

728:   Level: intermediate

730: .seealso: PetscSpaceType, PetscSpaceCreate(), PetscSpaceSetType()
731: M*/

735: PETSC_EXTERN PetscErrorCode PetscSpaceCreate_Polynomial(PetscSpace sp)
736: {
737:   PetscSpace_Poly *poly;
738:   PetscErrorCode   ierr;

742:   PetscNewLog(sp,&poly);
743:   sp->data = poly;

745:   poly->numVariables = 0;
746:   poly->symmetric    = PETSC_FALSE;
747:   poly->tensor       = PETSC_FALSE;
748:   poly->degrees      = NULL;

750:   PetscSpaceInitialize_Polynomial(sp);
751:   return(0);
752: }

756: PetscErrorCode PetscSpacePolynomialSetSymmetric(PetscSpace sp, PetscBool sym)
757: {
758:   PetscSpace_Poly *poly = (PetscSpace_Poly *) sp->data;

762:   poly->symmetric = sym;
763:   return(0);
764: }

768: PetscErrorCode PetscSpacePolynomialGetSymmetric(PetscSpace sp, PetscBool *sym)
769: {
770:   PetscSpace_Poly *poly = (PetscSpace_Poly *) sp->data;

775:   *sym = poly->symmetric;
776:   return(0);
777: }

781: PetscErrorCode PetscSpacePolynomialSetTensor(PetscSpace sp, PetscBool tensor)
782: {
783:   PetscSpace_Poly *poly = (PetscSpace_Poly *) sp->data;

787:   poly->tensor = tensor;
788:   return(0);
789: }

793: PetscErrorCode PetscSpacePolynomialGetTensor(PetscSpace sp, PetscBool *tensor)
794: {
795:   PetscSpace_Poly *poly = (PetscSpace_Poly *) sp->data;

800:   *tensor = poly->tensor;
801:   return(0);
802: }

806: PetscErrorCode PetscSpacePolynomialSetNumVariables(PetscSpace sp, PetscInt n)
807: {
808:   PetscSpace_Poly *poly = (PetscSpace_Poly *) sp->data;

812:   poly->numVariables = n;
813:   return(0);
814: }

818: PetscErrorCode PetscSpacePolynomialGetNumVariables(PetscSpace sp, PetscInt *n)
819: {
820:   PetscSpace_Poly *poly = (PetscSpace_Poly *) sp->data;

825:   *n = poly->numVariables;
826:   return(0);
827: }

831: PetscErrorCode PetscSpaceSetFromOptions_DG(PetscSpace sp)
832: {
833:   PetscSpace_DG *dg = (PetscSpace_DG *) sp->data;

837:   PetscObjectOptionsBegin((PetscObject) sp);
838:   PetscOptionsInt("-petscspace_dg_num_variables", "The number of different variables, e.g. x and y", "PetscSpaceDGSetNumVariables", dg->numVariables, &dg->numVariables, NULL);
839:   PetscOptionsEnd();
840:   return(0);
841: }

845: PetscErrorCode PetscSpaceDGView_Ascii(PetscSpace sp, PetscViewer viewer)
846: {
847:   PetscSpace_DG    *dg = (PetscSpace_DG *) sp->data;
848:   PetscViewerFormat format;
849:   PetscErrorCode    ierr;

852:   PetscViewerGetFormat(viewer, &format);
853:   if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
854:     PetscViewerASCIIPrintf(viewer, "DG space in dimension %d:\n", dg->numVariables);
855:     PetscViewerASCIIPushTab(viewer);
856:     PetscQuadratureView(dg->quad, viewer);
857:     PetscViewerASCIIPopTab(viewer);
858:   } else {
859:     PetscViewerASCIIPrintf(viewer, "DG space in dimension %d on %d points\n", dg->numVariables, dg->quad.numPoints);
860:   }
861:   return(0);
862: }

866: PetscErrorCode PetscSpaceView_DG(PetscSpace sp, PetscViewer viewer)
867: {
868:   PetscBool      iascii;

874:   PetscObjectTypeCompare((PetscObject) viewer, PETSCVIEWERASCII, &iascii);
875:   if (iascii) {PetscSpaceDGView_Ascii(sp, viewer);}
876:   return(0);
877: }

881: PetscErrorCode PetscSpaceSetUp_DG(PetscSpace sp)
882: {
883:   PetscSpace_DG *dg = (PetscSpace_DG *) sp->data;

887:   if (!dg->quad.points && sp->order) {
888:     PetscDTGaussJacobiQuadrature(dg->numVariables, sp->order, -1.0, 1.0, &dg->quad);
889:   }
890:   return(0);
891: }

895: PetscErrorCode PetscSpaceDestroy_DG(PetscSpace sp)
896: {
897:   PetscSpace_DG *dg = (PetscSpace_DG *) sp->data;

901:   PetscQuadratureDestroy(&dg->quad);
902:   return(0);
903: }

907: PetscErrorCode PetscSpaceGetDimension_DG(PetscSpace sp, PetscInt *dim)
908: {
909:   PetscSpace_DG *dg = (PetscSpace_DG *) sp->data;

912:   *dim = dg->quad.numPoints;
913:   return(0);
914: }

918: PetscErrorCode PetscSpaceEvaluate_DG(PetscSpace sp, PetscInt npoints, const PetscReal points[], PetscReal B[], PetscReal D[], PetscReal H[])
919: {
920:   PetscSpace_DG *dg  = (PetscSpace_DG *) sp->data;
921:   PetscInt       dim = dg->numVariables, d, p;

925:   if (D || H) SETERRQ(PetscObjectComm((PetscObject) sp), PETSC_ERR_SUP, "Cannot calculate derivatives for a DG space");
926:   if (npoints != dg->quad.numPoints) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot evaluate DG space on %d points != %d size", npoints, dg->quad.numPoints);
927:   PetscMemzero(B, npoints*npoints * sizeof(PetscReal));
928:   for (p = 0; p < npoints; ++p) {
929:     for (d = 0; d < dim; ++d) {
930:       if (PetscAbsReal(points[p*dim+d] - dg->quad.points[p*dim+d]) > 1.0e-10) SETERRQ4(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot evaluate DG point (%d, %d) %g != %g", p, d, points[p*dim+d], dg->quad.points[p*dim+d]);
931:     }
932:     B[p*npoints+p] = 1.0;
933:   }
934:   return(0);
935: }

939: PetscErrorCode PetscSpaceInitialize_DG(PetscSpace sp)
940: {
942:   sp->ops->setfromoptions = PetscSpaceSetFromOptions_DG;
943:   sp->ops->setup          = PetscSpaceSetUp_DG;
944:   sp->ops->view           = PetscSpaceView_DG;
945:   sp->ops->destroy        = PetscSpaceDestroy_DG;
946:   sp->ops->getdimension   = PetscSpaceGetDimension_DG;
947:   sp->ops->evaluate       = PetscSpaceEvaluate_DG;
948:   return(0);
949: }

951: /*MC
952:   PETSCSPACEDG = "dg" - A PetscSpace object that encapsulates functions defined on a set of quadrature points.

954:   Level: intermediate

956: .seealso: PetscSpaceType, PetscSpaceCreate(), PetscSpaceSetType()
957: M*/

961: PETSC_EXTERN PetscErrorCode PetscSpaceCreate_DG(PetscSpace sp)
962: {
963:   PetscSpace_DG *dg;

968:   PetscNewLog(sp,&dg);
969:   sp->data = dg;

971:   dg->numVariables   = 0;
972:   dg->quad.dim       = 0;
973:   dg->quad.numPoints = 0;
974:   dg->quad.points    = NULL;
975:   dg->quad.weights   = NULL;

977:   PetscSpaceInitialize_DG(sp);
978:   return(0);
979: }


982: PetscClassId PETSCDUALSPACE_CLASSID = 0;

984: PetscFunctionList PetscDualSpaceList              = NULL;
985: PetscBool         PetscDualSpaceRegisterAllCalled = PETSC_FALSE;

989: /*@C
990:   PetscDualSpaceRegister - Adds a new PetscDualSpace implementation

992:   Not Collective

994:   Input Parameters:
995: + name        - The name of a new user-defined creation routine
996: - create_func - The creation routine itself

998:   Notes:
999:   PetscDualSpaceRegister() may be called multiple times to add several user-defined PetscDualSpaces

1001:   Sample usage:
1002: .vb
1003:     PetscDualSpaceRegister("my_space", MyPetscDualSpaceCreate);
1004: .ve

1006:   Then, your PetscDualSpace type can be chosen with the procedural interface via
1007: .vb
1008:     PetscDualSpaceCreate(MPI_Comm, PetscDualSpace *);
1009:     PetscDualSpaceSetType(PetscDualSpace, "my_dual_space");
1010: .ve
1011:    or at runtime via the option
1012: .vb
1013:     -petscdualspace_type my_dual_space
1014: .ve

1016:   Level: advanced

1018: .keywords: PetscDualSpace, register
1019: .seealso: PetscDualSpaceRegisterAll(), PetscDualSpaceRegisterDestroy()

1021: @*/
1022: PetscErrorCode PetscDualSpaceRegister(const char sname[], PetscErrorCode (*function)(PetscDualSpace))
1023: {

1027:   PetscFunctionListAdd(&PetscDualSpaceList, sname, function);
1028:   return(0);
1029: }

1033: /*@C
1034:   PetscDualSpaceSetType - Builds a particular PetscDualSpace

1036:   Collective on PetscDualSpace

1038:   Input Parameters:
1039: + sp   - The PetscDualSpace object
1040: - name - The kind of space

1042:   Options Database Key:
1043: . -petscdualspace_type <type> - Sets the PetscDualSpace type; use -help for a list of available types

1045:   Level: intermediate

1047: .keywords: PetscDualSpace, set, type
1048: .seealso: PetscDualSpaceGetType(), PetscDualSpaceCreate()
1049: @*/
1050: PetscErrorCode PetscDualSpaceSetType(PetscDualSpace sp, PetscDualSpaceType name)
1051: {
1052:   PetscErrorCode (*r)(PetscDualSpace);
1053:   PetscBool      match;

1058:   PetscObjectTypeCompare((PetscObject) sp, name, &match);
1059:   if (match) return(0);

1061:   if (!PetscDualSpaceRegisterAllCalled) {PetscDualSpaceRegisterAll();}
1062:   PetscFunctionListFind(PetscDualSpaceList, name, &r);
1063:   if (!r) SETERRQ1(PetscObjectComm((PetscObject) sp), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown PetscDualSpace type: %s", name);

1065:   if (sp->ops->destroy) {
1066:     (*sp->ops->destroy)(sp);
1067:     sp->ops->destroy = NULL;
1068:   }
1069:   (*r)(sp);
1070:   PetscObjectChangeTypeName((PetscObject) sp, name);
1071:   return(0);
1072: }

1076: /*@C
1077:   PetscDualSpaceGetType - Gets the PetscDualSpace type name (as a string) from the object.

1079:   Not Collective

1081:   Input Parameter:
1082: . dm  - The PetscDualSpace

1084:   Output Parameter:
1085: . name - The PetscDualSpace type name

1087:   Level: intermediate

1089: .keywords: PetscDualSpace, get, type, name
1090: .seealso: PetscDualSpaceSetType(), PetscDualSpaceCreate()
1091: @*/
1092: PetscErrorCode PetscDualSpaceGetType(PetscDualSpace sp, PetscDualSpaceType *name)
1093: {

1099:   if (!PetscDualSpaceRegisterAllCalled) {
1100:     PetscDualSpaceRegisterAll();
1101:   }
1102:   *name = ((PetscObject) sp)->type_name;
1103:   return(0);
1104: }

1108: /*@C
1109:   PetscDualSpaceView - Views a PetscDualSpace

1111:   Collective on PetscDualSpace

1113:   Input Parameter:
1114: + sp - the PetscDualSpace object to view
1115: - v  - the viewer

1117:   Level: developer

1119: .seealso PetscDualSpaceDestroy()
1120: @*/
1121: PetscErrorCode PetscDualSpaceView(PetscDualSpace sp, PetscViewer v)
1122: {

1127:   if (!v) {
1128:     PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject) sp), &v);
1129:   }
1130:   if (sp->ops->view) {
1131:     (*sp->ops->view)(sp, v);
1132:   }
1133:   return(0);
1134: }

1138: /*
1139:   PetscDualSpaceViewFromOptions - Processes command line options to determine if/how a PetscDualSpace is to be viewed.

1141:   Collective on PetscDualSpace

1143:   Input Parameters:
1144: + sp   - the PetscDualSpace
1145: . prefix - prefix to use for viewing, or NULL to use prefix of 'rnd'
1146: - optionname - option to activate viewing

1148:   Level: intermediate

1150: .keywords: PetscDualSpace, view, options, database
1151: .seealso: VecViewFromOptions(), MatViewFromOptions()
1152: */
1153: PetscErrorCode PetscDualSpaceViewFromOptions(PetscDualSpace sp, const char prefix[], const char optionname[])
1154: {
1155:   PetscViewer       viewer;
1156:   PetscViewerFormat format;
1157:   PetscBool         flg;
1158:   PetscErrorCode    ierr;

1161:   if (prefix) {
1162:     PetscOptionsGetViewer(PetscObjectComm((PetscObject) sp), prefix, optionname, &viewer, &format, &flg);
1163:   } else {
1164:     PetscOptionsGetViewer(PetscObjectComm((PetscObject) sp), ((PetscObject) sp)->prefix, optionname, &viewer, &format, &flg);
1165:   }
1166:   if (flg) {
1167:     PetscViewerPushFormat(viewer, format);
1168:     PetscDualSpaceView(sp, viewer);
1169:     PetscViewerPopFormat(viewer);
1170:     PetscViewerDestroy(&viewer);
1171:   }
1172:   return(0);
1173: }

1177: /*@
1178:   PetscDualSpaceSetFromOptions - sets parameters in a PetscDualSpace from the options database

1180:   Collective on PetscDualSpace

1182:   Input Parameter:
1183: . sp - the PetscDualSpace object to set options for

1185:   Options Database:
1186: . -petscspace_order the approximation order of the space

1188:   Level: developer

1190: .seealso PetscDualSpaceView()
1191: @*/
1192: PetscErrorCode PetscDualSpaceSetFromOptions(PetscDualSpace sp)
1193: {
1194:   const char    *defaultType;
1195:   char           name[256];
1196:   PetscBool      flg;

1201:   if (!((PetscObject) sp)->type_name) {
1202:     defaultType = PETSCDUALSPACELAGRANGE;
1203:   } else {
1204:     defaultType = ((PetscObject) sp)->type_name;
1205:   }
1206:   if (!PetscSpaceRegisterAllCalled) {PetscSpaceRegisterAll();}

1208:   PetscObjectOptionsBegin((PetscObject) sp);
1209:   PetscOptionsFList("-petscdualspace_type", "Dual space", "PetscDualSpaceSetType", PetscDualSpaceList, defaultType, name, 256, &flg);
1210:   if (flg) {
1211:     PetscDualSpaceSetType(sp, name);
1212:   } else if (!((PetscObject) sp)->type_name) {
1213:     PetscDualSpaceSetType(sp, defaultType);
1214:   }
1215:   PetscOptionsInt("-petscdualspace_order", "The approximation order", "PetscDualSpaceSetOrder", sp->order, &sp->order, NULL);
1216:   if (sp->ops->setfromoptions) {
1217:     (*sp->ops->setfromoptions)(sp);
1218:   }
1219:   /* process any options handlers added with PetscObjectAddOptionsHandler() */
1220:   PetscObjectProcessOptionsHandlers((PetscObject) sp);
1221:   PetscOptionsEnd();
1222:   PetscDualSpaceViewFromOptions(sp, NULL, "-petscdualspace_view");
1223:   return(0);
1224: }

1228: /*@C
1229:   PetscDualSpaceSetUp - Construct a basis for the PetscDualSpace

1231:   Collective on PetscDualSpace

1233:   Input Parameter:
1234: . sp - the PetscDualSpace object to setup

1236:   Level: developer

1238: .seealso PetscDualSpaceView(), PetscDualSpaceDestroy()
1239: @*/
1240: PetscErrorCode PetscDualSpaceSetUp(PetscDualSpace sp)
1241: {

1246:   if (sp->ops->setup) {(*sp->ops->setup)(sp);}
1247:   return(0);
1248: }

1252: /*@
1253:   PetscDualSpaceDestroy - Destroys a PetscDualSpace object

1255:   Collective on PetscDualSpace

1257:   Input Parameter:
1258: . sp - the PetscDualSpace object to destroy

1260:   Level: developer

1262: .seealso PetscDualSpaceView()
1263: @*/
1264: PetscErrorCode PetscDualSpaceDestroy(PetscDualSpace *sp)
1265: {
1266:   PetscInt       dim, f;

1270:   if (!*sp) return(0);

1273:   if (--((PetscObject)(*sp))->refct > 0) {*sp = 0; return(0);}
1274:   ((PetscObject) (*sp))->refct = 0;

1276:   PetscDualSpaceGetDimension(*sp, &dim);
1277:   for (f = 0; f < dim; ++f) {
1278:     PetscQuadratureDestroy(&(*sp)->functional[f]);
1279:   }
1280:   PetscFree((*sp)->functional);
1281:   DMDestroy(&(*sp)->dm);

1283:   if ((*sp)->ops->destroy) {(*(*sp)->ops->destroy)(*sp);}
1284:   PetscHeaderDestroy(sp);
1285:   return(0);
1286: }

1290: /*@
1291:   PetscDualSpaceCreate - Creates an empty PetscDualSpace object. The type can then be set with PetscDualSpaceSetType().

1293:   Collective on MPI_Comm

1295:   Input Parameter:
1296: . comm - The communicator for the PetscDualSpace object

1298:   Output Parameter:
1299: . sp - The PetscDualSpace object

1301:   Level: beginner

1303: .seealso: PetscDualSpaceSetType(), PETSCDUALSPACELAGRANGE
1304: @*/
1305: PetscErrorCode PetscDualSpaceCreate(MPI_Comm comm, PetscDualSpace *sp)
1306: {
1307:   PetscDualSpace s;

1312:   *sp = NULL;
1313:   PetscFEInitializePackage();

1315:   PetscHeaderCreate(s, _p_PetscDualSpace, struct _PetscDualSpaceOps, PETSCDUALSPACE_CLASSID, "PetscDualSpace", "Dual Space", "PetscDualSpace", comm, PetscDualSpaceDestroy, PetscDualSpaceView);
1316:   PetscMemzero(s->ops, sizeof(struct _PetscDualSpaceOps));

1318:   s->order = 0;

1320:   *sp = s;
1321:   return(0);
1322: }

1326: PetscErrorCode PetscDualSpaceGetDM(PetscDualSpace sp, DM *dm)
1327: {
1331:   *dm = sp->dm;
1332:   return(0);
1333: }

1337: PetscErrorCode PetscDualSpaceSetDM(PetscDualSpace sp, DM dm)
1338: {

1344:   DMDestroy(&sp->dm);
1345:   PetscObjectReference((PetscObject) dm);
1346:   sp->dm = dm;
1347:   return(0);
1348: }

1352: PetscErrorCode PetscDualSpaceGetOrder(PetscDualSpace sp, PetscInt *order)
1353: {
1357:   *order = sp->order;
1358:   return(0);
1359: }

1363: PetscErrorCode PetscDualSpaceSetOrder(PetscDualSpace sp, PetscInt order)
1364: {
1367:   sp->order = order;
1368:   return(0);
1369: }

1373: PetscErrorCode PetscDualSpaceGetFunctional(PetscDualSpace sp, PetscInt i, PetscQuadrature *functional)
1374: {
1375:   PetscInt       dim;

1381:   PetscDualSpaceGetDimension(sp, &dim);
1382:   if ((i < 0) || (i >= dim)) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Functional index %d must be in [0, %d)", i, dim);
1383:   *functional = sp->functional[i];
1384:   return(0);
1385: }

1389: /* Dimension of the space, i.e. number of basis vectors */
1390: PetscErrorCode PetscDualSpaceGetDimension(PetscDualSpace sp, PetscInt *dim)
1391: {

1397:   *dim = 0;
1398:   if (sp->ops->getdimension) {(*sp->ops->getdimension)(sp, dim);}
1399:   return(0);
1400: }

1404: PetscErrorCode PetscDualSpaceGetNumDof(PetscDualSpace sp, const PetscInt **numDof)
1405: {

1411:   *numDof = NULL;
1412:   if (sp->ops->getnumdof) {(*sp->ops->getnumdof)(sp, numDof);}
1413:   return(0);
1414: }

1418: PetscErrorCode PetscDualSpaceCreateReferenceCell(PetscDualSpace sp, PetscInt dim, PetscBool simplex, DM *refdm)
1419: {
1420:   DM             rdm;

1424:   DMCreate(PetscObjectComm((PetscObject) sp), &rdm);
1425:   DMSetType(rdm, DMPLEX);
1426:   DMPlexSetDimension(rdm, dim);
1427:   switch (dim) {
1428:   case 0:
1429:   {
1430:     PetscInt    numPoints[1]        = {1};
1431:     PetscInt    coneSize[1]         = {0};
1432:     PetscInt    cones[1]            = {0};
1433:     PetscInt    coneOrientations[1] = {0};
1434:     PetscScalar vertexCoords[1]     = {0.0};

1436:     DMPlexCreateFromDAG(rdm, 0, numPoints, coneSize, cones, coneOrientations, vertexCoords);
1437:   }
1438:   break;
1439:   case 1:
1440:   {
1441:     PetscInt    numPoints[2]        = {2, 1};
1442:     PetscInt    coneSize[3]         = {2, 0, 0};
1443:     PetscInt    cones[2]            = {1, 2};
1444:     PetscInt    coneOrientations[2] = {0, 0};
1445:     PetscScalar vertexCoords[2]     = {-1.0,  1.0};

1447:     DMPlexCreateFromDAG(rdm, 1, numPoints, coneSize, cones, coneOrientations, vertexCoords);
1448:   }
1449:   break;
1450:   case 2:
1451:     if (simplex) {
1452:       PetscInt    numPoints[2]        = {3, 1};
1453:       PetscInt    coneSize[4]         = {3, 0, 0, 0};
1454:       PetscInt    cones[3]            = {1, 2, 3};
1455:       PetscInt    coneOrientations[3] = {0, 0, 0};
1456:       PetscScalar vertexCoords[6]     = {-1.0, -1.0,  1.0, -1.0,  -1.0, 1.0};

1458:       DMPlexCreateFromDAG(rdm, 1, numPoints, coneSize, cones, coneOrientations, vertexCoords);
1459:     } else {
1460:       PetscInt    numPoints[2]        = {4, 1};
1461:       PetscInt    coneSize[5]         = {4, 0, 0, 0, 0};
1462:       PetscInt    cones[4]            = {1, 2, 3, 4};
1463:       PetscInt    coneOrientations[4] = {0, 0, 0, 0};
1464:       PetscScalar vertexCoords[8]     = {-1.0, -1.0,  1.0, -1.0,  1.0, 1.0,  -1.0, 1.0};

1466:       DMPlexCreateFromDAG(rdm, 1, numPoints, coneSize, cones, coneOrientations, vertexCoords);
1467:     }
1468:   break;
1469:   case 3:
1470:     if (simplex) {
1471:       PetscInt    numPoints[2]        = {4, 1};
1472:       PetscInt    coneSize[5]         = {4, 0, 0, 0, 0};
1473:       PetscInt    cones[4]            = {1, 3, 2, 4};
1474:       PetscInt    coneOrientations[4] = {0, 0, 0, 0};
1475:       PetscScalar vertexCoords[12]    = {-1.0, -1.0, -1.0,  1.0, -1.0, -1.0,  -1.0, 1.0, -1.0,  -1.0, -1.0, 1.0};

1477:       DMPlexCreateFromDAG(rdm, 1, numPoints, coneSize, cones, coneOrientations, vertexCoords);
1478:     } else {
1479:       PetscInt    numPoints[2]        = {8, 1};
1480:       PetscInt    coneSize[9]         = {8, 0, 0, 0, 0, 0, 0, 0, 0};
1481:       PetscInt    cones[8]            = {1, 4, 3, 2, 5, 6, 7, 8};
1482:       PetscInt    coneOrientations[8] = {0, 0, 0, 0, 0, 0, 0, 0};
1483:       PetscScalar vertexCoords[24]    = {-1.0, -1.0, -1.0,  1.0, -1.0, -1.0,  1.0, 1.0, -1.0,  -1.0, 1.0, -1.0,
1484:                                          -1.0, -1.0,  1.0,  1.0, -1.0,  1.0,  1.0, 1.0,  1.0,  -1.0, 1.0,  1.0};

1486:       DMPlexCreateFromDAG(rdm, 1, numPoints, coneSize, cones, coneOrientations, vertexCoords);
1487:     }
1488:   break;
1489:   default:
1490:     SETERRQ1(PetscObjectComm((PetscObject) sp), PETSC_ERR_ARG_WRONG, "Cannot create reference cell for dimension %d", dim);
1491:   }
1492:   DMPlexInterpolate(rdm, refdm);
1493:   DMPlexCopyCoordinates(rdm, *refdm);
1494:   DMDestroy(&rdm);
1495:   return(0);
1496: }

1500: PetscErrorCode PetscDualSpaceApply(PetscDualSpace sp, PetscInt f, PetscCellGeometry geom, PetscInt numComp, void (*func)(const PetscReal [], PetscScalar *, void *), void *ctx, PetscScalar *value)
1501: {
1502:   DM               dm;
1503:   PetscQuadrature  quad;
1504:   const PetscReal *v0 = geom.v0;
1505:   const PetscReal *J  = geom.J;
1506:   PetscReal        x[3];
1507:   PetscScalar     *val;
1508:   PetscInt         dim, q, c, d, d2;
1509:   PetscErrorCode   ierr;

1514:   PetscDualSpaceGetDM(sp, &dm);
1515:   DMPlexGetDimension(dm, &dim);
1516:   PetscDualSpaceGetFunctional(sp, f, &quad);
1517:   DMGetWorkArray(dm, numComp, PETSC_SCALAR, &val);
1518:   for (c = 0; c < numComp; ++c) value[c] = 0.0;
1519:   for (q = 0; q < quad.numPoints; ++q) {
1520:     for (d = 0; d < dim; ++d) {
1521:       x[d] = v0[d];
1522:       for (d2 = 0; d2 < dim; ++d2) {
1523:         x[d] += J[d*dim+d2]*(quad.points[q*dim+d2] + 1.0);
1524:       }
1525:     }
1526:     (*func)(x, val, ctx);
1527:     for (c = 0; c < numComp; ++c) {
1528:       value[c] += val[c]*quad.weights[q];
1529:     }
1530:   }
1531:   DMRestoreWorkArray(dm, numComp, PETSC_SCALAR, &val);
1532:   return(0);
1533: }

1537: PetscErrorCode PetscDualSpaceSetUp_Lagrange(PetscDualSpace sp)
1538: {
1539:   PetscDualSpace_Lag *lag = (PetscDualSpace_Lag *) sp->data;
1540:   DM                  dm    = sp->dm;
1541:   PetscInt            order = sp->order;
1542:   PetscSection        csection;
1543:   Vec                 coordinates;
1544:   PetscReal          *qpoints, *qweights;
1545:   PetscInt           *closure = NULL, closureSize, c;
1546:   PetscInt            depth, dim, pdim, *pStart, *pEnd, coneSize, d, n, f = 0;
1547:   PetscBool           simplex;
1548:   PetscErrorCode      ierr;

1551:   /* Classify element type */
1552:   DMPlexGetDimension(dm, &dim);
1553:   DMPlexGetDepth(dm, &depth);
1554:   PetscCalloc1(dim+1, &lag->numDof);
1555:   PetscMalloc2(depth+1,&pStart,depth+1,&pEnd);
1556:   for (d = 0; d <= depth; ++d) {
1557:     DMPlexGetDepthStratum(dm, d, &pStart[d], &pEnd[d]);
1558:   }
1559:   DMPlexGetConeSize(dm, pStart[depth], &coneSize);
1560:   DMGetCoordinateSection(dm, &csection);
1561:   DMGetCoordinatesLocal(dm, &coordinates);
1562:   if      (coneSize == dim+1)    simplex = PETSC_TRUE;
1563:   else if (coneSize == 1 << dim) simplex = PETSC_FALSE;
1564:   else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only support simplices and tensor product cells");
1565:   lag->simplex = simplex;
1566:   PetscDualSpaceGetDimension(sp, &pdim);
1567:   PetscMalloc1(pdim, &sp->functional);
1568:   if (!dim) {
1569:     sp->functional[f].numPoints = 1;
1570:     PetscMalloc1(sp->functional[f].numPoints, &qpoints);
1571:     PetscMalloc1(sp->functional[f].numPoints, &qweights);
1572:     qpoints[0]  = 0.0;
1573:     qweights[0] = 1.0;
1574:     sp->functional[f].points  = qpoints;
1575:     sp->functional[f].weights = qweights;
1576:     ++f;
1577:     lag->numDof[0] = 1;
1578:   } else {
1579:     DMPlexGetTransitiveClosure(dm, pStart[depth], PETSC_TRUE, &closureSize, &closure);
1580:     for (c = 0; c < closureSize*2; c += 2) {
1581:       const PetscInt p = closure[c];

1583:       if ((p >= pStart[0]) && (p < pEnd[0])) {
1584:         /* Vertices */
1585:         const PetscScalar *coords;
1586:         PetscInt           dof, off, d;

1588:         if (order < 1) continue;
1589:         sp->functional[f].numPoints = 1;
1590:         PetscMalloc1(sp->functional[f].numPoints*dim, &qpoints);
1591:         PetscMalloc1(sp->functional[f].numPoints, &qweights);
1592:         VecGetArrayRead(coordinates, &coords);
1593:         PetscSectionGetDof(csection, p, &dof);
1594:         PetscSectionGetOffset(csection, p, &off);
1595:         if (dof != dim) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of coordinates %d does not match spatial dimension %d", dof, dim);
1596:         for (d = 0; d < dof; ++d) {qpoints[d] = PetscRealPart(coords[off+d]);}
1597:         qweights[0] = 1.0;
1598:         sp->functional[f].points  = qpoints;
1599:         sp->functional[f].weights = qweights;
1600:         ++f;
1601:         VecRestoreArrayRead(coordinates, &coords);
1602:         lag->numDof[0] = 1;
1603:       } else if ((p >= pStart[1]) && (p < pEnd[1])) {
1604:         /* Edges */
1605:         PetscScalar *coords;
1606:         PetscInt     num = order-1, k;

1608:         if (order < 2) continue;
1609:         coords = NULL;
1610:         DMPlexVecGetClosure(dm, csection, coordinates, p, &n, &coords);
1611:         if (n != dim*2) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Point %d has %d coordinate values instead of %d", p, n, dim*2);
1612:         for (k = 1; k <= num; ++k) {
1613:           sp->functional[f].numPoints = 1;
1614:           PetscMalloc1(sp->functional[f].numPoints*dim, &qpoints);
1615:           PetscMalloc1(sp->functional[f].numPoints, &qweights);
1616:           for (d = 0; d < dim; ++d) {qpoints[d] = k*PetscRealPart(coords[1*dim+d] - coords[0*dim+d])/order + PetscRealPart(coords[0*dim+d]);}
1617:           qweights[0] = 1.0;
1618:           sp->functional[f].points  = qpoints;
1619:           sp->functional[f].weights = qweights;
1620:           ++f;
1621:         }
1622:         DMPlexVecRestoreClosure(dm, csection, coordinates, p, &n, &coords);
1623:         lag->numDof[1] = num;
1624:       } else if ((p >= pStart[depth-1]) && (p < pEnd[depth-1])) {
1625:         /* Faces */

1627:         if ( simplex && (order < 3)) continue;
1628:         if (!simplex && (order < 2)) continue;
1629:         lag->numDof[depth-1] = 0;
1630:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Too lazy to implement faces");
1631:       } else if ((p >= pStart[depth]) && (p < pEnd[depth])) {
1632:         /* Cells */
1633:         PetscScalar *coords = NULL;
1634:         PetscInt     csize, v, d;

1636:         if ( simplex && (order > 0) && (order < 3)) continue;
1637:         if (!simplex && (order > 0) && (order < 2)) continue;
1638:         lag->numDof[depth] = 0;
1639:         if (order > 0) {SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Too lazy to implement cells");}

1641:         sp->functional[f].numPoints = 1;
1642:         PetscMalloc1(sp->functional[f].numPoints*dim, &qpoints);
1643:         PetscMalloc1(sp->functional[f].numPoints, &qweights);
1644:         DMPlexVecGetClosure(dm, csection, coordinates, p, &csize, &coords);
1645:         if (csize%dim) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Coordinate size %d is not divisible by spatial dimension %d", csize, dim);
1646:         for (d = 0; d < dim; ++d) {
1647:           const PetscInt numVertices = csize/dim;

1649:           qpoints[d] = 0.0;
1650:           for (v = 0; v < numVertices; ++v) {
1651:             qpoints[d] += PetscRealPart(coords[v*dim+d]);
1652:           }
1653:           qpoints[d] /= numVertices;
1654:         }
1655:         DMPlexVecRestoreClosure(dm, csection, coordinates, p, &csize, &coords);
1656:         qweights[0] = 1.0;
1657:         sp->functional[f].points  = qpoints;
1658:         sp->functional[f].weights = qweights;
1659:         ++f;
1660:         lag->numDof[depth] = 1;
1661:       }
1662:     }
1663:     DMPlexRestoreTransitiveClosure(dm, pStart[depth], PETSC_TRUE, &closureSize, &closure);
1664:   }
1665:   PetscFree2(pStart,pEnd);
1666:   if (f != pdim) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of dual basis vectors %d not equal to dimension %d", f, pdim);
1667:   return(0);
1668: }

1672: PetscErrorCode PetscDualSpaceDestroy_Lagrange(PetscDualSpace sp)
1673: {
1674:   PetscDualSpace_Lag *lag = (PetscDualSpace_Lag *) sp->data;
1675:   PetscErrorCode      ierr;

1678:   PetscFree(lag->numDof);
1679:   PetscFree(lag);
1680:   return(0);
1681: }

1685: PetscErrorCode PetscDualSpaceGetDimension_Lagrange(PetscDualSpace sp, PetscInt *dim)
1686: {
1687:   PetscDualSpace_Lag *lag = (PetscDualSpace_Lag *) sp->data;
1688:   PetscInt            deg = sp->order;
1689:   PetscReal           D   = 1.0;
1690:   PetscInt            n, i;
1691:   PetscErrorCode      ierr;

1694:   DMPlexGetDimension(sp->dm, &n);
1695:   if (lag->simplex) {
1696:     for (i = 1; i <= n; ++i) {
1697:       D *= ((PetscReal) (deg+i))/i;
1698:     }
1699:     *dim = (PetscInt) (D + 0.5);
1700:   } else {
1701:     *dim = 1;
1702:     for (i = 0; i < n; ++i) *dim *= (deg+1);
1703:   }
1704:   return(0);
1705: }

1709: PetscErrorCode PetscDualSpaceGetNumDof_Lagrange(PetscDualSpace sp, const PetscInt **numDof)
1710: {
1711:   PetscDualSpace_Lag *lag = (PetscDualSpace_Lag *) sp->data;

1714:   *numDof = lag->numDof;
1715:   return(0);
1716: }

1720: PetscErrorCode PetscDualSpaceInitialize_Lagrange(PetscDualSpace sp)
1721: {
1723:   sp->ops->setfromoptions = NULL;
1724:   sp->ops->setup          = PetscDualSpaceSetUp_Lagrange;
1725:   sp->ops->view           = NULL;
1726:   sp->ops->destroy        = PetscDualSpaceDestroy_Lagrange;
1727:   sp->ops->getdimension   = PetscDualSpaceGetDimension_Lagrange;
1728:   sp->ops->getnumdof      = PetscDualSpaceGetNumDof_Lagrange;
1729:   return(0);
1730: }

1732: /*MC
1733:   PETSCDUALSPACELAGRANGE = "lagrange" - A PetscDualSpace object that encapsulates a dual space of pointwise evaluation functionals

1735:   Level: intermediate

1737: .seealso: PetscDualSpaceType, PetscDualSpaceCreate(), PetscDualSpaceSetType()
1738: M*/

1742: PETSC_EXTERN PetscErrorCode PetscDualSpaceCreate_Lagrange(PetscDualSpace sp)
1743: {
1744:   PetscDualSpace_Lag *lag;
1745:   PetscErrorCode      ierr;

1749:   PetscNewLog(sp,&lag);
1750:   sp->data = lag;

1752:   lag->numDof  = NULL;
1753:   lag->simplex = PETSC_TRUE;

1755:   PetscDualSpaceInitialize_Lagrange(sp);
1756:   return(0);
1757: }


1760: PetscClassId PETSCFE_CLASSID = 0;

1762: PetscFunctionList PetscFEList              = NULL;
1763: PetscBool         PetscFERegisterAllCalled = PETSC_FALSE;

1767: /*@C
1768:   PetscFERegister - Adds a new PetscFE implementation

1770:   Not Collective

1772:   Input Parameters:
1773: + name        - The name of a new user-defined creation routine
1774: - create_func - The creation routine itself

1776:   Notes:
1777:   PetscFERegister() may be called multiple times to add several user-defined PetscFEs

1779:   Sample usage:
1780: .vb
1781:     PetscFERegister("my_fe", MyPetscFECreate);
1782: .ve

1784:   Then, your PetscFE type can be chosen with the procedural interface via
1785: .vb
1786:     PetscFECreate(MPI_Comm, PetscFE *);
1787:     PetscFESetType(PetscFE, "my_fe");
1788: .ve
1789:    or at runtime via the option
1790: .vb
1791:     -petscfe_type my_fe
1792: .ve

1794:   Level: advanced

1796: .keywords: PetscFE, register
1797: .seealso: PetscFERegisterAll(), PetscFERegisterDestroy()

1799: @*/
1800: PetscErrorCode PetscFERegister(const char sname[], PetscErrorCode (*function)(PetscFE))
1801: {

1805:   PetscFunctionListAdd(&PetscFEList, sname, function);
1806:   return(0);
1807: }

1811: /*@C
1812:   PetscFESetType - Builds a particular PetscFE

1814:   Collective on PetscFE

1816:   Input Parameters:
1817: + fem  - The PetscFE object
1818: - name - The kind of FEM space

1820:   Options Database Key:
1821: . -petscfe_type <type> - Sets the PetscFE type; use -help for a list of available types

1823:   Level: intermediate

1825: .keywords: PetscFE, set, type
1826: .seealso: PetscFEGetType(), PetscFECreate()
1827: @*/
1828: PetscErrorCode PetscFESetType(PetscFE fem, PetscFEType name)
1829: {
1830:   PetscErrorCode (*r)(PetscFE);
1831:   PetscBool      match;

1836:   PetscObjectTypeCompare((PetscObject) fem, name, &match);
1837:   if (match) return(0);

1839:   if (!PetscFERegisterAllCalled) {PetscFERegisterAll();}
1840:   PetscFunctionListFind(PetscFEList, name, &r);
1841:   if (!r) SETERRQ1(PetscObjectComm((PetscObject) fem), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown PetscFE type: %s", name);

1843:   if (fem->ops->destroy) {
1844:     (*fem->ops->destroy)(fem);
1845:     fem->ops->destroy = NULL;
1846:   }
1847:   (*r)(fem);
1848:   PetscObjectChangeTypeName((PetscObject) fem, name);
1849:   return(0);
1850: }

1854: /*@C
1855:   PetscFEGetType - Gets the PetscFE type name (as a string) from the object.

1857:   Not Collective

1859:   Input Parameter:
1860: . dm  - The PetscFE

1862:   Output Parameter:
1863: . name - The PetscFE type name

1865:   Level: intermediate

1867: .keywords: PetscFE, get, type, name
1868: .seealso: PetscFESetType(), PetscFECreate()
1869: @*/
1870: PetscErrorCode PetscFEGetType(PetscFE fem, PetscFEType *name)
1871: {

1877:   if (!PetscFERegisterAllCalled) {
1878:     PetscFERegisterAll();
1879:   }
1880:   *name = ((PetscObject) fem)->type_name;
1881:   return(0);
1882: }

1886: /*@C
1887:   PetscFEView - Views a PetscFE

1889:   Collective on PetscFE

1891:   Input Parameter:
1892: + fem - the PetscFE object to view
1893: - v   - the viewer

1895:   Level: developer

1897: .seealso PetscFEDestroy()
1898: @*/
1899: PetscErrorCode PetscFEView(PetscFE fem, PetscViewer v)
1900: {

1905:   if (!v) {
1906:     PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject) fem), &v);
1907:   }
1908:   if (fem->ops->view) {
1909:     (*fem->ops->view)(fem, v);
1910:   }
1911:   return(0);
1912: }

1916: /*
1917:   PetscFEViewFromOptions - Processes command line options to determine if/how a PetscFE is to be viewed.

1919:   Collective on PetscFE

1921:   Input Parameters:
1922: + fem    - the PetscFE
1923: . prefix - prefix to use for viewing, or NULL to use prefix of 'rnd'
1924: - optionname - option to activate viewing

1926:   Level: intermediate

1928: .keywords: PetscFE, view, options, database
1929: .seealso: VecViewFromOptions(), MatViewFromOptions()
1930: */
1931: PetscErrorCode PetscFEViewFromOptions(PetscFE fem, const char prefix[], const char optionname[])
1932: {
1933:   PetscViewer       viewer;
1934:   PetscViewerFormat format;
1935:   PetscBool         flg;
1936:   PetscErrorCode    ierr;

1939:   if (prefix) {
1940:     PetscOptionsGetViewer(PetscObjectComm((PetscObject) fem), prefix, optionname, &viewer, &format, &flg);
1941:   } else {
1942:     PetscOptionsGetViewer(PetscObjectComm((PetscObject) fem), ((PetscObject) fem)->prefix, optionname, &viewer, &format, &flg);
1943:   }
1944:   if (flg) {
1945:     PetscViewerPushFormat(viewer, format);
1946:     PetscFEView(fem, viewer);
1947:     PetscViewerPopFormat(viewer);
1948:     PetscViewerDestroy(&viewer);
1949:   }
1950:   return(0);
1951: }

1955: /*@
1956:   PetscFESetFromOptions - sets parameters in a PetscFE from the options database

1958:   Collective on PetscFE

1960:   Input Parameter:
1961: . fem - the PetscFE object to set options for

1963:   Options Database:
1964: . -petscfe_num_blocks  the number of cell blocks to integrate concurrently
1965: . -petscfe_num_batches the number of cell batches to integrate serially

1967:   Level: developer

1969: .seealso PetscFEView()
1970: @*/
1971: PetscErrorCode PetscFESetFromOptions(PetscFE fem)
1972: {
1973:   const char    *defaultType;
1974:   char           name[256];
1975:   PetscBool      flg;

1980:   if (!((PetscObject) fem)->type_name) {
1981:     defaultType = PETSCFEBASIC;
1982:   } else {
1983:     defaultType = ((PetscObject) fem)->type_name;
1984:   }
1985:   if (!PetscFERegisterAllCalled) {PetscFERegisterAll();}

1987:   PetscObjectOptionsBegin((PetscObject) fem);
1988:   PetscOptionsFList("-petscfe_type", "Finite element space", "PetscFESetType", PetscFEList, defaultType, name, 256, &flg);
1989:   if (flg) {
1990:     PetscFESetType(fem, name);
1991:   } else if (!((PetscObject) fem)->type_name) {
1992:     PetscFESetType(fem, defaultType);
1993:   }
1994:   PetscOptionsInt("-petscfe_num_blocks", "The number of cell blocks to integrate concurrently", "PetscSpaceSetTileSizes", fem->numBlocks, &fem->numBlocks, NULL);
1995:   PetscOptionsInt("-petscfe_num_batches", "The number of cell batches to integrate serially", "PetscSpaceSetTileSizes", fem->numBatches, &fem->numBatches, NULL);
1996:   if (fem->ops->setfromoptions) {
1997:     (*fem->ops->setfromoptions)(fem);
1998:   }
1999:   /* process any options handlers added with PetscObjectAddOptionsHandler() */
2000:   PetscObjectProcessOptionsHandlers((PetscObject) fem);
2001:   PetscOptionsEnd();
2002:   PetscFEViewFromOptions(fem, NULL, "-petscfe_view");
2003:   return(0);
2004: }

2008: /*@C
2009:   PetscFESetUp - Construct data structures for the PetscFE

2011:   Collective on PetscFE

2013:   Input Parameter:
2014: . fem - the PetscFE object to setup

2016:   Level: developer

2018: .seealso PetscFEView(), PetscFEDestroy()
2019: @*/
2020: PetscErrorCode PetscFESetUp(PetscFE fem)
2021: {

2026:   if (fem->ops->setup) {(*fem->ops->setup)(fem);}
2027:   return(0);
2028: }

2032: /*@
2033:   PetscFEDestroy - Destroys a PetscFE object

2035:   Collective on PetscFE

2037:   Input Parameter:
2038: . fem - the PetscFE object to destroy

2040:   Level: developer

2042: .seealso PetscFEView()
2043: @*/
2044: PetscErrorCode PetscFEDestroy(PetscFE *fem)
2045: {

2049:   if (!*fem) return(0);

2052:   if (--((PetscObject)(*fem))->refct > 0) {*fem = 0; return(0);}
2053:   ((PetscObject) (*fem))->refct = 0;

2055:   PetscFree((*fem)->numDof);
2056:   PetscFERestoreTabulation((*fem), 0, NULL, &(*fem)->B, &(*fem)->D, NULL /*&(*fem)->H*/);
2057:   PetscSpaceDestroy(&(*fem)->basisSpace);
2058:   PetscDualSpaceDestroy(&(*fem)->dualSpace);
2059:   PetscQuadratureDestroy(&(*fem)->quadrature);

2061:   if ((*fem)->ops->destroy) {(*(*fem)->ops->destroy)(*fem);}
2062:   PetscHeaderDestroy(fem);
2063:   return(0);
2064: }

2068: /*@
2069:   PetscFECreate - Creates an empty PetscFE object. The type can then be set with PetscFESetType().

2071:   Collective on MPI_Comm

2073:   Input Parameter:
2074: . comm - The communicator for the PetscFE object

2076:   Output Parameter:
2077: . fem - The PetscFE object

2079:   Level: beginner

2081: .seealso: PetscFESetType(), PETSCFEGALERKIN
2082: @*/
2083: PetscErrorCode PetscFECreate(MPI_Comm comm, PetscFE *fem)
2084: {
2085:   PetscFE        f;

2090:   *fem = NULL;
2091:   PetscFEInitializePackage();

2093:   PetscHeaderCreate(f, _p_PetscFE, struct _PetscFEOps, PETSCFE_CLASSID, "PetscFE", "Finite Element", "PetscFE", comm, PetscFEDestroy, PetscFEView);
2094:   PetscMemzero(f->ops, sizeof(struct _PetscFEOps));

2096:   f->basisSpace    = NULL;
2097:   f->dualSpace     = NULL;
2098:   f->numComponents = 1;
2099:   f->numDof        = NULL;
2100:   f->B             = NULL;
2101:   f->D             = NULL;
2102:   f->H             = NULL;
2103:   PetscMemzero(&f->quadrature, sizeof(PetscQuadrature));
2104:   f->blockSize     = 0;
2105:   f->numBlocks     = 1;
2106:   f->batchSize     = 0;
2107:   f->numBatches    = 1;

2109:   *fem = f;
2110:   return(0);
2111: }

2115: PetscErrorCode PetscFEGetDimension(PetscFE fem, PetscInt *dim)
2116: {

2122:   PetscSpaceGetDimension(fem->basisSpace, dim);
2123:   return(0);
2124: }

2128: PetscErrorCode PetscFEGetSpatialDimension(PetscFE fem, PetscInt *dim)
2129: {
2130:   DM             dm;

2136:   PetscDualSpaceGetDM(fem->dualSpace, &dm);
2137:   DMPlexGetDimension(dm, dim);
2138:   return(0);
2139: }

2143: PetscErrorCode PetscFESetNumComponents(PetscFE fem, PetscInt comp)
2144: {
2147:   fem->numComponents = comp;
2148:   return(0);
2149: }

2153: PetscErrorCode PetscFEGetNumComponents(PetscFE fem, PetscInt *comp)
2154: {
2158:   *comp = fem->numComponents;
2159:   return(0);
2160: }

2164: PetscErrorCode PetscFESetTileSizes(PetscFE fem, PetscInt blockSize, PetscInt numBlocks, PetscInt batchSize, PetscInt numBatches)
2165: {
2168:   fem->blockSize  = blockSize;
2169:   fem->numBlocks  = numBlocks;
2170:   fem->batchSize  = batchSize;
2171:   fem->numBatches = numBatches;
2172:   return(0);
2173: }

2177: PetscErrorCode PetscFEGetTileSizes(PetscFE fem, PetscInt *blockSize, PetscInt *numBlocks, PetscInt *batchSize, PetscInt *numBatches)
2178: {
2185:   if (blockSize)  *blockSize  = fem->blockSize;
2186:   if (numBlocks)  *numBlocks  = fem->numBlocks;
2187:   if (batchSize)  *batchSize  = fem->batchSize;
2188:   if (numBatches) *numBatches = fem->numBatches;
2189:   return(0);
2190: }

2194: PetscErrorCode PetscFEGetBasisSpace(PetscFE fem, PetscSpace *sp)
2195: {
2199:   *sp = fem->basisSpace;
2200:   return(0);
2201: }

2205: PetscErrorCode PetscFESetBasisSpace(PetscFE fem, PetscSpace sp)
2206: {

2212:   PetscSpaceDestroy(&fem->basisSpace);
2213:   fem->basisSpace = sp;
2214:   PetscObjectReference((PetscObject) fem->basisSpace);
2215:   return(0);
2216: }

2220: PetscErrorCode PetscFEGetDualSpace(PetscFE fem, PetscDualSpace *sp)
2221: {
2225:   *sp = fem->dualSpace;
2226:   return(0);
2227: }

2231: PetscErrorCode PetscFESetDualSpace(PetscFE fem, PetscDualSpace sp)
2232: {

2238:   PetscDualSpaceDestroy(&fem->dualSpace);
2239:   fem->dualSpace = sp;
2240:   PetscObjectReference((PetscObject) fem->dualSpace);
2241:   return(0);
2242: }

2246: PetscErrorCode PetscFEGetQuadrature(PetscFE fem, PetscQuadrature *q)
2247: {
2251:   *q = fem->quadrature;
2252:   return(0);
2253: }

2257: PetscErrorCode PetscFESetQuadrature(PetscFE fem, PetscQuadrature q)
2258: {

2263:   PetscFERestoreTabulation(fem, 0, NULL, &fem->B, &fem->D, NULL /*&(*fem)->H*/);
2264:   PetscQuadratureDestroy(&fem->quadrature);
2265:   fem->quadrature = q;
2266:   return(0);
2267: }

2271: PetscErrorCode PetscFEGetNumDof(PetscFE fem, const PetscInt **numDof)
2272: {
2273:   const PetscInt *numDofDual;
2274:   PetscErrorCode  ierr;

2279:   PetscDualSpaceGetNumDof(fem->dualSpace, &numDofDual);
2280:   if (!fem->numDof) {
2281:     DM       dm;
2282:     PetscInt dim, d;

2284:     PetscDualSpaceGetDM(fem->dualSpace, &dm);
2285:     DMPlexGetDimension(dm, &dim);
2286:     PetscMalloc1((dim+1), &fem->numDof);
2287:     for (d = 0; d <= dim; ++d) {
2288:       fem->numDof[d] = fem->numComponents*numDofDual[d];
2289:     }
2290:   }
2291:   *numDof = fem->numDof;
2292:   return(0);
2293: }

2297: PetscErrorCode PetscFEGetDefaultTabulation(PetscFE fem, PetscReal **B, PetscReal **D, PetscReal **H)
2298: {
2299:   PetscInt         npoints = fem->quadrature.numPoints;
2300:   const PetscReal *points  = fem->quadrature.points;
2301:   PetscErrorCode   ierr;

2308:   if (!fem->B) {PetscFEGetTabulation(fem, npoints, points, &fem->B, &fem->D, NULL/*&fem->H*/);}
2309:   if (B) *B = fem->B;
2310:   if (D) *D = fem->D;
2311:   if (H) *H = fem->H;
2312:   return(0);
2313: }

2317: PetscErrorCode PetscFEGetTabulation(PetscFE fem, PetscInt npoints, const PetscReal points[], PetscReal **B, PetscReal **D, PetscReal **H)
2318: {
2319:   DM               dm;
2320:   PetscInt         pdim; /* Dimension of FE space P */
2321:   PetscInt         dim;  /* Spatial dimension */
2322:   PetscInt         comp; /* Field components */
2323:   PetscReal       *tmpB, *tmpD, *invV;
2324:   PetscInt         p, d, j, k;
2325:   PetscErrorCode   ierr;

2333:   PetscDualSpaceGetDM(fem->dualSpace, &dm);

2335:   DMPlexGetDimension(dm, &dim);
2336:   PetscSpaceGetDimension(fem->basisSpace, &pdim);
2337:   PetscFEGetNumComponents(fem, &comp);
2338:   /* if (nvalues%dim) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "The number of coordinate values %d must be divisible by the spatial dimension %d", nvalues, dim); */

2340:   if (B) {
2341:     DMGetWorkArray(dm, npoints*pdim*comp, PETSC_REAL, B);
2342:     DMGetWorkArray(dm, npoints*pdim, PETSC_REAL, &tmpB);
2343:   }
2344:   if (D) {
2345:     DMGetWorkArray(dm, npoints*pdim*comp*dim, PETSC_REAL, D);
2346:     DMGetWorkArray(dm, npoints*pdim*dim, PETSC_REAL, &tmpD);
2347:   }
2348:   if (H) {DMGetWorkArray(dm, npoints*pdim*dim*dim, PETSC_REAL, H);}
2349:   PetscSpaceEvaluate(fem->basisSpace, npoints, points, B ? tmpB : NULL, D ? tmpD : NULL, H ? *H : NULL);

2351:   DMGetWorkArray(dm, pdim*pdim, PETSC_REAL, &invV);
2352:   for (j = 0; j < pdim; ++j) {
2353:     PetscReal      *Bf;
2354:     PetscQuadrature f;
2355:     PetscInt        q;

2357:     PetscDualSpaceGetFunctional(fem->dualSpace, j, &f);
2358:     DMGetWorkArray(dm, f.numPoints*pdim, PETSC_REAL, &Bf);
2359:     PetscSpaceEvaluate(fem->basisSpace, f.numPoints, f.points, Bf, NULL, NULL);
2360:     for (k = 0; k < pdim; ++k) {
2361:       /* n_j \cdot \phi_k */
2362:       invV[j*pdim+k] = 0.0;
2363:       for (q = 0; q < f.numPoints; ++q) {
2364:         invV[j*pdim+k] += Bf[q*pdim+k]*f.weights[q];
2365:       }
2366:     }
2367:     DMRestoreWorkArray(dm, f.numPoints*pdim, PETSC_REAL, &Bf);
2368:   }
2369:   {
2370:     PetscReal    *work;
2371:     PetscBLASInt *pivots;
2372: #ifndef PETSC_USE_COMPLEX
2373:     PetscBLASInt  n = pdim, info;
2374: #endif

2376:     DMGetWorkArray(dm, pdim, PETSC_INT, &pivots);
2377:     DMGetWorkArray(dm, pdim, PETSC_REAL, &work);
2378: #ifndef PETSC_USE_COMPLEX
2379:     PetscStackCallBLAS("LAPACKgetrf", LAPACKgetrf_(&n, &n, invV, &n, pivots, &info));
2380:     PetscStackCallBLAS("LAPACKgetri", LAPACKgetri_(&n, invV, &n, pivots, work, &n, &info));
2381: #endif
2382:     DMRestoreWorkArray(dm, pdim, PETSC_INT, &pivots);
2383:     DMRestoreWorkArray(dm, pdim, PETSC_REAL, &work);
2384:   }
2385:   for (p = 0; p < npoints; ++p) {
2386:     if (B) {
2387:       /* Multiply by V^{-1} (pdim x pdim) */
2388:       for (j = 0; j < pdim; ++j) {
2389:         const PetscInt i = (p*pdim + j)*comp;
2390:         PetscInt       c;

2392:         (*B)[i] = 0.0;
2393:         for (k = 0; k < pdim; ++k) {
2394:           (*B)[i] += invV[k*pdim+j] * tmpB[p*pdim + k];
2395:         }
2396:         for (c = 1; c < comp; ++c) {
2397:           (*B)[i+c] = (*B)[i];
2398:         }
2399:       }
2400:     }
2401:     if (D) {
2402:       /* Multiply by V^{-1} (pdim x pdim) */
2403:       for (j = 0; j < pdim; ++j) {
2404:         for (d = 0; d < dim; ++d) {
2405:           const PetscInt i = ((p*pdim + j)*comp + 0)*dim + d;
2406:           PetscInt       c;

2408:           (*D)[i] = 0.0;
2409:           for (k = 0; k < pdim; ++k) {
2410:             (*D)[i] += invV[k*pdim+j] * tmpD[(p*pdim + k)*dim + d];
2411:           }
2412:           for (c = 1; c < comp; ++c) {
2413:             (*D)[((p*pdim + j)*comp + c)*dim + d] = (*D)[i];
2414:           }
2415:         }
2416:       }
2417:     }
2418:   }
2419:   DMRestoreWorkArray(dm, pdim*pdim, PETSC_REAL, &invV);
2420:   if (B) {DMRestoreWorkArray(dm, npoints*pdim, PETSC_REAL, &tmpB);}
2421:   if (D) {DMRestoreWorkArray(dm, npoints*pdim*dim, PETSC_REAL, &tmpD);}
2422:   return(0);
2423: }

2427: PetscErrorCode PetscFERestoreTabulation(PetscFE fem, PetscInt npoints, const PetscReal points[], PetscReal **B, PetscReal **D, PetscReal **H)
2428: {
2429:   DM             dm;

2434:   PetscDualSpaceGetDM(fem->dualSpace, &dm);
2435:   if (B && *B) {DMRestoreWorkArray(dm, 0, PETSC_REAL, B);}
2436:   if (D && *D) {DMRestoreWorkArray(dm, 0, PETSC_REAL, D);}
2437:   if (H && *H) {DMRestoreWorkArray(dm, 0, PETSC_REAL, H);}
2438:   return(0);
2439: }

2443: PetscErrorCode PetscFEDestroy_Basic(PetscFE fem)
2444: {
2445:   PetscFE_Basic *b = (PetscFE_Basic *) fem->data;

2449:   PetscFree(b);
2450:   return(0);
2451: }

2455: PetscErrorCode PetscFEIntegrateResidual_Basic(PetscFE fem, PetscInt Ne, PetscInt Nf, PetscFE fe[], PetscInt field, PetscCellGeometry geom, const PetscScalar coefficients[],
2456:                                               PetscInt NfAux, PetscFE feAux[], const PetscScalar coefficientsAux[],
2457:                                               void (*f0_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar f0[]),
2458:                                               void (*f1_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar f1[]),
2459:                                               PetscScalar elemVec[])
2460: {
2461:   const PetscInt  debug = 0;
2462:   PetscQuadrature quad;
2463:   PetscScalar    *f0, *f1, *u, *gradU, *a, *gradA = NULL;
2464:   PetscReal      *x, *realSpaceDer;
2465:   PetscInt        dim, numComponents = 0, numComponentsAux = 0, cOffset = 0, cOffsetAux = 0, eOffset = 0, e, f;
2466:   PetscErrorCode  ierr;

2469:   PetscFEGetSpatialDimension(fe[0], &dim);
2470:   for (f = 0; f < Nf; ++f) {
2471:     PetscInt Nc;
2472:     PetscFEGetNumComponents(fe[f], &Nc);
2473:     numComponents += Nc;
2474:   }
2475:   PetscFEGetQuadrature(fe[field], &quad);
2476:   PetscMalloc6(quad.numPoints*dim,&f0,quad.numPoints*dim*dim,&f1,numComponents,&u,numComponents*dim,&gradU,dim,&x,dim,&realSpaceDer);
2477:   for (f = 0; f < NfAux; ++f) {
2478:     PetscInt Nc;
2479:     PetscFEGetNumComponents(feAux[f], &Nc);
2480:     numComponentsAux += Nc;
2481:   }
2482:   if (NfAux) {PetscMalloc2(numComponentsAux,&a,numComponentsAux*dim,&gradA);}
2483:   for (e = 0; e < Ne; ++e) {
2484:     const PetscReal  detJ        = geom.detJ[e];
2485:     const PetscReal *v0          = &geom.v0[e*dim];
2486:     const PetscReal *J           = &geom.J[e*dim*dim];
2487:     const PetscReal *invJ        = &geom.invJ[e*dim*dim];
2488:     const PetscInt   Nq          = quad.numPoints;
2489:     const PetscReal *quadPoints  = quad.points;
2490:     const PetscReal *quadWeights = quad.weights;
2491:     PetscInt         q, f;

2493:     if (debug > 1) {
2494:       PetscPrintf(PETSC_COMM_SELF, "  detJ: %g\n", detJ);
2495: #ifndef PETSC_USE_COMPLEX
2496:       DMPrintCellMatrix(e, "invJ", dim, dim, invJ);
2497: #endif
2498:     }
2499:     for (q = 0; q < Nq; ++q) {
2500:       PetscInt         fOffset = 0,       fOffsetAux = 0;
2501:       PetscInt         dOffset = cOffset, dOffsetAux = cOffsetAux;
2502:       PetscInt         Ncomp, d, d2, f, i;

2504:       if (debug) {PetscPrintf(PETSC_COMM_SELF, "  quad point %d\n", q);}
2505:       PetscFEGetNumComponents(fe[field], &Ncomp);
2506:       for (d = 0; d < numComponents; ++d)       {u[d]     = 0.0;}
2507:       for (d = 0; d < dim*(numComponents); ++d) {gradU[d] = 0.0;}
2508:       for (d = 0; d < dim; ++d) {
2509:         x[d] = v0[d];
2510:         for (d2 = 0; d2 < dim; ++d2) {
2511:           x[d] += J[d*dim+d2]*(quadPoints[q*dim+d2] + 1.0);
2512:         }
2513:       }
2514:       for (f = 0; f < Nf; ++f) {
2515:         PetscReal *basis, *basisDer;
2516:         PetscInt   Nb, Ncomp, b, comp;

2518:         PetscFEGetDimension(fe[f], &Nb);
2519:         PetscFEGetNumComponents(fe[f], &Ncomp);
2520:         PetscFEGetDefaultTabulation(fe[f], &basis, &basisDer, NULL);
2521:         for (b = 0; b < Nb; ++b) {
2522:           for (comp = 0; comp < Ncomp; ++comp) {
2523:             const PetscInt cidx = b*Ncomp+comp;
2524:             PetscInt       d, g;

2526:             u[fOffset+comp] += coefficients[dOffset+cidx]*basis[q*Nb*Ncomp+cidx];
2527:             for (d = 0; d < dim; ++d) {
2528:               realSpaceDer[d] = 0.0;
2529:               for (g = 0; g < dim; ++g) {
2530:                 realSpaceDer[d] += invJ[g*dim+d]*basisDer[(q*Nb*Ncomp+cidx)*dim+g];
2531:               }
2532:               gradU[(fOffset+comp)*dim+d] += coefficients[dOffset+cidx]*realSpaceDer[d];
2533:             }
2534:           }
2535:         }
2536:         if (debug > 1) {
2537:           PetscInt d;
2538:           for (comp = 0; comp < Ncomp; ++comp) {
2539:             PetscPrintf(PETSC_COMM_SELF, "    u[%d,%d]: %g\n", f, comp, PetscRealPart(u[fOffset+comp]));
2540:             for (d = 0; d < dim; ++d) {
2541:               PetscPrintf(PETSC_COMM_SELF, "    gradU[%d,%d]_%c: %g\n", f, comp, 'x'+d, PetscRealPart(gradU[(fOffset+comp)*dim+d]));
2542:             }
2543:           }
2544:         }
2545:         fOffset += Ncomp;
2546:         dOffset += Nb*Ncomp;
2547:       }
2548:       for (d = 0; d < numComponentsAux; ++d)       {a[d]     = 0.0;}
2549:       for (d = 0; d < dim*(numComponentsAux); ++d) {gradA[d] = 0.0;}
2550:       for (f = 0; f < NfAux; ++f) {
2551:         PetscReal *basis, *basisDer;
2552:         PetscInt   Nb, Ncomp, b, comp;

2554:         PetscFEGetDimension(feAux[f], &Nb);
2555:         PetscFEGetNumComponents(feAux[f], &Ncomp);
2556:         PetscFEGetDefaultTabulation(feAux[f], &basis, &basisDer, NULL);
2557:         for (b = 0; b < Nb; ++b) {
2558:           for (comp = 0; comp < Ncomp; ++comp) {
2559:             const PetscInt cidx = b*Ncomp+comp;
2560:             PetscInt       d, g;

2562:             a[fOffsetAux+comp] += coefficientsAux[dOffsetAux+cidx]*basis[q*Nb*Ncomp+cidx];
2563:             for (d = 0; d < dim; ++d) {
2564:               realSpaceDer[d] = 0.0;
2565:               for (g = 0; g < dim; ++g) {
2566:                 realSpaceDer[d] += invJ[g*dim+d]*basisDer[(q*Nb*Ncomp+cidx)*dim+g];
2567:               }
2568:               gradA[(fOffsetAux+comp)*dim+d] += coefficients[dOffsetAux+cidx]*realSpaceDer[d];
2569:             }
2570:           }
2571:         }
2572:         if (debug > 1) {
2573:           PetscInt d;
2574:           for (comp = 0; comp < Ncomp; ++comp) {
2575:             PetscPrintf(PETSC_COMM_SELF, "    a[%d,%d]: %g\n", f, comp, PetscRealPart(a[fOffsetAux+comp]));
2576:             for (d = 0; d < dim; ++d) {
2577:               PetscPrintf(PETSC_COMM_SELF, "    gradA[%d,%d]_%c: %g\n", f, comp, 'x'+d, PetscRealPart(gradA[(fOffsetAux+comp)*dim+d]));
2578:             }
2579:           }
2580:         }
2581:         fOffsetAux += Ncomp;
2582:         dOffsetAux += Nb*Ncomp;
2583:       }

2585:       f0_func(u, gradU, a, gradA, x, &f0[q*Ncomp]);
2586:       for (i = 0; i < Ncomp; ++i) {
2587:         f0[q*Ncomp+i] *= detJ*quadWeights[q];
2588:       }
2589:       f1_func(u, gradU, a, gradA, x, &f1[q*Ncomp*dim]);
2590:       for (i = 0; i < Ncomp*dim; ++i) {
2591:         f1[q*Ncomp*dim+i] *= detJ*quadWeights[q];
2592:       }
2593:       if (debug > 1) {
2594:         PetscInt c,d;
2595:         for (c = 0; c < Ncomp; ++c) {
2596:           PetscPrintf(PETSC_COMM_SELF, "    f0[%d]: %g\n", c, PetscRealPart(f0[q*Ncomp+c]));
2597:           for (d = 0; d < dim; ++d) {
2598:             PetscPrintf(PETSC_COMM_SELF, "    f1[%d]_%c: %g\n", c, 'x'+d, PetscRealPart(f1[(q*Ncomp + c)*dim+d]));
2599:           }
2600:         }
2601:       }
2602:       if (q == Nq-1) {cOffset = dOffset; cOffsetAux = dOffsetAux;}
2603:     }
2604:     for (f = 0; f < Nf; ++f) {
2605:       PetscInt   Nb, Ncomp, b, comp;

2607:       PetscFEGetDimension(fe[f], &Nb);
2608:       PetscFEGetNumComponents(fe[f], &Ncomp);
2609:       if (f == field) {
2610:         PetscReal *basis;
2611:         PetscReal *basisDer;

2613:         PetscFEGetDefaultTabulation(fe[f], &basis, &basisDer, NULL);
2614:         for (b = 0; b < Nb; ++b) {
2615:           for (comp = 0; comp < Ncomp; ++comp) {
2616:             const PetscInt cidx = b*Ncomp+comp;
2617:             PetscInt       q;

2619:             elemVec[eOffset+cidx] = 0.0;
2620:             for (q = 0; q < Nq; ++q) {
2621:               PetscInt d, g;

2623:               elemVec[eOffset+cidx] += basis[q*Nb*Ncomp+cidx]*f0[q*Ncomp+comp];
2624:               for (d = 0; d < dim; ++d) {
2625:                 realSpaceDer[d] = 0.0;
2626:                 for (g = 0; g < dim; ++g) {
2627:                   realSpaceDer[d] += invJ[g*dim+d]*basisDer[(q*Nb*Ncomp+cidx)*dim+g];
2628:                 }
2629:                 elemVec[eOffset+cidx] += realSpaceDer[d]*f1[(q*Ncomp+comp)*dim+d];
2630:               }
2631:             }
2632:           }
2633:         }
2634:         if (debug > 1) {
2635:           PetscInt b, comp;

2637:           for (b = 0; b < Nb; ++b) {
2638:             for (comp = 0; comp < Ncomp; ++comp) {
2639:               PetscPrintf(PETSC_COMM_SELF, "    elemVec[%d,%d]: %g\n", b, comp, PetscRealPart(elemVec[eOffset+b*Ncomp+comp]));
2640:             }
2641:           }
2642:         }
2643:       }
2644:       eOffset += Nb*Ncomp;
2645:     }
2646:   }
2647:   PetscFree6(f0,f1,u,gradU,x,realSpaceDer);
2648:   if (NfAux) {PetscFree2(a,gradA);}
2649:   return(0);
2650: }

2654: PetscErrorCode PetscFEIntegrateBdResidual_Basic(PetscFE fem, PetscInt Ne, PetscInt Nf, PetscFE fe[], PetscInt field, PetscCellGeometry geom, const PetscScalar coefficients[],
2655:                                                 PetscInt NfAux, PetscFE feAux[], const PetscScalar coefficientsAux[],
2656:                                                 void (*f0_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], const PetscReal n[], PetscScalar f0[]),
2657:                                                 void (*f1_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], const PetscReal n[], PetscScalar f1[]),
2658:                                                 PetscScalar elemVec[])
2659: {
2660:   const PetscInt  debug = 0;
2661:   PetscQuadrature quad;
2662:   PetscScalar    *f0, *f1, *u, *gradU, *a, *gradA = NULL;
2663:   PetscReal      *x, *realSpaceDer;
2664:   PetscInt        dim, numComponents = 0, numComponentsAux = 0, cOffset = 0, cOffsetAux = 0, eOffset = 0, e, f;
2665:   PetscErrorCode  ierr;

2668:   PetscFEGetSpatialDimension(fe[0], &dim);
2669:   dim += 1; /* Spatial dimension is one higher than topological dimension */
2670:   for (f = 0; f < Nf; ++f) {
2671:     PetscInt Nc;
2672:     PetscFEGetNumComponents(fe[f], &Nc);
2673:     numComponents += Nc;
2674:   }
2675:   PetscFEGetQuadrature(fe[field], &quad);
2676:   PetscMalloc6(quad.numPoints*dim,&f0,quad.numPoints*dim*dim,&f1,numComponents,&u,numComponents*dim,&gradU,dim,&x,dim,&realSpaceDer);
2677:   for (f = 0; f < NfAux; ++f) {
2678:     PetscInt Nc;
2679:     PetscFEGetNumComponents(feAux[f], &Nc);
2680:     numComponentsAux += Nc;
2681:   }
2682:   if (NfAux) {PetscMalloc2(numComponentsAux,&a,numComponentsAux*dim,&gradA);}
2683:   for (e = 0; e < Ne; ++e) {
2684:     const PetscReal  detJ        = geom.detJ[e];
2685:     const PetscReal *v0          = &geom.v0[e*dim];
2686:     const PetscReal *n           = &geom.n[e*dim];
2687:     const PetscReal *J           = &geom.J[e*dim*dim];
2688:     const PetscReal *invJ        = &geom.invJ[e*dim*dim];
2689:     const PetscInt   Nq          = quad.numPoints;
2690:     const PetscReal *quadPoints  = quad.points;
2691:     const PetscReal *quadWeights = quad.weights;
2692:     PetscInt         q, f;

2694:     if (debug > 1) {
2695:       PetscPrintf(PETSC_COMM_SELF, "  detJ: %g\n", detJ);
2696: #ifndef PETSC_USE_COMPLEX
2697:       DMPrintCellMatrix(e, "invJ", dim, dim, invJ);
2698: #endif
2699:     }
2700:     for (q = 0; q < Nq; ++q) {
2701:       PetscInt         fOffset = 0,       fOffsetAux = 0;
2702:       PetscInt         dOffset = cOffset, dOffsetAux = cOffsetAux;
2703:       PetscInt         Ncomp, d, d2, f, i;
2704:       if (debug) {PetscPrintf(PETSC_COMM_SELF, "  quad point %d\n", q);}

2706:       PetscFEGetNumComponents(fe[field], &Ncomp);
2707:       for (d = 0; d < numComponents; ++d)       {u[d]     = 0.0;}
2708:       for (d = 0; d < dim*(numComponents); ++d) {gradU[d] = 0.0;}
2709:       for (d = 0; d < dim; ++d) {
2710:         x[d] = v0[d];
2711:         for (d2 = 0; d2 < dim-1; ++d2) {
2712:           x[d] += J[d*dim+d2]*(quadPoints[q*(dim-1)+d2] + 1.0);
2713:         }
2714:       }
2715:       for (f = 0; f < Nf; ++f) {
2716:         PetscReal *basis, *basisDer;
2717:         PetscInt   Nb, Ncomp, b, comp;

2719:         PetscFEGetDimension(fe[f], &Nb);
2720:         PetscFEGetNumComponents(fe[f], &Ncomp);
2721:         PetscFEGetDefaultTabulation(fe[f], &basis, &basisDer, NULL);
2722:         for (b = 0; b < Nb; ++b) {
2723:           for (comp = 0; comp < Ncomp; ++comp) {
2724:             const PetscInt cidx = b*Ncomp+comp;
2725:             PetscInt       d, g;

2727:             u[fOffset+comp] += coefficients[dOffset+cidx]*basis[q*Nb*Ncomp+cidx];
2728:             for (d = 0; d < dim; ++d) {
2729:               realSpaceDer[d] = 0.0;
2730:               for (g = 0; g < dim-1; ++g) {
2731:                 realSpaceDer[d] += invJ[g*dim+d]*basisDer[(q*Nb*Ncomp+cidx)*dim+g];
2732:               }
2733:               gradU[(fOffset+comp)*dim+d] += coefficients[dOffset+cidx]*realSpaceDer[d];
2734:             }
2735:           }
2736:         }
2737:         if (debug > 1) {
2738:           PetscInt d;
2739:           for (comp = 0; comp < Ncomp; ++comp) {
2740:             PetscPrintf(PETSC_COMM_SELF, "    u[%d,%d]: %g\n", f, comp, PetscRealPart(u[fOffset+comp]));
2741:             for (d = 0; d < dim; ++d) {
2742:               PetscPrintf(PETSC_COMM_SELF, "    gradU[%d,%d]_%c: %g\n", f, comp, 'x'+d, PetscRealPart(gradU[(fOffset+comp)*dim+d]));
2743:             }
2744:           }
2745:         }
2746:         fOffset += Ncomp;
2747:         dOffset += Nb*Ncomp;
2748:       }
2749:       for (d = 0; d < numComponentsAux; ++d)       {a[d]     = 0.0;}
2750:       for (d = 0; d < dim*(numComponentsAux); ++d) {gradA[d] = 0.0;}
2751:       for (f = 0; f < NfAux; ++f) {
2752:         PetscReal *basis, *basisDer;
2753:         PetscInt   Nb, Ncomp, b, comp;

2755:         PetscFEGetDimension(feAux[f], &Nb);
2756:         PetscFEGetNumComponents(feAux[f], &Ncomp);
2757:         PetscFEGetDefaultTabulation(feAux[f], &basis, &basisDer, NULL);
2758:         for (b = 0; b < Nb; ++b) {
2759:           for (comp = 0; comp < Ncomp; ++comp) {
2760:             const PetscInt cidx = b*Ncomp+comp;
2761:             PetscInt       d, g;

2763:             a[fOffsetAux+comp] += coefficientsAux[dOffsetAux+cidx]*basis[q*Nb*Ncomp+cidx];
2764:             for (d = 0; d < dim; ++d) {
2765:               realSpaceDer[d] = 0.0;
2766:               for (g = 0; g < dim-1; ++g) {
2767:                 realSpaceDer[d] += invJ[g*dim+d]*basisDer[(q*Nb*Ncomp+cidx)*dim+g];
2768:               }
2769:               gradA[(fOffsetAux+comp)*dim+d] += coefficients[dOffsetAux+cidx]*realSpaceDer[d];
2770:             }
2771:           }
2772:         }
2773:         if (debug > 1) {
2774:           PetscInt d;
2775:           for (comp = 0; comp < Ncomp; ++comp) {
2776:             PetscPrintf(PETSC_COMM_SELF, "    a[%d,%d]: %g\n", f, comp, PetscRealPart(a[fOffsetAux+comp]));
2777:             for (d = 0; d < dim; ++d) {
2778:               PetscPrintf(PETSC_COMM_SELF, "    gradA[%d,%d]_%c: %g\n", f, comp, 'x'+d, PetscRealPart(gradA[(fOffsetAux+comp)*dim+d]));
2779:             }
2780:           }
2781:         }
2782:         fOffsetAux += Ncomp;
2783:         dOffsetAux += Nb*Ncomp;
2784:       }

2786:       f0_func(u, gradU, a, gradA, x, n, &f0[q*Ncomp]);
2787:       for (i = 0; i < Ncomp; ++i) {
2788:         f0[q*Ncomp+i] *= detJ*quadWeights[q];
2789:       }
2790:       f1_func(u, gradU, a, gradA, x, n, &f1[q*Ncomp*dim]);
2791:       for (i = 0; i < Ncomp*dim; ++i) {
2792:         f1[q*Ncomp*dim+i] *= detJ*quadWeights[q];
2793:       }
2794:       if (debug > 1) {
2795:         PetscInt c,d;
2796:         for (c = 0; c < Ncomp; ++c) {
2797:           PetscPrintf(PETSC_COMM_SELF, "    f0[%d]: %g\n", c, PetscRealPart(f0[q*Ncomp+c]));
2798:           for (d = 0; d < dim; ++d) {
2799:             PetscPrintf(PETSC_COMM_SELF, "    f1[%d]_%c: %g\n", c, 'x'+d, PetscRealPart(f1[(q*Ncomp + c)*dim+d]));
2800:           }
2801:         }
2802:       }
2803:       if (q == Nq-1) {cOffset = dOffset; cOffsetAux = dOffsetAux;}
2804:     }
2805:     for (f = 0; f < Nf; ++f) {
2806:       PetscInt   Nb, Ncomp, b, comp;

2808:       PetscFEGetDimension(fe[f], &Nb);
2809:       PetscFEGetNumComponents(fe[f], &Ncomp);
2810:       if (f == field) {
2811:         PetscReal *basis;
2812:         PetscReal *basisDer;

2814:         PetscFEGetDefaultTabulation(fe[f], &basis, &basisDer, NULL);
2815:         for (b = 0; b < Nb; ++b) {
2816:           for (comp = 0; comp < Ncomp; ++comp) {
2817:             const PetscInt cidx = b*Ncomp+comp;
2818:             PetscInt       q;

2820:             elemVec[eOffset+cidx] = 0.0;
2821:             for (q = 0; q < Nq; ++q) {
2822:               PetscInt d, g;

2824:               elemVec[eOffset+cidx] += basis[q*Nb*Ncomp+cidx]*f0[q*Ncomp+comp];
2825:               for (d = 0; d < dim; ++d) {
2826:                 realSpaceDer[d] = 0.0;
2827:                 for (g = 0; g < dim-1; ++g) {
2828:                   realSpaceDer[d] += invJ[g*dim+d]*basisDer[(q*Nb*Ncomp+cidx)*dim+g];
2829:                 }
2830:                 elemVec[eOffset+cidx] += realSpaceDer[d]*f1[(q*Ncomp+comp)*dim+d];
2831:               }
2832:             }
2833:           }
2834:         }
2835:         if (debug > 1) {
2836:           PetscInt b, comp;

2838:           for (b = 0; b < Nb; ++b) {
2839:             for (comp = 0; comp < Ncomp; ++comp) {
2840:               PetscPrintf(PETSC_COMM_SELF, "    elemVec[%d,%d]: %g\n", b, comp, PetscRealPart(elemVec[eOffset+b*Ncomp+comp]));
2841:             }
2842:           }
2843:         }
2844:       }
2845:       eOffset += Nb*Ncomp;
2846:     }
2847:   }
2848:   PetscFree6(f0,f1,u,gradU,x,realSpaceDer);
2849:   if (NfAux) {PetscFree2(a,gradA);}
2850:   return(0);
2851: }

2855: PetscErrorCode PetscFEIntegrateJacobian_Basic(PetscFE fem, PetscInt Ne, PetscInt Nf, PetscFE fe[], PetscInt fieldI, PetscInt fieldJ, PetscCellGeometry geom, const PetscScalar coefficients[],
2856:                                               PetscInt NfAux, PetscFE feAux[], const PetscScalar coefficientsAux[],
2857:                                               void (*g0_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar g0[]),
2858:                                               void (*g1_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar g1[]),
2859:                                               void (*g2_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar g2[]),
2860:                                               void (*g3_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar g3[]),
2861:                                               PetscScalar elemMat[])
2862: {
2863:   const PetscInt  debug      = 0;
2864:   PetscInt        cellDof    = 0; /* Total number of dof on a cell */
2865:   PetscInt        cellDofAux = 0; /* Total number of auxiliary dof on a cell */
2866:   PetscInt        cOffset    = 0; /* Offset into coefficients[] for element e */
2867:   PetscInt        cOffsetAux = 0; /* Offset into coefficientsAux[] for element e */
2868:   PetscInt        eOffset    = 0; /* Offset into elemMat[] for element e */
2869:   PetscInt        offsetI    = 0; /* Offset into an element vector for fieldI */
2870:   PetscInt        offsetJ    = 0; /* Offset into an element vector for fieldJ */
2871:   PetscQuadrature quad;
2872:   PetscScalar    *g0, *g1, *g2, *g3, *u, *gradU, *a, *gradA = NULL;
2873:   PetscReal      *x, *realSpaceDerI, *realSpaceDerJ;
2874:   PetscReal      *basisI, *basisDerI, *basisJ, *basisDerJ;
2875:   PetscInt        NbI = 0, NcI = 0, NbJ = 0, NcJ = 0, numComponents = 0, numComponentsAux = 0;
2876:   PetscInt        dim, f, e;
2877:   PetscErrorCode  ierr;

2880:   PetscFEGetSpatialDimension(fe[fieldI], &dim);
2881:   PetscFEGetDefaultTabulation(fe[fieldI], &basisI, &basisDerI, NULL);
2882:   PetscFEGetDefaultTabulation(fe[fieldJ], &basisJ, &basisDerJ, NULL);
2883:   for (f = 0; f < Nf; ++f) {
2884:     PetscInt Nb, Nc;

2886:     PetscFEGetDimension(fe[f], &Nb);
2887:     PetscFEGetNumComponents(fe[f], &Nc);
2888:     if (f == fieldI) {offsetI = cellDof; NbI = Nb; NcI = Nc;}
2889:     if (f == fieldJ) {offsetJ = cellDof; NbJ = Nb; NcJ = Nc;}
2890:     numComponents += Nc;
2891:     cellDof += Nb*Nc;
2892:   }
2893:   PetscFEGetQuadrature(fe[fieldI], &quad);
2894:   PetscMalloc4(NcI*NcJ,&g0,NcI*NcJ*dim,&g1,NcI*NcJ*dim,&g2,NcI*NcJ*dim*dim,&g3);
2895:   PetscMalloc5(numComponents,&u,numComponents*dim,&gradU,dim,&x,dim,&realSpaceDerI,dim,&realSpaceDerJ);
2896:   for (f = 0; f < NfAux; ++f) {
2897:     PetscInt Nb, Nc;
2898:     PetscFEGetDimension(feAux[f], &Nb);
2899:     PetscFEGetNumComponents(feAux[f], &Nc);
2900:     numComponentsAux += Nc;
2901:     cellDofAux       += Nb*Nc;
2902:   }
2903:   if (NfAux) {PetscMalloc2(numComponentsAux,&a,numComponentsAux*dim,&gradA);}
2904:   for (e = 0; e < Ne; ++e) {
2905:     const PetscReal  detJ        = geom.detJ[e];
2906:     const PetscReal *v0          = &geom.v0[e*dim];
2907:     const PetscReal *J           = &geom.J[e*dim*dim];
2908:     const PetscReal *invJ        = &geom.invJ[e*dim*dim];
2909:     const PetscInt   Nq          = quad.numPoints;
2910:     const PetscReal *quadPoints  = quad.points;
2911:     const PetscReal *quadWeights = quad.weights;
2912:     PetscInt         f, g, q;

2914:     for (f = 0; f < NbI; ++f) {
2915:       for (g = 0; g < NbJ; ++g) {
2916:         for (q = 0; q < Nq; ++q) {
2917:           PetscInt    fOffset    = 0;          /* Offset into u[] for field_q (like offsetI) */
2918:           PetscInt    dOffset    = cOffset;    /* Offset into coefficients[] for field_q */
2919:           PetscInt    fOffsetAux = 0;          /* Offset into a[] for field_q (like offsetI) */
2920:           PetscInt    dOffsetAux = cOffsetAux; /* Offset into coefficientsAux[] for field_q */
2921:           PetscInt    field_q, d, d2;
2922:           PetscInt    fc, gc, c;

2924:           if (debug) {PetscPrintf(PETSC_COMM_SELF, "  quad point %d\n", q);}
2925:           for (d = 0; d < numComponents; ++d)     {u[d]     = 0.0;}
2926:           for (d = 0; d < dim*numComponents; ++d) {gradU[d] = 0.0;}
2927:           for (d = 0; d < dim; ++d) {
2928:             x[d] = v0[d];
2929:             for (d2 = 0; d2 < dim; ++d2) {
2930:               x[d] += J[d*dim+d2]*(quadPoints[q*dim+d2] + 1.0);
2931:             }
2932:           }
2933:           for (field_q = 0; field_q < Nf; ++field_q) {
2934:             PetscReal *basis, *basisDer;
2935:             PetscInt   Nb, Ncomp, b, comp;

2937:             PetscFEGetDimension(fe[field_q], &Nb);
2938:             PetscFEGetNumComponents(fe[field_q], &Ncomp);
2939:             PetscFEGetDefaultTabulation(fe[field_q], &basis, &basisDer, NULL);
2940:             for (b = 0; b < Nb; ++b) {
2941:               for (comp = 0; comp < Ncomp; ++comp) {
2942:                 const PetscInt cidx = b*Ncomp+comp;
2943:                 PetscInt       d1, d2;

2945:                 u[fOffset+comp] += coefficients[dOffset+cidx]*basis[q*Nb*Ncomp+cidx];
2946:                 for (d1 = 0; d1 < dim; ++d1) {
2947:                   realSpaceDerI[d1] = 0.0;
2948:                   for (d2 = 0; d2 < dim; ++d2) {
2949:                     realSpaceDerI[d1] += invJ[d2*dim+d1]*basisDer[(q*Nb*Ncomp+cidx)*dim+d2];
2950:                   }
2951:                   gradU[(fOffset+comp)*dim+d1] += coefficients[dOffset+cidx]*realSpaceDerI[d1];
2952:                 }
2953:               }
2954:             }
2955:             if (debug > 1) {
2956:               for (comp = 0; comp < Ncomp; ++comp) {
2957:                 PetscPrintf(PETSC_COMM_SELF, "    u[%d,%d]: %g\n", f, comp, PetscRealPart(u[fOffset+comp]));
2958:                 for (d = 0; d < dim; ++d) {
2959:                   PetscPrintf(PETSC_COMM_SELF, "    gradU[%d,%d]_%c: %g\n", f, comp, 'x'+d, PetscRealPart(gradU[(fOffset+comp)*dim+d]));
2960:                 }
2961:               }
2962:             }
2963:             fOffset += Ncomp;
2964:             dOffset += Nb*Ncomp;
2965:           }
2966:           for (d = 0; d < numComponentsAux; ++d)       {a[d]     = 0.0;}
2967:           for (d = 0; d < dim*(numComponentsAux); ++d) {gradA[d] = 0.0;}
2968:           for (field_q = 0; field_q < NfAux; ++field_q) {
2969:             PetscReal *basis, *basisDer;
2970:             PetscInt   Nb, Ncomp, b, comp;

2972:             PetscFEGetDimension(feAux[field_q], &Nb);
2973:             PetscFEGetNumComponents(feAux[field_q], &Ncomp);
2974:             PetscFEGetDefaultTabulation(feAux[field_q], &basis, &basisDer, NULL);
2975:             for (b = 0; b < Nb; ++b) {
2976:               for (comp = 0; comp < Ncomp; ++comp) {
2977:                 const PetscInt cidx = b*Ncomp+comp;
2978:                 PetscInt       d1, d2;

2980:                 a[fOffsetAux+comp] += coefficientsAux[dOffsetAux+cidx]*basis[q*Nb*Ncomp+cidx];
2981:                 for (d1 = 0; d1 < dim; ++d1) {
2982:                   realSpaceDerI[d1] = 0.0;
2983:                   for (d2 = 0; d2 < dim; ++d2) {
2984:                     realSpaceDerI[d1] += invJ[d2*dim+d1]*basisDer[(q*Nb*Ncomp+cidx)*dim+d2];
2985:                   }
2986:                   gradA[(fOffsetAux+comp)*dim+d1] += coefficientsAux[dOffsetAux+cidx]*realSpaceDerI[d1];
2987:                 }
2988:               }
2989:             }
2990:             if (debug > 1) {
2991:               for (comp = 0; comp < Ncomp; ++comp) {
2992:                 PetscPrintf(PETSC_COMM_SELF, "    a[%d,%d]: %g\n", f, comp, PetscRealPart(a[fOffsetAux+comp]));
2993:                 for (d = 0; d < dim; ++d) {
2994:                   PetscPrintf(PETSC_COMM_SELF, "    gradA[%d,%d]_%c: %g\n", f, comp, 'x'+d, PetscRealPart(gradA[(fOffsetAux+comp)*dim+d]));
2995:                 }
2996:               }
2997:             }
2998:             fOffsetAux += Ncomp;
2999:             dOffsetAux += Nb*Ncomp;
3000:           }

3002:           PetscMemzero(g0, NcI*NcJ         * sizeof(PetscScalar));
3003:           PetscMemzero(g1, NcI*NcJ*dim     * sizeof(PetscScalar));
3004:           PetscMemzero(g2, NcI*NcJ*dim     * sizeof(PetscScalar));
3005:           PetscMemzero(g3, NcI*NcJ*dim*dim * sizeof(PetscScalar));
3006:           if (g0_func) {
3007:             g0_func(u, gradU, a, gradA, x, g0);
3008:             for (c = 0; c < NcI*NcJ; ++c) {g0[c] *= detJ*quadWeights[q];}
3009:           }
3010:           if (g1_func) {
3011:             g1_func(u, gradU, a, gradA, x, g1);
3012:             for (c = 0; c < NcI*NcJ*dim; ++c) {g1[c] *= detJ*quadWeights[q];}
3013:           }
3014:           if (g2_func) {
3015:             g2_func(u, gradU, a, gradA, x, g2);
3016:             for (c = 0; c < NcI*NcJ*dim; ++c) {g2[c] *= detJ*quadWeights[q];}
3017:           }
3018:           if (g3_func) {
3019:             g3_func(u, gradU, a, gradA, x, g3);
3020:             for (c = 0; c < NcI*NcJ*dim*dim; ++c) {g3[c] *= detJ*quadWeights[q];}
3021:           }

3023:           for (fc = 0; fc < NcI; ++fc) {
3024:             const PetscInt fidx = f*NcI+fc; /* Test function basis index */
3025:             const PetscInt i    = offsetI+fidx; /* Element matrix row */
3026:             for (gc = 0; gc < NcJ; ++gc) {
3027:               const PetscInt gidx = g*NcJ+gc; /* Trial function basis index */
3028:               const PetscInt j    = offsetJ+gidx; /* Element matrix column */
3029:               PetscInt       d, d2;

3031:               for (d = 0; d < dim; ++d) {
3032:                 realSpaceDerI[d] = 0.0;
3033:                 realSpaceDerJ[d] = 0.0;
3034:                 for (d2 = 0; d2 < dim; ++d2) {
3035:                   realSpaceDerI[d] += invJ[d2*dim+d]*basisDerI[(q*NbI*NcI+fidx)*dim+d2];
3036:                   realSpaceDerJ[d] += invJ[d2*dim+d]*basisDerJ[(q*NbJ*NcJ+gidx)*dim+d2];
3037:                 }
3038:               }
3039:               elemMat[eOffset+i*cellDof+j] += basisI[q*NbI*NcI+fidx]*g0[fc*NcJ+gc]*basisJ[q*NbJ*NcJ+gidx];
3040:               for (d = 0; d < dim; ++d) {
3041:                 elemMat[eOffset+i*cellDof+j] += basisI[q*NbI*NcI+fidx]*g1[(fc*NcJ+gc)*dim+d]*realSpaceDerJ[d];
3042:                 elemMat[eOffset+i*cellDof+j] += realSpaceDerI[d]*g2[(fc*NcJ+gc)*dim+d]*basisJ[q*NbJ*NcJ+gidx];
3043:                 for (d2 = 0; d2 < dim; ++d2) {
3044:                   elemMat[eOffset+i*cellDof+j] += realSpaceDerI[d]*g3[((fc*NcJ+gc)*dim+d)*dim+d2]*realSpaceDerJ[d2];
3045:                 }
3046:               }
3047:             }
3048:           }
3049:         }
3050:       }
3051:     }
3052:     if (debug > 1) {
3053:       PetscInt fc, f, gc, g;

3055:       PetscPrintf(PETSC_COMM_SELF, "Element matrix for fields %d and %d\n", fieldI, fieldJ);
3056:       for (fc = 0; fc < NcI; ++fc) {
3057:         for (f = 0; f < NbI; ++f) {
3058:           const PetscInt i = offsetI + f*NcI+fc;
3059:           for (gc = 0; gc < NcJ; ++gc) {
3060:             for (g = 0; g < NbJ; ++g) {
3061:               const PetscInt j = offsetJ + g*NcJ+gc;
3062:               PetscPrintf(PETSC_COMM_SELF, "    elemMat[%d,%d,%d,%d]: %g\n", f, fc, g, gc, PetscRealPart(elemMat[eOffset+i*cellDof+j]));
3063:             }
3064:           }
3065:           PetscPrintf(PETSC_COMM_SELF, "\n");
3066:         }
3067:       }
3068:     }
3069:     cOffset    += cellDof;
3070:     cOffsetAux += cellDofAux;
3071:     eOffset    += cellDof*cellDof;
3072:   }
3073:   PetscFree4(g0,g1,g2,g3);
3074:   PetscFree5(u,gradU,x,realSpaceDerI,realSpaceDerJ);
3075:   if (NfAux) {PetscFree2(a,gradA);}
3076:   return(0);
3077: }

3081: PetscErrorCode PetscFEInitialize_Basic(PetscFE fem)
3082: {
3084:   fem->ops->setfromoptions          = NULL;
3085:   fem->ops->setup                   = NULL;
3086:   fem->ops->view                    = NULL;
3087:   fem->ops->destroy                 = PetscFEDestroy_Basic;
3088:   fem->ops->integrateresidual       = PetscFEIntegrateResidual_Basic;
3089:   fem->ops->integratebdresidual     = PetscFEIntegrateBdResidual_Basic;
3090:   fem->ops->integratejacobianaction = NULL/* PetscFEIntegrateJacobianAction_Basic */;
3091:   fem->ops->integratejacobian       = PetscFEIntegrateJacobian_Basic;
3092:   return(0);
3093: }

3095: /*MC
3096:   PETSCFEBASIC = "basic" - A PetscFE object that integrates with basic tiling and no vectorization

3098:   Level: intermediate

3100: .seealso: PetscFEType, PetscFECreate(), PetscFESetType()
3101: M*/

3105: PETSC_EXTERN PetscErrorCode PetscFECreate_Basic(PetscFE fem)
3106: {
3107:   PetscFE_Basic *b;

3112:   PetscNewLog(fem,&b);
3113:   fem->data = b;

3115:   PetscFEInitialize_Basic(fem);
3116:   return(0);
3117: }

3121: PetscErrorCode PetscFEDestroy_Nonaffine(PetscFE fem)
3122: {
3123:   PetscFE_Nonaffine *na = (PetscFE_Nonaffine *) fem->data;

3127:   PetscFree(na);
3128:   return(0);
3129: }

3133: PetscErrorCode PetscFEIntegrateResidual_Nonaffine(PetscFE fem, PetscInt Ne, PetscInt Nf, PetscFE fe[], PetscInt field, PetscCellGeometry geom, const PetscScalar coefficients[],
3134:                                                   PetscInt NfAux, PetscFE feAux[], const PetscScalar coefficientsAux[],
3135:                                                   void (*f0_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar f0[]),
3136:                                                   void (*f1_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar f1[]),
3137:                                                   PetscScalar elemVec[])
3138: {
3139:   const PetscInt  debug = 0;
3140:   PetscQuadrature quad;
3141:   PetscScalar    *f0, *f1, *u, *gradU, *a, *gradA;
3142:   PetscReal      *x, *realSpaceDer;
3143:   PetscInt        dim, numComponents = 0, numComponentsAux = 0, cOffset = 0, cOffsetAux = 0, eOffset = 0, e, f;
3144:   PetscErrorCode  ierr;

3147:   PetscFEGetSpatialDimension(fe[0], &dim);
3148:   for (f = 0; f < Nf; ++f) {
3149:     PetscInt Nc;
3150:     PetscFEGetNumComponents(fe[f], &Nc);
3151:     numComponents += Nc;
3152:   }
3153:   PetscFEGetQuadrature(fe[field], &quad);
3154:   PetscMalloc6(quad.numPoints*dim,&f0,quad.numPoints*dim*dim,&f1,numComponents,&u,numComponents*dim,&gradU,dim,&x,dim,&realSpaceDer);
3155:   for (f = 0; f < NfAux; ++f) {
3156:     PetscInt Nc;
3157:     PetscFEGetNumComponents(feAux[f], &Nc);
3158:     numComponentsAux += Nc;
3159:   }
3160:   if (NfAux) {PetscMalloc2(numComponentsAux,&a,numComponentsAux*dim,&gradA);}
3161:   for (e = 0; e < Ne; ++e) {
3162:     const PetscInt   Nq          = quad.numPoints;
3163:     const PetscReal *quadPoints  = quad.points;
3164:     const PetscReal *quadWeights = quad.weights;
3165:     PetscInt         q, f;

3167:     for (q = 0; q < Nq; ++q) {
3168:       const PetscReal  detJ    = geom.detJ[e*Nq+q];
3169:       const PetscReal *v0      = &geom.v0[(e*Nq+q)*dim];
3170:       const PetscReal *J       = &geom.J[(e*Nq+q)*dim*dim];
3171:       const PetscReal *invJ    = &geom.invJ[(e*Nq+q)*dim*dim];
3172:       PetscInt         fOffset = 0,       fOffsetAux = 0;
3173:       PetscInt         dOffset = cOffset, dOffsetAux = cOffsetAux;
3174:       PetscInt         Ncomp, d, d2, f, i;

3176:       if (debug) {PetscPrintf(PETSC_COMM_SELF, "  quad point %d\n", q);}
3177:       if (debug > 1) {
3178:         PetscPrintf(PETSC_COMM_SELF, "  detJ: %g\n", detJ);
3179: #ifndef PETSC_USE_COMPLEX
3180:         DMPrintCellMatrix(e, "invJ", dim, dim, invJ);
3181: #endif
3182:       }
3183:       PetscFEGetNumComponents(fe[field], &Ncomp);
3184:       for (d = 0; d < numComponents; ++d)       {u[d]     = 0.0;}
3185:       for (d = 0; d < dim*(numComponents); ++d) {gradU[d] = 0.0;}
3186:       for (d = 0; d < dim; ++d) {
3187:         x[d] = v0[d];
3188:         for (d2 = 0; d2 < dim; ++d2) {
3189:           x[d] += J[d*dim+d2]*(quadPoints[q*dim+d2] + 1.0);
3190:         }
3191:       }
3192:       for (f = 0; f < Nf; ++f) {
3193:         PetscReal *basis, *basisDer;
3194:         PetscInt   Nb, Ncomp, b, comp;

3196:         PetscFEGetDimension(fe[f], &Nb);
3197:         PetscFEGetNumComponents(fe[f], &Ncomp);
3198:         PetscFEGetDefaultTabulation(fe[f], &basis, &basisDer, NULL);
3199:         for (b = 0; b < Nb; ++b) {
3200:           for (comp = 0; comp < Ncomp; ++comp) {
3201:             const PetscInt cidx = b*Ncomp+comp;
3202:             PetscInt       d, g;

3204:             u[fOffset+comp] += coefficients[dOffset+cidx]*basis[q*Nb*Ncomp+cidx];
3205:             for (d = 0; d < dim; ++d) {
3206:               realSpaceDer[d] = 0.0;
3207:               for (g = 0; g < dim; ++g) {
3208:                 realSpaceDer[d] += invJ[g*dim+d]*basisDer[(q*Nb*Ncomp+cidx)*dim+g];
3209:               }
3210:               gradU[(fOffset+comp)*dim+d] += coefficients[dOffset+cidx]*realSpaceDer[d];
3211:             }
3212:           }
3213:         }
3214:         if (debug > 1) {
3215:           PetscInt d;
3216:           for (comp = 0; comp < Ncomp; ++comp) {
3217:             PetscPrintf(PETSC_COMM_SELF, "    u[%d,%d]: %g\n", f, comp, PetscRealPart(u[fOffset+comp]));
3218:             for (d = 0; d < dim; ++d) {
3219:               PetscPrintf(PETSC_COMM_SELF, "    gradU[%d,%d]_%c: %g\n", f, comp, 'x'+d, PetscRealPart(gradU[(fOffset+comp)*dim+d]));
3220:             }
3221:           }
3222:         }
3223:         fOffset += Ncomp;
3224:         dOffset += Nb*Ncomp;
3225:       }
3226:       for (d = 0; d < numComponentsAux; ++d)       {a[d]     = 0.0;}
3227:       for (d = 0; d < dim*(numComponentsAux); ++d) {gradA[d] = 0.0;}
3228:       for (f = 0; f < NfAux; ++f) {
3229:         PetscReal *basis, *basisDer;
3230:         PetscInt   Nb, Ncomp, b, comp;

3232:         PetscFEGetDimension(feAux[f], &Nb);
3233:         PetscFEGetNumComponents(feAux[f], &Ncomp);
3234:         PetscFEGetDefaultTabulation(feAux[f], &basis, &basisDer, NULL);
3235:         for (b = 0; b < Nb; ++b) {
3236:           for (comp = 0; comp < Ncomp; ++comp) {
3237:             const PetscInt cidx = b*Ncomp+comp;
3238:             PetscInt       d, g;

3240:             a[fOffsetAux+comp] += coefficientsAux[dOffsetAux+cidx]*basis[q*Nb*Ncomp+cidx];
3241:             for (d = 0; d < dim; ++d) {
3242:               realSpaceDer[d] = 0.0;
3243:               for (g = 0; g < dim; ++g) {
3244:                 realSpaceDer[d] += invJ[g*dim+d]*basisDer[(q*Nb*Ncomp+cidx)*dim+g];
3245:               }
3246:               gradA[(fOffsetAux+comp)*dim+d] += coefficients[dOffsetAux+cidx]*realSpaceDer[d];
3247:             }
3248:           }
3249:         }
3250:         if (debug > 1) {
3251:           PetscInt d;
3252:           for (comp = 0; comp < Ncomp; ++comp) {
3253:             PetscPrintf(PETSC_COMM_SELF, "    a[%d,%d]: %g\n", f, comp, PetscRealPart(a[fOffsetAux+comp]));
3254:             for (d = 0; d < dim; ++d) {
3255:               PetscPrintf(PETSC_COMM_SELF, "    gradA[%d,%d]_%c: %g\n", f, comp, 'x'+d, PetscRealPart(gradA[(fOffsetAux+comp)*dim+d]));
3256:             }
3257:           }
3258:         }
3259:         fOffsetAux += Ncomp;
3260:         dOffsetAux += Nb*Ncomp;
3261:       }

3263:       f0_func(u, gradU, a, gradA, x, &f0[q*Ncomp]);
3264:       for (i = 0; i < Ncomp; ++i) {
3265:         f0[q*Ncomp+i] *= detJ*quadWeights[q];
3266:       }
3267:       f1_func(u, gradU, a, gradA, x, &f1[q*Ncomp*dim]);
3268:       for (i = 0; i < Ncomp*dim; ++i) {
3269:         f1[q*Ncomp*dim+i] *= detJ*quadWeights[q];
3270:       }
3271:       if (debug > 1) {
3272:         PetscInt c,d;
3273:         for (c = 0; c < Ncomp; ++c) {
3274:           PetscPrintf(PETSC_COMM_SELF, "    f0[%d]: %g\n", c, PetscRealPart(f0[q*Ncomp+c]));
3275:           for (d = 0; d < dim; ++d) {
3276:             PetscPrintf(PETSC_COMM_SELF, "    f1[%d]_%c: %g\n", c, 'x'+d, PetscRealPart(f1[(q*Ncomp + c)*dim+d]));
3277:           }
3278:         }
3279:       }
3280:       if (q == Nq-1) {cOffset = dOffset; cOffsetAux = dOffsetAux;}
3281:     }
3282:     for (f = 0; f < Nf; ++f) {
3283:       PetscInt   Nb, Ncomp, b, comp;

3285:       PetscFEGetDimension(fe[f], &Nb);
3286:       PetscFEGetNumComponents(fe[f], &Ncomp);
3287:       if (f == field) {
3288:         PetscReal *basis;
3289:         PetscReal *basisDer;

3291:         PetscFEGetDefaultTabulation(fe[f], &basis, &basisDer, NULL);
3292:         for (b = 0; b < Nb; ++b) {
3293:           for (comp = 0; comp < Ncomp; ++comp) {
3294:             const PetscInt cidx = b*Ncomp+comp;
3295:             PetscInt       q;

3297:             elemVec[eOffset+cidx] = 0.0;
3298:             for (q = 0; q < Nq; ++q) {
3299:               const PetscReal *invJ = &geom.invJ[(e*Nq+q)*dim*dim];
3300:               PetscInt d, g;

3302:               elemVec[eOffset+cidx] += basis[q*Nb*Ncomp+cidx]*f0[q*Ncomp+comp];
3303:               for (d = 0; d < dim; ++d) {
3304:                 realSpaceDer[d] = 0.0;
3305:                 for (g = 0; g < dim; ++g) {
3306:                   realSpaceDer[d] += invJ[g*dim+d]*basisDer[(q*Nb*Ncomp+cidx)*dim+g];
3307:                 }
3308:                 elemVec[eOffset+cidx] += realSpaceDer[d]*f1[(q*Ncomp+comp)*dim+d];
3309:               }
3310:             }
3311:           }
3312:         }
3313:         if (debug > 1) {
3314:           PetscInt b, comp;

3316:           for (b = 0; b < Nb; ++b) {
3317:             for (comp = 0; comp < Ncomp; ++comp) {
3318:               PetscPrintf(PETSC_COMM_SELF, "    elemVec[%d,%d]: %g\n", b, comp, PetscRealPart(elemVec[eOffset+b*Ncomp+comp]));
3319:             }
3320:           }
3321:         }
3322:       }
3323:       eOffset += Nb*Ncomp;
3324:     }
3325:   }
3326:   PetscFree6(f0,f1,u,gradU,x,realSpaceDer);
3327:   if (NfAux) {PetscFree2(a,gradA);}
3328:   return(0);
3329: }

3333: PetscErrorCode PetscFEIntegrateBdResidual_Nonaffine(PetscFE fem, PetscInt Ne, PetscInt Nf, PetscFE fe[], PetscInt field, PetscCellGeometry geom, const PetscScalar coefficients[],
3334:                                                 PetscInt NfAux, PetscFE feAux[], const PetscScalar coefficientsAux[],
3335:                                                 void (*f0_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], const PetscReal n[], PetscScalar f0[]),
3336:                                                 void (*f1_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], const PetscReal n[], PetscScalar f1[]),
3337:                                                 PetscScalar elemVec[])
3338: {
3339:   const PetscInt  debug = 0;
3340:   PetscQuadrature quad;
3341:   PetscScalar    *f0, *f1, *u, *gradU, *a, *gradA;
3342:   PetscReal      *x, *realSpaceDer;
3343:   PetscInt        dim, numComponents = 0, numComponentsAux = 0, cOffset = 0, cOffsetAux = 0, eOffset = 0, e, f;
3344:   PetscErrorCode  ierr;

3347:   PetscFEGetSpatialDimension(fe[0], &dim);
3348:   dim += 1; /* Spatial dimension is one higher than topological dimension */
3349:   for (f = 0; f < Nf; ++f) {
3350:     PetscInt Nc;
3351:     PetscFEGetNumComponents(fe[f], &Nc);
3352:     numComponents += Nc;
3353:   }
3354:   PetscFEGetQuadrature(fe[field], &quad);
3355:   PetscMalloc6(quad.numPoints*dim,&f0,quad.numPoints*dim*dim,&f1,numComponents,&u,numComponents*dim,&gradU,dim,&x,dim,&realSpaceDer);
3356:   for (f = 0; f < NfAux; ++f) {
3357:     PetscInt Nc;
3358:     PetscFEGetNumComponents(feAux[f], &Nc);
3359:     numComponentsAux += Nc;
3360:   }
3361:   if (NfAux) {PetscMalloc2(numComponentsAux,&a,numComponentsAux*dim,&gradA);}
3362:   for (e = 0; e < Ne; ++e) {
3363:     const PetscInt   Nq          = quad.numPoints;
3364:     const PetscReal *quadPoints  = quad.points;
3365:     const PetscReal *quadWeights = quad.weights;
3366:     PetscInt         q, f;

3368:     for (q = 0; q < Nq; ++q) {
3369:       const PetscReal  detJ    = geom.detJ[e*Nq+q];
3370:       const PetscReal *v0      = &geom.v0[(e*Nq+q)*dim];
3371:       const PetscReal *n       = &geom.n[(e*Nq+q)*dim];
3372:       const PetscReal *J       = &geom.J[(e*Nq+q)*dim*dim];
3373:       const PetscReal *invJ    = &geom.invJ[(e*Nq+q)*dim*dim];
3374:       PetscInt         fOffset = 0,       fOffsetAux = 0;
3375:       PetscInt         dOffset = cOffset, dOffsetAux = cOffsetAux;
3376:       PetscInt         Ncomp, d, d2, f, i;
3377:       if (debug) {PetscPrintf(PETSC_COMM_SELF, "  quad point %d\n", q);}
3378:       if (debug > 1) {
3379:         PetscPrintf(PETSC_COMM_SELF, "  detJ: %g\n", detJ);
3380: #ifndef PETSC_USE_COMPLEX
3381:         DMPrintCellMatrix(e, "invJ", dim, dim, invJ);
3382: #endif
3383:       }

3385:       PetscFEGetNumComponents(fe[field], &Ncomp);
3386:       for (d = 0; d < numComponents; ++d)       {u[d]     = 0.0;}
3387:       for (d = 0; d < dim*(numComponents); ++d) {gradU[d] = 0.0;}
3388:       for (d = 0; d < dim; ++d) {
3389:         x[d] = v0[d];
3390:         for (d2 = 0; d2 < dim-1; ++d2) {
3391:           x[d] += J[d*dim+d2]*(quadPoints[q*(dim-1)+d2] + 1.0);
3392:         }
3393:       }
3394:       for (f = 0; f < Nf; ++f) {
3395:         PetscReal *basis, *basisDer;
3396:         PetscInt   Nb, Ncomp, b, comp;

3398:         PetscFEGetDimension(fe[f], &Nb);
3399:         PetscFEGetNumComponents(fe[f], &Ncomp);
3400:         PetscFEGetDefaultTabulation(fe[f], &basis, &basisDer, NULL);
3401:         for (b = 0; b < Nb; ++b) {
3402:           for (comp = 0; comp < Ncomp; ++comp) {
3403:             const PetscInt cidx = b*Ncomp+comp;
3404:             PetscInt       d, g;

3406:             u[fOffset+comp] += coefficients[dOffset+cidx]*basis[q*Nb*Ncomp+cidx];
3407:             for (d = 0; d < dim; ++d) {
3408:               realSpaceDer[d] = 0.0;
3409:               for (g = 0; g < dim-1; ++g) {
3410:                 realSpaceDer[d] += invJ[g*dim+d]*basisDer[(q*Nb*Ncomp+cidx)*dim+g];
3411:               }
3412:               gradU[(fOffset+comp)*dim+d] += coefficients[dOffset+cidx]*realSpaceDer[d];
3413:             }
3414:           }
3415:         }
3416:         if (debug > 1) {
3417:           PetscInt d;
3418:           for (comp = 0; comp < Ncomp; ++comp) {
3419:             PetscPrintf(PETSC_COMM_SELF, "    u[%d,%d]: %g\n", f, comp, PetscRealPart(u[fOffset+comp]));
3420:             for (d = 0; d < dim; ++d) {
3421:               PetscPrintf(PETSC_COMM_SELF, "    gradU[%d,%d]_%c: %g\n", f, comp, 'x'+d, PetscRealPart(gradU[(fOffset+comp)*dim+d]));
3422:             }
3423:           }
3424:         }
3425:         fOffset += Ncomp;
3426:         dOffset += Nb*Ncomp;
3427:       }
3428:       for (d = 0; d < numComponentsAux; ++d)       {a[d]     = 0.0;}
3429:       for (d = 0; d < dim*(numComponentsAux); ++d) {gradA[d] = 0.0;}
3430:       for (f = 0; f < NfAux; ++f) {
3431:         PetscReal *basis, *basisDer;
3432:         PetscInt   Nb, Ncomp, b, comp;

3434:         PetscFEGetDimension(feAux[f], &Nb);
3435:         PetscFEGetNumComponents(feAux[f], &Ncomp);
3436:         PetscFEGetDefaultTabulation(feAux[f], &basis, &basisDer, NULL);
3437:         for (b = 0; b < Nb; ++b) {
3438:           for (comp = 0; comp < Ncomp; ++comp) {
3439:             const PetscInt cidx = b*Ncomp+comp;
3440:             PetscInt       d, g;

3442:             a[fOffsetAux+comp] += coefficientsAux[dOffsetAux+cidx]*basis[q*Nb*Ncomp+cidx];
3443:             for (d = 0; d < dim; ++d) {
3444:               realSpaceDer[d] = 0.0;
3445:               for (g = 0; g < dim-1; ++g) {
3446:                 realSpaceDer[d] += invJ[g*dim+d]*basisDer[(q*Nb*Ncomp+cidx)*dim+g];
3447:               }
3448:               gradA[(fOffsetAux+comp)*dim+d] += coefficients[dOffsetAux+cidx]*realSpaceDer[d];
3449:             }
3450:           }
3451:         }
3452:         if (debug > 1) {
3453:           PetscInt d;
3454:           for (comp = 0; comp < Ncomp; ++comp) {
3455:             PetscPrintf(PETSC_COMM_SELF, "    a[%d,%d]: %g\n", f, comp, PetscRealPart(a[fOffsetAux+comp]));
3456:             for (d = 0; d < dim; ++d) {
3457:               PetscPrintf(PETSC_COMM_SELF, "    gradA[%d,%d]_%c: %g\n", f, comp, 'x'+d, PetscRealPart(gradA[(fOffsetAux+comp)*dim+d]));
3458:             }
3459:           }
3460:         }
3461:         fOffsetAux += Ncomp;
3462:         dOffsetAux += Nb*Ncomp;
3463:       }

3465:       f0_func(u, gradU, a, gradA, x, n, &f0[q*Ncomp]);
3466:       for (i = 0; i < Ncomp; ++i) {
3467:         f0[q*Ncomp+i] *= detJ*quadWeights[q];
3468:       }
3469:       f1_func(u, gradU, a, gradA, x, n, &f1[q*Ncomp*dim]);
3470:       for (i = 0; i < Ncomp*dim; ++i) {
3471:         f1[q*Ncomp*dim+i] *= detJ*quadWeights[q];
3472:       }
3473:       if (debug > 1) {
3474:         PetscInt c,d;
3475:         for (c = 0; c < Ncomp; ++c) {
3476:           PetscPrintf(PETSC_COMM_SELF, "    f0[%d]: %g\n", c, PetscRealPart(f0[q*Ncomp+c]));
3477:           for (d = 0; d < dim; ++d) {
3478:             PetscPrintf(PETSC_COMM_SELF, "    f1[%d]_%c: %g\n", c, 'x'+d, PetscRealPart(f1[(q*Ncomp + c)*dim+d]));
3479:           }
3480:         }
3481:       }
3482:       if (q == Nq-1) {cOffset = dOffset; cOffsetAux = dOffsetAux;}
3483:     }
3484:     for (f = 0; f < Nf; ++f) {
3485:       PetscInt   Nb, Ncomp, b, comp;

3487:       PetscFEGetDimension(fe[f], &Nb);
3488:       PetscFEGetNumComponents(fe[f], &Ncomp);
3489:       if (f == field) {
3490:         PetscReal *basis;
3491:         PetscReal *basisDer;

3493:         PetscFEGetDefaultTabulation(fe[f], &basis, &basisDer, NULL);
3494:         for (b = 0; b < Nb; ++b) {
3495:           for (comp = 0; comp < Ncomp; ++comp) {
3496:             const PetscInt cidx = b*Ncomp+comp;
3497:             PetscInt       q;

3499:             elemVec[eOffset+cidx] = 0.0;
3500:             for (q = 0; q < Nq; ++q) {
3501:               const PetscReal *invJ = &geom.invJ[(e*Nq+q)*dim*dim];
3502:               PetscInt d, g;

3504:               elemVec[eOffset+cidx] += basis[q*Nb*Ncomp+cidx]*f0[q*Ncomp+comp];
3505:               for (d = 0; d < dim; ++d) {
3506:                 realSpaceDer[d] = 0.0;
3507:                 for (g = 0; g < dim-1; ++g) {
3508:                   realSpaceDer[d] += invJ[g*dim+d]*basisDer[(q*Nb*Ncomp+cidx)*dim+g];
3509:                 }
3510:                 elemVec[eOffset+cidx] += realSpaceDer[d]*f1[(q*Ncomp+comp)*dim+d];
3511:               }
3512:             }
3513:           }
3514:         }
3515:         if (debug > 1) {
3516:           PetscInt b, comp;

3518:           for (b = 0; b < Nb; ++b) {
3519:             for (comp = 0; comp < Ncomp; ++comp) {
3520:               PetscPrintf(PETSC_COMM_SELF, "    elemVec[%d,%d]: %g\n", b, comp, PetscRealPart(elemVec[eOffset+b*Ncomp+comp]));
3521:             }
3522:           }
3523:         }
3524:       }
3525:       eOffset += Nb*Ncomp;
3526:     }
3527:   }
3528:   PetscFree6(f0,f1,u,gradU,x,realSpaceDer);
3529:   if (NfAux) {PetscFree2(a,gradA);}
3530:   return(0);
3531: }

3535: PetscErrorCode PetscFEIntegrateJacobian_Nonaffine(PetscFE fem, PetscInt Ne, PetscInt Nf, PetscFE fe[], PetscInt fieldI, PetscInt fieldJ, PetscCellGeometry geom, const PetscScalar coefficients[],
3536:                                               PetscInt NfAux, PetscFE feAux[], const PetscScalar coefficientsAux[],
3537:                                               void (*g0_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar g0[]),
3538:                                               void (*g1_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar g1[]),
3539:                                               void (*g2_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar g2[]),
3540:                                               void (*g3_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar g3[]),
3541:                                               PetscScalar elemMat[])
3542: {
3543:   const PetscInt  debug      = 0;
3544:   PetscInt        cellDof    = 0; /* Total number of dof on a cell */
3545:   PetscInt        cellDofAux = 0; /* Total number of auxiliary dof on a cell */
3546:   PetscInt        cOffset    = 0; /* Offset into coefficients[] for element e */
3547:   PetscInt        cOffsetAux = 0; /* Offset into coefficientsAux[] for element e */
3548:   PetscInt        eOffset    = 0; /* Offset into elemMat[] for element e */
3549:   PetscInt        offsetI    = 0; /* Offset into an element vector for fieldI */
3550:   PetscInt        offsetJ    = 0; /* Offset into an element vector for fieldJ */
3551:   PetscQuadrature quad;
3552:   PetscScalar    *g0, *g1, *g2, *g3, *u, *gradU, *a, *gradA;
3553:   PetscReal      *x, *realSpaceDerI, *realSpaceDerJ;
3554:   PetscReal      *basisI, *basisDerI, *basisJ, *basisDerJ;
3555:   PetscInt        NbI = 0, NcI = 0, NbJ = 0, NcJ = 0, numComponents = 0, numComponentsAux = 0;
3556:   PetscInt        dim, f, e;
3557:   PetscErrorCode  ierr;

3560:   PetscFEGetSpatialDimension(fe[fieldI], &dim);
3561:   PetscFEGetDefaultTabulation(fe[fieldI], &basisI, &basisDerI, NULL);
3562:   PetscFEGetDefaultTabulation(fe[fieldJ], &basisJ, &basisDerJ, NULL);
3563:   for (f = 0; f < Nf; ++f) {
3564:     PetscInt Nb, Nc;

3566:     PetscFEGetDimension(fe[f], &Nb);
3567:     PetscFEGetNumComponents(fe[f], &Nc);
3568:     if (f == fieldI) {offsetI = cellDof; NbI = Nb; NcI = Nc;}
3569:     if (f == fieldJ) {offsetJ = cellDof; NbJ = Nb; NcJ = Nc;}
3570:     numComponents += Nc;
3571:     cellDof += Nb*Nc;
3572:   }
3573:   PetscFEGetQuadrature(fe[fieldI], &quad);
3574:   PetscMalloc4(NcI*NcJ,&g0,NcI*NcJ*dim,&g1,NcI*NcJ*dim,&g2,NcI*NcJ*dim*dim,&g3);
3575:   PetscMalloc5(numComponents,&u,numComponents*dim,&gradU,dim,&x,dim,&realSpaceDerI,dim,&realSpaceDerJ);
3576:   for (f = 0; f < NfAux; ++f) {
3577:     PetscInt Nb, Nc;
3578:     PetscFEGetDimension(feAux[f], &Nb);
3579:     PetscFEGetNumComponents(feAux[f], &Nc);
3580:     numComponentsAux += Nc;
3581:     cellDofAux       += Nb*Nc;
3582:   }
3583:   if (NfAux) {PetscMalloc2(numComponentsAux,&a,numComponentsAux*dim,&gradA);}
3584:   for (e = 0; e < Ne; ++e) {
3585:     const PetscInt   Nq          = quad.numPoints;
3586:     const PetscReal *quadPoints  = quad.points;
3587:     const PetscReal *quadWeights = quad.weights;
3588:     PetscInt         f, g, q;

3590:     for (f = 0; f < NbI; ++f) {
3591:       for (g = 0; g < NbJ; ++g) {
3592:         for (q = 0; q < Nq; ++q) {
3593:           const PetscReal  detJ    = geom.detJ[e*Nq+q];
3594:           const PetscReal *v0      = &geom.v0[(e*Nq+q)*dim];
3595:           const PetscReal *J       = &geom.J[(e*Nq+q)*dim*dim];
3596:           const PetscReal *invJ    = &geom.invJ[(e*Nq+q)*dim*dim];
3597:           PetscInt    fOffset    = 0;          /* Offset into u[] for field_q (like offsetI) */
3598:           PetscInt    dOffset    = cOffset;    /* Offset into coefficients[] for field_q */
3599:           PetscInt    fOffsetAux = 0;          /* Offset into a[] for field_q (like offsetI) */
3600:           PetscInt    dOffsetAux = cOffsetAux; /* Offset into coefficientsAux[] for field_q */
3601:           PetscInt    field_q, d, d2;
3602:           PetscInt    fc, gc, c;

3604:           if (debug) {PetscPrintf(PETSC_COMM_SELF, "  quad point %d\n", q);}
3605:           for (d = 0; d < numComponents; ++d)     {u[d]     = 0.0;}
3606:           for (d = 0; d < dim*numComponents; ++d) {gradU[d] = 0.0;}
3607:           for (d = 0; d < dim; ++d) {
3608:             x[d] = v0[d];
3609:             for (d2 = 0; d2 < dim; ++d2) {
3610:               x[d] += J[d*dim+d2]*(quadPoints[q*dim+d2] + 1.0);
3611:             }
3612:           }
3613:           for (field_q = 0; field_q < Nf; ++field_q) {
3614:             PetscReal *basis, *basisDer;
3615:             PetscInt   Nb, Ncomp, b, comp;

3617:             PetscFEGetDimension(fe[field_q], &Nb);
3618:             PetscFEGetNumComponents(fe[field_q], &Ncomp);
3619:             PetscFEGetDefaultTabulation(fe[field_q], &basis, &basisDer, NULL);
3620:             for (b = 0; b < Nb; ++b) {
3621:               for (comp = 0; comp < Ncomp; ++comp) {
3622:                 const PetscInt cidx = b*Ncomp+comp;
3623:                 PetscInt       d1, d2;

3625:                 u[fOffset+comp] += coefficients[dOffset+cidx]*basis[q*Nb*Ncomp+cidx];
3626:                 for (d1 = 0; d1 < dim; ++d1) {
3627:                   realSpaceDerI[d1] = 0.0;
3628:                   for (d2 = 0; d2 < dim; ++d2) {
3629:                     realSpaceDerI[d1] += invJ[d2*dim+d1]*basisDer[(q*Nb*Ncomp+cidx)*dim+d2];
3630:                   }
3631:                   gradU[(fOffset+comp)*dim+d1] += coefficients[dOffset+cidx]*realSpaceDerI[d1];
3632:                 }
3633:               }
3634:             }
3635:             if (debug > 1) {
3636:               for (comp = 0; comp < Ncomp; ++comp) {
3637:                 PetscPrintf(PETSC_COMM_SELF, "    u[%d,%d]: %g\n", f, comp, PetscRealPart(u[fOffset+comp]));
3638:                 for (d = 0; d < dim; ++d) {
3639:                   PetscPrintf(PETSC_COMM_SELF, "    gradU[%d,%d]_%c: %g\n", f, comp, 'x'+d, PetscRealPart(gradU[(fOffset+comp)*dim+d]));
3640:                 }
3641:               }
3642:             }
3643:             fOffset += Ncomp;
3644:             dOffset += Nb*Ncomp;
3645:           }
3646:           for (d = 0; d < numComponentsAux; ++d)       {a[d]     = 0.0;}
3647:           for (d = 0; d < dim*(numComponentsAux); ++d) {gradA[d] = 0.0;}
3648:           for (field_q = 0; field_q < NfAux; ++field_q) {
3649:             PetscReal *basis, *basisDer;
3650:             PetscInt   Nb, Ncomp, b, comp;

3652:             PetscFEGetDimension(feAux[field_q], &Nb);
3653:             PetscFEGetNumComponents(feAux[field_q], &Ncomp);
3654:             PetscFEGetDefaultTabulation(feAux[field_q], &basis, &basisDer, NULL);
3655:             for (b = 0; b < Nb; ++b) {
3656:               for (comp = 0; comp < Ncomp; ++comp) {
3657:                 const PetscInt cidx = b*Ncomp+comp;
3658:                 PetscInt       d1, d2;

3660:                 a[fOffsetAux+comp] += coefficientsAux[dOffsetAux+cidx]*basis[q*Nb*Ncomp+cidx];
3661:                 for (d1 = 0; d1 < dim; ++d1) {
3662:                   realSpaceDerI[d1] = 0.0;
3663:                   for (d2 = 0; d2 < dim; ++d2) {
3664:                     realSpaceDerI[d1] += invJ[d2*dim+d1]*basisDer[(q*Nb*Ncomp+cidx)*dim+d2];
3665:                   }
3666:                   gradA[(fOffsetAux+comp)*dim+d1] += coefficientsAux[dOffsetAux+cidx]*realSpaceDerI[d1];
3667:                 }
3668:               }
3669:             }
3670:             if (debug > 1) {
3671:               for (comp = 0; comp < Ncomp; ++comp) {
3672:                 PetscPrintf(PETSC_COMM_SELF, "    a[%d,%d]: %g\n", f, comp, PetscRealPart(a[fOffsetAux+comp]));
3673:                 for (d = 0; d < dim; ++d) {
3674:                   PetscPrintf(PETSC_COMM_SELF, "    gradA[%d,%d]_%c: %g\n", f, comp, 'x'+d, PetscRealPart(gradA[(fOffsetAux+comp)*dim+d]));
3675:                 }
3676:               }
3677:             }
3678:             fOffsetAux += Ncomp;
3679:             dOffsetAux += Nb*Ncomp;
3680:           }

3682:           PetscMemzero(g0, NcI*NcJ         * sizeof(PetscScalar));
3683:           PetscMemzero(g1, NcI*NcJ*dim     * sizeof(PetscScalar));
3684:           PetscMemzero(g2, NcI*NcJ*dim     * sizeof(PetscScalar));
3685:           PetscMemzero(g3, NcI*NcJ*dim*dim * sizeof(PetscScalar));
3686:           if (g0_func) {
3687:             g0_func(u, gradU, a, gradA, x, g0);
3688:             for (c = 0; c < NcI*NcJ; ++c) {g0[c] *= detJ*quadWeights[q];}
3689:           }
3690:           if (g1_func) {
3691:             g1_func(u, gradU, a, gradA, x, g1);
3692:             for (c = 0; c < NcI*NcJ*dim; ++c) {g1[c] *= detJ*quadWeights[q];}
3693:           }
3694:           if (g2_func) {
3695:             g2_func(u, gradU, a, gradA, x, g2);
3696:             for (c = 0; c < NcI*NcJ*dim; ++c) {g2[c] *= detJ*quadWeights[q];}
3697:           }
3698:           if (g3_func) {
3699:             g3_func(u, gradU, a, gradA, x, g3);
3700:             for (c = 0; c < NcI*NcJ*dim*dim; ++c) {g3[c] *= detJ*quadWeights[q];}
3701:           }

3703:           for (fc = 0; fc < NcI; ++fc) {
3704:             const PetscInt fidx = f*NcI+fc; /* Test function basis index */
3705:             const PetscInt i    = offsetI+fidx; /* Element matrix row */
3706:             for (gc = 0; gc < NcJ; ++gc) {
3707:               const PetscInt gidx = g*NcJ+gc; /* Trial function basis index */
3708:               const PetscInt j    = offsetJ+gidx; /* Element matrix column */
3709:               PetscInt       d, d2;

3711:               for (d = 0; d < dim; ++d) {
3712:                 realSpaceDerI[d] = 0.0;
3713:                 realSpaceDerJ[d] = 0.0;
3714:                 for (d2 = 0; d2 < dim; ++d2) {
3715:                   realSpaceDerI[d] += invJ[d2*dim+d]*basisDerI[(q*NbI*NcI+fidx)*dim+d2];
3716:                   realSpaceDerJ[d] += invJ[d2*dim+d]*basisDerJ[(q*NbJ*NcJ+gidx)*dim+d2];
3717:                 }
3718:               }
3719:               elemMat[eOffset+i*cellDof+j] += basisI[q*NbI*NcI+fidx]*g0[fc*NcJ+gc]*basisJ[q*NbJ*NcJ+gidx];
3720:               for (d = 0; d < dim; ++d) {
3721:                 elemMat[eOffset+i*cellDof+j] += basisI[q*NbI*NcI+fidx]*g1[(fc*NcJ+gc)*dim+d]*realSpaceDerJ[d];
3722:                 elemMat[eOffset+i*cellDof+j] += realSpaceDerI[d]*g2[(fc*NcJ+gc)*dim+d]*basisJ[q*NbJ*NcJ+gidx];
3723:                 for (d2 = 0; d2 < dim; ++d2) {
3724:                   elemMat[eOffset+i*cellDof+j] += realSpaceDerI[d]*g3[((fc*NcJ+gc)*dim+d)*dim+d2]*realSpaceDerJ[d2];
3725:                 }
3726:               }
3727:             }
3728:           }
3729:         }
3730:       }
3731:     }
3732:     if (debug > 1) {
3733:       PetscInt fc, f, gc, g;

3735:       PetscPrintf(PETSC_COMM_SELF, "Element matrix for fields %d and %d\n", fieldI, fieldJ);
3736:       for (fc = 0; fc < NcI; ++fc) {
3737:         for (f = 0; f < NbI; ++f) {
3738:           const PetscInt i = offsetI + f*NcI+fc;
3739:           for (gc = 0; gc < NcJ; ++gc) {
3740:             for (g = 0; g < NbJ; ++g) {
3741:               const PetscInt j = offsetJ + g*NcJ+gc;
3742:               PetscPrintf(PETSC_COMM_SELF, "    elemMat[%d,%d,%d,%d]: %g\n", f, fc, g, gc, PetscRealPart(elemMat[eOffset+i*cellDof+j]));
3743:             }
3744:           }
3745:           PetscPrintf(PETSC_COMM_SELF, "\n");
3746:         }
3747:       }
3748:     }
3749:     cOffset    += cellDof;
3750:     cOffsetAux += cellDofAux;
3751:     eOffset    += cellDof*cellDof;
3752:   }
3753:   PetscFree4(g0,g1,g2,g3);
3754:   PetscFree5(u,gradU,x,realSpaceDerI,realSpaceDerJ);
3755:   if (NfAux) {PetscFree2(a,gradA);}
3756:   return(0);
3757: }

3761: PetscErrorCode PetscFEInitialize_Nonaffine(PetscFE fem)
3762: {
3764:   fem->ops->setfromoptions          = NULL;
3765:   fem->ops->setup                   = NULL;
3766:   fem->ops->view                    = NULL;
3767:   fem->ops->destroy                 = PetscFEDestroy_Nonaffine;
3768:   fem->ops->integrateresidual       = PetscFEIntegrateResidual_Nonaffine;
3769:   fem->ops->integratebdresidual     = PetscFEIntegrateBdResidual_Nonaffine;
3770:   fem->ops->integratejacobianaction = NULL/* PetscFEIntegrateJacobianAction_Nonaffine */;
3771:   fem->ops->integratejacobian       = PetscFEIntegrateJacobian_Nonaffine;
3772:   return(0);
3773: }

3775: /*MC
3776:   PETSCFENONAFFINE = "nonaffine" - A PetscFE object that integrates with basic tiling and no vectorization for non-affine mappings

3778:   Level: intermediate

3780: .seealso: PetscFEType, PetscFECreate(), PetscFESetType()
3781: M*/

3785: PETSC_EXTERN PetscErrorCode PetscFECreate_Nonaffine(PetscFE fem)
3786: {
3787:   PetscFE_Nonaffine *na;
3788:   PetscErrorCode     ierr;

3792:   PetscNewLog(fem, &na);
3793:   fem->data = na;

3795:   PetscFEInitialize_Nonaffine(fem);
3796:   return(0);
3797: }

3799: #ifdef PETSC_HAVE_OPENCL

3803: PetscErrorCode PetscFEDestroy_OpenCL(PetscFE fem)
3804: {
3805:   PetscFE_OpenCL *ocl = (PetscFE_OpenCL *) fem->data;
3806:   PetscErrorCode  ierr;

3809:   clReleaseCommandQueue(ocl->queue_id);
3810:   ocl->queue_id = 0;
3811:   clReleaseContext(ocl->ctx_id);
3812:   ocl->ctx_id = 0;
3813:   PetscFree(ocl);
3814:   return(0);
3815: }

3817: #define STRING_ERROR_CHECK(MSG) do { string_tail += count; if (string_tail == end_of_buffer) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_SUP, MSG);} while(0)
3818: enum {LAPLACIAN = 0, ELASTICITY = 1};

3822: /* dim     Number of spatial dimensions:          2                   */
3823: /* N_b     Number of basis functions:             generated           */
3824: /* N_{bt}  Number of total basis functions:       N_b * N_{comp}      */
3825: /* N_q     Number of quadrature points:           generated           */
3826: /* N_{bs}  Number of block cells                  LCM(N_b, N_q)       */
3827: /* N_{bst} Number of block cell components        LCM(N_{bt}, N_q)    */
3828: /* N_{bl}  Number of concurrent blocks            generated           */
3829: /* N_t     Number of threads:                     N_{bl} * N_{bs}     */
3830: /* N_{cbc} Number of concurrent basis      cells: N_{bl} * N_q        */
3831: /* N_{cqc} Number of concurrent quadrature cells: N_{bl} * N_b        */
3832: /* N_{sbc} Number of serial     basis      cells: N_{bs} / N_q        */
3833: /* N_{sqc} Number of serial     quadrature cells: N_{bs} / N_b        */
3834: /* N_{cb}  Number of serial cell batches:         input               */
3835: /* N_c     Number of total cells:                 N_{cb}*N_{t}/N_{comp} */
3836: PetscErrorCode PetscFEOpenCLGenerateIntegrationCode(PetscFE fem, char **string_buffer, PetscInt buffer_length, PetscBool useAux, PetscInt N_bl)
3837: {
3838:   PetscFE_OpenCL *ocl = (PetscFE_OpenCL *) fem->data;
3839:   PetscQuadrature q;
3840:   char           *string_tail   = *string_buffer;
3841:   char           *end_of_buffer = *string_buffer + buffer_length;
3842:   char            float_str[]   = "float", double_str[]  = "double";
3843:   char           *numeric_str   = &(float_str[0]);
3844:   PetscInt        op            = ocl->op;
3845:   PetscBool       useField      = PETSC_FALSE;
3846:   PetscBool       useFieldDer   = PETSC_TRUE;
3847:   PetscBool       useFieldAux   = useAux;
3848:   PetscBool       useFieldDerAux= PETSC_FALSE;
3849:   PetscBool       useF0         = PETSC_TRUE;
3850:   PetscBool       useF1         = PETSC_TRUE;
3851:   PetscReal      *basis, *basisDer;
3852:   PetscInt        dim, N_b, N_c, N_q, N_t, p, d, b, c;
3853:   size_t          count;
3854:   PetscErrorCode  ierr;

3857:   PetscFEGetSpatialDimension(fem, &dim);
3858:   PetscFEGetDimension(fem, &N_b);
3859:   PetscFEGetNumComponents(fem, &N_c);
3860:   PetscFEGetQuadrature(fem, &q);
3861:   N_q  = q.numPoints;
3862:   N_t  = N_b * N_c * N_q * N_bl;
3863:   /* Enable device extension for double precision */
3864:   if (ocl->realType == PETSC_DOUBLE) {
3865:     PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
3866: "#if defined(cl_khr_fp64)\n"
3867: "#  pragma OPENCL EXTENSION cl_khr_fp64: enable\n"
3868: "#elif defined(cl_amd_fp64)\n"
3869: "#  pragma OPENCL EXTENSION cl_amd_fp64: enable\n"
3870: "#endif\n",
3871:                               &count);STRING_ERROR_CHECK("Message to short");
3872:     numeric_str  = &(double_str[0]);
3873:   }
3874:   /* Kernel API */
3875:   PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
3876: "\n"
3877: "__kernel void integrateElementQuadrature(int N_cb, __global %s *coefficients, __global %s *coefficientsAux, __global %s *jacobianInverses, __global %s *jacobianDeterminants, __global %s *elemVec)\n"
3878: "{\n",
3879:                        &count, numeric_str, numeric_str, numeric_str, numeric_str, numeric_str);STRING_ERROR_CHECK("Message to short");
3880:   /* Quadrature */
3881:   PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
3882: "  /* Quadrature points\n"
3883: "   - (x1,y1,x2,y2,...) */\n"
3884: "  const %s points[%d] = {\n",
3885:                        &count, numeric_str, N_q*dim);STRING_ERROR_CHECK("Message to short");
3886:   for (p = 0; p < N_q; ++p) {
3887:     for (d = 0; d < dim; ++d) {
3888:       PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "%g,\n", &count, q.points[p*dim+d]);STRING_ERROR_CHECK("Message to short");
3889:     }
3890:   }
3891:   PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "};\n", &count);STRING_ERROR_CHECK("Message to short");
3892:   PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
3893: "  /* Quadrature weights\n"
3894: "   - (v1,v2,...) */\n"
3895: "  const %s weights[%d] = {\n",
3896:                        &count, numeric_str, N_q);STRING_ERROR_CHECK("Message to short");
3897:   for (p = 0; p < N_q; ++p) {
3898:     PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "%g,\n", &count, q.weights[p]);STRING_ERROR_CHECK("Message to short");
3899:   }
3900:   PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "};\n", &count);STRING_ERROR_CHECK("Message to short");
3901:   /* Basis Functions */
3902:   PetscFEGetDefaultTabulation(fem, &basis, &basisDer, NULL);
3903:   PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
3904: "  /* Nodal basis function evaluations\n"
3905: "    - basis component is fastest varying, the basis function, then point */\n"
3906: "  const %s Basis[%d] = {\n",
3907:                        &count, numeric_str, N_q*N_b*N_c);STRING_ERROR_CHECK("Message to short");
3908:   for (p = 0; p < N_q; ++p) {
3909:     for (b = 0; b < N_b; ++b) {
3910:       for (c = 0; c < N_c; ++c) {
3911:         PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "%g,\n", &count, basis[(p*N_b + b)*N_c + c]);STRING_ERROR_CHECK("Message to short");
3912:       }
3913:     }
3914:   }
3915:   PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "};\n", &count);STRING_ERROR_CHECK("Message to short");
3916:   PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
3917: "\n"
3918: "  /* Nodal basis function derivative evaluations,\n"
3919: "      - derivative direction is fastest varying, then basis component, then basis function, then point */\n"
3920: "  const %s%d BasisDerivatives[%d] = {\n",
3921:                        &count, numeric_str, dim, N_q*N_b*N_c);STRING_ERROR_CHECK("Message to short");
3922:   for (p = 0; p < N_q; ++p) {
3923:     for (b = 0; b < N_b; ++b) {
3924:       for (c = 0; c < N_c; ++c) {
3925:         PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "(%s%d)(", &count, numeric_str, dim);STRING_ERROR_CHECK("Message to short");
3926:         for (d = 0; d < dim; ++d) {
3927:           if (d > 0) {
3928:             PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, ", %g", &count, basisDer[((p*N_b + b)*dim + d)*N_c + c]);STRING_ERROR_CHECK("Message to short");
3929:           } else {
3930:             PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "%g", &count, basisDer[((p*N_b + b)*dim + d)*N_c + c]);STRING_ERROR_CHECK("Message to short");
3931:           }
3932:         }
3933:         PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "),\n", &count);STRING_ERROR_CHECK("Message to short");
3934:       }
3935:     }
3936:   }
3937:   PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "};\n", &count);STRING_ERROR_CHECK("Message to short");
3938:   /* Sizes */
3939:   PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
3940: "  const int dim    = %d;                           // The spatial dimension\n"
3941: "  const int N_bl   = %d;                           // The number of concurrent blocks\n"
3942: "  const int N_b    = %d;                           // The number of basis functions\n"
3943: "  const int N_comp = %d;                           // The number of basis function components\n"
3944: "  const int N_bt   = N_b*N_comp;                    // The total number of scalar basis functions\n"
3945: "  const int N_q    = %d;                           // The number of quadrature points\n"
3946: "  const int N_bst  = N_bt*N_q;                      // The block size, LCM(N_b*N_comp, N_q), Notice that a block is not processed simultaneously\n"
3947: "  const int N_t    = N_bst*N_bl;                    // The number of threads, N_bst * N_bl\n"
3948: "  const int N_bc   = N_t/N_comp;                    // The number of cells per batch (N_b*N_q*N_bl)\n"
3949: "  const int N_sbc  = N_bst / (N_q * N_comp);\n"
3950: "  const int N_sqc  = N_bst / N_bt;\n"
3951: "  /*const int N_c    = N_cb * N_bc;*/\n"
3952: "\n"
3953: "  /* Calculated indices */\n"
3954: "  /*const int tidx    = get_local_id(0) + get_local_size(0)*get_local_id(1);*/\n"
3955: "  const int tidx    = get_local_id(0);\n"
3956: "  const int blidx   = tidx / N_bst;                  // Block number for this thread\n"
3957: "  const int bidx    = tidx %% N_bt;                   // Basis function mapped to this thread\n"
3958: "  const int cidx    = tidx %% N_comp;                 // Basis component mapped to this thread\n"
3959: "  const int qidx    = tidx %% N_q;                    // Quadrature point mapped to this thread\n"
3960: "  const int blbidx  = tidx %% N_q + blidx*N_q;        // Cell mapped to this thread in the basis phase\n"
3961: "  const int blqidx  = tidx %% N_b + blidx*N_b;        // Cell mapped to this thread in the quadrature phase\n"
3962: "  const int gidx    = get_group_id(1)*get_num_groups(0) + get_group_id(0);\n",
3963:                             &count, dim, N_bl, N_b, N_c, N_q);STRING_ERROR_CHECK("Message to short");
3964:   /* Local memory */
3965:   PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
3966: "\n"
3967: "  /* Quadrature data */\n"
3968: "  %s                w;                   // $w_q$, Quadrature weight at $x_q$\n"
3969: "  __local %s         phi_i[%d];    //[N_bt*N_q];  // $\\phi_i(x_q)$, Value of the basis function $i$ at $x_q$\n"
3970: "  __local %s%d       phiDer_i[%d]; //[N_bt*N_q];  // $\\frac{\\partial\\phi_i(x_q)}{\\partial x_d}$, Value of the derivative of basis function $i$ in direction $x_d$ at $x_q$\n"
3971: "  /* Geometric data */\n"
3972: "  __local %s        detJ[%d]; //[N_t];           // $|J(x_q)|$, Jacobian determinant at $x_q$\n"
3973: "  __local %s        invJ[%d];//[N_t*dim*dim];   // $J^{-1}(x_q)$, Jacobian inverse at $x_q$\n",
3974:                             &count, numeric_str, numeric_str, N_b*N_c*N_q, numeric_str, dim, N_b*N_c*N_q, numeric_str, N_t,
3975:                             numeric_str, N_t*dim*dim, numeric_str, N_t*N_b*N_c);STRING_ERROR_CHECK("Message to short");
3976:   PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
3977: "  /* FEM data */\n"
3978: "  __local %s        u_i[%d]; //[N_t*N_bt];       // Coefficients $u_i$ of the field $u|_{\\mathcal{T}} = \\sum_i u_i \\phi_i$\n",
3979:                             &count, numeric_str, N_t*N_b*N_c);STRING_ERROR_CHECK("Message to short");
3980:   if (useAux) {
3981:     PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
3982: "  __local %s        a_i[%d]; //[N_t];            // Coefficients $a_i$ of the auxiliary field $a|_{\\mathcal{T}} = \\sum_i a_i \\phi^R_i$\n",
3983:                             &count, numeric_str, N_t);STRING_ERROR_CHECK("Message to short");
3984:   }
3985:   if (useF0) {
3986:     PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
3987: "  /* Intermediate calculations */\n"
3988: "  __local %s         f_0[%d]; //[N_t*N_sqc];      // $f_0(u(x_q), \\nabla u(x_q)) |J(x_q)| w_q$\n",
3989:                               &count, numeric_str, N_t*N_q);STRING_ERROR_CHECK("Message to short");
3990:   }
3991:   if (useF1) {
3992:     PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
3993: "  __local %s%d       f_1[%d]; //[N_t*N_sqc];      // $f_1(u(x_q), \\nabla u(x_q)) |J(x_q)| w_q$\n",
3994:                               &count, numeric_str, dim, N_t*N_q);STRING_ERROR_CHECK("Message to short");
3995:   }
3996:   PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
3997: "  /* Output data */\n"
3998: "  %s                e_i;                 // Coefficient $e_i$ of the residual\n\n",
3999:                             &count, numeric_str);STRING_ERROR_CHECK("Message to short");
4000:   /* One-time loads */
4001:   PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
4002: "  /* These should be generated inline */\n"
4003: "  /* Load quadrature weights */\n"
4004: "  w = weights[qidx];\n"
4005: "  /* Load basis tabulation \\phi_i for this cell */\n"
4006: "  if (tidx < N_bt*N_q) {\n"
4007: "    phi_i[tidx]    = Basis[tidx];\n"
4008: "    phiDer_i[tidx] = BasisDerivatives[tidx];\n"
4009: "  }\n\n",
4010:                        &count);STRING_ERROR_CHECK("Message to short");
4011:   /* Batch loads */
4012:   PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
4013: "  for (int batch = 0; batch < N_cb; ++batch) {\n"
4014: "    const int Goffset = gidx*N_cb*N_bc;\n"
4015: "    /* Load geometry */\n"
4016: "    detJ[tidx] = jacobianDeterminants[Goffset+batch*N_bc+tidx];\n"
4017: "    for (int n = 0; n < dim*dim; ++n) {\n"
4018: "      const int offset = n*N_t;\n"
4019: "      invJ[offset+tidx] = jacobianInverses[(Goffset+batch*N_bc)*dim*dim+offset+tidx];\n"
4020: "    }\n"
4021: "    /* Load coefficients u_i for this cell */\n"
4022: "    for (int n = 0; n < N_bt; ++n) {\n"
4023: "      const int offset = n*N_t;\n"
4024: "      u_i[offset+tidx] = coefficients[(Goffset*N_bt)+batch*N_t*N_b+offset+tidx];\n"
4025: "    }\n",
4026:                        &count);STRING_ERROR_CHECK("Message to short");
4027:   if (useAux) {
4028:   PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
4029: "    /* Load coefficients a_i for this cell */\n"
4030: "    /* TODO: This should not be N_t here, it should be N_bc*N_comp_aux */\n"
4031: "    a_i[tidx] = coefficientsAux[Goffset+batch*N_t+tidx];\n",
4032:                             &count);STRING_ERROR_CHECK("Message to short");
4033:   }
4034:   /* Quadrature phase */
4035:   PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
4036: "\n"
4037: "    /* Map coefficients to values at quadrature points */\n"
4038: "    for (int c = 0; c < N_sqc; ++c) {\n"
4039: "      const int cell          = c*N_bl*N_b + blqidx;\n"
4040: "      const int fidx          = (cell*N_q + qidx)*N_comp + cidx;\n",
4041:                        &count);STRING_ERROR_CHECK("Message to short");
4042:   if (useField) {
4043:     PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
4044: "      %s  u[%d]; //[N_comp];     // $u(x_q)$, Value of the field at $x_q$\n",
4045:                               &count, numeric_str, N_c);STRING_ERROR_CHECK("Message to short");
4046:   }
4047:   if (useFieldDer) {
4048:     PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
4049: "      %s%d   gradU[%d]; //[N_comp]; // $\\nabla u(x_q)$, Value of the field gradient at $x_q$\n",
4050:                               &count, numeric_str, dim, N_c);STRING_ERROR_CHECK("Message to short");
4051:   }
4052:   if (useFieldAux) {
4053:     PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
4054: "      %s  a[%d]; //[1];     // $a(x_q)$, Value of the auxiliary fields at $x_q$\n",
4055:                               &count, numeric_str, 1);STRING_ERROR_CHECK("Message to short");
4056:   }
4057:   if (useFieldDerAux) {
4058:     PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
4059: "      %s%d   gradA[%d]; //[1]; // $\\nabla a(x_q)$, Value of the auxiliary field gradient at $x_q$\n",
4060:                               &count, numeric_str, dim, 1);STRING_ERROR_CHECK("Message to short");
4061:   }
4062:   PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
4063: "\n"
4064: "      for (int comp = 0; comp < N_comp; ++comp) {\n",
4065:                             &count);STRING_ERROR_CHECK("Message to short");
4066:   if (useField) {PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "        u[comp] = 0.0;\n", &count);STRING_ERROR_CHECK("Message to short");}
4067:   if (useFieldDer) {
4068:     switch (dim) {
4069:     case 1:
4070:       PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "        gradU[comp].x = 0.0;\n", &count);STRING_ERROR_CHECK("Message to short");break;
4071:     case 2:
4072:       PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "        gradU[comp].x = 0.0; gradU[comp].y = 0.0;\n", &count);STRING_ERROR_CHECK("Message to short");break;
4073:     case 3:
4074:       PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "        gradU[comp].x = 0.0; gradU[comp].y = 0.0; gradU[comp].z = 0.0;\n", &count);STRING_ERROR_CHECK("Message to short");break;
4075:     }
4076:   }
4077:   PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
4078: "      }\n",
4079:                             &count);STRING_ERROR_CHECK("Message to short");
4080:   if (useFieldAux) {
4081:     PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "      a[0] = 0.0;\n", &count);STRING_ERROR_CHECK("Message to short");
4082:   }
4083:   if (useFieldDerAux) {
4084:     switch (dim) {
4085:     case 1:
4086:       PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "      gradA[0].x = 0.0;\n", &count);STRING_ERROR_CHECK("Message to short");break;
4087:     case 2:
4088:       PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "      gradA[0].x = 0.0; gradA[0].y = 0.0;\n", &count);STRING_ERROR_CHECK("Message to short");break;
4089:     case 3:
4090:       PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "      gradA[0].x = 0.0; gradA[0].y = 0.0; gradA[0].z = 0.0;\n", &count);STRING_ERROR_CHECK("Message to short");break;
4091:     }
4092:   }
4093:   PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
4094: "      /* Get field and derivatives at this quadrature point */\n"
4095: "      for (int i = 0; i < N_b; ++i) {\n"
4096: "        for (int comp = 0; comp < N_comp; ++comp) {\n"
4097: "          const int b    = i*N_comp+comp;\n"
4098: "          const int pidx = qidx*N_bt + b;\n"
4099: "          const int uidx = cell*N_bt + b;\n"
4100: "          %s%d   realSpaceDer;\n\n",
4101:                             &count, numeric_str, dim);STRING_ERROR_CHECK("Message to short");
4102:   if (useField) {PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,"          u[comp] += u_i[uidx]*phi_i[pidx];\n", &count);STRING_ERROR_CHECK("Message to short");}
4103:   if (useFieldDer) {
4104:     switch (dim) {
4105:     case 2:
4106:       PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
4107: "          realSpaceDer.x = invJ[cell*dim*dim+0*dim+0]*phiDer_i[pidx].x + invJ[cell*dim*dim+1*dim+0]*phiDer_i[pidx].y;\n"
4108: "          gradU[comp].x += u_i[uidx]*realSpaceDer.x;\n"
4109: "          realSpaceDer.y = invJ[cell*dim*dim+0*dim+1]*phiDer_i[pidx].x + invJ[cell*dim*dim+1*dim+1]*phiDer_i[pidx].y;\n"
4110: "          gradU[comp].y += u_i[uidx]*realSpaceDer.y;\n",
4111:                            &count);STRING_ERROR_CHECK("Message to short");break;
4112:     case 3:
4113:       PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
4114: "          realSpaceDer.x = invJ[cell*dim*dim+0*dim+0]*phiDer_i[pidx].x + invJ[cell*dim*dim+1*dim+0]*phiDer_i[pidx].y + invJ[cell*dim*dim+2*dim+0]*phiDer_i[pidx].z;\n"
4115: "          gradU[comp].x += u_i[uidx]*realSpaceDer.x;\n"
4116: "          realSpaceDer.y = invJ[cell*dim*dim+0*dim+1]*phiDer_i[pidx].x + invJ[cell*dim*dim+1*dim+1]*phiDer_i[pidx].y + invJ[cell*dim*dim+2*dim+1]*phiDer_i[pidx].z;\n"
4117: "          gradU[comp].y += u_i[uidx]*realSpaceDer.y;\n"
4118: "          realSpaceDer.z = invJ[cell*dim*dim+0*dim+2]*phiDer_i[pidx].x + invJ[cell*dim*dim+1*dim+2]*phiDer_i[pidx].y + invJ[cell*dim*dim+2*dim+2]*phiDer_i[pidx].z;\n"
4119: "          gradU[comp].z += u_i[uidx]*realSpaceDer.z;\n",
4120:                            &count);STRING_ERROR_CHECK("Message to short");break;
4121:     }
4122:   }
4123:   PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
4124: "        }\n"
4125: "      }\n",
4126:                             &count);STRING_ERROR_CHECK("Message to short");
4127:   if (useFieldAux) {
4128:     PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,"          a[0] += a_i[cell];\n", &count);STRING_ERROR_CHECK("Message to short");
4129:   }
4130:   /* Calculate residual at quadrature points: Should be generated by an weak form egine */
4131:   PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
4132: "      /* Process values at quadrature points */\n",
4133:                             &count);STRING_ERROR_CHECK("Message to short");
4134:   switch (op) {
4135:   case LAPLACIAN:
4136:     if (useF0) {PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "      f_0[fidx] = 4.0;\n", &count);STRING_ERROR_CHECK("Message to short");}
4137:     if (useF1) {
4138:       if (useAux) {PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "      f_1[fidx] = a[cell]*gradU[cidx];\n", &count);STRING_ERROR_CHECK("Message to short");}
4139:       else        {PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "      f_1[fidx] = gradU[cidx];\n", &count);STRING_ERROR_CHECK("Message to short");}
4140:     }
4141:     break;
4142:   case ELASTICITY:
4143:     if (useF0) {PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "      f_0[fidx] = 4.0;\n", &count);STRING_ERROR_CHECK("Message to short");}
4144:     if (useF1) {
4145:     switch (dim) {
4146:     case 2:
4147:       PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
4148: "      switch (cidx) {\n"
4149: "      case 0:\n"
4150: "        f_1[fidx].x = 0.5*(gradU[0].x + gradU[0].x);\n"
4151: "        f_1[fidx].y = 0.5*(gradU[0].y + gradU[1].x);\n"
4152: "        break;\n"
4153: "      case 1:\n"
4154: "        f_1[fidx].x = 0.5*(gradU[1].x + gradU[0].y);\n"
4155: "        f_1[fidx].y = 0.5*(gradU[1].y + gradU[1].y);\n"
4156: "      }\n",
4157:                            &count);STRING_ERROR_CHECK("Message to short");break;
4158:     case 3:
4159:       PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
4160: "      switch (cidx) {\n"
4161: "      case 0:\n"
4162: "        f_1[fidx].x = 0.5*(gradU[0].x + gradU[0].x);\n"
4163: "        f_1[fidx].y = 0.5*(gradU[0].y + gradU[1].x);\n"
4164: "        f_1[fidx].z = 0.5*(gradU[0].z + gradU[2].x);\n"
4165: "        break;\n"
4166: "      case 1:\n"
4167: "        f_1[fidx].x = 0.5*(gradU[1].x + gradU[0].y);\n"
4168: "        f_1[fidx].y = 0.5*(gradU[1].y + gradU[1].y);\n"
4169: "        f_1[fidx].z = 0.5*(gradU[1].y + gradU[2].y);\n"
4170: "        break;\n"
4171: "      case 2:\n"
4172: "        f_1[fidx].x = 0.5*(gradU[2].x + gradU[0].z);\n"
4173: "        f_1[fidx].y = 0.5*(gradU[2].y + gradU[1].z);\n"
4174: "        f_1[fidx].z = 0.5*(gradU[2].y + gradU[2].z);\n"
4175: "      }\n",
4176:                            &count);STRING_ERROR_CHECK("Message to short");break;
4177:     }}
4178:     break;
4179:   default:
4180:     SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_SUP, "PDE operator %d is not supported", op);
4181:   }
4182:   if (useF0) {PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,"      f_0[fidx] *= detJ[cell]*w;\n", &count);STRING_ERROR_CHECK("Message to short");}
4183:   if (useF1) {
4184:     switch (dim) {
4185:     case 1:
4186:       PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,"      f_1[fidx].x *= detJ[cell]*w;\n", &count);STRING_ERROR_CHECK("Message to short");break;
4187:     case 2:
4188:       PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,"      f_1[fidx].x *= detJ[cell]*w; f_1[fidx].y *= detJ[cell]*w;\n", &count);STRING_ERROR_CHECK("Message to short");break;
4189:     case 3:
4190:       PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,"      f_1[fidx].x *= detJ[cell]*w; f_1[fidx].y *= detJ[cell]*w; f_1[fidx].z *= detJ[cell]*w;\n", &count);STRING_ERROR_CHECK("Message to short");break;
4191:     }
4192:   }
4193:   /* Thread transpose */
4194:   PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
4195: "    }\n\n"
4196: "    /* ==== TRANSPOSE THREADS ==== */\n"
4197: "    barrier(CLK_GLOBAL_MEM_FENCE);\n\n",
4198:                        &count);STRING_ERROR_CHECK("Message to short");
4199:   /* Basis phase */
4200:   PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
4201: "    /* Map values at quadrature points to coefficients */\n"
4202: "    for (int c = 0; c < N_sbc; ++c) {\n"
4203: "      const int cell = c*N_bl*N_q + blbidx;\n"
4204: "\n"
4205: "      e_i = 0.0;\n"
4206: "      for (int q = 0; q < N_q; ++q) {\n"
4207: "        const int pidx = q*N_bt + bidx;\n"
4208: "        const int fidx = (cell*N_q + q)*N_comp + cidx;\n"
4209: "        %s%d   realSpaceDer;\n\n",
4210:                        &count, numeric_str, dim);STRING_ERROR_CHECK("Message to short");

4212:   if (useF0) {PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,"        e_i += phi_i[pidx]*f_0[fidx];\n", &count);STRING_ERROR_CHECK("Message to short");}
4213:   if (useF1) {
4214:     switch (dim) {
4215:     case 2:
4216:       PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
4217: "        realSpaceDer.x = invJ[cell*dim*dim+0*dim+0]*phiDer_i[pidx].x + invJ[cell*dim*dim+1*dim+0]*phiDer_i[pidx].y;\n"
4218: "        e_i           += realSpaceDer.x*f_1[fidx].x;\n"
4219: "        realSpaceDer.y = invJ[cell*dim*dim+0*dim+1]*phiDer_i[pidx].x + invJ[cell*dim*dim+1*dim+1]*phiDer_i[pidx].y;\n"
4220: "        e_i           += realSpaceDer.y*f_1[fidx].y;\n",
4221:                            &count);STRING_ERROR_CHECK("Message to short");break;
4222:     case 3:
4223:       PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
4224: "        realSpaceDer.x = invJ[cell*dim*dim+0*dim+0]*phiDer_i[pidx].x + invJ[cell*dim*dim+1*dim+0]*phiDer_i[pidx].y + invJ[cell*dim*dim+2*dim+0]*phiDer_i[pidx].z;\n"
4225: "        e_i           += realSpaceDer.x*f_1[fidx].x;\n"
4226: "        realSpaceDer.y = invJ[cell*dim*dim+0*dim+1]*phiDer_i[pidx].x + invJ[cell*dim*dim+1*dim+1]*phiDer_i[pidx].y + invJ[cell*dim*dim+2*dim+1]*phiDer_i[pidx].z;\n"
4227: "        e_i           += realSpaceDer.y*f_1[fidx].y;\n"
4228: "        realSpaceDer.z = invJ[cell*dim*dim+0*dim+2]*phiDer_i[pidx].x + invJ[cell*dim*dim+1*dim+2]*phiDer_i[pidx].y + invJ[cell*dim*dim+2*dim+2]*phiDer_i[pidx].z;\n"
4229: "        e_i           += realSpaceDer.z*f_1[fidx].z;\n",
4230:                            &count);STRING_ERROR_CHECK("Message to short");break;
4231:     }
4232:   }
4233:   PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
4234: "      }\n"
4235: "      /* Write element vector for N_{cbc} cells at a time */\n"
4236: "      elemVec[(gidx*N_cb*N_bc*N_bt)+(batch*N_sbc+c)*N_t+tidx] = e_i;\n"
4237: "    }\n"
4238: "    /* ==== Could do one write per batch ==== */\n"
4239: "  }\n"
4240: "  return;\n"
4241: "}\n",
4242:                        &count);STRING_ERROR_CHECK("Message to short");
4243:   return(0);
4244: }

4248: PetscErrorCode PetscFEOpenCLGetIntegrationKernel(PetscFE fem, PetscBool useAux, cl_program *ocl_prog, cl_kernel *ocl_kernel)
4249: {
4250:   PetscFE_OpenCL *ocl = (PetscFE_OpenCL *) fem->data;
4251:   PetscInt        dim, N_bl;
4252:   char           *buffer;
4253:   size_t          len;
4254:   char            errMsg[8192];
4255:   cl_int          ierr2;
4256:   PetscErrorCode  ierr;

4259:   PetscFEGetSpatialDimension(fem, &dim);
4260:   PetscMalloc1(8192, &buffer);
4261:   PetscFEGetTileSizes(fem, NULL, &N_bl, NULL, NULL);
4262:   PetscFEOpenCLGenerateIntegrationCode(fem, &buffer, 8192, useAux, N_bl);
4263:   len  = strlen(buffer);
4264:   *ocl_prog = clCreateProgramWithSource(ocl->ctx_id, 1, (const char **) &buffer, &len, &ierr2);CHKERRQ(ierr2);
4265:   clBuildProgram(*ocl_prog, 0, NULL, NULL, NULL, NULL);
4266:   if (ierr != CL_SUCCESS) {
4267:     clGetProgramBuildInfo(*ocl_prog, ocl->dev_id, CL_PROGRAM_BUILD_LOG, 8192*sizeof(char), &errMsg, NULL);
4268:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Build failed! Log:\n %s", errMsg);
4269:   }
4270:   PetscFree(buffer);
4271:   *ocl_kernel = clCreateKernel(*ocl_prog, "integrateElementQuadrature", &ierr);
4272:   return(0);
4273: }

4277: PetscErrorCode PetscFEOpenCLCalculateGrid(PetscFE fem, PetscInt N, PetscInt blockSize, size_t *x, size_t *y, size_t *z)
4278: {
4279:   const PetscInt Nblocks = N/blockSize;

4282:   if (N % blockSize) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Invalid block size %d for %d elements", blockSize, N);
4283:   *z = 1;
4284:   for (*x = (size_t) (PetscSqrtReal(Nblocks) + 0.5); *x > 0; --*x) {
4285:     *y = Nblocks / *x;
4286:     if (*x * *y == Nblocks) break;
4287:   }
4288:   if (*x * *y != Nblocks) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Could not find partition for %d with block size %d", N, blockSize);
4289:   return(0);
4290: }

4294: PetscErrorCode PetscFEOpenCLLogResidual(PetscFE fem, PetscLogDouble time, PetscLogDouble flops)
4295: {
4296:   PetscFE_OpenCL   *ocl = (PetscFE_OpenCL *) fem->data;
4297:   PetscStageLog     stageLog;
4298:   PetscEventPerfLog eventLog = NULL;
4299:   PetscInt          stage;
4300:   PetscErrorCode    ierr;

4303:   PetscLogGetStageLog(&stageLog);
4304:   PetscStageLogGetCurrent(stageLog, &stage);
4305:   PetscStageLogGetEventPerfLog(stageLog, stage, &eventLog);
4306:     /* Log performance info */
4307:   eventLog->eventInfo[ocl->residualEvent].count++;
4308:   eventLog->eventInfo[ocl->residualEvent].time  += time;
4309:   eventLog->eventInfo[ocl->residualEvent].flops += flops;
4310:   return(0);
4311: }

4315: PetscErrorCode PetscFEIntegrateResidual_OpenCL(PetscFE fem, PetscInt Ne, PetscInt Nf, PetscFE fe[], PetscInt field, PetscCellGeometry geom, const PetscScalar coefficients[],
4316:                                                PetscInt NfAux, PetscFE feAux[], const PetscScalar coefficientsAux[],
4317:                                                void (*f0_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar f0[]),
4318:                                                void (*f1_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar f1[]),
4319:                                                PetscScalar elemVec[])
4320: {
4321:   /* Nbc = batchSize */
4322:   PetscFE_OpenCL   *ocl = (PetscFE_OpenCL *) fem->data;
4323:   PetscQuadrature   q;
4324:   PetscInt          dim;
4325:   PetscInt          N_b;    /* The number of basis functions */
4326:   PetscInt          N_comp; /* The number of basis function components */
4327:   PetscInt          N_bt;   /* The total number of scalar basis functions */
4328:   PetscInt          N_q;    /* The number of quadrature points */
4329:   PetscInt          N_bst;  /* The block size, LCM(N_bt, N_q), Notice that a block is not process simultaneously */
4330:   PetscInt          N_t;    /* The number of threads, N_bst * N_bl */
4331:   PetscInt          N_bl;   /* The number of blocks */
4332:   PetscInt          N_bc;   /* The batch size, N_bl*N_q*N_b */
4333:   PetscInt          N_cb;   /* The number of batches */
4334:   PetscInt          numFlops, f0Flops, f1Flops;
4335:   PetscBool         useAux      = coefficientsAux ? PETSC_TRUE : PETSC_FALSE;
4336:   PetscBool         useField    = PETSC_FALSE;
4337:   PetscBool         useFieldDer = PETSC_TRUE;
4338:   PetscBool         useF0       = PETSC_TRUE;
4339:   PetscBool         useF1       = PETSC_TRUE;
4340:   /* OpenCL variables */
4341:   cl_program        ocl_prog;
4342:   cl_kernel         ocl_kernel;
4343:   cl_event          ocl_ev;         /* The event for tracking kernel execution */
4344:   cl_ulong          ns_start;       /* Nanoseconds counter on GPU at kernel start */
4345:   cl_ulong          ns_end;         /* Nanoseconds counter on GPU at kernel stop */
4346:   cl_mem            o_jacobianInverses, o_jacobianDeterminants;
4347:   cl_mem            o_coefficients, o_coefficientsAux, o_elemVec;
4348:   float            *f_coeff, *f_coeffAux, *f_invJ, *f_detJ;
4349:   double           *d_coeff, *d_coeffAux, *d_invJ, *d_detJ;
4350:   void             *oclCoeff, *oclCoeffAux, *oclInvJ, *oclDetJ;
4351:   size_t            local_work_size[3], global_work_size[3];
4352:   size_t            realSize, x, y, z;
4353:   PetscErrorCode    ierr;

4356:   if (!Ne) {PetscFEOpenCLLogResidual(fem, 0.0, 0.0); return(0);}
4357:   PetscFEGetSpatialDimension(fem, &dim);
4358:   PetscFEGetDimension(fem, &N_b);
4359:   PetscFEGetNumComponents(fem, &N_comp);
4360:   PetscFEGetQuadrature(fem, &q);
4361:   PetscFEGetTileSizes(fem, NULL, &N_bl, &N_bc, &N_cb);
4362:   N_bt  = N_b*N_comp;
4363:   N_q   = q.numPoints;
4364:   N_bst = N_bt*N_q;
4365:   N_t   = N_bst*N_bl;
4366:   if (N_bc*N_comp != N_t) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of threads %d should be %d * %d", N_t, N_bc, N_comp);
4367:   /* Calculate layout */
4368:   if (Ne % (N_cb*N_bc)) { /* Remainder cells */
4369:     PetscFEIntegrateResidual_Basic(fem, Ne, Nf, fe, field, geom, coefficients, NfAux, feAux, coefficientsAux, f0_func, f1_func, elemVec);
4370:     return(0);
4371:   }
4372:   PetscFEOpenCLCalculateGrid(fem, Ne, N_cb*N_bc, &x, &y, &z);
4373:   local_work_size[0]  = N_bc*N_comp;
4374:   local_work_size[1]  = 1;
4375:   local_work_size[2]  = 1;
4376:   global_work_size[0] = x * local_work_size[0];
4377:   global_work_size[1] = y * local_work_size[1];
4378:   global_work_size[2] = z * local_work_size[2];
4379:   PetscInfo7(fem, "GPU layout grid(%d,%d,%d) block(%d,%d,%d) with %d batches\n", x, y, z, local_work_size[0], local_work_size[1], local_work_size[2], N_cb);
4380:   PetscInfo2(fem, " N_t: %d, N_cb: %d\n", N_t, N_cb);
4381:   /* Generate code */
4382:   if (NfAux) {
4383:     PetscSpace P;
4384:     PetscInt   order, f;

4386:     for (f = 0; f < NfAux; ++f) {
4387:       PetscFEGetBasisSpace(feAux[f], &P);
4388:       PetscSpaceGetOrder(P, &order);
4389:       if (order > 0) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Can only handle P0 coefficient fields");
4390:     }
4391:   }
4392:   PetscFEOpenCLGetIntegrationKernel(fem, useAux, &ocl_prog, &ocl_kernel);
4393:   /* Create buffers on the device and send data over */
4394:   PetscDataTypeGetSize(ocl->realType, &realSize);
4395:   if (sizeof(PetscReal) != realSize) {
4396:     switch (ocl->realType) {
4397:     case PETSC_FLOAT:
4398:     {
4399:       PetscInt c, b, d;

4401:       PetscMalloc4(Ne*N_bt,&f_coeff,Ne,&f_coeffAux,Ne*dim*dim,&f_invJ,Ne,&f_detJ);
4402:       for (c = 0; c < Ne; ++c) {
4403:         f_detJ[c] = (float) geom.detJ[c];
4404:         for (d = 0; d < dim*dim; ++d) {
4405:           f_invJ[c*dim*dim+d] = (float) geom.invJ[c*dim*dim+d];
4406:         }
4407:         for (b = 0; b < N_bt; ++b) {
4408:           f_coeff[c*N_bt+b] = (float) coefficients[c*N_bt+b];
4409:         }
4410:       }
4411:       if (coefficientsAux) { /* Assume P0 */
4412:         for (c = 0; c < Ne; ++c) {
4413:           f_coeffAux[c] = (float) coefficientsAux[c];
4414:         }
4415:       }
4416:       oclCoeff      = (void *) f_coeff;
4417:       if (coefficientsAux) {
4418:         oclCoeffAux = (void *) f_coeffAux;
4419:       } else {
4420:         oclCoeffAux = NULL;
4421:       }
4422:       oclInvJ       = (void *) f_invJ;
4423:       oclDetJ       = (void *) f_detJ;
4424:     }
4425:     break;
4426:     case PETSC_DOUBLE:
4427:     {
4428:       PetscInt c, b, d;

4430:       PetscMalloc4(Ne*N_bt,&d_coeff,Ne,&d_coeffAux,Ne*dim*dim,&d_invJ,Ne,&d_detJ);
4431:       for (c = 0; c < Ne; ++c) {
4432:         d_detJ[c] = (double) geom.detJ[c];
4433:         for (d = 0; d < dim*dim; ++d) {
4434:           d_invJ[c*dim*dim+d] = (double) geom.invJ[c*dim*dim+d];
4435:         }
4436:         for (b = 0; b < N_bt; ++b) {
4437:           d_coeff[c*N_bt+b] = (double) coefficients[c*N_bt+b];
4438:         }
4439:       }
4440:       if (coefficientsAux) { /* Assume P0 */
4441:         for (c = 0; c < Ne; ++c) {
4442:           d_coeffAux[c] = (double) coefficientsAux[c];
4443:         }
4444:       }
4445:       oclCoeff      = (void *) d_coeff;
4446:       if (coefficientsAux) {
4447:         oclCoeffAux = (void *) d_coeffAux;
4448:       } else {
4449:         oclCoeffAux = NULL;
4450:       }
4451:       oclInvJ       = (void *) d_invJ;
4452:       oclDetJ       = (void *) d_detJ;
4453:     }
4454:     break;
4455:     default:
4456:       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unsupported PETSc type %d", ocl->realType);
4457:     }
4458:   } else {
4459:     oclCoeff    = (void *) coefficients;
4460:     oclCoeffAux = (void *) coefficientsAux;
4461:     oclInvJ     = (void *) geom.invJ;
4462:     oclDetJ     = (void *) geom.detJ;
4463:   }
4464:   o_coefficients         = clCreateBuffer(ocl->ctx_id, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, Ne*N_bt    * realSize, oclCoeff,    &ierr);
4465:   if (coefficientsAux) {
4466:     o_coefficientsAux    = clCreateBuffer(ocl->ctx_id, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, Ne         * realSize, oclCoeffAux, &ierr);
4467:   } else {
4468:     o_coefficientsAux    = clCreateBuffer(ocl->ctx_id, CL_MEM_READ_ONLY,                        Ne         * realSize, oclCoeffAux, &ierr);
4469:   }
4470:   o_jacobianInverses     = clCreateBuffer(ocl->ctx_id, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, Ne*dim*dim * realSize, oclInvJ,     &ierr);
4471:   o_jacobianDeterminants = clCreateBuffer(ocl->ctx_id, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, Ne         * realSize, oclDetJ,     &ierr);
4472:   o_elemVec              = clCreateBuffer(ocl->ctx_id, CL_MEM_WRITE_ONLY,                       Ne*N_bt    * realSize, NULL,        &ierr);
4473:   /* Kernel launch */
4474:   clSetKernelArg(ocl_kernel, 0, sizeof(cl_int), (void*) &N_cb);
4475:   clSetKernelArg(ocl_kernel, 1, sizeof(cl_mem), (void*) &o_coefficients);
4476:   clSetKernelArg(ocl_kernel, 2, sizeof(cl_mem), (void*) &o_coefficientsAux);
4477:   clSetKernelArg(ocl_kernel, 3, sizeof(cl_mem), (void*) &o_jacobianInverses);
4478:   clSetKernelArg(ocl_kernel, 4, sizeof(cl_mem), (void*) &o_jacobianDeterminants);
4479:   clSetKernelArg(ocl_kernel, 5, sizeof(cl_mem), (void*) &o_elemVec);
4480:   clEnqueueNDRangeKernel(ocl->queue_id, ocl_kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &ocl_ev);
4481:   /* Read data back from device */
4482:   if (sizeof(PetscReal) != realSize) {
4483:     switch (ocl->realType) {
4484:     case PETSC_FLOAT:
4485:     {
4486:       float   *elem;
4487:       PetscInt c, b;

4489:       PetscFree4(f_coeff,f_coeffAux,f_invJ,f_detJ);
4490:       PetscMalloc1(Ne*N_bt, &elem);
4491:       clEnqueueReadBuffer(ocl->queue_id, o_elemVec, CL_TRUE, 0, Ne*N_bt * realSize, elem, 0, NULL, NULL);
4492:       for (c = 0; c < Ne; ++c) {
4493:         for (b = 0; b < N_bt; ++b) {
4494:           elemVec[c*N_bt+b] = (PetscScalar) elem[c*N_bt+b];
4495:         }
4496:       }
4497:       PetscFree(elem);
4498:     }
4499:     break;
4500:     case PETSC_DOUBLE:
4501:     {
4502:       double  *elem;
4503:       PetscInt c, b;

4505:       PetscFree4(d_coeff,d_coeffAux,d_invJ,d_detJ);
4506:       PetscMalloc1(Ne*N_bt, &elem);
4507:       clEnqueueReadBuffer(ocl->queue_id, o_elemVec, CL_TRUE, 0, Ne*N_bt * realSize, elem, 0, NULL, NULL);
4508:       for (c = 0; c < Ne; ++c) {
4509:         for (b = 0; b < N_bt; ++b) {
4510:           elemVec[c*N_bt+b] = (PetscScalar) elem[c*N_bt+b];
4511:         }
4512:       }
4513:       PetscFree(elem);
4514:     }
4515:     break;
4516:     default:
4517:       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unsupported PETSc type %d", ocl->realType);
4518:     }
4519:   } else {
4520:     clEnqueueReadBuffer(ocl->queue_id, o_elemVec, CL_TRUE, 0, Ne*N_bt * realSize, elemVec, 0, NULL, NULL);
4521:   }
4522:   /* Log performance */
4523:   clGetEventProfilingInfo(ocl_ev, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &ns_start, NULL);
4524:   clGetEventProfilingInfo(ocl_ev, CL_PROFILING_COMMAND_END,   sizeof(cl_ulong), &ns_end,   NULL);
4525:   f0Flops = 0;
4526:   switch (ocl->op) {
4527:   case LAPLACIAN:
4528:     f1Flops = useAux ? dim : 0;break;
4529:   case ELASTICITY:
4530:     f1Flops = 2*dim*dim;break;
4531:   }
4532:   numFlops = Ne*(
4533:     N_q*(
4534:       N_b*N_comp*((useField ? 2 : 0) + (useFieldDer ? 2*dim*(dim + 1) : 0))
4535:       /*+
4536:        N_ba*N_compa*((useFieldAux ? 2 : 0) + (useFieldDerAux ? 2*dim*(dim + 1) : 0))*/
4537:       +
4538:       N_comp*((useF0 ? f0Flops + 2 : 0) + (useF1 ? f1Flops + 2*dim : 0)))
4539:     +
4540:     N_b*((useF0 ? 2 : 0) + (useF1 ? 2*dim*(dim + 1) : 0)));
4541:   PetscFEOpenCLLogResidual(fem, (ns_end - ns_start)*1.0e-9, numFlops);
4542:   /* Cleanup */
4543:   clReleaseMemObject(o_coefficients);
4544:   clReleaseMemObject(o_coefficientsAux);
4545:   clReleaseMemObject(o_jacobianInverses);
4546:   clReleaseMemObject(o_jacobianDeterminants);
4547:   clReleaseMemObject(o_elemVec);
4548:   clReleaseKernel(ocl_kernel);
4549:   clReleaseProgram(ocl_prog);
4550:   return(0);
4551: }

4555: PetscErrorCode PetscFEInitialize_OpenCL(PetscFE fem)
4556: {
4558:   fem->ops->setfromoptions          = NULL;
4559:   fem->ops->setup                   = NULL;
4560:   fem->ops->view                    = NULL;
4561:   fem->ops->destroy                 = PetscFEDestroy_OpenCL;
4562:   fem->ops->integrateresidual       = PetscFEIntegrateResidual_OpenCL;
4563:   fem->ops->integratebdresidual     = NULL/* PetscFEIntegrateBdResidual_OpenCL */;
4564:   fem->ops->integratejacobianaction = NULL/* PetscFEIntegrateJacobianAction_OpenCL */;
4565:   fem->ops->integratejacobian       = PetscFEIntegrateJacobian_Basic;
4566:   return(0);
4567: }

4569: /*MC
4570:   PETSCFEOPENCL = "opencl" - A PetscFE object that integrates using a vectorized OpenCL implementation

4572:   Level: intermediate

4574: .seealso: PetscFEType, PetscFECreate(), PetscFESetType()
4575: M*/

4579: PETSC_EXTERN PetscErrorCode PetscFECreate_OpenCL(PetscFE fem)
4580: {
4581:   PetscFE_OpenCL *ocl;
4582:   cl_uint         num_platforms;
4583:   cl_platform_id  platform_ids[42];
4584:   cl_uint         num_devices;
4585:   cl_device_id    device_ids[42];
4586:   cl_int          ierr2;
4587:   PetscErrorCode  ierr;

4591:   PetscNewLog(fem,&ocl);
4592:   fem->data = ocl;

4594:   /* Init Platform */
4595:   clGetPlatformIDs(42, platform_ids, &num_platforms);
4596:   if (!num_platforms) SETERRQ(PetscObjectComm((PetscObject) fem), PETSC_ERR_SUP, "No OpenCL platform found.");
4597:   ocl->pf_id = platform_ids[0];
4598:   /* Init Device */
4599:   clGetDeviceIDs(ocl->pf_id, CL_DEVICE_TYPE_ALL, 42, device_ids, &num_devices);
4600:   if (!num_devices) SETERRQ(PetscObjectComm((PetscObject) fem), PETSC_ERR_SUP, "No OpenCL device found.");
4601:   ocl->dev_id = device_ids[0];
4602:   /* Create context with one command queue */
4603:   ocl->ctx_id   = clCreateContext(0, 1, &(ocl->dev_id), NULL, NULL, &ierr2);CHKERRQ(ierr2);
4604:   ocl->queue_id = clCreateCommandQueue(ocl->ctx_id, ocl->dev_id, CL_QUEUE_PROFILING_ENABLE, &ierr2);CHKERRQ(ierr2);
4605:   /* Types */
4606:   ocl->realType = PETSC_FLOAT;
4607:   /* Register events */
4608:   PetscLogEventRegister("OpenCL FEResidual", PETSCFE_CLASSID, &ocl->residualEvent);
4609:   /* Equation handling */
4610:   ocl->op = LAPLACIAN;

4612:   PetscFEInitialize_OpenCL(fem);
4613:   return(0);
4614: }

4618: PetscErrorCode PetscFEOpenCLSetRealType(PetscFE fem, PetscDataType realType)
4619: {
4620:   PetscFE_OpenCL *ocl = (PetscFE_OpenCL *) fem->data;

4624:   ocl->realType = realType;
4625:   return(0);
4626: }

4630: PetscErrorCode PetscFEOpenCLGetRealType(PetscFE fem, PetscDataType *realType)
4631: {
4632:   PetscFE_OpenCL *ocl = (PetscFE_OpenCL *) fem->data;

4637:   *realType = ocl->realType;
4638:   return(0);
4639: }

4641: #endif /* PETSC_HAVE_OPENCL */

4643: /*
4644: Purpose: Compute element vector for chunk of elements

4646: Input:
4647:   Sizes:
4648:      Ne:  number of elements
4649:      Nf:  number of fields
4650:      PetscFE
4651:        dim: spatial dimension
4652:        Nb:  number of basis functions
4653:        Nc:  number of field components
4654:        PetscQuadrature
4655:          Nq:  number of quadrature points

4657:   Geometry:
4658:      PetscCellGeometry
4659:        PetscReal v0s[Ne*dim]
4660:        PetscReal jacobians[Ne*dim*dim]        possibly *Nq
4661:        PetscReal jacobianInverses[Ne*dim*dim] possibly *Nq
4662:        PetscReal jacobianDeterminants[Ne]     possibly *Nq
4663:   FEM:
4664:      PetscFE
4665:        PetscQuadrature
4666:          PetscReal   quadPoints[Nq*dim]
4667:          PetscReal   quadWeights[Nq]
4668:        PetscReal   basis[Nq*Nb*Nc]
4669:        PetscReal   basisDer[Nq*Nb*Nc*dim]
4670:      PetscScalar coefficients[Ne*Nb*Nc]
4671:      PetscScalar elemVec[Ne*Nb*Nc]

4673:   Problem:
4674:      PetscInt f: the active field
4675:      f0, f1

4677:   Work Space:
4678:      PetscFE
4679:        PetscScalar f0[Nq*dim];
4680:        PetscScalar f1[Nq*dim*dim];
4681:        PetscScalar u[Nc];
4682:        PetscScalar gradU[Nc*dim];
4683:        PetscReal   x[dim];
4684:        PetscScalar realSpaceDer[dim];

4686: Purpose: Compute element vector for N_cb batches of elements

4688: Input:
4689:   Sizes:
4690:      N_cb: Number of serial cell batches

4692:   Geometry:
4693:      PetscReal v0s[Ne*dim]
4694:      PetscReal jacobians[Ne*dim*dim]        possibly *Nq
4695:      PetscReal jacobianInverses[Ne*dim*dim] possibly *Nq
4696:      PetscReal jacobianDeterminants[Ne]     possibly *Nq
4697:   FEM:
4698:      static PetscReal   quadPoints[Nq*dim]
4699:      static PetscReal   quadWeights[Nq]
4700:      static PetscReal   basis[Nq*Nb*Nc]
4701:      static PetscReal   basisDer[Nq*Nb*Nc*dim]
4702:      PetscScalar coefficients[Ne*Nb*Nc]
4703:      PetscScalar elemVec[Ne*Nb*Nc]

4705: ex62.c:
4706:   PetscErrorCode PetscFEIntegrateResidualBatch(PetscInt Ne, PetscInt numFields, PetscInt field, PetscQuadrature quad[], const PetscScalar coefficients[],
4707:                                                const PetscReal v0s[], const PetscReal jacobians[], const PetscReal jacobianInverses[], const PetscReal jacobianDeterminants[],
4708:                                                void (*f0_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscReal x[], PetscScalar f0[]),
4709:                                                void (*f1_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscReal x[], PetscScalar f1[]), PetscScalar elemVec[])

4711: ex52.c:
4712:   PetscErrorCode IntegrateLaplacianBatchCPU(PetscInt Ne, PetscInt Nb, const PetscScalar coefficients[], const PetscReal jacobianInverses[], const PetscReal jacobianDeterminants[], PetscInt Nq, const PetscReal quadPoints[], const PetscReal quadWeights[], const PetscReal basisTabulation[], const PetscReal basisDerTabulation[], PetscScalar elemVec[], AppCtx *user)
4713:   PetscErrorCode IntegrateElasticityBatchCPU(PetscInt Ne, PetscInt Nb, PetscInt Ncomp, const PetscScalar coefficients[], const PetscReal jacobianInverses[], const PetscReal jacobianDeterminants[], PetscInt Nq, const PetscReal quadPoints[], const PetscReal quadWeights[], const PetscReal basisTabulation[], const PetscReal basisDerTabulation[], PetscScalar elemVec[], AppCtx *user)

4715: ex52_integrateElement.cu
4716: __global__ void integrateElementQuadrature(int N_cb, realType *coefficients, realType *jacobianInverses, realType *jacobianDeterminants, realType *elemVec)

4718: PETSC_EXTERN PetscErrorCode IntegrateElementBatchGPU(PetscInt spatial_dim, PetscInt Ne, PetscInt Ncb, PetscInt Nbc, PetscInt Nbl, const PetscScalar coefficients[],
4719:                                                      const PetscReal jacobianInverses[], const PetscReal jacobianDeterminants[], PetscScalar elemVec[],
4720:                                                      PetscLogEvent event, PetscInt debug, PetscInt pde_op)

4722: ex52_integrateElementOpenCL.c:
4723: PETSC_EXTERN PetscErrorCode IntegrateElementBatchGPU(PetscInt spatial_dim, PetscInt Ne, PetscInt Ncb, PetscInt Nbc, PetscInt N_bl, const PetscScalar coefficients[],
4724:                                                      const PetscReal jacobianInverses[], const PetscReal jacobianDeterminants[], PetscScalar elemVec[],
4725:                                                      PetscLogEvent event, PetscInt debug, PetscInt pde_op)

4727: __kernel void integrateElementQuadrature(int N_cb, __global float *coefficients, __global float *jacobianInverses, __global float *jacobianDeterminants, __global float *elemVec)
4728: */

4732: /*C
4733:   PetscFEIntegrateResidual - Produce the element residual vector for a chunk of elements by quadrature integration

4735:   Not collective

4737:   Input Parameters:
4738: + fem          - The PetscFE object for the field being integrated
4739: . Ne           - The number of elements in the chunk
4740: . Nf           - The number of physical fields
4741: . fe           - The PetscFE objects for each field
4742: . field        - The field being integrated
4743: . geom         - The cell geometry for each cell in the chunk
4744: . coefficients - The array of FEM basis coefficients for the elements
4745: . NfAux        - The number of auxiliary physical fields
4746: . feAux        - The PetscFE objects for each auxiliary field
4747: . coefficientsAux - The array of FEM auxiliary basis coefficients for the elements
4748: . f0_func      - f_0 function from the first order FEM model
4749: - f1_func      - f_1 function from the first order FEM model

4751:   Output Parameter
4752: . elemVec      - the element residual vectors from each element

4754:    Calling sequence of f0_func and f1_func:
4755: $    void f0(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar f0[])

4757:   Note:
4758: $ Loop over batch of elements (e):
4759: $   Loop over quadrature points (q):
4760: $     Make u_q and gradU_q (loops over fields,Nb,Ncomp) and x_q
4761: $     Call f_0 and f_1
4762: $   Loop over element vector entries (f,fc --> i):
4763: $     elemVec[i] += \psi^{fc}_f(q) f0_{fc}(u, \nabla u) + \nabla\psi^{fc}_f(q) \cdot f1_{fc,df}(u, \nabla u)
4764: */
4765: PetscErrorCode PetscFEIntegrateResidual(PetscFE fem, PetscInt Ne, PetscInt Nf, PetscFE fe[], PetscInt field, PetscCellGeometry geom, const PetscScalar coefficients[],
4766:                                         PetscInt NfAux, PetscFE feAux[], const PetscScalar coefficientsAux[],
4767:                                         void (*f0_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar f0[]),
4768:                                         void (*f1_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar f1[]),
4769:                                         PetscScalar elemVec[])
4770: {

4775:   if (fem->ops->integrateresidual) {(*fem->ops->integrateresidual)(fem, Ne, Nf, fe, field, geom, coefficients, NfAux, feAux, coefficientsAux, f0_func, f1_func, elemVec);}
4776:   return(0);
4777: }

4781: /*C
4782:   PetscFEIntegrateBdResidual - Produce the element residual vector for a chunk of elements by quadrature integration over a boundary

4784:   Not collective

4786:   Input Parameters:
4787: + fem          - The PetscFE object for the field being integrated
4788: . Ne           - The number of elements in the chunk
4789: . Nf           - The number of physical fields
4790: . fe           - The PetscFE objects for each field
4791: . field        - The field being integrated
4792: . geom         - The cell geometry for each cell in the chunk
4793: . coefficients - The array of FEM basis coefficients for the elements
4794: . NfAux        - The number of auxiliary physical fields
4795: . feAux        - The PetscFE objects for each auxiliary field
4796: . coefficientsAux - The array of FEM auxiliary basis coefficients for the elements
4797: . f0_func      - f_0 function from the first order FEM model
4798: - f1_func      - f_1 function from the first order FEM model

4800:   Output Parameter
4801: . elemVec      - the element residual vectors from each element

4803:    Calling sequence of f0_func and f1_func:
4804: $    void f0(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], const PetscReal n[], PetscScalar f0[])

4806:   Note:
4807: $ Loop over batch of elements (e):
4808: $   Loop over quadrature points (q):
4809: $     Make u_q and gradU_q (loops over fields,Nb,Ncomp) and x_q
4810: $     Call f_0 and f_1
4811: $   Loop over element vector entries (f,fc --> i):
4812: $     elemVec[i] += \psi^{fc}_f(q) f0_{fc}(u, \nabla u) + \nabla\psi^{fc}_f(q) \cdot f1_{fc,df}(u, \nabla u)
4813: */
4814: PetscErrorCode PetscFEIntegrateBdResidual(PetscFE fem, PetscInt Ne, PetscInt Nf, PetscFE fe[], PetscInt field, PetscCellGeometry geom, const PetscScalar coefficients[],
4815:                                           PetscInt NfAux, PetscFE feAux[], const PetscScalar coefficientsAux[],
4816:                                           void (*f0_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], const PetscReal n[], PetscScalar f0[]),
4817:                                           void (*f1_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], const PetscReal n[], PetscScalar f1[]),
4818:                                           PetscScalar elemVec[])
4819: {

4824:   if (fem->ops->integratebdresidual) {(*fem->ops->integratebdresidual)(fem, Ne, Nf, fe, field, geom, coefficients, NfAux, feAux, coefficientsAux, f0_func, f1_func, elemVec);}
4825:   return(0);
4826: }

4830: /*C
4831:   PetscFEIntegrateJacobianAction - Produce the action of the element Jacobian on an element vector for a chunk of elements by quadrature integration

4833:   Not collective

4835:   Input Parameters:
4836: + fem          = The PetscFE object for the field being integrated
4837: . Ne           - The number of elements in the chunk
4838: . Nf           - The number of physical fields
4839: . fe           - The PetscFE objects for each field
4840: . field        - The test field being integrated
4841: . geom         - The cell geometry for each cell in the chunk
4842: . coefficients - The array of FEM basis coefficients for the elements for the Jacobian evaluation point
4843: . input        - The array of FEM basis coefficients for the elements for the input vector
4844: . g0_func      - g_0 function from the first order FEM model
4845: . g1_func      - g_1 function from the first order FEM model
4846: . g2_func      - g_2 function from the first order FEM model
4847: - g3_func      - g_3 function from the first order FEM model

4849:   Output Parameter
4850: . elemVec      - the element vector for the action from each element

4852:    Calling sequence of g0_func, g1_func, g2_func and g3_func:
4853: $    void g0(PetscScalar u[], const PetscScalar gradU[], PetscScalar a[], const PetscScalar gradA[], PetscScalar x[], PetscScalar g0[])

4855:   Note:
4856: $ Loop over batch of elements (e):
4857: $   Loop over element matrix entries (f,fc,g,gc --> i,j):
4858: $     Loop over quadrature points (q):
4859: $       Make u_q and gradU_q (loops over fields,Nb,Ncomp)
4860: $         elemMat[i,j] += \psi^{fc}_f(q) g0_{fc,gc}(u, \nabla u) \phi^{gc}_g(q)
4861: $                      + \psi^{fc}_f(q) \cdot g1_{fc,gc,dg}(u, \nabla u) \nabla\phi^{gc}_g(q)
4862: $                      + \nabla\psi^{fc}_f(q) \cdot g2_{fc,gc,df}(u, \nabla u) \phi^{gc}_g(q)
4863: $                      + \nabla\psi^{fc}_f(q) \cdot g3_{fc,gc,df,dg}(u, \nabla u) \nabla\phi^{gc}_g(q)
4864: */
4865: PetscErrorCode PetscFEIntegrateJacobianAction(PetscFE fem, PetscInt Ne, PetscInt Nf, PetscFE fe[], PetscInt field, PetscCellGeometry geom, const PetscScalar coefficients[], const PetscScalar input[],
4866:                                               void (**g0_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar g0[]),
4867:                                               void (**g1_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar g1[]),
4868:                                               void (**g2_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar g2[]),
4869:                                               void (**g3_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar g3[]),
4870:                                               PetscScalar elemVec[])
4871: {

4876:   if (fem->ops->integratejacobianaction) {(*fem->ops->integratejacobianaction)(fem, Ne, Nf, fe, field, geom, coefficients, input, g0_func, g1_func, g2_func, g3_func, elemVec);}
4877:   return(0);
4878: }

4882: /*C
4883:   PetscFEIntegrateJacobian - Produce the element Jacobian for a chunk of elements by quadrature integration

4885:   Not collective

4887:   Input Parameters:
4888: + fem          = The PetscFE object for the field being integrated
4889: . Ne           - The number of elements in the chunk
4890: . Nf           - The number of physical fields
4891: . fe           - The PetscFE objects for each field
4892: . fieldI       - The test field being integrated
4893: . fieldJ       - The basis field being integrated
4894: . geom         - The cell geometry for each cell in the chunk
4895: . coefficients - The array of FEM basis coefficients for the elements for the Jacobian evaluation point
4896: . NfAux        - The number of auxiliary physical fields
4897: . feAux        - The PetscFE objects for each auxiliary field
4898: . coefficientsAux - The array of FEM auxiliary basis coefficients for the elements
4899: . g0_func      - g_0 function from the first order FEM model
4900: . g1_func      - g_1 function from the first order FEM model
4901: . g2_func      - g_2 function from the first order FEM model
4902: - g3_func      - g_3 function from the first order FEM model

4904:   Output Parameter
4905: . elemMat              - the element matrices for the Jacobian from each element

4907:    Calling sequence of g0_func, g1_func, g2_func and g3_func:
4908: $    void g0(PetscScalar u[], const PetscScalar gradU[], PetscScalar a[], const PetscScalar gradA[], PetscScalar x[], PetscScalar g0[])

4910:   Note:
4911: $ Loop over batch of elements (e):
4912: $   Loop over element matrix entries (f,fc,g,gc --> i,j):
4913: $     Loop over quadrature points (q):
4914: $       Make u_q and gradU_q (loops over fields,Nb,Ncomp)
4915: $         elemMat[i,j] += \psi^{fc}_f(q) g0_{fc,gc}(u, \nabla u) \phi^{gc}_g(q)
4916: $                      + \psi^{fc}_f(q) \cdot g1_{fc,gc,dg}(u, \nabla u) \nabla\phi^{gc}_g(q)
4917: $                      + \nabla\psi^{fc}_f(q) \cdot g2_{fc,gc,df}(u, \nabla u) \phi^{gc}_g(q)
4918: $                      + \nabla\psi^{fc}_f(q) \cdot g3_{fc,gc,df,dg}(u, \nabla u) \nabla\phi^{gc}_g(q)
4919: */
4920: PetscErrorCode PetscFEIntegrateJacobian(PetscFE fem, PetscInt Ne, PetscInt Nf, PetscFE fe[], PetscInt fieldI, PetscInt fieldJ, PetscCellGeometry geom, const PetscScalar coefficients[],
4921:                                         PetscInt NfAux, PetscFE feAux[], const PetscScalar coefficientsAux[],
4922:                                         void (*g0_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar g0[]),
4923:                                         void (*g1_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar g1[]),
4924:                                         void (*g2_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar g2[]),
4925:                                         void (*g3_func)(const PetscScalar u[], const PetscScalar gradU[], const PetscScalar a[], const PetscScalar gradA[], const PetscReal x[], PetscScalar g3[]),
4926:                                         PetscScalar elemMat[])
4927: {

4932:   if (fem->ops->integratejacobian) {(*fem->ops->integratejacobian)(fem, Ne, Nf, fe, fieldI, fieldJ, geom, coefficients, NfAux, feAux, coefficientsAux, g0_func, g1_func, g2_func, g3_func, elemMat);}
4933:   return(0);
4934: }