Actual source code: ex59.c (petsc-3.12.5, 2020-03-29)
1: static char help[] = "This example illustrates the use of PCBDDC/FETI-DP and its customization.\n\n\
2: Discrete system: 1D, 2D or 3D laplacian, discretized with spectral elements.\n\
3: Spectral degree can be specified by passing a value to the -p option.\n\
4: Global problem with either Dirichlet boundary conditions on one side or pure Neumann boundary conditions (depending on runtime parameters).\n\
5: Domain is [-nex,nex]x[-ney,ney]x[-nez,nez]: ne_ number of elements in _ direction.\n\
6: Example usage: \n\
7: 1D: mpiexec -n 4 ex59 -nex 7\n\
8: 2D: mpiexec -n 4 ex59 -npx 2 -npy 2 -nex 2 -ney 2\n\
9: 3D: mpiexec -n 4 ex59 -npx 2 -npy 2 -npz 1 -nex 2 -ney 2 -nez 1\n\
10: Subdomain decomposition can be specified with -np_ parameters.\n\
11: Dirichlet boundaries on one side by default:\n\
12: by default it does not iterate on Dirichlet nodes; if -usezerorows is passed in, it also iterates on Dirichlet nodes.\n\
13: Pure Neumann case can be requested by passing in -pureneumann.\n\
14: In the latter case, to avoid runtime errors during factorization, also specify -coarse_redundant_pc_factor_zeropivot 0\n\n";
16: #include <petscksp.h>
17: #include <petscpc.h>
18: #include <petscdm.h>
19: #include <petscdmda.h>
20: #include <petscblaslapack.h>
21: #define DEBUG 0
23: /* structure holding domain data */
24: typedef struct {
25: /* communicator */
26: MPI_Comm gcomm;
27: /* space dimension */
28: PetscInt dim;
29: /* spectral degree */
30: PetscInt p;
31: /* subdomains per dimension */
32: PetscInt npx,npy,npz;
33: /* subdomain index in cartesian dimensions */
34: PetscInt ipx,ipy,ipz;
35: /* elements per dimension */
36: PetscInt nex,ney,nez;
37: /* local elements per dimension */
38: PetscInt nex_l,ney_l,nez_l;
39: /* global number of dofs per dimension */
40: PetscInt xm,ym,zm;
41: /* local number of dofs per dimension */
42: PetscInt xm_l,ym_l,zm_l;
43: /* starting global indexes for subdomain in lexicographic ordering */
44: PetscInt startx,starty,startz;
45: /* pure Neumann problem */
46: PetscBool pure_neumann;
47: /* Dirichlet BC implementation */
48: PetscBool DBC_zerorows;
49: /* Scaling factor for subdomain */
50: PetscScalar scalingfactor;
51: PetscBool testkspfetidp;
52: } DomainData;
54: /* structure holding GLL data */
55: typedef struct {
56: /* GLL nodes */
57: PetscReal *zGL;
58: /* GLL weights */
59: PetscScalar *rhoGL;
60: /* aux_mat */
61: PetscScalar **A;
62: /* Element matrix */
63: Mat elem_mat;
64: } GLLData;
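/* Builds a local CSR adjacency graph (xadj/adjncy) of the subdomain dofs, later passed to
   PCBDDCSetLocalAdjacencyGraph: dofs interior to the subdomain are connected only to themselves,
   while dofs on the local boundary are connected to all neighbors in their box stencil.
   The arrays are allocated here and ownership is transferred to the PC with PETSC_OWN_POINTER. */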
67: static PetscErrorCode BuildCSRGraph(DomainData dd, PetscInt **xadj, PetscInt **adjncy)
68: {
70: PetscInt *xadj_temp,*adjncy_temp;
71: PetscInt i,j,k,ii,jj,kk,iindex,count_adj;
72: PetscInt istart_csr,iend_csr,jstart_csr,jend_csr,kstart_csr,kend_csr;
73: PetscBool internal_node;
75: /* first count dimension of adjncy */
76: count_adj=0;
77: for (k=0; k<dd.zm_l; k++) {
78: internal_node = PETSC_TRUE;
79: kstart_csr = k>0 ? k-1 : k;
80: kend_csr = k<dd.zm_l-1 ? k+2 : k+1;
81: if (k == 0 || k == dd.zm_l-1) {
82: internal_node = PETSC_FALSE;
83: kstart_csr = k;
84: kend_csr = k+1;
85: }
86: for (j=0; j<dd.ym_l; j++) {
87: jstart_csr = j>0 ? j-1 : j;
88: jend_csr = j<dd.ym_l-1 ? j+2 : j+1;
89: if (j == 0 || j == dd.ym_l-1) {
90: internal_node = PETSC_FALSE;
91: jstart_csr = j;
92: jend_csr = j+1;
93: }
94: for (i=0; i<dd.xm_l; i++) {
95: istart_csr = i>0 ? i-1 : i;
96: iend_csr = i<dd.xm_l-1 ? i+2 : i+1;
97: if (i == 0 || i == dd.xm_l-1) {
98: internal_node = PETSC_FALSE;
99: istart_csr = i;
100: iend_csr = i+1;
101: }
102: if (internal_node) {
103: istart_csr = i;
104: iend_csr = i+1;
105: jstart_csr = j;
106: jend_csr = j+1;
107: kstart_csr = k;
108: kend_csr = k+1;
109: }
110: for (kk=kstart_csr;kk<kend_csr;kk++) {
111: for (jj=jstart_csr;jj<jend_csr;jj++) {
112: for (ii=istart_csr;ii<iend_csr;ii++) {
113: count_adj=count_adj+1;
114: }
115: }
116: }
117: }
118: }
119: }
120: PetscMalloc1(dd.xm_l*dd.ym_l*dd.zm_l+1,&xadj_temp);
121: PetscMalloc1(count_adj,&adjncy_temp);
123: /* now fill CSR data structure */
124: count_adj=0;
125: for (k=0; k<dd.zm_l; k++) {
126: internal_node = PETSC_TRUE;
127: kstart_csr = k>0 ? k-1 : k;
128: kend_csr = k<dd.zm_l-1 ? k+2 : k+1;
129: if (k == 0 || k == dd.zm_l-1) {
130: internal_node = PETSC_FALSE;
131: kstart_csr = k;
132: kend_csr = k+1;
133: }
134: for (j=0; j<dd.ym_l; j++) {
135: jstart_csr = j>0 ? j-1 : j;
136: jend_csr = j<dd.ym_l-1 ? j+2 : j+1;
137: if (j == 0 || j == dd.ym_l-1) {
138: internal_node = PETSC_FALSE;
139: jstart_csr = j;
140: jend_csr = j+1;
141: }
142: for (i=0; i<dd.xm_l; i++) {
143: istart_csr = i>0 ? i-1 : i;
144: iend_csr = i<dd.xm_l-1 ? i+2 : i+1;
145: if (i == 0 || i == dd.xm_l-1) {
146: internal_node = PETSC_FALSE;
147: istart_csr = i;
148: iend_csr = i+1;
149: }
150: iindex = k*dd.xm_l*dd.ym_l+j*dd.xm_l+i;
151: xadj_temp[iindex] = count_adj;
152: if (internal_node) {
153: istart_csr = i;
154: iend_csr = i+1;
155: jstart_csr = j;
156: jend_csr = j+1;
157: kstart_csr = k;
158: kend_csr = k+1;
159: }
160: for (kk=kstart_csr; kk<kend_csr; kk++) {
161: for (jj=jstart_csr; jj<jend_csr; jj++) {
162: for (ii=istart_csr; ii<iend_csr; ii++) {
163: iindex = kk*dd.xm_l*dd.ym_l+jj*dd.xm_l+ii;
165: adjncy_temp[count_adj] = iindex;
166: count_adj = count_adj+1;
167: }
168: }
169: }
170: }
171: }
172: }
173: xadj_temp[dd.xm_l*dd.ym_l*dd.zm_l] = count_adj;
175: *xadj = xadj_temp;
176: *adjncy = adjncy_temp;
177: return(0);
178: }
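/* Computes local index sets for the part of the subdomain boundary lying on the global boundary.
   The Dirichlet set (if requested) contains the west face only; the Neumann set (if requested)
   contains the remaining global-boundary faces, skipping nodes already flagged as Dirichlet. */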
180: static PetscErrorCode ComputeSpecialBoundaryIndices(DomainData dd,IS *dirichlet,IS *neumann)
181: {
183: IS temp_dirichlet=0,temp_neumann=0;
184: PetscInt localsize,i,j,k,*indices;
185: PetscBool *touched;
188: localsize = dd.xm_l*dd.ym_l*dd.zm_l;
190: PetscMalloc1(localsize,&indices);
191: PetscMalloc1(localsize,&touched);
192: for (i=0; i<localsize; i++) touched[i] = PETSC_FALSE;
194: if (dirichlet) {
195: i = 0;
196: /* west boundary */
197: if (dd.ipx == 0) {
198: for (k=0;k<dd.zm_l;k++) {
199: for (j=0;j<dd.ym_l;j++) {
200: indices[i]=k*dd.ym_l*dd.xm_l+j*dd.xm_l;
201: touched[indices[i]]=PETSC_TRUE;
202: i++;
203: }
204: }
205: }
206: ISCreateGeneral(dd.gcomm,i,indices,PETSC_COPY_VALUES,&temp_dirichlet);
207: }
208: if (neumann) {
209: i = 0;
210: /* west boundary */
211: if (dd.ipx == 0) {
212: for (k=0;k<dd.zm_l;k++) {
213: for (j=0;j<dd.ym_l;j++) {
214: indices[i]=k*dd.ym_l*dd.xm_l+j*dd.xm_l;
215: if (!touched[indices[i]]) {
216: touched[indices[i]]=PETSC_TRUE;
217: i++;
218: }
219: }
220: }
221: }
222: /* east boundary */
223: if (dd.ipx == dd.npx-1) {
224: for (k=0;k<dd.zm_l;k++) {
225: for (j=0;j<dd.ym_l;j++) {
226: indices[i]=k*dd.ym_l*dd.xm_l+j*dd.xm_l+dd.xm_l-1;
227: if (!touched[indices[i]]) {
228: touched[indices[i]]=PETSC_TRUE;
229: i++;
230: }
231: }
232: }
233: }
234: /* south boundary */
235: if (dd.ipy == 0 && dd.dim > 1) {
236: for (k=0;k<dd.zm_l;k++) {
237: for (j=0;j<dd.xm_l;j++) {
238: indices[i]=k*dd.ym_l*dd.xm_l+j;
239: if (!touched[indices[i]]) {
240: touched[indices[i]]=PETSC_TRUE;
241: i++;
242: }
243: }
244: }
245: }
246: /* north boundary */
247: if (dd.ipy == dd.npy-1 && dd.dim > 1) {
248: for (k=0;k<dd.zm_l;k++) {
249: for (j=0;j<dd.xm_l;j++) {
250: indices[i]=k*dd.ym_l*dd.xm_l+(dd.ym_l-1)*dd.xm_l+j;
251: if (!touched[indices[i]]) {
252: touched[indices[i]]=PETSC_TRUE;
253: i++;
254: }
255: }
256: }
257: }
258: /* bottom boundary */
259: if (dd.ipz == 0 && dd.dim > 2) {
260: for (k=0;k<dd.ym_l;k++) {
261: for (j=0;j<dd.xm_l;j++) {
262: indices[i]=k*dd.xm_l+j;
263: if (!touched[indices[i]]) {
264: touched[indices[i]]=PETSC_TRUE;
265: i++;
266: }
267: }
268: }
269: }
270: /* top boundary */
271: if (dd.ipz == dd.npz-1 && dd.dim > 2) {
272: for (k=0;k<dd.ym_l;k++) {
273: for (j=0;j<dd.xm_l;j++) {
274: indices[i]=(dd.zm_l-1)*dd.ym_l*dd.xm_l+k*dd.xm_l+j;
275: if (!touched[indices[i]]) {
276: touched[indices[i]]=PETSC_TRUE;
277: i++;
278: }
279: }
280: }
281: }
282: ISCreateGeneral(dd.gcomm,i,indices,PETSC_COPY_VALUES,&temp_neumann);
283: }
284: if (dirichlet) *dirichlet = temp_dirichlet;
285: if (neumann) *neumann = temp_neumann;
286: PetscFree(indices);
287: PetscFree(touched);
288: return(0);
289: }
291: static PetscErrorCode ComputeMapping(DomainData dd,ISLocalToGlobalMapping *isg2lmap)
292: {
293: PetscErrorCode ierr;
294: DM da;
295: AO ao;
296: DMBoundaryType bx = DM_BOUNDARY_NONE,by = DM_BOUNDARY_NONE, bz = DM_BOUNDARY_NONE;
297: DMDAStencilType stype = DMDA_STENCIL_BOX;
298: ISLocalToGlobalMapping temp_isg2lmap;
299: PetscInt i,j,k,ig,jg,kg,lindex,gindex,localsize;
300: PetscInt *global_indices;
303: /* Not an efficient mapping: this function computes a very simple lexicographic mapping
304: just to illustrate the creation of a MATIS object */
305: localsize = dd.xm_l*dd.ym_l*dd.zm_l;
306: PetscMalloc1(localsize,&global_indices);
307: for (k=0; k<dd.zm_l; k++) {
308: kg=dd.startz+k;
309: for (j=0; j<dd.ym_l; j++) {
310: jg=dd.starty+j;
311: for (i=0; i<dd.xm_l; i++) {
312: ig =dd.startx+i;
313: lindex =k*dd.xm_l*dd.ym_l+j*dd.xm_l+i;
314: gindex =kg*dd.xm*dd.ym+jg*dd.xm+ig;
315: global_indices[lindex]=gindex;
316: }
317: }
318: }
319: if (dd.dim==3) {
320: DMDACreate3d(dd.gcomm,bx,by,bz,stype,dd.xm,dd.ym,dd.zm,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,1,1,NULL,NULL,NULL,&da);
321: } else if (dd.dim==2) {
322: DMDACreate2d(dd.gcomm,bx,by,stype,dd.xm,dd.ym,PETSC_DECIDE,PETSC_DECIDE,1,1,NULL,NULL,&da);
323: } else {
324: DMDACreate1d(dd.gcomm,bx,dd.xm,1,1,NULL,&da);
325: }
326: DMSetFromOptions(da);
327: DMSetUp(da);
328: DMDASetAOType(da,AOMEMORYSCALABLE);
329: DMDAGetAO(da,&ao);
330: AOApplicationToPetsc(ao,dd.xm_l*dd.ym_l*dd.zm_l,global_indices);
331: ISLocalToGlobalMappingCreate(dd.gcomm,1,localsize,global_indices,PETSC_OWN_POINTER,&temp_isg2lmap);
332: DMDestroy(&da);
333: *isg2lmap = temp_isg2lmap;
334: return(0);
335: }
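/* Assembles the subdomain (Neumann) stiffness matrix by summing the spectral element matrix over
   the local elements. When Dirichlet dofs are eliminated from the linear system (neither
   -pureneumann nor -usezerorows), subdomains touching the west boundary use, for their leftmost
   elements, a submatrix of the element matrix with the first GLL node removed. */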
336: static PetscErrorCode ComputeSubdomainMatrix(DomainData dd, GLLData glldata, Mat *local_mat)
337: {
339: PetscInt localsize,zloc,yloc,xloc,auxnex,auxney,auxnez;
340: PetscInt ie,je,ke,i,j,k,ig,jg,kg,ii,ming;
341: PetscInt *indexg,*cols,*colsg;
342: PetscScalar *vals;
343: Mat temp_local_mat,elem_mat_DBC=0,*usedmat;
344: IS submatIS;
347: MatGetSize(glldata.elem_mat,&i,&j);
348: PetscMalloc1(i,&indexg);
349: PetscMalloc1(i,&colsg);
350: /* get submatrix of elem_mat without dirichlet nodes */
351: if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
352: xloc = dd.p+1;
353: yloc = 1;
354: zloc = 1;
355: if (dd.dim>1) yloc = dd.p+1;
356: if (dd.dim>2) zloc = dd.p+1;
357: ii = 0;
358: for (k=0;k<zloc;k++) {
359: for (j=0;j<yloc;j++) {
360: for (i=1;i<xloc;i++) {
361: indexg[ii]=k*xloc*yloc+j*xloc+i;
362: ii++;
363: }
364: }
365: }
366: ISCreateGeneral(PETSC_COMM_SELF,ii,indexg,PETSC_COPY_VALUES,&submatIS);
367: MatCreateSubMatrix(glldata.elem_mat,submatIS,submatIS,MAT_INITIAL_MATRIX,&elem_mat_DBC);
368: ISDestroy(&submatIS);
369: }
371: /* Assemble subdomain matrix */
372: localsize = dd.xm_l*dd.ym_l*dd.zm_l;
373: MatCreate(PETSC_COMM_SELF,&temp_local_mat);
374: MatSetSizes(temp_local_mat,localsize,localsize,localsize,localsize);
375: MatSetOptionsPrefix(temp_local_mat,"subdomain_");
376: /* set the local matrix type: here we use SEQSBAIJ primarily for testing purposes */
377: /* in order to avoid conversions inside the BDDC code, use SeqAIJ if possible */
378: if (dd.DBC_zerorows && !dd.ipx) { /* in this case, we need to zero out some of the rows, so use seqaij */
379: MatSetType(temp_local_mat,MATSEQAIJ);
380: } else {
381: MatSetType(temp_local_mat,MATSEQSBAIJ);
382: }
383: MatSetFromOptions(temp_local_mat);
385: i = PetscPowRealInt(3.0*(dd.p+1.0),dd.dim);
387: MatSeqAIJSetPreallocation(temp_local_mat,i,NULL); /* very overestimated */
388: MatSeqSBAIJSetPreallocation(temp_local_mat,1,i,NULL); /* very overestimated */
389: MatSeqBAIJSetPreallocation(temp_local_mat,1,i,NULL); /* very overestimated */
390: MatSetOption(temp_local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);
392: yloc = dd.p+1;
393: zloc = dd.p+1;
394: if (dd.dim < 3) zloc = 1;
395: if (dd.dim < 2) yloc = 1;
397: auxnez = dd.nez_l;
398: auxney = dd.ney_l;
399: auxnex = dd.nex_l;
400: if (dd.dim < 3) auxnez = 1;
401: if (dd.dim < 2) auxney = 1;
403: for (ke=0; ke<auxnez; ke++) {
404: for (je=0; je<auxney; je++) {
405: for (ie=0; ie<auxnex; ie++) {
406: /* customize element accounting for BC */
407: xloc = dd.p+1;
408: ming = 0;
409: usedmat = &glldata.elem_mat;
410: if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
411: if (ie == 0) {
412: xloc = dd.p;
413: usedmat = &elem_mat_DBC;
414: } else {
415: ming = -1;
416: usedmat = &glldata.elem_mat;
417: }
418: }
419: /* local to the element/global to the subdomain indexing */
420: for (k=0; k<zloc; k++) {
421: kg = ke*dd.p+k;
422: for (j=0; j<yloc; j++) {
423: jg = je*dd.p+j;
424: for (i=0; i<xloc; i++) {
425: ig = ie*dd.p+i+ming;
426: ii = k*xloc*yloc+j*xloc+i;
427: indexg[ii] = kg*dd.xm_l*dd.ym_l+jg*dd.xm_l+ig;
428: }
429: }
430: }
431: /* Set values */
432: for (i=0; i<xloc*yloc*zloc; i++) {
433: MatGetRow(*usedmat,i,&j,(const PetscInt**)&cols,(const PetscScalar**)&vals);
434: for (k=0; k<j; k++) colsg[k] = indexg[cols[k]];
435: MatSetValues(temp_local_mat,1,&indexg[i],j,colsg,vals,ADD_VALUES);
436: MatRestoreRow(*usedmat,i,&j,(const PetscInt**)&cols,(const PetscScalar**)&vals);
437: }
438: }
439: }
440: }
441: PetscFree(indexg);
442: PetscFree(colsg);
443: MatAssemblyBegin(temp_local_mat,MAT_FINAL_ASSEMBLY);
444: MatAssemblyEnd (temp_local_mat,MAT_FINAL_ASSEMBLY);
445: #if DEBUG
446: {
447: Vec lvec,rvec;
448: PetscReal norm;
449: MatCreateVecs(temp_local_mat,&lvec,&rvec);
450: VecSet(lvec,1.0);
451: MatMult(temp_local_mat,lvec,rvec);
452: VecNorm(rvec,NORM_INFINITY,&norm);
453: VecDestroy(&lvec);
454: VecDestroy(&rvec);
455: }
456: #endif
457: *local_mat = temp_local_mat;
458: MatDestroy(&elem_mat_DBC);
459: return(0);
460: }
462: static PetscErrorCode GLLStuffs(DomainData dd, GLLData *glldata)
463: {
465: PetscReal *M,si;
466: PetscScalar x,z0,z1,z2,Lpj,Lpr,rhoGLj,rhoGLk;
467: PetscBLASInt pm1,lierr;
468: PetscInt i,j,n,k,s,r,q,ii,jj,p=dd.p;
469: PetscInt xloc,yloc,zloc,xyloc,xyzloc;
472: /* Gauss-Lobatto-Legendre nodes zGL on [-1,1] */
473: PetscCalloc1(p+1,&glldata->zGL);
475: glldata->zGL[0]=-1.0;
476: glldata->zGL[p]= 1.0;
477: if (p > 1) {
478: if (p == 2) glldata->zGL[1]=0.0;
479: else {
480: PetscMalloc1(p-1,&M);
481: for (i=0; i<p-1; i++) {
482: si = (PetscReal)(i+1.0);
483: M[i]=0.5*PetscSqrtReal(si*(si+2.0)/((si+0.5)*(si+1.5)));
484: }
485: pm1 = p-1;
486: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
487: #if defined(PETSC_MISSING_LAPACK_STEQR)
488: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEQR - Lapack routine is unavailable.");
489: #else
490: PetscStackCallBLAS("LAPACKsteqr",LAPACKsteqr_("N",&pm1,&glldata->zGL[1],M,&x,&pm1,M,&lierr));
491: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in STEQR Lapack routine %d",(int)lierr);
492: #endif
493: PetscFPTrapPop();
494: PetscFree(M);
495: }
496: }
498: /* Weights for 1D quadrature */
499: PetscMalloc1(p+1,&glldata->rhoGL);
501: glldata->rhoGL[0]=2.0/(PetscScalar)(p*(p+1.0));
502: glldata->rhoGL[p]=glldata->rhoGL[0];
503: z2 = -1; /* Dummy value to avoid -Wmaybe-uninitialized */
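/* Interior GLL weights: rho_i = 2/(p*(p+1)*P_p(z_i)^2), where the Legendre polynomial P_p is
   evaluated with the three-term (Bonnet) recurrence (n+1) P_{n+1}(x) = (2n+1) x P_n(x) - n P_{n-1}(x) */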
504: for (i=1; i<p; i++) {
505: x = glldata->zGL[i];
506: z0 = 1.0;
507: z1 = x;
508: for (n=1; n<p; n++) {
509: z2 = x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
510: z0 = z1;
511: z1 = z2;
512: }
513: glldata->rhoGL[i]=2.0/(p*(p+1.0)*z2*z2);
514: }
516: /* Auxiliary mat for laplacian */
517: PetscMalloc1(p+1,&glldata->A);
518: PetscMalloc1((p+1)*(p+1),&glldata->A[0]);
519: for (i=1; i<p+1; i++) glldata->A[i]=glldata->A[i-1]+p+1;
521: for (j=1; j<p; j++) {
522: x =glldata->zGL[j];
523: z0=1.0;
524: z1=x;
525: for (n=1; n<p; n++) {
526: z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
527: z0=z1;
528: z1=z2;
529: }
530: Lpj=z2;
531: for (r=1; r<p; r++) {
532: if (r == j) {
533: glldata->A[j][j]=2.0/(3.0*(1.0-glldata->zGL[j]*glldata->zGL[j])*Lpj*Lpj);
534: } else {
535: x = glldata->zGL[r];
536: z0 = 1.0;
537: z1 = x;
538: for (n=1; n<p; n++) {
539: z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
540: z0=z1;
541: z1=z2;
542: }
543: Lpr = z2;
544: glldata->A[r][j]=4.0/(p*(p+1.0)*Lpj*Lpr*(glldata->zGL[j]-glldata->zGL[r])*(glldata->zGL[j]-glldata->zGL[r]));
545: }
546: }
547: }
548: for (j=1; j<p+1; j++) {
549: x = glldata->zGL[j];
550: z0 = 1.0;
551: z1 = x;
552: for (n=1; n<p; n++) {
553: z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
554: z0=z1;
555: z1=z2;
556: }
557: Lpj = z2;
558: glldata->A[j][0]=4.0*PetscPowRealInt(-1.0,p)/(p*(p+1.0)*Lpj*(1.0+glldata->zGL[j])*(1.0+glldata->zGL[j]));
559: glldata->A[0][j]=glldata->A[j][0];
560: }
561: for (j=0; j<p; j++) {
562: x = glldata->zGL[j];
563: z0 = 1.0;
564: z1 = x;
565: for (n=1; n<p; n++) {
566: z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
567: z0=z1;
568: z1=z2;
569: }
570: Lpj=z2;
572: glldata->A[p][j]=4.0/(p*(p+1.0)*Lpj*(1.0-glldata->zGL[j])*(1.0-glldata->zGL[j]));
573: glldata->A[j][p]=glldata->A[p][j];
574: }
575: glldata->A[0][0]=0.5+(p*(p+1.0)-2.0)/6.0;
576: glldata->A[p][p]=glldata->A[0][0];
578: /* compute element matrix */
579: xloc = p+1;
580: yloc = p+1;
581: zloc = p+1;
582: if (dd.dim<2) yloc=1;
583: if (dd.dim<3) zloc=1;
584: xyloc = xloc*yloc;
585: xyzloc = xloc*yloc*zloc;
587: MatCreate(PETSC_COMM_SELF,&glldata->elem_mat);
588: MatSetSizes(glldata->elem_mat,xyzloc,xyzloc,xyzloc,xyzloc);
589: MatSetType(glldata->elem_mat,MATSEQAIJ);
590: MatSeqAIJSetPreallocation(glldata->elem_mat,xyzloc,NULL); /* overestimated */
591: MatZeroEntries(glldata->elem_mat);
592: MatSetOption(glldata->elem_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);
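/* Element stiffness matrix built from tensor products of the 1D GLL stiffness A and the GLL weights:
   couplings along x contribute A[i][q]*rho_j*rho_k, along y A[j][r]*rho_i*rho_k, along z A[k][s]*rho_i*rho_j */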
594: for (k=0; k<zloc; k++) {
595: if (dd.dim>2) rhoGLk=glldata->rhoGL[k];
596: else rhoGLk=1.0;
598: for (j=0; j<yloc; j++) {
599: if (dd.dim>1) rhoGLj=glldata->rhoGL[j];
600: else rhoGLj=1.0;
602: for (i=0; i<xloc; i++) {
603: ii = k*xyloc+j*xloc+i;
604: s = k;
605: r = j;
606: for (q=0; q<xloc; q++) {
607: jj = s*xyloc+r*xloc+q;
608: MatSetValue(glldata->elem_mat,jj,ii,glldata->A[i][q]*rhoGLj*rhoGLk,ADD_VALUES);
609: }
610: if (dd.dim>1) {
611: s=k;
612: q=i;
613: for (r=0; r<yloc; r++) {
614: jj = s*xyloc+r*xloc+q;
615: MatSetValue(glldata->elem_mat,jj,ii,glldata->A[j][r]*glldata->rhoGL[i]*rhoGLk,ADD_VALUES);
616: }
617: }
618: if (dd.dim>2) {
619: r=j;
620: q=i;
621: for (s=0; s<zloc; s++) {
622: jj = s*xyloc+r*xloc+q;
623: MatSetValue(glldata->elem_mat,jj,ii,glldata->A[k][s]*rhoGLj*glldata->rhoGL[i],ADD_VALUES);
624: }
625: }
626: }
627: }
628: }
629: MatAssemblyBegin(glldata->elem_mat,MAT_FINAL_ASSEMBLY);
630: MatAssemblyEnd (glldata->elem_mat,MAT_FINAL_ASSEMBLY);
631: #if DEBUG
632: {
633: Vec lvec,rvec;
634: PetscReal norm;
635: MatCreateVecs(glldata->elem_mat,&lvec,&rvec);
636: VecSet(lvec,1.0);
637: MatMult(glldata->elem_mat,lvec,rvec);
638: VecNorm(rvec,NORM_INFINITY,&norm);
639: VecDestroy(&lvec);
640: VecDestroy(&rvec);
641: }
642: #endif
643: return(0);
644: }
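/* Computes, for each rank, its subdomain indices (ipx,ipy,ipz), the number of local elements per
   dimension, the local and global numbers of dofs, and the starting global indices of the subdomain
   in lexicographic ordering. */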
646: static PetscErrorCode DomainDecomposition(DomainData *dd)
647: {
648: PetscMPIInt rank;
649: PetscInt i,j,k;
652: /* Subdomain index in cartesian coordinates */
653: MPI_Comm_rank(dd->gcomm,&rank);
654: dd->ipx = rank%dd->npx;
655: if (dd->dim>1) dd->ipz = rank/(dd->npx*dd->npy);
656: else dd->ipz = 0;
658: dd->ipy = rank/dd->npx-dd->ipz*dd->npy;
659: /* number of local elements */
660: dd->nex_l = dd->nex/dd->npx;
661: if (dd->ipx < dd->nex%dd->npx) dd->nex_l++;
662: if (dd->dim>1) {
663: dd->ney_l = dd->ney/dd->npy;
664: if (dd->ipy < dd->ney%dd->npy) dd->ney_l++;
665: } else {
666: dd->ney_l = 0;
667: }
668: if (dd->dim>2) {
669: dd->nez_l = dd->nez/dd->npz;
670: if (dd->ipz < dd->nez%dd->npz) dd->nez_l++;
671: } else {
672: dd->nez_l = 0;
673: }
674: /* local and global number of dofs */
675: dd->xm_l = dd->nex_l*dd->p+1;
676: dd->xm = dd->nex*dd->p+1;
677: dd->ym_l = dd->ney_l*dd->p+1;
678: dd->ym = dd->ney*dd->p+1;
679: dd->zm_l = dd->nez_l*dd->p+1;
680: dd->zm = dd->nez*dd->p+1;
681: if (!dd->pure_neumann) {
682: if (!dd->DBC_zerorows && !dd->ipx) dd->xm_l--;
683: if (!dd->DBC_zerorows) dd->xm--;
684: }
686: /* starting global index for local dofs (simple lexicographic order) */
687: dd->startx = 0;
688: j = dd->nex/dd->npx;
689: for (i=0; i<dd->ipx; i++) {
690: k = j;
691: if (i<dd->nex%dd->npx) k++;
692: dd->startx = dd->startx+k*dd->p;
693: }
694: if (!dd->pure_neumann && !dd->DBC_zerorows && dd->ipx) dd->startx--;
696: dd->starty = 0;
697: if (dd->dim > 1) {
698: j = dd->ney/dd->npy;
699: for (i=0; i<dd->ipy; i++) {
700: k = j;
701: if (i<dd->ney%dd->npy) k++;
702: dd->starty = dd->starty+k*dd->p;
703: }
704: }
705: dd->startz = 0;
706: if (dd->dim > 2) {
707: j = dd->nez/dd->npz;
708: for (i=0; i<dd->ipz; i++) {
709: k = j;
710: if (i<dd->nez%dd->npz) k++;
711: dd->startz = dd->startz+k*dd->p;
712: }
713: }
714: return(0);
715: }
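/* Builds the global operator as a MATIS object: assembles the subdomain Neumann matrix, scales it by
   the (possibly jumping) coefficient, and, if -usezerorows is active, enforces the Dirichlet boundary
   conditions with MatZeroRowsColumnsLocalIS. */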
716: static PetscErrorCode ComputeMatrix(DomainData dd, Mat *A)
717: {
718: PetscErrorCode ierr;
719: GLLData gll;
720: Mat local_mat =0,temp_A=0;
721: ISLocalToGlobalMapping matis_map =0;
722: IS dirichletIS=0;
725: /* Compute the Gauss-Lobatto-Legendre quadrature data */
726: GLLStuffs(dd,&gll);
727: /* Compute matrix of subdomain Neumann problem */
728: ComputeSubdomainMatrix(dd,gll,&local_mat);
729: /* Compute global mapping of local dofs */
730: ComputeMapping(dd,&matis_map);
731: /* Create MATIS object needed by BDDC */
732: MatCreateIS(dd.gcomm,1,PETSC_DECIDE,PETSC_DECIDE,dd.xm*dd.ym*dd.zm,dd.xm*dd.ym*dd.zm,matis_map,NULL,&temp_A);
733: /* Set local subdomain matrices into MATIS object */
734: MatScale(local_mat,dd.scalingfactor);
735: MatISSetLocalMat(temp_A,local_mat);
736: /* Call assembly functions */
737: MatAssemblyBegin(temp_A,MAT_FINAL_ASSEMBLY);
738: MatAssemblyEnd(temp_A,MAT_FINAL_ASSEMBLY);
740: if (dd.DBC_zerorows) {
741: PetscInt dirsize;
743: ComputeSpecialBoundaryIndices(dd,&dirichletIS,NULL);
744: MatSetOption(local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);
745: MatZeroRowsColumnsLocalIS(temp_A,dirichletIS,1.0,NULL,NULL);
746: ISGetLocalSize(dirichletIS,&dirsize);
747: ISDestroy(&dirichletIS);
748: }
750: /* giving hints to the local and global matrices can be useful for BDDC */
751: MatSetOption(local_mat,MAT_SYMMETRIC,PETSC_TRUE);
752: MatSetOption(local_mat,MAT_SPD,PETSC_TRUE);
753: #if DEBUG
754: {
755: Vec lvec,rvec;
756: PetscReal norm;
757: MatCreateVecs(temp_A,&lvec,&rvec);
758: VecSet(lvec,1.0);
759: MatMult(temp_A,lvec,rvec);
760: VecNorm(rvec,NORM_INFINITY,&norm);
761: VecDestroy(&lvec);
762: VecDestroy(&rvec);
763: }
764: #endif
765: /* free allocated workspace */
766: PetscFree(gll.zGL);
767: PetscFree(gll.rhoGL);
768: PetscFree(gll.A[0]);
769: PetscFree(gll.A);
770: MatDestroy(&gll.elem_mat);
771: MatDestroy(&local_mat);
772: ISLocalToGlobalMappingDestroy(&matis_map);
773: /* give back the pointer to the MATIS object */
774: *A = temp_A;
775: return(0);
776: }
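/* Creates the KSP for the FETI-DP system in one of two ways: either by explicitly extracting the
   FETI-DP operators from the BDDC preconditioner (PCBDDCCreateFETIDPOperators) and solving in the
   space of Lagrange multipliers, or, when -testkspfetidp is true, by using the KSPFETIDP solver with
   the given PCBDDC as its inner BDDC preconditioner. */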
778: static PetscErrorCode ComputeKSPFETIDP(DomainData dd, KSP ksp_bddc, KSP *ksp_fetidp)
779: {
781: KSP temp_ksp;
782: PC pc;
785: KSPGetPC(ksp_bddc,&pc);
786: if (!dd.testkspfetidp) {
787: PC D;
788: Mat F;
790: PCBDDCCreateFETIDPOperators(pc,PETSC_TRUE,NULL,&F,&D);
791: KSPCreate(PetscObjectComm((PetscObject)F),&temp_ksp);
792: KSPSetOperators(temp_ksp,F,F);
793: KSPSetType(temp_ksp,KSPCG);
794: KSPSetPC(temp_ksp,D);
795: KSPSetComputeSingularValues(temp_ksp,PETSC_TRUE);
796: KSPSetOptionsPrefix(temp_ksp,"fluxes_");
797: KSPSetFromOptions(temp_ksp);
798: KSPSetUp(temp_ksp);
799: MatDestroy(&F);
800: PCDestroy(&D);
801: } else {
802: Mat A,Ap;
804: KSPCreate(PetscObjectComm((PetscObject)ksp_bddc),&temp_ksp);
805: KSPGetOperators(ksp_bddc,&A,&Ap);
806: KSPSetOperators(temp_ksp,A,Ap);
807: KSPSetOptionsPrefix(temp_ksp,"fluxes_");
808: KSPSetType(temp_ksp,KSPFETIDP);
809: KSPFETIDPSetInnerBDDC(temp_ksp,pc);
810: KSPSetComputeSingularValues(temp_ksp,PETSC_TRUE);
811: KSPSetFromOptions(temp_ksp);
812: KSPSetUp(temp_ksp);
813: }
814: *ksp_fetidp = temp_ksp;
815: return(0);
816: }
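/* Creates the KSP preconditioned with PCBDDC and shows its customization API: subdomain scaling
   factor, local dof splitting, near null space, local adjacency graph, user-defined primal vertices,
   and Neumann/Dirichlet boundary index sets. */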
819: static PetscErrorCode ComputeKSPBDDC(DomainData dd,Mat A,KSP *ksp)
820: {
822: KSP temp_ksp;
823: PC pc;
824: IS primals,dirichletIS=0,neumannIS=0,*bddc_dofs_splitting;
825: PetscInt vidx[8],localsize,*xadj=NULL,*adjncy=NULL;
826: MatNullSpace near_null_space;
829: KSPCreate(dd.gcomm,&temp_ksp);
830: KSPSetOperators(temp_ksp,A,A);
831: KSPSetType(temp_ksp,KSPCG);
832: KSPGetPC(temp_ksp,&pc);
833: PCSetType(pc,PCBDDC);
835: localsize = dd.xm_l*dd.ym_l*dd.zm_l;
837: /* BDDC customization */
839: /* jumping coefficients case */
840: PCISSetSubdomainScalingFactor(pc,dd.scalingfactor);
842: /* Dofs splitting
843: Simple stride-1 IS
844: It is not needed since, by default, PCBDDC assumes a stride-1 split */
845: PetscMalloc1(1,&bddc_dofs_splitting);
846: #if 1
847: ISCreateStride(PETSC_COMM_WORLD,localsize,0,1,&bddc_dofs_splitting[0]);
848: PCBDDCSetDofsSplittingLocal(pc,1,bddc_dofs_splitting);
849: #else
850: /* examples for global ordering */
852: /* each process lists the nodes it owns */
853: PetscInt sr,er;
854: MatGetOwnershipRange(A,&sr,&er);
855: ISCreateStride(PETSC_COMM_WORLD,er-sr,sr,1,&bddc_dofs_splitting[0]);
856: PCBDDCSetDofsSplitting(pc,1,bddc_dofs_splitting);
857: /* Split can be passed in a more general way since any process can list any node */
858: #endif
859: ISDestroy(&bddc_dofs_splitting[0]);
860: PetscFree(bddc_dofs_splitting);
862: /* Primal constraints implemented by attaching a near null space to A -> here it passes in only the constants
863: (which in practice is not needed, since by default PCBDDC builds the primal space using constants for the quadrature formulas) */
864: #if 0
865: Vec vecs[2];
866: PetscRandom rctx;
867: MatCreateVecs(A,&vecs[0],&vecs[1]);
868: PetscRandomCreate(dd.gcomm,&rctx);
869: VecSetRandom(vecs[0],rctx);
870: VecSetRandom(vecs[1],rctx);
871: MatNullSpaceCreate(dd.gcomm,PETSC_TRUE,2,vecs,&near_null_space);
872: VecDestroy(&vecs[0]);
873: VecDestroy(&vecs[1]);
874: PetscRandomDestroy(&rctx);
875: #else
876: MatNullSpaceCreate(dd.gcomm,PETSC_TRUE,0,NULL,&near_null_space);
877: #endif
878: MatSetNearNullSpace(A,near_null_space);
879: MatNullSpaceDestroy(&near_null_space);
881: /* CSR graph of subdomain dofs */
882: BuildCSRGraph(dd,&xadj,&adjncy);
883: PCBDDCSetLocalAdjacencyGraph(pc,localsize,xadj,adjncy,PETSC_OWN_POINTER);
885: /* Prescribe user-defined primal vertices: in this case we use the 8 corners in 3D (for 2D and 1D, the indices are duplicated) */
886: vidx[0] = 0*dd.xm_l+0;
887: vidx[1] = 0*dd.xm_l+dd.xm_l-1;
888: vidx[2] = (dd.ym_l-1)*dd.xm_l+0;
889: vidx[3] = (dd.ym_l-1)*dd.xm_l+dd.xm_l-1;
890: vidx[4] = (dd.zm_l-1)*dd.xm_l*dd.ym_l+0*dd.xm_l+0;
891: vidx[5] = (dd.zm_l-1)*dd.xm_l*dd.ym_l+0*dd.xm_l+dd.xm_l-1;
892: vidx[6] = (dd.zm_l-1)*dd.xm_l*dd.ym_l+(dd.ym_l-1)*dd.xm_l+0;
893: vidx[7] = (dd.zm_l-1)*dd.xm_l*dd.ym_l+(dd.ym_l-1)*dd.xm_l+dd.xm_l-1;
894: ISCreateGeneral(dd.gcomm,8,vidx,PETSC_COPY_VALUES,&primals);
895: PCBDDCSetPrimalVerticesLocalIS(pc,primals);
896: ISDestroy(&primals);
898: /* Neumann/Dirichlet indices on the global boundary */
899: if (dd.DBC_zerorows) {
900: /* Only if you eliminate some rows of the matrix with the zerorows function do you need to set Dirichlet indices into the PCBDDC data */
901: ComputeSpecialBoundaryIndices(dd,&dirichletIS,&neumannIS);
902: PCBDDCSetNeumannBoundariesLocal(pc,neumannIS);
903: PCBDDCSetDirichletBoundariesLocal(pc,dirichletIS);
904: } else {
905: if (dd.pure_neumann) {
906: /* In such a case, all interface nodes lying on the global boundary are neumann nodes */
907: ComputeSpecialBoundaryIndices(dd,NULL,&neumannIS);
908: PCBDDCSetNeumannBoundariesLocal(pc,neumannIS);
909: } else {
910: /* It is wrong to set Dirichlet indices without having zeroed the corresponding rows in the global matrix */
911: /* But we can still compute them, since nodes near the Dirichlet boundary do not need to be defined as Neumann nodes */
912: ComputeSpecialBoundaryIndices(dd,&dirichletIS,&neumannIS);
913: PCBDDCSetNeumannBoundariesLocal(pc,neumannIS);
914: }
915: }
917: /* Pass local null space information to local matrices (needed when using approximate local solvers) */
918: if (dd.ipx || dd.pure_neumann) {
919: MatNullSpace nsp;
920: Mat local_mat;
922: MatISGetLocalMat(A,&local_mat);
923: MatNullSpaceCreate(PETSC_COMM_SELF,PETSC_TRUE,0,NULL,&nsp);
924: MatSetNullSpace(local_mat,nsp);
925: MatNullSpaceDestroy(&nsp);
926: }
927: KSPSetComputeSingularValues(temp_ksp,PETSC_TRUE);
928: KSPSetOptionsPrefix(temp_ksp,"physical_");
929: KSPSetFromOptions(temp_ksp);
930: KSPSetUp(temp_ksp);
931: *ksp = temp_ksp;
932: ISDestroy(&dirichletIS);
933: ISDestroy(&neumannIS);
934: return(0);
935: }
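/* Fills the DomainData structure from the command line: process grid (-npx,-npy,-npz), elements per
   dimension (-nex,-ney,-nez), spectral degree (-p), boundary condition handling (-pureneumann,
   -usezerorows) and the checkerboard coefficient jump (-jump), then validates the decomposition. */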
937: static PetscErrorCode InitializeDomainData(DomainData *dd)
938: {
940: PetscMPIInt sizes,rank;
941: PetscInt factor;
944: dd->gcomm = PETSC_COMM_WORLD;
945: MPI_Comm_size(dd->gcomm,&sizes);
946: MPI_Comm_rank(dd->gcomm,&rank);
947: /* Get information from the command line */
948: /* Processors/subdomains per dimension */
949: /* Default is 1d problem */
950: dd->npx = sizes;
951: dd->npy = 0;
952: dd->npz = 0;
953: dd->dim = 1;
954: PetscOptionsGetInt(NULL,NULL,"-npx",&dd->npx,NULL);
955: PetscOptionsGetInt(NULL,NULL,"-npy",&dd->npy,NULL);
956: PetscOptionsGetInt(NULL,NULL,"-npz",&dd->npz,NULL);
957: if (dd->npy) dd->dim++;
958: if (dd->npz) dd->dim++;
959: /* Number of elements per dimension */
960: /* Default is one element per subdomain */
961: dd->nex = dd->npx;
962: dd->ney = dd->npy;
963: dd->nez = dd->npz;
964: PetscOptionsGetInt(NULL,NULL,"-nex",&dd->nex,NULL);
965: PetscOptionsGetInt(NULL,NULL,"-ney",&dd->ney,NULL);
966: PetscOptionsGetInt(NULL,NULL,"-nez",&dd->nez,NULL);
967: if (!dd->npy) {
968: dd->ney=0;
969: dd->nez=0;
970: }
971: if (!dd->npz) dd->nez=0;
972: /* Spectral degree */
973: dd->p = 3;
974: PetscOptionsGetInt(NULL,NULL,"-p",&dd->p,NULL);
975: /* pure neumann problem? */
976: dd->pure_neumann = PETSC_FALSE;
977: PetscOptionsGetBool(NULL,NULL,"-pureneumann",&dd->pure_neumann,NULL);
979: /* How to enforce dirichlet boundary conditions (in case pureneumann has not been requested explicitly) */
980: dd->DBC_zerorows = PETSC_FALSE;
982: PetscOptionsGetBool(NULL,NULL,"-usezerorows",&dd->DBC_zerorows,NULL);
983: if (dd->pure_neumann) dd->DBC_zerorows = PETSC_FALSE;
984: dd->scalingfactor = 1.0;
986: factor = 0.0;
987: PetscOptionsGetInt(NULL,NULL,"-jump",&factor,NULL);
988: /* checkerboard pattern */
989: dd->scalingfactor = PetscPowScalar(10.0,(PetscScalar)factor*PetscPowScalar(-1.0,(PetscScalar)rank));
990: /* test data passed in */
991: if (dd->dim==1) {
992: if (sizes!=dd->npx) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of MPI procs in 1D must be equal to npx");
993: if (dd->nex<dd->npx) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of elements per dim must be greater than or equal to the number of procs per dim");
994: } else if (dd->dim==2) {
995: if (sizes!=dd->npx*dd->npy) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of MPI procs in 2D must be equal to npx*npy");
996: if (dd->nex<dd->npx || dd->ney<dd->npy) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of elements per dim must be greater than or equal to the number of procs per dim");
997: } else {
998: if (sizes!=dd->npx*dd->npy*dd->npz) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of MPI procs in 3D must be equal to npx*npy*npz");
999: if (dd->nex<dd->npx || dd->ney<dd->npy || dd->nez<dd->npz) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of elements per dim must be greater than or equal to the number of procs per dim");
1000: }
1001: return(0);
1002: }
1004: int main(int argc,char **args)
1005: {
1006: PetscErrorCode ierr;
1007: DomainData dd;
1008: PetscReal norm,maxeig,mineig;
1009: PetscScalar scalar_value;
1010: PetscInt ndofs,its;
1011: Mat A = NULL,F = NULL;
1012: KSP KSPwithBDDC = NULL,KSPwithFETIDP = NULL;
1013: KSPConvergedReason reason;
1014: Vec exact_solution = NULL,bddc_solution = NULL,bddc_rhs = NULL;
1015: PetscBool testfetidp = PETSC_TRUE;
1017: /* Init PETSc */
1018: ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
1019: /* Initialize DomainData */
1020: InitializeDomainData(&dd);
1021: /* Decompose domain */
1022: DomainDecomposition(&dd);
1023: #if DEBUG
1024: printf("Subdomain data\n");
1025: printf("IPS : %d %d %d\n",dd.ipx,dd.ipy,dd.ipz);
1026: printf("NEG : %d %d %d\n",dd.nex,dd.ney,dd.nez);
1027: printf("NEL : %d %d %d\n",dd.nex_l,dd.ney_l,dd.nez_l);
1028: printf("LDO : %d %d %d\n",dd.xm_l,dd.ym_l,dd.zm_l);
1029: printf("SIZES : %d %d %d\n",dd.xm,dd.ym,dd.zm);
1030: printf("STARTS: %d %d %d\n",dd.startx,dd.starty,dd.startz);
1031: #endif
1032: dd.testkspfetidp = PETSC_TRUE;
1033: PetscOptionsGetBool(NULL,NULL,"-testfetidp",&testfetidp,NULL);
1034: PetscOptionsGetBool(NULL,NULL,"-testkspfetidp",&dd.testkspfetidp,NULL);
1035: /* assemble global matrix */
1036: ComputeMatrix(dd,&A);
1037: /* get work vectors */
1038: MatCreateVecs(A,&bddc_solution,NULL);
1039: VecDuplicate(bddc_solution,&bddc_rhs);
1040: VecDuplicate(bddc_solution,&exact_solution);
1041: /* create and customize KSP/PC for BDDC */
1042: ComputeKSPBDDC(dd,A,&KSPwithBDDC);
1043: /* create KSP/PC for FETIDP */
1044: if (testfetidp) {
1045: ComputeKSPFETIDP(dd,KSPwithBDDC,&KSPwithFETIDP);
1046: }
1047: /* create random exact solution */
1048: #if defined(PETSC_USE_COMPLEX)
1049: VecSet(exact_solution,1.0 + PETSC_i);
1050: #else
1051: VecSetRandom(exact_solution,NULL);
1052: #endif
1053: VecShift(exact_solution,-0.5);
1054: VecScale(exact_solution,100.0);
1055: VecGetSize(exact_solution,&ndofs);
1056: if (dd.pure_neumann) {
1057: VecSum(exact_solution,&scalar_value);
1058: scalar_value = -scalar_value/(PetscScalar)ndofs;
1059: VecShift(exact_solution,scalar_value);
1060: }
1061: /* assemble BDDC rhs */
1062: MatMult(A,exact_solution,bddc_rhs);
1063: /* test ksp with BDDC */
1064: KSPSolve(KSPwithBDDC,bddc_rhs,bddc_solution);
1065: KSPGetIterationNumber(KSPwithBDDC,&its);
1066: KSPGetConvergedReason(KSPwithBDDC,&reason);
1067: KSPComputeExtremeSingularValues(KSPwithBDDC,&maxeig,&mineig);
1068: if (dd.pure_neumann) {
1069: VecSum(bddc_solution,&scalar_value);
1070: scalar_value = -scalar_value/(PetscScalar)ndofs;
1071: VecShift(bddc_solution,scalar_value);
1072: }
1073: /* check exact_solution against the BDDC solution */
1074: VecAXPY(bddc_solution,-1.0,exact_solution);
1075: VecNorm(bddc_solution,NORM_INFINITY,&norm);
1076: PetscPrintf(dd.gcomm,"---------------------BDDC stats-------------------------------\n");
1077: PetscPrintf(dd.gcomm,"Number of degrees of freedom : %8D\n",ndofs);
1078: if (reason < 0) {
1079: PetscPrintf(dd.gcomm,"Number of iterations : %8D\n",its);
1080: PetscPrintf(dd.gcomm,"Converged reason : %s\n",KSPConvergedReasons[reason]);
1081: }
1082: if (0.95 <= mineig && mineig <= 1.05) mineig = 1.0;
1083: PetscPrintf(dd.gcomm,"Eigenvalues preconditioned operator : %1.1e %1.1e\n",(double)PetscFloorReal(100.*mineig)/100.,(double)PetscCeilReal(100.*maxeig)/100.);
1084: if (norm > 1.e-1 || reason < 0) {
1085: PetscPrintf(dd.gcomm,"Error betweeen exact and computed solution : %1.2e\n",(double)norm);
1086: }
1087: PetscPrintf(dd.gcomm,"--------------------------------------------------------------\n");
1088: if (testfetidp) {
1089: Vec fetidp_solution_all = NULL,fetidp_solution = NULL,fetidp_rhs = NULL;
1091: VecDuplicate(bddc_solution,&fetidp_solution_all);
1092: if (!dd.testkspfetidp) {
1093: /* assemble fetidp rhs on the space of Lagrange multipliers */
1094: KSPGetOperators(KSPwithFETIDP,&F,NULL);
1095: MatCreateVecs(F,&fetidp_solution,&fetidp_rhs);
1096: PCBDDCMatFETIDPGetRHS(F,bddc_rhs,fetidp_rhs);
1097: VecSet(fetidp_solution,0.0);
1098: /* test ksp with FETIDP */
1099: KSPSolve(KSPwithFETIDP,fetidp_rhs,fetidp_solution);
1100: /* assemble fetidp solution on physical domain */
1101: PCBDDCMatFETIDPGetSolution(F,fetidp_solution,fetidp_solution_all);
1102: } else {
1103: KSP kspF;
1104: KSPSolve(KSPwithFETIDP,bddc_rhs,fetidp_solution_all);
1105: KSPFETIDPGetInnerKSP(KSPwithFETIDP,&kspF);
1106: KSPGetOperators(kspF,&F,NULL);
1107: }
1108: MatGetSize(F,&ndofs,NULL);
1109: KSPGetIterationNumber(KSPwithFETIDP,&its);
1110: KSPGetConvergedReason(KSPwithFETIDP,&reason);
1111: KSPComputeExtremeSingularValues(KSPwithFETIDP,&maxeig,&mineig);
1112: /* check FETIDP sol */
1113: if (dd.pure_neumann) {
1114: VecSum(fetidp_solution_all,&scalar_value);
1115: scalar_value = -scalar_value/(PetscScalar)ndofs;
1116: VecShift(fetidp_solution_all,scalar_value);
1117: }
1118: VecAXPY(fetidp_solution_all,-1.0,exact_solution);
1119: VecNorm(fetidp_solution_all,NORM_INFINITY,&norm);
1120: PetscPrintf(dd.gcomm,"------------------FETI-DP stats-------------------------------\n");
1121: PetscPrintf(dd.gcomm,"Number of degrees of freedom : %8D\n",ndofs);
1122: if (reason < 0) {
1123: PetscPrintf(dd.gcomm,"Number of iterations : %8D\n",its);
1124: PetscPrintf(dd.gcomm,"Converged reason : %D\n",reason);
1125: }
1126: if (0.95 <= mineig && mineig <= 1.05) mineig = 1.0;
1127: PetscPrintf(dd.gcomm,"Eigenvalues preconditioned operator : %1.1e %1.1e\n",(double)PetscFloorReal(100.*mineig)/100.,(double)PetscCeilReal(100.*maxeig)/100.);
1128: if (norm > 1.e-1 || reason < 0) {
1129: PetscPrintf(dd.gcomm,"Error betweeen exact and computed solution : %1.2e\n",(double)norm);
1130: }
1131: PetscPrintf(dd.gcomm,"--------------------------------------------------------------\n");
1132: VecDestroy(&fetidp_solution);
1133: VecDestroy(&fetidp_solution_all);
1134: VecDestroy(&fetidp_rhs);
1135: }
1136: KSPDestroy(&KSPwithFETIDP);
1137: VecDestroy(&exact_solution);
1138: VecDestroy(&bddc_solution);
1139: VecDestroy(&bddc_rhs);
1140: MatDestroy(&A);
1141: KSPDestroy(&KSPwithBDDC);
1142: /* Quit PETSc */
1143: ierr = PetscFinalize();
1144: return ierr;
1145: }
1147: /*TEST
1149: testset:
1150: nsize: 4
1151: args: -nex 7 -physical_pc_bddc_coarse_eqs_per_proc 3 -physical_pc_bddc_switch_static
1152: output_file: output/ex59_bddc_fetidp_1.out
1153: test:
1154: suffix: bddc_fetidp_1
1155: test:
1156: requires: viennacl
1157: suffix: bddc_fetidp_1_viennacl
1158: args: -subdomain_mat_type aijviennacl
1159: test:
1160: requires: cuda
1161: suffix: bddc_fetidp_1_cuda
1162: args: -subdomain_mat_type aijcusparse -physical_pc_bddc_dirichlet_pc_factor_mat_solver_type cusparse
1164: testset:
1165: nsize: 4
1166: args: -npx 2 -npy 2 -nex 6 -ney 6 -fluxes_ksp_max_it 10 -physical_ksp_max_it 10
1167: requires: !single
1168: test:
1169: suffix: bddc_fetidp_2
1170: test:
1171: suffix: bddc_fetidp_3
1172: args: -npz 1 -nez 1
1173: test:
1174: suffix: bddc_fetidp_4
1175: args: -npz 1 -nez 1 -physical_pc_bddc_use_change_of_basis -physical_sub_schurs_mat_solver_type petsc -physical_pc_bddc_use_deluxe_scaling -physical_pc_bddc_deluxe_singlemat -fluxes_fetidp_ksp_type cg
1177: testset:
1178: nsize: 8
1179: suffix: bddc_fetidp_approximate
1180: args: -npx 2 -npy 2 -npz 2 -p 2 -nex 8 -ney 7 -nez 9 -physical_ksp_max_it 20 -subdomain_mat_type aij -physical_pc_bddc_switch_static -physical_pc_bddc_dirichlet_approximate -physical_pc_bddc_neumann_approximate -physical_pc_bddc_dirichlet_pc_type gamg -physical_pc_bddc_dirichlet_mg_levels_ksp_max_it 3 -physical_pc_bddc_neumann_pc_type sor -physical_pc_bddc_neumann_approximate_scale -testfetidp 0
1182: testset:
1183: nsize: 4
1184: args: -npx 2 -npy 2 -nex 6 -ney 6 -fluxes_ksp_max_it 10 -physical_ksp_max_it 10 -physical_ksp_view -physical_pc_bddc_levels 1
1185: filter: grep -v "variant HERMITIAN"
1186: requires: !single
1187: test:
1188: suffix: bddc_fetidp_ml_1
1189: args: -physical_pc_bddc_coarsening_ratio 1
1190: test:
1191: suffix: bddc_fetidp_ml_2
1192: args: -physical_pc_bddc_coarsening_ratio 2 -mat_partitioning_type average
1193: test:
1194: suffix: bddc_fetidp_ml_3
1195: args: -physical_pc_bddc_coarsening_ratio 4
1197: testset:
1198: nsize: 9
1199: args: -npx 3 -npy 3 -p 2 -nex 6 -ney 6 -physical_pc_bddc_deluxe_singlemat -physical_sub_schurs_mat_solver_type petsc -physical_pc_bddc_use_deluxe_scaling -physical_pc_bddc_graph_maxcount 1 -physical_pc_bddc_levels 3 -physical_pc_bddc_coarsening_ratio 2 -physical_pc_bddc_coarse_ksp_type gmres
1200: output_file: output/ex59_bddc_fetidp_ml_eqlimit.out
1201: test:
1202: suffix: bddc_fetidp_ml_eqlimit_1
1203: args: -physical_pc_bddc_coarse_eqs_limit 31 -mat_partitioning_type average -physical_pc_bddc_coarse_pc_bddc_graph_maxcount 1
1204: test:
1205: suffix: bddc_fetidp_ml_eqlimit_2
1206: args: -physical_pc_bddc_coarse_eqs_limit 46
1209: TEST*/