Actual source code: bddcprivate.c
petsc-dev 2014-02-02
1: #include <bddc.h>
2: #include <bddcprivate.h>
3: #include <petscblaslapack.h>
7: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8: {
9: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
10: PetscScalar *coarse_submat_vals;
14: /* Compute matrix after change of basis and extract local submatrices */
15: PCBDDCSetUpLocalMatrices(pc);
17: /* Setup local scatters R_to_B and (optionally) R_to_D */
18: /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
19: PCBDDCSetUpLocalScatters(pc);
21: /* Setup local solvers ksp_D and ksp_R */
22: /* PCBDDCSetUpLocalScatters should be called first! */
23: PCBDDCSetUpLocalSolvers(pc);
25: /* Change global null space passed in by the user if change of basis has been requested */
26: if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
27: PCBDDCNullSpaceAdaptGlobal(pc);
28: }
30: /*
31: Setup local correction and local part of coarse basis.
32: Gives back the dense local part of the coarse matrix in column major ordering
33: */
34: PCBDDCSetUpCorrection(pc,&coarse_submat_vals);
36: /* Compute total number of coarse nodes and setup coarse solver */
37: PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);
39: /* free */
40: PetscFree(coarse_submat_vals);
41: return(0);
42: }
46: PetscErrorCode PCBDDCResetCustomization(PC pc)
47: {
48: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
49: PetscInt i;
53: PCBDDCGraphResetCSR(pcbddc->mat_graph);
54: ISDestroy(&pcbddc->user_primal_vertices);
55: MatNullSpaceDestroy(&pcbddc->NullSpace);
56: ISDestroy(&pcbddc->NeumannBoundaries);
57: ISDestroy(&pcbddc->DirichletBoundaries);
58: for (i=0;i<pcbddc->n_ISForDofs;i++) {
59: ISDestroy(&pcbddc->ISForDofs[i]);
60: }
61: PetscFree(pcbddc->ISForDofs);
62: pcbddc->n_ISForDofs = 0;
63: MatNullSpaceDestroy(&pcbddc->onearnullspace);
64: PetscFree(pcbddc->onearnullvecs_state);
65: return(0);
66: }
70: PetscErrorCode PCBDDCResetTopography(PC pc)
71: {
72: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
76: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
77: MatDestroy(&pcbddc->ConstraintMatrix);
78: PCBDDCGraphReset(pcbddc->mat_graph);
79: return(0);
80: }
84: PetscErrorCode PCBDDCResetSolvers(PC pc)
85: {
86: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
90: VecDestroy(&pcbddc->coarse_vec);
91: VecDestroy(&pcbddc->coarse_rhs);
92: MatDestroy(&pcbddc->coarse_phi_B);
93: MatDestroy(&pcbddc->coarse_phi_D);
94: MatDestroy(&pcbddc->coarse_psi_B);
95: MatDestroy(&pcbddc->coarse_psi_D);
96: VecDestroy(&pcbddc->vec1_P);
97: VecDestroy(&pcbddc->vec1_C);
98: MatDestroy(&pcbddc->local_auxmat1);
99: MatDestroy(&pcbddc->local_auxmat2);
100: VecDestroy(&pcbddc->vec1_R);
101: VecDestroy(&pcbddc->vec2_R);
102: ISDestroy(&pcbddc->is_R_local);
103: VecScatterDestroy(&pcbddc->R_to_B);
104: VecScatterDestroy(&pcbddc->R_to_D);
105: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
106: KSPDestroy(&pcbddc->ksp_D);
107: KSPDestroy(&pcbddc->ksp_R);
108: KSPDestroy(&pcbddc->coarse_ksp);
109: MatDestroy(&pcbddc->local_mat);
110: PetscFree(pcbddc->primal_indices_local_idxs);
111: PetscFree(pcbddc->global_primal_indices);
112: return(0);
113: }
117: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
118: {
119: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
120: PC_IS *pcis = (PC_IS*)pc->data;
121: VecType impVecType;
122: PetscInt n_constraints,n_R,old_size;
126: if (!pcbddc->ConstraintMatrix) {
127: SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
128: }
129: /* get sizes */
130: n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
131: n_R = pcis->n-pcbddc->n_actual_vertices;
132: VecGetType(pcis->vec1_N,&impVecType);
133: /* local work vectors (try to avoid unneeded work)*/
134: /* R nodes */
135: old_size = -1;
136: if (pcbddc->vec1_R) {
137: VecGetSize(pcbddc->vec1_R,&old_size);
138: }
139: if (n_R != old_size) {
140: VecDestroy(&pcbddc->vec1_R);
141: VecDestroy(&pcbddc->vec2_R);
142: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);
143: VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);
144: VecSetType(pcbddc->vec1_R,impVecType);
145: VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
146: }
147: /* local primal dofs */
148: old_size = -1;
149: if (pcbddc->vec1_P) {
150: VecGetSize(pcbddc->vec1_P,&old_size);
151: }
152: if (pcbddc->local_primal_size != old_size) {
153: VecDestroy(&pcbddc->vec1_P);
154: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);
155: VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);
156: VecSetType(pcbddc->vec1_P,impVecType);
157: }
158: /* local explicit constraints */
159: old_size = -1;
160: if (pcbddc->vec1_C) {
161: VecGetSize(pcbddc->vec1_C,&old_size);
162: }
163: if (n_constraints && n_constraints != old_size) {
164: VecDestroy(&pcbddc->vec1_C);
165: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);
166: VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);
167: VecSetType(pcbddc->vec1_C,impVecType);
168: }
169: return(0);
170: }
174: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
175: {
176: PetscErrorCode ierr;
177: /* pointers to pcis and pcbddc */
178: PC_IS* pcis = (PC_IS*)pc->data;
179: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
180: /* submatrices of local problem */
181: Mat A_RV,A_VR,A_VV;
182: /* working matrices */
183: Mat M1,M2,M3,C_CR;
184: /* working vectors */
185: Vec vec1_C,vec2_C,vec1_V,vec2_V;
186: /* additional working stuff */
187: IS is_aux;
188: PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */
189: const PetscScalar *array,*row_cmat_values;
190: const PetscInt *row_cmat_indices,*idx_R_local;
191: PetscInt *idx_V_B,*auxindices;
192: PetscInt n_vertices,n_constraints,size_of_constraint;
193: PetscInt i,j,n_R,n_D,n_B;
194: PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE;
195: /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
196: MatType impMatType;
197: /* some shortcuts to scalars */
198: PetscScalar zero=0.0,one=1.0,m_one=-1.0;
199: /* for debugging purposes */
200: PetscReal *coarsefunctions_errors,*constraints_errors;
203: /* get number of vertices (corners plus constraints with change of basis)
204: pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
205: n_vertices = pcbddc->n_actual_vertices;
206: n_constraints = pcbddc->local_primal_size-n_vertices;
207: /* Set Non-overlapping dimensions */
208: n_B = pcis->n_B; n_D = pcis->n - n_B;
209: n_R = pcis->n-n_vertices;
211: /* Set types for local objects needed by BDDC precondtioner */
212: impMatType = MATSEQDENSE;
214: /* Allocating some extra storage just to be safe */
215: PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);
216: for (i=0;i<pcis->n;i++) auxindices[i]=i;
218: /* vertices in boundary numbering */
219: PetscMalloc1(n_vertices,&idx_V_B);
220: ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);
221: if (i != n_vertices) {
222: SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
223: }
225: /* Precompute stuffs needed for preprocessing and application of BDDC*/
226: if (n_constraints) {
227: /* see if we can save some allocations */
228: if (pcbddc->local_auxmat2) {
229: PetscInt on_R,on_constraints;
230: MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);
231: if (on_R != n_R || on_constraints != n_constraints) {
232: MatDestroy(&pcbddc->local_auxmat2);
233: MatDestroy(&pcbddc->local_auxmat1);
234: }
235: }
236: /* work vectors */
237: VecDuplicate(pcbddc->vec1_C,&vec1_C);
238: VecDuplicate(pcbddc->vec1_C,&vec2_C);
239: /* auxiliary matrices */
240: if (!pcbddc->local_auxmat2) {
241: MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);
242: MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);
243: MatSetType(pcbddc->local_auxmat2,impMatType);
244: MatSetUp(pcbddc->local_auxmat2);
245: }
247: /* Extract constraints on R nodes: C_{CR} */
248: ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);
249: MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);
250: ISDestroy(&is_aux);
252: /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
253: for (i=0;i<n_constraints;i++) {
254: VecSet(pcbddc->vec1_R,zero);
255: /* Get row of constraint matrix in R numbering */
256: MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
257: VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);
258: MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
259: VecAssemblyBegin(pcbddc->vec1_R);
260: VecAssemblyEnd(pcbddc->vec1_R);
261: /* Solve for row of constraint matrix in R numbering */
262: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
263: /* Set values in local_auxmat2 */
264: VecGetArrayRead(pcbddc->vec2_R,&array);
265: MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);
266: VecRestoreArrayRead(pcbddc->vec2_R,&array);
267: }
268: MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);
269: MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);
270: MatScale(pcbddc->local_auxmat2,m_one);
272: /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */
273: MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
274: MatLUFactor(M3,NULL,NULL,NULL);
275: MatCreate(PETSC_COMM_SELF,&M1);
276: MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);
277: MatSetType(M1,impMatType);
278: MatSetUp(M1);
279: MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);
280: MatZeroEntries(M2);
281: VecSet(vec1_C,m_one);
282: MatDiagonalSet(M2,vec1_C,INSERT_VALUES);
283: MatMatSolve(M3,M2,M1);
284: MatDestroy(&M2);
285: MatDestroy(&M3);
286: /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
287: if (!pcbddc->local_auxmat1) {
288: MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
289: } else {
290: MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
291: }
292: }
294: /* Get submatrices from subdomain matrix */
295: if (n_vertices) {
296: PetscInt ibs,mbs;
297: PetscBool issbaij;
298: Mat newmat;
300: ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);
301: MatGetBlockSize(pcbddc->local_mat,&mbs);
302: ISGetBlockSize(pcbddc->is_R_local,&ibs);
303: if (ibs != mbs) { /* need to convert to SEQAIJ */
304: MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);
305: MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
306: MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
307: MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
308: MatDestroy(&newmat);
309: } else {
310: /* this is safe */
311: MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
312: PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
313: if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
314: MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);
315: /* which of the two approaches is faster? */
316: /* MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
317: MatCreateTranspose(A_RV,&A_VR);*/
318: MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
319: MatCreateTranspose(A_VR,&A_RV);
320: MatDestroy(&newmat);
321: } else {
322: MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
323: MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
324: }
325: }
326: MatGetVecs(A_RV,&vec1_V,NULL);
327: VecDuplicate(vec1_V,&vec2_V);
328: ISDestroy(&is_aux);
329: }
331: /* Matrix of coarse basis functions (local) */
332: if (pcbddc->coarse_phi_B) {
333: PetscInt on_B,on_primal;
334: MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);
335: if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
336: MatDestroy(&pcbddc->coarse_phi_B);
337: MatDestroy(&pcbddc->coarse_psi_B);
338: }
339: }
340: if (pcbddc->coarse_phi_D) {
341: PetscInt on_D,on_primal;
342: MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);
343: if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
344: MatDestroy(&pcbddc->coarse_phi_D);
345: MatDestroy(&pcbddc->coarse_psi_D);
346: }
347: }
348: if (!pcbddc->coarse_phi_B) {
349: MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);
350: MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);
351: MatSetType(pcbddc->coarse_phi_B,impMatType);
352: MatSetUp(pcbddc->coarse_phi_B);
353: }
354: if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
355: MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);
356: MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);
357: MatSetType(pcbddc->coarse_phi_D,impMatType);
358: MatSetUp(pcbddc->coarse_phi_D);
359: }
361: if (pcbddc->dbg_flag) {
362: ISGetIndices(pcbddc->is_R_local,&idx_R_local);
363: PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);
364: PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);
365: }
366: /* Subdomain contribution (Non-overlapping) to coarse matrix */
367: PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);
369: /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
371: /* vertices */
372: for (i=0;i<n_vertices;i++) {
373: /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
374: VecSet(pcbddc->vec1_R,zero); /* TODO: REMOVE IT */
375: VecSet(vec1_V,zero);
376: VecSetValue(vec1_V,i,one,INSERT_VALUES);
377: VecAssemblyBegin(vec1_V);
378: VecAssemblyEnd(vec1_V);
379: /* simplified solution of saddle point problem with null rhs on constraints multipliers */
380: MatMult(A_RV,vec1_V,pcbddc->vec1_R);
381: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
382: VecScale(pcbddc->vec1_R,m_one);
383: if (n_constraints) {
384: MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);
385: MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
386: VecScale(vec1_C,m_one);
387: }
388: MatMult(A_VR,pcbddc->vec1_R,vec2_V);
389: MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);
391: /* Set values in coarse basis function and subdomain part of coarse_mat */
392: /* coarse basis functions */
393: VecSet(pcis->vec1_B,zero);
394: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
395: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
396: VecGetArrayRead(pcis->vec1_B,&array);
397: MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);
398: VecRestoreArrayRead(pcis->vec1_B,&array);
399: MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);
400: if (pcbddc->switch_static || pcbddc->dbg_flag) {
401: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
402: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
403: VecGetArrayRead(pcis->vec1_D,&array);
404: MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);
405: VecRestoreArrayRead(pcis->vec1_D,&array);
406: }
407: /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
408: VecGetArrayRead(vec2_V,&array);
409: PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));
410: VecRestoreArrayRead(vec2_V,&array);
411: if (n_constraints) {
412: VecGetArrayRead(vec1_C,&array);
413: PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));
414: VecRestoreArrayRead(vec1_C,&array);
415: }
417: /* check */
418: if (pcbddc->dbg_flag) {
419: /* assemble subdomain vector on local nodes */
420: VecSet(pcis->vec1_N,zero);
421: VecGetArrayRead(pcbddc->vec1_R,&array);
422: VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);
423: VecRestoreArrayRead(pcbddc->vec1_R,&array);
424: VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);
425: VecAssemblyBegin(pcis->vec1_N);
426: VecAssemblyEnd(pcis->vec1_N);
427: /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
428: VecSet(pcbddc->vec1_P,zero);
429: VecGetArrayRead(vec2_V,&array);
430: VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);
431: VecRestoreArrayRead(vec2_V,&array);
432: if (n_constraints) {
433: VecGetArrayRead(vec1_C,&array);
434: VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);
435: VecRestoreArrayRead(vec1_C,&array);
436: }
437: VecAssemblyBegin(pcbddc->vec1_P);
438: VecAssemblyEnd(pcbddc->vec1_P);
439: VecScale(pcbddc->vec1_P,m_one);
440: /* check saddle point solution */
441: MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);
442: MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);
443: VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);
444: MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);
445: /* shift by the identity matrix */
446: VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);
447: VecAssemblyBegin(pcbddc->vec1_P);
448: VecAssemblyEnd(pcbddc->vec1_P);
449: VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);
450: }
451: }
453: /* constraints */
454: for (i=0;i<n_constraints;i++) {
455: VecSet(vec2_C,zero);
456: VecSetValue(vec2_C,i,m_one,INSERT_VALUES);
457: VecAssemblyBegin(vec2_C);
458: VecAssemblyEnd(vec2_C);
459: /* simplified solution of saddle point problem with null rhs on vertices multipliers */
460: MatMult(M1,vec2_C,vec1_C);
461: MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);
462: VecScale(vec1_C,m_one);
463: if (n_vertices) {
464: MatMult(A_VR,pcbddc->vec1_R,vec2_V);
465: }
466: /* Set values in coarse basis function and subdomain part of coarse_mat */
467: /* coarse basis functions */
468: j = i+n_vertices; /* don't touch this! */
469: VecSet(pcis->vec1_B,zero);
470: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
471: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
472: VecGetArrayRead(pcis->vec1_B,&array);
473: MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);
474: VecRestoreArrayRead(pcis->vec1_B,&array);
475: if (pcbddc->switch_static || pcbddc->dbg_flag) {
476: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
477: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
478: VecGetArrayRead(pcis->vec1_D,&array);
479: MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);
480: VecRestoreArrayRead(pcis->vec1_D,&array);
481: }
482: /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
483: if (n_vertices) {
484: VecGetArrayRead(vec2_V,&array);
485: PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));
486: VecRestoreArrayRead(vec2_V,&array);
487: }
488: VecGetArrayRead(vec1_C,&array);
489: PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));
490: VecRestoreArrayRead(vec1_C,&array);
492: if (pcbddc->dbg_flag) {
493: /* assemble subdomain vector on nodes */
494: VecSet(pcis->vec1_N,zero);
495: VecGetArrayRead(pcbddc->vec1_R,&array);
496: VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);
497: VecRestoreArrayRead(pcbddc->vec1_R,&array);
498: VecAssemblyBegin(pcis->vec1_N);
499: VecAssemblyEnd(pcis->vec1_N);
500: /* assemble subdomain vector of lagrange multipliers */
501: VecSet(pcbddc->vec1_P,zero);
502: if (n_vertices) {
503: VecGetArrayRead(vec2_V,&array);
504: VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);
505: VecRestoreArrayRead(vec2_V,&array);
506: }
507: VecGetArrayRead(vec1_C,&array);
508: VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);
509: VecRestoreArrayRead(vec1_C,&array);
510: VecAssemblyBegin(pcbddc->vec1_P);
511: VecAssemblyEnd(pcbddc->vec1_P);
512: VecScale(pcbddc->vec1_P,m_one);
513: /* check saddle point solution */
514: MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);
515: MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);
516: VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);
517: MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);
518: /* shift by the identity matrix */
519: VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);
520: VecAssemblyBegin(pcbddc->vec1_P);
521: VecAssemblyEnd(pcbddc->vec1_P);
522: VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);
523: }
524: }
525: /* call assembling routines for local coarse basis */
526: MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);
527: MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);
528: if (pcbddc->switch_static || pcbddc->dbg_flag) {
529: MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);
530: MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);
531: }
533: /* compute other basis functions for non-symmetric problems */
534: MatIsSymmetricKnown(pc->pmat,&setsym,&issym);
535: if (!setsym || (setsym && !issym)) {
536: if (!pcbddc->coarse_psi_B) {
537: MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);
538: MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);
539: MatSetType(pcbddc->coarse_psi_B,impMatType);
540: MatSetUp(pcbddc->coarse_psi_B);
541: }
542: if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
543: MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);
544: MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);
545: MatSetType(pcbddc->coarse_psi_D,impMatType);
546: MatSetUp(pcbddc->coarse_psi_D);
547: }
548: for (i=0;i<pcbddc->local_primal_size;i++) {
549: if (n_constraints) {
550: VecSet(vec1_C,zero);
551: for (j=0;j<n_constraints;j++) {
552: VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);
553: }
554: VecAssemblyBegin(vec1_C);
555: VecAssemblyEnd(vec1_C);
556: }
557: if (i<n_vertices) {
558: VecSet(vec1_V,zero);
559: VecSetValue(vec1_V,i,m_one,INSERT_VALUES);
560: VecAssemblyBegin(vec1_V);
561: VecAssemblyEnd(vec1_V);
562: MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);
563: if (n_constraints) {
564: MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
565: }
566: } else {
567: MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);
568: }
569: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
570: VecSet(pcis->vec1_B,zero);
571: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
572: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
573: VecGetArrayRead(pcis->vec1_B,&array);
574: MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);
575: VecRestoreArrayRead(pcis->vec1_B,&array);
576: if (i<n_vertices) {
577: MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);
578: }
579: if (pcbddc->switch_static || pcbddc->dbg_flag) {
580: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
581: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
582: VecGetArrayRead(pcis->vec1_D,&array);
583: MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);
584: VecRestoreArrayRead(pcis->vec1_D,&array);
585: }
587: if (pcbddc->dbg_flag) {
588: /* assemble subdomain vector on nodes */
589: VecSet(pcis->vec1_N,zero);
590: VecGetArrayRead(pcbddc->vec1_R,&array);
591: VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);
592: VecRestoreArrayRead(pcbddc->vec1_R,&array);
593: if (i<n_vertices) {
594: VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);
595: }
596: VecAssemblyBegin(pcis->vec1_N);
597: VecAssemblyEnd(pcis->vec1_N);
598: /* assemble subdomain vector of lagrange multipliers */
599: for (j=0;j<pcbddc->local_primal_size;j++) {
600: VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);
601: }
602: VecAssemblyBegin(pcbddc->vec1_P);
603: VecAssemblyEnd(pcbddc->vec1_P);
604: /* check saddle point solution */
605: MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);
606: MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);
607: VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);
608: MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);
609: /* shift by the identity matrix */
610: VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);
611: VecAssemblyBegin(pcbddc->vec1_P);
612: VecAssemblyEnd(pcbddc->vec1_P);
613: VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);
614: }
615: }
616: MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);
617: MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);
618: if (pcbddc->switch_static || pcbddc->dbg_flag) {
619: MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);
620: MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);
621: }
622: }
623: PetscFree(idx_V_B);
624: /* Checking coarse_sub_mat and coarse basis functios */
625: /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
626: /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
627: if (pcbddc->dbg_flag) {
628: Mat coarse_sub_mat;
629: Mat AUXMAT,TM1,TM2,TM3,TM4;
630: Mat coarse_phi_D,coarse_phi_B;
631: Mat coarse_psi_D,coarse_psi_B;
632: Mat A_II,A_BB,A_IB,A_BI;
633: MatType checkmattype=MATSEQAIJ;
634: PetscReal real_value;
636: MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
637: MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
638: MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
639: MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
640: MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
641: MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
642: if (pcbddc->coarse_psi_B) {
643: MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);
644: MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);
645: }
646: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);
648: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
649: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");
650: PetscViewerFlush(pcbddc->dbg_viewer);
651: if (pcbddc->coarse_psi_B) {
652: MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
653: MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);
654: MatDestroy(&AUXMAT);
655: MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
656: MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);
657: MatDestroy(&AUXMAT);
658: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
659: MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
660: MatDestroy(&AUXMAT);
661: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
662: MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
663: MatDestroy(&AUXMAT);
664: } else {
665: MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
666: MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
667: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
668: MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
669: MatDestroy(&AUXMAT);
670: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
671: MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
672: MatDestroy(&AUXMAT);
673: }
674: MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
675: MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
676: MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
677: MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);
678: MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);
679: MatNorm(TM1,NORM_INFINITY,&real_value);
680: PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
681: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");
682: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);
683: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);
684: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");
685: for (i=0;i<pcbddc->local_primal_size;i++) {
686: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);
687: }
688: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");
689: for (i=0;i<pcbddc->local_primal_size;i++) {
690: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);
691: }
692: if (pcbddc->coarse_psi_B) {
693: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");
694: for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
695: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);
696: }
697: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");
698: for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
699: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);
700: }
701: }
702: PetscViewerFlush(pcbddc->dbg_viewer);
703: MatDestroy(&A_II);
704: MatDestroy(&A_BB);
705: MatDestroy(&A_IB);
706: MatDestroy(&A_BI);
707: MatDestroy(&TM1);
708: MatDestroy(&TM2);
709: MatDestroy(&TM3);
710: MatDestroy(&TM4);
711: MatDestroy(&coarse_phi_D);
712: MatDestroy(&coarse_phi_B);
713: if (pcbddc->coarse_psi_B) {
714: MatDestroy(&coarse_psi_D);
715: MatDestroy(&coarse_psi_B);
716: }
717: MatDestroy(&coarse_sub_mat);
718: ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);
719: PetscFree(coarsefunctions_errors);
720: PetscFree(constraints_errors);
721: }
722: /* free memory */
723: if (n_vertices) {
724: VecDestroy(&vec1_V);
725: VecDestroy(&vec2_V);
726: MatDestroy(&A_RV);
727: MatDestroy(&A_VR);
728: MatDestroy(&A_VV);
729: }
730: if (n_constraints) {
731: VecDestroy(&vec1_C);
732: VecDestroy(&vec2_C);
733: MatDestroy(&M1);
734: MatDestroy(&C_CR);
735: }
736: PetscFree(auxindices);
737: /* get back data */
738: *coarse_submat_vals_n = coarse_submat_vals;
739: return(0);
740: }
744: PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
745: {
746: PC_IS* pcis = (PC_IS*)(pc->data);
747: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
748: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
749: PetscBool issbaij,isbaij;
750: /* manage repeated solves */
751: MatReuse reuse;
752: MatStructure matstruct;
753: PetscErrorCode ierr;
756: if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) {
757: SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
758: }
759: /* get mat flags */
760: PCGetOperators(pc,NULL,NULL,&matstruct);
761: reuse = MAT_INITIAL_MATRIX;
762: if (pc->setupcalled) {
763: /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
764: if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
765: if (matstruct == SAME_NONZERO_PATTERN) {
766: reuse = MAT_REUSE_MATRIX;
767: } else {
768: reuse = MAT_INITIAL_MATRIX;
769: }
770: }
771: if (reuse == MAT_INITIAL_MATRIX) {
772: MatDestroy(&pcis->A_II);
773: MatDestroy(&pcis->A_IB);
774: MatDestroy(&pcis->A_BI);
775: MatDestroy(&pcis->A_BB);
776: MatDestroy(&pcbddc->local_mat);
777: }
779: /* transform local matrices if needed */
780: if (pcbddc->use_change_of_basis) {
781: Mat change_mat_all;
782: PetscScalar *row_cmat_values;
783: PetscInt *row_cmat_indices;
784: PetscInt *nnz,*is_indices,*temp_indices;
785: PetscInt i,j,k,n_D,n_B;
787: /* Get Non-overlapping dimensions */
788: n_B = pcis->n_B;
789: n_D = pcis->n-n_B;
791: /* compute nonzero structure of change of basis on all local nodes */
792: PetscMalloc1(pcis->n,&nnz);
793: ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
794: for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
795: ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
796: ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
797: k=1;
798: for (i=0;i<n_B;i++) {
799: MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);
800: nnz[is_indices[i]]=j;
801: if (k < j) k = j;
802: MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);
803: }
804: ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
805: /* assemble change of basis matrix on the whole set of local dofs */
806: PetscMalloc1(k,&temp_indices);
807: MatCreate(PETSC_COMM_SELF,&change_mat_all);
808: MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);
809: MatSetType(change_mat_all,MATSEQAIJ);
810: MatSeqAIJSetPreallocation(change_mat_all,0,nnz);
811: ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
812: for (i=0;i<n_D;i++) {
813: MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);
814: }
815: ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
816: ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
817: for (i=0;i<n_B;i++) {
818: MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);
819: for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
820: MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);
821: MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);
822: }
823: MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);
824: MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);
825: /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */
826: PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);
827: PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);
828: if (!issbaij && !isbaij) {
829: MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);
830: } else {
831: Mat work_mat;
832: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
833: MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);
834: MatDestroy(&work_mat);
835: }
836: /*
837: PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);
838: MatView(change_mat_all,(PetscViewer)0);
839: */
840: MatDestroy(&change_mat_all);
841: PetscFree(nnz);
842: PetscFree(temp_indices);
843: } else {
844: /* without change of basis, the local matrix is unchanged */
845: if (!pcbddc->local_mat) {
846: PetscObjectReference((PetscObject)matis->A);
847: pcbddc->local_mat = matis->A;
848: }
849: }
851: /* get submatrices */
852: MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);
853: MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);
854: PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
855: if (!issbaij) {
856: MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);
857: MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);
858: } else {
859: Mat newmat;
860: MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);
861: MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);
862: MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);
863: MatDestroy(&newmat);
864: }
865: return(0);
866: }
870: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
871: {
872: PC_IS* pcis = (PC_IS*)(pc->data);
873: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
874: IS is_aux1,is_aux2;
875: PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
876: PetscInt n_vertices,i,j,n_R,n_D,n_B;
877: PetscInt vbs,bs;
878: PetscBT bitmask;
882: /* No need to setup local scatters if primal space is unchanged */
883: if (!pcbddc->new_primal_space_local) {
884: return(0);
885: }
886: /* destroy old objects */
887: ISDestroy(&pcbddc->is_R_local);
888: VecScatterDestroy(&pcbddc->R_to_B);
889: VecScatterDestroy(&pcbddc->R_to_D);
890: /* Set Non-overlapping dimensions */
891: n_B = pcis->n_B; n_D = pcis->n - n_B;
892: n_vertices = pcbddc->n_actual_vertices;
893: /* create auxiliary bitmask */
894: PetscBTCreate(pcis->n,&bitmask);
895: for (i=0;i<n_vertices;i++) {
896: PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);
897: }
899: /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
900: PetscMalloc1((pcis->n-n_vertices),&idx_R_local);
901: for (i=0, n_R=0; i<pcis->n; i++) {
902: if (!PetscBTLookup(bitmask,i)) {
903: idx_R_local[n_R] = i;
904: n_R++;
905: }
906: }
908: /* Block code */
909: vbs = 1;
910: MatGetBlockSize(pcbddc->local_mat,&bs);
911: if (bs>1 && !(n_vertices%bs)) {
912: PetscBool is_blocked = PETSC_TRUE;
913: PetscInt *vary;
914: /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
915: PetscMalloc1(pcis->n/bs,&vary);
916: PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));
917: for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
918: for (i=0; i<n_vertices; i++) {
919: if (vary[i]!=0 && vary[i]!=bs) {
920: is_blocked = PETSC_FALSE;
921: break;
922: }
923: }
924: if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
925: vbs = bs;
926: for (i=0;i<n_R/vbs;i++) {
927: idx_R_local[i] = idx_R_local[vbs*i]/vbs;
928: }
929: }
930: PetscFree(vary);
931: }
932: ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);
933: PetscFree(idx_R_local);
935: /* print some info if requested */
936: if (pcbddc->dbg_flag) {
937: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
938: PetscViewerFlush(pcbddc->dbg_viewer);
939: PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
940: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
941: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);
942: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);
943: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);
944: PetscViewerFlush(pcbddc->dbg_viewer);
945: }
947: /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
948: ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
949: PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);
950: PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);
951: ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
952: for (i=0; i<n_D; i++) {
953: PetscBTSet(bitmask,is_indices[i]);
954: }
955: ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
956: for (i=0, j=0; i<n_R; i++) {
957: if (!PetscBTLookup(bitmask,idx_R_local[i])) {
958: aux_array1[j++] = i;
959: }
960: }
961: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
962: ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
963: for (i=0, j=0; i<n_B; i++) {
964: if (!PetscBTLookup(bitmask,is_indices[i])) {
965: aux_array2[j++] = i;
966: }
967: }
968: ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
969: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);
970: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
971: ISDestroy(&is_aux1);
972: ISDestroy(&is_aux2);
974: if (pcbddc->switch_static || pcbddc->dbg_flag) {
975: PetscMalloc1(n_D,&aux_array1);
976: for (i=0, j=0; i<n_R; i++) {
977: if (PetscBTLookup(bitmask,idx_R_local[i])) {
978: aux_array1[j++] = i;
979: }
980: }
981: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
982: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
983: ISDestroy(&is_aux1);
984: }
985: PetscBTDestroy(&bitmask);
986: ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
987: return(0);
988: }
993: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
994: {
995: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
996: PC_IS *pcis = (PC_IS*)pc->data;
997: PC pc_temp;
998: Mat A_RR;
999: MatStructure matstruct;
1000: MatReuse reuse;
1001: PetscScalar m_one = -1.0;
1002: PetscReal value;
1003: PetscInt n_D,n_R,ibs,mbs;
1004: PetscBool use_exact,use_exact_reduced,issbaij;
1006: /* prefixes stuff */
1007: char dir_prefix[256],neu_prefix[256],str_level[3];
1008: size_t len;
1011: PCGetOperators(pc,NULL,NULL,&matstruct);
1013: /* compute prefixes */
1014: PetscStrcpy(dir_prefix,"");
1015: PetscStrcpy(neu_prefix,"");
1016: if (!pcbddc->current_level) {
1017: PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);
1018: PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);
1019: PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");
1020: PetscStrcat(neu_prefix,"pc_bddc_neumann_");
1021: } else {
1022: PetscStrcpy(str_level,"");
1023: sprintf(str_level,"%d_",(int)(pcbddc->current_level));
1024: PetscStrlen(((PetscObject)pc)->prefix,&len);
1025: len -= 15; /* remove "pc_bddc_coarse_" */
1026: if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
1027: PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);
1028: PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);
1029: *(dir_prefix+len)='\0';
1030: *(neu_prefix+len)='\0';
1031: PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");
1032: PetscStrcat(neu_prefix,"pc_bddc_neumann_");
1033: PetscStrcat(dir_prefix,str_level);
1034: PetscStrcat(neu_prefix,str_level);
1035: }
1037: /* DIRICHLET PROBLEM */
1038: /* Matrix for Dirichlet problem is pcis->A_II */
1039: ISGetSize(pcis->is_I_local,&n_D);
1040: if (!pcbddc->ksp_D) { /* create object if not yet build */
1041: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
1042: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
1043: /* default */
1044: KSPSetType(pcbddc->ksp_D,KSPPREONLY);
1045: KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);
1046: PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);
1047: KSPGetPC(pcbddc->ksp_D,&pc_temp);
1048: if (issbaij) {
1049: PCSetType(pc_temp,PCCHOLESKY);
1050: } else {
1051: PCSetType(pc_temp,PCLU);
1052: }
1053: PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
1054: }
1055: KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);
1056: /* Allow user's customization */
1057: KSPSetFromOptions(pcbddc->ksp_D);
1058: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1059: if (!n_D) {
1060: KSPGetPC(pcbddc->ksp_D,&pc_temp);
1061: PCSetType(pc_temp,PCNONE);
1062: }
1063: /* Set Up KSP for Dirichlet problem of BDDC */
1064: KSPSetUp(pcbddc->ksp_D);
1065: /* set ksp_D into pcis data */
1066: KSPDestroy(&pcis->ksp_D);
1067: PetscObjectReference((PetscObject)pcbddc->ksp_D);
1068: pcis->ksp_D = pcbddc->ksp_D;
1070: /* NEUMANN PROBLEM */
1071: /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1072: ISGetSize(pcbddc->is_R_local,&n_R);
1073: if (pcbddc->ksp_R) { /* already created ksp */
1074: PetscInt nn_R;
1075: KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);
1076: PetscObjectReference((PetscObject)A_RR);
1077: MatGetSize(A_RR,&nn_R,NULL);
1078: if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1079: KSPReset(pcbddc->ksp_R);
1080: MatDestroy(&A_RR);
1081: reuse = MAT_INITIAL_MATRIX;
1082: } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1083: if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1084: MatDestroy(&A_RR);
1085: reuse = MAT_INITIAL_MATRIX;
1086: } else { /* safe to reuse the matrix */
1087: reuse = MAT_REUSE_MATRIX;
1088: }
1089: }
1090: /* last check */
1091: if (matstruct == DIFFERENT_NONZERO_PATTERN) {
1092: MatDestroy(&A_RR);
1093: reuse = MAT_INITIAL_MATRIX;
1094: }
1095: } else { /* first time, so we need to create the matrix */
1096: reuse = MAT_INITIAL_MATRIX;
1097: }
1098: /* extract A_RR */
1099: MatGetBlockSize(pcbddc->local_mat,&mbs);
1100: ISGetBlockSize(pcbddc->is_R_local,&ibs);
1101: if (ibs != mbs) {
1102: Mat newmat;
1103: MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);
1104: MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
1105: MatDestroy(&newmat);
1106: } else {
1107: MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
1108: }
1109: if (!pcbddc->ksp_R) { /* create object if not present */
1110: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
1111: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
1112: /* default */
1113: KSPSetType(pcbddc->ksp_R,KSPPREONLY);
1114: KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);
1115: KSPGetPC(pcbddc->ksp_R,&pc_temp);
1116: PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);
1117: if (issbaij) {
1118: PCSetType(pc_temp,PCCHOLESKY);
1119: } else {
1120: PCSetType(pc_temp,PCLU);
1121: }
1122: PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
1123: }
1124: KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);
1125: /* Allow user's customization */
1126: KSPSetFromOptions(pcbddc->ksp_R);
1127: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1128: if (!n_R) {
1129: KSPGetPC(pcbddc->ksp_R,&pc_temp);
1130: PCSetType(pc_temp,PCNONE);
1131: }
1132: /* Set Up KSP for Neumann problem of BDDC */
1133: KSPSetUp(pcbddc->ksp_R);
1135: /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1136: if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1137: /* Dirichlet */
1138: VecSetRandom(pcis->vec1_D,NULL);
1139: MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
1140: KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);
1141: VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);
1142: VecNorm(pcis->vec1_D,NORM_INFINITY,&value);
1143: /* need to be adapted? */
1144: use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1145: MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
1146: PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);
1147: /* print info */
1148: if (pcbddc->dbg_flag) {
1149: PetscViewerFlush(pcbddc->dbg_viewer);
1150: PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
1151: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
1152: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");
1153: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);
1154: PetscViewerFlush(pcbddc->dbg_viewer);
1155: }
1156: if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1157: PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);
1158: }
1160: /* Neumann */
1161: VecSetRandom(pcbddc->vec1_R,NULL);
1162: MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
1163: KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);
1164: VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);
1165: VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);
1166: /* need to be adapted? */
1167: use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1168: MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
1169: /* print info */
1170: if (pcbddc->dbg_flag) {
1171: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);
1172: PetscViewerFlush(pcbddc->dbg_viewer);
1173: }
1174: if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1175: PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);
1176: }
1177: }
1178: /* free Neumann problem's matrix */
1179: MatDestroy(&A_RR);
1180: return(0);
1181: }
1185: static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc)
1186: {
1188: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
1191: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
1192: if (pcbddc->local_auxmat1) {
1193: MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);
1194: MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);
1195: }
1196: return(0);
1197: }
1201: PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc)
1202: {
1204: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
1205: PC_IS* pcis = (PC_IS*) (pc->data);
1206: const PetscScalar zero = 0.0;
1209: /* Application of PHI^T (or PSI^T) */
1210: if (pcbddc->coarse_psi_B) {
1211: MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);
1212: if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
1213: } else {
1214: MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
1215: if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
1216: }
1217: /* Scatter data of coarse_rhs */
1218: if (pcbddc->coarse_rhs) { VecSet(pcbddc->coarse_rhs,zero); }
1219: PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);
1221: /* Local solution on R nodes */
1222: VecSet(pcbddc->vec1_R,zero);
1223: VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1224: VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1225: if (pcbddc->switch_static) {
1226: VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1227: VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1228: }
1229: PCBDDCSolveSaddlePoint(pc);
1230: VecSet(pcis->vec1_B,zero);
1231: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1232: VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1233: if (pcbddc->switch_static) {
1234: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1235: VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1236: }
1238: /* Coarse solution */
1239: PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);
1240: if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1241: KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);
1242: }
1243: PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
1244: PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
1246: /* Sum contributions from two levels */
1247: MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
1248: if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
1249: return(0);
1250: }
1254: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1255: {
1257: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
1260: VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);
1261: return(0);
1262: }
1266: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1267: {
1269: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
1272: VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);
1273: return(0);
1274: }
1276: /* uncomment for testing purposes */
1277: /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1280: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1281: {
1282: PetscErrorCode ierr;
1283: PC_IS* pcis = (PC_IS*)(pc->data);
1284: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
1285: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
1286: /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1287: MatType impMatType=MATSEQAIJ;
1288: /* one and zero */
1289: PetscScalar one=1.0,zero=0.0;
1290: /* space to store constraints and their local indices */
1291: PetscScalar *temp_quadrature_constraint;
1292: PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1293: /* iterators */
1294: PetscInt i,j,k,total_counts,temp_start_ptr;
1295: /* stuff to store connected components stored in pcbddc->mat_graph */
1296: IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1297: PetscInt n_ISForFaces,n_ISForEdges;
1298: /* near null space stuff */
1299: MatNullSpace nearnullsp;
1300: const Vec *nearnullvecs;
1301: Vec *localnearnullsp;
1302: PetscBool nnsp_has_cnst;
1303: PetscInt nnsp_size;
1304: PetscScalar *array;
1305: /* BLAS integers */
1306: PetscBLASInt lwork,lierr;
1307: PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1;
1308: PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC;
1309: /* LAPACK working arrays for SVD or POD */
1310: PetscBool skip_lapack;
1311: PetscScalar *work;
1312: PetscReal *singular_vals;
1313: #if defined(PETSC_USE_COMPLEX)
1314: PetscReal *rwork;
1315: #endif
1316: #if defined(PETSC_MISSING_LAPACK_GESVD)
1317: PetscBLASInt Blas_one_2=1;
1318: PetscScalar *temp_basis,*correlation_mat;
1319: #else
1320: PetscBLASInt dummy_int_1=1,dummy_int_2=1;
1321: PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1322: #endif
1323: /* reuse */
1324: PetscInt olocal_primal_size;
1325: PetscInt *oprimal_indices_local_idxs;
1326: /* change of basis */
1327: PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1328: PetscBool boolforchange,qr_needed;
1329: PetscBT touched,change_basis,qr_needed_idx;
1330: /* auxiliary stuff */
1331: PetscInt *nnz,*is_indices,*aux_primal_numbering_B;
1332: /* some quantities */
1333: PetscInt n_vertices,total_primal_vertices,valid_constraints;
1334: PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1338: /* Destroy Mat objects computed previously */
1339: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
1340: MatDestroy(&pcbddc->ConstraintMatrix);
1341: /* Get index sets for faces, edges and vertices from graph */
1342: if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1343: pcbddc->use_vertices = PETSC_TRUE;
1344: }
1345: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1346: /* HACK: provide functions to set change of basis */
1347: if (!ISForVertices && pcbddc->NullSpace) {
1348: pcbddc->use_change_of_basis = PETSC_TRUE;
1349: pcbddc->use_change_on_faces = PETSC_FALSE;
1350: }
1351: /* print some info */
1352: if (pcbddc->dbg_flag) {
1353: PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
1354: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
1355: i = 0;
1356: if (ISForVertices) {
1357: ISGetSize(ISForVertices,&i);
1358: }
1359: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);
1360: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);
1361: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);
1362: PetscViewerFlush(pcbddc->dbg_viewer);
1363: }
1364: /* check if near null space is attached to global mat */
1365: MatGetNearNullSpace(pc->pmat,&nearnullsp);
1366: if (nearnullsp) {
1367: MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
1368: /* remove any stored info */
1369: MatNullSpaceDestroy(&pcbddc->onearnullspace);
1370: PetscFree(pcbddc->onearnullvecs_state);
1371: /* store information for BDDC solver reuse */
1372: PetscObjectReference((PetscObject)nearnullsp);
1373: pcbddc->onearnullspace = nearnullsp;
1374: PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);
1375: for (i=0;i<nnsp_size;i++) {
1376: PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);
1377: }
1378: } else { /* if near null space is not provided BDDC uses constants by default */
1379: nnsp_size = 0;
1380: nnsp_has_cnst = PETSC_TRUE;
1381: }
1382: /* get max number of constraints on a single cc */
1383: max_constraints = nnsp_size;
1384: if (nnsp_has_cnst) max_constraints++;
1386: /*
1387: Evaluate maximum storage size needed by the procedure
1388: - temp_indices will contain start index of each constraint stored as follows
1389: - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1390: - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1391: - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1392: */
1393: total_counts = n_ISForFaces+n_ISForEdges;
1394: total_counts *= max_constraints;
1395: n_vertices = 0;
1396: if (ISForVertices) {
1397: ISGetSize(ISForVertices,&n_vertices);
1398: }
1399: total_counts += n_vertices;
1400: PetscMalloc1((total_counts+1),&temp_indices);
1401: PetscBTCreate(total_counts,&change_basis);
1402: total_counts = 0;
1403: max_size_of_constraint = 0;
1404: for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1405: if (i<n_ISForEdges) {
1406: used_IS = &ISForEdges[i];
1407: } else {
1408: used_IS = &ISForFaces[i-n_ISForEdges];
1409: }
1410: ISGetSize(*used_IS,&j);
1411: total_counts += j;
1412: max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1413: }
1414: total_counts *= max_constraints;
1415: total_counts += n_vertices;
1416: PetscMalloc1(total_counts,&temp_quadrature_constraint);
1417: PetscMalloc1(total_counts,&temp_indices_to_constraint);
1418: PetscMalloc1(total_counts,&temp_indices_to_constraint_B);
1419: /* get local part of global near null space vectors */
1420: PetscMalloc1(nnsp_size,&localnearnullsp);
1421: for (k=0;k<nnsp_size;k++) {
1422: VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
1423: VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
1424: VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
1425: }
1427: /* whether or not to skip lapack calls */
1428: skip_lapack = PETSC_TRUE;
1429: if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1431: /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1432: if (!pcbddc->use_nnsp_true && !skip_lapack) {
1433: PetscScalar temp_work;
1434: #if defined(PETSC_MISSING_LAPACK_GESVD)
1435: /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1436: PetscMalloc1(max_constraints*max_constraints,&correlation_mat);
1437: PetscMalloc1(max_constraints,&singular_vals);
1438: PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);
1439: #if defined(PETSC_USE_COMPLEX)
1440: PetscMalloc1(3*max_constraints,&rwork);
1441: #endif
1442: /* now we evaluate the optimal workspace using query with lwork=-1 */
1443: PetscBLASIntCast(max_constraints,&Blas_N);
1444: PetscBLASIntCast(max_constraints,&Blas_LDA);
1445: lwork = -1;
1446: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1447: #if !defined(PETSC_USE_COMPLEX)
1448: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1449: #else
1450: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1451: #endif
1452: PetscFPTrapPop();
1453: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1454: #else /* on missing GESVD */
1455: /* SVD */
1456: PetscInt max_n,min_n;
1457: max_n = max_size_of_constraint;
1458: min_n = max_constraints;
1459: if (max_size_of_constraint < max_constraints) {
1460: min_n = max_size_of_constraint;
1461: max_n = max_constraints;
1462: }
1463: PetscMalloc1(min_n,&singular_vals);
1464: #if defined(PETSC_USE_COMPLEX)
1465: PetscMalloc1(5*min_n,&rwork);
1466: #endif
1467: /* now we evaluate the optimal workspace using query with lwork=-1 */
1468: lwork = -1;
1469: PetscBLASIntCast(max_n,&Blas_M);
1470: PetscBLASIntCast(min_n,&Blas_N);
1471: PetscBLASIntCast(max_n,&Blas_LDA);
1472: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1473: #if !defined(PETSC_USE_COMPLEX)
1474: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1475: #else
1476: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1477: #endif
1478: PetscFPTrapPop();
1479: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1480: #endif /* on missing GESVD */
1481: /* Allocate optimal workspace */
1482: PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
1483: PetscMalloc1((PetscInt)lwork,&work);
1484: }
1485: /* Now we can loop on constraining sets */
1486: total_counts = 0;
1487: temp_indices[0] = 0;
1488: /* vertices */
1489: if (ISForVertices) {
1490: ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);
1491: if (nnsp_has_cnst) { /* consider all vertices */
1492: PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));
1493: for (i=0;i<n_vertices;i++) {
1494: temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1495: temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1496: total_counts++;
1497: }
1498: } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1499: PetscBool used_vertex;
1500: for (i=0;i<n_vertices;i++) {
1501: used_vertex = PETSC_FALSE;
1502: k = 0;
1503: while (!used_vertex && k<nnsp_size) {
1504: VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
1505: if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1506: temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1507: temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1508: temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1509: total_counts++;
1510: used_vertex = PETSC_TRUE;
1511: }
1512: VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
1513: k++;
1514: }
1515: }
1516: }
1517: ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);
1518: n_vertices = total_counts;
1519: }
1521: /* edges and faces */
1522: for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1523: if (i<n_ISForEdges) {
1524: used_IS = &ISForEdges[i];
1525: boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1526: } else {
1527: used_IS = &ISForFaces[i-n_ISForEdges];
1528: boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1529: }
1530: temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
1531: temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1532: ISGetSize(*used_IS,&size_of_constraint);
1533: ISGetIndices(*used_IS,(const PetscInt**)&is_indices);
1534: /* change of basis should not be performed on local periodic nodes */
1535: if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1536: if (nnsp_has_cnst) {
1537: PetscScalar quad_value;
1538: temp_constraints++;
1539: quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1540: PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));
1541: for (j=0;j<size_of_constraint;j++) {
1542: temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1543: }
1544: temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */
1545: total_counts++;
1546: }
1547: for (k=0;k<nnsp_size;k++) {
1548: PetscReal real_value;
1549: VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
1550: PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));
1551: for (j=0;j<size_of_constraint;j++) {
1552: temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1553: }
1554: VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
1555: /* check if array is null on the connected component */
1556: PetscBLASIntCast(size_of_constraint,&Blas_N);
1557: PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1558: if (real_value > 0.0) { /* keep indices and values */
1559: temp_constraints++;
1560: temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */
1561: total_counts++;
1562: }
1563: }
1564: ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);
1565: valid_constraints = temp_constraints;
1566: /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1567: if (!pcbddc->use_nnsp_true && temp_constraints) {
1568: PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1570: #if defined(PETSC_MISSING_LAPACK_GESVD)
1571: /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1572: POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1573: -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1574: the constraints basis will differ (by a complex factor with absolute value equal to 1)
1575: from that computed using LAPACKgesvd
1576: -> This is due to a different computation of eigenvectors in LAPACKheev
1577: -> The quality of the POD-computed basis will be the same */
1578: PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));
1579: /* Store upper triangular part of correlation matrix */
1580: PetscBLASIntCast(size_of_constraint,&Blas_N);
1581: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1582: for (j=0;j<temp_constraints;j++) {
1583: for (k=0;k<j+1;k++) {
1584: PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1585: }
1586: }
1587: /* compute eigenvalues and eigenvectors of correlation matrix */
1588: PetscBLASIntCast(temp_constraints,&Blas_N);
1589: PetscBLASIntCast(temp_constraints,&Blas_LDA);
1590: #if !defined(PETSC_USE_COMPLEX)
1591: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1592: #else
1593: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1594: #endif
1595: PetscFPTrapPop();
1596: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1597: /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1598: j=0;
1599: while (j < temp_constraints && singular_vals[j] < tol) j++;
1600: total_counts=total_counts-j;
1601: valid_constraints = temp_constraints-j;
1602: /* scale and copy POD basis into used quadrature memory */
1603: PetscBLASIntCast(size_of_constraint,&Blas_M);
1604: PetscBLASIntCast(temp_constraints,&Blas_N);
1605: PetscBLASIntCast(temp_constraints,&Blas_K);
1606: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
1607: PetscBLASIntCast(temp_constraints,&Blas_LDB);
1608: PetscBLASIntCast(size_of_constraint,&Blas_LDC);
1609: if (j<temp_constraints) {
1610: PetscInt ii;
1611: for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1612: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1613: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1614: PetscFPTrapPop();
1615: for (k=0;k<temp_constraints-j;k++) {
1616: for (ii=0;ii<size_of_constraint;ii++) {
1617: temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1618: }
1619: }
1620: }
1621: #else /* on missing GESVD */
1622: PetscBLASIntCast(size_of_constraint,&Blas_M);
1623: PetscBLASIntCast(temp_constraints,&Blas_N);
1624: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
1625: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1626: #if !defined(PETSC_USE_COMPLEX)
1627: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1628: #else
1629: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1630: #endif
1631: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1632: PetscFPTrapPop();
1633: /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1634: k = temp_constraints;
1635: if (k > size_of_constraint) k = size_of_constraint;
1636: j = 0;
1637: while (j < k && singular_vals[k-j-1] < tol) j++;
1638: total_counts = total_counts-temp_constraints+k-j;
1639: valid_constraints = k-j;
1640: #endif /* on missing GESVD */
1641: }
1642: /* setting change_of_basis flag is safe now */
1643: if (boolforchange) {
1644: for (j=0;j<valid_constraints;j++) {
1645: PetscBTSet(change_basis,total_counts-j-1);
1646: }
1647: }
1648: }
1649: /* free index sets of faces, edges and vertices */
1650: for (i=0;i<n_ISForFaces;i++) {
1651: ISDestroy(&ISForFaces[i]);
1652: }
1653: PetscFree(ISForFaces);
1654: for (i=0;i<n_ISForEdges;i++) {
1655: ISDestroy(&ISForEdges[i]);
1656: }
1657: PetscFree(ISForEdges);
1658: ISDestroy(&ISForVertices);
1659: /* map temp_indices_to_constraint in boundary numbering */
1660: ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);
1661: if (i != temp_indices[total_counts]) {
1662: SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1663: }
1665: /* free workspace */
1666: if (!pcbddc->use_nnsp_true && !skip_lapack) {
1667: PetscFree(work);
1668: #if defined(PETSC_USE_COMPLEX)
1669: PetscFree(rwork);
1670: #endif
1671: PetscFree(singular_vals);
1672: #if defined(PETSC_MISSING_LAPACK_GESVD)
1673: PetscFree(correlation_mat);
1674: PetscFree(temp_basis);
1675: #endif
1676: }
1677: for (k=0;k<nnsp_size;k++) {
1678: VecDestroy(&localnearnullsp[k]);
1679: }
1680: PetscFree(localnearnullsp);
1682: /* set quantities in pcbddc data structure and store previous primal size */
1683: /* n_vertices defines the number of subdomain corners in the primal space */
1684: /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1685: olocal_primal_size = pcbddc->local_primal_size;
1686: pcbddc->local_primal_size = total_counts;
1687: pcbddc->n_vertices = n_vertices;
1688: pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1690: /* Create constraint matrix */
1691: /* The constraint matrix is used to compute the l2g map of primal dofs */
1692: /* so we need to set it up properly either with or without change of basis */
1693: MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
1694: MatSetType(pcbddc->ConstraintMatrix,impMatType);
1695: MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);
1696: /* array to compute a local numbering of constraints : vertices first then constraints */
1697: PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);
1698: /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1699: /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1700: PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);
1701: /* auxiliary stuff for basis change */
1702: PetscMalloc1(max_size_of_constraint,&global_indices);
1703: PetscBTCreate(pcis->n_B,&touched);
1705: /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1706: total_primal_vertices=0;
1707: for (i=0;i<pcbddc->local_primal_size;i++) {
1708: size_of_constraint=temp_indices[i+1]-temp_indices[i];
1709: if (size_of_constraint == 1) {
1710: PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);
1711: aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1712: aux_primal_minloc[total_primal_vertices]=0;
1713: total_primal_vertices++;
1714: } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1715: PetscInt min_loc,min_index;
1716: ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);
1717: /* find first untouched local node */
1718: k = 0;
1719: while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1720: min_index = global_indices[k];
1721: min_loc = k;
1722: /* search the minimum among global nodes already untouched on the cc */
1723: for (k=1;k<size_of_constraint;k++) {
1724: /* there can be more than one constraint on a single connected component */
1725: if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1726: min_index = global_indices[k];
1727: min_loc = k;
1728: }
1729: }
1730: PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);
1731: aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1732: aux_primal_minloc[total_primal_vertices]=min_loc;
1733: total_primal_vertices++;
1734: }
1735: }
1736: /* determine if a QR strategy is needed for change of basis */
1737: qr_needed = PETSC_FALSE;
1738: PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);
1739: for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1740: if (PetscBTLookup(change_basis,i)) {
1741: size_of_constraint = temp_indices[i+1]-temp_indices[i];
1742: j = 0;
1743: for (k=0;k<size_of_constraint;k++) {
1744: if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1745: j++;
1746: }
1747: }
1748: /* found more than one primal dof on the cc */
1749: if (j > 1) {
1750: PetscBTSet(qr_needed_idx,i);
1751: qr_needed = PETSC_TRUE;
1752: }
1753: }
1754: }
1755: /* free workspace */
1756: PetscFree(global_indices);
1758: /* permute indices in order to have a sorted set of vertices */
1759: PetscSortInt(total_primal_vertices,aux_primal_numbering);
1761: /* nonzero structure of constraint matrix */
1762: PetscMalloc1(pcbddc->local_primal_size,&nnz);
1763: for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1764: j=total_primal_vertices;
1765: for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1766: if (!PetscBTLookup(change_basis,i)) {
1767: nnz[j]=temp_indices[i+1]-temp_indices[i];
1768: j++;
1769: }
1770: }
1771: MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
1772: PetscFree(nnz);
1773: /* set values in constraint matrix */
1774: for (i=0;i<total_primal_vertices;i++) {
1775: MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);
1776: }
1777: total_counts = total_primal_vertices;
1778: for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1779: if (!PetscBTLookup(change_basis,i)) {
1780: size_of_constraint=temp_indices[i+1]-temp_indices[i];
1781: MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);
1782: total_counts++;
1783: }
1784: }
1785: /* assembling */
1786: MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
1787: MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
1788: /*
1789: PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);
1790: MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);
1791: */
1792: /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1793: if (pcbddc->use_change_of_basis) {
1794: /* dual and primal dofs on a single cc */
1795: PetscInt dual_dofs,primal_dofs;
1796: /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1797: PetscInt primal_counter;
1798: /* working stuff for GEQRF */
1799: PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1800: PetscBLASInt lqr_work;
1801: /* working stuff for UNGQR */
1802: PetscScalar *gqr_work,lgqr_work_t;
1803: PetscBLASInt lgqr_work;
1804: /* working stuff for TRTRS */
1805: PetscScalar *trs_rhs;
1806: PetscBLASInt Blas_NRHS;
1807: /* pointers for values insertion into change of basis matrix */
1808: PetscInt *start_rows,*start_cols;
1809: PetscScalar *start_vals;
1810: /* working stuff for values insertion */
1811: PetscBT is_primal;
1813: /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1814: MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);
1815: MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);
1816: MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);
1817: /* work arrays */
1818: PetscMalloc1(pcis->n_B,&nnz);
1819: for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1820: /* nonzeros per row */
1821: for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1822: if (PetscBTLookup(change_basis,i)) {
1823: size_of_constraint = temp_indices[i+1]-temp_indices[i];
1824: if (PetscBTLookup(qr_needed_idx,i)) {
1825: for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1826: } else {
1827: for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1828: /* get local primal index on the cc */
1829: j = 0;
1830: while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1831: nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1832: }
1833: }
1834: }
1835: MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);
1836: PetscFree(nnz);
1837: /* Set initial identity in the matrix */
1838: for (i=0;i<pcis->n_B;i++) {
1839: MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
1840: }
1842: if (pcbddc->dbg_flag) {
1843: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
1844: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);
1845: }
1848: /* Now we loop on the constraints which need a change of basis */
1849: /*
1850: Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1851: Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1853: Basic blocks of change of basis matrix T computed
1855: - Using the following block transformation if there is only a primal dof on the cc
1856: (in the example, primal dof is the last one of the edge in LOCAL ordering
1857: in this code, primal dof is the first one of the edge in GLOBAL ordering)
1858: | 1 0 ... 0 1 |
1859: | 0 1 ... 0 1 |
1860: | ... |
1861: | 0 ... 1 1 |
1862: | -s_1/s_n ... -s_{n-1}/-s_n 1 |
1864: - via QR decomposition of constraints otherwise
1865: */
1866: if (qr_needed) {
1867: /* space to store Q */
1868: PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);
1869: /* first we issue queries for optimal work */
1870: PetscBLASIntCast(max_size_of_constraint,&Blas_M);
1871: PetscBLASIntCast(max_constraints,&Blas_N);
1872: PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
1873: lqr_work = -1;
1874: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1875: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1876: PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);
1877: PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);
1878: lgqr_work = -1;
1879: PetscBLASIntCast(max_size_of_constraint,&Blas_M);
1880: PetscBLASIntCast(max_size_of_constraint,&Blas_N);
1881: PetscBLASIntCast(max_constraints,&Blas_K);
1882: PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
1883: if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1884: PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1885: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1886: PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);
1887: PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);
1888: /* array to store scaling factors for reflectors */
1889: PetscMalloc1(max_constraints,&qr_tau);
1890: /* array to store rhs and solution of triangular solver */
1891: PetscMalloc1(max_constraints*max_constraints,&trs_rhs);
1892: /* allocating workspace for check */
1893: if (pcbddc->dbg_flag) {
1894: PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);
1895: }
1896: }
1897: /* array to store whether a node is primal or not */
1898: PetscBTCreate(pcis->n_B,&is_primal);
1899: PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);
1900: ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);
1901: if (i != total_primal_vertices) {
1902: SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
1903: }
1904: for (i=0;i<total_primal_vertices;i++) {
1905: PetscBTSet(is_primal,aux_primal_numbering_B[i]);
1906: }
1907: PetscFree(aux_primal_numbering_B);
1909: /* loop on constraints and see whether or not they need a change of basis and compute it */
1910: /* -> using implicit ordering contained in temp_indices data */
1911: total_counts = pcbddc->n_vertices;
1912: primal_counter = total_counts;
1913: while (total_counts<pcbddc->local_primal_size) {
1914: primal_dofs = 1;
1915: if (PetscBTLookup(change_basis,total_counts)) {
1916: /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1917: while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1918: primal_dofs++;
1919: }
1920: /* get constraint info */
1921: size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1922: dual_dofs = size_of_constraint-primal_dofs;
1924: if (pcbddc->dbg_flag) {
1925: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);
1926: }
1928: if (primal_dofs > 1) { /* QR */
1930: /* copy quadrature constraints for change of basis check */
1931: if (pcbddc->dbg_flag) {
1932: PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
1933: }
1934: /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1935: PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
1937: /* compute QR decomposition of constraints */
1938: PetscBLASIntCast(size_of_constraint,&Blas_M);
1939: PetscBLASIntCast(primal_dofs,&Blas_N);
1940: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
1941: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1942: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1943: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1944: PetscFPTrapPop();
1946: /* explictly compute R^-T */
1947: PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));
1948: for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1949: PetscBLASIntCast(primal_dofs,&Blas_N);
1950: PetscBLASIntCast(primal_dofs,&Blas_NRHS);
1951: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
1952: PetscBLASIntCast(primal_dofs,&Blas_LDB);
1953: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1954: PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1955: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1956: PetscFPTrapPop();
1958: /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1959: PetscBLASIntCast(size_of_constraint,&Blas_M);
1960: PetscBLASIntCast(size_of_constraint,&Blas_N);
1961: PetscBLASIntCast(primal_dofs,&Blas_K);
1962: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
1963: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1964: PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1965: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1966: PetscFPTrapPop();
1968: /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1969: i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1970: where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1971: PetscBLASIntCast(size_of_constraint,&Blas_M);
1972: PetscBLASIntCast(primal_dofs,&Blas_N);
1973: PetscBLASIntCast(primal_dofs,&Blas_K);
1974: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
1975: PetscBLASIntCast(primal_dofs,&Blas_LDB);
1976: PetscBLASIntCast(size_of_constraint,&Blas_LDC);
1977: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1978: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1979: PetscFPTrapPop();
1980: PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
1982: /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1983: start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1984: /* insert cols for primal dofs */
1985: for (j=0;j<primal_dofs;j++) {
1986: start_vals = &qr_basis[j*size_of_constraint];
1987: start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1988: MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
1989: }
1990: /* insert cols for dual dofs */
1991: for (j=0,k=0;j<dual_dofs;k++) {
1992: if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1993: start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1994: start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1995: MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
1996: j++;
1997: }
1998: }
2000: /* check change of basis */
2001: if (pcbddc->dbg_flag) {
2002: PetscInt ii,jj;
2003: PetscBool valid_qr=PETSC_TRUE;
2004: PetscBLASIntCast(primal_dofs,&Blas_M);
2005: PetscBLASIntCast(size_of_constraint,&Blas_N);
2006: PetscBLASIntCast(size_of_constraint,&Blas_K);
2007: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2008: PetscBLASIntCast(size_of_constraint,&Blas_LDB);
2009: PetscBLASIntCast(primal_dofs,&Blas_LDC);
2010: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2011: PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2012: PetscFPTrapPop();
2013: for (jj=0;jj<size_of_constraint;jj++) {
2014: for (ii=0;ii<primal_dofs;ii++) {
2015: if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2016: if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2017: }
2018: }
2019: if (!valid_qr) {
2020: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);
2021: for (jj=0;jj<size_of_constraint;jj++) {
2022: for (ii=0;ii<primal_dofs;ii++) {
2023: if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2024: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2025: }
2026: if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2027: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2028: }
2029: }
2030: }
2031: } else {
2032: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);
2033: }
2034: }
2035: } else { /* simple transformation block */
2036: PetscInt row,col;
2037: PetscScalar val;
2038: for (j=0;j<size_of_constraint;j++) {
2039: row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2040: if (!PetscBTLookup(is_primal,row)) {
2041: col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2042: MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);
2043: MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);
2044: } else {
2045: for (k=0;k<size_of_constraint;k++) {
2046: col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2047: if (row != col) {
2048: val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2049: } else {
2050: val = 1.0;
2051: }
2052: MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);
2053: }
2054: }
2055: }
2056: }
2057: /* increment primal counter */
2058: primal_counter += primal_dofs;
2059: } else {
2060: if (pcbddc->dbg_flag) {
2061: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);
2062: }
2063: }
2064: /* increment constraint counter total_counts */
2065: total_counts += primal_dofs;
2066: }
2068: /* free workspace */
2069: if (qr_needed) {
2070: if (pcbddc->dbg_flag) {
2071: PetscFree(work);
2072: }
2073: PetscFree(trs_rhs);
2074: PetscFree(qr_tau);
2075: PetscFree(qr_work);
2076: PetscFree(gqr_work);
2077: PetscFree(qr_basis);
2078: }
2079: PetscBTDestroy(&is_primal);
2080: /* assembling */
2081: MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
2082: MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
2083: /*
2084: PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);
2085: MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);
2086: */
2087: }
2089: /* get indices in local ordering for vertices and constraints */
2090: if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2091: PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);
2092: PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));
2093: }
2094: PetscFree(aux_primal_numbering);
2095: PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);
2096: PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);
2097: PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));
2098: PetscFree(aux_primal_numbering);
2099: PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);
2100: PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));
2101: PetscFree(aux_primal_numbering);
2102: /* set quantities in PCBDDC data struct */
2103: pcbddc->n_actual_vertices = i;
2104: /* check if a new primal space has been introduced */
2105: pcbddc->new_primal_space_local = PETSC_TRUE;
2106: if (olocal_primal_size == pcbddc->local_primal_size) {
2107: PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);
2108: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2109: PetscFree(oprimal_indices_local_idxs);
2110: }
2111: /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2112: MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
2114: /* flush dbg viewer */
2115: if (pcbddc->dbg_flag) {
2116: PetscViewerFlush(pcbddc->dbg_viewer);
2117: }
2119: /* free workspace */
2120: PetscBTDestroy(&touched);
2121: PetscBTDestroy(&qr_needed_idx);
2122: PetscFree(aux_primal_minloc);
2123: PetscFree(temp_indices);
2124: PetscBTDestroy(&change_basis);
2125: PetscFree(temp_indices_to_constraint);
2126: PetscFree(temp_indices_to_constraint_B);
2127: PetscFree(temp_quadrature_constraint);
2128: return(0);
2129: }
2133: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2134: {
2135: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
2136: PC_IS *pcis = (PC_IS*)pc->data;
2137: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
2138: PetscInt bs,ierr,i,vertex_size;
2139: PetscViewer viewer=pcbddc->dbg_viewer;
2142: /* Reset previously computed graph */
2143: PCBDDCGraphReset(pcbddc->mat_graph);
2144: /* Init local Graph struct */
2145: PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);
2147: /* Check validity of the csr graph passed in by the user */
2148: if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2149: PCBDDCGraphResetCSR(pcbddc->mat_graph);
2150: }
2152: /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2153: if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2154: Mat mat_adj;
2155: const PetscInt *xadj,*adjncy;
2156: PetscBool flg_row=PETSC_TRUE;
2158: MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);
2159: MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);
2160: if (!flg_row) {
2161: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2162: }
2163: PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);
2164: MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);
2165: if (!flg_row) {
2166: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2167: }
2168: MatDestroy(&mat_adj);
2169: }
2171: /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
2172: vertex_size = 1;
2173: if (!pcbddc->user_provided_isfordofs) {
2174: if (!pcbddc->n_ISForDofs) {
2175: IS *custom_ISForDofs;
2177: MatGetBlockSize(matis->A,&bs);
2178: PetscMalloc1(bs,&custom_ISForDofs);
2179: for (i=0;i<bs;i++) {
2180: ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);
2181: }
2182: PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);
2183: pcbddc->user_provided_isfordofs = PETSC_FALSE;
2184: /* remove my references to IS objects */
2185: for (i=0;i<bs;i++) {
2186: ISDestroy(&custom_ISForDofs[i]);
2187: }
2188: PetscFree(custom_ISForDofs);
2189: }
2190: } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2191: MatGetBlockSize(matis->A,&vertex_size);
2192: }
2194: /* Setup of Graph */
2195: PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
2197: /* Graph's connected components analysis */
2198: PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);
2200: /* print some info to stdout */
2201: if (pcbddc->dbg_flag) {
2202: PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2203: }
2205: /* mark topography has done */
2206: pcbddc->recompute_topography = PETSC_FALSE;
2207: return(0);
2208: }
2212: PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2213: {
2214: PC_BDDC *pcbddc = (PC_BDDC*)(pc->data);
2215: PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2219: n = 0;
2220: vertices = 0;
2221: if (pcbddc->ConstraintMatrix) {
2222: MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);
2223: for (i=0;i<local_primal_size;i++) {
2224: MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);
2225: if (size_of_constraint == 1) n++;
2226: MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);
2227: }
2228: if (vertices_idx) {
2229: PetscMalloc1(n,&vertices);
2230: n = 0;
2231: for (i=0;i<local_primal_size;i++) {
2232: MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);
2233: if (size_of_constraint == 1) {
2234: vertices[n++]=row_cmat_indices[0];
2235: }
2236: MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);
2237: }
2238: }
2239: }
2240: *n_vertices = n;
2241: if (vertices_idx) *vertices_idx = vertices;
2242: return(0);
2243: }
2247: PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2248: {
2249: PC_BDDC *pcbddc = (PC_BDDC*)(pc->data);
2250: PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2251: PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2252: PetscBT touched;
2255: /* This function assumes that the number of local constraints per connected component
2256: is not greater than the number of nodes defined for the connected component
2257: (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2259: n = 0;
2260: constraints_index = 0;
2261: if (pcbddc->ConstraintMatrix) {
2262: MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);
2263: max_size_of_constraint = 0;
2264: for (i=0;i<local_primal_size;i++) {
2265: MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);
2266: if (size_of_constraint > 1) {
2267: n++;
2268: }
2269: max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2270: MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);
2271: }
2272: if (constraints_idx) {
2273: PetscMalloc1(n,&constraints_index);
2274: PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);
2275: PetscBTCreate(local_size,&touched);
2276: n = 0;
2277: for (i=0;i<local_primal_size;i++) {
2278: MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);
2279: if (size_of_constraint > 1) {
2280: ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);
2281: /* find first untouched local node */
2282: j = 0;
2283: while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2284: min_index = row_cmat_global_indices[j];
2285: min_loc = j;
2286: /* search the minimum among nodes not yet touched on the connected component
2287: since there can be more than one constraint on a single cc */
2288: for (j=1;j<size_of_constraint;j++) {
2289: if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2290: min_index = row_cmat_global_indices[j];
2291: min_loc = j;
2292: }
2293: }
2294: PetscBTSet(touched,row_cmat_indices[min_loc]);
2295: constraints_index[n++] = row_cmat_indices[min_loc];
2296: }
2297: MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);
2298: }
2299: PetscBTDestroy(&touched);
2300: PetscFree(row_cmat_global_indices);
2301: }
2302: }
2303: *n_constraints = n;
2304: if (constraints_idx) *constraints_idx = constraints_index;
2305: return(0);
2306: }
2308: /* the next two functions has been adapted from pcis.c */
2311: PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2312: {
2314: PC_IS *pcis = (PC_IS*)(pc->data);
2317: if (!vec2_B) { vec2_B = v; }
2318: MatMult(pcis->A_BB,v,vec1_B);
2319: MatMult(pcis->A_IB,v,vec1_D);
2320: KSPSolve(pcis->ksp_D,vec1_D,vec2_D);
2321: MatMult(pcis->A_BI,vec2_D,vec2_B);
2322: VecAXPY(vec1_B,-1.0,vec2_B);
2323: return(0);
2324: }
2328: PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2329: {
2331: PC_IS *pcis = (PC_IS*)(pc->data);
2334: if (!vec2_B) { vec2_B = v; }
2335: MatMultTranspose(pcis->A_BB,v,vec1_B);
2336: MatMultTranspose(pcis->A_BI,v,vec1_D);
2337: KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);
2338: MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);
2339: VecAXPY(vec1_B,-1.0,vec2_B);
2340: return(0);
2341: }
2345: PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2346: {
2347: Vec local_vec,global_vec;
2348: IS seqis,paris;
2349: VecScatter scatter_ctx;
2350: PetscScalar *array;
2351: PetscInt *temp_global_dofs;
2352: PetscScalar globalsum;
2353: PetscInt i,j,s;
2354: PetscInt nlocals,first_index,old_index,max_local;
2355: PetscMPIInt rank_prec_comm,size_prec_comm,max_global;
2356: PetscMPIInt *dof_sizes,*dof_displs;
2357: PetscBool first_found;
2361: /* mpi buffers */
2362: MPI_Comm_size(comm,&size_prec_comm);
2363: MPI_Comm_rank(comm,&rank_prec_comm);
2364: j = ( !rank_prec_comm ? size_prec_comm : 0);
2365: PetscMalloc1(j,&dof_sizes);
2366: PetscMalloc1(j,&dof_displs);
2367: /* get maximum size of subset */
2368: PetscMalloc1(n_local_dofs,&temp_global_dofs);
2369: ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);
2370: max_local = 0;
2371: if (n_local_dofs) {
2372: max_local = temp_global_dofs[0];
2373: for (i=1;i<n_local_dofs;i++) {
2374: if (max_local < temp_global_dofs[i] ) {
2375: max_local = temp_global_dofs[i];
2376: }
2377: }
2378: }
2379: MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2380: max_global++;
2381: max_local = 0;
2382: if (n_local_dofs) {
2383: max_local = local_dofs[0];
2384: for (i=1;i<n_local_dofs;i++) {
2385: if (max_local < local_dofs[i] ) {
2386: max_local = local_dofs[i];
2387: }
2388: }
2389: }
2390: max_local++;
2391: /* allocate workspace */
2392: VecCreate(PETSC_COMM_SELF,&local_vec);
2393: VecSetSizes(local_vec,PETSC_DECIDE,max_local);
2394: VecSetType(local_vec,VECSEQ);
2395: VecCreate(comm,&global_vec);
2396: VecSetSizes(global_vec,PETSC_DECIDE,max_global);
2397: VecSetType(global_vec,VECMPI);
2398: /* create scatter */
2399: ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);
2400: ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);
2401: VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);
2402: ISDestroy(&seqis);
2403: ISDestroy(&paris);
2404: /* init array */
2405: VecSet(global_vec,0.0);
2406: VecSet(local_vec,0.0);
2407: VecGetArray(local_vec,&array);
2408: if (local_dofs_mult) {
2409: for (i=0;i<n_local_dofs;i++) {
2410: array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2411: }
2412: } else {
2413: for (i=0;i<n_local_dofs;i++) {
2414: array[local_dofs[i]]=1.0;
2415: }
2416: }
2417: VecRestoreArray(local_vec,&array);
2418: /* scatter into global vec and get total number of global dofs */
2419: VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);
2420: VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);
2421: VecSum(global_vec,&globalsum);
2422: *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2423: /* Fill global_vec with cumulative function for global numbering */
2424: VecGetArray(global_vec,&array);
2425: VecGetLocalSize(global_vec,&s);
2426: nlocals = 0;
2427: first_index = -1;
2428: first_found = PETSC_FALSE;
2429: for (i=0;i<s;i++) {
2430: if (!first_found && PetscRealPart(array[i]) > 0.0) {
2431: first_found = PETSC_TRUE;
2432: first_index = i;
2433: }
2434: nlocals += (PetscInt)PetscRealPart(array[i]);
2435: }
2436: MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);
2437: if (!rank_prec_comm) {
2438: dof_displs[0]=0;
2439: for (i=1;i<size_prec_comm;i++) {
2440: dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2441: }
2442: }
2443: MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);
2444: if (first_found) {
2445: array[first_index] += (PetscScalar)nlocals;
2446: old_index = first_index;
2447: for (i=first_index+1;i<s;i++) {
2448: if (PetscRealPart(array[i]) > 0.0) {
2449: array[i] += array[old_index];
2450: old_index = i;
2451: }
2452: }
2453: }
2454: VecRestoreArray(global_vec,&array);
2455: VecSet(local_vec,0.0);
2456: VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);
2457: VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);
2458: /* get global ordering of local dofs */
2459: VecGetArray(local_vec,&array);
2460: if (local_dofs_mult) {
2461: for (i=0;i<n_local_dofs;i++) {
2462: temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2463: }
2464: } else {
2465: for (i=0;i<n_local_dofs;i++) {
2466: temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2467: }
2468: }
2469: VecRestoreArray(local_vec,&array);
2470: /* free workspace */
2471: VecScatterDestroy(&scatter_ctx);
2472: VecDestroy(&local_vec);
2473: VecDestroy(&global_vec);
2474: PetscFree(dof_sizes);
2475: PetscFree(dof_displs);
2476: /* return pointer to global ordering of local dofs */
2477: *global_numbering_subset = temp_global_dofs;
2478: return(0);
2479: }
2483: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2484: {
2485: PetscInt i,j;
2486: PetscScalar *alphas;
2490: /* this implements stabilized Gram-Schmidt */
2491: PetscMalloc1(n,&alphas);
2492: for (i=0;i<n;i++) {
2493: VecNormalize(vecs[i],NULL);
2494: if (i<n) { VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]); }
2495: for (j=i+1;j<n;j++) { VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]); }
2496: }
2497: PetscFree(alphas);
2498: return(0);
2499: }
2501: /* TODO
2502: - now preallocation is done assuming SEQDENSE local matrices
2503: */
2506: static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M)
2507: {
2508: Mat new_mat;
2509: Mat_IS *matis = (Mat_IS*)(mat->data);
2510: /* info on mat */
2511: /* ISLocalToGlobalMapping rmapping,cmapping; */
2512: PetscInt bs,rows,cols;
2513: PetscInt lrows,lcols;
2514: PetscInt local_rows,local_cols;
2515: PetscBool isdense;
2516: /* values insertion */
2517: PetscScalar *array;
2518: PetscInt *local_indices,*global_indices;
2519: /* work */
2520: PetscInt i,j,index_row;
2521: PetscErrorCode ierr;
2524: /* MISSING CHECKS
2525: - rectangular case not covered (it is not allowed by MATIS)
2526: */
2527: /* get info from mat */
2528: /* MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping); */
2529: MatGetSize(mat,&rows,&cols);
2530: MatGetBlockSize(mat,&bs);
2531: MatGetSize(matis->A,&local_rows,&local_cols);
2533: /* work */
2534: PetscMalloc1(local_rows,&local_indices);
2535: for (i=0;i<local_rows;i++) local_indices[i]=i;
2536: /* map indices of local mat to global values */
2537: PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);
2538: /* ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices); */
2539: ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);
2541: if (reuse==MAT_INITIAL_MATRIX) {
2542: Vec vec_dnz,vec_onz;
2543: PetscScalar *my_dnz,*my_onz;
2544: PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2545: PetscInt index_col,owner;
2546: PetscMPIInt nsubdomains;
2548: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);
2549: MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);
2550: MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);
2551: MatSetBlockSize(new_mat,bs);
2552: MatSetType(new_mat,Mtype);
2553: MatSetUp(new_mat);
2554: MatGetLocalSize(new_mat,&lrows,&lcols);
2556: /*
2557: preallocation
2558: */
2560: MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);
2561: /*
2562: Some vectors are needed to sum up properly on shared interface dofs.
2563: Preallocation macros cannot do the job.
2564: Note that preallocation is not exact, since it overestimates nonzeros
2565: */
2566: MatGetVecs(new_mat,NULL,&vec_dnz);
2567: /* VecSetLocalToGlobalMapping(vec_dnz,rmapping); */
2568: VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);
2569: VecDuplicate(vec_dnz,&vec_onz);
2570: /* All processes need to compute entire row ownership */
2571: PetscMalloc1(rows,&row_ownership);
2572: MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);
2573: for (i=0;i<nsubdomains;i++) {
2574: for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2575: row_ownership[j]=i;
2576: }
2577: }
2579: /*
2580: my_dnz and my_onz contains exact contribution to preallocation from each local mat
2581: then, they will be summed up properly. This way, preallocation is always sufficient
2582: */
2583: PetscMalloc1(local_rows,&my_dnz);
2584: PetscMalloc1(local_rows,&my_onz);
2585: PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));
2586: PetscMemzero(my_onz,local_rows*sizeof(*my_onz));
2587: for (i=0;i<local_rows;i++) {
2588: index_row = global_indices[i];
2589: for (j=i;j<local_rows;j++) {
2590: owner = row_ownership[index_row];
2591: index_col = global_indices[j];
2592: if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2593: my_dnz[i] += 1.0;
2594: } else { /* offdiag block */
2595: my_onz[i] += 1.0;
2596: }
2597: /* same as before, interchanging rows and cols */
2598: if (i != j) {
2599: owner = row_ownership[index_col];
2600: if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2601: my_dnz[j] += 1.0;
2602: } else {
2603: my_onz[j] += 1.0;
2604: }
2605: }
2606: }
2607: }
2608: VecSet(vec_dnz,0.0);
2609: VecSet(vec_onz,0.0);
2610: if (local_rows) { /* multilevel guard */
2611: VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);
2612: VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);
2613: }
2614: VecAssemblyBegin(vec_dnz);
2615: VecAssemblyBegin(vec_onz);
2616: VecAssemblyEnd(vec_dnz);
2617: VecAssemblyEnd(vec_onz);
2618: PetscFree(my_dnz);
2619: PetscFree(my_onz);
2620: PetscFree(row_ownership);
2622: /* set computed preallocation in dnz and onz */
2623: VecGetArray(vec_dnz,&array);
2624: for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2625: VecRestoreArray(vec_dnz,&array);
2626: VecGetArray(vec_onz,&array);
2627: for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2628: VecRestoreArray(vec_onz,&array);
2629: VecDestroy(&vec_dnz);
2630: VecDestroy(&vec_onz);
2632: /* Resize preallocation if overestimated */
2633: for (i=0;i<lrows;i++) {
2634: dnz[i] = PetscMin(dnz[i],lcols);
2635: onz[i] = PetscMin(onz[i],cols-lcols);
2636: }
2637: /* set preallocation */
2638: MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);
2639: for (i=0;i<lrows/bs;i++) {
2640: dnz[i] = dnz[i*bs]/bs;
2641: onz[i] = onz[i*bs]/bs;
2642: }
2643: MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);
2644: for (i=0;i<lrows/bs;i++) {
2645: dnz[i] = dnz[i]-i;
2646: }
2647: MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);
2648: MatPreallocateFinalize(dnz,onz);
2649: *M = new_mat;
2650: } else {
2651: PetscInt mbs,mrows,mcols;
2652: /* some checks */
2653: MatGetBlockSize(*M,&mbs);
2654: MatGetSize(*M,&mrows,&mcols);
2655: if (mrows != rows) {
2656: SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows);
2657: }
2658: if (mrows != rows) {
2659: SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols);
2660: }
2661: if (mbs != bs) {
2662: SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs);
2663: }
2664: MatZeroEntries(*M);
2665: }
2666: /* set local to global mappings */
2667: /* MatSetLocalToGlobalMapping(*M,rmapping,cmapping); */
2668: /* Set values */
2669: PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);
2670: if (isdense) { /* special case for dense local matrices */
2671: MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);
2672: MatDenseGetArray(matis->A,&array);
2673: MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);
2674: MatDenseRestoreArray(matis->A,&array);
2675: PetscFree(local_indices);
2676: PetscFree(global_indices);
2677: } else { /* very basic values insertion for all other matrix types */
2678: PetscFree(local_indices);
2679: PetscFree(global_indices);
2680: for (i=0;i<local_rows;i++) {
2681: MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);
2682: /* MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES); */
2683: ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);
2684: ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);
2685: MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);
2686: MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);
2687: }
2688: }
2689: MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);
2690: MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);
2691: if (isdense) {
2692: MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);
2693: }
2694: return(0);
2695: }
2699: PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2700: {
2701: Mat subdomain_adj;
2702: IS new_ranks,ranks_send_to;
2703: MatPartitioning partitioner;
2704: Mat_IS *matis;
2705: PetscInt n_neighs,*neighs,*n_shared,**shared;
2706: PetscInt prank;
2707: PetscMPIInt size,rank,color;
2708: PetscInt *xadj,*adjncy,*oldranks;
2709: PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2710: PetscInt i,j,n_subdomains,local_size,threshold=0;
2711: PetscErrorCode ierr;
2712: PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2713: PetscSubcomm subcomm;
2716: PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);
2717: PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);
2718: PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);
2720: /* Get info on mapping */
2721: matis = (Mat_IS*)(mat->data);
2722: ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);
2723: ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);
2725: /* build local CSR graph of subdomains' connectivity */
2726: PetscMalloc1(2,&xadj);
2727: xadj[0] = 0;
2728: xadj[1] = PetscMax(n_neighs-1,0);
2729: PetscMalloc1(xadj[1],&adjncy);
2730: PetscMalloc1(xadj[1],&adjncy_wgt);
2732: if (threshold) {
2733: PetscInt* count,min_threshold;
2734: PetscMalloc1(local_size,&count);
2735: PetscMemzero(count,local_size*sizeof(PetscInt));
2736: for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2737: for (j=0;j<n_shared[i];j++) {
2738: count[shared[i][j]] += 1;
2739: }
2740: }
2741: /* adapt threshold since we dont want to lose significant connections */
2742: min_threshold = n_neighs;
2743: for (i=1;i<n_neighs;i++) {
2744: for (j=0;j<n_shared[i];j++) {
2745: min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2746: }
2747: }
2748: threshold = PetscMax(min_threshold+1,threshold);
2749: xadj[1] = 0;
2750: for (i=1;i<n_neighs;i++) {
2751: for (j=0;j<n_shared[i];j++) {
2752: if (count[shared[i][j]] < threshold) {
2753: adjncy[xadj[1]] = neighs[i];
2754: adjncy_wgt[xadj[1]] = n_shared[i];
2755: xadj[1]++;
2756: break;
2757: }
2758: }
2759: }
2760: PetscFree(count);
2761: } else {
2762: if (xadj[1]) {
2763: PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));
2764: PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));
2765: }
2766: }
2767: ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);
2768: if (use_square) {
2769: for (i=0;i<xadj[1];i++) {
2770: adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2771: }
2772: }
2773: PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
2775: PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);
2777: /*
2778: Restrict work on active processes only.
2779: */
2780: PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);
2781: PetscSubcommSetNumber(subcomm,2); /* 2 groups, active process and not active processes */
2782: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
2783: PetscMPIIntCast(!local_size,&color);
2784: PetscSubcommSetTypeGeneral(subcomm,color,rank);
2785: if (color) {
2786: PetscFree(xadj);
2787: PetscFree(adjncy);
2788: PetscFree(adjncy_wgt);
2789: } else {
2790: MPI_Comm_size(subcomm->comm,&size);
2791: PetscMalloc1(size,&oldranks);
2792: prank = rank;
2793: MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);
2794: for (i=0;i<size;i++) {
2795: PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2796: }
2797: for (i=0;i<xadj[1];i++) {
2798: PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);
2799: }
2800: PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
2801: MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);
2802: n_subdomains = (PetscInt)size;
2803: MatView(subdomain_adj,0);
2805: /* Partition */
2806: MatPartitioningCreate(subcomm->comm,&partitioner);
2807: MatPartitioningSetAdjacency(partitioner,subdomain_adj);
2808: if (use_vwgt) {
2809: PetscMalloc(sizeof(*v_wgt),&v_wgt);
2810: v_wgt[0] = local_size;
2811: MatPartitioningSetVertexWeights(partitioner,v_wgt);
2812: }
2813: PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);
2814: MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);
2815: MatPartitioningSetFromOptions(partitioner);
2816: MatPartitioningApply(partitioner,&new_ranks);
2817: MatPartitioningView(partitioner,0);
2819: ISGetIndices(new_ranks,(const PetscInt**)&is_indices);
2820: ranks_send_to_idx[0] = oldranks[is_indices[0]];
2821: ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);
2822: /* clean up */
2823: PetscFree(oldranks);
2824: ISDestroy(&new_ranks);
2825: MatDestroy(&subdomain_adj);
2826: MatPartitioningDestroy(&partitioner);
2827: }
2828: PetscSubcommDestroy(&subcomm);
2830: /* assemble parallel IS for sends */
2831: i = 1;
2832: if (color) i=0;
2833: ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);
2834: ISView(ranks_send_to,0);
2836: /* get back IS */
2837: *is_sends = ranks_send_to;
2838: return(0);
2839: }
2841: typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2845: PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2846: {
2847: Mat local_mat,new_mat;
2848: Mat_IS *matis;
2849: IS is_sends_internal;
2850: PetscInt rows,cols;
2851: PetscInt i,bs,buf_size_idxs,buf_size_vals;
2852: PetscBool ismatis,isdense;
2853: ISLocalToGlobalMapping l2gmap;
2854: PetscInt* l2gmap_indices;
2855: const PetscInt* is_indices;
2856: MatType new_local_type;
2857: MatTypePrivate new_local_type_private;
2858: /* buffers */
2859: PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2860: PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2861: /* MPI */
2862: MPI_Comm comm;
2863: PetscMPIInt n_sends,n_recvs,commsize;
2864: PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals;
2865: PetscMPIInt *onodes,*olengths_idxs,*olengths_vals;
2866: PetscMPIInt len,tag_idxs,tag_vals,source_dest;
2867: MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2868: PetscErrorCode ierr;
2871: /* checks */
2872: PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
2873: if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2874: MatISGetLocalMat(mat,&local_mat);
2875: PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);
2876: if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2877: MatGetSize(local_mat,&rows,&cols);
2878: if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2879: MatGetBlockSize(local_mat,&bs);
2881: /* prepare IS for sending if not provided */
2882: if (!is_sends) {
2883: if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2884: MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);
2885: } else {
2886: PetscObjectReference((PetscObject)is_sends);
2887: is_sends_internal = is_sends;
2888: }
2890: /* get pointer of MATIS data */
2891: matis = (Mat_IS*)mat->data;
2893: /* get comm */
2894: comm = PetscObjectComm((PetscObject)mat);
2896: /* compute number of sends */
2897: ISGetLocalSize(is_sends_internal,&i);
2898: PetscMPIIntCast(i,&n_sends);
2900: /* compute number of receives */
2901: MPI_Comm_size(comm,&commsize);
2902: PetscMalloc1(commsize,&iflags);
2903: PetscMemzero(iflags,commsize*sizeof(*iflags));
2904: ISGetIndices(is_sends_internal,&is_indices);
2905: for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2906: PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);
2907: PetscFree(iflags);
2909: /* prepare send/receive buffers */
2910: PetscMalloc1(commsize,&ilengths_idxs);
2911: PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));
2912: PetscMalloc1(commsize,&ilengths_vals);
2913: PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));
2915: /* Get data from local mat */
2916: if (!isdense) {
2917: /* TODO: See below some guidelines on how to prepare the local buffers */
2918: /*
2919: send_buffer_vals should contain the raw values of the local matrix
2920: send_buffer_idxs should contain:
2921: - MatType_PRIVATE type
2922: - PetscInt size_of_l2gmap
2923: - PetscInt global_row_indices[size_of_l2gmap]
2924: - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2925: */
2926: } else {
2927: MatDenseGetArray(local_mat,&send_buffer_vals);
2928: ISLocalToGlobalMappingGetSize(matis->mapping,&i);
2929: PetscMalloc1((i+2),&send_buffer_idxs);
2930: send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2931: send_buffer_idxs[1] = i;
2932: ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);
2933: PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));
2934: ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);
2935: PetscMPIIntCast(i,&len);
2936: for (i=0;i<n_sends;i++) {
2937: ilengths_vals[is_indices[i]] = len*len;
2938: ilengths_idxs[is_indices[i]] = len+2;
2939: }
2940: }
2941: PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);
2942: buf_size_idxs = 0;
2943: buf_size_vals = 0;
2944: for (i=0;i<n_recvs;i++) {
2945: buf_size_idxs += (PetscInt)olengths_idxs[i];
2946: buf_size_vals += (PetscInt)olengths_vals[i];
2947: }
2948: PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);
2949: PetscMalloc1(buf_size_vals,&recv_buffer_vals);
2951: /* get new tags for clean communications */
2952: PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);
2953: PetscObjectGetNewTag((PetscObject)mat,&tag_vals);
2955: /* allocate for requests */
2956: PetscMalloc1(n_sends,&send_req_idxs);
2957: PetscMalloc1(n_sends,&send_req_vals);
2958: PetscMalloc1(n_recvs,&recv_req_idxs);
2959: PetscMalloc1(n_recvs,&recv_req_vals);
2961: /* communications */
2962: ptr_idxs = recv_buffer_idxs;
2963: ptr_vals = recv_buffer_vals;
2964: for (i=0;i<n_recvs;i++) {
2965: source_dest = onodes[i];
2966: MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);
2967: MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);
2968: ptr_idxs += olengths_idxs[i];
2969: ptr_vals += olengths_vals[i];
2970: }
2971: for (i=0;i<n_sends;i++) {
2972: PetscMPIIntCast(is_indices[i],&source_dest);
2973: MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);
2974: MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);
2975: }
2976: ISRestoreIndices(is_sends_internal,&is_indices);
2977: ISDestroy(&is_sends_internal);
2979: /* assemble new l2g map */
2980: MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);
2981: ptr_idxs = recv_buffer_idxs;
2982: buf_size_idxs = 0;
2983: for (i=0;i<n_recvs;i++) {
2984: buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2985: ptr_idxs += olengths_idxs[i];
2986: }
2987: PetscMalloc1(buf_size_idxs,&l2gmap_indices);
2988: ptr_idxs = recv_buffer_idxs;
2989: buf_size_idxs = 0;
2990: for (i=0;i<n_recvs;i++) {
2991: PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));
2992: buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2993: ptr_idxs += olengths_idxs[i];
2994: }
2995: PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);
2996: ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);
2997: PetscFree(l2gmap_indices);
2999: /* infer new local matrix type from received local matrices type */
3000: /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3001: /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3002: new_local_type_private = MATAIJ_PRIVATE;
3003: new_local_type = MATSEQAIJ;
3004: if (n_recvs) {
3005: new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3006: ptr_idxs = recv_buffer_idxs;
3007: for (i=0;i<n_recvs;i++) {
3008: if ((PetscInt)new_local_type_private != *ptr_idxs) {
3009: new_local_type_private = MATAIJ_PRIVATE;
3010: break;
3011: }
3012: ptr_idxs += olengths_idxs[i];
3013: }
3014: switch (new_local_type_private) {
3015: case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
3016: new_local_type = MATSEQAIJ;
3017: bs = 1;
3018: break;
3019: case MATAIJ_PRIVATE:
3020: new_local_type = MATSEQAIJ;
3021: bs = 1;
3022: break;
3023: case MATBAIJ_PRIVATE:
3024: new_local_type = MATSEQBAIJ;
3025: break;
3026: case MATSBAIJ_PRIVATE:
3027: new_local_type = MATSEQSBAIJ;
3028: break;
3029: default:
3030: SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
3031: break;
3032: }
3033: }
3035: /* create MATIS object */
3036: MatGetSize(mat,&rows,&cols);
3037: MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);
3038: ISLocalToGlobalMappingDestroy(&l2gmap);
3039: MatISGetLocalMat(new_mat,&local_mat);
3040: MatSetType(local_mat,new_local_type);
3041: MatSetUp(local_mat); /* WARNING -> no preallocation yet */
3043: /* set values */
3044: MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);
3045: ptr_vals = recv_buffer_vals;
3046: ptr_idxs = recv_buffer_idxs;
3047: for (i=0;i<n_recvs;i++) {
3048: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3049: MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);
3050: MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);
3051: MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);
3052: MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);
3053: MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);
3054: }
3055: ptr_idxs += olengths_idxs[i];
3056: ptr_vals += olengths_vals[i];
3057: }
3058: MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);
3059: MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);
3060: MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);
3061: MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);
3063: { /* check */
3064: Vec lvec,rvec;
3065: PetscReal infty_error;
3067: MatGetVecs(mat,&rvec,&lvec);
3068: VecSetRandom(rvec,NULL);
3069: MatMult(mat,rvec,lvec);
3070: VecScale(lvec,-1.0);
3071: MatMultAdd(new_mat,rvec,lvec,lvec);
3072: VecNorm(lvec,NORM_INFINITY,&infty_error);
3073: PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3074: VecDestroy(&rvec);
3075: VecDestroy(&lvec);
3076: }
3078: /* free workspace */
3079: PetscFree(recv_buffer_idxs);
3080: PetscFree(recv_buffer_vals);
3081: MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);
3082: PetscFree(send_buffer_idxs);
3083: MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);
3084: if (isdense) {
3085: MatISGetLocalMat(mat,&local_mat);
3086: MatDenseRestoreArray(local_mat,&send_buffer_vals);
3087: } else {
3088: /* PetscFree(send_buffer_vals); */
3089: }
3090: PetscFree(recv_req_idxs);
3091: PetscFree(recv_req_vals);
3092: PetscFree(send_req_idxs);
3093: PetscFree(send_req_vals);
3094: PetscFree(ilengths_vals);
3095: PetscFree(ilengths_idxs);
3096: PetscFree(olengths_vals);
3097: PetscFree(olengths_idxs);
3098: PetscFree(onodes);
3099: /* get back new mat */
3100: *mat_n = new_mat;
3101: return(0);
3102: }
3106: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3107: {
3108: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3109: PC_IS *pcis = (PC_IS*)pc->data;
3110: Mat coarse_mat,coarse_mat_is,coarse_submat_dense;
3111: MatNullSpace CoarseNullSpace=NULL;
3112: ISLocalToGlobalMapping coarse_islg;
3113: IS coarse_is;
3114: PetscInt max_it;
3115: PetscInt im_active=-1,active_procs=-1;
3116: PC pc_temp;
3117: PCType coarse_pc_type;
3118: KSPType coarse_ksp_type;
3119: PetscBool multilevel_requested,multilevel_allowed;
3120: PetscBool setsym,issym,isbddc,isnn,coarse_reuse;
3121: MatStructure matstruct;
3122: PetscErrorCode ierr;
3125: /* Assign global numbering to coarse dofs */
3126: if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */
3127: PetscInt ocoarse_size;
3128: ocoarse_size = pcbddc->coarse_size;
3129: PetscFree(pcbddc->global_primal_indices);
3130: PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);
3131: /* see if we can avoid some work */
3132: if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3133: if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3134: KSPReset(pcbddc->coarse_ksp);
3135: coarse_reuse = PETSC_FALSE;
3136: } else { /* we can safely reuse already computed coarse matrix */
3137: coarse_reuse = PETSC_TRUE;
3138: }
3139: } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3140: coarse_reuse = PETSC_FALSE;
3141: }
3142: } else { /* primal space has not been changed, so we can reuse coarse matrix */
3143: coarse_reuse = PETSC_TRUE;
3144: }
3146: /* infer some info from user */
3147: issym = PETSC_FALSE;
3148: MatIsSymmetricKnown(pc->pmat,&setsym,&issym);
3149: multilevel_allowed = PETSC_FALSE;
3150: multilevel_requested = PETSC_FALSE;
3151: if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
3152: if (multilevel_requested) {
3153: /* count "active processes" */
3154: im_active = !!(pcis->n);
3155: MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
3156: if (active_procs/pcbddc->coarsening_ratio < 2) {
3157: multilevel_allowed = PETSC_FALSE;
3158: } else {
3159: multilevel_allowed = PETSC_TRUE;
3160: }
3161: }
3163: /* set defaults for coarse KSP and PC */
3164: if (multilevel_allowed) {
3165: if (issym) {
3166: coarse_ksp_type = KSPRICHARDSON;
3167: } else {
3168: coarse_ksp_type = KSPCHEBYSHEV;
3169: }
3170: coarse_pc_type = PCBDDC;
3171: } else {
3172: coarse_ksp_type = KSPPREONLY;
3173: coarse_pc_type = PCREDUNDANT;
3174: }
3176: /* create the coarse KSP object only once with defaults */
3177: if (!pcbddc->coarse_ksp) {
3178: char prefix[256],str_level[3];
3179: size_t len;
3180: KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);
3181: PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
3182: KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);
3183: KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
3184: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
3185: PCSetType(pc_temp,coarse_pc_type);
3186: /* prefix */
3187: PetscStrcpy(prefix,"");
3188: PetscStrcpy(str_level,"");
3189: if (!pcbddc->current_level) {
3190: PetscStrcpy(prefix,((PetscObject)pc)->prefix);
3191: PetscStrcat(prefix,"pc_bddc_coarse_");
3192: } else {
3193: PetscStrlen(((PetscObject)pc)->prefix,&len);
3194: if (pcbddc->current_level>1) len -= 2;
3195: PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);
3196: *(prefix+len)='\0';
3197: sprintf(str_level,"%d_",(int)(pcbddc->current_level));
3198: PetscStrcat(prefix,str_level);
3199: }
3200: KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);
3201: }
3202: /* allow user customization */
3203: /* PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name); */
3204: KSPSetFromOptions(pcbddc->coarse_ksp);
3205: /* PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name); */
3207: /* get some info after set from options */
3208: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
3209: PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);
3210: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
3211: if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
3212: KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
3213: PCSetType(pc_temp,coarse_pc_type);
3214: isbddc = PETSC_FALSE;
3215: }
3217: /* propagate BDDC info to the next level */
3218: PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
3219: PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
3220: PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
3222: /* creates temporary MATIS object for coarse matrix */
3223: ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);
3224: ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);
3225: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);
3226: MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);
3227: MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);
3228: MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);
3229: MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);
3230: MatDestroy(&coarse_submat_dense);
3231: ISLocalToGlobalMappingDestroy(&coarse_islg);
3233: /* assemble coarse matrix */
3234: if (isbddc || isnn) {
3235: MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);
3236: } else {
3237: if (coarse_reuse) {
3238: KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);
3239: PetscObjectReference((PetscObject)coarse_mat);
3240: MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_REUSE_MATRIX,&coarse_mat);
3241: } else {
3242: MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_INITIAL_MATRIX,&coarse_mat);
3243: }
3244: }
3245: MatDestroy(&coarse_mat_is);
3247: /* create local to global scatters for coarse problem */
3248: if (pcbddc->new_primal_space) {
3249: VecDestroy(&pcbddc->coarse_vec);
3250: VecDestroy(&pcbddc->coarse_rhs);
3251: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
3252: MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);
3253: VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);
3254: }
3255: ISDestroy(&coarse_is);
3257: /* propagate symmetry info to coarse matrix */
3258: MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);
3260: /* Compute coarse null space (special handling by BDDC only) */
3261: if (pcbddc->NullSpace) {
3262: PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);
3263: if (isbddc) {
3264: PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);
3265: } else {
3266: KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);
3267: }
3268: }
3270: /* set operators */
3271: PCGetOperators(pc,NULL,NULL,&matstruct);
3272: KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);
3274: /* additional KSP customization */
3275: KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);
3276: if (max_it < 5) {
3277: KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
3278: }
3279: /* KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1); */
3282: /* print some info if requested */
3283: if (pcbddc->dbg_flag) {
3284: KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);
3285: PCGetType(pc_temp,&coarse_pc_type);
3286: if (!multilevel_allowed) {
3287: if (multilevel_requested) {
3288: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
3289: } else if (pcbddc->max_levels) {
3290: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);
3291: }
3292: }
3293: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);
3294: PetscViewerFlush(pcbddc->dbg_viewer);
3295: }
3297: /* setup coarse ksp */
3298: KSPSetUp(pcbddc->coarse_ksp);
3299: if (pcbddc->dbg_flag) {
3300: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);
3301: PetscViewerFlush(pcbddc->dbg_viewer);
3302: KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);
3303: PetscViewerFlush(pcbddc->dbg_viewer);
3304: }
3306: /* Check coarse problem if requested */
3307: if (pcbddc->dbg_flag) {
3308: KSP check_ksp;
3309: KSPType check_ksp_type;
3310: PC check_pc;
3311: Vec check_vec;
3312: PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3313: PetscInt its;
3314: PetscBool ispreonly,compute;
3316: /* Create ksp object suitable for estimation of extreme eigenvalues */
3317: KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);
3318: KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);
3319: KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
3320: PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);
3321: if (ispreonly) {
3322: check_ksp_type = KSPPREONLY;
3323: compute = PETSC_FALSE;
3324: } else {
3325: if (issym) check_ksp_type = KSPCG;
3326: else check_ksp_type = KSPGMRES;
3327: compute = PETSC_TRUE;
3328: }
3329: KSPSetType(check_ksp,check_ksp_type);
3330: KSPSetComputeSingularValues(check_ksp,compute);
3331: KSPSetUp(check_ksp);
3332: KSPGetPC(pcbddc->coarse_ksp,&check_pc);
3333: KSPSetPC(check_ksp,check_pc);
3334: /* create random vec */
3335: VecDuplicate(pcbddc->coarse_vec,&check_vec);
3336: VecSetRandom(check_vec,NULL);
3337: if (CoarseNullSpace) {
3338: MatNullSpaceRemove(CoarseNullSpace,check_vec);
3339: }
3340: MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);
3341: /* solve coarse problem */
3342: KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);
3343: if (CoarseNullSpace) {
3344: MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);
3345: }
3346: /* check coarse problem residual error */
3347: VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);
3348: VecNorm(check_vec,NORM_INFINITY,&infty_error);
3349: MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);
3350: VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);
3351: VecDestroy(&check_vec);
3352: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);
3353: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);
3354: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);
3355: /* get eigenvalue estimation if preonly has not been requested */
3356: if (!ispreonly) {
3357: KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);
3358: KSPGetIterationNumber(check_ksp,&its);
3359: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);
3360: }
3361: PetscViewerFlush(pcbddc->dbg_viewer);
3362: KSPDestroy(&check_ksp);
3363: }
3364: /* free memory */
3365: MatNullSpaceDestroy(&CoarseNullSpace);
3366: MatDestroy(&coarse_mat);
3367: return(0);
3368: }
3372: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3373: {
3374: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3375: PC_IS* pcis = (PC_IS*)pc->data;
3376: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
3377: PetscInt i,coarse_size;
3378: PetscInt *local_primal_indices;
3382: /* Compute global number of coarse dofs */
3383: if (!pcbddc->primal_indices_local_idxs) {
3384: SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
3385: }
3386: PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);
3388: /* check numbering */
3389: if (pcbddc->dbg_flag) {
3390: PetscScalar coarsesum,*array;
3392: PetscViewerFlush(pcbddc->dbg_viewer);
3393: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3394: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");
3395: PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
3396: VecSet(pcis->vec1_N,0.0);
3397: for (i=0;i<pcbddc->local_primal_size;i++) {
3398: VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);
3399: }
3400: VecAssemblyBegin(pcis->vec1_N);
3401: VecAssemblyEnd(pcis->vec1_N);
3402: VecSet(pcis->vec1_global,0.0);
3403: VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
3404: VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
3405: VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
3406: VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
3407: VecGetArray(pcis->vec1_N,&array);
3408: for (i=0;i<pcis->n;i++) {
3409: if (array[i] == 1.0) {
3410: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);
3411: }
3412: }
3413: PetscViewerFlush(pcbddc->dbg_viewer);
3414: for (i=0;i<pcis->n;i++) {
3415: if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3416: }
3417: VecRestoreArray(pcis->vec1_N,&array);
3418: VecSet(pcis->vec1_global,0.0);
3419: VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
3420: VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
3421: VecSum(pcis->vec1_global,&coarsesum);
3422: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));
3423: if (pcbddc->dbg_flag > 1) {
3424: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");
3425: PetscViewerFlush(pcbddc->dbg_viewer);
3426: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);
3427: for (i=0;i<pcbddc->local_primal_size;i++) {
3428: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
3429: }
3430: PetscViewerFlush(pcbddc->dbg_viewer);
3431: }
3432: PetscViewerFlush(pcbddc->dbg_viewer);
3433: }
3434: /* get back data */
3435: *coarse_size_n = coarse_size;
3436: *local_primal_indices_n = local_primal_indices;
3437: return(0);
3438: }