Actual source code: bddcprivate.c

petsc-dev 2014-02-02
Report Typos and Errors
  1: #include <bddc.h>
  2: #include <bddcprivate.h>
  3: #include <petscblaslapack.h>

  7: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
  8: {
  9:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
 10:   PetscScalar    *coarse_submat_vals;

 14:   /* Compute matrix after change of basis and extract local submatrices */
 15:   PCBDDCSetUpLocalMatrices(pc);

 17:   /* Setup local scatters R_to_B and (optionally) R_to_D */
 18:   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
 19:   PCBDDCSetUpLocalScatters(pc);

 21:   /* Setup local solvers ksp_D and ksp_R */
 22:   /* PCBDDCSetUpLocalScatters should be called first! */
 23:   PCBDDCSetUpLocalSolvers(pc);

 25:   /* Change global null space passed in by the user if change of basis has been requested */
 26:   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
 27:     PCBDDCNullSpaceAdaptGlobal(pc);
 28:   }

 30:   /*
 31:      Setup local correction and local part of coarse basis.
 32:      Gives back the dense local part of the coarse matrix in column major ordering
 33:   */
 34:   PCBDDCSetUpCorrection(pc,&coarse_submat_vals);

 36:   /* Compute total number of coarse nodes and setup coarse solver */
 37:   PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);

 39:   /* free */
 40:   PetscFree(coarse_submat_vals);
 41:   return(0);
 42: }

 46: PetscErrorCode PCBDDCResetCustomization(PC pc)
 47: {
 48:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
 49:   PetscInt       i;

 53:   PCBDDCGraphResetCSR(pcbddc->mat_graph);
 54:   ISDestroy(&pcbddc->user_primal_vertices);
 55:   MatNullSpaceDestroy(&pcbddc->NullSpace);
 56:   ISDestroy(&pcbddc->NeumannBoundaries);
 57:   ISDestroy(&pcbddc->DirichletBoundaries);
 58:   for (i=0;i<pcbddc->n_ISForDofs;i++) {
 59:     ISDestroy(&pcbddc->ISForDofs[i]);
 60:   }
 61:   PetscFree(pcbddc->ISForDofs);
 62:   pcbddc->n_ISForDofs = 0;
 63:   MatNullSpaceDestroy(&pcbddc->onearnullspace);
 64:   PetscFree(pcbddc->onearnullvecs_state);
 65:   return(0);
 66: }

 70: PetscErrorCode PCBDDCResetTopography(PC pc)
 71: {
 72:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

 76:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
 77:   MatDestroy(&pcbddc->ConstraintMatrix);
 78:   PCBDDCGraphReset(pcbddc->mat_graph);
 79:   return(0);
 80: }

 84: PetscErrorCode PCBDDCResetSolvers(PC pc)
 85: {
 86:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

 90:   VecDestroy(&pcbddc->coarse_vec);
 91:   VecDestroy(&pcbddc->coarse_rhs);
 92:   MatDestroy(&pcbddc->coarse_phi_B);
 93:   MatDestroy(&pcbddc->coarse_phi_D);
 94:   MatDestroy(&pcbddc->coarse_psi_B);
 95:   MatDestroy(&pcbddc->coarse_psi_D);
 96:   VecDestroy(&pcbddc->vec1_P);
 97:   VecDestroy(&pcbddc->vec1_C);
 98:   MatDestroy(&pcbddc->local_auxmat1);
 99:   MatDestroy(&pcbddc->local_auxmat2);
100:   VecDestroy(&pcbddc->vec1_R);
101:   VecDestroy(&pcbddc->vec2_R);
102:   ISDestroy(&pcbddc->is_R_local);
103:   VecScatterDestroy(&pcbddc->R_to_B);
104:   VecScatterDestroy(&pcbddc->R_to_D);
105:   VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
106:   KSPDestroy(&pcbddc->ksp_D);
107:   KSPDestroy(&pcbddc->ksp_R);
108:   KSPDestroy(&pcbddc->coarse_ksp);
109:   MatDestroy(&pcbddc->local_mat);
110:   PetscFree(pcbddc->primal_indices_local_idxs);
111:   PetscFree(pcbddc->global_primal_indices);
112:   return(0);
113: }

117: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
118: {
119:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
120:   PC_IS          *pcis = (PC_IS*)pc->data;
121:   VecType        impVecType;
122:   PetscInt       n_constraints,n_R,old_size;

126:   if (!pcbddc->ConstraintMatrix) {
127:     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
128:   }
129:   /* get sizes */
130:   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
131:   n_R = pcis->n-pcbddc->n_actual_vertices;
132:   VecGetType(pcis->vec1_N,&impVecType);
133:   /* local work vectors (try to avoid unneeded work)*/
134:   /* R nodes */
135:   old_size = -1;
136:   if (pcbddc->vec1_R) {
137:     VecGetSize(pcbddc->vec1_R,&old_size);
138:   }
139:   if (n_R != old_size) {
140:     VecDestroy(&pcbddc->vec1_R);
141:     VecDestroy(&pcbddc->vec2_R);
142:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);
143:     VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);
144:     VecSetType(pcbddc->vec1_R,impVecType);
145:     VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
146:   }
147:   /* local primal dofs */
148:   old_size = -1;
149:   if (pcbddc->vec1_P) {
150:     VecGetSize(pcbddc->vec1_P,&old_size);
151:   }
152:   if (pcbddc->local_primal_size != old_size) {
153:     VecDestroy(&pcbddc->vec1_P);
154:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);
155:     VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);
156:     VecSetType(pcbddc->vec1_P,impVecType);
157:   }
158:   /* local explicit constraints */
159:   old_size = -1;
160:   if (pcbddc->vec1_C) {
161:     VecGetSize(pcbddc->vec1_C,&old_size);
162:   }
163:   if (n_constraints && n_constraints != old_size) {
164:     VecDestroy(&pcbddc->vec1_C);
165:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);
166:     VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);
167:     VecSetType(pcbddc->vec1_C,impVecType);
168:   }
169:   return(0);
170: }

174: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
175: {
176:   PetscErrorCode         ierr;
177:   /* pointers to pcis and pcbddc */
178:   PC_IS*                 pcis = (PC_IS*)pc->data;
179:   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
180:   /* submatrices of local problem */
181:   Mat                    A_RV,A_VR,A_VV;
182:   /* working matrices */
183:   Mat                    M1,M2,M3,C_CR;
184:   /* working vectors */
185:   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
186:   /* additional working stuff */
187:   IS                     is_aux;
188:   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
189:   const PetscScalar      *array,*row_cmat_values;
190:   const PetscInt         *row_cmat_indices,*idx_R_local;
191:   PetscInt               *idx_V_B,*auxindices;
192:   PetscInt               n_vertices,n_constraints,size_of_constraint;
193:   PetscInt               i,j,n_R,n_D,n_B;
194:   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
195:   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
196:   MatType                impMatType;
197:   /* some shortcuts to scalars */
198:   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
199:   /* for debugging purposes */
200:   PetscReal              *coarsefunctions_errors,*constraints_errors;

203:   /* get number of vertices (corners plus constraints with change of basis)
204:      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
205:   n_vertices = pcbddc->n_actual_vertices;
206:   n_constraints = pcbddc->local_primal_size-n_vertices;
207:   /* Set Non-overlapping dimensions */
208:   n_B = pcis->n_B; n_D = pcis->n - n_B;
209:   n_R = pcis->n-n_vertices;

211:   /* Set types for local objects needed by BDDC precondtioner */
212:   impMatType = MATSEQDENSE;

214:   /* Allocating some extra storage just to be safe */
215:   PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);
216:   for (i=0;i<pcis->n;i++) auxindices[i]=i;

218:   /* vertices in boundary numbering */
219:   PetscMalloc1(n_vertices,&idx_V_B);
220:   ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);
221:   if (i != n_vertices) {
222:     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
223:   }

225:   /* Precompute stuffs needed for preprocessing and application of BDDC*/
226:   if (n_constraints) {
227:     /* see if we can save some allocations */
228:     if (pcbddc->local_auxmat2) {
229:       PetscInt on_R,on_constraints;
230:       MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);
231:       if (on_R != n_R || on_constraints != n_constraints) {
232:         MatDestroy(&pcbddc->local_auxmat2);
233:         MatDestroy(&pcbddc->local_auxmat1);
234:       }
235:     }
236:     /* work vectors */
237:     VecDuplicate(pcbddc->vec1_C,&vec1_C);
238:     VecDuplicate(pcbddc->vec1_C,&vec2_C);
239:     /* auxiliary matrices */
240:     if (!pcbddc->local_auxmat2) {
241:       MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);
242:       MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);
243:       MatSetType(pcbddc->local_auxmat2,impMatType);
244:       MatSetUp(pcbddc->local_auxmat2);
245:     }

247:     /* Extract constraints on R nodes: C_{CR}  */
248:     ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);
249:     MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);
250:     ISDestroy(&is_aux);

252:     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
253:     for (i=0;i<n_constraints;i++) {
254:       VecSet(pcbddc->vec1_R,zero);
255:       /* Get row of constraint matrix in R numbering */
256:       MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
257:       VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);
258:       MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
259:       VecAssemblyBegin(pcbddc->vec1_R);
260:       VecAssemblyEnd(pcbddc->vec1_R);
261:       /* Solve for row of constraint matrix in R numbering */
262:       KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
263:       /* Set values in local_auxmat2 */
264:       VecGetArrayRead(pcbddc->vec2_R,&array);
265:       MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);
266:       VecRestoreArrayRead(pcbddc->vec2_R,&array);
267:     }
268:     MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);
269:     MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);
270:     MatScale(pcbddc->local_auxmat2,m_one);

272:     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
273:     MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
274:     MatLUFactor(M3,NULL,NULL,NULL);
275:     MatCreate(PETSC_COMM_SELF,&M1);
276:     MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);
277:     MatSetType(M1,impMatType);
278:     MatSetUp(M1);
279:     MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);
280:     MatZeroEntries(M2);
281:     VecSet(vec1_C,m_one);
282:     MatDiagonalSet(M2,vec1_C,INSERT_VALUES);
283:     MatMatSolve(M3,M2,M1);
284:     MatDestroy(&M2);
285:     MatDestroy(&M3);
286:     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
287:     if (!pcbddc->local_auxmat1) {
288:       MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
289:     } else {
290:       MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
291:     }
292:   }

294:   /* Get submatrices from subdomain matrix */
295:   if (n_vertices) {
296:     PetscInt ibs,mbs;
297:     PetscBool issbaij;
298:     Mat newmat;

300:     ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);
301:     MatGetBlockSize(pcbddc->local_mat,&mbs);
302:     ISGetBlockSize(pcbddc->is_R_local,&ibs);
303:     if (ibs != mbs) { /* need to convert to SEQAIJ */
304:       MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);
305:       MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
306:       MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
307:       MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
308:       MatDestroy(&newmat);
309:     } else {
310:       /* this is safe */
311:       MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
312:       PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
313:       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
314:         MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);
315:         /* which of the two approaches is faster? */
316:         /* MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
317:         MatCreateTranspose(A_RV,&A_VR);*/
318:         MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
319:         MatCreateTranspose(A_VR,&A_RV);
320:         MatDestroy(&newmat);
321:       } else {
322:         MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
323:         MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
324:       }
325:     }
326:     MatGetVecs(A_RV,&vec1_V,NULL);
327:     VecDuplicate(vec1_V,&vec2_V);
328:     ISDestroy(&is_aux);
329:   }

331:   /* Matrix of coarse basis functions (local) */
332:   if (pcbddc->coarse_phi_B) {
333:     PetscInt on_B,on_primal;
334:     MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);
335:     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
336:       MatDestroy(&pcbddc->coarse_phi_B);
337:       MatDestroy(&pcbddc->coarse_psi_B);
338:     }
339:   }
340:   if (pcbddc->coarse_phi_D) {
341:     PetscInt on_D,on_primal;
342:     MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);
343:     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
344:       MatDestroy(&pcbddc->coarse_phi_D);
345:       MatDestroy(&pcbddc->coarse_psi_D);
346:     }
347:   }
348:   if (!pcbddc->coarse_phi_B) {
349:     MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);
350:     MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);
351:     MatSetType(pcbddc->coarse_phi_B,impMatType);
352:     MatSetUp(pcbddc->coarse_phi_B);
353:   }
354:   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
355:     MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);
356:     MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);
357:     MatSetType(pcbddc->coarse_phi_D,impMatType);
358:     MatSetUp(pcbddc->coarse_phi_D);
359:   }

361:   if (pcbddc->dbg_flag) {
362:     ISGetIndices(pcbddc->is_R_local,&idx_R_local);
363:     PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);
364:     PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);
365:   }
366:   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
367:   PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);

369:   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */

371:   /* vertices */
372:   for (i=0;i<n_vertices;i++) {
373:     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
374:     VecSet(pcbddc->vec1_R,zero); /* TODO: REMOVE IT */
375:     VecSet(vec1_V,zero);
376:     VecSetValue(vec1_V,i,one,INSERT_VALUES);
377:     VecAssemblyBegin(vec1_V);
378:     VecAssemblyEnd(vec1_V);
379:     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
380:     MatMult(A_RV,vec1_V,pcbddc->vec1_R);
381:     KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
382:     VecScale(pcbddc->vec1_R,m_one);
383:     if (n_constraints) {
384:       MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);
385:       MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
386:       VecScale(vec1_C,m_one);
387:     }
388:     MatMult(A_VR,pcbddc->vec1_R,vec2_V);
389:     MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);

391:     /* Set values in coarse basis function and subdomain part of coarse_mat */
392:     /* coarse basis functions */
393:     VecSet(pcis->vec1_B,zero);
394:     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
395:     VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
396:     VecGetArrayRead(pcis->vec1_B,&array);
397:     MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);
398:     VecRestoreArrayRead(pcis->vec1_B,&array);
399:     MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);
400:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
401:       VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
402:       VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
403:       VecGetArrayRead(pcis->vec1_D,&array);
404:       MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);
405:       VecRestoreArrayRead(pcis->vec1_D,&array);
406:     }
407:     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
408:     VecGetArrayRead(vec2_V,&array);
409:     PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));
410:     VecRestoreArrayRead(vec2_V,&array);
411:     if (n_constraints) {
412:       VecGetArrayRead(vec1_C,&array);
413:       PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));
414:       VecRestoreArrayRead(vec1_C,&array);
415:     }

417:     /* check */
418:     if (pcbddc->dbg_flag) {
419:       /* assemble subdomain vector on local nodes */
420:       VecSet(pcis->vec1_N,zero);
421:       VecGetArrayRead(pcbddc->vec1_R,&array);
422:       VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);
423:       VecRestoreArrayRead(pcbddc->vec1_R,&array);
424:       VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);
425:       VecAssemblyBegin(pcis->vec1_N);
426:       VecAssemblyEnd(pcis->vec1_N);
427:       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
428:       VecSet(pcbddc->vec1_P,zero);
429:       VecGetArrayRead(vec2_V,&array);
430:       VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);
431:       VecRestoreArrayRead(vec2_V,&array);
432:       if (n_constraints) {
433:         VecGetArrayRead(vec1_C,&array);
434:         VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);
435:         VecRestoreArrayRead(vec1_C,&array);
436:       }
437:       VecAssemblyBegin(pcbddc->vec1_P);
438:       VecAssemblyEnd(pcbddc->vec1_P);
439:       VecScale(pcbddc->vec1_P,m_one);
440:       /* check saddle point solution */
441:       MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);
442:       MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);
443:       VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);
444:       MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);
445:       /* shift by the identity matrix */
446:       VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);
447:       VecAssemblyBegin(pcbddc->vec1_P);
448:       VecAssemblyEnd(pcbddc->vec1_P);
449:       VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);
450:     }
451:   }

453:   /* constraints */
454:   for (i=0;i<n_constraints;i++) {
455:     VecSet(vec2_C,zero);
456:     VecSetValue(vec2_C,i,m_one,INSERT_VALUES);
457:     VecAssemblyBegin(vec2_C);
458:     VecAssemblyEnd(vec2_C);
459:     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
460:     MatMult(M1,vec2_C,vec1_C);
461:     MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);
462:     VecScale(vec1_C,m_one);
463:     if (n_vertices) {
464:       MatMult(A_VR,pcbddc->vec1_R,vec2_V);
465:     }
466:     /* Set values in coarse basis function and subdomain part of coarse_mat */
467:     /* coarse basis functions */
468:     j = i+n_vertices; /* don't touch this! */
469:     VecSet(pcis->vec1_B,zero);
470:     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
471:     VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
472:     VecGetArrayRead(pcis->vec1_B,&array);
473:     MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);
474:     VecRestoreArrayRead(pcis->vec1_B,&array);
475:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
476:       VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
477:       VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
478:       VecGetArrayRead(pcis->vec1_D,&array);
479:       MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);
480:       VecRestoreArrayRead(pcis->vec1_D,&array);
481:     }
482:     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
483:     if (n_vertices) {
484:       VecGetArrayRead(vec2_V,&array);
485:       PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));
486:       VecRestoreArrayRead(vec2_V,&array);
487:     }
488:     VecGetArrayRead(vec1_C,&array);
489:     PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));
490:     VecRestoreArrayRead(vec1_C,&array);

492:     if (pcbddc->dbg_flag) {
493:       /* assemble subdomain vector on nodes */
494:       VecSet(pcis->vec1_N,zero);
495:       VecGetArrayRead(pcbddc->vec1_R,&array);
496:       VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);
497:       VecRestoreArrayRead(pcbddc->vec1_R,&array);
498:       VecAssemblyBegin(pcis->vec1_N);
499:       VecAssemblyEnd(pcis->vec1_N);
500:       /* assemble subdomain vector of lagrange multipliers */
501:       VecSet(pcbddc->vec1_P,zero);
502:       if (n_vertices) {
503:         VecGetArrayRead(vec2_V,&array);
504:         VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);
505:         VecRestoreArrayRead(vec2_V,&array);
506:       }
507:       VecGetArrayRead(vec1_C,&array);
508:       VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);
509:       VecRestoreArrayRead(vec1_C,&array);
510:       VecAssemblyBegin(pcbddc->vec1_P);
511:       VecAssemblyEnd(pcbddc->vec1_P);
512:       VecScale(pcbddc->vec1_P,m_one);
513:       /* check saddle point solution */
514:       MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);
515:       MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);
516:       VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);
517:       MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);
518:       /* shift by the identity matrix */
519:       VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);
520:       VecAssemblyBegin(pcbddc->vec1_P);
521:       VecAssemblyEnd(pcbddc->vec1_P);
522:       VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);
523:     }
524:   }
525:   /* call assembling routines for local coarse basis */
526:   MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);
527:   MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);
528:   if (pcbddc->switch_static || pcbddc->dbg_flag) {
529:     MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);
530:     MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);
531:   }

533:   /* compute other basis functions for non-symmetric problems */
534:   MatIsSymmetricKnown(pc->pmat,&setsym,&issym);
535:   if (!setsym || (setsym && !issym)) {
536:     if (!pcbddc->coarse_psi_B) {
537:       MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);
538:       MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);
539:       MatSetType(pcbddc->coarse_psi_B,impMatType);
540:       MatSetUp(pcbddc->coarse_psi_B);
541:     }
542:     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
543:       MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);
544:       MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);
545:       MatSetType(pcbddc->coarse_psi_D,impMatType);
546:       MatSetUp(pcbddc->coarse_psi_D);
547:     }
548:     for (i=0;i<pcbddc->local_primal_size;i++) {
549:       if (n_constraints) {
550:         VecSet(vec1_C,zero);
551:         for (j=0;j<n_constraints;j++) {
552:           VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);
553:         }
554:         VecAssemblyBegin(vec1_C);
555:         VecAssemblyEnd(vec1_C);
556:       }
557:       if (i<n_vertices) {
558:         VecSet(vec1_V,zero);
559:         VecSetValue(vec1_V,i,m_one,INSERT_VALUES);
560:         VecAssemblyBegin(vec1_V);
561:         VecAssemblyEnd(vec1_V);
562:         MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);
563:         if (n_constraints) {
564:           MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
565:         }
566:       } else {
567:         MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);
568:       }
569:       KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
570:       VecSet(pcis->vec1_B,zero);
571:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
572:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
573:       VecGetArrayRead(pcis->vec1_B,&array);
574:       MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);
575:       VecRestoreArrayRead(pcis->vec1_B,&array);
576:       if (i<n_vertices) {
577:         MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);
578:       }
579:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
580:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
581:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
582:         VecGetArrayRead(pcis->vec1_D,&array);
583:         MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);
584:         VecRestoreArrayRead(pcis->vec1_D,&array);
585:       }

587:       if (pcbddc->dbg_flag) {
588:         /* assemble subdomain vector on nodes */
589:         VecSet(pcis->vec1_N,zero);
590:         VecGetArrayRead(pcbddc->vec1_R,&array);
591:         VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);
592:         VecRestoreArrayRead(pcbddc->vec1_R,&array);
593:         if (i<n_vertices) {
594:           VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);
595:         }
596:         VecAssemblyBegin(pcis->vec1_N);
597:         VecAssemblyEnd(pcis->vec1_N);
598:         /* assemble subdomain vector of lagrange multipliers */
599:         for (j=0;j<pcbddc->local_primal_size;j++) {
600:           VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);
601:         }
602:         VecAssemblyBegin(pcbddc->vec1_P);
603:         VecAssemblyEnd(pcbddc->vec1_P);
604:         /* check saddle point solution */
605:         MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);
606:         MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);
607:         VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);
608:         MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);
609:         /* shift by the identity matrix */
610:         VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);
611:         VecAssemblyBegin(pcbddc->vec1_P);
612:         VecAssemblyEnd(pcbddc->vec1_P);
613:         VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);
614:       }
615:     }
616:     MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);
617:     MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);
618:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
619:       MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);
620:       MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);
621:     }
622:   }
623:   PetscFree(idx_V_B);
624:   /* Checking coarse_sub_mat and coarse basis functios */
625:   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
626:   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
627:   if (pcbddc->dbg_flag) {
628:     Mat         coarse_sub_mat;
629:     Mat         AUXMAT,TM1,TM2,TM3,TM4;
630:     Mat         coarse_phi_D,coarse_phi_B;
631:     Mat         coarse_psi_D,coarse_psi_B;
632:     Mat         A_II,A_BB,A_IB,A_BI;
633:     MatType     checkmattype=MATSEQAIJ;
634:     PetscReal   real_value;

636:     MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
637:     MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
638:     MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
639:     MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
640:     MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
641:     MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
642:     if (pcbddc->coarse_psi_B) {
643:       MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);
644:       MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);
645:     }
646:     MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);

648:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
649:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");
650:     PetscViewerFlush(pcbddc->dbg_viewer);
651:     if (pcbddc->coarse_psi_B) {
652:       MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
653:       MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);
654:       MatDestroy(&AUXMAT);
655:       MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
656:       MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);
657:       MatDestroy(&AUXMAT);
658:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
659:       MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
660:       MatDestroy(&AUXMAT);
661:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
662:       MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
663:       MatDestroy(&AUXMAT);
664:     } else {
665:       MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
666:       MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
667:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
668:       MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
669:       MatDestroy(&AUXMAT);
670:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
671:       MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
672:       MatDestroy(&AUXMAT);
673:     }
674:     MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
675:     MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
676:     MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
677:     MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);
678:     MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);
679:     MatNorm(TM1,NORM_INFINITY,&real_value);
680:     PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
681:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");
682:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);
683:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);
684:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");
685:     for (i=0;i<pcbddc->local_primal_size;i++) {
686:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);
687:     }
688:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");
689:     for (i=0;i<pcbddc->local_primal_size;i++) {
690:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);
691:     }
692:     if (pcbddc->coarse_psi_B) {
693:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");
694:       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
695:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);
696:       }
697:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");
698:       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
699:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);
700:       }
701:     }
702:     PetscViewerFlush(pcbddc->dbg_viewer);
703:     MatDestroy(&A_II);
704:     MatDestroy(&A_BB);
705:     MatDestroy(&A_IB);
706:     MatDestroy(&A_BI);
707:     MatDestroy(&TM1);
708:     MatDestroy(&TM2);
709:     MatDestroy(&TM3);
710:     MatDestroy(&TM4);
711:     MatDestroy(&coarse_phi_D);
712:     MatDestroy(&coarse_phi_B);
713:     if (pcbddc->coarse_psi_B) {
714:       MatDestroy(&coarse_psi_D);
715:       MatDestroy(&coarse_psi_B);
716:     }
717:     MatDestroy(&coarse_sub_mat);
718:     ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);
719:     PetscFree(coarsefunctions_errors);
720:     PetscFree(constraints_errors);
721:   }
722:   /* free memory */
723:   if (n_vertices) {
724:     VecDestroy(&vec1_V);
725:     VecDestroy(&vec2_V);
726:     MatDestroy(&A_RV);
727:     MatDestroy(&A_VR);
728:     MatDestroy(&A_VV);
729:   }
730:   if (n_constraints) {
731:     VecDestroy(&vec1_C);
732:     VecDestroy(&vec2_C);
733:     MatDestroy(&M1);
734:     MatDestroy(&C_CR);
735:   }
736:   PetscFree(auxindices);
737:   /* get back data */
738:   *coarse_submat_vals_n = coarse_submat_vals;
739:   return(0);
740: }

744: PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
745: {
746:   PC_IS*            pcis = (PC_IS*)(pc->data);
747:   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
748:   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
749:   PetscBool         issbaij,isbaij;
750:   /* manage repeated solves */
751:   MatReuse          reuse;
752:   MatStructure      matstruct;
753:   PetscErrorCode    ierr;

756:   if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) {
757:     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
758:   }
759:   /* get mat flags */
760:   PCGetOperators(pc,NULL,NULL,&matstruct);
761:   reuse = MAT_INITIAL_MATRIX;
762:   if (pc->setupcalled) {
763:     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
764:     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
765:     if (matstruct == SAME_NONZERO_PATTERN) {
766:       reuse = MAT_REUSE_MATRIX;
767:     } else {
768:       reuse = MAT_INITIAL_MATRIX;
769:     }
770:   }
771:   if (reuse == MAT_INITIAL_MATRIX) {
772:     MatDestroy(&pcis->A_II);
773:     MatDestroy(&pcis->A_IB);
774:     MatDestroy(&pcis->A_BI);
775:     MatDestroy(&pcis->A_BB);
776:     MatDestroy(&pcbddc->local_mat);
777:   }

779:   /* transform local matrices if needed */
780:   if (pcbddc->use_change_of_basis) {
781:     Mat         change_mat_all;
782:     PetscScalar *row_cmat_values;
783:     PetscInt    *row_cmat_indices;
784:     PetscInt    *nnz,*is_indices,*temp_indices;
785:     PetscInt    i,j,k,n_D,n_B;

787:     /* Get Non-overlapping dimensions */
788:     n_B = pcis->n_B;
789:     n_D = pcis->n-n_B;

791:     /* compute nonzero structure of change of basis on all local nodes */
792:     PetscMalloc1(pcis->n,&nnz);
793:     ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
794:     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
795:     ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
796:     ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
797:     k=1;
798:     for (i=0;i<n_B;i++) {
799:       MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);
800:       nnz[is_indices[i]]=j;
801:       if (k < j) k = j;
802:       MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);
803:     }
804:     ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
805:     /* assemble change of basis matrix on the whole set of local dofs */
806:     PetscMalloc1(k,&temp_indices);
807:     MatCreate(PETSC_COMM_SELF,&change_mat_all);
808:     MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);
809:     MatSetType(change_mat_all,MATSEQAIJ);
810:     MatSeqAIJSetPreallocation(change_mat_all,0,nnz);
811:     ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
812:     for (i=0;i<n_D;i++) {
813:       MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);
814:     }
815:     ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
816:     ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
817:     for (i=0;i<n_B;i++) {
818:       MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);
819:       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
820:       MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);
821:       MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);
822:     }
823:     MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);
824:     MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);
825:     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */
826:     PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);
827:     PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);
828:     if (!issbaij && !isbaij) {
829:       MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);
830:     } else {
831:       Mat work_mat;
832:       MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
833:       MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);
834:       MatDestroy(&work_mat);
835:     }
836:     /*
837:     PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);
838:     MatView(change_mat_all,(PetscViewer)0);
839:     */
840:     MatDestroy(&change_mat_all);
841:     PetscFree(nnz);
842:     PetscFree(temp_indices);
843:   } else {
844:     /* without change of basis, the local matrix is unchanged */
845:     if (!pcbddc->local_mat) {
846:       PetscObjectReference((PetscObject)matis->A);
847:       pcbddc->local_mat = matis->A;
848:     }
849:   }

851:   /* get submatrices */
852:   MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);
853:   MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);
854:   PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
855:   if (!issbaij) {
856:     MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);
857:     MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);
858:   } else {
859:     Mat newmat;
860:     MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);
861:     MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);
862:     MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);
863:     MatDestroy(&newmat);
864:   }
865:   return(0);
866: }

870: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
871: {
872:   PC_IS*         pcis = (PC_IS*)(pc->data);
873:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
874:   IS             is_aux1,is_aux2;
875:   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
876:   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
877:   PetscInt       vbs,bs;
878:   PetscBT        bitmask;

882:   /* No need to setup local scatters if primal space is unchanged */
883:   if (!pcbddc->new_primal_space_local) {
884:     return(0);
885:   }
886:   /* destroy old objects */
887:   ISDestroy(&pcbddc->is_R_local);
888:   VecScatterDestroy(&pcbddc->R_to_B);
889:   VecScatterDestroy(&pcbddc->R_to_D);
890:   /* Set Non-overlapping dimensions */
891:   n_B = pcis->n_B; n_D = pcis->n - n_B;
892:   n_vertices = pcbddc->n_actual_vertices;
893:   /* create auxiliary bitmask */
894:   PetscBTCreate(pcis->n,&bitmask);
895:   for (i=0;i<n_vertices;i++) {
896:     PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);
897:   }

899:   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
900:   PetscMalloc1((pcis->n-n_vertices),&idx_R_local);
901:   for (i=0, n_R=0; i<pcis->n; i++) {
902:     if (!PetscBTLookup(bitmask,i)) {
903:       idx_R_local[n_R] = i;
904:       n_R++;
905:     }
906:   }

908:   /* Block code */
909:   vbs = 1;
910:   MatGetBlockSize(pcbddc->local_mat,&bs);
911:   if (bs>1 && !(n_vertices%bs)) {
912:     PetscBool is_blocked = PETSC_TRUE;
913:     PetscInt  *vary;
914:     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
915:     PetscMalloc1(pcis->n/bs,&vary);
916:     PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));
917:     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
918:     for (i=0; i<n_vertices; i++) {
919:       if (vary[i]!=0 && vary[i]!=bs) {
920:         is_blocked = PETSC_FALSE;
921:         break;
922:       }
923:     }
924:     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
925:       vbs = bs;
926:       for (i=0;i<n_R/vbs;i++) {
927:         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
928:       }
929:     }
930:     PetscFree(vary);
931:   }
932:   ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);
933:   PetscFree(idx_R_local);

935:   /* print some info if requested */
936:   if (pcbddc->dbg_flag) {
937:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
938:     PetscViewerFlush(pcbddc->dbg_viewer);
939:     PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
940:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
941:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);
942:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);
943:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);
944:     PetscViewerFlush(pcbddc->dbg_viewer);
945:   }

947:   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
948:   ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
949:   PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);
950:   PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);
951:   ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
952:   for (i=0; i<n_D; i++) {
953:     PetscBTSet(bitmask,is_indices[i]);
954:   }
955:   ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
956:   for (i=0, j=0; i<n_R; i++) {
957:     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
958:       aux_array1[j++] = i;
959:     }
960:   }
961:   ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
962:   ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
963:   for (i=0, j=0; i<n_B; i++) {
964:     if (!PetscBTLookup(bitmask,is_indices[i])) {
965:       aux_array2[j++] = i;
966:     }
967:   }
968:   ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
969:   ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);
970:   VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
971:   ISDestroy(&is_aux1);
972:   ISDestroy(&is_aux2);

974:   if (pcbddc->switch_static || pcbddc->dbg_flag) {
975:     PetscMalloc1(n_D,&aux_array1);
976:     for (i=0, j=0; i<n_R; i++) {
977:       if (PetscBTLookup(bitmask,idx_R_local[i])) {
978:         aux_array1[j++] = i;
979:       }
980:     }
981:     ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
982:     VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
983:     ISDestroy(&is_aux1);
984:   }
985:   PetscBTDestroy(&bitmask);
986:   ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
987:   return(0);
988: }


993: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
994: {
995:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
996:   PC_IS          *pcis = (PC_IS*)pc->data;
997:   PC             pc_temp;
998:   Mat            A_RR;
999:   MatStructure   matstruct;
1000:   MatReuse       reuse;
1001:   PetscScalar    m_one = -1.0;
1002:   PetscReal      value;
1003:   PetscInt       n_D,n_R,ibs,mbs;
1004:   PetscBool      use_exact,use_exact_reduced,issbaij;
1006:   /* prefixes stuff */
1007:   char           dir_prefix[256],neu_prefix[256],str_level[3];
1008:   size_t         len;

1011:   PCGetOperators(pc,NULL,NULL,&matstruct);

1013:   /* compute prefixes */
1014:   PetscStrcpy(dir_prefix,"");
1015:   PetscStrcpy(neu_prefix,"");
1016:   if (!pcbddc->current_level) {
1017:     PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);
1018:     PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);
1019:     PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");
1020:     PetscStrcat(neu_prefix,"pc_bddc_neumann_");
1021:   } else {
1022:     PetscStrcpy(str_level,"");
1023:     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
1024:     PetscStrlen(((PetscObject)pc)->prefix,&len);
1025:     len -= 15; /* remove "pc_bddc_coarse_" */
1026:     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
1027:     PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);
1028:     PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);
1029:     *(dir_prefix+len)='\0';
1030:     *(neu_prefix+len)='\0';
1031:     PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");
1032:     PetscStrcat(neu_prefix,"pc_bddc_neumann_");
1033:     PetscStrcat(dir_prefix,str_level);
1034:     PetscStrcat(neu_prefix,str_level);
1035:   }

1037:   /* DIRICHLET PROBLEM */
1038:   /* Matrix for Dirichlet problem is pcis->A_II */
1039:   ISGetSize(pcis->is_I_local,&n_D);
1040:   if (!pcbddc->ksp_D) { /* create object if not yet build */
1041:     KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
1042:     PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
1043:     /* default */
1044:     KSPSetType(pcbddc->ksp_D,KSPPREONLY);
1045:     KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);
1046:     PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);
1047:     KSPGetPC(pcbddc->ksp_D,&pc_temp);
1048:     if (issbaij) {
1049:       PCSetType(pc_temp,PCCHOLESKY);
1050:     } else {
1051:       PCSetType(pc_temp,PCLU);
1052:     }
1053:     PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
1054:   }
1055:   KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);
1056:   /* Allow user's customization */
1057:   KSPSetFromOptions(pcbddc->ksp_D);
1058:   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1059:   if (!n_D) {
1060:     KSPGetPC(pcbddc->ksp_D,&pc_temp);
1061:     PCSetType(pc_temp,PCNONE);
1062:   }
1063:   /* Set Up KSP for Dirichlet problem of BDDC */
1064:   KSPSetUp(pcbddc->ksp_D);
1065:   /* set ksp_D into pcis data */
1066:   KSPDestroy(&pcis->ksp_D);
1067:   PetscObjectReference((PetscObject)pcbddc->ksp_D);
1068:   pcis->ksp_D = pcbddc->ksp_D;

1070:   /* NEUMANN PROBLEM */
1071:   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1072:   ISGetSize(pcbddc->is_R_local,&n_R);
1073:   if (pcbddc->ksp_R) { /* already created ksp */
1074:     PetscInt nn_R;
1075:     KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);
1076:     PetscObjectReference((PetscObject)A_RR);
1077:     MatGetSize(A_RR,&nn_R,NULL);
1078:     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1079:       KSPReset(pcbddc->ksp_R);
1080:       MatDestroy(&A_RR);
1081:       reuse = MAT_INITIAL_MATRIX;
1082:     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1083:       if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1084:         MatDestroy(&A_RR);
1085:         reuse = MAT_INITIAL_MATRIX;
1086:       } else { /* safe to reuse the matrix */
1087:         reuse = MAT_REUSE_MATRIX;
1088:       }
1089:     }
1090:     /* last check */
1091:     if (matstruct == DIFFERENT_NONZERO_PATTERN) {
1092:       MatDestroy(&A_RR);
1093:       reuse = MAT_INITIAL_MATRIX;
1094:     }
1095:   } else { /* first time, so we need to create the matrix */
1096:     reuse = MAT_INITIAL_MATRIX;
1097:   }
1098:   /* extract A_RR */
1099:   MatGetBlockSize(pcbddc->local_mat,&mbs);
1100:   ISGetBlockSize(pcbddc->is_R_local,&ibs);
1101:   if (ibs != mbs) {
1102:     Mat newmat;
1103:     MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);
1104:     MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
1105:     MatDestroy(&newmat);
1106:   } else {
1107:     MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
1108:   }
1109:   if (!pcbddc->ksp_R) { /* create object if not present */
1110:     KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
1111:     PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
1112:     /* default */
1113:     KSPSetType(pcbddc->ksp_R,KSPPREONLY);
1114:     KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);
1115:     KSPGetPC(pcbddc->ksp_R,&pc_temp);
1116:     PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);
1117:     if (issbaij) {
1118:       PCSetType(pc_temp,PCCHOLESKY);
1119:     } else {
1120:       PCSetType(pc_temp,PCLU);
1121:     }
1122:     PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
1123:   }
1124:   KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);
1125:   /* Allow user's customization */
1126:   KSPSetFromOptions(pcbddc->ksp_R);
1127:   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1128:   if (!n_R) {
1129:     KSPGetPC(pcbddc->ksp_R,&pc_temp);
1130:     PCSetType(pc_temp,PCNONE);
1131:   }
1132:   /* Set Up KSP for Neumann problem of BDDC */
1133:   KSPSetUp(pcbddc->ksp_R);

1135:   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1136:   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1137:     /* Dirichlet */
1138:     VecSetRandom(pcis->vec1_D,NULL);
1139:     MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
1140:     KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);
1141:     VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);
1142:     VecNorm(pcis->vec1_D,NORM_INFINITY,&value);
1143:     /* need to be adapted? */
1144:     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1145:     MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
1146:     PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);
1147:     /* print info */
1148:     if (pcbddc->dbg_flag) {
1149:       PetscViewerFlush(pcbddc->dbg_viewer);
1150:       PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
1151:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
1152:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");
1153:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);
1154:       PetscViewerFlush(pcbddc->dbg_viewer);
1155:     }
1156:     if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1157:       PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);
1158:     }

1160:     /* Neumann */
1161:     VecSetRandom(pcbddc->vec1_R,NULL);
1162:     MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
1163:     KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);
1164:     VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);
1165:     VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);
1166:     /* need to be adapted? */
1167:     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1168:     MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
1169:     /* print info */
1170:     if (pcbddc->dbg_flag) {
1171:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);
1172:       PetscViewerFlush(pcbddc->dbg_viewer);
1173:     }
1174:     if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1175:       PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);
1176:     }
1177:   }
1178:   /* free Neumann problem's matrix */
1179:   MatDestroy(&A_RR);
1180:   return(0);
1181: }

1185: static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1186: {
1188:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);

1191:   KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
1192:   if (pcbddc->local_auxmat1) {
1193:     MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);
1194:     MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);
1195:   }
1196:   return(0);
1197: }

1201: PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1202: {
1204:   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1205:   PC_IS*            pcis = (PC_IS*)  (pc->data);
1206:   const PetscScalar zero = 0.0;

1209:   /* Application of PHI^T (or PSI^T)  */
1210:   if (pcbddc->coarse_psi_B) {
1211:     MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);
1212:     if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
1213:   } else {
1214:     MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
1215:     if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
1216:   }
1217:   /* Scatter data of coarse_rhs */
1218:   if (pcbddc->coarse_rhs) { VecSet(pcbddc->coarse_rhs,zero); }
1219:   PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);

1221:   /* Local solution on R nodes */
1222:   VecSet(pcbddc->vec1_R,zero);
1223:   VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1224:   VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1225:   if (pcbddc->switch_static) {
1226:     VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1227:     VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1228:   }
1229:   PCBDDCSolveSaddlePoint(pc);
1230:   VecSet(pcis->vec1_B,zero);
1231:   VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1232:   VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1233:   if (pcbddc->switch_static) {
1234:     VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1235:     VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1236:   }

1238:   /* Coarse solution */
1239:   PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);
1240:   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1241:     KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);
1242:   }
1243:   PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
1244:   PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);

1246:   /* Sum contributions from two levels */
1247:   MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
1248:   if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
1249:   return(0);
1250: }

1254: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1255: {
1257:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);

1260:   VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);
1261:   return(0);
1262: }

1266: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1267: {
1269:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);

1272:   VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);
1273:   return(0);
1274: }

1276: /* uncomment for testing purposes */
1277: /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1280: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1281: {
1282:   PetscErrorCode    ierr;
1283:   PC_IS*            pcis = (PC_IS*)(pc->data);
1284:   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1285:   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1286:   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1287:   MatType           impMatType=MATSEQAIJ;
1288:   /* one and zero */
1289:   PetscScalar       one=1.0,zero=0.0;
1290:   /* space to store constraints and their local indices */
1291:   PetscScalar       *temp_quadrature_constraint;
1292:   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1293:   /* iterators */
1294:   PetscInt          i,j,k,total_counts,temp_start_ptr;
1295:   /* stuff to store connected components stored in pcbddc->mat_graph */
1296:   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1297:   PetscInt          n_ISForFaces,n_ISForEdges;
1298:   /* near null space stuff */
1299:   MatNullSpace      nearnullsp;
1300:   const Vec         *nearnullvecs;
1301:   Vec               *localnearnullsp;
1302:   PetscBool         nnsp_has_cnst;
1303:   PetscInt          nnsp_size;
1304:   PetscScalar       *array;
1305:   /* BLAS integers */
1306:   PetscBLASInt      lwork,lierr;
1307:   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1308:   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1309:   /* LAPACK working arrays for SVD or POD */
1310:   PetscBool         skip_lapack;
1311:   PetscScalar       *work;
1312:   PetscReal         *singular_vals;
1313: #if defined(PETSC_USE_COMPLEX)
1314:   PetscReal         *rwork;
1315: #endif
1316: #if defined(PETSC_MISSING_LAPACK_GESVD)
1317:   PetscBLASInt      Blas_one_2=1;
1318:   PetscScalar       *temp_basis,*correlation_mat;
1319: #else
1320:   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1321:   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1322: #endif
1323:   /* reuse */
1324:   PetscInt          olocal_primal_size;
1325:   PetscInt          *oprimal_indices_local_idxs;
1326:   /* change of basis */
1327:   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1328:   PetscBool         boolforchange,qr_needed;
1329:   PetscBT           touched,change_basis,qr_needed_idx;
1330:   /* auxiliary stuff */
1331:   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1332:   /* some quantities */
1333:   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1334:   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;


1338:   /* Destroy Mat objects computed previously */
1339:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
1340:   MatDestroy(&pcbddc->ConstraintMatrix);
1341:   /* Get index sets for faces, edges and vertices from graph */
1342:   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1343:     pcbddc->use_vertices = PETSC_TRUE;
1344:   }
1345:   PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1346:   /* HACK: provide functions to set change of basis */
1347:   if (!ISForVertices && pcbddc->NullSpace) {
1348:     pcbddc->use_change_of_basis = PETSC_TRUE;
1349:     pcbddc->use_change_on_faces = PETSC_FALSE;
1350:   }
1351:   /* print some info */
1352:   if (pcbddc->dbg_flag) {
1353:     PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
1354:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
1355:     i = 0;
1356:     if (ISForVertices) {
1357:       ISGetSize(ISForVertices,&i);
1358:     }
1359:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);
1360:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);
1361:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);
1362:     PetscViewerFlush(pcbddc->dbg_viewer);
1363:   }
1364:   /* check if near null space is attached to global mat */
1365:   MatGetNearNullSpace(pc->pmat,&nearnullsp);
1366:   if (nearnullsp) {
1367:     MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
1368:     /* remove any stored info */
1369:     MatNullSpaceDestroy(&pcbddc->onearnullspace);
1370:     PetscFree(pcbddc->onearnullvecs_state);
1371:     /* store information for BDDC solver reuse */
1372:     PetscObjectReference((PetscObject)nearnullsp);
1373:     pcbddc->onearnullspace = nearnullsp;
1374:     PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);
1375:     for (i=0;i<nnsp_size;i++) {
1376:       PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);
1377:     }
1378:   } else { /* if near null space is not provided BDDC uses constants by default */
1379:     nnsp_size = 0;
1380:     nnsp_has_cnst = PETSC_TRUE;
1381:   }
1382:   /* get max number of constraints on a single cc */
1383:   max_constraints = nnsp_size;
1384:   if (nnsp_has_cnst) max_constraints++;

1386:   /*
1387:        Evaluate maximum storage size needed by the procedure
1388:        - temp_indices will contain start index of each constraint stored as follows
1389:        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1390:        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1391:        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1392:                                                                                                                                                          */
1393:   total_counts = n_ISForFaces+n_ISForEdges;
1394:   total_counts *= max_constraints;
1395:   n_vertices = 0;
1396:   if (ISForVertices) {
1397:     ISGetSize(ISForVertices,&n_vertices);
1398:   }
1399:   total_counts += n_vertices;
1400:   PetscMalloc1((total_counts+1),&temp_indices);
1401:   PetscBTCreate(total_counts,&change_basis);
1402:   total_counts = 0;
1403:   max_size_of_constraint = 0;
1404:   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1405:     if (i<n_ISForEdges) {
1406:       used_IS = &ISForEdges[i];
1407:     } else {
1408:       used_IS = &ISForFaces[i-n_ISForEdges];
1409:     }
1410:     ISGetSize(*used_IS,&j);
1411:     total_counts += j;
1412:     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1413:   }
1414:   total_counts *= max_constraints;
1415:   total_counts += n_vertices;
1416:   PetscMalloc1(total_counts,&temp_quadrature_constraint);
1417:   PetscMalloc1(total_counts,&temp_indices_to_constraint);
1418:   PetscMalloc1(total_counts,&temp_indices_to_constraint_B);
1419:   /* get local part of global near null space vectors */
1420:   PetscMalloc1(nnsp_size,&localnearnullsp);
1421:   for (k=0;k<nnsp_size;k++) {
1422:     VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
1423:     VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
1424:     VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
1425:   }

1427:   /* whether or not to skip lapack calls */
1428:   skip_lapack = PETSC_TRUE;
1429:   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;

1431:   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1432:   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1433:     PetscScalar temp_work;
1434: #if defined(PETSC_MISSING_LAPACK_GESVD)
1435:     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1436:     PetscMalloc1(max_constraints*max_constraints,&correlation_mat);
1437:     PetscMalloc1(max_constraints,&singular_vals);
1438:     PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);
1439: #if defined(PETSC_USE_COMPLEX)
1440:     PetscMalloc1(3*max_constraints,&rwork);
1441: #endif
1442:     /* now we evaluate the optimal workspace using query with lwork=-1 */
1443:     PetscBLASIntCast(max_constraints,&Blas_N);
1444:     PetscBLASIntCast(max_constraints,&Blas_LDA);
1445:     lwork = -1;
1446:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1447: #if !defined(PETSC_USE_COMPLEX)
1448:     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1449: #else
1450:     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1451: #endif
1452:     PetscFPTrapPop();
1453:     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1454: #else /* on missing GESVD */
1455:     /* SVD */
1456:     PetscInt max_n,min_n;
1457:     max_n = max_size_of_constraint;
1458:     min_n = max_constraints;
1459:     if (max_size_of_constraint < max_constraints) {
1460:       min_n = max_size_of_constraint;
1461:       max_n = max_constraints;
1462:     }
1463:     PetscMalloc1(min_n,&singular_vals);
1464: #if defined(PETSC_USE_COMPLEX)
1465:     PetscMalloc1(5*min_n,&rwork);
1466: #endif
1467:     /* now we evaluate the optimal workspace using query with lwork=-1 */
1468:     lwork = -1;
1469:     PetscBLASIntCast(max_n,&Blas_M);
1470:     PetscBLASIntCast(min_n,&Blas_N);
1471:     PetscBLASIntCast(max_n,&Blas_LDA);
1472:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1473: #if !defined(PETSC_USE_COMPLEX)
1474:     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1475: #else
1476:     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1477: #endif
1478:     PetscFPTrapPop();
1479:     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1480: #endif /* on missing GESVD */
1481:     /* Allocate optimal workspace */
1482:     PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
1483:     PetscMalloc1((PetscInt)lwork,&work);
1484:   }
1485:   /* Now we can loop on constraining sets */
1486:   total_counts = 0;
1487:   temp_indices[0] = 0;
1488:   /* vertices */
1489:   if (ISForVertices) {
1490:     ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);
1491:     if (nnsp_has_cnst) { /* consider all vertices */
1492:       PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));
1493:       for (i=0;i<n_vertices;i++) {
1494:         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1495:         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1496:         total_counts++;
1497:       }
1498:     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1499:       PetscBool used_vertex;
1500:       for (i=0;i<n_vertices;i++) {
1501:         used_vertex = PETSC_FALSE;
1502:         k = 0;
1503:         while (!used_vertex && k<nnsp_size) {
1504:           VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
1505:           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1506:             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1507:             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1508:             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1509:             total_counts++;
1510:             used_vertex = PETSC_TRUE;
1511:           }
1512:           VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
1513:           k++;
1514:         }
1515:       }
1516:     }
1517:     ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);
1518:     n_vertices = total_counts;
1519:   }

1521:   /* edges and faces */
1522:   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1523:     if (i<n_ISForEdges) {
1524:       used_IS = &ISForEdges[i];
1525:       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1526:     } else {
1527:       used_IS = &ISForFaces[i-n_ISForEdges];
1528:       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1529:     }
1530:     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1531:     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1532:     ISGetSize(*used_IS,&size_of_constraint);
1533:     ISGetIndices(*used_IS,(const PetscInt**)&is_indices);
1534:     /* change of basis should not be performed on local periodic nodes */
1535:     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1536:     if (nnsp_has_cnst) {
1537:       PetscScalar quad_value;
1538:       temp_constraints++;
1539:       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1540:       PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));
1541:       for (j=0;j<size_of_constraint;j++) {
1542:         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1543:       }
1544:       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1545:       total_counts++;
1546:     }
1547:     for (k=0;k<nnsp_size;k++) {
1548:       PetscReal real_value;
1549:       VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
1550:       PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));
1551:       for (j=0;j<size_of_constraint;j++) {
1552:         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1553:       }
1554:       VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
1555:       /* check if array is null on the connected component */
1556:       PetscBLASIntCast(size_of_constraint,&Blas_N);
1557:       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1558:       if (real_value > 0.0) { /* keep indices and values */
1559:         temp_constraints++;
1560:         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1561:         total_counts++;
1562:       }
1563:     }
1564:     ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);
1565:     valid_constraints = temp_constraints;
1566:     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1567:     if (!pcbddc->use_nnsp_true && temp_constraints) {
1568:       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */

1570: #if defined(PETSC_MISSING_LAPACK_GESVD)
1571:       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1572:          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1573:          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1574:             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1575:             from that computed using LAPACKgesvd
1576:          -> This is due to a different computation of eigenvectors in LAPACKheev
1577:          -> The quality of the POD-computed basis will be the same */
1578:       PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));
1579:       /* Store upper triangular part of correlation matrix */
1580:       PetscBLASIntCast(size_of_constraint,&Blas_N);
1581:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1582:       for (j=0;j<temp_constraints;j++) {
1583:         for (k=0;k<j+1;k++) {
1584:           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1585:         }
1586:       }
1587:       /* compute eigenvalues and eigenvectors of correlation matrix */
1588:       PetscBLASIntCast(temp_constraints,&Blas_N);
1589:       PetscBLASIntCast(temp_constraints,&Blas_LDA);
1590: #if !defined(PETSC_USE_COMPLEX)
1591:       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1592: #else
1593:       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1594: #endif
1595:       PetscFPTrapPop();
1596:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1597:       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1598:       j=0;
1599:       while (j < temp_constraints && singular_vals[j] < tol) j++;
1600:       total_counts=total_counts-j;
1601:       valid_constraints = temp_constraints-j;
1602:       /* scale and copy POD basis into used quadrature memory */
1603:       PetscBLASIntCast(size_of_constraint,&Blas_M);
1604:       PetscBLASIntCast(temp_constraints,&Blas_N);
1605:       PetscBLASIntCast(temp_constraints,&Blas_K);
1606:       PetscBLASIntCast(size_of_constraint,&Blas_LDA);
1607:       PetscBLASIntCast(temp_constraints,&Blas_LDB);
1608:       PetscBLASIntCast(size_of_constraint,&Blas_LDC);
1609:       if (j<temp_constraints) {
1610:         PetscInt ii;
1611:         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1612:         PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1613:         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1614:         PetscFPTrapPop();
1615:         for (k=0;k<temp_constraints-j;k++) {
1616:           for (ii=0;ii<size_of_constraint;ii++) {
1617:             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1618:           }
1619:         }
1620:       }
1621: #else  /* on missing GESVD */
1622:       PetscBLASIntCast(size_of_constraint,&Blas_M);
1623:       PetscBLASIntCast(temp_constraints,&Blas_N);
1624:       PetscBLASIntCast(size_of_constraint,&Blas_LDA);
1625:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1626: #if !defined(PETSC_USE_COMPLEX)
1627:       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1628: #else
1629:       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1630: #endif
1631:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1632:       PetscFPTrapPop();
1633:       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1634:       k = temp_constraints;
1635:       if (k > size_of_constraint) k = size_of_constraint;
1636:       j = 0;
1637:       while (j < k && singular_vals[k-j-1] < tol) j++;
1638:       total_counts = total_counts-temp_constraints+k-j;
1639:       valid_constraints = k-j;
1640: #endif /* on missing GESVD */
1641:     }
1642:     /* setting change_of_basis flag is safe now */
1643:     if (boolforchange) {
1644:       for (j=0;j<valid_constraints;j++) {
1645:         PetscBTSet(change_basis,total_counts-j-1);
1646:       }
1647:     }
1648:   }
1649:   /* free index sets of faces, edges and vertices */
1650:   for (i=0;i<n_ISForFaces;i++) {
1651:     ISDestroy(&ISForFaces[i]);
1652:   }
1653:   PetscFree(ISForFaces);
1654:   for (i=0;i<n_ISForEdges;i++) {
1655:     ISDestroy(&ISForEdges[i]);
1656:   }
1657:   PetscFree(ISForEdges);
1658:   ISDestroy(&ISForVertices);
1659:   /* map temp_indices_to_constraint in boundary numbering */
1660:   ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);
1661:   if (i != temp_indices[total_counts]) {
1662:     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1663:   }

1665:   /* free workspace */
1666:   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1667:     PetscFree(work);
1668: #if defined(PETSC_USE_COMPLEX)
1669:     PetscFree(rwork);
1670: #endif
1671:     PetscFree(singular_vals);
1672: #if defined(PETSC_MISSING_LAPACK_GESVD)
1673:     PetscFree(correlation_mat);
1674:     PetscFree(temp_basis);
1675: #endif
1676:   }
1677:   for (k=0;k<nnsp_size;k++) {
1678:     VecDestroy(&localnearnullsp[k]);
1679:   }
1680:   PetscFree(localnearnullsp);

1682:   /* set quantities in pcbddc data structure and store previous primal size */
1683:   /* n_vertices defines the number of subdomain corners in the primal space */
1684:   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1685:   olocal_primal_size = pcbddc->local_primal_size;
1686:   pcbddc->local_primal_size = total_counts;
1687:   pcbddc->n_vertices = n_vertices;
1688:   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;

1690:   /* Create constraint matrix */
1691:   /* The constraint matrix is used to compute the l2g map of primal dofs */
1692:   /* so we need to set it up properly either with or without change of basis */
1693:   MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
1694:   MatSetType(pcbddc->ConstraintMatrix,impMatType);
1695:   MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);
1696:   /* array to compute a local numbering of constraints : vertices first then constraints */
1697:   PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);
1698:   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1699:   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1700:   PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);
1701:   /* auxiliary stuff for basis change */
1702:   PetscMalloc1(max_size_of_constraint,&global_indices);
1703:   PetscBTCreate(pcis->n_B,&touched);

1705:   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1706:   total_primal_vertices=0;
1707:   for (i=0;i<pcbddc->local_primal_size;i++) {
1708:     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1709:     if (size_of_constraint == 1) {
1710:       PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);
1711:       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1712:       aux_primal_minloc[total_primal_vertices]=0;
1713:       total_primal_vertices++;
1714:     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1715:       PetscInt min_loc,min_index;
1716:       ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);
1717:       /* find first untouched local node */
1718:       k = 0;
1719:       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1720:       min_index = global_indices[k];
1721:       min_loc = k;
1722:       /* search the minimum among global nodes already untouched on the cc */
1723:       for (k=1;k<size_of_constraint;k++) {
1724:         /* there can be more than one constraint on a single connected component */
1725:         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1726:           min_index = global_indices[k];
1727:           min_loc = k;
1728:         }
1729:       }
1730:       PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);
1731:       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1732:       aux_primal_minloc[total_primal_vertices]=min_loc;
1733:       total_primal_vertices++;
1734:     }
1735:   }
1736:   /* determine if a QR strategy is needed for change of basis */
1737:   qr_needed = PETSC_FALSE;
1738:   PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);
1739:   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1740:     if (PetscBTLookup(change_basis,i)) {
1741:       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1742:       j = 0;
1743:       for (k=0;k<size_of_constraint;k++) {
1744:         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1745:           j++;
1746:         }
1747:       }
1748:       /* found more than one primal dof on the cc */
1749:       if (j > 1) {
1750:         PetscBTSet(qr_needed_idx,i);
1751:         qr_needed = PETSC_TRUE;
1752:       }
1753:     }
1754:   }
1755:   /* free workspace */
1756:   PetscFree(global_indices);

1758:   /* permute indices in order to have a sorted set of vertices */
1759:   PetscSortInt(total_primal_vertices,aux_primal_numbering);

1761:   /* nonzero structure of constraint matrix */
1762:   PetscMalloc1(pcbddc->local_primal_size,&nnz);
1763:   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1764:   j=total_primal_vertices;
1765:   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1766:     if (!PetscBTLookup(change_basis,i)) {
1767:       nnz[j]=temp_indices[i+1]-temp_indices[i];
1768:       j++;
1769:     }
1770:   }
1771:   MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
1772:   PetscFree(nnz);
1773:   /* set values in constraint matrix */
1774:   for (i=0;i<total_primal_vertices;i++) {
1775:     MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);
1776:   }
1777:   total_counts = total_primal_vertices;
1778:   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1779:     if (!PetscBTLookup(change_basis,i)) {
1780:       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1781:       MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);
1782:       total_counts++;
1783:     }
1784:   }
1785:   /* assembling */
1786:   MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
1787:   MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
1788:   /*
1789:   PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);
1790:   MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);
1791:   */
1792:   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1793:   if (pcbddc->use_change_of_basis) {
1794:     /* dual and primal dofs on a single cc */
1795:     PetscInt     dual_dofs,primal_dofs;
1796:     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1797:     PetscInt     primal_counter;
1798:     /* working stuff for GEQRF */
1799:     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1800:     PetscBLASInt lqr_work;
1801:     /* working stuff for UNGQR */
1802:     PetscScalar  *gqr_work,lgqr_work_t;
1803:     PetscBLASInt lgqr_work;
1804:     /* working stuff for TRTRS */
1805:     PetscScalar  *trs_rhs;
1806:     PetscBLASInt Blas_NRHS;
1807:     /* pointers for values insertion into change of basis matrix */
1808:     PetscInt     *start_rows,*start_cols;
1809:     PetscScalar  *start_vals;
1810:     /* working stuff for values insertion */
1811:     PetscBT      is_primal;

1813:     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1814:     MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);
1815:     MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);
1816:     MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);
1817:     /* work arrays */
1818:     PetscMalloc1(pcis->n_B,&nnz);
1819:     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1820:     /* nonzeros per row */
1821:     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1822:       if (PetscBTLookup(change_basis,i)) {
1823:         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1824:         if (PetscBTLookup(qr_needed_idx,i)) {
1825:           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1826:         } else {
1827:           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1828:           /* get local primal index on the cc */
1829:           j = 0;
1830:           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1831:           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1832:         }
1833:       }
1834:     }
1835:     MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);
1836:     PetscFree(nnz);
1837:     /* Set initial identity in the matrix */
1838:     for (i=0;i<pcis->n_B;i++) {
1839:       MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
1840:     }

1842:     if (pcbddc->dbg_flag) {
1843:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
1844:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);
1845:     }


1848:     /* Now we loop on the constraints which need a change of basis */
1849:     /*
1850:        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1851:        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)

1853:        Basic blocks of change of basis matrix T computed

1855:           - Using the following block transformation if there is only a primal dof on the cc
1856:             (in the example, primal dof is the last one of the edge in LOCAL ordering
1857:              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1858:             | 1        0   ...        0     1 |
1859:             | 0        1   ...        0     1 |
1860:             |              ...                |
1861:             | 0        ...            1     1 |
1862:             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |

1864:           - via QR decomposition of constraints otherwise
1865:     */
1866:     if (qr_needed) {
1867:       /* space to store Q */
1868:       PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);
1869:       /* first we issue queries for optimal work */
1870:       PetscBLASIntCast(max_size_of_constraint,&Blas_M);
1871:       PetscBLASIntCast(max_constraints,&Blas_N);
1872:       PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
1873:       lqr_work = -1;
1874:       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1875:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1876:       PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);
1877:       PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);
1878:       lgqr_work = -1;
1879:       PetscBLASIntCast(max_size_of_constraint,&Blas_M);
1880:       PetscBLASIntCast(max_size_of_constraint,&Blas_N);
1881:       PetscBLASIntCast(max_constraints,&Blas_K);
1882:       PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
1883:       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1884:       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1885:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1886:       PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);
1887:       PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);
1888:       /* array to store scaling factors for reflectors */
1889:       PetscMalloc1(max_constraints,&qr_tau);
1890:       /* array to store rhs and solution of triangular solver */
1891:       PetscMalloc1(max_constraints*max_constraints,&trs_rhs);
1892:       /* allocating workspace for check */
1893:       if (pcbddc->dbg_flag) {
1894:         PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);
1895:       }
1896:     }
1897:     /* array to store whether a node is primal or not */
1898:     PetscBTCreate(pcis->n_B,&is_primal);
1899:     PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);
1900:     ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);
1901:     if (i != total_primal_vertices) {
1902:       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
1903:     }
1904:     for (i=0;i<total_primal_vertices;i++) {
1905:       PetscBTSet(is_primal,aux_primal_numbering_B[i]);
1906:     }
1907:     PetscFree(aux_primal_numbering_B);

1909:     /* loop on constraints and see whether or not they need a change of basis and compute it */
1910:     /* -> using implicit ordering contained in temp_indices data */
1911:     total_counts = pcbddc->n_vertices;
1912:     primal_counter = total_counts;
1913:     while (total_counts<pcbddc->local_primal_size) {
1914:       primal_dofs = 1;
1915:       if (PetscBTLookup(change_basis,total_counts)) {
1916:         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1917:         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1918:           primal_dofs++;
1919:         }
1920:         /* get constraint info */
1921:         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1922:         dual_dofs = size_of_constraint-primal_dofs;

1924:         if (pcbddc->dbg_flag) {
1925:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);
1926:         }

1928:         if (primal_dofs > 1) { /* QR */

1930:           /* copy quadrature constraints for change of basis check */
1931:           if (pcbddc->dbg_flag) {
1932:             PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
1933:           }
1934:           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1935:           PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));

1937:           /* compute QR decomposition of constraints */
1938:           PetscBLASIntCast(size_of_constraint,&Blas_M);
1939:           PetscBLASIntCast(primal_dofs,&Blas_N);
1940:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
1941:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1942:           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1943:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1944:           PetscFPTrapPop();

1946:           /* explictly compute R^-T */
1947:           PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));
1948:           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1949:           PetscBLASIntCast(primal_dofs,&Blas_N);
1950:           PetscBLASIntCast(primal_dofs,&Blas_NRHS);
1951:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
1952:           PetscBLASIntCast(primal_dofs,&Blas_LDB);
1953:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1954:           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1955:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1956:           PetscFPTrapPop();

1958:           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1959:           PetscBLASIntCast(size_of_constraint,&Blas_M);
1960:           PetscBLASIntCast(size_of_constraint,&Blas_N);
1961:           PetscBLASIntCast(primal_dofs,&Blas_K);
1962:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
1963:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1964:           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1965:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1966:           PetscFPTrapPop();

1968:           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1969:              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1970:              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1971:           PetscBLASIntCast(size_of_constraint,&Blas_M);
1972:           PetscBLASIntCast(primal_dofs,&Blas_N);
1973:           PetscBLASIntCast(primal_dofs,&Blas_K);
1974:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
1975:           PetscBLASIntCast(primal_dofs,&Blas_LDB);
1976:           PetscBLASIntCast(size_of_constraint,&Blas_LDC);
1977:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1978:           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1979:           PetscFPTrapPop();
1980:           PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));

1982:           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1983:           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1984:           /* insert cols for primal dofs */
1985:           for (j=0;j<primal_dofs;j++) {
1986:             start_vals = &qr_basis[j*size_of_constraint];
1987:             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1988:             MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
1989:           }
1990:           /* insert cols for dual dofs */
1991:           for (j=0,k=0;j<dual_dofs;k++) {
1992:             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1993:               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1994:               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1995:               MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
1996:               j++;
1997:             }
1998:           }

2000:           /* check change of basis */
2001:           if (pcbddc->dbg_flag) {
2002:             PetscInt   ii,jj;
2003:             PetscBool valid_qr=PETSC_TRUE;
2004:             PetscBLASIntCast(primal_dofs,&Blas_M);
2005:             PetscBLASIntCast(size_of_constraint,&Blas_N);
2006:             PetscBLASIntCast(size_of_constraint,&Blas_K);
2007:             PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2008:             PetscBLASIntCast(size_of_constraint,&Blas_LDB);
2009:             PetscBLASIntCast(primal_dofs,&Blas_LDC);
2010:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2011:             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2012:             PetscFPTrapPop();
2013:             for (jj=0;jj<size_of_constraint;jj++) {
2014:               for (ii=0;ii<primal_dofs;ii++) {
2015:                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2016:                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2017:               }
2018:             }
2019:             if (!valid_qr) {
2020:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);
2021:               for (jj=0;jj<size_of_constraint;jj++) {
2022:                 for (ii=0;ii<primal_dofs;ii++) {
2023:                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2024:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2025:                   }
2026:                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2027:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2028:                   }
2029:                 }
2030:               }
2031:             } else {
2032:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);
2033:             }
2034:           }
2035:         } else { /* simple transformation block */
2036:           PetscInt row,col;
2037:           PetscScalar val;
2038:           for (j=0;j<size_of_constraint;j++) {
2039:             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2040:             if (!PetscBTLookup(is_primal,row)) {
2041:               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2042:               MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);
2043:               MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);
2044:             } else {
2045:               for (k=0;k<size_of_constraint;k++) {
2046:                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2047:                 if (row != col) {
2048:                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2049:                 } else {
2050:                   val = 1.0;
2051:                 }
2052:                 MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);
2053:               }
2054:             }
2055:           }
2056:         }
2057:         /* increment primal counter */
2058:         primal_counter += primal_dofs;
2059:       } else {
2060:         if (pcbddc->dbg_flag) {
2061:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);
2062:         }
2063:       }
2064:       /* increment constraint counter total_counts */
2065:       total_counts += primal_dofs;
2066:     }

2068:     /* free workspace */
2069:     if (qr_needed) {
2070:       if (pcbddc->dbg_flag) {
2071:         PetscFree(work);
2072:       }
2073:       PetscFree(trs_rhs);
2074:       PetscFree(qr_tau);
2075:       PetscFree(qr_work);
2076:       PetscFree(gqr_work);
2077:       PetscFree(qr_basis);
2078:     }
2079:     PetscBTDestroy(&is_primal);
2080:     /* assembling */
2081:     MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
2082:     MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
2083:     /*
2084:     PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);
2085:     MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);
2086:     */
2087:   }

2089:   /* get indices in local ordering for vertices and constraints */
2090:   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2091:     PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);
2092:     PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));
2093:   }
2094:   PetscFree(aux_primal_numbering);
2095:   PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);
2096:   PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);
2097:   PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));
2098:   PetscFree(aux_primal_numbering);
2099:   PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);
2100:   PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));
2101:   PetscFree(aux_primal_numbering);
2102:   /* set quantities in PCBDDC data struct */
2103:   pcbddc->n_actual_vertices = i;
2104:   /* check if a new primal space has been introduced */
2105:   pcbddc->new_primal_space_local = PETSC_TRUE;
2106:   if (olocal_primal_size == pcbddc->local_primal_size) {
2107:     PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);
2108:     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2109:     PetscFree(oprimal_indices_local_idxs);
2110:   }
2111:   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2112:   MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));

2114:   /* flush dbg viewer */
2115:   if (pcbddc->dbg_flag) {
2116:     PetscViewerFlush(pcbddc->dbg_viewer);
2117:   }

2119:   /* free workspace */
2120:   PetscBTDestroy(&touched);
2121:   PetscBTDestroy(&qr_needed_idx);
2122:   PetscFree(aux_primal_minloc);
2123:   PetscFree(temp_indices);
2124:   PetscBTDestroy(&change_basis);
2125:   PetscFree(temp_indices_to_constraint);
2126:   PetscFree(temp_indices_to_constraint_B);
2127:   PetscFree(temp_quadrature_constraint);
2128:   return(0);
2129: }

2133: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2134: {
2135:   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2136:   PC_IS       *pcis = (PC_IS*)pc->data;
2137:   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2138:   PetscInt    bs,ierr,i,vertex_size;
2139:   PetscViewer viewer=pcbddc->dbg_viewer;

2142:   /* Reset previously computed graph */
2143:   PCBDDCGraphReset(pcbddc->mat_graph);
2144:   /* Init local Graph struct */
2145:   PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);

2147:   /* Check validity of the csr graph passed in by the user */
2148:   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2149:     PCBDDCGraphResetCSR(pcbddc->mat_graph);
2150:   }

2152:   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2153:   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2154:     Mat mat_adj;
2155:     const PetscInt *xadj,*adjncy;
2156:     PetscBool flg_row=PETSC_TRUE;

2158:     MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);
2159:     MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);
2160:     if (!flg_row) {
2161:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2162:     }
2163:     PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);
2164:     MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);
2165:     if (!flg_row) {
2166:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2167:     }
2168:     MatDestroy(&mat_adj);
2169:   }

2171:   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
2172:   vertex_size = 1;
2173:   if (!pcbddc->user_provided_isfordofs) {
2174:     if (!pcbddc->n_ISForDofs) {
2175:       IS *custom_ISForDofs;

2177:       MatGetBlockSize(matis->A,&bs);
2178:       PetscMalloc1(bs,&custom_ISForDofs);
2179:       for (i=0;i<bs;i++) {
2180:         ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);
2181:       }
2182:       PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);
2183:       pcbddc->user_provided_isfordofs = PETSC_FALSE;
2184:       /* remove my references to IS objects */
2185:       for (i=0;i<bs;i++) {
2186:         ISDestroy(&custom_ISForDofs[i]);
2187:       }
2188:       PetscFree(custom_ISForDofs);
2189:     }
2190:   } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2191:     MatGetBlockSize(matis->A,&vertex_size);
2192:   }

2194:   /* Setup of Graph */
2195:   PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);

2197:   /* Graph's connected components analysis */
2198:   PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);

2200:   /* print some info to stdout */
2201:   if (pcbddc->dbg_flag) {
2202:     PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2203:   }

2205:   /* mark topography has done */
2206:   pcbddc->recompute_topography = PETSC_FALSE;
2207:   return(0);
2208: }

2212: PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2213: {
2214:   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2215:   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;

2219:   n = 0;
2220:   vertices = 0;
2221:   if (pcbddc->ConstraintMatrix) {
2222:     MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);
2223:     for (i=0;i<local_primal_size;i++) {
2224:       MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);
2225:       if (size_of_constraint == 1) n++;
2226:       MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);
2227:     }
2228:     if (vertices_idx) {
2229:       PetscMalloc1(n,&vertices);
2230:       n = 0;
2231:       for (i=0;i<local_primal_size;i++) {
2232:         MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);
2233:         if (size_of_constraint == 1) {
2234:           vertices[n++]=row_cmat_indices[0];
2235:         }
2236:         MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);
2237:       }
2238:     }
2239:   }
2240:   *n_vertices = n;
2241:   if (vertices_idx) *vertices_idx = vertices;
2242:   return(0);
2243: }

2247: PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2248: {
2249:   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2250:   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2251:   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2252:   PetscBT        touched;

2255:     /* This function assumes that the number of local constraints per connected component
2256:        is not greater than the number of nodes defined for the connected component
2257:        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2259:   n = 0;
2260:   constraints_index = 0;
2261:   if (pcbddc->ConstraintMatrix) {
2262:     MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);
2263:     max_size_of_constraint = 0;
2264:     for (i=0;i<local_primal_size;i++) {
2265:       MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);
2266:       if (size_of_constraint > 1) {
2267:         n++;
2268:       }
2269:       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2270:       MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);
2271:     }
2272:     if (constraints_idx) {
2273:       PetscMalloc1(n,&constraints_index);
2274:       PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);
2275:       PetscBTCreate(local_size,&touched);
2276:       n = 0;
2277:       for (i=0;i<local_primal_size;i++) {
2278:         MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);
2279:         if (size_of_constraint > 1) {
2280:           ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);
2281:           /* find first untouched local node */
2282:           j = 0;
2283:           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2284:           min_index = row_cmat_global_indices[j];
2285:           min_loc = j;
2286:           /* search the minimum among nodes not yet touched on the connected component
2287:              since there can be more than one constraint on a single cc */
2288:           for (j=1;j<size_of_constraint;j++) {
2289:             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2290:               min_index = row_cmat_global_indices[j];
2291:               min_loc = j;
2292:             }
2293:           }
2294:           PetscBTSet(touched,row_cmat_indices[min_loc]);
2295:           constraints_index[n++] = row_cmat_indices[min_loc];
2296:         }
2297:         MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);
2298:       }
2299:       PetscBTDestroy(&touched);
2300:       PetscFree(row_cmat_global_indices);
2301:     }
2302:   }
2303:   *n_constraints = n;
2304:   if (constraints_idx) *constraints_idx = constraints_index;
2305:   return(0);
2306: }

2308: /* the next two functions has been adapted from pcis.c */
2311: PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2312: {
2314:   PC_IS          *pcis = (PC_IS*)(pc->data);

2317:   if (!vec2_B) { vec2_B = v; }
2318:   MatMult(pcis->A_BB,v,vec1_B);
2319:   MatMult(pcis->A_IB,v,vec1_D);
2320:   KSPSolve(pcis->ksp_D,vec1_D,vec2_D);
2321:   MatMult(pcis->A_BI,vec2_D,vec2_B);
2322:   VecAXPY(vec1_B,-1.0,vec2_B);
2323:   return(0);
2324: }

2328: PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2329: {
2331:   PC_IS          *pcis = (PC_IS*)(pc->data);

2334:   if (!vec2_B) { vec2_B = v; }
2335:   MatMultTranspose(pcis->A_BB,v,vec1_B);
2336:   MatMultTranspose(pcis->A_BI,v,vec1_D);
2337:   KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);
2338:   MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);
2339:   VecAXPY(vec1_B,-1.0,vec2_B);
2340:   return(0);
2341: }

2345: PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2346: {
2347:   Vec            local_vec,global_vec;
2348:   IS             seqis,paris;
2349:   VecScatter     scatter_ctx;
2350:   PetscScalar    *array;
2351:   PetscInt       *temp_global_dofs;
2352:   PetscScalar    globalsum;
2353:   PetscInt       i,j,s;
2354:   PetscInt       nlocals,first_index,old_index,max_local;
2355:   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2356:   PetscMPIInt    *dof_sizes,*dof_displs;
2357:   PetscBool      first_found;

2361:   /* mpi buffers */
2362:   MPI_Comm_size(comm,&size_prec_comm);
2363:   MPI_Comm_rank(comm,&rank_prec_comm);
2364:   j = ( !rank_prec_comm ? size_prec_comm : 0);
2365:   PetscMalloc1(j,&dof_sizes);
2366:   PetscMalloc1(j,&dof_displs);
2367:   /* get maximum size of subset */
2368:   PetscMalloc1(n_local_dofs,&temp_global_dofs);
2369:   ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);
2370:   max_local = 0;
2371:   if (n_local_dofs) {
2372:     max_local = temp_global_dofs[0];
2373:     for (i=1;i<n_local_dofs;i++) {
2374:       if (max_local < temp_global_dofs[i] ) {
2375:         max_local = temp_global_dofs[i];
2376:       }
2377:     }
2378:   }
2379:   MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2380:   max_global++;
2381:   max_local = 0;
2382:   if (n_local_dofs) {
2383:     max_local = local_dofs[0];
2384:     for (i=1;i<n_local_dofs;i++) {
2385:       if (max_local < local_dofs[i] ) {
2386:         max_local = local_dofs[i];
2387:       }
2388:     }
2389:   }
2390:   max_local++;
2391:   /* allocate workspace */
2392:   VecCreate(PETSC_COMM_SELF,&local_vec);
2393:   VecSetSizes(local_vec,PETSC_DECIDE,max_local);
2394:   VecSetType(local_vec,VECSEQ);
2395:   VecCreate(comm,&global_vec);
2396:   VecSetSizes(global_vec,PETSC_DECIDE,max_global);
2397:   VecSetType(global_vec,VECMPI);
2398:   /* create scatter */
2399:   ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);
2400:   ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);
2401:   VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);
2402:   ISDestroy(&seqis);
2403:   ISDestroy(&paris);
2404:   /* init array */
2405:   VecSet(global_vec,0.0);
2406:   VecSet(local_vec,0.0);
2407:   VecGetArray(local_vec,&array);
2408:   if (local_dofs_mult) {
2409:     for (i=0;i<n_local_dofs;i++) {
2410:       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2411:     }
2412:   } else {
2413:     for (i=0;i<n_local_dofs;i++) {
2414:       array[local_dofs[i]]=1.0;
2415:     }
2416:   }
2417:   VecRestoreArray(local_vec,&array);
2418:   /* scatter into global vec and get total number of global dofs */
2419:   VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);
2420:   VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);
2421:   VecSum(global_vec,&globalsum);
2422:   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2423:   /* Fill global_vec with cumulative function for global numbering */
2424:   VecGetArray(global_vec,&array);
2425:   VecGetLocalSize(global_vec,&s);
2426:   nlocals = 0;
2427:   first_index = -1;
2428:   first_found = PETSC_FALSE;
2429:   for (i=0;i<s;i++) {
2430:     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2431:       first_found = PETSC_TRUE;
2432:       first_index = i;
2433:     }
2434:     nlocals += (PetscInt)PetscRealPart(array[i]);
2435:   }
2436:   MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);
2437:   if (!rank_prec_comm) {
2438:     dof_displs[0]=0;
2439:     for (i=1;i<size_prec_comm;i++) {
2440:       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2441:     }
2442:   }
2443:   MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);
2444:   if (first_found) {
2445:     array[first_index] += (PetscScalar)nlocals;
2446:     old_index = first_index;
2447:     for (i=first_index+1;i<s;i++) {
2448:       if (PetscRealPart(array[i]) > 0.0) {
2449:         array[i] += array[old_index];
2450:         old_index = i;
2451:       }
2452:     }
2453:   }
2454:   VecRestoreArray(global_vec,&array);
2455:   VecSet(local_vec,0.0);
2456:   VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);
2457:   VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);
2458:   /* get global ordering of local dofs */
2459:   VecGetArray(local_vec,&array);
2460:   if (local_dofs_mult) {
2461:     for (i=0;i<n_local_dofs;i++) {
2462:       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2463:     }
2464:   } else {
2465:     for (i=0;i<n_local_dofs;i++) {
2466:       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2467:     }
2468:   }
2469:   VecRestoreArray(local_vec,&array);
2470:   /* free workspace */
2471:   VecScatterDestroy(&scatter_ctx);
2472:   VecDestroy(&local_vec);
2473:   VecDestroy(&global_vec);
2474:   PetscFree(dof_sizes);
2475:   PetscFree(dof_displs);
2476:   /* return pointer to global ordering of local dofs */
2477:   *global_numbering_subset = temp_global_dofs;
2478:   return(0);
2479: }

2483: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2484: {
2485:   PetscInt       i,j;
2486:   PetscScalar    *alphas;

2490:   /* this implements stabilized Gram-Schmidt */
2491:   PetscMalloc1(n,&alphas);
2492:   for (i=0;i<n;i++) {
2493:     VecNormalize(vecs[i],NULL);
2494:     if (i<n) { VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]); }
2495:     for (j=i+1;j<n;j++) { VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]); }
2496:   }
2497:   PetscFree(alphas);
2498:   return(0);
2499: }

2501: /* TODO
2502:    - now preallocation is done assuming SEQDENSE local matrices
2503: */
2506: static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M)
2507: {
2508:   Mat                    new_mat;
2509:   Mat_IS                 *matis = (Mat_IS*)(mat->data);
2510:   /* info on mat */
2511:   /* ISLocalToGlobalMapping rmapping,cmapping; */
2512:   PetscInt               bs,rows,cols;
2513:   PetscInt               lrows,lcols;
2514:   PetscInt               local_rows,local_cols;
2515:   PetscBool              isdense;
2516:   /* values insertion */
2517:   PetscScalar            *array;
2518:   PetscInt               *local_indices,*global_indices;
2519:   /* work */
2520:   PetscInt               i,j,index_row;
2521:   PetscErrorCode         ierr;

2524:   /* MISSING CHECKS
2525:     - rectangular case not covered (it is not allowed by MATIS)
2526:   */
2527:   /* get info from mat */
2528:   /* MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping); */
2529:   MatGetSize(mat,&rows,&cols);
2530:   MatGetBlockSize(mat,&bs);
2531:   MatGetSize(matis->A,&local_rows,&local_cols);

2533:   /* work */
2534:   PetscMalloc1(local_rows,&local_indices);
2535:   for (i=0;i<local_rows;i++) local_indices[i]=i;
2536:   /* map indices of local mat to global values */
2537:   PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);
2538:   /* ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices); */
2539:   ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);

2541:   if (reuse==MAT_INITIAL_MATRIX) {
2542:     Vec         vec_dnz,vec_onz;
2543:     PetscScalar *my_dnz,*my_onz;
2544:     PetscInt    *dnz,*onz,*mat_ranges,*row_ownership;
2545:     PetscInt    index_col,owner;
2546:     PetscMPIInt nsubdomains;

2548:     MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);
2549:     MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);
2550:     MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);
2551:     MatSetBlockSize(new_mat,bs);
2552:     MatSetType(new_mat,Mtype);
2553:     MatSetUp(new_mat);
2554:     MatGetLocalSize(new_mat,&lrows,&lcols);

2556:     /*
2557:       preallocation
2558:     */

2560:     MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);
2561:     /*
2562:        Some vectors are needed to sum up properly on shared interface dofs.
2563:        Preallocation macros cannot do the job.
2564:        Note that preallocation is not exact, since it overestimates nonzeros
2565:     */
2566:     MatGetVecs(new_mat,NULL,&vec_dnz);
2567:     /* VecSetLocalToGlobalMapping(vec_dnz,rmapping); */
2568:     VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);
2569:     VecDuplicate(vec_dnz,&vec_onz);
2570:     /* All processes need to compute entire row ownership */
2571:     PetscMalloc1(rows,&row_ownership);
2572:     MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);
2573:     for (i=0;i<nsubdomains;i++) {
2574:       for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2575:         row_ownership[j]=i;
2576:       }
2577:     }

2579:     /*
2580:        my_dnz and my_onz contains exact contribution to preallocation from each local mat
2581:        then, they will be summed up properly. This way, preallocation is always sufficient
2582:     */
2583:     PetscMalloc1(local_rows,&my_dnz);
2584:     PetscMalloc1(local_rows,&my_onz);
2585:     PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));
2586:     PetscMemzero(my_onz,local_rows*sizeof(*my_onz));
2587:     for (i=0;i<local_rows;i++) {
2588:       index_row = global_indices[i];
2589:       for (j=i;j<local_rows;j++) {
2590:         owner = row_ownership[index_row];
2591:         index_col = global_indices[j];
2592:         if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2593:           my_dnz[i] += 1.0;
2594:         } else { /* offdiag block */
2595:           my_onz[i] += 1.0;
2596:         }
2597:         /* same as before, interchanging rows and cols */
2598:         if (i != j) {
2599:           owner = row_ownership[index_col];
2600:           if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2601:             my_dnz[j] += 1.0;
2602:           } else {
2603:             my_onz[j] += 1.0;
2604:           }
2605:         }
2606:       }
2607:     }
2608:     VecSet(vec_dnz,0.0);
2609:     VecSet(vec_onz,0.0);
2610:     if (local_rows) { /* multilevel guard */
2611:       VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);
2612:       VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);
2613:     }
2614:     VecAssemblyBegin(vec_dnz);
2615:     VecAssemblyBegin(vec_onz);
2616:     VecAssemblyEnd(vec_dnz);
2617:     VecAssemblyEnd(vec_onz);
2618:     PetscFree(my_dnz);
2619:     PetscFree(my_onz);
2620:     PetscFree(row_ownership);

2622:     /* set computed preallocation in dnz and onz */
2623:     VecGetArray(vec_dnz,&array);
2624:     for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2625:     VecRestoreArray(vec_dnz,&array);
2626:     VecGetArray(vec_onz,&array);
2627:     for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2628:     VecRestoreArray(vec_onz,&array);
2629:     VecDestroy(&vec_dnz);
2630:     VecDestroy(&vec_onz);

2632:     /* Resize preallocation if overestimated */
2633:     for (i=0;i<lrows;i++) {
2634:       dnz[i] = PetscMin(dnz[i],lcols);
2635:       onz[i] = PetscMin(onz[i],cols-lcols);
2636:     }
2637:     /* set preallocation */
2638:     MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);
2639:     for (i=0;i<lrows/bs;i++) {
2640:       dnz[i] = dnz[i*bs]/bs;
2641:       onz[i] = onz[i*bs]/bs;
2642:     }
2643:     MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);
2644:     for (i=0;i<lrows/bs;i++) {
2645:       dnz[i] = dnz[i]-i;
2646:     }
2647:     MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);
2648:     MatPreallocateFinalize(dnz,onz);
2649:     *M = new_mat;
2650:   } else {
2651:     PetscInt mbs,mrows,mcols;
2652:     /* some checks */
2653:     MatGetBlockSize(*M,&mbs);
2654:     MatGetSize(*M,&mrows,&mcols);
2655:     if (mrows != rows) {
2656:       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows);
2657:     }
2658:     if (mrows != rows) {
2659:       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols);
2660:     }
2661:     if (mbs != bs) {
2662:       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs);
2663:     }
2664:     MatZeroEntries(*M);
2665:   }
2666:   /* set local to global mappings */
2667:   /* MatSetLocalToGlobalMapping(*M,rmapping,cmapping); */
2668:   /* Set values */
2669:   PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);
2670:   if (isdense) { /* special case for dense local matrices */
2671:     MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);
2672:     MatDenseGetArray(matis->A,&array);
2673:     MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);
2674:     MatDenseRestoreArray(matis->A,&array);
2675:     PetscFree(local_indices);
2676:     PetscFree(global_indices);
2677:   } else { /* very basic values insertion for all other matrix types */
2678:     PetscFree(local_indices);
2679:     PetscFree(global_indices);
2680:     for (i=0;i<local_rows;i++) {
2681:       MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);
2682:       /* MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES); */
2683:       ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);
2684:       ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);
2685:       MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);
2686:       MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);
2687:     }
2688:   }
2689:   MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);
2690:   MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);
2691:   if (isdense) {
2692:     MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);
2693:   }
2694:   return(0);
2695: }

2699: PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2700: {
2701:   Mat             subdomain_adj;
2702:   IS              new_ranks,ranks_send_to;
2703:   MatPartitioning partitioner;
2704:   Mat_IS          *matis;
2705:   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2706:   PetscInt        prank;
2707:   PetscMPIInt     size,rank,color;
2708:   PetscInt        *xadj,*adjncy,*oldranks;
2709:   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2710:   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2711:   PetscErrorCode  ierr;
2712:   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2713:   PetscSubcomm    subcomm;

2716:   PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);
2717:   PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);
2718:   PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);

2720:   /* Get info on mapping */
2721:   matis = (Mat_IS*)(mat->data);
2722:   ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);
2723:   ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);

2725:   /* build local CSR graph of subdomains' connectivity */
2726:   PetscMalloc1(2,&xadj);
2727:   xadj[0] = 0;
2728:   xadj[1] = PetscMax(n_neighs-1,0);
2729:   PetscMalloc1(xadj[1],&adjncy);
2730:   PetscMalloc1(xadj[1],&adjncy_wgt);

2732:   if (threshold) {
2733:     PetscInt* count,min_threshold;
2734:     PetscMalloc1(local_size,&count);
2735:     PetscMemzero(count,local_size*sizeof(PetscInt));
2736:     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2737:       for (j=0;j<n_shared[i];j++) {
2738:         count[shared[i][j]] += 1;
2739:       }
2740:     }
2741:     /* adapt threshold since we dont want to lose significant connections */
2742:     min_threshold = n_neighs;
2743:     for (i=1;i<n_neighs;i++) {
2744:       for (j=0;j<n_shared[i];j++) {
2745:         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2746:       }
2747:     }
2748:     threshold = PetscMax(min_threshold+1,threshold);
2749:     xadj[1] = 0;
2750:     for (i=1;i<n_neighs;i++) {
2751:       for (j=0;j<n_shared[i];j++) {
2752:         if (count[shared[i][j]] < threshold) {
2753:           adjncy[xadj[1]] = neighs[i];
2754:           adjncy_wgt[xadj[1]] = n_shared[i];
2755:           xadj[1]++;
2756:           break;
2757:         }
2758:       }
2759:     }
2760:     PetscFree(count);
2761:   } else {
2762:     if (xadj[1]) {
2763:       PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));
2764:       PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));
2765:     }
2766:   }
2767:   ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);
2768:   if (use_square) {
2769:     for (i=0;i<xadj[1];i++) {
2770:       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2771:     }
2772:   }
2773:   PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);

2775:   PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);

2777:   /*
2778:     Restrict work on active processes only.
2779:   */
2780:   PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);
2781:   PetscSubcommSetNumber(subcomm,2); /* 2 groups, active process and not active processes */
2782:   MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
2783:   PetscMPIIntCast(!local_size,&color);
2784:   PetscSubcommSetTypeGeneral(subcomm,color,rank);
2785:   if (color) {
2786:     PetscFree(xadj);
2787:     PetscFree(adjncy);
2788:     PetscFree(adjncy_wgt);
2789:   } else {
2790:     MPI_Comm_size(subcomm->comm,&size);
2791:     PetscMalloc1(size,&oldranks);
2792:     prank = rank;
2793:     MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);
2794:     for (i=0;i<size;i++) {
2795:       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2796:     }
2797:     for (i=0;i<xadj[1];i++) {
2798:       PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);
2799:     }
2800:     PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
2801:     MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);
2802:     n_subdomains = (PetscInt)size;
2803:     MatView(subdomain_adj,0);

2805:     /* Partition */
2806:     MatPartitioningCreate(subcomm->comm,&partitioner);
2807:     MatPartitioningSetAdjacency(partitioner,subdomain_adj);
2808:     if (use_vwgt) {
2809:       PetscMalloc(sizeof(*v_wgt),&v_wgt);
2810:       v_wgt[0] = local_size;
2811:       MatPartitioningSetVertexWeights(partitioner,v_wgt);
2812:     }
2813:     PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);
2814:     MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);
2815:     MatPartitioningSetFromOptions(partitioner);
2816:     MatPartitioningApply(partitioner,&new_ranks);
2817:     MatPartitioningView(partitioner,0);

2819:     ISGetIndices(new_ranks,(const PetscInt**)&is_indices);
2820:     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2821:     ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);
2822:     /* clean up */
2823:     PetscFree(oldranks);
2824:     ISDestroy(&new_ranks);
2825:     MatDestroy(&subdomain_adj);
2826:     MatPartitioningDestroy(&partitioner);
2827:   }
2828:   PetscSubcommDestroy(&subcomm);

2830:   /* assemble parallel IS for sends */
2831:   i = 1;
2832:   if (color) i=0;
2833:   ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);
2834:   ISView(ranks_send_to,0);

2836:   /* get back IS */
2837:   *is_sends = ranks_send_to;
2838:   return(0);
2839: }

2841: typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;

2845: PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2846: {
2847:   Mat                    local_mat,new_mat;
2848:   Mat_IS                 *matis;
2849:   IS                     is_sends_internal;
2850:   PetscInt               rows,cols;
2851:   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2852:   PetscBool              ismatis,isdense;
2853:   ISLocalToGlobalMapping l2gmap;
2854:   PetscInt*              l2gmap_indices;
2855:   const PetscInt*        is_indices;
2856:   MatType                new_local_type;
2857:   MatTypePrivate         new_local_type_private;
2858:   /* buffers */
2859:   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2860:   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2861:   /* MPI */
2862:   MPI_Comm               comm;
2863:   PetscMPIInt            n_sends,n_recvs,commsize;
2864:   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2865:   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2866:   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2867:   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2868:   PetscErrorCode         ierr;

2871:   /* checks */
2872:   PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
2873:   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2874:   MatISGetLocalMat(mat,&local_mat);
2875:   PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);
2876:   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2877:   MatGetSize(local_mat,&rows,&cols);
2878:   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2879:   MatGetBlockSize(local_mat,&bs);
2881:   /* prepare IS for sending if not provided */
2882:   if (!is_sends) {
2883:     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2884:     MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);
2885:   } else {
2886:     PetscObjectReference((PetscObject)is_sends);
2887:     is_sends_internal = is_sends;
2888:   }

2890:   /* get pointer of MATIS data */
2891:   matis = (Mat_IS*)mat->data;

2893:   /* get comm */
2894:   comm = PetscObjectComm((PetscObject)mat);

2896:   /* compute number of sends */
2897:   ISGetLocalSize(is_sends_internal,&i);
2898:   PetscMPIIntCast(i,&n_sends);

2900:   /* compute number of receives */
2901:   MPI_Comm_size(comm,&commsize);
2902:   PetscMalloc1(commsize,&iflags);
2903:   PetscMemzero(iflags,commsize*sizeof(*iflags));
2904:   ISGetIndices(is_sends_internal,&is_indices);
2905:   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2906:   PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);
2907:   PetscFree(iflags);

2909:   /* prepare send/receive buffers */
2910:   PetscMalloc1(commsize,&ilengths_idxs);
2911:   PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));
2912:   PetscMalloc1(commsize,&ilengths_vals);
2913:   PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));

2915:   /* Get data from local mat */
2916:   if (!isdense) {
2917:     /* TODO: See below some guidelines on how to prepare the local buffers */
2918:     /*
2919:        send_buffer_vals should contain the raw values of the local matrix
2920:        send_buffer_idxs should contain:
2921:        - MatType_PRIVATE type
2922:        - PetscInt        size_of_l2gmap
2923:        - PetscInt        global_row_indices[size_of_l2gmap]
2924:        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2925:     */
2926:   } else {
2927:     MatDenseGetArray(local_mat,&send_buffer_vals);
2928:     ISLocalToGlobalMappingGetSize(matis->mapping,&i);
2929:     PetscMalloc1((i+2),&send_buffer_idxs);
2930:     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2931:     send_buffer_idxs[1] = i;
2932:     ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);
2933:     PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));
2934:     ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);
2935:     PetscMPIIntCast(i,&len);
2936:     for (i=0;i<n_sends;i++) {
2937:       ilengths_vals[is_indices[i]] = len*len;
2938:       ilengths_idxs[is_indices[i]] = len+2;
2939:     }
2940:   }
2941:   PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);
2942:   buf_size_idxs = 0;
2943:   buf_size_vals = 0;
2944:   for (i=0;i<n_recvs;i++) {
2945:     buf_size_idxs += (PetscInt)olengths_idxs[i];
2946:     buf_size_vals += (PetscInt)olengths_vals[i];
2947:   }
2948:   PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);
2949:   PetscMalloc1(buf_size_vals,&recv_buffer_vals);

2951:   /* get new tags for clean communications */
2952:   PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);
2953:   PetscObjectGetNewTag((PetscObject)mat,&tag_vals);

2955:   /* allocate for requests */
2956:   PetscMalloc1(n_sends,&send_req_idxs);
2957:   PetscMalloc1(n_sends,&send_req_vals);
2958:   PetscMalloc1(n_recvs,&recv_req_idxs);
2959:   PetscMalloc1(n_recvs,&recv_req_vals);

2961:   /* communications */
2962:   ptr_idxs = recv_buffer_idxs;
2963:   ptr_vals = recv_buffer_vals;
2964:   for (i=0;i<n_recvs;i++) {
2965:     source_dest = onodes[i];
2966:     MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);
2967:     MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);
2968:     ptr_idxs += olengths_idxs[i];
2969:     ptr_vals += olengths_vals[i];
2970:   }
2971:   for (i=0;i<n_sends;i++) {
2972:     PetscMPIIntCast(is_indices[i],&source_dest);
2973:     MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);
2974:     MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);
2975:   }
2976:   ISRestoreIndices(is_sends_internal,&is_indices);
2977:   ISDestroy(&is_sends_internal);

2979:   /* assemble new l2g map */
2980:   MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);
2981:   ptr_idxs = recv_buffer_idxs;
2982:   buf_size_idxs = 0;
2983:   for (i=0;i<n_recvs;i++) {
2984:     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2985:     ptr_idxs += olengths_idxs[i];
2986:   }
2987:   PetscMalloc1(buf_size_idxs,&l2gmap_indices);
2988:   ptr_idxs = recv_buffer_idxs;
2989:   buf_size_idxs = 0;
2990:   for (i=0;i<n_recvs;i++) {
2991:     PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));
2992:     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2993:     ptr_idxs += olengths_idxs[i];
2994:   }
2995:   PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);
2996:   ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);
2997:   PetscFree(l2gmap_indices);

2999:   /* infer new local matrix type from received local matrices type */
3000:   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3001:   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3002:   new_local_type_private = MATAIJ_PRIVATE;
3003:   new_local_type = MATSEQAIJ;
3004:   if (n_recvs) {
3005:     new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3006:     ptr_idxs = recv_buffer_idxs;
3007:     for (i=0;i<n_recvs;i++) {
3008:       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3009:         new_local_type_private = MATAIJ_PRIVATE;
3010:         break;
3011:       }
3012:       ptr_idxs += olengths_idxs[i];
3013:     }
3014:     switch (new_local_type_private) {
3015:       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
3016:         new_local_type = MATSEQAIJ;
3017:         bs = 1;
3018:         break;
3019:       case MATAIJ_PRIVATE:
3020:         new_local_type = MATSEQAIJ;
3021:         bs = 1;
3022:         break;
3023:       case MATBAIJ_PRIVATE:
3024:         new_local_type = MATSEQBAIJ;
3025:         break;
3026:       case MATSBAIJ_PRIVATE:
3027:         new_local_type = MATSEQSBAIJ;
3028:         break;
3029:       default:
3030:         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
3031:         break;
3032:     }
3033:   }

3035:   /* create MATIS object */
3036:   MatGetSize(mat,&rows,&cols);
3037:   MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);
3038:   ISLocalToGlobalMappingDestroy(&l2gmap);
3039:   MatISGetLocalMat(new_mat,&local_mat);
3040:   MatSetType(local_mat,new_local_type);
3041:   MatSetUp(local_mat); /* WARNING -> no preallocation yet */

3043:   /* set values */
3044:   MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);
3045:   ptr_vals = recv_buffer_vals;
3046:   ptr_idxs = recv_buffer_idxs;
3047:   for (i=0;i<n_recvs;i++) {
3048:     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3049:       MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);
3050:       MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);
3051:       MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);
3052:       MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);
3053:       MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);
3054:     }
3055:     ptr_idxs += olengths_idxs[i];
3056:     ptr_vals += olengths_vals[i];
3057:   }
3058:   MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);
3059:   MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);
3060:   MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);
3061:   MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);

3063:   { /* check */
3064:     Vec       lvec,rvec;
3065:     PetscReal infty_error;

3067:     MatGetVecs(mat,&rvec,&lvec);
3068:     VecSetRandom(rvec,NULL);
3069:     MatMult(mat,rvec,lvec);
3070:     VecScale(lvec,-1.0);
3071:     MatMultAdd(new_mat,rvec,lvec,lvec);
3072:     VecNorm(lvec,NORM_INFINITY,&infty_error);
3073:     PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3074:     VecDestroy(&rvec);
3075:     VecDestroy(&lvec);
3076:   }

3078:   /* free workspace */
3079:   PetscFree(recv_buffer_idxs);
3080:   PetscFree(recv_buffer_vals);
3081:   MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);
3082:   PetscFree(send_buffer_idxs);
3083:   MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);
3084:   if (isdense) {
3085:     MatISGetLocalMat(mat,&local_mat);
3086:     MatDenseRestoreArray(local_mat,&send_buffer_vals);
3087:   } else {
3088:     /* PetscFree(send_buffer_vals); */
3089:   }
3090:   PetscFree(recv_req_idxs);
3091:   PetscFree(recv_req_vals);
3092:   PetscFree(send_req_idxs);
3093:   PetscFree(send_req_vals);
3094:   PetscFree(ilengths_vals);
3095:   PetscFree(ilengths_idxs);
3096:   PetscFree(olengths_vals);
3097:   PetscFree(olengths_idxs);
3098:   PetscFree(onodes);
3099:   /* get back new mat */
3100:   *mat_n = new_mat;
3101:   return(0);
3102: }

3106: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3107: {
3108:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3109:   PC_IS                  *pcis = (PC_IS*)pc->data;
3110:   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3111:   MatNullSpace           CoarseNullSpace=NULL;
3112:   ISLocalToGlobalMapping coarse_islg;
3113:   IS                     coarse_is;
3114:   PetscInt               max_it;
3115:   PetscInt               im_active=-1,active_procs=-1;
3116:   PC                     pc_temp;
3117:   PCType                 coarse_pc_type;
3118:   KSPType                coarse_ksp_type;
3119:   PetscBool              multilevel_requested,multilevel_allowed;
3120:   PetscBool              setsym,issym,isbddc,isnn,coarse_reuse;
3121:   MatStructure           matstruct;
3122:   PetscErrorCode         ierr;

3125:   /* Assign global numbering to coarse dofs */
3126:   if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */
3127:     PetscInt ocoarse_size;
3128:     ocoarse_size = pcbddc->coarse_size;
3129:     PetscFree(pcbddc->global_primal_indices);
3130:     PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);
3131:     /* see if we can avoid some work */
3132:     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3133:       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3134:         KSPReset(pcbddc->coarse_ksp);
3135:         coarse_reuse = PETSC_FALSE;
3136:       } else { /* we can safely reuse already computed coarse matrix */
3137:         coarse_reuse = PETSC_TRUE;
3138:       }
3139:     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3140:       coarse_reuse = PETSC_FALSE;
3141:     }
3142:   } else { /* primal space has not been changed, so we can reuse coarse matrix */
3143:     coarse_reuse = PETSC_TRUE;
3144:   }

3146:   /* infer some info from user */
3147:   issym = PETSC_FALSE;
3148:   MatIsSymmetricKnown(pc->pmat,&setsym,&issym);
3149:   multilevel_allowed = PETSC_FALSE;
3150:   multilevel_requested = PETSC_FALSE;
3151:   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
3152:   if (multilevel_requested) {
3153:     /* count "active processes" */
3154:     im_active = !!(pcis->n);
3155:     MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
3156:     if (active_procs/pcbddc->coarsening_ratio < 2) {
3157:       multilevel_allowed = PETSC_FALSE;
3158:     } else {
3159:       multilevel_allowed = PETSC_TRUE;
3160:     }
3161:   }

3163:   /* set defaults for coarse KSP and PC */
3164:   if (multilevel_allowed) {
3165:     if (issym) {
3166:       coarse_ksp_type = KSPRICHARDSON;
3167:     } else {
3168:       coarse_ksp_type = KSPCHEBYSHEV;
3169:     }
3170:     coarse_pc_type = PCBDDC;
3171:   } else {
3172:     coarse_ksp_type = KSPPREONLY;
3173:     coarse_pc_type = PCREDUNDANT;
3174:   }

3176:   /* create the coarse KSP object only once with defaults */
3177:   if (!pcbddc->coarse_ksp) {
3178:     char prefix[256],str_level[3];
3179:     size_t len;
3180:     KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);
3181:     PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
3182:     KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);
3183:     KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
3184:     KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
3185:     PCSetType(pc_temp,coarse_pc_type);
3186:     /* prefix */
3187:     PetscStrcpy(prefix,"");
3188:     PetscStrcpy(str_level,"");
3189:     if (!pcbddc->current_level) {
3190:       PetscStrcpy(prefix,((PetscObject)pc)->prefix);
3191:       PetscStrcat(prefix,"pc_bddc_coarse_");
3192:     } else {
3193:       PetscStrlen(((PetscObject)pc)->prefix,&len);
3194:       if (pcbddc->current_level>1) len -= 2;
3195:       PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);
3196:       *(prefix+len)='\0';
3197:       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
3198:       PetscStrcat(prefix,str_level);
3199:     }
3200:     KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);
3201:   }
3202:   /* allow user customization */
3203:   /* PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name); */
3204:   KSPSetFromOptions(pcbddc->coarse_ksp);
3205:   /* PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name); */

3207:   /* get some info after set from options */
3208:   KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
3209:   PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);
3210:   PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
3211:   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
3212:     KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
3213:     PCSetType(pc_temp,coarse_pc_type);
3214:     isbddc = PETSC_FALSE;
3215:   }

3217:   /* propagate BDDC info to the next level */
3218:   PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
3219:   PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
3220:   PCBDDCSetLevels(pc_temp,pcbddc->max_levels);

3222:   /* creates temporary MATIS object for coarse matrix */
3223:   ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);
3224:   ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);
3225:   MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);
3226:   MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);
3227:   MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);
3228:   MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);
3229:   MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);
3230:   MatDestroy(&coarse_submat_dense);
3231:   ISLocalToGlobalMappingDestroy(&coarse_islg);

3233:   /* assemble coarse matrix */
3234:   if (isbddc || isnn) {
3235:     MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);
3236:   } else {
3237:     if (coarse_reuse) {
3238:       KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);
3239:       PetscObjectReference((PetscObject)coarse_mat);
3240:       MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_REUSE_MATRIX,&coarse_mat);
3241:     } else {
3242:       MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_INITIAL_MATRIX,&coarse_mat);
3243:     }
3244:   }
3245:   MatDestroy(&coarse_mat_is);

3247:   /* create local to global scatters for coarse problem */
3248:   if (pcbddc->new_primal_space) {
3249:     VecDestroy(&pcbddc->coarse_vec);
3250:     VecDestroy(&pcbddc->coarse_rhs);
3251:     VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
3252:     MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);
3253:     VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);
3254:   }
3255:   ISDestroy(&coarse_is);

3257:   /* propagate symmetry info to coarse matrix */
3258:   MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);

3260:   /* Compute coarse null space (special handling by BDDC only) */
3261:   if (pcbddc->NullSpace) {
3262:     PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);
3263:     if (isbddc) {
3264:       PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);
3265:     } else {
3266:       KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);
3267:     }
3268:   }

3270:   /* set operators */
3271:   PCGetOperators(pc,NULL,NULL,&matstruct);
3272:   KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);

3274:   /* additional KSP customization */
3275:   KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);
3276:   if (max_it < 5) {
3277:     KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
3278:   }
3279:   /* KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1); */


3282:   /* print some info if requested */
3283:   if (pcbddc->dbg_flag) {
3284:     KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);
3285:     PCGetType(pc_temp,&coarse_pc_type);
3286:     if (!multilevel_allowed) {
3287:       if (multilevel_requested) {
3288:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
3289:       } else if (pcbddc->max_levels) {
3290:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);
3291:       }
3292:     }
3293:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);
3294:     PetscViewerFlush(pcbddc->dbg_viewer);
3295:   }

3297:   /* setup coarse ksp */
3298:   KSPSetUp(pcbddc->coarse_ksp);
3299:   if (pcbddc->dbg_flag) {
3300:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);
3301:     PetscViewerFlush(pcbddc->dbg_viewer);
3302:     KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);
3303:     PetscViewerFlush(pcbddc->dbg_viewer);
3304:   }

3306:   /* Check coarse problem if requested */
3307:   if (pcbddc->dbg_flag) {
3308:     KSP       check_ksp;
3309:     KSPType   check_ksp_type;
3310:     PC        check_pc;
3311:     Vec       check_vec;
3312:     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3313:     PetscInt  its;
3314:     PetscBool ispreonly,compute;

3316:     /* Create ksp object suitable for estimation of extreme eigenvalues */
3317:     KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);
3318:     KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);
3319:     KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
3320:     PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);
3321:     if (ispreonly) {
3322:       check_ksp_type = KSPPREONLY;
3323:       compute = PETSC_FALSE;
3324:     } else {
3325:       if (issym) check_ksp_type = KSPCG;
3326:       else check_ksp_type = KSPGMRES;
3327:       compute = PETSC_TRUE;
3328:     }
3329:     KSPSetType(check_ksp,check_ksp_type);
3330:     KSPSetComputeSingularValues(check_ksp,compute);
3331:     KSPSetUp(check_ksp);
3332:     KSPGetPC(pcbddc->coarse_ksp,&check_pc);
3333:     KSPSetPC(check_ksp,check_pc);
3334:     /* create random vec */
3335:     VecDuplicate(pcbddc->coarse_vec,&check_vec);
3336:     VecSetRandom(check_vec,NULL);
3337:     if (CoarseNullSpace) {
3338:       MatNullSpaceRemove(CoarseNullSpace,check_vec);
3339:     }
3340:     MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);
3341:     /* solve coarse problem */
3342:     KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);
3343:     if (CoarseNullSpace) {
3344:       MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);
3345:     }
3346:     /* check coarse problem residual error */
3347:     VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);
3348:     VecNorm(check_vec,NORM_INFINITY,&infty_error);
3349:     MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);
3350:     VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);
3351:     VecDestroy(&check_vec);
3352:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);
3353:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);
3354:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);
3355:     /* get eigenvalue estimation if preonly has not been requested */
3356:     if (!ispreonly) {
3357:       KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);
3358:       KSPGetIterationNumber(check_ksp,&its);
3359:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);
3360:     }
3361:     PetscViewerFlush(pcbddc->dbg_viewer);
3362:     KSPDestroy(&check_ksp);
3363:   }
3364:   /* free memory */
3365:   MatNullSpaceDestroy(&CoarseNullSpace);
3366:   MatDestroy(&coarse_mat);
3367:   return(0);
3368: }

3372: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3373: {
3374:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3375:   PC_IS*         pcis = (PC_IS*)pc->data;
3376:   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3377:   PetscInt       i,coarse_size;
3378:   PetscInt       *local_primal_indices;

3382:   /* Compute global number of coarse dofs */
3383:   if (!pcbddc->primal_indices_local_idxs) {
3384:     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
3385:   }
3386:   PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);

3388:   /* check numbering */
3389:   if (pcbddc->dbg_flag) {
3390:     PetscScalar coarsesum,*array;

3392:     PetscViewerFlush(pcbddc->dbg_viewer);
3393:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3394:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");
3395:     PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
3396:     VecSet(pcis->vec1_N,0.0);
3397:     for (i=0;i<pcbddc->local_primal_size;i++) {
3398:       VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);
3399:     }
3400:     VecAssemblyBegin(pcis->vec1_N);
3401:     VecAssemblyEnd(pcis->vec1_N);
3402:     VecSet(pcis->vec1_global,0.0);
3403:     VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
3404:     VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
3405:     VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
3406:     VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
3407:     VecGetArray(pcis->vec1_N,&array);
3408:     for (i=0;i<pcis->n;i++) {
3409:       if (array[i] == 1.0) {
3410:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);
3411:       }
3412:     }
3413:     PetscViewerFlush(pcbddc->dbg_viewer);
3414:     for (i=0;i<pcis->n;i++) {
3415:       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3416:     }
3417:     VecRestoreArray(pcis->vec1_N,&array);
3418:     VecSet(pcis->vec1_global,0.0);
3419:     VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
3420:     VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
3421:     VecSum(pcis->vec1_global,&coarsesum);
3422:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));
3423:     if (pcbddc->dbg_flag > 1) {
3424:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");
3425:       PetscViewerFlush(pcbddc->dbg_viewer);
3426:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);
3427:       for (i=0;i<pcbddc->local_primal_size;i++) {
3428:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
3429:       }
3430:       PetscViewerFlush(pcbddc->dbg_viewer);
3431:     }
3432:     PetscViewerFlush(pcbddc->dbg_viewer);
3433:   }
3434:   /* get back data */
3435:   *coarse_size_n = coarse_size;
3436:   *local_primal_indices_n = local_primal_indices;
3437:   return(0);
3438: }