Actual source code: nn.c

 2:  #include "src/ksp/pc/impls/is/nn/nn.h"

  4: /* -------------------------------------------------------------------------- */
  5: /*
  6:    PCSetUp_NN - Prepares for the use of the NN preconditioner
  7:                     by setting data structures and options.   

  9:    Input Parameter:
 10: .  pc - the preconditioner context

 12:    Application Interface Routine: PCSetUp()

 14:    Notes:
 15:    The interface routine PCSetUp() is not usually called directly by
 16:    the user, but instead is called by PCApply() if necessary.
 17: */
 20: static PetscErrorCode PCSetUp_NN(PC pc)
 21: {
 23: 
 25:   if (!pc->setupcalled) {
 26:     /* Set up all the "iterative substructuring" common block */
 27:     PCISSetUp(pc);
 28:     /* Create the coarse matrix. */
 29:     PCNNCreateCoarseMatrix(pc);
 30:   }
 31:   return(0);
 32: }

 34: /* -------------------------------------------------------------------------- */
 35: /*
 36:    PCApply_NN - Applies the NN preconditioner to a vector.

 38:    Input Parameters:
 39: .  pc - the preconditioner context
 40: .  r - input vector (global)

 42:    Output Parameter:
 43: .  z - output vector (global)

 45:    Application Interface Routine: PCApply()
 46:  */
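/*
   In outline, and assuming $ B_I^{(i)} $ denotes the local Dirichlet solve with ksp_D
   (an exact or approximate inverse of $ A_{II}^{(i)} $), the code below computes

     $ z_B = M_{NN}^{-1} ( r_B - \sum_i \tilde R_i^T A_{BI}^{(i)} B_I^{(i)} r_I^{(i)} ) $
     $ z_I^{(i)} = B_I^{(i)} ( r_I^{(i)} - A_{IB}^{(i)} \tilde R_i z_B ) $

   where $ M_{NN}^{-1} $ is the balanced Neumann-Neumann interface preconditioner applied
   by PCNNApplyInterfacePreconditioner(). This is only a reading aid; the notation follows
   the step-by-step comments in the function body.
*/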
 49: static PetscErrorCode PCApply_NN(PC pc,Vec r,Vec z)
 50: {
 51:   PC_IS          *pcis = (PC_IS*)(pc->data);
 53:   PetscScalar    m_one = -1.0;
 54:   Vec            w = pcis->vec1_global;

 57:   /*
 58:     Dirichlet solvers.
 59:     Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
 60:     Storing the local results at vec2_D
 61:   */
 62:   VecScatterBegin(r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);
 63:   VecScatterEnd  (r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);
 64:   KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);
 65: 
 66:   /*
 67:     Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
 68:     Storing the result in the interface portion of the global vector w.
 69:   */
 70:   MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);
 71:   VecScale(&m_one,pcis->vec1_B);
 72:   VecCopy(r,w);
 73:   VecScatterBegin(pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
 74:   VecScatterEnd  (pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);

 76:   /*
 77:     Apply the interface preconditioner
 78:   */
 79:   PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D,
 80:                                           pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);

 82:   /*
 83:     Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
 84:     The result is stored in vec1_D.
 85:   */
 86:   VecScatterBegin(z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
 87:   VecScatterEnd  (z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
 88:   MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);

 90:   /*
 91:     Dirichlet solvers.
 92:     Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks
 93:     $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $.
 94:   */
 95:   VecScatterBegin(pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);
 96:   VecScatterEnd  (pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);
 97:   KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);
 98:   VecScale(&m_one,pcis->vec2_D);
 99:   VecScatterBegin(pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);
100:   VecScatterEnd  (pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);
101:   return(0);
102: }

104: /* -------------------------------------------------------------------------- */
105: /*
106:    PCDestroy_NN - Destroys the private context for the NN preconditioner
107:    that was created with PCCreate_NN().

109:    Input Parameter:
110: .  pc - the preconditioner context

112:    Application Interface Routine: PCDestroy()
113: */
116: static PetscErrorCode PCDestroy_NN(PC pc)
117: {
118:   PC_NN          *pcnn = (PC_NN*)pc->data;

122:   PCISDestroy(pc);

124:   if (pcnn->coarse_mat)  {MatDestroy(pcnn->coarse_mat);}
125:   if (pcnn->coarse_x)    {VecDestroy(pcnn->coarse_x);}
126:   if (pcnn->coarse_b)    {VecDestroy(pcnn->coarse_b);}
127:   if (pcnn->ksp_coarse) {KSPDestroy(pcnn->ksp_coarse);}
128:   if (pcnn->DZ_IN) {
129:     if (pcnn->DZ_IN[0]) {PetscFree(pcnn->DZ_IN[0]);}
130:     PetscFree(pcnn->DZ_IN);
131:   }

133:   /*
134:       Free the private data structure that was hanging off the PC
135:   */
136:   PetscFree(pcnn);
137:   return(0);
138: }

140: /* -------------------------------------------------------------------------- */
141: /*MC
142:    PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.

144:    Options Database Keys:
145: +    -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
146:                                        (this skips the first coarse grid solve in the preconditioner)
147: .    -pc_nn_turn_off_second_balancing - do not balance the solution of the local Neumann problems
148:                                        (this skips the second coarse grid solve in the preconditioner)
149: .    -pc_is_damp_fixed <fact> -
150: .    -pc_is_remove_nullspace_fixed -
151: .    -pc_is_set_damping_factor_floating <fact> -
152: .    -pc_is_not_damp_floating -
153: -    -pc_is_not_remove_nullspace_floating - 

155:    Level: intermediate

157:    Notes: The matrix used with this preconditioner must be of type MATIS 

159:           Unlike more 'conventional' Neumann-Neumann preconditioners, this one iterates over ALL the
160:           degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
161:           on the subdomains, though in our experience using approximate solvers is slower).

163:           Options for the coarse grid preconditioner can be set with -nn_coarse_pc_xxx
164:           Options for the Dirichlet subproblem preconditioner can be set with -is_localD_pc_xxx
165:           Options for the Neumann subproblem preconditioner can be set with -is_localN_pc_xxx

167:    Contributed by Paulo Goldfeld

169: .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,  MatIS
170: M*/
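/*
   Example usage (a minimal sketch, not part of this file): the names A, b and x are
   assumed to have been created elsewhere, with A of type MATIS (e.g. via MatCreateIS()).

     KSP ksp;
     PC  pc;
     KSPCreate(PETSC_COMM_WORLD,&ksp);
     KSPSetOperators(ksp,A,A,SAME_NONZERO_PATTERN);
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCNN);
     KSPSetFromOptions(ksp);
     KSPSolve(ksp,b,x);
     KSPDestroy(ksp);

   The same choice can be made at runtime with -pc_type nn, combined with the options
   listed above (for instance -nn_coarse_pc_type redundant for the coarse solver, which
   is also the default set in PCNNCreateCoarseMatrix()).
*/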
174: PetscErrorCode PCCreate_NN(PC pc)
175: {
177:   PC_NN          *pcnn;

180:   /*
181:      Creates the private data structure for this preconditioner and
182:      attaches it to the PC object.
183:   */
184:   PetscNew(PC_NN,&pcnn);
185:   pc->data  = (void*)pcnn;

187:   /*
188:      Logs the memory usage; this is not needed but allows PETSc to 
189:      monitor how much memory is being used for various purposes.
190:   */
191:   PetscLogObjectMemory(pc,sizeof(PC_NN)+sizeof(PC_IS)); /* Is this the right thing to do? */

193:   PCISCreate(pc);
194:   pcnn->coarse_mat  = 0;
195:   pcnn->coarse_x    = 0;
196:   pcnn->coarse_b    = 0;
197:   pcnn->ksp_coarse = 0;
198:   pcnn->DZ_IN       = 0;

200:   /*
201:       Set the pointers for the functions that are provided above.
202:       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
203:       are called, they will automatically call these functions.  Note we
204:       choose not to provide a couple of these functions since they are
205:       not needed.
206:   */
207:   pc->ops->apply               = PCApply_NN;
208:   pc->ops->applytranspose      = 0;
209:   pc->ops->setup               = PCSetUp_NN;
210:   pc->ops->destroy             = PCDestroy_NN;
211:   pc->ops->view                = 0;
212:   pc->ops->applyrichardson     = 0;
213:   pc->ops->applysymmetricleft  = 0;
214:   pc->ops->applysymmetricright = 0;
215:   return(0);
216: }


220: /* -------------------------------------------------------------------------- */
221: /*
222:    PCNNCreateCoarseMatrix - Creates the coarse matrix, the coarse right-hand-side and solution vectors, and the coarse linear solver used by the NN preconditioner.
223: */
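/*
   In outline (a sketch of the code below): each process assembles a dense
   n_neigh x n_neigh block of the coarse matrix with entries

     $ mat[i*n_neigh+j] = (D Z)_i^T S^{(local)} (D Z)_j $ ,   i,j ranging over neigh[],

   where $ (D Z)_k $ is the partition-of-unity scaling D on the part of the interface
   shared with subdomain neigh[k] (the DZ_IN/DZ_OUT chunks exchanged below) and
   $ S^{(local)} $ is this subdomain's Schur complement. The blocks are summed into the
   global coarse matrix (one row/column per process) with ADD_VALUES.
*/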
226: PetscErrorCode PCNNCreateCoarseMatrix (PC pc)
227: {
228:   MPI_Request    *send_request, *recv_request;
230:   PetscInt       i, j, k;
231:   PetscScalar*   mat;    /* Sub-matrix with this subdomain's contribution to the coarse matrix             */
232:   PetscScalar**  DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */

234:   /* aliasing some names */
235:   PC_IS*         pcis     = (PC_IS*)(pc->data);
236:   PC_NN*         pcnn     = (PC_NN*)pc->data;
237:   PetscInt       n_neigh  = pcis->n_neigh;
238:   PetscInt*      neigh    = pcis->neigh;
239:   PetscInt*      n_shared = pcis->n_shared;
240:   PetscInt**     shared   = pcis->shared;
241:   PetscScalar**  DZ_IN;   /* Must be initialized after memory allocation. */

244:   /* Allocate memory for mat (the +1 is to handle the case where n_neigh is zero) */
245:   PetscMalloc((n_neigh*n_neigh+1)*sizeof(PetscScalar),&mat);

247:   /* Allocate memory for DZ */
248:   /* Notice that DZ_OUT[0] is allocated some space that is never used. */
249:   /* This is just so that DZ_OUT and DZ_IN have exactly the same form. */
250:   {
251:     PetscInt size_of_Z = 0;
252:     PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);
253:     DZ_IN = pcnn->DZ_IN;
254:     PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);
255:     for (i=0; i<n_neigh; i++) {
256:       size_of_Z += n_shared[i];
257:     }
258:     PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);
259:     PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);
260:   }
261:   for (i=1; i<n_neigh; i++) {
262:     DZ_IN[i]  = DZ_IN [i-1] + n_shared[i-1];
263:     DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1];
264:   }
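/*
   Memory layout of the chunks set up above (DZ_OUT is laid out identically):

     DZ_IN[0]                   DZ_IN[1]                ...  DZ_IN[n_neigh-1]
     |<---- n_shared[0] ---->|<---- n_shared[1] ---->|  ...  |<- n_shared[n_neigh-1] ->|

   i.e. each DZ_IN[i] is a pointer into the single contiguous buffer allocated as
   DZ_IN[0], offset by n_shared[0]+...+n_shared[i-1] scalars.
*/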

266:   /* Set the values of DZ_OUT, in order to send this info to the neighbours */
267:   /* First, set the auxiliary array pcis->work_N. */
268:   PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);
269:   for (i=1; i<n_neigh; i++){
270:     for (j=0; j<n_shared[i]; j++) {
271:       DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
272:     }
273:   }

275:   /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
276:   /* Notice that send_request[] and recv_request[] could have one less element. */
277:   /* We make them longer to have request[i] corresponding to neigh[i].          */
278:   {
279:     PetscMPIInt tag;
280:     PetscObjectGetNewTag((PetscObject)pc,&tag);
281:     PetscMalloc((2*(n_neigh)+1)*sizeof(MPI_Request),&send_request);
282:     recv_request = send_request + (n_neigh);
283:     for (i=1; i<n_neigh; i++) {
284:       MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(send_request[i]));
285:       MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(recv_request[i]));
286:     }
287:   }

289:   /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
290:   for(j=0; j<n_shared[0]; j++) {
291:     DZ_IN[0][j] = pcis->work_N[shared[0][j]];
292:   }

294:   /* Start computing with the local D*Z while communication goes on.   */
295:   /* Apply the Schur complement: the result is stored in pcis->vec1_B  */
296:   /* (the vec1_B argument passed to PCNNApplySchurToChunk below) and   */
297:   /* is also scattered back to pcis->work_N.                           */
298:   PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B,
299:                                pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);

301:   /* Compute the first column, while completing the receiving. */
302:   for (i=0; i<n_neigh; i++) {
303:     MPI_Status  stat;
304:     PetscMPIInt ind=0;
305:     if (i>0) { MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat); ind++;}
306:     mat[ind*n_neigh+0] = 0.0;
307:     for (k=0; k<n_shared[ind]; k++) {
308:       mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
309:     }
310:   }

312:   /* Compute the remaining columns */
313:   for (j=1; j<n_neigh; j++) {
314:     PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B,
315:                                  pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);
316:     for (i=0; i<n_neigh; i++) {
317:       mat[i*n_neigh+j] = 0.0;
318:       for (k=0; k<n_shared[i]; k++) {
319:         mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
320:       }
321:     }
322:   }

324:   /* Complete the sending. */
325:   if (n_neigh>1) {
326:     MPI_Status *stat;
327:     PetscMalloc((n_neigh-1)*sizeof(MPI_Status),&stat);
328:     MPI_Waitall(n_neigh-1,&(send_request[1]),stat);
329:     PetscFree(stat);
330:   }

332:   /* Free the memory for the MPI requests */
333:   PetscFree(send_request);

335:   /* Free the memory for DZ_OUT */
336:   if (DZ_OUT) {
337:     if (DZ_OUT[0]) { PetscFree(DZ_OUT[0]); }
338:     PetscFree(DZ_OUT);
339:   }

341:   {
342:     PetscMPIInt size;
343:     PetscInt    n_neigh_m1;
344:     MPI_Comm_size(pc->comm,&size);
345:     n_neigh_m1 = (n_neigh) ? n_neigh-1 : 0;
346:     /* Create the global coarse vectors (rhs and solution). */
347:     VecCreateMPI(pc->comm,1,size,&(pcnn->coarse_b));
348:     VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));
349:     /* Create and set the global coarse AIJ matrix. */
350:     MatCreate(pc->comm,1,1,size,size,&(pcnn->coarse_mat));
351:     MatSetType(pcnn->coarse_mat,MATAIJ);
352:     MatSeqAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL);
353:     MatMPIAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL,1,PETSC_NULL);
354:     MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);
355:     MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
356:     MatAssemblyEnd  (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
357:   }

359:   {
360:     PetscMPIInt rank;
361:     PetscScalar one = 1.0;
362:     IS          is;
363:     MPI_Comm_rank(pc->comm,&rank);
364:     /* "Zero out" rows of not-purely-Neumann subdomains */
365:     if (pcis->pure_neumann) {  /* does NOT zero any row: creates an empty index set instead, since MatZeroRows() is collective. */
366:       ISCreateStride(pc->comm,0,0,0,&is);
367:     } else { /* here it DOES zero the row, since it's not a floating subdomain. */
368:       ISCreateStride(pc->comm,1,rank,0,&is);
369:     }
370:     MatZeroRows(pcnn->coarse_mat,is,&one);
371:     ISDestroy(is);
372:   }

374:   /* Create the coarse linear solver context */
375:   {
376:     PC  pc_ctx, inner_pc;
377:     KSPCreate(pc->comm,&pcnn->ksp_coarse);
378:     KSPSetOperators(pcnn->ksp_coarse,pcnn->coarse_mat,pcnn->coarse_mat,SAME_PRECONDITIONER);
379:     KSPGetPC(pcnn->ksp_coarse,&pc_ctx);
380:     PCSetType(pc_ctx,PCREDUNDANT);
381:     KSPSetType(pcnn->ksp_coarse,KSPPREONLY);
382:     PCRedundantGetPC(pc_ctx,&inner_pc);
383:     PCSetType(inner_pc,PCLU);
384:     KSPSetOptionsPrefix(pcnn->ksp_coarse,"nn_coarse_");
385:     KSPSetFromOptions(pcnn->ksp_coarse);
386:     /* Set up the coarse solver here, rather than during the first coarse solve. */
387:     KSPSetUp(pcnn->ksp_coarse);
388:   }

390:   /* Free the memory for mat */
391:   PetscFree(mat);

393:   /* for DEBUGGING, save the coarse matrix to a file. */
394:   {
395:     PetscTruth flg;
396:     PetscOptionsHasName(PETSC_NULL,"-pc_nn_save_coarse_matrix",&flg);
397:     if (flg) {
398:       PetscViewer viewer;
399:       PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);
400:       PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
401:       MatView(pcnn->coarse_mat,viewer);
402:       PetscViewerDestroy(viewer);
403:     }
404:   }

406:   /*  Set the variable pcnn->factor_coarse_rhs. */
407:   pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;

409:   /* See historical note 02, at the bottom of this file. */
410:   return(0);
411: }

413: /* -------------------------------------------------------------------------- */
414: /*
415:    PCNNApplySchurToChunk - Applies the Schur complement to a chunk of values given on part of the local interface.

417:    Input parameters:
418: .  pc - the preconditioner context
419: .  n - size of chunk
420: .  idx - indices of chunk
421: .  chunk - values

423:    Output parameters:
424: .  array_N - result of Schur complement applied to chunk, scattered to big array
425: .  vec1_B  - result of Schur complement applied to chunk
426: .  vec2_B  - garbage (used as work space)
427: .  vec1_D  - garbage (used as work space)
428: .  vec2_D  - garbage (used as work space)

430: */
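/*
   In outline (a sketch of the code below): the chunk is expanded to the full local
   numbering (zeros elsewhere), pushed onto the interface, and the Schur complement
   is applied:

     array_N[idx[i]] = chunk[i]        (zero elsewhere)
     vec2_B  <- array_N                (restriction to the interface)
     vec1_B   = S vec2_B               (PCISApplySchur)
     array_N <- vec1_B                 (scattered back)

   so on exit both vec1_B and array_N hold the Schur complement applied to the
   expanded chunk.
*/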
433: PetscErrorCode PCNNApplySchurToChunk(PC pc, PetscInt n, PetscInt* idx, PetscScalar *chunk, PetscScalar* array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
434: {
436:   PetscInt       i;
437:   PC_IS          *pcis = (PC_IS*)(pc->data);

440:   PetscMemzero((void*)array_N, pcis->n*sizeof(PetscScalar));
441:   for (i=0; i<n; i++) { array_N[idx[i]] = chunk[i]; }
442:   PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
443:   PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
444:   PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);
445:   return(0);
446: }

448: /* -------------------------------------------------------------------------- */
449: /*
450:    PCNNApplyInterfacePreconditioner - Applies the interface preconditioner, i.e., 
451:                                       the preconditioner for the Schur complement.

453:    Input parameter:
454: .  r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.

456:    Output parameters:
457: .  z - global vector of interior and interface nodes. The values on the interface are the result of
458:        the application of the interface preconditioner to the interface part of r. The values on the
459:        interior nodes are garbage.
460: .  work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
461: .  vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
462: .  vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
463: .  vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
464: .  vec1_D - vector of local interior nodes; returns garbage (used as work space)
465: .  vec2_D - vector of local interior nodes; returns garbage (used as work space)
466: .  vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
467: .  vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)

469: */
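/*
   In outline (a sketch of the code below): with both balancing steps turned off this
   applies the plain Neumann-Neumann preconditioner

     $ z_B = \sum_i \tilde R_i^T D_i S_i^{+} D_i \tilde R_i r_B $

   where $ S_i^{+} $ is the (possibly approximate) inverse of the local Schur complement
   computed by PCISApplyInvSchur() and $ D_i $ is the partition-of-unity scaling. The two
   calls to PCNNBalancing() wrap this local solve with the coarse-grid corrections that
   make the method "balanced".
*/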
472: PetscErrorCode PCNNApplyInterfacePreconditioner (PC pc, Vec r, Vec z, PetscScalar* work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,
473:                                       Vec vec2_D, Vec vec1_N, Vec vec2_N)
474: {
476:   PC_IS*         pcis = (PC_IS*)(pc->data);

479:   /*
480:     First balancing step.
481:   */
482:   {
483:     PetscTruth flg;
484:     PetscOptionsHasName(PETSC_NULL,"-pc_nn_turn_off_first_balancing",&flg);
485:     if (!flg) {
486:       PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);
487:     } else {
488:       VecCopy(r,z);
489:     }
490:   }

492:   /*
493:     Extract the local interface part of z and scale it by D 
494:   */
495:   VecScatterBegin(z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
496:   VecScatterEnd  (z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
497:   VecPointwiseMult(pcis->D,vec1_B,vec2_B);

499:   /* Neumann Solver */
500:   PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);

502:   /*
503:     Second balancing step.
504:   */
505:   {
506:     PetscTruth flg;
507:     PetscOptionsHasName(PETSC_NULL,"-pc_nn_turn_off_second_balancing",&flg);
508:     if (!flg) {
509:       PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);
510:     } else {
511:       PetscScalar zero = 0.0;
512:       VecPointwiseMult(pcis->D,vec1_B,vec2_B);
513:       VecSet(&zero,z);
514:       VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
515:       VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
516:     }
517:   }
518:   return(0);
519: }

521: /* -------------------------------------------------------------------------- */
522: /*
523:    PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
524:                    input argument u is provided), or s, as given in equations
525:                    (12) and (13), if the input argument u is a null vector.
526:                    Notice that the input argument u plays the role of u_i in
527:                    equation (14). The equation numbers refer to [Man93].

529:    Input Parameters:
530: .  pc - the NN preconditioner context.
531: .  r - MPI vector of all nodes (interior and interface). It's preserved.
532: .  u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.

534:    Output Parameters:
535: .  z - MPI vector of interior and interface nodes. Returns s or z (see description above).
536: .  vec1_B - Sequential vector of local interface nodes. Workspace.
537: .  vec2_B - Sequential vector of local interface nodes. Workspace.
538: .  vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
539: .  vec1_D - Sequential vector of local interior nodes. Workspace.
540: .  vec2_D - Sequential vector of local interior nodes. Workspace.
541: .  work_N - Array of all local nodes (interior and interface). Workspace.

543: */
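/*
   In outline (a sketch of the code below, not a restatement of [Man93]): the coarse
   problem has one unknown per subdomain. The right-hand-side entry owned by this
   process is

     value = factor_coarse_rhs * \sum_k DZ_IN[0][k] * work_N[shared[0][k]]

   where work_N holds the (possibly already corrected) residual restricted to the local
   interface. The coarse solution lambda then scales the local D*Z contribution, which is
   scattered back into z (and, when u is null, also pushed through the Schur complement)
   to produce the balanced vector.
*/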
546: PetscErrorCode PCNNBalancing (PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,
547:                    Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
548: {
550:   PetscInt       k;
551:   PetscScalar    zero     =  0.0;
552:   PetscScalar    m_one    = -1.0;
553:   PetscScalar    value;
554:   PetscScalar*   lambda;
555:   PC_NN*         pcnn     = (PC_NN*)(pc->data);
556:   PC_IS*         pcis     = (PC_IS*)(pc->data);

559:   PetscLogEventBegin(PC_ApplyCoarse,0,0,0,0);
560:   if (u) {
561:     if (!vec3_B) { vec3_B = u; }
562:     VecPointwiseMult(pcis->D,u,vec1_B);
563:     VecSet(&zero,z);
564:     VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
565:     VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
566:     VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
567:     VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
568:     PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);
569:     VecScale(&m_one,vec3_B);
570:     VecCopy(r,z);
571:     VecScatterBegin(vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
572:     VecScatterEnd  (vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
573:   } else {
574:     VecCopy(r,z);
575:   }
576:   VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
577:   VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
578:   PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);
579:   for (k=0, value=0.0; k<pcis->n_shared[0]; k++) { value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]]; }
580:   value *= pcnn->factor_coarse_rhs;  /* This factor is set in CreateCoarseMatrix(). */
581:   {
582:     PetscMPIInt rank;
583:     MPI_Comm_rank(pc->comm,&rank);
584:     VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);
585:     /*
586:        Since we are only inserting local values (one value, actually), we do not need the
587:        reduction that determines whether any data has to be moved. Hence these calls are left out:
588:        VecAssemblyBegin(pcnn->coarse_b); 
589:        VecAssemblyEnd  (pcnn->coarse_b);
590:     */
591:   }
592:   KSPSolve(pcnn->ksp_coarse,pcnn->coarse_b,pcnn->coarse_x);
593:   if (!u) { VecScale(&m_one,pcnn->coarse_x); }
594:   VecGetArray(pcnn->coarse_x,&lambda);
595:   for (k=0; k<pcis->n_shared[0]; k++) { work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k]; }
596:   VecRestoreArray(pcnn->coarse_x,&lambda);
597:   PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
598:   VecSet(&zero,z);
599:   VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
600:   VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
601:   if (!u) {
602:     VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
603:     VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
604:     PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
605:     VecCopy(r,z);
606:   }
607:   VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
608:   VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
609:   PetscLogEventEnd(PC_ApplyCoarse,0,0,0,0);
610:   return(0);
611: }




617: /*  -------   E N D   O F   T H E   C O D E   -------  */
618: /*                                                     */
619: /*  From now on, "footnotes" (or "historical notes").  */
620: /*                                                     */
621: /*  -------------------------------------------------  */



625: /* --------------------------------------------------------------------------
626:    Historical note 01 
627:    -------------------------------------------------------------------------- */
628: /*
629:    We considered the possibility of an alternative D_i that would still
630:    provide a partition of unity (i.e., $ \sum_i  N_i D_i N_i^T = I $).
631:    The basic principle was still the pseudo-inverse of the counting
632:    function; the difference was that we would not count subdomains
633:    that do not contribute to the coarse space (i.e., not pure-Neumann
634:    subdomains).

636:    This turned out to be a bad idea:  we would solve trivial Neumann
637:    problems in the non-pure-Neumann subdomains, since we would be scaling
638:    the balanced residual by zero.
639: */




644: /* --------------------------------------------------------------------------
645:    Historical note 02 
646:    -------------------------------------------------------------------------- */
647: /*
648:    We tried an alternative coarse problem, one that would eliminate exactly a
649:    constant error. It turned out not to improve the overall convergence.
650: */