Actual source code: nn.c

  1: /*$Id: nn.c,v 1.10 2001/03/23 23:23:22 balay Exp $*/

  3: #include "src/sles/pc/impls/is/nn/nn.h"

  5: /* -------------------------------------------------------------------------- */
  6: /*
  7:    PCSetUp_NN - Prepares for the use of the NN preconditioner
  8:                     by setting data structures and options.   

 10:    Input Parameter:
 11: .  pc - the preconditioner context

 13:    Application Interface Routine: PCSetUp()

 15:    Notes:
 16:    The interface routine PCSetUp() is not usually called directly by
 17:    the user, but instead is called by PCApply() if necessary.
 18: */
 19: static int PCSetUp_NN(PC pc)
 20: {
 22: 

 25:   if (pc->setupcalled == 0) {
 26:     /* Set up all the "iterative substructuring" common block */
 27:     PCISSetUp(pc);
 28:     /* Create the coarse matrix. */
 29:     PCNNCreateCoarseMatrix(pc);
 30:   }

 32:   return(0);
 33: }
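
/*
   Added usage sketch (not part of the original file): how an application
   typically selects this preconditioner through the SLES interface of this
   PETSc version.  The objects A, b and x are assumed to exist already, A is
   assumed to be of the subdomain-unassembled ("is") matrix type that the
   PCIS routines used in this file require, and error checking is omitted.

      SLES sles;
      PC   pc;
      int  ierr,its;

      ierr = SLESCreate(PETSC_COMM_WORLD,&sles);
      ierr = SLESSetOperators(sles,A,A,DIFFERENT_NONZERO_PATTERN);
      ierr = SLESGetPC(sles,&pc);
      ierr = PCSetType(pc,PCNN);
      ierr = SLESSetFromOptions(sles);
      ierr = SLESSolve(sles,b,x,&its);
      ierr = SLESDestroy(sles);

   The same selection can be made at run time with -pc_type nn; PCSetUp_NN()
   above is then invoked automatically the first time the preconditioner is
   applied.
*/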

 35: /* -------------------------------------------------------------------------- */
 36: /*
 37:    PCApply_NN - Applies the NN preconditioner to a vector.

 39:    Input Parameters:
 40: .  pc - the preconditioner context
 41: .  r - input vector (global)

 43:    Output Parameter:
 44: .  z - output vector (global)

 46:    Application Interface Routine: PCApply()
 47:  */
 48: static int PCApply_NN(PC pc,Vec r,Vec z)
 49: {
 50:   PC_IS *pcis = (PC_IS*)(pc->data);
 51:   int ierr,its;
 52:   Scalar m_one = -1.0;
 53:   Vec w = pcis->vec1_global;


 57:   /*
 58:     Dirichlet solvers.
 59:     Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
 60:     Storing the local results in vec2_D
 61:   */
 62:   VecScatterBegin(r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);
 63:   VecScatterEnd  (r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);
 64:   SLESSolve(pcis->sles_D,pcis->vec1_D,pcis->vec2_D,&its);
 65: 
 66:   /*
 67:     Computing $ r_B - sum_j tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
 68:     Storing the result in the interface portion of the global vector w.
 69:   */
 70:   MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);
 71:   VecScale(&m_one,pcis->vec1_B);
 72:   VecCopy(r,w);
 73:   VecScatterBegin(pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
 74:   VecScatterEnd  (pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);

 76:   /*
 77:     Apply the interface preconditioner
 78:   */
 79:   PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D,
 80:                                           pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);

 82:   /*
 83:     Computing $ t_I^{(i)} = A_{IB}^{(i)} tilde R_i z_B $
 84:     The result is stored in vec1_D.
 85:   */
 86:   VecScatterBegin(z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
 87:   VecScatterEnd  (z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
 88:   MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);

 90:   /*
 91:     Dirichlet solvers.
 92:     Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks
 93:     $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $.
 94:   */
 95:   VecScatterBegin(pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);
 96:   VecScatterEnd  (pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);
 97:   SLESSolve(pcis->sles_D,pcis->vec1_D,pcis->vec2_D,&its);
 98:   VecScale(&m_one,pcis->vec2_D);
 99:   VecScatterBegin(pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);
100:   VecScatterEnd  (pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);

102:   return(0);
103: }
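
/*
   Added summary (not part of the original file): in the notation of the
   comments above, writing r = (r_I,r_B) and z = (z_I,z_B), PCApply_NN()
   computes

      z_B       = M_NN^{-1} ( r_B - sum_j tilde R_j^T A_{BI}^{(j)} B_I^{(j)} r_I^{(j)} )
      z_I^{(i)} = B_I^{(i)} r_I^{(i)} - B_I^{(i)} A_{IB}^{(i)} tilde R_i z_B

   where B_I^{(i)} denotes the local Dirichlet solve with sles_D and
   M_NN^{-1} the balanced interface preconditioner applied by
   PCNNApplyInterfacePreconditioner() below.
*/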

105: /* -------------------------------------------------------------------------- */
106: /*
107:    PCDestroy_NN - Destroys the private context for the NN preconditioner
108:    that was created with PCCreate_NN().

110:    Input Parameter:
111: .  pc - the preconditioner context

113:    Application Interface Routine: PCDestroy()
114: */
115: static int PCDestroy_NN(PC pc)
116: {
117:   PC_NN *pcnn = (PC_NN*)pc->data;
118:   int   ierr;


122:   PCISDestroy(pc);

124:   if (pcnn->coarse_mat)  {MatDestroy(pcnn->coarse_mat);}
125:   if (pcnn->coarse_x)    {VecDestroy(pcnn->coarse_x);}
126:   if (pcnn->coarse_b)    {VecDestroy(pcnn->coarse_b);}
127:   if (pcnn->sles_coarse) {SLESDestroy(pcnn->sles_coarse);}
128:   if (pcnn->DZ_IN) {
129:     if (pcnn->DZ_IN[0]) {PetscFree(pcnn->DZ_IN[0]);}
130:     PetscFree(pcnn->DZ_IN);
131:   }

133:   /*
134:       Free the private data structure that was hanging off the PC
135:   */
136:   PetscFree(pcnn);
137:   return(0);
138: }

140: /* -------------------------------------------------------------------------- */
141: /*
142:    PCCreate_NN - Creates an NN preconditioner context, PC_NN, 
143:    and sets this as the private data within the generic preconditioning 
144:    context, PC, that was created within PCCreate().

146:    Input Parameter:
147: .  pc - the preconditioner context

149:    Application Interface Routine: PCCreate()
150: */
151: EXTERN_C_BEGIN
152: int PCCreate_NN(PC pc)
153: {
155:   PC_NN *pcnn;
156:   int   ierr;


159:   /*
160:      Creates the private data structure for this preconditioner and
161:      attaches it to the PC object.
162:   */
163:   ierr      = PetscNew(PC_NN,&pcnn);
164:   pc->data  = (void*)pcnn;

166:   /*
167:      Logs the memory usage; this is not needed but allows PETSc to 
168:      monitor how much memory is being used for various purposes.
169:   */
170:   PetscLogObjectMemory(pc,sizeof(PC_NN)+sizeof(PC_IS)); /* Is this the right thing to do? */

172:   PCISCreate(pc);
173:   pcnn->coarse_mat  = 0;
174:   pcnn->coarse_x    = 0;
175:   pcnn->coarse_b    = 0;
176:   pcnn->sles_coarse = 0;
177:   pcnn->DZ_IN       = 0;

179:   /*
180:       Set the pointers for the functions that are provided above.
181:       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
182:       are called, they will automatically call these functions.  Note we
183:       choose not to provide a couple of these functions since they are
184:       not needed.
185:   */
186:   pc->ops->apply               = PCApply_NN;
187:   pc->ops->applytranspose      = 0;
188:   pc->ops->setup               = PCSetUp_NN;
189:   pc->ops->destroy             = PCDestroy_NN;
190:   pc->ops->view                = 0;
191:   pc->ops->applyrichardson     = 0;
192:   pc->ops->applysymmetricleft  = 0;
193:   pc->ops->applysymmetricright = 0;

195:   return(0);
196: }
197: EXTERN_C_END
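
/*
   Added note (not part of the original file): the assignments to pc->ops
   above are what make the generic interface routines reach this file.  A
   simplified sketch of the interface side (not a verbatim copy of the
   PETSc source) is

      int PCApply(PC pc,Vec x,Vec y)
      {
        int ierr;
        if (!pc->setupcalled) { ierr = PCSetUp(pc); }
        ierr = (*pc->ops->apply)(pc,x,y);
        return(0);
      }

   so that, once PCSetType(pc,PCNN) has been called, PCApply() ends up in
   PCApply_NN() and PCSetUp() in PCSetUp_NN().
*/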


200: /* -------------------------------------------------------------------------- */
201: /*
202:    PCNNCreateCoarseMatrix - Assembles the coarse matrix, the coarse vectors and the coarse solver used by the balancing steps of the NN preconditioner.
203: */
204: int PCNNCreateCoarseMatrix (PC pc)
205: {
206:   MPI_Request *send_request, *recv_request;
207:   int i, j, k, ierr;

209:   Scalar*   mat;    /* Sub-matrix with this subdomain's contribution to the coarse matrix             */
210:   Scalar**  DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */

212:   /* aliasing some names */
213:   PC_IS*       pcis     = (PC_IS*)(pc->data);
214:   PC_NN*       pcnn     = (PC_NN*)(pc->data);
215:   int          n_neigh  = pcis->n_neigh;
216:   int*         neigh    = pcis->neigh;
217:   int*         n_shared = pcis->n_shared;
218:   int**        shared   = pcis->shared;
219:   Scalar**     DZ_IN;   /* Must be initialized after memory allocation. */


223:   /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */
224:   PetscMalloc((n_neigh*n_neigh+1)*sizeof(Scalar),&mat);

226:   /* Allocate memory for DZ */
227:   /* Notice that DZ_OUT[0] is allocated some space that is never used. */
228:   /* This is just so that DZ_OUT and DZ_IN have exactly the same form.        */
229:   {
230:     int size_of_Z = 0;
231:     ierr  = PetscMalloc ((n_neigh+1)*sizeof(Scalar*),&pcnn->DZ_IN);
232:     DZ_IN = pcnn->DZ_IN;
233:     ierr  = PetscMalloc ((n_neigh+1)*sizeof(Scalar*),&DZ_OUT);
234:     for (i=0; i<n_neigh; i++) {
235:       size_of_Z += n_shared[i];
236:     }
237:     PetscMalloc ((size_of_Z+1)*sizeof(Scalar),&DZ_IN[0]);
238:     PetscMalloc ((size_of_Z+1)*sizeof(Scalar),&DZ_OUT[0]);
239:   }
240:   for (i=1; i<n_neigh; i++) {
241:     DZ_IN[i]  = DZ_IN [i-1] + n_shared[i-1];
242:     DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1];
243:   }

245:   /* Set the values of DZ_OUT, in order to send this info to the neighbours */
246:   /* First, set the auxiliary array pcis->work_N. */
247:   PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);
248:   for (i=1; i<n_neigh; i++){
249:     for (j=0; j<n_shared[i]; j++) {
250:       DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
251:     }
252:   }

254:   /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
255:   /* Notice that send_request[] and recv_request[] could have one less element. */
256:   /* We make them longer to have request[i] corresponding to neigh[i].          */
257:   {
258:     int tag;
259:     PetscObjectGetNewTag((PetscObject)pc,&tag);
260:     PetscMalloc((2*(n_neigh)+1)*sizeof(MPI_Request),&send_request);
261:     recv_request = send_request + (n_neigh);
262:     for (i=1; i<n_neigh; i++) {
263:       MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(send_request[i]));
264:       MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(recv_request[i]));
265:     }
266:   }

268:   /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
269:   for(j=0; j<n_shared[0]; j++) {
270:     DZ_IN[0][j] = pcis->work_N[shared[0][j]];
271:   }

273:   /* Start computing with local D*Z while communication goes on.    */
274:   /* Apply the Schur complement to DZ_IN[0]. The result is stored   */
275:   /* in pcis->vec1_B and also scattered to pcis->work_N (see        */
276:   /* PCNNApplySchurToChunk() below).                                 */
277:   PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B,
278:                                pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);

280:   /* Compute the first column, while completing the receiving. */
281:   for (i=0; i<n_neigh; i++) {
282:     MPI_Status stat;
283:     int ind;
284:     if (i==0) { ind = 0; }
285:     else { MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat); ind++;}
286:     mat[ind*n_neigh+0] = 0.0;
287:     for (k=0; k<n_shared[ind]; k++) {
288:       mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
289:     }
290:   }

292:   /* Compute the remaining columns */
293:   for (j=1; j<n_neigh; j++) {
294:     PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B,
295:                                  pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);
296:     for (i=0; i<n_neigh; i++) {
297:       mat[i*n_neigh+j] = 0.0;
298:       for (k=0; k<n_shared[i]; k++) {
299:         mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
300:       }
301:     }
302:   }

304:   /* Complete the sending. */
305:   if (n_neigh>1) {
306:     MPI_Status *stat;
307:     PetscMalloc((n_neigh-1)*sizeof(MPI_Status),&stat);
308:     MPI_Waitall(n_neigh-1,&(send_request[1]),stat);
309:     PetscFree(stat);
310:   }

312:   /* Free the memory for the MPI requests */
313:   PetscFree(send_request);

315:   /* Free the memory for DZ_OUT */
316:   if (DZ_OUT) {
317:     if (DZ_OUT[0]) { PetscFree(DZ_OUT[0]); }
318:     PetscFree(DZ_OUT);
319:   }

321:   {
322:     int size,n_neigh_m1;
323:     MPI_Comm_size(pc->comm,&size);
324:     n_neigh_m1 = (n_neigh) ? n_neigh-1 : 0;
325:     /* Create the global coarse vectors (rhs and solution). */
326:     VecCreateMPI(pc->comm,1,size,&(pcnn->coarse_b));
327:     VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));
328:     /* Create and set the global coarse matrix. */
329:     MatCreateMPIAIJ(pc->comm,1,1,size,size,1,PETSC_NULL,n_neigh_m1,PETSC_NULL,&(pcnn->coarse_mat));
330:     MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);
331:     MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
332:     MatAssemblyEnd  (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
333:   }

335:   {
336:     int rank;
337:     Scalar one = 1.0;
338:     IS is;
339:     MPI_Comm_rank(pc->comm,&rank);
340:     /* "Zero out" rows of not-purely-Neumann subdomains */
341:     if (pcis->pure_neumann) {  /* does NOT zero the row; creates an empty index set instead, because MatZeroRows() is collective. */
342:       ISCreateStride(pc->comm,0,0,0,&is);
343:     } else { /* here it DOES zero the row, since it's not a floating subdomain. */
344:       ISCreateStride(pc->comm,1,rank,0,&is);
345:     }
346:     MatZeroRows(pcnn->coarse_mat,is,&one);
347:     ISDestroy(is);
348:   }

350:   /* Create the coarse linear solver context */
351:   {
352:     PC pc_ctx, inner_pc;
353:     KSP ksp_ctx;
354:     SLESCreate(pc->comm,&pcnn->sles_coarse);
355:     SLESSetOperators(pcnn->sles_coarse,pcnn->coarse_mat,pcnn->coarse_mat,SAME_PRECONDITIONER);
356:     SLESGetKSP(pcnn->sles_coarse,&ksp_ctx);
357:     SLESGetPC(pcnn->sles_coarse,&pc_ctx);
358:     PCSetType(pc_ctx,PCREDUNDANT);
359:     KSPSetType(ksp_ctx,KSPPREONLY);
360:     PCRedundantGetPC(pc_ctx,&inner_pc);
361:     PCSetType(inner_pc,PCLU);
362:     SLESSetOptionsPrefix(pcnn->sles_coarse,"coarse_");
363:     SLESSetFromOptions(pcnn->sles_coarse);
364:     /* the vectors in the following line are dummy arguments, just telling the SLES the vector size. Values are not used */
365:     SLESSetUp(pcnn->sles_coarse,pcnn->coarse_x,pcnn->coarse_b);
366:   }

368:   /* Free the memory for mat */
369:   PetscFree(mat);

371:   /* for DEBUGGING, save the coarse matrix to a file. */
372:   {
373:     PetscTruth flg;
374:     PetscOptionsHasName(PETSC_NULL,"-save_coarse_matrix",&flg);
375:     if (flg) {
376:       PetscViewer viewer;
377:       PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);
378:       PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
379:       MatView(pcnn->coarse_mat,viewer);
380:       PetscViewerDestroy(viewer);
381:     }
382:   }

384:   /*  Set the variable pcnn->factor_coarse_rhs. */
385:   pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;

387:   /* See historical note 02, at the bottom of this file. */

389:   return(0);
390: }
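
/*
   Added summary (not part of the original file): the coarse problem built
   above has one unknown per processor.  The local block computed in mat[] is

      mat[i*n_neigh+j] = sum_k DZ_IN[i][k] * ( S DZ_IN[j] )_{shared[i][k]}

   i.e., roughly, the interaction (D Z)_i^T S (D Z)_j between this
   subdomain's scaled nullspace piece and those of its neighbours, where S
   is the local Schur complement applied by PCNNApplySchurToChunk().  The
   block is added into the size-by-size MPIAIJ matrix with MatSetValues()
   using the global indices neigh[], and the row of every subdomain that is
   not pure Neumann is then replaced by an identity row via MatZeroRows().
*/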

392: /* -------------------------------------------------------------------------- */
393: /*
394:    PCNNApplySchurToChunk - Applies the local Schur complement to a "chunk" of values given on part of the interface.

396:    Input parameters:
397: .  pc - the preconditioner context
398: .  n - size of chunk
399: .  idx - indices of chunk
400: .  chunk - values

402:    Output parameters:
403: .  array_N - result of Schur complement applied to chunk, scattered to big array
404: .  vec1_B  - result of Schur complement applied to chunk
405: .  vec2_B  - garbage (used as work space)
406: .  vec1_D  - garbage (used as work space)
407: .  vec2_D  - garbage (used as work space)

409: */
410: int PCNNApplySchurToChunk(PC pc, int n, int* idx, Scalar *chunk, Scalar* array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
411: {
412:   int i, ierr;

414:   PC_IS *pcis = (PC_IS*)(pc->data);


418:   PetscMemzero((void*)array_N, pcis->n*sizeof(Scalar));
419:   for (i=0; i<n; i++) { array_N[idx[i]] = chunk[i]; }
420:   PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
421:   PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
422:   PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);

424:   return(0);
425: }
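
/*
   Added note (not part of the original file): on return from

      PCNNApplySchurToChunk(pc,n,idx,chunk,array_N,vec1_B,vec2_B,vec1_D,vec2_D);

   vec1_B holds the local Schur complement applied to the interface vector
   that equals chunk[i] at the nodes idx[i] and zero elsewhere, and array_N
   holds the same values scattered back to the local (interior plus
   interface) numbering, as stated in the header above.
*/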

427: /* -------------------------------------------------------------------------- */
428: /*
429:    PCNNApplyInterfacePreconditioner - Applies the interface preconditioner, i.e., 
430:                                       the preconditioner for the Schur complement.

432:    Input parameter:
433: .  r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.

435:    Output parameters:
436: .  z - global vector of interior and interface nodes. The values on the interface are the result of
437:        the application of the interface preconditioner to the interface part of r. The values on the
438:        interior nodes are garbage.
439: .  work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
440: .  vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
441: .  vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
442: .  vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
443: .  vec1_D - vector of local interior nodes; returns garbage (used as work space)
444: .  vec2_D - vector of local interior nodes; returns garbage (used as work space)
445: .  vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
446: .  vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)

448: */
449: int PCNNApplyInterfacePreconditioner (PC pc, Vec r, Vec z, Scalar* work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,
450:                                       Vec vec2_D, Vec vec1_N, Vec vec2_N)
451: {

454:   PC_IS*  pcis = (PC_IS*)(pc->data);


458:   /*
459:     First balancing step.
460:   */
461:   {
462:     PetscTruth flg;
463:     PetscOptionsHasName(PETSC_NULL,"-turn_off_first_balancing",&flg);
464:     if (!flg) {
465:       PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);
466:     } else {
467:       VecCopy(r,z);
468:     }
469:   }

471:   /*
472:     Extract the local interface part of z and scale it by D 
473:   */
474:   VecScatterBegin(z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
475:   VecScatterEnd  (z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
476:   VecPointwiseMult(pcis->D,vec1_B,vec2_B);

478:   /* Neumann Solver */
479:   PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);

481:   /*
482:     Second balancing step.
483:   */
484:   {
485:     PetscTruth flg;
486:     PetscOptionsHasName(PETSC_NULL,"-turn_off_second_balancing",&flg);
487:     if (!flg) {
488:       PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);
489:     } else {
490:       Scalar zero = 0.0;
491:       VecPointwiseMult(pcis->D,vec1_B,vec2_B);
492:       VecSet(&zero,z);
493:       VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
494:       VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
495:     }
496:   }

498:   return(0);
499: }
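
/*
   Added summary (not part of the original file): ignoring the two
   -turn_off_*_balancing switches, the operator applied above is,
   essentially, the balanced Neumann-Neumann interface preconditioner

      z_B = Balance2 ( sum_i tilde R_i^T D_i S_i^{+} D_i tilde R_i  Balance1(r_B) )

   where D_i is the partition-of-unity scaling pcis->D, S_i^{+} is the
   local Neumann solve applied by PCISApplyInvSchur(), and Balance1 and
   Balance2 are the two coarse-space corrections performed by
   PCNNBalancing() below (the first call without the optional argument u,
   the second with it).
*/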

501: /* -------------------------------------------------------------------------- */
502: /*
503:    PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
504:                    input argument u is provided), or s, as given in equations
505:                    (12) and (13), if the input argument u is not provided (null pointer).
506:                    Notice that the input argument u plays the role of u_i in
507:                    equation (14). The equation numbers refer to [Man93].

509:    Input Parameters:
510: .  pc - the NN preconditioner context.
511: .  r - MPI vector of all nodes (interior and interface). It's preserved.
512: .  u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.

514:    Output Parameters:
515: .  z - MPI vector of interior and interface nodes. Returns s or z (see description above).
516: .  vec1_B - Sequential vector of local interface nodes. Workspace.
517: .  vec2_B - Sequential vector of local interface nodes. Workspace.
518: .  vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
519: .  vec1_D - Sequential vector of local interior nodes. Workspace.
520: .  vec2_D - Sequential vector of local interior nodes. Workspace.
521: .  work_N - Array of all local nodes (interior and interface). Workspace.

523: */
524: int PCNNBalancing (PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,
525:                    Vec vec1_D, Vec vec2_D, Scalar *work_N)
526: {
527:   int         k, ierr, its;
528:   Scalar      zero     =  0.0;
529:   Scalar      m_one    = -1.0;
530:   Scalar      value;
531:   Scalar*     lambda;
532:   PC_NN*      pcnn     = (PC_NN*)(pc->data);
533:   PC_IS*      pcis     = (PC_IS*)(pc->data);

536:   PetscLogEventBegin(PC_ApplyCoarse,0,0,0,0);

538:   if (u) {
539:     if (!vec3_B) { vec3_B = u; }
540:     VecPointwiseMult(pcis->D,u,vec1_B);
541:     VecSet(&zero,z);
542:     VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
543:     VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
544:     VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
545:     VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
546:     PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);
547:     VecScale(&m_one,vec3_B);
548:     VecCopy(r,z);
549:     VecScatterBegin(vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
550:     VecScatterEnd  (vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
551:   } else {
552:     VecCopy(r,z);
553:   }
554:   VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
555:   VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
556:   PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);
557:   for (k=0, value=0.0; k<pcis->n_shared[0]; k++) { value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]]; }
558:   value *= pcnn->factor_coarse_rhs;  /* This factor is set in PCNNCreateCoarseMatrix(). */
559:   {
560:     int rank;
561:     MPI_Comm_rank(pc->comm,&rank);
562:     VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);
563:     /*
564:        Since we are only inserting local values (one value actually) we don't need to do the 
565:        reduction that tells us there is no data that needs to be moved. Hence we comment out these
566:        VecAssemblyBegin(pcnn->coarse_b); 
567:        VecAssemblyEnd  (pcnn->coarse_b);
568:     */
569:   }
570:   SLESSolve(pcnn->sles_coarse,pcnn->coarse_b,pcnn->coarse_x,&its);
571:   if (!u) { VecScale(&m_one,pcnn->coarse_x); }
572:   VecGetArray(pcnn->coarse_x,&lambda);
573:   for (k=0; k<pcis->n_shared[0]; k++) { work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k]; }
574:   VecRestoreArray(pcnn->coarse_x,&lambda);
575:   PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
576:   VecSet(&zero,z);
577:   VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
578:   VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
579:   if (!u) {
580:     VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
581:     VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
582:     PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
583:     VecCopy(r,z);
584:   }
585:   VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
586:   VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
587:   PetscLogEventEnd(PC_ApplyCoarse,0,0,0,0);

589:   return(0);
590: }
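
/*
   Added summary (not part of the original file): in both uses the routine
   assembles a single coarse right-hand-side entry on each processor,

      b_rank = factor_coarse_rhs * sum_k DZ_IN[0][k] * work_N[shared[0][k]],

   solves the one-unknown-per-processor coarse problem with sles_coarse,
   and spreads the resulting coefficient lambda back onto the interface
   through the weighted piece DZ_IN[0] before the final scatter-add into z.
   With the optional argument u this realizes the balancing of equations
   (15)-(16) of [Man93]; without u it produces the vector s of (12)-(13),
   as stated in the header above.
*/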




595: /*  -------   E N D   O F   T H E   C O D E   -------  */
596: /*                                                     */
597: /*  From now on, "footnotes" (or "historical notes").  */
598: /*                                                     */
599: /*  -------------------------------------------------  */


602: #ifdef __HISTORICAL_NOTES___do_not_compile__

604: /* --------------------------------------------------------------------------
605:    Historical note 01 
606:    -------------------------------------------------------------------------- */
607: /*
608:    We considered the possibility of an alternative D_i that would still
609:    provide a partition of unity (i.e., $ sum_i  N_i D_i N_i^T = I $).
610:    The basic principle was still the pseudo-inverse of the counting
611:    function; the difference was that we would not count subdomains
612:    that do not contribute to the coarse space (i.e., not pure-Neumann
613:    subdomains).

615:    This turned out to be a bad idea:  we would solve trivial Neumann
616:    problems in the non-pure-Neumann subdomains, since we would be scaling
617:    the balanced residual by zero.
618: */

620:     {
621:       PetscTruth flg;
622:       PetscOptionsHasName(PETSC_NULL,"-pcnn_new_scaling",&flg);
623:       if (flg) {
624:         Vec    counter;
625:         Scalar one=1.0, zero=0.0;
626:         VecDuplicate(pc->vec,&counter);
627:         VecSet(&zero,counter);
628:         if (pcnn->pure_neumann) {
629:           VecSet(&one,pcnn->D);
630:         } else {
631:           VecSet(&zero,pcnn->D);
632:         }
633:         VecScatterBegin(pcnn->D,counter,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);
634:         VecScatterEnd  (pcnn->D,counter,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);
635:         VecScatterBegin(counter,pcnn->D,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);
636:         VecScatterEnd  (counter,pcnn->D,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);
637:         VecDestroy(counter);
638:         if (pcnn->pure_neumann) {
639:           VecReciprocal(pcnn->D);
640:         } else {
641:           VecSet(&zero,pcnn->D);
642:         }
643:       }
644:     }



648: /* --------------------------------------------------------------------------
649:    Historical note 02 
650:    -------------------------------------------------------------------------- */
651: /*
652:    We tried an alternative coarse problem, that would eliminate exactly a
653:    constant error. Turned out not to improve the overall convergence.
654: */

656:   /*  Set the variable pcnn->factor_coarse_rhs. */
657:   {
658:     PetscTruth flg;
659:     PetscOptionsHasName(PETSC_NULL,"-enforce_preserving_constants",&flg);
660:     if (!flg) { pcnn->factor_coarse_rhs = (pcnn->pure_neumann) ? 1.0 : 0.0; }
661:     else {
662:       Scalar zero = 0.0, one = 1.0;
663:       VecSet(&one,pcnn->vec1_B);
664:       ApplySchurComplement(pcnn,pcnn->vec1_B,pcnn->vec2_B,(Vec)0,pcnn->vec1_D,pcnn->vec2_D);
665:       VecSet(&zero,pcnn->vec1_global);
666:       VecScatterBegin(pcnn->vec2_B,pcnn->vec1_global,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);
667:       VecScatterEnd  (pcnn->vec2_B,pcnn->vec1_global,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);
668:       VecScatterBegin(pcnn->vec1_global,pcnn->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);
669:       VecScatterEnd  (pcnn->vec1_global,pcnn->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);
670:       if (pcnn->pure_neumann) { pcnn->factor_coarse_rhs = 1.0; }
671:       else {
672:         ScatterArrayNToVecB(pcnn->work_N,pcnn->vec1_B,INSERT_VALUES,SCATTER_REVERSE,pcnn);
673:         for (k=0, pcnn->factor_coarse_rhs=0.0; k<pcnn->n_shared[0]; k++) {
674:           pcnn->factor_coarse_rhs += pcnn->work_N[pcnn->shared[0][k]] * pcnn->DZ_IN[0][k];
675:         }
676:         if (pcnn->factor_coarse_rhs) { pcnn->factor_coarse_rhs = 1.0 / pcnn->factor_coarse_rhs; }
677:         else { SETERRQ(1,"Constants cannot be preserved. Remove the -enforce_preserving_constants option."); }
678:       }
679:     }
680:   }

682: #endif /* __HISTORICAL_NOTES___do_not_compile */