Actual source code: nn.c
1: /*$Id: nn.c,v 1.13 2001/08/07 03:03:41 balay Exp $*/
3: #include "src/sles/pc/impls/is/nn/nn.h"
5: /* -------------------------------------------------------------------------- */
6: /*
7: PCSetUp_NN - Prepares for the use of the NN preconditioner
8: by setting data structures and options.
10: Input Parameter:
11: . pc - the preconditioner context
13: Application Interface Routine: PCSetUp()
15: Notes:
16: The interface routine PCSetUp() is not usually called directly by
17: the user, but instead is called by PCApply() if necessary.
18: */
19: #undef __FUNCT__
20: #define __FUNCT__ "PCSetUp_NN"
21: static int PCSetUp_NN(PC pc)
22: {
24:
26: if (!pc->setupcalled) {
27: /* Set up all the "iterative substructuring" common block */
28: PCISSetUp(pc);
29: /* Create the coarse matrix. */
30: PCNNCreateCoarseMatrix(pc);
31: }
32: return(0);
33: }
35: /* -------------------------------------------------------------------------- */
36: /*
37: PCApply_NN - Applies the NN preconditioner to a vector.
39: Input Parameters:
40: . pc - the preconditioner context
41: . r - input vector (global)
43: Output Parameter:
44: . z - output vector (global)
46: Application Interface Routine: PCApply()
47: */
48: #undef __FUNCT__
49: #define __FUNCT__ "PCApply_NN"
50: static int PCApply_NN(PC pc,Vec r,Vec z)
51: {
52: PC_IS *pcis = (PC_IS*)(pc->data);
53: int ierr,its;
54: PetscScalar m_one = -1.0;
55: Vec w = pcis->vec1_global;
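/*
   Summary of the steps commented below: (1) Dirichlet solves $ B_I^{(i)}r_I^{(i)} $
   on each subdomain; (2) formation of the Schur-complement residual
   $ r_B - sum_j tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ in w;
   (3) application of the balanced Neumann-Neumann interface preconditioner to w,
   which yields the interface part of z; and (4) back-substitution on the interior
   unknowns via a second round of Dirichlet solves.
*/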
59: /*
60: Dirichlet solvers.
61: Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
62: Storing the local results in vec2_D
63: */
64: VecScatterBegin(r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);
65: VecScatterEnd (r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);
66: SLESSolve(pcis->sles_D,pcis->vec1_D,pcis->vec2_D,&its);
67:
68: /*
69: Computing $ r_B - sum_j tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
70: Storing the result in the interface portion of the global vector w.
71: */
72: MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);
73: VecScale(&m_one,pcis->vec1_B);
74: VecCopy(r,w);
75: VecScatterBegin(pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
76: VecScatterEnd (pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
78: /*
79: Apply the interface preconditioner
80: */
81: PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D,
82: pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);
84: /*
85: Computing $ t_I^{(i)} = A_{IB}^{(i)} tilde R_i z_B $
86: The result is stored in vec1_D.
87: */
88: VecScatterBegin(z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
89: VecScatterEnd (z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
90: MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);
92: /*
93: Dirichlet solvers.
94: Computing $ B_I^{(i)}t_I^{(i)} $ and assembling into the global vector the blocks
95: $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $.
96: */
97: VecScatterBegin(pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);
98: VecScatterEnd (pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);
99: SLESSolve(pcis->sles_D,pcis->vec1_D,pcis->vec2_D,&its);
100: VecScale(&m_one,pcis->vec2_D);
101: VecScatterBegin(pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);
102: VecScatterEnd (pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);
104: return(0);
105: }
107: /* -------------------------------------------------------------------------- */
108: /*
109: PCDestroy_NN - Destroys the private context for the NN preconditioner
110: that was created with PCCreate_NN().
112: Input Parameter:
113: . pc - the preconditioner context
115: Application Interface Routine: PCDestroy()
116: */
117: #undef __FUNCT__
118: #define __FUNCT__ "PCDestroy_NN"
119: static int PCDestroy_NN(PC pc)
120: {
121: PC_NN *pcnn = (PC_NN*)pc->data;
122: int ierr;
126: PCISDestroy(pc);
128: if (pcnn->coarse_mat) {MatDestroy(pcnn->coarse_mat);}
129: if (pcnn->coarse_x) {VecDestroy(pcnn->coarse_x);}
130: if (pcnn->coarse_b) {VecDestroy(pcnn->coarse_b);}
131: if (pcnn->sles_coarse) {SLESDestroy(pcnn->sles_coarse);}
132: if (pcnn->DZ_IN) {
133: if (pcnn->DZ_IN[0]) {PetscFree(pcnn->DZ_IN[0]);}
134: PetscFree(pcnn->DZ_IN);
135: }
137: /*
138: Free the private data structure that was hanging off the PC
139: */
140: PetscFree(pcnn);
141: return(0);
142: }
144: /* -------------------------------------------------------------------------- */
145: /*
146: PCCreate_NN - Creates an NN preconditioner context, PC_NN,
147: and sets this as the private data within the generic preconditioning
148: context, PC, that was created within PCCreate().
150: Input Parameter:
151: . pc - the preconditioner context
153: Application Interface Routine: PCCreate()
154: */
155: EXTERN_C_BEGIN
156: #undef __FUNCT__
157: #define __FUNCT__ "PCCreate_NN"
158: int PCCreate_NN(PC pc)
159: {
160: int ierr;
161: PC_NN *pcnn;
165: /*
166: Creates the private data structure for this preconditioner and
167: attaches it to the PC object.
168: */
169: ierr = PetscNew(PC_NN,&pcnn);
170: pc->data = (void*)pcnn;
172: /*
173: Logs the memory usage; this is not needed but allows PETSc to
174: monitor how much memory is being used for various purposes.
175: */
176: PetscLogObjectMemory(pc,sizeof(PC_NN)+sizeof(PC_IS)); /* Is this the right thing to do? */
178: PCISCreate(pc);
179: pcnn->coarse_mat = 0;
180: pcnn->coarse_x = 0;
181: pcnn->coarse_b = 0;
182: pcnn->sles_coarse = 0;
183: pcnn->DZ_IN = 0;
185: /*
186: Set the pointers for the functions that are provided above.
187: Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
188: are called, they will automatically call these functions. Note we
189: choose not to provide a couple of these functions since they are
190: not needed.
191: */
192: pc->ops->apply = PCApply_NN;
193: pc->ops->applytranspose = 0;
194: pc->ops->setup = PCSetUp_NN;
195: pc->ops->destroy = PCDestroy_NN;
196: pc->ops->view = 0;
197: pc->ops->applyrichardson = 0;
198: pc->ops->applysymmetricleft = 0;
199: pc->ops->applysymmetricright = 0;
201: return(0);
202: }
203: EXTERN_C_END
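/*
   Usage sketch: an application would normally select this preconditioner through
   the generic PC interface rather than by calling PCCreate_NN() directly.
   Assuming the operator A has been created in a format supported by the PCIS
   infrastructure that this preconditioner builds on, a typical (SLES-era)
   calling sequence might look like:

      SLES sles;
      PC   pc;
      int  its;
      SLESCreate(PETSC_COMM_WORLD,&sles);
      SLESSetOperators(sles,A,A,DIFFERENT_NONZERO_PATTERN);
      SLESGetPC(sles,&pc);
      PCSetType(pc,PCNN);              (equivalently, -pc_type nn at runtime)
      SLESSetFromOptions(sles);
      SLESSolve(sles,b,x,&its);
*/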
206: /* -------------------------------------------------------------------------- */
207: /*
208: PCNNCreateCoarseMatrix - Creates and assembles the coarse matrix used in the balancing steps (one unknown per subdomain), together with the coarse vectors and the coarse solver context.
209: */
210: #undef __FUNCT__
211: #define __FUNCT__ "PCNNCreateCoarseMatrix"
212: int PCNNCreateCoarseMatrix (PC pc)
213: {
214: MPI_Request *send_request, *recv_request;
215: int i, j, k, ierr;
217: PetscScalar* mat; /* Sub-matrix with this subdomain's contribution to the coarse matrix */
218: PetscScalar** DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */
220: /* aliasing some names */
221: PC_IS* pcis = (PC_IS*)(pc->data);
222: PC_NN* pcnn = (PC_NN*)pc->data;
223: int n_neigh = pcis->n_neigh;
224: int* neigh = pcis->neigh;
225: int* n_shared = pcis->n_shared;
226: int** shared = pcis->shared;
227: PetscScalar** DZ_IN; /* Must be initialized after memory allocation. */
231: /* Allocate memory for mat (the +1 handles the case in which n_neigh is zero) */
232: PetscMalloc((n_neigh*n_neigh+1)*sizeof(PetscScalar),&mat);
234: /* Allocate memory for DZ */
235: /* Notice that DZ_OUT[0] is allocated some space that is never used. */
236: /* This is just so that DZ_OUT and DZ_IN have exactly the same form. */
237: {
238: int size_of_Z = 0;
239: ierr = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);
240: DZ_IN = pcnn->DZ_IN;
241: ierr = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);
242: for (i=0; i<n_neigh; i++) {
243: size_of_Z += n_shared[i];
244: }
245: PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);
246: PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);
247: }
248: for (i=1; i<n_neigh; i++) {
249: DZ_IN[i] = DZ_IN [i-1] + n_shared[i-1];
250: DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1];
251: }
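/* At this point DZ_IN[i] and DZ_OUT[i] both point into a single contiguous
   buffer: segment i has length n_shared[i] and holds the piece of the scaled
   nullspace D*Z associated with the interface shared with processor neigh[i]
   (neigh[0] being this processor itself). */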
253: /* Set the values of DZ_OUT, in order to send this info to the neighbours */
254: /* First, set the auxiliary array pcis->work_N. */
255: PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);
256: for (i=1; i<n_neigh; i++){
257: for (j=0; j<n_shared[i]; j++) {
258: DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
259: }
260: }
262: /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
263: /* Notice that send_request[] and recv_request[] could have one less element. */
264: /* We make them longer to have request[i] corresponding to neigh[i]. */
265: {
266: int tag;
267: PetscObjectGetNewTag((PetscObject)pc,&tag);
268: PetscMalloc((2*(n_neigh)+1)*sizeof(MPI_Request),&send_request);
269: recv_request = send_request + (n_neigh);
270: for (i=1; i<n_neigh; i++) {
271: MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(send_request[i]));
272: MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(recv_request[i]));
273: }
274: }
276: /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
277: for(j=0; j<n_shared[0]; j++) {
278: DZ_IN[0][j] = pcis->work_N[shared[0][j]];
279: }
281: /* Start computing with local D*Z while communication goes on. */
282: /* Apply the Schur complement. The result ends up in pcis->vec1_B */
283: /* (the vec1_B argument of the call below) and is also scattered */
284: /* to the auxiliary array pcis->work_N. */
285: PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B,
286: pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);
288: /* Compute the first column, while completing the receiving. */
289: for (i=0; i<n_neigh; i++) {
290: MPI_Status stat;
291: int ind=0;
292: if (i>0) { MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat); ind++;}
293: mat[ind*n_neigh+0] = 0.0;
294: for (k=0; k<n_shared[ind]; k++) {
295: mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
296: }
297: }
299: /* Compute the remaining columns. */
300: for (j=1; j<n_neigh; j++) {
301: PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B,
302: pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);
303: for (i=0; i<n_neigh; i++) {
304: mat[i*n_neigh+j] = 0.0;
305: for (k=0; k<n_shared[i]; k++) {
306: mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
307: }
308: }
309: }
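/* In the notation of the comments above, the dense block just computed is
   mat[i*n_neigh+j] = DZ_IN[i]^T * ( S_local applied to DZ_IN[j], extended by
   zero to the whole local interface ), i.e. this subdomain's contribution to
   the coarse-matrix entry coupling subdomains neigh[i] and neigh[j]; it is
   added into the global coarse matrix below with ADD_VALUES. */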
311: /* Complete the sending. */
312: if (n_neigh>1) {
313: MPI_Status *stat;
314: PetscMalloc((n_neigh-1)*sizeof(MPI_Status),&stat);
315: MPI_Waitall(n_neigh-1,&(send_request[1]),stat);
316: PetscFree(stat);
317: }
319: /* Free the memory for the MPI requests */
320: PetscFree(send_request);
322: /* Free the memory for DZ_OUT */
323: if (DZ_OUT) {
324: if (DZ_OUT[0]) { PetscFree(DZ_OUT[0]); }
325: PetscFree(DZ_OUT);
326: }
328: {
329: int size,n_neigh_m1;
330: MPI_Comm_size(pc->comm,&size);
331: n_neigh_m1 = (n_neigh) ? n_neigh-1 : 0;
332: /* Create the global coarse vectors (rhs and solution). */
333: VecCreateMPI(pc->comm,1,size,&(pcnn->coarse_b));
334: VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));
335: /* Create and set the global coarse matrix. */
336: MatCreateMPIAIJ(pc->comm,1,1,size,size,1,PETSC_NULL,n_neigh_m1,PETSC_NULL,&(pcnn->coarse_mat));
337: MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);
338: MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
339: MatAssemblyEnd (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
340: }
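/* Note that the coarse problem has exactly one unknown per processor (local
   size 1 above), so the coarse matrix has order 'size' and this processor's
   dense n_neigh x n_neigh block is added into the rows and columns listed
   in neigh[]. */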
342: {
343: int rank;
344: PetscScalar one = 1.0;
345: IS is;
346: MPI_Comm_rank(pc->comm,&rank);
347: /* "Zero out" rows of not-purely-Neumann subdomains */
348: if (pcis->pure_neumann) { /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */
349: ISCreateStride(pc->comm,0,0,0,&is);
350: } else { /* here it DOES zero the row, since it's not a floating subdomain. */
351: ISCreateStride(pc->comm,1,rank,0,&is);
352: }
353: MatZeroRows(pcnn->coarse_mat,is,&one);
354: ISDestroy(is);
355: }
357: /* Create the coarse linear solver context */
358: {
359: PC pc_ctx, inner_pc;
360: KSP ksp_ctx;
361: SLESCreate(pc->comm,&pcnn->sles_coarse);
362: SLESSetOperators(pcnn->sles_coarse,pcnn->coarse_mat,pcnn->coarse_mat,SAME_PRECONDITIONER);
363: SLESGetKSP(pcnn->sles_coarse,&ksp_ctx);
364: SLESGetPC(pcnn->sles_coarse,&pc_ctx);
365: PCSetType(pc_ctx,PCREDUNDANT);
366: KSPSetType(ksp_ctx,KSPPREONLY);
367: PCRedundantGetPC(pc_ctx,&inner_pc);
368: PCSetType(inner_pc,PCLU);
369: SLESSetOptionsPrefix(pcnn->sles_coarse,"coarse_");
370: SLESSetFromOptions(pcnn->sles_coarse);
371: /* the vectors in the following line are dummy arguments, just telling the SLES the vector size. Values are not used */
372: SLESSetUp(pcnn->sles_coarse,pcnn->coarse_x,pcnn->coarse_b);
373: }
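/* The coarse solver set up above defaults to a redundant direct solve: every
   processor applies an LU factorization of the whole (small) coarse matrix,
   wrapped in KSPPREONLY. Because the options prefix is "coarse_", this choice
   should be adjustable at runtime, e.g. with -coarse_ksp_type or
   -coarse_pc_type. */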
375: /* Free the memory for mat */
376: PetscFree(mat);
378: /* for DEBUGGING, save the coarse matrix to a file. */
379: {
380: PetscTruth flg;
381: PetscOptionsHasName(PETSC_NULL,"-save_coarse_matrix",&flg);
382: if (flg) {
383: PetscViewer viewer;
384: PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);
385: PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
386: MatView(pcnn->coarse_mat,viewer);
387: PetscViewerDestroy(viewer);
388: }
389: }
391: /* Set the variable pcnn->factor_coarse_rhs. */
392: pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;
394: /* See historical note 02, at the bottom of this file. */
396: return(0);
397: }
399: /* -------------------------------------------------------------------------- */
400: /*
401: PCNNApplySchurToChunk -
403: Input parameters:
404: . pc - the preconditioner context
405: . n - size of chunk
406: . idx - indices of chunk
407: . chunk - values
409: Output parameters:
410: . array_N - result of Schur complement applied to chunk, scattered to big array
411: . vec1_B - result of Schur complement applied to chunk
412: . vec2_B - garbage (used as work space)
413: . vec1_D - garbage (used as work space)
414: . vec2_D - garbage (used as work space)
416: */
417: #undef __FUNCT__
418: #define __FUNCT__ "PCNNApplySchurToChunk"
419: int PCNNApplySchurToChunk(PC pc, int n, int* idx, PetscScalar *chunk, PetscScalar* array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
420: {
421: int i, ierr;
422: PC_IS *pcis = (PC_IS*)(pc->data);
426: PetscMemzero((void*)array_N, pcis->n*sizeof(PetscScalar));
427: for (i=0; i<n; i++) { array_N[idx[i]] = chunk[i]; }
428: PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
429: PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
430: PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);
432: return(0);
433: }
435: /* -------------------------------------------------------------------------- */
436: /*
437: PCNNApplyInterfacePreconditioner - Applies the interface preconditioner, i.e.,
438: the preconditioner for the Schur complement.
440: Input parameter:
441: . r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.
443: Output parameters:
444: . z - global vector of interior and interface nodes. The values on the interface are the result of
445: the application of the interface preconditioner to the interface part of r. The values on the
446: interior nodes are garbage.
447: . work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
448: . vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
449: . vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
450: . vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
451: . vec1_D - vector of local interior nodes; returns garbage (used as work space)
452: . vec2_D - vector of local interior nodes; returns garbage (used as work space)
453: . vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
454: . vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
456: */
457: #undef __FUNCT__
458: #define __FUNCT__ "PCNNApplyInterfacePreconditioner"
459: int PCNNApplyInterfacePreconditioner (PC pc, Vec r, Vec z, PetscScalar* work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,
460: Vec vec2_D, Vec vec1_N, Vec vec2_N)
461: {
462: int ierr;
463: PC_IS* pcis = (PC_IS*)(pc->data);
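/*
   The interface preconditioner is applied in three stages, detailed below:
   (1) a first balancing step (PCNNBalancing), unless -turn_off_first_balancing
   is given, which adds a coarse-grid correction that makes the interface
   residual consistent with the (possibly singular) local Neumann problems;
   (2) weighted Neumann solves: the interface residual is scaled by the
   partition-of-unity weights D and the (pseudo-)inverses of the local Schur
   complements are applied (PCISApplyInvSchur); and (3) a second balancing
   step (PCNNBalancing), unless -turn_off_second_balancing is given, which
   also assembles the result back into the global vector z.
*/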
467: /*
468: First balancing step.
469: */
470: {
471: PetscTruth flg;
472: PetscOptionsHasName(PETSC_NULL,"-turn_off_first_balancing",&flg);
473: if (!flg) {
474: PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);
475: } else {
476: VecCopy(r,z);
477: }
478: }
480: /*
481: Extract the local interface part of z and scale it by D
482: */
483: VecScatterBegin(z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
484: VecScatterEnd (z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
485: VecPointwiseMult(pcis->D,vec1_B,vec2_B);
487: /* Neumann Solver */
488: PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);
490: /*
491: Second balancing step.
492: */
493: {
494: PetscTruth flg;
495: PetscOptionsHasName(PETSC_NULL,"-turn_off_second_balancing",&flg);
496: if (!flg) {
497: PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);
498: } else {
499: PetscScalar zero = 0.0;
500: VecPointwiseMult(pcis->D,vec1_B,vec2_B);
501: VecSet(&zero,z);
502: VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
503: VecScatterEnd (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
504: }
505: }
507: return(0);
508: }
510: /* -------------------------------------------------------------------------- */
511: /*
512: PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
513: input argument u is provided), or s, as given in equations
514: (12) and (13), if the input argument u is a null vector.
515: Notice that the input argument u plays the role of u_i in
516: equation (14). The equation numbers refer to [Man93].
518: Input Parameters:
519: . pc - NN preconditioner context.
520: . r - MPI vector of all nodes (interior and interface). It's preserved.
521: . u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.
523: Output Parameters:
524: . z - MPI vector of interior and interface nodes. Returns s or z (see description above).
525: . vec1_B - Sequential vector of local interface nodes. Workspace.
526: . vec2_B - Sequential vector of local interface nodes. Workspace.
527: . vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
528: . vec1_D - Sequential vector of local interior nodes. Workspace.
529: . vec2_D - Sequential vector of local interior nodes. Workspace.
530: . work_N - Array of all local nodes (interior and interface). Workspace.
532: */
533: #undef __FUNCT__
534: #define __FUNCT__ "PCNNBalancing"
535: int PCNNBalancing (PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,
536: Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
537: {
538: int k, ierr, its;
539: PetscScalar zero = 0.0;
540: PetscScalar m_one = -1.0;
541: PetscScalar value;
542: PetscScalar* lambda;
543: PC_NN* pcnn = (PC_NN*)(pc->data);
544: PC_IS* pcis = (PC_IS*)(pc->data);
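/*
   Outline of the coarse correction below: each processor contributes a single
   entry to the coarse right-hand side, namely the inner product of its piece
   of D*Z (DZ_IN[0]) with the current interface residual, scaled by
   factor_coarse_rhs; the one-unknown-per-subdomain coarse problem is then
   solved with sles_coarse, and the resulting multiplier lambda scales D*Z
   before it is scattered and added back into z.
*/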
547: PetscLogEventBegin(PC_ApplyCoarse,0,0,0,0);
549: if (u) {
550: if (!vec3_B) { vec3_B = u; }
551: VecPointwiseMult(pcis->D,u,vec1_B);
552: VecSet(&zero,z);
553: VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
554: VecScatterEnd (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
555: VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
556: VecScatterEnd (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
557: PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);
558: VecScale(&m_one,vec3_B);
559: VecCopy(r,z);
560: VecScatterBegin(vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
561: VecScatterEnd (vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
562: } else {
563: VecCopy(r,z);
564: }
565: VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
566: VecScatterEnd (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
567: PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);
568: for (k=0, value=0.0; k<pcis->n_shared[0]; k++) { value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]]; }
569: value *= pcnn->factor_coarse_rhs; /* This factor is set in PCNNCreateCoarseMatrix(). */
570: {
571: int rank;
572: MPI_Comm_rank(pc->comm,&rank);
573: VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);
574: /*
575: Since we are only inserting local values (one value actually) we don't need to do the
576: reduction that tells us there is no data that needs to be moved. Hence we comment out these
577: VecAssemblyBegin(pcnn->coarse_b);
578: VecAssemblyEnd (pcnn->coarse_b);
579: */
580: }
581: SLESSolve(pcnn->sles_coarse,pcnn->coarse_b,pcnn->coarse_x,&its);
582: if (!u) { VecScale(&m_one,pcnn->coarse_x); }
583: VecGetArray(pcnn->coarse_x,&lambda);
584: for (k=0; k<pcis->n_shared[0]; k++) { work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k]; }
585: VecRestoreArray(pcnn->coarse_x,&lambda);
586: PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
587: VecSet(&zero,z);
588: VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
589: VecScatterEnd (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
590: if (!u) {
591: VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
592: VecScatterEnd (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
593: PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
594: VecCopy(r,z);
595: }
596: VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
597: VecScatterEnd (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
598: PetscLogEventEnd(PC_ApplyCoarse,0,0,0,0);
600: return(0);
601: }
603: #undef __FUNCT__
607: /* ------- E N D O F T H E C O D E ------- */
608: /* */
609: /* From now on, "footnotes" (or "historical notes"). */
610: /* */
611: /* ------------------------------------------------- */
614: #ifdef __HISTORICAL_NOTES___do_not_compile__
616: /* --------------------------------------------------------------------------
617: Historical note 01
618: -------------------------------------------------------------------------- */
619: /*
620: We considered the possibility of an alternative D_i that would still
621: provide a partition of unity (i.e., $ sum_i N_i D_i N_i^T = I $).
622: The basic principle was still the pseudo-inverse of the counting
623: function; the difference was that we would not count subdomains
624: that do not contribute to the coarse space (i.e., not pure-Neumann
625: subdomains).
627: This turned out to be a bad idea: we would solve trivial Neumann
628: problems in the subdomains that are not pure Neumann, since we would be scaling
629: the balanced residual by zero.
630: */
632: {
633: PetscTruth flg;
634: PetscOptionsHasName(PETSC_NULL,"-pcnn_new_scaling",&flg);
635: if (flg) {
636: Vec counter;
637: PetscScalar one=1.0, zero=0.0;
638: VecDuplicate(pc->vec,&counter);
639: VecSet(&zero,counter);
640: if (pcnn->pure_neumann) {
641: VecSet(&one,pcnn->D);
642: } else {
643: VecSet(&zero,pcnn->D);
644: }
645: VecScatterBegin(pcnn->D,counter,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);
646: VecScatterEnd (pcnn->D,counter,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);
647: VecScatterBegin(counter,pcnn->D,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);
648: VecScatterEnd (counter,pcnn->D,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);
649: VecDestroy(counter);
650: if (pcnn->pure_neumann) {
651: VecReciprocal(pcnn->D);
652: } else {
653: VecSet(&zero,pcnn->D);
654: }
655: }
656: }
660: /* --------------------------------------------------------------------------
661: Historical note 02
662: -------------------------------------------------------------------------- */
663: /*
664: We tried an alternative coarse problem that would eliminate exactly a
665: constant error. It turned out not to improve the overall convergence.
666: */
668: /* Set the variable pcnn->factor_coarse_rhs. */
669: {
670: PetscTruth flg;
671: PetscOptionsHasName(PETSC_NULL,"-enforce_preserving_constants",&flg);
672: if (!flg) { pcnn->factor_coarse_rhs = (pcnn->pure_neumann) ? 1.0 : 0.0; }
673: else {
674: PetscScalar zero = 0.0, one = 1.0;
675: VecSet(&one,pcnn->vec1_B);
676: ApplySchurComplement(pcnn,pcnn->vec1_B,pcnn->vec2_B,(Vec)0,pcnn->vec1_D,pcnn->vec2_D);
677: VecSet(&zero,pcnn->vec1_global);
678: VecScatterBegin(pcnn->vec2_B,pcnn->vec1_global,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);
679: VecScatterEnd (pcnn->vec2_B,pcnn->vec1_global,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);
680: VecScatterBegin(pcnn->vec1_global,pcnn->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);
681: VecScatterEnd (pcnn->vec1_global,pcnn->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);
682: if (pcnn->pure_neumann) { pcnn->factor_coarse_rhs = 1.0; }
683: else {
684: ScatterArrayNToVecB(pcnn->work_N,pcnn->vec1_B,INSERT_VALUES,SCATTER_REVERSE,pcnn);
685: for (k=0, pcnn->factor_coarse_rhs=0.0; k<pcnn->n_shared[0]; k++) {
686: pcnn->factor_coarse_rhs += pcnn->work_N[pcnn->shared[0][k]] * pcnn->DZ_IN[0][k];
687: }
688: if (pcnn->factor_coarse_rhs) { pcnn->factor_coarse_rhs = 1.0 / pcnn->factor_coarse_rhs; }
689: else { SETERRQ(1,"Constants cannot be preserved. Remove the -enforce_preserving_constants option."); }
690: }
691: }
692: }
694: #endif /* __HISTORICAL_NOTES___do_not_compile */