File: | ksp/pc/impls/tfs/gs.c |
Warning: | line 723, column 43 The left operand of '*' is a garbage value |
[?] Use j/k keys for keyboard navigation
1 | ||||
2 | /***********************************gs.c*************************************** | |||
3 | ||||
4 | Author: Henry M. Tufo III | |||
5 | ||||
6 | e-mail: [email protected] | |||
7 | ||||
8 | snail-mail: | |||
9 | Division of Applied Mathematics | |||
10 | Brown University | |||
11 | Providence, RI 02912 | |||
12 | ||||
13 | Last Modification: | |||
14 | 6.21.97 | |||
15 | ************************************gs.c**************************************/ | |||
16 | ||||
17 | /***********************************gs.c*************************************** | |||
18 | File Description: | |||
19 | ----------------- | |||
20 | ||||
21 | ************************************gs.c**************************************/ | |||
22 | ||||
23 | #include <../src/ksp/pc/impls/tfs/tfs.h> | |||
24 | ||||
25 | /* default length of number of items via tree - doubles if exceeded */ | |||
26 | #define TREE_BUF_SZ2048; 2048; | |||
27 | #define GS_VEC_SZ1 1 | |||
28 | ||||
29 | ||||
30 | ||||
31 | /***********************************gs.c*************************************** | |||
32 | Type: struct gather_scatter_id | |||
33 | ------------------------------ | |||
34 | ||||
35 | ************************************gs.c**************************************/ | |||
36 | typedef struct gather_scatter_id { | |||
37 | PetscInt id; | |||
38 | PetscInt nel_min; | |||
39 | PetscInt nel_max; | |||
40 | PetscInt nel_sum; | |||
41 | PetscInt negl; | |||
42 | PetscInt gl_max; | |||
43 | PetscInt gl_min; | |||
44 | PetscInt repeats; | |||
45 | PetscInt ordered; | |||
46 | PetscInt positive; | |||
47 | PetscScalar *vals; | |||
48 | ||||
49 | /* bit mask info */ | |||
50 | PetscInt *my_proc_mask; | |||
51 | PetscInt mask_sz; | |||
52 | PetscInt *ngh_buf; | |||
53 | PetscInt ngh_buf_sz; | |||
54 | PetscInt *nghs; | |||
55 | PetscInt num_nghs; | |||
56 | PetscInt max_nghs; | |||
57 | PetscInt *pw_nghs; | |||
58 | PetscInt num_pw_nghs; | |||
59 | PetscInt *tree_nghs; | |||
60 | PetscInt num_tree_nghs; | |||
61 | ||||
62 | PetscInt num_loads; | |||
63 | ||||
64 | /* repeats == true -> local info */ | |||
65 | PetscInt nel; /* number of unique elememts */ | |||
66 | PetscInt *elms; /* of size nel */ | |||
67 | PetscInt nel_total; | |||
68 | PetscInt *local_elms; /* of size nel_total */ | |||
69 | PetscInt *companion; /* of size nel_total */ | |||
70 | ||||
71 | /* local info */ | |||
72 | PetscInt num_local_total; | |||
73 | PetscInt local_strength; | |||
74 | PetscInt num_local; | |||
75 | PetscInt *num_local_reduce; | |||
76 | PetscInt **local_reduce; | |||
77 | PetscInt num_local_gop; | |||
78 | PetscInt *num_gop_local_reduce; | |||
79 | PetscInt **gop_local_reduce; | |||
80 | ||||
81 | /* pairwise info */ | |||
82 | PetscInt level; | |||
83 | PetscInt num_pairs; | |||
84 | PetscInt max_pairs; | |||
85 | PetscInt loc_node_pairs; | |||
86 | PetscInt max_node_pairs; | |||
87 | PetscInt min_node_pairs; | |||
88 | PetscInt avg_node_pairs; | |||
89 | PetscInt *pair_list; | |||
90 | PetscInt *msg_sizes; | |||
91 | PetscInt **node_list; | |||
92 | PetscInt len_pw_list; | |||
93 | PetscInt *pw_elm_list; | |||
94 | PetscScalar *pw_vals; | |||
95 | ||||
96 | MPI_Request *msg_ids_in; | |||
97 | MPI_Request *msg_ids_out; | |||
98 | ||||
99 | PetscScalar *out; | |||
100 | PetscScalar *in; | |||
101 | PetscInt msg_total; | |||
102 | ||||
103 | /* tree - crystal accumulator info */ | |||
104 | PetscInt max_left_over; | |||
105 | PetscInt *pre; | |||
106 | PetscInt *in_num; | |||
107 | PetscInt *out_num; | |||
108 | PetscInt **in_list; | |||
109 | PetscInt **out_list; | |||
110 | ||||
111 | /* new tree work*/ | |||
112 | PetscInt tree_nel; | |||
113 | PetscInt *tree_elms; | |||
114 | PetscScalar *tree_buf; | |||
115 | PetscScalar *tree_work; | |||
116 | ||||
117 | PetscInt tree_map_sz; | |||
118 | PetscInt *tree_map_in; | |||
119 | PetscInt *tree_map_out; | |||
120 | ||||
121 | /* current memory status */ | |||
122 | PetscInt gl_bss_min; | |||
123 | PetscInt gl_perm_min; | |||
124 | ||||
125 | /* max segment size for PCTFS_gs_gop_vec() */ | |||
126 | PetscInt vec_sz; | |||
127 | ||||
128 | /* hack to make paul happy */ | |||
129 | MPI_Comm PCTFS_gs_comm; | |||
130 | ||||
131 | } PCTFS_gs_id; | |||
132 | ||||
133 | static PCTFS_gs_id *gsi_check_args(PetscInt *elms, PetscInt nel, PetscInt level); | |||
134 | static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs); | |||
135 | static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs); | |||
136 | static PetscErrorCode set_pairwise(PCTFS_gs_id *gs); | |||
137 | static PCTFS_gs_id *gsi_new(void); | |||
138 | static PetscErrorCode set_tree(PCTFS_gs_id *gs); | |||
139 | ||||
140 | /* same for all but vector flavor */ | |||
141 | static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals); | |||
142 | /* vector flavor */ | |||
143 | static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); | |||
144 | ||||
145 | static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step); | |||
146 | static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step); | |||
147 | static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); | |||
148 | static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); | |||
149 | static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step); | |||
150 | ||||
151 | ||||
152 | static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals); | |||
153 | static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals); | |||
154 | ||||
155 | static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim); | |||
156 | static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim); | |||
157 | static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim); | |||
158 | ||||
159 | /* global vars */ | |||
160 | /* from comm.c module */ | |||
161 | ||||
162 | static PetscInt num_gs_ids = 0; | |||
163 | ||||
164 | /* should make this dynamic ... later */ | |||
165 | static PetscInt msg_buf =MAX_MSG_BUF32768; | |||
166 | static PetscInt vec_sz =GS_VEC_SZ1; | |||
167 | static PetscInt *tree_buf =NULL((void*)0); | |||
168 | static PetscInt tree_buf_sz=0; | |||
169 | static PetscInt ntree =0; | |||
170 | ||||
171 | /***************************************************************************/ | |||
172 | PetscErrorCode PCTFS_gs_init_vec_sz(PetscInt size) | |||
173 | { | |||
174 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c"; petscstack->line[petscstack->currentsize] = 174; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
175 | vec_sz = size; | |||
176 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
177 | } | |||
178 | ||||
179 | /******************************************************************************/ | |||
180 | PetscErrorCode PCTFS_gs_init_msg_buf_sz(PetscInt buf_size) | |||
181 | { | |||
182 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c"; petscstack->line[petscstack->currentsize] = 182; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
183 | msg_buf = buf_size; | |||
184 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
185 | } | |||
186 | ||||
187 | /******************************************************************************/ | |||
188 | PCTFS_gs_id *PCTFS_gs_init(PetscInt *elms, PetscInt nel, PetscInt level) | |||
189 | { | |||
190 | PCTFS_gs_id *gs; | |||
191 | MPI_Group PCTFS_gs_group; | |||
192 | MPI_Comm PCTFS_gs_comm; | |||
193 | PetscErrorCode ierr; | |||
194 | ||||
195 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c"; petscstack->line[petscstack->currentsize] = 195; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
196 | /* ensure that communication package has been initialized */ | |||
197 | PCTFS_comm_init(); | |||
198 | ||||
199 | ||||
200 | /* determines if we have enough dynamic/semi-static memory */ | |||
201 | /* checks input, allocs and sets gd_id template */ | |||
202 | gs = gsi_check_args(elms,nel,level); | |||
203 | ||||
204 | /* only bit mask version up and working for the moment */ | |||
205 | /* LATER :: get int list version working for sparse pblms */ | |||
206 | ierr = gsi_via_bit_mask(gs);CHKERRABORT(PETSC_COMM_WORLD,ierr)do {if (__builtin_expect(!!(ierr),0)) {PetscError(((MPI_Comm) 0x44000001),206,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");MPI_Abort(PETSC_COMM_WORLD,ierr );}} while (0); | |||
207 | ||||
208 | ||||
209 | ierr = MPI_Comm_group(MPI_COMM_WORLD((MPI_Comm)0x44000000),&PCTFS_gs_group);CHKERRABORT(PETSC_COMM_WORLD,ierr)do {if (__builtin_expect(!!(ierr),0)) {PetscError(((MPI_Comm) 0x44000001),209,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");MPI_Abort(PETSC_COMM_WORLD,ierr );}} while (0); | |||
210 | ierr = MPI_Comm_create(MPI_COMM_WORLD((MPI_Comm)0x44000000),PCTFS_gs_group,&PCTFS_gs_comm);CHKERRABORT(PETSC_COMM_WORLD,ierr)do {if (__builtin_expect(!!(ierr),0)) {PetscError(((MPI_Comm) 0x44000001),210,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");MPI_Abort(PETSC_COMM_WORLD,ierr );}} while (0); | |||
211 | ierr = MPI_Group_free(&PCTFS_gs_group);CHKERRABORT(PETSC_COMM_WORLD,ierr)do {if (__builtin_expect(!!(ierr),0)) {PetscError(((MPI_Comm) 0x44000001),211,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");MPI_Abort(PETSC_COMM_WORLD,ierr );}} while (0); | |||
212 | ||||
213 | gs->PCTFS_gs_comm=PCTFS_gs_comm; | |||
214 | ||||
215 | return(gs); | |||
216 | } | |||
217 | ||||
218 | /******************************************************************************/ | |||
219 | static PCTFS_gs_id *gsi_new(void) | |||
220 | { | |||
221 | PetscErrorCode ierr; | |||
222 | PCTFS_gs_id *gs; | |||
223 | gs = (PCTFS_gs_id*) malloc(sizeof(PCTFS_gs_id)); | |||
224 | ierr = PetscMemzero(gs,sizeof(PCTFS_gs_id));CHKERRABORT(PETSC_COMM_WORLD,ierr)do {if (__builtin_expect(!!(ierr),0)) {PetscError(((MPI_Comm) 0x44000001),224,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");MPI_Abort(PETSC_COMM_WORLD,ierr );}} while (0); | |||
225 | return(gs); | |||
226 | } | |||
227 | ||||
228 | /******************************************************************************/ | |||
229 | static PCTFS_gs_id *gsi_check_args(PetscInt *in_elms, PetscInt nel, PetscInt level) | |||
230 | { | |||
231 | PetscInt i, j, k, t2; | |||
232 | PetscInt *companion, *elms, *unique, *iptr; | |||
233 | PetscInt num_local=0, *num_to_reduce, **local_reduce; | |||
234 | PetscInt oprs[] = {NON_UNIFORM0,GL_MIN2,GL_MAX1,GL_ADD4,GL_MIN2,GL_MAX1,GL_MIN2,GL_B_AND7}; | |||
235 | PetscInt vals[sizeof(oprs)/sizeof(oprs[0])-1]; | |||
236 | PetscInt work[sizeof(oprs)/sizeof(oprs[0])-1]; | |||
237 | PCTFS_gs_id *gs; | |||
238 | PetscErrorCode ierr; | |||
239 | ||||
240 | ||||
241 | if (!in_elms) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"elms point to nothing!!!\n")do {PetscError(PETSC_COMM_WORLD,241,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,77,PETSC_ERROR_INITIAL,"elms point to nothing!!!\n");MPI_Abort (PETSC_COMM_WORLD,77);} while (0); | |||
242 | if (nel<0) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"can't have fewer than 0 elms!!!\n")do {PetscError(PETSC_COMM_WORLD,242,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,77,PETSC_ERROR_INITIAL,"can't have fewer than 0 elms!!!\n"); MPI_Abort(PETSC_COMM_WORLD,77);} while (0); | |||
243 | ||||
244 | if (nel==0) { ierr = PetscInfo(0,"I don't have any elements!!!\n")PetscInfo_Private(__func__,0,"I don't have any elements!!!\n" );CHKERRABORT(PETSC_COMM_WORLD,ierr)do {if (__builtin_expect(!!(ierr),0)) {PetscError(((MPI_Comm) 0x44000001),244,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");MPI_Abort(PETSC_COMM_WORLD,ierr );}} while (0); } | |||
245 | ||||
246 | /* get space for gs template */ | |||
247 | gs = gsi_new(); | |||
248 | gs->id = ++num_gs_ids; | |||
249 | ||||
250 | /* hmt 6.4.99 */ | |||
251 | /* caller can set global ids that don't participate to 0 */ | |||
252 | /* PCTFS_gs_init ignores all zeros in elm list */ | |||
253 | /* negative global ids are still invalid */ | |||
254 | for (i=j=0; i<nel; i++) { | |||
255 | if (in_elms[i]!=0) j++; | |||
256 | } | |||
257 | ||||
258 | k=nel; nel=j; | |||
259 | ||||
260 | /* copy over in_elms list and create inverse map */ | |||
261 | elms = (PetscInt*) malloc((nel+1)*sizeof(PetscInt)); | |||
262 | companion = (PetscInt*) malloc(nel*sizeof(PetscInt)); | |||
263 | ||||
264 | for (i=j=0; i<k; i++) { | |||
265 | if (in_elms[i]!=0) { elms[j] = in_elms[i]; companion[j++] = i; } | |||
266 | } | |||
267 | ||||
268 | if (j!=nel) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"nel j mismatch!\n")do {PetscError(PETSC_COMM_WORLD,268,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,77,PETSC_ERROR_INITIAL,"nel j mismatch!\n");MPI_Abort(PETSC_COMM_WORLD ,77);} while (0); | |||
269 | ||||
270 | /* pre-pass ... check to see if sorted */ | |||
271 | elms[nel] = INT_MAX2147483647; | |||
272 | iptr = elms; | |||
273 | unique = elms+1; | |||
274 | j =0; | |||
275 | while (*iptr!=INT_MAX2147483647) { | |||
276 | if (*iptr++>*unique++) { j=1; break; } | |||
277 | } | |||
278 | ||||
279 | /* set up inverse map */ | |||
280 | if (j) { | |||
281 | ierr = PetscInfo(0,"gsi_check_args() :: elm list *not* sorted!\n")PetscInfo_Private(__func__,0,"gsi_check_args() :: elm list *not* sorted!\n" );CHKERRABORT(PETSC_COMM_WORLD,ierr)do {if (__builtin_expect(!!(ierr),0)) {PetscError(((MPI_Comm) 0x44000001),281,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");MPI_Abort(PETSC_COMM_WORLD,ierr );}} while (0); | |||
282 | ierr = PCTFS_SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER1);CHKERRABORT(PETSC_COMM_WORLD,ierr)do {if (__builtin_expect(!!(ierr),0)) {PetscError(((MPI_Comm) 0x44000001),282,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");MPI_Abort(PETSC_COMM_WORLD,ierr );}} while (0); | |||
283 | } else { ierr = PetscInfo(0,"gsi_check_args() :: elm list sorted!\n")PetscInfo_Private(__func__,0,"gsi_check_args() :: elm list sorted!\n" );CHKERRABORT(PETSC_COMM_WORLD,ierr)do {if (__builtin_expect(!!(ierr),0)) {PetscError(((MPI_Comm) 0x44000001),283,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");MPI_Abort(PETSC_COMM_WORLD,ierr );}} while (0); } | |||
284 | elms[nel] = INT_MIN(-2147483647 -1); | |||
285 | ||||
286 | /* first pass */ | |||
287 | /* determine number of unique elements, check pd */ | |||
288 | for (i=k=0; i<nel; i+=j) { | |||
289 | t2 = elms[i]; | |||
290 | j = ++i; | |||
291 | ||||
292 | /* clump 'em for now */ | |||
293 | while (elms[j]==t2) j++; | |||
294 | ||||
295 | /* how many together and num local */ | |||
296 | if (j-=i) { num_local++; k+=j; } | |||
297 | } | |||
298 | ||||
299 | /* how many unique elements? */ | |||
300 | gs->repeats = k; | |||
301 | gs->nel = nel-k; | |||
302 | ||||
303 | ||||
304 | /* number of repeats? */ | |||
305 | gs->num_local = num_local; | |||
306 | num_local += 2; | |||
307 | gs->local_reduce = local_reduce=(PetscInt**)malloc(num_local*sizeof(PetscInt*)); | |||
308 | gs->num_local_reduce = num_to_reduce=(PetscInt*) malloc(num_local*sizeof(PetscInt)); | |||
309 | ||||
310 | unique = (PetscInt*) malloc((gs->nel+1)*sizeof(PetscInt)); | |||
311 | gs->elms = unique; | |||
312 | gs->nel_total = nel; | |||
313 | gs->local_elms = elms; | |||
314 | gs->companion = companion; | |||
315 | ||||
316 | /* compess map as well as keep track of local ops */ | |||
317 | for (num_local=i=j=0; i<gs->nel; i++) { | |||
318 | k = j; | |||
319 | t2 = unique[i] = elms[j]; | |||
320 | companion[i] = companion[j]; | |||
321 | ||||
322 | while (elms[j]==t2) j++; | |||
323 | ||||
324 | if ((t2=(j-k))>1) { | |||
325 | /* number together */ | |||
326 | num_to_reduce[num_local] = t2++; | |||
327 | ||||
328 | iptr = local_reduce[num_local++] = (PetscInt*)malloc(t2*sizeof(PetscInt)); | |||
329 | ||||
330 | /* to use binary searching don't remap until we check intersection */ | |||
331 | *iptr++ = i; | |||
332 | ||||
333 | /* note that we're skipping the first one */ | |||
334 | while (++k<j) *(iptr++) = companion[k]; | |||
335 | *iptr = -1; | |||
336 | } | |||
337 | } | |||
338 | ||||
339 | /* sentinel for ngh_buf */ | |||
340 | unique[gs->nel]=INT_MAX2147483647; | |||
341 | ||||
342 | /* for two partition sort hack */ | |||
343 | num_to_reduce[num_local] = 0; | |||
344 | local_reduce[num_local] = NULL((void*)0); | |||
345 | num_to_reduce[++num_local] = 0; | |||
346 | local_reduce[num_local] = NULL((void*)0); | |||
347 | ||||
348 | /* load 'em up */ | |||
349 | /* note one extra to hold NON_UNIFORM flag!!! */ | |||
350 | vals[2] = vals[1] = vals[0] = nel; | |||
351 | if (gs->nel>0) { | |||
352 | vals[3] = unique[0]; | |||
353 | vals[4] = unique[gs->nel-1]; | |||
354 | } else { | |||
355 | vals[3] = INT_MAX2147483647; | |||
356 | vals[4] = INT_MIN(-2147483647 -1); | |||
357 | } | |||
358 | vals[5] = level; | |||
359 | vals[6] = num_gs_ids; | |||
360 | ||||
361 | /* GLOBAL: send 'em out */ | |||
362 | ierr = PCTFS_giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);CHKERRABORT(PETSC_COMM_WORLD,ierr)do {if (__builtin_expect(!!(ierr),0)) {PetscError(((MPI_Comm) 0x44000001),362,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");MPI_Abort(PETSC_COMM_WORLD,ierr );}} while (0); | |||
363 | ||||
364 | /* must be semi-pos def - only pairwise depends on this */ | |||
365 | /* LATER - remove this restriction */ | |||
366 | if (vals[3]<0) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system not semi-pos def \n")do {PetscError(PETSC_COMM_WORLD,366,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,77,PETSC_ERROR_INITIAL,"gsi_check_args() :: system not semi-pos def \n" );MPI_Abort(PETSC_COMM_WORLD,77);} while (0); | |||
367 | if (vals[4]==INT_MAX2147483647) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system ub too large !\n")do {PetscError(PETSC_COMM_WORLD,367,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,77,PETSC_ERROR_INITIAL,"gsi_check_args() :: system ub too large !\n" );MPI_Abort(PETSC_COMM_WORLD,77);} while (0); | |||
368 | ||||
369 | gs->nel_min = vals[0]; | |||
370 | gs->nel_max = vals[1]; | |||
371 | gs->nel_sum = vals[2]; | |||
372 | gs->gl_min = vals[3]; | |||
373 | gs->gl_max = vals[4]; | |||
374 | gs->negl = vals[4]-vals[3]+1; | |||
375 | ||||
376 | if (gs->negl<=0) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"gsi_check_args() :: system empty or neg :: %d\n")do {PetscError(PETSC_COMM_WORLD,376,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,77,PETSC_ERROR_INITIAL,"gsi_check_args() :: system empty or neg :: %d\n" );MPI_Abort(PETSC_COMM_WORLD,77);} while (0); | |||
377 | ||||
378 | /* LATER :: add level == -1 -> program selects level */ | |||
379 | if (vals[5]<0) vals[5]=0; | |||
380 | else if (vals[5]>PCTFS_num_nodes) vals[5]=PCTFS_num_nodes; | |||
381 | gs->level = vals[5]; | |||
382 | ||||
383 | return(gs); | |||
384 | } | |||
385 | ||||
386 | /******************************************************************************/ | |||
387 | static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs) | |||
388 | { | |||
389 | PetscInt i, nel, *elms; | |||
390 | PetscInt t1; | |||
391 | PetscInt **reduce; | |||
392 | PetscInt *map; | |||
393 | PetscErrorCode ierr; | |||
394 | ||||
395 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c"; petscstack->line[petscstack->currentsize] = 395; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
396 | /* totally local removes ... PCTFS_ct_bits == 0 */ | |||
397 | get_ngh_buf(gs); | |||
398 | ||||
399 | if (gs->level) set_pairwise(gs); | |||
400 | if (gs->max_left_over) set_tree(gs); | |||
401 | ||||
402 | /* intersection local and pairwise/tree? */ | |||
403 | gs->num_local_total = gs->num_local; | |||
404 | gs->gop_local_reduce = gs->local_reduce; | |||
405 | gs->num_gop_local_reduce = gs->num_local_reduce; | |||
406 | ||||
407 | map = gs->companion; | |||
408 | ||||
409 | /* is there any local compression */ | |||
410 | if (!gs->num_local) { | |||
411 | gs->local_strength = NONE0; | |||
412 | gs->num_local_gop = 0; | |||
413 | } else { | |||
414 | /* ok find intersection */ | |||
415 | map = gs->companion; | |||
416 | reduce = gs->local_reduce; | |||
417 | for (i=0, t1=0; i<gs->num_local; i++, reduce++) { | |||
418 | if ((PCTFS_ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0) || PCTFS_ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0) { | |||
419 | t1++; | |||
420 | if (gs->num_local_reduce[i]<=0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"nobody in list?")return PetscError(((MPI_Comm)0x44000001),420,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,77,PETSC_ERROR_INITIAL,"nobody in list?"); | |||
421 | gs->num_local_reduce[i] *= -1; | |||
422 | } | |||
423 | **reduce=map[**reduce]; | |||
424 | } | |||
425 | ||||
426 | /* intersection is empty */ | |||
427 | if (!t1) { | |||
428 | gs->local_strength = FULL2; | |||
429 | gs->num_local_gop = 0; | |||
430 | } else { /* intersection not empty */ | |||
431 | gs->local_strength = PARTIAL1; | |||
432 | ||||
433 | ierr = PCTFS_SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce, gs->num_local + 1, SORT_INT_PTR2);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),433,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
434 | ||||
435 | gs->num_local_gop = t1; | |||
436 | gs->num_local_total = gs->num_local; | |||
437 | gs->num_local -= t1; | |||
438 | gs->gop_local_reduce = gs->local_reduce; | |||
439 | gs->num_gop_local_reduce = gs->num_local_reduce; | |||
440 | ||||
441 | for (i=0; i<t1; i++) { | |||
442 | if (gs->num_gop_local_reduce[i]>=0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"they aren't negative?")return PetscError(((MPI_Comm)0x44000001),442,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,77,PETSC_ERROR_INITIAL,"they aren't negative?"); | |||
443 | gs->num_gop_local_reduce[i] *= -1; | |||
444 | gs->local_reduce++; | |||
445 | gs->num_local_reduce++; | |||
446 | } | |||
447 | gs->local_reduce++; | |||
448 | gs->num_local_reduce++; | |||
449 | } | |||
450 | } | |||
451 | ||||
452 | elms = gs->pw_elm_list; | |||
453 | nel = gs->len_pw_list; | |||
454 | for (i=0; i<nel; i++) elms[i] = map[elms[i]]; | |||
455 | ||||
456 | elms = gs->tree_map_in; | |||
457 | nel = gs->tree_map_sz; | |||
458 | for (i=0; i<nel; i++) elms[i] = map[elms[i]]; | |||
459 | ||||
460 | /* clean up */ | |||
461 | free((void*) gs->local_elms); | |||
462 | free((void*) gs->companion); | |||
463 | free((void*) gs->elms); | |||
464 | free((void*) gs->ngh_buf); | |||
465 | gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL((void*)0); | |||
466 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
467 | } | |||
468 | ||||
469 | /******************************************************************************/ | |||
470 | static PetscErrorCode place_in_tree(PetscInt elm) | |||
471 | { | |||
472 | PetscInt *tp, n; | |||
473 | ||||
474 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c"; petscstack->line[petscstack->currentsize] = 474; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
475 | if (ntree==tree_buf_sz) { | |||
476 | if (tree_buf_sz) { | |||
477 | tp = tree_buf; | |||
478 | n = tree_buf_sz; | |||
479 | tree_buf_sz<<=1; | |||
480 | tree_buf = (PetscInt*)malloc(tree_buf_sz*sizeof(PetscInt)); | |||
481 | PCTFS_ivec_copy(tree_buf,tp,n); | |||
482 | free(tp); | |||
483 | } else { | |||
484 | tree_buf_sz = TREE_BUF_SZ2048;; | |||
485 | tree_buf = (PetscInt*)malloc(tree_buf_sz*sizeof(PetscInt)); | |||
486 | } | |||
487 | } | |||
488 | ||||
489 | tree_buf[ntree++] = elm; | |||
490 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
491 | } | |||
492 | ||||
493 | /******************************************************************************/ | |||
494 | static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs) | |||
495 | { | |||
496 | PetscInt i, j, npw=0, ntree_map=0; | |||
497 | PetscInt p_mask_size, ngh_buf_size, buf_size; | |||
498 | PetscInt *p_mask, *sh_proc_mask, *pw_sh_proc_mask; | |||
499 | PetscInt *ngh_buf, *buf1, *buf2; | |||
500 | PetscInt offset, per_load, num_loads, or_ct, start, end; | |||
501 | PetscInt *ptr1, *ptr2, i_start, negl, nel, *elms; | |||
502 | PetscInt oper=GL_B_OR6; | |||
503 | PetscInt *ptr3, *t_mask, level, ct1, ct2; | |||
504 | PetscErrorCode ierr; | |||
505 | ||||
506 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c"; petscstack->line[petscstack->currentsize] = 506; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
507 | /* to make life easier */ | |||
508 | nel = gs->nel; | |||
509 | elms = gs->elms; | |||
510 | level = gs->level; | |||
511 | ||||
512 | /* det #bytes needed for processor bit masks and init w/mask cor. to PCTFS_my_id */ | |||
513 | p_mask = (PetscInt*) malloc(p_mask_size=PCTFS_len_bit_mask(PCTFS_num_nodes)); | |||
514 | ierr = PCTFS_set_bit_mask(p_mask,p_mask_size,PCTFS_my_id);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),514,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
515 | ||||
516 | /* allocate space for masks and info bufs */ | |||
517 | gs->nghs = sh_proc_mask = (PetscInt*) malloc(p_mask_size); | |||
518 | gs->pw_nghs = pw_sh_proc_mask = (PetscInt*) malloc(p_mask_size); | |||
519 | gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel; | |||
520 | t_mask = (PetscInt*) malloc(p_mask_size); | |||
521 | gs->ngh_buf = ngh_buf = (PetscInt*) malloc(ngh_buf_size); | |||
522 | ||||
523 | /* comm buffer size ... memory usage bounded by ~2*msg_buf */ | |||
524 | /* had thought I could exploit rendezvous threshold */ | |||
525 | ||||
526 | /* default is one pass */ | |||
527 | per_load = negl = gs->negl; | |||
528 | gs->num_loads = num_loads = 1; | |||
529 | i = p_mask_size*negl; | |||
530 | ||||
531 | /* possible overflow on buffer size */ | |||
532 | /* overflow hack */ | |||
533 | if (i<0) i=INT_MAX2147483647; | |||
534 | ||||
535 | buf_size = PetscMin(msg_buf,i)(((msg_buf)<(i)) ? (msg_buf) : (i)); | |||
536 | ||||
537 | /* can we do it? */ | |||
538 | if (p_mask_size>buf_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size)return PetscError(((MPI_Comm)0x44000001),538,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,77,PETSC_ERROR_INITIAL,"get_ngh_buf() :: buf<pms :: %d>%d\n" ,p_mask_size,buf_size); | |||
539 | ||||
540 | /* get PCTFS_giop buf space ... make *only* one malloc */ | |||
541 | buf1 = (PetscInt*) malloc(buf_size<<1); | |||
542 | ||||
543 | /* more than one gior exchange needed? */ | |||
544 | if (buf_size!=i) { | |||
545 | per_load = buf_size/p_mask_size; | |||
546 | buf_size = per_load*p_mask_size; | |||
547 | gs->num_loads = num_loads = negl/per_load + (negl%per_load>0); | |||
548 | } | |||
549 | ||||
550 | ||||
551 | /* convert buf sizes from #bytes to #ints - 32 bit only! */ | |||
552 | p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt); | |||
553 | ||||
554 | /* find PCTFS_giop work space */ | |||
555 | buf2 = buf1+buf_size; | |||
556 | ||||
557 | /* hold #ints needed for processor masks */ | |||
558 | gs->mask_sz=p_mask_size; | |||
559 | ||||
560 | /* init buffers */ | |||
561 | ierr = PCTFS_ivec_zero(sh_proc_mask,p_mask_size);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),561,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
562 | ierr = PCTFS_ivec_zero(pw_sh_proc_mask,p_mask_size);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),562,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
563 | ierr = PCTFS_ivec_zero(ngh_buf,ngh_buf_size);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),563,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
564 | ||||
565 | /* HACK reset tree info */ | |||
566 | tree_buf = NULL((void*)0); | |||
567 | tree_buf_sz = ntree = 0; | |||
568 | ||||
569 | /* ok do it */ | |||
570 | for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++) { | |||
571 | /* identity for bitwise or is 000...000 */ | |||
572 | PCTFS_ivec_zero(buf1,buf_size); | |||
573 | ||||
574 | /* load msg buffer */ | |||
575 | for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++) { | |||
576 | offset = (offset-start)*p_mask_size; | |||
577 | PCTFS_ivec_copy(buf1+offset,p_mask,p_mask_size); | |||
578 | } | |||
579 | ||||
580 | /* GLOBAL: pass buffer */ | |||
581 | ierr = PCTFS_giop(buf1,buf2,buf_size,&oper);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),581,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
582 | ||||
583 | ||||
584 | /* unload buffer into ngh_buf */ | |||
585 | ptr2=(elms+i_start); | |||
586 | for (ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++) { | |||
587 | /* I own it ... may have to pairwise it */ | |||
588 | if (j==*ptr2) { | |||
589 | /* do i share it w/anyone? */ | |||
590 | ct1 = PCTFS_ct_bits((char*)ptr3,p_mask_size*sizeof(PetscInt)); | |||
591 | /* guess not */ | |||
592 | if (ct1<2) { ptr2++; ptr1+=p_mask_size; continue; } | |||
593 | ||||
594 | /* i do ... so keep info and turn off my bit */ | |||
595 | PCTFS_ivec_copy(ptr1,ptr3,p_mask_size); | |||
596 | ierr = PCTFS_ivec_xor(ptr1,p_mask,p_mask_size);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),596,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
597 | ierr = PCTFS_ivec_or(sh_proc_mask,ptr1,p_mask_size);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),597,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
598 | ||||
599 | /* is it to be done pairwise? */ | |||
600 | if (--ct1<=level) { | |||
601 | npw++; | |||
602 | ||||
603 | /* turn on high bit to indicate pw need to process */ | |||
604 | *ptr2++ |= TOP_BIT(-2147483647 - 1); | |||
605 | ierr = PCTFS_ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),605,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
606 | ptr1 += p_mask_size; | |||
607 | continue; | |||
608 | } | |||
609 | ||||
610 | /* get set for next and note that I have a tree contribution */ | |||
611 | /* could save exact elm index for tree here -> save a search */ | |||
612 | ptr2++; ptr1+=p_mask_size; ntree_map++; | |||
613 | } else { /* i don't but still might be involved in tree */ | |||
614 | ||||
615 | /* shared by how many? */ | |||
616 | ct1 = PCTFS_ct_bits((char*)ptr3,p_mask_size*sizeof(PetscInt)); | |||
617 | ||||
618 | /* none! */ | |||
619 | if (ct1<2) continue; | |||
620 | ||||
621 | /* is it going to be done pairwise? but not by me of course!*/ | |||
622 | if (--ct1<=level) continue; | |||
623 | } | |||
624 | /* LATER we're going to have to process it NOW */ | |||
625 | /* nope ... tree it */ | |||
626 | ierr = place_in_tree(j);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),626,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
627 | } | |||
628 | } | |||
629 | ||||
630 | free((void*)t_mask); | |||
631 | free((void*)buf1); | |||
632 | ||||
633 | gs->len_pw_list = npw; | |||
634 | gs->num_nghs = PCTFS_ct_bits((char*)sh_proc_mask,p_mask_size*sizeof(PetscInt)); | |||
635 | ||||
636 | /* expand from bit mask list to int list and save ngh list */ | |||
637 | gs->nghs = (PetscInt*) malloc(gs->num_nghs * sizeof(PetscInt)); | |||
638 | PCTFS_bm_to_proc((char*)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs); | |||
639 | ||||
640 | gs->num_pw_nghs = PCTFS_ct_bits((char*)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt)); | |||
641 | ||||
642 | oper = GL_MAX1; | |||
643 | ct1 = gs->num_nghs; | |||
644 | ierr = PCTFS_giop(&ct1,&ct2,1,&oper);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),644,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
645 | gs->max_nghs = ct1; | |||
646 | ||||
647 | gs->tree_map_sz = ntree_map; | |||
648 | gs->max_left_over=ntree; | |||
649 | ||||
650 | free((void*)p_mask); | |||
651 | free((void*)sh_proc_mask); | |||
652 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
653 | } | |||
654 | ||||
655 | /******************************************************************************/ | |||
656 | static PetscErrorCode set_pairwise(PCTFS_gs_id *gs) | |||
657 | { | |||
658 | PetscInt i, j; | |||
659 | PetscInt p_mask_size; | |||
660 | PetscInt *p_mask, *sh_proc_mask, *tmp_proc_mask; | |||
661 | PetscInt *ngh_buf, *buf2; | |||
662 | PetscInt offset; | |||
663 | PetscInt *msg_list, *msg_size, **msg_nodes, nprs; | |||
664 | PetscInt *pairwise_elm_list, len_pair_list=0; | |||
665 | PetscInt *iptr, t1, i_start, nel, *elms; | |||
666 | PetscInt ct; | |||
667 | PetscErrorCode ierr; | |||
668 | ||||
669 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c"; petscstack->line[petscstack->currentsize] = 669; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
670 | /* to make life easier */ | |||
671 | nel = gs->nel; | |||
672 | elms = gs->elms; | |||
673 | ngh_buf = gs->ngh_buf; | |||
674 | sh_proc_mask = gs->pw_nghs; | |||
675 | ||||
676 | /* need a few temp masks */ | |||
677 | p_mask_size = PCTFS_len_bit_mask(PCTFS_num_nodes); | |||
678 | p_mask = (PetscInt*) malloc(p_mask_size); | |||
679 | tmp_proc_mask = (PetscInt*) malloc(p_mask_size); | |||
680 | ||||
681 | /* set mask to my PCTFS_my_id's bit mask */ | |||
682 | ierr = PCTFS_set_bit_mask(p_mask,p_mask_size,PCTFS_my_id);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),682,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
683 | ||||
684 | p_mask_size /= sizeof(PetscInt); | |||
685 | ||||
686 | len_pair_list = gs->len_pw_list; | |||
687 | gs->pw_elm_list = pairwise_elm_list=(PetscInt*)malloc((len_pair_list+1)*sizeof(PetscInt)); | |||
| ||||
688 | ||||
689 | /* how many processors (nghs) do we have to exchange with? */ | |||
690 | nprs = gs->num_pairs = PCTFS_ct_bits((char*)sh_proc_mask,p_mask_size*sizeof(PetscInt)); | |||
691 | ||||
692 | ||||
693 | /* allocate space for PCTFS_gs_gop() info */ | |||
694 | gs->pair_list = msg_list = (PetscInt*) malloc(sizeof(PetscInt)*nprs); | |||
695 | gs->msg_sizes = msg_size = (PetscInt*) malloc(sizeof(PetscInt)*nprs); | |||
696 | gs->node_list = msg_nodes = (PetscInt**) malloc(sizeof(PetscInt*)*(nprs+1)); | |||
697 | ||||
698 | /* init msg_size list */ | |||
699 | ierr = PCTFS_ivec_zero(msg_size,nprs);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),699,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
700 | ||||
701 | /* expand from bit mask list to int list */ | |||
702 | ierr = PCTFS_bm_to_proc((char*)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),702,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
703 | ||||
704 | /* keep list of elements being handled pairwise */ | |||
705 | for (i=j=0; i<nel; i++) { | |||
706 | if (elms[i] & TOP_BIT(-2147483647 - 1)) { elms[i] ^= TOP_BIT(-2147483647 - 1); pairwise_elm_list[j++] = i; } | |||
707 | } | |||
708 | pairwise_elm_list[j] = -1; | |||
709 | ||||
710 | gs->msg_ids_out = (MPI_Request*) malloc(sizeof(MPI_Request)*(nprs+1)); | |||
711 | gs->msg_ids_out[nprs] = MPI_REQUEST_NULL((MPI_Request)0x2c000000); | |||
712 | gs->msg_ids_in = (MPI_Request*) malloc(sizeof(MPI_Request)*(nprs+1)); | |||
713 | gs->msg_ids_in[nprs] = MPI_REQUEST_NULL((MPI_Request)0x2c000000); | |||
714 | gs->pw_vals = (PetscScalar*) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz); | |||
715 | ||||
716 | /* find who goes to each processor */ | |||
717 | for (i_start=i=0; i<nprs; i++) { | |||
718 | /* processor i's mask */ | |||
719 | ierr = PCTFS_set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),719,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
720 | ||||
721 | /* det # going to processor i */ | |||
722 | for (ct=j=0; j<len_pair_list; j++) { | |||
723 | buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); | |||
| ||||
724 | ierr = PCTFS_ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),724,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
725 | if (PCTFS_ct_bits((char*)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) ct++; | |||
726 | } | |||
727 | msg_size[i] = ct; | |||
728 | i_start = PetscMax(i_start,ct)(((i_start)<(ct)) ? (ct) : (i_start)); | |||
729 | ||||
730 | /*space to hold nodes in message to first neighbor */ | |||
731 | msg_nodes[i] = iptr = (PetscInt*) malloc(sizeof(PetscInt)*(ct+1)); | |||
732 | ||||
733 | for (j=0;j<len_pair_list;j++) { | |||
734 | buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); | |||
735 | ierr = PCTFS_ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),735,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
736 | if (PCTFS_ct_bits((char*)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) *iptr++ = j; | |||
737 | } | |||
738 | *iptr = -1; | |||
739 | } | |||
740 | msg_nodes[nprs] = NULL((void*)0); | |||
741 | ||||
742 | j = gs->loc_node_pairs=i_start; | |||
743 | t1 = GL_MAX1; | |||
744 | ierr = PCTFS_giop(&i_start,&offset,1,&t1);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),744,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
745 | gs->max_node_pairs = i_start; | |||
746 | ||||
747 | i_start = j; | |||
748 | t1 = GL_MIN2; | |||
749 | ierr = PCTFS_giop(&i_start,&offset,1,&t1);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),749,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
750 | gs->min_node_pairs = i_start; | |||
751 | ||||
752 | i_start = j; | |||
753 | t1 = GL_ADD4; | |||
754 | ierr = PCTFS_giop(&i_start,&offset,1,&t1);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),754,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
755 | gs->avg_node_pairs = i_start/PCTFS_num_nodes + 1; | |||
756 | ||||
757 | i_start = nprs; | |||
758 | t1 = GL_MAX1; | |||
759 | PCTFS_giop(&i_start,&offset,1,&t1); | |||
760 | gs->max_pairs = i_start; | |||
761 | ||||
762 | ||||
763 | /* remap pairwise in tail of gsi_via_bit_mask() */ | |||
764 | gs->msg_total = PCTFS_ivec_sum(gs->msg_sizes,nprs); | |||
765 | gs->out = (PetscScalar*) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); | |||
766 | gs->in = (PetscScalar*) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); | |||
767 | ||||
768 | /* reset malloc pool */ | |||
769 | free((void*)p_mask); | |||
770 | free((void*)tmp_proc_mask); | |||
771 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
772 | } | |||
773 | ||||
774 | /* to do pruned tree just save ngh buf copy for each one and decode here! | |||
775 | ******************************************************************************/ | |||
776 | static PetscErrorCode set_tree(PCTFS_gs_id *gs) | |||
777 | { | |||
778 | PetscInt i, j, n, nel; | |||
779 | PetscInt *iptr_in, *iptr_out, *tree_elms, *elms; | |||
780 | ||||
781 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c"; petscstack->line[petscstack->currentsize] = 781; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
782 | /* local work ptrs */ | |||
783 | elms = gs->elms; | |||
784 | nel = gs->nel; | |||
785 | ||||
786 | /* how many via tree */ | |||
787 | gs->tree_nel = n = ntree; | |||
788 | gs->tree_elms = tree_elms = iptr_in = tree_buf; | |||
789 | gs->tree_buf = (PetscScalar*) malloc(sizeof(PetscScalar)*n*vec_sz); | |||
790 | gs->tree_work = (PetscScalar*) malloc(sizeof(PetscScalar)*n*vec_sz); | |||
791 | j = gs->tree_map_sz; | |||
792 | gs->tree_map_in = iptr_in = (PetscInt*) malloc(sizeof(PetscInt)*(j+1)); | |||
793 | gs->tree_map_out = iptr_out = (PetscInt*) malloc(sizeof(PetscInt)*(j+1)); | |||
794 | ||||
795 | /* search the longer of the two lists */ | |||
796 | /* note ... could save this info in get_ngh_buf and save searches */ | |||
797 | if (n<=nel) { | |||
798 | /* bijective fct w/remap - search elm list */ | |||
799 | for (i=0; i<n; i++) { | |||
800 | if ((j=PCTFS_ivec_binary_search(*tree_elms++,elms,nel))>=0) {*iptr_in++ = j; *iptr_out++ = i;} | |||
801 | } | |||
802 | } else { | |||
803 | for (i=0; i<nel; i++) { | |||
804 | if ((j=PCTFS_ivec_binary_search(*elms++,tree_elms,n))>=0) {*iptr_in++ = i; *iptr_out++ = j;} | |||
805 | } | |||
806 | } | |||
807 | ||||
808 | /* sentinel */ | |||
809 | *iptr_in = *iptr_out = -1; | |||
810 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
811 | } | |||
812 | ||||
813 | /******************************************************************************/ | |||
814 | static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals) | |||
815 | { | |||
816 | PetscInt *num, *map, **reduce; | |||
817 | PetscScalar tmp; | |||
818 | ||||
819 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c"; petscstack->line[petscstack->currentsize] = 819; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
820 | num = gs->num_gop_local_reduce; | |||
821 | reduce = gs->gop_local_reduce; | |||
822 | while ((map = *reduce++)) { | |||
823 | /* wall */ | |||
824 | if (*num == 2) { | |||
825 | num++; | |||
826 | vals[map[1]] = vals[map[0]]; | |||
827 | } else if (*num == 3) { /* corner shared by three elements */ | |||
828 | num++; | |||
829 | vals[map[2]] = vals[map[1]] = vals[map[0]]; | |||
830 | } else if (*num == 4) { /* corner shared by four elements */ | |||
831 | num++; | |||
832 | vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]]; | |||
833 | } else { /* general case ... odd geoms ... 3D*/ | |||
834 | num++; | |||
835 | tmp = *(vals + *map++); | |||
836 | while (*map >= 0) *(vals + *map++) = tmp; | |||
837 | } | |||
838 | } | |||
839 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
840 | } | |||
841 | ||||
842 | /******************************************************************************/ | |||
843 | static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals) | |||
844 | { | |||
845 | PetscInt *num, *map, **reduce; | |||
846 | PetscScalar tmp; | |||
847 | ||||
848 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c"; petscstack->line[petscstack->currentsize] = 848; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
849 | num = gs->num_local_reduce; | |||
850 | reduce = gs->local_reduce; | |||
851 | while ((map = *reduce)) { | |||
852 | /* wall */ | |||
853 | if (*num == 2) { | |||
854 | num++; reduce++; | |||
855 | vals[map[1]] = vals[map[0]] += vals[map[1]]; | |||
856 | } else if (*num == 3) { /* corner shared by three elements */ | |||
857 | num++; reduce++; | |||
858 | vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]); | |||
859 | } else if (*num == 4) { /* corner shared by four elements */ | |||
860 | num++; reduce++; | |||
861 | vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); | |||
862 | } else { /* general case ... odd geoms ... 3D*/ | |||
863 | num++; | |||
864 | tmp = 0.0; | |||
865 | while (*map >= 0) tmp += *(vals + *map++); | |||
866 | ||||
867 | map = *reduce++; | |||
868 | while (*map >= 0) *(vals + *map++) = tmp; | |||
869 | } | |||
870 | } | |||
871 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
872 | } | |||
873 | ||||
874 | /******************************************************************************/ | |||
875 | static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals) | |||
876 | { | |||
877 | PetscInt *num, *map, **reduce; | |||
878 | PetscScalar *base; | |||
879 | ||||
880 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c"; petscstack->line[petscstack->currentsize] = 880; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
881 | num = gs->num_gop_local_reduce; | |||
882 | reduce = gs->gop_local_reduce; | |||
883 | while ((map = *reduce++)) { | |||
884 | /* wall */ | |||
885 | if (*num == 2) { | |||
886 | num++; | |||
887 | vals[map[0]] += vals[map[1]]; | |||
888 | } else if (*num == 3) { /* corner shared by three elements */ | |||
889 | num++; | |||
890 | vals[map[0]] += (vals[map[1]] + vals[map[2]]); | |||
891 | } else if (*num == 4) { /* corner shared by four elements */ | |||
892 | num++; | |||
893 | vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); | |||
894 | } else { /* general case ... odd geoms ... 3D*/ | |||
895 | num++; | |||
896 | base = vals + *map++; | |||
897 | while (*map >= 0) *base += *(vals + *map++); | |||
898 | } | |||
899 | } | |||
900 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
901 | } | |||
902 | ||||
903 | /******************************************************************************/ | |||
904 | PetscErrorCode PCTFS_gs_free(PCTFS_gs_id *gs) | |||
905 | { | |||
906 | PetscInt i; | |||
907 | PetscErrorCode ierr; | |||
908 | ||||
909 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c"; petscstack->line[petscstack->currentsize] = 909; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
910 | ierr = MPI_Comm_free(&gs->PCTFS_gs_comm);CHKERRABORT(PETSC_COMM_WORLD,ierr)do {if (__builtin_expect(!!(ierr),0)) {PetscError(((MPI_Comm) 0x44000001),910,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");MPI_Abort(PETSC_COMM_WORLD,ierr );}} while (0); | |||
911 | if (gs->nghs) free((void*) gs->nghs); | |||
912 | if (gs->pw_nghs) free((void*) gs->pw_nghs); | |||
913 | ||||
914 | /* tree */ | |||
915 | if (gs->max_left_over) { | |||
916 | if (gs->tree_elms) free((void*) gs->tree_elms); | |||
917 | if (gs->tree_buf) free((void*) gs->tree_buf); | |||
918 | if (gs->tree_work) free((void*) gs->tree_work); | |||
919 | if (gs->tree_map_in) free((void*) gs->tree_map_in); | |||
920 | if (gs->tree_map_out) free((void*) gs->tree_map_out); | |||
921 | } | |||
922 | ||||
923 | /* pairwise info */ | |||
924 | if (gs->num_pairs) { | |||
925 | /* should be NULL already */ | |||
926 | if (gs->ngh_buf) free((void*) gs->ngh_buf); | |||
927 | if (gs->elms) free((void*) gs->elms); | |||
928 | if (gs->local_elms) free((void*) gs->local_elms); | |||
929 | if (gs->companion) free((void*) gs->companion); | |||
930 | ||||
931 | /* only set if pairwise */ | |||
932 | if (gs->vals) free((void*) gs->vals); | |||
933 | if (gs->in) free((void*) gs->in); | |||
934 | if (gs->out) free((void*) gs->out); | |||
935 | if (gs->msg_ids_in) free((void*) gs->msg_ids_in); | |||
936 | if (gs->msg_ids_out) free((void*) gs->msg_ids_out); | |||
937 | if (gs->pw_vals) free((void*) gs->pw_vals); | |||
938 | if (gs->pw_elm_list) free((void*) gs->pw_elm_list); | |||
939 | if (gs->node_list) { | |||
940 | for (i=0;i<gs->num_pairs;i++) { | |||
941 | if (gs->node_list[i]) { | |||
942 | free((void*) gs->node_list[i]); | |||
943 | } | |||
944 | } | |||
945 | free((void*) gs->node_list); | |||
946 | } | |||
947 | if (gs->msg_sizes) free((void*) gs->msg_sizes); | |||
948 | if (gs->pair_list) free((void*) gs->pair_list); | |||
949 | } | |||
950 | ||||
951 | /* local info */ | |||
952 | if (gs->num_local_total>=0) { | |||
953 | for (i=0;i<gs->num_local_total+1;i++) { | |||
954 | if (gs->num_gop_local_reduce[i]) free((void*) gs->gop_local_reduce[i]); | |||
955 | } | |||
956 | } | |||
957 | ||||
958 | /* if intersection tree/pairwise and local isn't empty */ | |||
959 | if (gs->gop_local_reduce) free((void*) gs->gop_local_reduce); | |||
960 | if (gs->num_gop_local_reduce) free((void*) gs->num_gop_local_reduce); | |||
961 | ||||
962 | free((void*) gs); | |||
963 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
964 | } | |||
965 | ||||
966 | /******************************************************************************/ | |||
967 | PetscErrorCode PCTFS_gs_gop_vec(PCTFS_gs_id *gs, PetscScalar *vals, const char *op, PetscInt step) | |||
968 | { | |||
969 | PetscErrorCode ierr; | |||
970 | ||||
971 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c"; petscstack->line[petscstack->currentsize] = 971; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
972 | switch (*op) { | |||
973 | case '+': | |||
974 | PCTFS_gs_gop_vec_plus(gs,vals,step); | |||
975 | break; | |||
976 | default: | |||
977 | ierr = PetscInfo1(0,"PCTFS_gs_gop_vec() :: %c is not a valid op\n",op[0])PetscInfo_Private(__func__,0,"PCTFS_gs_gop_vec() :: %c is not a valid op\n" ,op[0]);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),977,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
978 | ierr = PetscInfo(0,"PCTFS_gs_gop_vec() :: default :: plus\n")PetscInfo_Private(__func__,0,"PCTFS_gs_gop_vec() :: default :: plus\n" );CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),978,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
979 | PCTFS_gs_gop_vec_plus(gs,vals,step); | |||
980 | break; | |||
981 | } | |||
982 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
983 | } | |||
984 | ||||
985 | /******************************************************************************/ | |||
986 | static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) | |||
987 | { | |||
988 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c"; petscstack->line[petscstack->currentsize] = 988; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
989 | if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"PCTFS_gs_gop_vec() passed NULL gs handle!!!")return PetscError(((MPI_Comm)0x44000001),989,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,77,PETSC_ERROR_INITIAL,"PCTFS_gs_gop_vec() passed NULL gs handle!!!" ); | |||
990 | ||||
991 | /* local only operations!!! */ | |||
992 | if (gs->num_local) PCTFS_gs_gop_vec_local_plus(gs,vals,step); | |||
993 | ||||
994 | /* if intersection tree/pairwise and local isn't empty */ | |||
995 | if (gs->num_local_gop) { | |||
996 | PCTFS_gs_gop_vec_local_in_plus(gs,vals,step); | |||
997 | ||||
998 | /* pairwise */ | |||
999 | if (gs->num_pairs) PCTFS_gs_gop_vec_pairwise_plus(gs,vals,step); | |||
1000 | ||||
1001 | /* tree */ | |||
1002 | else if (gs->max_left_over) PCTFS_gs_gop_vec_tree_plus(gs,vals,step); | |||
1003 | ||||
1004 | PCTFS_gs_gop_vec_local_out(gs,vals,step); | |||
1005 | } else { /* if intersection tree/pairwise and local is empty */ | |||
1006 | /* pairwise */ | |||
1007 | if (gs->num_pairs) PCTFS_gs_gop_vec_pairwise_plus(gs,vals,step); | |||
1008 | ||||
1009 | /* tree */ | |||
1010 | else if (gs->max_left_over) PCTFS_gs_gop_vec_tree_plus(gs,vals,step); | |||
1011 | } | |||
1012 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1013 | } | |||
1014 | ||||
1015 | /******************************************************************************/ | |||
1016 | static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) | |||
1017 | { | |||
1018 | PetscInt *num, *map, **reduce; | |||
1019 | PetscScalar *base; | |||
1020 | ||||
1021 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c"; petscstack->line[petscstack->currentsize] = 1021; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1022 | num = gs->num_local_reduce; | |||
1023 | reduce = gs->local_reduce; | |||
1024 | while ((map = *reduce)) { | |||
1025 | base = vals + map[0] * step; | |||
1026 | ||||
1027 | /* wall */ | |||
1028 | if (*num == 2) { | |||
1029 | num++; reduce++; | |||
1030 | PCTFS_rvec_add (base,vals+map[1]*step,step); | |||
1031 | PCTFS_rvec_copy(vals+map[1]*step,base,step); | |||
1032 | } else if (*num == 3) { /* corner shared by three elements */ | |||
1033 | num++; reduce++; | |||
1034 | PCTFS_rvec_add (base,vals+map[1]*step,step); | |||
1035 | PCTFS_rvec_add (base,vals+map[2]*step,step); | |||
1036 | PCTFS_rvec_copy(vals+map[2]*step,base,step); | |||
1037 | PCTFS_rvec_copy(vals+map[1]*step,base,step); | |||
1038 | } else if (*num == 4) { /* corner shared by four elements */ | |||
1039 | num++; reduce++; | |||
1040 | PCTFS_rvec_add (base,vals+map[1]*step,step); | |||
1041 | PCTFS_rvec_add (base,vals+map[2]*step,step); | |||
1042 | PCTFS_rvec_add (base,vals+map[3]*step,step); | |||
1043 | PCTFS_rvec_copy(vals+map[3]*step,base,step); | |||
1044 | PCTFS_rvec_copy(vals+map[2]*step,base,step); | |||
1045 | PCTFS_rvec_copy(vals+map[1]*step,base,step); | |||
1046 | } else { /* general case ... odd geoms ... 3D */ | |||
1047 | num++; | |||
1048 | while (*++map >= 0) PCTFS_rvec_add (base,vals+*map*step,step); | |||
1049 | ||||
1050 | map = *reduce; | |||
1051 | while (*++map >= 0) PCTFS_rvec_copy(vals+*map*step,base,step); | |||
1052 | ||||
1053 | reduce++; | |||
1054 | } | |||
1055 | } | |||
1056 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1057 | } | |||
1058 | ||||
1059 | /******************************************************************************/ | |||
1060 | static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) | |||
1061 | { | |||
1062 | PetscInt *num, *map, **reduce; | |||
1063 | PetscScalar *base; | |||
1064 | ||||
1065 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c"; petscstack->line[petscstack->currentsize] = 1065; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1066 | num = gs->num_gop_local_reduce; | |||
1067 | reduce = gs->gop_local_reduce; | |||
1068 | while ((map = *reduce++)) { | |||
1069 | base = vals + map[0] * step; | |||
1070 | ||||
1071 | /* wall */ | |||
1072 | if (*num == 2) { | |||
1073 | num++; | |||
1074 | PCTFS_rvec_add(base,vals+map[1]*step,step); | |||
1075 | } else if (*num == 3) { /* corner shared by three elements */ | |||
1076 | num++; | |||
1077 | PCTFS_rvec_add(base,vals+map[1]*step,step); | |||
1078 | PCTFS_rvec_add(base,vals+map[2]*step,step); | |||
1079 | } else if (*num == 4) { /* corner shared by four elements */ | |||
1080 | num++; | |||
1081 | PCTFS_rvec_add(base,vals+map[1]*step,step); | |||
1082 | PCTFS_rvec_add(base,vals+map[2]*step,step); | |||
1083 | PCTFS_rvec_add(base,vals+map[3]*step,step); | |||
1084 | } else { /* general case ... odd geoms ... 3D*/ | |||
1085 | num++; | |||
1086 | while (*++map >= 0) PCTFS_rvec_add(base,vals+*map*step,step); | |||
1087 | } | |||
1088 | } | |||
1089 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1090 | } | |||
1091 | ||||
1092 | /******************************************************************************/ | |||
1093 | static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) | |||
1094 | { | |||
1095 | PetscInt *num, *map, **reduce; | |||
1096 | PetscScalar *base; | |||
1097 | ||||
1098 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c"; petscstack->line[petscstack->currentsize] = 1098; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1099 | num = gs->num_gop_local_reduce; | |||
1100 | reduce = gs->gop_local_reduce; | |||
1101 | while ((map = *reduce++)) { | |||
1102 | base = vals + map[0] * step; | |||
1103 | ||||
1104 | /* wall */ | |||
1105 | if (*num == 2) { | |||
1106 | num++; | |||
1107 | PCTFS_rvec_copy(vals+map[1]*step,base,step); | |||
1108 | } else if (*num == 3) { /* corner shared by three elements */ | |||
1109 | num++; | |||
1110 | PCTFS_rvec_copy(vals+map[1]*step,base,step); | |||
1111 | PCTFS_rvec_copy(vals+map[2]*step,base,step); | |||
1112 | } else if (*num == 4) { /* corner shared by four elements */ | |||
1113 | num++; | |||
1114 | PCTFS_rvec_copy(vals+map[1]*step,base,step); | |||
1115 | PCTFS_rvec_copy(vals+map[2]*step,base,step); | |||
1116 | PCTFS_rvec_copy(vals+map[3]*step,base,step); | |||
1117 | } else { /* general case ... odd geoms ... 3D*/ | |||
1118 | num++; | |||
1119 | while (*++map >= 0) PCTFS_rvec_copy(vals+*map*step,base,step); | |||
1120 | } | |||
1121 | } | |||
1122 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1123 | } | |||
1124 | ||||
1125 | /******************************************************************************/ | |||
1126 | static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step) | |||
1127 | { | |||
1128 | PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; | |||
1129 | PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; | |||
1130 | PetscInt *pw, *list, *size, **nodes; | |||
1131 | MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; | |||
1132 | MPI_Status status; | |||
1133 | PetscBLASInt i1 = 1,dstep; | |||
1134 | PetscErrorCode ierr; | |||
1135 | ||||
1136 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c"; petscstack->line[petscstack->currentsize] = 1136; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1137 | /* strip and load s */ | |||
1138 | msg_list = list = gs->pair_list; | |||
1139 | msg_size = size = gs->msg_sizes; | |||
1140 | msg_nodes = nodes = gs->node_list; | |||
1141 | iptr = pw = gs->pw_elm_list; | |||
1142 | dptr1 = dptr3 = gs->pw_vals; | |||
1143 | msg_ids_in = ids_in = gs->msg_ids_in; | |||
1144 | msg_ids_out = ids_out = gs->msg_ids_out; | |||
1145 | dptr2 = gs->out; | |||
1146 | in1=in2 = gs->in; | |||
1147 | ||||
1148 | /* post the receives */ | |||
1149 | /* msg_nodes=nodes; */ | |||
1150 | do { | |||
1151 | /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the | |||
1152 | second one *list and do list++ afterwards */ | |||
1153 | ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in)((petsc_irecv_ct++,0) || PetscMPITypeSize(&(petsc_irecv_len ),(*size *step),(((MPI_Datatype)0x4c00080b))) || MPI_Irecv((in1 ),(*size *step),(((MPI_Datatype)0x4c00080b)),((-2)),(1001 + * list),(gs->PCTFS_gs_comm),(msg_ids_in)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1153,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1154 | list++;msg_ids_in++; | |||
1155 | in1 += *size++ *step; | |||
1156 | } while (*++msg_nodes); | |||
1157 | msg_nodes=nodes; | |||
1158 | ||||
1159 | /* load gs values into in out gs buffers */ | |||
1160 | while (*iptr >= 0) { | |||
1161 | PCTFS_rvec_copy(dptr3,in_vals + *iptr*step,step); | |||
1162 | dptr3+=step; | |||
1163 | iptr++; | |||
1164 | } | |||
1165 | ||||
1166 | /* load out buffers and post the sends */ | |||
1167 | while ((iptr = *msg_nodes++)) { | |||
1168 | dptr3 = dptr2; | |||
1169 | while (*iptr >= 0) { | |||
1170 | PCTFS_rvec_copy(dptr2,dptr1 + *iptr*step,step); | |||
1171 | dptr2+=step; | |||
1172 | iptr++; | |||
1173 | } | |||
1174 | ierr = MPI_Isend(dptr3, *msg_size *step, MPIU_SCALAR, *msg_list, MSGTAG1+PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out)((petsc_isend_ct++,0) || PetscMPITypeSize(&(petsc_isend_len ),(*msg_size *step),(((MPI_Datatype)0x4c00080b))) || MPI_Isend ((dptr3),(*msg_size *step),(((MPI_Datatype)0x4c00080b)),(*msg_list ),(1001 +PCTFS_my_id),(gs->PCTFS_gs_comm),(msg_ids_out)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1174,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1175 | msg_size++; msg_list++;msg_ids_out++; | |||
1176 | } | |||
1177 | ||||
1178 | /* tree */ | |||
1179 | if (gs->max_left_over) PCTFS_gs_gop_vec_tree_plus(gs,in_vals,step); | |||
1180 | ||||
1181 | /* process the received data */ | |||
1182 | msg_nodes=nodes; | |||
1183 | while ((iptr = *nodes++)) { | |||
1184 | PetscScalar d1 = 1.0; | |||
1185 | ||||
1186 | /* Should I check the return value of MPI_Wait() or status? */ | |||
1187 | /* Can this loop be replaced by a call to MPI_Waitall()? */ | |||
1188 | ierr = MPI_Wait(ids_in, &status)((petsc_wait_ct++,petsc_sum_of_waits_ct++,0) || MPI_Wait((ids_in ),(&status)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1188,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1189 | ids_in++; | |||
1190 | while (*iptr >= 0) { | |||
1191 | ierr = PetscBLASIntCast(step,&dstep);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1191,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1192 | PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&dstep,&d1,in2,&i1,dptr1 + *iptr*step,&i1))do { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = "BLASaxpy"; petscstack->file[petscstack->currentsize ] = "/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ; petscstack->line[petscstack->currentsize] = 1192; petscstack ->petscroutine[petscstack->currentsize] = PETSC_FALSE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_TRUE || petscstack->hotdepth); } ; } while (0); daxpy_(&dstep,&d1,in2,&i1,dptr1 + *iptr*step,& i1); do { do {PetscErrorCode _7_ierr = PetscMallocValidate(1192 ,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" );do {if (__builtin_expect(!!(_7_ierr),0)) return PetscError( ((MPI_Comm)0x44000001),1192,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,_7_ierr,PETSC_ERROR_REPEAT," ");} while (0);} while(0); do { ; if (petscstack && petscstack->currentsize > 0 ) { petscstack->currentsize--; petscstack->function[petscstack ->currentsize] = 0; petscstack->file[petscstack->currentsize ] = 0; petscstack->line[petscstack->currentsize] = 0; petscstack ->petscroutine[petscstack->currentsize] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack-> hotdepth-1)<(0)) ? (0) : (petscstack->hotdepth-1)); } ; } while (0); } while (0); } while(0); | |||
1193 | in2+=step; | |||
1194 | iptr++; | |||
1195 | } | |||
1196 | } | |||
1197 | ||||
1198 | /* replace vals */ | |||
1199 | while (*pw >= 0) { | |||
1200 | PCTFS_rvec_copy(in_vals + *pw*step,dptr1,step); | |||
1201 | dptr1+=step; | |||
1202 | pw++; | |||
1203 | } | |||
1204 | ||||
1205 | /* clear isend message handles */ | |||
1206 | /* This changed for clarity though it could be the same */ | |||
1207 | ||||
1208 | /* Should I check the return value of MPI_Wait() or status? */ | |||
1209 | /* Can this loop be replaced by a call to MPI_Waitall()? */ | |||
1210 | while (*msg_nodes++) { | |||
1211 | ierr = MPI_Wait(ids_out, &status)((petsc_wait_ct++,petsc_sum_of_waits_ct++,0) || MPI_Wait((ids_out ),(&status)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1211,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1212 | ids_out++; | |||
1213 | } | |||
1214 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1215 | } | |||
1216 | ||||
1217 | /******************************************************************************/ | |||
1218 | static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step) | |||
1219 | { | |||
1220 | PetscInt size, *in, *out; | |||
1221 | PetscScalar *buf, *work; | |||
1222 | PetscInt op[] = {GL_ADD4,0}; | |||
1223 | PetscBLASInt i1 = 1; | |||
1224 | PetscErrorCode ierr; | |||
1225 | PetscBLASInt dstep; | |||
1226 | ||||
1227 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c"; petscstack->line[petscstack->currentsize] = 1227; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1228 | /* copy over to local variables */ | |||
1229 | in = gs->tree_map_in; | |||
1230 | out = gs->tree_map_out; | |||
1231 | buf = gs->tree_buf; | |||
1232 | work = gs->tree_work; | |||
1233 | size = gs->tree_nel*step; | |||
1234 | ||||
1235 | /* zero out collection buffer */ | |||
1236 | PCTFS_rvec_zero(buf,size); | |||
1237 | ||||
1238 | ||||
1239 | /* copy over my contributions */ | |||
1240 | while (*in >= 0) { | |||
1241 | ierr = PetscBLASIntCast(step,&dstep);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1241,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1242 | PetscStackCallBLAS("BLAScopy",BLAScopy_(&dstep,vals + *in++ * step,&i1,buf + *out++ * step,&i1))do { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = "BLAScopy"; petscstack->file[petscstack->currentsize ] = "/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ; petscstack->line[petscstack->currentsize] = 1242; petscstack ->petscroutine[petscstack->currentsize] = PETSC_FALSE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_TRUE || petscstack->hotdepth); } ; } while (0); dcopy_(&dstep,vals + *in++ * step,&i1,buf + *out++ * step,&i1); do { do {PetscErrorCode _7_ierr = PetscMallocValidate (1242,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" );do {if (__builtin_expect(!!(_7_ierr),0)) return PetscError( ((MPI_Comm)0x44000001),1242,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,_7_ierr,PETSC_ERROR_REPEAT," ");} while (0);} while(0); do { ; if (petscstack && petscstack->currentsize > 0 ) { petscstack->currentsize--; petscstack->function[petscstack ->currentsize] = 0; petscstack->file[petscstack->currentsize ] = 0; petscstack->line[petscstack->currentsize] = 0; petscstack ->petscroutine[petscstack->currentsize] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack-> hotdepth-1)<(0)) ? (0) : (petscstack->hotdepth-1)); } ; } while (0); } while (0); } while(0); | |||
1243 | } | |||
1244 | ||||
1245 | /* perform fan in/out on full buffer */ | |||
1246 | /* must change PCTFS_grop to handle the blas */ | |||
1247 | PCTFS_grop(buf,work,size,op); | |||
1248 | ||||
1249 | /* reset */ | |||
1250 | in = gs->tree_map_in; | |||
1251 | out = gs->tree_map_out; | |||
1252 | ||||
1253 | /* get the portion of the results I need */ | |||
1254 | while (*in >= 0) { | |||
1255 | ierr = PetscBLASIntCast(step,&dstep);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1255,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1256 | PetscStackCallBLAS("BLAScopy",BLAScopy_(&dstep,buf + *out++ * step,&i1,vals + *in++ * step,&i1))do { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = "BLAScopy"; petscstack->file[petscstack->currentsize ] = "/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ; petscstack->line[petscstack->currentsize] = 1256; petscstack ->petscroutine[petscstack->currentsize] = PETSC_FALSE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_TRUE || petscstack->hotdepth); } ; } while (0); dcopy_(&dstep,buf + *out++ * step,&i1,vals + *in++ * step,&i1); do { do {PetscErrorCode _7_ierr = PetscMallocValidate (1256,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" );do {if (__builtin_expect(!!(_7_ierr),0)) return PetscError( ((MPI_Comm)0x44000001),1256,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,_7_ierr,PETSC_ERROR_REPEAT," ");} while (0);} while(0); do { ; if (petscstack && petscstack->currentsize > 0 ) { petscstack->currentsize--; petscstack->function[petscstack ->currentsize] = 0; petscstack->file[petscstack->currentsize ] = 0; petscstack->line[petscstack->currentsize] = 0; petscstack ->petscroutine[petscstack->currentsize] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack-> hotdepth-1)<(0)) ? (0) : (petscstack->hotdepth-1)); } ; } while (0); } while (0); } while(0); | |||
1257 | } | |||
1258 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1259 | } | |||
1260 | ||||
1261 | /******************************************************************************/ | |||
1262 | PetscErrorCode PCTFS_gs_gop_hc(PCTFS_gs_id *gs, PetscScalar *vals, const char *op, PetscInt dim) | |||
1263 | { | |||
1264 | PetscErrorCode ierr; | |||
1265 | ||||
1266 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c"; petscstack->line[petscstack->currentsize] = 1266; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1267 | switch (*op) { | |||
1268 | case '+': | |||
1269 | PCTFS_gs_gop_plus_hc(gs,vals,dim); | |||
1270 | break; | |||
1271 | default: | |||
1272 | ierr = PetscInfo1(0,"PCTFS_gs_gop_hc() :: %c is not a valid op\n",op[0])PetscInfo_Private(__func__,0,"PCTFS_gs_gop_hc() :: %c is not a valid op\n" ,op[0]);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1272,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1273 | ierr = PetscInfo(0,"PCTFS_gs_gop_hc() :: default :: plus\n")PetscInfo_Private(__func__,0,"PCTFS_gs_gop_hc() :: default :: plus\n" );CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1273,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1274 | PCTFS_gs_gop_plus_hc(gs,vals,dim); | |||
1275 | break; | |||
1276 | } | |||
1277 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1278 | } | |||
1279 | ||||
1280 | /******************************************************************************/ | |||
1281 | static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim) | |||
1282 | { | |||
1283 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c"; petscstack->line[petscstack->currentsize] = 1283; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1284 | /* if there's nothing to do return */ | |||
1285 | if (dim<=0) PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1286 | ||||
1287 | /* can't do more dimensions then exist */ | |||
1288 | dim = PetscMin(dim,PCTFS_i_log2_num_nodes)(((dim)<(PCTFS_i_log2_num_nodes)) ? (dim) : (PCTFS_i_log2_num_nodes )); | |||
1289 | ||||
1290 | /* local only operations!!! */ | |||
1291 | if (gs->num_local) PCTFS_gs_gop_local_plus(gs,vals); | |||
1292 | ||||
1293 | /* if intersection tree/pairwise and local isn't empty */ | |||
1294 | if (gs->num_local_gop) { | |||
1295 | PCTFS_gs_gop_local_in_plus(gs,vals); | |||
1296 | ||||
1297 | /* pairwise will do tree inside ... */ | |||
1298 | if (gs->num_pairs) PCTFS_gs_gop_pairwise_plus_hc(gs,vals,dim); /* tree only */ | |||
1299 | else if (gs->max_left_over) PCTFS_gs_gop_tree_plus_hc(gs,vals,dim); | |||
1300 | ||||
1301 | PCTFS_gs_gop_local_out(gs,vals); | |||
1302 | } else { /* if intersection tree/pairwise and local is empty */ | |||
1303 | /* pairwise will do tree inside */ | |||
1304 | if (gs->num_pairs) PCTFS_gs_gop_pairwise_plus_hc(gs,vals,dim); /* tree */ | |||
1305 | else if (gs->max_left_over) PCTFS_gs_gop_tree_plus_hc(gs,vals,dim); | |||
1306 | } | |||
1307 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1308 | } | |||
1309 | ||||
1310 | /******************************************************************************/ | |||
1311 | static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim) | |||
1312 | { | |||
1313 | PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; | |||
1314 | PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; | |||
1315 | PetscInt *pw, *list, *size, **nodes; | |||
1316 | MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; | |||
1317 | MPI_Status status; | |||
1318 | PetscInt i, mask=1; | |||
1319 | PetscErrorCode ierr; | |||
1320 | ||||
1321 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c"; petscstack->line[petscstack->currentsize] = 1321; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1322 | for (i=1; i<dim; i++) { mask<<=1; mask++; } | |||
1323 | ||||
1324 | /* strip and load s */ | |||
1325 | msg_list = list = gs->pair_list; | |||
1326 | msg_size = size = gs->msg_sizes; | |||
1327 | msg_nodes = nodes = gs->node_list; | |||
1328 | iptr = pw = gs->pw_elm_list; | |||
1329 | dptr1 = dptr3 = gs->pw_vals; | |||
1330 | msg_ids_in = ids_in = gs->msg_ids_in; | |||
1331 | msg_ids_out = ids_out = gs->msg_ids_out; | |||
1332 | dptr2 = gs->out; | |||
1333 | in1 = in2 = gs->in; | |||
1334 | ||||
1335 | /* post the receives */ | |||
1336 | /* msg_nodes=nodes; */ | |||
1337 | do { | |||
1338 | /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the | |||
1339 | second one *list and do list++ afterwards */ | |||
1340 | if ((PCTFS_my_id|mask)==(*list|mask)) { | |||
1341 | ierr = MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in)((petsc_irecv_ct++,0) || PetscMPITypeSize(&(petsc_irecv_len ),(*size),(((MPI_Datatype)0x4c00080b))) || MPI_Irecv((in1),(* size),(((MPI_Datatype)0x4c00080b)),((-2)),(1001 + *list),(gs-> PCTFS_gs_comm),(msg_ids_in)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1341,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1342 | list++; msg_ids_in++;in1 += *size++; | |||
1343 | } else { list++; size++; } | |||
1344 | } while (*++msg_nodes); | |||
1345 | ||||
1346 | /* load gs values into in out gs buffers */ | |||
1347 | while (*iptr >= 0) *dptr3++ = *(in_vals + *iptr++); | |||
1348 | ||||
1349 | /* load out buffers and post the sends */ | |||
1350 | msg_nodes=nodes; | |||
1351 | list = msg_list; | |||
1352 | while ((iptr = *msg_nodes++)) { | |||
1353 | if ((PCTFS_my_id|mask)==(*list|mask)) { | |||
1354 | dptr3 = dptr2; | |||
1355 | while (*iptr >= 0) *dptr2++ = *(dptr1 + *iptr++); | |||
1356 | /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ | |||
1357 | /* is msg_ids_out++ correct? */ | |||
1358 | ierr = MPI_Isend(dptr3, *msg_size, MPIU_SCALAR, *list, MSGTAG1+PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out)((petsc_isend_ct++,0) || PetscMPITypeSize(&(petsc_isend_len ),(*msg_size),(((MPI_Datatype)0x4c00080b))) || MPI_Isend((dptr3 ),(*msg_size),(((MPI_Datatype)0x4c00080b)),(*list),(1001 +PCTFS_my_id ),(gs->PCTFS_gs_comm),(msg_ids_out)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1358,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1359 | msg_size++;list++;msg_ids_out++; | |||
1360 | } else {list++; msg_size++;} | |||
1361 | } | |||
1362 | ||||
1363 | /* do the tree while we're waiting */ | |||
1364 | if (gs->max_left_over) PCTFS_gs_gop_tree_plus_hc(gs,in_vals,dim); | |||
1365 | ||||
1366 | /* process the received data */ | |||
1367 | msg_nodes=nodes; | |||
1368 | list = msg_list; | |||
1369 | while ((iptr = *nodes++)) { | |||
1370 | if ((PCTFS_my_id|mask)==(*list|mask)) { | |||
1371 | /* Should I check the return value of MPI_Wait() or status? */ | |||
1372 | /* Can this loop be replaced by a call to MPI_Waitall()? */ | |||
1373 | ierr = MPI_Wait(ids_in, &status)((petsc_wait_ct++,petsc_sum_of_waits_ct++,0) || MPI_Wait((ids_in ),(&status)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1373,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1374 | ids_in++; | |||
1375 | while (*iptr >= 0) *(dptr1 + *iptr++) += *in2++; | |||
1376 | } | |||
1377 | list++; | |||
1378 | } | |||
1379 | ||||
1380 | /* replace vals */ | |||
1381 | while (*pw >= 0) *(in_vals + *pw++) = *dptr1++; | |||
1382 | ||||
1383 | /* clear isend message handles */ | |||
1384 | /* This changed for clarity though it could be the same */ | |||
1385 | while (*msg_nodes++) { | |||
1386 | if ((PCTFS_my_id|mask)==(*msg_list|mask)) { | |||
1387 | /* Should I check the return value of MPI_Wait() or status? */ | |||
1388 | /* Can this loop be replaced by a call to MPI_Waitall()? */ | |||
1389 | ierr = MPI_Wait(ids_out, &status)((petsc_wait_ct++,petsc_sum_of_waits_ct++,0) || MPI_Wait((ids_out ),(&status)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1389,__func__,"/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1390 | ids_out++; | |||
1391 | } | |||
1392 | msg_list++; | |||
1393 | } | |||
1394 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1395 | } | |||
1396 | ||||
1397 | /******************************************************************************/ | |||
1398 | static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim) | |||
1399 | { | |||
1400 | PetscInt size; | |||
1401 | PetscInt *in, *out; | |||
1402 | PetscScalar *buf, *work; | |||
1403 | PetscInt op[] = {GL_ADD4,0}; | |||
1404 | ||||
1405 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/ksp/pc/impls/tfs/gs.c"; petscstack->line[petscstack->currentsize] = 1405; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1406 | in = gs->tree_map_in; | |||
1407 | out = gs->tree_map_out; | |||
1408 | buf = gs->tree_buf; | |||
1409 | work = gs->tree_work; | |||
1410 | size = gs->tree_nel; | |||
1411 | ||||
1412 | PCTFS_rvec_zero(buf,size); | |||
1413 | ||||
1414 | while (*in >= 0) *(buf + *out++) = *(vals + *in++); | |||
1415 | ||||
1416 | in = gs->tree_map_in; | |||
1417 | out = gs->tree_map_out; | |||
1418 | ||||
1419 | PCTFS_grop_hc(buf,work,size,op,dim); | |||
1420 | ||||
1421 | while (*in >= 0) *(vals + *in++) = *(buf + *out++); | |||
1422 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1423 | } | |||
1424 | ||||
1425 | ||||
1426 |