Actual source code: gs.c
2: /***********************************gs.c***************************************
3: SPARSE GATHER-SCATTER PACKAGE: bss_malloc bss_malloc ivec error comm gs queue
5: Author: Henry M. Tufo III
7: e-mail: hmt@cs.brown.edu
9: snail-mail:
10: Division of Applied Mathematics
11: Brown University
12: Providence, RI 02912
14: Last Modification:
15: 6.21.97
16: ************************************gs.c**************************************/
18: /***********************************gs.c***************************************
19: File Description:
20: -----------------
22: ************************************gs.c**************************************/
24: #include petsc.h
25: #if defined(PETSC_HAVE_STRINGS_H)
26: #include <strings.h>
27: #endif
28: #if defined(PETSC_HAVE_STRING_H)
29: #include <string.h>
30: #endif
32: #include <float.h>
33: #include <limits.h>
35: #include const.h
36: #include types.h
37: #include comm.h
38: #include ivec.h
39: #include "bss_malloc.h"
40: #include "bit_mask.h"
41: #include error.h
42: #include queue.h
43: #include blas.h
44: #include gs.h
46: /* default length of number of items via tree - doubles if exceeded */
47: #define TREE_BUF_SZ 2048;
48: #define GS_VEC_SZ 1
52: /***********************************gs.c***************************************
53: Type: struct gather_scatter_id
54: ------------------------------
56: ************************************gs.c**************************************/
57: typedef struct gather_scatter_id {
58: int id;
59: int nel_min;
60: int nel_max;
61: int nel_sum;
62: int negl;
63: int gl_max;
64: int gl_min;
65: int repeats;
66: int ordered;
67: int positive;
68: REAL *vals;
70: /* bit mask info */
71: int *my_proc_mask;
72: int mask_sz;
73: int *ngh_buf;
74: int ngh_buf_sz;
75: int *nghs;
76: int num_nghs;
77: int max_nghs;
78: int *pw_nghs;
79: int num_pw_nghs;
80: int *tree_nghs;
81: int num_tree_nghs;
83: int num_loads;
85: /* repeats == true -> local info */
86: int nel; /* number of unique elememts */
87: int *elms; /* of size nel */
88: int nel_total;
89: int *local_elms; /* of size nel_total */
90: int *companion; /* of size nel_total */
92: /* local info */
93: int num_local_total;
94: int local_strength;
95: int num_local;
96: int *num_local_reduce;
97: int **local_reduce;
98: int num_local_gop;
99: int *num_gop_local_reduce;
100: int **gop_local_reduce;
102: /* pairwise info */
103: int level;
104: int num_pairs;
105: int max_pairs;
106: int loc_node_pairs;
107: int max_node_pairs;
108: int min_node_pairs;
109: int avg_node_pairs;
110: int *pair_list;
111: int *msg_sizes;
112: int **node_list;
113: int len_pw_list;
114: int *pw_elm_list;
115: REAL *pw_vals;
117: MPI_Request *msg_ids_in;
118: MPI_Request *msg_ids_out;
120: REAL *out;
121: REAL *in;
122: int msg_total;
124: /* tree - crystal accumulator info */
125: int max_left_over;
126: int *pre;
127: int *in_num;
128: int *out_num;
129: int **in_list;
130: int **out_list;
132: /* new tree work*/
133: int tree_nel;
134: int *tree_elms;
135: REAL *tree_buf;
136: REAL *tree_work;
138: int tree_map_sz;
139: int *tree_map_in;
140: int *tree_map_out;
142: /* current memory status */
143: int gl_bss_min;
144: int gl_perm_min;
146: /* max segment size for gs_gop_vec() */
147: int vec_sz;
149: /* hack to make paul happy */
150: MPI_Comm gs_comm;
152: } gs_id;
155: /* to be made public */
156: #if defined(not_used)
157: static PetscErrorCode gs_dump_ngh(gs_id *id, int loc_num, int *num, int *ngh_list);
158: static void gsi_via_int_list(gs_id *gs);
159: static PetscErrorCode in_sub_tree(int *ptr3, int p_mask_size, int *buf2, int buf_size);
160: #endif
162: /* PRIVATE - and definitely not exported */
163: /*static void gs_print_template(register gs_id* gs, int who);*/
164: /*static void gs_print_stemplate(register gs_id* gs, int who);*/
166: static gs_id *gsi_check_args(int *elms, int nel, int level);
167: static void gsi_via_bit_mask(gs_id *gs);
168: static void get_ngh_buf(gs_id *gs);
169: static void set_pairwise(gs_id *gs);
170: static gs_id * gsi_new(void);
171: static void set_tree(gs_id *gs);
173: /* same for all but vector flavor */
174: static void gs_gop_local_out(gs_id *gs, REAL *vals);
175: /* vector flavor */
176: static void gs_gop_vec_local_out(gs_id *gs, REAL *vals, int step);
178: static void gs_gop_vec_plus(gs_id *gs, REAL *in_vals, int step);
179: static void gs_gop_vec_pairwise_plus(gs_id *gs, REAL *in_vals, int step);
180: static void gs_gop_vec_local_plus(gs_id *gs, REAL *vals, int step);
181: static void gs_gop_vec_local_in_plus(gs_id *gs, REAL *vals, int step);
182: static void gs_gop_vec_tree_plus(gs_id *gs, REAL *vals, int step);
185: static void gs_gop_plus(gs_id *gs, REAL *in_vals);
186: static void gs_gop_pairwise_plus(gs_id *gs, REAL *in_vals);
187: static void gs_gop_local_plus(gs_id *gs, REAL *vals);
188: static void gs_gop_local_in_plus(gs_id *gs, REAL *vals);
189: static void gs_gop_tree_plus(gs_id *gs, REAL *vals);
191: static void gs_gop_plus_hc(gs_id *gs, REAL *in_vals, int dim);
192: static void gs_gop_pairwise_plus_hc(gs_id *gs, REAL *in_vals, int dim);
193: static void gs_gop_tree_plus_hc(gs_id *gs, REAL *vals, int dim);
195: static void gs_gop_times(gs_id *gs, REAL *in_vals);
196: static void gs_gop_pairwise_times(gs_id *gs, REAL *in_vals);
197: static void gs_gop_local_times(gs_id *gs, REAL *vals);
198: static void gs_gop_local_in_times(gs_id *gs, REAL *vals);
199: static void gs_gop_tree_times(gs_id *gs, REAL *vals);
201: static void gs_gop_min(gs_id *gs, REAL *in_vals);
202: static void gs_gop_pairwise_min(gs_id *gs, REAL *in_vals);
203: static void gs_gop_local_min(gs_id *gs, REAL *vals);
204: static void gs_gop_local_in_min(gs_id *gs, REAL *vals);
205: static void gs_gop_tree_min(gs_id *gs, REAL *vals);
207: static void gs_gop_min_abs(gs_id *gs, REAL *in_vals);
208: static void gs_gop_pairwise_min_abs(gs_id *gs, REAL *in_vals);
209: static void gs_gop_local_min_abs(gs_id *gs, REAL *vals);
210: static void gs_gop_local_in_min_abs(gs_id *gs, REAL *vals);
211: static void gs_gop_tree_min_abs(gs_id *gs, REAL *vals);
213: static void gs_gop_max(gs_id *gs, REAL *in_vals);
214: static void gs_gop_pairwise_max(gs_id *gs, REAL *in_vals);
215: static void gs_gop_local_max(gs_id *gs, REAL *vals);
216: static void gs_gop_local_in_max(gs_id *gs, REAL *vals);
217: static void gs_gop_tree_max(gs_id *gs, REAL *vals);
219: static void gs_gop_max_abs(gs_id *gs, REAL *in_vals);
220: static void gs_gop_pairwise_max_abs(gs_id *gs, REAL *in_vals);
221: static void gs_gop_local_max_abs(gs_id *gs, REAL *vals);
222: static void gs_gop_local_in_max_abs(gs_id *gs, REAL *vals);
223: static void gs_gop_tree_max_abs(gs_id *gs, REAL *vals);
225: static void gs_gop_exists(gs_id *gs, REAL *in_vals);
226: static void gs_gop_pairwise_exists(gs_id *gs, REAL *in_vals);
227: static void gs_gop_local_exists(gs_id *gs, REAL *vals);
228: static void gs_gop_local_in_exists(gs_id *gs, REAL *vals);
229: static void gs_gop_tree_exists(gs_id *gs, REAL *vals);
231: static void gs_gop_pairwise_binary(gs_id *gs, REAL *in_vals, rbfp fct);
232: static void gs_gop_local_binary(gs_id *gs, REAL *vals, rbfp fct);
233: static void gs_gop_local_in_binary(gs_id *gs, REAL *vals, rbfp fct);
234: static void gs_gop_tree_binary(gs_id *gs, REAL *vals, rbfp fct);
238: /* global vars */
239: /* from comm.c module */
241: /* module state inf and fortran interface */
242: static int num_gs_ids = 0;
244: /* should make this dynamic ... later */
245: /*static queue_ADT elms_q, mask_q;*/
246: static int msg_buf=MAX_MSG_BUF;
247: /*static int msg_ch=FALSE;*/
249: static int vec_sz=GS_VEC_SZ;
250: /*static int vec_ch=FALSE; */
252: static int *tree_buf=NULL;
253: static int tree_buf_sz=0;
254: static int ntree=0;
257: /******************************************************************************
258: Function: gs_init_()
260: Input :
261: Output:
262: Return:
263: Description:
264: ******************************************************************************/
265: void gs_init_vec_sz(int size)
266: {
267: /* vec_ch = TRUE; */
269: vec_sz = size;
270: }
272: /******************************************************************************
273: Function: gs_init_()
275: Input :
276: Output:
277: Return:
278: Description:
279: ******************************************************************************/
280: void gs_init_msg_buf_sz(int buf_size)
281: {
282: /* msg_ch = TRUE; */
284: msg_buf = buf_size;
285: }
287: /******************************************************************************
288: Function: gs_init()
290: Input :
292: Output:
294: RETURN:
296: Description:
297: ******************************************************************************/
298: gs_id *
299: gs_init(register int *elms, int nel, int level)
300: {
301: register gs_id *gs;
302: #ifdef INFO1
303: int i;
304: #endif
305: MPI_Group gs_group;
306: MPI_Comm gs_comm;
309: bss_init();
310: perm_init();
312: #ifdef DEBUG
313: error_msg_warning("gs_init() start w/(%d,%d)\n",my_id,num_nodes);
314: #endif
315:
316: /* ensure that communication package has been initialized */
317: comm_init();
319: #ifdef INFO1
320: bss_stats();
321: perm_stats();
322: #endif
324: /* determines if we have enough dynamic/semi-static memory */
325: /* checks input, allocs and sets gd_id template */
326: gs = gsi_check_args(elms,nel,level);
328: /* only bit mask version up and working for the moment */
329: /* LATER :: get int list version working for sparse pblms */
330: gsi_via_bit_mask(gs);
332: #ifdef INFO1
333: /* print out P0's template as well as malloc stats */
334: gs_print_template(gs,0);
336: MPI_Barrier(MPI_COMM_WORLD);
338: for (i=1;i<num_nodes;i++)
339: {
340: gs_print_stemplate(gs,i);
341: MPI_Barrier(MPI_COMM_WORLD);
342: }
343: #endif
345: #ifdef DEBUG
346: error_msg_warning("gs_init() end w/(%d,%d)\n",my_id,num_nodes);
347: #endif
349: #ifdef INFO
350: bss_stats();
351: perm_stats();
352: #endif
354: MPI_Comm_group(MPI_COMM_WORLD,&gs_group);
355: MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm);
356: gs->gs_comm=gs_comm;
358: return(gs);
359: }
363: /******************************************************************************
364: Function: gsi_new()
366: Input :
367: Output:
368: Return:
369: Description:
371: elm list must >= 0!!!
372: elm repeats allowed
373: ******************************************************************************/
374: static
375: gs_id *
376: gsi_new(void)
377: {
378: int size=sizeof(gs_id);
379: gs_id *gs;
381:
382: #ifdef DEBUG
383: error_msg_warning("gsi_new() :: size=%D\n",size);
384: #endif
386: gs = (gs_id *) perm_malloc(size);
387:
388: if (!(size%REAL_LEN))
389: {rvec_zero((REAL *)gs,size/REAL_LEN);}
390: else if (!(size%INT_LEN))
391: {ivec_zero((INT*)gs,size/INT_LEN);}
392: else
393: {memset((char *)gs,0,size/sizeof(char));}
395: return(gs);
396: }
400: /******************************************************************************
401: Function: gsi_check_args()
403: Input :
404: Output:
405: Return:
406: Description:
408: elm list must >= 0!!!
409: elm repeats allowed
410: local working copy of elms is sorted
411: ******************************************************************************/
412: static
413: gs_id *
414: gsi_check_args(int *in_elms, int nel, int level)
415: {
416: register int i, j, k, t2;
417: int *companion, *elms, *unique, *iptr;
418: int num_local=0, *num_to_reduce, **local_reduce;
419: int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND};
420: int vals[sizeof(oprs)/sizeof(oprs[0])-1];
421: int work[sizeof(oprs)/sizeof(oprs[0])-1];
422: gs_id *gs;
425: #ifdef DEBUG
426: error_msg_warning("gs_check_args() begin w/(%d,%d)\n",my_id,num_nodes);
427: #endif
429: #ifdef SAFE
430: if (!in_elms)
431: {error_msg_fatal("elms point to nothing!!!\n");}
433: if (nel<0)
434: {error_msg_fatal("can't have fewer than 0 elms!!!\n");}
436: if (nel==0)
437: {error_msg_warning("I don't have any elements!!!\n");}
438: #endif
440: /* get space for gs template */
441: gs = gsi_new();
442: gs->id = ++num_gs_ids;
444: /* hmt 6.4.99 */
445: /* caller can set global ids that don't participate to 0 */
446: /* gs_init ignores all zeros in elm list */
447: /* negative global ids are still invalid */
448: for (i=j=0;i<nel;i++)
449: {if (in_elms[i]!=0) {j++;}}
451: k=nel; nel=j;
453: /* copy over in_elms list and create inverse map */
454: elms = (int*) bss_malloc((nel+1)*INT_LEN);
455: companion = (int*) bss_malloc(nel*INT_LEN);
456: /* ivec_c_index(companion,nel); */
457: /* ivec_copy(elms,in_elms,nel); */
458: for (i=j=0;i<k;i++)
459: {
460: if (in_elms[i]!=0)
461: {elms[j] = in_elms[i]; companion[j++] = i;}
462: }
464: if (j!=nel)
465: {error_msg_fatal("nel j mismatch!\n");}
467: #ifdef SAFE
468: /* pre-pass ... check to see if sorted */
469: elms[nel] = INT_MAX;
470: iptr = elms;
471: unique = elms+1;
472: j=0;
473: while (*iptr!=INT_MAX)
474: {
475: if (*iptr++>*unique++)
476: {j=1; break;}
477: }
479: /* set up inverse map */
480: if (j)
481: {
482: error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n");
483: SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);
484: }
485: else
486: {error_msg_warning("gsi_check_args() :: elm list sorted!\n");}
487: #else
488: SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);
489: #endif
490: elms[nel] = INT_MIN;
492: /* first pass */
493: /* determine number of unique elements, check pd */
494: for (i=k=0;i<nel;i+=j)
495: {
496: t2 = elms[i];
497: j=++i;
498:
499: /* clump 'em for now */
500: while (elms[j]==t2) {j++;}
501:
502: /* how many together and num local */
503: if (j-=i)
504: {num_local++; k+=j;}
505: }
507: /* how many unique elements? */
508: gs->repeats=k;
509: gs->nel = nel-k;
512: /* number of repeats? */
513: gs->num_local = num_local;
514: num_local+=2;
515: gs->local_reduce=local_reduce=(int **)perm_malloc(num_local*INT_PTR_LEN);
516: gs->num_local_reduce=num_to_reduce=(int*) perm_malloc(num_local*INT_LEN);
518: unique = (int*) bss_malloc((gs->nel+1)*INT_LEN);
519: gs->elms = unique;
520: gs->nel_total = nel;
521: gs->local_elms = elms;
522: gs->companion = companion;
524: /* compess map as well as keep track of local ops */
525: for (num_local=i=j=0;i<gs->nel;i++)
526: {
527: k=j;
528: t2 = unique[i] = elms[j];
529: companion[i] = companion[j];
530:
531: while (elms[j]==t2) {j++;}
533: if ((t2=(j-k))>1)
534: {
535: /* number together */
536: num_to_reduce[num_local] = t2++;
537: iptr = local_reduce[num_local++] = (int*)perm_malloc(t2*INT_LEN);
539: /* to use binary searching don't remap until we check intersection */
540: *iptr++ = i;
541:
542: /* note that we're skipping the first one */
543: while (++k<j)
544: {*(iptr++) = companion[k];}
545: *iptr = -1;
546: }
547: }
549: /* sentinel for ngh_buf */
550: unique[gs->nel]=INT_MAX;
552: #ifdef DEBUG
553: if (num_local!=gs->num_local)
554: {error_msg_fatal("compression of maps wrong!!!\n");}
555: #endif
557: /* for two partition sort hack */
558: num_to_reduce[num_local] = 0;
559: local_reduce[num_local] = NULL;
560: num_to_reduce[++num_local] = 0;
561: local_reduce[num_local] = NULL;
563: /* load 'em up */
564: /* note one extra to hold NON_UNIFORM flag!!! */
565: vals[2] = vals[1] = vals[0] = nel;
566: if (gs->nel>0)
567: {
568: vals[3] = unique[0]; /* ivec_lb(elms,nel); */
569: vals[4] = unique[gs->nel-1]; /* ivec_ub(elms,nel); */
570: }
571: else
572: {
573: vals[3] = INT_MAX; /* ivec_lb(elms,nel); */
574: vals[4] = INT_MIN; /* ivec_ub(elms,nel); */
575: }
576: vals[5] = level;
577: vals[6] = num_gs_ids;
579: /* GLOBAL: send 'em out */
580: giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);
582: /* must be semi-pos def - only pairwise depends on this */
583: /* LATER - remove this restriction */
584: if (vals[3]<0)
585: {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);}
587: if (vals[4]==INT_MAX)
588: {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);}
590: #ifdef DEBUG
591: /* check gs template count */
592: if (vals[6] != num_gs_ids)
593: {error_msg_fatal("num_gs_ids mismatch!!!");}
595: /* check all have same level threshold */
596: if (level != vals[5])
597: {error_msg_fatal("gsi_check_args() :: level not uniform across nodes!!!\n");}
598: #endif
600: gs->nel_min = vals[0];
601: gs->nel_max = vals[1];
602: gs->nel_sum = vals[2];
603: gs->gl_min = vals[3];
604: gs->gl_max = vals[4];
605: gs->negl = vals[4]-vals[3]+1;
607: #ifdef DEBUG
608: printf("nel(unique)=%d\n", gs->nel);
609: printf("nel_max=%d\n", gs->nel_max);
610: printf("nel_min=%d\n", gs->nel_min);
611: printf("nel_sum=%d\n", gs->nel_sum);
612: printf("negl=%d\n", gs->negl);
613: printf("gl_max=%d\n", gs->gl_max);
614: printf("gl_min=%d\n", gs->gl_min);
615: printf("elms ordered=%d\n",gs->ordered);
616: printf("repeats=%d\n", gs->repeats);
617: printf("positive=%d\n", gs->positive);
618: printf("level=%d\n", gs->level);
619: #endif
621: if (gs->negl<=0)
622: {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);}
623:
624: /* LATER :: add level == -1 -> program selects level */
625: if (vals[5]<0)
626: {vals[5]=0;}
627: else if (vals[5]>num_nodes)
628: {vals[5]=num_nodes;}
629: gs->level = vals[5];
631: #ifdef DEBUG
632: error_msg_warning("gs_check_args() :: end w/(%d,%d)+level=%d\n",
633: my_id,num_nodes,vals[5]);
634: #endif
636: return(gs);
637: }
640: /******************************************************************************
641: Function: gsi_via_bit_mask()
643: Input :
644: Output:
645: Return:
646: Description:
649: ******************************************************************************/
650: static
651: void
652: gsi_via_bit_mask(gs_id *gs)
653: {
654: register int i, nel, *elms;
655: int t1;
656: int **reduce;
657: int *map;
659: #ifdef DEBUG
660: error_msg_warning("gsi_via_bit_mask() begin w/%d :: %d\n",my_id,num_nodes);
661: #endif
663: /* totally local removes ... ct_bits == 0 */
664: get_ngh_buf(gs);
666: if (gs->level)
667: {set_pairwise(gs);}
669: if (gs->max_left_over)
670: {set_tree(gs);}
672: /* intersection local and pairwise/tree? */
673: gs->num_local_total = gs->num_local;
674: gs->gop_local_reduce = gs->local_reduce;
675: gs->num_gop_local_reduce = gs->num_local_reduce;
677: map = gs->companion;
679: /* is there any local compression */
680: if (!gs->num_local) {
681: gs->local_strength = NONE;
682: gs->num_local_gop = 0;
683: } else {
684: /* ok find intersection */
685: map = gs->companion;
686: reduce = gs->local_reduce;
687: for (i=0, t1=0; i<gs->num_local; i++, reduce++)
688: {
689: if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0)
690: ||
691: ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0)
692: {
693: /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */
694: t1++;
695: if (gs->num_local_reduce[i]<=0)
696: {error_msg_fatal("nobody in list?");}
697: gs->num_local_reduce[i] *= -1;
698: }
699: **reduce=map[**reduce];
700: }
702: /* intersection is empty */
703: if (!t1)
704: {
705: #ifdef DEBUG
706: error_msg_warning("gsi_check_args() :: local gs_gop w/o intersection!");
707: #endif
708: gs->local_strength = FULL;
709: gs->num_local_gop = 0;
710: }
711: /* intersection not empty */
712: else
713: {
714: #ifdef DEBUG
715: error_msg_warning("gsi_check_args() :: local gs_gop w/intersection!");
716: #endif
717: gs->local_strength = PARTIAL;
718: SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce,
719: gs->num_local + 1, SORT_INT_PTR);
721: gs->num_local_gop = t1;
722: gs->num_local_total = gs->num_local;
723: gs->num_local -= t1;
724: gs->gop_local_reduce = gs->local_reduce;
725: gs->num_gop_local_reduce = gs->num_local_reduce;
727: for (i=0; i<t1; i++)
728: {
729: if (gs->num_gop_local_reduce[i]>=0)
730: {error_msg_fatal("they aren't negative?");}
731: gs->num_gop_local_reduce[i] *= -1;
732: gs->local_reduce++;
733: gs->num_local_reduce++;
734: }
735: gs->local_reduce++;
736: gs->num_local_reduce++;
737: }
738: }
740: elms = gs->pw_elm_list;
741: nel = gs->len_pw_list;
742: for (i=0; i<nel; i++)
743: {elms[i] = map[elms[i]];}
745: elms = gs->tree_map_in;
746: nel = gs->tree_map_sz;
747: for (i=0; i<nel; i++)
748: {elms[i] = map[elms[i]];}
750: /* clean up */
751: bss_free((void*) gs->local_elms);
752: bss_free((void*) gs->companion);
753: bss_free((void*) gs->elms);
754: bss_free((void*) gs->ngh_buf);
755: gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
757: #ifdef DEBUG
758: error_msg_warning("gsi_via_bit_mask() end w/%d :: %d\n",my_id,num_nodes);
759: #endif
760: }
764: /******************************************************************************
765: Function: place_in_tree()
767: Input :
768: Output:
769: Return:
770: Description:
773: ******************************************************************************/
774: static
775: void
776: place_in_tree(register int elm)
777: {
778: register int *tp, n;
781: if (ntree==tree_buf_sz)
782: {
783: if (tree_buf_sz)
784: {
785: tp = tree_buf;
786: n = tree_buf_sz;
787: tree_buf_sz<<=1;
788: tree_buf = (int*)bss_malloc(tree_buf_sz*INT_LEN);
789: ivec_copy(tree_buf,tp,n);
790: bss_free(tp);
791: }
792: else
793: {
794: tree_buf_sz = TREE_BUF_SZ;
795: tree_buf = (int*)bss_malloc(tree_buf_sz*INT_LEN);
796: }
797: }
799: tree_buf[ntree++] = elm;
800: }
804: /******************************************************************************
805: Function: get_ngh_buf()
807: Input :
808: Output:
809: Return:
810: Description:
813: ******************************************************************************/
814: static
815: void
816: get_ngh_buf(gs_id *gs)
817: {
818: register int i, j, npw=0, ntree_map=0;
819: int p_mask_size, ngh_buf_size, buf_size;
820: int *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
821: int *ngh_buf, *buf1, *buf2;
822: int offset, per_load, num_loads, or_ct, start, end;
823: int *ptr1, *ptr2, i_start, negl, nel, *elms;
824: int oper=GL_B_OR;
825: int *ptr3, *t_mask, level, ct1, ct2;
827: #ifdef DEBUG
828: error_msg_warning("get_ngh_buf() begin w/%d :: %d\n",my_id,num_nodes);
829: #endif
831: /* to make life easier */
832: nel = gs->nel;
833: elms = gs->elms;
834: level = gs->level;
835:
836: /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */
837: p_mask = (int*) bss_malloc(p_mask_size=len_bit_mask(num_nodes));
838: set_bit_mask(p_mask,p_mask_size,my_id);
840: /* allocate space for masks and info bufs */
841: gs->nghs = sh_proc_mask = (int*) bss_malloc(p_mask_size);
842: gs->pw_nghs = pw_sh_proc_mask = (int*) perm_malloc(p_mask_size);
843: gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel;
844: t_mask = (int*) bss_malloc(p_mask_size);
845: gs->ngh_buf = ngh_buf = (int*) bss_malloc(ngh_buf_size);
847: /* comm buffer size ... memory usage bounded by ~2*msg_buf */
848: /* had thought I could exploit rendezvous threshold */
850: /* default is one pass */
851: per_load = negl = gs->negl;
852: gs->num_loads = num_loads = 1;
853: i=p_mask_size*negl;
855: /* possible overflow on buffer size */
856: /* overflow hack */
857: if (i<0) {i=INT_MAX;}
859: buf_size = PetscMin(msg_buf,i);
861: /* can we do it? */
862: if (p_mask_size>buf_size)
863: {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);}
865: /* get giop buf space ... make *only* one malloc */
866: buf1 = (int*) bss_malloc(buf_size<<1);
868: /* more than one gior exchange needed? */
869: if (buf_size!=i)
870: {
871: per_load = buf_size/p_mask_size;
872: buf_size = per_load*p_mask_size;
873: gs->num_loads = num_loads = negl/per_load + (negl%per_load>0);
874: }
876: #ifdef DEBUG
877: /* dump some basic info */
878: error_msg_warning("n_lds=%d,pms=%d,buf_sz=%d\n",num_loads,p_mask_size,buf_size);
879: #endif
881: /* convert buf sizes from #bytes to #ints - 32 bit only! */
882: #ifdef SAFE
883: p_mask_size/=INT_LEN; ngh_buf_size/=INT_LEN; buf_size/=INT_LEN;
884: #else
885: p_mask_size>>=2; ngh_buf_size>>=2; buf_size>>=2;
886: #endif
887:
888: /* find giop work space */
889: buf2 = buf1+buf_size;
891: /* hold #ints needed for processor masks */
892: gs->mask_sz=p_mask_size;
894: /* init buffers */
895: ivec_zero(sh_proc_mask,p_mask_size);
896: ivec_zero(pw_sh_proc_mask,p_mask_size);
897: ivec_zero(ngh_buf,ngh_buf_size);
899: /* HACK reset tree info */
900: tree_buf=NULL;
901: tree_buf_sz=ntree=0;
903: /* queue the tree elements for now */
904: /* elms_q = new_queue(); */
905:
906: /* can also queue tree info for pruned or forest implememtation */
907: /* mask_q = new_queue(); */
909: /* ok do it */
910: for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++)
911: {
912: /* identity for bitwise or is 000...000 */
913: ivec_zero(buf1,buf_size);
915: /* load msg buffer */
916: for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++)
917: {
918: offset = (offset-start)*p_mask_size;
919: ivec_copy(buf1+offset,p_mask,p_mask_size);
920: }
922: /* GLOBAL: pass buffer */
923: giop(buf1,buf2,buf_size,&oper);
926: /* unload buffer into ngh_buf */
927: ptr2=(elms+i_start);
928: for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++)
929: {
930: /* I own it ... may have to pairwise it */
931: if (j==*ptr2)
932: {
933: /* do i share it w/anyone? */
934: #ifdef SAFE
935: ct1 = ct_bits((char *)ptr3,p_mask_size*INT_LEN);
936: #else
937: ct1 = ct_bits((char *)ptr3,p_mask_size<<2);
938: #endif
939: /* guess not */
940: if (ct1<2)
941: {ptr2++; ptr1+=p_mask_size; continue;}
943: /* i do ... so keep info and turn off my bit */
944: ivec_copy(ptr1,ptr3,p_mask_size);
945: ivec_xor(ptr1,p_mask,p_mask_size);
946: ivec_or(sh_proc_mask,ptr1,p_mask_size);
947:
948: /* is it to be done pairwise? */
949: if (--ct1<=level)
950: {
951: npw++;
952:
953: /* turn on high bit to indicate pw need to process */
954: *ptr2++ |= TOP_BIT;
955: ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);
956: ptr1+=p_mask_size;
957: continue;
958: }
960: /* get set for next and note that I have a tree contribution */
961: /* could save exact elm index for tree here -> save a search */
962: ptr2++; ptr1+=p_mask_size; ntree_map++;
963: }
964: /* i don't but still might be involved in tree */
965: else
966: {
968: /* shared by how many? */
969: #ifdef SAFE
970: ct1 = ct_bits((char *)ptr3,p_mask_size*INT_LEN);
971: #else
972: ct1 = ct_bits((char *)ptr3,p_mask_size<<2);
973: #endif
975: /* none! */
976: if (ct1<2)
977: {continue;}
979: /* is it going to be done pairwise? but not by me of course!*/
980: if (--ct1<=level)
981: {continue;}
982: }
983: /* LATER we're going to have to process it NOW */
984: /* nope ... tree it */
985: place_in_tree(j);
986: }
987: }
989: bss_free((void*)t_mask);
990: bss_free((void*)buf1);
992: gs->len_pw_list=npw;
993: gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*INT_LEN);
995: /* expand from bit mask list to int list and save ngh list */
996: gs->nghs = (int*) perm_malloc(gs->num_nghs * INT_LEN);
997: bm_to_proc((char *)sh_proc_mask,p_mask_size*INT_LEN,gs->nghs);
999: gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*INT_LEN);
1001: oper = GL_MAX;
1002: ct1 = gs->num_nghs;
1003: giop(&ct1,&ct2,1,&oper);
1004: gs->max_nghs = ct1;
1006: gs->tree_map_sz = ntree_map;
1007: gs->max_left_over=ntree;
1009: bss_free((void*)p_mask);
1010: bss_free((void*)sh_proc_mask);
1012: #ifdef DEBUG
1013: error_msg_warning("get_ngh_buf() end w/%d :: %d\n",my_id,num_nodes);
1014: #endif
1015: }
1021: /******************************************************************************
1022: Function: pairwise_init()
1024: Input :
1025: Output:
1026: Return:
1027: Description:
1029: if an element is shared by fewer that level# of nodes do pairwise exch
1030: ******************************************************************************/
1031: static
1032: void
1033: set_pairwise(gs_id *gs)
1034: {
1035: register int i, j;
1036: int p_mask_size;
1037: int *p_mask, *sh_proc_mask, *tmp_proc_mask;
1038: int *ngh_buf, *buf2;
1039: int offset;
1040: int *msg_list, *msg_size, **msg_nodes, nprs;
1041: int *pairwise_elm_list, len_pair_list=0;
1042: int *iptr, t1, i_start, nel, *elms;
1043: int ct;
1046: #ifdef DEBUG
1047: error_msg_warning("set_pairwise() begin w/%d :: %d\n",my_id,num_nodes);
1048: #endif
1050: /* to make life easier */
1051: nel = gs->nel;
1052: elms = gs->elms;
1053: ngh_buf = gs->ngh_buf;
1054: sh_proc_mask = gs->pw_nghs;
1056: /* need a few temp masks */
1057: p_mask_size = len_bit_mask(num_nodes);
1058: p_mask = (int*) bss_malloc(p_mask_size);
1059: tmp_proc_mask = (int*) bss_malloc(p_mask_size);
1061: /* set mask to my my_id's bit mask */
1062: set_bit_mask(p_mask,p_mask_size,my_id);
1064: #ifdef SAFE
1065: p_mask_size /= INT_LEN;
1066: #else
1067: p_mask_size >>= 2;
1068: #endif
1069:
1070: len_pair_list=gs->len_pw_list;
1071: gs->pw_elm_list=pairwise_elm_list=(int*)perm_malloc((len_pair_list+1)*INT_LEN);
1073: /* how many processors (nghs) do we have to exchange with? */
1074: nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*INT_LEN);
1077: /* allocate space for gs_gop() info */
1078: gs->pair_list = msg_list = (int*) perm_malloc(INT_LEN*nprs);
1079: gs->msg_sizes = msg_size = (int*) perm_malloc(INT_LEN*nprs);
1080: gs->node_list = msg_nodes = (int **) perm_malloc(INT_PTR_LEN*(nprs+1));
1082: /* init msg_size list */
1083: ivec_zero(msg_size,nprs);
1085: /* expand from bit mask list to int list */
1086: bm_to_proc((char *)sh_proc_mask,p_mask_size*INT_LEN,msg_list);
1087:
1088: /* keep list of elements being handled pairwise */
1089: for (i=j=0;i<nel;i++)
1090: {
1091: if (elms[i] & TOP_BIT)
1092: {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;}
1093: }
1094: pairwise_elm_list[j] = -1;
1096: #ifdef DEBUG
1097: if (j!=len_pair_list)
1098: {error_msg_fatal("oops ... bad paiwise list in set_pairwise!");}
1099: #endif
1101: gs->msg_ids_out = (MPI_Request *) perm_malloc(sizeof(MPI_Request)*(nprs+1));
1102: gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
1103: gs->msg_ids_in = (MPI_Request *) perm_malloc(sizeof(MPI_Request)*(nprs+1));
1104: gs->msg_ids_in[nprs] = MPI_REQUEST_NULL;
1105: gs->pw_vals = (REAL *) perm_malloc(REAL_LEN*len_pair_list*vec_sz);
1107: /* find who goes to each processor */
1108: for (i_start=i=0;i<nprs;i++)
1109: {
1110: /* processor i's mask */
1111: set_bit_mask(p_mask,p_mask_size*INT_LEN,msg_list[i]);
1113: /* det # going to processor i */
1114: for (ct=j=0;j<len_pair_list;j++)
1115: {
1116: buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
1117: ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
1118: if (ct_bits((char *)tmp_proc_mask,p_mask_size*INT_LEN))
1119: {ct++;}
1120: }
1121: msg_size[i] = ct;
1122: i_start = PetscMax(i_start,ct);
1124: /*space to hold nodes in message to first neighbor */
1125: msg_nodes[i] = iptr = (int*) perm_malloc(INT_LEN*(ct+1));
1127: for (j=0;j<len_pair_list;j++)
1128: {
1129: buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
1130: ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
1131: if (ct_bits((char *)tmp_proc_mask,p_mask_size*INT_LEN))
1132: {*iptr++ = j;}
1133: }
1134: *iptr = -1;
1135: }
1136: msg_nodes[nprs] = NULL;
1138: #ifdef INFO1
1139: t1 = GL_MAX;
1140: giop(&i_start,&offset,1,&t1);
1141: gs->max_pairs = i_start;
1142: #else
1143: j=gs->loc_node_pairs=i_start;
1144: t1 = GL_MAX;
1145: giop(&i_start,&offset,1,&t1);
1146: gs->max_node_pairs = i_start;
1148: i_start=j;
1149: t1 = GL_MIN;
1150: giop(&i_start,&offset,1,&t1);
1151: gs->min_node_pairs = i_start;
1153: i_start=j;
1154: t1 = GL_ADD;
1155: giop(&i_start,&offset,1,&t1);
1156: gs->avg_node_pairs = i_start/num_nodes + 1;
1158: i_start=nprs;
1159: t1 = GL_MAX;
1160: giop(&i_start,&offset,1,&t1);
1161: gs->max_pairs = i_start;
1163: /*gs->max_pairs = -1;*/
1164: #endif
1166: /* remap pairwise in tail of gsi_via_bit_mask() */
1167: gs->msg_total = ivec_sum(gs->msg_sizes,nprs);
1168: gs->out = (REAL *) perm_malloc(REAL_LEN*gs->msg_total*vec_sz);
1169: gs->in = (REAL *) perm_malloc(REAL_LEN*gs->msg_total*vec_sz);
1171: /* reset malloc pool */
1172: bss_free((void*)p_mask);
1173: bss_free((void*)tmp_proc_mask);
1175: #ifdef DEBUG
1176: error_msg_warning("set_pairwise() end w/%d :: %d\n",my_id,num_nodes);
1177: #endif
1178: }
1182: /******************************************************************************
1183: Function: set_tree()
1185: Input :
1186: Output:
1187: Return:
1188: Description:
1190: to do pruned tree just save ngh buf copy for each one and decode here!
1191: ******************************************************************************/
1192: static
1193: void
1194: set_tree(gs_id *gs)
1195: {
1196: register int i, j, n, nel;
1197: register int *iptr_in, *iptr_out, *tree_elms, *elms;
1200: #ifdef DEBUG
1201: error_msg_warning("set_tree() :: begin\n");
1202: #endif
1204: /* local work ptrs */
1205: elms = gs->elms;
1206: nel = gs->nel;
1208: /* how many via tree */
1209: gs->tree_nel = n = ntree;
1210: gs->tree_elms = tree_elms = iptr_in = tree_buf;
1211: gs->tree_buf = (REAL *) bss_malloc(REAL_LEN*n*vec_sz);
1212: gs->tree_work = (REAL *) bss_malloc(REAL_LEN*n*vec_sz);
1213: j=gs->tree_map_sz;
1214: gs->tree_map_in = iptr_in = (int*) bss_malloc(INT_LEN*(j+1));
1215: gs->tree_map_out = iptr_out = (int*) bss_malloc(INT_LEN*(j+1));
1217: #ifdef DEBUG
1218: error_msg_warning("num on tree=%d,%d",gs->max_left_over,gs->tree_nel);
1219: #endif
1221: /* search the longer of the two lists */
1222: /* note ... could save this info in get_ngh_buf and save searches */
1223: if (n<=nel)
1224: {
1225: /* bijective fct w/remap - search elm list */
1226: for (i=0; i<n; i++)
1227: {
1228: if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0)
1229: {*iptr_in++ = j; *iptr_out++ = i;}
1230: }
1231: }
1232: else
1233: {
1234: for (i=0; i<nel; i++)
1235: {
1236: if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0)
1237: {*iptr_in++ = i; *iptr_out++ = j;}
1238: }
1239: }
1241: /* sentinel */
1242: *iptr_in = *iptr_out = -1;
1244: #ifdef DEBUG
1245: error_msg_warning("set_tree() :: end\n");
1246: #endif
1247: }
1250: /******************************************************************************
1251: Function: gsi_via_int_list()
1253: Input :
1254: Output:
1255: Return:
1256: Description:
1257: ******************************************************************************/
1258: /*static
1259: void
1260: gsi_via_int_list(gs_id *gs)
1261: {
1263: LATER: for P large the bit masks -> too many passes
1264: LATER: strategy: do gsum w/1 in position i in negl if owner
1265: LATER: then sum of entire vector 1 ... negl determines min buf len
1266: LATER: So choose min from this or mask method
1267: }*/
1270: #if defined(not_used)
1271: static
1272: int
1273: root_sub_tree(int *proc_list, int num)
1274: {
1275: register int i, j, p_or, p_and;
1276: register int root, mask;
1279: /* ceiling(log2(num_nodes)) - 1 */
1280: j = i_log2_num_nodes;
1281: if (num_nodes==floor_num_nodes)
1282: {j--;}
1284: /* set mask to msb */
1285: for(mask=1,i=0; i<j; i++)
1286: {mask<<=1;}
1288: p_or = ivec_reduce_or(proc_list,num);
1289: p_and = ivec_reduce_and(proc_list,num);
1290: for(root=i=0; i<j; i++,mask>>=1)
1291: {
1292: /* (msb-i)'th bits on ==> root in right 1/2 tree */
1293: if (mask & p_and)
1294: {root |= mask;}
1296: /* (msb-i)'th bits differ ==> root found */
1297: else if (mask & p_or)
1298: {break;}
1300: /* (msb-i)'th bits off ==>root in left 1/2 tree */
1301: }
1303: #ifdef DEBUG
1304: if ((root<0) || (root>num_nodes))
1305: {error_msg_fatal("root_sub_tree() :: bad root!");}
1307: if (!my_id)
1308: {
1309: printf("num_nodes=%d, j=%d, root=%d\n",num_nodes,j,root);
1310: printf("procs: ");
1311: for(i=0;i<num;i++)
1312: {printf("%d ",proc_list[i]);}
1313: printf("\n");
1314: }
1315: #endif
1317: return(root);
1318: }
1319: #endif
1322: #if defined(not_used)
1323: static int
1324: in_sub_tree(int *mask, int mask_size, int *work, int nw)
1325: {
1326: int ct, nb;
1327:
1328: /* mask size in bytes */
1329: nb = mask_size<<2;
1330:
1331: /* shared amoungst how many? */
1332: ct = ct_bits((char *)mask,nb);
1334: /* enough space? */
1335: if (nw<ct)
1336: {error_msg_fatal("in_sub_tree() :: not enough space to expand bit mask!");}
1338: /* expand */
1339: bm_to_proc((char *)mask,nb,work);
1340:
1341: /* find tree root */
1342: root_sub_tree(work,ct);
1344: /* am i in any of the paths? */
1346: return(TRUE);
1348: /*
1349: sh_mask = (int*)bss_malloc(nb);
1350: bss_free(sh_mask);
1351: */
1352: }
1353: #endif
1356: /******************************************************************************
1357: Function: gather_scatter
1359: Input :
1360: Output:
1361: Return:
1362: Description:
1363: ******************************************************************************/
1364: static
1365: void
1366: gs_gop_local_out(register gs_id *gs, register REAL *vals)
1367: {
1368: register int *num, *map, **reduce;
1369: register REAL tmp;
1372: #ifdef DEBUG
1373: error_msg_warning("start gs_gop_xxx()\n");
1374: #endif
1376: num = gs->num_gop_local_reduce;
1377: reduce = gs->gop_local_reduce;
1378: while ((map = *reduce++))
1379: {
1380: /* wall */
1381: if (*num == 2)
1382: {
1383: num ++;
1384: vals[map[1]] = vals[map[0]];
1385: }
1386: /* corner shared by three elements */
1387: else if (*num == 3)
1388: {
1389: num ++;
1390: vals[map[2]] = vals[map[1]] = vals[map[0]];
1391: }
1392: /* corner shared by four elements */
1393: else if (*num == 4)
1394: {
1395: num ++;
1396: vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
1397: }
1398: /* general case ... odd geoms ... 3D*/
1399: else
1400: {
1401: num++;
1402: tmp = *(vals + *map++);
1403: while (*map >= 0)
1404: {*(vals + *map++) = tmp;}
1405: }
1406: }
1407: }
1411: /******************************************************************************
1412: Function: gather_scatter
1414: Input :
1415: Output:
1416: Return:
1417: Description:
1418: ******************************************************************************/
1419: void
1420: gs_gop_binary(gs_ADT gs, REAL *vals, rbfp fct)
1421: {
1422: #ifdef DEBUG
1423: if (!gs) {error_msg_fatal("gs_gop() :: passed NULL gs handle!!!");}
1424: if (!fct) {error_msg_fatal("gs_gop() :: passed NULL bin fct handle!!!");}
1425: error_msg_warning("start gs_gop_xxx()\n");
1426: #endif
1428: /* local only operations!!! */
1429: if (gs->num_local)
1430: {gs_gop_local_binary(gs,vals,fct);}
1431:
1432: /* if intersection tree/pairwise and local isn't empty */
1433: if (gs->num_local_gop)
1434: {
1435: gs_gop_local_in_binary(gs,vals,fct);
1436:
1437: /* pairwise */
1438: if (gs->num_pairs)
1439: {gs_gop_pairwise_binary(gs,vals,fct);}
1440:
1441: /* tree */
1442: else if (gs->max_left_over)
1443: {gs_gop_tree_binary(gs,vals,fct);}
1444:
1445: gs_gop_local_out(gs,vals);
1446: }
1447: /* if intersection tree/pairwise and local is empty */
1448: else
1449: {
1450: /* pairwise */
1451: if (gs->num_pairs)
1452: {gs_gop_pairwise_binary(gs,vals,fct);}
1453:
1454: /* tree */
1455: else if (gs->max_left_over)
1456: {gs_gop_tree_binary(gs,vals,fct);}
1457: }
1458: }
1462: /******************************************************************************
1463: Function: gather_scatter
1465: Input :
1466: Output:
1467: Return:
1468: Description:
1469: ******************************************************************************/
1470: static
1471: void
1472: gs_gop_local_binary(register gs_id *gs, register REAL *vals, register rbfp fct)
1473: {
1474: register int *num, *map, **reduce;
1475: REAL tmp;
1478: #ifdef DEBUG
1479: error_msg_warning("start gs_gop_xxx()\n");
1480: #endif
1481: num = gs->num_local_reduce;
1482: reduce = gs->local_reduce;
1483: while ((map = *reduce))
1484: {
1485: num ++;
1486: (*fct)(&tmp,NULL,1);
1487: /* tmp = 0.0; */
1488: while (*map >= 0)
1489: {(*fct)(&tmp,(vals + *map),1); map++;}
1490: /* {tmp = (*fct)(tmp,*(vals + *map)); map++;} */
1491:
1492: map = *reduce++;
1493: while (*map >= 0)
1494: {*(vals + *map++) = tmp;}
1495: }
1496: }
1500: /******************************************************************************
1501: Function: gather_scatter
1503: Input :
1504: Output:
1505: Return:
1506: Description:
1507: ******************************************************************************/
1508: static
1509: void
1510: gs_gop_local_in_binary(register gs_id *gs, register REAL *vals, register rbfp fct)
1511: {
1512: register int *num, *map, **reduce;
1513: register REAL *base;
1516: #ifdef DEBUG
1517: error_msg_warning("start gs_gop_xxx()\n");
1518: #endif
1520: num = gs->num_gop_local_reduce;
1522: reduce = gs->gop_local_reduce;
1523: while ((map = *reduce++))
1524: {
1525: num++;
1526: base = vals + *map++;
1527: while (*map >= 0)
1528: {(*fct)(base,(vals + *map),1); map++;}
1529: /* {*base = (*fct)(*base,*(vals + *map)); map++;} */
1530: }
1531: }
1535: /******************************************************************************
1536: Function: gather_scatter
1538: VERSION 3 ::
1540: Input :
1541: Output:
1542: Return:
1543: Description:
1544: ******************************************************************************/
1545: static
1546: void
1547: gs_gop_pairwise_binary(register gs_id *gs, register REAL *in_vals,
1548: register rbfp fct)
1549: {
1550: register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
1551: register int *iptr, *msg_list, *msg_size, **msg_nodes;
1552: register int *pw, *list, *size, **nodes;
1553: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1554: MPI_Status status;
1557: /* strip and load registers */
1558: msg_list =list = gs->pair_list;
1559: msg_size =size = gs->msg_sizes;
1560: msg_nodes=nodes = gs->node_list;
1561: iptr=pw = gs->pw_elm_list;
1562: dptr1=dptr3 = gs->pw_vals;
1563: msg_ids_in = ids_in = gs->msg_ids_in;
1564: msg_ids_out = ids_out = gs->msg_ids_out;
1565: dptr2 = gs->out;
1566: in1=in2 = gs->in;
1568: /* post the receives */
1569: /* msg_nodes=nodes; */
1570: do
1571: {
1572: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1573: second one *list and do list++ afterwards */
1574: MPI_Irecv(in1, *size, REAL_TYPE, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1575: gs->gs_comm, msg_ids_in++);
1576: in1 += *size++;
1577: }
1578: while (*++msg_nodes);
1579: msg_nodes=nodes;
1581: /* load gs values into in out gs buffers */
1582: while (*iptr >= 0)
1583: {*dptr3++ = *(in_vals + *iptr++);}
1585: /* load out buffers and post the sends */
1586: while ((iptr = *msg_nodes++))
1587: {
1588: dptr3 = dptr2;
1589: while (*iptr >= 0)
1590: {*dptr2++ = *(dptr1 + *iptr++);}
1591: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1592: /* is msg_ids_out++ correct? */
1593: MPI_Isend(dptr3, *msg_size++, REAL_TYPE, *msg_list++,
1594: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1595: }
1597: if (gs->max_left_over)
1598: {gs_gop_tree_binary(gs,in_vals,fct);}
1600: /* process the received data */
1601: msg_nodes=nodes;
1602: while ((iptr = *nodes++))
1603: {
1604: /* Should I check the return value of MPI_Wait() or status? */
1605: /* Can this loop be replaced by a call to MPI_Waitall()? */
1606: MPI_Wait(ids_in++, &status);
1607: while (*iptr >= 0)
1608: {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;}
1609: /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */
1610: }
1612: /* replace vals */
1613: while (*pw >= 0)
1614: {*(in_vals + *pw++) = *dptr1++;}
1616: /* clear isend message handles */
1617: /* This changed for clarity though it could be the same */
1618: while (*msg_nodes++)
1619: /* Should I check the return value of MPI_Wait() or status? */
1620: /* Can this loop be replaced by a call to MPI_Waitall()? */
1621: {MPI_Wait(ids_out++, &status);}
1622: }
1626: /******************************************************************************
1627: Function: gather_scatter
1629: Input :
1630: Output:
1631: Return:
1632: Description:
1633: ******************************************************************************/
1634: static
1635: void
1636: gs_gop_tree_binary(gs_id *gs, REAL *vals, register rbfp fct)
1637: {
1638: int size;
1639: int *in, *out;
1640: REAL *buf, *work;
1642: #ifdef DEBUG
1643: error_msg_warning("gs_gop_tree_binary() :: start\n");
1644: #endif
1645:
1646: in = gs->tree_map_in;
1647: out = gs->tree_map_out;
1648: buf = gs->tree_buf;
1649: work = gs->tree_work;
1650: size = gs->tree_nel;
1652: /* load vals vector w/identity */
1653: (*fct)(buf,NULL,size);
1654:
1655: /* load my contribution into val vector */
1656: while (*in >= 0)
1657: {(*fct)((buf + *out++),(vals + *in++),-1);}
1658: /* {*(buf + *out++) = *(vals + *in++);} */
1660: gfop(buf,work,size,(vbfp)fct,REAL_TYPE,0);
1662: in = gs->tree_map_in;
1663: out = gs->tree_map_out;
1664: while (*in >= 0)
1665: {(*fct)((vals + *in++),(buf + *out++),-1);}
1666: /* {*(vals + *in++) = *(buf + *out++);} */
1669: #ifdef DEBUG
1670: error_msg_warning("gs_gop_tree_binary() :: end\n");
1671: #endif
1673: }
1678: /******************************************************************************
1679: Function: gather_scatter
1681: Input :
1682: Output:
1683: Return:
1684: Description:
1685: ******************************************************************************/
1686: void
1687: gs_gop(register gs_id *gs, register REAL *vals, register const char *op)
1688: {
1689: #ifdef DEBUG
1690: error_msg_warning("start gs_gop()\n");
1691: if (!gs) {error_msg_fatal("gs_gop() :: passed NULL gs handle!!!");}
1692: if (!op) {error_msg_fatal("gs_gop() :: passed NULL operation!!!");}
1693: #endif
1695: switch (*op) {
1696: case '+':
1697: gs_gop_plus(gs,vals);
1698: break;
1699: case '*':
1700: gs_gop_times(gs,vals);
1701: break;
1702: case 'a':
1703: gs_gop_min_abs(gs,vals);
1704: break;
1705: case 'A':
1706: gs_gop_max_abs(gs,vals);
1707: break;
1708: case 'e':
1709: gs_gop_exists(gs,vals);
1710: break;
1711: case 'm':
1712: gs_gop_min(gs,vals);
1713: break;
1714: case 'M':
1715: gs_gop_max(gs,vals); break;
1716: /*
1717: if (*(op+1)=='\0')
1718: {gs_gop_max(gs,vals); break;}
1719: else if (*(op+1)=='X')
1720: {gs_gop_max_abs(gs,vals); break;}
1721: else if (*(op+1)=='N')
1722: {gs_gop_min_abs(gs,vals); break;}
1723: */
1724: default:
1725: error_msg_warning("gs_gop() :: %c is not a valid op",op[0]);
1726: error_msg_warning("gs_gop() :: default :: plus");
1727: gs_gop_plus(gs,vals);
1728: break;
1729: }
1730: #ifdef DEBUG
1731: error_msg_warning("end gs_gop()\n");
1732: #endif
1733: }
1736: /******************************************************************************
1737: Function: gather_scatter
1739: Input :
1740: Output:
1741: Return:
1742: Description:
1743: ******************************************************************************/
1744: static void
1745: gs_gop_exists(register gs_id *gs, register REAL *vals)
1746: {
1747: #ifdef DEBUG
1748: error_msg_warning("start gs_gop_xxx()\n");
1749: #endif
1751: /* local only operations!!! */
1752: if (gs->num_local)
1753: {gs_gop_local_exists(gs,vals);}
1755: /* if intersection tree/pairwise and local isn't empty */
1756: if (gs->num_local_gop)
1757: {
1758: gs_gop_local_in_exists(gs,vals);
1760: /* pairwise */
1761: if (gs->num_pairs)
1762: {gs_gop_pairwise_exists(gs,vals);}
1763:
1764: /* tree */
1765: else if (gs->max_left_over)
1766: {gs_gop_tree_exists(gs,vals);}
1767:
1768: gs_gop_local_out(gs,vals);
1769: }
1770: /* if intersection tree/pairwise and local is empty */
1771: else
1772: {
1773: /* pairwise */
1774: if (gs->num_pairs)
1775: {gs_gop_pairwise_exists(gs,vals);}
1776:
1777: /* tree */
1778: else if (gs->max_left_over)
1779: {gs_gop_tree_exists(gs,vals);}
1780: }
1781: }
1785: /******************************************************************************
1786: Function: gather_scatter
1788: Input :
1789: Output:
1790: Return:
1791: Description:
1792: ******************************************************************************/
1793: static
1794: void
1795: gs_gop_local_exists(register gs_id *gs, register REAL *vals)
1796: {
1797: register int *num, *map, **reduce;
1798: register REAL tmp;
1801: #ifdef DEBUG
1802: error_msg_warning("start gs_gop_xxx()\n");
1803: #endif
1805: num = gs->num_local_reduce;
1806: reduce = gs->local_reduce;
1807: while ((map = *reduce))
1808: {
1809: num ++;
1810: tmp = 0.0;
1811: while (*map >= 0)
1812: {tmp = EXISTS(tmp,*(vals + *map)); map++;}
1813:
1814: map = *reduce++;
1815: while (*map >= 0)
1816: {*(vals + *map++) = tmp;}
1817: }
1818: }
1822: /******************************************************************************
1823: Function: gather_scatter
1825: Input :
1826: Output:
1827: Return:
1828: Description:
1829: ******************************************************************************/
1830: static
1831: void
1832: gs_gop_local_in_exists(register gs_id *gs, register REAL *vals)
1833: {
1834: register int *num, *map, **reduce;
1835: register REAL *base;
1838: #ifdef DEBUG
1839: error_msg_warning("start gs_gop_xxx()\n");
1840: #endif
1842: num = gs->num_gop_local_reduce;
1843: reduce = gs->gop_local_reduce;
1844: while ((map = *reduce++))
1845: {
1846: num++;
1847: base = vals + *map++;
1848: while (*map >= 0)
1849: {*base = EXISTS(*base,*(vals + *map)); map++;}
1850: }
1851: }
1855: /******************************************************************************
1856: Function: gather_scatter
1858: VERSION 3 ::
1860: Input :
1861: Output:
1862: Return:
1863: Description:
1864: ******************************************************************************/
1865: static
1866: void
1867: gs_gop_pairwise_exists(register gs_id *gs, register REAL *in_vals)
1868: {
1869: register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
1870: register int *iptr, *msg_list, *msg_size, **msg_nodes;
1871: register int *pw, *list, *size, **nodes;
1872: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1873: MPI_Status status;
1876: /* strip and load registers */
1877: msg_list =list = gs->pair_list;
1878: msg_size =size = gs->msg_sizes;
1879: msg_nodes=nodes = gs->node_list;
1880: iptr=pw = gs->pw_elm_list;
1881: dptr1=dptr3 = gs->pw_vals;
1882: msg_ids_in = ids_in = gs->msg_ids_in;
1883: msg_ids_out = ids_out = gs->msg_ids_out;
1884: dptr2 = gs->out;
1885: in1=in2 = gs->in;
1887: /* post the receives */
1888: /* msg_nodes=nodes; */
1889: do
1890: {
1891: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1892: second one *list and do list++ afterwards */
1893: MPI_Irecv(in1, *size, REAL_TYPE, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1894: gs->gs_comm, msg_ids_in++);
1895: in1 += *size++;
1896: }
1897: while (*++msg_nodes);
1898: msg_nodes=nodes;
1900: /* load gs values into in out gs buffers */
1901: while (*iptr >= 0)
1902: {*dptr3++ = *(in_vals + *iptr++);}
1904: /* load out buffers and post the sends */
1905: while ((iptr = *msg_nodes++))
1906: {
1907: dptr3 = dptr2;
1908: while (*iptr >= 0)
1909: {*dptr2++ = *(dptr1 + *iptr++);}
1910: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1911: /* is msg_ids_out++ correct? */
1912: MPI_Isend(dptr3, *msg_size++, REAL_TYPE, *msg_list++,
1913: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1914: }
1916: if (gs->max_left_over)
1917: {gs_gop_tree_exists(gs,in_vals);}
1919: /* process the received data */
1920: msg_nodes=nodes;
1921: while ((iptr = *nodes++))
1922: {
1923: /* Should I check the return value of MPI_Wait() or status? */
1924: /* Can this loop be replaced by a call to MPI_Waitall()? */
1925: MPI_Wait(ids_in++, &status);
1926: while (*iptr >= 0)
1927: {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1928: }
1930: /* replace vals */
1931: while (*pw >= 0)
1932: {*(in_vals + *pw++) = *dptr1++;}
1934: /* clear isend message handles */
1935: /* This changed for clarity though it could be the same */
1936: while (*msg_nodes++)
1937: /* Should I check the return value of MPI_Wait() or status? */
1938: /* Can this loop be replaced by a call to MPI_Waitall()? */
1939: {MPI_Wait(ids_out++, &status);}
1940: }
1944: /******************************************************************************
1945: Function: gather_scatter
1947: Input :
1948: Output:
1949: Return:
1950: Description:
1951: ******************************************************************************/
1952: static
1953: void
1954: gs_gop_tree_exists(gs_id *gs, REAL *vals)
1955: {
1956: int size;
1957: int *in, *out;
1958: REAL *buf, *work;
1959: int op[] = {GL_EXISTS,0};
1962: #ifdef DEBUG
1963: error_msg_warning("start gs_gop_tree_exists()");
1964: #endif
1965:
1966: in = gs->tree_map_in;
1967: out = gs->tree_map_out;
1968: buf = gs->tree_buf;
1969: work = gs->tree_work;
1970: size = gs->tree_nel;
1972: #if defined BLAS||CBLAS
1973: *work = 0.0;
1974: copy(size,work,0,buf,1);
1975: #else
1976: rvec_zero(buf,size);
1977: #endif
1979: while (*in >= 0)
1980: {
1981: /*
1982: printf("%d :: out=%d\n",my_id,*out);
1983: printf("%d :: in=%d\n",my_id,*in);
1984: */
1985: *(buf + *out++) = *(vals + *in++);
1986: }
1988: grop(buf,work,size,op);
1990: in = gs->tree_map_in;
1991: out = gs->tree_map_out;
1993: while (*in >= 0)
1994: {*(vals + *in++) = *(buf + *out++);}
1996: #ifdef DEBUG
1997: error_msg_warning("start gs_gop_tree_exists()");
1998: #endif
1999: }
2003: /******************************************************************************
2004: Function: gather_scatter
2006: Input :
2007: Output:
2008: Return:
2009: Description:
2010: ******************************************************************************/
2011: static void
2012: gs_gop_max_abs(register gs_id *gs, register REAL *vals)
2013: {
2014: #ifdef DEBUG
2015: error_msg_warning("start gs_gop_xxx()\n");
2016: #endif
2018: /* local only operations!!! */
2019: if (gs->num_local)
2020: {gs_gop_local_max_abs(gs,vals);}
2022: /* if intersection tree/pairwise and local isn't empty */
2023: if (gs->num_local_gop)
2024: {
2025: gs_gop_local_in_max_abs(gs,vals);
2027: /* pairwise */
2028: if (gs->num_pairs)
2029: {gs_gop_pairwise_max_abs(gs,vals);}
2030:
2031: /* tree */
2032: else if (gs->max_left_over)
2033: {gs_gop_tree_max_abs(gs,vals);}
2034:
2035: gs_gop_local_out(gs,vals);
2036: }
2037: /* if intersection tree/pairwise and local is empty */
2038: else
2039: {
2040: /* pairwise */
2041: if (gs->num_pairs)
2042: {gs_gop_pairwise_max_abs(gs,vals);}
2043:
2044: /* tree */
2045: else if (gs->max_left_over)
2046: {gs_gop_tree_max_abs(gs,vals);}
2047: }
2048: }
2052: /******************************************************************************
2053: Function: gather_scatter
2055: Input :
2056: Output:
2057: Return:
2058: Description:
2059: ******************************************************************************/
2060: static
2061: void
2062: gs_gop_local_max_abs(register gs_id *gs, register REAL *vals)
2063: {
2064: register int *num, *map, **reduce;
2065: register REAL tmp;
2068: #ifdef DEBUG
2069: error_msg_warning("start gs_gop_xxx()\n");
2070: #endif
2072: num = gs->num_local_reduce;
2073: reduce = gs->local_reduce;
2074: while ((map = *reduce))
2075: {
2076: num ++;
2077: tmp = 0.0;
2078: while (*map >= 0)
2079: {tmp = MAX_FABS(tmp,*(vals + *map)); map++;}
2080:
2081: map = *reduce++;
2082: while (*map >= 0)
2083: {*(vals + *map++) = tmp;}
2084: }
2085: }
2089: /******************************************************************************
2090: Function: gather_scatter
2092: Input :
2093: Output:
2094: Return:
2095: Description:
2096: ******************************************************************************/
2097: static
2098: void
2099: gs_gop_local_in_max_abs(register gs_id *gs, register REAL *vals)
2100: {
2101: register int *num, *map, **reduce;
2102: register REAL *base;
2105: #ifdef DEBUG
2106: error_msg_warning("start gs_gop_xxx()\n");
2107: #endif
2109: num = gs->num_gop_local_reduce;
2110: reduce = gs->gop_local_reduce;
2111: while ((map = *reduce++))
2112: {
2113: num++;
2114: base = vals + *map++;
2115: while (*map >= 0)
2116: {*base = MAX_FABS(*base,*(vals + *map)); map++;}
2117: }
2118: }
2122: /******************************************************************************
2123: Function: gather_scatter
2125: VERSION 3 ::
2127: Input :
2128: Output:
2129: Return:
2130: Description:
2131: ******************************************************************************/
2132: static
2133: void
2134: gs_gop_pairwise_max_abs(register gs_id *gs, register REAL *in_vals)
2135: {
2136: register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
2137: register int *iptr, *msg_list, *msg_size, **msg_nodes;
2138: register int *pw, *list, *size, **nodes;
2139: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2140: MPI_Status status;
2143: /* strip and load registers */
2144: msg_list =list = gs->pair_list;
2145: msg_size =size = gs->msg_sizes;
2146: msg_nodes=nodes = gs->node_list;
2147: iptr=pw = gs->pw_elm_list;
2148: dptr1=dptr3 = gs->pw_vals;
2149: msg_ids_in = ids_in = gs->msg_ids_in;
2150: msg_ids_out = ids_out = gs->msg_ids_out;
2151: dptr2 = gs->out;
2152: in1=in2 = gs->in;
2154: /* post the receives */
2155: /* msg_nodes=nodes; */
2156: do
2157: {
2158: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2159: second one *list and do list++ afterwards */
2160: MPI_Irecv(in1, *size, REAL_TYPE, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2161: gs->gs_comm, msg_ids_in++);
2162: in1 += *size++;
2163: }
2164: while (*++msg_nodes);
2165: msg_nodes=nodes;
2167: /* load gs values into in out gs buffers */
2168: while (*iptr >= 0)
2169: {*dptr3++ = *(in_vals + *iptr++);}
2171: /* load out buffers and post the sends */
2172: while ((iptr = *msg_nodes++))
2173: {
2174: dptr3 = dptr2;
2175: while (*iptr >= 0)
2176: {*dptr2++ = *(dptr1 + *iptr++);}
2177: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2178: /* is msg_ids_out++ correct? */
2179: MPI_Isend(dptr3, *msg_size++, REAL_TYPE, *msg_list++,
2180: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2181: }
2183: if (gs->max_left_over)
2184: {gs_gop_tree_max_abs(gs,in_vals);}
2186: /* process the received data */
2187: msg_nodes=nodes;
2188: while ((iptr = *nodes++))
2189: {
2190: /* Should I check the return value of MPI_Wait() or status? */
2191: /* Can this loop be replaced by a call to MPI_Waitall()? */
2192: MPI_Wait(ids_in++, &status);
2193: while (*iptr >= 0)
2194: {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2195: }
2197: /* replace vals */
2198: while (*pw >= 0)
2199: {*(in_vals + *pw++) = *dptr1++;}
2201: /* clear isend message handles */
2202: /* This changed for clarity though it could be the same */
2203: while (*msg_nodes++)
2204: /* Should I check the return value of MPI_Wait() or status? */
2205: /* Can this loop be replaced by a call to MPI_Waitall()? */
2206: {MPI_Wait(ids_out++, &status);}
2207: }
2211: /******************************************************************************
2212: Function: gather_scatter
2214: Input :
2215: Output:
2216: Return:
2217: Description:
2218: ******************************************************************************/
2219: static
2220: void
2221: gs_gop_tree_max_abs(gs_id *gs, REAL *vals)
2222: {
2223: int size;
2224: int *in, *out;
2225: REAL *buf, *work;
2226: int op[] = {GL_MAX_ABS,0};
2229: #ifdef DEBUG
2230: error_msg_warning("start gs_gop_tree_max_abs()");
2231: #endif
2232:
2233: in = gs->tree_map_in;
2234: out = gs->tree_map_out;
2235: buf = gs->tree_buf;
2236: work = gs->tree_work;
2237: size = gs->tree_nel;
2239: #if defined BLAS||CBLAS
2240: *work = 0.0;
2241: copy(size,work,0,buf,1);
2242: #else
2243: rvec_zero(buf,size);
2244: #endif
2246: while (*in >= 0)
2247: {
2248: /*
2249: printf("%d :: out=%d\n",my_id,*out);
2250: printf("%d :: in=%d\n",my_id,*in);
2251: */
2252: *(buf + *out++) = *(vals + *in++);
2253: }
2255: grop(buf,work,size,op);
2257: in = gs->tree_map_in;
2258: out = gs->tree_map_out;
2260: while (*in >= 0)
2261: {*(vals + *in++) = *(buf + *out++);}
2263: #ifdef DEBUG
2264: error_msg_warning("start gs_gop_tree_max_abs()");
2265: #endif
2266: }
2270: /******************************************************************************
2271: Function: gather_scatter
2273: Input :
2274: Output:
2275: Return:
2276: Description:
2277: ******************************************************************************/
2278: static void
2279: gs_gop_max(register gs_id *gs, register REAL *vals)
2280: {
2281: #ifdef DEBUG
2282: error_msg_warning("start gs_gop_xxx()\n");
2283: #endif
2286: /* local only operations!!! */
2287: if (gs->num_local)
2288: {gs_gop_local_max(gs,vals);}
2290: /* if intersection tree/pairwise and local isn't empty */
2291: if (gs->num_local_gop)
2292: {
2293: gs_gop_local_in_max(gs,vals);
2295: /* pairwise */
2296: if (gs->num_pairs)
2297: {gs_gop_pairwise_max(gs,vals);}
2298:
2299: /* tree */
2300: else if (gs->max_left_over)
2301: {gs_gop_tree_max(gs,vals);}
2302:
2303: gs_gop_local_out(gs,vals);
2304: }
2305: /* if intersection tree/pairwise and local is empty */
2306: else
2307: {
2308: /* pairwise */
2309: if (gs->num_pairs)
2310: {gs_gop_pairwise_max(gs,vals);}
2311:
2312: /* tree */
2313: else if (gs->max_left_over)
2314: {gs_gop_tree_max(gs,vals);}
2315: }
2316: }
2320: /******************************************************************************
2321: Function: gather_scatter
2323: Input :
2324: Output:
2325: Return:
2326: Description:
2327: ******************************************************************************/
2328: static
2329: void
2330: gs_gop_local_max(register gs_id *gs, register REAL *vals)
2331: {
2332: register int *num, *map, **reduce;
2333: register REAL tmp;
2336: #ifdef DEBUG
2337: error_msg_warning("start gs_gop_xxx()\n");
2338: #endif
2340: num = gs->num_local_reduce;
2341: reduce = gs->local_reduce;
2342: while ((map = *reduce))
2343: {
2344: num ++;
2345: tmp = -REAL_MAX;
2346: while (*map >= 0)
2347: {tmp = PetscMax(tmp,*(vals + *map)); map++;}
2348:
2349: map = *reduce++;
2350: while (*map >= 0)
2351: {*(vals + *map++) = tmp;}
2352: }
2353: }
2357: /******************************************************************************
2358: Function: gather_scatter
2360: Input :
2361: Output:
2362: Return:
2363: Description:
2364: ******************************************************************************/
2365: static
2366: void
2367: gs_gop_local_in_max(register gs_id *gs, register REAL *vals)
2368: {
2369: register int *num, *map, **reduce;
2370: register REAL *base;
2373: #ifdef DEBUG
2374: error_msg_warning("start gs_gop_xxx()\n");
2375: #endif
2377: num = gs->num_gop_local_reduce;
2378: reduce = gs->gop_local_reduce;
2379: while ((map = *reduce++))
2380: {
2381: num++;
2382: base = vals + *map++;
2383: while (*map >= 0)
2384: {*base = PetscMax(*base,*(vals + *map)); map++;}
2385: }
2386: }
2390: /******************************************************************************
2391: Function: gather_scatter
2393: VERSION 3 ::
2395: Input :
2396: Output:
2397: Return:
2398: Description:
2399: ******************************************************************************/
2400: static
2401: void
2402: gs_gop_pairwise_max(register gs_id *gs, register REAL *in_vals)
2403: {
2404: register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
2405: register int *iptr, *msg_list, *msg_size, **msg_nodes;
2406: register int *pw, *list, *size, **nodes;
2407: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2408: MPI_Status status;
2411: /* strip and load registers */
2412: msg_list =list = gs->pair_list;
2413: msg_size =size = gs->msg_sizes;
2414: msg_nodes=nodes = gs->node_list;
2415: iptr=pw = gs->pw_elm_list;
2416: dptr1=dptr3 = gs->pw_vals;
2417: msg_ids_in = ids_in = gs->msg_ids_in;
2418: msg_ids_out = ids_out = gs->msg_ids_out;
2419: dptr2 = gs->out;
2420: in1=in2 = gs->in;
2422: /* post the receives */
2423: /* msg_nodes=nodes; */
2424: do
2425: {
2426: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2427: second one *list and do list++ afterwards */
2428: MPI_Irecv(in1, *size, REAL_TYPE, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2429: gs->gs_comm, msg_ids_in++);
2430: in1 += *size++;
2431: }
2432: while (*++msg_nodes);
2433: msg_nodes=nodes;
2435: /* load gs values into in out gs buffers */
2436: while (*iptr >= 0)
2437: {*dptr3++ = *(in_vals + *iptr++);}
2439: /* load out buffers and post the sends */
2440: while ((iptr = *msg_nodes++))
2441: {
2442: dptr3 = dptr2;
2443: while (*iptr >= 0)
2444: {*dptr2++ = *(dptr1 + *iptr++);}
2445: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2446: /* is msg_ids_out++ correct? */
2447: MPI_Isend(dptr3, *msg_size++, REAL_TYPE, *msg_list++,
2448: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2449: }
2451: if (gs->max_left_over)
2452: {gs_gop_tree_max(gs,in_vals);}
2454: /* process the received data */
2455: msg_nodes=nodes;
2456: while ((iptr = *nodes++))
2457: {
2458: /* Should I check the return value of MPI_Wait() or status? */
2459: /* Can this loop be replaced by a call to MPI_Waitall()? */
2460: MPI_Wait(ids_in++, &status);
2461: while (*iptr >= 0)
2462: {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2463: }
2465: /* replace vals */
2466: while (*pw >= 0)
2467: {*(in_vals + *pw++) = *dptr1++;}
2469: /* clear isend message handles */
2470: /* This changed for clarity though it could be the same */
2471: while (*msg_nodes++)
2472: /* Should I check the return value of MPI_Wait() or status? */
2473: /* Can this loop be replaced by a call to MPI_Waitall()? */
2474: {MPI_Wait(ids_out++, &status);}
2475: }
2479: /******************************************************************************
2480: Function: gather_scatter
2482: Input :
2483: Output:
2484: Return:
2485: Description:
2486: ******************************************************************************/
2487: static
2488: void
2489: gs_gop_tree_max(gs_id *gs, REAL *vals)
2490: {
2491: int size;
2492: int *in, *out;
2493: REAL *buf, *work;
2494: /* int op[] = {GL_MAX,0}; */
2497: #ifdef DEBUG
2498: error_msg_warning("start gs_gop_tree_max()");
2499: #endif
2500:
2501: in = gs->tree_map_in;
2502: out = gs->tree_map_out;
2503: buf = gs->tree_buf;
2504: work = gs->tree_work;
2505: size = gs->tree_nel;
2507: #if defined BLAS||CBLAS
2508: *work = -REAL_MAX;
2509: copy(size,work,0,buf,1);
2510: #else
2511: rvec_set(buf,-REAL_MAX,size);
2512: #endif
2514: while (*in >= 0)
2515: {*(buf + *out++) = *(vals + *in++);}
2517: in = gs->tree_map_in;
2518: out = gs->tree_map_out;
2519: MPI_Allreduce(buf,work,size,REAL_TYPE,MPI_MAX,gs->gs_comm);
2520: while (*in >= 0)
2521: {*(vals + *in++) = *(work + *out++);}
2523: #ifdef DEBUG
2524: error_msg_warning("end gs_gop_tree_max()");
2525: #endif
2526: }
2530: /******************************************************************************
2531: Function: gather_scatter
2533: Input :
2534: Output:
2535: Return:
2536: Description:
2537: ******************************************************************************/
2538: static void
2539: gs_gop_min_abs(register gs_id *gs, register REAL *vals)
2540: {
2541: #ifdef DEBUG
2542: error_msg_warning("start gs_gop_xxx()\n");
2543: #endif
2545: /* local only operations!!! */
2546: if (gs->num_local)
2547: {gs_gop_local_min_abs(gs,vals);}
2549: /* if intersection tree/pairwise and local isn't empty */
2550: if (gs->num_local_gop)
2551: {
2552: gs_gop_local_in_min_abs(gs,vals);
2554: /* pairwise */
2555: if (gs->num_pairs)
2556: {gs_gop_pairwise_min_abs(gs,vals);}
2557:
2558: /* tree */
2559: else if (gs->max_left_over)
2560: {gs_gop_tree_min_abs(gs,vals);}
2561:
2562: gs_gop_local_out(gs,vals);
2563: }
2564: /* if intersection tree/pairwise and local is empty */
2565: else
2566: {
2567: /* pairwise */
2568: if (gs->num_pairs)
2569: {gs_gop_pairwise_min_abs(gs,vals);}
2570:
2571: /* tree */
2572: else if (gs->max_left_over)
2573: {gs_gop_tree_min_abs(gs,vals);}
2574: }
2575: }
2579: /******************************************************************************
2580: Function: gather_scatter
2582: Input :
2583: Output:
2584: Return:
2585: Description:
2586: ******************************************************************************/
2587: static
2588: void
2589: gs_gop_local_min_abs(register gs_id *gs, register REAL *vals)
2590: {
2591: register int *num, *map, **reduce;
2592: register REAL tmp;
2595: #ifdef DEBUG
2596: error_msg_warning("start gs_gop_xxx()\n");
2597: #endif
2599: num = gs->num_local_reduce;
2600: reduce = gs->local_reduce;
2601: while ((map = *reduce))
2602: {
2603: num ++;
2604: tmp = REAL_MAX;
2605: while (*map >= 0)
2606: {tmp = MIN_FABS(tmp,*(vals + *map)); map++;}
2607:
2608: map = *reduce++;
2609: while (*map >= 0)
2610: {*(vals + *map++) = tmp;}
2611: }
2612: }
2616: /******************************************************************************
2617: Function: gather_scatter
2619: Input :
2620: Output:
2621: Return:
2622: Description:
2623: ******************************************************************************/
2624: static
2625: void
2626: gs_gop_local_in_min_abs(register gs_id *gs, register REAL *vals)
2627: {
2628: register int *num, *map, **reduce;
2629: register REAL *base;
2631: #ifdef DEBUG
2632: error_msg_warning("start gs_gop_xxx()\n");
2633: #endif
2635: num = gs->num_gop_local_reduce;
2636: reduce = gs->gop_local_reduce;
2637: while ((map = *reduce++))
2638: {
2639: num++;
2640: base = vals + *map++;
2641: while (*map >= 0)
2642: {*base = MIN_FABS(*base,*(vals + *map)); map++;}
2643: }
2644: }
2648: /******************************************************************************
2649: Function: gather_scatter
2651: VERSION 3 ::
2653: Input :
2654: Output:
2655: Return:
2656: Description:
2657: ******************************************************************************/
2658: static
2659: void
2660: gs_gop_pairwise_min_abs(register gs_id *gs, register REAL *in_vals)
2661: {
2662: register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
2663: register int *iptr, *msg_list, *msg_size, **msg_nodes;
2664: register int *pw, *list, *size, **nodes;
2665: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2666: MPI_Status status;
2669: /* strip and load registers */
2670: msg_list =list = gs->pair_list;
2671: msg_size =size = gs->msg_sizes;
2672: msg_nodes=nodes = gs->node_list;
2673: iptr=pw = gs->pw_elm_list;
2674: dptr1=dptr3 = gs->pw_vals;
2675: msg_ids_in = ids_in = gs->msg_ids_in;
2676: msg_ids_out = ids_out = gs->msg_ids_out;
2677: dptr2 = gs->out;
2678: in1=in2 = gs->in;
2680: /* post the receives */
2681: /* msg_nodes=nodes; */
2682: do
2683: {
2684: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2685: second one *list and do list++ afterwards */
2686: MPI_Irecv(in1, *size, REAL_TYPE, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2687: gs->gs_comm, msg_ids_in++);
2688: in1 += *size++;
2689: }
2690: while (*++msg_nodes);
2691: msg_nodes=nodes;
2693: /* load gs values into in out gs buffers */
2694: while (*iptr >= 0)
2695: {*dptr3++ = *(in_vals + *iptr++);}
2697: /* load out buffers and post the sends */
2698: while ((iptr = *msg_nodes++))
2699: {
2700: dptr3 = dptr2;
2701: while (*iptr >= 0)
2702: {*dptr2++ = *(dptr1 + *iptr++);}
2703: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2704: /* is msg_ids_out++ correct? */
2705: MPI_Isend(dptr3, *msg_size++, REAL_TYPE, *msg_list++,
2706: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2707: }
2709: if (gs->max_left_over)
2710: {gs_gop_tree_min_abs(gs,in_vals);}
2712: /* process the received data */
2713: msg_nodes=nodes;
2714: while ((iptr = *nodes++))
2715: {
2716: /* Should I check the return value of MPI_Wait() or status? */
2717: /* Can this loop be replaced by a call to MPI_Waitall()? */
2718: MPI_Wait(ids_in++, &status);
2719: while (*iptr >= 0)
2720: {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2721: }
2723: /* replace vals */
2724: while (*pw >= 0)
2725: {*(in_vals + *pw++) = *dptr1++;}
2727: /* clear isend message handles */
2728: /* This changed for clarity though it could be the same */
2729: while (*msg_nodes++)
2730: /* Should I check the return value of MPI_Wait() or status? */
2731: /* Can this loop be replaced by a call to MPI_Waitall()? */
2732: {MPI_Wait(ids_out++, &status);}
2733: }
2737: /******************************************************************************
2738: Function: gather_scatter
2740: Input :
2741: Output:
2742: Return:
2743: Description:
2744: ******************************************************************************/
2745: static
2746: void
2747: gs_gop_tree_min_abs(gs_id *gs, REAL *vals)
2748: {
2749: int size;
2750: int *in, *out;
2751: REAL *buf, *work;
2752: int op[] = {GL_MIN_ABS,0};
2755: #ifdef DEBUG
2756: error_msg_warning("start gs_gop_tree_min_abs()");
2757: #endif
2758:
2759: in = gs->tree_map_in;
2760: out = gs->tree_map_out;
2761: buf = gs->tree_buf;
2762: work = gs->tree_work;
2763: size = gs->tree_nel;
2765: #if defined BLAS||CBLAS
2766: *work = REAL_MAX;
2767: copy(size,work,0,buf,1);
2768: #else
2769: rvec_set(buf,REAL_MAX,size);
2770: #endif
2772: while (*in >= 0)
2773: {*(buf + *out++) = *(vals + *in++);}
2775: in = gs->tree_map_in;
2776: out = gs->tree_map_out;
2777: grop(buf,work,size,op);
2778: while (*in >= 0)
2779: {*(vals + *in++) = *(buf + *out++);}
2781: #ifdef DEBUG
2782: error_msg_warning("end gs_gop_tree_min_abs()");
2783: #endif
2784: }
2788: /******************************************************************************
2789: Function: gather_scatter
2791: Input :
2792: Output:
2793: Return:
2794: Description:
2795: ******************************************************************************/
2796: static void
2797: gs_gop_min(register gs_id *gs, register REAL *vals)
2798: {
2799: #ifdef DEBUG
2800: error_msg_warning("start gs_gop_xxx()\n");
2801: #endif
2803: /* local only operations!!! */
2804: if (gs->num_local)
2805: {gs_gop_local_min(gs,vals);}
2807: /* if intersection tree/pairwise and local isn't empty */
2808: if (gs->num_local_gop)
2809: {
2810: gs_gop_local_in_min(gs,vals);
2812: /* pairwise */
2813: if (gs->num_pairs)
2814: {gs_gop_pairwise_min(gs,vals);}
2815:
2816: /* tree */
2817: else if (gs->max_left_over)
2818: {gs_gop_tree_min(gs,vals);}
2819:
2820: gs_gop_local_out(gs,vals);
2821: }
2822: /* if intersection tree/pairwise and local is empty */
2823: else
2824: {
2825: /* pairwise */
2826: if (gs->num_pairs)
2827: {gs_gop_pairwise_min(gs,vals);}
2828:
2829: /* tree */
2830: else if (gs->max_left_over)
2831: {gs_gop_tree_min(gs,vals);}
2832: }
2833: #ifdef DEBUG
2834: error_msg_warning("end gs_gop_xxx()\n");
2835: #endif
2836: }
2840: /******************************************************************************
2841: Function: gather_scatter
2843: Input :
2844: Output:
2845: Return:
2846: Description:
2847: ******************************************************************************/
2848: static
2849: void
2850: gs_gop_local_min(register gs_id *gs, register REAL *vals)
2851: {
2852: register int *num, *map, **reduce;
2853: register REAL tmp;
2856: #ifdef DEBUG
2857: error_msg_warning("start gs_gop_xxx()\n");
2858: #endif
2860: num = gs->num_local_reduce;
2861: reduce = gs->local_reduce;
2862: while ((map = *reduce))
2863: {
2864: num ++;
2865: tmp = REAL_MAX;
2866: while (*map >= 0)
2867: {tmp = PetscMin(tmp,*(vals + *map)); map++;}
2868:
2869: map = *reduce++;
2870: while (*map >= 0)
2871: {*(vals + *map++) = tmp;}
2872: }
2873: }
2877: /******************************************************************************
2878: Function: gather_scatter
2880: Input :
2881: Output:
2882: Return:
2883: Description:
2884: ******************************************************************************/
2885: static
2886: void
2887: gs_gop_local_in_min(register gs_id *gs, register REAL *vals)
2888: {
2889: register int *num, *map, **reduce;
2890: register REAL *base;
2893: #ifdef DEBUG
2894: error_msg_warning("start gs_gop_xxx()\n");
2895: #endif
2897: num = gs->num_gop_local_reduce;
2898: reduce = gs->gop_local_reduce;
2899: while ((map = *reduce++))
2900: {
2901: num++;
2902: base = vals + *map++;
2903: while (*map >= 0)
2904: {*base = PetscMin(*base,*(vals + *map)); map++;}
2905: }
2906: }
2910: /******************************************************************************
2911: Function: gather_scatter
2913: VERSION 3 ::
2915: Input :
2916: Output:
2917: Return:
2918: Description:
2919: ******************************************************************************/
2920: static
2921: void
2922: gs_gop_pairwise_min(register gs_id *gs, register REAL *in_vals)
2923: {
2924: register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
2925: register int *iptr, *msg_list, *msg_size, **msg_nodes;
2926: register int *pw, *list, *size, **nodes;
2927: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2928: MPI_Status status;
2931: /* strip and load registers */
2932: msg_list =list = gs->pair_list;
2933: msg_size =size = gs->msg_sizes;
2934: msg_nodes=nodes = gs->node_list;
2935: iptr=pw = gs->pw_elm_list;
2936: dptr1=dptr3 = gs->pw_vals;
2937: msg_ids_in = ids_in = gs->msg_ids_in;
2938: msg_ids_out = ids_out = gs->msg_ids_out;
2939: dptr2 = gs->out;
2940: in1=in2 = gs->in;
2942: /* post the receives */
2943: /* msg_nodes=nodes; */
2944: do
2945: {
2946: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2947: second one *list and do list++ afterwards */
2948: MPI_Irecv(in1, *size, REAL_TYPE, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2949: gs->gs_comm, msg_ids_in++);
2950: in1 += *size++;
2951: }
2952: while (*++msg_nodes);
2953: msg_nodes=nodes;
2955: /* load gs values into in out gs buffers */
2956: while (*iptr >= 0)
2957: {*dptr3++ = *(in_vals + *iptr++);}
2959: /* load out buffers and post the sends */
2960: while ((iptr = *msg_nodes++))
2961: {
2962: dptr3 = dptr2;
2963: while (*iptr >= 0)
2964: {*dptr2++ = *(dptr1 + *iptr++);}
2965: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2966: /* is msg_ids_out++ correct? */
2967: MPI_Isend(dptr3, *msg_size++, REAL_TYPE, *msg_list++,
2968: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2969: }
2971: /* process the received data */
2972: if (gs->max_left_over)
2973: {gs_gop_tree_min(gs,in_vals);}
2975: msg_nodes=nodes;
2976: while ((iptr = *nodes++))
2977: {
2978: /* Should I check the return value of MPI_Wait() or status? */
2979: /* Can this loop be replaced by a call to MPI_Waitall()? */
2980: MPI_Wait(ids_in++, &status);
2981: while (*iptr >= 0)
2982: {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2983: }
2985: /* replace vals */
2986: while (*pw >= 0)
2987: {*(in_vals + *pw++) = *dptr1++;}
2989: /* clear isend message handles */
2990: /* This changed for clarity though it could be the same */
2991: while (*msg_nodes++)
2992: /* Should I check the return value of MPI_Wait() or status? */
2993: /* Can this loop be replaced by a call to MPI_Waitall()? */
2994: {MPI_Wait(ids_out++, &status);}
2995: }
2999: /******************************************************************************
3000: Function: gather_scatter
3002: Input :
3003: Output:
3004: Return:
3005: Description:
3006: ******************************************************************************/
3007: static
3008: void
3009: gs_gop_tree_min(gs_id *gs, REAL *vals)
3010: {
3011: int size;
3012: int *in, *out;
3013: REAL *buf, *work;
3014: /*int op[] = {GL_MIN,0};*/
3017: #ifdef DEBUG
3018: error_msg_warning("start gs_gop_tree_min()");
3019: #endif
3020:
3021: in = gs->tree_map_in;
3022: out = gs->tree_map_out;
3023: buf = gs->tree_buf;
3024: work = gs->tree_work;
3025: size = gs->tree_nel;
3027: #if defined BLAS||CBLAS
3028: *work = REAL_MAX;
3029: copy(size,work,0,buf,1);
3030: #else
3031: rvec_set(buf,REAL_MAX,size);
3032: #endif
3034: while (*in >= 0)
3035: {*(buf + *out++) = *(vals + *in++);}
3037: in = gs->tree_map_in;
3038: out = gs->tree_map_out;
3039: MPI_Allreduce(buf,work,size,REAL_TYPE,MPI_MIN,gs->gs_comm);
3040: while (*in >= 0)
3041: {*(vals + *in++) = *(work + *out++);}
3043: #ifdef DEBUG
3044: error_msg_warning("end gs_gop_tree_min()");
3045: #endif
3046: }
3050: /******************************************************************************
3051: Function: gather_scatter
3053: Input :
3054: Output:
3055: Return:
3056: Description:
3057: ******************************************************************************/
3058: static void
3059: gs_gop_times(register gs_id *gs, register REAL *vals)
3060: {
3061: #ifdef DEBUG
3062: error_msg_warning("start gs_gop_times()\n");
3063: #endif
3065: /* local only operations!!! */
3066: if (gs->num_local)
3067: {gs_gop_local_times(gs,vals);}
3069: /* if intersection tree/pairwise and local isn't empty */
3070: if (gs->num_local_gop)
3071: {
3072: gs_gop_local_in_times(gs,vals);
3074: /* pairwise */
3075: if (gs->num_pairs)
3076: {gs_gop_pairwise_times(gs,vals);}
3077:
3078: /* tree */
3079: else if (gs->max_left_over)
3080: {gs_gop_tree_times(gs,vals);}
3081:
3082: gs_gop_local_out(gs,vals);
3083: }
3084: /* if intersection tree/pairwise and local is empty */
3085: else
3086: {
3087: /* pairwise */
3088: if (gs->num_pairs)
3089: {gs_gop_pairwise_times(gs,vals);}
3090:
3091: /* tree */
3092: else if (gs->max_left_over)
3093: {gs_gop_tree_times(gs,vals);}
3094: }
3095: }
3099: /******************************************************************************
3100: Function: gather_scatter
3102: Input :
3103: Output:
3104: Return:
3105: Description:
3106: ******************************************************************************/
3107: static
3108: void
3109: gs_gop_local_times(register gs_id *gs, register REAL *vals)
3110: {
3111: register int *num, *map, **reduce;
3112: register REAL tmp;
3115: #ifdef DEBUG
3116: error_msg_warning("start gs_gop_xxx()\n");
3117: #endif
3119: num = gs->num_local_reduce;
3120: reduce = gs->local_reduce;
3121: while ((map = *reduce))
3122: {
3123: /* wall */
3124: if (*num == 2)
3125: {
3126: num ++; reduce++;
3127: vals[map[1]] = vals[map[0]] *= vals[map[1]];
3128: }
3129: /* corner shared by three elements */
3130: else if (*num == 3)
3131: {
3132: num ++; reduce++;
3133: vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]);
3134: }
3135: /* corner shared by four elements */
3136: else if (*num == 4)
3137: {
3138: num ++; reduce++;
3139: vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *=
3140: (vals[map[1]] * vals[map[2]] * vals[map[3]]);
3141: }
3142: /* general case ... odd geoms ... 3D*/
3143: else
3144: {
3145: num ++;
3146: tmp = 1.0;
3147: while (*map >= 0)
3148: {tmp *= *(vals + *map++);}
3150: map = *reduce++;
3151: while (*map >= 0)
3152: {*(vals + *map++) = tmp;}
3153: }
3154: }
3155: }
3159: /******************************************************************************
3160: Function: gather_scatter
3162: Input :
3163: Output:
3164: Return:
3165: Description:
3166: ******************************************************************************/
3167: static
3168: void
3169: gs_gop_local_in_times(register gs_id *gs, register REAL *vals)
3170: {
3171: register int *num, *map, **reduce;
3172: register REAL *base;
3175: #ifdef DEBUG
3176: error_msg_warning("start gs_gop_xxx()\n");
3177: #endif
3179: num = gs->num_gop_local_reduce;
3180: reduce = gs->gop_local_reduce;
3181: while ((map = *reduce++))
3182: {
3183: /* wall */
3184: if (*num == 2)
3185: {
3186: num ++;
3187: vals[map[0]] *= vals[map[1]];
3188: }
3189: /* corner shared by three elements */
3190: else if (*num == 3)
3191: {
3192: num ++;
3193: vals[map[0]] *= (vals[map[1]] * vals[map[2]]);
3194: }
3195: /* corner shared by four elements */
3196: else if (*num == 4)
3197: {
3198: num ++;
3199: vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]);
3200: }
3201: /* general case ... odd geoms ... 3D*/
3202: else
3203: {
3204: num++;
3205: base = vals + *map++;
3206: while (*map >= 0)
3207: {*base *= *(vals + *map++);}
3208: }
3209: }
3210: }
3214: /******************************************************************************
3215: Function: gather_scatter
3217: VERSION 3 ::
3219: Input :
3220: Output:
3221: Return:
3222: Description:
3223: ******************************************************************************/
3224: static
3225: void
3226: gs_gop_pairwise_times(register gs_id *gs, register REAL *in_vals)
3227: {
3228: register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
3229: register int *iptr, *msg_list, *msg_size, **msg_nodes;
3230: register int *pw, *list, *size, **nodes;
3231: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3232: MPI_Status status;
3235: /* strip and load registers */
3236: msg_list =list = gs->pair_list;
3237: msg_size =size = gs->msg_sizes;
3238: msg_nodes=nodes = gs->node_list;
3239: iptr=pw = gs->pw_elm_list;
3240: dptr1=dptr3 = gs->pw_vals;
3241: msg_ids_in = ids_in = gs->msg_ids_in;
3242: msg_ids_out = ids_out = gs->msg_ids_out;
3243: dptr2 = gs->out;
3244: in1=in2 = gs->in;
3246: /* post the receives */
3247: /* msg_nodes=nodes; */
3248: do
3249: {
3250: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3251: second one *list and do list++ afterwards */
3252: MPI_Irecv(in1, *size, REAL_TYPE, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3253: gs->gs_comm, msg_ids_in++);
3254: in1 += *size++;
3255: }
3256: while (*++msg_nodes);
3257: msg_nodes=nodes;
3259: /* load gs values into in out gs buffers */
3260: while (*iptr >= 0)
3261: {*dptr3++ = *(in_vals + *iptr++);}
3263: /* load out buffers and post the sends */
3264: while ((iptr = *msg_nodes++))
3265: {
3266: dptr3 = dptr2;
3267: while (*iptr >= 0)
3268: {*dptr2++ = *(dptr1 + *iptr++);}
3269: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3270: /* is msg_ids_out++ correct? */
3271: MPI_Isend(dptr3, *msg_size++, REAL_TYPE, *msg_list++,
3272: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3273: }
3275: if (gs->max_left_over)
3276: {gs_gop_tree_times(gs,in_vals);}
3278: /* process the received data */
3279: msg_nodes=nodes;
3280: while ((iptr = *nodes++))
3281: {
3282: /* Should I check the return value of MPI_Wait() or status? */
3283: /* Can this loop be replaced by a call to MPI_Waitall()? */
3284: MPI_Wait(ids_in++, &status);
3285: while (*iptr >= 0)
3286: {*(dptr1 + *iptr++) *= *in2++;}
3287: }
3289: /* replace vals */
3290: while (*pw >= 0)
3291: {*(in_vals + *pw++) = *dptr1++;}
3293: /* clear isend message handles */
3294: /* This changed for clarity though it could be the same */
3295: while (*msg_nodes++)
3296: /* Should I check the return value of MPI_Wait() or status? */
3297: /* Can this loop be replaced by a call to MPI_Waitall()? */
3298: {MPI_Wait(ids_out++, &status);}
3299: }
3303: /******************************************************************************
3304: Function: gather_scatter
3306: Input :
3307: Output:
3308: Return:
3309: Description:
3310: ******************************************************************************/
3311: static
3312: void
3313: gs_gop_tree_times(gs_id *gs, REAL *vals)
3314: {
3315: int size;
3316: int *in, *out;
3317: REAL *buf, *work;
3318: /*int op[] = {GL_MULT,0};*/
3321: #ifdef DEBUG
3322: error_msg_warning("start gs_gop_tree_times()");
3323: #endif
3324:
3325: in = gs->tree_map_in;
3326: out = gs->tree_map_out;
3327: buf = gs->tree_buf;
3328: work = gs->tree_work;
3329: size = gs->tree_nel;
3331: #if defined BLAS||CBLAS
3332: *work = 1.0;
3333: copy(size,work,0,buf,1);
3334: #else
3335: rvec_one(buf,size);
3336: #endif
3338: while (*in >= 0)
3339: {*(buf + *out++) = *(vals + *in++);}
3341: in = gs->tree_map_in;
3342: out = gs->tree_map_out;
3343: MPI_Allreduce(buf,work,size,REAL_TYPE,MPI_PROD,gs->gs_comm);
3344: while (*in >= 0)
3345: {*(vals + *in++) = *(work + *out++);}
3347: #ifdef DEBUG
3348: error_msg_warning("end gs_gop_tree_times()");
3349: #endif
3350: }
3354: /******************************************************************************
3355: Function: gather_scatter
3358: Input :
3359: Output:
3360: Return:
3361: Description:
3362: ******************************************************************************/
3363: static void
3364: gs_gop_plus(register gs_id *gs, register REAL *vals)
3365: {
3366: #ifdef DEBUG
3367: error_msg_warning("start gs_gop_plus()\n");
3368: #endif
3370: /* local only operations!!! */
3371: if (gs->num_local)
3372: {gs_gop_local_plus(gs,vals);}
3374: /* if intersection tree/pairwise and local isn't empty */
3375: if (gs->num_local_gop)
3376: {
3377: gs_gop_local_in_plus(gs,vals);
3379: /* pairwise will NOT do tree inside ... */
3380: if (gs->num_pairs)
3381: {gs_gop_pairwise_plus(gs,vals);}
3383: /* tree */
3384: if (gs->max_left_over)
3385: {gs_gop_tree_plus(gs,vals);}
3386:
3387: gs_gop_local_out(gs,vals);
3388: }
3389: /* if intersection tree/pairwise and local is empty */
3390: else
3391: {
3392: /* pairwise will NOT do tree inside */
3393: if (gs->num_pairs)
3394: {gs_gop_pairwise_plus(gs,vals);}
3395:
3396: /* tree */
3397: if (gs->max_left_over)
3398: {gs_gop_tree_plus(gs,vals);}
3399: }
3401: #ifdef DEBUG
3402: error_msg_warning("end gs_gop_plus()\n");
3403: #endif
3404: }
3408: /******************************************************************************
3409: Function: gather_scatter
3411: Input :
3412: Output:
3413: Return:
3414: Description:
3415: ******************************************************************************/
3416: static
3417: void
3418: gs_gop_local_plus(register gs_id *gs, register REAL *vals)
3419: {
3420: register int *num, *map, **reduce;
3421: register REAL tmp;
3424: #ifdef DEBUG
3425: error_msg_warning("begin gs_gop_local_plus()\n");
3426: #endif
3428: num = gs->num_local_reduce;
3429: reduce = gs->local_reduce;
3430: while ((map = *reduce))
3431: {
3432: /* wall */
3433: if (*num == 2)
3434: {
3435: num ++; reduce++;
3436: vals[map[1]] = vals[map[0]] += vals[map[1]];
3437: }
3438: /* corner shared by three elements */
3439: else if (*num == 3)
3440: {
3441: num ++; reduce++;
3442: vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]);
3443: }
3444: /* corner shared by four elements */
3445: else if (*num == 4)
3446: {
3447: num ++; reduce++;
3448: vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] +=
3449: (vals[map[1]] + vals[map[2]] + vals[map[3]]);
3450: }
3451: /* general case ... odd geoms ... 3D*/
3452: else
3453: {
3454: num ++;
3455: tmp = 0.0;
3456: while (*map >= 0)
3457: {tmp += *(vals + *map++);}
3459: map = *reduce++;
3460: while (*map >= 0)
3461: {*(vals + *map++) = tmp;}
3462: }
3463: }
3464: #ifdef DEBUG
3465: error_msg_warning("end gs_gop_local_plus()\n");
3466: #endif
3467: }
3471: /******************************************************************************
3472: Function: gather_scatter
3474: Input :
3475: Output:
3476: Return:
3477: Description:
3478: ******************************************************************************/
3479: static
3480: void
3481: gs_gop_local_in_plus(register gs_id *gs, register REAL *vals)
3482: {
3483: register int *num, *map, **reduce;
3484: register REAL *base;
3487: #ifdef DEBUG
3488: error_msg_warning("begin gs_gop_local_in_plus()\n");
3489: #endif
3491: num = gs->num_gop_local_reduce;
3492: reduce = gs->gop_local_reduce;
3493: while ((map = *reduce++))
3494: {
3495: /* wall */
3496: if (*num == 2)
3497: {
3498: num ++;
3499: vals[map[0]] += vals[map[1]];
3500: }
3501: /* corner shared by three elements */
3502: else if (*num == 3)
3503: {
3504: num ++;
3505: vals[map[0]] += (vals[map[1]] + vals[map[2]]);
3506: }
3507: /* corner shared by four elements */
3508: else if (*num == 4)
3509: {
3510: num ++;
3511: vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
3512: }
3513: /* general case ... odd geoms ... 3D*/
3514: else
3515: {
3516: num++;
3517: base = vals + *map++;
3518: while (*map >= 0)
3519: {*base += *(vals + *map++);}
3520: }
3521: }
3522: #ifdef DEBUG
3523: error_msg_warning("end gs_gop_local_in_plus()\n");
3524: #endif
3525: }
3529: /******************************************************************************
3530: Function: gather_scatter
3532: VERSION 3 ::
3534: Input :
3535: Output:
3536: Return:
3537: Description:
3538: ******************************************************************************/
3539: static
3540: void
3541: gs_gop_pairwise_plus(register gs_id *gs, register REAL *in_vals)
3542: {
3543: register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
3544: register int *iptr, *msg_list, *msg_size, **msg_nodes;
3545: register int *pw, *list, *size, **nodes;
3546: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3547: MPI_Status status;
3550: #ifdef DEBUG
3551: error_msg_warning("gs_gop_pairwise_plus() start\n");
3552: #endif
3554: /* strip and load registers */
3555: msg_list =list = gs->pair_list;
3556: msg_size =size = gs->msg_sizes;
3557: msg_nodes=nodes = gs->node_list;
3558: iptr=pw = gs->pw_elm_list;
3559: dptr1=dptr3 = gs->pw_vals;
3560: msg_ids_in = ids_in = gs->msg_ids_in;
3561: msg_ids_out = ids_out = gs->msg_ids_out;
3562: dptr2 = gs->out;
3563: in1=in2 = gs->in;
3565: /* post the receives */
3566: /* msg_nodes=nodes; */
3567: do
3568: {
3569: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3570: second one *list and do list++ afterwards */
3571: MPI_Irecv(in1, *size, REAL_TYPE, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3572: gs->gs_comm, msg_ids_in++);
3573: in1 += *size++;
3574: }
3575: while (*++msg_nodes);
3576: msg_nodes=nodes;
3578: /* load gs values into in out gs buffers */
3579: while (*iptr >= 0)
3580: {*dptr3++ = *(in_vals + *iptr++);}
3582: /* load out buffers and post the sends */
3583: while ((iptr = *msg_nodes++))
3584: {
3585: dptr3 = dptr2;
3586: while (*iptr >= 0)
3587: {*dptr2++ = *(dptr1 + *iptr++);}
3588: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3589: /* is msg_ids_out++ correct? */
3590: MPI_Isend(dptr3, *msg_size++, REAL_TYPE, *msg_list++,
3591: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3592: }
3594: /* do the tree while we're waiting */
3595: if (gs->max_left_over)
3596: {gs_gop_tree_plus(gs,in_vals);}
3598: /* process the received data */
3599: msg_nodes=nodes;
3600: while ((iptr = *nodes++))
3601: {
3602: /* Should I check the return value of MPI_Wait() or status? */
3603: /* Can this loop be replaced by a call to MPI_Waitall()? */
3604: MPI_Wait(ids_in++, &status);
3605: while (*iptr >= 0)
3606: {*(dptr1 + *iptr++) += *in2++;}
3607: }
3609: /* replace vals */
3610: while (*pw >= 0)
3611: {*(in_vals + *pw++) = *dptr1++;}
3613: /* clear isend message handles */
3614: /* This changed for clarity though it could be the same */
3615: while (*msg_nodes++)
3616: /* Should I check the return value of MPI_Wait() or status? */
3617: /* Can this loop be replaced by a call to MPI_Waitall()? */
3618: {MPI_Wait(ids_out++, &status);}
3620: #ifdef DEBUG
3621: error_msg_warning("gs_gop_pairwise_plus() end\n");
3622: #endif
3624: }
3628: /******************************************************************************
3629: Function: gather_scatter
3631: Input :
3632: Output:
3633: Return:
3634: Description:
3635: ******************************************************************************/
3636: static
3637: void
3638: gs_gop_tree_plus(gs_id *gs, REAL *vals)
3639: {
3640: int size;
3641: int *in, *out;
3642: REAL *buf, *work;
3643: /*int op[] = {GL_ADD,0}; */
3646: #ifdef DEBUG
3647: error_msg_warning("start gs_gop_tree_plus()\n");
3648: #endif
3649:
3650: in = gs->tree_map_in;
3651: out = gs->tree_map_out;
3652: buf = gs->tree_buf;
3653: work = gs->tree_work;
3654: size = gs->tree_nel;
3656: #if defined BLAS||CBLAS
3657: *work = 0.0;
3658: copy(size,work,0,buf,1);
3659: #else
3660: rvec_zero(buf,size);
3661: #endif
3663: while (*in >= 0)
3664: {*(buf + *out++) = *(vals + *in++);}
3666: in = gs->tree_map_in;
3667: out = gs->tree_map_out;
3668: MPI_Allreduce(buf,work,size,REAL_TYPE,MPI_SUM,gs->gs_comm);
3669: while (*in >= 0)
3670: {*(vals + *in++) = *(work + *out++);}
3672: #ifdef DEBUG
3673: error_msg_warning("end gs_gop_tree_plus()\n");
3674: #endif
3675: }
3679: /******************************************************************************
3680: Function: level_best_guess()
3682: Input :
3683: Output:
3684: Return:
3685: Description:
3686: ******************************************************************************/
3687: #if defined(not_used)
3688: static
3689: PetscErrorCode level_best_guess(void)
3690: {
3691: /* full pairwise for now */
3692: return(num_nodes);
3693: }
3694: #endif
3697: /******************************************************************************
3698: Function: gs_print_template()
3700: Input :
3702: Output:
3704: Return:
3706: Description:
3707: ******************************************************************************/
3708: #if defined(not_used)
3709: static
3710: void
3711: gs_print_template(register gs_id* gs, int who)
3712: {
3713: register int j, k, *iptr, *iptr2;
3715:
3716: if ((my_id == who) && (num_gs_ids))
3717: {
3718: printf("\n\nP#%d's GS#%d template:\n", my_id, gs->id);
3719: printf("id=%d\n", gs->id);
3720: printf("nel(unique)=%d\n", gs->nel);
3721: printf("nel_max=%d\n", gs->nel_max);
3722: printf("nel_min=%d\n", gs->nel_min);
3723: printf("nel_sum=%d\n", gs->nel_sum);
3724: printf("negl=%d\n", gs->negl);
3725: printf("gl_max=%d\n", gs->gl_max);
3726: printf("gl_min=%d\n", gs->gl_min);
3727: printf("elms ordered=%d\n",gs->ordered);
3728: printf("repeats=%d\n", gs->repeats);
3729: printf("positive=%d\n", gs->positive);
3730: printf("elms=%ld\n", (PTRINT) gs->elms);
3731: printf("elms(total)=%ld\n", (PTRINT) gs->local_elms);
3732: printf("vals=%ld\n", (PTRINT) gs->vals);
3733: printf("gl_bss_min=%d\n", gs->gl_bss_min);
3734: printf("gl_perm_min=%d\n", gs->gl_perm_min);
3735: printf("level=%d\n", gs->level);
3736: printf("proc_mask_sz=%d\n",gs->mask_sz);
3737: printf("sh_proc_mask=%ld\n",(PTRINT) gs->nghs);
3738: printf("ngh_buf_size=%d\n",gs->ngh_buf_sz);
3739: printf("ngh_buf=%ld\n", (PTRINT) gs->ngh_buf);
3740: printf("num_nghs=%d\n", gs->num_nghs);
3741: printf("max_nghs=%d\n", gs->max_nghs);
3743: /* pairwise exchange information */
3744: printf("\nPaiwise Info:\n");
3745: printf("num_pairs=%d\n", gs->num_pairs);
3746: printf("max_pairs=%d\n", gs->max_pairs);
3747: printf("len_pw_list=%d\n", gs->len_pw_list);
3748: printf("pair_list=%ld\n", (PTRINT) gs->pair_list);
3749: printf("msg_sizes=%ld\n", (PTRINT) gs->msg_sizes);
3750: printf("node_list=%ld\n", (PTRINT) gs->node_list);
3751: printf("pw_elm_list=%ld\n", (PTRINT) gs->pw_elm_list);
3753: printf("pw_elm_list: ");
3754: if ((iptr = gs->pw_elm_list))
3755: {
3756: for (j=0;j<gs->len_pw_list;j++)
3757: {printf("%d ", *iptr); iptr++;}
3758: }
3759: printf("\n");
3761: printf("processor_list: ");
3762: if ((iptr = gs->pair_list))
3763: {
3764: for (j=0;j<gs->num_pairs;j++)
3765: {printf("%d ", *iptr); iptr++;}
3766: }
3767: printf("\n");
3769: printf("loc_node_pairs=%d\n", gs->loc_node_pairs);
3770: printf("max_node_pairs=%d\n", gs->max_node_pairs);
3771: printf("min_node_pairs=%d\n", gs->min_node_pairs);
3772: printf("avg_node_pairs=%d\n", gs->avg_node_pairs);
3774: printf("size_list: ");
3775: if ((iptr = gs->msg_sizes))
3776: {
3777: for (j=0;j<gs->num_pairs;j++)
3778: {printf("%d ", *iptr); iptr++;}
3779: }
3780: printf("\n");
3781: if ((iptr = gs->pair_list))
3782: {
3783: for (j=0;j<gs->num_pairs;j++)
3784: {
3785: printf("node_list %d: ", *iptr);
3786: if ((iptr2 = (gs->node_list)[j]))
3787: {
3788: for (k=0;k<(gs->msg_sizes)[j];k++)
3789: {printf("%d ", *iptr2); iptr2++;}
3790: }
3791: iptr++;
3792: printf("\n");
3793: }
3794: }
3795: printf("\n");
3796:
3797: printf("elm_list(U): ");
3798: if ((iptr = gs->elms))
3799: {
3800: for (j=0;j<gs->nel;j++)
3801: {printf("%d ", *iptr); iptr++;}
3802: }
3803: printf("\n");
3804: printf("\n");
3805:
3806: printf("elm_list(T): ");
3807: if ((iptr = gs->local_elms))
3808: {
3809: for (j=0;j<gs->nel_total;j++)
3810: {printf("%d ", *iptr); iptr++;}
3811: }
3812: printf("\n");
3813: printf("\n");
3814:
3815: printf("map_list(T): ");
3816: if ((iptr = gs->companion))
3817: {
3818: for (j=0;j<gs->nel;j++)
3819: {printf("%d ", *iptr); iptr++;}
3820: }
3821: printf("\n");
3822: printf("\n");
3823:
3825: /* local exchange information */
3826: printf("\nLocal Info:\n");
3827: printf("local_strength=%d\n", gs->local_strength);
3828: printf("num_local_total=%d\n", gs->num_local_total);
3829: printf("num_local=%d\n", gs->num_local);
3830: printf("num_local_gop=%d\n", gs->num_local_gop);
3831: printf("num_local_reduce=%ld\n", (PTRINT) gs->num_local_reduce);
3832: printf("local_reduce=%ld\n", (PTRINT) gs->local_reduce);
3833: printf("num_gop_local_reduce=%ld\n", (PTRINT) gs->num_gop_local_reduce);
3834: printf("gop_local_reduce=%ld\n", (PTRINT) gs->gop_local_reduce);
3835: printf("\n");
3837: for (j=0;j<gs->num_local;j++)
3838: {
3839: printf("local reduce_list %d: ", j);
3840: if ((iptr2 = (gs->local_reduce)[j]))
3841: {
3842: if ((gs->num_local_reduce)[j] <= 0)
3843: {printf("oops");}
3844:
3845: for (k=0;k<(gs->num_local_reduce)[j];k++)
3846: {printf("%d ", *iptr2); iptr2++;}
3847: }
3848: printf("\n");
3849: }
3850:
3851: printf("\n");
3852: printf("\n");
3853:
3854: for (j=0;j<gs->num_local_gop;j++)
3855: {
3856: printf("gop reduce_list %d: ", j);
3857: iptr2 = (gs->gop_local_reduce)[j];
3858:
3859: if ((gs->num_gop_local_reduce)[j] <= 0)
3860: {printf("oops");}
3861:
3863: for (k=0;k<(gs->num_gop_local_reduce)[j];k++)
3864: {printf("%d ", *iptr2); iptr2++;}
3865: printf("\n");
3866: }
3867: printf("\n");
3868: printf("\n");
3870: /* crystal router information */
3871: printf("\n\n");
3872: printf("Tree Info:\n");
3873: printf("max_left_over=%d\n", gs->max_left_over);
3874: printf("num_in_list=%ld\n", (PTRINT) gs->in_num);
3875: printf("in_list=%ld\n", (PTRINT) gs->in_list);
3876: printf("num_out_list=%ld\n", (PTRINT) gs->out_num);
3877: printf("out_list=%ld\n", (PTRINT) gs->out_list);
3879: printf("\n\n");
3880: }
3881: fflush(stdout);
3882: }
3883: #endif
3887: /******************************************************************************
3888: Function: gs_free()
3890: Input :
3892: Output:
3894: Return:
3896: Description:
3897: if (gs->sss) {perm_free((void*) gs->sss);}
3898: ******************************************************************************/
3899: void
3900: gs_free(register gs_id *gs)
3901: {
3902: register int i;
3905: #ifdef DEBUG
3906: error_msg_warning("start gs_gop_xxx()\n");
3907: if (!gs) {error_msg_warning("NULL ptr passed to gs_free()"); return;}
3908: #endif
3910: if (gs->nghs) {perm_free((void*) gs->nghs);}
3911: if (gs->pw_nghs) {perm_free((void*) gs->pw_nghs);}
3913: /* tree */
3914: if (gs->max_left_over)
3915: {
3916: if (gs->tree_elms) {bss_free((void*) gs->tree_elms);}
3917: if (gs->tree_buf) {bss_free((void*) gs->tree_buf);}
3918: if (gs->tree_work) {bss_free((void*) gs->tree_work);}
3919: if (gs->tree_map_in) {bss_free((void*) gs->tree_map_in);}
3920: if (gs->tree_map_out) {bss_free((void*) gs->tree_map_out);}
3921: }
3923: /* pairwise info */
3924: if (gs->num_pairs)
3925: {
3926: /* should be NULL already */
3927: if (gs->ngh_buf) {bss_free((void*) gs->ngh_buf);}
3928: if (gs->elms) {bss_free((void*) gs->elms);}
3929: if (gs->local_elms) {bss_free((void*) gs->local_elms);}
3930: if (gs->companion) {bss_free((void*) gs->companion);}
3931:
3932: /* only set if pairwise */
3933: if (gs->vals) {perm_free((void*) gs->vals);}
3934: if (gs->in) {perm_free((void*) gs->in);}
3935: if (gs->out) {perm_free((void*) gs->out);}
3936: if (gs->msg_ids_in) {perm_free((void*) gs->msg_ids_in);}
3937: if (gs->msg_ids_out) {perm_free((void*) gs->msg_ids_out);}
3938: if (gs->pw_vals) {perm_free((void*) gs->pw_vals);}
3939: if (gs->pw_elm_list) {perm_free((void*) gs->pw_elm_list);}
3940: if (gs->node_list)
3941: {
3942: for (i=0;i<gs->num_pairs;i++)
3943: {if (gs->node_list[i]) {perm_free((void*) gs->node_list[i]);}}
3944: perm_free((void*) gs->node_list);
3945: }
3946: if (gs->msg_sizes) {perm_free((void*) gs->msg_sizes);}
3947: if (gs->pair_list) {perm_free((void*) gs->pair_list);}
3948: }
3950: /* local info */
3951: if (gs->num_local_total>=0)
3952: {
3953: for (i=0;i<gs->num_local_total+1;i++)
3954: /* for (i=0;i<gs->num_local_total;i++) */
3955: {
3956: if (gs->num_gop_local_reduce[i])
3957: {perm_free((void*) gs->gop_local_reduce[i]);}
3958: }
3959: }
3961: /* if intersection tree/pairwise and local isn't empty */
3962: if (gs->gop_local_reduce) {perm_free((void*) gs->gop_local_reduce);}
3963: if (gs->num_gop_local_reduce) {perm_free((void*) gs->num_gop_local_reduce);}
3965: perm_free((void*) gs);
3966: }
3973: /******************************************************************************
3974: Function: gather_scatter
3976: Input :
3977: Output:
3978: Return:
3979: Description:
3980: ******************************************************************************/
3981: void
3982: gs_gop_vec(register gs_id *gs, register REAL *vals, register const char *op, register int step)
3983: {
3984: #ifdef DEBUG
3985: error_msg_warning("gs_gop_vec() start");
3986: if (!gs) {error_msg_fatal("gs_gop_vec() :: passed NULL gs handle!!!");}
3987: if (!op) {error_msg_fatal("gs_gop_vec() :: passed NULL operation!!!");}
3989: /* check top make sure that segments being requested aren't larger */
3990: /* then what I reserved earlier ... fix is to allow user to reset */
3991: if (step>gs->vec_sz)
3992: {error_msg_fatal("gs_gop_vec() :: %d > %d!\n",step,gs->vec_sz);}
3993: #endif
3995: switch (*op) {
3996: case '+':
3997: gs_gop_vec_plus(gs,vals,step);
3998: break;
3999: #ifdef NOT_YET
4000: case '*':
4001: gs_gop_times(gs,vals);
4002: break;
4003: case 'a':
4004: gs_gop_min_abs(gs,vals);
4005: break;
4006: case 'A':
4007: gs_gop_max_abs(gs,vals);
4008: break;
4009: case 'e':
4010: gs_gop_exists(gs,vals);
4011: break;
4012: case 'm':
4013: gs_gop_min(gs,vals);
4014: break;
4015: case 'M':
4016: gs_gop_max(gs,vals); break;
4017: /*
4018: if (*(op+1)=='\0')
4019: {gs_gop_max(gs,vals); break;}
4020: else if (*(op+1)=='X')
4021: {gs_gop_max_abs(gs,vals); break;}
4022: else if (*(op+1)=='N')
4023: {gs_gop_min_abs(gs,vals); break;}
4024: */
4025: #endif
4026: default:
4027: error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]);
4028: error_msg_warning("gs_gop_vec() :: default :: plus");
4029: gs_gop_vec_plus(gs,vals,step);
4030: break;
4031: }
4032: #ifdef DEBUG
4033: error_msg_warning("gs_gop_vec() end");
4034: #endif
4035: }
4039: /******************************************************************************
4040: Function: gather_scatter
4042: Input :
4043: Output:
4044: Return:
4045: Description:
4046: ******************************************************************************/
4047: static void
4048: gs_gop_vec_plus(register gs_id *gs, register REAL *vals, register int step)
4049: {
4050: #ifdef DEBUG
4051: error_msg_warning("gs_gop_vec_plus() start");
4052: #endif
4054: if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");}
4056: /* local only operations!!! */
4057: if (gs->num_local)
4058: {gs_gop_vec_local_plus(gs,vals,step);}
4060: /* if intersection tree/pairwise and local isn't empty */
4061: if (gs->num_local_gop)
4062: {
4063: gs_gop_vec_local_in_plus(gs,vals,step);
4065: /* pairwise */
4066: if (gs->num_pairs)
4067: {gs_gop_vec_pairwise_plus(gs,vals,step);}
4069: /* tree */
4070: else if (gs->max_left_over)
4071: {gs_gop_vec_tree_plus(gs,vals,step);}
4073: gs_gop_vec_local_out(gs,vals,step);
4074: }
4075: /* if intersection tree/pairwise and local is empty */
4076: else
4077: {
4078: /* pairwise */
4079: if (gs->num_pairs)
4080: {gs_gop_vec_pairwise_plus(gs,vals,step);}
4082: /* tree */
4083: else if (gs->max_left_over)
4084: {gs_gop_vec_tree_plus(gs,vals,step);}
4085: }
4086: #ifdef DEBUG
4087: error_msg_warning("gs_gop_vec_plus() end");
4088: #endif
4089: }
4093: /******************************************************************************
4094: Function: gather_scatter
4096: Input :
4097: Output:
4098: Return:
4099: Description:
4100: ******************************************************************************/
4101: static
4102: void
4103: gs_gop_vec_local_plus(register gs_id *gs, register REAL *vals,
4104: register int step)
4105: {
4106: register int *num, *map, **reduce;
4107: register REAL *base;
4110: #ifdef DEBUG
4111: error_msg_warning("gs_gop_vec_local_plus() start");
4112: #endif
4114: num = gs->num_local_reduce;
4115: reduce = gs->local_reduce;
4116: while ((map = *reduce))
4117: {
4118: base = vals + map[0] * step;
4120: /* wall */
4121: if (*num == 2)
4122: {
4123: num++; reduce++;
4124: rvec_add (base,vals+map[1]*step,step);
4125: rvec_copy(vals+map[1]*step,base,step);
4126: }
4127: /* corner shared by three elements */
4128: else if (*num == 3)
4129: {
4130: num++; reduce++;
4131: rvec_add (base,vals+map[1]*step,step);
4132: rvec_add (base,vals+map[2]*step,step);
4133: rvec_copy(vals+map[2]*step,base,step);
4134: rvec_copy(vals+map[1]*step,base,step);
4135: }
4136: /* corner shared by four elements */
4137: else if (*num == 4)
4138: {
4139: num++; reduce++;
4140: rvec_add (base,vals+map[1]*step,step);
4141: rvec_add (base,vals+map[2]*step,step);
4142: rvec_add (base,vals+map[3]*step,step);
4143: rvec_copy(vals+map[3]*step,base,step);
4144: rvec_copy(vals+map[2]*step,base,step);
4145: rvec_copy(vals+map[1]*step,base,step);
4146: }
4147: /* general case ... odd geoms ... 3D */
4148: else
4149: {
4150: num++;
4151: while (*++map >= 0)
4152: {rvec_add (base,vals+*map*step,step);}
4153:
4154: map = *reduce;
4155: while (*++map >= 0)
4156: {rvec_copy(vals+*map*step,base,step);}
4157:
4158: reduce++;
4159: }
4160: }
4161: #ifdef DEBUG
4162: error_msg_warning("gs_gop_vec_local_plus() end");
4163: #endif
4164: }
4168: /******************************************************************************
4169: Function: gather_scatter
4171: Input :
4172: Output:
4173: Return:
4174: Description:
4175: ******************************************************************************/
4176: static
4177: void
4178: gs_gop_vec_local_in_plus(register gs_id *gs, register REAL *vals,
4179: register int step)
4180: {
4181: register int *num, *map, **reduce;
4182: register REAL *base;
4185: #ifdef DEBUG
4186: error_msg_warning("gs_gop_vec_locel_in_plus() start");
4187: #endif
4189: num = gs->num_gop_local_reduce;
4190: reduce = gs->gop_local_reduce;
4191: while ((map = *reduce++))
4192: {
4193: base = vals + map[0] * step;
4195: /* wall */
4196: if (*num == 2)
4197: {
4198: num ++;
4199: rvec_add(base,vals+map[1]*step,step);
4200: }
4201: /* corner shared by three elements */
4202: else if (*num == 3)
4203: {
4204: num ++;
4205: rvec_add(base,vals+map[1]*step,step);
4206: rvec_add(base,vals+map[2]*step,step);
4207: }
4208: /* corner shared by four elements */
4209: else if (*num == 4)
4210: {
4211: num ++;
4212: rvec_add(base,vals+map[1]*step,step);
4213: rvec_add(base,vals+map[2]*step,step);
4214: rvec_add(base,vals+map[3]*step,step);
4215: }
4216: /* general case ... odd geoms ... 3D*/
4217: else
4218: {
4219: num++;
4220: while (*++map >= 0)
4221: {rvec_add(base,vals+*map*step,step);}
4222: }
4223: }
4224: #ifdef DEBUG
4225: error_msg_warning("gs_gop_vec_local_in_plus() end");
4226: #endif
4227: }
4230: /******************************************************************************
4231: Function: gather_scatter
4233: Input :
4234: Output:
4235: Return:
4236: Description:
4237: ******************************************************************************/
4238: static
4239: void
4240: gs_gop_vec_local_out(register gs_id *gs, register REAL *vals,
4241: register int step)
4242: {
4243: register int *num, *map, **reduce;
4244: register REAL *base;
4247: #ifdef DEBUG
4248: error_msg_warning("gs_gop_vec_local_out() start");
4249: #endif
4251: num = gs->num_gop_local_reduce;
4252: reduce = gs->gop_local_reduce;
4253: while ((map = *reduce++))
4254: {
4255: base = vals + map[0] * step;
4257: /* wall */
4258: if (*num == 2)
4259: {
4260: num ++;
4261: rvec_copy(vals+map[1]*step,base,step);
4262: }
4263: /* corner shared by three elements */
4264: else if (*num == 3)
4265: {
4266: num ++;
4267: rvec_copy(vals+map[1]*step,base,step);
4268: rvec_copy(vals+map[2]*step,base,step);
4269: }
4270: /* corner shared by four elements */
4271: else if (*num == 4)
4272: {
4273: num ++;
4274: rvec_copy(vals+map[1]*step,base,step);
4275: rvec_copy(vals+map[2]*step,base,step);
4276: rvec_copy(vals+map[3]*step,base,step);
4277: }
4278: /* general case ... odd geoms ... 3D*/
4279: else
4280: {
4281: num++;
4282: while (*++map >= 0)
4283: {rvec_copy(vals+*map*step,base,step);}
4284: }
4285: }
4286: #ifdef DEBUG
4287: error_msg_warning("gs_gop_vec_local_out() end");
4288: #endif
4289: }
4293: /******************************************************************************
4294: Function: gather_scatter
4296: VERSION 3 ::
4298: Input :
4299: Output:
4300: Return:
4301: Description:
4302: ******************************************************************************/
4303: static
4304: void
4305: gs_gop_vec_pairwise_plus(register gs_id *gs, register REAL *in_vals,
4306: register int step)
4307: {
4308: register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
4309: register int *iptr, *msg_list, *msg_size, **msg_nodes;
4310: register int *pw, *list, *size, **nodes;
4311: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
4312: MPI_Status status;
4315: #ifdef DEBUG
4316: error_msg_warning("gs_gop_vec_pairwise_plus() start");
4317: #endif
4319: /* strip and load registers */
4320: msg_list =list = gs->pair_list;
4321: msg_size =size = gs->msg_sizes;
4322: msg_nodes=nodes = gs->node_list;
4323: iptr=pw = gs->pw_elm_list;
4324: dptr1=dptr3 = gs->pw_vals;
4325: msg_ids_in = ids_in = gs->msg_ids_in;
4326: msg_ids_out = ids_out = gs->msg_ids_out;
4327: dptr2 = gs->out;
4328: in1=in2 = gs->in;
4330: /* post the receives */
4331: /* msg_nodes=nodes; */
4332: do
4333: {
4334: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
4335: second one *list and do list++ afterwards */
4336: MPI_Irecv(in1, *size *step, REAL_TYPE, MPI_ANY_SOURCE, MSGTAG1 + *list++,
4337: gs->gs_comm, msg_ids_in++);
4338: in1 += *size++ *step;
4339: }
4340: while (*++msg_nodes);
4341: msg_nodes=nodes;
4343: /* load gs values into in out gs buffers */
4344: while (*iptr >= 0)
4345: {
4346: rvec_copy(dptr3,in_vals + *iptr*step,step);
4347: dptr3+=step;
4348: iptr++;
4349: }
4351: /* load out buffers and post the sends */
4352: while ((iptr = *msg_nodes++))
4353: {
4354: dptr3 = dptr2;
4355: while (*iptr >= 0)
4356: {
4357: rvec_copy(dptr2,dptr1 + *iptr*step,step);
4358: dptr2+=step;
4359: iptr++;
4360: }
4361: MPI_Isend(dptr3, *msg_size++ *step, REAL_TYPE, *msg_list++,
4362: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
4363: }
4365: /* tree */
4366: if (gs->max_left_over)
4367: {gs_gop_vec_tree_plus(gs,in_vals,step);}
4369: /* process the received data */
4370: msg_nodes=nodes;
4371: while ((iptr = *nodes++))
4372: {
4373: /* Should I check the return value of MPI_Wait() or status? */
4374: /* Can this loop be replaced by a call to MPI_Waitall()? */
4375: MPI_Wait(ids_in++, &status);
4376: while (*iptr >= 0)
4377: {
4378: #if defined BLAS||CBLAS
4379: axpy(step,1.0,in2,1,dptr1 + *iptr*step,1);
4380: #else
4381: rvec_add(dptr1 + *iptr*step,in2,step);
4382: #endif
4383: in2+=step;
4384: iptr++;
4385: }
4386: }
4388: /* replace vals */
4389: while (*pw >= 0)
4390: {
4391: rvec_copy(in_vals + *pw*step,dptr1,step);
4392: dptr1+=step;
4393: pw++;
4394: }
4396: /* clear isend message handles */
4397: /* This changed for clarity though it could be the same */
4398: while (*msg_nodes++)
4399: /* Should I check the return value of MPI_Wait() or status? */
4400: /* Can this loop be replaced by a call to MPI_Waitall()? */
4401: {MPI_Wait(ids_out++, &status);}
4403: #ifdef DEBUG
4404: error_msg_warning("gs_gop_vec_pairwise_plus() end");
4405: #endif
4407: }
4411: /******************************************************************************
4412: Function: gather_scatter
4414: Input :
4415: Output:
4416: Return:
4417: Description:
4418: ******************************************************************************/
4419: static
4420: void
4421: gs_gop_vec_tree_plus(register gs_id *gs, register REAL *vals, register int step)
4422: {
4423: register int size, *in, *out;
4424: register REAL *buf, *work;
4425: int op[] = {GL_ADD,0};
4427: #ifdef DEBUG
4428: error_msg_warning("start gs_gop_vec_tree_plus()");
4429: #endif
4431: /* copy over to local variables */
4432: in = gs->tree_map_in;
4433: out = gs->tree_map_out;
4434: buf = gs->tree_buf;
4435: work = gs->tree_work;
4436: size = gs->tree_nel*step;
4438: /* zero out collection buffer */
4439: #if defined BLAS||CBLAS
4440: *work = 0.0;
4441: copy(size,work,0,buf,1);
4442: #else
4443: rvec_zero(buf,size);
4444: #endif
4447: /* copy over my contributions */
4448: while (*in >= 0)
4449: {
4450: #if defined BLAS||CBLAS
4451: copy(step,vals + *in++*step,1,buf + *out++*step,1);
4452: #else
4453: rvec_copy(buf + *out++*step,vals + *in++*step,step);
4454: #endif
4455: }
4457: /* perform fan in/out on full buffer */
4458: /* must change grop to handle the blas */
4459: grop(buf,work,size,op);
4461: /* reset */
4462: in = gs->tree_map_in;
4463: out = gs->tree_map_out;
4465: /* get the portion of the results I need */
4466: while (*in >= 0)
4467: {
4468: #if defined BLAS||CBLAS
4469: copy(step,buf + *out++*step,1,vals + *in++*step,1);
4470: #else
4471: rvec_copy(vals + *in++*step,buf + *out++*step,step);
4472: #endif
4473: }
4475: #ifdef DEBUG
4476: error_msg_warning("start gs_gop_vec_tree_plus()");
4477: #endif
4478: }
4482: /******************************************************************************
4483: Function: gather_scatter
4485: Input :
4486: Output:
4487: Return:
4488: Description:
4489: ******************************************************************************/
4490: void
4491: gs_gop_hc(register gs_id *gs, register REAL *vals, register const char *op, register int dim)
4492: {
4493: #ifdef DEBUG
4494: error_msg_warning("gs_gop_hc() start\n");
4495: if (!gs) {error_msg_fatal("gs_gop_vec() :: passed NULL gs handle!!!\n");}
4496: if (!op) {error_msg_fatal("gs_gop_vec() :: passed NULL operation!!!\n");}
4497: #endif
4499: switch (*op) {
4500: case '+':
4501: gs_gop_plus_hc(gs,vals,dim);
4502: break;
4503: #ifdef NOT_YET
4504: case '*':
4505: gs_gop_times(gs,vals);
4506: break;
4507: case 'a':
4508: gs_gop_min_abs(gs,vals);
4509: break;
4510: case 'A':
4511: gs_gop_max_abs(gs,vals);
4512: break;
4513: case 'e':
4514: gs_gop_exists(gs,vals);
4515: break;
4516: case 'm':
4517: gs_gop_min(gs,vals);
4518: break;
4519: case 'M':
4520: gs_gop_max(gs,vals); break;
4521: /*
4522: if (*(op+1)=='\0')
4523: {gs_gop_max(gs,vals); break;}
4524: else if (*(op+1)=='X')
4525: {gs_gop_max_abs(gs,vals); break;}
4526: else if (*(op+1)=='N')
4527: {gs_gop_min_abs(gs,vals); break;}
4528: */
4529: #endif
4530: default:
4531: error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]);
4532: error_msg_warning("gs_gop_hc() :: default :: plus\n");
4533: gs_gop_plus_hc(gs,vals,dim);
4534: break;
4535: }
4536: #ifdef DEBUG
4537: error_msg_warning("gs_gop_hc() end\n");
4538: #endif
4539: }
4543: /******************************************************************************
4544: Function: gather_scatter
4546: Input :
4547: Output:
4548: Return:
4549: Description:
4550: ******************************************************************************/
4551: static void
4552: gs_gop_plus_hc(register gs_id *gs, register REAL *vals, int dim)
4553: {
4554: #ifdef DEBUG
4555: error_msg_warning("start gs_gop_hc()\n");
4556: if (!gs) {error_msg_fatal("gs_gop_hc() passed NULL gs handle!!!\n");}
4557: #endif
4559: /* if there's nothing to do return */
4560: if (dim<=0)
4561: {return;}
4563: /* can't do more dimensions then exist */
4564: dim = PetscMin(dim,i_log2_num_nodes);
4566: /* local only operations!!! */
4567: if (gs->num_local)
4568: {gs_gop_local_plus(gs,vals);}
4570: /* if intersection tree/pairwise and local isn't empty */
4571: if (gs->num_local_gop)
4572: {
4573: gs_gop_local_in_plus(gs,vals);
4575: /* pairwise will do tree inside ... */
4576: if (gs->num_pairs)
4577: {gs_gop_pairwise_plus_hc(gs,vals,dim);}
4579: /* tree only */
4580: else if (gs->max_left_over)
4581: {gs_gop_tree_plus_hc(gs,vals,dim);}
4582:
4583: gs_gop_local_out(gs,vals);
4584: }
4585: /* if intersection tree/pairwise and local is empty */
4586: else
4587: {
4588: /* pairwise will do tree inside */
4589: if (gs->num_pairs)
4590: {gs_gop_pairwise_plus_hc(gs,vals,dim);}
4591:
4592: /* tree */
4593: else if (gs->max_left_over)
4594: {gs_gop_tree_plus_hc(gs,vals,dim);}
4595: }
4597: #ifdef DEBUG
4598: error_msg_warning("end gs_gop_hc()\n");
4599: #endif
4600: }
4603: /******************************************************************************
4604: VERSION 3 ::
4606: Input :
4607: Output:
4608: Return:
4609: Description:
4610: ******************************************************************************/
4611: static
4612: void
4613: gs_gop_pairwise_plus_hc(register gs_id *gs, register REAL *in_vals, int dim)
4614: {
4615: register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
4616: register int *iptr, *msg_list, *msg_size, **msg_nodes;
4617: register int *pw, *list, *size, **nodes;
4618: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
4619: MPI_Status status;
4620: int i, mask=1;
4622: for (i=1; i<dim; i++)
4623: {mask<<=1; mask++;}
4626: #ifdef DEBUG
4627: error_msg_warning("gs_gop_pairwise_hc() start\n");
4628: #endif
4630: /* strip and load registers */
4631: msg_list =list = gs->pair_list;
4632: msg_size =size = gs->msg_sizes;
4633: msg_nodes=nodes = gs->node_list;
4634: iptr=pw = gs->pw_elm_list;
4635: dptr1=dptr3 = gs->pw_vals;
4636: msg_ids_in = ids_in = gs->msg_ids_in;
4637: msg_ids_out = ids_out = gs->msg_ids_out;
4638: dptr2 = gs->out;
4639: in1=in2 = gs->in;
4641: /* post the receives */
4642: /* msg_nodes=nodes; */
4643: do
4644: {
4645: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
4646: second one *list and do list++ afterwards */
4647: if ((my_id|mask)==(*list|mask))
4648: {
4649: MPI_Irecv(in1, *size, REAL_TYPE, MPI_ANY_SOURCE, MSGTAG1 + *list++,
4650: gs->gs_comm, msg_ids_in++);
4651: in1 += *size++;
4652: }
4653: else
4654: {list++; size++;}
4655: }
4656: while (*++msg_nodes);
4658: /* load gs values into in out gs buffers */
4659: while (*iptr >= 0)
4660: {*dptr3++ = *(in_vals + *iptr++);}
4662: /* load out buffers and post the sends */
4663: msg_nodes=nodes;
4664: list = msg_list;
4665: while ((iptr = *msg_nodes++))
4666: {
4667: if ((my_id|mask)==(*list|mask))
4668: {
4669: dptr3 = dptr2;
4670: while (*iptr >= 0)
4671: {*dptr2++ = *(dptr1 + *iptr++);}
4672: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
4673: /* is msg_ids_out++ correct? */
4674: MPI_Isend(dptr3, *msg_size++, REAL_TYPE, *list++,
4675: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
4676: }
4677: else
4678: {list++; msg_size++;}
4679: }
4681: /* do the tree while we're waiting */
4682: if (gs->max_left_over)
4683: {gs_gop_tree_plus_hc(gs,in_vals,dim);}
4685: /* process the received data */
4686: msg_nodes=nodes;
4687: list = msg_list;
4688: while ((iptr = *nodes++))
4689: {
4690: if ((my_id|mask)==(*list|mask))
4691: {
4692: /* Should I check the return value of MPI_Wait() or status? */
4693: /* Can this loop be replaced by a call to MPI_Waitall()? */
4694: MPI_Wait(ids_in++, &status);
4695: while (*iptr >= 0)
4696: {*(dptr1 + *iptr++) += *in2++;}
4697: }
4698: list++;
4699: }
4701: /* replace vals */
4702: while (*pw >= 0)
4703: {*(in_vals + *pw++) = *dptr1++;}
4705: /* clear isend message handles */
4706: /* This changed for clarity though it could be the same */
4707: while (*msg_nodes++)
4708: {
4709: if ((my_id|mask)==(*msg_list|mask))
4710: {
4711: /* Should I check the return value of MPI_Wait() or status? */
4712: /* Can this loop be replaced by a call to MPI_Waitall()? */
4713: MPI_Wait(ids_out++, &status);
4714: }
4715: msg_list++;
4716: }
4718: #ifdef DEBUG
4719: error_msg_warning("gs_gop_pairwise_hc() end\n");
4720: #endif
4722: }
4726: /******************************************************************************
4727: Function: gather_scatter
4729: Input :
4730: Output:
4731: Return:
4732: Description:
4733: ******************************************************************************/
4734: static
4735: void
4736: gs_gop_tree_plus_hc(gs_id *gs, REAL *vals, int dim)
4737: {
4738: int size;
4739: int *in, *out;
4740: REAL *buf, *work;
4741: int op[] = {GL_ADD,0};
4743: #ifdef DEBUG
4744: error_msg_warning("start gs_gop_tree_plus_hc()\n");
4745: #endif
4746:
4747: in = gs->tree_map_in;
4748: out = gs->tree_map_out;
4749: buf = gs->tree_buf;
4750: work = gs->tree_work;
4751: size = gs->tree_nel;
4753: #if defined BLAS||CBLAS
4754: *work = 0.0;
4755: copy(size,work,0,buf,1);
4756: #else
4757: rvec_zero(buf,size);
4758: #endif
4760: while (*in >= 0)
4761: {*(buf + *out++) = *(vals + *in++);}
4763: in = gs->tree_map_in;
4764: out = gs->tree_map_out;
4766: grop_hc(buf,work,size,op,dim);
4768: while (*in >= 0)
4769: {*(vals + *in++) = *(buf + *out++);}
4771: #ifdef DEBUG
4772: error_msg_warning("end gs_gop_tree_plus_hc()\n");
4773: #endif
4774: }