00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef COMPONENTS_TRIG_CPU_ALLREDUCE_TREE_TRIGGERED_H
00014 #define COMPONENTS_TRIG_CPU_ALLREDUCE_TREE_TRIGGERED_H
00015
00016 #include "sst/elements/portals4_sm/trig_cpu/application.h"
00017 #include "sst/elements/portals4_sm/trig_cpu/trig_cpu.h"
00018 #include "sst/elements/portals4_sm/trig_cpu/portals.h"
00019
// Allreduce benchmark application that sums one value per node over a tree,
// using Portals triggered operations so that intermediate nodes forward data
// when a counting event reaches a threshold.
//
// Data flow as written below:
//   - Up phase (portal index PT_UP): every node atomically adds in_buf into a
//     tmp_buf accumulator -- leaves into their parent's, interior nodes into
//     their own -- and interior nodes forward tmp_buf to their parent once
//     up_tree_ct_h reaches the per-round threshold.
//   - Down phase (portal index PT_DOWN): the tree root puts tmp_buf into its
//     own out_buf; every node with children forwards out_buf to each child
//     once user_ct_h reaches 1.
//
// operator() is written as a coroutine with crBegin()/crReturn()/crFinish()
// (macros defined outside this file). NOTE(review): presumably each crReturn()
// yields to the simulator and the next call resumes right after it -- confirm
// against the macro definitions. State that must survive a yield is kept in
// data members for that reason.
//
// my_id, num_nodes, cpu, buildNaryTree(), buildBinomialTree() and
// start_noise_section() are not declared here; presumably they come from the
// `application` base class.
00020 class allreduce_tree_triggered : public application {
00021 public:
// Sets up the tree topology and the reduction buffers for this node.
//   cpu  - owning CPU model; supplies the tree radix and the Portals handle.
//   nary - true selects buildNaryTree(), false selects buildBinomialTree().
00022 allreduce_tree_triggered(trig_cpu *cpu, bool nary) : application(cpu), init(false), algo_count(0)
00023 {
00024 radix = cpu->getRadix();
00025 ptl = cpu->getPortalsHandle();
00026
// Both builders yield (parent/root id, list of child ids) for this node.
00027 if (nary) {
00028 boost::tie(my_root, my_children) = buildNaryTree(radix);
00029 } else {
00030 boost::tie(my_root, my_children) = buildBinomialTree(radix);
00031 }
00032 num_children = my_children.size();
00033
// Every node contributes the value 1, so the reduced sum is checked against
// num_nodes at the end of operator().
00034 in_buf = 1;
00035 out_buf = 0;
00036 tmp_buf = 0;
00037 zero_buf = 0;
00038 }
00039
// Runs one allreduce as a resumable state machine.
//   ev - triggering event; not referenced in the body.
// Returns true at the end of the body (after crFinish()). What is returned at
// a crReturn() yield depends on the macro definition, which is outside this
// file.
00040 bool
00041 operator()(Event *ev)
00042 {
// Scratch descriptors. Every field that is used is assigned immediately before
// the Ptl call that consumes it, with no crReturn() in between.
// NOTE(review): presumably because locals do not survive a yield -- confirm.
00043 ptl_md_t md;
00044 ptl_me_t me;
00045
00046 crBegin();
00047
// One-time setup, guarded by `init`: persistent up-tree resources that are
// reused by every later invocation.
00048 if (!init) {
00049
// Counting event plus match entry on PT_UP: incoming up-tree contributions
// land in tmp_buf and are counted by up_tree_ct_h.
00050 ptl->PtlCTAlloc(PTL_CT_OPERATION, up_tree_ct_h);
00051 me.start = &tmp_buf;
00052 me.length = 8;
00053 me.ignore_bits = ~0x0;
00054 me.ct_handle = up_tree_ct_h;
00055 ptl->PtlMEAppend(PT_UP, me, PTL_PRIORITY_LIST, NULL, up_tree_me_h);
00056
// Memory descriptor over tmp_buf, used as the source when the accumulated
// value is sent on (to the parent, or to this node's own PT_DOWN at the root).
00057 md.start = &tmp_buf;
00058 md.length = 8;
00059 md.eq_handle = PTL_EQ_NONE;
00060 md.ct_handle = PTL_CT_NONE;
00061 ptl->PtlMDBind(md, &up_tree_md_h);
00062
// Memory descriptor over zero_buf (always 0), the source of the PTL_LAND
// atomic issued against tmp_buf further down.
00063 md.start = &zero_buf;
00064 md.length = 8;
00065 md.eq_handle = PTL_EQ_NONE;
00066 md.ct_handle = PTL_CT_NONE;
00067 ptl->PtlMDBind(md, &zero_md_h);
00068
00069 init = true;
00070 crReturn();
00071 start_noise_section();
00072 }
00073
00074
// Start of the timed section; 200ns of busy time is charged up front.
00075 start_time = cpu->getCurrentSimTimeNano();
00076 cpu->addBusyTime("200ns");
00077 crReturn();
00078
00079
00080
00081
// Per-invocation resources: a counting event and match entry on PT_DOWN so
// that the final result lands in out_buf and is counted by user_ct_h.
00082 ptl->PtlCTAlloc(PTL_CT_OPERATION, user_ct_h);
00083 crReturn();
00084 me.start = &out_buf;
00085 me.length = 8;
00086 me.ignore_bits = ~0x0;
00087 me.ct_handle = user_ct_h;
00088 ptl->PtlMEAppend(PT_DOWN, me, PTL_PRIORITY_LIST, NULL, user_me_h);
00089 crReturn();
00090
// Only nodes with children need an MD over out_buf, which is the source for
// forwarding the result down the tree.
00091 if (num_children > 0) {
00092 md.start = &out_buf;
00093 md.length = 8;
00094 md.eq_handle = PTL_EQ_NONE;
00095 md.ct_handle = PTL_CT_NONE;
00096 ptl->PtlMDBind(md, &user_md_h);
00097 crReturn();
00098 }
00099
// MD over in_buf, this node's own contribution to the sum.
// NOTE(review): user_md_h and user_in_md_h are bound on every invocation and
// no matching release is visible in this class -- confirm whether that is
// intentional.
00100 md.start = &in_buf;
00101 md.length = 8;
00102 md.eq_handle = PTL_EQ_NONE;
00103 md.ct_handle = PTL_CT_NONE;
00104 ptl->PtlMDBind(md, &user_in_md_h);
00105 crReturn();
00106
// The communication calls below are issued between PtlEnableCoalesce() and
// PtlDisableCoalesce().
00107 ptl->PtlEnableCoalesce();
00108 crReturn();
00109
00110 if (num_children == 0) {
00111
// Leaf node: add the local contribution directly into the parent's up-tree
// accumulator.
00112 ptl->PtlAtomic(user_in_md_h, 0, 8, 0, my_root, PT_UP, 0, 0, NULL, 0, PTL_SUM, PTL_LONG);
00113 crReturn();
00114 } else {
00115
// Node with children: add the local contribution into this node's own
// accumulator (tmp_buf, via the PT_UP match entry), alongside the
// contributions arriving from the children.
00116 ptl->PtlAtomic(user_in_md_h, 0, 8, 0, my_id, PT_UP, 0, 0, NULL,
00117 0, PTL_SUM, PTL_LONG);
00118 crReturn();
//
// Trigger threshold used below: algo_count * (num_children + 2) + num_children + 1.
// NOTE(review): presumably each round adds num_children + 2 counts to
// up_tree_ct_h (one per child, one for the local atomic above, one for the
// zeroing atomic below), so the threshold is reached once all children and
// the local contribution of the current round have arrived. This is
// consistent with the (algo_count + 1) * (num_children + 2) wait near the end.
00119 if (my_root == my_id) {
00120
00121
// Tree root: once the sum is complete, put it into this node's own PT_DOWN
// entry (out_buf), which starts the down phase.
00122 ptl->PtlTriggeredPut(up_tree_md_h, 0, 8, 0, my_id, PT_DOWN, 0, 0, NULL,
00123 0, up_tree_ct_h, (algo_count * (num_children + 2)) + num_children + 1);
00124 crReturn();
00125 } else {
00126
// Interior node: once the partial sum is complete, add it into the parent's
// accumulator.
00127 ptl->PtlTriggeredAtomic(up_tree_md_h, 0, 8, 0, my_root, PT_UP,
00128 0, 0, NULL, 0, PTL_SUM, PTL_LONG,
00129 up_tree_ct_h, (algo_count * (num_children + 2)) + num_children + 1);
00130 crReturn();
00131 }
00132
00133
// Same threshold: PTL_LAND of zero_buf (0) into this node's own PT_UP entry.
// NOTE(review): presumably this resets tmp_buf to 0 for the next round and
// relies on being processed after the forwarding operation above -- confirm
// the ordering guarantee.
00134 ptl->PtlTriggeredAtomic(zero_md_h, 0, 8, 0, my_id, PT_UP, 0, 0, NULL,
00135 0, PTL_LAND, PTL_LONG,
00136 up_tree_ct_h, (algo_count * (num_children + 2)) + num_children + 1);
00137 crReturn();
00138
00139
// Down phase: once the result has arrived in out_buf (user_ct_h reaches 1),
// forward it to every child. The loop index `i` is a data member because the
// loop body yields.
00140 for (i = 0 ; i < num_children ; ++i) {
00141 ptl->PtlTriggeredPut(user_md_h, 0, 8, 0, my_children[i], PT_DOWN,
00142 0, 0, NULL, 0, user_ct_h, 1);
00143 crReturn();
00144 }
00145 }
00146
00147 ptl->PtlDisableCoalesce();
00148 crReturn();
00149
00150
// Keep yielding until PtlCTWait() reports that user_ct_h has reached 1, i.e.
// the result has been delivered into out_buf.
00151 while (!ptl->PtlCTWait(user_ct_h, 1)) { crReturn(); }
00152
// Nodes with children also wait for the up-tree counter to reach the full
// per-round count before finishing the round.
00153 if (num_children > 0) {
00154 while (!ptl->PtlCTWait(up_tree_ct_h, ((algo_count + 1) * (num_children + 2)))) { crReturn(); }
00155 }
00156
// Tear down the per-invocation match entry and counting event, advance the
// round counter, and record the elapsed simulated time.
00157 ptl->PtlMEUnlink(user_me_h);
00158 crReturn();
00159 ptl->PtlCTFree(user_ct_h);
00160 crReturn();
00161 algo_count++;
00162 trig_cpu::addTimeToStats(cpu->getCurrentSimTimeNano()-start_time);
00163
// Self-check: every node contributed 1, so the sum must equal num_nodes.
// A mismatch is only reported on stdout; it does not change the return value.
00164 if (out_buf != (uint64_t) num_nodes) {
00165 printf("%05d: got %lu, expected %lu\n",
00166 my_id, (unsigned long) out_buf, (unsigned long) num_nodes);
00167 }
00168
00169 crFinish();
00170 return true;
00171 }
00172
00173 private:
// Declared private; no definitions are visible in this file.
// NOTE(review): the second constructor takes `const application&`, not
// `const allreduce_tree_triggered&`, so it is not a copy constructor and does
// not suppress the implicitly generated one. If the intent was to make the
// class non-copyable (as the private operator= suggests), confirm the
// parameter type.
00174 allreduce_tree_triggered();
00175 allreduce_tree_triggered(const application& a);
00176 void operator=(allreduce_tree_triggered const&);
00177
// Portals interface obtained from the CPU model in the constructor.
00178 portals *ptl;
// Simulated time (ns) at which the current invocation's timed section began.
00179 SimTime_t start_time;
// Loop index for the child fan-out loop; a member so it persists across yields.
00180 int i;
// True once the one-time up-tree setup in operator() has run.
00181 bool init;
// Tree radix as reported by cpu->getRadix().
00182 int radix;
00183
// Tree position of this node: parent id (equal to my_id at the tree root),
// child ids, and the number of children.
00184 int my_root;
00185 std::vector<int> my_children;
00186 int num_children;
00187
// in_buf:   local contribution (set to 1 in the constructor).
// out_buf:  receives the final result through the PT_DOWN match entry.
// tmp_buf:  up-tree accumulator behind the PT_UP match entry.
// zero_buf: constant 0, source operand of the PTL_LAND atomic.
00188 uint64_t in_buf, out_buf, tmp_buf, zero_buf;
00189
// NOTE(review): not referenced anywhere in the visible class body.
00190 ptl_handle_ct_t ct_handle;
00191
// Persistent up-tree resources, created once under the `init` guard.
00192 ptl_handle_ct_t up_tree_ct_h;
00193 ptl_handle_me_t up_tree_me_h;
00194 ptl_handle_md_t up_tree_md_h;
00195
// Resources created on each invocation. The ME and CT are unlinked/freed at
// the end of the invocation; no release of the two MDs is visible.
00196 ptl_handle_ct_t user_ct_h;
00197 ptl_handle_me_t user_me_h;
00198 ptl_handle_md_t user_md_h;
00199 ptl_handle_md_t user_in_md_h;
00200
// MD over zero_buf, created once under the `init` guard.
00201 ptl_handle_md_t zero_md_h;
00202
// Portal table indices for the up (reduce) and down (broadcast) phases.
00203 static const int PT_UP = 0;
00204 static const int PT_DOWN = 1;
00205
// Number of completed invocations; scales the up_tree_ct_h thresholds because
// that counter is never reset between rounds in this class.
00206 uint64_t algo_count;
00207 };
00208
00209 #endif // COMPONENTS_TRIG_CPU_ALLREDUCE_TREE_TRIGGERED_H