00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef COMPONENTS_TRIG_CPU_ALLREDUCE_RECDBL_TRIGGERED_H
00014 #define COMPONENTS_TRIG_CPU_ALLREDUCE_RECDBL_TRIGGERED_H
00015
00016 #include "algorithm.h"
00017 #include "trig_cpu.h"
00018 #include "portals.h"
00019
00020 class allreduce_recdbl_triggered : public algorithm {
00021 public:
00022 allreduce_recdbl_triggered(trig_cpu *cpu) : algorithm(cpu)
00023 {
00024 ptl = cpu->getPortalsHandle();
00025 }
00026
00027 bool
00028 operator()(Event *ev)
00029 {
00030 int adj;
00031 ptl_md_t md;
00032 ptl_me_t me;
00033 int next_level;
00034 int remote;
00035
00036 switch (state) {
00037 case 0:
00038
00039 my_levels = -1;
00040 for (adj = 0x1; adj <= num_nodes ; adj <<= 1) { my_levels++; } adj = adj >> 1;
00041 if (adj != num_nodes) {
00042 printf("recursive_doubling requires power of 2 nodes (%d)\n",
00043 num_nodes);
00044 exit(1);
00045 }
00046
00047 my_level_steps.resize(my_levels);
00048 my_level_ct_hs.resize(my_levels);
00049 my_level_me_hs.resize(my_levels);
00050 my_level_md_hs.resize(my_levels);
00051
00052 for (int i = 0 ; i < my_levels ; ++i) {
00053 my_level_steps[i] = 0;
00054 ptl->PtlCTAlloc(PTL_CT_OPERATION, my_level_ct_hs[i]);
00055
00056 me.start = &my_level_steps[i];
00057 me.length = 8;
00058 me.match_bits = i;
00059 me.ignore_bits = 0;
00060 me.ct_handle = my_level_ct_hs[i];
00061 ptl->PtlMEAppend(0, me, PTL_PRIORITY_LIST, NULL,
00062 my_level_me_hs[i]);
00063
00064 md.start = &my_level_steps[i];
00065 me.length = 8;
00066 md.eq_handle = PTL_EQ_NONE;
00067 md.ct_handle = PTL_CT_NONE;
00068 ptl->PtlMDBind(md, &my_level_md_hs[i]);
00069 }
00070 state = 1;
00071 break;
00072
00073 case 1:
00074
00075 start_time = cpu->getCurrentSimTimeNano();
00076 cpu->addBusyTime("200ns");
00077
00078
00079
00080
00081 ptl->PtlCTAlloc(PTL_CT_OPERATION, user_ct_h);
00082 me.start = NULL;
00083 me.length = 8;
00084 me.ignore_bits = ~0x0;
00085 me.ct_handle = user_ct_h;
00086 ptl->PtlMEAppend(1, me, PTL_PRIORITY_LIST, NULL, user_me_h);
00087
00088 md.start = NULL;
00089 md.length = 8;
00090 md.eq_handle = PTL_EQ_NONE;
00091 md.ct_handle = PTL_CT_NONE;
00092 ptl->PtlMDBind(md, &user_md_h);
00093
00094 state = 2;
00095 break;
00096
00097 case 2:
00098
00099 ptl->PtlAtomic(user_md_h, 0, 8, 0, my_id, 0, 0, 0, NULL, 0, PTL_SUM, PTL_DOUBLE);
00100 state = 3;
00101 break;
00102
00103 case 3:
00104 ptl->PtlAtomic(user_md_h, 0, 8, 0, my_id ^ 0x1, 0, 0, 0, NULL, 0, PTL_SUM, PTL_DOUBLE);
00105
00106 loop_var = 1;
00107 state = (loop_var < my_levels) ? 4 : 8;
00108 break;
00109
00110 case 4:
00111 next_level = 0x1 << loop_var;
00112 remote = my_id ^ next_level;
00113 ptl->PtlTriggeredAtomic(my_level_md_hs[loop_var - 1], 0, 8, 0, my_id, 0,
00114 loop_var, 0, NULL, 0, PTL_SUM, PTL_DOUBLE,
00115 my_level_ct_hs[loop_var - 1], 2);
00116 state = 5;
00117 break;
00118
00119 case 5:
00120 next_level = 0x1 << loop_var;
00121 remote = my_id ^ next_level;
00122 ptl->PtlTriggeredAtomic(my_level_md_hs[loop_var - 1], 0, 8, 0, remote, 0,
00123 loop_var, 0, NULL, 0, PTL_SUM, PTL_DOUBLE,
00124 my_level_ct_hs[loop_var - 1], 2);
00125 state = 6;
00126 break;
00127
00128 case 6:
00129 next_level = 0x1 << loop_var;
00130 remote = my_id ^ next_level;
00131 ptl->PtlTriggeredPut(zero_md_h, 0, 8, 0, my_id, 0,
00132 loop_var - 1, 0, NULL, 0, my_level_ct_hs[loop_var - 1], 2);
00133 state = 7;
00134 break;
00135
00136 case 7:
00137 ptl->PtlTriggeredCTInc(my_level_ct_hs[loop_var - 1], -3,
00138 my_level_ct_hs[loop_var - 1], 3);
00139 loop_var++;
00140 state = (loop_var < my_levels) ? 4 : 8;
00141 break;
00142
00143 case 8:
00144
00145 ptl->PtlTriggeredPut(my_level_md_hs[my_levels - 1], 0, 8, 0, my_id, 1,
00146 0, 0, NULL, 0, my_level_ct_hs[my_levels - 1], 2);
00147 state = 9;
00148 break;
00149
00150 case 9:
00151 ptl->PtlTriggeredPut(zero_md_h, 0, 8, 0, my_id, 0,
00152 my_levels - 1, 0, NULL, 0, my_level_ct_hs[my_levels - 1], 2);
00153 ptl->PtlTriggeredCTInc(my_level_ct_hs[my_levels - 1], -3,
00154 my_level_ct_hs[my_levels - 1], 3);
00155 state = 10;
00156 break;
00157
00158 case 10:
00159 if (ptl->PtlCTWait(user_ct_h, 1)) state = 11;
00160 break;
00161
00162 case 11:
00163 ptl->PtlMEUnlink(user_me_h);
00164 trig_cpu::addTimeToStats(cpu->getCurrentSimTimeNano()-start_time);
00165 ptl->PtlMEUnlink(user_me_h);
00166 state = 1;
00167 return true;
00168
00169 default:
00170 printf("triggered recursive doubling: unhandled state: %d\n", state);
00171 exit(1);
00172 }
00173
00174 return false;
00175 }
00176
00177 private:
00178 allreduce_recdbl_triggered();
00179 allreduce_recdbl_triggered(const algorithm& a);
00180 void operator=(allreduce_recdbl_triggered const&);
00181
00182 portals *ptl;
00183 SimTime_t start_time;
00184 int loop_var;
00185 int my_levels;
00186
00187 std::vector<double> my_level_steps;
00188 std::vector<ptl_handle_ct_t> my_level_ct_hs;
00189 std::vector<ptl_handle_me_t> my_level_me_hs;
00190 std::vector<ptl_handle_md_t> my_level_md_hs;
00191
00192 ptl_handle_ct_t user_ct_h;
00193 ptl_handle_me_t user_me_h;
00194 ptl_handle_md_t user_md_h;
00195
00196 ptl_handle_md_t zero_md_h;
00197 };
00198
00199 #endif // COMPONENTS_TRIG_CPU_ALLREDUCE_RECDBL_TRIGGERED_H