00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef COMPONENTS_TRIG_CPU_ALLREDUCE_TREE_H
00014 #define COMPONENTS_TRIG_CPU_ALLREDUCE_TREE_H
00015
00016 #include "algorithm.h"
00017 #include "trig_cpu.h"
00018
00019 class allreduce_tree : public algorithm {
00020 public:
00021 allreduce_tree(trig_cpu *cpu) : algorithm(cpu)
00022 {
00023 radix = cpu->getRadix();
00024 }
00025
00026 bool
00027 operator()(Event *ev)
00028 {
00029 int handle, my_root;
00030 if (0 != state) {
00031 my_root = (my_id / (curr_radix * level)) * (curr_radix * level);
00032 }
00033
00034 switch (state) {
00035 case 0:
00036
00037
00038 start_time = cpu->getCurrentSimTimeNano();
00039 cpu->addBusyTime("200ns");
00040
00041 level = 1;
00042 curr_radix = radix;
00043 state = 1;
00044 break;
00045
00046 case 1:
00047
00048 loop_var = 1;
00049 state = 2;
00050
00051 case 2:
00052
00053 if (my_id == my_root) {
00054
00055 if ( loop_var < curr_radix ) {
00056 if ( cpu->recv(my_id+(level*loop_var),NULL,handle) ) {
00057 loop_var++;
00058 }
00059 } else {
00060 state = 3;
00061 }
00062 } else {
00063
00064 cpu->send(my_root, 0);
00065 state = 4;
00066 }
00067 break;
00068
00069 case 3:
00070
00071 if (! cpu->waitall()) break;
00072
00073
00074 for (int i = 0 ; i < (((curr_radix - 1) / 8) + 1) ; ++i) {
00075 cpu->addBusyTime("100ns");
00076 }
00077
00078 level *= curr_radix;
00079 if (level == num_nodes) {
00080
00081 state = 6;
00082 } else {
00083
00084 if (num_nodes / level < curr_radix) curr_radix = num_nodes / level;
00085 state = 1;
00086 }
00087 break;
00088
00089 case 4:
00090
00091 if (cpu->recv(my_root, NULL, handle) ) {
00092 state = 5;
00093
00094 curr_radix = radix;
00095 }
00096
00097 break;
00098
00099 case 5:
00100
00101
00102
00103 if ( cpu->waitall() ) {
00104 state = (1 == level) ? 9 : 6;
00105 }
00106 break;
00107
00108 case 6:
00109
00110
00111
00112 loop_var = 1;
00113 level /= curr_radix;
00114 state = 7;
00115 break;
00116
00117 case 7:
00118
00119 if ( loop_var < curr_radix ) {
00120 cpu->send(my_id + (loop_var*level),0);
00121 loop_var++;
00122 } else {
00123 state = 8;
00124 }
00125 break;
00126
00127 case 8:
00128
00129
00130 curr_radix = radix;
00131 state = (1 == level) ? 9 : 6;
00132 break;
00133
00134 case 9:
00135 trig_cpu::addTimeToStats(cpu->getCurrentSimTimeNano()-start_time);
00136 state = 0;
00137 return true;
00138
00139 default:
00140 printf("tree: unhandled state: %d\n", state);
00141 exit(1);
00142 }
00143
00144 return false;
00145 }
00146
00147 private:
00148 allreduce_tree();
00149 allreduce_tree(const algorithm& a);
00150 void operator=(allreduce_tree const&);
00151
00152 SimTime_t start_time;
00153 int radix;
00154 int curr_radix;
00155 int level;
00156 int loop_var;
00157 };
00158
00159 #endif // COMPONENTS_TRIG_CPU_ALLREDUCE_TREE_H