00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef COMPONENTS_TRIG_CPU_ALLREDUCE_TREE_H
00014 #define COMPONENTS_TRIG_CPU_ALLREDUCE_TREE_H
00015
00016 #include "sst/elements/portals4_sm/trig_cpu/application.h"
00017 #include "sst/elements/portals4_sm/trig_cpu/trig_cpu.h"
00018
00019 class allreduce_tree : public application {
00020 public:
00021 allreduce_tree(trig_cpu *cpu, bool nary) : application(cpu)
00022 {
00023 radix = cpu->getRadix();
00024
00025 if (nary) {
00026 boost::tie(my_root, my_children) = buildNaryTree(radix);
00027 } else {
00028 boost::tie(my_root, my_children) = buildBinomialTree(radix);
00029 }
00030 num_children = my_children.size();
00031
00032 in_buf = 1;
00033 out_buf = 0;
00034 tmp_buf = (uint64_t*) malloc(sizeof(uint64_t) * (num_children == 0 ? 1 : num_children));
00035 }
00036
00037 bool
00038 operator()(Event *ev)
00039 {
00040 crBegin();
00041
00042 start_time = cpu->getCurrentSimTimeNano();
00043 cpu->addBusyTime("200ns");
00044 crReturn();
00045
00046
00047 if (0 != num_children) {
00048 for (i = 0 ; i < num_children ; ++i) {
00049 while (!cpu->irecv(my_children[i], &tmp_buf[i], handle)) { crReturn(); }
00050 }
00051 while (!cpu->waitall()) { crReturn(); }
00052 }
00053
00054
00055 out_buf = in_buf;
00056 for (i = 0 ; i < num_children ; ++i) {
00057 out_buf += tmp_buf[i];
00058 }
00059 for (i = 0 ; i < (num_children / 8) + 1 ; ++i) {
00060 cpu->addBusyTime("100ns");
00061 crReturn();
00062 }
00063
00064
00065 if (my_root != my_id) {
00066 cpu->isend(my_root, &out_buf, 8);
00067 crReturn();
00068 while (!cpu->irecv(my_root, &out_buf, handle)) { crReturn(); }
00069 while (!cpu->waitall()) { crReturn(); }
00070 }
00071
00072
00073 for (i = 0 ; i < num_children ; ++i) {
00074 cpu->isend(my_children[i], &out_buf, 8);
00075 crReturn();
00076 }
00077
00078 crReturn();
00079 trig_cpu::addTimeToStats(cpu->getCurrentSimTimeNano() - start_time);
00080
00081 assert(out_buf == (uint64_t) cpu->getNumNodes());
00082
00083 crFinish();
00084 return true;
00085 }
00086
00087 private:
00088 allreduce_tree();
00089 allreduce_tree(const application& a);
00090 void operator=(allreduce_tree const&);
00091
00092 SimTime_t start_time;
00093 int radix;
00094 int i;
00095 int handle;
00096
00097 uint64_t in_buf, out_buf;
00098 uint64_t *tmp_buf;
00099
00100 int my_root;
00101 std::vector<int> my_children;
00102 int num_children;
00103 };
00104
00105 #endif // COMPONENTS_TRIG_CPU_ALLREDUCE_TREE_H