• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

sst/elements/portals4_sm/trig_cpu/apps/allreduce_tree_trig.h

00001 // Copyright 2009-2010 Sandia Corporation. Under the terms
00002 // of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S.
00003 // Government retains certain rights in this software.
00004 // 
00005 // Copyright (c) 2009-2010, Sandia Corporation
00006 // All rights reserved.
00007 // 
00008 // This file is part of the SST software package. For license
00009 // information, see the LICENSE file in the top level directory of the
00010 // distribution.
00011 
00012 
00013 #ifndef COMPONENTS_TRIG_CPU_ALLREDUCE_TREE_TRIGGERED_H
00014 #define COMPONENTS_TRIG_CPU_ALLREDUCE_TREE_TRIGGERED_H
00015 
00016 #include "sst/elements/portals4_sm/trig_cpu/application.h"
00017 #include "sst/elements/portals4_sm/trig_cpu/trig_cpu.h"
00018 #include "sst/elements/portals4_sm/trig_cpu/portals.h"
00019 
00020 class allreduce_tree_triggered :  public application {
      // Allreduce test application built on Portals triggered operations.
      //
      // Every node contributes in_buf (the constant 1) to a PTL_SUM reduction.
      // Leaves send their value to my_root on PT_UP; interior nodes accumulate
      // into tmp_buf and forward it to my_root once enough updates have been
      // counted.  A node whose my_root equals my_id acts as the top of the
      // tree and moves the total to PT_DOWN, from where it is forwarded to
      // the children.  Each node finally compares out_buf against num_nodes.
      //
      // NOTE(review): my_id, num_nodes, cpu, the tree builders and
      // start_noise_section() are not declared in this class; presumably
      // they come from `application` -- confirm.
00021 public:
      // Builds this node's view of the tree and seeds the data buffers.
      //   cpu  - owning trig_cpu; supplies the radix and the portals handle.
      //   nary - true selects buildNaryTree(radix), false selects
      //          buildBinomialTree(radix).
00022     allreduce_tree_triggered(trig_cpu *cpu, bool nary) : application(cpu), init(false), algo_count(0)
00023     {
00024         radix = cpu->getRadix();
00025         ptl = cpu->getPortalsHandle();
00026
00027         if (nary) {
00028             boost::tie(my_root, my_children) = buildNaryTree(radix);
00029         } else {
00030             boost::tie(my_root, my_children) = buildBinomialTree(radix);
00031         }
              // size() is unsigned; make the conversion to int explicit.
00032         num_children = static_cast<int>(my_children.size());
00033
              // Each node contributes 1, so the reduced sum is checked against
              // num_nodes at the end of operator().
00034         in_buf = 1;
00035         out_buf = 0;
00036         tmp_buf = 0;
00037         zero_buf = 0;
00038     }
00039
      // Runs one allreduce iteration as a resumable coroutine.
      //
      // The body sits between crBegin() and crFinish(), and every crReturn()
      // hands control back to the caller.  NOTE(review): the cr* macros are
      // defined elsewhere; presumably the next call resumes right after the
      // last crReturn() -- confirm.  Because of this, statement order matters
      // and loop/iteration state lives in members (i, algo_count, handles).
      //
      //   ev - triggering event; not referenced directly in this function.
      //   returns true once the iteration has run to completion.
00040     bool
00041     operator()(Event *ev)
00042     {
              // Scratch descriptors.  They are always filled in and consumed
              // without a crReturn() in between.
00043         ptl_md_t md;
00044         ptl_me_t me;
00045
00046         crBegin();
00047
              // One-time setup, guarded by `init`: the up-tree counter, match
              // entry and the two memory descriptors created here are reused
              // by every later iteration.
00048         if (!init) {
00049             // setup md handles
00050             ptl->PtlCTAlloc(PTL_CT_OPERATION, up_tree_ct_h);
                  // Incoming PT_UP traffic accumulates in tmp_buf and is
                  // counted on up_tree_ct_h.
00051             me.start = &tmp_buf;
00052             me.length = 8;
00053             me.ignore_bits = ~0x0;
00054             me.ct_handle = up_tree_ct_h;
00055             ptl->PtlMEAppend(PT_UP, me, PTL_PRIORITY_LIST, NULL, up_tree_me_h);
00056
                  // Source descriptor for forwarding the accumulated tmp_buf.
00057             md.start = &tmp_buf;
00058             md.length = 8;
00059             md.eq_handle = PTL_EQ_NONE;
00060             md.ct_handle = PTL_CT_NONE;
00061             ptl->PtlMDBind(md, &up_tree_md_h);
00062
                  // Source descriptor over zero_buf, used to reset tmp_buf.
00063             md.start = &zero_buf;
00064             md.length = 8;
00065             md.eq_handle = PTL_EQ_NONE;
00066             md.ct_handle = PTL_CT_NONE;
00067             ptl->PtlMDBind(md, &zero_md_h);
00068
00069             init = true;
00070             crReturn();
00071             start_noise_section();
00072         }
00073
00074         // 200ns startup time
00075         start_time = cpu->getCurrentSimTimeNano();
00076         cpu->addBusyTime("200ns");
00077         crReturn();
00078
00079         // Create description of user buffer.  We can't possibly have
00080         // a result to need this information before we add our portion
00081         // to the result, so this doesn't need to be persistent.
00082         ptl->PtlCTAlloc(PTL_CT_OPERATION, user_ct_h);
00083         crReturn();
              // The final result arrives on PT_DOWN into out_buf and is
              // counted on user_ct_h.
00084         me.start = &out_buf;
00085         me.length = 8;
00086         me.ignore_bits = ~0x0;
00087         me.ct_handle = user_ct_h;
00088         ptl->PtlMEAppend(PT_DOWN, me, PTL_PRIORITY_LIST, NULL, user_me_h);
00089         crReturn();
00090
              // Only nodes with children forward out_buf further down.
00091         if (num_children > 0) {
00092             md.start = &out_buf;
00093             md.length = 8;
00094             md.eq_handle = PTL_EQ_NONE;
00095             md.ct_handle = PTL_CT_NONE;
00096             ptl->PtlMDBind(md, &user_md_h);
00097             crReturn();
00098         }
00099
              // Source descriptor for this node's own contribution.
00100         md.start = &in_buf;
00101         md.length = 8;
00102         md.eq_handle = PTL_EQ_NONE;
00103         md.ct_handle = PTL_CT_NONE;
00104         ptl->PtlMDBind(md, &user_in_md_h);
00105         crReturn();
00106
00107         ptl->PtlEnableCoalesce();
00108         crReturn();
00109
00110         if (num_children == 0) {
00111             // leaf node - push directly to the upper level's up tree
00112             ptl->PtlAtomic(user_in_md_h, 0, 8, 0, my_root, PT_UP, 0, 0, NULL, 0, PTL_SUM, PTL_LONG);
00113             crReturn();
00114         } else {
00115             // add our portion to the mix
00116             ptl->PtlAtomic(user_in_md_h, 0, 8, 0, my_id, PT_UP, 0, 0, NULL,
00117                            0, PTL_SUM, PTL_LONG);
00118             crReturn();
                  // Trigger threshold used below:
                  //   algo_count * (num_children + 2) + num_children + 1.
                  // Judging by the arithmetic, the up-tree counter is expected
                  // to advance by (num_children + 2) per round: one update per
                  // child, one for our own contribution above, and one for the
                  // zeroing atomic issued further down.  The triggers fire
                  // after the first num_children + 1 of those.  TODO confirm
                  // against the portals counting semantics.
00119             if (my_root == my_id) {
00120                 // setup trigger to move data to right place, then send
00121                 // data out of there down the tree
00122                 ptl->PtlTriggeredPut(up_tree_md_h, 0, 8, 0, my_id, PT_DOWN, 0, 0, NULL,
00123                                      0, up_tree_ct_h, (algo_count * (num_children + 2)) + num_children + 1);
00124                 crReturn();
00125             } else {
00126                 // setup trigger to move data up the tree when we get enough updates
00127                 ptl->PtlTriggeredAtomic(up_tree_md_h, 0, 8, 0, my_root, PT_UP,
00128                                         0, 0, NULL, 0, PTL_SUM, PTL_LONG,
00129                                         up_tree_ct_h, (algo_count * (num_children + 2)) + num_children + 1);
00130                 crReturn();
00131             }
00132
00133             // and to clean up after ourselves
                  // (zero_buf is sent with PTL_LAND to our own PT_UP entry;
                  // presumably this ANDs tmp_buf with 0 so the next round
                  // starts from zero -- confirm PTL_LAND semantics.)
00134             ptl->PtlTriggeredAtomic(zero_md_h, 0, 8, 0, my_id, PT_UP, 0, 0, NULL,
00135                                     0, PTL_LAND, PTL_LONG,
00136                                     up_tree_ct_h, (algo_count * (num_children + 2)) + num_children + 1);
00137             crReturn();
00138
00139             // push down the tree
                  // One triggered put per child, armed on user_ct_h reaching 1,
                  // i.e. once something has been counted on our PT_DOWN entry.
00140             for (i = 0 ; i < num_children ; ++i) {
00141                 ptl->PtlTriggeredPut(user_md_h, 0, 8, 0, my_children[i], PT_DOWN,
00142                                      0, 0, NULL, 0, user_ct_h, 1);
00143                 crReturn();
00144             }
00145         }
00146
00147         ptl->PtlDisableCoalesce();
00148         crReturn();
00149
              // Yield until the result has been counted on user_ct_h.
00150 /*      printf("waiting\n"); */
00151         while (!ptl->PtlCTWait(user_ct_h, 1)) { crReturn(); }
00152 /*      printf("done waiting\n"); */
              // Interior nodes additionally wait for the up-tree counter to
              // reach this round's full total of (num_children + 2) updates.
00153         if (num_children > 0) {
00154             while (!ptl->PtlCTWait(up_tree_ct_h, ((algo_count + 1) * (num_children + 2)))) { crReturn(); }
00155         }
00156
              // Release the per-iteration match entry and counter.
00157         ptl->PtlMEUnlink(user_me_h);
00158         crReturn();
00159         ptl->PtlCTFree(user_ct_h);
00160         crReturn();
00161         algo_count++;
00162         trig_cpu::addTimeToStats(cpu->getCurrentSimTimeNano()-start_time);
00163
              // Every node contributed 1 (in_buf), so the sum is compared with
              // num_nodes; a mismatch is reported but does not abort.
00164         if (out_buf != (uint64_t) num_nodes) {
00165             printf("%05d: got %lu, expected %lu\n",
00166                    my_id, (unsigned long) out_buf, (unsigned long) num_nodes);
00167         }
00168
00169         crFinish();
00170         return true;
00171     }
00172
00173 private:
      // Not default-constructible, copyable or assignable: the three members
      // below are declared private and are not defined in this header.  The
      // copy constructor has to take this class's own type; a parameter of
      // `const application&` would only declare a converting constructor and
      // leave the implicitly generated copy constructor available.
00174     allreduce_tree_triggered();
00175     allreduce_tree_triggered(const allreduce_tree_triggered& a);
00176     void operator=(allreduce_tree_triggered const&);
00177
00178     portals *ptl;          // portals handle obtained from the cpu
00179     SimTime_t start_time;  // simulated time at the start of the iteration
00180     int i;                 // child loop index; a member so it persists across crReturn()
00181     bool init;             // true once the one-time setup has run
00182     int radix;             // tree radix obtained from the cpu
00183
00184     int my_root;                  // target of this node's up-tree traffic; equals my_id at the top
00185     std::vector<int> my_children; // targets of this node's down-tree puts
00186     int num_children;             // my_children.size()
00187
      // in_buf: own contribution; out_buf: final result; tmp_buf: up-tree
      // accumulator; zero_buf: constant 0 used to reset tmp_buf.
00188     uint64_t in_buf, out_buf, tmp_buf, zero_buf;
00189
      // NOTE(review): ct_handle is not referenced anywhere in this class.
00190     ptl_handle_ct_t ct_handle;
00191
      // Persistent up-tree resources, created once in the init section.
00192     ptl_handle_ct_t up_tree_ct_h;
00193     ptl_handle_me_t up_tree_me_h;
00194     ptl_handle_md_t up_tree_md_h;
00195
      // Per-iteration resources for the result and the node's own input.
00196     ptl_handle_ct_t user_ct_h;
00197     ptl_handle_me_t user_me_h;
00198     ptl_handle_md_t user_md_h;
00199     ptl_handle_md_t user_in_md_h;
00200
00201     ptl_handle_md_t zero_md_h;
00202
      // Portal table indices for the two directions of the tree.
00203     static const int PT_UP = 0;
00204     static const int PT_DOWN = 1;
00205
      // Number of completed iterations; scales the up-tree trigger thresholds.
00206     uint64_t algo_count;
00207 };
00208 
00209 #endif // COMPONENTS_TRIG_CPU_ALLREDUCE_TREE_TRIGGERED_H

Generated on Fri Oct 22 2010 11:02:25 for SST by  doxygen 1.7.1