• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

sst/elements/portals4_sm/trig_cpu/apps/allreduce_recdbl_trig.h

00001 // Copyright 2009-2010 Sandia Corporation. Under the terms
00002 // of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S.
00003 // Government retains certain rights in this software.
00004 // 
00005 // Copyright (c) 2009-2010, Sandia Corporation
00006 // All rights reserved.
00007 // 
00008 // This file is part of the SST software package. For license
00009 // information, see the LICENSE file in the top level directory of the
00010 // distribution.
00011 
00012 
00013 #ifndef COMPONENTS_TRIG_CPU_ALLREDUCE_RECDBL_TRIGGERED_H
00014 #define COMPONENTS_TRIG_CPU_ALLREDUCE_RECDBL_TRIGGERED_H
00015 
00016 #include "sst/elements/portals4_sm/trig_cpu/application.h"
00017 #include "sst/elements/portals4_sm/trig_cpu/trig_cpu.h"
00018 #include "sst/elements/portals4_sm/trig_cpu/portals.h"
00019 
00020 class allreduce_recdbl_triggered :  public application {
00021 public:
00022     allreduce_recdbl_triggered(trig_cpu *cpu) : application(cpu), init(false), algo_count(0)
00023     {
00024         ptl = cpu->getPortalsHandle();
00025 
00026         in_buf = 1;
00027         out_buf = 0;
00028         zero_buf = 0;
00029     }
00030 
00031     bool
00032     operator()(Event *ev)
00033     {
00034         ptl_md_t md;
00035         ptl_me_t me;
00036 
00037         crBegin();
00038 
00039         if (!init) {
00040             my_levels = -1;
00041             for (adj = 0x1; adj <= num_nodes ; adj  <<= 1) { my_levels++; } adj = adj >> 1;
00042             if (adj != num_nodes) {
00043                 printf("recursive_doubling requires power of 2 nodes (%d)\n",
00044                        num_nodes);
00045                 exit(1);
00046             }
00047 
00048             my_level_steps.resize(my_levels);
00049             my_level_ct_hs.resize(my_levels);
00050             my_level_me_hs.resize(my_levels);
00051             my_level_md_hs.resize(my_levels);
00052 
00053             for (i = 0 ; i < my_levels ; ++i) {
00054                 my_level_steps[i] = 0;
00055                 ptl->PtlCTAlloc(PTL_CT_OPERATION, my_level_ct_hs[i]);
00056 
00057                 me.start = &my_level_steps[i];
00058                 me.length = 8;
00059                 me.match_bits = i;
00060                 me.ignore_bits = 0;
00061                 me.ct_handle = my_level_ct_hs[i];
00062                 ptl->PtlMEAppend(0, me, PTL_PRIORITY_LIST, NULL, 
00063                                  my_level_me_hs[i]);
00064 
00065                 md.start = &my_level_steps[i];
00066                 me.length = 8;
00067                 md.eq_handle = PTL_EQ_NONE;
00068                 md.ct_handle = PTL_CT_NONE;
00069                 ptl->PtlMDBind(md, &my_level_md_hs[i]);
00070             }
00071 
00072             md.start = &zero_buf;
00073             md.length = 8;
00074             md.eq_handle = PTL_EQ_NONE;
00075             md.ct_handle = PTL_CT_NONE;
00076             ptl->PtlMDBind(md, &zero_md_h);
00077 
00078             init = true;
00079             crReturn();
00080             start_noise_section();
00081         }
00082 
00083         crReturn();
00084         // 200ns startup time
00085         start_time = cpu->getCurrentSimTimeNano();
00086         cpu->addBusyTime("200ns");
00087         crReturn();
00088 
00089         out_buf = in_buf;
00090 
00091         // Create description of user buffer.  We can't possibly have
00092         // a result to need this information before we add our portion
00093         // to the result, so this doesn't need to be persistent.
00094         ptl->PtlCTAlloc(PTL_CT_OPERATION, user_ct_h);
00095         me.start = &out_buf;
00096         me.length = 8;
00097         me.ignore_bits = ~0x0;
00098         me.ct_handle = user_ct_h;
00099         ptl->PtlMEAppend(1, me, PTL_PRIORITY_LIST, NULL, user_me_h);
00100 
00101         md.start = &out_buf;
00102         md.length = 8;
00103         md.eq_handle = PTL_EQ_NONE;
00104         md.ct_handle = PTL_CT_NONE;
00105         ptl->PtlMDBind(md, &user_md_h);
00106 
00107 /*         ptl->PtlEnableCoalesce(); */
00108 /*         crReturn(); */
00109 
00110         // start the trip
00111         ptl->PtlAtomic(user_md_h, 0, 8, 0, my_id, 0, 0, 0, NULL, 0, PTL_SUM, PTL_LONG);
00112         crReturn();
00113         ptl->PtlAtomic(user_md_h, 0, 8, 0, my_id ^ 0x1, 0, 0, 0, NULL, 0, PTL_SUM, PTL_LONG);
00114         crReturn();
00115 
00116         ptl->PtlEnableCoalesce();
00117         crReturn();
00118 
00119         for (i = 1 ; i < my_levels ; ++i) {
00120             next_level = 0x1 << i;
00121             remote = my_id ^ next_level;
00122             ptl->PtlTriggeredAtomic(my_level_md_hs[i - 1], 0, 8, 0, my_id, 0,
00123                                     i, 0, NULL, 0, PTL_SUM, PTL_LONG,
00124                                     my_level_ct_hs[i - 1], algo_count * 3 + 2);
00125             crReturn();
00126             ptl->PtlTriggeredAtomic(my_level_md_hs[i - 1], 0, 8, 0, remote, 0,
00127                                     i, 0, NULL, 0, PTL_SUM, PTL_LONG,
00128                                     my_level_ct_hs[i - 1], algo_count * 3 + 2);
00129             crReturn();
00130             ptl->PtlTriggeredAtomic(zero_md_h, 0, 8, 0, my_id, 0, 
00131                                     i - 1, 0, NULL, 0, PTL_LAND, PTL_LONG,
00132                                     my_level_ct_hs[i - 1], algo_count * 3 + 2);
00133             crReturn();
00134         }
00135 
00136         // copy into user buffer
00137         ptl->PtlTriggeredPut(my_level_md_hs[my_levels - 1], 0, 8, 0, my_id, 1,
00138                              0, 0, NULL, 0, my_level_ct_hs[my_levels - 1], algo_count * 3 + 2);
00139         crReturn();
00140         ptl->PtlTriggeredAtomic(zero_md_h, 0, 8, 0, my_id, 0, 
00141                                 my_levels - 1, 0, NULL, 0, PTL_LAND, PTL_LONG,
00142                                 my_level_ct_hs[my_levels - 1], algo_count * 3 + 2);
00143         crReturn();
00144 
00145         ptl->PtlDisableCoalesce();
00146         crReturn();
00147 
00148         while (!ptl->PtlCTWait(user_ct_h, 1)) { crReturn(); }
00149         while (!ptl->PtlCTWait(my_level_ct_hs[my_levels - 1], algo_count * 3 + 3)) { crReturn(); }
00150 
00151         ptl->PtlMEUnlink(user_me_h);
00152         crReturn();
00153         ptl->PtlCTFree(user_ct_h);
00154         crReturn();
00155         algo_count++;
00156 /*      printf("%5d: %lld ns\n",my_id,cpu->getCurrentSimTimeNano()-start_time); */
00157         trig_cpu::addTimeToStats(cpu->getCurrentSimTimeNano()-start_time);
00158 
00159         if (out_buf != (uint64_t) num_nodes) {
00160             printf("%05d: got %lu, expected %lu\n",
00161                    my_id, (unsigned long) out_buf, (unsigned long) num_nodes);
00162         }
00163 
00164         crFinish();
00165         return true;
00166     }
00167 
00168 private:
00169     allreduce_recdbl_triggered();
00170     allreduce_recdbl_triggered(const application& a);
00171     void operator=(allreduce_recdbl_triggered const&);
00172 
00173     portals *ptl;
00174     SimTime_t start_time;
00175     int i;
00176     int my_levels;
00177     bool init;
00178 
00179     std::vector<uint64_t> my_level_steps;
00180     std::vector<ptl_handle_ct_t> my_level_ct_hs;
00181     std::vector<ptl_handle_me_t> my_level_me_hs;
00182     std::vector<ptl_handle_md_t> my_level_md_hs;
00183 
00184     ptl_handle_ct_t user_ct_h;
00185     ptl_handle_me_t user_me_h;
00186     ptl_handle_md_t user_md_h;
00187 
00188     ptl_handle_md_t zero_md_h;
00189 
00190     int adj;
00191     int next_level;
00192     int remote;
00193 
00194     uint64_t in_buf, out_buf, tmp_buf, zero_buf;
00195 
00196     uint64_t algo_count;
00197 };
00198 
00199 #endif // COMPONENTS_TRIG_CPU_ALLREDUCE_RECDBL_TRIGGERED_H

Generated on Fri Oct 22 2010 11:02:25 for SST by  doxygen 1.7.1