• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

sst/elements/portals4_sm/trig_cpu/apps/bcast_trig.h

00001 // Copyright 2009-2010 Sandia Corporation. Under the terms
00002 // of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S.
00003 // Government retains certain rights in this software.
00004 // 
00005 // Copyright (c) 2009-2010, Sandia Corporation
00006 // All rights reserved.
00007 // 
00008 // This file is part of the SST software package. For license
00009 // information, see the LICENSE file in the top level directory of the
00010 // distribution.
00011 
00012 
00013 #ifndef COMPONENTS_TRIG_CPU_BCAST_TREE_TRIGGERED_H
00014 #define COMPONENTS_TRIG_CPU_BCAST_TREE_TRIGGERED_H
00015 
00016 #include "sst/elements/portals4_sm/trig_cpu/application.h"
00017 #include "sst/elements/portals4_sm/trig_cpu/trig_cpu.h"
00018 #include "sst/elements/portals4_sm/trig_cpu/portals.h"
00019 #include <string.h>                    // for memcpy()
00020 
00021 class bcast_tree_triggered :  public application {
00022 public:
00023     bcast_tree_triggered(trig_cpu *cpu) : application(cpu), init(false), algo_count(0)
00024     {
00025         radix = cpu->getRadix();
00026         ptl = cpu->getPortalsHandle();
00027 
00028         msg_size = cpu->getMessageSize();
00029         chunk_size = cpu->getChunkSize();
00030 
00031         // compute my root and children
00032         boost::tie(my_root, my_children) = buildBinomialTree(radix);
00033         num_children = my_children.size();
00034 
00035         if (my_id == my_root) {
00036             in_buf = (char*) malloc(msg_size);
00037             for (i = 0 ; i < msg_size ; ++i) {
00038                 in_buf[i] = i % 255;
00039             }
00040         } else {
00041             in_buf = NULL;
00042         }
00043         out_buf = (char*) malloc(msg_size);
00044         memset(out_buf, 0, msg_size);
00045         bounce_buf = (char*) malloc(chunk_size);
00046     }
00047 
00048     bool
00049     operator()(Event *ev)
00050     {
00051         ptl_md_t md;
00052         ptl_me_t me;
00053 
00054         crBegin();
00055 
00056         if (!init) {
00057             /* Setup persistent ME/MD/CT to hold bounce data */
00058             ptl->PtlCTAlloc(PTL_CT_OPERATION, bounce_ct_h);
00059             me.start = bounce_buf;
00060             me.length = chunk_size;
00061             me.match_bits = 0x0;
00062             me.ignore_bits = 0x0;
00063             me.ct_handle = bounce_ct_h;
00064             ptl->PtlMEAppend(PT_BOUNCE, me, PTL_PRIORITY_LIST, NULL, bounce_me_h);
00065 
00066             md.start = bounce_buf;
00067             md.length = chunk_size;
00068             md.eq_handle = PTL_EQ_NONE;
00069             md.ct_handle = PTL_CT_NONE;
00070             ptl->PtlMDBind(md, &bounce_md_h);
00071 
00072             init = true;
00073             crReturn();
00074             start_noise_section();
00075         }
00076 
00077         /* Initialization case */
00078         // 200ns startup time
00079         start_time = cpu->getCurrentSimTimeNano();
00080         cpu->addBusyTime("200ns");
00081         crReturn();
00082 
00083         // Create description of user buffer.
00084         ptl->PtlCTAlloc(PTL_CT_OPERATION, out_me_ct_h);
00085         crReturn();
00086         me.start = out_buf;
00087         me.length = msg_size;
00088         me.match_bits = 0x0;
00089         me.ignore_bits = 0x0;
00090         me.ct_handle = out_me_ct_h;
00091         ptl->PtlMEAppend(PT_OUT, me, PTL_PRIORITY_LIST, NULL, out_me_h);
00092         crReturn();
00093 
00094         ptl->PtlCTAlloc(PTL_CT_OPERATION, out_md_ct_h);
00095         crReturn();
00096         md.start = out_buf;
00097         md.length = msg_size;
00098         md.eq_handle = PTL_EQ_NONE;
00099         md.ct_handle = out_md_ct_h;
00100         ptl->PtlMDBind(md, &out_md_h);
00101         crReturn();
00102 
00103         ptl->PtlEnableCoalesce();
00104         crReturn();
00105 
00106         /* long protocol only for now */
00107         if (my_id == my_root) {
00108             /* copy to self */
00109             memcpy(out_buf, in_buf, msg_size);
00110             /* send to children */
00111             for (j = 0 ; j < msg_size ; j += chunk_size) {
00112 /*              ptl->PtlStartTriggeredPutV(num_children); */
00113                 for (i = 0 ; i < num_children ; ++i) {
00114                     ptl->PtlPut(bounce_md_h, 0, 0, 0, my_children[i],
00115                                 PT_BOUNCE, 0x0, 0, NULL, 0);
00116 /*                     ptl->PtlTriggeredPut(bounce_md_h, 0, 0, 0, my_children[i], */
00117 /*                                       PT_BOUNCE, 0x0, 0, NULL, 0, 0, 0); */
00118                     crReturn();
00119                 }
00120 /*              ptl->PtlEndTriggeredPutV(); */
00121 /*              crReturn(); */
00122             }
00123             
00124         } else {
00125             for (j = 0 ; j < msg_size ; j += chunk_size) {
00126                 /* when a chunk is ready, issue get. */
00127                 comm_size = (msg_size - j > chunk_size) ? 
00128                     chunk_size : msg_size - j;
00129                 ptl->PtlTriggeredGet(out_md_h, j, comm_size, my_root,
00130                                      PT_OUT, 0x0, NULL, j, bounce_ct_h,
00131                                      algo_count + j / chunk_size  + 1);
00132                 crReturn();
00133 
00134                 /* then when the get is completed, send ready acks to children */
00135                 ptl->PtlStartTriggeredPutV(num_children);
00136                 for (i = 0 ; i < num_children ; ++i) {
00137                     ptl->PtlTriggeredPut(bounce_md_h, 0, 0, 0, my_children[i],
00138                                          PT_BOUNCE, 0x0, 0, NULL, 0, out_md_ct_h,
00139                                          j / chunk_size  + 1);
00140                     crReturn();
00141                 }
00142                 ptl->PtlEndTriggeredPutV();
00143                 crReturn();
00144             }
00145 
00146             /* reset 0-byte put received counter */
00147             count = (msg_size + chunk_size - 1) / chunk_size;
00148             algo_count += count;
00149         }
00150 
00151         ptl->PtlDisableCoalesce();
00152         crReturn();
00153 
00154         if (num_children > 0) {
00155             /* wait for completion */
00156             count = num_children * ((msg_size + chunk_size - 1) / chunk_size);
00157             while (!ptl->PtlCTWait(out_me_ct_h, count)) { crReturn(); }
00158         } else {
00159             /* wait for local gets to complete */
00160             count = (msg_size + chunk_size - 1) / chunk_size;
00161             while (!ptl->PtlCTWait(out_md_ct_h, count)) { crReturn(); }
00162         }
00163         crReturn();
00164 
00165         ptl->PtlCTFree(out_me_ct_h);
00166         crReturn();
00167         ptl->PtlMEUnlink(out_me_h);
00168         crReturn();
00169         ptl->PtlCTFree(out_md_ct_h);
00170         crReturn();
00171         ptl->PtlMDRelease(out_md_h);
00172         crReturn();
00173 
00174         trig_cpu::addTimeToStats(cpu->getCurrentSimTimeNano()-start_time);
00175 
00176         {
00177             int bad = 0;
00178             for (i = 0 ; i < msg_size ; ++i) {
00179                 if ((out_buf[i] & 0xff) != i % 255) bad++;
00180             }
00181             if (bad) printf("%5d: bad results: %d\n",my_id,bad);
00182         }
00183 
00184         crFinish();
00185         return true;
00186     }
00187 
00188 private:
00189     bcast_tree_triggered();
00190     bcast_tree_triggered(const application& a);
00191     void operator=(bcast_tree_triggered const&);
00192 
00193     bool init;
00194     portals *ptl;
00195 
00196     SimTime_t start_time;
00197     int radix;
00198     int i, j;
00199 
00200     int msg_size;
00201     int chunk_size;
00202     int comm_size;
00203     int count;
00204 
00205     char *in_buf;
00206     char *out_buf;
00207     char *bounce_buf;
00208     
00209     ptl_handle_ct_t bounce_ct_h; /* short (me), long (me) */
00210     ptl_handle_me_t bounce_me_h; /* short, long */
00211     ptl_handle_md_t bounce_md_h; /* short, long */
00212 
00213     ptl_handle_ct_t out_me_ct_h; /* short (me), long (me) */
00214     ptl_handle_me_t out_me_h; /* short, long */
00215     ptl_handle_ct_t out_md_ct_h; /* long (md) */
00216     ptl_handle_md_t out_md_h; /* short, long */
00217 
00218     ptl_handle_ct_t ack_ct_h; /* short (me) */
00219     ptl_handle_me_t ack_me_h; /* short */
00220     ptl_handle_md_t ack_md_h; /* short */
00221 
00222     int my_root;
00223     std::vector<int> my_children;
00224     int num_children;
00225 
00226     static const int PT_BOUNCE = 0;
00227     static const int PT_ACK    = 1;
00228     static const int PT_OUT    = 2;
00229 
00230     uint64_t algo_count;
00231 };
00232 
00233 #endif // COMPONENTS_TRIG_CPU_BCAST_TREE_TRIGGERED_H

Generated on Fri Oct 22 2010 11:02:25 for SST by  doxygen 1.7.1