00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef COMPONENTS_TRIG_CPU_BCAST_TREE_TRIGGERED_H
00014 #define COMPONENTS_TRIG_CPU_BCAST_TREE_TRIGGERED_H
00015
00016 #include "sst/elements/portals4_sm/trig_cpu/application.h"
00017 #include "sst/elements/portals4_sm/trig_cpu/trig_cpu.h"
00018 #include "sst/elements/portals4_sm/trig_cpu/portals.h"
00019 #include <string.h>
00020
// bcast_tree_triggered: a trig_cpu application that broadcasts a msg_size-byte
// buffer down a tree in chunk_size pieces. The root sends a PtlPut with zero
// offset/length arguments to each child per chunk; every other node arms a
// triggered Get for the chunk and triggered Puts that forward the notification
// to its own children.
//
// Names used below but not declared in this file (my_id, cpu, crBegin,
// crReturn, crFinish, start_noise_section, buildBinomialTree) presumably come
// from the application base class / application.h -- confirm there.
00021 class bcast_tree_triggered : public application {
00022 public:
// Caches the radix, Portals handle, message size and chunk size from `cpu`,
// asks buildBinomialTree(radix) for this node's root and child list, and
// allocates the data buffers.
// NOTE(review): the three malloc() results are not checked, and no matching
// free() (no destructor) is visible in this class.
00023 bcast_tree_triggered(trig_cpu *cpu) : application(cpu), init(false), algo_count(0)
00024 {
00025 radix = cpu->getRadix();
00026 ptl = cpu->getPortalsHandle();
00027
00028 msg_size = cpu->getMessageSize();
00029 chunk_size = cpu->getChunkSize();
00030
00031
// Tree position of this node: my_root plus the list of children.
00032 boost::tie(my_root, my_children) = buildBinomialTree(radix);
00033 num_children = my_children.size();
00034
// Only the node whose id equals my_root owns a source buffer; it is filled
// with the pattern i % 255, which the check at the end of operator() expects
// to find in out_buf.
00035 if (my_id == my_root) {
00036 in_buf = (char*) malloc(msg_size);
00037 for (i = 0 ; i < msg_size ; ++i) {
00038 in_buf[i] = i % 255;
00039 }
00040 } else {
00041 in_buf = NULL;
00042 }
// Every node gets a zeroed destination buffer and a chunk_size scratch buffer.
// bounce_buf is left uninitialized; in this class it is only handed to
// Portals as the backing store of the bounce ME/MD.
00043 out_buf = (char*) malloc(msg_size);
00044 memset(out_buf, 0, msg_size);
00045 bounce_buf = (char*) malloc(chunk_size);
00046 }
00047
// Runs one broadcast as a resumable routine built on crBegin()/crReturn()/
// crFinish(). Returns true once the broadcast has completed and been checked.
// NOTE(review): crReturn() presumably records a resume point and returns
// false to the caller -- confirm against the macro definitions.
// `ev` is not referenced directly in this body.
//
// State that is live across a crReturn() is kept in data members (i, j,
// count, comm_size, start_time, the handles). The locals md and me are always
// filled in immediately before the call that consumes them, with no
// crReturn() in between -- presumably because locals do not survive a
// suspension. Keep that property when editing.
// NOTE(review): the loops below step by chunk_size and divide by it; this
// assumes chunk_size > 0 -- TODO confirm getChunkSize() guarantees it.
00048 bool
00049 operator()(Event *ev)
00050 {
00051 ptl_md_t md;
00052 ptl_me_t me;
00053
00054 crBegin();
00055
// One-time setup, guarded by `init`: the bounce counter, ME and MD are
// created once per object and are not released anywhere in this class.
00056 if (!init) {
00057
// ME on PT_BOUNCE over bounce_buf, with bounce_ct_h attached as its counter.
00058 ptl->PtlCTAlloc(PTL_CT_OPERATION, bounce_ct_h);
00059 me.start = bounce_buf;
00060 me.length = chunk_size;
00061 me.match_bits = 0x0;
00062 me.ignore_bits = 0x0;
00063 me.ct_handle = bounce_ct_h;
00064 ptl->PtlMEAppend(PT_BOUNCE, me, PTL_PRIORITY_LIST, NULL, bounce_me_h);
00065
// MD over the same buffer, with no event queue and no counter; it is the
// source handle for the Puts issued below.
00066 md.start = bounce_buf;
00067 md.length = chunk_size;
00068 md.eq_handle = PTL_EQ_NONE;
00069 md.ct_handle = PTL_CT_NONE;
00070 ptl->PtlMDBind(md, &bounce_md_h);
00071
00072 init = true;
00073 crReturn();
00074 start_noise_section();
00075 }
00076
00077
00078
// Timed section starts here; a fixed "200ns" of busy time is charged first.
00079 start_time = cpu->getCurrentSimTimeNano();
00080 cpu->addBusyTime("200ns");
00081 crReturn();
00082
00083
// Per-invocation ME on PT_OUT exposing out_buf, counted by out_me_ct_h.
00084 ptl->PtlCTAlloc(PTL_CT_OPERATION, out_me_ct_h);
00085 crReturn();
00086 me.start = out_buf;
00087 me.length = msg_size;
00088 me.match_bits = 0x0;
00089 me.ignore_bits = 0x0;
00090 me.ct_handle = out_me_ct_h;
00091 ptl->PtlMEAppend(PT_OUT, me, PTL_PRIORITY_LIST, NULL, out_me_h);
00092 crReturn();
00093
// Per-invocation MD over out_buf, counted by out_md_ct_h; it is the local
// side of the triggered Gets below.
00094 ptl->PtlCTAlloc(PTL_CT_OPERATION, out_md_ct_h);
00095 crReturn();
00096 md.start = out_buf;
00097 md.length = msg_size;
00098 md.eq_handle = PTL_EQ_NONE;
00099 md.ct_handle = out_md_ct_h;
00100 ptl->PtlMDBind(md, &out_md_h);
00101 crReturn();
00102
00103 ptl->PtlEnableCoalesce();
00104 crReturn();
00105
00106
00107 if (my_id == my_root) {
00108
// Root: place the payload in out_buf (the buffer exposed on PT_OUT).
00109 memcpy(out_buf, in_buf, msg_size);
00110
// For every chunk, send one Put per child to PT_BOUNCE. The offset/length
// arguments are all 0 -- presumably a zero-byte notification; confirm the
// argument order against portals.h.
00111 for (j = 0 ; j < msg_size ; j += chunk_size) {
00112
00113 for (i = 0 ; i < num_children ; ++i) {
00114 ptl->PtlPut(bounce_md_h, 0, 0, 0, my_children[i],
00115 PT_BOUNCE, 0x0, 0, NULL, 0);
00116
00117
00118 crReturn();
00119 }
00120
00121
00122 }
00123
00124 } else {
// Non-root: arm one triggered Get and one batch of triggered Puts per chunk.
00125 for (j = 0 ; j < msg_size ; j += chunk_size) {
00126
// Size of this chunk: chunk_size, or the remainder for the final chunk.
00127 comm_size = (msg_size - j > chunk_size) ?
00128 chunk_size : msg_size - j;
// Get comm_size bytes at offset j from PT_OUT on the node my_root into
// out_buf, triggered on bounce_ct_h. The threshold is offset by algo_count
// because bounce_ct_h is allocated once and never reset in this class, while
// algo_count accumulates the chunks of earlier invocations.
// NOTE(review): my_root is both compared with my_id and used as the Get
// target; whether it names the global root or this node's parent depends on
// buildBinomialTree -- confirm.
00129 ptl->PtlTriggeredGet(out_md_h, j, comm_size, my_root,
00130 PT_OUT, 0x0, NULL, j, bounce_ct_h,
00131 algo_count + j / chunk_size + 1);
00132 crReturn();
00133
00134
// Forward the notification to each child, triggered on out_md_ct_h (the
// counter attached to the MD used by the Get) reaching chunk index + 1.
// This threshold has no algo_count offset; out_md_ct_h is freshly allocated
// on every invocation.
00135 ptl->PtlStartTriggeredPutV(num_children);
00136 for (i = 0 ; i < num_children ; ++i) {
00137 ptl->PtlTriggeredPut(bounce_md_h, 0, 0, 0, my_children[i],
00138 PT_BOUNCE, 0x0, 0, NULL, 0, out_md_ct_h,
00139 j / chunk_size + 1);
00140 crReturn();
00141 }
00142 ptl->PtlEndTriggeredPutV();
00143 crReturn();
00144 }
00145
00146
// Advance the running total by the number of chunks (msg_size / chunk_size,
// rounded up) so the next invocation's Get thresholds start past this one's.
00147 count = (msg_size + chunk_size - 1) / chunk_size;
00148 algo_count += count;
00149 }
00150
00151 ptl->PtlDisableCoalesce();
00152 crReturn();
00153
// Completion: yield via crReturn() until PtlCTWait reports the target count.
00154 if (num_children > 0) {
00155
// Nodes with children wait on the PT_OUT ME counter for one operation per
// child per chunk.
00156 count = num_children * ((msg_size + chunk_size - 1) / chunk_size);
00157 while (!ptl->PtlCTWait(out_me_ct_h, count)) { crReturn(); }
00158 } else {
00159
// Nodes without children wait on the MD counter for one operation per chunk.
// NOTE(review): a root with no children would also take this branch, yet the
// root path above issues no operation on out_md_h -- confirm that case cannot
// occur or is handled elsewhere.
00160 count = (msg_size + chunk_size - 1) / chunk_size;
00161 while (!ptl->PtlCTWait(out_md_ct_h, count)) { crReturn(); }
00162 }
00163 crReturn();
00164
// Release the per-invocation resources; the bounce CT/ME/MD are kept.
00165 ptl->PtlCTFree(out_me_ct_h);
00166 crReturn();
00167 ptl->PtlMEUnlink(out_me_h);
00168 crReturn();
00169 ptl->PtlCTFree(out_md_ct_h);
00170 crReturn();
00171 ptl->PtlMDRelease(out_md_h);
00172 crReturn();
00173
// Report the elapsed simulated time since start_time.
00174 trig_cpu::addTimeToStats(cpu->getCurrentSimTimeNano()-start_time);
00175
// Self-check: count the bytes of out_buf that do not match the i % 255
// pattern written by the constructor on the root, and report if any differ.
00176 {
00177 int bad = 0;
00178 for (i = 0 ; i < msg_size ; ++i) {
00179 if ((out_buf[i] & 0xff) != i % 255) bad++;
00180 }
00181 if (bad) printf("%5d: bad results: %d\n",my_id,bad);
00182 }
00183
00184 crFinish();
00185 return true;
00186 }
00187
00188 private:
// Declared private and not defined in this file, so they cannot be used by
// outside code.
// NOTE(review): the second declaration takes `const application&`, not
// `const bcast_tree_triggered&`, so it is a converting constructor rather
// than a copy constructor and does not by itself suppress the implicit copy
// constructor -- confirm whether the base class already prevents copying.
00189 bcast_tree_triggered();
00190 bcast_tree_triggered(const application& a);
00191 void operator=(bcast_tree_triggered const&);
00192
// True once the one-time bounce setup in operator() has run.
00193 bool init;
// Portals handle obtained from the cpu in the constructor.
00194 portals *ptl;
00195
// Simulated time (ns) at which the current invocation's timed section began.
00196 SimTime_t start_time;
// Tree radix passed to buildBinomialTree; not used after the constructor.
00197 int radix;
// Loop indices kept as members so their values persist across crReturn().
00198 int i, j;
00199
// Total payload size and per-chunk size, both read from the cpu.
00200 int msg_size;
00201 int chunk_size;
// Size of the chunk currently being armed (non-root path).
00202 int comm_size;
// Scratch: chunk count / counter target for PtlCTWait.
00203 int count;
00204
// in_buf: source pattern (allocated on the root only, NULL elsewhere).
// out_buf: destination buffer, exposed on PT_OUT.
// bounce_buf: backing store for the bounce ME/MD.
00205 char *in_buf;
00206 char *out_buf;
00207 char *bounce_buf;
00208
// Persistent handles created once in the init section of operator().
00209 ptl_handle_ct_t bounce_ct_h;
00210 ptl_handle_me_t bounce_me_h;
00211 ptl_handle_md_t bounce_md_h;
00212
// Per-invocation handles over out_buf; allocated and released on every run.
00213 ptl_handle_ct_t out_me_ct_h;
00214 ptl_handle_me_t out_me_h;
00215 ptl_handle_ct_t out_md_ct_h;
00216 ptl_handle_md_t out_md_h;
00217
// Not referenced anywhere else in this class.
00218 ptl_handle_ct_t ack_ct_h;
00219 ptl_handle_me_t ack_me_h;
00220 ptl_handle_md_t ack_md_h;
00221
// Tree position returned by buildBinomialTree().
00222 int my_root;
00223 std::vector<int> my_children;
00224 int num_children;
00225
// Portal table indices used by this application. PT_ACK is not referenced
// anywhere else in this class.
00226 static const int PT_BOUNCE = 0;
00227 static const int PT_ACK = 1;
00228 static const int PT_OUT = 2;
00229
// Running total of chunks armed by earlier invocations on a non-root node;
// added to the bounce_ct_h trigger thresholds.
00230 uint64_t algo_count;
00231 };
00232
00233 #endif // COMPONENTS_TRIG_CPU_BCAST_TREE_TRIGGERED_H