00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef MCNIAGARA_H
00013 #define MCNIAGARA_H
00014
00015 #include <stdio.h>
00016 #include <McSimDefs.h>
00017 #include <OffCpuIF.h>
00018 #include <MemoryModel.h>
00019 #include <Dependency.h>
00020 #include <CycleTracker.h>
00021
00022
// ---------------------------------------------------------------------------
// Histogram sizes.
//
// Each constant is the element count of the like-named histogram array in
// class McNiagara (e.g. LD_LD_HIST_LENGTH sizes ld_ld_hist[]).
// NOTE(review): the *_*_HIST names presumably denote distances between
// consecutive instructions of the named kinds -- confirm against the code
// that fills the histograms.
// ---------------------------------------------------------------------------
#define LD_LD_HIST_LENGTH   513
#define ST_ST_HIST_LENGTH   513
#define FP_FP_HIST_LENGTH   513
#define INT_INT_HIST_LENGTH 513
#define BR_BR_HIST_LENGTH   513
#define ST_LD_HIST_LENGTH   513
#define LD_USE_HIST_LENGTH  513   // load-to-use histogram
#define INT_USE_HIST_LENGTH 513   // int_producer-to-use histogram
#define FP_USE_HIST_LENGTH  513   // fp-to-use histogram

// Upper bound on "STB" entries.
// NOTE(review): STB presumably means store buffer -- confirm.
#define MAX_STB_ENTRIES 8

// Twice TOTAL_INSTS. TOTAL_INSTS is not defined by this header before this
// point; the only TOTAL_INSTS visible here is an enumerator of
// McNiagara::PerformanceCtrIDs, so the expansion depends on where the macro is
// used. NOTE(review): confirm which TOTAL_INSTS is intended.
#define MAX_INST (2*TOTAL_INSTS)

// Small floating-point tolerance.
// NOTE(review): presumably the convergence threshold -- confirm at the use
// site.
#define THRESHOLD 1.0E-3

// ---------------------------------------------------------------------------
// Latency / penalty constants.
// NOTE(review): units are presumably processor cycles -- confirm.
// ---------------------------------------------------------------------------

// FGU latency. NOTE(review): FGU is presumably the floating-point/graphics
// unit -- confirm.
#define FGU_LATENCY 6

// Branch miss penalty.
// Documentation states 6-cycle, but micro-benchmarks apparently indicated
// otherwise; 7 is the value used by the model. (The remainder of the original
// note is missing -- TODO confirm the rationale.)
#define BRANCH_MISS_PENALTY 7

// L1 latency. NOTE(review): presumably the L1 cache hit latency -- confirm.
#define L1_LATENCY 3

// L2 latency. NOTE(review): presumably the L2 cache hit latency -- confirm.
#define L2_LATENCY 20

// Memory latency. NOTE(review): presumably main-memory access -- confirm.
#define MEM_LATENCY 176

// TLB latency. NOTE(review): presumably the TLB miss cost -- confirm.
#define TLB_LATENCY 190
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087 class McNiagara {
00088
00089
00090 struct ModelParam {
00091 char name[32];
00092 union {
00093 unsigned long long lval;
00094 double dval;
00095 } v;
00096 };
00097
00098 ModelParam performanceCtr[20];
00099 ModelParam instructionProb[30];
00100
00101 enum InstructionProbIDs {
00102 PB_6_CTI_N, PB_6_INT_N, PB_25_INT_N, PB_6_FGU_N, PB_30_FGU_N,
00103 PB_6_MEM_N, PB_25_MEM_N, PB_3_LD_N, P_FDIV_FSQRT_S_N,
00104 P_FDIV_FSQRT_D_N, PB_6_INT_D_N, PB_25_INT_D_N, PB_6_FGU_D_N,
00105 PB_30_FGU_D_N, PB_6_MEM_D_N, PB_25_MEM_D_N, PB_3_LD_D_N,
00106 P_FDIV_FSQRT_S_D_N, P_FDIV_FSQRT_D_D_N, P_FDIV_FSQRT_S,
00107 P_FDIV_FSQRT_D, P_DS, DELAY_SLOT_N, ANNULLED_N, D_LOADS,
00108 D_STORES, D_FLOATS, D_INTS, NUM_INSTPROBS
00109 };
00110
00111
00112 static const char* instructionProbNames[NUM_INSTPROBS+1];
00113
00114 enum PerformanceCtrIDs {
00115 TOTAL_CYCLES, L2_MISSES, L2_I_MISSES, L1_MISSES, IC_MISSES,
00116 TLB_MISSES, ITLB_MISSES, TAKEN_BRS, TOTAL_INSTS, MEASURED_CPI,
00117 TOTAL_LDS, TOTAL_STS, TOTAL_FPS, TOTAL_BRS, LD_PERC, ST_PERC,
00118 BR_PERC, FP_PERC, GR_PERC, NUM_PERFCTRS
00119 };
00120
00121
00122
00123 static const char* performanceCtrNames[NUM_PERFCTRS+1];
00124
00125
00126
00127
00128
00129 enum InstructionType {
00130 I_LOAD, I_STORE, I_BRANCH, I_GRPROD, I_FLOAT, I_NOP, I_NUMTYPES
00131 };
00132
00133 double iTypeProbCDF[I_NUMTYPES];
00134
00135 double iTypeDelaySlotProbCDF[I_NUMTYPES];
00136
00137
00138 enum LoadCategory {
00139 PB_6_MEM, PB_25_MEM, PB_3_LD, OTHER_LD, NUMLOADCATS
00140 };
00141
00142 double loadCatProbCDF[NUMLOADCATS];
00143
00144 double loadCatDelaySlotProbCDF[NUMLOADCATS];
00145
00146
00147 enum IntCategory {
00148 PB_6_FGU, PB_30_FGU, PB_6_INT, PB_25_INT, OTHER_INT, NUMINTCATS
00149 };
00150
00151 double intCatProbCDF[NUMINTCATS];
00152
00153 double intCatDelaySlotProbCDF[NUMINTCATS];
00154
00155
00156 enum FloatCategory {
00157 FDIV_FSQRT_S, FDIV_FSQRT_D, OTHER_FLOAT, NUMFLOATCATS
00158 };
00159
00160 double floatCatProbCDF[NUMFLOATCATS];
00161
00162 double floatCatDelaySlotProbCDF[NUMFLOATCATS];
00163
00164
00165 struct Token {
00166 InstructionType type;
00167 union {
00168 LoadCategory l;
00169 IntCategory i;
00170 FloatCategory f;
00171 unsigned int v;
00172 } category;
00173 double optProb;
00174 bool inDelaySlot;
00175 };
00176
00177
00178
00179
00180
00181
00182 CycleTracker cycleTracker;
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195 struct dependency *dc_head;
00196 ***/
00197 DependencyTracker depTracker;
00198
00199
00200
00201
00202
00203
00204
00205
00206
00207 struct memory_queue *mq_head;
00208 struct memory_queue *last_store;
00209 // How many loads/stores are currently in memory queue
00210 int ld_in_q, st_in_q;
00211 ***/
00212 MemoryModel memModel;
00213
00214
00215
00216 double ld_ld_hist[LD_LD_HIST_LENGTH],
00217 st_st_hist[ST_ST_HIST_LENGTH],
00218 fp_fp_hist[FP_FP_HIST_LENGTH],
00219 int_int_hist[INT_INT_HIST_LENGTH];
00220 double br_br_hist[BR_BR_HIST_LENGTH],
00221 st_ld_hist[ST_LD_HIST_LENGTH];
00222
00223
00224 double ld_use_hist[LD_USE_HIST_LENGTH],
00225 int_use_hist[INT_USE_HIST_LENGTH],
00226 fp_use_hist[FP_USE_HIST_LENGTH];
00227
00228 double P1,
00229 P2,
00230 PM,
00231 PT,
00232 PG,
00233 PF,
00234 PBM,
00235 P_SP,
00236 PBR,
00237 PLD,
00238 PST;
00239
00240
00241 double P_1,
00242 P_2,
00243 P_3,
00244 P_4,
00245 P_5,
00246 P_6,
00247 P_7,
00248 P_8,
00249 P_9,
00250 P_10,
00251 P_11,
00252 P_12,
00253 P_13;
00254
00255
00256
00257 double PLD_D,
00258 PST_D,
00259 PF_D,
00260 PG_D;
00261
00262
00263 FILE *outf;
00264 double cycles;
00265
00266 double probLoadFromSTB;
00267
00268
00269
00270
00271
00272 unsigned long long n_loads,
00273 n_stores,
00274 n_memops,
00275 n_branches,
00276 n_miss_branches;
00277 unsigned long long n_l1,
00278 n_l2,
00279 n_mem,
00280 n_tlb,
00281 n_gr_produced,
00282 n_fr_produced;
00283 unsigned long long n_pipe_flushes,
00284 n_icache_misses,
00285 n_stb_full,
00286 n_stb_reads,
00287
00288 last_fdiv;
00289
00290 double total_stores,
00291 total_loads,
00292 total_gr_producers,
00293 total_fr_producers,
00294 total_int,
00295 total_fp,
00296 total_br,
00297 total_instructions;
00298
00299
00300 double ic_p2,
00301 ic_pm,
00302 i_miss,
00303 ic_p1,
00304 ITLB_P,
00305 istall;
00306
00307
00308 OffCpuIF *external_if;
00309
00310 FILE *tracef;
00311
00312 bool Debug;
00313
00314 unsigned long long tot_insns,
00315 tot_delayslot_insns;
00316
00317
00318
00319 double CPIi;
00320
00321
00322 public:
00323 bool convergence;
00324 bool traceEnded;
00325 McNiagara(void);
00326 void init(const char *in_file, OffCpuIF * extif, const char *instProbFile,
00327 const char *perfCountFile, const char *tracefile = 0,
00328 unsigned long seed=0);
00329 void un_init(void);
00330 int sim_cycle(unsigned long current_cycle);
00331 int generate_instruction(Token * token);
00332 int sim_instruction(Token * token);
00333 int fini(const char *outfile);
00334
00335
00336 private:
00337 double make_cdf(double *buf, int length, int ignore_last_n, FILE * inf);
00338 int sample_hist(double *hist, int hist_length);
00339 int diff(double pred, unsigned long long real, int flag);
00340 void sanity_check(void);
00341
00342
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354 int read_paramfile(const char *filename, ModelParam params[],
00355 const char *paramnames[]);
00356
00357 };
00358
00359 #endif