• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

sst/elements/mcniagara/McNiagara.h

00001 //
00002 // Definitions for Niagara MC simulator
00003 //
00004 /// @mainpage
00005 /// McNiagara is a Monte Carlo model and simulator of the 
00006 /// Niagara processor.
00007 ///
00008 /// @section intro Introduction
00009 /// This is an example of a main page section.
00010 ///
00011 
00012 #ifndef MCNIAGARA_H
00013 #define MCNIAGARA_H
00014 
00015 #include <stdio.h>
00016 #include <McSimDefs.h>
00017 #include <OffCpuIF.h>
00018 #include <MemoryModel.h>
00019 #include <Dependency.h>
00020 #include <CycleTracker.h>
00021 
00022 // Array lengths of the distance histograms held by class McNiagara
00023 #define  LD_LD_HIST_LENGTH     513
00024 #define  ST_ST_HIST_LENGTH     513
00025 #define  FP_FP_HIST_LENGTH     513
00026 #define  INT_INT_HIST_LENGTH   513
00027 #define  BR_BR_HIST_LENGTH     513
00028 #define  ST_LD_HIST_LENGTH     513
00029 #define  LD_USE_HIST_LENGTH    513      // load-to-use histogram
00030 #define  INT_USE_HIST_LENGTH   513      // int_producer-to-use histogram
00031 #define  FP_USE_HIST_LENGTH    513      // fp-to-use histogram
00032 
00033 #define  MAX_STB_ENTRIES   8   // NOTE(review): presumably the store buffer (STB) capacity - confirm
00034 
00035 //--------- Simulation TERMINATION parameters
00036 #define MAX_INST      (2*TOTAL_INSTS)   /*  Twice the number of instructions.
00037                                            Of course, CPI will converge 
00038                                            before getting there,
00039                                            but this is just in case!  NOTE(review): TOTAL_INSTS is also an enumerator of McNiagara::PerformanceCtrIDs - confirm which value is intended where this macro expands.  */
00040 #define THRESHOLD      1.0E-3   /* Threshold on CPI variation tolerated */
00041 
00042 
00043 /************************
00044  Info on the T2 Processor
00045  each core has a 16KB 8-way icache 32 byte lines
00046  8KB 4-way 16byte line L1 cache
00047  64 entry iTLB & 128entry DTLB
00048  All 8 cores share a unified 4MB 16-way L2 cache with 64 byte lines
00049  ************************/
00050 
00051 //--------- Define latencies in cycles
00052 
00053 /// Floating point unit latency?
00054 #define FGU_LATENCY   6
00055 /// Branch miss penalty
00056 // Documentation states 6 cycles, but micro-benchmarks
00057 // tell us that it's 7 cycles
00058 #define BRANCH_MISS_PENALTY  7
00059 
00060 /// L1 access latency is 3
00061 // This is the basic latency, but it could vary depending
00062 // on the distance to the next dep instruction/load
00063 #define L1_LATENCY  3
00064 
00065 /// L2 access latency.
00066 // This is the basic latency, but it could vary depending
00067 // on the distance to the next dep instruction/load 
00068 //#define L2_LATENCY  23
00069 #define L2_LATENCY  20
00070 
00071 /// Main memory Latency
00072 // (obtained from microbenchmark)
00073 #define MEM_LATENCY  176
00074 
00075 /// ITLB/DTLB miss latency
00076 // This has not been confirmed yet, but is a close estimate
00077 #define TLB_LATENCY  190
00078 
00079 ///
00080 /// @brief Main simulator class
00081 ///
00082 /// One object of this class is instantiated for each CPU
00083 /// that should be simulated (SST needs to be able to simulate
00084 /// multiple separate CPUs). See the main() function in 
00085 /// McNiagara.cc for stand-alone execution.
00086 ///
00087 class McNiagara {
00088 
00089    /// Structure to hold read-from-file model parameters
00090    struct ModelParam {
00091       char name[32];  ///< parameter string name
00092       union {
00093          unsigned long long lval;  ///< integer parameter ULL value
00094          double dval;              ///< real parameter double value
00095       } v;
00096    };
00097    
00098    ModelParam performanceCtr[20];  ///< parameters from perf_cnt.h (really 19 = NUM_PERFCTRS)
00099    ModelParam instructionProb[30]; ///< parameters from inst_prob.h (really 28 = NUM_INSTPROBS)
00100    
00101    enum InstructionProbIDs {
00102       PB_6_CTI_N, PB_6_INT_N, PB_25_INT_N, PB_6_FGU_N, PB_30_FGU_N,
00103       PB_6_MEM_N, PB_25_MEM_N, PB_3_LD_N, P_FDIV_FSQRT_S_N,
00104       P_FDIV_FSQRT_D_N, PB_6_INT_D_N, PB_25_INT_D_N, PB_6_FGU_D_N,
00105       PB_30_FGU_D_N, PB_6_MEM_D_N, PB_25_MEM_D_N, PB_3_LD_D_N,
00106       P_FDIV_FSQRT_S_D_N, P_FDIV_FSQRT_D_D_N, P_FDIV_FSQRT_S,
00107       P_FDIV_FSQRT_D, P_DS, DELAY_SLOT_N, ANNULLED_N, D_LOADS,
00108       D_STORES, D_FLOATS, D_INTS, NUM_INSTPROBS
00109    };
00110    // unfortunately the name strings must be initialized in the .cc file;
00111    // they MUST correspond, in order, to the enumerators listed above
00112    static const char* instructionProbNames[NUM_INSTPROBS+1];
00113    
00114    enum PerformanceCtrIDs {
00115       TOTAL_CYCLES, L2_MISSES, L2_I_MISSES, L1_MISSES, IC_MISSES,
00116       TLB_MISSES, ITLB_MISSES, TAKEN_BRS, TOTAL_INSTS, MEASURED_CPI,
00117       TOTAL_LDS, TOTAL_STS, TOTAL_FPS, TOTAL_BRS, LD_PERC, ST_PERC,
00118       BR_PERC, FP_PERC, GR_PERC, NUM_PERFCTRS
00119    };
00120    // unfortunately the name strings must be initialized in the .cc file;
00121    // they MUST correspond, in order, to the enumerators listed above
00122    /// Performance counter name strings
00123    static const char* performanceCtrNames[NUM_PERFCTRS+1];
00124 
00125    // JEC: Simpler instruction categorization: create an array
00126    // of instruction category probabilities in a CDF form, then
00127    // just generate a random (0,1) number and find where it lands
00128    /// General instruction types
00129    enum InstructionType {
00130       I_LOAD, I_STORE, I_BRANCH, I_GRPROD, I_FLOAT, I_NOP, I_NUMTYPES
00131    };
00132    /// Instruction type CDF
00133    double iTypeProbCDF[I_NUMTYPES];
00134    /// Delay slot instruction type CDF
00135    double iTypeDelaySlotProbCDF[I_NUMTYPES];
00136 
00137    /// Load instruction categories
00138    enum LoadCategory {
00139       PB_6_MEM, PB_25_MEM, PB_3_LD, OTHER_LD, NUMLOADCATS
00140    };
00141    /// Load category CDF
00142    double loadCatProbCDF[NUMLOADCATS];
00143    /// Delay slot load category CDF
00144    double loadCatDelaySlotProbCDF[NUMLOADCATS];
00145 
00146    /// Integer (GR) instruction categories
00147    enum IntCategory {
00148       PB_6_FGU, PB_30_FGU, PB_6_INT, PB_25_INT, OTHER_INT, NUMINTCATS
00149    };
00150    /// Int category CDF
00151    double intCatProbCDF[NUMINTCATS];
00152    /// Delay slot int category CDF
00153    double intCatDelaySlotProbCDF[NUMINTCATS];
00154 
00155    /// Float instruction categories
00156    enum FloatCategory {
00157       FDIV_FSQRT_S, FDIV_FSQRT_D, OTHER_FLOAT, NUMFLOATCATS
00158    };
00159    /// Float category CDF
00160    double floatCatProbCDF[NUMFLOATCATS];
00161    /// Delay slot float category CDF
00162    double floatCatDelaySlotProbCDF[NUMFLOATCATS];
00163 
00164    /// Token type structure for MC or trace tokens
00165    struct Token {
00166       InstructionType type;  ///< general type of instruction
00167       union {
00168          LoadCategory l;    ///< load category if load type
00169          IntCategory i;     ///< int/gr category if int type
00170          FloatCategory f;   ///< float category if float type
00171          unsigned int v;    ///< generic category value accessor
00172       } category;
00173       double optProb;    ///< optional probability for insn if needed
00174       bool inDelaySlot;  ///< true if instruction is in a delay slot
00175    };
00176 
00177    // --------- GLOBAL variables
00178 
00179    //static char* stallReasonNames[NUMSTALLREASONS+1];
00180    // Effective stall time caused by each StallReason
00181    //double effective_t[NUMSTALLREASONS];
00182    CycleTracker cycleTracker;  // NOTE(review): presumably replaces the commented-out stall-reason bookkeeping above - confirm
00183 
00184    //enum token_type { IS_LOAD, IS_STORE };
00185 
00186    /// Data dependency tracking struct
00187    /*** REPLACED BY DependencyTracker CLASS
00188    struct dependency {
00189       unsigned long long which_inst; ///< Insn count of the DEPENDENT instruction
00190       double when_satisfied;         ///< Cycle number when the value is available
00191       enum StallReason reason;  ///< Reason for dependency
00192       struct dependency *next;  // Pointer to the next node
00193    };
00194    /// Linked list of data dependencies
00195    struct dependency *dc_head;
00196    ***/
00197    DependencyTracker depTracker;
00198 
00199    /// Memory operation queue (?)
00200    /*** REPLACED BY MemoryModel CLASS
00201    struct memory_queue {
00202       double when_satisfied;    // the cycle at which the mem op will be satisfied
00203       enum token_type whoami;   // Load or STORE
00204       struct memory_queue *next;
00205    };
00206    /// Linked list of memory operations
00207    struct memory_queue *mq_head;
00208    struct memory_queue *last_store;
00209    // How many loads/stores are currently in memory queue
00210    int ld_in_q, st_in_q;
00211    ***/
00212    MemoryModel memModel;
00213 
00214    // distance histograms for distances between loads, stores, fp, int 
00215    // and branch instructions
00216    double ld_ld_hist[LD_LD_HIST_LENGTH],
00217           st_st_hist[ST_ST_HIST_LENGTH],
00218           fp_fp_hist[FP_FP_HIST_LENGTH],
00219           int_int_hist[INT_INT_HIST_LENGTH];
00220    double br_br_hist[BR_BR_HIST_LENGTH],
00221           st_ld_hist[ST_LD_HIST_LENGTH];
00222 
00223    // distance-to-use histograms ==> for dependency checking
00224    double ld_use_hist[LD_USE_HIST_LENGTH],
00225           int_use_hist[INT_USE_HIST_LENGTH],
00226           fp_use_hist[FP_USE_HIST_LENGTH];
00227 
00228    double P1,   // NOTE(review): P1..PST are not documented in this header - TODO describe each term
00229           P2,
00230           PM,
00231           PT,
00232           PG,
00233           PF,
00234           PBM,
00235           P_SP,
00236           PBR,
00237           PLD,
00238           PST;
00239 
00240    // Probabilities in the Model Diagram 
00241    double P_1,
00242           P_2,
00243           P_3,
00244           P_4,
00245           P_5,
00246           P_6,
00247           P_7,
00248           P_8,
00249           P_9,
00250           P_10,
00251           P_11,
00252           P_12,
00253           P_13;
00254 
00255    // Delay Slot Probabilities
00256    // - probabilities of what type of instruction in delay slot
00257    double PLD_D,
00258           PST_D,
00259           PF_D,
00260           PG_D;
00261 
00262    // Global (per-simulator-object) variables
00263    FILE *outf;               ///< Output file handle
00264    double cycles;            ///< Global number of current cycle
00265    
00266    double probLoadFromSTB;   // NOTE(review): presumably the probability that a load is served from the store buffer - confirm
00267 
00268    //double p_FDIV_FSQRT_S,
00269    //       p_FDIV_FSQRT_D;
00270 
00271    // More variables: number of events, loads, stores, branches, ....etc
00272    unsigned long long n_loads,
00273         n_stores,
00274         n_memops,
00275         n_branches,
00276         n_miss_branches;
00277    unsigned long long n_l1,
00278         n_l2,
00279         n_mem,
00280         n_tlb,
00281         n_gr_produced,
00282         n_fr_produced;
00283    unsigned long long n_pipe_flushes,
00284         n_icache_misses,
00285         n_stb_full,
00286         n_stb_reads,
00287         //next_ld,
00288         last_fdiv;
00289 
00290    double total_stores,
00291           total_loads,
00292           total_gr_producers,
00293           total_fr_producers,
00294           total_int,
00295           total_fp,
00296           total_br,
00297           total_instructions;
00298 
00299    // Instruction cache miss probabilities
00300    double ic_p2,
00301           ic_pm,
00302           i_miss,
00303           ic_p1,
00304           ITLB_P,
00305           istall;
00306 
00307    /// Object pointer to external interface module 
00308    OffCpuIF *external_if;
00309    /// Input instruction trace, if in trace-driven mode
00310    FILE *tracef;
00311 
00312    bool Debug;
00313 
00314    unsigned long long tot_insns,
00315         tot_delayslot_insns;
00316 
00317    // one instruction per cycle is the max issue frequency
00318    // for a single-thread single-core model
00319    double CPIi;
00320    //       p_cpi;
00321 
00322  public:
00323    bool convergence;    ///< flag for model CPI convergence
00324    bool traceEnded;     ///< flag for trace file end
00325    McNiagara(void);      // constructor
00326    void init(const char *in_file, OffCpuIF * extif, const char *instProbFile, 
00327              const char *perfCountFile, const char *tracefile = 0,
00328              unsigned long seed=0);   // tracefile and seed default to 0 when omitted; NOTE(review): a null tracefile presumably selects Monte Carlo rather than trace-driven mode - confirm
00329    void un_init(void);
00330    int sim_cycle(unsigned long current_cycle);
00331    int generate_instruction(Token * token);
00332    int sim_instruction(Token * token);
00333    int fini(const char *outfile);
00334    //double my_rand(void);
00335 
00336  private:
00337    double make_cdf(double *buf, int length, int ignore_last_n, FILE * inf);
00338    int sample_hist(double *hist, int hist_length);
00339    int diff(double pred, unsigned long long real, int flag);
00340    void sanity_check(void);
00341    //double scan_memq(void);
00342    //void add_memq(double when_satisfied, enum token_type whoami);
00343    //double cycles_to_serve_load(enum StallReason *where, int flag, int d_dist,
00344    //      int l_dist);
00345    //void add_dependency(unsigned long long which_inst, double when_satisfied,
00346    //      enum StallReason reason);
00347    //void adjust_dependence_chain(double c);
00348    //double is_dependent(unsigned long long which_inst,
00349    //      enum StallReason *where);
00350    //void serve_store(void);
00351    //void handle_delay_slot(unsigned long long i,
00352    //      enum StallReason *last_ld_reason, double *last_ld_satisfied,
00353    //      double *fdiv_allowed);
00354    int read_paramfile(const char *filename, ModelParam params[],
00355                       const char *paramnames[]);
00356 
00357 };
00358 
00359 #endif

Generated on Fri Oct 22 2010 11:02:25 for SST by  doxygen 1.7.1