• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

sst/elements/genericProc/ssBackEnd/ssb_sim-outorder.h

00001 // Copyright 2009-2010 Sandia Corporation. Under the terms
00002 // of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S.
00003 // Government retains certain rights in this software.
00004 // 
00005 // Copyright (c) 2009-2010, Sandia Corporation
00006 // All rights reserved.
00007 // 
00008 // This file is part of the SST software package. For license
00009 // information, see the LICENSE file in the top level directory of the
00010 // distribution.
00011 
00012 #ifndef SSB_SIM_OUTORDER_H
00013 #define SSB_SIM_OUTORDER_H
00014 
00015 #include <set>
00016 #include "ssb_memory.h"
00017 #include "ssb_ruu.h"
00018 #include "ssb_cv_link.h"
00019 #include "ssb_rs_link.h"
00020 #include "ssb_bitmap.h"
00021 #include "ssb_cache.h"
00022 #include "prefetch.h"
00023 #include "sst/core/event.h"
00024 
00025 #define GET_IMIX 1
00026 
00027 class thread;
00028 
00029 //: Conventional processor
00030 //
00031 // Base class for conventional processors, using simple scalar as the
00032 // model.
00033 //
00034 //!SEC:ssBack
00035 class convProc : public prefetchProc
00036 {
00037   friend struct CV_link;
00038 
00039   /* 
00040    *  Typedefs
00041    */
00042 
00043   /* 
00044    * data members
00045    */ 
00046 
00047   unsigned long long tickCount;
00048   processor *myProc;
00049   int myCoreID;
00050 public:
00051   unsigned long long TimeStamp() const {  // accessor: current value of tickCount
00052     return tickCount;
00053   }
00054 
00055   //: Check L2 Cache
00056   // 
00057   // Support for prefetcher: returns the result of cache_probe() on cache_dl2 for addr.
00058   virtual bool checkCache(const simAddress addr) {
00059     return cache_probe(cache_dl2, addr);
00060   };
00061   //: Insert to L2
00062   // 
00063   // Support for prefetcher: issues an Inject access on cache_dl2 for mem (low two bits cleared) and forwards any non-zero bumped address to pref.
00064   virtual void insertCache(const simAddress mem) {
00065     bool dc = false; md_addr_t bumped = 0;  // dc starts false so the check below is well-defined even if cache_access() leaves it untouched
00066     cache_access(cache_dl2, Inject, (mem&~3), NULL, 0, TimeStamp(), 
00067                  NULL, NULL, dc, &bumped);
00068     if (dc) {  // NOTE(review): dc presumably flags a required main-memory access (see message) -- not handled yet
00069       printf("need to handle main mem access in %s\n", __FILE__);
00070     }
00071     if (bumped != 0) {
00072       if (pref) pref->reportCacheEject(bumped);  // pref may be null; only report when a prefetcher is attached
00073     }
00074   }
00075   void wakeUpMM(map<instruction*, RUU_station*>::iterator &mi);
00076   void wakeUpPrefetched(instruction *inst);
00077   //: Send instruction to Memory
00078   // 
00079   // Support for prefetcher: forwards the instruction to mmSendParcel().
00080   virtual void sendToMem(instruction *i) {
00081     mmSendParcel(i);
00082   }
00083 protected:
00084   //: simple fetching
00085   //
00086   // uses getNextInstruction() not getNextInstruction(PC). Useful for traces
00087   bool simpleFetch;
00088 
00089   //: max outstanding stores
00090   //
00091   // Maximum outstanding stores to main memory
00092   int maxMMStores;
00093 
00094   //: port limited commit
00095   //
00096   // 0 or -1 = do not use. Otherwise, this limits the number of
00097   // registers which can be written back in the commit stage. This is
00098   // in addition to the normal instruction limit (-commit:width).
00099   int portLimitedCommit;
00100   int regPortAvail;
00101 
00102   //: Extra cycles for WACI
00103   //
00104   // Allows the RdPort to be reserved for extra cycles by WACI Loads.
00105   int waciLoadExtra;
00106   
00107   unsigned long long lsqCompares;
00108 
00109   //: pipeline clear flag 
00110   //
00111   // are we trying to drain the pipeline? This flag is set to clear
00112   // the pipe, when switching threads or to take an 'interrupt'.
00113   bool clearPipe;
00114 
00115   //: Processor is serializing
00116   //
00117   // This flag indicates that the processor is serializing, most
00118   // likely due to a "sync" instruction. When this flag is set, the
00119   // fetch stage waits until the pipeline is clear before allowing any
00120   // new instructions to fetch.
00121   bool isSyncing;
00122 
00123   //: clock Ratio
00124   int clockRatio;
00125 
00126   //: Collection of active frames
00127   // space for allocated Frames for threads to use
00128   map<frameID, simRegister*> allocatedFrames;
00129 
00130   //: Set of Store instructions accessing main memory
00131   //
00132   // Instructions are recorded here before they are sent off to access
00133   // main memory. We keep them around here so that we can retire them
00134   // later.
00135   set<instruction*> mainMemStores;
00136   //: List of instructions waiting on stores to retire
00137   //
00138   // Because we need to keep remote stores around, we can't retire
00139   // them where we normally would. Plus, we need to retire everything
00140   // in-order. Thus, whenever we have remote stores we queue up
00141   // everyone who can retire as soon as that store gets back.
00142   deque<instruction*> retireList;
00143   //: Set of stores which have arrived out of order
00144   //
00145   // If remote stores arrive out of order, we need to keep track of
00146   // who we can and can't retire.
00147   set<instruction*> OOOStores;
00148 
00149   set<instruction*> condemnedRemotes;
00150 
00151   //: Map of Load instructions accessing main memory
00152   //
00153   // Load instructions are recorded here before they are sent off to
00154   // access main memory. When they return, we take them from here and
00155   // put them in the event queue for writeback.
00156   map<instruction*, RUU_station*> mainMemLoads;
00157 
00158   //: Instruction blocking ifetch
00159   //
00160   // An instruction being fetched which requires a remote
00161   // access. Until this instruction returns, ifetching is
00162   // blocked. When it is returned, we squash it and restart it.
00163   instruction *iFetchBlocker;
00164 
00165   //: Memory Controller
00166   // 
00167   // Where we send memory requests when they miss cache
00168   //component *memCtrl;
00169 
00170   prefetcher *pref;
00171 
00172   //: Thread of execution
00173   thread *thr;
00174   
00175   //: size of an instruction 
00176   //
00177   // in bytes
00178   int instructionSize;
00179 
00180   //: Simple memory model flag
00181   //
00182   // 'simpleMemory' uses just the simple scalar memory model (i.e., no
00183   // external memory accesses, main memory is a constant
00184   // latency). Otherwise, we model remote accesses with a memory
00185   // controller.
00186   int simpleMemory;
00187 
00188   RS_link_list rs_free_list;
00189 
00190   //: Last op attempted to dispatch
00191   //
00192   // the last operation that ruu_dispatch() attempted to dispatch, for
00193   // implementing in-order issue
00194   struct RS_link last_op;
00195 
00196   //: program counter 
00197   md_addr_t pred_PC;
00198 
00199   //: PC to recover to
00200   md_addr_t recover_PC;
00201   
00202   //: fetch unit next fetch address 
00203   md_addr_t fetch_regs_PC;
00204 
00205   //: predicted fetch PC
00206   md_addr_t fetch_pred_PC;
00207 
00208   //: IFETCH -> DISPATCH inst queue 
00209   struct fetch_rec *fetch_data;
00210   //: num entries in IF -> DIS queue 
00211   int fetch_num;                        
00212   //: tail pointer of fetch queue 
00213   int fetch_tail;
00214   //: head pointer of fetch queue 
00215   int fetch_head;       
00216   
00217   //: Did the last instruction miss
00218   int last_inst_missed;
00219   int last_inst_tmissed;
00220 
00221   //: register update unit
00222   //
00223   // register update unit, combination of reservation stations and
00224   // reorder buffer device, organized as a circular queue 
00225   struct RUU_station *RUU;              
00226   //: RUU head pointer
00227   int RUU_head;
00228   //: RUU tail pointer
00229   int RUU_tail; 
00230   //: num entries in RUU
00231   int RUU_num;
00232 
00233   //: load/store queue (LSQ):
00234   //
00235   // load/store queue (LSQ): holds loads and stores in program order,
00236   // indicating status of load/store access:
00237   //
00238   // 
00239   // - issued: address computation complete, memory access in progress
00240   //
00241   // - completed: memory access has completed, stored value available
00242   //
00243   // - squashed: memory access was squashed, ignore this entry
00244   //
00245   // loads may execute when:
00246   //
00247   //   1) register operands are ready, and
00248   //
00249   //   2) memory operands are ready (no earlier unresolved store)
00250   //
00251   // loads are serviced by:
00252   //
00253   // 1) previous store at same address in LSQ (hit latency), or
00254   //
00255   // 2) data cache (hit latency + miss latency)
00256   //
00257   // stores may execute when:
00258   //
00259   // 1) register operands are ready
00260   //
00261   // stores are serviced by: 
00262   //
00263   // 1) depositing store value into the load/store queue
00264   //
00265   // 2) writing store value to the store buffer (plus tag check) at commit
00266   //
00267   // 3) writing store buffer entry to data cache when cache is free
00268   //
00269   // NOTE: the load/store queue can bypass a store value to a load in
00270   // the same cycle the store executes (using a bypass network), thus
00271   // stores complete in effective zero time after their effective
00272   // address is known
00273   struct RUU_station *LSQ;         /* load/store queue */
00274   //: LSQ head pointer
00275   int LSQ_head;
00276   //: LSQ tail pointer
00277   int  LSQ_tail;
00278   //: num entries currently in LSQ 
00279   int LSQ_num;        
00280 
00281   //: Pending event queue
00282   //
00283   // pending event queue, sorted from soonest to latest event (in
00284   // time), NOTE: RS_LINK nodes are used for the event queue list so
00285   // that it need not be updated during squash events 
00286   struct RS_link *event_queue;
00287 
00288   //the ready instruction queue
00289   struct RS_link *ready_queue;  
00290 
00291   unsigned int use_spec_cv[CV_BMAP_SZ];
00292   //: Create Vector
00293   //
00294   // the create vector maps a logical register to a creator in the RUU
00295   // (and specific output operand) or the architected register file
00296   // (if RS_link is NULL)
00297   //
00298   // Note: speculative copy on write storage provided for fast
00299   // recovery during wrong path execute (see tracer_recover() for
00300   // details on this process)
00301   struct CV_link create_vector[MD_TOTAL_REGS+2];
00302   //: Speculative create vector
00303   //
00304   // indicates create in speculative state
00305   struct CV_link spec_create_vector[MD_TOTAL_REGS+2];
00306   
00307   //: Indicate when a register was created
00308   tick_t create_vector_rt[MD_TOTAL_REGS+2];
00309   //: Indicate when a speculative register was created
00310   tick_t spec_create_vector_rt[MD_TOTAL_REGS+2];  
00311 
00312   /*
00313    * simulator options
00314    */
00315   #include "ssb_sim-outorder-options.h"
00316   
00317   /* options database */
00318   //: Simulator options
00319   struct opt_odb_t *sim_odb;
00320 
00321   /*
00322    * simulator stats
00323    */
00324   //: stats database 
00325   struct stat_sdb_t *sim_sdb;
00326 
00327 #if GET_IMIX == 1
00328 public:
00329   //: instruction Mix counters
00330   unsigned long long iMix[LASTINST];
00331 protected:
00332 #endif
00333   //: Number of instructions executed
00334   counter_t sim_num_insn;
00335   //: total number of instructions executed
00336   counter_t sim_total_insn;
00337   //: total number of memory references committed 
00338   counter_t sim_num_refs;
00339   //: total number of memory references executed 
00340   counter_t sim_total_refs;
00341   //: total number of loads committed 
00342   counter_t sim_num_loads;
00343   //: total number of loads executed 
00344   counter_t sim_total_loads;
00345   //: total number of branches committed 
00346   counter_t sim_num_branches;
00347   //: total number of branches executed 
00348   counter_t sim_total_branches;
00349   //: cumulative IFQ occupancy 
00350   counter_t IFQ_count;
00351   //: cumulative IFQ full count
00352   counter_t IFQ_fcount;
00353   //: cumulative RUU occupancy 
00354   counter_t RUU_count;
00355   //: cumulative RUU full count
00356   counter_t RUU_fcount;
00357   //: cumulative LSQ occupancy 
00358   counter_t LSQ_count;
00359 #define WANT_LSQ_HIST 1
00360 #if WANT_LSQ_HIST == 1
00361   map<int, counter_t> LSQ_hist;
00362 #endif
00363   //: cumulative LSQ full count
00364   counter_t LSQ_fcount;
00365   // total non-speculative bogus addresses seen
00366   // (debug var) 
00367   counter_t sim_invalid_addrs;
00368   
00369   /*
00370    * simulator state variables
00371    */  
00372   //: execution start times 
00373   time_t sim_start_time;
00374   //: execution end times 
00375   time_t sim_end_time;
00376   //: elapsed sim time
00377   int sim_elapsed_time;
00378 
00379   //: instruction sequence counter
00380   //
00381   // instruction sequence counter, used to assign unique id's to insts
00382   unsigned int inst_seq;
00383   //: pipetrace instruction sequence counter 
00384   unsigned int ptrace_seq;
00385   //: Speculation mode
00386   //
00387   // speculation mode, non-zero when mis-speculating, i.e., executing
00388   // instructions down the wrong path, thus state recovery will
00389   // eventually have to occur that resets processor register and
00390   // memory state back to the last precise state
00391   bool spec_mode;
00392   //: encountered a lmw or stmw 
00393   int lsq_mult;
00394   //: cycles until fetch issue resumes 
00395   // for delays caused by l1 or tlb misses
00396   unsigned ruu_fetch_issue_delay;
00397   //: cycles till dispatch resumes
00398   // for delays caused by FEB misses.
00399   unsigned ruu_dispatch_delay;
00400   virtual unsigned getFEBDelay() {return 0;}  // base version always returns 0; virtual, so derived classes may override
00401   //: perfect prediction enabled 
00402   int pred_perfect;
00403   //: speculative bpred-update enabled 
00404   char *bpred_spec_opt;
00405   //: Speculation enum
00406   enum { spec_ID, spec_WB, spec_CT } bpred_spec_update;
00407   //: iL1
00408   // level 1 instruction cache, entry level instruction cache 
00409   struct cache_t *cache_il1;
00410   //: iL2
00411   // level 2 instruction cache 
00412   struct cache_t *cache_il2;
00413   //: dL1
00414   // level 1 data cache, entry level data cache 
00415   struct cache_t *cache_dl1;
00416   //: dL2
00417   // level 2 data cache 
00418   struct cache_t *cache_dl2;
00419   //: instruction TLB 
00420   struct cache_t *itlb;
00421   //: data TLB 
00422   struct cache_t *dtlb;
00423   //: branch predictor 
00424   struct bpred_t *pred;
00425   //: functional unit resource pool 
00426   struct res_pool *fu_pool;
00427   //: text-based stat profiles 
00428   struct stat_stat_t *pcstat_stats[MAX_PCSTAT_VARS];
00429   //: text-based stat profiles 
00430   counter_t pcstat_lastvals[MAX_PCSTAT_VARS];
00431   //: text-based stat profiles 
00432   struct stat_stat_t *pcstat_sdists[MAX_PCSTAT_VARS];
00433 
00434   typedef map<instruction*, int> latencyMap;
00435   latencyMap extraInstLat;
00436   instruction *committingInst;
00437 
00438   /* 
00439    * ssb_sim-outorder-constructor.h
00440    */ 
00441 public:
00442   convProc(string configFile, processor *p, int maxMMOut, int coreNum,
00443            map<string,string> prefetchInit);
00444   //component *getMemCtrl() {return memCtrl;}
00445 protected:
00446 
00447   /* 
00448    *  memory and TLB access functions. located in
00449    *  ssb_sim-outorder-memory.cc
00450    */
00451   void mmSendParcel(instruction *inst);
00452   void mainMemAccess(instruction*);
00453   unsigned int mem_access_latency(int blk_sz);
00454   virtual unsigned int cplx_mem_access_latency(const enum mem_cmd cmd,
00455                                                const md_addr_t baddr,
00456                                                const int bsize,
00457                                                bool &);
00458   /*enum md_fault_type new_mem_access(struct mem_t *mem, enum mem_cmd cmd,
00459                                     md_addr_t addr, void *vp,
00460                                     int nbytes);*/
00461   virtual void noteWrite(const simAddress a) = 0;
00462   virtual void handleCoher(const simAddress, const enum mem_cmd cmd) = 0;
00463   uint dl1_access_fn(enum mem_cmd cmd,  md_addr_t baddr, int bsize,
00464                      struct cache_blk_t *blk,  tick_t now, bool&);
00465   uint dl2_access_fn(enum mem_cmd cmd,  md_addr_t baddr,  int bsize,            
00466                      cache_blk_t *blk, tick_t now, bool&);
00467   uint il1_access_fn(enum mem_cmd cmd,  md_addr_t baddr, int bsize,     
00468                      struct cache_blk_t *blk, tick_t now, bool&);
00469   uint il2_access_fn(enum mem_cmd cmd,  md_addr_t baddr,  int bsize,    
00470                      struct cache_blk_t *blk, tick_t now, bool&);
00471   uint itlb_access_fn(enum mem_cmd cmd, md_addr_t baddr, int bsize,             
00472                       struct cache_blk_t *blk,  tick_t now, bool&);
00473   uint dtlb_access_fn(enum mem_cmd cmd, md_addr_t baddr, int bsize,     
00474                       struct cache_blk_t *blk, tick_t now, bool&);
00475 
00476   /* 
00477    *  ssb_main.cc
00478    */
00479   void sim_print_stats(FILE *fd);
00480   int ss_main(const char*);
00481 
00482   /* 
00483    * options & stats located in ssb_sim-outorder-options.cc
00484    */
00485   void sim_reg_options(struct opt_odb_t *odb);
00486   void sim_check_options(struct opt_odb_t *odb);
00487   void sim_reg_stats(struct stat_sdb_t *sdb);
00488 
00489   /* 
00490    *  init stuff ssb_sim-outorder-init.cc
00491    */ 
00492   void sim_init(void);
00493   // note load_prog also does other init stuff
00494   void sim_load_prog(const string fuConfStr);
00495   void ruu_init(void);
00496   void lsq_init(void);
00497   void fetch_init(void);
00498 
00499   /* 
00500    *  dump stuff ssb_sim-outorder-dump.cc
00501    */ 
00502   static void ruu_dumpent(struct RUU_station *rs, int index, FILE *stream,
00503                           int header);
00504   void ruu_dump(FILE *stream);
00505   void lsq_dump(FILE *stream);
00506   void rspec_dump(FILE *stream);
00507   void mspec_dump(FILE *stream);
00508   void fetch_dump(FILE *stream);
00509   void eventq_dump(FILE *stream);
00510   void readyq_dump(FILE *stream);
00511 
00512   /* 
00513    * DLite stuff ssb_sim-outorder-dlite.cc
00514    */ 
00515   static char *simoo_reg_obj(struct regs_t *regs, int is_write,
00516                              enum md_reg_type rt, int reg,
00517                              struct eval_value_t *val);
00518   static char *simoo_mem_obj(struct mem_t *mem, int is_write,
00519                              md_addr_t addr, char *p, int nbytes);              
00520   static char *simoo_mstate_obj(FILE *stream, char *cmd, struct regs_t *regs,   
00521                                 struct mem_t *mem);  
00522 
00523   /*
00524    * execution unit event queue - ssb_sim-outorder-eventq.cc
00525    */
00526   struct RUU_station *eventq_next_event(void);
00527   void eventq_queue_event(struct RUU_station *rs, tick_t when);
00528   void eventq_init(void);
00529 
00530   /*
00531    * The ready instruction queue - ssb_sim-outorder-readyq.cc
00532    */
00533   void readyq_init(void);
00534   void readyq_enqueue(struct RUU_station *rs);
00535 
00536   /* 
00537    * Tracer functions - ssb_sim-outorder-tracer.cc
00538    */
00539   void tracer_recover(void);
00540   void tracer_init(void);
00541   void ruu_recover(int branch_index);
00542 
00543   /* 
00544    * idep/odep handling - ssb_sim-outorder-dep.cc
00545    */
00546   void ruu_link_idep(struct RUU_station * const rs, const int idep_num, const int idep_name);
00547   void ruu_install_odep(struct RUU_station *rs, int odep_num, int odep_name);
00548 
00549   /* 
00550    *  "main loop" simulation functions
00551    */
00552   void fast_sim_loop();
00553   void sim_loop(bool);
00554   void ruu_release_fu(void);
00555   void ruu_commit(void);
00556   void ruu_writeback(void);
00557   void lsq_refresh(void);
00558   void ruu_issue(void);
00559   void ruu_dispatch(void);
00560   void ruu_fetch(void);
00561 public:
00562   //: Check if the pipeline is clear
00563   // True when RUU_num and fetch_num are both 0 and retireList is empty.
00564   // NOTE: we could probably speed this up by tagging entries in the
00565   // retireList with the thread to which they belong. That way, we
00566   // could just check the RUU and fetch, instead of checking the
00567   // retire list and having to wait for all stores to return.
00568   bool pipeClear() {return (RUU_num == 0 && fetch_num == 0 &&
00569                             retireList.empty());}
00570 protected:
00571   //: Check if the pipeline past the fetch stage is clear
00572   //
00573   // this is the same as pipeClear(), except it ignores instructions
00574   // in the fetch->dispatch pipe (fetch_num is not consulted). This is used for sync instructions.
00575   bool pipeDispatchClear() {return (RUU_num == 0 && retireList.empty());}
00576 
00577   /* 
00578    * Enkidu functions 
00579    */ 
00580   virtual void setup()=0;
00581   virtual void finish();
00582 public:
00583   virtual void handleMemEvent(instruction* inst );
00584 protected:
00585   virtual void postTic()=0;
00586   void handleReturningStore(instruction *inst);
00587 
00588   /* 
00589    * Processor functions
00590    */
00591   frameID requestFrame(int size);
00592   simRegister* getFrame(frameID);
00593   void returnFrame(frameID);
00594   virtual bool insertThread(thread*);
00595   //bool isLocal(const simAddress, const simPID);
00596   void dataCacheInvalidate( simAddress addr );
00597 
00598 };
00599 
00600 /* read a create vector entry: the speculative copy when bit N of use_spec_cv is set, else the non-speculative one; the _P form reads through pointer P, which is parenthesized so any pointer expression is safe */
00601 #define CREATE_VECTOR_P(P,N)  (BITMAP_SET_P((P)->use_spec_cv, CV_BMAP_SZ, (N)) \
00602                                ? (P)->spec_create_vector[N]             \
00603                                : (P)->create_vector[N])
00604 #define CREATE_VECTOR(N)        (BITMAP_SET_P(use_spec_cv, CV_BMAP_SZ, (N))\
00605                                  ? spec_create_vector[N]                \
00606                                  : create_vector[N])
00607 
00608 /* read a create vector timestamp entry: spec_create_vector_rt[N] when bit N of use_spec_cv is set, else create_vector_rt[N] */
00609 #define CREATE_VECTOR_RT(N)     (BITMAP_SET_P(use_spec_cv, CV_BMAP_SZ, (N))\
00610                                  ? spec_create_vector_rt[N]             \
00611                                  : create_vector_rt[N])
00612 
00613 /* set a create vector entry: in spec_mode, mark N in use_spec_cv and write spec_create_vector[N]; otherwise write create_vector[N] directly */
00614 #define SET_CREATE_VECTOR(N, L) (spec_mode                              \
00615                                  ? (BITMAP_SET(use_spec_cv, CV_BMAP_SZ, (N)),\
00616                                     spec_create_vector[N] = (L))        \
00617                                  : (create_vector[N] = (L)))
00618 
00619 /* specified instruction is a LMW or STMW or other variants; currently expands to a constant: 0 under TARGET_PPC (the opcode test is commented out), FALSE otherwise */
00620 #ifdef TARGET_PPC
00621 //#define  IS_MULT_LSQ(op) ((op == STMW) || (op == LMW) || (op == LSWI) || (op == LSWX) || (op == STSWI) || (op == STSWX))
00622 #define  IS_MULT_LSQ(op) 0
00623 #else
00624 #define  IS_MULT_LSQ(op) FALSE
00625 #endif
00626 
00627 #endif

Generated on Fri Oct 22 2010 11:02:25 for SST by  doxygen 1.7.1