/* * tpt_c.h - TPT cache module interfaces * * Revision 1.0 2005/04/08 11:07am oboudreaux * Initial revision * ************* * DIRECTORY * ************* * * tpt_policy * tpt_blk_t * tpt_set_t * tpt_cache_t * tpt_char2policy * tpt_wbuff_blk_t * tpt_wbuff_t * tpt_create * tpt_blk_create * tpt_wbuff_create * tpt_get_offp_targ * tpt_config * tpt_reg_stats * tpt_stats * tpt_dump * tpt_set_dump * tpt_read * tpt_write * tpt_update_way * tpt_copy_blk * tpt_wbuff_aquire * tpt_wbuff_release * tpt_probe * tpt_flush * tpt_test * * PREFETCH Unit * pf_req_t * pf_queue_t * pfq_create * pfq_cycle_update * pfq_lu * pfq_add_outb * pfq_next_to_issue * pfq_issue * * * pfq_add_issue * pfq_add_outb_top * */ #ifndef TPTCACHE_H #define TPTCACHE_H #include #include "host.h" #include "misc.h" #include "machine.h" #include "memory.h" #include "stats.h" #include "cache.h" /* * This module contains code to implement the TPT cache. The user instantiates caches using * tpt_create(). When instantiated, the user may specify the geometry of the cache (i.e., * number of sets, associativity, replacement policy), and supply a block access function. * The block access function indicates the latency to access lines when the cache misses, * accounting for any component of miss latency, e.g., bus acquire latency, bus transfer * latency, memory access latency, etc... * * This module also tracks latency of accessing the data cache, each cache has a hit latency * defined when instantiated, miss latency is returned by the cache's block access function, * the caches may service any number of hits under any number of misses, the calling * simulator should limit the number of outstanding misses or the number of hits under * misses as per the limitations of the particular microarchitecture being simulated. * * Due to the organization of this cache implementation, the latency of a request cannot be * affected by a later request to this module. 
As a result, reordering of requests in the i * memory hierarchy is not possible. */ /* highly associative caches are implemented using a hash table lookup to speed block access, this macro decides if a cache is "highly associative" */ //#define CACHE_HIGHLY_ASSOC(cp) ((cp)->assoc > 4) /* cache replacement policy */ enum tpt_policy { tLRU, /* replace least recently used block (perfect LRU) */ tRandom, /* replace a random block */ tLIFO /* replace the oldest block in the set */ }; enum bool { false, true }; /* block status values */ #define TPT_BLK_VALID 0x00000001 /* block valid */ #define TPT_BLK_PARTIAL 0x00000002 /* block partially valid */ /* tpt entry definition */ struct tpt_blk_t { struct tpt_blk_t *way_next; /* next block in the ordered way chain, used to order blocks for replacement */ struct tpt_blk_t *way_prev; /* previous block in the order way chain */ md_addr_t tag; /* data block tag value (br_seq.high_addr_bits)*/ unsigned int status; /* block status, see CACHE_BLK_* defs above */ tick_t ready; /* time when block will be accessible, field is set when a miss fetch is initiated */ md_addr_t *on_path; /* addresses of basic blocks on pred path */ md_addr_t *off_path; /* addresses of basic blocks off pred path */ int *align; /* number of instructions in each basic block on path */ }; /* tpt set definition (one or more blocks sharing the same set index) */ struct tpt_set_t { struct tpt_blk_t *way_head; /* head of way list */ struct tpt_blk_t *way_tail; /* tail pf way list */ struct tpt_blk_t *blks; /* cache blocks, allocated sequentially, so this pointer can also be used for random access to cache blocks */ }; /* tpt definition */ struct tpt_cache_t { /* parameters */ int nsets; /* number of sets */ int assoc; /* cache associativity */ enum tpt_policy policy; /* cache replacement policy */ unsigned int br_depth; /* number of branch predictions/cycle */ unsigned int offp_depth; /* depth tp prefetch off pred path */ unsigned int hit_latency; /* cache hit 
latency */ unsigned int miss_latency; /* cache miss latency */ /* derived data, for fast decoding */ md_addr_t set_mask; /* use *after* shift */ int tag_shift; int br_shift; /* stats */ counter_t hits; /* total number of hits */ counter_t misses; /* total number of misses */ counter_t replacements; /* total number of replacements at misses */ /* last block to hit, used to optimize cache hit processing */ md_addr_t last_tag; /* tag of last line accessed */ int last_set; struct tpt_blk_t *last_blk; /* cache block last accessed */ struct tpt_blk_t *data; /* FIXME not sure about this...*/ struct tpt_set_t sets[1]; /* each entry is a set */ }; /* parse policy */ enum tpt_policy /* replacement policy enum */ tpt_char2policy(char c); /* replacement policy as a char */ /* tpt write buffer entry * used in commit stage to build tpt_entries */ struct tpt_wbuff_blk_t { struct tpt_wbuff_blk_t *next; struct tpt_wbuff_blk_t *prev; md_addr_t entry_pt; /* tpt_blk entry point */ int onp_index; /* pointer to current basic block on path */ int offp_index; /* pointer to current basic block on path */ int align_index; /* pointer to current basic block on path */ int align_cnt; /* number of instructions in current basic block */ unsigned int br_seq; /* branch sequence of TPT entry */ struct tpt_blk_t *tpt_entry; /* buffer entry */ }; /* tpt write buffer definition * used in commit stage to build tpt_entries */ struct tpt_wbuff_t { enum bool new_block; /* set if starting a new block*/ md_addr_t offp_target; /* set to alternate block addr */ unsigned int dir; /* direction of last branch */ struct tpt_wbuff_blk_t *free_h; /* list of pointers to avaliable buffers */ struct tpt_wbuff_blk_t *free_t; /* last avaliable buffer */ struct tpt_wbuff_blk_t *busy_h; /* list of pointers to occupied buffers */ struct tpt_wbuff_blk_t *busy_t; /* last occupied buffer */ }; /* touch list definition */ /* TO BE ADDED - touch list. 
When a tpt entry is hit in TPT, nodes are placed on a touch list to be probed
   by the prefetcher.  Purpose is to pull them back into L1 cache. */
//struct tlist_t
//{
//  list on_path;     /* items to be touched that are on the path */
//  list off_path;    /* items to be touched that are off the path */
//};

/* prefetch list */
/* prefetch list - populated in three ways:
   1) Miss to TPT - (high priority) - Addr and br_seq put at head of on_path
      list.  Blocks that are targeted are also given high priority.
      For example: if A10 is a TPT miss where &A1=B and &A10=&B0=C then B and
      C will be placed at the head of the on_path list as soon as they are
      "discovered".  Similarly, blocks &A0, &A11, &A00, and &A01 will be put
      at the head of off_path.  (COULD BE OPTIMIZED)
   2) Hit to TPT - (med priority) - intermediate nodes as well as their
      off_path branch sequence are placed on the off_path list.  The final
      node in the sequence is placed on the on_path list.
      For example: If A101 is a TPT hit, the off_path list gets &A0, &A11,
      &A100 while on_path gets &A101.  The depth of the branch sequence
      dictates the depth of the subtree.
   3) Prefetcher IDs target - (low priority) - When parsing a cache line,
      taken branch targets will be identified (outside of the cache block
      currently being parsed).  These targets will be placed at the tail of
      off_path.

   TO BE ADDED - throttling mechanism that changes priority of on_path (depth)
   vs. off_path (breadth) prefetching... Adaptive cache!
*/ //struct pflist_item_t //{ // md_addr_t addr; /* address of first item in block */ // unsigned int br_seq; /* predicted branch sequence from addr // NULL if not result of a tpt miss */ // unsigned int fetch_depth; /* depth to build tpt entries for subtree */ //}; //struct pflist_t //{ // list on_path; /* prefetch units give this higher priorty */ // list off_path; /* prefetch units give this lower priorty */ //}; /* tpt_create * create (allocate) and initialize a tpt */ struct tpt_cache_t * /* pointer to cache created */ tpt_create(int nsets, /* total number of sets in cache */ int assoc, /* associativity of cache */ enum tpt_policy policy, /* replacement policy w/in sets */ int br_depth, /* number of branches to predict */ int offp_depth, /* depth to prefetch off pred path */ unsigned int hit_latency, /* latency in cycles for a hit */ unsigned int miss_latency); /* latency in cycles for a miss */ /* tpt_blk_create * create (allocate) and initialize a tpt entry * used as buffers for reading and writing into TPT */ struct tpt_blk_t * tpt_blk_create(int br_depth, /* number of branches to predict */ int offp_depth); /* depth to prefetch off pred path */ /* tpt_wbuff_create * create (allocate) and initialize the tpt write buffers */ struct tpt_wbuff_t * tpt_wbuff_create(int num_buffers, /* number of write buffers */ int br_depth); /* on path tpt depth */ /* tpt_get_offp_targ * find the fall through address for a taken branch or * a taken address for a fall through branch */ md_addr_t tpt_get_offp_targ(md_addr_t PC, /* PC for current inst */ md_addr_t next_PC, /* PC for next inst */ struct mem_t *mem); /* pointer to memory */ /* tpt_config * print cache configuration */ void tpt_config(struct tpt_cache_t *tptp, /* cache instance */ FILE *stream); /* output stream */ /* tpt_reg_stats * register cache stats */ void tpt_reg_stats(struct tpt_cache_t *tptp, /* cache instance */ struct stat_sdb_t *sdb); /* stats database */ /* tpt_stats * print cache stats */ void 
tpt_stats(struct tpt_cache_t *tptp, /* cache instance */ FILE *stream); /* output stream */ /* * tpt_dump * dump tpt contents to a file * used for debugging */ void tpt_dump(struct tpt_cache_t *tptp, /* TPT pointer */ FILE *stream); /* output stream */ /* * tpt_set_dump * dump a set of tpt contents to a file * used for debugging */ void tpt_set_dump(struct tpt_cache_t *tptp, /* TPT pointer */ int set, /* set to print */ FILE *stream); /* output stream */ /*********************** * TPT Access Functions* ***********************/ /* * tpt_read * a read in the tpt pulls off the set bits and appends the branch sequence to the * MSB of the resultant tag (no data block bits are used). The pointer to the block * as well as the replaced blocks branch sequence and address are returned by refrence. * Access time is returned. * TO BE ADDED * partial hits - return partial info if block not compleatly written */ unsigned int /* latency of access in cycles */ tpt_read(struct tpt_cache_t *tptp, /* cache to access */ md_addr_t addr, /* address of access */ unsigned int br_seq, /* branch sequence of access */ tick_t now, /* time of access */ //boolean *full_hit, /* is the target entry fully written? */ struct tpt_blk_t *data); /* for return of user data ptr */ /* * tpt_write * used by prefetch engines. TPT entry passed by refrence. Address and branch * sequence of replaced block returned by refrence. (Why??) Access time returned. 
* TO BE ADDED * Partial Writes - write a level of tpt node (on/off path) as soon as it it known */ unsigned int /* latency of access in cycles */ tpt_write(struct tpt_cache_t *tptp, /* cache to access */ md_addr_t addr, /* address of access */ unsigned int br_seq, /* branch sequence of access */ tick_t now, /* time of access */ struct tpt_blk_t *data, /* ptr to blk to write to tpt */ unsigned int *repl_br_seq, /* for branch sequence of replaced block */ md_addr_t *repl_addr); /* for address of replaced block */ /* * Update way chain - * called durring reads and writes depending on replacement policy of tpt */ void tpt_update_way(struct tpt_cache_t *tptp,/* pointer to tpt */ struct tpt_blk_t *blk, /* pointer to most LRU block */ md_addr_t set); /* set number of update */ /* Copy a TPT entry from *Src to *Dest except for *way_next and *way_prev */ void tpt_copy_blk(struct tpt_cache_t *tptp, /* pointer to tpt */ struct tpt_blk_t *src, /* pointer to source block */ struct tpt_blk_t *dst); /* pointer to destination block */ /* move a buffer from free to busy list */ void tpt_wbuff_aquire(struct tpt_wbuff_t *tpt_wbuff, /*pointer to write buffer*/ md_addr_t PC); /* move a buffer from busy free list */ void tpt_wbuff_release(struct tpt_wbuff_t *tpt_wbuff, /*pointer to write buffer*/ struct tpt_wbuff_blk_t *blk); /* return non-zero if block containing address ADDR is contained in cache CP, this interface is used primarily for debugging and asserting cache invariants */ int /* non-zero if access would hit */ tpt_probe(struct tpt_cache_t *tptp, /* cache instance to probe */ unsigned int br_seq, /* branch sequence of block */ md_addr_t addr); /* address of block to probe */ /* flush the entire cache, returns latency of the operation */ unsigned int /* latency of the flush operation */ tpt_flush(struct tpt_cache_t *tptp, /* cache instance to flush */ tick_t now); /* time of cache flush */ void tpt_test(int nsets, /* number of sets */ int assoc, /* associativity */ enum 
tpt_policy policy, int br_depth, int offp_depth, unsigned int hit_latency, unsigned int miss_latency, FILE *stream); /* file to dump output */ /* prefetch request */ struct pf_req_t { md_addr_t addr; /* address of request */ signed int lat; /* latency remaining for accesss */ struct pf_req_t *next; /* next request */ struct pf_req_t *prev; /* previous request */ }; /* prefetch queue system - used in accessing I$ to order pf requests and priotitize requests */ struct pf_queue_t { int limited; /* false = infinte resources */ int outbound_count; /* memory requests in queue to be issued */ int issued_count; /* mem requests outstanding */ int free_count; /* free requests outstanding */ int issued_this_cycle; /* num requests issued this cycle */ int outbound_max; /* size of outbound queue */ int issued_max; /* size of issue queue */ int BW_max; /* memory system bandwidth limit */ struct pf_req_t *issued; /* outstanding requests */ struct pf_req_t *outbound; /* requests to be issued (1/cycle, priority to requests from ss)*/ struct pf_req_t *free; /* unallocated entries */ int fetch_issued; /* true if fetch issued this cycle FIXME - later update to number of fetches issued this cycle */ }; struct pf_queue_t* pfq_create( int limited, int issued_size, int outbound_size); void pfq_cycle_update( struct pf_queue_t *pfq); int pfq_lu( md_addr_t addr, struct pf_queue_t *pfq, struct cache_t *cp); void pfq_add_outb( md_addr_t addr, struct pf_queue_t *pfq); /* prefetch issue issues top outbound request to memory system */ struct pf_req_t * pfq_next_to_issue( struct pf_queue_t *pfq); void pfq_issue( struct pf_req_t *pfr, struct pf_queue_t *pfq, int lat); /* prefetch add issue adds a pf request to issued list (ordered by outstanding latency)*/ /* void pfq_add_issue( md_addr_t addr, unsigned int lat, struct pf_queue_t *pfq); */ /* prefetch add outbound top adds a request to the top of the outbound list*/ /* void pfq_add_outb_top( md_addr_t addr, struct pf_queue_t *pfq); */ #endif /* 
TPTCACHE_H */