Skip to content
Snippets Groups Projects
Commit 3e06beca authored by Sergii Vystoropskyi's avatar Sergii Vystoropskyi
Browse files

csep548 hw

parents
No related branches found
No related tags found
No related merge requests found
Showing
with 757 additions and 0 deletions
def make_predictor(bhr_size, sat_counter_bits, pc_bits_used):
    """Build a gshare-style "mystery" branch predictor.

    bhr_size: number of bits in the global branch history register.
    sat_counter_bits: width of each saturating counter in the BHT.
    pc_bits_used: number of low-order PC bits used to index the BHT.
    Returns a MysteryBranchPredictor with reset()/predict()/actual().
    """
    class SaturatingCounter(object):
        """An n-bit saturating counter clamped to [0, 2**bits - 1]."""
        def __init__(self, bits, init):
            self.max = (2 ** bits) - 1
            self.value = init

        def taken(self):
            # Saturate at the top instead of wrapping.
            if self.value + 1 <= self.max:
                self.value += 1

        def nottaken(self):
            # Saturate at zero instead of going negative.
            if self.value - 1 >= 0:
                self.value -= 1

        def istaken(self):
            # Predict taken in the upper half of the counter range.  Floor
            # division keeps the threshold an int on Python 3 as well.
            return self.value >= ((self.max + 1) // 2)

    class ShiftRegister(object):
        """Fixed-size FIFO of booleans: the branch history register."""
        def __init__(self, size):
            self.size = size
            self.values = [False] * size

        def push(self, val):
            assert isinstance(val, bool)
            self.values.append(val)
            if len(self.values) > self.size:
                self.values.pop(0)  # drop the oldest outcome

        def tobits(self):
            # Pack the history into an int; oldest outcome ends up in the
            # high bit.
            t = 0
            for v in self.values:
                t = (t << 1) | (1 if v else 0)
            return t

    class MysteryBranchPredictor(object):
        def __init__(self):
            self.bht = {}
            self.bhr = ShiftRegister(bhr_size)
            self.pcmask = (2 ** pc_bits_used) - 1
            self.reset()

        def reset(self):
            """Re-initialize every counter and clear the global history."""
            # Midpoint-minus-one init; // keeps it an int on Python 3 too
            # (plain / made this a float there, breaking the counters).
            sc_init_val = ((2 ** sat_counter_bits) - 1) // 2
            bht_entries = 2 ** (pc_bits_used + bhr_size)
            for i in range(bht_entries):
                self.bht[i] = SaturatingCounter(sat_counter_bits, sc_init_val)
            # Flush the history register back to all-not-taken.
            for i in range(bhr_size):
                self.bhr.push(False)

        def _compute_index(self, pc):
            # BHT index = history bits (high) concatenated with PC bits (low).
            pcbits = pc & self.pcmask
            ghbits = self.bhr.tobits()
            return (ghbits << pc_bits_used) | pcbits

        def predict(self, pc):
            """Return True if the branch at pc is predicted taken."""
            return self.bht[self._compute_index(pc)].istaken()

        def actual(self, pc, taken):
            """Train with the real outcome, then update global history."""
            index = self._compute_index(pc)
            if taken:
                self.bht[index].taken()
            else:
                self.bht[index].nottaken()
            self.bhr.push(taken)

    return MysteryBranchPredictor()
# The predictors the discovery script analyzes: (label, predictor) pairs,
# where the label encodes the expected BHR bits, counter bits, and BHT size.
mystery_predictors = [
    ('1bhr, 1sc, 4bht', make_predictor(1, 1, 1)),
    ('1bhr, 1sc, 8bht', make_predictor(1, 1, 2)),
    ('2bhr, 2sc, 16bht', make_predictor(2, 2, 2)),
    ('3bhr, 1sc, 64bht', make_predictor(3, 1, 3)),
    ('8bhr, 4sc, 2048bht', make_predictor(8, 4, 3)),
]
#!/usr/bin/env python
import branchpredictor
import math
# You might want to store the BHR size here to help figure out
# subsequent predictor parameters. (Optional!)
# Discovered BHR width; written by find_branch_history_register_size() and
# read by the later probes.
BHR_SIZE = None
# Constants that bound the parameter space.
MAX_BHR_SIZE = 12   # BHR is never wider than this many bits
MAX_SC_BITS = 4     # saturating counters are at most this wide
MAX_PC_BITS = 10    # at most this many PC bits index the BHT
def find_branch_history_register_size(bpred):
    """Train one PC with taken outcomes until the predictor first says
    "taken"; the update count reveals the BHR width.  Stores the answer in
    the module-level BHR_SIZE for the other probes and returns it.
    """
    global BHR_SIZE  # later discovery functions read this
    bpred.reset()
    BHR_SIZE = -1
    # Enough iterations to drain both the history and a counter.
    trials = MAX_BHR_SIZE + 2 ** MAX_SC_BITS
    for _ in range(trials):
        if bpred.predict(0):
            break
        bpred.actual(0, True)
        BHR_SIZE += 1
    bpred.reset()
    return BHR_SIZE
def find_saturating_counter_bits(bpred):
    """Infer the saturating-counter width from how many taken updates it
    takes to flip a fully-not-taken counter to a taken prediction."""
    bpred.reset()
    # Drive the PC-0 counter all the way down to zero.
    for _ in range(2 ** MAX_SC_BITS):
        bpred.actual(0, False)
    flips = 0
    for _ in range(2 ** MAX_SC_BITS):
        if bpred.predict(0):
            break
        bpred.actual(0, True)
        # Flush the taken outcome back out of the history register so we
        # keep training/probing the same BHT entry.
        for _ in range(BHR_SIZE):
            bpred.actual(0, False)
        flips += 1
    bpred.reset()
    # The counter flips at half its range: flips == 2**(bits-1).
    return int(math.log(flips, 2)) + 1
def find_pc_bits_used(bpred):
    """Walk a single set bit up the PC until a never-trained address
    predicts taken (aliasing), revealing how many PC bits index the BHT."""
    bpred.reset()
    # Fill the history register with taken outcomes at PC 0.
    for _ in range(BHR_SIZE):
        bpred.actual(0, True)
    probe = 1
    bits = 0
    for _ in range(MAX_PC_BITS):
        bpred.actual(probe, True)
        probe <<= 1
        bits += 1
        if bpred.predict(probe):
            break
    return bits - 1
def find_branch_history_table_entries(bpred):
    """BHT entries = 2**(pc bits used) * 2**(BHR bits)."""
    return 2 ** (find_pc_bits_used(bpred) + BHR_SIZE)
def discover(bpred):
    """Run all discovery probes against bpred and collect the results.

    The returned dictionary's keys are fixed for the grading scripts;
    reset() calls and probe order may otherwise be changed freely.
    """
    bpred.reset()
    bhr = find_branch_history_register_size(bpred)
    bpred.reset()
    sc_bits = find_saturating_counter_bits(bpred)
    bpred.reset()
    entries = find_branch_history_table_entries(bpred)
    return {
        'bhr size': bhr,
        'saturating counter bits': sc_bits,
        'bht entries': entries,
    }
if __name__ == '__main__':
    # When run as a script, we'll analyze each of the provided branch
    # predictors.  (Python 2 print statements.)
    for name, bpred in branchpredictor.mystery_predictors:
        results = discover(bpred)
        print '%s:' % name
        for k, v in results.items():
            print ' %s: %s' % (k, v)
        print
import random
import pprint
import sys
# Each insn is a python dictionary (map) with the following fields:
#
# src1 - operand #1 source register id
# src2 - operand #2 source register id
# dst - destination register id
#
# Register id's are just integers. There are 16 architectural
# registers in this simple ISA, with id's 0 to 15.
#
# There are two additional fields which you should fill in as you
# schedule instructions according to their dependences.
#
# consumer_insns - list of insns that consume output of this insn
# depth - depth of insn in the scheduling tree
def gen_insns(n):
    """Generate n random insns preceded by a fixed root insn.

    The root writes register 0, so every later source register has a live
    producer and the dependence graph forms a tree rooted at insn 0.
    """
    root = {'src1': 0, 'src2': 0, 'dst': 0,
            'consumer_insns': [], 'depth': 0}
    insns = [root]
    live_regs = set([0])  # registers written so far
    for _ in range(n):
        # Keep the RNG call order: src1, src2, dst.
        s1 = random.choice(list(live_regs))
        s2 = random.choice(list(live_regs))
        d = random.randint(0, 15)
        live_regs.add(d)
        insns.append({
            'src1': s1,
            'src2': s2,
            'dst': d,
            'consumer_insns': [],  # filled in by the scheduler
            'depth': None,         # filled in by the scheduler
        })
    return insns
# Architectural-register -> latest-physical-register map for rename();
# arch reg 0 starts mapped to preg 0 (written by the root insn).
instr_map = {0:0}
# Next physical register id handed out by rename().
write_instr_counter = 0
def rename(insn):
    """Rewrite insn's architectural registers as physical registers, in place.

    Original ids are preserved in osrc1/osrc2/odst.  Each write gets a
    brand-new physical register, which removes WAR/WAW (false) dependences.
    Updates module-level instr_map and write_instr_counter.
    """
    global write_instr_counter
    insn['osrc1'] = insn['src1']
    insn['osrc2'] = insn['src2']
    insn['odst'] = insn['dst']
    for key in ('src1', 'src2'):
        arch = insn[key]
        if arch not in instr_map:
            # First mention of this register as a source: allocate a preg.
            instr_map[arch] = write_instr_counter
            write_instr_counter += 1
        insn[key] = instr_map[arch]
    # Every write allocates a fresh physical register.
    write_instr_counter += 1
    instr_map[insn['dst']] = write_instr_counter
    insn['dst'] = write_instr_counter
def compute_latency(insns):
    """Assign a depth to every insn and return the critical-path length.

    As a side effect, records producer->consumer edges in each producer's
    'consumer_insns' list.  Assumes the root insn is pre-seeded with depth 0
    and that producers precede their consumers (true after renaming).
    """
    def producer_depth(reg, consumer):
        # Find the (unique, post-rename) insn writing reg; record the
        # consumer edge and report the producer's depth.  -1 means no
        # producer exists in this trace.
        for candidate in insns:
            if candidate['dst'] == reg:
                if consumer not in candidate['consumer_insns']:
                    candidate['consumer_insns'].append(consumer)
                return candidate['depth']
        return -1

    critical = 0
    for insn in insns:
        if insn['depth'] is not None:
            continue  # root insn already has depth 0
        d1 = producer_depth(insn['src1'], insn)
        d2 = producer_depth(insn['src2'], insn)
        insn['depth'] = max(d1, d2) + 1
        critical = max(critical, insn['depth'])
    return critical + 1
def compute_max_width(insns):
    """Return the largest number of insns sharing one depth level, i.e. the
    peak issue width the schedule would need.  Returns 0 for an empty trace.
    """
    counts = {}
    for insn in insns:
        # dict.get with a default replaces the hand-rolled if/else counting.
        counts[insn['depth']] = counts.get(insn['depth'], 0) + 1
    return max(counts.values()) if counts else 0
def compute_max_pregs(insns):
    """Best-effort estimate of the peak number of physical registers live
    at once.

    Expects insns already renamed (unique 'dst' pregs, original ids in
    osrc1/osrc2/odst) and depths assigned by compute_latency().
    """
    def get_instr_by_depth(insns):
        # Bucket insns by depth, each bucket sorted by program order
        # ('number'); returns the buckets for depth 0..max_depth.
        tmp = {}
        res = []
        max_depth = 0
        for i in insns:
            if i['depth'] not in tmp:
                tmp[i['depth']] = [i]
            else:
                tmp[i['depth']].append(i)
            tmp[i['depth']].sort(key=lambda x : x['number'])
            max_depth = max(max_depth, i['depth'])
        for i in range(max_depth+1):
            res.append(tmp[i])
        return res
    def get_commit_order(insns):
        # Model in-order commit: after each depth level finishes, commit
        # the longest run of consecutively-numbered completed insns still
        # waiting in the buffer.  (Shadows the loop var i deliberately.)
        insns = get_instr_by_depth(insns)
        buffer = []
        res = []
        for i in insns:
            buffer = buffer + i
            buffer.sort(key=lambda x:x['number'])
            tmp = [buffer[0]]
            i = 1
            while i < len(buffer) and buffer[i-1]['number']+1 == buffer[i]['number']:
                tmp.append(buffer[i])
                i += 1
            res.append(tmp)
            if i == len(buffer):
                buffer = []
            else:
                buffer = buffer[i:]
        return res
    def get_reg_allocation(insns):
        # Cumulative set of pregs referenced up to and including each
        # depth level (sources and destinations alike).
        by_depth = get_instr_by_depth(insns)
        res = []
        for i in range(len(by_depth)):
            if i == 0:
                tmp = set()
            else:
                tmp = set(res[len(res) - 1])
            for ins in by_depth[i]:
                tmp.add(ins['src1'])
                tmp.add(ins['src2'])
                tmp.add(ins['dst'])
            res.append(tmp)
        return res
    def count_reg_usage(insns, reg):
        # Number of remaining references (either source or dest) to reg.
        res = 0
        for ins in insns:
            if reg == ins['src1']:
                res += 1
            if reg == ins['src2']:
                res += 1
            if reg == ins['dst']:
                res += 1
        return res
    def get_reg_free(insns):
        # Unused alternative: cumulative sets of pregs with no remaining
        # references among not-yet-committed insns.  Kept for reference.
        commit_order = get_commit_order(insns)
        not_commited = insns[:]
        commited = []
        res = []
        for i in commit_order:
            s = set()
            if len(res) != 0:
                s = set(res[len(res)-1])
            for ins in i:
                not_commited.remove(ins)
                commited.append(ins)
            for ins in i:
                if count_reg_usage(not_commited, ins['src1']) == 0:
                    s.add(ins['src1'])
                if count_reg_usage(not_commited, ins['src2']) == 0:
                    s.add(ins['src2'])
                if count_reg_usage(not_commited, ins['dst']) == 0:
                    s.add(ins['dst'])
            res.append(s)
        return res
    def get_reg_free2(insns):
        # Mark, per insn, the preg its commit releases: the preg that last
        # held the same architectural register this insn overwrites.
        for i,ins in enumerate(insns):
            if ins['odst'] == ins['osrc1']:
                ins['free'] = ins['src1']
                continue
            elif ins['odst'] == ins['osrc2']:
                ins['free'] = ins['src2']
                continue
            elif i > 0:
                # Walk backwards to the most recent mention of this
                # architectural register and free its preg.
                for j in reversed(insns[:i]):
                    if j['odst'] == ins['odst']:
                        ins['free'] = j['dst']
                        break
                    elif j['osrc1'] == ins['odst']:
                        ins['free'] = j['src1']
                        break
                    elif j['osrc2'] == ins['odst']:
                        ins['free'] = j['src2']
                        break
        # Accumulate freed pregs per commit group (cumulative lists).
        commit_order = get_commit_order(insns)
        res = []
        for instructions in commit_order:
            tmp = []
            if len(res) != 0:
                tmp = res[len(res) - 1][:]
            for j in instructions:
                if 'free' in j:
                    tmp.append(j['free'])
            res.append(tmp)
        return res
        #return [[i['free'] for i in instctions if 'free' in i] for instctions in commit_order]
    # Tag each insn with its program-order position for the helpers above.
    for i,ins in enumerate(insns):
        ins['number'] = i
    reg_free = get_reg_free2(insns)
    reg_alloc = get_reg_allocation(insns)
    # Peak pregs = pregs allocated so far minus pregs freed by earlier
    # commits, maximized over depth levels.
    mx = len(reg_alloc[0])
    for i in range(1, len(reg_alloc)):
        tmp = len(reg_alloc[i]) - len(reg_free[i-1])
        mx = max(mx,tmp)
    return mx
def main(insns):
    """Rename, schedule, and summarize an insn trace; returns repr(stats).

    compute_latency() must run first: it assigns the depths the width and
    preg computations rely on.
    """
    # Remove false (WAR/WAW) dependences first.
    for insn in insns:
        rename(insn)
    # Dict values evaluate left-to-right, so latency (and its depth
    # side effects) is computed before width and preg usage.
    results = {
        'latency': compute_latency(insns),
        'serial latency': len(insns),
        'max machine width used': compute_max_width(insns),
        'max pregs used': compute_max_pregs(insns),
    }
    return repr(results)
if __name__ == "__main__":
# Edit this line to run with different trace files (or pass a
# filename on the command name).
#filename = "/Users/svystoro/Downloads/hw3-ilp-scheduler/parallel-1tick-6width-7pregs.insns"
#filename = "/Users/svystoro/Downloads/hw3-ilp-scheduler/serial-6ticks-1width-3pregs.insns"
#filename = "/Users/svystoro/Downloads/hw3-ilp-scheduler/test-3ticks-1width-4pregs.insns"
#filename = "/Users/svystoro/Downloads/hw3-ilp-scheduler/test-3ticks-2width-5pregs.insns"
#filename = "/Users/svystoro/Downloads/hw3-ilp-scheduler/test-4ticks-2width-7pregs.insns"
#filename = "/Users/svystoro/Downloads/hw3-ilp-scheduler/random-3ticks-3width-4pregs.insns"
#filename = "/Users/svystoro/Downloads/hw3-ilp-scheduler/random-4ticks-2width-6pregs.insns"
filename = "/Users/svystoro/Downloads/hw3-ilp-scheduler/random-5ticks-4width-6pregs.insns"
if len(sys.argv) > 1:
filename = sys.argv[1]
pprint.pprint(main( eval(open(filename).read()) ))
# Uncomment this line to run with a random trace instead.
# pprint.pprint(main( gen_insns(5) ))
# This code below will dump a random trace of 5 insns to the
# terminal, so you can save it as a file and read it back in later.
# pprint.pprint(gen_insns(5))
# --- Sample insn traces -----------------------------------------------------
# Eight sample .insns trace files (Python list literals, loaded via eval()
# by the scheduler script above), concatenated here by the commit listing.
# Each trace starts with a root insn at depth 0; all others start at None.
[{'src2': 0, 'depth': 0, 'dst': 1, 'src1': 0, 'consumer_insns': []},
 {'src2': 0, 'depth': None, 'dst': 1, 'src1': 0, 'consumer_insns': []},
 {'src2': 0, 'depth': None, 'dst': 2, 'src1': 0, 'consumer_insns': []},
 {'src2': 0, 'depth': None, 'dst': 2, 'src1': 0, 'consumer_insns': []},
 {'src2': 0, 'depth': None, 'dst': 3, 'src1': 0, 'consumer_insns': []},
 {'src2': 0, 'depth': None, 'dst': 3, 'src1': 0, 'consumer_insns': []}]
# Trace 2.
[{'src2': 0, 'depth': 0, 'dst': 0, 'src1': 0, 'consumer_insns': []},
 {'src2': 0, 'depth': None, 'dst': 1, 'src1': 0, 'consumer_insns': []},
 {'src2': 0, 'depth': None, 'dst': 1, 'src1': 0, 'consumer_insns': []},
 {'src2': 0, 'depth': None, 'dst': 2, 'src1': 1, 'consumer_insns': []},
 {'src2': 0, 'depth': None, 'dst': 1, 'src1': 0, 'consumer_insns': []},
 ]
# Trace 3.
[{'src2': 0, 'depth': 0, 'dst': 0, 'src1': 0, 'consumer_insns': []},
 {'src2': 0, 'depth': None, 'dst': 7, 'src1': 0, 'consumer_insns': []},
 {'src2': 7, 'depth': None, 'dst': 13, 'src1': 7, 'consumer_insns': []},
 {'src2': 0, 'depth': None, 'dst': 5, 'src1': 0, 'consumer_insns': []},
 {'src2': 0, 'depth': None, 'dst': 1, 'src1': 7, 'consumer_insns': []},
 {'src2': 13, 'depth': None, 'dst': 11, 'src1': 1, 'consumer_insns': []}]
# Trace 4.
[{'src2': 0, 'depth': 0, 'dst': 0, 'src1': 0, 'consumer_insns': []},
 {'src2': 0, 'depth': None, 'dst': 1, 'src1': 0, 'consumer_insns': []},
 {'src2': 0, 'depth': None, 'dst': 1, 'src1': 0, 'consumer_insns': []},
 {'src2': 0, 'depth': None, 'dst': 2, 'src1': 1, 'consumer_insns': []},
 {'src2': 0, 'depth': None, 'dst': 2, 'src1': 1, 'consumer_insns': []},
 {'src2': 0, 'depth': None, 'dst': 2, 'src1': 2, 'consumer_insns': []},
 {'src2': 0, 'depth': None, 'dst': 3, 'src1': 2, 'consumer_insns': []},
 {'src2': 0, 'depth': None, 'dst': 2, 'src1': 1, 'consumer_insns': []},
 {'src2': 0, 'depth': None, 'dst': 2, 'src1': 1, 'consumer_insns': []},
 ]
# Trace 5: a serial chain through register 1.
[{'src2': 0, 'depth': 0, 'dst': 1, 'src1': 0, 'consumer_insns': []},
 {'src2': 0, 'depth': None, 'dst': 1, 'src1': 1, 'consumer_insns': []},
 {'src2': 0, 'depth': None, 'dst': 1, 'src1': 1, 'consumer_insns': []},
 {'src2': 0, 'depth': None, 'dst': 1, 'src1': 1, 'consumer_insns': []},
 {'src2': 0, 'depth': None, 'dst': 1, 'src1': 1, 'consumer_insns': []},
 {'src2': 0, 'depth': None, 'dst': 1, 'src1': 1, 'consumer_insns': []}]
# Trace 6.
[{'dst': 1, 'src1': 0, 'src2': 0, 'depth': 0, 'consumer_insns': []},
 {'dst': 2, 'src1': 1, 'src2': 0, 'depth': None, 'consumer_insns': []},
 {'dst': 2, 'src1': 2, 'src2': 0, 'depth': None, 'consumer_insns': []},
 #{'dst': 3, 'src1': 1, 'src2': 0, 'depth': None, 'consumer_insns': []},
 ]
# Trace 7.
[{'dst': 1, 'src1': 0, 'src2': 0, 'depth': 0, 'consumer_insns': []},
 {'dst': 2, 'src1': 0, 'src2': 1, 'depth': None, 'consumer_insns': []},
 {'dst': 2, 'src1': 0, 'src2': 2, 'depth': None, 'consumer_insns': []},
 {'dst': 3, 'src1': 0, 'src2': 1, 'depth': None, 'consumer_insns': []},
 ]
# Trace 8: exercises WAR hazards on register 1.
[{'dst': 1, 'src1': 0, 'src2': 0, 'depth': 0, 'consumer_insns': []},
 {'dst': 2, 'src1': 1, 'src2': 0, 'depth': None, 'consumer_insns': []},
 {'dst': 3, 'src1': 2, 'src2': 1, 'depth': None, 'consumer_insns': []},
 {'dst': 4, 'src1': 3, 'src2': 1, 'depth': None, 'consumer_insns': []},
 # this write to 1 shouldn't clobber any of the uses of 1 above, even
 # though they will execute after this insn does
 {'dst': 1, 'src1': 0, 'src2': 0, 'depth': None, 'consumer_insns': []},
 {'dst': 6, 'src1': 1, 'src2': 0, 'depth': None, 'consumer_insns': []},
 ]
import copy
import math
class CacheLine(object):
    """One cache line: tag and set number, plus a valid bit (starts invalid)."""
    def __init__(self):
        self.tag = 0
        self.setnum = 0
        self.valid = False


class Cache(object):
    """A set-associative, LRU cache model with an optional victim buffer.

    Addresses are byte addresses.  Each set is a list ordered LRU-first /
    MRU-last; the victim buffer (if present) is likewise ordered oldest
    first.
    """

    def __init__(self, bsize, assoc, size, vbsize=0):
        self.bsize = bsize   # block size in bytes
        self.assoc = assoc   # ways per set
        self.size = size     # cache capacity in bytes (victim buffer excluded)
        # Floor division (//) keeps num_sets an int on Python 3 as well;
        # plain / made it a float there, breaking range() in reset().
        self.num_sets = (size // self.assoc) // self.bsize
        self.vbsize = vbsize  # victim buffer entries; 0 or None means none
        assert self.num_sets * self.assoc * self.bsize == size, \
            "Cache size is not evenly divisible by block size " \
            "and/or associativity!"
        # Address decomposition: [ tag | index | block offset ].
        # bits used for block offset
        self.block_offset_bits = int( math.log( self.bsize, 2 ) )
        # bits used for index
        self.index_bits = int( math.log( self.num_sets, 2 ) )
        self.reset()

    def has_victim_buffer(self):
        """True if this cache was built with a victim buffer."""
        return self.victim_buffer is not None

    def setnum(self, address):
        """Set index for a byte address."""
        return (address >> self.block_offset_bits) % self.num_sets

    def tag(self, address):
        """Tag bits for a byte address."""
        return address >> (self.block_offset_bits + self.index_bits)

    def access(self, address):
        """Requests the given address from the cache. Returns True for
        a hit and False for a miss.
        """
        setnum = self.setnum( address )
        tag = self.tag( address )
        lines = self.cache[ setnum ]  # renamed from `set`: don't shadow builtin
        # search for our cache line
        hits = [ cl for cl in lines if cl.valid and cl.tag == tag ]
        if len( hits ) > 0:
            assert 1 == len( hits ), "Duplicate cache line!"
            hit = hits[0]
            # mark line as MRU (end of the list)
            lines.remove( hit )
            lines.append( hit )
            return True
        # we missed in the cache - the LRU line is the eviction victim
        lru = lines[0]
        # check victim buffer
        if self.victim_buffer is not None:
            vb_hits = [ cl for cl in self.victim_buffer
                        if cl.valid and cl.tag == tag and cl.setnum == setnum ]
            if len( vb_hits ) > 0:
                assert 1 == len( vb_hits ), "Duplicate cache lines!"
                # swap: remove the hit from the victim buffer, push the
                # evicted LRU line in its place
                self.victim_buffer.remove( vb_hits[0] )
                self.victim_buffer.append( copy.deepcopy(lru) )
                # move victim buffer hit back into cache by overwriting lru
                lru.tag = tag
                lru.setnum = setnum
                lru.valid = True
                # mark as MRU
                lines.remove( lru )
                lines.append( lru )
                return True
        # a true miss :-(
        # evict a valid LRU line to the victim buffer, dropping the
        # buffer's oldest entry to make room
        if lru.valid and self.victim_buffer is not None:
            self.victim_buffer.pop( 0 )
            self.victim_buffer.append( copy.deepcopy(lru) )
        # fill with new line by overwriting lru
        lru.tag = tag
        lru.setnum = setnum
        lru.valid = True
        # mark as MRU
        lines.remove( lru )
        lines.append( lru )
        return False

    def reset(self):
        """Empty the cache and the victim buffer (if any)."""
        # Truthiness check instead of `> 0`: callers pass both 0 and None
        # to mean "no victim buffer" (None > 0 raises on Python 3).
        self.victim_buffer = None
        if self.vbsize:
            self.victim_buffer = [CacheLine() for _ in range(self.vbsize)]
        # setup cache itself: num_sets sets of assoc invalid lines
        self.cache = [[CacheLine() for _ in range(self.assoc)]
                      for _ in range(self.num_sets)]
import caches
import pprint
import math
# Search bounds for the cache parameter-discovery probes below.
MAX_BLOCK_SIZE = 32 # block size is always a power of 2
MAX_CACHE_SIZE = 2 ** 16 # cache size is always a power of 2
MAX_ASSOC = 8 # associativity is always a power of 2
MAX_VBUFFER_SIZE = 6 # victim buffer size is just a natural number
def find_block_size(cache):
    """Touch address 0, then probe consecutive bytes; the first miss marks
    the end of the block, i.e. the block size in bytes."""
    cache.reset()
    cache.access(0)
    for addr in range(1, MAX_BLOCK_SIZE + 1):
        if not cache.access(addr):
            return addr
    return -1  # not expected within the search bound
def find_cache_size(cache, block_size):
    """Return the cache capacity in bytes, excluding any victim buffer."""
    total = _find_cache_size(cache, block_size)
    if not cache.has_victim_buffer():
        return total
    # The raw probe also counts victim-buffer blocks; subtract them out.
    vb_blocks = find_victim_buffer_size(cache, total, block_size)[1]
    return total - vb_blocks * block_size
def _find_cache_size(cache, block_size):
    """Returns total size (in bytes) of the cache.

    Grows the number of distinct blocks accessed until block 0 stops
    surviving; the largest surviving count times the block size is the
    total capacity (cache plus victim buffer, if any).
    """
    def test_hypothesis(nblocks):
        # If nblocks distinct blocks all fit, block 0 is still resident.
        cache.reset()
        for b in range(nblocks):
            cache.access(b * block_size)
        return cache.access(0)

    best = 1
    for nblocks in range(2, MAX_CACHE_SIZE // block_size + MAX_VBUFFER_SIZE + 2):
        if not test_hypothesis(nblocks):
            return best * block_size
        best = nblocks
    return -1
def find_associativity(cache, size, block_size):
    """Returns associativity of the cache for a cache without a
    victim buffer.
    """
    #assert not cache.has_victim_buffer()
    # Recompute locally rather than trusting the arguments (the probe
    # calls also leave the cache in a known state).
    bs = find_block_size(cache)
    size = _find_cache_size(cache, block_size)
    best = 1
    for ways in range(1, MAX_ASSOC + 1):
        cache.reset()
        # Touch `ways` blocks that all map to the same set.
        for w in range(ways):
            cache.access(w * size)
        if not cache.access(0):
            return best  # block 0 was evicted: previous count was the max
        best = ways
    return best
def find_victim_buffer_size(cache, size, block_size):
    """Returns a tuple of (associativity,victim buffer size) for a
    cache with a victim buffer.

    Per the assignment guarantees: no fully-associative caches carry a
    victim buffer, the victim buffer is strictly smaller than the cache,
    and the cache size (excluding the buffer), associativity, and block
    size are always powers of two.  So when the measured capacity is
    cache + buffer, the buffer part is whatever exceeds the largest power
    of two not greater than the measured total.
    """
    assert cache.has_victim_buffer()
    bs = find_block_size(cache)
    size = _find_cache_size(cache, block_size)
    # Blocks beyond the largest power-of-two capacity belong to the VB.
    vbs = (size - 2 ** int(math.log(size, 2))) // bs
    size = size - vbs * bs
    best = 1
    for ways in range(1, MAX_ASSOC + 1 + vbs):
        cache.reset()
        # Touch `ways` blocks that all map to the same set.
        for w in range(ways):
            cache.access(w * size)
        if not cache.access(0):
            break
        else:
            best = ways
    # The conflict probe also counted victim-buffer slots; remove them.
    return best - vbs, vbs
def main(cache):
    """Run all discovery probes against cache; returns the answers dict."""
    block_size = find_block_size(cache)
    size = find_cache_size(cache, block_size)
    answers = {}
    answers['block size'] = block_size
    answers['cache size'] = size
    if cache.has_victim_buffer():
        # One combined probe yields both values for victim-buffer caches.
        assoc, vb_size = find_victim_buffer_size(cache, size, block_size)
        answers['victim buffer size'] = vb_size
        answers['associativity'] = assoc
    else:
        answers['associativity'] = find_associativity(cache, size, block_size)
    return answers
if __name__ == "__main__":
# test your code by creating different caches here
cache = caches.Cache( bsize=2, assoc=4, size=64, vbsize=None )
#cache = caches.Cache( bsize=2, assoc=1, size=64, vbsize=None ) # direct-mapped
#cache = caches.Cache( bsize=2, assoc=4, size=8, vbsize=None ) # fully-associative
#cache = caches.Cache( bsize=4, assoc=2, size=16, vbsize=1 ) # victim buffer
#cache = caches.Cache( bsize=4, assoc=1, size=16, vbsize=1 ) # direct-mapped + victim buffer
pprint.pprint(main(cache))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment