-
Notifications
You must be signed in to change notification settings - Fork 2
/
cachesim.py
231 lines (182 loc) · 7.83 KB
/
cachesim.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
#!/usr/bin/env python
#
# Simple cache simulator. Implements an N-way set-associative LRU cache, used
# for exploring the cache hit and miss rates when feeding in address traces
# from two programs, generated by the pinatrace Pin tool. These accesses are
# interleaved to simulate context switching between different threads.
#
# This program is compatible with Python 2.6.
#
# To generate an input file, run:
# pin -t /path/to/pinatrace.so -- <executable>
#
# Good sample programs to try are those involving interesting data reference
# patterns, such as image processing programs or those working with matrices.
# Trace files around a gigabyte in size are relatively quick to work with.
#
# Author: Oleg Vaskevich ([email protected])
import argparse
from itertools import islice
import os
import sys
def enum(**enums):
    """Build a lightweight enumeration class from keyword arguments.

    Each keyword becomes a class attribute, e.g. enum(A=0, B=1).A == 0.
    (Python 2.6 predates the stdlib ``enum`` module.)
    """
    members = dict(enums)
    return type('Enum', (), members)


# Identifies which of the two interleaved trace programs an access belongs to.
Program = enum(ONE=0, TWO=1)
class CacheSimException(Exception):
    """Root exception type for simulator-specific failures."""
class Partitioner(object):
    """Abstract base class for cache partitioning strategies."""

    def get_set(self, size, program, address):
        """Map ``address`` issued by ``program`` onto one of ``size`` sets.

        Concrete subclasses must override this; the base implementation
        always raises.
        """
        raise NotImplementedError('abstract')
class BasicPartitioner(Partitioner):
    """Unpartitioned cache: both programs share the full set space."""

    def get_set(self, size, program, address):
        # Plain modulo mapping over the whole cache; ``program`` is ignored.
        cset = address % size
        return cset
class StaticPartitioner(Partitioner):
    """Splits the cache into two equally-sized caches for each program.

    Program ONE maps into sets [0, size // 2); program TWO maps into
    [size // 2, size).
    """

    def get_set(self, size, program, address):
        # Use explicit floor division: the original ``size / 2`` relies on
        # Python 2 integer division and would yield a float (and fractional
        # set indices) under Python 3.  ``//`` is identical for ints on
        # Python 2, so behavior is unchanged there.
        half = size // 2
        cset = address % half
        if program == Program.TWO:
            cset += half
        return cset
class DynamicPartitioner(Partitioner):
    """Allocates half of the cache to each program, but keeps track of which
    sets belong to which program."""

    def __init__(self, size):
        # assign[i] is the program that currently owns set i (None if free).
        self.assign = [None] * size
        # Count of sets claimed by each program, capped at size // 2.
        self.alloc = {Program.ONE: 0, Program.TWO: 0}

    def get_set(self, size, program, address):
        # Return the closest available slot, allocating if necessary.
        # The probe sequence fans out from the natural slot with offsets
        # 0, -1, +1, -2, +2, ... which visits every set exactly once.
        can_reserve = self.alloc[program] < size // 2
        # ``range`` (valid on Python 2.6 and 3) replaces ``xrange``; the
        # set count is small enough that the materialized list is cheap.
        for i in range(size):
            sign = 1 if i % 2 == 0 else -1
            # ``//`` floors toward negative infinity exactly like the
            # Python 2 ``/`` did for these operands, and keeps the index
            # an int on Python 3.
            cset = (address + sign * i // 2) % size
            if self.assign[cset] == program:
                break
            elif self.assign[cset] is None and can_reserve:
                # Claim this free set for the requesting program.
                self.assign[cset] = program
                self.alloc[program] += 1
                break
        else:
            raise RuntimeError('Could not find cache slot for address.')
        return cset
class CacheSim(object):
    """Main cache simulator class."""

    def __init__(self, files, interleave, cache_params):
        # files: pair of pinatrace output paths, one per simulated program.
        self.fa, self.fb = files
        # Number of consecutive accesses taken from one trace before
        # switching to the other (the simulated context-switch quantum).
        self.interleave = interleave
        # cache_params: (partitioning scheme name, total blocks, blocks/set).
        self.cache_pscheme, self.cache_size, self.cache_assoc = cache_params
        # NOTE(review): Python 2 integer division; under Python 3 this
        # would produce a float set count.
        self.num_sets = self.cache_size / self.cache_assoc
        if self.cache_pscheme == 'STATIC':
            self.partitioner = StaticPartitioner()
        elif self.cache_pscheme == 'DYNAMIC':
            self.partitioner = DynamicPartitioner(self.num_sets)
        else:
            # Default (including scheme 'NONE' or unset): no partitioning.
            self.partitioner = BasicPartitioner()
        self.errors = self.total = self.hits = self.misses = 0

    def _trace_size(self, file):
        # Return the number of lines in the trace file at path ``file``.
        with open(file, 'r') as f:
            return sum(1 for line in f)

    def _eat_header(self, file):
        # Skip the three-line preamble before the access records.
        # NOTE(review): assumes exactly three header lines -- confirm
        # against the pinatrace output format.
        for x in xrange(3):
            file.readline()

    def _initialize(self):
        # One dict per cache set, mapping cached address -> age (0 = MRU).
        self.cache = [{} for _ in xrange(self.num_sets)]

    def _access(self, program, address):
        # Simulate a single memory access and update hit/miss counters.
        self.total += 1
        cset = self.partitioner.get_set(self.num_sets,
                                        program, address)
        # For simplicity, instead of tags we store the entire address in each
        # set.
        if address in self.cache[cset]:
            self.hits += 1
        else:
            self.misses += 1
            if len(self.cache[cset]) == self.cache_assoc:
                # Evict the LRU value.
                lru_address = max(self.cache[cset],
                                  key=lambda addr: self.cache[cset][addr])
                del self.cache[cset][lru_address]
        # Add the accessed address as the MRU.  On a hit this resets the
        # address's age to 0, refreshing its recency.
        self.cache[cset][address] = 0
        # Increment age of all other addresses in the set.
        for cached_address in self.cache[cset]:
            if cached_address != address:
                self.cache[cset][cached_address] += 1

    def _update_progress(self, message, i, count, done=False):
        # Rewrite the current terminal line in place; the trailing comma on
        # the Python 2 print statement suppresses the newline.  Padding
        # blanks out leftovers from a longer previous message.
        self.longest_message = max(self.longest_message, len(message))
        padding = ' ' * (self.longest_message - len(message))
        print '\r{0:>3.1%} {1} {2}'.format(i / float(count), message, padding),
        if done:
            sys.stdout.write('\n')
        else:
            sys.stdout.flush()

    def run(self):
        # Run the full simulation: count lines for progress reporting, then
        # interleave accesses from both traces until one runs dry.
        self._initialize()
        self.errors = self.total = self.hits = self.misses = 0
        line_i = line_count = 0
        self.longest_message = 0
        for f in (self.fa, self.fb):
            print 'Checking trace "{0}"...'.format(f),
            sys.stdout.flush()
            size = self._trace_size(f)
            line_count += size
            print ' {0:} lines'.format(size)
        with open(self.fa, 'r') as fa:
            with open(self.fb, 'r') as fb:
                self._update_progress('Initializing...', line_i, line_count)
                self._eat_header(fa)
                self._eat_header(fb)
                trace = fa
                # NOTE(review): ``diff`` is computed but never used.
                diff = abs(len(fa.name) - len(fb.name))
                while True:
                    self._update_progress('Processing {0}'.format(trace.name),
                                          line_i, line_count)
                    sys.stdout.flush()
                    # Consume up to ``interleave`` lines from the current
                    # trace (one context-switch quantum).
                    for i, line in enumerate(islice(trace, self.interleave)):
                        line_i += 1
                        try:
                            # Trace records look like "ip: R|W 0xADDR ...";
                            # field 2 is the address with "0x" stripped.
                            address = long(line.split()[2][2:], 16)
                        except (IndexError, ValueError):
                            # Malformed line (e.g. trace footer): count it
                            # and move on.
                            self.errors += 1
                            continue
                        program = Program.ONE if trace is fa else Program.TWO
                        self._access(program, address)
                    # A short quantum means the trace is exhausted -> stop.
                    # NOTE(review): if the very first quantum yields zero
                    # lines, ``i`` is unbound (NameError); and a trace ending
                    # exactly on a quantum boundary leaves ``i`` at
                    # interleave - 1, so two exhausted traces could loop
                    # forever -- confirm trace files are non-empty.
                    if i < self.interleave - 1:
                        break
                    else:
                        # Full quantum consumed: switch to the other trace.
                        trace = fa if trace is fb else fb
        self._update_progress('Done!', line_count, line_count, True)

    def get_stats(self):
        # Return the (errors, total, hits, misses) counters as a tuple.
        return self.errors, self.total, self.hits, self.misses
def main(args):
    # Build a CacheSim from the parsed CLI arguments, run the simulation,
    # and print a configuration/statistics report to stdout.
    cs = CacheSim((args.trace_a, args.trace_b), args.interleave,
                  (args.partitioning_scheme, args.size, args.associativity))
    try:
        cs.run()
        errors, total, hits, misses = cs.get_stats()
        print '\n=== Configuration ==='
        print '# of sets: {0}'.format(cs.num_sets)
        print 'Blocks / set: {0}'.format(cs.cache_assoc)
        print 'Partitioner: {0}'.format(type(cs.partitioner).__name__)
        print '\n=== Stats ==='
        # NOTE(review): divides by ``total`` -- raises ZeroDivisionError if
        # neither trace contained a parseable access.
        print 'Hits: {0:>9} ({1:>6.1%})'.format(hits, hits / float(total))
        print 'Misses: {0:>9} ({1:>6.1%})'.format(misses,
                                                  misses / float(total))
        print 'Total: {0:>9}'.format(total)
        print 'Errors: {0:>9}'.format(errors)
    except CacheSimException as e:
        print 'Error: {0}'.format(e)
if __name__ == '__main__':
    # Command-line entry point: parse arguments and hand off to main().
    arg_parser = argparse.ArgumentParser(description='Simple cache simulator')
    # Paths to the two pinatrace output files to interleave.
    arg_parser.add_argument('trace_a')
    arg_parser.add_argument('trace_b')
    # Accesses taken from one trace before switching to the other.
    arg_parser.add_argument('--interleave', type=int, default=200000)
    arg_parser.add_argument('--partitioning-scheme',
                            choices=['NONE', 'STATIC', 'DYNAMIC'])
    # Total cache blocks, and the set associativity (blocks per set).
    arg_parser.add_argument('--size', type=int, default=2048)
    arg_parser.add_argument('--associativity', type=int, choices=[1, 2, 4],
                            default=2)
    main(arg_parser.parse_args())