-
Notifications
You must be signed in to change notification settings - Fork 2
/
cachesim.py
231 lines (182 loc) · 7.83 KB
/
cachesim.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
#!/usr/bin/env python
#
# Simple cache simulator. Implements an N-way set-associative LRU cache, used
# for exploring the cache hit and miss rates when feeding in address traces
# from two programs, generated by the pinatrace Pin tool. These accesses are
# interleaved to simulate context switching between different threads.
#
# This program is compatible with Python 2.6.
#
# To generate an input file, run:
# pin -t /path/to/pinatrace.so -- <executable>
#
# Good sample programs to try are those involving interesting data reference
# patterns, such as image processing programs or those working with matrices.
# Trace files around a gigabyte in size are relatively quick to work with.
#
# Author: Oleg Vaskevich ([email protected])
import argparse
from itertools import islice
import os
import sys
def enum(**enums):
    """Build a lightweight enumeration class from keyword arguments.

    Each keyword becomes a class attribute, e.g. enum(A=0, B=1).A == 0.
    (Python 2.6 predates the stdlib ``enum`` module.)
    """
    members = dict(enums)
    return type('Enum', (), members)


# Identifies which of the two interleaved trace programs an access belongs to.
Program = enum(ONE=0, TWO=1)
class CacheSimException(Exception):
    """Root exception type for simulator-specific failures."""
class Partitioner(object):
    """Abstract base class for cache partitioning strategies."""

    def get_set(self, size, program, address):
        """Map ``address`` issued by ``program`` onto one of ``size`` sets.

        Concrete subclasses must override this; the base implementation
        always raises.
        """
        raise NotImplementedError('abstract')
class BasicPartitioner(Partitioner):
    """Unpartitioned cache: both programs share the full set space."""

    def get_set(self, size, program, address):
        # Plain modulo mapping over the whole cache; ``program`` is ignored.
        cset = address % size
        return cset
class StaticPartitioner(Partitioner):
    """Splits the cache into two equally-sized caches for each program.

    Program ONE maps into sets [0, size // 2); program TWO maps into
    [size // 2, size).
    """

    def get_set(self, size, program, address):
        # Use explicit floor division: the original ``size / 2`` relies on
        # Python 2 integer division and would yield a float (and fractional
        # set indices) under Python 3.  ``//`` is identical for ints on
        # Python 2, so behavior is unchanged there.
        half = size // 2
        cset = address % half
        if program == Program.TWO:
            cset += half
        return cset
class DynamicPartitioner(Partitioner):
    """Allocates half of the cache to each program, but keeps track of which
    sets belong to which program."""

    def __init__(self, size):
        # assign[i] is the program that currently owns set i (None if free).
        self.assign = [None] * size
        # Count of sets claimed by each program, capped at size // 2.
        self.alloc = {Program.ONE: 0, Program.TWO: 0}

    def get_set(self, size, program, address):
        # Return the closest available slot, allocating if necessary.
        # The probe sequence fans out from the natural slot with offsets
        # 0, -1, +1, -2, +2, ... which visits every set exactly once.
        can_reserve = self.alloc[program] < size // 2
        # ``range`` (valid on Python 2.6 and 3) replaces ``xrange``; the
        # set count is small enough that the materialized list is cheap.
        for i in range(size):
            sign = 1 if i % 2 == 0 else -1
            # ``//`` floors toward negative infinity exactly like the
            # Python 2 ``/`` did for these operands, and keeps the index
            # an int on Python 3.
            cset = (address + sign * i // 2) % size
            if self.assign[cset] == program:
                break
            elif self.assign[cset] is None and can_reserve:
                # Claim this free set for the requesting program.
                self.assign[cset] = program
                self.alloc[program] += 1
                break
        else:
            raise RuntimeError('Could not find cache slot for address.')
        return cset
class CacheSim(object):
    """Main cache simulator class."""

    def __init__(self, files, interleave, cache_params):
        # files: pair of pinatrace output paths, one per simulated program.
        self.fa, self.fb = files
        # Number of consecutive accesses taken from one trace before
        # switching to the other (the simulated context-switch quantum).
        self.interleave = interleave
        # cache_params: (partitioning scheme name, total blocks, blocks/set).
        self.cache_pscheme, self.cache_size, self.cache_assoc = cache_params
        # NOTE(review): Python 2 integer division; under Python 3 this
        # would produce a float set count.
        self.num_sets = self.cache_size / self.cache_assoc
        if self.cache_pscheme == 'STATIC':
            self.partitioner = StaticPartitioner()
        elif self.cache_pscheme == 'DYNAMIC':
            self.partitioner = DynamicPartitioner(self.num_sets)
        else:
            # Default (including scheme 'NONE' or unset): no partitioning.
            self.partitioner = BasicPartitioner()
        self.errors = self.total = self.hits = self.misses = 0

    def _trace_size(self, file):
        # Return the number of lines in the trace file at path ``file``.
        with open(file, 'r') as f:
            return sum(1 for line in f)

    def _eat_header(self, file):
        # Skip the three-line preamble before the access records.
        # NOTE(review): assumes exactly three header lines -- confirm
        # against the pinatrace output format.
        for x in xrange(3):
            file.readline()

    def _initialize(self):
        # One dict per cache set, mapping cached address -> age (0 = MRU).
        self.cache = [{} for _ in xrange(self.num_sets)]

    def _access(self, program, address):
        # Simulate a single memory access and update hit/miss counters.
        self.total += 1
        cset = self.partitioner.get_set(self.num_sets,
                                        program, address)
        # For simplicity, instead of tags we store the entire address in each
        # set.
        if address in self.cache[cset]:
            self.hits += 1
        else:
            self.misses += 1
            if len(self.cache[cset]) == self.cache_assoc:
                # Evict the LRU value.
                lru_address = max(self.cache[cset],
                                  key=lambda addr: self.cache[cset][addr])
                del self.cache[cset][lru_address]
        # Add the accessed address as the MRU.  On a hit this resets the
        # address's age to 0, refreshing its recency.
        self.cache[cset][address] = 0
        # Increment age of all other addresses in the set.
        for cached_address in self.cache[cset]:
            if cached_address != address:
                self.cache[cset][cached_address] += 1

    def _update_progress(self, message, i, count, done=False):
        # Rewrite the current terminal line in place; the trailing comma on
        # the Python 2 print statement suppresses the newline.  Padding
        # blanks out leftovers from a longer previous message.
        self.longest_message = max(self.longest_message, len(message))
        padding = ' ' * (self.longest_message - len(message))
        print '\r{0:>3.1%} {1} {2}'.format(i / float(count), message, padding),
        if done:
            sys.stdout.write('\n')
        else:
            sys.stdout.flush()

    def run(self):
        # Run the full simulation: count lines for progress reporting, then
        # interleave accesses from both traces until one runs dry.
        self._initialize()
        self.errors = self.total = self.hits = self.misses = 0
        line_i = line_count = 0
        self.longest_message = 0
        for f in (self.fa, self.fb):
            print 'Checking trace "{0}"...'.format(f),
            sys.stdout.flush()
            size = self._trace_size(f)
            line_count += size
            print ' {0:} lines'.format(size)
        with open(self.fa, 'r') as fa:
            with open(self.fb, 'r') as fb:
                self._update_progress('Initializing...', line_i, line_count)
                self._eat_header(fa)
                self._eat_header(fb)
                trace = fa
                # NOTE(review): ``diff`` is computed but never used.
                diff = abs(len(fa.name) - len(fb.name))
                while True:
                    self._update_progress('Processing {0}'.format(trace.name),
                                          line_i, line_count)
                    sys.stdout.flush()
                    # Consume up to ``interleave`` lines from the current
                    # trace (one context-switch quantum).
                    for i, line in enumerate(islice(trace, self.interleave)):
                        line_i += 1
                        try:
                            # Trace records look like "ip: R|W 0xADDR ...";
                            # field 2 is the address with "0x" stripped.
                            address = long(line.split()[2][2:], 16)
                        except (IndexError, ValueError):
                            # Malformed line (e.g. trace footer): count it
                            # and move on.
                            self.errors += 1
                            continue
                        program = Program.ONE if trace is fa else Program.TWO
                        self._access(program, address)
                    # A short quantum means the trace is exhausted -> stop.
                    # NOTE(review): if the very first quantum yields zero
                    # lines, ``i`` is unbound (NameError); and a trace ending
                    # exactly on a quantum boundary leaves ``i`` at
                    # interleave - 1, so two exhausted traces could loop
                    # forever -- confirm trace files are non-empty.
                    if i < self.interleave - 1:
                        break
                    else:
                        # Full quantum consumed: switch to the other trace.
                        trace = fa if trace is fb else fb
        self._update_progress('Done!', line_count, line_count, True)

    def get_stats(self):
        # Return the (errors, total, hits, misses) counters as a tuple.
        return self.errors, self.total, self.hits, self.misses
def main(args):
    # Build a CacheSim from the parsed CLI arguments, run the simulation,
    # and print a configuration/statistics report to stdout.
    cs = CacheSim((args.trace_a, args.trace_b), args.interleave,
                  (args.partitioning_scheme, args.size, args.associativity))
    try:
        cs.run()
        errors, total, hits, misses = cs.get_stats()
        print '\n=== Configuration ==='
        print '# of sets: {0}'.format(cs.num_sets)
        print 'Blocks / set: {0}'.format(cs.cache_assoc)
        print 'Partitioner: {0}'.format(type(cs.partitioner).__name__)
        print '\n=== Stats ==='
        # NOTE(review): divides by ``total`` -- raises ZeroDivisionError if
        # neither trace contained a parseable access.
        print 'Hits: {0:>9} ({1:>6.1%})'.format(hits, hits / float(total))
        print 'Misses: {0:>9} ({1:>6.1%})'.format(misses,
                                                  misses / float(total))
        print 'Total: {0:>9}'.format(total)
        print 'Errors: {0:>9}'.format(errors)
    except CacheSimException as e:
        print 'Error: {0}'.format(e)
if __name__ == '__main__':
    # Command-line entry point: parse arguments and hand off to main().
    arg_parser = argparse.ArgumentParser(description='Simple cache simulator')
    # Paths to the two pinatrace output files to interleave.
    arg_parser.add_argument('trace_a')
    arg_parser.add_argument('trace_b')
    # Accesses taken from one trace before switching to the other.
    arg_parser.add_argument('--interleave', type=int, default=200000)
    arg_parser.add_argument('--partitioning-scheme',
                            choices=['NONE', 'STATIC', 'DYNAMIC'])
    # Total cache blocks, and the set associativity (blocks per set).
    arg_parser.add_argument('--size', type=int, default=2048)
    arg_parser.add_argument('--associativity', type=int, choices=[1, 2, 4],
                            default=2)
    main(arg_parser.parse_args())