diff --git a/.github/workflows/run_tests.yaml b/.github/workflows/run_tests.yaml index 166a2e5..e9fdb6d 100644 --- a/.github/workflows/run_tests.yaml +++ b/.github/workflows/run_tests.yaml @@ -70,5 +70,13 @@ jobs: export PATH=$PATH:${{ steps.build_micropython.outputs.bin_dir }} export PATH=$PATH:${{ steps.build_binutils.outputs.bin_dir }} cd tests - ln -s ../binutils-esp32ulp # already cloned earlier. reuse. ./01_compat_tests.sh + + - name: Run compat tests with RTC macros + id: compat_rtc_tests + run: | + export PATH=$PATH:${{ steps.build_micropython.outputs.bin_dir }} + export PATH=$PATH:${{ steps.build_binutils.outputs.bin_dir }} + cd tests + ln -s ../binutils-esp32ulp # already cloned earlier. reuse. + ./02_compat_rtc_tests.sh diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..571f8ee --- /dev/null +++ b/AUTHORS @@ -0,0 +1,8 @@ +E-mail addresses listed here are not intended for support. + +py-esp32-ulp authors +-------------------- +py-esp32-ulp is written and maintained by Thomas Waldmann and various contributors: + +- Thomas Waldmann +- Wilko Nienhaus diff --git a/LICENSE b/LICENSE index 6fc734f..46bf124 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2018 Thomas Waldmann +Copyright 2018-2021 by the py-esp32-ulp authors, see AUTHORS file Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.rst b/README.rst index 3952878..2afa421 100644 --- a/README.rst +++ b/README.rst @@ -17,9 +17,18 @@ Status The most commonly used simple stuff should work. +Expressions in assembly source code are supported and get evaluated during +assembling. Only expressions evaluating to a single integer are supported. +Constants defined with ``.set`` are supported in expressions. + We have some unit tests and also compatibility tests that compare the output whether it is identical with binutils-esp32ulp output. 
+There is a simple preprocessor that understands just enough to allow assembling +ULP source files containing convenience macros such as WRITE_RTC_REG. The +preprocessor and how to use it are documented here: +`Preprocessor support </docs/preprocess.rst>`_. + There might be some stuff missing, some bugs and other symptoms of alpha software. Also, error and exception handling is rather rough yet. diff --git a/docs/preprocess.rst b/docs/preprocess.rst new file mode 100644 index 0000000..0716e69 --- /dev/null +++ b/docs/preprocess.rst @@ -0,0 +1,138 @@ +Preprocessor +--------------------- + +py-esp32-ulp contains a small preprocessor, which aims to fulfill one goal: +facilitate assembling of ULP code from Espressif and other open-source +projects to loadable/executable machine code without modification. + +Such code uses convenience macros (``READ_RTC_*`` and ``WRITE_RTC_*``) +provided by the ESP-IDF framework, along with constants defined in the +framework's include files (such as ``RTC_GPIO_IN_REG``), to make reading +and writing from/to peripheral registers much easier. + +In order to do this the preprocessor has two capabilities: + +1. Parse and replace identifiers defined with ``#define`` +2. Recognise the ``WRITE_RTC_*`` and ``READ_RTC_*`` macros and expand + them in a way that mirrors what the real ESP-IDF macros do. + + +Usage +------------------------ + +Normally the assembler is called as follows: + +.. code-block:: python + + src = "..full assembler file contents" + assembler = Assembler() + assembler.assemble(src) + ... + +With the preprocessor, simply pass the source code via the preprocessor first: + +.. code-block:: python + + from preprocess import preprocess + + src = "..full assembler file contents" + src = preprocess(src) + assembler = Assembler() + assembler.assemble(src) + ... 
+ + +Using a "Defines Database" +-------------------------- + +Because the py-esp32-ulp assembler was built for running on the ESP32 +microcontroller with limited RAM, the preprocessor aims to work there too. + +To handle a large number of defined constants (such as the ``RTC_*`` constants from +the ESP-IDF) the preprocessor can use a database (based on Berkeley DB) stored on the +device's filesystem for looking up defines. + +The database needs to be populated before preprocessing. (Usually, when only using +constants from the ESP-IDF, this is a one-time step, because the include files +don't change.) The database can be reused for all subsequent preprocessor runs. + +(The database can also be generated on a PC and then deployed to the ESP32, to +save processing effort on the device. In that case the include files themselves +are not needed on the device either.) + +1. Build the defines database + + The ``esp32_ulp.parse_to_db`` tool can be used to generate the defines + database from include files. The resulting file will be called + ``defines.db``. + + (The following assumes running on a PC. To do this on device, refer to the + `esp32_ulp/parse_to_db.py <../esp32_ulp/parse_to_db.py>`_ file.) + + .. code-block:: bash + + # general command + micropython -m esp32_ulp.parse_to_db path/to/include.h + + # loading specific ESP-IDF include files + micropython -m esp32_ulp.parse_to_db esp-idf/components/soc/esp32/include/soc/soc_ulp.h + + # loading multiple files at once + micropython -m esp32_ulp.parse_to_db esp-idf/components/soc/esp32/include/soc/*.h + + # if file system space is not a concern, the following can be convenient + # by including all relevant include files from the ESP-IDF framework. + # This results in an approximately 2MB large database. + micropython -m esp32_ulp.parse_to_db \ + esp-idf/components/soc/esp32/include/soc/*.h \ + esp-idf/components/esp_common/include/*.h + + # most ULP code uses only 5 include files. 
Parsing only those into the + # database should thus allow assembling virtually all ULP code one would + # find or want to write. + # This results in an approximately 250kB large database. + micropython -m esp32_ulp.parse_to_db \ + esp-idf/components/soc/esp32/include/soc/{soc,soc_ulp,rtc_cntl_reg,rtc_io_reg,sens_reg}.h + +2. Using the defines database during preprocessing + + The preprocessor will automatically use a defines database, when using the + ``preprocess.preprocess`` convenience function, even when the database does + not exist (an absent database is treated like an empty database, and care + is taken not to create an empty database file, cluttering up the filesystem, + when not needed). + + If you do not want the preprocessor to use a DefinesDB, pass ``False`` to + the ``use_defines_db`` argument of the ``preprocess`` convenience function, + or instantiate the ``Preprocessor`` class directly, without passing it a + DefinesDB instance via ``use_db``. + +Design choices +-------------- + +The preprocessor does not support: + +1. Function style macros such as :code:`#define f(a,b) (a+b)` + + This is not important, because there are only a few RTC macros that need + to be supported and they are simply implemented as Python functions. + + Since the preprocessor will understand ``#define`` directives directly in the + assembler source file, include mechanisms are not needed in some cases + (simply copying the needed ``#define`` statements from include files into the + assembler source will work). + +2. ``#include`` directives + + The preprocessor does not currently follow ``#include`` directives. To + limit space requirements (both in memory and on the filesystem), the + preprocessor relies on a database of defines (key/value pairs). This + database should be populated before using the preprocessor, by using the + ``esp32_ulp.parse_to_db`` tool (see section above), which parses include + files for identifiers defined therein. + +3. 
Preserving comments + + The assumption is that the output will almost always go into the + assembler directly, so preserving comments is not very useful and + would add a lot of complexity. diff --git a/esp32_ulp/__main__.py b/esp32_ulp/__main__.py index 584a3dd..209656f 100644 --- a/esp32_ulp/__main__.py +++ b/esp32_ulp/__main__.py @@ -2,6 +2,7 @@ from .util import garbage_collect +from .preprocess import preprocess from .assemble import Assembler from .link import make_binary garbage_collect('after import') @@ -9,7 +10,8 @@ def src_to_binary(src): assembler = Assembler() - assembler.assemble(src) + src = preprocess(src) + assembler.assemble(src, remove_comments=False) # comments already removed by preprocessor garbage_collect('before symbols export') addrs_syms = assembler.symbols.export() for addr, sym in addrs_syms: diff --git a/esp32_ulp/assemble.py b/esp32_ulp/assemble.py index d0b1ff2..e348363 100644 --- a/esp32_ulp/assemble.py +++ b/esp32_ulp/assemble.py @@ -3,7 +3,7 @@ """ from . import opcodes -from .nocomment import remove_comments +from .nocomment import remove_comments as do_remove_comments from .util import garbage_collect TEXT, DATA, BSS = 'text', 'data', 'bss' @@ -12,13 +12,10 @@ class SymbolTable: - def __init__(self, symbols, bases): + def __init__(self, symbols, bases, globals): self._symbols = symbols self._bases = bases - self._pass = None - - def set_pass(self, _pass): - self._pass = _pass + self._globals = globals def set_bases(self, bases): self._bases = bases @@ -32,38 +29,28 @@ def get_from(self): def set_sym(self, symbol, stype, section, value): entry = (stype, section, value) if symbol in self._symbols and entry != self._symbols[symbol]: - raise Exception('redefining symbol %s with different value %r -> %r.' % (label, self._symbols[symbol], entry)) + raise Exception('redefining symbol %s with different value %r -> %r.' 
% (symbol, self._symbols[symbol], entry)) self._symbols[symbol] = entry def has_sym(self, symbol): return symbol in self._symbols def get_sym(self, symbol): - try: - entry = self._symbols[symbol] - except KeyError: - if self._pass == 1: - entry = (REL, TEXT, 0) # for a dummy, this is good enough - else: - raise + entry = self._symbols[symbol] return entry def dump(self): for symbol, entry in self._symbols.items(): print(symbol, entry) - def export(self): - addrs_syms = [(self.resolve_absolute(entry), symbol) for symbol, entry in self._symbols.items()] + def export(self, incl_non_globals=False): + addrs_syms = [(self.resolve_absolute(entry), symbol) + for symbol, entry in self._symbols.items() + if incl_non_globals or symbol in self._globals] return sorted(addrs_syms) def to_abs_addr(self, section, offset): - try: - base = self._bases[section] - except KeyError: - if self._pass == 1: - base = 0 # for a dummy this is good enough - else: - raise + base = self._bases[section] return base + offset def resolve_absolute(self, symbol): @@ -93,16 +80,19 @@ def resolve_relative(self, symbol): from_addr = self.to_abs_addr(self._from_section, self._from_offset) return sym_addr - from_addr + def set_global(self, symbol): + self._globals[symbol] = True + pass + class Assembler: - def __init__(self, symbols=None, bases=None): - self.symbols = SymbolTable(symbols or {}, bases or {}) + def __init__(self, symbols=None, bases=None, globals=None): + self.symbols = SymbolTable(symbols or {}, bases or {}, globals or {}) opcodes.symbols = self.symbols # XXX dirty hack def init(self, a_pass): self.a_pass = a_pass - self.symbols.set_pass(a_pass) self.sections = dict(text=[], data=[]) self.offsets = dict(text=0, data=0, bss=0) self.section = TEXT @@ -118,7 +108,7 @@ def parse_line(self, line): """ if not line: return - has_label = line[0] not in '\t ' + has_label = line[0] not in '\t .' 
if has_label: label_line = line.split(None, 1) if len(label_line) == 2: @@ -150,8 +140,10 @@ def append_section(self, value, expected_section=None): if expected_section is not None and s is not expected_section: raise TypeError('only allowed in %s section' % expected_section) if s is BSS: - # just increase BSS size by value - self.offsets[s] += value + if int.from_bytes(value, 'little') != 0: + raise ValueError('attempt to store non-zero value in section .bss') + # just increase BSS size by length of value + self.offsets[s] += len(value) else: self.sections[s].append(value) self.offsets[s] += len(value) @@ -231,9 +223,12 @@ def d_align(self, align=4, fill=None): self.fill(self.section, amount, fill) def d_set(self, symbol, expr): - value = int(expr) # TODO: support more than just integers + value = int(opcodes.eval_arg(expr)) self.symbols.set_sym(symbol, ABS, None, value) + def d_global(self, symbol): + self.symbols.set_global(symbol) + def append_data(self, wordlen, args): data = [int(arg).to_bytes(wordlen, 'little') for arg in args] self.append_section(b''.join(data)) @@ -245,6 +240,11 @@ def d_word(self, *args): self.append_data(2, args) def d_long(self, *args): + self.d_int(*args) + + def d_int(self, *args): + # .long and .int are identical as per GNU assembler documentation + # https://sourceware.org/binutils/docs/as/Long.html self.append_data(4, args) def assembler_pass(self, lines): @@ -263,16 +263,22 @@ def assembler_pass(self, lines): continue else: # machine instruction - func = getattr(opcodes, 'i_' + opcode, None) + func = getattr(opcodes, 'i_' + opcode.lower(), None) if func is not None: - instruction = func(*args) + # during the first pass, symbols are not all known yet. + # so some expressions may not evaluate to something (yet). + # instruction building requires sane arguments however. 
+ # since all instructions are 4 bytes long, we simply skip + # building instructions during pass 1, and append an "empty + # instruction" to the section to get the right section size. + instruction = 0 if self.a_pass == 1 else func(*args) self.append_section(instruction.to_bytes(4, 'little'), TEXT) continue - raise Exception('Unknown opcode or directive: %s' % opcode) + raise ValueError('Unknown opcode or directive: %s' % opcode) self.finalize_sections() - def assemble(self, text): - lines = remove_comments(text) + def assemble(self, text, remove_comments=True): + lines = do_remove_comments(text) if remove_comments else text.splitlines() self.init(1) # pass 1 is only to get the symbol table right self.assembler_pass(lines) self.symbols.set_bases(self.compute_bases()) diff --git a/esp32_ulp/definesdb.py b/esp32_ulp/definesdb.py new file mode 100644 index 0000000..4a05459 --- /dev/null +++ b/esp32_ulp/definesdb.py @@ -0,0 +1,78 @@ +import os +import btree +from .util import file_exists + +DBNAME = 'defines.db' + + +class DefinesDB: + def __init__(self): + self._file = None + self._db = None + self._db_exists = None + + def clear(self): + self.close() + try: + os.remove(DBNAME) + self._db_exists = False + except OSError: + pass + + def is_open(self): + return self._db is not None + + def open(self): + if self.is_open(): + return + try: + self._file = open(DBNAME, 'r+b') + except OSError: + self._file = open(DBNAME, 'w+b') + self._db = btree.open(self._file) + self._db_exists = True + + def close(self): + if not self.is_open(): + return + self._db.close() + self._db = None + self._file.close() + self._file = None + + def db_exists(self): + if self._db_exists is None: + self._db_exists = file_exists(DBNAME) + return self._db_exists + + def update(self, dictionary): + for k, v in dictionary.items(): + self.__setitem__(k, v) + + def get(self, key, default): + try: + result = self.__getitem__(key) + except KeyError: + result = default + return result + + def keys(self): + 
if not self.db_exists(): + return [] + + self.open() + return [k.decode() for k in self._db.keys()] + + def __getitem__(self, key): + if not self.db_exists(): + raise KeyError + + self.open() + return self._db[key.encode()].decode() + + def __setitem__(self, key, value): + self.open() + self._db[key.encode()] = str(value).encode() + + def __iter__(self): + return iter(self.keys()) diff --git a/esp32_ulp/opcodes.py b/esp32_ulp/opcodes.py index 4e2ca04..103b1f7 100644 --- a/esp32_ulp/opcodes.py +++ b/esp32_ulp/opcodes.py @@ -6,6 +6,7 @@ from uctypes import struct, addressof, LITTLE_ENDIAN, UINT32, BFUINT32, BF_POS, BF_LEN from .soc import * +from .util import split_tokens, validate_expression # XXX dirty hack: use a global for the symbol table symbols = None @@ -15,6 +16,7 @@ OPCODE_WR_REG = 1 OPCODE_RD_REG = 2 +DR_REG_MAX_DIRECT = 0x3ff RD_REG_PERIPH_RTC_CNTL = 0 RD_REG_PERIPH_RTC_IO = 1 RD_REG_PERIPH_SENS = 2 @@ -112,7 +114,7 @@ def make_ins(layout): unused : 8 # Unused low : 5 # Low bit high : 5 # High bit - opcode : 4 # Opcode (OPCODE_WR_REG) + opcode : 4 # Opcode (OPCODE_RD_REG) """) @@ -267,6 +269,20 @@ def make_ins(layout): ARG = namedtuple('ARG', ('type', 'value', 'raw')) +def eval_arg(arg): + parts = [] + for token in split_tokens(arg): + if symbols.has_sym(token): + _, _, sym_value = symbols.get_sym(token) + parts.append(str(sym_value)) + else: + parts.append(token) + parts = "".join(parts) + if not validate_expression(parts): + raise ValueError('Unsupported expression: %s' % parts) + return eval(parts) + + def arg_qualify(arg): """ look at arg and qualify its type: @@ -289,8 +305,12 @@ def arg_qualify(arg): return ARG(IMM, int(arg), arg) except ValueError: pass - entry = symbols.get_sym(arg) - return ARG(SYM, entry, arg) + try: + entry = symbols.get_sym(arg) + except KeyError: + return ARG(IMM, int(eval_arg(arg)), arg) + else: + return ARG(SYM, entry, arg) def get_reg(arg): @@ -334,8 +354,9 @@ def get_cond(arg): def _soc_reg_to_ulp_periph_sel(reg): # Map 
SoC peripheral register to periph_sel field of RD_REG and WR_REG instructions. - ret = 3 - if reg < DR_REG_RTCCNTL_BASE: + if reg < DR_REG_MAX_DIRECT: + ret = RD_REG_PERIPH_RTC_CNTL + elif reg < DR_REG_RTCCNTL_BASE: raise ValueError("invalid register base") elif reg < DR_REG_RTCIO_BASE: ret = RD_REG_PERIPH_RTC_CNTL @@ -352,7 +373,10 @@ def _soc_reg_to_ulp_periph_sel(reg): def i_reg_wr(reg, high_bit, low_bit, val): reg = get_imm(reg) - _wr_reg.addr = (reg & 0xff) >> 2 + if reg < DR_REG_MAX_DIRECT: # see https://github.com/espressif/binutils-esp32ulp/blob/master/gas/config/tc-esp32ulp_esp32.c + _wr_reg.addr = reg + else: + _wr_reg.addr = (reg & 0xff) >> 2 _wr_reg.periph_sel = _soc_reg_to_ulp_periph_sel(reg) _wr_reg.data = get_imm(val) _wr_reg.low = get_imm(low_bit) @@ -363,7 +387,10 @@ def i_reg_wr(reg, high_bit, low_bit, val): def i_reg_rd(reg, high_bit, low_bit): reg = get_imm(reg) - _rd_reg.addr = (reg & 0xff) >> 2 + if reg < DR_REG_MAX_DIRECT: # see https://github.com/espressif/binutils-esp32ulp/blob/master/gas/config/tc-esp32ulp_esp32.c + _rd_reg.addr = reg + else: + _rd_reg.addr = (reg & 0xff) >> 2 _rd_reg.periph_sel = _soc_reg_to_ulp_periph_sel(reg) _rd_reg.unused = 0 _rd_reg.low = get_imm(low_bit) @@ -463,7 +490,7 @@ def i_move(reg_dest, reg_imm_src): if src.type == REG: _alu_reg.dreg = dest _alu_reg.sreg = src.value - _alu_reg.treg = 1 # XXX undocumented, this is the value binutils-esp32 uses + _alu_reg.treg = src.value # XXX undocumented, this is the value binutils-esp32 uses _alu_reg.unused = 0 _alu_reg.sel = ALU_SEL_MOV _alu_reg.sub_opcode = SUB_OPCODE_ALU_REG diff --git a/esp32_ulp/parse_to_db.py b/esp32_ulp/parse_to_db.py new file mode 100644 index 0000000..ac61f98 --- /dev/null +++ b/esp32_ulp/parse_to_db.py @@ -0,0 +1,23 @@ +import sys + +from .preprocess import Preprocessor +from .definesdb import DefinesDB + + +def parse(files): + db = DefinesDB() + + p = Preprocessor() + p.use_db(db) + + for f in files: + print('Processing file:', f) + + 
p.process_include_file(f) + + print('Done.') + + +if __name__ == '__main__': + parse(sys.argv[1:]) + diff --git a/esp32_ulp/preprocess.py b/esp32_ulp/preprocess.py new file mode 100644 index 0000000..03a9317 --- /dev/null +++ b/esp32_ulp/preprocess.py @@ -0,0 +1,156 @@ +from . import nocomment +from .util import split_tokens +from .definesdb import DefinesDB + + +class RTC_Macros: + @staticmethod + def READ_RTC_REG(rtc_reg, low_bit, bit_width): + return '\treg_rd ' + ', '.join(( + rtc_reg, + '%s + %s - 1' % (low_bit, bit_width), + low_bit + )) + + @staticmethod + def WRITE_RTC_REG(rtc_reg, low_bit, bit_width, value): + return '\treg_wr ' + ', '.join(( + rtc_reg, + '%s + %s - 1' % (low_bit, bit_width), + low_bit, + value + )) + + @staticmethod + def READ_RTC_FIELD(rtc_reg, low_bit): + return RTC_Macros.READ_RTC_REG(rtc_reg, low_bit, 1) + + @staticmethod + def WRITE_RTC_FIELD(rtc_reg, low_bit, value): + return RTC_Macros.WRITE_RTC_REG(rtc_reg, low_bit, 1, value + ' & 1') + + +class Preprocessor: + def __init__(self): + self._defines_db = None + self._defines = {} + + def parse_define_line(self, line): + line = line.strip() + if not line.startswith("#define"): + # skip lines not containing #define + return {} + line = line[8:].strip() # remove #define + parts = line.split(None, 1) + if len(parts) != 2: + # skip defines without value + return {} + identifier, value = parts + tmp = identifier.split('(', 1) + if len(tmp) == 2: + # skip parameterised defines (macros) + return {} + value = "".join(nocomment.remove_comments(value)).strip() + return {identifier: value} + + def parse_defines(self, content): + for line in content.splitlines(): + self._defines.update(self.parse_define_line(line)) + + return self._defines + + def expand_defines(self, line): + found = True + while found: # do as many passes as needed, until nothing was replaced anymore + found = False + tokens = split_tokens(line) + line = "" + for t in tokens: + lu = self._defines.get(t, t) + if lu == t and 
self._defines_db: + lu = self._defines_db.get(t, t) + if lu == t and t == 'BIT': + # Special hack: BIT(..) translates to a 32-bit mask where only the specified bit is set. + # But the reg_wr and reg_rd opcodes expect actual bit numbers for argument 2 and 3. + # While the real READ_RTC_*/WRITE_RTC_* macros take in the output of BIT(x), they + # ultimately convert these back (via helper macros) to the bit number (x). And since this + # preprocessor does not (aim to) implement "proper" macro-processing, we can simply + # short-circuit this round-trip via macros and replace "BIT" with nothing so that + # "BIT(x)" gets mapped to "(x)". + continue + if lu != t: + found = True + line += lu + + return line + + def process_include_file(self, filename): + with self.open_db() as db: + with open(filename, 'r') as f: + for line in f: + result = self.parse_define_line(line) + db.update(result) + + return db + + def expand_rtc_macros(self, line): + clean_line = line.strip() + if not clean_line: + return line + + macro = clean_line.split('(', 1) + if len(macro) != 2: + return line + + macro_name, macro_args = macro + + macro_fn = getattr(RTC_Macros, macro_name, None) + if macro_fn is None: + return line + + macro_args, _ = macro_args.rsplit(')', 1) # trim away right bracket. 
safe as comments already stripped + macro_args = macro_args.split(',') # not safe when args contain ',' but we should not have those + macro_args = [x.strip() for x in macro_args] + + return macro_fn(*macro_args) + + def use_db(self, defines_db): + self._defines_db = defines_db + + def open_db(self): + class ctx: + def __init__(self, db): + self._db = db + + def __enter__(self): + # not opening DefinesDB - it opens itself when needed + return self._db + + def __exit__(self, type, value, traceback): + if isinstance(self._db, DefinesDB): + self._db.close() + + if self._defines_db: + return ctx(self._defines_db) + + return ctx(self._defines) + + def preprocess(self, content): + self.parse_defines(content) + + with self.open_db(): + lines = nocomment.remove_comments(content) + result = [] + for line in lines: + line = self.expand_defines(line) + line = self.expand_rtc_macros(line) + result.append(line) + result = "\n".join(result) + + return result + + +def preprocess(content, use_defines_db=True): + preprocessor = Preprocessor() + preprocessor.use_db(DefinesDB() if use_defines_db else None) # honour use_defines_db: None disables DB lookups + return preprocessor.preprocess(content) diff --git a/esp32_ulp/util.py b/esp32_ulp/util.py index c184414..d79c538 100644 --- a/esp32_ulp/util.py +++ b/esp32_ulp/util.py @@ -1,6 +1,9 @@ DEBUG = False import gc +import os + +NORMAL, WHITESPACE = 0, 1 def garbage_collect(msg, verbose=DEBUG): @@ -9,3 +12,68 @@ def garbage_collect(msg, verbose=DEBUG): free_after = gc.mem_free() if verbose: print("%s: %d --gc--> %d bytes free" % (msg, free_before, free_after)) + + +def split_tokens(line): + buf = "" + tokens = [] + state = NORMAL + for c in line: + if c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_": + if state != NORMAL: + if len(buf) > 0: + tokens.append(buf) + buf = "" + state = NORMAL + buf += c + elif c in " \t": + if state != WHITESPACE: + if len(buf) > 0: + tokens.append(buf) + buf = "" + state = WHITESPACE + buf += c + else: + if len(buf) > 0: + tokens.append(buf) + buf = "" + 
tokens.append(c) + + if len(buf) > 0: + tokens.append(buf) + + return tokens + + +def validate_expression(param): + for token in split_tokens(param): + state = 0 + for c in token: + if c not in ' \t+-*/%()<>&|~x0123456789abcdef': + return False + + # the following allows hex digits a-f after 0x but not otherwise + if state == 0: + if c in 'abcdef': + return False + if c == '0': + state = 1 + continue + + if state == 1: + state = 2 if c == 'x' else 0 + continue + + if state == 2: + if c not in '0123456789abcdef': + state = 0 + return True + + +def file_exists(filename): + try: + os.stat(filename) + return True + except OSError: + pass + return False diff --git a/tests/00_unit_tests.sh b/tests/00_unit_tests.sh index 07d221f..ee1a239 100755 --- a/tests/00_unit_tests.sh +++ b/tests/00_unit_tests.sh @@ -4,7 +4,7 @@ set -e -for file in opcodes assemble link ; do +for file in opcodes assemble link util preprocess definesdb; do echo testing $file... micropython $file.py done diff --git a/tests/01_compat_tests.sh b/tests/01_compat_tests.sh index c565aa1..68f8bdc 100755 --- a/tests/01_compat_tests.sh +++ b/tests/01_compat_tests.sh @@ -13,12 +13,14 @@ for src_file in $(ls -1 compat/*.S); do log_file="${src_name}.log" micropython -m esp32_ulp $src_file 1>$log_file # generates $ulp_file + pre_file="${src_name}.pre" obj_file="${src_name}.o" elf_file="${src_name}.elf" bin_file="${src_name}.bin" echo -e "\tBuilding using binutils" - esp32ulp-elf-as -o $obj_file $src_file + gcc -E -o ${pre_file} $src_file + esp32ulp-elf-as -o $obj_file ${pre_file} esp32ulp-elf-ld -T esp32.ulp.ld -o $elf_file $obj_file esp32ulp-elf-objcopy -O binary $elf_file $bin_file diff --git a/tests/02_compat_rtc_tests.sh b/tests/02_compat_rtc_tests.sh new file mode 100755 index 0000000..2904ee6 --- /dev/null +++ b/tests/02_compat_rtc_tests.sh @@ -0,0 +1,118 @@ +#!/bin/bash + +# export PYTHONPATH=.:$PYTHONPATH + +set -e + +make_log_dir() { + mkdir -p log +} + +fetch_esp_idf() { + [ -d esp-idf ] && return + + 
echo "Fetching esp-idf" + log_file=log/fetch-esp-idf.log + git clone --depth 1 \ + https://github.com/espressif/esp-idf.git 1>$log_file 2>&1 +} + +fetch_ulptool_examples() { + [ -d ulptool ] && return + + echo "Fetching ulptool examples" + log_file=log/fetch-ulptool.log + git clone --depth 1 \ + https://github.com/duff2013/ulptool 1>$log_file 2>&1 +} + +fetch_binutils_esp32ulp_examples() { + [ -d binutils-esp32ulp ] && return + + echo "Fetching binutils-esp32ulp examples" + log_file=log/fetch-binutils.log + git clone --depth 1 \ + https://github.com/espressif/binutils-esp32ulp.git 1>$log_file 2>&1 +} + +build_defines_db() { + local defines_db=defines.db + + if [ "$1" = "-r" ] && [ -s "${defines_db}" ]; then + # reuse existing defines.db + return + fi + + echo "Building defines DB from include files" + log_file=log/build_defines_db.log + rm -f "${defines_db}" + micropython -m esp32_ulp.parse_to_db \ + esp-idf/components/soc/esp32/include/soc/*.h \ + esp-idf/components/esp_common/include/*.h 1>$log_file +} + +make_log_dir +fetch_esp_idf +fetch_ulptool_examples +fetch_binutils_esp32ulp_examples +build_defines_db $1 + +for src_file in ulptool/src/ulp_examples/*/*.s binutils-esp32ulp/gas/testsuite/gas/esp32ulp/esp32/*.s; do + + src_name="${src_file%.s}" + + echo "Testing $src_file" + + test_name="${src_name##*/}" + + # for now, skip files that contain known bugs in esp32_ulp (essentially a todo list of what to fix) + for I in esp32ulp_all esp32ulp_globals esp32ulp_jumpr esp32ulp_ranges test_reg; do + if [ "${test_name}" = "$I" ]; then + # these are old bugs, and not related to the RTC macro handling functionality + # they will still be great to fix over time + echo -e "\tSkipping... known bugs in esp32_ulp" + continue 2 + fi + done + + # for now, skip files that contain unsupported things (macros) + for I in i2c i2c_dev stack i2c_wr test1 test_jumpr test_macro; do + if [ "${test_name}" = "$I" ]; then + echo -e "\tSkipping... 
not yet supported" + continue 2 + fi + done + + echo -e "\tBuilding using py-esp32-ulp" + ulp_file="${src_name}.ulp" + log_file="${src_name}.log" + micropython -m esp32_ulp $src_file 1>$log_file # generates $ulp_file + + pre_file="${src_name}.pre" + obj_file="${src_name}.o" + elf_file="${src_name}.elf" + bin_file="${src_name}.bin" + + echo -e "\tBuilding using binutils" + gcc -I esp-idf/components/soc/esp32/include -I esp-idf/components/esp_common/include \ + -x assembler-with-cpp \ + -E -o ${pre_file} $src_file + esp32ulp-elf-as -o $obj_file ${pre_file} + esp32ulp-elf-ld -T esp32.ulp.ld -o $elf_file $obj_file + esp32ulp-elf-objcopy -O binary $elf_file $bin_file + + if ! diff $ulp_file $bin_file 1>/dev/null; then + echo -e "\tBuild outputs differ!" + echo "" + echo "Compatibility test failed for $src_file" + echo "py-esp32-ulp log:" + cat $log_file + echo "py-esp32-ulp output:" + xxd $ulp_file + echo "binutils output:" + xxd $bin_file + exit 1 + else + echo -e "\tBuild outputs match" + fi +done diff --git a/tests/assemble.py b/tests/assemble.py index 3875ee0..e607ba2 100644 --- a/tests/assemble.py +++ b/tests/assemble.py @@ -3,25 +3,54 @@ from esp32_ulp.nocomment import remove_comments src = """\ + .set const, 123 +.set const_left, 976 start: wait 42 ld r0, r1, 0 st r0, r1,0 halt end: +.data +""" + +src_bss = """\ + .bss + +label: + .long 0 +""" + + +src_global = """\ + + .global counter +counter: + .long 0 + +internal: + .long 0 + + .text + .global entry +entry: + wait 42 + halt """ def test_parse_line(): a = Assembler() - lines = src.splitlines() - # note: line number = index + 1 - assert a.parse_line(lines[0]) == None - assert a.parse_line(lines[1]) == ('start', 'wait', ('42', )) - assert a.parse_line(lines[2]) == (None, 'ld', ('r0', 'r1', '0', )) - assert a.parse_line(lines[3]) == (None, 'st', ('r0', 'r1', '0', )) - assert a.parse_line(lines[4]) == (None, 'halt', ()) - assert a.parse_line(lines[5]) == ('end', None, ()) + lines = iter(src.splitlines()) + assert 
a.parse_line(next(lines)) == (None, '.set', ('const', '123', )) + assert a.parse_line(next(lines)) == (None, '.set', ('const_left', '976', )) + assert a.parse_line(next(lines)) == None + assert a.parse_line(next(lines)) == ('start', 'wait', ('42', )) + assert a.parse_line(next(lines)) == (None, 'ld', ('r0', 'r1', '0', )) + assert a.parse_line(next(lines)) == (None, 'st', ('r0', 'r1', '0', )) + assert a.parse_line(next(lines)) == (None, 'halt', ()) + assert a.parse_line(next(lines)) == ('end', None, ()) + assert a.parse_line(next(lines)) == (None, '.data', ()) # test left-aligned directive is not treated as label def test_parse(): @@ -34,8 +63,12 @@ def test_parse(): def test_assemble(): a = Assembler() a.assemble(src) + assert a.symbols.has_sym('const') + assert a.symbols.has_sym('const_left') assert a.symbols.has_sym('start') assert a.symbols.has_sym('end') + assert a.symbols.get_sym('const') == (ABS, None, 123) + assert a.symbols.get_sym('const_left') == (ABS, None, 976) assert a.symbols.get_sym('start') == (REL, TEXT, 0) assert a.symbols.get_sym('end') == (REL, TEXT, 4) assert len(b''.join(a.sections[TEXT])) == 16 # 4 instructions * 4B @@ -43,33 +76,151 @@ def test_assemble(): assert a.offsets[BSS] == 0 +def test_assemble_bss(): + a = Assembler() + try: + a.assemble(src_bss) + except TypeError: + raised = True + else: + raised = False + assert not raised + assert a.offsets[BSS] == 4 # 1 word * 4B + + +def test_assemble_bss_with_value(): + lines = """\ +.bss + .long 3 #non-zero value not allowed in bss section +""" + + a = Assembler() + try: + a.assemble(lines) + except ValueError as e: + if str(e) != "attempt to store non-zero value in section .bss": + raise # re-raise failures we didn't expect + raised = True + else: + raised = False + + assert raised + + +def test_assemble_global(): + a = Assembler() + a.assemble(src_global) + assert a.symbols.has_sym('counter') + assert a.symbols.has_sym('internal') + assert a.symbols.has_sym('entry') + + exported_symbols = 
a.symbols.export() + assert exported_symbols == [(0, 'counter'), (2, 'entry')] # internal not exported + + exported_symbols = a.symbols.export(True) # include non-global symbols + assert exported_symbols == [(0, 'counter'), (1, 'internal'), (2, 'entry')] + + +def test_assemble_uppercase_opcode(): + a = Assembler() + try: + a.assemble(" WAIT 42") + except ValueError as e: + if str(e) != "Unknown opcode or directive: WAIT": + # re-raise failures we didn't expect + raise + raised = True + else: + raised = False + assert not raised + + +def test_assemble_evalulate_expressions(): + src_w_expr = """\ + .set shft, 2 + .set loops, (1 << shft) + +entry: + move r0, 1+1 + move r1, loops + move r2, (shft + 10) * 2 + move r3, entry << 2 +""" + a = Assembler() + a.assemble(src_w_expr) + + assert a.symbols.has_sym('shft') + assert a.symbols.has_sym('loops') + assert a.symbols.has_sym('entry') + assert a.symbols.get_sym('shft') == (ABS, None, 2) + assert a.symbols.get_sym('loops') == (ABS, None, 4) + assert a.symbols.get_sym('entry') == (REL, TEXT, 0) + + +def test_assemble_optional_comment_removal(): + line = " move r1, 123 # comment" + + a = Assembler() + + # first assemble as normal (comments will be removed by default) + a.assemble(line) + + # now assemble with comment removal disabled + try: + a.assemble(line, remove_comments=False) + except ValueError as e: + raised = True + else: + raised = False + assert raised + + +def test_assemble_test_regressions_from_evaluation(): + line = " reg_wr (0x3ff48400 + 0x10), 1, 1, 1" + + a = Assembler() + raised = False + try: + a.assemble(line) + except ValueError as e: + if str(e) == 'invalid register base': # ensure we trapped the expected Exception + raised = True + assert not raised + + def test_symbols(): - st = SymbolTable({}, {}) + st = SymbolTable({}, {}, {}) for entry in [ ('rel_t4', REL, TEXT, 4), ('abs_t4', ABS, TEXT, 4), ('rel_d4', REL, DATA, 4), ('abs_d4', ABS, DATA, 4), + ('const', ABS, None, 123), ]: st.set_sym(*entry) # 
PASS 1 ======================================================== - st.set_pass(1) assert st.has_sym('abs_t4') assert st.get_sym('abs_t4') == (ABS, TEXT, 4) assert not st.has_sym('notexist') - assert st.get_sym('notexist') == (REL, TEXT, 0) # pass1 -> dummy + try: + st.get_sym('notexist') # pass1 -> raises + except KeyError: + raised = True + else: + raised = False + assert raised assert st.resolve_absolute('abs_t4') == 4 - assert st.resolve_absolute('abs_d4') == 4 - assert st.resolve_absolute('rel_t4') == 4 - assert st.resolve_absolute('rel_d4') == 4 - st.set_from(TEXT, 8) - assert st.resolve_relative('abs_t4') == -4 - assert st.resolve_relative('abs_d4') == -4 - assert st.resolve_relative('rel_t4') == -4 - assert st.resolve_relative('rel_d4') == -4 + try: + # relative symbols cannot be resolved, because in pass 1 section bases are not yet defined + st.resolve_absolute('rel_t4') + except KeyError: + raised = True + else: + raised = False + assert raised + assert st.resolve_absolute('const') == 123 # PASS 2 ======================================================== st.set_bases({TEXT: 100, DATA: 200}) - st.set_pass(2) assert st.has_sym('abs_t4') assert st.get_sym('abs_t4') == (ABS, TEXT, 4) assert not st.has_sym('notexist') @@ -84,14 +235,23 @@ def test_symbols(): assert st.resolve_absolute('abs_d4') == 4 assert st.resolve_absolute('rel_t4') == 100 + 4 assert st.resolve_absolute('rel_d4') == 200 + 4 + assert st.resolve_absolute('const') == 123 st.set_from(TEXT, 8) assert st.resolve_relative('abs_t4') == 4 - 108 assert st.resolve_relative('abs_d4') == 4 - 108 assert st.resolve_relative('rel_t4') == 104 - 108 assert st.resolve_relative('rel_d4') == 204 - 108 + assert st.resolve_absolute('const') == 123 test_parse_line() test_parse() test_assemble() +test_assemble_bss() +test_assemble_bss_with_value() +test_assemble_global() +test_assemble_uppercase_opcode() +test_assemble_evalulate_expressions() +test_assemble_optional_comment_removal() 
+test_assemble_test_regressions_from_evaluation() test_symbols() diff --git a/tests/compat/expr.S b/tests/compat/expr.S new file mode 100644 index 0000000..3650623 --- /dev/null +++ b/tests/compat/expr.S @@ -0,0 +1,48 @@ +# common example of real world code using expressions + .set adc_channel, 6 + + .set adc_oversampling_factor_log, 2 + .set adc_oversampling_factor, (1 << adc_oversampling_factor_log) + +.data + +result: + .long 0 + + .text + .global entry +entry: + move r0, 0 + stage_rst + +measure: + adc r1, 0, adc_channel + 1 + add r0, r0, r1 + + stage_inc 1 + jumps measure, adc_oversampling_factor, lt + + rsh r0, r0, adc_oversampling_factor_log + + move r3, result + st r0, r3, 0 + +exit: + halt + + +# --- +# test that expressions evaluate correctly for all supported operators +# (these statements do not mean anything other than testing the operations) + move r3, 1+2 + move r3, 3-5 + move r3, -5 + move r3, 2*3 + move r3, 4/2 + move r3, 4 % 3 + move r3, 0xff << 2 + move r3, 0xff >> 1 + move r3, (0xabcdef | 0xff) & 0xff + move r3, 0x1234 & ~2 + move r3, 42|4&0xf # 46 (4&0xf is evaluated first) + move r3, (42|4)&0xf # 14 (42|4 is evaluated first) diff --git a/tests/compat/fixes.S b/tests/compat/fixes.S new file mode 100644 index 0000000..9e4d0ef --- /dev/null +++ b/tests/compat/fixes.S @@ -0,0 +1,28 @@ +# This file tests various fixes to the assembler, +# to ensure the binary output matches that of binutils. +# a) support for left-aligned directives (e.g. 
.set without preceding whitespace) +# b) a crash-fix related to data items in the .bss section +# c) support for marking labels as global +# d) support for upper case ULP opcode names +# +.set gpio, 2 + +.bss + +counter: +.long 0 + +.data +var2: .int 1111 + + .text + .global entry +entry: + MOVE R1, gpio + WAIT 42 + + # reg_rd/reg_wr with "short" and "long" address notation + reg_rd 12, 7, 0 + reg_rd 0x3ff48000, 7, 0 + + halt diff --git a/tests/compat/preprocess_simple.S b/tests/compat/preprocess_simple.S new file mode 100644 index 0000000..b6a61e8 --- /dev/null +++ b/tests/compat/preprocess_simple.S @@ -0,0 +1,7 @@ +#define GPIO 2 +#define BASE 0x100 +#define ADDR (BASE + GPIO) + +entry: + move r0, GPIO + move r1, ADDR diff --git a/tests/compat/symbols.S b/tests/compat/symbols.S index bf59c3b..359fa15 100644 --- a/tests/compat/symbols.S +++ b/tests/compat/symbols.S @@ -1,10 +1,12 @@ .text .set constant42, 42 +.set notindented, 1 start: move r0, data0 move r1, data1 move r2, constant42 + move r3, notindented # count from 0 .. 
42 in stage register stage_rst diff --git a/tests/definesdb.py b/tests/definesdb.py new file mode 100644 index 0000000..5e2100c --- /dev/null +++ b/tests/definesdb.py @@ -0,0 +1,60 @@ +import os + +from esp32_ulp.definesdb import DefinesDB, DBNAME +from esp32_ulp.util import file_exists + +tests = [] + + +def test(param): + tests.append(param) + + +@test +def test_definesdb_clear_removes_all_keys(): + db = DefinesDB() + db.open() + db.update({'KEY1': 'VALUE1'}) + + db.clear() + + assert 'KEY1' not in db + + db.close() + + +@test +def test_definesdb_persists_data_across_instantiations(): + db = DefinesDB() + db.open() + db.clear() + + db.update({'KEY1': 'VALUE1'}) + + assert 'KEY1' in db + + db.close() + del db + db = DefinesDB() + db.open() + + assert db.get('KEY1', None) == 'VALUE1' + + db.close() + + +@test +def test_definesdb_should_not_create_a_db_file_when_only_reading(): + db = DefinesDB() + + db.clear() + assert not file_exists(DBNAME) + + assert db.get('some-key', None) is None + assert not file_exists(DBNAME) + + +if __name__ == '__main__': + # run all methods marked with @test + for t in tests: + t() diff --git a/tests/fixtures/incl.h b/tests/fixtures/incl.h new file mode 100644 index 0000000..712aa7c --- /dev/null +++ b/tests/fixtures/incl.h @@ -0,0 +1,5 @@ +#define CONST1 42 +#define MACRO(x,y) x+y +#define MULTI_LINE abc \ + xyz +#define CONST2 99 diff --git a/tests/fixtures/incl2.h b/tests/fixtures/incl2.h new file mode 100644 index 0000000..d19aeba --- /dev/null +++ b/tests/fixtures/incl2.h @@ -0,0 +1,2 @@ +#define CONST2 123 +#define CONST3 777 diff --git a/tests/opcodes.py b/tests/opcodes.py index 54bb673..f14829a 100644 --- a/tests/opcodes.py +++ b/tests/opcodes.py @@ -1,6 +1,8 @@ from uctypes import UINT32, BFUINT32, BF_POS, BF_LEN from esp32_ulp.opcodes import make_ins, make_ins_struct_def -from esp32_ulp.opcodes import get_reg, get_imm, get_cond, arg_qualify, ARG, REG, IMM, COND +from esp32_ulp.opcodes import get_reg, get_imm, get_cond, 
arg_qualify, eval_arg, ARG, REG, IMM, SYM, COND +from esp32_ulp.assemble import SymbolTable, ABS, REL, TEXT +import esp32_ulp.opcodes as opcodes OPCODE_DELAY = 4 LAYOUT_DELAY = """ @@ -43,6 +45,19 @@ def test_arg_qualify(): assert arg_qualify('Eq') == ARG(COND, 'eq', 'Eq') assert arg_qualify('EQ') == ARG(COND, 'eq', 'EQ') + # for the next tests, ensure the opcodes module has a SymbolTable + opcodes.symbols = SymbolTable({}, {}, {}) + opcodes.symbols.set_sym('const', ABS, None, 42) # constant as defined by .set + opcodes.symbols.set_sym('entry', REL, TEXT, 4) # label pointing to code + + assert arg_qualify('1+1') == ARG(IMM, 2, '1+1') + assert arg_qualify('const >> 1') == ARG(IMM, 21, 'const >> 1') + assert arg_qualify('entry') == ARG(SYM, (REL, TEXT, 4), 'entry') # symbols should not (yet) be evaluated + assert arg_qualify('entry + const') == ARG(IMM, 46, 'entry + const') + + # clean up + opcodes.symbols = None + def test_get_reg(): assert get_reg('r0') == 0 @@ -57,9 +72,46 @@ def test_get_cond(): assert get_cond('Eq') == 'eq' +def test_eval_arg(): + opcodes.symbols = SymbolTable({}, {}, {}) + opcodes.symbols.set_sym('const', ABS, None, 42) # constant + opcodes.symbols.set_sym('raise', ABS, None, 99) # constant using a python keyword as name (is allowed) + + assert eval_arg('1+1') == 2 + assert eval_arg('1+const') == 43 + assert eval_arg('raise*2/3') == 66 + assert eval_arg('raise-const') == 57 + assert eval_arg('(raise-const)*2') == 114 + assert eval_arg('const % 5') == 2 + assert eval_arg('const + 0x19af') == 0x19af + 42 + assert eval_arg('const & ~2') == 40 + assert eval_arg('const << 3') == 336 + assert eval_arg('const >> 1') == 21 + assert eval_arg('(const|4)&0xf') == 0xe + + assert_raises(ValueError, eval_arg, 'evil()') + assert_raises(ValueError, eval_arg, 'def cafe()') + assert_raises(ValueError, eval_arg, '1 ^ 2') + assert_raises(ValueError, eval_arg, '!100') + + # clean up + opcodes.symbols = None + + +def assert_raises(exception, func, *args): + try: + 
func(*args) + except exception: + raised = True + else: + raised = False + assert raised + + test_make_ins_struct_def() test_make_ins() test_arg_qualify() test_get_reg() test_get_imm() test_get_cond() +test_eval_arg() \ No newline at end of file diff --git a/tests/preprocess.py b/tests/preprocess.py new file mode 100644 index 0000000..5a3825d --- /dev/null +++ b/tests/preprocess.py @@ -0,0 +1,338 @@ +import os + +from esp32_ulp.preprocess import Preprocessor +from esp32_ulp.definesdb import DefinesDB, DBNAME +from esp32_ulp.util import file_exists + +tests = [] + + +def test(param): + tests.append(param) + + +@test +def test_replace_defines_should_return_empty_line_given_empty_string(): + p = Preprocessor() + + assert p.preprocess("") == "" + + +@test +def replace_defines_should_return_remove_comments(): + p = Preprocessor() + + line = "// some comment" + expected = "" + assert p.preprocess(line) == expected + + +@test +def test_parse_defines(): + p = Preprocessor() + + assert p.parse_define_line("") == {} + assert p.parse_define_line("// comment") == {} + assert p.parse_define_line(" // comment") == {} + assert p.parse_define_line(" /* comment */") == {} + assert p.parse_define_line(" /* comment */ #define A 42") == {} # #define must be the first thing on a line + assert p.parse_define_line("#define a 1") == {"a": "1"} + assert p.parse_define_line(" #define a 1") == {"a": "1"} + assert p.parse_define_line("#define a 1 2") == {"a": "1 2"} + assert p.parse_define_line("#define f(a,b) 1") == {} # macros not supported + assert p.parse_define_line("#define f(a, b) 1") == {} # macros not supported + assert p.parse_define_line("#define f (a,b) 1") == {"f": "(a,b) 1"} # f is not a macro + assert p.parse_define_line("#define f (a, b) 1") == {"f": "(a, b) 1"} # f is not a macro + assert p.parse_define_line("#define RTC_ADDR 0x12345 // start of range") == {"RTC_ADDR": "0x12345"} + + +@test +def test_parse_defines_handles_multiple_input_lines(): + p = Preprocessor() + + 
multi_line_1 = """\ +#define ID_WITH_UNDERSCORE something +#define ID2 somethingelse +""" + assert p.parse_defines(multi_line_1) == {"ID_WITH_UNDERSCORE": "something", "ID2": "somethingelse"} + + +@test +def test_parse_defines_does_not_understand_comments_by_current_design(): + # comments are not understood. lines are expected to already have comments removed! + p = Preprocessor() + + multi_line_2 = """\ +#define ID_WITH_UNDERSCORE something +/* +#define ID2 somethingelse +*/ +""" + assert "ID2" in p.parse_defines(multi_line_2) + + +@test +def test_parse_defines_does_not_understand_line_continuations_with_backslash_by_current_design(): + p = Preprocessor() + + multi_line_3 = r""" + #define ID_WITH_UNDERSCORE something \ + line2 + """ + + assert p.parse_defines(multi_line_3) == {"ID_WITH_UNDERSCORE": "something \\"} + + +@test +def preprocess_should_remove_comments_and_defines_but_keep_the_lines_as_empty_lines(): + p = Preprocessor() + + lines = """\ + // copyright + #define A 1 + + move r1, r2""" + + assert p.preprocess(lines) == "\n\n\n\tmove r1, r2" + + +@test +def preprocess_should_replace_words_defined(): + p = Preprocessor() + + lines = """\ + #define DR_REG_RTCIO_BASE 0x3ff48400 + + move r1, DR_REG_RTCIO_BASE""" + + assert "move r1, 0x3ff48400" in p.preprocess(lines) + + +@test +def preprocess_should_replace_words_defined_multiple_times(): + p = Preprocessor() + + lines = """\ + #define DR_REG_RTCIO_BASE 0x3ff48400 + + move r1, DR_REG_RTCIO_BASE #once + move r2, DR_REG_RTCIO_BASE #second time""" + + assert "move r1, 0x3ff48400" in p.preprocess(lines) + assert "move r2, 0x3ff48400" in p.preprocess(lines) + + +@test +def preprocess_should_replace_all_defined_words(): + p = Preprocessor() + + lines = """\ + #define DR_REG_RTCIO_BASE 0x3ff48400 + #define SOME_OFFSET 4 + + move r1, DR_REG_RTCIO_BASE + add r2, r1, SOME_OFFSET""" + + assert "move r1, 0x3ff48400" in p.preprocess(lines) + assert "add r2, r1, 4" in p.preprocess(lines) + + +@test +def 
preprocess_should_not_replace_substrings_within_identifiers(): + p = Preprocessor() + + # ie. if AAA is defined don't touch PREFIX_AAA_SUFFIX + lines = """\ + #define RTCIO 4 + move r1, DR_REG_RTCIO_BASE""" + + assert "DR_REG_4_BASE" not in p.preprocess(lines) + + # ie. if A and AA are defined, don't replace AA as two A's but with AA + lines = """\ + #define A 4 + #define AA 8 + move r1, A + move r2, AA""" + + assert "move r1, 4" in p.preprocess(lines) + assert "move r2, 8" in p.preprocess(lines) + + +@test +def preprocess_should_replace_defines_used_in_defines(): + p = Preprocessor() + + lines = """\ + #define BITS (BASE << 4) + #define BASE 0x1234 + + move r1, BITS + move r2, BASE""" + + assert "move r1, (0x1234 << 4)" in p.preprocess(lines) + + +@test +def test_expand_rtc_macros(): + p = Preprocessor() + + assert p.expand_rtc_macros("") == "" + assert p.expand_rtc_macros("abc") == "abc" + assert p.expand_rtc_macros("WRITE_RTC_REG(1, 2, 3, 4)") == "\treg_wr 1, 2 + 3 - 1, 2, 4" + assert p.expand_rtc_macros("READ_RTC_REG(1, 2, 3)") == "\treg_rd 1, 2 + 3 - 1, 2" + assert p.expand_rtc_macros("WRITE_RTC_FIELD(1, 2, 3)") == "\treg_wr 1, 2 + 1 - 1, 2, 3 & 1" + assert p.expand_rtc_macros("READ_RTC_FIELD(1, 2)") == "\treg_rd 1, 2 + 1 - 1, 2" + + +@test +def preprocess_should_replace_BIT_with_empty_string_unless_defined(): + # by default replace BIT with empty string (see description for why in the code) + src = " move r1, 0x123 << BIT(24)" + assert "move r1, 0x123 << (24)" in Preprocessor().preprocess(src) + + # but if BIT is defined, use that + src = """\ + #define BIT 12 + + move r1, BIT""" + + assert "move r1, 12" in Preprocessor().preprocess(src) + + +@test +def test_process_include_file(): + p = Preprocessor() + + defines = p.process_include_file('fixtures/incl.h') + + assert defines['CONST1'] == '42' + assert defines['CONST2'] == '99' + assert defines.get('MULTI_LINE', None) == 'abc \\' # correct. 
line continuations not supported + assert 'MACRO' not in defines + + +@test +def test_process_include_file_with_multiple_files(): + p = Preprocessor() + + defines = p.process_include_file('fixtures/incl.h') + defines = p.process_include_file('fixtures/incl2.h') + + assert defines['CONST1'] == '42', "constant from incl.h" + assert defines['CONST2'] == '123', "constant overridden by incl2.h" + assert defines['CONST3'] == '777', "constant from incl2.h" + + +@test +def test_process_include_file_using_database(): + db = DefinesDB() + db.clear() + + p = Preprocessor() + p.use_db(db) + + p.process_include_file('fixtures/incl.h') + p.process_include_file('fixtures/incl2.h') + + assert db['CONST1'] == '42', "constant from incl.h" + assert db['CONST2'] == '123', "constant overridden by incl2.h" + assert db['CONST3'] == '777', "constant from incl2.h" + + db.close() + + +@test +def test_process_include_file_should_not_load_database_keys_into_instance_defines_dictionary(): + db = DefinesDB() + db.clear() + + p = Preprocessor() + p.use_db(db) + + p.process_include_file('fixtures/incl.h') + + # a bit hackish to reference instance-internal state + # but it's important to verify this, as we otherwise run out of memory on device + assert 'CONST2' not in p._defines + + + +@test +def test_preprocess_should_use_definesdb_when_provided(): + p = Preprocessor() + + content = """\ +#define LOCALCONST 42 + +entry: + move r1, LOCALCONST + move r2, DBKEY +""" + + # first try without db + result = p.preprocess(content) + + assert "move r1, 42" in result + assert "move r2, DBKEY" in result + assert "move r2, 99" not in result + + # now try with db + db = DefinesDB() + db.clear() + db.update({'DBKEY': '99'}) + p.use_db(db) + + result = p.preprocess(content) + + assert "move r1, 42" in result + assert "move r2, 99" in result + assert "move r2, DBKEY" not in result + + +@test +def test_preprocess_should_ensure_no_definesdb_is_created_when_only_reading_from_it(): + content = """\ + #define CONST 42 
+ move r1, CONST""" + + # remove any existing db + db = DefinesDB() + db.clear() + assert not file_exists(DBNAME) + + # now preprocess using db + p = Preprocessor() + p.use_db(db) + + result = p.preprocess(content) + + assert "move r1, 42" in result + + assert not file_exists(DBNAME) + + +@test +def test_preprocess_should_ensure_the_definesdb_is_properly_closed_after_use(): + content = """\ + #define CONST 42 + move r1, CONST""" + + # remove any existing db + db = DefinesDB() + db.open() + assert db.is_open() + + # now preprocess using db + p = Preprocessor() + p.use_db(db) + + p.preprocess(content) + + assert not db.is_open() + + +if __name__ == '__main__': + # run all methods marked with @test + for t in tests: + t() diff --git a/tests/util.py b/tests/util.py new file mode 100644 index 0000000..009f3f1 --- /dev/null +++ b/tests/util.py @@ -0,0 +1,76 @@ +import os +from esp32_ulp.util import split_tokens, validate_expression, file_exists + +tests = [] + + +def test(param): + """ + the @test decorator + """ + tests.append(param) + + +@test +def test_split_tokens(): + assert split_tokens("") == [] + assert split_tokens("t") == ['t'] + assert split_tokens("test") == ['test'] + assert split_tokens("t t") == ['t', ' ', 't'] + assert split_tokens("t,t") == ['t', ',', 't'] + assert split_tokens("test(arg)") == ['test', '(', 'arg', ')'] + assert split_tokens("test(arg,arg2)") == ['test', '(', 'arg', ',', 'arg2', ')'] + assert split_tokens("test(arg,arg2)") == ['test', '(', 'arg', ',', 'arg2', ')'] + assert split_tokens(" test( arg, arg2)") == [' ', 'test', '(', ' ', 'arg', ',', ' ', 'arg2', ')'] + assert split_tokens(" test( arg ) ") == [' ', 'test', '(', ' ', 'arg', ' ', ')', ' '] + assert split_tokens("\t test \t ") == ['\t ', 'test', " \t "] + assert split_tokens("test\nrow2") == ['test', "\n", "row2"] + + # split_token does not support comments. 
should generally only be used after comments are already stripped + assert split_tokens("test(arg /*comment*/)") == ['test', '(', 'arg', ' ', '/', '*', 'comment', '*', '/', ')'] + assert split_tokens("#test") == ['#', 'test'] + + +@test +def test_validate_expression(): + assert validate_expression('') is True + assert validate_expression('1') is True + assert validate_expression('1+1') is True + assert validate_expression('(1+1)') is True + assert validate_expression('(1+1)*2') is True + assert validate_expression('(1 + 1)') is True + assert validate_expression('10 % 2') is True + assert validate_expression('0x100 << 2') is True + assert validate_expression('0x100 & ~2') is True + assert validate_expression('0xabcdef') is True + assert validate_expression('0x123def') is True + assert validate_expression('2*3+4/5&6|7') is True + assert validate_expression('(((((1+1) * 2') is True # valid characters, even if expression is not valid + + assert validate_expression(':') is False + assert validate_expression('_') is False + assert validate_expression('=') is False + assert validate_expression('.') is False + assert validate_expression('!') is False + assert validate_expression('123 ^ 4') is False # operator not supported for now + assert validate_expression('evil()') is False + assert validate_expression('def cafe()') is False # valid hex digits, but potentially dangerous code + + +@test +def test_file_exists(): + testfile = '.testfile' + with open(testfile, 'w') as f: + f.write('contents') + + assert file_exists(testfile) + + os.remove(testfile) + + assert not file_exists(testfile) + + +if __name__ == '__main__': + # run all methods marked with @test + for t in tests: + t()