diff --git a/src/verilog/Makefile b/src/verilog/Makefile index e59430a57..7a37520ba 100644 --- a/src/verilog/Makefile +++ b/src/verilog/Makefile @@ -1,11 +1,24 @@ -SRC = verilog_language.cpp verilog_y.tab.cpp verilog_lex.yy.cpp verilog_parser.cpp \ - expr2verilog.cpp verilog_typecheck.cpp verilog_preprocessor.cpp \ - verilog_typecheck_expr.cpp verilog_synthesis.cpp \ - verilog_interfaces.cpp verilog_typecheck_base.cpp \ - verilog_generate.cpp verilog_parameterize_module.cpp \ - verilog_symbol_table.cpp verilog_parse_tree.cpp \ - verilog_module.cpp vtype.cpp verilog_typecheck_type.cpp \ - verilog_interpreter.cpp +SRC = expr2verilog.cpp \ + verilog_generate.cpp \ + verilog_interfaces.cpp \ + verilog_interpreter.cpp \ + verilog_language.cpp \ + verilog_lex.yy.cpp \ + verilog_module.cpp \ + verilog_parameterize_module.cpp \ + verilog_parse_tree.cpp \ + verilog_parser.cpp \ + verilog_preprocessor.cpp \ + verilog_preprocessor_lex.yy.cpp \ + verilog_preprocessor_tokenizer.cpp \ + verilog_symbol_table.cpp \ + verilog_synthesis.cpp \ + verilog_typecheck.cpp \ + verilog_typecheck_base.cpp \ + verilog_typecheck_expr.cpp \ + verilog_typecheck_type.cpp \ + verilog_y.tab.cpp \ + vtype.cpp include $(CPROVER_DIR)/config.inc include $(CPROVER_DIR)/common @@ -34,6 +47,9 @@ verilog_y.tab.h: verilog_y.tab.cpp verilog_lex.yy.cpp: scanner.l $(LEX) -Pyyverilog -o$@ scanner.l +verilog_preprocessor_lex.yy.cpp: verilog_preprocessor_tokenizer.l + $(LEX) -Pyyverilog_preprocessor -o$@ verilog_preprocessor_tokenizer.l + # extra dependencies verilog_y.tab$(OBJEXT): verilog_y.tab.cpp verilog_y.tab.h verilog_lex.yy$(OBJEXT): verilog_y.tab.cpp verilog_lex.yy.cpp verilog_y.tab.h diff --git a/src/verilog/verilog_preprocessor.cpp b/src/verilog/verilog_preprocessor.cpp index f039a1b57..6f756a34d 100644 --- a/src/verilog/verilog_preprocessor.cpp +++ b/src/verilog/verilog_preprocessor.cpp @@ -10,6 +10,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include "verilog_preprocessor_error.h" @@ -32,37 +33,14 @@ source_locationt verilog_preprocessort::filet::make_source_location() const source_locationt result; result.set_file(filename); - result.set_line(line); + result.set_line(tokenizer.line_no()); return result; } /*******************************************************************\ -Function: verilog_preprocessort::getline - - Inputs: - - Outputs: - - Purpose: - -\*******************************************************************/ - -void verilog_preprocessort::filet::getline(std::string &dest) -{ - dest=""; - - char ch; - - while(get(ch) && ch!='\n') - if(ch!='\r') - dest+=ch; -} - -/*******************************************************************\ - -Function: verilog_preprocessort::filet::get +Function: verilog_preprocessort::include Inputs: @@ -72,141 +50,49 @@ Function: verilog_preprocessort::filet::get \*******************************************************************/ -bool verilog_preprocessort::filet::get(char &ch) +void verilog_preprocessort::include(const std::string &filename) { - state=INITIAL; - - while(in->get(ch)) + // first try filename as is { - if(ch=='\n') - { - line++; - column=1; - } - else - column++; +#ifdef _MSC_VER + auto in = new std::ifstream(widen(filename)); +#else + auto in = new std::ifstream(filename); +#endif - switch(state) + if(*in) { - case INITIAL: - switch(ch) - { - case '/': - state=DASH; - break; - - default: - return true; - } - break; - - case DASH: - switch(ch) - { - case '*': - state=C_COMMENT; - break; - - case '/': - state=CPP_COMMENT; - cpp_comment_empty=(column==3); - break; - - default: - in->unget(); - ch='/'; - return true; - } - break; - - case C_COMMENT: - switch(ch) - { - case '*': - state=C_COMMENT2; - break; - - default:; - } - break; - - case C_COMMENT2: - switch(ch) - { - case '/': - state=INITIAL; - break; - - case '*': - break; - - default: - state=C_COMMENT; - } - break; - - case CPP_COMMENT: - switch(ch) - { - case '\n': - if(cpp_comment_empty) - { - state=INITIAL; - break; - } - - return true; - - default:; - } - break; + files.emplace_back(true, in, filename); + files.back().print_line_directive(out, 1); // 'enter' + return; // done } + else + delete in; } - return false; -} - -/*******************************************************************\ - -Function: verilog_preprocessort::include - - Inputs: - - Outputs: - - Purpose: - -\*******************************************************************/ - -void verilog_preprocessort::include( - const std::string &filename, - const source_locationt &source_location) -{ - files.emplace_back(true, nullptr, filename); - filet &file=files.back(); + // try include paths in given order + for(const auto &path : config.verilog.include_paths) + { + auto full_name = concat_dir_file(path, filename); - file.in=new std::ifstream(filename.c_str()); - if(*file.in) return; +#ifdef _MSC_VER + auto in = new std::ifstream(widen(full_name)); +#else + auto in = new std::ifstream(full_name); +#endif - delete file.in; - file.close=false; + if(*in) + { + files.emplace_back(true, in, filename); + files.back().print_line_directive(out, 1); // 'enter' + return; // done + } - // try include paths in given order - for(std::list::const_iterator - it=config.verilog.include_paths.begin(); - it!=config.verilog.include_paths.end(); - it++) - { - file.close=true; - file.in = new std::ifstream(concat_dir_file(*it, filename)); - if(*file.in) return; - delete file.in; - file.close=false; + delete in; } - error().source_location = source_location; - error() << "include file `" << filename << "' not found" << eom; - throw 0; + throw verilog_preprocessor_errort() + << "include file `" << filename << "' not found"; } /*******************************************************************\ @@ -227,43 +113,25 @@ void verilog_preprocessort::preprocessor() { files.emplace_back(false, &in, filename); + files.back().print_line_directive(out, 0); // 'neither' + while(!files.empty()) { - files.back().print_line(out, files.size() == 1 ? 0 : 2); - - char ch, last_out = 0; - - while(files.back().get(ch)) + while(!tokenizer().eof()) { - switch(ch) - { - case '`': + auto token = tokenizer().next_token(); + if(token == '`') directive(); - break; - - default: - if(condition) - { - filet &file = files.back(); - - if(last_out == '\n' && file.last_line != file.line && ch != '\n') - { - file.print_line(out, 0); - file.last_line = file.line; - } - - out << ch; - last_out = ch; - - if(ch == '\n') - file.last_line++; - } + else if(condition) + { + out << token; } } - if(last_out != '\n') - out << '\n'; files.pop_back(); + + if(!files.empty()) + files.back().print_line_directive(out, 2); // 'exit' } } catch(const verilog_preprocessor_errort &e) @@ -340,69 +208,63 @@ Function: verilog_preprocessort::directive void verilog_preprocessort::directive() { - // remember the source location - auto source_location = files.back().make_source_location(); - - std::string text; - - char ch; - - while(files.back().in->get(ch)) - { - if(isalnum(ch) || ch=='$' || ch=='_') - text+=ch; - else - { - files.back().in->unget(); - break; - } - } + // we expect an identifier after the backtick + auto directive_token = tokenizer().next_token(); + if(!directive_token.is_identifier()) + throw verilog_preprocessor_errort() + << "expecting an identifier after backtick"; - std::string line; + const auto &text = directive_token.text; if(text=="define") { - files.back().getline(line); - if(!condition) + { + // ignore + tokenizer().skip_until_eol(); return; - - const char *tptr=line.c_str(); + } // skip whitespace - while(*tptr==' ' || *tptr=='\t') tptr++; + tokenizer().skip_ws(); - std::string identifier, value; + // we expect an identifier after `define + auto identifier_token = tokenizer().next_token(); + if(!identifier_token.is_identifier()) + throw verilog_preprocessor_errort() + << "expecting an identifier after `define"; - // copy identifier - while(isalnum(*tptr) || *tptr=='$' || *tptr=='_') - { - identifier+=*tptr; - tptr++; - } + auto &identifier = identifier_token.text; - // is there a parameter list? - if(*tptr=='(') + // Is there a parameter list? + // These have been introduced in Verilog 2001. + if(tokenizer().peek() == '(') { - error().source_location = source_location; - error() << "`define with parameters not yet supported" << eom; - throw 0; + throw verilog_preprocessor_errort() + << "`define with parameters not yet supported"; } // skip whitespace - while(*tptr==' ' || *tptr=='\t') tptr++; - - value=tptr; - - // remove trailing whitespace - - while(value.size()!=0 && - (value[value.size()-1]==' ' || value[value.size()-1]=='\t')) - value.resize(value.size()-1); + tokenizer().skip_ws(); - replace_macros(value); + // read any tokens until end of line + std::string value; + while(!tokenizer().eof()) + { + auto token = tokenizer().next_token(); + if(token.is_identifier()) + { + value += token.text; + } + else if(token == '\n') + break; + else + { + value += token.text; + } + } - #ifdef DEBUG +#ifdef DEBUG std::cout << "DEFINE: >" << identifier << "< = >" << value << "<" << std::endl; #endif @@ -411,83 +273,76 @@ void verilog_preprocessort::directive() } else if(text=="undef") { - files.back().getline(line); - if(!condition) + { + // ignore + tokenizer().skip_until_eol(); return; - - const char *tptr=line.c_str(); + } // skip whitespace - while(*tptr==' ' || *tptr=='\t') tptr++; + tokenizer().skip_ws(); - std::string identifier, value; + // we expect an identifier after `undef + auto identifier_token = tokenizer().next_token(); + if(!identifier_token.is_identifier()) + throw verilog_preprocessor_errort() + << "expecting an identifier after `undef"; - // copy identifier - while(isalnum(*tptr) || *tptr=='$' || *tptr=='_') - { - identifier+=*tptr; - tptr++; - } + auto &identifier = identifier_token.text; + + tokenizer().skip_until_eol(); definest::iterator it=defines.find(identifier); if(it!=defines.end()) { // found it! remove it! - defines.erase(it); } } else if(text=="ifdef" || text=="ifndef") { - files.back().getline(line); - - const char *tptr=line.c_str(); + bool ifdef = text == "ifdef"; // skip whitespace - while(*tptr==' ' || *tptr=='\t') tptr++; + tokenizer().skip_ws(); - std::string identifier, value; + // we expect an identifier + auto identifier_token = tokenizer().next_token(); + if(!identifier_token.is_identifier()) + throw verilog_preprocessor_errort() + << "expecting an identifier after ifdef"; - // copy identifier - while(isalnum(*tptr) || *tptr=='$' || *tptr=='_') - { - identifier+=*tptr; - tptr++; - } + auto &identifier = identifier_token.text; - definest::iterator it=defines.find(identifier); + tokenizer().skip_until_eol(); + + bool defined = defines.find(identifier) != defines.end(); conditionalt conditional; - - if(text=="ifdef") - conditional.condition=(it!=defines.end()); + + if(ifdef) + conditional.condition = defined; else - conditional.condition=(it==defines.end()); - + conditional.condition = !defined; + conditional.previous_condition=condition; conditionals.push_back(conditional); condition=conditional.get_cond(); } else if(text=="else") { - files.back().getline(line); - if(conditionals.empty()) - { - error().source_location = source_location; - error() << "`else without `ifdef/`ifndef" << eom; - throw 0; - } + throw verilog_preprocessor_errort() << "`else without `ifdef/`ifndef"; + + tokenizer().skip_until_eol(); conditionalt &conditional=conditionals.back(); if(conditional.else_part) { - error().source_location = source_location; - error() << "`else without `ifdef/`ifndef" << eom; - throw 0; + throw verilog_preprocessor_errort() << "`else without `ifdef/`ifndef"; } conditional.else_part=true; @@ -495,15 +350,13 @@ void verilog_preprocessort::directive() } else if(text=="endif") { - files.back().getline(line); - if(conditionals.empty()) { - error().source_location = source_location; - error() << "`endif without `ifdef/`ifndef" << eom; - throw 0; + throw verilog_preprocessor_errort() << "`endif without `ifdef/`ifndef"; } + tokenizer().skip_until_eol(); + conditionals.pop_back(); if(conditionals.empty()) @@ -513,62 +366,39 @@ void verilog_preprocessort::directive() } else if(text=="include") { - // remember the source location - auto include_source_location = files.back().make_source_location(); - - files.back().getline(line); - - const char *tptr=line.c_str(); - // skip whitespace - while(*tptr==' ' || *tptr=='\t') tptr++; - - if(tptr[0]!='"') - { - error() << include_source_location; - error() << "expected file name" << eom; - throw 0; - } - - tptr++; - - std::string filename; - - // copy filename - while(*tptr!='"') - { - if(*tptr==0) - { - error() << include_source_location; - error() << "expected `\"'" << eom; - throw 0; - } - - filename+=*tptr; - tptr++; - } - - include(filename, include_source_location); + tokenizer().skip_ws(); + + // we expect a string literal + auto file_token = tokenizer().next_token(); + if(!file_token.is_string_literal()) + throw verilog_preprocessor_errort() + << "expecting a string literal after `include"; + + // strip quotes off string literal, escaping, etc. + auto filename = file_token.string_literal_value(); + tokenizer().skip_until_eol(); + include(filename); } else if(text=="resetall") { // ignored - files.back().getline(line); + tokenizer().skip_until_eol(); } else if(text=="timescale") { // ignored - files.back().getline(line); + tokenizer().skip_until_eol(); } else if(text=="celldefine") { // ignored - files.back().getline(line); + tokenizer().skip_until_eol(); } else if(text=="endcelldefine") { // ignored - files.back().getline(line); + tokenizer().skip_until_eol(); } else { @@ -585,7 +415,6 @@ void verilog_preprocessort::directive() } // found it! replace it! - out << it->second; } } diff --git a/src/verilog/verilog_preprocessor.h b/src/verilog/verilog_preprocessor.h index 21f0e921d..0a416bea2 100644 --- a/src/verilog/verilog_preprocessor.h +++ b/src/verilog/verilog_preprocessor.h @@ -1,11 +1,14 @@ #ifndef VERILOG_PREPROCESSOR_H #define VERILOG_PREPROCESSOR_H -#include - +#include #include -#include #include +#include + +#include "verilog_preprocessor_tokenizer.h" + +#include class verilog_preprocessort:public preprocessort { @@ -32,7 +35,7 @@ class verilog_preprocessort:public preprocessort virtual void directive(); virtual void replace_macros(std::string &s); - virtual void include(const std::string &filename, const source_locationt &); + virtual void include(const std::string &filename); // for ifdef, else, endif @@ -61,44 +64,42 @@ class verilog_preprocessort:public preprocessort class filet { - public: + protected: bool close; std::istream *in; std::string filename; - unsigned line, last_line; + + public: + verilog_preprocessor_tokenizert tokenizer; filet(bool _close, std::istream *_in, std::string _filename) - : close(_close), - in(_in), - filename(std::move(_filename)), - line(1), - last_line(0), - column(1) + : close(_close), in(_in), filename(std::move(_filename)), tokenizer(*in) { } - ~filet() { if(close) delete in; } - - bool get(char &ch); - void getline(std::string &dest); - - // a minimal scanner - - typedef enum { INITIAL, C_COMMENT, CPP_COMMENT, - DASH, C_COMMENT2 } statet; - - statet state; - unsigned column; - bool cpp_comment_empty; + ~filet() + { + if(close) + delete in; + } - void print_line(std::ostream &out, unsigned level) - { out << "`line " << line << " \"" - < files; + std::list files; + + // the topmost tokenizer + verilog_preprocessor_tokenizert &tokenizer() + { + PRECONDITION(!files.empty()); + return files.back().tokenizer; + } }; #endif diff --git a/src/verilog/verilog_preprocessor_tokenizer.cpp b/src/verilog/verilog_preprocessor_tokenizer.cpp new file mode 100644 index 000000000..1e70f1468 --- /dev/null +++ b/src/verilog/verilog_preprocessor_tokenizer.cpp @@ -0,0 +1,110 @@ +/*******************************************************************\ + +Module: Verilog Preprocessor Tokenizer + +Author: Daniel Kroening, kroening@kroening.com + +\*******************************************************************/ + +#include "verilog_preprocessor_tokenizer.h" + +#include + +void verilog_preprocessor_token_sourcet::skip_until_eol() +{ + while(true) + { + auto token = peek(); + if(token == '\n' || token.is_eof()) + break; + next_token(); // eat + } +} + +void verilog_preprocessor_token_sourcet::skip_ws() +{ + while(true) + { + auto token = peek(); + if(token != ' ' && token != '\t') + break; + next_token(); // eat + } +} + +std::string +verilog_preprocessor_token_sourcet::tokent::string_literal_value() const +{ + PRECONDITION(kind == STRING_LITERAL); + PRECONDITION(text.size() >= 2); + std::string result; + result.reserve(text.size()); + + for(std::size_t i = 1; i < text.size() - 1; i++) + { + char ch = text[i]; + result += ch; + } + + return result; +} + +bool verilog_preprocessor_token_sourcet::eof() +{ + return peek().is_eof(); +} + +auto verilog_preprocessor_token_sourcet::peek() -> const tokent & +{ + if(peeked) + return token; + else + { + get_token_from_stream(); + peeked = true; + return token; + } +} + +auto verilog_preprocessor_token_sourcet::next_token() -> const tokent & +{ + if(peeked) + peeked = false; + else + get_token_from_stream(); + + if(token == '\n') + _line_no++; + + return token; +} + +verilog_preprocessor_tokenizert::verilog_preprocessor_tokenizert( + std::istream &_in) + : in(_in) +{ +} + +std::size_t +verilog_preprocessor_tokenizert::yy_input(char *buffer, std::size_t max_size) +{ + std::size_t result; + for(result = 0; result < max_size; result++) + { + char ch; + if(!in.get(ch)) + return result; // eof + buffer[result] = ch; + } + return result; +} + +int yyverilog_preprocessorlex(); +verilog_preprocessor_tokenizert *verilog_preprocessor_tokenizer; + +void verilog_preprocessor_tokenizert::get_token_from_stream() +{ + verilog_preprocessor_tokenizer = this; + token.kind = static_cast( + yyverilog_preprocessorlex()); +} diff --git a/src/verilog/verilog_preprocessor_tokenizer.h b/src/verilog/verilog_preprocessor_tokenizer.h new file mode 100644 index 000000000..b62540061 --- /dev/null +++ b/src/verilog/verilog_preprocessor_tokenizer.h @@ -0,0 +1,110 @@ +/*******************************************************************\ + +Module: Verilog Preprocessor Tokenizer + +Author: Daniel Kroening, kroening@kroening.com + +\*******************************************************************/ + +#ifndef VERILOG_PREPROCESSOR_TOKENIZER_H +#define VERILOG_PREPROCESSOR_TOKENIZER_H + +#include + +#include "verilog_preprocessor_error.h" + +/// Note that the set of tokens recognised by the Verilog preprocessor +/// differs from the set of tokens used by the main parser. + +class verilog_preprocessor_token_sourcet +{ +public: + virtual ~verilog_preprocessor_token_sourcet() = default; + + class tokent + { + public: + using kindt = enum { + END_OF_FILE = 0, // defined by flex + NONE = 256, + IDENTIFIER, + STRING_LITERAL + }; + kindt kind = NONE; + std::string text; + bool is_eof() const + { + return kind == END_OF_FILE; + } + bool is_identifier() const + { + return kind == IDENTIFIER; + } + bool is_string_literal() const + { + return kind == STRING_LITERAL; + } + bool operator==(char ch) const + { + return static_cast(kind) == ch; + } + bool operator!=(char ch) const + { + return static_cast(kind) != ch; + } + friend std::ostream &operator<<(std::ostream &out, const tokent &t) + { + return out << t.text; + } + std::string string_literal_value() const; + }; + + // convenience helpers + bool eof(); + void skip_ws(); + void skip_until_eol(); + + // get the next token from the stream + const tokent &next_token(); + + // get the next token without consuming it + const tokent &peek(); + + std::size_t line_no() const + { + return _line_no; + } + +protected: + std::size_t _line_no = 1; + bool peeked = false; + tokent token; // for peeking + + /// read a token from the input stream and store it in 'token' + virtual void get_token_from_stream() = 0; +}; + +class verilog_preprocessor_tokenizert + : public verilog_preprocessor_token_sourcet +{ +public: + explicit verilog_preprocessor_tokenizert(std::istream &_in); + + // for flex + std::size_t yy_input(char *buffer, std::size_t max_size); + void text(const char *buffer, std::size_t len) + { + token.text = std::string(buffer, len); + } + + void text(char ch) + { + token.text = std::string(1, ch); + } + +protected: + std::istream ∈ + void get_token_from_stream() override; +}; + +#endif // VERILOG_PREPROCESSOR_TOKENIZER_H diff --git a/src/verilog/verilog_preprocessor_tokenizer.l b/src/verilog/verilog_preprocessor_tokenizer.l new file mode 100644 index 000000000..4dd0bbaca --- /dev/null +++ b/src/verilog/verilog_preprocessor_tokenizer.l @@ -0,0 +1,55 @@ +%option nounput +%option noinput + +%{ +#include +#include +#include + +#ifdef _WIN32 +#define YY_NO_UNISTD_H +static int isatty(int) { return 0; } +#endif + +#include "verilog_preprocessor_tokenizer.h" + +extern verilog_preprocessor_tokenizert *verilog_preprocessor_tokenizer; +using tokent = verilog_preprocessor_tokenizert::tokent; + +#define TOKENIZER (*verilog_preprocessor_tokenizer) + +#define YY_INPUT(buf, result, max_size) \ + { result = TOKENIZER.yy_input(buf, max_size); } +%} + +%s SLCOMMENT +%s MLCOMMENT +%s STRING + +Identifier [a-zA-Z_][0-9a-zA-Z_$]* +String \"(\\.|[^"\\])*\" + +%% + +"//" { BEGIN SLCOMMENT; /* single-line comment */ } +"/*" { BEGIN MLCOMMENT; /* multi-line comment */ } +{String} { TOKENIZER.text(yytext, yyleng); return tokent::STRING_LITERAL; } +{Identifier} { TOKENIZER.text(yytext, yyleng); return tokent::IDENTIFIER; } +.|\n { TOKENIZER.text(yytext[0]); return yytext[0]; /* anything else */ } +\n { BEGIN INITIAL; TOKENIZER.text('\n'); return '\n'; } +. { /* just eat up */ } +<> { throw verilog_preprocessor_errort() << "EOF inside a comment"; } +\n { TOKENIZER.text('\n'); return '\n'; } +"*/" { BEGIN INITIAL; /* comment done */ } +. { /* just eat up */ } +<> { throw verilog_preprocessor_errort() << "EOF inside a comment"; } + +%% + +int yywrap() { return 1; } + +void verilog_preprocessor_tokenizer_init() +{ + YY_FLUSH_BUFFER; + BEGIN INITIAL; +}