diff --git a/CMakePresets.json b/CMakePresets.json index e1951a1..a0aa4cc 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -7,10 +7,7 @@ "configurePresets": [ { "name": "default", - "inherits": "gcc-debug", - "cacheVariables": { - "NOCTERN_TEST_COLOR": "ON" - } + "inherits": "gcc-debug" }, { "name": "gcc-debug", @@ -59,7 +56,8 @@ "generator": "Ninja", "binaryDir": "build/${presetName}", "inherits": [ - "_ccache" + "_ccache", + "_compile_commands_json" ] } ], diff --git a/cmake/PresetMixins.json b/cmake/PresetMixins.json index a6d0da5..811b6cf 100644 --- a/cmake/PresetMixins.json +++ b/cmake/PresetMixins.json @@ -44,6 +44,13 @@ "CMAKE_C_COMPILER_LAUNCHER": "ccache", "CMAKE_CXX_COMPILER_LAUNCHER": "ccache" } + }, + { + "name": "_compile_commands_json", + "hidden": true, + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON" + } } ] } \ No newline at end of file diff --git a/examples/basic_math.nct b/examples/basic_math.nct index 5833e66..ab91f22 100644 --- a/examples/basic_math.nct +++ b/examples/basic_math.nct @@ -1,4 +1,4 @@ -def silly_add(x, y): { +def silly_add(x: f64, y: f64): f64 = { let z = y - 0.2; return y + z + x * 2. - 2 + .1; }; \ No newline at end of file diff --git a/examples/simple_main.nct b/examples/simple_main.nct index 9fde721..b5ee96f 100644 --- a/examples/simple_main.nct +++ b/examples/simple_main.nct @@ -1,6 +1,6 @@ -def Main(): { - let x = 3; - let y = 4; - let z = y - 0.2; +def Main(): f64 = { + let x: f64 = 3; + let y: f64 = 4; + let z: f64 = y - 0.2; return y + z + x * 2. 
- 2 + .1;
 };
\ No newline at end of file
diff --git a/src/nir.main.cpp b/src/nir.main.cpp
new file mode 100644
index 0000000..a2f57c7
--- /dev/null
+++ b/src/nir.main.cpp
@@ -0,0 +1,142 @@
+// NOTE(review): the text inside angle brackets (header names, template argument lists) was
+// missing from the copy of this patch under review and is reconstructed here from usage.
+// Confirm against the repository.
+#include <cerrno>
+#include <cstddef>
+#include <cstdio>
+#include <cstring>
+#include <memory>
+#include <optional>
+#include <ranges>
+#include <span>
+#include <string>
+#include <utility>
+
+#include <fmt/format.h>
+#include <fmt/ranges.h>
+
+#include "noctern/compilation_unit.hpp"
+#include "noctern/intern_table.hpp"
+#include "noctern/interpreter.hpp"
+#include "noctern/nir.hpp"
+#include "noctern/parser.hpp"
+#include "noctern/symbol_table.hpp"
+#include "noctern/tokenize.hpp"
+
+namespace {
+    // Deleter that lets std::unique_ptr own a C stdio stream.
+    struct file_closer {
+        void operator()(std::FILE* file) const noexcept {
+            std::fclose(file);
+        }
+    };
+
+    // Reads the whole file at `path` into a string.
+    //
+    // Returns std::nullopt, after printing a diagnostic to stderr, if the file cannot be opened,
+    // measured, or read in full. The stream is closed on every path.
+    std::optional<std::string> read_entire_file(const char* path) {
+        // TODO: mmap
+        const std::unique_ptr<std::FILE, file_closer> file(std::fopen(path, "rb"));
+        if (file == nullptr) {
+            std::string err(std::strerror(errno));
+            fmt::println(stderr, "Couldn't find file {}: {}", path, err);
+            return std::nullopt;
+        }
+        if (std::fseek(file.get(), 0, SEEK_END) != 0) {
+            std::string err(std::strerror(errno));
+            fmt::println(stderr, "fseek failed: {}", err);
+            return std::nullopt;
+        }
+        const long length = std::ftell(file.get());
+        if (length == -1) {
+            std::string err(std::strerror(errno));
+            fmt::println(stderr, "ftell failed: {}", err);
+            return std::nullopt;
+        }
+        if (std::fseek(file.get(), 0, SEEK_SET) != 0) {
+            std::string err(std::strerror(errno));
+            fmt::println(stderr, "fseek failed: {}", err);
+            return std::nullopt;
+        }
+
+        std::string source(static_cast<std::size_t>(length), '\0');
+        // Element size 1, element count `length`, so the return value is the number of bytes
+        // read and can be compared directly against the size measured above.
+        const std::size_t bytes_read = std::fread(source.data(), 1, source.size(), file.get());
+        if (std::ferror(file.get()) != 0) {
+            std::string err(std::strerror(errno));
+            fmt::println(stderr, "fread failed: {}", err);
+            return std::nullopt;
+        }
+        if (bytes_read != source.size()) {
+            fmt::println(stderr, "failed to read entire file: expected {} bytes, got {}",
+                source.size(), bytes_read);
+            return std::nullopt;
+        }
+        return source;
+    }
+}
+
+// Compiles the `Main` function of the file named on the command line to NIR and prints the
+// resulting instructions to stdout, one per line. Returns 1 on any failure.
+int main(int argc, char** argv) {
+    if (argc != 2) {
+        fmt::println(stderr, "Usage: nocternc ");
+        return 1;
+    }
+
+    std::optional<std::string> source = read_entire_file(argv[1]);
+    if (!source.has_value()) {
+        return 1;
+    }
+
+    noctern::tokens tokens = noctern::tokenize_all(*source);
+    tokens = noctern::parse(std::move(tokens));
+
+    noctern::compilation_unit compile_unit(tokens);
+    noctern::string_intern_table global_symbols;
+    noctern::symbol_table symbol_table(tokens, compile_unit, global_symbols);
+
+    const auto main_decl = symbol_table.find_fn_decl(global_symbols.intern("Main"));
+    if (!main_decl.has_value()) {
+        fmt::println(stderr, "No `Main()` function found!");
+        return 1;
+    }
+
+    noctern::nir::instructions instructions;
+    noctern::nir::instructions::function fn
+        = instructions.compile_function(tokens, *main_decl, global_symbols);
+
+    // `visit` calls back once per instruction with the opcode as a compile-time tag, a
+    // register, and the opcode-specific operands.
+    instructions.visit(fn,
+        [&]<noctern::nir::instructions::opcode op, typename Register, typename... Args>(
+            noctern::val_t<op>, Register reg, Args&&... args) {
+            using enum noctern::nir::instructions::opcode;
+            if constexpr (op == return_) {
+                fmt::println(" return %r{}", reg);
+            } else {
+                fmt::print(" %r{} = {}", reg, stringify(op));
+
+                if constexpr (op == load_int_lit || op == load_real_lit) {
+                    fmt::print(" {}", [](auto x) { return x; }(args...));
+                } else if constexpr (op == load_nonlocal) {
+                    fmt::print(" {}", [&](noctern::interned_string id) {
+                        return global_symbols.get(id);
+                    }(args...));
+                } else if constexpr (op == call) {
+                    [&]<typename Reg>(Reg callee, std::span<const Reg> call_args) {
+                        fmt::print(" %r{}({})", callee,
+                            fmt::join(std::ranges::transform_view(
+                                          call_args, [](auto arg) { return fmt::format("%r{}", arg); }),
+                                ", "));
+                    }(args...);
+                }
+
+                fmt::println("");
+            }
+        });
+
+    return 0;
+}
diff --git a/src/noctern/compilation_unit.cpp b/src/noctern/compilation_unit.cpp
index f7e316e..551e4e2 100644
--- a/src/noctern/compilation_unit.cpp
+++ b/src/noctern/compilation_unit.cpp
@@ -16,6 +16,6 @@ namespace noctern {
 
     compilation_unit::compilation_unit(const tokens& input)
         : fn_defs_(noctern::from_range(std::ranges::filter_view(
-            input, [&](token t) { return input.id(t) == token_id::fn_intro; }))) {
+            input, [&](token t) { return input.id(t) == token_id::def; }))) {
     }
 }
\ No newline at end of file
diff --git a/src/noctern/enum.hpp b/src/noctern/enum.hpp
index 596bf7a..4d648d8 100644
--- a/src/noctern/enum.hpp
+++ b/src/noctern/enum.hpp
@@ -83,7 +83,7 @@ namespace noctern {
     /////
     // The key functions. These are not hidden friends to work around a Clang bug.
- + template requires std::is_enum_v constexpr std::string_view stringify(Enum e) { @@ -108,7 +108,7 @@ namespace noctern { return std::invoke(std::forward(fn), val...); }); } - + // End key functions. ///// diff --git a/src/noctern/inout.cpp b/src/noctern/inout.cpp new file mode 100644 index 0000000..8b483ec --- /dev/null +++ b/src/noctern/inout.cpp @@ -0,0 +1 @@ +#include "./inout.hpp" diff --git a/src/noctern/inout.hpp b/src/noctern/inout.hpp new file mode 100644 index 0000000..38e152e --- /dev/null +++ b/src/noctern/inout.hpp @@ -0,0 +1,109 @@ +#pragma once + +#include + +// Helpers for specifying in/out/inout parameters with odd lifetime requirements. +// +// If the lifetime requirements are standard (i.e. "live until function ends"), this isn't needed, +// but if we're holding a reference onto the parameter, we should pass the parameter via one of +// these aliases, constructed via CTAD. +// +// Example usage: +// +// void do_something(int x, int y, inout> storage) { +// do_something_else(x, inout(storage)); +// do_something_else(y, inout(storage)); +// } + +namespace noctern { + // Marks inout parameters with odd lifetime requirements. + template + class inout { + public: + explicit constexpr inout(T& ref) + : ref_(&ref) { + } + + template + requires std::convertible_to + explicit constexpr inout(inout& rhs) + : ref_(rhs.ref_) { + } + + // This is not a value type. + constexpr inout(const inout&) = delete; + + T& operator*() { + return *ref_; + } + + T* operator->() { + return ref_; + } + + private: + T* ref_; + }; + + // Marks input parameters with odd lifetime requirements. + template + class in { + public: + explicit constexpr in(T& ref) + : ref_(&ref) { + } + + template + requires std::convertible_to + explicit constexpr in(in& rhs) + : ref_(rhs.ref_) { + } + + // This is not a value type. 
+ constexpr in(const in&) = delete; + + T& operator*() { + return *ref_; + } + + T* operator->() { + return ref_; + } + + private: + T* ref_; + }; + + // Marks output parameters with odd lifetime requirements. + template + class out { + public: + explicit constexpr out(T& ref) + : ref_(&ref) { + } + + template + requires std::convertible_to + explicit constexpr out(out& rhs) + : ref_(rhs.ref_) { + } + + // This is not a value type. + constexpr out(const out&) = delete; + + T& operator*() { + return *ref_; + } + + T* operator->() { + return ref_; + } + + private: + T* ref_; + }; + + static_assert(!std::copy_constructible>); + static_assert(!std::copy_constructible>); + static_assert(!std::copy_constructible>); +} diff --git a/src/noctern/inout.test.cpp b/src/noctern/inout.test.cpp new file mode 100644 index 0000000..797eb08 --- /dev/null +++ b/src/noctern/inout.test.cpp @@ -0,0 +1,54 @@ +#include "./inout.hpp" + +#include + +namespace noctern { + namespace { + struct point { + int x; + int y; + }; + + TEST_CASE("in works") { + int x = 42; + in param = in(x); + + CHECK(*param == 42); + + SECTION("->") { + point x {1, 2}; + in param = in(x); + CHECK(param->x == 1); + } + } + + TEST_CASE("out works") { + int x = 42; + out param = out(x); + *param = 45; + + CHECK(x == 45); + + SECTION("->") { + point x {1, 2}; + out param = out(x); + CHECK(param->x == 1); + } + } + + TEST_CASE("inout works") { + int x = 42; + inout param = inout(x); + CHECK(*param == 42); + *param = 45; + + CHECK(x == 45); + + SECTION("->") { + point x {1, 2}; + inout param = inout(x); + CHECK(param->x == 1); + } + } + } +} \ No newline at end of file diff --git a/src/noctern/intern_table.cpp b/src/noctern/intern_table.cpp new file mode 100644 index 0000000..826c849 --- /dev/null +++ b/src/noctern/intern_table.cpp @@ -0,0 +1 @@ +#include "./intern_table.hpp" diff --git a/src/noctern/intern_table.hpp b/src/noctern/intern_table.hpp new file mode 100644 index 0000000..096c394 --- /dev/null +++ 
b/src/noctern/intern_table.hpp @@ -0,0 +1,127 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "noctern/meta.hpp" + +namespace noctern { + template + requires std::unsigned_integral + class intern_table; + + namespace intern_table_internal { + template + requires std::unsigned_integral + class interned { + friend intern_table; + friend std::hash; + + public: + constexpr interned(const interned&) = default; + constexpr interned(interned&&) noexcept = default; + + friend constexpr bool operator==(interned, interned) = default; + + template + requires std::integral + static constexpr auto encoded_size(type_t) { + constexpr auto Div = sizeof(InternIndexType) / sizeof(Word); + constexpr auto Rem = sizeof(InternIndexType) % sizeof(Word); + constexpr auto DivRoundingUp = Div + (Rem != 0 ? 1 : 0); + return DivRoundingUp; + } + + template + requires std::integral + constexpr auto encode_as(type_t) const { + constexpr auto MaxWord = std::numeric_limits::max(); + constexpr auto WordBits = std::numeric_limits::digits; + + std::array)> result {}; + InternIndexType data = index_; + for (size_t index = 0; index < result.size(); ++index) { + result[index] = data & MaxWord; + if constexpr (sizeof(Word) < sizeof(InternIndexType)) { + // Work around -Wshift-count-overflow. It would do the right thing anyway, + // but compilers warn that we're shifting by a very large value, so this is + // "useless". It's not, because this is generic code. + data >>= WordBits; + } + } + return result; + } + + template + requires std::integral + static constexpr interned decode_from(std::span data) { + constexpr auto WordBits = std::numeric_limits::digits; + + InternIndexType result = 0; + for (size_t indexp1 = encoded_size(type); indexp1 > 0; --indexp1) { + if constexpr (sizeof(Word) < sizeof(InternIndexType)) { + // Work around -Wshift-count-overflow. 
It would do the right thing anyway, + // but compilers warn that we're shifting by a very large value, so this is + // "useless". It's not, because this is generic code. + result <<= WordBits; + } + result |= data[indexp1 - 1]; + } + + return interned {result}; + } + + private: + explicit constexpr interned(InternIndexType index) + : index_(index) { + } + + InternIndexType index_; + }; + } + + template + requires std::unsigned_integral + class intern_table { + public: + using interned = intern_table_internal::interned; + + const T& get(interned token) const { + return data_[token.index_]; + } + + interned intern(const T& item) { + InternIndexType index = table_.size(); + + auto [it, was_inserted] = table_.try_emplace(item, index); + if (was_inserted) { + data_.push_back(item); + } else { + index = it->second; + } + + return interned {index}; + } + + private: + std::vector data_; + std::unordered_map table_; + }; + + using string_intern_table = intern_table; + using interned_string = string_intern_table::interned; +} + +template +struct std::hash> { + constexpr size_t operator()( + typename noctern::intern_table_internal::interned token) const { + return std::hash {}(token.index_); + } +}; diff --git a/src/noctern/interpreter.cpp b/src/noctern/interpreter.cpp index 6c49493..46ad887 100644 --- a/src/noctern/interpreter.cpp +++ b/src/noctern/interpreter.cpp @@ -24,8 +24,14 @@ namespace noctern { assert(source.id(*pos) == token_id::ident); assert(frame.locals.contains(source.string(*pos))); ++pos; + // TODO: type + assert(source.id(*pos) == token_id::ident); + ++pos; } ++pos; + // TODO: return type + assert(source.id(*pos) == token_id::ident); + ++pos; token_id id = source.id(*pos); if (id == token_id::lbrace) { @@ -40,12 +46,16 @@ namespace noctern { assert(source.id(*pos) == token_id::lbrace); ++pos; - while (source.id(*pos) == token_id::valdef_intro) { + while (source.id(*pos) == token_id::let) { ++pos; assert(source.id(*pos) == token_id::ident); const token ident = 
*pos; ++pos; + // TODO: type + assert(source.id(*pos) == token_id::ident); + ++pos; + double result = eval_expr(source, frame, pos); // Only insert after `eval_expr`, to avoid reading an undefined variable. frame.locals[source.string(ident)] = result; @@ -63,7 +73,7 @@ namespace noctern { const tokens& source, frame& frame, tokens::const_iterator& pos) const { assert(frame.expr_stack.empty()); - while (source.id(*pos) != token_id::statement_end) { + while (source.id(*pos) != token_id::semicolon) { token next = *pos; token_id id = source.id(next); ++pos; diff --git a/src/noctern/interpreter.test.cpp b/src/noctern/interpreter.test.cpp index 24bff75..8a8f757 100644 --- a/src/noctern/interpreter.test.cpp +++ b/src/noctern/interpreter.test.cpp @@ -66,23 +66,27 @@ namespace noctern { using enum noctern::token_id; fabricated_tokens tokens = noctern::make_tokens( - // def silly_add(x, y,): { - // let z = y - 0.2; + // def silly_add(x: f64, y: f64,): f64 = { + // let z: f64 = y - 0.2; // return y + z + x * 2. - 2 + .1; // }; { - fn_intro, + def, {ident, "silly_add"}, {ident, "x"}, + {ident, "f64"}, {ident, "y"}, + {ident, "f64"}, rparen, + {ident, "f64"}, lbrace, - valdef_intro, + let, {ident, "z"}, + {ident, "f64"}, {ident, "y"}, {real_lit, "0.2"}, minus, - statement_end, + semicolon, return_, {ident, "y"}, {ident, "z"}, @@ -95,17 +99,18 @@ namespace noctern { minus, {real_lit, ".1"}, plus, - statement_end, + semicolon, rbrace, - statement_end, + semicolon, }); noctern::compilation_unit cu(tokens.tokens); - noctern::symbol_table st(tokens.tokens, cu); + noctern::string_intern_table global_symbols; + noctern::symbol_table st(tokens.tokens, cu, global_symbols); noctern::interpreter interpreter(st); // TODO: safely unwrap this. 
-        noctern::token silly_add = *st.find_fn_decl("silly_add");
+        noctern::token silly_add = *st.find_fn_decl(global_symbols.intern("silly_add"));
 
         double x = 42.3;
         double y = -2.9;
diff --git a/src/noctern/nir.cpp b/src/noctern/nir.cpp
new file mode 100644
index 0000000..64c4ec4
--- /dev/null
+++ b/src/noctern/nir.cpp
@@ -0,0 +1,290 @@
+#include "./nir.hpp"
+
+// NOTE(review): the text inside angle brackets (header names, template argument lists) was
+// missing from the copy of this patch under review and is reconstructed here from usage.
+// Confirm against the repository.
+#include <array>
+#include <cassert>
+#include <charconv>
+#include <cstring>
+#include <limits>
+#include <span>
+#include <string_view>
+#include <system_error>
+#include <unordered_map>
+#include <vector>
+
+#include "noctern/enum.hpp"
+#include "noctern/inout.hpp"
+#include "noctern/tokenize.hpp"
+
+namespace noctern {
+    namespace {
+        // Parses all of `value` as a double; anything short of a complete, successful parse
+        // trips an assert.
+        double parse_double(std::string_view value) {
+            double answer = 0.0;
+            [[maybe_unused]] const auto [ptr, ec]
+                = std::from_chars(value.data(), value.data() + value.size(), answer);
+            assert(ptr == value.data() + value.size());
+            assert(ec == std::errc {});
+            return answer;
+        }
+    }
+
+    // Translates the token stream of one function into NIR, appending to the instruction and
+    // payload vectors it was handed.
+    struct nir::instructions::function_compiler {
+        in<const tokens> source;
+        inout<string_intern_table> global_names;
+
+        out<std::vector<instruction>> instructions;
+        out<std::vector<payload_word_t>> payloads;
+
+        // Name of each register, indexed by register; "" marks an unnamed temporary.
+        std::vector<std::string_view> register_names = {};
+        // Register of each named local (parameter or `let` binding).
+        std::unordered_map<std::string_view, register_index_t> registers = {};
+
+        // Registers of operands that have been produced but not yet consumed by an operator.
+        std::vector<register_index_t> expr_stack = {};
+
+        // Appends an `op` instruction; the matching `encode` overload decides what goes in the
+        // instruction's payload field.
+        template <opcode op, typename... Args>
+        void write(val_t<op>, Args... args) {
+            instructions->push_back(instruction {
+                .opcode = op,
+                .payload = encode(val<op>, args...),
+            });
+        }
+
+        // return_: the payload field is the register itself; nothing is added to `payloads`.
+        payload_index_t encode(val_t<opcode::return_>, register_index_t reg) {
+            return reg;
+        }
+
+        // load_nonlocal: payload words are [reg, interned name words...].
+        payload_index_t encode(
+            val_t<opcode::load_nonlocal>, register_index_t reg, interned_string global_name) {
+            auto array = global_name.encode_as(type<payload_word_t>);
+            auto index = static_cast<payload_index_t>(payloads->size());
+            payloads->push_back(reg);
+            payloads->insert(payloads->end(), array.begin(), array.end());
+            return index;
+        }
+
+        // load_int_lit: payload words are [reg, value]. The value must fit in one payload word.
+        payload_index_t encode(
+            val_t<opcode::load_int_lit>, register_index_t reg, double int_literal) {
+            // Converting an out-of-range double to an unsigned word is undefined, so reject
+            // both ends of the range.
+            assert(int_literal >= 0 && "internal error: Negative literals not yet supported");
+            assert(int_literal <= std::numeric_limits<payload_word_t>::max()
+                && "internal error: Large literals not yet supported");
+
+            auto index = static_cast<payload_index_t>(payloads->size());
+            payloads->push_back(reg);
+            payloads->push_back(static_cast<payload_word_t>(int_literal));
+            return index;
+        }
+
+        // load_real_lit: payload words are [reg, the double's bytes packed into whole words...].
+        payload_index_t encode(
+            val_t<opcode::load_real_lit>, register_index_t reg, double real_literal) {
+            auto index = static_cast<payload_index_t>(payloads->size());
+            payloads->push_back(reg);
+
+            // Number of payload words needed to hold a double, rounding up.
+            constexpr auto Div = sizeof(double) / sizeof(payload_word_t);
+            constexpr auto Rem = sizeof(double) % sizeof(payload_word_t);
+            constexpr auto DivRoundingUp = Div + (Rem != 0 ? 1 : 0);
+            constexpr auto Size = DivRoundingUp;
+
+            // TODO: doubles and endianness? May not matter at all.
+            std::array<payload_word_t, Size> result = {};
+            std::memcpy(result.data(), &real_literal, sizeof(real_literal));
+            payloads->insert(payloads->end(), result.begin(), result.end());
+
+            return index;
+        }
+
+        // call: payload words are [reg, function register, argument count, arguments...].
+        payload_index_t encode(val_t<opcode::call>, register_index_t reg,
+            register_index_t function, std::span<const register_index_t> args) {
+            auto index = static_cast<payload_index_t>(payloads->size());
+            payloads->push_back(reg);
+            payloads->push_back(function);
+            payloads->push_back(static_cast<payload_word_t>(args.size()));
+            payloads->insert(payloads->end(), args.begin(), args.end());
+            return index;
+        }
+
+        // Compiles a function's parameter list, return type, and body. `pos` must be at the first
+        // parameter on entry (or at the `)` if there are none). Parameters take registers in
+        // declaration order, starting from the current end of `register_names`.
+        void compile_function(const tokens& source, tokens::const_iterator& pos) {
+            while (source.id(*pos) != token_id::rparen) {
+                assert(source.id(*pos) == token_id::ident);
+                register_names.push_back(source.string(*pos));
+                [[maybe_unused]] auto [it, was_inserted]
+                    = registers.try_emplace(register_names.back(), register_names.size() - 1);
+
+                // Error: duplicate function parameter names!
+                assert(was_inserted && "Duplicated function parameter names");
+                ++pos;
+                // TODO: type
+                assert(source.id(*pos) == token_id::ident);
+                ++pos;
+            }
+            ++pos;
+            // TODO: return type
+            assert(source.id(*pos) == token_id::ident);
+            ++pos;
+
+            token_id id = source.id(*pos);
+            if (id == token_id::lbrace) {
+                compile_block(source, pos);
+            } else {
+                register_index_t reg = compile_expr(source, pos);
+                write(val<opcode::return_>, reg);
+            }
+        }
+
+        // Compiles `{ let ...; ... return expr; }`, with `pos` at the `{` on entry and just past
+        // the `}` on exit.
+        void compile_block(const tokens& source, tokens::const_iterator& pos) {
+            assert(source.id(*pos) == token_id::lbrace);
+            ++pos;
+
+            while (source.id(*pos) == token_id::let) {
+                ++pos;
+                assert(source.id(*pos) == token_id::ident);
+                std::string_view variable_name = source.string(*pos);
+                ++pos;
+
+                // TODO: type
+                assert(source.id(*pos) == token_id::ident);
+                ++pos;
+
+                // A `let` names the register that holds its initializer; no copy is emitted.
+                // If the initializer is just another named local, that register already has a
+                // name and the assert below fires.
+                register_index_t reg = compile_expr(source, pos);
+                assert(register_names[reg] == "" && "internal error: unnamed register has a name?");
+
+                // Bind the name to `reg` itself rather than to whichever register happens to be
+                // last in `register_names`.
+                register_names[reg] = variable_name;
+                [[maybe_unused]] auto [it, was_inserted]
+                    = registers.try_emplace(variable_name, reg);
+                // Error: duplicate variable names!
+                assert(was_inserted && "Duplicated variable names");
+            }
+
+            assert(source.id(*pos) == token_id::return_);
+            ++pos;
+            register_index_t reg = compile_expr(source, pos);
+            write(val<opcode::return_>, reg);
+            assert(source.id(*pos) == token_id::rbrace);
+            ++pos;
+        }
+
+        // Compiles one postfix expression, ending at (and consuming) its `;`, and returns the
+        // register holding its value.
+        //
+        // Operands push a register onto `expr_stack`; each binary operator pops two and pushes
+        // the register of a `call` to the operator's function ("add", "sub", "mul" or "div").
+        register_index_t compile_expr(const tokens& source, tokens::const_iterator& pos) {
+            assert(expr_stack.empty());
+
+            auto new_register = [&]() -> register_index_t {
+                auto reg = static_cast<register_index_t>(register_names.size());
+                register_names.push_back("");
+                return reg;
+            };
+
+            while (source.id(*pos) != token_id::semicolon) {
+                token next = *pos;
+                token_id id = source.id(next);
+                ++pos;
+
+                if (id == token_id::ident) {
+                    std::string_view name = source.string(next);
+
+                    auto it = registers.find(name);
+                    if (it != registers.end()) {
+                        // Local variable. No instructions necessary.
+                        expr_stack.push_back(it->second);
+                    } else {
+                        // Not a local: load it by name into a fresh register, and make that
+                        // register the operand.
+                        interned_string global_id = global_names->intern(name);
+                        register_index_t reg = new_register();
+                        expr_stack.push_back(reg);
+                        write(val<opcode::load_nonlocal>, reg, global_id);
+                    }
+                } else if (id == token_id::int_lit) {
+                    double d = parse_double(source.string(next));
+                    register_index_t reg = new_register();
+                    expr_stack.push_back(reg);
+                    write(val<opcode::load_int_lit>, reg, d);
+                } else if (id == token_id::real_lit) {
+                    double d = parse_double(source.string(next));
+                    register_index_t reg = new_register();
+                    expr_stack.push_back(reg);
+                    write(val<opcode::load_real_lit>, reg, d);
+                } else if (id == token_id::plus || id == token_id::minus || id == token_id::mult
+                    || id == token_id::div) {
+                    assert(expr_stack.size() >= 2);
+                    register_index_t second = expr_stack.back();
+                    expr_stack.pop_back();
+                    register_index_t first = expr_stack.back();
+                    expr_stack.pop_back();
+
+                    // Operators are compiled as calls to functions with these names.
+                    std::string_view operator_function_name;
+                    switch (id) {
+                    case token_id::plus: operator_function_name = "add"; break;
+                    case token_id::minus: operator_function_name = "sub"; break;
+                    case token_id::mult: operator_function_name = "mul"; break;
+                    case token_id::div: operator_function_name = "div"; break;
+                    default: assert(false && "not an operation"); break;
+                    }
+
+                    auto operator_id = new_register();
+                    write(val<opcode::load_nonlocal>, operator_id,
+                        global_names->intern(operator_function_name));
+                    register_index_t reg = new_register();
+                    expr_stack.push_back(reg);
+                    write(val<opcode::call>, reg, operator_id,
+                        std::array<register_index_t, 2>({first, second}));
+                }
+            }
+            ++pos;
+
+            assert(expr_stack.size() == 1);
+            const register_index_t result = expr_stack.back();
+            expr_stack.pop_back();
+            return result;
+        }
+    };
+
+    // Compiles the function at `from` and appends its instructions to this object. The returned
+    // handle records the index of the first instruction emitted for it.
+    auto nir::instructions::compile_function(
+        const tokens& source, token from, string_intern_table& global_names) -> function {
+        const auto index = static_cast<function_index_t>(instructions_.size());
+
+        function_compiler compiler {
+            .source = in(source),
+            .global_names = inout(global_names),
+            .instructions = out(instructions_),
+            .payloads = out(payloads_),
+        };
+        tokens::const_iterator pos = source.to_iterator(from);
+        compiler.compile_function(source, pos);
+
+        return function(index);
+    }
+}
diff --git a/src/noctern/nir.hpp b/src/noctern/nir.hpp
new file mode 100644
index 0000000..9d151c6
--- /dev/null
+++ b/src/noctern/nir.hpp
@@ -0,0 +1,249 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "noctern/enum.hpp"
+#include "noctern/intern_table.hpp"
+#include "noctern/tokenize.hpp"
+
+namespace noctern {
+    // NIR - Noctern IR
+    //
+    // This is a stripped down intermediate representation of the language frontend. The idea is
+    // that all frontend work should be handled by this IR.
+    //
+    // Some principles:
+    // - Ease of implementation/use over expressiveness.
+    // - No nested expressions. No unnamed temporaries.
+    // - Explicit over implicit.
+    // - No implicit conversions.
+    // - Operators are just functions with special names.
+ class nir { + public: + class instructions { + using function_index_t = token_index_t; + + public: + class function { + friend instructions; + + public: + constexpr function() = default; + + private: + explicit constexpr function(function_index_t index) + : index_(index) { + } + + function_index_t index_ = static_cast(-1); + }; + + enum class opcode : uint8_t { +#define NOCTERN_X_NIR_OPCODE(X) \ + X(nop) \ + X(load_nonlocal) \ + X(load_int_lit) \ + X(load_real_lit) \ + X(call) \ + X(return_) +#define NOCTERN_MAKE_ENUM(name) name, + NOCTERN_X_NIR_OPCODE(NOCTERN_MAKE_ENUM) +#undef NOCTERN_MAKE_ENUM + }; + + function compile_function( + const tokens& source, token from, string_intern_table& global_names); + + template + void visit(function func, Fn&& fn) const { + const std::span payload_span(payloads_); + for (instruction instruction : std::span(instructions_).subspan(func.index_)) { + enum_switch(instruction.opcode, [&](val_t op) { + using enum instructions::opcode; + if constexpr (op == nop) { + std::invoke(fn, op, -1); + } else if constexpr (op == return_) { + std::invoke(fn, op, register_index_t {instruction.payload}); + } else if constexpr (op == load_nonlocal) { + auto data = payload_span.subspan( + instruction.payload, payloads_.size() - instruction.payload); + + register_index_t reg = data[0]; + data = data.subspan(1, data.size() - 1); + + auto name = interned_string::decode_from(data); + + std::invoke(fn, op, reg, name); + } else if constexpr (op == load_int_lit) { + std::invoke(fn, op, register_index_t {payloads_[instruction.payload]}, + payload_word_t {payloads_[instruction.payload + 1]}); + } else if constexpr (op == load_real_lit) { + auto data = payload_span.subspan( + instruction.payload, payloads_.size() - instruction.payload); + + register_index_t reg = data[0]; + data = data.subspan(1, data.size() - 1); + + // TODO: doubles and endianness? May not matter at all. 
+ std::array bytes = {}; + std::memcpy(bytes.data(), + reinterpret_cast(data.data()), bytes.size()); + + std::invoke(fn, op, reg, std::bit_cast(bytes)); + } else { + static_assert(op == call); + + auto data = payload_span.subspan( + instruction.payload, payloads_.size() - instruction.payload); + + register_index_t reg = data[0]; + register_index_t function = data[1]; + payload_word_t num_args = data[2]; + auto args = data.subspan(3, num_args); + + std::invoke(fn, op, reg, function, args); + } + }); + } + } + + private: + friend enum_mixin; + + template + friend constexpr decltype(auto) switch_introspect(opcode op, Fn&& fn) { + switch (op) { + using enum opcode; + NOCTERN_X_NIR_OPCODE(NOCTERN_ENUM_X_INTROSPECT) + } + assert(false); + } + + template + friend constexpr decltype(auto) introspect(type_t, Fn&& fn) { + using enum opcode; + return std::invoke(std::forward(fn) +#define NOCTERN_NIR_OPCODE_TYPE(name) , val + NOCTERN_X_NIR_OPCODE(NOCTERN_NIR_OPCODE_TYPE) +#undef NOCTERN_NIR_OPCODE_TYPE + ); + } +#undef NOCTERN_X_NIR_OPCODE + + private: + struct function_compiler; + + using payload_index_t = uint32_t; + using payload_word_t = uint32_t; + using register_index_t = payload_word_t; + + struct instruction { + instructions::opcode opcode : 8; + payload_index_t payload : 24; + }; + + std::vector instructions_; + std::vector payloads_; + + std::vector> variable_names_; + }; + + class types { + using type_index_t = uint32_t; + using typecode_index_t = uint64_t; + static constexpr type_index_t unit = 0; + + public: + // The idea: top-down instructions on how to decompose a type. + // Rather than bottom-up instructions on how to compose a type. 
+ enum class opcode : uint8_t { +#define NOCTERN_X_NIR_OPCODE(X) X(return_) +#define NOCTERN_MAKE_ENUM(name) name, + NOCTERN_X_NIR_OPCODE(NOCTERN_MAKE_ENUM) +#undef NOCTERN_MAKE_ENUM + }; + + class type { + friend types; + + public: + constexpr type() = default; + + private: + explicit constexpr type(type_index_t index) + : index_(index) { + } + + type_index_t index_ = unit; + }; + + private: + using payload_word_t = uint32_t; + + struct instruction { + types::opcode opcode; + }; + + public: + class type_builder { + friend types; + + public: + private: + std::vector typecode_; + }; + + type define_type(type_builder, uint32_t align, uint32_t size) { + auto index = static_cast(types_.size()); + types_.push_back(typecode_index_t {0}); + type_sizes_.push_back(size_info {.alignment = align, .size = size}); + return type(index); + } + + private: + friend enum_mixin; + + template + friend constexpr decltype(auto) switch_introspect(opcode op, Fn&& fn) { + switch (op) { + using enum opcode; + NOCTERN_X_NIR_OPCODE(NOCTERN_ENUM_X_INTROSPECT) + } + assert(false); + } + + template + friend constexpr decltype(auto) introspect(type_t, Fn&& fn) { + using enum opcode; + return std::invoke(std::forward(fn) +#define NOCTERN_NIR_OPCODE_TYPE(name) , val + NOCTERN_X_NIR_OPCODE(NOCTERN_NIR_OPCODE_TYPE) +#undef NOCTERN_NIR_OPCODE_TYPE + ); + } +#undef NOCTERN_X_NIR_OPCODE + + private: + struct size_info { + uint32_t alignment; + uint32_t size; + }; + + std::vector types_; + std::vector typecode_ = {instruction {.opcode = opcode::return_}}; + std::vector type_sizes_; + }; + + private: + instructions instructions_; + std::vector functions_; + }; +} diff --git a/src/noctern/parser.cpp b/src/noctern/parser.cpp index c8e0238..05c36f5 100644 --- a/src/noctern/parser.cpp +++ b/src/noctern/parser.cpp @@ -11,17 +11,20 @@ namespace noctern { enum class rule : uint8_t { #define NOCTERN_X_RULE(X) \ X(file) /* ::= (list) fndef */ \ - X(fndef) /* ::= <(> fn_params <)> <:> expr <;> */ \ - X(fn_params) 
/* ::= (list: join <,>) */ \ + X(fndef) /* ::= <(> fn_params <)> <:> <=> expr <;> */ \ + X(fn_params) /* ::= (list: join <,>) ( <:> ) */ \ X(expr) /* ::= block | add_sub_expr */ \ X(block) /* ::= <{> ((list) valdecl) return_ <}> */ \ X(return_) /* ::= expr <;> */ \ - X(valdecl) /* ::= <=> expr <;> */ \ + X(valdecl) /* ::= <:> <=> expr <;> */ \ X(add_sub_expr) /* ::= div_mul_expr add_sub_expr2 */ \ X(add_sub_expr2) /* ::= <+> expr | <-> expr | */ \ - X(div_mul_expr) /* ::= base_expr div_mul_expr2 */ \ - X(div_mul_expr2) /* ::= div_mul_expr | <*> div_mul_expr | */ \ - X(base_expr) /* ::= <(> expr <)> | | | */ + X(div_mul_expr) /* ::= fn_call_expr div_mul_expr2 */ \ + X(div_mul_expr2) /* ::= div_mul_expr | <*> div_mul_expr | */ \ + X(fn_call_expr) /* ::= base_expr fn_call_expr2 */ \ + X(fn_call_expr2) /* ::= <(> (list: join <,>) expr <)> | */ \ + X(base_expr) /* ::= <(> expr <)> | | | */ \ + X(type) /* ::= */ #define NOCTERN_MAKE_ENUM_VALUE(name) name, NOCTERN_X_RULE(NOCTERN_MAKE_ENUM_VALUE) #undef NOCTERN_MAKE_ENUM_VALUE @@ -70,7 +73,10 @@ namespace noctern { tokens::const_iterator out; auto advance_token(token_id token_id) { - if (tokens.empty() || input.id(tokens.front()) != token_id) { + if (tokens.empty()) { + assert(false && "parse error"); + } + if (input.id(tokens.front()) != token_id) { assert(false && "parse error"); } auto it = input.extract(tokens.begin()); @@ -87,7 +93,7 @@ namespace noctern { void parse_at(val_t) { while (!tokens.empty()) { const token_id token_id = input.id(tokens.front()); - if (token_id == token_id::fn_intro) { + if (token_id == token_id::def) { parse_at(val); } else { // ERROR! 
@@ -97,23 +103,29 @@ namespace noctern { } void parse_at(val_t) { - push_token(advance_token(token_id::fn_intro)); + push_token(advance_token(token_id::def)); push_token(advance_token(token_id::ident)); advance_token(token_id::lparen); parse_at(val); push_token(advance_token(token_id::rparen)); - advance_token(token_id::fn_outro); + advance_token(token_id::colon); + + parse_at(val); + + advance_token(token_id::assign); parse_at(val); - push_token(advance_token(token_id::statement_end)); + push_token(advance_token(token_id::semicolon)); } void parse_at(val_t) { while (!tokens.empty() && input.id(tokens.front()) != token_id::rparen) { push_token(advance_token(token_id::ident)); + advance_token(token_id::colon); + parse_at(val); if (!tokens.empty() && input.id(tokens.front()) != token_id::rparen) { if (input.id(tokens.front()) != token_id::comma) { @@ -159,17 +171,19 @@ namespace noctern { void parse_at(val_t) { push_token(advance_token(token_id::return_)); parse_at(val); - push_token(advance_token(token_id::statement_end)); + push_token(advance_token(token_id::semicolon)); } void parse_at(val_t) { - push_token(advance_token(token_id::valdef_intro)); + push_token(advance_token(token_id::let)); push_token(advance_token(token_id::ident)); - advance_token(token_id::valdef_outro); + advance_token(token_id::colon); + parse_at(val); + advance_token(token_id::assign); parse_at(val); - push_token(advance_token(token_id::statement_end)); + push_token(advance_token(token_id::semicolon)); } void parse_at(val_t) { @@ -194,7 +208,7 @@ namespace noctern { } void parse_at(val_t) { - parse_at(val); + parse_at(val); parse_at(val); } @@ -213,6 +227,32 @@ namespace noctern { } } + void parse_at(val_t) { + parse_at(val); + parse_at(val); + } + + void parse_at(val_t) { + if (tokens.empty()) { + // Okay! 
+ return; + } + token_id token_id = input.id(tokens.front()); + if (token_id == token_id::lparen) { + push_token(advance_token(token_id::lparen)); + + while (!tokens.empty() && input.id(tokens.front()) != token_id::rparen) { + parse_at(val); + + if (!tokens.empty() && input.id(tokens.front()) != token_id::rparen) { + advance_token(token_id::comma); + } + } + + push_token(advance_token(token_id::rparen)); + } + } + void parse_at(val_t) { if (tokens.empty()) { // Error! @@ -232,6 +272,10 @@ namespace noctern { assert(false && "parse error"); } } + + void parse_at(val_t) { + push_token(advance_token(token_id::ident)); + } }; } diff --git a/src/noctern/parser.test.cpp b/src/noctern/parser.test.cpp index 4b41485..d537e52 100644 --- a/src/noctern/parser.test.cpp +++ b/src/noctern/parser.test.cpp @@ -76,28 +76,36 @@ namespace noctern { using enum noctern::token_id; fabricated_tokens tokens = noctern::make_tokens( - // def silly_add(x, y,): { - // let z = y - 0.2; - // return y + z + x * 2. - 2 + .1; + // def silly_add(x: f64, y: f64,): f64 = { + // let z: f64 = y - 0.2; + // return y + z + x * 2. 
- return_me(2) + .1; // }; { - fn_intro, + def, {ident, "silly_add"}, lparen, {ident, "x"}, + colon, + {ident, "f64"}, comma, {ident, "y"}, + colon, + {ident, "f64"}, comma, rparen, - fn_outro, + colon, + {ident, "f64"}, + assign, lbrace, - valdef_intro, + let, {ident, "z"}, - valdef_outro, + colon, + {ident, "f64"}, + assign, {ident, "y"}, minus, {real_lit, "0.2"}, - statement_end, + semicolon, return_, {ident, "y"}, plus, @@ -107,45 +115,55 @@ namespace noctern { mult, {real_lit, "2."}, minus, + {ident, "return_me"}, + lparen, {int_lit, "2"}, + rparen, plus, {real_lit, ".1"}, - statement_end, + semicolon, rbrace, - statement_end, + semicolon, }); noctern::tokens result = noctern::parse(tokens.tokens); CHECK_THAT(noctern::elaborate(result), Catch::Matchers::Equals(std::vector({ - fn_intro, + def, {ident, "silly_add"}, {ident, "x"}, + {ident, "f64"}, {ident, "y"}, + {ident, "f64"}, rparen, + {ident, "f64"}, lbrace, - valdef_intro, + let, {ident, "z"}, + {ident, "f64"}, {ident, "y"}, {real_lit, "0.2"}, minus, - statement_end, + semicolon, return_, {ident, "y"}, {ident, "z"}, {ident, "x"}, {real_lit, "2."}, mult, + {ident, "return_me"}, + lparen, {int_lit, "2"}, + rparen, {real_lit, ".1"}, plus, minus, plus, plus, - statement_end, + semicolon, rbrace, - statement_end, + semicolon, }))); } } diff --git a/src/noctern/symbol_table.cpp b/src/noctern/symbol_table.cpp index e6be7d0..6d725df 100644 --- a/src/noctern/symbol_table.cpp +++ b/src/noctern/symbol_table.cpp @@ -16,11 +16,12 @@ namespace noctern { } } - symbol_table::symbol_table(const tokens& input, const compilation_unit& unit) + symbol_table::symbol_table( + const tokens& input, const compilation_unit& unit, string_intern_table& string_interner) : fn_table_(from_range(unit.fn_defs() | views::transform([&](token token) { const auto it = input.to_iterator(token); assert(input.id(it[1]) == token_id::ident); - return std::pair(input.string(it[1]), it[2]); + return 
std::pair(string_interner.intern(input.string(it[1])), it[2]); }))) { } } \ No newline at end of file diff --git a/src/noctern/symbol_table.hpp b/src/noctern/symbol_table.hpp index 7daad0a..8a6db33 100644 --- a/src/noctern/symbol_table.hpp +++ b/src/noctern/symbol_table.hpp @@ -5,14 +5,16 @@ #include #include "noctern/compilation_unit.hpp" +#include "noctern/intern_table.hpp" #include "noctern/tokenize.hpp" namespace noctern { class symbol_table { public: - explicit symbol_table(const tokens& input, const compilation_unit& unit); + explicit symbol_table(const tokens& input, const compilation_unit& unit, + string_intern_table& string_interner); - std::optional find_fn_decl(std::string_view name) const { + std::optional find_fn_decl(interned_string name) const { auto it = fn_table_.find(name); if (it == fn_table_.end()) return std::nullopt; return it->second; @@ -20,6 +22,6 @@ namespace noctern { private: // TODO: use a better map type. - std::unordered_map fn_table_; + std::unordered_map fn_table_; }; } \ No newline at end of file diff --git a/src/noctern/tokenize.hpp b/src/noctern/tokenize.hpp index d41c1f0..f03b1be 100644 --- a/src/noctern/tokenize.hpp +++ b/src/noctern/tokenize.hpp @@ -30,13 +30,13 @@ namespace noctern { X(rbrace) \ X(lparen) \ X(rparen) \ - X(statement_end) \ + X(semicolon) \ \ - X(fn_intro) \ - X(fn_outro) \ + X(def) \ + X(colon) \ \ - X(valdef_intro) \ - X(valdef_outro) \ + X(let) \ + X(assign) \ X(ident) \ \ X(int_lit) \ @@ -46,7 +46,6 @@ namespace noctern { X(minus) \ X(mult) \ X(div) \ - \ X(return_) #define NOCTERN_MAKE_ENUM_VALUE(name) name, NOCTERN_X_TOKEN(NOCTERN_MAKE_ENUM_VALUE) @@ -112,10 +111,10 @@ namespace noctern { inline constexpr auto token_data = string_data {}; template <> - inline constexpr auto token_data = empty_data<"def"> {}; + inline constexpr auto token_data = empty_data<"def"> {}; template <> - inline constexpr auto token_data = empty_data<":"> {}; + inline constexpr auto token_data = empty_data<":"> {}; template <> 
inline constexpr auto token_data = empty_data<"{"> {}; @@ -127,10 +126,10 @@ namespace noctern { inline constexpr auto token_data = empty_data<","> {}; template <> - inline constexpr auto token_data = empty_data<"let"> {}; + inline constexpr auto token_data = empty_data<"let"> {}; template <> - inline constexpr auto token_data = empty_data<"="> {}; + inline constexpr auto token_data = empty_data<"="> {}; template <> inline constexpr auto token_data = string_data {}; @@ -160,7 +159,7 @@ namespace noctern { inline constexpr auto token_data = empty_data<")"> {}; template <> - inline constexpr auto token_data = empty_data<";"> {}; + inline constexpr auto token_data = empty_data<";"> {}; template <> inline constexpr auto token_data = empty_data<"return"> {}; diff --git a/src/noctern/tokenize.test.cpp b/src/noctern/tokenize.test.cpp index e306792..556f7c5 100644 --- a/src/noctern/tokenize.test.cpp +++ b/src/noctern/tokenize.test.cpp @@ -31,15 +31,15 @@ namespace noctern { std::vector test_cases = { {.name = "empty"}, { - .name = "fn_intro", + .name = "def", .input = "def", - .expected = {token_id::fn_intro}, + .expected = {token_id::def}, .expected_str_data = {"def"}, }, { - .name = "fn_outro", + .name = "colon", .input = ":", - .expected = {token_id::fn_outro}, + .expected = {token_id::colon}, .expected_str_data = {":"}, }, { @@ -55,15 +55,15 @@ namespace noctern { .expected_str_data = {"}"}, }, { - .name = "valdef_intro", + .name = "let", .input = "let", - .expected = {token_id::valdef_intro}, + .expected = {token_id::let}, .expected_str_data = {"let"}, }, { - .name = "valdef_outro", + .name = "assign", .input = "=", - .expected = {token_id::valdef_outro}, + .expected = {token_id::assign}, .expected_str_data = {"="}, }, { @@ -188,9 +188,9 @@ namespace noctern { .expected_str_data = {")"}, }, { - .name = "statement_end", + .name = "semicolon", .input = ";", - .expected = {token_id::statement_end}, + .expected = {token_id::semicolon}, .expected_str_data = {";"}, }, { @@ 
-238,15 +238,15 @@ namespace noctern { test_cases.push_back({ .name = "function definition stream", .input = R"(def foobar(x, y): { let z = y; return z + x + 0.2; })", - .expected = {token_id::fn_intro, token_id::space, token_id::ident, token_id::lparen, + .expected = {token_id::def, token_id::space, token_id::ident, token_id::lparen, token_id::ident, token_id::comma, token_id::space, token_id::ident, - token_id::rparen, token_id::fn_outro, token_id::space, token_id::lbrace, - token_id::space, token_id::valdef_intro, token_id::space, token_id::ident, - token_id::space, token_id::valdef_outro, token_id::space, token_id::ident, - token_id::statement_end, token_id::space, token_id::return_, token_id::space, + token_id::rparen, token_id::colon, token_id::space, token_id::lbrace, + token_id::space, token_id::let, token_id::space, token_id::ident, + token_id::space, token_id::assign, token_id::space, token_id::ident, + token_id::semicolon, token_id::space, token_id::return_, token_id::space, token_id::ident, token_id::space, token_id::plus, token_id::space, token_id::ident, token_id::space, token_id::plus, token_id::space, - token_id::real_lit, token_id::statement_end, token_id::space, token_id::rbrace}, + token_id::real_lit, token_id::semicolon, token_id::space, token_id::rbrace}, .expected_str_data = {"def", " ", "foobar", "(", "x", ",", " ", "y", ")", ":", " ", "{", " ", "let", " ", "z", " ", "=", " ", "y", ";", " ", "return", " ", "z", " ", "+", " ", "x", " ", "+", " ", "0.2", ";", " ", "}"}, diff --git a/src/nocternc.main.cpp b/src/nocternc.main.cpp index 3fe0ff4..08717eb 100644 --- a/src/nocternc.main.cpp +++ b/src/nocternc.main.cpp @@ -7,7 +7,9 @@ #include #include "noctern/compilation_unit.hpp" +#include "noctern/intern_table.hpp" #include "noctern/interpreter.hpp" +#include "noctern/nir.hpp" #include "noctern/parser.hpp" #include "noctern/symbol_table.hpp" #include "noctern/tokenize.hpp" @@ -56,19 +58,24 @@ int main(int argc, char** argv) { noctern::tokens 
tokens = noctern::tokenize_all(source); tokens = noctern::parse(std::move(tokens)); + noctern::compilation_unit compile_unit(tokens); - noctern::symbol_table symbol_table(tokens, compile_unit); + noctern::string_intern_table global_symbols; + noctern::symbol_table symbol_table(tokens, compile_unit, global_symbols); - std::optional main = symbol_table.find_fn_decl("Main"); + std::optional main = symbol_table.find_fn_decl(global_symbols.intern("Main")); if (!main.has_value()) { fmt::println(stderr, "No `Main()` function found!"); return 1; } - noctern::interpreter interpreter(std::move(symbol_table)); - double result = interpreter.eval_fn(tokens, *main, noctern::interpreter::frame {}); + noctern::nir::types types; + [[maybe_unused]] noctern::nir::types::type f64 + = types.define_type({}, alignof(double), sizeof(double)); - fmt::println(stdout, "Result: {}", result); + noctern::nir::instructions instructions; + [[maybe_unused]] noctern::nir::instructions::function fn + = instructions.compile_function(tokens, *main, global_symbols); return 0; } diff --git a/src/nocterni.main.cpp b/src/nocterni.main.cpp new file mode 100644 index 0000000..f3fd3ab --- /dev/null +++ b/src/nocterni.main.cpp @@ -0,0 +1,77 @@ +#include +#include +#include +#include +#include + +#include + +#include "noctern/compilation_unit.hpp" +#include "noctern/intern_table.hpp" +#include "noctern/interpreter.hpp" +#include "noctern/parser.hpp" +#include "noctern/symbol_table.hpp" +#include "noctern/tokenize.hpp" + +int main(int argc, char** argv) { + if (argc != 2) { + fmt::println(stderr, "Usage: nocternc "); + return 1; + } + + // TODO: mmap + std::FILE* file = std::fopen(argv[1], "rb"); + if (file == nullptr) { + std::string err(std::strerror(errno)); + fmt::println(stderr, "Couldn't find file {}: {}", argv[1], err); + return 1; + } + if (std::fseek(file, 0, SEEK_END) != 0) { + std::string err(std::strerror(errno)); + fmt::println(stderr, "fseek failed: {}", err); + return 1; + } + long length = 
std::ftell(file); + if (length == -1) { + std::string err(std::strerror(errno)); + fmt::println(stderr, "ftell failed: {}", err); + return 1; + } + if (std::fseek(file, 0, SEEK_SET) != 0) { + std::string err(std::strerror(errno)); + fmt::println(stderr, "fseek failed: {}", err); + return 1; + } + std::string source(length, '\0'); + [[maybe_unused]] size_t c = std::fread(source.data(), source.size(), length, file); + if (std::ferror(file) != 0) { + std::string err(std::strerror(errno)); + fmt::println(stderr, "fread failed: {}", err); + return 1; + } + if (std::feof(file) == 0) { + std::string err(std::strerror(errno)); + fmt::println(stderr, "failed to read entire file; didn't find eof."); + return 1; + } + + noctern::tokens tokens = noctern::tokenize_all(source); + tokens = noctern::parse(std::move(tokens)); + + noctern::compilation_unit compile_unit(tokens); + noctern::string_intern_table global_symbols; + noctern::symbol_table symbol_table(tokens, compile_unit, global_symbols); + + std::optional main = symbol_table.find_fn_decl(global_symbols.intern("Main")); + if (!main.has_value()) { + fmt::println(stderr, "No `Main()` function found!"); + return 1; + } + + noctern::interpreter interpreter(std::move(symbol_table)); + double result = interpreter.eval_fn(tokens, *main, noctern::interpreter::frame {}); + + fmt::println(stdout, "Result: {}", result); + + return 0; +} diff --git a/src/parser.main.cpp b/src/parser.main.cpp new file mode 100644 index 0000000..a084516 --- /dev/null +++ b/src/parser.main.cpp @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include + +#include + +#include "noctern/compilation_unit.hpp" +#include "noctern/intern_table.hpp" +#include "noctern/interpreter.hpp" +#include "noctern/parser.hpp" +#include "noctern/symbol_table.hpp" +#include "noctern/tokenize.hpp" + +int main(int argc, char** argv) { + if (argc != 2) { + fmt::println(stderr, "Usage: nocternc "); + return 1; + } + + // TODO: mmap + std::FILE* file = std::fopen(argv[1], 
"rb"); + if (file == nullptr) { + std::string err(std::strerror(errno)); + fmt::println(stderr, "Couldn't find file {}: {}", argv[1], err); + return 1; + } + if (std::fseek(file, 0, SEEK_END) != 0) { + std::string err(std::strerror(errno)); + fmt::println(stderr, "fseek failed: {}", err); + return 1; + } + long length = std::ftell(file); + if (length == -1) { + std::string err(std::strerror(errno)); + fmt::println(stderr, "ftell failed: {}", err); + return 1; + } + if (std::fseek(file, 0, SEEK_SET) != 0) { + std::string err(std::strerror(errno)); + fmt::println(stderr, "fseek failed: {}", err); + return 1; + } + std::string source(length, '\0'); + [[maybe_unused]] size_t c = std::fread(source.data(), source.size(), length, file); + if (std::ferror(file) != 0) { + std::string err(std::strerror(errno)); + fmt::println(stderr, "fread failed: {}", err); + return 1; + } + if (std::feof(file) == 0) { + std::string err(std::strerror(errno)); + fmt::println(stderr, "failed to read entire file; didn't find eof."); + return 1; + } + + noctern::tokens tokens = noctern::tokenize_all(source); + tokens = noctern::parse(std::move(tokens)); + + for (noctern::token token : tokens) { + noctern::token_id id = tokens.id(token); + if (id == noctern::token_id::space) continue; + if (noctern::has_data(id)) { + fmt::println("<{}: {}>", stringify(id), tokens.string(token)); + } else { + fmt::println("<{}>", stringify(id)); + } + } +} \ No newline at end of file