diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..4ec790d --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,180 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "block-buffer" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0940dc441f31689269e10ac70eb1002a3a1d3ad1390e030043662eb7fe4688b" +dependencies = [ + "block-padding", + "byte-tools", + "byteorder", + "generic-array", +] + +[[package]] +name = "block-padding" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa79dedbb091f449f1f39e53edf88d5dbe95f895dae6135a8d7b881fb5af73f5" +dependencies = [ + "byte-tools", +] + +[[package]] +name = "byte-tools" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7" + +[[package]] +name = "byteorder" +version = "1.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" + +[[package]] +name = "digest" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5" +dependencies = [ + "generic-array", +] + +[[package]] +name = "fake-simd" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed" + +[[package]] +name = "generic-array" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c68f0274ae0e023facc3c97b2e00f076be70e254bc851d972503b328db79b2ec" +dependencies = [ + "typenum", +] + +[[package]] +name = "hm-asm" 
+version = "0.1.0" +dependencies = [ + "pest", + "pest_derive", +] + +[[package]] +name = "maplit" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" + +[[package]] +name = "opaque-debug" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c" + +[[package]] +name = "pest" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53" +dependencies = [ + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "833d1ae558dc601e9a60366421196a8d94bc0ac980476d0b67e1d0988d72b2d0" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99b8db626e31e5b81787b9783425769681b347011cc59471e33ea46d2ea0cf55" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "pest_meta" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54be6e404f5317079812fc8f9f5279de376d8856929e21c184ecf6bbd692a11d" +dependencies = [ + "maplit", + "pest", + "sha-1", +] + +[[package]] +name = "proc-macro2" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "sha-1" +version 
= "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7d94d0bede923b3cea61f3f1ff57ff8cdfd77b400fb8f9998949e0cf04163df" +dependencies = [ + "block-buffer", + "digest", + "fake-simd", + "opaque-debug", +] + +[[package]] +name = "syn" +version = "1.0.52" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c1e438504729046a5cfae47f97c30d6d083c7d91d94603efdae3477fc070d4c" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "typenum" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "373c8a200f9e67a0c95e62a4f52fbf80c23b4381c05a17845531982fa99e6b33" + +[[package]] +name = "ucd-trie" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56dee185309b50d1f11bfedef0fe6d036842e3fb77413abef29f8f8d1c5d4c1c" + +[[package]] +name = "unicode-xid" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..a1ec4f4 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "hm-asm" +version = "0.1.0" +authors = ["Henrik Boeving "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +pest = "2.0" +pest_derive = "2.0" diff --git a/README.md b/README.md index 309f8ee..a3d758d 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,15 @@ # hm-asm -A small assembler I wrote for use with the microprocessor from the Book "Grundlagen der Technische Informatik" +A small assembler I wrote for use with the microprocessor from the book "Grundlagen der Technischen Informatik" by Dirk +W. Hoffmann. + +## How does it work? +You run the binary with a path to an assembler file like this `cargo run path/to/file.asm`. + +## Syntax?
+You can find some syntax examples in `examples/`
+
+## Limitations
+Note that since this is only a 4 bit microprocessor, programs cannot possibly contain more than 16
+instructions. Furthermore, since the parameters of the instructions are stored in the same part of memory as where you
+can store values with the `STA` instruction, it is technically possible to overwrite your program at runtime and do a
+sort of self-modifying programming style. This assembler does not warn you if you do this as of now.
diff --git a/examples/add_endless.asm b/examples/add_endless.asm
new file mode 100644
index 0000000..5749763
--- /dev/null
+++ b/examples/add_endless.asm
@@ -0,0 +1,3 @@
+LDA #0
+ADD: ADD #1
+JMP ADD
diff --git a/examples/comments.asm b/examples/comments.asm
new file mode 100644
index 0000000..61a83a7
--- /dev/null
+++ b/examples/comments.asm
@@ -0,0 +1,2 @@
+// comment
+LDA #1
diff --git a/examples/labels.asm b/examples/labels.asm
new file mode 100644
index 0000000..03361de
--- /dev/null
+++ b/examples/labels.asm
@@ -0,0 +1,3 @@
+START: LDA #1
+// Jumps to the label start
+JMP START
diff --git a/examples/list_of_ops.asm b/examples/list_of_ops.asm
new file mode 100644
index 0000000..39c6188
--- /dev/null
+++ b/examples/list_of_ops.asm
@@ -0,0 +1,14 @@
+NOP
+LDA #1
+LDA (1)
+STA (1)
+ADD #1
+ADD (1)
+SUB #1
+SUB (1)
+JMP 0
+BRZ #1
+BRC #1
+BRN #1
+MYLABEL: NOP
+JMP MYLABEL
diff --git a/src/asm.pest b/src/asm.pest
new file mode 100644
index 0000000..f53cee1
--- /dev/null
+++ b/src/asm.pest
@@ -0,0 +1,40 @@
+// Grammar for the hm-asm assembly language.
+//
+// A program is a newline-separated list of statements; blank lines are allowed.
+program = _{ SOI ~ "\n"* ~ (stmt ~ "\n"+) * ~ stmt? ~ EOI }
+
+// A statement is an instruction with an optional `LABEL:` prefix.
+stmt = { ((label ~ ":")? ~ instruction)}
+
+instruction = {
+    no_arg_instruction |
+    arg_instruction ~ argument |
+    jump_instruction ~ jump_argument |
+    memory_location_instruction ~ memory_location |
+    constant_arg_instruction ~ digit_literal
+}
+
+memory_location_instruction = {"STA"}
+constant_arg_instruction = {"BRZ" | "BRC" | "BRN"}
+jump_instruction = {"JMP"}
+arg_instruction = {"LDA" | "ADD" | "SUB"}
+no_arg_instruction = { "NOP" }
+
+jump_argument = { jump_location | label }
+argument = { memory_location | digit_literal }
+
+// The operand rules are atomic (`@`) so that no implicit WHITESPACE is accepted
+// inside them; the parser slices the delimiters off by byte position.
+memory_location = @{ "(" ~ ASCII_HEX_DIGIT ~ ")" }
+digit_literal = @{ "#" ~ ASCII_HEX_DIGIT }
+
+// A jump location is a single hex digit that is not the start of a longer word.
+// Without the lookahead, `JMP ADD` would match `A` here and then fail on `DD`,
+// because PEG ordered choice never retries `label` once this rule has matched.
+jump_location = @{ ASCII_HEX_DIGIT ~ !ASCII_ALPHANUMERIC }
+
+// Atomic so that `MY LABEL` is not silently accepted as one label.
+label = @{ ASCII_ALPHA_UPPER+ }
+
+WHITESPACE = _{ " " | "\t" }
+COMMENT = _{"//" ~ (!"\n" ~ ANY)* }
diff --git a/src/asm.rs b/src/asm.rs
new file mode 100644
index 0000000..3440059
--- /dev/null
+++ b/src/asm.rs
@@ -0,0 +1,66 @@
+//! Data model for parsed hm-asm programs.
+
+/// Operand of the `LDA`, `ADD` and `SUB` instructions.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum Argument {
+    /// A memory address, written `(X)` in the source.
+    MemoryLocation(u8),
+    /// An immediate value, written `#X` in the source.
+    Constant(u8),
+}
+
+/// A named position in the program.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub struct Label<'a> {
+    /// The label text as written in the source.
+    pub name: &'a str,
+    /// Zero-based index of the instruction the label is attached to.
+    pub location: u8,
+}
+
+/// Target of a `JMP` instruction.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum JumpArgument<'a> {
+    /// A literal hexadecimal target.
+    Location(u8),
+    /// The name of a label; it is not resolved while parsing.
+    Label(&'a str),
+}
+
+/// One parsed statement: the instruction and the label attached to it, if any.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum Instruction<'a> {
+    NoArgumentInstruction(NoArgumentInstruction, Option<Label<'a>>),
+    MemoryLocationInstruction(MemoryLocationInstruction, Option<Label<'a>>),
+    ConstantArgumentInstruction(ConstantArgumentInstruction, Option<Label<'a>>),
+    ArgumentInstruction(ArgumentInstruction, Option<Label<'a>>),
+    Jump(JumpArgument<'a>, Option<Label<'a>>),
+}
+
+/// Instructions without an operand.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum NoArgumentInstruction {
+    NOP,
+}
+
+/// Instructions whose operand must be a memory location.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum MemoryLocationInstruction {
+    STA(u8),
+}
+
+/// Instructions whose operand must be a `#X` literal.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum ConstantArgumentInstruction {
+    BRZ(u8),
+    BRC(u8),
+    BRN(u8),
+}
+
+/// Instructions that accept either a memory location or a `#X` literal.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum ArgumentInstruction {
+    LDA(Argument),
+    ADD(Argument),
+    SUB(Argument),
+}
diff --git a/src/generate.rs b/src/generate.rs
new file mode 100644
index 0000000..e69de29
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..504ccae
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,47 @@
+use std::env;
+use std::fs;
+use std::process;
+
+#[macro_use]
+extern crate pest_derive;
+
+use pest::Parser;
+
+mod asm;
+mod generate;
+mod parse;
+
+use parse::{parse_asm, AsmParser};
+
+/// Reads the assembler file named by the first command line argument, parses it
+/// and prints the resulting instructions.
+///
+/// A missing argument, an unreadable file or a syntax error is reported on
+/// stderr and the process exits with status 1.
+fn main() {
+    let file_name = match env::args().nth(1) {
+        Some(file_name) => file_name,
+        None => {
+            eprintln!("No input file was provided");
+            process::exit(1);
+        }
+    };
+
+    let file_content = match fs::read_to_string(&file_name) {
+        Ok(file_content) => file_content,
+        Err(e) => {
+            eprintln!("Could not read the provided asm file: {}", e);
+            process::exit(1);
+        }
+    };
+
+    let pairs = match AsmParser::parse(parse::Rule::program, &file_content) {
+        Ok(pairs) => pairs,
+        Err(e) => {
+            eprintln!("{}", e);
+            process::exit(1);
+        }
+    };
+
+    println!("{:#?}", parse_asm(pairs));
+}
diff --git a/src/parse.rs b/src/parse.rs
new file mode 100644
index 0000000..80013d4
--- /dev/null
+++ b/src/parse.rs
@@ -0,0 +1,191 @@
+use crate::asm::*;
+use pest::iterators::{Pair, Pairs};
+
+#[derive(Parser)]
+#[grammar = "asm.pest"]
+pub struct AsmParser;
+
+/// Number of instructions that fit into the 4 bit address space of the processor.
+const MAX_INSTRUCTIONS: u8 = 16;
+
+/// Converts the pairs produced by parsing `Rule::program` into instructions.
+///
+/// Every `stmt` pair yields one [`Instruction`]. A label in front of a statement is
+/// stored together with the zero-based index of that statement.
+///
+/// # Panics
+///
+/// Panics if the program contains more than 16 instructions.
+pub fn parse_asm<'a>(pairs: Pairs<'a, Rule>) -> Vec<Instruction<'a>> {
+    let mut instructions = Vec::new();
+    let mut instruction_counter: u8 = 0;
+
+    for pair in pairs {
+        // `program` is a silent rule, so its `stmt` children and `EOI` arrive here directly.
+        if pair.as_rule() != Rule::stmt {
+            continue;
+        }
+
+        // Checked before parsing so that a program of exactly 16 instructions is accepted.
+        if instruction_counter == MAX_INSTRUCTIONS {
+            panic!("This program contains more than 16 instructions, that is impossible on this processor");
+        }
+
+        // A statement is either `instruction` or `label instruction`.
+        let mut parts = pair.into_inner();
+        let instruction = match (parts.next(), parts.next()) {
+            (Some(label), Some(instruction)) => {
+                parse_instruction(instruction, Some(label), instruction_counter)
+            }
+            (Some(instruction), None) => {
+                parse_instruction(instruction, None, instruction_counter)
+            }
+            _ => unreachable!(),
+        };
+        instructions.push(instruction);
+        instruction_counter += 1;
+    }
+
+    instructions
+}
+
+/// Parses one `instruction` pair; `label` is the optional label pair in front of it.
+fn parse_instruction<'a>(
+    instruction: Pair<'a, Rule>,
+    label: Option<Pair<'a, Rule>>,
+    instruction_counter: u8,
+) -> Instruction<'a> {
+    let label = label.map(|l| parse_label(l, instruction_counter));
+    let mut parts = instruction.into_inner();
+    let mnemonic = parts.next().unwrap();
+
+    match mnemonic.as_rule() {
+        Rule::no_arg_instruction => parse_no_arg_instruction(mnemonic, label),
+        Rule::arg_instruction => parse_arg_instruction(mnemonic, parts.next().unwrap(), label),
+        Rule::jump_instruction => parse_jump_instruction(mnemonic, parts.next().unwrap(), label),
+        Rule::memory_location_instruction => {
+            parse_memory_location_instruction(mnemonic, parts.next().unwrap(), label)
+        }
+        Rule::constant_arg_instruction => {
+            parse_constant_arg_instruction(mnemonic, parts.next().unwrap(), label)
+        }
+        _ => unreachable!(),
+    }
+}
+
+/// Builds the instruction for a mnemonic that takes no operand.
+fn parse_no_arg_instruction<'a>(
+    instruction: Pair<'a, Rule>,
+    label: Option<Label<'a>>,
+) -> Instruction<'a> {
+    match instruction.as_str() {
+        "NOP" => Instruction::NoArgumentInstruction(NoArgumentInstruction::NOP, label),
+        _ => unreachable!(),
+    }
+}
+
+/// Builds the instruction for `LDA`, `ADD` and `SUB`.
+fn parse_arg_instruction<'a>(
+    instruction: Pair<'a, Rule>,
+    arg: Pair<'a, Rule>,
+    label: Option<Label<'a>>,
+) -> Instruction<'a> {
+    let arg = parse_argument(arg);
+    let instruction = match instruction.as_str() {
+        "LDA" => ArgumentInstruction::LDA(arg),
+        "ADD" => ArgumentInstruction::ADD(arg),
+        "SUB" => ArgumentInstruction::SUB(arg),
+        _ => unreachable!(),
+    };
+    Instruction::ArgumentInstruction(instruction, label)
+}
+
+/// Builds the instruction for `JMP`.
+fn parse_jump_instruction<'a>(
+    instruction: Pair<'a, Rule>,
+    arg: Pair<'a, Rule>,
+    label: Option<Label<'a>>,
+) -> Instruction<'a> {
+    let arg = parse_jump_argument(arg);
+    match instruction.as_str() {
+        "JMP" => Instruction::Jump(arg, label),
+        _ => unreachable!(),
+    }
+}
+
+/// Builds the instruction for `STA`.
+fn parse_memory_location_instruction<'a>(
+    instruction: Pair<'a, Rule>,
+    arg: Pair<'a, Rule>,
+    label: Option<Label<'a>>,
+) -> Instruction<'a> {
+    let arg_value = parse_memory_location(&arg);
+    let instruction = match instruction.as_str() {
+        "STA" => MemoryLocationInstruction::STA(arg_value),
+        _ => unreachable!(),
+    };
+    Instruction::MemoryLocationInstruction(instruction, label)
+}
+
+/// Builds the instruction for `BRZ`, `BRC` and `BRN`.
+fn parse_constant_arg_instruction<'a>(
+    instruction: Pair<'a, Rule>,
+    arg: Pair<'a, Rule>,
+    label: Option<Label<'a>>,
+) -> Instruction<'a> {
+    let arg_value = parse_digit_literal(&arg);
+    let instruction = match instruction.as_str() {
+        "BRZ" => ConstantArgumentInstruction::BRZ(arg_value),
+        "BRC" => ConstantArgumentInstruction::BRC(arg_value),
+        "BRN" => ConstantArgumentInstruction::BRN(arg_value),
+        _ => unreachable!(),
+    };
+    Instruction::ConstantArgumentInstruction(instruction, label)
+}
+
+/// Parses an `argument` pair, which wraps a memory location or a digit literal.
+fn parse_argument<'a>(argument: Pair<'a, Rule>) -> Argument {
+    let argument = argument.into_inner().next().unwrap();
+    match argument.as_rule() {
+        Rule::memory_location => Argument::MemoryLocation(parse_memory_location(&argument)),
+        Rule::digit_literal => Argument::Constant(parse_digit_literal(&argument)),
+        _ => unreachable!(),
+    }
+}
+
+/// Turns a `label` pair into a [`Label`] located at `instruction_counter`.
+fn parse_label<'a>(label: Pair<'a, Rule>, instruction_counter: u8) -> Label<'a> {
+    match label.as_rule() {
+        Rule::label => Label {
+            name: label.as_str(),
+            location: instruction_counter,
+        },
+        _ => unreachable!(),
+    }
+}
+
+/// Parses a `jump_argument` pair, which wraps a jump location or a label.
+fn parse_jump_argument<'a>(arg: Pair<'a, Rule>) -> JumpArgument<'a> {
+    let arg = arg.into_inner().next().unwrap();
+    match arg.as_rule() {
+        Rule::label => JumpArgument::Label(arg.as_str()),
+        Rule::jump_location => JumpArgument::Location(parse_hex_digit(arg.as_str())),
+        _ => unreachable!(),
+    }
+}
+
+/// Extracts the address from a `memory_location` pair, i.e. the `X` of `(X)`.
+fn parse_memory_location(location: &Pair<'_, Rule>) -> u8 {
+    let text = location.as_str();
+    parse_hex_digit(&text[1..text.len() - 1])
+}
+
+/// Extracts the value from a `digit_literal` pair, i.e. the `X` of `#X`.
+fn parse_digit_literal(literal: &Pair<'_, Rule>) -> u8 {
+    parse_hex_digit(&literal.as_str()[1..])
+}
+
+/// Converts the single hexadecimal digit the grammar admits for operands.
+fn parse_hex_digit(digit: &str) -> u8 {
+    u8::from_str_radix(digit, 16).expect("the grammar only admits a single hexadecimal digit")
+}