diff --git a/Cargo.lock b/Cargo.lock index e69e87bf0..c95d24c18 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -678,6 +678,8 @@ version = "1.2.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7" dependencies = [ + "jobserver", + "libc", "shlex", ] @@ -1884,6 +1886,18 @@ dependencies = [ "slab", ] +[[package]] +name = "fuzz" +version = "0.0.0" +dependencies = [ + "harper-comments", + "harper-core", + "harper-html", + "harper-literate-haskell", + "harper-typst", + "libfuzzer-sys", +] + [[package]] name = "gemm" version = "0.17.1" @@ -2835,6 +2849,16 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.3", + "libc", +] + [[package]] name = "js-sys" version = "0.3.77" @@ -2883,6 +2907,16 @@ version = "0.2.174" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" +[[package]] +name = "libfuzzer-sys" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5037190e1f70cbeef565bd267599242926f724d3b8a9f510fd7e0b540cfa4404" +dependencies = [ + "arbitrary", + "cc", +] + [[package]] name = "libloading" version = "0.8.8" diff --git a/Cargo.toml b/Cargo.toml index 9eab182db..25dfdad2f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["harper-cli", "harper-core", "harper-ls", "harper-comments", "harper-wasm", "harper-tree-sitter", "harper-html", "harper-literate-haskell", "harper-typst", "harper-stats", "harper-pos-utils", "harper-brill", "harper-ink", "harper-python"] +members = [ 
"harper-cli", "harper-core", "harper-ls", "harper-comments", "harper-wasm", "harper-tree-sitter", "harper-html", "harper-literate-haskell", "harper-typst", "fuzz" , "harper-stats", "harper-pos-utils", "harper-brill", "harper-ink", "harper-python"] resolver = "2" # Comment out the below lines if you plan to use a debugger. diff --git a/fuzz/.gitignore b/fuzz/.gitignore new file mode 100644 index 000000000..1a45eee77 --- /dev/null +++ b/fuzz/.gitignore @@ -0,0 +1,4 @@ +target +corpus +artifacts +coverage diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 000000000..c3631bbae --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,52 @@ +[package] +name = "fuzz" +version = "0.0.0" +publish = false +edition = "2024" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" + +harper-core = { path = "../harper-core" } +harper-typst = { path = "../harper-typst" } +harper-literate-haskell = { path = "../harper-literate-haskell" } +harper-html = { path = "../harper-html" } +harper-comments = { path = "../harper-comments" } + +[[bin]] +name = "fuzz_harper_typst" +path = "fuzz_targets/fuzz_harper_typst.rs" +test = false +doc = false +bench = false + +[[bin]] +name = "fuzz_harper_literate_haskell" +path = "fuzz_targets/fuzz_harper_literate_haskell.rs" +test = false +doc = false +bench = false + +[[bin]] +name = "fuzz_harper_html" +path = "fuzz_targets/fuzz_harper_html.rs" +test = false +doc = false +bench = false + +[[bin]] +name = "fuzz_harper_comment" +path = "fuzz_targets/fuzz_harper_comment.rs" +test = false +doc = false +bench = false + +[[bin]] +name = "fuzz_harper_core_markdown" +path = "fuzz_targets/fuzz_harper_core_markdown.rs" +test = false +doc = false +bench = false diff --git a/fuzz/README.md b/fuzz/README.md new file mode 100644 index 000000000..52d797b72 --- /dev/null +++ b/fuzz/README.md @@ -0,0 +1,39 @@ +# cargo-fuzz targets + +## Setup + +Follow the rust-fuzz [setup 
guide](https://rust-fuzz.github.io/book/cargo-fuzz/setup.html). +You need a nightly toolchain and the cargo-fuzz plugin. + +Simple installation steps: + +- `rustup install nightly` +- `cargo install cargo-fuzz` + +## Adding a new fuzzing target + +To add a new target, run `cargo fuzz add $TARGET_NAME`. + + +## Doing a fuzzing run + +First, make sure that LTO is turned off; otherwise you'll encounter linker errors. +You can do this via the workspace `Cargo.toml` or the `CARGO_PROFILE_RELEASE_LTO` environment variable. + +If possible, prefill the `fuzz/corpus/$TARGET_NAME` directory with appropriate examples to speed up fuzzing. +The fuzzer is coverage-guided, so giving a well-formed input document to targets that only expect a string as input can speed things up a lot. + +Then, run `CARGO_PROFILE_RELEASE_LTO=false cargo +nightly fuzz run $TARGET_NAME -- -timeout=$TIMEOUT`. + +The timeout flag accepts a timeout in seconds, after which a long-running test case will be aborted. +This should be set to a low number to quickly report endless loops / deep recursion in parsers. + +The normal fuzzing run will continue until a crash is found. + +## Minimizing a test case + +Once the fuzzer finds a crash, we probably want to minimize the failing input. 
+This can be done with `CARGO_PROFILE_RELEASE_LTO=false cargo +nightly fuzz tmin $TARGET $TEST_CASE_PATH`
+
+
+
diff --git a/fuzz/fuzz_targets/fuzz_harper_comment.rs b/fuzz/fuzz_targets/fuzz_harper_comment.rs
new file mode 100644
index 000000000..b59045796
--- /dev/null
+++ b/fuzz/fuzz_targets/fuzz_harper_comment.rs
@@ -0,0 +1,84 @@
+//! Fuzz target for `harper_comments::CommentParser`.
+//!
+//! Each input pairs a language id drawn from `LANGUAGES` with arbitrary text. The text is
+//! parsed only when a comment parser can be built for that id.
+
+#![no_main]
+
+use harper_core::parsers::{MarkdownOptions, StrParser};
+use libfuzzer_sys::arbitrary::{Arbitrary, Result, Unstructured};
+use libfuzzer_sys::fuzz_target;
+
+/// A language id taken from `LANGUAGES`.
+#[derive(Debug)]
+struct Language(String);
+
+/// Language ids the fuzzer chooses from.
+///
+/// Each id is listed exactly once (sorted, so duplicates are easy to spot) to keep
+/// `Unstructured::choose` from favouring some languages over others.
+const LANGUAGES: &[&str] = &[
+    "c",
+    "clojure",
+    "cmake",
+    "cpp",
+    "csharp",
+    "dart",
+    "go",
+    "haskell",
+    "java",
+    "javascript",
+    "javascriptreact",
+    "kotlin",
+    "lua",
+    "nix",
+    "php",
+    "python",
+    "ruby",
+    "rust",
+    "scala",
+    "shellscript",
+    "solidity",
+    "swift",
+    "toml",
+    "typescript",
+    "typescriptreact",
+];
+
+impl<'a> Arbitrary<'a> for Language {
+    /// Picks one entry of `LANGUAGES`, propagating any error from `Unstructured::choose`.
+    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
+        let &lang = u.choose(LANGUAGES)?;
+        Ok(Language(lang.to_owned()))
+    }
+}
+
+/// One fuzz case: the language id to parse as and the text to parse.
+#[derive(Debug)]
+struct Input {
+    language: Language,
+    text: String,
+}
+
+impl<'a> Arbitrary<'a> for Input {
+    /// Builds both fields through the `(Language, String)` tuple implementation.
+    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
+        let (language, text) = Arbitrary::arbitrary(u)?;
+        Ok(Input { language, text })
+    }
+
+    /// Same as `arbitrary`, but consumes `u` and delegates to the tuple's `arbitrary_take_rest`.
+    fn arbitrary_take_rest(u: Unstructured<'a>) -> Result<Self> {
+        let (language, text) = Arbitrary::arbitrary_take_rest(u)?;
+        Ok(Input { language, text })
+    }
+}
+
+fuzz_target!(|data: Input| {
+    let opts = MarkdownOptions::default();
+    let parser = harper_comments::CommentParser::new_from_language_id(&data.language.0, opts);
+    // `new_from_language_id` returns an `Option`; inputs that yield no parser are skipped.
+    if let Some(parser) = parser {
+        let _res = parser.parse_str(&data.text);
+    }
+});
diff --git a/fuzz/fuzz_targets/fuzz_harper_core_markdown.rs b/fuzz/fuzz_targets/fuzz_harper_core_markdown.rs
new file mode 100644
index 000000000..596a269d7
--- /dev/null
+++ 
b/fuzz/fuzz_targets/fuzz_harper_core_markdown.rs
@@ -0,0 +1,10 @@
+#![no_main]
+
+use harper_core::parsers::{Markdown, MarkdownOptions, StrParser};
+use libfuzzer_sys::fuzz_target;
+// Fuzz target: parses arbitrary `&str` input with a default-configured Markdown parser.
+fuzz_target!(|data: &str| {
+    let opts = MarkdownOptions::default();
+    let parser = Markdown::new(opts);
+    let _res = parser.parse_str(data); // the parse result itself is discarded
+});
diff --git a/fuzz/fuzz_targets/fuzz_harper_html.rs b/fuzz/fuzz_targets/fuzz_harper_html.rs
new file mode 100644
index 000000000..b0f11cf63
--- /dev/null
+++ b/fuzz/fuzz_targets/fuzz_harper_html.rs
@@ -0,0 +1,9 @@
+#![no_main]
+
+use harper_core::parsers::StrParser;
+use libfuzzer_sys::fuzz_target;
+// Fuzz target: parses arbitrary `&str` input with the default `harper_html::HtmlParser`.
+fuzz_target!(|data: &str| {
+    let parser = harper_html::HtmlParser::default();
+    let _res = parser.parse_str(data); // the parse result itself is discarded
+});
diff --git a/fuzz/fuzz_targets/fuzz_harper_literate_haskell.rs b/fuzz/fuzz_targets/fuzz_harper_literate_haskell.rs
new file mode 100644
index 000000000..ea3df937a
--- /dev/null
+++ b/fuzz/fuzz_targets/fuzz_harper_literate_haskell.rs
@@ -0,0 +1,9 @@
+#![no_main]
+
+// NOTE: re-enable `use harper_core::parsers::StrParser;` once this target calls `parse_str`.
+use libfuzzer_sys::fuzz_target;
+// Placeholder target: the closure body is empty, so no parser code is exercised yet.
+fuzz_target!(|_data: &str| {
+    // TODO: figure out how to create a literate Haskell parser, then call `parse_str` on it
+    // with `_data`, as the other fuzz targets do with their parsers.
+});
diff --git a/fuzz/fuzz_targets/fuzz_harper_typst.rs b/fuzz/fuzz_targets/fuzz_harper_typst.rs
new file mode 100644
index 000000000..166282c68
--- /dev/null
+++ b/fuzz/fuzz_targets/fuzz_harper_typst.rs
@@ -0,0 +1,9 @@
+#![no_main]
+
+use harper_core::parsers::StrParser;
+use libfuzzer_sys::fuzz_target;
+// Fuzz target: parses arbitrary `&str` input with `harper_typst::Typst`.
+fuzz_target!(|data: &str| {
+    let typst = harper_typst::Typst;
+    let _res = typst.parse_str(data); // the parse result itself is discarded
+});