From d145668fc86d44f3e951e068880a93b2720e3bc3 Mon Sep 17 00:00:00 2001 From: Benjamin Bannier Date: Mon, 7 Apr 2025 21:26:42 +0200 Subject: [PATCH 1/2] Format comments with topiary --- .github/workflows/build_wheels.yml | 13 ++- Cargo.toml | 23 ++++ pyproject.toml | 10 +- setup.py | 15 --- src/lib.rs | 109 ++++++++++++++++++ src/query.scm | 37 ++++++ .../format_zeek__test__comments-2.snap | 7 ++ .../format_zeek__test__comments-3.snap | 7 ++ .../format_zeek__test__comments-4.snap | 7 ++ .../format_zeek__test__comments-5.snap | 7 ++ .../format_zeek__test__comments-6.snap | 7 ++ .../format_zeek__test__comments-7.snap | 7 ++ .../format_zeek__test__comments-8.snap | 7 ++ .../format_zeek__test__comments-9.snap | 7 ++ .../format_zeek__test__comments.snap | 7 ++ zeekscript/__init__.py | 2 + zeekscript/formatter.py | 11 +- zeekscript/zeekscript.pyi | 1 + 18 files changed, 258 insertions(+), 26 deletions(-) create mode 100644 Cargo.toml delete mode 100644 setup.py create mode 100644 src/lib.rs create mode 100644 src/query.scm create mode 100644 src/snapshots/format_zeek__test__comments-2.snap create mode 100644 src/snapshots/format_zeek__test__comments-3.snap create mode 100644 src/snapshots/format_zeek__test__comments-4.snap create mode 100644 src/snapshots/format_zeek__test__comments-5.snap create mode 100644 src/snapshots/format_zeek__test__comments-6.snap create mode 100644 src/snapshots/format_zeek__test__comments-7.snap create mode 100644 src/snapshots/format_zeek__test__comments-8.snap create mode 100644 src/snapshots/format_zeek__test__comments-9.snap create mode 100644 src/snapshots/format_zeek__test__comments.snap create mode 100644 zeekscript/zeekscript.pyi diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index 6dab8dd..50d79ec 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -18,8 +18,10 @@ jobs: with: submodules: recursive + - uses: astral-sh/setup-uv@v6 + - name: Build SDist - run: 
python ./setup.py sdist + run: uv build --sdist - uses: actions/upload-artifact@v5 with: @@ -31,17 +33,16 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.10", "3.11", "3.12", "3.13"] + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] steps: - uses: actions/checkout@v5 - uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} + - uses: astral-sh/setup-uv@v6 - name: Install dependencies - run: pip install .[dev] - - run: pytest - - + run: uv sync --all-extras + - run: uv run pytest upload_all: needs: [build_sdist, check] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..7156217 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "zeekscript" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lib] +name = "zeekscript" +crate-type = ["cdylib"] + +[dependencies] +pyo3 = { version = "0.27.1", optional = true } +thiserror = "2.0.17" +topiary-core = { version = "0.7.0", default-features = false } +topiary-tree-sitter-facade = { version = "0.7.0", default-features = false } +tree-sitter-zeek = { git = "https://github.com/zeek/tree-sitter-zeek", version = "0.2.9" } + +[dev-dependencies] +insta = "1.43.2" + +[features] +default = ["python"] +python = ["pyo3"] diff --git a/pyproject.toml b/pyproject.toml index fe1b6bf..554f109 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,6 @@ [build-system] -requires = ["setuptools"] +requires = ["maturin>=1.8,<2.0"] +build-backend = "maturin" [project] name = "zeekscript" @@ -9,6 +10,9 @@ readme = "README.md" classifiers=[ "Programming Language :: Python :: 3.7", + "Programming Language :: Rust", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", "License :: OSI Approved :: BSD License", "Topic :: Utilities", ] @@ -50,8 +54,8 @@ Repository = "https://github.com/zeek/zeekscript" 
zeek-format = "zeekscript.cli:zeek_format" zeek-script = "zeekscript.cli:zeek_script" -[tool.setuptools] -packages = ["zeekscript"] +[tool.maturin] +features = ["pyo3/extension-module"] [tool.ruff.lint] select = ["PL", "UP", "RUF", "N", "I", "RET"] diff --git a/setup.py b/setup.py deleted file mode 100644 index f6f8abe..0000000 --- a/setup.py +++ /dev/null @@ -1,15 +0,0 @@ -"""Installation setup.""" - -from setuptools import setup - - -def get_version() -> str: - """Get the version from the version file.""" - with open("VERSION", encoding="utf-8") as version: - return version.read().replace("-", ".dev", 1).strip() - - -setup( - version=get_version(), - setup_requires=["tree_sitter"], -) diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..f7d5de2 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,109 @@ +use std::string::FromUtf8Error; + +use thiserror::Error; +use topiary_core::{FormatterError, TopiaryQuery}; + +#[derive(Error, Debug)] +pub enum FormatError { + #[error("parse error")] + Parse, + + #[error("internal query error")] + Query(String), + + #[error("idempotency violated")] + Idempotency, + + #[error("UTF8 conversion error")] + UTF8(FromUtf8Error), + + #[error("unknown error")] + Unknown, +} + +const QUERY: &str = include_str!("query.scm"); + +pub fn format( + input: &str, + skip_idempotence: bool, + tolerate_parsing_errors: bool, +) -> Result { + let mut output = Vec::new(); + + let grammar = topiary_tree_sitter_facade::Language::from(tree_sitter_zeek::LANGUAGE); + + let query = TopiaryQuery::new(&grammar, QUERY).map_err(|e| match e { + FormatterError::Query(m, e) => FormatError::Query(match e { + None => m, + Some(e) => format!("{m}: {e}"), + }), + _ => FormatError::Unknown, + })?; + + let language = topiary_core::Language { + name: "zeek".to_string(), + indent: Some("\t".into()), + grammar, + query, + }; + + if let Err(e) = topiary_core::formatter( + &mut input.as_bytes(), + &mut output, + &language, + topiary_core::Operation::Format { + 
skip_idempotence, + tolerate_parsing_errors, + }, + ) { + Err(match e { + FormatterError::Query(m, e) => FormatError::Query(match e { + None => m, + Some(e) => format!("{m}: {e}"), + }), + FormatterError::Idempotence => FormatError::Idempotency, + FormatterError::Parsing { .. } => FormatError::Parse, + _ => FormatError::Unknown, + })?; + }; + + let output = String::from_utf8(output).map_err(FormatError::UTF8)?; + + Ok(output) +} + +#[cfg(feature = "python")] +#[pyo3::pymodule] +mod zeekscript { + use pyo3::{exceptions::PyException, pyfunction, PyResult}; + + #[pyfunction] + fn format(input: &str) -> PyResult { + super::format(input, false, true).map_err(|e| PyException::new_err(e.to_string())) + } +} + +#[cfg(test)] +mod test { + use insta::assert_debug_snapshot; + + use crate::FormatError; + + fn format(input: &str) -> Result { + crate::format(input, false, false) + } + + #[test] + fn comments() { + assert_debug_snapshot!(format("# foo\n;1;")); + assert_debug_snapshot!(format("##! foo\n;1;")); + assert_debug_snapshot!(format("## foo\n1;")); + assert_debug_snapshot!(format("##< foo\n1;")); + + assert_debug_snapshot!(format("1;# foo\n;1;")); + assert_debug_snapshot!(format("1;##! foo\n;1;")); + assert_debug_snapshot!(format("1;## foo\n1;")); + assert_debug_snapshot!(format("1;##< foo")); + assert_debug_snapshot!(format("1;##< foo\n##< bar")); + } +} diff --git a/src/query.scm b/src/query.scm new file mode 100644 index 0000000..93216b2 --- /dev/null +++ b/src/query.scm @@ -0,0 +1,37 @@ +; Rules for formatting Zeek. +; +; Formatting is specified here in terms of tree-sitter nodes. We select nodes +; with tree-sitter queries[^1] and then attach topiary formatting rules[^2] in +; the captures. +; +; See the Development section in README.md for a workflow on how to modify or +; extend these rules. 
+ +; [^1]: https://tree-sitter.github.io/tree-sitter/using-parsers#pattern-matching-with-queries +; [^2]: https://github.com/tweag/topiary#design + +; Comments are always followed by a linebreak. +[ + (minor_comment) + (zeekygen_head_comment) + (zeekygen_prev_comment) + (zeekygen_next_comment) +] @append_hardline + +; Comments are preceded by a space. +( + [ + (_) + (nl) @do_nothing + ] + . + [ + (minor_comment) + (zeekygen_head_comment) + (zeekygen_prev_comment) + (zeekygen_next_comment) + ] @prepend_space +) + +; If we have multiple comments documenting an item with `##<` align them all. +(zeekygen_prev_comment) @multi_line_indent_all diff --git a/src/snapshots/format_zeek__test__comments-2.snap b/src/snapshots/format_zeek__test__comments-2.snap new file mode 100644 index 0000000..675c585 --- /dev/null +++ b/src/snapshots/format_zeek__test__comments-2.snap @@ -0,0 +1,7 @@ +--- +source: src/lib.rs +expression: "format(\"##! foo\\n;1;\")" +--- +Ok( + "##! foo\n;1;\n", +) diff --git a/src/snapshots/format_zeek__test__comments-3.snap b/src/snapshots/format_zeek__test__comments-3.snap new file mode 100644 index 0000000..18e63f0 --- /dev/null +++ b/src/snapshots/format_zeek__test__comments-3.snap @@ -0,0 +1,7 @@ +--- +source: src/lib.rs +expression: "format(\"## foo\\n1;\")" +--- +Ok( + "## foo\n1;\n", +) diff --git a/src/snapshots/format_zeek__test__comments-4.snap b/src/snapshots/format_zeek__test__comments-4.snap new file mode 100644 index 0000000..fb2bc6b --- /dev/null +++ b/src/snapshots/format_zeek__test__comments-4.snap @@ -0,0 +1,7 @@ +--- +source: src/lib.rs +expression: "format(\"##< foo\\n1;\")" +--- +Ok( + "##< foo\n1;\n", +) diff --git a/src/snapshots/format_zeek__test__comments-5.snap b/src/snapshots/format_zeek__test__comments-5.snap new file mode 100644 index 0000000..0218b85 --- /dev/null +++ b/src/snapshots/format_zeek__test__comments-5.snap @@ -0,0 +1,7 @@ +--- +source: src/lib.rs +expression: "format(\"1;# foo\\n;1;\")" +--- +Ok( + "1; # 
foo\n;1;\n", +) diff --git a/src/snapshots/format_zeek__test__comments-6.snap b/src/snapshots/format_zeek__test__comments-6.snap new file mode 100644 index 0000000..5243fff --- /dev/null +++ b/src/snapshots/format_zeek__test__comments-6.snap @@ -0,0 +1,7 @@ +--- +source: src/lib.rs +expression: "format(\"1;##! foo\\n;1;\")" +--- +Ok( + "1; ##! foo\n;1;\n", +) diff --git a/src/snapshots/format_zeek__test__comments-7.snap b/src/snapshots/format_zeek__test__comments-7.snap new file mode 100644 index 0000000..81e3b76 --- /dev/null +++ b/src/snapshots/format_zeek__test__comments-7.snap @@ -0,0 +1,7 @@ +--- +source: src/lib.rs +expression: "format(\"1;## foo\\n1;\")" +--- +Ok( + "1; ## foo\n1;\n", +) diff --git a/src/snapshots/format_zeek__test__comments-8.snap b/src/snapshots/format_zeek__test__comments-8.snap new file mode 100644 index 0000000..7e6a1e0 --- /dev/null +++ b/src/snapshots/format_zeek__test__comments-8.snap @@ -0,0 +1,7 @@ +--- +source: src/lib.rs +expression: "format(\"1;##< foo\")" +--- +Ok( + "1; ##< foo\n", +) diff --git a/src/snapshots/format_zeek__test__comments-9.snap b/src/snapshots/format_zeek__test__comments-9.snap new file mode 100644 index 0000000..e73a7cd --- /dev/null +++ b/src/snapshots/format_zeek__test__comments-9.snap @@ -0,0 +1,7 @@ +--- +source: src/lib.rs +expression: "format(\"1;##< foo\\n##< bar\")" +--- +Ok( + "1; ##< foo\n##< bar\n", +) diff --git a/src/snapshots/format_zeek__test__comments.snap b/src/snapshots/format_zeek__test__comments.snap new file mode 100644 index 0000000..1097171 --- /dev/null +++ b/src/snapshots/format_zeek__test__comments.snap @@ -0,0 +1,7 @@ +--- +source: src/lib.rs +expression: "format(\"# foo\\n;1;\")" +--- +Ok( + "# foo\n;1;\n", +) diff --git a/zeekscript/__init__.py b/zeekscript/__init__.py index 5b7b858..710cfba 100644 --- a/zeekscript/__init__.py +++ b/zeekscript/__init__.py @@ -7,6 +7,7 @@ "add_format_cmd", "add_parse_cmd", "add_version_arg", + "format", "print_error", ] @@ -14,3 +15,4 @@ from 
.formatter import Formatter from .output import print_error from .script import Script +from .zeekscript import format diff --git a/zeekscript/formatter.py b/zeekscript/formatter.py index d703077..7d68a4b 100644 --- a/zeekscript/formatter.py +++ b/zeekscript/formatter.py @@ -26,6 +26,8 @@ from zeekscript.node import Node +from .zeekscript import format + if TYPE_CHECKING: from zeekscript.output import OutputStream from zeekscript.script import Script @@ -1448,8 +1450,13 @@ def format(self) -> None: class ZeekygenCommentFormatter(CommentFormatter): def format(self) -> None: - self._format_token() - self._write_nl() + return self._write( + format( + self.script.get_content(*self.node.script_range()).decode( + encoding="utf-8" + ) + ) + ) class ZeekygenPrevCommentFormatter(CommentFormatter): diff --git a/zeekscript/zeekscript.pyi b/zeekscript/zeekscript.pyi new file mode 100644 index 0000000..899c914 --- /dev/null +++ b/zeekscript/zeekscript.pyi @@ -0,0 +1 @@ +def format(code: str) -> str: ... From e57cc0ca083046798fa3b82cabbfa0238cfd64a5 Mon Sep 17 00:00:00 2001 From: Benjamin Bannier Date: Sun, 2 Nov 2025 13:08:50 +0100 Subject: [PATCH 2/2] Bump pre-commit hooks --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ae20f12..9b03cff 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,7 +10,7 @@ repos: - id: check-added-large-files - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.13.2 + rev: v0.14.3 hooks: - id: ruff - id: ruff-format