diff --git a/pyproject.toml b/pyproject.toml index 63f1b4a..0957917 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,6 +93,7 @@ addopts = [ [tool.coverage.report] exclude_also = [ "if \\S*TYPE_CHECKING:", + "if __name__ == '__main__':", ] [tool.pylint.main] diff --git a/src/anchovy/cli.py b/src/anchovy/cli.py index 60cedcc..fa11a3d 100644 --- a/src/anchovy/cli.py +++ b/src/anchovy/cli.py @@ -179,6 +179,13 @@ def main(arguments: list[str] | None = None): help='file path to a config file to build', type=Path, default=None) + parser.add_argument('-s', '--serve', + help='serve the output directory over HTTP after building', + action='store_true') + parser.add_argument('-p', '--port', + help='port to serve from', + type=int, + default=8080) args, remaining = parser.parse_known_args(arguments) @@ -234,3 +241,8 @@ def main(arguments: list[str] | None = None): style='red' ) sys.exit(1) + + if args.serve: + from .server import serve + parsed_settings = parse_settings_args(settings, argv=remaining) + serve(args.port, parsed_settings.output_dir) diff --git a/src/anchovy/components/md_rendering.py b/src/anchovy/components/md_rendering.py index 36ef95b..6251509 100644 --- a/src/anchovy/components/md_rendering.py +++ b/src/anchovy/components/md_rendering.py @@ -61,7 +61,7 @@ class AnchovyRendererHTML(RendererHTML): integration and toml frontmatter support. """ # https://github.com/executablebooks/markdown-it-py/issues/256 - def fence(self, tokens: Sequence[Token], idx: int, options: OptionsDict, _env: EnvType): + def fence(self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType): """ Handles rendering a markdown code fence, with optional syntax highlighting. diff --git a/src/anchovy/core.py b/src/anchovy/core.py index 994d98b..73fe5d1 100644 --- a/src/anchovy/core.py +++ b/src/anchovy/core.py @@ -12,6 +12,9 @@ from .dependencies import Dependency from .pretty_utils import track_progress +if t.TYPE_CHECKING: + from collections.abc import Sequence, Set + T = t.TypeVar('T') T2 = t.TypeVar('T2') @@ -297,7 +300,7 @@ def is_available(cls) -> bool: return all(d.satisfied for d in cls.get_dependencies()) @classmethod - def get_dependencies(cls) -> set[Dependency]: + def get_dependencies(cls) -> Set[Dependency]: """ Return the requirements for this Step. """ @@ -314,7 +317,7 @@ def __call__( self, path: Path, output_paths: list[Path] - ) -> None | tuple[list[Path | CustodyEntry], list[Path]]: + ) -> None | tuple[Sequence[Path | CustodyEntry], list[Path]]: ... diff --git a/src/anchovy/include.py b/src/anchovy/include.py index b279de1..2814f21 100644 --- a/src/anchovy/include.py +++ b/src/anchovy/include.py @@ -11,7 +11,7 @@ from .core import Context from .custody import CustodyEntry -from .dependencies import PipDependency +from .dependencies import Dependency, PipDependency from .simple import BaseStandardStep @@ -76,7 +76,7 @@ class URLLibFetchStep(BaseStandardStep): """ @classmethod def get_dependencies(cls): - return {PipDependency('tomli')} if sys.version_info < (3, 11) else {} + return {PipDependency('tomli')} if sys.version_info < (3, 11) else set[Dependency]() def bind(self, context: Context): super().bind(context) diff --git a/src/anchovy/jinja.py b/src/anchovy/jinja.py index f51425a..26c47d9 100644 --- a/src/anchovy/jinja.py +++ b/src/anchovy/jinja.py @@ -31,7 +31,7 @@ class JinjaRenderStep(BaseStandardStep): Abstract base class for Steps using Jinja rendering. """ @classmethod - def get_dependencies(cls): + def get_dependencies(cls) -> set[Dependency]: return { PipDependency('jinja2'), } diff --git a/src/anchovy/server.py b/src/anchovy/server.py new file mode 100644 index 0000000..830b462 --- /dev/null +++ b/src/anchovy/server.py @@ -0,0 +1,104 @@ +from __future__ import annotations + +import argparse +import hashlib +import http.server +import mimetypes +import os +import pathlib +import typing +if typing.TYPE_CHECKING: + from socketserver import _AfInetAddress + + +INDEX_FILE = 'index.html' +# Default used by nginx +DEFAULT_MIME_TYPE = 'application/octet-stream' + + +class ThreadedHTTPServer(http.server.ThreadingHTTPServer): + """ + A simple threaded HTTP server with a more powerful handler. + """ + def __init__(self, + server_address: _AfInetAddress, + directory: str | pathlib.Path = '.', + bind_and_activate: bool = True) -> None: + super().__init__(server_address, Handler, bind_and_activate) + self.directory = str(directory) + + def finish_request(self, request, client_address) -> None: + Handler(request, client_address, self, directory=self.directory) + + +class Handler(http.server.SimpleHTTPRequestHandler): + def get_etag(self, file_path): + """ + Generate an etag for a file based on its path and modification time. + """ + mtime = os.path.getmtime(file_path) + file_size = os.path.getsize(file_path) + file_info = f"{file_size}-{mtime}" + return hashlib.md5(file_info.encode('utf-8')).hexdigest() + + def do_GET(self): + try: + # Get the etag for the file + file_path = pathlib.Path(self.translate_path(self.path)) + if file_path.is_dir(): + file_path /= INDEX_FILE + + # Double-check that we haven't escaped the directory. + # self.translate_path() should discard any suspicious path + # components, but it's better to be safe. + if not file_path.is_relative_to(self.directory): + return self.send_error(403, 'Forbidden') + + etag = self.get_etag(file_path) + # Check if the client already has the file + if 'If-None-Match' in self.headers and self.headers['If-None-Match'] == etag: + self.send_response(304) + self.end_headers() + else: + # Get the file extension and set the MIME type accordingly + mime_type, _enc = mimetypes.guess_type(file_path) + self.send_response(200) + self.send_header('Content-type', mime_type or DEFAULT_MIME_TYPE) + self.send_header('ETag', etag) + self.end_headers() + # Serve the file + with open(file_path, 'rb') as file: + # Serve the file in chunks to avoid reading the entire file + # into memory + chunk_size = 8192 + while True: + chunk = file.read(chunk_size) + if not chunk: + break + self.wfile.write(chunk) + except FileNotFoundError: + self.send_error(404, f'File Not Found: {self.path}') + + +def serve(port: int, directory: str | pathlib.Path, host: str = 'localhost'): + with ThreadedHTTPServer((host, port), directory=directory) as httpd: + print(f'Serving at http://localhost:{port}') + httpd.serve_forever() + + +def main(arguments: list[str] | None = None): + parser = argparse.ArgumentParser() + parser.add_argument('-p', '--port', + help='port to serve from', + type=int, + default=8080) + parser.add_argument('-d', '--directory', + help='directory to serve', + type=pathlib.Path, + default='.') + args = parser.parse_args(arguments) + serve(args.port, args.directory) + + +if __name__ == '__main__': + main() diff --git a/src/anchovy/test_harness.py b/src/anchovy/test_harness.py new file mode 100644 index 0000000..c8d5578 --- /dev/null +++ b/src/anchovy/test_harness.py @@ -0,0 +1,104 @@ +import json +import pathlib +import runpy +import typing as t + +import anchovy.cli +from anchovy.core import BuildSettings, Context, ContextDir, Rule +from anchovy.custody import CONTEXT_DIR_KEYS + + +MTIME_MODE_NONE = 0 +MTIME_MODE_NE = 1 +MTIME_MODE_EQ = 2 + + +def get_context_dir(context: Context, key: str): + path = pathlib.Path(key) + if key in CONTEXT_DIR_KEYS: + return key + return t.cast('ContextDir', str(path.parents[-2])) + + +def load_example(path: pathlib.Path): + return runpy.run_path(str(path)) + + +def load_artifact(path: pathlib.Path): + with path.open() as file: + return json.load(file) + + +def load_context(path: pathlib.Path, tmp_dir: pathlib.Path, purge_dirs: bool = False): + module_items = load_example(path) + input_dir: pathlib.Path = module_items['SETTINGS']['input_dir'] + artifact_path = tmp_dir / 'artifact.json' + + rules: list[Rule] = module_items['RULES'] + settings = BuildSettings( + input_dir=input_dir, + output_dir=tmp_dir / 'output', + working_dir=tmp_dir / 'working', + custody_cache=artifact_path, + purge_dirs=purge_dirs, + ) + return Context(settings, rules) + + +def run_example(path: pathlib.Path, tmp_dir: pathlib.Path, purge_dirs: bool = False): + context = load_context(path, tmp_dir, purge_dirs) + context.run() + return context + + +def run_example_cli(path: pathlib.Path, tmp_dir: pathlib.Path, purge_dirs: bool = False): + context = load_context(path, tmp_dir, purge_dirs) + context.custodian.bind(context) + + arguments = [ + str(path), + '--custody-cache', str(context['custody_cache']) + ] + if purge_dirs: + arguments.append('--purge') + + anchovy.cli.main(arguments) + + return context + + +def canonicalize_graph(graph: dict): + for key, val in graph.items(): + if isinstance(val, list): + val.sort() + elif isinstance(val, dict): + canonicalize_graph(val) + + return graph + + +def compare_artifacts(old: dict, new: dict, context: Context, mtime_mode=MTIME_MODE_NONE): + assert canonicalize_graph(new['graph']) == canonicalize_graph(old['graph']) + assert new['meta'].keys() == old['meta'].keys() + for key in new['meta']: + n_type, n_dict = new['meta'][key] + o_type, o_dict = old['meta'][key] + print(f'{key}:\n new={n_dict}\n old={o_dict}') + assert n_type == o_type + if n_type == 'path': + context_dir = get_context_dir(context, key) + path = context.custodian.degenericize_path(key) + if path.is_dir(): + continue + try: + assert n_dict['sha1'] == o_dict['sha1'] + assert n_dict['size'] == o_dict['size'] + if mtime_mode == MTIME_MODE_NE and context_dir != 'input_dir': + assert n_dict['m_time'] != o_dict['m_time'] + elif mtime_mode == MTIME_MODE_EQ: + assert n_dict['m_time'] == o_dict['m_time'] + except AssertionError: + print(path.read_bytes()) + raise + else: + assert n_dict.keys() == o_dict.keys() diff --git a/test/test_examples.py b/test/test_examples.py index 8c30245..39e4821 100644 --- a/test/test_examples.py +++ b/test/test_examples.py @@ -1,13 +1,11 @@ -import json import pathlib -import runpy -import typing as t import pytest -import anchovy.cli -from anchovy.core import BuildSettings, Context, ContextDir, Rule -from anchovy.custody import CONTEXT_DIR_KEYS +from anchovy.test_harness import ( + load_artifact, run_example, run_example_cli, compare_artifacts, + MTIME_MODE_EQ, MTIME_MODE_NE, +) EXAMPLE_LIST = [ @@ -15,111 +13,22 @@ 'gallery', 'code_index', ] -MTIME_MODE_NONE = 0 -MTIME_MODE_NE = 1 -MTIME_MODE_EQ = 2 - - -def get_context_dir(context: Context, key: str): - path = pathlib.Path(key) - if key in CONTEXT_DIR_KEYS: - return key - return t.cast('ContextDir', str(path.parents[-2])) - - -def get_example_path(name: str): - return (pathlib.Path(__file__).parent.parent / 'examples/' / f'{name}').with_suffix('.py') - - -def load_example(name: str): - return runpy.run_path(str(get_example_path(name))) - - -def load_context(name: str, tmp_dir: pathlib.Path, purge_dirs: bool = False): - module_items = load_example(name) - input_dir: pathlib.Path = module_items['SETTINGS']['input_dir'] - artifact_path = tmp_dir / 'artifact.json' - - rules: list[Rule] = module_items['RULES'] - settings = BuildSettings( - input_dir=input_dir, - output_dir=tmp_dir / 'output', - working_dir=tmp_dir / 'working', - custody_cache=artifact_path, - purge_dirs=purge_dirs, - ) - return Context(settings, rules) - - -def run_example(name: str, tmp_dir: pathlib.Path, purge_dirs: bool = False): - context = load_context(name, tmp_dir, purge_dirs) - context.run() - return context - - -def run_example_cli(name: str, tmp_dir: pathlib.Path, purge_dirs: bool = False): - context = load_context(name, tmp_dir, purge_dirs) - context.custodian.bind(context) - - arguments = [ - str(get_example_path(name)), - '--custody-cache', str(context['custody_cache']) - ] - if purge_dirs: - arguments.append('--purge') - - anchovy.cli.main(arguments) - - return context - - -def canonicalize_graph(graph: dict): - for key, val in graph.items(): - if isinstance(val, list): - val.sort() - elif isinstance(val, dict): - canonicalize_graph(val) - - return graph - - -def compare_artifacts(old: dict, new: dict, context: Context, mtime_mode=MTIME_MODE_NONE): - assert canonicalize_graph(new['graph']) == canonicalize_graph(old['graph']) - assert new['meta'].keys() == old['meta'].keys() - for key in new['meta']: - n_type, n_dict = new['meta'][key] - o_type, o_dict = old['meta'][key] - print(f'{key}:\n new={n_dict}\n old={o_dict}') - assert n_type == o_type - if n_type == 'path': - context_dir = get_context_dir(context, key) - path = context.custodian.degenericize_path(key) - if path.is_dir(): - continue - try: - assert n_dict['sha1'] == o_dict['sha1'] - assert n_dict['size'] == o_dict['size'] - if mtime_mode == MTIME_MODE_NE and context_dir != 'input_dir': - assert n_dict['m_time'] != o_dict['m_time'] - elif mtime_mode == MTIME_MODE_EQ: - assert n_dict['m_time'] == o_dict['m_time'] - except AssertionError: - print(path.read_bytes()) - raise - else: - assert n_dict.keys() == o_dict.keys() +EXAMPLE_PATHS = { + name: (pathlib.Path(__file__).parent.parent / 'examples/' / f'{name}').with_suffix('.py') + for name in EXAMPLE_LIST +} @pytest.mark.parametrize('name', EXAMPLE_LIST) def test_example(name: str, tmp_path: pathlib.Path): old_artifact_path = (pathlib.Path(__file__).parent / 'artifacts' / name).with_suffix('.json') - with open(old_artifact_path) as file: - old_artifact = json.load(file) - context = run_example(name, tmp_path) + old_artifact = load_artifact(old_artifact_path) + + context = run_example(EXAMPLE_PATHS[name], tmp_path) if not (new_artifact_path := context['custody_cache']): raise RuntimeError(f'No custody artifact generated for {name}') - with open(new_artifact_path) as file: - new_artifact = json.load(file) + new_artifact = load_artifact(new_artifact_path) + compare_artifacts(old_artifact, new_artifact, context) @@ -129,16 +38,16 @@ def test_example_rerun(name: str, tmp_path: pathlib.Path): Run an example twice without purging, and check that the runs have identical output and unchanged mtimes. """ - context_one = run_example(name, tmp_path) + context_one = run_example(EXAMPLE_PATHS[name], tmp_path) if not (first_artifact_path := context_one['custody_cache']): raise RuntimeError(f'No custody artifact generated for {name} #1') - with open(first_artifact_path) as file: - first_artifact = json.load(file) - context_two = run_example(name, tmp_path) + first_artifact = load_artifact(first_artifact_path) + + context_two = run_example(EXAMPLE_PATHS[name], tmp_path) if not (second_artifact_path := context_two['custody_cache']): raise RuntimeError(f'No custody artifact generated for {name} #2') - with open(second_artifact_path) as file: - second_artifact = json.load(file) + second_artifact = load_artifact(second_artifact_path) + compare_artifacts(first_artifact, second_artifact, context_two, mtime_mode=MTIME_MODE_EQ) @@ -148,17 +57,16 @@ def test_example_purge(name: str, tmp_path: pathlib.Path): Run an example twice while purging, and check that the runs have identical output and different mtimes. """ - context_one = run_example(name, tmp_path, purge_dirs=True) + context_one = run_example(EXAMPLE_PATHS[name], tmp_path, purge_dirs=True) if not (first_artifact_path := context_one['custody_cache']): raise RuntimeError(f'No custody artifact generated for {name} #1') - with open(first_artifact_path) as file: - first_artifact = json.load(file) + first_artifact = load_artifact(first_artifact_path) - context_two = run_example(name, tmp_path, purge_dirs=True) + context_two = run_example(EXAMPLE_PATHS[name], tmp_path, purge_dirs=True) if not (second_artifact_path := context_two['custody_cache']): raise RuntimeError(f'No custody artifact generated for {name} #2') - with open(second_artifact_path) as file: - second_artifact = json.load(file) + second_artifact = load_artifact(second_artifact_path) + compare_artifacts(first_artifact, second_artifact, context_two, mtime_mode=MTIME_MODE_NE) @@ -168,12 +76,12 @@ def test_example_cli(name, tmp_path): Run an example using the CLI, and check that run has the expected output. """ old_artifact_path = (pathlib.Path(__file__).parent / 'artifacts' / name).with_suffix('.json') - with open(old_artifact_path) as file: - old_artifact = json.load(file) + old_artifact = load_artifact(old_artifact_path) + # TODO: Figure out why code_index doesn't work with purge_dirs=False. - context = run_example_cli(name, tmp_path, purge_dirs=True) + context = run_example_cli(EXAMPLE_PATHS[name], tmp_path, purge_dirs=True) if not (new_artifact_path := context['custody_cache']): raise RuntimeError(f'No custody artifact generated for {name}') - with open(new_artifact_path) as file: - new_artifact = json.load(file) + new_artifact = load_artifact(new_artifact_path) + compare_artifacts(old_artifact, new_artifact, context) diff --git a/test/test_server.py b/test/test_server.py new file mode 100644 index 0000000..4c47dae --- /dev/null +++ b/test/test_server.py @@ -0,0 +1,82 @@ +import contextlib +import pathlib +import socket +import threading + +import pytest +import requests + +from anchovy.server import main, ThreadedHTTPServer +from anchovy.test_harness import run_example + + +EXAMPLE_PATH = pathlib.Path(__file__).parent.parent / 'examples/' / 'basic_site.py' + + +def get_port(): + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.bind(('localhost', 0)) + port = sock.getsockname()[1] + sock.close() + return port + + +@contextlib.contextmanager +def run_server(directory: pathlib.Path, port: int): + server = ThreadedHTTPServer(('localhost', port), directory) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + yield + server.shutdown() + thread.join() + + +@contextlib.contextmanager +def run_server_cli(directory: pathlib.Path, port: int): + args = [ + '--port', str(port), + '--directory', str(directory) + ] + thread = threading.Thread(target=main, args=(args,), daemon=True) + thread.start() + yield + + +@pytest.fixture(scope='module', params=[False, True]) +def server(request, tmp_path_factory: pytest.TempPathFactory): + tmp_path = tmp_path_factory.mktemp('server') + context = run_example(EXAMPLE_PATH, tmp_path) + directory = context['output_dir'] + port = get_port() + runner = run_server if not request.param else run_server_cli + with runner(directory, port): + yield port + + +def test_server(server: int): + response = requests.get(f'http://localhost:{server}/') + assert response.status_code == 200 + assert response.headers['content-type'] == 'text/html' + + +def test_server_etag(server: int): + response = requests.get(f'http://localhost:{server}/') + assert response.status_code == 200 + assert response.headers['content-type'] == 'text/html' + etag = response.headers['etag'] + new_response = requests.get(f'http://localhost:{server}/', headers={'If-None-Match': etag}) + assert new_response.status_code == 304 + + +def test_server_stale_etag(server: int): + response = requests.get(f'http://localhost:{server}/') + assert response.status_code == 200 + assert response.headers['content-type'] == 'text/html' + etag = response.headers['etag'] + new_response = requests.get(f'http://localhost:{server}/', headers={'If-None-Match': etag + '0'}) + assert new_response.status_code == 200 + + +def test_server_404(server: int): + response = requests.get(f'http://localhost:{server}/does_not_exist') + assert response.status_code == 404