From 5179ba356083dacff2f72e8464a27cf842d2e6e6 Mon Sep 17 00:00:00 2001 From: Daniel Foerster Date: Thu, 21 Dec 2023 16:49:44 -0600 Subject: [PATCH] Replace JinjaMarkdownStep with JinjaExtendedMarkdownStep (#72) --- src/anchovy/components/md_frontmatter.py | 51 ++++++++ src/anchovy/components/md_rendering.py | 15 ++- src/anchovy/jinja.py | 143 +++-------------------- 3 files changed, 79 insertions(+), 130 deletions(-) create mode 100644 src/anchovy/components/md_frontmatter.py diff --git a/src/anchovy/components/md_frontmatter.py b/src/anchovy/components/md_frontmatter.py new file mode 100644 index 0000000..229d4c9 --- /dev/null +++ b/src/anchovy/components/md_frontmatter.py @@ -0,0 +1,51 @@ +import sys +import typing as t + + +def simple_frontmatter_parser(content: str) -> dict: + """ + Read metadata from the front of a markdown-formatted text in a very simple + YAML-like format, without value parsing. + """ + meta = {} + lines = content.splitlines() + + for line in lines: + if ':' not in line: + break + key, value = line.split(':', 1) + if not key.isidentifier(): + break + meta[key.strip()] = value.strip() + + print(meta, '...') + return meta + + +def get_toml_frontmatter_parser(): + if sys.version_info < (3, 11): + import tomli as tomllib + else: + import tomllib + return tomllib.loads + + +def get_yaml_frontmatter_parser(): + from ruamel.yaml import YAML + return YAML(typ='safe').load + + +FrontMatterParser = t.Callable[[str], dict] +FrontMatterParserName = t.Literal['simple', 'toml', 'yaml'] + +FRONTMATTER_PARSER_FACTORIES: dict[FrontMatterParserName, t.Callable[[], FrontMatterParser]] = { + 'simple': lambda: simple_frontmatter_parser, + 'toml': get_toml_frontmatter_parser, + 'yaml': get_yaml_frontmatter_parser, +} + + +def get_frontmatter_parser(parser) -> FrontMatterParser: + if callable(parser): + return parser + return FRONTMATTER_PARSER_FACTORIES[parser]() diff --git a/src/anchovy/components/md_rendering.py b/src/anchovy/components/md_rendering.py index 6251509..a6783a2 100644 --- a/src/anchovy/components/md_rendering.py +++ b/src/anchovy/components/md_rendering.py @@ -3,16 +3,12 @@ """ from __future__ import annotations -import sys import typing as t from markdown_it.common.utils import escapeHtml, unescapeAll from markdown_it.renderer import RendererHTML -if sys.version_info < (3, 11): - import tomli as tomllib -else: - import tomllib +from .md_frontmatter import simple_frontmatter_parser, FrontMatterParser if t.TYPE_CHECKING: from collections.abc import Sequence @@ -60,6 +56,10 @@ class AnchovyRendererHTML(RendererHTML): A customized markdown-it-py HTML renderer, with hooks for better pygments integration and toml frontmatter support. """ + def __init__(self, parser: t.Any = None): + super().__init__(parser) + self.front_matter_parser: FrontMatterParser = simple_frontmatter_parser + # https://github.com/executablebooks/markdown-it-py/issues/256 def fence(self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType): """ @@ -76,10 +76,13 @@ def fence(self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: En or escapeHtml(token.content) ) + def set_front_matter_parser(self, parser: FrontMatterParser): + self.front_matter_parser = parser + def front_matter(self, tokens: Sequence[Token], idx: int, _options: OptionsDict, env: EnvType): """ Handles parsing markdown frontmatter using TOML. """ - parsed = tomllib.loads(tokens[idx].content) + parsed = self.front_matter_parser(tokens[idx].content) env['anchovy_meta'].update(parsed) return '' diff --git a/src/anchovy/jinja.py b/src/anchovy/jinja.py index 26c47d9..38b6521 100644 --- a/src/anchovy/jinja.py +++ b/src/anchovy/jinja.py @@ -11,6 +11,10 @@ from .dependencies import PipDependency, Dependency from .simple import BaseStandardStep +from .components.md_frontmatter import ( + FrontMatterParser, FrontMatterParserName, + get_frontmatter_parser, +) if t.TYPE_CHECKING: from collections.abc import Sequence @@ -79,129 +83,7 @@ def render_template(self, template_name: str, meta: dict[str, t.Any], output_pat class JinjaMarkdownStep(JinjaRenderStep): """ - A Step for rendering Markdown using Jinja templates. Parses according to - CommonMark and renders to HTML by default. - """ - @classmethod - def _build_markdownit(cls): - import markdown_it - processor = markdown_it.MarkdownIt() - - def convert(md_string: str) -> str: - return processor.render(md_string) - - return convert - - @classmethod - def _build_mistletoe(cls): - import mistletoe - - def convert(md_string: str) -> str: - return mistletoe.markdown(md_string) - - return convert - - @classmethod - def _build_markdown(cls): - import markdown - processor = markdown.Markdown() - - def convert(md_string: str): - return processor.convert(md_string) - - return convert - - @classmethod - def _build_commonmark(cls): - import commonmark - parser = commonmark.Parser() - renderer = commonmark.HtmlRenderer() - - def convert(md_string: str) -> str: - return renderer.render(parser.parse(md_string)) - - return convert - - @classmethod - def get_options(cls): - """ - Helper method returning a list of tuples of dependencies and markdown - renderer factories for those dependencies. - """ - return [ - (PipDependency('markdown-it-py', check_name='markdown_it'), cls._build_markdownit), - (PipDependency('mistletoe'), cls._build_mistletoe), - (PipDependency('markdown'), cls._build_markdown), - (PipDependency('commonmark'), cls._build_commonmark), - ] - - @classmethod - def get_dependencies(cls): - deps = [option[0] for option in cls.get_options()] - dep_set = {reduce(lambda x, y: x | y, deps)} if deps else set[Dependency]() - - return super().get_dependencies() | dep_set - - def __init__(self, - default_template: str | None = None, - md_processor: MDProcessor | None = None, - jinja_env: Environment | None = None, - jinja_globals: dict[str, t.Any] | None = None): - super().__init__(jinja_env, jinja_globals) - self.default_template = default_template - self._md_processor = md_processor - - @property - def md_processor(self): - """ - Returns the markdown processor for this Step, creating it if necessary. - """ - if not self._md_processor: - for dep, factory in self.get_options(): - if dep.satisfied: - self._md_processor = factory() - break - else: - raise RuntimeError('Markdown processor could not be initialized!') - return self._md_processor - - - def __call__(self, path: Path, output_paths: list[Path]): - meta, content = self.extract_metadata(path.read_text(self.encoding)) - meta |= {'rendered_markdown': self.md_processor(content.strip()).strip()} - - template_path = self.render_template( - meta.get('template', self.default_template), - meta, - output_paths - ) - if template_path: - return [path, Path(template_path)], output_paths - - def extract_metadata(self, text: str): - """ - Read metadata from the front of a markdown-formatted text. - """ - meta = {} - lines = text.splitlines() - - i = 0 - for line in lines: - if ':' not in line: - break - key, value = line.split(':', 1) - if not key.isidentifier(): - break - - meta[key.strip()] = value.strip() - i += 1 - - return meta, '\n'.join(lines[i:]) - - -class JinjaExtendedMarkdownStep(JinjaRenderStep): - """ - A Step for extended Markdown rendering. + A Step for Markdown rendering. Goes beyond the default functionality of markdown-it-py to offer toml frontmatter, pygments syntax highlighting for code blocks, containers, @@ -228,6 +110,7 @@ def __init__(self, substitutions: dict[str, str] | None = None, auto_anchors: bool = False, auto_typography: bool = True, + frontmatter_parser: FrontMatterParser | FrontMatterParserName = 'yaml', code_highlighting: bool = True, pygments_params: dict[str, t.Any] | None = None, wordcount: bool = False): @@ -249,12 +132,14 @@ def __init__(self, is needed beyond the default options. :param substitutions: A dictionary of variable names and values to substitute into markdown before it is rendered. See - `JinjaExtendedMarkdownStep.apply_substitutions()` for more details. + `JinjaMarkdownStep.apply_substitutions()` for more details. :param auto_anchors: Whether to enable the `mdit_py_plugins.anchors` plugin. :param auto_typography: Whether to enable smartquotes and replacement functionalities in markdown-it-py. :param code_highlighting: Whether to enable code highlighting. + :param frontmatter_parser: The name of the frontmatter parser to use, + or a function capable of parsing frontmatter from a string. :param pygments_params: Parameters to supply to `pygments.formatters.html.HtmlFormatter`. :param wordcount: Whether to enable the `mdit_py_plugins.wordcount` @@ -268,6 +153,7 @@ def __init__(self, self.auto_anchors = auto_anchors self.auto_typography = auto_typography self.code_highlighting = code_highlighting + self.frontmatter_parser = frontmatter_parser self.pygments_params = pygments_params or {} self.wordcount = wordcount self._md_processor: t.Callable[[str], tuple[str, dict[str, t.Any]]] | None = None @@ -347,6 +233,12 @@ def _build_processor(self): }, renderer_cls=md_rendering.AnchovyRendererHTML ) + + t.cast( + md_rendering.AnchovyRendererHTML, + processor.renderer + ).set_front_matter_parser(get_frontmatter_parser(self.frontmatter_parser)) + processor.enable(['strikethrough', 'table']) if self.auto_typography: processor.enable(['smartquotes', 'replacements']) @@ -375,3 +267,6 @@ def convert(md_string: str): return rendered_md, meta return convert + + +JinjaExtendedMarkdownStep = JinjaMarkdownStep