From 5179ba356083dacff2f72e8464a27cf842d2e6e6 Mon Sep 17 00:00:00 2001
From: Daniel Foerster <pydsigner@gmail.com>
Date: Thu, 21 Dec 2023 16:49:44 -0600
Subject: [PATCH] Replace JinjaMarkdownStep with JinjaExtendedMarkdownStep
 (#72)

---
 src/anchovy/components/md_frontmatter.py |  51 ++++++++
 src/anchovy/components/md_rendering.py   |  15 ++-
 src/anchovy/jinja.py                     | 143 +++--------------------
 3 files changed, 79 insertions(+), 130 deletions(-)
 create mode 100644 src/anchovy/components/md_frontmatter.py

diff --git a/src/anchovy/components/md_frontmatter.py b/src/anchovy/components/md_frontmatter.py
new file mode 100644
index 0000000..229d4c9
--- /dev/null
+++ b/src/anchovy/components/md_frontmatter.py
@@ -0,0 +1,51 @@
+import sys
+import typing as t
+
+
+def simple_frontmatter_parser(content: str) -> dict:
+    """
+    Read metadata from the front of a markdown-formatted text in a very simple
+    YAML-like format, without value parsing.
+
+    Reading stops at the first line lacking a colon or whose key is not a valid
+    identifier. Keys and values are stored as stripped strings.
+    """
+    meta = {}
+    for line in content.splitlines():
+        if ':' not in line:
+            break
+        key, value = line.split(':', 1)
+        if not key.isidentifier():
+            break
+        meta[key.strip()] = value.strip()
+
+    return meta
+
+
+def get_toml_frontmatter_parser():
+    if sys.version_info >= (3, 11):
+        import tomllib  # standard-library module on 3.11 and newer
+    else:
+        import tomli as tomllib  # older interpreters: same `loads` via tomli
+    return tomllib.loads  # import deferred until a TOML parser is requested
+
+
+def get_yaml_frontmatter_parser():
+    import ruamel.yaml  # imported only when a YAML parser is actually requested
+    return ruamel.yaml.YAML(typ='safe').load
+
+
+FrontMatterParser = t.Callable[[str], dict]  # frontmatter text in, metadata out
+FrontMatterParserName = t.Literal['simple', 'toml', 'yaml']  # built-in parser names
+# Zero-argument factories, so the toml/yaml imports run only when one is called.
+FRONTMATTER_PARSER_FACTORIES: dict[FrontMatterParserName, t.Callable[[], FrontMatterParser]] = {
+    'simple': lambda: simple_frontmatter_parser,
+    'toml': get_toml_frontmatter_parser,
+    'yaml': get_yaml_frontmatter_parser,
+}
+
+
+def get_frontmatter_parser(parser) -> FrontMatterParser:
+    """Return `parser` itself when callable, else build the parser it names."""
+    is_name = not callable(parser)
+    return FRONTMATTER_PARSER_FACTORIES[parser]() if is_name else parser
diff --git a/src/anchovy/components/md_rendering.py b/src/anchovy/components/md_rendering.py
index 6251509..a6783a2 100644
--- a/src/anchovy/components/md_rendering.py
+++ b/src/anchovy/components/md_rendering.py
@@ -3,16 +3,12 @@
 """
 from __future__ import annotations
 
-import sys
 import typing as t
 
 from markdown_it.common.utils import escapeHtml, unescapeAll
 from markdown_it.renderer import RendererHTML
 
-if sys.version_info < (3, 11):
-    import tomli as tomllib
-else:
-    import tomllib
+from .md_frontmatter import simple_frontmatter_parser, FrontMatterParser
 
 if t.TYPE_CHECKING:
     from collections.abc import Sequence
@@ -60,6 +56,10 @@ class AnchovyRendererHTML(RendererHTML):
     A customized markdown-it-py HTML renderer, with hooks for better pygments
     integration and toml frontmatter support.
     """
+    def __init__(self, parser: t.Any = None):
+        super().__init__(parser)  # `parser` goes to RendererHTML unchanged
+        self.front_matter_parser: FrontMatterParser = simple_frontmatter_parser  # default
+
     # https://github.com/executablebooks/markdown-it-py/issues/256
     def fence(self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType):
         """
@@ -76,10 +76,13 @@ def fence(self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: En
             or escapeHtml(token.content)
         )
 
+    def set_front_matter_parser(self, parser: FrontMatterParser):
+        self.front_matter_parser = parser  # front_matter() calls this from now on
+
     def front_matter(self, tokens: Sequence[Token], idx: int, _options: OptionsDict, env: EnvType):
         """
-        Handles parsing markdown frontmatter using TOML.
+        Handles parsing markdown frontmatter using `self.front_matter_parser`.
         """
-        parsed = tomllib.loads(tokens[idx].content)
+        parsed = self.front_matter_parser(tokens[idx].content) or {}  # None (empty YAML) -> {}
         env['anchovy_meta'].update(parsed)
         return ''
diff --git a/src/anchovy/jinja.py b/src/anchovy/jinja.py
index 26c47d9..38b6521 100644
--- a/src/anchovy/jinja.py
+++ b/src/anchovy/jinja.py
@@ -11,6 +11,10 @@
 
 from .dependencies import PipDependency, Dependency
 from .simple import BaseStandardStep
+from .components.md_frontmatter import (
+    FrontMatterParser, FrontMatterParserName,
+    get_frontmatter_parser,
+)
 
 if t.TYPE_CHECKING:
     from collections.abc import Sequence
@@ -79,129 +83,7 @@ def render_template(self, template_name: str, meta: dict[str, t.Any], output_pat
 
 class JinjaMarkdownStep(JinjaRenderStep):
     """
-    A Step for rendering Markdown using Jinja templates. Parses according to
-    CommonMark and renders to HTML by default.
-    """
-    @classmethod
-    def _build_markdownit(cls):
-        import markdown_it
-        processor = markdown_it.MarkdownIt()
-
-        def convert(md_string: str) -> str:
-            return processor.render(md_string)
-
-        return convert
-
-    @classmethod
-    def _build_mistletoe(cls):
-        import mistletoe
-
-        def convert(md_string: str) -> str:
-            return mistletoe.markdown(md_string)
-
-        return convert
-
-    @classmethod
-    def _build_markdown(cls):
-        import markdown
-        processor = markdown.Markdown()
-
-        def convert(md_string: str):
-            return processor.convert(md_string)
-
-        return convert
-
-    @classmethod
-    def _build_commonmark(cls):
-        import commonmark
-        parser = commonmark.Parser()
-        renderer = commonmark.HtmlRenderer()
-
-        def convert(md_string: str) -> str:
-            return renderer.render(parser.parse(md_string))
-
-        return convert
-
-    @classmethod
-    def get_options(cls):
-        """
-        Helper method returning a list of tuples of dependencies and markdown
-        renderer factories for those dependencies.
-        """
-        return [
-            (PipDependency('markdown-it-py', check_name='markdown_it'), cls._build_markdownit),
-            (PipDependency('mistletoe'), cls._build_mistletoe),
-            (PipDependency('markdown'), cls._build_markdown),
-            (PipDependency('commonmark'), cls._build_commonmark),
-        ]
-
-    @classmethod
-    def get_dependencies(cls):
-        deps = [option[0] for option in cls.get_options()]
-        dep_set = {reduce(lambda x, y: x | y, deps)} if deps else set[Dependency]()
-
-        return super().get_dependencies() | dep_set
-
-    def __init__(self,
-                 default_template: str | None = None,
-                 md_processor: MDProcessor | None = None,
-                 jinja_env: Environment | None = None,
-                 jinja_globals: dict[str, t.Any] | None = None):
-        super().__init__(jinja_env, jinja_globals)
-        self.default_template = default_template
-        self._md_processor = md_processor
-
-    @property
-    def md_processor(self):
-        """
-        Returns the markdown processor for this Step, creating it if necessary.
-        """
-        if not self._md_processor:
-            for dep, factory in self.get_options():
-                if dep.satisfied:
-                    self._md_processor = factory()
-                    break
-            else:
-                raise RuntimeError('Markdown processor could not be initialized!')
-        return self._md_processor
-
-
-    def __call__(self, path: Path, output_paths: list[Path]):
-        meta, content = self.extract_metadata(path.read_text(self.encoding))
-        meta |= {'rendered_markdown': self.md_processor(content.strip()).strip()}
-
-        template_path = self.render_template(
-            meta.get('template', self.default_template),
-            meta,
-            output_paths
-        )
-        if template_path:
-            return [path, Path(template_path)], output_paths
-
-    def extract_metadata(self, text: str):
-        """
-        Read metadata from the front of a markdown-formatted text.
-        """
-        meta = {}
-        lines = text.splitlines()
-
-        i = 0
-        for line in lines:
-            if ':' not in line:
-                break
-            key, value = line.split(':', 1)
-            if not key.isidentifier():
-                break
-
-            meta[key.strip()] = value.strip()
-            i += 1
-
-        return meta, '\n'.join(lines[i:])
-
-
-class JinjaExtendedMarkdownStep(JinjaRenderStep):
-    """
-    A Step for extended Markdown rendering.
+    A Step for Markdown rendering.
 
     Goes beyond the default functionality of markdown-it-py to offer toml
     frontmatter, pygments syntax highlighting for code blocks, containers,
@@ -228,6 +110,7 @@ def __init__(self,
                  substitutions: dict[str, str] | None = None,
                  auto_anchors: bool = False,
                  auto_typography: bool = True,
+                 frontmatter_parser: FrontMatterParser | FrontMatterParserName = 'yaml',
                  code_highlighting: bool = True,
                  pygments_params: dict[str, t.Any] | None = None,
                  wordcount: bool = False):
@@ -249,12 +132,14 @@ def __init__(self,
             is needed beyond the default options.
         :param substitutions: A dictionary of variable names and values to
             substitute into markdown before it is rendered. See
-            `JinjaExtendedMarkdownStep.apply_substitutions()` for more details.
+            `JinjaMarkdownStep.apply_substitutions()` for more details.
         :param auto_anchors: Whether to enable the `mdit_py_plugins.anchors`
             plugin.
         :param auto_typography: Whether to enable smartquotes and replacement
             functionalities in markdown-it-py.
         :param code_highlighting: Whether to enable code highlighting.
+        :param frontmatter_parser: The name of the frontmatter parser to use,
+            or a function capable of parsing frontmatter from a string.
         :param pygments_params: Parameters to supply to
             `pygments.formatters.html.HtmlFormatter`.
         :param wordcount: Whether to enable the `mdit_py_plugins.wordcount`
@@ -268,6 +153,7 @@ def __init__(self,
         self.auto_anchors = auto_anchors
         self.auto_typography = auto_typography
         self.code_highlighting = code_highlighting
+        self.frontmatter_parser = frontmatter_parser
         self.pygments_params = pygments_params or {}
         self.wordcount = wordcount
         self._md_processor: t.Callable[[str], tuple[str, dict[str, t.Any]]] | None = None
@@ -347,6 +233,12 @@ def _build_processor(self):
             },
             renderer_cls=md_rendering.AnchovyRendererHTML
         )
+
+        t.cast(
+            md_rendering.AnchovyRendererHTML,
+            processor.renderer
+        ).set_front_matter_parser(get_frontmatter_parser(self.frontmatter_parser))
+
         processor.enable(['strikethrough', 'table'])
         if self.auto_typography:
             processor.enable(['smartquotes', 'replacements'])
@@ -375,3 +267,6 @@ def convert(md_string: str):
             return rendered_md, meta
 
         return convert
+
+
+JinjaExtendedMarkdownStep = JinjaMarkdownStep  # alias keeps the previous class name importable