liran-funaro · akaihola · May 17, 2025 · May 17, 2025 · May 17, 2025 · May 17, 2025
diff --git a/Makefile b/Makefile
@@ -27,11 +27,19 @@ doc-%:
 
 docs: doc-markdown
 
+.PHONY: doc-singlemarkdown docs-single  # command targets, not files
+doc-singlemarkdown:
+	@$(SPHINX_BUILD) -M singlemarkdown "$(SOURCE_DIR)" "$(BUILD_DIR)" $(SPHINX_OPTS) $(O) -a -t Partners
+
+docs-single: doc-singlemarkdown
 
 test-diff:
 	@echo "Building markdown..."
 	@$(SPHINX_BUILD) -M markdown "$(SOURCE_DIR)" "$(BUILD_DIR)" $(SPHINX_OPTS) $(O) -a -t Partners -j 8
 
+	@echo "Building singlemarkdown..."
+	@$(SPHINX_BUILD) -M singlemarkdown "$(SOURCE_DIR)" "$(BUILD_DIR)" $(SPHINX_OPTS) $(O) -a -t Partners
+
 	@echo "Building markdown with configuration overrides..."
 	@$(SPHINX_BUILD) -M markdown "$(SOURCE_DIR)" "$(BUILD_DIR)/overrides" $(SPHINX_OPTS) $(O) -a \
 			-D markdown_http_base="https://localhost" -D markdown_uri_doc_suffix=".html" \

diff --git a/README.md b/README.md
@@ -21,11 +21,17 @@ extensions = [
 ]
 ```
 
-Build markdown files with `sphinx-build` command
+Build separate markdown files with the `sphinx-build` command:
 ```sh
 sphinx-build -M markdown ./docs ./build
 ```
 
+Build a single consolidated markdown file with:
+```sh
+sphinx-build -M singlemarkdown ./docs ./build
+```
+This will generate a single markdown file containing all your documentation in one place.
+
 ## Configurations
 
 You can add the following configurations to your `conf.py` file:

diff --git a/pyproject.toml b/pyproject.toml
@@ -21,15 +21,17 @@ classifiers = [
 ]
 keywords = ["sphinx", "sphinx-extension", "markdown", "docs", "documentation", "builder"]
 dependencies = ["sphinx>=5.1.0", "tabulate", "docutils"]
-requires-python = ">=3.7"
+requires-python = ">=3.9"
 
 [tool.poetry.plugins] # Optional super table
 
 [tool.poetry.plugins."sphinx.builders"]
 "markdown" = "sphinx_markdown_builder"
+"singlemarkdown" = "sphinx_markdown_builder.singlemarkdown"
 
 [project.entry-points."sphinx.builders"]
 "markdown" = "sphinx_markdown_builder"
+"singlemarkdown" = "sphinx_markdown_builder.singlemarkdown"
 
 [project.optional-dependencies]
 dev = [

diff --git a/sphinx_markdown_builder/__init__.py b/sphinx_markdown_builder/__init__.py
@@ -5,14 +5,24 @@
 from sphinx.util.typing import ExtensionMetadata
 
 from sphinx_markdown_builder.builder import MarkdownBuilder
-
+from sphinx_markdown_builder.singlemarkdown import SingleFileMarkdownBuilder
 
 __version__ = "0.6.8"
 __docformat__ = "reStructuredText"
 
 
 def setup(app) -> ExtensionMetadata:
+    """Setup the Sphinx extension.
+
+    This is the main entry point for the extension.
+    """
+    # Register the regular markdown builder
     app.add_builder(MarkdownBuilder)
+
+    # Register the single file markdown builder
+    app.add_builder(SingleFileMarkdownBuilder)
+
+    # Add configuration values
     app.add_config_value("markdown_http_base", "", "html", str)
     app.add_config_value("markdown_uri_doc_suffix", ".md", "html", str)
     app.add_config_value("markdown_file_suffix", ".md", "html", str)

diff --git a/sphinx_markdown_builder/builder.py b/sphinx_markdown_builder/builder.py
@@ -47,7 +47,7 @@ class MarkdownBuilder(Builder):
 
     def __init__(self, app: Sphinx, env: BuildEnvironment = None):
         super().__init__(app, env)
-        self.writer = None
+        self.writer: MarkdownWriter | None = None
         self.sec_numbers = None
         self.current_doc_name = None
 

diff --git a/sphinx_markdown_builder/singlemarkdown.py b/sphinx_markdown_builder/singlemarkdown.py
@@ -0,0 +1,267 @@
+"""Single Markdown builder."""
+
+# pyright: reportIncompatibleMethodOverride=false, reportImplicitOverride=false
+
+from __future__ import annotations
+
+import os
+from typing import TYPE_CHECKING, cast
+
+from docutils import nodes
+from docutils.io import StringOutput
+from sphinx._cli.util.colour import darkgreen
+from sphinx.environment.adapters.toctree import global_toctree_for_doc
+from sphinx.locale import __
+from sphinx.util import logging
+from sphinx.util.docutils import SphinxTranslator, new_document
+from sphinx.util.nodes import inline_all_toctrees
+from sphinx.util.osutil import ensuredir, os_path
+
+from sphinx_markdown_builder.builder import MarkdownBuilder
+from sphinx_markdown_builder.singletranslator import SingleMarkdownTranslator
+from sphinx_markdown_builder.writer import MarkdownWriter
+
+if TYPE_CHECKING:
+    from sphinx.application import Sphinx
+    from sphinx.util.typing import ExtensionMetadata
+
+logger = logging.getLogger(__name__)
+
+
+class SingleFileMarkdownBuilder(MarkdownBuilder):
+    """Builds the whole document tree as a single Markdown page."""
+
+    name: str = "singlemarkdown"
+    epilog: str = __("The Markdown page is in %(outdir)s.")
+
+    # These are copied from SingleFileHTMLBuilder
+    copysource: bool = False
+
+    # Use the custom translator for single file output
+    default_translator_class: type[SphinxTranslator] = SingleMarkdownTranslator
+
+    def get_outdated_docs(self) -> str | list[str]:
+        return "all documents"  # a description string, not a list of docnames
+
+    def get_target_uri(self, docname: str, typ: str | None = None) -> str:
+        """Return ``#<docname>`` for known documents, else ``docname`` plus the output suffix."""
+        if docname not in self.env.all_docs:
+            # Not one of the environment's documents: fall back to a suffixed name.
+            return docname + self.out_suffix
+        # Known documents all end up on the same page, so they are linked by anchor.
+        return f"#{docname}"
+
+    def get_relative_uri(self, from_: str, to: str, typ: str | None = None) -> str:
+        """Resolve ``to`` with ``get_target_uri``; ``from_`` is ignored (one shared page)."""
+        return self.get_target_uri(docname=to, typ=typ)
+
+    def render_partial(self, node: nodes.Node | None) -> dict[str, str | bytes]:
+        """Render a single doctree node to Markdown.
+
+        :param node: Node to render; ``None`` yields only an empty ``"fragment"``.
+        :returns: Mapping whose ``"fragment"`` entry is the writer output (or "" when
+            the writer produced none); ``"title"``, ``"css"``, ``"js"`` and
+            ``"script"`` are always empty strings.
+        """
+        if node is None:
+            return {"fragment": ""}
+
+        # Use a dedicated writer instead of ``self.writer``.
+        partial_writer = MarkdownWriter(self)
+
+        # A node that is not itself a document is wrapped in a new document created
+        # from the environment settings.
+        if isinstance(node, nodes.document):
+            doctree = node
+        else:
+            doctree = new_document("", self.env.settings)
+            doctree.append(node)
+
+        _ = partial_writer.write(doctree, StringOutput(encoding="utf-8"))
+        rendered = partial_writer.output
+
+        # "fragment" first, followed by the slots that are always empty.
+        result: dict[str, str | bytes] = {"fragment": "" if rendered is None else rendered}
+        for empty_key in ("title", "css", "js", "script"):
+            result[empty_key] = ""
+
+        return result
+
+    def _get_local_toctree(
+        self, docname: str, collapse: bool = True, **kwargs: bool | int | str
+    ) -> str:
+        """Render the global toctree for ``docname`` and return it as text.
+
+        String "true"/"false" ``includehidden`` becomes a bool; an empty ``maxdepth`` is dropped.
+        """
+        hidden = kwargs.get("includehidden")
+        if isinstance(hidden, str):
+            flag = {"false": False, "true": True}.get(hidden.lower())
+            if flag is not None:
+                kwargs["includehidden"] = flag
+        if kwargs.get("maxdepth") == "":
+            del kwargs["maxdepth"]
+        toctree = global_toctree_for_doc(
+            self.env,
+            docname,
+            self,
+            collapse=collapse,
+            **kwargs,  # pyright: ignore[reportArgumentType]
+        )
+        return str(self.render_partial(toctree)["fragment"])
+
+    def assemble_doctree(self) -> nodes.document:
+        """Build one tree from the root doc via ``inline_all_toctrees`` and resolve references."""
+        root = cast(str, self.config.root_doc)
+        tree = inline_all_toctrees(self, set(), root, self.env.get_doctree(root), darkgreen, [root])
+        tree["docname"] = root
+        self.env.resolve_references(tree, root, self)
+        return tree
+
+    def assemble_toc_secnumbers(self) -> dict[str, dict[str, tuple[int, ...]]]:
+        """Collect all section numbers under ``"<docname>/<id>"`` keys, nested under the root doc."""
+        flattened: dict[str, tuple[int, ...]] = {
+            f"{name}/{anchor}": number
+            for name, per_doc in self.env.toc_secnumbers.items()
+            for anchor, number in per_doc.items()
+        }
+
+        return {cast(str, self.config.root_doc): flattened}
+
+    def assemble_toc_fignumbers(
+        self,
+    ) -> dict[str, dict[str, dict[str, tuple[int, ...]]]]:
+        """Regroup figure numbers for the single output page.
+
+        Keys become ``"<docname>/<figtype>"``; the result is nested under the root doc name.
+        """
+        regrouped: dict[str, dict[str, tuple[int, ...]]] = {}
+        for name, by_type in self.env.toc_fignumbers.items():
+            for kind, numbers in by_type.items():
+                regrouped.setdefault(f"{name}/{kind}", {}).update(numbers)
+
+        return {cast(str, self.config.root_doc): regrouped}
+
+    def get_doc_context(
+        self,
+        docname: str,  # pylint: disable=unused-argument  # pyright: ignore[reportUnusedParameter]
+        body: str,
+        metatags: str,
+    ) -> dict[str, str | bytes | bool | list[dict[str, str]] | None]:
+        """Build the page context for the single output document.
+
+        Relation entries (``parents``, ``prev``, ``next``, ``rellinks``) are left empty;
+        ``toc`` is the rendered global toctree of the root doc, or "" when there is none.
+        """
+        # ``docname`` is not consulted: the toctree is always taken from the root document.
+        root = cast(str, self.config.root_doc)
+        toctree = global_toctree_for_doc(self.env, root, self, collapse=False)
+        has_toc = bool(toctree)
+        toc = self.render_partial(toctree)["fragment"] if has_toc else ""
+        return {
+            "parents": [],
+            "prev": None,
+            "next": None,
+            "docstitle": None,
+            "title": cast(str, self.config.html_title),
+            "meta": None,
+            "body": body,
+            "metatags": metatags,
+            "rellinks": [],
+            "sourcename": "",
+            "toc": toc,
+            "display_toc": has_toc,
+        }
+
+    def write_documents(self, _docnames: set[str]) -> None:
+        """Render every found document into one Markdown file named after the root doc.
+
+        The page starts with a project heading and a table of contents, followed by
+        each document under its own anchor and heading. The root document comes
+        first and the remaining ones follow in sorted order, so repeated builds
+        produce identical output (iterating the ``found_docs`` set directly would
+        make the order vary between runs). Per-document failures and a failed
+        file write are logged as warnings instead of being raised.
+
+        :param _docnames: Ignored; all documents in ``env.found_docs`` are written.
+        """
+        # Install a writer, then run the preparation step over all known documents.
+        self.writer: MarkdownWriter | None = MarkdownWriter(self)
+        self.prepare_writing(set(self.env.all_docs))
+
+        project = cast(str, self.config.project)
+        root_doc = cast(str, self.config.root_doc)
+
+        # Root document first, then the rest in a stable, reproducible order.
+        docnames = [root_doc, *sorted(self.env.found_docs - {root_doc})]
+
+        # Heading text per document: a fixed label for the root, otherwise the last
+        # path component with "_" and "-" turned into spaces and title-cased.
+        titles = {
+            name: (
+                "Main Document"
+                if name == root_doc
+                else name.rsplit("/", 1)[-1].replace("_", " ").replace("-", " ").title()
+            )
+            for name in docnames
+        }
+
+        # Page header and table of contents.
+        content_parts: list[str] = [
+            f"# {project} Documentation\n\n",
+            "## Table of Contents\n\n",
+        ]
+        content_parts.extend(f"* [{titles[name]}](#{name})\n" for name in docnames)
+        content_parts.append("\n")
+
+        # Body: one anchored section per document.
+        for docname in docnames:
+            logger.info("Adding content from %s", docname)
+
+            try:
+                doc = self.env.get_doctree(docname)
+
+                # Anchor matching the table-of-contents link, then the heading.
+                content_parts.append(f'\n<a id="{docname}"></a>\n\n')
+                content_parts.append(f"## {titles[docname]}\n\n")
+
+                # A fresh writer per document; its ``output`` holds the rendered text.
+                self.writer = MarkdownWriter(self)
+                destination = StringOutput(encoding="utf-8")
+                _ = self.writer.write(doc, destination)
+                content_parts.append(self.writer.output if self.writer.output is not None else "")
+                content_parts.append("\n\n")
+
+            except Exception as e:  # pylint: disable=broad-exception-caught
+                # Best effort: one failing document must not abort the whole page.
+                logger.warning("Error adding content from %s: %s", docname, e)
+
+        final_content = "".join(content_parts)
+
+        # Output path is <outdir>/<root_doc><out_suffix>; create its directory first.
+        outfilename = os.path.join(self.outdir, os_path(root_doc) + self.out_suffix)
+        ensuredir(os.path.dirname(outfilename))
+
+        # A failed write is logged, not raised.
+        try:
+            with open(outfilename, "w", encoding="utf-8") as f:
+                _ = f.write(final_content)
+        except OSError as err:
+            logger.warning(__("error writing file %s: %s"), outfilename, err)
+
+
+def setup(app: Sphinx) -> ExtensionMetadata:
+    """Set up the singlemarkdown builder when this module is loaded as an extension.
+
+    Loads the main ``sphinx_markdown_builder`` extension, whose ``setup`` registers
+    ``SingleFileMarkdownBuilder``, and reports the package version to Sphinx.
+    """
+    # Imported here: the package imports this module before defining ``__version__``.
+    from sphinx_markdown_builder import __version__  # pylint: disable=import-outside-toplevel
+
+    app.setup_extension("sphinx_markdown_builder")
+    return {
+        "version": __version__,
+        "parallel_read_safe": True,
+        "parallel_write_safe": True,
+    }
diff --git a/sphinx_markdown_builder/singletranslator.py b/sphinx_markdown_builder/singletranslator.py
@@ -0,0 +1,36 @@
+"""Custom translator for single markdown file output."""
+
+# pyright: reportImplicitOverride=false
+
+import re
+from typing import TYPE_CHECKING, cast
+
+from docutils import nodes
+
+from sphinx_markdown_builder.translator import MarkdownTranslator
+
+if TYPE_CHECKING:  # pragma: no cover
+    from sphinx_markdown_builder.singlemarkdown import SingleFileMarkdownBuilder
+
+
+class SingleMarkdownTranslator(MarkdownTranslator):
+    """Markdown translator used by the single-file builder.
+
+    Emits an anchor and a heading the first time a section tagged with a given
+    ``docname`` attribute is visited.
+    """
+
+    def __init__(self, document: nodes.document, builder: "SingleFileMarkdownBuilder"):
+        super().__init__(document, builder)
+        self._seen_docs: list[str] = []  # docnames already announced, in visit order
+
+    def visit_section(self, node: nodes.Element):
+        """Announce a not-yet-seen document, then defer to the base section handling."""
+        docname: str = cast(str, node.get("docname"))
+        first_visit = bool(docname) and docname not in self._seen_docs
+        if first_visit:
+            self._seen_docs.append(docname)
+            heading = re.sub(r"[^a-zA-Z0-9-]", " ", docname.split("/")[-1]).title()
+            self.add(f'<a id="document-{docname}"></a>', prefix_eol=2)
+            self.add(f"# {heading}", prefix_eol=1, suffix_eol=2)
+        MarkdownTranslator.visit_section(self, node)