diff --git a/.gitignore b/.gitignore index 1994c11..73357e7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,133 @@ -*.egg-info -*.pyc -*.swp +# Mac OS X internals +*.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +# Bower and NPM libraries +bower_components +node_modules + +# Build files +build +MANIFEST +site + +# PyCharm CE files +.idea/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# dotenv +.env + +# virtualenv +.venv/ +venv/ +ENV/ + +# Spyder project settings +.spyderproject + +# Rope project settings +.ropeproject + +mkdocs_combine/cli/mkdocs2print.py diff --git a/CHANGES b/CHANGES index 84de39e..ef56af4 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,30 @@ +0.4.0.0 by Daniel Nüst + + * Add verbose option and some logging + * Single version definition in setup.py + * Switch to mkdocs 1.0.4 or later, incl. support for new "nav" config property, which deprecates "pages", see https://github.com/mkdocs/mkdocs/pull/1504 + +0.3.1.1 by Adam Twardoch: + + * Compatibility fix for unicode filenames + +0.3.1.0 by Daniel Nüst: + +* Added admonition processing + +0.3.0.1 by Jeff Hastings: + + * Add ability to insert page breaks between pages + +0.3.0.0 by Adam Twardoch: + + * Renamed project to 'mkdocs-combine' + * Added more commandline options to mkdocscombine tool + +0.2.6.3: + + * Added support for pages without titles specified in mkdocs.yml + 0.2.6: * Fixed issues/11 (added support for underwide header rows in tables) diff --git a/README.md b/README.md index 5fceac5..239a08a 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,44 @@ -# DESCRIPTION +# mkdocs-combine -This module contains a set of filters for converting -[mkdocs](http://www.mkdocs.org) style markdown documentation into a single -[pandoc(1)](http://www.pandoc.org) flavoured markdown document. 
This is useful -for +**2018-06-05: Note that MkDocs now supports plugins that provide a better architecture for this task. I'll probably replace this project with a fork of [shauser's plugin](https://github.com/shauser/mkdocs-pdf-export-plugin)** — Adam -* Generating PDF or EPUB from your mkdocs documentation -* Generating single-page HTML from your mkdocs documentation -* Converting your mkdocs documentation to other formats, such as asciidoc. +[**`mkdocs-combine`**](https://github.com/twardoch/mkdocs-combine/) is a Python module that combines a [MkDocs](http://www.mkdocs.org/)-style Markdown source site into a single Markdown document. This is useful for -Aside from the filters the module contains a converter class tying them -together into a coherent whole and the command line converter `mkdocs2pandoc`. +* Generating PDF or EPUB from your MkDocs documentation +* Generating single-page HTML from your MkDocs documentation +* Converting your MkDocs documentation to other formats, such as asciidoc -# PREREQUISITES +The output Markdown document is compatible with [pandoc](http://www.pandoc.org/). -For generating PDF through pandoc(1) you will need to install a few things -pip(1) won't handle, namely pandoc and the somewhat exotic LaTeX packages its -default LaTeX template uses. On a Ubuntu 14.04 system this amounts to the -following packages: +This package is written in Python 2.7 and relies on `mkdocs` and the Python `Markdown` implementation. Aside from several filters, the module contains a `MkDocsCombiner` class tying them together into a coherent whole, and the command-line tool `mkdocscombine`. + +[`mkdocs-combine`](https://github.com/twardoch/mkdocs-combine/) is maintained by Adam Twardoch. It's a fork of [`mkdocs-pandoc`](https://github.com/jgrassler/mkdocs-pandoc) by Johannes Grassler. 
+ +# Installation + +_Note: The following instructions apply to both Unixoid systems and Windows._ + +If you'd like to use the development version, use + +``` +pip install git+https://github.com/twardoch/mkdocs-combine.git +``` + +Note that if you are behind a proxy, you might need to add the `--proxy` option like this + +``` +pip --proxy=http[s]://user@mydomain:port install ... +``` + +If you'd like to install a local development version from the current path, use + +``` +pip install -e . +``` + +## Pandoc compatibility + +For generating PDF through `pandoc` you will need to install a few things `pip` won't handle, namely `pandoc` and the somewhat exotic LaTeX packages its default LaTeX template uses. On a Ubuntu 14.04 system this amounts to the following packages: ``` fonts-lmodern @@ -29,84 +50,115 @@ texlive-fonts-recommended texlive-latex-recommended texlive-xetex ``` -On a Windows system you can get them through -[Chocolatey](https://chocolatey.org/). Once you have Chocolatey up and running -the following commands should leave you with everything you need to create PDF -output from Pandoc: +On a Windows system you can get them through [Chocolatey](https://chocolatey.org/). Once you have Chocolatey up and running the following commands should leave you with everything you need to create PDF output from `pandoc`: ``` choco install python choco install pandocpdf ``` -# INSTALLATION +# Usage -_Note: The following instructions apply to both Unixoid systems and Windows._ - -Make sure, you have [pip](https://pip.pypa.io/en/stable/) installed, then issue -the following command: - -``` -pip install mkdocs-pandoc -``` - -This will install the stable version. If you'd like to use the development -version, use - -``` -pip install git+https://github.com/jgrassler/mkdocs-pandoc -``` - -instead. 
Note that if you are behind a proxy, you might need to add the `--proxy` option like this +When executed in the directory where your documentation's `mkdocs.yml` and the `docs/` directory containing the actual documentation reside, `mkdocscombine` should print one long Markdown document suitable for `pandoc` on standard output. The tool can also write a long HTML file in addition to, or in place of, the Markdown file. ``` -pip --proxy=http[s]://user@mydomain:port install ... +usage: mkdocscombine [-h] [-V] [-v] [-o OUTFILE] [-f CONFIG_FILE] [-e ENCODING] + [-x EXCLUDE] [-H OUTHTML] [-y | -Y] [-c | -C] [-u | -k] + [-B | -b] [-t | -g] [-G WIDTH] [-r | -R] [-a | -A] [-m | -l] + [-i IMAGE_EXT] [-d] + +mkdocscombine.py - combines an MkDocs source site into a single Markdown +document + +optional arguments: + -h, --help show this help message and exit + -V, --version show program's version number and exit + -v, --verbose print additional info during execution + +files: + -o OUTFILE, --outfile OUTFILE + write combined Markdown to path ('-' for stdout) + -f CONFIG_FILE, --config-file CONFIG_FILE + MkDocs config file (default: mkdocs.yml) + -e ENCODING, --encoding ENCODING + set encoding for input files (default: utf-8) + -x EXCLUDE, --exclude EXCLUDE + exclude Markdown files from processing (default: none) + -H OUTHTML, --outhtml OUTHTML + write simple HTML to path ('-' for stdout) + +structure: + -y, --meta keep YAML metadata (default) + -Y, --no-meta strip YAML metadata + -c, --titles add titles from mkdocs.yml to Markdown files (default) + -C, --no-titles do not add titles to Markdown files + -u, --up-levels increase ATX header levels in Markdown files (default) + -k, --keep-levels do not increase ATX header levels in Markdown files + -B, --no-page-break do not add page break between pages (default) + -b, --page-break add page break between pages + +tables: + -t, --tables keep original Markdown tables (default) + -g, --grid-tables combine Markdown tables to Pandoc-style grid tables 
-G WIDTH, --grid-width WIDTH + char width of converted grid tables (default: 100) + +links: + -r, --refs keep MkDocs-style cross-references + -R, --no-refs replace MkDocs-style cross-references by just their + title (default) + -a, --anchors keep HTML anchor tags + -A, --no-anchors strip out HTML anchor tags (default) + +extras: + -m, --math keep \( \) Markdown math notation as is (default) + -l, --latex combine the \( \) Markdown math into LaTeX $$ inlines + -i IMAGE_EXT, --image-ext IMAGE_EXT + replace image extensions by (default: no replacement) + -d, --admonitions-md convert admonitions to HTML already in the Markdown ``` -# USAGE - -When executed in the directory where your documentation's `mkdoc.yml` and the -`docs/` directory containing the actual documentation resides, `mkdocs2pandoc` -should print one long Markdown document suitable for `pandoc(1)` on standard -output. This works under the following assumptions: - ## Usage example ``` cd ~/mydocs -mkdocs2pandoc > mydocs.pd +mkdocscombine -o mydocs.pd pandoc --toc -f markdown+grid_tables+table_captions -o mydocs.pdf mydocs.pd # Generate PDF pandoc --toc -f markdown+grid_tables -t epub -o mydocs.epub mydocs.pd # Generate EPUB ``` -# BUGS +# Bugs The following things are known to be broken: -* `mdtableconv.py`: Line wrapping in table cells will wrap links, which causes - whitespace to be inserted in their target URLs, at least in PDF output. While - this is a bit of a Pandoc problem, it can and should be fixed in this module. - -* [Internal Hyperlinks](http://www.mkdocs.org/user-guide/writing-your-docs/#internal-hyperlinks) - between markdown documents will be reduced to their link titles, i.e. they - will not be links in the resulting Pandoc document. +* Line wrapping in table cells will wrap links, which causes whitespace to be inserted in their target URLs, at least in PDF output. While this is a bit of a Pandoc problem, it can and should be fixed in this module. 
+* [Internal Hyperlinks](http://www.mkdocs.org/user-guide/writing-your-docs/#internal-hyperlinks) between markdown documents will be reduced to their link titles, i.e. they will not be links in the resulting Pandoc document. -# COPYRIGHT +# Copyright -(C) 2015 Johannes Grassler + * © 2015 Johannes Grassler + * © 2017 Adam Twardoch Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 +[http://www.apache.org/licenses/LICENSE-2.0](http://www.apache.org/licenses/LICENSE-2.0) You will also find a copy of the License in the file `LICENSE` in the top level -directory of this source code repository. In case the above URL is unreachable -and/or differs from the copy in this file, the file takes precedence. +directory of this source code repository. In case the above URL is unreachable and/or differs from the copy in this file, the file takes precedence. Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
+ + +### Projects related to Markdown and MkDocs by Adam Twardoch: + +* [https://twardoch.github.io/markdown-rundown/](https://twardoch.github.io/markdown-rundown/) — summary of Markdown formatting styles [git](https://github.com/twardoch/markdown-rundown) +* [https://twardoch.github.io/markdown-steroids/](https://twardoch.github.io/markdown-steroids/) — Some extensions for Python Markdown [git](https://github.com/twardoch/markdown-steroids) +* [https://twardoch.github.io/markdown-utils/](https://twardoch.github.io/markdown-utils/) — various utilities for working with Markdown-based documents [git](https://github.com/twardoch/markdown-utils) +* [https://twardoch.github.io/mkdocs-combine/](https://twardoch.github.io/mkdocs-combine/) — convert an MkDocs Markdown source site to a single Markdown document [git](https://github.com/twardoch/mkdocs-combine) +* [https://github.com/twardoch/noto-mkdocs-theme/tree/rework](https://github.com/twardoch/noto-mkdocs-theme/tree/rework) — great Material Design-inspired theme for MkDocs [git](https://github.com/twardoch/noto-mkdocs-theme) +* [https://twardoch.github.io/clinker-mktheme/](https://twardoch.github.io/clinker-mktheme/) — great theme for MkDocs [git](https://github.com/twardoch/clinker-mktheme) + diff --git a/_config.yml b/_config.yml new file mode 100644 index 0000000..2f7efbe --- /dev/null +++ b/_config.yml @@ -0,0 +1 @@ +theme: jekyll-theme-minimal \ No newline at end of file diff --git a/install-macos.command b/install-macos.command new file mode 100644 index 0000000..92245f1 --- /dev/null +++ b/install-macos.command @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +dir=${0%/*} +if [ "$dir" = "$0" ]; then + dir="." +fi +cd "$dir" + +# Install me +pip install --user --upgrade -r py-requirements.txt +pip install --user --upgrade . +echo "# Done!" 
diff --git a/mkdocs_combine/__init__.py b/mkdocs_combine/__init__.py new file mode 100644 index 0000000..b562e0e --- /dev/null +++ b/mkdocs_combine/__init__.py @@ -0,0 +1 @@ +from mkdocs_combine.mkdocs_combiner import MkDocsCombiner diff --git a/mkdocs_pandoc/cli/__init__.py b/mkdocs_combine/cli/__init__.py similarity index 100% rename from mkdocs_pandoc/cli/__init__.py rename to mkdocs_combine/cli/__init__.py diff --git a/mkdocs_combine/cli/mkdocscombine.py b/mkdocs_combine/cli/mkdocscombine.py new file mode 100644 index 0000000..95a05df --- /dev/null +++ b/mkdocs_combine/cli/mkdocscombine.py @@ -0,0 +1,184 @@ +#!/usr/bin/python +# +# Copyright 2015 Johannes Grassler +# Copyright 2017 Adam Twardoch +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# mkdocscombine - combines an MkDocs source site into a single Markdown document + +from __future__ import print_function + +import argparse +import codecs +import sys + +import mkdocs_combine +from mkdocs_combine.exceptions import FatalError + +from pkg_resources import get_distribution +__version__ = get_distribution('mkdocs-combine').version + +def stdout_file(encoding): + # Python 2 and Python 3 have mutually incompatible approaches to writing + # encoded data to sys.stdout, so we'll have to pick the appropriate one. 
+ + if sys.version_info.major == 2: + return codecs.getwriter(encoding)(sys.stdout) + elif sys.version_info.major >= 3: + return open(sys.stdout.fileno(), mode='w', encoding=encoding, buffering=1) + + +def parse_args(): + args = argparse.ArgumentParser( + description="mkdocscombine.py " + + "- combines an MkDocs source site into a single Markdown document") + + args.add_argument('-V', '--version', action='version', + version='%(prog)s {version}'.format(version=__version__)) + args.add_argument('-v', '--verbose', dest='verbose', action='store_true', + help="print additional info during execution") + + args_files = args.add_argument_group('files') + args_files.add_argument('-o', '--outfile', dest='outfile', default=None, + help="write combined Markdown to path ('-' for stdout)") + args_files.add_argument('-f', '--config-file', dest='config_file', default='mkdocs.yml', + help="MkDocs config file (default: mkdocs.yml)") + args_files.add_argument('-e', '--encoding', dest='encoding', default='utf-8', + help="set encoding for input files (default: utf-8)") + args_files.add_argument('-x', '--exclude', dest='exclude', default=None, action='append', + help="exclude Markdown files from processing (default: none)") + args_files.add_argument('-H', '--outhtml', dest='outhtml', default=None, + help="write simple HTML to path ('-' for stdout)") + + args_struct = args.add_argument_group('structure') + args_strip_metadata = args_struct.add_mutually_exclusive_group(required=False) + args_strip_metadata.add_argument('-y', '--meta', dest='strip_metadata', action='store_false', + help='keep YAML metadata (default)') + args_strip_metadata.add_argument('-Y', '--no-meta', dest='strip_metadata', action='store_true', + help='strip YAML metadata') + args.set_defaults(strip_metadata=False) + + args_add_chapter_heads = args_struct.add_mutually_exclusive_group(required=False) + args_add_chapter_heads.add_argument('-c', '--titles', dest='add_chapter_heads', action='store_true', + help='add titles 
from mkdocs.yml to Markdown files (default)') + args_add_chapter_heads.add_argument('-C', '--no-titles', dest='add_chapter_heads', action='store_false', + help='do not add titles to Markdown files') + args.set_defaults(add_chapter_heads=True) + + args_increase_heads = args_struct.add_mutually_exclusive_group(required=False) + args_increase_heads.add_argument('-u', '--up-levels', dest='increase_heads', action='store_true', + help='increase ATX header levels in Markdown files (default)') + args_increase_heads.add_argument('-k', '--keep-levels', dest='increase_heads', action='store_false', + help='do not increase ATX header levels in Markdown files') + args.set_defaults(increase_heads=True) + + args_add_page_break = args_struct.add_mutually_exclusive_group(required=False) + args_add_page_break.add_argument('-B', '--no-page-break', dest='add_page_break', action='store_false', + help='do not add page break between pages (default)') + args_add_page_break.add_argument('-b', '--page-break', dest='add_page_break', action='store_true', + help='add page break between pages') + args.set_defaults(add_page_break=False) + + args_tables = args.add_argument_group('tables') + args_filter_tables = args_tables.add_mutually_exclusive_group(required=False) + args_filter_tables.add_argument('-t', '--tables', dest='filter_tables', action='store_false', + help='keep original Markdown tables (default)') + args_filter_tables.add_argument('-g', '--grid-tables', dest='filter_tables', action='store_true', + help='combine Markdown tables to Pandoc-style grid tables') + args.set_defaults(filter_tables=False) + + args_tables.add_argument('-G', '--grid-width', dest='width', default=100, type=int, + help="char width of converted grid tables (default: 100)") + + args_links = args.add_argument_group('links') + args_filter_xrefs = args_links.add_mutually_exclusive_group(required=False) + args_filter_xrefs.add_argument('-r', '--refs', dest='filter_xrefs', action='store_false', + help='keep MkDocs-style 
cross-references') + args_filter_xrefs.add_argument('-R', '--no-refs', dest='filter_xrefs', action='store_true', + help='replace MkDocs-style cross-references by just their title (default)') + args.set_defaults(filter_xrefs=True) + + args_strip_anchors = args_links.add_mutually_exclusive_group(required=False) + args_strip_anchors.add_argument('-a', '--anchors', dest='strip_anchors', action='store_false', + help='keep HTML anchor tags') + args_strip_anchors.add_argument('-A', '--no-anchors', dest='strip_anchors', action='store_true', + help='strip out HTML anchor tags (default)') + args.set_defaults(strip_anchors=True) + + args_extras = args.add_argument_group('extras') + args_convert_math = args_extras.add_mutually_exclusive_group(required=False) + args_convert_math.add_argument('-m', '--math', dest='convert_math', action='store_false', + help=r'keep \( \) Markdown math notation as is (default)') + args_convert_math.add_argument('-l', '--latex', dest='convert_math', action='store_true', + help=r'combine the \( \) Markdown math into LaTeX $$ inlines') + args.set_defaults(convert_math=False) + + args_extras.add_argument('-i', '--image-ext', dest='image_ext', default=None, + help="replace image extensions by (default: no replacement)") + args_extras.add_argument('-d', '--admonitions-md', dest='convert_admonition_md', action='store_true', + help='convert admonitions to HTML already in the Markdown') + + return args.parse_args() + + +def main(): + args = parse_args() + + try: + mkdocs_combiner = mkdocs_combine.MkDocsCombiner( + config_file=args.config_file, + exclude=args.exclude, + image_ext=args.image_ext, + width=args.width, + encoding=args.encoding, + filter_tables=args.filter_tables, + filter_xrefs=args.filter_xrefs, + strip_anchors=args.strip_anchors, + strip_metadata=args.strip_metadata, + convert_math=args.convert_math, + add_chapter_heads=args.add_chapter_heads, + increase_heads=args.increase_heads, + add_page_break=args.add_page_break, + verbose=args.verbose, 
+ convert_admonition_md=args.convert_admonition_md + ) + except FatalError as e: + print(e.message, file=sys.stderr) + return e.status + + mkdocs_combiner.combine() + + combined_md_file = None + if args.outfile == '-': + combined_md_file = stdout_file(args.encoding) + elif args.outfile: + try: + combined_md_file = codecs.open(args.outfile, 'w', encoding=args.encoding) + except IOError as e: + print("Couldn't open %s for writing: %s" % (args.outfile, e.strerror), file=sys.stderr) + if combined_md_file: + combined_md_file.write('\n'.join(mkdocs_combiner.combined_md_lines)) + combined_md_file.close() + + html_file = None + if args.outhtml == '-': + html_file = stdout_file(args.encoding) + elif args.outhtml: + try: + html_file = codecs.open(args.outhtml, 'w', encoding=args.encoding) + except IOError as e: + print("Couldn't open %s for writing: %s" % (args.outhtml, e.strerror), file=sys.stderr) + if html_file: + html_file.write(mkdocs_combiner.to_html()) + html_file.close() diff --git a/mkdocs_pandoc/exceptions.py b/mkdocs_combine/exceptions.py similarity index 100% rename from mkdocs_pandoc/exceptions.py rename to mkdocs_combine/exceptions.py diff --git a/mkdocs_pandoc/filters/__init__.py b/mkdocs_combine/filters/__init__.py similarity index 100% rename from mkdocs_pandoc/filters/__init__.py rename to mkdocs_combine/filters/__init__.py diff --git a/mkdocs_combine/filters/admonitions.py b/mkdocs_combine/filters/admonitions.py new file mode 100644 index 0000000..9beab94 --- /dev/null +++ b/mkdocs_combine/filters/admonitions.py @@ -0,0 +1,93 @@ +#!/usr/bin/python +# +# Copyright 2015 Johannes Grassler +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# admonitions.py - converts Markdown admonition blocks to HTML div elements + +import markdown.extensions.admonition as adm +import markdown.blockparser +from xml.etree import ElementTree as etree + +class AdmonitionFilter(adm.AdmonitionProcessor): + + def __init__(self, encoding='utf-8', tab_length = 4): + self.encoding = encoding + self.tab_length = tab_length + + def blocks(self, lines): + """Groups lines into markdown blocks""" + state = markdown.blockparser.State() + blocks = [] + + # We use three states: start, ``` and '\n' + state.set('start') + + # index of current block + currblock = 0 + + for line in lines: + line += '\n' + if state.isstate('start'): + if line[:3] == '```': + state.set('```') + else: + state.set('\n') + blocks.append('') + currblock = len(blocks) - 1 + else: + marker = line[:3] # Will capture either '\n' or '```' + if state.isstate(marker): + state.reset() + blocks[currblock] += line + + return blocks + + def run(self, lines): + """Filter method: Passes all blocks through convert_admonition() and returns a list of lines.""" + ret = [] + + blocks = self.blocks(lines) + for block in blocks: + + ret.extend(self.convert_admonition(block)) + + return ret + + def convert_admonition(self, block): + lines = block.split('\n') + + if self.RE.search(block): + + m = self.RE.search(lines.pop(0)) + klass, title = self.get_class_and_title(m) + + lines = list(map(lambda x:self.detab(x)[0], lines)) + lines = '\n'.join(lines[:-1]) + + div = etree.Element('div') + div.set('class', '%s %s' % (self.CLASSNAME, klass)) + if title: + p = etree.SubElement(div, 
'p') + p.set('class', self.CLASSNAME_TITLE) + p.text = title + + content = etree.SubElement(div, 'p') + content.text = lines + + string = etree.tostring(div).decode(self.encoding) + lines = [string] + lines.append('') + + return lines diff --git a/mkdocs_pandoc/filters/anchors.py b/mkdocs_combine/filters/anchors.py similarity index 100% rename from mkdocs_pandoc/filters/anchors.py rename to mkdocs_combine/filters/anchors.py diff --git a/mkdocs_pandoc/filters/chapterhead.py b/mkdocs_combine/filters/chapterhead.py similarity index 100% rename from mkdocs_pandoc/filters/chapterhead.py rename to mkdocs_combine/filters/chapterhead.py diff --git a/mkdocs_pandoc/filters/exclude.py b/mkdocs_combine/filters/exclude.py similarity index 100% rename from mkdocs_pandoc/filters/exclude.py rename to mkdocs_combine/filters/exclude.py diff --git a/mkdocs_pandoc/filters/headlevels.py b/mkdocs_combine/filters/headlevels.py similarity index 70% rename from mkdocs_pandoc/filters/headlevels.py rename to mkdocs_combine/filters/headlevels.py index 64a4728..4bb94d1 100644 --- a/mkdocs_pandoc/filters/headlevels.py +++ b/mkdocs_combine/filters/headlevels.py @@ -1,4 +1,5 @@ # Copyright 2015 Johannes Grassler +# Copyright 2017 Adam Twardoch # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # - import re # TODO: Implement handling for Setext style headers. 
@@ -23,7 +23,7 @@ class HeadlevelFilter(object): def __init__(self, pages): max_offset = 0 - # Determine maximum header level from nesting in mkdocs.yml + # Determine maximum header level from nesting in mkdocs.yml for page in pages: if page['level'] > max_offset: max_offset = page['level'] @@ -32,9 +32,15 @@ def __init__(self, pages): def run(self, lines): + not_in_code_block = True """Filter method""" ret = [] for line in lines: - ret.append(re.sub(r'^#', '#' + ('#' * self.offset), line)) + if '```' in line: + not_in_code_block = not not_in_code_block + if not_in_code_block == True: + line = re.sub(r'^(#+\s)', '#' * self.offset + r'\1', line) + line = re.sub(r'^#######+', '######', line) + ret.append(line) return ret diff --git a/mkdocs_pandoc/filters/images.py b/mkdocs_combine/filters/images.py similarity index 98% rename from mkdocs_pandoc/filters/images.py rename to mkdocs_combine/filters/images.py index a03c819..66502b7 100644 --- a/mkdocs_pandoc/filters/images.py +++ b/mkdocs_combine/filters/images.py @@ -49,7 +49,7 @@ def run(self, lines): if match.group(0) in processed: break # Skip URLs - if re.match('\w+://', match.group(2)): + if re.match(r'\w+://', match.group(2)): break alt = match.group(1) img_name = match.group(2) diff --git a/mkdocs_pandoc/filters/include.py b/mkdocs_combine/filters/include.py similarity index 100% rename from mkdocs_pandoc/filters/include.py rename to mkdocs_combine/filters/include.py diff --git a/mkdocs_combine/filters/math.py b/mkdocs_combine/filters/math.py new file mode 100644 index 0000000..3770740 --- /dev/null +++ b/mkdocs_combine/filters/math.py @@ -0,0 +1,28 @@ +# Copyright 2015 Johannes Grassler +# Copyright 2016 Kergonath +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import re + +class MathFilter(object): + r"""Turn the \( \) Markdown math notation into LaTex $$ inlines""" + + def run(self, lines): + """Filter method""" + ret = [] + for line in lines: + ret.append(re.sub(r'\\\((.*)\\\)', r'$\1$', line)) + + return ret diff --git a/mkdocs_combine/filters/metadata.py b/mkdocs_combine/filters/metadata.py new file mode 100644 index 0000000..b5f8425 --- /dev/null +++ b/mkdocs_combine/filters/metadata.py @@ -0,0 +1,34 @@ +# Copyright 2015 Johannes Grassler +# Copyright 2016 Kergonath +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import re + +class MetadataFilter(object): + r"""Strip out metadata from the beginning of the file""" + + def run(self, lines): + """Filter method""" + ret = [] + header = True + for line in lines: + if header: + if not re.match(r'^[a-zA-Z\ ]:', line): + header = False + ret.append(line) + else: + ret.append(line) + + return ret diff --git a/mkdocs_pandoc/filters/tables.py b/mkdocs_combine/filters/tables.py similarity index 100% rename from mkdocs_pandoc/filters/tables.py rename to mkdocs_combine/filters/tables.py diff --git a/mkdocs_pandoc/filters/toc.py b/mkdocs_combine/filters/toc.py similarity index 100% rename from mkdocs_pandoc/filters/toc.py rename to mkdocs_combine/filters/toc.py diff --git a/mkdocs_pandoc/filters/xref.py b/mkdocs_combine/filters/xref.py similarity index 86% rename from mkdocs_pandoc/filters/xref.py rename to mkdocs_combine/filters/xref.py index 32d4f9c..5c1786b 100644 --- a/mkdocs_pandoc/filters/xref.py +++ b/mkdocs_combine/filters/xref.py @@ -26,10 +26,10 @@ def run(self, lines): ret = [] for line in lines: while True: - match = re.search(r'\[(.*?)\]\((.*?\.md)\)', line) + match = re.search(r'[^!]\[([^\]]+?)\]\(([^http].*?)\)', line) if match != None: title = match.group(1) - line = re.sub(r'\[.*?\]\(.*?\.md\)', title, line, count=1) + line = re.sub(r'[^!]\[[^\]]+?\]\([^http].*?\)', title, line, count=1) else: break ret.append(line) diff --git a/mkdocs_combine/mkdocs_combiner.py b/mkdocs_combine/mkdocs_combiner.py new file mode 100644 index 0000000..06cd21e --- /dev/null +++ b/mkdocs_combine/mkdocs_combiner.py @@ -0,0 +1,283 @@ +# Copyright 2015 Johannes Grassler +# Copyright 2017 Adam Twardoch +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import codecs +import os +import sys + +import markdown +import mkdocs.config +import mkdocs.utils + +import mkdocs_combine.filters.anchors +import mkdocs_combine.filters.chapterhead +import mkdocs_combine.filters.exclude +import mkdocs_combine.filters.headlevels +import mkdocs_combine.filters.images +import mkdocs_combine.filters.include +import mkdocs_combine.filters.math +import mkdocs_combine.filters.metadata +import mkdocs_combine.filters.tables +import mkdocs_combine.filters.toc +import mkdocs_combine.filters.xref +import mkdocs_combine.filters.admonitions +from mkdocs_combine.exceptions import FatalError + + +class MkDocsCombiner: + """Top level converter class. 
Instantiate separately for each mkdocs.yml.""" + + def __init__(self, **kwargs): + self.config_file = kwargs.get('config_file', 'mkdocs.yml') + self.encoding = kwargs.get('encoding', 'utf-8') + self.exclude = kwargs.get('exclude', None) + self.filter_tables = kwargs.get('filter_tables', True) + self.filter_xrefs = kwargs.get('filter_xrefs', True) + self.image_ext = kwargs.get('image_ext', None) + self.strip_anchors = kwargs.get('strip_anchors', True) + self.strip_metadata = kwargs.get('strip_metadata', True) + self.convert_math = kwargs.get('convert_math', True) + self.width = kwargs.get('width', 100) + self.add_chapter_heads = kwargs.get('add_chapter_heads', True) + self.add_page_break = kwargs.get('add_page_break', False) + self.increase_heads = kwargs.get('increase_heads', True) + self.convert_admonition_md = kwargs.get('convert_admonition_md', False) + self.verbose = kwargs.get('verbose', False) + self.combined_md_lines = [] + self.html_bare = u'' + self.html = u'' + + self.log('Arguments: ' + str(kwargs)) + + try: + cfg = codecs.open(self.config_file, 'r', self.encoding) + except IOError as e: + raise FatalError("Couldn't open %s for reading: %s" % (self.config_file, + e.strerror), 1) + + self.config = mkdocs.config.load_config(config_file=self.config_file) + + if not u'docs_dir' in self.config: + self.config[u'docs_dir'] = u'docs' + + if not u'site_dir' in self.config: + self.config[u'site_dir'] = u'site' + + # Set filters depending on markdown extensions from config + # Defaults first... 
+ self.filter_include = False + self.filter_toc = False + + # ...then override defaults based on config, if any: + + if u'markdown_extensions' in self.config: + for ext in self.config[u'markdown_extensions']: + extname = u'' + # extension entries may be dicts (for passing extension parameters) + if type(ext) is dict: + extname = list(ext.keys())[0].split(u'(')[0] + if type(ext) is str or type(ext) is self.encoding: + extname = ext + + if extname == u'markdown_include.include': + self.filter_include = True + if extname == u'toc': + self.filter_toc = True + + cfg.close() + + def log(self, message): + """Print messages if verbose mode is activated""" + if(self.verbose): + print('[mkdocscombine] ' + message) + + def flatten_pages(self, pages, level=1): + """Recursively flattens pages data structure into a one-dimensional data structure""" + flattened = [] + + if sys.version_info.major < 3: + str_type = (str, self.encoding, unicode) + else: + str_type = (str, self.encoding) + + for page in pages: + if type(page) in str_type: + flattened.append( + { + u'file' : page, + u'title': u'%s {: .page-title}' % mkdocs.utils.filename_to_title(page), + u'level': level, + }) + if type(page) is list: + flattened.append( + { + u'file' : page[0], + u'title': u'%s {: .page-title}' % page[1], + u'level': level, + }) + if type(page) is dict: + if type(list(page.values())[0]) in (str, self.encoding): + flattened.append( + { + u'file' : list(page.values())[0], + u'title': u'%s {: .page-title}' % list(page.keys())[0], + u'level': level, + }) + if type(list(page.values())[0]) is list: + # Add the parent section + flattened.append( + { + u'file' : None, + u'title': u'%s {: .page-title}' % list(page.keys())[0], + u'level': level, + }) + # Add children sections + flattened.extend( + self.flatten_pages( + list(page.values())[0], + level + 1) + ) + return flattened + + def combine(self): + """User-facing conversion method. 
Returns combined document as a list of lines.""" + lines = [] + + if(self.verbose): + self.log('Running mkdocs-combine in verbose mode') + + self.log(u'Configuration: {0}'.format(self.config)) + + pages = [] + if u'pages' in self.config and self.config[u'pages'] is not None: + pages = self.flatten_pages(self.config[u'pages']) + self.log('Pages: ') + else: + if u'nav' in self.config and self.config[u'nav'] is not None: + pages = self.flatten_pages(self.config[u'nav']) + self.log('Pages (using "nav" property): ') + + f_exclude = mkdocs_combine.filters.exclude.ExcludeFilter( + exclude=self.exclude) + + f_include = mkdocs_combine.filters.include.IncludeFilter( + base_path=self.config[u'docs_dir'], + encoding=self.encoding) + + # First, do the processing that must be done on a per-file basis: + # Adjust header levels, insert chapter headings and adjust image paths. + + f_headlevel = mkdocs_combine.filters.headlevels.HeadlevelFilter(pages) + + for page in pages: + lines_tmp = [] + if page[u'file']: + fname = os.path.join(self.config[u'docs_dir'], page[u'file']) + try: + with codecs.open(fname, 'r', self.encoding) as p: + for line in p.readlines(): + lines_tmp.append(line.rstrip()) + except IOError as e: + raise FatalError("Couldn't open %s for reading: %s" % (fname, + e.strerror), 1) + + f_chapterhead = mkdocs_combine.filters.chapterhead.ChapterheadFilter( + headlevel=page[u'level'], + title=page[u'title'] + ) + + f_image = mkdocs_combine.filters.images.ImageFilter( + filename=page[u'file'], + image_path=self.config[u'site_dir'], + image_ext=self.image_ext) + + if self.exclude: + lines_tmp = f_exclude.run(lines_tmp) + + if self.filter_include: + lines_tmp = f_include.run(lines_tmp) + + lines_tmp = mkdocs_combine.filters.metadata.MetadataFilter().run(lines_tmp) + if self.increase_heads: + lines_tmp = f_headlevel.run(lines_tmp) + if self.add_chapter_heads: + lines_tmp = f_chapterhead.run(lines_tmp) + lines_tmp = f_image.run(lines_tmp) + lines.extend(lines_tmp) + # Add an 
empty line between pages to prevent text from a previous + # file from butting up against headers in a subsequent file. + lines.append('') + if self.add_page_break: + lines.append('\\newpage') + lines.append('') + + # Strip anchor tags + if self.strip_anchors: + self.log('Stripping anchor tags') + lines = mkdocs_combine.filters.anchors.AnchorFilter().run(lines) + + # Convert math expressions + if self.convert_math: + self.log('Converting math expressions') + lines = mkdocs_combine.filters.math.MathFilter().run(lines) + + # Fix cross references + if self.filter_xrefs: + self.log('Fixing cross references') + lines = mkdocs_combine.filters.xref.XrefFilter().run(lines) + + # Convert admonitions already for Markdown output + if self.convert_admonition_md: + self.log('Converting admonitions to HTML in Markdown output') + lines = mkdocs_combine.filters.admonitions.AdmonitionFilter().run(lines) + + if self.filter_toc: + self.log('Creating TOC') + lines = mkdocs_combine.filters.toc.TocFilter().run(lines) + + if self.filter_tables: + self.log('Filtering tables') + lines = mkdocs_combine.filters.tables.TableFilter().run(lines) + + self.combined_md_lines = lines + return (self.combined_md_lines) + + def to_html(self): + md = u"\n".join(self.combined_md_lines) + mkdocs_extensions = self.config.get(u'markdown_extensions', []) + extensions = ['markdown.extensions.attr_list'] + extension_configs = self.config.get(u'mdx_configs', []) + for ext in mkdocs_extensions: + if type(ext) is str or type(ext) is self.encoding: + extname = str(ext) + extensions.append(extname) + elif type(ext) is dict: + extname = str(ext.keys()[0]) + extensions.append(extname) + extension_configs[extname] = ext[extname] + self.html_bare = markdown.markdown(md, extensions=extensions, + extension_configs=extension_configs, + output_format='html5') + self.html = u""" + + + + + + {} + + + """.format(self.html_bare) + return (self.html) diff --git a/mkdocs_pandoc/__init__.py b/mkdocs_pandoc/__init__.py deleted 
file mode 100644 index 8b41ea8..0000000 --- a/mkdocs_pandoc/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from mkdocs_pandoc.pandoc_converter import PandocConverter diff --git a/mkdocs_pandoc/cli/mkdocs2pandoc.py b/mkdocs_pandoc/cli/mkdocs2pandoc.py deleted file mode 100644 index 5cb12a2..0000000 --- a/mkdocs_pandoc/cli/mkdocs2pandoc.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/python -# -# Copyright 2015 Johannes Grassler -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# mkdocs2pandoc - converts mkdocs documentation into a single pandoc markdown document - -from __future__ import print_function - -import argparse -import codecs -import sys - -import mkdocs.config -from mkdocs_pandoc.exceptions import FatalError - -import mkdocs_pandoc - -def main(): - opts = argparse.ArgumentParser( - description="mdtableconv.py " + - "- converts pipe delimited tables to Pandoc's grid tables") - - opts.add_argument('-e', '--encoding', default='utf-8', - help="Set encoding for input files (default: utf-8)") - - opts.add_argument('-f', '--config-file', default='mkdocs.yml', - help="mkdocs configuration file to use") - - opts.add_argument('-i', '--image-ext', default=None, - help="Extension to substitute image extensions by (default: no replacement)") - - opts.add_argument('-w', '--width', default=100, - help="Width of generated grid tables in characters (default: 100)") - - opts.add_argument('-x', '--exclude', default=None, action='append', - help="Include files to skip (default: 
none)") - - opts.add_argument('-o', '--outfile', default=None, - help="File to write finished pandoc document to (default: STDOUT)") - - args = opts.parse_args() - - # Python 2 and Python 3 have mutually incompatible approaches to writing - # encoded data to sys.stdout, so we'll have to pick the appropriate one. - - if sys.version_info.major == 2: - out = codecs.getwriter(args.encoding)(sys.stdout) - elif sys.version_info.major >= 3: - out = open(sys.stdout.fileno(), mode='w', encoding=args.encoding, buffering=1) - - try: - pconv = mkdocs_pandoc.PandocConverter( - config_file=args.config_file, - exclude=args.exclude, - image_ext=args.image_ext, - width=args.width, - encoding=args.encoding, - ) - except FatalError as e: - print(e.message, file=sys.stderr) - return(e.status) - if args.outfile: - try: - out = codecs.open(args.outfile, 'w', encoding=args.encoding) - except IOError as e: - print("Couldn't open %s for writing: %s" % (args.outfile, e.strerror), file=sys.stderr) - - for line in pconv.convert(): - out.write(line + '\n') - out.close() diff --git a/mkdocs_pandoc/pandoc_converter.py b/mkdocs_pandoc/pandoc_converter.py deleted file mode 100644 index 7b89a8b..0000000 --- a/mkdocs_pandoc/pandoc_converter.py +++ /dev/null @@ -1,167 +0,0 @@ -import mkdocs_pandoc.filters.anchors -import mkdocs_pandoc.filters.chapterhead -import mkdocs_pandoc.filters.headlevels -import mkdocs_pandoc.filters.images -import mkdocs_pandoc.filters.exclude -import mkdocs_pandoc.filters.include -import mkdocs_pandoc.filters.tables -import mkdocs_pandoc.filters.toc -import mkdocs_pandoc.filters.xref - -from mkdocs_pandoc.exceptions import FatalError - -import codecs -import os -import yaml - - -class PandocConverter: - """Top level converter class. 
Instatiate separately for each mkdocs.yml.""" - - def __init__(self, **kwargs): - self.config_file = kwargs.get('config_file', 'mkdocs.yml') - self.encoding = kwargs.get('encoding', 'utf-8') - self.exclude = kwargs.get('exclude', None) - self.filter_tables = kwargs.get('filter_tables', True) - self.filter_xrefs = kwargs.get('filter_xrefs', True) - self.image_ext = kwargs.get('image_ext', None) - self.strip_anchors = kwargs.get('strip_anchors', True) - self.width = kwargs.get('width', 100) - - try: - cfg = codecs.open(self.config_file, 'r', self.encoding) - except IOError as e: - raise FatalError("Couldn't open %s for reading: %s" % (self.config_file, - e.strerror), 1) - - self.config = yaml.load(cfg) - - if not 'docs_dir' in self.config: - self.config['docs_dir'] = 'docs' - - if not 'site_dir' in self.config: - self.config['site_dir'] = 'site' - - # Set filters depending on markdown extensions from config - # Defaults first... - self.filter_include = False - self.filter_toc = False - - # ...then override defaults based on config, if any: - - if 'markdown_extensions' in self.config: - for ext in self.config['markdown_extensions']: - extname = '' - # extension entries may be dicts (for passing extension parameters) - if type(ext) is dict: - extname = list(ext.keys())[0] - if type(ext) is str: - extname = ext - - if extname == 'markdown_include.include': - self.filter_include = True - if extname == 'toc': - self.filter_toc = True - - cfg.close() - - def flatten_pages(self, pages, level=1): - """Recursively flattens pages data structure into a one-dimensional data structure""" - flattened = [] - - for page in pages: - if type(page) is list: - flattened.append( - { - 'file': page[0], - 'title': page[1], - 'level': level, - }) - if type(page) is dict: - if type(list(page.values())[0]) is str: - flattened.append( - { - 'file': list(page.values())[0], - 'title': list(page.keys())[0], - 'level': level, - }) - if type(list(page.values())[0]) is list: - flattened.extend( - 
self.flatten_pages( - list(page.values())[0], - level + 1) - ) - - - return flattened - - def convert(self): - """User-facing conversion method. Returns pandoc document as a list of - lines.""" - lines = [] - - pages = self.flatten_pages(self.config['pages']) - - f_exclude = mkdocs_pandoc.filters.exclude.ExcludeFilter( - exclude=self.exclude) - - f_include = mkdocs_pandoc.filters.include.IncludeFilter( - base_path=self.config['docs_dir'], - encoding=self.encoding) - - # First, do the processing that must be done on a per-file basis: - # Adjust header levels, insert chapter headings and adjust image paths. - - f_headlevel = mkdocs_pandoc.filters.headlevels.HeadlevelFilter(pages) - - for page in pages: - fname = os.path.join(self.config['docs_dir'], page['file']) - try: - p = codecs.open(fname, 'r', self.encoding) - except IOError as e: - raise FatalError("Couldn't open %s for reading: %s" % (fname, - e.strerror), 1) - f_chapterhead = mkdocs_pandoc.filters.chapterhead.ChapterheadFilter( - headlevel=page['level'], - title=page['title'] - ) - - f_image = mkdocs_pandoc.filters.images.ImageFilter( - filename=page['file'], - image_path=self.config['site_dir'], - image_ext=self.image_ext) - - lines_tmp = [] - - for line in p.readlines(): - lines_tmp.append(line.rstrip()) - - if self.exclude: - lines_tmp = f_exclude.run(lines_tmp) - - if self.filter_include: - lines_tmp = f_include.run(lines_tmp) - - lines_tmp = f_headlevel.run(lines_tmp) - lines_tmp = f_chapterhead.run(lines_tmp) - lines_tmp = f_image.run(lines_tmp) - lines.extend(lines_tmp) - # Add an empty line between pages to prevent text from a previous - # file from butting up against headers in a subsequent file. 
- lines.append('') - - # Strip anchor tags - if self.strip_anchors: - lines = mkdocs_pandoc.filters.anchors.AnchorFilter().run(lines) - - # Fix cross references - if self.filter_xrefs: - lines = mkdocs_pandoc.filters.xref.XrefFilter().run(lines) - - if self.filter_toc: - lines = mkdocs_pandoc.filters.toc.TocFilter().run(lines) - - if self.filter_tables: - lines = mkdocs_pandoc.filters.tables.TableFilter().run(lines) - - return(lines) diff --git a/py-requirements.txt b/py-requirements.txt new file mode 100644 index 0000000..49ec75f --- /dev/null +++ b/py-requirements.txt @@ -0,0 +1,5 @@ +Markdown>=3.0.1 +mkdocs>=1.0.4 +markdown-include>=0.5.1 +pytest +pytest-cov diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..cad4da7 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +testpaths = tests +addopts = --cov=mkdocs_combine --cov-report=term-missing diff --git a/setup.py b/setup.py index 68dcf29..6db01b6 100644 --- a/setup.py +++ b/setup.py @@ -1,57 +1,89 @@ #!/usr/bin/env/python -from setuptools import setup, find_packages -from codecs import open from os import path -here = path.abspath(path.dirname(__file__)) +from setuptools import find_packages, setup long_description = ( - "mkdocs_pandoc is a library of preprocessors that convert mkdocs style markdown " - "(multiple files, with the document structure defined in the mkdocs " - "configuration file mkdocs.yml) into a single markdown document digestible by " - "pandoc. It ships with the command line frontend tool mkdocs2pandoc as its primary " - "user interface." - ) + "mkdocs_combine is a library that combines a MkDocs-style Markdown site " + "(multiple files, with the document structure defined in the MkDocs " + "configuration file mkdocs.yml) into a single Markdown document. " + "The resulting document can be processed by pandoc or other Markdown tools." + "The command line frontend tool mkdocscombine is the primary user interface." 
+ "Derived from https://github.com/jgrassler/mkdocs-pandoc/" +) setup( - name='mkdocs-pandoc', + name='mkdocs-combine', - version='0.2.6', + # Versions should comply with PEP440. + version='0.4.0.0', - description='A translator from mkdocs style markdown to pandoc style ' - + 'markdown', + description='Combines a MkDocs Markdown site into a single Markdown file', long_description=long_description, - url='https://github.com/jgrassler/mkdocs-pandoc', + # The project's main homepage. + url='https://github.com/twardoch/mkdocs-combine/', + download_url='https://github.com/twardoch/mkdocs-combine/archive/master.zip', + + # Author details author='Johannes Grassler', author_email='johannes@btw23.de', + maintainer='Adam Twardoch', + maintainer_email='adam+github@twardoch.com', + + # Choose your license license='Apache', # See https://pypi.python.org/pypi?%3Aaction=list_classifiers classifiers=[ + 'Environment :: MacOS X', + "Environment :: Console", + 'Operating System :: MacOS :: MacOS X', + # How mature is this project? Common values are + # 3 - Alpha + # 4 - Beta + # 5 - Production/Stable 'Development Status :: 3 - Alpha', + # Indicate who your project is intended for 'Intended Audience :: End Users/Desktop', 'Intended Audience :: Developers', 'Intended Audience :: Information Technology', 'Intended Audience :: System Administrators', 'Topic :: Documentation', 'Topic :: Text Processing', + 'Topic :: Text Processing :: Filters', + 'Topic :: Text Processing :: Markup', + 'Topic :: Text Processing :: Markup :: HTML', + 'Topic :: Software Development :: Documentation', + 'Topic :: Software Development :: Libraries :: Python Modules', + 'Natural Language :: English', + # Pick your license as you wish (should match "license" above) 'License :: OSI Approved :: Apache Software License', + # Specify the Python versions you support here. In particular, ensure + # that you indicate whether you support Python 2, Python 3 or both. 
'Programming Language :: Python :: 2.7', ], - keywords='mkdoc markdown pandoc', + # What does your project relate to? + keywords='mkdocs markdown pandoc print inline combine flatten', + # You can just specify the packages manually here if your project is + # simple. Or you can use find_packages(). packages=find_packages(), - install_requires=['mkdocs>=0.14.0', - 'markdown-include>=0.5.1' - ], + # List run-time dependencies here. These will be installed by pip when + # your project is installed. For an analysis of "install_requires" vs pip's + # requirements files see: + # https://packaging.python.org/en/latest/requirements.html + install_requires=['mkdocs>=1.0.4', + 'Markdown>=3.0.1', + 'markdown-include>=0.5.1' + ], entry_points={ 'console_scripts': [ - 'mkdocs2pandoc=mkdocs_pandoc.cli.mkdocs2pandoc:main', + 'mkdocscombine=mkdocs_combine.cli.mkdocscombine:main', ], }, ) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..b05912c --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,2 @@ +# This file is intentionally left empty. +# It marks the 'tests' directory as a Python package. diff --git a/tests/test_mkdocs_combiner.py b/tests/test_mkdocs_combiner.py new file mode 100644 index 0000000..cfe6947 --- /dev/null +++ b/tests/test_mkdocs_combiner.py @@ -0,0 +1,230 @@ +import pytest +from mkdocs_combine.mkdocs_combiner import MkDocsCombiner + +# TODO: Create dummy mkdocs.yml and docs/ files for testing + +def test_mkdocs_combiner_instantiation(tmp_path): + """ + Test that MkDocsCombiner can be instantiated. + """ + # Create a dummy mkdocs.yml file + mkdocs_yml_content = """ +site_name: Test Site +docs_dir: docs +nav: + - Home: index.md +""" + mkdocs_yml_file = tmp_path / "mkdocs.yml" + mkdocs_yml_file.write_text(mkdocs_yml_content) + + # Create a dummy docs/index.md file + docs_dir = tmp_path / "docs" + docs_dir.mkdir() + index_md_content = """ +# Home +This is the home page. 
+""" + index_md_file = docs_dir / "index.md" + index_md_file.write_text(index_md_content) + + try: + combiner = MkDocsCombiner(config_file=str(mkdocs_yml_file)) + assert combiner is not None + except Exception as e: + pytest.fail(f"MkDocsCombiner instantiation failed: {e}") + + +def test_mkdocs_combiner_combine_basic(tmp_path): + """ + Test basic document combination. + """ + # Create a dummy mkdocs.yml file + mkdocs_yml_content = """ +site_name: Test Site +docs_dir: docs +nav: + - Home: index.md + - About: about.md +""" + mkdocs_yml_file = tmp_path / "mkdocs.yml" + mkdocs_yml_file.write_text(mkdocs_yml_content) + + # Create dummy Markdown files + docs_dir = tmp_path / "docs" + docs_dir.mkdir() + index_md_content = """ +# Home +This is the home page. +""".strip() + index_md_file = docs_dir / "index.md" + index_md_file.write_text(index_md_content) + + about_md_content = """ +# About +This is the about page. +""".strip() + about_md_file = docs_dir / "about.md" + about_md_file.write_text(about_md_content) + + combiner = MkDocsCombiner(config_file=str(mkdocs_yml_file)) + combined_lines = combiner.combine() + + expected_lines = [ + '# Home {: .page-title}', + '', + '## Home', # Offset by HeadlevelFilter + 'This is the home page.', + '', + '# About {: .page-title}', + '', + '## About', # Offset by HeadlevelFilter + 'This is the about page.', + '', + ] + assert combined_lines == expected_lines + + +def test_mkdocs_combiner_keep_metadata(tmp_path): + """ + Test document combination with strip_metadata=False. + YAML metadata should be kept. + """ + mkdocs_yml_content = """ +site_name: Test Site +docs_dir: docs +nav: + - Home: index.md +""" + mkdocs_yml_file = tmp_path / "mkdocs.yml" + mkdocs_yml_file.write_text(mkdocs_yml_content) + + docs_dir = tmp_path / "docs" + docs_dir.mkdir() + index_md_content = """--- +title: My Document Title +author: Test Author +--- +# Content Title +This is the actual content. 
+""".strip() + (docs_dir / "index.md").write_text(index_md_content) + + # increase_heads=True (default), add_chapter_heads=True (default) + # HeadlevelFilter.offset will be 1. + combiner = MkDocsCombiner(config_file=str(mkdocs_yml_file), strip_metadata=False) + combined_lines = combiner.combine() + + expected_lines = [ + '# Home {: .page-title}', # Chapter head + '', + '---', # Metadata kept + 'title: My Document Title', + 'author: Test Author', + '---', + '## Content Title', # Original H1, offset by HeadlevelFilter + 'This is the actual content.', + '', + ] + assert combined_lines == expected_lines + + +def test_mkdocs_combiner_combine_no_chapter_heads(tmp_path): + """ + Test document combination with add_chapter_heads=False. + Titles from mkdocs.yml should not be added. + """ + mkdocs_yml_content = """ +site_name: Test Site +docs_dir: docs +nav: + - Home: index.md + - About: about.md +""" + mkdocs_yml_file = tmp_path / "mkdocs.yml" + mkdocs_yml_file.write_text(mkdocs_yml_content) + + docs_dir = tmp_path / "docs" + docs_dir.mkdir() + index_md_content = """ +# Home Content +This is the home page. +""".strip() + (docs_dir / "index.md").write_text(index_md_content) + + about_md_content = """ +# About Content +This is the about page. +""".strip() + (docs_dir / "about.md").write_text(about_md_content) + + # increase_heads is True by default, so content headers will be offset. + # HeadlevelFilter.offset will be 1. + combiner = MkDocsCombiner(config_file=str(mkdocs_yml_file), add_chapter_heads=False) + combined_lines = combiner.combine() + + expected_lines = [ + '## Home Content', # Original H1, offset by HeadlevelFilter + 'This is the home page.', + '', + '## About Content', # Original H1, offset by HeadlevelFilter + 'This is the about page.', + '', + ] + assert combined_lines == expected_lines + + +def test_mkdocs_combiner_combine_no_increase_heads(tmp_path): + """ + Test document combination with increase_heads=False. + Header levels in content should not change. 
+ """ + mkdocs_yml_content = """ +site_name: Test Site +docs_dir: docs +nav: + - Page1: page1.md + - Section: + - Page2: page2.md +""" + mkdocs_yml_file = tmp_path / "mkdocs.yml" + mkdocs_yml_file.write_text(mkdocs_yml_content) + + docs_dir = tmp_path / "docs" + docs_dir.mkdir() + page1_md_content = """ +# Page 1 Title +Content of page 1. +## Subheading +""".strip() + (docs_dir / "page1.md").write_text(page1_md_content) + + page2_md_content = """ +# Page 2 Title +Content of page 2. +### SubSubheading +""".strip() + (docs_dir / "page2.md").write_text(page2_md_content) + + combiner = MkDocsCombiner(config_file=str(mkdocs_yml_file), increase_heads=False) + combined_lines = combiner.combine() + + # Expected: Chapter titles added, original headers unchanged. + # HeadlevelFilter.offset would be 2 for Page2 if increase_heads was True. + expected_lines = [ + '# Page1 {: .page-title}', + '', + '# Page 1 Title', + 'Content of page 1.', + '## Subheading', + '', + '# Section {: .page-title}', # This is a section title, not a file + '', + '', # Empty line for the section itself + '## Page2 {: .page-title}', # page title for page2, level 2 + '', + '# Page 2 Title', + 'Content of page 2.', + '### SubSubheading', + '', + ] + assert combined_lines == expected_lines