
Commit e0df890

♻️ REFACTOR: Move parsing to separate module
Add more parsing validation.
1 parent 8039ea2 · commit e0df890

22 files changed: +255 -222 lines changed

README.md (+1 -1)

@@ -283,7 +283,7 @@ The ToC file is parsed to a `SiteMap`, which is a `MutableMapping` subclass, wit
 
 ```python
 import yaml
-from sphinx_external_toc.api import parse_toc_yaml
+from sphinx_external_toc.parsing import parse_toc_yaml
 path = "path/to/_toc.yml"
 site_map = parse_toc_yaml(path)
 yaml.dump(site_map.as_json())
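Since the commit also tightens parsing validation, callers may want to guard that call. A minimal sketch, assuming the parser's `MalformedError` exception is importable from the new `sphinx_external_toc.parsing` module alongside `parse_toc_yaml` (the file path is illustrative):

```python
import yaml

# assumption: MalformedError is exposed by the new parsing module
from sphinx_external_toc.parsing import MalformedError, parse_toc_yaml

path = "path/to/_toc.yml"  # illustrative path
try:
    site_map = parse_toc_yaml(path)
except MalformedError as exc:
    raise SystemExit(f"invalid ToC file {path}: {exc}")
print(yaml.dump(site_map.as_json()))
```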

sphinx_external_toc/api.py (+3 -217)

@@ -1,16 +1,10 @@
-""" """
-from collections.abc import Mapping, MutableMapping
-from pathlib import Path
-from typing import Any, Dict, Iterator, List, Optional, Sequence, Set, Tuple, Union
+"""Defines the `SiteMap` object, for storing the parsed ToC."""
+from collections.abc import MutableMapping
+from typing import Any, Dict, Iterator, List, Optional, Set, Union
 
 import attr
-import yaml
 from attr.validators import deep_iterable, instance_of, optional
 
-FILE_KEY = "file"
-GLOB_KEY = "glob"
-URL_KEY = "url"
-
 
 class FileItem(str):
     """A document path in a toctree list.
@@ -145,211 +139,3 @@ def as_json(
             assert meta_key not in dct
             dct[meta_key] = self.meta
         return dct
-
-
-class MalformedError(Exception):
-    """Raised if toc file is malformed."""
-
-
-def parse_toc_yaml(path: Union[str, Path], encoding: str = "utf8") -> SiteMap:
-    """Parse the ToC file."""
-    with Path(path).open(encoding=encoding) as handle:
-        data = yaml.safe_load(handle)
-    return parse_toc_data(data)
-
-
-def parse_toc_data(data: Dict[str, Any]) -> SiteMap:
-    """Parse a dictionary of the ToC."""
-    if not isinstance(data, Mapping):
-        raise MalformedError(f"toc is not a mapping: {type(data)}")
-
-    defaults: Dict[str, Any] = data.get("defaults", {})
-
-    doc_item, docs_list = _parse_doc_item(data, defaults, "/", file_key="root")
-
-    site_map = SiteMap(root=doc_item, meta=data.get("meta"))
-
-    _parse_docs_list(docs_list, site_map, defaults, "/")
-
-    return site_map
-
-
-def _parse_doc_item(
-    data: Dict[str, Any], defaults: Dict[str, Any], path: str, file_key: str = FILE_KEY
-) -> Tuple[DocItem, Sequence[Dict[str, Any]]]:
-    """Parse a single doc item."""
-    if file_key not in data:
-        raise MalformedError(f"'{file_key}' key not found: '{path}'")
-    if "sections" in data:
-        # this is a shorthand for defining a single part
-        if "parts" in data:
-            raise MalformedError(f"Both 'sections' and 'parts' found: '{path}'")
-        parts_data = [{"sections": data["sections"]}]
-    else:
-        parts_data = data.get("parts", [])
-
-    if not isinstance(parts_data, Sequence):
-        raise MalformedError(f"'parts' not a sequence: '{path}'")
-
-    _known_link_keys = {FILE_KEY, GLOB_KEY, URL_KEY}
-
-    parts = []
-    for part_idx, part in enumerate(parts_data):
-
-        # generate sections list
-        sections: List[Union[GlobItem, FileItem, UrlItem]] = []
-        for sect_idx, section in enumerate(part["sections"]):
-            link_keys = _known_link_keys.intersection(section)
-            if not link_keys:
-                raise MalformedError(
-                    "toctree section does not contain one of "
-                    f"{_known_link_keys!r}: {path}{part_idx}/{sect_idx}"
-                )
-            if not len(link_keys) == 1:
-                raise MalformedError(
-                    "toctree section contains incompatible keys "
-                    f"{link_keys!r}: {path}{part_idx}/{sect_idx}"
-                )
-
-            if link_keys == {FILE_KEY}:
-                sections.append(FileItem(section[FILE_KEY]))
-            elif link_keys == {GLOB_KEY}:
-                if "sections" in section or "parts" in section:
-                    raise MalformedError(
-                        "toctree section contains incompatible keys "
-                        f"{GLOB_KEY} and parts/sections: {path}{part_idx}/{sect_idx}"
-                    )
-                sections.append(GlobItem(section[GLOB_KEY]))
-            elif link_keys == {URL_KEY}:
-                if "sections" in section or "parts" in section:
-                    raise MalformedError(
-                        "toctree section contains incompatible keys "
-                        f"{URL_KEY} and parts/sections: {path}{part_idx}/{sect_idx}"
-                    )
-                sections.append(UrlItem(section[URL_KEY], section.get("title")))
-
-        # generate toc key-word arguments
-        keywords = {}
-        for key in ("caption", "numbered", "titlesonly", "reversed"):
-            if key in part:
-                keywords[key] = part[key]
-            elif key in defaults:
-                keywords[key] = defaults[key]
-
-        # TODO this is a hacky fix for the fact that sphinx logs a warning
-        # for nested toctrees, see:
-        # sphinx/environment/collectors/toctree.py::TocTreeCollector::assign_section_numbers::_walk_toctree
-        if keywords.get("numbered") and path != "/":
-            keywords.pop("numbered")
-
-        try:
-            toc_item = TocItem(sections=sections, **keywords)
-        except TypeError as exc:
-            raise MalformedError(f"toctree validation: {path}{part_idx}") from exc
-        parts.append(toc_item)
-
-    try:
-        doc_item = DocItem(docname=data[file_key], title=data.get("title"), parts=parts)
-    except TypeError as exc:
-        raise MalformedError(f"doc validation: {path}") from exc
-
-    docs_data = [
-        section
-        for part in parts_data
-        for section in part["sections"]
-        if FILE_KEY in section
-    ]
-
-    return (
-        doc_item,
-        docs_data,
-    )
-
-
-def _parse_docs_list(
-    docs_list: Sequence[Dict[str, Any]],
-    site_map: SiteMap,
-    defaults: Dict[str, Any],
-    path: str,
-):
-    """Parse a list of docs."""
-    for doc_data in docs_list:
-        docname = doc_data["file"]
-        if docname in site_map:
-            raise MalformedError(f"document file used multiple times: {docname}")
-        child_path = f"{path}{docname}/"
-        child_item, child_docs_list = _parse_doc_item(doc_data, defaults, child_path)
-        site_map[docname] = child_item
-
-        _parse_docs_list(child_docs_list, site_map, defaults, child_path)
-
-
-def create_toc_dict(site_map: SiteMap, *, skip_defaults: bool = True) -> Dict[str, Any]:
-    """Create the Toc dictionary from a site-map."""
-    data = _docitem_to_dict(
-        site_map.root, site_map, skip_defaults=skip_defaults, file_key="root"
-    )
-    if site_map.meta:
-        data["meta"] = site_map.meta.copy()
-    return data
-
-
-def _docitem_to_dict(
-    doc_item: DocItem,
-    site_map: SiteMap,
-    *,
-    skip_defaults: bool = True,
-    file_key: str = FILE_KEY,
-    parsed_docnames: Optional[Set[str]] = None,
-) -> Dict[str, Any]:
-
-    # protect against infinite recursion
-    parsed_docnames = parsed_docnames or set()
-    if doc_item.docname in parsed_docnames:
-        raise RecursionError(f"{doc_item.docname!r} in site-map multiple times")
-    parsed_docnames.add(doc_item.docname)
-
-    data: Dict[str, Any] = {}
-
-    data[file_key] = doc_item.docname
-    if doc_item.title is not None:
-        data["title"] = doc_item.title
-
-    if not doc_item.parts:
-        return data
-
-    def _parse_section(item):
-        if isinstance(item, FileItem):
-            if item in site_map:
-                return _docitem_to_dict(
-                    site_map[item],
-                    site_map,
-                    skip_defaults=skip_defaults,
-                    parsed_docnames=parsed_docnames,
-                )
-            return {FILE_KEY: str(item)}
-        if isinstance(item, GlobItem):
-            return {GLOB_KEY: str(item)}
-        if isinstance(item, UrlItem):
-            if item.title is not None:
-                return {URL_KEY: item.url, "title": item.title}
-            return {URL_KEY: item.url}
-        raise TypeError(item)
-
-    data["parts"] = []
-    fields = attr.fields_dict(TocItem)
-    for part in doc_item.parts:
-        # only add these keys if their value is not the default
-        part_data = {
-            key: getattr(part, key)
-            for key in ("caption", "numbered", "reversed", "titlesonly")
-            if (not skip_defaults) or getattr(part, key) != fields[key].default
-        }
-        part_data["sections"] = [_parse_section(s) for s in part.sections]
-        data["parts"].append(part_data)
-
-    # apply shorthand if possible
-    if len(data["parts"]) == 1 and list(data["parts"][0]) == ["sections"]:
-        data["sections"] = data.pop("parts")[0]["sections"]
-
-    return data
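All of the code removed above now lives in `sphinx_external_toc.parsing`. A hedged sketch of the validation rules it enforces, assuming `parse_toc_data` and `MalformedError` keep the names shown in the diff (the docnames and URL are made up for illustration):

```python
from sphinx_external_toc.parsing import MalformedError, parse_toc_data

# a minimal well-formed ToC: a root document plus one implicit part of sections
good = {
    "root": "intro",
    "sections": [
        {"file": "chapter1"},
        {"url": "https://example.com", "title": "External link"},
    ],
}
site_map = parse_toc_data(good)  # returns a SiteMap keyed by docname

# each section must use exactly one of the 'file'/'glob'/'url' keys
bad = {
    "root": "intro",
    "sections": [{"file": "chapter1", "url": "https://example.com"}],
}
try:
    parse_toc_data(bad)
except MalformedError as exc:
    print(exc)  # "toctree section contains incompatible keys ..."
```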

sphinx_external_toc/cli.py (+1 -1)

@@ -4,7 +4,7 @@
 import yaml
 
 from sphinx_external_toc import __version__
-from sphinx_external_toc.api import create_toc_dict, parse_toc_yaml
+from sphinx_external_toc.parsing import create_toc_dict, parse_toc_yaml
 from sphinx_external_toc.tools import create_site_from_toc, create_site_map_from_path
 
 
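The CLI now takes both `parse_toc_yaml` and `create_toc_dict` from the `parsing` module; together they round-trip a ToC file through a `SiteMap` and back to a dictionary. A rough sketch under that assumption (the path is illustrative, and `skip_defaults=True` mirrors the default shown in the diff above):

```python
import yaml

from sphinx_external_toc.parsing import create_toc_dict, parse_toc_yaml

# parse the ToC file into a SiteMap, then serialise it back to ToC-dict form
site_map = parse_toc_yaml("path/to/_toc.yml")  # illustrative path
toc_dict = create_toc_dict(site_map, skip_defaults=True)
print(yaml.dump(toc_dict, sort_keys=False))
```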
sphinx_external_toc/events.py (+2 -1)

@@ -14,7 +14,8 @@
 from sphinx.util.docutils import SphinxDirective
 from sphinx.util.matching import Matcher, patfilter, patmatch
 
-from .api import DocItem, FileItem, GlobItem, SiteMap, UrlItem, parse_toc_yaml
+from .api import DocItem, FileItem, GlobItem, SiteMap, UrlItem
+from .parsing import parse_toc_yaml
 
 logger = logging.getLogger(__name__)
 