Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions sphinx_llms_txt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def build_finished(app: Sphinx, exception):
"llms_txt_full_max_size": app.config.llms_txt_full_max_size,
"llms_txt_directives": app.config.llms_txt_directives,
"llms_txt_exclude": app.config.llms_txt_exclude,
"llms_txt_rm_directives": app.config.llms_txt_rm_directives,
"html_baseurl": getattr(app.config, "html_baseurl", ""),
}
_manager.set_config(config)
Expand Down Expand Up @@ -86,6 +87,7 @@ def setup(app: Sphinx) -> Dict[str, Any]:
app.add_config_value("llms_txt_title", None, "env")
app.add_config_value("llms_txt_summary", None, "env")
app.add_config_value("llms_txt_exclude", [], "env")
app.add_config_value("llms_txt_rm_directives", False, "env")

# Connect to Sphinx events
app.connect("doctree-resolved", doctree_resolved)
Expand Down
25 changes: 25 additions & 0 deletions sphinx_llms_txt/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,33 @@ def process_content(self, content: str, source_path: Path) -> str:
# Then process path directives (image, figure, etc.)
content = self._process_path_directives(content, source_path)

# Remove directives if configured to do so
if self.config.get("llms_txt_rm_directives", False):
content = self._remove_directives(content)

return content

def _remove_directives(self, content: str) -> str:
    """Remove reStructuredText directives, including their bodies.

    A directive is a line of the form ``.. name::`` (optionally indented
    and optionally followed by arguments). Its body is every following
    line that is indented deeper than the directive line itself; blank
    lines belong to the body only when more body content follows them.
    Text at the same or a shallower indentation level is never removed.

    Args:
        content: The source content from which to remove directives

    Returns:
        Content with all directives removed and runs of three or more
        newlines collapsed to a single blank line
    """
    # Only horizontal whitespace is allowed around the marker: using ``\s``
    # here would let the pattern run across line boundaries and swallow
    # the paragraph that follows a directive.
    directive_start = re.compile(r'^([ \t]*)\.\.[ \t]+[\w\-]+::')

    def indent_width(line: str) -> int:
        # Expand tabs so that mixed tab/space indentation compares sanely.
        expanded = line.expandtabs()
        return len(expanded) - len(expanded.lstrip())

    lines = content.split('\n')
    total = len(lines)
    kept = []
    index = 0

    while index < total:
        match = directive_start.match(lines[index])
        if match is None:
            kept.append(lines[index])
            index += 1
            continue

        # Skip the directive line, then everything that belongs to its body.
        directive_indent = len(match.group(1).expandtabs())
        index += 1
        while index < total:
            if lines[index].strip():
                if indent_width(lines[index]) <= directive_indent:
                    break  # back at (or above) the directive's level
                index += 1
                continue

            # Blank line(s): part of the body only if the next non-blank
            # line is still indented deeper than the directive.
            lookahead = index
            while lookahead < total and not lines[lookahead].strip():
                lookahead += 1
            if (
                lookahead == total
                or indent_width(lines[lookahead]) <= directive_indent
            ):
                break  # leave the separating blank lines in place
            index = lookahead

    processed_content = '\n'.join(kept)

    # Clean up consecutive blank lines left behind by directive removal
    return re.sub(r'\n{3,}', '\n\n', processed_content)

def _extract_relative_document_path(
self, source_path: Path
) -> Tuple[Optional[str], Optional[str], Optional[List[str]]]:
Expand Down
41 changes: 41 additions & 0 deletions tests/test_llms_txt.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,3 +334,44 @@ def test_write_verbose_info_with_baseurl(tmp_path):

assert "- [Home Page](https://example.org/index.html)" in content
assert "- [About Us](https://example.org/about.html)" in content


def test_remove_directives():
    """Test removing directives from content.

    The fixture uses properly indented directive options and bodies so the
    test covers removal of whole directives (marker line, options and
    body), and checks that the unindented paragraphs between them survive.
    """
    # Create a processor with remove_directives enabled
    config = {"llms_txt_rm_directives": True}
    processor = DocumentProcessor(config)

    # Test content with various directives; options and bodies are indented
    # relative to the directive marker, as reStructuredText requires
    content = """This is a test document.

.. image:: /path/to/image.jpg
   :alt: An example image
   :width: 100%

This is a paragraph after the image.

.. note::
   This is a note.

.. code-block:: python

   def hello_world():
       print("Hello, world!")

Final paragraph."""

    processed_content = processor._remove_directives(content)

    # Check that directive markers are removed
    for marker in (".. image::", ".. note::", ".. code-block::"):
        assert marker not in processed_content

    # Check that directive options and bodies are removed as well
    for body_text in (":alt:", ":width:", "This is a note.", "hello_world"):
        assert body_text not in processed_content

    # Check that regular content is preserved
    for paragraph in (
        "This is a test document.",
        "This is a paragraph after the image.",
        "Final paragraph.",
    ):
        assert paragraph in processed_content

    # Check that there are no excessive blank lines
    assert "\n\n\n" not in processed_content
Loading