diff --git a/pyproject.toml b/pyproject.toml index fc62c31..872c42e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ dependencies = [ "cryptography>=46.0.3", "docling>=2.61.2", "fastapi>=0.121.2", - "httpx>=0.28.1", + "httpx[http2]>=0.28.1", "langchain-community>=0.3.17", "langchain-google-vertexai>=3.2.0", "langchain-litellm>=0.3.0", @@ -27,6 +27,7 @@ dependencies = [ "sentence-transformers>=5.2.0", "sqlalchemy>=2.0.44", "tiktoken>=0.12.0", + "trafilatura>=1.7.0", "transformers>=4.57.1", "uvicorn>=0.38.0", ] diff --git a/ragitect/services/embedding.py b/ragitect/services/embedding.py index 904ffe5..55a9148 100644 --- a/ragitect/services/embedding.py +++ b/ragitect/services/embedding.py @@ -202,6 +202,7 @@ def create_embeddings_model(config: EmbeddingConfig | None = None) -> Embeddings return OpenAIEmbeddings( model=config.model, api_key=SecretStr(config.api_key), + dimensions=config.dimension, ) elif provider == "vertex_ai": diff --git a/ragitect/services/processor/__init__.py b/ragitect/services/processor/__init__.py index e69de29..2fe1813 100644 --- a/ragitect/services/processor/__init__.py +++ b/ragitect/services/processor/__init__.py @@ -0,0 +1,20 @@ +"""Document processors for RAGitect. + +This module provides processors for extracting text/markdown from various sources. 
+"""
+
+from ragitect.services.processor.base import BaseDocumentProcessor
+from ragitect.services.processor.simple import SimpleProcessor
+from ragitect.services.processor.web_url_processor import (
+    ContentExtractionError,
+    URLFetchError,
+    WebURLProcessor,
+)
+
+__all__ = [
+    "BaseDocumentProcessor",
+    "ContentExtractionError",
+    "SimpleProcessor",
+    "URLFetchError",
+    "WebURLProcessor",
+]
\ No newline at end of file
diff --git a/ragitect/services/processor/web_url_processor.py b/ragitect/services/processor/web_url_processor.py
new file mode 100644
index 0000000..e533466
--- /dev/null
+++ b/ragitect/services/processor/web_url_processor.py
@@ -0,0 +1,254 @@
+"""Web URL Processor - Fetches web pages and converts to clean Markdown.
+
+This processor handles web URL ingestion by:
+1. Fetching HTML content via httpx with proper timeout/HTTP2/redirect configuration
+2. Extracting main article content using trafilatura (removes nav, ads, footers)
+3. Converting to Markdown format for downstream chunking and embedding
+
+Usage:
+    processor = WebURLProcessor()
+    markdown = await processor.process("https://example.com/article")
+
+Note:
+    This processor inherits from BaseDocumentProcessor but replaces ``process``
+    with an async, URL-based signature: ``await process(url)``. The file-based
+    ``process(file_bytes, file_name)`` call form is not provided by this class,
+    so it is not interchangeable with file-based processors.
+
+    Integration with ProcessorFactory happens in Story 5.5.
+"""
+
+import asyncio
+import logging
+from typing import override
+from urllib.parse import urlsplit
+
+import httpx
+import trafilatura
+
+from ragitect.services.processor.base import BaseDocumentProcessor
+
+logger = logging.getLogger(__name__)
+
+# Overall wall-clock budget for one fetch, redirects included (NFR-P4).
+_TOTAL_TIMEOUT_SECONDS = 30.0
+# Budget for establishing a single connection (NFR-P4).
+_CONNECT_TIMEOUT_SECONDS = 10.0
+# Upper bound on idle keep-alive connections held by the client (NFR-R3).
+_MAX_KEEPALIVE_CONNECTIONS = 20
+# Only these URL schemes are fetched; anything else is rejected before any I/O.
+_ALLOWED_SCHEMES = frozenset({"http", "https"})
+# Sent on every request to avoid 403 from sites that block automated requests.
+_USER_AGENT = (
+    "Mozilla/5.0 (compatible; RAGitect/1.0; +https://github.com/bhdai/ragitect)"
+)
+
+
+class URLFetchError(Exception):
+    """Raised when HTTP request fails (timeout, connection error, 4xx/5xx).
+
+    Attributes:
+        url: The URL that failed to fetch, or None when not supplied
+        message: Descriptive error message including URL and error type
+    """
+
+    def __init__(self, message: str, url: str | None = None) -> None:
+        super().__init__(message)
+        self.message = message
+        self.url = url
+
+
+class ContentExtractionError(Exception):
+    """Raised when content extraction fails (trafilatura returns None or blank text).
+
+    Attributes:
+        url: The URL where content extraction failed, or None when not supplied
+        message: Descriptive error message
+    """
+
+    def __init__(self, message: str, url: str | None = None) -> None:
+        super().__init__(message)
+        self.message = message
+        self.url = url
+
+
+class WebURLProcessor(BaseDocumentProcessor):
+    """Processor for fetching web pages and converting to Markdown.
+
+    Inherits from BaseDocumentProcessor but provides an async process(url: str)
+    method instead of the sync process(file_bytes, file_name) method.
+
+    Implements async web page fetching with:
+    - 30 second overall deadline, 10 second connect timeout (NFR-P4)
+    - HTTP/2 support for improved performance
+    - Automatic redirect following
+    - Keep-alive pool capped at 20 connections; a client is created per call,
+      so connections are only reused within one fetch and its redirects
+    - http/https only; other URL schemes are rejected with URLFetchError
+
+    Content extraction uses trafilatura to:
+    - Extract main article content
+    - Remove navigation, ads, headers, footers
+    - Strip potentially malicious elements (scripts, iframes) for security (NFR-S5)
+
+    Article metadata (title, author, date) is not requested: no metadata option
+    is passed to trafilatura.
+
+    Security note:
+        Only the URL scheme is validated. Destination hosts are not restricted,
+        so callers handling untrusted URLs must block internal/private
+        addresses themselves (SSRF).
+
+    Example:
+        >>> processor = WebURLProcessor()
+        >>> markdown = await processor.process("https://en.wikipedia.org/wiki/Python")
+        >>> print(markdown[:100])
+        # Python (programming language)
+        ...
+    """
+
+    @override
+    def supported_formats(self) -> list[str]:
+        """Return list of supported file extensions.
+
+        WebURLProcessor is not file-based, so returns empty list.
+        URL-based routing is handled separately from file extension routing.
+
+        Returns:
+            Empty list (not file-based)
+        """
+        return []
+
+    async def process(self, url: str) -> str:
+        """Fetch web page and convert to clean Markdown.
+
+        Args:
+            url: HTTP or HTTPS URL to fetch
+
+        Returns:
+            Markdown string with main article content extracted
+
+        Raises:
+            URLFetchError: If the URL is invalid or not http/https, or the HTTP
+                request fails (timeout, connection error, 4xx/5xx)
+            ContentExtractionError: If content extraction yields nothing
+        """
+        logger.info("Processing web URL: %s", url)
+
+        # Fetch HTML content
+        html_content = await self._fetch_url(url)
+
+        # Extract main content and convert to Markdown
+        markdown = self._extract_content(html_content, url)
+
+        logger.info(
+            "Successfully processed %s - %d chars extracted", url, len(markdown)
+        )
+        return markdown
+
+    async def _fetch_url(self, url: str) -> str:
+        """Fetch HTML content from URL with configured httpx client.
+
+        Args:
+            url: URL to fetch
+
+        Returns:
+            HTML content as string
+
+        Raises:
+            URLFetchError: On invalid URL, unsupported scheme, timeout,
+                connection/transport error, or HTTP error status. The
+                underlying exception is chained as ``__cause__``.
+        """
+        # Reject non-web schemes up front instead of leaking an httpx error.
+        try:
+            scheme = urlsplit(url).scheme.lower()
+        except ValueError as exc:
+            raise URLFetchError(f"Invalid URL {url}: {exc}", url=url) from exc
+        if scheme not in _ALLOWED_SCHEMES:
+            raise URLFetchError(
+                f"Unsupported URL scheme fetching {url} (only http/https allowed)",
+                url=url,
+            )
+
+        # httpx timeouts apply per operation (connect/read/write/pool), not to the
+        # request as a whole; the asyncio deadline below enforces the total budget.
+        timeout = httpx.Timeout(
+            _TOTAL_TIMEOUT_SECONDS, connect=_CONNECT_TIMEOUT_SECONDS
+        )
+        limits = httpx.Limits(max_keepalive_connections=_MAX_KEEPALIVE_CONNECTIONS)
+
+        try:
+            async with asyncio.timeout(_TOTAL_TIMEOUT_SECONDS):
+                async with httpx.AsyncClient(
+                    timeout=timeout,
+                    http2=True,  # Enable HTTP/2 support
+                    follow_redirects=True,  # Auto-follow redirects
+                    limits=limits,  # Connection pooling
+                    headers={"User-Agent": _USER_AGENT},
+                ) as client:
+                    response = await client.get(url)
+                    response.raise_for_status()
+                    return response.text
+        except (httpx.TimeoutException, TimeoutError) as exc:
+            logger.error("Timeout fetching %s: %s", url, exc)
+            raise URLFetchError(
+                f"Timeout fetching {url} ({_TOTAL_TIMEOUT_SECONDS:g}s limit)",
+                url=url,
+            ) from exc
+        except httpx.HTTPStatusError as exc:
+            status_code = exc.response.status_code
+            logger.error("HTTP %s fetching %s", status_code, url)
+            raise URLFetchError(f"HTTP {status_code} fetching {url}", url=url) from exc
+        except httpx.ConnectError as exc:
+            logger.error("Connection error fetching %s: %s", url, exc)
+            raise URLFetchError(
+                f"Connection error fetching {url}: {exc}", url=url
+            ) from exc
+        except (httpx.RequestError, httpx.InvalidURL) as exc:
+            # Remaining transport failures: read/write errors, too many
+            # redirects, undecodable bodies, malformed URLs.
+            logger.error("Request error fetching %s: %s", url, exc)
+            raise URLFetchError(
+                f"Request error fetching {url}: {exc}", url=url
+            ) from exc
+
+    def _extract_content(self, html_content: str, url: str) -> str:
+        """Extract main article content and convert to Markdown.
+
+        Uses trafilatura for:
+        - Main content extraction (removes nav, ads, footers)
+        - Script/iframe stripping (NFR-S5 security)
+        - Direct Markdown output
+
+        Args:
+            html_content: Raw HTML content
+            url: Original URL (for error messages)
+
+        Returns:
+            Markdown string with extracted content
+
+        Raises:
+            ContentExtractionError: If extraction returns None or blank text
+        """
+        # output_format="markdown" gives us direct Markdown output. Fallback
+        # extractors stay enabled because that is trafilatura's default; the
+        # explicit no_fallback flag is omitted (NOTE(review): recent trafilatura
+        # releases document it as superseded by `fast` - confirm before re-adding).
+        markdown = trafilatura.extract(
+            html_content,
+            output_format="markdown",
+            include_comments=False,  # Exclude comments
+            include_tables=True,  # Keep tables
+            include_images=True,  # Keep image references
+            include_links=True,  # Keep hyperlinks
+        )
+
+        # Treat None and whitespace-only output alike: neither can be chunked.
+        if markdown is None or not markdown.strip():
+            logger.error("Failed to extract content from %s", url)
+            raise ContentExtractionError(
+                f"Failed to extract content from {url}", url=url
+            )
+
+        return markdown
diff --git a/tests/services/processor/test_web_url_processor.py b/tests/services/processor/test_web_url_processor.py
new file mode 100644
index 0000000..3ec6986
--- /dev/null
+++ b/tests/services/processor/test_web_url_processor.py
@@ -0,0 +1,448 @@
+"""Tests for WebURLProcessor - Web page fetching and markdown extraction
+
+Red-Green-Refactor TDD: These tests define expected behavior before implementation.
+""" + +import pytest +from unittest.mock import AsyncMock, MagicMock, patch + +import httpx + +# Module-level markers as per project-context.md +pytestmark = [pytest.mark.asyncio] + + +class TestWebURLProcessorInterface: + """Test WebURLProcessor class interface and method signatures""" + + def test_class_exists(self): + """WebURLProcessor class should be importable""" + from ragitect.services.processor.web_url_processor import WebURLProcessor + + processor = WebURLProcessor() + assert processor is not None + + def test_inherits_from_base_document_processor(self): + """WebURLProcessor should inherit from BaseDocumentProcessor""" + from ragitect.services.processor.base import BaseDocumentProcessor + from ragitect.services.processor.web_url_processor import WebURLProcessor + + processor = WebURLProcessor() + assert isinstance(processor, BaseDocumentProcessor) + + def test_supported_formats_returns_empty_list(self): + """WebURLProcessor is not file-based, returns empty list""" + from ragitect.services.processor.web_url_processor import WebURLProcessor + + processor = WebURLProcessor() + formats = processor.supported_formats() + assert formats == [] + + async def test_process_method_signature_async(self): + """process() should be async and accept url string""" + from ragitect.services.processor.web_url_processor import WebURLProcessor + + processor = WebURLProcessor() + # This will fail until implemented - verifies async signature + with patch.object(processor, "_fetch_url", new_callable=AsyncMock) as mock_fetch: + mock_fetch.return_value = "Test" + with patch("trafilatura.extract") as mock_extract: + mock_extract.return_value = "# Test" + result = await processor.process("https://example.com") + assert isinstance(result, str) + + +class TestWebURLProcessorFetching: + """Test HTTP fetching functionality""" + + async def test_successful_fetch_returns_markdown(self): + """Successful fetch and extraction returns markdown string""" + from 
ragitect.services.processor.web_url_processor import WebURLProcessor + + processor = WebURLProcessor() + + mock_html = "

Test Article

Content here.

" + expected_markdown = "# Test Article\n\nContent here." + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client = AsyncMock() + mock_response = MagicMock() + mock_response.text = mock_html + mock_response.raise_for_status = MagicMock() + mock_client.get = AsyncMock(return_value=mock_response) + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=None) + mock_client_class.return_value = mock_client + + with patch("trafilatura.extract") as mock_extract: + mock_extract.return_value = expected_markdown + + result = await processor.process("https://example.com/article") + + assert result == expected_markdown + mock_client.get.assert_called_once() + mock_extract.assert_called_once() + + async def test_httpx_client_configured_with_timeouts(self): + """httpx client should have 30s total, 10s connect timeout""" + from ragitect.services.processor.web_url_processor import WebURLProcessor + + processor = WebURLProcessor() + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client = AsyncMock() + mock_response = MagicMock() + mock_response.text = "" + mock_response.raise_for_status = MagicMock() + mock_client.get = AsyncMock(return_value=mock_response) + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=None) + mock_client_class.return_value = mock_client + + with patch("trafilatura.extract", return_value="# Test"): + await processor.process("https://example.com") + + # Verify client was created with correct config + call_kwargs = mock_client_class.call_args.kwargs + assert "timeout" in call_kwargs + timeout = call_kwargs["timeout"] + assert timeout.connect == 10.0 + assert timeout.read == 30.0 or timeout.pool == 30.0 + + async def test_httpx_client_has_http2_enabled(self): + """httpx client should have HTTP/2 support enabled""" + from ragitect.services.processor.web_url_processor import WebURLProcessor + + processor = 
WebURLProcessor() + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client = AsyncMock() + mock_response = MagicMock() + mock_response.text = "" + mock_response.raise_for_status = MagicMock() + mock_client.get = AsyncMock(return_value=mock_response) + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=None) + mock_client_class.return_value = mock_client + + with patch("trafilatura.extract", return_value="# Test"): + await processor.process("https://example.com") + + call_kwargs = mock_client_class.call_args.kwargs + assert call_kwargs.get("http2") is True + + async def test_httpx_client_follows_redirects(self): + """httpx client should follow redirects automatically""" + from ragitect.services.processor.web_url_processor import WebURLProcessor + + processor = WebURLProcessor() + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client = AsyncMock() + mock_response = MagicMock() + mock_response.text = "" + mock_response.raise_for_status = MagicMock() + mock_client.get = AsyncMock(return_value=mock_response) + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=None) + mock_client_class.return_value = mock_client + + with patch("trafilatura.extract", return_value="# Test"): + await processor.process("https://example.com") + + call_kwargs = mock_client_class.call_args.kwargs + assert call_kwargs.get("follow_redirects") is True + + +class TestWebURLProcessorContentExtraction: + """Test content extraction via trafilatura""" + + async def test_trafilatura_extracts_main_content(self): + """trafilatura should be called with html and return markdown""" + from ragitect.services.processor.web_url_processor import WebURLProcessor + + processor = WebURLProcessor() + + html_with_nav = """ + + +
+

Main Article

+

This is the main content.

+
+ + + """ + expected_markdown = "# Main Article\n\nThis is the main content." + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client = AsyncMock() + mock_response = MagicMock() + mock_response.text = html_with_nav + mock_response.raise_for_status = MagicMock() + mock_client.get = AsyncMock(return_value=mock_response) + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=None) + mock_client_class.return_value = mock_client + + with patch("trafilatura.extract") as mock_extract: + mock_extract.return_value = expected_markdown + + result = await processor.process("https://example.com") + + # Verify trafilatura was called with html content + mock_extract.assert_called_once() + call_args = mock_extract.call_args + # First positional arg should be the HTML content + assert call_args[0][0] == html_with_nav + + # Verify result is the markdown + assert result == expected_markdown + + +class TestWebURLProcessorErrorHandling: + """Test error handling for various failure scenarios""" + + async def test_timeout_raises_url_fetch_error(self): + """Timeout should raise URLFetchError with descriptive message""" + from ragitect.services.processor.web_url_processor import ( + WebURLProcessor, + URLFetchError, + ) + + processor = WebURLProcessor() + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client = AsyncMock() + mock_client.get = AsyncMock(side_effect=httpx.TimeoutException("Timeout")) + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=None) + mock_client_class.return_value = mock_client + + with pytest.raises(URLFetchError) as exc_info: + await processor.process("https://slow-site.com") + + assert "slow-site.com" in str(exc_info.value) + assert "timeout" in str(exc_info.value).lower() + + async def test_connection_error_raises_url_fetch_error(self): + """Connection error should raise URLFetchError with details""" + from 
ragitect.services.processor.web_url_processor import ( + WebURLProcessor, + URLFetchError, + ) + + processor = WebURLProcessor() + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client = AsyncMock() + mock_client.get = AsyncMock( + side_effect=httpx.ConnectError("Connection refused") + ) + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=None) + mock_client_class.return_value = mock_client + + with pytest.raises(URLFetchError) as exc_info: + await processor.process("https://unreachable.com") + + assert "unreachable.com" in str(exc_info.value) + + async def test_http_404_raises_url_fetch_error(self): + """HTTP 404 should raise URLFetchError with status code""" + from ragitect.services.processor.web_url_processor import ( + WebURLProcessor, + URLFetchError, + ) + + processor = WebURLProcessor() + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client = AsyncMock() + mock_response = MagicMock() + mock_response.status_code = 404 + mock_request = MagicMock() + mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( + "Not Found", request=mock_request, response=mock_response + ) + mock_client.get = AsyncMock(return_value=mock_response) + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=None) + mock_client_class.return_value = mock_client + + with pytest.raises(URLFetchError) as exc_info: + await processor.process("https://example.com/missing") + + assert "404" in str(exc_info.value) + assert "example.com" in str(exc_info.value) + + async def test_http_500_raises_url_fetch_error(self): + """HTTP 500 should raise URLFetchError with status code""" + from ragitect.services.processor.web_url_processor import ( + WebURLProcessor, + URLFetchError, + ) + + processor = WebURLProcessor() + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client = AsyncMock() + mock_response = MagicMock() + 
mock_response.status_code = 500 + mock_request = MagicMock() + mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( + "Internal Server Error", request=mock_request, response=mock_response + ) + mock_client.get = AsyncMock(return_value=mock_response) + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=None) + mock_client_class.return_value = mock_client + + with pytest.raises(URLFetchError) as exc_info: + await processor.process("https://example.com/error") + + assert "500" in str(exc_info.value) + + async def test_trafilatura_returns_none_raises_content_extraction_error(self): + """Empty extraction should raise ContentExtractionError""" + from ragitect.services.processor.web_url_processor import ( + WebURLProcessor, + ContentExtractionError, + ) + + processor = WebURLProcessor() + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client = AsyncMock() + mock_response = MagicMock() + mock_response.text = "" + mock_response.raise_for_status = MagicMock() + mock_client.get = AsyncMock(return_value=mock_response) + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=None) + mock_client_class.return_value = mock_client + + with patch("trafilatura.extract", return_value=None): + with pytest.raises(ContentExtractionError) as exc_info: + await processor.process("https://empty-page.com") + + assert "empty-page.com" in str(exc_info.value) + + async def test_exception_messages_contain_url(self): + """All exception messages should include the URL for debugging""" + from ragitect.services.processor.web_url_processor import ( + WebURLProcessor, + URLFetchError, + ) + + processor = WebURLProcessor() + test_url = "https://test-debugging.example.com/path" + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client = AsyncMock() + mock_client.get = AsyncMock(side_effect=httpx.TimeoutException("Timeout")) + mock_client.__aenter__ 
= AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=None) + mock_client_class.return_value = mock_client + + with pytest.raises(URLFetchError) as exc_info: + await processor.process(test_url) + + # URL should be in the error message for debugging + assert "test-debugging.example.com" in str(exc_info.value) + + +class TestWebURLProcessorExceptions: + """Test custom exception classes exist and are properly defined""" + + def test_url_fetch_error_exists(self): + """URLFetchError exception class should exist""" + from ragitect.services.processor.web_url_processor import URLFetchError + + error = URLFetchError("Test error") + assert isinstance(error, Exception) + assert str(error) == "Test error" + + def test_content_extraction_error_exists(self): + """ContentExtractionError exception class should exist""" + from ragitect.services.processor.web_url_processor import ContentExtractionError + + error = ContentExtractionError("Test error") + assert isinstance(error, Exception) + assert str(error) == "Test error" + + +@pytest.mark.integration +class TestWebURLProcessorIntegration: + """Integration tests with real web pages (require network access)""" + + async def test_process_wikipedia_article(self): + """Integration test: fetch real Wikipedia page and extract markdown""" + from ragitect.services.processor.web_url_processor import WebURLProcessor + + processor = WebURLProcessor() + url = "https://en.wikipedia.org/wiki/Python_(programming_language)" + + # Fetch real page + markdown = await processor.process(url) + + # Verify main content present + assert "Python" in markdown + assert "#" in markdown # Has headings + assert len(markdown) > 1000 # Substantial content extracted + + # Verify markdown format + assert any( + marker in markdown for marker in ["#", "##", "###"] + ) # Has markdown headings + + async def test_process_removes_navigation_boilerplate(self): + """Integration test: verify navigation/boilerplate is stripped""" + from 
ragitect.services.processor.web_url_processor import WebURLProcessor + + processor = WebURLProcessor() + url = "https://en.wikipedia.org/wiki/Python_(programming_language)" + + markdown = await processor.process(url) + markdown_lower = markdown.lower() + + # These Wikipedia navigation elements should NOT be in extracted content + # Note: Some may appear in actual article text, so we check for common nav patterns + navigation_patterns = [ + "jump to navigation", + "jump to search", + "personal tools", + "[edit]", # Wikipedia edit links + ] + + # At least most navigation patterns should be absent + nav_found = sum(1 for p in navigation_patterns if p in markdown_lower) + assert nav_found <= 1, f"Too many nav patterns found: {nav_found}" + + async def test_markdown_compatible_with_chunking(self): + """Integration test: verify markdown works with DocumentProcessor chunking""" + from ragitect.services.processor.web_url_processor import WebURLProcessor + from ragitect.services.document_processor import split_markdown_document + + processor = WebURLProcessor() + url = "https://en.wikipedia.org/wiki/Python_(programming_language)" + + # Fetch and extract + markdown = await processor.process(url) + + # Test with existing chunker + chunks = split_markdown_document( + raw_text=markdown, + chunk_size=512, + overlap=50, + ) + + # Verify chunking works + assert len(chunks) > 0, "Should produce at least one chunk" + assert all( + isinstance(chunk, str) for chunk in chunks + ), "Chunks should be strings" + assert all( + len(chunk) > 0 for chunk in chunks + ), "Each chunk should have content" + diff --git a/uv.lock b/uv.lock index 5f953d4..23b88ec 100644 --- a/uv.lock +++ b/uv.lock @@ -196,6 +196,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373", size = 67615, upload-time = 
"2025-10-06T13:54:43.17Z" }, ] +[[package]] +name = "babel" +version = "2.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7d/6b/d52e42361e1aa00709585ecc30b3f9684b3ab62530771402248b1b1d6240/babel-2.17.0.tar.gz", hash = "sha256:0c54cffb19f690cdcc52a3b50bcbf71e07a808d1c80d549f2459b9d2cf0afb9d", size = 9951852, upload-time = "2025-02-01T15:17:41.026Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/b8/3fe70c75fe32afc4bb507f75563d39bc5642255d1d94f1f23604725780bf/babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2", size = 10182537, upload-time = "2025-02-01T15:17:37.39Z" }, +] + [[package]] name = "beautifulsoup4" version = "4.14.2" @@ -394,6 +403,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6d/c1/e419ef3723a074172b68aaa89c9f3de486ed4c2399e2dbd8113a4fdcaf9e/colorlog-6.10.1-py3-none-any.whl", hash = "sha256:2d7e8348291948af66122cff006c9f8da6255d224e7cf8e37d8de2df3bad8c9c", size = 11743, upload-time = "2025-10-16T16:14:10.512Z" }, ] +[[package]] +name = "courlan" +version = "1.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "babel" }, + { name = "tld" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6f/54/6d6ceeff4bed42e7a10d6064d35ee43a810e7b3e8beb4abeae8cff4713ae/courlan-1.3.2.tar.gz", hash = "sha256:0b66f4db3a9c39a6e22dd247c72cfaa57d68ea660e94bb2c84ec7db8712af190", size = 206382, upload-time = "2024-10-29T16:40:20.994Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8e/ca/6a667ccbe649856dcd3458bab80b016681b274399d6211187c6ab969fc50/courlan-1.3.2-py3-none-any.whl", hash = "sha256:d0dab52cf5b5b1000ee2839fbc2837e93b2514d3cb5bb61ae158a55b7a04c6be", size = 33848, upload-time = "2024-10-29T16:40:18.325Z" }, +] + [[package]] name = "coverage" version = "7.12.0" @@ -524,6 +547,21 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a", size = 28686, upload-time = "2024-06-09T16:20:16.715Z" }, ] +[[package]] +name = "dateparser" +version = "1.2.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "python-dateutil" }, + { name = "pytz" }, + { name = "regex" }, + { name = "tzlocal" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a9/30/064144f0df1749e7bb5faaa7f52b007d7c2d08ec08fed8411aba87207f68/dateparser-1.2.2.tar.gz", hash = "sha256:986316f17cb8cdc23ea8ce563027c5ef12fc725b6fb1d137c14ca08777c5ecf7", size = 329840, upload-time = "2025-06-26T09:29:23.211Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/22/f020c047ae1346613db9322638186468238bcfa8849b4668a22b97faad65/dateparser-1.2.2-py3-none-any.whl", hash = "sha256:5a5d7211a09013499867547023a2a0c91d5a27d15dd4dbcea676ea9fe66f2482", size = 315453, upload-time = "2025-06-26T09:29:21.412Z" }, +] + [[package]] name = "dill" version = "0.4.0" @@ -1110,7 +1148,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/49/e8/58c7f85958bda41dafea50497cbd59738c5c43dbbea5ee83d651234398f4/greenlet-3.2.4-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:1a921e542453fe531144e91e1feedf12e07351b1cf6c9e8a3325ea600a715a31", size = 272814, upload-time = "2025-08-07T13:15:50.011Z" }, { url = "https://files.pythonhosted.org/packages/62/dd/b9f59862e9e257a16e4e610480cfffd29e3fae018a68c2332090b53aac3d/greenlet-3.2.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd3c8e693bff0fff6ba55f140bf390fa92c994083f838fece0f63be121334945", size = 641073, upload-time = "2025-08-07T13:42:57.23Z" }, { url = 
"https://files.pythonhosted.org/packages/f7/0b/bc13f787394920b23073ca3b6c4a7a21396301ed75a655bcb47196b50e6e/greenlet-3.2.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:710638eb93b1fa52823aa91bf75326f9ecdfd5e0466f00789246a5280f4ba0fc", size = 655191, upload-time = "2025-08-07T13:45:29.752Z" }, - { url = "https://files.pythonhosted.org/packages/f2/d6/6adde57d1345a8d0f14d31e4ab9c23cfe8e2cd39c3baf7674b4b0338d266/greenlet-3.2.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c5111ccdc9c88f423426df3fd1811bfc40ed66264d35aa373420a34377efc98a", size = 649516, upload-time = "2025-08-07T13:53:16.314Z" }, { url = "https://files.pythonhosted.org/packages/7f/3b/3a3328a788d4a473889a2d403199932be55b1b0060f4ddd96ee7cdfcad10/greenlet-3.2.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d76383238584e9711e20ebe14db6c88ddcedc1829a9ad31a584389463b5aa504", size = 652169, upload-time = "2025-08-07T13:18:32.861Z" }, { url = "https://files.pythonhosted.org/packages/ee/43/3cecdc0349359e1a527cbf2e3e28e5f8f06d3343aaf82ca13437a9aa290f/greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671", size = 610497, upload-time = "2025-08-07T13:18:31.636Z" }, { url = "https://files.pythonhosted.org/packages/b8/19/06b6cf5d604e2c382a6f31cafafd6f33d5dea706f4db7bdab184bad2b21d/greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b", size = 1121662, upload-time = "2025-08-07T13:42:41.117Z" }, @@ -1121,7 +1158,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/22/5c/85273fd7cc388285632b0498dbbab97596e04b154933dfe0f3e68156c68c/greenlet-3.2.4-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:49a30d5fda2507ae77be16479bdb62a660fa51b1eb4928b524975b3bde77b3c0", size = 273586, upload-time = "2025-08-07T13:16:08.004Z" }, { url = 
"https://files.pythonhosted.org/packages/d1/75/10aeeaa3da9332c2e761e4c50d4c3556c21113ee3f0afa2cf5769946f7a3/greenlet-3.2.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:299fd615cd8fc86267b47597123e3f43ad79c9d8a22bebdce535e53550763e2f", size = 686346, upload-time = "2025-08-07T13:42:59.944Z" }, { url = "https://files.pythonhosted.org/packages/c0/aa/687d6b12ffb505a4447567d1f3abea23bd20e73a5bed63871178e0831b7a/greenlet-3.2.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:c17b6b34111ea72fc5a4e4beec9711d2226285f0386ea83477cbb97c30a3f3a5", size = 699218, upload-time = "2025-08-07T13:45:30.969Z" }, - { url = "https://files.pythonhosted.org/packages/dc/8b/29aae55436521f1d6f8ff4e12fb676f3400de7fcf27fccd1d4d17fd8fecd/greenlet-3.2.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b4a1870c51720687af7fa3e7cda6d08d801dae660f75a76f3845b642b4da6ee1", size = 694659, upload-time = "2025-08-07T13:53:17.759Z" }, { url = "https://files.pythonhosted.org/packages/92/2e/ea25914b1ebfde93b6fc4ff46d6864564fba59024e928bdc7de475affc25/greenlet-3.2.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:061dc4cf2c34852b052a8620d40f36324554bc192be474b9e9770e8c042fd735", size = 695355, upload-time = "2025-08-07T13:18:34.517Z" }, { url = "https://files.pythonhosted.org/packages/72/60/fc56c62046ec17f6b0d3060564562c64c862948c9d4bc8aa807cf5bd74f4/greenlet-3.2.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:44358b9bf66c8576a9f57a590d5f5d6e72fa4228b763d0e43fee6d3b06d3a337", size = 657512, upload-time = "2025-08-07T13:18:33.969Z" }, { url = "https://files.pythonhosted.org/packages/23/6e/74407aed965a4ab6ddd93a7ded3180b730d281c77b765788419484cdfeef/greenlet-3.2.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2917bdf657f5859fbf3386b12d68ede4cf1f04c90c3a6bc1f013dd68a22e2269", size = 1612508, upload-time = "2025-11-04T12:42:23.427Z" }, @@ -1197,6 +1233,19 @@ 
wheels = [ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, ] +[[package]] +name = "h2" +version = "4.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "hpack" }, + { name = "hyperframe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1d/17/afa56379f94ad0fe8defd37d6eb3f89a25404ffc71d4d848893d270325fc/h2-4.3.0.tar.gz", hash = "sha256:6c59efe4323fa18b47a632221a1888bd7fde6249819beda254aeca909f221bf1", size = 2152026, upload-time = "2025-08-23T18:12:19.778Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779, upload-time = "2025-08-23T18:12:17.779Z" }, +] + [[package]] name = "hf-xet" version = "1.2.0" @@ -1226,6 +1275,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/44/870d44b30e1dcfb6a65932e3e1506c103a8a5aea9103c337e7a53180322c/hf_xet-1.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:e6584a52253f72c9f52f9e549d5895ca7a471608495c4ecaa6cc73dba2b24d69", size = 2905735, upload-time = "2025-10-24T19:04:35.928Z" }, ] +[[package]] +name = "hpack" +version = "4.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276, upload-time = "2025-01-22T21:44:58.347Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = 
"sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357, upload-time = "2025-01-22T21:44:56.92Z" }, +] + +[[package]] +name = "htmldate" +version = "1.9.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "charset-normalizer" }, + { name = "dateparser" }, + { name = "lxml" }, + { name = "python-dateutil" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9d/10/ead9dabc999f353c3aa5d0dc0835b1e355215a5ecb489a7f4ef2ddad5e33/htmldate-1.9.4.tar.gz", hash = "sha256:1129063e02dd0354b74264de71e950c0c3fcee191178321418ccad2074cc8ed0", size = 44690, upload-time = "2025-11-04T17:46:44.983Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a1/bd/adfcdaaad5805c0c5156aeefd64c1e868c05e9c1cd6fd21751f168cd88c7/htmldate-1.9.4-py3-none-any.whl", hash = "sha256:1b94bcc4e08232a5b692159903acf95548b6a7492dddca5bb123d89d6325921c", size = 31558, upload-time = "2025-11-04T17:46:43.258Z" }, +] + [[package]] name = "httpcore" version = "1.0.9" @@ -1254,6 +1328,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, ] +[package.optional-dependencies] +http2 = [ + { name = "h2" }, +] + [[package]] name = "httpx-sse" version = "0.4.3" @@ -1282,6 +1361,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/bd/1a875e0d592d447cbc02805fd3fe0f497714d6a2583f59d14fa9ebad96eb/huggingface_hub-0.36.0-py3-none-any.whl", hash = "sha256:7bcc9ad17d5b3f07b57c78e79d527102d08313caa278a641993acddcb894548d", size = 566094, upload-time = "2025-10-23T12:11:59.557Z" }, ] +[[package]] +name = "hyperframe" +version = "6.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566, upload-time = "2025-01-22T21:41:49.302Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007, upload-time = "2025-01-22T21:41:47.295Z" }, +] + [[package]] name = "identify" version = "2.6.15" @@ -1475,6 +1563,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" }, ] +[[package]] +name = "justext" +version = "3.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "lxml", extra = ["html-clean"] }, +] +sdist = { url = "https://files.pythonhosted.org/packages/49/f3/45890c1b314f0d04e19c1c83d534e611513150939a7cf039664d9ab1e649/justext-3.0.2.tar.gz", hash = "sha256:13496a450c44c4cd5b5a75a5efcd9996066d2a189794ea99a49949685a0beb05", size = 828521, upload-time = "2025-02-25T20:21:49.934Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f2/ac/52f4e86d1924a7fc05af3aeb34488570eccc39b4af90530dd6acecdf16b5/justext-3.0.2-py2.py3-none-any.whl", hash = "sha256:62b1c562b15c3c6265e121cc070874243a443bfd53060e869393f09d6b6cc9a7", size = 837940, upload-time = "2025-02-25T20:21:44.179Z" }, +] + [[package]] name = "langchain" version = "1.1.3" @@ -1784,6 +1884,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/92/aa/df863bcc39c5e0946263454aba394de8a9084dbaff8ad143846b0d844739/lxml-6.0.2-cp314-cp314t-win_arm64.whl", hash = 
"sha256:bb4c1847b303835d89d785a18801a883436cdfd5dc3d62947f9c49e24f0f5a2c", size = 3822205, upload-time = "2025-09-22T04:03:36.249Z" }, ] +[package.optional-dependencies] +html-clean = [ + { name = "lxml-html-clean" }, +] + +[[package]] +name = "lxml-html-clean" +version = "0.4.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "lxml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d9/cb/c9c5bb2a9c47292e236a808dd233a03531f53b626f36259dcd32b49c76da/lxml_html_clean-0.4.3.tar.gz", hash = "sha256:c9df91925b00f836c807beab127aac82575110eacff54d0a75187914f1bd9d8c", size = 21498, upload-time = "2025-10-02T20:49:24.895Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/4a/63a9540e3ca73709f4200564a737d63a4c8c9c4dd032bab8535f507c190a/lxml_html_clean-0.4.3-py3-none-any.whl", hash = "sha256:63fd7b0b9c3a2e4176611c2ca5d61c4c07ffca2de76c14059a81a2825833731e", size = 14177, upload-time = "2025-10-02T20:49:23.749Z" }, +] + [[package]] name = "mako" version = "1.3.10" @@ -3278,7 +3395,7 @@ dependencies = [ { name = "cryptography" }, { name = "docling" }, { name = "fastapi" }, - { name = "httpx" }, + { name = "httpx", extra = ["http2"] }, { name = "langchain-community" }, { name = "langchain-google-vertexai" }, { name = "langchain-litellm" }, @@ -3295,6 +3412,7 @@ dependencies = [ { name = "sentence-transformers" }, { name = "sqlalchemy" }, { name = "tiktoken" }, + { name = "trafilatura" }, { name = "transformers" }, { name = "uvicorn" }, ] @@ -3320,7 +3438,7 @@ requires-dist = [ { name = "cryptography", specifier = ">=46.0.3" }, { name = "docling", specifier = ">=2.61.2" }, { name = "fastapi", specifier = ">=0.121.2" }, - { name = "httpx", specifier = ">=0.28.1" }, + { name = "httpx", extras = ["http2"], specifier = ">=0.28.1" }, { name = "langchain-community", specifier = ">=0.3.17" }, { name = "langchain-google-vertexai", specifier = ">=3.2.0" }, { name = "langchain-litellm", specifier = ">=0.3.0" }, @@ -3337,6 
+3455,7 @@ requires-dist = [ { name = "sentence-transformers", specifier = ">=5.2.0" }, { name = "sqlalchemy", specifier = ">=2.0.44" }, { name = "tiktoken", specifier = ">=0.12.0" }, + { name = "trafilatura", specifier = ">=1.7.0" }, { name = "transformers", specifier = ">=4.57.1" }, { name = "uvicorn", specifier = ">=0.38.0" }, ] @@ -3997,6 +4116,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/af/df/c7891ef9d2712ad774777271d39fdef63941ffba0a9d59b7ad1fd2765e57/tiktoken-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f61c0aea5565ac82e2ec50a05e02a6c44734e91b51c10510b084ea1b8e633a71", size = 920667, upload-time = "2025-10-06T20:22:34.444Z" }, ] +[[package]] +name = "tld" +version = "0.13.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/df/a1/5723b07a70c1841a80afc9ac572fdf53488306848d844cd70519391b0d26/tld-0.13.1.tar.gz", hash = "sha256:75ec00936cbcf564f67361c41713363440b6c4ef0f0c1592b5b0fbe72c17a350", size = 462000, upload-time = "2025-05-21T22:18:29.341Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/70/b2f38360c3fc4bc9b5e8ef429e1fde63749144ac583c2dbdf7e21e27a9ad/tld-0.13.1-py2.py3-none-any.whl", hash = "sha256:a2d35109433ac83486ddf87e3c4539ab2c5c2478230e5d9c060a18af4b03aa7c", size = 274718, upload-time = "2025-05-21T22:18:25.811Z" }, +] + [[package]] name = "tokenizers" version = "0.22.1" @@ -4110,6 +4238,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" }, ] +[[package]] +name = "trafilatura" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "courlan" }, + { name = "htmldate" }, + { name = "justext" }, + { 
name = "lxml" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/25/e3ebeefdebfdfae8c4a4396f5a6ea51fc6fa0831d63ce338e5090a8003dc/trafilatura-2.0.0.tar.gz", hash = "sha256:ceb7094a6ecc97e72fea73c7dba36714c5c5b577b6470e4520dca893706d6247", size = 253404, upload-time = "2024-12-03T15:23:24.16Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/b6/097367f180b6383a3581ca1b86fcae284e52075fa941d1232df35293363c/trafilatura-2.0.0-py3-none-any.whl", hash = "sha256:77eb5d1e993747f6f20938e1de2d840020719735690c840b9a1024803a4cd51d", size = 132557, upload-time = "2024-12-03T15:23:21.41Z" }, +] + [[package]] name = "transformers" version = "4.57.1" @@ -4299,6 +4445,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839, upload-time = "2025-03-23T13:54:41.845Z" }, ] +[[package]] +name = "tzlocal" +version = "5.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "tzdata", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8b/2e/c14812d3d4d9cd1773c6be938f89e5735a1f11a9f184ac3639b93cef35d5/tzlocal-5.3.1.tar.gz", hash = "sha256:cceffc7edecefea1f595541dbd6e990cb1ea3d19bf01b2809f362a03dd7921fd", size = 30761, upload-time = "2025-03-05T21:17:41.549Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/14/e2a54fabd4f08cd7af1c07030603c3356b74da07f7cc056e600436edfa17/tzlocal-5.3.1-py3-none-any.whl", hash = "sha256:eb1a66c3ef5847adf7a834f1be0800581b683b5608e74f86ecbcef8ab91bb85d", size = 18026, upload-time = "2025-03-05T21:17:39.857Z" }, +] + [[package]] name = "urllib3" version = "2.5.0"