diff --git a/ragitect/api/v1/documents.py b/ragitect/api/v1/documents.py index abfc8bf..661ba84 100644 --- a/ragitect/api/v1/documents.py +++ b/ragitect/api/v1/documents.py @@ -71,6 +71,31 @@ async def process_document_background(document_id: UUID) -> None: # Error already logged and status updated in service +async def fetch_and_process_url_background( + document_id: UUID, + url: str, + source_type: str, +) -> None: + """Background task wrapper for URL document processing + + Args: + document_id: Document UUID to process + url: Source URL to fetch content from + source_type: Type of URL source ("url", "youtube", "pdf") + """ + from ragitect.services.database.connection import get_session_factory + from ragitect.services.url_processing_service import URLProcessingService + + session_factory = get_session_factory() + async with session_factory() as session: + try: + processing_service = URLProcessingService(session) + await processing_service.process_url_document(document_id, url, source_type) + except Exception as e: + logger.error(f"Background URL processing failed for {document_id}: {e}") + # Error already logged and status updated in service + + @router.post( "/{workspace_id}/documents", response_model=DocumentListResponse, @@ -185,6 +210,7 @@ async def upload_documents( async def upload_url( workspace_id: UUID, input_data: URLUploadInput, + background_tasks: BackgroundTasks, session: AsyncSession = Depends(get_async_session), ) -> URLUploadResponse: """Submit URL for document ingestion @@ -192,6 +218,7 @@ async def upload_url( Args: workspace_id: Target workspace UUID input_data: URL upload input with source_type and url + background_tasks: FastAPI background tasks for async processing session: Database session (injected by FastAPI) Returns: @@ -271,8 +298,14 @@ async def upload_url( safe_log_url, ) - # NOTE: Background processing NOT triggered here (Story 5.5) - # Document will remain in "backlog" status until background task picks it up + # Trigger background URL processing + background_tasks.add_task( + fetch_and_process_url_background, + document_id=document.id, + url=sanitized_url, + source_type=source_type, + ) + logger.info(f"Scheduled background URL processing for document {document.id}") return URLUploadResponse( id=str(document.id), diff --git a/ragitect/services/processor/factory.py b/ragitect/services/processor/factory.py index 1c34e74..8c1dee3 100644 --- a/ragitect/services/processor/factory.py +++ b/ragitect/services/processor/factory.py @@ -1,17 +1,27 @@ """ Document Processor Factory -Selects the appropriate document processor based on file type. -Uses a simple two-processor strategy: +Selects the appropriate document processor based on file type or URL source type. +Uses a simple two-processor strategy for files: - SimpleProcessor for text-based formats (lightweight, fast) - DoclingProcessor for complex formats (PDF, DOCX, etc.)
+ +For URL sources (Story 5.5): +- WebURLProcessor for web pages +- YouTubeProcessor for YouTube videos +- PDFURLProcessor for PDF URLs """ +import logging +from pathlib import Path +from typing import Literal + from ragitect.services.processor.base import BaseDocumentProcessor -from ragitect.services.processor.simple import SimpleProcessor from ragitect.services.processor.docling_processor import DoclingProcessor -from pathlib import Path -import logging +from ragitect.services.processor.pdf_url_processor import PDFURLProcessor +from ragitect.services.processor.simple import SimpleProcessor +from ragitect.services.processor.web_url_processor import WebURLProcessor +from ragitect.services.processor.youtube_processor import YouTubeProcessor logger = logging.getLogger(__name__) @@ -28,13 +38,19 @@ class UnsupportedFormatError(Exception): class ProcessorFactory: - """Factory for selecting appropriate document processor based on filetype""" + """Factory for selecting appropriate document processor based on filetype or URL source type""" def __init__(self): logger.info("Initializing ProcessorFactory") + # File-based processors self.simple_processor: BaseDocumentProcessor = SimpleProcessor() self.docling_processor: BaseDocumentProcessor = DoclingProcessor() + # URL-based processors (Story 5.5) + self.web_url_processor: WebURLProcessor = WebURLProcessor() + self.youtube_processor: YouTubeProcessor = YouTubeProcessor() + self.pdf_url_processor: PDFURLProcessor = PDFURLProcessor() + self.text_formats: list[str] = self.simple_processor.supported_formats() self.complex_formats: list[str] = self.docling_processor.supported_formats() @@ -42,26 +58,50 @@ def __init__(self): f"ProcessorFactory initialized - Text formats: {len(self.text_formats)}, Complex formats: {len(self.complex_formats)}" ) - def get_processor(self, file_name: str) -> BaseDocumentProcessor: - """Select appropriate processor for given file + def get_processor( + self, + source: str, + source_type: Literal["file", "url", "youtube", "pdf"] = "file", + ) -> BaseDocumentProcessor: + """Select appropriate processor for given source Args: - file_name: name of the uploaded file + source: File name (for files) or URL (for URLs) + source_type: Type of source - "file", "url", "youtube", or "pdf" + - "file" (default): Route by file extension + - "url": Use WebURLProcessor for web pages + - "youtube": Use YouTubeProcessor for YouTube videos + - "pdf": Use PDFURLProcessor for PDF URLs Returns: - BaseDocumentProcessor: either SimpleProcessor or DoclingProcessor instance + BaseDocumentProcessor: Appropriate processor for the source type Raises: - UnsupportedFormatError: if the file extension is not supported by any processor + UnsupportedFormatError: If source_type is "file" and the file extension + is not supported by any processor """ - ext = Path(file_name).suffix.lower() + # URL-based routing (Story 5.5) + if source_type == "url": + logger.info(f"Selected WebURLProcessor for URL: {source[:50]}...") + return self.web_url_processor + + if source_type == "youtube": + logger.info(f"Selected YouTubeProcessor for YouTube URL: {source[:50]}...") + return self.youtube_processor + + if source_type == "pdf": + logger.info(f"Selected PDFURLProcessor for PDF URL: {source[:50]}...") + return self.pdf_url_processor + + # Existing file-based routing (backward compatible) + ext = Path(source).suffix.lower() if ext in self.text_formats: - logger.info(f"Selected SimpleProcessor for file {file_name}") + logger.info(f"Selected SimpleProcessor for file {source}") 
return self.simple_processor if ext in self.complex_formats: - logger.info(f"Selected DoclingProcessor for file {file_name}") + logger.info(f"Selected DoclingProcessor for file {source}") return self.docling_processor all_formats = sorted(self.text_formats + self.complex_formats) @@ -70,5 +110,5 @@ def get_processor(self, file_name: str) -> BaseDocumentProcessor: f"Unsupported format: {ext}\nSupported formats: {', '.join(all_formats)}" ) - logger.warning(f"Unsupported file format attempted: {ext} (file: {file_name})") + logger.warning(f"Unsupported file format attempted: {ext} (file: {source})") raise UnsupportedFormatError(error_msg) diff --git a/ragitect/services/retry.py b/ragitect/services/retry.py new file mode 100644 index 0000000..acd1601 --- /dev/null +++ b/ragitect/services/retry.py @@ -0,0 +1,119 @@ +"""Retry utility with exponential backoff for URL fetching + +Implements AC4 (NFR-R3) retry logic: +- Retry up to 3 times on transient failures +- Exponential backoff delays: 1s, 2s, 4s +- Retryable errors: timeout, network errors, HTTP 5xx +- Non-retryable errors: HTTP 4xx (immediate fail) + +Usage: + >>> from ragitect.services.retry import with_retry + >>> result = await with_retry(fetch_url, "https://example.com") +""" + +import asyncio +import logging +from typing import Any, Callable + +import httpx + +logger = logging.getLogger(__name__) + +# Retry configuration constants (AC4) +MAX_RETRIES = 3 +INITIAL_DELAY = 1.0 # seconds +BACKOFF_MULTIPLIER = 2 + +# Retryable exceptions (transient network issues) +RETRYABLE_EXCEPTIONS = ( + httpx.TimeoutException, + httpx.NetworkError, + httpx.ConnectError, + httpx.RemoteProtocolError, +) + +# HTTP status codes that should trigger retry +RETRYABLE_STATUS_CODES = {500, 502, 503, 504} + + +async def with_retry(func: Callable, *args: Any, **kwargs: Any) -> Any: + """Execute async function with exponential backoff retry. + + Retries on: + - Timeout exceptions (httpx.TimeoutException) + - Network errors (httpx.NetworkError, httpx.ConnectError) + - HTTP 5xx server errors (500, 502, 503, 504) + + Does NOT retry on: + - HTTP 4xx client errors (400, 401, 403, 404) + - Content extraction errors + - Other application errors + + Args: + func: Async function to call + *args: Positional arguments for func + **kwargs: Keyword arguments for func + + Returns: + Result from func if successful + + Raises: + Exception: The last exception if all retries exhausted, + or immediately for non-retryable errors + + Example: + >>> async def fetch_data(url: str) -> str: + ... async with httpx.AsyncClient() as client: + ... response = await client.get(url) + ... 
return response.text + >>> result = await with_retry(fetch_data, "https://example.com") + """ + delay = INITIAL_DELAY + last_exception: Exception | None = None + + for attempt in range(MAX_RETRIES): + try: + return await func(*args, **kwargs) + + except RETRYABLE_EXCEPTIONS as e: + last_exception = e + if attempt < MAX_RETRIES - 1: + logger.warning( + f"Retry {attempt + 1}/{MAX_RETRIES} after {delay:.1f}s: " + f"{type(e).__name__}: {e}" + ) + await asyncio.sleep(delay) + delay *= BACKOFF_MULTIPLIER + else: + logger.error(f"All {MAX_RETRIES} retries exhausted: {e}") + raise + + except httpx.HTTPStatusError as e: + # Only retry on 5xx server errors + if e.response.status_code in RETRYABLE_STATUS_CODES: + last_exception = e + if attempt < MAX_RETRIES - 1: + logger.warning( + f"Retry {attempt + 1}/{MAX_RETRIES} after {delay:.1f}s: " + f"HTTP {e.response.status_code}" + ) + await asyncio.sleep(delay) + delay *= BACKOFF_MULTIPLIER + else: + logger.error( + f"All {MAX_RETRIES} retries exhausted: " + f"HTTP {e.response.status_code}" + ) + raise + else: + # 4xx errors - don't retry, fail immediately + logger.warning( + f"Non-retryable HTTP error: {e.response.status_code}" + ) + raise + + # Should not reach here, but just in case + if last_exception: + raise last_exception + + raise RuntimeError("Unexpected state in retry logic") diff --git a/ragitect/services/url_processing_service.py b/ragitect/services/url_processing_service.py new file mode 100644 index 0000000..b668494 --- /dev/null +++ b/ragitect/services/url_processing_service.py @@ -0,0 +1,286 @@ +"""URL Processing Service + +Handles background URL document processing including URL fetching, +content extraction, embedding generation, and status management. + +Implements AC2, AC3 from Story 5.5: +- Background task function for URL-based documents +- Status tracking in document metadata + +Flow: + 1. Update status to "fetching" + 2. Get appropriate processor from ProcessorFactory (url/youtube/pdf) + 3. Fetch URL and convert to Markdown + 4. Store processed content in DB + 5. Update status to "processing" + 6. Split text into chunks + 7. Update status to "embedding" + 8. Generate embeddings for all chunks + 9. Store chunks with embeddings + 10. Update status to "ready" (or "error" on failure) +""" + +import asyncio +import logging +from datetime import datetime, timezone +from typing import Any, Literal +from uuid import UUID + +from sqlalchemy.ext.asyncio import AsyncSession + +from ragitect.services.config import load_document_config +from ragitect.services.database.repositories.document_repo import DocumentRepository +from ragitect.services.document_processor import split_document +from ragitect.services.embedding import embed_documents, get_embedding_model_from_config +from ragitect.services.processor.factory import ProcessorFactory +from ragitect.services.retry import with_retry + +logger = logging.getLogger(__name__) + +# Concurrency limit for URL fetches (NFR-P5, AC5) +# Acquired via async with in process_url_document (Task 8) +_url_fetch_semaphore = asyncio.Semaphore(5) + + +class URLProcessingService: + """Service for background URL document processing + + Orchestrates the URL processing workflow: + 1. Update status to "fetching" with fetch_started_at timestamp + 2. Get appropriate processor from ProcessorFactory + 3. Fetch URL and convert to Markdown via processor + 4. Store processed content in DB + 5. Update status to "processing" with fetch_completed_at timestamp + 6. Split text into chunks + 7. Update status to "embedding" + 8.
Generate embeddings for all chunks + 9. Store chunks with embeddings via add_chunks() + 10. Update status to "ready" (or "error" on failure) + + This mirrors DocumentProcessingService but handles URL-based sources + instead of file uploads. + + Usage: + >>> async with get_session() as session: + ... service = URLProcessingService(session) + ... await service.process_url_document(document_id, url, "url") + """ + + def __init__(self, session: AsyncSession): + """Initialize URLProcessingService + + Args: + session: SQLAlchemy async session + """ + self.session = session + self.repo = DocumentRepository(session) + self.processor_factory = ProcessorFactory() + + async def process_url_document( + self, + document_id: UUID, + url: str, + source_type: Literal["url", "youtube", "pdf"], + ) -> None: + """Process URL document: fetch, extract, chunk, embed (AC2) + + Args: + document_id: Document UUID to process + url: URL to fetch content from + source_type: Type of URL source ("url", "youtube", "pdf") + + Note: + This method catches all exceptions internally and updates + document status to "error" with error message in metadata. + Exceptions are logged but not re-raised to avoid crashing + the background task. + """ + async with _url_fetch_semaphore: + try: + # Update status to fetching with timestamp (AC3) + await self._update_status_with_metadata( + document_id, + "fetching", + {"fetch_started_at": datetime.now(timezone.utc).isoformat()}, + ) + await self.session.commit() + + # Get appropriate processor from factory + processor = self.processor_factory.get_processor(url, source_type) + + # Fetch and convert to Markdown with retry logic (AC4) + markdown = await with_retry(processor.process, url) + + logger.info( + f"Fetched {len(markdown)} chars from {url[:50]}..." 
+ ) + + # Update status to processing with fetch_completed_at (AC3) + await self._update_status_with_metadata( + document_id, + "processing", + {"fetch_completed_at": datetime.now(timezone.utc).isoformat()}, + ) + await self.session.commit() + + # Store processed content in document + await self.repo.update_processed_content(document_id, markdown) + await self.session.commit() + + # Chunking and embedding (reuse pattern from DocumentProcessingService) + await self._chunk_and_embed(document_id, markdown) + + # Final status update + await self.repo.update_status(document_id, "ready") + await self.session.commit() + + logger.info(f"Successfully processed URL document {document_id}") + + except Exception as e: + logger.error( + f"URL processing failed for {document_id}: {e}", + exc_info=True, + ) + await self._set_error_status(document_id, str(e)) + + async def _update_status_with_metadata( + self, + document_id: UUID, + status: str, + extra_metadata: dict[str, Any], + ) -> None: + """Update status and merge additional metadata (AC3) + + Args: + document_id: Document UUID + status: New status value + extra_metadata: Additional metadata fields to merge + """ + document = await self.repo.get_by_id_or_raise(document_id) + metadata = dict(document.metadata_) if document.metadata_ else {} + metadata["status"] = status + metadata.update(extra_metadata) + await self.repo.update_metadata(document_id, metadata) + + async def _chunk_and_embed(self, document_id: UUID, text: str) -> None: + """Split text and generate embeddings + + Args: + document_id: Document UUID + text: Processed text content to chunk and embed + """ + # Update status to embedding + await self.repo.update_status(document_id, "embedding") + await self.session.commit() + + # Load document configuration for chunk size/overlap + doc_config = load_document_config() + + # Split text into chunks using Markdown-aware splitter + chunks = split_document( + text, + chunk_size=doc_config.chunk_size, + overlap=doc_config.chunk_overlap, + ) + + logger.info(f"Split into {len(chunks)} chunks for document {document_id}") + + if chunks: + # Get embedding model from config + embedding_model = await get_embedding_model_from_config(self.session) + + # Generate embeddings for all chunks + embeddings = await embed_documents(embedding_model, chunks) + + # Prepare chunk data: (content, embedding, metadata) + chunk_data: list[tuple[str, list[float], dict[str, Any] | None]] = [ + (chunk, embedding, {"chunk_index": i}) + for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)) + ] + + # Store chunks with embeddings + await self.repo.add_chunks(document_id, chunk_data) + logger.info(f"Stored {len(chunk_data)} chunks for document {document_id}") + else: + logger.warning( + f"No chunks generated for URL document {document_id} (empty content)" + ) + + async def _set_error_status( + self, document_id: UUID, error_message: str + ) -> None: + """Set error status with user-friendly message (AC6) + + Args: + document_id: Document UUID + error_message: Raw error message to humanize + """ + try: + document = await self.repo.get_by_id_or_raise(document_id) + metadata = dict(document.metadata_) if document.metadata_ else {} + metadata["status"] = "error" + metadata["error_message"] = self._humanize_error(error_message) + await self.repo.update_metadata(document_id, metadata) + await self.session.commit() + except Exception as update_error: + logger.error(f"Failed to set error status: {update_error}") + + def _humanize_error(self, error_message: str) -> str: + 
"""Convert technical error to user-friendly message (AC6) + + Args: + error_message: Raw technical error message + + Returns: + User-friendly error message (< 500 chars) + """ + error_lower = error_message.lower() + + # Timeout errors + if "timeout" in error_lower: + return "Request timed out (30s limit). The server may be slow or unavailable." + + # HTTP status errors + if "404" in error_lower: + return "Page not found (404). Please verify the URL is correct." + + if "403" in error_lower: + return "Access denied (403). The content may be restricted." + + if "401" in error_lower: + return "Authentication required (401). The content may be private." + + if "500" in error_lower or "502" in error_lower or "503" in error_lower: + return "Server error. The website may be temporarily unavailable." + + # Connection errors + if "connect" in error_lower: + return "Could not connect to the URL. Please check the address and try again." + + # YouTube-specific errors + if "transcript" in error_lower and "disabled" in error_lower: + return "Transcripts are disabled for this YouTube video." + + if "transcript" in error_lower: + return "Could not retrieve transcript for this YouTube video." + + # PDF-specific errors + if "pdf" in error_lower and "invalid" in error_lower: + return "URL does not point to a valid PDF file." + + if "pdf" in error_lower and "download" in error_lower: + return "Failed to download PDF. Please check the URL." + + if "pdf" in error_lower and "process" in error_lower: + return "Failed to process PDF content. The file may be corrupted or password-protected." + + # Content extraction errors + if "extract" in error_lower or "content" in error_lower: + return "Could not extract content from the web page." + + # Generic fallback - truncate to keep user-friendly + truncated = error_message[:200] + if len(error_message) > 200: + truncated += "..." 
+ + return f"Processing failed: {truncated}" diff --git a/tests/services/processor/test_factory.py b/tests/services/processor/test_factory.py index 3b96410..d3da4b2 100644 --- a/tests/services/processor/test_factory.py +++ b/tests/services/processor/test_factory.py @@ -5,6 +5,9 @@ from ragitect.services.processor.factory import ProcessorFactory, UnsupportedFormatError from ragitect.services.processor.simple import SimpleProcessor from ragitect.services.processor.docling_processor import DoclingProcessor +from ragitect.services.processor.web_url_processor import WebURLProcessor +from ragitect.services.processor.youtube_processor import YouTubeProcessor +from ragitect.services.processor.pdf_url_processor import PDFURLProcessor class TestProcessorFactory: @@ -76,3 +79,93 @@ def test_handles_filename_with_dots(self): def test_handles_path_like_filename(self): processor = self.factory.get_processor("path/to/document.pdf") assert isinstance(processor, DoclingProcessor) + + +class TestProcessorFactoryURLRouting: + """Test processor factory URL routing via source_type parameter (Story 5.5)""" + + def setup_method(self): + self.factory = ProcessorFactory() + + # AC1: source_type="file" returns existing behavior (backward compatible) + def test_source_type_file_uses_file_routing(self): + """source_type='file' should use file extension routing""" + processor = self.factory.get_processor("document.txt", source_type="file") + assert isinstance(processor, SimpleProcessor) + + def test_source_type_file_for_pdf_returns_docling(self): + """source_type='file' with .pdf should return DoclingProcessor""" + processor = self.factory.get_processor("report.pdf", source_type="file") + assert isinstance(processor, DoclingProcessor) + + # AC1: source_type="url" returns WebURLProcessor + def test_source_type_url_returns_web_url_processor(self): + """source_type='url' should return WebURLProcessor""" + processor = self.factory.get_processor( + "https://example.com/article", source_type="url" + ) + assert isinstance(processor, WebURLProcessor) + + def test_source_type_url_ignores_file_extension(self): + """source_type='url' should ignore .html extension and return WebURLProcessor""" + processor = self.factory.get_processor( + "https://example.com/page.html", source_type="url" + ) + assert isinstance(processor, WebURLProcessor) + + # AC1: source_type="youtube" returns YouTubeProcessor + def test_source_type_youtube_returns_youtube_processor(self): + """source_type='youtube' should return YouTubeProcessor""" + processor = self.factory.get_processor( + "https://www.youtube.com/watch?v=dQw4w9WgXcQ", source_type="youtube" + ) + assert isinstance(processor, YouTubeProcessor) + + def test_source_type_youtube_with_short_url(self): + """source_type='youtube' should work with youtu.be short URLs""" + processor = self.factory.get_processor( + "https://youtu.be/dQw4w9WgXcQ", source_type="youtube" + ) + assert isinstance(processor, YouTubeProcessor) + + # AC1: source_type="pdf" returns PDFURLProcessor + def test_source_type_pdf_returns_pdf_url_processor(self): + """source_type='pdf' should return PDFURLProcessor""" + processor = self.factory.get_processor( + "https://arxiv.org/pdf/1706.03762.pdf", source_type="pdf" + ) + assert isinstance(processor, PDFURLProcessor) + + def test_source_type_pdf_with_non_pdf_url(self): + """source_type='pdf' should return PDFURLProcessor even for non-.pdf URLs""" + processor = self.factory.get_processor( + "https://example.com/document", source_type="pdf" + ) + assert isinstance(processor, 
PDFURLProcessor) + + # AC1: Default source_type is "file" (backward compatibility) + def test_default_source_type_is_file(self): + """Default source_type should be 'file' for backward compatibility""" + # Calling without source_type should behave like source_type="file" + processor = self.factory.get_processor("document.txt") + assert isinstance(processor, SimpleProcessor) + + def test_backward_compatible_with_existing_code(self): + """Existing code without source_type should continue to work""" + # These calls match existing tests, ensuring backward compatibility + processor_txt = self.factory.get_processor("file.txt") + processor_pdf = self.factory.get_processor("file.pdf") + processor_docx = self.factory.get_processor("file.docx") + + assert isinstance(processor_txt, SimpleProcessor) + assert isinstance(processor_pdf, DoclingProcessor) + assert isinstance(processor_docx, DoclingProcessor) + + def test_url_processors_are_initialized(self): + """URL processors should be initialized in ProcessorFactory""" + assert hasattr(self.factory, "web_url_processor") + assert hasattr(self.factory, "youtube_processor") + assert hasattr(self.factory, "pdf_url_processor") + assert isinstance(self.factory.web_url_processor, WebURLProcessor) + assert isinstance(self.factory.youtube_processor, YouTubeProcessor) + assert isinstance(self.factory.pdf_url_processor, PDFURLProcessor) diff --git a/tests/services/test_retry.py b/tests/services/test_retry.py new file mode 100644 index 0000000..650e065 --- /dev/null +++ b/tests/services/test_retry.py @@ -0,0 +1,319 @@ +"""Unit tests for retry utility with exponential backoff + +Tests retry logic (AC4) including: +- Retry on timeout exception (should retry 3 times) +- Retry on 5xx server error (should retry) +- No retry on 4xx client error (immediate fail) +- Exponential backoff delays: 1s, 2s, 4s +- Final failure after 3 retries +""" + +from unittest.mock import AsyncMock, MagicMock, patch +import asyncio + +import httpx +import pytest + +from ragitect.services.retry import ( + with_retry, + MAX_RETRIES, + INITIAL_DELAY, + BACKOFF_MULTIPLIER, + RETRYABLE_EXCEPTIONS, +) + + +pytestmark = [pytest.mark.asyncio] + + +class TestRetryOnTimeoutException: + """Test retry behavior for timeout exceptions (AC4)""" + + async def test_retries_on_timeout_exception(self): + """Should retry up to 3 times on timeout""" + call_count = 0 + + async def failing_func(): + nonlocal call_count + call_count += 1 + raise httpx.TimeoutException("Connection timed out") + + with pytest.raises(httpx.TimeoutException): + with patch("asyncio.sleep", new=AsyncMock()): + await with_retry(failing_func) + + assert call_count == MAX_RETRIES + + async def test_succeeds_after_retry(self): + """Should succeed if function passes after retry""" + call_count = 0 + + async def sometimes_failing(): + nonlocal call_count + call_count += 1 + if call_count < 3: + raise httpx.TimeoutException("Temporary failure") + return "success" + + with patch("asyncio.sleep", new=AsyncMock()): + result = await with_retry(sometimes_failing) + + assert result == "success" + assert call_count == 3 + + +class TestRetryOn5xxServerError: + """Test retry behavior for HTTP 5xx server errors (AC4)""" + + async def test_retries_on_500_error(self): + """Should retry on HTTP 500 Internal Server Error""" + call_count = 0 + mock_response = MagicMock() + mock_response.status_code = 500 + + async def server_error(): + nonlocal call_count + call_count += 1 + raise httpx.HTTPStatusError( + "Internal Server Error", + request=MagicMock(), + 
response=mock_response, + ) + + with pytest.raises(httpx.HTTPStatusError): + with patch("asyncio.sleep", new=AsyncMock()): + await with_retry(server_error) + + assert call_count == MAX_RETRIES + + async def test_retries_on_502_error(self): + """Should retry on HTTP 502 Bad Gateway""" + call_count = 0 + mock_response = MagicMock() + mock_response.status_code = 502 + + async def bad_gateway(): + nonlocal call_count + call_count += 1 + raise httpx.HTTPStatusError( + "Bad Gateway", + request=MagicMock(), + response=mock_response, + ) + + with pytest.raises(httpx.HTTPStatusError): + with patch("asyncio.sleep", new=AsyncMock()): + await with_retry(bad_gateway) + + assert call_count == MAX_RETRIES + + async def test_retries_on_503_error(self): + """Should retry on HTTP 503 Service Unavailable""" + call_count = 0 + mock_response = MagicMock() + mock_response.status_code = 503 + + async def service_unavailable(): + nonlocal call_count + call_count += 1 + raise httpx.HTTPStatusError( + "Service Unavailable", + request=MagicMock(), + response=mock_response, + ) + + with pytest.raises(httpx.HTTPStatusError): + with patch("asyncio.sleep", new=AsyncMock()): + await with_retry(service_unavailable) + + assert call_count == MAX_RETRIES + + async def test_retries_on_504_error(self): + """Should retry on HTTP 504 Gateway Timeout""" + call_count = 0 + mock_response = MagicMock() + mock_response.status_code = 504 + + async def gateway_timeout(): + nonlocal call_count + call_count += 1 + raise httpx.HTTPStatusError( + "Gateway Timeout", + request=MagicMock(), + response=mock_response, + ) + + with pytest.raises(httpx.HTTPStatusError): + with patch("asyncio.sleep", new=AsyncMock()): + await with_retry(gateway_timeout) + + assert call_count == MAX_RETRIES + + +class TestNoRetryOn4xxClientError: + """Test no retry on HTTP 4xx client errors (AC4)""" + + async def test_no_retry_on_400_error(self): + """Should NOT retry on HTTP 400 Bad Request - immediate fail""" + call_count = 0 + mock_response = MagicMock() + mock_response.status_code = 400 + + async def bad_request(): + nonlocal call_count + call_count += 1 + raise httpx.HTTPStatusError( + "Bad Request", + request=MagicMock(), + response=mock_response, + ) + + with pytest.raises(httpx.HTTPStatusError): + with patch("asyncio.sleep", new=AsyncMock()): + await with_retry(bad_request) + + # Should only be called once (no retries) + assert call_count == 1 + + async def test_no_retry_on_401_error(self): + """Should NOT retry on HTTP 401 Unauthorized""" + call_count = 0 + mock_response = MagicMock() + mock_response.status_code = 401 + + async def unauthorized(): + nonlocal call_count + call_count += 1 + raise httpx.HTTPStatusError( + "Unauthorized", + request=MagicMock(), + response=mock_response, + ) + + with pytest.raises(httpx.HTTPStatusError): + await with_retry(unauthorized) + + assert call_count == 1 + + async def test_no_retry_on_403_error(self): + """Should NOT retry on HTTP 403 Forbidden""" + call_count = 0 + mock_response = MagicMock() + mock_response.status_code = 403 + + async def forbidden(): + nonlocal call_count + call_count += 1 + raise httpx.HTTPStatusError( + "Forbidden", + request=MagicMock(), + response=mock_response, + ) + + with pytest.raises(httpx.HTTPStatusError): + await with_retry(forbidden) + + assert call_count == 1 + + async def test_no_retry_on_404_error(self): + """Should NOT retry on HTTP 404 Not Found""" + call_count = 0 + mock_response = MagicMock() + mock_response.status_code = 404 + + async def not_found(): + nonlocal call_count + 
call_count += 1 + raise httpx.HTTPStatusError( + "Not Found", + request=MagicMock(), + response=mock_response, + ) + + with pytest.raises(httpx.HTTPStatusError): + await with_retry(not_found) + + assert call_count == 1 + + +class TestExponentialBackoff: + """Test exponential backoff delays: 1s, 2s, 4s (AC4)""" + + async def test_exponential_backoff_delays(self): + """Should use exponential backoff: 1s, 2s, 4s""" + sleep_calls = [] + + async def mock_sleep(delay): + sleep_calls.append(delay) + + call_count = 0 + + async def always_timeout(): + nonlocal call_count + call_count += 1 + raise httpx.TimeoutException("Timeout") + + with patch("ragitect.services.retry.asyncio.sleep", side_effect=mock_sleep): + with pytest.raises(httpx.TimeoutException): + await with_retry(always_timeout) + + # Should have 2 sleep calls (before attempts 2 and 3) + assert len(sleep_calls) == MAX_RETRIES - 1 + + # Check exponential backoff delays + assert sleep_calls[0] == INITIAL_DELAY # 1s + assert sleep_calls[1] == INITIAL_DELAY * BACKOFF_MULTIPLIER # 2s + + +class TestRetryConstants: + """Test retry configuration constants (AC4)""" + + def test_max_retries_is_3(self): + """MAX_RETRIES should be 3""" + assert MAX_RETRIES == 3 + + def test_initial_delay_is_1_second(self): + """Initial delay should be 1 second""" + assert INITIAL_DELAY == 1.0 + + def test_backoff_multiplier_is_2(self): + """Backoff multiplier should be 2 (doubling)""" + assert BACKOFF_MULTIPLIER == 2 + + def test_timeout_is_retryable(self): + """TimeoutException should be in retryable exceptions""" + assert httpx.TimeoutException in RETRYABLE_EXCEPTIONS + + +class TestNetworkErrors: + """Test retry behavior for network errors (AC4)""" + + async def test_retries_on_network_error(self): + """Should retry on NetworkError""" + call_count = 0 + + async def network_error(): + nonlocal call_count + call_count += 1 + raise httpx.NetworkError("Network unreachable") + + with pytest.raises(httpx.NetworkError): + with patch("asyncio.sleep", new=AsyncMock()): + await with_retry(network_error) + + assert call_count == MAX_RETRIES + + async def test_retries_on_connect_error(self): + """Should retry on ConnectError""" + call_count = 0 + + async def connect_error(): + nonlocal call_count + call_count += 1 + raise httpx.ConnectError("Connection refused") + + with pytest.raises(httpx.ConnectError): + with patch("asyncio.sleep", new=AsyncMock()): + await with_retry(connect_error) + + assert call_count == MAX_RETRIES diff --git a/tests/services/test_url_processing_service.py b/tests/services/test_url_processing_service.py new file mode 100644 index 0000000..98ce2b0 --- /dev/null +++ b/tests/services/test_url_processing_service.py @@ -0,0 +1,690 @@ +"""Unit tests for URLProcessingService + +Tests URL document processing workflow including: +- Status transitions (backlog → fetching → processing → embedding → ready) +- Metadata field updates (source_type, source_url, timestamps) +- Integration with ProcessorFactory and DocumentProcessingService patterns +- Error handling and retry logic +""" + +from datetime import datetime, timezone +from uuid import uuid4 + +import pytest +from sqlalchemy.ext.asyncio import AsyncSession +from unittest.mock import AsyncMock, MagicMock, patch + +from ragitect.services.database.models import Document +from ragitect.services.database.repositories.document_repo import DocumentRepository +from ragitect.services.url_processing_service import URLProcessingService + + +pytestmark = [pytest.mark.asyncio] + + +@pytest.fixture +def 
mock_document_repo(mocker): + """Mock DocumentRepository for testing""" + repo = mocker.Mock(spec=DocumentRepository) + repo.get_by_id_or_raise = mocker.AsyncMock() + repo.update_status = mocker.AsyncMock() + repo.update_metadata = mocker.AsyncMock() + repo.update_processed_content = mocker.AsyncMock() + repo.add_chunks = mocker.AsyncMock(return_value=[]) + return repo + + +@pytest.fixture +def mock_session(mocker): + """Mock AsyncSession for testing""" + session = mocker.Mock(spec=AsyncSession) + session.commit = mocker.AsyncMock() + session.rollback = mocker.AsyncMock() + return session + + +@pytest.fixture +def url_processing_service(mock_session, mock_document_repo, mocker): + """Create URLProcessingService with mocked dependencies""" + service = URLProcessingService(mock_session) + # Replace repo with mock + mocker.patch.object(service, "repo", mock_document_repo) + return service + + +@pytest.fixture +def sample_url_document(): + """Sample document in backlog status awaiting URL fetching""" + doc_id = uuid4() + return Document( + id=doc_id, + workspace_id=uuid4(), + file_name="[URL] example.com-article", + file_type="html", + content_hash="url_hash_123", + unique_identifier_hash="unique_url_123", + processed_content=None, + metadata_={ + "status": "backlog", + "source_type": "url", + "source_url": "https://example.com/article", + "submitted_at": datetime.now(timezone.utc).isoformat(), + }, + ) + + +@pytest.fixture +def sample_youtube_document(): + """Sample YouTube document in backlog status""" + doc_id = uuid4() + return Document( + id=doc_id, + workspace_id=uuid4(), + file_name="[YouTube] dQw4w9WgXcQ", + file_type="youtube", + content_hash="yt_hash_123", + unique_identifier_hash="unique_yt_123", + processed_content=None, + metadata_={ + "status": "backlog", + "source_type": "youtube", + "source_url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", + "submitted_at": datetime.now(timezone.utc).isoformat(), + }, + ) + + +@pytest.fixture +def sample_pdf_url_document(): + """Sample PDF URL document in backlog status""" + doc_id = uuid4() + return Document( + id=doc_id, + workspace_id=uuid4(), + file_name="paper.pdf", + file_type="pdf", + content_hash="pdf_hash_123", + unique_identifier_hash="unique_pdf_123", + processed_content=None, + metadata_={ + "status": "backlog", + "source_type": "pdf", + "source_url": "https://arxiv.org/pdf/1706.03762.pdf", + "submitted_at": datetime.now(timezone.utc).isoformat(), + }, + ) + + +class TestURLProcessingServiceStatusFlow: + """Test status progression through URL processing lifecycle (AC2, AC3)""" + + async def test_status_updated_to_fetching_at_start( + self, + url_processing_service, + mock_document_repo, + mock_session, + sample_url_document, + ): + """Status should be updated to 'fetching' at the start of processing""" + # Arrange + mock_document_repo.get_by_id_or_raise.return_value = sample_url_document + mock_processor = AsyncMock() + mock_processor.process = AsyncMock(return_value="# Test Content\n\nBody") + + with patch.object( + url_processing_service.processor_factory, + "get_processor", + return_value=mock_processor, + ): + with patch( + "ragitect.services.url_processing_service.split_document", + return_value=["Chunk 1"], + ): + with patch( + "ragitect.services.url_processing_service.get_embedding_model_from_config", + new=AsyncMock(return_value=MagicMock()), + ): + with patch( + "ragitect.services.url_processing_service.embed_documents", + new=AsyncMock(return_value=[[0.1] * 768]), + ): + # Act + await 
url_processing_service.process_url_document( + sample_url_document.id, + "https://example.com/article", + "url", + ) + + # Assert - verify first metadata update includes fetching status + update_calls = mock_document_repo.update_metadata.call_args_list + assert len(update_calls) >= 1 + + # First update should set status to "fetching" + first_metadata = update_calls[0][0][1] + assert first_metadata.get("status") == "fetching" + assert "fetch_started_at" in first_metadata + + async def test_status_progresses_through_all_stages( + self, + url_processing_service, + mock_document_repo, + mock_session, + sample_url_document, + ): + """Status should progress: fetching → processing → embedding → ready""" + # Arrange + mock_document_repo.get_by_id_or_raise.return_value = sample_url_document + mock_processor = AsyncMock() + mock_processor.process = AsyncMock(return_value="# Markdown Content") + + # Track status updates + statuses_seen = [] + + def track_metadata(doc_id, metadata): + if "status" in metadata: + statuses_seen.append(metadata["status"]) + return sample_url_document + + mock_document_repo.update_metadata.side_effect = track_metadata + mock_document_repo.update_status.side_effect = lambda doc_id, status: ( + statuses_seen.append(status), + sample_url_document, + )[1] + + with patch.object( + url_processing_service.processor_factory, + "get_processor", + return_value=mock_processor, + ): + with patch( + "ragitect.services.url_processing_service.split_document", + return_value=["Chunk"], + ): + with patch( + "ragitect.services.url_processing_service.get_embedding_model_from_config", + new=AsyncMock(return_value=MagicMock()), + ): + with patch( + "ragitect.services.url_processing_service.embed_documents", + new=AsyncMock(return_value=[[0.1] * 768]), + ): + # Act + await url_processing_service.process_url_document( + sample_url_document.id, + "https://example.com/article", + "url", + ) + + # Assert - verify proper status sequence + assert "fetching" in statuses_seen + assert "processing" in statuses_seen + assert "embedding" in statuses_seen + assert "ready" in statuses_seen + + # Verify order: fetching before processing before embedding before ready + fetching_idx = statuses_seen.index("fetching") + processing_idx = statuses_seen.index("processing") + embedding_idx = statuses_seen.index("embedding") + ready_idx = statuses_seen.index("ready") + + assert fetching_idx < processing_idx < embedding_idx < ready_idx + + async def test_metadata_includes_fetch_timestamps( + self, + url_processing_service, + mock_document_repo, + mock_session, + sample_url_document, + ): + """Metadata should include fetch_started_at and fetch_completed_at timestamps""" + # Arrange + mock_document_repo.get_by_id_or_raise.return_value = sample_url_document + mock_processor = AsyncMock() + mock_processor.process = AsyncMock(return_value="# Content") + + captured_metadata = [] + mock_document_repo.update_metadata.side_effect = lambda doc_id, metadata: ( + captured_metadata.append(metadata.copy()), + sample_url_document, + )[1] + + with patch.object( + url_processing_service.processor_factory, + "get_processor", + return_value=mock_processor, + ): + with patch( + "ragitect.services.url_processing_service.split_document", + return_value=["Chunk"], + ): + with patch( + "ragitect.services.url_processing_service.get_embedding_model_from_config", + new=AsyncMock(return_value=MagicMock()), + ): + with patch( + "ragitect.services.url_processing_service.embed_documents", + new=AsyncMock(return_value=[[0.1] * 768]), + ): + # Act + 
await url_processing_service.process_url_document( + sample_url_document.id, + "https://example.com/article", + "url", + ) + + # Assert - check timestamps + all_metadata_keys = set() + for m in captured_metadata: + all_metadata_keys.update(m.keys()) + + assert "fetch_started_at" in all_metadata_keys + assert "fetch_completed_at" in all_metadata_keys + + +class TestURLProcessingServiceProcessorIntegration: + """Test integration with ProcessorFactory and URL processors (AC2)""" + + async def test_uses_correct_processor_for_web_url( + self, + url_processing_service, + mock_document_repo, + mock_session, + sample_url_document, + ): + """Should use WebURLProcessor for source_type='url'""" + # Arrange + mock_document_repo.get_by_id_or_raise.return_value = sample_url_document + + with patch.object( + url_processing_service.processor_factory, "get_processor" + ) as mock_get_processor: + mock_processor = AsyncMock() + mock_processor.process = AsyncMock(return_value="# Content") + mock_get_processor.return_value = mock_processor + + with patch( + "ragitect.services.url_processing_service.split_document", + return_value=["Chunk"], + ): + with patch( + "ragitect.services.url_processing_service.get_embedding_model_from_config", + new=AsyncMock(return_value=MagicMock()), + ): + with patch( + "ragitect.services.url_processing_service.embed_documents", + new=AsyncMock(return_value=[[0.1] * 768]), + ): + # Act + await url_processing_service.process_url_document( + sample_url_document.id, + "https://example.com/article", + "url", + ) + + # Assert + mock_get_processor.assert_called_once_with( + "https://example.com/article", "url" + ) + + async def test_uses_correct_processor_for_youtube( + self, + url_processing_service, + mock_document_repo, + mock_session, + sample_youtube_document, + ): + """Should use YouTubeProcessor for source_type='youtube'""" + # Arrange + mock_document_repo.get_by_id_or_raise.return_value = sample_youtube_document + + with patch.object( + url_processing_service.processor_factory, "get_processor" + ) as mock_get_processor: + mock_processor = AsyncMock() + mock_processor.process = AsyncMock(return_value="# YouTube Transcript") + mock_get_processor.return_value = mock_processor + + with patch( + "ragitect.services.url_processing_service.split_document", + return_value=["Chunk"], + ): + with patch( + "ragitect.services.url_processing_service.get_embedding_model_from_config", + new=AsyncMock(return_value=MagicMock()), + ): + with patch( + "ragitect.services.url_processing_service.embed_documents", + new=AsyncMock(return_value=[[0.1] * 768]), + ): + # Act + await url_processing_service.process_url_document( + sample_youtube_document.id, + "https://www.youtube.com/watch?v=dQw4w9WgXcQ", + "youtube", + ) + + # Assert + mock_get_processor.assert_called_once_with( + "https://www.youtube.com/watch?v=dQw4w9WgXcQ", "youtube" + ) + + async def test_uses_correct_processor_for_pdf_url( + self, + url_processing_service, + mock_document_repo, + mock_session, + sample_pdf_url_document, + ): + """Should use PDFURLProcessor for source_type='pdf'""" + # Arrange + mock_document_repo.get_by_id_or_raise.return_value = sample_pdf_url_document + + with patch.object( + url_processing_service.processor_factory, "get_processor" + ) as mock_get_processor: + mock_processor = AsyncMock() + mock_processor.process = AsyncMock(return_value="# PDF Content") + mock_get_processor.return_value = mock_processor + + with patch( + "ragitect.services.url_processing_service.split_document", + return_value=["Chunk"], + ): + 
with patch( + "ragitect.services.url_processing_service.get_embedding_model_from_config", + new=AsyncMock(return_value=MagicMock()), + ): + with patch( + "ragitect.services.url_processing_service.embed_documents", + new=AsyncMock(return_value=[[0.1] * 768]), + ): + # Act + await url_processing_service.process_url_document( + sample_pdf_url_document.id, + "https://arxiv.org/pdf/1706.03762.pdf", + "pdf", + ) + + # Assert + mock_get_processor.assert_called_once_with( + "https://arxiv.org/pdf/1706.03762.pdf", "pdf" + ) + + +class TestURLProcessingServiceContentStorage: + """Test that processor output is stored and chunked correctly (AC2)""" + + async def test_processor_output_stored_in_document( + self, + url_processing_service, + mock_document_repo, + mock_session, + sample_url_document, + ): + """Processor Markdown output should be stored in document""" + # Arrange + mock_document_repo.get_by_id_or_raise.return_value = sample_url_document + expected_markdown = "# Article Title\n\nThis is the article content." + + mock_processor = AsyncMock() + mock_processor.process = AsyncMock(return_value=expected_markdown) + + with patch.object( + url_processing_service.processor_factory, + "get_processor", + return_value=mock_processor, + ): + with patch( + "ragitect.services.url_processing_service.split_document", + return_value=["Chunk 1", "Chunk 2"], + ): + with patch( + "ragitect.services.url_processing_service.get_embedding_model_from_config", + new=AsyncMock(return_value=MagicMock()), + ): + with patch( + "ragitect.services.url_processing_service.embed_documents", + new=AsyncMock(return_value=[[0.1] * 768, [0.2] * 768]), + ): + # Act + await url_processing_service.process_url_document( + sample_url_document.id, + "https://example.com/article", + "url", + ) + + # Assert + mock_document_repo.update_processed_content.assert_called_once_with( + sample_url_document.id, expected_markdown + ) + + async def test_chunks_stored_with_embeddings( + self, + url_processing_service, + mock_document_repo, + mock_session, + sample_url_document, + ): + """Chunks should be created and stored with embeddings""" + # Arrange + mock_document_repo.get_by_id_or_raise.return_value = sample_url_document + mock_processor = AsyncMock() + mock_processor.process = AsyncMock(return_value="# Content\n\nParagraph 1") + + with patch.object( + url_processing_service.processor_factory, + "get_processor", + return_value=mock_processor, + ): + with patch( + "ragitect.services.url_processing_service.split_document", + return_value=["Chunk 1", "Chunk 2"], + ): + with patch( + "ragitect.services.url_processing_service.get_embedding_model_from_config", + new=AsyncMock(return_value=MagicMock()), + ): + with patch( + "ragitect.services.url_processing_service.embed_documents", + new=AsyncMock(return_value=[[0.1] * 768, [0.2] * 768]), + ): + # Act + await url_processing_service.process_url_document( + sample_url_document.id, + "https://example.com/article", + "url", + ) + + # Assert - chunks should be added + mock_document_repo.add_chunks.assert_called_once() + call_args = mock_document_repo.add_chunks.call_args[0] + assert call_args[0] == sample_url_document.id # document_id + chunk_data = call_args[1] + assert len(chunk_data) == 2 # Two chunks + + +class TestURLProcessingServiceErrorHandling: + """Test error state handling (AC6)""" + + async def test_fetch_error_updates_status_to_error( + self, + url_processing_service, + mock_document_repo, + mock_session, + sample_url_document, + ): + """Fetch failure should update status to 'error' with 
message""" + # Arrange + mock_document_repo.get_by_id_or_raise.return_value = sample_url_document + mock_processor = AsyncMock() + mock_processor.process = AsyncMock( + side_effect=Exception("Connection timeout") + ) + + with patch.object( + url_processing_service.processor_factory, + "get_processor", + return_value=mock_processor, + ): + with patch( + "ragitect.services.url_processing_service.with_retry", + side_effect=Exception("Connection timeout"), + ): + # Act + await url_processing_service.process_url_document( + sample_url_document.id, + "https://example.com/article", + "url", + ) + + # Assert - status should be updated to error + error_update_calls = [ + call + for call in mock_document_repo.update_metadata.call_args_list + if call[0][1].get("status") == "error" + ] + assert len(error_update_calls) >= 1 + + # Error message should be in metadata + error_metadata = error_update_calls[-1][0][1] + assert "error_message" in error_metadata + assert len(error_metadata["error_message"]) > 0 + + async def test_error_message_is_user_friendly( + self, + url_processing_service, + mock_document_repo, + mock_session, + sample_url_document, + ): + """Error message should be user-friendly, not technical stack trace""" + # Arrange + mock_document_repo.get_by_id_or_raise.return_value = sample_url_document + mock_processor = AsyncMock() + # Simulate a timeout error + mock_processor.process = AsyncMock( + side_effect=Exception("httpx.TimeoutException: timeout of 30s exceeded") + ) + + with patch.object( + url_processing_service.processor_factory, + "get_processor", + return_value=mock_processor, + ): + with patch( + "ragitect.services.url_processing_service.with_retry", + side_effect=Exception("httpx.TimeoutException: timeout of 30s exceeded"), + ): + # Act + await url_processing_service.process_url_document( + sample_url_document.id, + "https://example.com/article", + "url", + ) + + # Assert - error message should be concise and user-friendly + error_calls = [ + call + for call in mock_document_repo.update_metadata.call_args_list + if call[0][1].get("status") == "error" + ] + if error_calls: + error_msg = error_calls[-1][0][1].get("error_message", "") + # Should not contain python traceback markers + assert "Traceback" not in error_msg + # Should be reasonably short (under 500 chars for user display) + assert len(error_msg) < 500 + + +class TestURLProcessingServiceConcurrency: + """Test concurrency limiting with semaphore (AC5)""" + + async def test_semaphore_limits_concurrent_fetches( + self, + mock_session, + mocker, + ): + """Should limit concurrent URL fetches to 5""" + from ragitect.services.url_processing_service import _url_fetch_semaphore + + # Verify semaphore is configured with limit of 5 + assert _url_fetch_semaphore._value == 5 + + async def test_semaphore_acquired_and_released( + self, + url_processing_service, + mock_document_repo, + mock_session, + sample_url_document, + ): + """Semaphore should be acquired at start and released at end""" + from ragitect.services.url_processing_service import _url_fetch_semaphore + + # Record initial semaphore value + initial_value = _url_fetch_semaphore._value + + # Arrange + mock_document_repo.get_by_id_or_raise.return_value = sample_url_document + mock_processor = AsyncMock() + mock_processor.process = AsyncMock(return_value="# Content") + + with patch.object( + url_processing_service.processor_factory, + "get_processor", + return_value=mock_processor, + ): + with patch("ragitect.services.url_processing_service.split_document", 
return_value=["Chunk"]): + with patch("ragitect.services.url_processing_service.get_embedding_model_from_config", new=AsyncMock(return_value=MagicMock())): + with patch("ragitect.services.url_processing_service.embed_documents", new=AsyncMock(return_value=[[0.1] * 768])): + # Act + await url_processing_service.process_url_document( + sample_url_document.id, + "https://example.com/article", + "url", + ) + + # Assert - semaphore should be back to initial value + assert _url_fetch_semaphore._value == initial_value + + async def test_semaphore_released_on_error( + self, + url_processing_service, + mock_document_repo, + mock_session, + sample_url_document, + ): + """Semaphore should be released even on error""" + from ragitect.services.url_processing_service import _url_fetch_semaphore + + # Record initial semaphore value + initial_value = _url_fetch_semaphore._value + + # Arrange + mock_document_repo.get_by_id_or_raise.return_value = sample_url_document + + with patch.object( + url_processing_service.processor_factory, + "get_processor", + side_effect=Exception("Processor error"), + ): + # Act - should not raise (error handled internally) + await url_processing_service.process_url_document( + sample_url_document.id, + "https://example.com/article", + "url", + ) + + # Assert - semaphore should be back to initial value + assert _url_fetch_semaphore._value == initial_value + + async def test_semaphore_used_via_context_manager(self, mocker): + """Verify semaphore is used via async context manager for safe release""" + # This test verifies implementation pattern + from ragitect.services import url_processing_service + + # Read the source code to verify semaphore is used with async with + import inspect + source = inspect.getsource(url_processing_service.URLProcessingService.process_url_document) + + # Verify the pattern "async with _url_fetch_semaphore:" is present + assert "async with _url_fetch_semaphore:" in source