diff --git a/scripts/__init__.py b/scripts/__init__.py index 1c28744..5e59001 100644 --- a/scripts/__init__.py +++ b/scripts/__init__.py @@ -1,11 +1,30 @@ """ scripts package - AdGuard Home Blocklist Compiler +A high-performance blocklist compiler that merges 80+ DNS blocklists into a single, +deduplicated output optimized for AdGuard Home. + Modules: - downloader: Download blocklists with ETag/Last-Modified caching - cleaner: Clean and validate rules for AGH compatibility - compiler: Modifier-aware deduplication - pipeline: Main processing pipeline + downloader: Async blocklist downloader with ETag/Last-Modified caching + cleaner: Rule filtering and validation for AdGuard Home compatibility + compiler: Format compression and modifier-aware deduplication engine + pipeline: Main processing pipeline orchestrator + +Example: + >>> from scripts.pipeline import process_files + >>> stats = process_files("lists/_raw", "lists/merged.txt") """ -__version__ = "1.0.0" +from typing import Final + +__version__: Final[str] = "1.4.0" +__author__: Final[str] = "MissionWAR" + +__all__ = [ + "__version__", + "__author__", + "downloader", + "cleaner", + "compiler", + "pipeline", +] diff --git a/scripts/cleaner.py b/scripts/cleaner.py index 4e3d98b..6b878c1 100644 --- a/scripts/cleaner.py +++ b/scripts/cleaner.py @@ -5,7 +5,7 @@ This module filters blocklist rules to keep only those compatible with AdGuard Home. It's the first stage of the pipeline, running BEFORE the compiler. -CRITICAL UNDERSTANDING - DNS vs Browser Blocking: +Critical Understanding - DNS vs Browser Blocking: AdGuard Home is a DNS-level blocker, NOT a browser extension. This means: - DNS only sees domain names, not URLs, request types, or page content @@ -19,13 +19,13 @@ If we stripped the modifiers, we'd get ||ads.example.com^ which blocks EVERYTHING from that domain - a much more aggressive rule than intended! This could break sites. -DESIGN DECISION - Discard, Don't Strip: +Design Decision - Discard, Don't Strip: Rules with unsupported modifiers are COMPLETELY DISCARDED, not stripped. This prevents false positives and unexpected site breakage. A smaller, more accurate blocklist is better than a larger, overly-aggressive one. -KEY OPERATIONS: +Key Operations: 1. Remove comments (# and ! lines) 2. Discard cosmetic/element-hiding rules (##, #@#, #$#, etc.) 3. Discard rules with browser-only modifiers @@ -33,86 +33,181 @@ """ import re -from typing import NamedTuple - - -# ============================================================================ -# MODIFIER DEFINITIONS (based on official AdGuard DNS filtering syntax docs) -# ============================================================================ - -# Modifiers that are browser-only and NOT supported by AGH -# If a rule contains ANY of these, the ENTIRE RULE should be discarded -UNSUPPORTED_MODIFIERS = frozenset({ - # Content type modifiers (browser-only) - "script", "image", "stylesheet", "font", "media", "object", - "subdocument", "xmlhttprequest", "websocket", "webrtc", - "ping", "other", +from typing import Final, NamedTuple, TypedDict + + +# ============================================================================= +# MODIFIER DEFINITIONS +# ============================================================================= +# Based on official AdGuard DNS filtering syntax documentation. +# https://adguard-dns.io/kb/general/dns-filtering-syntax/ + +# Modifiers that are browser-only and NOT supported by AdGuard Home. 
+# If a rule contains ANY of these, the ENTIRE RULE should be discarded. +# +# Grouped by category for easier maintenance: +UNSUPPORTED_MODIFIERS: Final[frozenset[str]] = frozenset({ + # ------------------------------------------------------------------------- + # Content type modifiers (browser-only, DNS can't see content types) + # ------------------------------------------------------------------------- + "script", # JavaScript files + "image", # Images (png, jpg, etc.) + "stylesheet", # CSS files + "font", # Web fonts + "media", # Audio/video content + "object", # Flash/plugins (legacy) + "subdocument", # Iframes + "xmlhttprequest", # AJAX requests + "websocket", # WebSocket connections + "webrtc", # WebRTC connections + "ping", # Navigator.sendBeacon() + "other", # Other content types + + # ------------------------------------------------------------------------- # Shorthand content types - "css", "js", - # Third-party/first-party - "third-party", "3p", "first-party", "1p", - # Document modifiers - "document", "doc", "popup", "all", - # Network/redirect modifiers - "network", "redirect", "redirect-rule", "empty", "mp4", - # Request modification - "csp", "permissions", "header", "removeparam", "removeheader", - "replace", "hls", "jsonprune", - # Exception modifiers - "genericblock", "generichide", "elemhide", "specifichide", - "jsinject", "urlblock", "content", "extension", - # Domain restriction (would make rule domain-specific, not useful for DNS-wide) - "domain", - # Matching modifiers - "match-case", "strict-first-party", "strict-third-party", - # Stealth mode - "stealth", - # App-specific - "app", - # Method restrictions (browser-only) - "method", + # ------------------------------------------------------------------------- + "css", # Alias for stylesheet + "js", # Alias for script + + # ------------------------------------------------------------------------- + # Third-party/first-party (requires page context) + # ------------------------------------------------------------------------- + "third-party", # Requests from different domain + "3p", # Shorthand for third-party + "first-party", # Requests from same domain + "1p", # Shorthand for first-party + + # ------------------------------------------------------------------------- + # Document modifiers (page-level blocking) + # ------------------------------------------------------------------------- + "document", # Block entire document + "doc", # Alias for document + "popup", # Block popups + "all", # Match all content types + + # ------------------------------------------------------------------------- + # Network/redirect modifiers (require HTTP-level access) + # ------------------------------------------------------------------------- + "network", # Network requests + "redirect", # Redirect to resource + "redirect-rule", # Conditional redirect + "empty", # Return empty response + "mp4", # Return empty MP4 + + # ------------------------------------------------------------------------- + # Request modification (HTTP header manipulation) + # ------------------------------------------------------------------------- + "csp", # Content Security Policy injection + "permissions", # Permissions Policy injection + "header", # HTTP header modification + "removeparam", # Remove URL parameters + "removeheader", # Remove HTTP headers + "replace", # Replace response content + "hls", # HLS playlist modification + "jsonprune", # JSON response modification + + # ------------------------------------------------------------------------- + # 
Exception modifiers (browser extension exceptions) + # ------------------------------------------------------------------------- + "genericblock", # Disable generic blocking + "generichide", # Disable generic hiding + "elemhide", # Disable element hiding + "specifichide", # Disable specific hiding + "jsinject", # Disable JS injection + "urlblock", # Disable URL blocking + "content", # Disable content blocking + "extension", # Disable extension rules + + # ------------------------------------------------------------------------- + # Domain restriction (page-level, not useful for DNS-wide blocking) + # ------------------------------------------------------------------------- + "domain", # Only apply on specific domains + + # ------------------------------------------------------------------------- + # Matching modifiers (case sensitivity, strict party) + # ------------------------------------------------------------------------- + "match-case", # Case-sensitive matching + "strict-first-party", # Strict first-party check + "strict-third-party", # Strict third-party check + + # ------------------------------------------------------------------------- + # Other browser-only features + # ------------------------------------------------------------------------- + "stealth", # Stealth mode settings + "app", # App-specific rules + "method", # HTTP method restrictions }) -# ============================================================================ +# ============================================================================= # REGEX PATTERNS -# ============================================================================ +# ============================================================================= -# Cosmetic/element-hiding rule patterns (DISCARD entirely) -# These include: ## #@# #?# #$# #$?# #@?# #@$# etc. -COSMETIC_PATTERN = re.compile( +#: Cosmetic/element-hiding rule patterns (DISCARD entirely) +#: These include: ## #@# #?# #$# #$?# #@?# #@$# etc. +COSMETIC_PATTERN: Final[re.Pattern[str]] = re.compile( r"#[@$?%]*#|" # Standard element hiding: ## #@# #?# #$# etc. r"#[@$?%]*\?#|" # Extended CSS: #?# #@?# etc. r"\$#|" # Snippet injection: $# r"#%#|" # Scriptlet injection: #%# - r"\[adblock", # Adblock header + r"\[adblock", # Adblock header: [Adblock Plus ...] re.IGNORECASE ) -# Pattern to detect if a line is likely a comment -COMMENT_PATTERN = re.compile(r"^\s*[#!]") +#: Pattern to detect if a line is a comment (starts with # or !) +COMMENT_PATTERN: Final[re.Pattern[str]] = re.compile(r"^\s*[#!]") -# Trailing inline comment: match "# comment" preceded by whitespace -TRAILING_COMMENT_PATTERN = re.compile(r"\s+#\s+.*$") +#: Trailing inline comment: match " # comment" (space before #) +#: Example: "||example.com^ # block ads" → "||example.com^" +TRAILING_COMMENT_PATTERN: Final[re.Pattern[str]] = re.compile(r"\s+#\s+.*$") -# Pattern to extract modifier section from ABP rule -# Matches: $modifier1,modifier2,... at end of rule -MODIFIER_PATTERN = re.compile(r"\$([^$]+)$") +#: Pattern to extract modifier section from ABP rule +#: Matches: $modifier1,modifier2,... 
at end of rule +MODIFIER_PATTERN: Final[re.Pattern[str]] = re.compile(r"\$([^$]+)$") -# ============================================================================ +# ============================================================================= # DATA STRUCTURES -# ============================================================================ +# ============================================================================= class CleanResult(NamedTuple): - """Result of cleaning a single line.""" - line: str | None # Cleaned line, or None if discarded - discarded: bool # True if line was discarded - reason: str | None # Reason for discard (for logging) + """ + Result of cleaning a single line. + + Attributes: + line: Cleaned line, or None if discarded + discarded: True if line was discarded + reason: Reason for discard (for logging/stats), or None if kept + + Example: + >>> result = CleanResult("||example.com^", False, None) + >>> result.discarded + False + """ + line: str | None + discarded: bool + reason: str | None class CleanStats(NamedTuple): - """Statistics from cleaning operation.""" + """ + Statistics from cleaning operation. + + Attributes: + total_lines: Total lines processed + kept_lines: Lines kept after cleaning + comments_removed: Comment lines removed + cosmetic_removed: Cosmetic/element-hiding rules removed + unsupported_modifier_removed: Rules with unsupported modifiers removed + empty_removed: Empty lines removed + invalid_removed: Invalid/malformed lines removed + trimmed: Lines that had whitespace trimmed + + Example: + >>> stats = CleanStats(100, 80, 10, 5, 3, 2, 0, 15) + >>> stats.kept_lines + 80 + """ total_lines: int kept_lines: int comments_removed: int @@ -120,15 +215,43 @@ class CleanStats(NamedTuple): unsupported_modifier_removed: int empty_removed: int invalid_removed: int - trimmed: int # Lines that had whitespace trimmed + trimmed: int -# ============================================================================ +class CleanStatsDict(TypedDict): + """TypedDict for internal stats tracking with type safety.""" + total: int + kept: int + comments: int + cosmetic: int + unsupported_modifier: int + empty: int + invalid: int + trimmed: int + + +# ============================================================================= # CLEANING FUNCTIONS -# ============================================================================ +# ============================================================================= def is_comment(line: str) -> bool: - """Check if line is a comment (starts with # or !).""" + """ + Check if line is a comment (starts with # or !). + + Args: + line: The line to check + + Returns: + True if the line is a comment + + Example: + >>> is_comment("# This is a comment") + True + >>> is_comment("! Another comment style") + True + >>> is_comment("||example.com^") + False + """ return bool(COMMENT_PATTERN.match(line)) @@ -138,6 +261,18 @@ def is_cosmetic_rule(line: str) -> bool: These rules can't be processed by AdGuard Home (DNS-level blocker) and should be completely discarded. + + Args: + line: The line to check + + Returns: + True if the line is a cosmetic rule + + Example: + >>> is_cosmetic_rule("example.com##.ad-banner") + True + >>> is_cosmetic_rule("||example.com^") + False """ return bool(COSMETIC_PATTERN.search(line)) @@ -146,10 +281,20 @@ def strip_trailing_comment(line: str) -> str: """ Remove trailing inline comments to reduce file size. 
- Example: - "||example.com^ # block ads" -> "||example.com^" + Only strips comments that are preceded by whitespace, to avoid + accidentally stripping URL fragments or modifier values. - Be careful not to strip URL fragments or rule modifiers. + Args: + line: The line to process + + Returns: + Line with trailing comment removed + + Example: + >>> strip_trailing_comment("||example.com^ # block ads") + '||example.com^' + >>> strip_trailing_comment("||example.com^#fragment") # No space, kept + '||example.com^#fragment' """ # Don't process lines that might have # in modifiers if "$" in line and "#" in line.split("$")[-1]: @@ -166,17 +311,28 @@ def extract_modifiers(rule: str) -> set[str]: """ Extract modifier names from an ABP-style rule. + Handles modifiers with values (key=value) and negation (~modifier). + + Args: + rule: The ABP rule to parse + + Returns: + Set of modifier names (lowercase, without ~ prefix or =value suffix) + Example: - "||example.com^$script,third-party" -> {"script", "third-party"} - "||example.com^$important" -> {"important"} - "||example.com^" -> set() + >>> extract_modifiers("||example.com^$script,third-party") + {'script', 'third-party'} + >>> extract_modifiers("||example.com^$important") + {'important'} + >>> extract_modifiers("||example.com^") + set() """ match = MODIFIER_PATTERN.search(rule) if not match: return set() modifier_string = match.group(1) - modifiers = set() + modifiers: set[str] = set() for part in modifier_string.split(","): # Handle modifiers with values: client=192.168.1.1, dnsrewrite=example.com @@ -196,6 +352,18 @@ def has_unsupported_modifiers(modifiers: set[str]) -> bool: If ANY unsupported modifier is found, the rule should be DISCARDED (not stripped) to avoid false positives and breakage. + + Args: + modifiers: Set of modifier names to check + + Returns: + True if any modifier is unsupported + + Example: + >>> has_unsupported_modifiers({'script', 'important'}) + True # 'script' is unsupported + >>> has_unsupported_modifiers({'important', 'client'}) + False # Both are supported """ return bool(modifiers & UNSUPPORTED_MODIFIERS) @@ -204,8 +372,23 @@ def clean_line(line: str) -> tuple[CleanResult, bool]: """ Clean a single rule line. + Performs all cleaning operations: strip whitespace, remove comments, + discard cosmetic rules, and check for unsupported modifiers. 
+ + Args: + line: The raw line to clean + Returns: - (CleanResult, was_trimmed) - was_trimmed is True if whitespace was removed + Tuple of (CleanResult, was_trimmed): + - CleanResult: The cleaning result with line/discarded/reason + - was_trimmed: True if whitespace was removed from the line + + Example: + >>> result, trimmed = clean_line(" ||example.com^ ") + >>> result.line + '||example.com^' + >>> trimmed + True """ original = line @@ -233,10 +416,11 @@ def clean_line(line: str) -> tuple[CleanResult, bool]: was_trimmed = True # For ABP-style rules, check modifiers - if line.startswith("||") or line.startswith("@@||"): + # Use tuple form for single startswith call (faster than OR) + if line.startswith(("||", "@@||")): modifiers = extract_modifiers(line) if modifiers and has_unsupported_modifiers(modifiers): - # DISCARD entire rule (as per user's requirement) + # DISCARD entire rule (as per design decision) # This prevents false positives like blocking google.com when # the original rule was "$third-party" (third-party connections only) return CleanResult(None, True, "unsupported_modifier"), False @@ -256,11 +440,26 @@ def clean_lines(lines: list[str]) -> tuple[list[str], CleanStats]: """ Clean a list of lines. + Processes each line through the cleaning pipeline and collects statistics. + + Args: + lines: List of raw lines to clean + Returns: - (cleaned_lines, stats) + Tuple of (cleaned_lines, stats): + - cleaned_lines: List of valid, cleaned lines + - stats: CleanStats with counts of removed line types + + Example: + >>> lines = ["# comment", "||example.com^", "bad.com##.ad"] + >>> cleaned, stats = clean_lines(lines) + >>> len(cleaned) + 1 + >>> stats.comments_removed + 1 """ - cleaned = [] - stats = { + cleaned: list[str] = [] + stats: CleanStatsDict = { "total": 0, "kept": 0, "comments": 0, @@ -290,7 +489,7 @@ def clean_lines(lines: list[str]) -> tuple[list[str], CleanStats]: else: stats["invalid"] += 1 else: - cleaned.append(result.line) + cleaned.append(result.line) # type: ignore[arg-type] stats["kept"] += 1 return cleaned, CleanStats( @@ -309,12 +508,18 @@ def clean_file(input_path: str, output_path: str | None = None) -> CleanStats: """ Clean a single file. + Reads the input file, cleans all lines, and writes to output. + Args: input_path: Path to input file output_path: Path to output file (defaults to in-place modification) - + Returns: - CleanStats + CleanStats with counts of removed line types + + Example: + >>> stats = clean_file("raw.txt", "cleaned.txt") + >>> print(f"Kept {stats.kept_lines} of {stats.total_lines} lines") """ from pathlib import Path @@ -334,9 +539,9 @@ def clean_file(input_path: str, output_path: str | None = None) -> CleanStats: return stats -# ============================================================================ -# MAIN -# ============================================================================ +# ============================================================================= +# CLI INTERFACE +# ============================================================================= if __name__ == "__main__": import sys diff --git a/scripts/compiler.py b/scripts/compiler.py index 7dce313..c1cfcd4 100644 --- a/scripts/compiler.py +++ b/scripts/compiler.py @@ -5,14 +5,14 @@ This module is the core of the blocklist merging pipeline. It takes cleaned rules from multiple blocklists and produces a minimal, deduplicated output file. -DESIGN GOALS: +Core Goals (in priority order): 1. Maximum blocking coverage - Every domain that should be blocked, IS blocked 2. 
Minimum rule count - Smaller lists = faster loading, less memory in AdGuard Home 3. Only output blocking rules - No whitelist/exception rules (@@) in output -KEY INSIGHT - FORMAT COMPRESSION: +Key Insight - Format Compression: Instead of handling hosts, plain domains, and ABP rules separately, we CONVERT - everything to ABP format during parsing: + everything to ABP format during parsing:: 0.0.0.0 ads.example.com → ||ads.example.com^ ads.example.com → ||ads.example.com^ @@ -22,87 +22,164 @@ If we have ||example.com^, then ||sub.example.com^ becomes redundant regardless of whether it came from a hosts file or an ABP list. -MODIFIER-AWARE PRUNING: +Modifier-Aware Pruning: Not all subdomain rules can be pruned! AdGuard Home modifiers change behavior: - - $important → Child with $important must NOT be pruned by parent without it - - $badfilter → Never prune by a $badfilter parent (it disables rules, not blocks) - - $dnsrewrite → Never prune (has custom DNS response behavior) - - $denyallow → Never prune (excludes specific domains) - - $dnstype → Only prune if parent blocks ALL types - - $client/$ctag → Parent with restrictions can't prune unrestricted child - -WHITELIST HANDLING: + ============ ================================================================ + Modifier Behavior + ============ ================================================================ + $important Child with $important must NOT be pruned by parent without it + $badfilter Never prune by a $badfilter parent (it disables rules, not blocks) + $dnsrewrite Never prune (has custom DNS response behavior) + $denyallow Never prune (excludes specific domains) + $dnstype Only prune if parent blocks ALL types + $client/$ctag Parent with restrictions can't prune unrestricted child + ============ ================================================================ + +Whitelist Handling: @@rules (whitelist/exception rules) are used ONLY to remove conflicting blocking rules. The @@rules themselves are NOT output. This keeps the output file simple. - -See docs/LOGIC.md for detailed examples of each pruning rule. """ import re from dataclasses import dataclass from functools import lru_cache from pathlib import Path +from sys import intern +from typing import Final import tldextract -# Pre-configure tldextract for better performance (no updates check) +# ============================================================================= +# TYPE ALIASES +# ============================================================================= +# These make complex type signatures more readable throughout the codebase. + +#: A parsed ABP rule entry: (original_rule, modifiers_frozenset, is_wildcard) +RuleEntry = tuple[str, frozenset[str], bool] + +#: A TLD wildcard entry: (original_rule, modifiers_frozenset) +WildcardEntry = tuple[str, frozenset[str]] + +# ============================================================================= +# CONFIGURATION CONSTANTS +# ============================================================================= +# Named constants improve readability and make tuning easier. 
+ +#: LRU cache size for domain extraction (covers most unique domains in a run) +LRU_CACHE_SIZE: Final[int] = 65536 + +#: Pre-allocated empty frozenset to avoid repeated allocations +EMPTY_FROZENSET: Final[frozenset[str]] = frozenset() + +# Pre-configure tldextract for better performance (no online updates check) _tld_extract = tldextract.TLDExtract(suffix_list_urls=None) -# ============================================================================ +# ============================================================================= # REGEX PATTERNS -# ============================================================================ +# ============================================================================= +# Pre-compiled patterns for performance. Each pattern is documented with +# examples of what it matches. -# ABP pattern: ||[*.]domain^ (including IP addresses) -ABP_DOMAIN_PATTERN = re.compile( +#: ABP pattern: ||[*.]domain^ (including IP addresses) +#: Examples: ||example.com^, ||*.example.com^, @@||example.com^$important +ABP_DOMAIN_PATTERN: Final[re.Pattern[str]] = re.compile( r"^(@@)?\|\|" # Start: || or @@|| (group 1: exception marker) r"(\*\.)?" # Optional *. wildcard (group 2) - r"([^^\$|*\s]+)" # Domain/IP (group 3) + r"([^^$|*\s]+)" # Domain/IP (group 3) r"\^" # Separator ) -# Hosts format: IP domain [domain2 ...] -HOSTS_PATTERN = re.compile( +#: Hosts format: IP domain [domain2 ...] +#: Examples: 0.0.0.0 example.com, 127.0.0.1 ads.example.com tracking.example.com +HOSTS_PATTERN: Final[re.Pattern[str]] = re.compile( r"^([\d.:a-fA-F]+)\s+" # IP address (IPv4 or IPv6) r"(.+)$" # Rest of line (domains) ) -# Valid domain/IP for hosts -HOSTS_DOMAIN_PATTERN = re.compile(r"^[a-zA-Z0-9][\w.-]*$") +#: Valid domain/IP for hosts file entries +#: Examples: example.com, sub.example.com, my-domain.co.uk +HOSTS_DOMAIN_PATTERN: Final[re.Pattern[str]] = re.compile( + r"^[a-zA-Z0-9][\w.-]*$" +) -# Plain domain (simple domain name, no special chars except . and -) -PLAIN_DOMAIN_PATTERN = re.compile( +#: Plain domain (simple domain name, no special chars except . and -) +#: Examples: example.com, sub.example.com (NOT: ||example.com^, 0.0.0.0 example.com) +PLAIN_DOMAIN_PATTERN: Final[re.Pattern[str]] = re.compile( r"^[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?" r"(\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$" ) -# Local/blocking IPs recognized in hosts format -BLOCKING_IPS = frozenset({ +# ============================================================================= +# DOMAIN CONSTANTS +# ============================================================================= + +#: Local/blocking IPs recognized in hosts format +#: These indicate the entry is meant to block the domain, not redirect it. 
+BLOCKING_IPS: Final[frozenset[str]] = frozenset({ "0.0.0.0", "127.0.0.1", "::1", "::0", "::", "0:0:0:0:0:0:0:0", "0:0:0:0:0:0:0:1", }) -# Local hostnames to skip -LOCAL_HOSTNAMES = frozenset({ +#: Local hostnames to skip (these appear in hosts files but shouldn't be blocked) +LOCAL_HOSTNAMES: Final[frozenset[str]] = frozenset({ "localhost", "localhost.localdomain", "local", "broadcasthost", "ip6-localhost", "ip6-loopback", "ip6-localnet", "ip6-mcastprefix", "ip6-allnodes", "ip6-allrouters", "ip6-allhosts", }) -# Modifiers with special behavior that should never be pruned -SPECIAL_BEHAVIOR_MODIFIERS = frozenset({"badfilter", "dnsrewrite", "denyallow"}) +# ============================================================================= +# MODIFIER CONSTANTS +# ============================================================================= -# Modifiers that restrict who is blocked -CLIENT_RESTRICTION_MODIFIERS = frozenset({"client", "ctag"}) +#: Modifiers with special behavior that should never be pruned. +#: These modifiers have effects that can't be covered by a parent rule. +SPECIAL_BEHAVIOR_MODIFIERS: Final[frozenset[str]] = frozenset({ + "badfilter", # Disables other rules (meta-modifier) + "dnsrewrite", # Custom DNS response (e.g., redirect to specific IP) + "denyallow", # Excludes specific domains from blocking +}) + +#: Modifiers that restrict who is blocked (client-specific rules). +#: A parent with these can't prune a child without them. +CLIENT_RESTRICTION_MODIFIERS: Final[frozenset[str]] = frozenset({ + "client", # Block only for specific client IP + "ctag", # Block only for specific client tag +}) -# ============================================================================ +# ============================================================================= # DATA STRUCTURES -# ============================================================================ +# ============================================================================= @dataclass(slots=True) class CompileStats: - """Statistics from compilation.""" + """ + Statistics from the compilation process. + + This dataclass tracks all metrics during rule compilation, + providing insight into how many rules were kept, pruned, or transformed. + + Attributes: + total_input: Total number of input lines processed + total_output: Total number of rules written to output + abp_kept: ABP-style rules kept in output + other_kept: Other rules (regex, etc.) kept in output + abp_subdomain_pruned: Subdomain rules pruned by parent rules + tld_wildcard_pruned: Rules pruned by TLD wildcards (e.g., ||*.autos^) + duplicate_pruned: Exact duplicate rules removed + whitelist_conflict_pruned: Rules removed due to whitelist conflicts + local_hostname_pruned: Local hostnames (localhost, etc.) 
skipped + formats_compressed: Hosts/plain domains converted to ABP format + malformed_discarded: Malformed rules (e.g., ||^) discarded + + Example: + >>> stats = CompileStats() + >>> stats.total_input = 1000 + >>> stats.abp_kept = 500 + >>> print(f"Kept {stats.abp_kept} of {stats.total_input}") + Kept 500 of 1000 + """ total_input: int = 0 total_output: int = 0 @@ -110,61 +187,108 @@ class CompileStats: abp_kept: int = 0 other_kept: int = 0 - # Pruning + # Pruning counts abp_subdomain_pruned: int = 0 tld_wildcard_pruned: int = 0 duplicate_pruned: int = 0 whitelist_conflict_pruned: int = 0 local_hostname_pruned: int = 0 - formats_compressed: int = 0 # Hosts/plain domains converted to ABP format - malformed_discarded: int = 0 # Malformed rules (e.g., ||^) discarded + formats_compressed: int = 0 + malformed_discarded: int = 0 -# ============================================================================ +# ============================================================================= # HELPER FUNCTIONS -# ============================================================================ +# ============================================================================= def normalize_domain(domain: str) -> str: - """Normalize domain to lowercase, stripped.""" - return domain.lower().strip().rstrip(".") + """ + Normalize a domain to lowercase, stripped of whitespace and trailing dots. + + Uses sys.intern() to deduplicate domain strings in memory, which also + speeds up dictionary lookups (pointer comparison vs string comparison). + + Args: + domain: The domain string to normalize + + Returns: + Normalized and interned domain string + + Example: + >>> normalize_domain(" Example.COM. ") + 'example.com' + """ + return intern(domain.lower().strip().rstrip(".")) -def extract_abp_info(rule: str) -> tuple[str | None, frozenset, bool, bool]: +def extract_abp_info(rule: str) -> tuple[str | None, frozenset[str], bool, bool]: """ Extract domain, modifiers, exception status, and wildcard status from ABP rule. + Args: + rule: An ABP-style rule string + Returns: - (domain, modifiers, is_exception, is_wildcard) + A tuple of (domain, modifiers, is_exception, is_wildcard): + - domain: The extracted domain, or None if parsing failed + - modifiers: Frozenset of modifier names (lowercase) + - is_exception: True if this is a whitelist rule (@@) + - is_wildcard: True if this is a wildcard rule (||*.domain^) + + Example: + >>> extract_abp_info("||example.com^$important") + ('example.com', frozenset({'important'}), False, False) + >>> extract_abp_info("@@||*.example.com^") + ('example.com', frozenset(), True, True) """ match = ABP_DOMAIN_PATTERN.match(rule) if not match: - return None, frozenset(), False, False + return None, EMPTY_FROZENSET, False, False # Groups: (1) @@ exception, (2) *. wildcard, (3) domain is_exception = match.group(1) is not None is_wildcard = match.group(2) is not None domain = normalize_domain(match.group(3)) - # Extract modifiers - modifiers = set() - if "$" in rule: - mod_part = rule.split("$", 1)[1] - for mod in mod_part.split(","): - mod_name = mod.split("=")[0].strip().lower() - if mod_name.startswith("~"): - mod_name = mod_name[1:] - if mod_name: - modifiers.add(mod_name) + # Extract modifiers from $modifier1,modifier2,... 
+ # Fast path: no $ means no modifiers (common case) + if "$" not in rule: + return domain, EMPTY_FROZENSET, is_exception, is_wildcard - return domain, frozenset(modifiers), is_exception, is_wildcard + modifiers: set[str] = set() + mod_part = rule.split("$", 1)[1] + for mod in mod_part.split(","): + mod_name = mod.split("=")[0].strip().lower() + # Handle negation prefix (e.g., ~third-party) + if mod_name.startswith("~"): + mod_name = mod_name[1:] + if mod_name: + modifiers.add(mod_name) + + return domain, frozenset(modifiers) if modifiers else EMPTY_FROZENSET, is_exception, is_wildcard def extract_hosts_info(rule: str) -> tuple[str | None, list[str]]: """ Extract IP and domains from hosts-style rule. + Args: + rule: A hosts-style rule string (e.g., "0.0.0.0 example.com") + Returns: - (ip, [domains]) or (None, []) if not valid hosts + A tuple of (ip, domains): + - ip: The IP address, or None if not a valid hosts rule + - domains: List of domain names (may be empty) + + Note: + Only "blocking" IPs (0.0.0.0, 127.0.0.1, etc.) are recognized. + Real IPs like 8.8.8.8 are ignored as they indicate redirects, not blocks. + + Example: + >>> extract_hosts_info("0.0.0.0 example.com ads.example.com") + ('0.0.0.0', ['example.com', 'ads.example.com']) + >>> extract_hosts_info("8.8.8.8 dns.google") # Real IP, not blocking + (None, []) """ match = HOSTS_PATTERN.match(rule) if not match: @@ -173,11 +297,11 @@ def extract_hosts_info(rule: str) -> tuple[str | None, list[str]]: ip = match.group(1) rest = match.group(2) - # Only process blocking IPs + # Only process blocking IPs (0.0.0.0, 127.x.x.x, ::, etc.) if ip not in BLOCKING_IPS and not ip.startswith("0.") and not ip.startswith("127."): return None, [] - domains = [] + domains: list[str] = [] for part in rest.split(): # Stop at comments if part.startswith("#"): @@ -190,35 +314,89 @@ def extract_hosts_info(rule: str) -> tuple[str | None, list[str]]: return ip, domains -@lru_cache(maxsize=65536) +@lru_cache(maxsize=LRU_CACHE_SIZE) def _extract_domain_parts(domain: str) -> tuple[str, str, str]: - """Cached tldextract extraction. Returns (subdomain, domain, suffix).""" + """ + Cached tldextract extraction. + + Uses LRU cache to avoid repeated expensive tldextract calls for the same domain. + + Args: + domain: Full domain to parse + + Returns: + Tuple of (subdomain, domain, suffix) + + Example: + >>> _extract_domain_parts("sub.example.co.uk") + ('sub', 'example', 'co.uk') + """ ext = _tld_extract(domain) return ext.subdomain, ext.domain, ext.suffix def get_tld(domain: str) -> str | None: - """Get the TLD (suffix) of a domain.""" + """ + Get the TLD (suffix) of a domain. + + Args: + domain: The domain to extract TLD from + + Returns: + The TLD string, or None if not found + + Example: + >>> get_tld("example.com") + 'com' + >>> get_tld("example.co.uk") + 'co.uk' + """ _, _, suffix = _extract_domain_parts(domain) return suffix if suffix else None def get_registered_domain(domain: str) -> str | None: - """Get registered domain (domain.tld) from full domain.""" + """ + Get registered domain (domain.tld) from full domain. 
+ + Args: + domain: Full domain including subdomains + + Returns: + The registered domain (e.g., "example.com"), or None if not found + + Example: + >>> get_registered_domain("sub.example.com") + 'example.com' + >>> get_registered_domain("deep.sub.example.co.uk") + 'example.co.uk' + """ _, dom, suffix = _extract_domain_parts(domain) if suffix and dom: return f"{dom}.{suffix}" return None -@lru_cache(maxsize=65536) +@lru_cache(maxsize=LRU_CACHE_SIZE) def walk_parent_domains(domain: str) -> tuple[str, ...]: """ Walk up the domain hierarchy to find all parent domains. - Example: "a.b.example.com" -> ("b.example.com", "example.com") - - Returns tuple for hashability (caching). + Args: + domain: The domain to find parents for + + Returns: + Tuple of parent domains, from most specific to least specific. + Returns empty tuple for apex domains (no parents). + + Note: + Returns tuple (not list) for hashability, enabling LRU caching. + + Example: + >>> walk_parent_domains("a.b.example.com") + ('b.example.com', 'example.com') + >>> walk_parent_domains("example.com") # Apex domain + () """ subdomain, dom, suffix = _extract_domain_parts(domain) if not suffix or not dom: @@ -230,8 +408,9 @@ def walk_parent_domains(domain: str) -> tuple[str, ...]: return () parts = subdomain.split(".") - parents = [] - # Build parents from most specific to least + parents: list[str] = [] + + # Build parents from most specific to least specific for i in range(1, len(parts) + 1): if i == len(parts): parents.append(registered) @@ -246,15 +425,34 @@ def should_prune_by_modifiers(child_mods: frozenset[str], parent_mods: frozenset """ Determine if a child rule is redundant given the parent's modifiers. - Returns True if child can be safely pruned (parent covers it). + This function implements the modifier-aware pruning logic that ensures + we don't incorrectly remove rules with special behavior. - Key rules: - - $badfilter parent: Never prune (it disables rules, doesn't block) - - $important child: Keep if parent lacks $important (child takes priority) - - $dnsrewrite/$denyallow/$badfilter child: Never prune (special behavior) - - $dnstype mismatch: Child blocking ALL types not covered by parent blocking ONE type - - $client/$ctag parent: Child without restrictions blocks more broadly + Args: + child_mods: Modifiers on the child (subdomain) rule + parent_mods: Modifiers on the parent rule + + Returns: + True if child can be safely pruned (parent covers it), False otherwise + + Pruning Rules: + 1. $badfilter parent → Never prune (it disables rules, doesn't block) + 2. $important child → Keep if parent lacks $important + 3. $dnsrewrite/$denyallow/$badfilter child → Never prune (special behavior) + 4. $dnstype mismatch → Child blocking ALL types not covered by parent blocking ONE + 5. 
$client/$ctag parent → Child without restrictions blocks more broadly + + Example: + >>> should_prune_by_modifiers(frozenset(), frozenset()) + True + >>> should_prune_by_modifiers(frozenset({'important'}), frozenset()) + False # Child's $important takes priority """ + # Fast path: no modifiers on either side (most common case ~90%+) + # This avoids all the set operations below + if not child_mods and not parent_mods: + return True + # $badfilter disables rules, it doesn't block anything if "badfilter" in parent_mods: return False @@ -283,19 +481,38 @@ def should_prune_by_modifiers(child_mods: frozenset[str], parent_mods: frozenset return True -# ============================================================================ +# ============================================================================= # MAIN COMPILATION -# ============================================================================ +# ============================================================================= def compile_rules( lines: list[str], output_file: str, ) -> CompileStats: """ - Compile and deduplicate rules with two-phase approach. + Compile and deduplicate rules with format compression. - Phase 1: Collect all rules and build lookup structures - Phase 2: Filter and deduplicate + This is the main entry point for the compiler. It processes input lines through + multiple phases to produce a minimal, deduplicated output file. + + Args: + lines: List of rule strings to compile + output_file: Path to write the compiled output + + Returns: + CompileStats with metrics about the compilation process + + Pipeline Phases: + 1. **Parse & Compress**: Parse all rules, converting hosts/plain to ABP format + 2. **Build Lookups**: Create efficient lookup structures for pruning + 3. **Prune**: Remove redundant subdomain and whitelist-conflicted rules + 4. **Output**: Write deduplicated rules atomically + + Example: + >>> lines = ["||example.com^", "||sub.example.com^", "0.0.0.0 other.example.com"] + >>> stats = compile_rules(lines, "output.txt") + >>> print(f"Reduced {stats.total_input} to {stats.total_output} rules") + Reduced 3 to 1 rules """ stats = CompileStats() @@ -304,25 +521,27 @@ def compile_rules( # ========================================================================= # ABP blocking rules: domain -> (original_rule, modifiers, is_wildcard) - abp_rules: dict[str, tuple[str, frozenset, bool]] = {} - abp_wildcards: dict[str, tuple[str, frozenset]] = {} # TLD wildcards: tld -> rule + abp_rules: dict[str, RuleEntry] = {} + abp_wildcards: dict[str, WildcardEntry] = {} # TLD wildcards: tld -> rule # Whitelisted domains (from @@rules) allow_domains: set[str] = set() - # Other rules (regex, partial matches, etc.) - other_rules: list[str] = [] + # Other rules (regex, partial matches, etc.) 
- use set for inline dedup + other_rules: set[str] = set() for line in lines: stats.total_input += 1 - line = line.strip() - if not line: + + # Early exit for empty lines (walrus operator avoids assignment for empty) + if not (line := line.strip()): continue - # ===================================================================== + # ----------------------------------------------------------------- # ABP-style rules (highest priority) - # ===================================================================== - if line.startswith("||") or line.startswith("@@||"): + # ----------------------------------------------------------------- + # Use tuple form for single startswith call (faster than OR) + if line.startswith(("||", "@@||")): domain, modifiers, is_exception, is_wildcard = extract_abp_info(line) if not domain: @@ -369,9 +588,9 @@ def compile_rules( # Can't extract domain from non-ABP exceptions, skip for now continue - # ===================================================================== + # ----------------------------------------------------------------- # Hosts-style rules - COMPRESS TO ABP FORMAT - # ===================================================================== + # ----------------------------------------------------------------- ip, domains = extract_hosts_info(line) if ip and domains: for domain in domains: @@ -383,34 +602,37 @@ def compile_rules( # Convert to ABP format: 0.0.0.0 example.com → ||example.com^ abp_rule = f"||{domain}^" if domain not in abp_rules: - abp_rules[domain] = (abp_rule, frozenset(), False) + abp_rules[domain] = (abp_rule, EMPTY_FROZENSET, False) stats.formats_compressed += 1 else: stats.duplicate_pruned += 1 continue - # ===================================================================== + # ----------------------------------------------------------------- # Plain domain - COMPRESS TO ABP FORMAT - # ===================================================================== + # ----------------------------------------------------------------- if PLAIN_DOMAIN_PATTERN.match(line): domain = normalize_domain(line) if domain and domain not in LOCAL_HOSTNAMES: # Convert to ABP format: example.com → ||example.com^ abp_rule = f"||{domain}^" if domain not in abp_rules: - abp_rules[domain] = (abp_rule, frozenset(), False) - stats.formats_compressed += 1 # Reusing stat for both hosts and plain + abp_rules[domain] = (abp_rule, EMPTY_FROZENSET, False) + stats.formats_compressed += 1 else: stats.duplicate_pruned += 1 else: stats.local_hostname_pruned += 1 continue - # ===================================================================== - # Other (regex, etc.) - # ===================================================================== + # ----------------------------------------------------------------- + # Other (regex, etc.) - inline duplicate check with set + # ----------------------------------------------------------------- if line.startswith("/") or "|" in line or "*" in line: - other_rules.append(line) + if line not in other_rules: + other_rules.add(line) + else: + stats.duplicate_pruned += 1 continue # ========================================================================= @@ -446,7 +668,8 @@ def is_covered_by_abp(domain: str) -> bool: return False def is_whitelisted(domain: str) -> bool: - """Check if domain is whitelisted. + """ + Check if domain is whitelisted. A domain is whitelisted if: 1. 
It's directly in allow_domains (@@||domain^) @@ -469,7 +692,7 @@ def is_whitelisted(domain: str) -> bool: # PHASE 3: Prune ABP subdomain rules # ========================================================================= - pruned_abp: dict[str, tuple[str, frozenset, bool]] = {} + pruned_abp: dict[str, RuleEntry] = {} for domain, (rule, modifiers, is_wildcard) in abp_rules.items(): # Skip if whitelisted @@ -519,20 +742,8 @@ def is_whitelisted(domain: str) -> bool: else: pruned_abp[domain] = (rule, modifiers, is_wildcard) - # ========================================================================= - # PHASE 4: Deduplicate other rules (regex, partial matches, etc.) - # ========================================================================= - - seen_other: set[str] = set() - kept_other: list[str] = [] - for rule in other_rules: - if rule not in seen_other: - seen_other.add(rule) - kept_other.append(rule) - else: - stats.duplicate_pruned += 1 - - # NOTE: Whitelist/exception rules (@@) are intentionally NOT output. + # NOTE: other_rules is already deduplicated (used set during parse) + # Whitelist/exception rules (@@) are intentionally NOT output. # They were only used internally to remove conflicting blocking rules. # The final output contains only blocking rules. @@ -560,8 +771,8 @@ def is_whitelisted(domain: str) -> bool: f.write(rule + "\n") stats.abp_kept += 1 - # Other rules (regex, partial matches, etc.) - for rule in kept_other: + # Other rules (regex, partial matches, etc.) - already deduplicated + for rule in other_rules: f.write(rule + "\n") stats.other_kept += 1 @@ -573,9 +784,9 @@ def is_whitelisted(domain: str) -> bool: return stats -# ============================================================================ -# MAIN -# ============================================================================ +# ============================================================================= +# CLI INTERFACE +# ============================================================================= if __name__ == "__main__": import sys @@ -606,4 +817,3 @@ def is_whitelisted(domain: str) -> bool: print(f" Duplicates: {stats.duplicate_pruned:,}") print(f" Whitelist conflicts: {stats.whitelist_conflict_pruned:,}") print(f" Local hostnames: {stats.local_hostname_pruned:,}") - diff --git a/scripts/downloader.py b/scripts/downloader.py index a8ad0ca..e8944d2 100644 --- a/scripts/downloader.py +++ b/scripts/downloader.py @@ -5,8 +5,21 @@ Downloads blocklists with ETag/Last-Modified caching and concurrent fetching. Falls back to cached files if download fails. 
+Features: + - Async downloads with aiohttp for high concurrency + - ETag/Last-Modified caching to avoid re-downloading unchanged files + - Automatic retry with exponential backoff + - Graceful fallback to cached files on error + - Progress tracking and detailed statistics + Usage: python -m scripts.downloader --sources sources.txt --outdir data/ --cache .cache + +Example: + >>> from scripts.downloader import fetch_all + >>> import asyncio + >>> results = asyncio.run(fetch_all(urls, output_dir, cache_dir, 8, 30, 3)) + >>> print(f"Downloaded {sum(r.success for r in results)} of {len(results)} sources") """ import argparse @@ -16,31 +29,74 @@ import sys import time from pathlib import Path -from typing import NamedTuple +from typing import Final, NamedTuple import aiohttp import aiofiles -# Default configuration -DEFAULT_TIMEOUT = 30 # seconds per request -DEFAULT_RETRIES = 3 # attempts before giving up -DEFAULT_CONCURRENCY = 8 # simultaneous connections +# ============================================================================= +# CONFIGURATION CONSTANTS +# ============================================================================= + +#: Default timeout per HTTP request in seconds +DEFAULT_TIMEOUT: Final[int] = 30 + +#: Default number of retry attempts before giving up +DEFAULT_RETRIES: Final[int] = 3 + +#: Default number of simultaneous HTTP connections +DEFAULT_CONCURRENCY: Final[int] = 8 -# State file for ETag/Last-Modified tracking -STATE_FILE = "state.json" +#: State file name for ETag/Last-Modified tracking +STATE_FILE: Final[str] = "state.json" +# ============================================================================= +# DATA STRUCTURES +# ============================================================================= + class FetchResult(NamedTuple): - """Result of a single fetch operation.""" + """ + Result of a single fetch operation. + + Attributes: + url: The URL that was fetched + success: True if fetch succeeded (includes cache fallback) + changed: True if content changed (False for 304 Not Modified) + error: Error message if something went wrong, None otherwise + + Example: + >>> result = FetchResult("https://example.com/list.txt", True, True, None) + >>> result.success + True + """ url: str success: bool changed: bool error: str | None = None +# ============================================================================= +# HELPER FUNCTIONS +# ============================================================================= + def url_to_filename(url: str) -> str: - """Generate a safe, unique filename from a URL.""" + """ + Generate a safe, unique filename from a URL. + + Uses SHA256 hash for uniqueness and extracts domain for readability. + + Args: + url: The source URL + + Returns: + Safe filename like "example_com_a1b2c3d4.txt" + + Example: + >>> url_to_filename("https://example.com/blocklist.txt") + 'example_com_a1b2c3d4e5f6g7h8.txt' + """ # Use SHA256 hash for uniqueness, take first 16 chars url_hash = hashlib.sha256(url.encode()).hexdigest()[:16] # Extract domain for readability @@ -53,8 +109,25 @@ def url_to_filename(url: str) -> str: def load_sources(sources_file: str) -> list[str]: - """Load URLs from sources file, skipping comments and empty lines.""" - urls = [] + """ + Load URLs from sources file, skipping comments and empty lines. + + Args: + sources_file: Path to the sources.txt file + + Returns: + List of URLs to fetch + + Note: + Lines starting with # are treated as comments. + Inline comments (after #) are also stripped. 
+ + Example: + >>> urls = load_sources("config/sources.txt") + >>> len(urls) + 80 + """ + urls: list[str] = [] path = Path(sources_file) if not path.exists(): print(f"ERROR: Sources file not found: {sources_file}", file=sys.stderr) @@ -72,8 +145,21 @@ def load_sources(sources_file: str) -> list[str]: return urls -def load_state(cache_dir: Path) -> dict: - """Load state.json containing ETag/Last-Modified cache.""" +def load_state(cache_dir: Path) -> dict[str, dict[str, str]]: + """ + Load state.json containing ETag/Last-Modified cache. + + Args: + cache_dir: Directory containing the state file + + Returns: + State dictionary mapping URLs to their cached headers + + Example: + >>> state = load_state(Path(".cache")) + >>> state.get("https://example.com/list.txt", {}).get("etag") + '"abc123"' + """ state_path = cache_dir / STATE_FILE if state_path.exists(): try: @@ -84,8 +170,17 @@ def load_state(cache_dir: Path) -> dict: return {} -def save_state(cache_dir: Path, state: dict) -> None: - """Save state.json atomically.""" +def save_state(cache_dir: Path, state: dict[str, dict[str, str]]) -> None: + """ + Save state.json atomically. + + Uses a temporary file and atomic rename to prevent corruption if + the process is interrupted during write. + + Args: + cache_dir: Directory to save state file in + state: State dictionary to save + """ state_path = cache_dir / STATE_FILE temp_path = state_path.with_suffix(".tmp") try: @@ -96,31 +191,50 @@ def save_state(cache_dir: Path, state: dict) -> None: print(f"Warning: Could not save state.json: {e}", file=sys.stderr) +# ============================================================================= +# ASYNC FETCH FUNCTIONS +# ============================================================================= + async def fetch_url( session: aiohttp.ClientSession, url: str, output_dir: Path, cache_dir: Path, - state: dict, + state: dict[str, dict[str, str]], timeout: int, retries: int, ) -> FetchResult: """ Fetch a single URL with ETag/Last-Modified caching. + Uses conditional requests (If-None-Match, If-Modified-Since) to avoid + re-downloading unchanged content. Falls back to cached files on error. + + Args: + session: aiohttp session for connection pooling + url: URL to fetch + output_dir: Directory to save fetched files + cache_dir: Directory for cached files and state + state: Mutable state dict for tracking ETags + timeout: Request timeout in seconds + retries: Number of retry attempts + Returns: FetchResult with success/changed status + + Note: + This function modifies `state` in-place when new ETags are received. """ filename = url_to_filename(url) output_path = output_dir / filename cache_path = cache_dir / filename - # Get cached headers + # Get cached headers for conditional request url_state = state.get(url, {}) etag = url_state.get("etag") last_modified = url_state.get("last_modified") - headers = {} + headers: dict[str, str] = {} if etag: headers["If-None-Match"] = etag if last_modified: @@ -180,7 +294,7 @@ async def fetch_url( await f.write(content) # Update state with new ETag/Last-Modified - new_state = {} + new_state: dict[str, str] = {} if "ETag" in response.headers: new_state["etag"] = response.headers["ETag"] if "Last-Modified" in response.headers: @@ -231,7 +345,27 @@ async def fetch_all( timeout: int, retries: int, ) -> list[FetchResult]: - """Fetch all URLs concurrently with rate limiting.""" + """ + Fetch all URLs concurrently with rate limiting. 
+ + Uses a semaphore to control maximum concurrent connections, + preventing overwhelming the network or servers. + + Args: + urls: List of URLs to fetch + output_dir: Directory to save fetched files + cache_dir: Directory for cached files and state + concurrency: Maximum concurrent connections + timeout: Request timeout in seconds per URL + retries: Number of retry attempts per URL + + Returns: + List of FetchResult for each URL + + Example: + >>> results = await fetch_all(urls, Path("data"), Path(".cache"), 8, 30, 3) + >>> success_count = sum(r.success for r in results) + """ # Ensure directories exist output_dir.mkdir(parents=True, exist_ok=True) cache_dir.mkdir(parents=True, exist_ok=True) @@ -255,7 +389,7 @@ async def fetch_with_semaphore(url: str) -> FetchResult: results = await asyncio.gather(*tasks, return_exceptions=True) # Handle exceptions in results - final_results = [] + final_results: list[FetchResult] = [] for i, result in enumerate(results): if isinstance(result, Exception): final_results.append(FetchResult(urls[i], success=False, changed=False, error=str(result))) @@ -268,8 +402,17 @@ async def fetch_with_semaphore(url: str) -> FetchResult: return final_results +# ============================================================================= +# CLI INTERFACE +# ============================================================================= + def main() -> int: - """Main entry point.""" + """ + Main entry point for CLI usage. + + Returns: + Exit code (0 for success, 1 for failure) + """ parser = argparse.ArgumentParser(description="Fetch blocklist sources with caching") parser.add_argument("--sources", required=True, help="Path to sources.txt file") parser.add_argument("--outdir", required=True, help="Output directory for fetched files") diff --git a/scripts/pipeline.py b/scripts/pipeline.py index 475b1db..18fa519 100644 --- a/scripts/pipeline.py +++ b/scripts/pipeline.py @@ -1,44 +1,100 @@ #!/usr/bin/env python3 """ -pipeline.py +pipeline.py - Main Processing Pipeline for Blocklist Compilation -Main processing pipeline for blocklist compilation. +This is the orchestrator that ties together the cleaning and compilation stages. +It reads raw blocklist files, processes them through the pipeline, and outputs +a unified, deduplicated blocklist. + +Pipeline Stages: + 1. **Read**: Load all .txt files from input directory + 2. **Clean**: Remove comments, cosmetic rules, unsupported modifiers + 3. **Compile**: Compress formats, deduplicate, prune subdomains + 4. **Write**: Output merged blocklist with statistics Usage: python -m scripts.pipeline -Pipeline stages: -1. Read all files from input_dir -2. Clean each rule (remove comments, cosmetic, unsupported modifiers) -3. Compile (compress formats, deduplicate, prune subdomains) -4. Write merged output +Example: + >>> from scripts.pipeline import process_files + >>> stats = process_files("lists/_raw", "lists/merged.txt") + >>> print(f"Output {stats['lines_output']:,} rules") """ +import json import sys import time from pathlib import Path +from typing import TypedDict from scripts.cleaner import clean_line from scripts.compiler import compile_rules -def process_files(input_dir: str, output_file: str) -> dict[str, int]: +# ============================================================================= +# DATA STRUCTURES +# ============================================================================= + +class PipelineStats(TypedDict): + """ + Statistics collected during pipeline execution. 
+ + Provides detailed metrics about each stage of processing, + useful for monitoring and debugging. + """ + files_processed: int + lines_raw: int + lines_clean: int + lines_output: int + comments_removed: int + cosmetic_removed: int + unsupported_removed: int + empty_removed: int + trimmed: int + abp_subdomain_pruned: int + tld_wildcard_pruned: int + duplicate_pruned: int + whitelist_conflict_pruned: int + local_hostname_pruned: int + formats_compressed: int + abp_kept: int + other_kept: int + + +# ============================================================================= +# PIPELINE FUNCTIONS +# ============================================================================= + +def process_files(input_dir: str, output_file: str) -> PipelineStats: """ Run the full pipeline on input directory. + Orchestrates the complete blocklist compilation process: + 1. Reads all .txt files from input_dir + 2. Cleans each line (removes comments, cosmetic, unsupported modifiers) + 3. Compiles all lines (compresses formats, deduplicates) + 4. Writes output and returns statistics + Args: input_dir: Directory containing raw blocklist files output_file: Path to output merged list - + Returns: - Statistics dictionary + PipelineStats with detailed metrics from all stages + + Raises: + FileNotFoundError: If input_dir doesn't exist + + Example: + >>> stats = process_files("lists/_raw", "lists/merged.txt") + >>> print(f"Reduced {stats['lines_raw']:,} to {stats['lines_output']:,}") """ input_path = Path(input_dir) if not input_path.is_dir(): raise FileNotFoundError(f"Input directory not found: {input_dir}") - stats = { + stats: PipelineStats = { "files_processed": 0, "lines_raw": 0, "lines_clean": 0, @@ -54,20 +110,24 @@ def process_files(input_dir: str, output_file: str) -> dict[str, int]: "whitelist_conflict_pruned": 0, "local_hostname_pruned": 0, "formats_compressed": 0, + "abp_kept": 0, + "other_kept": 0, } # ========================================================================= - # Stage 1: Read and clean all files + # STAGE 1: Read and clean all files # ========================================================================= print("📖 Stage 1: Reading and cleaning files...") stage1_start = time.time() all_cleaned: list[str] = [] + files = sorted(input_path.glob("*.txt")) - # Process .txt files - for file in sorted(input_path.glob("*.txt")): + # Process files (deterministic order for reproducibility) + for file in files: stats["files_processed"] += 1 + # Read and clean each file with open(file, encoding="utf-8-sig", errors="replace") as f: for line in f: stats["lines_raw"] += 1 @@ -79,16 +139,17 @@ def process_files(input_dir: str, output_file: str) -> dict[str, int]: stats["trimmed"] += 1 if result.discarded: - if result.reason == "comment": - stats["comments_removed"] += 1 - elif result.reason == "cosmetic": - stats["cosmetic_removed"] += 1 - elif result.reason == "unsupported_modifier": - stats["unsupported_removed"] += 1 - elif result.reason == "empty": - stats["empty_removed"] += 1 + match result.reason: + case "comment": + stats["comments_removed"] += 1 + case "cosmetic": + stats["cosmetic_removed"] += 1 + case "unsupported_modifier": + stats["unsupported_removed"] += 1 + case "empty": + stats["empty_removed"] += 1 else: - all_cleaned.append(result.line) + all_cleaned.append(result.line) # type: ignore[arg-type] stats["lines_clean"] = len(all_cleaned) stage1_time = time.time() - stage1_start @@ -96,13 +157,14 @@ def process_files(input_dir: str, output_file: str) -> dict[str, int]: print(f" Kept 

     # =========================================================================
-    # Stage 2: Compile and deduplicate
+    # STAGE 2: Compile and deduplicate
     # =========================================================================
     print("\n⚙️  Stage 2: Compiling and deduplicating...")
     stage2_start = time.time()

     compile_stats = compile_rules(all_cleaned, output_file)

+    # Transfer compilation stats
     stats["lines_output"] = compile_stats.total_output
     stats["abp_subdomain_pruned"] = compile_stats.abp_subdomain_pruned
     stats["tld_wildcard_pruned"] = compile_stats.tld_wildcard_pruned
@@ -111,7 +173,7 @@
     stats["local_hostname_pruned"] = compile_stats.local_hostname_pruned
     stats["formats_compressed"] = compile_stats.formats_compressed

-    # Add format breakdown
+    # Format breakdown
     stats["abp_kept"] = compile_stats.abp_kept
     stats["other_kept"] = compile_stats.other_kept

@@ -121,8 +183,16 @@
     return stats


-def print_summary(stats: dict[str, int]) -> None:
-    """Print formatted summary."""
+def print_summary(stats: PipelineStats) -> None:
+    """
+    Print formatted summary of pipeline execution.
+
+    Displays a comprehensive breakdown of what was processed,
+    what was removed at each stage, and the final output.
+
+    Args:
+        stats: PipelineStats from process_files()
+    """
     print("\n" + "=" * 60)
     print("📊 PIPELINE SUMMARY")
     print("=" * 60)
@@ -158,18 +228,63 @@
     print(f"   Local hostnames:    {stats['local_hostname_pruned']:>10,}")

     print(f"\n📦 Output breakdown:")
-    print(f"   ABP rules:        {stats.get('abp_kept', 0):>10,} (incl. {stats.get('formats_compressed', 0):,} compressed)")
-    print(f"   Other rules:      {stats.get('other_kept', 0):>10,}")
+    print(f"   ABP rules:        {stats['abp_kept']:>10,} (incl. {stats['formats_compressed']:,} compressed)")
+    print(f"   Other rules:      {stats['other_kept']:>10,}")


+def save_stats_json(stats: PipelineStats, output_path: str, total_time: float) -> None:
+    """
+    Save pipeline statistics to a JSON file.
+
+    Args:
+        stats: Pipeline statistics dictionary
+        output_path: Path to write JSON file
+        total_time: Total execution time in seconds
+    """
+    output = {
+        "version": "1.4.0",
+        "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
+        "execution_time_seconds": round(total_time, 2),
+        "statistics": dict(stats),
+    }
+
+    path = Path(output_path)
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with open(path, "w", encoding="utf-8") as f:
+        json.dump(output, f, indent=2)
+
+
+# =============================================================================
+# CLI INTERFACE
+# =============================================================================
+
 def main() -> int:
-    """Main entry point."""
-    if len(sys.argv) < 3:
-        print("Usage: python -m scripts.pipeline <input_dir> <output_file>")
+    """
+    Main entry point for CLI usage.
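+
+    Expects two positional arguments plus an optional --json-stats flag.
+    A typical invocation looks like this (paths shown are illustrative):
+
+        python -m scripts.pipeline lists/_raw lists/merged.txt --json-stats stats.json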
+
+    Returns:
+        Exit code (0 for success, 1 for error, 2 for usage error)
+    """
+    # Parse arguments
+    args = sys.argv[1:]
+    json_stats_path: str | None = None
+
+    # Check for --json-stats flag
+    if "--json-stats" in args:
+        idx = args.index("--json-stats")
+        if idx + 1 < len(args):
+            json_stats_path = args[idx + 1]
+            args = args[:idx] + args[idx + 2:]
+        else:
+            print("Error: --json-stats requires a path argument", file=sys.stderr)
+            return 2
+
+    if len(args) < 2:
+        print("Usage: python -m scripts.pipeline <input_dir> <output_file> [--json-stats <path>]")
         return 2

-    input_dir = sys.argv[1]
-    output_file = sys.argv[2]
+    input_dir = args[0]
+    output_file = args[1]

     try:
         print("🚀 Starting blocklist pipeline...")
@@ -181,6 +296,12 @@
         print_summary(stats)

         print(f"\n⏱️  Total time: {total_time:.1f}s")
+
+        # Save JSON stats if requested
+        if json_stats_path:
+            save_stats_json(stats, json_stats_path, total_time)
+            print(f"📊 Stats saved to: {json_stats_path}")
+
         print("✅ Pipeline completed successfully!")

         return 0
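+
+        # Illustrative only: downstream tooling can consume the JSON report written
+        # by save_stats_json(). A minimal sketch, assuming the run above was invoked
+        # with --json-stats stats.json:
+        #
+        #     import json
+        #     with open("stats.json", encoding="utf-8") as f:
+        #         report = json.load(f)
+        #     print(report["statistics"]["lines_output"], "rules in the final list")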