diff --git a/termscrape/.gitignore b/termscrape/.gitignore
new file mode 100644
index 0000000..2f99b40
--- /dev/null
+++ b/termscrape/.gitignore
@@ -0,0 +1,98 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+*.manifest
+*.spec
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Virtual environments
+venv/
+env/
+ENV/
+env.bak/
+venv.bak/
+
+# IDEs
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+.DS_Store
+
+# Playwright
+.playwright/
+
+# Scraped data
+scraped/
+*.scraped/
+output/
+downloads/
+
+# Logs
+*.log
+logs/
+
+# Environment variables
+.env
+.env.local
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# Project specific
+*.md.bak
+*.json.bak
+temp/
+tmp/
diff --git a/termscrape/LICENSE b/termscrape/LICENSE
new file mode 100644
index 0000000..2a9f4c7
--- /dev/null
+++ b/termscrape/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 TermScrape Team
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/termscrape/README.md b/termscrape/README.md
new file mode 100644
index 0000000..6b13b15
--- /dev/null
+++ b/termscrape/README.md
@@ -0,0 +1,413 @@
+# TermScrape
+
+**TermScrape** is a comprehensive, terminal-based web scraping tool inspired by Firecrawl. It runs entirely locally, handles JavaScript-heavy sites, supports recursive crawling, and integrates with local LLMs for intelligent data extraction.
+
+## Features
+
+- **Single URL Scraping**: Fetch and parse any webpage
+- **Recursive Crawling**: Crawl entire websites with depth control
+- **Multiple Output Formats**: Markdown, JSON, or plain text
+- **JavaScript Support**: Headless browser rendering via Playwright
+- **LLM Integration**: Extract structured data using local Ollama models
+- **Ethical Scraping**: Respects robots.txt and includes rate limiting
+- **CLI Interface**: Easy-to-use command-line interface with Click
+- **Modular Design**: Clean, extensible Python codebase
+
+## Installation
+
+### Prerequisites
+
+- Python 3.10 or higher
+- pip package manager
+
+### Quick Start
+
+```bash
+# Clone the repository
+git clone https://github.com/yourusername/termscrape
+cd termscrape
+
+# Install dependencies
+pip install -r requirements.txt
+
+# Install Playwright browsers (for JavaScript support)
+playwright install
+```
+
+### Optional: LLM Support
+
+For LLM-based extraction, install Ollama:
+
+```bash
+# Install Ollama from https://ollama.ai
+
+# Pull a model
+ollama pull llama3
+
+# Install Python package
+pip install ollama
+```
+
+## Usage
+
+### Scrape a Single URL
+
+```bash
+# Basic markdown scrape
+python -m src.main scrape --url https://example.com
+
+# With JavaScript rendering
+python -m src.main scrape --url https://example.com --js
+
+# Output to JSON
+python -m src.main scrape --url https://example.com --format json --output page.json
+
+# Extract data with LLM
+python -m src.main scrape --url https://news.example.com \
+  --llm-prompt "Extract article title, author, and date"
+```
+
+### Crawl a Website
+
+```bash
+# Crawl with depth 3
+python -m src.main crawl --url https://example.com --depth 3
+
+# Save to directory
+python -m src.main crawl --url https://example.com --output ./scraped/
+
+# Exclude patterns
+python -m src.main crawl --url https://example.com \
+  --exclude .pdf --exclude /login
+
+# Crawl with LLM extraction
+python -m src.main crawl --url https://products.example.com \
+  --depth 2 \
+  --llm-prompt "Extract product names and prices" \
+  --output products.json
+```
+
+### Command Reference
+
+#### `scrape` Command
+
+| Option | Description | Default |
+|--------|-------------|---------|
+| `--url` | URL to scrape (required) | - |
+| `--format` | Output format: `markdown`, `json`, `text` | `markdown` |
+| `--js` | Enable JavaScript rendering | `False` |
+| `--output`, `-o` | Output file path | stdout |
+| `--user-agent` | Custom User-Agent string | `TermScrape/1.0` |
+| `--llm-prompt` | LLM extraction prompt | - |
+| `--llm-model` | LLM model name | `llama3` |
+| `--verbose` | Enable debug logging | `False` |
+
+#### `crawl` Command
+
+| Option | Description | Default |
+|--------|-------------|---------|
+| `--url` | Starting URL (required) | - |
+| `--depth` | Maximum crawl depth | `1` |
+| `--format` | Output format per page | `markdown` |
+| `--js` | Enable JavaScript rendering | `False` |
+| `--output`, `-o` | Output directory or file | stdout |
+| `--user-agent` | Custom User-Agent string | `TermScrape/1.0` |
+| `--exclude` | URL patterns to exclude (repeatable) | - |
+| `--llm-prompt` | LLM extraction prompt | - |
+| `--llm-model` | LLM model name | `llama3` |
+| `--verbose` | Enable debug logging | `False` |
+
+## Examples
+
+### Example 1: Documentation Scraping
+
+Scrape documentation and convert to markdown:
+
+```bash
+python -m src.main scrape \
+  --url https://docs.python.org/3/ \
+  --format markdown \
+  --output python_docs.md
+```
+
+### Example 2: Blog Archive
+
+Archive an entire blog:
+
+```bash
+python -m src.main crawl \
+  --url https://blog.example.com \
+  --depth 2 \
+  --format markdown \
+  --exclude /tag/ --exclude /category/ \
+  --output ./blog_archive/
+```
+
+### Example 3: E-commerce Product Data
+
+Extract product information with LLM:
+
+```bash
+python -m src.main crawl \
+  --url https://shop.example.com/products \
+  --depth 1 \
+  --js \
+  --llm-prompt "Extract: product name, price, description, and stock status" \
+  --output products.json
+```
+
+### Example 4: Research Papers
+
+Scrape a single paper's abstract page:
+
+```bash
+python -m src.main scrape \
+  --url https://arxiv.org/abs/2301.12345 \
+  --format markdown \
+  --output paper.md
+```
+
+## Programmatic Usage
+
+TermScrape can also be used as a Python library:
+
+```python
+from src import scrape_url, crawl_site
+
+# Scrape single URL
+content = scrape_url(
+    url="https://example.com",
+    output_format="markdown",
+    use_js=False
+)
+print(content)
+
+# Crawl website
+results = crawl_site(
+    start_url="https://example.com",
+    max_depth=2,
+    output_format="json",
+    exclude_patterns=[".pdf", "/login"]
+)
+
+for url, data in results.items():
+    if "content" in data:
+        print(f"{url}: {len(data['content'])} chars")
+```
+
+### Advanced Usage
+
+```python
+from src.scraper import Scraper
+from src.parser import HTMLParser
+from src.llm_extract import extract_with_llm
+
+# Custom scraper configuration
+scraper = Scraper(
+    user_agent="MyBot/1.0",
+    timeout=60,
+    respect_robots=True
+)
+
+# Scrape and parse
+html = scraper._fetch_static("https://example.com")
+parser = HTMLParser(html, url="https://example.com")
+
+# Get structured data
+markdown = parser.to_markdown()
+json_data = parser.to_json(structured=True)
+
+# LLM extraction
+extracted = extract_with_llm(
+    content=markdown,
+    prompt="Extract all headings and their summaries",
+    model="llama3"
+)
+```
+
+## Project Structure
+
+```
+termscrape/
+├── src/
+│   ├── __init__.py       # Package initialization
+│   ├── main.py           # CLI entry point
+│   ├── scraper.py        # Single URL scraping
+│   ├── crawler.py        # Recursive crawling
+│   ├── parser.py         # HTML parsing and formatting
+│   ├── browser.py        # Playwright browser automation
+│   ├── llm_extract.py    # LLM-based extraction
+│   └── utils.py          # Utility functions
+├── tests/
+│   ├── test_scraper.py   # Scraper tests
+│   └── test_crawler.py   # Crawler tests
+├── docs/
+│   ├── usage.md          # Detailed usage guide
+│   └── contributing.md   # Contribution guidelines
+├── examples/
+│   └── simple_scrape.py  # Example scripts
+├── requirements.txt      # Python dependencies
+├── pyproject.toml        # Project configuration
+└── README.md             # This file
+```
+
+## Ethical Guidelines
+
+TermScrape is designed for ethical web scraping:
+
+- ✅ **Respects robots.txt**: Automatically checks and honors robots.txt
+- ✅ **Rate limiting**: Random delays (1-5s) between requests
+- ✅ **Proper identification**: Clear User-Agent string
+- ✅ **No aggressive crawling**: Configurable depth limits
+
+**Please scrape responsibly:**
+
+- Don't overwhelm servers with requests
+- Respect website terms of service
+- Don't scrape personal data without permission
+- Use scraped data ethically and legally
+
+## Technical Details
+
+### Dependencies
+
+- **click**: CLI interface
+- **requests**: HTTP requests
+- **beautifulsoup4**: HTML parsing
+- **markdownify**: HTML to Markdown conversion
+- **playwright**: JavaScript rendering
+- **ollama** (optional): LLM integration
+
+### Python Version
+
+Requires Python 3.10 or higher.
+
+### Performance
+
+- Static pages: < 1 second per page
+- JavaScript pages: 2-5 seconds per page (browser overhead)
+- Crawling: Depends on depth and site structure
+
+### Limitations
+
+- No built-in proxy rotation (can be added)
+- Context window limits for LLM extraction
+- Playwright requires browser installation (~200MB)
+
+## Testing
+
+Run tests with pytest:
+
+```bash
+# Run all tests
+pytest
+
+# With coverage
+pytest --cov=src --cov-report=term-missing
+
+# Specific test file
+pytest tests/test_scraper.py
+```
+
+## Development
+
+### Setting Up Dev Environment
+
+```bash
+# Create virtual environment
+python -m venv venv
+source venv/bin/activate
+
+# Install dev dependencies
+pip install -r requirements.txt
+pip install pytest pytest-cov black flake8 mypy
+
+# Format code
+black src/ tests/
+
+# Lint code
+flake8 src/ tests/
+
+# Type check
+mypy src/
+```
+
+See [docs/contributing.md](docs/contributing.md) for detailed contribution guidelines.
+
+## Troubleshooting
+
+### Playwright Issues
+
+If Playwright fails to install:
+
+```bash
+playwright install chromium
+```
+
+### Ollama Connection
+
+Ensure Ollama is running:
+
+```bash
+ollama serve
+```
+
+### Import Errors
+
+Make sure you're in the project directory:
+
+```bash
+cd termscrape
+python -m src.main --help
+```
+
+## Comparison with Alternatives
+
+| Feature | TermScrape | Crawl4AI | Scrapy | BeautifulSoup |
+|---------|-----------|----------|---------|---------------|
+| CLI Interface | ✅ | ✅ | ✅ | ❌ |
+| JavaScript Support | ✅ | ✅ | ⚠️ (plugin) | ❌ |
+| LLM Integration | ✅ | ✅ | ❌ | ❌ |
+| Crawling | ✅ | ✅ | ✅ | ❌ |
+| Markdown Output | ✅ | ✅ | ⚠️ (custom) | ❌ |
+| Learning Curve | Easy | Medium | Steep | Easy |
+| Local/Offline | ✅ | ✅ | ✅ | ✅ |
+
+## Roadmap
+
+- [ ] PDF extraction support
+- [ ] Proxy rotation
+- [ ] API mode (REST server)
+- [ ] Distributed crawling
+- [ ] Browser fingerprint randomization
+- [ ] Screenshot capture
+- [ ] CAPTCHA detection
+
+## License
+
+MIT License - see [LICENSE](LICENSE) file for details.
+
+## Contributing
+
+Contributions are welcome! Please read [docs/contributing.md](docs/contributing.md) before submitting PRs.
+
+## Acknowledgments
+
+Inspired by:
+- [Firecrawl](https://github.com/mendableai/firecrawl)
+- [Crawl4AI](https://github.com/unclecode/crawl4ai)
+- [ScrapeGraphAI](https://github.com/ScrapeGraphAI/Scrapegraph-ai)
+
+## Support
+
+- **Issues**: [GitHub Issues](https://github.com/yourusername/termscrape/issues)
+- **Discussions**: [GitHub Discussions](https://github.com/yourusername/termscrape/discussions)
+- **Email**: contact@termscrape.dev
+
+## Authors
+
+Created by the TermScrape Team
+
+---
+
+**Star this repo if you find it useful!** ⭐
diff --git a/termscrape/docs/contributing.md b/termscrape/docs/contributing.md
new file mode 100644
index 0000000..3017f28
--- /dev/null
+++ b/termscrape/docs/contributing.md
@@ -0,0 +1,408 @@
+# Contributing to TermScrape
+
+Thank you for your interest in contributing to TermScrape! This document provides guidelines and instructions for contributing.
+
+## Code of Conduct
+
+- Be respectful and inclusive
+- Focus on constructive feedback
+- Help others learn and grow
+- Follow ethical scraping practices
+
+## Getting Started
+
+### 1. Fork and Clone
+
+```bash
+git clone https://github.com/yourusername/termscrape
+cd termscrape
+```
+
+### 2. Set Up Development Environment
+
+```bash
+# Create virtual environment
+python -m venv venv
+source venv/bin/activate  # On Windows: venv\Scripts\activate
+
+# Install dependencies
+pip install -r requirements.txt
+
+# Install Playwright
+playwright install
+
+# Install development dependencies
+pip install pytest pytest-cov black flake8 mypy
+```
+
+### 3. Create a Branch
+
+```bash
+git checkout -b feature/your-feature-name
+```
+
+## Development Guidelines
+
+### Code Style
+
+TermScrape follows PEP 8, with these project-specific settings:
+
+- **Line length**: 88 characters (Black default)
+- **Formatter**: Black
+- **Linter**: Flake8
+- **Type hints**: Use type hints for all functions
+
+```bash
+# Format code
+black src/ tests/
+
+# Check linting
+flake8 src/ tests/
+
+# Type checking
+mypy src/
+```
+
+### Project Structure
+
+```
+termscrape/
+├── src/              # Source code
+│   ├── main.py       # CLI entry point
+│   ├── scraper.py    # Single URL scraping
+│   ├── crawler.py    # Multi-page crawling
+│   ├── parser.py     # HTML parsing
+│   ├── browser.py    # Playwright integration
+│   ├── llm_extract.py # LLM extraction
+│   └── utils.py      # Helper functions
+├── tests/            # Test files
+├── docs/             # Documentation
+└── examples/         # Example scripts
+```
+
+### Coding Standards
+
+#### 1. Modularity
+
+Each module should handle one concern:
+
+```python
+# Good
+from src.scraper import scrape_url
+from src.parser import parse_html
+
+# Bad - mixing concerns
+from src.everything import do_everything
+```
+
+#### 2. Type Hints
+
+Use type hints for clarity:
+
+```python
+from typing import Optional, List, Dict
+
+def scrape_url(
+    url: str,
+    output_format: str = "markdown",
+    use_js: bool = False
+) -> str:
+    """Scrape a URL and return content."""
+    pass
+```
+
+#### 3. Error Handling
+
+Handle errors gracefully with specific exceptions:
+
+```python
+try:
+    response = requests.get(url, timeout=30)
+    response.raise_for_status()
+except Timeout:
+    raise ScraperError(f"Request timeout for {url}")
+except RequestException as e:
+    raise ScraperError(f"Request failed: {e}")
+```
+
+#### 4. Logging
+
+Use the logging module, not print statements:
+
+```python
+import logging
+
+logger = logging.getLogger("termscrape")
+
+logger.info("Starting scrape...")
+logger.debug(f"Fetched {len(content)} bytes")
+logger.error(f"Failed to scrape: {e}")
+```
+
+#### 5. Documentation
+
+Document all public functions and classes:
+
+```python
+def scrape_url(url: str, output_format: str = "markdown") -> str:
+    """
+    Scrape a single URL.
+
+    Args:
+        url: URL to scrape
+        output_format: Output format ('markdown', 'json', 'text')
+
+    Returns:
+        Scraped content in specified format
+
+    Raises:
+        ScraperError: If scraping fails
+    """
+    pass
+```
+
+## Testing
+
+### Writing Tests
+
+All new features must include tests. Use pytest:
+
+```python
+# tests/test_myfeature.py
+import pytest
+from src.mymodule import my_function
+
+def test_my_function():
+    """Test that my_function works correctly."""
+    result = my_function("input")
+    assert result == "expected"
+
+def test_my_function_error():
+    """Test error handling."""
+    with pytest.raises(ValueError):
+        my_function(None)
+```
+
+### Running Tests
+
+```bash
+# Run all tests
+pytest
+
+# Run with coverage
+pytest --cov=src --cov-report=term-missing
+
+# Run specific test file
+pytest tests/test_scraper.py
+
+# Run specific test
+pytest tests/test_scraper.py::test_scrape_url
+```
+
+### Test Coverage
+
+Aim for 80%+ code coverage. Check with:
+
+```bash
+pytest --cov=src --cov-report=html
+# Open htmlcov/index.html in browser
+```
+
+## Ethical Scraping Guidelines
+
+All contributions must respect ethical scraping practices:
+
+### 1. Robots.txt
+
+Always check robots.txt before scraping:
+
+```python
+from src.utils import check_robots_txt
+
+if not check_robots_txt(url):
+    raise ScraperError("Disallowed by robots.txt")
+```
+
+### 2. Rate Limiting
+
+Add delays between requests:
+
+```python
+from src.utils import random_delay
+
+random_delay(min_seconds=1, max_seconds=5)
+```
+
+### 3. User-Agent
+
+Always identify your scraper:
+
+```python
+headers = {"User-Agent": "TermScrape/1.0 (+your.email)"}
+```
+
+### 4. Respect Terms of Service
+
+- Don't scrape login-protected content
+- Don't bypass paywalls
+- Don't scrape personal data without consent
+- Don't use scraped data for illegal purposes
+
+## Pull Request Process
+
+### 1. Before Submitting
+
+- [ ] Code follows style guidelines (run Black)
+- [ ] All tests pass (run pytest)
+- [ ] New features have tests
+- [ ] Documentation is updated
+- [ ] Commit messages are clear
+
+### 2. Commit Messages
+
+Use clear, descriptive commit messages:
+
+```bash
+# Good
+git commit -m "Add support for custom timeout in scraper"
+git commit -m "Fix robots.txt parsing for URLs with ports"
+
+# Bad
+git commit -m "Fix bug"
+git commit -m "Update code"
+```
+
+### 3. Submit Pull Request
+
+1. Push your branch to your fork
+2. Open a pull request on GitHub
+3. Describe your changes clearly
+4. Link any related issues
+5. Wait for review
+
+### 4. PR Template
+
+```markdown
+## Description
+Brief description of changes
+
+## Type of Change
+- [ ] Bug fix
+- [ ] New feature
+- [ ] Breaking change
+- [ ] Documentation update
+
+## Testing
+How was this tested?
+
+## Checklist
+- [ ] Tests pass
+- [ ] Code formatted with Black
+- [ ] Documentation updated
+```
+
+## Feature Requests
+
+Have an idea? Open an issue:
+
+1. Check if it already exists
+2. Describe the feature clearly
+3. Explain the use case
+4. Provide examples if possible
+
+## Bug Reports
+
+Found a bug? Report it:
+
+1. Check if it's already reported
+2. Provide clear steps to reproduce
+3. Include error messages
+4. Specify your environment (OS, Python version)
+
+### Bug Report Template
+
+````markdown
+## Bug Description
+Clear description of the bug
+
+## Steps to Reproduce
+1. Run command...
+2. See error...
+
+## Expected Behavior
+What should happen
+
+## Actual Behavior
+What actually happens
+
+## Environment
+- OS:
+- Python version:
+- TermScrape version:
+
+## Error Output
+```
+Paste error messages here
+```
+````
+
+## Development Workflow
+
+### Adding a New Feature
+
+1. Create an issue to discuss the feature
+2. Get approval from maintainers
+3. Create a branch: `feature/feature-name`
+4. Implement the feature with tests
+5. Update documentation
+6. Submit pull request
+
+### Fixing a Bug
+
+1. Create an issue (if not exists)
+2. Create a branch: `fix/bug-description`
+3. Write a failing test that reproduces the bug
+4. Fix the bug
+5. Ensure test passes
+6. Submit pull request
+
+## Code Review
+
+### As a Reviewer
+
+- Be constructive and respectful
+- Test the changes locally
+- Check code quality and tests
+- Suggest improvements clearly
+
+### As an Author
+
+- Respond to feedback promptly
+- Don't take criticism personally
+- Make requested changes
+- Thank reviewers for their time
+
+## Documentation
+
+Update documentation when:
+
+- Adding new features
+- Changing CLI commands
+- Updating dependencies
+- Fixing bugs that affect usage
+
+## Questions?
+
+- Open an issue for questions
+- Check existing documentation
+- Review closed issues for similar problems
+
+## Recognition
+
+Contributors will be:
+
+- Listed in CONTRIBUTORS.md
+- Mentioned in release notes
+- Appreciated in the community!
+
+Thank you for contributing to TermScrape!
diff --git a/termscrape/docs/usage.md b/termscrape/docs/usage.md
new file mode 100644
index 0000000..07632c6
--- /dev/null
+++ b/termscrape/docs/usage.md
@@ -0,0 +1,289 @@
+# TermScrape Usage Guide
+
+## Installation
+
+```bash
+# Clone the repository
+git clone https://github.com/yourusername/termscrape
+cd termscrape
+
+# Install dependencies
+pip install -r requirements.txt
+
+# Install Playwright browsers
+playwright install
+```
+
+## CLI Commands
+
+TermScrape provides two main commands: `scrape` and `crawl`.
+
+### Scrape Command
+
+Scrape a single URL and output its content.
+
+```bash
+python -m src.main scrape --url <URL> [OPTIONS]
+```
+
+**Options:**
+
+- `--url` (required): URL to scrape
+- `--format`: Output format - `markdown` (default), `json`, or `text`
+- `--js`: Enable JavaScript rendering using headless browser
+- `--output`, `-o`: Save to file instead of stdout
+- `--user-agent`: Custom User-Agent string (default: "TermScrape/1.0")
+- `--llm-prompt`: Extract data using LLM (requires Ollama)
+- `--llm-model`: LLM model to use (default: "llama3")
+- `--verbose`: Enable debug logging
+
+**Examples:**
+
+```bash
+# Basic scrape to markdown
+python -m src.main scrape --url https://example.com
+
+# Scrape with JavaScript rendering
+python -m src.main scrape --url https://example.com --js
+
+# Scrape to JSON format
+python -m src.main scrape --url https://example.com --format json
+
+# Save to file
+python -m src.main scrape --url https://example.com --output page.md
+
+# Use LLM to extract specific data
+python -m src.main scrape --url https://example.com \
+  --llm-prompt "Extract all product titles and prices"
+```
+
+### Crawl Command
+
+Recursively crawl a website starting from a URL.
+
+```bash
+python -m src.main crawl --url <URL> [OPTIONS]
+```
+
+**Options:**
+
+- `--url` (required): Starting URL
+- `--depth`: Maximum crawl depth (default: 1)
+- `--format`: Output format for each page
+- `--js`: Enable JavaScript rendering
+- `--output`, `-o`: Output directory or JSON file
+- `--user-agent`: Custom User-Agent string
+- `--exclude`: URL patterns to exclude (can use multiple times)
+- `--llm-prompt`: Extract data from each page using LLM
+- `--llm-model`: LLM model to use
+- `--verbose`: Enable debug logging
+
+**Examples:**
+
+```bash
+# Crawl with depth 2
+python -m src.main crawl --url https://example.com --depth 2
+
+# Crawl and save each page to directory
+python -m src.main crawl --url https://example.com --output ./scraped/
+
+# Crawl with exclusions
+python -m src.main crawl --url https://example.com \
+  --exclude .pdf --exclude /login --exclude /admin
+
+# Crawl with LLM extraction
+python -m src.main crawl --url https://example.com \
+  --llm-prompt "Extract main headings and key points" \
+  --output results.json
+```
+
+## Global Options
+
+These options work with any command:
+
+- `--verbose`: Enable detailed debug logging
+- `--help`: Show help message
+
+## Output Formats
+
+### Markdown
+
+Clean, readable markdown format. Best for documentation and human reading.
+
+```bash
+python -m src.main scrape --url https://example.com --format markdown
+```
+
+### JSON
+
+Structured JSON with metadata (title, headings, links, images, etc.).
+
+```bash
+python -m src.main scrape --url https://example.com --format json
+```
+
+### Text
+
+Plain text extraction, removing all HTML tags.
+
+```bash
+python -m src.main scrape --url https://example.com --format text
+```
+
+## JavaScript Rendering
+
+For dynamic websites that require JavaScript:
+
+```bash
+python -m src.main scrape --url https://example.com --js
+```
+
+This uses Playwright's headless browser to render the page before scraping.
+
+## LLM Extraction
+
+TermScrape can use local LLMs via Ollama for intelligent data extraction.
+
+**Prerequisites:**
+
+1. Install Ollama: https://ollama.ai
+2. Pull a model: `ollama pull llama3`
+3. Install Python package: `pip install ollama`
+
+**Usage:**
+
+```bash
+# Extract specific information
+python -m src.main scrape --url https://news.example.com \
+  --llm-prompt "Extract article title, author, and publication date"
+
+# Extract from crawled pages
+python -m src.main crawl --url https://products.example.com \
+  --depth 2 \
+  --llm-prompt "Extract product name, price, and description" \
+  --output products.json
+```
+
+## Ethical Scraping
+
+TermScrape includes built-in ethical scraping features:
+
+- **Robots.txt respect**: Automatically checks and respects robots.txt
+- **Rate limiting**: Random delays (1-5 seconds) between requests
+- **User-Agent**: Proper identification in requests
+
+To disable robots.txt checking (not recommended):
+
+```python
+# In Python code
+from src.scraper import Scraper
+scraper = Scraper(respect_robots=False)
+```
+
+## Error Handling
+
+TermScrape handles common errors gracefully:
+
+- Network timeouts
+- Invalid URLs
+- Missing pages (404)
+- Malformed HTML
+- Robots.txt restrictions
+
+Errors are logged to stderr. Use `--verbose` for detailed error information.
+
+## Performance Tips
+
+1. **Use static requests when possible**: Only use `--js` for JavaScript-heavy sites
+2. **Limit crawl depth**: Each extra level can multiply the number of pages fetched, so scraping time grows quickly
+3. **Use exclude patterns**: Skip unnecessary pages (PDFs, logins, etc.)
+4. **Batch operations**: For multiple URLs, use the crawl command
+
+## Examples
+
+### Example 1: Blog Archive
+
+Scrape a blog and extract all articles:
+
+```bash
+python -m src.main crawl \
+  --url https://blog.example.com \
+  --depth 2 \
+  --exclude /tag/ --exclude /category/ \
+  --format markdown \
+  --output ./blog_archive/
+```
+
+### Example 2: Product Catalog
+
+Extract product information with LLM:
+
+```bash
+python -m src.main crawl \
+  --url https://shop.example.com/products \
+  --depth 1 \
+  --js \
+  --llm-prompt "Extract product name, price, description, and availability" \
+  --output products.json
+```
+
+### Example 3: Research Paper
+
+Scrape a research paper from a page that needs JavaScript rendering:
+
+```bash
+python -m src.main scrape \
+  --url https://papers.example.com/paper/12345 \
+  --js \
+  --format markdown \
+  --output paper.md
+```
+
+## Programmatic Usage
+
+You can also use TermScrape as a Python library:
+
+```python
+from src import scrape_url, crawl_site
+
+# Scrape single URL
+content = scrape_url(
+    url="https://example.com",
+    output_format="markdown",
+    use_js=False
+)
+
+# Crawl website
+results = crawl_site(
+    start_url="https://example.com",
+    max_depth=2,
+    output_format="json"
+)
+```
+
+## Troubleshooting
+
+### Playwright Installation Issues
+
+If you get Playwright errors:
+
+```bash
+playwright install chromium
+```
+
+### Ollama Connection Issues
+
+Make sure Ollama is running:
+
+```bash
+ollama serve
+```
+
+### Permission Errors
+
+Some sites may block scrapers. Try using a custom user agent:
+
+```bash
+python -m src.main scrape --url https://example.com \
+  --user-agent "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
+```
diff --git a/termscrape/examples/simple_scrape.py b/termscrape/examples/simple_scrape.py
new file mode 100755
index 0000000..fc11249
--- /dev/null
+++ b/termscrape/examples/simple_scrape.py
@@ -0,0 +1,195 @@
+#!/usr/bin/env python3
+"""
+Simple scraping examples using TermScrape.
+
+This file demonstrates how to use TermScrape programmatically.
+"""
+
+import sys
+import os
+
+# Add parent directory to path to import src modules
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+
+from src import scrape_url, crawl_site
+from src.scraper import Scraper
+from src.parser import HTMLParser
+from src.utils import setup_logging
+
+
+def example_1_simple_scrape():
+    """Example 1: Simple URL scraping to markdown."""
+    print("=" * 60)
+    print("Example 1: Simple Scrape")
+    print("=" * 60)
+
+    url = "https://example.com"
+    content = scrape_url(url, output_format="markdown")
+
+    print(f"\nScraped {url}:\n")
+    print(content[:500])  # Print first 500 chars
+    print("\n...")
+
+
+def example_2_json_output():
+    """Example 2: Scraping with JSON output."""
+    print("\n" + "=" * 60)
+    print("Example 2: JSON Output")
+    print("=" * 60)
+
+    url = "https://example.com"
+    content = scrape_url(url, output_format="json")
+
+    print(f"\nScraped {url} as JSON:\n")
+    print(content[:500])
+    print("\n...")
+
+
+def example_3_custom_scraper():
+    """Example 3: Using custom scraper configuration."""
+    print("\n" + "=" * 60)
+    print("Example 3: Custom Scraper")
+    print("=" * 60)
+
+    # Create custom scraper
+    scraper = Scraper(
+        user_agent="MyCustomBot/1.0",
+        timeout=60,
+        respect_robots=True,
+        use_delay=False,  # Disable delay for example
+    )
+
+    url = "https://example.com"
+    content = scraper.scrape(url, output_format="text")
+
+    print(f"\nScraped with custom settings:\n")
+    print(content[:300])
+    print("\n...")
+
+
+def example_4_crawl_site():
+    """Example 4: Crawling a website."""
+    print("\n" + "=" * 60)
+    print("Example 4: Site Crawling")
+    print("=" * 60)
+
+    url = "https://example.com"
+    results = crawl_site(
+        start_url=url,
+        max_depth=1,  # Only crawl 1 level deep
+        output_format="markdown",
+    )
+
+    print(f"\nCrawled {len(results)} pages from {url}:\n")
+    for page_url, data in results.items():
+        if "content" in data:
+            print(f"- {page_url} ({len(data['content'])} chars)")
+        else:
+            print(f"- {page_url} (error: {data.get('error', 'unknown')})")
+
+
+def example_5_html_parser():
+    """Example 5: Using HTMLParser directly."""
+    print("\n" + "=" * 60)
+    print("Example 5: Direct HTML Parsing")
+    print("=" * 60)
+
+    # Sample HTML
+    html = """
+    <html>
+        <head><title>Test Page</title></head>
+        <body>
+            <h1>Welcome to Test Page</h1>
+            <p>This is a paragraph with some <strong>bold text</strong>.</p>
+            <ul>
+                <li>Item 1</li>
+                <li>Item 2</li>
+            </ul>
+            <a href="https://example.com">Link</a>
+        </body>
+    </html>
+    """
+
+    parser = HTMLParser(html, url="https://example.com/test")
+
+    # Get markdown
+    markdown = parser.to_markdown()
+    print("\nMarkdown output:")
+    print(markdown)
+
+    # Get structured JSON
+    json_output = parser.to_json(structured=True)
+    print("\nStructured JSON:")
+    print(json_output[:400])
+    print("\n...")
+
+
+def example_6_with_excludes():
+    """Example 6: Crawling with URL exclusions."""
+    print("\n" + "=" * 60)
+    print("Example 6: Crawling with Exclusions")
+    print("=" * 60)
+
+    url = "https://example.com"
+    results = crawl_site(
+        start_url=url,
+        max_depth=1,
+        output_format="markdown",
+        exclude_patterns=[".pdf", ".zip", "/login", "/admin"],
+    )
+
+    print(f"\nCrawled with exclusions: {len(results)} pages")
+    for page_url in list(results.keys())[:5]:  # Show first 5
+        print(f"- {page_url}")
+
+
+def example_7_save_to_file():
+    """Example 7: Scraping and saving to file."""
+    print("\n" + "=" * 60)
+    print("Example 7: Save to File")
+    print("=" * 60)
+
+    url = "https://example.com"
+    content = scrape_url(url, output_format="markdown")
+
+    # Save to file
+    output_file = "example_output.md"
+    with open(output_file, "w", encoding="utf-8") as f:
+        f.write(content)
+
+    print(f"\nSaved content to {output_file}")
+    print(f"File size: {len(content)} bytes")
+
+
+def main():
+    """Run all examples."""
+    # Enable verbose logging
+    setup_logging(verbose=False)
+
+    print("\n" + "=" * 60)
+    print("TermScrape Examples")
+    print("=" * 60)
+    print("\nThese examples demonstrate basic TermScrape usage.")
+    print("Some examples use https://example.com for safety.\n")
+
+    try:
+        # Run examples
+        example_1_simple_scrape()
+        example_2_json_output()
+        example_3_custom_scraper()
+        example_4_crawl_site()
+        example_5_html_parser()
+        example_6_with_excludes()
+        example_7_save_to_file()
+
+        print("\n" + "=" * 60)
+        print("All examples completed!")
+        print("=" * 60)
+
+    except Exception as e:
+        print(f"\nError running examples: {e}")
+        print("Make sure you have internet connection and required dependencies.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/termscrape/pyproject.toml b/termscrape/pyproject.toml
new file mode 100644
index 0000000..10c3bb8
--- /dev/null
+++ b/termscrape/pyproject.toml
@@ -0,0 +1,74 @@
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "termscrape"
+version = "1.0.0"
+description = "Terminal-based web scraper with LLM support"
+readme = "README.md"
+requires-python = ">=3.10"
+license = {text = "MIT"}
+authors = [
+    {name = "TermScrape Team", email = "contact@termscrape.dev"}
+]
+keywords = ["scraper", "crawler", "web-scraping", "llm", "cli"]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Internet :: WWW/HTTP",
+    "Topic :: Software Development :: Libraries :: Python Modules",
+]
+
+dependencies = [
+    "click>=8.1.0",
+    "requests>=2.31.0",
+    "beautifulsoup4>=4.12.0",
+    "markdownify>=0.11.6",
+    "playwright>=1.40.0",
+]
+
+[project.optional-dependencies]
+llm = ["ollama>=0.1.0"]
+dev = [
+    "pytest>=7.4.0",
+    "pytest-cov>=4.1.0",
+    "pytest-asyncio>=0.21.0",
+    "black>=23.0.0",
+    "flake8>=6.0.0",
+    "mypy>=1.5.0",
+]
+
+[project.urls]
+Homepage = "https://github.com/yourusername/termscrape"
+Repository = "https://github.com/yourusername/termscrape"
+Issues = "https://github.com/yourusername/termscrape/issues"
+
+[project.scripts]
+termscrape = "src.main:cli"
+
+[tool.setuptools]
+packages = ["src"]
+
+[tool.black]
+line-length = 88
+target-version = ["py310", "py311", "py312"]
+
+[tool.mypy]
+python_version = "3.10"
+warn_return_any = true
+warn_unused_configs = true
+disallow_untyped_defs = false
+ignore_missing_imports = true
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+python_files = "test_*.py"
+python_classes = "Test*"
+python_functions = "test_*"
+addopts = "-v --cov=src --cov-report=term-missing"
diff --git a/termscrape/requirements.txt b/termscrape/requirements.txt
new file mode 100644
index 0000000..96870b5
--- /dev/null
+++ b/termscrape/requirements.txt
@@ -0,0 +1,19 @@
+# Core dependencies
+click>=8.1.0
+requests>=2.31.0
+beautifulsoup4>=4.12.0
+markdownify>=0.11.6
+playwright>=1.40.0
+
+# Optional dependencies
+ollama>=0.1.0  # For LLM extraction
+
+# Testing dependencies
+pytest>=7.4.0
+pytest-cov>=4.1.0
+pytest-asyncio>=0.21.0
+
+# Development dependencies
+black>=23.0.0
+flake8>=6.0.0
+mypy>=1.5.0
diff --git a/termscrape/src/__init__.py b/termscrape/src/__init__.py
new file mode 100644
index 0000000..9421244
--- /dev/null
+++ b/termscrape/src/__init__.py
@@ -0,0 +1,20 @@
+"""TermScrape - Terminal-based web scraper with LLM support."""
+
+__version__ = "1.0.0"
+__author__ = "TermScrape Team"
+
+from .scraper import scrape_url, Scraper
+from .crawler import crawl_site, Crawler
+from .parser import parse_html, HTMLParser
+from .utils import setup_logging, check_robots_txt
+
+__all__ = [
+    "scrape_url",
+    "Scraper",
+    "crawl_site",
+    "Crawler",
+    "parse_html",
+    "HTMLParser",
+    "setup_logging",
+    "check_robots_txt",
+]
diff --git a/termscrape/src/browser.py b/termscrape/src/browser.py
new file mode 100644
index 0000000..ef29b51
--- /dev/null
+++ b/termscrape/src/browser.py
@@ -0,0 +1,167 @@
+"""Headless browser automation using Playwright for JS-heavy sites."""
+
+import asyncio
+import logging
+from typing import Optional
+
+from playwright.async_api import async_playwright, Browser, Page, TimeoutError
+
+
+logger = logging.getLogger("termscrape")
+
+
+class HeadlessBrowser:
+    """Async headless browser for rendering JavaScript content."""
+
+    def __init__(self, user_agent: Optional[str] = None):
+        """
+        Initialize headless browser.
+
+        Args:
+            user_agent: Custom user agent string
+        """
+        self.user_agent = user_agent or "TermScrape/1.0"
+        self.browser: Optional[Browser] = None
+        self.playwright = None
+
+    async def __aenter__(self):
+        """Context manager entry."""
+        await self.start()
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Context manager exit."""
+        await self.close()
+
+    async def start(self) -> None:
+        """Start Playwright and launch Chromium, releasing both if launch fails."""
+        self.playwright = await async_playwright().start()
+        try:
+            self.browser = await self.playwright.chromium.launch(headless=True)
+        except Exception:
+            await self.close()  # __aexit__ never runs when __aenter__ raises
+            raise
+        logger.debug("Headless browser started")
+
+    async def close(self) -> None:
+        """Close the browser and stop Playwright; safe to call repeatedly."""
+        browser, self.browser = self.browser, None  # reset so fetches restart it
+        playwright, self.playwright = self.playwright, None
+        try:
+            if browser:
+                await browser.close()
+        finally:
+            if playwright:
+                await playwright.stop()
+        logger.debug("Headless browser closed")
+
+    async def fetch_page(
+        self, url: str, wait_until: str = "networkidle", timeout: int = 30000
+    ) -> str:
+        """
+        Fetch a page with JavaScript rendering.
+
+        Args:
+            url: URL to fetch
+            wait_until: When to consider page loaded ('load', 'domcontentloaded',
+                       'networkidle')
+            timeout: Maximum time to wait in milliseconds
+
+        Returns:
+            Rendered HTML content
+
+        Raises:
+            TimeoutError: Playwright's TimeoutError, if page load times out
+            Exception: For other browser errors
+        """
+        if not self.browser:
+            await self.start()
+
+        page: Optional[Page] = None
+        try:
+            page = await self.browser.new_page(user_agent=self.user_agent)
+
+            logger.info(f"Fetching {url} with JavaScript rendering...")
+            await page.goto(url, wait_until=wait_until, timeout=timeout)
+
+            # goto() has already waited for `wait_until`; grab the rendered HTML
+            html_content = await page.content()
+
+            logger.debug(f"Successfully fetched {url} ({len(html_content)} bytes)")
+            return html_content
+        except TimeoutError as exc:
+            logger.error(f"Timeout loading {url}")
+            raise TimeoutError(f"Page load timeout for {url}") from exc
+        except Exception as e:
+            logger.error(f"Error fetching {url}: {e}")
+            raise
+        finally:
+            if page:
+                await page.close()
+
+    async def fetch_with_wait(
+        self, url: str, selector: Optional[str] = None, timeout: int = 30000
+    ) -> str:
+        """
+        Fetch a page and wait for a specific element.
+
+        Args:
+            url: URL to fetch
+            selector: CSS selector to wait for
+            timeout: Maximum time to wait in milliseconds
+
+        Returns:
+            Rendered HTML content
+        """
+        if not self.browser:
+            await self.start()
+
+        page: Optional[Page] = None
+        try:
+            page = await self.browser.new_page(user_agent=self.user_agent)
+
+            await page.goto(url, wait_until="domcontentloaded", timeout=timeout)
+
+            # Wait for specific selector if provided
+            if selector:
+                await page.wait_for_selector(selector, timeout=timeout)
+
+            return await page.content()
+        finally:
+            if page:
+                await page.close()
+
+
+async def fetch_with_js(
+    url: str, user_agent: Optional[str] = None, timeout: int = 30000
+) -> str:
+    """
+    Render one page in a short-lived headless browser and return its HTML.
+
+    Args:
+        url: URL to fetch
+        user_agent: Custom user agent
+        timeout: Timeout in milliseconds
+
+    Returns:
+        Rendered HTML content
+    """
+    async with HeadlessBrowser(user_agent=user_agent) as session:
+        return await session.fetch_page(url, timeout=timeout)
+
+
+def fetch_with_js_sync(
+    url: str, user_agent: Optional[str] = None, timeout: int = 30000
+) -> str:
+    """
+    Blocking wrapper that drives fetch_with_js to completion via asyncio.run.
+
+    Args:
+        url: URL to fetch
+        user_agent: Custom user agent
+        timeout: Timeout in milliseconds
+
+    Returns:
+        Rendered HTML content
+    """
+    return asyncio.run(fetch_with_js(url, user_agent=user_agent, timeout=timeout))
diff --git a/termscrape/src/crawler.py b/termscrape/src/crawler.py
new file mode 100644
index 0000000..b9bf0d9
--- /dev/null
+++ b/termscrape/src/crawler.py
@@ -0,0 +1,221 @@
+"""Recursive web crawling functionality."""
+
+import logging
+from collections import deque
+from typing import Dict, List, Optional, Set
+from urllib.parse import urljoin, urlparse
+
+from bs4 import BeautifulSoup
+
+from .scraper import Scraper, ScraperError
+from .utils import normalize_url
+
+
+logger = logging.getLogger("termscrape")
+
+
+class Crawler:
+    """Crawl a website recursively with depth control."""
+
+    def __init__(
+        self,
+        user_agent: str = "TermScrape/1.0",
+        max_depth: int = 1,
+        respect_robots: bool = True,
+        same_domain_only: bool = True,
+        exclude_patterns: Optional[List[str]] = None,
+    ):
+        """
+        Initialize crawler.
+
+        Args:
+            user_agent: User agent string
+            max_depth: Maximum crawl depth (0 = only start URL)
+            respect_robots: Check robots.txt
+            same_domain_only: Only crawl URLs from same domain
+            exclude_patterns: URL patterns to exclude (e.g., ['.pdf', '/login'])
+        """
+        self.user_agent = user_agent
+        self.max_depth = max_depth
+        self.respect_robots = respect_robots
+        self.same_domain_only = same_domain_only
+        self.exclude_patterns = exclude_patterns or []
+
+        self.scraper = Scraper(
+            user_agent=user_agent,
+            respect_robots=respect_robots,
+        )
+
+        self.visited: Set[str] = set()
+        self.results: Dict[str, Dict] = {}
+
+    def crawl(
+        self,
+        start_url: str,
+        output_format: str = "markdown",
+        use_js: bool = False,
+    ) -> Dict[str, Dict]:
+        """
+        Crawl a website starting from a URL.
+
+        Args:
+            start_url: Starting URL
+            output_format: Output format for each page
+            use_js: Use JavaScript rendering
+
+        Returns:
+            Dictionary mapping URLs to their scraped content and metadata
+        """
+        logger.info(
+            f"Starting crawl from {start_url} (max depth: {self.max_depth})"
+        )
+
+        # Queue: (url, depth)
+        queue: deque = deque([(start_url, 0)])
+        base_domain = urlparse(start_url).netloc
+
+        while queue:
+            current_url, depth = queue.popleft()
+
+            # Skip if already visited
+            if current_url in self.visited:
+                continue
+
+            # Skip if depth exceeded
+            if depth > self.max_depth:
+                continue
+
+            # Skip if matches exclude patterns
+            if self._should_exclude(current_url):
+                logger.debug(f"Excluding {current_url} (matches exclude pattern)")
+                continue
+
+            # Skip if different domain and same_domain_only is True
+            if self.same_domain_only:
+                if urlparse(current_url).netloc != base_domain:
+                    logger.debug(
+                        f"Skipping {current_url} (different domain)"
+                    )
+                    continue
+
+            # Mark as visited
+            self.visited.add(current_url)
+
+            # Scrape the page
+            try:
+                logger.info(f"Crawling {current_url} (depth: {depth})")
+                content = self.scraper.scrape(
+                    current_url, output_format, use_js
+                )
+
+                self.results[current_url] = {
+                    "content": content,
+                    "depth": depth,
+                    "format": output_format,
+                }
+
+                # Extract links if not at max depth
+                if depth < self.max_depth:
+                    links = self._extract_links(current_url, content, use_js)
+                    for link in links:
+                        normalized_link = normalize_url(link, current_url)
+                        if normalized_link not in self.visited:
+                            queue.append((normalized_link, depth + 1))
+
+            except ScraperError as e:
+                logger.warning(f"Failed to crawl {current_url}: {e}")
+                self.results[current_url] = {
+                    "error": str(e),
+                    "depth": depth,
+                }
+
+        scraped_count = sum("content" in page for page in self.results.values())
+        logger.info(
+            f"Crawl complete. Visited {len(self.visited)} pages, "
+            f"successfully scraped {scraped_count} pages"
+        )
+
+        return self.results
+
+    def _extract_links(
+        self, base_url: str, content: str, use_js: bool
+    ) -> List[str]:
+        """
+        Extract links from a page by re-fetching its HTML.
+
+        Args:
+            base_url: Base URL for resolving relative links
+            content: Scraped content (currently unused; the HTML is re-fetched)
+            use_js: Whether JS was used (affects parsing)
+
+        Returns:
+            List of absolute http(s) URLs; empty if extraction fails
+        """
+        links: List[str] = []
+
+        # Re-fetch HTML to extract links (content might be markdown/json)
+        try:
+            if use_js:
+                from .browser import fetch_with_js_sync
+
+                html = fetch_with_js_sync(base_url, self.user_agent)
+            else:
+                html = self.scraper._fetch_static(base_url)
+
+            soup = BeautifulSoup(html, "html.parser")
+
+            for link_tag in soup.find_all("a", href=True):
+                href = link_tag.get("href")
+                # Convert relative to absolute
+                absolute_url = urljoin(base_url, href) if href else ""
+                # Only http(s) pages can be scraped (drops mailto:, javascript:, ...)
+                if urlparse(absolute_url).scheme in ("http", "https"):
+                    links.append(absolute_url)
+
+        except Exception as e:
+            logger.warning(f"Failed to extract links from {base_url}: {e}")
+
+        return links
+
+    def _should_exclude(self, url: str) -> bool:
+        """
+        Check if URL matches any exclude pattern.
+
+        Args:
+            url: URL to check
+
+        Returns:
+            True if URL should be excluded
+        """
+        # Patterns are plain substrings (no glob or regex semantics)
+        return any(pattern in url for pattern in self.exclude_patterns)
+
+
+def crawl_site(
+    start_url: str,
+    max_depth: int = 1,
+    output_format: str = "markdown",
+    use_js: bool = False,
+    user_agent: str = "TermScrape/1.0",
+    exclude_patterns: Optional[List[str]] = None,
+) -> Dict[str, Dict]:
+    """
+    Crawl a site with a one-off Crawler and return its results.
+
+    Args:
+        start_url: Starting URL
+        max_depth: Maximum crawl depth
+        output_format: Output format
+        use_js: Use JavaScript rendering
+        user_agent: Custom user agent
+        exclude_patterns: URL patterns to exclude
+
+    Returns:
+        Dictionary of crawled results
+    """
+    site_crawler = Crawler(
+        exclude_patterns=exclude_patterns,
+        max_depth=max_depth,
+        user_agent=user_agent,
+    )
+    return site_crawler.crawl(start_url, output_format=output_format, use_js=use_js)
diff --git a/termscrape/src/llm_extract.py b/termscrape/src/llm_extract.py
new file mode 100644
index 0000000..b37e1d8
--- /dev/null
+++ b/termscrape/src/llm_extract.py
@@ -0,0 +1,221 @@
+"""LLM-based data extraction using Ollama."""
+
+import json
+import logging
+from typing import Any, Dict, Optional
+
+logger = logging.getLogger("termscrape")
+
+try:
+    import ollama
+
+    OLLAMA_AVAILABLE = True
+except ImportError:
+    # ollama is an optional dependency: record its absence here and let the
+    # code below check OLLAMA_AVAILABLE before touching the module.
+    OLLAMA_AVAILABLE = False
+    logger.warning(
+        "Ollama not installed. LLM extraction disabled. "
+        "Install with: pip install ollama"
+    )
+
+
+class LLMExtractor:
+    """Extract structured data from content using local LLM."""
+
+    def __init__(self, model: str = "llama3"):
+        """
+        Initialize LLM extractor.
+
+        Args:
+            model: Ollama model name (e.g., 'llama3', 'mistral')
+
+        Raises:
+            RuntimeError: If Ollama is not available
+        """
+        if not OLLAMA_AVAILABLE:
+            raise RuntimeError(
+                "Ollama is not installed. Install with: pip install ollama"
+            )
+
+        self.model = model
+        self._verify_model()
+
+    def _verify_model(self) -> None:
+        """Warn (never raise) if the configured model is not available locally."""
+        try:
+            # List available models. Newer ollama clients expose the identifier
+            # as "model", older ones as "name", so accept either.
+            models = ollama.list()
+            model_names = [
+                m.get("model") or m.get("name") or ""
+                for m in models.get("models", [])
+            ]
+
+            if not any(self.model in name for name in model_names):
+                logger.warning(
+                    f"Model '{self.model}' not found locally. "
+                    f"Available models: {model_names}"
+                )
+                # Nothing in this class downloads models, so say how to get it
+                logger.info(f"Pull it with: ollama pull {self.model}")
+        except Exception as e:
+            logger.warning(f"Could not verify model availability: {e}")
+
+    def extract(self, content: str, prompt: str) -> str:
+        """
+        Extract information from content using LLM.
+
+        Args:
+            content: Text content to extract from
+            prompt: Extraction prompt (e.g., "Extract all product titles")
+
+        Returns:
+            LLM response as string
+
+        Raises:
+            RuntimeError: If the Ollama call fails (original error is chained)
+        """
+        full_prompt = self._build_prompt(content, prompt)
+
+        try:
+            logger.info(f"Sending extraction request to {self.model}...")
+            response = ollama.generate(model=self.model, prompt=full_prompt)
+
+            result = response.get("response", "")
+            logger.debug(f"LLM extraction complete ({len(result)} characters)")
+            return result
+        except Exception as e:
+            logger.error(f"LLM extraction failed: {e}")
+            raise RuntimeError(f"LLM extraction failed: {e}") from e
+
+    def extract_structured(
+        self, content: str, prompt: str, format: str = "json"
+    ) -> Any:
+        """
+        Extract structured data and parse as JSON.
+
+        Args:
+            content: Text content to extract from
+            prompt: Extraction prompt
+            format: Expected output format ('json' or 'text')
+
+        Returns:
+            Parsed JSON object or text string
+
+        Raises:
+            json.JSONDecodeError: If no valid JSON can be recovered from the reply
+        """
+        if format != "json":
+            return self.extract(content, prompt)
+
+        enhanced_prompt = (
+            f"{prompt}\n\n"
+            "Respond ONLY with valid JSON. No explanations."
+        )
+        result = self.extract(content, enhanced_prompt)
+
+        try:
+            return json.loads(result)
+        except json.JSONDecodeError as e:
+            logger.warning(f"Failed to parse JSON from LLM: {e}")
+            # Try to extract JSON from markdown code blocks
+            return json.loads(self._extract_json_from_markdown(result))
+
+    def _build_prompt(self, content: str, user_prompt: str) -> str:
+        """
+        Build the full prompt for the LLM.
+
+        Args:
+            content: Content to extract from
+            user_prompt: User's extraction instruction
+
+        Returns:
+            Complete prompt string
+        """
+        # Truncate content if too long (most models have context limits)
+        max_content_length = 4000
+        if len(content) > max_content_length:
+            logger.warning(
+                f"Content truncated from {len(content)} to "
+                f"{max_content_length} characters"
+            )
+            content = content[:max_content_length] + "..."
+
+        prompt = f"""You are a data extraction assistant. Extract information from the following content based on the user's request.
+
+User Request: {user_prompt}
+
+Content:
+{content}
+
+Provide the extracted information:"""
+
+        return prompt
+
+    def _extract_json_from_markdown(self, text: str) -> str:
+        """
+        Extract JSON from markdown code blocks.
+
+        Args:
+            text: Text potentially containing markdown code blocks
+
+        Returns:
+            First fenced JSON object/array, or the original text if none is found
+        """
+        # Try to find JSON in ```json ``` blocks
+        import re
+
+        json_pattern = r"```(?:json)?\s*(\{.*?\}|\[.*?\])\s*```"
+        match = re.search(json_pattern, text, re.DOTALL)
+
+        # If no code blocks, return original
+        return match.group(1) if match else text
+
+
+def extract_with_llm(
+    content: str,
+    prompt: str,
+    model: str = "llama3",
+    structured: bool = False,
+) -> Any:
+    """
+    Run a one-off LLM extraction over content.
+
+    Args:
+        content: Content to extract from
+        prompt: Extraction prompt
+        model: Ollama model name
+        structured: Return structured JSON if True
+
+    Returns:
+        Extracted data (string or dict)
+    """
+    if not OLLAMA_AVAILABLE:
+        raise RuntimeError(
+            "Ollama not available. Install with: pip install ollama"
+        )
+
+    llm = LLMExtractor(model=model)
+    if not structured:
+        return llm.extract(content, prompt)
+
+    # Structured mode requests JSON from the model and parses the reply
+    return llm.extract_structured(content, prompt, format="json")
+
+
+def is_ollama_available() -> bool:
+    """
+    Report whether the ollama package imported and ollama.list() succeeds.
+
+    Returns:
+        True if Ollama is installed and accessible
+    """
+    if OLLAMA_AVAILABLE:
+        try:
+            # Any error from ollama.list() counts as "not available"
+            ollama.list()
+        except Exception:
+            return False
+        return True
+    return False
diff --git a/termscrape/src/main.py b/termscrape/src/main.py
new file mode 100644
index 0000000..2f0926e
--- /dev/null
+++ b/termscrape/src/main.py
@@ -0,0 +1,253 @@
+"""TermScrape CLI - Terminal-based web scraper."""
+
+import json
+import sys
+from pathlib import Path
+from typing import Optional
+
+import click
+
+from .crawler import crawl_site
+from .llm_extract import extract_with_llm, is_ollama_available
+from .scraper import scrape_url
+from .utils import sanitize_filename, setup_logging
+
+
+@click.group()
+@click.option("--verbose", is_flag=True, help="Enable verbose logging (DEBUG level)")
+@click.pass_context
+def cli(ctx, verbose: bool):
+    """TermScrape: Terminal-based web scraper with LLM support."""
+    # Configure logging according to the --verbose flag
+    setup_logging(verbose)
+    # Record the flag on the click context object
+    ctx.ensure_object(dict)
+    ctx.obj["verbose"] = verbose
+
+
+@cli.command()
+@click.option("--url", required=True, help="URL to scrape")
+@click.option(
+    "--format",
+    type=click.Choice(["markdown", "json", "text"], case_sensitive=False),
+    default="markdown",
+    help="Output format",
+)
+@click.option("--js", is_flag=True, help="Enable JavaScript rendering")
+@click.option(
+    "--output", "-o", type=click.Path(), help="Output file (default: stdout)"
+)
+@click.option(
+    "--user-agent",
+    default="TermScrape/1.0",
+    help="Custom User-Agent string",
+)
+@click.option(
+    "--llm-prompt",
+    help="Optional LLM extraction prompt (requires Ollama)",
+)
+@click.option(
+    "--llm-model",
+    default="llama3",
+    help="LLM model to use for extraction (default: llama3)",
+)
+@click.pass_context
+def scrape(
+    ctx,
+    url: str,
+    format: str,
+    js: bool,
+    output: Optional[str],
+    user_agent: str,
+    llm_prompt: Optional[str],
+    llm_model: str,
+):
+    """Scrape a single URL and output content."""
+    try:
+        # Fail fast: check Ollama before spending time fetching the page
+        if llm_prompt and not is_ollama_available():
+            click.echo(
+                "Error: Ollama is not available. "
+                "Install with: pip install ollama",
+                err=True,
+            )
+            sys.exit(1)
+
+        click.echo(f"Scraping {url}...", err=True)
+        content = scrape_url(
+            url=url,
+            output_format=format,
+            use_js=js,
+            user_agent=user_agent,
+        )
+
+        # Apply LLM extraction if requested
+        if llm_prompt:
+            click.echo(f"Extracting with LLM ({llm_model})...", err=True)
+            content = extract_with_llm(
+                content=content,
+                prompt=llm_prompt,
+                model=llm_model,
+                structured=False,
+            )
+
+        # Output result
+        if output:
+            output_path = Path(output)
+            output_path.write_text(content, encoding="utf-8")
+            click.echo(f"Saved to {output}", err=True)
+        else:
+            click.echo(content)
+
+    except Exception as e:
+        click.echo(f"Error: {e}", err=True)
+        sys.exit(1)
+
+
+@cli.command()
+@click.option("--url", required=True, help="Starting URL to crawl")
+@click.option(
+    "--depth",
+    type=int,
+    default=1,
+    help="Maximum crawl depth (default: 1)",
+)
+@click.option(
+    "--format",
+    type=click.Choice(["markdown", "json", "text"], case_sensitive=False),
+    default="markdown",
+    help="Output format for each page",
+)
+@click.option("--js", is_flag=True, help="Enable JavaScript rendering")
+@click.option(
+    "--output", "-o", type=click.Path(), help="Output directory or file"
+)
+@click.option(
+    "--user-agent",
+    default="TermScrape/1.0",
+    help="Custom User-Agent string",
+)
+@click.option(
+    "--exclude",
+    multiple=True,
+    help="URL patterns to exclude (can be used multiple times)",
+)
+@click.option(
+    "--llm-prompt",
+    help="Optional LLM extraction prompt (requires Ollama)",
+)
+@click.option(
+    "--llm-model",
+    default="llama3",
+    help="LLM model to use for extraction (default: llama3)",
+)
+@click.pass_context
+def crawl(
+    ctx,
+    url: str,
+    depth: int,
+    format: str,
+    js: bool,
+    output: Optional[str],
+    user_agent: str,
+    exclude: tuple,
+    llm_prompt: Optional[str],
+    llm_model: str,
+):
+    """Crawl a website recursively."""
+    try:
+        # Fail fast: verify Ollama before the crawl so its results are not wasted
+        if llm_prompt and not is_ollama_available():
+            click.echo(
+                "Error: Ollama is not available. "
+                "Install with: pip install ollama",
+                err=True,
+            )
+            sys.exit(1)
+
+        # Crawl the site
+        click.echo(
+            f"Crawling {url} (depth: {depth})...", err=True
+        )
+        results = crawl_site(
+            start_url=url,
+            max_depth=depth,
+            output_format=format,
+            use_js=js,
+            user_agent=user_agent,
+            exclude_patterns=list(exclude) if exclude else None,
+        )
+
+        # Failed pages carry "error" instead of "content"; keep them out of counts
+        scraped = {u: d for u, d in results.items() if "content" in d}
+
+        # Apply LLM extraction if requested
+        if llm_prompt:
+            click.echo(
+                f"Extracting with LLM ({llm_model})...", err=True
+            )
+            for data in scraped.values():
+                data["llm_extraction"] = extract_with_llm(
+                    content=data["content"],
+                    prompt=llm_prompt,
+                    model=llm_model,
+                    structured=False,
+                )
+
+        # Output results
+        if output:
+            output_path = Path(output)
+
+            # If output is a directory, save each page as separate file
+            if output_path.suffix == "" or output_path.is_dir():
+                output_path.mkdir(parents=True, exist_ok=True)
+                extension = {"markdown": ".md", "json": ".json"}.get(format, ".txt")
+
+                for page_url, data in scraped.items():
+                    # Create filename from URL
+                    filename = sanitize_filename(page_url) + extension
+                    file_path = output_path / filename
+                    file_path.write_text(
+                        data["content"], encoding="utf-8"
+                    )
+                    # Keep the LLM output next to its page instead of dropping it
+                    if "llm_extraction" in data:
+                        llm_path = output_path / f"{filename}.llm.txt"
+                        llm_path.write_text(
+                            data["llm_extraction"], encoding="utf-8"
+                        )
+
+                click.echo(
+                    f"Saved {len(scraped)} pages to {output_path}",
+                    err=True,
+                )
+
+            else:
+                # Save as single JSON file
+                output_path.write_text(
+                    json.dumps(results, indent=2, ensure_ascii=False),
+                    encoding="utf-8",
+                )
+                click.echo(f"Saved to {output}", err=True)
+
+        else:
+            # Print to stdout as JSON
+            click.echo(json.dumps(results, indent=2, ensure_ascii=False))
+
+        click.echo(
+            f"Crawled {len(scraped)} of {len(results)} pages successfully.", err=True
+        )
+
+    except Exception as e:
+        click.echo(f"Error: {e}", err=True)
+        sys.exit(1)
+
+
+@cli.command()
+def version():
+    """Show TermScrape version."""
+    click.echo("TermScrape v1.0.0")  # keep in sync with __version__ in __init__.py
+
+
+if __name__ == "__main__":
+    cli(obj={})  # start with an empty dict as the click context object
diff --git a/termscrape/src/parser.py b/termscrape/src/parser.py
new file mode 100644
index 0000000..b94547f
--- /dev/null
+++ b/termscrape/src/parser.py
@@ -0,0 +1,195 @@
+"""HTML parsing and output formatting for TermScrape."""
+
+import json
+import logging
+from typing import Any, Dict, List, Optional
+
+from bs4 import BeautifulSoup
+from markdownify import markdownify as md
+
+
+logger = logging.getLogger("termscrape")
+
+
+class HTMLParser:
+    """Parse HTML content and convert to various formats.
+
+    The HTML is parsed once in ``__init__`` and kept in ``self.soup``.
+    Methods that have to delete elements (``to_markdown`` and ``get_text``)
+    operate on a re-parsed copy, so calling them never changes what the
+    other methods see afterwards.
+    """
+
+    def __init__(self, html_content: str, url: Optional[str] = None):
+        """
+        Initialize parser with HTML content.
+
+        Args:
+            html_content: Raw HTML string
+            url: Source URL (optional, for metadata)
+        """
+        self.html_content = html_content
+        self.url = url
+        self.soup = BeautifulSoup(html_content, "html.parser")
+
+    def _copy_soup(self) -> BeautifulSoup:
+        """Return an independent copy of ``self.soup`` that is safe to mutate."""
+        return BeautifulSoup(str(self.soup), "html.parser")
+
+    def to_markdown(self, strip_tags: Optional[List[str]] = None) -> str:
+        """
+        Convert HTML to markdown format.
+
+        Args:
+            strip_tags: List of HTML tags to remove before conversion.
+                ``None`` selects the default list (script, style, nav,
+                footer, header); an explicit empty list removes nothing.
+
+        Returns:
+            Markdown formatted string
+        """
+        soup_copy = self._copy_soup()
+
+        # Remove unwanted tags (scripts, styles, etc.)
+        default_strip = ["script", "style", "nav", "footer", "header"]
+        # Compare against None instead of using ``or`` so that a caller can
+        # pass [] to disable stripping altogether.
+        tags_to_strip = default_strip if strip_tags is None else strip_tags
+
+        for tag in tags_to_strip:
+            for element in soup_copy.find_all(tag):
+                element.decompose()
+
+        # Convert to markdown
+        markdown_content = md(
+            str(soup_copy),
+            heading_style="ATX",
+            bullets="-",
+            strip=["img"],
+        )
+
+        return markdown_content.strip()
+
+    def to_json(self, structured: bool = True) -> str:
+        """
+        Convert HTML to JSON format.
+
+        Args:
+            structured: If True, extract structured data; else raw HTML dict
+
+        Returns:
+            JSON formatted string
+        """
+        if structured:
+            data = self._extract_structured_data()
+        else:
+            data = {"url": self.url, "html": self.html_content}
+
+        return json.dumps(data, indent=2, ensure_ascii=False)
+
+    def _extract_structured_data(self) -> Dict[str, Any]:
+        """
+        Extract structured data from HTML.
+
+        Returns:
+            Dictionary with the keys url, title, meta, headings,
+            paragraphs, links and images.
+        """
+        return {
+            "url": self.url,
+            "title": self._get_title(),
+            "meta": self._get_meta_tags(),
+            "headings": self._get_headings(),
+            "paragraphs": self._get_paragraphs(),
+            "links": self._get_links(),
+            "images": self._get_images(),
+        }
+
+    def _get_title(self) -> Optional[str]:
+        """Extract page title, or None when there is no <title> tag."""
+        title_tag = self.soup.find("title")
+        return title_tag.get_text(strip=True) if title_tag else None
+
+    def _get_meta_tags(self) -> Dict[str, str]:
+        """Extract meta tags keyed by their ``name`` (or ``property``)."""
+        meta_tags = {}
+        for meta in self.soup.find_all("meta"):
+            name = meta.get("name") or meta.get("property")
+            content = meta.get("content")
+            # Tags lacking either a key or a content value are skipped.
+            if name and content:
+                meta_tags[name] = content
+        return meta_tags
+
+    def _get_headings(self) -> List[Dict[str, Any]]:
+        """Extract all headings (h1-h6).
+
+        The result is grouped by level (every h1 first, then every h2, and
+        so on), not ordered by position in the document.
+        """
+        headings = []
+        for level in range(1, 7):
+            for heading in self.soup.find_all(f"h{level}"):
+                headings.append(
+                    {"level": level, "text": heading.get_text(strip=True)}
+                )
+        return headings
+
+    def _get_paragraphs(self) -> List[str]:
+        """Extract the text of every non-empty <p> element."""
+        paragraphs = []
+        for p in self.soup.find_all("p"):
+            text = p.get_text(strip=True)
+            if text:
+                paragraphs.append(text)
+        return paragraphs
+
+    def _get_links(self) -> List[Dict[str, Optional[str]]]:
+        """Extract all <a> elements that carry an href attribute."""
+        return [
+            {
+                "text": link.get_text(strip=True),
+                "href": link.get("href"),
+                "title": link.get("title"),
+            }
+            for link in self.soup.find_all("a", href=True)
+        ]
+
+    def _get_images(self) -> List[Dict[str, Optional[str]]]:
+        """Extract src/alt/title of every <img> element."""
+        return [
+            {
+                "src": img.get("src"),
+                "alt": img.get("alt"),
+                "title": img.get("title"),
+            }
+            for img in self.soup.find_all("img")
+        ]
+
+    def get_text(self) -> str:
+        """
+        Extract plain text from HTML.
+
+        Returns:
+            Plain text content with <script> and <style> bodies removed
+        """
+        # Work on a copy: decomposing elements on self.soup would silently
+        # change the output of later to_markdown()/to_json() calls.
+        soup_copy = self._copy_soup()
+
+        # Remove scripts and styles
+        for element in soup_copy(["script", "style"]):
+            element.decompose()
+
+        return soup_copy.get_text(separator="\n", strip=True)
+
+
+def parse_html(
+    html_content: str,
+    url: Optional[str] = None,
+    output_format: str = "markdown",
+) -> str:
+    """
+    Build an HTMLParser for the given HTML and render it.
+
+    Args:
+        html_content: Raw HTML string
+        url: Source URL
+        output_format: One of 'markdown', 'json' or 'text'; any other
+            value logs a warning and is rendered as markdown
+
+    Returns:
+        Formatted content as string
+    """
+    document = HTMLParser(html_content, url)
+
+    # Handle the two non-markdown formats first, then fall through.
+    if output_format == "json":
+        return document.to_json(structured=True)
+    if output_format == "text":
+        return document.get_text()
+
+    if output_format != "markdown":
+        logger.warning(f"Unknown format '{output_format}', using markdown")
+    return document.to_markdown()
diff --git a/termscrape/src/scraper.py b/termscrape/src/scraper.py
new file mode 100644
index 0000000..f1a339f
--- /dev/null
+++ b/termscrape/src/scraper.py
@@ -0,0 +1,161 @@
+"""Single URL scraping functionality."""
+
+import logging
+from typing import Optional
+
+import requests
+from requests.exceptions import RequestException, Timeout
+
+from .browser import fetch_with_js_sync
+from .parser import parse_html
+from .utils import check_robots_txt, is_valid_url, random_delay
+
+
+logger = logging.getLogger("termscrape")
+
+
+class ScraperError(Exception):
+    """Exception type raised by the scraper for scraping failures."""
+
+
+class Scraper:
+    """Scrape content from a single URL.
+
+    Holds one ``requests.Session`` for its lifetime. Call ``close()`` (or
+    use the instance as a context manager) to release it when done.
+    """
+
+    def __init__(
+        self,
+        user_agent: str = "TermScrape/1.0",
+        timeout: int = 30,
+        respect_robots: bool = True,
+        use_delay: bool = True,
+    ):
+        """
+        Initialize scraper.
+
+        Args:
+            user_agent: User agent string for requests
+            timeout: Request timeout in seconds
+            respect_robots: Check robots.txt before scraping
+            use_delay: Add random delay between requests
+        """
+        self.user_agent = user_agent
+        self.timeout = timeout
+        self.respect_robots = respect_robots
+        self.use_delay = use_delay
+        self.session = requests.Session()
+        self.session.headers.update({"User-Agent": self.user_agent})
+
+    def close(self) -> None:
+        """Close the underlying HTTP session."""
+        self.session.close()
+
+    def __enter__(self) -> "Scraper":
+        """Return the scraper itself so it can be used in a ``with`` block."""
+        return self
+
+    def __exit__(self, exc_type, exc, tb) -> None:
+        """Close the session when the ``with`` block ends."""
+        self.close()
+
+    def scrape(
+        self,
+        url: str,
+        output_format: str = "markdown",
+        use_js: bool = False,
+    ) -> str:
+        """
+        Scrape a single URL.
+
+        Args:
+            url: URL to scrape
+            output_format: Output format ('markdown', 'json', 'text')
+            use_js: Use headless browser for JavaScript rendering
+
+        Returns:
+            Scraped content in specified format
+
+        Raises:
+            ScraperError: If the URL is invalid, robots.txt disallows it,
+                or fetching/parsing fails. For fetch/parse failures the
+                original exception is available as ``__cause__``.
+        """
+        # Validate URL
+        if not is_valid_url(url):
+            raise ScraperError(f"Invalid URL: {url}")
+
+        # Check robots.txt
+        if self.respect_robots and not check_robots_txt(url, self.user_agent):
+            raise ScraperError(f"Scraping {url} is disallowed by robots.txt")
+
+        # Add delay if enabled
+        if self.use_delay:
+            random_delay()
+
+        # Fetch content
+        try:
+            if use_js:
+                logger.info(f"Fetching {url} with JavaScript rendering...")
+                # self.timeout is in seconds; it is multiplied by 1000
+                # before being handed to the browser helper.
+                html_content = fetch_with_js_sync(
+                    url, self.user_agent, self.timeout * 1000
+                )
+            else:
+                logger.info(f"Fetching {url} with static request...")
+                html_content = self._fetch_static(url)
+
+            # Parse and format
+            result = parse_html(html_content, url, output_format)
+            logger.info(f"Successfully scraped {url}")
+
+            return result
+
+        except Exception as e:
+            logger.error(f"Failed to scrape {url}: {e}")
+            # Chain the cause so the original traceback is not lost.
+            raise ScraperError(f"Failed to scrape {url}: {e}") from e
+
+    def _fetch_static(self, url: str) -> str:
+        """
+        Fetch page content using static HTTP request.
+
+        Args:
+            url: URL to fetch
+
+        Returns:
+            Response body as text (a warning is logged when the
+            Content-Type header does not mention text/html)
+
+        Raises:
+            ScraperError: If the request times out or otherwise fails
+        """
+        try:
+            response = self.session.get(url, timeout=self.timeout)
+            response.raise_for_status()
+
+            # Check content type; media types are case-insensitive, so
+            # compare in lower case but log the header as received.
+            content_type = response.headers.get("Content-Type", "")
+            if "text/html" not in content_type.lower():
+                logger.warning(
+                    f"URL {url} returned non-HTML content: {content_type}"
+                )
+
+            return response.text
+
+        except Timeout as e:
+            raise ScraperError(f"Request timeout for {url}") from e
+        except RequestException as e:
+            raise ScraperError(f"Request failed for {url}: {e}") from e
+
+
+def scrape_url(
+    url: str,
+    output_format: str = "markdown",
+    use_js: bool = False,
+    user_agent: str = "TermScrape/1.0",
+    respect_robots: bool = True,
+) -> str:
+    """
+    Convenience function to scrape a single URL.
+
+    Creates a one-off Scraper, scrapes the URL, and closes the scraper's
+    HTTP session before returning.
+
+    Args:
+        url: URL to scrape
+        output_format: Output format ('markdown', 'json', 'text')
+        use_js: Use JavaScript rendering
+        user_agent: Custom user agent
+        respect_robots: Check robots.txt
+
+    Returns:
+        Scraped content in specified format
+
+    Raises:
+        ScraperError: Propagated from ``Scraper.scrape``
+    """
+    scraper = Scraper(
+        user_agent=user_agent,
+        respect_robots=respect_robots,
+    )
+    try:
+        return scraper.scrape(url, output_format, use_js)
+    finally:
+        # The scraper is discarded after this call, so release its session
+        # instead of leaving that to garbage collection.
+        scraper.session.close()
diff --git a/termscrape/src/utils.py b/termscrape/src/utils.py
new file mode 100644
index 0000000..24d8f6e
--- /dev/null
+++ b/termscrape/src/utils.py
@@ -0,0 +1,123 @@
+"""Utility functions for TermScrape."""
+
+import logging
+import random
+import time
+from typing import Optional
+from urllib.parse import urlparse
+from urllib.robotparser import RobotFileParser
+
+
+def setup_logging(verbose: bool = False) -> logging.Logger:
+    """
+    Configure root logging and hand back the application logger.
+
+    Args:
+        verbose: When True the level is DEBUG, otherwise INFO
+
+    Returns:
+        The logger named "termscrape"
+    """
+    if verbose:
+        chosen_level = logging.DEBUG
+    else:
+        chosen_level = logging.INFO
+
+    stream_handler = logging.StreamHandler()
+    logging.basicConfig(
+        level=chosen_level,
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+        handlers=[stream_handler],
+    )
+
+    app_logger = logging.getLogger("termscrape")
+    return app_logger
+
+
+def random_delay(min_seconds: float = 1.0, max_seconds: float = 5.0) -> None:
+    """
+    Pause the current thread for a randomly chosen duration.
+
+    Args:
+        min_seconds: Lower bound of the pause, in seconds
+        max_seconds: Upper bound of the pause, in seconds
+    """
+    # Draw the pause length and sleep in a single step.
+    time.sleep(random.uniform(min_seconds, max_seconds))
+
+
+def check_robots_txt(url: str, user_agent: str = "TermScrape/1.0") -> bool:
+    """
+    Consult the site's robots.txt to decide whether a URL may be scraped.
+
+    Args:
+        url: Target URL to check
+        user_agent: User agent string to use for checking
+
+    Returns:
+        The robots.txt verdict for this user agent and URL. If anything
+        raises while building, fetching or evaluating robots.txt, a warning
+        is logged and True is returned.
+    """
+    try:
+        parts = urlparse(url)
+        robots_location = "{}://{}/robots.txt".format(
+            parts.scheme, parts.netloc
+        )
+
+        # Passing the URL to the constructor is the same as set_url().
+        robots = RobotFileParser(robots_location)
+        robots.read()
+
+        allowed = robots.can_fetch(user_agent, url)
+    except Exception as e:
+        # If robots.txt cannot be fetched, allow by default
+        logging.getLogger("termscrape").warning(
+            f"Could not fetch robots.txt for {url}: {e}"
+        )
+        allowed = True
+
+    return allowed
+
+
+# Characters replaced by sanitize_filename: the reserved punctuation plus
+# the ASCII control characters 0-31 (NUL, tab, newline, ...).
+_INVALID_FILENAME_CHARS = '<>:"/\\|?*' + "".join(map(chr, range(32)))
+_FILENAME_TRANSLATION = str.maketrans(
+    {char: "_" for char in _INVALID_FILENAME_CHARS}
+)
+
+
+def sanitize_filename(filename: str) -> str:
+    """
+    Sanitize a filename by replacing invalid characters with "_".
+
+    Replaced characters are ``< > : " / \\ | ? *`` and the ASCII control
+    characters 0-31. Every other character is kept as is, and the length
+    of the string never changes.
+
+    Args:
+        filename: Original filename
+
+    Returns:
+        Sanitized filename safe for filesystem
+    """
+    # One translate() pass instead of a chain of replace() calls.
+    return filename.translate(_FILENAME_TRANSLATION)
+
+
+def normalize_url(url: str, base_url: Optional[str] = None) -> str:
+    """
+    Normalize a URL by handling relative paths and fragments.
+
+    Args:
+        url: URL to normalize
+        base_url: Base URL for resolving relative paths
+
+    Returns:
+        The URL without its fragment, resolved against ``base_url`` when
+        one is given
+    """
+    from urllib.parse import urljoin, urldefrag
+
+    # Resolve relative URLs first ...
+    if base_url:
+        url = urljoin(base_url, url)
+
+    # ... and drop the fragment afterwards. Doing it in this order also
+    # removes a fragment that came from base_url (urljoin returns the base
+    # unchanged when ``url`` is empty).
+    return urldefrag(url).url
+
+
+def is_valid_url(url: str) -> bool:
+    """
+    Report whether a string parses into a URL with scheme and host part.
+
+    Args:
+        url: URL string to validate
+
+    Returns:
+        True when both scheme and netloc are non-empty; False otherwise,
+        including when parsing raises
+    """
+    try:
+        parts = urlparse(url)
+    except Exception:
+        return False
+
+    return bool(parts.scheme) and bool(parts.netloc)
diff --git a/termscrape/tests/__init__.py b/termscrape/tests/__init__.py
new file mode 100644
index 0000000..11a6609
--- /dev/null
+++ b/termscrape/tests/__init__.py
@@ -0,0 +1 @@
+"""Tests for TermScrape."""
diff --git a/termscrape/tests/test_crawler.py b/termscrape/tests/test_crawler.py
new file mode 100644
index 0000000..175ca37
--- /dev/null
+++ b/termscrape/tests/test_crawler.py
@@ -0,0 +1,107 @@
+"""Tests for crawler module."""
+
+import pytest
+from unittest.mock import Mock, patch
+
+from src.crawler import Crawler, crawl_site
+
+
+class TestCrawler:
+    """Unit tests covering the Crawler class."""
+
+    def test_crawler_initialization(self):
+        """A default-constructed crawler exposes the expected settings."""
+        default_crawler = Crawler()
+
+        expected_values = {"user_agent": "TermScrape/1.0", "max_depth": 1}
+        for attr_name, expected in expected_values.items():
+            assert getattr(default_crawler, attr_name) == expected
+
+        assert default_crawler.respect_robots is True
+        assert default_crawler.same_domain_only is True
+
+    @patch("src.crawler.Scraper.scrape")
+    @patch("src.crawler.Scraper._fetch_static")
+    def test_crawl_single_page(self, mock_fetch, mock_scrape):
+        """With depth 0 only the start page ends up in the results."""
+        start_url = "https://example.com"
+        mock_scrape.return_value = "# Test Page"
+        mock_fetch.return_value = "<html><body>Test</body></html>"
+
+        results = Crawler(max_depth=0).crawl(start_url)
+
+        assert len(results) == 1
+        assert start_url in results
+        assert results[start_url]["depth"] == 0
+
+    @patch("src.crawler.Scraper.scrape")
+    @patch("src.crawler.Scraper._fetch_static")
+    def test_crawl_with_links(self, mock_fetch, mock_scrape):
+        """Crawling a page that contains links yields at least one result."""
+        mock_scrape.return_value = "# Test"
+
+        # The first fetched page links to two further pages.
+        landing_html = """<html><body>
+                <a href="/page2">Link 2</a>
+                <a href="/page3">Link 3</a>
+            </body></html>"""
+        mock_fetch.side_effect = [
+            landing_html,
+            "<html><body>Page 2</body></html>",
+            "<html><body>Page 3</body></html>",
+        ]
+
+        results = Crawler(max_depth=1).crawl("https://example.com/")
+
+        # Should visit the start page plus linked pages
+        assert len(results) >= 1
+
+    def test_exclude_patterns(self):
+        """URLs containing an exclude pattern are rejected, others are not."""
+        crawler = Crawler(exclude_patterns=[".pdf", "/login"])
+
+        excluded_urls = (
+            "https://example.com/file.pdf",
+            "https://example.com/login",
+        )
+        for candidate in excluded_urls:
+            assert crawler._should_exclude(candidate)
+
+        assert not crawler._should_exclude("https://example.com/page")
+
+    @patch("src.crawler.Scraper.scrape")
+    @patch("src.crawler.Scraper._fetch_static")
+    def test_crawl_respects_depth(self, mock_fetch, mock_scrape):
+        """A max depth of 0 limits the crawl to the start URL."""
+        mock_scrape.return_value = "# Test"
+        mock_fetch.return_value = "<html><body>Test</body></html>"
+
+        results = Crawler(max_depth=0).crawl("https://example.com")
+
+        # With depth 0, should only visit start URL
+        assert len(results) == 1
+
+    @patch("src.crawler.Scraper.scrape")
+    def test_crawl_handles_errors(self, mock_scrape):
+        """A ScraperError is recorded in the results instead of propagating."""
+        from src.scraper import ScraperError
+
+        start_url = "https://example.com"
+        mock_scrape.side_effect = ScraperError("Test error")
+
+        results = Crawler(max_depth=0).crawl(start_url)
+
+        # Should record error instead of failing
+        assert start_url in results
+        assert "error" in results[start_url]
+
+    def test_same_domain_only(self):
+        """The same_domain_only flag is stored on the crawler."""
+        restricted = Crawler(same_domain_only=True, max_depth=1)
+
+        # This would be tested in integration, but we can verify the flag
+        assert restricted.same_domain_only is True
+
+
+def test_crawl_site_convenience_function():
+    """crawl_site returns whatever Crawler.crawl produces, calling it once."""
+    start_url = "https://example.com"
+    canned_results = {start_url: {"content": "Test"}}
+
+    with patch(
+        "src.crawler.Crawler.crawl", return_value=canned_results
+    ) as mock_crawl:
+        result = crawl_site(start_url)
+
+    assert start_url in result
+    mock_crawl.assert_called_once()
diff --git a/termscrape/tests/test_scraper.py b/termscrape/tests/test_scraper.py
new file mode 100644
index 0000000..8e81b1f
--- /dev/null
+++ b/termscrape/tests/test_scraper.py
@@ -0,0 +1,106 @@
+"""Tests for scraper module."""
+
+import pytest
+from unittest.mock import Mock, patch
+
+from src.scraper import Scraper, ScraperError, scrape_url
+
+
+class TestScraper:
+    """Test cases for Scraper class."""
+
+    def test_scraper_initialization(self):
+        """Test scraper initializes with correct defaults."""
+        scraper = Scraper()
+        assert scraper.user_agent == "TermScrape/1.0"
+        assert scraper.timeout == 30
+        assert scraper.respect_robots is True
+
+    def test_invalid_url(self):
+        """Test that invalid URLs raise error."""
+        scraper = Scraper()
+        with pytest.raises(ScraperError, match="Invalid URL"):
+            scraper.scrape("not-a-valid-url")
+
+    @patch("src.scraper.check_robots_txt")
+    @patch("src.scraper.requests.Session.get")
+    def test_scrape_static_page(self, mock_get, mock_robots):
+        """Test scraping a static page."""
+        # Mock robots.txt check
+        mock_robots.return_value = True
+
+        # Mock HTTP response
+        mock_response = Mock()
+        mock_response.text = """
+        <html>
+            <head><title>Test Page</title></head>
+            <body><h1>Hello World</h1></body>
+        </html>
+        """
+        mock_response.headers = {"Content-Type": "text/html"}
+        mock_response.raise_for_status = Mock()
+        mock_get.return_value = mock_response
+
+        scraper = Scraper(use_delay=False)
+        result = scraper.scrape("https://example.com", output_format="text")
+
+        assert "Hello World" in result
+        mock_robots.assert_called_once()
+
+    @patch("src.scraper.check_robots_txt")
+    def test_robots_txt_disallowed(self, mock_robots):
+        """Test that robots.txt disallowed URLs are blocked."""
+        mock_robots.return_value = False
+
+        scraper = Scraper()
+        with pytest.raises(ScraperError, match="disallowed by robots.txt"):
+            scraper.scrape("https://example.com")
+
+    @patch("src.scraper.check_robots_txt")
+    @patch("src.scraper.requests.Session.get")
+    def test_scrape_markdown_format(self, mock_get, mock_robots):
+        """Test markdown output format."""
+        mock_robots.return_value = True
+
+        mock_response = Mock()
+        mock_response.text = """
+        <html>
+            <body>
+                <h1>Heading</h1>
+                <p>Paragraph text</p>
+            </body>
+        </html>
+        """
+        mock_response.headers = {"Content-Type": "text/html"}
+        mock_response.raise_for_status = Mock()
+        mock_get.return_value = mock_response
+
+        scraper = Scraper(use_delay=False)
+        result = scraper.scrape(
+            "https://example.com", output_format="markdown"
+        )
+
+        # The parser converts with heading_style="ATX", so the <h1> must
+        # come out as "# Heading". The former
+        # `"# Heading" in result or "Heading" in result` passed for any
+        # output that merely contained the word.
+        assert "# Heading" in result
+        assert "Paragraph text" in result
+
+    @patch("src.scraper.check_robots_txt")
+    @patch("src.scraper.requests.Session.get")
+    def test_request_timeout(self, mock_get, mock_robots):
+        """Test handling of request timeouts."""
+        # Imported locally: only this test needs the requests exception.
+        from requests.exceptions import Timeout
+
+        mock_robots.return_value = True
+        # Raise a real Timeout so the scraper's dedicated timeout branch is
+        # exercised; a bare Exception would bypass it.
+        mock_get.side_effect = Timeout("Timeout")
+
+        scraper = Scraper(use_delay=False)
+        with pytest.raises(ScraperError, match="Request timeout"):
+            scraper.scrape("https://example.com")
+
+
+def test_scrape_url_convenience_function():
+    """scrape_url hands back Scraper.scrape's result and calls it once."""
+    canned_content = "Test content"
+
+    with patch(
+        "src.scraper.Scraper.scrape", return_value=canned_content
+    ) as mock_scrape:
+        result = scrape_url("https://example.com")
+
+    assert result == canned_content
+    mock_scrape.assert_called_once()