diff --git a/.gitignore b/.gitignore index 454059a..60e56f5 100644 --- a/.gitignore +++ b/.gitignore @@ -48,6 +48,7 @@ coverage.xml # Jupyter Notebook .ipynb_checkpoints +test.ipynb # pyenv .python-version diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..8fcbf31 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,51 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +### Added +- **Heuristic reference removal**: Automatically detect and remove bibliographic reference lines based on pattern scoring +- **Batch processing script**: Process multiple markdown files in parallel with `scripts/clean_mds_in_folder.py` +- **Footnote pattern removal**: Remove footnote references in text (e.g., `.1`, `.23`) +- **Enhanced linebreak crimping**: Improved algorithm for fixing line break errors from PDF conversion + - Connective-based crimping for lines ending with `-`, `–`, `—`, or `...` + - Justified text crimping for adjacent lines of similar length +- **CLI options**: + - `--keep-references`: Disable heuristic reference detection + - `--no-crimping`: Disable linebreak crimping + - Additional fine-grained control options for cleaning operations + +### Changed +- **Default patterns**: Updated `default_cleaning_patterns.yaml` with: + - Improved section removal patterns (e.g., "Authors' Note", "Note on sources") + - Additional inline patterns (LaTeX footnotes, trailing ellipsis) + - Refined keyword and conflict of interest patterns +- **API**: `MarkdownCleaner` constructor now accepts optional `patterns` parameter (defaults to None for default patterns) +- **Linebreak crimping**: Now enabled by default (`crimp_linebreaks: True`) +- **README**: Comprehensive documentation updates with new features and examples + +### Fixed +- Linebreak crimping logic now properly handles various PDF conversion artifacts +- Test suite updated to match new implementation + +## [0.2.0] - 2025-03-XX + +Initial PyPI release with core markdown cleaning functionality. + +### Features +- Remove references, bibliographies, and citations +- Remove copyright notices and legal disclaimers +- Remove acknowledgements and funding information +- Pattern-based text cleaning with customizable YAML configuration +- Command-line interface +- Python API for programmatic use +- Text replacement and pattern removal +- Duplicate headline removal +- Short line removal +- Empty line contraction +- Encoding fix support (mojibake) +- Quotation normalization diff --git a/README.md b/README.md index bbbf607..6afc85a 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ A simple Python tool for cleaning and formatting markdown documents. Default con ## Description `markdowncleaner` helps you clean up markdown files by removing unwanted content such as: -- References, bibliographies, and citations +- References, bibliographies, and citations (including heuristic detection of bibliographic lines) - Footnotes and endnote references in text - Copyright notices and legal disclaimers - Acknowledgements and funding information @@ -13,6 +13,7 @@ A simple Python tool for cleaning and formatting markdown documents. 
Default con - Specific patterns like DOIs, URLs, and email addresses - Short lines and excessive whitespace - Duplicate headlines (for example, because paper title and author names were reprinted on every page of a PDF) +- Erroneous line breaks from PDF conversion This tool is particularly useful for processing academic papers, books, or any markdown document that needs formatting cleanup. @@ -24,7 +25,9 @@ pip install markdowncleaner ## Usage -### Basic Usage +### Python API + +#### Basic Usage ```python from markdowncleaner import MarkdownCleaner @@ -42,7 +45,7 @@ cleaned_text = cleaner.clean_markdown_string(text) print(cleaned_text) ``` -### Customizing Cleaning Options +#### Customizing Cleaning Options ```python from markdowncleaner import MarkdownCleaner, CleanerOptions @@ -51,9 +54,11 @@ from markdowncleaner import MarkdownCleaner, CleanerOptions options = CleanerOptions() options.remove_short_lines = True options.min_line_length = 50 # custom minimum line length -options.remove_duplicate_headlines = False +options.remove_duplicate_headlines = False options.remove_footnotes_in_text = True options.contract_empty_lines = True +options.fix_encoding_mojibake = True +options.normalize_quotation_symbols = True # Initialize cleaner with custom options cleaner = MarkdownCleaner(options=options) @@ -61,12 +66,13 @@ cleaner = MarkdownCleaner(options=options) # Use the cleaner as before ``` -### Custom Cleaning Patterns +#### Custom Cleaning Patterns You can also provide custom cleaning patterns: ```python -from markdowncleaner import MarkdownCleaner, CleaningPatterns +from markdowncleaner import MarkdownCleaner +from markdowncleaner.config.loader import CleaningPatterns from pathlib import Path # Load custom patterns from a YAML file @@ -76,6 +82,102 @@ custom_patterns = CleaningPatterns.from_yaml(Path("my_patterns.yaml")) cleaner = MarkdownCleaner(patterns=custom_patterns) ``` +### Command Line Interface + +Clean a single markdown file using the CLI: + +```bash +# Basic usage - creates a new file with "_cleaned" suffix +markdowncleaner input.md + +# Specify output file +markdowncleaner input.md -o output.md + +# Specify output directory +markdowncleaner input.md --output-dir cleaned_files/ + +# Use custom configuration +markdowncleaner input.md --config my_patterns.yaml + +# Enable encoding fixes and quotation normalization +markdowncleaner input.md --fix-encoding --normalize-quotation + +# Customize line length threshold +markdowncleaner input.md --min-line-length 50 + +# Disable specific cleaning operations +markdowncleaner input.md --keep-short-lines --keep-sections --keep-footnotes + +# Disable replacements and inline pattern removal +markdowncleaner input.md --no-replacements --keep-inline-patterns + +# Disable formatting operations +markdowncleaner input.md --no-crimping --keep-empty-lines + +# Keep references (disable heuristic reference detection) +markdowncleaner input.md --keep-references +``` + +**Available CLI Options:** + +- `-o`, `--output`: Path to save the cleaned markdown file +- `--output-dir`: Directory to save the cleaned file +- `--config`: Path to custom YAML configuration file +- `--fix-encoding`: Fix encoding mojibake issues +- `--normalize-quotation`: Normalize quotation symbols to standard ASCII +- `--keep-short-lines`: Don't remove lines shorter than minimum length +- `--min-line-length`: Minimum line length to keep (default: 70) +- `--keep-bad-lines`: Don't remove lines matching bad line patterns +- `--keep-sections`: Don't remove sections like References, 
Acknowledgements +- `--keep-duplicate-headlines`: Don't remove duplicate headlines +- `--keep-footnotes`: Don't remove footnote references in text +- `--no-replacements`: Don't perform text replacements +- `--keep-inline-patterns`: Don't remove inline patterns like citations +- `--keep-empty-lines`: Don't contract consecutive empty lines +- `--no-crimping`: Don't crimp linebreaks (fix line break errors from PDF conversion) +- `--keep-references`: Don't heuristically detect and remove bibliographic reference lines + +### Batch Processing Script + +For processing multiple markdown files in a folder and its subfolders, use the included batch processing script: + +```bash +# Basic usage - will prompt for confirmation +python scripts/clean_mds_in_folder.py documents/ + +# Skip confirmation prompt +python scripts/clean_mds_in_folder.py documents/ --yes + +# Use 8 parallel workers (default is your CPU count) +python scripts/clean_mds_in_folder.py documents/ --workers 8 + +# Use custom cleaning patterns +python scripts/clean_mds_in_folder.py documents/ --config my_patterns.yaml + +# Combine options +python scripts/clean_mds_in_folder.py documents/ --yes --workers 4 +``` + +**Features:** +- Recursively finds all `.md` files in the specified folder and subfolders +- Processes files in parallel using multiple CPU cores for faster processing +- Shows real-time progress bar with `tqdm` +- Cleans files in-place (modifies original files) +- Asks for confirmation before processing (unless `--yes` is used) +- Continues processing even if some files fail +- Reports all successful and failed files at the end + +**Script Options:** +- `folder`: Path to folder containing markdown files (required) +- `-y`, `--yes`: Skip confirmation prompt and proceed immediately +- `-w`, `--workers`: Number of parallel workers (default: CPU count) +- `--config`: Path to custom YAML configuration file + +**Note:** Requires `tqdm` for the progress bar: +```bash +pip install tqdm +``` + ## Configuration The default cleaning patterns are defined in `default_cleaning_patterns.yaml` and include: @@ -88,16 +190,22 @@ The default cleaning patterns are defined in `default_cleaning_patterns.yaml` an ## Options -- `remove_short_lines`: Remove lines shorter than `min_line_length` (default: 70 characters) -- `remove_whole_lines`: Remove lines matching specific patterns -- `remove_sections`: Remove entire sections based on section headings -- `remove_duplicate_headlines`: Remove duplicate headlines based on threshold -- `remove_duplicate_headlines_threshold`: Threshold for duplicate headline removal -- `remove_footnotes_in_text`: Remove footnote references -- `replace_within_lines`: Replace specific patterns within lines -- `remove_within_lines`: Remove specific patterns within lines -- `contract_empty_lines`: Normalize whitespace -- `crimp_linebreaks`: Improve line break formatting +All available `CleanerOptions`: + +- `fix_encoding_mojibake`: Fix encoding issues and mojibake using ftfy (default: False) +- `normalize_quotation_symbols`: Normalize various quotation marks to standard ASCII quotes (default: False) +- `remove_short_lines`: Remove lines shorter than `min_line_length` (default: True) +- `min_line_length`: Minimum line length to keep when `remove_short_lines` is enabled (default: 70) +- `remove_whole_lines`: Remove lines matching specific patterns (default: True) +- `remove_sections`: Remove entire sections based on section headings (default: True) +- `remove_duplicate_headlines`: Remove duplicate headlines based on threshold 
(default: True) +- `remove_duplicate_headlines_threshold`: Number of occurrences needed to consider a headline duplicate (default: 2) +- `remove_footnotes_in_text`: Remove footnote references like ".1" or ".23" (default: True) +- `replace_within_lines`: Replace specific patterns within lines (default: True) +- `remove_within_lines`: Remove specific patterns within lines (default: True) +- `contract_empty_lines`: Reduce multiple consecutive empty lines to one (default: True) +- `crimp_linebreaks`: Fix line break errors from PDF conversion (default: True) +- `remove_references_heuristically`: Heuristically detect and remove bibliographic reference lines by scoring lines based on bibliographic patterns (default: True) ## License diff --git a/scripts/clean_mds_in_folder.py b/scripts/clean_mds_in_folder.py new file mode 100644 index 0000000..2433936 --- /dev/null +++ b/scripts/clean_mds_in_folder.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 +""" +Script to clean all markdown files in a folder and its subfolders in parallel. +Processes files in-place using ProcessPoolExecutor for efficient parallel processing. +""" + +import argparse +import sys +from pathlib import Path +from concurrent.futures import ProcessPoolExecutor, as_completed +from typing import Tuple, Optional +import multiprocessing + +try: + from tqdm import tqdm +except ImportError: + print("Error: tqdm is required for progress tracking.", file=sys.stderr) + print("Install it with: pip install tqdm", file=sys.stderr) + sys.exit(1) + +from markdowncleaner import MarkdownCleaner +from markdowncleaner.config.loader import get_default_patterns, CleaningPatterns + + +def clean_single_file(file_path: Path, config_path: Optional[Path] = None) -> Tuple[Path, bool, Optional[str]]: + """ + Clean a single markdown file in-place. 
+ + Args: + file_path: Path to the markdown file to clean + config_path: Optional path to custom YAML configuration + + Returns: + Tuple of (file_path, success, error_message) + """ + try: + # Load patterns + if config_path: + patterns = CleaningPatterns.from_yaml(config_path) + else: + patterns = get_default_patterns() + + # Initialize cleaner + cleaner = MarkdownCleaner(patterns=patterns) + + # Clean the file in-place + cleaner.clean_markdown_file(input_file=file_path, output_file=file_path) + + return (file_path, True, None) + except Exception as e: + return (file_path, False, str(e)) + + +def parse_args() -> argparse.Namespace: + """Parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Clean all markdown files in a folder and its subfolders in parallel.", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + %(prog)s documents/ + %(prog)s documents/ --yes + %(prog)s documents/ --workers 8 + %(prog)s documents/ --config custom_patterns.yaml + """ + ) + + parser.add_argument( + "folder", + type=Path, + help="Folder containing markdown files to clean" + ) + + parser.add_argument( + "-y", "--yes", + action="store_true", + help="Skip confirmation prompt and proceed immediately" + ) + + parser.add_argument( + "-w", "--workers", + type=int, + default=None, + help=f"Number of parallel workers (default: {multiprocessing.cpu_count()})" + ) + + parser.add_argument( + "--config", + type=Path, + help="Path to custom YAML configuration file with cleaning patterns" + ) + + return parser.parse_args() + + +def main() -> int: + """Main entry point for the script.""" + args = parse_args() + + # Validate folder exists + if not args.folder.exists(): + print(f"Error: Folder '{args.folder}' does not exist.", file=sys.stderr) + return 1 + + if not args.folder.is_dir(): + print(f"Error: '{args.folder}' is not a directory.", file=sys.stderr) + return 1 + + # Find all markdown files recursively + print(f"Searching for markdown files in '{args.folder}'...") + md_files = list(args.folder.rglob("*.md")) + + if not md_files: + print("No markdown files found.") + return 0 + + # Show count and ask for confirmation + print(f"Found {len(md_files)} markdown file(s).") + + if not args.yes: + response = input("Proceed with cleaning? 
[y/N]: ") + if response.lower() not in ['y', 'yes']: + print("Aborted.") + return 0 + + # Determine number of workers + max_workers = args.workers or multiprocessing.cpu_count() + print(f"Processing files using {max_workers} parallel worker(s)...") + + # Process files in parallel with progress bar + successful = [] + failed = [] + + with ProcessPoolExecutor(max_workers=max_workers) as executor: + # Submit all tasks + future_to_file = { + executor.submit(clean_single_file, file_path, args.config): file_path + for file_path in md_files + } + + # Process completed tasks with progress bar + with tqdm(total=len(md_files), unit="file") as pbar: + for future in as_completed(future_to_file): + file_path, success, error = future.result() + + if success: + successful.append(file_path) + else: + failed.append((file_path, error)) + + pbar.update(1) + + # Report results + print(f"\n{'='*60}") + print(f"Successfully cleaned: {len(successful)} file(s)") + + if failed: + print(f"Failed: {len(failed)} file(s)") + print("\nFailed files:") + for file_path, error in failed: + print(f" - {file_path}") + print(f" Error: {error}") + return 1 + else: + print("All files processed successfully!") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/markdowncleaner/cli.py b/src/markdowncleaner/cli.py index bdde4a0..1d12f26 100644 --- a/src/markdowncleaner/cli.py +++ b/src/markdowncleaner/cli.py @@ -43,6 +43,18 @@ def parse_args(args: Optional[List[str]] = None) -> argparse.Namespace: ) # Options for customizing the cleaning process + parser.add_argument( + "--fix-encoding", + action="store_true", + help="Fix encoding mojibake" + ) + + parser.add_argument( + "--normalize-quotation", + action="store_true", + help="Normalize quotation symbols" + ) + parser.add_argument( "--keep-short-lines", action="store_true", @@ -99,11 +111,17 @@ def parse_args(args: Optional[List[str]] = None) -> argparse.Namespace: ) parser.add_argument( - "--no-crimp-linebreaks", + "--no-crimping", action="store_true", help="Don't crimp linebreaks" ) + parser.add_argument( + "--keep-references", + action="store_true", + help="Don't heuristically detect and remove references" + ) + return parser.parse_args(args) @@ -113,6 +131,8 @@ def main(args: Optional[List[str]] = None) -> int: # Configure cleaner options based on arguments options = CleanerOptions() + options.fix_encoding_mojibake = parsed_args.fix_encoding + options.normalize_quotation_symbols = parsed_args.normalize_quotation options.remove_short_lines = not parsed_args.keep_short_lines options.min_line_length = parsed_args.min_line_length options.remove_whole_lines = not parsed_args.keep_bad_lines @@ -122,7 +142,8 @@ def main(args: Optional[List[str]] = None) -> int: options.replace_within_lines = not parsed_args.no_replacements options.remove_within_lines = not parsed_args.keep_inline_patterns options.contract_empty_lines = not parsed_args.keep_empty_lines - options.crimp_linebreaks = not parsed_args.no_crimp_linebreaks + options.crimp_linebreaks = not parsed_args.no_crimping + options.remove_references_heuristically = not parsed_args.keep_references # Load patterns from custom config or use defaults if parsed_args.config: diff --git a/src/markdowncleaner/config/default_cleaning_patterns.yaml b/src/markdowncleaner/config/default_cleaning_patterns.yaml index 5bf8b4f..3f3469e 100644 --- a/src/markdowncleaner/config/default_cleaning_patterns.yaml +++ b/src/markdowncleaner/config/default_cleaning_patterns.yaml @@ -44,7 +44,8 @@ sections_to_remove: - List of 
Illustrations - |- ^\s?Keywords\s?\.?$ - - Authors' Note + - ^\bauthors?[\''']?\s+note\b + - ^\bAUTHORS?[\''']?\s+NOTE\b - |- ^[a-zA-Z0-9]+-[a-zA-Z0-9]+-?[a-zA-Z0-9]+[^\s]*$ - |- @@ -62,6 +63,7 @@ sections_to_remove: - |- Published by\: - Preface + - Note on sources # Patterns for inline text that should be removed bad_inline_patterns: @@ -81,6 +83,9 @@ bad_inline_patterns: \* 1 , - |- \(p\.?\d{1,3}\) + - \\footnote\{[^}]*\} +# line ending with ... + - \.\.\.$ # Patterns for entire lines that should be removed bad_lines_patterns: @@ -110,10 +115,8 @@ bad_lines_patterns: - |- 10\.\d{4,9}/[-._;()/:\w]+ - Digital Object Identifier - - |- - ^[Kk]eywords - - |- - ^Conflict of Interest + - ^[Kk]ey\s?words + - ^Conflict of Interest - ^Library of Congress - ^Open Access - ^OpenAccess @@ -128,10 +131,8 @@ bad_lines_patterns: - ^Table of Contents - |- ^Source: - - |- - ^Published by - - |- - ^CCS Concepts + - ^Published by + - ^CCS Concepts - ISSN - ISBN - |- @@ -160,10 +161,19 @@ bad_lines_patterns: - ™ - |- c \d{4} + # Unicode footnote numbers + - ^([⁰¹²³⁴⁵⁶⁷⁸⁹]+) + # Latex OCRed footnotes, text and math environment + - ^\\textsuperscript\{(\d+)\} + - ^\\\(\^\{?(\d+)\}?\\\) + # bibliography + - ^\\bibitem\{[^\s}]+\} + # footnotes: line starting with number and "ibid" + - ^\d+.*\b[Ii]bid\b - |- - ^\d{1,3}\s?.\s?[Ss]ee + ^\d{1,3}\s?.\s?([Ss]ee|[Nn]ote) - |- - ^\-\s?\d{1,3}\s?.\s?[Ss]ee + ^\-\s?\d{1,3}\s?.\s?([Ss]ee|[Nn]ote) - |- ^\d{1,2}[A-Za-z]+ - |- @@ -215,6 +225,8 @@ bad_lines_patterns: - All rights reserved - No portion of this publication may be reproduced - authors have asserted their rights to be identified as the authors + - authors contributed equally + - ^\*{0,2} ?[Aa]uthor contributions - material in this work has been asserted in accordance - imprint of Macmillan Publishers - A catalogue record for this book is available @@ -223,21 +235,13 @@ bad_lines_patterns: - This article is published with open access - Links to third party websites - Registered in England and Wales - - |- - \*I thank - - |- - \*We thank - - |- - \*For comments - - |- - \*I would like to thank - - |- - \*We would like to thank - - |- - \*Thanks to + # Thanks + - \b(I|[Ww]e)\b.*\b(thank|grateful|indebted)\b.*\b(feedback|participants|individuals|referee|publisher|reviewer|NSF|NEH|NIH|discussions|comments)\b.* + - \*For comments + - \*Thanks to + - Many thanks to - |- \*\s?This article is - - I am grateful for - Oxford University Press is a department of - a registered trademark of - Published in the United States @@ -248,14 +252,24 @@ bad_lines_patterns: - catalogue recordfor this publication - An earlier version of this paper was - Springer International Publishing AG + - Springer Nature remains neutral - part of Springer Nature - This work is subject to copyright. - permission to reuse the copyright - distributed under the terms of a Creative Commons + - author(s) received no financial support + - author(s) declared no + - ^The views expressed in this + - ^Competing interests - |- , [A-Z]{2} \d{5}(?:-\d{4})?, +# some bibliography patterns + - ^[A-Z][a-z]+(?:,\s[A-Z][a-z]+)*(?:,\s(?:and\s)?[A-Z][a-z]+)*\.\s\d{4}\. + - ^([^(]+)\.\s+\((\[?\d{4}\]?(?:\s+\d{4})?)\)\s+(.+)$ -# Footnotes +# In-line footnote numbers, will be replaced with '.' if matched and if remove_footnotes_in_text +footnote_patterns: + - \\footnote\{[^}]*\} # Pattern matches: # 1. A period # 2. Optional space @@ -263,9 +277,13 @@ bad_lines_patterns: # 4. Optionally followed by comma and space and more 1-3 digit numbers # 5. 
This pattern repeats for any number of comma-separated numbers # 6. Followed by either end of string, whitespace, or newline -footnote_patterns: - |- \.\s?(\d{1,3}(?:,\s*\d{1,3})*?)(?=\s|$) + - \.\s?(\d{1,3}(?:,\s*\d{1,3})*?)(?=\s|$) +# Latex superscripted numerals in text and math environment like \textsuperscript{12} or .\(^{12}\) + - \.\\textsuperscript\{(\d+)\}(?=\s|$) + - \.\\\(\^\{?(\d+)\}?\\\)(?=\s|$) +# Unicode superscript numerals + - \.([⁰¹²³⁴⁵⁶⁷⁸⁹]+)(?=\s|$) + replacements: 'GLYPH<28>': 'fi' @@ -293,4 +311,6 @@ replacements: 'GLYPH<2>': 'Š' '/ornm58.ornm': '' '/ornm69.ornm': '' - '&': '&' \ No newline at end of file + '&': '&' + # note: need to escape the \ below (these are read as regex) + '\\item': '-' \ No newline at end of file diff --git a/src/markdowncleaner/markdowncleaner.py b/src/markdowncleaner/markdowncleaner.py index 05ffac1..f58851a 100644 --- a/src/markdowncleaner/markdowncleaner.py +++ b/src/markdowncleaner/markdowncleaner.py @@ -11,6 +11,8 @@ @dataclass class CleanerOptions: """Container for Cleaner options""" + fix_encoding_mojibake: bool = False + normalize_quotation_symbols: bool = False remove_short_lines: bool = True min_line_length: int = 70 remove_whole_lines: bool = True @@ -22,17 +24,19 @@ class CleanerOptions: remove_within_lines: bool = True contract_empty_lines: bool = True crimp_linebreaks: bool = True + remove_references_heuristically: bool = True class MarkdownCleaner: """Class to handle markdown document cleaning operations.""" - def __init__(self, patterns: CleaningPatterns = None, options: Optional[CleanerOptions] = None): + def __init__(self, patterns: Optional[CleaningPatterns] = None, options: Optional[CleanerOptions] = None): """ Initialize the cleaner with patterns. Args: patterns: CleaningPatterns instance, or None to use defaults + options: CleanerOptions instance, or None to use defaults """ self.patterns = patterns or get_default_patterns() self.options = options or CleanerOptions() @@ -86,14 +90,20 @@ def clean_markdown_string(self, content: str) -> str: """Apply all cleaning operations to the content.""" # Apply all default ftfy fixes if mojibake is detected - if ftfy.is_bad(content): - content = ftfy.fix_text(content) + if self.options.fix_encoding_mojibake: + if ftfy.is_bad(content): + content = ftfy.fix_text(content) + # Heuristically detect and remove blocks of lines with bibliographic information + if self.options.remove_references_heuristically: + content = self._remove_bibliographic_lines(content) + # Reduce two or more subsequent spaces to a single space content = re.sub(r' {2,}', ' ', content) # Normalize quotes - content = self._normalize_quotation_symbols(content) + if self.options.normalize_quotation_symbols: + content = self._normalize_quotation_symbols(content) # Remove lines shorter than min_line_length (default: 70 characters) if self.options.remove_short_lines: @@ -116,9 +126,9 @@ def clean_markdown_string(self, content: str) -> str: for k, v in self.patterns.replacements.items(): content = self._replace_within_lines(content, k, v) - # Replace footnote pattern (numbers at end of sentence) with '.' + # Replace footnote pattern (numbers at end of sentence) with '. ' if self.options.remove_footnotes_in_text: - content = self._replace_within_lines(content, self.patterns.footnote_patterns, '.') + content = self._replace_within_lines(content, self.patterns.footnote_patterns, '. 
') # Remove remaining unwanted inline patterns (some may have been replaced by replacements) if self.options.remove_within_lines: @@ -132,6 +142,134 @@ def clean_markdown_string(self, content: str) -> str: return content + def _remove_bibliographic_lines(self, text: str, score_threshold: int = 3) -> str: + """ + Remove bibliographic reference lines from text. + + Detects and removes individual lines that appear to be bibliography entries + by scoring each line based on bibliographic patterns. + + Args: + text: Input text to clean + score_threshold: Minimum score for a line to be removed (default: 3) + + Returns: + Text with bibliographic lines removed + """ + lines = text.splitlines() + result_lines = [] + + for line in lines: + score = self._score_bibliography_line(line) + if score < score_threshold: + result_lines.append(line) + + return '\n'.join(result_lines) + + def _score_bibliography_line(self, line: str) -> int: + """ + Score a line based on bibliographic patterns. + + Args: + line: Line of text to score + + Returns: + Integer score based on number of bibliographic patterns matched + """ + score = 0 + stripped = line.strip() + + # Don't score very short lines + if len(stripped) < 20: + return 0 + + # 1 point patterns + + # Year in parentheses: (1984), (2020), or [1960] + if re.search(r'\([12][089]\d{2}\)|\[[12][089]\d{2}\]', line): + score += 1 + + # Page ranges: 35-57, pp. 332-487, 283-310 + if re.search(r'\bpp?\.\s*\d+-\d+|\b\d{2,3}-\d{2,3}\b', line): + score += 1 + + # Publisher/location: "Cambridge, MA: Harvard", "Oxford: Clarendon Press" + if re.search(r'[A-Z][a-z]+(?:,\s*[A-Z]{2})?:\s*[A-Z][a-z]+', line): + score += 1 + + # Numbered list format: starts with "1. ", "14. ", etc. + if re.match(r'^\s*\d{1,3}\.\s+', line): + score += 1 + + # Bullet format: starts with "- " or "• " + if re.match(r'^\s*[-•]\s+', line): + score += 1 + + # Italic markers: *Title Text* + if re.search(r'\*[^*]+\*', line): + score += 1 + + # "In:" followed by capital letter + if re.search(r'\bIn:\s+[A-Z]', line): + score += 1 + + # Ampersand: " & " in author context + if ' & ' in line: + score += 1 + + # Multiple initials: "J. B. Wiesner", "M.A.", "H. F." + if re.search(r'\b[A-Z]\.\s*[A-Z]\.|\b[A-Z]\.[A-Z]\.', line): + score += 1 + + # "et al." + if 'et al.' in line: + score += 1 + + # Author name patterns: "LastName, FirstInitial." or "LastName, FirstName" at line start + if re.match(r'^\s*[A-Z][a-z]+,\s+[A-Z]', line): + score += 1 + + # Punctuation density: >8% of characters are . , : ; ( ) + if len(line) > 0: + punct_chars = sum(1 for c in line if c in '.,;:()') + if punct_chars / len(line) > 0.08: + score += 1 + + # 2 point patterns + + # Common journal names + if re.search(r'\b(journal|proceedings|review|quarterly|annals|transactions|bulletin|University Press)\b', line, re.IGNORECASE): + score += 2 + + # Author initial and date without brackets separated with dots + if re.search(r' [A-Za-z]\. \d{4}\. ', line): + score += 2 + if re.search(r' [A-Za-z]\.\, \d{4}\, ', line): + score += 2 + # Author lastname, first abbreviated, date in brackets, then title, e.g., Axelrod, R. (1984) T + if re.search(r'\w+,\s+([A-Z]\.)+\s+\(\d{4}\)\s+[A-Z]', line): + score += 2 + + # volume and page ranges + if re.search(r' \d{2,3}: \d{1,4}[-–—]\d{2,4}', line): + score += 2 + + # 3 point patterns + + # Volume/issue: 121(3), 14(2), Vol. 
I, vol(issue) + if re.search(r'\b\d+\(\d+\)\b|Vol\.\s*[IVX]+|vol\.\s*\d+', line, re.IGNORECASE): + score += 3 + + # DOI: doi.org/, DOI: + if re.search(r'doi\.org/|DOI:', line, re.IGNORECASE): + score += 3 + + # Editor markers: "Ed." or "Eds." (as standalone word or in parentheses) + if re.search(r'\bEds?\.\b|\(Eds?\.\)', line): + score += 3 + + return score + def _normalize_quotation_symbols(self, text: str) -> str: """ Normalizes quotation symbols in the input text. @@ -209,10 +347,10 @@ def _remove_short_lines(self, multiline_string: str, length: int = 70) -> str: # Split the content into lines lines = multiline_string.splitlines() - # Filter out lines that are shorter than length but that are neither empty nor start with '#' + # Filter out lines that are shorter than length but that are neither empty nor start with '#' nor with a pattern indicating a markdown list like '1. ' filtered_lines = [] for line in lines: - if not line.strip() == '' and not line.startswith('#') and len(line) < length: + if not line.strip() == '' and not line.startswith('#') and not re.match(r'^\d{1,2}\.\s', line) and len(line) < length: continue filtered_lines.append(line) @@ -366,51 +504,92 @@ def _crimp_linebreaks(self, markdown_text: str) -> str: markdown_text (str): Input markdown text with potential line break errors The function handles two cases: - 1. Hyphenated words split across lines (even with one empty line in between) - 2. Paragraphs incorrectly split by empty lines when a line ends with a letter + 1. Connective-based crimping: Lines ending with -, –, —, or ... + 2. Justified text crimping: Adjacent lines of similar length Returns: - str: Text with all patterns removed + str: Text with crimped linebreaks """ - lines = markdown_text.splitlines() result_lines = [] i = 0 + def _is_list_item(line: str) -> bool: + if not line: + return False + + # Check condition 1: starts with list marker + if line[0] in '-–—*∙•・◦●○': + return True + + # Check condition 2: contains list punctuation in first 5 chars + first_five = line[:5] + if any(char in first_five for char in '.)*]'): + return True + + # Check condition 3: starts with numeral + if line[0].isdigit(): + return True + + return False + while i < len(lines): current_line = lines[i].strip() # Try to join as many consecutive lines as possible while True: - joined = False - # Case 1: Handle hyphenated words - if current_line.endswith('-'): + # Case 1: Connective-based crimping + if current_line and current_line.endswith(('-', '–', '—', '...')): + # Find next non-empty line within 3 lines (max 2 empty lines between) j = i + 1 - while j < len(lines) and not lines[j].strip(): - j += 1 + empty_count = 0 + while j < len(lines) and empty_count <= 2: + if not lines[j].strip(): + empty_count += 1 + j += 1 + else: + break - if j < len(lines) and lines[j].strip() and lines[j].strip()[0].islower(): - current_line = current_line[:-1] + lines[j].strip() - i = j # Update i to the last joined line - joined = True - continue # Skip to next join check + # Check if we found a valid next line + if j < len(lines) and lines[j].strip(): + next_line = lines[j].strip() + + # Check all conditions + if (next_line[0].isalpha() and # Starts with letter + not _is_list_item(next_line) and # Not a list item + '.' in next_line[6:]): # Contains '.' at position 6 or later + + # Remove hyphen if present, otherwise add space + if current_line.endswith(('-', '–', '—')): + current_line = current_line[:-1] + next_line + else: # ends with '...' 
+ current_line = current_line + ' ' + next_line + + i = j # Update i to the last joined line + continue - # Case 2: Handle paragraph merging - if not current_line.startswith('#') and current_line and (current_line[-1].isalpha() or current_line[-1] in ',;\'\"'): - _logger.debug(f'Crimping line:... {current_line[-50:]}') - j = i + 1 - while j < len(lines) and not lines[j].strip(): - j += 1 + # Case 2: Justified text crimping + if (current_line and + current_line[-1].isalpha() and # Ends with letter + not current_line.startswith('#') and # Not a heading + not _is_list_item(current_line)): # Not a list item - if j < len(lines) and lines[j].strip() and \ - not lines[j].strip().startswith('#') and \ - not lines[j].strip().startswith('*') and \ - not lines[j].strip().startswith('-'): - current_line = current_line + ' ' + lines[j].strip() - i = j # Update i to the last joined line - joined = True # noqa: F841 - continue # Skip to next join check + # Check immediately next line (L+1) + j = i + 1 + if j < len(lines) and lines[j].strip(): + next_line = lines[j].strip() + + # Check all conditions + if (next_line[0].isalpha() and # Starts with letter + not next_line.startswith('#') and # Not a heading + not _is_list_item(next_line) and # Not a list item + len(next_line) >= 78 and # Length >= 78 + abs(len(next_line) - len(current_line)) <= 10): # Within ±10 + + current_line = current_line + ' ' + next_line + i = j # Update i to the last joined line + continue # If no joins were made, break the loop break diff --git a/tests/test_cleaner.py b/tests/test_cleaner.py index 10806a3..1c7d6de 100644 --- a/tests/test_cleaner.py +++ b/tests/test_cleaner.py @@ -179,9 +179,10 @@ def test_remove_duplicate_headlines(self): self.assertEqual(self.cleaner._remove_duplicate_headlines(text_case), text_case) def test_crimp_linebreaks(self): - text = "This line ends \nwith an awkward break." + # Test connective-based crimping (line ending with hyphen) + text = "This line ends with a hy-\nphenated word that continues." result = self.cleaner._crimp_linebreaks(text) - self.assertEqual(result, "This line ends with an awkward break.") + self.assertEqual(result, "This line ends with a hyphenated word that continues.") def test_clean_markdown_file(self): # Create a test markdown file diff --git a/tests/test_cli.py b/tests/test_cli.py index ceef79a..db80dba 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -40,7 +40,7 @@ def test_parse_args_all_options(self): '--no-replacements', '--keep-inline-patterns', '--keep-empty-lines', - '--no-crimp-linebreaks', + '--no-crimping', ]) self.assertEqual(args.input_file, Path('somefile.md')) @@ -56,7 +56,7 @@ def test_parse_args_all_options(self): self.assertTrue(args.no_replacements) self.assertTrue(args.keep_inline_patterns) self.assertTrue(args.keep_empty_lines) - self.assertTrue(args.no_crimp_linebreaks) + self.assertTrue(args.no_crimping) def test_main_success(self): # Create a test markdown file
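As a usage note for the heuristic reference removal introduced in this change: the README documents the `remove_references_heuristically` option and the matching `--keep-references` CLI flag, and the sketch below shows both sides of that toggle through the Python API. The sample strings are made up for illustration; the expectation comments describe likely behaviour of the scoring heuristic (default threshold of 3 in `_remove_bibliographic_lines`), not guaranteed output.

```python
from markdowncleaner import MarkdownCleaner, CleanerOptions

# Hypothetical sample: one ordinary prose line plus one reference-like line.
text = (
    "Cooperation can emerge among self-interested agents when interactions are repeated over time.\n"
    "\n"
    "Axelrod, R. (1984). The Evolution of Cooperation. New York: Basic Books, pp. 3-24.\n"
)

# Default options: remove_references_heuristically is True, so the citation-like
# line should score at or above the threshold and be dropped, while the long
# prose line is expected to be kept.
print(MarkdownCleaner().clean_markdown_string(text))

# Equivalent of the --keep-references CLI flag: disable the heuristic entirely.
options = CleanerOptions()
options.remove_references_heuristically = False
print(MarkdownCleaner(options=options).clean_markdown_string(text))
```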
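The connective-based crimping described in the CHANGELOG is easiest to see on the internal helper that the updated test exercises. This is a minimal sketch using the private `_crimp_linebreaks` method; going through `clean_markdown_string` instead would also apply the other cleaning steps (for example short-line removal), so the raw helper is clearer for demonstration.

```python
from markdowncleaner import MarkdownCleaner

cleaner = MarkdownCleaner()

# A hyphen-split word across a line break, as in the updated test case:
# the trailing '-' is treated as a connective and the two lines are joined.
broken = "This line ends with a hy-\nphenated word that continues."
print(cleaner._crimp_linebreaks(broken))
# Expected (per tests/test_cleaner.py):
# "This line ends with a hyphenated word that continues."
```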
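For the renamed and newly added CLI flags, a quick sanity check of the flag-to-attribute mapping can be written against `parse_args` in `markdowncleaner.cli`, mirroring the existing CLI tests. A minimal sketch, assuming the package layout shown in the diff:

```python
from markdowncleaner.cli import parse_args

# parse_args accepts an argv-style list; these are the flags touched by this change.
args = parse_args([
    "input.md",
    "--fix-encoding",
    "--normalize-quotation",
    "--no-crimping",        # renamed from --no-crimp-linebreaks
    "--keep-references",    # disables heuristic reference removal
])

assert args.fix_encoding and args.normalize_quotation
assert args.no_crimping and args.keep_references
```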