diff --git a/experiments/README.md b/experiments/README.md
new file mode 100644
index 0000000..cc1e741
--- /dev/null
+++ b/experiments/README.md
@@ -0,0 +1,100 @@
+## Experiments
+
+This directory contains small reproducible experiments used to validate assumptions behind the **OpenVerifiableLLM deterministic training pipeline**.
+
+The goal of these experiments is to verify that:
+
+- preprocessing produces deterministic outputs
+- dataset tampering can be detected using Merkle roots
+- small reproducible datasets can be used for testing the pipeline
+
+These experiments are **not part of the main pipeline**. They are intended for testing ideas and validating reproducibility guarantees.
+
+---
+
+## Directory Structure
+```text
+experiments/
+│
+├── data_subset/
+│   ├── sample_wiki_generate.py
+│   ├── sample_wiki.xml.bz2
+│   └── tampered_sample_wiki.xml.bz2
+│
+├── preprocessing_determinism/
+│   └── test_preprocessing.py
+│
+├── merkle_verification/
+│   └── test_merkle.py
+│
+└── README.md
+```
+---
+
+## Included Experiments
+
+### 1. Preprocessing Determinism
+
+Verifies that running the preprocessing pipeline multiple times on the same dataset produces identical outputs.
+
+The experiment compares:
+
+- `processed_sha256`
+- `processed_merkle_root`
+- `environment_hash`
+
+If these values match across runs, the preprocessing step is deterministic.
+
+Run:
+
+```bash
+python -m experiments.preprocessing_determinism.test_preprocessing experiments/data_subset/sample_wiki.xml.bz2
+```
+
+**Expected output:**
+
+```text
+Run 1 hash: ...
+Run 2 hash: ...
+
+Deterministic preprocessing confirmed🎉
+```
+
+### 2. Merkle Root Tamper Detection
+
+Tests whether dataset tampering is detected by comparing Merkle roots.
+
+Two datasets are used:
+
+- `sample_wiki.xml.bz2` (original)
+
+- `tampered_sample_wiki.xml.bz2` (modified)
+
+The experiment compares:
+
+- `raw_merkle_root`
+
+- `processed_merkle_root`
+
+If either root differs, the tampering is successfully detected.
+
+Run:
+
+```bash
+python -m experiments.merkle_verification.test_merkle --path1 experiments/data_subset/sample_wiki.xml.bz2 --path2 experiments/data_subset/tampered_sample_wiki.xml.bz2
+```
+
+**Expected output:**
+
+```text
+Run 1 RAW Merkle root: ...
+Run 2 RAW Merkle root: ...
+
+Tampering detected 🎉 (Merkle roots differ)
+```
+
+### 3. Dataset Subset
+
+The `data_subset/` directory contains a minimal Wikipedia XML example used for quick experimentation without downloading full dumps.
+
+This allows experiments to run quickly while still exercising the preprocessing pipeline.
diff --git a/experiments/data_subset/sample_wiki_generate.py b/experiments/data_subset/sample_wiki_generate.py
new file mode 100644
index 0000000..89cc32b
--- /dev/null
+++ b/experiments/data_subset/sample_wiki_generate.py
@@ -0,0 +1,20 @@
+import bz2
+
+# Writes the *tampered* sample archive: the "e" of "online" is deliberately deleted below
+# so that its content differs from the original sample_wiki.xml.bz2.
+xml_content = """<?xml version="1.0" encoding="UTF-8"?>
+<mediawiki>
+  <page>
+    <revision>
+      <text>
+        Hello <ref>citation</ref> world.
+        This is [[Python|programming language]]
+        {{Wikipedia }}is a free onlin encyclopedia.
+      </text>
+    </revision>
+  </page>
+</mediawiki>
+"""
+# The output path is relative to the current working directory.
+with bz2.open("experiments/data_subset/tampered_sample_wiki.xml.bz2", "wt", encoding="utf-8") as f:
+    f.write(xml_content)
\ No newline at end of file
diff --git a/experiments/merkle_verification/test_merkle.py b/experiments/merkle_verification/test_merkle.py
new file mode 100644
index 0000000..fa35092
--- /dev/null
+++ b/experiments/merkle_verification/test_merkle.py
@@ -0,0 +1,63 @@
+import argparse
+import json
+import logging
+from pathlib import Path
+
+from openverifiablellm.utils import extract_text_from_xml
+
+logger = logging.getLogger(__name__)
+
+"""
+Experiment: Tamper Detection via Merkle Root Comparison
+
+Run with:
+python -m experiments.merkle_verification.test_merkle --path1 experiments/data_subset/sample_wiki.xml.bz2 --path2 experiments/data_subset/tampered_sample_wiki.xml.bz2
+
+"""
+MANIFEST_PATH = Path("data/dataset_manifest.json")
+
+def run(path1):
+    """Preprocess ``path1`` and return the raw and processed Merkle roots read from the manifest."""
+    extract_text_from_xml(path1)
+    # NOTE(review): presumably extract_text_from_xml (re)writes MANIFEST_PATH on every call - confirm.
+    # Read the generated manifest from the CWD-relative MANIFEST_PATH.
+    with MANIFEST_PATH.open() as f:
+        manifest = json.load(f)
+    # Keep only the two roots that the tamper check compares.
+    return {
+        "raw_merkle_root": manifest["raw_merkle_root"],
+        "processed_merkle_root": manifest["processed_merkle_root"]
+    }
+
+if __name__ == "__main__":
+    # CLI entry point: preprocess both archives and compare their Merkle roots.
+    parser= argparse.ArgumentParser(
+        description= "Test tamper detection using Merkle root"
+    )
+    
+    parser.add_argument("--path1",required=True,help="Original dataset")
+    parser.add_argument("--path2",required=True,help="Tampered dataset")
+    
+    args= parser.parse_args()
+    
+    logging.basicConfig(
+        level= logging.INFO,
+        format="%(levelname)s - %(message)s"
+    )
+    # Both calls read the roots from the same MANIFEST_PATH file, one after the other.
+    root1 = run(args.path1)
+    root2 = run(args.path2)
+
+    print(f"\nRun 1 RAW Merkle root: {root1['raw_merkle_root']}")
+    print(f"Run 2 RAW Merkle root: {root2['raw_merkle_root']}")
+    
+    print(f"\nRun 1 processed Merkle root: {root1['processed_merkle_root']}")
+    print(f"Run 2 processed Merkle root: {root2['processed_merkle_root']}")
+    # Tampering counts as detected when either the raw or the processed root differs.
+    if (
+        root1["raw_merkle_root"] != root2["raw_merkle_root"] 
+        or root1["processed_merkle_root"] != root2["processed_merkle_root"]
+    ):
+        print("\nTampering detected 🎉 (Merkle roots differ)")
+    else:  # NOTE(review): the script still exits with status 0 on this branch.
+        print("\nUnexpected result ❌ (Merkle roots identical)")
\ No newline at end of file
diff --git a/experiments/preprocessing_determinism/test_preprocessing.py b/experiments/preprocessing_determinism/test_preprocessing.py
new file mode 100644
index 0000000..743a23b
--- /dev/null
+++ b/experiments/preprocessing_determinism/test_preprocessing.py
@@ -0,0 +1,57 @@
+import json
+import logging
+import sys
+from pathlib import Path
+
+from openverifiablellm.utils import extract_text_from_xml
+
+logger = logging.getLogger(__name__)
+
+"""
+Experiment to test deterministic preprocessing by comparing the hashes generated by two runs.
+
+Run with:
+    python -m experiments.preprocessing_determinism.test_preprocessing experiments/data_subset/sample_wiki.xml.bz2
+"""
+MANIFEST_PATH = Path("data/dataset_manifest.json")
+
+def run(input_path):
+    """Preprocess ``input_path`` and return the hash fields read from the generated manifest."""
+    extract_text_from_xml(input_path)
+    # NOTE(review): presumably extract_text_from_xml (re)writes MANIFEST_PATH on every call - confirm.
+    # Read the generated manifest from the CWD-relative MANIFEST_PATH.
+    with MANIFEST_PATH.open() as f:
+        manifest = json.load(f)
+    # Keep only the three fields that the determinism check compares.
+    return {
+        "processed_sha256": manifest["processed_sha256"],
+        "processed_merkle_root": manifest["processed_merkle_root"],
+        "environment_hash": manifest["environment_hash"],
+    }
+
+
+if __name__ == "__main__":
+    # CLI entry point: the input dump path is the first positional argument.
+    if len(sys.argv) < 2:
+        print("Usage: python -m experiments.preprocessing_determinism.test_preprocessing <input_dump>")
+        sys.exit(1)
+        
+    logging.basicConfig(
+    level=logging.INFO,
+    format="%(levelname)s - %(message)s"
+    )
+    # Preprocess the same input twice so the two manifest snapshots can be compared.
+    result1= run(sys.argv[1])
+    result2= run(sys.argv[1])
+    
+    print(f"\nRun 1 hash: {result1['processed_sha256']}")
+    print(f"Run 2 hash: {result2['processed_sha256']}")
+    # All three manifest fields must match for the run to count as deterministic.
+    if (
+        result1["processed_sha256"] == result2["processed_sha256"]
+        and result1["processed_merkle_root"] == result2["processed_merkle_root"]
+        and result1["environment_hash"] == result2["environment_hash"]
+    ):
+        print("\nDeterministic preprocessing confirmed🎉")
+    else:  # NOTE(review): the script still exits with status 0 on this branch.
+        print("Hash didn't match❌")
\ No newline at end of file