diff --git a/experiments/README.md b/experiments/README.md
new file mode 100644
index 0000000..cc1e741
--- /dev/null
+++ b/experiments/README.md
@@ -0,0 +1,100 @@
+## Experiments
+
+This directory contains small reproducible experiments used to validate assumptions behind the **OpenVerifiableLLM deterministic training pipeline**.
+
+The goal of these experiments is to verify that:
+
+- preprocessing produces deterministic outputs
+- dataset tampering can be detected using Merkle roots
+- small reproducible datasets can be used for testing the pipeline
+
+These experiments are **not part of the main pipeline**. They are intended for testing ideas and validating reproducibility guarantees.
+
+---
+
+## Directory Structure
+```text
+experiments/
+│
+├── data_subset/
+│   ├── sample_wiki_generate.py
+│   ├── sample_wiki.xml.bz2
+│   └── tampered_sample_wiki.xml.bz2
+│
+├── preprocessing_determinism/
+│   └── test_preprocessing.py
+│
+├── merkle_verification/
+│   └── test_merkle.py
+│
+└── README.md
+```
+---
+
+## Included Experiments
+
+### 1. Preprocessing Determinism
+
+Verifies that running the preprocessing pipeline multiple times on the same dataset produces identical outputs.
+
+The experiment compares:
+
+- `processed_sha256`
+- `processed_merkle_root`
+- `environment_hash`
+
+If these values match across runs, the preprocessing step is deterministic.
+
+Run:
+
+```bash
+python -m experiments.preprocessing_determinism.test_preprocessing experiments/data_subset/sample_wiki.xml.bz2
+```
+
+**Expected output:**
+
+```text
+Run 1 hash: ...
+Run 2 hash: ...
+
+Deterministic preprocessing confirmed🎉
+```
+
+### 2. Merkle Root Tamper Detection
+
+Tests whether dataset tampering is detected by comparing Merkle roots.
+
+Two datasets are used:
+
+- `sample_wiki.xml.bz2` (original)
+- `tampered_sample_wiki.xml.bz2` (modified)
+
+
+The experiment compares:
+
+- `raw_merkle_root`
+- `processed_merkle_root`
+
+
+If either root differs, the tampering is successfully detected.
+
+Run:
+
+```bash
+python -m experiments.merkle_verification.test_merkle --path1 experiments/data_subset/sample_wiki.xml.bz2 --path2 experiments/data_subset/tampered_sample_wiki.xml.bz2
+```
+
+**Expected output:**
+
+```text
+Run 1 RAW Merkle root: ...
+Run 2 RAW Merkle root: ...
+
+Tampering detected 🎉 (Merkle roots differ)
+```
+
+### 3. Dataset Subset
+
+The `data_subset` directory contains a minimal Wikipedia XML example used for quick experimentation without downloading full dumps.
+
+This allows experiments to run quickly while still exercising the preprocessing pipeline.
diff --git a/experiments/data_subset/sample_wiki_generate.py b/experiments/data_subset/sample_wiki_generate.py
new file mode 100644
index 0000000..89cc32b
--- /dev/null
+++ b/experiments/data_subset/sample_wiki_generate.py
@@ -0,0 +1,20 @@
+import bz2
+
+# Writes the *tampered* sample: "online" is deliberately misspelled "onlin" below.
+# NOTE(review): element tags assumed from the MediaWiki export layout - confirm.
+xml_content = """
+<mediawiki>
+  <page>
+    <revision>
+      <text>
+        Hello [citation] world.
+        This is [[Python|programming language]]
+        {{Wikipedia }}is a free onlin encyclopedia.
+      </text>
+    </revision>
+  </page>
+</mediawiki>
+"""
+
+with bz2.open("experiments/data_subset/tampered_sample_wiki.xml.bz2", "wt", encoding="utf-8") as f:
+    f.write(xml_content)
\ No newline at end of file
diff --git a/experiments/merkle_verification/test_merkle.py b/experiments/merkle_verification/test_merkle.py
new file mode 100644
index 0000000..fa35092
--- /dev/null
+++ b/experiments/merkle_verification/test_merkle.py
@@ -0,0 +1,63 @@
+import argparse
+import json
+import logging
+from pathlib import Path
+
+from openverifiablellm.utils import extract_text_from_xml
+
+logger = logging.getLogger(__name__)
+
+"""
+Experiment: Tamper Detection via Merkle Root Comparison
+
+Run with:
+python -m experiments.merkle_verification.test_merkle --path1 experiments/data_subset/sample_wiki.xml.bz2 --path2 experiments/data_subset/tampered_sample_wiki.xml.bz2
+
+"""
+MANIFEST_PATH = Path("data/dataset_manifest.json")
+
+def run(path1):
+    """Preprocess ``path1``; return its raw and processed Merkle roots as a dict."""
+    extract_text_from_xml(path1)
+
+    # Read the manifest (presumably written by the preprocessing call above).
+    with MANIFEST_PATH.open(encoding="utf-8") as f:
+        manifest = json.load(f)
+
+    return {
+        "raw_merkle_root": manifest["raw_merkle_root"],
+        "processed_merkle_root": manifest["processed_merkle_root"],
+    }
+
+if __name__ == "__main__":
+    # Script entry point: preprocess both datasets, then compare their Merkle roots.
+    parser = argparse.ArgumentParser(
+        description="Test tamper detection using Merkle root"
+    )
+
+    parser.add_argument("--path1", required=True, help="Original dataset")
+    parser.add_argument("--path2", required=True, help="Tampered dataset")
+
+    args = parser.parse_args()
+
+    logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
+
+    # Preprocess the original and the tampered dataset in turn; each call returns
+    # that dataset's raw and processed Merkle roots as read from the manifest.
+    root1 = run(args.path1)
+    root2 = run(args.path2)
+
+    print(f"\nRun 1 RAW Merkle root: {root1['raw_merkle_root']}")
+    print(f"Run 2 RAW Merkle root: {root2['raw_merkle_root']}")
+
+    print(f"\nRun 1 processed Merkle root: {root1['processed_merkle_root']}")
+    print(f"Run 2 processed Merkle root: {root2['processed_merkle_root']}")
+
+    if (
+        root1["raw_merkle_root"] != root2["raw_merkle_root"]
+        or root1["processed_merkle_root"] != root2["processed_merkle_root"]
+    ):
+        print("\nTampering detected 🎉 (Merkle roots differ)")
+    else:
+        print("\nUnexpected result ❌ (Merkle roots identical)")
+        raise SystemExit(1)  # non-zero status so a missed tamper is visible to callers
\ No newline at end of file
diff --git a/experiments/preprocessing_determinism/test_preprocessing.py b/experiments/preprocessing_determinism/test_preprocessing.py
new file mode 100644
index 0000000..743a23b
--- /dev/null
+++ b/experiments/preprocessing_determinism/test_preprocessing.py
@@ -0,0 +1,57 @@
+import json
+import logging
+import sys
+from pathlib import Path
+
+from openverifiablellm.utils import extract_text_from_xml
+
+logger = logging.getLogger(__name__)
+
+"""
+Experiment to test deterministic preprocessing by comparing the hashes generated on two runs.
+
+Run with:
+ python -m experiments.preprocessing_determinism.test_preprocessing experiments/data_subset/sample_wiki.xml.bz2
+"""
+MANIFEST_PATH = Path("data/dataset_manifest.json")
+
+def run(input_path):
+    """Preprocess ``input_path``; return the manifest fields compared across runs."""
+    extract_text_from_xml(input_path)
+
+    # Read the manifest (presumably written by the preprocessing call above).
+    with MANIFEST_PATH.open(encoding="utf-8") as f:
+        manifest = json.load(f)
+
+    return {
+        "processed_sha256": manifest["processed_sha256"],
+        "processed_merkle_root": manifest["processed_merkle_root"],
+        "environment_hash": manifest["environment_hash"],
+    }
+
+
+if __name__ == "__main__":
+    # Script entry point: preprocess the same input twice and compare the
+    # manifest fields produced by each run.
+    if len(sys.argv) < 2:
+        print("Usage: python -m experiments.preprocessing_determinism.test_preprocessing <input_path>")
+        sys.exit(1)
+
+    logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
+
+    # Two runs over identical input; any differing field means non-determinism.
+    result1 = run(sys.argv[1])
+    result2 = run(sys.argv[1])
+
+    print(f"\nRun 1 hash: {result1['processed_sha256']}")
+    print(f"Run 2 hash: {result2['processed_sha256']}")
+
+    if (
+        result1["processed_sha256"] == result2["processed_sha256"]
+        and result1["processed_merkle_root"] == result2["processed_merkle_root"]
+        and result1["environment_hash"] == result2["environment_hash"]
+    ):
+        print("\nDeterministic preprocessing confirmed🎉")
+    else:
+        print("Hash didn't match❌")
+        sys.exit(1)  # non-zero status so a mismatch is visible to callers
\ No newline at end of file