diff --git a/demo/cmd_getting_started/vw_intro.ipynb b/demo/cmd_getting_started/vw_intro.ipynb
new file mode 100644
index 00000000000..724697a475c
--- /dev/null
+++ b/demo/cmd_getting_started/vw_intro.ipynb
@@ -0,0 +1,328 @@
+{
+ "cells": [
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Helpers"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def read(path):\n",
+ " with open(path) as f:\n",
+ " print(\"\".join(f.readlines()))"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Regression"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's generate data of the following form:
\n",
+ "Every example has single namespace 'f' with single feature 'x' in it
\n",
+ "Target function is $$\\hat{y} = 2x + 1$$\n",
+ "And we are learning weights $w$, $b$ for $$y=wx+b$$"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "\n",
+ "with open(\"regression1.txt\", \"w\") as f:\n",
+ " for i in range(1000):\n",
+ " x = np.random.rand()\n",
+ " y = 2 * x + 1\n",
+ " f.write(f\"{y} |f x:{x}\\n\")"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Simplest execution"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!vw -d regression1.txt"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Output more artifacts\n",
+ "-p - predictions
\n",
+ "--invert_hash - model in readable format
\n",
+ "-f - model in binary format (consumable by vw)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!vw -d regression1.txt -p regression1.pred --invert_hash regression1.model.txt -f regression1.model.bin"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can look at weights and see the $w$ and $b$ got close to expected 2 and 1 values"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "read(\"regression1.model.txt\")"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Do only predictions, no learning"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!vw -d regression1.txt -t"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!vw -d regression1.txt -t --learning_rate 10"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!vw -d regression1.txt -t -i regression1.model.bin"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Interactions"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's generate another dataset of the following form:
\n",
+ "Every example has single namespace 'f' with single feature 'x' in it
\n",
+ "Target function is $$\\hat{y} = x^2 + 1$$"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "\n",
+ "with open(\"regression2.txt\", \"w\") as f:\n",
+ " for i in range(1000):\n",
+ " x = np.random.rand() * 4\n",
+ " y = x**2 + 1\n",
+ " f.write(f\"{y} |f x:{x}\\n\")"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can see loss being far from zero if we stil try to learn $$y=wx+b$$ "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!vw -d regression2.txt --invert_hash regression2.model.txt"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "So let's try to learn $$y=w_1 x^2 + w_2 x + b$$"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!vw -d regression2.txt --invert_hash regression2.model.txt --interactions ff"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "read(\"regression2.model.txt\")"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Contextual bandits"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "\n",
+ "env = {\"Tom\": {\"sports\": 0, \"politics\": 1}, \"Anna\": {\"sports\": 1, \"politics\": 0}}\n",
+ "\n",
+ "users = [\"Tom\", \"Anna\"]\n",
+ "content = [\"sports\", \"politics\"]\n",
+ "\n",
+ "with open(\"cb.txt\", \"w\") as f:\n",
+ " for i in range(1000):\n",
+ " user = users[np.random.randint(0, 2)]\n",
+ " chosen = np.random.randint(0, 2)\n",
+ " reward = env[user][content[chosen]]\n",
+ "\n",
+ " f.write(f\"shared |u {user}\\n\")\n",
+ " f.write(f\"0:{-reward}:0.5 |a {content[chosen]}\\n\")\n",
+ " f.write(f\"|a {content[(chosen + 1) % 2]}\\n\\n\")"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's try to learn to predict reward in the following form: $$r = w_1 I(user\\ is\\ Tom) + w_2 I(user\\ is\\ Anna) + w_3 I(topic\\ is\\ sports) + w_4 I(topic\\ is\\ politics) + b$$"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!vw --cb_explore_adf -d cb.txt --invert_hash cb.model.txt"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can see that average reward is still around 0.5 which is the same as we would get answering randomly. This is expected since personalization is not captured in this form.\n",
+ "Let's add interaction between 'u' and 'a' namespaces and try to learn function of the following form:\n",
+ "$$\\begin{aligned}r = w_1 I(user\\ is\\ Tom) I(topic\\ is\\ sports) + w_2 I(user\\ is\\ Tom) I(topic\\ is\\ politics) +\\\\+ w_3 I(user\\ is\\ Anna) I(topic\\ is\\ sports) + w_4 I(user\\ is\\ Anna) I(topic\\ is\\ politics) +\\\\+ w_5 I(user\\ is\\ Tom) + w_6 I(user\\ is\\ Anna) +\\\\+ w_7 I(topic\\ is\\ sports) + w_8 I(topic\\ is\\ politics) + b\\end{aligned}$$"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!vw --cb_explore_adf -d cb.txt --invert_hash cb.model.txt --interactions ua"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "read(\"cb.model.txt\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.11"
+ },
+ "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/python/tests/e2e_v2/assert_job.py b/python/tests/e2e_v2/assert_job.py
new file mode 100644
index 00000000000..4acc1389ec3
--- /dev/null
+++ b/python/tests/e2e_v2/assert_job.py
@@ -0,0 +1,121 @@
+import numpy as np
+import os
+from numpy.testing import assert_allclose, assert_almost_equal
+from vw_executor.vw import ExecutionStatus
+import vowpalwabbit as vw
+from test_helper import get_function_object, datagen_driver
+
+
+def remove_non_digits(string):
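+ # keep only digits and ".", e.g. "[0.5]" -> "0.5" (note: a leading minus sign is dropped as well)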
+ return "".join(char for char in string if char.isdigit() or char == ".")
+
+
+def get_from_kwargs(kwargs, key, default=None):
+ return kwargs.get(key, default)
+
+
+def majority_close(arr1, arr2, rtol, atol, threshold):
+ # Check if the majority of elements are close
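+ # e.g. majority_close([1, 1, 5], [1, 1, 1], rtol=1e-5, atol=1e-8, threshold=0.6) -> True (2 of 3 close)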
+ close_count = np.count_nonzero(np.isclose(arr1, arr2, rtol=rtol, atol=atol))
+ return close_count >= len(arr1) * threshold
+
+
+def assert_weight(job, **kwargs):
+ atol = get_from_kwargs(kwargs, "atol", 10e-8)
+ rtol = get_from_kwargs(kwargs, "rtol", 10e-5)
+ expected_weights = kwargs["expected_weights"]
+ assert job.status == ExecutionStatus.Success, f"{job.opts} job should be successful"
+ weights = job[0].model9("--invert_hash").weights.to_dict()["weight"]
+ for x in expected_weights:
+ assert_allclose(
+ [weights[x]],
+ [expected_weights[x]],
+ atol=atol,
+ rtol=rtol,
+ err_msg=f"weight {x} should be {expected_weights[x]}",
+ )
+
+
+def assert_prediction(job, **kwargs):
+ assert job.status == ExecutionStatus.Success, "job should be successful"
+ atol = kwargs.get("atol", 10e-8)
+ rtol = kwargs.get("rtol", 10e-5)
+ threshold = kwargs.get("threshold", 0.9)
+ expected_value = kwargs["expected_value"]
+ predictions = job.outputs["-p"]
+ with open(predictions[0], "r") as f:
+ prediction = [i.strip() for i in f.readlines()]
+ prediction = [i for i in prediction if i != ""]
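+ # predictions may be scalars ("0.5"), comma-separated lists ("0.5,0.5"),
+ # or action:probability pairs ("0:0.5,1:0.5"); normalize them all to floats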
+ if ":" in prediction[0]:
+ prediction = [[j.split(":")[1] for j in i.split(",")] for i in prediction]
+ elif "," in prediction[0]:
+ prediction = [[j for j in i.split(",")] for i in prediction]
+ if type(prediction[0]) == list:
+ prediction = [[float(remove_non_digits(j)) for j in i] for i in prediction]
+ else:
+ prediction = [float(remove_non_digits(i)) for i in prediction]
+ assert majority_close(
+ prediction,
+ [expected_value] * len(prediction),
+ rtol=rtol,
+ atol=atol,
+ threshold=threshold,
+ ), f"predicted value should be {expected_value}, \n actual values are {prediction}"
+
+
+def assert_loss(job, **kwargs):
+ assert job.status == ExecutionStatus.Success, "job should be successful"
+ assert type(job[0].loss) == float, "loss should be a float"
+ decimal = kwargs.get("decimal", 2)
+ assert_almost_equal(job[0].loss, kwargs["expected_loss"], decimal=decimal)
+
+
+def assert_loss_below(job, **kwargs):
+ assert job.status == ExecutionStatus.Success, "job should be successful"
+ assert type(job[0].loss) == float, "loss should be a float"
+ assert (
+ job[0].loss <= kwargs["expected_loss"]
+ ), f"loss should be below {kwargs['expected_loss']}"
+
+
+def assert_prediction_with_generated_data(job, **kwargs):
+ assert job.status == ExecutionStatus.Success, "job should be successful"
+ expected_class = []
+ trained_model = vw.Workspace(f"-i {job[0].model9('-f').path} --quiet")
+ predictions = []
+ folder_path = os.path.dirname(os.path.realpath(__file__))
+ subdirectories = [
+ os.path.join(folder_path, name)
+ for name in os.listdir(folder_path)
+ if os.path.isdir(os.path.join(folder_path, name))
+ ]
+ for subdir in subdirectories:
+ try:
+ subdir_name = subdir.replace("\\", "/").split("/")[-1]
+ data_func_obj = get_function_object(
+ f"{subdir_name}.data_generation", kwargs["data_func"]["name"]
+ )
+ if data_func_obj:
+ break
+ except Exception:
+ pass
+ script_directory = os.path.dirname(os.path.realpath(__file__))
+ dataFile = datagen_driver(
+ os.path.join(script_directory, subdir_name),
+ data_func_obj,
+ **kwargs["data_func"]["params"],
+ )
+ with open(dataFile, "r") as f:
+ for line in f.readlines():
+ expected_class.append(line.split("|")[0].strip())
+ predicted_class = trained_model.predict(line.strip())
+ predictions.append(predicted_class)
+ accuracy = sum(
+ [1 if int(yp) == int(ye) else 0 for yp, ye in zip(predictions, expected_class)]
+ ) / len(expected_class)
+ assert (
+ accuracy >= kwargs["accuracy_threshold"]
+ ), f"Accuracy is {accuracy} and Threshold is {kwargs['accuracy_threshold']}"
diff --git a/python/tests/e2e_v2/cb/data_generation.py b/python/tests/e2e_v2/cb/data_generation.py
new file mode 100644
index 00000000000..d7c9bbb702f
--- /dev/null
+++ b/python/tests/e2e_v2/cb/data_generation.py
@@ -0,0 +1,67 @@
+import random
+import os
+from test_helper import get_function_object
+
+script_directory = os.path.dirname(os.path.realpath(__file__))
+
+
+def random_number_items(items):
+ num_items_to_select = random.randint(1, len(items))
+ return random.sample(items, num_items_to_select)
+
+
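+# generate_cb_data writes VW contextual-bandit examples. With multiple contexts it
+# emits ADF blocks, e.g.:
+#   shared | User s_1
+#   1:-1:0.5 | feature1
+#   | feature1
+# and with a single context, one plain CB line per example, e.g. "1:-1:0.5 | feature1".
+# Note: the seed default below is evaluated once at import time; pass seed explicitly
+# for reproducible data.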
+def generate_cb_data(
+ f,
+ num_examples,
+ num_features,
+ num_actions,
+ reward_function,
+ logging_policy,
+ context_name=["1"],
+ seed=random.randint(0, 100),
+):
+ random.seed(seed)
+
+ reward_function_obj = get_function_object(
+ "cb.reward_functions", reward_function["name"]
+ )
+ logging_policy_obj = get_function_object(
+ "cb.logging_policies", logging_policy["name"]
+ )
+ features = [f"feature{index}" for index in range(1, num_features + 1)]
+ for _ in range(num_examples):
+ no_context = len(context_name)
+ if no_context > 1:
+ context = random.randint(1, no_context)
+ else:
+ context = 1
+
+ def return_cost_probability(chosen_action, context=1):
+ cost = -reward_function_obj(
+ chosen_action, context, **reward_function["params"]
+ )
+ if "params" not in logging_policy:
+ logging_policy["params"] = {}
+ logging_policy["params"]["chosen_action"] = chosen_action
+ logging_policy["params"]["num_actions"] = num_actions
+ probability = logging_policy_obj(**logging_policy["params"])
+ return cost, probability
+
+ chosen_action = random.randint(1, num_actions)
+ if no_context > 1:
+ f.write(f"shared | User s_{context_name[context-1]}\n")
+ for action in range(1, num_actions + 1):
+ cost, probability = return_cost_probability(action, context)
+ if action == chosen_action:
+ f.write(
+ f'{action}:{cost}:{probability} | {" ".join(random_number_items(features))}\n'
+ )
+ else:
+ f.write(f'| {" ".join(random_number_items(features))}\n')
+
+ else:
+ cost, probability = return_cost_probability(chosen_action)
+ f.write(
+ f'{chosen_action}:{cost}:{probability} | {" ".join(random_number_items(features))}\n'
+ )
+ f.write("\n")
diff --git a/python/tests/e2e_v2/cb/logging_policies.py b/python/tests/e2e_v2/cb/logging_policies.py
new file mode 100644
index 00000000000..7f6f951fffa
--- /dev/null
+++ b/python/tests/e2e_v2/cb/logging_policies.py
@@ -0,0 +1,6 @@
+def constant_probability(chosen_action):
+ return 1
+
+
+def even_probability(chosen_action, num_actions):
+ return round(1 / num_actions, 2)
diff --git a/python/tests/e2e_v2/cb/reward_functions.py b/python/tests/e2e_v2/cb/reward_functions.py
new file mode 100644
index 00000000000..a13a699d0a4
--- /dev/null
+++ b/python/tests/e2e_v2/cb/reward_functions.py
@@ -0,0 +1,18 @@
+def fixed_reward(chosen_action, context):
+ return 1
+
+
+def constant_reward(chosen_action, context, reward):
+ return reward[chosen_action - 1]
+
+
+def fixed_reward_two_action(chosen_action, context):
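+ # reward table: (context 1, action 2) -> 1, (2, 2) -> 0, (1, 1) -> 0, (2, 1) -> 1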
+ if context == 1 and chosen_action == 2:
+ return 1
+ elif context == 2 and chosen_action == 2:
+ return 0
+ elif context == 1 and chosen_action == 1:
+ return 0
+ elif context == 2 and chosen_action == 1:
+ return 1
+ return 1
diff --git a/python/tests/e2e_v2/cb_cont/data_generation.py b/python/tests/e2e_v2/cb_cont/data_generation.py
new file mode 100644
index 00000000000..df2fa0de536
--- /dev/null
+++ b/python/tests/e2e_v2/cb_cont/data_generation.py
@@ -0,0 +1,62 @@
+import random
+import os
+from test_helper import get_function_object
+
+script_directory = os.path.dirname(os.path.realpath(__file__))
+
+
+def random_number_items(items):
+ num_items_to_select = random.randint(1, len(items))
+ return random.sample(items, num_items_to_select)
+
+
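+# generate_cb_data (continuous actions) writes CATS-style examples, e.g.
+#   ca 1.23:-1:0.5 | feature1 feature2
+# where the label is <chosen_action>:<cost>:<probability>.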
+def generate_cb_data(
+ f,
+ num_examples,
+ num_features,
+ action_range,
+ reward_function,
+ logging_policy,
+ context_name=["1"],
+ seed=random.randint(0, 100),
+):
+ random.seed(seed)
+ num_actions = int(abs(action_range[1] - action_range[0]))
+
+ reward_function_obj = get_function_object(
+ "cb_cont.reward_functions", reward_function["name"]
+ )
+ logging_policy_obj = get_function_object(
+ "cb_cont.logging_policies", logging_policy["name"]
+ )
+ features = [f"feature{index}" for index in range(1, num_features + 1)]
+
+ for _ in range(num_examples):
+ no_context = len(context_name)
+ if no_context > 1:
+ context = random.randint(1, no_context)
+ else:
+ context = 1
+
+ def return_cost_probability(chosen_action, context):
+ cost = -reward_function_obj(
+ chosen_action, context, **reward_function["params"]
+ )
+ if "params" not in logging_policy:
+ logging_policy["params"] = {}
+ logging_policy["params"]["chosen_action"] = chosen_action
+ logging_policy["params"]["num_actions"] = num_actions
+ probability = logging_policy_obj(**logging_policy["params"])
+ return cost, probability
+
+ chosen_action = round(random.uniform(0, num_actions), 2)
+ cost, probability = return_cost_probability(chosen_action, context)
+ if no_context == 1:
+ f.write(
+ f'ca {chosen_action}:{cost}:{probability} | {" ".join(random_number_items(features))}\n'
+ )
+ else:
+ f.write(
+ f'ca {chosen_action}:{cost}:{probability} | {"s_" + context_name[context-1]} {" ".join(random_number_items(features))}\n'
+ )
+ f.write("\n")
diff --git a/python/tests/e2e_v2/cb_cont/logging_policies.py b/python/tests/e2e_v2/cb_cont/logging_policies.py
new file mode 100644
index 00000000000..7f6f951fffa
--- /dev/null
+++ b/python/tests/e2e_v2/cb_cont/logging_policies.py
@@ -0,0 +1,6 @@
+def constant_probability(chosen_action):
+ return 1
+
+
+def even_probability(chosen_action, num_actions):
+ return round(1 / num_actions, 2)
diff --git a/python/tests/e2e_v2/cb_cont/reward_functions.py b/python/tests/e2e_v2/cb_cont/reward_functions.py
new file mode 100644
index 00000000000..c865e666084
--- /dev/null
+++ b/python/tests/e2e_v2/cb_cont/reward_functions.py
@@ -0,0 +1,18 @@
+def fixed_reward(chosen_action, context):
+ return 1
+
+
+def piecewise_constant(chosen_action, context, reward):
+ return reward[int(chosen_action) - 1]
+
+
+def fixed_reward_two_action(chosen_action, context):
+ if context == 1 and chosen_action >= 2:
+ return 1
+ elif context == 2 and chosen_action < 2 and chosen_action >= 1:
+ return 0
+ elif context == 1 and chosen_action < 1:
+ return 0
+ elif context == 2 and chosen_action < 1:
+ return 1
+ return 1
diff --git a/python/tests/e2e_v2/classification/classification_functions.py b/python/tests/e2e_v2/classification/classification_functions.py
new file mode 100644
index 00000000000..67aeb8a77d2
--- /dev/null
+++ b/python/tests/e2e_v2/classification/classification_functions.py
@@ -0,0 +1,19 @@
+def binary_classification_one_feature(input_vector):
+ if input_vector[0] > 0.5:
+ return 2
+ return 1
+
+
+def multi_classification_two_features(input_vector):
+ # Define the number of divisions for each feature
+ divisions = 5
+
+ # Calculate the division size for each feature
+ division_size = 1 / divisions
+
+ # Calculate the class index based on the input vector's position in the feature space
+ class_idx = int(input_vector[0] // division_size) * divisions + int(
+ input_vector[1] // division_size
+ )
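+ # e.g. input_vector=[0.1, 0.9]: int(0.1 // 0.2) * 5 + int(0.9 // 0.2) = 0 + 4 -> class 5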
+
+ return class_idx + 1
diff --git a/python/tests/e2e_v2/classification/data_generation.py b/python/tests/e2e_v2/classification/data_generation.py
new file mode 100644
index 00000000000..c65e49b3350
--- /dev/null
+++ b/python/tests/e2e_v2/classification/data_generation.py
@@ -0,0 +1,29 @@
+import os, random
+from test_helper import get_function_object
+
+
+script_directory = os.path.dirname(os.path.realpath(__file__))
+random.seed(10)
+
+
+def generate_classification_data(
+ f,
+ num_example,
+ num_features,
+ classify_func,
+ seed=random.randint(0, 100),
+ bounds=None,
+):
+ random.seed(seed)
+ classify_func_obj = get_function_object(
+ "classification.classification_functions", classify_func["name"]
+ )
+ if not bounds:
+ bounds = [[0, 1] for _ in range(num_features)]
+ for _ in range(num_example):
+ x = [
+ random.uniform(bounds[index][0], bounds[index][1])
+ for index in range(num_features)
+ ]
+ y = classify_func_obj(x, **classify_func["params"])
+ f.write(f"{y} |f {' '.join([f'x{i}:{x[i]}' for i in range(num_features)])}\n")
diff --git a/python/tests/e2e_v2/conftest.py b/python/tests/e2e_v2/conftest.py
new file mode 100644
index 00000000000..5297e54cec2
--- /dev/null
+++ b/python/tests/e2e_v2/conftest.py
@@ -0,0 +1,16 @@
+# conftest.py
+def pytest_addoption(parser):
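+ # Example: pytest --store_output=true
+ # (any non-empty value is truthy; test_core.py then copies job outputs under e2e_v2/output/)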
+ parser.addoption(
+ "--store_output",
+ action="store",
+ default=False,
+ help="Store output file for tests.",
+ )
+
+
+def pytest_configure(config):
+ _store_output = config.getoption("--store_output")
+ # Store the custom_arg_value in a global variable or a custom configuration object.
+ # For example, you can store it in a global variable like this:
+ global STORE_OUTPUT
+ STORE_OUTPUT = _store_output
diff --git a/python/tests/e2e_v2/regression/data_generation.py b/python/tests/e2e_v2/regression/data_generation.py
new file mode 100644
index 00000000000..cf6e8bca04c
--- /dev/null
+++ b/python/tests/e2e_v2/regression/data_generation.py
@@ -0,0 +1,13 @@
+import random
+import os
+
+script_directory = os.path.dirname(os.path.realpath(__file__))
+
+
+def constant_function(
+ f, no_sample, constant, x_lower_bound, x_upper_bound, seed=random.randint(0, 100)
+):
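+ # emits VW regression lines of the form "<constant> |f x:<value>"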
+ random.seed(seed)
+ for _ in range(no_sample):
+ x = random.uniform(x_lower_bound, x_upper_bound)
+ f.write(f"{constant} |f x:{x}\n")
diff --git a/python/tests/e2e_v2/slate/action_space.py b/python/tests/e2e_v2/slate/action_space.py
new file mode 100644
index 00000000000..bd13b796357
--- /dev/null
+++ b/python/tests/e2e_v2/slate/action_space.py
@@ -0,0 +1,5 @@
+def new_action_after_threshold(iteration, threshold, before, after):
+ # return the "before" action space up to the threshold iteration, and "after" past it
+ if iteration > threshold:
+ return after
+ return before
diff --git a/python/tests/e2e_v2/slate/assert_job.py b/python/tests/e2e_v2/slate/assert_job.py
new file mode 100644
index 00000000000..aff9061df6e
--- /dev/null
+++ b/python/tests/e2e_v2/slate/assert_job.py
@@ -0,0 +1,42 @@
+from vw_executor.vw import ExecutionStatus
+import numpy as np
+
+
+def majority_close(arr1, arr2, rtol, atol, threshold):
+ # Check if the majority of elements are close
+ close_count = np.count_nonzero(np.isclose(arr1, arr2, rtol=rtol, atol=atol))
+ return close_count >= len(arr1) * threshold
+
+
+def assert_prediction(job, **kwargs):
+ assert job.status == ExecutionStatus.Success, "job should be successful"
+ atol = kwargs.get("atol", 10e-8)
+ rtol = kwargs.get("rtol", 10e-5)
+ threshold = kwargs.get("threshold", 0.9)
+ expected_value = kwargs["expected_value"]
+ predictions = job.outputs["-p"]
+ res = []
+ with open(predictions[0], "r") as f:
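+ # each prediction line holds action:probability pairs for one slot (e.g. "0:0.9,1:0.1");
+ # a line without ":" ends the current example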
+ exampleRes = []
+ while True:
+ line = f.readline()
+ if not line:
+ break
+ if line.count(":") == 0:
+ res.append(exampleRes)
+ exampleRes = []
+ continue
+ slotRes = [0] * line.count(":")
+ slot = line.split(",")
+ for i in range(len(slot)):
+ actionInd = int(slot[i].split(":")[0])
+ slotRes[i] = float(slot[actionInd].split(":")[1])
+ exampleRes.append(slotRes)
+
+ assert majority_close(
+ res,
+ [expected_value] * len(res),
+ rtol=rtol,
+ atol=atol,
+ threshold=threshold,
+ ), f"predicted value should be {expected_value}, \n actual values are {res}"
diff --git a/python/tests/e2e_v2/slate/data_generation.py b/python/tests/e2e_v2/slate/data_generation.py
new file mode 100644
index 00000000000..d308b245d3d
--- /dev/null
+++ b/python/tests/e2e_v2/slate/data_generation.py
@@ -0,0 +1,69 @@
+import random
+import os
+from test_helper import get_function_object
+
+script_directory = os.path.dirname(os.path.realpath(__file__))
+
+
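+# generate_slate_data writes VW slates examples, e.g.:
+#   slates shared -2 |User 1
+#   slates action 0 |Action longshirt
+#   slates slot 1:0.5 |Slot slot_1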
+def generate_slate_data(
+ f,
+ num_examples,
+ reward_function,
+ logging_policy,
+ action_space,
+ context_name=["1"],
+ seed=random.randint(0, 100),
+):
+ random.seed(seed)
+ action_space_obj = get_function_object("slate.action_space", action_space["name"])
+
+ reward_function_obj = get_function_object(
+ "slate.reward_functions", reward_function["name"]
+ )
+ logging_policy_obj = get_function_object(
+ "slate.logging_policies", logging_policy["name"]
+ )
+
+ def return_cost_probability(chosen_action, chosen_slot, context):
+ cost = -reward_function_obj(
+ chosen_action, context, chosen_slot, **reward_function["params"]
+ )
+ logging_policy["params"]["num_action"] = num_actions[chosen_slot - 1]
+ logging_policy["params"]["chosen_action"] = chosen_action
+ probability = logging_policy_obj(**logging_policy["params"])
+ return cost, probability
+
+ for i in range(num_examples):
+ action_space["params"]["iteration"] = i
+ action_spaces = action_space_obj(**action_space["params"])
+ reward_function["params"]["iteration"] = i
+ num_slots = len(action_spaces)
+ num_actions = [len(slot) for slot in action_spaces]
+ slot_name = [f"slot_{index}" for index in range(1, num_slots + 1)]
+ chosen_actions = []
+ num_context = len(context_name)
+ if num_context > 1:
+ context = random.randint(1, num_context)
+ else:
+ context = 1
+ for s in range(num_slots):
+ chosen_actions.append(random.randint(1, num_actions[s]))
+ chosen_actions_cost_prob = [
+ return_cost_probability(action, slot + 1, context)
+ for slot, action in enumerate(chosen_actions)
+ ]
+ total_cost = sum([cost for cost, _ in chosen_actions_cost_prob])
+
+ f.write(f"slates shared {total_cost} |User {context_name[context-1]}\n")
+ # write actions
+ for ind, slot in enumerate(action_spaces):
+ for a in slot:
+ f.write(
+ f"slates action {ind} |Action {a}\n",
+ )
+
+ for s in range(num_slots):
+ f.write(
+ f"slates slot {chosen_actions[s]}:{chosen_actions_cost_prob[s][1]} |Slot {slot_name[s]}\n"
+ )
+ f.write("\n")
diff --git a/python/tests/e2e_v2/slate/logging_policies.py b/python/tests/e2e_v2/slate/logging_policies.py
new file mode 100644
index 00000000000..4222a514b1f
--- /dev/null
+++ b/python/tests/e2e_v2/slate/logging_policies.py
@@ -0,0 +1,2 @@
+def even_probability(chosen_action, num_action):
+ return round(1 / num_action, 2)
diff --git a/python/tests/e2e_v2/slate/reward_functions.py b/python/tests/e2e_v2/slate/reward_functions.py
new file mode 100644
index 00000000000..2dc79a5100f
--- /dev/null
+++ b/python/tests/e2e_v2/slate/reward_functions.py
@@ -0,0 +1,10 @@
+def fixed_reward(chosen_action, context, slot, reward):
+ return reward[slot - 1][chosen_action - 1]
+
+
+def reverse_reward_after_threshold(
+ chosen_action, context, slot, reward, iteration, threshold
+):
+ if iteration > threshold:
+ reward = [i[::-1] for i in reward]
+ return reward[slot - 1][chosen_action - 1]
diff --git a/python/tests/e2e_v2/test_configs/cb.json b/python/tests/e2e_v2/test_configs/cb.json
new file mode 100644
index 00000000000..15fa7584fe2
--- /dev/null
+++ b/python/tests/e2e_v2/test_configs/cb.json
@@ -0,0 +1,270 @@
+[
+ {
+ "test_name": "cb_two_action",
+ "data_func": {
+ "name": "generate_cb_data",
+ "params": {
+ "num_examples": 100,
+ "num_features": 1,
+ "num_actions": 2,
+ "seed": 1,
+ "reward_function": {
+ "name": "constant_reward",
+ "params": {
+ "reward": [
+ 1,
+ 0
+ ]
+ }
+ },
+ "logging_policy": {
+ "name": "even_probability",
+ "params": {}
+ },
+ "context_name": [
+ "1",
+ "2"
+ ]
+ }
+ },
+ "assert_functions": [
+ {
+ "name": "assert_loss",
+ "params": {
+ "expected_loss": -1,
+ "decimal": 1
+ }
+ },
+ {
+ "name": "assert_prediction",
+ "params": {
+ "expected_value": [
+ 1,
+ 0
+ ],
+ "threshold": 0.8
+ }
+ }
+ ],
+ "grids": {
+ "cb": {
+ "#base": [
+ "--cb_explore 2"
+ ]
+ },
+ "epsilon": {
+ "--epsilon": [
+ 0.1,
+ 0.2,
+ 0.3
+ ]
+ },
+ "first": {
+ "--first": [
+ 1,
+ 2
+ ]
+ },
+ "bag": {
+ "--bag": [
+ 5,
+ 6,
+ 7
+ ]
+ },
+ "cover": {
+ "--cover": [
+ 1,
+ 2,
+ 3
+ ]
+ },
+ "squarecb": {
+ "--squarecb": [
+ "--gamma_scale 1000",
+ "--gamma_scale 10000"
+ ]
+ },
+ "synthcover": {
+ "--synthcover": [
+ ""
+ ]
+ },
+ "regcb": {
+ "--regcb": [
+ ""
+ ]
+ },
+ "softmax": {
+ "--softmax": [
+ ""
+ ]
+ }
+ },
+ "grids_expression": "cb * (epsilon + first + bag + cover + squarecb + synthcover + regcb + softmax)",
+ "output": [
+ "--readable_model",
+ "-p"
+ ]
+ },
+ {
+ "test_name": "cb_one_action",
+ "data_func": {
+ "name": "generate_cb_data",
+ "params": {
+ "num_examples": 100,
+ "num_features": 1,
+ "num_actions": 1,
+ "seed": 1,
+ "reward_function": {
+ "name": "fixed_reward",
+ "params": {}
+ },
+ "logging_policy": {
+ "name": "even_probability"
+ }
+ }
+ },
+ "assert_functions": [
+ {
+ "name": "assert_loss",
+ "params": {
+ "expected_loss": -1,
+ "decimal": 1
+ }
+ },
+ {
+ "name": "assert_prediction",
+ "params": {
+ "expected_value": 0,
+ "threshold": 0.1
+ }
+ }
+ ],
+ "grids": {
+ "g0": {
+ "#base": [
+ "--cb 1 --preserve_performance_counters --save_resume"
+ ]
+ },
+ "g1": {
+ "--cb_type": [
+ "ips",
+ "mtr",
+ "dr",
+ "dm"
+ ]
+ }
+ },
+ "grids_expression": "g0 * g1",
+ "output": [
+ "--readable_model",
+ "-p"
+ ]
+ },
+ {
+ "test_name": "cb_two_action_diff_context",
+ "data_func": {
+ "name": "generate_cb_data",
+ "params": {
+ "num_examples": 100,
+ "num_features": 2,
+ "num_actions": 2,
+ "seed": 1,
+ "reward_function": {
+ "name": "fixed_reward_two_action",
+ "params": {}
+ },
+ "logging_policy": {
+ "name": "even_probability",
+ "params": {}
+ },
+ "context_name": [
+ "1",
+ "2"
+ ]
+ }
+ },
+ "assert_functions": [
+ {
+ "name": "assert_loss",
+ "params": {
+ "expected_loss": -0.5,
+ "decimal": 1
+ }
+ },
+ {
+ "name": "assert_prediction",
+ "params": {
+ "expected_value": [
+ 0.975,
+ 0.025
+ ],
+ "threshold": 0.1,
+ "atol": 0.1,
+ "rtol": 0.1
+ }
+ }
+ ],
+ "grids": {
+ "cb": {
+ "#base": [
+ "--cb_explore_adf"
+ ]
+ },
+ "epsilon": {
+ "--epsilon": [
+ 0.1,
+ 0.2,
+ 0.3
+ ]
+ },
+ "first": {
+ "--first": [
+ 1,
+ 2
+ ]
+ },
+ "bag": {
+ "--bag": [
+ 5,
+ 6,
+ 7
+ ]
+ },
+ "cover": {
+ "--cover": [
+ 1,
+ 2,
+ 3
+ ]
+ },
+ "squarecb": {
+ "--squarecb": [
+ "--gamma_scale 1000",
+ "--gamma_scale 10000"
+ ]
+ },
+ "synthcover": {
+ "--synthcover": [
+ ""
+ ]
+ },
+ "regcb": {
+ "--regcb": [
+ ""
+ ]
+ },
+ "softmax": {
+ "--softmax": [
+ ""
+ ]
+ }
+ },
+ "grids_expression": "cb * (epsilon + first + bag + cover + squarecb + synthcover + regcb + softmax)",
+ "output": [
+ "--readable_model",
+ "-p"
+ ]
+ }
+]
\ No newline at end of file
diff --git a/python/tests/e2e_v2/test_configs/cb_cont.json b/python/tests/e2e_v2/test_configs/cb_cont.json
new file mode 100644
index 00000000000..1a0d6e49066
--- /dev/null
+++ b/python/tests/e2e_v2/test_configs/cb_cont.json
@@ -0,0 +1,195 @@
+[
+ {
+ "test_name": "cb_two_action",
+ "data_func": {
+ "name": "generate_cb_data",
+ "params": {
+ "num_examples": 100,
+ "num_features": 1,
+ "seed": 1,
+ "action_range": [
+ 0,
+ 2
+ ],
+ "reward_function": {
+ "name": "piecewise_constant",
+ "params": {
+ "reward": [
+ 1,
+ 0
+ ]
+ }
+ },
+ "logging_policy": {
+ "name": "even_probability",
+ "params": {}
+ }
+ }
+ },
+ "assert_functions": [
+ {
+ "name": "assert_loss",
+ "params": {
+ "expected_loss": -1,
+ "decimal": 1
+ }
+ },
+ {
+ "name": "assert_prediction",
+ "params": {
+ "expected_value": [
+ 1,
+ 0
+ ],
+ "threshold": 0.8
+ }
+ }
+ ],
+ "grids": {
+ "cb": {
+ "#base": [
+ "--cats 2 --min_value 0 --max_value 2 --bandwidth 1"
+ ]
+ },
+ "epsilon": {
+ "--epsilon": [
+ 0.1,
+ 0.2,
+ 0.3
+ ]
+ }
+ },
+ "grids_expression": "cb * (epsilon)",
+ "output": [
+ "--readable_model",
+ "-p"
+ ]
+ },
+ {
+ "test_name": "cb_two_action_diff_context",
+ "data_func": {
+ "name": "generate_cb_data",
+ "params": {
+ "num_examples": 100,
+ "num_features": 2,
+ "seed": 1,
+ "action_range": [
+ 0,
+ 2
+ ],
+ "reward_function": {
+ "name": "fixed_reward_two_action",
+ "params": {}
+ },
+ "logging_policy": {
+ "name": "even_probability",
+ "params": {}
+ },
+ "context_name": [
+ "1",
+ "2"
+ ]
+ }
+ },
+ "assert_functions": [
+ {
+ "name": "assert_loss",
+ "params": {
+ "expected_loss": -0.8,
+ "decimal": 1
+ }
+ },
+ {
+ "name": "assert_prediction",
+ "params": {
+ "expected_value": [
+ 0.975,
+ 0.025
+ ],
+ "threshold": 0.1,
+ "atol": 0.1,
+ "rtol": 0.1
+ }
+ }
+ ],
+ "grids": {
+ "cb": {
+ "#base": [
+ "--cats 2 --min_value 0 --max_value 2 --bandwidth 1"
+ ]
+ },
+ "epsilon": {
+ "--epsilon": [
+ 0.1,
+ 0.2,
+ 0.3
+ ]
+ }
+ },
+ "grids_expression": "cb * (epsilon)",
+ "output": [
+ "--readable_model",
+ "-p"
+ ]
+ },
+ {
+ "test_name": "cb_one_action",
+ "data_func": {
+ "name": "generate_cb_data",
+ "params": {
+ "num_examples": 10,
+ "num_features": 1,
+ "seed": 1,
+ "action_range": [
+ 0,
+ 1
+ ],
+ "reward_function": {
+ "name": "fixed_reward",
+ "params": {}
+ },
+ "logging_policy": {
+ "name": "even_probability"
+ }
+ }
+ },
+ "assert_functions": [
+ {
+ "name": "assert_loss",
+ "params": {
+ "expected_loss": -1
+ }
+ },
+ {
+ "name": "assert_prediction",
+ "params": {
+ "expected_value": [
+ 0,
+ 1
+ ],
+ "threshold": 0.1
+ }
+ }
+ ],
+ "grids": {
+ "g0": {
+ "#base": [
+ "--cats 2 --min_value 0 --max_value 1 --bandwidth 1"
+ ]
+ },
+ "g1": {
+ "--cb_type": [
+ "ips",
+ "mtr",
+ "dr",
+ "dm"
+ ]
+ }
+ },
+ "grids_expression": "g0 * g1",
+ "output": [
+ "--readable_model",
+ "-p"
+ ]
+ }
+]
\ No newline at end of file
diff --git a/python/tests/e2e_v2/test_configs/classification.json b/python/tests/e2e_v2/test_configs/classification.json
new file mode 100644
index 00000000000..738e1981eda
--- /dev/null
+++ b/python/tests/e2e_v2/test_configs/classification.json
@@ -0,0 +1,102 @@
+[
+ {
+ "test_name": "binary_class",
+ "data_func": {
+ "name": "generate_classification_data",
+ "params": {
+ "num_example": 2000,
+ "num_features": 1,
+ "seed": 1,
+ "classify_func": {
+ "name": "binary_classification_one_feature",
+ "params": {}
+ },
+ "bounds": [
+ [
+ 0,
+ 1
+ ]
+ ]
+ }
+ },
+ "assert_functions": [
+ {
+ "name": "assert_prediction_with_generated_data",
+ "params": {
+ "data_func": {
+ "name": "generate_classification_data",
+ "params": {
+ "num_example": 100,
+ "num_features": 1,
+ "classify_func": {
+ "name": "binary_classification_one_feature",
+ "params": {}
+ },
+ "seed": 1
+ }
+ },
+ "accuracy_threshold": 0.9
+ }
+ }
+ ],
+ "grids": {
+ "g0": {
+ "#base": [
+ "--oaa 3"
+ ]
+ }
+ },
+ "grids_expression": "g0",
+ "output": [
+ "--readable_model",
+ "-p"
+ ]
+ },
+ {
+ "test_name": "multiclass_two_features",
+ "data_func": {
+ "name": "generate_classification_data",
+ "params": {
+ "num_example": 100000,
+ "num_features": 2,
+ "classify_func": {
+ "name": "multi_classification_two_features",
+ "params": {}
+ },
+ "seed": 1
+ }
+ },
+ "assert_functions": [
+ {
+ "name": "assert_prediction_with_generated_data",
+ "params": {
+ "data_func": {
+ "name": "generate_classification_data",
+ "params": {
+ "num_example": 500,
+ "num_features": 2,
+ "seed": 1,
+ "classify_func": {
+ "name": "multi_classification_two_features",
+ "params": {}
+ }
+ }
+ },
+ "accuracy_threshold": 0.5
+ }
+ }
+ ],
+ "grids": {
+ "g0": {
+ "#base": [
+ "--oaa 25"
+ ]
+ }
+ },
+ "grids_expression": "g0",
+ "output": [
+ "--readable_model",
+ "-p"
+ ]
+ }
+]
\ No newline at end of file
diff --git a/python/tests/e2e_v2/test_configs/regression.json b/python/tests/e2e_v2/test_configs/regression.json
new file mode 100644
index 00000000000..ae4a5bcc973
--- /dev/null
+++ b/python/tests/e2e_v2/test_configs/regression.json
@@ -0,0 +1,107 @@
+[
+ {
+ "data_func": {
+ "name": "constant_function",
+ "params": {
+ "no_sample": 2000,
+ "constant": 5,
+ "x_lower_bound": 1,
+ "x_upper_bound": 100,
+ "seed" : 1
+ }
+ },
+ "assert_functions": [
+ {
+ "name": "assert_prediction",
+ "params": {
+ "expected_value": [
+ 5
+ ],
+ "threshold": 0.8
+ }
+ },
+ {
+ "name": "assert_weight",
+ "params": {
+ "expected_weights": {
+ "f^x": 0,
+ "Constant": 5
+ },
+ "atol": 0.1,
+ "rtol": 1
+ }
+ }
+ ],
+ "grids": {
+ "g0": {
+ "#base": [
+ "-P 50000 --preserve_performance_counters --save_resume"
+ ]
+ },
+ "g1": {
+ "--learning_rate": [
+ null,
+ 0.1,
+ 0.01,
+ 0.001
+ ],
+ "--decay_learning_rate": [
+ null,
+ 1.1,
+ 1,
+ 0.9
+ ],
+ "--power_t": [
+ null,
+ 0.5,
+ 0.6,
+ 0.4
+ ]
+ },
+ "g2": {
+ "#reg": [
+ "--freegrad",
+ "--conjugate_gradient",
+ "--bfgs --passes 1 --cache"
+ ]
+ },
+ "g3": {
+ "#reg": [
+ "--ftrl",
+ "--coin",
+ "--pistol"
+ ],
+ "--ftrl_alpha": [
+ null,
+ 0.1
+ ],
+ "--ftrl_beta": [
+ null,
+ 0.1
+ ]
+ },
+ "g4": {
+ "--loss_function": [
+ null,
+ "poisson",
+ "quantile"
+ ]
+ },
+ "g5": {
+ "--loss_function": [
+ "expectile"
+ ],
+ "--expectile_q": [
+ 0.25,
+ 0.5
+ ]
+ }
+ },
+ "grids_expression": "g0 * (g1 + g2 + g3) * (g5 + g4)",
+ "output": [
+ "--readable_model",
+ "--invert_hash",
+ "-p"
+ ]
+ }
+]
\ No newline at end of file
diff --git a/python/tests/e2e_v2/test_configs/slate.json b/python/tests/e2e_v2/test_configs/slate.json
new file mode 100644
index 00000000000..670b2992f40
--- /dev/null
+++ b/python/tests/e2e_v2/test_configs/slate.json
@@ -0,0 +1,145 @@
+[
+ {
+ "test_name": "slates",
+ "data_func": {
+ "name": "generate_slate_data",
+ "params": {
+ "num_examples": 1000,
+ "seed" : 1,
+ "reward_function": {
+ "name": "reverse_reward_after_threshold",
+ "params": {
+ "reward": [
+ [
+ 1,
+ 0
+ ],
+ [
+ 0,
+ 1
+ ]
+ ],
+ "threshold": 500
+ }
+ },
+ "logging_policy": {
+ "name": "even_probability",
+ "params": {}
+ },
+ "action_space": {
+ "name": "new_action_after_threshold",
+ "params": {
+ "threshold": 500,
+ "before": [
+ [
+ "longshirt",
+ "tshirt"
+ ],
+ [
+ "shorts",
+ "jeans"
+ ]
+ ],
+ "after": [
+ [
+ "rainshirt",
+ "buttonupshirt"
+ ],
+ [
+ "formalpants",
+ "rainpants"
+ ]
+ ]
+ }
+ }
+ }
+ },
+ "assert_functions": [
+ {
+ "name": "assert_loss",
+ "params": {
+ "expected_loss": -1.9,
+ "decimal": 0.1
+ }
+ },
+ {
+ "name": "assert_prediction",
+ "params": {
+ "expected_value": [
+ [
+ 0.1,
+ 0.9
+ ],
+ [
+ 0.9,
+ 0.1
+ ]
+ ],
+ "threshold": 0.8,
+ "atol": 0.01,
+ "rtol": 0.01
+ }
+ }
+ ],
+ "grids": {
+ "slate": {
+ "#base": [
+ "--slates"
+ ]
+ },
+ "epsilon": {
+ "--epsilon": [
+ 0.1,
+ 0.2,
+ 0.3
+ ]
+ },
+ "first": {
+ "--first": [
+ 1,
+ 2
+ ]
+ },
+ "bag": {
+ "--bag": [
+ 5,
+ 6,
+ 7
+ ]
+ },
+ "cover": {
+ "--cover": [
+ 1,
+ 2,
+ 3
+ ]
+ },
+ "squarecb": {
+ "--squarecb": [
+ "--gamma_scale 1000",
+ "--gamma_scale 10000"
+ ]
+ },
+ "synthcover": {
+ "--synthcover": [
+ ""
+ ]
+ },
+ "regcb": {
+ "--regcb": [
+ ""
+ ]
+ },
+ "softmax": {
+ "--softmax": [
+ ""
+ ]
+ }
+ },
+ "grids_expression": "slate * (epsilon + first + bag + cover + squarecb + synthcover + regcb + softmax)",
+ "output": [
+ "--readable_model",
+ "-p"
+ ]
+ }
+]
\ No newline at end of file
diff --git a/python/tests/e2e_v2/test_core.py b/python/tests/e2e_v2/test_core.py
new file mode 100644
index 00000000000..26d071de838
--- /dev/null
+++ b/python/tests/e2e_v2/test_core.py
@@ -0,0 +1,129 @@
+from vw_executor.vw import Vw
+from vw_executor.vw_opts import Grid
+import pytest
+import os
+import logging
+from test_helper import (
+ json_to_dict_list,
+ evaluate_expression,
+ copy_file,
+ custom_sort,
+ get_function_obj_with_dirs,
+ datagen_driver,
+)
+from conftest import STORE_OUTPUT
+
+CURR_DICT = os.path.dirname(os.path.abspath(__file__))
+TEST_CONFIG_FILES_NAME = os.listdir(os.path.join(CURR_DICT, "test_configs"))
+TEST_CONFIG_FILES = [json_to_dict_list(i) for i in TEST_CONFIG_FILES_NAME]
+GENERATED_TEST_CASES = []
+logging.basicConfig(
+ format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO
+)
+
+
+def cleanup_data_file():
+ script_directory = os.path.dirname(os.path.realpath(__file__))
+ # List all files in the directory
+ for name in TEST_CONFIG_FILES_NAME:
+ name = name.split(".")[0]
+ try:
+ files = os.listdir(os.path.join(script_directory, name))
+ except FileNotFoundError:
+ continue
+ # Iterate over the files and remove the ones with .txt extension
+ for file in files:
+ if file.endswith(".txt"):
+ file_path = os.path.join(script_directory, name, file)
+ os.remove(file_path)
+
+
+@pytest.fixture
+def test_descriptions(request):
+ resource = request.param
+ yield resource
+ cleanup_data_file()
+
+
+def core_test(files, grid, outputs, job_assert, job_assert_args):
+ vw = Vw(CURR_DICT + "/.vw_cache", reset=True, handler=None)
+ result = vw.train(files, grid, outputs)
+ for j in result:
+ test_name = (
+ job_assert.__name__
+ + "_"
+ + "_".join("".join([i for i in str(j.opts) if i != "-"]).split(" "))
+ )
+ GENERATED_TEST_CASES.append(
+ # bind j as a default argument; a bare lambda would late-bind the loop variable
+ [lambda j=j: job_assert(j, **job_assert_args), test_name]
+ )
+ if STORE_OUTPUT:
+ if not os.path.exists(CURR_DICT + "/output"):
+ os.mkdir(CURR_DICT + "/output")
+ if not os.path.exists(CURR_DICT + "/output/" + test_name):
+ os.mkdir(CURR_DICT + "/output/" + test_name)
+ fileName = str(list(j.outputs.values())[0][0]).split("/")[-1]
+ for key, value in list(j.outputs.items()):
+ copy_file(
+ value[0],
+ CURR_DICT + "/output/" + test_name + "/" + f"{key}_" + fileName,
+ )
+ copy_file(
+ os.path.join(j.cache.path, "cacheNone/" + fileName),
+ CURR_DICT + "/output/" + test_name + "/" + fileName,
+ )
+
+
+def get_options(grids, expression):
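+ # build a vw_executor Grid per named option group, then evaluate the config's grids_expression over them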
+ final_variables = {}
+ for key in grids:
+ final_variables[key] = Grid(grids[key])
+ return evaluate_expression(expression, final_variables)
+
+
+def init_all(test_descriptions):
+ for tIndex, tests in enumerate(test_descriptions):
+ task_folder = TEST_CONFIG_FILES_NAME[tIndex].split(".")[0]
+ package_name = [task_folder + ".", ""]
+ package_name = custom_sort(task_folder, package_name)
+ package_name.append(".")
+ if type(tests) is not list:
+ tests = [tests]
+ for test_description in tests:
+ options = get_options(
+ test_description["grids"], test_description["grids_expression"]
+ )
+ data_func = get_function_obj_with_dirs(
+ package_name,
+ "data_generation",
+ test_description["data_func"]["name"],
+ )
+ scenario_directory = (
+ os.path.dirname(os.path.realpath(__file__)) + f"/{task_folder}"
+ )
+ data = datagen_driver(
+ scenario_directory, data_func, **test_description["data_func"]["params"]
+ )
+ script_directory = os.path.dirname(os.path.realpath(__file__))
+ for assert_func in test_description["assert_functions"]:
+ assert_job = get_function_obj_with_dirs(
+ package_name, "assert_job", assert_func["name"]
+ )
+ core_test(
+ os.path.join(script_directory, data),
+ options,
+ test_description["output"],
+ assert_job,
+ assert_func["params"],
+ )
+
+
+try:
+ init_all(TEST_CONFIG_FILES)
+ for generated_test_case in GENERATED_TEST_CASES:
+ test_name = f"test_{generated_test_case[1]}"
+ generated_test_case[0].__name__ = test_name
+ globals()[test_name] = generated_test_case[0]
+finally:
+ cleanup_data_file()
diff --git a/python/tests/e2e_v2/test_helper.py b/python/tests/e2e_v2/test_helper.py
new file mode 100644
index 00000000000..b4fd9bd759c
--- /dev/null
+++ b/python/tests/e2e_v2/test_helper.py
@@ -0,0 +1,140 @@
+import json
+import importlib
+import os
+import itertools
+import inspect
+import shutil
+
+# Get the current directory
+current_dir = os.path.dirname(os.path.abspath(__file__))
+
+
+def json_to_dict_list(file):
+ with open(current_dir + "/test_configs/" + file, "r") as file:
+ # Load the JSON data
+ return json.load(file)
+
+
+def evaluate_expression(expression, variables):
+ # Evaluate the expression with the provided variables in scope
+ return eval(expression, dict(variables))
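+
+
+# Example: evaluate_expression("g0 * (g1 + g2)", {"g0": g0, "g1": g1, "g2": g2})
+# composes vw_executor Grid objects via their * and + operators, matching the
+# "grids_expression" fields in test_configs/*.json.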
+
+
+def dynamic_function_call(module_name, function_name, *args, **kwargs):
+ try:
+ calling_frame = inspect.stack()[1]
+ calling_module = inspect.getmodule(calling_frame[0])
+ calling_package = calling_module.__package__
+ module = importlib.import_module(module_name, package=calling_package)
+ function = getattr(module, function_name)
+ result = function(*args, **kwargs)
+ return result
+ except ImportError:
+ pass
+ except AttributeError:
+ pass
+
+
+def get_function_object(module_name, function_name):
+ function = None
+ try:
+ calling_frame = inspect.stack()[1]
+ calling_module = inspect.getmodule(calling_frame[0])
+ calling_package = calling_module.__package__
+ module = importlib.import_module(module_name, package=calling_package)
+ function = getattr(module, function_name)
+ return function
+ except ImportError:
+ pass
+ except AttributeError:
+ pass
+
+
+def generate_string_combinations(*lists):
+ combinations = list(itertools.product(*lists))
+ combinations = ["".join(combination) for combination in combinations]
+ return combinations
+
+
+def copy_file(source_file, destination_file):
+ try:
+ shutil.copy(source_file, destination_file)
+ print(f"File copied successfully from '{source_file}' to '{destination_file}'.")
+ except FileNotFoundError:
+ print(f"Source file '{source_file}' not found.")
+ except PermissionError:
+ print(
+ f"Permission denied. Unable to copy '{source_file}' to '{destination_file}'."
+ )
+
+
+def call_function_with_dirs(dirs, module_name, function_name, **kwargs):
+ for dir in dirs:
+ try:
+ data = dynamic_function_call(
+ dir + module_name,
+ function_name,
+ **kwargs,
+ )
+ if data:
+ return data
+ except Exception as error:
+ if type(error) not in [ModuleNotFoundError]:
+ raise error
+
+
+def get_function_obj_with_dirs(dirs, module_name, function_name):
+ obj = None
+ for dir in dirs:
+ try:
+ obj = get_function_object(
+ dir + module_name,
+ function_name,
+ )
+ if obj:
+ return obj
+ except Exception as error:
+ if type(error) not in [ModuleNotFoundError]:
+ raise error
+ if not obj:
+ raise ModuleNotFoundError(
+ f"Module '{module_name}' not found in any of the directories {dirs}."
+ )
+
+
+def datagen_driver(script_directory, impl, **kwargs):
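+ # name the data file after the generator and its scalar params
+ # (for dict params, their first value, e.g. a nested function name; list params are skipped)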
+ names = []
+ for i in kwargs.values():
+ if type(i) == dict:
+ names.append(list(i.items())[0][1])
+ elif type(i) == list:
+ pass
+ else:
+ names.append(i)
+
+ dataFile = f"{str(impl.__name__)}_{'_'.join([str(i) for i in names])}.txt"
+ with open(os.path.join(script_directory, dataFile), "w") as f:
+ impl(f, **kwargs)
+ return os.path.join(script_directory, dataFile)
+
+
+def calculate_similarity(word, string):
+ # Calculate the similarity score between the string and the word
+ score = 0
+ for char in word:
+ if char in string:
+ score += 1
+ return score
+
+
+def custom_sort(word, strings):
+ # Sort the list of strings based on their similarity to the word
+ return sorted(
+ strings, key=lambda string: calculate_similarity(word, string), reverse=True
+ )