# Add MLflow GenAI Evaluation Dataset export/import support #219
@@ -30,6 +30,8 @@ Notes:
| | [import-traces](#Import-traces) | [code](mlflow_export_import/bulk/import_traces.py) | Imports traces from a directory |
| Prompt | [export-prompts](#Export-prompts) | [code](mlflow_export_import/bulk/export_prompts.py) | Exports prompts from the MLflow Prompt Registry (MLflow 2.21.0+). |
| | [import-prompts](#Import-prompts) | [code](mlflow_export_import/bulk/import_prompts.py) | Imports prompts to the MLflow Prompt Registry. |
| Evaluation Dataset | [export-evaluation-datasets](#Export-evaluation-datasets) | [code](mlflow_export_import/bulk/export_evaluation_datasets.py) | Exports GenAI evaluation datasets (MLflow 3.4.0+). |
| | [import-evaluation-datasets](#Import-evaluation-datasets) | [code](mlflow_export_import/bulk/import_evaluation_datasets.py) | Imports GenAI evaluation datasets. |
## All MLflow Objects Tools
@@ -621,3 +623,92 @@ import-prompts --input-dir out/prompts
**Notes:**
* Prompts are imported with their original names, and version numbers are preserved.
* All versions of each prompt are exported and imported to maintain the complete version history.
* If a prompt with the same name already exists, it is skipped with a warning to preserve version numbers. Use `--delete-prompt True` to replace existing prompts.
## Evaluation Datasets
Export and import GenAI evaluation datasets from the MLflow tracking server (MLflow 3.4.0+).
**Note:** Evaluation dataset support requires MLflow 3.4.0 or higher and a SQL-based tracking backend (SQLite, PostgreSQL, or MySQL); FileStore is not supported. Export and import are skipped with a warning if the MLflow version does not support evaluation datasets or the tracking server uses FileStore.
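A minimal pre-flight check in this spirit is sketched below. The 3.4.0 version gate comes from the note above; inferring the backend from the tracking URI scheme is an assumption that only works for local backends (behind an http(s) server the store type is not visible to the client):

```python
# Hedged sketch: check both prerequisites before attempting an export.
# The URI-scheme heuristic is an assumption, not part of this repo's code.
from urllib.parse import urlparse

import mlflow
from packaging.version import Version

SQL_SCHEMES = {"sqlite", "postgresql", "mysql"}

def eval_datasets_likely_supported() -> bool:
    # MLflow 3.4.0+ is required for GenAI evaluation datasets
    if Version(mlflow.__version__) < Version("3.4.0"):
        return False
    scheme = urlparse(mlflow.get_tracking_uri()).scheme
    base = scheme.split("+", 1)[0]  # e.g. "mysql+pymysql" -> "mysql"
    # "file" or an empty scheme means FileStore (unsupported); SQL schemes
    # indicate a database backend; http(s) servers cannot be checked here.
    return base in SQL_SCHEMES or base in {"http", "https"}
```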
### Export evaluation datasets
Export evaluation datasets to a directory.
Source: [export_evaluation_datasets.py](mlflow_export_import/bulk/export_evaluation_datasets.py).
#### Usage
```
export-evaluation-datasets --help

Options:
  --output-dir TEXT           Output directory.  [required]
  --evaluation-datasets TEXT  Evaluation dataset names: 'all' for all datasets,
                              a comma-delimited list (e.g. 'dataset1,dataset2'),
                              or a file path ending with '.txt' containing
                              dataset names (one per line).  [required]
  --experiment-ids TEXT       Comma-separated list of experiment IDs used to
                              filter datasets. Only applied when
                              --evaluation-datasets is 'all'.
  --use-threads BOOLEAN       Use multithreading for export.  [default: False]
```
#### Examples
##### Export all evaluation datasets
```
export-evaluation-datasets \
  --output-dir out/evaluation_datasets \
  --evaluation-datasets all
```
##### Export specific evaluation datasets
```
export-evaluation-datasets \
  --output-dir out/evaluation_datasets \
  --evaluation-datasets wine-quality-eval,iris-classification-eval
```
##### Export all evaluation datasets for specific experiments
```
export-evaluation-datasets \
  --output-dir out/evaluation_datasets \
  --evaluation-datasets all \
  --experiment-ids 1,2,3
```

> **Collaborator:** Like here, adding specific prompts from the specific experiments.
>
> **Author:** See above.
**Note:** `--experiment-ids` filters datasets only when `--evaluation-datasets` is 'all'; if you specify dataset names, `--experiment-ids` is ignored.
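For instance, in the following invocation (reusing the dataset names from the example above) the filter has no effect:

```
export-evaluation-datasets \
  --output-dir out/evaluation_datasets \
  --evaluation-datasets wine-quality-eval,iris-classification-eval \
  --experiment-ids 1,2,3   # ignored: explicit dataset names were given
```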
### Import evaluation datasets
Import evaluation datasets from a directory.
Source: [import_evaluation_datasets.py](mlflow_export_import/bulk/import_evaluation_datasets.py).
#### Usage
```
import-evaluation-datasets --help

Options:
  --input-dir TEXT                     Input directory containing exported
                                       evaluation datasets.  [required]
  --delete-evaluation-dataset BOOLEAN  Delete an existing evaluation dataset
                                       before importing.  [default: False]
  --use-threads BOOLEAN                Use multithreading for import.  [default: False]
```
#### Examples
##### Import evaluation datasets
```
import-evaluation-datasets --input-dir out/evaluation_datasets
```
##### Import with evaluation dataset deletion (if a dataset exists, delete it first)
```
import-evaluation-datasets \
  --input-dir out/evaluation_datasets \
  --delete-evaluation-dataset True
```

> **Collaborator:** I believe it will import to the same experiment names as the source tracking server.
>
> **Author:** The import uses experiment IDs, not names. If the source dataset was linked to experiment 1, it will link to experiment 1 in the destination, which could of course be the wrong experiment if the destination isn't empty. That's why import-all works better (it imports experiments first, so the IDs match up). We can add automatic ID mapping to handle non-empty destinations in a follow-up PR.
**Note:** If an evaluation dataset with the same name already exists, it is skipped with a warning. Use `--delete-evaluation-dataset True` to replace existing datasets.
@@ -29,6 +29,8 @@ See sample JSON export files [here](README_export_format.md#sample-export-json-f
| | [import-trace](#import-trace) | [code](mlflow_export_import/trace/import_trace.py) |
| Prompt | [export-prompt](#export-prompt) | [code](mlflow_export_import/prompt/export_prompt.py) |
| | [import-prompt](#import-prompt) | [code](mlflow_export_import/prompt/import_prompt.py) |
| Evaluation Dataset | [export-evaluation-dataset](#export-evaluation-dataset) | [code](mlflow_export_import/evaluation_dataset/export_evaluation_dataset.py) |
| | [import-evaluation-dataset](#import-evaluation-dataset) | [code](mlflow_export_import/evaluation_dataset/import_evaluation_dataset.py) |
## Experiment Tools
@@ -648,4 +650,77 @@ import-prompt --input-dir out
```
import-prompt \
  --input-dir out \
  --prompt-name my-new-prompt-name
```
## Evaluation Dataset Tools
Export and import GenAI evaluation datasets (MLflow 3.4.0+).
**Note:** Evaluation dataset support requires MLflow 3.4.0 or higher and a SQL-based tracking backend (SQLite, PostgreSQL, or MySQL); FileStore is not supported.
### Export Evaluation Dataset
Export a single evaluation dataset to a directory.
Source: [export_evaluation_dataset.py](mlflow_export_import/evaluation_dataset/export_evaluation_dataset.py).
#### Usage
```
export-evaluation-dataset --help

Options:
  --dataset-name TEXT  Name of the evaluation dataset to export (mutually exclusive with --dataset-id).
  --dataset-id TEXT    ID of the evaluation dataset to export (mutually exclusive with --dataset-name).
  --output-dir TEXT    Output directory.  [required]

Note: exactly one of --dataset-name or --dataset-id must be provided.
```
#### Examples
##### Export by dataset name
```
export-evaluation-dataset \
  --dataset-name wine-quality-eval \
  --output-dir out
```
##### Export by dataset ID
```
export-evaluation-dataset \
  --dataset-id abc123 \
  --output-dir out
```
### Import Evaluation Dataset
Import an evaluation dataset from an exported directory.
Source: [import_evaluation_dataset.py](mlflow_export_import/evaluation_dataset/import_evaluation_dataset.py).
#### Usage
```
import-evaluation-dataset --help

Options:
  --input-dir TEXT     Input directory containing the exported evaluation dataset.  [required]
  --dataset-name TEXT  Optional new name for the imported dataset. If not
                       specified, the original name is used.
```
#### Examples
##### Import with original name
```
import-evaluation-dataset --input-dir out
```

> **Collaborator:** Should we provide an option for the customer to specify a destination experiment name?
>
> **Author:** Could be an item for a follow-up PR; I will add it to the list of potential follow-up changes in the PR description. In general, evaluation datasets can be associated with multiple experiments (many-to-many), so we'd also need an option to specify multiple experiments. Otherwise, users can still manually update associations after import using the `add_dataset_to_experiments()` function.
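The manual fix the author mentions might look like the sketch below. `add_dataset_to_experiments()` is named in the thread, but its import path and signature here are assumptions and may differ in MLflow 3.4:

```python
# Hedged sketch: re-link an imported evaluation dataset to destination
# experiments. The client attribute and argument names are assumptions;
# only the function name comes from the review thread above.
from mlflow import MlflowClient

client = MlflowClient()

dataset_id = "d-abc123"      # placeholder: ID of the dataset after import
experiment_ids = ["5", "7"]  # placeholder: destination experiment IDs

client.add_dataset_to_experiments(dataset_id=dataset_id, experiment_ids=experiment_ids)
```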
##### Import with new name
```
import-evaluation-dataset \
  --input-dir out \
  --dataset-name my-new-dataset-name
```
@@ -24,6 +24,7 @@
```python
from mlflow_export_import.bulk.export_models import export_models
from mlflow_export_import.bulk.export_experiments import export_experiments
from mlflow_export_import.bulk.export_prompts import export_prompts
from mlflow_export_import.bulk.export_evaluation_datasets import export_evaluation_datasets

ALL_STAGES = "Production,Staging,Archived,None"
```
@@ -58,7 +59,7 @@ def export_all(
```diff
         use_threads = use_threads
     )

-    # Only import those experiments not exported by above export_models()
+    # Only export those experiments not exported by above export_models()
     exported_exp_names = res_models["experiments"]["experiment_names"]
     all_exps = SearchExperimentsIterator(mlflow_client)
     all_exp_names = [ exp.name for exp in all_exps ]
```
@@ -94,6 +95,26 @@ def export_all(
```python
        _logger.warning(f"Failed to export prompts: {e}")
        res_prompts = {"error": str(e)}

    # Export evaluation datasets (returns dict with status)
    res_datasets = None
    try:
        _logger.info("Exporting evaluation datasets...")
        res_datasets = export_evaluation_datasets(
            output_dir = os.path.join(output_dir, "evaluation_datasets"),
            dataset_names = None,  # Export all datasets
            experiment_ids = None,
            use_threads = use_threads,
            mlflow_client = mlflow_client
        )
        # Log if unsupported but don't fail
        if res_datasets and "unsupported" in res_datasets:
            _logger.warning(f"Evaluation datasets not supported in MLflow {res_datasets.get('mlflow_version')}")
        elif res_datasets and "error" in res_datasets:
            _logger.warning(f"Failed to export evaluation datasets: {res_datasets['error']}")
    except Exception as e:
        _logger.warning(f"Failed to export evaluation datasets: {e}")
        res_datasets = {"error": str(e)}

    duration = round(time.time() - start_time, 1)
    info_attr = {
        "options": {
```

> **Collaborator:** I think evaluation datasets are at the experiment level. Correct me if I am wrong. If that's the case, I think we should have this logic at …
>
> **Author:** I think they should be treated as tracking-server level and independent of experiments (similarly to registered models), as they're reusable across experiments (stored in their own DB table with many-to-many associations), and you can create them with `experiment_id=[]` to have no experiment link at all. Traces and logged models must belong to exactly one experiment.
>
> **Member:** This is correct (they are a top-level domain entity and can be attached to multiple experiments).
@@ -108,7 +129,8 @@ def export_all(
| "duration": duration, | ||
| "models": res_models, | ||
| "experiments": res_exps, | ||
| "prompts": res_prompts | ||
| "prompts": res_prompts, | ||
| "evaluation_datasets": res_datasets | ||
| } | ||
| } | ||
| io_utils.write_export_file(output_dir, "manifest.json", __file__, {}, info_attr) | ||
> **Collaborator:** This can be P1, but I feel we should add support for names as well. I have done the same for logged-models and traces; I will work on this in my next PR to add it for these 3 objects. Also, can we use `--evaluation-datasets` and `--experiment-ids` at the same time?
>
> **Author:** Yes, I agree. Adding support for names in a follow-up PR would be helpful. You mean like `--evaluation-datasets all --experiment-ids 1,2`? Yes, that works: it exports all datasets filtered by the specified experiments. If you specify dataset names instead of 'all', `--experiment-ids` is ignored. I'll update the docs to make this clearer.