From b8cbab766ee450cb121c72c7446f5e8b74d3906d Mon Sep 17 00:00:00 2001
From: David Gardner <dagardner@nvidia.com>
Date: Tue, 30 Sep 2025 13:53:16 -0700
Subject: [PATCH 01/15] Fix documentation type-o

Signed-off-by: David Gardner <dagardner@nvidia.com>
---
 docs/source/tutorials/create-a-new-workflow.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/tutorials/create-a-new-workflow.md b/docs/source/tutorials/create-a-new-workflow.md
index 4fc0c506a..7d5ab7fd3 100644
--- a/docs/source/tutorials/create-a-new-workflow.md
+++ b/docs/source/tutorials/create-a-new-workflow.md
@@ -286,7 +286,7 @@ uv pip install -e examples/documentation_guides/workflows/text_file_ingest
 Run the workflow with the following command:
 ```bash
 nat run --config_file examples/documentation_guides/workflows/text_file_ingest/configs/config.yml \
-   --input "What does DOCA GPUNetIO to remove the CPU from the critical path?"
+   --input "What does DOCA GPUNetIO do to remove the CPU from the critical path?"
 ```
 
 If successful, you should receive output similar to the following:

From 9b57bf3009e9c3b1277ddb8559a4bb3465ff5385 Mon Sep 17 00:00:00 2001
From: David Gardner <dagardner@nvidia.com>
Date: Tue, 30 Sep 2025 13:57:51 -0700
Subject: [PATCH 02/15] Add test for the text_file_ingest example

Signed-off-by: David Gardner <dagardner@nvidia.com>
---
 .../tests/test_text_file_ingest.py            | 55 +++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 examples/documentation_guides/tests/test_text_file_ingest.py

diff --git a/examples/documentation_guides/tests/test_text_file_ingest.py b/examples/documentation_guides/tests/test_text_file_ingest.py
new file mode 100644
index 000000000..652940af3
--- /dev/null
+++ b/examples/documentation_guides/tests/test_text_file_ingest.py
@@ -0,0 +1,55 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import sys
+from pathlib import Path
+
+import pytest
+
+from nat.test.utils import locate_example_config
+from nat.test.utils import run_workflow
+
+logger = logging.getLogger(__name__)
+
+
+@pytest.fixture(name="text_file_ingest_dir", scope="session")
+def text_file_ingest_dir_fixture(workflows_dir: Path) -> Path:
+    text_file_ingest = workflows_dir / "text_file_ingest"
+    assert text_file_ingest.exists(), f"Could not find text_file_ingest example at {text_file_ingest}"
+    return text_file_ingest
+
+
+@pytest.fixture(name="src_dir", scope="session", autouse=True)
+def src_dir_fixture(text_file_ingest_dir: Path) -> Path:
+    src_dir = text_file_ingest_dir / "src"
+    assert src_dir.exists(), f"Could not find text_file_ingest src at {src_dir}"
+
+    return src_dir
+
+
+@pytest.fixture(scope="session", autouse=True)
+def add_src_dir_to_path_fixture(src_dir: Path) -> Path:
+    # Since this is a documentation guide, it is not installed by default, so we need to manually append it to the path
+    abs_src_dir = src_dir.absolute()
+    if str(abs_src_dir) not in sys.path:
+        sys.path.append(str(abs_src_dir))
+    return abs_src_dir
+
+
+def test_text_file_ingest_full_workflow():
+    from text_file_ingest.text_file_ingest_function import TextFileIngestFunctionConfig
+    config_file = locate_example_config(TextFileIngestFunctionConfig)
+    run_workflow(config_file, "What does DOCA GPUNetIO do to remove the CPU from the critical path?", "GPUDirect")

From 463a4ff5bf35001d9f0ae12c97fedd8c312cfa99 Mon Sep 17 00:00:00 2001
From: David Gardner <dagardner@nvidia.com>
Date: Tue, 30 Sep 2025 14:09:37 -0700
Subject: [PATCH 03/15] Cleanup test

---
 .../tests/test_text_file_ingest.py            | 22 ++++++++++++++-----
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/examples/documentation_guides/tests/test_text_file_ingest.py b/examples/documentation_guides/tests/test_text_file_ingest.py
index 652940af3..9483ba3bb 100644
--- a/examples/documentation_guides/tests/test_text_file_ingest.py
+++ b/examples/documentation_guides/tests/test_text_file_ingest.py
@@ -15,6 +15,7 @@
 
 import logging
 import sys
+from collections.abc import Generator
 from pathlib import Path
 
 import pytest
@@ -40,15 +41,24 @@ def src_dir_fixture(text_file_ingest_dir: Path) -> Path:
     return src_dir
 
 
-@pytest.fixture(scope="session", autouse=True)
-def add_src_dir_to_path_fixture(src_dir: Path) -> Path:
+@pytest.fixture(name="add_src_dir_to_path", scope="session")
+def add_src_dir_to_path_fixture(src_dir: Path) -> Generator[str]:
     # Since this is a documentation guide, it is not installed by default, so we need to manually append it to the path
-    abs_src_dir = src_dir.absolute()
-    if str(abs_src_dir) not in sys.path:
-        sys.path.append(str(abs_src_dir))
-    return abs_src_dir
+    abs_src_dir = str(src_dir.absolute())
+    if abs_src_dir not in sys.path:
+        added = True
+        sys.path.append(abs_src_dir)
+    else:
+        added = False
 
+    yield abs_src_dir
 
+    if added:
+        sys.path.remove(abs_src_dir)
+
+
+@pytest.mark.integration
+@pytest.mark.usefixtures("nvidia_api_key", "add_src_dir_to_path")
 def test_text_file_ingest_full_workflow():
     from text_file_ingest.text_file_ingest_function import TextFileIngestFunctionConfig
     config_file = locate_example_config(TextFileIngestFunctionConfig)

From fff2a6b22de474c5b856a98c9b7601a54269552e Mon Sep 17 00:00:00 2001
From: David Gardner <dagardner@nvidia.com>
Date: Tue, 30 Sep 2025 14:10:15 -0700
Subject: [PATCH 04/15] Remove eval configs from text_file_ingest as it is not
 a part of the documentation example

Signed-off-by: David Gardner <dagardner@nvidia.com>
---
 .../src/text_file_ingest/configs/config.yml   | 46 -------------------
 1 file changed, 46 deletions(-)

diff --git a/examples/documentation_guides/workflows/text_file_ingest/src/text_file_ingest/configs/config.yml b/examples/documentation_guides/workflows/text_file_ingest/src/text_file_ingest/configs/config.yml
index 7cd056885..0370d392f 100644
--- a/examples/documentation_guides/workflows/text_file_ingest/src/text_file_ingest/configs/config.yml
+++ b/examples/documentation_guides/workflows/text_file_ingest/src/text_file_ingest/configs/config.yml
@@ -29,19 +29,6 @@ llms:
     _type: nim
     model_name: meta/llama-3.1-70b-instruct
     temperature: 0.0
-  nim_rag_eval_llm:
-    _type: nim
-    model_name: meta/llama-3.1-70b-instruct
-    max_tokens: 8
-  nim_rag_eval_large_llm:
-    _type: nim
-    model_name: meta/llama-3.1-70b-instruct
-    max_tokens: 2048
-  nim_trajectory_eval_llm:
-    _type: nim
-    model_name: meta/llama-3.1-70b-instruct
-    temperature: 0.0
-    max_tokens: 1024
 
 embedders:
   nv-embedqa-e5-v5:
@@ -54,36 +41,3 @@ workflow:
   llm_name: nim_llm
   verbose: true
   parse_agent_response_max_retries: 3
-
-eval:
-  general:
-    output_dir: .tmp/nat/examples/getting_started/simple_web_query/
-    dataset:
-      _type: json
-      file_path: examples/evaluation_and_profiling/simple_web_query_eval/data/langsmith.json
-    profiler:
-      fit_model: True
-
-  evaluators:
-    rag_accuracy:
-      _type: ragas
-      metric: AnswerAccuracy
-      llm_name: nim_rag_eval_llm
-    rag_groundedness:
-      _type: ragas
-      metric: ResponseGroundedness
-      llm_name: nim_rag_eval_llm
-    rag_relevance:
-      _type: ragas
-      metric: ContextRelevance
-      llm_name: nim_rag_eval_llm
-    rag_factual_correctness:
-      _type: ragas
-      metric:
-        FactualCorrectness:
-          kwargs:
-            mode: precision
-      llm_name: nim_rag_eval_large_llm # requires more tokens
-    trajectory:
-      _type: trajectory
-      llm_name: nim_trajectory_eval_llm

From edcb68ce323144c9bccfafce8acd464ad0e25fb8 Mon Sep 17 00:00:00 2001
From: David Gardner <dagardner@nvidia.com>
Date: Tue, 30 Sep 2025 14:10:44 -0700
Subject: [PATCH 05/15] Update run_workflow to require keyword arguments

Signed-off-by: David Gardner <dagardner@nvidia.com>
---
 .../profiler_agent/tests/test_profiler_agent.py             | 4 +++-
 examples/agents/tests/test_agents.py                        | 4 ++--
 .../tests/test_auto_desc_generation.py                      | 2 +-
 examples/documentation_guides/tests/test_custom_workflow.py | 4 ++--
 .../documentation_guides/tests/test_text_file_ingest.py     | 6 ++++--
 packages/nvidia_nat_test/src/nat/test/utils.py              | 5 +++--
 6 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/examples/advanced_agents/profiler_agent/tests/test_profiler_agent.py b/examples/advanced_agents/profiler_agent/tests/test_profiler_agent.py
index e2b63de2a..9de2001e1 100644
--- a/examples/advanced_agents/profiler_agent/tests/test_profiler_agent.py
+++ b/examples/advanced_agents/profiler_agent/tests/test_profiler_agent.py
@@ -94,4 +94,6 @@ async def test_token_usage_tool(df_path: Path):
 @pytest.mark.usefixtures("nvidia_api_key")
 async def test_full_workflow():
     config_file: Path = locate_example_config(ProfilerAgentConfig)
-    await run_workflow(config_file, "Is the product of 33 * 4 greater than the current hour of the day?", "yes")
+    await run_workflow(config_file=config_file,
+                       question="Is the product of 33 * 4 greater than the current hour of the day?",
+                       expected_answer="yes")
diff --git a/examples/agents/tests/test_agents.py b/examples/agents/tests/test_agents.py
index 027df4c64..b2d73e0f1 100644
--- a/examples/agents/tests/test_agents.py
+++ b/examples/agents/tests/test_agents.py
@@ -62,7 +62,7 @@ def rewoo_answer_fixture(request: pytest.FixtureRequest, rewoo_data: list[dict])
                          indirect=True)
 async def test_rewoo_full_workflow(rewoo_question: str, rewoo_answer: str):
     config_file = os.path.join(AGENTS_DIR, "rewoo/configs/config.yml")
-    await run_workflow(config_file, rewoo_question, rewoo_answer)
+    await run_workflow(config_file=config_file, question=rewoo_question, expected_answer=rewoo_answer)
 
 
 @pytest.mark.slow
@@ -79,4 +79,4 @@ async def test_rewoo_full_workflow(rewoo_question: str, rewoo_answer: str):
     ],
     ids=["mixture_of_agents", "react", "react-reasoning", "tool_calling", "tool_calling-reasoning"])
 async def test_agent_full_workflow(config_file: str, question: str, answer: str):
-    await run_workflow(config_file, question, answer)
+    await run_workflow(config_file=config_file, question=question, expected_answer=answer)
diff --git a/examples/custom_functions/automated_description_generation/tests/test_auto_desc_generation.py b/examples/custom_functions/automated_description_generation/tests/test_auto_desc_generation.py
index 9ba9ec929..d4ebc5d22 100644
--- a/examples/custom_functions/automated_description_generation/tests/test_auto_desc_generation.py
+++ b/examples/custom_functions/automated_description_generation/tests/test_auto_desc_generation.py
@@ -37,4 +37,4 @@ async def test_full_workflow(milvus_uri: str) -> None:
     config.retrievers['retriever'].uri = HttpUrl(url=milvus_uri)
 
     # Unfortunately the workflow itself returns inconsistent results
-    await run_workflow(None, "List 5 subspecies of Aardvark?", "Aardvark", config=config)
+    await run_workflow(config=config, question="List 5 subspecies of Aardvark?", expected_answer="Aardvark")
diff --git a/examples/documentation_guides/tests/test_custom_workflow.py b/examples/documentation_guides/tests/test_custom_workflow.py
index 5cc9cb382..7132d2e2c 100644
--- a/examples/documentation_guides/tests/test_custom_workflow.py
+++ b/examples/documentation_guides/tests/test_custom_workflow.py
@@ -44,7 +44,7 @@ def answer_fixture() -> str:
 @pytest.mark.usefixtures("nvidia_api_key")
 async def test_custom_full_workflow(custom_workflow_dir: Path, question: str, answer: str):
     config_file = custom_workflow_dir / "custom_config.yml"
-    await run_workflow(config_file, question, answer)
+    await run_workflow(config_file=config_file, question=question, expected_answer=answer)
 
 
 @pytest.mark.slow
@@ -53,4 +53,4 @@ async def test_custom_full_workflow(custom_workflow_dir: Path, question: str, an
 async def test_search_full_workflow(custom_workflow_dir: Path, question: str, answer: str):
     # Technically this is the same as the custom workflow test, but it requires a second key
     config_file = custom_workflow_dir / "search_config.yml"
-    await run_workflow(config_file, question, answer)
+    await run_workflow(config_file=config_file, question=question, expected_answer=answer)
diff --git a/examples/documentation_guides/tests/test_text_file_ingest.py b/examples/documentation_guides/tests/test_text_file_ingest.py
index 9483ba3bb..211da7e81 100644
--- a/examples/documentation_guides/tests/test_text_file_ingest.py
+++ b/examples/documentation_guides/tests/test_text_file_ingest.py
@@ -59,7 +59,9 @@ def add_src_dir_to_path_fixture(src_dir: Path) -> Generator[str]:
 
 @pytest.mark.integration
 @pytest.mark.usefixtures("nvidia_api_key", "add_src_dir_to_path")
-def test_text_file_ingest_full_workflow():
+async def test_text_file_ingest_full_workflow():
     from text_file_ingest.text_file_ingest_function import TextFileIngestFunctionConfig
     config_file = locate_example_config(TextFileIngestFunctionConfig)
-    run_workflow(config_file, "What does DOCA GPUNetIO do to remove the CPU from the critical path?", "GPUDirect")
+    await run_workflow(config_file=config_file,
+                       question="What does DOCA GPUNetIO do to remove the CPU from the critical path?",
+                       expected_answer="GPUDirect")
diff --git a/packages/nvidia_nat_test/src/nat/test/utils.py b/packages/nvidia_nat_test/src/nat/test/utils.py
index 3ee119cae..56231fff9 100644
--- a/packages/nvidia_nat_test/src/nat/test/utils.py
+++ b/packages/nvidia_nat_test/src/nat/test/utils.py
@@ -62,11 +62,12 @@ def locate_example_config(example_config_class: type,
 
 
 async def run_workflow(
-    config_file: "StrPath | None",
+    *,
+    config: "Config | None" = None,
+    config_file: "StrPath | None" = None,
     question: str,
     expected_answer: str,
     assert_expected_answer: bool = True,
-    config: "Config | None" = None,
 ) -> str:
     from nat.builder.workflow_builder import WorkflowBuilder
     from nat.runtime.loader import load_config

From 5638e84852e6a043cb08366d8f6e076241afff8c Mon Sep 17 00:00:00 2001
From: David Gardner <dagardner@nvidia.com>
Date: Tue, 30 Sep 2025 14:59:52 -0700
Subject: [PATCH 06/15] Move configs and data dirs to allign with other
 examples

Signed-off-by: David Gardner <dagardner@nvidia.com>
---
 .../configs/config-llama-3.1-8b-instruct.yml                      | 0
 .../configs/config-llama-3.3-70b-instruct.yml                     | 0
 .../configs/config-mixtral-8x22b-instruct-v0.1.yml                | 0
 .../configs/config-phi-3-medium-4k-instruct.yml                   | 0
 .../configs/config-phi-3-mini-4k-instruct.yml                     | 0
 .../nat_email_phishing_analyzer}/configs/config-reasoning.yml     | 0
 .../{ => src/nat_email_phishing_analyzer}/configs/config.yml      | 0
 .../nat_email_phishing_analyzer}/configs/config_optimizer.yml     | 0
 .../{ => src/nat_email_phishing_analyzer}/data/smaller_test.csv   | 0
 9 files changed, 0 insertions(+), 0 deletions(-)
 rename examples/evaluation_and_profiling/email_phishing_analyzer/{ => src/nat_email_phishing_analyzer}/configs/config-llama-3.1-8b-instruct.yml (100%)
 rename examples/evaluation_and_profiling/email_phishing_analyzer/{ => src/nat_email_phishing_analyzer}/configs/config-llama-3.3-70b-instruct.yml (100%)
 rename examples/evaluation_and_profiling/email_phishing_analyzer/{ => src/nat_email_phishing_analyzer}/configs/config-mixtral-8x22b-instruct-v0.1.yml (100%)
 rename examples/evaluation_and_profiling/email_phishing_analyzer/{ => src/nat_email_phishing_analyzer}/configs/config-phi-3-medium-4k-instruct.yml (100%)
 rename examples/evaluation_and_profiling/email_phishing_analyzer/{ => src/nat_email_phishing_analyzer}/configs/config-phi-3-mini-4k-instruct.yml (100%)
 rename examples/evaluation_and_profiling/email_phishing_analyzer/{ => src/nat_email_phishing_analyzer}/configs/config-reasoning.yml (100%)
 rename examples/evaluation_and_profiling/email_phishing_analyzer/{ => src/nat_email_phishing_analyzer}/configs/config.yml (100%)
 rename examples/evaluation_and_profiling/email_phishing_analyzer/{ => src/nat_email_phishing_analyzer}/configs/config_optimizer.yml (100%)
 rename examples/evaluation_and_profiling/email_phishing_analyzer/{ => src/nat_email_phishing_analyzer}/data/smaller_test.csv (100%)

diff --git a/examples/evaluation_and_profiling/email_phishing_analyzer/configs/config-llama-3.1-8b-instruct.yml b/examples/evaluation_and_profiling/email_phishing_analyzer/src/nat_email_phishing_analyzer/configs/config-llama-3.1-8b-instruct.yml
similarity index 100%
rename from examples/evaluation_and_profiling/email_phishing_analyzer/configs/config-llama-3.1-8b-instruct.yml
rename to examples/evaluation_and_profiling/email_phishing_analyzer/src/nat_email_phishing_analyzer/configs/config-llama-3.1-8b-instruct.yml
diff --git a/examples/evaluation_and_profiling/email_phishing_analyzer/configs/config-llama-3.3-70b-instruct.yml b/examples/evaluation_and_profiling/email_phishing_analyzer/src/nat_email_phishing_analyzer/configs/config-llama-3.3-70b-instruct.yml
similarity index 100%
rename from examples/evaluation_and_profiling/email_phishing_analyzer/configs/config-llama-3.3-70b-instruct.yml
rename to examples/evaluation_and_profiling/email_phishing_analyzer/src/nat_email_phishing_analyzer/configs/config-llama-3.3-70b-instruct.yml
diff --git a/examples/evaluation_and_profiling/email_phishing_analyzer/configs/config-mixtral-8x22b-instruct-v0.1.yml b/examples/evaluation_and_profiling/email_phishing_analyzer/src/nat_email_phishing_analyzer/configs/config-mixtral-8x22b-instruct-v0.1.yml
similarity index 100%
rename from examples/evaluation_and_profiling/email_phishing_analyzer/configs/config-mixtral-8x22b-instruct-v0.1.yml
rename to examples/evaluation_and_profiling/email_phishing_analyzer/src/nat_email_phishing_analyzer/configs/config-mixtral-8x22b-instruct-v0.1.yml
diff --git a/examples/evaluation_and_profiling/email_phishing_analyzer/configs/config-phi-3-medium-4k-instruct.yml b/examples/evaluation_and_profiling/email_phishing_analyzer/src/nat_email_phishing_analyzer/configs/config-phi-3-medium-4k-instruct.yml
similarity index 100%
rename from examples/evaluation_and_profiling/email_phishing_analyzer/configs/config-phi-3-medium-4k-instruct.yml
rename to examples/evaluation_and_profiling/email_phishing_analyzer/src/nat_email_phishing_analyzer/configs/config-phi-3-medium-4k-instruct.yml
diff --git a/examples/evaluation_and_profiling/email_phishing_analyzer/configs/config-phi-3-mini-4k-instruct.yml b/examples/evaluation_and_profiling/email_phishing_analyzer/src/nat_email_phishing_analyzer/configs/config-phi-3-mini-4k-instruct.yml
similarity index 100%
rename from examples/evaluation_and_profiling/email_phishing_analyzer/configs/config-phi-3-mini-4k-instruct.yml
rename to examples/evaluation_and_profiling/email_phishing_analyzer/src/nat_email_phishing_analyzer/configs/config-phi-3-mini-4k-instruct.yml
diff --git a/examples/evaluation_and_profiling/email_phishing_analyzer/configs/config-reasoning.yml b/examples/evaluation_and_profiling/email_phishing_analyzer/src/nat_email_phishing_analyzer/configs/config-reasoning.yml
similarity index 100%
rename from examples/evaluation_and_profiling/email_phishing_analyzer/configs/config-reasoning.yml
rename to examples/evaluation_and_profiling/email_phishing_analyzer/src/nat_email_phishing_analyzer/configs/config-reasoning.yml
diff --git a/examples/evaluation_and_profiling/email_phishing_analyzer/configs/config.yml b/examples/evaluation_and_profiling/email_phishing_analyzer/src/nat_email_phishing_analyzer/configs/config.yml
similarity index 100%
rename from examples/evaluation_and_profiling/email_phishing_analyzer/configs/config.yml
rename to examples/evaluation_and_profiling/email_phishing_analyzer/src/nat_email_phishing_analyzer/configs/config.yml
diff --git a/examples/evaluation_and_profiling/email_phishing_analyzer/configs/config_optimizer.yml b/examples/evaluation_and_profiling/email_phishing_analyzer/src/nat_email_phishing_analyzer/configs/config_optimizer.yml
similarity index 100%
rename from examples/evaluation_and_profiling/email_phishing_analyzer/configs/config_optimizer.yml
rename to examples/evaluation_and_profiling/email_phishing_analyzer/src/nat_email_phishing_analyzer/configs/config_optimizer.yml
diff --git a/examples/evaluation_and_profiling/email_phishing_analyzer/data/smaller_test.csv b/examples/evaluation_and_profiling/email_phishing_analyzer/src/nat_email_phishing_analyzer/data/smaller_test.csv
similarity index 100%
rename from examples/evaluation_and_profiling/email_phishing_analyzer/data/smaller_test.csv
rename to examples/evaluation_and_profiling/email_phishing_analyzer/src/nat_email_phishing_analyzer/data/smaller_test.csv

From 244aa90cb6ed36f02be1d9819d06716c3fd5fc09 Mon Sep 17 00:00:00 2001
From: David Gardner <dagardner@nvidia.com>
Date: Tue, 30 Sep 2025 15:00:48 -0700
Subject: [PATCH 07/15] Add symlinks

Signed-off-by: David Gardner <dagardner@nvidia.com>
---
 .../evaluation_and_profiling/email_phishing_analyzer/configs     | 1 +
 examples/evaluation_and_profiling/email_phishing_analyzer/data   | 1 +
 2 files changed, 2 insertions(+)
 create mode 120000 examples/evaluation_and_profiling/email_phishing_analyzer/configs
 create mode 120000 examples/evaluation_and_profiling/email_phishing_analyzer/data

diff --git a/examples/evaluation_and_profiling/email_phishing_analyzer/configs b/examples/evaluation_and_profiling/email_phishing_analyzer/configs
new file mode 120000
index 000000000..cf4006edd
--- /dev/null
+++ b/examples/evaluation_and_profiling/email_phishing_analyzer/configs
@@ -0,0 +1 @@
+src/nat_email_phishing_analyzer/configs
\ No newline at end of file
diff --git a/examples/evaluation_and_profiling/email_phishing_analyzer/data b/examples/evaluation_and_profiling/email_phishing_analyzer/data
new file mode 120000
index 000000000..fc0ff14f4
--- /dev/null
+++ b/examples/evaluation_and_profiling/email_phishing_analyzer/data
@@ -0,0 +1 @@
+src/nat_email_phishing_analyzer/data
\ No newline at end of file

From 8aa0e0ed8562fe34c7f05dc7ba5bf830e9d3203e Mon Sep 17 00:00:00 2001
From: David Gardner <dagardner@nvidia.com>
Date: Tue, 30 Sep 2025 15:06:05 -0700
Subject: [PATCH 08/15] Add a test for the email_phishing_analyzer

Signed-off-by: David Gardner <dagardner@nvidia.com>
---
 .../tests/test_email_phishing_analyzer.py     | 45 +++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 examples/evaluation_and_profiling/email_phishing_analyzer/tests/test_email_phishing_analyzer.py

diff --git a/examples/evaluation_and_profiling/email_phishing_analyzer/tests/test_email_phishing_analyzer.py b/examples/evaluation_and_profiling/email_phishing_analyzer/tests/test_email_phishing_analyzer.py
new file mode 100644
index 000000000..5edf726d3
--- /dev/null
+++ b/examples/evaluation_and_profiling/email_phishing_analyzer/tests/test_email_phishing_analyzer.py
@@ -0,0 +1,45 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from pathlib import Path
+
+import pytest
+
+from nat.test.utils import locate_example_config
+from nat.test.utils import run_workflow
+from nat_email_phishing_analyzer.register import EmailPhishingAnalyzerConfig
+
+logger = logging.getLogger(__name__)
+
+
+@pytest.mark.integration
+@pytest.mark.usefixtures("nvidia_api_key")
+async def test_full_workflow(milvus_uri: str) -> None:
+    from pydantic import HttpUrl
+
+    from nat.runtime.loader import load_config
+
+    config_file: Path = locate_example_config(EmailPhishingAnalyzerConfig)
+    config = load_config(config_file)
+
+    # Unfortunately the workflow itself returns inconsistent results
+    await run_workflow(
+        config=config,
+        question=(
+            "Dear [Customer], Thank you for your purchase on [Date]. We have processed a refund of $[Amount] to your "
+            "account. Please provide your account and routing numbers so we can complete the transaction. Thank you, "
+            "[Your Company]"),
+        expected_answer="likely")

From 0ab0a62dbbd59400a8594fb586f254b7dc6504ff Mon Sep 17 00:00:00 2001
From: David Gardner <dagardner@nvidia.com>
Date: Tue, 30 Sep 2025 16:30:40 -0700
Subject: [PATCH 09/15] Add a fixture for tests that require the nest_asyncio
 patch, add an e2e for the optimization run, remove unused imports

Signed-off-by: David Gardner <dagardner@nvidia.com>
---
 .../tests/test_email_phishing_analyzer.py      | 18 ++++++++++++++----
 .../nvidia_nat_test/src/nat/test/plugin.py     | 10 ++++++++++
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/examples/evaluation_and_profiling/email_phishing_analyzer/tests/test_email_phishing_analyzer.py b/examples/evaluation_and_profiling/email_phishing_analyzer/tests/test_email_phishing_analyzer.py
index 5edf726d3..e21595ac9 100644
--- a/examples/evaluation_and_profiling/email_phishing_analyzer/tests/test_email_phishing_analyzer.py
+++ b/examples/evaluation_and_profiling/email_phishing_analyzer/tests/test_email_phishing_analyzer.py
@@ -20,17 +20,15 @@
 
 from nat.test.utils import locate_example_config
 from nat.test.utils import run_workflow
-from nat_email_phishing_analyzer.register import EmailPhishingAnalyzerConfig
 
 logger = logging.getLogger(__name__)
 
 
 @pytest.mark.integration
 @pytest.mark.usefixtures("nvidia_api_key")
-async def test_full_workflow(milvus_uri: str) -> None:
-    from pydantic import HttpUrl
-
+async def test_run_full_workflow():
     from nat.runtime.loader import load_config
+    from nat_email_phishing_analyzer.register import EmailPhishingAnalyzerConfig
 
     config_file: Path = locate_example_config(EmailPhishingAnalyzerConfig)
     config = load_config(config_file)
@@ -43,3 +41,15 @@ async def test_full_workflow(milvus_uri: str) -> None:
             "account. Please provide your account and routing numbers so we can complete the transaction. Thank you, "
             "[Your Company]"),
         expected_answer="likely")
+
+
+@pytest.mark.integration
+@pytest.mark.usefixtures("nvidia_api_key", "require_nest_asyncio")
+async def test_optimize_full_workflow():
+    from nat.data_models.optimizer import OptimizerRunConfig
+    from nat.profiler.parameter_optimization.optimizer_runtime import optimize_config
+    from nat_email_phishing_analyzer.register import EmailPhishingAnalyzerConfig
+
+    config_file: Path = locate_example_config(EmailPhishingAnalyzerConfig, "config_optimizer.yml")
+    config = OptimizerRunConfig(config_file=config_file, dataset=None)
+    await optimize_config(config)
diff --git a/packages/nvidia_nat_test/src/nat/test/plugin.py b/packages/nvidia_nat_test/src/nat/test/plugin.py
index d9a7b7839..5f96d4448 100644
--- a/packages/nvidia_nat_test/src/nat/test/plugin.py
+++ b/packages/nvidia_nat_test/src/nat/test/plugin.py
@@ -332,3 +332,13 @@ def populate_milvus_fixture(milvus_uri: str, root_repo_dir: Path):
         "wikipedia_docs"
     ],
                    check=True)
+
+
+@pytest.fixture(name="require_nest_asyncio", scope="session")
+def require_nest_asyncio_fixture():
+    """
+    Some tests require nest_asyncio to be installed to allow nested event loops, calling nest_asyncio.apply() more than
+    once is a no-op so it's safe to call this fixture even if one of our dependencies already called it.
+    """
+    import nest_asyncio
+    nest_asyncio.apply()

From 63c7402d2fa621daa3a326a6db87c7558d914647 Mon Sep 17 00:00:00 2001
From: David Gardner <dagardner@nvidia.com>
Date: Wed, 1 Oct 2025 08:51:16 -0700
Subject: [PATCH 10/15] Add some asserts, skip the test for now as it is being
 rate limited

Signed-off-by: David Gardner <dagardner@nvidia.com>
---
 .../tests/test_email_phishing_analyzer.py          | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/examples/evaluation_and_profiling/email_phishing_analyzer/tests/test_email_phishing_analyzer.py b/examples/evaluation_and_profiling/email_phishing_analyzer/tests/test_email_phishing_analyzer.py
index e21595ac9..49b33d323 100644
--- a/examples/evaluation_and_profiling/email_phishing_analyzer/tests/test_email_phishing_analyzer.py
+++ b/examples/evaluation_and_profiling/email_phishing_analyzer/tests/test_email_phishing_analyzer.py
@@ -43,13 +43,21 @@ async def test_run_full_workflow():
         expected_answer="likely")
 
 
+@pytest.mark.skip(reason="This test gets rate limited potentially issue #842 and does not complete")
 @pytest.mark.integration
 @pytest.mark.usefixtures("nvidia_api_key", "require_nest_asyncio")
-async def test_optimize_full_workflow():
+async def test_optimize_full_workflow(capsys):
+    from nat.data_models.config import Config
     from nat.data_models.optimizer import OptimizerRunConfig
     from nat.profiler.parameter_optimization.optimizer_runtime import optimize_config
     from nat_email_phishing_analyzer.register import EmailPhishingAnalyzerConfig
 
     config_file: Path = locate_example_config(EmailPhishingAnalyzerConfig, "config_optimizer.yml")
-    config = OptimizerRunConfig(config_file=config_file, dataset=None)
-    await optimize_config(config)
+    config = OptimizerRunConfig(config_file=config_file,
+                                dataset=None,
+                                override=(('eval.general.max_concurrency', '1'), ('optimizer.numeric.n_trials', '1')))
+    optimized_config = await optimize_config(config)
+    assert isinstance(optimized_config, Config)
+    captured_output = capsys.readouterr()
+
+    assert "All optimization phases complete" in captured_output.out

From 8357e1aabfb71f4267d9e825a6f73d4afd80f75f Mon Sep 17 00:00:00 2001
From: David Gardner <dagardner@nvidia.com>
Date: Wed, 1 Oct 2025 08:57:49 -0700
Subject: [PATCH 11/15] Lazily import

Signed-off-by: David Gardner <dagardner@nvidia.com>
---
 .../simple_web_query_eval/tests/test_simple_web_query_eval.py  | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/examples/evaluation_and_profiling/simple_web_query_eval/tests/test_simple_web_query_eval.py b/examples/evaluation_and_profiling/simple_web_query_eval/tests/test_simple_web_query_eval.py
index 5a3f0ce60..6e8beaa17 100644
--- a/examples/evaluation_and_profiling/simple_web_query_eval/tests/test_simple_web_query_eval.py
+++ b/examples/evaluation_and_profiling/simple_web_query_eval/tests/test_simple_web_query_eval.py
@@ -19,7 +19,6 @@
 
 import pytest
 
-import nat_simple_web_query_eval
 from nat.eval.evaluate import EvaluationRun
 from nat.eval.evaluate import EvaluationRunConfig
 from nat.test.utils import locate_example_config
@@ -110,6 +109,8 @@ async def test_eval():
        a. the rag accuracy metric
        b. the trajectory score (if present)
     """
+    import nat_simple_web_query_eval
+
     # Get config dynamically
     config_file: Path = locate_example_config(nat_simple_web_query_eval, "eval_config.yml")
 

From c001a7d5389951a0b35402758145fefe8bb3e928 Mon Sep 17 00:00:00 2001
From: David Gardner <dagardner@nvidia.com>
Date: Wed, 1 Oct 2025 09:31:54 -0700
Subject: [PATCH 12/15] Document setting max_concurrency to work-around rate
 limits

Signed-off-by: David Gardner <dagardner@nvidia.com>
---
 .../evaluation_and_profiling/simple_calculator_eval/README.md   | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/examples/evaluation_and_profiling/simple_calculator_eval/README.md b/examples/evaluation_and_profiling/simple_calculator_eval/README.md
index cc3a60d8b..c7878f6ca 100644
--- a/examples/evaluation_and_profiling/simple_calculator_eval/README.md
+++ b/examples/evaluation_and_profiling/simple_calculator_eval/README.md
@@ -47,6 +47,8 @@ Install this evaluation example:
 uv pip install -e examples/evaluation_and_profiling/simple_calculator_eval
 ```
 
+> **Note**: If you encounter rate limiting (`[429] Too Many Requests`) during evaluation, try setting the `eval.general.max_concurrency` value either in the YAML directly or via the command line with: `--override eval.general.max_concurrency 1`.
+
 ## Run the Workflow
 
 ### Running Evaluation

From 1ea662bce398aa72f2a868e550ee1a8e60aa1f86 Mon Sep 17 00:00:00 2001
From: David Gardner <dagardner@nvidia.com>
Date: Wed, 1 Oct 2025 09:32:57 -0700
Subject: [PATCH 13/15] Fix output dir

Signed-off-by: David Gardner <dagardner@nvidia.com>
---
 .../configs/config-tunable-rag-eval.yml                         | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/evaluation_and_profiling/simple_calculator_eval/src/nat_simple_calculator_eval/configs/config-tunable-rag-eval.yml b/examples/evaluation_and_profiling/simple_calculator_eval/src/nat_simple_calculator_eval/configs/config-tunable-rag-eval.yml
index 6a512727f..0d485f666 100644
--- a/examples/evaluation_and_profiling/simple_calculator_eval/src/nat_simple_calculator_eval/configs/config-tunable-rag-eval.yml
+++ b/examples/evaluation_and_profiling/simple_calculator_eval/src/nat_simple_calculator_eval/configs/config-tunable-rag-eval.yml
@@ -72,7 +72,7 @@ workflow:
 
 eval:
   general:
-    output_dir: .tmp/nat/examples/getting_started/simple_web_query
+    output_dir: .tmp/nat/examples/getting_started/simple_calculator
     dataset:
       _type: json
       file_path: examples/getting_started/simple_calculator/data/simple_calculator.json

From b39060cb6f9f3fe28427b42c3241769e91577e20 Mon Sep 17 00:00:00 2001
From: David Gardner <dagardner@nvidia.com>
Date: Wed, 1 Oct 2025 09:44:57 -0700
Subject: [PATCH 14/15] Move the validate_workflow_output method to be shared
 with other tests, add test for simple calculator eval

Signed-off-by: David Gardner <dagardner@nvidia.com>
---
 .../tests/test_simple_calculator_eval.py      | 79 +++++++++++++++++++
 .../tests/test_simple_web_query_eval.py       | 26 +-----
 .../nvidia_nat_test/src/nat/test/utils.py     | 26 ++++++
 3 files changed, 106 insertions(+), 25 deletions(-)
 create mode 100644 examples/evaluation_and_profiling/simple_calculator_eval/tests/test_simple_calculator_eval.py

diff --git a/examples/evaluation_and_profiling/simple_calculator_eval/tests/test_simple_calculator_eval.py b/examples/evaluation_and_profiling/simple_calculator_eval/tests/test_simple_calculator_eval.py
new file mode 100644
index 000000000..279316000
--- /dev/null
+++ b/examples/evaluation_and_profiling/simple_calculator_eval/tests/test_simple_calculator_eval.py
@@ -0,0 +1,79 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from pathlib import Path
+
+import pytest
+
+from nat.eval.evaluate import EvaluationRun
+from nat.eval.evaluate import EvaluationRunConfig
+from nat.test.utils import locate_example_config
+from nat.test.utils import validate_workflow_output
+
+logger = logging.getLogger(__name__)
+
+
+@pytest.mark.integration
+@pytest.mark.usefixtures("nvidia_api_key")
+async def test_eval():
+    """
+    1. nat-eval writes the workflow output to workflow_output.json
+    2. nat-eval creates a file with scores for each evaluation metric.
+    3. This test audits -
+       a. the rag accuracy metric
+       b. the trajectory score (if present)
+    """
+    import nat_simple_calculator_eval
+
+    # Get config dynamically
+    config_file: Path = locate_example_config(nat_simple_calculator_eval, "config-tunable-rag-eval.yml")
+
+    # Create the configuration object for running the evaluation, single rep using the eval config in eval_config.yml
+    # WIP: skip test if eval config is not present
+    config = EvaluationRunConfig(
+        config_file=config_file,
+        dataset=None,
+        result_json_path="$",
+        skip_workflow=False,
+        skip_completed_entries=False,
+        endpoint=None,
+        endpoint_timeout=30,
+        reps=1,
+        override=(('eval.general.max_concurrency', '1'), ),
+    )
+
+    # Run evaluation
+    eval_runner = EvaluationRun(config=config)
+    output = await eval_runner.run_and_evaluate()
+
+    # Ensure the workflow was not interrupted
+    assert not output.workflow_interrupted, "The workflow was interrupted"
+
+    # Look for the tuneable_eval_output file
+    tuneable_eval_output: Path | None = None
+
+    for output_file in output.evaluator_output_files:
+        assert output_file.exists()
+        output_file_str = str(output_file)
+        if "tuneable_eval_output" in output_file_str:
+            tuneable_eval_output = output_file
+
+    # Validate the workflow output
+    assert output.workflow_output_file, "The workflow_output.json file was not created"
+    validate_workflow_output(output.workflow_output_file)
+
+    # Verify that atleast one tuneable_eval_output file is present
+    assert tuneable_eval_output, "Expected output file does not exist"
diff --git a/examples/evaluation_and_profiling/simple_web_query_eval/tests/test_simple_web_query_eval.py b/examples/evaluation_and_profiling/simple_web_query_eval/tests/test_simple_web_query_eval.py
index 6e8beaa17..88ad08c03 100644
--- a/examples/evaluation_and_profiling/simple_web_query_eval/tests/test_simple_web_query_eval.py
+++ b/examples/evaluation_and_profiling/simple_web_query_eval/tests/test_simple_web_query_eval.py
@@ -22,35 +22,11 @@
 from nat.eval.evaluate import EvaluationRun
 from nat.eval.evaluate import EvaluationRunConfig
 from nat.test.utils import locate_example_config
+from nat.test.utils import validate_workflow_output
 
 logger = logging.getLogger(__name__)
 
 
-def validate_workflow_output(workflow_output_file: Path):
-    """
-    Validate the contents of the workflow output file.
-    WIP: output format should be published as a schema and this validation should be done against that schema.
-    """
-    # Ensure the workflow_output.json file was created
-    assert workflow_output_file.exists(), "The workflow_output.json file was not created"
-
-    # Read and validate the workflow_output.json file
-    try:
-        with open(workflow_output_file, encoding="utf-8") as f:
-            result_json = json.load(f)
-    except json.JSONDecodeError:
-        pytest.fail("Failed to parse workflow_output.json as valid JSON")
-
-    assert isinstance(result_json, list), "The workflow_output.json file is not a list"
-    assert len(result_json) > 0, "The workflow_output.json file is empty"
-    assert isinstance(result_json[0], dict), "The workflow_output.json file is not a list of dictionaries"
-
-    # Ensure required keys exist
-    required_keys = ["id", "question", "answer", "generated_answer", "intermediate_steps"]
-    for key in required_keys:
-        assert all(item.get(key) for item in result_json), f"The '{key}' key is missing in workflow_output.json"
-
-
 def validate_rag_accuracy(rag_metric_output_file: Path, score: float):
     """
     1. Validate the contents of the rag evaluator ouput file.
diff --git a/packages/nvidia_nat_test/src/nat/test/utils.py b/packages/nvidia_nat_test/src/nat/test/utils.py
index 56231fff9..1f38986c2 100644
--- a/packages/nvidia_nat_test/src/nat/test/utils.py
+++ b/packages/nvidia_nat_test/src/nat/test/utils.py
@@ -15,6 +15,7 @@
 
 import importlib.resources
 import inspect
+import json
 import subprocess
 import typing
 from pathlib import Path
@@ -86,3 +87,28 @@ async def run_workflow(
         assert expected_answer.lower() in result.lower(), f"Expected '{expected_answer}' in '{result}'"
 
     return result
+
+
+def validate_workflow_output(workflow_output_file: Path):
+    """
+    Validate the contents of the workflow output file.
+    WIP: output format should be published as a schema and this validation should be done against that schema.
+    """
+    # Ensure the workflow_output.json file was created
+    assert workflow_output_file.exists(), "The workflow_output.json file was not created"
+
+    # Read and validate the workflow_output.json file
+    try:
+        with open(workflow_output_file, encoding="utf-8") as f:
+            result_json = json.load(f)
+    except json.JSONDecodeError:
+        raise RuntimeError("Failed to parse workflow_output.json as valid JSON")
+
+    assert isinstance(result_json, list), "The workflow_output.json file is not a list"
+    assert len(result_json) > 0, "The workflow_output.json file is empty"
+    assert isinstance(result_json[0], dict), "The workflow_output.json file is not a list of dictionaries"
+
+    # Ensure required keys exist
+    required_keys = ["id", "question", "answer", "generated_answer", "intermediate_steps"]
+    for key in required_keys:
+        assert all(item.get(key) for item in result_json), f"The '{key}' key is missing in workflow_output.json"

From 31fed521d06a4a5c271bdf6aa671689ba7f1ee01 Mon Sep 17 00:00:00 2001
From: David Gardner <dagardner@nvidia.com>
Date: Mon, 13 Oct 2025 11:25:24 -0700
Subject: [PATCH 15/15] Widen the exception for email_phishing_analyzer

Signed-off-by: David Gardner <dagardner@nvidia.com>
---
 ci/scripts/path_checks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/scripts/path_checks.py b/ci/scripts/path_checks.py
index 6f83ad320..ad4ec9ae4 100644
--- a/ci/scripts/path_checks.py
+++ b/ci/scripts/path_checks.py
@@ -49,7 +49,7 @@
         r"^examples/evaluation_and_profiling/simple_web_query_eval/data/langsmith.json",
     ),
     (
-        r"^examples/evaluation_and_profiling/email_phishing_analyzer/configs",
+        r"^examples/evaluation_and_profiling/email_phishing_analyzer/.*/configs",
         r"^examples/evaluation_and_profiling/email_phishing_analyzer/data",
     ),
     (