From 8b555f4ec3cdef77b981cf110e1ae90218ba6cc9 Mon Sep 17 00:00:00 2001
From: HubGab-Git <97609337+HubGab-Git@users.noreply.github.com>
Date: Sun, 29 Sep 2024 16:21:13 +0200
Subject: [PATCH] #4725: Change model deployment to JumpStart

---
 ...estion_answering_langchain_jumpstart.ipynb | 522 +++++++++++-------
 1 file changed, 335 insertions(+), 187 deletions(-)

diff --git a/introduction_to_amazon_algorithms/jumpstart-foundation-models/question_answering_retrieval_augmented_generation/question_answering_langchain_jumpstart.ipynb b/introduction_to_amazon_algorithms/jumpstart-foundation-models/question_answering_retrieval_augmented_generation/question_answering_langchain_jumpstart.ipynb
index a3edd26314..e613850a04 100644
--- a/introduction_to_amazon_algorithms/jumpstart-foundation-models/question_answering_retrieval_augmented_generation/question_answering_langchain_jumpstart.ipynb
+++ b/introduction_to_amazon_algorithms/jumpstart-foundation-models/question_answering_retrieval_augmented_generation/question_answering_langchain_jumpstart.ipynb
@@ -43,7 +43,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
  "metadata": {
   "collapsed": false,
   "jupyter": {
@@ -57,65 +57,67 @@
    "outputs_hidden": false
   },
   "pycharm": {
    "name": "#%%\n"
   },
   "tags": []
  },
  "outputs": [],
  "source": [
   "!pip install --upgrade sagemaker --quiet\n",
-  "!pip install ipywidgets==7.0.0 --quiet\n",
+  "# !pip install ipywidgets==7.0.0 --quiet\n",
   "!pip install langchain==0.0.148 --quiet\n",
   "!pip install faiss-cpu --quiet"
  ]
 },
 {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
  "metadata": {
   "tags": []
  },
  "outputs": [],
  "source": [
-  "import time\n",
-  "import sagemaker, boto3, json\n",
-  "from sagemaker.session import Session\n",
-  "from sagemaker.model import Model\n",
-  "from sagemaker import image_uris, model_uris, script_uris, hyperparameters\n",
-  "from sagemaker.predictor import Predictor\n",
+  "# import time\n",
+  "# import sagemaker\n",
+  "# from sagemaker.session import Session\n",
+  "# from sagemaker.model import Model\n",
+  "# from sagemaker import image_uris, model_uris, script_uris, hyperparameters\n",
+  "# from sagemaker.predictor import Predictor\n",
+  "import boto3, json\n",
+  "from sagemaker import Session\n",
   "from sagemaker.utils import name_from_base\n",
   "from typing import Any, Dict, List, Optional\n",
   "from langchain.embeddings import SagemakerEndpointEmbeddings\n",
-  "from langchain.llms.sagemaker_endpoint import ContentHandlerBase\n",
+  "# from langchain.llms.sagemaker_endpoint import ContentHandlerBase\n",
+  "from sagemaker.jumpstart.model import JumpStartModel\n",
   "\n",
   "sagemaker_session = Session()\n",
-  "aws_role = sagemaker_session.get_caller_identity_arn()\n",
+  "# aws_role = sagemaker_session.get_caller_identity_arn()\n",
   "aws_region = boto3.Session().region_name\n",
-  "sess = sagemaker.Session()\n",
-  "model_version = \"1.*\""
+  "# sess = sagemaker.Session()\n",
+  "# model_version = \"1.*\""
  ]
 },
 {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
  "metadata": {
   "tags": []
  },
  "outputs": [],
  "source": [
-  "def query_endpoint_with_json_payload(encoded_json, endpoint_name, content_type=\"application/json\"):\n",
-  "    client = boto3.client(\"runtime.sagemaker\")\n",
-  "    response = client.invoke_endpoint(\n",
-  "        EndpointName=endpoint_name, ContentType=content_type, Body=encoded_json\n",
-  "    )\n",
-  "    return response\n",
-  "\n",
-  "\n",
-  "def parse_response_model_flan_t5(query_response):\n",
-  "    model_predictions = json.loads(query_response[\"Body\"].read())\n",
-  "    generated_text = model_predictions[\"generated_texts\"]\n",
-  "    return generated_text\n",
-  "\n",
-  "\n",
-  "def parse_response_multiple_texts_bloomz(query_response):\n",
-  "    generated_text = []\n",
-  "    model_predictions = json.loads(query_response[\"Body\"].read())\n",
-  "    for x in model_predictions[0]:\n",
-  "        generated_text.append(x[\"generated_text\"])\n",
-  "    return generated_text"
+  "# def query_endpoint_with_json_payload(encoded_json, endpoint_name, content_type=\"application/json\"):\n",
+  "#     client = boto3.client(\"runtime.sagemaker\")\n",
+  "#     response = client.invoke_endpoint(\n",
+  "#         EndpointName=endpoint_name, ContentType=content_type, Body=encoded_json\n",
+  "#     )\n",
+  "#     return response\n",
+  "\n",
+  "\n",
+  "# def parse_response_model_flan_t5(query_response):\n",
+  "#     model_predictions = json.loads(query_response[\"Body\"].read())\n",
+  "#     generated_text = model_predictions[\"generated_texts\"]\n",
+  "#     return generated_text\n",
+  "\n",
+  "\n",
+  "# def parse_response_multiple_texts_bloomz(query_response):\n",
+  "#     generated_text = []\n",
+  "#     model_predictions = json.loads(query_response[\"Body\"].read())\n",
+  "#     for x in model_predictions[0]:\n",
+  "#         generated_text.append(x[\"generated_text\"])\n",
+  "#     return generated_text"
  ]
 },
@@ -127,7 +130,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
  "metadata": {
   "tags": []
  },
@@ -135,74 +138,87 @@
  "source": [
   "_MODEL_CONFIG_ = {\n",
   "    \"huggingface-text2text-flan-t5-xxl\": {\n",
-  "        \"instance type\": \"ml.g5.12xlarge\",\n",
-  "        \"env\": {\"SAGEMAKER_MODEL_SERVER_WORKERS\": \"1\", \"TS_DEFAULT_WORKERS_PER_MODEL\": \"1\"},\n",
-  "        \"parse_function\": parse_response_model_flan_t5,\n",
-  "        \"prompt\": \"\"\"Answer based on context:\\n\\n{context}\\n\\n{question}\"\"\",\n",
+  "        \"model_version\": \"2.*\",\n",
+  "        \"instance type\": \"ml.g5.12xlarge\"\n",
   "    },\n",
-  "    \"huggingface-textembedding-gpt-j-6b\": {\n",
-  "        \"instance type\": \"ml.g5.24xlarge\",\n",
-  "        \"env\": {\"SAGEMAKER_MODEL_SERVER_WORKERS\": \"1\", \"TS_DEFAULT_WORKERS_PER_MODEL\": \"1\"},\n",
-  "    },\n",
-  "    # \"huggingface-textgeneration1-bloomz-7b1-fp16\": {\n",
-  "    #     \"instance type\": \"ml.g5.12xlarge\",\n",
-  "    #     \"env\": {},\n",
-  "    #     \"parse_function\": parse_response_multiple_texts_bloomz,\n",
-  "    #     \"prompt\": \"\"\"question: \\\"{question}\"\\\\n\\nContext: \\\"{context}\"\\\\n\\nAnswer:\"\"\",\n",
-  "    # },\n",
+  "    # The GPT-J embeddings model raised \"DeprecatedJumpStartModelError:\n",
+  "    # This model is no longer available. Please try another model.\"\n",
+  "    # at the time of testing, so all-MiniLM-L6-v2 is used instead.\n",
+  "    # \"huggingface-textembedding-gpt-j-6b\": {\n",
+  "    #     \"model_version\": \"1.*\",\n",
+  "    #     \"instance type\": \"ml.g5.24xlarge\"\n",
+  "    # },\n",
-  "    # \"huggingface-text2text-flan-ul2-bf16\": {\n",
-  "    #     \"instance type\": \"ml.g5.24xlarge\",\n",
-  "    #     \"env\": {\n",
-  "    #         \"SAGEMAKER_MODEL_SERVER_WORKERS\": \"1\",\n",
-  "    #         \"TS_DEFAULT_WORKERS_PER_MODEL\": \"1\"\n",
-  "    #     },\n",
-  "    #     \"parse_function\": parse_response_model_flan_t5,\n",
-  "    #     \"prompt\": \"\"\"Answer based on context:\\n\\n{context}\\n\\n{question}\"\"\",\n",
-  "    # },\n",
+  "    \"huggingface-textembedding-all-MiniLM-L6-v2\": {\n",
+  "        \"model_version\": \"1.*\",\n",
+  "        \"instance type\": \"ml.g5.24xlarge\"\n",
+  "    }\n",
+  "    # \"huggingface-textembedding-all-MiniLM-L6-v2\": {\n",
+  "    #     \"model_version\": \"3.*\",\n",
+  "    #     \"instance type\": \"ml.g5.12xlarge\"\n",
+  "    # },\n",
+  "    # \"huggingface-text2text-flan-ul2-bf16\": {\n",
+  "    #     \"model_version\": \"2.*\",\n",
+  "    #     \"instance type\": \"ml.g5.24xlarge\"\n",
+  "    # }\n",
   "}"
  ]
 },
 {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
  "metadata": {},
- "outputs": [],
+ "outputs": [
+  {
+   "name": "stdout",
+   "output_type": "stream",
+   "text": [
+    "Deploying huggingface-text2text-flan-t5-xxl...\n",
+    "---------!"
+   ]
+  },
+  {
+   "name": "stderr",
+   "output_type": "stream",
+   "text": [
+    "Using model 'huggingface-textembedding-all-MiniLM-L6-v2' with wildcard version identifier '1.*'. You can pin to version '1.0.0' for more stable results. Note that models may have different input/output signatures after a major version upgrade.\n"
+   ]
+  },
+  {
+   "name": "stdout",
+   "output_type": "stream",
+   "text": [
+    "Deployed endpoint: jumpstart-example-raglc-huggingface-tex-2024-09-29-13-08-36-631\n",
+    "Deploying huggingface-textembedding-all-MiniLM-L6-v2...\n",
+    "---------!Deployed endpoint: jumpstart-example-raglc-huggingface-tex-2024-09-29-13-13-39-117\n",
+    "Deployment process completed.\n"
+   ]
+  }
+ ],
  "source": [
-  "newline, bold, unbold = \"\\n\", \"\\033[1m\", \"\\033[0m\"\n",
-  "\n",
   "for model_id in _MODEL_CONFIG_:\n",
-  "    endpoint_name = name_from_base(f\"jumpstart-example-raglc-{model_id}\")\n",
-  "    inference_instance_type = _MODEL_CONFIG_[model_id][\"instance type\"]\n",
-  "\n",
-  "    # Retrieve the inference container uri. This is the base HuggingFace container image for the default model above.\n",
-  "    deploy_image_uri = image_uris.retrieve(\n",
-  "        region=None,\n",
-  "        framework=None,  # automatically inferred from model_id\n",
-  "        image_scope=\"inference\",\n",
+  "    endpoint_name = name_from_base(f'jumpstart-example-raglc-{model_id}')\n",
+  "    inference_instance_type = _MODEL_CONFIG_[model_id]['instance type']\n",
+  "    model_version = _MODEL_CONFIG_[model_id]['model_version']\n",
+  "\n",
+  "    print(f'Deploying {model_id}...')\n",
+  "\n",
+  "    model = JumpStartModel(\n",
   "        model_id=model_id,\n",
-  "        model_version=model_version,\n",
-  "        instance_type=inference_instance_type,\n",
-  "    )\n",
-  "    # Retrieve the model uri.\n",
-  "    model_uri = model_uris.retrieve(\n",
-  "        model_id=model_id, model_version=model_version, model_scope=\"inference\"\n",
+  "        model_version=model_version\n",
   "    )\n",
-  "    model_inference = Model(\n",
-  "        image_uri=deploy_image_uri,\n",
-  "        model_data=model_uri,\n",
-  "        role=aws_role,\n",
-  "        predictor_cls=Predictor,\n",
-  "        name=endpoint_name,\n",
-  "        env=_MODEL_CONFIG_[model_id][\"env\"],\n",
-  "    )\n",
-  "    model_predictor_inference = model_inference.deploy(\n",
-  "        initial_instance_count=1,\n",
-  "        instance_type=inference_instance_type,\n",
-  "        predictor_cls=Predictor,\n",
-  "        endpoint_name=endpoint_name,\n",
-  "    )\n",
-  "    print(f\"{bold}Model {model_id} has been deployed successfully.{unbold}{newline}\")\n",
-  "    _MODEL_CONFIG_[model_id][\"endpoint_name\"] = endpoint_name"
+  "\n",
+  "    try:\n",
+  "        predictor = model.deploy(\n",
+  "            initial_instance_count=1,\n",
+  "            instance_type=inference_instance_type,\n",
+  "            endpoint_name=endpoint_name  # reuse the name generated above\n",
+  "        )\n",
+  "        print(f\"Deployed endpoint: {predictor.endpoint_name}\")\n",
+  "        _MODEL_CONFIG_[model_id]['predictor'] = predictor\n",
+  "    except Exception as e:\n",
+  "        print(f\"Error deploying {model_id}: {str(e)}\")\n",
+  "\n",
+  "print(\"Deployment process completed.\")"
  ]
 },
@@ -216,7 +234,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -225,30 +243,31 @@
 },
 {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
  "metadata": {},
- "outputs": [],
+ "outputs": [
+  {
+   "name": "stdout",
+   "output_type": "stream",
+   "text": [
+    "For model: huggingface-text2text-flan-t5-xxl, the generated output is:\n",
+    "\n",
+    "ARM 64-bit instances of the SageMaker server.\n",
+    "\n"
+   ]
+  }
+ ],
  "source": [
-  "payload = {\n",
-  "    \"text_inputs\": question,\n",
-  "    \"max_length\": 100,\n",
-  "    \"num_return_sequences\": 1,\n",
-  "    \"top_k\": 50,\n",
-  "    \"top_p\": 0.95,\n",
-  "    \"do_sample\": True,\n",
-  "}\n",
-  "\n",
   "list_of_LLMs = list(_MODEL_CONFIG_.keys())\n",
-  "list_of_LLMs.remove(\"huggingface-textembedding-gpt-j-6b\")  # remove the embedding model\n",
-  "\n",
+  "list_of_LLMs = [model for model in list_of_LLMs if \"textembedding\" not in model]\n",
   "\n",
   "for model_id in list_of_LLMs:\n",
-  "    endpoint_name = _MODEL_CONFIG_[model_id][\"endpoint_name\"]\n",
-  "    query_response = query_endpoint_with_json_payload(\n",
-  "        json.dumps(payload).encode(\"utf-8\"), endpoint_name=endpoint_name\n",
-  "    )\n",
-  "    generated_texts = _MODEL_CONFIG_[model_id][\"parse_function\"](query_response)\n",
-  "    print(f\"For model: {model_id}, the generated output is: {generated_texts[0]}\\n\")"
+  "    predictor = _MODEL_CONFIG_[model_id][\"predictor\"]\n",
+  "    response = predictor.predict({\n",
+  "        \"inputs\": question\n",
+  "    })\n",
" print(f\"For model: {model_id}, the generated output is:\\n\")\n", + " print(f\"{response[0]['generated_text']}\\n\")" ] }, { @@ -270,7 +289,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -279,35 +298,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For model: huggingface-text2text-flan-t5-xxl, the generated output is:\n", + "\n", + "all instances\n", + "\n" + ] + } + ], "source": [ - "parameters = {\n", - " \"max_length\": 200,\n", - " \"num_return_sequences\": 1,\n", - " \"top_k\": 250,\n", - " \"top_p\": 0.95,\n", - " \"do_sample\": False,\n", - " \"temperature\": 1,\n", - "}\n", - "\n", - "for model_id in list_of_LLMs:\n", - " endpoint_name = _MODEL_CONFIG_[model_id][\"endpoint_name\"]\n", + "# parameters = {\n", + "# \"max_length\": 200,\n", + "# \"num_return_sequences\": 1,\n", + "# \"top_k\": 250,\n", + "# \"top_p\": 0.95,\n", + "# \"do_sample\": False,\n", + "# \"temperature\": 1,\n", + "# }\n", "\n", - " prompt = _MODEL_CONFIG_[model_id][\"prompt\"]\n", + "prompt = f'Answer based on context:\\n\\n{context}\\n\\n{question}'\n", "\n", - " text_input = prompt.replace(\"{context}\", context)\n", - " text_input = text_input.replace(\"{question}\", question)\n", - " payload = {\"text_inputs\": text_input, **parameters}\n", - "\n", - " query_response = query_endpoint_with_json_payload(\n", - " json.dumps(payload).encode(\"utf-8\"), endpoint_name=endpoint_name\n", - " )\n", - " generated_texts = _MODEL_CONFIG_[model_id][\"parse_function\"](query_response)\n", - " print(\n", - " f\"{bold}For model: {model_id}, the generated output is: {generated_texts[0]}{unbold}{newline}\"\n", - " )" + "for model_id in list_of_LLMs:\n", + " predictor = _MODEL_CONFIG_[model_id][\"predictor\"]\n", + " response = predictor.predict({\n", + " \"inputs\": prompt\n", + " })\n", + " print(f\"For model: {model_id}, the generated output is:\\n\")\n", + " print(f\"{response[0]['generated_text']}\\n\")" ] }, { @@ -358,7 +381,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": { "tags": [] }, @@ -405,9 +428,12 @@ "\n", "\n", "content_handler = ContentHandler()\n", + "endpoint_name=_MODEL_CONFIG_[\n", + " \"huggingface-textembedding-all-MiniLM-L6-v2\"\n", + " ][\"predictor\"].endpoint_name\n", "\n", "embeddings = SagemakerEndpointEmbeddingsJumpStart(\n", - " endpoint_name=_MODEL_CONFIG_[\"huggingface-textembedding-gpt-j-6b\"][\"endpoint_name\"],\n", + " endpoint_name=endpoint_name,\n", " region_name=aws_region,\n", " content_handler=content_handler,\n", ")" @@ -422,39 +448,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "from langchain.llms.sagemaker_endpoint import LLMContentHandler, SagemakerEndpoint\n", "\n", - "parameters = {\n", - " \"max_length\": 200,\n", - " \"num_return_sequences\": 1,\n", - " \"top_k\": 250,\n", - " \"top_p\": 0.95,\n", - " \"do_sample\": False,\n", - " \"temperature\": 1,\n", - "}\n", - "\n", - "\n", "class ContentHandler(LLMContentHandler):\n", " content_type = \"application/json\"\n", " accepts = \"application/json\"\n", "\n", " def transform_input(self, prompt: str, model_kwargs={}) -> bytes:\n", - " input_str = json.dumps({\"text_inputs\": prompt, **model_kwargs})\n", + " input_str = json.dumps({\"inputs\": prompt, **model_kwargs})\n", " 
   "        return input_str.encode(\"utf-8\")\n",
   "\n",
   "    def transform_output(self, output: bytes) -> str:\n",
   "        response_json = json.loads(output.read().decode(\"utf-8\"))\n",
-  "        return response_json[\"generated_texts\"][0]\n",
+  "        return response_json[0][\"generated_text\"]\n",
   "\n",
   "\n",
   "content_handler = ContentHandler()\n",
+  "endpoint_name = _MODEL_CONFIG_[\n",
+  "    \"huggingface-text2text-flan-t5-xxl\"\n",
+  "][\"predictor\"].endpoint_name\n",
+  "\n",
   "\n",
   "sm_llm = SagemakerEndpoint(\n",
-  "    endpoint_name=_MODEL_CONFIG_[\"huggingface-text2text-flan-t5-xxl\"][\"endpoint_name\"],\n",
+  "    endpoint_name=endpoint_name,\n",
   "    region_name=aws_region,\n",
-  "    model_kwargs=parameters,\n",
+  "    model_kwargs={},  # the old parameters dict was removed; add generation kwargs here if needed\n",
   "    content_handler=content_handler,\n",
@@ -472,11 +492,19 @@
 },
 {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 18,
  "metadata": {
   "tags": []
  },
- "outputs": [],
+ "outputs": [
+  {
+   "name": "stdout",
+   "output_type": "stream",
+   "text": [
+    "download: s3://jumpstart-cache-prod-us-east-2/training-datasets/Amazon_SageMaker_FAQs/Amazon_SageMaker_FAQs.csv to rag_data/Amazon_SageMaker_FAQs.csv\n"
+   ]
+  }
+ ],
  "source": [
   "original_data = \"s3://jumpstart-cache-prod-us-east-2/training-datasets/Amazon_SageMaker_FAQs/\"\n",
   "\n",
@@ -493,7 +521,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 19,
  "metadata": {
   "tags": []
  },
@@ -521,7 +549,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 20,
  "metadata": {
   "tags": []
  },
@@ -532,18 +560,81 @@
 },
 {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 21,
  "metadata": {
   "tags": []
  },
- "outputs": [],
+ "outputs": [
+  {
+   "data": {
+    "text/html": [
\n", + " | Answer | \n", + "
---|---|
0 | \n", + "Amazon SageMaker is a fully managed service to... | \n", + "
1 | \n", + "For a list of the supported Amazon SageMaker A... | \n", + "
2 | \n", + "Amazon SageMaker is designed for high availabi... | \n", + "
3 | \n", + "Amazon SageMaker stores code in ML storage vol... | \n", + "
4 | \n", + "Amazon SageMaker ensures that ML model artifac... | \n", + "
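---

Reviewer note: the deployment pattern this patch introduces boils down to the short sketch below. It is illustrative rather than part of the patch; it assumes a recent `sagemaker` SDK (one that ships `sagemaker.jumpstart.model.JumpStartModel`), reuses the `huggingface-text2text-flan-t5-xxl` / `2.*` / `ml.g5.12xlarge` configuration from `_MODEL_CONFIG_`, and relies on the `2.*` text2text endpoints accepting `{"inputs": ...}` and returning `[{"generated_text": ...}]`, which is the same contract the updated `ContentHandler` classes assume.

```python
# Sketch (not part of the patch): deploy a JumpStart model, query it, clean up.
from sagemaker.jumpstart.model import JumpStartModel
from sagemaker.utils import name_from_base

# Model id, version, and instance type taken from _MODEL_CONFIG_ above.
model = JumpStartModel(
    model_id="huggingface-text2text-flan-t5-xxl",
    model_version="2.*",
)
predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.12xlarge",
    endpoint_name=name_from_base("jumpstart-example-raglc-flan-t5-xxl"),
)

# The endpoint takes {"inputs": ...} and answers [{"generated_text": ...}].
response = predictor.predict({"inputs": "What is Amazon SageMaker?"})
print(response[0]["generated_text"])

# Endpoints bill while running; delete the model and endpoint when done.
predictor.delete_model()
predictor.delete_endpoint()
```

Keeping the returned `Predictor` in `_MODEL_CONFIG_[model_id]['predictor']` (instead of only an endpoint name, as before) is what lets the later LangChain cells and this kind of cleanup stay a single attribute access away (`.endpoint_name`, `.delete_endpoint()`).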