diff --git a/notebooks/02_semantic_search.ipynb b/notebooks/02_semantic_search.ipynb index f070dea..b43e1d7 100644 --- a/notebooks/02_semantic_search.ipynb +++ b/notebooks/02_semantic_search.ipynb @@ -1,7274 +1,7757 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "9b088054-daa2-4dbe-a71c-3a3dd58ab3b5", - "metadata": {}, - "source": [ - "# Part 1: Setting up a basic semantic search system" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "aGixSoAgIzLo", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "aGixSoAgIzLo", - "outputId": "cfcad5a5-3d39-4b79-a0ef-c33107ad1972" - }, - "outputs": [ + "cells": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Successfully installed datasets-2.18.0 dill-0.3.8 h11-0.14.0 httpcore-1.0.5 httpx-0.27.0 multiprocess-0.70.16 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.19.3 nvidia-nvjitlink-cu12-12.4.127 nvidia-nvtx-cu12-12.1.105 openai-1.17.0 pinecone-client-3.2.2 sentence-transformers-2.6.1 tiktoken-0.6.0 xxhash-3.4.1\n" - ] - } - ], - "source": [ - "!pip install pinecone-client openai sentence-transformers tiktoken datasets" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "c12182ab", - "metadata": { - "id": "c12182ab" - }, - "outputs": [], - "source": [ - "from openai import OpenAI\n", - "from datetime import datetime\n", - "import hashlib\n", - "import re\n", - "import os\n", - "from sentence_transformers import CrossEncoder\n", - "\n", - "\n", - "from tqdm import tqdm\n", - "import numpy as np\n", - "from torch import nn\n", - "\n", - "import logging\n", - "from pinecone import Pinecone, ServerlessSpec\n", - "\n", - "logger = logging.getLogger()\n", - 
"logger.setLevel(logging.CRITICAL)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "f0289c54", - "metadata": { - "id": "f0289c54" - }, - "outputs": [], - "source": [ - "pinecone_key = os.environ.get('PINECONE_API_KEY')\n", - "client = OpenAI(\n", - " api_key=os.environ.get(\"OPENAI_API_KEY\")\n", - ")\n", - "\n", - "INDEX_NAME = 'semantic-search-test'\n", - "NAMESPACE = 'default'\n", - "ENGINE = 'text-embedding-3-large' # has vector size 3072\n", - "\n", - "pc = Pinecone(\n", - " api_key=pinecone_key\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "3cf993a3-6d51-49f4-8968-60f654d6202d", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "3cf993a3-6d51-49f4-8968-60f654d6202d", - "outputId": "12a29199-9a84-4855-f07f-7979b15a718b" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "(3072, 2)" + "cell_type": "markdown", + "id": "9b088054-daa2-4dbe-a71c-3a3dd58ab3b5", + "metadata": { + "id": "9b088054-daa2-4dbe-a71c-3a3dd58ab3b5" + }, + "source": [ + "# Part 1: Setting up a basic semantic search system" ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# helper functions to get lists of embeddings from the OpenAI API\n", - "def get_embeddings(texts, engine=ENGINE):\n", - " response = client.embeddings.create(\n", - " input=texts,\n", - " model=engine\n", - " )\n", - "\n", - " return [d.embedding for d in list(response.data)]\n", - "\n", - "def get_embedding(text, engine=ENGINE):\n", - " return get_embeddings([text], engine)[0]\n", - "\n", - "len(get_embedding('hi')), len(get_embeddings(['hi', 'hello']))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ea70672a", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ea70672a", - "outputId": "7ba237ca-f93f-4a8b-83a1-5133a33b3080" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - 
"Creating index semantic-search-test\n" - ] }, { - "data": { - "text/plain": [ - "" + "cell_type": "markdown", + "id": "722572bc", + "metadata": {}, + "source": [ + "A big thank you to [David on Github](https://github.com/gypsydave5) for finding a bug in my analysis code and bringing it to my attention. The bug was resolved! \n", + "\n", + "Some numbers might be different from what is reported in the book/video course but the overall gist is the same: re-ranking helps our semantic search and fine-tuning the re-ranking cross encoder yielded even better results." ] - }, - "execution_count": 160, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "if INDEX_NAME not in pc.list_indexes().names():\n", - " print(f'Creating index {INDEX_NAME}')\n", - " pc.create_index(\n", - " name=INDEX_NAME, # The name of the index\n", - " dimension=3072, # The dimensionality of the vectors for our OpenAI embedder\n", - " metric='cosine', # The similarity metric to use when searching the index\n", - " spec=ServerlessSpec(\n", - " cloud='aws',\n", - " region='us-west-2'\n", - " )\n", - " )\n", - "\n", - "# Store the index as a variable\n", - "index = pc.Index(name=INDEX_NAME)\n", - "index" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b6103d6c", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" }, - "id": "b6103d6c", - "outputId": "549365a0-a4ee-4dff-b20e-8d2e07c97e13" - }, - "outputs": [ { - "data": { - "text/plain": [ - "{'dimension': 3072,\n", - " 'index_fullness': 0.0,\n", - " 'namespaces': {},\n", - " 'total_vector_count': 0}" + "cell_type": "code", + "execution_count": 1, + "id": "aGixSoAgIzLo", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "collapsed": true, + "id": "aGixSoAgIzLo", + "outputId": "ba5a2fd2-f4ad-4723-d499-5824bd94d28a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pinecone-client in 
/usr/local/lib/python3.11/dist-packages (5.0.1)\n", + "Requirement already satisfied: openai in /usr/local/lib/python3.11/dist-packages (1.61.1)\n", + "Requirement already satisfied: sentence-transformers in /usr/local/lib/python3.11/dist-packages (3.4.1)\n", + "Requirement already satisfied: tiktoken in /usr/local/lib/python3.11/dist-packages (0.8.0)\n", + "Requirement already satisfied: datasets in /usr/local/lib/python3.11/dist-packages (3.2.0)\n", + "Requirement already satisfied: certifi>=2019.11.17 in /usr/local/lib/python3.11/dist-packages (from pinecone-client) (2025.1.31)\n", + "Requirement already satisfied: pinecone-plugin-inference<2.0.0,>=1.0.3 in /usr/local/lib/python3.11/dist-packages (from pinecone-client) (1.1.0)\n", + "Requirement already satisfied: pinecone-plugin-interface<0.0.8,>=0.0.7 in /usr/local/lib/python3.11/dist-packages (from pinecone-client) (0.0.7)\n", + "Requirement already satisfied: tqdm>=4.64.1 in /usr/local/lib/python3.11/dist-packages (from pinecone-client) (4.67.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4 in /usr/local/lib/python3.11/dist-packages (from pinecone-client) (4.12.2)\n", + "Requirement already satisfied: urllib3>=1.26.0 in /usr/local/lib/python3.11/dist-packages (from pinecone-client) (2.3.0)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.11/dist-packages (from openai) (3.7.1)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.11/dist-packages (from openai) (1.9.0)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.11/dist-packages (from openai) (0.28.1)\n", + "Requirement already satisfied: jiter<1,>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from openai) (0.8.2)\n", + "Requirement already satisfied: pydantic<3,>=1.9.0 in /usr/local/lib/python3.11/dist-packages (from openai) (2.10.6)\n", + "Requirement already satisfied: sniffio in /usr/local/lib/python3.11/dist-packages (from openai) 
(1.3.1)\n", + "Requirement already satisfied: transformers<5.0.0,>=4.41.0 in /usr/local/lib/python3.11/dist-packages (from sentence-transformers) (4.48.2)\n", + "Requirement already satisfied: torch>=1.11.0 in /usr/local/lib/python3.11/dist-packages (from sentence-transformers) (2.5.1+cu124)\n", + "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.11/dist-packages (from sentence-transformers) (1.6.1)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.11/dist-packages (from sentence-transformers) (1.13.1)\n", + "Requirement already satisfied: huggingface-hub>=0.20.0 in /usr/local/lib/python3.11/dist-packages (from sentence-transformers) (0.28.1)\n", + "Requirement already satisfied: Pillow in /usr/local/lib/python3.11/dist-packages (from sentence-transformers) (11.1.0)\n", + "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.11/dist-packages (from tiktoken) (2024.11.6)\n", + "Requirement already satisfied: requests>=2.26.0 in /usr/local/lib/python3.11/dist-packages (from tiktoken) (2.32.3)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from datasets) (3.17.0)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from datasets) (1.26.4)\n", + "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.11/dist-packages (from datasets) (17.0.0)\n", + "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from datasets) (0.3.8)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from datasets) (2.2.2)\n", + "Requirement already satisfied: xxhash in /usr/local/lib/python3.11/dist-packages (from datasets) (3.5.0)\n", + "Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.11/dist-packages (from datasets) (0.70.16)\n", + "Requirement already satisfied: fsspec<=2024.9.0,>=2023.1.0 in 
/usr/local/lib/python3.11/dist-packages (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets) (2024.9.0)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from datasets) (3.11.11)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from datasets) (24.2)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from datasets) (6.0.2)\n", + "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.11/dist-packages (from anyio<5,>=3.5.0->openai) (3.10)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (2.4.4)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.3.2)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (25.1.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.5.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (6.1.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (0.2.1)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.18.3)\n", + "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx<1,>=0.23.0->openai) (1.0.7)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai) (0.14.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3,>=1.9.0->openai) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 
in /usr/local/lib/python3.11/dist-packages (from pydantic<3,>=1.9.0->openai) (2.27.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests>=2.26.0->tiktoken) (3.4.1)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (3.4.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (3.1.5)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (12.4.127)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (12.4.127)\n", + "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (12.4.127)\n", + "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (9.1.0.70)\n", + "Requirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (12.4.5.8)\n", + "Requirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (11.2.1.3)\n", + "Requirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (10.3.5.147)\n", + "Requirement already satisfied: nvidia-cusolver-cu12==11.6.1.9 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (11.6.1.9)\n", + "Requirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /usr/local/lib/python3.11/dist-packages (from 
torch>=1.11.0->sentence-transformers) (12.3.1.170)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (2.21.5)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (12.4.127)\n", + "Requirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (12.4.127)\n", + "Requirement already satisfied: triton==3.1.0 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (3.1.0)\n", + "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (1.13.1)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch>=1.11.0->sentence-transformers) (1.3.0)\n", + "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers<5.0.0,>=4.41.0->sentence-transformers) (0.21.0)\n", + "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.11/dist-packages (from transformers<5.0.0,>=4.41.0->sentence-transformers) (0.5.2)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets) (2025.1)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets) (2025.1)\n", + "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn->sentence-transformers) (1.4.2)\n", + "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from 
scikit-learn->sentence-transformers) (3.5.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.17.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch>=1.11.0->sentence-transformers) (3.0.2)\n" + ] + } + ], + "source": [ + "!pip install pinecone-client openai sentence-transformers tiktoken datasets" ] - }, - "execution_count": 161, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "index.describe_index_stats()" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "7f2fdfe7", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 }, - "id": "7f2fdfe7", - "outputId": "4c41491f-5ce8-415e-f792-e149e470b267" - }, - "outputs": [ { - "data": { - "text/plain": [ - "'ae76cc4dfd345ecaeea9b8ba0d5c3437'" + "cell_type": "code", + "execution_count": 2, + "id": "c12182ab", + "metadata": { + "id": "c12182ab" + }, + "outputs": [], + "source": [ + "from openai import OpenAI\n", + "from datetime import datetime\n", + "import hashlib\n", + "import re\n", + "import os\n", + "from sentence_transformers import CrossEncoder\n", + "\n", + "\n", + "from tqdm import tqdm\n", + "import numpy as np\n", + "from torch import nn\n", + "\n", + "import logging\n", + "from pinecone import Pinecone, ServerlessSpec\n", + "\n", + "logger = logging.getLogger()\n", + "logger.setLevel(logging.CRITICAL)\n" ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def my_hash(s):\n", - " # Return the MD5 hash of the input string as a hexadecimal string\n", - " return hashlib.md5(s.encode()).hexdigest()\n", - "\n", - "my_hash('I love to hash it')" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "ecd86f51", - "metadata": { - "id": "ecd86f51" - }, - "outputs": [], - "source": [ - "def prepare_for_pinecone(texts, 
engine=ENGINE):\n", - " # Get the current UTC date and time\n", - " now = datetime.utcnow()\n", - "\n", - " # Generate vector embeddings for each string in the input list, using the specified engine\n", - " embeddings = get_embeddings(texts, engine=engine)\n", - "\n", - " # Create tuples of (hash, embedding, metadata) for each input string and its corresponding vector embedding\n", - " # The my_hash() function is used to generate a unique hash for each string, and the datetime.utcnow() function is used to generate the current UTC date and time\n", - " return [\n", - " (\n", - " my_hash(text), # A unique ID for each string, generated using the my_hash() function\n", - " embedding, # The vector embedding of the string\n", - " dict(text=text, date_uploaded=now) # A dictionary of metadata, including the original text and the current UTC date and time\n", - " )\n", - " for text, embedding in zip(texts, embeddings) # Iterate over each input string and its corresponding vector embedding\n", - " ]\n" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "4c40d99a", - "metadata": { - "id": "4c40d99a" - }, - "outputs": [], - "source": [ - "texts = ['hi']" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "3e1b73f3", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "3e1b73f3", - "outputId": "eeba3b54-c2e5-481c-f340-3e3802c79b38" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ID: 49f68a5c8493ec2c0bf489821c21fc3b \n", - "LEN: 3072 \n", - "META: {'text': 'hi', 'date_uploaded': datetime.datetime(2024, 7, 1, 16, 33, 16, 847362)}\n" - ] - } - ], - "source": [ - "_id, embedding, metadata = prepare_for_pinecone(texts)[0]\n", - "\n", - "print('ID: ',_id, '\\nLEN: ', len(embedding), '\\nMETA:', metadata)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b49debd5", - "metadata": { - "id": "b49debd5" - }, - "outputs": [], - "source": [] - }, - { - "cell_type": 
"code", - "execution_count": 17, - "id": "bf47aabd", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" }, - "id": "bf47aabd", - "outputId": "0cdc3643-d204-4d4f-9ed4-3f632a4328fb" - }, - "outputs": [], - "source": [ - "def upload_texts_to_pinecone(texts, namespace=NAMESPACE, batch_size=None, show_progress_bar=False):\n", - " # Call the prepare_for_pinecone function to prepare the input texts for indexing\n", - " total_upserted = 0\n", - " if not batch_size:\n", - " batch_size = len(texts)\n", - "\n", - " _range = range(0, len(texts), batch_size)\n", - " for i in tqdm(_range) if show_progress_bar else _range:\n", - " batch = texts[i: i + batch_size]\n", - " prepared_texts = prepare_for_pinecone(batch)\n", - "\n", - " # Use the upsert() method of the index object to upload the prepared texts to Pinecone\n", - " total_upserted += index.upsert(\n", - " vectors=prepared_texts,\n", - " namespace=namespace\n", - " )['upserted_count']\n", - "\n", - "\n", - " return total_upserted\n", - "\n", - "# Call the upload_texts_to_pinecone() function with the input texts\n", - "upload_texts_to_pinecone(texts)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "V0XI6RAom-Ln", - "metadata": { - "id": "V0XI6RAom-Ln" - }, - "outputs": [], - "source": [ - "def query_from_pinecone(query, top_k=3, include_metadata=True):\n", - " # get embedding from THE SAME embedder as the documents\n", - " query_embedding = get_embedding(query, engine=ENGINE)\n", - "\n", - " return index.query(\n", - " vector=query_embedding,\n", - " top_k=top_k,\n", - " namespace=NAMESPACE,\n", - " include_metadata=include_metadata # gets the metadata (dates, text, etc)\n", - " ).get('matches')" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "84a0871f", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "84a0871f", - "outputId": "cf32ed08-348f-4be0-a647-cf0546ca0c2d" - }, - "outputs": [], - "source": [ - "def 
delete_texts_from_pinecone(texts, namespace=NAMESPACE):\n", - " # Compute the hash (id) for each text\n", - " hashes = [hashlib.md5(text.encode()).hexdigest() for text in texts]\n", - "\n", - " # The ids parameter is used to specify the list of IDs (hashes) to delete\n", - " return index.delete(ids=hashes, namespace=namespace)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a6a5172f-a0c6-4692-ab77-83321e141679", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "4523cab6-0bc3-4791-9c73-7bd7d7e5858b", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "4523cab6-0bc3-4791-9c73-7bd7d7e5858b", - "outputId": "768dff70-122a-458d-b26b-ef915dee80e5" - }, - "outputs": [ { - "data": { - "text/plain": [ - "DatasetDict({\n", - " test: Dataset({\n", - " features: ['id', 'title', 'context', 'question', 'answers'],\n", - " num_rows: 1148\n", - " })\n", - " train: Dataset({\n", - " features: ['id', 'title', 'context', 'question', 'answers'],\n", - " num_rows: 11590\n", - " })\n", - "})" + "cell_type": "code", + "execution_count": 4, + "id": "f0289c54", + "metadata": { + "id": "f0289c54" + }, + "outputs": [], + "source": [ + "pinecone_key = os.environ.get('PINECONE_API_KEY')\n", + "client = OpenAI(\n", + " api_key=os.environ.get(\"OPENAI_API_KEY\")\n", + ")\n", + "\n", + "INDEX_NAME = 'semantic-search-test'\n", + "NAMESPACE = 'default'\n", + "ENGINE = 'text-embedding-3-large' # has vector size 3072\n", + "\n", + "pc = Pinecone(\n", + " api_key=pinecone_key\n", + ")" ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from datasets import load_dataset\n", - "\n", - "dataset = load_dataset(\"xtreme\", \"MLQA.en.en\")\n", - "\n", - "# rename test -> train and val -> test (as we will use it in later in this chapter)\n", - "dataset['train'] = dataset['test']\n", - "dataset['test'] = dataset['validation']\n", - 
"del dataset['validation']\n", - "\n", - "dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "e622fbfa-2dde-4711-9c46-1390eb3430f9", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" }, - "id": "e622fbfa-2dde-4711-9c46-1390eb3430f9", - "outputId": "8e7d6133-76ce-493a-963f-f29e05c6e0ba" - }, - "outputs": [ { - "data": { - "text/plain": [ - "({'id': 'a4968ca8a18de16aa3859be760e43dbd3af3fce9',\n", - " 'title': 'Area 51',\n", - " 'context': 'In 1994, five unnamed civilian contractors and the widows of contractors Walter Kasza and Robert Frost sued the USAF and the United States Environmental Protection Agency. Their suit, in which they were represented by George Washington University law professor Jonathan Turley, alleged they had been present when large quantities of unknown chemicals had been burned in open pits and trenches at Groom. Biopsies taken from the complainants were analyzed by Rutgers University biochemists, who found high levels of dioxin, dibenzofuran, and trichloroethylene in their body fat. The complainants alleged they had sustained skin, liver, and respiratory injuries due to their work at Groom, and that this had contributed to the deaths of Frost and Kasza. The suit sought compensation for the injuries they had sustained, claiming the USAF had illegally handled toxic materials, and that the EPA had failed in its duty to enforce the Resource Conservation and Recovery Act (which governs handling of dangerous materials). They also sought detailed information about the chemicals to which they were allegedly exposed, hoping this would facilitate the medical treatment of survivors. Congressman Lee H. 
Hamilton, former chairman of the House Intelligence Committee, told 60 Minutes reporter Lesley Stahl, \"The Air Force is classifying all information about Area 51 in order to protect themselves from a lawsuit.\"',\n", - " 'question': 'Who analyzed the biopsies?',\n", - " 'answers': {'answer_start': [457],\n", - " 'text': ['Rutgers University biochemists']}},\n", - " {'id': 'f251ea56c4f1aa1df270137f7e6d89c0cc1b6ef4',\n", - " 'title': 'Area 51',\n", - " 'context': 'In 1994, five unnamed civilian contractors and the widows of contractors Walter Kasza and Robert Frost sued the USAF and the United States Environmental Protection Agency. Their suit, in which they were represented by George Washington University law professor Jonathan Turley, alleged they had been present when large quantities of unknown chemicals had been burned in open pits and trenches at Groom. Biopsies taken from the complainants were analyzed by Rutgers University biochemists, who found high levels of dioxin, dibenzofuran, and trichloroethylene in their body fat. The complainants alleged they had sustained skin, liver, and respiratory injuries due to their work at Groom, and that this had contributed to the deaths of Frost and Kasza. The suit sought compensation for the injuries they had sustained, claiming the USAF had illegally handled toxic materials, and that the EPA had failed in its duty to enforce the Resource Conservation and Recovery Act (which governs handling of dangerous materials). They also sought detailed information about the chemicals to which they were allegedly exposed, hoping this would facilitate the medical treatment of survivors. Congressman Lee H. 
Hamilton, former chairman of the House Intelligence Committee, told 60 Minutes reporter Lesley Stahl, \"The Air Force is classifying all information about Area 51 in order to protect themselves from a lawsuit.\"',\n", - " 'question': 'who represented robert frost and walter kasza in their suit?',\n", - " 'answers': {'answer_start': [218],\n", - " 'text': ['George Washington University law professor Jonathan Turley']}})" + "cell_type": "code", + "execution_count": 5, + "id": "3cf993a3-6d51-49f4-8968-60f654d6202d", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3cf993a3-6d51-49f4-8968-60f654d6202d", + "outputId": "26e4b864-5bc3-4fd5-d250-1eba18b3b7f4" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(3072, 2)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# helper functions to get lists of embeddings from the OpenAI API\n", + "def get_embeddings(texts, engine=ENGINE):\n", + " response = client.embeddings.create(\n", + " input=texts,\n", + " model=engine\n", + " )\n", + "\n", + " return [d.embedding for d in list(response.data)]\n", + "\n", + "def get_embedding(text, engine=ENGINE):\n", + " return get_embeddings([text], engine)[0]\n", + "\n", + "len(get_embedding('hi')), len(get_embeddings(['hi', 'hello']))" ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dataset['train'][0], dataset['train'][1]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0221343a", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" }, - "id": "0221343a", - "outputId": "5ab7c522-23ea-467c-be72-fe031432fbdc" - }, - "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 31/31 [01:28<00:00, 2.87s/it]\n" - ] - } - ], - "source": [ - "unique_passages = list(set(dataset['test']['context']))\n", - "for idx in tqdm(range(0, len(unique_passages), 32)):\n", - 
" passages = unique_passages[idx:idx + 32]\n", - " upload_texts_to_pinecone(passages)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "796a7c80-7149-430e-b22c-9926c0d1daee", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "796a7c80-7149-430e-b22c-9926c0d1daee", - "outputId": "5fc883f2-230a-4fbf-d186-a60f5dd3573a" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "978" + "cell_type": "code", + "execution_count": 6, + "id": "ea70672a", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ea70672a", + "outputId": "f7fa123a-079a-4c38-d43c-f8b4a760440d" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "if INDEX_NAME not in pc.list_indexes().names():\n", + " print(f'Creating index {INDEX_NAME}')\n", + " pc.create_index(\n", + " name=INDEX_NAME, # The name of the index\n", + " dimension=3072, # The dimensionality of the vectors for our OpenAI embedder\n", + " metric='cosine', # The similarity metric to use when searching the index\n", + " spec=ServerlessSpec(\n", + " cloud='aws',\n", + " region='us-west-2'\n", + " )\n", + " )\n", + "\n", + "# Store the index as a variable\n", + "index = pc.Index(name=INDEX_NAME)\n", + "index" ] - }, - "execution_count": 163, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(unique_passages)" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "33yf7QrWtwt-", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" }, - "id": "33yf7QrWtwt-", - "outputId": "9e8d30a5-b1b3-43f7-ad6d-bec8135fc5ee" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'dimension': 3072,\n", - " 'index_fullness': 0.0,\n", - " 'namespaces': {'default': {'vector_count': 978}},\n", - " 'total_vector_count': 978}" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": 
"execute_result" - } - ], - "source": [ - "index.describe_index_stats()" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "432b021b-2cdb-4d73-a2c6-f2e0d1eb5ffc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'id': '569666f4dc3983dab5624e989212c1d9d0cd1798',\n", - " 'title': 'Pappataci fever',\n", - " 'context': 'Pappataci fever is prevalent in the subtropical zone of the Eastern Hemisphere between 20°N and 45°N, particularly in Southern Europe, North Africa, the Balkans, Eastern Mediterranean, Iraq, Iran, Pakistan, Afghanistan and India.The disease is transmitted by the bites of phlebotomine sandflies of the Genus Phlebotomus, in particular, Phlebotomus papatasi, Phlebotomus perniciosus and Phlebotomus perfiliewi. The sandfly becomes infected when biting an infected human in the period between 48 hours before the onset of fever and 24 hours after the end of the fever, and remains infected for its lifetime. Besides this horizontal virus transmission from man to sandfly, the virus can be transmitted in insects transovarially, from an infected female sandfly to its offspring.Pappataci fever is seldom recognised in endemic populations because it is mixed with other febrile illnesses of childhood, but it is more well-known among immigrants and military personnel from non-endemic regions.',\n", - " 'question': 'Does an infection for Sandflies go away over time?',\n", - " 'answers': {'answer_start': [571],\n", - " 'text': ['remains infected for its lifetime']}}" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dataset['test'][0]" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "36e13316-2295-4a72-8f2a-b459f0e7826a", - "metadata": { - "scrolled": true - }, - "outputs": [ { - "data": { - "text/plain": [ - "[{'id': '2f90090e21f19450887d5f3ff781e541',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:47:20.914703',\n", - " 'text': 'Pappataci 
fever is prevalent in the subtropical zone of '\n", - " 'the Eastern Hemisphere between 20°N and 45°N, '\n", - " 'particularly in Southern Europe, North Africa, the '\n", - " 'Balkans, Eastern Mediterranean, Iraq, Iran, Pakistan, '\n", - " 'Afghanistan and India.The disease is transmitted by the '\n", - " 'bites of phlebotomine sandflies of the Genus '\n", - " 'Phlebotomus, in particular, Phlebotomus papatasi, '\n", - " 'Phlebotomus perniciosus and Phlebotomus perfiliewi. The '\n", - " 'sandfly becomes infected when biting an infected human '\n", - " 'in the period between 48 hours before the onset of '\n", - " 'fever and 24 hours after the end of the fever, and '\n", - " 'remains infected for its lifetime. Besides this '\n", - " 'horizontal virus transmission from man to sandfly, the '\n", - " 'virus can be transmitted in insects transovarially, '\n", - " 'from an infected female sandfly to its '\n", - " 'offspring.Pappataci fever is seldom recognised in '\n", - " 'endemic populations because it is mixed with other '\n", - " 'febrile illnesses of childhood, but it is more '\n", - " 'well-known among immigrants and military personnel from '\n", - " 'non-endemic regions.'},\n", - " 'score': 0.436064631,\n", - " 'values': []},\n", - " {'id': '00661b04eb84a4664717245513ea30cd',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:46:50.126453',\n", - " 'text': 'Paratyphoid fever, also known simply as paratyphoid, is '\n", - " 'a bacterial infection caused by one of the three types '\n", - " 'of Salmonella enterica. Symptoms usually begin 6–30 '\n", - " 'days after exposure and are the same as those of '\n", - " 'typhoid fever. Often, a gradual onset of a high fever '\n", - " 'occurs over several days. Weakness, loss of appetite, '\n", - " 'and headaches also commonly occur. Some people develop '\n", - " 'a skin rash with rose-colored spots. Without treatment, '\n", - " 'symptoms may last weeks or months. 
Other people may '\n", - " 'carry the bacteria without being affected; however, '\n", - " 'they are still able to spread the disease to others. '\n", - " 'Both typhoid and paratyphoid are of similar severity. '\n", - " 'Paratyphoid and typhoid fever are types of enteric '\n", - " 'fever.Paratyphoid is caused by the bacterium Salmonella '\n", - " 'enterica of the serotypes Paratyphi A, Paratyphi B, or '\n", - " 'Paratyphi C growing in the intestines and blood. They '\n", - " 'are usually spread by eating or drinking food or water '\n", - " 'contaminated with the feces of an infected person. They '\n", - " 'may occur when a person who prepares food is infected. '\n", - " 'Risk factors include poor sanitation as is found among '\n", - " 'poor crowded populations. Occasionally, they may be '\n", - " 'transmitted by sex. Humans are the only animals '\n", - " 'infected. Diagnosis may be based on symptoms and '\n", - " 'confirmed by either culturing the bacteria or detecting '\n", - " 'the bacterial DNA in the blood, stool, or bone marrow. '\n", - " 'Culturing the bacteria can be difficult. Bone-marrow '\n", - " 'testing is the most accurate. Symptoms are similar to '\n", - " 'that of many other infectious diseases. Typhus is an '\n", - " 'unrelated disease.While no vaccine is available '\n", - " 'specifically for paratyphoid, the typhoid vaccine may '\n", - " 'provide some benefit. Prevention includes drinking '\n", - " 'clean water, better sanitation, and better handwashing. '\n", - " 'Treatment of the disease is with antibiotics such as '\n", - " 'azithromycin. Resistance to a number of other '\n", - " 'previously effective antibiotics is common.Paratyphoid '\n", - " 'affects about six million people a year. It is most '\n", - " 'common in parts of Asia and rare in the developed '\n", - " 'world. Most cases are due to Paratyphi A rather than '\n", - " 'Paratyphi B or C. 
In 2015, paratyphoid fever resulted '\n", - " 'in about 29,200 deaths, down from 63,000 deaths in '\n", - " '1990. The risk of death is between 10 and 15% without '\n", - " 'treatment, while with treatment, it may be less than '\n", - " '1%.'},\n", - " 'score': 0.325077146,\n", - " 'values': []},\n", - " {'id': '24d322a7ce10e9e2fa12fcf54f50c651',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:47:04.001270',\n", - " 'text': 'Although Aronson isolated this mycobacterium in 1926 '\n", - " 'from a fish, it was not until 1951 that it was found to '\n", - " 'be the cause of human disease by Linell and Norden. '\n", - " 'Large outbreaks of infection due to this atypical '\n", - " 'mycobacterium have been described in association with '\n", - " 'swimming. Infections related to swimming pools have now '\n", - " 'drastically fallen due to the improvements in the '\n", - " 'construction and maintenance of these facilities.The '\n", - " 'first case of M. marinum infection associated with a '\n", - " \"fish-tank ('fish-tank granuloma') was reported in 1962 \"\n", - " 'by Swift and Cohen. M. marinum infection may be an '\n", - " 'occupational hazard for certain professions such as pet '\n", - " 'shop workers, but most infections occur in fish '\n", - " 'fanciers who keep an aquarium at home. Although '\n", - " 'infection may be caused by direct injury from the fish '\n", - " 'fins or bites, most are acquired during the handling of '\n", - " 'the aquariums such as cleaning or changing the water. '\n", - " 'Indirect infection has also been described related to a '\n", - " 'child’s bathing utensils that had been used to clean a '\n", - " 'fish tank. 
Due to an increased awareness of the disease '\n", - " 'and improved isolation methods, more and more cases are '\n", - " 'being recognized and reported worldwide.'},\n", - " 'score': 0.301440448,\n", - " 'values': []}]" + "cell_type": "code", + "execution_count": 8, + "id": "7f2fdfe7", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "7f2fdfe7", + "outputId": "7d863252-0f5c-47b7-ec90-881aedf7102d" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "'ae76cc4dfd345ecaeea9b8ba0d5c3437'" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def my_hash(s):\n", + " # Return the MD5 hash of the input string as a hexadecimal string\n", + " return hashlib.md5(s.encode()).hexdigest()\n", + "\n", + "my_hash('I love to hash it')" ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "query_from_pinecone('Does an infection for Sandflies go away over time?')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5b92699e-2d92-4083-9508-ae47355c9c1a", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "206ab80e-f4b4-48d2-8aef-b522024f6658", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "7e180a25-37e4-47a8-8ae8-a49f39c106ab", - "metadata": {}, - "source": [ - "# Part 2: Making results more relevant with a cross-encoder" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "074fab6f", - "metadata": { - "id": "074fab6f" - }, - "outputs": [], - "source": [ - "# if you didn't import before\n", - "\n", - "from sentence_transformers.cross_encoder import CrossEncoder\n", - "import numpy as np\n", - "from torch import nn" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": 
from copy import copy


def get_results_from_pinecone(query, top_k=3, re_rank_model=None, verbose=True, correct_hash=None):
    """Retrieve matches for ``query`` and optionally re-rank them with a cross-encoder.

    Args:
        query: The natural-language query string.
        top_k: Number of matches to request from ``query_from_pinecone``.
        re_rank_model: Optional model exposing
            ``predict(pairs, activation_fct=...)``. When given, the retrieved
            matches are re-scored against the query and returned in
            decreasing order of that score.
        verbose: When true, print the query and one line per result.
        correct_hash: Optional ID of the known-correct document. When given,
            its position in the retrieved list and in the re-ranked list is
            reported.

    Returns:
        ``[]`` when the retrieval step returns nothing (kept for backward
        compatibility). Otherwise a dict with the keys:

        * ``final_results``: re-ranked entries (``score``, ``id``,
          ``metadata``) when a re-rank model is given, else the raw matches.
        * ``retrieved_correct_position``: 0-based index of ``correct_hash``
          in the retrieved list, or ``None`` if absent / not requested.
        * ``reranked_correct_position``: 0-based index of ``correct_hash``
          in the re-ranked list, or ``None`` if absent / no re-ranking.
        * ``results_from_pinecone``: the raw retrieved matches.
        * ``top_re_rank_score``: highest re-rank score, or ``None`` when no
          re-rank model was supplied.
    """
    results_from_pinecone = query_from_pinecone(query, top_k=top_k)
    if not results_from_pinecone:
        return []
    if verbose:
        print("Query:", query)

    final_results = []
    retrieved_correct_position, reranked_correct_position = None, None

    # Where did the correct document land in the raw retrieval order?
    if correct_hash:
        for position, match in enumerate(results_from_pinecone):
            if match['id'] == correct_hash:
                retrieved_correct_position = position

    if re_rank_model is not None:
        if verbose:
            print('Document ID (Hash)\t\tRetrieval Score\tCE Score\tText')

        sentence_combinations = [[query, match['metadata']['text']] for match in results_from_pinecone]

        # Compute the similarity scores for these combinations
        similarity_scores = re_rank_model.predict(sentence_combinations, activation_fct=nn.Sigmoid())

        # Indices into results_from_pinecone, ordered by decreasing re-rank score
        sim_scores_argsort = list(reversed(np.argsort(similarity_scores)))
        top_re_rank_score = similarity_scores[sim_scores_argsort[0]]

        # `reranked_position` is the rank AFTER re-ranking; `original_idx` is
        # the document's index in the retrieval order. The correct document's
        # re-ranked position must be the former, not the latter.
        for reranked_position, original_idx in enumerate(sim_scores_argsort):
            match = results_from_pinecone[original_idx]
            if correct_hash and match['id'] == correct_hash:
                reranked_correct_position = reranked_position
            final_results.append({'score': similarity_scores[original_idx], 'id': match['id'], 'metadata': match['metadata']})
            if verbose:
                print(f"{match['id']}\t{match['score']:.2f}\t{similarity_scores[original_idx]:.6f}\t{match['metadata']['text'][:50]}")
        return {
            'final_results': final_results,
            'retrieved_correct_position': retrieved_correct_position,
            'reranked_correct_position': reranked_correct_position,
            'results_from_pinecone': results_from_pinecone,
            'top_re_rank_score': top_re_rank_score,
        }

    if verbose:
        print('Document ID (Hash)\t\tRetrieval Score\tText')
    for match in results_from_pinecone:
        final_results.append(match)
        if verbose:
            print(f"{match['id']}\t{match['score']:.2f}\t{match['metadata']['text'][:50]}")

    # Same key set as the re-rank branch so callers can index either result
    # without a KeyError.
    return {
        'final_results': final_results,
        'retrieved_correct_position': retrieved_correct_position,
        'reranked_correct_position': reranked_correct_position,
        'results_from_pinecone': results_from_pinecone,
        'top_re_rank_score': None,
    }
# Pre-trained cross encoder
cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2', num_labels=1)

# Map each test question to the hash of its context, i.e. the ID that
# my_hash() produces for that passage.
q_to_hash = {data['question']: my_hash(data['context']) for data in dataset['test']}

# De-duplicate the questions while keeping dataset order. list(set(...)) would
# give a different ordering on every kernel start (string hashing is
# randomized), so unique_inputs[0] would not be reproducible.
unique_inputs = list(dict.fromkeys(dataset['test']['question']))
len(unique_inputs)
Quickly developing banding features and well-defined outflow, it was upgraded to a tropical storm and named John later that day.A strong ridge of high pressure over the northeastern Pacific Ocean forced John westward, where upper level wind shear kept John a tropical storm. Intensity fluctuated considerably, however, as shear levels varied. More than once, shear cleared away most of the clouds above John and nearly caused it to weaken to a tropical depression. However, after eight days of slow westward movement across the Pacific Ocean, shear lessened greatly on August 19, and John intensified significantly and was designated as a hurricane at 17:00 PDT. During an eighteen-hour period between August 19 and August 20, John further strengthened from a weak Category 1 hurricane to a Category 3 major hurricane. Around 1100 PDT on August 20, John crossed into the central Pacific, the first of three basin crosses John would make.After entering the central Pacific, John left the area monitored by the NHC and was instead monitored by the Central Pacific Hurricane Center (CPHC). As the storm moved slowly westward, Hurricane John continued to strengthen considerably in an increasingly favorable environment well south of the Hawaiian Islands; on August 22, John was designated a Category 5 hurricane on the Saffir–Simpson hurricane scale (the highest classification for hurricanes) and later that day (by Hawaii Standard Time) reached its peak intensity, with 1-minute sustained winds of 175 miles per hour (280 km/h) and a minimum central pressure of 929 millibars (27.4 inHg). Also, on August 22 (by Hawaii Standard Time), John made its closest approach to the Hawaiian Islands, 345 miles (500 km) to the south. John had threatened to turn north and affect the islands days before, but the ridge of high pressure that typically shields the islands from hurricanes kept John on its southerly path. 
Nonetheless, heavy rains and wind from the outer bands of John affected the islands.With the Hawaiian Islands behind it, John began a slow turn to the north, taking near-direct aim at Johnston Atoll, a small group of islands populated only by a United States military base. The storm slowly weakened from its peak as a Category 5 hurricane in the face of increasing shear, dropping down to a Category 1 hurricane with 90 miles per hour (145 km/h) maximum winds. On August 25 local time, John made its closest approach to the Johnston Atoll only 15 miles (24 km) to the north. On Johnston Atoll, sustained winds were reported up to 60 miles per hour (95 km/h), the equivalent of a strong tropical storm, and gusts up to 75 miles per hour (120 km/h) were recorded.\n" - ] - } - ], - "source": [ - "query = unique_inputs[0]\n", - "print(query)\n", - "\n", - "for t in dataset['test']:\n", - " if t['question'] == query:\n", - " print(t['context'])" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "id": "4b0da5ac-0345-45e2-9913-188590a31522", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(None, None)" - ] - }, - "execution_count": 61, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "query_result = get_results_from_pinecone(\n", - " query, \n", - " top_k=2, # grab 2 results\n", - " re_rank_model=cross_encoder, \n", - " correct_hash=q_to_hash[query],\n", - " verbose=False\n", - " )\n", - "\n", - "query_result['retrieved_correct_position'], query_result['reranked_correct_position']" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "id": "dfbb3b3c-9baa-4298-abd8-0e7b75dd24ec", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'final_results': [{'score': 0.99843186,\n", - " 'id': 'a76b6a3dfcbdb1ca832bbf40710ad2c8',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:46:32.060499',\n", - " 'text': \"John affected both the Hawaiian Islands and Johnston Atoll, but 
only lightly. While John passed over 345 miles (550 km) to the south of Hawaiʻi, the islands did experience strengthened trade winds and rough surf along the southeast- and south-facing shores, and, while moving westward, on west-facing shores as well. The waves, ranging from 6 to 10 ft (1.8 to 3.0 m) in height, flooded beach parks in Kailua-Kona. Additionally, heavy rains on the Big Island of Hawaiʻi caused minor, localized flooding and some short-term road closures. No deaths, injuries or significant damages were reported in Hawaiʻi.Although John passed within 25 km (16 mi) of Johnston Atoll, it had weakened greatly to a Category 1 system by closest approach. Prior to the storm's arrival, waves between 20 and 30 ft (6.1 and 9.1 m) were reported on the island. Additionally, in the Northern Hemisphere, the strongest winds and heaviest rain lie to the north of a tropical cyclone, so the atoll, which lay to the south of the storm's path, was spared the brunt of the storm. Nonetheless, the 1,100-man personnel for the United States military base on Johnston Atoll had been evacuated to Honolulu as a precaution while John approached. Damage to structures was considerable, but the size of the island and relative functionality of the base led to low damage; monetary losses were estimated at close to $15 million (1994 US$).The remnants of John moved through the Aleutian Islands, producing a wind gust of 46 mph (74 km/h) in Unalaska. The storm brought a plume of warm air, and two stations recorded a high temperature of 66 °F (19 °C).\"}},\n", - " {'score': 0.991396,\n", - " 'id': '8f3fd30f7d46c05089f7f84d71806b77',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:46:41.323346',\n", - " 'text': \"Clearing Johnston Atoll, John turned to the northwest and began strengthening again as shear decreased. 
On August 27 local time, John reached a secondary peak strength of 135 miles per hour (210 km/h), and shortly thereafter it crossed the International Date Line at approximately 22° N and came under the surveillance of the Guam branch of the Joint Typhoon Warning Center (JTWC). By crossing into the western Pacific, John also became a typhoon and was referred to as Typhoon John during its time in the western Pacific. Immediately after crossing the Date Line, John again weakened and its forward motion stalled. By September 1, John had weakened to a tropical storm and was nearly motionless just west of the Date Line. There, John lingered for six days while performing a multi-day counterclockwise loop. On September 7, a trough moved into the area and quickly moved John to the northeast. John crossed the Date Line again on September 8 and reentered the central Pacific.After reentering the central Pacific, John briefly reached a tertiary peak strength of 90 miles per hour (145 km/h), a strong Category 1 hurricane, well to the north of Midway Island. However, the trough was rapidly pulling apart John's structure, and the cold waters of the northern central Pacific were not conducive to a tropical cyclone. On September 10, the 120th advisory was released on the system, finally declaring John to have become extratropical approximately 1,000 miles (1600 km) south of Unalaska Island.\"}}],\n", - " 'retrieved_correct_position': None,\n", - " 'reranked_correct_position': None,\n", - " 'results_from_pinecone': [{'id': 'a76b6a3dfcbdb1ca832bbf40710ad2c8',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:46:32.060499',\n", - " 'text': 'John affected both the Hawaiian Islands and Johnston '\n", - " 'Atoll, but only lightly. 
While John passed over 345 '\n", - " 'miles (550 km) to the south of Hawaiʻi, the islands did '\n", - " 'experience strengthened trade winds and rough surf '\n", - " 'along the southeast- and south-facing shores, and, '\n", - " 'while moving westward, on west-facing shores as well. '\n", - " 'The waves, ranging from 6 to 10 ft (1.8 to 3.0 m) in '\n", - " 'height, flooded beach parks in Kailua-Kona. '\n", - " 'Additionally, heavy rains on the Big Island of Hawaiʻi '\n", - " 'caused minor, localized flooding and some short-term '\n", - " 'road closures. No deaths, injuries or significant '\n", - " 'damages were reported in Hawaiʻi.Although John passed '\n", - " 'within 25 km (16 mi) of Johnston Atoll, it had weakened '\n", - " 'greatly to a Category 1 system by closest approach. '\n", - " \"Prior to the storm's arrival, waves between 20 and 30 \"\n", - " 'ft (6.1 and 9.1 m) were reported on the island. '\n", - " 'Additionally, in the Northern Hemisphere, the strongest '\n", - " 'winds and heaviest rain lie to the north of a tropical '\n", - " 'cyclone, so the atoll, which lay to the south of the '\n", - " \"storm's path, was spared the brunt of the storm. \"\n", - " 'Nonetheless, the 1,100-man personnel for the United '\n", - " 'States military base on Johnston Atoll had been '\n", - " 'evacuated to Honolulu as a precaution while John '\n", - " 'approached. Damage to structures was considerable, but '\n", - " 'the size of the island and relative functionality of '\n", - " 'the base led to low damage; monetary losses were '\n", - " 'estimated at close to $15 million (1994 US$).The '\n", - " 'remnants of John moved through the Aleutian Islands, '\n", - " 'producing a wind gust of 46 mph (74 km/h) in Unalaska. 
'\n", - " 'The storm brought a plume of warm air, and two stations '\n", - " 'recorded a high temperature of 66 °F (19 °C).'},\n", - " 'score': 0.593409777,\n", - " 'values': []},\n", - " {'id': '8f3fd30f7d46c05089f7f84d71806b77',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:46:41.323346',\n", - " 'text': 'Clearing Johnston Atoll, John turned to the northwest '\n", - " 'and began strengthening again as shear decreased. On '\n", - " 'August 27 local time, John reached a secondary peak '\n", - " 'strength of 135 miles per hour (210 km/h), and shortly '\n", - " 'thereafter it crossed the International Date Line at '\n", - " 'approximately 22° N and came under the surveillance of '\n", - " 'the Guam branch of the Joint Typhoon Warning Center '\n", - " '(JTWC). By crossing into the western Pacific, John also '\n", - " 'became a typhoon and was referred to as Typhoon John '\n", - " 'during its time in the western Pacific. Immediately '\n", - " 'after crossing the Date Line, John again weakened and '\n", - " 'its forward motion stalled. By September 1, John had '\n", - " 'weakened to a tropical storm and was nearly motionless '\n", - " 'just west of the Date Line. There, John lingered for '\n", - " 'six days while performing a multi-day counterclockwise '\n", - " 'loop. On September 7, a trough moved into the area and '\n", - " 'quickly moved John to the northeast. John crossed the '\n", - " 'Date Line again on September 8 and reentered the '\n", - " 'central Pacific.After reentering the central Pacific, '\n", - " 'John briefly reached a tertiary peak strength of 90 '\n", - " 'miles per hour (145 km/h), a strong Category 1 '\n", - " 'hurricane, well to the north of Midway Island. However, '\n", - " \"the trough was rapidly pulling apart John's structure, \"\n", - " 'and the cold waters of the northern central Pacific '\n", - " 'were not conducive to a tropical cyclone. 
On September '\n", - " '10, the 120th advisory was released on the system, '\n", - " 'finally declaring John to have become extratropical '\n", - " 'approximately 1,000 miles (1600 km) south of Unalaska '\n", - " 'Island.'},\n", - " 'score': 0.564386189,\n", - " 'values': []}],\n", - " 'top_re_rank_score': 0.99843186}" + "cell_type": "code", + "execution_count": 9, + "id": "ecd86f51", + "metadata": { + "id": "ecd86f51" + }, + "outputs": [], + "source": [ + "def prepare_for_pinecone(texts, engine=ENGINE):\n", + " # Get the current UTC date and time\n", + " now = datetime.utcnow()\n", + "\n", + " # Generate vector embeddings for each string in the input list, using the specified engine\n", + " embeddings = get_embeddings(texts, engine=engine)\n", + "\n", + " # Create tuples of (hash, embedding, metadata) for each input string and its corresponding vector embedding\n", + " # The my_hash() function is used to generate a unique hash for each string, and the datetime.utcnow() function is used to generate the current UTC date and time\n", + " return [\n", + " (\n", + " my_hash(text), # A unique ID for each string, generated using the my_hash() function\n", + " embedding, # The vector embedding of the string\n", + " dict(text=text, date_uploaded=now) # A dictionary of metadata, including the original text and the current UTC date and time\n", + " )\n", + " for text, embedding in zip(texts, embeddings) # Iterate over each input string and its corresponding vector embedding\n", + " ]\n" ] - }, - "execution_count": 62, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "query_result # the right context isn't there!" 
- ] - }, - { - "cell_type": "code", - "execution_count": 63, - "id": "caa66ac8-d69c-49a1-9f55-e9ef93379fec", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" }, - "id": "caa66ac8-d69c-49a1-9f55-e9ef93379fec", - "outputId": "575fa7f5-32dc-4a03-8f31-01df1ee11cc4" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "(2, 2)" - ] - }, - "execution_count": 63, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "query_result = get_results_from_pinecone(\n", - " query, \n", - " top_k=10, # grab 10 results\n", - " re_rank_model=cross_encoder, correct_hash=q_to_hash[query],\n", - " verbose=False\n", - " )\n", - "\n", - "query_result['retrieved_correct_position'], query_result['reranked_correct_position']" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "id": "de1c0500-db49-4be2-8f3b-73ba96ec096b", - "metadata": {}, - "outputs": [ { - "data": { - "text/plain": [ - "{'final_results': [{'score': 0.998432,\n", - " 'id': 'a76b6a3dfcbdb1ca832bbf40710ad2c8',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:46:32.060499',\n", - " 'text': \"John affected both the Hawaiian Islands and Johnston Atoll, but only lightly. While John passed over 345 miles (550 km) to the south of Hawaiʻi, the islands did experience strengthened trade winds and rough surf along the southeast- and south-facing shores, and, while moving westward, on west-facing shores as well. The waves, ranging from 6 to 10 ft (1.8 to 3.0 m) in height, flooded beach parks in Kailua-Kona. Additionally, heavy rains on the Big Island of Hawaiʻi caused minor, localized flooding and some short-term road closures. No deaths, injuries or significant damages were reported in Hawaiʻi.Although John passed within 25 km (16 mi) of Johnston Atoll, it had weakened greatly to a Category 1 system by closest approach. Prior to the storm's arrival, waves between 20 and 30 ft (6.1 and 9.1 m) were reported on the island. 
Additionally, in the Northern Hemisphere, the strongest winds and heaviest rain lie to the north of a tropical cyclone, so the atoll, which lay to the south of the storm's path, was spared the brunt of the storm. Nonetheless, the 1,100-man personnel for the United States military base on Johnston Atoll had been evacuated to Honolulu as a precaution while John approached. Damage to structures was considerable, but the size of the island and relative functionality of the base led to low damage; monetary losses were estimated at close to $15 million (1994 US$).The remnants of John moved through the Aleutian Islands, producing a wind gust of 46 mph (74 km/h) in Unalaska. The storm brought a plume of warm air, and two stations recorded a high temperature of 66 °F (19 °C).\"}},\n", - " {'score': 0.991396,\n", - " 'id': '8f3fd30f7d46c05089f7f84d71806b77',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:46:41.323346',\n", - " 'text': \"Clearing Johnston Atoll, John turned to the northwest and began strengthening again as shear decreased. On August 27 local time, John reached a secondary peak strength of 135 miles per hour (210 km/h), and shortly thereafter it crossed the International Date Line at approximately 22° N and came under the surveillance of the Guam branch of the Joint Typhoon Warning Center (JTWC). By crossing into the western Pacific, John also became a typhoon and was referred to as Typhoon John during its time in the western Pacific. Immediately after crossing the Date Line, John again weakened and its forward motion stalled. By September 1, John had weakened to a tropical storm and was nearly motionless just west of the Date Line. There, John lingered for six days while performing a multi-day counterclockwise loop. On September 7, a trough moved into the area and quickly moved John to the northeast. 
John crossed the Date Line again on September 8 and reentered the central Pacific.After reentering the central Pacific, John briefly reached a tertiary peak strength of 90 miles per hour (145 km/h), a strong Category 1 hurricane, well to the north of Midway Island. However, the trough was rapidly pulling apart John's structure, and the cold waters of the northern central Pacific were not conducive to a tropical cyclone. On September 10, the 120th advisory was released on the system, finally declaring John to have become extratropical approximately 1,000 miles (1600 km) south of Unalaska Island.\"}},\n", - " {'score': 0.32219857,\n", - " 'id': '29f43cf2a1e4b7da49f726bbf846044f',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:46:58.291507',\n", - " 'text': 'The origins of Hurricane John were thought by the United States National Hurricane Center (NHC) to be from a tropical wave that moved off the coast of Africa on July 25, 1994. The wave subsequently moved across the Atlantic Ocean and Caribbean without distinction, before it crossed Central America and moved into the Eastern Pacific Ocean on or around August 8. Upon entering the Eastern Pacific the wave gradually developed, before the NHC initiated advisories on the system and designated it as Tropical Depression Ten-E during August 11. The system was at this time moving westwards and located around 345 miles (555 km) to the south-southeast of Acapulco, Mexico. Quickly developing banding features and well-defined outflow, it was upgraded to a tropical storm and named John later that day.A strong ridge of high pressure over the northeastern Pacific Ocean forced John westward, where upper level wind shear kept John a tropical storm. Intensity fluctuated considerably, however, as shear levels varied. More than once, shear cleared away most of the clouds above John and nearly caused it to weaken to a tropical depression. 
However, after eight days of slow westward movement across the Pacific Ocean, shear lessened greatly on August 19, and John intensified significantly and was designated as a hurricane at 17:00 PDT. During an eighteen-hour period between August 19 and August 20, John further strengthened from a weak Category 1 hurricane to a Category 3 major hurricane. Around 1100 PDT on August 20, John crossed into the central Pacific, the first of three basin crosses John would make.After entering the central Pacific, John left the area monitored by the NHC and was instead monitored by the Central Pacific Hurricane Center (CPHC). As the storm moved slowly westward, Hurricane John continued to strengthen considerably in an increasingly favorable environment well south of the Hawaiian Islands; on August 22, John was designated a Category 5 hurricane on the Saffir–Simpson hurricane scale (the highest classification for hurricanes) and later that day (by Hawaii Standard Time) reached its peak intensity, with 1-minute sustained winds of 175 miles per hour (280 km/h) and a minimum central pressure of 929 millibars (27.4 inHg). Also, on August 22 (by Hawaii Standard Time), John made its closest approach to the Hawaiian Islands, 345 miles (500 km) to the south. John had threatened to turn north and affect the islands days before, but the ridge of high pressure that typically shields the islands from hurricanes kept John on its southerly path. Nonetheless, heavy rains and wind from the outer bands of John affected the islands.With the Hawaiian Islands behind it, John began a slow turn to the north, taking near-direct aim at Johnston Atoll, a small group of islands populated only by a United States military base. The storm slowly weakened from its peak as a Category 5 hurricane in the face of increasing shear, dropping down to a Category 1 hurricane with 90 miles per hour (145 km/h) maximum winds. 
On August 25 local time, John made its closest approach to the Johnston Atoll only 15 miles (24 km) to the north. On Johnston Atoll, sustained winds were reported up to 60 miles per hour (95 km/h), the equivalent of a strong tropical storm, and gusts up to 75 miles per hour (120 km/h) were recorded.'}},\n", - " {'score': 0.00035284052,\n", - " 'id': '29d27c6f43d083d4c282bdb5e7ab51a2',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:45:56.884103',\n", - " 'text': 'At the same time, however, two separate ship reports indicated that John had sustained winds of at least 55 knots (100 km/h, 65 mph), far stronger than the advisory strength of 35 knots (65 km/h, 40 mph). John would go on to restrengthen into a strong Category 1 hurricane after reentering the Central North Pacific, defying all JTWC predictions. After later reanalysis, the JTWC raised the estimated wind speeds of John for every advisory from 1200 UTC September 1 to its final advisory exactly a week later by at least 5 knots (9 km/h, 6 mph) and as much as 25 knots (46 km/h, 29 mph).'}},\n", - " {'score': 0.00029191337,\n", - " 'id': '86b032a63e2f13222643295ae9a96fcc',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:45:56.884103',\n", - " 'text': \"During John's time in the Western North Pacific, the Joint Typhoon Warning Center (JTWC) had particular difficulty in forecasting and even estimating the strength of John. John weakened considerably after entering the Western North Pacific, and, before estimates were later revised, four consecutive advisories were issued that declared John a tropical depression. Each of these advisories called for imminent dissipation. 
As John persisted and did not dissipate as the JTWC had predicted, it was upgraded to a minimal tropical storm in the next advisory.\"}},\n", - " {'score': 8.675499e-05,\n", - " 'id': '757483e06a54900810afc0ab86b020c4',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:46:52.938765',\n", - " 'text': \"Johnston was born and raised in Ontario, studying there before enrolling at Harvard University and later Cambridge and Queen's universities. He went on to work as a professor at various post-secondary institutions in Canada, eventually serving administrative roles as dean of law at the University of Western Ontario, principal of McGill University, and president of the University of Waterloo. At the same time, Johnston involved himself with politics and public service, moderating political debates and chairing commissions in both the federal and provincial spheres, his most renowned position in that field being the chairmanship of the inquiry into the Airbus affair. He was in 2010 appointed as governor general by Queen Elizabeth II, on the recommendation of then Prime Minister of Canada Stephen Harper, to replace Michaëlle Jean as viceroy and he occupied the post until succeeded by Julie Payette in 2017. At the time, Johnston was predominantly praised as a worthy choice for the Queen's representative, though his appointment was denounced by some Quebec sovereigntists.\"}},\n", - " {'score': 8.4590385e-05,\n", - " 'id': 'c31dd2e7e6cbe2ab80361f321e57d725',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:47:15.201237',\n", - " 'text': \"Hurricane Norman originated from a broad area of low pressure that formed several hundred miles south-southwest of Acapulco, Mexico on August 25. Traveling west-northwest, the system coalesced into a tropical depression by at 15:00 UTC on August 28 while situated approximately 420 miles (675 km) south-southwest of the southern tip of Baja California. A subtropical ridge steered the system west for several days. 
Early on August 29, the depression intensified into a tropical storm and received the name Norman. Favorable environmental conditions enabled quick intensification, and the system achieved hurricane strength early on August 30. Rapid intensification ensued throughout the day, culminating with Norman attaining its peak intensity at 15:00 UTC, with sustained winds of 150 mph (240 km/h) and a central pressure of 937 mbar (27.67 inHg). During a 24-hour period, the hurricane's winds increased by 80 mph (130 km/h), the largest such increase since Hurricane Patricia in 2015.The combination of an eyewall replacement cycle and increasing wind shear induced weakening beginning on August 31. At 03:00 UTC on August 31, Norman turned to the west-southwest due to a deep-layer ridge to the north. Norman fell to Category 2 status for a period, before unexpectedly rapidly intensifying back to a Category 4 hurricane on September 2. The storm attained a secondary peak with winds of 140 mph (235 km/h) and a pressure of 947 mbar (28.00 inHg). Initially proving resilient to adverse conditions, Norman succumbed to increasing wind shear and lower sea surface temperatures on September 3. Its central dense overcast warmed and its eye filled. At the same time, Norman took a turn to a more westerly direction. On September 4, the hurricane crossed west of 140°W, and warning responsibility shifted to the Central Pacific Hurricane Center (CPHC). On the next day, another bout of unexpected intensification ensued and Norman regained major hurricane status. However, wind shear increased once again thereafter, and Norman weakened into a Category 1 hurricane on September 6. On September 7, Norman weakened further to a tropical storm as it began to lose its tropical characteristics. 
The CPHC issued its final advisory on Norman at 21:00 UTC on September 8, as it was rapidly becoming extratropical; Norman subsequently completed its extratropical transition on the next day.\"}},\n", - " {'score': 7.123619e-05,\n", - " 'id': '7ac86164fe9f53c7ce92529fa5904f4e',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:46:26.299391',\n", - " 'text': \"On 18 August 1916, Kronprinz took part in an operation to bombard Sunderland. Admiral Scheer attempted a repeat of the original 31 May plan; the two serviceable German battlecruisers—Moltke and Von der Tann—supported by three dreadnoughts, were to bombard the coastal town of Sunderland in an attempt to draw out and destroy Beatty's battlecruisers. The rest of the fleet, including Kronprinz, would trail behind and provide cover. The British were aware of the German plans and sortied the Grand Fleet to meet them. By 14:35, Admiral Scheer had been warned of the Grand Fleet's approach and, unwilling to engage the whole of the Grand Fleet just eleven weeks after the decidedly close call at Jutland, turned his forces around and retreated to German ports.Kronprinz participated in two uneventful fleet operations, one a month prior on 16 July to the north of Helgoland, and one into the North Sea on 18–20 October. Kronprinz and the rest of III Squadron were sent to the Baltic directly afterward for training, which lasted until 2 November. Upon returning from the Baltic, Kronprinz and the rest of III Squadron were ordered to cover the retrieval of a pair of U-boats that were stranded on the Danish coast. On the return trip, on 5 November 1916, Kronprinz was torpedoed by the British submarine J1 near Horns Reef. The torpedo struck the ship beneath the forward-most gun turret and allowed approximately 250 metric tons (250 long tons; 280 short tons) of water into the ship. Kronprinz maintained her speed and reached port. 
The following day she was placed in drydock at the Imperial Dockyard in Wilhelmshaven for repairs, which lasted from 6 November to 4 December. During this period, Bernhard Rösing took command of the vessel.After returning to III Squadron, Kronprinz took part in squadron training in the Baltic before conducting defensive patrols in the German Bight. In early 1917, the ship became the flagship of the deputy commander of the squadron, at that time Rear Admiral Karl Seiferling. During training maneuvers on 5 March 1917, Kronprinz was accidentally rammed by her sister ship Grosser Kurfürst in the Heligoland Bight. The collision caused minor flooding in the area abreast of her forward superfiring turret; Kronprinz shipped some 600 t (590 long tons; 660 short tons) of water. She again went into the drydock in Wilhelmshaven, from 6 March to 14 May. On 11 September, Kronprinz was detached for training in the Baltic. She then joined the Special Unit for Operation Albion.\"}},\n", - " {'score': 1.6988468e-05,\n", - " 'id': 'b38a1c1b709975f51326970267e67bb7',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:47:20.914703',\n", - " 'text': \"Kennedy was born in Brookline, Massachusetts and graduated from Harvard University in 1940 before joining the U.S. Naval Reserve the following year. During World War II, he commanded a series of PT boats in the Pacific theater and earned the Navy and Marine Corps Medal for his service. After the war, Kennedy represented the 11th congressional district of Massachusetts in the U.S. House of Representatives from 1947 to 1953. He was subsequently elected to the U.S. Senate and served as the junior Senator from Massachusetts from 1953 to 1960. While in the Senate, he published his book Profiles in Courage, which won a Pulitzer Prize for Biography. In the 1960 presidential election, Kennedy narrowly defeated Republican opponent Richard Nixon, who was the incumbent vice president. 
At age 43, he became the second-youngest person to serve as president, the youngest person to be elected as U.S. president, as well as the only Roman Catholic to occupy that office. He was also the first president to have served in the U.S. Navy.Kennedy's time in office was marked by high tensions with communist states in the Cold War. He increased the number of American military advisers in South Vietnam by a factor of 18 over President Dwight D. Eisenhower. In April 1961, he authorized a failed joint-CIA attempt to overthrow the Cuban government of Fidel Castro in the Bay of Pigs Invasion. He subsequently rejected Operation Northwoods plans by the Joint Chiefs of Staff to orchestrate false flag attacks on American soil in order to gain public approval for a war against Cuba. However his administration continued to plan for an invasion of Cuba in the summer of 1962. In October 1962, U.S. spy planes discovered that Soviet missile bases had been deployed in Cuba; the resulting period of tensions, termed the Cuban Missile Crisis, nearly resulted in the breakout of a global thermonuclear conflict. Domestically, Kennedy presided over the establishment of the Peace Corps and supported the civil rights movement, but was only somewhat successful in passing his New Frontier domestic policies.\"}},\n", - " {'score': 1.4858122e-05,\n", - " 'id': '413ce9a56e49699b6057daed5a44f81e',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:45:56.884103',\n", - " 'text': 'On August 29, an upper-level low absorbed the remnants of Hurricane Lane to the west-northwest of Hawaii. The storm was assigned the designation 96C by the United States Naval Research Laboratory (NRL). Traversing an area with sea surface temperatures 2 °C (3.6 °F) above-normal, the system coalesced into a subtropical storm by August 31. On September 2, the system reached its peak intensity and began to display an eye. Afterward, the system gradually began to weaken, while accelerating northward into colder waters. 
On September 3, the system weakened below tropical depression intensity, back into an extratropical low. On September 4, the system was absorbed by a larger extratropical storm in the Bering Sea.'}}],\n", - " 'retrieved_correct_position': 2,\n", - " 'reranked_correct_position': 2,\n", - " 'results_from_pinecone': [{'id': 'a76b6a3dfcbdb1ca832bbf40710ad2c8',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:46:32.060499',\n", - " 'text': 'John affected both the Hawaiian Islands and Johnston '\n", - " 'Atoll, but only lightly. While John passed over 345 '\n", - " 'miles (550 km) to the south of Hawaiʻi, the islands did '\n", - " 'experience strengthened trade winds and rough surf '\n", - " 'along the southeast- and south-facing shores, and, '\n", - " 'while moving westward, on west-facing shores as well. '\n", - " 'The waves, ranging from 6 to 10 ft (1.8 to 3.0 m) in '\n", - " 'height, flooded beach parks in Kailua-Kona. '\n", - " 'Additionally, heavy rains on the Big Island of Hawaiʻi '\n", - " 'caused minor, localized flooding and some short-term '\n", - " 'road closures. No deaths, injuries or significant '\n", - " 'damages were reported in Hawaiʻi.Although John passed '\n", - " 'within 25 km (16 mi) of Johnston Atoll, it had weakened '\n", - " 'greatly to a Category 1 system by closest approach. '\n", - " \"Prior to the storm's arrival, waves between 20 and 30 \"\n", - " 'ft (6.1 and 9.1 m) were reported on the island. '\n", - " 'Additionally, in the Northern Hemisphere, the strongest '\n", - " 'winds and heaviest rain lie to the north of a tropical '\n", - " 'cyclone, so the atoll, which lay to the south of the '\n", - " \"storm's path, was spared the brunt of the storm. \"\n", - " 'Nonetheless, the 1,100-man personnel for the United '\n", - " 'States military base on Johnston Atoll had been '\n", - " 'evacuated to Honolulu as a precaution while John '\n", - " 'approached. 
Damage to structures was considerable, but '\n", - " 'the size of the island and relative functionality of '\n", - " 'the base led to low damage; monetary losses were '\n", - " 'estimated at close to $15 million (1994 US$).The '\n", - " 'remnants of John moved through the Aleutian Islands, '\n", - " 'producing a wind gust of 46 mph (74 km/h) in Unalaska. '\n", - " 'The storm brought a plume of warm air, and two stations '\n", - " 'recorded a high temperature of 66 °F (19 °C).'},\n", - " 'score': 0.593409777,\n", - " 'values': []},\n", - " {'id': '8f3fd30f7d46c05089f7f84d71806b77',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:46:41.323346',\n", - " 'text': 'Clearing Johnston Atoll, John turned to the northwest '\n", - " 'and began strengthening again as shear decreased. On '\n", - " 'August 27 local time, John reached a secondary peak '\n", - " 'strength of 135 miles per hour (210 km/h), and shortly '\n", - " 'thereafter it crossed the International Date Line at '\n", - " 'approximately 22° N and came under the surveillance of '\n", - " 'the Guam branch of the Joint Typhoon Warning Center '\n", - " '(JTWC). By crossing into the western Pacific, John also '\n", - " 'became a typhoon and was referred to as Typhoon John '\n", - " 'during its time in the western Pacific. Immediately '\n", - " 'after crossing the Date Line, John again weakened and '\n", - " 'its forward motion stalled. By September 1, John had '\n", - " 'weakened to a tropical storm and was nearly motionless '\n", - " 'just west of the Date Line. There, John lingered for '\n", - " 'six days while performing a multi-day counterclockwise '\n", - " 'loop. On September 7, a trough moved into the area and '\n", - " 'quickly moved John to the northeast. 
John crossed the '\n", - " 'Date Line again on September 8 and reentered the '\n", - " 'central Pacific.After reentering the central Pacific, '\n", - " 'John briefly reached a tertiary peak strength of 90 '\n", - " 'miles per hour (145 km/h), a strong Category 1 '\n", - " 'hurricane, well to the north of Midway Island. However, '\n", - " \"the trough was rapidly pulling apart John's structure, \"\n", - " 'and the cold waters of the northern central Pacific '\n", - " 'were not conducive to a tropical cyclone. On September '\n", - " '10, the 120th advisory was released on the system, '\n", - " 'finally declaring John to have become extratropical '\n", - " 'approximately 1,000 miles (1600 km) south of Unalaska '\n", - " 'Island.'},\n", - " 'score': 0.564386189,\n", - " 'values': []},\n", - " {'id': '29f43cf2a1e4b7da49f726bbf846044f',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:46:58.291507',\n", - " 'text': 'The origins of Hurricane John were thought by the '\n", - " 'United States National Hurricane Center (NHC) to be '\n", - " 'from a tropical wave that moved off the coast of Africa '\n", - " 'on July 25, 1994. The wave subsequently moved across '\n", - " 'the Atlantic Ocean and Caribbean without distinction, '\n", - " 'before it crossed Central America and moved into the '\n", - " 'Eastern Pacific Ocean on or around August 8. Upon '\n", - " 'entering the Eastern Pacific the wave gradually '\n", - " 'developed, before the NHC initiated advisories on the '\n", - " 'system and designated it as Tropical Depression Ten-E '\n", - " 'during August 11. The system was at this time moving '\n", - " 'westwards and located around 345 miles (555 km) to the '\n", - " 'south-southeast of Acapulco, Mexico. 
Quickly developing '\n", - " 'banding features and well-defined outflow, it was '\n", - " 'upgraded to a tropical storm and named John later that '\n", - " 'day.A strong ridge of high pressure over the '\n", - " 'northeastern Pacific Ocean forced John westward, where '\n", - " 'upper level wind shear kept John a tropical storm. '\n", - " 'Intensity fluctuated considerably, however, as shear '\n", - " 'levels varied. More than once, shear cleared away most '\n", - " 'of the clouds above John and nearly caused it to weaken '\n", - " 'to a tropical depression. However, after eight days of '\n", - " 'slow westward movement across the Pacific Ocean, shear '\n", - " 'lessened greatly on August 19, and John intensified '\n", - " 'significantly and was designated as a hurricane at '\n", - " '17:00 PDT. During an eighteen-hour period between '\n", - " 'August 19 and August 20, John further strengthened from '\n", - " 'a weak Category 1 hurricane to a Category 3 major '\n", - " 'hurricane. Around 1100 PDT on August 20, John crossed '\n", - " 'into the central Pacific, the first of three basin '\n", - " 'crosses John would make.After entering the central '\n", - " 'Pacific, John left the area monitored by the NHC and '\n", - " 'was instead monitored by the Central Pacific Hurricane '\n", - " 'Center (CPHC). As the storm moved slowly westward, '\n", - " 'Hurricane John continued to strengthen considerably in '\n", - " 'an increasingly favorable environment well south of the '\n", - " 'Hawaiian Islands; on August 22, John was designated a '\n", - " 'Category 5 hurricane on the Saffir–Simpson hurricane '\n", - " 'scale (the highest classification for hurricanes) and '\n", - " 'later that day (by Hawaii Standard Time) reached its '\n", - " 'peak intensity, with 1-minute sustained winds of 175 '\n", - " 'miles per hour (280 km/h) and a minimum central '\n", - " 'pressure of 929 millibars (27.4 inHg). 
Also, on August '\n", - " '22 (by Hawaii Standard Time), John made its closest '\n", - " 'approach to the Hawaiian Islands, 345 miles (500 km) to '\n", - " 'the south. John had threatened to turn north and affect '\n", - " 'the islands days before, but the ridge of high pressure '\n", - " 'that typically shields the islands from hurricanes kept '\n", - " 'John on its southerly path. Nonetheless, heavy rains '\n", - " 'and wind from the outer bands of John affected the '\n", - " 'islands.With the Hawaiian Islands behind it, John began '\n", - " 'a slow turn to the north, taking near-direct aim at '\n", - " 'Johnston Atoll, a small group of islands populated only '\n", - " 'by a United States military base. The storm slowly '\n", - " 'weakened from its peak as a Category 5 hurricane in the '\n", - " 'face of increasing shear, dropping down to a Category 1 '\n", - " 'hurricane with 90 miles per hour (145 km/h) maximum '\n", - " 'winds. On August 25 local time, John made its closest '\n", - " 'approach to the Johnston Atoll only 15 miles (24 km) to '\n", - " 'the north. On Johnston Atoll, sustained winds were '\n", - " 'reported up to 60 miles per hour (95 km/h), the '\n", - " 'equivalent of a strong tropical storm, and gusts up to '\n", - " '75 miles per hour (120 km/h) were recorded.'},\n", - " 'score': 0.516601801,\n", - " 'values': []},\n", - " {'id': '29d27c6f43d083d4c282bdb5e7ab51a2',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:45:56.884103',\n", - " 'text': 'At the same time, however, two separate ship reports '\n", - " 'indicated that John had sustained winds of at least 55 '\n", - " 'knots (100 km/h, 65 mph), far stronger than the '\n", - " 'advisory strength of 35 knots (65 km/h, 40 mph). John '\n", - " 'would go on to restrengthen into a strong Category 1 '\n", - " 'hurricane after reentering the Central North Pacific, '\n", - " 'defying all JTWC predictions. 
After later reanalysis, '\n", - " 'the JTWC raised the estimated wind speeds of John for '\n", - " 'every advisory from 1200 UTC September 1 to its final '\n", - " 'advisory exactly a week later by at least 5 knots (9 '\n", - " 'km/h, 6 mph) and as much as 25 knots (46 km/h, 29 '\n", - " 'mph).'},\n", - " 'score': 0.448611259,\n", - " 'values': []},\n", - " {'id': '86b032a63e2f13222643295ae9a96fcc',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:45:56.884103',\n", - " 'text': \"During John's time in the Western North Pacific, the \"\n", - " 'Joint Typhoon Warning Center (JTWC) had particular '\n", - " 'difficulty in forecasting and even estimating the '\n", - " 'strength of John. John weakened considerably after '\n", - " 'entering the Western North Pacific, and, before '\n", - " 'estimates were later revised, four consecutive '\n", - " 'advisories were issued that declared John a tropical '\n", - " 'depression. Each of these advisories called for '\n", - " 'imminent dissipation. As John persisted and did not '\n", - " 'dissipate as the JTWC had predicted, it was upgraded to '\n", - " 'a minimal tropical storm in the next advisory.'},\n", - " 'score': 0.446436226,\n", - " 'values': []},\n", - " {'id': 'b38a1c1b709975f51326970267e67bb7',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:47:20.914703',\n", - " 'text': 'Kennedy was born in Brookline, Massachusetts and '\n", - " 'graduated from Harvard University in 1940 before '\n", - " 'joining the U.S. Naval Reserve the following year. '\n", - " 'During World War II, he commanded a series of PT boats '\n", - " 'in the Pacific theater and earned the Navy and Marine '\n", - " 'Corps Medal for his service. After the war, Kennedy '\n", - " 'represented the 11th congressional district of '\n", - " 'Massachusetts in the U.S. House of Representatives from '\n", - " '1947 to 1953. He was subsequently elected to the U.S. 
'\n", - " 'Senate and served as the junior Senator from '\n", - " 'Massachusetts from 1953 to 1960. While in the Senate, '\n", - " 'he published his book Profiles in Courage, which won a '\n", - " 'Pulitzer Prize for Biography. In the 1960 presidential '\n", - " 'election, Kennedy narrowly defeated Republican opponent '\n", - " 'Richard Nixon, who was the incumbent vice president. At '\n", - " 'age 43, he became the second-youngest person to serve '\n", - " 'as president, the youngest person to be elected as U.S. '\n", - " 'president, as well as the only Roman Catholic to occupy '\n", - " 'that office. He was also the first president to have '\n", - " \"served in the U.S. Navy.Kennedy's time in office was \"\n", - " 'marked by high tensions with communist states in the '\n", - " 'Cold War. He increased the number of American military '\n", - " 'advisers in South Vietnam by a factor of 18 over '\n", - " 'President Dwight D. Eisenhower. In April 1961, he '\n", - " 'authorized a failed joint-CIA attempt to overthrow the '\n", - " 'Cuban government of Fidel Castro in the Bay of Pigs '\n", - " 'Invasion. He subsequently rejected Operation Northwoods '\n", - " 'plans by the Joint Chiefs of Staff to orchestrate false '\n", - " 'flag attacks on American soil in order to gain public '\n", - " 'approval for a war against Cuba. However his '\n", - " 'administration continued to plan for an invasion of '\n", - " 'Cuba in the summer of 1962. In October 1962, U.S. spy '\n", - " 'planes discovered that Soviet missile bases had been '\n", - " 'deployed in Cuba; the resulting period of tensions, '\n", - " 'termed the Cuban Missile Crisis, nearly resulted in the '\n", - " 'breakout of a global thermonuclear conflict. 
'\n", - " 'Domestically, Kennedy presided over the establishment '\n", - " 'of the Peace Corps and supported the civil rights '\n", - " 'movement, but was only somewhat successful in passing '\n", - " 'his New Frontier domestic policies.'},\n", - " 'score': 0.282856554,\n", - " 'values': []},\n", - " {'id': '413ce9a56e49699b6057daed5a44f81e',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:45:56.884103',\n", - " 'text': 'On August 29, an upper-level low absorbed the remnants '\n", - " 'of Hurricane Lane to the west-northwest of Hawaii. The '\n", - " 'storm was assigned the designation 96C by the United '\n", - " 'States Naval Research Laboratory (NRL). Traversing an '\n", - " 'area with sea surface temperatures 2 °C (3.6 °F) '\n", - " 'above-normal, the system coalesced into a subtropical '\n", - " 'storm by August 31. On September 2, the system reached '\n", - " 'its peak intensity and began to display an eye. '\n", - " 'Afterward, the system gradually began to weaken, while '\n", - " 'accelerating northward into colder waters. On September '\n", - " '3, the system weakened below tropical depression '\n", - " 'intensity, back into an extratropical low. On September '\n", - " '4, the system was absorbed by a larger extratropical '\n", - " 'storm in the Bering Sea.'},\n", - " 'score': 0.278695226,\n", - " 'values': []},\n", - " {'id': 'c31dd2e7e6cbe2ab80361f321e57d725',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:47:15.201237',\n", - " 'text': 'Hurricane Norman originated from a broad area of low '\n", - " 'pressure that formed several hundred miles '\n", - " 'south-southwest of Acapulco, Mexico on August 25. '\n", - " 'Traveling west-northwest, the system coalesced into a '\n", - " 'tropical depression by at 15:00 UTC on August 28 while '\n", - " 'situated approximately 420 miles (675 km) '\n", - " 'south-southwest of the southern tip of Baja California. '\n", - " 'A subtropical ridge steered the system west for several '\n", - " 'days. 
Early on August 29, the depression intensified '\n", - " 'into a tropical storm and received the name Norman. '\n", - " 'Favorable environmental conditions enabled quick '\n", - " 'intensification, and the system achieved hurricane '\n", - " 'strength early on August 30. Rapid intensification '\n", - " 'ensued throughout the day, culminating with Norman '\n", - " 'attaining its peak intensity at 15:00 UTC, with '\n", - " 'sustained winds of 150 mph (240 km/h) and a central '\n", - " 'pressure of 937 mbar (27.67 inHg). During a 24-hour '\n", - " \"period, the hurricane's winds increased by 80 mph (130 \"\n", - " 'km/h), the largest such increase since Hurricane '\n", - " 'Patricia in 2015.The combination of an eyewall '\n", - " 'replacement cycle and increasing wind shear induced '\n", - " 'weakening beginning on August 31. At 03:00 UTC on '\n", - " 'August 31, Norman turned to the west-southwest due to a '\n", - " 'deep-layer ridge to the north. Norman fell to Category '\n", - " '2 status for a period, before unexpectedly rapidly '\n", - " 'intensifying back to a Category 4 hurricane on '\n", - " 'September 2. The storm attained a secondary peak with '\n", - " 'winds of 140 mph (235 km/h) and a pressure of 947 mbar '\n", - " '(28.00 inHg). Initially proving resilient to adverse '\n", - " 'conditions, Norman succumbed to increasing wind shear '\n", - " 'and lower sea surface temperatures on September 3. Its '\n", - " 'central dense overcast warmed and its eye filled. At '\n", - " 'the same time, Norman took a turn to a more westerly '\n", - " 'direction. On September 4, the hurricane crossed west '\n", - " 'of 140°W, and warning responsibility shifted to the '\n", - " 'Central Pacific Hurricane Center (CPHC). On the next '\n", - " 'day, another bout of unexpected intensification ensued '\n", - " 'and Norman regained major hurricane status. 
However, '\n", - " 'wind shear increased once again thereafter, and Norman '\n", - " 'weakened into a Category 1 hurricane on September 6. On '\n", - " 'September 7, Norman weakened further to a tropical '\n", - " 'storm as it began to lose its tropical characteristics. '\n", - " 'The CPHC issued its final advisory on Norman at 21:00 '\n", - " 'UTC on September 8, as it was rapidly becoming '\n", - " 'extratropical; Norman subsequently completed its '\n", - " 'extratropical transition on the next day.'},\n", - " 'score': 0.273503363,\n", - " 'values': []},\n", - " {'id': '7ac86164fe9f53c7ce92529fa5904f4e',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:46:26.299391',\n", - " 'text': 'On 18 August 1916, Kronprinz took part in an operation '\n", - " 'to bombard Sunderland. Admiral Scheer attempted a '\n", - " 'repeat of the original 31 May plan; the two serviceable '\n", - " 'German battlecruisers—Moltke and Von der Tann—supported '\n", - " 'by three dreadnoughts, were to bombard the coastal town '\n", - " 'of Sunderland in an attempt to draw out and destroy '\n", - " \"Beatty's battlecruisers. The rest of the fleet, \"\n", - " 'including Kronprinz, would trail behind and provide '\n", - " 'cover. The British were aware of the German plans and '\n", - " 'sortied the Grand Fleet to meet them. By 14:35, Admiral '\n", - " \"Scheer had been warned of the Grand Fleet's approach \"\n", - " 'and, unwilling to engage the whole of the Grand Fleet '\n", - " 'just eleven weeks after the decidedly close call at '\n", - " 'Jutland, turned his forces around and retreated to '\n", - " 'German ports.Kronprinz participated in two uneventful '\n", - " 'fleet operations, one a month prior on 16 July to the '\n", - " 'north of Helgoland, and one into the North Sea on 18–20 '\n", - " 'October. Kronprinz and the rest of III Squadron were '\n", - " 'sent to the Baltic directly afterward for training, '\n", - " 'which lasted until 2 November. 
Upon returning from the '\n", - " 'Baltic, Kronprinz and the rest of III Squadron were '\n", - " 'ordered to cover the retrieval of a pair of U-boats '\n", - " 'that were stranded on the Danish coast. On the return '\n", - " 'trip, on 5 November 1916, Kronprinz was torpedoed by '\n", - " 'the British submarine J1 near Horns Reef. The torpedo '\n", - " 'struck the ship beneath the forward-most gun turret and '\n", - " 'allowed approximately 250 metric tons (250 long tons; '\n", - " '280 short tons) of water into the ship. Kronprinz '\n", - " 'maintained her speed and reached port. The following '\n", - " 'day she was placed in drydock at the Imperial Dockyard '\n", - " 'in Wilhelmshaven for repairs, which lasted from 6 '\n", - " 'November to 4 December. During this period, Bernhard '\n", - " 'Rösing took command of the vessel.After returning to '\n", - " 'III Squadron, Kronprinz took part in squadron training '\n", - " 'in the Baltic before conducting defensive patrols in '\n", - " 'the German Bight. In early 1917, the ship became the '\n", - " 'flagship of the deputy commander of the squadron, at '\n", - " 'that time Rear Admiral Karl Seiferling. During training '\n", - " 'maneuvers on 5 March 1917, Kronprinz was accidentally '\n", - " 'rammed by her sister ship Grosser Kurfürst in the '\n", - " 'Heligoland Bight. The collision caused minor flooding '\n", - " 'in the area abreast of her forward superfiring turret; '\n", - " 'Kronprinz shipped some 600 t (590 long tons; 660 short '\n", - " 'tons) of water. She again went into the drydock in '\n", - " 'Wilhelmshaven, from 6 March to 14 May. On 11 September, '\n", - " 'Kronprinz was detached for training in the Baltic. 
She '\n", - " 'then joined the Special Unit for Operation Albion.'},\n", - " 'score': 0.269413084,\n", - " 'values': []},\n", - " {'id': '757483e06a54900810afc0ab86b020c4',\n", - " 'metadata': {'date_uploaded': '2024-02-04T15:46:52.938765',\n", - " 'text': 'Johnston was born and raised in Ontario, studying there '\n", - " 'before enrolling at Harvard University and later '\n", - " \"Cambridge and Queen's universities. He went on to work \"\n", - " 'as a professor at various post-secondary institutions '\n", - " 'in Canada, eventually serving administrative roles as '\n", - " 'dean of law at the University of Western Ontario, '\n", - " 'principal of McGill University, and president of the '\n", - " 'University of Waterloo. At the same time, Johnston '\n", - " 'involved himself with politics and public service, '\n", - " 'moderating political debates and chairing commissions '\n", - " 'in both the federal and provincial spheres, his most '\n", - " 'renowned position in that field being the chairmanship '\n", - " 'of the inquiry into the Airbus affair. He was in 2010 '\n", - " 'appointed as governor general by Queen Elizabeth II, on '\n", - " 'the recommendation of then Prime Minister of Canada '\n", - " 'Stephen Harper, to replace Michaëlle Jean as viceroy '\n", - " 'and he occupied the post until succeeded by Julie '\n", - " 'Payette in 2017. 
At the time, Johnston was '\n", - " 'predominantly praised as a worthy choice for the '\n", - " \"Queen's representative, though his appointment was \"\n", - " 'denounced by some Quebec sovereigntists.'},\n", - " 'score': 0.265240133,\n", - " 'values': []}],\n", - " 'top_re_rank_score': 0.998432}" + "cell_type": "code", + "execution_count": 10, + "id": "4c40d99a", + "metadata": { + "id": "4c40d99a" + }, + "outputs": [], + "source": [ + "texts = ['hi']" ] - }, - "execution_count": 64, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "query_result # the 2nd position has the right context \"The origins of Hurricane John were...\"" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "a63ec53e", - "metadata": { - "id": "a63ec53e" - }, - "outputs": [], - "source": [ - "test_sample = dataset['test']" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "id": "21a3a147", - "metadata": { - "id": "21a3a147" - }, - "outputs": [], - "source": [ - "TOP_K=50" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b98413a3", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" }, - "id": "b98413a3", - "outputId": "c6557744-6998-492f-db27-98b94ed7ec31", - "scrolled": true - }, - "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - " 9%|▊ | 100/1148 [02:39<27:57, 1.60s/it]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy without re-ranking: 0.78\n", - "Accuracy with re-ranking: 0.84\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 17%|█▋ | 200/1148 [05:19<24:58, 1.58s/it]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy without re-ranking: 0.765\n", - "Accuracy with re-ranking: 0.835\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 26%|██▌ | 300/1148 [07:59<22:08, 1.57s/it]" - ] + "cell_type": "code", + "execution_count": 11, + "id": 
"3e1b73f3", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3e1b73f3", + "outputId": "4bb8f973-72ad-4b18-f5d9-e090c88475cc" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ID: 49f68a5c8493ec2c0bf489821c21fc3b \n", + "LEN: 3072 \n", + "META: {'text': 'hi', 'date_uploaded': datetime.datetime(2025, 2, 9, 1, 36, 34, 26529)}\n" + ] + } + ], + "source": [ + "_id, embedding, metadata = prepare_for_pinecone(texts)[0]\n", + "\n", + "print('ID: ',_id, '\\nLEN: ', len(embedding), '\\nMETA:', metadata)" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy without re-ranking: 0.7666666666666667\n", - "Accuracy with re-ranking: 0.8166666666666667\n" - ] + "cell_type": "code", + "execution_count": 11, + "id": "b49debd5", + "metadata": { + "id": "b49debd5" + }, + "outputs": [], + "source": [] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - " 35%|███▍ | 400/1148 [12:10<19:30, 1.56s/it]" - ] + "cell_type": "code", + "execution_count": 12, + "id": "bf47aabd", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bf47aabd", + "outputId": "30615424-73cb-4f4e-9f54-67a42f05f095" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def upload_texts_to_pinecone(texts, namespace=NAMESPACE, batch_size=None, show_progress_bar=False):\n", + " # Call the prepare_for_pinecone function to prepare the input texts for indexing\n", + " total_upserted = 0\n", + " if not batch_size:\n", + " batch_size = len(texts)\n", + "\n", + " _range = range(0, len(texts), batch_size)\n", + " for i in tqdm(_range) if show_progress_bar else _range:\n", + " batch = texts[i: i + batch_size]\n", + " prepared_texts = prepare_for_pinecone(batch)\n", + "\n", + " # Use the upsert() method of the index object to upload the prepared texts to Pinecone\n", + " 
total_upserted += index.upsert(\n", + " vectors=prepared_texts,\n", + " namespace=namespace\n", + " )['upserted_count']\n", + "\n", + "\n", + " return total_upserted\n", + "\n", + "# Call the upload_texts_to_pinecone() function with the input texts\n", + "upload_texts_to_pinecone(texts)\n" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy without re-ranking: 0.7625\n", - "Accuracy with re-ranking: 0.825\n" - ] + "cell_type": "code", + "execution_count": 13, + "id": "V0XI6RAom-Ln", + "metadata": { + "id": "V0XI6RAom-Ln" + }, + "outputs": [], + "source": [ + "def query_from_pinecone(query, top_k=3, include_metadata=True):\n", + " # get embedding from THE SAME embedder as the documents\n", + " query_embedding = get_embedding(query, engine=ENGINE)\n", + "\n", + " return index.query(\n", + " vector=query_embedding,\n", + " top_k=top_k,\n", + " namespace=NAMESPACE,\n", + " include_metadata=include_metadata # gets the metadata (dates, text, etc)\n", + " ).get('matches')" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - " 44%|████▎ | 500/1148 [14:50<17:13, 1.59s/it]" - ] + "cell_type": "code", + "execution_count": 14, + "id": "84a0871f", + "metadata": { + "id": "84a0871f" + }, + "outputs": [], + "source": [ + "def delete_texts_from_pinecone(texts, namespace=NAMESPACE):\n", + " # Compute the hash (id) for each text\n", + " hashes = [hashlib.md5(text.encode()).hexdigest() for text in texts]\n", + "\n", + " # The ids parameter is used to specify the list of IDs (hashes) to delete\n", + " return index.delete(ids=hashes, namespace=namespace)" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy without re-ranking: 0.764\n", - "Accuracy with re-ranking: 0.834\n" - ] + "cell_type": "code", + "execution_count": 14, + "id": "a6a5172f-a0c6-4692-ab77-83321e141679", + "metadata": { + "id": "a6a5172f-a0c6-4692-ab77-83321e141679" + }, + "outputs": [], + "source": [] }, { - "name": "stderr", - "output_type": 
"stream", - "text": [ - " 52%|█████▏ | 600/1148 [17:29<14:38, 1.60s/it]" - ] + "cell_type": "code", + "execution_count": 15, + "id": "4523cab6-0bc3-4791-9c73-7bd7d7e5858b", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4523cab6-0bc3-4791-9c73-7bd7d7e5858b", + "outputId": "5d3a7742-7dd8-49ac-da15-85a70b4ed6ef" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatasetDict({\n", + " test: Dataset({\n", + " features: ['id', 'title', 'context', 'question', 'answers'],\n", + " num_rows: 1148\n", + " })\n", + " train: Dataset({\n", + " features: ['id', 'title', 'context', 'question', 'answers'],\n", + " num_rows: 11590\n", + " })\n", + "})" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datasets import load_dataset\n", + "\n", + "dataset = load_dataset(\"xtreme\", \"MLQA.en.en\")\n", + "\n", + "# rename test -> train and val -> test (as we will use it in later in this chapter)\n", + "dataset['train'] = dataset['test']\n", + "dataset['test'] = dataset['validation']\n", + "del dataset['validation']\n", + "\n", + "dataset" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy without re-ranking: 0.7683333333333333\n", - "Accuracy with re-ranking: 0.8466666666666667\n" - ] + "cell_type": "code", + "execution_count": 16, + "id": "e622fbfa-2dde-4711-9c46-1390eb3430f9", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "e622fbfa-2dde-4711-9c46-1390eb3430f9", + "outputId": "2b2df7a8-15e1-4285-95a5-627fb6249d31" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "({'id': 'a4968ca8a18de16aa3859be760e43dbd3af3fce9',\n", + " 'title': 'Area 51',\n", + " 'context': 'In 1994, five unnamed civilian contractors and the widows of contractors Walter Kasza and Robert Frost sued the USAF and the United States Environmental Protection Agency. 
Their suit, in which they were represented by George Washington University law professor Jonathan Turley, alleged they had been present when large quantities of unknown chemicals had been burned in open pits and trenches at Groom. Biopsies taken from the complainants were analyzed by Rutgers University biochemists, who found high levels of dioxin, dibenzofuran, and trichloroethylene in their body fat. The complainants alleged they had sustained skin, liver, and respiratory injuries due to their work at Groom, and that this had contributed to the deaths of Frost and Kasza. The suit sought compensation for the injuries they had sustained, claiming the USAF had illegally handled toxic materials, and that the EPA had failed in its duty to enforce the Resource Conservation and Recovery Act (which governs handling of dangerous materials). They also sought detailed information about the chemicals to which they were allegedly exposed, hoping this would facilitate the medical treatment of survivors. Congressman Lee H. Hamilton, former chairman of the House Intelligence Committee, told 60 Minutes reporter Lesley Stahl, \"The Air Force is classifying all information about Area 51 in order to protect themselves from a lawsuit.\"',\n", + " 'question': 'Who analyzed the biopsies?',\n", + " 'answers': {'answer_start': [457],\n", + " 'text': ['Rutgers University biochemists']}},\n", + " {'id': 'f251ea56c4f1aa1df270137f7e6d89c0cc1b6ef4',\n", + " 'title': 'Area 51',\n", + " 'context': 'In 1994, five unnamed civilian contractors and the widows of contractors Walter Kasza and Robert Frost sued the USAF and the United States Environmental Protection Agency. Their suit, in which they were represented by George Washington University law professor Jonathan Turley, alleged they had been present when large quantities of unknown chemicals had been burned in open pits and trenches at Groom. 
Biopsies taken from the complainants were analyzed by Rutgers University biochemists, who found high levels of dioxin, dibenzofuran, and trichloroethylene in their body fat. The complainants alleged they had sustained skin, liver, and respiratory injuries due to their work at Groom, and that this had contributed to the deaths of Frost and Kasza. The suit sought compensation for the injuries they had sustained, claiming the USAF had illegally handled toxic materials, and that the EPA had failed in its duty to enforce the Resource Conservation and Recovery Act (which governs handling of dangerous materials). They also sought detailed information about the chemicals to which they were allegedly exposed, hoping this would facilitate the medical treatment of survivors. Congressman Lee H. Hamilton, former chairman of the House Intelligence Committee, told 60 Minutes reporter Lesley Stahl, \"The Air Force is classifying all information about Area 51 in order to protect themselves from a lawsuit.\"',\n", + " 'question': 'who represented robert frost and walter kasza in their suit?',\n", + " 'answers': {'answer_start': [218],\n", + " 'text': ['George Washington University law professor Jonathan Turley']}})" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset['train'][0], dataset['train'][1]" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - " 61%|██████ | 700/1148 [20:08<12:14, 1.64s/it]" - ] + "cell_type": "code", + "execution_count": 17, + "id": "0221343a", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0221343a", + "outputId": "8b46cfc0-60da-488d-8b39-d4cb2cbe589f" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 31/31 [00:40<00:00, 1.30s/it]\n" + ] + } + ], + "source": [ + "unique_passages = list(set(dataset['test']['context']))\n", + "for idx in tqdm(range(0, len(unique_passages), 32)):\n", + " 
passages = unique_passages[idx:idx + 32]\n", + " upload_texts_to_pinecone(passages)\n" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy without re-ranking: 0.7471428571428571\n", - "Accuracy with re-ranking: 0.8285714285714286\n" - ] + "cell_type": "code", + "execution_count": 18, + "id": "796a7c80-7149-430e-b22c-9926c0d1daee", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "796a7c80-7149-430e-b22c-9926c0d1daee", + "outputId": "32378891-b767-455e-9251-e5aa2518d5cd" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "978" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(unique_passages)" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - " 70%|██████▉ | 800/1148 [22:46<09:11, 1.58s/it]" - ] + "cell_type": "code", + "execution_count": 19, + "id": "33yf7QrWtwt-", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "33yf7QrWtwt-", + "outputId": "69eb488e-f99b-45ca-dcc2-d2f9bc8b8586" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'dimension': 3072,\n", + " 'index_fullness': 0.0,\n", + " 'namespaces': {'default': {'vector_count': 979}},\n", + " 'total_vector_count': 979}" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "index.describe_index_stats()" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy without re-ranking: 0.74875\n", - "Accuracy with re-ranking: 0.82125\n" - ] + "cell_type": "code", + "execution_count": 20, + "id": "432b021b-2cdb-4d73-a2c6-f2e0d1eb5ffc", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "432b021b-2cdb-4d73-a2c6-f2e0d1eb5ffc", + "outputId": "840217b3-de53-4f7c-de10-09afc58ba15c" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'id': '569666f4dc3983dab5624e989212c1d9d0cd1798',\n", + " 'title': 'Pappataci fever',\n", + 
" 'context': 'Pappataci fever is prevalent in the subtropical zone of the Eastern Hemisphere between 20°N and 45°N, particularly in Southern Europe, North Africa, the Balkans, Eastern Mediterranean, Iraq, Iran, Pakistan, Afghanistan and India.The disease is transmitted by the bites of phlebotomine sandflies of the Genus Phlebotomus, in particular, Phlebotomus papatasi, Phlebotomus perniciosus and Phlebotomus perfiliewi. The sandfly becomes infected when biting an infected human in the period between 48 hours before the onset of fever and 24 hours after the end of the fever, and remains infected for its lifetime. Besides this horizontal virus transmission from man to sandfly, the virus can be transmitted in insects transovarially, from an infected female sandfly to its offspring.Pappataci fever is seldom recognised in endemic populations because it is mixed with other febrile illnesses of childhood, but it is more well-known among immigrants and military personnel from non-endemic regions.',\n", + " 'question': 'Does an infection for Sandflies go away over time?',\n", + " 'answers': {'answer_start': [571],\n", + " 'text': ['remains infected for its lifetime']}}" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset['test'][0]" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - " 78%|███████▊ | 900/1148 [25:26<06:36, 1.60s/it]" - ] + "cell_type": "code", + "execution_count": 21, + "id": "36e13316-2295-4a72-8f2a-b459f0e7826a", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "36e13316-2295-4a72-8f2a-b459f0e7826a", + "outputId": "0859f356-06d6-420d-e605-33530ef96497", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'id': '2f90090e21f19450887d5f3ff781e541',\n", + " 'metadata': {'date_uploaded': '2025-02-09T01:36:44.647948',\n", + " 'text': 'Pappataci fever is prevalent in the subtropical zone of '\n", + " 'the Eastern 
Hemisphere between 20°N and 45°N, '\n", + " 'particularly in Southern Europe, North Africa, the '\n", + " 'Balkans, Eastern Mediterranean, Iraq, Iran, Pakistan, '\n", + " 'Afghanistan and India.The disease is transmitted by the '\n", + " 'bites of phlebotomine sandflies of the Genus '\n", + " 'Phlebotomus, in particular, Phlebotomus papatasi, '\n", + " 'Phlebotomus perniciosus and Phlebotomus perfiliewi. The '\n", + " 'sandfly becomes infected when biting an infected human '\n", + " 'in the period between 48 hours before the onset of '\n", + " 'fever and 24 hours after the end of the fever, and '\n", + " 'remains infected for its lifetime. Besides this '\n", + " 'horizontal virus transmission from man to sandfly, the '\n", + " 'virus can be transmitted in insects transovarially, '\n", + " 'from an infected female sandfly to its '\n", + " 'offspring.Pappataci fever is seldom recognised in '\n", + " 'endemic populations because it is mixed with other '\n", + " 'febrile illnesses of childhood, but it is more '\n", + " 'well-known among immigrants and military personnel from '\n", + " 'non-endemic regions.'},\n", + " 'score': 0.436177373,\n", + " 'values': []},\n", + " {'id': '00661b04eb84a4664717245513ea30cd',\n", + " 'metadata': {'date_uploaded': '2025-02-09T01:37:00.096215',\n", + " 'text': 'Paratyphoid fever, also known simply as paratyphoid, is '\n", + " 'a bacterial infection caused by one of the three types '\n", + " 'of Salmonella enterica. Symptoms usually begin 6–30 '\n", + " 'days after exposure and are the same as those of '\n", + " 'typhoid fever. Often, a gradual onset of a high fever '\n", + " 'occurs over several days. Weakness, loss of appetite, '\n", + " 'and headaches also commonly occur. Some people develop '\n", + " 'a skin rash with rose-colored spots. Without treatment, '\n", + " 'symptoms may last weeks or months. 
Other people may '\n", + " 'carry the bacteria without being affected; however, '\n", + " 'they are still able to spread the disease to others. '\n", + " 'Both typhoid and paratyphoid are of similar severity. '\n", + " 'Paratyphoid and typhoid fever are types of enteric '\n", + " 'fever.Paratyphoid is caused by the bacterium Salmonella '\n", + " 'enterica of the serotypes Paratyphi A, Paratyphi B, or '\n", + " 'Paratyphi C growing in the intestines and blood. They '\n", + " 'are usually spread by eating or drinking food or water '\n", + " 'contaminated with the feces of an infected person. They '\n", + " 'may occur when a person who prepares food is infected. '\n", + " 'Risk factors include poor sanitation as is found among '\n", + " 'poor crowded populations. Occasionally, they may be '\n", + " 'transmitted by sex. Humans are the only animals '\n", + " 'infected. Diagnosis may be based on symptoms and '\n", + " 'confirmed by either culturing the bacteria or detecting '\n", + " 'the bacterial DNA in the blood, stool, or bone marrow. '\n", + " 'Culturing the bacteria can be difficult. Bone-marrow '\n", + " 'testing is the most accurate. Symptoms are similar to '\n", + " 'that of many other infectious diseases. Typhus is an '\n", + " 'unrelated disease.While no vaccine is available '\n", + " 'specifically for paratyphoid, the typhoid vaccine may '\n", + " 'provide some benefit. Prevention includes drinking '\n", + " 'clean water, better sanitation, and better handwashing. '\n", + " 'Treatment of the disease is with antibiotics such as '\n", + " 'azithromycin. Resistance to a number of other '\n", + " 'previously effective antibiotics is common.Paratyphoid '\n", + " 'affects about six million people a year. It is most '\n", + " 'common in parts of Asia and rare in the developed '\n", + " 'world. Most cases are due to Paratyphi A rather than '\n", + " 'Paratyphi B or C. 
In 2015, paratyphoid fever resulted '\n", + " 'in about 29,200 deaths, down from 63,000 deaths in '\n", + " '1990. The risk of death is between 10 and 15% without '\n", + " 'treatment, while with treatment, it may be less than '\n", + " '1%.'},\n", + " 'score': 0.325614125,\n", + " 'values': []},\n", + " {'id': '24d322a7ce10e9e2fa12fcf54f50c651',\n", + " 'metadata': {'date_uploaded': '2025-02-09T01:37:14.510155',\n", + " 'text': 'Although Aronson isolated this mycobacterium in 1926 '\n", + " 'from a fish, it was not until 1951 that it was found to '\n", + " 'be the cause of human disease by Linell and Norden. '\n", + " 'Large outbreaks of infection due to this atypical '\n", + " 'mycobacterium have been described in association with '\n", + " 'swimming. Infections related to swimming pools have now '\n", + " 'drastically fallen due to the improvements in the '\n", + " 'construction and maintenance of these facilities.The '\n", + " 'first case of M. marinum infection associated with a '\n", + " \"fish-tank ('fish-tank granuloma') was reported in 1962 \"\n", + " 'by Swift and Cohen. M. marinum infection may be an '\n", + " 'occupational hazard for certain professions such as pet '\n", + " 'shop workers, but most infections occur in fish '\n", + " 'fanciers who keep an aquarium at home. Although '\n", + " 'infection may be caused by direct injury from the fish '\n", + " 'fins or bites, most are acquired during the handling of '\n", + " 'the aquariums such as cleaning or changing the water. '\n", + " 'Indirect infection has also been described related to a '\n", + " 'child’s bathing utensils that had been used to clean a '\n", + " 'fish tank. 
Due to an increased awareness of the disease '\n", + " 'and improved isolation methods, more and more cases are '\n", + " 'being recognized and reported worldwide.'},\n", + " 'score': 0.3016873,\n", + " 'values': []}]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query_from_pinecone('Does an infection for Sandflies go away over time?')" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy without re-ranking: 0.7388888888888889\n", - "Accuracy with re-ranking: 0.8188888888888889\n" - ] + "cell_type": "code", + "execution_count": 21, + "id": "5b92699e-2d92-4083-9508-ae47355c9c1a", + "metadata": { + "id": "5b92699e-2d92-4083-9508-ae47355c9c1a" + }, + "outputs": [], + "source": [] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - " 87%|████████▋ | 1000/1148 [28:05<04:05, 1.66s/it]" - ] + "cell_type": "code", + "execution_count": 21, + "id": "206ab80e-f4b4-48d2-8aef-b522024f6658", + "metadata": { + "id": "206ab80e-f4b4-48d2-8aef-b522024f6658" + }, + "outputs": [], + "source": [] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy without re-ranking: 0.741\n", - "Accuracy with re-ranking: 0.819\n" - ] + "cell_type": "markdown", + "id": "7e180a25-37e4-47a8-8ae8-a49f39c106ab", + "metadata": { + "id": "7e180a25-37e4-47a8-8ae8-a49f39c106ab" + }, + "source": [ + "# Part 2: Making results more relevant with a cross-encoder" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - " 96%|█████████▌| 1100/1148 [30:45<01:16, 1.60s/it]" - ] + "cell_type": "code", + "execution_count": 22, + "id": "074fab6f", + "metadata": { + "id": "074fab6f" + }, + "outputs": [], + "source": [ + "# if you didn't import before\n", + "\n", + "from sentence_transformers.cross_encoder import CrossEncoder\n", + "import numpy as np\n", + "from torch import nn" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy without re-ranking: 
0.7509090909090909\n", - "Accuracy with re-ranking: 0.8272727272727273\n" - ] + "cell_type": "code", + "execution_count": 23, + "id": "414fc2d5", + "metadata": { + "id": "414fc2d5" + }, + "outputs": [], + "source": [ + "from copy import copy\n", + "\n", + "def get_results_from_pinecone(query, top_k=3, re_rank_model=None, verbose=True, correct_hash=None):\n", + "\n", + " results_from_pinecone = query_from_pinecone(query, top_k=top_k)\n", + "\n", + " if not results_from_pinecone:\n", + " return []\n", + "\n", + " if verbose:\n", + " print(\"Query:\", query)\n", + "\n", + "\n", + " final_results = []\n", + "\n", + " retrieved_correct_position, reranked_correct_position = None, None\n", + " for idx, result_from_pinecone in enumerate(results_from_pinecone):\n", + " if correct_hash and result_from_pinecone['id'] == correct_hash:\n", + " retrieved_correct_position = idx\n", + "\n", + " if re_rank_model is not None:\n", + " if verbose:\n", + " print('Document ID (Hash)\\t\\tRetrieval Score\\tCE Score\\tText')\n", + "\n", + " sentence_combinations = [[query, result_from_pinecone['metadata']['text']] for result_from_pinecone in results_from_pinecone]\n", + "\n", + " # Compute the similarity scores for these combinations\n", + " similarity_scores = re_rank_model.predict(sentence_combinations, activation_fct=nn.Sigmoid())\n", + "\n", + " # Sort the scores in decreasing order\n", + " sim_scores_argsort = list(reversed(np.argsort(similarity_scores)))\n", + " sim_scores_sort = list(reversed(np.sort(similarity_scores)))\n", + " top_re_rank_score = sim_scores_sort[0]\n", + "\n", + " # Print the scores\n", + " # print(list(zip(sim_scores_argsort, sim_scores_sort)))\n", + " for idx, _ in enumerate(sim_scores_argsort):\n", + " result_from_pinecone = results_from_pinecone[_]\n", + " if correct_hash and retrieved_correct_position == _:\n", + " reranked_correct_position = idx\n", + " final_results.append({'score': similarity_scores[idx], 'id': result_from_pinecone['id'], 'metadata': 
result_from_pinecone['metadata']})\n", + " if verbose:\n", + " print(f\"{result_from_pinecone['id']}\\t{result_from_pinecone['score']:.2f}\\t{similarity_scores[idx]:.6f}\\t{result_from_pinecone['metadata']['text'][:50]}\")\n", + " return {'final_results': final_results, 'retrieved_correct_position': retrieved_correct_position, 'reranked_correct_position': reranked_correct_position, 'results_from_pinecone': results_from_pinecone, 'top_re_rank_score': top_re_rank_score}\n", + "\n", + " if verbose:\n", + " print('Document ID (Hash)\\t\\tRetrieval Score\\tText')\n", + " for result_from_pinecone in results_from_pinecone:\n", + " final_results.append(result_from_pinecone)\n", + " if verbose:\n", + " print(f\"{result_from_pinecone['id']}\\t{result_from_pinecone['score']:.2f}\\t{result_from_pinecone['metadata']['text'][:50]}\")\n", + "\n", + " return {'final_results': final_results, 'retrieved_correct_position': retrieved_correct_position, 'reranked_correct_position': reranked_correct_position}" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 1148/1148 [32:01<00:00, 1.67s/it]\n" - ] - } - ], - "source": [ - "logger.setLevel(logging.CRITICAL)\n", - "\n", - "predictions = []\n", - "\n", - "for question in tqdm(test_sample['question']):\n", - " r = get_results_from_pinecone(\n", - " question, top_k=TOP_K, re_rank_model=cross_encoder, correct_hash=q_to_hash[question],\n", - " verbose=False\n", - " )\n", - "\n", - " r['retrieved_correct_position'], r['reranked_correct_position']\n", - " predictions.append(r)\n", - " if len(predictions) % 100 == 0:\n", - " retrieved_accuracy = sum([_['retrieved_correct_position'] == 0 for _ in predictions])/len(predictions)\n", - " re_ranked_accuracy = sum([_['reranked_correct_position'] == 0 for _ in predictions])/len(predictions)\n", - "\n", - " print(f'Accuracy without re-ranking: {retrieved_accuracy}')\n", - " print(f'Accuracy with re-ranking: {re_ranked_accuracy}')\n" - ] - }, - { - "cell_type": 
"code", - "execution_count": null, - "id": "a2ff94e7-0b78-40ca-8cf0-42e345439e8b", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "cell_type": "code", + "execution_count": 23, + "id": "_GPhx4MMIVqU", + "metadata": { + "id": "_GPhx4MMIVqU" + }, + "outputs": [], + "source": [] }, - "id": "a2ff94e7-0b78-40ca-8cf0-42e345439e8b", - "outputId": "a2cd7fd3-ae14-43dd-cab8-559f7878fd6f" - }, - "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy without re-ranking: 0.7543554006968641\n", - "Accuracy with re-ranking: 0.8327526132404182\n" - ] - } - ], - "source": [ - "retrieved_accuracy = sum([_['retrieved_correct_position'] == 0 for _ in predictions])/len(predictions)\n", - "re_ranked_accuracy = sum([_['reranked_correct_position'] == 0 for _ in predictions])/len(predictions)\n", - "\n", - "print(f'Accuracy without re-ranking: {retrieved_accuracy}')\n", - "print(f'Accuracy with re-ranking: {re_ranked_accuracy}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fT4FuVcf9ONw", - "metadata": { - "id": "fT4FuVcf9ONw" - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6649afac-9434-440b-b332-de9149a7ebfa", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 206 + "cell_type": "code", + "execution_count": 24, + "id": "b25a8a8f-e873-43e7-9f56-b57bdf328416", + "metadata": { + "id": "b25a8a8f-e873-43e7-9f56-b57bdf328416" + }, + "outputs": [], + "source": [ + "# Pre-trained cross encoder\n", + "cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2', num_labels=1)\n", + "\n", + "q_to_hash = {data['question']: my_hash(data['context']) for data in dataset['test']}" + ] }, - "id": "6649afac-9434-440b-b332-de9149a7ebfa", - "outputId": "17b2e526-cd7e-409e-e0f3-2ae4afd29160" - }, - "outputs": [ { - "data": { - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
final_resultsretrieved_correct_positionreranked_correct_positionresults_from_pinecone
0{'final_results': [{'score': 0.8746502, 'id': ...0.00.0[{'id': '2f90090e21f19450887d5f3ff781e541',\n", - " '...
1{'final_results': [{'score': 0.8746502, 'id': ...0.00.0[{'id': '49201636ad4102735125e146c0dbafa4',\n", - " '...
2{'final_results': [{'score': 0.8746502, 'id': ...0.00.0[{'id': '80d92494d2b06f341842f1855d2938cf',\n", - " '...
3{'final_results': [{'score': 0.8746502, 'id': ...0.02.0[{'id': 'e3fd54f33b021ea3cf88b438fefcada7',\n", - " '...
4{'final_results': [{'score': 0.8746502, 'id': ...0.00.0[{'id': '37ed40c20d4e1b9cc8d6cc27a4d0fff3',\n", - " '...
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" + "cell_type": "code", + "execution_count": 25, + "id": "vptFN0wPw0uS", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vptFN0wPw0uS", + "outputId": "b2fb5f17-7c9d-4ffb-ff00-7bf532fa683a" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1148" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } ], - "text/plain": [ - " final_results \\\n", - "0 {'final_results': [{'score': 0.8746502, 'id': ... \n", - "1 {'final_results': [{'score': 0.8746502, 'id': ... \n", - "2 {'final_results': [{'score': 0.8746502, 'id': ... \n", - "3 {'final_results': [{'score': 0.8746502, 'id': ... \n", - "4 {'final_results': [{'score': 0.8746502, 'id': ... \n", - "\n", - " retrieved_correct_position reranked_correct_position \\\n", - "0 0.0 0.0 \n", - "1 0.0 0.0 \n", - "2 0.0 0.0 \n", - "3 0.0 2.0 \n", - "4 0.0 0.0 \n", - "\n", - " results_from_pinecone \n", - "0 [{'id': '2f90090e21f19450887d5f3ff781e541',\n", - " '... \n", - "1 [{'id': '49201636ad4102735125e146c0dbafa4',\n", - " '... \n", - "2 [{'id': '80d92494d2b06f341842f1855d2938cf',\n", - " '... \n", - "3 [{'id': 'e3fd54f33b021ea3cf88b438fefcada7',\n", - " '... \n", - "4 [{'id': '37ed40c20d4e1b9cc8d6cc27a4d0fff3',\n", - " '... 
" + "source": [ + "unique_inputs = list(set(dataset['test']['question']))\n", + "len(unique_inputs)" ] - }, - "execution_count": 285, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import pandas as pd\n", - "\n", - "predictions_df = pd.DataFrame(predictions)\n", - "predictions_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "831397d3-606a-4c7f-b77f-6ee85bd24f7e", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" }, - "id": "831397d3-606a-4c7f-b77f-6ee85bd24f7e", - "outputId": "041c1a5c-cdef-4668-bb59-2700997c67ab" - }, - "outputs": [ { - "data": { - "text/plain": [ - "retrieved_correct_position 1.311052\n", - "reranked_correct_position 0.682709\n", - "dtype: float64" + "cell_type": "code", + "execution_count": 26, + "id": "dc56237c-f9d6-4b3d-a8ac-31098cf4f904", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dc56237c-f9d6-4b3d-a8ac-31098cf4f904", + "outputId": "5270a095-bb39-4c6e-8da7-bb771c6eda7d" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "What was the year range?\n", + "Of Natures Obvious Laws & Processes in Vegetation (unpublished, c. 
1671–75)\n" + ] + } + ], + "source": [ + "query = unique_inputs[0]\n", + "print(query)\n", + "\n", + "for t in dataset['test']:\n", + " if t['question'] == query:\n", + " print(t['context'])" ] - }, - "execution_count": 288, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "predictions_df[['retrieved_correct_position', 'reranked_correct_position']].mean() # lower is better" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "eyv-6pPIEy0r", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "eyv-6pPIEy0r", - "outputId": "4d82a213-bd82-44e4-d933-8d39870c2f4d" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1 866 956\n", - "3 1009 1057\n", - "5 1048 1077\n", - "10 1072 1094\n", - "25 1108 1120\n", - "50 1122 1122\n" - ] - } - ], - "source": [ - "# do recall @ 1, 3, 5, 10\n", - "for k in (1, 3, 5, 10, 25, 50):\n", - " embedding_only_recall = predictions_df[predictions_df['retrieved_correct_position'] < k].shape[0]\n", - " reranked_recall = predictions_df[predictions_df['reranked_correct_position'] < k].shape[0]\n", - " print(k, embedding_only_recall, reranked_recall)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bd9aef05-e271-41e0-a8ce-88b6c81bed4c", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "3f14809e", - "metadata": { - "id": "3f14809e" - }, - "source": [ - "## OPEN SOURCE ALTERNATIVE TO EMBEDDING" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bfa9c8c7-4d9c-4217-b226-bc376394716d", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 67, - "id": "8ed58d3a-bed5-4f70-a4d6-3d624ab98868", - "metadata": {}, - "outputs": [], - "source": [ - "from sentence_transformers import SentenceTransformer" - ] - }, - { - "cell_type": "code", - "execution_count": 98, - "id": "99138055", - "metadata": { - "colab": { - 
"base_uri": "https://localhost:8080/", - "height": 67, - "referenced_widgets": [ - "c7800a5a8f1b4c3fa541df6b5ba6281f", - "9d726c5753564258bbe1c281fa8bfefa", - "ea3f511248734bbab197277b68346826", - "3838d1bfccb5402d91ecf96f51f9c2d7", - "ecc8ed6e869d4b01b7905ca951d0068c", - "66f8508878e94e4296e2ffb009afdb75", - "999f219c7c9449f0a48feb59eddd23eb", - "7ab68b0af00248f5bbc15a39bbf38166", - "b6ecc1cc5b2e4332b56fdc1614823d3e", - "e732511315484861b39d0efb86b9cfb5", - "3df968e9ff654de7bfc95f08c88a99c6" - ] }, - "id": "99138055", - "outputId": "2414eff0-be1c-44fb-be4c-a77d11ae4c57" - }, - "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/homebrew/lib/python3.11/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "# load up our open source embedding model\n", - "bi_encoder = SentenceTransformer(\"sentence-transformers/all-mpnet-base-v2\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a1140c18", - "metadata": { - "id": "a1140c18" - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 69, - "id": "be37c48b", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 49, - "referenced_widgets": [ - "0caf250cdae643479160eedc708ffeea", - "fa6089718c574ee19bd3c378e356ea6b", - "f78b642ffb784f9097e0cbf97c44b9bd", - "7c322a232739486a937883ac787f8750", - "1bc5237b0db94f4aa124c8c79a891955", - "2c83393c2b7745889765d8efb4890763", - "6854fe2978954c7b9e30202a6ea34b57", - "4f4986d008434fc9912c496636b40b38", - "348701c9256e4eb48b8ffb0d9c0d6c83", - "11e5c44fa73c4d5d88561036f2a0a070", - "9781773e5d4d4e0ab8e3ff3140bd0983" - ] - }, - "id": "be37c48b", - "outputId": "aeec917e-853f-4b24-facc-c8c42468f3fd" - }, - "outputs": [ - { - "data": { - 
"application/vnd.jupyter.widget-view+json": { - "model_id": "8aff0c10ce584f27b0989168c1c302e9", - "version_major": 2, - "version_minor": 0 + "cell_type": "code", + "execution_count": 27, + "id": "4b0da5ac-0345-45e2-9913-188590a31522", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4b0da5ac-0345-45e2-9913-188590a31522", + "outputId": "5edf7712-a789-4974-e115-ba78a8877dbb" }, - "text/plain": [ - "Batches: 0%| | 0/36 [00:00\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
retrieved_correct_positionreranked_correct_position
00.00.0
10.00.0
20.00.0
31.00.0
40.00.0
\n", - "" + "cell_type": "code", + "execution_count": 28, + "id": "dfbb3b3c-9baa-4298-abd8-0e7b75dd24ec", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dfbb3b3c-9baa-4298-abd8-0e7b75dd24ec", + "outputId": "df6f1cdd-1779-499b-fd66-91e1fd314860", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'final_results': [{'score': 0.00039841508,\n", + " 'id': '7760dbdfeaf1828b780539812180a8a3',\n", + " 'metadata': {'date_uploaded': '2025-02-09T01:36:58.059608',\n", + " 'text': \"In the middle of the 18th century, a series of colonial conflicts began between France and Britain, which ultimately resulted in the destruction of most of the first French colonial empire and the near-complete expulsion of France from the Americas. These wars were the War of the Austrian Succession (1740–1748), the Seven Years' War (1756–1763), the American Revolution (1765–1783), the French Revolutionary Wars (1793–1802) and the Napoleonic Wars (1803–1815). It may even be seen further back in time to the first of the French and Indian Wars. This cyclic conflict is sometimes known as the Second Hundred Years' War.\"}},\n", + " {'score': 0.0002454633,\n", + " 'id': '517f23b9677bddd92be1e469c9182d88',\n", + " 'metadata': {'date_uploaded': '2025-02-09T01:37:12.088736',\n", + " 'text': 'From 1972, the National Park Service began allowing natural fires in Yellowstone to burn under controlled conditions. Fires of this type were referred to as prescribed natural fires. Between 1972 and 1987, a total of 235 prescribed natural fires burned a relatively small 33,759 acres (137 km2) under the directives of the new policy. Of these, only 15 spread to more than 100 acres (0.4 km2). The five years prior to 1988 were much wetter than normal and this may have reduced the area of the fires during that period. 
The prescribed natural fire policy appeared to be an effective way to manage fires, especially in the Yellowstone region.'}}],\n", + " 'retrieved_correct_position': None,\n", + " 'reranked_correct_position': None,\n", + " 'results_from_pinecone': [{'id': '7760dbdfeaf1828b780539812180a8a3',\n", + " 'metadata': {'date_uploaded': '2025-02-09T01:36:58.059608',\n", + " 'text': 'In the middle of the 18th century, a series of colonial '\n", + " 'conflicts began between France and Britain, which '\n", + " 'ultimately resulted in the destruction of most of the '\n", + " 'first French colonial empire and the near-complete '\n", + " 'expulsion of France from the Americas. These wars were '\n", + " 'the War of the Austrian Succession (1740–1748), the '\n", + " \"Seven Years' War (1756–1763), the American Revolution \"\n", + " '(1765–1783), the French Revolutionary Wars (1793–1802) '\n", + " 'and the Napoleonic Wars (1803–1815). It may even be '\n", + " 'seen further back in time to the first of the French '\n", + " 'and Indian Wars. This cyclic conflict is sometimes '\n", + " \"known as the Second Hundred Years' War.\"},\n", + " 'score': 0.270442665,\n", + " 'values': []},\n", + " {'id': '517f23b9677bddd92be1e469c9182d88',\n", + " 'metadata': {'date_uploaded': '2025-02-09T01:37:12.088736',\n", + " 'text': 'From 1972, the National Park Service began allowing '\n", + " 'natural fires in Yellowstone to burn under controlled '\n", + " 'conditions. Fires of this type were referred to as '\n", + " 'prescribed natural fires. Between 1972 and 1987, a '\n", + " 'total of 235 prescribed natural fires burned a '\n", + " 'relatively small 33,759 acres (137 km2) under the '\n", + " 'directives of the new policy. Of these, only 15 spread '\n", + " 'to more than 100 acres (0.4 km2). The five years prior '\n", + " 'to 1988 were much wetter than normal and this may have '\n", + " 'reduced the area of the fires during that period. 
The '\n", + " 'prescribed natural fire policy appeared to be an '\n", + " 'effective way to manage fires, especially in the '\n", + " 'Yellowstone region.'},\n", + " 'score': 0.244492292,\n", + " 'values': []}],\n", + " 'top_re_rank_score': 0.00039841508}" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } ], - "text/plain": [ - " retrieved_correct_position reranked_correct_position\n", - "0 0.0 0.0\n", - "1 0.0 0.0\n", - "2 0.0 0.0\n", - "3 1.0 0.0\n", - "4 0.0 0.0" + "source": [ + "query_result # the right context isn't there!" ] - }, - "execution_count": 107, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "os_predictions_df = pd.DataFrame(os_predictions)\n", - "os_predictions_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 108, - "id": "88419a17-598b-4b3e-b314-e1c5ef3475eb", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy without re-ranking: 0.5017421602787456\n", - "Accuracy with re-ranking: 0.6202090592334495\n" - ] - } - ], - "source": [ - "raw_accuracy = sum([p['retrieved_correct_position'] == 0 for p in os_predictions])/len(os_predictions)\n", - "reranked_accuracy = sum([p['reranked_correct_position'] == 0 for p in os_predictions])/len(os_predictions)\n", - "\n", - "print(f'Accuracy without re-ranking: {raw_accuracy}')\n", - "print(f'Accuracy with re-ranking: {reranked_accuracy}')\n" - ] - }, - { - "cell_type": "code", - "execution_count": 114, - "id": "cf6fc44d-6af1-48b6-b859-01a1698c7dd9", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1 576 712\n", - "3 887 1009\n", - "5 948 1055\n", - "10 1008 1082\n", - "25 1073 1104\n", - "50 1107 1107\n" - ] - } - ], - "source": [ - "# do recall @ 1, 3, 5, 10\n", - "OPEN_SOURCE_RETRIEVAL = []\n", - "OPEN_SOURCE_RETRIEVAL_PLUS_PRE_CE = []\n", - "for k in (1, 3, 5, 10, 25, 50):\n", - " embedding_only_recall = 
os_predictions_df[os_predictions_df['retrieved_correct_position'] < k].shape[0]\n", - " reranked_recall = os_predictions_df[os_predictions_df['reranked_correct_position'] < k].shape[0]\n", - " print(k, embedding_only_recall, reranked_recall)\n", - " OPEN_SOURCE_RETRIEVAL.append(embedding_only_recall / os_predictions_df.shape[0])\n", - " OPEN_SOURCE_RETRIEVAL_PLUS_PRE_CE.append(reranked_recall / os_predictions_df.shape[0])" - ] - }, - { - "cell_type": "code", - "execution_count": 118, - "id": "f733061c-eb98-40cb-b116-602e1c0dcd3e", - "metadata": {}, - "outputs": [ + }, { - "data": { - "image/png": "", - "text/plain": [ - "
" + "cell_type": "code", + "execution_count": 29, + "id": "caa66ac8-d69c-49a1-9f55-e9ef93379fec", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "caa66ac8-d69c-49a1-9f55-e9ef93379fec", + "outputId": "00d71d0d-24f9-4ac1-d419-5b735fbe86c5" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(None, None)" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query_result = get_results_from_pinecone(\n", + " query,\n", + " top_k=100, # grab 10 results\n", + " re_rank_model=cross_encoder, correct_hash=q_to_hash[query],\n", + " verbose=False\n", + " )\n", + "\n", + "query_result['retrieved_correct_position'], query_result['reranked_correct_position']" ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import matplotlib.pyplot as plt\n", - "\n", - "X = [1, 3, 5, 10, 25, 50]\n", - "\n", - "OPENAI_RETRIEVAL = [866 / os_predictions_df.shape[0], 1009/ os_predictions_df.shape[0], 1048/ os_predictions_df.shape[0], 1072/ os_predictions_df.shape[0], 1108/ os_predictions_df.shape[0], 1122/ os_predictions_df.shape[0]]\n", - "OLD_CROSS_ENCODER = [956 / os_predictions_df.shape[0], 1057/ os_predictions_df.shape[0], 1077/ os_predictions_df.shape[0], 1094/ os_predictions_df.shape[0], 1120/ os_predictions_df.shape[0], 1122/ os_predictions_df.shape[0]]\n", - "\n", - "# Creating the plot\n", - "plt.figure(figsize=(10, 6))\n", - "plt.plot(X, OPENAI_RETRIEVAL, label='OAI Retrieval Only', marker='o')\n", - "plt.plot(X, OLD_CROSS_ENCODER, label='OAI + Pretrained CE', marker='s')\n", - "\n", - "plt.plot(X, OPEN_SOURCE_RETRIEVAL, label='OS Retrieval Only', marker='*')\n", - "plt.plot(X, OPEN_SOURCE_RETRIEVAL_PLUS_PRE_CE, label='OS + Pretrained CE', marker='^')\n", - "\n", - "\n", - "# Adding titles and labels\n", - "plt.title('Comparing embedding models + pre-trained vs fine-tuned CE (all retrieved 50 results then re-ranked)')\n", - "plt.xlabel('Recall @')\n", - 
"plt.ylabel('Performance')\n", - "plt.xticks(X)\n", - "plt.yticks([i/100 for i in range(70, 101, 5)]) # Adjusting y-ticks to start from 0.75\n", - "\n", - "# Adding legend\n", - "plt.legend()\n", - "\n", - "# Show the plot\n", - "plt.grid(True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "097fe3bc-4fc1-4e69-bf60-6ab4f75da968", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f11e5dcd-7ad8-43d0-aa1b-2b94b3784253", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "06e3a30f-e913-43ad-9fa7-f712abfa59d0", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "91e6c207-7f75-4b04-8d32-541265902fa5", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9ec04ddb-5a7d-4506-a0ba-a3ddedf985df", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "30c5cb27-018c-4299-a91f-6b39293c583e", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c3200a66-9fc0-4f39-94e8-3456b85b6a96", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "ea30153c", - "metadata": { - "id": "ea30153c" - }, - "source": [ - "## Advanced: Fine-tuning the re-ranker" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "58979a1e-0fe1-488f-9805-56efd499278d", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "508edaf6", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" }, - "id": "508edaf6", - "outputId": "1938803f-c8bd-478b-a37c-3a3894e1c106" - }, - "outputs": [ { - "data": { - "text/plain": [ - "{'id': 'a4968ca8a18de16aa3859be760e43dbd3af3fce9',\n", - 
" 'title': 'Area 51',\n", - " 'context': 'In 1994, five unnamed civilian contractors and the widows of contractors Walter Kasza and Robert Frost sued the USAF and the United States Environmental Protection Agency. Their suit, in which they were represented by George Washington University law professor Jonathan Turley, alleged they had been present when large quantities of unknown chemicals had been burned in open pits and trenches at Groom. Biopsies taken from the complainants were analyzed by Rutgers University biochemists, who found high levels of dioxin, dibenzofuran, and trichloroethylene in their body fat. The complainants alleged they had sustained skin, liver, and respiratory injuries due to their work at Groom, and that this had contributed to the deaths of Frost and Kasza. The suit sought compensation for the injuries they had sustained, claiming the USAF had illegally handled toxic materials, and that the EPA had failed in its duty to enforce the Resource Conservation and Recovery Act (which governs handling of dangerous materials). They also sought detailed information about the chemicals to which they were allegedly exposed, hoping this would facilitate the medical treatment of survivors. Congressman Lee H. 
Hamilton, former chairman of the House Intelligence Committee, told 60 Minutes reporter Lesley Stahl, \"The Air Force is classifying all information about Area 51 in order to protect themselves from a lawsuit.\"',\n", - " 'question': 'Who analyzed the biopsies?',\n", - " 'answers': {'answer_start': [457],\n", - " 'text': ['Rutgers University biochemists']}}" - ] - }, - "execution_count": 191, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dataset['train'][0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "51bf1607", - "metadata": { - "id": "51bf1607" - }, - "outputs": [], - "source": [ - "from sentence_transformers import InputExample, losses, evaluation\n", - "from torch.utils.data import DataLoader\n", - "from random import shuffle\n", - "\n", - "shuffled_training_passages = dataset['train']['question'].copy()\n", - "shuffle(shuffled_training_passages)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6b58d2d0", - "metadata": { - "id": "6b58d2d0" - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "50fd5c19-894a-41e4-bf6f-d9e1197c053f", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "cell_type": "code", + "execution_count": 29, + "id": "uk1lhptdUfrL", + "metadata": { + "id": "uk1lhptdUfrL" + }, + "outputs": [], + "source": [] }, - "id": "50fd5c19-894a-41e4-bf6f-d9e1197c053f", - "outputId": "8501a6c6-548c-4928-ecb7-4ed99339063b" - }, - "outputs": [ { - "data": { - "text/plain": [ - "(9916, 11590)" + "cell_type": "code", + "execution_count": 30, + "id": "a63ec53e", + "metadata": { + "id": "a63ec53e" + }, + "outputs": [], + "source": [ + "test_sample = dataset['test']" ] - }, - "execution_count": 197, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "unique_train_passages = list(set(dataset['train']['context']))\n", - "len(unique_train_passages), len(dataset['train']['context'])" - ] - 
}, - { - "cell_type": "code", - "execution_count": null, - "id": "ce8e076e-4822-4686-b57a-b8c0e09583e5", - "metadata": { - "id": "ce8e076e-4822-4686-b57a-b8c0e09583e5" - }, - "outputs": [], - "source": [ - "from sentence_transformers import SentenceTransformer\n", - "\n", - "# We will use this embedder to find negative examples\n", - "bi_encoder = SentenceTransformer(\"sentence-transformers/msmarco-MiniLM-L-6-v3\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c4ae6f1f-0523-40ff-8d5d-f374e4ead044", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 49, - "referenced_widgets": [ - "dbca2d7da970438892d45cd387434f6f", - "6f5389fdc3674a05ad79bb9456a5995f", - "7a5c511f2fe84c66beef7fed87023669", - "70e45cb4f2f64c499acc785ec64743fe", - "bb260568c96a4397bba2b1058404eec2", - "e281ca37b72d422285aa64193265aaa4", - "687d7bf7373f4541852203637e18d2e3", - "5a7b319329164b27bf452869e2335aef", - "e5a88639e7e5430fbe44f73d2e5a4b23", - "d9f6781147a24bc3aec97b87927bdfbb", - "d0c85ebbf1a54fdcaa33210aa3e247b8" - ] }, - "id": "c4ae6f1f-0523-40ff-8d5d-f374e4ead044", - "outputId": "847007cf-bf2e-4d3a-a10f-fd84123c1eb0" - }, - "outputs": [ { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "dbca2d7da970438892d45cd387434f6f", - "version_major": 2, - "version_minor": 0 + "cell_type": "code", + "execution_count": 31, + "id": "21a3a147", + "metadata": { + "id": "21a3a147" }, - "text/plain": [ - "Batches: 0%| | 0/620 [00:00\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
final_resultsretrieved_correct_positionreranked_correct_positionresults_from_pineconetop_re_rank_score
0[{'score': 0.29227442, 'id': '2f90090e21f19450...0.00.0[{'id': '2f90090e21f19450887d5f3ff781e541',\n", + " '...0.292274
1[{'score': 0.17795162, 'id': '49201636ad410273...0.00.0[{'id': '49201636ad4102735125e146c0dbafa4',\n", + " '...0.177952
2[{'score': 0.9939328, 'id': '80d92494d2b06f341...0.00.0[{'id': '80d92494d2b06f341842f1855d2938cf',\n", + " '...0.993933
3[{'score': 0.16567454, 'id': '99247f2042d1ed0b...0.02.0[{'id': 'e3fd54f33b021ea3cf88b438fefcada7',\n", + " '...0.848871
4[{'score': 0.8213388, 'id': '37ed40c20d4e1b9cc...0.00.0[{'id': '37ed40c20d4e1b9cc8d6cc27a4d0fff3',\n", + " '...0.821339
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n" + ], + "text/plain": [ + " final_results \\\n", + "0 [{'score': 0.29227442, 'id': '2f90090e21f19450... \n", + "1 [{'score': 0.17795162, 'id': '49201636ad410273... \n", + "2 [{'score': 0.9939328, 'id': '80d92494d2b06f341... \n", + "3 [{'score': 0.16567454, 'id': '99247f2042d1ed0b... \n", + "4 [{'score': 0.8213388, 'id': '37ed40c20d4e1b9cc... \n", + "\n", + " retrieved_correct_position reranked_correct_position \\\n", + "0 0.0 0.0 \n", + "1 0.0 0.0 \n", + "2 0.0 0.0 \n", + "3 0.0 2.0 \n", + "4 0.0 0.0 \n", + "\n", + " results_from_pinecone top_re_rank_score \n", + "0 [{'id': '2f90090e21f19450887d5f3ff781e541',\n", + " '... 0.292274 \n", + "1 [{'id': '49201636ad4102735125e146c0dbafa4',\n", + " '... 0.177952 \n", + "2 [{'id': '80d92494d2b06f341842f1855d2938cf',\n", + " '... 0.993933 \n", + "3 [{'id': 'e3fd54f33b021ea3cf88b438fefcada7',\n", + " '... 0.848871 \n", + "4 [{'id': '37ed40c20d4e1b9cc8d6cc27a4d0fff3',\n", + " '... 0.821339 " + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "predictions_df = pd.DataFrame(predictions)\n", + "predictions_df.head()" + ] }, - "id": "b281241b-54ed-4405-a101-f774efd2317d", - "outputId": "649df482-1761-4ba9-c20d-4364e037e2b0" - }, - "outputs": [ { - "data": { - "text/plain": [ - "34770" + "cell_type": "code", + "execution_count": 35, + "id": "831397d3-606a-4c7f-b77f-6ee85bd24f7e", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 147 + }, + "id": "831397d3-606a-4c7f-b77f-6ee85bd24f7e", + "outputId": "b87a375b-a56d-42e3-d1d6-d7fdbdb0cfff" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
retrieved_correct_position1.306595
reranked_correct_position0.680927
\n", + "

" + ], + "text/plain": [ + "retrieved_correct_position 1.306595\n", + "reranked_correct_position 0.680927\n", + "dtype: float64" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predictions_df[['retrieved_correct_position', 'reranked_correct_position']].mean() # lower is better" ] - }, - "execution_count": 203, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(train_samples)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8e7eaa40", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" }, - "id": "8e7eaa40", - "outputId": "2d1ab8ba-9f2e-4b24-b104-baf10d58e45f" - }, - "outputs": [ { - "data": { - "text/plain": [ - "{'guid': '',\n", - " 'texts': ['How was Mike Leander also known as?',\n", - " \"Günter Theodor Netzer (born 14 September 1944 in Mönchengladbach) is a former German football player and team general manager currently working in the media business. He achieved great success in Germany with Borussia Mönchengladbach in the early 1970s, and, after moving to Spain in 1973, with Real Madrid. He played as an attacking midfielder, and is considered one of the greatest passers in the game's history. He was voted Footballer of the Year in Germany twice, in 1972 and 1973. 
He was the general manager for Hamburger SV during much of the team's golden period from the late 1970s through the early 1980s, when the club won three league titles and the 1983 European Cup.\"],\n", - " 'label': 0}" + "cell_type": "code", + "execution_count": 36, + "id": "eyv-6pPIEy0r", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "eyv-6pPIEy0r", + "outputId": "14dcdf11-c330-4ab5-8b0d-6541df2fefbc" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1 865 957\n", + "3 1009 1057\n", + "5 1048 1076\n", + "10 1073 1094\n", + "25 1108 1120\n", + "50 1122 1122\n" + ] + } + ], + "source": [ + "# do recall @ 1, 3, 5, 10, etc\n", + "X = [1, 3, 5, 10, 25, 50]\n", + "OPENAI_RETRIEVAL = []\n", + "OLD_CROSS_ENCODER = []\n", + "\n", + "for k in X:\n", + " embedding_only_recall = predictions_df[predictions_df['retrieved_correct_position'] < k].shape[0]\n", + " reranked_recall = predictions_df[predictions_df['reranked_correct_position'] < k].shape[0]\n", + " OPENAI_RETRIEVAL.append(embedding_only_recall / predictions_df.shape[0])\n", + " OLD_CROSS_ENCODER.append(reranked_recall / predictions_df.shape[0])\n", + " print(k, embedding_only_recall, reranked_recall)" ] - }, - "execution_count": 204, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "train_samples[0].__dict__" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bc2bc9d1", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" }, - "id": "bc2bc9d1", - "outputId": "97ead4f4-bce2-4e8b-d379-1baab6ddd641" - }, - "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Warmup-steps: 174\n" - ] - } - ], - "source": [ - "from sentence_transformers.cross_encoder.evaluation import CECorrelationEvaluator, CEBinaryClassificationEvaluator\n", - "import math\n", - "import torch\n", - "from random import sample\n", - "\n", - "logger.setLevel(logging.DEBUG) # just to get some 
logs\n", - "\n", - "num_epochs = 1\n", - "\n", - "model_save_path = './fine_tuned_ir_cross_encoder'\n", - "\n", - "train_dataloader = DataLoader(train_samples[:int(len(train_samples)*.8)], shuffle=True, batch_size=16)\n", - "\n", - "# An evaluator for training performance\n", - "evaluator = CECorrelationEvaluator.from_input_examples(train_samples[int(len(train_samples)*.8):], name='test')\n", - "\n", - "# Rule of thumb for warmup steps\n", - "warmup_steps = math.ceil(len(train_dataloader) * num_epochs * 0.1) # 10% of train data for warm-up\n", - "print(f\"Warmup-steps: {warmup_steps}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ce4e8079-1335-4d53-944d-3ed0f6537781", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 137, - "referenced_widgets": [ - "f36d8f08f6284b848622fd560196cc78", - "608d1345487046ceb64b0b31a4287ae0", - "43c8db4be4984f6da1540de6bc89b616", - "6cb81ad9ee8141aa9d0fbefc276f244d", - "8e989ddf9d274876a3ba06056e7147a1", - "fce9aa1662c34941afadd6141aba9180", - "098016d32abc4f45be0a4f747cbc3f11", - "dc26c84713fb45f5a6c5b870c96d9d9e", - "a6ac9d7aed4c493db567b015ab8502a5", - "80e18075a957476591fa1ba2585b7a73", - "e1d8ec56e5614d77a6999a42283be94b", - "21316ef8a22d40ab9109ccf0fa7fa645", - "458bcab796d642c9afd2e534085b991f", - "266b42f10bd74b9f907588c1251c8d6c", - "ba968ae891a2400fa0db9f331b85b66c", - "5d09ef7e47f446a3a3a51458195a0fe3", - "352af51e6e7943a7a41d3bbe41885d8d", - "2cb4c1bff01b4f669ecd8583187bab9f", - "b070922627a04195a41899a169984b08", - "85a67e7f5d2845329d63f0922bb5d9a6", - "5b3c84192618410ea1793239c4514c6e", - "7bd0527b8e174aec9911c5b32dc048fd" - ] + "cell_type": "code", + "execution_count": 36, + "id": "bd9aef05-e271-41e0-a8ce-88b6c81bed4c", + "metadata": { + "id": "bd9aef05-e271-41e0-a8ce-88b6c81bed4c" + }, + "outputs": [], + "source": [] }, - "id": "ce4e8079-1335-4d53-944d-3ed0f6537781", - "outputId": "446b8d35-7bc9-48d5-c9ab-60dc6b3762fc" - }, - "outputs": [ { - "data": { - 
"application/vnd.jupyter.widget-view+json": { - "model_id": "f36d8f08f6284b848622fd560196cc78", - "version_major": 2, - "version_minor": 0 + "cell_type": "markdown", + "id": "3f14809e", + "metadata": { + "id": "3f14809e" }, - "text/plain": [ - "Batches: 0%| | 0/1 [00:00\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
retrieved_correct_positionreranked_correct_position
00.00.0
10.00.0
20.00.0
31.00.0
40.00.0
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n" + ], + "text/plain": [ + " retrieved_correct_position reranked_correct_position\n", + "0 0.0 0.0\n", + "1 0.0 0.0\n", + "2 0.0 0.0\n", + "3 1.0 0.0\n", + "4 0.0 0.0" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "os_predictions_df = pd.DataFrame(os_predictions)\n", + "os_predictions_df.head()" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy without re-ranking: 0.78\n", - "Accuracy with re-ranking: 0.84\n" - ] + "cell_type": "code", + "execution_count": 46, + "id": "88419a17-598b-4b3e-b314-e1c5ef3475eb", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "88419a17-598b-4b3e-b314-e1c5ef3475eb", + "outputId": "e79196b1-4b8f-4c30-f064-57484be08930" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy without re-ranking: 0.5017421602787456\n", + "Accuracy with re-ranking: 0.6202090592334495\n" + ] + } + ], + "source": [ + "raw_accuracy = sum([p['retrieved_correct_position'] == 0 for p in os_predictions])/len(os_predictions)\n", + "reranked_accuracy = sum([p['reranked_correct_position'] == 0 for p in os_predictions])/len(os_predictions)\n", + "\n", + "print(f'Accuracy without re-ranking: {raw_accuracy}')\n", + "print(f'Accuracy with re-ranking: {reranked_accuracy}')\n" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - " 17%|█▋ | 200/1148 [05:18<26:10, 1.66s/it]" - ] + "cell_type": "code", + "execution_count": 47, + "id": "cf6fc44d-6af1-48b6-b859-01a1698c7dd9", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cf6fc44d-6af1-48b6-b859-01a1698c7dd9", + "outputId": "a49bc1ad-7cf6-4980-bf39-521832c240a0" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1 576 712\n", + "3 887 1009\n", + "5 948 1055\n", + "10 1008 1082\n", + "25 1073 1104\n", + "50 1107 1107\n" + ] + } + ], + "source": [ + "# do recall @ 1, 3, 
5, 10\n", + "OPEN_SOURCE_RETRIEVAL = []\n", + "OPEN_SOURCE_RETRIEVAL_PLUS_PRE_CE = []\n", + "for k in X:\n", + " embedding_only_recall = os_predictions_df[os_predictions_df['retrieved_correct_position'] < k].shape[0]\n", + " reranked_recall = os_predictions_df[os_predictions_df['reranked_correct_position'] < k].shape[0]\n", + " print(k, embedding_only_recall, reranked_recall)\n", + " OPEN_SOURCE_RETRIEVAL.append(embedding_only_recall / os_predictions_df.shape[0])\n", + " OPEN_SOURCE_RETRIEVAL_PLUS_PRE_CE.append(reranked_recall / os_predictions_df.shape[0])" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy without re-ranking: 0.765\n", - "Accuracy with re-ranking: 0.84\n" - ] + "cell_type": "code", + "execution_count": 48, + "id": "f733061c-eb98-40cb-b116-602e1c0dcd3e", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 564 + }, + "id": "f733061c-eb98-40cb-b116-602e1c0dcd3e", + "outputId": "b1d6c0c9-3e54-4193-9395-8fab5bdd0f07" + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "# Creating the plot\n", + "plt.figure(figsize=(10, 6))\n", + "plt.plot(X, OPENAI_RETRIEVAL, label='OAI Retrieval Only', marker='o')\n", + "plt.plot(X, OLD_CROSS_ENCODER, label='OAI + Pretrained CE', marker='s')\n", + "\n", + "plt.plot(X, OPEN_SOURCE_RETRIEVAL, label='OS Retrieval Only', marker='*')\n", + "plt.plot(X, OPEN_SOURCE_RETRIEVAL_PLUS_PRE_CE, label='OS + Pretrained CE', marker='^')\n", + "\n", + "\n", + "# Adding titles and labels\n", + "plt.title('Comparing embedding models + pre-trained vs fine-tuned CE (all retrieved 50 results then re-ranked)')\n", + "plt.xlabel('Recall @')\n", + "plt.ylabel('Performance')\n", + "plt.xticks(X)\n", + "plt.yticks([i/100 for i in range(70, 101, 5)]) # Adjusting y-ticks to start from 0.75\n", + "\n", + "# Adding legend\n", + "plt.legend()\n", + "\n", + "# Show the plot\n", + "plt.grid(True)" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - " 26%|██▌ | 300/1148 [07:57<22:00, 1.56s/it]" - ] + "cell_type": "code", + "execution_count": 48, + "id": "097fe3bc-4fc1-4e69-bf60-6ab4f75da968", + "metadata": { + "id": "097fe3bc-4fc1-4e69-bf60-6ab4f75da968" + }, + "outputs": [], + "source": [] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy without re-ranking: 0.7666666666666667\n", - "Accuracy with re-ranking: 0.8266666666666667\n" - ] + "cell_type": "code", + "execution_count": 48, + "id": "f11e5dcd-7ad8-43d0-aa1b-2b94b3784253", + "metadata": { + "id": "f11e5dcd-7ad8-43d0-aa1b-2b94b3784253" + }, + "outputs": [], + "source": [] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - " 35%|███▍ | 400/1148 [10:37<19:44, 1.58s/it]" - ] + "cell_type": "code", + "execution_count": 48, + "id": "06e3a30f-e913-43ad-9fa7-f712abfa59d0", + "metadata": { + "id": "06e3a30f-e913-43ad-9fa7-f712abfa59d0" + }, + "outputs": [], + "source": [] }, { - "name": 
"stdout", - "output_type": "stream", - "text": [ - "Accuracy without re-ranking: 0.7625\n", - "Accuracy with re-ranking: 0.84\n" - ] + "cell_type": "code", + "execution_count": 48, + "id": "91e6c207-7f75-4b04-8d32-541265902fa5", + "metadata": { + "id": "91e6c207-7f75-4b04-8d32-541265902fa5" + }, + "outputs": [], + "source": [] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - " 44%|████▎ | 500/1148 [13:17<17:16, 1.60s/it]" - ] + "cell_type": "code", + "execution_count": 48, + "id": "9ec04ddb-5a7d-4506-a0ba-a3ddedf985df", + "metadata": { + "id": "9ec04ddb-5a7d-4506-a0ba-a3ddedf985df" + }, + "outputs": [], + "source": [] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy without re-ranking: 0.764\n", - "Accuracy with re-ranking: 0.85\n" - ] + "cell_type": "code", + "execution_count": 48, + "id": "30c5cb27-018c-4299-a91f-6b39293c583e", + "metadata": { + "id": "30c5cb27-018c-4299-a91f-6b39293c583e" + }, + "outputs": [], + "source": [] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - " 52%|█████▏ | 600/1148 [16:00<14:35, 1.60s/it]" - ] + "cell_type": "code", + "execution_count": 48, + "id": "c3200a66-9fc0-4f39-94e8-3456b85b6a96", + "metadata": { + "id": "c3200a66-9fc0-4f39-94e8-3456b85b6a96" + }, + "outputs": [], + "source": [] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy without re-ranking: 0.7683333333333333\n", - "Accuracy with re-ranking: 0.8566666666666667\n" - ] + "cell_type": "markdown", + "id": "ea30153c", + "metadata": { + "id": "ea30153c" + }, + "source": [ + "## Advanced: Fine-tuning the re-ranker" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - " 61%|██████ | 700/1148 [18:40<12:28, 1.67s/it]" - ] + "cell_type": "code", + "execution_count": 48, + "id": "58979a1e-0fe1-488f-9805-56efd499278d", + "metadata": { + "id": "58979a1e-0fe1-488f-9805-56efd499278d" + }, + "outputs": [], + "source": [] }, { - "name": "stdout", - "output_type": "stream", - "text": 
[ - "Accuracy without re-ranking: 0.7471428571428571\n", - "Accuracy with re-ranking: 0.8428571428571429\n" - ] + "cell_type": "code", + "execution_count": 49, + "id": "508edaf6", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "508edaf6", + "outputId": "f2be3e3c-c9cf-4d00-b6ee-cc03060cb1ea" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'id': 'a4968ca8a18de16aa3859be760e43dbd3af3fce9',\n", + " 'title': 'Area 51',\n", + " 'context': 'In 1994, five unnamed civilian contractors and the widows of contractors Walter Kasza and Robert Frost sued the USAF and the United States Environmental Protection Agency. Their suit, in which they were represented by George Washington University law professor Jonathan Turley, alleged they had been present when large quantities of unknown chemicals had been burned in open pits and trenches at Groom. Biopsies taken from the complainants were analyzed by Rutgers University biochemists, who found high levels of dioxin, dibenzofuran, and trichloroethylene in their body fat. The complainants alleged they had sustained skin, liver, and respiratory injuries due to their work at Groom, and that this had contributed to the deaths of Frost and Kasza. The suit sought compensation for the injuries they had sustained, claiming the USAF had illegally handled toxic materials, and that the EPA had failed in its duty to enforce the Resource Conservation and Recovery Act (which governs handling of dangerous materials). They also sought detailed information about the chemicals to which they were allegedly exposed, hoping this would facilitate the medical treatment of survivors. Congressman Lee H. 
Hamilton, former chairman of the House Intelligence Committee, told 60 Minutes reporter Lesley Stahl, \"The Air Force is classifying all information about Area 51 in order to protect themselves from a lawsuit.\"',\n", + " 'question': 'Who analyzed the biopsies?',\n", + " 'answers': {'answer_start': [457],\n", + " 'text': ['Rutgers University biochemists']}}" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset['train'][0]" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - " 70%|██████▉ | 800/1148 [21:21<09:13, 1.59s/it]" - ] + "cell_type": "code", + "execution_count": 127, + "id": "51bf1607", + "metadata": { + "id": "51bf1607" + }, + "outputs": [], + "source": [ + "from sentence_transformers import InputExample, losses, evaluation\n", + "from torch.utils.data import DataLoader\n", + "from random import shuffle" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy without re-ranking: 0.74875\n", - "Accuracy with re-ranking: 0.835\n" - ] + "cell_type": "code", + "execution_count": 127, + "id": "6b58d2d0", + "metadata": { + "id": "6b58d2d0" + }, + "outputs": [], + "source": [] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - " 78%|███████▊ | 900/1148 [25:36<06:43, 1.63s/it]" - ] + "cell_type": "code", + "execution_count": 128, + "id": "50fd5c19-894a-41e4-bf6f-d9e1197c053f", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "50fd5c19-894a-41e4-bf6f-d9e1197c053f", + "outputId": "64064125-f34e-4f03-c4ab-3855ae2572c7" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(9916, 11590)" + ] + }, + "execution_count": 128, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "unique_train_passages = list(set(dataset['train']['context']))\n", + "len(unique_train_passages), len(dataset['train']['context'])" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy 
without re-ranking: 0.7388888888888889\n", - "Accuracy with re-ranking: 0.8333333333333334\n" - ] + "cell_type": "code", + "execution_count": 129, + "id": "5eaad504-b944-4e36-987f-34126189c1a9", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5eaad504-b944-4e36-987f-34126189c1a9", + "outputId": "03bd8016-9531-4004-d84f-648035105bd0" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(9916, (1148, 768))" + ] + }, + "execution_count": 129, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(unique_train_passages), doc_emb.shape" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - " 87%|████████▋ | 1000/1148 [28:16<04:00, 1.62s/it]" - ] + "cell_type": "code", + "execution_count": 133, + "id": "361d5ef0-3fe3-4ed8-87c0-1cea3d9abcc2", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 49, + "referenced_widgets": [ + "0452b93e26154e0d915ad3a201e307fd", + "486ab2a8181d4b32aedbb3fea18bef40", + "da4aa9007a4c4814a7fcd55ea9bcce9f", + "fac98921b88f41b08db63e93399f7c87", + "f7930aa886e941b3b15239a4f35138b9", + "fd0a7934959d4fb88f5dab916d42d3a7", + "75f7bbb0bb384ae592dea5059a146b9c", + "f6f951a5f4fe43d3b6840459520925a6", + "5d958e1742124b5b9ca2357c22686754", + "41ae50f47c174b3ebec44009b57639cb", + "10e72a6bbfa6473f952f8572259277ab" + ] + }, + "id": "361d5ef0-3fe3-4ed8-87c0-1cea3d9abcc2", + "outputId": "fa963999-c3a8-4ab8-cdfb-813f24a33425", + "scrolled": true + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "0452b93e26154e0d915ad3a201e307fd", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Batches: 0%| | 0/310 [00:00" + ] + }, + "execution_count": 159, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import pandas as pd\n", + "pd.Series([t.label for t in train_samples]).value_counts().plot(kind='bar')" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 1148/1148 [32:14<00:00, 1.68s/it]\n" - ] - } - ], - "source": [ - "logger.setLevel(logging.CRITICAL)\n", - "\n", - "ft_predictions = []\n", - "\n", - "for question in tqdm(test_sample['question']):\n", - " r = get_results_from_pinecone(\n", - " question, top_k=TOP_K, re_rank_model=finetuned, correct_hash=q_to_hash[question],\n", - " verbose=False\n", - " )\n", - "\n", - " r['retrieved_correct_position'], r['reranked_correct_position']\n", - " ft_predictions.append(r)\n", - " if len(ft_predictions) % 100 == 0:\n", - " retrieved_accuracy = sum([_['retrieved_correct_position'] == 0 for _ in ft_predictions])/len(ft_predictions)\n", - " re_ranked_accuracy = sum([_['reranked_correct_position'] == 0 for _ in ft_predictions])/len(ft_predictions)\n", - "\n", - " print(f'Accuracy without re-ranking: {retrieved_accuracy}')\n", - " print(f'Accuracy with re-ranking: {re_ranked_accuracy}')\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a07b782d-ef97-4c0a-be53-aefea5936676", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "cell_type": "code", + "execution_count": 160, + "id": "LOZkS9bSgwga", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LOZkS9bSgwga", + "outputId": "d003de25-ca53-47ce-9c39-76d2230f03eb" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'id': 'a4968ca8a18de16aa3859be760e43dbd3af3fce9',\n", + " 'title': 'Area 51',\n", + " 'context': 'In 1994, five unnamed civilian contractors and the widows of contractors Walter Kasza and Robert Frost sued the USAF and the United States Environmental Protection Agency. 
Their suit, in which they were represented by George Washington University law professor Jonathan Turley, alleged they had been present when large quantities of unknown chemicals had been burned in open pits and trenches at Groom. Biopsies taken from the complainants were analyzed by Rutgers University biochemists, who found high levels of dioxin, dibenzofuran, and trichloroethylene in their body fat. The complainants alleged they had sustained skin, liver, and respiratory injuries due to their work at Groom, and that this had contributed to the deaths of Frost and Kasza. The suit sought compensation for the injuries they had sustained, claiming the USAF had illegally handled toxic materials, and that the EPA had failed in its duty to enforce the Resource Conservation and Recovery Act (which governs handling of dangerous materials). They also sought detailed information about the chemicals to which they were allegedly exposed, hoping this would facilitate the medical treatment of survivors. Congressman Lee H. 
Hamilton, former chairman of the House Intelligence Committee, told 60 Minutes reporter Lesley Stahl, \"The Air Force is classifying all information about Area 51 in order to protect themselves from a lawsuit.\"',\n", + " 'question': 'Who analyzed the biopsies?',\n", + " 'answers': {'answer_start': [457],\n", + " 'text': ['Rutgers University biochemists']}}" + ] + }, + "execution_count": 160, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset['train'][0]" + ] }, - "id": "a07b782d-ef97-4c0a-be53-aefea5936676", - "outputId": "b4daa149-efbf-4383-c233-ccd7b2bcfc17" - }, - "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy without re-ranking: 0.7543554006968641\n", - "Accuracy with re-ranking: 0.8493031358885017\n" - ] - } - ], - "source": [ - "retrieved_accuracy = sum([_['retrieved_correct_position'] == 0 for _ in ft_predictions])/len(ft_predictions)\n", - "re_ranked_accuracy = sum([_['reranked_correct_position'] == 0 for _ in ft_predictions])/len(ft_predictions)\n", - "\n", - "print(f'Accuracy without re-ranking: {retrieved_accuracy}')\n", - "print(f'Accuracy with re-ranking: {re_ranked_accuracy}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bb46e242", - "metadata": { - "id": "bb46e242" - }, - "outputs": [], - "source": [ - "# Re-ranking got slightly better after 1 epoch" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0cfcd92e-31ad-4bf7-9e30-b6d260a06871", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 206 + "cell_type": "code", + "execution_count": 161, + "id": "8e7eaa40", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8e7eaa40", + "outputId": "f6201d48-c8f6-49b3-f063-00d577bfc6c2" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'guid': '',\n", + " 'texts': ['Whose oral histories are recorded?',\n", + " \"The story begins in 1885 with the arrival of an important new 
guest star in Buffalo Bill Cody's grand illusion, Chief Sitting Bull of Little Big Horn fame. Much to Cody's annoyance, Sitting Bull proves not to be a murdering savage but a genuine embodiment of what the whites believe about their own history out west. He is quietly heroic and morally pure.\"],\n", + " 'label': 0}" + ] + }, + "execution_count": 161, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_samples[2].__dict__" + ] }, - "id": "0cfcd92e-31ad-4bf7-9e30-b6d260a06871", - "outputId": "ea88ceee-5307-491e-afe8-a5a8a43dba97" - }, - "outputs": [ { - "data": { - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
final_resultsretrieved_correct_positionreranked_correct_positionresults_from_pinecone
0{'final_results': [{'score': 0.8746502, 'id': ...0.00.0[{'id': '2f90090e21f19450887d5f3ff781e541',\n", - " '...
1{'final_results': [{'score': 0.8746502, 'id': ...0.00.0[{'id': '49201636ad4102735125e146c0dbafa4',\n", - " '...
2{'final_results': [{'score': 0.8746502, 'id': ...0.00.0[{'id': '80d92494d2b06f341842f1855d2938cf',\n", - " '...
3{'final_results': [{'score': 0.8746502, 'id': ...0.00.0[{'id': 'e3fd54f33b021ea3cf88b438fefcada7',\n", - " '...
4{'final_results': [{'score': 0.8746502, 'id': ...0.00.0[{'id': '37ed40c20d4e1b9cc8d6cc27a4d0fff3',\n", - " '...
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" + "cell_type": "code", + "execution_count": 162, + "id": "bc2bc9d1", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bc2bc9d1", + "outputId": "508244d4-c05c-4cac-fe52-5a51cbd41fb1" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Warmup-steps: 174\n" + ] + } ], - "text/plain": [ - " final_results \\\n", - "0 {'final_results': [{'score': 0.8746502, 'id': ... \n", - "1 {'final_results': [{'score': 0.8746502, 'id': ... \n", - "2 {'final_results': [{'score': 0.8746502, 'id': ... \n", - "3 {'final_results': [{'score': 0.8746502, 'id': ... \n", - "4 {'final_results': [{'score': 0.8746502, 'id': ... \n", - "\n", - " retrieved_correct_position reranked_correct_position \\\n", - "0 0.0 0.0 \n", - "1 0.0 0.0 \n", - "2 0.0 0.0 \n", - "3 0.0 0.0 \n", - "4 0.0 0.0 \n", - "\n", - " results_from_pinecone \n", - "0 [{'id': '2f90090e21f19450887d5f3ff781e541',\n", - " '... \n", - "1 [{'id': '49201636ad4102735125e146c0dbafa4',\n", - " '... \n", - "2 [{'id': '80d92494d2b06f341842f1855d2938cf',\n", - " '... \n", - "3 [{'id': 'e3fd54f33b021ea3cf88b438fefcada7',\n", - " '... \n", - "4 [{'id': '37ed40c20d4e1b9cc8d6cc27a4d0fff3',\n", - " '... 
" + "source": [ + "from sentence_transformers.cross_encoder.evaluation import CECorrelationEvaluator, CEBinaryClassificationEvaluator\n", + "import math\n", + "import torch\n", + "from random import sample\n", + "\n", + "logger.setLevel(logging.DEBUG) # just to get some logs\n", + "\n", + "num_epochs = 1\n", + "\n", + "model_save_path = './fine_tuned_ir_cross_encoder'\n", + "\n", + "train_dataloader = DataLoader(train_samples[:int(len(train_samples)*.8)], shuffle=True, batch_size=16)\n", + "\n", + "# An evaluator for training performance\n", + "evaluator = CECorrelationEvaluator.from_input_examples(train_samples[int(len(train_samples)*.8):], name='test')\n", + "\n", + "# Rule of thumb for warmup steps\n", + "warmup_steps = math.ceil(len(train_dataloader) * num_epochs * 0.1) # 10% of train data for warm-up\n", + "print(f\"Warmup-steps: {warmup_steps}\")" ] - }, - "execution_count": 276, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ft_predictions_df = pd.DataFrame(ft_predictions)\n", - "ft_predictions_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9Sr2q5IN9LMz", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" }, - "id": "9Sr2q5IN9LMz", - "outputId": "4a670895-a2df-4300-9b8c-5d549c2e8517" - }, - "outputs": [ { - "data": { - "text/plain": [ - "retrieved_correct_position 26\n", - "reranked_correct_position 26\n", - "dtype: int64" + "cell_type": "code", + "execution_count": 163, + "id": "ce4e8079-1335-4d53-944d-3ed0f6537781", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 172, + "referenced_widgets": [ + "4ec583e4c526449b993c0f7f9e6e5456", + "8daf379772f6429293b659d29efbebef", + "ffcefea0254449869f46b1a160222aa2", + "690731860e1643d4a3873f52786127eb", + "0098c6042695487eabdc18342cda7186", + "4ccf610615584abc9e10caa78e3100f4", + "87782a63074c4714b8a80f5070c8830c", + "ed03d3a8b6ad48c08c74bcd361eec589", + "c2ed1b757c3840468db308d6489e1d50", + 
"6fd3718bdc214b2ea9ebc4961ff6c9dc", + "338a83663ea946aca37b2a109bbdd437", + "1aab71dc8f7345718950c689bf25b115", + "8534c0df3c684480bf53ae7c0e605a97", + "8c7dddcedd9d4f9f85478ef50f2852f1", + "4aa325d9b5a4423bbe4f4395897bd4f2", + "4f15b9ad923b49289277175ca5103c13", + "d10ac840d1284c1d93cd3790bd69459a", + "c6dbd568908a470fa9eee035a46005c2", + "04f07748dd33447784c3354f58f38727", + "cb74baeb010049fdbd3f3e3be7b1988f", + "c09ebe1563d8477aa0e5ec88160ccda2", + "f697561ad26048cc821cb4614f664edc" + ] + }, + "id": "ce4e8079-1335-4d53-944d-3ed0f6537781", + "outputId": "744174d9-3474-40d2-ce04-ca0db6ee5e6d" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Example of label 1\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "4ec583e4c526449b993c0f7f9e6e5456", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Batches: 0%| | 0/1 [00:00\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
retrieved_correct_positionreranked_correct_position
00.00.0
10.00.0
20.00.0
31.00.0
40.00.0
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - " \n" + "cell_type": "code", + "execution_count": 167, + "id": "2f747537", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 135, + "referenced_widgets": [ + "1c9e78b5942a444ab729333d57ab00a1", + "de21ddc2e22b49cd8429654f9187b61a", + "efbb6facf5484cb990f663a05e981b02", + "9a43fe57b2974c3f82c96226e31e6e64", + "d8120cb8a36f47e0b355ace30dbccf30", + "1a3c1db02f9544e78c2599c7566befc0", + "b915cbcacccc4dc68440908fb073a48e", + "928aa8b44ee34dcea5f9656024bda180", + "7b6179c28a554ce5a6b70ec6318c284f", + "69bcf5c05eb5461f9e5fe64448d38875", + "8976021d2c7a421785f3559a24105089", + "fd1d2ee78abd494db082ad8d161bd623", + "d388a00d8638436c9d3aa477fe708a02", + "d53245c4a74745e9a70c03924debcf98", + "e6bc3b7cc0d04ca2b5ae6fab4d5c3ec3", + "a2eafe380ca74843abd46ccb0a4132f8", + "f6e42a3fad9f445d8385a691715e5441", + "878d1b336d084e90b84c5e7d9585dc71", + "8e3c115a975c4ecb9f7948d0dc318282", + "fc916e59dce04b68993a6922695d7226", + "79562014e45a4002a0f364cfa7a1f94e", + "e59da773d0ed452bbec059a7973e8ad1" + ] + }, + "id": "2f747537", + "outputId": "55a7a79e-0609-4328-ecf5-0d48c51eb0fc" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:sentence_transformers.cross_encoder.CrossEncoder:Use pytorch device: cuda\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1c9e78b5942a444ab729333d57ab00a1", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Batches: 0%| | 0/1 [00:00" + "cell_type": "code", + "execution_count": 170, + "id": "bb46e242", + "metadata": { + "id": "bb46e242" + }, + "outputs": [], + "source": [ + "# Re-ranking got slightly better after 1 epoch" ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import matplotlib.pyplot as plt\n", - "\n", - "X = [1, 3, 5, 10, 25, 50]\n", - "\n", - "OPENAI_RETRIEVAL = [866 / predictions_df.shape[0], 1009/ predictions_df.shape[0], 1048/ predictions_df.shape[0], 1072/ 
predictions_df.shape[0], 1108/ predictions_df.shape[0], 1122/ predictions_df.shape[0]]\n", - "# OPEN_SOURCE_RETRIEVAL = [663 / predictions_df.shape[0], 795/ predictions_df.shape[0], 838/ predictions_df.shape[0], 891/ predictions_df.shape[0], 966/ predictions_df.shape[0], 1004/ predictions_df.shape[0]]\n", - "OLD_CROSS_ENCODER = [956 / predictions_df.shape[0], 1057/ predictions_df.shape[0], 1077/ predictions_df.shape[0], 1094/ predictions_df.shape[0], 1120/ predictions_df.shape[0], 1122/ predictions_df.shape[0]]\n", - "NEW_CROSS_ENCODER = [975 / predictions_df.shape[0], 1056/ predictions_df.shape[0], 1077/ predictions_df.shape[0], 1101/ predictions_df.shape[0], 1117/ predictions_df.shape[0], 1122/ predictions_df.shape[0]]\n", - "\n", - "# Creating the plot\n", - "plt.figure(figsize=(10, 6))\n", - "plt.plot(X, OPENAI_RETRIEVAL, label='OAI Retrieval Only', marker='o')\n", - "plt.plot(X, OPEN_SOURCE_RETRIEVAL, label='OS Retrieval Only', marker='*')\n", - "plt.plot(X, OPEN_SOURCE_RETRIEVAL_PLUS_FT_CE, label='OS + Finetuned CE', marker='v')\n", - "plt.plot(X, OLD_CROSS_ENCODER, label='OAI + Pretrained CE', marker='s')\n", - "plt.plot(X, NEW_CROSS_ENCODER, label='OAI + Finetuned CE', marker='^')\n", - "\n", - "# Adding titles and labels\n", - "plt.title('Comparing embedding models + pre-trained vs fine-tuned CE (all retrieved 50 results then re-ranked)')\n", - "plt.xlabel('Recall @')\n", - "plt.ylabel('Performance')\n", - "plt.xticks(X)\n", - "plt.yticks([i/100 for i in range(70, 101, 5)]) # Adjusting y-ticks to start from 0.75\n", - "\n", - "# Adding legend\n", - "plt.legend()\n", - "\n", - "# Show the plot\n", - "plt.grid(True)\n", - "# plt.show()\n", - "\n", - "plt.savefig('recall_at_k.png', dpi=1000)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "LFaZAKPhaymM", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 275 }, - "id": "LFaZAKPhaymM", - "outputId": "a58e68c7-f61e-420a-aef9-c8336bb48702" - }, - "outputs": 
[ { - "data": { - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RECALL @OS_Retrieval_OnlyOS_Retrieval_Plus_Finetuned_CEOAI_Retrieval_OnlyOAI_Retrieval_Plus_Pretrained_CEOAI_Retrieval_Plus_Finetuned_CE
010.5017420.6193380.7543550.8327530.849303
130.7726480.8763070.8789200.9207320.919861
250.8257840.9189900.9128920.9381530.938153
3100.8780490.9468640.9337980.9529620.959059
4250.9346690.9608010.9651570.9756100.972997
5500.9642860.9642860.9773520.9773520.977352
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - " \n", - " \n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" + "cell_type": "code", + "execution_count": 171, + "id": "0cfcd92e-31ad-4bf7-9e30-b6d260a06871", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 310 + }, + "id": "0cfcd92e-31ad-4bf7-9e30-b6d260a06871", + "outputId": "accb7c26-3dd6-42fc-f4d0-a9693089894d" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"ft_predictions_df\",\n \"rows\": 1148,\n \"fields\": [\n {\n \"column\": \"final_results\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"retrieved_correct_position\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4.837119995625484,\n \"min\": 0.0,\n \"max\": 49.0,\n \"num_unique_values\": 31,\n \"samples\": [\n 26.0,\n 10.0,\n 39.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"reranked_correct_position\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3.2961654984934645,\n \"min\": 0.0,\n \"max\": 48.0,\n \"num_unique_values\": 25,\n \"samples\": [\n 17.0,\n 9.0,\n 0.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"results_from_pinecone\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"top_re_rank_score\",\n \"properties\": {\n \"dtype\": \"float32\",\n \"num_unique_values\": 1148,\n \"samples\": [\n 0.989776611328125,\n 0.9937517046928406,\n 0.10413498431444168\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe", + "variable_name": "ft_predictions_df" + }, + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
final_resultsretrieved_correct_positionreranked_correct_positionresults_from_pineconetop_re_rank_score
0[{'score': 0.27245408, 'id': '2f90090e21f19450...0.00.0[{'id': '2f90090e21f19450887d5f3ff781e541',\n", + " '...0.272454
1[{'score': 0.95008755, 'id': '49201636ad410273...0.00.0[{'id': '49201636ad4102735125e146c0dbafa4',\n", + " '...0.950088
2[{'score': 0.9484397, 'id': '80d92494d2b06f341...0.00.0[{'id': '80d92494d2b06f341842f1855d2938cf',\n", + " '...0.948440
3[{'score': 0.32520765, 'id': 'e3fd54f33b021ea3...0.00.0[{'id': 'e3fd54f33b021ea3cf88b438fefcada7',\n", + " '...0.325208
4[{'score': 0.4159641, 'id': '37ed40c20d4e1b9cc...0.00.0[{'id': '37ed40c20d4e1b9cc8d6cc27a4d0fff3',\n", + " '...0.415964
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " final_results \\\n", + "0 [{'score': 0.27245408, 'id': '2f90090e21f19450... \n", + "1 [{'score': 0.95008755, 'id': '49201636ad410273... \n", + "2 [{'score': 0.9484397, 'id': '80d92494d2b06f341... \n", + "3 [{'score': 0.32520765, 'id': 'e3fd54f33b021ea3... \n", + "4 [{'score': 0.4159641, 'id': '37ed40c20d4e1b9cc... \n", + "\n", + " retrieved_correct_position reranked_correct_position \\\n", + "0 0.0 0.0 \n", + "1 0.0 0.0 \n", + "2 0.0 0.0 \n", + "3 0.0 0.0 \n", + "4 0.0 0.0 \n", + "\n", + " results_from_pinecone top_re_rank_score \n", + "0 [{'id': '2f90090e21f19450887d5f3ff781e541',\n", + " '... 0.272454 \n", + "1 [{'id': '49201636ad4102735125e146c0dbafa4',\n", + " '... 0.950088 \n", + "2 [{'id': '80d92494d2b06f341842f1855d2938cf',\n", + " '... 0.948440 \n", + "3 [{'id': 'e3fd54f33b021ea3cf88b438fefcada7',\n", + " '... 0.325208 \n", + "4 [{'id': '37ed40c20d4e1b9cc8d6cc27a4d0fff3',\n", + " '... 0.415964 " + ] + }, + "execution_count": 171, + "metadata": {}, + "output_type": "execute_result" + } ], - "text/plain": [ - " RECALL @ OS_Retrieval_Only OS_Retrieval_Plus_Finetuned_CE \\\n", - "0 1 0.501742 0.619338 \n", - "1 3 0.772648 0.876307 \n", - "2 5 0.825784 0.918990 \n", - "3 10 0.878049 0.946864 \n", - "4 25 0.934669 0.960801 \n", - "5 50 0.964286 0.964286 \n", - "\n", - " OAI_Retrieval_Only OAI_Retrieval_Plus_Pretrained_CE \\\n", - "0 0.754355 0.832753 \n", - "1 0.878920 0.920732 \n", - "2 0.912892 0.938153 \n", - "3 0.933798 0.952962 \n", - "4 0.965157 0.975610 \n", - "5 0.977352 0.977352 \n", - "\n", - " OAI_Retrieval_Plus_Finetuned_CE \n", - "0 0.849303 \n", - "1 0.919861 \n", - "2 0.938153 \n", - "3 0.959059 \n", - "4 0.972997 \n", - "5 0.977352 " + "source": [ + "ft_predictions_df = pd.DataFrame(ft_predictions)\n", + "ft_predictions_df.head()" ] - }, - "execution_count": 448, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# show results as a table\n", - "\n", - "results_df = pd.DataFrame({'RECALL 
@': [1, 3, 5, 10, 25, 50], 'OS_Retrieval_Only': OPEN_SOURCE_RETRIEVAL, 'OS_Retrieval_Plus_Finetuned_CE': OPEN_SOURCE_RETRIEVAL_PLUS_FT_CE , 'OAI_Retrieval_Only': OPENAI_RETRIEVAL , 'OAI_Retrieval_Plus_Pretrained_CE': OLD_CROSS_ENCODER, 'OAI_Retrieval_Plus_Finetuned_CE': NEW_CROSS_ENCODER})\n", - "results_df" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "gpuType": "T4", - "machine_shape": "hm", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.5" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "0295736b3f254a8590d911792f2c19db": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_3f1ba16f17c94fb8a409d152b9a6045a", - "max": 1739, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_c485139a148e4207957e96b4275eeed2", - "value": 1739 - } - }, - "084ebc36eb9d4616acc1a0843673ac4f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": 
"LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "098016d32abc4f45be0a4f747cbc3f11": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } }, - "0caf250cdae643479160eedc708ffeea": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_fa6089718c574ee19bd3c378e356ea6b", - "IPY_MODEL_f78b642ffb784f9097e0cbf97c44b9bd", - "IPY_MODEL_7c322a232739486a937883ac787f8750" + { + "cell_type": "code", + "execution_count": 172, + "id": 
"9Sr2q5IN9LMz", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 147 + }, + "id": "9Sr2q5IN9LMz", + "outputId": "57687adb-038f-443b-884c-fcac1ee25bcf" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
retrieved_correct_position25
reranked_correct_position25
\n", + "

" + ], + "text/plain": [ + "retrieved_correct_position 25\n", + "reranked_correct_position 25\n", + "dtype: int64" + ] + }, + "execution_count": 172, + "metadata": {}, + "output_type": "execute_result" + } ], - "layout": "IPY_MODEL_1bc5237b0db94f4aa124c8c79a891955" - } - }, - "11e5c44fa73c4d5d88561036f2a0a070": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "168948926c2340abb8b52f8dd67a5e73": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - 
"border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1bc5237b0db94f4aa124c8c79a891955": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": 
null - } + "source": [ + "ft_predictions_df[['retrieved_correct_position', 'reranked_correct_position']].isnull().sum()" + ] }, - "21316ef8a22d40ab9109ccf0fa7fa645": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_458bcab796d642c9afd2e534085b991f", - "IPY_MODEL_266b42f10bd74b9f907588c1251c8d6c", - "IPY_MODEL_ba968ae891a2400fa0db9f331b85b66c" + { + "cell_type": "code", + "execution_count": 173, + "id": "75b414c6", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 147 + }, + "id": "75b414c6", + "outputId": "214e9739-8574-42df-9e5f-eff32d954ac8" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
retrieved_correct_position1.346394
reranked_correct_position0.547640
\n", + "

" + ], + "text/plain": [ + "retrieved_correct_position 1.346394\n", + "reranked_correct_position 0.547640\n", + "dtype: float64" + ] + }, + "execution_count": 173, + "metadata": {}, + "output_type": "execute_result" + } ], - "layout": "IPY_MODEL_5d09ef7e47f446a3a3a51458195a0fe3" - } - }, - "252dfe4d46604a089c263bb0eae17a3a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "266b42f10bd74b9f907588c1251c8d6c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b070922627a04195a41899a169984b08", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_85a67e7f5d2845329d63f0922bb5d9a6", - "value": 1 - } - }, - "2aa32422748943759daf5ef05077bda0": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - 
"bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "2c83393c2b7745889765d8efb4890763": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - 
"2cb4c1bff01b4f669ecd8583187bab9f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "348701c9256e4eb48b8ffb0d9c0d6c83": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "352af51e6e7943a7a41d3bbe41885d8d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - 
"object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + "source": [ + "ft_predictions_df[['retrieved_correct_position', 'reranked_correct_position']].mean()" + ] }, - "37395cf0cac548c783ccbdac230c4a3c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_8085fe243d814d5da2b4d6b665a6aa51", - "IPY_MODEL_0295736b3f254a8590d911792f2c19db", - "IPY_MODEL_ad079ced956841be88a3ab6f2fdcd88d" + { + "cell_type": "code", + "execution_count": 174, + "id": "2535cbae", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2535cbae", + "outputId": "f7dc4eb2-88fa-4f17-f8d3-e1f563e4185b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1 865 1022\n", + "3 1009 1083\n", + "5 1048 1090\n", + "10 1073 1104\n", + "25 1108 1117\n", + "50 1123 1123\n" + ] + } ], - "layout": "IPY_MODEL_953b16307b514bc995efc40ff8f4698b" - } - }, - "3838d1bfccb5402d91ecf96f51f9c2d7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e732511315484861b39d0efb86b9cfb5", - "placeholder": "​", - "style": 
"IPY_MODEL_3df968e9ff654de7bfc95f08c88a99c6", - "value": " 1/1 [00:00<00:00, 33.52it/s]" - } - }, - "3df968e9ff654de7bfc95f08c88a99c6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "3f1ba16f17c94fb8a409d152b9a6045a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "43c8db4be4984f6da1540de6bc89b616": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": 
[], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_dc26c84713fb45f5a6c5b870c96d9d9e", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_a6ac9d7aed4c493db567b015ab8502a5", - "value": 1 - } - }, - "458bcab796d642c9afd2e534085b991f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_352af51e6e7943a7a41d3bbe41885d8d", - "placeholder": "​", - "style": "IPY_MODEL_2cb4c1bff01b4f669ecd8583187bab9f", - "value": "Batches: 100%" - } - }, - "4d26773cf0ec45fc9ac3185f8141cfe6": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - 
"grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4f4986d008434fc9912c496636b40b38": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5a7b319329164b27bf452869e2335aef": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - 
"_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5b3c84192618410ea1793239c4514c6e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - 
"object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5d09ef7e47f446a3a3a51458195a0fe3": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "608d1345487046ceb64b0b31a4287ae0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_fce9aa1662c34941afadd6141aba9180", - 
"placeholder": "​", - "style": "IPY_MODEL_098016d32abc4f45be0a4f747cbc3f11", - "value": "Batches: 100%" - } - }, - "66f8508878e94e4296e2ffb009afdb75": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "6854fe2978954c7b9e30202a6ea34b57": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "687d7bf7373f4541852203637e18d2e3": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { 
- "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "6cb81ad9ee8141aa9d0fbefc276f244d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_80e18075a957476591fa1ba2585b7a73", - "placeholder": "​", - "style": "IPY_MODEL_e1d8ec56e5614d77a6999a42283be94b", - "value": " 1/1 [00:00<00:00, 27.76it/s]" - } - }, - "6f5389fdc3674a05ad79bb9456a5995f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e281ca37b72d422285aa64193265aaa4", - "placeholder": "​", - "style": "IPY_MODEL_687d7bf7373f4541852203637e18d2e3", - "value": "Batches: 100%" - } - }, - "6f96c8a415744597bcaa83141b3a457c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - 
"_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "70e45cb4f2f64c499acc785ec64743fe": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d9f6781147a24bc3aec97b87927bdfbb", - "placeholder": "​", - "style": "IPY_MODEL_d0c85ebbf1a54fdcaa33210aa3e247b8", - "value": " 620/620 [00:30<00:00, 100.95it/s]" - } - }, - "7a5c511f2fe84c66beef7fed87023669": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": 
"ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5a7b319329164b27bf452869e2335aef", - "max": 620, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_e5a88639e7e5430fbe44f73d2e5a4b23", - "value": 620 - } - }, - "7ab68b0af00248f5bbc15a39bbf38166": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7bd0527b8e174aec9911c5b32dc048fd": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - 
}, - "7c322a232739486a937883ac787f8750": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_11e5c44fa73c4d5d88561036f2a0a070", - "placeholder": "​", - "style": "IPY_MODEL_9781773e5d4d4e0ab8e3ff3140bd0983", - "value": " 36/36 [00:16<00:00, 5.64it/s]" - } - }, - "8085fe243d814d5da2b4d6b665a6aa51": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4d26773cf0ec45fc9ac3185f8141cfe6", - "placeholder": "​", - "style": "IPY_MODEL_d7959d1933e5466095bed23bb268afd5", - "value": "Iteration: 100%" - } - }, - "80e18075a957476591fa1ba2585b7a73": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - 
"grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "8396e1788d5540a79a9d195e2753351a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "85a67e7f5d2845329d63f0922bb5d9a6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "8e41a0a7932b4f0b8a448847ff3f14ff": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - 
"description_width": "" - } - }, - "8e989ddf9d274876a3ba06056e7147a1": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "953b16307b514bc995efc40ff8f4698b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - 
"grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "9781773e5d4d4e0ab8e3ff3140bd0983": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "999f219c7c9449f0a48feb59eddd23eb": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "9d726c5753564258bbe1c281fa8bfefa": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": 
"IPY_MODEL_66f8508878e94e4296e2ffb009afdb75", - "placeholder": "​", - "style": "IPY_MODEL_999f219c7c9449f0a48feb59eddd23eb", - "value": "Batches: 100%" - } - }, - "a6ac9d7aed4c493db567b015ab8502a5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "ad079ced956841be88a3ab6f2fdcd88d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_6f96c8a415744597bcaa83141b3a457c", - "placeholder": "​", - "style": "IPY_MODEL_8e41a0a7932b4f0b8a448847ff3f14ff", - "value": " 1739/1739 [10:05<00:00, 2.86it/s]" - } - }, - "b070922627a04195a41899a169984b08": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - 
"grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b281e2a5a8204b52827f01316d637952": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_084ebc36eb9d4616acc1a0843673ac4f", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_252dfe4d46604a089c263bb0eae17a3a", - "value": 1 - } - }, - "b6ecc1cc5b2e4332b56fdc1614823d3e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "b86aa89087cb425198afa2db30d86600": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - 
"_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b9fbabd33a2a4d0a94055cd086d83228": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_168948926c2340abb8b52f8dd67a5e73", - "placeholder": "​", - "style": "IPY_MODEL_8396e1788d5540a79a9d195e2753351a", - "value": "Epoch: 100%" - } - }, - "ba968ae891a2400fa0db9f331b85b66c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - 
"_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5b3c84192618410ea1793239c4514c6e", - "placeholder": "​", - "style": "IPY_MODEL_7bd0527b8e174aec9911c5b32dc048fd", - "value": " 1/1 [00:00<00:00, 28.53it/s]" - } - }, - "bb260568c96a4397bba2b1058404eec2": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + "source": [ + "# do recall @ 1, 3, 5, 10\n", + "OPENAI_RETRIEVAL = []\n", + "OPENAI_RETRIEVAL_PLUS_FT_CE = []\n", + "for k in X:\n", + " embedding_only_recall = ft_predictions_df[ft_predictions_df['retrieved_correct_position'] < k].shape[0]\n", + " reranked_recall = ft_predictions_df[ft_predictions_df['reranked_correct_position'] < k].shape[0]\n", + " 
OPENAI_RETRIEVAL.append(embedding_only_recall / ft_predictions_df.shape[0])\n", + " OPENAI_RETRIEVAL_PLUS_FT_CE.append(reranked_recall / ft_predictions_df.shape[0])\n", + " print(k, embedding_only_recall, reranked_recall)" + ] }, - "c485139a148e4207957e96b4275eeed2": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } + { + "cell_type": "code", + "execution_count": 174, + "id": "d9e5e922-7667-4a02-b4fc-7acb9cc97c24", + "metadata": { + "id": "d9e5e922-7667-4a02-b4fc-7acb9cc97c24" + }, + "outputs": [], + "source": [] }, - "c7800a5a8f1b4c3fa541df6b5ba6281f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_9d726c5753564258bbe1c281fa8bfefa", - "IPY_MODEL_ea3f511248734bbab197277b68346826", - "IPY_MODEL_3838d1bfccb5402d91ecf96f51f9c2d7" + { + "cell_type": "code", + "execution_count": 175, + "id": "8faf0c1a", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 224 + }, + "id": "8faf0c1a", + "outputId": "686c27ab-23ef-4b4a-90ad-8a281023a26e" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 1148/1148 [09:07<00:00, 2.10it/s]\n" + ] + }, + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n 
\"name\": \"os_predictions_df\",\n \"rows\": 1148,\n \"fields\": [\n {\n \"column\": \"retrieved_correct_position\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 7.001637072855341,\n \"min\": 0.0,\n \"max\": 49.0,\n \"num_unique_values\": 44,\n \"samples\": [\n 33.0,\n 4.0,\n 22.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"reranked_correct_position\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3.021110978160727,\n \"min\": 0.0,\n \"max\": 46.0,\n \"num_unique_values\": 21,\n \"samples\": [\n 0.0,\n 14.0,\n 8.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe", + "variable_name": "os_predictions_df" + }, + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
retrieved_correct_positionreranked_correct_position
00.00.0
10.00.0
20.00.0
31.00.0
40.00.0
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " retrieved_correct_position reranked_correct_position\n", + "0 0.0 0.0\n", + "1 0.0 0.0\n", + "2 0.0 0.0\n", + "3 1.0 0.0\n", + "4 0.0 0.0" + ] + }, + "execution_count": 175, + "metadata": {}, + "output_type": "execute_result" + } ], - "layout": "IPY_MODEL_ecc8ed6e869d4b01b7905ca951d0068c" - } - }, - "cdd38d9a948a4a4c8408dcb06d7c584a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_2aa32422748943759daf5ef05077bda0", - "placeholder": "​", - "style": "IPY_MODEL_e7c672eb19574b32811bdcc3f6ac0b34", - "value": " 1/1 [11:44<00:00, 704.92s/it]" - } - }, - "d0c85ebbf1a54fdcaa33210aa3e247b8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "d7959d1933e5466095bed23bb268afd5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "d9f6781147a24bc3aec97b87927bdfbb": { - "model_module": 
"@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + "source": [ + "logger.setLevel(logging.CRITICAL)\n", + "os_predictions = []\n", + "\n", + "for i, question in tqdm(enumerate(test_sample), total=len(test_sample)):\n", + " os_predictions.append(eval_ranking_open_source(bi_encoder, doc_emb, question['question'], top_k=TOP_K, re_rank_model=finetuned))\n", + "\n", + "os_predictions_df = pd.DataFrame(os_predictions)\n", + "os_predictions_df.head()" + ] }, - "dbca2d7da970438892d45cd387434f6f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - 
"_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_6f5389fdc3674a05ad79bb9456a5995f", - "IPY_MODEL_7a5c511f2fe84c66beef7fed87023669", - "IPY_MODEL_70e45cb4f2f64c499acc785ec64743fe" + { + "cell_type": "code", + "execution_count": 176, + "id": "6eaeb9b4", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6eaeb9b4", + "outputId": "8e97a733-1aaa-403c-b8f3-cfdd181814d5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy without re-ranking: 0.5017421602787456\n", + "Accuracy with re-ranking: 0.6428571428571429\n" + ] + } ], - "layout": "IPY_MODEL_bb260568c96a4397bba2b1058404eec2" - } - }, - "dc26c84713fb45f5a6c5b870c96d9d9e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e1d8ec56e5614d77a6999a42283be94b": { - "model_module": 
"@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "e281ca37b72d422285aa64193265aaa4": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + "source": [ + "raw_accuracy = sum([p['retrieved_correct_position'] == 0 for p in os_predictions])/len(os_predictions)\n", + "reranked_accuracy = sum([p['reranked_correct_position'] == 0 for p in os_predictions])/len(os_predictions)\n", + "\n", + "print(f'Accuracy without re-ranking: {raw_accuracy}')\n", + "print(f'Accuracy with re-ranking: 
{reranked_accuracy}')\n" + ] }, - "e31ae00bfabe4ed5b058d7dbe55a80f6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_b9fbabd33a2a4d0a94055cd086d83228", - "IPY_MODEL_b281e2a5a8204b52827f01316d637952", - "IPY_MODEL_cdd38d9a948a4a4c8408dcb06d7c584a" + { + "cell_type": "code", + "execution_count": 177, + "id": "T00iM9SyaUFI", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "T00iM9SyaUFI", + "outputId": "360d171f-1fd0-41f5-a29b-80f380d046c7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1 576 738\n", + "3 887 1029\n", + "5 948 1072\n", + "10 1008 1091\n", + "25 1073 1103\n", + "50 1107 1107\n" + ] + } ], - "layout": "IPY_MODEL_b86aa89087cb425198afa2db30d86600" - } - }, - "e5a88639e7e5430fbe44f73d2e5a4b23": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "e732511315484861b39d0efb86b9cfb5": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", 
- "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e7c672eb19574b32811bdcc3f6ac0b34": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "ea3f511248734bbab197277b68346826": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_7ab68b0af00248f5bbc15a39bbf38166", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": 
"IPY_MODEL_b6ecc1cc5b2e4332b56fdc1614823d3e", - "value": 1 - } - }, - "ecc8ed6e869d4b01b7905ca951d0068c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + "source": [ + "# do recall @ 1, 3, 5, 10\n", + "OPEN_SOURCE_RETRIEVAL = []\n", + "OPEN_SOURCE_RETRIEVAL_PLUS_FT_CE = []\n", + "for k in X:\n", + " embedding_only_recall = os_predictions_df[os_predictions_df['retrieved_correct_position'] < k].shape[0]\n", + " reranked_recall = os_predictions_df[os_predictions_df['reranked_correct_position'] < k].shape[0]\n", + " print(k, embedding_only_recall, reranked_recall)\n", + " OPEN_SOURCE_RETRIEVAL.append(embedding_only_recall / os_predictions_df.shape[0])\n", + " OPEN_SOURCE_RETRIEVAL_PLUS_FT_CE.append(reranked_recall / os_predictions_df.shape[0])" + ] }, - "f36d8f08f6284b848622fd560196cc78": { - "model_module": "@jupyter-widgets/controls", 
- "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_608d1345487046ceb64b0b31a4287ae0", - "IPY_MODEL_43c8db4be4984f6da1540de6bc89b616", - "IPY_MODEL_6cb81ad9ee8141aa9d0fbefc276f244d" + { + "cell_type": "code", + "execution_count": 178, + "id": "-ub8HU8BZAW1", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 564 + }, + "id": "-ub8HU8BZAW1", + "outputId": "f15b2eac-5c5b-4242-925a-26fc12c638bb" + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } ], - "layout": "IPY_MODEL_8e989ddf9d274876a3ba06056e7147a1" - } + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "# Creating the plot\n", + "plt.figure(figsize=(10, 6))\n", + "plt.plot(X, OPENAI_RETRIEVAL, label='OAI Retrieval Only', marker='o')\n", + "plt.plot(X, OPEN_SOURCE_RETRIEVAL, label='OS Retrieval Only', marker='*')\n", + "plt.plot(X, OPEN_SOURCE_RETRIEVAL_PLUS_PRE_CE, label='OS + Pretrained CE', marker='^')\n", + "\n", + "plt.plot(X, OPEN_SOURCE_RETRIEVAL_PLUS_FT_CE, label='OS + Finetuned CE', marker='v')\n", + "plt.plot(X, OLD_CROSS_ENCODER, label='OAI + Pretrained CE', marker='s')\n", + "plt.plot(X, OPENAI_RETRIEVAL_PLUS_FT_CE, label='OAI + Finetuned CE', marker='d')\n", + "\n", + "# Adding titles and labels\n", + "plt.title('Comparing embedding models + pre-trained vs fine-tuned CE (all retrieved 50 results then re-ranked)')\n", + "plt.xlabel('Recall @')\n", + "plt.ylabel('Performance')\n", + "plt.xticks(X)\n", + "plt.yticks([i/100 for i in range(70, 101, 5)]) # Adjusting y-ticks to start from 0.75\n", + "\n", + "# Adding legend\n", + "plt.legend()\n", + "\n", + "# Show the plot\n", + "plt.grid(True)\n", + "# plt.show()\n", + "\n", + "plt.savefig('recall_at_k.png', dpi=1000)" + ] }, - "f78b642ffb784f9097e0cbf97c44b9bd": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4f4986d008434fc9912c496636b40b38", - "max": 36, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_348701c9256e4eb48b8ffb0d9c0d6c83", - 
"value": 36 - } + { + "cell_type": "code", + "execution_count": 181, + "id": "9oGQvj0xZAwo", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 275 + }, + "id": "9oGQvj0xZAwo", + "outputId": "b74b982f-5949-4efd-f26c-1af1220becf8" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"results_df\",\n \"rows\": 6,\n \"fields\": [\n {\n \"column\": \"RECALL @\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 18,\n \"min\": 1,\n \"max\": 50,\n \"num_unique_values\": 6,\n \"samples\": [\n 1,\n 3,\n 50\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"OS_Retrieval_Only\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.16768726331934317,\n \"min\": 0.5017421602787456,\n \"max\": 0.9642857142857143,\n \"num_unique_values\": 6,\n \"samples\": [\n 0.5017421602787456,\n 0.7726480836236934,\n 0.9642857142857143\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"OS_Retrieval_Plus_Finetuned_CE\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.1242572741439965,\n \"min\": 0.6428571428571429,\n \"max\": 0.9642857142857143,\n \"num_unique_values\": 6,\n \"samples\": [\n 0.6428571428571429,\n 0.8963414634146342,\n 0.9642857142857143\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"OAI_Retrieval_Only\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.08191797444458304,\n \"min\": 0.7534843205574913,\n \"max\": 0.9782229965156795,\n \"num_unique_values\": 6,\n \"samples\": [\n 0.7534843205574913,\n 0.8789198606271778,\n 0.9782229965156795\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"OAI_Retrieval_Plus_Pretrained_CE\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.053339701651705686,\n \"min\": 0.8336236933797909,\n \"max\": 0.9773519163763066,\n \"num_unique_values\": 6,\n \"samples\": [\n 
0.8336236933797909,\n 0.9207317073170732,\n 0.9773519163763066\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"OAI_Retrieval_Plus_Finetuned_CE\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.03185542173744024,\n \"min\": 0.8902439024390244,\n \"max\": 0.9782229965156795,\n \"num_unique_values\": 6,\n \"samples\": [\n 0.8902439024390244,\n 0.9433797909407665,\n 0.9782229965156795\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RECALL @OS_Retrieval_OnlyOS_Retrieval_Plus_Finetuned_CEOAI_Retrieval_OnlyOAI_Retrieval_Plus_Pretrained_CEOAI_Retrieval_Plus_Finetuned_CE
010.5017420.6428570.7534840.8336240.890244
130.7726480.8963410.8789200.9207320.943380
250.8257840.9337980.9128920.9372820.949477
3100.8780490.9503480.9346690.9529620.961672
4250.9346690.9608010.9651570.9756100.972997
5500.9642860.9642860.9782230.9773520.978223
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " RECALL @ OS_Retrieval_Only OS_Retrieval_Plus_Finetuned_CE \\\n", + "0 1 0.501742 0.642857 \n", + "1 3 0.772648 0.896341 \n", + "2 5 0.825784 0.933798 \n", + "3 10 0.878049 0.950348 \n", + "4 25 0.934669 0.960801 \n", + "5 50 0.964286 0.964286 \n", + "\n", + " OAI_Retrieval_Only OAI_Retrieval_Plus_Pretrained_CE \\\n", + "0 0.753484 0.833624 \n", + "1 0.878920 0.920732 \n", + "2 0.912892 0.937282 \n", + "3 0.934669 0.952962 \n", + "4 0.965157 0.975610 \n", + "5 0.978223 0.977352 \n", + "\n", + " OAI_Retrieval_Plus_Finetuned_CE \n", + "0 0.890244 \n", + "1 0.943380 \n", + "2 0.949477 \n", + "3 0.961672 \n", + "4 0.972997 \n", + "5 0.978223 " + ] + }, + "execution_count": 181, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# show results as a table\n", + "\n", + "results_df = pd.DataFrame({'RECALL @': [1, 3, 5, 10, 25, 50], 'OS_Retrieval_Only': OPEN_SOURCE_RETRIEVAL, 'OS_Retrieval_Plus_Finetuned_CE': OPEN_SOURCE_RETRIEVAL_PLUS_FT_CE , 'OAI_Retrieval_Only': OPENAI_RETRIEVAL , 'OAI_Retrieval_Plus_Pretrained_CE': OLD_CROSS_ENCODER, 'OAI_Retrieval_Plus_Finetuned_CE': OPENAI_RETRIEVAL_PLUS_FT_CE})\n", + "results_df.sort_values(by='RECALL @')" + ] }, - "fa6089718c574ee19bd3c378e356ea6b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_2c83393c2b7745889765d8efb4890763", - "placeholder": "​", - "style": "IPY_MODEL_6854fe2978954c7b9e30202a6ea34b57", - "value": "Batches: 100%" - } + { + "cell_type": "code", + "execution_count": 1, + "id": "OFno8WyIZDR6", + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/", + "height": 507 + }, + "id": "OFno8WyIZDR6", + "outputId": "e08f4366-bd16-4aaa-ba5f-bc1f3676d0e5" + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "# Data\n", + "methods = [\n", + " \"OS_Retrieval_Only\",\n", + " \"OS_Retrieval_Plus_Finetuned_CE\",\n", + " \"OAI_Retrieval_Only\",\n", + " \"OAI_Retrieval_Plus_Pretrained_CE\",\n", + " \"OAI_Retrieval_Plus_Finetuned_CE\"\n", + "]\n", + "\n", + "recalls = [0.501742, 0.642857, 0.753484, 0.833624, 0.890244]\n", + "\n", + "# Create a bar plot\n", + "plt.figure(figsize=(8, 5))\n", + "bars = plt.bar(methods, recalls, color=\"skyblue\")\n", + "\n", + "# Add value labels on top of each bar\n", + "for bar in bars:\n", + " height = bar.get_height()\n", + " plt.text(\n", + " bar.get_x() + bar.get_width() / 2,\n", + " height + 0.005,\n", + " f\"{height:.3f}\",\n", + " ha=\"center\",\n", + " va=\"bottom\",\n", + " fontsize=9\n", + " )\n", + "\n", + "# Labeling and aesthetics\n", + "plt.title(\"Recall@1 Across Different Methods\")\n", + "plt.ylabel(\"Recall@1\")\n", + "plt.xticks(rotation=25, ha=\"right\") # Rotate x-axis labels if needed\n", + "plt.ylim([0, 1]) # Since recall values typically range [0,1]\n", + "plt.tight_layout()\n", + "\n", + "# Display the plot\n", + "plt.show()\n" + ] }, - "fce9aa1662c34941afadd6141aba9180": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - 
"grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "cell_type": "code", + "execution_count": null, + "id": "sIA-PtwdIEv3", + "metadata": { + "id": "sIA-PtwdIEv3" + }, + "outputs": [], + "source": [] } - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "machine_shape": "hm", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "0098c6042695487eabdc18342cda7186": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, 
+ "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "00dc9b746d744e779fc9e0182d20e504": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1f5f910fb18549d58dd876c38aed4107", + "max": 36, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_13b90299c7884befa75180d0476f9913", + "value": 36 + } + }, + "0452b93e26154e0d915ad3a201e307fd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_486ab2a8181d4b32aedbb3fea18bef40", + "IPY_MODEL_da4aa9007a4c4814a7fcd55ea9bcce9f", + "IPY_MODEL_fac98921b88f41b08db63e93399f7c87" + ], + "layout": "IPY_MODEL_f7930aa886e941b3b15239a4f35138b9" + } + }, + "04f07748dd33447784c3354f58f38727": { + "model_module": "@jupyter-widgets/base", + "model_module_version": 
"1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "09eea1073fc246bf9bcd0e6320457896": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fdb72352779f499c869e390ccd9027eb", + "max": 1739, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_55c623662d81423da5bb0d5058c0ed66", + "value": 1739 + } + }, + "0de4f03a3b1f4bc3b2131607be95a808": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + 
"model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_2f41f083156247fa97ae3646052a3235", + "IPY_MODEL_09eea1073fc246bf9bcd0e6320457896", + "IPY_MODEL_1e97a5e7963a44119524ff4d091e9629" + ], + "layout": "IPY_MODEL_8c34c7a0f089455b85a650fea82cd591" + } + }, + "10e72a6bbfa6473f952f8572259277ab": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "13b90299c7884befa75180d0476f9913": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "18564cf9b4f04df9a9a309fcb5152a81": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + 
"1a3c1db02f9544e78c2599c7566befc0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1aab71dc8f7345718950c689bf25b115": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_8534c0df3c684480bf53ae7c0e605a97", + "IPY_MODEL_8c7dddcedd9d4f9f85478ef50f2852f1", + "IPY_MODEL_4aa325d9b5a4423bbe4f4395897bd4f2" + ], + "layout": "IPY_MODEL_4f15b9ad923b49289277175ca5103c13" + } + }, + "1c9e78b5942a444ab729333d57ab00a1": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_de21ddc2e22b49cd8429654f9187b61a", + "IPY_MODEL_efbb6facf5484cb990f663a05e981b02", + "IPY_MODEL_9a43fe57b2974c3f82c96226e31e6e64" + ], + "layout": "IPY_MODEL_d8120cb8a36f47e0b355ace30dbccf30" + } + }, + "1e97a5e7963a44119524ff4d091e9629": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d204f0d7c2a3486aa8c6360f26518037", + "placeholder": "​", + "style": "IPY_MODEL_4c158c4160514069870a1f4c280dcb5a", + "value": " 1739/1739 [05:07<00:00,  5.51it/s]" + } + }, + "1f5f910fb18549d58dd876c38aed4107": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + 
"grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2f41f083156247fa97ae3646052a3235": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d7a99cc5398549ad9e9177307a521e17", + "placeholder": "​", + "style": "IPY_MODEL_a5c060a68f2449a99fffe826e5385093", + "value": "Iteration: 100%" + } + }, + "338a83663ea946aca37b2a109bbdd437": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "41ae50f47c174b3ebec44009b57639cb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "486ab2a8181d4b32aedbb3fea18bef40": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fd0a7934959d4fb88f5dab916d42d3a7", + "placeholder": "​", + "style": "IPY_MODEL_75f7bbb0bb384ae592dea5059a146b9c", + "value": "Batches: 100%" + } + }, + "4aa325d9b5a4423bbe4f4395897bd4f2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + 
"_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c09ebe1563d8477aa0e5ec88160ccda2", + "placeholder": "​", + "style": "IPY_MODEL_f697561ad26048cc821cb4614f664edc", + "value": " 1/1 [00:00<00:00, 38.69it/s]" + } + }, + "4c158c4160514069870a1f4c280dcb5a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4ccf610615584abc9e10caa78e3100f4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"4cd23982da854b909ca6e435ce354270": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4ec583e4c526449b993c0f7f9e6e5456": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_8daf379772f6429293b659d29efbebef", + "IPY_MODEL_ffcefea0254449869f46b1a160222aa2", + "IPY_MODEL_690731860e1643d4a3873f52786127eb" + ], + "layout": "IPY_MODEL_0098c6042695487eabdc18342cda7186" + } + }, + "4f15b9ad923b49289277175ca5103c13": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4ff59bde36c24d568a567e3457e5aaf7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_72a697990c264e4aa48f5fa68b89f935", + "placeholder": "​", + "style": "IPY_MODEL_bfbe6dd8bf0d40a691d8998004bec8c0", + "value": " 1/1 [06:11<00:00, 371.94s/it]" + } + }, + "5271f5cf18e6427eb6df0c59e4b0cb2b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": 
"LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "55c623662d81423da5bb0d5058c0ed66": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "5835246fb0aa4d93b76509468d1c2211": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + 
"align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5d958e1742124b5b9ca2357c22686754": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "690731860e1643d4a3873f52786127eb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6fd3718bdc214b2ea9ebc4961ff6c9dc", + "placeholder": "​", + "style": "IPY_MODEL_338a83663ea946aca37b2a109bbdd437", + "value": " 1/1 [00:00<00:00, 32.36it/s]" + } + }, + 
"693d2f59f1f54307ac5b9024b6e28e73": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "69bcf5c05eb5461f9e5fe64448d38875": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6db3f21c727e4fed89f3ba0be163d8a9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4cd23982da854b909ca6e435ce354270", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_18564cf9b4f04df9a9a309fcb5152a81", + "value": 1 + } + }, + "6fd3718bdc214b2ea9ebc4961ff6c9dc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "72a697990c264e4aa48f5fa68b89f935": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "75083236d5b6461fa2110ea747c8c010": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + 
"object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "75f7bbb0bb384ae592dea5059a146b9c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "79562014e45a4002a0f364cfa7a1f94e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7b6179c28a554ce5a6b70ec6318c284f": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "7e83f014fa3b423f94a3942a8531299c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_e9e49580bfb8425b8ef88cea4862e0ee", + "IPY_MODEL_00dc9b746d744e779fc9e0182d20e504", + "IPY_MODEL_cc64395318d142a48563671b0ad9087f" + ], + "layout": "IPY_MODEL_5271f5cf18e6427eb6df0c59e4b0cb2b" + } + }, + "8534c0df3c684480bf53ae7c0e605a97": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d10ac840d1284c1d93cd3790bd69459a", + "placeholder": "​", + "style": "IPY_MODEL_c6dbd568908a470fa9eee035a46005c2", + "value": "Batches: 100%" + } + }, + "87782a63074c4714b8a80f5070c8830c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "878d1b336d084e90b84c5e7d9585dc71": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "8976021d2c7a421785f3559a24105089": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "8c34c7a0f089455b85a650fea82cd591": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": 
null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8c7dddcedd9d4f9f85478ef50f2852f1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_04f07748dd33447784c3354f58f38727", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_cb74baeb010049fdbd3f3e3be7b1988f", + "value": 1 + } + }, + "8daf379772f6429293b659d29efbebef": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4ccf610615584abc9e10caa78e3100f4", + "placeholder": "​", + "style": "IPY_MODEL_87782a63074c4714b8a80f5070c8830c", + "value": "Batches: 100%" + } + }, + "8e3c115a975c4ecb9f7948d0dc318282": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": 
"@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "928aa8b44ee34dcea5f9656024bda180": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + 
"max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9a43fe57b2974c3f82c96226e31e6e64": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_69bcf5c05eb5461f9e5fe64448d38875", + "placeholder": "​", + "style": "IPY_MODEL_8976021d2c7a421785f3559a24105089", + "value": " 1/1 [00:00<00:00, 27.80it/s]" + } + }, + "a0c863deec544e019560157cb557c132": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a2eafe380ca74843abd46ccb0a4132f8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, 
+ "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a5c060a68f2449a99fffe826e5385093": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ace7c202d5ba4c4ea4983f96268d5e89": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + 
"height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "aecbde694330456fb5d1b06a4abb1cd5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b915cbcacccc4dc68440908fb073a48e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "bfbe6dd8bf0d40a691d8998004bec8c0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c09ebe1563d8477aa0e5ec88160ccda2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": 
"@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c2ed1b757c3840468db308d6489e1d50": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "c6dbd568908a470fa9eee035a46005c2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": 
"" + } + }, + "cb74baeb010049fdbd3f3e3be7b1988f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "cc64395318d142a48563671b0ad9087f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ace7c202d5ba4c4ea4983f96268d5e89", + "placeholder": "​", + "style": "IPY_MODEL_aecbde694330456fb5d1b06a4abb1cd5", + "value": " 36/36 [00:15<00:00,  6.51it/s]" + } + }, + "cf3cfee3688b4322bd76495ebd010077": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + 
"height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d10ac840d1284c1d93cd3790bd69459a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d204f0d7c2a3486aa8c6360f26518037": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", 
+ "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d388a00d8638436c9d3aa477fe708a02": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f6e42a3fad9f445d8385a691715e5441", + "placeholder": "​", + "style": "IPY_MODEL_878d1b336d084e90b84c5e7d9585dc71", + "value": "Batches: 100%" + } + }, + "d53245c4a74745e9a70c03924debcf98": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + 
"_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8e3c115a975c4ecb9f7948d0dc318282", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_fc916e59dce04b68993a6922695d7226", + "value": 1 + } + }, + "d744293ea11f4c3d988df2cd37d9b313": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5835246fb0aa4d93b76509468d1c2211", + "placeholder": "​", + "style": "IPY_MODEL_a0c863deec544e019560157cb557c132", + "value": "Epoch: 100%" + } + }, + "d7a99cc5398549ad9e9177307a521e17": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + 
"object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d8120cb8a36f47e0b355ace30dbccf30": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "da4aa9007a4c4814a7fcd55ea9bcce9f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_f6f951a5f4fe43d3b6840459520925a6", + "max": 310, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_5d958e1742124b5b9ca2357c22686754", + "value": 310 + } + }, + "de21ddc2e22b49cd8429654f9187b61a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1a3c1db02f9544e78c2599c7566befc0", + "placeholder": "​", + "style": "IPY_MODEL_b915cbcacccc4dc68440908fb073a48e", + "value": "Batches: 100%" + } + }, + "e59da773d0ed452bbec059a7973e8ad1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e631186c46714758a005cea53a1f5dda": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_d744293ea11f4c3d988df2cd37d9b313", + "IPY_MODEL_6db3f21c727e4fed89f3ba0be163d8a9", + "IPY_MODEL_4ff59bde36c24d568a567e3457e5aaf7" + ], + "layout": "IPY_MODEL_75083236d5b6461fa2110ea747c8c010" + } + }, + 
"e6bc3b7cc0d04ca2b5ae6fab4d5c3ec3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_79562014e45a4002a0f364cfa7a1f94e", + "placeholder": "​", + "style": "IPY_MODEL_e59da773d0ed452bbec059a7973e8ad1", + "value": " 1/1 [00:00<00:00, 39.47it/s]" + } + }, + "e9e49580bfb8425b8ef88cea4862e0ee": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_cf3cfee3688b4322bd76495ebd010077", + "placeholder": "​", + "style": "IPY_MODEL_693d2f59f1f54307ac5b9024b6e28e73", + "value": "Batches: 100%" + } + }, + "ed03d3a8b6ad48c08c74bcd361eec589": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, 
+ "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "efbb6facf5484cb990f663a05e981b02": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_928aa8b44ee34dcea5f9656024bda180", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_7b6179c28a554ce5a6b70ec6318c284f", + "value": 1 + } + }, + "f697561ad26048cc821cb4614f664edc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f6e42a3fad9f445d8385a691715e5441": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": 
"1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f6f951a5f4fe43d3b6840459520925a6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": 
null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f7930aa886e941b3b15239a4f35138b9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fac98921b88f41b08db63e93399f7c87": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_41ae50f47c174b3ebec44009b57639cb", + "placeholder": "​", + "style": "IPY_MODEL_10e72a6bbfa6473f952f8572259277ab", + "value": " 310/310 [02:08<00:00, 14.01it/s]" + } + }, + "fc916e59dce04b68993a6922695d7226": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "fd0a7934959d4fb88f5dab916d42d3a7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fd1d2ee78abd494db082ad8d161bd623": { + "model_module": "@jupyter-widgets/controls", + 
"model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_d388a00d8638436c9d3aa477fe708a02", + "IPY_MODEL_d53245c4a74745e9a70c03924debcf98", + "IPY_MODEL_e6bc3b7cc0d04ca2b5ae6fab4d5c3ec3" + ], + "layout": "IPY_MODEL_a2eafe380ca74843abd46ccb0a4132f8" + } + }, + "fdb72352779f499c869e390ccd9027eb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ffcefea0254449869f46b1a160222aa2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + 
"state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ed03d3a8b6ad48c08c74bcd361eec589", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_c2ed1b757c3840468db308d6489e1d50", + "value": 1 + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 }