Commit

misc
CShorten committed Feb 20, 2025
1 parent 11ddce2 commit 7787957
Showing 11 changed files with 9,681 additions and 20 deletions.
49 changes: 49 additions & 0 deletions data/update-dataset.py
@@ -0,0 +1,49 @@
import json

INPUT_FILE = "./synthetic-weaviate-queries-with-schemas.json"
OUTPUT_FILE = "./updated-queries-with-schemas.json"

def update_query(query: dict) -> dict:
"""
Update the aggregations in the query:
- Map COUNT to total_count=True (and remove that aggregation)
- Remove any aggregation with metric TYPE
"""
# List of aggregation fields to check
agg_fields = [
"integer_property_aggregation",
"text_property_aggregation",
"boolean_property_aggregation"
]

for field in agg_fields:
agg = query.get(field)
if agg is not None:
metric = agg.get("metrics")
if metric == "COUNT":
# Remove the aggregation and set total_count
query[field] = None
query["total_count"] = True
elif metric == "TYPE":
# Remove the aggregation entirely
query[field] = None
return query

def main():
# Load the original file
with open(INPUT_FILE, "r") as infile:
data = json.load(infile)

# Process each record if it contains a query
for record in data:
if "query" in record and isinstance(record["query"], dict):
record["query"] = update_query(record["query"])

# Write out the updated data to a new file
with open(OUTPUT_FILE, "w") as outfile:
json.dump(data, outfile, indent=4)

print(f"Updated queries written to {OUTPUT_FILE}")

if __name__ == "__main__":
main()
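
For reference, a minimal before/after sketch of what update_query does to a single record's query (other record fields omitted; only the "metrics" key used below comes from the script above):

# Sketch only: a query whose integer aggregation asks for COUNT
before = {"integer_property_aggregation": {"metrics": "COUNT"}}

after = update_query(dict(before))
# after == {"integer_property_aggregation": None, "total_count": True}
# A TYPE metric is instead dropped with no total_count flag:
# update_query({"text_property_aggregation": {"metrics": "TYPE"}})
#   == {"text_property_aggregation": None}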
8,301 changes: 8,301 additions & 0 deletions data/updated-queries-with-schemas.json

Large diffs are not rendered by default.
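
A quick way to sanity-check the regenerated file (a sketch; the relative path assumes the script's output lives under data/ in the repository root):

import json

# Spot-check the output of data/update-dataset.py: how many queries now
# carry the total_count flag produced by the COUNT -> total_count mapping?
with open("data/updated-queries-with-schemas.json") as f:
    records = json.load(f)

with_total_count = sum(
    1 for r in records
    if isinstance(r.get("query"), dict) and r["query"].get("total_count")
)
print(f"{with_total_count} of {len(records)} queries use total_count")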

130 changes: 130 additions & 0 deletions notebooks/pydantic-opro.ipynb
@@ -0,0 +1,130 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\""
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Generated vow: My Beloved,\n",
"\n",
"Today, as we stand amidst the beauty of nature, surrounded by vibrant blooms and towering trees, I am reminded of the everlasting cycles of life and love.\n",
"\n",
"Just like the sun rises each day to kiss the earth, I promise to wake each morning with gratitude for the gift of your love. Like the trees that reach for the sky to find light, I vow to always strive to uplift you and support your dreams.\n",
"\n",
"In the way rivers carve their paths with gentle persistence, I promise to be patient and understanding, to adapt and grow with you, as we journey together down the river of life.\n",
"\n",
"I am humbled by the depth of the ocean, and it is with that same depth that I commit my heart to you, wholly and unconditionally, casting away any fears like leaves in the wind.\n",
"\n",
"Just as seasons change, I promise to love you through every winter and spring, every storm and sunshine, knowing that each moment only strengthens the roots of our bond.\n",
"\n",
"With the song of the birds as our witness, I vow to cherish you, with a love that is as enduring as the mountains and as boundless as the sky.\n",
"\n",
"Together, we will walk in step upon this earth, hand in hand, heart to heart, for all the days of our lives.\n",
"\n",
"This is my solemn promise, now and forever.\n",
"Vow score: 3/5\n",
"\n",
"Suggested prompt improvement: Craft a deeply personal and heartfelt wedding vow that reflects a lifelong commitment to support, companionship, and shared dreams, incorporating specific personal anecdotes or qualities that highlight the uniqueness of the relationship.\n",
"Explanation: The original prompt is quite general and lacks specificity, which can lead to generic vows. By adding phrases like 'deeply personal,' 'lifelong commitment,' and encouraging the use of 'specific personal anecdotes or qualities,' the prompt guides the AI to create vows that are more tailored and emotionally resonant, potentially making them more impactful and memorable.\n"
]
}
],
"source": [
"from datetime import date\n",
"from pydantic import BaseModel\n",
"from pydantic_ai import Agent, RunContext\n",
"\n",
"import nest_asyncio\n",
"nest_asyncio.apply()\n",
"\n",
"class Vow(BaseModel):\n",
" vow: str\n",
" score: int | None = None\n",
"\n",
"class SystemPromptSuggestion(BaseModel):\n",
" original_prompt: str\n",
" suggested_prompt: str\n",
" explanation: str\n",
"\n",
"vow_writer = Agent(\n",
" 'openai:gpt-4o',\n",
" deps_type=str,\n",
" result_type=Vow,\n",
" system_prompt=\"Write a heartfelt and poetic wedding vow that expresses deep love and commitment. The vow should be personal, emotional and memorable.\"\n",
")\n",
"\n",
"vow_judge = Agent(\n",
" 'openai:gpt-4o', \n",
" deps_type=Vow,\n",
" result_type=int,\n",
" system_prompt=\"You are an expert judge of wedding vows. Rate the given wedding vow on a scale of 1 to 5, where 1 is poor and 5 is exceptional. Consider factors like emotional depth, poetic quality, originality, and sincerity.\"\n",
")\n",
"\n",
"opro = Agent(\n",
" 'openai:gpt-4o',\n",
" deps_type=tuple[str, Vow],\n",
" result_type=SystemPromptSuggestion,\n",
" system_prompt=\"You are an expert at optimizing system prompts. Given a system prompt and the vow it generated (along with its score), suggest an improved version of the system prompt that would likely generate better wedding vows. Explain your reasoning.\"\n",
")\n",
"\n",
"# Example usage:\n",
"vow = vow_writer.run_sync(\"Write a wedding vow for someone who loves nature\", deps=\"nature-loving\")\n",
"print(f\"Generated vow: {vow.data.vow}\")\n",
"\n",
"score = vow_judge.run_sync(\"Rate this vow\", deps=vow.data)\n",
"vow.data.score = score.data\n",
"print(f\"Vow score: {score.data}/5\")\n",
"\n",
"suggestion = opro.run_sync(\n",
" \"Suggest improvements to the system prompt\",\n",
" deps=(vow_writer.system_prompt, vow.data)\n",
")\n",
"print(f\"\\nSuggested prompt improvement: {suggestion.data.suggested_prompt}\")\n",
"print(f\"Explanation: {suggestion.data.explanation}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
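
A natural follow-up, not included in the notebook, is to close the OPRO loop: rebuild the writer with the optimizer's suggested prompt and re-score the result. A minimal sketch reusing the Agent/run_sync pattern and the Vow, vow_judge, and suggestion objects defined above:

# Sketch: rebuild the vow writer with the suggested system prompt and
# judge the new vow, reusing objects from the notebook above.
improved_writer = Agent(
    'openai:gpt-4o',
    deps_type=str,
    result_type=Vow,
    system_prompt=suggestion.data.suggested_prompt,
)

new_vow = improved_writer.run_sync(
    "Write a wedding vow for someone who loves nature",
    deps="nature-loving",
)
new_score = vow_judge.run_sync("Rate this vow", deps=new_vow.data)
print(f"Improved vow score: {new_score.data}/5")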
10 changes: 3 additions & 7 deletions src/docker-compose.yml
@@ -16,18 +16,14 @@ services:
       - weaviate_data:/var/lib/weaviate
     restart: on-failure:0
     environment:
-      TRANSFORMERS_INFERENCE_API: 'http://t2v-transformers:8080'
       QUERY_DEFAULTS_LIMIT: 25
       AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true'
       PERSISTENCE_DATA_PATH: '/var/lib/weaviate'
-      DEFAULT_VECTORIZER_MODULE: 'text2vec-transformers'
-      ENABLE_MODULES: 'text2vec-transformers'
+      DEFAULT_VECTORIZER_MODULE: 'text2vec-openai'
+      OPENAI_API_KEY: ''
+      ENABLE_MODULES: 'text2vec-openai'
       ENABLE_API_BASED_MODULES: 'true'
       CLUSTER_HOSTNAME: 'node1'
-  t2v-transformers:
-    image: semitechnologies/transformers-inference:sentence-transformers-paraphrase-MiniLM-L6-v2
-    environment:
-      ENABLE_CUDA: '0'
 volumes:
   weaviate_data:
 ...
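
With the vectorizer switched to text2vec-openai, the API key is usually supplied by the client at connection time rather than hard-coded in the compose file. A minimal sketch with the Weaviate v4 Python client (connection parameters are assumptions for a local setup):

import os
import weaviate

# Sketch: connect to the local instance defined above and pass the OpenAI key
# as a header so the text2vec-openai module can reach the OpenAI API.
client = weaviate.connect_to_local(
    headers={"X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"]}
)
print(client.is_ready())
client.close()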