Commit

misc
CShorten committed Feb 20, 2025
1 parent 11ddce2 commit 7787957
Showing 11 changed files with 9,681 additions and 20 deletions.
49 changes: 49 additions & 0 deletions data/update-dataset.py
@@ -0,0 +1,49 @@
import json

INPUT_FILE = "./synthetic-weaviate-queries-with-schemas.json"
OUTPUT_FILE = "./updated-queries-with-schemas.json"

def update_query(query: dict) -> dict:
"""
Update the aggregations in the query:
- Map COUNT to total_count=True (and remove that aggregation)
- Remove any aggregation with metric TYPE
"""
# List of aggregation fields to check
agg_fields = [
"integer_property_aggregation",
"text_property_aggregation",
"boolean_property_aggregation"
]

for field in agg_fields:
agg = query.get(field)
if agg is not None:
metric = agg.get("metrics")
if metric == "COUNT":
# Remove the aggregation and set total_count
query[field] = None
query["total_count"] = True
elif metric == "TYPE":
# Remove the aggregation entirely
query[field] = None
return query

def main():
# Load the original file
with open(INPUT_FILE, "r") as infile:
data = json.load(infile)

# Process each record if it contains a query
for record in data:
if "query" in record and isinstance(record["query"], dict):
record["query"] = update_query(record["query"])

# Write out the updated data to a new file
with open(OUTPUT_FILE, "w") as outfile:
json.dump(data, outfile, indent=4)

print(f"Updated queries written to {OUTPUT_FILE}")

if __name__ == "__main__":
main()
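
For reference, a minimal before/after sketch of what update_query does to a single record's query (other record fields omitted; only the "metrics" key used below comes from the script above):

# Sketch only: a query whose integer aggregation asks for COUNT
before = {"integer_property_aggregation": {"metrics": "COUNT"}}

after = update_query(dict(before))
# after == {"integer_property_aggregation": None, "total_count": True}
# A TYPE metric is instead dropped with no total_count flag:
# update_query({"text_property_aggregation": {"metrics": "TYPE"}})
#   == {"text_property_aggregation": None}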
8,301 changes: 8,301 additions & 0 deletions data/updated-queries-with-schemas.json

Large diffs are not rendered by default.
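
A quick way to sanity-check the regenerated file (a sketch; the relative path assumes the script's output lives under data/ in the repository root):

import json

# Spot-check the output of data/update-dataset.py: how many queries now
# carry the total_count flag produced by the COUNT -> total_count mapping?
with open("data/updated-queries-with-schemas.json") as f:
    records = json.load(f)

with_total_count = sum(
    1 for r in records
    if isinstance(r.get("query"), dict) and r["query"].get("total_count")
)
print(f"{with_total_count} of {len(records)} queries use total_count")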

130 changes: 130 additions & 0 deletions notebooks/pydantic-opro.ipynb
@@ -0,0 +1,130 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\""
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Generated vow: My Beloved,\n",
"\n",
"Today, as we stand amidst the beauty of nature, surrounded by vibrant blooms and towering trees, I am reminded of the everlasting cycles of life and love.\n",
"\n",
"Just like the sun rises each day to kiss the earth, I promise to wake each morning with gratitude for the gift of your love. Like the trees that reach for the sky to find light, I vow to always strive to uplift you and support your dreams.\n",
"\n",
"In the way rivers carve their paths with gentle persistence, I promise to be patient and understanding, to adapt and grow with you, as we journey together down the river of life.\n",
"\n",
"I am humbled by the depth of the ocean, and it is with that same depth that I commit my heart to you, wholly and unconditionally, casting away any fears like leaves in the wind.\n",
"\n",
"Just as seasons change, I promise to love you through every winter and spring, every storm and sunshine, knowing that each moment only strengthens the roots of our bond.\n",
"\n",
"With the song of the birds as our witness, I vow to cherish you, with a love that is as enduring as the mountains and as boundless as the sky.\n",
"\n",
"Together, we will walk in step upon this earth, hand in hand, heart to heart, for all the days of our lives.\n",
"\n",
"This is my solemn promise, now and forever.\n",
"Vow score: 3/5\n",
"\n",
"Suggested prompt improvement: Craft a deeply personal and heartfelt wedding vow that reflects a lifelong commitment to support, companionship, and shared dreams, incorporating specific personal anecdotes or qualities that highlight the uniqueness of the relationship.\n",
"Explanation: The original prompt is quite general and lacks specificity, which can lead to generic vows. By adding phrases like 'deeply personal,' 'lifelong commitment,' and encouraging the use of 'specific personal anecdotes or qualities,' the prompt guides the AI to create vows that are more tailored and emotionally resonant, potentially making them more impactful and memorable.\n"
]
}
],
"source": [
"from datetime import date\n",
"from pydantic import BaseModel\n",
"from pydantic_ai import Agent, RunContext\n",
"\n",
"import nest_asyncio\n",
"nest_asyncio.apply()\n",
"\n",
"class Vow(BaseModel):\n",
" vow: str\n",
" score: int | None = None\n",
"\n",
"class SystemPromptSuggestion(BaseModel):\n",
" original_prompt: str\n",
" suggested_prompt: str\n",
" explanation: str\n",
"\n",
"vow_writer = Agent(\n",
" 'openai:gpt-4o',\n",
" deps_type=str,\n",
" result_type=Vow,\n",
" system_prompt=\"Write a heartfelt and poetic wedding vow that expresses deep love and commitment. The vow should be personal, emotional and memorable.\"\n",
")\n",
"\n",
"vow_judge = Agent(\n",
" 'openai:gpt-4o', \n",
" deps_type=Vow,\n",
" result_type=int,\n",
" system_prompt=\"You are an expert judge of wedding vows. Rate the given wedding vow on a scale of 1 to 5, where 1 is poor and 5 is exceptional. Consider factors like emotional depth, poetic quality, originality, and sincerity.\"\n",
")\n",
"\n",
"opro = Agent(\n",
" 'openai:gpt-4o',\n",
" deps_type=tuple[str, Vow],\n",
" result_type=SystemPromptSuggestion,\n",
" system_prompt=\"You are an expert at optimizing system prompts. Given a system prompt and the vow it generated (along with its score), suggest an improved version of the system prompt that would likely generate better wedding vows. Explain your reasoning.\"\n",
")\n",
"\n",
"# Example usage:\n",
"vow = vow_writer.run_sync(\"Write a wedding vow for someone who loves nature\", deps=\"nature-loving\")\n",
"print(f\"Generated vow: {vow.data.vow}\")\n",
"\n",
"score = vow_judge.run_sync(\"Rate this vow\", deps=vow.data)\n",
"vow.data.score = score.data\n",
"print(f\"Vow score: {score.data}/5\")\n",
"\n",
"suggestion = opro.run_sync(\n",
" \"Suggest improvements to the system prompt\",\n",
" deps=(vow_writer.system_prompt, vow.data)\n",
")\n",
"print(f\"\\nSuggested prompt improvement: {suggestion.data.suggested_prompt}\")\n",
"print(f\"Explanation: {suggestion.data.explanation}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
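
A natural follow-up, not included in the notebook, is to close the OPRO loop: rebuild the writer with the optimizer's suggested prompt and re-score the result. A minimal sketch reusing the Agent/run_sync pattern and the Vow, vow_judge, and suggestion objects defined above:

# Sketch: rebuild the vow writer with the suggested system prompt and
# judge the new vow, reusing objects from the notebook above.
improved_writer = Agent(
    'openai:gpt-4o',
    deps_type=str,
    result_type=Vow,
    system_prompt=suggestion.data.suggested_prompt,
)

new_vow = improved_writer.run_sync(
    "Write a wedding vow for someone who loves nature",
    deps="nature-loving",
)
new_score = vow_judge.run_sync("Rate this vow", deps=new_vow.data)
print(f"Improved vow score: {new_score.data}/5")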
10 changes: 3 additions & 7 deletions src/docker-compose.yml
@@ -16,18 +16,14 @@ services:
       - weaviate_data:/var/lib/weaviate
     restart: on-failure:0
     environment:
-      TRANSFORMERS_INFERENCE_API: 'http://t2v-transformers:8080'
       QUERY_DEFAULTS_LIMIT: 25
       AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true'
       PERSISTENCE_DATA_PATH: '/var/lib/weaviate'
-      DEFAULT_VECTORIZER_MODULE: 'text2vec-transformers'
-      ENABLE_MODULES: 'text2vec-transformers'
+      DEFAULT_VECTORIZER_MODULE: 'text2vec-openai'
+      OPENAI_API_KEY: ''
+      ENABLE_MODULES: 'text2vec-openai'
       ENABLE_API_BASED_MODULES: 'true'
       CLUSTER_HOSTNAME: 'node1'
-  t2v-transformers:
-    image: semitechnologies/transformers-inference:sentence-transformers-paraphrase-MiniLM-L6-v2
-    environment:
-      ENABLE_CUDA: '0'
 volumes:
   weaviate_data:
 ...
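
With the vectorizer switched to text2vec-openai, the API key is usually supplied by the client at connection time rather than hard-coded in the compose file. A minimal sketch with the Weaviate v4 Python client (connection parameters are assumptions for a local setup):

import os
import weaviate

# Sketch: connect to the local instance defined above and pass the OpenAI key
# as a header so the text2vec-openai module can reach the OpenAI API.
client = weaviate.connect_to_local(
    headers={"X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"]}
)
print(client.is_ready())
client.close()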