Merge pull request #782 from expectedparrot/speedup

Speedup
expectedparrot · Jul 21, 2024 · 5aaec25 · 5aaec25
2 parents 29a18ed + 6c468ae
commit 5aaec25
Show file tree

Hide file tree

Showing 152 changed files with 9,835 additions and 3,852 deletions.
diff --git a/docs/notebooks/adding_metadata.ipynb b/docs/notebooks/adding_metadata.ipynb
@@ -58,13 +58,13 @@
    "outputs": [],
    "source": [
     "q_reference = QuestionFreeText(\n",
-    "    question_name = \"reference\",\n",
-    "    question_text = \"What is this headline referring to: {{ headline }}\"\n",
+    "    question_name=\"reference\",\n",
+    "    question_text=\"What is this headline referring to: {{ headline }}\",\n",
     ")\n",
     "\n",
     "q_frontpage = QuestionYesNo(\n",
-    "    question_name = \"frontpage\",\n",
-    "    question_text = \"Is this story likely to be on the front page of the newspaper: {{ headline }}\"\n",
+    "    question_name=\"frontpage\",\n",
+    "    question_text=\"Is this story likely to be on the front page of the newspaper: {{ headline }}\",\n",
     ")\n",
     "\n",
     "survey = Survey([q_reference, q_frontpage])"
@@ -98,7 +98,7 @@
     "        \"New York Yankees Win First Pennant in Franchise History\",\n",
     "        \"Subway Expansion Project Approved by City Council\",\n",
     "        \"Harlem Renaissance: New Wave of Cultural Expression\",\n",
-    "        \"Mayor Announces New Housing Initiative for Veterans\"\n",
+    "        \"Mayor Announces New Housing Initiative for Veterans\",\n",
     "    ],\n",
     "    \"date\": [\n",
     "        \"1918-11-11\",\n",
@@ -110,7 +110,7 @@
     "        \"1918-09-30\",\n",
     "        \"1918-08-18\",\n",
     "        \"1918-04-25\",\n",
-    "        \"1918-11-20\"\n",
+    "        \"1918-11-20\",\n",
     "    ],\n",
     "    \"author\": [\n",
     "        \"John Doe\",\n",
@@ -122,7 +122,7 @@
     "        \"William Davis\",\n",
     "        \"Barbara Wilson\",\n",
     "        \"Charles Miller\",\n",
-    "        \"Elizabeth Taylor\"\n",
+    "        \"Elizabeth Taylor\",\n",
     "    ],\n",
     "    \"section\": [\n",
     "        \"Front Page\",\n",
@@ -134,8 +134,8 @@
     "        \"Sports\",\n",
     "        \"City News\",\n",
     "        \"Culture\",\n",
-    "        \"Housing\"\n",
-    "    ]\n",
+    "        \"Housing\",\n",
+    "    ],\n",
     "}\n",
     "\n",
     "df = pd.DataFrame(data)"
@@ -156,12 +156,17 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "scenarios = [Scenario({\n",
-    "    \"headline\": row['headline'],\n",
-    "    \"date\": row['date'],\n",
-    "    \"author\": row['author'],\n",
-    "    \"section\": row['section']\n",
-    "}) for _, row in df.iterrows()]"
+    "scenarios = [\n",
+    "    Scenario(\n",
+    "        {\n",
+    "            \"headline\": row[\"headline\"],\n",
+    "            \"date\": row[\"date\"],\n",
+    "            \"author\": row[\"author\"],\n",
+    "            \"section\": row[\"section\"],\n",
+    "        }\n",
+    "    )\n",
+    "    for _, row in df.iterrows()\n",
+    "]"
    ]
   },
   {
@@ -669,7 +674,9 @@
     }
    ],
    "source": [
-    "results.select(\"headline\", \"date\", \"author\", \"section\", \"reference\", \"frontpage\").print(format=\"rich\")"
+    "results.select(\"headline\", \"date\", \"author\", \"section\", \"reference\", \"frontpage\").print(\n",
+    "    format=\"rich\"\n",
+    ")"
    ]
   }
  ],

diff --git a/docs/notebooks/agentifying_responses.ipynb b/docs/notebooks/agentifying_responses.ipynb
@@ -29,7 +29,7 @@
    "source": [
     "import pandas as pd\n",
     "\n",
-    "df = pd.read_csv('newsletter_survey_responses.csv')"
+    "df = pd.read_csv(\"newsletter_survey_responses.csv\")"
    ]
   },
   {
@@ -121,7 +121,7 @@
     }
    ],
    "source": [
-    "df[['question_type', 'question_text', 'question_options']].drop_duplicates()"
+    "df[[\"question_type\", \"question_text\", \"question_options\"]].drop_duplicates()"
    ]
   },
   {
@@ -236,7 +236,7 @@
     }
    ],
    "source": [
-    "df[['question_text', 'response']].sample(8)"
+    "df[[\"question_text\", \"response\"]].sample(8)"
    ]
   },
   {
@@ -264,33 +264,33 @@
    "outputs": [],
    "source": [
     "# Convert the responses DataFrame into a list of dictionaries (one per row)\n",
-    "all_responses = df.to_dict(orient = 'records')\n",
+    "all_responses = df.to_dict(orient=\"records\")\n",
     "\n",
     "# Initialize a dictionary to store responses by respondent_id\n",
     "respondents_dict = {}\n",
     "\n",
     "# Iterate over each row in the responses\n",
     "for row in all_responses:\n",
-    "    respondent_id = row['respondent_id']\n",
-    "    question_id = row['question_id']\n",
-    "    question_text = row['question_text']\n",
-    "    question_options = row['question_options']\n",
-    "    response = row['response']\n",
-    "    \n",
+    "    respondent_id = row[\"respondent_id\"]\n",
+    "    question_id = row[\"question_id\"]\n",
+    "    question_text = row[\"question_text\"]\n",
+    "    question_options = row[\"question_options\"]\n",
+    "    response = row[\"response\"]\n",
+    "\n",
     "    # Format each response as background information for the relevant respondent\n",
     "    formatted_string = (\n",
     "        f\"You were asked: '{question_text}' \"\n",
     "        f\"The answer options were: '{question_options}'. \"\n",
     "        f\"You responded: '{response}'\"\n",
     "    )\n",
-    "    \n",
+    "\n",
     "    # Add the information to the respondent's dictionary\n",
     "    if respondent_id not in respondents_dict:\n",
     "        respondents_dict[respondent_id] = {}\n",
-    "    \n",
+    "\n",
     "    respondents_dict[respondent_id][question_id] = formatted_string\n",
     "\n",
-    "# Print the new dictionary \n",
+    "# Print the new dictionary\n",
     "# print(respondents_dict)"
    ]
   },
@@ -316,35 +316,43 @@
     "from rich import print\n",
     "\n",
     "# Interview topic\n",
-    "interview_topic = 'home construction'\n",
-    "interview_title = 'Home Construction Newsletter Follow-up Interview'\n",
+    "interview_topic = \"home construction\"\n",
+    "interview_title = \"Home Construction Newsletter Follow-up Interview\"\n",
     "\n",
     "# Persona for the interview subjects\n",
-    "interview_subject_persona = textwrap.dedent(f\"\"\"\\\n",
+    "interview_subject_persona = textwrap.dedent(\n",
+    "    f\"\"\"\\\n",
     "You are a professional with a keen interest in {interview_topic}.\n",
-    "\"\"\")\n",
+    "\"\"\"\n",
+    ")\n",
     "\n",
     "# Instructions for the interview subject agents\n",
-    "interview_subject_instructions = textwrap.dedent(f\"\"\"\\\n",
+    "interview_subject_instructions = textwrap.dedent(\n",
+    "    f\"\"\"\\\n",
     "You recently completed a reader survey about the newsletter of a \n",
     "{interview_topic} expert who also produces a popular blog and podcast. \n",
     "Now they are asking you some follow-on questions.\n",
-    "\"\"\")\n",
+    "\"\"\"\n",
+    ")\n",
     "\n",
     "# Persona for the interviewer agent\n",
-    "interviewer_persona = textwrap.dedent(f\"\"\"\\\n",
+    "interviewer_persona = textwrap.dedent(\n",
+    "    f\"\"\"\\\n",
     "You are an well-known expert on {interview_topic} who produces \n",
     "a popular newsletter, blog and podcast on {interview_topic}.  \n",
-    "\"\"\")\n",
+    "\"\"\"\n",
+    ")\n",
     "\n",
     "# Instructions for the interviewer agent\n",
-    "interviewer_instructions = textwrap.dedent(f\"\"\"\\\n",
+    "interviewer_instructions = textwrap.dedent(\n",
+    "    f\"\"\"\\\n",
     "You recently conducted a reader survey about your newsletter on\n",
     "{interview_topic}. Now you are asking respondents some follow-on questions.\n",
-    "\"\"\")\n",
+    "\"\"\"\n",
+    ")\n",
     "\n",
     "# Total number of questions to ask in the interview\n",
-    "total_questions = 5 "
+    "total_questions = 5"
    ]
   },
   {
@@ -395,18 +403,20 @@
     "\n",
     "# Iterate over the respondents' data\n",
     "for respondent_id, questions in respondents_dict.items():\n",
-    "    \n",
+    "\n",
     "    # Initialize the traits for each agent\n",
     "    traits = {}\n",
-    "    \n",
+    "\n",
     "    # Iterate over the questions\n",
     "    for question_id, formatted_string in questions.items():\n",
     "        traits[f\"question_id_{question_id}\"] = formatted_string\n",
-    "    \n",
+    "\n",
     "    # Create the agent and add it to the agents list\n",
-    "    agent = Agent(name = f\"Respondent {respondent_id}\", \n",
-    "                  traits = traits, \n",
-    "                  instruction = interview_subject_instructions)\n",
+    "    agent = Agent(\n",
+    "        name=f\"Respondent {respondent_id}\",\n",
+    "        traits=traits,\n",
+    "        instruction=interview_subject_instructions,\n",
+    "    )\n",
     "    interview_subjects.append(agent)\n",
     "\n",
     "# Inspecting the first one\n",
@@ -429,9 +439,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Create an agent for the interviewer \n",
-    "interviewer = Agent(traits = {'persona':interviewer_persona},\n",
-    "                    instruction = interviewer_instructions)"
+    "# Create an agent for the interviewer\n",
+    "interviewer = Agent(\n",
+    "    traits={\"persona\": interviewer_persona}, instruction=interviewer_instructions\n",
+    ")"
    ]
   },
   {
@@ -456,77 +467,101 @@
     "\n",
     "# Selecting a language model to use\n",
     "# Model.available()  # To see a list of all available models\n",
-    "model = Model('gpt-4')\n",
+    "model = Model(\"gpt-4\")\n",
+    "\n",
     "\n",
     "def get_next_question(subject, researcher, dialog_so_far):\n",
-    "    scenario = Scenario({'subject': str(subject.traits), 'dialog_so_far': dialog_so_far})\n",
+    "    scenario = Scenario(\n",
+    "        {\"subject\": str(subject.traits), \"dialog_so_far\": dialog_so_far}\n",
+    "    )\n",
     "    meta_q = QuestionFreeText(\n",
-    "        question_name = 'next_question',\n",
-    "        question_text = \"\"\"\n",
+    "        question_name=\"next_question\",\n",
+    "        question_text=\"\"\"\n",
     "        This is the background information for the interview subject: {{ subject }}\n",
     "        This is your current dialog with the interview subject: {{ dialog_so_far }}\n",
     "        What question you would ask the interview subject next?\n",
-    "        \"\"\"\n",
+    "        \"\"\",\n",
+    "    )\n",
+    "    question_text = (\n",
+    "        meta_q.by(model)\n",
+    "        .by(researcher)\n",
+    "        .by(scenario)\n",
+    "        .run()\n",
+    "        .select(\"next_question\")\n",
+    "        .first()\n",
     "    )\n",
-    "    question_text = meta_q.by(model).by(researcher).by(scenario).run().select(\"next_question\").first()\n",
     "    return question_text\n",
     "\n",
+    "\n",
     "def get_response_to_question(question_text, subject, dialog_so_far):\n",
     "    q_to_subject = QuestionFreeText(\n",
-    "        question_name = 'question',\n",
-    "        question_text = f\"\"\"\n",
+    "        question_name=\"question\",\n",
+    "        question_text=f\"\"\"\n",
     "        This is your current dialog with the interview subject: {dialog_so_far}.\n",
-    "        You are now being asked:\"\"\" + question_text\n",
+    "        You are now being asked:\"\"\"\n",
+    "        + question_text,\n",
     "    )\n",
-    "    response = q_to_subject.by(model).by(subject).run().select('question').first()\n",
+    "    response = q_to_subject.by(model).by(subject).run().select(\"question\").first()\n",
     "    return response\n",
     "\n",
+    "\n",
     "def ask_question(subject, researcher, dialog_so_far):\n",
     "    question_text = get_next_question(subject, researcher, dialog_so_far)\n",
     "    response = get_response_to_question(question_text, subject, dialog_so_far)\n",
     "\n",
-    "    print(' \\nQuestion: \\n\\n' + question_text + '\\n\\nResponse: \\n\\n' + response)\n",
-    "    \n",
-    "    return {'question': question_text, 'response': response}\n",
+    "    print(\" \\nQuestion: \\n\\n\" + question_text + \"\\n\\nResponse: \\n\\n\" + response)\n",
+    "\n",
+    "    return {\"question\": question_text, \"response\": response}\n",
+    "\n",
     "\n",
     "def dialog_to_string(d):\n",
-    "    return '\\n'.join([f\"Question: {d['question']}\\nResponse: {d['response']}\" for d in d])\n",
+    "    return \"\\n\".join(\n",
+    "        [f\"Question: {d['question']}\\nResponse: {d['response']}\" for d in d]\n",
+    "    )\n",
+    "\n",
     "\n",
     "def clean_dict(d):\n",
     "    \"\"\"Convert dictionary to string and remove braces.\"\"\"\n",
-    "    return str(d).replace('{', '').replace('}', '')\n",
+    "    return str(d).replace(\"{\", \"\").replace(\"}\", \"\")\n",
+    "\n",
     "\n",
     "def summarize_interview(subject, interview_topic, dialog_so_far, researcher):\n",
-    "    interview_subject_name = subject['name']\n",
-    "    interview_subject_traits = subject['traits']\n",
+    "    interview_subject_name = subject[\"name\"]\n",
+    "    interview_subject_traits = subject[\"traits\"]\n",
     "    summary_q = QuestionFreeText(\n",
-    "        question_name = 'summary',\n",
-    "        question_text = (\n",
-    "        f\"You have just conducted the following interview of {interview_subject_name} \"\n",
-    "        f\"who has these traits: {clean_dict(interview_subject_traits)} \"\n",
-    "        f\"The topic of the interview was {interview_topic}. \"\n",
-    "        f\"Please draft a summary of the interview: {clean_dict(dialog_so_far)}\")\n",
+    "        question_name=\"summary\",\n",
+    "        question_text=(\n",
+    "            f\"You have just conducted the following interview of {interview_subject_name} \"\n",
+    "            f\"who has these traits: {clean_dict(interview_subject_traits)} \"\n",
+    "            f\"The topic of the interview was {interview_topic}. \"\n",
+    "            f\"Please draft a summary of the interview: {clean_dict(dialog_so_far)}\"\n",
+    "        ),\n",
     "    )\n",
     "    themes_q = QuestionFreeText(\n",
-    "        question_name = 'themes',\n",
-    "        question_text = 'List the major themes of the interview.'\n",
+    "        question_name=\"themes\", question_text=\"List the major themes of the interview.\"\n",
     "    )\n",
     "    survey = Survey([summary_q, themes_q]).set_full_memory_mode()\n",
     "    results = survey.by(model).by(researcher).run()\n",
-    "    summary = results.select('summary').first()\n",
-    "    themes = results.select('themes').first()\n",
-    "    print('\\n\\nSummary:\\n\\n' + summary + '\\n\\nThemes:\\n\\n' + themes)\n",
+    "    summary = results.select(\"summary\").first()\n",
+    "    themes = results.select(\"themes\").first()\n",
+    "    print(\"\\n\\nSummary:\\n\\n\" + summary + \"\\n\\nThemes:\\n\\n\" + themes)\n",
+    "\n",
     "\n",
     "def conduct_interview(subject, researcher, interview_topic):\n",
     "\n",
-    "    print('\\n\\nInterview subject: ' + subject['name'] + '\\n\\nInterview topic: ' + interview_topic)\n",
-    "    \n",
-    "    dialog_so_far = []  \n",
-    "    \n",
+    "    print(\n",
+    "        \"\\n\\nInterview subject: \"\n",
+    "        + subject[\"name\"]\n",
+    "        + \"\\n\\nInterview topic: \"\n",
+    "        + interview_topic\n",
+    "    )\n",
+    "\n",
+    "    dialog_so_far = []\n",
+    "\n",
     "    for i in range(total_questions):\n",
     "        result = ask_question(subject, researcher, dialog_to_string(dialog_so_far))\n",
     "        dialog_so_far.append(result)\n",
-    "    \n",
+    "\n",
     "    summarize_interview(subject, interview_topic, dialog_so_far, researcher)"
    ]
   },