From 149a7d6581428b2c808dccf27f9ef4dae1fd7097 Mon Sep 17 00:00:00 2001 From: John Horton Date: Fri, 10 Jan 2025 07:20:53 -0500 Subject: [PATCH 01/38] Model serialization now includes inference service --- edsl/language_models/LanguageModel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/edsl/language_models/LanguageModel.py b/edsl/language_models/LanguageModel.py index e7be4fa6..d2181d23 100644 --- a/edsl/language_models/LanguageModel.py +++ b/edsl/language_models/LanguageModel.py @@ -500,6 +500,7 @@ def to_dict(self, add_edsl_version: bool = True) -> dict[str, Any]: d = { "model": self.model, "parameters": self.parameters, + "inference_service": self._inference_service_, } if add_edsl_version: from edsl import __version__ From 1f9caa37903afa23c1f94d941dc42ab744f68538 Mon Sep 17 00:00:00 2001 From: John Horton Date: Fri, 10 Jan 2025 07:45:02 -0500 Subject: [PATCH 02/38] tidying up --- edsl/agents/Invigilator.py | 5 +++-- edsl/language_models/LanguageModel.py | 5 ++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/edsl/agents/Invigilator.py b/edsl/agents/Invigilator.py index 869e65c1..4812600e 100644 --- a/edsl/agents/Invigilator.py +++ b/edsl/agents/Invigilator.py @@ -1,6 +1,6 @@ """Module for creating Invigilators, which are objects to administer a question to an Agent.""" -from typing import Dict, Any, Optional, TYPE_CHECKING +from typing import Dict, Any, Optional, TYPE_CHECKING, Literal from edsl.utilities.decorators import sync_wrapper from edsl.exceptions.questions import QuestionAnswerValidationError @@ -12,6 +12,7 @@ from edsl.scenarios.Scenario import Scenario from edsl.surveys.Survey import Survey +PromptType = Literal["user_prompt", "system_prompt", "encoded_image", "files_list"] NA = "Not Applicable" @@ -19,7 +20,7 @@ class InvigilatorAI(InvigilatorBase): """An invigilator that uses an AI model to answer questions.""" - def get_prompts(self) -> Dict[str, "Prompt"]: + def get_prompts(self) -> Dict[PromptType, "Prompt"]: """Return the prompts used.""" return self.prompt_constructor.get_prompts() diff --git a/edsl/language_models/LanguageModel.py b/edsl/language_models/LanguageModel.py index d2181d23..6351d5b3 100644 --- a/edsl/language_models/LanguageModel.py +++ b/edsl/language_models/LanguageModel.py @@ -512,7 +512,10 @@ def to_dict(self, add_edsl_version: bool = True) -> dict[str, Any]: @classmethod @remove_edsl_version def from_dict(cls, data: dict) -> Type[LanguageModel]: - """Convert dictionary to a LanguageModel child instance.""" + """Convert dictionary to a LanguageModel child instance. + + NB: This method does not use the stored inference_service but rather just fetches a model class based on the name. 
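+
+        Illustrative round trip (a sketch, not run as a doctest; assumes the serialized "model" name maps to a known model class):
+
+        >>> m = LanguageModel.example()
+        >>> m2 = LanguageModel.from_dict(m.to_dict())  # doctest: +SKIP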
+ """ from edsl.language_models.model import get_model_class model_class = get_model_class(data["model"]) From 3d94146d66868a874bc687c766bcf65530156ee6 Mon Sep 17 00:00:00 2001 From: John Horton Date: Fri, 10 Jan 2025 08:12:30 -0500 Subject: [PATCH 03/38] Update hashes --- edsl/jobs/interviews/Interview.py | 2 +- edsl/language_models/LanguageModel.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/edsl/jobs/interviews/Interview.py b/edsl/jobs/interviews/Interview.py index 9eca980e..77285179 100644 --- a/edsl/jobs/interviews/Interview.py +++ b/edsl/jobs/interviews/Interview.py @@ -153,7 +153,7 @@ def to_dict(self, include_exceptions=True, add_edsl_version=True) -> dict[str, A >>> i = Interview.example() >>> hash(i) - 193593189022259693 + 767745459362662063 """ d = { "agent": self.agent.to_dict(add_edsl_version=add_edsl_version), diff --git a/edsl/language_models/LanguageModel.py b/edsl/language_models/LanguageModel.py index 6351d5b3..4ec54ce1 100644 --- a/edsl/language_models/LanguageModel.py +++ b/edsl/language_models/LanguageModel.py @@ -244,7 +244,7 @@ def __hash__(self) -> str: >>> m = LanguageModel.example() >>> hash(m) - 1811901442659237949 + 325654563661254408 """ from edsl.utilities.utilities import dict_hash @@ -495,7 +495,7 @@ def to_dict(self, add_edsl_version: bool = True) -> dict[str, Any]: >>> m = LanguageModel.example() >>> m.to_dict() - {'model': '...', 'parameters': {'temperature': ..., 'max_tokens': ..., 'top_p': ..., 'frequency_penalty': ..., 'presence_penalty': ..., 'logprobs': False, 'top_logprobs': ...}, 'edsl_version': '...', 'edsl_class_name': 'LanguageModel'} + {'model': '...', 'parameters': {'temperature': ..., 'max_tokens': ..., 'top_p': ..., 'frequency_penalty': ..., 'presence_penalty': ..., 'logprobs': False, 'top_logprobs': ...}, 'inference_service': 'openai', 'edsl_version': '...', 'edsl_class_name': 'LanguageModel'} """ d = { "model": self.model, From b5a92bd3546bc503fc4da558a7bf0762cc4108c9 Mon Sep 17 00:00:00 2001 From: John Horton Date: Mon, 20 Jan 2025 07:33:18 -0500 Subject: [PATCH 04/38] Fix pdf extractor --- edsl/scenarios/PdfExtractor.py | 9 +++------ edsl/scenarios/Scenario.py | 3 ++- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/edsl/scenarios/PdfExtractor.py b/edsl/scenarios/PdfExtractor.py index fb8f50f1..4263674e 100644 --- a/edsl/scenarios/PdfExtractor.py +++ b/edsl/scenarios/PdfExtractor.py @@ -2,14 +2,11 @@ class PdfExtractor: - def __init__(self, pdf_path: str, parent_object: object): + def __init__(self, pdf_path: str): self.pdf_path = pdf_path - self.constructor = parent_object.__class__ + #self.constructor = parent_object.__class__ - def get_object(self) -> object: - return self.constructor(self._get_pdf_dict()) - - def _get_pdf_dict(self) -> dict: + def get_pdf_dict(self) -> dict: # Ensure the file exists import fitz diff --git a/edsl/scenarios/Scenario.py b/edsl/scenarios/Scenario.py index 6b091d27..85f70c78 100644 --- a/edsl/scenarios/Scenario.py +++ b/edsl/scenarios/Scenario.py @@ -358,7 +358,8 @@ def from_image( def from_pdf(cls, pdf_path: str): from edsl.scenarios.PdfExtractor import PdfExtractor - return PdfExtractor(pdf_path, cls).get_object() + extractor = PdfExtractor(pdf_path) + return Scenario(extractor.get_pdf_dict()) @classmethod def from_docx(cls, docx_path: str) -> "Scenario": From 56282c4028b0f0072ce684391a920536862e355a Mon Sep 17 00:00:00 2001 From: robin Date: Mon, 20 Jan 2025 09:51:21 -0500 Subject: [PATCH 05/38] deleting old message --- edsl/exceptions/jobs.py | 10 +--------- 1 
file changed, 1 insertion(+), 9 deletions(-) diff --git a/edsl/exceptions/jobs.py b/edsl/exceptions/jobs.py index 9d57a73a..7b06f2a5 100644 --- a/edsl/exceptions/jobs.py +++ b/edsl/exceptions/jobs.py @@ -10,15 +10,7 @@ class JobsRunError(JobsErrors): class MissingRemoteInferenceError(JobsErrors): - def __init__(self): - message = dedent( - """\\ - You are trying to run the job remotely, but you have not set the EXPECTED_PARROT_INFERENCE_URL environment variable. - This remote running service is not quite ready yet! - But please see https://docs.expectedparrot.com/en/latest/coop.html for what we are working on. - """ - ) - super().__init__(message) + pass class InterviewError(Exception): From 8eb1dd595c95adc0bcd651cb7f6986784135010d Mon Sep 17 00:00:00 2001 From: robin Date: Mon, 20 Jan 2025 09:56:14 -0500 Subject: [PATCH 06/38] updating missing model message --- edsl/exceptions/language_models.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/edsl/exceptions/language_models.py b/edsl/exceptions/language_models.py index f481d8f1..978112a3 100644 --- a/edsl/exceptions/language_models.py +++ b/edsl/exceptions/language_models.py @@ -34,11 +34,15 @@ def __init__(self, model_name): msg = dedent( f"""\ Model {model_name} not found. - To create an instance, you can do: - >>> m = Model('gpt-4-1106-preview', temperature=0.5, ...) + To create an instance of this model, pass the model name to a `Model` object. + You can optionally pass additional parameters to the model, e.g.: + >>> m = Model('gpt-4-1106-preview', temperature=0.5) - To get the default model, you can leave out the model name. - To see the available models, you can do: + To use the default model, simply run your job without specifying a model. + To check the default model, run the following code: + >>> Model() + + To see information about all available models, run the following code: >>> Model.available() See https://docs.expectedparrot.com/en/latest/language_models.html#available-models for more details. From 105e6ab9cf4b968bb791e5d9ab463a9a2a0f93e1 Mon Sep 17 00:00:00 2001 From: robin Date: Mon, 20 Jan 2025 09:59:00 -0500 Subject: [PATCH 07/38] updating messages --- edsl/exceptions/questions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/edsl/exceptions/questions.py b/edsl/exceptions/questions.py index a4a5a7ca..bc2861c4 100644 --- a/edsl/exceptions/questions.py +++ b/edsl/exceptions/questions.py @@ -16,7 +16,8 @@ def __init__(self, message="An error occurred with the question"): class QuestionAnswerValidationError(QuestionErrors): documentation = "https://docs.expectedparrot.com/en/latest/exceptions.html" - explanation = """This when the answer coming from the Language Model does not conform to the expectation for that question type. + explanation = """ + This can occur when the answer coming from the Language Model does not conform to the expectations for the question type. For example, if the question is a multiple choice question, the answer should be drawn from the list of options provided. 
""" From 5a0d400c2f367d580ef1fe45a89c8a67fde11f12 Mon Sep 17 00:00:00 2001 From: robin Date: Mon, 20 Jan 2025 14:14:38 -0500 Subject: [PATCH 08/38] Updating job status messages --- edsl/jobs/results_exceptions_handler.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/edsl/jobs/results_exceptions_handler.py b/edsl/jobs/results_exceptions_handler.py index a907fef1..9a2c20e0 100644 --- a/edsl/jobs/results_exceptions_handler.py +++ b/edsl/jobs/results_exceptions_handler.py @@ -66,9 +66,9 @@ def _get_remote_logging_setting(self) -> bool: def _generate_error_message(self, indices) -> str: """Generate appropriate error message based on number of exceptions.""" - msg = f"Exceptions were raised in {len(indices)} interviews.\n" - if len(indices) > 5: - msg += f"Exceptions were raised in the following interviews: {indices}.\n" + msg = f"Exceptions were raised." # in {len(indices)} interviews.\n" + # if len(indices) > 5: + # msg += f"Exceptions were raised in the following interviews: {indices}.\n" return msg def handle_exceptions(self) -> None: @@ -84,7 +84,7 @@ def handle_exceptions(self) -> None: # Generate HTML report filepath = self.results.task_history.html( - cta="Open report to see details.", + cta="Click to open the Error Report for details on exceptions.", open_in_browser=self.open_in_browser, return_link=True, ) From 56945db68353299cb413589be226b7f09d71126e Mon Sep 17 00:00:00 2001 From: robin Date: Mon, 20 Jan 2025 15:07:35 -0500 Subject: [PATCH 09/38] updating messages --- edsl/jobs/JobsRemoteInferenceLogger.py | 2 +- edsl/jobs/results_exceptions_handler.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/edsl/jobs/JobsRemoteInferenceLogger.py b/edsl/jobs/JobsRemoteInferenceLogger.py index 1e6fd722..bf33861a 100644 --- a/edsl/jobs/JobsRemoteInferenceLogger.py +++ b/edsl/jobs/JobsRemoteInferenceLogger.py @@ -32,7 +32,7 @@ class JobsInfo: pretty_names = { "job_uuid": "Job UUID", "progress_bar_url": "Progress Bar URL", - "error_report_url": "Error Report URL", + "error_report_url": "Exceptions Report URL", "results_uuid": "Results UUID", "results_url": "Results URL", } diff --git a/edsl/jobs/results_exceptions_handler.py b/edsl/jobs/results_exceptions_handler.py index 9a2c20e0..2962cffb 100644 --- a/edsl/jobs/results_exceptions_handler.py +++ b/edsl/jobs/results_exceptions_handler.py @@ -66,7 +66,7 @@ def _get_remote_logging_setting(self) -> bool: def _generate_error_message(self, indices) -> str: """Generate appropriate error message based on number of exceptions.""" - msg = f"Exceptions were raised." # in {len(indices)} interviews.\n" + msg = f"Exceptions were raised. 
See the Exceptions Report ^ for details.\n" # in {len(indices)} interviews.\n" # if len(indices) > 5: # msg += f"Exceptions were raised in the following interviews: {indices}.\n" return msg @@ -84,7 +84,7 @@ def handle_exceptions(self) -> None: # Generate HTML report filepath = self.results.task_history.html( - cta="Click to open the Error Report for details on exceptions.", + cta="", #Click to open the Exceptions Report for details.", open_in_browser=self.open_in_browser, return_link=True, ) From 8e3088c741438e6a01e9d11a65be6b13d898ed63 Mon Sep 17 00:00:00 2001 From: zer0dss Date: Tue, 21 Jan 2025 16:08:16 +0200 Subject: [PATCH 10/38] work on adding deepseek service --- edsl/enums.py | 2 ++ edsl/inference_services/DeepSeekService.py | 18 ++++++++++++++++++ edsl/inference_services/registry.py | 2 ++ 3 files changed, 22 insertions(+) create mode 100644 edsl/inference_services/DeepSeekService.py diff --git a/edsl/enums.py b/edsl/enums.py index 66727c0d..70f8f868 100644 --- a/edsl/enums.py +++ b/edsl/enums.py @@ -66,6 +66,7 @@ class InferenceServiceType(EnumWithChecks): MISTRAL = "mistral" TOGETHER = "together" PERPLEXITY = "perplexity" + DEEPSEEK = "deepseek" # unavoidable violation of the DRY principle but it is necessary @@ -84,6 +85,7 @@ class InferenceServiceType(EnumWithChecks): "mistral", "together", "perplexity", + "deepseek", ] available_models_urls = { diff --git a/edsl/inference_services/DeepSeekService.py b/edsl/inference_services/DeepSeekService.py new file mode 100644 index 00000000..a6589b1e --- /dev/null +++ b/edsl/inference_services/DeepSeekService.py @@ -0,0 +1,18 @@ +import aiohttp +import json +import requests +from typing import Any, List + +# from edsl.inference_services.InferenceServiceABC import InferenceServiceABC +from edsl.language_models import LanguageModel + +from edsl.inference_services.OpenAIService import OpenAIService + + +class DeepSeekService(OpenAIService): + """DeepSeek service class.""" + + _inference_service_ = "deepseek" + _env_key_name_ = "DEEPSEEK_API_KEY" + _base_url_ = "https://api.deepseek.com" + _models_list_cache: List[str] = [] diff --git a/edsl/inference_services/registry.py b/edsl/inference_services/registry.py index c82f0419..7c4ebc75 100644 --- a/edsl/inference_services/registry.py +++ b/edsl/inference_services/registry.py @@ -13,6 +13,7 @@ from edsl.inference_services.TestService import TestService from edsl.inference_services.TogetherAIService import TogetherAIService from edsl.inference_services.PerplexityService import PerplexityService +from edsl.inference_services.DeepSeekService import DeepSeekService try: from edsl.inference_services.MistralAIService import MistralAIService @@ -33,6 +34,7 @@ TestService, TogetherAIService, PerplexityService, + DeepSeekService, ] if mistral_available: From 6840ccf3758b22604240cbc195c7e3fb244d9bc0 Mon Sep 17 00:00:00 2001 From: zer0dss Date: Tue, 21 Jan 2025 17:09:01 +0200 Subject: [PATCH 11/38] add deepseek inference service --- edsl/enums.py | 1 + 1 file changed, 1 insertion(+) diff --git a/edsl/enums.py b/edsl/enums.py index 70f8f868..9e7b92c2 100644 --- a/edsl/enums.py +++ b/edsl/enums.py @@ -109,6 +109,7 @@ class InferenceServiceType(EnumWithChecks): InferenceServiceType.MISTRAL.value: "MISTRAL_API_KEY", InferenceServiceType.TOGETHER.value: "TOGETHER_API_KEY", InferenceServiceType.PERPLEXITY.value: "PERPLEXITY_API_KEY", + InferenceServiceType.DEEPSEEK.value: "DEEPSEEK_API_KEY", } From 7a5a65d304490cfad5c2c24e45f134e150fd8d51 Mon Sep 17 00:00:00 2001 From: robin Date: Tue, 21 Jan 2025 10:17:08
-0500 Subject: [PATCH 12/38] updating exceptions report formatting --- edsl/exceptions/questions.py | 16 +-- edsl/jobs/results_exceptions_handler.py | 4 +- edsl/jobs/tasks/TaskHistory.py | 61 +++++++-- edsl/templates/error_reporting/base.html | 6 +- .../error_reporting/interview_details.html | 120 ++++++++++-------- .../templates/error_reporting/interviews.html | 21 +-- edsl/templates/error_reporting/overview.html | 36 +++++- .../error_reporting/performance_plot.html | 2 +- 8 files changed, 160 insertions(+), 106 deletions(-) diff --git a/edsl/exceptions/questions.py b/edsl/exceptions/questions.py index bc2861c4..14c7f5db 100644 --- a/edsl/exceptions/questions.py +++ b/edsl/exceptions/questions.py @@ -53,28 +53,24 @@ def __str__(self): def to_html_dict(self): return { - "error_type": ("Name of the exception", "p", "/p", self.__class__.__name__), - "explaination": ("Explanation", "p", "/p", self.explanation), - "edsl answer": ( - "What model returned", + "Exception type": ("p", "/p", self.__class__.__name__), + "Explanation": ("p", "/p", self.explanation), + "EDSL response": ( "pre", "/pre", json.dumps(self.data, indent=2), ), - "validating_model": ( - "Pydantic model for answers", + "Validating model": ( "pre", "/pre", json.dumps(self.model.model_json_schema(), indent=2), ), - "error_message": ( - "Error message Pydantic returned", + "Error message": ( "p", "/p", self.message, ), - "documentation_url": ( - "URL to EDSL docs", + "Documentation": ( f"a href='{self.documentation}'", "/a", self.documentation, diff --git a/edsl/jobs/results_exceptions_handler.py b/edsl/jobs/results_exceptions_handler.py index 2962cffb..6f4b6a12 100644 --- a/edsl/jobs/results_exceptions_handler.py +++ b/edsl/jobs/results_exceptions_handler.py @@ -66,7 +66,7 @@ def _get_remote_logging_setting(self) -> bool: def _generate_error_message(self, indices) -> str: """Generate appropriate error message based on number of exceptions.""" - msg = f"Exceptions were raised. See the Exceptions Report ^ for details.\n" # in {len(indices)} interviews.\n" + msg = f"Exceptions were raised. 
Please see the Exceptions Report ^ for details.\n" # in {len(indices)} interviews.\n" # if len(indices) > 5: # msg += f"Exceptions were raised in the following interviews: {indices}.\n" return msg @@ -84,7 +84,7 @@ def handle_exceptions(self) -> None: # Generate HTML report filepath = self.results.task_history.html( - cta="", #Click to open the Exceptions Report for details.", + cta="Please see the Exceptions Report for details.", open_in_browser=self.open_in_browser, return_link=True, ) diff --git a/edsl/jobs/tasks/TaskHistory.py b/edsl/jobs/tasks/TaskHistory.py index 760d64ea..2c3e2a1a 100644 --- a/edsl/jobs/tasks/TaskHistory.py +++ b/edsl/jobs/tasks/TaskHistory.py @@ -264,9 +264,27 @@ def javascript(self): js = env.joinpath("report.js").read_text() return js + @property + def exceptions_table(self) -> dict: + """Return a dictionary of exceptions organized by type, service, model, and question name.""" + exceptions_table = {} + for interview in self.total_interviews: + for question_name, exceptions in interview.exceptions.items(): + for exception in exceptions: + key = ( + exception.exception.__class__.__name__, # Exception type + interview.model._inference_service_, # Service + interview.model.model, # Model + question_name # Question name + ) + if key not in exceptions_table: + exceptions_table[key] = 0 + exceptions_table[key] += 1 + return exceptions_table + @property def exceptions_by_type(self) -> dict: - """Return a dictionary of exceptions by type.""" + """Return a dictionary of exceptions tallied by type.""" exceptions_by_type = {} for interview in self.total_interviews: for question_name, exceptions in interview.exceptions.items(): @@ -324,6 +342,27 @@ def exceptions_by_question_name(self) -> dict: } return sorted_exceptions_by_question_name + # @property + # def exceptions_by_model(self) -> dict: + # """Return a dictionary of exceptions tallied by model and question name.""" + # exceptions_by_model = {} + # for interview in self.total_interviews: + # model = interview.model.model + # service = interview.model._inference_service_ + # if (service, model) not in exceptions_by_model: + # exceptions_by_model[(service, model)] = 0 + # if interview.exceptions != {}: + # exceptions_by_model[(service, model)] += len(interview.exceptions) + + # # sort the exceptions by model + # sorted_exceptions_by_model = { + # k: v + # for k, v in sorted( + # exceptions_by_model.items(), key=lambda item: item[1], reverse=True + # ) + # } + # return sorted_exceptions_by_model + @property def exceptions_by_model(self) -> dict: """Return a dictionary of exceptions tallied by model and question name.""" @@ -331,19 +370,12 @@ def exceptions_by_model(self) -> dict: for interview in self.total_interviews: model = interview.model.model service = interview.model._inference_service_ - if (service, model) not in exceptions_by_model: - exceptions_by_model[(service, model)] = 0 - if interview.exceptions != {}: - exceptions_by_model[(service, model)] += len(interview.exceptions) - - # sort the exceptions by model - sorted_exceptions_by_model = { - k: v - for k, v in sorted( - exceptions_by_model.items(), key=lambda item: item[1], reverse=True - ) - } - return sorted_exceptions_by_model + for question_name, exceptions in interview.exceptions.items(): + key = (service, model, question_name) + if key not in exceptions_by_model: + exceptions_by_model[key] = 0 + exceptions_by_model[key] += len(exceptions) + return exceptions_by_model def generate_html_report(self, css: Optional[str], include_plot=False): if 
include_plot: @@ -372,6 +404,7 @@ def generate_html_report(self, css: Optional[str], include_plot=False): javascript=self.javascript(), num_exceptions=len(self.exceptions), performance_plot_html=performance_plot_html, + exceptions_table=self.exceptions_table, exceptions_by_type=self.exceptions_by_type, exceptions_by_question_name=self.exceptions_by_question_name, exceptions_by_model=self.exceptions_by_model, diff --git a/edsl/templates/error_reporting/base.html b/edsl/templates/error_reporting/base.html index 636608b8..2ab22542 100644 --- a/edsl/templates/error_reporting/base.html +++ b/edsl/templates/error_reporting/base.html @@ -3,7 +3,7 @@ - Exception Details + Exceptions Report @@ -15,9 +15,7 @@ {% include 'overview.html' %} - {% include 'exceptions_by_type.html' %} - {% include 'exceptions_by_model.html' %} - {% include 'exceptions_by_question_name.html' %} + {% include 'exceptions_table.html' %} {% include 'interviews.html' %} {% include 'performance_plot.html' %} diff --git a/edsl/templates/error_reporting/interview_details.html b/edsl/templates/error_reporting/interview_details.html index 835d1e7c..d307f73f 100644 --- a/edsl/templates/error_reporting/interview_details.html +++ b/edsl/templates/error_reporting/interview_details.html @@ -1,43 +1,67 @@ -
question_name: {{ question }}
- + -

Exception details

+
question_name: {{ question }}
{% for exception_message in exceptions %}
-
+
Exception: {{ exception_message.name }} - -
-
+ +
+
- - - - - + - - + - + + + + @@ -47,24 +71,20 @@

Exception details

- - + + - - - - - - + + - - + + @@ -77,7 +97,7 @@

Exception details

- +
KeyValue
Interview ID (index in results) {{ index }}
Question name (question_name)Question name {{ question }}
Question type (question_type)Question type {{ exception_message.question_type }}
Human-readable question {{ interview.survey._get_question_by_name(question).html( scenario = interview.scenario, agent = interview.agent, - answers = exception_message.answers) - + answers = exception_message.answers + ) }}
User Prompt
{{ exception_message.rendered_prompts['user_prompt'] }}
Scenario {{ interview.scenario.__repr__() }} {{ interview.agent.__repr__() }}
Model name{{ interview.model.model }}System Prompt
{{ exception_message.rendered_prompts['system_prompt'] }}
Inference service {{ interview.model._inference_service_ }}
Model parameters{{ interview.model.__repr__() }}
User Prompt
{{ exception_message.rendered_prompts['user_prompt'] }}
Model name{{ interview.model.model }}
System Prompt
{{ exception_message.rendered_prompts['system_prompt'] }}
Model parameters{{ interview.model.__repr__() }}
Raw model response
Code to (likely) reproduce the errorCode likely to reproduce the error @@ -85,32 +105,26 @@

Exception details

- - - {% if exception_message.exception.__class__.__name__ == 'QuestionAnswerValidationError' %} -

Answer validation details

- - - - - - {% for field, (explanation, open_tag, close_tag, value) in exception_message.exception.to_html_dict().items() %} - - - - - - {% endfor %} -
FieldValue
{{ field }}: ({{ explanation }})<{{open_tag}}> {{ value | escape }} <{{close_tag}}>
- {% endif %} - -
Time: {{ exception_message.time }}
-
Traceback: - -
{{ exception_message.traceback }}
-
-
+ + {% if exception_message.exception.__class__.__name__ == 'QuestionAnswerValidationError' %} +

Answer validation details

+ + {% for field, (open_tag, close_tag, value) in exception_message.exception.to_html_dict().items() %} + + + + + {% endfor %} +
{{ field }}<{{ open_tag }}> {{ value | escape }} <{{ close_tag }}>
+ {% endif %} +

+
Time: {{ exception_message.time }}
+
Traceback: + +
{{ exception_message.traceback }}
+
+
{% endfor %} \ No newline at end of file diff --git a/edsl/templates/error_reporting/interviews.html b/edsl/templates/error_reporting/interviews.html index 7483dfdf..0e9ba36a 100644 --- a/edsl/templates/error_reporting/interviews.html +++ b/edsl/templates/error_reporting/interviews.html @@ -1,19 +1,6 @@ - -{% if interviews|length > max_interviews %} -

Only showing the first {{ max_interviews }} interviews with errors

-{% else %} -

Showing all interviews

-{% endif %} - +

Exceptions Details

{% for index, interview in interviews.items() %} - {% if index < max_interviews %} - {% if interview.exceptions != {} %} -
Interview: {{ index }}
- Model: {{ interview.model.model }} -

Failing questions

- {% endif %} - {% for question, exceptions in interview.exceptions.items() %} - {% include 'interview_details.html' %} - {% endfor %} - {% endif %} + {% for question, exceptions in interview.exceptions.items() %} + {% include 'interview_details.html' %} + {% endfor %} {% endfor %} diff --git a/edsl/templates/error_reporting/overview.html b/edsl/templates/error_reporting/overview.html index e7659b23..925f002b 100644 --- a/edsl/templates/error_reporting/overview.html +++ b/edsl/templates/error_reporting/overview.html @@ -1,5 +1,31 @@ -

Overview

-

There were {{ interviews|length }} total interview(s). An 'interview' is the result of one survey, taken by one agent, with one model, with one scenario.

-The number of interviews with any exceptions was {{ num_exceptions }}.

-

For advice on dealing with exceptions on Expected Parrot, -see here.

\ No newline at end of file + + +

Exceptions Report

+

+ This report summarizes exceptions encountered in the job that was run. +

+

+ For advice on dealing with exceptions, please see the EDSL documentation page.
+ You can also post a question at the Expected Parrot Discord channel, open an issue on GitHub, or send an email to info@expectedparrot.com. +

+ +

Overview

+ + + + + + + + + + + +
Total interviews{{ interviews|length }}
Interviews with exceptions{{ num_exceptions }}
+

+ An "interview" is the result of one survey, taken by one agent, with one model and one scenario (if any). +

diff --git a/edsl/templates/error_reporting/performance_plot.html b/edsl/templates/error_reporting/performance_plot.html index 1f6af77c..905b8a6a 100644 --- a/edsl/templates/error_reporting/performance_plot.html +++ b/edsl/templates/error_reporting/performance_plot.html @@ -1,2 +1,2 @@ -

Performance Plot

+

Performance Plot

{{ performance_plot_html }} \ No newline at end of file From 65b5f7d78ec1a587a11de654fb3737826abf4fac Mon Sep 17 00:00:00 2001 From: robin Date: Tue, 21 Jan 2025 10:31:49 -0500 Subject: [PATCH 13/38] updating exceptions report layout --- edsl/jobs/results_exceptions_handler.py | 9 ++------- edsl/jobs/tasks/TaskHistory.py | 5 ++--- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/edsl/jobs/results_exceptions_handler.py b/edsl/jobs/results_exceptions_handler.py index 6f4b6a12..e7a85d35 100644 --- a/edsl/jobs/results_exceptions_handler.py +++ b/edsl/jobs/results_exceptions_handler.py @@ -66,9 +66,7 @@ def _get_remote_logging_setting(self) -> bool: def _generate_error_message(self, indices) -> str: """Generate appropriate error message based on number of exceptions.""" - msg = f"Exceptions were raised. Please see the Exceptions Report ^ for details.\n" # in {len(indices)} interviews.\n" - # if len(indices) > 5: - # msg += f"Exceptions were raised in the following interviews: {indices}.\n" + msg = f"Exceptions were raised.\n" return msg def handle_exceptions(self) -> None: @@ -84,7 +82,6 @@ def handle_exceptions(self) -> None: # Generate HTML report filepath = self.results.task_history.html( - cta="Please see the Exceptions Report for details.", open_in_browser=self.open_in_browser, return_link=True, ) @@ -92,7 +89,5 @@ def handle_exceptions(self) -> None: # Handle remote logging if enabled if self.remote_logging: filestore = HTMLFileStore(filepath) - coop_details = filestore.push(description="Error report") + coop_details = filestore.push(description="Exceptions Report") print(coop_details) - - print("Also see: https://docs.expectedparrot.com/en/latest/exceptions.html") diff --git a/edsl/jobs/tasks/TaskHistory.py b/edsl/jobs/tasks/TaskHistory.py index 2c3e2a1a..56bb7a3e 100644 --- a/edsl/jobs/tasks/TaskHistory.py +++ b/edsl/jobs/tasks/TaskHistory.py @@ -419,11 +419,10 @@ def html( filename: Optional[str] = None, return_link=False, css=None, - cta="Open Report in New Tab", + cta="\nClick to open the report in a new tab\n", open_in_browser=False, ): """Return an HTML report.""" - from IPython.display import display, HTML import tempfile import os @@ -452,7 +451,7 @@ def html( html_link = f'{cta}' display(HTML(html_link)) escaped_output = html.escape(output) - iframe = f"""" + iframe = f""" """ display(HTML(iframe)) From 1c8a212d36b245326ccc6b1960165d0d32a56856 Mon Sep 17 00:00:00 2001 From: robin Date: Tue, 21 Jan 2025 10:40:17 -0500 Subject: [PATCH 14/38] adding combined exceptions summary table --- .../error_reporting/exceptions_table.html | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 edsl/templates/error_reporting/exceptions_table.html diff --git a/edsl/templates/error_reporting/exceptions_table.html b/edsl/templates/error_reporting/exceptions_table.html new file mode 100644 index 00000000..999e869a --- /dev/null +++ b/edsl/templates/error_reporting/exceptions_table.html @@ -0,0 +1,31 @@ + + + + + + + + + + + + + + {% for (exception_type, service, model, question_name), count in exceptions_table.items() %} + + + + + + + + {% endfor %} + +
Exception TypeServiceModelQuestion NameTotal
{{ exception_type }}{{ service }}{{ model }}{{ question_name }}{{ count }}
+

+ Click to expand the details below for information about each exception, including code for reproducing it. +

\ No newline at end of file From c1d69d636bb8d27d1faefd5b12f446dae63180f7 Mon Sep 17 00:00:00 2001 From: robin Date: Tue, 21 Jan 2025 10:49:24 -0500 Subject: [PATCH 15/38] updating exceptions report layout --- edsl/templates/error_reporting/exceptions_table.html | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/edsl/templates/error_reporting/exceptions_table.html b/edsl/templates/error_reporting/exceptions_table.html index 999e869a..2c3cc954 100644 --- a/edsl/templates/error_reporting/exceptions_table.html +++ b/edsl/templates/error_reporting/exceptions_table.html @@ -26,6 +26,10 @@ {% endfor %} +

+ Note: You may encounter repeated exceptions where retries were attempted. + You can modify the maximum number of attempts for failed API calls in `edsl/config.py`. +

Click to expand the details below for information about each exception, including code for reproducing it.

\ No newline at end of file From 6882146232a4cd65e6361ae3339b1a605d32f253 Mon Sep 17 00:00:00 2001 From: robin Date: Tue, 21 Jan 2025 12:48:46 -0500 Subject: [PATCH 16/38] updating doctest --- edsl/language_models/LanguageModel.py | 1 - 1 file changed, 1 deletion(-) diff --git a/edsl/language_models/LanguageModel.py b/edsl/language_models/LanguageModel.py index e7be4fa6..981eacba 100644 --- a/edsl/language_models/LanguageModel.py +++ b/edsl/language_models/LanguageModel.py @@ -558,7 +558,6 @@ def example( >>> m = LanguageModel.example(test_model = True, canned_response = "WOWZA!", throw_exception = True) >>> r = q.by(m).run(cache = False, disable_remote_cache = True, disable_remote_inference = True, print_exceptions = True) Exception report saved to ... - Also see: ... """ from edsl.language_models.model import Model From cd6464752b9aa4a600927bc38ad02092c7080602 Mon Sep 17 00:00:00 2001 From: Rae Date: Tue, 21 Jan 2025 15:02:26 -0500 Subject: [PATCH 17/38] Fix invigilator deserialization The deserialization of the invigilator did not return an object --- edsl/agents/InvigilatorBase.py | 1 + 1 file changed, 1 insertion(+) diff --git a/edsl/agents/InvigilatorBase.py b/edsl/agents/InvigilatorBase.py index 10b3a52c..b4739688 100644 --- a/edsl/agents/InvigilatorBase.py +++ b/edsl/agents/InvigilatorBase.py @@ -135,6 +135,7 @@ def from_dict(cls, data) -> "InvigilatorBase": d["additional_prompt_data"] = data["additional_prompt_data"] d = cls(**d) + return d def __repr__(self) -> str: """Return a string representation of the Invigilator. From 7eab5f92c4981e2e0596d3ed0a4cc72e798447a7 Mon Sep 17 00:00:00 2001 From: Rae Date: Tue, 21 Jan 2025 15:07:01 -0500 Subject: [PATCH 18/38] Fix repr typos --- edsl/agents/InvigilatorBase.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/edsl/agents/InvigilatorBase.py b/edsl/agents/InvigilatorBase.py index b4739688..61403be9 100644 --- a/edsl/agents/InvigilatorBase.py +++ b/edsl/agents/InvigilatorBase.py @@ -144,7 +144,7 @@ def __repr__(self) -> str: 'InvigilatorExample(...)' """ - return f"{self.__class__.__name__}(agent={repr(self.agent)}, question={repr(self.question)}, scneario={repr(self.scenario)}, model={repr(self.model)}, memory_plan={repr(self.memory_plan)}, current_answers={repr(self.current_answers)}, iteration{repr(self.iteration)}, additional_prompt_data={repr(self.additional_prompt_data)}, cache={repr(self.cache)})" + return f"{self.__class__.__name__}(agent={repr(self.agent)}, question={repr(self.question)}, scenario={repr(self.scenario)}, model={repr(self.model)}, memory_plan={repr(self.memory_plan)}, current_answers={repr(self.current_answers)}, iteration={repr(self.iteration)}, additional_prompt_data={repr(self.additional_prompt_data)}, cache={repr(self.cache)})" def get_failed_task_result(self, failure_reason: str) -> EDSLResultObjectInput: """Return an AgentResponseDict used in case the question-asking fails. From f68394c43863004a886e616b54d3326b523c689d Mon Sep 17 00:00:00 2001 From: robin Date: Tue, 21 Jan 2025 15:41:05 -0500 Subject: [PATCH 19/38] updating docs page on cache --- docs/data.rst | 199 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 178 insertions(+), 21 deletions(-) diff --git a/docs/data.rst b/docs/data.rst index 61e9adf4..d9bd80a9 100644 --- a/docs/data.rst +++ b/docs/data.rst @@ -12,42 +12,196 @@ The `Cache` class is used to store responses from a language model so that they Why caching? 
-^^^^^^^^^^^^ +------------ + Language model outputs are expensive to create, both in terms of time and money. -As such, it is useful to store the outputs of a language model in a cache so that they can be re-used later. +As such, it is useful to store them in a cache so that they can be shared or reused later. Use cases: -* Avoid re-running the same queries if a job fails only partially, only sending the new queries to the language model. -* Share your cache with others so they can re-run your queries at no cost. -* Use a common remote cache to avoid re-running queries that others have already run. +* Avoid rerunning questions when a job fails only partially, by only resending unanswered questions to a language model. +* Share your cache with others so they can rerun your questions at no cost. +* Use a common remote cache to avoid rerunning questions that others have already run. * Build up training data to train or fine-tune a smaller model. -* Build up a public repository of queries and responses so others can learn from them. +* Build up a public repository of questions and responses so others can learn from them. How it works -^^^^^^^^^^^^ +------------ + A `Cache` is a dictionary-like object that stores the inputs and outputs of a language model. Specifically, a cache has an attribute, `data`, that is dictionary-like. The keys of a cache are hashes of the unique inputs to a language model. -The values are `CacheEntry` objects, which store the inputs and outputs of a language model. +The values are `CacheEntry` objects, which contains the inputs and outputs. A cache can be stored as either a Python in-memory dictionary or a dictionary connected to a SQLite3 database. The default constructor is an in-memory dictionary. If a SQLite3 database is used, a cache will persist automatically between sessions. You can also specify that a cache be used for a specific session, in which case it will not persist between sessions. -After a session, the cache will have new entries. +After a session, the cache will have new entries from any new jobs that have been run during the session. These can be written to a local SQLite3 database, a JSONL file, or a remote server. +Generating a cache +------------------ + +A cache is automatically created whenever results are generated for a question or survey. +This cache is specific to the results and is attached to the results object. +It can be accessed using the `cache` attribute of the results object. + +For example: + +.. code-block:: python + + from edsl import QuestionNumerical, Model + + m = Model("gemini-1.5-flash") + + q = QuestionNumerical( + question_name = "random", + question_text = "Please give me a random number between 1 and 100." + ) + + results = q.by(m).run() + + results.cache + + +Example output: + +.. list-table:: + :header-rows: 1 + + * - model + - parameters + - system_prompt + - user_prompt + - output + - iteration + - timestamp + - cache_key + * - gemini-1.5-flash + - {'temperature': 0.5, 'topP': 1, 'topK': 1, 'maxOutputTokens': 2048, 'stopSequences': []} + - nan + - Please give me a random number between 1 and 100. This question requires a numerical response in the form of an integer or decimal (e.g., -12, 0, 1, 2, 3.45, ...). Respond with just your number on a single line. If your response is equivalent to zero, report '0' After the answer, put a comment explaining your choice on the next line. 
+ - {"candidates": [{"content": {"parts": [{"text": "87\n# This is a randomly generated number between 1 and 100.\n"}], "role": "model"}, "finish_reason": 1, "safety_ratings": [{"category": 8, "probability": 1, "blocked": false}, {"category": 10, "probability": 1, "blocked": false}, {"category": 7, "probability": 1, "blocked": false}, {"category": 9, "probability": 1, "blocked": false}], "avg_logprobs": -0.03539780080318451, "token_count": 0, "grounding_attributions": []}], "usage_metadata": {"prompt_token_count": 97, "candidates_token_count": 20, "total_token_count": 117, "cached_content_token_count": 0}} + - 0 + - 1737491116 + - 7f057154c60a1b9ae343b0634fe7a370 + + +We can also see that the results object include columns of information about the cache: + +.. code-block:: python + + results.columns + +Output: + +.. code-block:: python + +.. list-table:: + :header-rows: 1 + + * - 0 + - agent.agent_index + * - 1 + - agent.agent_instruction + * - 2 + - agent.agent_name + * - 3 + - answer.random + * - 4 + - cache_keys.random_cache_key + * - 5 + - cache_used.random_cache_used + * - 6 + - comment.random_comment + * - 7 + - generated_tokens.random_generated_tokens + * - 8 + - iteration.iteration + * - 9 + - model.maxOutputTokens + * - 10 + - model.model + * - 11 + - model.model_index + * - 12 + - model.stopSequences + * - 13 + - model.temperature + * - 14 + - model.topK + * - 15 + - model.topP + * - 16 + - prompt.random_system_prompt + * - 17 + - prompt.random_user_prompt + * - 18 + - question_options.random_question_options + * - 19 + - question_text.random_question_text + * - 20 + - question_type.random_question_type + * - 21 + - raw_model_response.random_cost + * - 22 + - raw_model_response.random_one_usd_buys + * - 23 + - raw_model_response.random_raw_model_response + * - 24 + - scenario.scenario_index + + +The `cache_keys` column contains the cache key for each question. +It is a unique identifier for the cache entry, and can be used to retrieve the cache entry later. + +For example, here we retrieve the cache key and use it when running a survey that includes the relevant question: + +.. code-block:: python + + my_cache_key = results.select("cache_keys.random_cache_key").first() + + from edsl import QuestionFreeText, QuestionNumerical, Survey, Model + + m = Model("gemini-1.5-flash") + + q1 = QuestionNumerical( + question_name = "random", + question_text = "Please give me a random number between 1 and 100." + ) + + q2 = QuestionFreeText( + question_name = "explain", + question_text = "How does an AI choose a random number?" + ) + + survey = Survey(questions = [q1,q2]) + + new_results = survey.by(m).run(cache_key = my_cache_key) + + +We could also pass the cache itself: + +.. code-block:: python + + my_cache = results.cache + + new_results = survey.by(m).run(cache = my_cache) + + Instantiating a new cache -^^^^^^^^^^^^^^^^^^^^^^^^^ +------------------------- + This code will instantiate a new cache object but using a dictionary as the data attribute. In-memory usage ^^^^^^^^^^^^^^^ + .. code-block:: python from edsl import Cache @@ -69,6 +223,7 @@ More on this below. Local persistence for an in-memory cache ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + .. code-block:: python c = Cache() @@ -89,8 +244,9 @@ You can then load the cache from the SQLite3 database or JSONL file using Cache SQLite3Dict for transactions ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Instead of using a dictionary as the data attribute, you can use a special dictionary-like object based on -SQLite3. 
This will persist the cache between sessions. + +Instead of using a dictionary as the data attribute, you can use a special dictionary-like object based on SQLite3. +This will persist the cache between sessions. This is the "normal" way that a cache is used for runs where no specic cache is passed. .. code-block:: python @@ -107,17 +263,18 @@ It will persist between sessions and can be loaded using the `from_sqlite_db` me Default SQLite Cache: .edsl_cache/data.db ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + By default, the cache will be stored in a SQLite3 database at the path `.edsl_cache/data.db`. You can interact with this cache directly, e.g., .. code-block:: bash - sqlite3 .edsl_cache/data.db - + sqlite3 .edsl_cache/data.db Setting a session cache ^^^^^^^^^^^^^^^^^^^^^^^ + The `set_session_cache` function is used to set the cache for a session: .. code-block:: python @@ -150,32 +307,33 @@ The `unset_session_cache` function is used to unset the cache for a session: This will unset the cache for the current session, and you will need to pass the cache object to the `run` method during the session. - Avoiding cache persistence ^^^^^^^^^^^^^^^^^^^^^^^^^^ + We can avoid cache persistence by passing `cache=False` to the `run` method: .. code-block:: python from edsl import QuestionFreeText - q = QuestionFreeText.example() - results = q.run(cache = False) + q = QuestionFreeText.example() + results = q.run(cache = False) For developers -^^^^^^^^^^^^^^ +-------------- Delayed cache-writing: Useful for remote caching ------------------------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + Separate from this remote cache syncing, delays can be made in writing to the cache itself. By default, the cache will write to the cache immediately after storing a new entry. However, this can be changed by setting the `immediate_write` parameter to False. .. code-block:: python - c = Cache(immediate_write = False) + c = Cache(immediate_write = False) This is useful when you want to store entries to the cache only after a block of code has been executed. This is also controlled by using the cache object as a context. @@ -190,7 +348,6 @@ This is also controlled by using the cache object as a context. - Cache class ----------- From 0e3ceaf2ab1cff8ccb3978ffc50991cdc40f58ba Mon Sep 17 00:00:00 2001 From: robin Date: Tue, 21 Jan 2025 18:05:48 -0500 Subject: [PATCH 20/38] updating changelog --- CHANGELOG.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b2ff1e91..1af4363c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,19 @@ # Changelog +## [0.1.42] - TBD +### Added + +### Changed +- Formatting improvements to the Exceptions Report. + +## [0.1.41] - 2025-01- + ## [0.1.40] - 2025-01-15 ### Added - Question type `QuestionDict` returns a response as a dictionary with specified keys and (optionally) specified value types and descriptions. Details: https://docs.expectedparrot.com/en/latest/questions.html#questiodict-class ### Changed -- Previously, results of jobs run remotely were automatically synced to your local cache. Now, results are only added to the cache where the job is being run, local or remote. +- Previously, results of jobs run remotely were automatically synced to your local cache. Now, results are only added to the cache where the job is being run, local or remote. 
Results now include the following fields for the cache associated with the results: `cache_keys._cache_key` and `cache_used._cache_used` - Improvements to web-based progress bar for remote jobs. From ec4b134facc0b2044dd764b454f74c8fd68967f6 Mon Sep 17 00:00:00 2001 From: John Horton Date: Wed, 22 Jan 2025 06:49:27 -0500 Subject: [PATCH 21/38] Fix double running of jobs --- edsl/jobs/Jobs.py | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/edsl/jobs/Jobs.py b/edsl/jobs/Jobs.py index ac163d61..538b78ef 100644 --- a/edsl/jobs/Jobs.py +++ b/edsl/jobs/Jobs.py @@ -521,16 +521,18 @@ async def _execute_with_remote_cache(self, run_job_async: bool) -> Results: results = runner.run(self.run_config.parameters) return results - def _setup_and_check(self) -> Tuple[RunConfig, Optional[Results]]: - self._prepare_to_run() - self._check_if_remote_keys_ok() + # def _setup_and_check(self) -> Tuple[RunConfig, Optional[Results]]: + # self._prepare_to_run() + # self._check_if_remote_keys_ok() - # first try to run the job remotely - if results := self._remote_results(): - return results + # # first try to run the job remotely + # results = self._remote_results() + # #breakpoint() + # if results is not None: + # return results - self._check_if_local_keys_ok() - return None + # self._check_if_local_keys_ok() + # return None @property def num_interviews(self): @@ -539,7 +541,7 @@ def num_interviews(self): else: return len(self) * self.run_config.parameters.n - def _run(self, config: RunConfig): + def _run(self, config: RunConfig) -> Union[None, "Results"]: "Shared code for run and run_async" if config.environment.cache is not None: self.run_config.environment.cache = config.environment.cache @@ -581,7 +583,7 @@ def _run(self, config: RunConfig): # first try to run the job remotely if results := self._remote_results(): return results - + self._check_if_local_keys_ok() if config.environment.bucket_collection is None: @@ -589,6 +591,8 @@ def _run(self, config: RunConfig): self.create_bucket_collection() ) + return None + @with_config def run(self, *, config: RunConfig) -> "Results": """ @@ -608,7 +612,10 @@ def run(self, *, config: RunConfig) -> "Results": :param bucket_collection: A BucketCollection object to track API calls :param key_lookup: A KeyLookup object to manage API keys """ - self._run(config) + potentially_completed_results = self._run(config) + + if potentially_completed_results is not None: + return potentially_completed_results return asyncio.run(self._execute_with_remote_cache(run_job_async=False)) From d0041d4367591c127e411c924d708af67d3dc292 Mon Sep 17 00:00:00 2001 From: stefanm <96014354+zer0dss@users.noreply.github.com> Date: Wed, 22 Jan 2025 14:40:46 +0200 Subject: [PATCH 22/38] rerun tests --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 08c59faa..b69a8492 100644 --- a/README.md +++ b/README.md @@ -51,4 +51,4 @@ See instructions on [storing API keys](https://docs.expectedparrot.com/en/latest ## 💡 Contributions, feature requests & bugs Interested in contributing? Want us to add a new feature? Found a bug for us to squash? -Please send us an email at [info@expectedparrot.com](mailto:info@expectedparrot.com) or message us at our [Discord channel](https://discord.com/invite/mxAYkjfy9m). +Please send us an email at [info@expectedparrot.com](mailto:info@expectedparrot.com) or message us at our [Discord channel](https://discord.com/invite/mxAYkjfy9m).. 
From 414476cc6dd35cb664aabe49cc3922b30937fb31 Mon Sep 17 00:00:00 2001 From: robin Date: Wed, 22 Jan 2025 10:18:59 -0500 Subject: [PATCH 23/38] updating docs --- docs/language_models.rst | 140 ++++++++++++++++++++++++++++++++++----- 1 file changed, 122 insertions(+), 18 deletions(-) diff --git a/docs/language_models.rst b/docs/language_models.rst index d17b5481..3959dfc3 100644 --- a/docs/language_models.rst +++ b/docs/language_models.rst @@ -14,7 +14,7 @@ Output for examples shown below can also be viewed in this notebook at Coop. Available services ------------------ -The following code will return a table of currently available services (model providers): +The following code will return a table of currently available inference services (model providers) together with an indicator whether a local key is currently stored for each service: .. code-block:: python @@ -32,6 +32,7 @@ Output: * - openai * - anthropic * - deep_infra + * - deepseek * - google * - groq * - bedrock @@ -46,7 +47,7 @@ Output: Available models ---------------- -The following code will return a table of all the available models for all services: +The following code will return a table of all the available models for all services (output omitted here for brevity): .. code-block:: python @@ -55,10 +56,7 @@ The following code will return a table of all the available models for all servi Model.available() -This will return a list of the models we can choose from, for all service providers (omitted here for brevity). -Run the code on your own to see an up-to-date list. - -To see a list of all models for a specific service, pass the service: +To see a list of all models for a specific service, pass the service name as an argument: .. code-block:: python @@ -83,7 +81,8 @@ Output: *Note:* It is important to check that selected models are working as expected before running a survey. -We recommend running test questions with any models, agents and scenarios that you plan to use in a survey to validate performance before moving onto larger jobs. +Up-to-date information on available models can be found at the Expected Parrot model pricing page: https://www.expectedparrot.com/getting-started/coop-pricing. +We also recommend checking providers' websites. Adding a model -------------- @@ -96,25 +95,40 @@ If you do not see a publicly available model that you want to work with, please from edsl import Model - Model.add_model(service_name = "anthropic", model_name = "new_model") + Model.add_model(service_name = "google", model_name = "new_model") -This will add the model `new_model` to the `anthropic` service. +This will add the model `new_model` to the `google` service. You can then see the model in the list of available models, and search by service name: .. code-block:: python - Model.available(service = "anthropic") + Model.available(service = "google") Output: .. list-table:: :header-rows: 1 * - Model Name - gemini-1.0-pro - gemini-1.0-flash - gemini-1.5-pro - gemini-pro - new_model * - Service Name - google - google - google - google - google Check models ------------ The following code checks for models where API keys have been stored locally: .. code-block:: python @@ -142,23 +156,23 @@ Specifying a model ------------------ We specify a model to use with a survey by creating a `Model` object and passing it the name of an available model. We can optionally set other model parameters as well (temperature, etc.). 
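+For instance, a model can be created with non-default parameters (an illustrative sketch, following the `Model('gpt-4-1106-preview', temperature=0.5)` usage shown in the exception message earlier in this series; supported parameter names vary by inference service):
+
+.. code-block:: python
+
+    from edsl import Model
+
+    m = Model('gpt-4o', temperature = 0.7)
+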
-For example, the following code creates a `Model` object for Claude 3.5 Sonnet with default model parameters: +For example, the following code creates a `Model` object for `gpt-4o` with default model parameters: .. code-block:: python from edsl import Model - model = Model('gpt-4o') + m = Model('gpt-4o') We can see that the object consists of a model name and a dictionary of parameters: .. code-block:: python - model + m -This will show the default parameters of the model: +This will show the default parameters of the model, together with the name of the inference service (some models are made provided by multiple services): .. list-table:: :header-rows: 1 @@ -181,6 +195,79 @@ This will show the default parameters of the model: - False * - parameters:top_logprobs - 3 + * - inference_service + - openai + + +Creating a list of models +------------------------- + +We can create a list of models by passing a list of model names to the `ModelList` class. +For example, the following code creates a `ModelList` object for `gpt-4o` and `gemini-pro`: + +.. code-block:: python + + from edsl import Model, ModelList + + ml = ModelList([Model('gpt-4o'), Model('gemini-pro')]) + + +We can also create a model list from a list of model names: + +.. code-block:: python + + from edsl import Model, ModelList + + model_names = ['gpt-4o', 'gemini-pro'] + + ml = ModelList.from_names(model_names) + + ml + + +Output: + +.. list-table:: + :header-rows: 1 + + * - frequency_penalty + - max_tokens + - stopSequences + - temperature + - logprobs + - model + - maxOutputTokens + - topK + - top_logprobs + - topP + - presence_penalty + - top_p + + * - 0.000000 + - 1000.000000 + - nan + - 0.500000 + - False + - gpt-4o + - nan + - nan + - 3.000000 + - nan + - 0.000000 + - 1.000000 + + * - nan + - nan + - [] + - 0.500000 + - nan + - gemini-pro + - 2048.000000 + - 1.000000 + - nan + - 1.000000 + - nan + - nan Running a survey with models @@ -227,7 +314,6 @@ The following commands are equivalent: results = survey.by(models).by(agents).by(scenarios).run() - Default model ------------- @@ -242,6 +328,22 @@ For example, the following code runs the example survey with the default model ( results = Survey.example().run() +We can verify the model that was used: + +.. code-block:: python + + results.select("model.model") # selecting only the model name + + +Output: + +.. list-table:: + :header-rows: 1 + + * - model + * - gpt-4o + + Inspecting model details in results ----------------------------------- @@ -250,11 +352,13 @@ For example, we can verify the default model when running a survey without speci .. 
code-block:: python - from edsl import Survey + from edsl import Survey, Model, ModelList + + m = ModelList.from_names(["gpt-4o", "gemini-pro"]) survey = Survey.example() - results = survey.run() + results = survey.by(m).run() results.models From 5a4f253043d811cb3c021a07bf8362e88cabcc07 Mon Sep 17 00:00:00 2001 From: Rae Date: Wed, 22 Jan 2025 11:12:43 -0500 Subject: [PATCH 24/38] Add inference service to ModelList table --- edsl/language_models/ModelList.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/edsl/language_models/ModelList.py b/edsl/language_models/ModelList.py index 76dc77a5..843e51af 100644 --- a/edsl/language_models/ModelList.py +++ b/edsl/language_models/ModelList.py @@ -60,7 +60,7 @@ def to_scenario_list(self): sl = ScenarioList() for model in self: - d = {"model": model.model} + d = {"model": model.model, "inference_service": model._inference_service_} d.update(model.parameters) sl.append(Scenario(d)) return sl From beb449050172695c9bb84f6e9e20e00a189fc128 Mon Sep 17 00:00:00 2001 From: robin Date: Wed, 22 Jan 2025 12:34:28 -0500 Subject: [PATCH 25/38] updating example survey (no more bees) --- edsl/surveys/Survey.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/edsl/surveys/Survey.py b/edsl/surveys/Survey.py index 4819f5b2..0337ed23 100644 --- a/edsl/surveys/Survey.py +++ b/edsl/surveys/Survey.py @@ -1183,31 +1183,32 @@ def example( >>> s = Survey.example() >>> [q.question_text for q in s.questions] - ['Do you like school?', 'Why not?', 'Why?'] + ['What is the capital of France?', 'Name some cities in France.', 'What is the population of {{ q0.answer }}?'] """ - from edsl.questions.QuestionMultipleChoice import QuestionMultipleChoice + from edsl import QuestionMultipleChoice, QuestionList, QuestionNumerical, QuestionYesNo addition = "" if not randomize else str(uuid4()) q0 = QuestionMultipleChoice( - question_text=f"Do you like school?{addition}", - question_options=["yes", "no"], question_name="q0", + question_text=f"What is the capital of France?{addition}", + question_options=["London", "Paris", "Rome", "Boston"] ) - q1 = QuestionMultipleChoice( - question_text="Why not?", - question_options=["killer bees in cafeteria", "other"], + q1 = QuestionList( question_name="q1", + question_text="Name some cities in France.", + max_list_items = 5 ) - q2 = QuestionMultipleChoice( - question_text="Why?", - question_options=["**lack*** of killer bees in cafeteria", "other"], + q2 = QuestionNumerical( question_name="q2", + question_text="What is the population of {{ q0.answer }}?" ) if params: - q3 = QuestionMultipleChoice( - question_text="To the question '{{ q0.question_text}}', you said '{{ q0.answer }}'. Do you still feel this way?", - question_options=["yes", "no"], + q3 = QuestionYesNo( question_name="q3", + question_text=""" + In answer to the question '{{ q0.question_text}}' you responded '{{ q0.answer }}'. + Are you confident in your response? 
+                """
             )
         s = cls(questions=[q0, q1, q2, q3])
         return s

From e643935bb769783d3c730a5222112923cf1f2aa2 Mon Sep 17 00:00:00 2001
From: robin
Date: Wed, 22 Jan 2025 13:34:48 -0500
Subject: [PATCH 26/38] updating docs

---
 docs/language_models.rst | 246 ++++++++-------------------------------
 1 file changed, 50 insertions(+), 196 deletions(-)

diff --git a/docs/language_models.rst b/docs/language_models.rst
index 3959dfc3..c1857543 100644
--- a/docs/language_models.rst
+++ b/docs/language_models.rst
@@ -3,12 +3,21 @@
 Language Models
 ===============
 
-Language models are used to generate agent responses to survey questions and can be specified using the `Model` and `ModelList` classes.
+Language models are used to generate agents' responses to survey questions and can be specified using the `Model` and `ModelList` classes.
 
-API keys are required in order to access available models, and should be stored in your private `.env` file.
-See the :ref:`api_keys` page for instructions on storing your API keys.
+EDSL works with a variety of popular inference service providers, including Anthropic, Google, OpenAI and others.
+Current information about available models can be found at the Expected Parrot model pricing page: https://www.expectedparrot.com/getting-started/coop-pricing.
+We also recommend checking providers' websites for the most up-to-date information on available models.
+It is important to check that the models you want to use are available and working as expected before running a survey.
+If you need assistance checking whether a model is working, please send a message to info@expectedparrot.com or post a message at our `Discord channel `_.
 
-Output for examples shown below can also be viewed in this notebook at Coop.
+
+API keys
+--------
+
+In order to use a model, you need to have an API key for the relevant service provider.
+EDSL allows you to choose whether to provide your own API keys for models or use an Expected Parrot API key to access all available models at once.
+See the :ref:`api_keys` page for instructions on storing API keys.
 
 
 Available services
@@ -80,11 +89,6 @@ Output:
 
      - google
 
-*Note:* It is important to check that selected models are working as expected before running a survey.
-Up-to-date information on available models can be found at the Expected Parrot model pricing page: https://www.expectedparrot.com/getting-started/coop-pricing.
-We also recommend checking providers' website.
-
-
 Adding a model
 --------------
 
@@ -163,16 +167,10 @@ For example, the following code creates a `Model` object for `gpt-4o` with defau
 
     from edsl import Model
 
     m = Model('gpt-4o')
-
-
-We can see that the object consists of a model name and a dictionary of parameters:
-
-.. code-block:: python
-
     m
 
 
-This will show the default parameters of the model, together with the name of the inference service (some models are provided by multiple services):
+Output:
 
 .. list-table::
    :header-rows: 1
 
@@ -199,6 +197,9 @@ This will show the default parameters of the model, together with th
      - openai
 
+We can see that the object consists of a model name and a dictionary of the default parameters of the model, together with the name of the inference service (some models are provided by multiple services).
+
+
 Creating a list of models
 -------------------------
 
@@ -230,42 +231,40 @@ Output:
 
 .. 
list-table:: :header-rows: 1 - * - frequency_penalty + * - topP + - topK + - presence_penalty + - top_logprobs + - top_p - max_tokens - - stopSequences + - maxOutputTokens - temperature - - logprobs - model - - maxOutputTokens - - topK - - top_logprobs - - topP - - presence_penalty - - top_p - - * - 0.000000 + - stopSequences + - logprobs + - frequency_penalty + * - nan + - nan + - 0.000000 + - 3.000000 + - 1.000000 - 1000.000000 - nan - 0.500000 - - False - gpt-4o - nan + - False + - 0.000000 + * - 1.000000 + - 1.000000 - nan - - 3.000000 - nan - - 0.000000 - - 1.000000 - - * - nan - nan - - [] - - 0.500000 - nan - - gemini-pro - 2048.000000 - - 1.000000 - - nan - - 1.000000 + - 0.500000 + - gemini-pro + - [] - nan - nan @@ -273,16 +272,16 @@ Output: Running a survey with models ---------------------------- -Similar to how we specify :ref:`agents` and :ref:`scenarios` in running a survey, we specify the models to use by adding them to a survey with the `by()` method when the survey is run. +Similar to how we specify :ref:`agents` and :ref:`scenarios` to use with a survey, we specify the models to use by adding them to a survey with the `by()` method when the survey is run. We can pass either a single `Model` object or a list of models to the `by()` method. If multiple models are to be used they are passed as a list or as a `ModelList` object. -For example, the following code specifies that a survey be run with each of GPT 4 and Gemini Pro: +For example, the following code specifies that a survey will be run with each of `gpt-4o` and `gemini-1.5-flash`: .. code-block:: python from edsl import Model, Survey - models = [Model('gpt-4o'), Model('gemini-pro')] + models = [Model('gpt-4o'), Model('gemini-1.5-flash')] survey = Survey.example() @@ -295,7 +294,7 @@ This code uses `ModelList` instead of a list of `Model` objects: from edsl import Model, ModelList, Survey - models = ModelList(Model(m) for m in ['gpt-4o', 'gemini-pro']) + models = ModelList(Model(m) for m in ['gpt-4o', 'gemini-1.5-flash']) survey = Survey.example() @@ -309,8 +308,11 @@ The following commands are equivalent: .. code-block:: python + # add code for creating survey, scenarios, agents, models here ... + results = survey.by(scenarios).by(agents).by(models).run() + # this is equivalent: results = survey.by(models).by(agents).by(scenarios).run() @@ -344,17 +346,17 @@ Output: * - gpt-4o -Inspecting model details in results ------------------------------------ +Inspecting model parameters +--------------------------- -If a survey has been run, we can inspect the models that were used by calling the `models` method on the `Results` object. +We can also inspect parameters of the models that were used by calling the `models` method on the `Results` object. For example, we can verify the default model when running a survey without specifying a model: .. code-block:: python from edsl import Survey, Model, ModelList - m = ModelList.from_names(["gpt-4o", "gemini-pro"]) + m = ModelList.from_names(["gpt-4o", "gemini-1.5-flash"]) survey = Survey.example() @@ -363,159 +365,11 @@ For example, we can verify the default model when running a survey without speci results.models -This will return the following information about the default model that was used (note the default model may have changed since this page was last updated): - -.. 
list-table:: - :header-rows: 1 - - * - model - - temperature - - max_tokens - - top_p - - frequency_penalty - - presence_penalty - - logprobs - - top_logprobs - * - gpt-4o - - 0.5 - - 1000 - - 1 - - 0 - - 0 - - False - - 3 - +This will return the same information as the `ModelList` created above. To learn more about all the components of a `Results` object, please see the :ref:`results` section. -Printing model attributes -------------------------- - -If multiple models were used to generate results, we can print the attributes in a table. -For example, the following code prints a table of the model names and temperatures for some results: - -.. code-block:: python - - from edsl import Survey, ModelList, Model - - models = ModelList( - Model(m) for m in ['gpt-4o', 'gemini-1.5-pro'] - ) - - survey = Survey.example() - - results = survey.by(models).run() - - results.select("model", "temperature") # This is equivalent to: results.select("model.model", "model.temperature") - - -Output: - -.. list-table:: - :header-rows: 1 - - * - model.model - - model.temperature - * - gpt-4o - - 0.5 - * - gemini-1.5-pro - - 0.5 - - -We can also print model attributes together with other components of results. -We can see a list of all components by calling the `columns` method on the results: - -.. code-block:: python - - results.columns - - -Output: - -.. list-table:: - :header-rows: 1 - - * - 0 - * - agent.agent_instruction - * - agent.agent_name - * - answer.q0 - * - answer.q1 - * - answer.q2 - * - comment.q0_comment - * - comment.q1_comment - * - comment.q2_comment - * - generated_tokens.q0_generated_tokens - * - generated_tokens.q1_generated_tokens - * - generated_tokens.q2_generated_tokens - * - iteration.iteration - * - model.frequency_penalty - * - model.logprobs - * - model.maxOutputTokens - * - model.max_tokens - * - model.model - * - model.presence_penalty - * - model.stopSequences - * - model.temperature - * - model.topK - * - model.topP - * - model.top_logprobs - * - model.top_p - * - prompt.q0_system_prompt - * - prompt.q0_user_prompt - * - prompt.q1_system_prompt - * - prompt.q1_user_prompt - * - prompt.q2_system_prompt - * - prompt.q2_user_prompt - * - question_options.q0_question_options - * - question_options.q1_question_options - * - question_options.q2_question_options - * - question_text.q0_question_text - * - question_text.q1_question_text - * - question_text.q2_question_text - * - question_type.q0_question_type - * - question_type.q1_question_type - * - question_type.q2_question_type - * - raw_model_response.q0_cost - * - raw_model_response.q0_one_usd_buys - * - raw_model_response.q0_raw_model_response - * - raw_model_response.q1_cost - * - raw_model_response.q1_one_usd_buys - * - raw_model_response.q1_raw_model_response - * - raw_model_response.q2_cost - * - raw_model_response.q2_one_usd_buys - * - raw_model_response.q2_raw_model_response - - -The following code will display a table of the model names together with the simulated answers: - -.. code-block:: python - - results.select("model", "answer.*") - - -Output: - -.. list-table:: - :header-rows: 1 - - * - model.model - - answer.q0 - - answer.q1 - - answer.q2 - * - gpt-4o - - no - - killer bees in cafeteria - - - * - gemini-1.5-pro - - yes - - - - other - - -To learn more about methods of inspecting and printing results, please see the :ref:`results` section. 
-
-
 ModelList class
 ---------------
 

From a56e317043811fe9d5727fdfcd1a176e8b3f6d34 Mon Sep 17 00:00:00 2001
From: robin
Date: Wed, 22 Jan 2025 14:15:33 -0500
Subject: [PATCH 27/38] updating docs

---
 docs/results.rst | 1015 ++++++++++++++++++++++++----------------------
 1 file changed, 519 insertions(+), 496 deletions(-)

diff --git a/docs/results.rst b/docs/results.rst
index c59602b5..eb168e93 100644
--- a/docs/results.rst
+++ b/docs/results.rst
@@ -4,250 +4,259 @@ Results
 =======
 
 A `Results` object represents the outcome of running a `Survey`.
-It contains a list of individual `Result` objects, where each `Result` corresponds to a response to the survey for a unique combination of `Agent`, `Model`, and `Scenario` object used in the survey.
+It contains a list of individual `Result` objects, where each `Result` corresponds to a response to the survey for a unique combination of `Agent`, `Model`, and `Scenario` objects used with the survey.
 
-For example, if a survey is administered to 2 agents and 2 language models without any scenarios, the `Results` will contain 4 `Result` objects (one for each combination of agent and model).
-If the survey includes parameterized questions with 2 scenarios, the `Results` will expand to include 8 `Result` objects (accounting for all combinations of agents, models, and scenarios).
+For example, if a survey (of one or more questions) is administered to 2 agents and 2 language models (without any scenarios for the questions), the `Results` will contain 4 `Result` objects: one for each combination of agent and model used with the survey.
+If the survey questions are parameterized with 2 scenarios, the `Results` will expand to include 8 `Result` objects, accounting for all combinations of agents, models, and scenarios.
 
 
 Generating results
-^^^^^^^^^^^^^^^^^^
+------------------
 
 A `Results` object is not typically instantiated directly, but is returned by calling the `run()` method of a `Survey` after any agents, language models and scenarios are added to it.
 
 In order to demonstrate how to access and interact with results, we use the following code to generate results for a simple survey.
-Note that specifying agent traits, scenarios (question parameter values) and language models is optional, and we include those steps here for illustrative purposes.
-(See the :ref:`agents`, :ref:`scenarios` and :ref:`models` sections for more details on these components.)
+Note that specifying agent traits, scenarios (question parameter values) and language models is optional, and we include those steps here for illustration purposes.
+See the :ref:`agents`, :ref:`scenarios` and :ref:`models` sections for more details on these components.
 
 **Note:** You must store API keys for language models in order to generate results.
-Please see the :ref:`api_keys` section for instructions on activating :ref:`remote_inference` from your :ref:`coop` account or storing your own API keys from service providers.
+Please see the :ref:`api_keys` section for instructions on activating :ref:`remote_inference` or storing your own API keys for inference service providers.
 
 To construct a survey we start by creating questions:
 
 .. 
code-block:: python - from edsl import QuestionLinearScale, QuestionMultipleChoice + from edsl import QuestionLinearScale, QuestionMultipleChoice - q1 = QuestionLinearScale( - question_name = "important", - question_text = "On a scale from 1 to 5, how important to you is {{ topic }}?", - question_options = [0, 1, 2, 3, 4, 5], - option_labels = {0:"Not at all important", 5:"Very important"} - ) + q1 = QuestionLinearScale( + question_name = "important", + question_text = "On a scale from 1 to 5, how important to you is {{ topic }}?", + question_options = [0, 1, 2, 3, 4, 5], + option_labels = {0:"Not at all important", 5:"Very important"} + ) - q2 = QuestionMultipleChoice( - question_name = "read", - question_text = "Have you read any books about {{ topic }}?", - question_options = ["Yes", "No", "I do not know"] - ) + q2 = QuestionMultipleChoice( + question_name = "read", + question_text = "Have you read any books about {{ topic }}?", + question_options = ["Yes", "No", "I do not know"] + ) We combine them in a survey to administer them together: .. code-block:: python - from edsl import Survey + from edsl import Survey - survey = Survey([q1, q2]) + survey = Survey([q1, q2]) We have parameterized our questions, so we can use them with different scenarios: .. code-block:: python - from edsl import ScenarioList + from edsl import ScenarioList - scenarios = ScenarioList.from_list("topic", ["climate change", "house prices"]) + scenarios = ScenarioList.from_list("topic", ["climate change", "house prices"]) We can optionally create agents with personas or other relevant traits to answer the survey: .. code-block:: python - from edsl import AgentList, Agent + from edsl import AgentList, Agent - agents = AgentList( - Agent(traits = {"persona": p}) for p in ["student", "celebrity"] - ) + agents = AgentList( + Agent(traits = {"persona": p}) for p in ["student", "celebrity"] + ) We can specify the language models that we want to use to generate responses: .. code-block:: python - from edsl import ModelList, Model + from edsl import ModelList, Model - models = ModelList( - Model(m) for m in ["gemini-1.5-flash", "gpt-4o"] - ) + models = ModelList( + Model(m) for m in ["gemini-1.5-flash", "gpt-4o"] + ) Finally, we generate results by adding the scenarios, agents and models to the survey and calling the `run()` method: .. code-block:: python - results = survey.by(scenarios).by(agents).by(models).run() + results = survey.by(scenarios).by(agents).by(models).run() For more details on each of the above steps, please see the :ref:`agents`, :ref:`scenarios` and :ref:`models` sections of the docs. Result objects -^^^^^^^^^^^^^^ +-------------- We can check the number of `Result` objects created by inspecting the length of the `Results`: .. code-block:: python - len(results) + len(results) This will count 2 (scenarios) x 2 (agents) x 2 (models) = 8 `Result` objects: .. code-block:: text - 8 + 8 Generating multiple results -^^^^^^^^^^^^^^^^^^^^^^^^^^^ +--------------------------- If we want to generate multiple results for a survey--i.e., more than 1 result for each combination of `Agent`, `Model` and `Scenario` objects used--we can pass the desired number of iterations when calling the `run()` method. For example, the following code will generate 3 results for our survey (n=3): .. 
code-block:: python - results = survey.by(scenarios).by(agents).by(models).run(n=3) + results = survey.by(scenarios).by(agents).by(models).run(n=3) We can verify that the number of `Result` objects created is now 24 = 3 iterations x 2 scenarios x 2 agents x 2 models: .. code-block:: python - len(results) + len(results) .. code-block:: text - 24 + 24 We can readily inspect a result: .. code-block:: python - results[0] + results[0] Output: .. list-table:: - :header-rows: 1 - - * - key - - value - * - agent:traits - - {'persona': 'student'} - * - scenario:topic - - climate change - * - model:model - - gemini-1.5-flash - * - model:parameters - - {'temperature': 0.5, 'topP': 1, 'topK': 1, 'maxOutputTokens': 2048, 'stopSequences': []} - * - iteration - - 0 - * - answer:important - - 5 - * - answer:read - - Yes - * - prompt:important_user_prompt - - {'text': 'On a scale from 1 to 5, how important to you is climate change?\n\n0 : Not at all important\n\n1 : \n\n2 : \n\n3 : \n\n4 : \n\n5 : Very important\n\nOnly 1 option may be selected.\n\nRespond only with the code corresponding to one of the options. E.g., "1" or "5" by itself.\n\nAfter the answer, you can put a comment explaining why you chose that option on the next line.', 'class_name': 'Prompt'} - * - prompt:important_system_prompt - - {'text': "You are answering questions as if you were a human. Do not break character. Your traits: {'persona': 'student'}", 'class_name': 'Prompt'} - * - prompt:read_user_prompt - - {'text': '\nHave you read any books about climate change?\n\n \nYes\n \nNo\n \nI do not know\n \n\nOnly 1 option may be selected.\n\nRespond only with a string corresponding to one of the options.\n\n\nAfter the answer, you can put a comment explaining why you chose that option on the next line.', 'class_name': 'Prompt'} - * - prompt:read_system_prompt - - {'text': "You are answering questions as if you were a human. Do not break character. Your traits: {'persona': 'student'}", 'class_name': 'Prompt'} - * - raw_model_response:important_raw_model_response - - {'candidates': [{'content': {'parts': [{'text': "5\n\nIt's, like, a huge deal. The future of the planet is at stake, and that affects everything – from the environment to the economy to social justice. It's something I worry about a lot.\n"}], 'role': 'model'}, 'finish_reason': 1, 'safety_ratings': [{'category': 8, 'probability': 1, 'blocked': False}, {'category': 10, 'probability': 1, 'blocked': False}, {'category': 7, 'probability': 1, 'blocked': False}, {'category': 9, 'probability': 1, 'blocked': False}], 'avg_logprobs': -0.12477729758437799, 'token_count': 0, 'grounding_attributions': []}], 'usage_metadata': {'prompt_token_count': 129, 'candidates_token_count': 49, 'total_token_count': 178, 'cached_content_token_count': 0}} - * - raw_model_response:important_cost - - 1.78e-12 - * - raw_model_response:important_one_usd_buys - - 561797752808.9888 - * - raw_model_response:read_raw_model_response - - {'candidates': [{'content': {'parts': [{'text': "Yes\n\nI've read a few articles and some chapters from textbooks for my environmental science class, which covered climate change extensively. 
It's not exactly the same as reading a whole book dedicated to the subject, but I've definitely learned about it.\n"}], 'role': 'model'}, 'finish_reason': 1, 'safety_ratings': [{'category': 8, 'probability': 1, 'blocked': False}, {'category': 10, 'probability': 1, 'blocked': False}, {'category': 7, 'probability': 1, 'blocked': False}, {'category': 9, 'probability': 1, 'blocked': False}], 'avg_logprobs': -0.14903209827564382, 'token_count': 0, 'grounding_attributions': []}], 'usage_metadata': {'prompt_token_count': 96, 'candidates_token_count': 54, 'total_token_count': 150, 'cached_content_token_count': 0}} - * - raw_model_response:read_cost - - 1.5e-12 - * - raw_model_response:read_one_usd_buys - - 666666666666.6666 - * - question_to_attributes:important - - {'question_text': 'On a scale from 1 to 5, how important to you is {{ topic }}?', 'question_type': 'linear_scale', 'question_options': [0, 1, 2, 3, 4, 5]} - * - question_to_attributes:read - - {'question_text': 'Have you read any books about {{ topic }}?', 'question_type': 'multiple_choice', 'question_options': ['Yes', 'No', 'I do not know']} - * - generated_tokens:important_generated_tokens - - 5 - - It's, like, a huge deal. The future of the planet is at stake, and that affects everything – from the environment to the economy to social justice. It's something I worry about a lot. - * - generated_tokens:read_generated_tokens - - Yes - - I've read a few articles and some chapters from textbooks for my environmental science class, which covered climate change extensively. It's not exactly the same as reading a whole book dedicated to the subject, but I've definitely learned about it. - * - comments_dict:important_comment - - It's, like, a huge deal. The future of the planet is at stake, and that affects everything – from the environment to the economy to social justice. It's something I worry about a lot. - * - comments_dict:read_comment - - I've read a few articles and some chapters from textbooks for my environmental science class, which covered climate change extensively. It's not exactly the same as reading a whole book dedicated to the subject, but I've definitely learned about it. + :header-rows: 1 + + * - key + - value + * - agent:traits + - {'persona': 'student'} + * - scenario:topic + - climate change + * - model:model + - gemini-1.5-flash + * - model:parameters + - {'temperature': 0.5, 'topP': 1, 'topK': 1, 'maxOutputTokens': 2048, 'stopSequences': []} + * - iteration + - 0 + * - answer:important + - 5 + * - answer:read + - Yes + * - prompt:important_user_prompt + - {'text': 'On a scale from 1 to 5, how important to you is climate change?\n\n0 : Not at all important\n\n1 : \n\n2 : \n\n3 : \n\n4 : \n\n5 : Very important\n\nOnly 1 option may be selected.\n\nRespond only with the code corresponding to one of the options. E.g., "1" or "5" by itself.\n\nAfter the answer, you can put a comment explaining why you chose that option on the next line.', 'class_name': 'Prompt'} + * - prompt:important_system_prompt + - {'text': "You are answering questions as if you were a human. Do not break character. 
Your traits: {'persona': 'student'}", 'class_name': 'Prompt'} + * - prompt:read_user_prompt + - {'text': '\nHave you read any books about climate change?\n\n \nYes\n \nNo\n \nI do not know\n \n\nOnly 1 option may be selected.\n\nRespond only with a string corresponding to one of the options.\n\n\nAfter the answer, you can put a comment explaining why you chose that option on the next line.', 'class_name': 'Prompt'} + * - prompt:read_system_prompt + - {'text': "You are answering questions as if you were a human. Do not break character. Your traits: {'persona': 'student'}", 'class_name': 'Prompt'} + * - raw_model_response:important_raw_model_response + - {'candidates': [{'content': {'parts': [{'text': "5\n\nIt's, like, a huge deal. The future of the planet is at stake, you know? We're talking about everything from extreme weather to rising sea levels – it affects everyone, and it's something we all need to be seriously concerned about.\n"}], 'role': 'model'}, 'finish_reason': 1, 'safety_ratings': [{'category': 8, 'probability': 1, 'blocked': False}, {'category': 10, 'probability': 1, 'blocked': False}, {'category': 7, 'probability': 1, 'blocked': False}, {'category': 9, 'probability': 1, 'blocked': False}], 'avg_logprobs': -0.19062816490561274, 'token_count': 0, 'grounding_attributions': []}], 'usage_metadata': {'prompt_token_count': 129, 'candidates_token_count': 59, 'total_token_count': 188, 'cached_content_token_count': 0}} + * - raw_model_response:important_cost + - 0.000027 + * - raw_model_response:important_one_usd_buys + - 36529.685735 + * - raw_model_response:read_raw_model_response + - {'candidates': [{'content': {'parts': [{'text': "Yes\n\nI've read a few articles and some chapters from textbooks for my environmental science class, which touched upon climate change. It's not exactly the same as reading a whole book dedicated to the topic, but it counts, right?\n"}], 'role': 'model'}, 'finish_reason': 1, 'safety_ratings': [{'category': 8, 'probability': 1, 'blocked': False}, {'category': 10, 'probability': 1, 'blocked': False}, {'category': 7, 'probability': 1, 'blocked': False}, {'category': 9, 'probability': 1, 'blocked': False}], 'avg_logprobs': -0.13118227790383732, 'token_count': 0, 'grounding_attributions': []}], 'usage_metadata': {'prompt_token_count': 96, 'candidates_token_count': 51, 'total_token_count': 147, 'cached_content_token_count': 0}} + * - raw_model_response:read_cost + - 0.000022 + * - raw_model_response:read_one_usd_buys + - 44444.451200 + * - question_to_attributes:important + - {'question_text': 'On a scale from 1 to 5, how important to you is {{ topic }}?', 'question_type': 'linear_scale', 'question_options': [0, 1, 2, 3, 4, 5]} + * - question_to_attributes:read + - {'question_text': 'Have you read any books about {{ topic }}?', 'question_type': 'multiple_choice', 'question_options': ['Yes', 'No', 'I do not know']} + * - generated_tokens:important_generated_tokens + - 5 It's, like, a huge deal. The future of the planet is at stake, you know? We're talking about everything from extreme weather to rising sea levels – it affects everyone, and it's something we all need to be seriously concerned about. + * - generated_tokens:read_generated_tokens + - Yes I've read a few articles and some chapters from textbooks for my environmental science class, which touched upon climate change. It's not exactly the same as reading a whole book dedicated to the topic, but it counts, right? + * - comments_dict:important_comment + - It's, like, a huge deal. 
The future of the planet is at stake, you know? We're talking about everything from extreme weather to rising sea levels – it affects everyone, and it's something we all need to be seriously concerned about. + * - comments_dict:read_comment + - I've read a few articles and some chapters from textbooks for my environmental science class, which touched upon climate change. It's not exactly the same as reading a whole book dedicated to the topic, but it counts, right? + * - cache_keys:important + - 98d6961d0529335b74f2363ba9b7a8de + * - cache_keys:read + - 12af825953d89c1f776bd3af40e37cfb Results fields -^^^^^^^^^^^^^^ +-------------- Results contain fields that can be accessed and analyzed individually or collectively. We can see a list of these fields by calling the `columns` method: .. code-block:: python - results.columns + results.columns The following list will be returned for the results generated by the above code: .. list-table:: - :header-rows: 1 - - * - 0 - - agent.agent_instruction - - agent.agent_name - - agent.persona - - answer.important - - answer.read - - comment.important_comment - - comment.read_comment - - generated_tokens.important_generated_tokens - - generated_tokens.read_generated_tokens - - iteration.iteration - - model.frequency_penalty - - model.logprobs - - model.max_tokens - - model.model - - model.presence_penalty - - model.temperature - - model.top_logprobs - - model.top_p - - prompt.important_system_prompt - - prompt.important_user_prompt - - prompt.read_system_prompt - - prompt.read_user_prompt - - question_options.important_question_options - - question_options.read_question_options - - question_text.important_question_text - - question_text.read_question_text - - question_type.important_question_type - - question_type.read_question_type - - raw_model_response.important_cost - - raw_model_response.important_one_usd_buys - - raw_model_response.important_raw_model_response - - raw_model_response.read_cost - - raw_model_response.read_one_usd_buys - - raw_model_response.read_raw_model_response - - scenario.topic + :header-rows: 1 + + * - 0 + - agent.agent_instruction + - agent.agent_name + - agent.persona + - answer.important + - answer.read + - cache_keys.important_cache_key + - cache_keys.read_cache_key + - cache_keys.important_cache_used + - cache_keys.read_cache_used + - comment.important_comment + - comment.read_comment + - generated_tokens.important_generated_tokens + - generated_tokens.read_generated_tokens + - iteration.iteration + - model.frequency_penalty + - model.logprobs + - model.maxOutputTokens + - model.max_tokens + - model.model + - model.presence_penalty + - model.stopSequences + - model.temperature + - model.topK + - model.topP + - model.top_logprobs + - model.top_p + - prompt.important_system_prompt + - prompt.important_user_prompt + - prompt.read_system_prompt + - prompt.read_user_prompt + - question_options.important_question_options + - question_options.read_question_options + - question_text.important_question_text + - question_text.read_question_text + - question_type.important_question_type + - question_type.read_question_type + - raw_model_response.important_cost + - raw_model_response.important_one_usd_buys + - raw_model_response.important_raw_model_response + - raw_model_response.read_cost + - raw_model_response.read_one_usd_buys + - raw_model_response.read_raw_model_response + - scenario.scenario_index + - scenario.topic The columns include information about each *agent*, *model* and corresponding *prompts* used to simulate the 
*answer* to each *question* and *scenario* in the survey, together with each *raw model response*. @@ -264,6 +273,13 @@ If the survey was run multiple times (`run(n=)`) then the `iteration.it * **answer.important**: Agent responses to the linear scale `important` question. * **answer.read**: Agent responses to the multiple choice `read` question. +*Cache* information: + +* **cache_keys.important_cache_key**: The cache key for the `important` question. +* **cache_keys.important_cache_used**: Whether the existing cache was used for the `important` question. +* **cache_keys.read_cache_key**: The cache key for the `read` question. +* **cache_keys.read_cache_used**: Whether the existing cache was used for the `read` question. + *Comment* information: A "comment" field is automatically included for every question in a survey other than free text questions, to allow the model to provide additional information about its response. @@ -288,14 +304,20 @@ Each of `model` columns is a modifiable parameter of the models used to generate * **model.frequency_penalty**: The frequency penalty for the model. * **model.logprobs**: The logprobs for the model. +* **model.maxOutputTokens**: The maximum number of output tokens for the model. * **model.max_tokens**: The maximum number of tokens for the model. * **model.model**: The name of the model used. * **model.presence_penalty**: The presence penalty for the model. +* **model.stopSequences**: The stop sequences for the model. * **model.temperature**: The temperature for the model. +* **model.topK**: The top k for the model. +* **model.topP**: The top p for the model. * **model.top_logprobs**: The top logprobs for the model. * **model.top_p**: The top p for the model. * **model.use_cache**: Whether the model uses cache. +*Note:* Some of the above fields are particular to specific models, and may have different names (e.g., `top_p` vs. `topP`). + *Prompt* information: * **prompt.important_system_prompt**: The system prompt for the `important` question. @@ -326,11 +348,12 @@ Note that the cost of a result for a question is specific to the components (sce *Scenario* information: +* **scenario.scenario_index**: The index of the scenario. * **scenario.topic**: The values provided for the "topic" scenario for the questions. Creating tables by selecting/dropping and printing -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +-------------------------------------------------- Each of these columns can be accessed directly by calling the `select()` method and passing the column names. Alternatively, we can specify the columns to exclude by calling the `drop()` method. @@ -341,237 +364,237 @@ For example, the following code will print a table showing the answers for `read .. code-block:: python - results = survey.by(scenarios).by(agents).by(models).run() # Running the survey once - results.select("model", "persona", "topic", "read", "important") + results = survey.by(scenarios).by(agents).by(models).run() # Running the survey once + results.select("model", "persona", "topic", "read", "important") A table with the selected columns will be printed: .. 
list-table:: - :header-rows: 1 - - * - model.model - - agent.persona - - scenario.topic - - answer.read - - answer.important - * - gemini-1.5-flash - - student - - climate change - - Yes - - 5 - * - gpt-4o - - student - - climate change - - Yes - - 5 - * - gemini-1.5-flash - - student - - house prices - - No - - 1 - * - gpt-4o - - student - - house prices - - No - - 3 - * - gemini-1.5-flash - - celebrity - - climate change - - Yes - - 5 - * - gpt-4o - - celebrity - - climate change - - Yes - - 5 - * - gemini-1.5-flash - - celebrity - - house prices - - Yes - - 3 - * - gpt-4o - - celebrity - - house prices - - No - - 3 + :header-rows: 1 + + * - model.model + - agent.persona + - scenario.topic + - answer.read + - answer.important + * - gemini-1.5-flash + - student + - climate change + - Yes + - 5 + * - gpt-4o + - student + - climate change + - Yes + - 5 + * - gemini-1.5-flash + - student + - house prices + - No + - 1 + * - gpt-4o + - student + - house prices + - No + - 3 + * - gemini-1.5-flash + - celebrity + - climate change + - Yes + - 5 + * - gpt-4o + - celebrity + - climate change + - Yes + - 5 + * - gemini-1.5-flash + - celebrity + - house prices + - Yes + - 3 + * - gpt-4o + - celebrity + - house prices + - No + - 3 Sorting results -^^^^^^^^^^^^^^^ +--------------- We can sort the columns by calling the `sort_by` method and passing it the column names to sort by: .. code-block:: python - ( - results - .sort_by("model", "persona", reverse=False) - .select("model", "persona", "topic", "read", "important") - ) + ( + results + .sort_by("model", "persona", reverse=False) + .select("model", "persona", "topic", "read", "important") + ) The following table will be printed: .. list-table:: - :header-rows: 1 - - * - model.model - - agent.persona - - scenario.topic - - answer.read - - answer.important - * - gemini-1.5-flash - - celebrity - - climate change - - Yes - - 5 - * - gemini-1.5-flash - - celebrity - - house prices - - Yes - - 3 - * - gemini-1.5-flash - - student - - climate change - - Yes - - 5 - * - gemini-1.5-flash - - student - - house prices - - No - - 1 - * - gpt-4o - - celebrity - - climate change - - Yes - - 5 - * - gpt-4o - - celebrity - - house prices - - No - - 3 - * - gpt-4o - - student - - climate change - - Yes - - 5 - * - gpt-4o - - student - - house prices - - No - - 3 + :header-rows: 1 + + * - model.model + - agent.persona + - scenario.topic + - answer.read + - answer.important + * - gemini-1.5-flash + - celebrity + - climate change + - Yes + - 5 + * - gemini-1.5-flash + - celebrity + - house prices + - Yes + - 3 + * - gemini-1.5-flash + - student + - climate change + - Yes + - 5 + * - gemini-1.5-flash + - student + - house prices + - No + - 1 + * - gpt-4o + - celebrity + - climate change + - Yes + - 5 + * - gpt-4o + - celebrity + - house prices + - No + - 3 + * - gpt-4o + - student + - climate change + - Yes + - 5 + * - gpt-4o + - student + - house prices + - No + - 3 Labeling results -^^^^^^^^^^^^^^^^ +---------------- We can also add some table labels by passing a dictionary to the `pretty_labels` argument of the `print` method (note that we need to include the column prefixes when specifying the table labels, as shown below): .. 
code-block:: python - ( - results - .sort_by("model", "persona", reverse=True) - .select("model", "persona", "topic", "read", "important") - .print(pretty_labels={ - "model.model": "LLM", - "agent.persona": "Agent", - "scenario.topic": "Topic", - "answer.read": q2.question_text, - "answer.important": q1.question_text - }, format="rich") - ) + ( + results + .sort_by("model", "persona", reverse=True) + .select("model", "persona", "topic", "read", "important") + .print(pretty_labels={ + "model.model": "LLM", + "agent.persona": "Agent", + "scenario.topic": "Topic", + "answer.read": q2.question_text, + "answer.important": q1.question_text + }, format="rich") + ) The following table will be printed: .. list-table:: - :header-rows: 1 - - * - LLM - - Agent - - Topic - - Have you read any books about {{ topic }}? - - On a scale from 1 to 5, how important to you is {{ topic }}? - * - gpt-4o - - student - - climate change - - Yes - - 5 - * - gpt-4o - - student - - house prices - - No - - 3 - * - gpt-4o - - celebrity - - climate change - - Yes - - 5 - * - gpt-4o - - celebrity - - house prices - - No - - 3 - * - gemini-1.5-flash - - student - - climate change - - Yes - - 5 - * - gemini-1.5-flash - - student - - house prices - - No - - 1 - * - gemini-1.5-flash - - celebrity - - climate change - - Yes - - 5 - * - gemini-1.5-flash - - celebrity - - house prices - - Yes - - 3 + :header-rows: 1 + + * - LLM + - Agent + - Topic + - Have you read any books about {{ topic }}? + - On a scale from 1 to 5, how important to you is {{ topic }}? + * - gpt-4o + - student + - climate change + - Yes + - 5 + * - gpt-4o + - student + - house prices + - No + - 3 + * - gpt-4o + - celebrity + - climate change + - Yes + - 5 + * - gpt-4o + - celebrity + - house prices + - No + - 3 + * - gemini-1.5-flash + - student + - climate change + - Yes + - 5 + * - gemini-1.5-flash + - student + - house prices + - No + - 1 + * - gemini-1.5-flash + - celebrity + - climate change + - Yes + - 5 + * - gemini-1.5-flash + - celebrity + - house prices + - Yes + - 3 Filtering results -^^^^^^^^^^^^^^^^^ +----------------- Results can be filtered by using the `filter` method and passing it a logical expression identifying the results that should be selected. For example, the following code will filter results where the answer to `important` is "5" and then just print the `topic` and `important_comment` columns: .. code-block:: python - ( - results - .filter("important == 5") - .select("topic", "important", "important_comment") - ) + ( + results + .filter("important == 5") + .select("topic", "important", "important_comment") + ) This will return an abbreviated table: .. list-table:: - :header-rows: 1 - - * - scenario.topic - - answer.important - - comment.important_comment - * - climate change - - 5 - - It's, like, a huge deal. The future of the planet is at stake, and that affects everything – from the environment to the economy to social justice. It's something I worry about a lot. - * - climate change - - 5 - - As a student, I'm really concerned about climate change because it affects our future and the planet we'll inherit. It's crucial to understand and address it to ensure a sustainable world for generations to come. - * - climate change - - 5 - - It's a huge issue, you know? We only have one planet, and if we don't take care of it, what kind of world are we leaving for future generations? It's not just about polar bears; it's about everything. It's my responsibility, as someone with a platform, to speak out about it. 
- * - climate change - - 5 - - Climate change is a critical issue that affects everyone globally, and as a public figure, I believe it's important to use my platform to raise awareness and advocate for sustainable practices. + :header-rows: 1 + + * - scenario.topic + - answer.important + - comment.important_comment + * - climate change + - 5 + - It's, like, a huge deal. The future of the planet is at stake, and that affects everything – from the environment to the economy to social justice. It's something I worry about a lot. + * - climate change + - 5 + - As a student, I'm really concerned about climate change because it affects our future and the planet we'll inherit. It's crucial to understand and address it to ensure a sustainable world for generations to come. + * - climate change + - 5 + - It's a huge issue, you know? We only have one planet, and if we don't take care of it, what kind of world are we leaving for future generations? It's not just about polar bears; it's about everything. It's my responsibility, as someone with a platform, to speak out about it. + * - climate change + - 5 + - Climate change is a critical issue that affects everyone globally, and as a public figure, I believe it's important to use my platform to raise awareness and advocate for sustainable practices. **Note:** The `filter` method allows us to pass the unique short names of the columns (without the prefixes) when specifying the logical expression. @@ -579,221 +602,221 @@ However, because the `model.model` column name is also a prefix, we need to incl .. code-block:: python - ( - results - .filter("model.model == 'gpt-4o'") - .select("model", "persona", "topic", "read", "important") - ) + ( + results + .filter("model.model == 'gpt-4o'") + .select("model", "persona", "topic", "read", "important") + ) This will return a table of results where the model is "gpt-4o": .. list-table:: - :header-rows: 1 - - * - model.model - - agent.persona - - scenario.topic - - answer.read - - answer.important - * - gpt-4o - - student - - climate change - - Yes - - 5 - * - gpt-4o - - student - - house prices - - No - - 3 - * - gpt-4o - - celebrity - - climate change - - Yes - - 5 - * - gpt-4o - - celebrity - - house prices - - No - - 3 + :header-rows: 1 + + * - model.model + - agent.persona + - scenario.topic + - answer.read + - answer.important + * - gpt-4o + - student + - climate change + - Yes + - 5 + * - gpt-4o + - student + - house prices + - No + - 3 + * - gpt-4o + - celebrity + - climate change + - Yes + - 5 + * - gpt-4o + - celebrity + - house prices + - No + - 3 Limiting results -^^^^^^^^^^^^^^^^ +---------------- We can select and print a limited number of results by passing the desired number of `max_rows` to the `print()` method. This can be useful for quickly checking the first few results: .. code-block:: python - ( - results - .select("model", "persona", "topic", "read", "important") - .print(max_rows=4, format="rich") - ) + ( + results + .select("model", "persona", "topic", "read", "important") + .print(max_rows=4, format="rich") + ) This will return a table of the selected components of the first 4 results: .. 
list-table:: - :header-rows: 1 - - * - model.model - - agent.persona - - scenario.topic - - answer.read - - answer.important - * - gemini-1.5-flash - - student - - climate change - - Yes - - 5 - * - gpt-4o - - student - - climate change - - Yes - - 5 - * - gemini-1.5-flash - - student - - house prices - - No - - 1 - * - gpt-4o - - student - - house prices - - No - - 3 + :header-rows: 1 + + * - model.model + - agent.persona + - scenario.topic + - answer.read + - answer.important + * - gemini-1.5-flash + - student + - climate change + - Yes + - 5 + * - gpt-4o + - student + - climate change + - Yes + - 5 + * - gemini-1.5-flash + - student + - house prices + - No + - 1 + * - gpt-4o + - student + - house prices + - No + - 3 Sampling results -^^^^^^^^^^^^^^^^ +---------------- We can select a sample of `n` results by passing the desired number of random results to the `sample()` method. This can be useful for checking a random subset of the results with different parameters: .. code-block:: python - sample_results = results.sample(2) + sample_results = results.sample(2) - ( - sample_results - .sort_by("model") - .select("model", "persona", "topic", "read", "important") - ) + ( + sample_results + .sort_by("model") + .select("model", "persona", "topic", "read", "important") + ) This will return a table of the specified number of randomly selected results: .. list-table:: - :header-rows: 1 - - * - model.model - - agent.persona - - scenario.topic - - answer.read - - answer.important - * - gpt-4o - - celebrity - - house prices - - No - - 3 - * - gpt-4o - - celebrity - - climate change - - Yes - - 5 + :header-rows: 1 + + * - model.model + - agent.persona + - scenario.topic + - answer.read + - answer.important + * - gpt-4o + - celebrity + - house prices + - No + - 3 + * - gpt-4o + - celebrity + - climate change + - Yes + - 5 Shuffling results -^^^^^^^^^^^^^^^^^ +----------------- We can shuffle results by calling the `shuffle()` method. This can be useful for quickly checking the first few results: .. code-block:: python - shuffle_results = results.shuffle() + shuffle_results = results.shuffle() - ( - shuffle_results - .select("model", "persona", "topic", "read", "important") - ) + ( + shuffle_results + .select("model", "persona", "topic", "read", "important") + ) This will return a table of shuffled results: .. 
list-table:: - :header-rows: 1 - - * - model.model - - agent.persona - - scenario.topic - - answer.read - - answer.important - * - gemini-1.5-flash - - celebrity - - climate change - - Yes - - 5 - * - gpt-4o - - student - - house prices - - No - - 3 - * - gemini-1.5-flash - - celebrity - - house prices - - Yes - - 3 - * - gemini-1.5-flash - - student - - house prices - - No - - 1 - * - gpt-4o - - celebrity - - house prices - - No - - 3 - * - gpt-4o - - celebrity - - climate change - - Yes - - 5 - * - gpt-4o - - student - - climate change - - Yes - - 5 - * - gemini-1.5-flash - - student - - climate change - - Yes - - 5 + :header-rows: 1 + + * - model.model + - agent.persona + - scenario.topic + - answer.read + - answer.important + * - gemini-1.5-flash + - celebrity + - climate change + - Yes + - 5 + * - gpt-4o + - student + - house prices + - No + - 3 + * - gemini-1.5-flash + - celebrity + - house prices + - Yes + - 3 + * - gemini-1.5-flash + - student + - house prices + - No + - 1 + * - gpt-4o + - celebrity + - house prices + - No + - 3 + * - gpt-4o + - celebrity + - climate change + - Yes + - 5 + * - gpt-4o + - student + - climate change + - Yes + - 5 + * - gemini-1.5-flash + - student + - climate change + - Yes + - 5 Adding results -^^^^^^^^^^^^^^ +-------------- We can add results together straightforwardly by using the `+` operator: .. code-block:: python - add_results = results + results + add_results = results + results We can see that the results have doubled: .. code-block:: text - len(add_results) + len(add_results) This will return the number of results: .. code-block:: text - 16 + 16 Interacting via SQL @@ -806,75 +829,75 @@ For example, the following code will return a table showing the `model`, `person .. code-block:: python - results.sql("select model, persona, read, important from self limit 4") + results.sql("select model, persona, read, important from self limit 4") This following table will be displayed .. list-table:: - :header-rows: 1 - - * - model - - persona - - read - - important - * - gemini-1.5-flash - - student - - Yes - - 5 - * - gpt-4o - - student - - Yes - - 5 - * - gemini-1.5-flash - - student - - No - - 1 - * - gpt-4o - - student - - No - - 3 + :header-rows: 1 + + * - model + - persona + - read + - important + * - gemini-1.5-flash + - student + - Yes + - 5 + * - gpt-4o + - student + - Yes + - 5 + * - gemini-1.5-flash + - student + - No + - 1 + * - gpt-4o + - student + - No + - 3 Dataframes -^^^^^^^^^^ +---------- We can also export results to other formats. The `to_pandas` method will turn our results into a Pandas dataframe: .. code-block:: python - results.to_pandas() + results.to_pandas() For example, here we use it to create a dataframe consisting of the models, personas and the answers to the `important` question: .. code-block:: python - results.to_pandas()[["model.model", "agent.persona", "answer.important"]] + results.to_pandas()[["model.model", "agent.persona", "answer.important"]] Exporting to CSV or JSON -^^^^^^^^^^^^^^^^^^^^^^^^ +------------------------ The `to_csv` method will write the results to a CSV file: .. code-block:: python - results.to_pandas().to_csv("results.csv") + results.to_pandas().to_csv("results.csv") The `to_json` method will write the results to a JSON file: .. 
code-block:: python - results.to_pandas().to_json("results.json") + results.to_pandas().to_json("results.json") Exceptions -^^^^^^^^^^ +---------- If any exceptions are raised when the survey is run a detailed exceptions report is generated and can be opened in your browser. See the :ref:`exceptions` section for more information on exceptions. From 8c06838b6618545bddbd545745752c1051620b7e Mon Sep 17 00:00:00 2001 From: John Horton Date: Wed, 22 Jan 2025 14:46:19 -0500 Subject: [PATCH 28/38] Bug fixes --- edsl/agents/QuestionTemplateReplacementsBuilder.py | 9 +++++++-- edsl/jobs/loggers/HTMLTableJobLogger.py | 7 ++++++- edsl/questions/descriptors.py | 2 +- edsl/results/Result.py | 3 +-- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/edsl/agents/QuestionTemplateReplacementsBuilder.py b/edsl/agents/QuestionTemplateReplacementsBuilder.py index 205b154c..e7ea9093 100644 --- a/edsl/agents/QuestionTemplateReplacementsBuilder.py +++ b/edsl/agents/QuestionTemplateReplacementsBuilder.py @@ -1,4 +1,4 @@ -from jinja2 import Environment, meta +from jinja2 import Environment, meta, TemplateSyntaxError from typing import Any, Set, TYPE_CHECKING if TYPE_CHECKING: @@ -29,7 +29,12 @@ def get_jinja2_variables(template_str: str) -> Set[str]: Set[str]: A set of variable names found in the template """ env = Environment() - ast = env.parse(template_str) + try: + ast = env.parse(template_str) + except TemplateSyntaxError: + print(f"Error parsing template: {template_str}") + raise + return meta.find_undeclared_variables(ast) @staticmethod diff --git a/edsl/jobs/loggers/HTMLTableJobLogger.py b/edsl/jobs/loggers/HTMLTableJobLogger.py index 16657339..4c43780d 100644 --- a/edsl/jobs/loggers/HTMLTableJobLogger.py +++ b/edsl/jobs/loggers/HTMLTableJobLogger.py @@ -9,7 +9,8 @@ class HTMLTableJobLogger(JobLogger): def __init__(self, verbose=True, theme="auto", **kwargs): super().__init__(verbose=verbose) - self.display_handle = display(HTML(""), display_id=True) + self.display_handle = display(HTML(""), display_id=True) if verbose else None + #self.display_handle = display(HTML(""), display_id=True) self.current_message = None self.log_id = str(uuid.uuid4()) self.is_expanded = True @@ -22,6 +23,9 @@ def __init__(self, verbose=True, theme="auto", **kwargs): def _init_css(self): """Initialize the CSS styles with enhanced theme support""" + if not self.verbose: + return None + css = """