Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,17 @@ jobs:
needs: test
runs-on: ubuntu-latest
steps:
# Tag a new semantic version, then publish a GitHub release for that tag.
- name: Bump version and push tag
  id: tag_version
  uses: mathieudutour/[email protected]
  with:
    # NOTE(review): `docker_build` removed — it is an input of the old
    # FullFact/ff_release action, not of github-tag-action, and would
    # trigger an "unexpected input" warning.
    github_token: ${{ secrets.GITHUB_TOKEN }}
    release_branches: main
    pre_release_branches: dev

- name: Create a GitHub release
  uses: ncipollo/release-action@v1
  with:
    tag: ${{ steps.tag_version.outputs.new_tag }}
    name: Release ${{ steps.tag_version.outputs.new_tag }}
    # Auto-generated changelog from conventional commits since the last tag.
    body: ${{ steps.tag_version.outputs.changelog }}
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ dependencies = [
"scipy>=1.15.3",
"scipy-stubs>=1.15.3.0",
"types-requests>=2.32.4.20250809",
"tenacity>=9.1.2",
]

[tool.uv.sources]
Expand Down
122 changes: 93 additions & 29 deletions src/pastel/pastel.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@
import json
import logging
from collections.abc import Callable
from typing import Dict, Sequence, Tuple, TypeAlias
from typing import Sequence, Tuple, TypeAlias

import numpy as np
import numpy.typing as npt
import tenacity
from genai_utils.gemini import run_prompt
from google.api_core import exceptions as core_exceptions
from pydantic import BaseModel

from pastel import pastel_functions
from pastel.models import Sentence
Expand Down Expand Up @@ -46,6 +47,15 @@ def feature_as_string(feature: FEATURE_TYPE) -> str:
return str(feature)


class ScoreAndAnswers(BaseModel):
    """Used to parse scores for sentences and store the answers to
    PASTEL questions."""

    # The sentence that was scored.
    sentence: Sentence
    # Final PASTEL score for the sentence (dot product of answers and weights).
    score: float
    # Answer value per feature (genAI question or local function).
    # NOTE(review): FEATURE_TYPE may include callables as dict keys — confirm
    # pydantic validates/serializes non-string keys as intended.
    answers: dict[FEATURE_TYPE, float]


def log_retry_attempt(retry_state: tenacity.RetryCallState) -> None:
"""Log the retry attempt number and the exception that occurred."""
if (not retry_state.outcome) or (not retry_state.next_action):
Expand Down Expand Up @@ -170,28 +180,28 @@ def get_functions(self) -> list[Callable[[Sentence], float]]:
def make_prompt(self, sentence: Sentence) -> str:
"""Makes a prompt for a single given sentence."""

questions = self.get_questions()

prompt = """
Your task is to answer a series of questions about a sentence. Ensure your answers are truthful and reliable.
You are expected to answer with ‘Yes’ or ‘No’ but you are also allowed to answer with ‘Unsure’ if you do not
have enough information or context to provide a reliable answer.
Your response should be limited to the question number and yes/no/unsure.
Example output:
0. Yes
1. Yes
2. No
Your task is to answer a series of questions about a sentence. Ensure your answers are truthful and reliable.
You are expected to answer with ‘Yes’ or ‘No’ but you are also allowed to answer with ‘Unsure’ if you do not
have enough information or context to provide a reliable answer.
Your response should be limited to the question number and yes/no/unsure.
Example output:
0. Yes
1. Yes
2. No

Here are the questions:
[QUESTIONS]
Here are the questions:
[QUESTIONS]

Here is the sentence: ```[SENT1]```
Here is the sentence: ```[SENT1]```

"""
"""
# extract the PastelFeatures whose type is string
prompt = prompt.replace(
"[QUESTIONS]",
"\n".join(
[f"Question {idx} {q}" for idx, q in enumerate(self.get_questions())]
),
"\n".join([f"Question {idx} {q}" for idx, q in enumerate(questions)]),
)
prompt = prompt.replace("[SENT1]", sentence.sentence_text)

Expand All @@ -211,11 +221,11 @@ def _label_mapping(label: str) -> float:
retry=tenacity.retry_if_exception_type(RETRYABLE_EXCEPTIONS),
before=log_retry_attempt,
)
async def _get_answers_for_single_sentence(
async def _get_llm_answers_for_single_sentence(
self, sentence: Sentence
) -> dict[FEATURE_TYPE, float]:
sent_answers: Dict[FEATURE_TYPE, float] = {}
# First, get answers to all the questions from genAI:
"""Runs all genAI questions on the given sentence."""
sent_answers: dict[FEATURE_TYPE, float] = {}
prompt = self.make_prompt(sentence)
raw_output = run_prompt(prompt)
raw_output = raw_output.strip().lower()
Expand All @@ -233,16 +243,33 @@ async def _get_answers_for_single_sentence(
raise ValueError(
f"Failed to parse output for the sentence: {sentence.sentence_text}. Output received: {output}"
)
# Second, get values from the functions
return sent_answers

def _get_function_answers_for_single_sentence(
    self, sentence: Sentence
) -> dict[FEATURE_TYPE, float]:
    """Evaluate every locally-defined feature function on one sentence.

    Returns a mapping from each function (used as the feature key) to the
    float value it produced for this sentence.
    """
    return {func: func(sentence) for func in self.get_functions()}

async def _get_answers_for_single_sentence(
    self, sentence: Sentence
) -> dict[FEATURE_TYPE, float]:
    """Gather all feature answers for one sentence.

    Combines the genAI question answers with the local function outputs;
    on a key clash the function value wins (right-hand side of the merge).
    """
    from_llm = await self._get_llm_answers_for_single_sentence(sentence)
    from_functions = self._get_function_answers_for_single_sentence(sentence)
    return {**from_llm, **from_functions}

async def get_answers_to_questions(
self, sentences: list[Sentence]
) -> dict[Sentence, dict[FEATURE_TYPE, float]]:
"""Embed each example into the prompt and pass to genAI.
"""Embed each example into the prompt and pass to genAI, then
get answers for non-genAI functions.
For each sentence, this Returns a dictionary mapping features to scores."""

jobs = [
Expand Down Expand Up @@ -299,21 +326,58 @@ def get_scores_from_answers(
scores = X.dot(weights)
return scores

async def make_predictions(
    self, sentences: list[Sentence]
) -> dict[Sentence, ScoreAndAnswers]:
    """Score each sentence with the Pastel questions-and-weights model.

    Returns a mapping from every input sentence to a ScoreAndAnswers
    holding its score and the per-feature answers. Sentences for which
    no answers were produced fall back to a score of 0.0 and an empty
    answers dict.
    """
    answers = await self.get_answers_to_questions(sentences)
    scores = (
        self.get_scores_from_answers(list(answers.values()))
        if answers
        else np.array([])
    )

    # Keyed by sentence text for the sentences that were actually scored.
    text_to_score = {
        sent.sentence_text: float(val)
        for sent, val in zip(answers.keys(), scores)
    }

    # Guarantee an entry for every requested sentence.
    for sent in sentences:
        text_to_score.setdefault(sent.sentence_text, 0.0)
        answers.setdefault(sent, {})

    return {
        sent: ScoreAndAnswers(
            sentence=sent,
            score=text_to_score[sent.sentence_text],
            answers=answers[sent],
        )
        for sent in sentences
    }

def update_predictions(
    self, sentences: list[Sentence], old_answers: list[dict[FEATURE_TYPE, float]]
) -> dict[Sentence, ScoreAndAnswers]:
    """Refresh scores by re-running only the local feature functions.

    Takes each sentence with its previously collected answers (LLM and
    function), overwrites the function-derived answers with freshly
    computed values, and recomputes the scores. Returns a ScoreAndAnswers
    per sentence, as make_predictions does.
    """
    # Fresh function outputs overwrite stale ones; LLM answers are kept.
    merged_answers = [
        previous | self._get_function_answers_for_single_sentence(sentence)
        for sentence, previous in zip(sentences, old_answers)
    ]
    new_scores = self.get_scores_from_answers(merged_answers)

    return {
        sentence: ScoreAndAnswers(
            sentence=sentence,
            score=score,
            answers=answers,
        )
        for sentence, score, answers in zip(sentences, new_scores, merged_answers)
    }
Loading