FullFact · c-j-johnston · Dec 8, 2025 · Dec 2, 2025 · Dec 2, 2025 · Dec 3, 2025
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -57,7 +57,17 @@ jobs:
     needs: test
     runs-on: ubuntu-latest
     steps:
-      - name: Build & Release
-        uses: FullFact/ff_release@v2
+      - name: Bump version and push tag
+        id: tag_version
+        uses: mathieudutour/github-tag-action@v6.2
         with:
-          docker_build: false
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          release_branches: main
+          pre_release_branches: dev
+
+      - name: Create a GitHub release
+        uses: ncipollo/release-action@v1
+        with:
+          tag: ${{ steps.tag_version.outputs.new_tag }}
+          name: Release ${{ steps.tag_version.outputs.new_tag }}
+          body: ${{ steps.tag_version.outputs.changelog }}
diff --git a/pyproject.toml b/pyproject.toml
@@ -19,6 +19,7 @@ dependencies = [
     "scipy>=1.15.3",
     "scipy-stubs>=1.15.3.0",
     "types-requests>=2.32.4.20250809",
+    "tenacity>=9.1.2",
 ]
 
 [tool.uv.sources]

diff --git a/scripts/demo_pastel.py b/scripts/demo_pastel.py
@@ -2,9 +2,9 @@
 import json
 import tempfile
 
-from pastel.models import Sentence
+from pastel.models import BiasType, Sentence
 from pastel.optimise_weights import learn_weights
-from pastel.pastel import BiasType, Pastel
+from pastel.pastel import Pastel
 
 
 def demo_predict(pasteliser: Pastel) -> None:

diff --git a/src/pastel/models.py b/src/pastel/models.py
@@ -1,5 +1,15 @@
+import enum
+from collections.abc import Callable
 from dataclasses import dataclass
-from typing import Tuple
+from typing import Tuple, TypeAlias
+
+from pydantic import BaseModel
+
+
+class BiasType(enum.Enum):
+    """Used as the key for the bias term in Pastel models.
+
+    The enum has a single member, ``BIAS``; it is one of the alternatives
+    accepted by the ``FEATURE_TYPE`` alias defined in this module.
+    """
+
+    BIAS = "BIAS"
 
 
 @dataclass(frozen=True)
@@ -8,3 +18,15 @@ class Sentence:
 
     sentence_text: str
     claim_type: Tuple[str, ...] = ()
+
+
+FEATURE_TYPE: TypeAlias = Callable[[Sentence], float] | str | BiasType
+
+
+class ScoreAndAnswers(BaseModel):
+    """Used to parse scores for sentences and store the answers to
+    PASTEL questions.
+
+    Bundles one sentence with its score and the per-feature values the
+    score was computed from."""
+
+    # The sentence this result belongs to.
+    sentence: Sentence
+    # The score assigned to the sentence.
+    score: float
+    # Value recorded for each feature; keys are any FEATURE_TYPE
+    # (a callable taking a Sentence, a string, or BiasType).
+    answers: dict[FEATURE_TYPE, float]
diff --git a/src/pastel/pastel.py b/src/pastel/pastel.py
@@ -2,11 +2,10 @@
 # See paper: https://arxiv.org/abs/2309.07601v3 "Weakly Supervised Veracity Classification with LLM-Predicted Credibility Signals"
 
 import asyncio
-import enum
 import json
 import logging
 from collections.abc import Callable
-from typing import Dict, Sequence, Tuple, TypeAlias
+from typing import Sequence, Tuple, TypeAlias
 
 import numpy as np
 import numpy.typing as npt
@@ -15,7 +14,7 @@
 from google.api_core import exceptions as core_exceptions
 
 from pastel import pastel_functions
-from pastel.models import Sentence
+from pastel.models import FEATURE_TYPE, BiasType, ScoreAndAnswers, Sentence
 
 _logger = logging.getLogger(__name__)
 
@@ -31,15 +30,6 @@
 )
 
 
-class BiasType(enum.Enum):
-    """Used as the key for the bias term in Pastel models"""
-
-    BIAS = "BIAS"
-
-
-FEATURE_TYPE: TypeAlias = Callable[[Sentence], float] | str | BiasType
-
-
 def feature_as_string(feature: FEATURE_TYPE) -> str:
     if callable(feature):
         return feature.__name__
@@ -170,28 +160,28 @@ def get_functions(self) -> list[Callable[[Sentence], float]]:
     def make_prompt(self, sentence: Sentence) -> str:
         """Makes a prompt for a single given sentence."""
 
+        questions = self.get_questions()
+
         prompt = """
-    Your task is to answer a series of questions about a sentence. Ensure your answers are truthful and reliable.
-    You are expected to answer with ‘Yes’ or ‘No’ but you are also allowed to answer with ‘Unsure’ if you do not
-    have enough information or context to provide a reliable answer.
-    Your response should be limited to the question number and yes/no/unsure.
-    Example output:
-    0. Yes
-    1. Yes
-    2. No
+Your task is to answer a series of questions about a sentence. Ensure your answers are truthful and reliable.
+You are expected to answer with ‘Yes’ or ‘No’ but you are also allowed to answer with ‘Unsure’ if you do not
+have enough information or context to provide a reliable answer.
+Your response should be limited to the question number and yes/no/unsure.
+Example output:
+0. Yes
+1. Yes
+2. No
 
-    Here are the questions:
-    [QUESTIONS]
+Here are the questions:
+[QUESTIONS]
 
-    Here is the sentence: ```[SENT1]```
+Here is the sentence: ```[SENT1]```
 
-    """
+"""
         # extract the PastelFeatures whose type is string
         prompt = prompt.replace(
             "[QUESTIONS]",
-            "\n".join(
-                [f"Question {idx} {q}" for idx, q in enumerate(self.get_questions())]
-            ),
+            "\n".join([f"Question {idx} {q}" for idx, q in enumerate(questions)]),
         )
         prompt = prompt.replace("[SENT1]", sentence.sentence_text)
 
@@ -211,11 +201,11 @@ def _label_mapping(label: str) -> float:
         retry=tenacity.retry_if_exception_type(RETRYABLE_EXCEPTIONS),
         before=log_retry_attempt,
     )
-    async def _get_answers_for_single_sentence(
+    async def _get_llm_answers_for_single_sentence(
         self, sentence: Sentence
     ) -> dict[FEATURE_TYPE, float]:
-        sent_answers: Dict[FEATURE_TYPE, float] = {}
-        # First, get answers to all the questions from genAI:
+        """Runs all genAI questions on the given sentence."""
+        sent_answers: dict[FEATURE_TYPE, float] = {}
         prompt = self.make_prompt(sentence)
         raw_output = run_prompt(prompt)
         raw_output = raw_output.strip().lower()
@@ -233,16 +223,33 @@ async def _get_answers_for_single_sentence(
             raise ValueError(
                 f"Failed to parse output for the sentence: {sentence.sentence_text}. Output received: {output}"
             )
-        # Second, get values from the functions
+        return sent_answers
+
+    def _get_function_answers_for_single_sentence(
+        self, sentence: Sentence
+    ) -> dict[FEATURE_TYPE, float]:
+        """Runs all the functions in the model on the given sentence.
+
+        Returns a dictionary mapping each callable from
+        ``self.get_functions()`` to the value it returns for ``sentence``.
+        """
+        sent_answers: dict[FEATURE_TYPE, float] = {}
         for f in self.get_functions():
             sent_answers[f] = f(sentence)
-
         return sent_answers
 
+    async def _get_answers_for_single_sentence(
+        self, sentence: Sentence
+    ) -> dict[FEATURE_TYPE, float]:
+        """Collect every feature value for a single sentence.
+
+        The genAI question answers are awaited first, then the function
+        values are computed. Both are merged into one new dictionary; if a
+        key appears in both, the function value is the one kept.
+        """
+        combined: dict[FEATURE_TYPE, float] = {}
+        # Order matters: later updates override earlier keys.
+        combined.update(await self._get_llm_answers_for_single_sentence(sentence))
+        combined.update(self._get_function_answers_for_single_sentence(sentence))
+        return combined
+
     async def get_answers_to_questions(
         self, sentences: list[Sentence]
     ) -> dict[Sentence, dict[FEATURE_TYPE, float]]:
-        """Embed each example into the prompt and pass to genAI.
+        """Embed each example into the prompt and pass to genAI, then
+        get answers for non-genAI functions.
         For each sentence, this Returns a dictionary mapping features to scores."""
 
         jobs = [
@@ -299,21 +306,58 @@ def get_scores_from_answers(
         scores = X.dot(weights)
         return scores
 
-    def make_predictions(self, sentences: list[Sentence]) -> ARRAY_TYPE:
+    async def make_predictions(
+        self, sentences: list[Sentence]
+    ) -> dict[Sentence, ScoreAndAnswers]:
         """Use the Pastel questions and weights model to generate
-        a score for each of a list of sentences."""
-        answers = asyncio.run(self.get_answers_to_questions(sentences))
+        a score for each of a list of sentences. Return this along with
+        the questions and their scores.
+
+        The result has one entry per input sentence. A sentence whose text
+        has no computed score is given a score of 0.0, and a sentence that
+        is absent from the collected answers is given an empty answers
+        dictionary."""
+        answers = await self.get_answers_to_questions(sentences)
         if answers:
             scores = self.get_scores_from_answers(list(answers.values()))
         else:
             scores = np.array([])
 
+        # NOTE(review): scores are keyed by sentence text while answers are
+        # keyed by Sentence, so sentences sharing the same text share one
+        # score -- confirm this is intended.
         scores_dict = {}
         for sentence, score in zip(answers.keys(), scores):
-            scores_dict[sentence] = float(score)
+            scores_dict[sentence.sentence_text] = float(score)
 
+        # Fill in defaults so every requested sentence appears in the result.
         for sentence in sentences:
-            if sentence not in scores_dict:
-                scores_dict[sentence] = 0.0
+            if sentence.sentence_text not in scores_dict:
+                scores_dict[sentence.sentence_text] = 0.0
+            if sentence not in answers.keys():
+                answers[sentence] = {}
 
-        return np.array([scores_dict[sentence] for sentence in sentences])
+        return {
+            sentence: ScoreAndAnswers(
+                sentence=sentence,
+                score=scores_dict[sentence.sentence_text],
+                answers=answers[sentence],
+            )
+            for sentence in sentences
+        }
+
+    def update_predictions(
+        self, sentences: list[Sentence], old_answers: list[dict[FEATURE_TYPE, float]]
+    ) -> dict[Sentence, ScoreAndAnswers]:
+        """Re-run only the model's functions and re-score the sentences.
+
+        Takes a list of sentences and their original LLM and function
+        answers, re-runs the functions (no genAI call is made from this
+        method), overlays the new function values on the old answers and
+        recomputes the scores.
+
+        ``old_answers[i]`` must hold the answers for ``sentences[i]``.
+
+        Returns a ScoreAndAnswers for each sentence, keyed by sentence.
+
+        Raises:
+            ValueError: if ``sentences`` and ``old_answers`` differ in length.
+        """
+        # zip() would silently drop the unmatched tail, so fail loudly instead.
+        if len(sentences) != len(old_answers):
+            raise ValueError(
+                "Expected one set of old answers per sentence, got "
+                f"{len(sentences)} sentences and {len(old_answers)} sets of answers."
+            )
+        # make_predictions likewise avoids calling get_scores_from_answers
+        # when there are no answers to score.
+        if not sentences:
+            return {}
+
+        # New function values override the old ones; other old answers are kept.
+        updated_answers = [
+            old | self._get_function_answers_for_single_sentence(sentence)
+            for sentence, old in zip(sentences, old_answers)
+        ]
+        updated_scores = self.get_scores_from_answers(updated_answers)
+
+        return {
+            sentence: ScoreAndAnswers(
+                sentence=sentence,
+                # Plain float, matching what make_predictions stores.
+                score=float(score),
+                answers=answers,
+            )
+            for sentence, score, answers in zip(
+                sentences, updated_scores, updated_answers
+            )
+        }
diff --git a/src/training/beam_search.py b/src/training/beam_search.py
@@ -10,8 +10,9 @@
 import numpy as np
 from sklearn.model_selection import train_test_split  # type: ignore
 
+from pastel.models import FEATURE_TYPE, BiasType
 from pastel.optimise_weights import lin_reg
-from pastel.pastel import EXAMPLES_TYPE, FEATURE_TYPE, BiasType, Pastel
+from pastel.pastel import EXAMPLES_TYPE, Pastel
 from training.cached_pastel import CachedPastel
 from training.crossvalidate_pastel import (
     evaluate_model,

diff --git a/src/training/cached_pastel.py b/src/training/cached_pastel.py
@@ -5,8 +5,8 @@
 import logging
 from typing import List, Optional, Set, Tuple
 
-from pastel.models import Sentence
-from pastel.pastel import ARRAY_TYPE, FEATURE_TYPE, BiasType, Pastel, feature_as_string
+from pastel.models import FEATURE_TYPE, BiasType, Sentence
+from pastel.pastel import ARRAY_TYPE, Pastel, feature_as_string
 from training.db_manager import DatabaseManager
 
 _logger = logging.getLogger(__name__)

diff --git a/src/training/crossvalidate_pastel.py b/src/training/crossvalidate_pastel.py
@@ -15,9 +15,9 @@
 from sklearn.metrics import f1_score, precision_score, recall_score  # type: ignore
 from sklearn.model_selection import train_test_split  # type: ignore
 
-from pastel.models import Sentence
+from pastel.models import FEATURE_TYPE, BiasType, Sentence
 from pastel.optimise_weights import lin_reg
-from pastel.pastel import EXAMPLES_TYPE, FEATURE_TYPE, BiasType, Pastel
+from pastel.pastel import EXAMPLES_TYPE, Pastel
 from training.cached_pastel import CachedPastel
 
 

diff --git a/tests/pastel/test_beam_search.py b/tests/pastel/test_beam_search.py
@@ -1,6 +1,7 @@
 from unittest.mock import Mock, patch
 
-from pastel.pastel import BiasType, Pastel
+from pastel.models import BiasType
+from pastel.pastel import Pastel
 from training.beam_search import add_one, run_beam_search
 
 

diff --git a/tests/pastel/test_cached_pastel.py b/tests/pastel/test_cached_pastel.py
@@ -1,7 +1,7 @@
 import numpy as np
 
-from pastel.models import Sentence
-from pastel.pastel import FEATURE_TYPE, BiasType, Pastel
+from pastel.models import FEATURE_TYPE, BiasType, Sentence
+from pastel.pastel import Pastel
 from training.cached_pastel import CachedPastel
 
 Q1 = "Is the statement factual?"