Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,17 @@ jobs:
needs: test
runs-on: ubuntu-latest
steps:
- name: Build & Release
uses: FullFact/ff_release@v2
- name: Bump version and push tag
id: tag_version
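uses: mathieudutour/github-tag-action@v6.2  # action name/version reconstructed from an obfuscated "[email protected]" scrape artefact; confirm the pinned version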
with:
docker_build: false
github_token: ${{ secrets.GITHUB_TOKEN }}
release_branches: main
pre_release_branches: dev

- name: Create a GitHub release
uses: ncipollo/release-action@v1
with:
tag: ${{ steps.tag_version.outputs.new_tag }}
name: Release ${{ steps.tag_version.outputs.new_tag }}
body: ${{ steps.tag_version.outputs.changelog }}
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ dependencies = [
"scipy>=1.15.3",
"scipy-stubs>=1.15.3.0",
"types-requests>=2.32.4.20250809",
"tenacity>=9.1.2",
]

[tool.uv.sources]
Expand Down
4 changes: 2 additions & 2 deletions scripts/demo_pastel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
import json
import tempfile

from pastel.models import Sentence
from pastel.models import BiasType, Sentence
from pastel.optimise_weights import learn_weights
from pastel.pastel import BiasType, Pastel
from pastel.pastel import Pastel


def demo_predict(pasteliser: Pastel) -> None:
Expand Down
24 changes: 23 additions & 1 deletion src/pastel/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
import enum
from collections.abc import Callable
from dataclasses import dataclass
from typing import Tuple
from typing import Tuple, TypeAlias

from pydantic import BaseModel


class BiasType(enum.Enum):
    """Key under which a Pastel model stores its bias term."""

    BIAS = "BIAS"


@dataclass(frozen=True)
Expand All @@ -8,3 +18,15 @@ class Sentence:

sentence_text: str
claim_type: Tuple[str, ...] = ()


# A Pastel feature is one of: a function that maps a Sentence to a float,
# a string (a question), or the BiasType key used for the bias term.
FEATURE_TYPE: TypeAlias = Callable[[Sentence], float] | str | BiasType


class ScoreAndAnswers(BaseModel):
    """Result for a single sentence: its score together with the answers
    to the PASTEL questions recorded for it."""

    # The sentence this result belongs to.
    sentence: Sentence
    # The score given to the sentence.
    score: float
    # The value recorded for each feature, keyed by the feature itself.
    answers: dict[FEATURE_TYPE, float]
124 changes: 84 additions & 40 deletions src/pastel/pastel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,10 @@
# See paper: https://arxiv.org/abs/2309.07601v3 "Weakly Supervised Veracity Classification with LLM-Predicted Credibility Signals"

import asyncio
import enum
import json
import logging
from collections.abc import Callable
from typing import Dict, Sequence, Tuple, TypeAlias
from typing import Sequence, Tuple, TypeAlias

import numpy as np
import numpy.typing as npt
Expand All @@ -15,7 +14,7 @@
from google.api_core import exceptions as core_exceptions

from pastel import pastel_functions
from pastel.models import Sentence
from pastel.models import FEATURE_TYPE, BiasType, ScoreAndAnswers, Sentence

_logger = logging.getLogger(__name__)

Expand All @@ -31,15 +30,6 @@
)


class BiasType(enum.Enum):
"""Used as the key for the bias term in Pastel models"""

BIAS = "BIAS"


FEATURE_TYPE: TypeAlias = Callable[[Sentence], float] | str | BiasType


def feature_as_string(feature: FEATURE_TYPE) -> str:
if callable(feature):
return feature.__name__
Expand Down Expand Up @@ -170,28 +160,28 @@ def get_functions(self) -> list[Callable[[Sentence], float]]:
def make_prompt(self, sentence: Sentence) -> str:
"""Makes a prompt for a single given sentence."""

questions = self.get_questions()

prompt = """
Your task is to answer a series of questions about a sentence. Ensure your answers are truthful and reliable.
You are expected to answer with ‘Yes’ or ‘No’ but you are also allowed to answer with ‘Unsure’ if you do not
have enough information or context to provide a reliable answer.
Your response should be limited to the question number and yes/no/unsure.
Example output:
0. Yes
1. Yes
2. No
Your task is to answer a series of questions about a sentence. Ensure your answers are truthful and reliable.
You are expected to answer with ‘Yes’ or ‘No’ but you are also allowed to answer with ‘Unsure’ if you do not
have enough information or context to provide a reliable answer.
Your response should be limited to the question number and yes/no/unsure.
Example output:
0. Yes
1. Yes
2. No

Here are the questions:
[QUESTIONS]
Here are the questions:
[QUESTIONS]

Here is the sentence: ```[SENT1]```
Here is the sentence: ```[SENT1]```

"""
"""
# extract the PastelFeatures whose type is string
prompt = prompt.replace(
"[QUESTIONS]",
"\n".join(
[f"Question {idx} {q}" for idx, q in enumerate(self.get_questions())]
),
"\n".join([f"Question {idx} {q}" for idx, q in enumerate(questions)]),
)
prompt = prompt.replace("[SENT1]", sentence.sentence_text)

Expand All @@ -211,11 +201,11 @@ def _label_mapping(label: str) -> float:
retry=tenacity.retry_if_exception_type(RETRYABLE_EXCEPTIONS),
before=log_retry_attempt,
)
async def _get_answers_for_single_sentence(
async def _get_llm_answers_for_single_sentence(
self, sentence: Sentence
) -> dict[FEATURE_TYPE, float]:
sent_answers: Dict[FEATURE_TYPE, float] = {}
# First, get answers to all the questions from genAI:
"""Runs all genAI questions on the given sentence."""
sent_answers: dict[FEATURE_TYPE, float] = {}
prompt = self.make_prompt(sentence)
raw_output = run_prompt(prompt)
raw_output = raw_output.strip().lower()
Expand All @@ -233,16 +223,33 @@ async def _get_answers_for_single_sentence(
raise ValueError(
f"Failed to parse output for the sentence: {sentence.sentence_text}. Output received: {output}"
)
# Second, get values from the functions
return sent_answers

def _get_function_answers_for_single_sentence(
    self, sentence: Sentence
) -> dict[FEATURE_TYPE, float]:
    """Evaluate every function feature of the model on one sentence.

    Returns a dictionary keyed by the function object itself, holding the
    value that function returned for ``sentence``.
    """
    function_values: dict[FEATURE_TYPE, float] = {
        func: func(sentence) for func in self.get_functions()
    }
    return function_values

async def _get_answers_for_single_sentence(
    self, sentence: Sentence
) -> dict[FEATURE_TYPE, float]:
    """Collect every feature value for one sentence.

    The genAI questions are answered first, then the model's functions are
    evaluated. If both produce a value for the same feature, the value from
    the functions is the one kept.
    """
    from_llm = await self._get_llm_answers_for_single_sentence(sentence)
    from_functions = self._get_function_answers_for_single_sentence(sentence)
    return {**from_llm, **from_functions}

async def get_answers_to_questions(
self, sentences: list[Sentence]
) -> dict[Sentence, dict[FEATURE_TYPE, float]]:
"""Embed each example into the prompt and pass to genAI.
"""Embed each example into the prompt and pass to genAI, then
get answers for non-genAI functions.
For each sentence, this Returns a dictionary mapping features to scores."""

jobs = [
Expand Down Expand Up @@ -299,21 +306,58 @@ def get_scores_from_answers(
scores = X.dot(weights)
return scores

async def make_predictions(
    self, sentences: list[Sentence]
) -> dict[Sentence, ScoreAndAnswers]:
    """Score each sentence with the Pastel questions-and-weights model.

    Args:
        sentences: The sentences to score.

    Returns:
        A dictionary mapping each input sentence to a ``ScoreAndAnswers``
        holding its score and the per-feature answers the score was
        computed from. A sentence for which no answers were obtained gets
        a score of 0.0 and an empty answers dictionary.
    """
    answers = await self.get_answers_to_questions(sentences)

    # Key the scores by the Sentence itself (not by its text) so that two
    # sentences sharing the same text but differing in claim_type keep their
    # own scores; `answers` and the returned dict are keyed the same way.
    scores_by_sentence: dict[Sentence, float] = {}
    if answers:
        # Only score when there is something to score; the answers dict and
        # the returned scores are aligned by position.
        scores = self.get_scores_from_answers(list(answers.values()))
        scores_by_sentence = {
            sentence: float(score) for sentence, score in zip(answers, scores)
        }

    # Fall back to defaults with .get() rather than writing placeholder
    # entries into `answers`, so the dictionary we were handed is not mutated.
    return {
        sentence: ScoreAndAnswers(
            sentence=sentence,
            score=scores_by_sentence.get(sentence, 0.0),
            answers=answers.get(sentence, {}),
        )
        for sentence in sentences
    }

def update_predictions(
    self, sentences: list[Sentence], old_answers: list[dict[FEATURE_TYPE, float]]
) -> dict[Sentence, ScoreAndAnswers]:
    """Re-run only the function features and re-score the sentences.

    The previously obtained answers (LLM and function) are kept, except that
    every function feature is re-evaluated and its new value replaces the
    old one. No genAI call is made.

    Args:
        sentences: The sentences to re-score.
        old_answers: The earlier answers for each sentence, in the same
            order as ``sentences``.

    Returns:
        A dictionary mapping each sentence to a ``ScoreAndAnswers`` with the
        updated answers and the score computed from them.

    Raises:
        ValueError: If ``sentences`` and ``old_answers`` differ in length.
    """
    # zip() would silently drop the unmatched tail, leaving sentences out of
    # the result, so reject mismatched inputs up front.
    if len(sentences) != len(old_answers):
        raise ValueError(
            "Expected one set of old answers per sentence, but got "
            f"{len(sentences)} sentences and {len(old_answers)} sets of answers"
        )
    # As in make_predictions, do not ask for scores when there is nothing
    # to score.
    if not sentences:
        return {}

    # New function values take precedence over the stale ones in `old`.
    updated_answers = [
        old | self._get_function_answers_for_single_sentence(sentence)
        for sentence, old in zip(sentences, old_answers)
    ]
    updated_scores = self.get_scores_from_answers(updated_answers)

    return {
        sentence: ScoreAndAnswers(
            sentence=sentence,
            # Convert to a plain float, matching make_predictions.
            score=float(score),
            answers=answers,
        )
        for sentence, score, answers in zip(
            sentences, updated_scores, updated_answers
        )
    }
3 changes: 2 additions & 1 deletion src/training/beam_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@
import numpy as np
from sklearn.model_selection import train_test_split # type: ignore

from pastel.models import FEATURE_TYPE, BiasType
from pastel.optimise_weights import lin_reg
from pastel.pastel import EXAMPLES_TYPE, FEATURE_TYPE, BiasType, Pastel
from pastel.pastel import EXAMPLES_TYPE, Pastel
from training.cached_pastel import CachedPastel
from training.crossvalidate_pastel import (
evaluate_model,
Expand Down
4 changes: 2 additions & 2 deletions src/training/cached_pastel.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
import logging
from typing import List, Optional, Set, Tuple

from pastel.models import Sentence
from pastel.pastel import ARRAY_TYPE, FEATURE_TYPE, BiasType, Pastel, feature_as_string
from pastel.models import FEATURE_TYPE, BiasType, Sentence
from pastel.pastel import ARRAY_TYPE, Pastel, feature_as_string
from training.db_manager import DatabaseManager

_logger = logging.getLogger(__name__)
Expand Down
4 changes: 2 additions & 2 deletions src/training/crossvalidate_pastel.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
from sklearn.metrics import f1_score, precision_score, recall_score # type: ignore
from sklearn.model_selection import train_test_split # type: ignore

from pastel.models import Sentence
from pastel.models import FEATURE_TYPE, BiasType, Sentence
from pastel.optimise_weights import lin_reg
from pastel.pastel import EXAMPLES_TYPE, FEATURE_TYPE, BiasType, Pastel
from pastel.pastel import EXAMPLES_TYPE, Pastel
from training.cached_pastel import CachedPastel


Expand Down
3 changes: 2 additions & 1 deletion tests/pastel/test_beam_search.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from unittest.mock import Mock, patch

from pastel.pastel import BiasType, Pastel
from pastel.models import BiasType
from pastel.pastel import Pastel
from training.beam_search import add_one, run_beam_search


Expand Down
4 changes: 2 additions & 2 deletions tests/pastel/test_cached_pastel.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np

from pastel.models import Sentence
from pastel.pastel import FEATURE_TYPE, BiasType, Pastel
from pastel.models import FEATURE_TYPE, BiasType, Sentence
from pastel.pastel import Pastel
from training.cached_pastel import CachedPastel

Q1 = "Is the statement factual?"
Expand Down
Loading