Skip to content

Commit cde60d6

Browse files
committed
add additional aggregations
1 parent acb6f73 commit cde60d6

File tree

2 files changed

+223
-4
lines changed

2 files changed

+223
-4
lines changed

questions/types.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,13 @@ class AggregationMethod(models.TextChoices):
66
UNWEIGHTED = "unweighted"
77
SINGLE_AGGREGATION = "single_aggregation"
88
METACULUS_PREDICTION = "metaculus_prediction"
9+
MEDALISTS = "medalists"
10+
EXPERIENCED_USERS_25_RESOLVED = "experienced_users_25_resolved"
11+
IGNORANCE = "ignorance"
12+
RECENCY_WEIGHTED_LOG_ODDS = "recency_weighted_log_odds"
13+
RECENCY_WEIGHTED_MEAN_NO_OUTLIERS = "recency_weighted_mean_no_outliers"
14+
RECENCY_WEIGHTED_MEDALISTS = "recency_weighted_medalists"
15+
RECENCY_WEIGHTED_EXPERIENCED_USERS_25_RESOLVED = (
16+
"recency_weighted_experienced_users_25_resolved"
17+
)
18+
RECENCY_WEIGHTED_LOG_ODDS_NO_OUTLIERS = "recency_weighted_log_odds_no_outliers"

utils/the_math/aggregations.py

Lines changed: 213 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,21 +12,22 @@
1212
from bisect import bisect_left, bisect_right
1313
from dataclasses import dataclass
1414
from collections import defaultdict
15-
from datetime import datetime, timedelta, timezone as dt_timezone
15+
from datetime import datetime, timedelta, timezone
1616
from typing import Sequence
1717

1818
import numpy as np
19-
from django.utils import timezone
20-
from django.db.models import Q, QuerySet
19+
from django.db.models import F, Q, QuerySet
2120

21+
from projects.permissions import ObjectPermission
2222
from questions.models import (
2323
QUESTION_CONTINUOUS_TYPES,
2424
Question,
2525
Forecast,
2626
AggregateForecast,
2727
)
2828
from questions.types import AggregationMethod
29-
from scoring.models import Score
29+
from scoring.models import Score, LeaderboardEntry
30+
from users.models import User
3031
from utils.the_math.measures import (
3132
weighted_percentile_2d,
3233
percent_point_function,
@@ -535,10 +536,218 @@ def get_weights(self, forecast_set: ForecastSet) -> np.ndarray | None:
535536
return weights if weights.size else None
536537

537538

539+
class MedalistsAggregation(ReputationWeighted, Aggregation):
    """
    unweighted
    median
    only medalists

    A user's reputation is 1 once they hold at least one medal on a
    leaderboard whose project has the FORECASTER default permission,
    and 0 before that.
    """

    reputations: dict[int, list[Reputation]]
    question: Question
    method = AggregationMethod.MEDALISTS

    def get_reputation_history(
        self, user_ids: list[int]
    ) -> dict[int, list[Reputation]]:
        """Return the reputation records for each of the given users.

        Each record states what a particular user's reputation was from a
        particular time onward. The reputation can change during the
        interval between the question's open time and its scheduled close
        time (or now, when no close time is scheduled).

        :param user_ids: ids of the users to build a history for
        :return: dict mapping user id to that user's list of Reputation
            records; every requested user has a first record at the
            question's open time
        """
        start = self.question.open_time
        end = self.question.scheduled_close_time
        if end is None:
            # `timezone` in this module is `datetime.timezone`, which has no
            # `now()`; build the aware "now" through `datetime` instead.
            end = datetime.now(timezone.utc)
        medals = (
            LeaderboardEntry.objects.filter(
                user_id__in=user_ids,
                medal__isnull=False,
                leaderboard__project__default_permission=ObjectPermission.FORECASTER,
            )
            .annotate(set_time=F("leaderboard__finalize_time"))
            .filter(set_time__lte=end)
            .order_by("set_time")
        )

        # setup
        reputations: dict[int, list[Reputation]] = defaultdict(list)

        # Establish initial reputations at the start of the interval.
        old_medals = list(medals.filter(set_time__lte=start).order_by("set_time"))
        for medal in old_medals:
            user_id = medal.user_id
            reputations[user_id] = [Reputation(user_id, 1, start)]
        for user_id in user_ids:
            if user_id not in reputations:
                reputations[user_id] = [Reputation(user_id, 0, start)]

        # Then, for each new medal, add a new reputation record
        new_medals = list(medals.filter(set_time__gt=start).order_by("set_time"))
        for medal in new_medals:
            user_id = medal.user_id
            # Only the first medal matters: once the value is 1 it stays 1.
            if reputations[user_id][-1].value == 0:
                # Timestamp with the same `set_time` used to select, split and
                # order the medals, so the record always falls in (start, end]
                # and the per-user history stays chronologically ordered.
                reputations[user_id].append(Reputation(user_id, 1, medal.set_time))
        return reputations
591+
592+
593+
class Experienced25ResolvedAggregation(ReputationWeighted, Aggregation):
    """
    unweighted
    median
    only forecasters with at least 25 Resolved

    A user's reputation is 1 once they have at least 25 peer scores on
    public questions that have resolved, and 0 before that.
    """

    reputations: dict[int, list[Reputation]]
    question: Question
    # Was MEDALISTS (copy-paste slip); this class is registered under
    # EXPERIENCED_USERS_25_RESOLVED in `aggregation_method_map`.
    method = AggregationMethod.EXPERIENCED_USERS_25_RESOLVED

    def get_reputation_history(
        self, user_ids: list[int]
    ) -> dict[int, list[Reputation]]:
        """Return the reputation records for each of the given users.

        Each record states what a particular user's reputation was from a
        particular time onward. The reputation can change during the
        interval between the question's open time and its scheduled close
        time (or now, when no close time is scheduled).

        :param user_ids: ids of the users to build a history for
        :return: dict mapping user id to that user's list of Reputation
            records; every requested user has a first record at the
            question's open time
        """
        start = self.question.open_time
        end = self.question.scheduled_close_time
        if end is None:
            # `timezone` in this module is `datetime.timezone`, which has no
            # `now()`; build the aware "now" through `datetime` instead.
            end = datetime.now(timezone.utc)
        peer_scores = (
            Score.objects.filter(
                user_id__in=user_ids,
                score_type=Score.ScoreTypes.PEER,
                question__in=Question.objects.filter_public(),
            )
            .annotate(set_time=F("question__actual_resolve_time"))
            .order_by("set_time")
            .filter(set_time__lte=end)
            .distinct()
        )

        # setup
        resolved_per_user: dict[int, int] = defaultdict(int)
        reputations: dict[int, list[Reputation]] = defaultdict(list)

        # Establish reputations at the start of the interval.
        old_peer_scores = list(
            peer_scores.filter(set_time__lte=start).order_by("set_time")
        )
        for score in old_peer_scores:
            resolved_per_user[score.user_id] += 1
        for user_id in user_ids:
            reputations[user_id].append(
                Reputation(user_id, 1 if resolved_per_user[user_id] >= 25 else 0, start)
            )

        # Then, for each new score, add a new reputation record
        new_peer_scores = list(
            peer_scores.filter(set_time__gt=start).order_by("set_time")
        )
        for score in new_peer_scores:
            # update the scores by user, then calculate the updated reputation
            resolved_per_user[score.user_id] += 1
            reputations[score.user_id].append(
                Reputation(
                    score.user_id,
                    1 if resolved_per_user[score.user_id] >= 25 else 0,
                    score.set_time,
                )
            )
        return reputations
656+
657+
658+
class IgnoranceAggregation(Aggregation):
    """
    always returns ignorance values

    The submitted forecasts only determine how many values are returned;
    their contents and any weights are not used.
    """

    method = AggregationMethod.IGNORANCE

    def __init__(self, *args, question_type: Question.QuestionType, **kwargs):
        """Forward everything to the parent and remember the question type."""
        super().__init__(*args, question_type=question_type, **kwargs)
        self.question_type = question_type

    def calculate_forecast_values(
        self, forecast_set: ForecastSet, weights: np.ndarray | None = None
    ) -> np.ndarray:
        """Return the ignorance prior shaped like the first forecast.

        :param forecast_set: forecasts whose first entry fixes the length
        :param weights: accepted for interface compatibility; unused
        :return: evenly spaced values from 0.05 to 0.95 for continuous
            question types, otherwise equal values of 1 / length
        """
        template = forecast_set.forecasts_values[0]
        n_values = len(template)
        if self.question_type not in QUESTION_CONTINUOUS_TYPES:
            # discrete outcomes: every value gets the same share
            return np.ones_like(template) / n_values
        # prediction is a CDF
        return np.linspace(0.05, 0.95, n_values)
678+
679+
680+
class RecencyWeightedLogOddsAggregation(LogOddsMeanValues, RecencyWeighted, Aggregation):
    """
    recency weighted
    log odds mean

    Composed purely from mixins: LogOddsMeanValues and RecencyWeighted
    on top of the Aggregation base.
    """

    method = AggregationMethod.RECENCY_WEIGHTED_LOG_ODDS
689+
690+
691+
class RecencyWeightedMeanNoOutliersAggregation(NoOutliers, MeanValues, RecencyWeightedAggregation):
    """
    recency weighted
    mean
    remove 10% outliers

    Layers the NoOutliers and MeanValues mixins over
    RecencyWeightedAggregation.
    """

    method = AggregationMethod.RECENCY_WEIGHTED_MEAN_NO_OUTLIERS
701+
702+
703+
class RecencyWeightedMedalistsAggregation(
    RecencyWeighted,
    MedalistsAggregation,
):
    """
    recency weighted
    median
    only medalists

    RecencyWeighted precedes MedalistsAggregation in the MRO.
    """

    # NOTE(review): presumably RecencyWeighted cooperates (via super()) with
    # the reputation-based weights inherited through MedalistsAggregation;
    # confirm the medalist filter is not shadowed.
    method = AggregationMethod.RECENCY_WEIGHTED_MEDALISTS
711+
712+
713+
class RecencyWeightedExperienced25ResolvedAggregation(RecencyWeighted, Experienced25ResolvedAggregation):
    """
    recency weighted
    median
    only forecasters with at least 25 Resolved

    RecencyWeighted precedes Experienced25ResolvedAggregation in the MRO.
    """

    # NOTE(review): presumably RecencyWeighted cooperates (via super()) with
    # the reputation-based weights inherited through
    # Experienced25ResolvedAggregation; confirm the filter is not shadowed.
    method = AggregationMethod.RECENCY_WEIGHTED_EXPERIENCED_USERS_25_RESOLVED
723+
724+
725+
class RecencyWeightedLogOddsNoOutliersAggregation(LogOddsMeanValues, RecencyWeightedMeanNoOutliersAggregation):
    """
    recency weighted
    log odds mean
    remove 10% outliers

    Puts the LogOddsMeanValues mixin ahead of
    RecencyWeightedMeanNoOutliersAggregation in the MRO.
    """

    method = AggregationMethod.RECENCY_WEIGHTED_LOG_ODDS_NO_OUTLIERS
735+
736+
538737
# Registry: which Aggregation class implements each AggregationMethod.
aggregation_method_map: dict[AggregationMethod, type[Aggregation]] = {
    # pre-existing methods
    AggregationMethod.UNWEIGHTED: UnweightedAggregation,
    AggregationMethod.RECENCY_WEIGHTED: RecencyWeightedAggregation,
    AggregationMethod.SINGLE_AGGREGATION: SingleAggregation,
    # reputation-filtered and baseline methods
    AggregationMethod.MEDALISTS: MedalistsAggregation,
    AggregationMethod.EXPERIENCED_USERS_25_RESOLVED: Experienced25ResolvedAggregation,
    AggregationMethod.IGNORANCE: IgnoranceAggregation,
    # recency-weighted variants
    AggregationMethod.RECENCY_WEIGHTED_LOG_ODDS: RecencyWeightedLogOddsAggregation,
    AggregationMethod.RECENCY_WEIGHTED_MEAN_NO_OUTLIERS: RecencyWeightedMeanNoOutliersAggregation,
    AggregationMethod.RECENCY_WEIGHTED_MEDALISTS: RecencyWeightedMedalistsAggregation,
    AggregationMethod.RECENCY_WEIGHTED_EXPERIENCED_USERS_25_RESOLVED: RecencyWeightedExperienced25ResolvedAggregation,
    AggregationMethod.RECENCY_WEIGHTED_LOG_ODDS_NO_OUTLIERS: RecencyWeightedLogOddsNoOutliersAggregation,
}
543752

544753

0 commit comments

Comments
 (0)