|
12 | 12 | from bisect import bisect_left, bisect_right
|
13 | 13 | from dataclasses import dataclass
|
14 | 14 | from collections import defaultdict
|
15 |
| -from datetime import datetime, timedelta, timezone as dt_timezone |
| 15 | +from datetime import datetime, timedelta, timezone |
16 | 16 | from typing import Sequence
|
17 | 17 |
|
18 | 18 | import numpy as np
|
19 |
| -from django.utils import timezone |
20 |
| -from django.db.models import Q, QuerySet |
| 19 | +from django.db.models import F, Q, QuerySet |
21 | 20 |
|
| 21 | +from projects.permissions import ObjectPermission |
22 | 22 | from questions.models import (
|
23 | 23 | QUESTION_CONTINUOUS_TYPES,
|
24 | 24 | Question,
|
25 | 25 | Forecast,
|
26 | 26 | AggregateForecast,
|
27 | 27 | )
|
28 | 28 | from questions.types import AggregationMethod
|
29 |
| -from scoring.models import Score |
| 29 | +from scoring.models import Score, LeaderboardEntry |
| 30 | +from users.models import User |
30 | 31 | from utils.the_math.measures import (
|
31 | 32 | weighted_percentile_2d,
|
32 | 33 | percent_point_function,
|
@@ -535,10 +536,218 @@ def get_weights(self, forecast_set: ForecastSet) -> np.ndarray | None:
|
535 | 536 | return weights if weights.size else None
|
536 | 537 |
|
537 | 538 |
|
class MedalistsAggregation(ReputationWeighted, Aggregation):
    """
    unweighted
    median
    only medalists
    """

    reputations: dict[int, list[Reputation]]
    question: Question
    method = AggregationMethod.MEDALISTS

    def get_reputation_history(
        self, user_ids: list[int]
    ) -> dict[int, list[Reputation]]:
        """returns a dict of reputations. Each one is a record of what a
        particular user's reputation was at a particular time: 1 once the
        user holds a medal from a FORECASTER-permission project, else 0.
        The reputation can change during the interval."""
        start = self.question.open_time
        end = self.question.scheduled_close_time
        if end is None:
            # Fixed: `timezone` in this module is datetime.timezone (the
            # django.utils.timezone import was removed), which has no
            # .now() — the previous `timezone.now()` raised AttributeError.
            end = datetime.now(timezone.utc)
        medals = (
            LeaderboardEntry.objects.filter(
                user_id__in=user_ids,
                medal__isnull=False,
                leaderboard__project__default_permission=ObjectPermission.FORECASTER,
            )
            .annotate(set_time=F("leaderboard__finalize_time"))
            .filter(set_time__lte=end)
            .order_by("set_time")
        )

        # setup
        reputations: dict[int, list[Reputation]] = defaultdict(list)

        # Establish initial reputations at the start of the interval:
        # users already holding a medal start at 1, everyone else at 0.
        old_medals = list(medals.filter(set_time__lte=start).order_by("set_time"))
        for medal in old_medals:
            user_id = medal.user_id
            reputations[user_id] = [Reputation(user_id, 1, start)]
        for user_id in user_ids:
            if user_id not in reputations:
                reputations[user_id] = [Reputation(user_id, 0, start)]
        # Then, for each medal earned during the interval, flip that user's
        # reputation to 1 (only the first such medal matters).
        new_medals = list(medals.filter(set_time__gt=start).order_by("set_time"))
        for medal in new_medals:
            user_id = medal.user_id
            if reputations[user_id][-1].value == 0:
                # NOTE(review): the timestamp uses edited_at/created_at while
                # the query filters and orders by set_time (leaderboard
                # finalize time) — confirm this mismatch is intentional;
                # records could end up out of chronological order.
                reputations[user_id].append(
                    Reputation(user_id, 1, medal.edited_at or medal.created_at)
                )
        return reputations
| 591 | + |
| 592 | + |
class Experienced25ResolvedAggregation(ReputationWeighted, Aggregation):
    """
    unweighted
    median
    only forecasters with at least 25 Resolved
    """

    reputations: dict[int, list[Reputation]]
    question: Question
    # Fixed: was AggregationMethod.MEDALISTS — a copy-paste error from
    # MedalistsAggregation. This class is registered under
    # EXPERIENCED_USERS_25_RESOLVED in aggregation_method_map.
    method = AggregationMethod.EXPERIENCED_USERS_25_RESOLVED

    def get_reputation_history(
        self, user_ids: list[int]
    ) -> dict[int, list[Reputation]]:
        """returns a dict of reputations. Each one is a record of what a
        particular user's reputation was at a particular time: 1 once the
        user has at least 25 resolved peer-scored public questions, else 0.
        The reputation can change during the interval."""
        start = self.question.open_time
        end = self.question.scheduled_close_time
        if end is None:
            # Fixed: `timezone` in this module is datetime.timezone (the
            # django.utils.timezone import was removed), which has no
            # .now() — the previous `timezone.now()` raised AttributeError.
            end = datetime.now(timezone.utc)
        peer_scores = (
            Score.objects.filter(
                user_id__in=user_ids,
                score_type=Score.ScoreTypes.PEER,
                question__in=Question.objects.filter_public(),
            )
            .annotate(set_time=F("question__actual_resolve_time"))
            .order_by("set_time")
            .filter(set_time__lte=end)
            .distinct()
        )

        # running count of resolved peer-scored questions per user
        resolved_per_user: dict[int, int] = defaultdict(int)
        reputations: dict[int, list[Reputation]] = defaultdict(list)

        # Establish reputations at the start of the interval.
        old_peer_scores = list(
            peer_scores.filter(set_time__lte=start).order_by("set_time")
        )
        for score in old_peer_scores:
            resolved_per_user[score.user_id] += 1
        for user_id in user_ids:
            reputations[user_id].append(
                Reputation(user_id, 1 if resolved_per_user[user_id] >= 25 else 0, start)
            )

        # Then, for each new score, add a new reputation record
        new_peer_scores = list(
            peer_scores.filter(set_time__gt=start).order_by("set_time")
        )
        for score in new_peer_scores:
            # update the counts by user, then record the updated reputation
            resolved_per_user[score.user_id] += 1
            reputations[score.user_id].append(
                Reputation(
                    score.user_id,
                    1 if resolved_per_user[score.user_id] >= 25 else 0,
                    score.set_time,
                )
            )
        return reputations
| 656 | + |
| 657 | + |
class IgnoranceAggregation(Aggregation):
    """Aggregation that ignores all forecasts and always yields
    maximal-ignorance values for the question type."""

    method = AggregationMethod.IGNORANCE

    def __init__(self, *args, question_type: Question.QuestionType, **kwargs):
        super().__init__(*args, question_type=question_type, **kwargs)
        # retained so calculate_forecast_values can branch on the type
        self.question_type = question_type

    def calculate_forecast_values(
        self, forecast_set: ForecastSet, weights: np.ndarray | None = None
    ) -> np.ndarray:
        # Weights are accepted for interface compatibility but never used:
        # the result depends only on the question type and value count.
        first_forecast = forecast_set.forecasts_values[0]
        n_values = len(first_forecast)
        if self.question_type not in QUESTION_CONTINUOUS_TYPES:
            # discrete question: uniform probability over every option
            return np.full(n_values, 1.0 / n_values)
        # continuous question: prediction is a CDF rising linearly
        # from 0.05 to 0.95
        return np.linspace(0.05, 0.95, n_values)
| 678 | + |
| 679 | + |
class RecencyWeightedLogOddsAggregation(
    LogOddsMeanValues, RecencyWeighted, Aggregation
):
    """Mixin-composed aggregation:
    recency weighted
    log odds mean
    """

    method = AggregationMethod.RECENCY_WEIGHTED_LOG_ODDS
| 689 | + |
| 690 | + |
class RecencyWeightedMeanNoOutliersAggregation(
    NoOutliers, MeanValues, RecencyWeightedAggregation
):
    """Mixin-composed aggregation:
    recency weighted
    mean
    remove 10% outliers
    """

    method = AggregationMethod.RECENCY_WEIGHTED_MEAN_NO_OUTLIERS
| 701 | + |
| 702 | + |
class RecencyWeightedMedalistsAggregation(RecencyWeighted, MedalistsAggregation):
    """MedalistsAggregation variant with recency weighting:
    recency weighted
    median
    only medalists
    """

    method = AggregationMethod.RECENCY_WEIGHTED_MEDALISTS
| 711 | + |
| 712 | + |
class RecencyWeightedExperienced25ResolvedAggregation(
    RecencyWeighted, Experienced25ResolvedAggregation
):
    """Experienced25ResolvedAggregation variant with recency weighting:
    recency weighted
    median
    only forecasters with at least 25 Resolved
    """

    method = AggregationMethod.RECENCY_WEIGHTED_EXPERIENCED_USERS_25_RESOLVED
| 723 | + |
| 724 | + |
class RecencyWeightedLogOddsNoOutliersAggregation(
    LogOddsMeanValues, RecencyWeightedMeanNoOutliersAggregation
):
    """Mixin-composed aggregation:
    recency weighted
    log odds mean
    remove 10% outliers
    """

    method = AggregationMethod.RECENCY_WEIGHTED_LOG_ODDS_NO_OUTLIERS
| 735 | + |
| 736 | + |
# Registry mapping each AggregationMethod enum value to the Aggregation
# subclass that implements it.
aggregation_method_map: dict[AggregationMethod, type[Aggregation]] = {
    AggregationMethod.UNWEIGHTED: UnweightedAggregation,
    AggregationMethod.RECENCY_WEIGHTED: RecencyWeightedAggregation,
    AggregationMethod.SINGLE_AGGREGATION: SingleAggregation,
    AggregationMethod.MEDALISTS: MedalistsAggregation,
    AggregationMethod.EXPERIENCED_USERS_25_RESOLVED: Experienced25ResolvedAggregation,
    AggregationMethod.IGNORANCE: IgnoranceAggregation,
    AggregationMethod.RECENCY_WEIGHTED_LOG_ODDS: RecencyWeightedLogOddsAggregation,
    AggregationMethod.RECENCY_WEIGHTED_MEAN_NO_OUTLIERS: RecencyWeightedMeanNoOutliersAggregation,
    AggregationMethod.RECENCY_WEIGHTED_MEDALISTS: RecencyWeightedMedalistsAggregation,
    AggregationMethod.RECENCY_WEIGHTED_EXPERIENCED_USERS_25_RESOLVED: (
        RecencyWeightedExperienced25ResolvedAggregation
    ),
    AggregationMethod.RECENCY_WEIGHTED_LOG_ODDS_NO_OUTLIERS: RecencyWeightedLogOddsNoOutliersAggregation,
}
|
543 | 752 |
|
544 | 753 |
|
|
0 commit comments