Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion surveyequivalence/equivalence.py
Original file line number Diff line number Diff line change
Expand Up @@ -1406,7 +1406,7 @@ def xtick_formatter(x, pos):
# add the right y-axis for performance_ratio
if self.performance_ratio_k is not None and include_performance_ratio:
scale = self.expert_power_curve.values[self.performance_ratio_k]-self.expert_power_curve.values[0]
regular_ticks = [i/2 for i in range(0, math.ceil(self.ymax/scale)*2+2)]
regular_ticks = [i/2 for i in range(0, math.ceil(self.ymax/scale)*2+1)]
ticks_to_use = list(set(regular_ticks) - set([nearest_tick(regular_ticks, y) for y in self.performance_ratio_intercepts]))
y_ticks = sorted(ticks_to_use+self.performance_ratio_intercepts)
ax2.set_yticks(y_ticks)
Expand Down
87 changes: 78 additions & 9 deletions surveyequivalence/examples/paper_running_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from surveyequivalence import AgreementScore, PluralityVote, CrossEntropyScore, \
AnonymousBayesianCombiner, FrequencyCombiner, \
AnalysisPipeline, Plot, ClassifierResults, DiscretePrediction, DiscreteDistributionPrediction, DMIScore_for_Soft_Classifier
from surveyequivalence import F1Score

def main(path = f'data/running_example_50_items', num_bootstrap_item_samples=5, nrows=None):

Expand Down Expand Up @@ -82,7 +83,9 @@ def str2prediction_instance(s):
# )
# pl.save(pipeline.path_for_saving("running_example/plurality_plus_agreement"), fig=fig)

def ABC_CE(target_panel_size=1):
saved_data = []

def ABC_CE(target_panel_size=1,item_size=None):
abc = AnonymousBayesianCombiner(allowable_labels=['pos', 'neg'],W=W)
cross_entropy = CrossEntropyScore(num_ref_raters_per_virtual_rater=target_panel_size)
# Here we set anonymous_raters to True, so that we will compute expected score against a randomly selected
Expand All @@ -97,6 +100,7 @@ def ABC_CE(target_panel_size=1):
anonymous_raters=True,
performance_ratio_k = 2,
verbosity = 1,
max_K = 10-target_panel_size+1,
procs=1)

pipeline2.save(path=pipeline2.path_for_saving("running_example/abc_plus_cross_entropy_target_panel_size_"+str(target_panel_size)),
Expand All @@ -119,7 +123,7 @@ def ABC_CE(target_panel_size=1):
name='running example: ABC + cross entropy',
legend_label='k raters',
generate_pgf=True,
performance_ratio_k=3,
performance_ratio_k=2,
)

pl.plot(include_classifiers=True,
Expand All @@ -131,8 +135,10 @@ def ABC_CE(target_panel_size=1):
)
pl.save(path=pipeline2.path_for_saving("running_example/abc_plus_cross_entropy_target_panel_size_"+str(target_panel_size)), fig=fig)

saved_data.append(['ABC',"CE",target_panel_size,item_size,pl])


def ABC_DMI(target_panel_size=1):
def ABC_DMI(target_panel_size=1,item_size=None):
# abc+dmi
abc = AnonymousBayesianCombiner(allowable_labels=['pos', 'neg'],W=W)
dmi = DMIScore_for_Soft_Classifier(num_ref_raters_per_virtual_rater=target_panel_size)
Expand All @@ -148,6 +154,7 @@ def ABC_DMI(target_panel_size=1):
anonymous_raters=True,
verbosity = 1,
performance_ratio_k = 2,
max_K = 10-target_panel_size+1,
procs=1)

pipeline4.save(path=pipeline4.path_for_saving("running_example/abc_plus_dmi_target_panel_size_"+str(target_panel_size)),
Expand All @@ -166,7 +173,7 @@ def ABC_DMI(target_panel_size=1):
color_map=color_map,
y_axis_label='information gain ($c_k - c_0$)',
center_on=pipeline4.expert_power_curve.values[0],
y_range=(0, 0.4),
y_range=(0, 0.2),
name='running example: ABC + dmi',
legend_label='k raters',
generate_pgf=True,
Expand All @@ -182,16 +189,72 @@ def ABC_DMI(target_panel_size=1):
)
pl.save(path=pipeline4.path_for_saving("running_example/abc_plus_dmi_target_panel_size_"+str(target_panel_size)), fig=fig)

for i in range(1):
saved_data.append(['ABC',"DMI",target_panel_size,item_size,pl])


def ABC_F1(target_panel_size=1, item_size=None):
    """Run the ABC + F1 analysis for the running example and plot the power curve.

    Parallel to ABC_CE and ABC_DMI above: combines ratings with the Anonymous
    Bayesian Combiner and scores with F1 against reference panels of
    ``target_panel_size`` raters.

    Parameters
    ----------
    target_panel_size : int
        Number of reference raters per virtual rater for the F1 scorer.
    item_size : int or None
        Number of items in this run (``nrows``); recorded in ``saved_data``
        for later tabulation. Not used in the computation itself.

    Side effects: saves the pipeline results and the figure under
    running_example/abc_plus_f1_target_panel_size_<k>, and appends a summary
    row to the enclosing ``saved_data`` list (uses W, classifier_predictions,
    soft_classifiers, num_bootstrap_item_samples, color_map from the
    enclosing scope).
    """
    # abc + F1 (comment fixed: original said "abc+dmi", a copy-paste leftover)
    abc = AnonymousBayesianCombiner(allowable_labels=['pos', 'neg'], W=W)
    f1 = F1Score(num_ref_raters_per_virtual_rater=target_panel_size)
    # Here we set anonymous_raters to True, so that we will compute expected
    # score against a randomly selected rater for each item, rather than
    # against a randomly selected column
    pipeline5 = AnalysisPipeline(W,
                                 expert_cols=list(W.columns),
                                 classifier_predictions=classifier_predictions[soft_classifiers],
                                 combiner=abc,
                                 scorer=f1,
                                 allowable_labels=['pos', 'neg'],
                                 num_bootstrap_item_samples=num_bootstrap_item_samples,
                                 anonymous_raters=True,
                                 verbosity=1,
                                 performance_ratio_k=2,
                                 # cap the power curve at the number of raters
                                 # left after reserving the reference panel
                                 max_K=10 - target_panel_size + 1,
                                 procs=1)

    # BUGFIX: message said "cross entropy score" (copy-pasted from ABC_CE);
    # this pipeline scores with F1.
    pipeline5.save(path=pipeline5.path_for_saving("running_example/abc_plus_f1_target_panel_size_" + str(target_panel_size)),
                   msg=f"""
    Running example with {len(W)} items and {len(W.columns)} raters per item
    {num_bootstrap_item_samples} bootstrap itemsets
    Anonymous Bayesian combiner with F1 score
    """)

    fig, ax = plt.subplots()
    fig.set_size_inches(8.5, 10.5)

    pl = Plot(ax,
              pipeline5.expert_power_curve,
              classifier_scores=ClassifierResults(pipeline5.classifier_scores.df[['calibrated hard classifier']]),
              color_map=color_map,
              y_axis_label='information gain ($c_k - c_0$)',
              center_on=pipeline5.expert_power_curve.values[0],
              y_range=(0, 0.2),
              name='running example: ABC + F1',
              legend_label='k raters',
              generate_pgf=True,
              performance_ratio_k=2
              )

    pl.plot(include_classifiers=True,
            include_classifier_equivalences=True,
            include_droplines=True,
            include_expert_points='all',
            connect_expert_points=True,
            include_classifier_cis=True
            )
    # BUGFIX: original used pipeline4.path_for_saving here (copy-paste from
    # ABC_DMI), which saves the F1 figure into the DMI directory — or raises
    # NameError if ABC_DMI has not run. Use this function's own pipeline5.
    pl.save(path=pipeline5.path_for_saving("running_example/abc_plus_f1_target_panel_size_" + str(target_panel_size)), fig=fig)

    # Consistency with ABC_CE / ABC_DMI: record this run so main()'s returned
    # saved_data includes F1 results as well.
    saved_data.append(['ABC', "F1", target_panel_size, item_size, pl])

for i in range(3):
t0=time.time()
ABC_CE(i*2+1)
ABC_CE(i*2+1,nrows)
t1=time.time()
print("running time: ",t1-t0)
for i in range(0):
for i in range(3):
t0=time.time()
ABC_DMI(i*2+1)
ABC_DMI(i*2+1,nrows)
t1=time.time()
print("running time: ",t1-t0)

return saved_data

# ###### Frequency combiner plus cross entropy ######
# freq_combiner = FrequencyCombiner(allowable_labels=['pos', 'neg'])
Expand All @@ -213,4 +276,10 @@ def ABC_DMI(target_panel_size=1):


if __name__ == '__main__':
main(path = '../data/running_example', nrows=200)

nrow = [100]
data = []
for i in nrow:
data = data + main(path = '../data/running_example', nrows=i)

df = pd.DataFrame(data,columns=['Combiner','Scorer','Target Panel Size','Item Size','Plot'],dtype=float)
9 changes: 5 additions & 4 deletions surveyequivalence/scoring_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,11 @@ def expected_score_anonymous_raters(self,
# create a bunch of virtual raters (samples)
# for each virtual rater, pick a random combination randomly selected set of num_ref_raters_per_virtual_rater non-null ratings for each column

W_np = W.to_numpy()

virtual_raters_collection = []
if ref_rater_combiner=="majority_vote":
for _, virtual_rater_i in W.iterrows():
vals = virtual_rater_i.dropna().values
for vals in W_np:
if len(vals) > 0:
ratings_for_i = []
num = min(len(vals),num_ref_raters_per_virtual_rater)
Expand Down Expand Up @@ -799,8 +800,8 @@ def score(classifier_predictions: Sequence[DiscreteDistributionPrediction],


class F1Score(Scorer):
def __init__(self):
super().__init__()
def __init__(self, num_virtual_raters=100, num_ref_raters_per_virtual_rater=1, ref_rater_combiner="majority_vote", verbosity=0):
super().__init__(num_virtual_raters, num_ref_raters_per_virtual_rater, ref_rater_combiner, verbosity)

@staticmethod
def score(classifier_predictions: Sequence[DiscreteDistributionPrediction],
Expand Down
3 changes: 2 additions & 1 deletion surveyequivalence/templates/pgf_template.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ yticklabel style={
/pgf/number format/fixed,
/pgf/number format/precision=5
},
scaled y ticks=false
scaled y ticks=false,
ytick pos=left
]

$plots
Expand Down