Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 17 additions & 37 deletions multi-skill/src/bin/codechef_checkpoints.rs
Original file line number Diff line number Diff line change
@@ -1,61 +1,42 @@
use multi_skill::data_processing::{
get_dataset_by_name, read_csv, try_write_slice_to_file, write_json,
};
use multi_skill::metrics::{compute_metrics_custom, PerformanceReport};
use multi_skill::summary::make_leaderboard;
use multi_skill::systems::{get_rating_system_by_name, simulate_contest, Player, PlayersByName};

use serde::{Deserialize, Serialize};
use std::cell::RefCell;

/// Flat per-player record that `make_checkpoint` turns into a `Player`.
///
/// Derives serde's `Serialize`/`Deserialize` so it can be read from or written to
/// a serialized form.
#[derive(Serialize, Deserialize, Clone, Debug)]
struct SimplePlayer {
/// Player identifier; becomes the first element of each pair built by `make_checkpoint`.
handle: String,
/// Passed as the first argument of `Player::with_rating` (presumably the rating mean).
cur_mu: f64,
/// Passed as the second argument of `Player::with_rating` (presumably the rating deviation).
cur_sigma: f64,
}

/// Builds a `PlayersByName` collection from a list of flat player records.
///
/// Every record is converted into a `Player` via `Player::with_rating`, using the
/// record's `cur_mu` and `cur_sigma` and a third argument of `0`, then wrapped in a
/// `RefCell` and paired with the record's `handle`.
fn make_checkpoint(players: Vec<SimplePlayer>) -> PlayersByName {
    // Turns one record into the (handle, player cell) pair that gets collected.
    let restore = |record: SimplePlayer| {
        let rating_state = RefCell::new(Player::with_rating(record.cur_mu, record.cur_sigma, 0));
        (record.handle, rating_state)
    };

    players.into_iter().map(restore).collect()
}
use multi_skill::systems::{simulate_contest, EloMMR, EloMMRVariant};

fn main() {
tracing_subscriber::fmt::init();

// Parse arguments, prepare rating system and datasets
let args: Vec<String> = std::env::args().collect();
if args.len() != 2 {
tracing::error!("Usage: {} system_name", args[0]);
return;
}
let system = &args[1];
let system = get_rating_system_by_name(system).unwrap();
let dataset = get_dataset_by_name("codechef").unwrap();
let mut mu_noob = 1500.;
let sig_noob = 350.;
let sig_noob = 325.;
let weight_limit = 0.2;
let sig_limit = 75.;
let system = EloMMR {
weight_limit,
sig_limit,
drift_per_sec: 0.,
split_ties: false,
subsample_size: 512,
subsample_bucket: 0.5,
variant: EloMMRVariant::Logistic(0.1),
};

let mut players = std::collections::HashMap::new();
let mut avg_perf = compute_metrics_custom(&mut players, &[]);

// Get list of contest names to compare with Codechef's rating system
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd like an explanation here of what the files actually contain.

Maybe for each of these binary files, we can clarify what the inputs and outputs are. Now would also be a good time to revise the Rust file names, if you want to.

let paths = std::fs::read_dir("/home/work_space/elommr-data/ratings").unwrap();
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to keep the organization consistent, could this be moved to "../data/codechef/old_ratings"?
(all binaries currently assume that they're being run from Elo-MMR/multi-skill)

let mut checkpoints = std::collections::HashSet::<String>::new();
for path in paths {
if let Some(contest_name) = path.unwrap().path().file_stem() {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the intention to abort on all failures? If so, we can do

let contest_name = path.unwrap().path().file_stem().unwrap().to_str().unwrap();
checkpoints.insert(contest_name.to_owned());

And just to avoid the path.path(), maybe path could be renamed to something like file_entry.

if let Some(string_name) = contest_name.to_os_string().into_string().ok() {
if let Ok(string_name) = contest_name.to_os_string().into_string() {
checkpoints.insert(string_name);
}
}
}

// Run the contest histories and measure
let dir = std::path::PathBuf::from("/home/work_space/elommr-data/elommr-checkpoints/codechef/");
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is just the usual contest data, shouldn't it be at cache/codechef?

let now = std::time::Instant::now();
for (index, contest) in dataset.iter().enumerate() {
tracing::debug!(
"Processing\n{:6} contestants in{:5}th contest with wt={}: {}",
Expand All @@ -71,13 +52,12 @@ fn main() {
}

// Now run the actual rating update
simulate_contest(&mut players, &contest, &*system, mu_noob, sig_noob, index);
simulate_contest(&mut players, &contest, &system, mu_noob, sig_noob, index);

if checkpoints.contains(&contest.name) {
let output_file = dir.join(contest.name.clone() + ".csv");
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Output could go somewhere like
let output_dir = "../data/codechef/informative_dir_name/";

let (summary, rating_data) = make_leaderboard(&players, 0);
let (_summary, rating_data) = make_leaderboard(&players, 0);
try_write_slice_to_file(&rating_data, &output_file);
}
}
let secs_elapsed = now.elapsed().as_nanos() as f64 * 1e-9;
}
122 changes: 122 additions & 0 deletions multi-skill/src/bin/mmr_from_cc.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
use multi_skill::data_processing::{get_dataset_by_name, read_csv, try_write_slice_to_file};
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some of my comments from codechef_checkpoints.rs apply to this file too, so I won't copy them here.

use multi_skill::summary::make_leaderboard;
use multi_skill::systems::{
simulate_contest, EloMMR, EloMMRVariant, Player, PlayerEvent, PlayersByName,
};

use serde::{Deserialize, Serialize};
use std::cell::RefCell;

/// Flat per-player record that `make_checkpoint` turns into a `Player`.
///
/// Derives serde's `Serialize`/`Deserialize`; `main` obtains these records from `read_csv`.
#[derive(Serialize, Deserialize, Clone, Debug)]
struct SimplePlayer {
/// Player identifier; becomes the first element of each pair built by `make_checkpoint`.
handle: String,
/// Passed as the first argument of `Player::with_rating` and as the second
/// argument of `update_rating` (presumably the rating mean).
cur_mu: f64,
/// Used directly as the sigma when present; when `None`, `make_checkpoint`
/// derives a sigma from `num_contests` instead.
cur_sigma: Option<f64>,
/// Number of iterations of the sigma recurrence in `make_checkpoint` when
/// `cur_sigma` is absent; treated as 1 when this is also `None`.
num_contests: Option<usize>,
}

/// Builds a `PlayersByName` collection from a list of flat player records.
///
/// Each record becomes a `(handle, RefCell<Player>)` pair. The sigma given to
/// `Player::with_rating` is `cur_sigma` when present; otherwise it is computed by
/// running a variance recurrence `num_contests` times (once if that is also absent),
/// starting from `350^2`. A placeholder event is pushed onto the player's history
/// before `update_rating` is called.
///
/// NOTE(review): a similar `SimplePlayer` / `make_checkpoint` pair appears in
/// codechef_checkpoints.rs — candidate for extraction into a shared module.
fn make_checkpoint(players: Vec<SimplePlayer>) -> PlayersByName {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe include a comment that this struct & function are duplicated in another file, so I might remember to extract them someday.

players
.into_iter()
.map(|simp| {
// In priority order: use cur_sigma, num_contests, or a default
let sig = match simp.cur_sigma {
Some(sig) => sig,
None => {
// NOTE(review): these constants presumably mirror the rating system's
// sig_limit / weight_limit / sig_noob, but `main` in this file uses
// sig_limit = 75 and sig_noob = 325 — confirm which values are intended.
const SIG_LIM_SQ: f64 = 80. * 80.;
const WEIGHT: f64 = 0.2;
let sig_perf_sq = (1. + 1. / WEIGHT) * SIG_LIM_SQ;
let sig_drift_sq = WEIGHT * SIG_LIM_SQ;
let mut sig_sq = 350. * 350.;
// One iteration per contest: grow the variance by sig_drift_sq, then
// scale it by sig_perf_sq / (sig_sq + sig_perf_sq).
for _ in 0..simp.num_contests.unwrap_or(1) {
sig_sq += sig_drift_sq;
sig_sq *= sig_perf_sq / (sig_sq + sig_perf_sq);
}
sig_sq.sqrt()
}
};

// Hack to create a Player with a non-empty history,
// when we don't have access to their actual history.
let mut player = Player::with_rating(simp.cur_mu, sig, 0);
let fake_event = PlayerEvent {
contest_index: 0,
rating_mu: 0,
rating_sig: 0,
perf_score: 0,
place: 0,
};
player.event_history.push(fake_event);
// NOTE(review): presumably update_rating records the rating into the
// placeholder event pushed above — confirm against Player::update_rating.
player.update_rating(player.approx_posterior, simp.cur_mu);
(simp.handle, RefCell::new(player))
})
.collect()
}

/// Resumes the rating simulation on the "codechef" dataset from a saved player
/// summary, and writes a leaderboard CSV after each contest whose name matches a
/// file stem found in the ratings directory.
///
/// Panics if the dataset, the summary CSV, or the ratings directory cannot be read.
fn main() {
tracing_subscriber::fmt::init();

// Set up the rating system
let dataset = get_dataset_by_name("codechef").unwrap();
let mut mu_noob = 1500.;
let sig_noob = 325.;
let weight_limit = 0.2;
let sig_limit = 75.;
let system = EloMMR {
weight_limit,
sig_limit,
drift_per_sec: 0.,
split_ties: false,
subsample_size: 512,
subsample_bucket: 0.5,
variant: EloMMRVariant::Logistic(0.1),
};

// Load the saved player summaries and rebuild the player map from them.
// NOTE(review): the `true` flag to read_csv presumably means "has headers" — confirm.
let input_file =
std::path::PathBuf::from("/home/work_space/elommr-data/cc_init_condition-MAY21B-516.csv");
let summary = read_csv(input_file, true).expect("Failed to read summaries");
let mut players = make_checkpoint(summary);
// Contests with index <= contest_cutoff are skipped in the loop below.
let contest_cutoff = 516;

// Get list of contest names to compare with Codechef's rating system
// Each directory entry contributes its file stem; entries with no stem or a
// stem that is not valid Unicode are silently skipped.
let paths = std::fs::read_dir("/home/work_space/elommr-data/ratings").unwrap();
let mut checkpoints = std::collections::HashSet::<String>::new();
for path in paths {
if let Some(contest_name) = path.unwrap().path().file_stem() {
if let Ok(string_name) = contest_name.to_os_string().into_string() {
checkpoints.insert(string_name);
}
}
}

// Run the contest histories and measure
// `dir` is where the per-contest CSV files are written.
let dir =
std::path::PathBuf::from("/home/work_space/elommr-data/elommr-checkpoints/start-from-516/");
for (index, contest) in dataset.iter().enumerate() {
if index <= contest_cutoff {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replace this check with .skip(contest_cutoff + 1) on the iterator.

Btw, if you prefer indexing to start after the cutoff, you can exchange the order of .enumerate() and .skip().

continue;
}

tracing::debug!(
"Processing\n{:6} contestants in{:5}th contest with wt={}: {}",
contest.standings.len(),
index,
contest.weight,
contest.name
);

// At some point, codechef changed the default rating!
// The new value stays in effect for every contest from "START25B" onward.
if contest.name == "START25B" {
mu_noob = 1000.;
}

// Now run the actual rating update
simulate_contest(&mut players, &contest, &system, mu_noob, sig_noob, index);

// Write a leaderboard snapshot only for contests listed in `checkpoints`.
if checkpoints.contains(&contest.name) {
let output_file = dir.join(contest.name.clone() + ".csv");
let (_summary, rating_data) = make_leaderboard(&players, 0);
// NOTE(review): the `try_` prefix suggests write failures are non-fatal — confirm.
try_write_slice_to_file(&rating_data, &output_file);
}
}
}
Loading