-
Notifications
You must be signed in to change notification settings - Fork 23
Codechef experiments #16
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
46fc422
dad64e4
461d055
865d250
8b20707
1bbcae8
4d80368
588f4d0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,61 +1,42 @@ | ||
| use multi_skill::data_processing::{ | ||
| get_dataset_by_name, read_csv, try_write_slice_to_file, write_json, | ||
| }; | ||
| use multi_skill::metrics::{compute_metrics_custom, PerformanceReport}; | ||
| use multi_skill::summary::make_leaderboard; | ||
| use multi_skill::systems::{get_rating_system_by_name, simulate_contest, Player, PlayersByName}; | ||
|
|
||
| use serde::{Deserialize, Serialize}; | ||
| use std::cell::RefCell; | ||
|
|
||
| #[derive(Serialize, Deserialize, Clone, Debug)] | ||
| struct SimplePlayer { | ||
| handle: String, | ||
| cur_mu: f64, | ||
| cur_sigma: f64, | ||
| } | ||
|
|
||
| fn make_checkpoint(players: Vec<SimplePlayer>) -> PlayersByName { | ||
| players | ||
| .into_iter() | ||
| .map(|simp| { | ||
| let player = Player::with_rating(simp.cur_mu, simp.cur_sigma, 0); | ||
| (simp.handle, RefCell::new(player)) | ||
| }) | ||
| .collect() | ||
| } | ||
| use multi_skill::systems::{simulate_contest, EloMMR, EloMMRVariant}; | ||
|
|
||
| fn main() { | ||
| tracing_subscriber::fmt::init(); | ||
|
|
||
| // Parse arguments, prepare rating system and datasets | ||
| let args: Vec<String> = std::env::args().collect(); | ||
| if args.len() != 2 { | ||
| tracing::error!("Usage: {} system_name", args[0]); | ||
| return; | ||
| } | ||
| let system = &args[1]; | ||
| let system = get_rating_system_by_name(system).unwrap(); | ||
| let dataset = get_dataset_by_name("codechef").unwrap(); | ||
| let mut mu_noob = 1500.; | ||
| let sig_noob = 350.; | ||
| let sig_noob = 325.; | ||
| let weight_limit = 0.2; | ||
| let sig_limit = 75.; | ||
| let system = EloMMR { | ||
| weight_limit, | ||
| sig_limit, | ||
| drift_per_sec: 0., | ||
| split_ties: false, | ||
| subsample_size: 512, | ||
| subsample_bucket: 0.5, | ||
| variant: EloMMRVariant::Logistic(0.1), | ||
| }; | ||
|
|
||
| let mut players = std::collections::HashMap::new(); | ||
| let mut avg_perf = compute_metrics_custom(&mut players, &[]); | ||
|
|
||
| // Get list of contest names to compare with Codechef's rating system | ||
| let paths = std::fs::read_dir("/home/work_space/elommr-data/ratings").unwrap(); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just to keep the organization consistent, could this be moved to "../data/codechef/old_ratings"? |
||
| let mut checkpoints = std::collections::HashSet::<String>::new(); | ||
| for path in paths { | ||
| if let Some(contest_name) = path.unwrap().path().file_stem() { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is the intention to abort on all failures? If so, we can do [inline code lost in extraction]. And just to avoid the [remainder of comment lost in extraction] |
||
| if let Some(string_name) = contest_name.to_os_string().into_string().ok() { | ||
| if let Ok(string_name) = contest_name.to_os_string().into_string() { | ||
| checkpoints.insert(string_name); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // Run the contest histories and measure | ||
| let dir = std::path::PathBuf::from("/home/work_space/elommr-data/elommr-checkpoints/codechef/"); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If this is just the usual contest data, shouldn't it be at [path lost in extraction]? |
||
| let now = std::time::Instant::now(); | ||
| for (index, contest) in dataset.iter().enumerate() { | ||
| tracing::debug!( | ||
| "Processing\n{:6} contestants in{:5}th contest with wt={}: {}", | ||
|
|
@@ -71,13 +52,12 @@ fn main() { | |
| } | ||
|
|
||
| // Now run the actual rating update | ||
| simulate_contest(&mut players, &contest, &*system, mu_noob, sig_noob, index); | ||
| simulate_contest(&mut players, &contest, &system, mu_noob, sig_noob, index); | ||
|
|
||
| if checkpoints.contains(&contest.name) { | ||
| let output_file = dir.join(contest.name.clone() + ".csv"); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Output could go somewhere like [path lost in extraction] |
||
| let (summary, rating_data) = make_leaderboard(&players, 0); | ||
| let (_summary, rating_data) = make_leaderboard(&players, 0); | ||
| try_write_slice_to_file(&rating_data, &output_file); | ||
| } | ||
| } | ||
| let secs_elapsed = now.elapsed().as_nanos() as f64 * 1e-9; | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,122 @@ | ||
| use multi_skill::data_processing::{get_dataset_by_name, read_csv, try_write_slice_to_file}; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Some of my comments from [file name and remainder of comment lost in extraction] |
||
| use multi_skill::summary::make_leaderboard; | ||
| use multi_skill::systems::{ | ||
| simulate_contest, EloMMR, EloMMRVariant, Player, PlayerEvent, PlayersByName, | ||
| }; | ||
|
|
||
| use serde::{Deserialize, Serialize}; | ||
| use std::cell::RefCell; | ||
|
|
||
| #[derive(Serialize, Deserialize, Clone, Debug)] | ||
| struct SimplePlayer { | ||
| handle: String, | ||
| cur_mu: f64, | ||
| cur_sigma: Option<f64>, | ||
| num_contests: Option<usize>, | ||
| } | ||
|
|
||
| fn make_checkpoint(players: Vec<SimplePlayer>) -> PlayersByName { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe include a comment that this struct & function are duplicated in another file, so I might remember to extract them someday. |
||
| players | ||
| .into_iter() | ||
| .map(|simp| { | ||
| // In priority order: use cur_sigma, num_contests, or a default | ||
| let sig = match simp.cur_sigma { | ||
| Some(sig) => sig, | ||
| None => { | ||
| const SIG_LIM_SQ: f64 = 80. * 80.; | ||
| const WEIGHT: f64 = 0.2; | ||
| let sig_perf_sq = (1. + 1. / WEIGHT) * SIG_LIM_SQ; | ||
| let sig_drift_sq = WEIGHT * SIG_LIM_SQ; | ||
| let mut sig_sq = 350. * 350.; | ||
| for _ in 0..simp.num_contests.unwrap_or(1) { | ||
| sig_sq += sig_drift_sq; | ||
| sig_sq *= sig_perf_sq / (sig_sq + sig_perf_sq); | ||
| } | ||
| sig_sq.sqrt() | ||
| } | ||
| }; | ||
|
|
||
| // Hack to create a Player with a non-empty history, | ||
| // when we don't have access to their actual history. | ||
| let mut player = Player::with_rating(simp.cur_mu, sig, 0); | ||
| let fake_event = PlayerEvent { | ||
| contest_index: 0, | ||
| rating_mu: 0, | ||
| rating_sig: 0, | ||
| perf_score: 0, | ||
| place: 0, | ||
| }; | ||
| player.event_history.push(fake_event); | ||
| player.update_rating(player.approx_posterior, simp.cur_mu); | ||
| (simp.handle, RefCell::new(player)) | ||
| }) | ||
| .collect() | ||
| } | ||
|
|
||
| fn main() { | ||
| tracing_subscriber::fmt::init(); | ||
|
|
||
| // Set up the rating system | ||
| let dataset = get_dataset_by_name("codechef").unwrap(); | ||
| let mut mu_noob = 1500.; | ||
| let sig_noob = 325.; | ||
| let weight_limit = 0.2; | ||
| let sig_limit = 75.; | ||
| let system = EloMMR { | ||
| weight_limit, | ||
| sig_limit, | ||
| drift_per_sec: 0., | ||
| split_ties: false, | ||
| subsample_size: 512, | ||
| subsample_bucket: 0.5, | ||
| variant: EloMMRVariant::Logistic(0.1), | ||
| }; | ||
|
|
||
| let input_file = | ||
| std::path::PathBuf::from("/home/work_space/elommr-data/cc_init_condition-MAY21B-516.csv"); | ||
| let summary = read_csv(input_file, true).expect("Failed to read summaries"); | ||
| let mut players = make_checkpoint(summary); | ||
| let contest_cutoff = 516; | ||
|
|
||
| // Get list of contest names to compare with Codechef's rating system | ||
| let paths = std::fs::read_dir("/home/work_space/elommr-data/ratings").unwrap(); | ||
| let mut checkpoints = std::collections::HashSet::<String>::new(); | ||
| for path in paths { | ||
| if let Some(contest_name) = path.unwrap().path().file_stem() { | ||
| if let Ok(string_name) = contest_name.to_os_string().into_string() { | ||
| checkpoints.insert(string_name); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // Run the contest histories and measure | ||
| let dir = | ||
| std::path::PathBuf::from("/home/work_space/elommr-data/elommr-checkpoints/start-from-516/"); | ||
| for (index, contest) in dataset.iter().enumerate() { | ||
| if index <= contest_cutoff { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Replace this check with [inline code lost in extraction]. Btw, if you prefer indexing to start after the cutoff, you can exchange the order of [inline code lost in extraction] |
||
| continue; | ||
| } | ||
|
|
||
| tracing::debug!( | ||
| "Processing\n{:6} contestants in{:5}th contest with wt={}: {}", | ||
| contest.standings.len(), | ||
| index, | ||
| contest.weight, | ||
| contest.name | ||
| ); | ||
|
|
||
| // At some point, codechef changed the default rating! | ||
| if contest.name == "START25B" { | ||
| mu_noob = 1000.; | ||
| } | ||
|
|
||
| // Now run the actual rating update | ||
| simulate_contest(&mut players, &contest, &system, mu_noob, sig_noob, index); | ||
|
|
||
| if checkpoints.contains(&contest.name) { | ||
| let output_file = dir.join(contest.name.clone() + ".csv"); | ||
| let (_summary, rating_data) = make_leaderboard(&players, 0); | ||
| try_write_slice_to_file(&rating_data, &output_file); | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd like an explanation here of what the files actually contain.
Maybe for each of these binary files, we can clarify what the inputs and outputs are. Now would also be a good time if you want to revise the Rust file names.