Skip to content

Commit

Permalink
feat(launcher): parse oom signal (huggingface#404)
Browse files (browse the repository at this point in the history)
  • Loading branch information
OlivierDehaene authored Jun 2, 2023
1 parent 62fc401 commit 83b8448
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion launcher/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -410,9 +410,14 @@ fn shard_manager(
     let mut wait_time = Instant::now();
     loop {
         // Process exited
-        if p.poll().is_some() {
+        if let Some(exit_status) = p.poll() {
             let mut err = String::new();
             p.stderr.take().unwrap().read_to_string(&mut err).unwrap();
+
+            if let ExitStatus::Signaled(signal) = exit_status {
+                tracing::error!("Shard process was signaled to shutdown with signal {signal}");
+            }
+
             status_sender
                 .send(ShardStatus::Failed((rank, err)))
                 .unwrap();
Expand Down

0 comments on commit 83b8448

Please sign in to comment.