-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathmanager.py
28 lines (25 loc) · 1.02 KB
/
manager.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# Poll progress.txt until it reads "done"; kill the gunicorn/uvicorn servers whenever the file goes stale.
import os
from datetime import datetime
from time import sleep
from loguru import logger
# Watchdog loop: poll the progress file until the job reports "done", and
# kill the gunicorn/uvicorn processes whenever the file has not been written
# for longer than the stall limit.
PROGRESS_FILE = "progress.txt"
STALL_LIMIT_SECONDS = 60 * 7  # no write for this long is treated as a hang
POLL_INTERVAL_SECONDS = 60 * 5

# Executed in order through the shell. The first two ask for a reload via
# SIGHUP; the rest force-kill by several overlapping means.
# NOTE(review): nothing here starts the servers again -- presumably an
# external supervisor does; confirm.
KILL_COMMANDS = (
    # Was "/usr/bin/bash kill -SIGHUP ...", which makes bash look for a
    # *script* called "kill" instead of sending the signal.
    "kill -HUP `pgrep gunicorn`",
    "kill -HUP `pgrep uvicorn`",
    "kill -9 `pgrep gunicorn`",
    "kill -9 `pgrep uvicorn`",
    "killall -9 uvicorn",
    "ps | grep uvicorn | awk '{print $1}' | xargs kill -9",
)

while True:
    try:
        with open(PROGRESS_FILE, "r") as f:
            progress = f.read()

        # Compare epoch timestamps so the full elapsed time is measured;
        # timedelta.seconds only holds the sub-day remainder and would make
        # a file that is stale by more than a day look fresh.
        idle_seconds = datetime.now().timestamp() - os.path.getmtime(PROGRESS_FILE)
        if idle_seconds > STALL_LIMIT_SECONDS:
            logger.info("restarting server to fix cuda issues (device side asserts)")
            for command in KILL_COMMANDS:
                os.system(command)

        # Tolerate surrounding whitespace, e.g. the newline left by `echo done`.
        if progress.strip() == "done":
            break
    except Exception:
        # The watchdog must keep running (e.g. the progress file may not
        # exist yet), so record the failure and retry on the next poll.
        logger.exception("failed to poll {}", PROGRESS_FILE)
    sleep(POLL_INTERVAL_SECONDS)