-
Notifications
You must be signed in to change notification settings - Fork 52
Expand file tree
/
Copy pathsitecustomize.py
More file actions
59 lines (52 loc) · 2.19 KB
/
Copy pathsitecustomize.py
File metadata and controls
59 lines (52 loc) · 2.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: OpenMDW-1.1
# Runtime load tracer.
#
# Auto-imported by every Python process started with PYTHONPATH=.
# When LOAD_TRACE_DIR is set, registers an atexit hook that walks
# sys.modules at shutdown and writes the file paths (filtered to those
# under LOAD_TRACE_ROOT) into {LOAD_TRACE_DIR}/{LOAD_TRACE_TAG}_pid{PID}.txt.
#
# Used to inventory which released files are actually touched by each
# end-to-end smoke. Union the per-experiment traces, diff against the full
# .py list, and the residual is dead code (relative to that smoke set).
import atexit
import os
import sys
# Opt-in (COSMOS_DL_FILE_SYSTEM_SHARING=1): switch torch's DataLoader IPC from the
# default 'file_descriptor' strategy (which stages worker tensors in /dev/shm) to
# 'file_system'. On shm-constrained containers, large video batches overflow the
# small /dev/shm tmpfs and a worker dies mid-transfer -> the main process then sees
# "unable to open shared memory object ... No such file or directory". 'file_system'
# sidesteps /dev/shm entirely. Guarded so non-training processes never import torch.
# Only the exact value "1" opts in; anything else (or unset) leaves torch
# completely untouched so that non-training processes never pay the import.
if os.getenv("COSMOS_DL_FILE_SYSTEM_SHARING") == "1":
    try:
        from torch import multiprocessing as _tmp

        _tmp.set_sharing_strategy("file_system")
    except Exception:
        # Best effort: this file is auto-imported by every interpreter, so a
        # missing or broken torch install must never prevent start-up.
        pass
# Tracing is opt-in: nothing below runs unless LOAD_TRACE_DIR is non-empty.
_DIR = os.environ.get("LOAD_TRACE_DIR", "")
if _DIR:
    _TAG = os.environ.get("LOAD_TRACE_TAG", "default")
    # Resolve symlinks once so the root is comparable with the realpath()'d
    # module files computed at shutdown.
    _ROOT = os.path.realpath(os.environ.get("LOAD_TRACE_ROOT", os.getcwd()))
    os.makedirs(_DIR, exist_ok=True)

    def _dump() -> None:
        """Write the files of all loaded modules under ``_ROOT`` to a trace file.

        Walks a snapshot of ``sys.modules``, resolves each module's
        ``__file__`` with ``os.path.realpath`` and keeps the paths located
        inside ``_ROOT``. The sorted, de-duplicated paths are written one per
        line to ``{_DIR}/{_TAG}_pid{PID}.txt``, where the PID is read at dump
        time. Failures to resolve a path or to write the file are ignored so
        that tracing never disturbs interpreter shutdown.
        """
        # Compare against the root *plus a separator*: a bare string prefix
        # test would also accept siblings such as "/repo2/x.py" for "/repo".
        prefix = _ROOT if _ROOT.endswith(os.sep) else _ROOT + os.sep
        seen = set()
        # Snapshot the values; sys.modules may still change while we iterate.
        for mod in list(sys.modules.values()):
            f = getattr(mod, "__file__", None)
            if not f:
                continue
            try:
                rp = os.path.realpath(f)
            except (OSError, TypeError, ValueError):
                # Unresolvable path, non-path ``__file__`` object, or an
                # embedded NUL byte: skip the module rather than abort.
                continue
            if rp == _ROOT or rp.startswith(prefix):
                seen.add(rp)
        path = os.path.join(_DIR, f"{_TAG}_pid{os.getpid()}.txt")
        try:
            # Fixed encoding keeps traces comparable across locales;
            # surrogateescape round-trips undecodable file-name bytes.
            with open(path, "w", encoding="utf-8", errors="surrogateescape") as h:
                h.writelines(p + "\n" for p in sorted(seen))
        except OSError:
            # Best effort only: a vanished or read-only trace directory must
            # not turn process exit into an error.
            pass

    atexit.register(_dump)