[Profile] Add pytorch profiler (#1604)
Showing 4 changed files with 162 additions and 0 deletions.
fix_corrupted_json.py (new file, +40 lines):
import json
import re
import sys


def clean_json_file(input_file, output_file):
    try:
        # Open the input file with errors="replace" so undecodable bytes
        # become U+FFFD instead of raising UnicodeDecodeError.
        with open(input_file, "r", encoding="utf-8", errors="replace") as f:
            data = f.read()

        # Replace bad characters (represented by '�' after decoding) with a space.
        cleaned_data = data.replace("�", " ")

        # Remove ASCII control characters (\x00 to \x1F), which can cause
        # issues in JSON parsing.
        cleaned_data = re.sub(r"[\x00-\x1F]+", " ", cleaned_data)

        # Parse the cleaned data as JSON.
        json_data = json.loads(cleaned_data)

        # Write the cleaned JSON to the output file.
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(json_data, f, ensure_ascii=False, indent=4)

        print(f"Cleaned JSON file has been saved to {output_file}")

    except Exception as e:
        print(f"Error: {e}")


if __name__ == "__main__":
    assert len(sys.argv) > 1, "please give the input file path"
    if len(sys.argv) == 3:
        input_file = sys.argv[1]
        output_file = sys.argv[2]
    else:
        # With a single argument, the file is cleaned in place.
        input_file = output_file = sys.argv[1]

    clean_json_file(input_file, output_file)
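For reference, the script can be run directly or imported; a minimal usage sketch (the file names here are hypothetical examples, not paths from the commit):

    # Clean a corrupted trace into a separate file (example paths):
    #   python fix_corrupted_json.py broken_trace.json cleaned_trace.json
    # Or clean it in place:
    #   python fix_corrupted_json.py broken_trace.json
    from fix_corrupted_json import clean_json_file

    clean_json_file("broken_trace.json", "cleaned_trace.json")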
Second new file (+77 lines), a script that collects per-step timings from the profiler traces:
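The script below reads Chrome-trace JSON files as emitted by the PyTorch profiler. A minimal hand-written illustration of the shape it touches (field values are made up; "dur" is in microseconds, which is why the script divides by 1e3 to get milliseconds):

    # Illustrative only -- not a real trace; just the fields the script reads.
    example_trace = {
        "traceEvents": [
            {"name": "profile_decode_step", "ph": "X", "ts": 0, "dur": 12345},
        ]
    }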
import glob
import json
import os
import re
import sys

from tqdm import tqdm

sys.path.append("../../")
from fix_corrupted_json import clean_json_file

dirpath = "/Users/ying"
output_file_prefix = "analyzed_log"

time = {}      # per-trace, per-step duration in ms
tot_time = {}  # per-trace total duration in ms
size = {}      # per-trace, per-step size info

# Remove stale output files from a previous run.
os.system(f"rm {output_file_prefix}*")

for dirname in glob.glob(os.path.join(dirpath, "trace*")):
    print(dirname)
    trace_name = dirname.split("/")[-1]
    time[trace_name] = {}
    size[trace_name] = {}
    total_time = 0
    for filename in tqdm(glob.glob(os.path.join(dirname, "*.json"))):
        step_name = filename.split("/")[-1].split(".")[0]
        step_name = "_".join(step_name.split("_")[1:])
        if "prefill" not in filename and "decode" not in filename:
            continue

        # Validate the file name; phase/step are re-derived below where needed.
        match = re.search(r"(prefill|decode)_step_(\d+)\.json", filename)
        if match:
            phase = match.group(1)
            step = match.group(2)
        else:
            raise Exception(f"Cannot parse {filename}")

        # Profiler traces are occasionally written with corrupted bytes;
        # clean the file in place and retry if parsing fails.
        try:
            with open(filename, "r") as f:
                trace = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError):
            clean_json_file(filename, filename)
            with open(filename, "r") as f:
                trace = json.load(f)

        # Find the top-level step event and convert its duration from
        # microseconds to milliseconds. Initialize dur so a trace without
        # the event does not reuse the previous file's value.
        dur = 0.0
        for event in trace["traceEvents"]:
            name = event["name"]
            if name in ["profile_prefill_step", "profile_decode_step"]:
                dur = event["dur"] / 1e3
                time[trace_name][step_name] = dur
                break
        total_time += dur

        step = int(step_name.split("_")[-1])
        with open(os.path.join(dirname, f"size_{step}.json"), "r") as f:
            size_info = json.load(f)
        size[trace_name][step_name] = size_info["size"]

    tot_time[trace_name] = total_time
    # Sort per-step entries by step index.
    time[trace_name] = dict(
        sorted(time[trace_name].items(), key=lambda x: int(x[0].split("_")[-1]))
    )
    size[trace_name] = dict(
        sorted(size[trace_name].items(), key=lambda x: int(x[0].split("_")[-1]))
    )

    with open(f"{output_file_prefix}_{trace_name}", "a") as f:
        for k, v in time[trace_name].items():
            size_v = size[trace_name][k]
            print(f"{k:>15}{v:10.2f}\t{size_v}")
            f.write(f"{k:>15}{v:10.2f}\t{size_v}\n")

with open(f"{output_file_prefix}_total_time", "w") as f:
    print(tot_time)
    json.dump(tot_time, f)
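For context, traces like the decode_step_<N>.json files parsed above can be produced with torch.profiler and export_chrome_trace. A minimal sketch under that assumption (the wrapper function and step_fn callable are hypothetical; the record_function label is assumed to be the source of the profile_decode_step event name):

    from torch.profiler import ProfilerActivity, profile, record_function

    def run_profiled_decode_step(step_fn, step, out_dir="trace_example"):
        # step_fn is a hypothetical callable that runs one decode step.
        # Add ProfilerActivity.CUDA to activities when profiling on GPU.
        with profile(activities=[ProfilerActivity.CPU]) as prof:
            # Assumed origin of the "profile_decode_step" event the analysis
            # script searches for in trace["traceEvents"].
            with record_function("profile_decode_step"):
                step_fn()
        # Writes a Chrome-trace JSON file consumable by the script above.
        prof.export_chrome_trace(f"{out_dir}/decode_step_{step}.json")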