From 9b00db02e29804915e42a2cca3f673bcb2c71e99 Mon Sep 17 00:00:00 2001
From: koki
Date: Wed, 18 Dec 2024 08:19:10 +0800
Subject: [PATCH] ark_disasm start

---
 examples/dis_demo.py        |  25 +++++
 ohre/abcre/dis/AsmString.py |  26 +++++
 ohre/abcre/dis/DisFile.py   | 188 ++++++++++++++++++++++++++++++++++++
 ohre/abcre/dis/Method.py    |  82 ++++++++++++++++
 ohre/abcre/dis/Record.py    |  40 ++++++++
 ohre/abcre/dis/Types.py     |  14 +++
 6 files changed, 375 insertions(+)
 create mode 100644 examples/dis_demo.py
 create mode 100644 ohre/abcre/dis/AsmString.py
 create mode 100644 ohre/abcre/dis/DisFile.py
 create mode 100644 ohre/abcre/dis/Method.py
 create mode 100644 ohre/abcre/dis/Record.py
 create mode 100644 ohre/abcre/dis/Types.py

diff --git a/examples/dis_demo.py b/examples/dis_demo.py
new file mode 100644
index 0000000..b58a0a9
--- /dev/null
+++ b/examples/dis_demo.py
@@ -0,0 +1,25 @@
+"""Demo: parse a dis file (ark_disasm-ed abc) and print a summary of it.
+
+Usage: clear; pip install -e .; python3 examples/dis_demo.py name.abc.dis
+"""
+import argparse
+
+import ohre
+from ohre.abcre.dis.DisFile import DisFile
+from ohre.misc import Log
+
+if __name__ == "__main__":
+    Log.init_log("abcre", ".")
+    ohre.set_log_level("info")
+    ohre.set_log_print(True)
+    parser = argparse.ArgumentParser()
+    parser.add_argument("dis_path", type=str, help="path to the dis file (ark_disasm-ed abc)")
+    arg = parser.parse_args()
+    dis_file = DisFile(arg.dis_path)
+
+    print(f"> {dis_file}")
+    # print(f"\n> {dis_file.debug_deep()}")
+    for method in dis_file.methods:
+        print(f">> {method.debug_short()}")
+    for asmstr in dis_file.asmstrs:
+        print(f">> {asmstr}")
diff --git a/ohre/abcre/dis/AsmString.py b/ohre/abcre/dis/AsmString.py
new file mode 100644
index 0000000..0c83145
--- /dev/null
+++ b/ohre/abcre/dis/AsmString.py
@@ -0,0 +1,26 @@
+from typing import Any, Dict, Iterable, List, Tuple
+from ohre.misc import Log
+from ohre.abcre.dis.Types import AsmTpye
+
+
+class AsmString:
+    """One entry of the STRING section of a dis file.
+
+    Built from the text between the square brackets of a string line, which
+    the parser below expects to look like ``<key>:<hex offset>, <key>:<value>``.
+    """
+
+    def __init__(self, line: str):
+        idx = line.find(", ")
+        assert idx > 2 and idx < len(line) - 2
+        # Split on the first ":" only: the string value itself may contain ":"
+        # (e.g. a URL) and must not be truncated at it.
+        self.offset: int = int(line[:idx].split(":", 1)[1], 16)
+        self.name_value: str = line[idx + 2:].split(":", 1)[1]
+
+    def __str__(self) -> str:
+        return self.debug_deep()
+
+    def debug_deep(self) -> str:
+        """Return a one-line description: hex offset followed by the value."""
+        return f"AsmString {hex(self.offset)} {self.name_value}"
diff --git a/ohre/abcre/dis/DisFile.py b/ohre/abcre/dis/DisFile.py
new file mode 100644
index 0000000..2f921e1
--- /dev/null
+++ b/ohre/abcre/dis/DisFile.py
@@ -0,0 +1,188 @@
+from typing import Any, Dict, Iterable, List, Optional, Tuple
+
+from ohre.abcre.dis.Record import Record
+from ohre.abcre.dis.Method import Method
+from ohre.abcre.dis.AsmString import AsmString
+from ohre.misc import Log
+
+
+class STATE:
+    """States of the section parser driven by DisFile._dis_process_main."""
+    INIT = 0
+    NEW_SEC = 1
+    LITERALS = 2
+    RECORDS = 3
+    METHODS = 4
+    STRING = 5
+
+
+def _is_delimiter(s: str) -> bool:
+    """Return True if s starts with "# " and (stripped) ends with "===================="."""
+    return s.startswith("# ") and s.strip().endswith("====================")
+
+
+class DisFile():
+    """Parser for a dis file, i.e. the text dump that ark_disasm produces.
+
+    The constructor reads the whole file and fills ``records``, ``methods``
+    and ``asmstrs``; the LITERALS section is only skipped over for now.
+    """
+
+    def __init__(self, value):
+        """value: path of the dis file; any other type is logged as an error."""
+        self.source_binary_name: str = ""
+        self.language: str = ""
+        self.lines: List[str] = list()
+        self.records: List[Record] = list()
+        self.methods: List[Method] = list()
+        self.asmstrs: List[AsmString] = list()
+        if (isinstance(value, str)):
+            # "with" closes the file even when reading raises
+            with open(value, "r", encoding="utf-8", errors="ignore") as file:
+                self.lines = list(file)
+        else:
+            Log.error(f"DisFile init ERROR: value type NOT supported, {type(value)} {value}", True)
+        self._dis_process_main()
+
+    def _dis_process_main(self):
+        """Run the state machine over self.lines until the input or the state runs out."""
+        readers = {
+            STATE.INIT: self._read_disheader,
+            STATE.NEW_SEC: self._read_section_type,
+            STATE.LITERALS: self._read_literals,
+            STATE.RECORDS: self._read_records,
+            STATE.METHODS: self._read_methods,
+            STATE.STRING: self._read_strings,
+        }
+        l_n = 0  # line number
+        state = STATE.INIT
+        while (l_n < len(self.lines)):
+            Log.info(f"DisFile processing: state {state} line-{l_n}: {self.lines[l_n].rstrip()}", True)
+            reader = readers.get(state)
+            if (reader is None):
+                Log.error(f"state ERROR, state {state} l_n {l_n}")
+                return
+            state, l_n = reader(l_n)
+        Log.info(f"DisFile process END, l_n {l_n} should >= {len(self.lines)}")
+
+    def _read_section_type(self, l_n) -> Tuple[Optional[int], int]:
+        """Map the section title line ("# RECORDS", ...) to its parser state."""
+        line: str = self.lines[l_n].strip()
+        if (line.startswith("# ") and len(line) > 3):
+            section_states = {
+                "LITERALS": STATE.LITERALS,
+                "RECORDS": STATE.RECORDS,
+                "METHODS": STATE.METHODS,
+                "STRING": STATE.STRING,
+            }
+            state = section_states.get(line[2:])
+            if (state is not None):
+                return state, l_n + 1
+        Log.error(f"cannot determint what section is, line: {line}")
+        return None, len(self.lines)
+
+    def _read_disheader(self, l_n) -> Tuple[Optional[int], int]:
+        """Read the header up to the first delimiter, keeping binary name and language."""
+        while (l_n < len(self.lines)):
+            line: str = self.lines[l_n].strip()
+            if (_is_delimiter(line)):
+                return STATE.NEW_SEC, l_n + 1
+            elif (line.startswith("# ")):
+                if ("source binary:" in line):
+                    # split once, the binary path itself may contain ":"
+                    self.source_binary_name = line.split(":", 1)[1].strip()
+            elif (line.startswith(".language")):
+                self.language = line.split(" ")[1].strip()
+            elif (len(line) == 0):
+                pass
+            else:
+                Log.error(f"ERROR in _read_disheader, else hit. line {line}")
+            l_n += 1
+        # no delimiter at all: without this return the caller's tuple unpacking
+        # of the implicit None raised TypeError
+        return None, l_n + 1
+
+    def _read_literals(self, l_n) -> Tuple[Optional[int], int]:
+        """Skip the LITERALS section; its content is not parsed yet."""
+        while (l_n < len(self.lines)):
+            line: str = self.lines[l_n].strip()
+            if (_is_delimiter(line)):
+                return STATE.NEW_SEC, l_n + 1
+            l_n += 1
+        return None, l_n + 1
+
+    def _read_records(self, l_n) -> Tuple[Optional[int], int]:
+        """Collect each ".record" block up to its "}" line into a Record."""
+        while (l_n < len(self.lines)):
+            line: str = self.lines[l_n].strip()
+            if (_is_delimiter(line)):
+                return STATE.NEW_SEC, l_n + 1
+            elif (line.startswith(".record")):
+                lines_record: List[str] = list()
+                while (l_n < len(self.lines)):  # find "}"
+                    line_rec: str = self.lines[l_n].rstrip()
+                    lines_record.append(line_rec)
+                    l_n += 1
+                    if ("}" in line_rec):
+                        break
+                self.records.append(Record(lines_record))
+            else:
+                l_n += 1
+        return None, l_n + 1
+
+    def _read_methods(self, l_n) -> Tuple[Optional[int], int]:
+        """Collect every method: slot-number annotation line, then the body up to "}"."""
+        while (l_n < len(self.lines)):
+            line: str = self.lines[l_n].strip()
+            if (_is_delimiter(line)):
+                return STATE.NEW_SEC, l_n + 1
+            elif (line == "L_ESSlotNumberAnnotation:"):
+                l_n += 1
+                if (l_n >= len(self.lines)):  # truncated input, nothing follows the annotation
+                    Log.error(f"ERROR in _read_methods, file ends after {line}")
+                    break
+                parts = self.lines[l_n].strip().split(" ")
+                slotNumberIdx = int(parts[-2], 16)
+                l_n += 1
+                lines_method: List[str] = list()
+                while (l_n < len(self.lines)):  # find "}"
+                    line_method: str = self.lines[l_n].rstrip()
+                    lines_method.append(line_method)
+                    l_n += 1
+                    if ("}" == line_method):
+                        break
+                self.methods.append(Method(slotNumberIdx, lines_method))
+            else:
+                l_n += 1
+        return None, l_n + 1
+
+    def _read_strings(self, l_n) -> Tuple[Optional[int], int]:
+        """Turn every "[...]" line of the STRING section into an AsmString."""
+        while (l_n < len(self.lines)):
+            line: str = self.lines[l_n].strip()
+            if (_is_delimiter(line)):
+                return STATE.NEW_SEC, l_n + 1
+            elif (len(line) == 0):
+                pass
+            elif (line.startswith("[") and line.endswith("]") and len(line) > 6):
+                self.asmstrs.append(AsmString(line[1:-1]))
+            else:
+                Log.error(f"ERROR in _read_strings, else hit. line {line}")
+            l_n += 1
+        return None, l_n + 1
+
+    def __str__(self):
+        return self.debug_short()
+
+    def debug_short(self) -> str:
+        """Return a one-line summary with the number of parsed items."""
+        return (f"DisFile: {self.source_binary_name} language {self.language} lines({len(self.lines)}) "
+                f"records({len(self.records)}) methods({len(self.methods)}) asmstrs({len(self.asmstrs)})")
+
+    def debug_deep(self) -> str:
+        """Return the summary followed by one ">> " entry per record, method and string."""
+        parts = [self.debug_short()]
+        parts.extend(f">> {rec.debug_deep()}" for rec in self.records)
+        parts.extend(f">> {method.debug_deep()}" for method in self.methods)
+        parts.extend(f">> {asmstr}" for asmstr in self.asmstrs)
+        return "\n".join(parts) + "\n"
diff --git a/ohre/abcre/dis/Method.py b/ohre/abcre/dis/Method.py
new file mode 100644
index 0000000..0707bdb
--- /dev/null
+++ b/ohre/abcre/dis/Method.py
@@ -0,0 +1,82 @@
+from typing import Any, Dict, Iterable, List, Tuple
+from ohre.misc import Log
+from ohre.abcre.dis.Types import AsmTpye
+
+
+class Method:
+    """One method of the METHODS section: its ".function" header plus body lines."""
+
+    def __init__(self, slotNumberIdx, lines: List[str]):
+        """lines: the ".function" header line followed by the body up to "}"."""
+        assert len(lines) >= 2
+        self.slotNumberIdx: int = slotNumberIdx
+        self.return_type = "None"
+        self.file_name: str = ""
+        self.class_func_name: str = ""
+        self.func_type: str = ""
+        self.args: List = list()
+        self.insts: List = list()
+        self._process_method(lines)
+
+    def _process_1st_line(self, line: str):
+        """Parse the header: return type, file/function name, <func type> and args."""
+        parts = line.split(" ")
+        assert parts[0] == ".function"
+        self.return_type = parts[1].strip()
+        file_func_name = parts[2].split("(")[0]
+        num = file_func_name.find(".ets")
+        if (not num > 0):
+            num = file_func_name.find(".src")
+        if (num > 0 and num < len(file_func_name) - 5):
+            # cut after the 4-char extension; the name starts one character later
+            self.file_name = file_func_name[:num + 4]
+            self.class_func_name = file_func_name[num + 4 + 1:]
+        else:
+            self.file_name = file_func_name
+            self.class_func_name = file_func_name
+        # the last "<...>" token of the header is taken as the function type
+        for part in reversed(parts):
+            if (part.startswith("<") and part.endswith(">") and len(part) >= 3):
+                self.func_type = part[1:-1]
+                break
+        # process args now
+        arg_text = line.split("(")[1].split(")")[0]
+        for arg_pair in arg_text.split(","):
+            arg_pair = arg_pair.strip()
+            if (len(arg_pair) == 0):
+                continue  # "()": no arguments, unpacking below would raise ValueError
+            ty, name = arg_pair.split(" ")
+            self.args.append((ty, name))
+
+    def _process_method(self, lines: List[str]):
+        """Parse the header, then store every tag and instruction as a token list."""
+        self._process_1st_line(lines[0].strip())
+        for line in lines[1:]:
+            line = line.strip()
+            if (line.endswith(":")):
+                if (len(line.split(" ")) == 1):
+                    self.insts.append([line])
+                else:
+                    Log.error(f"ERROR: {line} NOT tag?", True)
+            elif (len(line) == 0):
+                continue
+            elif (line == "}"):
+                return
+            else:
+                # drop the "," that separates operands
+                self.insts.append([tok[:-1] if tok.endswith(",") else tok for tok in line.split(" ")])
+
+    def __str__(self):
+        return self.debug_short()
+
+    def debug_short(self) -> str:
+        """Return slot index, function type, name and file plus arg/inst counts."""
+        return (f"Method: {self.slotNumberIdx} {self.func_type} {self.class_func_name} file: {self.file_name}\n"
+                f"args({len(self.args)}) {self.args} insts({len(self.insts)})")
+
+    def debug_deep(self) -> str:
+        """Return debug_short() followed by one numbered line per instruction."""
+        out_insts = "".join(
+            f"{line_num}\t{inst[0]} " + ", ".join(inst[1:]) + "\n"
+            for line_num, inst in enumerate(self.insts))
+        return f"{self.debug_short()}\n{out_insts}"
diff --git a/ohre/abcre/dis/Record.py b/ohre/abcre/dis/Record.py
new file mode 100644
index 0000000..fa67c3e
--- /dev/null
+++ b/ohre/abcre/dis/Record.py
@@ -0,0 +1,40 @@
+from typing import Any, Dict, Iterable, List, Tuple
+from ohre.misc import Log
+from ohre.abcre.dis.Types import AsmTpye
+
+
+class Record:
+    """One ".record" block of the RECORDS section: class name plus its fields."""
+
+    def __init__(self, lines: List[str]):
+        """lines: the ".record ... {" line, the field lines and the closing "}"."""
+        self.class_name: str = ""
+        self.fields: Dict[str, Tuple[str, Any]] = dict()  # k: field name; v: (type, value)
+        for line in lines:
+            line = line.strip()
+            if ("}" in line):
+                return
+            elif ("{" in line and ".record" in line):
+                parts = line.split(" ")
+                self.class_name = parts[1].split("@")[0]
+            elif ("=" in line):
+                # "<type> <name> = <value>"; split once so the value may hold "="
+                decl, value = line.split("=", 1)
+                decl_parts = decl.split(" ")
+                ty, name = decl_parts[0].strip(), decl_parts[1].strip()
+                value = value.strip()
+                if (AsmTpye.is_uint(ty)):
+                    value = int(value, 16)
+                else:
+                    Log.error(f"ERROR in Record init: ty {ty} name {name} value {value} {type(value)}")
+                self.fields[name] = (ty, value)
+            else:
+                Log.warn(f"invalid line in Record: {line},\nlines: {lines}")
+
+    def __str__(self) -> str:
+        return self.debug_deep()
+
+    def debug_deep(self) -> str:
+        """Return the class name followed by "name(type) value;" for every field."""
+        fields = "".join(f"{field_name}({ty}) {value};" for field_name, (ty, value) in self.fields.items())
+        return f"Record {self.class_name}: {fields}"
diff --git a/ohre/abcre/dis/Types.py b/ohre/abcre/dis/Types.py
new file mode 100644
index 0000000..be69d12
--- /dev/null
+++ b/ohre/abcre/dis/Types.py
@@ -0,0 +1,14 @@
+from ohre.abcre.enum.BaseEnum import BaseEnum
+
+
+class AsmTpye(BaseEnum):
+    """Type names used in dis files (class name spelling kept: other modules import it)."""
+    uint_types = {"u8", "u16", "u32", "u64"}
+
+    def __init__(self):
+        super().__init__()
+
+    @classmethod
+    def is_uint(cls, type_name: str) -> bool:
+        """Return True if type_name is one of the unsigned integer type names."""
+        return type_name in cls.uint_types