Skip to content

Commit

Permalink
ark_disasm start
Browse files Browse the repository at this point in the history
  • Loading branch information
kokifish committed Dec 18, 2024
1 parent 3217359 commit 9b00db0
Show file tree
Hide file tree
Showing 6 changed files with 354 additions and 0 deletions.
22 changes: 22 additions & 0 deletions examples/dis_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import argparse

import ohre
from ohre.abcre.dis.DisFile import DisFile
from ohre.misc import Log

if __name__ == "__main__":
    # Usage: clear; pip install -e .; python3 examples/dis_demo.py name.abc.dis
    # Logging is set up before DisFile is constructed.
    Log.init_log("abcre", ".")
    ohre.set_log_level("info")
    ohre.set_log_print(True)

    cli = argparse.ArgumentParser()
    cli.add_argument("dis_path", type=str, help="path to the dis file (ark_disasm-ed abc)")
    dis_file = DisFile(cli.parse_args().dis_path)

    # One summary line for the file, then one line per method and per string entry.
    print(f"> {dis_file}")
    # print(f"\n> {dis_file.debug_deep()}")
    for method in dis_file.methods:
        print(f">> {method.debug_short()}")
    for asmstr in dis_file.asmstrs:
        print(f">> {asmstr}")
18 changes: 18 additions & 0 deletions ohre/abcre/dis/AsmString.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from typing import Any, Dict, Iterable, List, Tuple
from ohre.misc import Log
from ohre.abcre.dis.Types import AsmTpye


class AsmString:
    """One string entry parsed from a line shaped like ``offset:0x1a2b, name_value:text``.

    Attributes:
        offset: the hexadecimal value after ``offset:``, as an int.
        name_value: everything after the first ``:`` of the second part.
    """

    def __init__(self, line: str):
        """Parse *line* into ``offset`` and ``name_value``.

        Args:
            line: entry text without surrounding brackets.

        Raises:
            AssertionError: if ``", "`` is missing or too close to either end of *line*.
            IndexError: if either part has no ``:`` separator.
            ValueError: if the offset is not a hexadecimal number.
        """
        idx = line.find(", ")
        assert 2 < idx < len(line) - 2
        self.offset = int(line[:idx].split(":")[1], 16)
        # Split on the first ":" only, so a value that itself contains ":"
        # (e.g. "http://host") is kept whole instead of being truncated.
        self.name_value = line[idx + 2:].split(":", 1)[1]

    def __str__(self):
        return self.debug_deep()

    def debug_deep(self):
        """Return ``AsmString <hex offset> <name_value>``."""
        return f"AsmString {hex(self.offset)} {self.name_value}"
176 changes: 176 additions & 0 deletions ohre/abcre/dis/DisFile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
from typing import Any, Dict, Iterable, List, Tuple

from ohre.abcre.dis.Record import Record
from ohre.abcre.dis.Method import Method
from ohre.abcre.dis.AsmString import AsmString
from ohre.misc import Log


class STATE:
    """Integer tags (0..5) naming the states of the dis-file parsing loop."""
    INIT, NEW_SEC, LITERALS, RECORDS, METHODS, STRING = range(6)


def _is_delimiter(s: str) -> bool:
if (s.startswith("# ")):
if (s.strip().endswith("====================")):
return True
return False


class DisFile():
    """Line-oriented parser for a dis file, collecting its records, methods and strings.

    Parsing is a small state machine: a header is read first, then each delimiter
    line (see ``_is_delimiter``) is expected to be followed by a ``# <NAME>`` line
    naming the section (LITERALS, RECORDS, METHODS or STRING).
    """

    def __init__(self, value):
        """Read the file at path *value* and parse it immediately.

        Args:
            value: path of the dis file as ``str``. Any other type is logged as an
                error and leaves the instance empty.
        """
        self.source_binary_name: str = ""
        self.language: str = ""
        self.lines: List[str] = list()
        self.records: List[Record] = list()
        self.methods: List[Method] = list()
        self.asmstrs: List[AsmString] = list()
        if isinstance(value, str):
            # "with" guarantees the handle is closed even if reading raises.
            with open(value, "r", encoding="utf-8", errors="ignore") as file:
                self.lines.extend(file)
        else:
            Log.error(f"DisFile init ERROR: value type NOT supported, {type(value)} {value}", True)
        self._dis_process_main()

    def _dis_process_main(self):
        """Drive the state machine until every line is consumed or a reader gives up."""
        readers = {
            STATE.INIT: self._read_disheader,
            STATE.NEW_SEC: self._read_section_type,
            STATE.LITERALS: self._read_literals,
            STATE.RECORDS: self._read_records,
            STATE.METHODS: self._read_methods,
            STATE.STRING: self._read_strings,
        }
        l_n = 0  # line number
        state = STATE.INIT
        while l_n < len(self.lines):
            Log.info(f"DisFile processing: state {state} line-{l_n}: {self.lines[l_n].rstrip()}", True)
            reader = readers.get(state)
            if reader is None:  # readers hand back None as state when they cannot continue
                Log.error(f"state ERROR, state {state} l_n {l_n}")
                return
            state, l_n = reader(l_n)
        Log.info(f"DisFile process END, l_n {l_n} should >= {len(self.lines)}")

    def _read_section_type(self, l_n) -> Tuple[int, int]:
        """Map the ``# <NAME>`` line at *l_n* to its state.

        Returns:
            ``(state, l_n + 1)`` for a known section name, otherwise
            ``(None, len(self.lines))`` so that the main loop stops.
        """
        sections = {
            "LITERALS": STATE.LITERALS,
            "RECORDS": STATE.RECORDS,
            "METHODS": STATE.METHODS,
            "STRING": STATE.STRING,
        }
        line: str = self.lines[l_n].strip()
        if line.startswith("# ") and len(line) > 3:
            state = sections.get(line[2:])
            if state is not None:
                return state, l_n + 1
        Log.error(f"cannot determint what section is, line: {line}")
        return None, len(self.lines)

    def _read_disheader(self, l_n) -> Tuple[int, int]:
        """Read header lines until the first delimiter.

        Fills ``source_binary_name`` and ``language`` when the matching lines are found.

        Returns:
            ``(STATE.NEW_SEC, index after the delimiter)``, or ``(None, l_n + 1)`` when
            the file ends without any delimiter.
        """
        while l_n < len(self.lines):
            line: str = self.lines[l_n].strip()
            if _is_delimiter(line):
                return STATE.NEW_SEC, l_n + 1
            elif line.startswith("#"):  # also accepts a bare "#" comment line
                if "source binary:" in line:
                    # Keep everything after the marker; the value may contain ":" itself.
                    self.source_binary_name = line.split("source binary:", 1)[1].strip()
            elif line.startswith(".language"):
                parts = line.split()
                if len(parts) > 1:
                    self.language = parts[1]
            elif len(line) == 0:
                pass
            else:
                Log.error(f"ERROR in _read_disheader, else hit. line {line}")
            l_n += 1
        # No delimiter found: return a tuple like the other readers so the
        # caller's unpacking does not fail on a bare None.
        return None, l_n + 1

    def _read_literals(self, l_n) -> Tuple[int, int]:
        """Skip the LITERALS section; its content is not parsed.

        Returns:
            ``(STATE.NEW_SEC, index after the delimiter)`` or ``(None, l_n + 1)`` at end of file.
        """
        while l_n < len(self.lines):
            line: str = self.lines[l_n].strip()
            if _is_delimiter(line):
                return STATE.NEW_SEC, l_n + 1
            l_n += 1
        return None, l_n + 1

    def _read_records(self, l_n) -> Tuple[int, int]:
        """Collect every ``.record`` block into ``self.records`` until the next delimiter.

        Returns:
            ``(STATE.NEW_SEC, index after the delimiter)`` or ``(None, l_n + 1)`` at end of file.
        """
        while l_n < len(self.lines):
            line: str = self.lines[l_n].strip()
            if _is_delimiter(line):
                return STATE.NEW_SEC, l_n + 1
            elif line.startswith(".record"):
                lines_record: List[str] = list()
                while l_n < len(self.lines):  # find "}"
                    line_rec: str = self.lines[l_n].rstrip()
                    lines_record.append(line_rec)
                    l_n += 1
                    if "}" in line_rec:
                        break
                self.records.append(Record(lines_record))
            else:
                l_n += 1
        return None, l_n + 1

    def _read_methods(self, l_n) -> Tuple[int, int]:
        """Collect every method into ``self.methods`` until the next delimiter.

        A method starts at an ``L_ESSlotNumberAnnotation:`` line, followed by one line
        whose second-to-last space-separated token is the hexadecimal slot number,
        followed by the function text up to a line that is exactly ``}``.

        Returns:
            ``(STATE.NEW_SEC, index after the delimiter)`` or ``(None, l_n + 1)`` at end of file.
        """
        while l_n < len(self.lines):
            line: str = self.lines[l_n].strip()
            if _is_delimiter(line):
                return STATE.NEW_SEC, l_n + 1
            elif line == "L_ESSlotNumberAnnotation:":
                l_n += 1
                if l_n >= len(self.lines):
                    # Truncated input: there is no slot-number line to index.
                    Log.error("ERROR in _read_methods, file ends after L_ESSlotNumberAnnotation")
                    break
                parts = self.lines[l_n].strip().split(" ")
                slotNumberIdx = int(parts[-2], 16)
                l_n += 1
                lines_method: List[str] = list()
                while l_n < len(self.lines):  # find "}"
                    line_method: str = self.lines[l_n].rstrip()
                    lines_method.append(line_method)
                    l_n += 1
                    if "}" == line_method:
                        break
                self.methods.append(Method(slotNumberIdx, lines_method))
            else:
                l_n += 1
        return None, l_n + 1

    def _read_strings(self, l_n) -> Tuple[int, int]:
        """Collect every ``[...]`` line into ``self.asmstrs`` until the next delimiter.

        Returns:
            ``(STATE.NEW_SEC, index after the delimiter)`` or ``(None, l_n + 1)`` at end of file.
        """
        while l_n < len(self.lines):
            line: str = self.lines[l_n].strip()
            if _is_delimiter(line):
                return STATE.NEW_SEC, l_n + 1
            elif len(line) == 0:
                pass
            elif line.startswith("[") and line.endswith("]") and len(line) > 6:
                self.asmstrs.append(AsmString(line[1:-1]))  # drop the surrounding brackets
            else:
                Log.error(f"ERROR in _read_strings, else hit. line {line}")
            l_n += 1
        return None, l_n + 1

    def __str__(self):
        return self.debug_short()

    def debug_short(self) -> str:
        """Return a one-line summary with the element count of each collection."""
        # Implicit concatenation keeps source indentation out of the resulting string.
        return (f"DisFile: {self.source_binary_name} language {self.language} lines({len(self.lines)}) "
                f"records({len(self.records)}) methods({len(self.methods)}) asmstrs({len(self.asmstrs)})")

    def debug_deep(self) -> str:
        """Return the summary followed by one ``>> `` line per record, method and string."""
        out = [self.debug_short()]
        out.extend(f">> {rec.debug_deep()}" for rec in self.records)
        out.extend(f">> {method.debug_deep()}" for method in self.methods)
        out.extend(f">> {asmstr}" for asmstr in self.asmstrs)
        return "\n".join(out) + "\n"
90 changes: 90 additions & 0 deletions ohre/abcre/dis/Method.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from typing import Any, Dict, Iterable, List, Tuple
from ohre.misc import Log
from ohre.abcre.dis.Types import AsmTpye


class Method:
    """One ``.function`` block: its signature fields plus a token list per body line."""

    # fields in Class
    def __init__(self, slotNumberIdx, lines: List[str]):
        """Parse *lines*, whose first entry is the ``.function`` signature line.

        Args:
            slotNumberIdx: slot number supplied by the caller, stored unchanged.
            lines: signature line followed by the body lines, at least two entries.

        Raises:
            AssertionError: if fewer than two lines are given or the first line does
                not start with ``.function``.
        """
        assert len(lines) >= 2
        self.slotNumberIdx: int = slotNumberIdx
        self.return_type = "None"
        self.file_name: str = ""
        self.class_func_name: str = ""
        self.func_type: str = ""
        self.args: List = list()
        self.insts: List = list()
        self._process_method(lines)

    def _process_1st_line(self, line: str):
        """Fill return type, file/function names, ``<...>`` function type and arguments."""
        parts = line.split(" ")
        assert parts[0] == ".function"
        self.return_type = parts[1].strip()
        file_func_name = parts[2].split("(")[0]
        # Split "<file>.ets.<func>" (or ".src") into file name and function name.
        num = file_func_name.find(".ets")
        if not num > 0:
            num = file_func_name.find(".src")
        if num > 0 and num < len(file_func_name) - 5:
            self.file_name = file_func_name[:num + 4]
            self.class_func_name = file_func_name[num + 4 + 1:]
        else:
            self.file_name = file_func_name
            self.class_func_name = file_func_name
        # The function type is the last token of the form "<...>".
        for token in reversed(parts):
            if token.startswith("<") and token.endswith(">") and len(token) >= 3:
                self.func_type = token[1:-1]
                break
        # process args now
        arg_text = line.split("(")[1].split(")")[0]
        for arg_pair in arg_text.split(","):
            arg_pair = arg_pair.strip()
            if not arg_pair:
                # "foo()" yields a single empty piece; there is nothing to unpack.
                continue
            ty, name = arg_pair.split(" ")
            self.args.append((ty, name))

    def _process_method(self, lines: List[str]):
        """Parse the signature, then turn each body line into a list of tokens.

        A single-word line ending in ``:`` is stored as a one-element list; any other
        non-empty line is split on spaces with trailing commas removed from each token.
        Parsing stops at the first line that is exactly ``}``.
        """
        self._process_1st_line(lines[0].strip())
        for line in lines[1:]:
            line = line.strip()
            if line.endswith(":"):
                if len(line.split(" ")) == 1:
                    self.insts.append([line])
                else:
                    Log.error(f"ERROR: {line} NOT tag?", True)
            elif len(line) == 0:
                continue
            elif line == "}":
                return
            else:
                self.insts.append([tok[:-1] if tok.endswith(",") else tok for tok in line.split(" ")])

    def __str__(self):
        return self.debug_short()

    def debug_short(self) -> str:
        """Return a two-line summary: identity on the first line, args/insts counts on the second."""
        # Implicit concatenation keeps source indentation out of the resulting string.
        return (f"Method: {self.slotNumberIdx} {self.func_type} {self.class_func_name} file: {self.file_name}\n"
                f"args({len(self.args)}) {self.args} insts({len(self.insts)})")

    def debug_deep(self) -> str:
        """Return the short summary followed by one numbered line per stored instruction."""
        out_insts = "".join(
            f"{line_num}\t{inst[0]} " + ", ".join(str(operand) for operand in inst[1:]) + "\n"
            for line_num, inst in enumerate(self.insts)
        )
        return f"{self.debug_short()}\n{out_insts}"
34 changes: 34 additions & 0 deletions ohre/abcre/dis/Record.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from typing import Any, Dict, Iterable, List, Tuple
from ohre.misc import Log
from ohre.abcre.dis.Types import AsmTpye


class Record:
    """One ``.record`` block: a class name plus its ``type name = value`` fields."""

    # fields in Class
    def __init__(self, lines: List[str]):
        """Parse the lines of one record, stopping at the first line containing ``}``.

        Args:
            lines: the ``.record ... {`` header line, the field lines, and the closing line.
        """
        self.class_name: str = ""
        self.fields: Dict[str, Tuple[str, Any]] = dict()  # k: field name; v: (type, value)
        for line in lines:
            line = line.strip()
            if "}" in line:
                return
            elif "{" in line and ".record" in line:
                parts = line.split(" ")
                self.class_name = parts[1].split("@")[0]  # drop any "@..." suffix
            elif "=" in line:
                # Split once, so a value containing "=" is not truncated.
                decl, value = line.split("=", 1)
                decl_parts = decl.split()
                ty, name = decl_parts[0], decl_parts[1]
                value = value.strip()
                if AsmTpye.is_uint(ty):
                    value = int(value, 16)
                else:
                    # Unknown type: report it, but still store the raw text below.
                    Log.error(f"ERROR in Record init: ty {ty} name {name} value {value} {type(value)}")
                self.fields[name] = (ty, value)
            else:
                Log.warn(f"invalid line in Record: {line},\nlines: {lines}")

    def __str__(self):
        return self.debug_deep()

    def debug_deep(self):
        """Return ``Record <class_name>: `` followed by ``name(type) value;`` per field."""
        body = "".join(f"{field_name}({ty}) {value};" for field_name, (ty, value) in self.fields.items())
        return f"Record {self.class_name}: {body}"
14 changes: 14 additions & 0 deletions ohre/abcre/dis/Types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from ohre.abcre.enum.BaseEnum import BaseEnum


class AsmTpye(BaseEnum):
    """Type-name helpers; ``uint_types`` holds the names treated as unsigned integers."""
    uint_types = {"u8", "u16", "u32", "u64"}

    def __init__(self):
        super().__init__()

    @classmethod
    def is_uint(cls, type_name: str):
        """Return True when *type_name* is a member of ``uint_types``, else False."""
        return type_name in cls.uint_types

0 comments on commit 9b00db0

Please sign in to comment.