Skip to content

Commit a61a8fc

Browse files
committed
ISA start
1 parent 6b06be7 commit a61a8fc

11 files changed

+357
-35
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ rules_local/
77
*.out
88
*.dis
99
*.log
10+
isa.yaml
1011
tmp/
1112
tmp_extract/
1213
tmp_hap_extract/

examples/dis_demo.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,6 @@
1717
print(f"> {dis_file}")
1818
# print(f"\n> {dis_file.debug_deep()}")
1919
for method in dis_file.methods:
20-
print(f">> {method.debug_short()}")
21-
for asmstr in dis_file.asmstrs:
22-
print(f">> {asmstr}")
20+
print(f">> {method.debug_deep()}")
21+
# for asmstr in dis_file.asmstrs:
22+
# print(f">> {asmstr}")

ohre/abcre/dis/AsmMethod.py

+36-26
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
from typing import Any, Dict, Iterable, List, Tuple
22

33
from ohre.abcre.dis.AsmTypes import AsmTypes
4+
from ohre.misc import utils
45
from ohre.misc import Log
6+
from ohre.abcre.dis.NAC import NAC
7+
from ohre.abcre.dis.NACBlock import NACBlock
8+
from ohre.abcre.dis.NACBlocks import NACBlocks
59

610

711
class AsmMethod:
@@ -14,8 +18,9 @@ def __init__(self, slotNumberIdx, lines: List[str]):
1418
self.class_func_name: str = ""
1519
self.func_type: str = ""
1620
self.args: List = list()
17-
self.insts: List = list()
18-
self._process_method(lines)
21+
self.nac_blocks: NACBlocks | None = None
22+
insts = self._process_method(lines)
23+
self.nac_blocks = NACBlocks(insts)
1924

2025
def _process_1st_line(self, line: str):
2126
parts = line.split(" ")
@@ -46,46 +51,51 @@ def _process_1st_line(self, line: str):
4651
ty, name = arg_pair.strip().split(" ")
4752
self.args.append((ty, name))
4853

49-
def _process_method(self, lines: List[str]):
54+
def _process_method(self, lines: List[str]) -> List[List[str]]:
55+
insts = list()
5056
self._process_1st_line(lines[0].strip())
5157
for line in lines[1:]:
5258
line = line.strip()
5359
if (line.endswith(":")):
54-
if (len(line.split(" ")) == 1):
60+
if (len(line.split(" ")) == 1): # single str in a single line endswith ":", maybe label?
5561
tu = [line]
56-
self.insts.append(tu)
62+
insts.append(tu)
5763
else:
5864
Log.error(f"ERROR: {line} NOT tag?", True)
59-
elif (len(line) == 0):
65+
elif (len(line) == 0): # skip empty line
6066
continue
61-
elif (line == "}"):
62-
return
63-
else:
64-
tu = list(line.split(" "))
65-
for i in range(len(tu)):
66-
if (tu[i].endswith(",")):
67-
tu[i] = tu[i][:-1]
68-
self.insts.append(tu)
67+
elif (line == "}"): # process END
68+
return insts
69+
else: # common situation
70+
tu = self._process_common_inst(line)
71+
insts.append(tu)
72+
return insts
73+
74+
def _process_common_inst(self, line: str) -> List[str]:
    """Split one instruction line into ``[opcode, operand, ...]``.

    The opcode is everything before the first space. The remainder is cut
    into operands at the positions reported by
    ``utils.find_next_delimiter``; each operand is whitespace-stripped.

    Args:
        line: A single instruction line (surrounding whitespace is ignored).

    Returns:
        A list whose first element is the opcode, followed by the operands.
        A line without any space yields a one-element list.
    """
    line = line.strip()
    idx = line.find(" ")
    if idx < 0:
        # No operands at all: the whole line is the opcode.
        return [line]

    ret = [line[:idx]]  # opcode
    idx += 1
    while idx < len(line):
        start_idx = idx
        # NOTE(review): this loop assumes find_next_delimiter returns an index
        # >= start_idx (presumably len(line) when no delimiter is left).
        # A negative "not found" value would never terminate -- confirm.
        idx = utils.find_next_delimiter(line, start_idx)
        ret.append(line[start_idx:idx].strip())
        idx += 1  # step over the delimiter itself
    return ret
6989

7090
def __str__(self):
7191
return self.debug_short()
7292

7393
def debug_short(self) -> str:
7494
out = f"AsmMethod: {self.slotNumberIdx} {self.func_type} {self.class_func_name} file: {self.file_name}\n\
75-
args({len(self.args)}) {self.args} insts({len(self.insts)})"
95+
args({len(self.args)}) {self.args} nac_blocks({self.nac_blocks.len})"
7696
return out
7797

7898
def debug_deep(self) -> str:
79-
out_insts = ""
80-
for line_num in range(len(self.insts)):
81-
inst = self.insts[line_num]
82-
out = f"{line_num}\t{inst[0]} "
83-
for i in range(1, len(inst)):
84-
if (i != len(inst) - 1):
85-
out += f"{inst[i]}, "
86-
else:
87-
out += f"{inst[i]}"
88-
out_insts += f"{out}\n"
8999
out = f"AsmMethod: {self.slotNumberIdx} {self.func_type} {self.class_func_name} file: {self.file_name}\n\
90-
args({len(self.args)}) {self.args} insts({len(self.insts)})\n{out_insts}"
100+
args({len(self.args)}) {self.args} nac_blocks({self.nac_blocks.len})\n{self.nac_blocks.debug_deep()}"
91101
return out

ohre/abcre/dis/AsmString.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,13 @@ def __init__(self, line: str):
99
idx = line.find(", ")
1010
assert idx > 2 and idx < len(line) - 2
1111
self.offset = int(line[:idx].split(":")[1], 16)
12-
self.name_value = line[idx + 2:].split(":")[1]
12+
remain_line = line[idx + 2:]
13+
idx2 = remain_line.find(":")
14+
self.name_value = remain_line[idx2 + 1:]
1315

1416
def __str__(self):
1517
return self.debug_deep()
1618

1719
def debug_deep(self):
18-
out = f"AsmString {hex(self.offset)} {self.name_value}"
20+
out = f"AsmString({hex(self.offset)}) {len(self.name_value)} {self.name_value}"
1921
return out

ohre/abcre/dis/ISA_reader.py

+111
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
import json
2+
import os
3+
from typing import Any, Dict, Iterable, List, Tuple
4+
5+
import yaml
6+
7+
import ohre
8+
from ohre.misc import Log, utils
9+
10+
11+
class ISA:
12+
def __init__(self, isa_file_path: str):
13+
self.ori_d: Dict = utils.read_dict_from_yaml_file(isa_file_path)
14+
assert self.ori_d is not None
15+
16+
self.prefixes: Dict | None = None
17+
self.prefixes = self._get_prefixes_dict()
18+
assert self.prefixes is not None
19+
Log.info(f"[ISA] self.prefixes {len(self.prefixes)} {self.prefixes}")
20+
21+
self.opstr2infod: Dict[str, Dict] | None = None
22+
self.opstr2infod = self._get_opstr_dict()
23+
assert self.opstr2infod is not None
24+
Log.info(f"[ISA] self.opstr2infod {len(self.opstr2infod)} keys: {self.opstr2infod.keys()}")
25+
26+
def _get_prefixes_dict(self) -> Dict:
27+
if (self.prefixes is not None):
28+
return self.prefixes
29+
ret = {}
30+
for sub_d in self.ori_d["prefixes"]:
31+
ret[sub_d["name"]] = {"description": sub_d["description"], "opcode_idx": sub_d["opcode_idx"]}
32+
return ret
33+
34+
def _get_prefix_opcode(self, prefix: str) -> int:
35+
if (prefix in self.prefixes.keys()):
36+
return self.prefixes[prefix]["opcode_idx"]
37+
return -1
38+
39+
def _get_opstr_dict(self) -> Dict[str, Dict]:
40+
ret = dict()
41+
for group in self.ori_d["groups"]:
42+
title = group["title"] if "title" in group.keys() else None
43+
assert len(title) > 0 and isinstance(title, str)
44+
description: str = group["description"].strip() if "description" in group.keys() else None
45+
verification: List | None = group["verification"] if "verification" in group.keys() else None
46+
exceptions: List | None = group["exceptions"] if "exceptions" in group.keys() else None
47+
properties: List | None = group["properties"] if "properties" in group.keys() else None
48+
namespace: str = group["namespace"].strip() if "namespace" in group.keys() else None
49+
pseudo: str = group["pseudo"].strip() if "pseudo" in group.keys() else None
50+
semantics: str = group["semantics"].strip() if "semantics" in group.keys() else None
51+
52+
assert "instructions" in group.keys()
53+
for ins in group["instructions"]:
54+
assert "sig" in ins.keys() and "opcode_idx" in ins.keys()
55+
opstr = ins["sig"].split(" ")[0].strip()
56+
opcode_idx = ins["opcode_idx"]
57+
58+
acc = ins["acc"] if "acc" in ins.keys() else None
59+
format = ins["format"] if "format" in ins.keys() else None
60+
prefix = ins["prefix"] if "prefix" in ins.keys() else None
61+
62+
if (prefix is not None): # final_opcode = prefix_opcode|op_code # concat, not 'or'
63+
prefix_opcode = self._get_prefix_opcode(prefix)
64+
assert prefix_opcode != -1
65+
opcode_idx = [(prefix_opcode << 8) + op_code for op_code in opcode_idx]
66+
67+
ret[opstr] = {
68+
"sig": ins["sig"],
69+
"acc": acc, "opcode_idx": opcode_idx, "prefix": prefix, "format": format, "title": title,
70+
"description": description, "verification": verification, "exceptions": exceptions,
71+
"properties": properties, "namespace": namespace, "pseudo": pseudo, "semantics": semantics}
72+
return ret
73+
74+
def get_opcodes(self, opstr: str) -> List | None:
75+
opcode_info_d = self.get_opcode_info_dict(opstr)
76+
if (opcode_info_d is None):
77+
return None
78+
else:
79+
if ("opcode_idx" in opcode_info_d.keys()):
80+
return opcode_info_d["opcode_idx"]
81+
else:
82+
Log.warn(f"[ISA] opstr {opstr}, opcode_idx not in {opcode_info_d.keys()}")
83+
return None
84+
85+
def get_opcode_info_dict(self, opstr: str) -> Dict | None:
86+
if opstr in self.opstr2infod.keys():
87+
return self.opstr2infod[opstr]
88+
else:
89+
Log.warn(f"[ISA] opstr NOT hit directly, opstr {opstr}, remove prefix and match again", True)
90+
for key_opstr in self.opstr2infod.keys():
91+
opstr_rhs = key_opstr
92+
tmp = opstr_rhs.split(".")
93+
if (len(tmp) > 1 and opstr == tmp[1]):
94+
Log.warn(f"[ISA] opstr change: {opstr} -> {key_opstr}", True)
95+
return self.opstr2infod[key_opstr]
96+
return None
97+
98+
99+
if __name__ == "__main__":
    # Manual self-check: load isa.yaml from this module's directory, verify two
    # known prefixed opcodes, then print a few lookups and the set of titles.
    ohre.set_log_print(True)
    # Compute the path once; ISA() loads the file itself, so no separate read is needed.
    isa_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "isa.yaml")
    isa = ISA(isa_path)
    # print(json.dumps(isa.ori_d["groups"], indent=4))
    assert isa.get_opcodes("deprecated.getiteratornext") == [0xfc02]
    assert isa.get_opcodes("callruntime.notifyconcurrentresult") == [0xfb00]
    for ins_str in ["mov", "callruntime.definefieldbyindex", "isin"]:
        print(f"{ins_str}: {utils.hexstr(isa.get_opcodes(ins_str))} {isa.get_opcode_info_dict(ins_str)}")
    title_set = {info["title"] for info in isa.opstr2infod.values()}
    print(f"{len(title_set)} {title_set}")

ohre/abcre/dis/NAC.py

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
from typing import Any, Dict, Iterable, List, Tuple
2+
from ohre.abcre.dis.NACTYPE import NACTYPE
3+
4+
5+
class NAC():  # N Address Code
    """One instruction line of a disassembled function: an op plus its arguments.

    Attributes:
        op: The first element of the token list given to the constructor.
        type: The value returned by ``NACTYPE.get_NAC_type(op)``.
        args: The remaining elements of the token list, in order.
    """

    def __init__(self, op_args: List[str]):
        """Build a NAC from ``[op, arg1, arg2, ...]``; the list must not be empty."""
        assert len(op_args) > 0
        self.op = op_args[0]
        self.type = NACTYPE.get_NAC_type(self.op)
        self.args = list(op_args[1:])

    def __str__(self):
        return self.debug_short()

    def _is_std_nac(self):
        """Return True when ``self.type`` is one of the seven standard NAC types."""
        return self.type in {
            NACTYPE.ASSIGN, NACTYPE.COND_JMP, NACTYPE.UNCN_JMP,
            NACTYPE.CALL, NACTYPE.COND_THROW, NACTYPE.UNCN_THROW, NACTYPE.RETURN,
        }

    def _joined_args(self):
        """Return the arguments formatted and separated by ``", "``."""
        return ", ".join(f"{arg}" for arg in self.args)

    def debug_short(self):
        """Return ``"<op> <arg>, <arg>, ..."`` (the space after op is always present)."""
        return f"{self.op} " + self._joined_args()

    def debug_deep(self):
        """Return the short form prefixed with the type's code name in parentheses."""
        return f"({NACTYPE.get_code_name(self.type)}) {self.op} " + self._joined_args()

ohre/abcre/dis/NACBlock.py

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
from typing import Any, Dict, Iterable, List, Tuple
2+
from ohre.abcre.dis.NAC import NAC
3+
from ohre.abcre.dis.NACTYPE import NACTYPE
4+
import copy
5+
6+
7+
class NACBLOCK_LV:
    """Integer level tags for a NACBlock; NATIVE is the default level of a new block."""

    NATIVE, LEVEL1, LEVEL2 = 0, 1, 2
11+
12+
13+
class NACBlock():
    """An ordered, non-empty sequence of NAC objects tagged with a level.

    Attributes:
        nacs: One NAC per instruction token list, in input order.
        level: The level value passed to the constructor.
    """

    def __init__(self, insts: List[List[str]], level=NACBLOCK_LV.NATIVE):
        """Wrap each token list of ``insts`` in a NAC.

        Both ``insts`` and every token list inside it must be non-empty.
        """
        assert len(insts) > 0
        self.nacs: List[NAC] = []
        self.level = level
        for op_args in insts:
            assert len(op_args) > 0
            self.nacs.append(NAC(op_args))

    def __str__(self):
        return self.debug_short()

    def debug_short(self):
        """Return a one-line summary: number of NACs and the level."""
        return f"NACBlock: nacs {len(self.nacs)} lv {self.level}"

    def debug_deep(self):
        """Return the summary line followed by one indexed line per NAC."""
        header = f"NACBlock: nacs {len(self.nacs)} lv {self.level}\n"
        body = "".join(f"{idx}\t{nac.debug_deep()}\n" for idx, nac in enumerate(self.nacs))
        return header + body

ohre/abcre/dis/NACBlocks.py

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from typing import Any, Dict, Iterable, List, Tuple
2+
from ohre.abcre.dis.NACBlock import NACBlock
3+
from ohre.abcre.dis.NAC import NAC
4+
from ohre.abcre.dis.NACTYPE import NACTYPE
5+
import copy
6+
7+
8+
class NACBlocks():
    """Container for the NACBlock objects of one method.

    On construction all instructions are placed in a single NACBlock.
    """

    def __init__(self, insts: List[List[str]]):
        """Create the container with one block built from ``insts``."""
        initial_block = NACBlock(insts)
        self.nac_blocks: List[NACBlock] = [initial_block]

    def __str__(self):
        return self.debug_short()

    @property
    def len(self):
        """Number of blocks currently held."""
        return len(self.nac_blocks)

    def debug_short(self):
        """Return a one-line summary with the block count."""
        block_count = len(self.nac_blocks)
        return f"NACBlocks: block len {block_count}"

    def debug_deep(self):
        """Return the summary line followed by each block's deep dump."""
        pieces = [f"{self.debug_short()}\n"]
        for idx, block in enumerate(self.nac_blocks):
            pieces.append(f"{idx}-block: {block.debug_deep()}\n")
        return "".join(pieces)

ohre/abcre/dis/NACTYPE.py

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from ohre.abcre.enum.BaseEnum import BaseEnum
2+
3+
4+
class NACTYPE(BaseEnum):
    """Integer codes classifying a NAC by the kind of operation it performs."""

    def __init__(self):
        super().__init__()

    # Standard NAC kinds (the set checked by NAC._is_std_nac).
    ASSIGN = 0      # at most 3 arg
    COND_JMP = 1    # 3 arg
    UNCN_JMP = 2    # 1 arg # unconditional
    CALL = 3        # 1 or more arg
    COND_THROW = 4  # 3 arg
    UNCN_THROW = 5  # 1 arg
    RETURN = 6      # 1 arg

    # Other kinds.
    IMPORT = 11
    LABEL = 12
    UNKNOWN = 99

    @classmethod
    def get_NAC_type(cls, op: str) -> int:
        """Return the NAC type code for ``op``.

        Currently every op is reported as ``NACTYPE.UNKNOWN``.
        """
        return NACTYPE.UNKNOWN

0 commit comments

Comments
 (0)