-
Notifications
You must be signed in to change notification settings - Fork 13.2k
/
Copy pathsolve_dfa.py
executable file
·78 lines (71 loc) · 3.19 KB
/
solve_dfa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python3
# Use z3 to solve UTF-8 validation DFA for offset and transition table,
# in order to encode transition table into u32.
# We minimize the output variables in the solution to make it deterministic.
# Ref: <https://gist.github.com/dougallj/166e326de6ad4cf2c94be97a204c025f>
# See more detail explanation in `./validations.rs`.
#
# It is expected to find a solution in <30s on a modern machine, and the
# solution is appended to the end of this file.
from z3 import *
STATE_CNT = 9
# The transition table.
# A value X on column Y means state Y should transition to state X on some
# input bytes. We assign state 0 as ERROR and state 1 as ACCEPT (initial).
# Eg. first line: for input byte 00..=7F, transition S1 -> S1, others -> S0.
TRANSITIONS = [
# 0 1 2 3 4 5 6 7 8
# First bytes
((0, 1, 0, 0, 0, 0, 0, 0, 0), "00-7F"),
((0, 2, 0, 0, 0, 0, 0, 0, 0), "C2-DF"),
((0, 3, 0, 0, 0, 0, 0, 0, 0), "E0"),
((0, 4, 0, 0, 0, 0, 0, 0, 0), "E1-EC, EE-EF"),
((0, 5, 0, 0, 0, 0, 0, 0, 0), "ED"),
((0, 6, 0, 0, 0, 0, 0, 0, 0), "F0"),
((0, 7, 0, 0, 0, 0, 0, 0, 0), "F1-F3"),
((0, 8, 0, 0, 0, 0, 0, 0, 0), "F4"),
# Continuation bytes
((0, 0, 1, 0, 2, 2, 0, 4, 4), "80-8F"),
((0, 0, 1, 0, 2, 2, 4, 4, 0), "90-9F"),
((0, 0, 1, 2, 2, 0, 4, 4, 0), "A0-BF"),
# Illegal
((0, 0, 0, 0, 0, 0, 0, 0, 0), "C0-C1, F5-FF"),
]
o = Optimize()
offsets = [BitVec(f"o{i}", 32) for i in range(STATE_CNT)]
trans_table = [BitVec(f"t{i}", 32) for i in range(len(TRANSITIONS))]
# Add some guiding constraints to make solving faster.
o.add(offsets[0] == 0)
o.add(trans_table[-1] == 0)
for i in range(len(offsets)):
# Do not over-shift. It's not necessary but makes solving faster.
o.add(offsets[i] < 32 - 5)
for j in range(i):
o.add(offsets[i] != offsets[j])
for trans, (targets, _) in zip(trans_table, TRANSITIONS):
for src, tgt in enumerate(targets):
o.add((LShR(trans, offsets[src]) & 31) == offsets[tgt])
# Minimize ordered outputs to get a unique solution.
goal = Concat(*offsets, *trans_table)
o.minimize(goal)
print(o.check())
print("Offset[]= ", [o.model()[i].as_long() for i in offsets])
print("Transitions:")
for (_, label), v in zip(TRANSITIONS, [o.model()[i].as_long() for i in trans_table]):
print(f"{label:14} => {v:#10x}, // {v:032b}")
# Output should be deterministic:
# sat
# Offset[]= [0, 6, 16, 19, 1, 25, 11, 18, 24]
# Transitions:
# 00-7F => 0x180, // 00000000000000000000000110000000
# C2-DF => 0x400, // 00000000000000000000010000000000
# E0 => 0x4c0, // 00000000000000000000010011000000
# E1-EC, EE-EF => 0x40, // 00000000000000000000000001000000
# ED => 0x640, // 00000000000000000000011001000000
# F0 => 0x2c0, // 00000000000000000000001011000000
# F1-F3 => 0x480, // 00000000000000000000010010000000
# F4 => 0x600, // 00000000000000000000011000000000
# 80-8F => 0x21060020, // 00100001000001100000000000100000
# 90-9F => 0x20060820, // 00100000000001100000100000100000
# A0-BF => 0x860820, // 00000000100001100000100000100000
# C0-C1, F5-FF => 0x0, // 00000000000000000000000000000000