Skip to content

Commit

Permalink
script tweaks
Browse files Browse the repository at this point in the history
  • Loading branch information
bnnm committed Feb 24, 2024
1 parent 246b3ef commit 3f5b3ed
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 33 deletions.
5 changes: 5 additions & 0 deletions scripts/txt-cleaner.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
_ENDS_WITH = ['bc']
DONE = set()
split = False
remove_numbers = False

def get_match_max(line, regex):
count = 0
Expand Down Expand Up @@ -85,9 +86,13 @@ def read_line(line, outfile_ok, outfile_ko, outfile_dp):
for item in items:
if item in DONE:
continue
if remove_numbers and item.isnumeric():
continue
DONE.add(item)
outfile_ok.write(item + '\n')
else:
if remove_numbers and line.isnumeric():
return
outfile_ok.write(line + '\n')
else:
outfile_ko.write(line + '\n')
Expand Down
42 changes: 30 additions & 12 deletions sstr/sstr.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
Expand All @@ -12,6 +13,7 @@
// Some games have strings like (size)(id)(string), or (size)(string).
// strings2.exe trips on those and may create things like "b(string)A",
// while this program should handle them fine (may still output some false positives though)
// Mainly for names found in stuff like the Decima engine.

// todo config bufsize
// todo BE mode
Expand All @@ -31,6 +33,7 @@ typedef struct {
uint32_t buf_size;
const char* targets[MAX_TARGETS];
uint32_t targets_count;
bool limited;
} sstr_config;

//*************************************************************************
Expand All @@ -44,12 +47,24 @@ static uint32_t get_u32le(const uint8_t *p) {
return ret;
}

static int is_ascii_str(const uint8_t* buf, int str_len) {
for (int i = 0; i < str_len - 1; i++) {
uint8_t curr = buf[i];
if (curr < 0x20 || curr >= 0x7F) // useful only ASCII
return 0;
static int is_ascii_str(const uint8_t* buf, int str_len, bool limited) {
if (limited) {
// decima hashes only
for (int i = 0; i < str_len - 1; i++) {
uint8_t curr = buf[i];
if (curr < 0x2d && curr != 0x20 || curr > 0x7a || curr >= 0x3b && curr <= 0x40 || curr >= 0x5b && curr <= 0x5e)
return 0;
}
}
else {
// printable ASCII only (0x20..0x7E)
for (int i = 0; i < str_len - 1; i++) {
uint8_t curr = buf[i];
if (curr < 0x20 || curr >= 0x7F)
return 0;
}
}

// last char can be a null
uint8_t last = buf[str_len-1];
if (last != 0 && last < 0x20 || last >= 0x7F)
Expand All @@ -58,25 +73,25 @@ static int is_ascii_str(const uint8_t* buf, int str_len) {
return 1;
}

static int test_str(const uint8_t* buf, int str_len) {
if (is_ascii_str(buf, str_len)) {
static int test_str(const uint8_t* buf, int str_len, bool limited) {
if (is_ascii_str(buf, str_len, limited)) {
printf("%.*s\n", str_len, buf);
return 1;
}
return 0;
}

static void find_string(const uint8_t* buf, uint32_t buf_size) {
static void find_string(const uint8_t* buf, uint32_t buf_size, bool limited) {
uint32_t pos = 0;

// test (len)(str) and (len)(id)(str)
while (pos < buf_size) {
uint32_t str_len = get_u32le(buf + pos + 0x00);
if (str_len > MIN_STR && str_len < MAX_STR) {
// both are possible at the same time in some cases
int test1 = test_str(buf + pos + 0x04, str_len);
int test2 = test_str(buf + pos + 0x08, str_len);
int test1 = test_str(buf + pos + 0x04, str_len, limited);
int test2 = test_str(buf + pos + 0x08, str_len, limited);

if (test2) {
pos += 0x08 + str_len;
}
Expand Down Expand Up @@ -130,6 +145,9 @@ static int parse_cfg(sstr_config* cfg, int argc, const char* argv[]) {
case 'h':
print_usage(argv[0]);
return 0;
case 'l':
cfg->limited = true;
break;
default:
CHECK_EXIT(1, "ERROR: unknown parameter '%s'\n", argv[i]);
break;
Expand Down Expand Up @@ -199,7 +217,7 @@ int main(int argc, const char* argv[]) {
if (!bytes)
break;

find_string(buf, BUF_HEAD + bytes);
find_string(buf, BUF_HEAD + bytes, cfg.limited);

// copy last bytes as next head (shouldn't overlap)
memcpy(buf, buf + BUF_HEAD + bytes - BUF_HEAD, BUF_HEAD);
Expand Down
Binary file modified sstr/sstr.exe
Binary file not shown.
66 changes: 45 additions & 21 deletions wwnames/_wwnames-fixer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,18 @@
# - should have lines like "# 3933301714"
# - add words.py reversed names, format "3933301714: banana"
# - run this tool (drag and drop)
# - this will replace "# 3933301714" by "banana"
# - this will replace "# 3933301714" with "banana"
# - if list has "### (name)" sections, sections are sorted too
# - output is "(name)-clean.txt"
# - output is "(name)-clean.txt", except if (name) is wwname-*.txt, in which case the input file itself is replaced
# - if some name is wrong add "#ko" at the end ("ZZSXSBanana #ko")
# - then run this script again and the wrong name will be converted back to "# (hash number)"

import sys, re

FULL_CLEAN = True
CLEAN_ORDER = True
UPDATE_ORIGINAL = True
FNV_FORMAT = re.compile(r"^[A-Za-z_][A-Za-z0-9\_]*$")
#HDR_FORMAT = re.compile(r"^###+*\([^\t]+\).+[\t ]*([^\t]*)[\t ]*([^\t]*)")
HDR_FORMAT1 = re.compile(r"^###.+\(langs/(.+)\.bnk\)")
HDR_FORMAT2 = re.compile(r"^###.+\((.+)\.bnk\)")

Expand Down Expand Up @@ -131,7 +132,10 @@ def fix_wwnames(inname):
if items:
# register solved ids and ignore line
sid, hashname = items
hashed[sid] = hashname
if sid not in hashed:
hashed[sid] = []
if hashname not in hashed[sid]:
hashed[sid].append(hashname)
else:
# register base lines as-is, except when fixing headers
if line.startswith('### '):
Expand All @@ -145,9 +149,9 @@ def fix_wwnames(inname):

if bankname.isdigit():
sid = int(bankname)
hashname = hashed.get(sid)
if hashname:
line = line.replace('.bnk', '.bnk: %s' % hashname)
hashnames = hashed.get(sid)
if hashnames:
line = line.replace('.bnk', '.bnk: %s' % hashnames[0])

# use case as found in first line
# (so if BLAH is used in several points and changed once to Blah, other points use that too)
Expand All @@ -160,47 +164,67 @@ def fix_wwnames(inname):
if not line.startswith('#'):
hashname = line.split('#')[0]
sid = get_fnv(hashname)
hashed[sid] = hashname

if sid not in hashed:
hashed[sid] = []
if hashname not in hashed[sid]:
hashed[sid].append(hashname)


section = False
clines = []
for bline in blines:
if bline in koed:
if bline.startswith('### '):
section = True

if bline.lower() in koed:
sid = get_fnv(bline)
bline = "# %s" % (sid)

if bline.startswith('#ko') and ':' in bline and FULL_CLEAN:
if bline and bline.startswith('#ko') and ':' in bline and FULL_CLEAN:
_, hashname = bline.split(':')
hashname = hashname.strip()
sid = get_fnv(hashname)
bline = "# %s" % (sid)
koed.add(hashname)
koed.add(hashname.lower())
if section: # '#ko' on top get ignored
bline = "# %s" % (sid)
else:
continue

if bline.endswith('#ko') and FULL_CLEAN:
if bline.startswith('# '):
fnv = bline.split(' ')[1]
koed.add(fnv.strip())
continue
elif bline.startswith('#'):
pass
else:
hashname, _ = bline.split('#ko')
hashname, _ = bline.split('#ko', 1)
hashname = hashname.strip()
sid = get_fnv(hashname)
bline = "# %s" % (sid)
koed.add(hashname)
koed.add(hashname.lower())
if section: # '#ko' on top get ignored
bline = "# %s" % (sid)
else:
continue


if bline.startswith('# ') and ':' not in bline:
sid = bline[2:].strip()
if sid in hashed:
hashname = hashed[sid]
if FULL_CLEAN:
bline = "%s" % (hashname)
else:
bline = "%s: %s" % (sid, hashname)
hashnames = hashed[sid]
for i, hashname in enumerate(hashnames):
if FULL_CLEAN:
bline = "%s" % (hashname)
else:
bline = "%s: %s" % (sid, hashname)
if i > 0:
bline += ' #alt'
clines.append(bline)
continue

clines.append(bline)


clines = order_list(clines)
clines = clean_lines(clines)
outname = inname
Expand Down

0 comments on commit 3f5b3ed

Please sign in to comment.