Skip to content

Commit 58cb267

Browse files
authored
Merge pull request #37 from Laserlicht/master
more robust json parsing
2 parents d26ebdf + 6efd975 commit 58cb267

5 files changed

Lines changed: 42 additions & 10 deletions

File tree

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ mkdocs-material==9.5.21
44
mkdocs-glightbox==0.3.7 # newer versions cause problems (lightbox works only for cached images)
55
mkdocs-rss-plugin==1.12.2
66
mdutils==1.6.0
7+
json_repair==0.20.1
78
json5==0.9.25
89
pillow==10.3.0
910
numpy==1.26.4

src/build.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
1-
import json5
21
import urllib.request
32
import shutil
43
from mdutils.mdutils import MdUtils
54
import logging
65
import sys
76

7+
from helper import load_vcmi_json
8+
89
from build_extract_main_repo import build_extract_main_repo
910
from build_mod_overview import build_mod_overview
1011
from build_mod_page import build_mod_page
@@ -20,13 +21,13 @@
2021
shutil.rmtree("docs", ignore_errors=True)
2122

2223
vcmi_url = "https://github.com/vcmi/vcmi/archive/refs/heads/develop.zip"
23-
settings_schema = json5.loads(urllib.request.urlopen("https://raw.githubusercontent.com/vcmi/vcmi/develop/config/schemas/settings.json").read())
24+
settings_schema = load_vcmi_json(urllib.request.urlopen("https://raw.githubusercontent.com/vcmi/vcmi/develop/config/schemas/settings.json").read().decode())
2425
vcmi_mod_url = settings_schema["properties"]["launcher"]["properties"]["defaultRepositoryURL"]["default"]
2526
#vcmi_mod_url = "https://pastebin.com/raw/MUYS7dbJ" #test
2627

2728
log.info('Download mod repo')
2829
repo = urllib.request.urlopen(vcmi_url).read()
29-
repo_mod = json5.loads(urllib.request.urlopen(vcmi_mod_url).read())
30+
repo_mod = load_vcmi_json(urllib.request.urlopen(vcmi_mod_url).read().decode())
3031

3132
log.info('Create main page')
3233
build_extract_main_repo(repo)

src/build_mod_overview.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
11
import os
2-
import json5
32
import urllib.request
43
from mdutils.mdutils import MdUtils
54
from mdutils.tools import Html
65
from mdutils.tools.Link import Inline
76

7+
from helper import load_vcmi_json
8+
89
def build_mod_overview(mod_repo, cb):
910
os.makedirs("docs/Mod Repository", exist_ok=True)
1011
mdModOverview = MdUtils(file_name='docs/Mod Repository/Overview', title='Overview')
1112
mdModOverviewTable = ["Mod", "Type", "Description", "Version", "Translations"]
1213
for key, value in mod_repo.items():
13-
mod = json5.loads(urllib.request.urlopen(value["mod"].replace(" ", "%20")).read())
14+
mod = load_vcmi_json(urllib.request.urlopen(value["mod"].replace(" ", "%20")).read().decode())
1415

1516
translations = [k for k, v in mod.items() if isinstance(v, dict) and "translations" in v]
1617

src/helper.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,30 @@
1+
import re
2+
import json_repair
3+
import json5
4+
15
def get_value_if_exists(elem, val):
    """Return ``elem[val]`` when ``val in elem`` holds, otherwise ``""``."""
    # Guard clause: a missing entry yields the empty-string placeholder.
    if val not in elem:
        return ""
    return elem[val]
7+
8+
# https://stackoverflow.com/a/18381470 (Onur Yıldırım, CC BY-SA 4.0)
def remove_comments(string):
    """Strip ``//`` line comments and ``/* ... */`` block comments from text.

    Quoted strings (single or double quoted) are matched first and copied
    through unchanged, so comment markers inside a string literal (for
    example the ``//`` in a URL) are not treated as comments.  A
    backslash-escaped character inside a string is consumed as a unit, so an
    escaped quote does not end the string early.

    Args:
        string: The text to process.

    Returns:
        The text with every comment replaced by the empty string.
    """
    # Group 1: a quoted string, skipping over backslash escapes.
    # Group 2: a block comment, or a line comment running to end of line.
    pattern = (
        r"(\"(?:\\.|[^\"\\])*\"|\'(?:\\.|[^\'\\])*\')"
        r"|(/\*.*?\*/|//[^\r\n]*$)"
    )
    # MULTILINE lets ``$`` end a line comment at each line break; DOTALL lets
    # block comments (and strings) span several lines.
    regex = re.compile(pattern, re.MULTILINE | re.DOTALL)

    def _replacer(match):
        # Group 2 set means a real comment outside any string: drop it.
        if match.group(2) is not None:
            return ""
        # Otherwise the match is a quoted string: keep it verbatim.
        return match.group(1)

    return regex.sub(_replacer, string)
22+
23+
def load_vcmi_json(string):
    """Parse JSON text, falling back to a repairing parser on failure.

    The text is first handed to ``json5.loads``.  If that raises, comments
    are stripped with ``remove_comments`` and the result is parsed with
    ``json_repair.loads`` instead.

    Args:
        string: The JSON (or JSON-like) text to parse.

    Returns:
        The object produced by whichever parser handled the text.
    """
    try:
        return json5.loads(string)
    except Exception:
        # Best-effort fallback for any parser failure.  ``Exception`` rather
        # than a bare ``except`` so KeyboardInterrupt / SystemExit still
        # propagate instead of silently triggering the repair path.
        tmp = remove_comments(string)
        return json_repair.loads(tmp)

src/parse_mod.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import collections.abc
22
import io
33
import os
4-
import json5
54
import tempfile
65
import zipfile
76
import urllib.request
@@ -12,6 +11,8 @@
1211
import logging
1312
log = logging.getLogger('LOGGER_NAME')
1413

14+
from helper import load_vcmi_json
15+
1516
from defextract import extract_def
1617

1718
def nested_update(d, u):
@@ -53,7 +54,7 @@ def __parse_subconfig(self, data, dir):
5354
for file in files:
5455
if fullpath.lower() == os.path.join(subdir, file).lower():
5556
log.info('open json: ' + os.path.join(subdir, file))
56-
tmp2 = nested_update(tmp2, json5.load(open(os.path.join(subdir, file))))
57+
tmp2 = nested_update(tmp2, load_vcmi_json(open(os.path.join(subdir, file)).read()))
5758
if len(tmp2) > 0:
5859
tmp[key.lower()] = tmp2
5960
return tmp
@@ -64,7 +65,7 @@ def get_mods(self):
6465
for file in files:
6566
if file.lower() == "mod.json":
6667
log.info('open json: ' + os.path.join(subdir, file))
67-
data = json5.load(open(os.path.join(subdir, file)))
68+
data = load_vcmi_json(open(os.path.join(subdir, file)).read())
6869
mods.append(
6970
{
7071
"pyhsicaldir": subdir,
@@ -119,7 +120,7 @@ def get_animations(self, mod, path):
119120
if file.lower().endswith(".json") or file.lower().endswith(".def"):
120121
if file.lower().endswith(".json"):
121122
log.info('open json: ' + os.path.join(subdir, file))
122-
tmp = json5.load(open(os.path.join(subdir, file)))
123+
tmp = load_vcmi_json(open(os.path.join(subdir, file)).read())
123124
for i, sequence in enumerate(tmp["sequences"]):
124125
for j, frame in enumerate(sequence["frames"]):
125126
path_img = tmp["basepath"] + frame

0 commit comments

Comments
 (0)