forked from Devamitta/exporter
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhelpers.py
214 lines (185 loc) · 7.66 KB
/
helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
import os
import sys
import subprocess
import re
from pathlib import Path
from typing import TypedDict
from datetime import date
from datetime import datetime
from timeis import timeis, green, red, line
from superscripter import superscripter
from dotenv import load_dotenv
import pandas as pd
from pandas.core.frame import DataFrame
load_dotenv()
class DataFrames(TypedDict):
    """Bundle of the pandas tables produced by ``parse_data_frames``."""

    # One table per source file read in parse_data_frames.
    words_df: DataFrame
    roots_df: DataFrame
    abbrev_df: DataFrame
    help_df: DataFrame
class ResourcePaths(TypedDict):
    """Every file and directory location used by the exporter.

    Instances are built by ``get_resource_paths``; the grouping below mirrors
    the grouping used there.
    """

    # Project output
    output_dir: Path
    output_html_dir: Path
    output_root_html_dir: Path
    output_help_html_dir: Path
    output_share_dir: Path
    gd_json_path: Path
    output_stardict_zip_path: Path
    output_stardict_light_zip_path: Path
    error_log_dir: Path

    # Project assets
    dpd_words_css_path: Path
    dpd_roots_css_path: Path
    dpd_help_css_path: Path
    epd_css_path: Path
    sandhi_css_path: Path
    tpp_css_path: Path
    buttons_js_path: Path
    abbrev_path: Path
    help_path: Path

    # Project input (resolved under DPD_DIR by get_resource_paths)
    compound_families_dir: Path
    frequency_dir: Path
    root_families_dir: Path
    word_families_dir: Path
    inflections_dir: Path
    sets_dir: Path
    words_path: Path
    roots_path: Path
    all_inflections_dict_path: Path
    icon_path: Path
    icon_bmp_path: Path
def parse_data_frames(rsc: ResourcePaths) -> DataFrames:
    """Parse the tab-separated source files into pandas data frames.

    Every column is read as ``str`` and missing cells become ``""``.

    Args:
        rsc: Resource paths; ``words_path``, ``roots_path``, ``abbrev_path``
            and ``help_path`` are read.

    Returns:
        The four tables. ``roots_df`` keeps only the rows whose ``Fin``
        column is non-empty.
    """

    def read_tsv(path: Path) -> DataFrame:
        # keep_default_na=False: by default pandas treats literal cell text
        # such as "NA", "null", "None" or "nan" as missing, and the fillna
        # below would then silently blank that text out.
        frame = pd.read_csv(path, sep="\t", dtype=str, keep_default_na=False)
        # Still needed: rows shorter than the header can yield NaN cells.
        return frame.fillna("")

    words_df = read_tsv(rsc['words_path'])

    roots_df = read_tsv(rsc['roots_path'])
    roots_df = roots_df[roots_df["Fin"] != ""]  # remove extra lines

    abbrev_df = read_tsv(rsc['abbrev_path'])
    help_df = read_tsv(rsc['help_path'])

    return DataFrames(
        words_df=words_df,
        roots_df=roots_df,
        abbrev_df=abbrev_df,
        help_df=help_df,
    )
def get_resource_paths() -> ResourcePaths:
    """Build the table of output, asset and input paths for the exporter.

    Project inputs are resolved under the directory named by the ``DPD_DIR``
    environment variable; outputs and assets are given relative to the
    current working directory. The output directories are created when
    missing.

    Returns:
        The populated ``ResourcePaths`` mapping.

    Raises:
        SystemExit: With status 2 when ``DPD_DIR`` is unset or empty.
    """
    dpd_dir_env = os.getenv('DPD_DIR')
    # An empty value would make Path("") point at the current directory and
    # every input path would silently resolve to the wrong place, so it is
    # rejected the same way as an unset variable.
    if not dpd_dir_env:
        print(f"{timeis()} {red}ERROR! DPD_DIR is not set.")
        sys.exit(2)

    dpd_dir = Path(dpd_dir_env)

    rsc = ResourcePaths(
        # Project output
        output_dir=Path("./output/"),
        output_html_dir=Path("./output/html/"),
        output_root_html_dir=Path("./output/root html/"),
        output_help_html_dir=Path("./output/help html/"),
        output_share_dir=Path("./share/"),
        gd_json_path=Path("./output/gd.json"),
        output_stardict_zip_path=Path("dpd.zip"),
        output_stardict_light_zip_path=Path("dpd light.zip"),
        error_log_dir=Path("./errorlogs/"),
        # Project assets
        dpd_words_css_path=Path("./assets/dpd-words.css"),
        dpd_roots_css_path=Path("./assets/dpd-roots.css"),
        dpd_help_css_path=Path("./assets/dpd-help.css"),
        epd_css_path=Path("./assets/epd.css"),
        sandhi_css_path=Path("./assets/sandhi.css"),
        tpp_css_path=Path("./assets/tpp.css"),
        buttons_js_path=Path("./assets/buttons.js"),
        abbrev_path=Path("./assets/abbreviations.tsv"),
        help_path=Path("./assets/help.csv"),
        # Project input
        compound_families_dir=dpd_dir / "compound families generator/",
        frequency_dir=dpd_dir / "frequency maps/",
        root_families_dir=dpd_dir / "root families generator/",
        word_families_dir=dpd_dir / "word families/",
        inflections_dir=dpd_dir / "inflection generator/",
        sets_dir=dpd_dir / "sets/",
        words_path=dpd_dir / "csvs/dpd-full.csv",
        roots_path=dpd_dir / "csvs/roots.csv",
        all_inflections_dict_path=dpd_dir / "inflection generator/output/all inflections dict",
        icon_path=dpd_dir / "favicon/favicon_io nu circle/favicon.ico",
        icon_bmp_path=dpd_dir / "favicon/favicon_io nu circle/dpd.bmp",
    )

    # Ensure write dirs exist. The help html dir is included so that all
    # three html output directories are treated alike.
    write_dir_keys = (
        'output_dir',
        'output_html_dir',
        'output_root_html_dir',
        'output_help_html_dir',
        'output_share_dir',
        'error_log_dir',
    )
    for key in write_dir_keys:
        rsc[key].mkdir(parents=True, exist_ok=True)

    return rsc
def copy_goldendict(src_path: Path, dest_dir: Path, name: str) -> None:
    """Move a built archive into ``dest_dir`` as ``<stem>-goldendict.zip``.

    Despite the function name the file is moved (via the external ``mv``
    command), not copied. Failures are reported on stdout and otherwise
    ignored, so the caller carries on.

    Args:
        src_path: Archive to move.
        dest_dir: Directory that receives the renamed archive.
        name: Human-readable label used only in the progress message.
    """
    print(f"{timeis()} {green}copying {name} to share")

    # Path.stem drops only the final suffix. The previous
    # name.replace(suffix, '') removed every occurrence of the suffix text,
    # e.g. "a.zip.b.zip" became "a.b".
    dest_path = dest_dir.joinpath(f"{src_path.stem}-goldendict.zip")

    try:
        # NOTE: --backup=numbered is a GNU coreutils option; other `mv`
        # implementations may reject it, which lands in the handler below.
        subprocess.run(
            ['mv', '--backup=numbered', src_path, dest_path],
            check=True)
    except (subprocess.SubprocessError, OSError) as e:
        # Best effort: a non-zero exit status or a missing `mv` binary is
        # reported but does not abort the run.
        print(f"{timeis()} {red}{e}")
class DpdWord:
    """One row of the words table exposed as named attributes."""

    def __init__(self, df: DataFrame, row: int):
        """Copy the cells of ``df`` at index label ``row`` onto the instance.

        Besides the raw column values, a few derived forms are stored:
        ``pali_`` (underscore-prefixed, spaces replaced by underscores),
        ``pali_super`` (result of ``superscripter``), ``pali_clean`` and
        ``root_clean`` (trailing numbering removed) and ``comm`` (one
        trailing full stop removed).
        """

        def cell(column: str) -> str:
            # Presumably always str: the tables are read with dtype=str and
            # blanks filled with "" -- confirm if df comes from elsewhere.
            return df.loc[row, column]

        # Headword and its derived spellings
        headword = cell("Pāli1")
        self.pali: str = headword
        self.pali_: str = "_" + re.sub(r" ", "_", headword)
        self.pali_super = superscripter(headword)
        self.pali2: str = cell("Pāli2")
        # Drop everything from the first " <digit>" to the end.
        self.pali_clean: str = re.sub(r" \d.*$", "", headword)

        # Grammar
        self.pos: str = cell("POS")
        self.grammar: str = cell("Grammar")
        self.neg: str = cell("Neg")
        self.verb: str = cell("Verb")
        self.trans: str = cell("Trans")
        self.case: str = cell("Case")

        # Meaning
        self.meaning: str = cell("Meaning IN CONTEXT")
        self.lit: str = cell("Literal Meaning")
        self.buddhadatta: str = cell("Buddhadatta")
        self.non_ia: str = cell("Non IA")

        # Sanskrit
        self.sk: str = cell("Sanskrit")
        self.sk_root: str = cell("Sk Root")
        self.sk_root_mn: str = cell("Sk Root Mn")
        self.sk_root_cl: str = cell("Cl")

        # Root
        root = cell("Pāli Root")
        self.root: str = root
        # Drop a trailing space followed by optional digits.
        self.root_clean: str = re.sub(r" \d*$", "", root)
        self.root_in_comps: str = cell("Root In Comps")
        self.root_verb: str = cell("V")
        self.root_grp: str = cell("Grp")
        self.root_sign: str = cell("Sgn")
        self.root_meaning: str = cell("Root Meaning")
        self.base: str = cell("Base")

        # Families and construction
        self.family: str = cell("Family")
        self.word_family = cell("Word Family")
        self.family2: str = cell("Family2")
        self.construction: str = cell("Construction")
        self.derivative: str = cell("Derivative")
        self.suffix: str = cell("Suffix")
        self.pc: str = cell("Phonetic Changes")
        self.comp: str = cell("Compound")
        self.comp_constr: str = cell("Compound Construction")

        # Examples
        self.source1: str = cell("Source1")
        self.sutta1: str = cell("Sutta1")
        self.eg1: str = cell("Example1")
        self.source2: str = cell("Source 2")
        self.sutta2: str = cell("Sutta2")
        self.eg2: str = cell("Example 2")

        # Related words
        self.ant: str = cell("Antonyms")
        self.syn: str = cell("Synonyms – different word")
        self.var: str = cell("Variant – same constr or diff reading")

        # Commentary, stored without a single trailing full stop.
        self.comm: str = re.sub(r"(.+)\.$", r"\1", cell("Commentary"))

        # Miscellaneous
        self.notes: str = cell("Notes")
        self.cognate: str = cell("Cognate")
        self.sets: str = cell("Category")
        self.stem: str = cell("Stem")
        self.link: str = cell("Link")