Skip to content

Commit 3c9bbf6

Browse files
Remove duplicate enums that are defined in one line (#18)
* Implemented regex pattern for single-line enums
* ⚡ Refactor file handling and definition removal
* fix black formatter and linting
* ⚡ Refactor file processing and definition removal

  Refactored file processing to remove duplicate class and enum definitions. Added functions for checking and removing classes/enums from files.
* Update src/bo4egenerator/duplicates.py

  Co-authored-by: konstantin <[email protected]>
* regex cleaning up

---------

Co-authored-by: konstantin <[email protected]>
1 parent 3980ca1 commit 3c9bbf6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

76 files changed

+151
-3631
lines changed

Diff for: src/bo4egenerator/duplicates.py

+151-81
Original file line numberDiff line numberDiff line change
@@ -11,144 +11,214 @@
1111

1212
_logger = logging.getLogger(__name__)
1313

14+
# Matches a complete enum declaration of the form `public enum Name { ... };`:
# the braces open and close inside the match and a trailing ";" is required.
single_line_enum_pattern = re.compile(r"\s*public\s+enum\s+\w+\s*{[^}]*}\s*;")
15+
# Matches the head of a `public partial class Name` declaration (leading
# whitespace allowed) and captures the class name in group 1.
class_pattern = re.compile(r"\s*public\s+partial\s+class\s+(\w+)")
16+
# Matches the head of a `public enum Name` declaration (leading whitespace
# allowed) and captures the enum name in group 1. It also matches the start
# of declarations accepted by single_line_enum_pattern.
enum_pattern = re.compile(r"\s*public\s+enum\s+(\w+)")
17+
1418

1519
def find_classes_and_enums_in_file(file_path: Path) -> tuple[list[str], list[str]]:
    """
    Collect the names of all partial classes and enums declared in a file.

    Args:
        file_path (Path): The path to the file.

    Returns:
        tuple: Two lists, the class names and the enum names found in the file.
    """
    source_text = file_path.read_text(encoding="utf-8")

    found_classes = class_pattern.findall(source_text)
    found_enums = enum_pattern.findall(source_text)

    _logger.debug("Classes found: %s", found_classes)
    _logger.debug("Enums found: %s", found_enums)
    return found_classes, found_enums
3839

3940

40-
def remove_definitions( # pylint: disable=too-many-locals, too-many-branches
41-
file_path: Path, main_class_name: str, classes_to_remove: list[str], enums_to_remove: list[str]
42-
) -> None:
41+
def read_file(file_path: Path) -> list[str]:
    """
    Read a UTF-8 text file and return its lines.

    Args:
        file_path (Path): The path to the file.

    Returns:
        list: The lines of the file, each keeping its line ending. An empty
        list is returned (and the error is logged) if an OSError occurs.
    """
    try:
        with file_path.open("r", encoding="utf-8") as source_file:
            content = source_file.readlines()
    except OSError as e:
        _logger.error("Error reading file %s: %s", file_path, e)
        return []
    return content
5851

59-
in_definition = False
52+
53+
def write_file(file_path: Path, lines: list[str]) -> None:
    """
    Overwrite a file with the given lines, encoded as UTF-8.

    Args:
        file_path (Path): The path to the file.
        lines (list): The lines to write; they are written as-is, so each one
            must already carry its own line ending.

    An OSError raised while writing is logged and not propagated.
    """
    try:
        with file_path.open("w", encoding="utf-8") as target_file:
            target_file.write("".join(lines))
    except OSError as e:
        _logger.error("Error writing to file %s: %s", file_path, e)
62+
63+
64+
def find_comment_block_start(lines: list[str], index: int) -> int:
    """
    Find the first line of the `///` comment block that ends directly above ``index``.

    Args:
        lines (list): The lines of the file.
        index (int): The index of the line below the comment block.

    Returns:
        int: The index of the topmost consecutive `///` line above ``index``,
        or ``index`` itself if the line above is not a `///` comment.
    """
    block_start = index
    # Walk upwards while the preceding lines are still `///` comments.
    for previous in range(index - 1, -1, -1):
        if not re.match(r"\s*///", lines[previous]):
            break
        block_start = previous
    return block_start
71+
72+
73+
def parse_definitions(lines: list[str], main_class_name: str) -> dict[str, tuple[int, int]]:
    """
    Locate the class and enum definitions in the given lines.

    Args:
        lines (list): The lines of the file.
        main_class_name (str): The class that must be kept; it is never recorded.

    Returns:
        dict: Maps each definition name to ``(start, end)``, both inclusive
        line indices. ``start`` is the declaration line and ``end`` is the
        next line that begins with ``}`` (after optional whitespace). For an
        enum matched by ``single_line_enum_pattern`` both indices are the
        declaration line. Doc comments above a declaration are not part of
        the range; ``remove_definitions`` extends the range upwards itself.
    """
    definitions = {}
    # Name of the multi-line definition currently being scanned, or None.
    current_definition = None
    definition_start_index = 0

    for index, line in enumerate(lines):
        class_match = class_pattern.match(line)
        if class_match:
            class_name = class_match.group(1)
            if class_name == main_class_name:
                # The main class is kept, so stop tracking any open definition.
                current_definition = None
            else:
                current_definition = class_name
                definition_start_index = index
            continue

        enum_match = enum_pattern.match(line)
        if enum_match:
            enum_name = enum_match.group(1)
            if single_line_enum_pattern.match(line):
                # The whole enum sits on this line; an enclosing definition
                # that is still open keeps being tracked.
                definitions[enum_name] = (index, index)
                _logger.debug("Single-line enum found: %s at lines %d to %d", enum_name, index, index)
            else:
                current_definition = enum_name
                definition_start_index = index
            continue

        if current_definition is not None and re.match(r"\s*\}", line):
            definitions[current_definition] = (definition_start_index, index)
            _logger.debug(
                "Multi-line definition found: %s at lines %d to %d", current_definition, definition_start_index, index
            )
            current_definition = None

    return definitions
94128

95-
if in_definition:
96-
if re.match(r"\s*\}", line):
97-
definitions[current_definition] = (definition_start_index, index)
98-
in_definition = False
99-
current_definition = None
100129

101-
# Remove classes and enums to remove from the lines
102-
for name in classes_to_remove + enums_to_remove:
130+
def remove_definitions(
    lines: list[str], definitions: dict[str, tuple[int, int]], names_to_remove: list[str]
) -> list[str]:
    """
    Remove the named class/enum definitions from the lines.

    Each range is extended upwards over directly preceding blank lines and
    then over directly preceding `///` comment lines before it is removed.

    Args:
        lines (list): The lines of the file; modified in place.
        definitions (dict): Maps definition names to inclusive ``(start, end)``
            line indices into ``lines``.
        names_to_remove (list): Names of the definitions to remove. Names that
            are not in ``definitions`` are ignored.

    Returns:
        list: The same list object as ``lines``, with the definitions removed.
    """
    doomed_indices: set[int] = set()

    # Resolve every range against the unmodified list first. Deleting while
    # iterating would shift the indices of all definitions further down.
    for name in names_to_remove:
        if name not in definitions:
            continue
        start, end = definitions[name]
        while start > 0 and re.match(r"^\s*$", lines[start - 1]):
            start -= 1
        while start > 0 and re.match(r"\s*///", lines[start - 1]):
            start -= 1
        _logger.debug("Removing definition: %s from lines %d to %d", name, start, end)
        doomed_indices.update(range(start, end + 1))

    if doomed_indices:
        # Slice assignment keeps the caller's list object; overlapping or
        # repeated ranges are removed only once.
        lines[:] = [line for position, line in enumerate(lines) if position not in doomed_indices]
    return lines
111146

112-
# Write the cleaned content back to the file
113-
try:
114-
with open(file_path, "w", encoding="utf-8") as file:
115-
file.writelines(lines)
116-
except (PermissionError, OSError) as e:
117-
_logger.error("Error writing to file %s: %s", file_path, e)
147+
148+
def remove_duplicate_definitions(
    file_path: Path, main_class_name: str, classes_to_remove: list[str], enums_to_remove: list[str]
) -> None:
    """
    Remove the given class and enum definitions from a file, keeping the main class.

    Nothing is written if the file could not be read or is empty.

    Args:
        file_path (Path): The path to the file to rewrite.
        main_class_name (str): The main class to keep intact.
        classes_to_remove (list): List of class names to remove.
        enums_to_remove (list): List of enum names to remove.
    """
    file_lines = read_file(file_path)
    if file_lines:
        located_definitions = parse_definitions(file_lines, main_class_name)
        remaining_lines = remove_definitions(file_lines, located_definitions, classes_to_remove + enums_to_remove)
        write_file(file_path, remaining_lines)
161+
162+
163+
def should_remove_class(class_name: str, directory_path: Path) -> bool:
    """
    Tell whether a class has its own .cs file and may therefore be removed elsewhere.

    Args:
        class_name (str): The name of the class.
        directory_path (Path): The directory containing the "bo" and "com" sub-directories.

    Returns:
        bool: True if "bo/<class_name>.cs" or "com/<class_name>.cs" exists below ``directory_path``.
    """
    file_name = f"{class_name}.cs"
    return any((directory_path / sub_directory / file_name).exists() for sub_directory in ("bo", "com"))
170+
171+
172+
def should_remove_enum(enum_name: str, directory_path: Path) -> bool:
    """
    Tell whether an enum has its own .cs file and may therefore be removed elsewhere.

    Args:
        enum_name (str): The name of the enum.
        directory_path (Path): The directory containing the "enum" sub-directory.

    Returns:
        bool: True if "enum/<enum_name>.cs" exists below ``directory_path``.
    """
    return directory_path.joinpath("enum", f"{enum_name}.cs").exists()
178+
179+
180+
def process_file(file_path: Path, directory_path: Path) -> None:
    """
    Remove from one file every class and enum that also has its own .cs file.

    The class named after the file (its stem) is treated as the main class and
    is never removed. Each removal is a separate read/rewrite of the file.

    Args:
        file_path (Path): The .cs file to clean up.
        directory_path (Path): The directory in which the stand-alone files are looked up.
    """
    main_class_name = file_path.stem
    declared_classes, declared_enums = find_classes_and_enums_in_file(file_path)

    for class_name in declared_classes:
        if class_name == main_class_name or not should_remove_class(class_name, directory_path):
            continue
        _logger.info("Removing class %s from %s", class_name, file_path)
        remove_duplicate_definitions(
            file_path,
            main_class_name=main_class_name,
            classes_to_remove=[class_name],
            enums_to_remove=[],
        )

    for enum_name in declared_enums:
        if not should_remove_enum(enum_name, directory_path):
            continue
        _logger.info("Removing enum %s from %s", enum_name, file_path)
        remove_duplicate_definitions(
            file_path,
            main_class_name=main_class_name,
            classes_to_remove=[],
            enums_to_remove=[enum_name],
        )

    _logger.info("Processed %s", file_path)
118211

119212

120213
def process_directory(directory_path: Path) -> None:
    """
    Walk the directory tree and clean up every .cs file found in it.

    Args:
        directory_path (Path): The path to the output directory.
    """
    for root, _, files in os.walk(directory_path):
        root_path = Path(root)
        for filename in files:
            if not filename.endswith(".cs"):
                continue
            process_file(root_path / filename, directory_path)

0 commit comments

Comments
 (0)