From 2d8bc4d4215717c4727554581c3b968ba00d058c Mon Sep 17 00:00:00 2001 From: Andrew Beveridge Date: Wed, 4 Dec 2024 12:37:28 -0500 Subject: [PATCH] Added logic to split nested parentheses and keep track of them better, with tests --- .../karaoke_lyrics_processor.py | 67 +++++++++++--- pyproject.toml | 2 +- tests/test_karaoke_lyrics_processor.py | 92 +++++++++++++++++++ 3 files changed, 146 insertions(+), 15 deletions(-) diff --git a/karaoke_lyrics_processor/karaoke_lyrics_processor.py b/karaoke_lyrics_processor/karaoke_lyrics_processor.py index 2ba3885..aca17bd 100644 --- a/karaoke_lyrics_processor/karaoke_lyrics_processor.py +++ b/karaoke_lyrics_processor/karaoke_lyrics_processor.py @@ -196,33 +196,37 @@ def process_line(self, line): Process a single line to ensure it's within the maximum length, handle parentheses, and replace non-printable spaces. """ - # Replace non-printable spaces at the beginning line = self.replace_non_printable_spaces(line) - # Clean up punctuation spacing line = self.clean_punctuation_spacing(line) - # Fix commas inside quotes line = self.fix_commas_inside_quotes(line) processed_lines = [] iteration_count = 0 max_iterations = 100 # Failsafe limit - while len(line) > self.max_line_length: - if iteration_count > max_iterations: - self.logger.error(f"Maximum iterations exceeded in process_line for line: {line}") - break - + while len(line) > self.max_line_length and iteration_count < max_iterations: # Check if the line contains parentheses if "(" in line and ")" in line: start_paren = line.find("(") - end_paren = line.find(")") + 1 + end_paren = self.find_matching_paren(line, start_paren) if end_paren < len(line) and line[end_paren] == ",": end_paren += 1 + # Process text before parentheses if it exists if start_paren > 0: - processed_lines.append(line[:start_paren].strip()) - processed_lines.append(line[start_paren:end_paren].strip()) - line = line[end_paren:].strip() + before_paren = line[:start_paren].strip() + processed_lines.extend(self.split_line(before_paren)) + + # Process text within parentheses + paren_content = line[start_paren : end_paren + 1].strip() + if len(paren_content) > self.max_line_length: + # Split the content within parentheses if it's too long + split_paren_content = self.split_line(paren_content) + processed_lines.extend(split_paren_content) + else: + processed_lines.append(paren_content) + + line = line[end_paren + 1 :].strip() else: split_point = self.find_best_split_point(line) processed_lines.append(line[:split_point].strip()) @@ -230,11 +234,46 @@ def process_line(self, line): iteration_count += 1 - if line: # Add the remaining part if not empty - processed_lines.append(line) + if line: # Add any remaining part + processed_lines.extend(self.split_line(line)) + + if iteration_count >= max_iterations: + self.logger.error(f"Maximum iterations exceeded in process_line for line: {line}") return processed_lines + def find_matching_paren(self, line, start_index): + """ + Find the index of the matching closing parenthesis for the opening parenthesis at start_index. + """ + stack = 0 + for i in range(start_index, len(line)): + if line[i] == "(": + stack += 1 + elif line[i] == ")": + stack -= 1 + if stack == 0: + return i + return -1 # No matching parenthesis found + + def split_line(self, line): + """ + Split a line into multiple lines if it exceeds the maximum length. + """ + if len(line) <= self.max_line_length: + return [line] + + split_lines = [] + while len(line) > self.max_line_length: + split_point = self.find_best_split_point(line) + split_lines.append(line[:split_point].strip()) + line = line[split_point:].strip() + + if line: + split_lines.append(line) + + return split_lines + def process(self): self.logger.info(f"Processing input lyrics from {self.input_filename}") diff --git a/pyproject.toml b/pyproject.toml index 8fe9eab..525764a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "karaoke-lyrics-processor" -version = "0.3.2" +version = "0.4.0" description = "Process song lyrics to prepare them for karaoke video production, e.g. by splitting long lines" authors = ["Andrew Beveridge "] license = "MIT" diff --git a/tests/test_karaoke_lyrics_processor.py b/tests/test_karaoke_lyrics_processor.py index 8428f66..cc8557e 100644 --- a/tests/test_karaoke_lyrics_processor.py +++ b/tests/test_karaoke_lyrics_processor.py @@ -87,6 +87,98 @@ def test_commas_inside_quotes_with_no_commas(self): self.assertEqual(result.strip(), expected_output.strip()) + def test_long_content_within_parentheses(self): + input_lyrics = ( + "This line has a very long (content inside parentheses that exceeds the maximum line length) and should be split correctly." + ) + expected_output = [ + "This line has a very long", + "(content inside parentheses that", + "exceeds the maximum line length)", + "and should be split correctly.", + ] + + self.processor.input_lyrics_lines = [input_lyrics] + result = self.processor.process() + + self.assertEqual(result, "\n".join(expected_output)) + + def test_long_content_within_parentheses_at_start(self): + input_lyrics = ( + "(This is a very long content inside parentheses that exceeds the maximum line length) and should be split correctly." + ) + expected_output = [ + "(This is a very long content inside", + "parentheses that exceeds", + "the maximum line length)", + "and should be split correctly.", + ] + + self.processor.input_lyrics_lines = [input_lyrics] + result = self.processor.process() + + self.assertEqual(result, "\n".join(expected_output)) + + def test_long_content_within_parentheses_at_end(self): + input_lyrics = ( + "This line should be split correctly with (a very long content inside parentheses that exceeds the maximum line length)." + ) + expected_output = [ + "This line should", + "be split correctly with", + "(a very long content inside", + "parentheses that exceeds", + "the maximum line length).", + ] + + self.processor.input_lyrics_lines = [input_lyrics] + result = self.processor.process() + + self.assertEqual(result, "\n".join(expected_output)) + + def test_long_content_within_nested_parentheses(self): + input_lyrics = "This line has (nested (parentheses with very long content that exceeds the maximum line length)) and should be split correctly." + expected_output = [ + "This line has", + "(nested (parentheses with very long", + "content that exceeds", + "the maximum line length))", + "and should be split correctly.", + ] + + self.processor.input_lyrics_lines = [input_lyrics] + result = self.processor.process() + + self.assertEqual(result, "\n".join(expected_output)) + + def test_split_line_function(self): + # Directly test the split_line function + long_line = "This is a very long line that should be split into multiple lines because it exceeds the maximum line length." + expected_output = [ + "This is a very long line that", + "should be split into multiple lines", + "because it exceeds", + "the maximum line length.", + ] + + result = self.processor.split_line(long_line) + self.assertEqual(result, expected_output) + + def test_find_matching_paren(self): + # Test cases for find_matching_paren + test_cases = [ + ("(a (b) c)", 0, 8), # Simple nested + ("(a (b (c) d) e)", 0, 14), # More complex nesting + ("(a (b (c) d) e)", 3, 11), # Start from inner parenthesis + ("No parentheses", 0, -1), # No parentheses + ("(a (b (c) d) e", 0, -1), # Unmatched parenthesis + ] + + for line, start_index, expected in test_cases: + with self.subTest(line=line, start_index=start_index): + result = self.processor.find_matching_paren(line, start_index) + self.assertEqual(result, expected) + if __name__ == "__main__": unittest.main()