From 2d8bc4d4215717c4727554581c3b968ba00d058c Mon Sep 17 00:00:00 2001
From: Andrew Beveridge <andrew@beveridge.uk>
Date: Wed, 4 Dec 2024 12:37:28 -0500
Subject: [PATCH] Added logic to split nested parentheses and keep track of
 them better, with tests

---
 .../karaoke_lyrics_processor.py               | 67 +++++++++++---
 pyproject.toml                                |  2 +-
 tests/test_karaoke_lyrics_processor.py        | 92 +++++++++++++++++++
 3 files changed, 146 insertions(+), 15 deletions(-)

diff --git a/karaoke_lyrics_processor/karaoke_lyrics_processor.py b/karaoke_lyrics_processor/karaoke_lyrics_processor.py
index 2ba3885..aca17bd 100644
--- a/karaoke_lyrics_processor/karaoke_lyrics_processor.py
+++ b/karaoke_lyrics_processor/karaoke_lyrics_processor.py
@@ -196,33 +196,37 @@ def process_line(self, line):
         Process a single line to ensure it's within the maximum length,
         handle parentheses, and replace non-printable spaces.
         """
-        # Replace non-printable spaces at the beginning
         line = self.replace_non_printable_spaces(line)
-        # Clean up punctuation spacing
         line = self.clean_punctuation_spacing(line)
-        # Fix commas inside quotes
         line = self.fix_commas_inside_quotes(line)
 
         processed_lines = []
         iteration_count = 0
         max_iterations = 100  # Failsafe limit
 
-        while len(line) > self.max_line_length:
-            if iteration_count > max_iterations:
-                self.logger.error(f"Maximum iterations exceeded in process_line for line: {line}")
-                break
-
+        while len(line) > self.max_line_length and iteration_count < max_iterations:
             # Check if the line contains parentheses
             if "(" in line and ")" in line:
                 start_paren = line.find("(")
-                end_paren = line.find(")") + 1
+                end_paren = self.find_matching_paren(line, start_paren)
                 if end_paren < len(line) and line[end_paren] == ",":
                     end_paren += 1
 
+                # Process text before parentheses if it exists
                 if start_paren > 0:
-                    processed_lines.append(line[:start_paren].strip())
-                processed_lines.append(line[start_paren:end_paren].strip())
-                line = line[end_paren:].strip()
+                    before_paren = line[:start_paren].strip()
+                    processed_lines.extend(self.split_line(before_paren))
+
+                # Process text within parentheses
+                paren_content = line[start_paren : end_paren + 1].strip()
+                if len(paren_content) > self.max_line_length:
+                    # Split the content within parentheses if it's too long
+                    split_paren_content = self.split_line(paren_content)
+                    processed_lines.extend(split_paren_content)
+                else:
+                    processed_lines.append(paren_content)
+
+                line = line[end_paren + 1 :].strip()
             else:
                 split_point = self.find_best_split_point(line)
                 processed_lines.append(line[:split_point].strip())
@@ -230,11 +234,46 @@ def process_line(self, line):
 
             iteration_count += 1
 
-        if line:  # Add the remaining part if not empty
-            processed_lines.append(line)
+        if line:  # Add any remaining part
+            processed_lines.extend(self.split_line(line))
+
+        if iteration_count >= max_iterations:
+            self.logger.error(f"Maximum iterations exceeded in process_line for line: {line}")
 
         return processed_lines
 
+    def find_matching_paren(self, line, start_index):
+        """
+        Scan from start_index and return the index of the first ")" that brings the nesting depth back to zero, or -1.
+        """
+        nesting_change = {"(": 1, ")": -1}
+        depth = 0
+        for position in range(start_index, len(line)):
+            char = line[position]
+            depth += nesting_change.get(char, 0)
+            # Only a closing parenthesis that returns the depth to zero counts as the match
+            if char == ")" and depth == 0:
+                return position
+        return -1  # No matching parenthesis found
+
+    def split_line(self, line):
+        """
+        Break a line into pieces at find_best_split_point until what remains fits within max_line_length.
+        """
+        if len(line) <= self.max_line_length:
+            # Already short enough (this also covers the empty string): return it untouched
+            return [line]
+
+        pieces = []
+        remainder = line
+        while len(remainder) > self.max_line_length:
+            cut = self.find_best_split_point(remainder)
+            head, remainder = remainder[:cut].strip(), remainder[cut:].strip()
+            pieces.append(head)
+
+        # A non-empty tail that now fits becomes the final piece
+        return pieces + [remainder] if remainder else pieces
+
     def process(self):
         self.logger.info(f"Processing input lyrics from {self.input_filename}")
 
diff --git a/pyproject.toml b/pyproject.toml
index 8fe9eab..525764a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "karaoke-lyrics-processor"
-version = "0.3.2"
+version = "0.4.0"
 description = "Process song lyrics to prepare them for karaoke video production, e.g. by splitting long lines"
 authors = ["Andrew Beveridge <andrew@beveridge.uk>"]
 license = "MIT"
diff --git a/tests/test_karaoke_lyrics_processor.py b/tests/test_karaoke_lyrics_processor.py
index 8428f66..cc8557e 100644
--- a/tests/test_karaoke_lyrics_processor.py
+++ b/tests/test_karaoke_lyrics_processor.py
@@ -87,6 +87,98 @@ def test_commas_inside_quotes_with_no_commas(self):
 
         self.assertEqual(result.strip(), expected_output.strip())
 
+    def test_long_content_within_parentheses(self):
+        """A long parenthetical in the middle of a line is expected on its own lines, apart from the text around it."""
+        lyric = "This line has a very long (content inside parentheses that exceeds the maximum line length) and should be split correctly."
+        wanted_lines = [
+            "This line has a very long",
+            "(content inside parentheses that",
+            "exceeds the maximum line length)",
+            "and should be split correctly.",
+        ]
+
+        self.processor.input_lyrics_lines = [lyric]
+        processed = self.processor.process()
+
+        wanted_text = "\n".join(wanted_lines)
+        self.assertEqual(processed, wanted_text)
+
+    def test_long_content_within_parentheses_at_start(self):
+        """A long parenthetical that opens the line is expected to be split before the trailing text."""
+        lyric = "(This is a very long content inside parentheses that exceeds the maximum line length) and should be split correctly."
+        wanted_lines = [
+            "(This is a very long content inside",
+            "parentheses that exceeds",
+            "the maximum line length)",
+            "and should be split correctly.",
+        ]
+
+        self.processor.input_lyrics_lines = [lyric]
+        processed = self.processor.process()
+
+        wanted_text = "\n".join(wanted_lines)
+        self.assertEqual(processed, wanted_text)
+
+    def test_long_content_within_parentheses_at_end(self):
+        """A long parenthetical that closes the line is expected to be split after the leading text."""
+        lyric = "This line should be split correctly with (a very long content inside parentheses that exceeds the maximum line length)."
+        wanted_lines = [
+            "This line should",
+            "be split correctly with",
+            "(a very long content inside",
+            "parentheses that exceeds",
+            "the maximum line length).",
+        ]
+
+        self.processor.input_lyrics_lines = [lyric]
+        processed = self.processor.process()
+
+        wanted_text = "\n".join(wanted_lines)
+        self.assertEqual(processed, wanted_text)
+
+    def test_long_content_within_nested_parentheses(self):
+        """Nested parentheses are expected to stay together as one group that is split as a unit."""
+        lyric = "This line has (nested (parentheses with very long content that exceeds the maximum line length)) and should be split correctly."
+        wanted_lines = [
+            "This line has",
+            "(nested (parentheses with very long",
+            "content that exceeds",
+            "the maximum line length))",
+            "and should be split correctly.",
+        ]
+        self.processor.input_lyrics_lines = [lyric]
+        processed = self.processor.process()
+
+        self.assertEqual(processed, "\n".join(wanted_lines))
+
+    def test_split_line_function(self):
+        """Call split_line directly and compare the list of pieces it returns."""
+        source_text = "This is a very long line that should be split into multiple lines because it exceeds the maximum line length."
+        wanted_pieces = [
+            "This is a very long line that",
+            "should be split into multiple lines",
+            "because it exceeds",
+            "the maximum line length.",
+        ]
+
+        pieces = self.processor.split_line(source_text)
+        self.assertEqual(pieces, wanted_pieces)
+
+    def test_find_matching_paren(self):
+        """Check find_matching_paren against (text, start index, expected index) scenarios, one subTest each."""
+        scenarios = (
+            ("(a (b) c)", 0, 8),  # Simple nested
+            ("(a (b (c) d) e)", 0, 14),  # More complex nesting
+            ("(a (b (c) d) e)", 3, 11),  # Start from inner parenthesis
+            ("No parentheses", 0, -1),  # No parentheses
+            ("(a (b (c) d) e", 0, -1),  # Unmatched parenthesis
+        )
+
+        for text, start, wanted in scenarios:
+            with self.subTest(line=text, start_index=start):
+                found = self.processor.find_matching_paren(text, start)
+                self.assertEqual(found, wanted)
+
 
 if __name__ == "__main__":
     unittest.main()