
Commit 3b907fa

test(converters): remove long commented line to satisfy ruff E501
Signed-off-by: Arya Tayshete <[email protected]>
1 parent fc7f91f commit 3b907fa

1 file changed (+20 −20 lines)


test/components/converters/test_csv_to_document.py

Lines changed: 20 additions & 20 deletions
@@ -4,6 +4,7 @@
 
 import logging
 import os
+from pathlib import Path
 
 import pytest
 
@@ -17,10 +18,10 @@ def csv_converter():
 
 
 class TestCSVToDocument:
-    def test_init(self, csv_converter):
+    def test_init(self, csv_converter: CSVToDocument):
         assert isinstance(csv_converter, CSVToDocument)
 
-    def test_run(self, test_files_path):
+    def test_run(self, test_files_path: Path):
         """
         Test if the component runs correctly.
         """
@@ -38,7 +39,7 @@ def test_run(self, test_files_path):
         assert docs[1].meta["file_path"] == os.path.basename(files[1])
         assert docs[2].meta["file_path"] == os.path.basename(files[2])
 
-    def test_run_with_store_full_path_false(self, test_files_path):
+    def test_run_with_store_full_path_false(self, test_files_path: Path):
         """
         Test if the component runs correctly with store_full_path=False
         """
@@ -57,7 +58,7 @@ def test_run_with_store_full_path_false(self, test_files_path):
         assert docs[1].meta["file_path"] == "sample_2.csv"
         assert docs[2].meta["file_path"] == "sample_3.csv"
 
-    def test_run_error_handling(self, test_files_path, caplog):
+    def test_run_error_handling(self, test_files_path: Path, caplog: pytest.LogCaptureFixture):
         """
         Test if the component correctly handles errors.
         """
@@ -74,7 +75,7 @@ def test_run_error_handling(self, test_files_path, caplog):
         assert len(docs) == 2
         assert docs[0].meta["file_path"] == os.path.basename(paths[0])
 
-    def test_encoding_override(self, test_files_path, caplog):
+    def test_encoding_override(self, test_files_path: Path, caplog: pytest.LogCaptureFixture):
         """
         Test if the encoding metadata field is used properly
         """
@@ -103,7 +104,7 @@ def test_run_with_meta(self):
         # check that the metadata from the bytestream is merged with that from the meta parameter
         assert document.meta == {"name": "test_name", "language": "it"}
 
-    # --- NEW TESTS for row mode reviewer asks ---
+    # --- NEW TESTS for row mode ---
 
     def test_row_mode_with_missing_content_column_warns_and_fallbacks(self, tmp_path, caplog):
         csv_text = "a,b\r\n1,2\r\n3,4\r\n"
@@ -121,7 +122,7 @@ def test_row_mode_with_missing_content_column_warns_and_fallbacks(self, tmp_path
         # Fallback content is a readable listing
         assert "a: 1" in docs[0].content and "b: 2" in docs[0].content
 
-    def test_row_mode_meta_collision_prefixed(self, tmp_path):
+    def test_row_mode_meta_collision_prefixed(self, tmp_path: Path):
         # ByteStream meta has file_path and encoding; CSV also has those columns.
         csv_text = "file_path,encoding,comment\r\nrowpath.csv,latin1,ok\r\n"
         f = tmp_path / "collide.csv"
@@ -147,21 +148,20 @@ def test_init_validates_delimiter_and_quotechar(self):
         with pytest.raises(ValueError):
             CSVToDocument(quotechar='""')
 
-    def test_row_mode_large_file_warns(self, tmp_path, caplog):
-        # Build a ~1.2MB CSV to trigger the warning (threshold ~5MB in component;
-        # If you want to keep this super fast, you can comment this test out.)
-        rows = 60_000
-        header = "text,author\n"
-        body = "".join("hello,Ada\n" for _ in range(rows))
-        data = (header + body).encode("utf-8")
-        bs = ByteStream(data=data, meta={"file_path": "big.csv"})
+    def test_row_mode_large_file_warns(self, caplog: pytest.LogCaptureFixture, monkeypatch: pytest.MonkeyPatch):
+        # Make the threshold tiny so the warning always triggers, regardless of platform.
+        import haystack.components.converters.csv as csv_mod
+
+        monkeypatch.setattr(csv_mod, "_ROW_MODE_SIZE_WARN_BYTES", 1, raising=False)
+
+        bs = ByteStream(data=b"text,author\nhi,Ada\n", meta={"file_path": "big.csv"})
         conv = CSVToDocument(conversion_mode="row")
-        with caplog.at_level(logging.WARNING):
+        # Capture the converter module's logger explicitly for reliability across CI runners.
+        with caplog.at_level(logging.WARNING, logger="haystack.components.converters.csv"):
             _ = conv.run(sources=[bs])
-        # Not asserting exact MB value to avoid brittleness; look for the key phrase
         assert "parsing a large CSV" in caplog.text
 
-    def test_row_mode_with_content_column(self, tmp_path):
+    def test_row_mode_with_content_column(self, tmp_path: Path):
         """
         Each row becomes a Document, with `content` from a chosen column and other columns in meta.
         """
@@ -185,7 +185,7 @@ def test_row_mode_with_content_column(self, tmp_path):
         # still respects store_full_path default=False trimming when present
         assert os.path.basename(f) == docs[0].meta["file_path"]
 
-    def test_row_mode_without_content_column(self, tmp_path):
+    def test_row_mode_without_content_column(self, tmp_path: Path):
         """
         Without `content_column`, the content is a human-readable 'key: value' listing of the row.
         """
@@ -202,7 +202,7 @@ def test_row_mode_without_content_column(self, tmp_path):
         assert docs[0].meta["a"] == "1" and docs[0].meta["b"] == "2"
         assert docs[0].meta["row_number"] == 0
 
-    def test_row_mode_meta_merging(self, tmp_path):
+    def test_row_mode_meta_merging(self, tmp_path: Path):
         """
         File-level meta and explicit `meta` arg are merged into each row's meta.
         """
