-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathquery_csv_min_max.py
More file actions
54 lines (44 loc) · 1.49 KB
/
query_csv_min_max.py
File metadata and controls
54 lines (44 loc) · 1.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import csv
import sys
class CsvQueryError(Exception):
    """Raised when the CSV input cannot satisfy the requested min/max query."""


def _find_header(rows):
    """Advance ``rows`` to the first usable header row and return it.

    A usable header has at least one cell and no blank cells. Returns
    ``None`` when ``rows`` is exhausted without finding one.
    """
    for row in rows:
        # csv.reader yields [] for a blank line and all([]) is True, so the
        # explicit emptiness check keeps blank lines from becoming the header.
        if row and all(cell.strip() for cell in row):
            return row
    return None


def _to_number(text):
    """Return ``text`` as a float, or ``None`` if it cannot be ranked.

    Missing cells (``None``), non-numeric text and NaN all map to ``None``.
    """
    if text is None:
        return None
    try:
        number = float(text)
    except ValueError:
        return None
    # NaN is the only float unequal to itself; it has no ordering, so keeping
    # it would make the minimum/maximum depend on row order.
    if number != number:
        return None
    return number


def query_min_max(lines, source, fields):
    """Find the rows holding the smallest and largest value of one column.

    Args:
        lines: Iterable of CSV text lines (an open text file works). Rows
            before the first fully populated row are ignored; that row is
            the header and everything after it is data.
        source: Header name of the numeric column to rank rows by.
        fields: Header names of the extra columns to report for each row.

    Returns:
        Three lists of strings: the output header (``source`` followed by
        ``fields``), the row with the minimum ``source`` value, and the row
        with the maximum. The ``source`` value is rendered as ``str(float)``;
        a field missing from a short row is rendered as ``""``. On ties the
        earliest row wins.

    Raises:
        CsvQueryError: If no header row exists, a requested column is not in
            the header, or no row has a numeric ``source`` value.
    """
    rows = csv.reader(lines)
    header = _find_header(rows)
    if header is None:
        raise CsvQueryError(
            "Error: No valid header row found (a row with all non-empty fields)."
        )

    # Check the requested columns before touching any data rows.
    columns = [source, *fields]
    for name in columns:
        if name not in header:
            raise CsvQueryError(f"Error: Field '{name}' not found in header.")

    lowest = highest = None
    for cells in rows:
        # zip() stops at the shorter side: short rows simply lack the trailing
        # keys, and cells beyond the header are ignored.
        record = dict(zip(header, cells))
        number = _to_number(record.get(source))
        if number is None:
            continue  # Skip rows with a missing or non-numeric source value
        # Strict comparisons keep the first row among equal values.
        if lowest is None or number < lowest[0]:
            lowest = (number, record)
        if highest is None or number > highest[0]:
            highest = (number, record)

    if lowest is None:
        raise CsvQueryError(f"No valid numeric data found in column '{source}'")

    result = [columns]
    for number, record in (lowest, highest):
        result.append(
            [str(number)] + [record.get(name, "") for name in columns[1:]]
        )
    return result


def main(argv):
    """Run the command-line tool and return its process exit status.

    Args:
        argv: Full argument vector: program name, CSV path, source column
            name, then one or more field names.

    Returns:
        0 after printing the header, minimum row and maximum row as CSV on
        stdout; 1 after printing a diagnostic on stderr.
    """
    if len(argv) < 4:
        program = argv[0] if argv else "query_csv_min_max.py"
        print(
            f"Usage: python3 {program} input.csv source_header field1 [field2 ...]",
            file=sys.stderr,
        )
        return 1

    csv_file, source, fields = argv[1], argv[2], argv[3:]
    try:
        with open(csv_file, newline='') as f:
            result = query_min_max(f, source, fields)
    except OSError as exc:
        print(f"Error: cannot read '{csv_file}': {exc}", file=sys.stderr)
        return 1
    except CsvQueryError as exc:
        print(exc, file=sys.stderr)
        return 1

    # csv.writer quotes values containing commas or quotes, so the output is
    # itself valid CSV; "\n" matches what print() emitted before.
    csv.writer(sys.stdout, lineterminator="\n").writerows(result)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))