Skip to content

Commit 808ed12

Browse files
authored
Merge pull request #78 from yibeichan/fix-strip
fix strip() issue for choices
2 parents aecab71 + d10601a commit 808ed12

File tree

2 files changed

+138
-6
lines changed

2 files changed

+138
-6
lines changed

reproschema/redcap2reproschema.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,10 @@ def process_choices(choices_str, field_name):
201201
choices = []
202202
choices_value_type = []
203203
for ii, choice in enumerate(choices_str.split("|")):
204-
parts = choice.split(", ")
204+
choice = (
205+
choice.strip()
206+
) # Strip leading/trailing whitespace for each choice
207+
parts = [p.strip() for p in choice.split(",")]
205208

206209
# Handle the case where the choice is something like "1,"
207210
if len(parts) == 1:
@@ -213,14 +216,22 @@ def process_choices(choices_str, field_name):
213216
)
214217
parts = [ii, parts[0]]
215218

216-
# Try to convert the first part to an integer, if it fails, keep it as a string
217-
try:
218-
value = int(parts[0])
219+
# Determine if value should be treated as an integer or string
220+
if parts[0] == "0":
221+
# Special case for "0", treat it as an integer
222+
value = 0
219223
choices_value_type.append("xsd:integer")
220-
except ValueError:
224+
elif parts[0].isdigit() and parts[0][0] == "0":
225+
# If it has leading zeros, treat it as a string
221226
value = parts[0]
222227
choices_value_type.append("xsd:string")
223-
228+
else:
229+
try:
230+
value = int(parts[0])
231+
choices_value_type.append("xsd:integer")
232+
except ValueError:
233+
value = parts[0]
234+
choices_value_type.append("xsd:string")
224235
choice_obj = {
225236
"name": {"en": " ".join(parts[1:]).strip()},
226237
"value": value,
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
import os
2+
import shutil
3+
4+
import pytest
5+
import yaml
6+
from click.testing import CliRunner
7+
8+
from ..cli import main
9+
from ..redcap2reproschema import process_choices
10+
11+
12+
def test_process_choices_numeric_codes():
    """Integer codes with plain labels yield integer-valued choices."""
    parsed, types_seen = process_choices(
        "1, Male | 2, Female | 3, Other", "gender"
    )
    # Build the expected choice objects from (code, label) pairs.
    wanted = [
        {"name": {"en": label}, "value": code}
        for code, label in ((1, "Male"), (2, "Female"), (3, "Other"))
    ]
    assert parsed == wanted
    assert types_seen == ["xsd:integer"]
22+
23+
24+
def test_process_choices_boolean():
    """A Yes/No pair coded 1/0 keeps both codes as integers, including 0."""
    parsed, types_seen = process_choices("1, Yes | 0, No", "boolean_field")
    wanted = [
        {"name": {"en": label}, "value": code}
        for code, label in ((1, "Yes"), (0, "No"))
    ]
    assert parsed == wanted
    assert types_seen == ["xsd:integer"]
33+
34+
35+
def test_process_choices_special_characters():
    """Quote characters inside labels are preserved verbatim."""
    raw = "1, Option A | 2, \"Option B\" | 3, Option C with 'quotes'"
    parsed, types_seen = process_choices(raw, "special_chars")
    labels = ["Option A", '"Option B"', "Option C with 'quotes'"]
    # Codes are 1..3 in the same order as the labels above.
    wanted = [
        {"name": {"en": label}, "value": code}
        for code, label in enumerate(labels, start=1)
    ]
    assert parsed == wanted
    assert types_seen == ["xsd:integer"]
45+
46+
47+
def test_process_choices_with_missing_values():
    """A non-contiguous code such as 99 ("Not applicable") stays an integer."""
    parsed, types_seen = process_choices(
        "1, Yes | 2, No | 99, Not applicable", "missing_values"
    )
    wanted = [
        {"name": {"en": label}, "value": code}
        for code, label in ((1, "Yes"), (2, "No"), (99, "Not applicable"))
    ]
    assert parsed == wanted
    assert types_seen == ["xsd:integer"]
57+
58+
59+
def test_process_choices_with_unicode():
    """Accented letters and symbols in labels pass through unchanged."""
    parsed, types_seen = process_choices(
        "1, Café | 2, Niño | 3, Résumé | 4, ☺", "unicode_field"
    )
    labels = ["Café", "Niño", "Résumé", "☺"]
    # Codes are 1..4 in the same order as the labels above.
    wanted = [
        {"name": {"en": label}, "value": code}
        for code, label in enumerate(labels, start=1)
    ]
    assert parsed == wanted
    assert types_seen == ["xsd:integer"]
70+
71+
72+
def test_process_choices_alpha_codes():
    """Alphabetic codes are kept as strings and typed as xsd:string."""
    parsed, types_seen = process_choices(
        "A, Apple | B, Banana | C, Cherry", "alpha_codes"
    )
    wanted = [
        {"name": {"en": label}, "value": code}
        for code, label in (("A", "Apple"), ("B", "Banana"), ("C", "Cherry"))
    ]
    assert parsed == wanted
    assert sorted(types_seen) == ["xsd:string"]
82+
83+
84+
def test_process_choices_incomplete_values():
    """A code followed by an empty description yields an empty label."""
    parsed, types_seen = process_choices(
        "1, Yes | 2, | 3, No", "incomplete_values"
    )
    wanted = [
        {"name": {"en": label}, "value": code}
        for code, label in ((1, "Yes"), (2, ""), (3, "No"))
    ]
    assert parsed == wanted
    assert types_seen == ["xsd:integer"]
94+
95+
96+
def test_process_choices_numeric_strings():
    """Zero-padded numeric codes stay strings, i.e. are not cast to int."""
    parsed, types_seen = process_choices(
        "001, Option 001 | 002, Option 002 | 003, Option 003",
        "numeric_strings",
    )
    codes = ["001", "002", "003"]
    # Each label is "Option " followed by the code itself.
    wanted = [
        {"name": {"en": "Option " + code}, "value": code} for code in codes
    ]
    assert parsed == wanted
    assert sorted(types_seen) == ["xsd:string"]
106+
107+
108+
def test_process_choices_spaces_in_values():
    """Codes containing internal spaces are kept intact as string values."""
    parsed, types_seen = process_choices(
        "A B, Choice AB | C D, Choice CD", "spaces_in_values"
    )
    wanted = [
        {"name": {"en": label}, "value": code}
        for code, label in (("A B", "Choice AB"), ("C D", "Choice CD"))
    ]
    assert parsed == wanted
    assert sorted(types_seen) == ["xsd:string"]
117+
118+
119+
# Run only this module's tests when it is executed directly, and propagate
# pytest's exit code so a failing run does not exit with status 0.
# NOTE(review): this module uses relative imports, so it presumably has to be
# launched as a package module (python -m ...) for this guard to be reached.
if __name__ == "__main__":
    raise SystemExit(pytest.main([__file__]))

0 commit comments

Comments
 (0)