Skip to content

Commit e744533

Browse files
committed
Merge branch 'main' into 1532_Do_not_allow_words_that_are_digits
2 parents 0644d26 + edca194 commit e744533

File tree

12 files changed

+1288
-722
lines changed

12 files changed

+1288
-722
lines changed

src/main/java/ai/elimu/util/csv/CsvContentExtractionHelper.java

Lines changed: 78 additions & 179 deletions
Large diffs are not rendered by default.
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
package ai.elimu.util.csv;
2+
3+
import ai.elimu.model.content.Letter;
4+
import ai.elimu.web.content.letter.LetterCsvExportController;
5+
import org.apache.commons.csv.CSVFormat;
6+
import org.apache.commons.csv.CSVParser;
7+
import org.apache.commons.csv.CSVRecord;
8+
import org.apache.commons.lang.math.NumberUtils;
9+
import org.apache.logging.log4j.LogManager;
10+
import org.apache.logging.log4j.Logger;
11+
import org.jetbrains.annotations.NotNull;
12+
13+
import java.io.File;
14+
import java.io.IOException;
15+
import java.nio.file.Files;
16+
import java.nio.file.Path;
17+
import java.nio.file.Paths;
18+
import java.util.List;
19+
20+
import static java.util.Collections.emptyList;
21+
import static java.util.stream.Collectors.toUnmodifiableList;
22+
23+
public class CsvLetterExtractionHelper {
24+
25+
private CsvLetterExtractionHelper() {
26+
}
27+
28+
private static final Logger logger = LogManager.getLogger();
29+
30+
/**
31+
* For information on how the CSV files were generated, see {@link LetterCsvExportController#handleRequest}.
32+
*/
33+
public static List<Letter> getLettersFromCsvBackup(File csvFile) {
34+
logger.info("getLettersFromCsvBackup");
35+
36+
Path csvFilePath = Paths.get(csvFile.toURI());
37+
logger.info("csvFilePath: {}", csvFilePath);
38+
39+
CSVFormat csvFormat = CSVFormat.Builder.create()
40+
.setHeader(
41+
"id",
42+
"text",
43+
"diacritic",
44+
"usage_count"
45+
)
46+
.setSkipHeaderRecord(true)
47+
.build();
48+
49+
try (var csvParser = new CSVParser(Files.newBufferedReader(csvFilePath), csvFormat)) {
50+
return csvParser.stream()
51+
.map(CsvLetterExtractionHelper::toLetter)
52+
.collect(toUnmodifiableList());
53+
} catch (IOException ex) {
54+
logger.error(ex);
55+
}
56+
57+
return emptyList();
58+
}
59+
60+
@NotNull
61+
private static Letter toLetter(CSVRecord csvRecord) {
62+
logger.info("csvRecord: {}", csvRecord);
63+
64+
Letter letter = new Letter();
65+
66+
String text = csvRecord.get("text");
67+
letter.setText(text);
68+
69+
boolean diacritic = Boolean.parseBoolean(csvRecord.get("diacritic"));
70+
letter.setDiacritic(diacritic);
71+
72+
Integer usageCount = NumberUtils.toInt(csvRecord.get("usage_count"));
73+
letter.setUsageCount(usageCount);
74+
75+
return letter;
76+
}
77+
}
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
package ai.elimu.util.csv;
2+
3+
import ai.elimu.model.content.Sound;
4+
import ai.elimu.model.v2.enums.content.sound.SoundType;
5+
import ai.elimu.web.content.sound.SoundCsvExportController;
6+
import org.apache.commons.csv.CSVFormat;
7+
import org.apache.commons.csv.CSVParser;
8+
import org.apache.commons.csv.CSVRecord;
9+
import org.apache.commons.lang.StringUtils;
10+
import org.apache.commons.lang.math.NumberUtils;
11+
import org.apache.logging.log4j.LogManager;
12+
import org.apache.logging.log4j.Logger;
13+
import org.jetbrains.annotations.NotNull;
14+
15+
import java.io.File;
16+
import java.io.IOException;
17+
import java.nio.file.Files;
18+
import java.nio.file.Path;
19+
import java.nio.file.Paths;
20+
import java.util.List;
21+
22+
import static java.util.Collections.emptyList;
23+
import static java.util.stream.Collectors.toUnmodifiableList;
24+
25+
public class CsvSoundExtractionHelper {
26+
27+
private CsvSoundExtractionHelper() {
28+
}
29+
30+
private static final Logger logger = LogManager.getLogger();
31+
32+
/**
33+
* For information on how the CSV files were generated, see {@link SoundCsvExportController#handleRequest}.
34+
*/
35+
public static List<Sound> getSoundsFromCsvBackup(File csvFile) {
36+
logger.info("getSoundsFromCsvBackup");
37+
38+
Path csvFilePath = Paths.get(csvFile.toURI());
39+
logger.info("csvFilePath: {}", csvFilePath);
40+
41+
CSVFormat csvFormat = CSVFormat.Builder.create()
42+
.setHeader(
43+
"id",
44+
"value_ipa",
45+
"value_sampa",
46+
"audio_id",
47+
"diacritic",
48+
"sound_type",
49+
"usage_count"
50+
)
51+
.setSkipHeaderRecord(true)
52+
.build();
53+
54+
try (var csvParser = new CSVParser(Files.newBufferedReader(csvFilePath), csvFormat)) {
55+
return csvParser.stream()
56+
.map(CsvSoundExtractionHelper::toSound)
57+
.collect(toUnmodifiableList());
58+
} catch (IOException ex) {
59+
logger.error(ex);
60+
}
61+
62+
return emptyList();
63+
}
64+
65+
@NotNull
66+
private static Sound toSound(CSVRecord csvRecord) {
67+
logger.info("csvRecord: {}", csvRecord);
68+
69+
Sound sound = new Sound();
70+
71+
String valueIpa = csvRecord.get("value_ipa");
72+
sound.setValueIpa(valueIpa);
73+
74+
String valueSampa = csvRecord.get("value_sampa");
75+
sound.setValueSampa(valueSampa);
76+
77+
boolean diacritic = Boolean.parseBoolean(csvRecord.get("diacritic"));
78+
sound.setDiacritic(diacritic);
79+
80+
SoundType soundType = extractSoundType(csvRecord.get("sound_type"));
81+
sound.setSoundType(soundType);
82+
83+
Integer usageCount = NumberUtils.toInt(csvRecord.get("usage_count"));
84+
sound.setUsageCount(usageCount);
85+
86+
return sound;
87+
}
88+
89+
// TODO: 05.07.2022 This method can be replaced by a {@link org.apache.commons.lang3.EnumUtils::getEnum}
90+
private static SoundType extractSoundType(String soundTypeCsvValue) {
91+
if (StringUtils.isBlank(soundTypeCsvValue)) {
92+
return null;
93+
}
94+
95+
try {
96+
return SoundType.valueOf(soundTypeCsvValue);
97+
} catch (IllegalArgumentException e) {
98+
logger.error(
99+
"Tried to extract incorrect value: {} of {} enum",
100+
soundTypeCsvValue,
101+
SoundType.class.getSimpleName()
102+
);
103+
return null;
104+
}
105+
}
106+
}

0 commit comments

Comments
 (0)