Skip to content

Commit

Permalink
[BugFix][Excel] Fix read formulas/number cell value of excel (#8316)
Browse files Browse the repository at this point in the history
Co-authored-by: Tu-maimes <[email protected]>
  • Loading branch information
Tu-maimes and Tu-maimes authored Dec 23, 2024
1 parent e128ccc commit 00c5aed
Show file tree
Hide file tree
Showing 4 changed files with 164 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,14 @@
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.CellValue;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.FormulaEvaluator;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.util.NumberToTextConverter;
import org.apache.poi.xssf.usermodel.XSSFFormulaEvaluator;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

import lombok.SneakyThrows;
Expand Down Expand Up @@ -88,15 +93,19 @@ protected void readProcess(
String currentFileName)
throws IOException {
Workbook workbook;
FormulaEvaluator formulaEvaluator;
if (currentFileName.endsWith(".xls")) {
workbook = new HSSFWorkbook(inputStream);
formulaEvaluator = workbook.getCreationHelper().createFormulaEvaluator();
} else if (currentFileName.endsWith(".xlsx")) {
workbook = new XSSFWorkbook(inputStream);
formulaEvaluator = new XSSFFormulaEvaluator((XSSFWorkbook) workbook);
} else {
throw new FileConnectorException(
CommonErrorCodeDeprecated.UNSUPPORTED_OPERATION,
"Only support read excel file");
}
DataFormatter formatter = new DataFormatter();
Sheet sheet =
pluginConfig.hasPath(BaseSourceConfigOptions.SHEET_NAME.key())
? workbook.getSheet(
Expand Down Expand Up @@ -131,7 +140,11 @@ protected void readProcess(
cell == null
? null
: convert(
getCellValue(cell.getCellType(), cell),
getCellValue(
cell.getCellType(),
cell,
formulaEvaluator,
formatter),
fieldTypes[z - 1]));
}
if (isMergePartition) {
Expand Down Expand Up @@ -182,7 +195,11 @@ public SeaTunnelRowType getSeaTunnelRowTypeInfo(String path) throws FileConnecto
"User must defined schema for json file type");
}

private Object getCellValue(CellType cellType, Cell cell) {
private Object getCellValue(
CellType cellType,
Cell cell,
FormulaEvaluator formulaEvaluator,
DataFormatter formatter) {
switch (cellType) {
case STRING:
return cell.getStringCellValue();
Expand All @@ -192,11 +209,18 @@ private Object getCellValue(CellType cellType, Cell cell) {
if (DateUtil.isCellDateFormatted(cell)) {
return cell.getLocalDateTimeCellValue();
}
return cell.getNumericCellValue();
return formatter.formatCellValue(cell);
case BLANK:
return "";
case ERROR:
break;
case FORMULA:
CellValue evaluate = formulaEvaluator.evaluate(cell);
if (evaluate.getCellType().equals(CellType.NUMERIC)) {
return NumberToTextConverter.toText(evaluate.getNumberValue());
} else {
return evaluate.formatAsString();
}
default:
throw new FileConnectorException(
CommonErrorCodeDeprecated.UNSUPPORTED_DATA_TYPE,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,143 @@ public void testExcelReadDateString() throws IOException, URISyntaxException {
}
}

@Test
public void testExcelReadFormulaXls() throws IOException, URISyntaxException {
URL excelFile = ExcelReadStrategyTest.class.getResource("/excel/test_read_formula.xls");
URL conf = ExcelReadStrategyTest.class.getResource("/excel/test_read_excel.conf");
Assertions.assertNotNull(excelFile);
Assertions.assertNotNull(conf);
String excelFilePath = Paths.get(excelFile.toURI()).toString();
String confPath = Paths.get(conf.toURI()).toString();
Config pluginConfig = ConfigFactory.parseFile(new File(confPath));
ExcelReadStrategy excelReadStrategy = new ExcelReadStrategy();
LocalConf localConf = new LocalConf(FS_DEFAULT_NAME_DEFAULT);
excelReadStrategy.setPluginConfig(pluginConfig);
excelReadStrategy.init(localConf);

List<String> fileNamesByPath = excelReadStrategy.getFileNamesByPath(excelFilePath);
CatalogTable userDefinedCatalogTable = CatalogTableUtil.buildWithConfig(pluginConfig);
excelReadStrategy.setCatalogTable(userDefinedCatalogTable);
TestCollector testCollector = new TestCollector();
excelReadStrategy.read(fileNamesByPath.get(0), "", testCollector);

for (SeaTunnelRow seaTunnelRow : testCollector.getRows()) {
Assertions.assertEquals(seaTunnelRow.getArity(), 14);
Assertions.assertEquals(seaTunnelRow.getField(0).getClass(), Byte.class);
Assertions.assertEquals(seaTunnelRow.getField(1).getClass(), Short.class);
Assertions.assertEquals(seaTunnelRow.getField(2).getClass(), Integer.class);
Assertions.assertEquals(seaTunnelRow.getField(3).getClass(), Long.class);
Assertions.assertEquals(seaTunnelRow.getField(4).getClass(), String.class);
Assertions.assertEquals(seaTunnelRow.getField(5).getClass(), Double.class);
Assertions.assertEquals(seaTunnelRow.getField(6).getClass(), Float.class);
Assertions.assertEquals(seaTunnelRow.getField(7).getClass(), BigDecimal.class);
Assertions.assertEquals(seaTunnelRow.getField(8).getClass(), Boolean.class);
Assertions.assertEquals(seaTunnelRow.getField(9).getClass(), LinkedHashMap.class);
Assertions.assertEquals(seaTunnelRow.getField(10).getClass(), String[].class);
Assertions.assertEquals(seaTunnelRow.getField(11).getClass(), LocalDate.class);
Assertions.assertEquals(seaTunnelRow.getField(12).getClass(), LocalDateTime.class);
Assertions.assertEquals(seaTunnelRow.getField(13).getClass(), LocalTime.class);
Assertions.assertEquals(seaTunnelRow.getField(0), (byte) 1);
Assertions.assertEquals(seaTunnelRow.getField(1), (short) 22);
Assertions.assertEquals(seaTunnelRow.getField(2), 333);
Assertions.assertEquals(seaTunnelRow.getField(3), 355L);
Assertions.assertEquals(seaTunnelRow.getField(4), "Cosmos");
Assertions.assertEquals(seaTunnelRow.getField(5), 5.555);
Assertions.assertEquals(seaTunnelRow.getField(6), (float) 6.666);
Assertions.assertEquals(seaTunnelRow.getField(7), new BigDecimal("7.78"));
Assertions.assertEquals(seaTunnelRow.getField(8), Boolean.FALSE);
Assertions.assertEquals(
seaTunnelRow.getField(9),
new LinkedHashMap<String, String>() {
{
put("name", "Ivan");
put("age", "26");
}
});
Assertions.assertArrayEquals(
(String[]) seaTunnelRow.getField(10), new String[] {"Ivan", "Dusayi"});
Assertions.assertEquals(
seaTunnelRow.getField(11),
DateUtils.parse("2024-01-31", DateUtils.Formatter.YYYY_MM_DD));
Assertions.assertEquals(
seaTunnelRow.getField(12),
DateTimeUtils.parse(
"2024-01-31 16:00:48", DateTimeUtils.Formatter.YYYY_MM_DD_HH_MM_SS));
Assertions.assertEquals(
seaTunnelRow.getField(13),
TimeUtils.parse("16:00:48", TimeUtils.Formatter.HH_MM_SS));
}
}

@Test
public void testExcelReadFormula() throws IOException, URISyntaxException {
URL excelFile =
ExcelReadStrategyTest.class.getResource("/excel/test_read_excel_formula.xlsx");
URL conf = ExcelReadStrategyTest.class.getResource("/excel/test_read_excel.conf");
Assertions.assertNotNull(excelFile);
Assertions.assertNotNull(conf);
String excelFilePath = Paths.get(excelFile.toURI()).toString();
String confPath = Paths.get(conf.toURI()).toString();
Config pluginConfig = ConfigFactory.parseFile(new File(confPath));
ExcelReadStrategy excelReadStrategy = new ExcelReadStrategy();
LocalConf localConf = new LocalConf(FS_DEFAULT_NAME_DEFAULT);
excelReadStrategy.setPluginConfig(pluginConfig);
excelReadStrategy.init(localConf);

List<String> fileNamesByPath = excelReadStrategy.getFileNamesByPath(excelFilePath);
CatalogTable userDefinedCatalogTable = CatalogTableUtil.buildWithConfig(pluginConfig);
excelReadStrategy.setCatalogTable(userDefinedCatalogTable);
TestCollector testCollector = new TestCollector();
excelReadStrategy.read(fileNamesByPath.get(0), "", testCollector);

for (SeaTunnelRow seaTunnelRow : testCollector.getRows()) {
Assertions.assertEquals(seaTunnelRow.getArity(), 14);
Assertions.assertEquals(seaTunnelRow.getField(0).getClass(), Byte.class);
Assertions.assertEquals(seaTunnelRow.getField(1).getClass(), Short.class);
Assertions.assertEquals(seaTunnelRow.getField(2).getClass(), Integer.class);
Assertions.assertEquals(seaTunnelRow.getField(3).getClass(), Long.class);
Assertions.assertEquals(seaTunnelRow.getField(4).getClass(), String.class);
Assertions.assertEquals(seaTunnelRow.getField(5).getClass(), Double.class);
Assertions.assertEquals(seaTunnelRow.getField(6).getClass(), Float.class);
Assertions.assertEquals(seaTunnelRow.getField(7).getClass(), BigDecimal.class);
Assertions.assertEquals(seaTunnelRow.getField(8).getClass(), Boolean.class);
Assertions.assertEquals(seaTunnelRow.getField(9).getClass(), LinkedHashMap.class);
Assertions.assertEquals(seaTunnelRow.getField(10).getClass(), String[].class);
Assertions.assertEquals(seaTunnelRow.getField(11).getClass(), LocalDate.class);
Assertions.assertEquals(seaTunnelRow.getField(12).getClass(), LocalDateTime.class);
Assertions.assertEquals(seaTunnelRow.getField(13).getClass(), LocalTime.class);
Assertions.assertEquals(seaTunnelRow.getField(0), (byte) 1);
Assertions.assertEquals(seaTunnelRow.getField(1), (short) 22);
Assertions.assertEquals(seaTunnelRow.getField(2), 333);
Assertions.assertEquals(seaTunnelRow.getField(3), 355L);
Assertions.assertEquals(seaTunnelRow.getField(4), "Cosmos");
Assertions.assertEquals(seaTunnelRow.getField(5), 5.555);
Assertions.assertEquals(seaTunnelRow.getField(6), (float) 6.666);
Assertions.assertEquals(seaTunnelRow.getField(7), new BigDecimal("7.78"));
Assertions.assertEquals(seaTunnelRow.getField(8), Boolean.FALSE);
Assertions.assertEquals(
seaTunnelRow.getField(9),
new LinkedHashMap<String, String>() {
{
put("name", "Ivan");
put("age", "26");
}
});
Assertions.assertArrayEquals(
(String[]) seaTunnelRow.getField(10), new String[] {"Ivan", "Dusayi"});
Assertions.assertEquals(
seaTunnelRow.getField(11),
DateUtils.parse("2024-01-31", DateUtils.Formatter.YYYY_MM_DD));
Assertions.assertEquals(
seaTunnelRow.getField(12),
DateTimeUtils.parse(
"2024-01-31 16:00:48", DateTimeUtils.Formatter.YYYY_MM_DD_HH_MM_SS));
Assertions.assertEquals(
seaTunnelRow.getField(13),
TimeUtils.parse("16:00:48", TimeUtils.Formatter.HH_MM_SS));
}
}

@Getter
public static class TestCollector implements Collector<SeaTunnelRow> {
private final List<SeaTunnelRow> rows = new ArrayList<>();
Expand Down
Binary file not shown.
Binary file not shown.

0 comments on commit 00c5aed

Please sign in to comment.