diff --git a/wrangler-core/src/main/java/io/cdap/directives/parser/ParseExcel.java b/wrangler-core/src/main/java/io/cdap/directives/parser/ParseExcel.java index a253382ac..f3df25078 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/parser/ParseExcel.java +++ b/wrangler-core/src/main/java/io/cdap/directives/parser/ParseExcel.java @@ -42,6 +42,7 @@ import org.apache.poi.hssf.usermodel.HSSFDateUtil; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.CellType; +import org.apache.poi.ss.usermodel.DataFormatter; import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.slf4j.Logger; @@ -101,6 +102,7 @@ public List execute(List records, final ExecutorContext context) throws DirectiveExecutionException, ErrorRowException { List results = new ArrayList<>(); ByteArrayInputStream input = null; + DataFormatter formatter = new DataFormatter(); try { for (Row record : records) { int idx = record.find(column); @@ -159,25 +161,26 @@ public List execute(List records, final ExecutorContext context) String value = ""; switch (cell.getCellTypeEnum()) { case STRING: newRow.add(name, cell.getStringCellValue()); value = cell.getStringCellValue(); break; case NUMERIC: if (HSSFDateUtil.isCellDateFormatted(cell)) { - newRow.add(name, cell.getDateCellValue()); - value = cell.getDateCellValue().toString(); + // Render the date the way the sheet displays it; other cell types keep their native values. + value = formatter.formatCellValue(cell); + newRow.add(name, value); } else { newRow.add(name, cell.getNumericCellValue()); value = String.valueOf(cell.getNumericCellValue()); } break; case BOOLEAN: newRow.add(name, cell.getBooleanCellValue()); value = String.valueOf(cell.getBooleanCellValue()); break; } if (rows == 0 && firstRowAsHeader) { columnNames.put(cell.getAddress().getColumn(), value); diff --git a/wrangler-core/src/test/java/io/cdap/directives/parser/ParseExcelTest.java b/wrangler-core/src/test/java/io/cdap/directives/parser/ParseExcelTest.java index 653d358f7..addf23eba 100644 --- 
a/wrangler-core/src/test/java/io/cdap/directives/parser/ParseExcelTest.java +++ b/wrangler-core/src/test/java/io/cdap/directives/parser/ParseExcelTest.java @@ -67,5 +67,26 @@ public void testNoSheetName() throws Exception { Assert.assertEquals(1, pipeline.getSecond().size()); } } -} + @Test + public void testDateFormatting() throws Exception { + try (InputStream stream = + ParseExcelTest.class.getClassLoader().getResourceAsStream("date-formats-test-sheet.xlsx")) { + byte[] data = IOUtils.toByteArray(stream); + + String[] directives = new String[]{ + "parse-as-excel :body '0'", + }; + + List rows = new ArrayList<>(); + rows.add(new Row("body", data)); + List results = TestingRig.execute(directives, rows); + + // Guard against a vacuous pass when the sheet yields no rows. + Assert.assertFalse(results.isEmpty()); + for (Row result : results) { + Assert.assertEquals(result.getValue("A"), result.getValue("B")); + } + } + } +} diff --git a/wrangler-core/src/test/resources/date-formats-test-sheet.xlsx b/wrangler-core/src/test/resources/date-formats-test-sheet.xlsx new file mode 100644 index 000000000..41b826b69 Binary files /dev/null and b/wrangler-core/src/test/resources/date-formats-test-sheet.xlsx differ