diff --git a/backends-clickhouse/src/test/resources/text-data/cr_end_of_line/csv_with_cr_end_of_line.csv b/backends-clickhouse/src/test/resources/text-data/cr_end_of_line/csv_with_cr_end_of_line.csv new file mode 100644 index 0000000000000..077ca2c84c539 --- /dev/null +++ b/backends-clickhouse/src/test/resources/text-data/cr_end_of_line/csv_with_cr_end_of_line.csv @@ -0,0 +1,2 @@ +A,110,208819249 +B,112,208819248 C,123,783434434 diff --git a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseHiveTableSuite.scala b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseHiveTableSuite.scala index 01d21ea96b11b..f320e90afdb80 100644 --- a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseHiveTableSuite.scala +++ b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseHiveTableSuite.scala @@ -971,4 +971,27 @@ class GlutenClickHouseHiveTableSuite() spark.sql("DROP TABLE test_tbl_3337") } + test("GLUTEN-3548: Bug fix csv allow cr end of line") { + val data_path = rootPath + "/text-data/cr_end_of_line" + spark.sql(s""" + | CREATE TABLE test_tbl_3548( + | a string, + | b string, + | c string) + | ROW FORMAT SERDE + | 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + |WITH SERDEPROPERTIES ( + | 'field.delim'=',' + | ) + | STORED AS INPUTFORMAT + | 'org.apache.hadoop.mapred.TextInputFormat' + |OUTPUTFORMAT + | 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' + |LOCATION '$data_path' + |""".stripMargin) + val select_sql = "select * from test_tbl_3548" + compareResultsAgainstVanillaSpark(select_sql, compareResult = true, _ => {}) + spark.sql("DROP TABLE test_tbl_3548") + } + } diff --git a/cpp-ch/local-engine/Common/CHUtil.cpp b/cpp-ch/local-engine/Common/CHUtil.cpp index 698eb25686d74..af10b6de62479 100644 --- a/cpp-ch/local-engine/Common/CHUtil.cpp +++ b/cpp-ch/local-engine/Common/CHUtil.cpp @@ -598,6 +598,7 @@ void BackendInitializerUtil::initSettings(std::map & b settings.set("input_format_parquet_import_nested", true); settings.set("input_format_json_read_numbers_as_strings", true); settings.set("input_format_json_read_bools_as_numbers", false); + settings.set("input_format_csv_allow_cr_at_end_of_line", true); settings.set("output_format_orc_string_as_string", true); settings.set("output_format_parquet_version", "1.0"); settings.set("output_format_parquet_compression_method", "snappy");