Skip to content

Commit

Permalink
Bug fix: preserve whitespace in CSV fields when reading
Browse files: browse the repository at this point in the history
  • Loading branch information
KevinyhZou committed Oct 27, 2023
1 parent 478e0c1 commit 636437f
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
abc, cc ,ab
a,b,c
Original file line number Diff line number Diff line change
Expand Up @@ -1005,4 +1005,27 @@ class GlutenClickHouseHiveTableSuite()
)
}
}

// Regression test for GLUTEN-3552 (per the test title: whitespace inside delimited text fields).
// Declares a Hive text table (LazySimpleSerDe, ',' as field delimiter) over the fixture
// directory `text-data/field_whitespaces`, then runs `select *` through
// compareResultsAgainstVanillaSpark with compareResult = true.
// NOTE(review): presumably the fixture rows carry leading/trailing spaces around some fields,
// so a reader that trims them would produce a different result -- confirm against the fixture.
test("GLUTEN-3552: Bug fix csv field whitespaces") {
  val data_path = rootPath + "/text-data/field_whitespaces"
  // A previous aborted run may have left the table in the catalog; remove it first so the
  // CREATE TABLE below cannot fail with "table already exists".
  spark.sql("DROP TABLE IF EXISTS test_tbl_3552")
  try {
    spark.sql(s"""
                 | CREATE TABLE test_tbl_3552(
                 | a string,
                 | b string,
                 | c string)
                 | ROW FORMAT SERDE
                 | 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
                 |WITH SERDEPROPERTIES (
                 | 'field.delim'=','
                 | )
                 | STORED AS INPUTFORMAT
                 | 'org.apache.hadoop.mapred.TextInputFormat'
                 |OUTPUTFORMAT
                 | 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
                 |LOCATION '$data_path'
                 |""".stripMargin)
    val select_sql = "select * from test_tbl_3552"
    compareResultsAgainstVanillaSpark(select_sql, compareResult = true, _ => {})
  } finally {
    // Drop the table even when table creation or the comparison throws, so a failure here
    // does not leak test_tbl_3552 into other tests or later runs.
    spark.sql("DROP TABLE IF EXISTS test_tbl_3552")
  }
}
}
1 change: 1 addition & 0 deletions cpp-ch/local-engine/Common/CHUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,7 @@ void BackendInitializerUtil::initSettings(std::map<std::string, std::string> & b
settings.set("input_format_parquet_import_nested", true);
settings.set("input_format_json_read_numbers_as_strings", true);
settings.set("input_format_json_read_bools_as_numbers", false);
settings.set("input_format_csv_trim_whitespaces", false);
settings.set("output_format_orc_string_as_string", true);
settings.set("output_format_parquet_version", "1.0");
settings.set("output_format_parquet_compression_method", "snappy");
Expand Down

0 comments on commit 636437f

Please sign in to comment.