Skip to content

Commit

Permalink
[fix](hive) report error with escape char and null format (#39700)
Browse files Browse the repository at this point in the history
## Proposed changes

BE does not yet handle a custom escape character or null format when reading
Hive text tables, so FE now reports an error when either serde property is set
to a non-default value.
  • Loading branch information
suxiaogang223 authored Aug 22, 2024
1 parent 44beec3 commit f53d1eb
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,37 @@ STORED AS INPUTFORMAT
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';

-- Text-format table using LazySimpleSerDe with 'escape.delim' overridden to '|'.
-- Fixture for exercising a table whose escape character is explicitly configured.
CREATE TABLE `serde_test7`(
`id` int,
`name` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
'escape.delim' = '|'
)
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';

-- Text-format table using LazySimpleSerDe with 'serialization.null.format'
-- overridden to the literal string 'null'.
-- Fixture for exercising a table whose null representation is explicitly configured.
CREATE TABLE `serde_test8`(
`id` int,
`name` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
'serialization.null.format' = 'null'
)
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';

-- Seed every serde_test table with the same two rows: (1, "abc") and (2, "def").
insert into serde_test1 values(1, "abc"),(2, "def");
insert into serde_test2 values(1, "abc"),(2, "def");
insert into serde_test3 values(1, "abc"),(2, "def");
insert into serde_test4 values(1, "abc"),(2, "def");
insert into serde_test5 values(1, "abc"),(2, "def");
insert into serde_test6 values(1, "abc"),(2, "def");
insert into serde_test7 values(1, "abc"),(2, "def");
insert into serde_test8 values(1, "abc"),(2, "def");
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ public class HiveScanNode extends FileQueryScanNode {
/** SerDe property name for the map key/value delimiter. */
public static final String PROP_MAP_KV_DELIMITER = "mapkey.delim";
/** Default map key/value delimiter: the single control character with code 3 (octal escape 003). */
public static final String DEFAULT_MAP_KV_DELIMITER = "\003";

/** SerDe property name under which a table's escape character is looked up. */
public static final String PROP_ESCAPE_DELIMITER = "escape.delim";
/**
 * Default escape character: a single backslash.
 * NOTE(review): the identifier is misspelled ("DELIMIER"); renaming it requires updating every usage.
 */
public static final String DEFAULT_ESCAPE_DELIMIER = "\\";
/** SerDe property name under which a table's null representation is looked up. */
public static final String PROP_NULL_FORMAT = "serialization.null.format";
/** Default null representation: a backslash followed by an uppercase N. */
public static final String DEFAULT_NULL_FORMAT = "\\N";

protected final HMSExternalTable hmsTable;
private HiveTransaction hiveTransaction = null;

Expand Down Expand Up @@ -458,6 +463,21 @@ protected TFileAttributes getFileAttributes() throws UserException {
textParams.setEnclose(serdeParams.get(PROP_QUOTE_CHAR).getBytes()[0]);
}

// TODO: support escape char and null format in csv_reader
// Until then, reject tables whose serde properties override either setting:
// a property that is absent or equal to its default passes, anything else
// raises a UserException naming the offending property.
Optional<String> escapeChar = HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
PROP_ESCAPE_DELIMITER);
if (escapeChar.isPresent() && !escapeChar.get().equals(DEFAULT_ESCAPE_DELIMIER)) {
throw new UserException(
"not support serde prop " + PROP_ESCAPE_DELIMITER + " in hive text reading");
}

// Same rule for the null format: only the default value is accepted.
Optional<String> nullFormat = HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
PROP_NULL_FORMAT);
if (nullFormat.isPresent() && !nullFormat.get().equals(DEFAULT_NULL_FORMAT)) {
throw new UserException(
"not support serde prop " + PROP_NULL_FORMAT + " in hive text reading");
}

TFileAttributes fileAttributes = new TFileAttributes();
fileAttributes.setTextParams(textParams);
fileAttributes.setHeaderType("");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,24 @@ suite("test_hive_serde_prop", "external_docker,hive,external_docker_hive,p0,exte
qt_5 """select * from ${catalog_name}.regression.serde_test4 order by id;"""
qt_6 """select * from ${catalog_name}.regression.serde_test5 order by id;"""
qt_7 """select * from ${catalog_name}.regression.serde_test6 order by id;"""

// Negative case: querying serde_test7 must fail.
// `success` starts true and is cleared only in the catch block, so the final
// assertEquals fails the test if the query unexpectedly succeeds.
def success = true;
try {
sql """select * from ${catalog_name}.regression.serde_test7 order by id;"""
} catch(Exception e) {
// The failure must carry the expected "not support serde prop" message.
assertTrue(e.getMessage().contains("not support serde prop"), e.getMessage())
success = false;
}
assertEquals(success, false)

// Negative case: querying serde_test8 must fail with the same message.
success = true;
try {
sql """select * from ${catalog_name}.regression.serde_test8 order by id;"""
} catch(Exception e) {
assertTrue(e.getMessage().contains("not support serde prop"), e.getMessage())
success = false;
}
assertEquals(success, false)
}
}

0 comments on commit f53d1eb

Please sign in to comment.