From 14f4781fe4e548d5cdd1f570db96bf0be2ffcf55 Mon Sep 17 00:00:00 2001 From: James McMullan Date: Mon, 29 Jan 2024 11:57:31 -0500 Subject: [PATCH] HPCC4J-569 Correct EOS character handling - Modified BinaryRecordWriter to only truncate strings at EOS in var strings - Added unit test that verifies read / write all of Unicode chars and mid EOS Signed-off-by: James McMullan James.McMullan@lexisnexis.com --- .../dfs/client/BinaryRecordWriter.java | 41 ++++++--- .../dfs/client/DFSReadWriteTest.java | 87 +++++++++++++++++++ 2 files changed, 117 insertions(+), 11 deletions(-) diff --git a/dfsclient/src/main/java/org/hpccsystems/dfs/client/BinaryRecordWriter.java b/dfsclient/src/main/java/org/hpccsystems/dfs/client/BinaryRecordWriter.java index 00e362399..0b795fd8c 100644 --- a/dfsclient/src/main/java/org/hpccsystems/dfs/client/BinaryRecordWriter.java +++ b/dfsclient/src/main/java/org/hpccsystems/dfs/client/BinaryRecordWriter.java @@ -52,6 +52,8 @@ public class BinaryRecordWriter implements IRecordWriter private static final int QSTR_COMPRESSED_CHUNK_LEN = 3; private static final int QSTR_EXPANDED_CHUNK_LEN = 4; + private static final byte NULL_TERMINATOR = '\0'; + private byte[] scratchBuffer = new byte[SCRATCH_BUFFER_SIZE]; private OutputStream outputStream = null; @@ -307,7 +309,7 @@ private void writeField(FieldDef fd, Object fieldValue) throws Exception fillLength = SCRATCH_BUFFER_SIZE; } - Arrays.fill(scratchBuffer, 0, fillLength, (byte) '\0'); + Arrays.fill(scratchBuffer, 0, fillLength, NULL_TERMINATOR); writeByteArray(scratchBuffer, 0, fillLength); numFillBytes -= fillLength; } @@ -462,7 +464,7 @@ else if (fd.getDataLen() == 8) } case CHAR: { - byte c='\0'; + byte c = NULL_TERMINATOR; if (fieldValue!=null) { String value = (String) fieldValue; @@ -475,10 +477,13 @@ else if (fd.getDataLen() == 8) case STRING: { String value = fieldValue != null ? (String) fieldValue : ""; - int eosIdx = value.indexOf('\0'); - if (eosIdx > -1) + if (fd.getFieldType() == FieldType.VAR_STRING) { - value = value.substring(0,eosIdx); + int eosIdx = value.indexOf(NULL_TERMINATOR); + if (eosIdx > -1) + { + value = value.substring(0,eosIdx); + } } byte[] data = new byte[0]; @@ -573,10 +578,10 @@ else if (fd.getSourceType() == HpccSrcType.QSTRING) { if (fd.getFieldType() == FieldType.VAR_STRING && bytesToWrite > 0) { - data[bytesToWrite - 1] = '\0'; + data[bytesToWrite - 1] = NULL_TERMINATOR; if (fd.getSourceType().isUTF16() && bytesToWrite > 1) { - data[bytesToWrite - 2] = '\0'; + data[bytesToWrite - 2] = NULL_TERMINATOR; } } @@ -594,7 +599,7 @@ else if (fd.getSourceType() == HpccSrcType.QSTRING) fillLength = SCRATCH_BUFFER_SIZE; } - Arrays.fill(scratchBuffer, 0, fillLength, (byte) '\0'); + Arrays.fill(scratchBuffer, 0, fillLength, NULL_TERMINATOR); writeByteArray(scratchBuffer, 0, fillLength); numFillBytes -= fillLength; } @@ -608,11 +613,25 @@ else if (fd.getSourceType() == HpccSrcType.QSTRING) if (fd.getFieldType() == FieldType.VAR_STRING) { - byte nullByte = '\0'; - this.buffer.put(nullByte); if (fd.getSourceType().isUTF16()) { - this.buffer.put(nullByte); + boolean needsNullAdded = data.length < 2 + || data[data.length - 1] != NULL_TERMINATOR + || data[data.length - 2] != NULL_TERMINATOR; + if (needsNullAdded) + { + this.buffer.put(NULL_TERMINATOR); + this.buffer.put(NULL_TERMINATOR); + } + } + else + { + boolean needsNullAdded = data.length < 1 + || data[data.length - 1] != NULL_TERMINATOR; + if (needsNullAdded) + { + this.buffer.put(NULL_TERMINATOR); + } } } } diff --git a/dfsclient/src/test/java/org/hpccsystems/dfs/client/DFSReadWriteTest.java b/dfsclient/src/test/java/org/hpccsystems/dfs/client/DFSReadWriteTest.java index 6d2720405..eb3f2a2fa 100644 --- a/dfsclient/src/test/java/org/hpccsystems/dfs/client/DFSReadWriteTest.java +++ b/dfsclient/src/test/java/org/hpccsystems/dfs/client/DFSReadWriteTest.java @@ -88,6 +88,93 @@ public void readWithForcedTimeoutTest() throws Exception assertEquals("Not all records loaded",expectedCounts[0], records.size()); } + @Test + public void nullCharTests() throws Exception + { + // Unicode + { + FieldDef recordDef = null; + { + FieldDef[] fieldDefs = new FieldDef[2]; + fieldDefs[0] = new FieldDef("uni", FieldType.STRING, "STRING", 100, false, false, HpccSrcType.UTF16LE, new FieldDef[0]); + fieldDefs[1] = new FieldDef("fixedUni", FieldType.STRING, "STRING", 100, true, false, HpccSrcType.UTF16LE, new FieldDef[0]); + + recordDef = new FieldDef("RootRecord", FieldType.RECORD, "rec", 4, false, false, HpccSrcType.LITTLE_ENDIAN, fieldDefs); + } + + List records = new ArrayList(); + int maxUTF16BMPChar = Character.MAX_CODE_POINT; + for (int i = 0; i < maxUTF16BMPChar; i++) { + String strMidEOS = ""; + for (int j = 0; j < 98; j++, i++) { + if (j == 50) { + strMidEOS += "\0"; + } + strMidEOS += Character.toString((char) i); + } + + Object[] fields = {strMidEOS, strMidEOS}; + records.add(new HPCCRecord(fields, recordDef)); + } + + String fileName = "unicode::all_chars::test"; + writeFile(records, fileName, recordDef, connTO); + + HPCCFile file = new HPCCFile(fileName, connString , hpccUser, hpccPass); + List readRecords = readFile(file, 10000, false, false, BinaryRecordReader.TRIM_STRINGS); + + for (int i = 0; i < records.size(); i++) { + HPCCRecord record = records.get(i); + HPCCRecord readRecord = readRecords.get(i); + if (readRecord.equals(record) == false) + { + System.out.println("Record: " + i + " did not match\n" + record + "\n" + readRecord); + } + } + } + + // SBC / ASCII + { + FieldDef recordDef = null; + { + FieldDef[] fieldDefs = new FieldDef[2]; + fieldDefs[0] = new FieldDef("str", FieldType.STRING, "STRING", 10, false, false, HpccSrcType.SINGLE_BYTE_CHAR, new FieldDef[0]); + fieldDefs[1] = new FieldDef("fixedStr", FieldType.STRING, "STRING", 10, true, false, HpccSrcType.SINGLE_BYTE_CHAR, new FieldDef[0]); + + recordDef = new FieldDef("RootRecord", FieldType.RECORD, "rec", 4, false, false, HpccSrcType.LITTLE_ENDIAN, fieldDefs); + } + + List records = new ArrayList(); + for (int i = 0; i < 255; i++) { + String strMidEOS = ""; + for (int j = 0; j < 9; j++, i++) { + if (j == 5) { + strMidEOS += "\0"; + } + strMidEOS += Character.toString((char) i); + } + + Object[] fields = {strMidEOS, strMidEOS}; + records.add(new HPCCRecord(fields, recordDef)); + } + + String fileName = "ascii::all_chars::test"; + writeFile(records, fileName, recordDef, connTO); + + HPCCFile file = new HPCCFile(fileName, connString , hpccUser, hpccPass); + List readRecords = readFile(file, 10000, false, false, BinaryRecordReader.TRIM_STRINGS); + + for (int i = 0; i < records.size(); i++) { + HPCCRecord record = records.get(i); + HPCCRecord readRecord = readRecords.get(i); + if (readRecord.equals(record) == false) + { + System.out.println("Record: " + i + " did not match\n" + record + "\n" + readRecord); + } + } + } + } + @Test public void integrationReadWriteBackTest() throws Exception {