Skip to content

Commit

Permalink
ICU-22453 Fix non null terminated buffer issue.
Browse files Browse the repository at this point in the history
See #2543
  • Loading branch information
FrankYFTang committed Aug 9, 2023
1 parent 56850c9 commit ca1435c
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 36 deletions.
33 changes: 17 additions & 16 deletions icu4c/source/common/locid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,7 @@ class AliasDataBuilder {
LocalMemory<int32_t>& replacementIndexes,
int32_t &length,
void (*checkType)(const char* type),
void (*checkReplacement)(const UnicodeString& replacement),
void (*checkReplacement)(const UChar* replacement),
UErrorCode &status);

// Read the languageAlias data from alias to
Expand Down Expand Up @@ -700,7 +700,7 @@ AliasDataBuilder::readAlias(
LocalMemory<int32_t>& replacementIndexes,
int32_t &length,
void (*checkType)(const char* type),
void (*checkReplacement)(const UnicodeString& replacement),
void (*checkReplacement)(const UChar* replacement),
UErrorCode &status) {
if (U_FAILURE(status)) {
return;
Expand All @@ -720,8 +720,8 @@ AliasDataBuilder::readAlias(
LocalUResourceBundlePointer res(
ures_getNextResource(alias, nullptr, &status));
const char* aliasFrom = ures_getKey(res.getAlias());
UnicodeString aliasTo =
ures_getUnicodeStringByKey(res.getAlias(), "replacement", &status);
const UChar* aliasTo =
ures_getStringByKey(res.getAlias(), "replacement", nullptr, &status);
if (U_FAILURE(status)) return;

checkType(aliasFrom);
Expand Down Expand Up @@ -766,7 +766,7 @@ AliasDataBuilder::readLanguageAlias(
#else
[](const char*) {},
#endif
[](const UnicodeString&) {}, status);
[](const UChar*) {}, status);
}

/**
Expand All @@ -790,12 +790,12 @@ AliasDataBuilder::readScriptAlias(
[](const char* type) {
U_ASSERT(uprv_strlen(type) == 4);
},
[](const UnicodeString& replacement) {
U_ASSERT(replacement.length() == 4);
[](const UChar* replacement) {
U_ASSERT(u_strlen(replacement) == 4);
},
#else
[](const char*) {},
[](const UnicodeString&) { },
[](const UChar*) { },
#endif
status);
}
Expand Down Expand Up @@ -824,7 +824,7 @@ AliasDataBuilder::readTerritoryAlias(
#else
[](const char*) {},
#endif
[](const UnicodeString&) { },
[](const UChar*) { },
status);
}

Expand All @@ -851,15 +851,16 @@ AliasDataBuilder::readVariantAlias(
U_ASSERT(uprv_strlen(type) != 4 ||
(type[0] >= '0' && type[0] <= '9'));
},
[](const UnicodeString& replacement) {
U_ASSERT(replacement.length() >= 4 && replacement.length() <= 8);
U_ASSERT(replacement.length() != 4 ||
(replacement.charAt(0) >= u'0' &&
replacement.charAt(0) <= u'9'));
[](const UChar* replacement) {
int32_t len = u_strlen(replacement);
U_ASSERT(len >= 4 && len <= 8);
U_ASSERT(len != 4 ||
(*replacement >= u'0' &&
*replacement <= u'9'));
},
#else
[](const char*) {},
[](const UnicodeString&) { },
[](const UChar*) { },
#endif
status);
}
Expand Down Expand Up @@ -888,7 +889,7 @@ AliasDataBuilder::readSubdivisionAlias(
#else
[](const char*) {},
#endif
[](const UnicodeString&) { },
[](const UChar*) { },
status);
}

Expand Down
24 changes: 14 additions & 10 deletions icu4c/source/common/loclikelysubtags.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,11 @@ struct XLikelySubtagsData {
return false;
}
for (int i = 0; i < length; ++i) {
stringArray.getValue(i, value); // returns true because i < length
rawIndexes[i] = strings.add(value.getUnicodeString(errorCode), errorCode);
if (U_FAILURE(errorCode)) { return false; }
if (stringArray.getValue(i, value)) { // returns true because i < length
int32_t strLength = 0;
rawIndexes[i] = strings.add(value.getString(strLength, errorCode), errorCode);
if (U_FAILURE(errorCode)) { return false; }
}
}
}
return true;
Expand All @@ -261,10 +263,10 @@ struct XLikelySubtagsData {
lang[0] = 'a' + ((encoded % 27) - 1);
lang[1] = 'a' + (((encoded / 27 ) % 27) - 1);
if (encoded / (27 * 27) == 0) {
return UnicodeString(lang, 2);
return UnicodeString(lang, 2, US_INV);
}
lang[2] = 'a' + ((encoded / (27 * 27)) - 1);
return UnicodeString(lang, 3);
return UnicodeString(lang, 3, US_INV);
}
UnicodeString toScript(int encoded) {
if (encoded == 0) {
Expand All @@ -278,7 +280,8 @@ struct XLikelySubtagsData {
if (script == nullptr) {
return UNICODE_STRING_SIMPLE("");
}
return UnicodeString(script, 4);
U_ASSERT(uprv_strlen(script) == 4);
return UnicodeString(script, 4, US_INV);
}
UnicodeString m49IndexToCode(const ResourceArray &m49Array, ResourceValue &value, int index, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) {
Expand Down Expand Up @@ -306,7 +309,7 @@ struct XLikelySubtagsData {
char region[2];
region[0] = 'A' + ((encoded % 27) - 1);
region[1] = 'A' + (((encoded / 27) % 27) - 1);
return UnicodeString(region, 2);
return UnicodeString(region, 2, US_INV);
}

bool readLSREncodedStrings(const ResourceTable &table, const char* key, ResourceValue &value, const ResourceArray& m49Array,
Expand All @@ -321,9 +324,10 @@ struct XLikelySubtagsData {
return false;
}
for (int i = 0; i < length; ++i) {
rawIndexes[i*3] = strings.add(toLanguage(vectors[i]), errorCode);
rawIndexes[i*3+1] = strings.add(toScript(vectors[i]), errorCode);
rawIndexes[i*3+2] = strings.add(toRegion(m49Array, value, vectors[i], errorCode), errorCode);
rawIndexes[i*3] = strings.addByValue(toLanguage(vectors[i]), errorCode);
rawIndexes[i*3+1] = strings.addByValue(toScript(vectors[i]), errorCode);
rawIndexes[i*3+2] = strings.addByValue(
toRegion(m49Array, value, vectors[i], errorCode), errorCode);
if (U_FAILURE(errorCode)) { return false; }
}
length *= 3;
Expand Down
42 changes: 32 additions & 10 deletions icu4c/source/common/uniquecharstr.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "charstr.h"
#include "uassert.h"
#include "uhash.h"
#include "cmemory.h"

U_NAMESPACE_BEGIN

Expand Down Expand Up @@ -47,22 +48,20 @@ class UniqueCharStrings {
}

/**
* Adds a string and returns a unique number for it.
* The string's buffer contents must not change, nor move around in memory,
* Adds a NUL-terminated string and returns a unique number for it.
* The string must not change, nor move around in memory,
* while this UniqueCharStrings is in use.
* The string contents must be NUL-terminated exactly at s.length().
*
* Best used with read-only-alias UnicodeString objects that point to
* stable storage, such as strings returned by resource bundle functions.
* Best used with string data in a stable storage, such as strings returned
* by resource bundle functions.
*/
int32_t add(const UnicodeString &s, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return 0; }
int32_t add(const char16_t*p, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return -1; }
if (isFrozen) {
errorCode = U_NO_WRITE_PERMISSION;
return 0;
return -1;
}
// The string points into the resource bundle.
const char16_t *p = s.getBuffer();
int32_t oldIndex = uhash_geti(&map, p);
if (oldIndex != 0) { // found duplicate
return oldIndex;
Expand All @@ -71,11 +70,33 @@ class UniqueCharStrings {
// The strings object is also terminated with one implicit NUL.
strings->append(0, errorCode);
int32_t newIndex = strings->length();
strings->appendInvariantChars(s, errorCode);
strings->appendInvariantChars(p, u_strlen(p), errorCode);
uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode);
return newIndex;
}

/**
* Adds a unicode string by value and returns a unique number for it.
*/
int32_t addByValue(UnicodeString s, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return -1; }
if (isFrozen) {
errorCode = U_NO_WRITE_PERMISSION;
return -1;
}
int32_t oldIndex = uhash_geti(&map, s.getTerminatedBuffer());
if (oldIndex != 0) { // found duplicate
return oldIndex;
}
// We need to store the string content of the UnicodeString.
UnicodeString *key = keyStore.create(s);
if (key == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return -1;
}
return add(key->getTerminatedBuffer(), errorCode);
}

void freeze() { isFrozen = true; }

/**
Expand All @@ -90,6 +111,7 @@ class UniqueCharStrings {
private:
UHashtable map;
CharString *strings;
MemoryPool<UnicodeString> keyStore;
bool isFrozen = false;
};

Expand Down

0 comments on commit ca1435c

Please sign in to comment.