Skip to content

Commit

Permalink
ICU-22364 Modify ulocimp_getRegionForSupplementalData() to ignore the…
Browse files Browse the repository at this point in the history
… subdivision code, rather than requiring it to be "zzzz".
  • Loading branch information
richgillam committed Aug 21, 2023
1 parent 248b1c2 commit 52177cc
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 11 deletions.
14 changes: 9 additions & 5 deletions icu4c/source/common/loclikely.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -811,15 +811,19 @@ ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,

// First check for rg keyword value
int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus);
if (U_FAILURE(rgStatus) || rgLen != 6) {
if (U_FAILURE(rgStatus) || rgLen < 3 || rgLen > 7) {
rgLen = 0;
} else {
// rgBuf guaranteed to be zero terminated here, with text len 6
char *rgPtr = rgBuf;
for (; *rgPtr!= 0; rgPtr++) {
*rgPtr = uprv_toupper(*rgPtr);
// chop off the subdivision code (which will generally be "zzzz" anyway)
if (uprv_isASCIILetter(rgBuf[0])) {
rgLen = 2;
rgBuf[0] = uprv_toupper(rgBuf[0]);
rgBuf[1] = uprv_toupper(rgBuf[1]);
} else {
// assume three-digit region code
rgLen = 3;
}
rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0;
}

if (rgLen == 0) {
Expand Down
19 changes: 17 additions & 2 deletions icu4c/source/test/cintltst/ccaltst.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,10 @@ static const UCalGetTypeTest ucalGetTypeTests[] = {
{ "fr_CH", UCAL_DEFAULT, "gregorian" },
{ "fr_SA", UCAL_DEFAULT, "islamic-umalqura" },
{ "fr_CH@rg=sazzzz", UCAL_DEFAULT, "islamic-umalqura" },
{ "fr_CH@rg=sa14", UCAL_DEFAULT, "islamic-umalqura" },
{ "fr_CH@calendar=japanese;rg=sazzzz", UCAL_DEFAULT, "japanese" },
{ "fr_CH@rg=twcyi", UCAL_DEFAULT, "gregorian" }, // test for ICU-22364
{ "fr_CH@rg=ugw", UCAL_DEFAULT, "gregorian" }, // test for ICU-22364
{ "fr_TH@rg=SA", UCAL_DEFAULT, "buddhist" }, /* ignore malformed rg tag */
{ "th@rg=SA", UCAL_DEFAULT, "buddhist" }, /* ignore malformed rg tag */
{ "", UCAL_GREGORIAN, "gregorian" },
Expand Down Expand Up @@ -1613,7 +1616,7 @@ void TestGregorianChange() {
}

static void TestGetKeywordValuesForLocale() {
#define PREFERRED_SIZE 16
#define PREFERRED_SIZE 26
#define MAX_NUMBER_OF_KEYWORDS 5
const char *PREFERRED[PREFERRED_SIZE][MAX_NUMBER_OF_KEYWORDS+1] = {
{ "root", "gregorian", NULL, NULL, NULL, NULL },
Expand All @@ -1632,8 +1635,20 @@ static void TestGetKeywordValuesForLocale() {
{ "zh_TW", "gregorian", "roc", "chinese", NULL, NULL },
{ "ar_IR", "persian", "gregorian", "islamic", "islamic-civil", "islamic-tbla" },
{ "th@rg=SAZZZZ", "islamic-umalqura", "gregorian", "islamic", "islamic-rgsa", NULL },

// tests for ICU-22364
{ "zh_CN@rg=TW", "gregorian", "chinese", NULL, NULL, NULL }, // malformed rg value (no subdivision code), so rg is ignored
{ "zh_CN@rg=TWzzzz", "gregorian", "roc", "chinese", NULL, NULL }, // whole region
{ "zh_TW@rg=TWxxxx", "gregorian", "roc", "chinese", NULL, NULL }, // invalid subdivision code (ignored)
{ "zh_TW@rg=ARa", "gregorian", NULL, NULL, NULL, NULL }, // single-letter subdivision code
{ "zh_TW@rg=AT1", "gregorian", NULL, NULL, NULL, NULL }, // single-digit subdivision code
{ "zh_TW@rg=USca", "gregorian", NULL, NULL, NULL, NULL }, // two-letter subdivision code
{ "zh_TW@rg=IT53", "gregorian", NULL, NULL, NULL, NULL }, // two-digit subdivision code
{ "zh_TW@rg=AUnsw", "gregorian", NULL, NULL, NULL, NULL }, // three-letter subdivision code
{ "zh_TW@rg=EE130", "gregorian", NULL, NULL, NULL, NULL }, // three-digit subdivision code
{ "zh_TW@rg=417zzzz", "gregorian", NULL, NULL, NULL, NULL }, // three-digit region code
};
const int32_t EXPECTED_SIZE[PREFERRED_SIZE] = { 1, 1, 1, 1, 2, 2, 2, 5, 5, 2, 2, 2, 1, 3, 5, 4 };
const int32_t EXPECTED_SIZE[PREFERRED_SIZE] = { 1, 1, 1, 1, 2, 2, 2, 5, 5, 2, 2, 2, 1, 3, 5, 4, 2, 3, 3, 1, 1, 1, 1, 1, 1, 1 };
UErrorCode status = U_ZERO_ERROR;
int32_t i, size, j;
UEnumeration *all, *pref;
Expand Down
10 changes: 6 additions & 4 deletions icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java
Original file line number Diff line number Diff line change
Expand Up @@ -982,10 +982,12 @@ public static String getCountry(String localeID) {
public static String getRegionForSupplementalData(
ULocale locale, boolean inferRegion) {
String region = locale.getKeywordValue("rg");
if (region != null && region.length() == 6) {
String regionUpper = AsciiUtil.toUpperString(region);
if (regionUpper.endsWith("ZZZZ")) {
return regionUpper.substring(0,2);
if (region != null && region.length() >= 3 && region.length() <= 7) {
if (Character.isLetter(region.charAt(0))) {
return AsciiUtil.toUpperString(region.substring(0, 2));
} else {
// assume three-digit region code
return region.substring(0, 3);
}
}
region = locale.getCountry();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2182,6 +2182,18 @@ public void TestGetKeywordValuesForLocale(){
{"zh_TW", "gregorian", "roc", "chinese"},
{"ar_IR", "persian", "gregorian", "islamic", "islamic-civil", "islamic-tbla"},
{"th@rg=SAZZZZ", "islamic-umalqura", "gregorian", "islamic", "islamic-rgsa"},

// tests for ICU-22364
{ "zh_CN@rg=TW", "gregorian", "chinese" }, // malformed rg value (no subdivision code), so rg is ignored
{ "zh_CN@rg=TWzzzz", "gregorian", "roc", "chinese", }, // whole region
{ "zh_TW@rg=TWxxxx", "gregorian", "roc", "chinese" }, // invalid subdivision code (ignored)
{ "zh_TW@rg=ARa", "gregorian" }, // single-letter subdivision code
{ "zh_TW@rg=AT1", "gregorian" }, // single-digit subdivision code
{ "zh_TW@rg=USca", "gregorian" }, // two-letter subdivision code
{ "zh_TW@rg=IT53", "gregorian" }, // two-digit subdivision code
{ "zh_TW@rg=AUnsw", "gregorian" }, // three-letter subdivision code
{ "zh_TW@rg=EE130", "gregorian" }, // three-digit subdivision code
{ "zh_TW@rg=417zzzz", "gregorian" }, // three-digit region code
};

String[] ALL = Calendar.getKeywordValuesForLocale("calendar", ULocale.getDefault(), false);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1099,7 +1099,10 @@ public void TestTypes() {
"fr_CH",
"fr_SA",
"fr_CH@rg=sazzzz",
"fr_CH@rg=sa14",
"fr_CH@calendar=japanese;rg=sazzzz",
"fr_CH@rg=twcyi", // test for ICU-22364
"fr_CH@rg=ugw", // test for ICU-22364
"fr_TH@rg=SA", // ignore malformed rg tag, use buddhist
"th@rg=SA", // ignore malformed rg tag, use buddhist
};
Expand All @@ -1121,7 +1124,10 @@ public void TestTypes() {
"gregorian",
"islamic-umalqura",
"islamic-umalqura",
"islamic-umalqura",
"japanese",
"gregorian",
"gregorian",
"buddhist",
"buddhist",
};
Expand Down

1 comment on commit 52177cc

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Performance Alert ⚠️

Possible performance regression was detected for benchmark.
The benchmark result of this commit is worse than the previous benchmark result, exceeding the threshold ratio of 2.

| Benchmark suite | Current: 52177cc | Previous: 248b1c2 | Ratio |
|---|---|---|---|
| TestCharsetEncoderICU | 9.417777782361036 ns/iter | 4.069168091432872 ns/iter | 2.31 |

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.