From 52177cc8c78901b2085bc1d8beb0df10c24afadb Mon Sep 17 00:00:00 2001 From: Rich Gillam Date: Thu, 10 Aug 2023 16:54:19 -0700 Subject: [PATCH] ICU-22364 Modify ulocimp_getRegionForSupplementalData() to ignore the subdivision code, rather than requiring it to be "zzzz". --- icu4c/source/common/loclikely.cpp | 14 +++++++++----- icu4c/source/test/cintltst/ccaltst.c | 19 +++++++++++++++++-- .../core/src/com/ibm/icu/util/ULocale.java | 10 ++++++---- .../test/calendar/CalendarRegressionTest.java | 12 ++++++++++++ .../dev/test/calendar/IBMCalendarTest.java | 6 ++++++ 5 files changed, 50 insertions(+), 11 deletions(-) diff --git a/icu4c/source/common/loclikely.cpp b/icu4c/source/common/loclikely.cpp index 5235daaf59df..d361dc96a351 100644 --- a/icu4c/source/common/loclikely.cpp +++ b/icu4c/source/common/loclikely.cpp @@ -811,15 +811,19 @@ ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion, // First check for rg keyword value int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus); - if (U_FAILURE(rgStatus) || rgLen != 6) { + if (U_FAILURE(rgStatus) || rgLen < 3 || rgLen > 7) { rgLen = 0; } else { // rgBuf guaranteed to be zero terminated here, with text len 6 - char *rgPtr = rgBuf; - for (; *rgPtr!= 0; rgPtr++) { - *rgPtr = uprv_toupper(*rgPtr); + // chop off the subdivision code (which will generally be "zzzz" anyway) + if (uprv_isASCIILetter(rgBuf[0])) { + rgLen = 2; + rgBuf[0] = uprv_toupper(rgBuf[0]); + rgBuf[1] = uprv_toupper(rgBuf[1]); + } else { + // assume three-digit region code + rgLen = 3; } - rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0; } if (rgLen == 0) { diff --git a/icu4c/source/test/cintltst/ccaltst.c b/icu4c/source/test/cintltst/ccaltst.c index 19b655d4f2d0..0cf10822f910 100644 --- a/icu4c/source/test/cintltst/ccaltst.c +++ b/icu4c/source/test/cintltst/ccaltst.c @@ -101,7 +101,10 @@ static const UCalGetTypeTest ucalGetTypeTests[] = { { "fr_CH", UCAL_DEFAULT, "gregorian" }, { "fr_SA", UCAL_DEFAULT, "islamic-umalqura" }, { "fr_CH@rg=sazzzz", UCAL_DEFAULT, "islamic-umalqura" }, + { "fr_CH@rg=sa14", UCAL_DEFAULT, "islamic-umalqura" }, { "fr_CH@calendar=japanese;rg=sazzzz", UCAL_DEFAULT, "japanese" }, + { "fr_CH@rg=twcyi", UCAL_DEFAULT, "gregorian" }, // test for ICU-22364 + { "fr_CH@rg=ugw", UCAL_DEFAULT, "gregorian" }, // test for ICU-22364 { "fr_TH@rg=SA", UCAL_DEFAULT, "buddhist" }, /* ignore malformed rg tag */ { "th@rg=SA", UCAL_DEFAULT, "buddhist" }, /* ignore malformed rg tag */ { "", UCAL_GREGORIAN, "gregorian" }, @@ -1613,7 +1616,7 @@ void TestGregorianChange() { } static void TestGetKeywordValuesForLocale() { -#define PREFERRED_SIZE 16 +#define PREFERRED_SIZE 26 #define MAX_NUMBER_OF_KEYWORDS 5 const char *PREFERRED[PREFERRED_SIZE][MAX_NUMBER_OF_KEYWORDS+1] = { { "root", "gregorian", NULL, NULL, NULL, NULL }, @@ -1632,8 +1635,20 @@ static void TestGetKeywordValuesForLocale() { { "zh_TW", "gregorian", "roc", "chinese", NULL, NULL }, { "ar_IR", "persian", "gregorian", "islamic", "islamic-civil", "islamic-tbla" }, { "th@rg=SAZZZZ", "islamic-umalqura", "gregorian", "islamic", "islamic-rgsa", NULL }, + + // tests for ICU-22364 + { "zh_CN@rg=TW", "gregorian", "chinese", NULL, NULL, NULL }, // invalid subdivision code + { "zh_CN@rg=TWzzzz", "gregorian", "roc", "chinese", NULL, NULL }, // whole region + { "zh_TW@rg=TWxxxx", "gregorian", "roc", "chinese", NULL, NULL }, // invalid subdivision code (ignored) + { "zh_TW@rg=ARa", "gregorian", NULL, NULL, NULL, NULL }, // single-letter subdivision code + { "zh_TW@rg=AT1", "gregorian", NULL, NULL, NULL, NULL }, // single-digit subdivision code + { "zh_TW@rg=USca", "gregorian", NULL, NULL, NULL, NULL }, // two-letter subdivision code + { "zh_TW@rg=IT53", "gregorian", NULL, NULL, NULL, NULL }, // two-digit subdivision code + { "zh_TW@rg=AUnsw", "gregorian", NULL, NULL, NULL, NULL }, // three-letter subdivision code + { "zh_TW@rg=EE130", "gregorian", NULL, NULL, NULL, NULL }, // three-digit subdivision code + { "zh_TW@rg=417zzzz", "gregorian", NULL, NULL, NULL, NULL }, // three-digit region code }; - const int32_t EXPECTED_SIZE[PREFERRED_SIZE] = { 1, 1, 1, 1, 2, 2, 2, 5, 5, 2, 2, 2, 1, 3, 5, 4 }; + const int32_t EXPECTED_SIZE[PREFERRED_SIZE] = { 1, 1, 1, 1, 2, 2, 2, 5, 5, 2, 2, 2, 1, 3, 5, 4, 2, 3, 3, 1, 1, 1, 1, 1, 1, 1 }; UErrorCode status = U_ZERO_ERROR; int32_t i, size, j; UEnumeration *all, *pref; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java b/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java index 50241cf0926d..e68396cf119b 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java @@ -982,10 +982,12 @@ public static String getCountry(String localeID) { public static String getRegionForSupplementalData( ULocale locale, boolean inferRegion) { String region = locale.getKeywordValue("rg"); - if (region != null && region.length() == 6) { - String regionUpper = AsciiUtil.toUpperString(region); - if (regionUpper.endsWith("ZZZZ")) { - return regionUpper.substring(0,2); + if (region != null && region.length() >= 3 && region.length() <= 7) { + if (Character.isLetter(region.charAt(0))) { + return AsciiUtil.toUpperString(region.substring(0, 2)); + } else { + // assume three-digit region code + return region.substring(0, 3); } } region = locale.getCountry(); diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/calendar/CalendarRegressionTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/calendar/CalendarRegressionTest.java index 6bd6eb5e6866..2d0e07225abd 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/calendar/CalendarRegressionTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/calendar/CalendarRegressionTest.java @@ -2182,6 +2182,18 @@ public void TestGetKeywordValuesForLocale(){ {"zh_TW", "gregorian", "roc", "chinese"}, {"ar_IR", "persian", "gregorian", "islamic", "islamic-civil", "islamic-tbla"}, {"th@rg=SAZZZZ", "islamic-umalqura", "gregorian", "islamic", "islamic-rgsa"}, + + // tests for ICU-22364 + { "zh_CN@rg=TW", "gregorian", "chinese" }, // invalid subdivision code + { "zh_CN@rg=TWzzzz", "gregorian", "roc", "chinese", }, // whole region + { "zh_TW@rg=TWxxxx", "gregorian", "roc", "chinese" }, // invalid subdivision code (ignored) + { "zh_TW@rg=ARa", "gregorian" }, // single-letter subdivision code + { "zh_TW@rg=AT1", "gregorian" }, // single-digit subdivision code + { "zh_TW@rg=USca", "gregorian" }, // two-letter subdivision code + { "zh_TW@rg=IT53", "gregorian" }, // two-digit subdivision code + { "zh_TW@rg=AUnsw", "gregorian" }, // three-letter subdivision code + { "zh_TW@rg=EE130", "gregorian" }, // three-digit subdivision code + { "zh_TW@rg=417zzzz", "gregorian" }, // three-digit region code }; String[] ALL = Calendar.getKeywordValuesForLocale("calendar", ULocale.getDefault(), false); diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/calendar/IBMCalendarTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/calendar/IBMCalendarTest.java index 8be6e47f3574..0bc673aeabb3 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/calendar/IBMCalendarTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/calendar/IBMCalendarTest.java @@ -1099,7 +1099,10 @@ public void TestTypes() { "fr_CH", "fr_SA", "fr_CH@rg=sazzzz", + "fr_CH@rg=sa14", "fr_CH@calendar=japanese;rg=sazzzz", + "fr_CH@rg=twcyi", // test for ICU-22364 + "fr_CH@rg=ugw", // test for ICU-22364 "fr_TH@rg=SA", // ignore malformed rg tag, use buddhist "th@rg=SA", // ignore malformed rg tag, use buddhist }; @@ -1121,7 +1124,10 @@ public void TestTypes() { "gregorian", "islamic-umalqura", "islamic-umalqura", + "islamic-umalqura", "japanese", + "gregorian", + "gregorian", "buddhist", "buddhist", };