diff --git a/icu4c/source/i18n/measunit.cpp b/icu4c/source/i18n/measunit.cpp index 088c136bcf0d..965e47fe956e 100644 --- a/icu4c/source/i18n/measunit.cpp +++ b/icu4c/source/i18n/measunit.cpp @@ -2563,6 +2563,18 @@ static int32_t binarySearch( return -1; } +/** + * Helper function to get the subtype range for a given type index. + * @param typeIdx the type index (0 to UPRV_LENGTHOF(gTypes)-1) + * @param start will be set to the starting index in gSubTypes + * @param end will be set to the ending index in gSubTypes (exclusive) + */ +static void getSubtypeRange(int32_t typeIdx, int32_t &start, int32_t &end) { + U_ASSERT(typeIdx >= 0 && typeIdx < UPRV_LENGTHOF(gTypes)); + start = gOffsets[typeIdx]; + end = gOffsets[typeIdx + 1]; +} + MeasureUnit::MeasureUnit() : MeasureUnit(kBaseTypeIdx, kBaseSubTypeIdx) { } @@ -2680,7 +2692,9 @@ int32_t MeasureUnit::getAvailable( } int32_t idx = 0; for (int32_t typeIdx = 0; typeIdx < UPRV_LENGTHOF(gTypes); ++typeIdx) { - int32_t len = gOffsets[typeIdx + 1] - gOffsets[typeIdx]; + int32_t start, end; + getSubtypeRange(typeIdx, start, end); + int32_t len = end - start; for (int32_t subTypeIdx = 0; subTypeIdx < len; ++subTypeIdx) { dest[idx].setTo(typeIdx, subTypeIdx); ++idx; @@ -2702,7 +2716,9 @@ int32_t MeasureUnit::getAvailable( if (typeIdx == -1) { return 0; } - int32_t len = gOffsets[typeIdx + 1] - gOffsets[typeIdx]; + int32_t start, end; + getSubtypeRange(typeIdx, start, end); + int32_t len = end - start; if (destCapacity < len) { errorCode = U_BUFFER_OVERFLOW_ERROR; return len; @@ -2729,6 +2745,26 @@ StringEnumeration* MeasureUnit::getAvailableTypes(UErrorCode &errorCode) { return result; } +bool MeasureUnit::validateAndGet(StringPiece type, StringPiece subtype, MeasureUnit &result) { + // Find the type index using binary search + int32_t typeIdx = binarySearch(gTypes, 0, UPRV_LENGTHOF(gTypes), type); + if (typeIdx == -1) { + return false; // Type not found + } + + // Find the subtype within the type's range using binary search + int32_t start, end; + getSubtypeRange(typeIdx, start, end); + int32_t subtypeIdx = binarySearch(gSubTypes, start, end, subtype); + if (subtypeIdx == -1) { + return false; // Subtype not found + } + + // Create the MeasureUnit and return it + result.setTo(typeIdx, subtypeIdx - start); + return true; +} + bool MeasureUnit::findBySubType(StringPiece subType, MeasureUnit* output) { // Sanity checking kCurrencyOffset and final entry in gOffsets U_ASSERT(uprv_strcmp(gTypes[kCurrencyOffset], "currency") == 0); @@ -2739,9 +2775,11 @@ bool MeasureUnit::findBySubType(StringPiece subType, MeasureUnit* output) { if (t == kCurrencyOffset) { continue; } - int32_t st = binarySearch(gSubTypes, gOffsets[t], gOffsets[t + 1], subType); + int32_t start, end; + getSubtypeRange(t, start, end); + int32_t st = binarySearch(gSubTypes, start, end, subType); if (st >= 0) { - output->setTo(t, st - gOffsets[t]); + output->setTo(t, st - start); return true; } } diff --git a/icu4c/source/i18n/number_skeletons.cpp b/icu4c/source/i18n/number_skeletons.cpp index 67a38dad0737..1d21d805a0e2 100644 --- a/icu4c/source/i18n/number_skeletons.cpp +++ b/icu4c/source/i18n/number_skeletons.cpp @@ -1072,25 +1072,12 @@ void blueprint_helpers::parseMeasureUnitOption(const StringSegment& segment, Mac CharString subType; SKELETON_UCHAR_TO_CHAR(subType, stemString, firstHyphen + 1, stemString.length(), status); - // Note: the largest type as of this writing (Aug 2020) is "volume", which has 33 units. - static constexpr int32_t CAPACITY = 40; - MeasureUnit units[CAPACITY]; - UErrorCode localStatus = U_ZERO_ERROR; - int32_t numUnits = MeasureUnit::getAvailable(type.data(), units, CAPACITY, localStatus); - if (U_FAILURE(localStatus)) { - // More than 30 units in this type? - status = U_INTERNAL_PROGRAM_ERROR; + MeasureUnit unit; + if (MeasureUnit::validateAndGet(type.toStringPiece(), subType.toStringPiece(), unit)) { + macros.unit = unit; return; - } - for (int32_t i = 0; i < numUnits; i++) { - auto& unit = units[i]; - if (uprv_strcmp(subType.data(), unit.getSubtype()) == 0) { - macros.unit = unit; - return; - } - } - - // throw new SkeletonSyntaxException("Unknown measure unit", segment); + } + status = U_NUMBER_SKELETON_SYNTAX_ERROR; } diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h index b5f3a2986f74..d8c3e7c20027 100644 --- a/icu4c/source/i18n/unicode/measunit.h +++ b/icu4c/source/i18n/unicode/measunit.h @@ -720,6 +720,22 @@ class U_I18N_API MeasureUnit: public UObject { */ static StringEnumeration* getAvailableTypes(UErrorCode &errorCode); +#ifndef U_HIDE_INTERNAL_API + /** + * Validates that a specific type and subtype combination exists and retrieve the unit. + * + *
Note: This is more efficient than calling getAvailable() when you only need
+ * to validate and retrieve a single unit.
+ *
+ * @param type the unit type (e.g., "length", "mass", "volume")
+ * @param subtype the unit subtype (e.g., "meter", "kilogram", "liter")
+ * @param result if the unit is valid, this will be set to the MeasureUnit
+ * @return true if the type/subtype combination is valid, false otherwise
+ * @internal
+ */
+ static bool validateAndGet(StringPiece type, StringPiece subtype, MeasureUnit &result);
+#endif /* U_HIDE_INTERNAL_API */
+
/**
* Return the class ID for this class. This is useful only for comparing to
* a return value from getDynamicClassID(). For example:
diff --git a/icu4c/source/test/intltest/numbertest_api.cpp b/icu4c/source/test/intltest/numbertest_api.cpp
index 695a00936c2c..c5f96a4ad993 100644
--- a/icu4c/source/test/intltest/numbertest_api.cpp
+++ b/icu4c/source/test/intltest/numbertest_api.cpp
@@ -888,6 +888,17 @@ void NumberFormatterApiTest::unitMeasure() {
Locale("en"),
100,
u"100");
+
+ // This test checks the behavior of using a fixed-size length for the units array with a fixed number of units.
+ // The array size is fixed, and we are now using a binary search approach.
+ assertFormatSingle(
+ u"One of the latest unit, volume-teaspoon",
+ u"measure-unit/volume-teaspoon",
+ u"unit/teaspoon",
+ NumberFormatter::with().unit(MeasureUnit::forIdentifier("teaspoon", status)),
+ Locale("en-US"),
+ 100,
+ u"100 tsp");
// TODO: desired behaviour for this "pathological" case?
// Since this is pointless, we don't test that its behaviour doesn't change.
diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/number/NumberSkeletonImpl.java b/icu4j/main/core/src/main/java/com/ibm/icu/number/NumberSkeletonImpl.java
index 9cefd3860288..e3a31d7b660c 100644
--- a/icu4j/main/core/src/main/java/com/ibm/icu/number/NumberSkeletonImpl.java
+++ b/icu4j/main/core/src/main/java/com/ibm/icu/number/NumberSkeletonImpl.java
@@ -1095,12 +1095,10 @@ private static void parseMeasureUnitOption(StringSegment segment, MacroProps mac
}
String type = segment.subSequence(0, firstHyphen).toString();
String subType = segment.subSequence(firstHyphen + 1, segment.length()).toString();
- Set