Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 42 additions & 4 deletions icu4c/source/i18n/measunit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2563,6 +2563,18 @@ static int32_t binarySearch(
return -1;
}

/**
 * Reports which slice of gSubTypes belongs to the type at the given index.
 * The slice is half-open, [start, end), and is read from two consecutive
 * entries of gOffsets.
 * @param typeIdx index of the type; asserted to lie in [0, UPRV_LENGTHOF(gTypes))
 * @param start receives gOffsets[typeIdx], the first index of the slice
 * @param end receives gOffsets[typeIdx + 1], one past the last index of the slice
 */
static void getSubtypeRange(int32_t typeIdx, int32_t &start, int32_t &end) {
    U_ASSERT(typeIdx >= 0 && typeIdx < UPRV_LENGTHOF(gTypes));
    const int32_t firstIdx = gOffsets[typeIdx];
    const int32_t limitIdx = gOffsets[typeIdx + 1];
    start = firstIdx;
    end = limitIdx;
}

// Default constructor: forwards to the two-argument constructor, passing
// kBaseTypeIdx and kBaseSubTypeIdx.
MeasureUnit::MeasureUnit()
        : MeasureUnit(kBaseTypeIdx, kBaseSubTypeIdx) {}

Expand Down Expand Up @@ -2680,7 +2692,9 @@ int32_t MeasureUnit::getAvailable(
}
int32_t idx = 0;
for (int32_t typeIdx = 0; typeIdx < UPRV_LENGTHOF(gTypes); ++typeIdx) {
int32_t len = gOffsets[typeIdx + 1] - gOffsets[typeIdx];
int32_t start, end;
getSubtypeRange(typeIdx, start, end);
int32_t len = end - start;
for (int32_t subTypeIdx = 0; subTypeIdx < len; ++subTypeIdx) {
dest[idx].setTo(typeIdx, subTypeIdx);
++idx;
Expand All @@ -2702,7 +2716,9 @@ int32_t MeasureUnit::getAvailable(
if (typeIdx == -1) {
return 0;
}
int32_t len = gOffsets[typeIdx + 1] - gOffsets[typeIdx];
int32_t start, end;
getSubtypeRange(typeIdx, start, end);
int32_t len = end - start;
if (destCapacity < len) {
errorCode = U_BUFFER_OVERFLOW_ERROR;
return len;
Expand All @@ -2729,6 +2745,26 @@ StringEnumeration* MeasureUnit::getAvailableTypes(UErrorCode &errorCode) {
return result;
}

// Looks up the unit identified by (type, subtype). On success stores it in
// `result` and returns true; on failure returns false and leaves `result`
// untouched.
bool MeasureUnit::validateAndGet(StringPiece type, StringPiece subtype, MeasureUnit &result) {
    // Step 1: binary-search gTypes for the requested type name.
    const int32_t typeIndex = binarySearch(gTypes, 0, UPRV_LENGTHOF(gTypes), type);
    if (typeIndex == -1) {
        return false;  // unknown type
    }

    // Step 2: binary-search only the slice of gSubTypes that belongs to this type.
    int32_t rangeBegin, rangeEnd;
    getSubtypeRange(typeIndex, rangeBegin, rangeEnd);
    const int32_t absoluteIdx = binarySearch(gSubTypes, rangeBegin, rangeEnd, subtype);
    const bool found = (absoluteIdx != -1);

    if (found) {
        // setTo() takes the subtype index relative to the start of the type's slice.
        result.setTo(typeIndex, absoluteIdx - rangeBegin);
    }
    return found;
}

bool MeasureUnit::findBySubType(StringPiece subType, MeasureUnit* output) {
// Sanity checking kCurrencyOffset and final entry in gOffsets
U_ASSERT(uprv_strcmp(gTypes[kCurrencyOffset], "currency") == 0);
Expand All @@ -2739,9 +2775,11 @@ bool MeasureUnit::findBySubType(StringPiece subType, MeasureUnit* output) {
if (t == kCurrencyOffset) {
continue;
}
int32_t st = binarySearch(gSubTypes, gOffsets[t], gOffsets[t + 1], subType);
int32_t start, end;
getSubtypeRange(t, start, end);
int32_t st = binarySearch(gSubTypes, start, end, subType);
if (st >= 0) {
output->setTo(t, st - gOffsets[t]);
output->setTo(t, st - start);
return true;
}
}
Expand Down
23 changes: 5 additions & 18 deletions icu4c/source/i18n/number_skeletons.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1072,25 +1072,12 @@ void blueprint_helpers::parseMeasureUnitOption(const StringSegment& segment, Mac
CharString subType;
SKELETON_UCHAR_TO_CHAR(subType, stemString, firstHyphen + 1, stemString.length(), status);

// Note: the largest type as of this writing (Aug 2020) is "volume", which has 33 units.
static constexpr int32_t CAPACITY = 40;
MeasureUnit units[CAPACITY];
UErrorCode localStatus = U_ZERO_ERROR;
int32_t numUnits = MeasureUnit::getAvailable(type.data(), units, CAPACITY, localStatus);
if (U_FAILURE(localStatus)) {
// More than 30 units in this type?
status = U_INTERNAL_PROGRAM_ERROR;
MeasureUnit unit;
if (MeasureUnit::validateAndGet(type.toStringPiece(), subType.toStringPiece(), unit)) {
macros.unit = unit;
return;
}
for (int32_t i = 0; i < numUnits; i++) {
auto& unit = units[i];
if (uprv_strcmp(subType.data(), unit.getSubtype()) == 0) {
macros.unit = unit;
return;
}
}

// throw new SkeletonSyntaxException("Unknown measure unit", segment);
}

status = U_NUMBER_SKELETON_SYNTAX_ERROR;
}

Expand Down
16 changes: 16 additions & 0 deletions icu4c/source/i18n/unicode/measunit.h
Original file line number Diff line number Diff line change
Expand Up @@ -720,6 +720,22 @@ class U_I18N_API MeasureUnit: public UObject {
*/
static StringEnumeration* getAvailableTypes(UErrorCode &errorCode);

#ifndef U_HIDE_INTERNAL_API
/**
 * Validates that a specific type and subtype combination exists and retrieves the unit.
 *
 * <p> Note: This is more efficient than calling getAvailable() when you only need
 * to validate and retrieve a single unit.
 *
 * @param type the unit type (e.g., "length", "mass", "volume")
 * @param subtype the unit subtype (e.g., "meter", "kilogram", "liter")
 * @param result if the unit is valid, this will be set to the MeasureUnit;
 *               otherwise it is left unmodified
 * @return true if the type/subtype combination is valid, false otherwise
 * @internal
 */
static bool validateAndGet(StringPiece type, StringPiece subtype, MeasureUnit &result);
#endif /* U_HIDE_INTERNAL_API */

/**
* Return the class ID for this class. This is useful only for comparing to
* a return value from getDynamicClassID(). For example:
Expand Down
11 changes: 11 additions & 0 deletions icu4c/source/test/intltest/numbertest_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -888,6 +888,17 @@ void NumberFormatterApiTest::unitMeasure() {
Locale("en"),
100,
u"100");

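// Regression test for parsing "measure-unit/<type>-<subtype>" skeletons. The parser used to copy all units
// of a type into a fixed-capacity array and failed when the type had more units than the array could hold.
// It now looks the unit up directly with a binary search, so a unit from a large type such as volume must parse.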
assertFormatSingle(
u"One of the latest unit, volume-teaspoon",
u"measure-unit/volume-teaspoon",
u"unit/teaspoon",
NumberFormatter::with().unit(MeasureUnit::forIdentifier("teaspoon", status)),
Locale("en-US"),
100,
u"100 tsp");

// TODO: desired behaviour for this "pathological" case?
// Since this is pointless, we don't test that its behaviour doesn't change.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1095,12 +1095,10 @@ private static void parseMeasureUnitOption(StringSegment segment, MacroProps mac
}
String type = segment.subSequence(0, firstHyphen).toString();
String subType = segment.subSequence(firstHyphen + 1, segment.length()).toString();
Set<MeasureUnit> units = MeasureUnit.getAvailable(type);
for (MeasureUnit unit : units) {
if (subType.equals(unit.getSubtype())) {
macros.unit = unit;
return;
}
MeasureUnit unit = MeasureUnit.getUnit(type, subType);
if (unit != null) {
macros.unit = unit;
return;
}
throw new SkeletonSyntaxException("Unknown measure unit", segment);
}
Expand Down
23 changes: 23 additions & 0 deletions icu4j/main/core/src/main/java/com/ibm/icu/util/MeasureUnit.java
Original file line number Diff line number Diff line change
Expand Up @@ -870,6 +870,29 @@ public static MeasureUnit findBySubType(String subType) {
return null;
}

/**
 * Looks up the MeasureUnit registered under the given type and subtype.
 *
 * Example:
 * "length", "meter" -> METER
 * "length", "kilometer" -> KILOMETER
 * "length", "kilometer-per-hour" -> null (not a valid combination)
 *
 * @param type the unit type (e.g., "length", "mass", "volume")
 * @param subtype the unit subtype (e.g., "meter", "kilogram", "liter")
 * @return the matching MeasureUnit, or null if the type is not in the cache
 *         or has no entry for the subtype
 * @internal
 * @deprecated This API is ICU internal only.
 */
@Deprecated
public static MeasureUnit getUnit(String type, String subtype) {
    // Make sure the type -> (subtype -> unit) cache has been filled before reading it.
    populateCache();
    Map<String, MeasureUnit> subtypesForType = cache.get(type);
    if (subtypesForType == null) {
        return null;
    }
    return subtypesForType.get(subtype);
}

// Frozen set built from the single range 'a'..'z'.
// NOTE(review): despite the name, this holds only lowercase letters, not all of ASCII.
static final UnicodeSet ASCII = new UnicodeSet('a', 'z').freeze();
// Frozen set built from the ranges '-'..'-', '0'..'9' and 'a'..'z'
// (hyphen, digits and lowercase letters).
static final UnicodeSet ASCII_HYPHEN_DIGITS =
new UnicodeSet('-', '-', '0', '9', 'a', 'z').freeze();
Expand Down
Loading