Skip to content

Commit

Permalink
Add new inline mapping strategy to IdnaMappingTable (#7882)
Browse files Browse the repository at this point in the history
* Add new inline mapping strategy to IdnaMappingTable

---------

Co-authored-by: Adam Erb <[email protected]>
  • Loading branch information
aerb and Adam Erb authored Jun 9, 2023
1 parent 0c61b0b commit 8c8c3db
Show file tree
Hide file tree
Showing 6 changed files with 241 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
package okhttp3.internal.idn

import kotlin.math.abs
import okio.ByteString

internal sealed interface MappedRange {
Expand Down Expand Up @@ -72,6 +73,31 @@ internal sealed interface MappedRange {
get() = mappedTo[1] and 0x7f
}

/**
 * A range that maps each code point to the code point [codepointDelta] away from it.
 *
 * The delta is serialized as three bytes:
 *
 *  * [b1] holds the direction (`0x40` for a negative delta, `0x50` for a positive delta) in its
 *    upper bits, and the top 4 bits of the delta's magnitude in its lower 4 bits.
 *  * [b2] holds the middle 7 bits of the magnitude.
 *  * [b3] holds the low 7 bits of the magnitude.
 *
 * That leaves 18 bits for the magnitude, so it must not exceed [MAX_VALUE].
 */
data class InlineDelta(
  override val rangeStart: Int,
  val codepointDelta: Int
) : MappedRange {

  init {
    // A larger magnitude would spill out of the low 4 bits of b1 and corrupt the type marker. A
    // range check is used rather than abs() so that Int.MIN_VALUE is rejected too.
    require(codepointDelta in -MAX_VALUE..MAX_VALUE) {
      "codepointDelta out of range: $codepointDelta"
    }
  }

  private val absoluteDelta = abs(codepointDelta)

  /** Direction marker combined with the top 4 bits of the magnitude. Fails for a delta of 0. */
  val b1: Int
    get() = when {
      codepointDelta < 0 -> 0x40 or (absoluteDelta shr 14)
      codepointDelta > 0 -> 0x50 or (absoluteDelta shr 14)
      else -> error("Unexpected codepointDelta of 0")
    }

  /** Bits 7 through 13 of the magnitude. */
  val b2: Int
    get() = absoluteDelta shr 7 and 0x7f

  /** Bits 0 through 6 of the magnitude. */
  val b3: Int
    get() = absoluteDelta and 0x7f

  companion object {
    /** The largest magnitude that fits in 18 bits: 2^18 - 1. */
    const val MAX_VALUE = 0x3FFFF
  }
}

data class External(
override val rangeStart: Int,
val mappedTo: ByteString
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
*/
package okhttp3.internal.idn

import kotlin.math.abs
import kotlin.streams.toList
import okio.Buffer

/** Index [table] for compactness as specified by `IdnaMappingTable`. */
Expand Down Expand Up @@ -64,6 +66,11 @@ fun buildIdnaMappingTableData(table: SimpleIdnaMappingTable): IdnaMappingTableDa
rangesBuffer.writeByte(range.b2)
rangesBuffer.writeByte(range.b3)
}
is MappedRange.InlineDelta -> {
rangesBuffer.writeByte(range.b1)
rangesBuffer.writeByte(range.b2)
rangesBuffer.writeByte(range.b3)
}
is MappedRange.External -> {
// Write the mapping.
val mappingOffset: Int
Expand Down Expand Up @@ -95,6 +102,25 @@ fun buildIdnaMappingTableData(table: SimpleIdnaMappingTable): IdnaMappingTableDa
)
}

/**
 * Returns [mapping] encoded as a [MappedRange.InlineDelta], or null if it doesn't qualify.
 *
 * A mapping qualifies when it maps a single source code point to a single, different code point
 * and the distance between the two has a magnitude of at most [MappedRange.InlineDelta.MAX_VALUE]
 * (2^18 - 1).
 */
internal fun inlineDeltaOrNull(mapping: Mapping): MappedRange.InlineDelta? {
  if (!mapping.hasSingleSourceCodePoint) return null

  // Exactly one mapped-to code point is required; singleOrNull() is null for 0 or 2+ elements.
  val mappedCodePoint = mapping.mappedTo.utf8().codePoints().toList().singleOrNull()
    ?: return null

  val codePointDelta = mappedCodePoint - mapping.sourceCodePoint0

  // A delta of 0 has no byte encoding (InlineDelta.b1 fails on it), so leave such a mapping to
  // the other strategies instead of producing a range that crashes when it's written.
  if (codePointDelta == 0) return null
  if (abs(codePointDelta) > MappedRange.InlineDelta.MAX_VALUE) return null

  return MappedRange.InlineDelta(mapping.rangeStart, codePointDelta)
}

/**
* Inputs must have applied [withoutSectionSpans].
*/
Expand All @@ -110,7 +136,12 @@ internal fun sections(mappings: List<Mapping>): Map<Int, List<MappedRange>> {
val sectionList = result.getOrPut(section) { mutableListOf() }

sectionList += when (mapping.type) {
TYPE_MAPPED -> {
TYPE_MAPPED -> run {
val deltaMapping = inlineDeltaOrNull(mapping)
if (deltaMapping != null) {
return@run deltaMapping
}

when (mapping.mappedTo.size) {
1 -> MappedRange.Inline1(rangeStart, mapping.mappedTo)
2 -> MappedRange.Inline2(rangeStart, mapping.mappedTo)
Expand All @@ -126,9 +157,38 @@ internal fun sections(mappings: List<Mapping>): Map<Int, List<MappedRange>> {
}
}

for (sectionList in result.values) {
mergeAdjacentDeltaMappedRanges(sectionList)
}

return result.toMap()
}

/**
 * Collapses each run of consecutive [MappedRange.InlineDelta] entries that share the same
 * `codepointDelta` into the first entry of that run. [ranges] is modified in place.
 *
 * @return the same [ranges] instance, for convenience.
 */
internal fun mergeAdjacentDeltaMappedRanges(ranges: MutableList<MappedRange>): MutableList<MappedRange> {
  // The most recent entry that was kept; later entries are compared against it.
  var lastKept: MappedRange? = null

  val iterator = ranges.iterator()
  while (iterator.hasNext()) {
    val candidate = iterator.next()
    val continuesRun = lastKept is MappedRange.InlineDelta &&
      candidate is MappedRange.InlineDelta &&
      lastKept.codepointDelta == candidate.codepointDelta

    if (continuesRun) {
      iterator.remove()
    } else {
      lastKept = candidate
    }
  }

  return ranges
}

/**
* Returns a copy of [mappings], splitting to ensure that each mapping is entirely contained within
Expand Down Expand Up @@ -219,3 +279,4 @@ internal infix fun Byte.and(mask: Int): Int = toInt() and mask
/** Converts this short to an int with `toInt()` and returns its bitwise AND with [mask]. */
internal infix fun Short.and(mask: Int): Int = toInt() and mask

/** Converts this int to a long with `toLong()` and returns its bitwise AND with [mask]. */
internal infix fun Int.and(mask: Long): Long = toLong() and mask

Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,9 @@ internal data class Mapping(
/** The low 7 bits of [sourceCodePoint0]. */
val rangeStart: Int
get() = sourceCodePoint0 and 0x7f

/** True when [sourceCodePoint0] and [sourceCodePoint1] are the same code point. */
val hasSingleSourceCodePoint: Boolean
get() = sourceCodePoint0 == sourceCodePoint1

/**
 * True when [sourceCodePoint0] and [sourceCodePoint1] differ in any of bits 7 through 20 (the
 * bits selected by the mask `0x1fff80`), i.e. in anything other than their low 7 bits.
 */
val spansSections: Boolean
get() = (sourceCodePoint0 and 0x1fff80) != (sourceCodePoint1 and 0x1fff80)
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ package okhttp3.internal.idn

import assertk.assertThat
import assertk.assertions.containsExactly
import assertk.assertions.isEqualTo
import okhttp3.internal.idn.MappedRange.InlineDelta
import okio.Buffer
import okio.ByteString
import okio.ByteString.Companion.encodeUtf8
import org.junit.jupiter.api.Test
Expand Down Expand Up @@ -107,4 +110,123 @@ class MappingTablesTest {
Mapping(0x40080, 0x400ff, TYPE_DISALLOWED, ByteString.EMPTY),
)
}

/** A run of three equal deltas collapses to its first entry; the trailing external range stays. */
@Test fun mergeAdjacentDeltaMappedRangesWithMultipleDeltas() {
  val trailingExternal = MappedRange.External(4, "a".encodeUtf8())
  val ranges: MutableList<MappedRange> = mutableListOf(
    InlineDelta(1, 5),
    InlineDelta(2, 5),
    InlineDelta(3, 5),
    trailingExternal,
  )

  val merged = mergeAdjacentDeltaMappedRanges(ranges)

  assertThat(merged).containsExactly(
    InlineDelta(1, 5),
    trailingExternal,
  )
}

/** Only neighbours with an equal delta are merged; an entry with a different delta is kept. */
@Test fun mergeAdjacentDeltaMappedRangesWithDifferentSizedDeltas() {
  val ranges: MutableList<MappedRange> = mutableListOf(
    InlineDelta(1, 5),
    InlineDelta(2, 5),
    InlineDelta(3, 1),
  )

  val merged = mergeAdjacentDeltaMappedRanges(ranges)

  assertThat(merged).containsExactly(
    InlineDelta(1, 5),
    InlineDelta(3, 1),
  )
}

/** One-to-one mappings yield a signed delta in both directions. */
@Test fun inlineDeltaOrNullValid() {
  // Mapped-to code point is above the source: positive delta.
  val upward = mappingOf(
    sourceCodePoint0 = 1,
    sourceCodePoint1 = 1,
    mappedToCodePoints = listOf(2)
  )
  assertThat(inlineDeltaOrNull(upward)).isEqualTo(InlineDelta(1, 1))

  // Mapped-to code point is below the source: negative delta.
  val downward = mappingOf(
    sourceCodePoint0 = 2,
    sourceCodePoint1 = 2,
    mappedToCodePoints = listOf(1)
  )
  assertThat(inlineDeltaOrNull(downward)).isEqualTo(InlineDelta(2, -1))
}

/** A mapping whose source spans more than one code point can't be an inline delta. */
@Test fun inlineDeltaOrNullMultipleSourceCodePoints() {
  val multiSource = mappingOf(
    sourceCodePoint0 = 2,
    sourceCodePoint1 = 3,
    mappedToCodePoints = listOf(2)
  )

  assertThat(inlineDeltaOrNull(multiSource)).isEqualTo(null)
}

/** A mapping that expands to more than one code point can't be an inline delta. */
@Test fun inlineDeltaOrNullMultipleMappedToCodePoints() {
  val multiTarget = mappingOf(
    sourceCodePoint0 = 1,
    sourceCodePoint1 = 1,
    mappedToCodePoints = listOf(2, 3)
  )

  assertThat(inlineDeltaOrNull(multiTarget)).isEqualTo(null)
}

/** A delta of 2^18 - 1 is the largest accepted; 2^18 is rejected. */
@Test fun inlineDeltaOrNullMaxCodepointDelta() {
  // Largest delta that still fits.
  val atLimit = mappingOf(
    sourceCodePoint0 = 0,
    sourceCodePoint1 = 0,
    mappedToCodePoints = listOf((1 shl 18) - 1)
  )
  val expectedAtLimit = InlineDelta(
    rangeStart = 0,
    codepointDelta = InlineDelta.MAX_VALUE
  )
  assertThat(inlineDeltaOrNull(atLimit)).isEqualTo(expectedAtLimit)

  // One past the limit.
  val pastLimit = mappingOf(
    sourceCodePoint0 = 0,
    sourceCodePoint1 = 0,
    mappedToCodePoints = listOf(1 shl 18)
  )
  assertThat(inlineDeltaOrNull(pastLimit)).isEqualTo(null)
}

/**
 * Builds a [TYPE_MAPPED] mapping from [sourceCodePoint0]..[sourceCodePoint1] whose mapped-to
 * bytes are the UTF-8 encoding of [mappedToCodePoints], in order.
 */
private fun mappingOf(
  sourceCodePoint0: Int,
  sourceCodePoint1: Int,
  mappedToCodePoints: List<Int>
): Mapping {
  val encoded = Buffer()
  mappedToCodePoints.forEach { codePoint ->
    encoded.writeUtf8CodePoint(codePoint)
  }

  return Mapping(
    sourceCodePoint0 = sourceCodePoint0,
    sourceCodePoint1 = sourceCodePoint1,
    type = TYPE_MAPPED,
    mappedTo = encoded.readByteString()
  )
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -56,16 +56,18 @@ import okio.BufferedSink
* This is either a mapping decision or the length of the mapped output, according to this table:
*
* ```
* 0..63 : Length of the UTF-16 sequence that this range maps to. The offset is b2b3.
* 119 : Ignored.
* 120 : Valid.
* 121 : Disallowed
* 122 : Mapped inline to the sequence: [b2].
* 123 : Mapped inline to the sequence: [b2a].
* 124 : Mapped inline to the sequence: [b2, b3].
* 125 : Mapped inline to the sequence: [b2a, b3].
* 126 : Mapped inline to the sequence: [b2, b3a].
* 127 : Mapped inline to the sequence: [b2a, b3a].
* 0..63 : Length of the UTF-16 sequence that this range maps to. The offset is b2b3.
* 64..79 : Offset by a fixed negative offset. The bottom 4 bits of b1 are the top 4 bits of the offset.
* 80..95 : Offset by a fixed positive offset. The bottom 4 bits of b1 are the top 4 bits of the offset.
* 119 : Ignored.
* 120 : Valid.
* 121 : Disallowed
* 122 : Mapped inline to the sequence: [b2].
* 123 : Mapped inline to the sequence: [b2a].
* 124 : Mapped inline to the sequence: [b2, b3].
* 125 : Mapped inline to the sequence: [b2a, b3].
* 126 : Mapped inline to the sequence: [b2, b3a].
* 127 : Mapped inline to the sequence: [b2a, b3a].
*
* The range goes until the beginning of the next range.
*
Expand Down Expand Up @@ -129,6 +131,22 @@ internal class IdnaMappingTable internal constructor(
val beginIndex = ranges.read14BitInt(rangesIndex + 2)
sink.writeUtf8(mappings, beginIndex, beginIndex + b1)
}
in 64..79 -> {
// Mapped inline as codePoint delta to subtract
val b2 = ranges[rangesIndex + 2].code
val b3 = ranges[rangesIndex + 3].code

val codepointDelta = (b1 and 0xF shl 14) or (b2 shl 7) or b3
sink.writeUtf8CodePoint(codePoint - codepointDelta)
}
in 80..95 -> {
// Mapped inline as codePoint delta to add
val b2 = ranges[rangesIndex + 2].code
val b3 = ranges[rangesIndex + 3].code

val codepointDelta = (b1 and 0xF shl 14) or (b2 shl 7) or b3
sink.writeUtf8CodePoint(codePoint + codepointDelta)
}
119 -> {
// Ignored.
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import assertk.assertions.isGreaterThan
import assertk.assertions.isLessThan
import kotlin.test.assertEquals
import kotlin.test.assertFailsWith
import okhttp3.internal.toHexString
import okio.Buffer
import okio.FileSystem
import okio.Path.Companion.toPath
Expand Down

0 comments on commit 8c8c3db

Please sign in to comment.