JuliaLang · vtjnash · Feb 1, 2024 · Oct 21, 2023 · Dec 14, 2023 · Dec 28, 2023
diff --git a/NEWS.md b/NEWS.md
@@ -11,11 +11,20 @@ New language features
 * The new macro `Base.Cartesian.@ncallkw` is analogous to `Base.Cartesian.@ncall`,
   but allows to add keyword arguments to the function call ([#51501]).
 * Support for Unicode 15.1 ([#51799]).
-* A new `AbstractString` type, `AnnotatedString`, is introduced that allows for
-  regional annotations to be attached to an underlying string. This type is
-  particularly useful for holding styling information, and is used extensively
-  in the new `StyledStrings` standard library. There is also a new `AnnotatedChar`
-  type, that is the equivalent new `AbstractChar` type.
+* Three new types have been added around the idea of text with "annotations"
+  (`Pair{Symbol, Any}` entries, e.g. `:lang => "en"` or `:face => :magenta`).
+  These annotations are preserved across operations (e.g. string concatenation
+  with `*`) when possible.
+  * `AnnotatedString` is a new `AbstractString` type. It wraps an underlying
+    string and allows for annotations to be attached to regions of the string.
+    This type is used extensively in the new `StyledStrings` standard library to
+    hold styling information.
+  * `AnnotatedChar` is a new `AbstractChar` type. It wraps another char and
+    holds a list of annotations that apply to it.
+  * `AnnotatedIOBuffer` is a new `IO` type that mimics an `IOBuffer`, but has
+    specialised `read`/`write` methods for annotated content. It can be thought
+    of both as a "string builder" of sorts and as glue between annotated and
+    unannotated content.
 * `Manifest.toml` files can now be renamed in the format `Manifest-v{major}.{minor}.toml`
   to be preferentially picked up by the given julia version. i.e. in the same folder,
   a `Manifest-v1.11.toml` would be used by v1.11 and `Manifest.toml` by every other julia

diff --git a/base/strings/annotated.jl b/base/strings/annotated.jl
@@ -323,14 +323,15 @@ To remove existing `label` annotations, use a value of `nothing`.
 """
 function annotate!(s::AnnotatedString, range::UnitRange{Int}, @nospecialize(labelval::Pair{Symbol, <:Any}))
     label, val = labelval
-    indices = searchsorted(s.annotations, (range,), by=first)
     if val === nothing
+        indices = searchsorted(s.annotations, (range,), by=first) # entries whose region equals `range`
         labelindex = filter(i -> first(s.annotations[i][2]) === label, indices)
         for index in Iterators.reverse(labelindex)
             deleteat!(s.annotations, index)
         end
     else
-        splice!(s.annotations, indices, [(range, Pair{Symbol, Any}(label, val))])
+        sortedindex = searchsortedlast(s.annotations, (range,), by=first) + 1 # slot after the last entry not sorting after `range`
+        insert!(s.annotations, sortedindex, (range, Pair{Symbol, Any}(label, val))) # adds an entry; existing ones are kept
     end
     s
 end
@@ -386,3 +387,126 @@ annotations(s::SubString{<:AnnotatedString}, pos::UnitRange{<:Integer}) =
 Get all annotations of `chr`.
 """
 annotations(c::AnnotatedChar) = c.annotations
+
+## AnnotatedIOBuffer
+# An `IOBuffer` bundled with a list of annotations, each tied to a region of the buffer.
+struct AnnotatedIOBuffer <: AbstractPipe
+    io::IOBuffer
+    annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}
+end
+
+AnnotatedIOBuffer(io::IOBuffer) = AnnotatedIOBuffer(io, Tuple{UnitRange{Int}, Pair{Symbol, Any}}[]) # wrap `io`, no annotations yet
+AnnotatedIOBuffer() = AnnotatedIOBuffer(IOBuffer()) # fresh buffer, no annotations
+
+function show(io::IO, aio::AnnotatedIOBuffer)
+    nb, na = aio.io.size, length(aio.annotations)
+    show(io, AnnotatedIOBuffer) # the type itself, followed by a "(N bytes, M annotations)" summary
+    print(io, '(', nb, " byte", nb == 1 ? "" : "s", ", ", na, " annotation", na == 1 ? "" : "s", ")")
+end
+
+pipe_reader(io::AnnotatedIOBuffer) = io.io # reading end: the wrapped `IOBuffer`
+pipe_writer(io::AnnotatedIOBuffer) = io.io # writing end: the same wrapped `IOBuffer`
+
+# Useful `IOBuffer` methods that we don't get from `AbstractPipe`; the seeking ones return the wrapper.
+position(io::AnnotatedIOBuffer) = position(io.io)
+seek(io::AnnotatedIOBuffer, n::Integer) = (seek(io.io, n); io)
+seekend(io::AnnotatedIOBuffer) = (seekend(io.io); io) # return `io`, not the inner `IOBuffer`, so chained writes keep annotation handling
+skip(io::AnnotatedIOBuffer, n::Integer) = (skip(io.io, n); io)
+copy(io::AnnotatedIOBuffer) = AnnotatedIOBuffer(copy(io.io), copy(io.annotations))
+
+annotations(io::AnnotatedIOBuffer) = io.annotations # the buffer's own vector, not a copy
+
+function write(io::AnnotatedIOBuffer, astr::Union{AnnotatedString, SubString{<:AnnotatedString}})
+    annotated, start = AnnotatedString(astr), position(io.io)
+    # When overwriting rather than appending, drop annotations on the bytes being replaced.
+    !eof(io) && _clear_annotations_in_region!(io.annotations, start+1:start+ncodeunits(annotated))
+    _insert_annotations!(io, annotated.annotations, start)
+    return write(io.io, String(annotated)) # result of writing the plain text to the wrapped buffer
+end
+
+write(io::AnnotatedIOBuffer, c::AnnotatedChar) = write(io, AnnotatedString(c)) # reuse the annotated-string method
+write(io::AnnotatedIOBuffer, x::AbstractString) = write(io.io, x) # goes straight to the wrapped buffer; `io.annotations` is not touched
+write(io::AnnotatedIOBuffer, s::Union{SubString{String}, String}) = write(io.io, s)
+write(io::AnnotatedIOBuffer, b::UInt8) = write(io.io, b)
+
+function write(dest::AnnotatedIOBuffer, src::AnnotatedIOBuffer)
+    destpos, srcpos, isappending = position(dest), position(src), eof(dest) # all sampled before copying
+    nb = write(dest.io, src.io)
+    # The copied bytes occupy 1-based indices destpos+1:destpos+nb; only their annotations are cleared.
+    isappending || nb == 0 || _clear_annotations_in_region!(dest.annotations, destpos+1:destpos+nb)
+    # Carry over every annotation reaching past `srcpos` (as `read` does), trimmed to the unread part.
+    srcannots = [(max(1 + srcpos, first(region)):last(region), annot)
+                 for (region, annot) in src.annotations if last(region) > srcpos]
+    _insert_annotations!(dest, srcannots, destpos - srcpos)
+    return nb
+end
+
+# Make room for new content in `span`: drop annotations lying entirely inside it, trim
+# those that partially overlap it, and split any annotation that strictly encloses it.
+function _clear_annotations_in_region!(annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}, span::UnitRange{Int})
+    # Clear out any pre-existing annotations that lie entirely within `span`.
+    filter!(((region, _),) -> first(region) < first(span) || last(region) > last(span), annotations)
+    extras = Tuple{UnitRange{Int}, Pair{Symbol, Any}}[]
+    for i in eachindex(annotations)
+        region, annot = annotations[i]
+        # Test for partial overlap
+        if first(region) <= first(span) <= last(region) || first(region) <= last(span) <= last(region)
+            kept = first(region) < first(span) ? (first(region):first(span)-1) : (last(span)+1:last(region))
+            annotations[i] = (kept, annot)
+            # `span` strictly inside `region`: `kept` is only the beginning overhang, so save the end too.
+            first(region) < first(span) && last(span) < last(region) &&
+                push!(extras, (last(span)+1:last(region), annot))
+        end
+    end
+    # Insert the end overhangs only once the scan is done: doing so inside the loop
+    # re-inserted every earlier overhang on each later iteration (duplicate annotations).
+    for entry in extras
+        sortedindex = searchsortedlast(annotations, (first(entry),), by=first) + 1
+        insert!(annotations, sortedindex, entry)
+    end
+    return annotations
+end
+
+function _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}, offset::Int = position(io))
+    # With `io` at its end the shifted entries are simply appended; anywhere else each
+    # one is placed just after the last stored entry whose region does not sort after it.
+    appending = eof(io)
+    for (region, annot) in annotations
+        shifted = (first(region) + offset):(last(region) + offset)
+        if appending
+            push!(io.annotations, (shifted, annot))
+        else
+            slot = searchsortedlast(io.annotations, (shifted,), by=first) + 1
+            insert!(io.annotations, slot, (shifted, annot))
+        end
+    end
+end
+
+function read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{T}}) where {T <: AbstractString}
+    start = position(io)
+    # From position 0 every annotation applies as-is; hand back a copy of the list.
+    start == 0 && return AnnotatedString(read(io.io, T), copy(io.annotations))
+    # Otherwise keep only annotations reaching past `start`, shifted back by `start` and clamped to begin at 1.
+    annots = [(max(1, first(region) - start):(last(region) - start), val)
+              for (region, val) in io.annotations if last(region) > start]
+    return AnnotatedString(read(io.io, T), annots)
+end
+read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{AbstractString}}) = read(io, AnnotatedString{String})
+read(io::AnnotatedIOBuffer, ::Type{AnnotatedString}) = read(io, AnnotatedString{String})
+
+function read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar{T}}) where {T <: AbstractChar}
+    index = position(io) + 1 # 1-based index of the byte at which the character starts
+    char = read(io.io, T)
+    # The character carries every annotation whose region covers that first byte.
+    return AnnotatedChar(char, [annot for (range, annot) in io.annotations if index in range])
+end
+read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar{AbstractChar}}) = read(io, AnnotatedChar{Char})
+read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar}) = read(io, AnnotatedChar{Char})
+
+function truncate(io::AnnotatedIOBuffer, size::Integer)
+    truncate(io.io, size)
+    # Forget annotations that begin beyond the new end, then clip the rest so none extends past it.
+    annots = filter!(((span, _),) -> first(span) <= size, io.annotations)
+    map!(((span, val),) -> (first(span):min(size, last(span)), val), annots, annots)
+    return io
+end
diff --git a/test/strings/annotated.jl b/test/strings/annotated.jl
@@ -107,3 +107,57 @@ end
     @test reverse(str1) == Base.AnnotatedString("tset", [(1:4, :label => 5)])
     @test reverse(str2) == Base.AnnotatedString("esac", [(2:3, :label => "oomph")])
 end
+
+@testset "AnnotatedIOBuffer" begin
+    aio = Base.AnnotatedIOBuffer()
+    # Append-only writing: each string's annotations are shifted to where it lands in the buffer
+    @test write(aio, Base.AnnotatedString("hello", [(1:5, :tag => 1)])) == 5
+    @test write(aio, ' ') == 1
+    @test write(aio, Base.AnnotatedString("world", [(1:5, :tag => 2)])) == 5
+    @test Base.annotations(aio) == [(1:5, :tag => 1), (7:11, :tag => 2)]
+    # Reading: from a non-zero position, annotations are re-based onto (and clipped to) the remaining text
+    @test read(seekstart(deepcopy(aio.io)), String) == "hello world"
+    @test read(seekstart(deepcopy(aio)), String) == "hello world"
+    @test read(seek(aio, 0), Base.AnnotatedString) == Base.AnnotatedString("hello world", [(1:5, :tag => 1), (7:11, :tag => 2)])
+    @test read(seek(aio, 1), Base.AnnotatedString) == Base.AnnotatedString("ello world", [(1:4, :tag => 1), (6:10, :tag => 2)])
+    @test read(seek(aio, 4), Base.AnnotatedString) == Base.AnnotatedString("o world", [(1:1, :tag => 1), (3:7, :tag => 2)])
+    @test read(seek(aio, 5), Base.AnnotatedString) == Base.AnnotatedString(" world", [(2:6, :tag => 2)])
+    @test read(seekstart(truncate(deepcopy(aio), 5)), Base.AnnotatedString) == Base.AnnotatedString("hello", [(1:5, :tag => 1)])
+    @test read(seekstart(truncate(deepcopy(aio), 6)), Base.AnnotatedString) == Base.AnnotatedString("hello ", [(1:5, :tag => 1)])
+    @test read(seekstart(truncate(deepcopy(aio), 7)), Base.AnnotatedString) == Base.AnnotatedString("hello w", [(1:5, :tag => 1), (7:7, :tag => 2)])
+    @test read(seek(aio, 0), Base.AnnotatedChar) == Base.AnnotatedChar('h', [:tag => 1])
+    @test read(seek(aio, 5), Base.AnnotatedChar) == Base.AnnotatedChar(' ', Pair{Symbol, Any}[])
+    @test read(seek(aio, 6), Base.AnnotatedChar) == Base.AnnotatedChar('w', [:tag => 2])
+    # Check method compatibility with IOBuffer
+    @test position(aio) == 7 # left there by the one-character read from position 6 just above
+    @test seek(aio, 4) === aio
+    @test skip(aio, 2) === aio
+    @test Base.annotations(copy(aio)) == Base.annotations(aio)
+    @test take!(copy(aio).io) == take!(copy(aio.io))
+    # Writing into the middle of the buffer
+    @test write(seek(aio, 6), "alice") == 5 # Replace 'world' with 'alice'
+    @test read(seekstart(aio), String) == "hello alice"
+    @test Base.annotations(aio) == [(1:5, :tag => 1), (7:11, :tag => 2)] # Should be unchanged
+    @test write(seek(aio, 0), Base.AnnotatedString("hey-o", [(1:5, :hey => 'o')])) == 5
+    @test read(seekstart(aio), String) == "hey-o alice"
+    @test Base.annotations(aio) == [(1:5, :hey => 'o'), (7:11, :tag => 2)] # First annotation should have been entirely replaced
+    @test write(seek(aio, 7), Base.AnnotatedString("bbi", [(1:3, :hey => 'a')])) == 3 # a[lic => bbi]e ('alice' => 'abbie')
+    @test read(seekstart(aio), String) == "hey-o abbie"
+    @test Base.annotations(aio) == [(1:5, :hey => 'o'), (7:7, :tag => 2), (8:10, :hey => 'a'), (11:11, :tag => 2)]
+    @test write(seek(aio, 0), Base.AnnotatedString("ab")) == 2 # Check first annotation's region is adjusted correctly
+    @test read(seekstart(aio), String) == "aby-o abbie"
+    @test Base.annotations(aio) == [(3:5, :hey => 'o'), (7:7, :tag => 2), (8:10, :hey => 'a'), (11:11, :tag => 2)]
+    @test write(seek(aio, 3), Base.AnnotatedString("ss")) == 2 # Overwrite bytes 4:5 ("-o"), clipping the first annotation's end
+    @test read(seekstart(aio), String) == "abyss abbie"
+    @test Base.annotations(aio) == [(3:3, :hey => 'o'), (7:7, :tag => 2), (8:10, :hey => 'a'), (11:11, :tag => 2)]
+    # Writing one buffer to another
+    newaio = Base.AnnotatedIOBuffer()
+    @test write(newaio, seekstart(aio)) == 11
+    @test read(seekstart(newaio), String) == "abyss abbie"
+    @test Base.annotations(newaio) == Base.annotations(aio)
+    @test write(seek(newaio, 5), seek(aio, 5)) == 6 # Overwrite the tail with identical content
+    @test Base.annotations(newaio) == Base.annotations(aio)
+    @test write(newaio, seek(aio, 5)) == 6 # Append the tail (" abbie") once more
+    @test read(seekstart(newaio), String) == "abyss abbie abbie"
+    @test Base.annotations(newaio) == vcat(Base.annotations(aio), [(13:13, :tag => 2), (14:16, :hey => 'a'), (17:17, :tag => 2)])
+end