From f410925210f058506b4f2e777d1d01552fc9740f Mon Sep 17 00:00:00 2001 From: john xu Date: Sun, 28 Dec 2025 14:08:03 +0000 Subject: [PATCH 1/2] perf: cache grapheme widths in word wrap --- bench/bench.zig | 167 ++++++++++++++++++++++++++++++++++++++++++++++++ src/Window.zig | 98 +++++++++++++++++++++------- 2 files changed, 243 insertions(+), 22 deletions(-) diff --git a/bench/bench.zig b/bench/bench.zig index 83afc8b4..487ea536 100644 --- a/bench/bench.zig +++ b/bench/bench.zig @@ -20,6 +20,71 @@ fn printResults(writer: anytype, label: []const u8, iterations: usize, elapsed_n ); } +fn buildRepeated(allocator: std.mem.Allocator, pattern: []const u8, repeat: usize) ![]u8 { + const total_len = pattern.len * repeat; + var buf = try allocator.alloc(u8, total_len); + for (0..repeat) |i| { + const start = i * pattern.len; + const end = start + pattern.len; + @memcpy(buf[start..end], pattern); + } + return buf; +} + +fn totalBytes(segments: []const vaxis.Segment) usize { + var total: usize = 0; + for (segments) |segment| { + total += segment.text.len; + } + return total; +} + +fn benchPrintWord(writer: anytype, label: []const u8, win: vaxis.Window, segments: []const vaxis.Segment, opts: vaxis.PrintOptions, iterations: usize) !void { + const bytes_per_iter = totalBytes(segments); + var timer = try std.time.Timer.start(); + var i: usize = 0; + while (i < iterations) : (i += 1) { + const result = win.print(segments, opts); + std.mem.doNotOptimizeAway(result); + } + const elapsed_ns = timer.read(); + try printResults(writer, label, iterations, elapsed_ns, bytes_per_iter * iterations); +} + +/// Iterate word tokens and compute gwidth(word) for each. This simulates the +/// extra pass that existed before caching grapheme widths in Window.print. +fn benchWordWidthPass(win: vaxis.Window, segments: []const vaxis.Segment) void { + var total: u32 = 0; + for (segments) |segment| { + var line_iter: BenchLineIterator = .{ .buf = segment.text }; + while (line_iter.next()) |line| { + var iter: BenchWhitespaceTokenizer = .{ .buf = line }; + while (iter.next()) |token| { + switch (token) { + .whitespace => {}, + .word => |word| { + total +|= win.gwidth(word); + }, + } + } + } + } + std.mem.doNotOptimizeAway(total); +} + +fn benchPrintWordBaseline(writer: anytype, label: []const u8, win: vaxis.Window, segments: []const vaxis.Segment, opts: vaxis.PrintOptions, iterations: usize) !void { + const bytes_per_iter = totalBytes(segments); + var timer = try std.time.Timer.start(); + var i: usize = 0; + while (i < iterations) : (i += 1) { + const result = win.print(segments, opts); + std.mem.doNotOptimizeAway(result); + benchWordWidthPass(win, segments); + } + const elapsed_ns = timer.read(); + try printResults(writer, label, iterations, elapsed_ns, bytes_per_iter * iterations); +} + pub fn main() !void { var gpa = std.heap.GeneralPurposeAllocator(.{}){}; defer _ = gpa.deinit(); @@ -59,4 +124,106 @@ pub fn main() !void { const dirty_ns = timer.read(); const dirty_bytes: usize = dirty_writer.writer.end; try printResults(stdout, "dirty", iterations, dirty_ns, dirty_bytes); + + const pattern = "hello δΈ–η•Œ πŸ‘©β€πŸš€ foo bar "; + const small_text = try buildRepeated(allocator, pattern, 8); + defer allocator.free(small_text); + const medium_text = try buildRepeated(allocator, pattern, 32); + defer allocator.free(medium_text); + const large_text = try buildRepeated(allocator, pattern, 64); + defer allocator.free(large_text); + + const small_segments = [_]vaxis.Segment{.{ .text = small_text }}; + const medium_segments = [_]vaxis.Segment{.{ .text = medium_text }}; + const large_segments = [_]vaxis.Segment{.{ .text = large_text }}; + + const print_opts: vaxis.PrintOptions = .{ .wrap = .word, .commit = false }; + const win = vx.window(); + + try benchPrintWordBaseline(stdout, "print_word_small_baseline", win, small_segments[0..], print_opts, iterations); + try benchPrintWord(stdout, "print_word_small_cached", win, small_segments[0..], print_opts, iterations); + try benchPrintWordBaseline(stdout, "print_word_medium_baseline", win, medium_segments[0..], print_opts, iterations); + try benchPrintWord(stdout, "print_word_medium_cached", win, medium_segments[0..], print_opts, iterations); + try benchPrintWordBaseline(stdout, "print_word_large_baseline", win, large_segments[0..], print_opts, iterations); + try benchPrintWord(stdout, "print_word_large_cached", win, large_segments[0..], print_opts, iterations); } + +/// Iterates a slice of bytes by linebreaks. Lines are split by '\r', '\n', or '\r\n' +const BenchLineIterator = struct { + buf: []const u8, + index: usize = 0, + has_break: bool = true, + + fn next(self: *BenchLineIterator) ?[]const u8 { + if (self.index >= self.buf.len) return null; + + const start = self.index; + const end = std.mem.indexOfAnyPos(u8, self.buf, self.index, "\r\n") orelse { + if (start == 0) self.has_break = false; + self.index = self.buf.len; + return self.buf[start..]; + }; + + self.index = end; + self.consumeCR(); + self.consumeLF(); + return self.buf[start..end]; + } + + // consumes a \n byte + fn consumeLF(self: *BenchLineIterator) void { + if (self.index >= self.buf.len) return; + if (self.buf[self.index] == '\n') self.index += 1; + } + + // consumes a \r byte + fn consumeCR(self: *BenchLineIterator) void { + if (self.index >= self.buf.len) return; + if (self.buf[self.index] == '\r') self.index += 1; + } +}; + +/// Returns tokens of text and whitespace +const BenchWhitespaceTokenizer = struct { + buf: []const u8, + index: usize = 0, + + const Token = union(enum) { + // the length of whitespace. Tab = 8 + whitespace: usize, + word: []const u8, + }; + + fn next(self: *BenchWhitespaceTokenizer) ?Token { + if (self.index >= self.buf.len) return null; + const Mode = enum { + whitespace, + word, + }; + const first = self.buf[self.index]; + const mode: Mode = if (first == ' ' or first == '\t') .whitespace else .word; + switch (mode) { + .whitespace => { + var len: usize = 0; + while (self.index < self.buf.len) : (self.index += 1) { + switch (self.buf[self.index]) { + ' ' => len += 1, + '\t' => len += 8, + else => break, + } + } + return .{ .whitespace = len }; + }, + .word => { + const start = self.index; + while (self.index < self.buf.len) : (self.index += 1) { + switch (self.buf[self.index]) { + ' ', '\t' => break, + else => {}, + } + } + return .{ .word = self.buf[start..self.index] }; + }, + } + } +}; diff --git a/src/Window.zig b/src/Window.zig index 0e8d49fc..6f46816d 100644 --- a/src/Window.zig +++ b/src/Window.zig @@ -285,6 +285,12 @@ pub const PrintResult = struct { overflow: bool, }; +const WordPiece = struct { + start: usize, + len: usize, + width: u16, +}; + /// prints segments to the window. returns true if the text overflowed with the /// given wrap strategy and size. pub fn print(self: Window, segments: []const Segment, opts: PrintOptions) PrintResult { @@ -370,34 +376,82 @@ pub fn print(self: Window, segments: []const Segment, opts: PrintOptions) PrintR } }, .word => |word| { - const width = self.gwidth(word); + // Fixed buffer avoids heap allocation for the per-word cache + // (ArrayListUnmanaged append); overflow falls back to the + // original per-grapheme path (4KB ~ 170 graphemes on 64-bit). + var cache_buf: [4096]u8 = undefined; + var fba = std.heap.FixedBufferAllocator.init(&cache_buf); + var pieces = std.ArrayListUnmanaged(WordPiece){}; + var cached_all = true; + var width: u16 = 0; + var width_iter = unicode.graphemeIterator(word); + while (width_iter.next()) |grapheme| { + const s = grapheme.bytes(word); + const w = self.gwidth(s); + width +|= w; + if (cached_all) { + pieces.append(fba.allocator(), .{ + .start = grapheme.start, + .len = grapheme.len, + .width = @intCast(w), + }) catch { + cached_all = false; + }; + } + } if (width + col > self.width and width < self.width) { row += 1; col = 0; } - var grapheme_iterator = unicode.graphemeIterator(word); - while (grapheme_iterator.next()) |grapheme| { - soft_wrapped = false; - if (row >= self.height) { - overflow = true; - break :outer; + if (cached_all) { + for (pieces.items) |piece| { + soft_wrapped = false; + if (row >= self.height) { + overflow = true; + break :outer; + } + const s = word[piece.start .. piece.start + piece.len]; + const w = piece.width; + if (opts.commit) self.writeCell(col, row, .{ + .char = .{ + .grapheme = s, + .width = @intCast(w), + }, + .style = segment.style, + .link = segment.link, + }); + col += w; + if (col >= self.width) { + row += 1; + col = 0; + soft_wrapped = true; + } } - const s = grapheme.bytes(word); - const w = self.gwidth(s); - if (opts.commit) self.writeCell(col, row, .{ - .char = .{ - .grapheme = s, - .width = @intCast(w), - }, - .style = segment.style, - .link = segment.link, - }); - col += w; - if (col >= self.width) { - row += 1; - col = 0; - soft_wrapped = true; + } else { + var grapheme_iterator = unicode.graphemeIterator(word); + while (grapheme_iterator.next()) |grapheme| { + soft_wrapped = false; + if (row >= self.height) { + overflow = true; + break :outer; + } + const s = grapheme.bytes(word); + const w = self.gwidth(s); + if (opts.commit) self.writeCell(col, row, .{ + .char = .{ + .grapheme = s, + .width = @intCast(w), + }, + .style = segment.style, + .link = segment.link, + }); + col += w; + if (col >= self.width) { + row += 1; + col = 0; + soft_wrapped = true; + } } } }, From 45c8f5de3f77924fa270a8f9ff22d472513cc981 Mon Sep 17 00:00:00 2001 From: john xu Date: Sun, 28 Dec 2025 14:16:04 +0000 Subject: [PATCH 2/2] perf: reuse cached prefix when word cache overflows --- bench/bench.zig | 6 ++++++ src/Window.zig | 36 ++++++++++++++++++++++++++++++++---- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/bench/bench.zig b/bench/bench.zig index 487ea536..6d56e77c 100644 --- a/bench/bench.zig +++ b/bench/bench.zig @@ -126,16 +126,20 @@ pub fn main() !void { try printResults(stdout, "dirty", iterations, dirty_ns, dirty_bytes); const pattern = "hello δΈ–η•Œ πŸ‘©β€πŸš€ foo bar "; + const long_token = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; const small_text = try buildRepeated(allocator, pattern, 8); defer allocator.free(small_text); const medium_text = try buildRepeated(allocator, pattern, 32); defer allocator.free(medium_text); const large_text = try buildRepeated(allocator, pattern, 64); defer allocator.free(large_text); + const overflow_text = try buildRepeated(allocator, long_token, 200); + defer allocator.free(overflow_text); const small_segments = [_]vaxis.Segment{.{ .text = small_text }}; const medium_segments = [_]vaxis.Segment{.{ .text = medium_text }}; const large_segments = [_]vaxis.Segment{.{ .text = large_text }}; + const overflow_segments = [_]vaxis.Segment{.{ .text = overflow_text }}; const print_opts: vaxis.PrintOptions = .{ .wrap = .word, .commit = false }; const win = vx.window(); @@ -146,6 +150,8 @@ pub fn main() !void { try benchPrintWord(stdout, "print_word_medium_cached", win, medium_segments[0..], print_opts, iterations); try benchPrintWordBaseline(stdout, "print_word_large_baseline", win, large_segments[0..], print_opts, iterations); try benchPrintWord(stdout, "print_word_large_cached", win, large_segments[0..], print_opts, iterations); + try benchPrintWordBaseline(stdout, "print_word_overflow_baseline", win, overflow_segments[0..], print_opts, iterations); + try benchPrintWord(stdout, "print_word_overflow_cached", win, overflow_segments[0..], print_opts, iterations); } /// Iterates a slice of bytes by linebreaks. Lines are split by '\r', '\n', or '\r\n' diff --git a/src/Window.zig b/src/Window.zig index 6f46816d..7fd646f4 100644 --- a/src/Window.zig +++ b/src/Window.zig @@ -377,12 +377,14 @@ pub fn print(self: Window, segments: []const Segment, opts: PrintOptions) PrintR }, .word => |word| { // Fixed buffer avoids heap allocation for the per-word cache - // (ArrayListUnmanaged append); overflow falls back to the - // original per-grapheme path (4KB ~ 170 graphemes on 64-bit). + // (ArrayListUnmanaged append); if it fills, reuse the cached + // prefix and fall back to the original per-grapheme path for + // the remainder (4KB ~ 170 graphemes on 64-bit). var cache_buf: [4096]u8 = undefined; var fba = std.heap.FixedBufferAllocator.init(&cache_buf); var pieces = std.ArrayListUnmanaged(WordPiece){}; var cached_all = true; + var fallback_start: usize = word.len; var width: u16 = 0; var width_iter = unicode.graphemeIterator(word); while (width_iter.next()) |grapheme| { @@ -396,6 +398,7 @@ pub fn print(self: Window, segments: []const Segment, opts: PrintOptions) PrintR .width = @intCast(w), }) catch { cached_all = false; + fallback_start = grapheme.start; }; } } @@ -429,14 +432,39 @@ pub fn print(self: Window, segments: []const Segment, opts: PrintOptions) PrintR } } } else { - var grapheme_iterator = unicode.graphemeIterator(word); + for (pieces.items) |piece| { + soft_wrapped = false; + if (row >= self.height) { + overflow = true; + break :outer; + } + const s = word[piece.start .. piece.start + piece.len]; + const w = piece.width; + if (opts.commit) self.writeCell(col, row, .{ + .char = .{ + .grapheme = s, + .width = @intCast(w), + }, + .style = segment.style, + .link = segment.link, + }); + col += w; + if (col >= self.width) { + row += 1; + col = 0; + soft_wrapped = true; + } + } + + const tail_start = fallback_start; + var grapheme_iterator = unicode.graphemeIterator(word[tail_start..]); while (grapheme_iterator.next()) |grapheme| { soft_wrapped = false; if (row >= self.height) { overflow = true; break :outer; } - const s = grapheme.bytes(word); + const s = grapheme.bytes(word[tail_start..]); const w = self.gwidth(s); if (opts.commit) self.writeCell(col, row, .{ .char = .{