diff --git a/bench/bench.zig b/bench/bench.zig
index 83afc8b4..7433586f 100644
--- a/bench/bench.zig
+++ b/bench/bench.zig
@@ -1,5 +1,6 @@
 const std = @import("std");
 const vaxis = @import("vaxis");
+const ascii = vaxis.ascii;
 
 fn parseIterations(allocator: std.mem.Allocator) !usize {
     var args = try std.process.argsWithAllocator(allocator);
@@ -20,6 +21,56 @@ fn printResults(writer: anytype, label: []const u8, iterations: usize, elapsed_n
     );
 }
 
+/// Times `iterations` passes over `input`, handing the unconsumed remainder to
+/// `parser.parse` until the input is used up, then reports via `printResults`.
+fn benchParseStreamBaseline(writer: anytype, label: []const u8, parser: *vaxis.Parser, input: []const u8, iterations: usize) !void {
+    var timer = try std.time.Timer.start();
+    for (0..iterations) |_| {
+        var offset: usize = 0;
+        while (offset < input.len) {
+            const parsed = try parser.parse(input[offset..], null);
+            // n == 0 would leave `offset` unchanged forever, so end this pass.
+            if (parsed.n == 0) break;
+            offset += parsed.n;
+            std.mem.doNotOptimizeAway(parsed);
+        }
+        std.mem.doNotOptimizeAway(offset);
+    }
+    const elapsed_ns = timer.read();
+    try printResults(writer, label, iterations, elapsed_ns, input.len * iterations);
+}
+
+/// Same measurement as the baseline loop, except that every run reported by
+/// `ascii.fastPathLen` is turned into one key-press event per byte without
+/// calling the parser. Everything else still goes through `parser.parse`.
+fn benchParseStreamSimd(writer: anytype, label: []const u8, parser: *vaxis.Parser, input: []const u8, iterations: usize) !void {
+    var timer = try std.time.Timer.start();
+    for (0..iterations) |_| {
+        var offset: usize = 0;
+        while (offset < input.len) {
+            const rest = input[offset..];
+            const run = ascii.fastPathLen(rest);
+            if (run == 0) {
+                // No fast-path bytes at this position: use the parser.
+                const parsed = try parser.parse(rest, null);
+                if (parsed.n == 0) break;
+                offset += parsed.n;
+                std.mem.doNotOptimizeAway(parsed);
+                continue;
+            }
+            for (rest[0..run], 0..) |byte, k| {
+                const key: vaxis.Key = .{ .codepoint = byte, .text = rest[k .. k + 1] };
+                const event: vaxis.Event = .{ .key_press = key };
+                std.mem.doNotOptimizeAway(event);
+            }
+            offset += run;
+        }
+        std.mem.doNotOptimizeAway(offset);
+    }
+    const elapsed_ns = timer.read();
+    try printResults(writer, label, iterations, elapsed_ns, input.len * iterations);
+}
+
 pub fn main() !void {
     var gpa = std.heap.GeneralPurposeAllocator(.{}){};
     defer _ = gpa.deinit();
@@ -59,4 +110,14 @@ pub fn main() !void {
     const dirty_ns = timer.read();
     const dirty_bytes: usize = dirty_writer.writer.end;
     try printResults(stdout, "dirty", iterations, dirty_ns, dirty_bytes);
+
+    var parser_baseline: vaxis.Parser = .{};
+    var parser_simd: vaxis.Parser = .{};
+    const mixed_stream = "The quick brown fox jumps over the lazy dog " ++
+        "1234567890 !@#$%^&*() " ++
+        "\x1b[A" ++
+        "世界 1️⃣ 👩‍🚀!" ++
+        "\r";
+    try benchParseStreamBaseline(stdout, "parse_stream_loop_baseline", &parser_baseline, mixed_stream, iterations);
+    try benchParseStreamSimd(stdout, "parse_stream_loop_simd", &parser_simd, mixed_stream, iterations);
 }
diff --git a/src/Loop.zig b/src/Loop.zig
index a11e2f79..982631b1 100644
--- a/src/Loop.zig
+++ b/src/Loop.zig
@@ -5,6 +5,7 @@ const GraphemeCache = @import("GraphemeCache.zig");
 const Parser = @import("Parser.zig");
 const Queue = @import("queue.zig").Queue;
 const vaxis = @import("main.zig");
+const ascii = @import("ascii.zig");
 const Tty = vaxis.Tty;
 const Vaxis = @import("Vaxis.zig");
 
@@ -135,17 +136,36 @@ pub fn Loop(comptime T: type) type {
                     // read loop
                     read_loop: while (!self.should_quit) {
                         const n = try self.tty.read(buf[read_start..]);
+                        const total = read_start + n;
                         var seq_start: usize = 0;
-                        while (seq_start < n) {
-                            const result = try parser.parse(buf[seq_start..n], paste_allocator);
+                        while (seq_start < total) {
+                            if (@hasField(Event, "key_press")) {
+                                const input = buf[seq_start..total];
+                                const
ascii_len = ascii.fastPathLen(input); + if (ascii_len > 0) { + var i: usize = 0; + while (i < ascii_len) : (i += 1) { + const key: vaxis.Key = .{ + .codepoint = input[i], + .text = input[i .. i + 1], + }; + const event: Event = .{ .key_press = key }; + try handleEventGeneric(self, self.vaxis, &cache, Event, event, paste_allocator); + } + read_start = 0; + seq_start += ascii_len; + continue; + } + } + const result = try parser.parse(buf[seq_start..total], paste_allocator); if (result.n == 0) { // copy the read to the beginning. We don't use memcpy because // this could be overlapping, and it's also rare const initial_start = seq_start; - while (seq_start < n) : (seq_start += 1) { + while (seq_start < total) : (seq_start += 1) { buf[seq_start - initial_start] = buf[seq_start]; } - read_start = seq_start - initial_start + 1; + read_start = total - initial_start; continue :read_loop; } read_start = 0; diff --git a/src/ascii.zig b/src/ascii.zig new file mode 100644 index 00000000..a583953e --- /dev/null +++ b/src/ascii.zig @@ -0,0 +1,116 @@ +const std = @import("std"); +const uucode = @import("uucode"); + +/// Returns the length of a contiguous run of printable ASCII bytes (0x20..0x7E). 
+///
+/// Whole vector-sized chunks are checked first when the target suggests a
+/// vector length for `u8`; the remaining bytes, and the chunk containing the
+/// first byte outside the range, are examined one byte at a time. An empty
+/// input yields 0.
+pub fn printableRunLen(input: []const u8) usize {
+    var i: usize = 0;
+
+    // Vector pre-scan: step over chunks that are printable throughout. The
+    // suggested length is a comptime value, so targets without one compile
+    // only the scalar loop below.
+    if (std.simd.suggestVectorLength(u8)) |vec_len| {
+        const Vec = @Vector(vec_len, u8);
+        const lo: Vec = @splat(0x20);
+        const hi: Vec = @splat(0x7E);
+        while (i + vec_len <= input.len) : (i += vec_len) {
+            // Slicing by a comptime-known length yields an array pointer, so
+            // the chunk loads without a pointer cast.
+            const vec: Vec = input[i..][0..vec_len].*;
+            const ok = (vec >= lo) & (vec <= hi);
+            // Leave `i` at the start of the offending chunk; the scalar loop
+            // pinpoints the exact byte.
+            if (!@reduce(.And, ok)) break;
+        }
+    }
+
+    // Scalar scan: covers the tail and locates the first byte out of range.
+    while (i < input.len) : (i += 1) {
+        const b = input[i];
+        if (b < 0x20 or b > 0x7E) return i;
+    }
+    return input.len;
+}
+
+/// Returns the safe fast-path length for ASCII runs.
+///
+/// This behaves like printableRunLen, but if the next codepoint is a combining
+/// mark (Mn/Mc/Me) or a variation selector (U+FE00..U+FE0F, U+E0100..U+E01EF),
+/// it leaves the last ASCII byte for the parser to avoid breaking grapheme
+/// clusters. If the following UTF-8 sequence is incomplete, it also leaves the
+/// last ASCII byte.
+///
+/// Code points that join the preceding cluster without having a mark category
+/// (see `isNonMarkClusterExtender`) are held back the same way.
+pub fn fastPathLen(input: []const u8) usize {
+    const run = printableRunLen(input);
+    // Nothing can be held back from an empty run, and a run that reaches the
+    // end of `input` has no following byte to inspect.
+    if (run == 0 or run == input.len) return run;
+
+    const next = input[run..];
+    const first = next[0];
+    // The run was stopped by an ASCII byte outside 0x20..0x7E: release it all.
+    if (first < 0x80) return run;
+
+    // Not a valid UTF-8 lead byte: release the run and let the parser deal
+    // with the bad byte.
+    const seq_len = std.unicode.utf8ByteSequenceLength(first) catch return run;
+    // Truncated sequence: the code point is not known yet, so keep the last
+    // ASCII byte back.
+    if (next.len < seq_len) return run - 1;
+    const cp = std.unicode.utf8Decode(next[0..seq_len]) catch return run;
+
+    // Holding back is always safe: the parser then sees the last ASCII byte
+    // together with what follows and makes the clustering decision itself.
+    if (isVariationSelector(cp) or isNonMarkClusterExtender(cp)) return run - 1;
+    return switch (uucode.get(.general_category, cp)) {
+        .mark_nonspacing, .mark_spacing_combining, .mark_enclosing => run - 1,
+        else => run,
+    };
+}
+
+/// Reports whether `cp` lies in one of the two variation selector ranges,
+/// U+FE00..U+FE0F or U+E0100..U+E01EF.
+fn isVariationSelector(cp: u21) bool {
+    return (cp >= 0xFE00 and cp <= 0xFE0F) or (cp >= 0xE0100 and cp <= 0xE01EF);
+}
+
+/// Reports whether `cp` attaches to the preceding code point under the UAX #29
+/// grapheme cluster rules (Grapheme_Cluster_Break of Extend, ZWJ or
+/// SpacingMark) even though its general category is not Mn/Mc/Me.
+///
+/// NOTE(review): hand-maintained from GraphemeBreakProperty.txt; re-check the
+/// list when the Unicode data used by the parser is upgraded.
+fn isNonMarkClusterExtender(cp: u21) bool {
+    return switch (cp) {
+        0x0E33, 0x0EB3 => true, // Thai / Lao SARA AM (SpacingMark, Lo)
+        0x200C, 0x200D => true, // ZWNJ (Extend, Cf) and ZWJ
+        0xFF9E, 0xFF9F => true, // halfwidth katakana sound marks (Extend, Lm)
+        0x1F3FB...0x1F3FF => true, // emoji modifiers (Extend, Sk)
+        0xE0020...0xE007F => true, // tag characters (Extend, Cf)
+        else => false,
+    };
+}
+
+test "printableRunLen: empty" {
+    try std.testing.expectEqual(@as(usize, 0), printableRunLen(""));
+}
+
+test "printableRunLen: ascii run" {
+    try std.testing.expectEqual(@as(usize, 4), printableRunLen("abcd"));
+}
+
+test "printableRunLen: stops at control" {
+    try std.testing.expectEqual(@as(usize, 1), printableRunLen("a\nb"));
+}
+
+test "printableRunLen: stops at utf8" {
+    try std.testing.expectEqual(@as(usize, 5), printableRunLen("hello世界"));
+}
+
+test "printableRunLen: includes space and tilde" {
+    try std.testing.expectEqual(@as(usize, 2), printableRunLen(" ~"));
+}
+
+test "fastPathLen: keeps ascii before utf8" {
+    try std.testing.expectEqual(@as(usize, 5), fastPathLen("hello世界"));
+}
+
+test "fastPathLen: holds for combining mark" {
+    try std.testing.expectEqual(@as(usize, 0), fastPathLen("a\u{0301}"));
+}
+
+test "fastPathLen: holds for keycap" {
+    try std.testing.expectEqual(@as(usize, 0), fastPathLen("1\u{20E3}"));
+}
+
+test "fastPathLen: holds for variation selector" {
+    try std.testing.expectEqual(@as(usize, 0), fastPathLen("a\u{FE0F}"));
+}
+
+test "fastPathLen: holds for incomplete utf8" {
+    try std.testing.expectEqual(@as(usize, 0), fastPathLen("a\xE2"));
+}
+
+test "fastPathLen: leaves last ascii for incomplete utf8 after run" {
+    try std.testing.expectEqual(@as(usize, 2), fastPathLen("abc\xE2"));
+}
+
+test "fastPathLen: holds for zero width joiner" {
+    try std.testing.expectEqual(@as(usize, 0), fastPathLen("a\u{200D}"));
+}
+
+test "fastPathLen: holds for zero width non-joiner after run" {
+    try std.testing.expectEqual(@as(usize, 2), fastPathLen("abc\u{200C}"));
+}
+
+test "fastPathLen: holds for emoji modifier" {
+    try std.testing.expectEqual(@as(usize, 0), fastPathLen("a\u{1F3FB}"));
+}
diff --git a/src/main.zig b/src/main.zig
index 644626ee..df623331 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -29,6 +29,7 @@ pub const ctlseqs = @import("ctlseqs.zig");
 pub const GraphemeCache = @import("GraphemeCache.zig");
 pub const Event = @import("event.zig").Event;
 pub const unicode = @import("unicode.zig");
+pub const ascii = @import("ascii.zig");
 
 pub const vxfw = @import("vxfw/vxfw.zig");
 