rockorager · dyxushuai · Dec 28, 2025 · Dec 28, 2025 · Dec 28, 2025 · Dec 28, 2025
diff --git a/bench/bench.zig b/bench/bench.zig
@@ -1,5 +1,6 @@
 const std = @import("std");
 const vaxis = @import("vaxis");
+const ascii = vaxis.ascii;
 
 fn parseIterations(allocator: std.mem.Allocator) !usize {
     var args = try std.process.argsWithAllocator(allocator);
@@ -20,6 +21,56 @@ fn printResults(writer: anytype, label: []const u8, iterations: usize, elapsed_n
     );
 }
 
+/// Times `iterations` passes that feed all of `input` through `parser.parse`, then prints the result under `label`.
+fn benchParseStreamBaseline(writer: anytype, label: []const u8, parser: *vaxis.Parser, input: []const u8, iterations: usize) !void {
+    var timer = try std.time.Timer.start();
+    for (0..iterations) |_| {
+        var offset: usize = 0;
+        while (offset < input.len) {
+            const parsed = try parser.parse(input[offset..], null);
+            // Stop this pass when the parser consumed nothing, otherwise the loop would never advance.
+            if (parsed.n == 0) break;
+            std.mem.doNotOptimizeAway(parsed);
+            offset += parsed.n;
+        }
+        std.mem.doNotOptimizeAway(offset);
+    }
+    try printResults(writer, label, iterations, timer.read(), input.len * iterations);
+}
+
+/// Times `iterations` passes over `input` that build a key press event for
+/// every byte of each `ascii.fastPathLen` run and hand the remaining bytes to
+/// `parser.parse`, then prints the result under `label`.
+fn benchParseStreamSimd(writer: anytype, label: []const u8, parser: *vaxis.Parser, input: []const u8, iterations: usize) !void {
+    var timer = try std.time.Timer.start();
+    for (0..iterations) |_| {
+        var offset: usize = 0;
+        while (offset < input.len) {
+            const rest = input[offset..];
+            const run_len = ascii.fastPathLen(rest);
+            if (run_len == 0) {
+                // No fast-path bytes here: let the parser take the next sequence.
+                const parsed = try parser.parse(rest, null);
+                if (parsed.n == 0) break;
+                std.mem.doNotOptimizeAway(parsed);
+                offset += parsed.n;
+                continue;
+            }
+
+            // One key press per byte, with `text` aliasing that single byte.
+            for (rest[0..run_len], 0..) |byte, at| {
+                const key: vaxis.Key = .{ .codepoint = byte, .text = rest[at .. at + 1] };
+                const event: vaxis.Event = .{ .key_press = key };
+                std.mem.doNotOptimizeAway(event);
+            }
+            offset += run_len;
+        }
+        std.mem.doNotOptimizeAway(offset);
+    }
+    const elapsed_ns = timer.read();
+    try printResults(writer, label, iterations, elapsed_ns, input.len * iterations);
+}
+
 pub fn main() !void {
     var gpa = std.heap.GeneralPurposeAllocator(.{}){};
     defer _ = gpa.deinit();
@@ -59,4 +110,14 @@ pub fn main() !void {
     const dirty_ns = timer.read();
     const dirty_bytes: usize = dirty_writer.writer.end;
     try printResults(stdout, "dirty", iterations, dirty_ns, dirty_bytes);
+
+    var parser_baseline: vaxis.Parser = .{};
+    var parser_simd: vaxis.Parser = .{};
+    const mixed_stream = "The quick brown fox jumps over the lazy dog " ++
+        "1234567890 !@#$%^&*() " ++
+        "\x1b[A" ++
+        "世界 1️⃣ 👩‍🚀!" ++
+        "\r";
+    try benchParseStreamBaseline(stdout, "parse_stream_loop_baseline", &parser_baseline, mixed_stream, iterations);
+    try benchParseStreamSimd(stdout, "parse_stream_loop_simd", &parser_simd, mixed_stream, iterations);
 }
diff --git a/src/Loop.zig b/src/Loop.zig
@@ -5,6 +5,7 @@ const GraphemeCache = @import("GraphemeCache.zig");
 const Parser = @import("Parser.zig");
 const Queue = @import("queue.zig").Queue;
 const vaxis = @import("main.zig");
+const ascii = @import("ascii.zig");
 const Tty = vaxis.Tty;
 const Vaxis = @import("Vaxis.zig");
 
@@ -135,17 +136,36 @@ pub fn Loop(comptime T: type) type {
                     // read loop
                     read_loop: while (!self.should_quit) {
                         const n = try self.tty.read(buf[read_start..]);
+                        const total = read_start + n;
                         var seq_start: usize = 0;
-                        while (seq_start < n) {
-                            const result = try parser.parse(buf[seq_start..n], paste_allocator);
+                        while (seq_start < total) {
+                            if (@hasField(Event, "key_press")) {
+                                const input = buf[seq_start..total];
+                                const ascii_len = ascii.fastPathLen(input);
+                                if (ascii_len > 0) {
+                                    var i: usize = 0;
+                                    while (i < ascii_len) : (i += 1) {
+                                        const key: vaxis.Key = .{
+                                            .codepoint = input[i],
+                                            .text = input[i .. i + 1],
+                                        };
+                                        const event: Event = .{ .key_press = key };
+                                        try handleEventGeneric(self, self.vaxis, &cache, Event, event, paste_allocator);
+                                    }
+                                    read_start = 0;
+                                    seq_start += ascii_len;
+                                    continue;
+                                }
+                            }
+                            const result = try parser.parse(buf[seq_start..total], paste_allocator);
                             if (result.n == 0) {
                                 // copy the read to the beginning. We don't use memcpy because
                                 // this could be overlapping, and it's also rare
                                 const initial_start = seq_start;
-                                while (seq_start < n) : (seq_start += 1) {
+                                while (seq_start < total) : (seq_start += 1) {
                                     buf[seq_start - initial_start] = buf[seq_start];
                                 }
-                                read_start = seq_start - initial_start + 1;
+                                read_start = total - initial_start;
                                 continue :read_loop;
                             }
                             read_start = 0;

diff --git a/src/ascii.zig b/src/ascii.zig
@@ -0,0 +1,98 @@
+const std = @import("std");
+const uucode = @import("uucode");
+
+/// Counts the leading bytes of `input` that are printable ASCII (0x20..0x7E).
+///
+/// The result is the index of the first byte outside that range, or
+/// `input.len` when every byte is printable (including the empty input).
+/// Whole vectors are compared at once when `std.simd.suggestVectorLength`
+/// offers a width for `u8`; otherwise the input is scanned byte by byte.
+pub fn printableRunLen(input: []const u8) usize {
+    var pos: usize = 0;
+
+    const suggested_lanes = std.simd.suggestVectorLength(u8);
+    if (suggested_lanes) |lanes| {
+        const Chunk = @Vector(lanes, u8);
+        const min_printable: Chunk = @splat(0x20);
+        const max_printable: Chunk = @splat(0x7E);
+        // Step over whole vectors while every lane is printable. A vector
+        // holding an offending byte is left to the byte-wise scan below,
+        // which then reports that byte's exact index.
+        while (pos + lanes <= input.len) : (pos += lanes) {
+            const bytes: Chunk = input[pos..][0..lanes].*;
+            const in_range = (bytes >= min_printable) & (bytes <= max_printable);
+            if (!@reduce(.And, in_range)) break;
+        }
+    }
+
+    // Byte-wise scan: covers the tail shorter than one vector, a vector
+    // rejected above, or the whole input when no vector length is suggested.
+    while (pos < input.len) : (pos += 1) {
+        switch (input[pos]) {
+            0x20...0x7E => {},
+            else => return pos,
+        }
+    }
+    return input.len;
+}
+
+/// Returns how many leading bytes of `input` may skip the parser.
+///
+/// This is printableRunLen, minus the final ASCII byte whenever the bytes after
+/// the run could still attach to it as one grapheme cluster: a combining mark
+/// (Mn/Mc/Me, which covers keycaps and variation selectors), ZWNJ/ZWJ, an
+/// emoji modifier, or a UTF-8 sequence that is still incomplete.
+pub fn fastPathLen(input: []const u8) usize {
+    const run = printableRunLen(input);
+    // Take the run as is when it is empty, reaches the end of the input, or
+    // is followed by a non-printable ASCII byte (a control byte or DEL).
+    if (run == 0 or run == input.len or input[run] < 0x80) return run;
+    const next = input[run..];
+    const seq_len = std.unicode.utf8ByteSequenceLength(next[0]) catch return run;
+    if (next.len < seq_len) return run - 1;
+    const cp = std.unicode.utf8Decode(next[0..seq_len]) catch return run;
+    switch (cp) {
+        // These extend a cluster without being in a mark category.
+        0x200C, 0x200D, 0x1F3FB...0x1F3FF => return run - 1,
+        else => {},
+    }
+    return switch (uucode.get(.general_category, cp)) {
+        .mark_nonspacing,
+        .mark_spacing_combining,
+        .mark_enclosing,
+        => run - 1,
+        else => run,
+    };
+}
+
+test "printableRunLen: empty" {
+    try std.testing.expectEqual(0, printableRunLen(""));
+}
+
+test "printableRunLen: ascii run" {
+    try std.testing.expectEqual(4, printableRunLen("abcd"));
+}
+
+test "printableRunLen: stops at control" {
+    try std.testing.expectEqual(1, printableRunLen("a\nb")); // '\n' is below 0x20
+}
+
+test "printableRunLen: stops at utf8" {
+    try std.testing.expectEqual(5, printableRunLen("hello世界")); // lead byte is above 0x7E
+}
+
+test "fastPathLen: keeps ascii before utf8" {
+    try std.testing.expectEqual(5, fastPathLen("hello世界")); // a letter, not a mark
+}
+
+test "fastPathLen: holds for combining mark" {
+    try std.testing.expectEqual(0, fastPathLen("a\u{0301}")); // U+0301 is Mn
+}
+
+test "fastPathLen: holds for keycap" {
+    try std.testing.expectEqual(0, fastPathLen("1\u{20E3}")); // U+20E3 is Me
+}
+
+test "fastPathLen: holds for incomplete utf8" {
+    try std.testing.expectEqual(0, fastPathLen("a\xE2")); // 0xE2 opens a 3-byte sequence
+}
diff --git a/src/main.zig b/src/main.zig
@@ -29,6 +29,7 @@ pub const ctlseqs = @import("ctlseqs.zig");
 pub const GraphemeCache = @import("GraphemeCache.zig");
 pub const Event = @import("event.zig").Event;
 pub const unicode = @import("unicode.zig");
+pub const ascii = @import("ascii.zig");
 
 pub const vxfw = @import("vxfw/vxfw.zig");