From c73a3b1a85cb073e9b00a47377850ad354dcb2ab Mon Sep 17 00:00:00 2001 From: alberic89 <alberic89@gmx.com> Date: Sat, 10 Aug 2024 10:29:35 +0200 Subject: [PATCH 01/25] Create .gitignore --- .gitignore | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5ab9435 --- /dev/null +++ b/.gitignore @@ -0,0 +1,32 @@ +# This file is for zig-specific build artifacts. + +.zig-cache/ +zig-out/ +build/ +build-*/ +docgen_tmp/ + +# Compiled Object files +*.slo +*.lo +*.o +*.obj +*.elf +*.ko + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Libraries +*.lib +*.a +*.la +*.lo + +# Shared objects (inc. Windows DLLs) +*.dll +*.so +*.so.* +*.dylib From fc5e83db44eccb61f5196f47e5a1322aa6e9a8a0 Mon Sep 17 00:00:00 2001 From: alberic89 <alberic89@gmx.com> Date: Sat, 10 Aug 2024 10:40:45 +0200 Subject: [PATCH 02/25] Add working Unicode support Add a new implementation for Unicode. Add a library to correctly handle Unicode strings. Make non-breaking changes to the ASCII implementation. 
--- build.zig | 17 +++++ build.zig.zon | 4 ++ src/root.zig | 172 ++++++++++++++++++++++++++++++++++++++++++++++---- src/utils.zig | 30 +++++++++ 4 files changed, 210 insertions(+), 13 deletions(-) diff --git a/build.zig b/build.zig index 0a535ee..c16cc36 100644 --- a/build.zig +++ b/build.zig @@ -4,6 +4,11 @@ pub fn build(b: *std.Build) void { const target = b.standardTargetOptions(.{}); const optimize = b.standardOptimizeOption(.{}); + const zg = b.dependency("zg", .{ + .target = target, + .optimize = optimize, + }); + _ = b.addModule("fuzzig", .{ .root_source_file = b.path("src/root.zig") }); const lib = b.addStaticLibrary(.{ @@ -13,6 +18,12 @@ pub fn build(b: *std.Build) void { .optimize = optimize, }); + lib.root_module.addImport("code_point", zg.module("code_point")); + lib.root_module.addImport("GenCatData", zg.module("GenCatData")); + lib.root_module.addImport("CaseData", zg.module("CaseData")); + lib.root_module.addImport("Normalize", zg.module("Normalize")); + lib.root_module.addImport("CaseFold", zg.module("CaseFold")); + b.installArtifact(lib); const lib_unit_tests = b.addTest(.{ @@ -21,6 +32,12 @@ pub fn build(b: *std.Build) void { .optimize = optimize, }); + lib_unit_tests.root_module.addImport("code_point", zg.module("code_point")); + lib_unit_tests.root_module.addImport("GenCatData", zg.module("GenCatData")); + lib_unit_tests.root_module.addImport("CaseData", zg.module("CaseData")); + lib_unit_tests.root_module.addImport("Normalize", zg.module("Normalize")); + lib_unit_tests.root_module.addImport("CaseFold", zg.module("CaseFold")); + const run_lib_unit_tests = b.addRunArtifact(lib_unit_tests); const test_step = b.step("test", "Run unit tests"); diff --git a/build.zig.zon b/build.zig.zon index 712ece9..b4f86f4 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -38,6 +38,10 @@ // // computed. This field and `url` are mutually exclusive. 
// .path = "foo", //}, + .zg = .{ + .url = "https://codeberg.org/dude_the_builder/zg/archive/v0.13.2.tar.gz", + .hash = "122055beff332830a391e9895c044d33b15ea21063779557024b46169fb1984c6e40", + }, }, // Specifies the set of files and directories that are included in this package. diff --git a/src/root.zig b/src/root.zig index 765d6d5..2115954 100644 --- a/src/root.zig +++ b/src/root.zig @@ -2,6 +2,13 @@ const std = @import("std"); const utils = @import("utils.zig"); const structures = @import("structures.zig"); +const code_point = @import("code_point"); +const GenCatData = @import("GenCatData"); +const CaseData = @import("CaseData"); +const Normalize = @import("Normalize"); + +const Allocator = std.mem.Allocator; + const CharacterType = utils.CharacterType; const MatrixT = structures.MatrixT; @@ -100,6 +107,12 @@ pub fn Algorithm( impl: Impl, + const TypeOfCaracter = switch (Impl) { + AsciiOptions => u8, + UnicodeOptions => u21, + else => unreachable, + }; + pub fn deinit(self: *Self) void { self.m.deinit(); self.x.deinit(); @@ -117,6 +130,9 @@ pub fn Algorithm( max_needle: usize, impl: Impl, ) !Self { + var impl_with_allocator = impl; + impl_with_allocator.allocator = allocator; + const rows = max_needle + 1; const cols = max_haystack + 1; @@ -149,7 +165,7 @@ pub fn Algorithm( .first_match_buffer = first_match_buffer, .traceback_buffer = traceback_buffer, .allocator = allocator, - .impl = impl, + .impl = impl_with_allocator, }; } @@ -179,8 +195,14 @@ pub fn Algorithm( .score = 0, }; - const rows = needle.len; - const cols = haystack.len; + const haystack_normal = self.impl.convertString(haystack); + defer self.allocator.free(haystack_normal); + + const needle_normal = self.impl.convertString(needle); + defer self.allocator.free(needle_normal); + + const rows = needle_normal.len; + const cols = haystack_normal.len; // resize the view into memory self.m.resizeNoAlloc(rows + 1, cols + 1); @@ -188,25 +210,25 @@ pub fn Algorithm( self.m_skip.resizeNoAlloc(rows + 1, 
cols + 1); const first_match_indices = utils.firstMatchesGeneric( - ElType, + TypeOfCaracter, &self.impl, Impl.eqlFunc, self.first_match_buffer, - haystack, - needle, + haystack_normal, + needle_normal, ) orelse return null; self.reset(rows + 1, cols + 1, first_match_indices); - self.determineBonuses(haystack); + self.determineBonuses(TypeOfCaracter, haystack_normal); - try self.populateMatrices(haystack, needle, first_match_indices); + try self.populateMatrices(haystack_normal, needle_normal, first_match_indices); const col_max = self.findMaximalElement( first_match_indices, rows, cols, ); - const last_row_index = needle.len; + const last_row_index = needle_normal.len; const s = self.m.get(last_row_index, col_max); return .{ .score = s, @@ -268,8 +290,8 @@ pub fn Algorithm( return buf; } - fn determineBonuses(self: *Self, haystack: []const ElType) void { - var prev: u8 = 0; + fn determineBonuses(self: *Self, T: type, haystack: []const T) void { + var prev: T = 0; for (1.., haystack) |i, h| { self.role_bonus[i] = Impl.bonusFunc(&self.impl, scores, prev, h); prev = h; @@ -325,8 +347,8 @@ pub fn Algorithm( fn populateMatrices( self: *Self, - haystack: []const ElType, - needle: []const ElType, + haystack: []const TypeOfCaracter, + needle: []const TypeOfCaracter, first_match_indices: []const usize, ) !void { for (1.., needle) |i, n| { @@ -455,6 +477,8 @@ pub fn Algorithm( pub const AsciiOptions = struct { const AsciiScores = Scores(i32); + pub const TypeOfCharacter = u8; + case_sensitive: bool = true, case_penalize: bool = false, // treat spaces as wildcards for any kind of boundary @@ -463,6 +487,13 @@ pub const AsciiOptions = struct { penalty_case_mistmatch: i32 = -2, + /// Don't forget the allocator !!! 
+ allocator: Allocator = undefined, + + fn convertString(a: *const AsciiOptions, string: []const u8) []const TypeOfCharacter { + return a.allocator.dupe(TypeOfCharacter, string) catch @panic("Memory error"); + } + fn eqlFunc(a: *const AsciiOptions, h: u8, n: u8) bool { if (n == ' ' and a.wildcard_spaces) { return switch (h) { @@ -508,9 +539,98 @@ pub const AsciiOptions = struct { } }; +pub const UnicodeOptions = struct { + const UnicodeScores = Scores(i32); + + pub const TypeOfCharacter: type = u21; + + case_sensitive: bool = true, + case_penalize: bool = false, + // treat spaces as wildcards for any kind of boundary + // i.e. match with any `[^a-z,A-Z,0-9]` + wildcard_spaces: bool = false, + + penalty_case_mistmatch: i32 = -2, + + /// Don't forget the allocator !!! + allocator: Allocator = undefined, + + fn convertString(a: *const UnicodeOptions, string: []const u8) []const TypeOfCharacter { + var norm_data: Normalize.NormData = undefined; + Normalize.NormData.init(&norm_data, a.allocator) catch @panic("Cannot normalize string"); + defer norm_data.deinit(); + + const n = Normalize{ .norm_data = &norm_data }; + + const nfc_result = n.nfc(a.allocator, string) catch @panic("Cannot normalize string"); + defer nfc_result.deinit(); + + var iter = code_point.Iterator{ .bytes = nfc_result.slice }; + + var converted_string = std.ArrayList(TypeOfCharacter).init(a.allocator); + defer converted_string.deinit(); + + while (iter.next()) |c| { + converted_string.append(c.code) catch @panic("Memory error"); + } + return converted_string.toOwnedSlice() catch @panic("Memory error"); + } + + fn eqlFunc(a: *const UnicodeOptions, h: u21, n: u21) bool { + const gcd = GenCatData.init(a.allocator) catch @panic("Memory error"); + defer gcd.deinit(); + if (gcd.isSeparator(n) and a.wildcard_spaces) { + if (gcd.isLetter(h) or gcd.isNumber(h) or gcd.isSymbol(h)) { + return true; + } else { + return false; + } + } else if (!a.case_sensitive) { + const cd = CaseData.init(a.allocator) catch 
@panic("Memory error"); + defer cd.deinit(); + return cd.toLower(h) == cd.toLower(n); + } else { + return h == n; + } + } + + fn scoreFunc( + a: *const UnicodeOptions, + comptime scores: UnicodeScores, + h: u21, + n: u21, + ) ?i32 { + if (!a.eqlFunc(h, n)) return null; + + if (a.case_penalize and (h != n)) { + return scores.score_match + a.penalty_case_mistmatch; + } + return scores.score_match; + } + + fn bonusFunc( + self: *const UnicodeOptions, + comptime scores: UnicodeScores, + h: u21, + n: u21, + ) i32 { + const p = CharacterType.fromUnicode(h, self.allocator); + const c = CharacterType.fromUnicode(n, self.allocator); + + return switch (p.roleNextTo(c)) { + .Head => scores.bonus_head, + .Camel => scores.bonus_camel, + .Break => scores.bonus_break, + .Tail => scores.bonus_tail, + }; + } +}; + /// Default ASCII Fuzzy Finder pub const Ascii = Algorithm(u8, i32, .{}, AsciiOptions); +pub const Unicode = Algorithm(u8, i32, .{}, UnicodeOptions); + fn doTestScore(alg: *Ascii, haystack: []const u8, needle: []const u8, comptime score: i32) !void { const s = alg.score(haystack, needle); @@ -521,6 +641,18 @@ fn doTestScore(alg: *Ascii, haystack: []const u8, needle: []const u8, comptime s try std.testing.expectEqual(score, s.?); } +fn doTestScoreUnicode(alg: *Unicode, haystack: []const u8, needle: []const u8, comptime score: ?i32) !void { + const s = alg.score(haystack, needle); + + if (score == null) { + // const stderr = std.io.getStdErr().writer(); + // try alg.debugPrint(stderr, haystack, needle); + std.debug.print("SCORE : {d}\n", .{s orelse -1}); + } else { + try std.testing.expectEqual(score, s.?); + } +} + test "algorithm test" { const o = AsciiOptions.AsciiScores{}; @@ -714,3 +846,17 @@ test "traceback" { try doTestTraceback(&alg, "A" ++ "a" ** 20 ++ "B", "AB", &.{ 0, 21 }); try doTestTraceback(&alg, "./src/main.zig", "main", &.{ 6, 7, 8, 9 }); } + +test "Unicode search" { + const o = UnicodeOptions.UnicodeScores{}; + + var alg = try Unicode.init( + 
std.testing.allocator, + 128, + 32, + .{}, + ); + defer alg.deinit(); + + try doTestScoreUnicode(&alg, "zig⚡ fast", "⚡", o.score_match); +} diff --git a/src/utils.zig b/src/utils.zig index 02741ec..2b8cb56 100644 --- a/src/utils.zig +++ b/src/utils.zig @@ -1,5 +1,8 @@ const std = @import("std"); +const GenCatData = @import("GenCatData"); +const CaseData = @import("CaseData"); + pub fn digitCount(v: anytype) usize { const abs: u32 = @intCast(@abs(v)); if (abs == 0) return 1; @@ -29,6 +32,33 @@ pub const CharacterType = enum { }; } + pub fn fromUnicode(c: u21, allocator: std.mem.Allocator) CharacterType { + const cd = CaseData.init(allocator) catch @panic("Memory error"); + defer cd.deinit(); + const gcd = GenCatData.init(allocator) catch @panic("Memory error"); + defer gcd.deinit(); + if (cd.isLower(c)) { + return .Lower; + } else if (cd.isUpper(c)) { + return .Upper; + } else if (gcd.isNumber(c)) { + return .Number; + } else if (switch (c) { + ' ', '\\', '/', '|', '(', ')', '[', ']', '{', '}' => true, + else => false, + }) { + return .HardSeperator; + } else if (gcd.isSeparator(c)) { + return .HardSeperator; + } else if (gcd.isPunctuation(c) or gcd.isSymbol(c) or gcd.isMark(c)) { + return .SoftSeperator; + } else if (gcd.isControl(c)) { + return .Empty; + } else { + return .Lower; // Maybe .Empty instead ? 
+ } + } + const Role = enum { Head, Break, From 6ea5cceca53673ff18fa03db68c18b488c28771b Mon Sep 17 00:00:00 2001 From: fjebaker <fergusbkr@gmail.com> Date: Sat, 10 Aug 2024 16:25:20 +0100 Subject: [PATCH 03/25] feat: added simple benchmark suite --- build.zig | 11 + src/benchmarks.zig | 60 ++++ src/main.zig | 763 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 834 insertions(+) create mode 100644 src/benchmarks.zig create mode 100644 src/main.zig diff --git a/build.zig b/build.zig index c16cc36..4f97f09 100644 --- a/build.zig +++ b/build.zig @@ -42,4 +42,15 @@ pub fn build(b: *std.Build) void { const test_step = b.step("test", "Run unit tests"); test_step.dependOn(&run_lib_unit_tests.step); + + const exe = b.addExecutable(.{ + .name = "fuzzig", + .root_source_file = b.path("src/main.zig"), + .target = target, + .optimize = optimize, + }); + + const run_cmd = b.addRunArtifact(exe); + const benchmark_step = b.step("benchmark", "Run benchmarks."); + benchmark_step.dependOn(&run_cmd.step); } diff --git a/src/benchmarks.zig b/src/benchmarks.zig new file mode 100644 index 0000000..3d454c7 --- /dev/null +++ b/src/benchmarks.zig @@ -0,0 +1,60 @@ +const std = @import("std"); + +pub const BenchmarkOptions = struct { + trials: u32 = 10_000, + warmup: u32 = 100, +}; + +pub const BenchmarkResult = struct { + const Self = @This(); + + alloc: std.mem.Allocator, + opts: BenchmarkOptions, + mean: u64, + + pub fn deinit(_: *Self) void {} + + pub fn printSummary(self: *const Self) void { + const print = std.debug.print; + print( + \\ Benchmark summary for {d} trials: + \\ Mean: {s} + \\ + , .{ + self.opts.trials, + std.fmt.fmtDuration(self.mean), + }); + } +}; + +fn invoke(comptime func: anytype, args: std.meta.ArgsTuple(@TypeOf(func))) void { + const ReturnType = @typeInfo(@TypeOf(func)).Fn.return_type.?; + switch (@typeInfo(ReturnType)) { + .ErrorUnion => { + _ = @call(.never_inline, func, args) catch { + // std.debug.panic("Benchmarked function returned error {s}", 
.{err}); + }; + }, + else => _ = @call(.never_inline, func, args), + } +} + +pub fn benchmark( + alloc: std.mem.Allocator, + comptime func: anytype, + args: std.meta.ArgsTuple(@TypeOf(func)), + opts: BenchmarkOptions, +) !BenchmarkResult { + var count: usize = 0; + while (count < opts.warmup) : (count += 1) { + std.mem.doNotOptimizeAway(true); + invoke(func, args); + } + var timer = try std.time.Timer.start(); + while (count < opts.trials) : (count += 1) { + std.mem.doNotOptimizeAway(true); + invoke(func, args); + } + const mean = @divFloor(timer.lap(), opts.trials); + return .{ .alloc = alloc, .opts = opts, .mean = mean }; +} diff --git a/src/main.zig b/src/main.zig new file mode 100644 index 0000000..ef3dc0e --- /dev/null +++ b/src/main.zig @@ -0,0 +1,763 @@ +const std = @import("std"); +const fuzzy = @import("root.zig"); +const bmark = @import("benchmarks.zig"); + +pub fn main() !void { + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + defer _ = gpa.deinit(); + + const alloc = gpa.allocator(); + + var finder = try fuzzy.Ascii.init(alloc, 6000, 500, .{}); + defer finder.deinit(); + + const scores = try alloc.alloc(i32, LINES.len); + defer alloc.free(scores); + + var time = try std.time.Timer.start(); + for (LINES, scores) |line, *s| { + s.* = finder.score(line, "a") orelse 0; + } + const end = time.lap(); + std.debug.print("Elapsed: {s}\n", .{std.fmt.fmtDuration(end)}); + + { + const score = finder.score( + "hello world this is a short message about things", + "short about", + ).?; + std.debug.print("Passed 1: {any} \n", .{score == 217}); + } + { + const score = finder.score( + "hello world this is a short message about things", + "short abut", + ).?; + std.debug.print("Passed 2: {any} \n", .{score == 189}); + } + + var result = try bmark.benchmark( + alloc, + runBmark, + .{&finder}, + .{ .warmup = 10, .trials = 100 }, + ); + defer result.deinit(); + + result.printSummary(); +} + +pub fn runBmark(finder: *fuzzy.Ascii) void { + const score = finder.score( + 
"hello world this is a short message about things" ** 100, + "short abut thns", + ).?; + std.mem.doNotOptimizeAway(score); +} + +pub const LINES = [_][]const u8{ + "./zig-cache", + "./zig-cache/z", + "./zig-cache/z/18f4c34f7e4a1bb42c247c350f2c8f2c", + "./zig-cache/z/ba8144543f956bafd95fbf07300ab485", + "./zig-cache/z/783202c1a3c6b1c4ac29be10ac8d17ce", + "./zig-cache/z/3bb19410f205f2473ff55b0dc2e4d4f0", + "./zig-cache/z/c9569ed275b0d644e3c85adab09cc334", + "./zig-cache/z/bc8b1b263b123bd9ac43f821d8c71394", + "./zig-cache/z/33f486c9eeac22a10ccf16fbd8a8f94a", + "./zig-cache/z/a03b9d706caedeaa5f50f562546535c4", + "./zig-cache/z/3e80d6ce17fa03fdf3c238c49cee0cda", + "./zig-cache/z/b6c041049ff9815a7e38d613b6a631e4", + "./zig-cache/z/b2cb6a1f26363a1e3beaffccb60f94bf", + "./zig-cache/z/4dc0ffa4989929fd413eea0cf1c19f8f", + "./zig-cache/o", + "./zig-cache/o/18d612284a30703119e63186f028b81f", + "./zig-cache/o/18d612284a30703119e63186f028b81f/test", + "./zig-cache/o/18d612284a30703119e63186f028b81f/test.o", + "./zig-cache/o/61050a7d2873b1552bad7107e6e706bb", + "./zig-cache/o/61050a7d2873b1552bad7107e6e706bb/libfuzzig.a", + "./zig-cache/o/61050a7d2873b1552bad7107e6e706bb/libfuzzig.a.o", + "./zig-cache/o/cfb50865ff388b1e1fbbd918ed0ea038", + "./zig-cache/o/cfb50865ff388b1e1fbbd918ed0ea038/fuzzig.o", + "./zig-cache/o/cfb50865ff388b1e1fbbd918ed0ea038/fuzzig", + "./zig-cache/o/0aa2494f8106ef317df0772f7872cff7", + "./zig-cache/o/0aa2494f8106ef317df0772f7872cff7/test", + "./zig-cache/o/0aa2494f8106ef317df0772f7872cff7/test.o", + "./zig-cache/o/1800844e67305d56ba6e5204188b60f7", + "./zig-cache/o/1800844e67305d56ba6e5204188b60f7/test", + "./zig-cache/o/1800844e67305d56ba6e5204188b60f7/test.o", + "./zig-cache/o/9ccd0442b1a415a5ebb1e99a912efa36", + "./zig-cache/o/9ccd0442b1a415a5ebb1e99a912efa36/test", + "./zig-cache/o/9ccd0442b1a415a5ebb1e99a912efa36/test.o", + "./zig-cache/o/4d7836411e7da0f1d220fdf04f552a9a", + "./zig-cache/o/4d7836411e7da0f1d220fdf04f552a9a/test", + 
"./zig-cache/o/4d7836411e7da0f1d220fdf04f552a9a/test.o", + "./zig-cache/o/f2053673c1f2233e518ba320f2880cdd", + "./zig-cache/o/f2053673c1f2233e518ba320f2880cdd/test", + "./zig-cache/o/f2053673c1f2233e518ba320f2880cdd/test.o", + "./zig-cache/o/f3afa1384fad52e84f96ccd8218fdd20", + "./zig-cache/o/f3afa1384fad52e84f96ccd8218fdd20/test", + "./zig-cache/o/f3afa1384fad52e84f96ccd8218fdd20/test.o", + "./zig-cache/o/16972b9ceb740354eced8ff48566d039", + "./zig-cache/o/16972b9ceb740354eced8ff48566d039/test", + "./zig-cache/o/16972b9ceb740354eced8ff48566d039/test.o", + "./zig-cache/o/eab85a2e06cb9628b0dad6e40abd9515", + "./zig-cache/o/eab85a2e06cb9628b0dad6e40abd9515/fuzzig.o", + "./zig-cache/o/eab85a2e06cb9628b0dad6e40abd9515/fuzzig", + "./zig-cache/o/64df18876b819e8359322ef661bf9674", + "./zig-cache/o/64df18876b819e8359322ef661bf9674/test", + "./zig-cache/o/64df18876b819e8359322ef661bf9674/test.o", + "./zig-cache/o/517cd864faf4b62f64ec42efefdaf919", + "./zig-cache/o/517cd864faf4b62f64ec42efefdaf919/test", + "./zig-cache/o/517cd864faf4b62f64ec42efefdaf919/test.o", + "./zig-cache/o/2410bdd92c6cc29a59722363f59fb980", + "./zig-cache/o/2410bdd92c6cc29a59722363f59fb980/test", + "./zig-cache/o/2410bdd92c6cc29a59722363f59fb980/test.o", + "./zig-cache/o/737c4459c18098cbd44f10aeac0bbb32", + "./zig-cache/o/737c4459c18098cbd44f10aeac0bbb32/build", + "./zig-cache/o/737c4459c18098cbd44f10aeac0bbb32/build.o", + "./zig-cache/o/00cb7f8c7c15864a6ba340f730f4fea1", + "./zig-cache/o/00cb7f8c7c15864a6ba340f730f4fea1/test", + "./zig-cache/o/00cb7f8c7c15864a6ba340f730f4fea1/test.o", + "./zig-cache/o/f5f69f6f35c66d9ebce191c9913c531d", + "./zig-cache/o/f5f69f6f35c66d9ebce191c9913c531d/test", + "./zig-cache/o/f5f69f6f35c66d9ebce191c9913c531d/test.o", + "./zig-cache/o/caf37492c1bff8b9250a100ae4fa5a99", + "./zig-cache/o/caf37492c1bff8b9250a100ae4fa5a99/test", + "./zig-cache/o/caf37492c1bff8b9250a100ae4fa5a99/test.o", + "./zig-cache/o/f1ea8a23a883a382e4d1dee03f34a4f1", + 
"./zig-cache/o/f1ea8a23a883a382e4d1dee03f34a4f1/test", + "./zig-cache/o/f1ea8a23a883a382e4d1dee03f34a4f1/test.o", + "./zig-cache/o/4936b3fb0c4a3de229ecd2f26842904d", + "./zig-cache/o/4936b3fb0c4a3de229ecd2f26842904d/test", + "./zig-cache/o/4936b3fb0c4a3de229ecd2f26842904d/test.o", + "./zig-cache/o/66f018a8ee7c1d24c6c22fe27a765406", + "./zig-cache/o/66f018a8ee7c1d24c6c22fe27a765406/test", + "./zig-cache/o/66f018a8ee7c1d24c6c22fe27a765406/test.o", + "./zig-cache/o/8d1621cc2c964087bc9fb245ca99d79f", + "./zig-cache/o/8d1621cc2c964087bc9fb245ca99d79f/libfuzzig.a", + "./zig-cache/o/8d1621cc2c964087bc9fb245ca99d79f/libfuzzig.a.o", + "./zig-cache/o/afa4d7d542ac69ff56dd2c4cdedbd156", + "./zig-cache/o/afa4d7d542ac69ff56dd2c4cdedbd156/test", + "./zig-cache/o/afa4d7d542ac69ff56dd2c4cdedbd156/test.o", + "./zig-cache/o/a1c5456d4ec9de80092fc1c3b6b09896", + "./zig-cache/o/a1c5456d4ec9de80092fc1c3b6b09896/test", + "./zig-cache/o/a1c5456d4ec9de80092fc1c3b6b09896/test.o", + "./zig-cache/o/c2d22f662253fd9c19a372dbb1c0cef8", + "./zig-cache/o/c2d22f662253fd9c19a372dbb1c0cef8/test", + "./zig-cache/o/c2d22f662253fd9c19a372dbb1c0cef8/test.o", + "./zig-cache/o/806e6a3ffbd45810b14f4f266635ea05", + "./zig-cache/o/806e6a3ffbd45810b14f4f266635ea05/test", + "./zig-cache/o/806e6a3ffbd45810b14f4f266635ea05/test.o", + "./zig-cache/o/28b33e16a4f85f7f24f2a00a2fdf55ab", + "./zig-cache/o/28b33e16a4f85f7f24f2a00a2fdf55ab/test", + "./zig-cache/o/28b33e16a4f85f7f24f2a00a2fdf55ab/test.o", + "./zig-cache/o/8c8d53ac23d3947c21f37a6adeffd8d9", + "./zig-cache/o/8c8d53ac23d3947c21f37a6adeffd8d9/libfuzzig.a", + "./zig-cache/o/8c8d53ac23d3947c21f37a6adeffd8d9/libfuzzig.a.o", + "./zig-cache/o/1b46eb604181df08b388d0e3cfab6777", + "./zig-cache/o/1b46eb604181df08b388d0e3cfab6777/test", + "./zig-cache/o/1b46eb604181df08b388d0e3cfab6777/test.o", + "./zig-cache/o/60b1dd20abdb90a1538bdf40d5bd6f63", + "./zig-cache/o/60b1dd20abdb90a1538bdf40d5bd6f63/libfuzzig.a", + 
"./zig-cache/o/60b1dd20abdb90a1538bdf40d5bd6f63/libfuzzig.a.o", + "./zig-cache/o/42350867890c8cfdd171a860e260b713", + "./zig-cache/o/42350867890c8cfdd171a860e260b713/test", + "./zig-cache/o/42350867890c8cfdd171a860e260b713/test.o", + "./zig-cache/o/d350e46d7ff25f183a0cc449606dd405", + "./zig-cache/o/d350e46d7ff25f183a0cc449606dd405/test", + "./zig-cache/o/d350e46d7ff25f183a0cc449606dd405/test.o", + "./zig-cache/o/ab6fed59324f0434a775895193608cbf", + "./zig-cache/o/ab6fed59324f0434a775895193608cbf/test", + "./zig-cache/o/ab6fed59324f0434a775895193608cbf/test.o", + "./zig-cache/o/8d160820534d053c1285d1b59f835e70", + "./zig-cache/o/8d160820534d053c1285d1b59f835e70/test", + "./zig-cache/o/8d160820534d053c1285d1b59f835e70/test.o", + "./zig-cache/o/b28c2511f0bf96b9238cbc8b0a8f48cd", + "./zig-cache/o/b28c2511f0bf96b9238cbc8b0a8f48cd/test", + "./zig-cache/o/b28c2511f0bf96b9238cbc8b0a8f48cd/test.o", + "./zig-cache/o/d48bcce223843027c3dcefe0a276a13a", + "./zig-cache/o/d48bcce223843027c3dcefe0a276a13a/test", + "./zig-cache/o/d48bcce223843027c3dcefe0a276a13a/test.o", + "./zig-cache/o/0c591d6b970f7319f63221559e9b797d", + "./zig-cache/o/0c591d6b970f7319f63221559e9b797d/test", + "./zig-cache/o/0c591d6b970f7319f63221559e9b797d/test.o", + "./zig-cache/o/14c1180af52cee54339bbce73077b742", + "./zig-cache/o/14c1180af52cee54339bbce73077b742/test", + "./zig-cache/o/14c1180af52cee54339bbce73077b742/test.o", + "./zig-cache/o/8b674eb25ed6d849dd4c02dcd7ec3e53", + "./zig-cache/o/8b674eb25ed6d849dd4c02dcd7ec3e53/test", + "./zig-cache/o/8b674eb25ed6d849dd4c02dcd7ec3e53/test.o", + "./zig-cache/o/e5d7d6d1ef4063e7ee6d4c93d11f550f", + "./zig-cache/o/e5d7d6d1ef4063e7ee6d4c93d11f550f/test", + "./zig-cache/o/e5d7d6d1ef4063e7ee6d4c93d11f550f/test.o", + "./zig-cache/o/2a6e4e895f2b4ab7370b7bfa4b36e984", + "./zig-cache/o/2a6e4e895f2b4ab7370b7bfa4b36e984/test", + "./zig-cache/o/2a6e4e895f2b4ab7370b7bfa4b36e984/test.o", + "./zig-cache/o/549ab24351955c7ef38f16e628f21e33", + 
"./zig-cache/o/549ab24351955c7ef38f16e628f21e33/test", + "./zig-cache/o/549ab24351955c7ef38f16e628f21e33/test.o", + "./zig-cache/o/d7da96308e47fa79cced1d3f6c288e18", + "./zig-cache/o/d7da96308e47fa79cced1d3f6c288e18/test", + "./zig-cache/o/d7da96308e47fa79cced1d3f6c288e18/test.o", + "./zig-cache/o/0f7ec06f62f762d395e96a80b6932ebf", + "./zig-cache/o/0f7ec06f62f762d395e96a80b6932ebf/test", + "./zig-cache/o/0f7ec06f62f762d395e96a80b6932ebf/test.o", + "./zig-cache/o/effd117ce0ab78ae41df34119d9d9f43", + "./zig-cache/o/effd117ce0ab78ae41df34119d9d9f43/test", + "./zig-cache/o/effd117ce0ab78ae41df34119d9d9f43/test.o", + "./zig-cache/o/30d3831db936bea4f0cabb1a609b4d0f", + "./zig-cache/o/30d3831db936bea4f0cabb1a609b4d0f/build", + "./zig-cache/o/30d3831db936bea4f0cabb1a609b4d0f/build.o", + "./zig-cache/o/0901aa4e83b8449cfd395fa16e5a28d6", + "./zig-cache/o/0901aa4e83b8449cfd395fa16e5a28d6/fuzzig.o", + "./zig-cache/o/0901aa4e83b8449cfd395fa16e5a28d6/fuzzig", + "./zig-cache/o/119d37875f703ba8e1c878f3faabc6a0", + "./zig-cache/o/119d37875f703ba8e1c878f3faabc6a0/test", + "./zig-cache/o/119d37875f703ba8e1c878f3faabc6a0/test.o", + "./zig-cache/o/b45af1a0810e732c6b958da5b94f71d5", + "./zig-cache/o/b45af1a0810e732c6b958da5b94f71d5/test", + "./zig-cache/o/b45af1a0810e732c6b958da5b94f71d5/test.o", + "./zig-cache/o/b9a2882231c67560ceb1677e0e666ef1", + "./zig-cache/o/b9a2882231c67560ceb1677e0e666ef1/build", + "./zig-cache/o/b9a2882231c67560ceb1677e0e666ef1/build.o", + "./zig-cache/o/500eb0e36e48fa8436d38898b55aaf19", + "./zig-cache/o/500eb0e36e48fa8436d38898b55aaf19/test", + "./zig-cache/o/500eb0e36e48fa8436d38898b55aaf19/test.o", + "./zig-cache/o/efff1b16534be509387e29589ece1c94", + "./zig-cache/o/efff1b16534be509387e29589ece1c94/test", + "./zig-cache/o/efff1b16534be509387e29589ece1c94/test.o", + "./zig-cache/o/04ed83b7581ca6d58cf10c72659b055f", + "./zig-cache/o/04ed83b7581ca6d58cf10c72659b055f/test", + "./zig-cache/o/04ed83b7581ca6d58cf10c72659b055f/test.o", + 
"./zig-cache/o/d1ed30f066b32a543a8bfbe589a4ccde", + "./zig-cache/o/d1ed30f066b32a543a8bfbe589a4ccde/fuzzig.o", + "./zig-cache/o/d1ed30f066b32a543a8bfbe589a4ccde/fuzzig", + "./zig-cache/o/ac3a835da8b1c05c11f550d2eb08cb2a", + "./zig-cache/o/ac3a835da8b1c05c11f550d2eb08cb2a/test", + "./zig-cache/o/ac3a835da8b1c05c11f550d2eb08cb2a/test.o", + "./zig-cache/o/bf56ed9b3c47aaa1b831f74f8a8c7752", + "./zig-cache/o/bf56ed9b3c47aaa1b831f74f8a8c7752/test", + "./zig-cache/o/bf56ed9b3c47aaa1b831f74f8a8c7752/test.o", + "./zig-cache/o/638dd55b3a499bce149e6484503e33a9", + "./zig-cache/o/638dd55b3a499bce149e6484503e33a9/test", + "./zig-cache/o/638dd55b3a499bce149e6484503e33a9/test.o", + "./zig-cache/o/0d26108fae490f73def46ef46dd69e04", + "./zig-cache/o/0d26108fae490f73def46ef46dd69e04/test", + "./zig-cache/o/0d26108fae490f73def46ef46dd69e04/test.o", + "./zig-cache/o/045ecece80fb4d49cffabe598712ad51", + "./zig-cache/o/045ecece80fb4d49cffabe598712ad51/test", + "./zig-cache/o/045ecece80fb4d49cffabe598712ad51/test.o", + "./zig-cache/o/e37883933cfd9cca51e5d0964889bc88", + "./zig-cache/o/e37883933cfd9cca51e5d0964889bc88/test", + "./zig-cache/o/e37883933cfd9cca51e5d0964889bc88/test.o", + "./zig-cache/o/049d85435dfb8b5b207066d4236a75a7", + "./zig-cache/o/049d85435dfb8b5b207066d4236a75a7/test", + "./zig-cache/o/049d85435dfb8b5b207066d4236a75a7/test.o", + "./zig-cache/o/7bc9eebee56ef1e23b234e19d0356813", + "./zig-cache/o/7bc9eebee56ef1e23b234e19d0356813/test", + "./zig-cache/o/7bc9eebee56ef1e23b234e19d0356813/test.o", + "./zig-cache/o/6a61c2c6482bb2312730abcd4306d169", + "./zig-cache/o/6a61c2c6482bb2312730abcd4306d169/test", + "./zig-cache/o/6a61c2c6482bb2312730abcd4306d169/test.o", + "./zig-cache/o/17babca0ad47f3300b427afff93de9eb", + "./zig-cache/o/17babca0ad47f3300b427afff93de9eb/test", + "./zig-cache/o/17babca0ad47f3300b427afff93de9eb/test.o", + "./zig-cache/o/d45532bf34011c78bae17b957e992680", + "./zig-cache/o/d45532bf34011c78bae17b957e992680/test", + 
"./zig-cache/o/d45532bf34011c78bae17b957e992680/test.o", + "./zig-cache/o/8abb818773d9441296510b48bf0f9463", + "./zig-cache/o/8abb818773d9441296510b48bf0f9463/test", + "./zig-cache/o/8abb818773d9441296510b48bf0f9463/test.o", + "./zig-cache/o/6f07ead36de9078322553d83021b4c3a", + "./zig-cache/o/6f07ead36de9078322553d83021b4c3a/test", + "./zig-cache/o/6f07ead36de9078322553d83021b4c3a/test.o", + "./zig-cache/o/065bdae9ce63dc5803077b77d6b64b56", + "./zig-cache/o/065bdae9ce63dc5803077b77d6b64b56/libfuzzig.a", + "./zig-cache/o/065bdae9ce63dc5803077b77d6b64b56/libfuzzig.a.o", + "./zig-cache/o/f2862f80bd61533030e378e58804a22a", + "./zig-cache/o/f2862f80bd61533030e378e58804a22a/fuzzig.o", + "./zig-cache/o/f2862f80bd61533030e378e58804a22a/fuzzig", + "./zig-cache/o/0a86361483a510ffcd075f572f7f2f32", + "./zig-cache/o/0a86361483a510ffcd075f572f7f2f32/test", + "./zig-cache/o/0a86361483a510ffcd075f572f7f2f32/test.o", + "./zig-cache/o/afdebd30bf93a7450a9a048f39015bb3", + "./zig-cache/o/afdebd30bf93a7450a9a048f39015bb3/libfuzzig.a", + "./zig-cache/o/afdebd30bf93a7450a9a048f39015bb3/libfuzzig.a.o", + "./zig-cache/o/ee84e03298974cd03fba88df38ab4c9d", + "./zig-cache/o/ee84e03298974cd03fba88df38ab4c9d/fuzzig.o", + "./zig-cache/o/ee84e03298974cd03fba88df38ab4c9d/fuzzig", + "./zig-cache/o/a442c00964828d83755f8b2ed6f9dc9e", + "./zig-cache/o/a442c00964828d83755f8b2ed6f9dc9e/libfuzzig.a", + "./zig-cache/o/a442c00964828d83755f8b2ed6f9dc9e/libfuzzig.a.o", + "./zig-cache/o/faa0d8be3ecbc1bb08706ca5e9fb24fe", + "./zig-cache/o/faa0d8be3ecbc1bb08706ca5e9fb24fe/fuzzig.o", + "./zig-cache/o/faa0d8be3ecbc1bb08706ca5e9fb24fe/fuzzig", + "./zig-cache/o/7765f6d751c4db0589969651ce4b5194", + "./zig-cache/o/7765f6d751c4db0589969651ce4b5194/test", + "./zig-cache/o/7765f6d751c4db0589969651ce4b5194/test.o", + "./zig-cache/o/9f4bbc87cffa9601f27ace7d130cecc4", + "./zig-cache/o/9f4bbc87cffa9601f27ace7d130cecc4/test", + "./zig-cache/o/9f4bbc87cffa9601f27ace7d130cecc4/test.o", + 
"./zig-cache/o/11dd9ab6a3f880670022601c3fa75435", + "./zig-cache/o/11dd9ab6a3f880670022601c3fa75435/test", + "./zig-cache/o/11dd9ab6a3f880670022601c3fa75435/test.o", + "./zig-cache/o/eed48b752aa9ae10872c2dac1760c732", + "./zig-cache/o/eed48b752aa9ae10872c2dac1760c732/test", + "./zig-cache/o/eed48b752aa9ae10872c2dac1760c732/test.o", + "./zig-cache/o/c76af37adf91fc90ae341bfb8a0023f6", + "./zig-cache/o/c76af37adf91fc90ae341bfb8a0023f6/test", + "./zig-cache/o/c76af37adf91fc90ae341bfb8a0023f6/test.o", + "./zig-cache/o/fb381531c1bb57d4f15fac14326e8a10", + "./zig-cache/o/fb381531c1bb57d4f15fac14326e8a10/test", + "./zig-cache/o/fb381531c1bb57d4f15fac14326e8a10/test.o", + "./zig-cache/o/09bd1dbc17b23b5cc2bb9e9d09f14ff9", + "./zig-cache/o/09bd1dbc17b23b5cc2bb9e9d09f14ff9/test", + "./zig-cache/o/09bd1dbc17b23b5cc2bb9e9d09f14ff9/test.o", + "./zig-cache/o/dce2fdc15249dc1654c1f9d86f6b8c68", + "./zig-cache/o/dce2fdc15249dc1654c1f9d86f6b8c68/test", + "./zig-cache/o/dce2fdc15249dc1654c1f9d86f6b8c68/test.o", + "./zig-cache/o/80ec8c8bf35c1de7e1b6372d4407fa62", + "./zig-cache/o/80ec8c8bf35c1de7e1b6372d4407fa62/test", + "./zig-cache/o/80ec8c8bf35c1de7e1b6372d4407fa62/test.o", + "./zig-cache/o/338cb8edeeae23515f2a11482cc63b18", + "./zig-cache/o/338cb8edeeae23515f2a11482cc63b18/test", + "./zig-cache/o/338cb8edeeae23515f2a11482cc63b18/test.o", + "./zig-cache/o/788fb3319671918d450e6e37750f2db5", + "./zig-cache/o/788fb3319671918d450e6e37750f2db5/libfuzzig.a", + "./zig-cache/o/788fb3319671918d450e6e37750f2db5/libfuzzig.a.o", + "./zig-cache/o/2bdac12df67ae73eb9a1c42ad6ef94be", + "./zig-cache/o/2bdac12df67ae73eb9a1c42ad6ef94be/test", + "./zig-cache/o/2bdac12df67ae73eb9a1c42ad6ef94be/test.o", + "./zig-cache/o/8949e688232b079daa5335a1c684624f", + "./zig-cache/o/8949e688232b079daa5335a1c684624f/test", + "./zig-cache/o/8949e688232b079daa5335a1c684624f/test.o", + "./zig-cache/o/1e60e767fdfc858ed7324c42b8514511", + "./zig-cache/o/1e60e767fdfc858ed7324c42b8514511/test", + 
"./zig-cache/o/1e60e767fdfc858ed7324c42b8514511/test.o", + "./zig-cache/o/21a8dd9cfe50dc808a798f026b96d32b", + "./zig-cache/o/21a8dd9cfe50dc808a798f026b96d32b/test", + "./zig-cache/o/21a8dd9cfe50dc808a798f026b96d32b/test.o", + "./zig-cache/o/8e77da6d70e0954df5f2c286fea79005", + "./zig-cache/o/8e77da6d70e0954df5f2c286fea79005/test", + "./zig-cache/o/8e77da6d70e0954df5f2c286fea79005/test.o", + "./zig-cache/o/95eab175e928b3de7d7f00365b33c360", + "./zig-cache/o/95eab175e928b3de7d7f00365b33c360/test", + "./zig-cache/o/95eab175e928b3de7d7f00365b33c360/test.o", + "./zig-cache/o/83e65b5a2b2e0ca1e2ed56ba8ef3663e", + "./zig-cache/o/83e65b5a2b2e0ca1e2ed56ba8ef3663e/build", + "./zig-cache/o/83e65b5a2b2e0ca1e2ed56ba8ef3663e/build.o", + "./zig-cache/o/9ebaf93e818115ed2d251e0e80c2aa5a", + "./zig-cache/o/9ebaf93e818115ed2d251e0e80c2aa5a/test", + "./zig-cache/o/9ebaf93e818115ed2d251e0e80c2aa5a/test.o", + "./zig-cache/o/905494531cc40de7dbb7c2a38ee2ad9c", + "./zig-cache/o/905494531cc40de7dbb7c2a38ee2ad9c/test", + "./zig-cache/o/905494531cc40de7dbb7c2a38ee2ad9c/test.o", + "./zig-cache/o/e4ae6c8c0b0f8cd8e6e6b08d0e323fe9", + "./zig-cache/o/e4ae6c8c0b0f8cd8e6e6b08d0e323fe9/test", + "./zig-cache/o/e4ae6c8c0b0f8cd8e6e6b08d0e323fe9/test.o", + "./zig-cache/o/2ccf1a389a32f644da75ccceb7a4a544", + "./zig-cache/o/2ccf1a389a32f644da75ccceb7a4a544/test", + "./zig-cache/o/2ccf1a389a32f644da75ccceb7a4a544/test.o", + "./zig-cache/o/0ef54979a48251b3f4d754221643e658", + "./zig-cache/o/0ef54979a48251b3f4d754221643e658/test", + "./zig-cache/o/0ef54979a48251b3f4d754221643e658/test.o", + "./zig-cache/o/7db0c480c2f1af60fc1d87b817d388d5", + "./zig-cache/o/7db0c480c2f1af60fc1d87b817d388d5/test", + "./zig-cache/o/7db0c480c2f1af60fc1d87b817d388d5/test.o", + "./zig-cache/o/da8cc98c99cc424313a18ea4e81ced00", + "./zig-cache/o/da8cc98c99cc424313a18ea4e81ced00/test", + "./zig-cache/o/da8cc98c99cc424313a18ea4e81ced00/test.o", + "./zig-cache/o/0607bdeb75ec85b045a63ae71c9fd75e", + 
"./zig-cache/o/0607bdeb75ec85b045a63ae71c9fd75e/test", + "./zig-cache/o/0607bdeb75ec85b045a63ae71c9fd75e/test.o", + "./zig-cache/o/5c369b795e8cb1d9242399f5fa3624a7", + "./zig-cache/o/5c369b795e8cb1d9242399f5fa3624a7/test", + "./zig-cache/o/5c369b795e8cb1d9242399f5fa3624a7/test.o", + "./zig-cache/o/3f514bda379952383f3bc85b98e2a03e", + "./zig-cache/o/3f514bda379952383f3bc85b98e2a03e/test", + "./zig-cache/o/3f514bda379952383f3bc85b98e2a03e/test.o", + "./zig-cache/o/084044d6ca51db4b19723443ad6a22b1", + "./zig-cache/o/084044d6ca51db4b19723443ad6a22b1/test", + "./zig-cache/o/084044d6ca51db4b19723443ad6a22b1/test.o", + "./zig-cache/o/8f30f4011b6a644043900b8a25d983bd", + "./zig-cache/o/8f30f4011b6a644043900b8a25d983bd/test", + "./zig-cache/o/8f30f4011b6a644043900b8a25d983bd/test.o", + "./zig-cache/o/be3d908b9823c454333cb8adb293c619", + "./zig-cache/o/be3d908b9823c454333cb8adb293c619/test", + "./zig-cache/o/be3d908b9823c454333cb8adb293c619/test.o", + "./zig-cache/o/9b16df3f194833afadb84b639d7fff3b", + "./zig-cache/o/9b16df3f194833afadb84b639d7fff3b/test", + "./zig-cache/o/9b16df3f194833afadb84b639d7fff3b/test.o", + "./zig-cache/o/5d33b995a6d92f17b57938ce67fab637", + "./zig-cache/o/5d33b995a6d92f17b57938ce67fab637/test", + "./zig-cache/o/5d33b995a6d92f17b57938ce67fab637/test.o", + "./zig-cache/o/f175b3a9baa91bad68882fc2a2d19aa8", + "./zig-cache/o/f175b3a9baa91bad68882fc2a2d19aa8/test", + "./zig-cache/o/f175b3a9baa91bad68882fc2a2d19aa8/test.o", + "./zig-cache/o/37bb5e6e00518c8abd1c483daac928b9", + "./zig-cache/o/37bb5e6e00518c8abd1c483daac928b9/test", + "./zig-cache/o/37bb5e6e00518c8abd1c483daac928b9/test.o", + "./zig-cache/o/45d180b5a2f8c5c1f07f307a0870ca2b", + "./zig-cache/o/45d180b5a2f8c5c1f07f307a0870ca2b/test", + "./zig-cache/o/45d180b5a2f8c5c1f07f307a0870ca2b/test.o", + "./zig-cache/o/772ef301d07cec020088bb16b7c1a28c", + "./zig-cache/o/772ef301d07cec020088bb16b7c1a28c/test", + "./zig-cache/o/772ef301d07cec020088bb16b7c1a28c/test.o", + 
"./zig-cache/o/4b656088bffb2e05be5355c0d6b1b75d", + "./zig-cache/o/4b656088bffb2e05be5355c0d6b1b75d/test", + "./zig-cache/o/4b656088bffb2e05be5355c0d6b1b75d/test.o", + "./zig-cache/o/f0a82962d8de6655f55d6d115a70df9d", + "./zig-cache/o/f0a82962d8de6655f55d6d115a70df9d/test", + "./zig-cache/o/f0a82962d8de6655f55d6d115a70df9d/test.o", + "./zig-cache/o/ad795b40049fd312d5e81c7b416a934f", + "./zig-cache/o/ad795b40049fd312d5e81c7b416a934f/test", + "./zig-cache/o/ad795b40049fd312d5e81c7b416a934f/test.o", + "./zig-cache/o/7e61d8a139b11f95527b49b6dbee3db1", + "./zig-cache/o/7e61d8a139b11f95527b49b6dbee3db1/test", + "./zig-cache/o/7e61d8a139b11f95527b49b6dbee3db1/test.o", + "./zig-cache/o/0f91d5b0c499bc9d8074cde559fb77b1", + "./zig-cache/o/0f91d5b0c499bc9d8074cde559fb77b1/test", + "./zig-cache/o/0f91d5b0c499bc9d8074cde559fb77b1/test.o", + "./zig-cache/o/803afd70d20b788587b012e4adbf4daa", + "./zig-cache/o/803afd70d20b788587b012e4adbf4daa/test", + "./zig-cache/o/803afd70d20b788587b012e4adbf4daa/test.o", + "./zig-cache/o/eca37f28364ecabd2dadd2a15e041279", + "./zig-cache/o/eca37f28364ecabd2dadd2a15e041279/build", + "./zig-cache/o/eca37f28364ecabd2dadd2a15e041279/build.o", + "./zig-cache/o/d89e257cf35d936c77d9ef72202e5cd4", + "./zig-cache/o/d89e257cf35d936c77d9ef72202e5cd4/test", + "./zig-cache/o/d89e257cf35d936c77d9ef72202e5cd4/test.o", + "./zig-cache/o/a26db9235a31b97dece359b7515cf623", + "./zig-cache/o/a26db9235a31b97dece359b7515cf623/test", + "./zig-cache/o/a26db9235a31b97dece359b7515cf623/test.o", + "./zig-cache/o/587ddd004f6aedcb6cb7284674e4a6b4", + "./zig-cache/o/587ddd004f6aedcb6cb7284674e4a6b4/libfuzzig.a", + "./zig-cache/o/587ddd004f6aedcb6cb7284674e4a6b4/libfuzzig.a.o", + "./zig-cache/o/374d616267524535d03b832c44da75de", + "./zig-cache/o/374d616267524535d03b832c44da75de/test", + "./zig-cache/o/374d616267524535d03b832c44da75de/test.o", + "./zig-cache/o/50cba5e31264f1b9f81e4dda51d05c4b", + "./zig-cache/o/50cba5e31264f1b9f81e4dda51d05c4b/test", + 
"./zig-cache/o/50cba5e31264f1b9f81e4dda51d05c4b/test.o", + "./zig-cache/o/fb252ccc2faacd57c982ffd3953e897e", + "./zig-cache/o/fb252ccc2faacd57c982ffd3953e897e/test", + "./zig-cache/o/fb252ccc2faacd57c982ffd3953e897e/test.o", + "./zig-cache/o/abb1b12b1562d25058ca087772f00a0b", + "./zig-cache/o/abb1b12b1562d25058ca087772f00a0b/test", + "./zig-cache/o/abb1b12b1562d25058ca087772f00a0b/test.o", + "./zig-cache/o/642007d324d4edf3d32ed52edc6a492b", + "./zig-cache/o/642007d324d4edf3d32ed52edc6a492b/test", + "./zig-cache/o/642007d324d4edf3d32ed52edc6a492b/test.o", + "./zig-cache/o/4d87dec328f4f54710ec9eda36719dcc", + "./zig-cache/o/4d87dec328f4f54710ec9eda36719dcc/test", + "./zig-cache/o/4d87dec328f4f54710ec9eda36719dcc/test.o", + "./zig-cache/o/000e7ea396af3733c476630085992f1d", + "./zig-cache/o/000e7ea396af3733c476630085992f1d/build", + "./zig-cache/o/000e7ea396af3733c476630085992f1d/build.o", + "./zig-cache/o/390093ac3f8f3a09b959d5d1af3d3d13", + "./zig-cache/o/390093ac3f8f3a09b959d5d1af3d3d13/test", + "./zig-cache/o/390093ac3f8f3a09b959d5d1af3d3d13/test.o", + "./zig-cache/o/9d663d737778b38eeefc7d4038eb3d40", + "./zig-cache/o/9d663d737778b38eeefc7d4038eb3d40/test", + "./zig-cache/o/9d663d737778b38eeefc7d4038eb3d40/test.o", + "./zig-cache/o/df4ab095e09f547309a72d6d811e99d5", + "./zig-cache/o/df4ab095e09f547309a72d6d811e99d5/test", + "./zig-cache/o/df4ab095e09f547309a72d6d811e99d5/test.o", + "./zig-cache/o/1e151a0af8cfa3cc1671a3199155bf40", + "./zig-cache/o/1e151a0af8cfa3cc1671a3199155bf40/test", + "./zig-cache/o/1e151a0af8cfa3cc1671a3199155bf40/test.o", + "./zig-cache/o/448b539727939c019d166b6176b57d47", + "./zig-cache/o/448b539727939c019d166b6176b57d47/test", + "./zig-cache/o/448b539727939c019d166b6176b57d47/test.o", + "./zig-cache/o/776fd6b3f996aa09a4102949ab90f030", + "./zig-cache/o/776fd6b3f996aa09a4102949ab90f030/libfuzzig.a", + "./zig-cache/o/776fd6b3f996aa09a4102949ab90f030/libfuzzig.a.o", + "./zig-cache/o/d1cb69cff7599fcbf513c8c88007ba56", + 
"./zig-cache/o/d1cb69cff7599fcbf513c8c88007ba56/test", + "./zig-cache/o/d1cb69cff7599fcbf513c8c88007ba56/test.o", + "./zig-cache/o/154049c04e13734fd405aa6c1b3290f9", + "./zig-cache/o/154049c04e13734fd405aa6c1b3290f9/test", + "./zig-cache/o/154049c04e13734fd405aa6c1b3290f9/test.o", + "./zig-cache/o/52d72ce8b9d6ae6fd57ebbb60b9438c7", + "./zig-cache/o/52d72ce8b9d6ae6fd57ebbb60b9438c7/test", + "./zig-cache/o/52d72ce8b9d6ae6fd57ebbb60b9438c7/test.o", + "./zig-cache/o/05a7825346961a255a052e52327ef0fb", + "./zig-cache/o/05a7825346961a255a052e52327ef0fb/test", + "./zig-cache/o/05a7825346961a255a052e52327ef0fb/test.o", + "./zig-cache/o/ae661224e9029c495241d2e07e3d4db7", + "./zig-cache/o/ae661224e9029c495241d2e07e3d4db7/test", + "./zig-cache/o/ae661224e9029c495241d2e07e3d4db7/test.o", + "./zig-cache/o/cdeacc4d8c4c83c0faab7412af515b71", + "./zig-cache/o/cdeacc4d8c4c83c0faab7412af515b71/test", + "./zig-cache/o/cdeacc4d8c4c83c0faab7412af515b71/test.o", + "./zig-cache/o/2e0e4d413b3c0f715ba2e59de4483e89", + "./zig-cache/o/2e0e4d413b3c0f715ba2e59de4483e89/test", + "./zig-cache/o/2e0e4d413b3c0f715ba2e59de4483e89/test.o", + "./zig-cache/o/40a978c87c539d9bd4ec67d9ec9aca88", + "./zig-cache/o/40a978c87c539d9bd4ec67d9ec9aca88/test", + "./zig-cache/o/40a978c87c539d9bd4ec67d9ec9aca88/test.o", + "./zig-cache/o/1e433e718adc8ff3779522850c192bf5", + "./zig-cache/o/1e433e718adc8ff3779522850c192bf5/test", + "./zig-cache/o/1e433e718adc8ff3779522850c192bf5/test.o", + "./zig-cache/o/d77ffc8fb51b2f851bc1689d4a9ede3a", + "./zig-cache/o/d77ffc8fb51b2f851bc1689d4a9ede3a/test", + "./zig-cache/o/d77ffc8fb51b2f851bc1689d4a9ede3a/test.o", + "./zig-cache/o/2a8d1e3559c5368ad172f7103d6cf9f8", + "./zig-cache/o/2a8d1e3559c5368ad172f7103d6cf9f8/test", + "./zig-cache/o/2a8d1e3559c5368ad172f7103d6cf9f8/test.o", + "./zig-cache/o/14b981eb5fdbcbd51e47cac7c46c61ef", + "./zig-cache/o/14b981eb5fdbcbd51e47cac7c46c61ef/test", + "./zig-cache/o/14b981eb5fdbcbd51e47cac7c46c61ef/test.o", + 
"./zig-cache/o/397bd19f3cc285292176d14b5b9f88dc", + "./zig-cache/o/397bd19f3cc285292176d14b5b9f88dc/test", + "./zig-cache/o/397bd19f3cc285292176d14b5b9f88dc/test.o", + "./zig-cache/o/2c01944c86a8f41945b54d2f1cff6739", + "./zig-cache/o/2c01944c86a8f41945b54d2f1cff6739/test", + "./zig-cache/o/2c01944c86a8f41945b54d2f1cff6739/test.o", + "./zig-cache/o/15a4b05e7840667a9b89f9c329f9abf5", + "./zig-cache/o/15a4b05e7840667a9b89f9c329f9abf5/test", + "./zig-cache/o/15a4b05e7840667a9b89f9c329f9abf5/test.o", + "./zig-cache/o/24fe9686394d682386e54748663ebbe6", + "./zig-cache/o/24fe9686394d682386e54748663ebbe6/test", + "./zig-cache/o/24fe9686394d682386e54748663ebbe6/test.o", + "./zig-cache/o/e24b00a34850ee3bd8998a2d670f104d", + "./zig-cache/o/e24b00a34850ee3bd8998a2d670f104d/test", + "./zig-cache/o/e24b00a34850ee3bd8998a2d670f104d/test.o", + "./zig-cache/o/4d49b76a3106d9f6434456fc143d3bcb", + "./zig-cache/o/4d49b76a3106d9f6434456fc143d3bcb/test", + "./zig-cache/o/4d49b76a3106d9f6434456fc143d3bcb/test.o", + "./zig-cache/o/9e0edc0b470b5edb39a2017ad2e1a9c4", + "./zig-cache/o/9e0edc0b470b5edb39a2017ad2e1a9c4/test", + "./zig-cache/o/9e0edc0b470b5edb39a2017ad2e1a9c4/test.o", + "./zig-cache/o/f53dd650529610705af31b47363172ab", + "./zig-cache/o/f53dd650529610705af31b47363172ab/fuzzig.o", + "./zig-cache/o/f53dd650529610705af31b47363172ab/fuzzig", + "./zig-cache/o/8e207de926f123f2a4e0b53d01d183f7", + "./zig-cache/o/8e207de926f123f2a4e0b53d01d183f7/test", + "./zig-cache/o/8e207de926f123f2a4e0b53d01d183f7/test.o", + "./zig-cache/o/c44c896ce82565bb989be2cbd31786d1", + "./zig-cache/o/c44c896ce82565bb989be2cbd31786d1/test", + "./zig-cache/o/c44c896ce82565bb989be2cbd31786d1/test.o", + "./zig-cache/o/9080a6a45168eb4387b621fe997f9afa", + "./zig-cache/o/9080a6a45168eb4387b621fe997f9afa/test", + "./zig-cache/o/9080a6a45168eb4387b621fe997f9afa/test.o", + "./zig-cache/o/b9ad22efee4a62f808041ba762246144", + "./zig-cache/o/b9ad22efee4a62f808041ba762246144/test", + 
"./zig-cache/o/b9ad22efee4a62f808041ba762246144/test.o", + "./zig-cache/o/c14fa8c360da85c572ea6b2df5ffe01b", + "./zig-cache/o/c14fa8c360da85c572ea6b2df5ffe01b/fuzzig.o", + "./zig-cache/o/c14fa8c360da85c572ea6b2df5ffe01b/fuzzig", + "./zig-cache/o/f1221315566bbf3fa5293fa76e03b409", + "./zig-cache/o/f1221315566bbf3fa5293fa76e03b409/test", + "./zig-cache/o/f1221315566bbf3fa5293fa76e03b409/test.o", + "./zig-cache/o/f6c9a69c1c6bb481ad238ad7bb3428ed", + "./zig-cache/o/f6c9a69c1c6bb481ad238ad7bb3428ed/test", + "./zig-cache/o/f6c9a69c1c6bb481ad238ad7bb3428ed/test.o", + "./zig-cache/o/35beaa3d87fd55be67411e2eb31399c7", + "./zig-cache/o/35beaa3d87fd55be67411e2eb31399c7/test", + "./zig-cache/o/35beaa3d87fd55be67411e2eb31399c7/test.o", + "./zig-cache/o/37f2eecfb5b5d03cfde4021350816324", + "./zig-cache/o/37f2eecfb5b5d03cfde4021350816324/test", + "./zig-cache/o/37f2eecfb5b5d03cfde4021350816324/test.o", + "./zig-cache/o/4d62154d4ef948d911238531c63b0311", + "./zig-cache/o/4d62154d4ef948d911238531c63b0311/libfuzzig.a", + "./zig-cache/o/4d62154d4ef948d911238531c63b0311/libfuzzig.a.o", + "./zig-cache/o/a2c5ff3cd295631bc9c5c35000961f30", + "./zig-cache/o/a2c5ff3cd295631bc9c5c35000961f30/test", + "./zig-cache/o/a2c5ff3cd295631bc9c5c35000961f30/test.o", + "./zig-cache/o/53528130a65b449977986765c8e6561a", + "./zig-cache/o/53528130a65b449977986765c8e6561a/test", + "./zig-cache/o/53528130a65b449977986765c8e6561a/test.o", + "./zig-cache/o/7e9dd673539464cb4b84861947ed7be3", + "./zig-cache/o/7e9dd673539464cb4b84861947ed7be3/fuzzig.o", + "./zig-cache/o/7e9dd673539464cb4b84861947ed7be3/fuzzig", + "./zig-cache/o/9be45abc1811aff12bde5f0582d0f60b", + "./zig-cache/o/9be45abc1811aff12bde5f0582d0f60b/test", + "./zig-cache/o/9be45abc1811aff12bde5f0582d0f60b/test.o", + "./zig-cache/o/e01f24923190fe996e754a45cd362b3b", + "./zig-cache/o/e01f24923190fe996e754a45cd362b3b/test", + "./zig-cache/o/e01f24923190fe996e754a45cd362b3b/test.o", + "./zig-cache/o/ae7ad3129ac93900e475c8c76b03788b", + 
"./zig-cache/o/ae7ad3129ac93900e475c8c76b03788b/test", + "./zig-cache/o/ae7ad3129ac93900e475c8c76b03788b/test.o", + "./zig-cache/o/e28ee7b6d2646d85aaa751985431e4ab", + "./zig-cache/o/e28ee7b6d2646d85aaa751985431e4ab/test", + "./zig-cache/o/e28ee7b6d2646d85aaa751985431e4ab/test.o", + "./zig-cache/o/23768f4002a948ce5e6df92777da7afb", + "./zig-cache/o/23768f4002a948ce5e6df92777da7afb/fuzzig.o", + "./zig-cache/o/23768f4002a948ce5e6df92777da7afb/fuzzig", + "./zig-cache/o/9ad206dc1ab907d90c00b4955abee492", + "./zig-cache/o/9ad206dc1ab907d90c00b4955abee492/test", + "./zig-cache/o/9ad206dc1ab907d90c00b4955abee492/test.o", + "./zig-cache/o/07a6711b6bc82226d11cae9e431069fe", + "./zig-cache/o/07a6711b6bc82226d11cae9e431069fe/test", + "./zig-cache/o/07a6711b6bc82226d11cae9e431069fe/test.o", + "./zig-cache/o/1f664c0cb589a3400b86a5587c63c9bb", + "./zig-cache/o/1f664c0cb589a3400b86a5587c63c9bb/test", + "./zig-cache/o/1f664c0cb589a3400b86a5587c63c9bb/test.o", + "./zig-cache/o/4f7f2ca3e1111919e1994bad7d7e7493", + "./zig-cache/o/4f7f2ca3e1111919e1994bad7d7e7493/fuzzig.o", + "./zig-cache/o/4f7f2ca3e1111919e1994bad7d7e7493/fuzzig", + "./zig-cache/o/1727a362890f9d526726c5df9f52b1bf", + "./zig-cache/o/1727a362890f9d526726c5df9f52b1bf/test", + "./zig-cache/o/1727a362890f9d526726c5df9f52b1bf/test.o", + "./zig-cache/o/4ee9e6e496c3f789b9d5484d50a76ed4", + "./zig-cache/o/4ee9e6e496c3f789b9d5484d50a76ed4/test", + "./zig-cache/o/4ee9e6e496c3f789b9d5484d50a76ed4/test.o", + "./zig-cache/o/39e2e2d5bb656043959cde482c4807fa", + "./zig-cache/o/39e2e2d5bb656043959cde482c4807fa/test", + "./zig-cache/o/39e2e2d5bb656043959cde482c4807fa/test.o", + "./zig-cache/o/acb28f6666b4ce030462a1047ea41fd6", + "./zig-cache/o/acb28f6666b4ce030462a1047ea41fd6/test", + "./zig-cache/o/acb28f6666b4ce030462a1047ea41fd6/test.o", + "./zig-cache/o/6557504277eec2c7b969ec0617d8145e", + "./zig-cache/o/6557504277eec2c7b969ec0617d8145e/test", + "./zig-cache/o/6557504277eec2c7b969ec0617d8145e/test.o", + 
"./zig-cache/o/316f6201e0ba9975314055c6b6a44521", + "./zig-cache/o/316f6201e0ba9975314055c6b6a44521/test", + "./zig-cache/o/316f6201e0ba9975314055c6b6a44521/test.o", + "./zig-cache/o/d4ec33a5b5617b6a8621b6cd9baf43c1", + "./zig-cache/o/d4ec33a5b5617b6a8621b6cd9baf43c1/test", + "./zig-cache/o/d4ec33a5b5617b6a8621b6cd9baf43c1/test.o", + "./zig-cache/o/951ea70f47d35489c85795f7503fd048", + "./zig-cache/o/951ea70f47d35489c85795f7503fd048/test", + "./zig-cache/o/951ea70f47d35489c85795f7503fd048/test.o", + "./zig-cache/o/ef72b516100c16e5df14693daddc26e2", + "./zig-cache/o/0c7454ef4e5f86069b2c321d4b19b2cb", + "./zig-cache/o/0c7454ef4e5f86069b2c321d4b19b2cb/test", + "./zig-cache/o/0c7454ef4e5f86069b2c321d4b19b2cb/test.o", + "./zig-cache/o/525828319740be3a472aa1372c4bca75", + "./zig-cache/o/525828319740be3a472aa1372c4bca75/test", + "./zig-cache/o/525828319740be3a472aa1372c4bca75/test.o", + "./zig-cache/o/76a2305a5eca96e9db3c502f37cbd92e", + "./zig-cache/o/76a2305a5eca96e9db3c502f37cbd92e/test", + "./zig-cache/o/76a2305a5eca96e9db3c502f37cbd92e/test.o", + "./zig-cache/o/26489238589fb3150f744cc568d9fe18", + "./zig-cache/o/26489238589fb3150f744cc568d9fe18/test", + "./zig-cache/o/26489238589fb3150f744cc568d9fe18/test.o", + "./zig-cache/o/4113f7501df8f01d9ee996c03d7c2a5c", + "./zig-cache/o/4113f7501df8f01d9ee996c03d7c2a5c/test", + "./zig-cache/o/4113f7501df8f01d9ee996c03d7c2a5c/test.o", + "./zig-cache/o/d7ec61c0a7ea4481c5b3035da73f8abf", + "./zig-cache/o/d7ec61c0a7ea4481c5b3035da73f8abf/test", + "./zig-cache/o/d7ec61c0a7ea4481c5b3035da73f8abf/test.o", + "./zig-cache/o/42f6a76b0d91844ce5bb206c87b8e545", + "./zig-cache/o/42f6a76b0d91844ce5bb206c87b8e545/test", + "./zig-cache/o/42f6a76b0d91844ce5bb206c87b8e545/test.o", + "./zig-cache/o/c812c736fa69c93f3494b6332ca83ce4", + "./zig-cache/o/c812c736fa69c93f3494b6332ca83ce4/test", + "./zig-cache/o/c812c736fa69c93f3494b6332ca83ce4/test.o", + "./zig-cache/o/70ed4ad8696d508e9d0aca53681cdfe7", + 
"./zig-cache/o/70ed4ad8696d508e9d0aca53681cdfe7/test", + "./zig-cache/o/70ed4ad8696d508e9d0aca53681cdfe7/test.o", + "./zig-cache/o/4e4715d578651fac8d00eb1a0259c3c2", + "./zig-cache/o/4e4715d578651fac8d00eb1a0259c3c2/test", + "./zig-cache/o/4e4715d578651fac8d00eb1a0259c3c2/test.o", + "./zig-cache/o/871d2615dd7f36200284911a9c502124", + "./zig-cache/o/871d2615dd7f36200284911a9c502124/test", + "./zig-cache/o/871d2615dd7f36200284911a9c502124/test.o", + "./zig-cache/o/d01f27fce960b29cefb27b705aea3824", + "./zig-cache/o/d01f27fce960b29cefb27b705aea3824/test", + "./zig-cache/o/d01f27fce960b29cefb27b705aea3824/test.o", + "./zig-cache/o/d8d9e6b5c119d30c0355052f2ae7d751", + "./zig-cache/o/d8d9e6b5c119d30c0355052f2ae7d751/test", + "./zig-cache/o/d8d9e6b5c119d30c0355052f2ae7d751/test.o", + "./zig-cache/o/81759d9e8e395d24800f021862ad69e4", + "./zig-cache/o/81759d9e8e395d24800f021862ad69e4/fuzzig.o", + "./zig-cache/o/81759d9e8e395d24800f021862ad69e4/fuzzig", + "./zig-cache/o/81291d30e5a76bddb2f9b6f7c1d734a0", + "./zig-cache/o/81291d30e5a76bddb2f9b6f7c1d734a0/test", + "./zig-cache/o/81291d30e5a76bddb2f9b6f7c1d734a0/test.o", + "./zig-cache/o/ff8720701fd61c2dc3c4fd4b42ec844b", + "./zig-cache/o/ff8720701fd61c2dc3c4fd4b42ec844b/test", + "./zig-cache/o/ff8720701fd61c2dc3c4fd4b42ec844b/test.o", + "./zig-cache/o/effe5f0ad2ab300514d501be3366578e", + "./zig-cache/o/effe5f0ad2ab300514d501be3366578e/test", + "./zig-cache/o/effe5f0ad2ab300514d501be3366578e/test.o", + "./zig-cache/o/aee2e89522efd9c1a647405bce9c2cf7", + "./zig-cache/o/aee2e89522efd9c1a647405bce9c2cf7/test", + "./zig-cache/o/aee2e89522efd9c1a647405bce9c2cf7/test.o", + "./zig-cache/o/0bf22c20d841e9b137e8d4c63bd67d87", + "./zig-cache/o/0bf22c20d841e9b137e8d4c63bd67d87/test", + "./zig-cache/o/0bf22c20d841e9b137e8d4c63bd67d87/test.o", + "./zig-cache/o/37727bfee6a0def8d667ef36504d7fe1", + "./zig-cache/o/37727bfee6a0def8d667ef36504d7fe1/test", + "./zig-cache/o/37727bfee6a0def8d667ef36504d7fe1/test.o", + 
"./zig-cache/o/8369a48b605e7d54942facb7d3e05531", + "./zig-cache/o/8369a48b605e7d54942facb7d3e05531/test", + "./zig-cache/o/8369a48b605e7d54942facb7d3e05531/test.o", + "./zig-cache/o/c3b83b96d9185b2236378605a4bb97f3", + "./zig-cache/o/c3b83b96d9185b2236378605a4bb97f3/libfuzzig.a", + "./zig-cache/o/c3b83b96d9185b2236378605a4bb97f3/libfuzzig.a.o", + "./zig-cache/o/7a7ec9c992ded7bc5ee0056a26416cd9", + "./zig-cache/o/7a7ec9c992ded7bc5ee0056a26416cd9/test", + "./zig-cache/o/7a7ec9c992ded7bc5ee0056a26416cd9/test.o", + "./zig-cache/o/b462b7ab55ce4e559d04c39336910ade", + "./zig-cache/o/b462b7ab55ce4e559d04c39336910ade/fuzzig.o", + "./zig-cache/o/b462b7ab55ce4e559d04c39336910ade/fuzzig", + "./zig-cache/o/842e88b659dfbc04e91f490547989756", + "./zig-cache/o/842e88b659dfbc04e91f490547989756/test", + "./zig-cache/o/842e88b659dfbc04e91f490547989756/test.o", + "./zig-cache/o/c19ee21f5097459f46b7a92d6ca5ce69", + "./zig-cache/o/c19ee21f5097459f46b7a92d6ca5ce69/build", + "./zig-cache/o/c19ee21f5097459f46b7a92d6ca5ce69/build.o", + "./zig-cache/o/2487a4dc15d71f29bc2d1cea53368bce", + "./zig-cache/o/2487a4dc15d71f29bc2d1cea53368bce/libfuzzig.a", + "./zig-cache/o/2487a4dc15d71f29bc2d1cea53368bce/libfuzzig.a.o", + "./zig-cache/o/d6c87955cd54665e4f393b4bbef14a89", + "./zig-cache/o/d6c87955cd54665e4f393b4bbef14a89/test", + "./zig-cache/o/d6c87955cd54665e4f393b4bbef14a89/test.o", + "./zig-cache/o/0bef5aed517170c69ad3dd3e16c1641f", + "./zig-cache/o/0bef5aed517170c69ad3dd3e16c1641f/test", + "./zig-cache/o/0bef5aed517170c69ad3dd3e16c1641f/test.o", + "./zig-cache/o/0aa2b804c577ea18285459aefd8057d9", + "./zig-cache/o/0aa2b804c577ea18285459aefd8057d9/test", + "./zig-cache/o/0aa2b804c577ea18285459aefd8057d9/test.o", + "./zig-cache/o/2c1a396ddbbce8cbfe427f79f5c722c6", + "./zig-cache/o/2c1a396ddbbce8cbfe427f79f5c722c6/test", + "./zig-cache/o/2c1a396ddbbce8cbfe427f79f5c722c6/test.o", + "./zig-cache/o/b29e01fca5c7698753df95c610f59b94", + "./zig-cache/o/b29e01fca5c7698753df95c610f59b94/build", + 
"./zig-cache/o/b29e01fca5c7698753df95c610f59b94/build.o", + "./zig-cache/o/cfdaf579b7e33dff9996bbc3b39400cd", + "./zig-cache/o/cfdaf579b7e33dff9996bbc3b39400cd/test", + "./zig-cache/o/cfdaf579b7e33dff9996bbc3b39400cd/test.o", + "./zig-cache/o/bc78b5aa5f532991c9e96ddb7328434a", + "./zig-cache/o/bc78b5aa5f532991c9e96ddb7328434a/libfuzzig.a", + "./zig-cache/o/bc78b5aa5f532991c9e96ddb7328434a/libfuzzig.a.o", + "./zig-cache/o/ed4dc24d8f93c54510adc808b82dadcf", + "./zig-cache/o/ed4dc24d8f93c54510adc808b82dadcf/test", + "./zig-cache/o/ed4dc24d8f93c54510adc808b82dadcf/test.o", + "./zig-cache/o/4fdae59d0759bc3807926615181213be", + "./zig-cache/o/4fdae59d0759bc3807926615181213be/test", + "./zig-cache/o/4fdae59d0759bc3807926615181213be/test.o", + "./zig-cache/o/72cd987d5ce056936e1d8da56ba9d87e", + "./zig-cache/o/72cd987d5ce056936e1d8da56ba9d87e/test", + "./zig-cache/o/72cd987d5ce056936e1d8da56ba9d87e/test.o", + "./zig-cache/o/494504b602357552b19f7a3db0863036", + "./zig-cache/o/494504b602357552b19f7a3db0863036/test", + "./zig-cache/o/494504b602357552b19f7a3db0863036/test.o", + "./zig-cache/o/fd6c44c75c42d2c43306113e0e7a508f", + "./zig-cache/o/fd6c44c75c42d2c43306113e0e7a508f/test", + "./zig-cache/o/fd6c44c75c42d2c43306113e0e7a508f/test.o", + "./zig-cache/o/deeefc9b670fda42d5b76e6e152d6a06", + "./zig-cache/o/deeefc9b670fda42d5b76e6e152d6a06/test", + "./zig-cache/o/deeefc9b670fda42d5b76e6e152d6a06/test.o", + "./zig-cache/o/c1a154aa26d59432eac89ab59ee7f975", + "./zig-cache/o/c1a154aa26d59432eac89ab59ee7f975/test", + "./zig-cache/o/c1a154aa26d59432eac89ab59ee7f975/test.o", + "./zig-cache/o/93247dba341dc8277a8bb030dc06b5da", + "./zig-cache/o/93247dba341dc8277a8bb030dc06b5da/test", + "./zig-cache/o/93247dba341dc8277a8bb030dc06b5da/test.o", + "./zig-cache/o/d71ea76c4013b6df0b912bdae78fdda7", + "./zig-cache/o/d71ea76c4013b6df0b912bdae78fdda7/fuzzig.o", + "./zig-cache/o/d71ea76c4013b6df0b912bdae78fdda7/fuzzig", + "./zig-cache/o/af327a617cd35a29503af80a5ae34f7a", + 
"./zig-cache/o/af327a617cd35a29503af80a5ae34f7a/test", + "./zig-cache/o/af327a617cd35a29503af80a5ae34f7a/test.o", + "./zig-cache/o/fa9d512f29398074e05c71713f8adf5a", + "./zig-cache/o/fa9d512f29398074e05c71713f8adf5a/test", + "./zig-cache/o/fa9d512f29398074e05c71713f8adf5a/test.o", + "./zig-cache/o/68c7abf20d23320c87ab091529640015", +}; From e88ba6735c628faf0b41785b8bef30c851fda72b Mon Sep 17 00:00:00 2001 From: fjebaker <fergusbkr@gmail.com> Date: Sat, 10 Aug 2024 11:05:50 +0100 Subject: [PATCH 04/25] chore: cleanup build.zig.zon --- build.zig.zon | 59 +++++---------------------------------------------- 1 file changed, 5 insertions(+), 54 deletions(-) diff --git a/build.zig.zon b/build.zig.zon index b4f86f4..24f61a4 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -1,66 +1,17 @@ .{ .name = "fuzzig", - // This is a [Semantic Version](https://semver.org/). - // In a future version of Zig it will be used for package deduplication. .version = "0.0.0", - - // This field is optional. - // This is currently advisory only; Zig does not yet do anything - // with this value. - //.minimum_zig_version = "0.11.0", - - // This field is optional. - // Each dependency must either provide a `url` and `hash`, or a `path`. - // `zig build --fetch` can be used to fetch all dependencies of a package, recursively. - // Once all dependencies are fetched, `zig build` no longer requires - // internet connectivity. .dependencies = .{ - // See `zig fetch --save <url>` for a command-line interface for adding dependencies. - //.example = .{ - // // When updating this field to a new URL, be sure to delete the corresponding - // // `hash`, otherwise you are communicating that you expect to find the old hash at - // // the new URL. - // .url = "https://example.com/foo.tar.gz", - // - // // This is computed from the file contents of the directory of files that is - // // obtained after fetching `url` and applying the inclusion rules given by - // // `paths`. 
- // // - // // This field is the source of truth; packages do not come from a `url`; they - // // come from a `hash`. `url` is just one of many possible mirrors for how to - // // obtain a package matching this `hash`. - // // - // // Uses the [multihash](https://multiformats.io/multihash/) format. - // .hash = "...", - // - // // When this is provided, the package is found in a directory relative to the - // // build root. In this case the package's hash is irrelevant and therefore not - // // computed. This field and `url` are mutually exclusive. - // .path = "foo", - //}, .zg = .{ .url = "https://codeberg.org/dude_the_builder/zg/archive/v0.13.2.tar.gz", .hash = "122055beff332830a391e9895c044d33b15ea21063779557024b46169fb1984c6e40", }, }, - - // Specifies the set of files and directories that are included in this package. - // Only files and directories listed here are included in the `hash` that - // is computed for this package. - // Paths are relative to the build root. Use the empty string (`""`) to refer to - // the build root itself. - // A directory listed here means that all files within, recursively, are included. .paths = .{ - // This makes *all* files, recursively, included in this package. It is generally - // better to explicitly list the files and directories instead, to insure that - // fetching from tarballs, file system paths, and version control all result - // in the same contents hash. - "", - // For example... - //"build.zig", - //"build.zig.zon", - //"src", - //"LICENSE", - //"README.md", + "build.zig", + "build.zig.zon", + "src", + "LICENSE", + "README.md", }, } From d9de50d1e8af1538e20824dc71f63d8ea594d09b Mon Sep 17 00:00:00 2001 From: fjebaker <fergusbkr@gmail.com> Date: Sat, 10 Aug 2024 13:11:35 +0100 Subject: [PATCH 05/25] feat: overhaul implementation Pass the context and callback functions to the algorithm instead of defining its type by them. 
This lets the algorithm be owned by the implementation, which also provides the public API, allowing transformations, such as the unicode u8 -> u21, to happen in the guard. --- src/root.zig | 335 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 215 insertions(+), 120 deletions(-) diff --git a/src/root.zig b/src/root.zig index 2115954..9f0be87 100644 --- a/src/root.zig +++ b/src/root.zig @@ -81,12 +81,21 @@ pub fn Algorithm( comptime ElType: type, comptime ScoreT: type, comptime scores: Scores(ScoreT), - comptime Impl: type, ) type { return struct { const Matrix = MatrixT(ScoreT); const Self = @This(); + fn FunctionTable( + comptime Ctx: type, + ) type { + return struct { + isEqual: fn (Ctx, ElType, ElType) bool, + bonus: fn (Ctx, Scores(ScoreT), ElType, ElType) ScoreT, + score: fn (Ctx, Scores(ScoreT), ElType, ElType) ?ScoreT, + }; + } + // Scoring matrix m: Matrix, // Skip score matrix @@ -105,14 +114,6 @@ pub fn Algorithm( allocator: std.mem.Allocator, - impl: Impl, - - const TypeOfCaracter = switch (Impl) { - AsciiOptions => u8, - UnicodeOptions => u21, - else => unreachable, - }; - pub fn deinit(self: *Self) void { self.m.deinit(); self.x.deinit(); @@ -128,11 +129,7 @@ pub fn Algorithm( allocator: std.mem.Allocator, max_haystack: usize, max_needle: usize, - impl: Impl, ) !Self { - var impl_with_allocator = impl; - impl_with_allocator.allocator = allocator; - const rows = max_needle + 1; const cols = max_haystack + 1; @@ -165,17 +162,18 @@ pub fn Algorithm( .first_match_buffer = first_match_buffer, .traceback_buffer = traceback_buffer, .allocator = allocator, - .impl = impl_with_allocator, }; } /// Compute matching score pub fn score( self: *Self, + ctx: anytype, + funcTable: FunctionTable(@TypeOf(ctx)), haystack: []const ElType, needle: []const ElType, ) ?ScoreT { - const info = self.scoreImpl(haystack, needle) orelse + const info = self.scoreImpl(ctx, funcTable, haystack, needle) orelse return null; return info.score; } @@ -188,6 +186,8 @@ 
pub fn Algorithm( fn scoreImpl( self: *Self, + ctx: anytype, + comptime funcTable: FunctionTable(@TypeOf(ctx)), haystack: []const ElType, needle: []const ElType, ) ?ScoreInfo { @@ -195,14 +195,8 @@ pub fn Algorithm( .score = 0, }; - const haystack_normal = self.impl.convertString(haystack); - defer self.allocator.free(haystack_normal); - - const needle_normal = self.impl.convertString(needle); - defer self.allocator.free(needle_normal); - - const rows = needle_normal.len; - const cols = haystack_normal.len; + const rows = needle.len; + const cols = haystack.len; // resize the view into memory self.m.resizeNoAlloc(rows + 1, cols + 1); @@ -210,25 +204,31 @@ pub fn Algorithm( self.m_skip.resizeNoAlloc(rows + 1, cols + 1); const first_match_indices = utils.firstMatchesGeneric( - TypeOfCaracter, - &self.impl, - Impl.eqlFunc, + ElType, + ctx, + funcTable.isEqual, self.first_match_buffer, - haystack_normal, - needle_normal, + haystack, + needle, ) orelse return null; self.reset(rows + 1, cols + 1, first_match_indices); - self.determineBonuses(TypeOfCaracter, haystack_normal); + self.determineBonuses(ctx, funcTable, haystack); - try self.populateMatrices(haystack_normal, needle_normal, first_match_indices); + try self.populateMatrices( + ctx, + funcTable, + haystack, + needle, + first_match_indices, + ); const col_max = self.findMaximalElement( first_match_indices, rows, cols, ); - const last_row_index = needle_normal.len; + const last_row_index = needle.len; const s = self.m.get(last_row_index, col_max); return .{ .score = s, @@ -245,10 +245,12 @@ pub fn Algorithm( /// Compute the score and the indices of the matched characters pub fn scoreMatches( self: *Self, - haystack: []const u8, - needle: []const u8, + ctx: anytype, + funcTable: FunctionTable(@TypeOf(ctx)), + haystack: []const ElType, + needle: []const ElType, ) Matches { - const s = self.scoreImpl(haystack, needle) orelse + const s = self.scoreImpl(ctx, funcTable, haystack, needle) orelse return .{ .score = null }; 
const matches = self.traceback( @@ -290,10 +292,15 @@ pub fn Algorithm( return buf; } - fn determineBonuses(self: *Self, T: type, haystack: []const T) void { - var prev: T = 0; + fn determineBonuses( + self: *Self, + ctx: anytype, + comptime funcTable: FunctionTable(@TypeOf(ctx)), + haystack: []const ElType, + ) void { + var prev: ElType = 0; for (1.., haystack) |i, h| { - self.role_bonus[i] = Impl.bonusFunc(&self.impl, scores, prev, h); + self.role_bonus[i] = funcTable.bonus(ctx, scores, prev, h); prev = h; } @@ -347,8 +354,10 @@ pub fn Algorithm( fn populateMatrices( self: *Self, - haystack: []const TypeOfCaracter, - needle: []const TypeOfCaracter, + ctx: anytype, + funcTable: FunctionTable(@TypeOf(ctx)), + haystack: []const ElType, + needle: []const ElType, first_match_indices: []const usize, ) !void { for (1.., needle) |i, n| { @@ -362,7 +371,7 @@ pub fn Algorithm( // start by updating the M matrix // compute score - if (Impl.scoreFunc(&self.impl, scores, h, n)) |current| { + if (funcTable.score(ctx, scores, h, n)) |current| { const prev_bonus = self.bonus_buffer[j - 1]; // role bonus for current character @@ -474,33 +483,22 @@ pub fn Algorithm( }; } -pub const AsciiOptions = struct { - const AsciiScores = Scores(i32); - - pub const TypeOfCharacter = u8; - - case_sensitive: bool = true, - case_penalize: bool = false, - // treat spaces as wildcards for any kind of boundary - // i.e. match with any `[^a-z,A-Z,0-9]` - wildcard_spaces: bool = false, - - penalty_case_mistmatch: i32 = -2, - - /// Don't forget the allocator !!! 
- allocator: Allocator = undefined, - - fn convertString(a: *const AsciiOptions, string: []const u8) []const TypeOfCharacter { - return a.allocator.dupe(TypeOfCharacter, string) catch @panic("Memory error"); - } +const AsciiImplementation = struct { + const AlgorithmT = Algorithm(u8, i32, .{}); + pub const ScoresT = Scores(i32); + const FunctionTable: AlgorithmT.FunctionTable(*AsciiImplementation) = .{ + .score = scoreFunc, + .bonus = bonusFunc, + .isEqual = eqlFunc, + }; - fn eqlFunc(a: *const AsciiOptions, h: u8, n: u8) bool { - if (n == ' ' and a.wildcard_spaces) { + fn eqlFunc(self: *AsciiImplementation, h: u8, n: u8) bool { + if (n == ' ' and self.opts.wildcard_spaces) { return switch (h) { 'a'...'z', 'A'...'Z', '0'...'9' => false, else => true, }; - } else if (!a.case_sensitive) { + } else if (!self.opts.case_sensitive) { return std.ascii.toLower(h) == std.ascii.toLower(n); } else { return h == n; @@ -508,22 +506,22 @@ pub const AsciiOptions = struct { } fn scoreFunc( - a: *const AsciiOptions, - comptime scores: AsciiScores, + a: *AsciiImplementation, + scores: ScoresT, h: u8, n: u8, ) ?i32 { if (!a.eqlFunc(h, n)) return null; - if (a.case_penalize and (h != n)) { - return scores.score_match + a.penalty_case_mistmatch; + if (a.opts.case_penalize and (h != n)) { + return scores.score_match + a.opts.penalty_case_mistmatch; } return scores.score_match; } fn bonusFunc( - _: *const AsciiOptions, - comptime scores: AsciiScores, + _: *AsciiImplementation, + scores: ScoresT, h: u8, n: u8, ) i32 { @@ -537,56 +535,72 @@ pub const AsciiOptions = struct { .Tail => scores.bonus_tail, }; } -}; -pub const UnicodeOptions = struct { - const UnicodeScores = Scores(i32); + pub const Options = struct { + case_sensitive: bool = true, + case_penalize: bool = false, + // treat spaces as wildcards for any kind of boundary + // i.e. 
match with any `[^a-z,A-Z,0-9]` + wildcard_spaces: bool = false, - pub const TypeOfCharacter: type = u21; - - case_sensitive: bool = true, - case_penalize: bool = false, - // treat spaces as wildcards for any kind of boundary - // i.e. match with any `[^a-z,A-Z,0-9]` - wildcard_spaces: bool = false, - - penalty_case_mistmatch: i32 = -2, - - /// Don't forget the allocator !!! - allocator: Allocator = undefined, - - fn convertString(a: *const UnicodeOptions, string: []const u8) []const TypeOfCharacter { - var norm_data: Normalize.NormData = undefined; - Normalize.NormData.init(&norm_data, a.allocator) catch @panic("Cannot normalize string"); - defer norm_data.deinit(); + penalty_case_mistmatch: i32 = -2, + }; - const n = Normalize{ .norm_data = &norm_data }; + alg: AlgorithmT, + opts: Options, - const nfc_result = n.nfc(a.allocator, string) catch @panic("Cannot normalize string"); - defer nfc_result.deinit(); + // public interface - var iter = code_point.Iterator{ .bytes = nfc_result.slice }; + pub fn init( + allocator: std.mem.Allocator, + max_haystack: usize, + max_needle: usize, + opts: Options, + ) !AsciiImplementation { + const alg = try AlgorithmT.init(allocator, max_haystack, max_needle); + return .{ .alg = alg, .opts = opts }; + } - var converted_string = std.ArrayList(TypeOfCharacter).init(a.allocator); - defer converted_string.deinit(); + pub fn deinit(self: *AsciiImplementation) void { + self.alg.deinit(); + } - while (iter.next()) |c| { - converted_string.append(c.code) catch @panic("Memory error"); - } - return converted_string.toOwnedSlice() catch @panic("Memory error"); + pub fn score( + self: *AsciiImplementation, + haystack: []const u8, + needle: []const u8, + ) ?i32 { + return self.alg.score(self, FunctionTable, haystack, needle); + } + pub fn scoreMatches( + self: *AsciiImplementation, + haystack: []const u8, + needle: []const u8, + ) AlgorithmT.Matches { + return self.alg.scoreMatches(self, FunctionTable, haystack, needle); } +}; - fn eqlFunc(a: 
*const UnicodeOptions, h: u21, n: u21) bool { - const gcd = GenCatData.init(a.allocator) catch @panic("Memory error"); +pub const UnicodeImplementation = struct { + const AlgorithmT = Algorithm(u21, i32, .{}); + const UnicodeScores = Scores(i32); + const FunctionTable: AlgorithmT.FunctionTable(*UnicodeImplementation) = .{ + .score = scoreFunc, + .bonus = bonusFunc, + .isEqual = eqlFunc, + }; + + fn eqlFunc(a: *UnicodeImplementation, h: u21, n: u21) bool { + const gcd = GenCatData.init(a.alg.allocator) catch @panic("Memory error"); defer gcd.deinit(); - if (gcd.isSeparator(n) and a.wildcard_spaces) { + if (gcd.isSeparator(n) and a.opts.wildcard_spaces) { if (gcd.isLetter(h) or gcd.isNumber(h) or gcd.isSymbol(h)) { return true; } else { return false; } - } else if (!a.case_sensitive) { - const cd = CaseData.init(a.allocator) catch @panic("Memory error"); + } else if (!a.opts.case_sensitive) { + const cd = CaseData.init(a.alg.allocator) catch @panic("Memory error"); defer cd.deinit(); return cd.toLower(h) == cd.toLower(n); } else { @@ -595,27 +609,27 @@ pub const UnicodeOptions = struct { } fn scoreFunc( - a: *const UnicodeOptions, - comptime scores: UnicodeScores, + a: *UnicodeImplementation, + scores: UnicodeScores, h: u21, n: u21, ) ?i32 { if (!a.eqlFunc(h, n)) return null; - if (a.case_penalize and (h != n)) { - return scores.score_match + a.penalty_case_mistmatch; + if (a.opts.case_penalize and (h != n)) { + return scores.score_match + a.opts.penalty_case_mistmatch; } return scores.score_match; } fn bonusFunc( - self: *const UnicodeOptions, - comptime scores: UnicodeScores, + self: *UnicodeImplementation, + scores: UnicodeScores, h: u21, n: u21, ) i32 { - const p = CharacterType.fromUnicode(h, self.allocator); - const c = CharacterType.fromUnicode(n, self.allocator); + const p = CharacterType.fromUnicode(h, self.alg.allocator); + const c = CharacterType.fromUnicode(n, self.alg.allocator); return switch (p.roleNextTo(c)) { .Head => scores.bonus_head, @@ -624,12 
+638,93 @@ pub const UnicodeOptions = struct { .Tail => scores.bonus_tail, }; } + + fn convertString(a: *const UnicodeImplementation, string: []const u8) []const u21 { + var norm_data: Normalize.NormData = undefined; + Normalize.NormData.init(&norm_data, a.alg.allocator) catch @panic("Cannot normalize string"); + defer norm_data.deinit(); + + const n = Normalize{ .norm_data = &norm_data }; + + const nfc_result = n.nfc(a.alg.allocator, string) catch @panic("Cannot normalize string"); + defer nfc_result.deinit(); + + var iter = code_point.Iterator{ .bytes = nfc_result.slice }; + + var converted_string = std.ArrayList(u21).init(a.alg.allocator); + defer converted_string.deinit(); + + while (iter.next()) |c| { + converted_string.append(c.code) catch @panic("Memory error"); + } + return converted_string.toOwnedSlice() catch @panic("Memory error"); + } + + pub const Options = struct { + case_sensitive: bool = true, + case_penalize: bool = false, + // treat spaces as wildcards for any kind of boundary + // i.e. 
match with any `[^a-z,A-Z,0-9]` + wildcard_spaces: bool = false, + + penalty_case_mistmatch: i32 = -2, + }; + + alg: AlgorithmT, + opts: Options, + + pub fn init( + allocator: std.mem.Allocator, + max_haystack: usize, + max_needle: usize, + opts: Options, + ) !UnicodeImplementation { + const alg = try AlgorithmT.init(allocator, max_haystack, max_needle); + return .{ .alg = alg, .opts = opts }; + } + + pub fn deinit(self: *UnicodeImplementation) void { + self.alg.deinit(); + } + + pub fn score( + self: *UnicodeImplementation, + haystack: []const u8, + needle: []const u8, + ) !?i32 { + const haystack_normal = self.convertString(haystack); + defer self.alg.allocator.free(haystack_normal); + const needle_normal = self.convertString(needle); + defer self.alg.allocator.free(needle_normal); + return self.alg.score( + self, + FunctionTable, + haystack_normal, + needle_normal, + ); + } + + pub fn scoreMatches( + self: *UnicodeImplementation, + haystack: []const u8, + needle: []const u8, + ) !AlgorithmT.Matches { + const haystack_normal = self.convertString(haystack); + defer self.allocator.free(haystack_normal); + const needle_normal = self.convertString(needle); + defer self.allocator.free(needle_normal); + return self.alg.scoreMatches( + self, + FunctionTable, + haystack_normal, + needle_normal, + ); + } }; /// Default ASCII Fuzzy Finder -pub const Ascii = Algorithm(u8, i32, .{}, AsciiOptions); - -pub const Unicode = Algorithm(u8, i32, .{}, UnicodeOptions); +pub const Ascii = AsciiImplementation; +pub const Unicode = UnicodeImplementation; fn doTestScore(alg: *Ascii, haystack: []const u8, needle: []const u8, comptime score: i32) !void { const s = alg.score(haystack, needle); @@ -642,7 +737,7 @@ fn doTestScore(alg: *Ascii, haystack: []const u8, needle: []const u8, comptime s } fn doTestScoreUnicode(alg: *Unicode, haystack: []const u8, needle: []const u8, comptime score: ?i32) !void { - const s = alg.score(haystack, needle); + const s = try alg.score(haystack, needle); if 
(score == null) { // const stderr = std.io.getStdErr().writer(); @@ -654,7 +749,7 @@ fn doTestScoreUnicode(alg: *Unicode, haystack: []const u8, needle: []const u8, c } test "algorithm test" { - const o = AsciiOptions.AsciiScores{}; + const o = AsciiImplementation.ScoresT{}; var alg = try Ascii.init( std.testing.allocator, @@ -740,7 +835,7 @@ test "algorithm test" { } test "case sensitivity" { - const o = AsciiOptions.AsciiScores{}; + const o = AsciiImplementation.ScoresT{}; var alg1 = try Ascii.init( std.testing.allocator, @@ -777,7 +872,7 @@ test "case sensitivity" { ); defer alg2.deinit(); - const A: AsciiOptions = .{}; + const A: AsciiImplementation.Options = .{}; try doTestScore( &alg2, "xaB", @@ -788,7 +883,7 @@ test "case sensitivity" { } test "wildcard space" { - const o = AsciiOptions.AsciiScores{}; + const o = AsciiImplementation.ScoresT{}; var alg = try Ascii.init( std.testing.allocator, 128, @@ -848,7 +943,7 @@ test "traceback" { } test "Unicode search" { - const o = UnicodeOptions.UnicodeScores{}; + const o = UnicodeImplementation.UnicodeScores{}; var alg = try Unicode.init( std.testing.allocator, From 692ee8876a17b0a6260dd6f0aadbb0fb240f731d Mon Sep 17 00:00:00 2001 From: fjebaker <fergusbkr@gmail.com> Date: Sat, 10 Aug 2024 13:16:07 +0100 Subject: [PATCH 06/25] chore: rename fields to be consistent + function tables comptime --- src/root.zig | 58 +++++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/src/root.zig b/src/root.zig index 9f0be87..10247b1 100644 --- a/src/root.zig +++ b/src/root.zig @@ -58,7 +58,7 @@ const MatrixT = structures.MatrixT; // // The algorithm is now O(mn). 
-pub fn Scores(comptime ScoreT: type) type { +pub fn ScoresType(comptime ScoreT: type) type { return struct { score_match: ScoreT = 16, score_gap_start: ScoreT = -3, @@ -77,10 +77,10 @@ pub fn Scores(comptime ScoreT: type) type { }; } -pub fn Algorithm( +pub fn AlgorithmType( comptime ElType: type, comptime ScoreT: type, - comptime scores: Scores(ScoreT), + comptime scores: ScoresType(ScoreT), ) type { return struct { const Matrix = MatrixT(ScoreT); @@ -91,8 +91,8 @@ pub fn Algorithm( ) type { return struct { isEqual: fn (Ctx, ElType, ElType) bool, - bonus: fn (Ctx, Scores(ScoreT), ElType, ElType) ScoreT, - score: fn (Ctx, Scores(ScoreT), ElType, ElType) ?ScoreT, + bonus: fn (Ctx, ScoresType(ScoreT), ElType, ElType) ScoreT, + score: fn (Ctx, ScoresType(ScoreT), ElType, ElType) ?ScoreT, }; } @@ -169,7 +169,7 @@ pub fn Algorithm( pub fn score( self: *Self, ctx: anytype, - funcTable: FunctionTable(@TypeOf(ctx)), + comptime funcTable: FunctionTable(@TypeOf(ctx)), haystack: []const ElType, needle: []const ElType, ) ?ScoreT { @@ -246,7 +246,7 @@ pub fn Algorithm( pub fn scoreMatches( self: *Self, ctx: anytype, - funcTable: FunctionTable(@TypeOf(ctx)), + comptime funcTable: FunctionTable(@TypeOf(ctx)), haystack: []const ElType, needle: []const ElType, ) Matches { @@ -355,7 +355,7 @@ pub fn Algorithm( fn populateMatrices( self: *Self, ctx: anytype, - funcTable: FunctionTable(@TypeOf(ctx)), + comptime funcTable: FunctionTable(@TypeOf(ctx)), haystack: []const ElType, needle: []const ElType, first_match_indices: []const usize, @@ -484,9 +484,10 @@ pub fn Algorithm( } const AsciiImplementation = struct { - const AlgorithmT = Algorithm(u8, i32, .{}); - pub const ScoresT = Scores(i32); - const FunctionTable: AlgorithmT.FunctionTable(*AsciiImplementation) = .{ + pub const Algorithm = AlgorithmType(u8, i32, .{}); + pub const Scores = ScoresType(i32); + + const FunctionTable: Algorithm.FunctionTable(*AsciiImplementation) = .{ .score = scoreFunc, .bonus = bonusFunc, .isEqual = 
eqlFunc, @@ -507,7 +508,7 @@ const AsciiImplementation = struct { fn scoreFunc( a: *AsciiImplementation, - scores: ScoresT, + scores: Scores, h: u8, n: u8, ) ?i32 { @@ -521,7 +522,7 @@ const AsciiImplementation = struct { fn bonusFunc( _: *AsciiImplementation, - scores: ScoresT, + scores: Scores, h: u8, n: u8, ) i32 { @@ -546,7 +547,7 @@ const AsciiImplementation = struct { penalty_case_mistmatch: i32 = -2, }; - alg: AlgorithmT, + alg: Algorithm, opts: Options, // public interface @@ -557,7 +558,7 @@ const AsciiImplementation = struct { max_needle: usize, opts: Options, ) !AsciiImplementation { - const alg = try AlgorithmT.init(allocator, max_haystack, max_needle); + const alg = try Algorithm.init(allocator, max_haystack, max_needle); return .{ .alg = alg, .opts = opts }; } @@ -576,15 +577,16 @@ const AsciiImplementation = struct { self: *AsciiImplementation, haystack: []const u8, needle: []const u8, - ) AlgorithmT.Matches { + ) Algorithm.Matches { return self.alg.scoreMatches(self, FunctionTable, haystack, needle); } }; pub const UnicodeImplementation = struct { - const AlgorithmT = Algorithm(u21, i32, .{}); - const UnicodeScores = Scores(i32); - const FunctionTable: AlgorithmT.FunctionTable(*UnicodeImplementation) = .{ + pub const Algorithm = AlgorithmType(u21, i32, .{}); + pub const Scores = ScoresType(i32); + + const FunctionTable: Algorithm.FunctionTable(*UnicodeImplementation) = .{ .score = scoreFunc, .bonus = bonusFunc, .isEqual = eqlFunc, @@ -610,7 +612,7 @@ pub const UnicodeImplementation = struct { fn scoreFunc( a: *UnicodeImplementation, - scores: UnicodeScores, + scores: Scores, h: u21, n: u21, ) ?i32 { @@ -624,7 +626,7 @@ pub const UnicodeImplementation = struct { fn bonusFunc( self: *UnicodeImplementation, - scores: UnicodeScores, + scores: Scores, h: u21, n: u21, ) i32 { @@ -670,7 +672,7 @@ pub const UnicodeImplementation = struct { penalty_case_mistmatch: i32 = -2, }; - alg: AlgorithmT, + alg: Algorithm, opts: Options, pub fn init( @@ -679,7 +681,7 
@@ pub const UnicodeImplementation = struct { max_needle: usize, opts: Options, ) !UnicodeImplementation { - const alg = try AlgorithmT.init(allocator, max_haystack, max_needle); + const alg = try Algorithm.init(allocator, max_haystack, max_needle); return .{ .alg = alg, .opts = opts }; } @@ -708,7 +710,7 @@ pub const UnicodeImplementation = struct { self: *UnicodeImplementation, haystack: []const u8, needle: []const u8, - ) !AlgorithmT.Matches { + ) !Algorithm.Matches { const haystack_normal = self.convertString(haystack); defer self.allocator.free(haystack_normal); const needle_normal = self.convertString(needle); @@ -749,7 +751,7 @@ fn doTestScoreUnicode(alg: *Unicode, haystack: []const u8, needle: []const u8, c } test "algorithm test" { - const o = AsciiImplementation.ScoresT{}; + const o = AsciiImplementation.Scores{}; var alg = try Ascii.init( std.testing.allocator, @@ -835,7 +837,7 @@ test "algorithm test" { } test "case sensitivity" { - const o = AsciiImplementation.ScoresT{}; + const o = AsciiImplementation.Scores{}; var alg1 = try Ascii.init( std.testing.allocator, @@ -883,7 +885,7 @@ test "case sensitivity" { } test "wildcard space" { - const o = AsciiImplementation.ScoresT{}; + const o = AsciiImplementation.Scores{}; var alg = try Ascii.init( std.testing.allocator, 128, @@ -943,7 +945,7 @@ test "traceback" { } test "Unicode search" { - const o = UnicodeImplementation.UnicodeScores{}; + const o = UnicodeImplementation.Scores{}; var alg = try Unicode.init( std.testing.allocator, From 5d77462dccd242acfaf1bb77ff30340b95bc94f4 Mon Sep 17 00:00:00 2001 From: fjebaker <fergusbkr@gmail.com> Date: Sat, 10 Aug 2024 16:16:55 +0100 Subject: [PATCH 07/25] chore: rename implementations --- src/root.zig | 66 ++++++++++++++++++++-------------------------------- 1 file changed, 25 insertions(+), 41 deletions(-) diff --git a/src/root.zig b/src/root.zig index 10247b1..0dabb1e 100644 --- a/src/root.zig +++ b/src/root.zig @@ -483,17 +483,17 @@ pub fn AlgorithmType( }; } 
-const AsciiImplementation = struct { +pub const Ascii = struct { pub const Algorithm = AlgorithmType(u8, i32, .{}); pub const Scores = ScoresType(i32); - const FunctionTable: Algorithm.FunctionTable(*AsciiImplementation) = .{ + const FunctionTable: Algorithm.FunctionTable(*Ascii) = .{ .score = scoreFunc, .bonus = bonusFunc, .isEqual = eqlFunc, }; - fn eqlFunc(self: *AsciiImplementation, h: u8, n: u8) bool { + fn eqlFunc(self: *Ascii, h: u8, n: u8) bool { if (n == ' ' and self.opts.wildcard_spaces) { return switch (h) { 'a'...'z', 'A'...'Z', '0'...'9' => false, @@ -507,7 +507,7 @@ const AsciiImplementation = struct { } fn scoreFunc( - a: *AsciiImplementation, + a: *Ascii, scores: Scores, h: u8, n: u8, @@ -521,7 +521,7 @@ const AsciiImplementation = struct { } fn bonusFunc( - _: *AsciiImplementation, + _: *Ascii, scores: Scores, h: u8, n: u8, @@ -557,24 +557,24 @@ const AsciiImplementation = struct { max_haystack: usize, max_needle: usize, opts: Options, - ) !AsciiImplementation { + ) !Ascii { const alg = try Algorithm.init(allocator, max_haystack, max_needle); return .{ .alg = alg, .opts = opts }; } - pub fn deinit(self: *AsciiImplementation) void { + pub fn deinit(self: *Ascii) void { self.alg.deinit(); } pub fn score( - self: *AsciiImplementation, + self: *Ascii, haystack: []const u8, needle: []const u8, ) ?i32 { return self.alg.score(self, FunctionTable, haystack, needle); } pub fn scoreMatches( - self: *AsciiImplementation, + self: *Ascii, haystack: []const u8, needle: []const u8, ) Algorithm.Matches { @@ -582,17 +582,17 @@ const AsciiImplementation = struct { } }; -pub const UnicodeImplementation = struct { +pub const Unicode = struct { pub const Algorithm = AlgorithmType(u21, i32, .{}); pub const Scores = ScoresType(i32); - const FunctionTable: Algorithm.FunctionTable(*UnicodeImplementation) = .{ + const FunctionTable: Algorithm.FunctionTable(*Unicode) = .{ .score = scoreFunc, .bonus = bonusFunc, .isEqual = eqlFunc, }; - fn eqlFunc(a: *UnicodeImplementation, 
h: u21, n: u21) bool { + fn eqlFunc(a: *Unicode, h: u21, n: u21) bool { const gcd = GenCatData.init(a.alg.allocator) catch @panic("Memory error"); defer gcd.deinit(); if (gcd.isSeparator(n) and a.opts.wildcard_spaces) { @@ -611,7 +611,7 @@ pub const UnicodeImplementation = struct { } fn scoreFunc( - a: *UnicodeImplementation, + a: *Unicode, scores: Scores, h: u21, n: u21, @@ -625,7 +625,7 @@ pub const UnicodeImplementation = struct { } fn bonusFunc( - self: *UnicodeImplementation, + self: *Unicode, scores: Scores, h: u21, n: u21, @@ -641,7 +641,7 @@ pub const UnicodeImplementation = struct { }; } - fn convertString(a: *const UnicodeImplementation, string: []const u8) []const u21 { + fn convertString(a: *const Unicode, string: []const u8) []const u21 { var norm_data: Normalize.NormData = undefined; Normalize.NormData.init(&norm_data, a.alg.allocator) catch @panic("Cannot normalize string"); defer norm_data.deinit(); @@ -680,17 +680,17 @@ pub const UnicodeImplementation = struct { max_haystack: usize, max_needle: usize, opts: Options, - ) !UnicodeImplementation { + ) !Unicode { const alg = try Algorithm.init(allocator, max_haystack, max_needle); return .{ .alg = alg, .opts = opts }; } - pub fn deinit(self: *UnicodeImplementation) void { + pub fn deinit(self: *Unicode) void { self.alg.deinit(); } pub fn score( - self: *UnicodeImplementation, + self: *Unicode, haystack: []const u8, needle: []const u8, ) !?i32 { @@ -707,7 +707,7 @@ pub const UnicodeImplementation = struct { } pub fn scoreMatches( - self: *UnicodeImplementation, + self: *Unicode, haystack: []const u8, needle: []const u8, ) !Algorithm.Matches { @@ -724,34 +724,18 @@ pub const UnicodeImplementation = struct { } }; -/// Default ASCII Fuzzy Finder -pub const Ascii = AsciiImplementation; -pub const Unicode = UnicodeImplementation; - fn doTestScore(alg: *Ascii, haystack: []const u8, needle: []const u8, comptime score: i32) !void { const s = alg.score(haystack, needle); - - // const stderr = 
std.io.getStdErr().writer(); - // try alg.debugPrint(stderr, haystack, needle); - // std.debug.print("SCORE : {d}\n", .{s orelse -1}); - try std.testing.expectEqual(score, s.?); } fn doTestScoreUnicode(alg: *Unicode, haystack: []const u8, needle: []const u8, comptime score: ?i32) !void { const s = try alg.score(haystack, needle); - - if (score == null) { - // const stderr = std.io.getStdErr().writer(); - // try alg.debugPrint(stderr, haystack, needle); - std.debug.print("SCORE : {d}\n", .{s orelse -1}); - } else { - try std.testing.expectEqual(score, s.?); - } + try std.testing.expectEqual(score, s.?); } test "algorithm test" { - const o = AsciiImplementation.Scores{}; + const o = Ascii.Scores{}; var alg = try Ascii.init( std.testing.allocator, @@ -837,7 +821,7 @@ test "algorithm test" { } test "case sensitivity" { - const o = AsciiImplementation.Scores{}; + const o = Ascii.Scores{}; var alg1 = try Ascii.init( std.testing.allocator, @@ -874,7 +858,7 @@ test "case sensitivity" { ); defer alg2.deinit(); - const A: AsciiImplementation.Options = .{}; + const A: Ascii.Options = .{}; try doTestScore( &alg2, "xaB", @@ -885,7 +869,7 @@ test "case sensitivity" { } test "wildcard space" { - const o = AsciiImplementation.Scores{}; + const o = Ascii.Scores{}; var alg = try Ascii.init( std.testing.allocator, 128, @@ -945,7 +929,7 @@ test "traceback" { } test "Unicode search" { - const o = UnicodeImplementation.Scores{}; + const o = Unicode.Scores{}; var alg = try Unicode.init( std.testing.allocator, From 5250b266c1f2fd6f3651f2e16a13779b3cadecd4 Mon Sep 17 00:00:00 2001 From: alberic89 <alberic89@gmx.com> Date: Sat, 10 Aug 2024 12:21:20 +0200 Subject: [PATCH 08/25] No longer need to know the max size by advance Re-allocate Matrix and buffers in scoreImpl at each call depending of the length of the strings. 
--- src/root.zig | 124 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 80 insertions(+), 44 deletions(-) diff --git a/src/root.zig b/src/root.zig index 0dabb1e..fc5fb8a 100644 --- a/src/root.zig +++ b/src/root.zig @@ -114,55 +114,93 @@ pub fn AlgorithmType( allocator: std.mem.Allocator, + impl: Impl, + + initialised: bool, + + const TypeOfCaracter = switch (Impl) { + AsciiOptions => u8, + UnicodeOptions => u21, + else => unreachable, + }; + pub fn deinit(self: *Self) void { - self.m.deinit(); - self.x.deinit(); - self.m_skip.deinit(); - self.allocator.free(self.role_bonus); - self.allocator.free(self.bonus_buffer); - self.allocator.free(self.first_match_buffer); - self.allocator.free(self.traceback_buffer); + self.deallocateMatrixAndBuffer(); self.* = undefined; } pub fn init( allocator: std.mem.Allocator, + impl: Impl, + ) !Self { + var impl_with_allocator = impl; + impl_with_allocator.allocator = allocator; + + return .{ + .m = undefined, + .x = undefined, + .m_skip = undefined, + + .role_bonus = undefined, + .bonus_buffer = undefined, + .first_match_buffer = undefined, + .traceback_buffer = undefined, + .allocator = allocator, + .impl = impl_with_allocator, + .initialised = false, + }; + } + + fn allocateMatrixAndBuffer( + self: *Self, max_haystack: usize, max_needle: usize, - ) !Self { + ) !void { const rows = max_needle + 1; const cols = max_haystack + 1; - var m = try Matrix.init(allocator, rows, cols); + var m = try Matrix.init(self.allocator, rows, cols); errdefer m.deinit(); - var x = try Matrix.init(allocator, rows, cols); + var x = try Matrix.init(self.allocator, rows, cols); errdefer x.deinit(); - var m_skip = try MatrixT(bool).init(allocator, rows, cols); + var m_skip = try MatrixT(bool).init(self.allocator, rows, cols); errdefer m_skip.deinit(); - const role_bonus = try allocator.alloc(ScoreT, cols); - errdefer allocator.free(role_bonus); + const role_bonus = try self.allocator.alloc(ScoreT, cols); + errdefer 
self.allocator.free(role_bonus); - const bonus_buffer = try allocator.alloc(ScoreT, cols); - errdefer allocator.free(bonus_buffer); + const bonus_buffer = try self.allocator.alloc(ScoreT, cols); + errdefer self.allocator.free(bonus_buffer); - const first_match_buffer = try allocator.alloc(usize, rows); - errdefer allocator.free(first_match_buffer); + const first_match_buffer = try self.allocator.alloc(usize, rows); + errdefer self.allocator.free(first_match_buffer); - const traceback_buffer = try allocator.alloc(usize, cols); - errdefer allocator.free(traceback_buffer); + const traceback_buffer = try self.allocator.alloc(usize, cols); + errdefer self.allocator.free(traceback_buffer); - return .{ - .m = m, - .x = x, - .m_skip = m_skip, - - .role_bonus = role_bonus, - .bonus_buffer = bonus_buffer, - .first_match_buffer = first_match_buffer, - .traceback_buffer = traceback_buffer, - .allocator = allocator, - }; + self.m = m; + self.x = x; + self.m_skip = m_skip; + + self.role_bonus = role_bonus; + self.bonus_buffer = bonus_buffer; + self.first_match_buffer = first_match_buffer; + self.traceback_buffer = traceback_buffer; + + self.initialised = true; + } + + fn deallocateMatrixAndBuffer(self: *Self) void { + if (self.initialised) { + self.m.deinit(); + self.x.deinit(); + self.m_skip.deinit(); + self.allocator.free(self.role_bonus); + self.allocator.free(self.bonus_buffer); + self.allocator.free(self.first_match_buffer); + self.allocator.free(self.traceback_buffer); + } + self.initialised = false; } /// Compute matching score @@ -195,8 +233,18 @@ pub fn AlgorithmType( .score = 0, }; - const rows = needle.len; - const cols = haystack.len; + self.deallocateMatrixAndBuffer(); + + const haystack_normal = self.impl.convertString(haystack); + defer self.allocator.free(haystack_normal); + + const needle_normal = self.impl.convertString(needle); + defer self.allocator.free(needle_normal); + + const rows = needle_normal.len; + const cols = haystack_normal.len; + + 
self.allocateMatrixAndBuffer(cols, rows) catch @panic("Memory error"); // resize the view into memory self.m.resizeNoAlloc(rows + 1, cols + 1); @@ -739,8 +787,6 @@ test "algorithm test" { var alg = try Ascii.init( std.testing.allocator, - 128, - 32, .{}, ); defer alg.deinit(); @@ -825,8 +871,6 @@ test "case sensitivity" { var alg1 = try Ascii.init( std.testing.allocator, - 128, - 32, .{ .case_sensitive = false }, ); defer alg1.deinit(); @@ -849,8 +893,6 @@ test "case sensitivity" { var alg2 = try Ascii.init( std.testing.allocator, - 128, - 32, .{ .case_sensitive = false, .case_penalize = true, @@ -872,8 +914,6 @@ test "wildcard space" { const o = Ascii.Scores{}; var alg = try Ascii.init( std.testing.allocator, - 128, - 32, .{ .wildcard_spaces = true }, ); defer alg.deinit(); @@ -915,8 +955,6 @@ fn doTestTraceback(alg: *Ascii, haystack: []const u8, needle: []const u8, compti test "traceback" { var alg = try Ascii.init( std.testing.allocator, - 64, - 32, .{}, ); defer alg.deinit(); @@ -933,8 +971,6 @@ test "Unicode search" { var alg = try Unicode.init( std.testing.allocator, - 128, - 32, .{}, ); defer alg.deinit(); From 4ae5d78d75fce650a71c4684b7a730d5b6594c22 Mon Sep 17 00:00:00 2001 From: alberic89 <alberic89@gmx.com> Date: Sat, 10 Aug 2024 12:57:57 +0200 Subject: [PATCH 09/25] Remove allocator field in implementations Use instead the allocator provided to the algorithm, so many many many side effects. 
--- src/root.zig | 36 ++++++++++++++++++++++++------------ src/utils.zig | 14 ++++++++------ 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/src/root.zig b/src/root.zig index fc5fb8a..6a7509c 100644 --- a/src/root.zig +++ b/src/root.zig @@ -133,9 +133,6 @@ pub fn AlgorithmType( allocator: std.mem.Allocator, impl: Impl, ) !Self { - var impl_with_allocator = impl; - impl_with_allocator.allocator = allocator; - return .{ .m = undefined, .x = undefined, @@ -146,7 +143,7 @@ pub fn AlgorithmType( .first_match_buffer = undefined, .traceback_buffer = undefined, .allocator = allocator, - .impl = impl_with_allocator, + .impl = impl, .initialised = false, }; } @@ -235,10 +232,10 @@ pub fn AlgorithmType( self.deallocateMatrixAndBuffer(); - const haystack_normal = self.impl.convertString(haystack); + const haystack_normal = self.impl.convertString(haystack, self.allocator); defer self.allocator.free(haystack_normal); - const needle_normal = self.impl.convertString(needle); + const needle_normal = self.impl.convertString(needle, self.allocator); defer self.allocator.free(needle_normal); const rows = needle_normal.len; @@ -256,8 +253,8 @@ pub fn AlgorithmType( ctx, funcTable.isEqual, self.first_match_buffer, - haystack, - needle, + haystack_normal, + needle_normal, ) orelse return null; self.reset(rows + 1, cols + 1, first_match_indices); @@ -348,7 +345,7 @@ pub fn AlgorithmType( ) void { var prev: ElType = 0; for (1.., haystack) |i, h| { - self.role_bonus[i] = funcTable.bonus(ctx, scores, prev, h); + self.role_bonus[i] = Impl.bonusFunc(&self.impl, scores, prev, h, self.allocator); prev = h; } @@ -419,7 +416,7 @@ pub fn AlgorithmType( // start by updating the M matrix // compute score - if (funcTable.score(ctx, scores, h, n)) |current| { + if (Impl.scoreFunc(&self.impl, scores, h, n, self.allocator)) |current| { const prev_bonus = self.bonus_buffer[j - 1]; // role bonus for current character @@ -541,6 +538,18 @@ pub const Ascii = struct { .isEqual = eqlFunc, }; + 
case_sensitive: bool = true, + case_penalize: bool = false, + // treat spaces as wildcards for any kind of boundary + // i.e. match with any `[^a-z,A-Z,0-9]` + wildcard_spaces: bool = false, + + penalty_case_mistmatch: i32 = -2, + + fn convertString(_: *const AsciiOptions, string: []const u8, allocator: Allocator) []const TypeOfCharacter { + return allocator.dupe(TypeOfCharacter, string) catch @panic("Memory error"); + } + fn eqlFunc(self: *Ascii, h: u8, n: u8) bool { if (n == ' ' and self.opts.wildcard_spaces) { return switch (h) { @@ -559,8 +568,9 @@ pub const Ascii = struct { scores: Scores, h: u8, n: u8, + allocator: Allocator, ) ?i32 { - if (!a.eqlFunc(h, n)) return null; + if (!a.eqlFunc(h, n, allocator)) return null; if (a.opts.case_penalize and (h != n)) { return scores.score_match + a.opts.penalty_case_mistmatch; @@ -663,8 +673,9 @@ pub const Unicode = struct { scores: Scores, h: u21, n: u21, + allocator: Allocator, ) ?i32 { - if (!a.eqlFunc(h, n)) return null; + if (!a.eqlFunc(h, n, allocator)) return null; if (a.opts.case_penalize and (h != n)) { return scores.score_match + a.opts.penalty_case_mistmatch; @@ -677,6 +688,7 @@ pub const Unicode = struct { scores: Scores, h: u21, n: u21, + allocator: Allocator, ) i32 { const p = CharacterType.fromUnicode(h, self.alg.allocator); const c = CharacterType.fromUnicode(n, self.alg.allocator); diff --git a/src/utils.zig b/src/utils.zig index 2b8cb56..ae60857 100644 --- a/src/utils.zig +++ b/src/utils.zig @@ -82,10 +82,11 @@ pub const CharacterType = enum { pub fn firstMatchesGeneric( comptime T: type, ctx: anytype, - comptime eqlFunc: fn (@TypeOf(ctx), h: T, n: T) bool, + comptime eqlFunc: fn (@TypeOf(ctx), h: T, n: T, allocator: std.mem.Allocator) bool, indices: []usize, haystack: []const T, needle: []const T, + allocator: std.mem.Allocator, ) ?[]const usize { if (needle.len == 0) { return &.{}; @@ -98,7 +99,7 @@ pub fn firstMatchesGeneric( for (0.., haystack) |i, h| { const n = needle[index]; - if (eqlFunc(ctx, 
h, n)) { + if (eqlFunc(ctx, h, n, allocator)) { indices[index] = i; index += 1; if (index >= needle.len) break; @@ -108,9 +109,9 @@ pub fn firstMatchesGeneric( return indices[0..index]; } -fn simpleEql(comptime T: type) fn (void, T, T) bool { +fn simpleEql(comptime T: type) fn (void, T, T, std.mem.Allocator) bool { return struct { - fn f(_: void, h: T, n: T) bool { + fn f(_: void, h: T, n: T, _: std.mem.Allocator) bool { return h == n; } }.f; @@ -123,8 +124,9 @@ pub fn firstMatches( indices: []usize, haystack: []const T, needle: []const T, + allocator: std.mem.Allocator, ) ?[]const usize { - return firstMatchesGeneric(T, {}, simpleEql(T), indices, haystack, needle); + return firstMatchesGeneric(T, {}, simpleEql(T), indices, haystack, needle, allocator); } pub fn firstMatchesAlloc( @@ -135,7 +137,7 @@ pub fn firstMatchesAlloc( ) !?[]const usize { const indices = try allocator.alloc(usize, needle.len); errdefer allocator.free(indices); - return firstMatches(T, indices, haystack, needle); + return firstMatches(T, indices, haystack, needle, allocator); } fn testFirstMatch( From 67e3c14925b9be891995f3740c89a2eaabf2fcff Mon Sep 17 00:00:00 2001 From: alberic89 <alberic89@gmx.com> Date: Sat, 10 Aug 2024 16:27:56 +0200 Subject: [PATCH 10/25] Remove the TypeOfCaracter switch Now, the user SHOULD use []const u8 strings. 
--- src/root.zig | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/src/root.zig b/src/root.zig index 6a7509c..076c531 100644 --- a/src/root.zig +++ b/src/root.zig @@ -118,12 +118,6 @@ pub fn AlgorithmType( initialised: bool, - const TypeOfCaracter = switch (Impl) { - AsciiOptions => u8, - UnicodeOptions => u21, - else => unreachable, - }; - pub fn deinit(self: *Self) void { self.deallocateMatrixAndBuffer(); self.* = undefined; @@ -479,8 +473,8 @@ pub fn AlgorithmType( fn debugPrint( self: *const Self, writer: anytype, - haystack: []const ElType, - needle: []const ElType, + haystack: []const u8, + needle: []const u8, ) !void { const el_width = bonus: { var max_digits: usize = 1; @@ -546,8 +540,11 @@ pub const Ascii = struct { penalty_case_mistmatch: i32 = -2, - fn convertString(_: *const AsciiOptions, string: []const u8, allocator: Allocator) []const TypeOfCharacter { - return allocator.dupe(TypeOfCharacter, string) catch @panic("Memory error"); + alg: Algorithm, + opts: Options, + + fn convertString(_: *const Ascii, string: []const u8, allocator: Allocator) []const u8 { + return allocator.dupe(u8, string) catch @panic("Memory error"); } fn eqlFunc(self: *Ascii, h: u8, n: u8) bool { @@ -605,9 +602,6 @@ pub const Ascii = struct { penalty_case_mistmatch: i32 = -2, }; - alg: Algorithm, - opts: Options, - // public interface pub fn init( @@ -688,7 +682,6 @@ pub const Unicode = struct { scores: Scores, h: u21, n: u21, - allocator: Allocator, ) i32 { const p = CharacterType.fromUnicode(h, self.alg.allocator); const c = CharacterType.fromUnicode(n, self.alg.allocator); From 2fe86fc427258be3eede4e1112aebdec207b4bbc Mon Sep 17 00:00:00 2001 From: alberic89 <alberic89@gmx.com> Date: Sat, 10 Aug 2024 18:13:37 +0200 Subject: [PATCH 11/25] make rebase adjustments adjust the rebase (not in valid state) --- src/root.zig | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/src/root.zig
b/src/root.zig index 076c531..bbe1562 100644 --- a/src/root.zig +++ b/src/root.zig @@ -92,7 +92,7 @@ pub fn AlgorithmType( return struct { isEqual: fn (Ctx, ElType, ElType) bool, bonus: fn (Ctx, ScoresType(ScoreT), ElType, ElType) ScoreT, - score: fn (Ctx, ScoresType(ScoreT), ElType, ElType) ?ScoreT, + score: fn (Ctx, ScoresType(ScoreT), ElType, ElType, std.mem.Allocator) ?ScoreT, }; } @@ -114,8 +114,6 @@ pub fn AlgorithmType( allocator: std.mem.Allocator, - impl: Impl, - initialised: bool, pub fn deinit(self: *Self) void { @@ -125,7 +123,6 @@ pub fn AlgorithmType( pub fn init( allocator: std.mem.Allocator, - impl: Impl, ) !Self { return .{ .m = undefined, @@ -137,7 +134,6 @@ pub fn AlgorithmType( .first_match_buffer = undefined, .traceback_buffer = undefined, .allocator = allocator, - .impl = impl, .initialised = false, }; } @@ -226,10 +222,10 @@ pub fn AlgorithmType( self.deallocateMatrixAndBuffer(); - const haystack_normal = self.impl.convertString(haystack, self.allocator); + const haystack_normal = self.funcTable.convertString(haystack, self.allocator); defer self.allocator.free(haystack_normal); - const needle_normal = self.impl.convertString(needle, self.allocator); + const needle_normal = self.funcTable.convertString(needle, self.allocator); defer self.allocator.free(needle_normal); const rows = needle_normal.len; @@ -339,7 +335,7 @@ pub fn AlgorithmType( ) void { var prev: ElType = 0; for (1.., haystack) |i, h| { - self.role_bonus[i] = Impl.bonusFunc(&self.impl, scores, prev, h, self.allocator); + self.role_bonus[i] = funcTable.bonusFunc(&self.impl, scores, prev, h, self.allocator); prev = h; } @@ -410,7 +406,7 @@ pub fn AlgorithmType( // start by updating the M matrix // compute score - if (Impl.scoreFunc(&self.impl, scores, h, n, self.allocator)) |current| { + if (funcTable.scoreFunc(&self.impl, scores, h, n, self.allocator)) |current| { const prev_bonus = self.bonus_buffer[j - 1]; // role bonus for current character @@ -543,10 +539,6 @@ pub const Ascii 
= struct { alg: Algorithm, opts: Options, - fn convertString(_: *const Ascii, string: []const u8, allocator: Allocator) []const u8 { - return allocator.dupe(u8, string) catch @panic("Memory error"); - } - fn eqlFunc(self: *Ascii, h: u8, n: u8) bool { if (n == ' ' and self.opts.wildcard_spaces) { return switch (h) { @@ -606,12 +598,10 @@ pub const Ascii = struct { pub fn init( allocator: std.mem.Allocator, - max_haystack: usize, - max_needle: usize, opts: Options, ) !Ascii { - const alg = try Algorithm.init(allocator, max_haystack, max_needle); - return .{ .alg = alg, .opts = opts }; + const alg = try Algorithm.init(allocator); + return Ascii{ .alg = alg, .opts = opts }; } pub fn deinit(self: *Ascii) void { @@ -625,6 +615,7 @@ pub const Ascii = struct { ) ?i32 { return self.alg.score(self, FunctionTable, haystack, needle); } + pub fn scoreMatches( self: *Ascii, haystack: []const u8, @@ -730,11 +721,9 @@ pub const Unicode = struct { pub fn init( allocator: std.mem.Allocator, - max_haystack: usize, - max_needle: usize, opts: Options, ) !Unicode { - const alg = try Algorithm.init(allocator, max_haystack, max_needle); + const alg = try Algorithm.init(allocator); return .{ .alg = alg, .opts = opts }; } From 0c7594df9882d4d13139d519b39b19cd6d8a0334 Mon Sep 17 00:00:00 2001 From: alberic89 <alberic89@gmx.com> Date: Sat, 10 Aug 2024 21:15:51 +0200 Subject: [PATCH 12/25] WIP: make adjustment Not working for now --- src/root.zig | 86 +++++++++++++++++++++++++++++++-------------------- src/utils.zig | 25 +++++++-------- 2 files changed, 63 insertions(+), 48 deletions(-) diff --git a/src/root.zig b/src/root.zig index bbe1562..e5c0a24 100644 --- a/src/root.zig +++ b/src/root.zig @@ -92,7 +92,7 @@ pub fn AlgorithmType( return struct { isEqual: fn (Ctx, ElType, ElType) bool, bonus: fn (Ctx, ScoresType(ScoreT), ElType, ElType) ScoreT, - score: fn (Ctx, ScoresType(ScoreT), ElType, ElType, std.mem.Allocator) ?ScoreT, + score: fn (Ctx, ScoresType(ScoreT), ElType, ElType) ?ScoreT, };
} @@ -220,17 +220,10 @@ pub fn AlgorithmType( .score = 0, }; - self.deallocateMatrixAndBuffer(); - - const haystack_normal = self.funcTable.convertString(haystack, self.allocator); - defer self.allocator.free(haystack_normal); - - const needle_normal = self.funcTable.convertString(needle, self.allocator); - defer self.allocator.free(needle_normal); - - const rows = needle_normal.len; - const cols = haystack_normal.len; + const rows = needle.len; + const cols = haystack.len; + // TODO: resize if needed instead of reallocate self.allocateMatrixAndBuffer(cols, rows) catch @panic("Memory error"); // resize the view into memory @@ -243,8 +236,8 @@ pub fn AlgorithmType( ctx, funcTable.isEqual, self.first_match_buffer, - haystack_normal, - needle_normal, + haystack, + needle, ) orelse return null; self.reset(rows + 1, cols + 1, first_match_indices); @@ -625,6 +618,13 @@ pub const Ascii = struct { } }; +pub const UnicodeToolBox = struct { + gcd: *GenCatData, + norm: *Normalize, + norm_data: *Normalize.NormData, + cd: *CaseData, +}; + pub const Unicode = struct { pub const Algorithm = AlgorithmType(u21, i32, .{}); pub const Scores = ScoresType(i32); @@ -635,19 +635,18 @@ pub const Unicode = struct { .isEqual = eqlFunc, }; - fn eqlFunc(a: *Unicode, h: u21, n: u21) bool { - const gcd = GenCatData.init(a.alg.allocator) catch @panic("Memory error"); - defer gcd.deinit(); - if (gcd.isSeparator(n) and a.opts.wildcard_spaces) { - if (gcd.isLetter(h) or gcd.isNumber(h) or gcd.isSymbol(h)) { + fn eqlFunc(self: *Unicode, h: u21, n: u21) bool { + if (self.unicode_toolbox.gcd.isSeparator(n) and self.opts.wildcard_spaces) { + if (self.unicode_toolbox.gcd.isLetter(h) or + self.unicode_toolbox.gcd.isNumber(h) or + self.unicode_toolbox.gcd.isSymbol(h)) + { return true; } else { return false; } - } else if (!a.opts.case_sensitive) { - const cd = CaseData.init(a.alg.allocator) catch @panic("Memory error"); - defer cd.deinit(); - return cd.toLower(h) == cd.toLower(n); + } else if 
(!self.opts.case_sensitive) { + return self.unicode_toolbox.cd.toLower(h) == self.unicode_toolbox.cd.toLower(n); } else { return h == n; } @@ -674,8 +673,8 @@ pub const Unicode = struct { h: u21, n: u21, ) i32 { - const p = CharacterType.fromUnicode(h, self.alg.allocator); - const c = CharacterType.fromUnicode(n, self.alg.allocator); + const p = CharacterType.fromUnicode(h, self.unicode_toolbox); + const c = CharacterType.fromUnicode(n, self.unicode_toolbox); return switch (p.roleNextTo(c)) { .Head => scores.bonus_head, @@ -685,19 +684,13 @@ pub const Unicode = struct { }; } - fn convertString(a: *const Unicode, string: []const u8) []const u21 { - var norm_data: Normalize.NormData = undefined; - Normalize.NormData.init(&norm_data, a.alg.allocator) catch @panic("Cannot normalize string"); - defer norm_data.deinit(); - - const n = Normalize{ .norm_data = &norm_data }; - - const nfc_result = n.nfc(a.alg.allocator, string) catch @panic("Cannot normalize string"); + fn convertString(self: *const Unicode, string: []const u8) []const u21 { + const nfc_result = self.unicode_toolbox.norm.nfc(self.alg.allocator, string) catch @panic("Cannot normalize string"); defer nfc_result.deinit(); var iter = code_point.Iterator{ .bytes = nfc_result.slice }; - var converted_string = std.ArrayList(u21).init(a.alg.allocator); + var converted_string = std.ArrayList(u21).init(self.alg.allocator); defer converted_string.deinit(); while (iter.next()) |c| { @@ -718,16 +711,41 @@ pub const Unicode = struct { alg: Algorithm, opts: Options, + unicode_toolbox: UnicodeToolBox, pub fn init( allocator: std.mem.Allocator, opts: Options, ) !Unicode { const alg = try Algorithm.init(allocator); - return .{ .alg = alg, .opts = opts }; + + const gcd = try GenCatData.init(allocator); + + const norm_data: *Normalize.NormData = allocator.create(Normalize.NormData); + try Normalize.NormData.init(norm_data, allocator); + + const norm = Normalize{ .norm_data = norm_data }; + + const cd = try 
CaseData.init(allocator); + + return .{ + .alg = alg, + .opts = opts, + .unicode_toolbox = .{ + .gcd = &gcd, + .norm = &norm, + .norm_data = norm_data, + .cd = &cd, + }, + }; } pub fn deinit(self: *Unicode) void { + self.unicode_toolbox.gcd.deinit(); + self.unicode_toolbox.norm_data.deinit(); + self.alg.allocator.destroy(self.unicode_toolbox.norm_data); + self.alg.allocator.destroy(self.unicode_toolbox.norm); + self.unicode_toolbox.cd.deinit(); self.alg.deinit(); } diff --git a/src/utils.zig b/src/utils.zig index ae60857..63ea5b4 100644 --- a/src/utils.zig +++ b/src/utils.zig @@ -3,6 +3,8 @@ const std = @import("std"); const GenCatData = @import("GenCatData"); const CaseData = @import("CaseData"); +const UnicodeToolBox = @import("root.zig").UnicodeToolBox; + pub fn digitCount(v: anytype) usize { const abs: u32 = @intCast(@abs(v)); if (abs == 0) return 1; @@ -32,27 +34,23 @@ pub const CharacterType = enum { }; } - pub fn fromUnicode(c: u21, allocator: std.mem.Allocator) CharacterType { - const cd = CaseData.init(allocator) catch @panic("Memory error"); - defer cd.deinit(); - const gcd = GenCatData.init(allocator) catch @panic("Memory error"); - defer gcd.deinit(); - if (cd.isLower(c)) { + pub fn fromUnicode(c: u21, unicode_toolbox: UnicodeToolBox) CharacterType { + if (unicode_toolbox.cd.isLower(c)) { return .Lower; - } else if (cd.isUpper(c)) { + } else if (unicode_toolbox.cd.isUpper(c)) { return .Upper; - } else if (gcd.isNumber(c)) { + } else if (unicode_toolbox.gcd.isNumber(c)) { return .Number; } else if (switch (c) { ' ', '\\', '/', '|', '(', ')', '[', ']', '{', '}' => true, else => false, }) { return .HardSeperator; - } else if (gcd.isSeparator(c)) { + } else if (unicode_toolbox.gcd.isSeparator(c)) { return .HardSeperator; - } else if (gcd.isPunctuation(c) or gcd.isSymbol(c) or gcd.isMark(c)) { + } else if (unicode_toolbox.gcd.isPunctuation(c) or unicode_toolbox.gcd.isSymbol(c) or unicode_toolbox.gcd.isMark(c)) { return .SoftSeperator; - } else if 
(gcd.isControl(c)) { + } else if (unicode_toolbox.gcd.isControl(c)) { return .Empty; } else { return .Lower; // Maybe .Empty instead ? @@ -82,11 +80,10 @@ pub const CharacterType = enum { pub fn firstMatchesGeneric( comptime T: type, ctx: anytype, - comptime eqlFunc: fn (@TypeOf(ctx), h: T, n: T, allocator: std.mem.Allocator) bool, + comptime eqlFunc: fn (@TypeOf(ctx), h: T, n: T) bool, indices: []usize, haystack: []const T, needle: []const T, - allocator: std.mem.Allocator, ) ?[]const usize { if (needle.len == 0) { return &.{}; @@ -99,7 +96,7 @@ pub fn firstMatchesGeneric( for (0.., haystack) |i, h| { const n = needle[index]; - if (eqlFunc(ctx, h, n, allocator)) { + if (eqlFunc(ctx, h, n)) { indices[index] = i; index += 1; if (index >= needle.len) break; From 4e998d4e9170d3602b81cca55b92988d3055a976 Mon Sep 17 00:00:00 2001 From: fjebaker <fergusbkr@gmail.com> Date: Sat, 10 Aug 2024 23:00:44 +0100 Subject: [PATCH 13/25] feat: init category and case unicode data only once --- src/root.zig | 53 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/src/root.zig b/src/root.zig index 0dabb1e..999aa3e 100644 --- a/src/root.zig +++ b/src/root.zig @@ -593,18 +593,14 @@ pub const Unicode = struct { }; fn eqlFunc(a: *Unicode, h: u21, n: u21) bool { - const gcd = GenCatData.init(a.alg.allocator) catch @panic("Memory error"); - defer gcd.deinit(); - if (gcd.isSeparator(n) and a.opts.wildcard_spaces) { - if (gcd.isLetter(h) or gcd.isNumber(h) or gcd.isSymbol(h)) { + if (a.gcd.isSeparator(n) and a.opts.wildcard_spaces) { + if (a.gcd.isLetter(h) or a.gcd.isNumber(h) or a.gcd.isSymbol(h)) { return true; } else { return false; } } else if (!a.opts.case_sensitive) { - const cd = CaseData.init(a.alg.allocator) catch @panic("Memory error"); - defer cd.deinit(); - return cd.toLower(h) == cd.toLower(n); + return a.cd.toLower(h) == a.cd.toLower(n); } else { return h == n; } @@ -630,8 +626,9 @@ pub const Unicode = struct { h: 
u21, n: u21, ) i32 { - const p = CharacterType.fromUnicode(h, self.alg.allocator); - const c = CharacterType.fromUnicode(n, self.alg.allocator); + const alloc = self.alg.allocator; + const p = CharacterType.fromUnicode(h, alloc); + const c = CharacterType.fromUnicode(n, alloc); return switch (p.roleNextTo(c)) { .Head => scores.bonus_head, @@ -641,14 +638,14 @@ pub const Unicode = struct { }; } - fn convertString(a: *const Unicode, string: []const u8) []const u21 { + fn convertString(a: *const Unicode, string: []const u8) ![]const u21 { var norm_data: Normalize.NormData = undefined; - Normalize.NormData.init(&norm_data, a.alg.allocator) catch @panic("Cannot normalize string"); + try Normalize.NormData.init(&norm_data, a.alg.allocator); defer norm_data.deinit(); const n = Normalize{ .norm_data = &norm_data }; - const nfc_result = n.nfc(a.alg.allocator, string) catch @panic("Cannot normalize string"); + const nfc_result = try n.nfc(a.alg.allocator, string); defer nfc_result.deinit(); var iter = code_point.Iterator{ .bytes = nfc_result.slice }; @@ -657,9 +654,9 @@ pub const Unicode = struct { defer converted_string.deinit(); while (iter.next()) |c| { - converted_string.append(c.code) catch @panic("Memory error"); + try converted_string.append(c.code); } - return converted_string.toOwnedSlice() catch @panic("Memory error"); + return converted_string.toOwnedSlice(); } pub const Options = struct { @@ -670,10 +667,14 @@ pub const Unicode = struct { wildcard_spaces: bool = false, penalty_case_mistmatch: i32 = -2, + + char_buffer_size: usize = 8192, }; alg: Algorithm, opts: Options, + gcd: GenCatData, + cd: CaseData, pub fn init( allocator: std.mem.Allocator, @@ -681,11 +682,27 @@ pub const Unicode = struct { max_needle: usize, opts: Options, ) !Unicode { + // todo: these can likely be a static singleton that is shared amongst + // all instances of the Unicode fuzzy finder + var gcd = try GenCatData.init(allocator); + errdefer gcd.deinit(); + + var cd = try 
CaseData.init(allocator); + errdefer cd.deinit(); + const alg = try Algorithm.init(allocator, max_haystack, max_needle); - return .{ .alg = alg, .opts = opts }; + + return .{ + .alg = alg, + .opts = opts, + .gcd = gcd, + .cd = cd, + }; } pub fn deinit(self: *Unicode) void { + self.gcd.deinit(); + self.cd.deinit(); self.alg.deinit(); } @@ -694,10 +711,12 @@ pub const Unicode = struct { haystack: []const u8, needle: []const u8, ) !?i32 { - const haystack_normal = self.convertString(haystack); + const haystack_normal = try self.convertString(haystack); defer self.alg.allocator.free(haystack_normal); - const needle_normal = self.convertString(needle); + + const needle_normal = try self.convertString(needle); defer self.alg.allocator.free(needle_normal); + return self.alg.score( self, FunctionTable, From 068c73e057aac31611ae80a89c993c37bcb800ef Mon Sep 17 00:00:00 2001 From: alberic89 <alberic89@gmx.com> Date: Sun, 11 Aug 2024 22:14:22 +0200 Subject: [PATCH 14/25] WIP Memory leaks --- src/root.zig | 48 ++++++++++++++++++++++++++---------------------- src/utils.zig | 9 ++++----- 2 files changed, 30 insertions(+), 27 deletions(-) diff --git a/src/root.zig b/src/root.zig index e5c0a24..26c05b7 100644 --- a/src/root.zig +++ b/src/root.zig @@ -124,6 +124,7 @@ pub fn AlgorithmType( pub fn init( allocator: std.mem.Allocator, ) !Self { + // TODO: init all matrices and buffers return .{ .m = undefined, .x = undefined, @@ -328,7 +329,7 @@ pub fn AlgorithmType( ) void { var prev: ElType = 0; for (1.., haystack) |i, h| { - self.role_bonus[i] = funcTable.bonusFunc(&self.impl, scores, prev, h, self.allocator); + self.role_bonus[i] = funcTable.bonus(ctx, scores, prev, h); prev = h; } @@ -399,7 +400,7 @@ pub fn AlgorithmType( // start by updating the M matrix // compute score - if (funcTable.scoreFunc(&self.impl, scores, h, n, self.allocator)) |current| { + if (funcTable.score(ctx, scores, h, n)) |current| { const prev_bonus = self.bonus_buffer[j - 1]; // role bonus for current 
character @@ -462,8 +463,8 @@ pub fn AlgorithmType( fn debugPrint( self: *const Self, writer: anytype, - haystack: []const u8, - needle: []const u8, + haystack: []const ElType, + needle: []const ElType, ) !void { const el_width = bonus: { var max_digits: usize = 1; @@ -521,14 +522,6 @@ pub const Ascii = struct { .isEqual = eqlFunc, }; - case_sensitive: bool = true, - case_penalize: bool = false, - // treat spaces as wildcards for any kind of boundary - // i.e. match with any `[^a-z,A-Z,0-9]` - wildcard_spaces: bool = false, - - penalty_case_mistmatch: i32 = -2, - alg: Algorithm, opts: Options, @@ -550,9 +543,8 @@ pub const Ascii = struct { scores: Scores, h: u8, n: u8, - allocator: Allocator, ) ?i32 { - if (!a.eqlFunc(h, n, allocator)) return null; + if (!a.eqlFunc(h, n)) return null; if (a.opts.case_penalize and (h != n)) { return scores.score_match + a.opts.penalty_case_mistmatch; @@ -619,10 +611,10 @@ pub const Ascii = struct { }; pub const UnicodeToolBox = struct { - gcd: *GenCatData, - norm: *Normalize, + gcd: *const GenCatData, + norm: *const Normalize, norm_data: *Normalize.NormData, - cd: *CaseData, + cd: *const CaseData, }; pub const Unicode = struct { @@ -657,9 +649,8 @@ pub const Unicode = struct { scores: Scores, h: u21, n: u21, - allocator: Allocator, ) ?i32 { - if (!a.eqlFunc(h, n, allocator)) return null; + if (!a.eqlFunc(h, n)) return null; if (a.opts.case_penalize and (h != n)) { return scores.score_match + a.opts.penalty_case_mistmatch; @@ -719,11 +710,22 @@ pub const Unicode = struct { ) !Unicode { const alg = try Algorithm.init(allocator); + // const gcd: *GenCatData = try allocator.create(GenCatData); + // gcd.* = try GenCatData.init(allocator); + + // const norm_data: *Normalize.NormData = try allocator.create(Normalize.NormData); + // try Normalize.NormData.init(norm_data, allocator); + + // const norm: *Normalize = try allocator.create(Normalize); + // norm.* = Normalize{ .norm_data = norm_data }; + + // const cd: *CaseData = try 
allocator.create(CaseData); + // cd.* = try CaseData.init(allocator); + const gcd = try GenCatData.init(allocator); - const norm_data: *Normalize.NormData = allocator.create(Normalize.NormData); + const norm_data: *Normalize.NormData = try allocator.create(Normalize.NormData); try Normalize.NormData.init(norm_data, allocator); - const norm = Normalize{ .norm_data = norm_data }; const cd = try CaseData.init(allocator); @@ -742,10 +744,12 @@ pub const Unicode = struct { pub fn deinit(self: *Unicode) void { self.unicode_toolbox.gcd.deinit(); + // self.alg.allocator.destroy(self.unicode_toolbox.gcd); self.unicode_toolbox.norm_data.deinit(); - self.alg.allocator.destroy(self.unicode_toolbox.norm_data); + // self.alg.allocator.destroy(self.unicode_toolbox.norm_data); self.alg.allocator.destroy(self.unicode_toolbox.norm); self.unicode_toolbox.cd.deinit(); + // self.alg.allocator.destroy(self.unicode_toolbox.cd); self.alg.deinit(); } diff --git a/src/utils.zig b/src/utils.zig index 63ea5b4..efc6059 100644 --- a/src/utils.zig +++ b/src/utils.zig @@ -106,9 +106,9 @@ pub fn firstMatchesGeneric( return indices[0..index]; } -fn simpleEql(comptime T: type) fn (void, T, T, std.mem.Allocator) bool { +fn simpleEql(comptime T: type) fn (void, T, T) bool { return struct { - fn f(_: void, h: T, n: T, _: std.mem.Allocator) bool { + fn f(_: void, h: T, n: T) bool { return h == n; } }.f; @@ -121,9 +121,8 @@ pub fn firstMatches( indices: []usize, haystack: []const T, needle: []const T, - allocator: std.mem.Allocator, ) ?[]const usize { - return firstMatchesGeneric(T, {}, simpleEql(T), indices, haystack, needle, allocator); + return firstMatchesGeneric(T, {}, simpleEql(T), indices, haystack, needle); } pub fn firstMatchesAlloc( @@ -134,7 +133,7 @@ pub fn firstMatchesAlloc( ) !?[]const usize { const indices = try allocator.alloc(usize, needle.len); errdefer allocator.free(indices); - return firstMatches(T, indices, haystack, needle, allocator); + return firstMatches(T, indices, haystack, 
needle); } fn testFirstMatch( From 4db079d1340fe0957546088c20a3c96242d0e295 Mon Sep 17 00:00:00 2001 From: alberic89 <alberic89@gmx.com> Date: Mon, 12 Aug 2024 12:09:32 +0200 Subject: [PATCH 15/25] Solve unicode tool init problem Currently working --- src/root.zig | 33 ++++++++++----------------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/src/root.zig b/src/root.zig index 26c05b7..aeb0ac6 100644 --- a/src/root.zig +++ b/src/root.zig @@ -225,6 +225,7 @@ pub fn AlgorithmType( const cols = haystack.len; // TODO: resize if needed instead of reallocate + self.deallocateMatrixAndBuffer(); self.allocateMatrixAndBuffer(cols, rows) catch @panic("Memory error"); // resize the view into memory @@ -611,10 +612,10 @@ pub const Ascii = struct { }; pub const UnicodeToolBox = struct { - gcd: *const GenCatData, - norm: *const Normalize, + gcd: GenCatData, + norm: Normalize, norm_data: *Normalize.NormData, - cd: *const CaseData, + cd: CaseData, }; pub const Unicode = struct { @@ -710,21 +711,10 @@ pub const Unicode = struct { ) !Unicode { const alg = try Algorithm.init(allocator); - // const gcd: *GenCatData = try allocator.create(GenCatData); - // gcd.* = try GenCatData.init(allocator); - - // const norm_data: *Normalize.NormData = try allocator.create(Normalize.NormData); - // try Normalize.NormData.init(norm_data, allocator); - - // const norm: *Normalize = try allocator.create(Normalize); - // norm.* = Normalize{ .norm_data = norm_data }; - - // const cd: *CaseData = try allocator.create(CaseData); - // cd.* = try CaseData.init(allocator); - const gcd = try GenCatData.init(allocator); - const norm_data: *Normalize.NormData = try allocator.create(Normalize.NormData); + var norm_data: *Normalize.NormData = undefined; + norm_data = try allocator.create(Normalize.NormData); try Normalize.NormData.init(norm_data, allocator); const norm = Normalize{ .norm_data = norm_data }; @@ -734,22 +724,19 @@ pub const Unicode = struct { .alg = alg, .opts = opts, 
.unicode_toolbox = .{ - .gcd = &gcd, - .norm = &norm, + .gcd = gcd, + .norm = norm, .norm_data = norm_data, - .cd = &cd, + .cd = cd, }, }; } pub fn deinit(self: *Unicode) void { self.unicode_toolbox.gcd.deinit(); - // self.alg.allocator.destroy(self.unicode_toolbox.gcd); self.unicode_toolbox.norm_data.deinit(); - // self.alg.allocator.destroy(self.unicode_toolbox.norm_data); - self.alg.allocator.destroy(self.unicode_toolbox.norm); + self.alg.allocator.destroy(self.unicode_toolbox.norm_data); self.unicode_toolbox.cd.deinit(); - // self.alg.allocator.destroy(self.unicode_toolbox.cd); self.alg.deinit(); } From b1f437ab28e4cfcf0c55db022a98ae38ec3a02ad Mon Sep 17 00:00:00 2001 From: alberic89 <alberic89@gmx.com> Date: Mon, 12 Aug 2024 14:25:02 +0200 Subject: [PATCH 16/25] fix: convertString of Unicode can fail --- src/root.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/root.zig b/src/root.zig index cfb8a18..8af5eac 100644 --- a/src/root.zig +++ b/src/root.zig @@ -676,7 +676,7 @@ pub const Unicode = struct { }; } - fn convertString(self: *const Unicode, string: []const u8) []const u21 { + fn convertString(self: *const Unicode, string: []const u8) ![]const u21 { const nfc_result = self.unicode_toolbox.norm.nfc(self.alg.allocator, string) catch @panic("Cannot normalize string"); defer nfc_result.deinit(); From abac757451e6ab1b8dbc00ae680978782bfddb91 Mon Sep 17 00:00:00 2001 From: alberic89 <alberic89@gmx.com> Date: Mon, 12 Aug 2024 14:47:02 +0200 Subject: [PATCH 17/25] Resize Matrix on need --- src/root.zig | 144 ++++++++++++++++++++++----------------------- src/structures.zig | 8 +++ 2 files changed, 79 insertions(+), 73 deletions(-) diff --git a/src/root.zig b/src/root.zig index 8af5eac..b72feca 100644 --- a/src/root.zig +++ b/src/root.zig @@ -114,81 +114,81 @@ pub fn AlgorithmType( allocator: std.mem.Allocator, - initialised: bool, - pub fn deinit(self: *Self) void { - self.deallocateMatrixAndBuffer(); + self.m.deinit(); + self.x.deinit(); + 
self.m_skip.deinit(); + self.allocator.free(self.role_bonus); + self.allocator.free(self.bonus_buffer); + self.allocator.free(self.first_match_buffer); + self.allocator.free(self.traceback_buffer); self.* = undefined; } pub fn init( allocator: std.mem.Allocator, ) !Self { - // TODO: init all matrices and buffers - return .{ - .m = undefined, - .x = undefined, - .m_skip = undefined, - - .role_bonus = undefined, - .bonus_buffer = undefined, - .first_match_buffer = undefined, - .traceback_buffer = undefined, - .allocator = allocator, - .initialised = false, - }; - } - fn allocateMatrixAndBuffer( - self: *Self, - max_haystack: usize, - max_needle: usize, - ) !void { - const rows = max_needle + 1; - const cols = max_haystack + 1; + // init to a min size, and resize after if needed + const rows = 1; + const cols = 1; - var m = try Matrix.init(self.allocator, rows, cols); + var m = try Matrix.init(allocator, rows, cols); errdefer m.deinit(); - var x = try Matrix.init(self.allocator, rows, cols); + var x = try Matrix.init(allocator, rows, cols); errdefer x.deinit(); - var m_skip = try MatrixT(bool).init(self.allocator, rows, cols); + var m_skip = try MatrixT(bool).init(allocator, rows, cols); errdefer m_skip.deinit(); - const role_bonus = try self.allocator.alloc(ScoreT, cols); - errdefer self.allocator.free(role_bonus); + const role_bonus = try allocator.alloc(ScoreT, cols); + errdefer allocator.free(role_bonus); - const bonus_buffer = try self.allocator.alloc(ScoreT, cols); - errdefer self.allocator.free(bonus_buffer); + const bonus_buffer = try allocator.alloc(ScoreT, cols); + errdefer allocator.free(bonus_buffer); - const first_match_buffer = try self.allocator.alloc(usize, rows); - errdefer self.allocator.free(first_match_buffer); + const first_match_buffer = try allocator.alloc(usize, rows); + errdefer allocator.free(first_match_buffer); - const traceback_buffer = try self.allocator.alloc(usize, cols); - errdefer self.allocator.free(traceback_buffer); + const 
traceback_buffer = try allocator.alloc(usize, cols); + errdefer allocator.free(traceback_buffer); - self.m = m; - self.x = x; - self.m_skip = m_skip; + return .{ + .m = m, + .x = x, + .m_skip = m_skip, + + .role_bonus = role_bonus, + .bonus_buffer = bonus_buffer, + .first_match_buffer = first_match_buffer, + .traceback_buffer = traceback_buffer, + .allocator = allocator, + }; + } - self.role_bonus = role_bonus; - self.bonus_buffer = bonus_buffer; - self.first_match_buffer = first_match_buffer; - self.traceback_buffer = traceback_buffer; + fn resize(self: *Self, new_cols: usize, new_rows: usize) !void { + if (new_rows == self.m.rows and new_cols == self.m.cols) { + return; + } - self.initialised = true; - } + if (new_rows != self.m.rows) { + self.first_match_buffer = try self.allocator.realloc(self.first_match_buffer, new_rows); + } + if (new_cols != self.m.cols) { + self.role_bonus = try self.allocator.realloc(self.role_bonus, new_cols); + self.bonus_buffer = try self.allocator.realloc(self.bonus_buffer, new_cols); + self.traceback_buffer = try self.allocator.realloc(self.traceback_buffer, new_cols); + } - fn deallocateMatrixAndBuffer(self: *Self) void { - if (self.initialised) { - self.m.deinit(); - self.x.deinit(); - self.m_skip.deinit(); - self.allocator.free(self.role_bonus); - self.allocator.free(self.bonus_buffer); - self.allocator.free(self.first_match_buffer); - self.allocator.free(self.traceback_buffer); + if (new_rows * new_cols <= self.m.matrix.len) { + self.m.resizeNoAlloc(new_rows, new_cols); + self.x.resizeNoAlloc(new_rows, new_cols); + self.m_skip.resizeNoAlloc(new_rows, new_cols); + } else { + try self.m.resizeAlloc(new_rows, new_cols); + try self.x.resizeAlloc(new_rows, new_cols); + try self.m_skip.resizeAlloc(new_rows, new_cols); } - self.initialised = false; + return; } /// Compute matching score @@ -198,8 +198,8 @@ pub fn AlgorithmType( comptime funcTable: FunctionTable(@TypeOf(ctx)), haystack: []const ElType, needle: []const ElType, - ) ?ScoreT 
{ - const info = self.scoreImpl(ctx, funcTable, haystack, needle) orelse + ) !?ScoreT { + const info = try self.scoreImpl(ctx, funcTable, haystack, needle) orelse return null; return info.score; } @@ -216,7 +216,7 @@ pub fn AlgorithmType( comptime funcTable: FunctionTable(@TypeOf(ctx)), haystack: []const ElType, needle: []const ElType, - ) ?ScoreInfo { + ) !?ScoreInfo { if (needle.len == 0) return .{ .score = 0, }; @@ -224,14 +224,12 @@ pub fn AlgorithmType( const rows = needle.len; const cols = haystack.len; - // TODO: resize if needed instead of reallocate - self.deallocateMatrixAndBuffer(); - self.allocateMatrixAndBuffer(cols, rows) catch @panic("Memory error"); + try self.resize(cols + 1, rows + 1); // resize the view into memory - self.m.resizeNoAlloc(rows + 1, cols + 1); - self.x.resizeNoAlloc(rows + 1, cols + 1); - self.m_skip.resizeNoAlloc(rows + 1, cols + 1); + // self.m.resizeNoAlloc(rows + 1, cols + 1); + // self.x.resizeNoAlloc(rows + 1, cols + 1); + // self.m_skip.resizeNoAlloc(rows + 1, cols + 1); const first_match_indices = utils.firstMatchesGeneric( ElType, @@ -279,8 +277,8 @@ pub fn AlgorithmType( comptime funcTable: FunctionTable(@TypeOf(ctx)), haystack: []const ElType, needle: []const ElType, - ) Matches { - const s = self.scoreImpl(ctx, funcTable, haystack, needle) orelse + ) !Matches { + const s = try self.scoreImpl(ctx, funcTable, haystack, needle) orelse return .{ .score = null }; const matches = self.traceback( @@ -598,16 +596,16 @@ pub const Ascii = struct { self: *Ascii, haystack: []const u8, needle: []const u8, - ) ?i32 { - return self.alg.score(self, FunctionTable, haystack, needle); + ) !?i32 { + return try self.alg.score(self, FunctionTable, haystack, needle); } pub fn scoreMatches( self: *Ascii, haystack: []const u8, needle: []const u8, - ) Algorithm.Matches { - return self.alg.scoreMatches(self, FunctionTable, haystack, needle); + ) !Algorithm.Matches { + return try self.alg.scoreMatches(self, FunctionTable, haystack, needle); } }; 
@@ -677,7 +675,7 @@ pub const Unicode = struct { } fn convertString(self: *const Unicode, string: []const u8) ![]const u21 { - const nfc_result = self.unicode_toolbox.norm.nfc(self.alg.allocator, string) catch @panic("Cannot normalize string"); + const nfc_result = try self.unicode_toolbox.norm.nfc(self.alg.allocator, string); defer nfc_result.deinit(); var iter = code_point.Iterator{ .bytes = nfc_result.slice }; @@ -753,7 +751,7 @@ pub const Unicode = struct { const needle_normal = try self.convertString(needle); defer self.alg.allocator.free(needle_normal); - return self.alg.score( + return try self.alg.score( self, FunctionTable, haystack_normal, @@ -780,7 +778,7 @@ pub const Unicode = struct { }; fn doTestScore(alg: *Ascii, haystack: []const u8, needle: []const u8, comptime score: i32) !void { - const s = alg.score(haystack, needle); + const s = try alg.score(haystack, needle); try std.testing.expectEqual(score, s.?); } @@ -950,7 +948,7 @@ test "wildcard space" { } fn doTestTraceback(alg: *Ascii, haystack: []const u8, needle: []const u8, comptime matches: []const usize) !void { - const s = alg.scoreMatches(haystack, needle); + const s = try alg.scoreMatches(haystack, needle); // const stderr = std.io.getStdErr().writer(); // try alg.debugPrint(stderr, haystack, needle); diff --git a/src/structures.zig b/src/structures.zig index 8789ae9..55373ff 100644 --- a/src/structures.zig +++ b/src/structures.zig @@ -63,6 +63,14 @@ pub fn MatrixT(comptime T: type) type { m.cols = new_cols; } + /// Resize the matrix to a bigger or smaller one. Note that the data will be invalided. + pub fn resizeAlloc(m: *Self, new_rows: usize, new_cols: usize) !void { + m.rows = new_rows; + m.cols = new_cols; + + m.matrix = try m.allocator.realloc(m.matrix, new_cols * new_rows); + } + /// Set the currently active region of the matrix to a specific value. 
pub fn fill(m: Self, value: ElementType) void { const end = m.rows * m.cols; From 94f8f87819da0764991672d400aacd4d8c98bf39 Mon Sep 17 00:00:00 2001 From: alberic89 <alberic89@gmx.com> Date: Mon, 12 Aug 2024 15:37:13 +0200 Subject: [PATCH 18/25] Let the user choice if he want to resize the matrix and buffer --- src/root.zig | 99 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 72 insertions(+), 27 deletions(-) diff --git a/src/root.zig b/src/root.zig index b72feca..dc22964 100644 --- a/src/root.zig +++ b/src/root.zig @@ -127,11 +127,13 @@ pub fn AlgorithmType( pub fn init( allocator: std.mem.Allocator, + max_haystack: usize, + max_needle: usize, ) !Self { // init to a min size, and resize after if needed - const rows = 1; - const cols = 1; + const rows = max_needle + 1; + const cols = max_haystack + 1; var m = try Matrix.init(allocator, rows, cols); errdefer m.deinit(); @@ -165,15 +167,19 @@ pub fn AlgorithmType( }; } - fn resize(self: *Self, new_cols: usize, new_rows: usize) !void { + /// Resize matrixs and buffer if it is needed + pub fn resizeIfNeeded(self: *Self, max_haystack: usize, max_needle: usize) !void { + const new_rows = max_needle + 1; + const new_cols = max_haystack + 1; + if (new_rows == self.m.rows and new_cols == self.m.cols) { return; } - if (new_rows != self.m.rows) { + if (new_rows > self.first_match_buffer.len) { self.first_match_buffer = try self.allocator.realloc(self.first_match_buffer, new_rows); } - if (new_cols != self.m.cols) { + if (new_cols > self.role_bonus.len) { self.role_bonus = try self.allocator.realloc(self.role_bonus, new_cols); self.bonus_buffer = try self.allocator.realloc(self.bonus_buffer, new_cols); self.traceback_buffer = try self.allocator.realloc(self.traceback_buffer, new_cols); @@ -191,6 +197,28 @@ pub fn AlgorithmType( return; } + /// Force to resize all matrixs and buffer + pub fn resize(self: *Self, max_haystack: usize, max_needle: usize) !void { + const new_rows = max_needle + 1; + const 
new_cols = max_haystack + 1; + + self.first_match_buffer = try self.allocator.realloc(self.first_match_buffer, new_rows); + + self.role_bonus = try self.allocator.realloc(self.role_bonus, new_cols); + self.bonus_buffer = try self.allocator.realloc(self.bonus_buffer, new_cols); + self.traceback_buffer = try self.allocator.realloc(self.traceback_buffer, new_cols); + + self.m.resizeNoAlloc(new_rows, new_cols); + self.x.resizeNoAlloc(new_rows, new_cols); + self.m_skip.resizeNoAlloc(new_rows, new_cols); + + try self.m.resizeAlloc(new_rows, new_cols); + try self.x.resizeAlloc(new_rows, new_cols); + try self.m_skip.resizeAlloc(new_rows, new_cols); + + return; + } + /// Compute matching score pub fn score( self: *Self, @@ -198,8 +226,8 @@ pub fn AlgorithmType( comptime funcTable: FunctionTable(@TypeOf(ctx)), haystack: []const ElType, needle: []const ElType, - ) !?ScoreT { - const info = try self.scoreImpl(ctx, funcTable, haystack, needle) orelse + ) ?ScoreT { + const info = self.scoreImpl(ctx, funcTable, haystack, needle) orelse return null; return info.score; } @@ -216,20 +244,21 @@ pub fn AlgorithmType( comptime funcTable: FunctionTable(@TypeOf(ctx)), haystack: []const ElType, needle: []const ElType, - ) !?ScoreInfo { + ) ?ScoreInfo { if (needle.len == 0) return .{ .score = 0, }; + std.debug.assert(haystack.len < self.traceback_buffer.len); + std.debug.assert(needle.len < self.first_match_buffer.len); + const rows = needle.len; const cols = haystack.len; - try self.resize(cols + 1, rows + 1); - // resize the view into memory - // self.m.resizeNoAlloc(rows + 1, cols + 1); - // self.x.resizeNoAlloc(rows + 1, cols + 1); - // self.m_skip.resizeNoAlloc(rows + 1, cols + 1); + self.m.resizeNoAlloc(rows + 1, cols + 1); + self.x.resizeNoAlloc(rows + 1, cols + 1); + self.m_skip.resizeNoAlloc(rows + 1, cols + 1); const first_match_indices = utils.firstMatchesGeneric( ElType, @@ -243,7 +272,7 @@ pub fn AlgorithmType( self.reset(rows + 1, cols + 1, first_match_indices); 
self.determineBonuses(ctx, funcTable, haystack); - try self.populateMatrices( + self.populateMatrices( ctx, funcTable, haystack, @@ -277,8 +306,8 @@ pub fn AlgorithmType( comptime funcTable: FunctionTable(@TypeOf(ctx)), haystack: []const ElType, needle: []const ElType, - ) !Matches { - const s = try self.scoreImpl(ctx, funcTable, haystack, needle) orelse + ) Matches { + const s = self.scoreImpl(ctx, funcTable, haystack, needle) orelse return .{ .score = null }; const matches = self.traceback( @@ -387,7 +416,7 @@ pub fn AlgorithmType( haystack: []const ElType, needle: []const ElType, first_match_indices: []const usize, - ) !void { + ) void { for (1.., needle) |i, n| { // how many characters of the haystack do we skip @@ -581,10 +610,12 @@ pub const Ascii = struct { // public interface pub fn init( + max_haystack: usize, + max_needle: usize, allocator: std.mem.Allocator, opts: Options, ) !Ascii { - const alg = try Algorithm.init(allocator); + const alg = try Algorithm.init(allocator, max_haystack, max_needle); return Ascii{ .alg = alg, .opts = opts }; } @@ -596,16 +627,16 @@ pub const Ascii = struct { self: *Ascii, haystack: []const u8, needle: []const u8, - ) !?i32 { - return try self.alg.score(self, FunctionTable, haystack, needle); + ) ?i32 { + return self.alg.score(self, FunctionTable, haystack, needle); } pub fn scoreMatches( self: *Ascii, haystack: []const u8, needle: []const u8, - ) !Algorithm.Matches { - return try self.alg.scoreMatches(self, FunctionTable, haystack, needle); + ) Algorithm.Matches { + return self.alg.scoreMatches(self, FunctionTable, haystack, needle); } }; @@ -706,10 +737,12 @@ pub const Unicode = struct { unicode_toolbox: UnicodeToolBox, pub fn init( + max_haystack: usize, + max_needle: usize, allocator: std.mem.Allocator, opts: Options, ) !Unicode { - const alg = try Algorithm.init(allocator); + const alg = try Algorithm.init(allocator, max_haystack, max_needle); const gcd = try GenCatData.init(allocator); @@ -751,7 +784,7 @@ pub const 
Unicode = struct { const needle_normal = try self.convertString(needle); defer self.alg.allocator.free(needle_normal); - return try self.alg.score( + return self.alg.score( self, FunctionTable, haystack_normal, @@ -763,7 +796,7 @@ pub const Unicode = struct { self: *Unicode, haystack: []const u8, needle: []const u8, - ) !Algorithm.Matches { + ) Algorithm.Matches { const haystack_normal = self.convertString(haystack); defer self.allocator.free(haystack_normal); const needle_normal = self.convertString(needle); @@ -778,7 +811,7 @@ pub const Unicode = struct { }; fn doTestScore(alg: *Ascii, haystack: []const u8, needle: []const u8, comptime score: i32) !void { - const s = try alg.score(haystack, needle); + const s = alg.score(haystack, needle); try std.testing.expectEqual(score, s.?); } @@ -791,6 +824,8 @@ test "algorithm test" { const o = Ascii.Scores{}; var alg = try Ascii.init( + 128, + 32, std.testing.allocator, .{}, ); @@ -875,6 +910,8 @@ test "case sensitivity" { const o = Ascii.Scores{}; var alg1 = try Ascii.init( + 128, + 32, std.testing.allocator, .{ .case_sensitive = false }, ); @@ -897,6 +934,8 @@ test "case sensitivity" { ); var alg2 = try Ascii.init( + 128, + 32, std.testing.allocator, .{ .case_sensitive = false, @@ -918,6 +957,8 @@ test "case sensitivity" { test "wildcard space" { const o = Ascii.Scores{}; var alg = try Ascii.init( + 128, + 32, std.testing.allocator, .{ .wildcard_spaces = true }, ); @@ -948,7 +989,7 @@ test "wildcard space" { } fn doTestTraceback(alg: *Ascii, haystack: []const u8, needle: []const u8, comptime matches: []const usize) !void { - const s = try alg.scoreMatches(haystack, needle); + const s = alg.scoreMatches(haystack, needle); // const stderr = std.io.getStdErr().writer(); // try alg.debugPrint(stderr, haystack, needle); @@ -959,6 +1000,8 @@ fn doTestTraceback(alg: *Ascii, haystack: []const u8, needle: []const u8, compti test "traceback" { var alg = try Ascii.init( + 64, + 32, std.testing.allocator, .{}, ); @@ -975,6 +1018,8 
@@ test "Unicode search" { const o = Unicode.Scores{}; var alg = try Unicode.init( + 128, + 32, std.testing.allocator, .{}, ); From 17903ad6e4a1756dbf4600ba00a55d2d3b18fb10 Mon Sep 17 00:00:00 2001 From: alberic89 <alberic89@gmx.com> Date: Mon, 12 Aug 2024 15:09:36 +0200 Subject: [PATCH 19/25] fix: add errdefer in Unicode init --- src/root.zig | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/root.zig b/src/root.zig index dc22964..f34f739 100644 --- a/src/root.zig +++ b/src/root.zig @@ -742,16 +742,21 @@ pub const Unicode = struct { allocator: std.mem.Allocator, opts: Options, ) !Unicode { - const alg = try Algorithm.init(allocator, max_haystack, max_needle); + var alg = try Algorithm.init(allocator, max_haystack, max_needle); + errdefer alg.deinit(); - const gcd = try GenCatData.init(allocator); + var gcd = try GenCatData.init(allocator); + errdefer gcd.deinit(); var norm_data: *Normalize.NormData = undefined; norm_data = try allocator.create(Normalize.NormData); + errdefer norm_data.deinit(); // reverse order is needed to proper deinit + errdefer allocator.destroy(norm_data); try Normalize.NormData.init(norm_data, allocator); const norm = Normalize{ .norm_data = norm_data }; - const cd = try CaseData.init(allocator); + var cd = try CaseData.init(allocator); + errdefer cd.deinit(); return .{ .alg = alg, From 4aebb9705a3c9b0e627dc64fe6f6d25f543c9d2a Mon Sep 17 00:00:00 2001 From: alberic89 <alberic89@gmx.com> Date: Mon, 12 Aug 2024 17:55:37 +0200 Subject: [PATCH 20/25] fix: various style and optimization improvements --- src/root.zig | 28 ++++++++++------------------ src/structures.zig | 4 +--- src/utils.zig | 3 --- 3 files changed, 11 insertions(+), 24 deletions(-) diff --git a/src/root.zig b/src/root.zig index f34f739..9dc2179 100644 --- a/src/root.zig +++ b/src/root.zig @@ -172,7 +172,7 @@ pub fn AlgorithmType( const new_rows = max_needle + 1; const new_cols = max_haystack + 1; - if (new_rows == self.m.rows and new_cols == 
self.m.cols) { + if (new_rows <= self.m.rows and new_cols <= self.m.cols) { return; } @@ -185,11 +185,7 @@ pub fn AlgorithmType( self.traceback_buffer = try self.allocator.realloc(self.traceback_buffer, new_cols); } - if (new_rows * new_cols <= self.m.matrix.len) { - self.m.resizeNoAlloc(new_rows, new_cols); - self.x.resizeNoAlloc(new_rows, new_cols); - self.m_skip.resizeNoAlloc(new_rows, new_cols); - } else { + if (new_rows * new_cols > self.m.matrix.len) { try self.m.resizeAlloc(new_rows, new_cols); try self.x.resizeAlloc(new_rows, new_cols); try self.m_skip.resizeAlloc(new_rows, new_cols); @@ -208,10 +204,6 @@ pub fn AlgorithmType( self.bonus_buffer = try self.allocator.realloc(self.bonus_buffer, new_cols); self.traceback_buffer = try self.allocator.realloc(self.traceback_buffer, new_cols); - self.m.resizeNoAlloc(new_rows, new_cols); - self.x.resizeNoAlloc(new_rows, new_cols); - self.m_skip.resizeNoAlloc(new_rows, new_cols); - try self.m.resizeAlloc(new_rows, new_cols); try self.x.resizeAlloc(new_rows, new_cols); try self.m_skip.resizeAlloc(new_rows, new_cols); @@ -610,9 +602,9 @@ pub const Ascii = struct { // public interface pub fn init( + allocator: std.mem.Allocator, max_haystack: usize, max_needle: usize, - allocator: std.mem.Allocator, opts: Options, ) !Ascii { const alg = try Algorithm.init(allocator, max_haystack, max_needle); @@ -737,9 +729,9 @@ pub const Unicode = struct { unicode_toolbox: UnicodeToolBox, pub fn init( + allocator: std.mem.Allocator, max_haystack: usize, max_needle: usize, - allocator: std.mem.Allocator, opts: Options, ) !Unicode { var alg = try Algorithm.init(allocator, max_haystack, max_needle); @@ -829,9 +821,9 @@ test "algorithm test" { const o = Ascii.Scores{}; var alg = try Ascii.init( + std.testing.allocator, 128, 32, - std.testing.allocator, .{}, ); defer alg.deinit(); @@ -915,9 +907,9 @@ test "case sensitivity" { const o = Ascii.Scores{}; var alg1 = try Ascii.init( + std.testing.allocator, 128, 32, - std.testing.allocator, .{ 
.case_sensitive = false }, ); defer alg1.deinit(); @@ -939,9 +931,9 @@ test "case sensitivity" { ); var alg2 = try Ascii.init( + std.testing.allocator, 128, 32, - std.testing.allocator, .{ .case_sensitive = false, .case_penalize = true, @@ -962,9 +954,9 @@ test "case sensitivity" { test "wildcard space" { const o = Ascii.Scores{}; var alg = try Ascii.init( + std.testing.allocator, 128, 32, - std.testing.allocator, .{ .wildcard_spaces = true }, ); defer alg.deinit(); @@ -1005,9 +997,9 @@ fn doTestTraceback(alg: *Ascii, haystack: []const u8, needle: []const u8, compti test "traceback" { var alg = try Ascii.init( + std.testing.allocator, 64, 32, - std.testing.allocator, .{}, ); defer alg.deinit(); @@ -1023,9 +1015,9 @@ test "Unicode search" { const o = Unicode.Scores{}; var alg = try Unicode.init( + std.testing.allocator, 128, 32, - std.testing.allocator, .{}, ); defer alg.deinit(); diff --git a/src/structures.zig b/src/structures.zig index 55373ff..e8de4e2 100644 --- a/src/structures.zig +++ b/src/structures.zig @@ -65,10 +65,8 @@ pub fn MatrixT(comptime T: type) type { /// Resize the matrix to a bigger or smaller one. Note that the data will be invalided. pub fn resizeAlloc(m: *Self, new_rows: usize, new_cols: usize) !void { - m.rows = new_rows; - m.cols = new_cols; - m.matrix = try m.allocator.realloc(m.matrix, new_cols * new_rows); + m.resizeNoAlloc(new_rows, new_cols); } /// Set the currently active region of the matrix to a specific value. 
diff --git a/src/utils.zig b/src/utils.zig index efc6059..5122fb8 100644 --- a/src/utils.zig +++ b/src/utils.zig @@ -1,8 +1,5 @@ const std = @import("std"); -const GenCatData = @import("GenCatData"); -const CaseData = @import("CaseData"); - const UnicodeToolBox = @import("root.zig").UnicodeToolBox; pub fn digitCount(v: anytype) usize { From ad33235902003f4e953b7107c47503454ffab810 Mon Sep 17 00:00:00 2001 From: fjebaker <fergusbkr@gmail.com> Date: Mon, 12 Aug 2024 18:06:55 +0100 Subject: [PATCH 21/25] feat: cleanup unicode support Unicode support only enabled with `-Dunicode` to avoid labouring users with unnecessary dependencies if they only need ASCII. Move all unicode related functions to a seperate `unicode.zig` which is conditionally included at compile time. --- build.zig | 50 +++++++---- src/main.zig | 6 +- src/root.zig | 214 +++---------------------------------------- src/unicode.zig | 234 ++++++++++++++++++++++++++++++++++++++++++++++++ src/utils.zig | 27 +----- 5 files changed, 283 insertions(+), 248 deletions(-) create mode 100644 src/unicode.zig diff --git a/build.zig b/build.zig index 4f97f09..9039784 100644 --- a/build.zig +++ b/build.zig @@ -9,22 +9,30 @@ pub fn build(b: *std.Build) void { .optimize = optimize, }); - _ = b.addModule("fuzzig", .{ .root_source_file = b.path("src/root.zig") }); + const with_unicode = b.option( + bool, + "unicode", + "Compile with unicode support (fetches additional dependencies)", + ) orelse false; - const lib = b.addStaticLibrary(.{ - .name = "fuzzig", - .root_source_file = b.path("src/root.zig"), - .target = target, - .optimize = optimize, - }); + const opts = b.addOptions(); + + opts.addOption( + bool, + "unicode", + with_unicode, + ); - lib.root_module.addImport("code_point", zg.module("code_point")); - lib.root_module.addImport("GenCatData", zg.module("GenCatData")); - lib.root_module.addImport("CaseData", zg.module("CaseData")); - lib.root_module.addImport("Normalize", zg.module("Normalize")); - 
lib.root_module.addImport("CaseFold", zg.module("CaseFold")); + const mod = b.addModule("fuzzig", .{ .root_source_file = b.path("src/root.zig") }); + mod.addOptions("options", opts); - b.installArtifact(lib); + if (with_unicode) { + mod.addImport("code_point", zg.module("code_point")); + mod.addImport("GenCatData", zg.module("GenCatData")); + mod.addImport("CaseData", zg.module("CaseData")); + mod.addImport("Normalize", zg.module("Normalize")); + mod.addImport("CaseFold", zg.module("CaseFold")); + } const lib_unit_tests = b.addTest(.{ .root_source_file = b.path("src/root.zig"), @@ -32,11 +40,15 @@ pub fn build(b: *std.Build) void { .optimize = optimize, }); - lib_unit_tests.root_module.addImport("code_point", zg.module("code_point")); - lib_unit_tests.root_module.addImport("GenCatData", zg.module("GenCatData")); - lib_unit_tests.root_module.addImport("CaseData", zg.module("CaseData")); - lib_unit_tests.root_module.addImport("Normalize", zg.module("Normalize")); - lib_unit_tests.root_module.addImport("CaseFold", zg.module("CaseFold")); + lib_unit_tests.root_module.addOptions("options", opts); + + if (with_unicode) { + lib_unit_tests.root_module.addImport("code_point", zg.module("code_point")); + lib_unit_tests.root_module.addImport("GenCatData", zg.module("GenCatData")); + lib_unit_tests.root_module.addImport("CaseData", zg.module("CaseData")); + lib_unit_tests.root_module.addImport("Normalize", zg.module("Normalize")); + lib_unit_tests.root_module.addImport("CaseFold", zg.module("CaseFold")); + } const run_lib_unit_tests = b.addRunArtifact(lib_unit_tests); @@ -50,6 +62,8 @@ pub fn build(b: *std.Build) void { .optimize = optimize, }); + exe.root_module.addImport("fuzzig", mod); + const run_cmd = b.addRunArtifact(exe); const benchmark_step = b.step("benchmark", "Run benchmarks."); benchmark_step.dependOn(&run_cmd.step); diff --git a/src/main.zig b/src/main.zig index ef3dc0e..dd3c006 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1,5 +1,5 @@ const std = 
@import("std"); -const fuzzy = @import("root.zig"); +const fuzzig = @import("fuzzig"); const bmark = @import("benchmarks.zig"); pub fn main() !void { @@ -8,7 +8,7 @@ pub fn main() !void { const alloc = gpa.allocator(); - var finder = try fuzzy.Ascii.init(alloc, 6000, 500, .{}); + var finder = try fuzzig.Ascii.init(alloc, 6000, 500, .{}); defer finder.deinit(); const scores = try alloc.alloc(i32, LINES.len); @@ -47,7 +47,7 @@ pub fn main() !void { result.printSummary(); } -pub fn runBmark(finder: *fuzzy.Ascii) void { +pub fn runBmark(finder: *fuzzig.Ascii) void { const score = finder.score( "hello world this is a short message about things" ** 100, "short abut thns", diff --git a/src/root.zig b/src/root.zig index 9dc2179..a889ca9 100644 --- a/src/root.zig +++ b/src/root.zig @@ -2,16 +2,20 @@ const std = @import("std"); const utils = @import("utils.zig"); const structures = @import("structures.zig"); -const code_point = @import("code_point"); -const GenCatData = @import("GenCatData"); -const CaseData = @import("CaseData"); -const Normalize = @import("Normalize"); - -const Allocator = std.mem.Allocator; - const CharacterType = utils.CharacterType; const MatrixT = structures.MatrixT; +pub const Unicode = if (@import("options").unicode) + @import("unicode.zig").Unicode +else + @compileError("Not compiled with unicode support"); + +test "other" { + comptime if (@import("options").unicode) { + _ = @import("unicode.zig"); + }; +} + // Matrix Filling: for two sequences a1a2a3..., b1b2b3... 
we have a reward // function // @@ -86,7 +90,7 @@ pub fn AlgorithmType( const Matrix = MatrixT(ScoreT); const Self = @This(); - fn FunctionTable( + pub fn FunctionTable( comptime Ctx: type, ) type { return struct { @@ -632,191 +636,11 @@ pub const Ascii = struct { } }; -pub const UnicodeToolBox = struct { - gcd: GenCatData, - norm: Normalize, - norm_data: *Normalize.NormData, - cd: CaseData, -}; - -pub const Unicode = struct { - pub const Algorithm = AlgorithmType(u21, i32, .{}); - pub const Scores = ScoresType(i32); - - const FunctionTable: Algorithm.FunctionTable(*Unicode) = .{ - .score = scoreFunc, - .bonus = bonusFunc, - .isEqual = eqlFunc, - }; - - fn eqlFunc(self: *Unicode, h: u21, n: u21) bool { - if (self.unicode_toolbox.gcd.isSeparator(n) and self.opts.wildcard_spaces) { - if (self.unicode_toolbox.gcd.isLetter(h) or - self.unicode_toolbox.gcd.isNumber(h) or - self.unicode_toolbox.gcd.isSymbol(h)) - { - return true; - } else { - return false; - } - } else if (!self.opts.case_sensitive) { - return self.unicode_toolbox.cd.toLower(h) == self.unicode_toolbox.cd.toLower(n); - } else { - return h == n; - } - } - - fn scoreFunc( - a: *Unicode, - scores: Scores, - h: u21, - n: u21, - ) ?i32 { - if (!a.eqlFunc(h, n)) return null; - - if (a.opts.case_penalize and (h != n)) { - return scores.score_match + a.opts.penalty_case_mistmatch; - } - return scores.score_match; - } - - fn bonusFunc( - self: *Unicode, - scores: Scores, - h: u21, - n: u21, - ) i32 { - const p = CharacterType.fromUnicode(h, self.unicode_toolbox); - const c = CharacterType.fromUnicode(n, self.unicode_toolbox); - - return switch (p.roleNextTo(c)) { - .Head => scores.bonus_head, - .Camel => scores.bonus_camel, - .Break => scores.bonus_break, - .Tail => scores.bonus_tail, - }; - } - - fn convertString(self: *const Unicode, string: []const u8) ![]const u21 { - const nfc_result = try self.unicode_toolbox.norm.nfc(self.alg.allocator, string); - defer nfc_result.deinit(); - - var iter = 
code_point.Iterator{ .bytes = nfc_result.slice }; - - var converted_string = std.ArrayList(u21).init(self.alg.allocator); - defer converted_string.deinit(); - - while (iter.next()) |c| { - try converted_string.append(c.code); - } - return converted_string.toOwnedSlice(); - } - - pub const Options = struct { - case_sensitive: bool = true, - case_penalize: bool = false, - // treat spaces as wildcards for any kind of boundary - // i.e. match with any `[^a-z,A-Z,0-9]` - wildcard_spaces: bool = false, - - penalty_case_mistmatch: i32 = -2, - - char_buffer_size: usize = 8192, - }; - - alg: Algorithm, - opts: Options, - unicode_toolbox: UnicodeToolBox, - - pub fn init( - allocator: std.mem.Allocator, - max_haystack: usize, - max_needle: usize, - opts: Options, - ) !Unicode { - var alg = try Algorithm.init(allocator, max_haystack, max_needle); - errdefer alg.deinit(); - - var gcd = try GenCatData.init(allocator); - errdefer gcd.deinit(); - - var norm_data: *Normalize.NormData = undefined; - norm_data = try allocator.create(Normalize.NormData); - errdefer norm_data.deinit(); // reverse order is needed to proper deinit - errdefer allocator.destroy(norm_data); - try Normalize.NormData.init(norm_data, allocator); - const norm = Normalize{ .norm_data = norm_data }; - - var cd = try CaseData.init(allocator); - errdefer cd.deinit(); - - return .{ - .alg = alg, - .opts = opts, - .unicode_toolbox = .{ - .gcd = gcd, - .norm = norm, - .norm_data = norm_data, - .cd = cd, - }, - }; - } - - pub fn deinit(self: *Unicode) void { - self.unicode_toolbox.gcd.deinit(); - self.unicode_toolbox.norm_data.deinit(); - self.alg.allocator.destroy(self.unicode_toolbox.norm_data); - self.unicode_toolbox.cd.deinit(); - self.alg.deinit(); - } - - pub fn score( - self: *Unicode, - haystack: []const u8, - needle: []const u8, - ) !?i32 { - const haystack_normal = try self.convertString(haystack); - defer self.alg.allocator.free(haystack_normal); - - const needle_normal = try self.convertString(needle); - 
defer self.alg.allocator.free(needle_normal); - - return self.alg.score( - self, - FunctionTable, - haystack_normal, - needle_normal, - ); - } - - pub fn scoreMatches( - self: *Unicode, - haystack: []const u8, - needle: []const u8, - ) Algorithm.Matches { - const haystack_normal = self.convertString(haystack); - defer self.allocator.free(haystack_normal); - const needle_normal = self.convertString(needle); - defer self.allocator.free(needle_normal); - return self.alg.scoreMatches( - self, - FunctionTable, - haystack_normal, - needle_normal, - ); - } -}; - fn doTestScore(alg: *Ascii, haystack: []const u8, needle: []const u8, comptime score: i32) !void { const s = alg.score(haystack, needle); try std.testing.expectEqual(score, s.?); } -fn doTestScoreUnicode(alg: *Unicode, haystack: []const u8, needle: []const u8, comptime score: ?i32) !void { - const s = try alg.score(haystack, needle); - try std.testing.expectEqual(score, s.?); -} - test "algorithm test" { const o = Ascii.Scores{}; @@ -1010,17 +834,3 @@ test "traceback" { try doTestTraceback(&alg, "A" ++ "a" ** 20 ++ "B", "AB", &.{ 0, 21 }); try doTestTraceback(&alg, "./src/main.zig", "main", &.{ 6, 7, 8, 9 }); } - -test "Unicode search" { - const o = Unicode.Scores{}; - - var alg = try Unicode.init( - std.testing.allocator, - 128, - 32, - .{}, - ); - defer alg.deinit(); - - try doTestScoreUnicode(&alg, "zig⚡ fast", "⚡", o.score_match); -} diff --git a/src/unicode.zig b/src/unicode.zig new file mode 100644 index 0000000..843096f --- /dev/null +++ b/src/unicode.zig @@ -0,0 +1,234 @@ +const std = @import("std"); +const root = @import("root.zig"); +const utils = @import("utils.zig"); +const structures = @import("structures.zig"); + +const code_point = @import("code_point"); +const GenCatData = @import("GenCatData"); +const CaseData = @import("CaseData"); +const Normalize = @import("Normalize"); + +const CharacterType = utils.CharacterType; +const MatrixT = structures.MatrixT; + +const AlgorithmType = 
root.AlgorithmType; +const ScoresType = root.ScoresType; + +fn charFromUnicode(c: u21, cd: CaseData, gcd: GenCatData) CharacterType { + if (cd.isLower(c)) { + return .Lower; + } else if (cd.isUpper(c)) { + return .Upper; + } else if (gcd.isNumber(c)) { + return .Number; + } else if (switch (c) { + ' ', '\\', '/', '|', '(', ')', '[', ']', '{', '}' => true, + else => false, + }) { + return .HardSeperator; + } else if (gcd.isSeparator(c)) { + return .HardSeperator; + } else if (gcd.isPunctuation(c) or gcd.isSymbol(c) or gcd.isMark(c)) { + return .SoftSeperator; + } else if (gcd.isControl(c)) { + return .Empty; + } else { + return .Lower; // Maybe .Empty instead ? + } +} + +pub const Unicode = struct { + pub const Algorithm = AlgorithmType(u21, i32, .{}); + pub const Scores = ScoresType(i32); + + const FunctionTable: Algorithm.FunctionTable(*Unicode) = .{ + .score = scoreFunc, + .bonus = bonusFunc, + .isEqual = eqlFunc, + }; + + fn eqlFunc(self: *Unicode, h: u21, n: u21) bool { + if (self.gcd.isSeparator(n) and self.opts.wildcard_spaces) { + if (self.gcd.isLetter(h) or + self.gcd.isNumber(h) or + self.gcd.isSymbol(h)) + { + return true; + } else { + return false; + } + } else if (!self.opts.case_sensitive) { + return self.cd.toLower(h) == self.cd.toLower(n); + } else { + return h == n; + } + } + + fn scoreFunc( + a: *Unicode, + scores: Scores, + h: u21, + n: u21, + ) ?i32 { + if (!a.eqlFunc(h, n)) return null; + + if (a.opts.case_penalize and (h != n)) { + return scores.score_match + a.opts.penalty_case_mistmatch; + } + return scores.score_match; + } + + fn bonusFunc( + self: *Unicode, + scores: Scores, + h: u21, + n: u21, + ) i32 { + const p = charFromUnicode(h, self.cd, self.gcd); + const c = charFromUnicode(n, self.cd, self.gcd); + + return switch (p.roleNextTo(c)) { + .Head => scores.bonus_head, + .Camel => scores.bonus_camel, + .Break => scores.bonus_break, + .Tail => scores.bonus_tail, + }; + } + + fn convertString(self: *const Unicode, string: []const u8) 
![]const u21 { + const nfc_result = try self.norm.nfc(self.alg.allocator, string); + defer nfc_result.deinit(); + + var iter = code_point.Iterator{ .bytes = nfc_result.slice }; + + var converted_string = std.ArrayList(u21).init(self.alg.allocator); + defer converted_string.deinit(); + + while (iter.next()) |c| { + try converted_string.append(c.code); + } + return converted_string.toOwnedSlice(); + } + + pub const Options = struct { + case_sensitive: bool = true, + case_penalize: bool = false, + // treat spaces as wildcards for any kind of boundary + // i.e. match with any `[^a-z,A-Z,0-9]` + wildcard_spaces: bool = false, + + penalty_case_mistmatch: i32 = -2, + + char_buffer_size: usize = 8192, + }; + + alg: Algorithm, + opts: Options, + // unicode specific things + gcd: GenCatData, + norm: Normalize, + norm_data_ptr: *Normalize.NormData, + cd: CaseData, + + pub fn init( + allocator: std.mem.Allocator, + max_haystack: usize, + max_needle: usize, + opts: Options, + ) !Unicode { + var alg = try Algorithm.init(allocator, max_haystack, max_needle); + errdefer alg.deinit(); + + var gcd = try GenCatData.init(allocator); + errdefer gcd.deinit(); + + var norm: Normalize = undefined; + const norm_data_ptr = try allocator.create(Normalize.NormData); + errdefer allocator.destroy(norm_data_ptr); + + try Normalize.NormData.init(norm_data_ptr, allocator); + errdefer norm_data_ptr.deinit(); + + norm.norm_data = norm_data_ptr; + + var cd = try CaseData.init(allocator); + errdefer cd.deinit(); + + return .{ + .alg = alg, + .opts = opts, + .gcd = gcd, + .norm = norm, + .norm_data_ptr = norm_data_ptr, + .cd = cd, + }; + } + + pub fn deinit(self: *Unicode) void { + self.cd.deinit(); + self.gcd.deinit(); + self.norm_data_ptr.deinit(); + self.alg.allocator.destroy(self.norm_data_ptr); + self.alg.deinit(); + } + + pub fn score( + self: *Unicode, + haystack: []const u8, + needle: []const u8, + ) !?i32 { + const haystack_normal = try self.convertString(haystack); + defer 
self.alg.allocator.free(haystack_normal); + + const needle_normal = try self.convertString(needle); + defer self.alg.allocator.free(needle_normal); + + return self.alg.score( + self, + FunctionTable, + haystack_normal, + needle_normal, + ); + } + + pub fn scoreMatches( + self: *Unicode, + haystack: []const u8, + needle: []const u8, + ) Algorithm.Matches { + const haystack_normal = self.convertString(haystack); + defer self.allocator.free(haystack_normal); + const needle_normal = self.convertString(needle); + defer self.allocator.free(needle_normal); + return self.alg.scoreMatches( + self, + FunctionTable, + haystack_normal, + needle_normal, + ); + } +}; + +fn doTestScoreUnicode( + alg: *Unicode, + haystack: []const u8, + needle: []const u8, + comptime score: ?i32, +) !void { + const s = try alg.score(haystack, needle); + try std.testing.expectEqual(score, s.?); +} + +test "Unicode search" { + const o = Unicode.Scores{}; + + var alg = try Unicode.init( + std.testing.allocator, + 128, + 32, + .{}, + ); + defer alg.deinit(); + + try doTestScoreUnicode(&alg, "zig⚡ fast", "⚡", o.score_match); +} diff --git a/src/utils.zig b/src/utils.zig index 5122fb8..4d33aee 100644 --- a/src/utils.zig +++ b/src/utils.zig @@ -1,6 +1,6 @@ const std = @import("std"); - -const UnicodeToolBox = @import("root.zig").UnicodeToolBox; +const GenCatData = @import("GenCatData"); +const CaseData = @import("CaseData"); pub fn digitCount(v: anytype) usize { const abs: u32 = @intCast(@abs(v)); @@ -31,29 +31,6 @@ pub const CharacterType = enum { }; } - pub fn fromUnicode(c: u21, unicode_toolbox: UnicodeToolBox) CharacterType { - if (unicode_toolbox.cd.isLower(c)) { - return .Lower; - } else if (unicode_toolbox.cd.isUpper(c)) { - return .Upper; - } else if (unicode_toolbox.gcd.isNumber(c)) { - return .Number; - } else if (switch (c) { - ' ', '\\', '/', '|', '(', ')', '[', ']', '{', '}' => true, - else => false, - }) { - return .HardSeperator; - } else if (unicode_toolbox.gcd.isSeparator(c)) { - return 
.HardSeperator; - } else if (unicode_toolbox.gcd.isPunctuation(c) or unicode_toolbox.gcd.isSymbol(c) or unicode_toolbox.gcd.isMark(c)) { - return .SoftSeperator; - } else if (unicode_toolbox.gcd.isControl(c)) { - return .Empty; - } else { - return .Lower; // Maybe .Empty instead ? - } - } - const Role = enum { Head, Break, From 967df944157e9ac5d6c1320a76d00a489ece6a36 Mon Sep 17 00:00:00 2001 From: fjebaker <fergusbkr@gmail.com> Date: Mon, 12 Aug 2024 18:13:58 +0100 Subject: [PATCH 22/25] feat: pre-allocated buffer resizing Removed `resizeIfNeeded`, since `realloc` effectively does that for us. Exposed the `resize` functions to the public `Ascii` and `Unicode` interfaces to make them available to users. --- src/root.zig | 67 ++++++++++++++++++++++--------------------------- src/unicode.zig | 12 +++++++++ 2 files changed, 42 insertions(+), 37 deletions(-) diff --git a/src/root.zig b/src/root.zig index a889ca9..2a88835 100644 --- a/src/root.zig +++ b/src/root.zig @@ -134,8 +134,6 @@ pub fn AlgorithmType( max_haystack: usize, max_needle: usize, ) !Self { - - // init to a min size, and resize after if needed const rows = max_needle + 1; const cols = max_haystack + 1; @@ -171,48 +169,35 @@ pub fn AlgorithmType( }; } - /// Resize matrixs and buffer if it is needed - pub fn resizeIfNeeded(self: *Self, max_haystack: usize, max_needle: usize) !void { + /// Resize pre-allocated buffers to fit a new maximum haystack and + /// needle size + pub fn resize(self: *Self, max_haystack: usize, max_needle: usize) !void { const new_rows = max_needle + 1; const new_cols = max_haystack + 1; - if (new_rows <= self.m.rows and new_cols <= self.m.cols) { - return; - } - - if (new_rows > self.first_match_buffer.len) { - self.first_match_buffer = try self.allocator.realloc(self.first_match_buffer, new_rows); - } - if (new_cols > self.role_bonus.len) { - self.role_bonus = try self.allocator.realloc(self.role_bonus, new_cols); - self.bonus_buffer = try self.allocator.realloc(self.bonus_buffer,
new_cols); - self.traceback_buffer = try self.allocator.realloc(self.traceback_buffer, new_cols); - } - - if (new_rows * new_cols > self.m.matrix.len) { - try self.m.resizeAlloc(new_rows, new_cols); - try self.x.resizeAlloc(new_rows, new_cols); - try self.m_skip.resizeAlloc(new_rows, new_cols); - } - return; - } + self.first_match_buffer = try self.allocator.realloc( + self.first_match_buffer, + new_rows, + ); - /// Force to resize all matrixs and buffer - pub fn resize(self: *Self, max_haystack: usize, max_needle: usize) !void { - const new_rows = max_needle + 1; - const new_cols = max_haystack + 1; + self.role_bonus = try self.allocator.realloc( + self.role_bonus, + new_cols, + ); - self.first_match_buffer = try self.allocator.realloc(self.first_match_buffer, new_rows); + self.bonus_buffer = try self.allocator.realloc( + self.bonus_buffer, + new_cols, + ); - self.role_bonus = try self.allocator.realloc(self.role_bonus, new_cols); - self.bonus_buffer = try self.allocator.realloc(self.bonus_buffer, new_cols); - self.traceback_buffer = try self.allocator.realloc(self.traceback_buffer, new_cols); + self.traceback_buffer = try self.allocator.realloc( + self.traceback_buffer, + new_cols, + ); try self.m.resizeAlloc(new_rows, new_cols); try self.x.resizeAlloc(new_rows, new_cols); try self.m_skip.resizeAlloc(new_rows, new_cols); - - return; } /// Compute matching score @@ -546,9 +531,6 @@ pub const Ascii = struct { .isEqual = eqlFunc, }; - alg: Algorithm, - opts: Options, - fn eqlFunc(self: *Ascii, h: u8, n: u8) bool { if (n == ' ' and self.opts.wildcard_spaces) { return switch (h) { @@ -603,6 +585,9 @@ pub const Ascii = struct { penalty_case_mistmatch: i32 = -2, }; + alg: Algorithm, + opts: Options, + // public interface pub fn init( @@ -619,6 +604,7 @@ pub const Ascii = struct { self.alg.deinit(); } + /// Compute matching score pub fn score( self: *Ascii, haystack: []const u8, @@ -627,6 +613,7 @@ pub const Ascii = struct { return self.alg.score(self, FunctionTable, 
haystack, needle); } + /// Compute the score and the indices of the matched characters pub fn scoreMatches( self: *Ascii, haystack: []const u8, @@ -634,6 +621,12 @@ pub const Ascii = struct { ) Algorithm.Matches { return self.alg.scoreMatches(self, FunctionTable, haystack, needle); } + + /// Resize pre-allocated buffers to fit a new maximum haystack and + /// needle size + pub fn resize(self: *Ascii, max_haystack: usize, max_needle: usize) !void { + try self.alg.resize(max_haystack, max_needle); + } }; fn doTestScore(alg: *Ascii, haystack: []const u8, needle: []const u8, comptime score: i32) !void { diff --git a/src/unicode.zig b/src/unicode.zig index 843096f..8afe232 100644 --- a/src/unicode.zig +++ b/src/unicode.zig @@ -172,6 +172,8 @@ pub const Unicode = struct { self.alg.deinit(); } + /// Compute matching score. Recasts the `u8` array to `u21` to properly + /// encode unicode characters pub fn score( self: *Unicode, haystack: []const u8, @@ -191,6 +193,8 @@ pub const Unicode = struct { ); } + /// Compute the score and the indices of the matched characters. 
Recasts + /// the `u8` array to `u21` to properly encode unicode characters pub fn scoreMatches( self: *Unicode, haystack: []const u8, @@ -198,8 +202,10 @@ pub const Unicode = struct { ) Algorithm.Matches { const haystack_normal = self.convertString(haystack); defer self.allocator.free(haystack_normal); + const needle_normal = self.convertString(needle); defer self.allocator.free(needle_normal); + return self.alg.scoreMatches( self, FunctionTable, @@ -207,6 +213,12 @@ pub const Unicode = struct { needle_normal, ); } + + /// Resize pre-allocated buffers to fit a new maximum haystack and + /// needle size + pub fn resize(self: *Unicode, max_haystack: usize, max_needle: usize) !void { + try self.alg.resize(max_haystack, max_needle); + } }; fn doTestScoreUnicode( From 22158379a7c7de7719eb4cf8e72d48c5b95d2964 Mon Sep 17 00:00:00 2001 From: fjebaker <fergusbkr@gmail.com> Date: Mon, 12 Aug 2024 18:22:24 +0100 Subject: [PATCH 23/25] feat: expose maximum haystack / needle query functions --- src/root.zig | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/root.zig b/src/root.zig index 2a88835..2740083 100644 --- a/src/root.zig +++ b/src/root.zig @@ -116,6 +116,9 @@ pub fn AlgorithmType( traceback_buffer: []usize, + max_haystack: usize, + max_needle: usize, + allocator: std.mem.Allocator, pub fn deinit(self: *Self) void { @@ -165,6 +168,10 @@ pub fn AlgorithmType( .bonus_buffer = bonus_buffer, .first_match_buffer = first_match_buffer, .traceback_buffer = traceback_buffer, + + .max_haystack = max_haystack, + .max_needle = max_needle, + .allocator = allocator, }; } @@ -198,6 +205,9 @@ pub fn AlgorithmType( try self.m.resizeAlloc(new_rows, new_cols); try self.x.resizeAlloc(new_rows, new_cols); try self.m_skip.resizeAlloc(new_rows, new_cols); + + self.max_haystack = max_haystack; + self.max_needle = max_needle; } /// Compute matching score @@ -213,6 +223,18 @@ pub fn AlgorithmType( return info.score; } + /// Return the maximum 
(`haystack.len <= MAXIMUM`) haystack length that + /// the algorithm has allocated memory for. To increase, use `resize` + pub fn maximumHaystackLen(self: *const Self) usize { + return self.max_haystack; + } + + /// Return the maximum (`needle.len <= MAXIMUM`) needle length that + /// the algorithm has allocated memory for. To increase, use `resize` + pub fn maximumNeedleLen(self: *const Self) usize { + return self.max_needle; + } + const ScoreInfo = struct { score: ScoreT, col_max: usize = 0, @@ -230,8 +252,8 @@ pub fn AlgorithmType( .score = 0, }; - std.debug.assert(haystack.len < self.traceback_buffer.len); - std.debug.assert(needle.len < self.first_match_buffer.len); + std.debug.assert(haystack.len <= self.maximumHaystackLen()); + std.debug.assert(needle.len < self.maximumNeedleLen()); const rows = needle.len; const cols = haystack.len; From 3b435d5f98f2a94ad7bf497eeab7de8f0d7b9b7e Mon Sep 17 00:00:00 2001 From: fjebaker <fergusbkr@gmail.com> Date: Mon, 12 Aug 2024 18:29:10 +0100 Subject: [PATCH 24/25] ci: add unicode to ci test --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index bed8a2c..57513cf 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -21,5 +21,5 @@ jobs: with: version: 0.13.0 - name: Build and test - run: zig build test + run: zig build test -Dunicode From 30a5f6213350244ee3b7e501c0e7cfcd88c6b358 Mon Sep 17 00:00:00 2001 From: fjebaker <fergusbkr@gmail.com> Date: Mon, 12 Aug 2024 18:30:43 +0100 Subject: [PATCH 25/25] chore: bump version 0.1.0 --- build.zig.zon | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.zig.zon b/build.zig.zon index 24f61a4..5d2fc57 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -1,6 +1,6 @@ .{ .name = "fuzzig", - .version = "0.0.0", + .version = "0.1.0", .dependencies = .{ .zg = .{ .url = "https://codeberg.org/dude_the_builder/zg/archive/v0.13.2.tar.gz",