From 1626c91bc9f58499b2c46c7b932fa65794ec3a09 Mon Sep 17 00:00:00 2001
From: Albert Meltzer <7529386+kitbellew@users.noreply.github.com>
Date: Thu, 20 Nov 2025 10:29:34 -0800
Subject: [PATCH] RE2: avoid an extra copy of the groups array

Save it directly into the provided array if available.
---
 java/com/google/re2j/Machine.java |  7 ++-----
 java/com/google/re2j/RE2.java     | 23 +++++++++++------------
 2 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/java/com/google/re2j/Machine.java b/java/com/google/re2j/Machine.java
index ba785917..9819fc07 100644
--- a/java/com/google/re2j/Machine.java
+++ b/java/com/google/re2j/Machine.java
@@ -156,11 +156,8 @@ private void initNewCap(int ncap) {
     this.matchcap = new int[ncap];
   }
 
-  int[] submatches() {
-    if (ncap == 0) {
-      return Utils.EMPTY_INTS;
-    }
-    return Arrays.copyOf(matchcap, ncap);
+  void submatches(int[] cap) {
+    System.arraycopy(matchcap, 0, cap, 0, Math.min(cap.length, ncap));
   }
 
   // alloc() allocates a new thread with the given instruction.
diff --git a/java/com/google/re2j/RE2.java b/java/com/google/re2j/RE2.java
index 46837354..2349d8b6 100644
--- a/java/com/google/re2j/RE2.java
+++ b/java/com/google/re2j/RE2.java
@@ -292,6 +292,11 @@ public String toString() {
   // the position of its subexpressions.
   // Derived from exec.go.
   private int[] doExecute(MachineInput in, int pos, int anchor, int ncap) {
+    int[] cap = ncap == 0 ? Utils.EMPTY_INTS : new int[ncap];
+    return doExecute(in, pos, anchor, ncap, cap) ? cap : null;
+  }
+
+  private boolean doExecute(MachineInput in, int pos, int anchor, int ncap, int[] cap) {
     Machine m = get();
     // The Treiber stack cannot reuse nodes, unless the node to be reused has only ever been at
     // the bottom of the stack (i.e., next == null).
@@ -305,9 +310,12 @@ private int[] doExecute(MachineInput in, int pos, int anchor, int ncap) {
     }
 
     m.init(ncap);
-    int[] cap = m.match(in, pos, anchor) ? m.submatches() : null;
+    boolean ok = m.match(in, pos, anchor);
+    if (ok && cap != null) {
+      m.submatches(cap);
+    }
     put(m, isNew);
-    return cap;
+    return ok;
   }
 
   /**
@@ -350,16 +358,7 @@ boolean match(MatcherInput input, int start, int end, int anchor, int[] group, i
         input.getEncoding() == Encoding.UTF_16
             ? MachineInput.fromUTF16(input.asCharSequence(), 0, end)
             : MachineInput.fromUTF8(input.asBytes(), 0, end);
-    int[] groupMatch = doExecute(machineInput, start, anchor, 2 * ngroup);
-
-    if (groupMatch == null) {
-      return false;
-    }
-
-    if (group != null) {
-      System.arraycopy(groupMatch, 0, group, 0, groupMatch.length);
-    }
-    return true;
+    return doExecute(machineInput, start, anchor, 2 * ngroup, group);
   }
 
   /**