From 2c80b22fa680c357eaf09a5337641d2fa708f2f0 Mon Sep 17 00:00:00 2001 From: Paul Irwin Date: Sun, 16 Mar 2025 21:13:38 -0600 Subject: [PATCH 1/2] Optimize StemmerUtil for ReadOnlySpan, #1140 --- .../Analysis/Util/StemmerUtil.cs | 90 ++++++++++--------- .../Analysis/Util/TestStemmerUtil.cs | 78 ++++++++++++++++ 2 files changed, 127 insertions(+), 41 deletions(-) create mode 100644 src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestStemmerUtil.cs diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs index f4b6ddce66..bfbb96eab1 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs @@ -1,8 +1,6 @@ // Lucene version compatibility level 4.8.1 using Lucene.Net.Diagnostics; -using Lucene.Net.Support; using System; -using System.Diagnostics; namespace Lucene.Net.Analysis.Util { @@ -25,7 +23,7 @@ namespace Lucene.Net.Analysis.Util /// /// Some commonly-used stemming functions - /// + /// /// @lucene.internal /// public static class StemmerUtil // LUCENENET specific: CA1052 Static holder types should be Static or NotInheritable @@ -37,21 +35,34 @@ public static class StemmerUtil // LUCENENET specific: CA1052 Static holder type /// length of input buffer /// Prefix string to test /// true if starts with - public static bool StartsWith(char[] s, int len, string prefix) + /// + /// LUCENENET NOTE: This method has been converted to use . + /// + public static bool StartsWith(ReadOnlySpan s, int len, string prefix) + { + return StartsWith(s, len, prefix.AsSpan()); + } + + /// + /// Returns true if the character array starts with the prefix. + /// + /// Input Buffer + /// length of input buffer + /// Prefix string to test + /// true if starts with + /// + /// LUCENENET NOTE: This method has been converted to use . + /// + public static bool StartsWith(ReadOnlySpan s, int len, ReadOnlySpan prefix) { int prefixLen = prefix.Length; if (prefixLen > len) { return false; } - for (int i = 0; i < prefixLen; i++) - { - if (s[i] != prefix[i]) - { - return false; - } - } - return true; + + // LUCENENET: use more efficient implementation in MemoryExtensions + return s.StartsWith(prefix); } /// @@ -61,22 +72,12 @@ public static bool StartsWith(char[] s, int len, string prefix) /// length of input buffer /// Suffix string to test /// true if ends with - public static bool EndsWith(char[] s, int len, string suffix) + /// + /// LUCENENET NOTE: This method has been converted to use . + /// + public static bool EndsWith(ReadOnlySpan s, int len, string suffix) { - int suffixLen = suffix.Length; - if (suffixLen > len) - { - return false; - } - for (int i = suffixLen - 1; i >= 0; i--) - { - if (s[len - (suffixLen - i)] != suffix[i]) - { - return false; - } - } - - return true; + return EndsWith(s, len, suffix.AsSpan()); } /// @@ -86,24 +87,23 @@ public static bool EndsWith(char[] s, int len, string suffix) /// length of input buffer /// Suffix string to test /// true if ends with - public static bool EndsWith(char[] s, int len, char[] suffix) + /// + /// LUCENENET NOTE: This method has been converted to use . + /// + public static bool EndsWith(ReadOnlySpan s, int len, ReadOnlySpan suffix) { int suffixLen = suffix.Length; if (suffixLen > len) { return false; } - for (int i = suffixLen - 1; i >= 0; i--) - { - if (s[len - (suffixLen - i)] != suffix[i]) - { - return false; - } - } - return true; + // LUCENENET: use more efficient implementation in MemoryExtensions + return s.Slice(0, len).EndsWith(suffix); } + // LUCENENET NOTE: char[] overload of EndsWith removed because the ReadOnlySpan overload can be used instead + /// /// Delete a character in-place /// @@ -111,12 +111,16 @@ public static bool EndsWith(char[] s, int len, char[] suffix) /// Position of character to delete /// length of input buffer /// length of input buffer after deletion - public static int Delete(char[] s, int pos, int len) + /// + /// LUCENENET NOTE: This method has been converted to use . + /// + public static int Delete(Span s, int pos, int len) { if (Debugging.AssertsEnabled) Debugging.Assert(pos < len); if (pos < len - 1) // don't arraycopy if asked to delete last character { - Arrays.Copy(s, pos + 1, s, pos, len - pos - 1); + // Arrays.Copy(s, pos + 1, s, pos, len - pos - 1); + s.Slice(pos + 1, len - pos - 1).CopyTo(s.Slice(pos, len - pos - 1)); } return len - 1; } @@ -129,14 +133,18 @@ public static int Delete(char[] s, int pos, int len) /// Length of input buffer /// number of characters to delete /// length of input buffer after deletion - public static int DeleteN(char[] s, int pos, int len, int nChars) + /// + /// LUCENENET NOTE: This method has been converted to use . + /// + public static int DeleteN(Span s, int pos, int len, int nChars) { if (Debugging.AssertsEnabled) Debugging.Assert(pos + nChars <= len); if (pos + nChars < len) // don't arraycopy if asked to delete the last characters { - Arrays.Copy(s, pos + nChars, s, pos, len - pos - nChars); + // Arrays.Copy(s, pos + nChars, s, pos, len - pos - nChars); + s.Slice(pos + nChars, len - pos - nChars).CopyTo(s.Slice(pos, len - pos - nChars)); } return len - nChars; } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestStemmerUtil.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestStemmerUtil.cs new file mode 100644 index 0000000000..af8b1605a9 --- /dev/null +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestStemmerUtil.cs @@ -0,0 +1,78 @@ +using Lucene.Net.Attributes; +using Lucene.Net.Util; +using NUnit.Framework; +using System; +using Assert = Lucene.Net.TestFramework.Assert; + +namespace Lucene.Net.Analysis.Util +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /// + /// Tests for + /// + [TestFixture] + [LuceneNetSpecific] + public class TestStemmerUtil : LuceneTestCase + { + [Test] + [TestCase("foobar", 6, "foo", true)] + [TestCase("foobar", 3, "foo", true)] + [TestCase("foobar", 6, "bar", false)] + [TestCase("foobar", 2, "foo", false)] + public void TestStartsWith(string input, int len, string prefix, bool expected) + { + Assert.AreEqual(expected, StemmerUtil.StartsWith(input.AsSpan(), len, prefix)); + } + + [Test] + [TestCase("foobar", 6, "bar", true)] + [TestCase("foobar", 3, "bar", false)] + [TestCase("foobar", 6, "foo", false)] + [TestCase("foobar", 2, "bar", false)] + [TestCase("foobar", 3, "foo", true)] + public void TestEndsWith(string input, int len, string prefix, bool expected) + { + Assert.AreEqual(expected, StemmerUtil.EndsWith(input.AsSpan(), len, prefix)); + } + + [Test] + [TestCase("foobar", 3, 6, "fooar", 5)] + [TestCase("foobar", 0, 6, "oobar", 5)] + [TestCase("foobar", 0, 3, "oo", 2)] + [TestCase("foobar", 5, 6, "fooba", 5)] + public void TestDelete(string input, int pos, int len, string expected, int expectedLen) + { + char[] buffer = input.ToCharArray(); + Assert.AreEqual(expectedLen, StemmerUtil.Delete(buffer, pos, len)); + Assert.AreEqual(expected, new string(buffer, 0, expectedLen)); + } + + [Test] + [TestCase("foobar", 3, 6, 2, "foor", 4)] + [TestCase("foobar", 0, 6, 2, "obar", 4)] + [TestCase("foobar", 0, 3, 2, "o", 1)] + [TestCase("foobar", 4, 6, 2, "foob", 4)] + public void TestDeleteN(string input, int pos, int len, int nChars, string expected, int expectedLen) + { + char[] buffer = input.ToCharArray(); + Assert.AreEqual(expectedLen, StemmerUtil.DeleteN(buffer, pos, len, nChars)); + Assert.AreEqual(expected, new string(buffer, 0, expectedLen)); + } + } +} From d12d85269af0717cd332e3b1e1ed1a109823df70 Mon Sep 17 00:00:00 2001 From: Paul Irwin Date: Sun, 16 Mar 2025 21:53:09 -0600 Subject: [PATCH 2/2] Use StringComparison.Ordinal --- src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs index bfbb96eab1..fc7ff0d760 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs @@ -62,7 +62,7 @@ public static bool StartsWith(ReadOnlySpan s, int len, ReadOnlySpan } // LUCENENET: use more efficient implementation in MemoryExtensions - return s.StartsWith(prefix); + return s.StartsWith(prefix, StringComparison.Ordinal); } /// @@ -99,7 +99,7 @@ public static bool EndsWith(ReadOnlySpan s, int len, ReadOnlySpan su } // LUCENENET: use more efficient implementation in MemoryExtensions - return s.Slice(0, len).EndsWith(suffix); + return s.Slice(0, len).EndsWith(suffix, StringComparison.Ordinal); } // LUCENENET NOTE: char[] overload of EndsWith removed because the ReadOnlySpan overload can be used instead