From 4694e994775b8ae59cdc945e9d9ff0426c076c8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Pol=C3=A1=C5=A1ek?= Date: Thu, 29 Jan 2026 04:23:12 +0100 Subject: [PATCH] CmdPal: Upgrade FuzzyStringMatcher in the Command Palette Extensions SDK (#44809) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary of the Pull Request This PR upgrades the `FuzzyStringMatcher` used in the Command Palette Extensions SDK with a focus on performance, memory efficiency, and improved matching behavior, while preserving compatibility with the existing API. This PR is a backwards-compatible alternative to the precomputed fuzzy matcher introduced in another PR. The new implementation is designed as a drop-in replacement. Any behavioral differences are intentional and primarily related to improved diacritic handling, scoring consistency, and correctness of highlight positions. Changes: - Keeps the existing public API intact and preserves behavior in nearly all cases. - Enables diacritics-insensitive matching by default, improving results across accented and non-English languages. - Significantly improves performance, with measured speedups in the range of ~5–20 times, depending on scenario and input size. - Reduces heap allocations to near zero by using stack allocation and pooled buffers instead of large per-match DP arrays. - Simplifies and optimizes matching logic: - Folds the haystack only once per match. - Uses rolling DP buffers instead of `O(query × target)` tables. - Replaces large match tables with a compact bitset when tracking highlight positions. - Improves consistency and correctness: - Normalizes path separators (`\` → `/`) during folding. - Avoids returning highlight positions for PinYin-only matches where no 1:1 mapping exists. - Introduces unit tests, including comparison tests against the legacy implementation to validate compatibility. 
## PR Checklist - [x] Closes: #44066 - [ ] **Communication:** I've discussed this with core contributors already. If the work hasn't been agreed, this work might be rejected - [ ] **Tests:** Added/updated and all pass - [ ] **Localization:** All end-user-facing strings can be localized - [ ] **Dev docs:** Added/updated - [ ] **New binaries:** Added on the required places - [ ] [JSON for signing](https://github.com/microsoft/PowerToys/blob/main/.pipelines/ESRPSigning_core.json) for new binaries - [ ] [WXS for installer](https://github.com/microsoft/PowerToys/blob/main/installer/PowerToysSetup/Product.wxs) for new binaries and localization folder - [ ] [YML for CI pipeline](https://github.com/microsoft/PowerToys/blob/main/.pipelines/ci/templates/build-powertoys-steps.yml) for new test projects - [ ] [YML for signed pipeline](https://github.com/microsoft/PowerToys/blob/main/.pipelines/release.yml) - [ ] **Documentation updated:** If checked, please file a pull request on [our docs repo](https://github.com/MicrosoftDocs/windows-uwp/tree/docs/hub/powertoys) and link it here: #xxx ## Detailed Description of the Pull Request / Additional comments ## Validation Steps Performed --- .github/actions/spell-check/excludes.txt | 2 + PowerToys.slnx | 4 + src/modules/cmdpal/CommandPalette.slnf | 1 + .../FuzzyMatcherComparisonTests.cs | 235 ++++ .../FuzzyMatcherDiacriticsTests.cs | 85 ++ .../FuzzyMatcherPinyinLogicTests.cs | 46 + .../FuzzyMatcherValidationTests.cs | 43 + .../Legacy/LegacyFuzzyStringMatcher.cs | 225 ++++ ...alette.Extensions.Toolkit.UnitTests.csproj | 30 + .../FuzzyStringMatcher.cs | 1138 +++++++++++++++-- ...t.CommandPalette.Extensions.Toolkit.csproj | 6 +- 11 files changed, 1677 insertions(+), 138 deletions(-) create mode 100644 src/modules/cmdpal/Tests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests/FuzzyMatcherComparisonTests.cs create mode 100644 src/modules/cmdpal/Tests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests/FuzzyMatcherDiacriticsTests.cs 
create mode 100644 src/modules/cmdpal/Tests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests/FuzzyMatcherPinyinLogicTests.cs create mode 100644 src/modules/cmdpal/Tests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests/FuzzyMatcherValidationTests.cs create mode 100644 src/modules/cmdpal/Tests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests/Legacy/LegacyFuzzyStringMatcher.cs create mode 100644 src/modules/cmdpal/Tests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests.csproj diff --git a/.github/actions/spell-check/excludes.txt b/.github/actions/spell-check/excludes.txt index 3ffc4199f3..b7f4d46897 100644 --- a/.github/actions/spell-check/excludes.txt +++ b/.github/actions/spell-check/excludes.txt @@ -104,6 +104,8 @@ ^src/common/ManagedCommon/ColorFormatHelper\.cs$ ^src/common/notifications/BackgroundActivatorDLL/cpp\.hint$ ^src/common/sysinternals/Eula/ +^src/modules/cmdpal/Tests/Microsoft\.CommandPalette\.Extensions\.Toolkit\.UnitTests/FuzzyMatcherComparisonTests.cs$ +^src/modules/cmdpal/Tests/Microsoft\.CommandPalette\.Extensions\.Toolkit\.UnitTests/FuzzyMatcherDiacriticsTests.cs$ ^src/modules/cmdpal/doc/initial-sdk-spec/list-elements-mock-002\.pdn$ ^src/modules/cmdpal/ext/SamplePagesExtension/Pages/SampleMarkdownImagesPage\.cs$ ^src/modules/cmdpal/Microsoft\.CmdPal\.UI/Settings/InternalPage\.SampleData\.cs$ diff --git a/PowerToys.slnx b/PowerToys.slnx index 8a166bb32e..1dc26be394 100644 --- a/PowerToys.slnx +++ b/PowerToys.slnx @@ -360,6 +360,10 @@ + + + + diff --git a/src/modules/cmdpal/CommandPalette.slnf b/src/modules/cmdpal/CommandPalette.slnf index 6575a60790..c6ccbb7338 100644 --- a/src/modules/cmdpal/CommandPalette.slnf +++ b/src/modules/cmdpal/CommandPalette.slnf @@ -30,6 +30,7 @@ "src\\modules\\cmdpal\\Tests\\Microsoft.CmdPal.Ext.WindowWalker.UnitTests\\Microsoft.CmdPal.Ext.WindowWalker.UnitTests.csproj", 
"src\\modules\\cmdpal\\Tests\\Microsoft.CmdPal.UI.ViewModels.UnitTests\\Microsoft.CmdPal.UI.ViewModels.UnitTests.csproj", "src\\modules\\cmdpal\\Tests\\Microsoft.CmdPal.UITests\\Microsoft.CmdPal.UITests.csproj", + "src\\modules\\cmdpal\\Tests\\Microsoft.CommandPalette.Extensions.Toolkit.UnitTests\\Microsoft.CommandPalette.Extensions.Toolkit.UnitTests.csproj", "src\\modules\\cmdpal\\ext\\Microsoft.CmdPal.Ext.Apps\\Microsoft.CmdPal.Ext.Apps.csproj", "src\\modules\\cmdpal\\ext\\Microsoft.CmdPal.Ext.Bookmark\\Microsoft.CmdPal.Ext.Bookmarks.csproj", "src\\modules\\cmdpal\\ext\\Microsoft.CmdPal.Ext.Calc\\Microsoft.CmdPal.Ext.Calc.csproj", diff --git a/src/modules/cmdpal/Tests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests/FuzzyMatcherComparisonTests.cs b/src/modules/cmdpal/Tests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests/FuzzyMatcherComparisonTests.cs new file mode 100644 index 0000000000..11c3113dac --- /dev/null +++ b/src/modules/cmdpal/Tests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests/FuzzyMatcherComparisonTests.cs @@ -0,0 +1,235 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Collections.Generic; +using Microsoft.CommandPalette.Extensions.Toolkit.UnitTests.Legacy; +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace Microsoft.CommandPalette.Extensions.Toolkit.UnitTests; + +[TestClass] +public class FuzzyMatcherComparisonTests +{ + public static IEnumerable TestData => + [ + ["a", "a"], + ["a", "A"], + ["A", "a"], + ["abc", "abc"], + ["abc", "axbycz"], + ["abc", "abxcyz"], + ["sln", "solution.sln"], + ["vs", "visualstudio"], + ["test", "Test"], + ["pt", "PowerToys"], + ["p/t", "power\\toys"], + ["p\\t", "power/toys"], + ["c/w", "c:\\windows"], + ["foo", "bar"], + ["verylongstringthatdoesnotmatch", "short"], + [string.Empty, "anything"], + ["something", string.Empty], + ["git", "git"], + ["em", "Emmy"], + ["my", "Emmy"], + ["word", "word"], + ["wd", "word"], + ["w d", "word"], + ["a", "ba"], + ["a", "ab"], + ["a", "bab"], + ["z", "abcdefg"], + ["CC", "CamelCase"], + ["cc", "camelCase"], + ["cC", "camelCase"], + ["some", "awesome"], + ["some", "somewhere"], + ["1", "1"], + ["1", "2"], + [".", "."], + ["f.t", "file.txt"], + ["excel", "Excel"], + ["Excel", "excel"], + ["PowerPoint", "Power Point"], + ["power point", "PowerPoint"], + ["visual studio code", "Visual Studio Code"], + ["vsc", "Visual Studio Code"], + ["code", "Visual Studio Code"], + ["vs code", "Visual Studio Code"], + ["word", "Microsoft Word"], + ["ms word", "Microsoft Word"], + ["browser", "Internet Explorer"], + ["chrome", "Google Chrome"], + ["edge", "Microsoft Edge"], + ["term", "Windows Terminal"], + ["cmd", "Command Prompt"], + ["calc", "Calculator"], + ["snipping", "Snipping Tool"], + ["note", "Notepad"], + ["file expl", "File Explorer"], + ["settings", "Settings"], + ["p t", "PowerToys"], + ["p t", "PowerToys"], + [" v ", " Visual Studio "], + [" a b ", " a b c d "], + [string.Empty, string.Empty], + [" ", " "], + [" ", " "], + [" ", "abc"], + ["abc", " "], + [" ", " "], + [" ", " a b "], + ["sh", "ShangHai"], + ["bj", "BeiJing"], + ["bj", 
"北京"], + ["sh", "上海"], + ["nh", "你好"], + ["bj", "Beijing"], + ["hello", "你好"], + ["nihao", "你好"], + ["rmb", "人民币"], + ["zwr", "中文"], + ["zw", "中文"], + ["fbr", "foobar"], + ["w11", "windows 11"], + ["pwr", "powershell"], + ["vm", "void main"], + ["ps", "PowerShell"], + ["az", "Azure"], + ["od", "onedrive"], + ["gc", "google chrome"], + ["ff", "firefox"], + ["fs", "file_system"], + ["pt", "power-toys"], + ["jt", "json.test"], + ["ps", "power shell"], + ["ps", "power'shell"], + ["ps", "power\"shell"], + ["hw", "hello:world"], + ["abc", "a_b_c"], + ["abc", "a-b-c"], + ["abc", "a.b.c"], + ["abc", "a b c"], + ["abc", "a'b'c"], + ["abc", "a\"b\"c"], + ["abc", "a:b:c"], + ["_a", "_a"], + ["a_", "a_"], + ["-a", "-a"], + ["a-", "a-"] + ]; + + [TestMethod] + [DynamicData(nameof(TestData))] + public void CompareScores(string needle, string haystack) + { + var legacyScore = LegacyFuzzyStringMatcher.ScoreFuzzy(needle, haystack); + var newScore = FuzzyStringMatcher.ScoreFuzzy(needle, haystack); + + Assert.AreEqual(legacyScore, newScore, $"Score mismatch for needle='{needle}', haystack='{haystack}'"); + } + + [TestMethod] + [DynamicData(nameof(TestData))] + public void ComparePositions(string needle, string haystack) + { + var (legacyScore, legacyPos) = LegacyFuzzyStringMatcher.ScoreFuzzyWithPositions(needle, haystack, true); + var (newScore, newPos) = FuzzyStringMatcher.ScoreFuzzyWithPositions(needle, haystack, true); + + Assert.AreEqual(legacyScore, newScore, $"Score mismatch (with pos) for needle='{needle}', haystack='{haystack}'"); + + // Ensure lists are not null + legacyPos ??= []; + newPos ??= []; + + // Compare list contents + var legacyPosStr = string.Join(',', legacyPos); + var newPosStr = string.Join(',', newPos); + + Assert.AreEqual(legacyPos.Count, newPos.Count, $"Position count mismatch: Legacy=[{legacyPosStr}], New=[{newPosStr}]"); + + for (var i = 0; i < legacyPos.Count; i++) + { + Assert.AreEqual(legacyPos[i], newPos[i], $"Position mismatch at index {i}: 
Legacy=[{legacyPosStr}], New=[{newPosStr}]"); + } + } + + [TestMethod] + [DynamicData(nameof(TestData))] + public void CompareScores_ContiguousOnly(string needle, string haystack) + { + var legacyScore = LegacyFuzzyStringMatcher.ScoreFuzzy(needle, haystack, allowNonContiguousMatches: false); + var newScore = FuzzyStringMatcher.ScoreFuzzy(needle, haystack, allowNonContiguousMatches: false); + + Assert.AreEqual(legacyScore, newScore, $"Score mismatch (contiguous only) for needle='{needle}', haystack='{haystack}'"); + } + + [TestMethod] + [DynamicData(nameof(TestData))] + public void CompareScores_PinyinEnabled(string needle, string haystack) + { + var originalNew = FuzzyStringMatcher.ChinesePinYinSupport; + var originalLegacy = LegacyFuzzyStringMatcher.ChinesePinYinSupport; + try + { + FuzzyStringMatcher.ChinesePinYinSupport = true; + LegacyFuzzyStringMatcher.ChinesePinYinSupport = true; + + var legacyScore = LegacyFuzzyStringMatcher.ScoreFuzzy(needle, haystack); + var newScore = FuzzyStringMatcher.ScoreFuzzy(needle, haystack); + + Assert.AreEqual(legacyScore, newScore, $"Score mismatch (Pinyin enabled) for needle='{needle}', haystack='{haystack}'"); + } + finally + { + FuzzyStringMatcher.ChinesePinYinSupport = originalNew; + LegacyFuzzyStringMatcher.ChinesePinYinSupport = originalLegacy; + } + } + + [TestMethod] + [DynamicData(nameof(TestData))] + public void ComparePositions_PinyinEnabled(string needle, string haystack) + { + var originalNew = FuzzyStringMatcher.ChinesePinYinSupport; + var originalLegacy = LegacyFuzzyStringMatcher.ChinesePinYinSupport; + try + { + FuzzyStringMatcher.ChinesePinYinSupport = true; + LegacyFuzzyStringMatcher.ChinesePinYinSupport = true; + + var (legacyScore, legacyPos) = LegacyFuzzyStringMatcher.ScoreFuzzyWithPositions(needle, haystack, true); + var (newScore, newPos) = FuzzyStringMatcher.ScoreFuzzyWithPositions(needle, haystack, true); + + Assert.AreEqual(legacyScore, newScore, $"Score mismatch (with pos, Pinyin enabled) for 
needle='{needle}', haystack='{haystack}'"); + + // Ensure lists are not null + legacyPos ??= []; + newPos ??= []; + + // If newPos is empty but newScore > 0, it means it's a secondary match (like Pinyin) + // which we don't return positions for in the new matcher. + if (newScore > 0 && newPos.Count == 0 && legacyPos.Count > 0) + { + return; + } + + // Compare list contents + var legacyPosStr = string.Join(',', legacyPos); + var newPosStr = string.Join(',', newPos); + + Assert.AreEqual(legacyPos.Count, newPos.Count, $"Position count mismatch: Legacy=[{legacyPosStr}], New=[{newPosStr}]"); + + for (var i = 0; i < legacyPos.Count; i++) + { + Assert.AreEqual(legacyPos[i], newPos[i], $"Position mismatch at index {i}: Legacy=[{legacyPosStr}], New=[{newPosStr}]"); + } + } + finally + { + FuzzyStringMatcher.ChinesePinYinSupport = originalNew; + LegacyFuzzyStringMatcher.ChinesePinYinSupport = originalLegacy; + } + } +} diff --git a/src/modules/cmdpal/Tests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests/FuzzyMatcherDiacriticsTests.cs b/src/modules/cmdpal/Tests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests/FuzzyMatcherDiacriticsTests.cs new file mode 100644 index 0000000000..d4b6b8614f --- /dev/null +++ b/src/modules/cmdpal/Tests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests/FuzzyMatcherDiacriticsTests.cs @@ -0,0 +1,85 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace Microsoft.CommandPalette.Extensions.Toolkit.UnitTests; + +[TestClass] +public class FuzzyMatcherDiacriticsTests +{ + [TestMethod] + public void ScoreFuzzy_WithDiacriticsRemoval_MatchesWithDiacritics() + { + // "eco" should match "école" when diacritics are removed (é -> E) + var score = FuzzyStringMatcher.ScoreFuzzy("eco", "école", allowNonContiguousMatches: true, removeDiacritics: true); + Assert.IsTrue(score > 0, "Should match 'école' with 'eco' when diacritics are removed"); + + // "uber" should match "über" + score = FuzzyStringMatcher.ScoreFuzzy("uber", "über", allowNonContiguousMatches: true, removeDiacritics: true); + Assert.IsTrue(score > 0, "Should match 'über' with 'uber' when diacritics are removed"); + } + + [TestMethod] + public void ScoreFuzzy_WithoutDiacriticsRemoval_DoesNotMatchWhenCharactersDiffer() + { + // "eco" should NOT match "école" if 'é' is treated as distinct from 'e' and order is strict + // 'é' (index 0) != 'e'. 'e' (index 4) is after 'c' (index 1) and 'o' (index 2). + // Since needle is "e-c-o", to match "école": + // 'e' matches 'e' at 4. + // 'c' must show up after. No. + // So no match. + var score = FuzzyStringMatcher.ScoreFuzzy("eco", "école", allowNonContiguousMatches: true, removeDiacritics: false); + Assert.AreEqual(0, score, "Should not match 'école' with 'eco' when diacritics are NOT removed"); + + // "uber" vs "über" + // u != ü. + // b (index 1) match b (index 2). e (2) match e (3). r (3) match r (4). + // but 'u' has no match. 
+ score = FuzzyStringMatcher.ScoreFuzzy("uber", "über", allowNonContiguousMatches: true, removeDiacritics: false); + Assert.AreEqual(0, score, "Should not match 'über' with 'uber' when diacritics are NOT removed"); + } + + [TestMethod] + public void ScoreFuzzy_DefaultRemovesDiacritics() + { + // Now default is true, so "eco" vs "école" should match + var score = FuzzyStringMatcher.ScoreFuzzy("eco", "école"); + Assert.IsTrue(score > 0, "Default should remove diacritics and match 'école'"); + } + + [DataTestMethod] + [DataRow("a", "à", true)] + [DataRow("e", "é", true)] + [DataRow("i", "ï", true)] + [DataRow("o", "ô", true)] + [DataRow("u", "ü", true)] + [DataRow("c", "ç", true)] + [DataRow("n", "ñ", true)] + [DataRow("s", "ß", false)] // ß doesn't strip to s via simple invariant uppercasing + public void VerifySpecificCharacters(string needle, string haystack, bool expectingMatch) + { + var score = FuzzyStringMatcher.ScoreFuzzy(needle, haystack, allowNonContiguousMatches: true, removeDiacritics: true); + if (expectingMatch) + { + Assert.IsTrue(score > 0, $"Expected match for '{needle}' in '{haystack}' with diacritics removal"); + } + else + { + Assert.AreEqual(0, score, $"Expected NO match for '{needle}' in '{haystack}' even with diacritics removal"); + } + } + + [TestMethod] + public void VerifyBothPathsWorkSameForASCII() + { + var needle = "test"; + var haystack = "TestString"; + + var score1 = FuzzyStringMatcher.ScoreFuzzy(needle, haystack, allowNonContiguousMatches: true, removeDiacritics: true); + var score2 = FuzzyStringMatcher.ScoreFuzzy(needle, haystack, allowNonContiguousMatches: true, removeDiacritics: false); + + Assert.AreEqual(score1, score2, "Scores should be identical for ASCII strings regardless of diacritics setting"); + } +} diff --git a/src/modules/cmdpal/Tests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests/FuzzyMatcherPinyinLogicTests.cs 
b/src/modules/cmdpal/Tests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests/FuzzyMatcherPinyinLogicTests.cs new file mode 100644 index 0000000000..8898fe5035 --- /dev/null +++ b/src/modules/cmdpal/Tests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests/FuzzyMatcherPinyinLogicTests.cs @@ -0,0 +1,46 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace Microsoft.CommandPalette.Extensions.Toolkit.UnitTests; + +[TestClass] +public class FuzzyMatcherPinyinLogicTests +{ + [TestInitialize] + public void Setup() + { + FuzzyStringMatcher.ChinesePinYinSupport = true; + FuzzyStringMatcher.ClearCache(); + } + + [TestCleanup] + public void Cleanup() + { + FuzzyStringMatcher.ChinesePinYinSupport = false; // Reset to default state + FuzzyStringMatcher.ClearCache(); + } + + [DataTestMethod] + [DataRow("bj", "北京")] + [DataRow("sh", "上海")] + [DataRow("nihao", "你好")] + [DataRow("北京", "北京")] + [DataRow("北京", "Beijing")] + [DataRow("北", "北京")] + [DataRow("你好", "nihao")] + public void PinyinMatch_DataDriven(string needle, string haystack) + { + Assert.IsTrue(FuzzyStringMatcher.ScoreFuzzy(needle, haystack) > 0, $"Expected match for '{needle}' in '{haystack}'"); + } + + [TestMethod] + public void PinyinPositions_ShouldBeEmpty() + { + var (score, positions) = FuzzyStringMatcher.ScoreFuzzyWithPositions("bj", "北京", true); + Assert.IsTrue(score > 0); + Assert.AreEqual(0, positions.Count); + } +} diff --git a/src/modules/cmdpal/Tests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests/FuzzyMatcherValidationTests.cs b/src/modules/cmdpal/Tests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests/FuzzyMatcherValidationTests.cs new file mode 100644 index 0000000000..a03c2ccbb6 --- /dev/null +++ 
b/src/modules/cmdpal/Tests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests/FuzzyMatcherValidationTests.cs @@ -0,0 +1,43 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace Microsoft.CommandPalette.Extensions.Toolkit.UnitTests; + +[TestClass] +public class FuzzyMatcherValidationTests +{ + [DataTestMethod] + [DataRow(null, "haystack")] + [DataRow("", "haystack")] + [DataRow("needle", null)] + [DataRow("needle", "")] + [DataRow(null, null)] + public void ScoreFuzzy_HandlesIncorrectInputs(string needle, string haystack) + { + Assert.AreEqual(0, FuzzyStringMatcher.ScoreFuzzy(needle!, haystack!)); + Assert.AreEqual(0, FuzzyStringMatcher.ScoreFuzzy(needle!, haystack!, allowNonContiguousMatches: true, removeDiacritics: true)); + Assert.AreEqual(0, FuzzyStringMatcher.ScoreFuzzy(needle!, haystack!, allowNonContiguousMatches: false, removeDiacritics: false)); + } + + [DataTestMethod] + [DataRow(null, "haystack")] + [DataRow("", "haystack")] + [DataRow("needle", null)] + [DataRow("needle", "")] + [DataRow(null, null)] + public void ScoreFuzzyWithPositions_HandlesIncorrectInputs(string needle, string haystack) + { + var (score1, pos1) = FuzzyStringMatcher.ScoreFuzzyWithPositions(needle!, haystack!, true); + Assert.AreEqual(0, score1); + Assert.IsNotNull(pos1); + Assert.AreEqual(0, pos1.Count); + + var (score2, pos2) = FuzzyStringMatcher.ScoreFuzzyWithPositions(needle!, haystack!, allowNonContiguousMatches: true, removeDiacritics: true); + Assert.AreEqual(0, score2); + Assert.IsNotNull(pos2); + Assert.AreEqual(0, pos2.Count); + } +} diff --git a/src/modules/cmdpal/Tests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests/Legacy/LegacyFuzzyStringMatcher.cs b/src/modules/cmdpal/Tests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests/Legacy/LegacyFuzzyStringMatcher.cs 
new file mode 100644 index 0000000000..9cb2f4556d --- /dev/null +++ b/src/modules/cmdpal/Tests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests/Legacy/LegacyFuzzyStringMatcher.cs @@ -0,0 +1,225 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Linq; +using ToolGood.Words.Pinyin; + +namespace Microsoft.CommandPalette.Extensions.Toolkit.UnitTests.Legacy; + +// Inspired by the fuzzy.rs from edit.exe +public static class LegacyFuzzyStringMatcher +{ + private const int NOMATCH = 0; + + /// + /// Gets or sets a value indicating whether to support Chinese PinYin. + /// Automatically enabled when the system UI culture is Simplified Chinese. + /// + public static bool ChinesePinYinSupport { get; set; } = IsSimplifiedChinese(); + + private static bool IsSimplifiedChinese() + { + var culture = CultureInfo.CurrentUICulture; + + // Detect Simplified Chinese: zh-CN, zh-Hans, zh-Hans-* + return culture.Name.StartsWith("zh-CN", StringComparison.OrdinalIgnoreCase) + || culture.Name.StartsWith("zh-Hans", StringComparison.OrdinalIgnoreCase); + } + + public static int ScoreFuzzy(string needle, string haystack, bool allowNonContiguousMatches = true) + { + var (s, _) = ScoreFuzzyWithPositions(needle, haystack, allowNonContiguousMatches); + return s; + } + + public static (int Score, List Positions) ScoreFuzzyWithPositions(string needle, string haystack, bool allowNonContiguousMatches) + => ScoreAllFuzzyWithPositions(needle, haystack, allowNonContiguousMatches).MaxBy(i => i.Score); + + public static IEnumerable<(int Score, List Positions)> ScoreAllFuzzyWithPositions(string needle, string haystack, bool allowNonContiguousMatches) + { + List needles = [needle]; + List haystacks = [haystack]; + + if (ChinesePinYinSupport) + { + // Remove IME composition 
split characters. + var input = needle.Replace("'", string.Empty); + needles.Add(WordsHelper.GetPinyin(input)); + if (WordsHelper.HasChinese(haystack)) + { + haystacks.Add(WordsHelper.GetPinyin(haystack)); + } + } + + return needles.SelectMany(i => haystacks.Select(j => ScoreFuzzyWithPositionsInternal(i, j, allowNonContiguousMatches))); + } + + private static (int Score, List Positions) ScoreFuzzyWithPositionsInternal(string needle, string haystack, bool allowNonContiguousMatches) + { + if (string.IsNullOrEmpty(haystack) || string.IsNullOrEmpty(needle)) + { + return (NOMATCH, new List()); + } + + var target = haystack.ToCharArray(); + var query = needle.ToCharArray(); + + if (target.Length < query.Length) + { + return (NOMATCH, new List()); + } + + var targetUpper = FoldCase(haystack); + var queryUpper = FoldCase(needle); + var targetUpperChars = targetUpper.ToCharArray(); + var queryUpperChars = queryUpper.ToCharArray(); + + var area = query.Length * target.Length; + var scores = new int[area]; + var matches = new int[area]; + + for (var qi = 0; qi < query.Length; qi++) + { + var qiOffset = qi * target.Length; + var qiPrevOffset = qi > 0 ? (qi - 1) * target.Length : 0; + + for (var ti = 0; ti < target.Length; ti++) + { + var currentIndex = qiOffset + ti; + var diagIndex = (qi > 0 && ti > 0) ? qiPrevOffset + ti - 1 : 0; + var leftScore = ti > 0 ? scores[currentIndex - 1] : 0; + var diagScore = (qi > 0 && ti > 0) ? scores[diagIndex] : 0; + var matchSeqLen = (qi > 0 && ti > 0) ? matches[diagIndex] : 0; + + var score = (diagScore == 0 && qi != 0) ? 0 : + ComputeCharScore( + query[qi], + queryUpperChars[qi], + ti != 0 ? 
target[ti - 1] : null, + target[ti], + targetUpperChars[ti], + matchSeqLen); + + var isValidScore = score != 0 && diagScore + score >= leftScore && + (allowNonContiguousMatches || qi > 0 || + targetUpperChars.Skip(ti).Take(queryUpperChars.Length).SequenceEqual(queryUpperChars)); + + if (isValidScore) + { + matches[currentIndex] = matchSeqLen + 1; + scores[currentIndex] = diagScore + score; + } + else + { + matches[currentIndex] = NOMATCH; + scores[currentIndex] = leftScore; + } + } + } + + var positions = new List(); + if (query.Length > 0 && target.Length > 0) + { + var qi = query.Length - 1; + var ti = target.Length - 1; + + while (true) + { + var index = (qi * target.Length) + ti; + if (matches[index] == NOMATCH) + { + if (ti == 0) + { + break; + } + + ti--; + } + else + { + positions.Add(ti); + if (qi == 0 || ti == 0) + { + break; + } + + qi--; + ti--; + } + } + + positions.Reverse(); + } + + return (scores[area - 1], positions); + } + + private static string FoldCase(string input) + { + return input.ToUpperInvariant(); + } + + private static int ComputeCharScore( + char query, + char queryLower, + char? 
targetPrev, + char targetCurr, + char targetLower, + int matchSeqLen) + { + if (!ConsiderAsEqual(queryLower, targetLower)) + { + return 0; + } + + var score = 1; // Character match bonus + + if (matchSeqLen > 0) + { + score += matchSeqLen * 5; // Consecutive match bonus + } + + if (query == targetCurr) + { + score += 1; // Same case bonus + } + + if (targetPrev.HasValue) + { + var sepBonus = ScoreSeparator(targetPrev.Value); + if (sepBonus > 0) + { + score += sepBonus; + } + else if (char.IsUpper(targetCurr) && matchSeqLen == 0) + { + score += 2; // CamelCase bonus + } + } + else + { + score += 8; // Start of word bonus + } + + return score; + } + + private static bool ConsiderAsEqual(char a, char b) + { + return a == b || (a == '/' && b == '\\') || (a == '\\' && b == '/'); + } + + private static int ScoreSeparator(char ch) + { + return ch switch + { + '/' or '\\' => 5, + '_' or '-' or '.' or ' ' or '\'' or '"' or ':' => 4, + _ => 0, + }; + } +} diff --git a/src/modules/cmdpal/Tests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests.csproj b/src/modules/cmdpal/Tests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests.csproj new file mode 100644 index 0000000000..91d423031a --- /dev/null +++ b/src/modules/cmdpal/Tests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests/Microsoft.CommandPalette.Extensions.Toolkit.UnitTests.csproj @@ -0,0 +1,30 @@ + + + + + + $(MSBuildThisFileDirectory)..\..\..\..\..\ + false + true + Microsoft.CommandPalette.Extensions.Toolkit.UnitTests + $(SolutionDir)$(Platform)\$(Configuration)\WinUI3Apps\CmdPal\tests\ + false + false + enable + + + + + + + + + + + + + true + true + $(RepoRoot).pipelines\272MSSharedLibSN2048.snk + + \ No newline at end of file diff --git a/src/modules/cmdpal/extensionsdk/Microsoft.CommandPalette.Extensions.Toolkit/FuzzyStringMatcher.cs 
b/src/modules/cmdpal/extensionsdk/Microsoft.CommandPalette.Extensions.Toolkit/FuzzyStringMatcher.cs index 7ecfc74222..40491970b3 100644 --- a/src/modules/cmdpal/extensionsdk/Microsoft.CommandPalette.Extensions.Toolkit/FuzzyStringMatcher.cs +++ b/src/modules/cmdpal/extensionsdk/Microsoft.CommandPalette.Extensions.Toolkit/FuzzyStringMatcher.cs @@ -2,8 +2,10 @@ // The Microsoft Corporation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System.Buffers; using System.Globalization; - +using System.Runtime.CompilerServices; +using System.Text; using ToolGood.Words.Pinyin; namespace Microsoft.CommandPalette.Extensions.Toolkit; @@ -11,213 +13,1075 @@ namespace Microsoft.CommandPalette.Extensions.Toolkit; // Inspired by the fuzzy.rs from edit.exe public static class FuzzyStringMatcher { - private const int NOMATCH = 0; + private const int NoMatchScore = 0; + private const int StackAllocThreshold = 512; /// /// Gets a value indicating whether to support Chinese PinYin. /// Automatically enabled when the system UI culture is Simplified Chinese. /// - public static bool ChinesePinYinSupport { get; } = IsSimplifiedChinese(); + public static bool ChinesePinYinSupport { get; internal set; } = IsSimplifiedChinese(); private static bool IsSimplifiedChinese() { var culture = CultureInfo.CurrentUICulture; - - // Detect Simplified Chinese: zh-CN, zh-Hans, zh-Hans-* return culture.Name.StartsWith("zh-CN", StringComparison.OrdinalIgnoreCase) || culture.Name.StartsWith("zh-Hans", StringComparison.OrdinalIgnoreCase); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static PreparedFuzzyQuery GetOrPrepareThreadCached(string needle, bool removeDiacritics) + { + return PreparedFuzzyQueryThreadCache.GetOrPrepare(needle, removeDiacritics); + } + + /// + /// Prepare a query for repeated scoring against many targets. 
+ /// + private static PreparedFuzzyQuery PrepareQuery(string input, bool mayNeedDiacriticsRemoval = false) + => new(input, precomputeNoDiacritics: mayNeedDiacriticsRemoval); + + // ============================================================ + // Public API + // ============================================================ public static int ScoreFuzzy(string needle, string haystack, bool allowNonContiguousMatches = true) { - var (s, _) = ScoreFuzzyWithPositions(needle, haystack, allowNonContiguousMatches); - return s; + return ScoreFuzzy(needle, haystack, allowNonContiguousMatches, removeDiacritics: true); + } + + public static int ScoreFuzzy(string needle, string haystack, bool allowNonContiguousMatches, bool removeDiacritics) + { + var query = GetOrPrepareThreadCached(needle, removeDiacritics); + return ScoreBestVariant(in query, haystack, allowNonContiguousMatches, removeDiacritics); } public static (int Score, List Positions) ScoreFuzzyWithPositions(string needle, string haystack, bool allowNonContiguousMatches) - => ScoreAllFuzzyWithPositions(needle, haystack, allowNonContiguousMatches).MaxBy(i => i.Score); - - public static IEnumerable<(int Score, List Positions)> ScoreAllFuzzyWithPositions(string needle, string haystack, bool allowNonContiguousMatches) { - List needles = [needle]; - List haystacks = [haystack]; + return ScoreFuzzyWithPositions(needle, haystack, allowNonContiguousMatches, removeDiacritics: true); + } - if (ChinesePinYinSupport) + public static (int Score, List Positions) ScoreFuzzyWithPositions( + string needle, string haystack, bool allowNonContiguousMatches, bool removeDiacritics) + { + var query = GetOrPrepareThreadCached(needle, removeDiacritics); + return ScoreBestVariantWithPositions(in query, haystack, allowNonContiguousMatches, removeDiacritics); + } + + internal static void ClearCache() + { + PreparedFuzzyQueryThreadCache.Clear(); + } + + // ============================================================ + // Best-variant selection + // 
============================================================ + [SkipLocalsInit] + private static int ScoreBestVariant( + in PreparedFuzzyQuery query, + string haystack, + bool allowNonContiguousMatches, + bool removeDiacritics) + { + if (string.IsNullOrEmpty(haystack)) { - // Remove IME composition split characters. - var input = needle.Replace("'", string.Empty); - needles.Add(WordsHelper.GetPinyin(input)); - if (WordsHelper.HasChinese(haystack)) + return NoMatchScore; + } + + var tLen = haystack.Length; + + // Fold haystack ONCE + using var tFoldBuffer = new RentedSpan(tLen, stackalloc char[Math.Min(tLen, StackAllocThreshold)]); + Folding.FoldInto(haystack, removeDiacritics, tFoldBuffer.Span); + ReadOnlySpan tFold = tFoldBuffer.Span; + + var qFold = query.GetPrimaryFolded(removeDiacritics); + var best = ScoreCore(query.PrimaryRaw, qFold, haystack, tFold, allowNonContiguousMatches); + + if (!ChinesePinYinSupport || !query.HasSecondary) + { + return best; + } + + var qRawSecondary = query.SecondaryRaw ?? string.Empty; + var qFoldSecondary = query.GetSecondaryFolded(removeDiacritics); + + best = Math.Max(best, ScoreCore(qRawSecondary, qFoldSecondary, haystack, tFold, allowNonContiguousMatches)); + + if (!WordsHelper.HasChinese(haystack)) + { + return best; + } + + // Fold PinYin target ONCE + var tPinYin = WordsHelper.GetPinyin(haystack) ?? 
string.Empty; + var tPinYinLen = tPinYin.Length; + + using var tPinYinFoldBuffer = new RentedSpan(tPinYinLen, stackalloc char[Math.Min(tPinYinLen, StackAllocThreshold)]); + Folding.FoldInto(tPinYin, removeDiacritics, tPinYinFoldBuffer.Span); + ReadOnlySpan tPinYinFold = tPinYinFoldBuffer.Span; + + best = Math.Max(best, ScoreCore(query.PrimaryRaw, qFold, tPinYin, tPinYinFold, allowNonContiguousMatches)); + best = Math.Max(best, ScoreCore(qRawSecondary, qFoldSecondary, tPinYin, tPinYinFold, allowNonContiguousMatches)); + + return best; + } + + private static (int Score, List Positions) ScoreBestVariantWithPositions( + in PreparedFuzzyQuery query, + string haystack, + bool allowNonContiguousMatches, + bool removeDiacritics) + { + if (string.IsNullOrEmpty(haystack)) + { + return (NoMatchScore, []); + } + + var tLen = haystack.Length; + + // Fold haystack ONCE + using var tFoldBuffer = new RentedSpan(tLen, stackalloc char[Math.Min(tLen, StackAllocThreshold)]); + Folding.FoldInto(haystack, removeDiacritics, tFoldBuffer.Span); + ReadOnlySpan tFold = tFoldBuffer.Span; + + var needsPinYin = ChinesePinYinSupport && query.HasSecondary && WordsHelper.HasChinese(haystack); + var tPinYin = needsPinYin ? (WordsHelper.GetPinyin(haystack) ?? string.Empty) : string.Empty; + var tPinYinLen = tPinYin.Length; + + // Fold PinYin target if needed + using var tPinYinFoldBuffer = new RentedSpan( + needsPinYin ? tPinYinLen : 0, + needsPinYin ? stackalloc char[Math.Min(tPinYinLen, StackAllocThreshold)] : Span.Empty); + + if (needsPinYin) + { + Folding.FoldInto(tPinYin, removeDiacritics, tPinYinFoldBuffer.Span); + } + + ReadOnlySpan tPinYinFold = tPinYinFoldBuffer.Span; + + var qFoldPrimary = query.GetPrimaryFoldedString(removeDiacritics); + + // (primary query, original haystack) - get score AND positions + var (bestScore, bestPositions) = ScoreWithPositionsCore(query.PrimaryRaw, qFoldPrimary, haystack, tFold, allowNonContiguousMatches); + + // (primary query, pinyin target) - score only. 
+ // We only return positions for matches against the original haystack. + // For Pinyin variants, we typically don't show highlights in the UI since there's + // no 1:1 mapping back to the original characters' positions. + if (needsPinYin) + { + var score = ScoreCore(query.PrimaryRaw, qFoldPrimary, tPinYin, tPinYinFold, allowNonContiguousMatches); + if (score > bestScore) { - haystacks.Add(WordsHelper.GetPinyin(haystack)); + bestScore = score; } } - return needles.SelectMany(i => haystacks.Select(j => ScoreFuzzyWithPositionsInternal(i, j, allowNonContiguousMatches))); + if (ChinesePinYinSupport && query.HasSecondary) + { + var qRawSecondary = query.SecondaryRaw ?? string.Empty; + var qFoldSecondary = query.GetSecondaryFoldedString(removeDiacritics) ?? string.Empty; + + // (secondary query, original haystack) - get score AND positions + var (scoreSecondary, positionsSecondary) = ScoreWithPositionsCore( + qRawSecondary, qFoldSecondary, haystack, tFold, allowNonContiguousMatches); + + if (scoreSecondary > bestScore) + { + bestScore = scoreSecondary; + bestPositions = positionsSecondary; + } + + // (secondary query, pinyin target) - score only. + // Highlight positions are not returned for Pinyin variants. 
+ if (needsPinYin) + { + var score = ScoreCore(qRawSecondary, qFoldSecondary, tPinYin, tPinYinFold, allowNonContiguousMatches); + if (score > bestScore) + { + bestScore = score; + } + } + } + + return (bestScore, bestPositions); } - private static (int Score, List Positions) ScoreFuzzyWithPositionsInternal(string needle, string haystack, bool allowNonContiguousMatches) + // ============================================================ + // Core scoring + // ============================================================ + private static int ScoreCore( + ReadOnlySpan qRaw, + ReadOnlySpan qFold, + ReadOnlySpan tRaw, + ReadOnlySpan tFold, + bool allowNonContiguousMatches) { - if (string.IsNullOrEmpty(haystack) || string.IsNullOrEmpty(needle)) + var qLen = qRaw.Length; + var tLen = tRaw.Length; + + if (qLen == 0 || tLen < qLen || qFold.Length != qLen) { - return (NOMATCH, new List()); + return NoMatchScore; } - var target = haystack.ToCharArray(); - var query = needle.ToCharArray(); + return allowNonContiguousMatches + ? ScoreNonContiguous(qRaw, qFold, tRaw, tFold, qLen, tLen) + : ScoreContiguous(qRaw, qFold, tRaw, tFold).Score; + } - if (target.Length < query.Length) + private static (int Score, List Positions) ScoreWithPositionsCore( + ReadOnlySpan qRaw, + ReadOnlySpan qFold, + ReadOnlySpan tRaw, + ReadOnlySpan tFold, + bool allowNonContiguousMatches) + { + var qLen = qRaw.Length; + var tLen = tRaw.Length; + + if (qLen == 0 || tLen < qLen || qFold.Length != qLen) { - return (NOMATCH, new List()); + return (NoMatchScore, []); } - var targetUpper = FoldCase(haystack); - var queryUpper = FoldCase(needle); - var targetUpperChars = targetUpper.ToCharArray(); - var queryUpperChars = queryUpper.ToCharArray(); + return allowNonContiguousMatches + ? 
ScoreNonContiguousWithPositions(qRaw, qFold, tRaw, tFold, qLen, tLen) + : ScoreContiguousWithPositions(qRaw, qFold, tRaw, tFold); + } - var area = query.Length * target.Length; - var scores = new int[area]; - var matches = new int[area]; - - for (var qi = 0; qi < query.Length; qi++) + // ============================================================ + // Non-contiguous matching (score only) + // ============================================================ + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + [SkipLocalsInit] + private static int ScoreNonContiguous( + ReadOnlySpan qRaw, + ReadOnlySpan qFold, + ReadOnlySpan tRaw, + ReadOnlySpan tFold, + int qLen, + int tLen) + { + if (!Scoring.CanMatchSubsequence(qFold, tFold)) { - var qiOffset = qi * target.Length; - var qiPrevOffset = qi > 0 ? (qi - 1) * target.Length : 0; + return NoMatchScore; + } - for (var ti = 0; ti < target.Length; ti++) + using var dpBuffer = new RentedSpan(tLen * 2, stackalloc int[Math.Min(tLen * 2, StackAllocThreshold)]); + var scores = dpBuffer.Span[..tLen]; + var seqLens = dpBuffer.Span.Slice(tLen, tLen); + scores.Clear(); + seqLens.Clear(); + + for (var qi = 0; qi < qLen; qi++) + { + var qChar = qRaw[qi]; + var qCharFold = qFold[qi]; + + var leftScore = 0; + var diagScore = 0; + var diagSeqLen = 0; + + var isFirstRow = qi == 0; + var tiMax = tLen - qLen + qi; + + for (var ti = 0; ti <= tiMax; ti++) { - var currentIndex = qiOffset + ti; - var diagIndex = (qi > 0 && ti > 0) ? qiPrevOffset + ti - 1 : 0; - var leftScore = ti > 0 ? scores[currentIndex - 1] : 0; - var diagScore = (qi > 0 && ti > 0) ? scores[diagIndex] : 0; - var matchSeqLen = (qi > 0 && ti > 0) ? matches[diagIndex] : 0; + var upScore = scores[ti]; + var upSeqLen = seqLens[ti]; - var score = (diagScore == 0 && qi != 0) ? 0 : - ComputeCharScore( - query[qi], - queryUpperChars[qi], - ti != 0 ? 
target[ti - 1] : null, - target[ti], - targetUpperChars[ti], - matchSeqLen); - - var isValidScore = score != 0 && diagScore + score >= leftScore && - (allowNonContiguousMatches || qi > 0 || - targetUpperChars.Skip(ti).Take(queryUpperChars.Length).SequenceEqual(queryUpperChars)); - - if (isValidScore) + var charScore = 0; + if (diagScore != 0 || isFirstRow) { - matches[currentIndex] = matchSeqLen + 1; - scores[currentIndex] = diagScore + score; + var tCharFold = tFold[ti]; + if (qCharFold == tCharFold) + { + charScore = Scoring.ComputeCharScore( + qRawChar: qChar, + tHasPrev: ti != 0, + tRawCharPrev: ti != 0 ? tRaw[ti - 1] : '\0', + tRawCharCurr: tRaw[ti], + matchSeqLen: diagSeqLen); + } + } + + var candidateScore = diagScore + charScore; + + if (charScore != 0 && candidateScore >= leftScore) + { + scores[ti] = candidateScore; + seqLens[ti] = diagSeqLen + 1; } else { - matches[currentIndex] = NOMATCH; - scores[currentIndex] = leftScore; + scores[ti] = leftScore; + seqLens[ti] = 0; } + + leftScore = scores[ti]; + diagScore = upScore; + diagSeqLen = upSeqLen; + } + + if (leftScore == 0) + { + return NoMatchScore; + } + + if (qi == qLen - 1) + { + return leftScore; } } - var positions = new List(); - if (query.Length > 0 && target.Length > 0) + return scores[tLen - 1]; + } + + // ============================================================ + // Non-contiguous matching (with positions) + // ============================================================ + private static (int Score, List Positions) ScoreNonContiguousWithPositions( + ReadOnlySpan qRaw, + ReadOnlySpan qFold, + ReadOnlySpan tRaw, + ReadOnlySpan tFold, + int qLen, + int tLen) + { + if (!Scoring.CanMatchSubsequence(qFold, tFold)) { - var qi = query.Length - 1; - var ti = target.Length - 1; + return (NoMatchScore, []); + } - while (true) + var areaLong = (long)qLen * tLen; + if (areaLong is <= 0 or > int.MaxValue) + { + return (NoMatchScore, []); + } + + var area = (int)areaLong; + var bitCount = (area + 63) >> 
6; + + using var bitsBuffer = new RentedSpan(bitCount, stackalloc ulong[Math.Min(bitCount, StackAllocThreshold / 8)]); + bitsBuffer.Span.Clear(); + + using var dpBuffer = new RentedSpan(tLen * 2, stackalloc int[Math.Min(tLen * 2, StackAllocThreshold)]); + var scores = dpBuffer.Span[..tLen]; + var seqLens = dpBuffer.Span.Slice(tLen, tLen); + scores.Clear(); + seqLens.Clear(); + + for (var qi = 0; qi < qLen; qi++) + { + var qChar = qRaw[qi]; + var qCharFold = qFold[qi]; + + var leftScore = 0; + var diagScore = 0; + var diagSeqLen = 0; + + var isFirstRow = qi == 0; + var rowBase = qi * tLen; + + for (var ti = 0; ti < tLen; ti++) { - var index = (qi * target.Length) + ti; - if (matches[index] == NOMATCH) - { - if (ti == 0) - { - break; - } + var upScore = scores[ti]; + var upSeqLen = seqLens[ti]; - ti--; + var charScore = 0; + if (diagScore != 0 || isFirstRow) + { + var tCharFold = tFold[ti]; + if (qCharFold == tCharFold) + { + charScore = Scoring.ComputeCharScore( + qRawChar: qChar, + tHasPrev: ti != 0, + tRawCharPrev: ti != 0 ? 
tRaw[ti - 1] : '\0', + tRawCharCurr: tRaw[ti], + matchSeqLen: diagSeqLen); + } + } + + var candidateScore = diagScore + charScore; + + if (charScore != 0 && candidateScore >= leftScore) + { + scores[ti] = candidateScore; + seqLens[ti] = diagSeqLen + 1; + SetBit(bitsBuffer.Span, rowBase + ti); } else { - positions.Add(ti); - if (qi == 0 || ti == 0) - { - break; - } + scores[ti] = leftScore; + seqLens[ti] = 0; + } - qi--; - ti--; + leftScore = scores[ti]; + diagScore = upScore; + diagSeqLen = upSeqLen; + } + + if (leftScore == 0) + { + return (NoMatchScore, []); + } + } + + var finalScore = scores[tLen - 1]; + if (finalScore == 0) + { + return (NoMatchScore, []); + } + + // Backtrack to find positions + var positions = new List(qLen); + var q = qLen - 1; + var t = tLen - 1; + + while (true) + { + var bitIdx = (q * tLen) + t; + + if (!GetBit(bitsBuffer.Span, bitIdx)) + { + if (t == 0) + { + break; + } + + t--; + } + else + { + positions.Add(t); + if (q == 0 || t == 0) + { + break; + } + + q--; + t--; + } + } + + positions.Reverse(); + return (finalScore, positions); + } + + // ============================================================ + // Contiguous matching + // ============================================================ + private static (int Score, int Start) ScoreContiguous( + ReadOnlySpan qRaw, + ReadOnlySpan qFold, + ReadOnlySpan tRaw, + ReadOnlySpan tFold) + { + var qLen = qRaw.Length; + var tLen = tRaw.Length; + + if (qLen == 0 || tLen == 0 || tLen < qLen) + { + return (NoMatchScore, -1); + } + + var bestScore = NoMatchScore; + var bestStart = -1; + var searchStart = 0; + + while (searchStart <= tLen - qLen) + { + var relativeIdx = tFold.Slice(searchStart).IndexOf(qFold); + if (relativeIdx < 0) + { + break; + } + + var matchStart = searchStart + relativeIdx; + var score = 0; + + for (var i = 0; i < qLen; i++) + { + var ti = matchStart + i; + score += Scoring.ComputeCharScore( + qRawChar: qRaw[i], + tHasPrev: ti != 0, + tRawCharPrev: ti != 0 ? 
tRaw[ti - 1] : '\0', + tRawCharCurr: tRaw[ti], + matchSeqLen: i); + } + + if (score >= bestScore) + { + bestScore = score; + bestStart = matchStart; + } + + searchStart = matchStart + 1; + } + + return (bestScore, bestStart); + } + + private static (int Score, List Positions) ScoreContiguousWithPositions( + ReadOnlySpan qRaw, + ReadOnlySpan qFold, + ReadOnlySpan tRaw, + ReadOnlySpan tFold) + { + var (score, bestStart) = ScoreContiguous(qRaw, qFold, tRaw, tFold); + + if (bestStart < 0 || score == NoMatchScore) + { + return (NoMatchScore, []); + } + + var qLen = qRaw.Length; + var positions = new List(qLen); + for (var i = 0; i < qLen; i++) + { + positions.Add(bestStart + i); + } + + return (score, positions); + } + + // ============================================================ + // Bit manipulation helpers + // ============================================================ + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void SetBit(Span words, int idx) + { + words[idx >> 6] |= 1UL << (idx & 63); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool GetBit(ReadOnlySpan words, int idx) + { + return ((words[idx >> 6] >> (idx & 63)) & 1UL) != 0; + } + + // ============================================================ + // Scoring helpers + // ============================================================ + private static class Scoring + { + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + internal static bool CanMatchSubsequence(ReadOnlySpan qFold, ReadOnlySpan tFold) + { + var qi = 0; + var qLen = qFold.Length; + + foreach (var tChar in tFold) + { + if (qi < qLen && qFold[qi] == tChar) + { + qi++; } } - positions.Reverse(); + return qi == qLen; } - return (scores[area - 1], positions); - } - - private static string FoldCase(string input) - { - return input.ToUpperInvariant(); - } - - private static int ComputeCharScore( - char query, - char queryLower, - char? 
targetPrev, - char targetCurr, - char targetLower, - int matchSeqLen) - { - if (!ConsiderAsEqual(queryLower, targetLower)) + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + internal static int ComputeCharScore( + char qRawChar, + bool tHasPrev, + char tRawCharPrev, + char tRawCharCurr, + int matchSeqLen) { - return 0; - } + var score = Bonus.CharacterMatch; - var score = 1; // Character match bonus - - if (matchSeqLen > 0) - { - score += matchSeqLen * 5; // Consecutive match bonus - } - - if (query == targetCurr) - { - score += 1; // Same case bonus - } - - if (targetPrev.HasValue) - { - var sepBonus = ScoreSeparator(targetPrev.Value); - if (sepBonus > 0) + if (matchSeqLen > 0) { - score += sepBonus; + score += matchSeqLen * Bonus.ConsecutiveMultiplier; } - else if (char.IsUpper(targetCurr) && matchSeqLen == 0) + + var tCharCurrIsUpper = char.IsUpper(tRawCharCurr); + if (qRawChar == tRawCharCurr) { - score += 2; // CamelCase bonus + score += Bonus.ExactCase; + } + + if (!tHasPrev) + { + return score + Bonus.StringStart; + } + + var separatorBonus = GetSeparatorBonus(tRawCharPrev); + if (separatorBonus != 0) + { + return score + separatorBonus; + } + + if (matchSeqLen == 0 && tCharCurrIsUpper) + { + return score + Bonus.CamelCase; + } + + return score; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + private static int GetSeparatorBonus(char ch) + { + return ch switch + { + '/' or '\\' => Bonus.PathSeparator, + '_' or '-' or '.' or ' ' or '\'' or '"' or ':' => Bonus.WordSeparator, + _ => 0, + }; + } + } + + // ============================================================ + // Text folding + // ============================================================ + + // Folding: slash normalization + upper case + optional diacritics stripping + private static class Folding + { + // Cache maps an upper case char to its diacritics-stripped upper case char. 
+ // '\0' means "not cached yet". + private static readonly char[] StripCacheUpper = new char[char.MaxValue + 1]; + + /// + /// Folds into : + /// - Normalizes slashes: '\' -> '/' + /// - Upper case with char.ToUpperInvariant (length-preserving) + /// - Optionally strips diacritics (length-preserving) + /// + public static void FoldInto(ReadOnlySpan input, bool removeDiacritics, Span dest) + { + // Assumes dest.Length >= input.Length. + if (!removeDiacritics) + { + for (var i = 0; i < input.Length; i++) + { + var c = input[i]; + dest[i] = c == '\\' ? '/' : char.ToUpperInvariant(c); + } + + return; + } + + // ASCII cannot have diacritics (and ToUpperInvariant is cheap), but we STILL normalize slashes. + if (Ascii.IsValid(input)) + { + for (var i = 0; i < input.Length; i++) + { + var c = input[i]; + dest[i] = c == '\\' ? '/' : char.ToUpperInvariant(c); + } + + return; + } + + // Non-ASCII + removeDiacritics + for (var i = 0; i < input.Length; i++) + { + var c = input[i]; + var upper = c == '\\' ? '/' : char.ToUpperInvariant(c); + dest[i] = StripDiacriticsFromUpper(upper); } } - else + + /// + /// Creates a folded string for fast equality comparisons: + /// - ALWAYS normalizes slashes: '\' -> '/' + /// - Upper case with char.ToUpperInvariant (length-preserving) + /// - Optionally strips diacritics (length-preserving) + /// + /// Returns the original when it is already in the desired form. + /// + public static string FoldForComparison(string input, bool removeDiacritics) { - score += 8; // Start of word bonus + if (string.IsNullOrEmpty(input)) + { + return string.Empty; + } + + // If already fully normalized (slashes + casing), return input without allocating. + // Note: when removeDiacritics==true we still must run diacritics stripping on non-ASCII, + // so the "no-op" path is only safe if removeDiacritics==false OR input is ASCII. 
+ if (!removeDiacritics) + { + if (IsAlreadyFoldedAndSlashNormalized(input)) + { + return input; + } + + return string.Create(input.Length, input, static (dst, src) => + { + for (var i = 0; i < src.Length; i++) + { + var c = src[i]; + dst[i] = c == '\\' ? '/' : char.ToUpperInvariant(c); + } + }); + } + + // removeDiacritics == true + if (Ascii.IsValid(input)) + { + // IMPORTANT: still normalize slashes for ASCII so caller can do simple equality checks. + if (IsAlreadyFoldedAndSlashNormalized(input)) + { + return input; + } + + return string.Create(input.Length, input, static (dst, src) => + { + for (var i = 0; i < src.Length; i++) + { + var c = src[i]; + dst[i] = c == '\\' ? '/' : char.ToUpperInvariant(c); + } + }); + } + + // Non-ASCII + removeDiacritics: must fold + strip (and still normalize slashes). + return string.Create(input.Length, input, static (dst, src) => + { + for (var i = 0; i < src.Length; i++) + { + var c = src[i]; + var upper = c == '\\' ? '/' : char.ToUpperInvariant(c); + dst[i] = StripDiacriticsFromUpper(upper); + } + }); } - return score; - } - - private static bool ConsiderAsEqual(char a, char b) - { - return a == b || (a == '/' && b == '\\') || (a == '\\' && b == '/'); - } - - private static int ScoreSeparator(char ch) - { - return ch switch + // ============================================================ + // "No-op" detector (fast, avoids ToUpperInvariant per char for CJK) + // ============================================================ + private static bool IsAlreadyFoldedAndSlashNormalized(string input) { - '/' or '\\' => 5, - '_' or '-' or '.' or ' ' or '\'' or '"' or ':' => 4, - _ => 0, - }; + var sawNonAscii = false; + + // Tier 1: cheap ASCII checks. + for (var i = 0; i < input.Length; i++) + { + var c = input[i]; + + if (c == '\\') + { + return false; + } + + // ASCII lowercase present => would change. 
+ if ((uint)(c - 'a') <= ('z' - 'a')) + { + return false; + } + + if (c > 0x7F) + { + sawNonAscii = true; + } + } + + // Tier 2: only when non-ASCII exists; avoid char.ToUpperInvariant for scripts without case. + if (sawNonAscii) + { + for (var i = 0; i < input.Length; i++) + { + var c = input[i]; + if (c <= 0x7F) + { + continue; + } + + var cat = CharUnicodeInfo.GetUnicodeCategory(c); + + // Lowercase/Titlecase letters will change under ToUpperInvariant. + if (cat is UnicodeCategory.LowercaseLetter or UnicodeCategory.TitlecaseLetter) + { + return false; + } + } + } + + return true; + } + + // ============================================================ + // Diacritics stripping (cached; input is expected to be uppercase already) + // ============================================================ + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static char StripDiacriticsFromUpper(char upper) + { + if (upper <= 0x7F) + { + return upper; + } + + var cached = StripCacheUpper[upper]; + if (cached != '\0') + { + return cached; + } + + var mapped = StripDiacriticsSlow(upper); + StripCacheUpper[upper] = mapped; + return mapped; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static char StripDiacriticsSlow(char upper) + { + var baseChar = FirstNonMark(upper, NormalizationForm.FormD); + if (baseChar == '\0' || baseChar == upper) + { + var kd = FirstNonMark(upper, NormalizationForm.FormKD); + if (kd != '\0') + { + baseChar = kd; + } + } + + // Ensure result remains uppercase invariant. + return char.ToUpperInvariant(baseChar == '\0' ? 
upper : baseChar); + + static char FirstNonMark(char c, NormalizationForm form) + { + var normalized = c.ToString().Normalize(form); + + foreach (var ch in normalized) + { + var cat = CharUnicodeInfo.GetUnicodeCategory(ch); + if (cat is not (UnicodeCategory.NonSpacingMark + or UnicodeCategory.SpacingCombiningMark + or UnicodeCategory.EnclosingMark)) + { + return ch; + } + } + + return '\0'; + } + } + } + + // ============================================================ + // Text utilities + // ============================================================ + private static class Text + { + internal static string RemoveApostrophes(ReadOnlySpan input) + { + var firstIdx = input.IndexOf('\''); + if (firstIdx < 0) + { + return input.ToString(); + } + + var count = 1; + for (var i = firstIdx + 1; i < input.Length; i++) + { + if (input[i] == '\'') + { + count++; + } + } + + return string.Create(input.Length - count, input.ToString(), static (dest, src) => + { + var destIdx = 0; + foreach (var c in src) + { + if (c != '\'') + { + dest[destIdx++] = c; + } + } + }); + } + } + + // ============================================================ + // Scoring bonuses + // ============================================================ + private static class Bonus + { + public const int CharacterMatch = 1; + public const int ConsecutiveMultiplier = 5; + public const int ExactCase = 1; + public const int StringStart = 8; + public const int PathSeparator = 5; + public const int WordSeparator = 4; + public const int CamelCase = 2; + } + + // ============================================================ + // Memory management + // ============================================================ + private ref struct RentedSpan + { + private readonly Span _span; + private T[]? 
_poolArray; + + public readonly Span Span => _span; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public RentedSpan(int length, Span stackBuffer) + { + if (length <= stackBuffer.Length) + { + _poolArray = null; + _span = stackBuffer[..length]; + } + else + { + _poolArray = ArrayPool.Shared.Rent(length); + _span = new Span(_poolArray, 0, length); + } + } + + public static implicit operator Span(RentedSpan rented) => rented._span; + + public static implicit operator ReadOnlySpan(RentedSpan rented) => rented._span; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Dispose() + { + var toReturn = _poolArray; + if (toReturn != null) + { + _poolArray = null; + ArrayPool.Shared.Return(toReturn, clearArray: RuntimeHelpers.IsReferenceOrContainsReferences()); + } + } + } + + // ============================================================ + // Prepared query + // ============================================================ + private readonly struct PreparedFuzzyQuery + { + public readonly string PrimaryRaw; + internal readonly string? SecondaryRaw; + + internal readonly string PrimaryFolded; + internal readonly string? PrimaryFoldedNoDiacritics; + + internal readonly string? SecondaryFolded; + internal readonly string? SecondaryFoldedNoDiacritics; + + internal PreparedFuzzyQuery(string primaryRaw, bool precomputeNoDiacritics) + { + PrimaryRaw = primaryRaw ?? string.Empty; + + PrimaryFolded = Folding.FoldForComparison(PrimaryRaw, removeDiacritics: false); + PrimaryFoldedNoDiacritics = precomputeNoDiacritics + ? Folding.FoldForComparison(PrimaryRaw, removeDiacritics: true) + : null; + + if (ChinesePinYinSupport) + { + var input = Text.RemoveApostrophes(PrimaryRaw); + SecondaryRaw = WordsHelper.GetPinyin(input) ?? string.Empty; + + SecondaryFolded = Folding.FoldForComparison(SecondaryRaw, removeDiacritics: false); + SecondaryFoldedNoDiacritics = precomputeNoDiacritics + ? 
Folding.FoldForComparison(SecondaryRaw, removeDiacritics: true) + : null; + } + else + { + SecondaryRaw = null; + SecondaryFolded = null; + SecondaryFoldedNoDiacritics = null; + } + } + + internal bool HasSecondary => SecondaryFolded is not null; + + internal string GetPrimaryFoldedString(bool removeDiacritics) + { + return !removeDiacritics + ? PrimaryFolded + : (PrimaryFoldedNoDiacritics ?? Folding.FoldForComparison(PrimaryRaw, removeDiacritics: true)); + } + + internal string? GetSecondaryFoldedString(bool removeDiacritics) + { + if (SecondaryFolded is null) + { + return null; + } + + return !removeDiacritics + ? SecondaryFolded + : (SecondaryFoldedNoDiacritics ?? Folding.FoldForComparison(SecondaryRaw ?? string.Empty, removeDiacritics: true)); + } + + internal ReadOnlySpan GetPrimaryFolded(bool removeDiacritics) + { + return GetPrimaryFoldedString(removeDiacritics).AsSpan(); + } + + internal ReadOnlySpan GetSecondaryFolded(bool removeDiacritics) + { + return GetSecondaryFoldedString(removeDiacritics).AsSpan(); + } + } + + // ============================================================ + // Thread-local query cache + // ============================================================ + private static class PreparedFuzzyQueryThreadCache + { + [ThreadStatic] + private static Cache? _cache; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Cache GetCache() + { + return _cache ??= new Cache(); + } + + public static void Clear() + { + _cache = null; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static PreparedFuzzyQuery GetOrPrepare(string? 
needle, bool removeDiacritics) + { + needle ??= string.Empty; + + var cache = GetCache(); + + if (string.Equals(cache.Needle, needle, StringComparison.Ordinal)) + { + if (!removeDiacritics || cache.HasDiacriticsVersion) + { + return cache.Query; + } + + cache.Query = PrepareQuery(needle, true); + cache.HasDiacriticsVersion = true; + return cache.Query; + } + + cache.Needle = needle; + cache.Query = PrepareQuery(needle, removeDiacritics); + cache.HasDiacriticsVersion = removeDiacritics; + return cache.Query; + } + + private sealed class Cache + { + public string? Needle { get; set; } + + public PreparedFuzzyQuery Query { get; set; } + + public bool HasDiacriticsVersion { get; set; } + } } } diff --git a/src/modules/cmdpal/extensionsdk/Microsoft.CommandPalette.Extensions.Toolkit/Microsoft.CommandPalette.Extensions.Toolkit.csproj b/src/modules/cmdpal/extensionsdk/Microsoft.CommandPalette.Extensions.Toolkit/Microsoft.CommandPalette.Extensions.Toolkit.csproj index 24f2c50c5a..bce6dc1016 100644 --- a/src/modules/cmdpal/extensionsdk/Microsoft.CommandPalette.Extensions.Toolkit/Microsoft.CommandPalette.Extensions.Toolkit.csproj +++ b/src/modules/cmdpal/extensionsdk/Microsoft.CommandPalette.Extensions.Toolkit/Microsoft.CommandPalette.Extensions.Toolkit.csproj @@ -56,7 +56,7 @@ PreserveNewest - + @@ -83,4 +83,8 @@ IL2081;$(WarningsNotAsErrors) + + + +