mirror of
https://github.com/microsoft/PowerToys.git
synced 2026-02-24 04:00:02 +01:00
CmdPal: Upgrade FuzzyStringMatcher in the Command Palette Extensions SDK (#44809)
## Summary of the Pull Request This PR upgrades the `FuzzyStringMatcher` used in the Command Palette Extensions SDK with a focus on performance, memory efficiency, and improved matching behavior, while preserving compatibility with the existing API. This PR is a backwards-compatible alternative to the precomputed fuzzy matcher introduced in another PR. The new implementation is designed as a drop-in replacement. Any behavioral differences are intentional and primarily related to improved diacritic handling, scoring consistency, and correctness of highlight positions. Changes: - Keeps the existing public API intact and preserves behavior in nearly all cases. - Enables diacritics-insensitive matching by default, improving results across accented and non-English languages. - Significantly improves performance, with measured speedups in the range of ~5–20 times, depending on scenario and input size. - Reduces heap allocations to near zero by using stack allocation and pooled buffers instead of large per-match DP arrays. - Simplifies and optimizes matching logic: - Folds the haystack only once per match. - Uses rolling DP buffers instead of `O(query × target)` tables. - Replaces large match tables with a compact bitset when tracking highlight positions. - Improves consistency and correctness: - Normalizes path separators (`\` → `/`) during folding. - Avoids returning highlight positions for PinYin-only matches where no 1:1 mapping exists. - Introduces unit tests, including comparison tests against the legacy implementation to validate compatibility. <!-- Please review the items on the PR checklist before submitting--> ## PR Checklist - [x] Closes: #44066 <!-- - [ ] Closes: #yyy (add separate lines for additional resolved issues) --> - [ ] **Communication:** I've discussed this with core contributors already. 
If the work hasn't been agreed, this work might be rejected - [ ] **Tests:** Added/updated and all pass - [ ] **Localization:** All end-user-facing strings can be localized - [ ] **Dev docs:** Added/updated - [ ] **New binaries:** Added on the required places - [ ] [JSON for signing](https://github.com/microsoft/PowerToys/blob/main/.pipelines/ESRPSigning_core.json) for new binaries - [ ] [WXS for installer](https://github.com/microsoft/PowerToys/blob/main/installer/PowerToysSetup/Product.wxs) for new binaries and localization folder - [ ] [YML for CI pipeline](https://github.com/microsoft/PowerToys/blob/main/.pipelines/ci/templates/build-powertoys-steps.yml) for new test projects - [ ] [YML for signed pipeline](https://github.com/microsoft/PowerToys/blob/main/.pipelines/release.yml) - [ ] **Documentation updated:** If checked, please file a pull request on [our docs repo](https://github.com/MicrosoftDocs/windows-uwp/tree/docs/hub/powertoys) and link it here: #xxx <!-- Provide a more detailed description of the PR, other things fixed, or any additional comments/features here --> ## Detailed Description of the Pull Request / Additional comments <!-- Describe how you validated the behavior. Add automated tests wherever possible, but list manual validation steps taken as well --> ## Validation Steps Performed
This commit is contained in:
@@ -30,6 +30,7 @@
|
||||
"src\\modules\\cmdpal\\Tests\\Microsoft.CmdPal.Ext.WindowWalker.UnitTests\\Microsoft.CmdPal.Ext.WindowWalker.UnitTests.csproj",
|
||||
"src\\modules\\cmdpal\\Tests\\Microsoft.CmdPal.UI.ViewModels.UnitTests\\Microsoft.CmdPal.UI.ViewModels.UnitTests.csproj",
|
||||
"src\\modules\\cmdpal\\Tests\\Microsoft.CmdPal.UITests\\Microsoft.CmdPal.UITests.csproj",
|
||||
"src\\modules\\cmdpal\\Tests\\Microsoft.CommandPalette.Extensions.Toolkit.UnitTests\\Microsoft.CommandPalette.Extensions.Toolkit.UnitTests.csproj",
|
||||
"src\\modules\\cmdpal\\ext\\Microsoft.CmdPal.Ext.Apps\\Microsoft.CmdPal.Ext.Apps.csproj",
|
||||
"src\\modules\\cmdpal\\ext\\Microsoft.CmdPal.Ext.Bookmark\\Microsoft.CmdPal.Ext.Bookmarks.csproj",
|
||||
"src\\modules\\cmdpal\\ext\\Microsoft.CmdPal.Ext.Calc\\Microsoft.CmdPal.Ext.Calc.csproj",
|
||||
|
||||
@@ -0,0 +1,235 @@
|
||||
// Copyright (c) Microsoft Corporation
|
||||
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||
// See the LICENSE file in the project root for more information.
|
||||
|
||||
using System.Collections.Generic;
|
||||
using Microsoft.CommandPalette.Extensions.Toolkit.UnitTests.Legacy;
|
||||
using Microsoft.VisualStudio.TestTools.UnitTesting;
|
||||
|
||||
namespace Microsoft.CommandPalette.Extensions.Toolkit.UnitTests;
|
||||
|
||||
[TestClass]
|
||||
public class FuzzyMatcherComparisonTests
|
||||
{
|
||||
/// <summary>
/// Needle/haystack pairs supplied to every test in this class through <c>[DynamicData(nameof(TestData))]</c>.
/// Each row is <c>[needle, haystack]</c>.
/// </summary>
/// <remarks>
/// NOTE(review): some rows are textually identical in this copy (for example the two
/// <c>["p t", "PowerToys"]</c> rows and the repeated <c>[" ", " "]</c> rows). Presumably they
/// originally differed in the amount of whitespace inside the literals; confirm against the
/// upstream file before de-duplicating.
/// </remarks>
public static IEnumerable<object[]> TestData =>
|
||||
[
|
||||
["a", "a"],
|
||||
["a", "A"],
|
||||
["A", "a"],
|
||||
["abc", "abc"],
|
||||
["abc", "axbycz"],
|
||||
["abc", "abxcyz"],
|
||||
["sln", "solution.sln"],
|
||||
["vs", "visualstudio"],
|
||||
["test", "Test"],
|
||||
["pt", "PowerToys"],
|
||||
["p/t", "power\\toys"],
|
||||
["p\\t", "power/toys"],
|
||||
["c/w", "c:\\windows"],
|
||||
["foo", "bar"],
|
||||
["verylongstringthatdoesnotmatch", "short"],
|
||||
[string.Empty, "anything"],
|
||||
["something", string.Empty],
|
||||
["git", "git"],
|
||||
["em", "Emmy"],
|
||||
["my", "Emmy"],
|
||||
["word", "word"],
|
||||
["wd", "word"],
|
||||
["w d", "word"],
|
||||
["a", "ba"],
|
||||
["a", "ab"],
|
||||
["a", "bab"],
|
||||
["z", "abcdefg"],
|
||||
["CC", "CamelCase"],
|
||||
["cc", "camelCase"],
|
||||
["cC", "camelCase"],
|
||||
["some", "awesome"],
|
||||
["some", "somewhere"],
|
||||
["1", "1"],
|
||||
["1", "2"],
|
||||
[".", "."],
|
||||
["f.t", "file.txt"],
|
||||
["excel", "Excel"],
|
||||
["Excel", "excel"],
|
||||
["PowerPoint", "Power Point"],
|
||||
["power point", "PowerPoint"],
|
||||
["visual studio code", "Visual Studio Code"],
|
||||
["vsc", "Visual Studio Code"],
|
||||
["code", "Visual Studio Code"],
|
||||
["vs code", "Visual Studio Code"],
|
||||
["word", "Microsoft Word"],
|
||||
["ms word", "Microsoft Word"],
|
||||
["browser", "Internet Explorer"],
|
||||
["chrome", "Google Chrome"],
|
||||
["edge", "Microsoft Edge"],
|
||||
["term", "Windows Terminal"],
|
||||
["cmd", "Command Prompt"],
|
||||
["calc", "Calculator"],
|
||||
["snipping", "Snipping Tool"],
|
||||
["note", "Notepad"],
|
||||
["file expl", "File Explorer"],
|
||||
["settings", "Settings"],
|
||||
["p t", "PowerToys"],
|
||||
["p t", "PowerToys"],
|
||||
[" v ", " Visual Studio "],
|
||||
[" a b ", " a b c d "],
|
||||
[string.Empty, string.Empty],
|
||||
[" ", " "],
|
||||
[" ", " "],
|
||||
[" ", "abc"],
|
||||
["abc", " "],
|
||||
[" ", " "],
|
||||
[" ", " a b "],
|
||||
["sh", "ShangHai"],
|
||||
["bj", "BeiJing"],
|
||||
["bj", "北京"],
|
||||
["sh", "上海"],
|
||||
["nh", "你好"],
|
||||
["bj", "Beijing"],
|
||||
["hello", "你好"],
|
||||
["nihao", "你好"],
|
||||
["rmb", "人民币"],
|
||||
["zwr", "中文"],
|
||||
["zw", "中文"],
|
||||
["fbr", "foobar"],
|
||||
["w11", "windows 11"],
|
||||
["pwr", "powershell"],
|
||||
["vm", "void main"],
|
||||
["ps", "PowerShell"],
|
||||
["az", "Azure"],
|
||||
["od", "onedrive"],
|
||||
["gc", "google chrome"],
|
||||
["ff", "firefox"],
|
||||
["fs", "file_system"],
|
||||
["pt", "power-toys"],
|
||||
["jt", "json.test"],
|
||||
["ps", "power shell"],
|
||||
["ps", "power'shell"],
|
||||
["ps", "power\"shell"],
|
||||
["hw", "hello:world"],
|
||||
["abc", "a_b_c"],
|
||||
["abc", "a-b-c"],
|
||||
["abc", "a.b.c"],
|
||||
["abc", "a b c"],
|
||||
["abc", "a'b'c"],
|
||||
["abc", "a\"b\"c"],
|
||||
["abc", "a:b:c"],
|
||||
["_a", "_a"],
|
||||
["a_", "a_"],
|
||||
["-a", "-a"],
|
||||
["a-", "a-"]
|
||||
];
|
||||
|
||||
/// <summary>
/// Checks that the new matcher produces the same score as the legacy matcher
/// for each needle/haystack pair, using the default matching options.
/// </summary>
[TestMethod]
[DynamicData(nameof(TestData))]
public void CompareScores(string needle, string haystack)
{
    var expected = LegacyFuzzyStringMatcher.ScoreFuzzy(needle, haystack);
    var actual = FuzzyStringMatcher.ScoreFuzzy(needle, haystack);

    Assert.AreEqual(
        expected,
        actual,
        $"Score mismatch for needle='{needle}', haystack='{haystack}'");
}
|
||||
|
||||
/// <summary>
/// Checks that the new matcher reports the same score and the same
/// highlight positions (same count, same values, same order) as the legacy matcher.
/// </summary>
[TestMethod]
[DynamicData(nameof(TestData))]
public void ComparePositions(string needle, string haystack)
{
    var (expectedScore, expectedPositions) = LegacyFuzzyStringMatcher.ScoreFuzzyWithPositions(needle, haystack, true);
    var (actualScore, actualPositions) = FuzzyStringMatcher.ScoreFuzzyWithPositions(needle, haystack, true);

    Assert.AreEqual(expectedScore, actualScore, $"Score mismatch (with pos) for needle='{needle}', haystack='{haystack}'");

    // A missing list is compared as if it were an empty one.
    expectedPositions ??= [];
    actualPositions ??= [];

    // Rendered once up front so every failure message shows both complete lists.
    var legacyDump = string.Join(',', expectedPositions);
    var newDump = string.Join(',', actualPositions);

    Assert.AreEqual(expectedPositions.Count, actualPositions.Count, $"Position count mismatch: Legacy=[{legacyDump}], New=[{newDump}]");

    var index = 0;
    while (index < expectedPositions.Count)
    {
        Assert.AreEqual(expectedPositions[index], actualPositions[index], $"Position mismatch at index {index}: Legacy=[{legacyDump}], New=[{newDump}]");
        index++;
    }
}
|
||||
|
||||
/// <summary>
/// Checks that both matchers agree on the score when non-contiguous matches are disallowed.
/// </summary>
[TestMethod]
[DynamicData(nameof(TestData))]
public void CompareScores_ContiguousOnly(string needle, string haystack)
{
    var expected = LegacyFuzzyStringMatcher.ScoreFuzzy(needle, haystack, allowNonContiguousMatches: false);
    var actual = FuzzyStringMatcher.ScoreFuzzy(needle, haystack, allowNonContiguousMatches: false);

    Assert.AreEqual(
        expected,
        actual,
        $"Score mismatch (contiguous only) for needle='{needle}', haystack='{haystack}'");
}
|
||||
|
||||
/// <summary>
/// Checks that both matchers agree on the score while PinYin support is switched on for both.
/// The previous values of the two static switches are restored afterwards, even on failure.
/// </summary>
[TestMethod]
[DynamicData(nameof(TestData))]
public void CompareScores_PinyinEnabled(string needle, string haystack)
{
    var savedNewFlag = FuzzyStringMatcher.ChinesePinYinSupport;
    var savedLegacyFlag = LegacyFuzzyStringMatcher.ChinesePinYinSupport;

    try
    {
        FuzzyStringMatcher.ChinesePinYinSupport = true;
        LegacyFuzzyStringMatcher.ChinesePinYinSupport = true;

        var expected = LegacyFuzzyStringMatcher.ScoreFuzzy(needle, haystack);
        var actual = FuzzyStringMatcher.ScoreFuzzy(needle, haystack);

        Assert.AreEqual(
            expected,
            actual,
            $"Score mismatch (Pinyin enabled) for needle='{needle}', haystack='{haystack}'");
    }
    finally
    {
        // The switches are static, so they must be put back for the tests that run next.
        FuzzyStringMatcher.ChinesePinYinSupport = savedNewFlag;
        LegacyFuzzyStringMatcher.ChinesePinYinSupport = savedLegacyFlag;
    }
}
|
||||
|
||||
/// <summary>
/// Checks that both matchers agree on score and highlight positions while PinYin support is
/// switched on for both. A case where the new matcher scores a match but returns no positions,
/// while the legacy matcher does return some, is accepted without comparing positions.
/// </summary>
[TestMethod]
[DynamicData(nameof(TestData))]
public void ComparePositions_PinyinEnabled(string needle, string haystack)
{
    var savedNewFlag = FuzzyStringMatcher.ChinesePinYinSupport;
    var savedLegacyFlag = LegacyFuzzyStringMatcher.ChinesePinYinSupport;

    try
    {
        FuzzyStringMatcher.ChinesePinYinSupport = true;
        LegacyFuzzyStringMatcher.ChinesePinYinSupport = true;

        var (expectedScore, expectedPositions) = LegacyFuzzyStringMatcher.ScoreFuzzyWithPositions(needle, haystack, true);
        var (actualScore, actualPositions) = FuzzyStringMatcher.ScoreFuzzyWithPositions(needle, haystack, true);

        Assert.AreEqual(expectedScore, actualScore, $"Score mismatch (with pos, Pinyin enabled) for needle='{needle}', haystack='{haystack}'");

        // A missing list is compared as if it were an empty one.
        expectedPositions ??= [];
        actualPositions ??= [];

        // A positive score with no positions from the new matcher indicates a secondary
        // (for example PinYin) match, for which the new matcher returns no positions.
        var positionsOmittedByNewMatcher =
            actualScore > 0 && actualPositions.Count == 0 && expectedPositions.Count > 0;
        if (positionsOmittedByNewMatcher)
        {
            return;
        }

        // Rendered once up front so every failure message shows both complete lists.
        var legacyDump = string.Join(',', expectedPositions);
        var newDump = string.Join(',', actualPositions);

        Assert.AreEqual(expectedPositions.Count, actualPositions.Count, $"Position count mismatch: Legacy=[{legacyDump}], New=[{newDump}]");

        var index = 0;
        while (index < expectedPositions.Count)
        {
            Assert.AreEqual(expectedPositions[index], actualPositions[index], $"Position mismatch at index {index}: Legacy=[{legacyDump}], New=[{newDump}]");
            index++;
        }
    }
    finally
    {
        // The switches are static, so they must be put back for the tests that run next.
        FuzzyStringMatcher.ChinesePinYinSupport = savedNewFlag;
        LegacyFuzzyStringMatcher.ChinesePinYinSupport = savedLegacyFlag;
    }
}
|
||||
}
|
||||
@@ -0,0 +1,85 @@
|
||||
// Copyright (c) Microsoft Corporation
|
||||
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||
// See the LICENSE file in the project root for more information.
|
||||
|
||||
using Microsoft.VisualStudio.TestTools.UnitTesting;
|
||||
|
||||
namespace Microsoft.CommandPalette.Extensions.Toolkit.UnitTests;
|
||||
|
||||
/// <summary>
/// Tests for the <c>removeDiacritics</c> option of <c>FuzzyStringMatcher.ScoreFuzzy</c>.
/// </summary>
[TestClass]
public class FuzzyMatcherDiacriticsTests
{
    /// <summary>
    /// With diacritics removal on, unaccented needles match accented haystacks.
    /// </summary>
    [TestMethod]
    public void ScoreFuzzy_WithDiacriticsRemoval_MatchesWithDiacritics()
    {
        // "eco" vs "école": the leading 'é' is expected to be compared as a plain 'e'.
        var ecoleScore = FuzzyStringMatcher.ScoreFuzzy("eco", "école", allowNonContiguousMatches: true, removeDiacritics: true);
        Assert.IsTrue(ecoleScore > 0, "Should match 'école' with 'eco' when diacritics are removed");

        // "uber" vs "über": the leading 'ü' is expected to be compared as a plain 'u'.
        var uberScore = FuzzyStringMatcher.ScoreFuzzy("uber", "über", allowNonContiguousMatches: true, removeDiacritics: true);
        Assert.IsTrue(uberScore > 0, "Should match 'über' with 'uber' when diacritics are removed");
    }

    /// <summary>
    /// With diacritics removal off, accented and unaccented letters are distinct characters.
    /// </summary>
    [TestMethod]
    public void ScoreFuzzy_WithoutDiacriticsRemoval_DoesNotMatchWhenCharactersDiffer()
    {
        // "école" = é(0) c(1) o(2) l(3) e(4). The only plain 'e' sits at index 4, after
        // 'c' and 'o', so the needle "e-c-o" cannot be found in order.
        var ecoleScore = FuzzyStringMatcher.ScoreFuzzy("eco", "école", allowNonContiguousMatches: true, removeDiacritics: false);
        Assert.AreEqual(0, ecoleScore, "Should not match 'école' with 'eco' when diacritics are NOT removed");

        // "über" = ü(0) b(1) e(2) r(3). 'b', 'e' and 'r' are all present, but the haystack
        // contains no plain 'u', so the first needle character never matches.
        var uberScore = FuzzyStringMatcher.ScoreFuzzy("uber", "über", allowNonContiguousMatches: true, removeDiacritics: false);
        Assert.AreEqual(0, uberScore, "Should not match 'über' with 'uber' when diacritics are NOT removed");
    }

    /// <summary>
    /// Calling <c>ScoreFuzzy</c> without the optional arguments behaves as diacritics-insensitive.
    /// </summary>
    [TestMethod]
    public void ScoreFuzzy_DefaultRemovesDiacritics()
    {
        var defaultScore = FuzzyStringMatcher.ScoreFuzzy("eco", "école");

        Assert.IsTrue(defaultScore > 0, "Default should remove diacritics and match 'école'");
    }

    /// <summary>
    /// Per-character expectations with diacritics removal enabled.
    /// </summary>
    [DataTestMethod]
    [DataRow("a", "à", true)]
    [DataRow("e", "é", true)]
    [DataRow("i", "ï", true)]
    [DataRow("o", "ô", true)]
    [DataRow("u", "ü", true)]
    [DataRow("c", "ç", true)]
    [DataRow("n", "ñ", true)]
    [DataRow("s", "ß", false)] // ß is expected NOT to fold to a plain 's' (NOTE(review): presumably because folding is a simple per-character invariant uppercasing — confirm against the matcher)
    public void VerifySpecificCharacters(string needle, string haystack, bool expectingMatch)
    {
        var score = FuzzyStringMatcher.ScoreFuzzy(needle, haystack, allowNonContiguousMatches: true, removeDiacritics: true);

        if (!expectingMatch)
        {
            Assert.AreEqual(0, score, $"Expected NO match for '{needle}' in '{haystack}' even with diacritics removal");
            return;
        }

        Assert.IsTrue(score > 0, $"Expected match for '{needle}' in '{haystack}' with diacritics removal");
    }

    /// <summary>
    /// For a pure-ASCII pair the diacritics setting must not influence the score.
    /// </summary>
    [TestMethod]
    public void VerifyBothPathsWorkSameForASCII()
    {
        const string Needle = "test";
        const string Haystack = "TestString";

        var withRemoval = FuzzyStringMatcher.ScoreFuzzy(Needle, Haystack, allowNonContiguousMatches: true, removeDiacritics: true);
        var withoutRemoval = FuzzyStringMatcher.ScoreFuzzy(Needle, Haystack, allowNonContiguousMatches: true, removeDiacritics: false);

        Assert.AreEqual(withRemoval, withoutRemoval, "Scores should be identical for ASCII strings regardless of diacritics setting");
    }
}
|
||||
@@ -0,0 +1,46 @@
|
||||
// Copyright (c) Microsoft Corporation
|
||||
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||
// See the LICENSE file in the project root for more information.
|
||||
|
||||
using Microsoft.VisualStudio.TestTools.UnitTesting;
|
||||
|
||||
namespace Microsoft.CommandPalette.Extensions.Toolkit.UnitTests;
|
||||
|
||||
/// <summary>
/// Tests for PinYin-based matching. Every test runs with
/// <c>FuzzyStringMatcher.ChinesePinYinSupport</c> switched on and a cleared matcher cache.
/// </summary>
[TestClass]
public class FuzzyMatcherPinyinLogicTests
{
    // Value of the static PinYin switch before the current test changed it.
    // MSTest runs TestInitialize, the test and TestCleanup on the same instance,
    // so an instance field is sufficient to carry it across.
    private bool _originalPinYinSupport;

    /// <summary>
    /// Remembers the current PinYin setting, then enables PinYin support and clears the cache.
    /// </summary>
    [TestInitialize]
    public void Setup()
    {
        _originalPinYinSupport = FuzzyStringMatcher.ChinesePinYinSupport;

        FuzzyStringMatcher.ChinesePinYinSupport = true;
        FuzzyStringMatcher.ClearCache();
    }

    /// <summary>
    /// Restores the PinYin setting that was active before the test and clears the cache.
    /// </summary>
    [TestCleanup]
    public void Cleanup()
    {
        // Restore rather than force 'false': the switch is static and shared with other
        // test classes, and its initial value is not necessarily 'false'.
        FuzzyStringMatcher.ChinesePinYinSupport = _originalPinYinSupport;
        FuzzyStringMatcher.ClearCache();
    }

    /// <summary>
    /// Each needle is expected to produce a positive score against its haystack
    /// while PinYin support is enabled.
    /// </summary>
    [DataTestMethod]
    [DataRow("bj", "北京")]
    [DataRow("sh", "上海")]
    [DataRow("nihao", "你好")]
    [DataRow("北京", "北京")]
    [DataRow("北京", "Beijing")]
    [DataRow("北", "北京")]
    [DataRow("你好", "nihao")]
    public void PinyinMatch_DataDriven(string needle, string haystack)
    {
        Assert.IsTrue(FuzzyStringMatcher.ScoreFuzzy(needle, haystack) > 0, $"Expected match for '{needle}' in '{haystack}'");
    }

    /// <summary>
    /// A match found only through PinYin scores above zero but reports no highlight positions.
    /// </summary>
    [TestMethod]
    public void PinyinPositions_ShouldBeEmpty()
    {
        var (score, positions) = FuzzyStringMatcher.ScoreFuzzyWithPositions("bj", "北京", true);

        Assert.IsTrue(score > 0);
        Assert.AreEqual(0, positions.Count);
    }
}
|
||||
@@ -0,0 +1,43 @@
|
||||
// Copyright (c) Microsoft Corporation
|
||||
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||
// See the LICENSE file in the project root for more information.
|
||||
|
||||
using Microsoft.VisualStudio.TestTools.UnitTesting;
|
||||
|
||||
namespace Microsoft.CommandPalette.Extensions.Toolkit.UnitTests;
|
||||
|
||||
/// <summary>
/// Verifies that null or empty needles and haystacks yield a zero score (and an empty,
/// non-null position list) instead of an exception.
/// </summary>
[TestClass]
public class FuzzyMatcherValidationTests
{
    /// <summary>
    /// All <c>ScoreFuzzy</c> call shapes return 0 for null/empty input.
    /// </summary>
    /// <param name="needle">Search text; null in some rows.</param>
    /// <param name="haystack">Text being searched; null in some rows.</param>
    [DataTestMethod]
    [DataRow(null, "haystack")]
    [DataRow("", "haystack")]
    [DataRow("needle", null)]
    [DataRow("needle", "")]
    [DataRow(null, null)]
    public void ScoreFuzzy_HandlesIncorrectInputs(string? needle, string? haystack)
    {
        // The parameters are declared nullable because the data rows really pass null;
        // '!' is applied at the call sites to exercise the matcher with those nulls.
        Assert.AreEqual(0, FuzzyStringMatcher.ScoreFuzzy(needle!, haystack!));
        Assert.AreEqual(0, FuzzyStringMatcher.ScoreFuzzy(needle!, haystack!, allowNonContiguousMatches: true, removeDiacritics: true));
        Assert.AreEqual(0, FuzzyStringMatcher.ScoreFuzzy(needle!, haystack!, allowNonContiguousMatches: false, removeDiacritics: false));
    }

    /// <summary>
    /// Both <c>ScoreFuzzyWithPositions</c> call shapes return score 0 and an empty,
    /// non-null position list for null/empty input.
    /// </summary>
    /// <param name="needle">Search text; null in some rows.</param>
    /// <param name="haystack">Text being searched; null in some rows.</param>
    [DataTestMethod]
    [DataRow(null, "haystack")]
    [DataRow("", "haystack")]
    [DataRow("needle", null)]
    [DataRow("needle", "")]
    [DataRow(null, null)]
    public void ScoreFuzzyWithPositions_HandlesIncorrectInputs(string? needle, string? haystack)
    {
        var (score1, pos1) = FuzzyStringMatcher.ScoreFuzzyWithPositions(needle!, haystack!, true);
        Assert.AreEqual(0, score1);
        Assert.IsNotNull(pos1);
        Assert.AreEqual(0, pos1.Count);

        var (score2, pos2) = FuzzyStringMatcher.ScoreFuzzyWithPositions(needle!, haystack!, allowNonContiguousMatches: true, removeDiacritics: true);
        Assert.AreEqual(0, score2);
        Assert.IsNotNull(pos2);
        Assert.AreEqual(0, pos2.Count);
    }
}
|
||||
@@ -0,0 +1,225 @@
|
||||
// Copyright (c) Microsoft Corporation
|
||||
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||
// See the LICENSE file in the project root for more information.
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Globalization;
|
||||
using System.Linq;
|
||||
using ToolGood.Words.Pinyin;
|
||||
|
||||
namespace Microsoft.CommandPalette.Extensions.Toolkit.UnitTests.Legacy;
|
||||
|
||||
// Inspired by the fuzzy.rs from edit.exe
|
||||
public static class LegacyFuzzyStringMatcher
|
||||
{
|
||||
// Score returned when needle and haystack do not match; also stored in the
// match-length table to mark a cell that is not part of a match.
private const int NOMATCH = 0;
|
||||
|
||||
/// <summary>
|
||||
/// Gets or sets a value indicating whether to support Chinese PinYin.
|
||||
/// Automatically enabled when the system UI culture is Simplified Chinese.
|
||||
/// </summary>
|
||||
/// <remarks>
/// When enabled, <see cref="ScoreAllFuzzyWithPositions"/> additionally scores the PinYin form
/// of the needle and, for haystacks containing Chinese, the PinYin form of the haystack.
/// </remarks>
public static bool ChinesePinYinSupport { get; set; } = IsSimplifiedChinese();
|
||||
|
||||
/// <summary>
/// Reports whether the name of the current UI culture starts with "zh-CN" or "zh-Hans"
/// (compared ordinally, ignoring case), which covers zh-CN, zh-Hans and zh-Hans-*.
/// </summary>
private static bool IsSimplifiedChinese()
{
    var uiCultureName = CultureInfo.CurrentUICulture.Name;

    if (uiCultureName.StartsWith("zh-CN", StringComparison.OrdinalIgnoreCase))
    {
        return true;
    }

    return uiCultureName.StartsWith("zh-Hans", StringComparison.OrdinalIgnoreCase);
}
|
||||
|
||||
/// <summary>
/// Returns only the score part of <see cref="ScoreFuzzyWithPositions"/>.
/// </summary>
/// <param name="needle">Text to search for.</param>
/// <param name="haystack">Text to search in.</param>
/// <param name="allowNonContiguousMatches">Forwarded unchanged to <see cref="ScoreFuzzyWithPositions"/>.</param>
/// <returns>The best score found; 0 means no match.</returns>
public static int ScoreFuzzy(string needle, string haystack, bool allowNonContiguousMatches = true)
    => ScoreFuzzyWithPositions(needle, haystack, allowNonContiguousMatches).Score;
|
||||
|
||||
/// <summary>
/// Scores every needle/haystack variant produced by <see cref="ScoreAllFuzzyWithPositions"/>
/// and returns the result with the highest score.
/// </summary>
/// <param name="needle">Text to search for.</param>
/// <param name="haystack">Text to search in.</param>
/// <param name="allowNonContiguousMatches">Forwarded unchanged to <see cref="ScoreAllFuzzyWithPositions"/>.</param>
/// <returns>The highest-scoring (score, positions) pair.</returns>
public static (int Score, List<int> Positions) ScoreFuzzyWithPositions(string needle, string haystack, bool allowNonContiguousMatches)
{
    var candidates = ScoreAllFuzzyWithPositions(needle, haystack, allowNonContiguousMatches);
    return candidates.MaxBy(candidate => candidate.Score);
}
|
||||
|
||||
/// <summary>
/// Produces one (score, positions) result for every combination of needle variant and
/// haystack variant. Without PinYin support there is exactly one combination: the raw inputs.
/// With <see cref="ChinesePinYinSupport"/> on, the PinYin form of the needle is added, and the
/// PinYin form of the haystack is added when the haystack contains Chinese.
/// </summary>
/// <param name="needle">Text to search for.</param>
/// <param name="haystack">Text to search in.</param>
/// <param name="allowNonContiguousMatches">Forwarded unchanged to the scoring routine.</param>
/// <returns>A lazily evaluated sequence of results, ordered needle variant first, then haystack variant.</returns>
public static IEnumerable<(int Score, List<int> Positions)> ScoreAllFuzzyWithPositions(string needle, string haystack, bool allowNonContiguousMatches)
{
    var needleVariants = new List<string> { needle };
    var haystackVariants = new List<string> { haystack };

    if (ChinesePinYinSupport)
    {
        // Remove IME composition split characters before converting to PinYin.
        var needleWithoutSplitChars = needle.Replace("'", string.Empty);
        needleVariants.Add(WordsHelper.GetPinyin(needleWithoutSplitChars));

        if (WordsHelper.HasChinese(haystack))
        {
            haystackVariants.Add(WordsHelper.GetPinyin(haystack));
        }
    }

    return from needleVariant in needleVariants
           from haystackVariant in haystackVariants
           select ScoreFuzzyWithPositionsInternal(needleVariant, haystackVariant, allowNonContiguousMatches);
}
|
||||
|
||||
/// <summary>
/// Scores <paramref name="needle"/> against <paramref name="haystack"/> by filling a table with
/// one cell per (needle character, haystack character) pair, then walks the table backwards to
/// collect the haystack indices that took part in the match.
/// </summary>
/// <param name="needle">Text to search for.</param>
/// <param name="haystack">Text to search in.</param>
/// <param name="allowNonContiguousMatches">
/// When false, the first needle character only counts at a haystack index where the whole
/// upper-cased needle appears contiguously.
/// </param>
/// <returns>
/// The value in the last table cell together with the matched haystack indices in ascending
/// order. Returns (<c>NOMATCH</c>, empty list) when either input is null or empty, or when the
/// haystack is shorter than the needle.
/// </returns>
private static (int Score, List<int> Positions) ScoreFuzzyWithPositionsInternal(string needle, string haystack, bool allowNonContiguousMatches)
|
||||
{
|
||||
if (string.IsNullOrEmpty(haystack) || string.IsNullOrEmpty(needle))
|
||||
{
|
||||
return (NOMATCH, new List<int>());
|
||||
}
|
||||
|
||||
var target = haystack.ToCharArray();
|
||||
var query = needle.ToCharArray();
|
||||
|
||||
if (target.Length < query.Length)
|
||||
{
|
||||
return (NOMATCH, new List<int>());
|
||||
}
|
||||
|
||||
// Upper-cased copies are used for the case-insensitive comparison; the original
// characters are kept for the same-case and CamelCase bonuses.
var targetUpper = FoldCase(haystack);
|
||||
var queryUpper = FoldCase(needle);
|
||||
var targetUpperChars = targetUpper.ToCharArray();
|
||||
var queryUpperChars = queryUpper.ToCharArray();
|
||||
|
||||
// Both tables are stored row-major: row = needle index (qi), column = haystack index (ti).
// scores[cell]  = best cumulative score up to that cell.
// matches[cell] = length of the consecutive-match run ending at that cell, or NOMATCH.
var area = query.Length * target.Length;
|
||||
var scores = new int[area];
|
||||
var matches = new int[area];
|
||||
|
||||
for (var qi = 0; qi < query.Length; qi++)
|
||||
{
|
||||
var qiOffset = qi * target.Length;
|
||||
var qiPrevOffset = qi > 0 ? (qi - 1) * target.Length : 0;
|
||||
|
||||
for (var ti = 0; ti < target.Length; ti++)
|
||||
{
|
||||
var currentIndex = qiOffset + ti;
|
||||
var diagIndex = (qi > 0 && ti > 0) ? qiPrevOffset + ti - 1 : 0;
|
||||
var leftScore = ti > 0 ? scores[currentIndex - 1] : 0;
|
||||
var diagScore = (qi > 0 && ti > 0) ? scores[diagIndex] : 0;
|
||||
var matchSeqLen = (qi > 0 && ti > 0) ? matches[diagIndex] : 0;
|
||||
|
||||
// For every needle character after the first, a cell is only scored when its
// diagonal predecessor already holds a non-zero score.
var score = (diagScore == 0 && qi != 0) ? 0 :
|
||||
ComputeCharScore(
|
||||
query[qi],
|
||||
queryUpperChars[qi],
|
||||
ti != 0 ? target[ti - 1] : null,
|
||||
target[ti],
|
||||
targetUpperChars[ti],
|
||||
matchSeqLen);
|
||||
|
||||
// The match is kept only if it is at least as good as carrying the score from the
// left cell. In contiguous-only mode the first needle character must additionally
// start a full occurrence of the upper-cased needle at this haystack index.
var isValidScore = score != 0 && diagScore + score >= leftScore &&
|
||||
(allowNonContiguousMatches || qi > 0 ||
|
||||
targetUpperChars.Skip(ti).Take(queryUpperChars.Length).SequenceEqual(queryUpperChars));
|
||||
|
||||
if (isValidScore)
|
||||
{
|
||||
matches[currentIndex] = matchSeqLen + 1;
|
||||
scores[currentIndex] = diagScore + score;
|
||||
}
|
||||
else
|
||||
{
|
||||
matches[currentIndex] = NOMATCH;
|
||||
scores[currentIndex] = leftScore;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Backtrack from the last cell: step left over non-matching cells, and step diagonally
// (recording the haystack index) over matching ones.
var positions = new List<int>();
|
||||
if (query.Length > 0 && target.Length > 0)
|
||||
{
|
||||
var qi = query.Length - 1;
|
||||
var ti = target.Length - 1;
|
||||
|
||||
while (true)
|
||||
{
|
||||
var index = (qi * target.Length) + ti;
|
||||
if (matches[index] == NOMATCH)
|
||||
{
|
||||
if (ti == 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
ti--;
|
||||
}
|
||||
else
|
||||
{
|
||||
positions.Add(ti);
|
||||
if (qi == 0 || ti == 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
qi--;
|
||||
ti--;
|
||||
}
|
||||
}
|
||||
|
||||
// Indices were collected from the end of the haystack towards the start.
positions.Reverse();
|
||||
}
|
||||
|
||||
return (scores[area - 1], positions);
|
||||
}
|
||||
|
||||
/// <summary>
/// Folds case for comparison by converting <paramref name="input"/> to upper case
/// with the invariant culture.
/// </summary>
private static string FoldCase(string input) => input.ToUpperInvariant();
|
||||
|
||||
/// <summary>
/// Scores a single needle character against a single haystack character.
/// </summary>
/// <param name="query">Needle character in its original case.</param>
/// <param name="queryLower">Case-folded needle character used for the equality check.</param>
/// <param name="targetPrev">Haystack character before the current one, or null at the start of the haystack.</param>
/// <param name="targetCurr">Haystack character in its original case.</param>
/// <param name="targetLower">Case-folded haystack character used for the equality check.</param>
/// <param name="matchSeqLen">Length of the consecutive-match run that ends just before this character.</param>
/// <returns>
/// 0 when the folded characters are not considered equal; otherwise 1 plus the applicable
/// bonuses: 5 per preceding consecutive match, 1 for identical case, and either 8 at the start
/// of the haystack, the separator bonus of the previous character, or 2 for an upper-case
/// character that starts a new run.
/// </returns>
private static int ComputeCharScore(
    char query,
    char queryLower,
    char? targetPrev,
    char targetCurr,
    char targetLower,
    int matchSeqLen)
{
    if (!ConsiderAsEqual(queryLower, targetLower))
    {
        return 0;
    }

    // Consecutive match bonus.
    var consecutiveBonus = matchSeqLen > 0 ? matchSeqLen * 5 : 0;

    // Same case bonus.
    var sameCaseBonus = query == targetCurr ? 1 : 0;

    int positionBonus;
    if (targetPrev is not char previous)
    {
        // Start of word bonus.
        positionBonus = 8;
    }
    else
    {
        var separatorBonus = ScoreSeparator(previous);
        if (separatorBonus > 0)
        {
            positionBonus = separatorBonus;
        }
        else
        {
            // CamelCase bonus, only when this character starts a new run.
            positionBonus = (matchSeqLen == 0 && char.IsUpper(targetCurr)) ? 2 : 0;
        }
    }

    // The leading 1 is the character match bonus.
    return 1 + consecutiveBonus + sameCaseBonus + positionBonus;
}
|
||||
|
||||
/// <summary>
/// Treats two characters as equal when they are identical, or when one is a forward slash
/// and the other a backslash.
/// </summary>
private static bool ConsiderAsEqual(char a, char b)
    => (a, b) switch
    {
        ('/', '\\') or ('\\', '/') => true,
        _ => a == b,
    };
|
||||
|
||||
/// <summary>
/// Returns the bonus granted to a character that directly follows <paramref name="ch"/>:
/// 5 after a path separator, 4 after one of the other separators listed below, otherwise 0.
/// </summary>
private static int ScoreSeparator(char ch)
{
    switch (ch)
    {
        case '/':
        case '\\':
            return 5;

        case '_':
        case '-':
        case '.':
        case ' ':
        case '\'':
        case '"':
        case ':':
            return 4;

        default:
            return 0;
    }
}
|
||||
}
|
||||
@@ -0,0 +1,30 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<!-- Look at Directory.Build.props in root for common stuff as well -->
|
||||
<Import Project="..\..\..\..\Common.Dotnet.CsWinRT.props" />
|
||||
|
||||
<!-- Test project settings: not packable, nullable reference types enabled, and a fixed
     output folder without target-framework or runtime-identifier sub-folders. -->
<PropertyGroup>
|
||||
<RepoRoot>$(MSBuildThisFileDirectory)..\..\..\..\..\</RepoRoot>
|
||||
<IsPackable>false</IsPackable>
|
||||
<IsTestProject>true</IsTestProject>
|
||||
<RootNamespace>Microsoft.CommandPalette.Extensions.Toolkit.UnitTests</RootNamespace>
|
||||
<OutputPath>$(SolutionDir)$(Platform)\$(Configuration)\WinUI3Apps\CmdPal\tests\</OutputPath>
|
||||
<AppendTargetFrameworkToOutputPath>false</AppendTargetFrameworkToOutputPath>
|
||||
<AppendRuntimeIdentifierToOutputPath>false</AppendRuntimeIdentifierToOutputPath>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
|
||||
<!-- Package versions are not specified here; presumably they come from central package
     management. NOTE(review): confirm that Moq is actually used by a test in this project. -->
<ItemGroup>
|
||||
<PackageReference Include="Moq" />
|
||||
<PackageReference Include="MSTest" />
|
||||
</ItemGroup>
|
||||
|
||||
<!-- The toolkit under test. NOTE(review): the legacy matcher kept in this project uses
     ToolGood.Words.Pinyin, which is not referenced directly here; presumably it flows in
     through this project reference. Confirm. -->
<ItemGroup>
|
||||
<ProjectReference Include="..\..\extensionsdk\Microsoft.CommandPalette.Extensions.Toolkit\Microsoft.CommandPalette.Extensions.Toolkit.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
<!-- On CI builds the test assembly is delay-signed with the shared key. -->
<PropertyGroup Condition="'$(CIBuild)'=='true'">
|
||||
<SignAssembly>true</SignAssembly>
|
||||
<DelaySign>true</DelaySign>
|
||||
<AssemblyOriginatorKeyFile>$(RepoRoot).pipelines\272MSSharedLibSN2048.snk</AssemblyOriginatorKeyFile>
|
||||
</PropertyGroup>
|
||||
</Project>
|
||||
File diff suppressed because it is too large
Load Diff
@@ -56,7 +56,7 @@
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
|
||||
|
||||
<ItemGroup>
|
||||
<Content Include="$(RepoRoot)$(Platform)\$(Configuration)\Microsoft.CommandPalette.Extensions\Microsoft.CommandPalette.Extensions.winmd" Link="Microsoft.CommandPalette.Extensions.winmd" CopyToOutputDirectory="PreserveNewest" />
|
||||
</ItemGroup>
|
||||
@@ -83,4 +83,8 @@
|
||||
<!-- Suppress DynamicallyAccessedMemberTypes.PublicParameterlessConstructor in fallback code path of Windows SDK projection -->
|
||||
<WarningsNotAsErrors>IL2081;$(WarningsNotAsErrors)</WarningsNotAsErrors>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<InternalsVisibleTo Include="Microsoft.CommandPalette.Extensions.Toolkit.UnitTests, PublicKey=002400000c80000014010000060200000024000052534131000800000100010085aad0bef0688d1b994a0d78e1fd29fc24ac34ed3d3ac3fb9b3d0c48386ba834aa880035060a8848b2d8adf58e670ed20914be3681a891c9c8c01eef2ab22872547c39be00af0e6c72485d7cfd1a51df8947d36ceba9989106b58abe79e6a3e71a01ed6bdc867012883e0b1a4d35b1b5eeed6df21e401bb0c22f2246ccb69979dc9e61eef262832ed0f2064853725a75485fa8a3efb7e027319c86dec03dc3b1bca2b5081bab52a627b9917450dfad534799e1c7af58683bdfa135f1518ff1ea60e90d7b993a6c87fd3dd93408e35d1296f9a7f9a97c5db56c0f3cc25ad11e9777f94d138b3cea53b9a8331c2e6dcb8d2ea94e18bf1163ff112a22dbd92d429a" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
|
||||
Reference in New Issue
Block a user