From 7477b561a174975f04844bdb25b406dc732145ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Pol=C3=A1=C5=A1ek?= Date: Mon, 9 Feb 2026 20:37:59 +0100 Subject: [PATCH] CmdPal: Add precomputed fuzzy string matching to Command Palette (#44090) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary of the Pull Request This PR improves fuzzy matching in Command Palette by: - Precomputing normalized strings to enable faster comparisons - Reducing memory allocations during matching, effectively down to zero It also introduces several behavioral improvements: - Strips diacritics from the normalized search string to improve matching across languages - Suppresses the same-case bonus when the query consists entirely of lowercase characters -- reflecting typical user input patterns - Allows skipping word separators -- enabling queries like Power Point to match PowerPoint This implementation is currently kept internal and is used only on the home page. For other scenarios, the `FuzzyStringMatcher` from `Microsoft.CommandPalette.Extensions.Toolkit` is being improved instead. `PrecomputedFuzzyMatcher` offers up to a 100× performance improvement over the current `FuzzyStringMatcher`, and approximately 2–5× better performance compared to the improved version. The improvement might seem small, but it adds up and becomes quite noticeable when filtering the entire home page—whether the user starts a new search or changes the query non-incrementally (e.g., using backspace). ## PR Checklist - [x] Closes: #45226 - [x] Closes: #44066 - [ ] **Communication:** I've discussed this with core contributors already. 
If the work hasn't been agreed, this work might be rejected - [ ] **Tests:** Added/updated and all pass - [ ] **Localization:** All end-user-facing strings can be localized - [ ] **Dev docs:** Added/updated - [ ] **New binaries:** Added on the required places - [ ] [JSON for signing](https://github.com/microsoft/PowerToys/blob/main/.pipelines/ESRPSigning_core.json) for new binaries - [ ] [WXS for installer](https://github.com/microsoft/PowerToys/blob/main/installer/PowerToysSetup/Product.wxs) for new binaries and localization folder - [ ] [YML for CI pipeline](https://github.com/microsoft/PowerToys/blob/main/.pipelines/ci/templates/build-powertoys-steps.yml) for new test projects - [ ] [YML for signed pipeline](https://github.com/microsoft/PowerToys/blob/main/.pipelines/release.yml) - [ ] **Documentation updated:** If checked, please file a pull request on [our docs repo](https://github.com/MicrosoftDocs/windows-uwp/tree/docs/hub/powertoys) and link it here: #xxx ## Detailed Description of the Pull Request / Additional comments ## Validation Steps Performed --- .github/actions/spell-check/excludes.txt | 1 + .../Helpers/IPrecomputedListItem.cs | 27 + .../Helpers/InternalListHelpers.cs | 142 +++++ .../Text/BloomFilter.cs | 40 ++ .../Text/FuzzyMatcherProvider.cs | 52 ++ .../Text/FuzzyQuery.cs | 65 ++ .../Text/FuzzyTarget.cs | 46 ++ .../Text/FuzzyTargetCache.cs | 34 ++ .../Text/IBloomFilter.cs | 12 + .../Text/IFuzzyMatcherProvider.cs | 12 + .../Text/IPrecomputedFuzzyMatcher.cs | 16 + .../Text/IStringFolder.cs | 10 + .../Text/PinyinFuzzyMatcherOptions.cs | 13 + .../Text/PinyinMode.cs | 12 + .../Text/PrecomputedFuzzyMatcher.cs | 575 ++++++++++++++++++ .../Text/PrecomputedFuzzyMatcherOptions.cs | 40 ++ .../Text/PrecomputedFuzzyMatcherWithPinyin.cs | 177 ++++++ .../Text/StringFolder.cs | 163 +++++ .../Text/SymbolClassifier.cs | 29 + .../Text/SymbolKind.cs | 12 + .../CommandItemViewModel.cs | 23 +- .../ContextMenuViewModel.cs | 31 +- .../Commands/MainListPage.cs | 179 
+++--- .../Commands/MainListPageResultFactory.cs | 11 +- .../TopLevelViewModel.cs | 35 +- .../cmdpal/Microsoft.CmdPal.UI/App.xaml.cs | 4 + .../Controls/ContextMenu.xaml.cs | 12 +- .../PowerToysRootPageService.cs | 5 +- .../Text/PrecomputedFuzzyMatcherEmojiTests.cs | 78 +++ .../PrecomputedFuzzyMatcherOptionsTests.cs | 84 +++ ...computedFuzzyMatcherSecondaryInputTests.cs | 227 +++++++ .../Text/PrecomputedFuzzyMatcherTests.cs | 209 +++++++ .../PrecomputedFuzzyMatcherUnicodeTests.cs | 124 ++++ .../PrecomputedFuzzyMatcherWithPinyinTests.cs | 117 ++++ .../Text/StringFolderTests.cs | 55 ++ .../MainListPageResultFactoryTests.cs | 21 +- .../RecentCommandsTests.cs | 33 +- .../Microsoft.CmdPal.Ext.Apps/AppListItem.cs | 38 +- 38 files changed, 2626 insertions(+), 138 deletions(-) create mode 100644 src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Helpers/IPrecomputedListItem.cs create mode 100644 src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Helpers/InternalListHelpers.cs create mode 100644 src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/BloomFilter.cs create mode 100644 src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/FuzzyMatcherProvider.cs create mode 100644 src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/FuzzyQuery.cs create mode 100644 src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/FuzzyTarget.cs create mode 100644 src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/FuzzyTargetCache.cs create mode 100644 src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/IBloomFilter.cs create mode 100644 src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/IFuzzyMatcherProvider.cs create mode 100644 src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/IPrecomputedFuzzyMatcher.cs create mode 100644 src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/IStringFolder.cs create mode 100644 src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/PinyinFuzzyMatcherOptions.cs create mode 100644 
src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/PinyinMode.cs create mode 100644 src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/PrecomputedFuzzyMatcher.cs create mode 100644 src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/PrecomputedFuzzyMatcherOptions.cs create mode 100644 src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/PrecomputedFuzzyMatcherWithPinyin.cs create mode 100644 src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/StringFolder.cs create mode 100644 src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/SymbolClassifier.cs create mode 100644 src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/SymbolKind.cs create mode 100644 src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/PrecomputedFuzzyMatcherEmojiTests.cs create mode 100644 src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/PrecomputedFuzzyMatcherOptionsTests.cs create mode 100644 src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/PrecomputedFuzzyMatcherSecondaryInputTests.cs create mode 100644 src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/PrecomputedFuzzyMatcherTests.cs create mode 100644 src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/PrecomputedFuzzyMatcherUnicodeTests.cs create mode 100644 src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/PrecomputedFuzzyMatcherWithPinyinTests.cs create mode 100644 src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/StringFolderTests.cs diff --git a/.github/actions/spell-check/excludes.txt b/.github/actions/spell-check/excludes.txt index 9e587fa284..b029f1dbcb 100644 --- a/.github/actions/spell-check/excludes.txt +++ b/.github/actions/spell-check/excludes.txt @@ -111,6 +111,7 @@ ^src/modules/cmdpal/ext/SamplePagesExtension/Pages/SampleMarkdownImagesPage\.cs$ ^src/modules/cmdpal/Microsoft\.CmdPal\.UI/Settings/InternalPage\.SampleData\.cs$ 
^src/modules/cmdpal/Tests/Microsoft\.CmdPal\.Core\.Common\.UnitTests/.*\.TestData\.cs$ +^src/modules/cmdpal/Tests/Microsoft\.CmdPal\.Core\.Common\.UnitTests/Text/.*\.cs$ ^src/modules/colorPicker/ColorPickerUI/Shaders/GridShader\.cso$ ^src/modules/launcher/Plugins/Microsoft\.PowerToys\.Run\.Plugin\.TimeDate/Properties/ ^src/modules/MouseUtils/MouseJumpUI/MainForm\.resx$ diff --git a/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Helpers/IPrecomputedListItem.cs b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Helpers/IPrecomputedListItem.cs new file mode 100644 index 0000000000..2847ee7b12 --- /dev/null +++ b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Helpers/IPrecomputedListItem.cs @@ -0,0 +1,27 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.CmdPal.Core.Common.Text; + +namespace Microsoft.CmdPal.Core.Common.Helpers; + +/// +/// Represents an item that can provide precomputed fuzzy matching targets for its title and subtitle. +/// +public interface IPrecomputedListItem +{ + /// + /// Gets the fuzzy matching target for the item's title. + /// + /// The precomputed fuzzy matcher used to build the target. + /// The fuzzy target for the title. + FuzzyTarget GetTitleTarget(IPrecomputedFuzzyMatcher matcher); + + /// + /// Gets the fuzzy matching target for the item's subtitle. + /// + /// The precomputed fuzzy matcher used to build the target. + /// The fuzzy target for the subtitle. 
+ FuzzyTarget GetSubtitleTarget(IPrecomputedFuzzyMatcher matcher); +} diff --git a/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Helpers/InternalListHelpers.cs b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Helpers/InternalListHelpers.cs new file mode 100644 index 0000000000..60d841aaf8 --- /dev/null +++ b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Helpers/InternalListHelpers.cs @@ -0,0 +1,142 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Buffers; +using System.Diagnostics; +using Microsoft.CmdPal.Core.Common.Text; + +namespace Microsoft.CmdPal.Core.Common.Helpers; + +public static partial class InternalListHelpers +{ + public static RoScored[] FilterListWithScores( + IEnumerable? items, + in FuzzyQuery query, + in ScoringFunction scoreFunction) + { + if (items == null) + { + return []; + } + + // Try to get initial capacity hint + var initialCapacity = items switch + { + ICollection col => col.Count, + IReadOnlyCollection rc => rc.Count, + _ => 64, + }; + + var buffer = ArrayPool>.Shared.Rent(initialCapacity); + var count = 0; + + try + { + foreach (var item in items) + { + var score = scoreFunction(in query, item); + if (score <= 0) + { + continue; + } + + if (count == buffer.Length) + { + GrowBuffer(ref buffer, count); + } + + buffer[count++] = new RoScored(item, score); + } + + Array.Sort(buffer, 0, count, default(RoScoredDescendingComparer)); + var result = GC.AllocateUninitializedArray>(count); + buffer.AsSpan(0, count).CopyTo(result); + return result; + } + finally + { + ArrayPool>.Shared.Return(buffer); + } + } + + private static void GrowBuffer(ref RoScored[] buffer, int count) + { + var newBuffer = ArrayPool>.Shared.Rent(buffer.Length * 2); + buffer.AsSpan(0, count).CopyTo(newBuffer); + ArrayPool>.Shared.Return(buffer); + buffer = newBuffer; + } + + public static T[] 
FilterList(IEnumerable items, in FuzzyQuery query, ScoringFunction scoreFunction) + { + // Try to get initial capacity hint + var initialCapacity = items switch + { + ICollection col => col.Count, + IReadOnlyCollection rc => rc.Count, + _ => 64, + }; + + var buffer = ArrayPool>.Shared.Rent(initialCapacity); + var count = 0; + + try + { + foreach (var item in items) + { + var score = scoreFunction(in query, item); + if (score <= 0) + { + continue; + } + + if (count == buffer.Length) + { + GrowBuffer(ref buffer, count); + } + + buffer[count++] = new RoScored(item, score); + } + + Array.Sort(buffer, 0, count, default(RoScoredDescendingComparer)); + + var result = GC.AllocateUninitializedArray(count); + for (var i = 0; i < count; i++) + { + result[i] = buffer[i].Item; + } + + return result; + } + finally + { + ArrayPool>.Shared.Return(buffer); + } + } + + private readonly struct RoScoredDescendingComparer : IComparer> + { + public int Compare(RoScored x, RoScored y) => y.Score.CompareTo(x.Score); + } +} + +public delegate int ScoringFunction(in FuzzyQuery query, T item); + +[DebuggerDisplay($"{{{nameof(GetDebuggerDisplay)}(),nq}}")] +public readonly struct RoScored +{ + public readonly int Score; + public readonly T Item; + + public RoScored(T item, int score) + { + Score = score; + Item = item; + } + + private string GetDebuggerDisplay() + { + return "Score = " + Score + ", Item = " + Item; + } +} diff --git a/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/BloomFilter.cs b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/BloomFilter.cs new file mode 100644 index 0000000000..59255a1bae --- /dev/null +++ b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/BloomFilter.cs @@ -0,0 +1,40 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Runtime.CompilerServices; + +namespace Microsoft.CmdPal.Core.Common.Text; + +public sealed class BloomFilter : IBloomFilter +{ + public ulong Compute(string input) + { + ulong bloom = 0; + + foreach (var ch in input) + { + if (SymbolClassifier.Classify(ch) == SymbolKind.WordSeparator) + { + continue; + } + + var h = (uint)ch * 0x45d9f3b; + bloom |= 1UL << (int)(h & 31); + bloom |= 1UL << (int)(((h >> 16) & 31) + 32); + + if (bloom == ulong.MaxValue) + { + break; + } + } + + return bloom; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool MightContain(ulong candidateBloom, ulong queryBloom) + { + return (candidateBloom & queryBloom) == queryBloom; + } +} diff --git a/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/FuzzyMatcherProvider.cs b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/FuzzyMatcherProvider.cs new file mode 100644 index 0000000000..80c5fa9ace --- /dev/null +++ b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/FuzzyMatcherProvider.cs @@ -0,0 +1,52 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Globalization; + +namespace Microsoft.CmdPal.Core.Common.Text; + +public sealed class FuzzyMatcherProvider : IFuzzyMatcherProvider +{ + private readonly IBloomFilter _bloomCalculator = new BloomFilter(); + private readonly IStringFolder _normalizer = new StringFolder(); + + private IPrecomputedFuzzyMatcher _current; + + public FuzzyMatcherProvider(PrecomputedFuzzyMatcherOptions core, PinyinFuzzyMatcherOptions? pinyin = null) + { + _current = CreateMatcher(core, pinyin); + } + + public IPrecomputedFuzzyMatcher Current => Volatile.Read(ref _current); + + public void UpdateSettings(PrecomputedFuzzyMatcherOptions core, PinyinFuzzyMatcherOptions? 
pinyin = null) + { + Volatile.Write(ref _current, CreateMatcher(core, pinyin)); + } + + private IPrecomputedFuzzyMatcher CreateMatcher(PrecomputedFuzzyMatcherOptions core, PinyinFuzzyMatcherOptions? pinyin) + { + return pinyin is null || !IsPinyinEnabled(pinyin) + ? new PrecomputedFuzzyMatcher(core, _normalizer, _bloomCalculator) + : new PrecomputedFuzzyMatcherWithPinyin(core, pinyin, _normalizer, _bloomCalculator); + } + + private static bool IsPinyinEnabled(PinyinFuzzyMatcherOptions o) + { + return o.Mode switch + { + PinyinMode.Off => false, + PinyinMode.On => true, + PinyinMode.AutoSimplifiedChineseUi => IsSimplifiedChineseUi(), + _ => false, + }; + } + + private static bool IsSimplifiedChineseUi() + { + var culture = CultureInfo.CurrentUICulture; + return culture.Name.StartsWith("zh-CN", StringComparison.OrdinalIgnoreCase) + || culture.Name.StartsWith("zh-Hans", StringComparison.OrdinalIgnoreCase); + } +} diff --git a/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/FuzzyQuery.cs b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/FuzzyQuery.cs new file mode 100644 index 0000000000..80de31bd7a --- /dev/null +++ b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/FuzzyQuery.cs @@ -0,0 +1,65 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace Microsoft.CmdPal.Core.Common.Text; + +public readonly struct FuzzyQuery +{ + public readonly string Original; + + public readonly string Folded; + + public readonly ulong Bloom; + + public readonly int EffectiveLength; + + public readonly bool IsAllLowercaseAsciiOrNonLetter; + + public readonly string? SecondaryOriginal; + + public readonly string? 
SecondaryFolded; + + public readonly ulong SecondaryBloom; + + public readonly int SecondaryEffectiveLength; + + public readonly bool SecondaryIsAllLowercaseAsciiOrNonLetter; + + public int Length => Folded.Length; + + public bool HasSecondary => SecondaryFolded is not null; + + public ReadOnlySpan OriginalSpan => Original.AsSpan(); + + public ReadOnlySpan FoldedSpan => Folded.AsSpan(); + + public ReadOnlySpan SecondaryOriginalSpan => SecondaryOriginal.AsSpan(); + + public ReadOnlySpan SecondaryFoldedSpan => SecondaryFolded.AsSpan(); + + public FuzzyQuery( + string original, + string folded, + ulong bloom, + int effectiveLength, + bool isAllLowercaseAsciiOrNonLetter, + string? secondaryOriginal = null, + string? secondaryFolded = null, + ulong secondaryBloom = 0, + int secondaryEffectiveLength = 0, + bool secondaryIsAllLowercaseAsciiOrNonLetter = true) + { + Original = original; + Folded = folded; + Bloom = bloom; + EffectiveLength = effectiveLength; + IsAllLowercaseAsciiOrNonLetter = isAllLowercaseAsciiOrNonLetter; + + SecondaryOriginal = secondaryOriginal; + SecondaryFolded = secondaryFolded; + SecondaryBloom = secondaryBloom; + SecondaryEffectiveLength = secondaryEffectiveLength; + SecondaryIsAllLowercaseAsciiOrNonLetter = secondaryIsAllLowercaseAsciiOrNonLetter; + } +} diff --git a/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/FuzzyTarget.cs b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/FuzzyTarget.cs new file mode 100644 index 0000000000..b0c2927f20 --- /dev/null +++ b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/FuzzyTarget.cs @@ -0,0 +1,46 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +namespace Microsoft.CmdPal.Core.Common.Text; + +public readonly struct FuzzyTarget +{ + public readonly string Original; + public readonly string Folded; + public readonly ulong Bloom; + + public readonly string? SecondaryOriginal; + public readonly string? SecondaryFolded; + public readonly ulong SecondaryBloom; + + public int Length => Folded.Length; + + public bool HasSecondary => SecondaryFolded is not null; + + public int SecondaryLength => SecondaryFolded?.Length ?? 0; + + public ReadOnlySpan OriginalSpan => Original.AsSpan(); + + public ReadOnlySpan FoldedSpan => Folded.AsSpan(); + + public ReadOnlySpan SecondaryOriginalSpan => SecondaryOriginal.AsSpan(); + + public ReadOnlySpan SecondaryFoldedSpan => SecondaryFolded.AsSpan(); + + public FuzzyTarget( + string original, + string folded, + ulong bloom, + string? secondaryOriginal = null, + string? secondaryFolded = null, + ulong secondaryBloom = 0) + { + Original = original; + Folded = folded; + Bloom = bloom; + SecondaryOriginal = secondaryOriginal; + SecondaryFolded = secondaryFolded; + SecondaryBloom = secondaryBloom; + } +} diff --git a/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/FuzzyTargetCache.cs b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/FuzzyTargetCache.cs new file mode 100644 index 0000000000..dc5ec6e011 --- /dev/null +++ b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/FuzzyTargetCache.cs @@ -0,0 +1,34 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace Microsoft.CmdPal.Core.Common.Text; + +public struct FuzzyTargetCache +{ + private string? _lastRaw; + private uint _schemaId; + private FuzzyTarget _target; + + public FuzzyTarget GetOrUpdate(IPrecomputedFuzzyMatcher matcher, string? 
raw) + { + raw ??= string.Empty; + + if (_schemaId == matcher.SchemaId && string.Equals(_lastRaw, raw, StringComparison.Ordinal)) + { + return _target; + } + + _target = matcher.PrecomputeTarget(raw); + _schemaId = matcher.SchemaId; + _lastRaw = raw; + return _target; + } + + public void Invalidate() + { + _lastRaw = null; + _target = default; + _schemaId = 0; + } +} diff --git a/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/IBloomFilter.cs b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/IBloomFilter.cs new file mode 100644 index 0000000000..e9234e7adf --- /dev/null +++ b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/IBloomFilter.cs @@ -0,0 +1,12 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace Microsoft.CmdPal.Core.Common.Text; + +public interface IBloomFilter +{ + ulong Compute(string input); + + bool MightContain(ulong candidateBloom, ulong queryBloom); +} diff --git a/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/IFuzzyMatcherProvider.cs b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/IFuzzyMatcherProvider.cs new file mode 100644 index 0000000000..706dd0d8bf --- /dev/null +++ b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/IFuzzyMatcherProvider.cs @@ -0,0 +1,12 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace Microsoft.CmdPal.Core.Common.Text; + +public interface IFuzzyMatcherProvider +{ + IPrecomputedFuzzyMatcher Current { get; } + + void UpdateSettings(PrecomputedFuzzyMatcherOptions core, PinyinFuzzyMatcherOptions? 
pinyin = null); +} diff --git a/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/IPrecomputedFuzzyMatcher.cs b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/IPrecomputedFuzzyMatcher.cs new file mode 100644 index 0000000000..dfb8af378e --- /dev/null +++ b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/IPrecomputedFuzzyMatcher.cs @@ -0,0 +1,16 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace Microsoft.CmdPal.Core.Common.Text; + +public interface IPrecomputedFuzzyMatcher +{ + uint SchemaId { get; } + + FuzzyQuery PrecomputeQuery(string? input); + + FuzzyTarget PrecomputeTarget(string? input); + + int Score(scoped in FuzzyQuery query, scoped in FuzzyTarget target); +} diff --git a/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/IStringFolder.cs b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/IStringFolder.cs new file mode 100644 index 0000000000..6fcfbfaf61 --- /dev/null +++ b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/IStringFolder.cs @@ -0,0 +1,10 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace Microsoft.CmdPal.Core.Common.Text; + +public interface IStringFolder +{ + string Fold(string input, bool removeDiacritics); +} diff --git a/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/PinyinFuzzyMatcherOptions.cs b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/PinyinFuzzyMatcherOptions.cs new file mode 100644 index 0000000000..c060c33c92 --- /dev/null +++ b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/PinyinFuzzyMatcherOptions.cs @@ -0,0 +1,13 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. 
+// See the LICENSE file in the project root for more information. + +namespace Microsoft.CmdPal.Core.Common.Text; + +public sealed class PinyinFuzzyMatcherOptions +{ + public PinyinMode Mode { get; init; } = PinyinMode.AutoSimplifiedChineseUi; + + /// Remove IME syllable separators (') for query secondary variant. + public bool RemoveApostrophesForQuery { get; init; } = true; +} diff --git a/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/PinyinMode.cs b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/PinyinMode.cs new file mode 100644 index 0000000000..0da88e14c0 --- /dev/null +++ b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/PinyinMode.cs @@ -0,0 +1,12 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace Microsoft.CmdPal.Core.Common.Text; + +public enum PinyinMode +{ + Off = 0, + AutoSimplifiedChineseUi = 1, + On = 2, +} diff --git a/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/PrecomputedFuzzyMatcher.cs b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/PrecomputedFuzzyMatcher.cs new file mode 100644 index 0000000000..0994f1d328 --- /dev/null +++ b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/PrecomputedFuzzyMatcher.cs @@ -0,0 +1,575 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Buffers; +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace Microsoft.CmdPal.Core.Common.Text; + +public sealed class PrecomputedFuzzyMatcher : IPrecomputedFuzzyMatcher +{ + private const int NoMatchScore = 0; + private const int StackallocThresholdChars = 512; + private const int FolderSchemaVersion = 1; + private const int BloomSchemaVersion = 1; + + private readonly PrecomputedFuzzyMatcherOptions _options; + private readonly IStringFolder _stringFolder; + private readonly IBloomFilter _bloom; + + public PrecomputedFuzzyMatcher( + PrecomputedFuzzyMatcherOptions? options = null, + IStringFolder? normalization = null, + IBloomFilter? bloomCalculator = null) + { + _options = options ?? PrecomputedFuzzyMatcherOptions.Default; + _bloom = bloomCalculator ?? new BloomFilter(); + _stringFolder = normalization ?? new StringFolder(); + + SchemaId = ComputeSchemaId(_options); + } + + public uint SchemaId { get; } + + public FuzzyQuery PrecomputeQuery(string? input) => PrecomputeQuery(input, null); + + public FuzzyTarget PrecomputeTarget(string? input) => PrecomputeTarget(input, null); + + public int Score(in FuzzyQuery query, in FuzzyTarget target) + { + var qFold = query.FoldedSpan; + var tLen = target.Length; + + if (query.EffectiveLength == 0 || tLen == 0) + { + return NoMatchScore; + } + + var skipWordSeparators = _options.SkipWordSeparators; + var bestScore = 0; + + // 1. Primary → Primary + if (tLen >= query.EffectiveLength && _bloom.MightContain(target.Bloom, query.Bloom)) + { + if (CanMatchSubsequence(qFold, target.FoldedSpan, skipWordSeparators)) + { + bestScore = ScoreNonContiguous( + qRaw: query.OriginalSpan, + qFold: qFold, + qEffectiveLen: query.EffectiveLength, + tRaw: target.OriginalSpan, + tFold: target.FoldedSpan, + ignoreSameCaseBonusForThisQuery: _options.IgnoreSameCaseBonusIfQueryIsAllLowercase && query.IsAllLowercaseAsciiOrNonLetter); + } + } + + // 2. 
Secondary → Secondary + if (query.HasSecondary && target.HasSecondary) + { + var qSecFold = query.SecondaryFoldedSpan; + + if (target.SecondaryLength >= query.SecondaryEffectiveLength && + _bloom.MightContain(target.SecondaryBloom, query.SecondaryBloom) && + CanMatchSubsequence(qSecFold, target.SecondaryFoldedSpan, skipWordSeparators)) + { + var score = ScoreNonContiguous( + qRaw: query.SecondaryOriginalSpan, + qFold: qSecFold, + qEffectiveLen: query.SecondaryEffectiveLength, + tRaw: target.SecondaryOriginalSpan, + tFold: target.SecondaryFoldedSpan, + ignoreSameCaseBonusForThisQuery: _options.IgnoreSameCaseBonusIfQueryIsAllLowercase && query.SecondaryIsAllLowercaseAsciiOrNonLetter); + + if (score > bestScore) + { + bestScore = score; + } + } + } + + // 3. Primary query → Secondary target + if (target.HasSecondary && + target.SecondaryLength >= query.EffectiveLength && + _bloom.MightContain(target.SecondaryBloom, query.Bloom)) + { + if (CanMatchSubsequence(qFold, target.SecondaryFoldedSpan, skipWordSeparators)) + { + var score = ScoreNonContiguous( + qRaw: query.OriginalSpan, + qFold: qFold, + qEffectiveLen: query.EffectiveLength, + tRaw: target.SecondaryOriginalSpan, + tFold: target.SecondaryFoldedSpan, + ignoreSameCaseBonusForThisQuery: _options.IgnoreSameCaseBonusIfQueryIsAllLowercase && query.IsAllLowercaseAsciiOrNonLetter); + + if (score > bestScore) + { + bestScore = score; + } + } + } + + // 4. 
Secondary query → Primary target + if (query.HasSecondary && + tLen >= query.SecondaryEffectiveLength && + _bloom.MightContain(target.Bloom, query.SecondaryBloom)) + { + var qSecFold = query.SecondaryFoldedSpan; + + if (CanMatchSubsequence(qSecFold, target.FoldedSpan, skipWordSeparators)) + { + var score = ScoreNonContiguous( + qRaw: query.SecondaryOriginalSpan, + qFold: qSecFold, + qEffectiveLen: query.SecondaryEffectiveLength, + tRaw: target.OriginalSpan, + tFold: target.FoldedSpan, + ignoreSameCaseBonusForThisQuery: _options.IgnoreSameCaseBonusIfQueryIsAllLowercase && query.SecondaryIsAllLowercaseAsciiOrNonLetter); + + if (score > bestScore) + { + bestScore = score; + } + } + } + + return bestScore; + } + + private FuzzyQuery PrecomputeQuery(string? input, string? secondaryInput) + { + input ??= string.Empty; + + var folded = _stringFolder.Fold(input, _options.RemoveDiacritics); + var bloom = _bloom.Compute(folded); + var effectiveLength = _options.SkipWordSeparators + ? folded.Length - CountWordSeparators(folded) + : folded.Length; + + var isAllLowercase = IsAllLowercaseAsciiOrNonLetter(input); + + string? secondaryOriginal = null; + string? secondaryFolded = null; + ulong secondaryBloom = 0; + var secondaryEffectiveLength = 0; + var secondaryIsAllLowercase = true; + + if (!string.IsNullOrEmpty(secondaryInput)) + { + secondaryOriginal = secondaryInput; + secondaryFolded = _stringFolder.Fold(secondaryInput, _options.RemoveDiacritics); + secondaryBloom = _bloom.Compute(secondaryFolded); + secondaryEffectiveLength = _options.SkipWordSeparators + ? 
secondaryFolded.Length - CountWordSeparators(secondaryFolded) + : secondaryFolded.Length; + + secondaryIsAllLowercase = IsAllLowercaseAsciiOrNonLetter(secondaryInput); + } + + return new FuzzyQuery( + original: input, + folded: folded, + bloom: bloom, + effectiveLength: effectiveLength, + isAllLowercaseAsciiOrNonLetter: isAllLowercase, + secondaryOriginal: secondaryOriginal, + secondaryFolded: secondaryFolded, + secondaryBloom: secondaryBloom, + secondaryEffectiveLength: secondaryEffectiveLength, + secondaryIsAllLowercaseAsciiOrNonLetter: secondaryIsAllLowercase); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static int CountWordSeparators(string s) + { + var count = 0; + foreach (var c in s) + { + if (SymbolClassifier.Classify(c) == SymbolKind.WordSeparator) + { + count++; + } + } + + return count; + } + } + + internal FuzzyTarget PrecomputeTarget(string? input, string? secondaryInput) + { + input ??= string.Empty; + + var folded = _stringFolder.Fold(input, _options.RemoveDiacritics); + var bloom = _bloom.Compute(folded); + + string? 
secondaryFolded = null;
+        ulong secondaryBloom = 0;
+
+        if (!string.IsNullOrEmpty(secondaryInput))
+        {
+            secondaryFolded = _stringFolder.Fold(secondaryInput, _options.RemoveDiacritics);
+            secondaryBloom = _bloom.Compute(secondaryFolded);
+        }
+
+        return new FuzzyTarget(
+            input,
+            folded,
+            bloom,
+            secondaryInput,
+            secondaryFolded,
+            secondaryBloom);
+    }
+
+    /// <summary>
+    /// Returns <c>false</c> as soon as <paramref name="s"/> contains an ASCII uppercase letter ('A'..'Z').
+    /// Only the ASCII range is inspected; any other character (including non-ASCII uppercase) is accepted.
+    /// </summary>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    private static bool IsAllLowercaseAsciiOrNonLetter(string s)
+    {
+        foreach (var c in s)
+        {
+            // Unsigned range check: true exactly for 'A'..'Z'.
+            if ((uint)(c - 'A') <= ('Z' - 'A'))
+            {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    /// <summary>
+    /// Checks whether every character of the folded query appears, in order, in the folded target.
+    /// </summary>
+    /// <param name="qFold">Folded query characters.</param>
+    /// <param name="tFold">Folded target characters.</param>
+    /// <param name="skipWordSeparators">
+    /// When <c>true</c>, word separators in the query are skipped instead of being required in the target.
+    /// </param>
+    /// <returns><c>true</c> when the whole query was consumed.</returns>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    private static bool CanMatchSubsequence(
+        ReadOnlySpan<char> qFold,
+        ReadOnlySpan<char> tFold,
+        bool skipWordSeparators)
+    {
+        var qi = 0;
+        var ti = 0;
+
+        while (qi < qFold.Length && ti < tFold.Length)
+        {
+            var qChar = qFold[qi];
+
+            if (skipWordSeparators && SymbolClassifier.Classify(qChar) == SymbolKind.WordSeparator)
+            {
+                qi++;
+                continue;
+            }
+
+            if (qChar == tFold[ti])
+            {
+                qi++;
+            }
+
+            ti++;
+        }
+
+        // Skip trailing word separators in query
+        if (skipWordSeparators)
+        {
+            while (qi < qFold.Length && SymbolClassifier.Classify(qFold[qi]) == SymbolKind.WordSeparator)
+            {
+                qi++;
+            }
+        }
+
+        return qi == qFold.Length;
+    }
+
+    /// <summary>
+    /// Scores a (possibly non-contiguous) in-order match of the query against the target using a
+    /// row-by-row dynamic program: one pass over the target per effective query character.
+    /// </summary>
+    /// <param name="qRaw">Query as typed; used for the same-case comparison.</param>
+    /// <param name="qFold">Folded query; must have the same length as <paramref name="qRaw"/>.</param>
+    /// <param name="qEffectiveLen">
+    /// Number of query characters that take part in scoring (word separators excluded when they are skipped).
+    /// </param>
+    /// <param name="tRaw">Target as displayed; used for case and camel-case checks.</param>
+    /// <param name="tFold">Folded target; must have the same length as <paramref name="tRaw"/>.</param>
+    /// <param name="ignoreSameCaseBonusForThisQuery">When <c>true</c>, the same-case bonus is treated as 0.</param>
+    /// <returns>The best score found, or <c>NoMatchScore</c> when some query character cannot be placed.</returns>
+    [MethodImpl(MethodImplOptions.AggressiveOptimization)]
+    [SkipLocalsInit]
+    private int ScoreNonContiguous(
+        scoped in ReadOnlySpan<char> qRaw,
+        scoped in ReadOnlySpan<char> qFold,
+        int qEffectiveLen,
+        scoped in ReadOnlySpan<char> tRaw,
+        scoped in ReadOnlySpan<char> tFold,
+        bool ignoreSameCaseBonusForThisQuery)
+    {
+        Debug.Assert(qRaw.Length == qFold.Length, "Original and folded spans are traversed in lockstep: requires qRaw.Length == qFold.Length");
+        Debug.Assert(tRaw.Length == tFold.Length, "Original and folded spans are traversed in lockstep: requires tRaw.Length == tFold.Length");
+        Debug.Assert(qEffectiveLen <= qFold.Length, "Effective length must be less than or equal to folded length");
+
+        var qLen = qFold.Length;
+        var tLen = tFold.Length;
+
+        // Copy options to local variables to avoid repeated field accesses
+        var charMatchBonus = _options.CharMatchBonus;
+        var sameCaseBonus = ignoreSameCaseBonusForThisQuery ? 0 : _options.SameCaseBonus;
+        var consecutiveMultiplier = _options.ConsecutiveMultiplier;
+        var camelCaseBonus = _options.CamelCaseBonus;
+        var startOfWordBonus = _options.StartOfWordBonus;
+        var pathSeparatorBonus = _options.PathSeparatorBonus;
+        var wordSeparatorBonus = _options.WordSeparatorBonus;
+        var separatorAlignmentBonus = _options.SeparatorAlignmentBonus;
+        var exactSeparatorBonus = _options.ExactSeparatorBonus;
+        var skipWordSeparators = _options.SkipWordSeparators;
+
+        // DP buffer: two rows of length tLen
+        // (first half = best score per target index, second half = consecutive-run length per target index)
+        var bufferSize = tLen * 2;
+        int[]? rented = null;
+
+        try
+        {
+            scoped Span<int> buffer;
+            if (bufferSize <= StackallocThresholdChars)
+            {
+                buffer = stackalloc int[bufferSize];
+            }
+            else
+            {
+                rented = ArrayPool<int>.Shared.Rent(bufferSize);
+                buffer = rented.AsSpan(0, bufferSize);
+            }
+
+            var scores = buffer[..tLen];
+            var seqLens = buffer.Slice(tLen, tLen);
+
+            // Required: [SkipLocalsInit] and pooled arrays both hand out uninitialized memory.
+            scores.Clear();
+            seqLens.Clear();
+
+            ref var scores0 = ref MemoryMarshal.GetReference(scores);
+            ref var seqLens0 = ref MemoryMarshal.GetReference(seqLens);
+            ref var qRaw0 = ref MemoryMarshal.GetReference(qRaw);
+            ref var qFold0 = ref MemoryMarshal.GetReference(qFold);
+            ref var tRaw0 = ref MemoryMarshal.GetReference(tRaw);
+            ref var tFold0 = ref MemoryMarshal.GetReference(tFold);
+
+            var qiEffective = 0;
+
+            for (var qi = 0; qi < qLen; qi++)
+            {
+                var qCharFold = Unsafe.Add(ref qFold0, qi);
+                var qCharKind = SymbolClassifier.Classify(qCharFold);
+
+                if (skipWordSeparators && qCharKind == SymbolKind.WordSeparator)
+                {
+                    continue;
+                }
+
+                // Hoisted values
+                var qRawIsUpper = char.IsUpper(Unsafe.Add(ref qRaw0, qi));
+
+                // row computation
+                // leftScore: best score so far in this row; diag*: previous row's values at ti - 1
+                var leftScore = 0;
+                var diagScore = 0;
+                var diagSeqLen = 0;
+
+                // limit ti to ensure enough remaining characters to match the rest of the query
+                var tiMax = tLen - qEffectiveLen + qiEffective;
+
+                for (var ti = 0; ti <= tiMax; ti++)
+                {
+                    // Read the previous row's cell before it is overwritten below.
+                    var upScore = Unsafe.Add(ref scores0, ti);
+                    var upSeqLen = Unsafe.Add(ref seqLens0, ti);
+
+                    // A character can only be placed here if the previous query character was
+                    // placed somewhere before ti, or if this is the first effective query character.
+                    var charScore = 0;
+                    if (diagScore != 0 || qiEffective == 0)
+                    {
+                        charScore = ComputeCharScore(
+                            qi,
+                            ti,
+                            qCharFold,
+                            qCharKind,
+                            diagSeqLen,
+                            qRawIsUpper,
+                            ref tRaw0,
+                            ref qFold0,
+                            ref tFold0);
+                    }
+
+                    var candidateScore = diagScore + charScore;
+                    if (charScore != 0 && candidateScore >= leftScore)
+                    {
+                        Unsafe.Add(ref scores0, ti) = candidateScore;
+                        Unsafe.Add(ref seqLens0, ti) = diagSeqLen + 1;
+                        leftScore = candidateScore;
+                    }
+                    else
+                    {
+                        Unsafe.Add(ref scores0, ti) = leftScore;
+                        Unsafe.Add(ref seqLens0, ti) = 0;
+                        /* leftScore remains unchanged */
+                    }
+
+                    diagScore = upScore;
+                    diagSeqLen = upSeqLen;
+                }
+
+                // Early exit: no match possible
+                if (leftScore == 0)
+                {
+                    return NoMatchScore;
+                }
+
+                // Advance effective query index
+                // Only counts non-separator characters if skipWordSeparators is enabled
+                qiEffective++;
+
+                if (qiEffective == qEffectiveLen)
+                {
+                    return leftScore;
+                }
+            }
+
+            // NOTE(review): indexing assumes tLen > 0 on this path; presumably callers reject empty targets - confirm.
+            return scores[tLen - 1];
+
+            // Scores placing query character qi on target index ti; returns 0 when the characters do not match.
+            // seqLen is the length of the consecutive run that ends just before ti (0 when a new run starts).
+            [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
+            int ComputeCharScore(
+                int qi,
+                int ti,
+                char qCharFold,
+                SymbolKind qCharKind,
+                int seqLen,
+                bool qCharRawCurrIsUpper,
+                ref char tRaw0,
+                ref char qFold0,
+                ref char tFold0)
+            {
+                // Match check:
+                // - exact folded char match always ok
+                // - otherwise, allow equivalence only for word separators (e.g. '_' matches '-')
+                var tCharFold = Unsafe.Add(ref tFold0, ti);
+                if (qCharFold != tCharFold)
+                {
+                    if (!skipWordSeparators)
+                    {
+                        return 0;
+                    }
+
+                    if (qCharKind != SymbolKind.WordSeparator ||
+                        SymbolClassifier.Classify(tCharFold) != SymbolKind.WordSeparator)
+                    {
+                        return 0;
+                    }
+                }
+
+                // 0. Base char match bonus
+                var score = charMatchBonus;
+
+                // 1. Consecutive match bonus
+                if (seqLen > 0)
+                {
+                    score += seqLen * consecutiveMultiplier;
+                }
+
+                // 2. Same case bonus
+                // Early outs to appease the branch predictor
+                // Both branches apply the same position bonuses (start of target, after a separator,
+                // camel-case hump); the first one additionally applies the same-case bonus.
+                if (sameCaseBonus != 0)
+                {
+                    var tCharRawCurr = Unsafe.Add(ref tRaw0, ti);
+                    var tCharRawCurrIsUpper = char.IsUpper(tCharRawCurr);
+                    if (qCharRawCurrIsUpper == tCharRawCurrIsUpper)
+                    {
+                        score += sameCaseBonus;
+                    }
+
+                    if (ti == 0)
+                    {
+                        score += startOfWordBonus;
+                        return score;
+                    }
+
+                    var tPrevFold = Unsafe.Add(ref tFold0, ti - 1);
+                    var tPrevKind = SymbolClassifier.Classify(tPrevFold);
+                    if (tPrevKind != SymbolKind.Other)
+                    {
+                        score += tPrevKind == SymbolKind.PathSeparator
+                            ? pathSeparatorBonus
+                            : wordSeparatorBonus;
+
+                        // Extra credit when the query also had a separator right before this character.
+                        if (skipWordSeparators && seqLen == 0 && qi > 0)
+                        {
+                            var qPrevFold = Unsafe.Add(ref qFold0, qi - 1);
+                            var qPrevKind = SymbolClassifier.Classify(qPrevFold);
+
+                            if (qPrevKind == SymbolKind.WordSeparator)
+                            {
+                                score += separatorAlignmentBonus;
+
+                                if (tPrevKind == SymbolKind.WordSeparator && qPrevFold == tPrevFold)
+                                {
+                                    score += exactSeparatorBonus;
+                                }
+                            }
+                        }
+
+                        return score;
+                    }
+
+                    if (tCharRawCurrIsUpper && seqLen == 0)
+                    {
+                        score += camelCaseBonus;
+                        return score;
+                    }
+
+                    return score;
+                }
+                else
+                {
+                    if (ti == 0)
+                    {
+                        score += startOfWordBonus;
+                        return score;
+                    }
+
+                    var tPrevFold = Unsafe.Add(ref tFold0, ti - 1);
+                    var tPrevKind = SymbolClassifier.Classify(tPrevFold);
+                    if (tPrevKind != SymbolKind.Other)
+                    {
+                        score += tPrevKind == SymbolKind.PathSeparator
+                            ? pathSeparatorBonus
+                            : wordSeparatorBonus;
+
+                        // Extra credit when the query also had a separator right before this character.
+                        if (skipWordSeparators && seqLen == 0 && qi > 0)
+                        {
+                            var qPrevFold = Unsafe.Add(ref qFold0, qi - 1);
+                            var qPrevKind = SymbolClassifier.Classify(qPrevFold);
+
+                            if (qPrevKind == SymbolKind.WordSeparator)
+                            {
+                                score += separatorAlignmentBonus;
+
+                                if (tPrevKind == SymbolKind.WordSeparator && qPrevFold == tPrevFold)
+                                {
+                                    score += exactSeparatorBonus;
+                                }
+                            }
+                        }
+
+                        return score;
+                    }
+
+                    if (camelCaseBonus != 0 && seqLen == 0 && char.IsUpper(Unsafe.Add(ref tRaw0, ti)))
+                    {
+                        score += camelCaseBonus;
+                        return score;
+                    }
+
+                    return score;
+                }
+            }
+        }
+        finally
+        {
+            // Hand the pooled buffer back on every exit path, including the early returns above.
+            if (rented is not null)
+            {
+                ArrayPool<int>.Shared.Return(rented);
+            }
+        }
+    }
+
+    // Schema ID is for cache invalidation of precomputed targets.
+    // Only includes options that affect folding/bloom, not scoring.
+    // Mixed FNV-1a style (xor, then multiply by the prime), one 32-bit value per step.
+    private static uint ComputeSchemaId(PrecomputedFuzzyMatcherOptions o)
+    {
+        const uint fnvOffset = 2166136261;
+        const uint fnvPrime = 16777619;
+
+        var h = fnvOffset;
+        h = unchecked((h ^ FolderSchemaVersion) * fnvPrime);
+        h = unchecked((h ^ BloomSchemaVersion) * fnvPrime);
+        h = unchecked((h ^ (uint)(o.RemoveDiacritics ? 1 : 0)) * fnvPrime);
+
+        return h;
+    }
+}
diff --git a/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/PrecomputedFuzzyMatcherOptions.cs b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/PrecomputedFuzzyMatcherOptions.cs
new file mode 100644
index 0000000000..b1b01d60f1
--- /dev/null
+++ b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/PrecomputedFuzzyMatcherOptions.cs
@@ -0,0 +1,40 @@
+// Copyright (c) Microsoft Corporation
+// The Microsoft Corporation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+namespace Microsoft.CmdPal.Core.Common.Text;
+
+// Tunable weights and switches read by PrecomputedFuzzyMatcher.
+// All properties are init-only, so an instance cannot change after construction.
+public sealed class PrecomputedFuzzyMatcherOptions
+{
+    // Shared instance carrying the default values declared below.
+    public static PrecomputedFuzzyMatcherOptions Default { get; } = new();
+
+    /*
+     * Bonuses
+     */
+
+    // Base score awarded for every matched character.
+    public int CharMatchBonus { get; init; } = 1;
+
+    // Added when the query character and the target character agree on being uppercase or not.
+    // The scorer treats it as 0 when it is told to ignore the same-case bonus for a query.
+    public int SameCaseBonus { get; init; } = 1;
+
+    // Multiplied by the length of the consecutive run that precedes a matched character.
+    public int ConsecutiveMultiplier { get; init; } = 5;
+
+    // Added when a match starts a new run on an uppercase target character
+    // that is neither at index 0 nor preceded by a separator.
+    public int CamelCaseBonus { get; init; } = 2;
+
+    // Added when the match lands on the first character of the target.
+    public int StartOfWordBonus { get; init; } = 8;
+
+    // Added when the target character before the match is a path separator.
+    public int PathSeparatorBonus { get; init; } = 5;
+
+    // Added when the target character before the match is a word separator.
+    public int WordSeparatorBonus { get; init; } = 4;
+
+    // Added on top of the separator bonus when SkipWordSeparators is on, the match starts a new run,
+    // and the query character before it is a word separator as well.
+    public int SeparatorAlignmentBonus { get; init; } = 2;
+
+    // Added on top of SeparatorAlignmentBonus when the target's preceding character is
+    // the same word separator as the query's preceding character.
+    public int ExactSeparatorBonus { get; init; } = 1;
+
+    /*
+     * Settings
+     */
+
+    // Forwarded to IStringFolder.Fold when targets are precomputed; also mixed into the matcher's schema id.
+    public bool RemoveDiacritics { get; init; } = true;
+
+    // When true, word separators in the query are skipped instead of having to match the target.
+    public bool SkipWordSeparators { get; init; } = true;
+
+    // NOTE(review): the consumer is outside this chunk; presumably decides whether
+    // SameCaseBonus is suppressed for queries without ASCII uppercase letters - confirm.
+    public bool IgnoreSameCaseBonusIfQueryIsAllLowercase { get; init; } = true;
+}
diff --git a/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/PrecomputedFuzzyMatcherWithPinyin.cs b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/PrecomputedFuzzyMatcherWithPinyin.cs
new file mode 100644
index 0000000000..026328f2c5
--- /dev/null
+++ b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/PrecomputedFuzzyMatcherWithPinyin.cs
@@ -0,0 +1,177 @@
+// Copyright (c) Microsoft Corporation
+// The Microsoft Corporation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Globalization;
+using System.Runtime.CompilerServices;
+using ToolGood.Words.Pinyin;
+
+namespace Microsoft.CmdPal.Core.Common.Text;
+
+/// <summary>
+/// Wraps <see cref="PrecomputedFuzzyMatcher"/> and, when pinyin support is enabled, attaches a
+/// pinyin transliteration as the secondary text of precomputed queries and targets.
+/// Scoring itself is delegated unchanged to the wrapped matcher.
+/// </summary>
+public sealed class PrecomputedFuzzyMatcherWithPinyin : IPrecomputedFuzzyMatcher
+{
+    private readonly IBloomFilter _bloom;
+    private readonly PrecomputedFuzzyMatcher _core;
+
+    private readonly IStringFolder _stringFolder;
+    private readonly PinyinFuzzyMatcherOptions _pinyin;
+
+    public PrecomputedFuzzyMatcherWithPinyin(
+        PrecomputedFuzzyMatcherOptions coreOptions,
+        PinyinFuzzyMatcherOptions pinyinOptions,
+        IStringFolder stringFolder,
+        IBloomFilter bloom)
+    {
+        _pinyin = pinyinOptions;
+        _stringFolder = stringFolder;
+        _bloom = bloom;
+
+        _core = new PrecomputedFuzzyMatcher(coreOptions, stringFolder, bloom);
+
+        SchemaId = CombineSchema(_core.SchemaId, _pinyin);
+    }
+
+    /// <summary>Hash of the core matcher's schema id combined with the pinyin options.</summary>
+    public uint SchemaId { get; }
+
+    /// <summary>
+    /// Precomputes a query. When pinyin is enabled and a transliteration is produced, the result
+    /// carries the pinyin text in its secondary slots; otherwise the core query is returned as is.
+    /// </summary>
+    /// <param name="input">Query text; <c>null</c> is treated as empty.</param>
+    public FuzzyQuery PrecomputeQuery(string? input)
+    {
+        input ??= string.Empty;
+
+        var primary = _core.PrecomputeQuery(input);
+
+        // Fast exit if effectively off (provider should already filter, but keep robust)
+        if (!IsPinyinEnabled(_pinyin))
+        {
+            return primary;
+        }
+
+        // Match legacy: remove apostrophes for query secondary
+        var queryForPinyin = _pinyin.RemoveApostrophesForQuery ? RemoveApostrophesIfAny(input) : input;
+
+        var pinyin = WordsHelper.GetPinyin(queryForPinyin);
+        if (string.IsNullOrEmpty(pinyin))
+        {
+            return primary;
+        }
+
+        // The pinyin text is precomputed as a query of its own, so everything that describes it
+        // lives in that query's primary slots. The case flag must be read from the same place as
+        // Original/Folded/Bloom/EffectiveLength; reading the Secondary* flag here would pick up a
+        // value that does not describe the pinyin text.
+        var secondary = _core.PrecomputeQuery(pinyin);
+        return new FuzzyQuery(
+            primary.Original,
+            primary.Folded,
+            primary.Bloom,
+            primary.EffectiveLength,
+            primary.IsAllLowercaseAsciiOrNonLetter,
+            secondary.Original,
+            secondary.Folded,
+            secondary.Bloom,
+            secondary.EffectiveLength,
+            secondary.IsAllLowercaseAsciiOrNonLetter);
+    }
+
+    /// <summary>
+    /// Precomputes a target. A pinyin secondary is attached only when pinyin is enabled, the input
+    /// contains Chinese characters and a transliteration is produced.
+    /// </summary>
+    /// <param name="input">Target text; <c>null</c> is treated as empty.</param>
+    public FuzzyTarget PrecomputeTarget(string? input)
+    {
+        input ??= string.Empty;
+
+        var primary = _core.PrecomputeTarget(input);
+
+        if (!IsPinyinEnabled(_pinyin))
+        {
+            return primary;
+        }
+
+        // Match legacy: only compute target pinyin when target contains Chinese
+        if (!ContainsToolGoodChinese(input))
+        {
+            return primary;
+        }
+
+        var pinyin = WordsHelper.GetPinyin(input);
+        if (string.IsNullOrEmpty(pinyin))
+        {
+            return primary;
+        }
+
+        var secondary = _core.PrecomputeTarget(pinyin);
+        return new FuzzyTarget(
+            primary.Original,
+            primary.Folded,
+            primary.Bloom,
+            secondary.Original,
+            secondary.Folded,
+            secondary.Bloom);
+    }
+
+    /// <summary>Scores <paramref name="query"/> against <paramref name="target"/> using the core matcher.</summary>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    public int Score(scoped in FuzzyQuery query, scoped in FuzzyTarget target)
+        => _core.Score(in query, in target);
+
+    // Resolves the configured mode; unknown modes count as disabled.
+    private static bool IsPinyinEnabled(PinyinFuzzyMatcherOptions o) => o.Mode switch
+    {
+        PinyinMode.Off => false,
+        PinyinMode.On => true,
+        PinyinMode.AutoSimplifiedChineseUi => IsSimplifiedChineseUi(),
+        _ => false,
+    };
+
+    // True when the current UI culture name starts with "zh-CN" or "zh-Hans" (case-insensitive).
+    private static bool IsSimplifiedChineseUi()
+    {
+        var culture = CultureInfo.CurrentUICulture;
+        return culture.Name.StartsWith("zh-CN", StringComparison.OrdinalIgnoreCase)
+            || culture.Name.StartsWith("zh-Hans", StringComparison.OrdinalIgnoreCase);
+    }
+
+    private static bool ContainsToolGoodChinese(string s)
+    {
+        return WordsHelper.HasChinese(s);
+    }
+
+    // Returns the input itself when it has no apostrophe; otherwise a copy with every '\'' removed.
+    private static string RemoveApostrophesIfAny(string input)
+    {
+        var first = input.IndexOf('\'');
+        if (first < 0)
+        {
+            return input;
+        }
+
+        // Count first so the result can be created at its exact final length.
+        var removeCount = 1;
+        for (var i = first + 1; i < input.Length; i++)
+        {
+            if (input[i] == '\'')
+            {
+                removeCount++;
+            }
+        }
+
+        return string.Create(input.Length - removeCount, input, static (dst, src) =>
+        {
+            var di = 0;
+            for (var i = 0; i < src.Length; i++)
+            {
+                var c = src[i];
+                if (c == '\'')
+                {
+                    continue;
+                }
+
+                dst[di++] = c;
+            }
+        });
+    }
+
+    // Mixes the core schema id with the pinyin options (FNV-1a style: xor, then multiply by the prime).
+    private static uint CombineSchema(uint coreSchemaId, PinyinFuzzyMatcherOptions p)
+    {
+        const uint fnvOffset = 2166136261;
+        const uint fnvPrime = 16777619;
+
+        var h = fnvOffset;
+        h = unchecked((h ^ coreSchemaId) * fnvPrime);
+        h = unchecked((h ^ (uint)p.Mode) * fnvPrime);
+        h = unchecked((h ^ (p.RemoveApostrophesForQuery ? 1u : 0u)) * fnvPrime);
+
+        // bump if you change formatting/conversion behavior
+        const uint pinyinAlgoVersion = 1;
+        h = unchecked((h ^ pinyinAlgoVersion) * fnvPrime);
+
+        return h;
+    }
+}
diff --git a/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/StringFolder.cs b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/StringFolder.cs
new file mode 100644
index 0000000000..2d814be553
--- /dev/null
+++ b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/StringFolder.cs
@@ -0,0 +1,163 @@
+// Copyright (c) Microsoft Corporation
+// The Microsoft Corporation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Globalization;
+using System.Runtime.CompilerServices;
+using System.Text;
+
+namespace Microsoft.CmdPal.Core.Common.Text;
+
+public sealed class StringFolder : IStringFolder
+{
+    // Cache for diacritic-stripped uppercase characters.
+    // Benign race: worst case is redundant computation writing the same value.
+    // 0 = uncached, else cachedChar + 1
+    private static readonly ushort[] StripCacheUpper = new ushort[char.MaxValue + 1];
+
+    public string Fold(string input, bool removeDiacritics)
+    {
+        if (string.IsNullOrEmpty(input))
+        {
+            return string.Empty;
+        }
+
+        if (!removeDiacritics || Ascii.IsValid(input))
+        {
+            if (IsAlreadyFoldedAndSlashNormalized(input))
+            {
+                return input;
+            }
+
+            return string.Create(input.Length, input, static (dst, src) =>
+            {
+                for (var i = 0; i < src.Length; i++)
+                {
+                    var c = src[i];
+                    dst[i] = c == '\\' ? '/' : char.ToUpperInvariant(c);
+                }
+            });
+        }
+
+        return string.Create(input.Length, input, static (dst, src) =>
+        {
+            for (var i = 0; i < src.Length; i++)
+            {
+                var c = src[i];
+                var upper = c == '\\' ?
'/' : char.ToUpperInvariant(c);
+                dst[i] = StripDiacriticsFromUpper(upper);
+            }
+        });
+    }
+
+    /// <summary>
+    /// Returns <c>true</c> only when folding without diacritic removal would leave
+    /// <paramref name="input"/> unchanged, so the caller can return the same string instance.
+    /// </summary>
+    private static bool IsAlreadyFoldedAndSlashNormalized(string input)
+    {
+        for (var i = 0; i < input.Length; i++)
+        {
+            var c = input[i];
+
+            // Fold rewrites '\' to '/'.
+            if (c == '\\')
+            {
+                return false;
+            }
+
+            if (c <= 0x7F)
+            {
+                // ASCII: only 'a'..'z' change under ToUpperInvariant.
+                if ((uint)(c - 'a') <= 'z' - 'a')
+                {
+                    return false;
+                }
+
+                continue;
+            }
+
+            // Non-ASCII: ask the exact mapping Fold applies. A Unicode-category test is not
+            // sufficient because characters outside LowercaseLetter/TitlecaseLetter can also
+            // have an uppercase form (for example U+2170 or U+24D0).
+            if (char.ToUpperInvariant(c) != c)
+            {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    /// <summary>
+    /// Maps an already upper-cased character to its diacritic-free form, using the per-character cache.
+    /// ASCII characters and surrogates are returned unchanged.
+    /// </summary>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    private static char StripDiacriticsFromUpper(char upper)
+    {
+        if (upper <= 0x7F)
+        {
+            return upper;
+        }
+
+        // Never attempt normalization on lone UTF-16 surrogates.
+        if (char.IsSurrogate(upper))
+        {
+            return upper;
+        }
+
+        // Cache entries are stored as value + 1 so that 0 can mean "not computed yet".
+        var cachedPlus1 = StripCacheUpper[upper];
+        if (cachedPlus1 != 0)
+        {
+            return (char)(cachedPlus1 - 1);
+        }
+
+        var mapped = StripDiacriticsSlow(upper);
+        StripCacheUpper[upper] = (ushort)(mapped + 1);
+        return mapped;
+    }
+
+    /// <summary>
+    /// Uncached path: decomposes the character (FormD, then FormKD if FormD changed nothing),
+    /// takes the first non-mark character and upper-cases it. Falls back to the input on any failure.
+    /// </summary>
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static char StripDiacriticsSlow(char upper)
+    {
+        try
+        {
+            var baseChar = FirstNonMark(upper, NormalizationForm.FormD);
+            if (baseChar == '\0' || baseChar == upper)
+            {
+                var kd = FirstNonMark(upper, NormalizationForm.FormKD);
+                if (kd != '\0')
+                {
+                    baseChar = kd;
+                }
+            }
+
+            return char.ToUpperInvariant(baseChar == '\0' ? upper : baseChar);
+        }
+        catch
+        {
+            // Absolute safety: if globalization tables ever throw for some reason,
+            // degrade gracefully rather than failing hard.
+            return upper;
+        }
+
+        // First character of the normalized form that is not a combining/enclosing mark, or '\0' if none.
+        static char FirstNonMark(char c, NormalizationForm form)
+        {
+            var normalized = c.ToString().Normalize(form);
+
+            foreach (var ch in normalized)
+            {
+                var cat = CharUnicodeInfo.GetUnicodeCategory(ch);
+                if (cat is not (UnicodeCategory.NonSpacingMark or UnicodeCategory.SpacingCombiningMark or UnicodeCategory.EnclosingMark))
+                {
+                    return ch;
+                }
+            }
+
+            return '\0';
+        }
+    }
+}
diff --git a/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/SymbolClassifier.cs b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/SymbolClassifier.cs
new file mode 100644
index 0000000000..e1be786646
--- /dev/null
+++ b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/SymbolClassifier.cs
@@ -0,0 +1,29 @@
+// Copyright (c) Microsoft Corporation
+// The Microsoft Corporation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Runtime.CompilerServices;
+
+namespace Microsoft.CmdPal.Core.Common.Text;
+
+internal static class SymbolClassifier
+{
+    // Embedded in .data section - no allocation, no static constructor
+    // One entry per ASCII code point; values are SymbolKind members (0 = Other, 1 = PathSeparator, 2 = WordSeparator).
+    private static ReadOnlySpan<byte> Lookup =>
+    [
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0-15
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
+        2, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 2, 1, // 32-47: space=2, "=2, '=2, -=2, .=2, /=1
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, // 48-63: :=2
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 64-79
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, // 80-95: \=1, _=2
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 96-111
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // 112-127
+    ];
+
+    // Classifies an ASCII character via the table above; everything above 0x7F is Other.
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    public static SymbolKind Classify(char c)
+    {
+        return c > 0x7F ?
SymbolKind.Other : (SymbolKind)Lookup[c];
+    }
+}
diff --git a/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/SymbolKind.cs b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/SymbolKind.cs
new file mode 100644
index 0000000000..d2644be420
--- /dev/null
+++ b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.Common/Text/SymbolKind.cs
@@ -0,0 +1,12 @@
+// Copyright (c) Microsoft Corporation
+// The Microsoft Corporation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+namespace Microsoft.CmdPal.Core.Common.Text;
+
+// Character class produced by SymbolClassifier.Classify.
+// The numeric values are stored directly in the classifier's byte lookup table and cast back to this enum.
+internal enum SymbolKind : byte
+{
+    // Any character that is not one of the separators below, including every non-ASCII character.
+    Other = 0,
+
+    // '/' and '\'.
+    PathSeparator = 1,
+
+    // space, '"', '\'', '-', '.', ':' and '_'.
+    WordSeparator = 2,
+}
diff --git a/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.ViewModels/CommandItemViewModel.cs b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.ViewModels/CommandItemViewModel.cs index f8e9478023..af10995cf9 100644 --- a/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.ViewModels/CommandItemViewModel.cs +++ b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.ViewModels/CommandItemViewModel.cs @@ -4,6 +4,8 @@ using System.Diagnostics.CodeAnalysis; using Microsoft.CmdPal.Core.Common; +using Microsoft.CmdPal.Core.Common.Helpers; +using Microsoft.CmdPal.Core.Common.Text; using Microsoft.CmdPal.Core.ViewModels.Messages; using Microsoft.CmdPal.Core.ViewModels.Models; using Microsoft.CommandPalette.Extensions; @@ -13,7 +15,7 @@ using Windows.ApplicationModel.DataTransfer; namespace Microsoft.CmdPal.Core.ViewModels; [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.All)] -public partial class CommandItemViewModel : ExtensionObjectViewModel, ICommandBarContext +public partial class CommandItemViewModel : ExtensionObjectViewModel, ICommandBarContext, IPrecomputedListItem { public ExtensionObject Model => _commandItemModel; @@ -22,6 +24,9 @@ public partial class CommandItemViewModel : ExtensionObjectViewModel, ICommandBa private readonly ExtensionObject _commandItemModel = new(null); private
CommandContextItemViewModel? _defaultCommandContextItemViewModel; + private FuzzyTargetCache _titleCache; + private FuzzyTargetCache _subtitleCache; + internal InitializedState Initialized { get; private set; } = InitializedState.Uninitialized; protected bool IsFastInitialized => IsInErrorState || Initialized.HasFlag(InitializedState.FastInitialized); @@ -116,6 +121,8 @@ public partial class CommandItemViewModel : ExtensionObjectViewModel, ICommandBa _itemTitle = model.Title; Subtitle = model.Subtitle; + _titleCache.Invalidate(); + _subtitleCache.Invalidate(); Initialized |= InitializedState.FastInitialized; } @@ -249,6 +256,8 @@ public partial class CommandItemViewModel : ExtensionObjectViewModel, ICommandBa Subtitle = "Item failed to load"; MoreCommands = []; _icon = _errorIcon; + _titleCache.Invalidate(); + _subtitleCache.Invalidate(); Initialized |= InitializedState.Error; } @@ -286,6 +295,8 @@ public partial class CommandItemViewModel : ExtensionObjectViewModel, ICommandBa Subtitle = "Item failed to load"; MoreCommands = []; _icon = _errorIcon; + _titleCache.Invalidate(); + _subtitleCache.Invalidate(); Initialized |= InitializedState.Error; } @@ -335,12 +346,14 @@ public partial class CommandItemViewModel : ExtensionObjectViewModel, ICommandBa case nameof(Title): _itemTitle = model.Title; + _titleCache.Invalidate(); break; case nameof(Subtitle): var modelSubtitle = model.Subtitle; this.Subtitle = modelSubtitle; _defaultCommandContextItemViewModel?.Subtitle = modelSubtitle; + _subtitleCache.Invalidate(); break; case nameof(Icon): @@ -415,6 +428,7 @@ public partial class CommandItemViewModel : ExtensionObjectViewModel, ICommandBa // Extensions based on Command Palette SDK < 0.3 CommandItem class won't notify when Title changes because Command // or Command.Name change. This is a workaround to ensure that the Title is always up-to-date for extensions with old SDK. 
_itemTitle = model.Title; + _titleCache.Invalidate(); UpdateProperty(nameof(Title), nameof(Name)); _defaultCommandContextItemViewModel?.UpdateTitle(model.Command.Name); @@ -436,6 +450,7 @@ public partial class CommandItemViewModel : ExtensionObjectViewModel, ICommandBa private void UpdateTitle(string? title) { _itemTitle = title ?? string.Empty; + _titleCache.Invalidate(); UpdateProperty(nameof(Title)); } @@ -456,6 +471,12 @@ public partial class CommandItemViewModel : ExtensionObjectViewModel, ICommandBa UpdateProperty(nameof(DataPackage)); } + public FuzzyTarget GetTitleTarget(IPrecomputedFuzzyMatcher matcher) + => _titleCache.GetOrUpdate(matcher, Title); + + public FuzzyTarget GetSubtitleTarget(IPrecomputedFuzzyMatcher matcher) + => _subtitleCache.GetOrUpdate(matcher, Subtitle); + protected override void UnsafeCleanup() { base.UnsafeCleanup(); diff --git a/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.ViewModels/ContextMenuViewModel.cs b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.ViewModels/ContextMenuViewModel.cs index 07c238ab42..83f314a11f 100644 --- a/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.ViewModels/ContextMenuViewModel.cs +++ b/src/modules/cmdpal/Core/Microsoft.CmdPal.Core.ViewModels/ContextMenuViewModel.cs @@ -3,9 +3,12 @@ // See the LICENSE file in the project root for more information. using System.Collections.ObjectModel; +using System.Runtime.CompilerServices; using CommunityToolkit.Mvvm.ComponentModel; using CommunityToolkit.Mvvm.Messaging; using Microsoft.CmdPal.Core.Common; +using Microsoft.CmdPal.Core.Common.Helpers; +using Microsoft.CmdPal.Core.Common.Text; using Microsoft.CmdPal.Core.ViewModels.Messages; using Microsoft.CommandPalette.Extensions; using Microsoft.CommandPalette.Extensions.Toolkit; @@ -16,6 +19,8 @@ namespace Microsoft.CmdPal.Core.ViewModels; public partial class ContextMenuViewModel : ObservableObject, IRecipient { + private readonly IFuzzyMatcherProvider _fuzzyMatcherProvider; + public ICommandBarContext? 
SelectedItem { get => field; @@ -39,8 +44,9 @@ public partial class ContextMenuViewModel : ObservableObject, private string _lastSearchText = string.Empty; - public ContextMenuViewModel() + public ContextMenuViewModel(IFuzzyMatcherProvider fuzzyMatcherProvider) { + _fuzzyMatcherProvider = fuzzyMatcherProvider; WeakReferenceMessenger.Default.Register(this); } @@ -91,13 +97,14 @@ public partial class ContextMenuViewModel : ObservableObject, .OfType() .Where(c => c.ShouldBeVisible); - var newResults = ListHelpers.FilterList(commands, searchText, ScoreContextCommand); + var query = _fuzzyMatcherProvider.Current.PrecomputeQuery(searchText); + var newResults = InternalListHelpers.FilterList(commands, in query, ScoreFunction); ListHelpers.InPlaceUpdateList(FilteredItems, newResults); } - private static int ScoreContextCommand(string query, CommandContextItemViewModel item) + private int ScoreFunction(in FuzzyQuery query, CommandContextItemViewModel item) { - if (string.IsNullOrEmpty(query) || string.IsNullOrWhiteSpace(query)) + if (string.IsNullOrWhiteSpace(query.Original)) { return 1; } @@ -107,11 +114,21 @@ public partial class ContextMenuViewModel : ObservableObject, return 0; } - var nameMatch = FuzzyStringMatcher.ScoreFuzzy(query, item.Title); + var fuzzyMatcher = _fuzzyMatcherProvider.Current; + var title = item.GetTitleTarget(fuzzyMatcher); + var subtitle = item.GetSubtitleTarget(fuzzyMatcher); - var descriptionMatch = FuzzyStringMatcher.ScoreFuzzy(query, item.Subtitle); + var titleScore = fuzzyMatcher.Score(query, title); + var subtitleScore = (fuzzyMatcher.Score(query, subtitle) - 4) / 2; - return new[] { nameMatch, (descriptionMatch - 4) / 2, 0 }.Max(); + return Max3(titleScore, subtitleScore, 0); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int Max3(int a, int b, int c) + { + var m = a > b ? a : b; + return m > c ? 
m : c; } /// diff --git a/src/modules/cmdpal/Microsoft.CmdPal.UI.ViewModels/Commands/MainListPage.cs b/src/modules/cmdpal/Microsoft.CmdPal.UI.ViewModels/Commands/MainListPage.cs index 2ee8f1e357..325f9b5ff8 100644 --- a/src/modules/cmdpal/Microsoft.CmdPal.UI.ViewModels/Commands/MainListPage.cs +++ b/src/modules/cmdpal/Microsoft.CmdPal.UI.ViewModels/Commands/MainListPage.cs @@ -8,6 +8,7 @@ using System.Diagnostics; using CommunityToolkit.Mvvm.Messaging; using ManagedCommon; using Microsoft.CmdPal.Core.Common.Helpers; +using Microsoft.CmdPal.Core.Common.Text; using Microsoft.CmdPal.Core.ViewModels.Messages; using Microsoft.CmdPal.Ext.Apps; using Microsoft.CmdPal.Ext.Apps.Programs; @@ -24,7 +25,7 @@ namespace Microsoft.CmdPal.UI.ViewModels.MainPage; /// This class encapsulates the data we load from built-in providers and extensions to use within the same extension-UI system for a . /// TODO: Need to think about how we structure/interop for the page -> section -> item between the main setup, the extensions, and our viewmodels. /// -public partial class MainListPage : DynamicListPage, +public sealed partial class MainListPage : DynamicListPage, IRecipient, IRecipient, IDisposable { @@ -32,13 +33,18 @@ public partial class MainListPage : DynamicListPage, private readonly AliasManager _aliasManager; private readonly SettingsModel _settings; private readonly AppStateModel _appStateModel; - private List>? _filteredItems; - private List>? _filteredApps; + private readonly ScoringFunction _scoringFunction; + private readonly ScoringFunction _fallbackScoringFunction; + private readonly IFuzzyMatcherProvider _fuzzyMatcherProvider; + + private RoScored[]? _filteredItems; + private RoScored[]? _filteredApps; // Keep as IEnumerable for deferred execution. Fallback item titles are updated // asynchronously, so scoring must happen lazily when GetItems is called. - private IEnumerable>? _scoredFallbackItems; - private IEnumerable>? _fallbackItems; + private IEnumerable>? 
_scoredFallbackItems; + private IEnumerable>? _fallbackItems; + private bool _includeApps; private bool _filteredItemsIncludesApps; private int _appResultLimit = 10; @@ -48,7 +54,12 @@ public partial class MainListPage : DynamicListPage, private CancellationTokenSource? _cancellationTokenSource; - public MainListPage(TopLevelCommandManager topLevelCommandManager, SettingsModel settings, AliasManager aliasManager, AppStateModel appStateModel) + public MainListPage( + TopLevelCommandManager topLevelCommandManager, + SettingsModel settings, + AliasManager aliasManager, + AppStateModel appStateModel, + IFuzzyMatcherProvider fuzzyMatcherProvider) { Title = Resources.builtin_home_name; Icon = IconHelpers.FromRelativePath("Assets\\StoreLogo.scale-200.png"); @@ -58,6 +69,10 @@ public partial class MainListPage : DynamicListPage, _aliasManager = aliasManager; _appStateModel = appStateModel; _tlcManager = topLevelCommandManager; + _fuzzyMatcherProvider = fuzzyMatcherProvider; + _scoringFunction = (in query, item) => ScoreTopLevelItem(in query, item, _appStateModel.RecentCommands, _fuzzyMatcherProvider.Current); + _fallbackScoringFunction = (in _, item) => ScoreFallbackItem(item, _settings.FallbackRanks); + _tlcManager.PropertyChanged += TlcManager_PropertyChanged; _tlcManager.TopLevelCommands.CollectionChanged += Commands_CollectionChanged; @@ -190,8 +205,7 @@ public partial class MainListPage : DynamicListPage, public override void UpdateSearchText(string oldSearch, string newSearch) { - var timer = new Stopwatch(); - timer.Start(); + var stopwatch = Stopwatch.StartNew(); _cancellationTokenSource?.Cancel(); _cancellationTokenSource?.Dispose(); @@ -354,15 +368,14 @@ public partial class MainListPage : DynamicListPage, if (_includeApps) { - var allNewApps = AllAppsCommandProvider.Page.GetItems().ToList(); + var allNewApps = AllAppsCommandProvider.Page.GetItems().Cast().ToList(); // We need to remove pinned apps from allNewApps so they don't show twice. 
var pinnedApps = PinnedAppsManager.Instance.GetPinnedAppIdentifiers(); if (pinnedApps.Length > 0) { - newApps = allNewApps.Where(w => - pinnedApps.IndexOf(((AppListItem)w).AppIdentifier) < 0); + newApps = allNewApps.Where(w => pinnedApps.IndexOf(w.AppIdentifier) < 0); } else { @@ -376,11 +389,10 @@ public partial class MainListPage : DynamicListPage, } } - var history = _appStateModel.RecentCommands!; - Func scoreItem = (a, b) => { return ScoreTopLevelItem(a, b, history); }; + var searchQuery = _fuzzyMatcherProvider.Current.PrecomputeQuery(SearchText); // Produce a list of everything that matches the current filter. - _filteredItems = [.. ListHelpers.FilterListWithScores(newFilteredItems ?? [], SearchText, scoreItem)]; + _filteredItems = InternalListHelpers.FilterListWithScores(newFilteredItems, searchQuery, _scoringFunction); if (token.IsCancellationRequested) { @@ -388,21 +400,14 @@ public partial class MainListPage : DynamicListPage, } IEnumerable newFallbacksForScoring = commands.Where(s => s.IsFallback && globalFallbacks.Contains(s.Id)); + _scoredFallbackItems = InternalListHelpers.FilterListWithScores(newFallbacksForScoring, searchQuery, _scoringFunction); if (token.IsCancellationRequested) { return; } - _scoredFallbackItems = ListHelpers.FilterListWithScores(newFallbacksForScoring ?? [], SearchText, scoreItem); - - if (token.IsCancellationRequested) - { - return; - } - - Func scoreFallbackItem = (a, b) => { return ScoreFallbackItem(a, b, _settings.FallbackRanks); }; - _fallbackItems = [.. ListHelpers.FilterListWithScores(newFallbacks ?? [], SearchText, scoreFallbackItem)]; + _fallbackItems = InternalListHelpers.FilterListWithScores(newFallbacks ?? 
[], searchQuery, _fallbackScoringFunction); if (token.IsCancellationRequested) { @@ -412,18 +417,7 @@ public partial class MainListPage : DynamicListPage, // Produce a list of filtered apps with the appropriate limit if (newApps.Any()) { - var scoredApps = ListHelpers.FilterListWithScores(newApps, SearchText, scoreItem); - - if (token.IsCancellationRequested) - { - return; - } - - // We'll apply this limit in the GetItems method after merging with commands - // but we need to know the limit now to avoid re-scoring apps - var appLimit = AllAppsCommandProvider.TopLevelResultLimit; - - _filteredApps = [.. scoredApps]; + _filteredApps = InternalListHelpers.FilterListWithScores(newApps, searchQuery, _scoringFunction); if (token.IsCancellationRequested) { @@ -431,10 +425,15 @@ public partial class MainListPage : DynamicListPage, } } + var filterDoneTimestamp = stopwatch.ElapsedMilliseconds; + Logger.LogDebug($"Filter with '{newSearch}' in {filterDoneTimestamp}ms"); + RaiseItemsChanged(); - timer.Stop(); - Logger.LogDebug($"Filter with '{newSearch}' in {timer.ElapsedMilliseconds}ms"); + var listPageUpdatedTimestamp = stopwatch.ElapsedMilliseconds; + Logger.LogDebug($"Render items with '{newSearch}' in {listPageUpdatedTimestamp}ms /d {listPageUpdatedTimestamp - filterDoneTimestamp}ms"); + + stopwatch.Stop(); } } @@ -478,7 +477,11 @@ public partial class MainListPage : DynamicListPage, // Almost verbatim ListHelpers.ScoreListItem, but also accounting for the // fact that we want fallback handlers down-weighted, so that they don't // _always_ show up first. 
- internal static int ScoreTopLevelItem(string query, IListItem topLevelOrAppItem, IRecentCommandsManager history) + internal static int ScoreTopLevelItem( + in FuzzyQuery query, + IListItem topLevelOrAppItem, + IRecentCommandsManager history, + IPrecomputedFuzzyMatcher precomputedFuzzyMatcher) { var title = topLevelOrAppItem.Title; if (string.IsNullOrWhiteSpace(title)) @@ -486,94 +489,80 @@ public partial class MainListPage : DynamicListPage, return 0; } - var isWhiteSpace = string.IsNullOrWhiteSpace(query); - var isFallback = false; var isAliasSubstringMatch = false; var isAliasMatch = false; var id = IdForTopLevelOrAppItem(topLevelOrAppItem); - var extensionDisplayName = string.Empty; + FuzzyTarget? extensionDisplayNameTarget = null; if (topLevelOrAppItem is TopLevelViewModel topLevel) { isFallback = topLevel.IsFallback; + extensionDisplayNameTarget = topLevel.GetExtensionNameTarget(precomputedFuzzyMatcher); + if (topLevel.HasAlias) { var alias = topLevel.AliasText; - isAliasMatch = alias == query; - isAliasSubstringMatch = isAliasMatch || alias.StartsWith(query, StringComparison.CurrentCultureIgnoreCase); + isAliasMatch = alias == query.Original; + isAliasSubstringMatch = isAliasMatch || alias.StartsWith(query.Original, StringComparison.CurrentCultureIgnoreCase); } - - extensionDisplayName = topLevel.ExtensionHost?.Extension?.PackageDisplayName ?? string.Empty; } - // StringMatcher.FuzzySearch will absolutely BEEF IT if you give it a - // whitespace-only query. - // - // in that scenario, we'll just use a simple string contains for the - // query. Maybe someone is really looking for things with a space in - // them, I don't know. - - // Title: - // * whitespace query: 1 point - // * otherwise full weight match - var nameMatch = isWhiteSpace ? - (title.Contains(query) ? 1 : 0) : - FuzzyStringMatcher.ScoreFuzzy(query, title); - - // Subtitle: - // * whitespace query: 1/2 point - // * otherwise ~half weight match. 
Minus a bit, because subtitles tend to be longer - var descriptionMatch = isWhiteSpace ? - (topLevelOrAppItem.Subtitle.Contains(query) ? .5 : 0) : - (FuzzyStringMatcher.ScoreFuzzy(query, topLevelOrAppItem.Subtitle) - 4) / 2.0; - - // Extension title: despite not being visible, give the extension name itself some weight - // * whitespace query: 0 points - // * otherwise more weight than a subtitle, but not much - var extensionTitleMatch = isWhiteSpace ? 0 : FuzzyStringMatcher.ScoreFuzzy(query, extensionDisplayName) / 1.5; - - var scores = new[] + // Handle whitespace query separately - FuzzySearch doesn't handle it well + if (string.IsNullOrWhiteSpace(query.Original)) { - nameMatch, - descriptionMatch, - isFallback ? 1 : 0, // Always give fallbacks a chance - }; - var max = scores.Max(); + return ScoreWhitespaceQuery(query.Original, title, topLevelOrAppItem.Subtitle, isFallback); + } - // _Add_ the extension name. This will bubble items that match both - // title and extension name up above ones that just match title. - // e.g. "git" will up-weight "GitHub searches" from the GitHub extension - // above "git" from "whatever" - max = max + extensionTitleMatch; + // Get precomputed targets + var (titleTarget, subtitleTarget) = topLevelOrAppItem is IPrecomputedListItem precomputedItem + ? (precomputedItem.GetTitleTarget(precomputedFuzzyMatcher), precomputedItem.GetSubtitleTarget(precomputedFuzzyMatcher)) + : (precomputedFuzzyMatcher.PrecomputeTarget(title), precomputedFuzzyMatcher.PrecomputeTarget(topLevelOrAppItem.Subtitle)); + + // Score components + var nameScore = precomputedFuzzyMatcher.Score(query, titleTarget); + var descriptionScore = (precomputedFuzzyMatcher.Score(query, subtitleTarget) - 4) / 2.0; + var extensionScore = extensionDisplayNameTarget is { } extTarget ? 
precomputedFuzzyMatcher.Score(query, extTarget) / 1.5 : 0; + + // Take best match from title/description/fallback, then add extension score + // Extension adds to max so items matching both title AND extension bubble up + var baseScore = Math.Max(Math.Max(nameScore, descriptionScore), isFallback ? 1 : 0); + var matchScore = baseScore + extensionScore; // Apply a penalty to fallback items so they rank below direct matches. // Fallbacks that dynamically match queries (like RDP connections) should // appear after apps and direct command matches. - if (isFallback && max > 1) + if (isFallback && matchScore > 1) { // Reduce fallback scores by 50% to prioritize direct matches - max = max * 0.5; + matchScore = matchScore * 0.5; } - var matchSomething = max - + (isAliasMatch ? 9001 : (isAliasSubstringMatch ? 1 : 0)); + // Alias matching: exact match is overwhelming priority, substring match adds a small boost + var aliasBoost = isAliasMatch ? 9001 : (isAliasSubstringMatch ? 1 : 0); + var totalMatch = matchScore + aliasBoost; - // If we matched title, subtitle, or alias (something real), then - // here we add the recent command weight boost - // - // Otherwise something like `x` will still match everything you've run before - var finalScore = matchSomething * 10; - if (matchSomething > 0) + // Apply scaling and history boost only if we matched something real + var finalScore = totalMatch * 10; + if (totalMatch > 0) { - var recentWeightBoost = history.GetCommandHistoryWeight(id); - finalScore += recentWeightBoost; + finalScore += history.GetCommandHistoryWeight(id); } return (int)finalScore; } - internal static int ScoreFallbackItem(string query, IListItem topLevelOrAppItem, string[] fallbackRanks) + private static int ScoreWhitespaceQuery(string query, string title, string subtitle, bool isFallback) + { + // Simple contains check for whitespace queries + var nameMatch = title.Contains(query, StringComparison.Ordinal) ? 
1.0 : 0; + var descriptionMatch = subtitle.Contains(query, StringComparison.Ordinal) ? 0.5 : 0; + var baseScore = Math.Max(Math.Max(nameMatch, descriptionMatch), isFallback ? 1 : 0); + + return (int)(baseScore * 10); + } + + private static int ScoreFallbackItem(IListItem topLevelOrAppItem, string[] fallbackRanks) { // Default to 1 so it always shows in list. var finalScore = 1; diff --git a/src/modules/cmdpal/Microsoft.CmdPal.UI.ViewModels/Commands/MainListPageResultFactory.cs b/src/modules/cmdpal/Microsoft.CmdPal.UI.ViewModels/Commands/MainListPageResultFactory.cs index d63c0e4f90..0c0d876179 100644 --- a/src/modules/cmdpal/Microsoft.CmdPal.UI.ViewModels/Commands/MainListPageResultFactory.cs +++ b/src/modules/cmdpal/Microsoft.CmdPal.UI.ViewModels/Commands/MainListPageResultFactory.cs @@ -4,6 +4,7 @@ #pragma warning disable IDE0007 // Use implicit type +using Microsoft.CmdPal.Core.Common.Helpers; using Microsoft.CommandPalette.Extensions; using Microsoft.CommandPalette.Extensions.Toolkit; @@ -16,10 +17,10 @@ internal static class MainListPageResultFactory /// applying an application result limit and filtering fallback items as needed. /// public static IListItem[] Create( - IList>? filteredItems, - IList>? scoredFallbackItems, - IList>? filteredApps, - IList>? fallbackItems, + IList>? filteredItems, + IList>? scoredFallbackItems, + IList>? filteredApps, + IList>? fallbackItems, int appResultLimit) { if (appResultLimit < 0) @@ -147,7 +148,7 @@ internal static class MainListPageResultFactory return result; } - private static int GetNonEmptyFallbackItemsCount(IList>? fallbackItems) + private static int GetNonEmptyFallbackItemsCount(IList>? 
fallbackItems) { int fallbackItemsCount = 0; diff --git a/src/modules/cmdpal/Microsoft.CmdPal.UI.ViewModels/TopLevelViewModel.cs b/src/modules/cmdpal/Microsoft.CmdPal.UI.ViewModels/TopLevelViewModel.cs index cc863fe362..13b9423119 100644 --- a/src/modules/cmdpal/Microsoft.CmdPal.UI.ViewModels/TopLevelViewModel.cs +++ b/src/modules/cmdpal/Microsoft.CmdPal.UI.ViewModels/TopLevelViewModel.cs @@ -3,8 +3,11 @@ // See the LICENSE file in the project root for more information. using System.Collections.ObjectModel; +using System.Diagnostics; using CommunityToolkit.Mvvm.ComponentModel; using ManagedCommon; +using Microsoft.CmdPal.Core.Common.Helpers; +using Microsoft.CmdPal.Core.Common.Text; using Microsoft.CmdPal.Core.ViewModels; using Microsoft.CmdPal.Core.ViewModels.Messages; using Microsoft.CmdPal.UI.ViewModels.Settings; @@ -16,7 +19,8 @@ using WyHash; namespace Microsoft.CmdPal.UI.ViewModels; -public sealed partial class TopLevelViewModel : ObservableObject, IListItem, IExtendedAttributesProvider +[DebuggerDisplay($"{{{nameof(GetDebuggerDisplay)}(),nq}}")] +public sealed partial class TopLevelViewModel : ObservableObject, IListItem, IExtendedAttributesProvider, IPrecomputedListItem { private readonly SettingsModel _settings; private readonly ProviderSettings _providerSettings; @@ -34,6 +38,10 @@ public sealed partial class TopLevelViewModel : ObservableObject, IListItem, IEx private HotkeySettings? _hotkey; private IIconInfo? _initialIcon; + private FuzzyTargetCache _titleCache; + private FuzzyTargetCache _subtitleCache; + private FuzzyTargetCache _extensionNameCache; + private CommandAlias? Alias { get; set; } public bool IsFallback { get; private set; } @@ -176,6 +184,8 @@ public sealed partial class TopLevelViewModel : ObservableObject, IListItem, IEx } } + public string ExtensionName => ExtensionHost.GetExtensionDisplayName() ?? 
string.Empty; + public TopLevelViewModel( CommandItemViewModel item, bool isFallback, @@ -230,6 +240,15 @@ public sealed partial class TopLevelViewModel : ObservableObject, IListItem, IEx { PropChanged?.Invoke(this, new PropChangedEventArgs(e.PropertyName)); + if (e.PropertyName is nameof(CommandItemViewModel.Title) or nameof(CommandItemViewModel.Name)) + { + _titleCache.Invalidate(); + } + else if (e.PropertyName is nameof(CommandItemViewModel.Subtitle)) + { + _subtitleCache.Invalidate(); + } + if (e.PropertyName is "IsInitialized" or nameof(CommandItemViewModel.Command)) { GenerateId(); @@ -420,4 +439,18 @@ public sealed partial class TopLevelViewModel : ObservableObject, IListItem, IEx [WellKnownExtensionAttributes.DataPackage] = _commandItemViewModel?.DataPackage, }; } + + public FuzzyTarget GetTitleTarget(IPrecomputedFuzzyMatcher matcher) + => _titleCache.GetOrUpdate(matcher, Title); + + public FuzzyTarget GetSubtitleTarget(IPrecomputedFuzzyMatcher matcher) + => _subtitleCache.GetOrUpdate(matcher, Subtitle); + + public FuzzyTarget GetExtensionNameTarget(IPrecomputedFuzzyMatcher matcher) + => _extensionNameCache.GetOrUpdate(matcher, ExtensionName); + + private string GetDebuggerDisplay() + { + return ToString(); + } } diff --git a/src/modules/cmdpal/Microsoft.CmdPal.UI/App.xaml.cs b/src/modules/cmdpal/Microsoft.CmdPal.UI/App.xaml.cs index eb103d3157..152bf95a62 100644 --- a/src/modules/cmdpal/Microsoft.CmdPal.UI/App.xaml.cs +++ b/src/modules/cmdpal/Microsoft.CmdPal.UI/App.xaml.cs @@ -6,6 +6,7 @@ using ManagedCommon; using Microsoft.CmdPal.Core.Common; using Microsoft.CmdPal.Core.Common.Helpers; using Microsoft.CmdPal.Core.Common.Services; +using Microsoft.CmdPal.Core.Common.Text; using Microsoft.CmdPal.Core.ViewModels; using Microsoft.CmdPal.Ext.Apps; using Microsoft.CmdPal.Ext.Bookmarks; @@ -206,6 +207,9 @@ public partial class App : Application, IDisposable services.AddSingleton(); services.AddSingleton(); + services.AddSingleton( + _ => new 
FuzzyMatcherProvider(new PrecomputedFuzzyMatcherOptions(), new PinyinFuzzyMatcherOptions())); + // ViewModels services.AddSingleton(); services.AddSingleton(); diff --git a/src/modules/cmdpal/Microsoft.CmdPal.UI/Controls/ContextMenu.xaml.cs b/src/modules/cmdpal/Microsoft.CmdPal.UI/Controls/ContextMenu.xaml.cs index afc2d190ef..58f7c6318f 100644 --- a/src/modules/cmdpal/Microsoft.CmdPal.UI/Controls/ContextMenu.xaml.cs +++ b/src/modules/cmdpal/Microsoft.CmdPal.UI/Controls/ContextMenu.xaml.cs @@ -4,9 +4,11 @@ using CommunityToolkit.Mvvm.Messaging; using CommunityToolkit.WinUI; +using Microsoft.CmdPal.Core.Common.Text; using Microsoft.CmdPal.Core.ViewModels; using Microsoft.CmdPal.Core.ViewModels.Messages; using Microsoft.CmdPal.UI.Messages; +using Microsoft.Extensions.DependencyInjection; using Microsoft.UI.Input; using Microsoft.UI.Xaml; using Microsoft.UI.Xaml.Controls; @@ -21,21 +23,19 @@ public sealed partial class ContextMenu : UserControl, IRecipient, IRecipient { - public ContextMenuViewModel ViewModel { get; } = new(); + public ContextMenuViewModel ViewModel { get; } public ContextMenu() { this.InitializeComponent(); + ViewModel = new ContextMenuViewModel(App.Current.Services.GetRequiredService()); + ViewModel.PropertyChanged += ViewModel_PropertyChanged; + // RegisterAll isn't AOT compatible WeakReferenceMessenger.Default.Register(this); WeakReferenceMessenger.Default.Register(this); WeakReferenceMessenger.Default.Register(this); - - if (ViewModel is not null) - { - ViewModel.PropertyChanged += ViewModel_PropertyChanged; - } } public void Receive(OpenContextMenuMessage message) diff --git a/src/modules/cmdpal/Microsoft.CmdPal.UI/PowerToysRootPageService.cs b/src/modules/cmdpal/Microsoft.CmdPal.UI/PowerToysRootPageService.cs index 9a877358f0..23d8b413e0 100644 --- a/src/modules/cmdpal/Microsoft.CmdPal.UI/PowerToysRootPageService.cs +++ b/src/modules/cmdpal/Microsoft.CmdPal.UI/PowerToysRootPageService.cs @@ -6,6 +6,7 @@ using System.Runtime.InteropServices; 
using System.Runtime.Versioning; using ManagedCommon; using Microsoft.CmdPal.Core.Common.Services; +using Microsoft.CmdPal.Core.Common.Text; using Microsoft.CmdPal.Core.ViewModels; using Microsoft.CmdPal.UI.ViewModels; using Microsoft.CmdPal.UI.ViewModels.MainPage; @@ -23,13 +24,13 @@ internal sealed class PowerToysRootPageService : IRootPageService private IExtensionWrapper? _activeExtension; private Lazy _mainListPage; - public PowerToysRootPageService(TopLevelCommandManager topLevelCommandManager, SettingsModel settings, AliasManager aliasManager, AppStateModel appStateModel) + public PowerToysRootPageService(TopLevelCommandManager topLevelCommandManager, SettingsModel settings, AliasManager aliasManager, AppStateModel appStateModel, IFuzzyMatcherProvider fuzzyMatcherProvider) { _tlcManager = topLevelCommandManager; _mainListPage = new Lazy(() => { - return new MainListPage(_tlcManager, settings, aliasManager, appStateModel); + return new MainListPage(_tlcManager, settings, aliasManager, appStateModel, fuzzyMatcherProvider); }); } diff --git a/src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/PrecomputedFuzzyMatcherEmojiTests.cs b/src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/PrecomputedFuzzyMatcherEmojiTests.cs new file mode 100644 index 0000000000..fc85834b2e --- /dev/null +++ b/src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/PrecomputedFuzzyMatcherEmojiTests.cs @@ -0,0 +1,78 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using Microsoft.CmdPal.Core.Common.Text; + +namespace Microsoft.CmdPal.Common.UnitTests.Text; + +[TestClass] +public sealed class PrecomputedFuzzyMatcherEmojiTests +{ + private readonly PrecomputedFuzzyMatcher _matcher = new(); + + [TestMethod] + public void ExactMatch_SimpleEmoji_ReturnsScore() + { + const string needle = "🚀"; + const string haystack = "Launch 🚀 sequence"; + + var query = _matcher.PrecomputeQuery(needle); + var target = _matcher.PrecomputeTarget(haystack); + var score = _matcher.Score(query, target); + + Assert.IsTrue(score > 0, "Expected match for simple emoji"); + } + + [TestMethod] + public void ExactMatch_SkinTone_ReturnsScore() + { + const string needle = "👍🏽"; // Medium skin tone + const string haystack = "Thumbs up 👍🏽 here"; + + var query = _matcher.PrecomputeQuery(needle); + var target = _matcher.PrecomputeTarget(haystack); + var score = _matcher.Score(query, target); + + Assert.IsTrue(score > 0, "Expected match for emoji with skin tone"); + } + + [TestMethod] + public void ZWJSequence_Family_Match() + { + const string needle = "👨‍👩‍👧‍👦"; // Family: Man, Woman, Girl, Boy + const string haystack = "Emoji 👨‍👩‍👧‍👦 Test"; + + var query = _matcher.PrecomputeQuery(needle); + var target = _matcher.PrecomputeTarget(haystack); + var score = _matcher.Score(query, target); + + Assert.IsTrue(score > 0, "Expected match for ZWJ sequence"); + } + + [TestMethod] + public void Flags_Match() + { + const string needle = "🇺🇸"; // US Flag (Regional Indicator U + Regional Indicator S) + const string haystack = "USA 🇺🇸"; + + var query = _matcher.PrecomputeQuery(needle); + var target = _matcher.PrecomputeTarget(haystack); + var score = _matcher.Score(query, target); + + Assert.IsTrue(score > 0, "Expected match for flag emoji"); + } + + [TestMethod] + public void Emoji_MixedWithText_Search() + { + const string needle = "t🌮o"; // "t" + taco + "o" + const string haystack = "taco 🌮 on tuesday"; + + var query = _matcher.PrecomputeQuery(needle); + var target = 
_matcher.PrecomputeTarget(haystack); + var score = _matcher.Score(query, target); + + Assert.IsTrue(score > 0, "Expected match for emoji mixed with text"); + } +} diff --git a/src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/PrecomputedFuzzyMatcherOptionsTests.cs b/src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/PrecomputedFuzzyMatcherOptionsTests.cs new file mode 100644 index 0000000000..b5798986ff --- /dev/null +++ b/src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/PrecomputedFuzzyMatcherOptionsTests.cs @@ -0,0 +1,84 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.CmdPal.Core.Common.Text; + +namespace Microsoft.CmdPal.Common.UnitTests.Text; + +[TestClass] +public sealed class PrecomputedFuzzyMatcherOptionsTests +{ + [TestMethod] + public void Score_RemoveDiacriticsOption_AffectsMatching() + { + var withDiacriticsRemoved = new PrecomputedFuzzyMatcher( + new PrecomputedFuzzyMatcherOptions { RemoveDiacritics = true }); + var withoutDiacriticsRemoved = new PrecomputedFuzzyMatcher( + new PrecomputedFuzzyMatcherOptions { RemoveDiacritics = false }); + + const string needle = "cafe"; + const string haystack = "CAFÉ"; + + var scoreWithRemoval = withDiacriticsRemoved.Score( + withDiacriticsRemoved.PrecomputeQuery(needle), + withDiacriticsRemoved.PrecomputeTarget(haystack)); + var scoreWithoutRemoval = withoutDiacriticsRemoved.Score( + withoutDiacriticsRemoved.PrecomputeQuery(needle), + withoutDiacriticsRemoved.PrecomputeTarget(haystack)); + + Assert.IsTrue(scoreWithRemoval > 0, "Expected match when diacritics are removed."); + Assert.AreEqual(0, scoreWithoutRemoval, "Expected no match when diacritics are preserved."); + } + + [TestMethod] + public void Score_SkipWordSeparatorsOption_AffectsMatching() + { + var skipSeparators = new PrecomputedFuzzyMatcher( 
+ new PrecomputedFuzzyMatcherOptions { SkipWordSeparators = true }); + var keepSeparators = new PrecomputedFuzzyMatcher( + new PrecomputedFuzzyMatcherOptions { SkipWordSeparators = false }); + + const string needle = "a b"; + const string haystack = "ab"; + + var scoreSkip = skipSeparators.Score( + skipSeparators.PrecomputeQuery(needle), + skipSeparators.PrecomputeTarget(haystack)); + var scoreKeep = keepSeparators.Score( + keepSeparators.PrecomputeQuery(needle), + keepSeparators.PrecomputeTarget(haystack)); + + Assert.IsTrue(scoreSkip > 0, "Expected match when word separators are skipped."); + Assert.AreEqual(0, scoreKeep, "Expected no match when word separators are preserved."); + } + + [TestMethod] + public void Score_IgnoreSameCaseBonusOption_AffectsLowercaseQuery() + { + var ignoreSameCase = new PrecomputedFuzzyMatcher( + new PrecomputedFuzzyMatcherOptions + { + IgnoreSameCaseBonusIfQueryIsAllLowercase = true, + SameCaseBonus = 10, + }); + var applySameCase = new PrecomputedFuzzyMatcher( + new PrecomputedFuzzyMatcherOptions + { + IgnoreSameCaseBonusIfQueryIsAllLowercase = false, + SameCaseBonus = 10, + }); + + const string needle = "test"; + const string haystack = "test"; + + var scoreIgnore = ignoreSameCase.Score( + ignoreSameCase.PrecomputeQuery(needle), + ignoreSameCase.PrecomputeTarget(haystack)); + var scoreApply = applySameCase.Score( + applySameCase.PrecomputeQuery(needle), + applySameCase.PrecomputeTarget(haystack)); + + Assert.IsTrue(scoreApply > scoreIgnore, "Expected same-case bonus to apply when not ignored."); + } +} diff --git a/src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/PrecomputedFuzzyMatcherSecondaryInputTests.cs b/src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/PrecomputedFuzzyMatcherSecondaryInputTests.cs new file mode 100644 index 0000000000..70c86a4598 --- /dev/null +++ b/src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/PrecomputedFuzzyMatcherSecondaryInputTests.cs @@ -0,0 
+1,227 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.CmdPal.Core.Common.Text; + +namespace Microsoft.CmdPal.Common.UnitTests.Text; + +[TestClass] +public sealed class PrecomputedFuzzyMatcherSecondaryInputTests +{ + private readonly PrecomputedFuzzyMatcher _matcher = new(); + private readonly StringFolder _folder = new(); + private readonly BloomFilter _bloom = new(); + + [TestMethod] + public void Score_PrimaryQueryMatchesSecondaryTarget_ShouldMatch() + { + // Scenario: Searching for "calc" should match the file "important.txt" via its secondary input (the path), which contains "Calculator" + var query = CreateQuery("calc"); + var target = CreateTarget(primary: "important.txt", secondary: "C:\\Programs\\Calculator\\"); + + var score = _matcher.Score(query, target); + + Assert.IsTrue(score > 0, "Expected primary query to match secondary target"); + } + + [TestMethod] + public void Score_SecondaryQueryMatchesPrimaryTarget_ShouldMatch() + { + // Scenario: User types "documents\\report" and we want to match against filename + var query = CreateQuery(primary: "documents", secondary: "report"); + var target = CreateTarget(primary: "report.docx"); + + var score = _matcher.Score(query, target); + + Assert.IsTrue(score > 0, "Expected secondary query to match primary target"); + } + + [TestMethod] + public void Score_SecondaryQueryMatchesSecondaryTarget_ShouldMatch() + { + // Scenario: Both query and target have secondary info that matches + var query = CreateQuery(primary: "test", secondary: "documents"); + var target = CreateTarget(primary: "something.txt", secondary: "C:\\Users\\Documents\\"); + + var score = _matcher.Score(query, target); + + Assert.IsTrue(score > 0, "Expected secondary query to match secondary target"); + } + + [TestMethod] + public void Score_PrimaryQueryMatchesBothTargets_ShouldReturnBestScore() + { + // The
same query matches both primary and secondary of target + var query = CreateQuery("test"); + var target = CreateTarget(primary: "test.txt", secondary: "test_folder\\"); + + var score = _matcher.Score(query, target); + + Assert.IsTrue(score > 0, "Expected query to match when it appears in both primary and secondary"); + } + + [TestMethod] + public void Score_NoSecondaryInQuery_MatchesSecondaryTarget() + { + // Query without secondary can still match target's secondary + var query = CreateQuery("downloads"); + var target = CreateTarget(primary: "file.txt", secondary: "C:\\Downloads\\"); + + var score = _matcher.Score(query, target); + + Assert.IsTrue(score > 0, "Expected primary query to match secondary target"); + } + + [TestMethod] + public void Score_NoSecondaryInTarget_SecondaryQueryShouldNotMatch() + { + // Query with secondary but target without secondary - secondary query shouldn't interfere + var query = CreateQuery(primary: "test", secondary: "extra"); + var target = CreateTarget(primary: "test.txt"); + + var score = _matcher.Score(query, target); + + // Primary should still match, secondary query just doesn't contribute + Assert.IsTrue(score > 0, "Expected primary query to match primary target"); + } + + [TestMethod] + public void Score_SecondaryQueryNoMatch_PrimaryCanStillMatch() + { + // Secondary doesn't match anything, but primary does + var query = CreateQuery(primary: "file", secondary: "nomatch"); + var target = CreateTarget(primary: "myfile.txt", secondary: "C:\\Documents\\"); + + var score = _matcher.Score(query, target); + + Assert.IsTrue(score > 0, "Expected primary query to match even when secondary doesn't"); + } + + [TestMethod] + public void Score_OnlySecondaryMatches_ShouldReturnScore() + { + // Only the secondary parts match, primary doesn't + var query = CreateQuery(primary: "xyz", secondary: "documents"); + var target = CreateTarget(primary: "abc.txt", secondary: "C:\\Users\\Documents\\"); + + var score = _matcher.Score(query, target); + 
+ Assert.IsTrue(score > 0, "Expected match when only secondary parts match"); + } + + [TestMethod] + public void Score_BothQueriesMatchDifferentTargets_ShouldReturnBestScore() + { + // Primary query matches secondary target, secondary query matches primary target + var query = CreateQuery(primary: "docs", secondary: "report"); + var target = CreateTarget(primary: "report.pdf", secondary: "C:\\Documents\\"); + + var score = _matcher.Score(query, target); + + Assert.IsTrue(score > 0, "Expected match when queries cross-match with targets"); + } + + [TestMethod] + public void Score_CompletelyDifferent_ShouldNotMatch() + { + var query = CreateQuery(primary: "xyz", secondary: "abc"); + var target = CreateTarget(primary: "hello", secondary: "world"); + + var score = _matcher.Score(query, target); + + Assert.AreEqual(0, score, "Expected no match when nothing matches"); + } + + [TestMethod] + public void Score_EmptySecondaryInputs_ShouldMatchOnPrimary() + { + var query = CreateQuery(primary: "test", secondary: string.Empty); + var target = CreateTarget(primary: "test.txt", secondary: string.Empty); + + var score = _matcher.Score(query, target); + + Assert.IsTrue(score > 0, "Expected match on primary when secondaries are empty"); + } + + [TestMethod] + public void Score_WordSeparatorMatching_AcrossSecondary() + { + // Test that "Power Point" matches "PowerPoint" using secondary + var query = CreateQuery(primary: "power", secondary: "point"); + var target = CreateTarget(primary: "PowerPoint.exe"); + + var score = _matcher.Score(query, target); + + Assert.IsTrue(score > 0, "Expected 'power' + 'point' to match 'PowerPoint'"); + } + + private FuzzyQuery CreateQuery(string primary, string? secondary = null) + { + var primaryFolded = _folder.Fold(primary, removeDiacritics: true); + var primaryBloom = _bloom.Compute(primaryFolded); + var primaryEffectiveLength = primaryFolded.Length; + var primaryIsAllLowercase = IsAllLowercaseAsciiOrNonLetter(primary); + + string? 
secondaryFolded = null; + ulong secondaryBloom = 0; + var secondaryEffectiveLength = 0; + var secondaryIsAllLowercase = true; + + if (!string.IsNullOrEmpty(secondary)) + { + secondaryFolded = _folder.Fold(secondary, removeDiacritics: true); + secondaryBloom = _bloom.Compute(secondaryFolded); + secondaryEffectiveLength = secondaryFolded.Length; + secondaryIsAllLowercase = IsAllLowercaseAsciiOrNonLetter(secondary); + } + + return new FuzzyQuery( + original: primary, + folded: primaryFolded, + bloom: primaryBloom, + effectiveLength: primaryEffectiveLength, + isAllLowercaseAsciiOrNonLetter: primaryIsAllLowercase, + secondaryOriginal: secondary, + secondaryFolded: secondaryFolded, + secondaryBloom: secondaryBloom, + secondaryEffectiveLength: secondaryEffectiveLength, + secondaryIsAllLowercaseAsciiOrNonLetter: secondaryIsAllLowercase); + } + + private FuzzyTarget CreateTarget(string primary, string? secondary = null) + { + var primaryFolded = _folder.Fold(primary, removeDiacritics: true); + var primaryBloom = _bloom.Compute(primaryFolded); + + string? 
secondaryFolded = null; + ulong secondaryBloom = 0; + + if (!string.IsNullOrEmpty(secondary)) + { + secondaryFolded = _folder.Fold(secondary, removeDiacritics: true); + secondaryBloom = _bloom.Compute(secondaryFolded); + } + + return new FuzzyTarget( + original: primary, + folded: primaryFolded, + bloom: primaryBloom, + secondaryOriginal: secondary, + secondaryFolded: secondaryFolded, + secondaryBloom: secondaryBloom); + } + + private static bool IsAllLowercaseAsciiOrNonLetter(string s) + { + foreach (var c in s) + { + if ((uint)(c - 'A') <= ('Z' - 'A')) + { + return false; + } + } + + return true; + } +} diff --git a/src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/PrecomputedFuzzyMatcherTests.cs b/src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/PrecomputedFuzzyMatcherTests.cs new file mode 100644 index 0000000000..bdd3898ac9 --- /dev/null +++ b/src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/PrecomputedFuzzyMatcherTests.cs @@ -0,0 +1,209 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using Microsoft.CmdPal.Core.Common.Text; + +namespace Microsoft.CmdPal.Common.UnitTests.Text; + +[TestClass] +public class PrecomputedFuzzyMatcherTests +{ + private readonly PrecomputedFuzzyMatcher _matcher = new(); + + public static IEnumerable MatchData => + [ + ["a", "a"], + ["abc", "abc"], + ["a", "ab"], + ["b", "ab"], + ["abc", "axbycz"], + ["pt", "PowerToys"], + ["calc", "Calculator"], + ["vs", "Visual Studio"], + ["code", "Visual Studio Code"], + + // Diacritics + ["abc", "ÁBC"], + + // Separators + ["p/t", "power\\toys"], + ]; + + public static IEnumerable NonMatchData => + [ + ["z", "abc"], + ["verylongstring", "short"], + ]; + + [TestMethod] + [DynamicData(nameof(MatchData))] + public void Score_Matches_ShouldHavePositiveScore(string needle, string haystack) + { + var query = _matcher.PrecomputeQuery(needle); + var target = _matcher.PrecomputeTarget(haystack); + var score = _matcher.Score(query, target); + + Assert.IsTrue(score > 0, $"Expected positive score for needle='{needle}', haystack='{haystack}'"); + } + + [TestMethod] + [DynamicData(nameof(NonMatchData))] + public void Score_NonMatches_ShouldHaveZeroScore(string needle, string haystack) + { + var query = _matcher.PrecomputeQuery(needle); + var target = _matcher.PrecomputeTarget(haystack); + var score = _matcher.Score(query, target); + + Assert.AreEqual(0, score, $"Expected 0 score for needle='{needle}', haystack='{haystack}'"); + } + + [TestMethod] + public void Score_EmptyQuery_ReturnsZero() + { + var query = _matcher.PrecomputeQuery(string.Empty); + var target = _matcher.PrecomputeTarget("something"); + Assert.AreEqual(0, _matcher.Score(query, target)); + } + + [TestMethod] + public void Score_EmptyTarget_ReturnsZero() + { + var query = _matcher.PrecomputeQuery("something"); + var target = _matcher.PrecomputeTarget(string.Empty); + Assert.AreEqual(0, _matcher.Score(query, target)); + } + + [TestMethod] + public void SchemaId_DefaultMatcher_IsConsistent() + { + var matcher1 = new 
PrecomputedFuzzyMatcher(); + var matcher2 = new PrecomputedFuzzyMatcher(); + + Assert.AreEqual(matcher1.SchemaId, matcher2.SchemaId, "Default matchers should have the same SchemaId"); + } + + [TestMethod] + public void SchemaId_SameOptions_ProducesSameId() + { + var options = new PrecomputedFuzzyMatcherOptions { RemoveDiacritics = true }; + var matcher1 = new PrecomputedFuzzyMatcher(options); + var matcher2 = new PrecomputedFuzzyMatcher(options); + + Assert.AreEqual(matcher1.SchemaId, matcher2.SchemaId, "Matchers with same options should have the same SchemaId"); + } + + [TestMethod] + public void SchemaId_DifferentRemoveDiacriticsOption_ProducesDifferentId() + { + var matcherWithDiacriticsRemoval = new PrecomputedFuzzyMatcher( + new PrecomputedFuzzyMatcherOptions { RemoveDiacritics = true }); + var matcherWithoutDiacriticsRemoval = new PrecomputedFuzzyMatcher( + new PrecomputedFuzzyMatcherOptions { RemoveDiacritics = false }); + + Assert.AreNotEqual( + matcherWithDiacriticsRemoval.SchemaId, + matcherWithoutDiacriticsRemoval.SchemaId, + "Different RemoveDiacritics option should produce different SchemaId"); + } + + [TestMethod] + public void SchemaId_ScoringOptionsDoNotAffectId() + { + // SchemaId should only be affected by options that affect folding/bloom, not scoring + var matcher1 = new PrecomputedFuzzyMatcher( + new PrecomputedFuzzyMatcherOptions { CharMatchBonus = 1, CamelCaseBonus = 2 }); + var matcher2 = new PrecomputedFuzzyMatcher( + new PrecomputedFuzzyMatcherOptions { CharMatchBonus = 100, CamelCaseBonus = 200 }); + + Assert.AreEqual(matcher1.SchemaId, matcher2.SchemaId, "Scoring options should not affect SchemaId"); + } + + [TestMethod] + public void Score_WordSeparatorMatching_PowerPoint() + { + // Test that "Power Point" can match "PowerPoint" when word separators are skipped + var query = _matcher.PrecomputeQuery("Power Point"); + var target = _matcher.PrecomputeTarget("PowerPoint"); + var score = _matcher.Score(query, target); + + 
Assert.IsTrue(score > 0, "Expected 'Power Point' to match 'PowerPoint'"); + } + + [TestMethod] + public void Score_WordSeparatorMatching_UnderscoreDash() + { + // Test that different word separators match each other + var query = _matcher.PrecomputeQuery("hello_world"); + var target = _matcher.PrecomputeTarget("hello-world"); + var score = _matcher.Score(query, target); + + Assert.IsTrue(score > 0, "Expected 'hello_world' to match 'hello-world'"); + } + + [TestMethod] + public void Score_WordSeparatorMatching_MixedSeparators() + { + // Test multiple different separators + var query = _matcher.PrecomputeQuery("my.file_name"); + var target = _matcher.PrecomputeTarget("my-file.name"); + var score = _matcher.Score(query, target); + + Assert.IsTrue(score > 0, "Expected mixed separators to match"); + } + + [TestMethod] + public void Score_PrecomputedQueryReuse_ShouldWorkConsistently() + { + // Test that precomputed query can be reused across multiple targets + var query = _matcher.PrecomputeQuery("test"); + var target1 = _matcher.PrecomputeTarget("test123"); + var target2 = _matcher.PrecomputeTarget("mytest"); + var target3 = _matcher.PrecomputeTarget("unrelated"); + + var score1 = _matcher.Score(query, target1); + var score2 = _matcher.Score(query, target2); + var score3 = _matcher.Score(query, target3); + + Assert.IsTrue(score1 > 0, "Expected query to match first target"); + Assert.IsTrue(score2 > 0, "Expected query to match second target"); + Assert.AreEqual(0, score3, "Expected query not to match third target"); + } + + [TestMethod] + public void Score_PrecomputedTargetReuse_ShouldWorkConsistently() + { + // Test that precomputed target can be reused across multiple queries + var target = _matcher.PrecomputeTarget("calculator"); + var query1 = _matcher.PrecomputeQuery("calc"); + var query2 = _matcher.PrecomputeQuery("lator"); + var query3 = _matcher.PrecomputeQuery("xyz"); + + var score1 = _matcher.Score(query1, target); + var score2 = _matcher.Score(query2, target); 
+ var score3 = _matcher.Score(query3, target); + + Assert.IsTrue(score1 > 0, "Expected first query to match target"); + Assert.IsTrue(score2 > 0, "Expected second query to match target"); + Assert.AreEqual(0, score3, "Expected third query not to match target"); + } + + [TestMethod] + public void Score_CaseInsensitiveMatching_Works() + { + // Test various case combinations + var query1 = _matcher.PrecomputeQuery("test"); + var query2 = _matcher.PrecomputeQuery("TEST"); + var query3 = _matcher.PrecomputeQuery("TeSt"); + + var target = _matcher.PrecomputeTarget("TestFile"); + + var score1 = _matcher.Score(query1, target); + var score2 = _matcher.Score(query2, target); + var score3 = _matcher.Score(query3, target); + + Assert.IsTrue(score1 > 0, "Expected lowercase query to match"); + Assert.IsTrue(score2 > 0, "Expected uppercase query to match"); + Assert.IsTrue(score3 > 0, "Expected mixed case query to match"); + } +} diff --git a/src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/PrecomputedFuzzyMatcherUnicodeTests.cs b/src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/PrecomputedFuzzyMatcherUnicodeTests.cs new file mode 100644 index 0000000000..8cdf39bc82 --- /dev/null +++ b/src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/PrecomputedFuzzyMatcherUnicodeTests.cs @@ -0,0 +1,124 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using Microsoft.CmdPal.Core.Common.Text; + +namespace Microsoft.CmdPal.Common.UnitTests.Text; + +[TestClass] +public sealed class PrecomputedFuzzyMatcherUnicodeTests +{ + private readonly PrecomputedFuzzyMatcher _defaultMatcher = new(); + + [TestMethod] + public void UnpairedHighSurrogateInNeedle_ShouldNotThrow() + { + const string needle = "\uD83D"; // high surrogate (unpaired) + const string haystack = "abc"; + + var q = _defaultMatcher.PrecomputeQuery(needle); + var t = _defaultMatcher.PrecomputeTarget(haystack); + _ = _defaultMatcher.Score(q, t); + } + + [TestMethod] + public void UnpairedLowSurrogateInNeedle_ShouldNotThrow() + { + const string needle = "\uDC00"; // low surrogate (unpaired) + const string haystack = "abc"; + + var q = _defaultMatcher.PrecomputeQuery(needle); + var t = _defaultMatcher.PrecomputeTarget(haystack); + _ = _defaultMatcher.Score(q, t); + } + + [TestMethod] + public void UnpairedHighSurrogateInHaystack_ShouldNotThrow() + { + const string needle = "a"; + const string haystack = "a\uD83D" + "bc"; // inject unpaired high surrogate + + var q = _defaultMatcher.PrecomputeQuery(needle); + var t = _defaultMatcher.PrecomputeTarget(haystack); + _ = _defaultMatcher.Score(q, t); + } + + [TestMethod] + public void MixedSurrogatesAndMarks_ShouldNotThrow() + { + // "Garbage smoothie": unpaired surrogate + combining mark + emoji surrogate pair + const string needle = "a\uD83D\u0301"; // 'a' + unpaired high surrogate + combining acute + const string haystack = "a\u0301 \U0001F600"; // 'a' + combining acute + space + 😀 (valid pair) + + var q = _defaultMatcher.PrecomputeQuery(needle); + var t = _defaultMatcher.PrecomputeTarget(haystack); + _ = _defaultMatcher.Score(q, t); + } + + [TestMethod] + public void ValidEmojiSurrogatePair_ShouldNotThrow_AndCanMatch() + { + // 😀 U+1F600 encoded as surrogate pair in UTF-16 + const string needle = "\U0001F600"; + const string haystack = "x \U0001F600 y"; + + var q = _defaultMatcher.PrecomputeQuery(needle); + var 
t = _defaultMatcher.PrecomputeTarget(haystack); + var score = _defaultMatcher.Score(q, t); + + Assert.IsTrue(score > 0, "Expected emoji to produce a match score > 0."); + } + + [TestMethod] + public void RandomUtf16Garbage_ShouldNotThrow() + { + // Deterministic pseudo-random "UTF-16 garbage", including surrogates. + var s1 = MakeDeterministicGarbage(seed: 1234, length: 512); + var s2 = MakeDeterministicGarbage(seed: 5678, length: 1024); + + var q = _defaultMatcher.PrecomputeQuery(s1); + var t = _defaultMatcher.PrecomputeTarget(s2); + _ = _defaultMatcher.Score(q, t); + } + + [TestMethod] + public void HighSurrogateAtEndOfHaystack_ShouldNotThrow() + { + const string needle = "a"; + const string haystack = "abc\uD83D"; // Ends with high surrogate + + var q = _defaultMatcher.PrecomputeQuery(needle); + var t = _defaultMatcher.PrecomputeTarget(haystack); + _ = _defaultMatcher.Score(q, t); + } + + [TestMethod] + public void VeryLongStrings_ShouldNotThrow() + { + var needle = new string('a', 100); + var haystack = new string('b', 10000) + needle + new string('c', 10000); + + var q = _defaultMatcher.PrecomputeQuery(needle); + var t = _defaultMatcher.PrecomputeTarget(haystack); + _ = _defaultMatcher.Score(q, t); + } + + private static string MakeDeterministicGarbage(int seed, int length) + { + // LCG for deterministic generation without Random’s platform/version surprises. + var x = (uint)seed; + var chars = length <= 2048 ? stackalloc char[length] : new char[length]; + + for (var i = 0; i < chars.Length; i++) + { + // LCG: x = (a*x + c) mod 2^32 + x = unchecked((1664525u * x) + 1013904223u); + + // Take top 16 bits as UTF-16 code unit (includes surrogates). 
+ chars[i] = (char)(x >> 16); + } + + return new string(chars); + } +} diff --git a/src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/PrecomputedFuzzyMatcherWithPinyinTests.cs b/src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/PrecomputedFuzzyMatcherWithPinyinTests.cs new file mode 100644 index 0000000000..3e811c050a --- /dev/null +++ b/src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/PrecomputedFuzzyMatcherWithPinyinTests.cs @@ -0,0 +1,117 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Globalization; +using Microsoft.CmdPal.Core.Common.Text; + +namespace Microsoft.CmdPal.Common.UnitTests.Text; + +[TestClass] +public class PrecomputedFuzzyMatcherWithPinyinTests +{ + private PrecomputedFuzzyMatcherWithPinyin CreateMatcher(PinyinMode mode = PinyinMode.On, bool removeApostrophes = true) + { + return new PrecomputedFuzzyMatcherWithPinyin( + new PrecomputedFuzzyMatcherOptions(), + new PinyinFuzzyMatcherOptions { Mode = mode, RemoveApostrophesForQuery = removeApostrophes }, + new StringFolder(), + new BloomFilter()); + } + + [TestMethod] + [DataRow("bj", "北京")] + [DataRow("sh", "上海")] + [DataRow("nihao", "你好")] + [DataRow("beijing", "北京")] + [DataRow("ce", "测试")] + public void Score_PinyinMatches_ShouldHavePositiveScore(string needle, string haystack) + { + var matcher = CreateMatcher(PinyinMode.On); + var query = matcher.PrecomputeQuery(needle); + var target = matcher.PrecomputeTarget(haystack); + var score = matcher.Score(query, target); + + Assert.IsTrue(score > 0, $"Expected positive score for needle='{needle}', haystack='{haystack}'"); + } + + [TestMethod] + public void Score_PinyinOff_ShouldNotMatchPinyin() + { + var matcher = CreateMatcher(PinyinMode.Off); + var needle = "bj"; + var haystack = "北京"; + + var query = matcher.PrecomputeQuery(needle); + 
var target = matcher.PrecomputeTarget(haystack); + var score = matcher.Score(query, target); + + Assert.AreEqual(0, score, "Pinyin match should be disabled."); + } + + [TestMethod] + public void Score_StandardMatch_WorksWithPinyinMatcher() + { + var matcher = CreateMatcher(PinyinMode.On); + var needle = "abc"; + var haystack = "abc"; + + var query = matcher.PrecomputeQuery(needle); + var target = matcher.PrecomputeTarget(haystack); + var score = matcher.Score(query, target); + + Assert.IsTrue(score > 0, "Standard match should still work."); + } + + [TestMethod] + public void Score_ApostropheRemoval_Works() + { + var matcher = CreateMatcher(PinyinMode.On, removeApostrophes: true); + var needle = "xi'an"; + + // "xi'an" -> "xian" -> matches "西安" (Xi An) + var haystack = "西安"; + + var query = matcher.PrecomputeQuery(needle); + var target = matcher.PrecomputeTarget(haystack); + var score = matcher.Score(query, target); + + Assert.IsTrue(score > 0, "Expected match for 'xi'an' -> '西安' with apostrophe removal."); + } + + [TestMethod] + public void AutoMode_EnablesForChineseCulture() + { + var originalCulture = CultureInfo.CurrentUICulture; + try + { + CultureInfo.CurrentUICulture = new CultureInfo("zh-CN"); + var matcher = CreateMatcher(PinyinMode.AutoSimplifiedChineseUi); + + var score = matcher.Score(matcher.PrecomputeQuery("bj"), matcher.PrecomputeTarget("北京")); + Assert.IsTrue(score > 0, "Should match when UI culture is zh-CN"); + } + finally + { + CultureInfo.CurrentUICulture = originalCulture; + } + } + + [TestMethod] + public void AutoMode_DisablesForNonChineseCulture() + { + var originalCulture = CultureInfo.CurrentUICulture; + try + { + CultureInfo.CurrentUICulture = new CultureInfo("en-US"); + var matcher = CreateMatcher(PinyinMode.AutoSimplifiedChineseUi); + + var score = matcher.Score(matcher.PrecomputeQuery("bj"), matcher.PrecomputeTarget("北京")); + Assert.AreEqual(0, score, "Should NOT match when UI culture is en-US"); + } + finally + { + 
CultureInfo.CurrentUICulture = originalCulture; + } + } +} diff --git a/src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/StringFolderTests.cs b/src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/StringFolderTests.cs new file mode 100644 index 0000000000..076636f2fb --- /dev/null +++ b/src/modules/cmdpal/Tests/Microsoft.CmdPal.Core.Common.UnitTests/Text/StringFolderTests.cs @@ -0,0 +1,55 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.CmdPal.Core.Common.Text; + +namespace Microsoft.CmdPal.Common.UnitTests.Text; + +[TestClass] +public class StringFolderTests +{ + private readonly StringFolder _folder = new(); + + [TestMethod] + [DataRow(null, "")] + [DataRow("", "")] + [DataRow("abc", "ABC")] + [DataRow("ABC", "ABC")] + [DataRow("a\\b", "A/B")] + [DataRow("a/b", "A/B")] + [DataRow("ÁBC", "ABC")] // Diacritic removal + [DataRow("ñ", "N")] + [DataRow("hello world", "HELLO WORLD")] + public void Fold_RemoveDiacritics_Works(string input, string expected) + { + Assert.AreEqual(expected, _folder.Fold(input, removeDiacritics: true)); + } + + [TestMethod] + [DataRow("abc", "ABC")] + [DataRow("ÁBC", "ÁBC")] // No diacritic removal + [DataRow("a\\b", "A/B")] + public void Fold_KeepDiacritics_Works(string input, string expected) + { + Assert.AreEqual(expected, _folder.Fold(input, removeDiacritics: false)); + } + + [TestMethod] + public void Fold_IsAlreadyFolded_ReturnsSameInstance() + { + var input = "ALREADY/FOLDED"; + var result = _folder.Fold(input, removeDiacritics: true); + Assert.AreSame(input, result); + } + + [TestMethod] + public void Fold_WithNonAsciiButNoDiacritics_ReturnsFolded() + { + // E.g. 
Cyrillic or other scripts that might not decompose in a simple way or just upper case + // "привет" -> "ПРИВЕТ" + var input = "привет"; + var expected = "ПРИВЕТ"; + Assert.AreEqual(expected, _folder.Fold(input, removeDiacritics: true)); + } +} diff --git a/src/modules/cmdpal/Tests/Microsoft.CmdPal.UI.ViewModels.UnitTests/MainListPageResultFactoryTests.cs b/src/modules/cmdpal/Tests/Microsoft.CmdPal.UI.ViewModels.UnitTests/MainListPageResultFactoryTests.cs index 5a1e4bff54..5cb60ca03b 100644 --- a/src/modules/cmdpal/Tests/Microsoft.CmdPal.UI.ViewModels.UnitTests/MainListPageResultFactoryTests.cs +++ b/src/modules/cmdpal/Tests/Microsoft.CmdPal.UI.ViewModels.UnitTests/MainListPageResultFactoryTests.cs @@ -5,6 +5,7 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.CmdPal.Core.Common.Helpers; using Microsoft.CmdPal.UI.ViewModels.Commands; using Microsoft.CommandPalette.Extensions; using Microsoft.CommandPalette.Extensions.Toolkit; @@ -43,38 +44,34 @@ public partial class MainListPageResultFactoryTests public override string ToString() => Title; } - private static Scored S(string title, int score) + private static RoScored S(string title, int score) { - return new Scored - { - Score = score, - Item = new MockListItem { Title = title }, - }; + return new RoScored(score: score, item: new MockListItem { Title = title }); } [TestMethod] public void Merge_PrioritizesListsCorrectly() { - var filtered = new List> + var filtered = new List> { S("F1", 100), S("F2", 50), }; - var scoredFallback = new List> + var scoredFallback = new List> { S("SF1", 100), S("SF2", 60), }; - var apps = new List> + var apps = new List> { S("A1", 100), S("A2", 55), }; // Fallbacks are not scored. 
- var fallbacks = new List> + var fallbacks = new List> { S("FB1", 0), S("FB2", 0), @@ -104,7 +101,7 @@ public partial class MainListPageResultFactoryTests [TestMethod] public void Merge_AppliesAppLimit() { - var apps = new List> + var apps = new List> { S("A1", 100), S("A2", 90), @@ -126,7 +123,7 @@ public partial class MainListPageResultFactoryTests [TestMethod] public void Merge_FiltersEmptyFallbacks() { - var fallbacks = new List> + var fallbacks = new List> { S("FB1", 0), S("FB3", 0), diff --git a/src/modules/cmdpal/Tests/Microsoft.CmdPal.UI.ViewModels.UnitTests/RecentCommandsTests.cs b/src/modules/cmdpal/Tests/Microsoft.CmdPal.UI.ViewModels.UnitTests/RecentCommandsTests.cs index 78ead1588e..ec93373f74 100644 --- a/src/modules/cmdpal/Tests/Microsoft.CmdPal.UI.ViewModels.UnitTests/RecentCommandsTests.cs +++ b/src/modules/cmdpal/Tests/Microsoft.CmdPal.UI.ViewModels.UnitTests/RecentCommandsTests.cs @@ -5,6 +5,7 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.CmdPal.Core.Common.Text; using Microsoft.CmdPal.Ext.UnitTestBase; using Microsoft.CmdPal.UI.ViewModels.MainPage; using Microsoft.CommandPalette.Extensions; @@ -263,10 +264,12 @@ public partial class RecentCommandsTests : CommandPaletteUnitTestBase }; var history = CreateHistory(items.Reverse().ToList()); + var fuzzyMatcher = CreateMatcher(); + var q = fuzzyMatcher.PrecomputeQuery("C"); - var scoreA = MainListPage.ScoreTopLevelItem("C", items[0], history); - var scoreB = MainListPage.ScoreTopLevelItem("C", items[1], history); - var scoreC = MainListPage.ScoreTopLevelItem("C", items[2], history); + var scoreA = MainListPage.ScoreTopLevelItem(q, items[0], history, fuzzyMatcher); + var scoreB = MainListPage.ScoreTopLevelItem(q, items[1], history, fuzzyMatcher); + var scoreC = MainListPage.ScoreTopLevelItem(q, items[2], history, fuzzyMatcher); // Assert // All of these equally match the query, and they're all in the same bucket, @@ -296,6 +299,11 @@ public partial class 
RecentCommandsTests : CommandPaletteUnitTestBase return history; } + private static IPrecomputedFuzzyMatcher CreateMatcher() + { + return new PrecomputedFuzzyMatcher(new PrecomputedFuzzyMatcherOptions()); + } + private sealed record ScoredItem(ListItemMock Item, int Score) { public string Title => Item.Title; @@ -337,9 +345,11 @@ public partial class RecentCommandsTests : CommandPaletteUnitTestBase var items = CreateMockHistoryItems(); var emptyHistory = CreateMockHistoryService(new()); var history = CreateMockHistoryService(items); + var fuzzyMatcher = CreateMatcher(); - var unweightedScores = items.Select(item => MainListPage.ScoreTopLevelItem("C", item, emptyHistory)).ToList(); - var weightedScores = items.Select(item => MainListPage.ScoreTopLevelItem("C", item, history)).ToList(); + var q = fuzzyMatcher.PrecomputeQuery("C"); + var unweightedScores = items.Select(item => MainListPage.ScoreTopLevelItem(q, item, emptyHistory, fuzzyMatcher)).ToList(); + var weightedScores = items.Select(item => MainListPage.ScoreTopLevelItem(q, item, history, fuzzyMatcher)).ToList(); Assert.AreEqual(unweightedScores.Count, weightedScores.Count, "Both score lists should have the same number of items"); for (var i = 0; i < unweightedScores.Count; i++) { @@ -380,7 +390,10 @@ public partial class RecentCommandsTests : CommandPaletteUnitTestBase var items = CreateMockHistoryItems(); var emptyHistory = CreateMockHistoryService(new()); var history = CreateMockHistoryService(items); - var weightedScores = items.Select(item => MainListPage.ScoreTopLevelItem("te", item, history)).ToList(); + var fuzzyMatcher = CreateMatcher(); + var q = fuzzyMatcher.PrecomputeQuery("te"); + + var weightedScores = items.Select(item => MainListPage.ScoreTopLevelItem(q, item, history, fuzzyMatcher)).ToList(); var weightedMatches = GetMatches(items, weightedScores).ToList(); Assert.AreEqual(3, weightedMatches.Count, "Find Terminal, VsCode and Run commands"); @@ -398,6 +411,8 @@ public partial class 
RecentCommandsTests : CommandPaletteUnitTestBase var items = CreateMockHistoryItems(); var emptyHistory = CreateMockHistoryService(new()); var history = CreateMockHistoryService(items); + var fuzzyMatcher = CreateMatcher(); + var q = fuzzyMatcher.PrecomputeQuery("te"); // Add extra uses of VS Code to try and push it above Terminal for (var i = 0; i < 10; i++) @@ -405,7 +420,7 @@ public partial class RecentCommandsTests : CommandPaletteUnitTestBase history.AddHistoryItem(items[1].Id); } - var weightedScores = items.Select(item => MainListPage.ScoreTopLevelItem("te", item, history)).ToList(); + var weightedScores = items.Select(item => MainListPage.ScoreTopLevelItem(q, item, history, fuzzyMatcher)).ToList(); var weightedMatches = GetMatches(items, weightedScores).ToList(); Assert.AreEqual(3, weightedMatches.Count, "Find Terminal, VsCode and Run commands"); @@ -423,6 +438,8 @@ public partial class RecentCommandsTests : CommandPaletteUnitTestBase var items = CreateMockHistoryItems(); var emptyHistory = CreateMockHistoryService(new()); var history = CreateMockHistoryService(items); + var fuzzyMatcher = CreateMatcher(); + var q = fuzzyMatcher.PrecomputeQuery("C"); // We're gonna run this test and keep adding more uses of VS Code till // it breaks past Command Prompt @@ -431,7 +448,7 @@ public partial class RecentCommandsTests : CommandPaletteUnitTestBase { history.AddHistoryItem(vsCodeId); - var weightedScores = items.Select(item => MainListPage.ScoreTopLevelItem("C", item, history)).ToList(); + var weightedScores = items.Select(item => MainListPage.ScoreTopLevelItem(q, item, history, fuzzyMatcher)).ToList(); var weightedMatches = GetMatches(items, weightedScores).ToList(); Assert.AreEqual(4, weightedMatches.Count); diff --git a/src/modules/cmdpal/ext/Microsoft.CmdPal.Ext.Apps/AppListItem.cs b/src/modules/cmdpal/ext/Microsoft.CmdPal.Ext.Apps/AppListItem.cs index 8d1a05d641..5d1c413281 100644 --- a/src/modules/cmdpal/ext/Microsoft.CmdPal.Ext.Apps/AppListItem.cs +++ 
b/src/modules/cmdpal/ext/Microsoft.CmdPal.Ext.Apps/AppListItem.cs @@ -7,6 +7,7 @@ using System.Collections.Generic; using System.Threading.Tasks; using ManagedCommon; using Microsoft.CmdPal.Core.Common.Helpers; +using Microsoft.CmdPal.Core.Common.Text; using Microsoft.CmdPal.Ext.Apps.Commands; using Microsoft.CmdPal.Ext.Apps.Helpers; using Microsoft.CommandPalette.Extensions; @@ -14,7 +15,7 @@ using Microsoft.CommandPalette.Extensions.Toolkit; namespace Microsoft.CmdPal.Ext.Apps.Programs; -public sealed partial class AppListItem : ListItem +public sealed partial class AppListItem : ListItem, IPrecomputedListItem { private readonly AppCommand _appCommand; private readonly AppItem _app; @@ -25,6 +26,35 @@ public sealed partial class AppListItem : ListItem private InterlockedBoolean _isLoadingIcon; private InterlockedBoolean _isLoadingDetails; + private FuzzyTargetCache _titleCache; + private FuzzyTargetCache _subtitleCache; + + public override string Title + { + get => base.Title; + set + { + if (!string.Equals(base.Title, value, StringComparison.Ordinal)) + { + base.Title = value; + _titleCache.Invalidate(); + } + } + } + + public override string Subtitle + { + get => base.Subtitle; + set + { + if (!string.Equals(value, base.Subtitle, StringComparison.Ordinal)) + { + base.Subtitle = value; + _subtitleCache.Invalidate(); + } + } + } + public override IDetails? Details { get @@ -259,4 +289,10 @@ public sealed partial class AppListItem : ListItem return null; }).ConfigureAwait(false); } + + public FuzzyTarget GetTitleTarget(IPrecomputedFuzzyMatcher matcher) + => _titleCache.GetOrUpdate(matcher, Title); + + public FuzzyTarget GetSubtitleTarget(IPrecomputedFuzzyMatcher matcher) + => _subtitleCache.GetOrUpdate(matcher, Subtitle); }