Files
PowerToys/src/common/Common.Search/FuzzSearch/StringMatcher.cs
Kai Tao 4ad951eb56 Setting search (#41285)
<!-- Enter a brief description/summary of your PR here. What does it
fix/what does it change/how was it tested (even manually, if necessary)?
-->
## Summary of the Pull Request

<!-- Please review the items on the PR checklist before submitting-->
## PR Checklist

- [ ] Closes: #xxx
- [ ] **Communication:** I've discussed this with core contributors
already. If the work hasn't been agreed, this work might be rejected
- [x] **Tests:** Added/updated and all pass
- [ ] **Localization:** All end-user-facing strings can be localized
- [ ] **Dev docs:** Added/updated
- [ ] **New binaries:** Added on the required places
- [ ] [JSON for
signing](https://github.com/microsoft/PowerToys/blob/main/.pipelines/ESRPSigning_core.json)
for new binaries
- [ ] [WXS for
installer](https://github.com/microsoft/PowerToys/blob/main/installer/PowerToysSetup/Product.wxs)
for new binaries and localization folder
- [ ] [YML for CI
pipeline](https://github.com/microsoft/PowerToys/blob/main/.pipelines/ci/templates/build-powertoys-steps.yml)
for new test projects
- [ ] [YML for signed
pipeline](https://github.com/microsoft/PowerToys/blob/main/.pipelines/release.yml)
- [ ] **Documentation updated:** If checked, please file a pull request
on [our docs
repo](https://github.com/MicrosoftDocs/windows-uwp/tree/docs/hub/powertoys)
and link it here: #xxx

<!-- Provide a more detailed description of the PR, other things fixed,
or any additional comments/features here -->
## Detailed Description of the Pull Request / Additional comments

<!-- Describe how you validated the behavior. Add automated tests
wherever possible, but list manual validation steps taken as well -->
## Validation Steps Performed
Localized search:
<img width="1576" height="480" alt="image"
src="https://github.com/user-attachments/assets/dd6e5e9f-419b-40b1-b796-f0799481ecfc"
/>


## AI summary
This pull request introduces infrastructure and code to support search
functionality for PowerToys settings, including a new search index
specification, a dedicated search library, and updates to the solution
configuration. The main changes are the addition of a spec describing
how settings should be indexed and navigated, the creation of a new
`Common.Search` project with a fuzz search implementation, and updates
to the solution file to include these new components.

**Settings Search Feature Implementation**

* Documentation:
* Added a detailed specification (`settings-search.md`) describing the
structure of PowerToys settings pages, how to index settings, navigation
logic, runtime search, result grouping, build-time indexing strategy,
and corner cases.

* New Search Library:
* Added the new `Common.Search` project to the solution, including its
project file and implementation of a fuzz search service
(`FuzzSearchService<T>`), match options, match results, and search
precision scoring.
[[1]](diffhunk://#diff-ddc06fa41e4e723e54181b0cb85cdd00f57f75725d51ceefa242d4d651a9a363R1-R8)
[[2]](diffhunk://#diff-1a2ca29fc33bcccf338a7843a040ca2c31ba821e8cab7064fab0dbb1224d454cR1-R39)
[[3]](diffhunk://#diff-242764d948b795f39653a84d9b6bfcdc52730100deab2e3a0995be95bb8e7868R1-R10)
[[4]](diffhunk://#diff-61e525491ed916ebd65dabb66dd4f5dc720320d7e295ef1e0bd6d506ea0f7df6R1-R67)
[[5]](diffhunk://#diff-a775f6de2e8d42982829b4161668f49dedbbd9dcbb05ce20003de7e62275c57aR1-R12)

* Solution Configuration:
* Updated `PowerToys.sln` to include `Common.Search` and
`Settings.UI.XamlIndexBuilder` projects, and configured their build
settings for various platforms and mapped project dependencies.
[[1]](diffhunk://#diff-ca837ce490070b91656ffffe31cbad8865ba9174e0f020231f77baf35ff3f811R714-R716)
[[2]](diffhunk://#diff-ca837ce490070b91656ffffe31cbad8865ba9174e0f020231f77baf35ff3f811R2704-R2727)
[[3]](diffhunk://#diff-ca837ce490070b91656ffffe31cbad8865ba9174e0f020231f77baf35ff3f811R2889)
[[4]](diffhunk://#diff-ca837ce490070b91656ffffe31cbad8865ba9174e0f020231f77baf35ff3f811R3157-R3158)

**Spell-check Dictionary Updates**

* Added new terms related to navigation and settings UI components (such
as `Navigatable`, `NavigatablePage`, `settingscard`, `Tru`, `tweakable`)
to the spell-check dictionary to support the new search and indexing
features.
[[1]](diffhunk://#diff-5dcab162c1b233a49973ae010f2b88c7ec4844382abd705e6154685e62bd5c4dR1020-R1021)
[[2]](diffhunk://#diff-5dcab162c1b233a49973ae010f2b88c7ec4844382abd705e6154685e62bd5c4dR1498)
[[3]](diffhunk://#diff-5dcab162c1b233a49973ae010f2b88c7ec4844382abd705e6154685e62bd5c4dR1755-R1761)

---------

Co-authored-by: Niels Laute <niels.laute@live.nl>
Co-authored-by: Gordon Lam (SH) <yeelam@microsoft.com>
Co-authored-by: Gordon Lam <73506701+yeelam-gordon@users.noreply.github.com>
2025-08-25 21:23:07 +08:00

273 lines
11 KiB
C#

// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System.Globalization;
namespace Common.Search.FuzzSearch;
public class StringMatcher
{
public StringMatcher()
{
}
private static readonly char[] Separator = [' '];
/// <summary>
/// Current method:
/// Character matching + substring matching;
/// 1. Query search string is split into substrings, separator is whitespace.
/// 2. Check each query substring's characters against full compare string,
/// 3. if a character in the substring is matched, loop back to verify the previous character.
/// 4. If previous character also matches, and is the start of the substring, update list.
/// 5. Once the previous character is verified, move on to the next character in the query substring.
/// 6. Move onto the next substring's characters until all substrings are checked.
/// 7. Consider success and move onto scoring if every char or substring without whitespaces matched
/// </summary>
public static MatchResult FuzzyMatch(string query, string stringToCompare, MatchOption opt = null)
{
opt = opt ?? new MatchOption();
if (string.IsNullOrEmpty(stringToCompare))
{
return new MatchResult(false, SearchPrecisionScore.Regular);
}
SearchPrecisionScore score = SearchPrecisionScore.Regular;
var bestResult = new MatchResult(false, score);
for (int startIndex = 0; startIndex < stringToCompare.Length; startIndex++)
{
MatchResult result = FuzzyMatch(query, stringToCompare, opt, startIndex);
if (result.Success && (!bestResult.Success || result.Score > bestResult.Score))
{
bestResult = result;
}
}
return bestResult;
}
private static MatchResult FuzzyMatch(string query, string stringToCompare, MatchOption opt, int startIndex)
{
if (string.IsNullOrEmpty(stringToCompare) || string.IsNullOrEmpty(query))
{
return new MatchResult(false, SearchPrecisionScore.Regular);
}
ArgumentNullException.ThrowIfNull(opt);
query = query.Trim();
// Using InvariantCulture since this is internal
var fullStringToCompareWithoutCase = opt.IgnoreCase ? stringToCompare.ToUpper(CultureInfo.InvariantCulture) : stringToCompare;
var queryWithoutCase = opt.IgnoreCase ? query.ToUpper(CultureInfo.InvariantCulture) : query;
var querySubstrings = queryWithoutCase.Split(Separator, StringSplitOptions.RemoveEmptyEntries);
int currentQuerySubstringIndex = 0;
var currentQuerySubstring = querySubstrings[currentQuerySubstringIndex];
var currentQuerySubstringCharacterIndex = 0;
var firstMatchIndex = -1;
var firstMatchIndexInWord = -1;
var lastMatchIndex = 0;
bool allQuerySubstringsMatched = false;
bool matchFoundInPreviousLoop = false;
bool allSubstringsContainedInCompareString = true;
var indexList = new List<int>();
List<int> spaceIndices = new List<int>();
for (var compareStringIndex = startIndex; compareStringIndex < fullStringToCompareWithoutCase.Length; compareStringIndex++)
{
// To maintain a list of indices which correspond to spaces in the string to compare
// To populate the list only for the first query substring
if (fullStringToCompareWithoutCase[compareStringIndex].Equals(' ') && currentQuerySubstringIndex == 0)
{
spaceIndices.Add(compareStringIndex);
}
bool compareResult;
if (opt.IgnoreCase)
{
var fullStringToCompare = fullStringToCompareWithoutCase[compareStringIndex].ToString();
var querySubstring = currentQuerySubstring[currentQuerySubstringCharacterIndex].ToString();
#pragma warning disable CA1309 // Use ordinal string comparison (We are looking for a fuzzy match here)
compareResult = string.Compare(fullStringToCompare, querySubstring, CultureInfo.CurrentCulture, CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace) != 0;
#pragma warning restore CA1309 // Use ordinal string comparison
}
else
{
compareResult = fullStringToCompareWithoutCase[compareStringIndex] != currentQuerySubstring[currentQuerySubstringCharacterIndex];
}
if (compareResult)
{
matchFoundInPreviousLoop = false;
continue;
}
if (firstMatchIndex < 0)
{
// first matched char will become the start of the compared string
firstMatchIndex = compareStringIndex;
}
if (currentQuerySubstringCharacterIndex == 0)
{
// first letter of current word
matchFoundInPreviousLoop = true;
firstMatchIndexInWord = compareStringIndex;
}
else if (!matchFoundInPreviousLoop)
{
// we want to verify that there is not a better match if this is not a full word
// in order to do so we need to verify all previous chars are part of the pattern
var startIndexToVerify = compareStringIndex - currentQuerySubstringCharacterIndex;
if (AllPreviousCharsMatched(startIndexToVerify, currentQuerySubstringCharacterIndex, fullStringToCompareWithoutCase, currentQuerySubstring))
{
matchFoundInPreviousLoop = true;
// if it's the beginning character of the first query substring that is matched then we need to update start index
firstMatchIndex = currentQuerySubstringIndex == 0 ? startIndexToVerify : firstMatchIndex;
indexList = GetUpdatedIndexList(startIndexToVerify, currentQuerySubstringCharacterIndex, firstMatchIndexInWord, indexList);
}
}
lastMatchIndex = compareStringIndex + 1;
indexList.Add(compareStringIndex);
currentQuerySubstringCharacterIndex++;
// if finished looping through every character in the current substring
if (currentQuerySubstringCharacterIndex == currentQuerySubstring.Length)
{
// if any of the substrings was not matched then consider as all are not matched
allSubstringsContainedInCompareString = matchFoundInPreviousLoop && allSubstringsContainedInCompareString;
currentQuerySubstringIndex++;
allQuerySubstringsMatched = AllQuerySubstringsMatched(currentQuerySubstringIndex, querySubstrings.Length);
if (allQuerySubstringsMatched)
{
break;
}
// otherwise move to the next query substring
currentQuerySubstring = querySubstrings[currentQuerySubstringIndex];
currentQuerySubstringCharacterIndex = 0;
}
}
// proceed to calculate score if every char or substring without whitespaces matched
if (allQuerySubstringsMatched)
{
var nearestSpaceIndex = CalculateClosestSpaceIndex(spaceIndices, firstMatchIndex);
var score = CalculateSearchScore(query, stringToCompare, firstMatchIndex - nearestSpaceIndex - 1, lastMatchIndex - firstMatchIndex, allSubstringsContainedInCompareString);
return new MatchResult(true, SearchPrecisionScore.Regular, indexList, score);
}
return new MatchResult(false, SearchPrecisionScore.Regular);
}
// To get the index of the closest space which precedes the first matching index
private static int CalculateClosestSpaceIndex(List<int> spaceIndices, int firstMatchIndex)
{
if (spaceIndices.Count == 0)
{
return -1;
}
else
{
return spaceIndices.OrderBy(item => firstMatchIndex - item).Where(item => firstMatchIndex > item).FirstOrDefault(-1);
}
}
private static bool AllPreviousCharsMatched(int startIndexToVerify, int currentQuerySubstringCharacterIndex, string fullStringToCompareWithoutCase, string currentQuerySubstring)
{
var allMatch = true;
for (int indexToCheck = 0; indexToCheck < currentQuerySubstringCharacterIndex; indexToCheck++)
{
if (fullStringToCompareWithoutCase[startIndexToVerify + indexToCheck] !=
currentQuerySubstring[indexToCheck])
{
allMatch = false;
}
}
return allMatch;
}
private static List<int> GetUpdatedIndexList(int startIndexToVerify, int currentQuerySubstringCharacterIndex, int firstMatchIndexInWord, List<int> indexList)
{
var updatedList = new List<int>();
indexList.RemoveAll(x => x >= firstMatchIndexInWord);
updatedList.AddRange(indexList);
for (int indexToCheck = 0; indexToCheck < currentQuerySubstringCharacterIndex; indexToCheck++)
{
updatedList.Add(startIndexToVerify + indexToCheck);
}
return updatedList;
}
private static bool AllQuerySubstringsMatched(int currentQuerySubstringIndex, int querySubstringsLength)
{
return currentQuerySubstringIndex >= querySubstringsLength;
}
private static int CalculateSearchScore(string query, string stringToCompare, int firstIndex, int matchLen, bool allSubstringsContainedInCompareString)
{
// A match found near the beginning of a string is scored more than a match found near the end
// A match is scored more if the characters in the patterns are closer to each other,
// while the score is lower if they are more spread out
// The length of the match is assigned a larger weight factor.
const int matchLenWeightFactor = 2;
var score = 100 * (query.Length + 1) * matchLenWeightFactor / (1 + firstIndex + (matchLenWeightFactor * (matchLen + 1)));
// A match with less characters assigning more weights
if (stringToCompare.Length - query.Length < 5)
{
score += 20;
}
else if (stringToCompare.Length - query.Length < 10)
{
score += 10;
}
if (allSubstringsContainedInCompareString)
{
int count = query.Count(c => !char.IsWhiteSpace(c));
int threshold = 4;
if (count <= threshold)
{
score += count * 10;
}
else
{
score += (threshold * 10) + ((count - threshold) * 5);
}
}
#pragma warning disable CA1309 // Use ordinal string comparison (Using CurrentCultureIgnoreCase since this relates to queries input by user)
if (string.Equals(query, stringToCompare, StringComparison.CurrentCultureIgnoreCase))
{
var bonusForExactMatch = 10;
score += bonusForExactMatch;
}
#pragma warning restore CA1309 // Use ordinal string comparison
return score;
}
}