Files
PowerToys/src/modules/cmdpal/extensionsdk/Microsoft.CommandPalette.Extensions.Toolkit/FuzzyStringMatcher.cs
舰队的偶像-岛风酱! a37add8f08 feat(cmdpal): add pinyin support for Chinese input method (#39354)
<!-- Enter a brief description/summary of your PR here. What does it
fix/what does it change/how was it tested (even manually, if necessary)?
-->
## Summary of the Pull Request

- Add ToolGood.Words.Pinyin package to support pinyin conversion
- Implement pinyin matching in StringMatcher class
- Update project dependencies and Directory.Packages.props

<!-- Please review the items on the PR checklist before submitting-->
## PR Checklist

- [x] **Closes:** #38417 #39343
- [ ] **Communication:** I've discussed this with core contributors
already. If work hasn't been agreed, this work might be rejected
- [ ] **Tests:** Added/updated and all pass
- [ ] **Localization:** All end user facing strings can be localized
- [ ] **Dev docs:** Added/updated
- [ ] **New binaries:** Added on the required places
- [ ] [JSON for
signing](https://github.com/microsoft/PowerToys/blob/main/.pipelines/ESRPSigning_core.json)
for new binaries
- [ ] [WXS for
installer](https://github.com/microsoft/PowerToys/blob/main/installer/PowerToysSetup/Product.wxs)
for new binaries and localization folder
- [ ] [YML for CI
pipeline](https://github.com/microsoft/PowerToys/blob/main/.pipelines/ci/templates/build-powertoys-steps.yml)
for new test projects
- [ ] [YML for signed
pipeline](https://github.com/microsoft/PowerToys/blob/main/.pipelines/release.yml)
- [ ] **Documentation updated:** If checked, please file a pull request
on [our docs
repo](https://github.com/MicrosoftDocs/windows-uwp/tree/docs/hub/powertoys)
and link it here: #xxx

<!-- Provide a more detailed description of the PR, other things fixed
or any additional comments/features here -->
## Detailed Description of the Pull Request / Additional comments

I've completed a rough implementation of pinyin support, but since I'm
currently unsure where to add the toggle for pinyin support, this
feature is enabled by default for now.



https://github.com/user-attachments/assets/59df0180-05ad-4b4a-a858-29aa15e40fd2



<!-- Describe how you validated the behavior. Add automated tests
wherever possible, but list manual validation steps taken as well -->
## Validation Steps Performed

---------

Signed-off-by: 舰队的偶像-岛风酱! <frg2089@outlook.com>
Co-authored-by: Yu Leng <yuleng@microsoft.com>
2025-12-08 11:14:00 +08:00

224 lines
6.9 KiB
C#

// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System.Globalization;
using ToolGood.Words.Pinyin;
namespace Microsoft.CommandPalette.Extensions.Toolkit;
// Inspired by the fuzzy.rs from edit.exe
public static class FuzzyStringMatcher
{
private const int NOMATCH = 0;
/// <summary>
/// Gets a value indicating whether to support Chinese PinYin.
/// Automatically enabled when the system UI culture is Simplified Chinese.
/// </summary>
public static bool ChinesePinYinSupport { get; } = IsSimplifiedChinese();
private static bool IsSimplifiedChinese()
{
var culture = CultureInfo.CurrentUICulture;
// Detect Simplified Chinese: zh-CN, zh-Hans, zh-Hans-*
return culture.Name.StartsWith("zh-CN", StringComparison.OrdinalIgnoreCase)
|| culture.Name.StartsWith("zh-Hans", StringComparison.OrdinalIgnoreCase);
}
public static int ScoreFuzzy(string needle, string haystack, bool allowNonContiguousMatches = true)
{
var (s, _) = ScoreFuzzyWithPositions(needle, haystack, allowNonContiguousMatches);
return s;
}
public static (int Score, List<int> Positions) ScoreFuzzyWithPositions(string needle, string haystack, bool allowNonContiguousMatches)
=> ScoreAllFuzzyWithPositions(needle, haystack, allowNonContiguousMatches).MaxBy(i => i.Score);
public static IEnumerable<(int Score, List<int> Positions)> ScoreAllFuzzyWithPositions(string needle, string haystack, bool allowNonContiguousMatches)
{
List<string> needles = [needle];
List<string> haystacks = [haystack];
if (ChinesePinYinSupport)
{
// Remove IME composition split characters.
var input = needle.Replace("'", string.Empty);
needles.Add(WordsHelper.GetPinyin(input));
if (WordsHelper.HasChinese(haystack))
{
haystacks.Add(WordsHelper.GetPinyin(haystack));
}
}
return needles.SelectMany(i => haystacks.Select(j => ScoreFuzzyWithPositionsInternal(i, j, allowNonContiguousMatches)));
}
private static (int Score, List<int> Positions) ScoreFuzzyWithPositionsInternal(string needle, string haystack, bool allowNonContiguousMatches)
{
if (string.IsNullOrEmpty(haystack) || string.IsNullOrEmpty(needle))
{
return (NOMATCH, new List<int>());
}
var target = haystack.ToCharArray();
var query = needle.ToCharArray();
if (target.Length < query.Length)
{
return (NOMATCH, new List<int>());
}
var targetUpper = FoldCase(haystack);
var queryUpper = FoldCase(needle);
var targetUpperChars = targetUpper.ToCharArray();
var queryUpperChars = queryUpper.ToCharArray();
var area = query.Length * target.Length;
var scores = new int[area];
var matches = new int[area];
for (var qi = 0; qi < query.Length; qi++)
{
var qiOffset = qi * target.Length;
var qiPrevOffset = qi > 0 ? (qi - 1) * target.Length : 0;
for (var ti = 0; ti < target.Length; ti++)
{
var currentIndex = qiOffset + ti;
var diagIndex = (qi > 0 && ti > 0) ? qiPrevOffset + ti - 1 : 0;
var leftScore = ti > 0 ? scores[currentIndex - 1] : 0;
var diagScore = (qi > 0 && ti > 0) ? scores[diagIndex] : 0;
var matchSeqLen = (qi > 0 && ti > 0) ? matches[diagIndex] : 0;
var score = (diagScore == 0 && qi != 0) ? 0 :
ComputeCharScore(
query[qi],
queryUpperChars[qi],
ti != 0 ? target[ti - 1] : null,
target[ti],
targetUpperChars[ti],
matchSeqLen);
var isValidScore = score != 0 && diagScore + score >= leftScore &&
(allowNonContiguousMatches || qi > 0 ||
targetUpperChars.Skip(ti).Take(queryUpperChars.Length).SequenceEqual(queryUpperChars));
if (isValidScore)
{
matches[currentIndex] = matchSeqLen + 1;
scores[currentIndex] = diagScore + score;
}
else
{
matches[currentIndex] = NOMATCH;
scores[currentIndex] = leftScore;
}
}
}
var positions = new List<int>();
if (query.Length > 0 && target.Length > 0)
{
var qi = query.Length - 1;
var ti = target.Length - 1;
while (true)
{
var index = (qi * target.Length) + ti;
if (matches[index] == NOMATCH)
{
if (ti == 0)
{
break;
}
ti--;
}
else
{
positions.Add(ti);
if (qi == 0 || ti == 0)
{
break;
}
qi--;
ti--;
}
}
positions.Reverse();
}
return (scores[area - 1], positions);
}
private static string FoldCase(string input)
{
return input.ToUpperInvariant();
}
private static int ComputeCharScore(
char query,
char queryLower,
char? targetPrev,
char targetCurr,
char targetLower,
int matchSeqLen)
{
if (!ConsiderAsEqual(queryLower, targetLower))
{
return 0;
}
var score = 1; // Character match bonus
if (matchSeqLen > 0)
{
score += matchSeqLen * 5; // Consecutive match bonus
}
if (query == targetCurr)
{
score += 1; // Same case bonus
}
if (targetPrev.HasValue)
{
var sepBonus = ScoreSeparator(targetPrev.Value);
if (sepBonus > 0)
{
score += sepBonus;
}
else if (char.IsUpper(targetCurr) && matchSeqLen == 0)
{
score += 2; // CamelCase bonus
}
}
else
{
score += 8; // Start of word bonus
}
return score;
}
private static bool ConsiderAsEqual(char a, char b)
{
return a == b || (a == '/' && b == '\\') || (a == '\\' && b == '/');
}
private static int ScoreSeparator(char ch)
{
return ch switch
{
'/' or '\\' => 5,
'_' or '-' or '.' or ' ' or '\'' or '"' or ':' => 4,
_ => 0,
};
}
}