From 07310c771466fd9a13ccd65e9202428ebc94c87b Mon Sep 17 00:00:00 2001 From: Jeremy Wu Date: Mon, 20 Jan 2020 10:06:16 +1100 Subject: [PATCH] Fix pinyin fuzzysearch (#131) * fix typo * make function obsolete it is not used in the code * rewrite the function that converts chinese chars to pinyin 1. Only difference in this rewrite is instead of returning 2D array, return as a combined single string of all the possible pinyin combination. Since fuzzy search does character matching, this shouldn't be a problem. 2. Added a function that returns a custom language converter. In this case Pinyin converter. New converters can be added. * Use new language converter param + strip out ScoreForPinyin method * update * Change parameter name * fix failing tests * WIP * Remove todo There should be some distinction between score after precision filter and actual raw score derived from FuzzySearch. Although so far RawScore is used in testing, but it seems to describe the structure. Originally it was to avoid assigning score directly as it would be hard to reason about that output of FuzzySearch score is. * Add constructors, remove default to enforce required properties * remove setting rawscore in SearchPrecision * Change method name to reflect intention * Change parameter name + update comment * update * Remove params comment Co-authored-by: theClueless <14300910+theClueless@users.noreply.github.com> --- Wox.Infrastructure/Alphabet.cs | 64 +++++++--- Wox.Infrastructure/FuzzyMatcher.cs | 4 +- Wox.Infrastructure/StringMatcher.cs | 123 +++++++++----------- Wox.Infrastructure/UserSettings/Settings.cs | 16 +-- Wox.Test/FuzzyMatcherTest.cs | 22 ++-- Wox/App.xaml.cs | 19 ++- Wox/PublicAPIInstance.cs | 7 +- 7 files changed, 133 insertions(+), 122 deletions(-) diff --git a/Wox.Infrastructure/Alphabet.cs b/Wox.Infrastructure/Alphabet.cs index 63e178ba1e..487c0a4b0a 100644 --- a/Wox.Infrastructure/Alphabet.cs +++ b/Wox.Infrastructure/Alphabet.cs @@ -2,28 +2,35 @@ using System.Collections.Concurrent; using System.Collections.Generic; using System.Linq; +using System.Text; using hyjiacan.util.p4n; using hyjiacan.util.p4n.format; +using JetBrains.Annotations; using Wox.Infrastructure.Logger; using Wox.Infrastructure.Storage; using Wox.Infrastructure.UserSettings; namespace Wox.Infrastructure { - public static class Alphabet + public interface IAlphabet { - private static readonly HanyuPinyinOutputFormat Format = new HanyuPinyinOutputFormat(); - private static ConcurrentDictionary PinyinCache; - private static BinaryStorage> _pinyinStorage; - private static Settings _settings; + string Translate(string stringToTranslate); + } + + public class Alphabet : IAlphabet + { + private readonly HanyuPinyinOutputFormat Format = new HanyuPinyinOutputFormat(); + private ConcurrentDictionary PinyinCache; + private BinaryStorage> _pinyinStorage; + private Settings _settings; - public static void Initialize(Settings settings) + public void Initialize([NotNull] Settings settings) { - _settings = settings; + _settings = settings ?? throw new ArgumentNullException(nameof(settings)); InitializePinyinHelpers(); } - private static void InitializePinyinHelpers() + private void InitializePinyinHelpers() { Format.setToneType(HanyuPinyinToneType.WITHOUT_TONE); @@ -38,7 +45,35 @@ namespace Wox.Infrastructure Log.Info($"|Wox.Infrastructure.Alphabet.Initialize|Number of preload pinyin combination<{PinyinCache.Count}>"); } - public static void Save() + public string Translate(string str) + { + return ConvertChineseCharactersToPinyin(str); + } + + public string ConvertChineseCharactersToPinyin(string source) + { + if (!_settings.ShouldUsePinyin) + return source; + + if (string.IsNullOrEmpty(source)) + return source; + + if (!ContainsChinese(source)) + return source; + + var combination = PinyinCombination(source); + + var pinyinArray=combination.Select(x => string.Join("", x)); + var acronymArray = combination.Select(Acronym).Distinct(); + + var joinedSingleStringCombination = new StringBuilder(); + var all = acronymArray.Concat(pinyinArray); + all.ToList().ForEach(x => joinedSingleStringCombination.Append(x)); + + return joinedSingleStringCombination.ToString(); + } + + public void Save() { if (!_settings.ShouldUsePinyin) { @@ -50,11 +85,12 @@ namespace Wox.Infrastructure private static string[] EmptyStringArray = new string[0]; private static string[][] Empty2DStringArray = new string[0][]; + [Obsolete("Not accurate, eg 音乐 will not return yinyue but returns yinle ")] /// /// replace chinese character with pinyin, non chinese character won't be modified /// should be word or sentence, instead of single character. e.g. 微软 /// - public static string[] Pinyin(string word) + public string[] Pinyin(string word) { if (!_settings.ShouldUsePinyin) { @@ -76,7 +112,7 @@ namespace Wox.Infrastructure /// e.g. 音乐 will return yinyue and yinle /// should be word or sentence, instead of single character. e.g. 微软 /// - public static string[][] PinyinComination(string characters) + public string[][] PinyinCombination(string characters) { if (!_settings.ShouldUsePinyin || string.IsNullOrEmpty(characters)) { @@ -111,13 +147,13 @@ namespace Wox.Infrastructure } } - public static string Acronym(string[] pinyin) + public string Acronym(string[] pinyin) { var acronym = string.Join("", pinyin.Select(p => p[0])); return acronym; } - public static bool ContainsChinese(string word) + public bool ContainsChinese(string word) { if (!_settings.ShouldUsePinyin) { @@ -135,7 +171,7 @@ namespace Wox.Infrastructure return chinese; } - private static string[] Combination(string[] array1, string[] array2) + private string[] Combination(string[] array1, string[] array2) { if (!_settings.ShouldUsePinyin) { diff --git a/Wox.Infrastructure/FuzzyMatcher.cs b/Wox.Infrastructure/FuzzyMatcher.cs index ddd26be832..49520e19d2 100644 --- a/Wox.Infrastructure/FuzzyMatcher.cs +++ b/Wox.Infrastructure/FuzzyMatcher.cs @@ -16,7 +16,7 @@ namespace Wox.Infrastructure public static FuzzyMatcher Create(string query) { - return new FuzzyMatcher(query, StringMatcher.DefaultMatchOption); + return new FuzzyMatcher(query, new MatchOption()); } public static FuzzyMatcher Create(string query, MatchOption opt) @@ -26,7 +26,7 @@ namespace Wox.Infrastructure public MatchResult Evaluate(string str) { - return StringMatcher.FuzzySearch(query, str, opt); + return StringMatcher.Instance.FuzzyMatch(query, str, opt); } } } diff --git a/Wox.Infrastructure/StringMatcher.cs b/Wox.Infrastructure/StringMatcher.cs index d1cc1fdecb..442c1e9ddb 100644 --- a/Wox.Infrastructure/StringMatcher.cs +++ b/Wox.Infrastructure/StringMatcher.cs @@ -1,43 +1,46 @@ using System; using System.Collections.Generic; +using System.ComponentModel; using System.Linq; -using System.Text; -using Wox.Infrastructure.Logger; -using Wox.Infrastructure.UserSettings; using static Wox.Infrastructure.StringMatcher; namespace Wox.Infrastructure { - public static class StringMatcher + public class StringMatcher { - public static MatchOption DefaultMatchOption = new MatchOption(); + private readonly MatchOption _defaultMatchOption = new MatchOption(); - public static SearchPrecisionScore UserSettingSearchPrecision { get; set; } + public SearchPrecisionScore UserSettingSearchPrecision { get; set; } - public static bool ShouldUsePinyin { get; set; } + private readonly IAlphabet _alphabet; + + public StringMatcher(IAlphabet alphabet = null) + { + _alphabet = alphabet; + } + + public static StringMatcher Instance { get; internal set; } [Obsolete("This method is obsolete and should not be used. Please use the static function StringMatcher.FuzzySearch")] public static int Score(string source, string target) { - if (!string.IsNullOrEmpty(source) && !string.IsNullOrEmpty(target)) - { - return FuzzySearch(target, source, DefaultMatchOption).Score; - } - else - { - return 0; - } + return FuzzySearch(target, source).Score; } [Obsolete("This method is obsolete and should not be used. Please use the static function StringMatcher.FuzzySearch")] public static bool IsMatch(string source, string target) { - return FuzzySearch(target, source, DefaultMatchOption).Score > 0; + return Score(source, target) > 0; } public static MatchResult FuzzySearch(string query, string stringToCompare) { - return FuzzySearch(query, stringToCompare, DefaultMatchOption); + return Instance.FuzzyMatch(query, stringToCompare); + } + + public MatchResult FuzzyMatch(string query, string stringToCompare) + { + return FuzzyMatch(query, stringToCompare, _defaultMatchOption); } /// @@ -51,12 +54,18 @@ namespace Wox.Infrastructure /// 6. Move onto the next substring's characters until all substrings are checked. /// 7. Consider success and move onto scoring if every char or substring without whitespaces matched /// - public static MatchResult FuzzySearch(string query, string stringToCompare, MatchOption opt) + public MatchResult FuzzyMatch(string query, string stringToCompare, MatchOption opt) { - if (string.IsNullOrEmpty(stringToCompare) || string.IsNullOrEmpty(query)) return new MatchResult { Success = false }; + if (string.IsNullOrEmpty(stringToCompare) || string.IsNullOrEmpty(query)) return new MatchResult (false, UserSettingSearchPrecision); query = query.Trim(); + if (_alphabet != null) + { + query = _alphabet.Translate(query); + stringToCompare = _alphabet.Translate(stringToCompare); + } + var fullStringToCompareWithoutCase = opt.IgnoreCase ? stringToCompare.ToLower() : stringToCompare; var queryWithoutCase = opt.IgnoreCase ? query.ToLower() : query; @@ -139,19 +148,11 @@ namespace Wox.Infrastructure if (allQuerySubstringsMatched) { var score = CalculateSearchScore(query, stringToCompare, firstMatchIndex, lastMatchIndex - firstMatchIndex, allSubstringsContainedInCompareString); - var pinyinScore = ScoreForPinyin(stringToCompare, query); - var result = new MatchResult - { - Success = true, - MatchData = indexList, - RawScore = Math.Max(score, pinyinScore) - }; - - return result; + return new MatchResult(true, UserSettingSearchPrecision, indexList, score); } - return new MatchResult { Success = false }; + return new MatchResult (false, UserSettingSearchPrecision); } private static bool AllPreviousCharsMatched(int startIndexToVerify, int currentQuerySubstringCharacterIndex, @@ -224,46 +225,28 @@ namespace Wox.Infrastructure Low = 20, None = 0 } - - public static int ScoreForPinyin(string source, string target) - { - if (!ShouldUsePinyin) - { - return 0; - } - - if (!string.IsNullOrEmpty(source) && !string.IsNullOrEmpty(target)) - { - if (Alphabet.ContainsChinese(source)) - { - var combination = Alphabet.PinyinComination(source); - var pinyinScore = combination - .Select(pinyin => FuzzySearch(target, string.Join("", pinyin)).Score) - .Max(); - var acronymScore = combination.Select(Alphabet.Acronym) - .Select(pinyin => FuzzySearch(target, pinyin).Score) - .Max(); - var score = Math.Max(pinyinScore, acronymScore); - return score; - } - else - { - return 0; - } - } - else - { - return 0; - } - } } public class MatchResult { + public MatchResult(bool success, SearchPrecisionScore searchPrecision) + { + Success = success; + SearchPrecision = searchPrecision; + } + + public MatchResult(bool success, SearchPrecisionScore searchPrecision, List matchData, int rawScore) + { + Success = success; + SearchPrecision = searchPrecision; + MatchData = matchData; + RawScore = rawScore; + } + public bool Success { get; set; } /// - /// The final score of the match result with all search precision filters applied. + /// The final score of the match result with search precision filters applied. /// public int Score { get; private set; } @@ -278,7 +261,7 @@ namespace Wox.Infrastructure set { _rawScore = value; - Score = ApplySearchPrecisionFilter(_rawScore); + Score = ScoreAfterSearchPrecisionFilter(_rawScore); } } @@ -287,19 +270,21 @@ namespace Wox.Infrastructure /// public List MatchData { get; set; } + public SearchPrecisionScore SearchPrecision { get; set; } + public bool IsSearchPrecisionScoreMet() { - return IsSearchPrecisionScoreMet(Score); + return IsSearchPrecisionScoreMet(_rawScore); } - private bool IsSearchPrecisionScoreMet(int score) + private bool IsSearchPrecisionScoreMet(int rawScore) { - return score >= (int)UserSettingSearchPrecision; + return rawScore >= (int)SearchPrecision; } - private int ApplySearchPrecisionFilter(int score) + private int ScoreAfterSearchPrecisionFilter(int rawScore) { - return IsSearchPrecisionScoreMet(score) ? score : 0; + return IsSearchPrecisionScoreMet(rawScore) ? rawScore : 0; } } @@ -319,4 +304,4 @@ namespace Wox.Infrastructure public bool IgnoreCase { get; set; } = true; } -} \ No newline at end of file +} diff --git a/Wox.Infrastructure/UserSettings/Settings.cs b/Wox.Infrastructure/UserSettings/Settings.cs index b11ec069ca..73e13a8386 100644 --- a/Wox.Infrastructure/UserSettings/Settings.cs +++ b/Wox.Infrastructure/UserSettings/Settings.cs @@ -21,19 +21,11 @@ namespace Wox.Infrastructure.UserSettings public string ResultFontWeight { get; set; } public string ResultFontStretch { get; set; } + /// /// when false Alphabet static service will always return empty results /// - private bool _shouldUsePinyin = true; - public bool ShouldUsePinyin - { - get { return _shouldUsePinyin; } - set - { - _shouldUsePinyin = value; - StringMatcher.ShouldUsePinyin = value; - } - } + public bool ShouldUsePinyin { get; set; } = true; internal StringMatcher.SearchPrecisionScore QuerySearchPrecision { get; private set; } = StringMatcher.SearchPrecisionScore.Regular; @@ -49,14 +41,14 @@ namespace Wox.Infrastructure.UserSettings .Parse(typeof(StringMatcher.SearchPrecisionScore), value); QuerySearchPrecision = precisionScore; - StringMatcher.UserSettingSearchPrecision = precisionScore; + StringMatcher.Instance.UserSettingSearchPrecision = precisionScore; } catch (ArgumentException e) { Logger.Log.Exception(nameof(Settings), "Failed to load QuerySearchPrecisionString value from Settings file", e); QuerySearchPrecision = StringMatcher.SearchPrecisionScore.Regular; - StringMatcher.UserSettingSearchPrecision = StringMatcher.SearchPrecisionScore.Regular; + StringMatcher.Instance.UserSettingSearchPrecision = StringMatcher.SearchPrecisionScore.Regular; throw; } diff --git a/Wox.Test/FuzzyMatcherTest.cs b/Wox.Test/FuzzyMatcherTest.cs index f2de39a289..011f050ac1 100644 --- a/Wox.Test/FuzzyMatcherTest.cs +++ b/Wox.Test/FuzzyMatcherTest.cs @@ -57,12 +57,13 @@ namespace Wox.Test }; var results = new List(); + var matcher = new StringMatcher(); foreach (var str in sources) { results.Add(new Result { Title = str, - Score = StringMatcher.FuzzySearch("inst", str).RawScore + Score = matcher.FuzzyMatch("inst", str).RawScore }); } @@ -78,8 +79,8 @@ namespace Wox.Test public void WhenGivenNotAllCharactersFoundInSearchStringThenShouldReturnZeroScore(string searchString) { var compareString = "Can have rum only in my glass"; - - var scoreResult = StringMatcher.FuzzySearch(searchString, compareString).RawScore; + var matcher = new StringMatcher(); + var scoreResult = matcher.FuzzyMatch(searchString, compareString).RawScore; Assert.True(scoreResult == 0); } @@ -93,13 +94,13 @@ namespace Wox.Test public void WhenGivenStringsAndAppliedPrecisionFilteringThenShouldReturnGreaterThanPrecisionScoreResults(string searchTerm) { var results = new List(); - + var matcher = new StringMatcher(); foreach (var str in GetSearchStrings()) { results.Add(new Result { Title = str, - Score = StringMatcher.FuzzySearch(searchTerm, str).Score + Score = matcher.FuzzyMatch(searchTerm, str).Score }); } @@ -131,7 +132,8 @@ namespace Wox.Test public void WhenGivenQueryStringThenShouldReturnCurrentScoring(string queryString, string compareString, int expectedScore) { // When, Given - var rawScore = StringMatcher.FuzzySearch(queryString, compareString).RawScore; + var matcher = new StringMatcher(); + var rawScore = matcher.FuzzyMatch(queryString, compareString).RawScore; // Should Assert.AreEqual(expectedScore, rawScore, $"Expected score for compare string '{compareString}': {expectedScore}, Actual: {rawScore}"); @@ -154,10 +156,10 @@ namespace Wox.Test bool expectedPrecisionResult) { // When - StringMatcher.UserSettingSearchPrecision = expectedPrecisionScore; + var matcher = new StringMatcher {UserSettingSearchPrecision = expectedPrecisionScore}; // Given - var matchResult = StringMatcher.FuzzySearch(queryString, compareString); + var matchResult = matcher.FuzzyMatch(queryString, compareString); Debug.WriteLine(""); Debug.WriteLine("###############################################"); @@ -200,10 +202,10 @@ namespace Wox.Test bool expectedPrecisionResult) { // When - StringMatcher.UserSettingSearchPrecision = expectedPrecisionScore; + var matcher = new StringMatcher { UserSettingSearchPrecision = expectedPrecisionScore }; // Given - var matchResult = StringMatcher.FuzzySearch(queryString, compareString); + var matchResult = matcher.FuzzyMatch(queryString, compareString); Debug.WriteLine(""); Debug.WriteLine("###############################################"); diff --git a/Wox/App.xaml.cs b/Wox/App.xaml.cs index 6d6716a866..0dbd50abd5 100644 --- a/Wox/App.xaml.cs +++ b/Wox/App.xaml.cs @@ -26,6 +26,8 @@ namespace Wox private MainViewModel _mainVM; private SettingWindowViewModel _settingsVM; private readonly Updater _updater = new Updater(Wox.Properties.Settings.Default.GithubRepo); + private readonly Alphabet _alphabet = new Alphabet(); + private StringMatcher _stringMatcher; [STAThread] public static void Main() @@ -54,15 +56,14 @@ namespace Wox _settingsVM = new SettingWindowViewModel(_updater); _settings = _settingsVM.Settings; - Alphabet.Initialize(_settings); - - StringMatcher.UserSettingSearchPrecision = _settings.QuerySearchPrecision; - StringMatcher.ShouldUsePinyin = _settings.ShouldUsePinyin; + _alphabet.Initialize(_settings); + StringMatcher.Instance = new StringMatcher(_alphabet); + _stringMatcher.UserSettingSearchPrecision = _settings.QuerySearchPrecision; PluginManager.LoadPlugins(_settings.PluginSettings); _mainVM = new MainViewModel(_settings); var window = new MainWindow(_settings, _mainVM); - API = new PublicAPIInstance(_settingsVM, _mainVM); + API = new PublicAPIInstance(_settingsVM, _mainVM, _alphabet); PluginManager.InitializePlugins(API); Log.Info($"|App.OnStartup|Dependencies Info:{ErrorReporting.DependenciesInfo()}"); @@ -158,13 +159,7 @@ namespace Wox // but if sessionending is not called, exit won't be called when log off / shutdown if (!_disposed) { - _mainVM.Save(); - _settingsVM.Save(); - - PluginManager.Save(); - ImageLoader.Save(); - Alphabet.Save(); - + API.SaveAppAllSettings(); _disposed = true; } } diff --git a/Wox/PublicAPIInstance.cs b/Wox/PublicAPIInstance.cs index 4bcc825309..ab198d6bf1 100644 --- a/Wox/PublicAPIInstance.cs +++ b/Wox/PublicAPIInstance.cs @@ -20,16 +20,17 @@ namespace Wox { private readonly SettingWindowViewModel _settingsVM; private readonly MainViewModel _mainVM; + private readonly Alphabet _alphabet; #region Constructor - public PublicAPIInstance(SettingWindowViewModel settingsVM, MainViewModel mainVM) + public PublicAPIInstance(SettingWindowViewModel settingsVM, MainViewModel mainVM, Alphabet alphabet) { _settingsVM = settingsVM; _mainVM = mainVM; + _alphabet = alphabet; GlobalHotkey.Instance.hookedKeyboardCallback += KListener_hookedKeyboardCallback; WebRequest.RegisterPrefix("data", new DataWebRequestFactory()); - } #endregion @@ -70,7 +71,7 @@ namespace Wox _settingsVM.Save(); PluginManager.Save(); ImageLoader.Save(); - Alphabet.Save(); + _alphabet.Save(); } public void ReloadAllPluginData()