Merge pull request #112 from theClueless/fuzzyMatchUpdates

Fuzzy match logic update
This commit is contained in:
Jeremy Wu
2020-01-14 08:26:45 +11:00
committed by GitHub
6 changed files with 323 additions and 194 deletions

View File

@@ -47,8 +47,53 @@ namespace Wox.Infrastructure.Logger
return valid;
}
/// <param name="message">example: "|prefix|unprefixed" </param>
public static void Error(string message)
/// <summary>
/// Logs <paramref name="exception"/> under a logger name built from the reporting class and,
/// when available, the calling member.
/// </summary>
/// <param name="className">Name of the reporting class. A blank value is reported through LogFaultyFormat, but logging still continues.</param>
/// <param name="message">Text logged ahead of the exception details. A blank value is reported through LogFaultyFormat, but logging still continues.</param>
/// <param name="exception">Exception handed on to ExceptionInternal.</param>
/// <param name="methodName">Name of the calling member; filled in by the compiler when the argument is omitted.</param>
[MethodImpl(MethodImplOptions.Synchronized)]
public static void Exception(string className, string message, System.Exception exception, [CallerMemberName] string methodName = "")
{
    var classNameMissing = string.IsNullOrWhiteSpace(className);
    if (classNameMissing)
    {
        var shownMessage = message ?? "no message entered";
        LogFaultyFormat($"Fail to specify a class name during logging of message: {shownMessage}");
    }

    // todo: not sure we really need that
    if (string.IsNullOrWhiteSpace(message))
    {
        LogFaultyFormat("Fail to specify a message during logging");
    }

    // The logger name is "Class.Method" when a member name is known, otherwise just the class name.
    var loggerName = string.IsNullOrWhiteSpace(methodName)
        ? className
        : className + "." + methodName;

    ExceptionInternal(loggerName, message, exception);
}
/// <summary>
/// Writes <paramref name="message"/> followed by the details of <paramref name="e"/> and of each
/// exception in its InnerException chain, framed by begin/end marker lines, at error level.
/// </summary>
/// <param name="classAndMethod">Logger name passed to LogManager.GetLogger.</param>
/// <param name="message">Text logged before the exception details; also echoed to the debugger output.</param>
/// <param name="e">Outermost exception. When null, only the message and the marker lines are written.</param>
private static void ExceptionInternal(string classAndMethod, string message, System.Exception e)
{
    var logger = LogManager.GetLogger(classAndMethod);

    System.Diagnostics.Debug.WriteLine($"ERROR|{message}");

    logger.Error("-------------------------- Begin exception --------------------------");
    logger.Error(message);

    // Check before dereferencing: a null exception must not make the logger itself throw.
    for (var current = e; current != null; current = current.InnerException)
    {
        logger.Error($"Exception full name:\n <{current.GetType().FullName}>");
        logger.Error($"Exception message:\n <{current.Message}>");
        logger.Error($"Exception stack trace:\n <{current.StackTrace}>");
        logger.Error($"Exception source:\n <{current.Source}>");
        logger.Error($"Exception target site:\n <{current.TargetSite}>");
        logger.Error($"Exception HResult:\n <{current.HResult}>");
    }

    logger.Error("-------------------------- End exception --------------------------");
}
private static void LogInternal(string message, LogLevel level)
{
if (FormatValid(message))
{
@@ -57,8 +102,8 @@ namespace Wox.Infrastructure.Logger
var unprefixed = parts[2];
var logger = LogManager.GetLogger(prefix);
System.Diagnostics.Debug.WriteLine($"ERROR|{message}");
logger.Error(unprefixed);
System.Diagnostics.Debug.WriteLine($"{level.Name}|{message}");
logger.Log(level, unprefixed);
}
else
{
@@ -78,25 +123,7 @@ namespace Wox.Infrastructure.Logger
var parts = message.Split('|');
var prefix = parts[1];
var unprefixed = parts[2];
var logger = LogManager.GetLogger(prefix);
System.Diagnostics.Debug.WriteLine($"ERROR|{message}");
logger.Error("-------------------------- Begin exception --------------------------");
logger.Error(unprefixed);
do
{
logger.Error($"Exception full name:\n <{e.GetType().FullName}>");
logger.Error($"Exception message:\n <{e.Message}>");
logger.Error($"Exception stack trace:\n <{e.StackTrace}>");
logger.Error($"Exception source:\n <{e.Source}>");
logger.Error($"Exception target site:\n <{e.TargetSite}>");
logger.Error($"Exception HResult:\n <{e.HResult}>");
e = e.InnerException;
} while (e != null);
logger.Error("-------------------------- End exception --------------------------");
ExceptionInternal(prefix, unprefixed, e);
}
else
{
@@ -104,62 +131,29 @@ namespace Wox.Infrastructure.Logger
}
#endif
}
/// <summary>
/// Logs <paramref name="message"/> at error level by forwarding it to LogInternal.
/// </summary>
/// <param name="message">example: "|prefix|unprefixed" </param>
public static void Error(string message) => LogInternal(message, LogLevel.Error);
/// <summary>
/// Logs <paramref name="message"/> at debug level.
/// </summary>
/// <param name="message">example: "|prefix|unprefixed" </param>
public static void Debug(string message)
{
    // LogInternal performs the format check and the write itself. Logging inline here as well
    // would emit every message twice, so this only delegates, the same way Error does.
    LogInternal(message, LogLevel.Debug);
}
/// <summary>
/// Logs <paramref name="message"/> at info level.
/// </summary>
/// <param name="message">example: "|prefix|unprefixed" </param>
public static void Info(string message)
{
    // LogInternal performs the format check and the write itself. Logging inline here as well
    // would emit every message twice, so this only delegates, the same way Error does.
    LogInternal(message, LogLevel.Info);
}
/// <summary>
/// Logs <paramref name="message"/> at warn level.
/// </summary>
/// <param name="message">example: "|prefix|unprefixed" </param>
public static void Warn(string message)
{
    // LogInternal performs the format check and the write itself. Logging inline here as well
    // would emit every message twice, so this only delegates, the same way Error does.
    LogInternal(message, LogLevel.Warn);
}
}
}

View File

@@ -6,13 +6,14 @@ using Wox.Infrastructure.Logger;
using Wox.Infrastructure.UserSettings;
using static Wox.Infrastructure.StringMatcher;
namespace Wox.Infrastructure
namespace Wox.Infrastructure
{
public static class StringMatcher
{
public static MatchOption DefaultMatchOption = new MatchOption();
public static string UserSettingSearchPrecision { get; set; }
public static SearchPrecisionScore UserSettingSearchPrecision { get; set; }
public static bool ShouldUsePinyin { get; set; }
[Obsolete("This method is obsolete and should not be used. Please use the static function StringMatcher.FuzzySearch")]
@@ -40,56 +41,104 @@ namespace Wox.Infrastructure
}
/// <summary>
/// refer to https://github.com/mattyork/fuzzy
/// Current method:
/// Character matching + substring matching;
/// 1. Query search string is split into substrings, separator is whitespace.
/// 2. Check each query substring's characters against full compare string,
/// 3. if a character in the substring is matched, loop back to verify the previous character.
/// 4. If previous character also matches, and is the start of the substring, update list.
/// 5. Once the previous character is verified, move on to the next character in the query substring.
/// 6. Move onto the next substring's characters until all substrings are checked.
/// 7. Consider success and move onto scoring if every char or substring without whitespaces matched
/// </summary>
public static MatchResult FuzzySearch(string query, string stringToCompare, MatchOption opt)
{
if (string.IsNullOrEmpty(stringToCompare) || string.IsNullOrEmpty(query)) return new MatchResult { Success = false };
query = query.Trim();
var len = stringToCompare.Length;
var compareString = opt.IgnoreCase ? stringToCompare.ToLower() : stringToCompare;
var pattern = opt.IgnoreCase ? query.ToLower() : query;
var fullStringToCompareWithoutCase = opt.IgnoreCase ? stringToCompare.ToLower() : stringToCompare;
var queryWithoutCase = opt.IgnoreCase ? query.ToLower() : query;
var querySubstrings = queryWithoutCase.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
int currentQuerySubstringIndex = 0;
var currentQuerySubstring = querySubstrings[currentQuerySubstringIndex];
var currentQuerySubstringCharacterIndex = 0;
var sb = new StringBuilder(stringToCompare.Length + (query.Length * (opt.Prefix.Length + opt.Suffix.Length)));
var patternIdx = 0;
var firstMatchIndex = -1;
var firstMatchIndexInWord = -1;
var lastMatchIndex = 0;
char ch;
bool allQuerySubstringsMatched = false;
bool matchFoundInPreviousLoop = false;
bool allSubstringsContainedInCompareString = true;
var indexList = new List<int>();
for (var idx = 0; idx < len; idx++)
for (var compareStringIndex = 0; compareStringIndex < fullStringToCompareWithoutCase.Length; compareStringIndex++)
{
ch = stringToCompare[idx];
if (compareString[idx] == pattern[patternIdx])
if (fullStringToCompareWithoutCase[compareStringIndex] != currentQuerySubstring[currentQuerySubstringCharacterIndex])
{
if (firstMatchIndex < 0)
firstMatchIndex = idx;
lastMatchIndex = idx + 1;
indexList.Add(idx);
sb.Append(opt.Prefix + ch + opt.Suffix);
patternIdx += 1;
}
else
{
sb.Append(ch);
matchFoundInPreviousLoop = false;
continue;
}
// match success, append remain char
if (patternIdx == pattern.Length && (idx + 1) != compareString.Length)
if (firstMatchIndex < 0)
{
sb.Append(stringToCompare.Substring(idx + 1));
break;
// first matched char will become the start of the compared string
firstMatchIndex = compareStringIndex;
}
if (currentQuerySubstringCharacterIndex == 0)
{
// first letter of current word
matchFoundInPreviousLoop = true;
firstMatchIndexInWord = compareStringIndex;
}
else if (!matchFoundInPreviousLoop)
{
// we want to verify that there is not a better match if this is not a full word
// in order to do so we need to verify all previous chars are part of the pattern
var startIndexToVerify = compareStringIndex - currentQuerySubstringCharacterIndex;
if (AllPreviousCharsMatched(startIndexToVerify, currentQuerySubstringCharacterIndex, fullStringToCompareWithoutCase, currentQuerySubstring))
{
matchFoundInPreviousLoop = true;
// if it's the beginning character of the first query substring that is matched then we need to update start index
firstMatchIndex = currentQuerySubstringIndex == 0 ? startIndexToVerify : firstMatchIndex;
indexList = GetUpdatedIndexList(startIndexToVerify, currentQuerySubstringCharacterIndex, firstMatchIndexInWord, indexList);
}
}
lastMatchIndex = compareStringIndex + 1;
indexList.Add(compareStringIndex);
currentQuerySubstringCharacterIndex++;
// if finished looping through every character in the current substring
if (currentQuerySubstringCharacterIndex == currentQuerySubstring.Length)
{
// if any of the substrings was not matched then consider as all are not matched
allSubstringsContainedInCompareString = matchFoundInPreviousLoop && allSubstringsContainedInCompareString;
currentQuerySubstringIndex++;
allQuerySubstringsMatched = AllQuerySubstringsMatched(currentQuerySubstringIndex, querySubstrings.Length);
if (allQuerySubstringsMatched)
break;
// otherwise move to the next query substring
currentQuerySubstring = querySubstrings[currentQuerySubstringIndex];
currentQuerySubstringCharacterIndex = 0;
}
}
// return rendered string if we have a match for every char
if (patternIdx == pattern.Length)
// proceed to calculate score if every char or substring without whitespaces matched
if (allQuerySubstringsMatched)
{
var score = CalculateSearchScore(query, stringToCompare, firstMatchIndex, lastMatchIndex - firstMatchIndex);
var score = CalculateSearchScore(query, stringToCompare, firstMatchIndex, lastMatchIndex - firstMatchIndex, allSubstringsContainedInCompareString);
var pinyinScore = ScoreForPinyin(stringToCompare, query);
var result = new MatchResult
@@ -105,7 +154,44 @@ namespace Wox.Infrastructure
return new MatchResult { Success = false };
}
private static int CalculateSearchScore(string query, string stringToCompare, int firstIndex, int matchLen)
/// <summary>
/// Checks whether the first <paramref name="currentQuerySubstringCharacterIndex"/> characters of
/// <paramref name="currentQuerySubstring"/> occur consecutively in
/// <paramref name="fullStringToCompareWithoutCase"/> starting at <paramref name="startIndexToVerify"/>.
/// </summary>
/// <param name="startIndexToVerify">Index in the compare string where the comparison starts.</param>
/// <param name="currentQuerySubstringCharacterIndex">Number of leading query-substring characters to compare.</param>
/// <param name="fullStringToCompareWithoutCase">String that is searched.</param>
/// <param name="currentQuerySubstring">Query substring whose leading characters are verified.</param>
/// <returns>
/// true when every compared character pair is equal (including when no characters are compared);
/// otherwise false.
/// </returns>
private static bool AllPreviousCharsMatched(int startIndexToVerify, int currentQuerySubstringCharacterIndex,
    string fullStringToCompareWithoutCase, string currentQuerySubstring)
{
    for (var offset = 0; offset < currentQuerySubstringCharacterIndex; offset++)
    {
        if (fullStringToCompareWithoutCase[startIndexToVerify + offset] != currentQuerySubstring[offset])
        {
            // The first mismatch settles the answer, so the remaining characters need not be scanned.
            return false;
        }
    }

    return true;
}
/// <summary>
/// Builds the match-index list after a better position for the current query substring was found.
/// Entries at or after <paramref name="firstMatchIndexInWord"/> are removed from
/// <paramref name="indexList"/> (the passed list itself is modified), and the run of
/// <paramref name="currentQuerySubstringCharacterIndex"/> consecutive indexes starting at
/// <paramref name="startIndexToVerify"/> is appended to a copy of what remains.
/// </summary>
/// <returns>A new list holding the kept indexes followed by the appended run.</returns>
private static List<int> GetUpdatedIndexList(int startIndexToVerify, int currentQuerySubstringCharacterIndex, int firstMatchIndexInWord, List<int> indexList)
{
    // Discard the indexes recorded for the superseded match of the current word.
    indexList.RemoveAll(index => index >= firstMatchIndexInWord);

    var refreshed = new List<int>(indexList);

    var endExclusive = startIndexToVerify + currentQuerySubstringCharacterIndex;
    for (var position = startIndexToVerify; position < endExclusive; position++)
    {
        refreshed.Add(position);
    }

    return refreshed;
}
/// <summary>
/// Tells whether the substring cursor has moved past the last query substring.
/// </summary>
/// <param name="currentQuerySubstringIndex">Index of the query substring that would be processed next.</param>
/// <param name="querySubstringsLength">Total number of query substrings.</param>
/// <returns>true when the index is equal to or greater than the number of substrings.</returns>
private static bool AllQuerySubstringsMatched(int currentQuerySubstringIndex, int querySubstringsLength)
    => querySubstringsLength <= currentQuerySubstringIndex;
private static int CalculateSearchScore(string query, string stringToCompare, int firstIndex, int matchLen, bool allSubstringsContainedInCompareString)
{
// A match found near the beginning of a string is scored more than a match found near the end
// A match is scored more if the characters in the patterns are closer to each other,
@@ -122,6 +208,13 @@ namespace Wox.Infrastructure
score += 10;
}
if (allSubstringsContainedInCompareString)
{
int count = query.Count(c => !char.IsWhiteSpace(c));
int factor = count < 4 ? 10 : 5;
score += factor * count;
}
return score;
}
@@ -143,11 +236,11 @@ namespace Wox.Infrastructure
{
if (Alphabet.ContainsChinese(source))
{
var combination = Alphabet.PinyinComination(source);
var combination = Alphabet.PinyinComination(source);
var pinyinScore = combination
.Select(pinyin => FuzzySearch(target, string.Join("", pinyin)).Score)
.Max();
var acronymScore = combination.Select(Alphabet.Acronym)
var acronymScore = combination.Select(Alphabet.Acronym)
.Select(pinyin => FuzzySearch(target, pinyin).Score)
.Max();
var score = Math.Max(pinyinScore, acronymScore);
@@ -162,7 +255,7 @@ namespace Wox.Infrastructure
{
return 0;
}
}
}
}
public class MatchResult
@@ -178,6 +271,7 @@ namespace Wox.Infrastructure
/// The raw calculated search score without any search precision filtering applied.
/// </summary>
private int _rawScore;
public int RawScore
{
get { return _rawScore; }
@@ -200,10 +294,7 @@ namespace Wox.Infrastructure
/// <summary>
/// Tells whether <paramref name="score"/> reaches the threshold of the configured search precision.
/// </summary>
/// <param name="score">Score to compare against the threshold.</param>
/// <returns>true when the score is at least the integer value of UserSettingSearchPrecision.</returns>
private bool IsSearchPrecisionScoreMet(int score)
{
    // UserSettingSearchPrecision is already a SearchPrecisionScore, so its numeric value is the
    // threshold; no string parsing is needed on each call.
    return score >= (int)UserSettingSearchPrecision;
}
private int ApplySearchPrecisionFilter(int score)
@@ -214,22 +305,18 @@ namespace Wox.Infrastructure
/// <summary>
/// Options passed to FuzzySearch that control how a query is compared with a candidate string.
/// </summary>
public class MatchOption
{
    /// <summary>
    /// prefix of match char, use for highlight
    /// </summary>
    [Obsolete("this is never used")]
    public string Prefix { get; set; } = "";

    /// <summary>
    /// suffix of match char, use for highlight
    /// </summary>
    [Obsolete("this is never used")]
    public string Suffix { get; set; } = "";

    /// <summary>
    /// When true (the default), both the query and the candidate string are lower-cased before comparison.
    /// </summary>
    public bool IgnoreCase { get; set; } = true;
}
}
}

View File

@@ -36,14 +36,30 @@ namespace Wox.Infrastructure.UserSettings
}
/// <summary>
/// Search precision as an enum value, defaulting to Regular. It has a private setter and is
/// assigned by the setter of <see cref="QuerySearchPrecisionString"/>.
/// </summary>
internal StringMatcher.SearchPrecisionScore QuerySearchPrecision { get; private set; } = StringMatcher.SearchPrecisionScore.Regular;

/// <summary>
/// String form of <see cref="QuerySearchPrecision"/>.
/// Setting it parses the text into a SearchPrecisionScore and stores the result both here and in
/// StringMatcher.UserSettingSearchPrecision.
/// </summary>
/// <exception cref="ArgumentException">
/// The value could not be parsed. Before the exception is rethrown, the failure is logged and
/// both stored precisions are reset to Regular.
/// </exception>
public string QuerySearchPrecisionString
{
    get { return QuerySearchPrecision.ToString(); }
    set
    {
        try
        {
            var precisionScore = (StringMatcher.SearchPrecisionScore)Enum
                .Parse(typeof(StringMatcher.SearchPrecisionScore), value);

            QuerySearchPrecision = precisionScore;
            StringMatcher.UserSettingSearchPrecision = precisionScore;
        }
        catch (ArgumentException e)
        {
            Logger.Log.Exception(nameof(Settings), "Failed to load QuerySearchPrecisionString value from Settings file", e);

            // Fall back to a known-good value so both copies stay consistent, then surface the error.
            QuerySearchPrecision = StringMatcher.SearchPrecisionScore.Regular;
            StringMatcher.UserSettingSearchPrecision = StringMatcher.SearchPrecisionScore.Regular;

            throw;
        }
    }
}