Remove unused pinyin code (#7455)

* Remove pinyin

* Remove pinyin mappings and files

* Remove pinyin files from MSI
This commit is contained in:
Divyansh Srivastava
2020-10-23 10:34:24 -07:00
committed by GitHub
parent 859e9f8b04
commit ba07b52cc2
11 changed files with 4 additions and 27367 deletions

View File

@@ -1,205 +0,0 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using hyjiacan.util.p4n;
using hyjiacan.util.p4n.format;
using JetBrains.Annotations;
using Wox.Infrastructure.Logger;
using Wox.Infrastructure.Storage;
using Wox.Infrastructure.UserSettings;
namespace Wox.Infrastructure
{
/// <summary>
/// <see cref="IAlphabet"/> implementation that replaces Chinese characters with pinyin
/// obtained from <see cref="PinyinHelper"/>. Computed combinations are cached in memory
/// and loaded from / saved to the "Pinyin" binary storage.
/// </summary>
public class Alphabet : IAlphabet
{
    private static readonly string[] _emptyStringArray = Array.Empty<string>();
    private static readonly string[][] _empty2DStringArray = Array.Empty<string[]>();

    private readonly HanyuPinyinOutputFormat _pinyinFormat = new HanyuPinyinOutputFormat();
    private ConcurrentDictionary<string, string[][]> _pinyinCache;
    private BinaryStorage<Dictionary<string, string[][]>> _pinyinStorage;
    private Settings _settings;

    /// <summary>
    /// Stores the settings and prepares the pinyin formatter, storage and cache.
    /// Must be called before any other member, because they all read the settings.
    /// </summary>
    /// <param name="settings">User settings; <c>ShouldUsePinyin</c> is consulted on every call.</param>
    /// <exception cref="ArgumentNullException"><paramref name="settings"/> is null.</exception>
    public void Initialize([NotNull] Settings settings)
    {
        _settings = settings ?? throw new ArgumentNullException(nameof(settings));
        InitializePinyinHelpers();
    }

    /// <summary>
    /// Configures the output format, loads the persisted cache and warms up the pinyin library.
    /// </summary>
    private void InitializePinyinHelpers()
    {
        _pinyinFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

        Stopwatch.Normal("|Wox.Infrastructure.Alphabet.Initialize|Preload pinyin cache", () =>
        {
            _pinyinStorage = new BinaryStorage<Dictionary<string, string[][]>>("Pinyin");
            SetPinyinCacheAsDictionary(_pinyinStorage.TryLoad(new Dictionary<string, string[][]>()));

            // force pinyin library static constructor initialize
            PinyinHelper.toHanyuPinyinStringArray('T', _pinyinFormat);
        });

        Log.Info($"Number of preload pinyin combination<{_pinyinCache.Count}>", GetType());
    }

    /// <inheritdoc/>
    public string Translate(string str)
    {
        return ConvertChineseCharactersToPinyin(str);
    }

    /// <summary>
    /// Converts a string containing Chinese characters into one string made of every distinct
    /// acronym followed by every full pinyin combination, concatenated without separators.
    /// </summary>
    /// <param name="source">Text to convert.</param>
    /// <returns>
    /// The concatenated acronyms and pinyin combinations, or <paramref name="source"/> unchanged when
    /// pinyin is disabled, the text is null/empty, or <see cref="ContainsChinese"/> returns false.
    /// </returns>
    public string ConvertChineseCharactersToPinyin(string source)
    {
        if (!_settings.ShouldUsePinyin || string.IsNullOrEmpty(source) || !ContainsChinese(source))
        {
            return source;
        }

        var combination = PinyinCombination(source);
        var pinyinArray = combination.Select(x => string.Join(string.Empty, x));
        var acronymArray = combination.Select(Acronym).Distinct();

        // Acronyms first, then the full pinyin strings, exactly as before.
        return string.Concat(acronymArray.Concat(pinyinArray));
    }

    /// <summary>
    /// Persists the in-memory pinyin cache to storage. Does nothing when pinyin is disabled.
    /// </summary>
    public void Save()
    {
        if (!_settings.ShouldUsePinyin)
        {
            return;
        }

        _pinyinStorage.Save(GetPinyinCacheAsDictionary());
    }

    /// <summary>
    /// Replaces each Chinese character with its first pinyin reading; non-Chinese characters are kept as-is.
    /// </summary>
    /// <param name="word">Should be a word or sentence, instead of a single character. e.g. 微软</param>
    /// <returns>One entry per input character, or an empty array when pinyin is disabled.</returns>
    [Obsolete("Not accurate, eg 音乐 will not return yinyue but returns yinle ")]
    public string[] Pinyin(string word)
    {
        if (!_settings.ShouldUsePinyin)
        {
            return _emptyStringArray;
        }

        return word.Select(c =>
        {
            var pinyins = PinyinHelper.toHanyuPinyinStringArray(c);

            // Fall back to the character itself when the library has no reading for it.
            return pinyins == null || pinyins.Length == 0 ? c.ToString() : pinyins[0];
        }).ToArray();
    }

    /// <summary>
    /// Replaces Chinese characters with pinyin; non-Chinese characters are not modified.
    /// Because there is no word dictionary, every possible pinyin combination is returned,
    /// e.g. 音乐 will return yinyue and yinle.
    /// </summary>
    /// <param name="characters">Should be a word or sentence, instead of a single character. e.g. 微软</param>
    /// <returns>
    /// One array per combination, each holding one entry per input character;
    /// empty when pinyin is disabled or <paramref name="characters"/> is null/empty.
    /// </returns>
    public string[][] PinyinCombination(string characters)
    {
        if (!_settings.ShouldUsePinyin || string.IsNullOrEmpty(characters))
        {
            return _empty2DStringArray;
        }

        if (_pinyinCache.TryGetValue(characters, out var cached))
        {
            return cached;
        }

        // Build the cartesian product directly as arrays. The previous implementation joined
        // entries with ';' and split them again, which corrupted any input that itself
        // contained ';' (empty entries that later crashed Acronym).
        var combinations = new List<string[]> { _emptyStringArray };
        foreach (var c in characters)
        {
            var pinyins = PinyinHelper.toHanyuPinyinStringArray(c, _pinyinFormat);
            var options = pinyins != null ? pinyins.Distinct().ToArray() : new[] { c.ToString() };
            combinations = ExtendCombinations(combinations, options);
        }

        var combination = combinations.ToArray();
        _pinyinCache[characters] = combination;
        return combination;
    }

    /// <summary>
    /// Builds an acronym from the first character of every entry.
    /// </summary>
    /// <param name="pinyin">Pinyin entries, one per source character.</param>
    /// <returns>The concatenated first characters; null or empty entries are skipped.</returns>
    public string Acronym(string[] pinyin)
    {
        // Previously cached combinations may still contain empty entries, so guard the indexer.
        return string.Concat(pinyin.Where(p => !string.IsNullOrEmpty(p)).Select(p => p[0]));
    }

    /// <summary>
    /// Checks whether the pinyin library has a reading for at least one character of the word.
    /// </summary>
    /// <param name="word">Text to inspect.</param>
    /// <returns>
    /// False when pinyin is disabled, the word is null, or it is longer than 40 characters;
    /// otherwise whether any character has a pinyin reading.
    /// </returns>
    public bool ContainsChinese(string word)
    {
        if (!_settings.ShouldUsePinyin || word == null)
        {
            return false;
        }

        if (word.Length > 40)
        {
            // Skip strings that are too long for pinyin conversion.
            return false;
        }

        return word.Any(c => PinyinHelper.toHanyuPinyinStringArray(c) != null);
    }

    /// <summary>
    /// Appends every option to every existing prefix, keeping prefix-major order.
    /// </summary>
    private static List<string[]> ExtendCombinations(List<string[]> prefixes, string[] options)
    {
        var extended = new List<string[]>(prefixes.Count * options.Length);
        foreach (var prefix in prefixes)
        {
            foreach (var option in options)
            {
                var next = new string[prefix.Length + 1];
                prefix.CopyTo(next, 0);
                next[prefix.Length] = option;
                extended.Add(next);
            }
        }

        return extended;
    }

    /// <summary>
    /// Copies the concurrent cache into a plain dictionary for storage.
    /// </summary>
    private Dictionary<string, string[][]> GetPinyinCacheAsDictionary()
    {
        return new Dictionary<string, string[][]>(_pinyinCache);
    }

    /// <summary>
    /// Replaces the in-memory cache with the contents of the given dictionary.
    /// </summary>
    private void SetPinyinCacheAsDictionary(Dictionary<string, string[][]> usage)
    {
        _pinyinCache = new ConcurrentDictionary<string, string[][]>(usage);
    }
}
}

View File

@@ -1,11 +0,0 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
namespace Wox.Infrastructure
{
/// <summary>
/// Abstraction for a service that translates a string into another representation.
/// Wox.Infrastructure.Alphabet implements it by replacing Chinese characters with pinyin.
/// </summary>
public interface IAlphabet
{
/// <summary>
/// Translates the given string.
/// </summary>
/// <param name="stringToTranslate">The text to translate.</param>
/// <returns>
/// The translated text.
/// NOTE(review): whether implementations may return the input unchanged or null is not
/// specified by this interface - confirm against the implementers.
/// </returns>
string Translate(string stringToTranslate);
}
}

View File

@@ -17,13 +17,6 @@ namespace Wox.Infrastructure
public SearchPrecisionScore UserSettingSearchPrecision { get; set; }
private readonly IAlphabet _alphabet;
public StringMatcher(IAlphabet alphabet = null)
{
_alphabet = alphabet;
}
public static StringMatcher Instance { get; internal set; }
[Obsolete("This method is obsolete and should not be used. Please use the static function StringMatcher.FuzzySearch")]
@@ -67,15 +60,7 @@ namespace Wox.Infrastructure
}
query = query.Trim();
if (_alphabet != null)
{
query = _alphabet.Translate(query);
stringToCompare = _alphabet.Translate(stringToCompare);
}
var fullStringToCompareWithoutCase = opt.IgnoreCase ? stringToCompare.ToLower() : stringToCompare;
var queryWithoutCase = opt.IgnoreCase ? query.ToLower() : query;
var querySubstrings = queryWithoutCase.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

View File

@@ -62,11 +62,6 @@ namespace Wox.Infrastructure.UserSettings
public string ResultFontStretch { get; set; }
/// <summary>
/// Gets or sets a value indicating whether pinyin conversion is enabled. When false, the Alphabet service skips conversion and returns its input unchanged or an empty result.
/// </summary>
public bool ShouldUsePinyin { get; set; } = false;
internal StringMatcher.SearchPrecisionScore QuerySearchPrecision { get; private set; } = StringMatcher.SearchPrecisionScore.Regular;
[JsonIgnore]

View File

@@ -58,24 +58,10 @@
<PackageReference Include="JetBrains.Annotations" Version="2020.1.0" />
<PackageReference Include="NLog.Extensions.Logging" Version="1.6.5" />
<PackageReference Include="NLog.Schema" Version="4.7.4" />
<PackageReference Include="Pinyin4DotNet" Version="2016.4.23.4">
<NoWarn>NU1701</NoWarn>
</PackageReference>
<PackageReference Include="System.Drawing.Common" Version="4.7.0" />
<PackageReference Include="System.Runtime" Version="4.3.1" />
</ItemGroup>
<ItemGroup>
<None Update="pinyindb\pinyin_gwoyeu_mapping.xml">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="pinyindb\pinyin_mapping.xml">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="pinyindb\unicode_to_hanyu_pinyin.txt">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>
<ItemGroup>
<Compile Include="..\..\..\codeAnalysis\GlobalSuppressions.cs">
<Link>GlobalSuppressions.cs</Link>