mirror of
https://github.com/microsoft/PowerToys.git
synced 2026-04-08 20:27:36 +02:00
Improve several small quality of life issues on Text Extractor (#26021)
This commit is contained in:
@@ -10,13 +10,14 @@ using System.Globalization;
|
|||||||
using System.IO;
|
using System.IO;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
using System.Text;
|
using System.Text;
|
||||||
using System.Text.RegularExpressions;
|
|
||||||
using System.Threading.Tasks;
|
using System.Threading.Tasks;
|
||||||
using System.Windows;
|
using System.Windows;
|
||||||
using System.Windows.Input;
|
using System.Windows.Input;
|
||||||
using System.Windows.Markup;
|
using System.Windows.Markup;
|
||||||
using System.Windows.Media;
|
using System.Windows.Media;
|
||||||
using System.Windows.Media.Imaging;
|
using System.Windows.Media.Imaging;
|
||||||
|
using PowerOCR.Helpers;
|
||||||
|
using PowerOCR.Models;
|
||||||
using Windows.Globalization;
|
using Windows.Globalization;
|
||||||
using Windows.Graphics.Imaging;
|
using Windows.Graphics.Imaging;
|
||||||
using Windows.Media.Ocr;
|
using Windows.Media.Ocr;
|
||||||
@@ -26,6 +27,26 @@ namespace PowerOCR;
|
|||||||
|
|
||||||
internal sealed class ImageMethods
|
internal sealed class ImageMethods
|
||||||
{
|
{
|
||||||
|
/// <summary>
/// Returns a bitmap that is at least <paramref name="minW"/> by <paramref name="minH"/> pixels.
/// </summary>
/// <param name="image">The bitmap to check and, if required, pad.</param>
/// <param name="minW">Minimum width in pixels below which padding is applied.</param>
/// <param name="minH">Minimum height in pixels below which padding is applied.</param>
/// <returns>
/// <paramref name="image"/> itself when it already meets both minimums; otherwise a new
/// bitmap with the same pixel format. The input bitmap is not disposed here.
/// </returns>
internal static Bitmap PadImage(Bitmap image, int minW = 64, int minH = 64)
{
    // Border, in pixels, left above and to the left of the source image.
    const int border = 8;

    bool needsPadding = image.Height < minH || image.Width < minW;
    if (!needsPadding)
    {
        return image;
    }

    // The canvas is the larger of the image and the minimum size, plus a border on both sides.
    int paddedWidth = Math.Max(image.Width + (2 * border), minW + (2 * border));
    int paddedHeight = Math.Max(image.Height + (2 * border), minH + (2 * border));

    Bitmap padded = new(paddedWidth, paddedHeight, image.PixelFormat);

    using (Graphics canvas = Graphics.FromImage(padded))
    {
        // Fill with the colour of the source's top-left pixel, then draw the source at the border offset.
        canvas.Clear(image.GetPixel(0, 0));
        canvas.DrawImageUnscaled(image, border, border);
    }

    return padded;
}
|
||||||
|
|
||||||
internal static ImageSource GetWindowBoundsImage(Window passedWindow)
|
internal static ImageSource GetWindowBoundsImage(Window passedWindow)
|
||||||
{
|
{
|
||||||
bool isGrabFrame = false;
|
bool isGrabFrame = false;
|
||||||
@@ -46,7 +67,7 @@ internal sealed class ImageMethods
|
|||||||
windowHeight -= (int)(70 * dpi.DpiScaleY);
|
windowHeight -= (int)(70 * dpi.DpiScaleY);
|
||||||
}
|
}
|
||||||
|
|
||||||
using Bitmap bmp = new Bitmap(windowWidth, windowHeight, System.Drawing.Imaging.PixelFormat.Format32bppArgb);
|
using Bitmap bmp = new(windowWidth, windowHeight, System.Drawing.Imaging.PixelFormat.Format32bppArgb);
|
||||||
using Graphics g = Graphics.FromImage(bmp);
|
using Graphics g = Graphics.FromImage(bmp);
|
||||||
|
|
||||||
g.CopyFromScreen(thisCorrectedLeft, thisCorrectedTop, 0, 0, bmp.Size, CopyPixelOperation.SourceCopy);
|
g.CopyFromScreen(thisCorrectedLeft, thisCorrectedTop, 0, 0, bmp.Size, CopyPixelOperation.SourceCopy);
|
||||||
@@ -55,7 +76,7 @@ internal sealed class ImageMethods
|
|||||||
|
|
||||||
internal static async Task<string> GetRegionsText(Window? passedWindow, Rectangle selectedRegion, Language? preferredLanguage)
|
internal static async Task<string> GetRegionsText(Window? passedWindow, Rectangle selectedRegion, Language? preferredLanguage)
|
||||||
{
|
{
|
||||||
using Bitmap bmp = new Bitmap(selectedRegion.Width, selectedRegion.Height, System.Drawing.Imaging.PixelFormat.Format32bppArgb);
|
Bitmap bmp = new(selectedRegion.Width, selectedRegion.Height, System.Drawing.Imaging.PixelFormat.Format32bppArgb);
|
||||||
using Graphics g = Graphics.FromImage(bmp);
|
using Graphics g = Graphics.FromImage(bmp);
|
||||||
|
|
||||||
System.Windows.Point absPosPoint = passedWindow == null ? default(System.Windows.Point) : passedWindow.GetAbsolutePosition();
|
System.Windows.Point absPosPoint = passedWindow == null ? default(System.Windows.Point) : passedWindow.GetAbsolutePosition();
|
||||||
@@ -65,7 +86,7 @@ internal sealed class ImageMethods
|
|||||||
|
|
||||||
g.CopyFromScreen(thisCorrectedLeft, thisCorrectedTop, 0, 0, bmp.Size, CopyPixelOperation.SourceCopy);
|
g.CopyFromScreen(thisCorrectedLeft, thisCorrectedTop, 0, 0, bmp.Size, CopyPixelOperation.SourceCopy);
|
||||||
|
|
||||||
// bmp = PadImage(bmp);
|
bmp = PadImage(bmp);
|
||||||
string? resultText = await ExtractText(bmp, preferredLanguage);
|
string? resultText = await ExtractText(bmp, preferredLanguage);
|
||||||
|
|
||||||
return resultText != null ? resultText.Trim() : string.Empty;
|
return resultText != null ? resultText.Trim() : string.Empty;
|
||||||
@@ -74,7 +95,7 @@ internal sealed class ImageMethods
|
|||||||
internal static async Task<string> GetClickedWord(Window passedWindow, System.Windows.Point clickedPoint, Language? preferredLanguage)
|
internal static async Task<string> GetClickedWord(Window passedWindow, System.Windows.Point clickedPoint, Language? preferredLanguage)
|
||||||
{
|
{
|
||||||
DpiScale dpi = VisualTreeHelper.GetDpi(passedWindow);
|
DpiScale dpi = VisualTreeHelper.GetDpi(passedWindow);
|
||||||
Bitmap bmp = new Bitmap((int)(passedWindow.ActualWidth * dpi.DpiScaleX), (int)(passedWindow.ActualHeight * dpi.DpiScaleY), System.Drawing.Imaging.PixelFormat.Format32bppArgb);
|
Bitmap bmp = new((int)(passedWindow.ActualWidth * dpi.DpiScaleX), (int)(passedWindow.ActualHeight * dpi.DpiScaleY), System.Drawing.Imaging.PixelFormat.Format32bppArgb);
|
||||||
Graphics g = Graphics.FromImage(bmp);
|
Graphics g = Graphics.FromImage(bmp);
|
||||||
|
|
||||||
System.Windows.Point absPosPoint = passedWindow.GetAbsolutePosition();
|
System.Windows.Point absPosPoint = passedWindow.GetAbsolutePosition();
|
||||||
@@ -91,35 +112,17 @@ internal sealed class ImageMethods
|
|||||||
|
|
||||||
public static async Task<string> ExtractText(Bitmap bmp, Language? preferredLanguage, System.Windows.Point? singlePoint = null)
|
public static async Task<string> ExtractText(Bitmap bmp, Language? preferredLanguage, System.Windows.Point? singlePoint = null)
|
||||||
{
|
{
|
||||||
Language? selectedLanguage = preferredLanguage;
|
Language? selectedLanguage = preferredLanguage ?? GetOCRLanguage();
|
||||||
if (selectedLanguage == null)
|
|
||||||
{
|
|
||||||
selectedLanguage = GetOCRLanguage();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (selectedLanguage == null)
|
if (selectedLanguage == null)
|
||||||
{
|
{
|
||||||
return string.Empty;
|
return string.Empty;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isCJKLang = false;
|
|
||||||
|
|
||||||
if (selectedLanguage.LanguageTag.StartsWith("zh", StringComparison.InvariantCultureIgnoreCase) == true)
|
|
||||||
{
|
|
||||||
isCJKLang = true;
|
|
||||||
}
|
|
||||||
else if (selectedLanguage.LanguageTag.StartsWith("ja", StringComparison.InvariantCultureIgnoreCase) == true)
|
|
||||||
{
|
|
||||||
isCJKLang = true;
|
|
||||||
}
|
|
||||||
else if (selectedLanguage.LanguageTag.StartsWith("ko", StringComparison.InvariantCultureIgnoreCase) == true)
|
|
||||||
{
|
|
||||||
isCJKLang = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
XmlLanguage lang = XmlLanguage.GetLanguage(selectedLanguage.LanguageTag);
|
XmlLanguage lang = XmlLanguage.GetLanguage(selectedLanguage.LanguageTag);
|
||||||
CultureInfo culture = lang.GetEquivalentCulture();
|
CultureInfo culture = lang.GetEquivalentCulture();
|
||||||
|
|
||||||
|
bool isSpaceJoiningLang = LanguageHelper.IsLanguageSpaceJoining(selectedLanguage);
|
||||||
|
|
||||||
bool scaleBMP = true;
|
bool scaleBMP = true;
|
||||||
|
|
||||||
if (singlePoint != null
|
if (singlePoint != null
|
||||||
@@ -129,68 +132,38 @@ internal sealed class ImageMethods
|
|||||||
}
|
}
|
||||||
|
|
||||||
using Bitmap scaledBitmap = scaleBMP ? ScaleBitmapUniform(bmp, 1.5) : ScaleBitmapUniform(bmp, 1.0);
|
using Bitmap scaledBitmap = scaleBMP ? ScaleBitmapUniform(bmp, 1.5) : ScaleBitmapUniform(bmp, 1.0);
|
||||||
StringBuilder text = new StringBuilder();
|
StringBuilder text = new();
|
||||||
|
|
||||||
await using (MemoryStream memory = new MemoryStream())
|
await using MemoryStream memoryStream = new();
|
||||||
|
using WrappingStream wrappingStream = new(memoryStream);
|
||||||
|
|
||||||
|
scaledBitmap.Save(wrappingStream, ImageFormat.Bmp);
|
||||||
|
wrappingStream.Position = 0;
|
||||||
|
BitmapDecoder bmpDecoder = await BitmapDecoder.CreateAsync(wrappingStream.AsRandomAccessStream());
|
||||||
|
SoftwareBitmap softwareBmp = await bmpDecoder.GetSoftwareBitmapAsync();
|
||||||
|
|
||||||
|
OcrEngine ocrEngine = OcrEngine.TryCreateFromLanguage(selectedLanguage);
|
||||||
|
OcrResult ocrResult = await ocrEngine.RecognizeAsync(softwareBmp);
|
||||||
|
|
||||||
|
GC.Collect();
|
||||||
|
|
||||||
|
if (singlePoint == null)
|
||||||
{
|
{
|
||||||
scaledBitmap.Save(memory, ImageFormat.Bmp);
|
foreach (OcrLine ocrLine in ocrResult.Lines)
|
||||||
memory.Position = 0;
|
|
||||||
BitmapDecoder bmpDecoder = await BitmapDecoder.CreateAsync(memory.AsRandomAccessStream());
|
|
||||||
SoftwareBitmap softwareBmp = await bmpDecoder.GetSoftwareBitmapAsync();
|
|
||||||
|
|
||||||
OcrEngine ocrEngine = OcrEngine.TryCreateFromLanguage(selectedLanguage);
|
|
||||||
OcrResult ocrResult = await ocrEngine.RecognizeAsync(softwareBmp);
|
|
||||||
|
|
||||||
if (singlePoint == null)
|
|
||||||
{
|
{
|
||||||
if (isCJKLang == false)
|
ocrLine.GetTextFromOcrLine(isSpaceJoiningLang, text);
|
||||||
{
|
|
||||||
foreach (OcrLine line in ocrResult.Lines)
|
|
||||||
{
|
|
||||||
text.AppendLine(line.Text);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Kanji, Hiragana, Katakana, Hankaku-Katakana do not need blank.(not only the symbol in CJKUnifiedIdeographs).
|
|
||||||
// Maybe there are more symbols that don't require spaces like \u3001 \u3002.
|
|
||||||
// var cjkRegex = new Regex(@"\p{IsCJKUnifiedIdeographs}|\p{IsHiragana}|\p{IsKatakana}|[\uFF61-\uFF9F]|[\u3000-\u3003]");
|
|
||||||
var cjkRegex = new Regex(@"\p{IsCJKUnifiedIdeographs}|\p{IsHiragana}|\p{IsKatakana}|[\uFF61-\uFF9F]");
|
|
||||||
|
|
||||||
foreach (OcrLine ocrLine in ocrResult.Lines)
|
|
||||||
{
|
|
||||||
bool isBeginning = true;
|
|
||||||
bool isCJKPrev = false;
|
|
||||||
foreach (OcrWord ocrWord in ocrLine.Words)
|
|
||||||
{
|
|
||||||
bool isCJK = cjkRegex.IsMatch(ocrWord.Text);
|
|
||||||
|
|
||||||
// Use spaces to separate non-CJK words.
|
|
||||||
if (!isBeginning && (!isCJK || !isCJKPrev))
|
|
||||||
{
|
|
||||||
_ = text.Append(' ');
|
|
||||||
}
|
|
||||||
|
|
||||||
_ = text.Append(ocrWord.Text);
|
|
||||||
isCJKPrev = isCJK;
|
|
||||||
isBeginning = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
text.Append(Environment.NewLine);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Windows.Foundation.Point fPoint = new Windows.Foundation.Point(singlePoint.Value.X, singlePoint.Value.Y);
|
||||||
|
foreach (OcrLine ocrLine in ocrResult.Lines)
|
||||||
{
|
{
|
||||||
Windows.Foundation.Point fPoint = new Windows.Foundation.Point(singlePoint.Value.X, singlePoint.Value.Y);
|
foreach (OcrWord ocrWord in ocrLine.Words)
|
||||||
foreach (OcrLine ocrLine in ocrResult.Lines)
|
|
||||||
{
|
{
|
||||||
foreach (OcrWord ocrWord in ocrLine.Words)
|
if (ocrWord.BoundingRect.Contains(fPoint))
|
||||||
{
|
{
|
||||||
if (ocrWord.BoundingRect.Contains(fPoint))
|
_ = text.Append(ocrWord.Text);
|
||||||
{
|
|
||||||
_ = text.Append(ocrWord.Text);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -205,7 +178,7 @@ internal sealed class ImageMethods
|
|||||||
{
|
{
|
||||||
List<string> wordArray = textLine.Split().ToList();
|
List<string> wordArray = textLine.Split().ToList();
|
||||||
wordArray.Reverse();
|
wordArray.Reverse();
|
||||||
_ = isCJKLang == true ? text.Append(string.Join(string.Empty, wordArray)) : text.Append(string.Join(' ', wordArray));
|
_ = text.Append(string.Join(' ', wordArray));
|
||||||
|
|
||||||
if (textLine.Length > 0)
|
if (textLine.Length > 0)
|
||||||
{
|
{
|
||||||
@@ -223,27 +196,29 @@ internal sealed class ImageMethods
|
|||||||
|
|
||||||
public static Bitmap ScaleBitmapUniform(Bitmap passedBitmap, double scale)
|
public static Bitmap ScaleBitmapUniform(Bitmap passedBitmap, double scale)
|
||||||
{
|
{
|
||||||
using MemoryStream memory = new MemoryStream();
|
using MemoryStream memoryStream = new();
|
||||||
passedBitmap.Save(memory, ImageFormat.Bmp);
|
using WrappingStream wrappingStream = new(memoryStream);
|
||||||
memory.Position = 0;
|
passedBitmap.Save(wrappingStream, ImageFormat.Bmp);
|
||||||
BitmapImage bitmapimage = new BitmapImage();
|
wrappingStream.Position = 0;
|
||||||
|
BitmapImage bitmapimage = new();
|
||||||
bitmapimage.BeginInit();
|
bitmapimage.BeginInit();
|
||||||
bitmapimage.StreamSource = memory;
|
bitmapimage.StreamSource = wrappingStream;
|
||||||
bitmapimage.CacheOption = BitmapCacheOption.OnLoad;
|
bitmapimage.CacheOption = BitmapCacheOption.OnLoad;
|
||||||
bitmapimage.EndInit();
|
bitmapimage.EndInit();
|
||||||
bitmapimage.Freeze();
|
bitmapimage.Freeze();
|
||||||
TransformedBitmap transformedBmp = new TransformedBitmap();
|
TransformedBitmap transformedBmp = new();
|
||||||
transformedBmp.BeginInit();
|
transformedBmp.BeginInit();
|
||||||
transformedBmp.Source = bitmapimage;
|
transformedBmp.Source = bitmapimage;
|
||||||
transformedBmp.Transform = new ScaleTransform(scale, scale);
|
transformedBmp.Transform = new ScaleTransform(scale, scale);
|
||||||
transformedBmp.EndInit();
|
transformedBmp.EndInit();
|
||||||
transformedBmp.Freeze();
|
transformedBmp.Freeze();
|
||||||
|
GC.Collect();
|
||||||
return BitmapSourceToBitmap(transformedBmp);
|
return BitmapSourceToBitmap(transformedBmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Bitmap BitmapSourceToBitmap(BitmapSource source)
|
public static Bitmap BitmapSourceToBitmap(BitmapSource source)
|
||||||
{
|
{
|
||||||
Bitmap bmp = new Bitmap(
|
Bitmap bmp = new(
|
||||||
source.PixelWidth,
|
source.PixelWidth,
|
||||||
source.PixelHeight,
|
source.PixelHeight,
|
||||||
System.Drawing.Imaging.PixelFormat.Format32bppPArgb);
|
System.Drawing.Imaging.PixelFormat.Format32bppPArgb);
|
||||||
@@ -257,21 +232,24 @@ internal sealed class ImageMethods
|
|||||||
data.Height * data.Stride,
|
data.Height * data.Stride,
|
||||||
data.Stride);
|
data.Stride);
|
||||||
bmp.UnlockBits(data);
|
bmp.UnlockBits(data);
|
||||||
|
GC.Collect();
|
||||||
return bmp;
|
return bmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
internal static BitmapImage BitmapToImageSource(Bitmap bitmap)
|
internal static BitmapImage BitmapToImageSource(Bitmap bitmap)
|
||||||
{
|
{
|
||||||
using MemoryStream memory = new MemoryStream();
|
using MemoryStream memoryStream = new();
|
||||||
bitmap.Save(memory, ImageFormat.Bmp);
|
using WrappingStream wrappingStream = new(memoryStream);
|
||||||
memory.Position = 0;
|
|
||||||
BitmapImage bitmapimage = new BitmapImage();
|
bitmap.Save(wrappingStream, ImageFormat.Bmp);
|
||||||
|
wrappingStream.Position = 0;
|
||||||
|
BitmapImage bitmapimage = new();
|
||||||
bitmapimage.BeginInit();
|
bitmapimage.BeginInit();
|
||||||
bitmapimage.StreamSource = memory;
|
bitmapimage.StreamSource = wrappingStream;
|
||||||
bitmapimage.CacheOption = BitmapCacheOption.OnLoad;
|
bitmapimage.CacheOption = BitmapCacheOption.OnLoad;
|
||||||
bitmapimage.EndInit();
|
bitmapimage.EndInit();
|
||||||
bitmapimage.Freeze();
|
bitmapimage.Freeze();
|
||||||
|
GC.Collect();
|
||||||
return bitmapimage;
|
return bitmapimage;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -280,7 +258,7 @@ internal sealed class ImageMethods
|
|||||||
// use currently selected Language
|
// use currently selected Language
|
||||||
string inputLang = InputLanguageManager.Current.CurrentInputLanguage.Name;
|
string inputLang = InputLanguageManager.Current.CurrentInputLanguage.Name;
|
||||||
|
|
||||||
Language? selectedLanguage = new Language(inputLang);
|
Language? selectedLanguage = new(inputLang);
|
||||||
List<Language> possibleOcrLanguages = OcrEngine.AvailableRecognizerLanguages.ToList();
|
List<Language> possibleOcrLanguages = OcrEngine.AvailableRecognizerLanguages.ToList();
|
||||||
|
|
||||||
if (possibleOcrLanguages.Count < 1)
|
if (possibleOcrLanguages.Count < 1)
|
||||||
|
|||||||
26
src/modules/PowerOCR/PowerOCR/Helpers/LanguageHelper.cs
Normal file
26
src/modules/PowerOCR/PowerOCR/Helpers/LanguageHelper.cs
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
// Copyright (c) Microsoft Corporation
|
||||||
|
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||||
|
// See the LICENSE file in the project root for more information.
|
||||||
|
|
||||||
|
using System;
|
||||||
|
using Windows.Globalization;
|
||||||
|
|
||||||
|
namespace PowerOCR.Helpers
|
||||||
|
{
|
||||||
|
/// <summary>
/// Helpers for reasoning about OCR languages.
/// </summary>
internal static class LanguageHelper
{
    /// <summary>
    /// Determines whether words recognized in the given language should be joined with spaces.
    /// </summary>
    /// <param name="selectedLanguage">The OCR language whose <c>LanguageTag</c> is inspected.</param>
    /// <returns>
    /// <c>false</c> for Chinese ("zh…") and Japanese ("ja" or "ja-…") tags; <c>true</c> for every other tag.
    /// </returns>
    public static bool IsLanguageSpaceJoining(Language selectedLanguage)
    {
        string languageTag = selectedLanguage.LanguageTag;

        if (languageTag.StartsWith("zh", StringComparison.InvariantCultureIgnoreCase))
        {
            return false;
        }

        // Accept the bare "ja" tag as well as region/script-qualified forms such as "ja-JP".
        // The trailing '-' keeps unrelated tags that merely begin with "ja" from matching.
        bool isJapanese =
            languageTag.Equals("ja", StringComparison.OrdinalIgnoreCase) ||
            languageTag.StartsWith("ja-", StringComparison.OrdinalIgnoreCase);

        return !isJapanese;
    }
}
|
||||||
|
}
|
||||||
54
src/modules/PowerOCR/PowerOCR/Helpers/OcrExtensions.cs
Normal file
54
src/modules/PowerOCR/PowerOCR/Helpers/OcrExtensions.cs
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
// Copyright (c) Microsoft Corporation
|
||||||
|
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||||
|
// See the LICENSE file in the project root for more information.
|
||||||
|
|
||||||
|
using System.Text;
|
||||||
|
using System.Text.RegularExpressions;
|
||||||
|
using Windows.Media.Ocr;
|
||||||
|
|
||||||
|
namespace PowerOCR.Helpers
|
||||||
|
{
|
||||||
|
/// <summary>
/// Extension methods for turning OCR results into text.
/// </summary>
internal static class OcrExtensions
{
    // Matches a "space-joining" word, i.e. a word that contains:
    // - one letter that is not in "other letters" (CJK characters are "other letters"),
    // - one number digit, or
    // - any word longer than one character.
    // Built once and reused; Regex instances are safe to share for matching.
    private static readonly Regex SpaceJoiningWordRegex = new(@"(^[\p{L}-[\p{Lo}]]|\p{Nd}$)|.{2,}");

    /// <summary>
    /// Appends the text of one OCR line, followed by a line terminator, to <paramref name="text"/>.
    /// </summary>
    /// <param name="ocrLine">The recognized line.</param>
    /// <param name="isSpaceJoiningOCRLang">
    /// <c>true</c> to append the line's own <c>Text</c> as-is; <c>false</c> (OCR language is zh or ja)
    /// to rebuild the line word by word, inserting spaces only around space-joining words.
    /// </param>
    /// <param name="text">The builder that receives the output.</param>
    public static void GetTextFromOcrLine(this OcrLine ocrLine, bool isSpaceJoiningOCRLang, StringBuilder text)
    {
        if (isSpaceJoiningOCRLang)
        {
            text.AppendLine(ocrLine.Text);
            return;
        }

        // Chinese and Japanese characters are single-character words.
        // When a word is one punctuation/symbol, it is joined without spaces.
        bool isFirstWord = true;
        bool isPrevWordSpaceJoining = false;

        foreach (OcrWord ocrWord in ocrLine.Words)
        {
            string wordString = ocrWord.Text;
            bool isThisWordSpaceJoining = SpaceJoiningWordRegex.IsMatch(wordString);

            // A space is needed only between two words when at least one of them is space-joining.
            bool needsSeparator = !isFirstWord && (isThisWordSpaceJoining || isPrevWordSpaceJoining);
            if (needsSeparator)
            {
                _ = text.Append(' ');
            }

            _ = text.Append(wordString);

            isFirstWord = false;
            isPrevWordSpaceJoining = isThisWordSpaceJoining;
        }

        // Terminate the line, matching the space-joining branch; without this,
        // consecutive lines run together in the output.
        _ = text.AppendLine();
    }
}
|
||||||
|
}
|
||||||
228
src/modules/PowerOCR/PowerOCR/Helpers/WrappingStream.cs
Normal file
228
src/modules/PowerOCR/PowerOCR/Helpers/WrappingStream.cs
Normal file
@@ -0,0 +1,228 @@
|
|||||||
|
// Copyright (c) Microsoft Corporation
|
||||||
|
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||||
|
// See the LICENSE file in the project root for more information.
|
||||||
|
|
||||||
|
using System;
|
||||||
|
using System.IO;
|
||||||
|
|
||||||
|
namespace PowerOCR.Models;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// A <see cref="Stream"/> that wraps another stream. The major feature of <see cref="WrappingStream"/> is that it does not dispose the
|
||||||
|
/// underlying stream when it is disposed; this is useful when using classes such as <see cref="BinaryReader"/> and
|
||||||
|
/// <see cref="System.Security.Cryptography.CryptoStream"/> that take ownership of the stream passed to their constructors.
|
||||||
|
/// </summary>
|
||||||
|
public class WrappingStream : Stream
{
    // Set to null on dispose; a null value is what marks this wrapper as disposed.
    private Stream? _streamBase;

    /// <summary>
    /// Initializes a new instance of the <see cref="WrappingStream"/> class.
    /// </summary>
    /// <param name="streamBase">The wrapped stream.</param>
    /// <exception cref="ArgumentNullException"><paramref name="streamBase"/> is null.</exception>
    public WrappingStream(Stream streamBase)
    {
        _streamBase = streamBase ?? throw new ArgumentNullException(nameof(streamBase));
    }

    /// <summary>
    /// Gets a value indicating whether the current stream supports reading.
    /// </summary>
    /// <returns><c>true</c> if the stream supports reading; otherwise, <c>false</c>.</returns>
    public override bool CanRead => _streamBase != null && _streamBase.CanRead;

    /// <summary>
    /// Gets a value indicating whether the current stream supports seeking.
    /// </summary>
    /// <returns><c>true</c> if the stream supports seeking; otherwise, <c>false</c>.</returns>
    public override bool CanSeek => _streamBase != null && _streamBase.CanSeek;

    /// <summary>
    /// Gets a value indicating whether the current stream supports writing.
    /// </summary>
    /// <returns><c>true</c> if the stream supports writing; otherwise, <c>false</c>.</returns>
    public override bool CanWrite => _streamBase != null && _streamBase.CanWrite;

    /// <summary>
    /// Gets the length in bytes of the stream.
    /// </summary>
    /// <exception cref="ObjectDisposedException">This wrapper has been disposed.</exception>
    public override long Length => GetStreamOrThrow().Length;

    /// <summary>
    /// Gets or sets the position within the current stream.
    /// </summary>
    /// <exception cref="ObjectDisposedException">This wrapper has been disposed.</exception>
    public override long Position
    {
        get => GetStreamOrThrow().Position;
        set => GetStreamOrThrow().Position = value;
    }

    /// <summary>
    /// Begins an asynchronous read operation on the wrapped stream.
    /// </summary>
    /// <remarks>
    /// <paramref name="callback"/> and <paramref name="state"/> are optional and are passed
    /// through unchanged, so the returned result can always be handed to <see cref="EndRead"/>.
    /// </remarks>
    public override IAsyncResult BeginRead(byte[] buffer, int offset, int count, AsyncCallback? callback, object? state)
    {
        return GetStreamOrThrow().BeginRead(buffer, offset, count, callback, state);
    }

    /// <summary>
    /// Begins an asynchronous write operation on the wrapped stream.
    /// </summary>
    /// <remarks>
    /// <paramref name="callback"/> and <paramref name="state"/> are optional and are passed
    /// through unchanged, so the returned result can always be handed to <see cref="EndWrite"/>.
    /// </remarks>
    public override IAsyncResult BeginWrite(byte[] buffer, int offset, int count, AsyncCallback? callback, object? state)
    {
        return GetStreamOrThrow().BeginWrite(buffer, offset, count, callback, state);
    }

    /// <summary>
    /// Waits for the pending asynchronous read to complete.
    /// </summary>
    public override int EndRead(IAsyncResult asyncResult)
    {
        return GetStreamOrThrow().EndRead(asyncResult);
    }

    /// <summary>
    /// Ends an asynchronous write operation.
    /// </summary>
    public override void EndWrite(IAsyncResult asyncResult)
    {
        GetStreamOrThrow().EndWrite(asyncResult);
    }

    /// <summary>
    /// Clears all buffers for this stream and causes any buffered data to be written to the underlying device.
    /// </summary>
    public override void Flush()
    {
        GetStreamOrThrow().Flush();
    }

    /// <summary>
    /// Reads a sequence of bytes from the current stream and advances the position
    /// within the stream by the number of bytes read.
    /// </summary>
    public override int Read(byte[] buffer, int offset, int count)
    {
        return GetStreamOrThrow().Read(buffer, offset, count);
    }

    /// <summary>
    /// Reads a byte from the stream and advances the position within the stream by one byte, or returns -1 if at the end of the stream.
    /// </summary>
    public override int ReadByte()
    {
        return GetStreamOrThrow().ReadByte();
    }

    /// <summary>
    /// Sets the position within the current stream.
    /// </summary>
    /// <param name="offset">A byte offset relative to the <paramref name="origin"/> parameter.</param>
    /// <param name="origin">A <see cref="SeekOrigin"/> value indicating the reference point used to obtain the new position.</param>
    /// <returns>The new position within the current stream.</returns>
    public override long Seek(long offset, SeekOrigin origin)
    {
        return GetStreamOrThrow().Seek(offset, origin);
    }

    /// <summary>
    /// Sets the length of the current stream.
    /// </summary>
    /// <param name="value">The desired length of the current stream in bytes.</param>
    public override void SetLength(long value)
    {
        GetStreamOrThrow().SetLength(value);
    }

    /// <summary>
    /// Writes a sequence of bytes to the current stream and advances the current position
    /// within this stream by the number of bytes written.
    /// </summary>
    public override void Write(byte[] buffer, int offset, int count)
    {
        GetStreamOrThrow().Write(buffer, offset, count);
    }

    /// <summary>
    /// Writes a byte to the current position in the stream and advances the position within the stream by one byte.
    /// </summary>
    public override void WriteByte(byte value)
    {
        GetStreamOrThrow().WriteByte(value);
    }

    /// <summary>
    /// Gets the wrapped stream.
    /// </summary>
    /// <value>The wrapped stream, or <c>null</c> once this wrapper has been disposed.</value>
    protected Stream? WrappedStream => _streamBase;

    /// <summary>
    /// Releases the unmanaged resources used by the <see cref="WrappingStream"/> and optionally releases the managed resources.
    /// </summary>
    /// <param name="disposing">true to release both managed and unmanaged resources; false to release only unmanaged resources.</param>
    protected override void Dispose(bool disposing)
    {
        // doesn't close the base stream, but just prevents access to it through this WrappingStream
        if (disposing)
        {
            _streamBase = null;
        }

        base.Dispose(disposing);
    }

    // Returns the wrapped stream, or throws an ObjectDisposedException if this object has been disposed.
    private Stream GetStreamOrThrow()
    {
        return _streamBase ?? throw new ObjectDisposedException(GetType().Name);
    }
}
|
||||||
19
src/modules/PowerOCR/PowerOCR/Models/NullAsyncResult.cs
Normal file
19
src/modules/PowerOCR/PowerOCR/Models/NullAsyncResult.cs
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
// Copyright (c) Microsoft Corporation
|
||||||
|
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||||
|
// See the LICENSE file in the project root for more information.
|
||||||
|
|
||||||
|
using System;
|
||||||
|
using System.Threading;
|
||||||
|
|
||||||
|
namespace PowerOCR.Models;
|
||||||
|
|
||||||
|
/// <summary>
/// An <see cref="IAsyncResult"/> representing an operation that has already completed
/// synchronously and carries no state.
/// </summary>
public class NullAsyncResult : IAsyncResult
{
    // Created on first use and then reused, so repeated property reads return the same handle.
    // The event starts signaled because this result is always complete.
    private readonly Lazy<ManualResetEvent> _completedHandle = new(() => new ManualResetEvent(true));

    /// <summary>
    /// Gets the user state; always <c>null</c>.
    /// </summary>
    public object? AsyncState => null;

    /// <summary>
    /// Gets a wait handle that is already signaled, so waiting on it returns immediately.
    /// </summary>
    public WaitHandle AsyncWaitHandle => _completedHandle.Value;

    /// <summary>
    /// Gets a value indicating whether the operation completed synchronously; always <c>true</c>.
    /// </summary>
    public bool CompletedSynchronously => true;

    /// <summary>
    /// Gets a value indicating whether the operation has completed; always <c>true</c>.
    /// </summary>
    public bool IsCompleted => true;
}
|
||||||
11
src/modules/PowerOCR/PowerOCR/Models/NullWaitHandle.cs
Normal file
11
src/modules/PowerOCR/PowerOCR/Models/NullWaitHandle.cs
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
// Copyright (c) Microsoft Corporation
|
||||||
|
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||||
|
// See the LICENSE file in the project root for more information.
|
||||||
|
|
||||||
|
using System.Threading;
|
||||||
|
|
||||||
|
namespace PowerOCR.Models;
|
||||||
|
|
||||||
|
/// <summary>
/// A wait handle that is created in the signaled state, so any wait on it returns immediately.
/// </summary>
/// <remarks>
/// Derives from <see cref="EventWaitHandle"/> (itself a <see cref="WaitHandle"/>) so that the
/// instance owns a real handle; a bare <see cref="WaitHandle"/> subclass has none to wait on.
/// </remarks>
public class NullWaitHandle : EventWaitHandle
{
    /// <summary>
    /// Initializes a new instance of the <see cref="NullWaitHandle"/> class, already signaled.
    /// </summary>
    public NullWaitHandle()
        : base(true, EventResetMode.ManualReset)
    {
    }
}
|
||||||
@@ -30,7 +30,7 @@ public partial class OCROverlay : Window
|
|||||||
|
|
||||||
private bool IsSelecting { get; set; }
|
private bool IsSelecting { get; set; }
|
||||||
|
|
||||||
private Border selectBorder = new Border();
|
private Border selectBorder = new();
|
||||||
|
|
||||||
private DpiScale? dpiScale;
|
private DpiScale? dpiScale;
|
||||||
|
|
||||||
@@ -57,7 +57,7 @@ public partial class OCROverlay : Window
|
|||||||
{
|
{
|
||||||
InitializeComponent();
|
InitializeComponent();
|
||||||
|
|
||||||
var userSettings = new UserSettings(new Helpers.ThrottledActionInvoker());
|
var userSettings = new UserSettings(new ThrottledActionInvoker());
|
||||||
string? selectedLanguageName = userSettings.PreferredLanguage.Value;
|
string? selectedLanguageName = userSettings.PreferredLanguage.Value;
|
||||||
|
|
||||||
// build context menu
|
// build context menu
|
||||||
@@ -70,7 +70,7 @@ public partial class OCROverlay : Window
|
|||||||
List<Language> possibleOcrLanguages = OcrEngine.AvailableRecognizerLanguages.ToList();
|
List<Language> possibleOcrLanguages = OcrEngine.AvailableRecognizerLanguages.ToList();
|
||||||
foreach (Language language in possibleOcrLanguages)
|
foreach (Language language in possibleOcrLanguages)
|
||||||
{
|
{
|
||||||
MenuItem menuItem = new MenuItem() { Header = language.NativeName, Tag = language, IsCheckable = true };
|
MenuItem menuItem = new() { Header = language.NativeName, Tag = language, IsCheckable = true };
|
||||||
menuItem.IsChecked = language.DisplayName.Equals(selectedLanguageName, StringComparison.Ordinal);
|
menuItem.IsChecked = language.DisplayName.Equals(selectedLanguageName, StringComparison.Ordinal);
|
||||||
if (language.DisplayName.Equals(selectedLanguageName, StringComparison.Ordinal))
|
if (language.DisplayName.Equals(selectedLanguageName, StringComparison.Ordinal))
|
||||||
{
|
{
|
||||||
@@ -94,9 +94,8 @@ public partial class OCROverlay : Window
|
|||||||
MenuItem menuItem = (MenuItem)sender;
|
MenuItem menuItem = (MenuItem)sender;
|
||||||
foreach (var item in CanvasContextMenu.Items)
|
foreach (var item in CanvasContextMenu.Items)
|
||||||
{
|
{
|
||||||
if (item is MenuItem)
|
if (item is MenuItem menuItemLoop)
|
||||||
{
|
{
|
||||||
MenuItem menuItemLoop = (MenuItem)item;
|
|
||||||
menuItemLoop.IsChecked = item.Equals(menuItem);
|
menuItemLoop.IsChecked = item.Equals(menuItem);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -195,7 +194,7 @@ public partial class OCROverlay : Window
|
|||||||
Canvas.SetTop(selectBorder, clickedPoint.Y);
|
Canvas.SetTop(selectBorder, clickedPoint.Y);
|
||||||
|
|
||||||
var screens = System.Windows.Forms.Screen.AllScreens;
|
var screens = System.Windows.Forms.Screen.AllScreens;
|
||||||
System.Drawing.Point formsPoint = new System.Drawing.Point((int)clickedPoint.X, (int)clickedPoint.Y);
|
System.Drawing.Point formsPoint = new((int)clickedPoint.X, (int)clickedPoint.Y);
|
||||||
foreach (var scr in screens)
|
foreach (var scr in screens)
|
||||||
{
|
{
|
||||||
if (scr.Bounds.Contains(formsPoint))
|
if (scr.Bounds.Contains(formsPoint))
|
||||||
@@ -295,7 +294,7 @@ public partial class OCROverlay : Window
|
|||||||
double xDimScaled = Canvas.GetLeft(selectBorder) * m.M11;
|
double xDimScaled = Canvas.GetLeft(selectBorder) * m.M11;
|
||||||
double yDimScaled = Canvas.GetTop(selectBorder) * m.M22;
|
double yDimScaled = Canvas.GetTop(selectBorder) * m.M22;
|
||||||
|
|
||||||
System.Drawing.Rectangle regionScaled = new System.Drawing.Rectangle(
|
System.Drawing.Rectangle regionScaled = new(
|
||||||
(int)xDimScaled,
|
(int)xDimScaled,
|
||||||
(int)yDimScaled,
|
(int)yDimScaled,
|
||||||
(int)(selectBorder.Width * m.M11),
|
(int)(selectBorder.Width * m.M11),
|
||||||
|
|||||||
Reference in New Issue
Block a user