Improve several small quality of life issues on Text Extractor (#26021)

This commit is contained in:
Joseph Finney
2023-06-07 10:07:10 -05:00
committed by GitHub
parent 40335a6998
commit d1d7c74440
7 changed files with 418 additions and 103 deletions

View File

@@ -10,13 +10,14 @@ using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Windows;
using System.Windows.Input;
using System.Windows.Markup;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using PowerOCR.Helpers;
using PowerOCR.Models;
using Windows.Globalization;
using Windows.Graphics.Imaging;
using Windows.Media.Ocr;
@@ -26,6 +27,26 @@ namespace PowerOCR;
internal sealed class ImageMethods
{
/// <summary>
/// Ensures a bitmap meets a minimum size by surrounding it with a solid-colour border when it is too small.
/// </summary>
/// <param name="image">The bitmap to inspect and, if needed, pad.</param>
/// <param name="minW">Minimum acceptable width in pixels.</param>
/// <param name="minH">Minimum acceptable height in pixels.</param>
/// <returns>
/// <paramref name="image"/> itself when both dimensions already meet the minimums; otherwise a new
/// bitmap in the same pixel format containing the original image offset by 8 pixels from the top-left.
/// </returns>
internal static Bitmap PadImage(Bitmap image, int minW = 64, int minH = 64)
{
    bool needsPadding = image.Height < minH || image.Width < minW;
    if (!needsPadding)
    {
        return image;
    }

    // Each axis grows to the larger of (actual, minimum) plus 16 pixels of border in total.
    int paddedWidth = Math.Max(image.Width, minW) + 16;
    int paddedHeight = Math.Max(image.Height, minH) + 16;

    Bitmap padded = new(paddedWidth, paddedHeight, image.PixelFormat);
    using (Graphics canvas = Graphics.FromImage(padded))
    {
        // The top-left pixel of the source is used as the fill colour for the border.
        System.Drawing.Color fill = image.GetPixel(0, 0);
        canvas.Clear(fill);
        canvas.DrawImageUnscaled(image, 8, 8);
    }

    return padded;
}
internal static ImageSource GetWindowBoundsImage(Window passedWindow)
{
bool isGrabFrame = false;
@@ -46,7 +67,7 @@ internal sealed class ImageMethods
windowHeight -= (int)(70 * dpi.DpiScaleY);
}
using Bitmap bmp = new Bitmap(windowWidth, windowHeight, System.Drawing.Imaging.PixelFormat.Format32bppArgb);
using Bitmap bmp = new(windowWidth, windowHeight, System.Drawing.Imaging.PixelFormat.Format32bppArgb);
using Graphics g = Graphics.FromImage(bmp);
g.CopyFromScreen(thisCorrectedLeft, thisCorrectedTop, 0, 0, bmp.Size, CopyPixelOperation.SourceCopy);
@@ -55,7 +76,7 @@ internal sealed class ImageMethods
internal static async Task<string> GetRegionsText(Window? passedWindow, Rectangle selectedRegion, Language? preferredLanguage)
{
using Bitmap bmp = new Bitmap(selectedRegion.Width, selectedRegion.Height, System.Drawing.Imaging.PixelFormat.Format32bppArgb);
Bitmap bmp = new(selectedRegion.Width, selectedRegion.Height, System.Drawing.Imaging.PixelFormat.Format32bppArgb);
using Graphics g = Graphics.FromImage(bmp);
System.Windows.Point absPosPoint = passedWindow == null ? default(System.Windows.Point) : passedWindow.GetAbsolutePosition();
@@ -65,7 +86,7 @@ internal sealed class ImageMethods
g.CopyFromScreen(thisCorrectedLeft, thisCorrectedTop, 0, 0, bmp.Size, CopyPixelOperation.SourceCopy);
// bmp = PadImage(bmp);
bmp = PadImage(bmp);
string? resultText = await ExtractText(bmp, preferredLanguage);
return resultText != null ? resultText.Trim() : string.Empty;
@@ -74,7 +95,7 @@ internal sealed class ImageMethods
internal static async Task<string> GetClickedWord(Window passedWindow, System.Windows.Point clickedPoint, Language? preferredLanguage)
{
DpiScale dpi = VisualTreeHelper.GetDpi(passedWindow);
Bitmap bmp = new Bitmap((int)(passedWindow.ActualWidth * dpi.DpiScaleX), (int)(passedWindow.ActualHeight * dpi.DpiScaleY), System.Drawing.Imaging.PixelFormat.Format32bppArgb);
Bitmap bmp = new((int)(passedWindow.ActualWidth * dpi.DpiScaleX), (int)(passedWindow.ActualHeight * dpi.DpiScaleY), System.Drawing.Imaging.PixelFormat.Format32bppArgb);
Graphics g = Graphics.FromImage(bmp);
System.Windows.Point absPosPoint = passedWindow.GetAbsolutePosition();
@@ -91,35 +112,17 @@ internal sealed class ImageMethods
public static async Task<string> ExtractText(Bitmap bmp, Language? preferredLanguage, System.Windows.Point? singlePoint = null)
{
Language? selectedLanguage = preferredLanguage;
if (selectedLanguage == null)
{
selectedLanguage = GetOCRLanguage();
}
Language? selectedLanguage = preferredLanguage ?? GetOCRLanguage();
if (selectedLanguage == null)
{
return string.Empty;
}
bool isCJKLang = false;
if (selectedLanguage.LanguageTag.StartsWith("zh", StringComparison.InvariantCultureIgnoreCase) == true)
{
isCJKLang = true;
}
else if (selectedLanguage.LanguageTag.StartsWith("ja", StringComparison.InvariantCultureIgnoreCase) == true)
{
isCJKLang = true;
}
else if (selectedLanguage.LanguageTag.StartsWith("ko", StringComparison.InvariantCultureIgnoreCase) == true)
{
isCJKLang = true;
}
XmlLanguage lang = XmlLanguage.GetLanguage(selectedLanguage.LanguageTag);
CultureInfo culture = lang.GetEquivalentCulture();
bool isSpaceJoiningLang = LanguageHelper.IsLanguageSpaceJoining(selectedLanguage);
bool scaleBMP = true;
if (singlePoint != null
@@ -129,68 +132,38 @@ internal sealed class ImageMethods
}
using Bitmap scaledBitmap = scaleBMP ? ScaleBitmapUniform(bmp, 1.5) : ScaleBitmapUniform(bmp, 1.0);
StringBuilder text = new StringBuilder();
StringBuilder text = new();
await using (MemoryStream memory = new MemoryStream())
await using MemoryStream memoryStream = new();
using WrappingStream wrappingStream = new(memoryStream);
scaledBitmap.Save(wrappingStream, ImageFormat.Bmp);
wrappingStream.Position = 0;
BitmapDecoder bmpDecoder = await BitmapDecoder.CreateAsync(wrappingStream.AsRandomAccessStream());
SoftwareBitmap softwareBmp = await bmpDecoder.GetSoftwareBitmapAsync();
OcrEngine ocrEngine = OcrEngine.TryCreateFromLanguage(selectedLanguage);
OcrResult ocrResult = await ocrEngine.RecognizeAsync(softwareBmp);
GC.Collect();
if (singlePoint == null)
{
scaledBitmap.Save(memory, ImageFormat.Bmp);
memory.Position = 0;
BitmapDecoder bmpDecoder = await BitmapDecoder.CreateAsync(memory.AsRandomAccessStream());
SoftwareBitmap softwareBmp = await bmpDecoder.GetSoftwareBitmapAsync();
OcrEngine ocrEngine = OcrEngine.TryCreateFromLanguage(selectedLanguage);
OcrResult ocrResult = await ocrEngine.RecognizeAsync(softwareBmp);
if (singlePoint == null)
foreach (OcrLine ocrLine in ocrResult.Lines)
{
if (isCJKLang == false)
{
foreach (OcrLine line in ocrResult.Lines)
{
text.AppendLine(line.Text);
}
}
else
{
// Kanji, Hiragana, Katakana, Hankaku-Katakana do not need blank.(not only the symbol in CJKUnifiedIdeographs).
// Maybe there are more symbols that don't require spaces like \u3001 \u3002.
// var cjkRegex = new Regex(@"\p{IsCJKUnifiedIdeographs}|\p{IsHiragana}|\p{IsKatakana}|[\uFF61-\uFF9F]|[\u3000-\u3003]");
var cjkRegex = new Regex(@"\p{IsCJKUnifiedIdeographs}|\p{IsHiragana}|\p{IsKatakana}|[\uFF61-\uFF9F]");
foreach (OcrLine ocrLine in ocrResult.Lines)
{
bool isBeginning = true;
bool isCJKPrev = false;
foreach (OcrWord ocrWord in ocrLine.Words)
{
bool isCJK = cjkRegex.IsMatch(ocrWord.Text);
// Use spaces to separate non-CJK words.
if (!isBeginning && (!isCJK || !isCJKPrev))
{
_ = text.Append(' ');
}
_ = text.Append(ocrWord.Text);
isCJKPrev = isCJK;
isBeginning = false;
}
text.Append(Environment.NewLine);
}
}
ocrLine.GetTextFromOcrLine(isSpaceJoiningLang, text);
}
else
}
else
{
Windows.Foundation.Point fPoint = new Windows.Foundation.Point(singlePoint.Value.X, singlePoint.Value.Y);
foreach (OcrLine ocrLine in ocrResult.Lines)
{
Windows.Foundation.Point fPoint = new Windows.Foundation.Point(singlePoint.Value.X, singlePoint.Value.Y);
foreach (OcrLine ocrLine in ocrResult.Lines)
foreach (OcrWord ocrWord in ocrLine.Words)
{
foreach (OcrWord ocrWord in ocrLine.Words)
if (ocrWord.BoundingRect.Contains(fPoint))
{
if (ocrWord.BoundingRect.Contains(fPoint))
{
_ = text.Append(ocrWord.Text);
}
_ = text.Append(ocrWord.Text);
}
}
}
@@ -205,7 +178,7 @@ internal sealed class ImageMethods
{
List<string> wordArray = textLine.Split().ToList();
wordArray.Reverse();
_ = isCJKLang == true ? text.Append(string.Join(string.Empty, wordArray)) : text.Append(string.Join(' ', wordArray));
_ = text.Append(string.Join(' ', wordArray));
if (textLine.Length > 0)
{
@@ -223,27 +196,29 @@ internal sealed class ImageMethods
public static Bitmap ScaleBitmapUniform(Bitmap passedBitmap, double scale)
{
using MemoryStream memory = new MemoryStream();
passedBitmap.Save(memory, ImageFormat.Bmp);
memory.Position = 0;
BitmapImage bitmapimage = new BitmapImage();
using MemoryStream memoryStream = new();
using WrappingStream wrappingStream = new(memoryStream);
passedBitmap.Save(wrappingStream, ImageFormat.Bmp);
wrappingStream.Position = 0;
BitmapImage bitmapimage = new();
bitmapimage.BeginInit();
bitmapimage.StreamSource = memory;
bitmapimage.StreamSource = wrappingStream;
bitmapimage.CacheOption = BitmapCacheOption.OnLoad;
bitmapimage.EndInit();
bitmapimage.Freeze();
TransformedBitmap transformedBmp = new TransformedBitmap();
TransformedBitmap transformedBmp = new();
transformedBmp.BeginInit();
transformedBmp.Source = bitmapimage;
transformedBmp.Transform = new ScaleTransform(scale, scale);
transformedBmp.EndInit();
transformedBmp.Freeze();
GC.Collect();
return BitmapSourceToBitmap(transformedBmp);
}
public static Bitmap BitmapSourceToBitmap(BitmapSource source)
{
Bitmap bmp = new Bitmap(
Bitmap bmp = new(
source.PixelWidth,
source.PixelHeight,
System.Drawing.Imaging.PixelFormat.Format32bppPArgb);
@@ -257,21 +232,24 @@ internal sealed class ImageMethods
data.Height * data.Stride,
data.Stride);
bmp.UnlockBits(data);
GC.Collect();
return bmp;
}
internal static BitmapImage BitmapToImageSource(Bitmap bitmap)
{
using MemoryStream memory = new MemoryStream();
bitmap.Save(memory, ImageFormat.Bmp);
memory.Position = 0;
BitmapImage bitmapimage = new BitmapImage();
using MemoryStream memoryStream = new();
using WrappingStream wrappingStream = new(memoryStream);
bitmap.Save(wrappingStream, ImageFormat.Bmp);
wrappingStream.Position = 0;
BitmapImage bitmapimage = new();
bitmapimage.BeginInit();
bitmapimage.StreamSource = memory;
bitmapimage.StreamSource = wrappingStream;
bitmapimage.CacheOption = BitmapCacheOption.OnLoad;
bitmapimage.EndInit();
bitmapimage.Freeze();
GC.Collect();
return bitmapimage;
}
@@ -280,7 +258,7 @@ internal sealed class ImageMethods
// use currently selected Language
string inputLang = InputLanguageManager.Current.CurrentInputLanguage.Name;
Language? selectedLanguage = new Language(inputLang);
Language? selectedLanguage = new(inputLang);
List<Language> possibleOcrLanguages = OcrEngine.AvailableRecognizerLanguages.ToList();
if (possibleOcrLanguages.Count < 1)

View File

@@ -0,0 +1,26 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using Windows.Globalization;
namespace PowerOCR.Helpers
{
    /// <summary>
    /// Helpers for classifying OCR recognizer languages.
    /// </summary>
    internal static class LanguageHelper
    {
        /// <summary>
        /// Determines whether recognized words of the given language should be joined with spaces.
        /// </summary>
        /// <param name="selectedLanguage">The language whose <see cref="Language.LanguageTag"/> is inspected.</param>
        /// <returns>
        /// <c>false</c> for Chinese (any tag starting with "zh") and Japanese ("ja" or a "ja-" qualified tag);
        /// <c>true</c> for every other language.
        /// </returns>
        public static bool IsLanguageSpaceJoining(Language selectedLanguage)
        {
            // Language tags are identifiers, not linguistic text, so compare ordinally.
            string tag = selectedLanguage.LanguageTag;

            if (tag.StartsWith("zh", StringComparison.OrdinalIgnoreCase))
            {
                return false;
            }

            // Accept the bare tag as well as script/region-qualified variants such as "ja-JP",
            // without also matching unrelated tags that merely begin with the letters "ja".
            bool isJapanese = tag.Equals("ja", StringComparison.OrdinalIgnoreCase)
                || tag.StartsWith("ja-", StringComparison.OrdinalIgnoreCase);

            return !isJapanese;
        }
    }
}

View File

@@ -0,0 +1,54 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System.Text;
using System.Text.RegularExpressions;
using Windows.Media.Ocr;
namespace PowerOCR.Helpers
{
    /// <summary>
    /// Extension methods for turning OCR results into plain text.
    /// </summary>
    internal static class OcrExtensions
    {
        // Used only when the OCR language is not space-joining (zh or ja).
        // Classifies a recognized word as one that should be separated from its neighbours by a space.
        // A word matches when it:
        // - starts with a letter that is not in "other letters" (CJK characters are "other letters"), or
        // - ends with a number digit, or
        // - is longer than one character.
        // Chinese and Japanese characters are single-character words, and a word that is a single
        // punctuation mark/symbol does not match, so those are joined without spaces.
        // Built once: constructing the Regex for every OCR line is wasted work.
        private static readonly Regex SpaceJoiningWordRegex = new(@"(^[\p{L}-[\p{Lo}]]|\p{Nd}$)|.{2,}");

        /// <summary>
        /// Appends the text of one OCR line, followed by a line terminator, to <paramref name="text"/>.
        /// </summary>
        /// <param name="ocrLine">The recognized line to append.</param>
        /// <param name="isSpaceJoiningOCRLang">
        /// <c>true</c> to append <see cref="OcrLine.Text"/> unchanged; <c>false</c> to rebuild the line word by
        /// word, inserting a space only between words where at least one of the pair is space-joining.
        /// </param>
        /// <param name="text">The builder that receives the output.</param>
        public static void GetTextFromOcrLine(this OcrLine ocrLine, bool isSpaceJoiningOCRLang, StringBuilder text)
        {
            if (isSpaceJoiningOCRLang)
            {
                text.AppendLine(ocrLine.Text);
                return;
            }

            bool isFirstWord = true;
            bool isPrevWordSpaceJoining = false;

            foreach (OcrWord ocrWord in ocrLine.Words)
            {
                string wordString = ocrWord.Text;
                bool isThisWordSpaceJoining = SpaceJoiningWordRegex.IsMatch(wordString);

                // Two adjacent non-space-joining words (e.g. two CJK characters) are concatenated directly.
                if (isFirstWord || (!isThisWordSpaceJoining && !isPrevWordSpaceJoining))
                {
                    _ = text.Append(wordString);
                }
                else
                {
                    _ = text.Append(' ').Append(wordString);
                }

                isFirstWord = false;
                isPrevWordSpaceJoining = isThisWordSpaceJoining;
            }

            // Terminate the line just like the space-joining branch does; otherwise consecutive
            // OCR lines are run together into a single line of output.
            _ = text.AppendLine();
        }
    }
}

View File

@@ -0,0 +1,228 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.IO;
namespace PowerOCR.Models;
/// <summary>
/// A <see cref="Stream"/> that wraps another stream. The major feature of <see cref="WrappingStream"/> is that it does not dispose the
/// underlying stream when it is disposed; this is useful when using classes such as <see cref="BinaryReader"/> and
/// <see cref="System.Security.Cryptography.CryptoStream"/> that take ownership of the stream passed to their constructors.
/// </summary>
/// <remarks>
/// Every member except the <c>Can*</c> properties and <see cref="WrappedStream"/> throws
/// <see cref="ObjectDisposedException"/> once this wrapper has been disposed.
/// </remarks>
public class WrappingStream : Stream
{
    // Set to null on dispose; a null value is what marks this wrapper as disposed.
    private Stream? _streamBase;

    /// <summary>
    /// Initializes a new instance of the <see cref="WrappingStream"/> class.
    /// </summary>
    /// <param name="streamBase">The wrapped stream.</param>
    /// <exception cref="ArgumentNullException"><paramref name="streamBase"/> is <c>null</c>.</exception>
    public WrappingStream(Stream streamBase)
    {
        _streamBase = streamBase ?? throw new ArgumentNullException(nameof(streamBase));
    }

    /// <summary>
    /// Gets a value indicating whether the current stream supports reading.
    /// </summary>
    /// <returns><c>true</c> if the stream supports reading; otherwise (including after disposal), <c>false</c>.</returns>
    public override bool CanRead => _streamBase is not null && _streamBase.CanRead;

    /// <summary>
    /// Gets a value indicating whether the current stream supports seeking.
    /// </summary>
    /// <returns><c>true</c> if the stream supports seeking; otherwise (including after disposal), <c>false</c>.</returns>
    public override bool CanSeek => _streamBase is not null && _streamBase.CanSeek;

    /// <summary>
    /// Gets a value indicating whether the current stream supports writing.
    /// </summary>
    /// <returns><c>true</c> if the stream supports writing; otherwise (including after disposal), <c>false</c>.</returns>
    public override bool CanWrite => _streamBase is not null && _streamBase.CanWrite;

    /// <summary>
    /// Gets the length in bytes of the stream.
    /// </summary>
    public override long Length => GetStreamOrThrow().Length;

    /// <summary>
    /// Gets or sets the position within the current stream.
    /// </summary>
    public override long Position
    {
        get => GetStreamOrThrow().Position;
        set => GetStreamOrThrow().Position = value;
    }

    /// <summary>
    /// Begins an asynchronous read operation on the wrapped stream.
    /// </summary>
    /// <remarks>
    /// <paramref name="callback"/> and <paramref name="state"/> are optional and are forwarded as-is,
    /// so the read is always started and the returned result is always valid for <see cref="EndRead"/>.
    /// </remarks>
    public override IAsyncResult BeginRead(byte[] buffer, int offset, int count, AsyncCallback? callback, object? state)
    {
        return GetStreamOrThrow().BeginRead(buffer, offset, count, callback, state);
    }

    /// <summary>
    /// Begins an asynchronous write operation on the wrapped stream.
    /// </summary>
    /// <remarks>
    /// <paramref name="callback"/> and <paramref name="state"/> are optional and are forwarded as-is,
    /// so the write is always started and the returned result is always valid for <see cref="EndWrite"/>.
    /// </remarks>
    public override IAsyncResult BeginWrite(byte[] buffer, int offset, int count, AsyncCallback? callback, object? state)
    {
        return GetStreamOrThrow().BeginWrite(buffer, offset, count, callback, state);
    }

    /// <summary>
    /// Waits for the pending asynchronous read to complete.
    /// </summary>
    /// <returns>The number of bytes read, as reported by the wrapped stream.</returns>
    public override int EndRead(IAsyncResult asyncResult)
    {
        return GetStreamOrThrow().EndRead(asyncResult);
    }

    /// <summary>
    /// Ends an asynchronous write operation.
    /// </summary>
    public override void EndWrite(IAsyncResult asyncResult)
    {
        GetStreamOrThrow().EndWrite(asyncResult);
    }

    /// <summary>
    /// Clears all buffers for this stream and causes any buffered data to be written to the underlying device.
    /// </summary>
    public override void Flush()
    {
        GetStreamOrThrow().Flush();
    }

    /// <summary>
    /// Reads a sequence of bytes from the current stream and advances the position
    /// within the stream by the number of bytes read.
    /// </summary>
    public override int Read(byte[] buffer, int offset, int count)
    {
        return GetStreamOrThrow().Read(buffer, offset, count);
    }

    /// <summary>
    /// Reads a byte from the stream and advances the position within the stream by one byte, or returns -1 if at the end of the stream.
    /// </summary>
    public override int ReadByte()
    {
        return GetStreamOrThrow().ReadByte();
    }

    /// <summary>
    /// Sets the position within the current stream.
    /// </summary>
    /// <param name="offset">A byte offset relative to the <paramref name="origin"/> parameter.</param>
    /// <param name="origin">A value of type <see cref="SeekOrigin"/> indicating the reference point used to obtain the new position.</param>
    /// <returns>The new position within the current stream.</returns>
    public override long Seek(long offset, SeekOrigin origin)
    {
        return GetStreamOrThrow().Seek(offset, origin);
    }

    /// <summary>
    /// Sets the length of the current stream.
    /// </summary>
    /// <param name="value">The desired length of the current stream in bytes.</param>
    public override void SetLength(long value)
    {
        GetStreamOrThrow().SetLength(value);
    }

    /// <summary>
    /// Writes a sequence of bytes to the current stream and advances the current position
    /// within this stream by the number of bytes written.
    /// </summary>
    public override void Write(byte[] buffer, int offset, int count)
    {
        GetStreamOrThrow().Write(buffer, offset, count);
    }

    /// <summary>
    /// Writes a byte to the current position in the stream and advances the position within the stream by one byte.
    /// </summary>
    public override void WriteByte(byte value)
    {
        GetStreamOrThrow().WriteByte(value);
    }

    /// <summary>
    /// Gets the wrapped stream.
    /// </summary>
    /// <value>The wrapped stream, or <c>null</c> once this wrapper has been disposed.</value>
    protected Stream? WrappedStream => _streamBase;

    /// <summary>
    /// Releases the unmanaged resources used by the <see cref="WrappingStream"/> and optionally releases the managed resources.
    /// </summary>
    /// <param name="disposing">true to release both managed and unmanaged resources; false to release only unmanaged resources.</param>
    protected override void Dispose(bool disposing)
    {
        // doesn't close the base stream, but just prevents access to it through this WrappingStream
        if (disposing)
        {
            _streamBase = null;
        }

        base.Dispose(disposing);
    }

    /// <summary>
    /// Returns the wrapped stream, or throws if this wrapper has been disposed.
    /// </summary>
    /// <exception cref="ObjectDisposedException">This wrapper has been disposed.</exception>
    private Stream GetStreamOrThrow()
    {
        return _streamBase ?? throw new ObjectDisposedException(GetType().Name);
    }
}

View File

@@ -0,0 +1,19 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.Threading;
namespace PowerOCR.Models;
/// <summary>
/// An <see cref="IAsyncResult"/> that reports an already-finished, synchronously completed operation
/// carrying no state.
/// </summary>
public class NullAsyncResult : IAsyncResult
{
    // Created on first access and then reused, so reading the property repeatedly
    // does not allocate a new disposable handle each time.
    private WaitHandle? _asyncWaitHandle;

    /// <summary>Gets the user state; always <c>null</c>.</summary>
    public object? AsyncState => null;

    /// <summary>Gets the wait handle associated with this result; the same instance is returned on every access.</summary>
    public WaitHandle AsyncWaitHandle => _asyncWaitHandle ??= new NullWaitHandle();

    /// <summary>Gets a value indicating whether the operation completed synchronously; always <c>true</c>.</summary>
    public bool CompletedSynchronously => true;

    /// <summary>Gets a value indicating whether the operation has completed; always <c>true</c>.</summary>
    public bool IsCompleted => true;
}

View File

@@ -0,0 +1,11 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System.Threading;
namespace PowerOCR.Models;
/// <summary>
/// A <see cref="WaitHandle"/> subclass that adds no members of its own.
/// </summary>
/// <remarks>
/// NOTE(review): nothing here assigns an underlying OS handle, so waiting on an instance
/// presumably is not meaningful — confirm before using it for synchronization.
/// </remarks>
public class NullWaitHandle : WaitHandle
{
}

View File

@@ -30,7 +30,7 @@ public partial class OCROverlay : Window
private bool IsSelecting { get; set; }
private Border selectBorder = new Border();
private Border selectBorder = new();
private DpiScale? dpiScale;
@@ -57,7 +57,7 @@ public partial class OCROverlay : Window
{
InitializeComponent();
var userSettings = new UserSettings(new Helpers.ThrottledActionInvoker());
var userSettings = new UserSettings(new ThrottledActionInvoker());
string? selectedLanguageName = userSettings.PreferredLanguage.Value;
// build context menu
@@ -70,7 +70,7 @@ public partial class OCROverlay : Window
List<Language> possibleOcrLanguages = OcrEngine.AvailableRecognizerLanguages.ToList();
foreach (Language language in possibleOcrLanguages)
{
MenuItem menuItem = new MenuItem() { Header = language.NativeName, Tag = language, IsCheckable = true };
MenuItem menuItem = new() { Header = language.NativeName, Tag = language, IsCheckable = true };
menuItem.IsChecked = language.DisplayName.Equals(selectedLanguageName, StringComparison.Ordinal);
if (language.DisplayName.Equals(selectedLanguageName, StringComparison.Ordinal))
{
@@ -94,9 +94,8 @@ public partial class OCROverlay : Window
MenuItem menuItem = (MenuItem)sender;
foreach (var item in CanvasContextMenu.Items)
{
if (item is MenuItem)
if (item is MenuItem menuItemLoop)
{
MenuItem menuItemLoop = (MenuItem)item;
menuItemLoop.IsChecked = item.Equals(menuItem);
}
}
@@ -195,7 +194,7 @@ public partial class OCROverlay : Window
Canvas.SetTop(selectBorder, clickedPoint.Y);
var screens = System.Windows.Forms.Screen.AllScreens;
System.Drawing.Point formsPoint = new System.Drawing.Point((int)clickedPoint.X, (int)clickedPoint.Y);
System.Drawing.Point formsPoint = new((int)clickedPoint.X, (int)clickedPoint.Y);
foreach (var scr in screens)
{
if (scr.Bounds.Contains(formsPoint))
@@ -295,7 +294,7 @@ public partial class OCROverlay : Window
double xDimScaled = Canvas.GetLeft(selectBorder) * m.M11;
double yDimScaled = Canvas.GetTop(selectBorder) * m.M22;
System.Drawing.Rectangle regionScaled = new System.Drawing.Rectangle(
System.Drawing.Rectangle regionScaled = new(
(int)xDimScaled,
(int)yDimScaled,
(int)(selectBorder.Width * m.M11),