From 6ee326beb4e7fd1958560cd1508668cb749bcf08 Mon Sep 17 00:00:00 2001 From: Joseph Finney Date: Mon, 11 Sep 2023 04:32:49 -0500 Subject: [PATCH] [TextExtractor]Version 2.0 with table scan and many QOL improvements (#28358) * Remove unsed code and organize. * If clicking a word hide the overlay to do the OCR Code formatting * spelling * Add ResultTable * Result Table working * Spelling Fixes * Initial Get UX for new Overlay UX working * Basic top buttons working and loading correctly * use Single Line and Table Toggle buttons when doing OCR * Code style and error and warning removal * Dispose Wrapping Stream as much as possible * Fix spelling catches * Use WPF UI 3.0.0-Preview.4 * Revert changes to ColorPicker UI * Add Settings DeepLink * Use Accent Color where possible * Remove redundant button styles, fix table click event * Fix disposing too early --- .github/actions/spell-check/expect.txt | 2 + src/modules/PowerOCR/PowerOCR/App.xaml | 14 +- src/modules/PowerOCR/PowerOCR/App.xaml.cs | 11 +- .../PowerOCR/PowerOCR/Helpers/ImageMethods.cs | 118 ++- .../PowerOCR/Helpers/OcrExtensions.cs | 53 ++ .../PowerOCR/Helpers/StringHelpers.cs | 44 ++ .../PowerOCR/Helpers/WindowUtilities.cs | 54 +- .../PowerOCR/PowerOCR/Models/ResultColumn.cs | 16 + .../PowerOCR/PowerOCR/Models/ResultRow.cs | 16 + .../PowerOCR/PowerOCR/Models/ResultTable.cs | 642 ++++++++++++++++ .../PowerOCR/PowerOCR/Models/WordBorder.cs | 42 ++ src/modules/PowerOCR/PowerOCR/OCROverlay.xaml | 140 +++- .../PowerOCR/PowerOCR/OCROverlay.xaml.cs | 289 ++++++-- src/modules/PowerOCR/PowerOCR/PowerOCR.csproj | 1 + .../PowerOCR/Settings/UserSettings.cs | 4 +- .../PowerOCR/Styles/ButtonStyles.xaml | 695 ++++++++++++++++++ .../PowerOCR/PowerOCR/Styles/Colors.xaml | 6 + 17 files changed, 1987 insertions(+), 160 deletions(-) create mode 100644 src/modules/PowerOCR/PowerOCR/Helpers/StringHelpers.cs create mode 100644 src/modules/PowerOCR/PowerOCR/Models/ResultColumn.cs create mode 100644 src/modules/PowerOCR/PowerOCR/Models/ResultRow.cs create mode 100644 src/modules/PowerOCR/PowerOCR/Models/ResultTable.cs create mode 100644 src/modules/PowerOCR/PowerOCR/Models/WordBorder.cs create mode 100644 src/modules/PowerOCR/PowerOCR/Styles/ButtonStyles.xaml create mode 100644 src/modules/PowerOCR/PowerOCR/Styles/Colors.xaml diff --git a/.github/actions/spell-check/expect.txt b/.github/actions/spell-check/expect.txt index 373fd2e106..c7c72631cc 100644 --- a/.github/actions/spell-check/expect.txt +++ b/.github/actions/spell-check/expect.txt @@ -784,6 +784,7 @@ IFACEMETHODIMP IFile IFilter IGraphics +IGT iid Iindex IIO @@ -1496,6 +1497,7 @@ qit QITAB QITABENT qps +Quarternary QUERYENDSESSION QUERYOPEN QUEUESYNC diff --git a/src/modules/PowerOCR/PowerOCR/App.xaml b/src/modules/PowerOCR/PowerOCR/App.xaml index 37b06e87cb..a8dcd7c7a0 100644 --- a/src/modules/PowerOCR/PowerOCR/App.xaml +++ b/src/modules/PowerOCR/PowerOCR/App.xaml @@ -3,8 +3,18 @@ xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation" xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml" xmlns:local="clr-namespace:PowerOCR" - ShutdownMode="OnExplicitShutdown" + xmlns:ui="http://schemas.lepo.co/wpfui/2022/xaml" Exit="Application_Exit" + ShutdownMode="OnExplicitShutdown" Startup="Application_Startup"> - + + + + + + + + + + diff --git a/src/modules/PowerOCR/PowerOCR/App.xaml.cs b/src/modules/PowerOCR/PowerOCR/App.xaml.cs index 60c6c03b81..fbc955a796 100644 --- a/src/modules/PowerOCR/PowerOCR/App.xaml.cs +++ b/src/modules/PowerOCR/PowerOCR/App.xaml.cs @@ -6,7 +6,6 @@ using System; using System.Threading; using System.Windows; using ManagedCommon; -using PowerOCR.Helpers; using PowerOCR.Keyboard; using PowerOCR.Settings; @@ -67,10 +66,10 @@ public partial class App : Application, IDisposable { Logger.LogInfo("PowerToys Runner exited. Exiting TextExtractor"); NativeThreadCTS.Cancel(); - Application.Current.Dispatcher.Invoke(() => Shutdown()); + Current.Dispatcher.Invoke(() => Shutdown()); }); var userSettings = new UserSettings(new Helpers.ThrottledActionInvoker()); - eventMonitor = new EventMonitor(Application.Current.Dispatcher, NativeThreadCTS.Token); + eventMonitor = new EventMonitor(Current.Dispatcher, NativeThreadCTS.Token); } catch (Exception ex) { @@ -89,11 +88,7 @@ public partial class App : Application, IDisposable protected override void OnExit(ExitEventArgs e) { - if (_instanceMutex != null) - { - _instanceMutex.ReleaseMutex(); - } - + _instanceMutex?.ReleaseMutex(); base.OnExit(e); } diff --git a/src/modules/PowerOCR/PowerOCR/Helpers/ImageMethods.cs b/src/modules/PowerOCR/PowerOCR/Helpers/ImageMethods.cs index 0d7a3314b7..1e11a83801 100644 --- a/src/modules/PowerOCR/PowerOCR/Helpers/ImageMethods.cs +++ b/src/modules/PowerOCR/PowerOCR/Helpers/ImageMethods.cs @@ -38,19 +38,17 @@ internal sealed class ImageMethods int height = Math.Max(image.Height + 16, minH + 16); // Create a compatible bitmap - Bitmap dest = new(width, height, image.PixelFormat); - using Graphics gd = Graphics.FromImage(dest); + Bitmap destination = new(width, height, image.PixelFormat); + using Graphics gd = Graphics.FromImage(destination); gd.Clear(image.GetPixel(0, 0)); gd.DrawImageUnscaled(image, 8, 8); - return dest; + return destination; } internal static ImageSource GetWindowBoundsImage(Window passedWindow) { - bool isGrabFrame = false; - DpiScale dpi = VisualTreeHelper.GetDpi(passedWindow); int windowWidth = (int)(passedWindow.ActualWidth * dpi.DpiScaleX); int windowHeight = (int)(passedWindow.ActualHeight * dpi.DpiScaleY); @@ -59,14 +57,6 @@ internal sealed class ImageMethods int thisCorrectedLeft = (int)absPosPoint.X; int thisCorrectedTop = (int)absPosPoint.Y; - if (isGrabFrame == true) - { - thisCorrectedLeft += (int)(2 * dpi.DpiScaleX); - thisCorrectedTop += (int)(26 * dpi.DpiScaleY); - windowWidth -= (int)(4 * dpi.DpiScaleX); - windowHeight -= (int)(70 * dpi.DpiScaleY); - } - using Bitmap bmp = new(windowWidth, windowHeight, System.Drawing.Imaging.PixelFormat.Format32bppArgb); using Graphics g = Graphics.FromImage(bmp); @@ -74,19 +64,67 @@ internal sealed class ImageMethods return BitmapToImageSource(bmp); } - internal static async Task GetRegionsText(Window? passedWindow, Rectangle selectedRegion, Language? preferredLanguage) + internal static Bitmap GetWindowBoundsBitmap(Window passedWindow) { - Bitmap bmp = new(selectedRegion.Width, selectedRegion.Height, System.Drawing.Imaging.PixelFormat.Format32bppArgb); + DpiScale dpi = VisualTreeHelper.GetDpi(passedWindow); + int windowWidth = (int)(passedWindow.ActualWidth * dpi.DpiScaleX); + int windowHeight = (int)(passedWindow.ActualHeight * dpi.DpiScaleY); + + System.Windows.Point absPosPoint = passedWindow.GetAbsolutePosition(); + int thisCorrectedLeft = (int)absPosPoint.X; + int thisCorrectedTop = (int)absPosPoint.Y; + + Bitmap bmp = new( + windowWidth, + windowHeight, + System.Drawing.Imaging.PixelFormat.Format32bppArgb); using Graphics g = Graphics.FromImage(bmp); - System.Windows.Point absPosPoint = passedWindow == null ? default(System.Windows.Point) : passedWindow.GetAbsolutePosition(); + g.CopyFromScreen( + thisCorrectedLeft, + thisCorrectedTop, + 0, + 0, + bmp.Size, + CopyPixelOperation.SourceCopy); + + return bmp; + } + + internal static Bitmap GetRegionAsBitmap(Window passedWindow, Rectangle selectedRegion) + { + Bitmap bmp = new( + selectedRegion.Width, + selectedRegion.Height, + System.Drawing.Imaging.PixelFormat.Format32bppArgb); + + using Graphics g = Graphics.FromImage(bmp); + + System.Windows.Point absPosPoint = passedWindow.GetAbsolutePosition(); int thisCorrectedLeft = (int)absPosPoint.X + selectedRegion.Left; int thisCorrectedTop = (int)absPosPoint.Y + selectedRegion.Top; - g.CopyFromScreen(thisCorrectedLeft, thisCorrectedTop, 0, 0, bmp.Size, CopyPixelOperation.SourceCopy); + g.CopyFromScreen( + thisCorrectedLeft, + thisCorrectedTop, + 0, + 0, + bmp.Size, + CopyPixelOperation.SourceCopy); bmp = PadImage(bmp); + return bmp; + } + + internal static async Task GetRegionsText(Window? passedWindow, Rectangle selectedRegion, Language? preferredLanguage) + { + if (passedWindow is null) + { + return string.Empty; + } + + Bitmap bmp = GetRegionAsBitmap(passedWindow, selectedRegion); string? resultText = await ExtractText(bmp, preferredLanguage); return resultText != null ? resultText.Trim() : string.Empty; @@ -104,7 +142,7 @@ internal sealed class ImageMethods g.CopyFromScreen(thisCorrectedLeft, thisCorrectedTop, 0, 0, bmp.Size, CopyPixelOperation.SourceCopy); - System.Windows.Point adjustedPoint = new System.Windows.Point(clickedPoint.X, clickedPoint.Y); + System.Windows.Point adjustedPoint = new(clickedPoint.X, clickedPoint.Y); string resultText = await ExtractText(bmp, preferredLanguage, adjustedPoint); return resultText.Trim(); @@ -145,6 +183,8 @@ internal sealed class ImageMethods OcrEngine ocrEngine = OcrEngine.TryCreateFromLanguage(selectedLanguage); OcrResult ocrResult = await ocrEngine.RecognizeAsync(softwareBmp); + await memoryStream.DisposeAsync(); + await wrappingStream.DisposeAsync(); GC.Collect(); if (singlePoint == null) @@ -156,7 +196,7 @@ internal sealed class ImageMethods } else { - Windows.Foundation.Point fPoint = new Windows.Foundation.Point(singlePoint.Value.X, singlePoint.Value.Y); + Windows.Foundation.Point fPoint = new(singlePoint.Value.X, singlePoint.Value.Y); foreach (OcrLine ocrLine in ocrResult.Lines) { foreach (OcrWord ocrWord in ocrLine.Words) @@ -188,10 +228,8 @@ internal sealed class ImageMethods return text.ToString(); } - else - { - return text.ToString(); - } + + return text.ToString(); } public static Bitmap ScaleBitmapUniform(Bitmap passedBitmap, double scale) @@ -200,18 +238,21 @@ internal sealed class ImageMethods using WrappingStream wrappingStream = new(memoryStream); passedBitmap.Save(wrappingStream, ImageFormat.Bmp); wrappingStream.Position = 0; - BitmapImage bitmapimage = new(); - bitmapimage.BeginInit(); - bitmapimage.StreamSource = wrappingStream; - bitmapimage.CacheOption = BitmapCacheOption.OnLoad; - bitmapimage.EndInit(); - bitmapimage.Freeze(); + BitmapImage bitmapImage = new(); + bitmapImage.BeginInit(); + bitmapImage.StreamSource = wrappingStream; + bitmapImage.CacheOption = BitmapCacheOption.OnLoad; + bitmapImage.EndInit(); + bitmapImage.Freeze(); TransformedBitmap transformedBmp = new(); transformedBmp.BeginInit(); - transformedBmp.Source = bitmapimage; + transformedBmp.Source = bitmapImage; transformedBmp.Transform = new ScaleTransform(scale, scale); transformedBmp.EndInit(); transformedBmp.Freeze(); + + memoryStream.Dispose(); + wrappingStream.Dispose(); GC.Collect(); return BitmapSourceToBitmap(transformedBmp); } @@ -243,14 +284,17 @@ internal sealed class ImageMethods bitmap.Save(wrappingStream, ImageFormat.Bmp); wrappingStream.Position = 0; - BitmapImage bitmapimage = new(); - bitmapimage.BeginInit(); - bitmapimage.StreamSource = wrappingStream; - bitmapimage.CacheOption = BitmapCacheOption.OnLoad; - bitmapimage.EndInit(); - bitmapimage.Freeze(); + BitmapImage bitmapImage = new(); + bitmapImage.BeginInit(); + bitmapImage.StreamSource = wrappingStream; + bitmapImage.CacheOption = BitmapCacheOption.OnLoad; + bitmapImage.EndInit(); + bitmapImage.Freeze(); + + memoryStream.Dispose(); + wrappingStream.Dispose(); GC.Collect(); - return bitmapimage; + return bitmapImage; } public static Language? GetOCRLanguage() diff --git a/src/modules/PowerOCR/PowerOCR/Helpers/OcrExtensions.cs b/src/modules/PowerOCR/PowerOCR/Helpers/OcrExtensions.cs index b506912c3d..3bce062178 100644 --- a/src/modules/PowerOCR/PowerOCR/Helpers/OcrExtensions.cs +++ b/src/modules/PowerOCR/PowerOCR/Helpers/OcrExtensions.cs @@ -2,8 +2,19 @@ // The Microsoft Corporation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System; +using System.Collections.Generic; +using System.Drawing; +using System.Drawing.Imaging; +using System.IO; using System.Text; using System.Text.RegularExpressions; +using System.Threading.Tasks; +using System.Windows; +using System.Windows.Media; +using PowerOCR.Models; +using Windows.Globalization; +using Windows.Graphics.Imaging; using Windows.Media.Ocr; namespace PowerOCR.Helpers @@ -50,5 +61,47 @@ namespace PowerOCR.Helpers } } } + + public static async Task GetRegionsTextAsTableAsync(Window passedWindow, Rectangle regionScaled, Language? language) + { + if (language is null) + { + return string.Empty; + } + + Bitmap bmp = ImageMethods.GetRegionAsBitmap(passedWindow, regionScaled); + + bool scaleBMP = true; + + if (bmp.Width * 1.5 > OcrEngine.MaxImageDimension) + { + scaleBMP = false; + } + + using Bitmap scaledBitmap = scaleBMP ? ImageMethods.ScaleBitmapUniform(bmp, 1.5) : ImageMethods.ScaleBitmapUniform(bmp, 1.0); + DpiScale dpiScale = VisualTreeHelper.GetDpi(passedWindow); + + OcrResult ocrResult = await GetOcrResultFromImageAsync(scaledBitmap, language); + List wordBorders = ResultTable.ParseOcrResultIntoWordBorders(ocrResult, dpiScale); + return ResultTable.GetWordsAsTable(wordBorders, dpiScale, LanguageHelper.IsLanguageSpaceJoining(language)); + } + + internal static async Task GetOcrResultFromImageAsync(Bitmap bmp, Language language) + { + await using MemoryStream memoryStream = new(); + using WrappingStream wrappingStream = new(memoryStream); + + bmp.Save(wrappingStream, ImageFormat.Bmp); + wrappingStream.Position = 0; + + BitmapDecoder bmpDecoder = await BitmapDecoder.CreateAsync(wrappingStream.AsRandomAccessStream()); + SoftwareBitmap softwareBmp = await bmpDecoder.GetSoftwareBitmapAsync(); + + await memoryStream.DisposeAsync(); + await wrappingStream.DisposeAsync(); + + OcrEngine ocrEngine = OcrEngine.TryCreateFromLanguage(language); + return await ocrEngine.RecognizeAsync(softwareBmp); + } } } diff --git a/src/modules/PowerOCR/PowerOCR/Helpers/StringHelpers.cs b/src/modules/PowerOCR/PowerOCR/Helpers/StringHelpers.cs new file mode 100644 index 0000000000..92ac1becec --- /dev/null +++ b/src/modules/PowerOCR/PowerOCR/Helpers/StringHelpers.cs @@ -0,0 +1,44 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Text; +using System.Text.RegularExpressions; + +namespace PowerOCR.Helpers; + +internal static class StringHelpers +{ + public static string MakeStringSingleLine(this string textToEdit) + { + if (!textToEdit.Contains('\n') + && !textToEdit.Contains('\r')) + { + return textToEdit; + } + + StringBuilder workingString = new(textToEdit); + + workingString.Replace("\r\n", " "); + workingString.Replace(Environment.NewLine, " "); + workingString.Replace('\n', ' '); + workingString.Replace('\r', ' '); + + Regex regex = new("[ ]{2,}"); + string temp = regex.Replace(workingString.ToString(), " "); + workingString.Clear(); + workingString.Append(temp); + if (workingString[0] == ' ') + { + workingString.Remove(0, 1); + } + + if (workingString[workingString.Length - 1] == ' ') + { + workingString.Remove(workingString.Length - 1, 1); + } + + return workingString.ToString(); + } +} diff --git a/src/modules/PowerOCR/PowerOCR/Helpers/WindowUtilities.cs b/src/modules/PowerOCR/PowerOCR/Helpers/WindowUtilities.cs index 939f987ace..c62f900106 100644 --- a/src/modules/PowerOCR/PowerOCR/Helpers/WindowUtilities.cs +++ b/src/modules/PowerOCR/PowerOCR/Helpers/WindowUtilities.cs @@ -2,11 +2,12 @@ // The Microsoft Corporation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System; using System.Windows; using System.Windows.Forms; +using System.Windows.Input; using ManagedCommon; using Microsoft.PowerToys.Telemetry; -using PowerOCR.Helpers; namespace PowerOCR.Utilities; @@ -20,33 +21,11 @@ public static class WindowUtilities return; } + Logger.LogInfo($"Adding Overlays for each screen"); foreach (Screen screen in Screen.AllScreens) { - OCROverlay overlay = new OCROverlay() - { - WindowStartupLocation = WindowStartupLocation.Manual, - Width = 200, - Height = 200, - WindowState = WindowState.Normal, - }; - - if (screen.WorkingArea.Left >= 0) - { - overlay.Left = screen.WorkingArea.Left; - } - else - { - overlay.Left = screen.WorkingArea.Left + (screen.WorkingArea.Width / 2); - } - - if (screen.WorkingArea.Top >= 0) - { - overlay.Top = screen.WorkingArea.Top; - } - else - { - overlay.Top = screen.WorkingArea.Top + (screen.WorkingArea.Height / 2); - } + Logger.LogInfo($"screen {screen}"); + OCROverlay overlay = new(screen.Bounds); overlay.Show(); ActivateWindow(overlay); @@ -61,7 +40,7 @@ public static class WindowUtilities foreach (Window window in allWindows) { - if (window is OCROverlay overlay) + if (window is OCROverlay) { return true; } @@ -82,6 +61,8 @@ public static class WindowUtilities } } + GC.Collect(); + // TODO: Decide when to close the process // System.Windows.Application.Current.Shutdown(); } @@ -105,4 +86,23 @@ public static class WindowUtilities OSInterop.SetForegroundWindow(handle); } } + + internal static void OcrOverlayKeyDown(Key key, bool? isActive = null) + { + WindowCollection allWindows = System.Windows.Application.Current.Windows; + + if (key == Key.Escape) + { + PowerToysTelemetry.Log.WriteEvent(new PowerOCR.Telemetry.PowerOCRCancelledEvent()); + CloseAllOCROverlays(); + } + + foreach (Window window in allWindows) + { + if (window is OCROverlay overlay) + { + overlay.KeyPressed(key, isActive); + } + } + } } diff --git a/src/modules/PowerOCR/PowerOCR/Models/ResultColumn.cs b/src/modules/PowerOCR/PowerOCR/Models/ResultColumn.cs new file mode 100644 index 0000000000..a25ba6c137 --- /dev/null +++ b/src/modules/PowerOCR/PowerOCR/Models/ResultColumn.cs @@ -0,0 +1,16 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace PowerOCR.Models; + +public struct ResultColumn +{ + public double Width { get; set; } + + public double Left { get; set; } + + public double Right { get; set; } + + public int ID { get; set; } +} diff --git a/src/modules/PowerOCR/PowerOCR/Models/ResultRow.cs b/src/modules/PowerOCR/PowerOCR/Models/ResultRow.cs new file mode 100644 index 0000000000..12850e6d60 --- /dev/null +++ b/src/modules/PowerOCR/PowerOCR/Models/ResultRow.cs @@ -0,0 +1,16 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace PowerOCR.Models; + +public struct ResultRow +{ + public double Height { get; set; } + + public double Top { get; set; } + + public double Bottom { get; set; } + + public int ID { get; set; } +} diff --git a/src/modules/PowerOCR/PowerOCR/Models/ResultTable.cs b/src/modules/PowerOCR/PowerOCR/Models/ResultTable.cs new file mode 100644 index 0000000000..06d3afa009 --- /dev/null +++ b/src/modules/PowerOCR/PowerOCR/Models/ResultTable.cs @@ -0,0 +1,642 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Drawing; +using System.Linq; +using System.Text; +using System.Windows; +using System.Windows.Controls; +using System.Windows.Media; +using PowerOCR.Helpers; +using Windows.Media.Ocr; +using Rect = System.Windows.Rect; + +namespace PowerOCR.Models; + +public class ResultTable +{ + public List Columns { get; set; } = new(); + + public List Rows { get; set; } = new(); + + public Rect BoundingRect { get; set; } + + public List ColumnLines { get; set; } = new(); + + public List RowLines { get; set; } = new(); + + public Canvas? TableLines { get; set; } + + public ResultTable(ref List wordBorders, DpiScale dpiScale) + { + int borderBuffer = 3; + var leftsMin = wordBorders.Select(x => x.Left).Min(); + var topsMin = wordBorders.Select(x => x.Top).Min(); + var rightsMax = wordBorders.Select(x => x.Right).Max(); + var bottomsMax = wordBorders.Select(x => x.Bottom).Max(); + + Rectangle bordersBorder = new() + { + X = (int)leftsMin - borderBuffer, + Y = (int)topsMin - borderBuffer, + Width = (int)(rightsMax + borderBuffer), + Height = (int)(bottomsMax + borderBuffer), + }; + + bordersBorder.Width = (int)(bordersBorder.Width * dpiScale.DpiScaleX); + bordersBorder.Height = (int)(bordersBorder.Height * dpiScale.DpiScaleY); + + AnalyzeAsTable(wordBorders, bordersBorder); + } + + private void ParseRowAndColumnLines() + { + // Draw Bounding Rect + int topBound = 0; + int bottomBound = topBound; + int leftBound = 0; + int rightBound = leftBound; + + if (Rows.Count >= 1) + { + topBound = (int)Rows[0].Top; + bottomBound = (int)Rows[Rows.Count - 1].Bottom; + } + + if (Columns.Count >= 1) + { + leftBound = (int)Columns[0].Left; + rightBound = (int)Columns[Columns.Count - 1].Right; + } + + BoundingRect = new() + { + Width = (rightBound - leftBound) + 10, + Height = (bottomBound - topBound) + 10, + X = leftBound - 5, + Y = topBound - 5, + }; + + // parse columns + ColumnLines = new(); + + for (int i = 0; i < Columns.Count - 1; i++) + { + int columnMid = (int)(Columns[i].Right + Columns[i + 1].Left) / 2; + ColumnLines.Add(columnMid); + } + + // parse rows + RowLines = new(); + + for (int i = 0; i < Rows.Count - 1; i++) + { + int rowMid = (int)(Rows[i].Bottom + Rows[i + 1].Top) / 2; + RowLines.Add(rowMid); + } + } + + public static List ParseOcrResultIntoWordBorders(OcrResult ocrResult, DpiScale dpi) + { + List wordBorders = new(); + int lineNumber = 0; + + foreach (OcrLine ocrLine in ocrResult.Lines) + { + double top = ocrLine.Words.Select(x => x.BoundingRect.Top).Min(); + double bottom = ocrLine.Words.Select(x => x.BoundingRect.Bottom).Max(); + double left = ocrLine.Words.Select(x => x.BoundingRect.Left).Min(); + double right = ocrLine.Words.Select(x => x.BoundingRect.Right).Max(); + + Rect lineRect = new() + { + X = left, + Y = top, + Width = Math.Abs(right - left), + Height = Math.Abs(bottom - top), + }; + + StringBuilder lineText = new(); + ocrLine.GetTextFromOcrLine(true, lineText); + + WordBorder wordBorderBox = new() + { + Width = lineRect.Width / dpi.DpiScaleX, + Height = lineRect.Height / dpi.DpiScaleY, + Top = lineRect.Y, + Left = lineRect.X, + Word = lineText.ToString().Trim(), + LineNumber = lineNumber, + }; + wordBorders.Add(wordBorderBox); + + lineNumber++; + } + + return wordBorders; + } + + public void AnalyzeAsTable(ICollection wordBorders, Rectangle rectCanvasSize) + { + int hitGridSpacing = 3; + + int numberOfVerticalLines = rectCanvasSize.Width / hitGridSpacing; + int numberOfHorizontalLines = rectCanvasSize.Height / hitGridSpacing; + + Canvas tableIntersectionCanvas = new(); + + List rowAreas = CalculateRowAreas(rectCanvasSize, hitGridSpacing, numberOfHorizontalLines, tableIntersectionCanvas, wordBorders); + List resultRows = CalculateResultRows(hitGridSpacing, rowAreas); + + List columnAreas = CalculateColumnAreas(rectCanvasSize, hitGridSpacing, numberOfVerticalLines, tableIntersectionCanvas, wordBorders); + List resultColumns = CalculateResultColumns(hitGridSpacing, columnAreas); + + Rect tableBoundingRect = new() + { + X = columnAreas.FirstOrDefault(), + Y = rowAreas.FirstOrDefault(), + Width = columnAreas.LastOrDefault() - columnAreas.FirstOrDefault(), + Height = rowAreas.LastOrDefault() - rowAreas.FirstOrDefault(), + }; + + CombineOutliers(wordBorders, resultRows, tableIntersectionCanvas, resultColumns, tableBoundingRect); + + Rows.Clear(); + Rows.AddRange(resultRows); + Columns.Clear(); + Columns.AddRange(resultColumns); + + ParseRowAndColumnLines(); + DrawTable(); + } + + private static List CalculateResultRows(int hitGridSpacing, List rowAreas) + { + List resultRows = new(); + int rowTop = 0; + int rowCount = 0; + for (int i = 0; i < rowAreas.Count; i++) + { + int thisLine = rowAreas[i]; + + // check if should set this as top + if (i == 0) + { + rowTop = thisLine; + } + else if (i - 1 > 0) + { + int prevRow = rowAreas[i - 1]; + if (thisLine - prevRow != hitGridSpacing) + { + rowTop = thisLine; + } + } + + // check to see if at bottom of row + if (i == rowAreas.Count - 1) + { + resultRows.Add(new ResultRow { Top = rowTop, Bottom = thisLine, ID = rowCount }); + rowCount++; + } + else if (i + 1 < rowAreas.Count) + { + int nextRow = rowAreas[i + 1]; + if (nextRow - thisLine != hitGridSpacing) + { + resultRows.Add(new ResultRow { Top = rowTop, Bottom = thisLine, ID = rowCount }); + rowCount++; + } + } + } + + return resultRows; + } + + private static List CalculateRowAreas(Rectangle rectCanvasSize, int hitGridSpacing, int numberOfHorizontalLines, Canvas tableIntersectionCanvas, ICollection wordBorders) + { + List rowAreas = new(); + + for (int i = 0; i < numberOfHorizontalLines; i++) + { + Border horizontalLine = new() + { + Height = 1, + Width = rectCanvasSize.Width, + Opacity = 0, + Background = new SolidColorBrush(Colors.Gray), + }; + Rect horizontalLineRect = new(0, i * hitGridSpacing, horizontalLine.Width, horizontalLine.Height); + _ = tableIntersectionCanvas.Children.Add(horizontalLine); + Canvas.SetTop(horizontalLine, i * 3); + + CheckIntersectionsWithWordBorders(hitGridSpacing, wordBorders, rowAreas, i, horizontalLineRect); + } + + return rowAreas; + } + + private static void CheckIntersectionsWithWordBorders(int hitGridSpacing, ICollection wordBorders, ICollection rowAreas, int i, Rect horizontalLineRect) + { + foreach (WordBorder wb in wordBorders) + { + if (wb.IntersectsWith(horizontalLineRect)) + { + rowAreas.Add(i * hitGridSpacing); + break; + } + } + } + + private static void CombineOutliers(ICollection wordBorders, List resultRows, Canvas tableIntersectionCanvas, List resultColumns, Rect tableBoundingRect) + { + // try 4 times to refine the rows and columns for outliers + // on the fifth time set the word boundary properties + for (int r = 0; r < 5; r++) + { + int outlierThreshold = 2; + List outlierRowIDs = FindOutlierRowIds(wordBorders, resultRows, tableIntersectionCanvas, tableBoundingRect, r, outlierThreshold); + + if (outlierRowIDs.Count > 0) + { + MergeTheseRowIDs(resultRows, outlierRowIDs); + } + + List outlierColumnIDs = FindOutlierColumnIds(wordBorders, tableIntersectionCanvas, resultColumns, tableBoundingRect, outlierThreshold); + + if (outlierColumnIDs.Count > 0 && r != 4) + { + MergeTheseColumnIDs(resultColumns, outlierColumnIDs); + } + } + } + + private static List FindOutlierRowIds( + ICollection wordBorders, + ICollection resultRows, + Canvas tableIntersectionCanvas, + Rect tableBoundingRect, + int r, + int outlierThreshold) + { + List outlierRowIDs = new(); + + foreach (ResultRow row in resultRows) + { + int numberOfIntersectingWords = 0; + Border rowBorder = new() + { + Height = row.Bottom - row.Top, + Width = tableBoundingRect.Width, + Background = new SolidColorBrush(Colors.Red), + Tag = row.ID, + }; + tableIntersectionCanvas.Children.Add(rowBorder); + Canvas.SetLeft(rowBorder, tableBoundingRect.X); + Canvas.SetTop(rowBorder, row.Top); + + Rect rowRect = new(tableBoundingRect.X, row.Top, rowBorder.Width, rowBorder.Height); + + foreach (WordBorder wb in wordBorders) + { + if (wb.IntersectsWith(rowRect)) + { + numberOfIntersectingWords++; + wb.ResultRowID = row.ID; + } + } + + if (numberOfIntersectingWords <= outlierThreshold && r != 4) + { + outlierRowIDs.Add(row.ID); + } + } + + return outlierRowIDs; + } + + private static List FindOutlierColumnIds( + ICollection wordBorders, + Canvas tableIntersectionCanvas, + List resultColumns, + Rect tableBoundingRect, + int outlierThreshold) + { + List outlierColumnIDs = new(); + + foreach (ResultColumn column in resultColumns) + { + int numberOfIntersectingWords = 0; + Border columnBorder = new() + { + Height = tableBoundingRect.Height, + Width = column.Right - column.Left, + Background = new SolidColorBrush(Colors.Blue), + Opacity = 0.2, + Tag = column.ID, + }; + tableIntersectionCanvas.Children.Add(columnBorder); + Canvas.SetLeft(columnBorder, column.Left); + Canvas.SetTop(columnBorder, tableBoundingRect.Y); + + Rect columnRect = new(column.Left, tableBoundingRect.Y, columnBorder.Width, columnBorder.Height); + foreach (WordBorder wb in wordBorders) + { + if (wb.IntersectsWith(columnRect)) + { + numberOfIntersectingWords++; + wb.ResultColumnID = column.ID; + } + } + + if (numberOfIntersectingWords <= outlierThreshold) + { + outlierColumnIDs.Add(column.ID); + } + } + + return outlierColumnIDs; + } + + private static List CalculateResultColumns(int hitGridSpacing, List columnAreas) + { + List resultColumns = new(); + int columnLeft = 0; + int columnCount = 0; + for (int i = 0; i < columnAreas.Count; i++) + { + int thisLine = columnAreas[i]; + + // check if should set this as top + if (i == 0) + { + columnLeft = thisLine; + } + else if (i - 1 > 0) + { + int prevColumn = columnAreas[i - 1]; + if (thisLine - prevColumn != hitGridSpacing) + { + columnLeft = thisLine; + } + } + + // check to see if at last Column + if (i == columnAreas.Count - 1) + { + resultColumns.Add(new ResultColumn { Left = columnLeft, Right = thisLine, ID = columnCount }); + columnCount++; + } + else if (i + 1 < columnAreas.Count) + { + int nextColumn = columnAreas[i + 1]; + if (nextColumn - thisLine != hitGridSpacing) + { + resultColumns.Add(new ResultColumn { Left = columnLeft, Right = thisLine, ID = columnCount }); + columnCount++; + } + } + } + + return resultColumns; + } + + private static List CalculateColumnAreas(Rectangle rectCanvasSize, int hitGridSpacing, int numberOfVerticalLines, Canvas tableIntersectionCanvas, ICollection wordBorders) + { + List columnAreas = new(); + for (int i = 0; i < numberOfVerticalLines; i++) + { + Border vertLine = new() + { + Height = rectCanvasSize.Height, + Width = 1, + Opacity = 0, + Background = new SolidColorBrush(Colors.Gray), + }; + _ = tableIntersectionCanvas.Children.Add(vertLine); + Canvas.SetLeft(vertLine, i * hitGridSpacing); + + Rect vertLineRect = new(i * hitGridSpacing, 0, vertLine.Width, vertLine.Height); + + foreach (WordBorder wb in wordBorders) + { + if (wb.IntersectsWith(vertLineRect)) + { + columnAreas.Add(i * hitGridSpacing); + break; + } + } + } + + return columnAreas; + } + + private static void MergeTheseColumnIDs(List resultColumns, List outlierColumnIDs) + { + for (int i = 0; i < outlierColumnIDs.Count; i++) + { + for (int j = 0; j < resultColumns.Count; j++) + { + ResultColumn column = resultColumns[j]; + if (column.ID == outlierColumnIDs[i]) + { + if (j == 0) + { + // merge with next column if possible + if (j + 1 < resultColumns.Count) + { + ResultColumn nextColumn = resultColumns[j + 1]; + nextColumn.Left = column.Left; + } + } + else if (j == resultColumns.Count - 1) + { + // merge with previous column + if (j - 1 >= 0) + { + ResultColumn prevColumn = resultColumns[j - 1]; + prevColumn.Right = column.Right; + } + } + else + { + // merge with closet column + ResultColumn prevColumn = resultColumns[j - 1]; + ResultColumn nextColumn = resultColumns[j + 1]; + int distanceToPrev = (int)(column.Left - prevColumn.Right); + int distanceToNext = (int)(nextColumn.Left - column.Right); + + if (distanceToNext < distanceToPrev) + { + // merge with next column + nextColumn.Left = column.Left; + } + else + { + // merge with prev column + prevColumn.Right = column.Right; + } + } + + resultColumns.RemoveAt(j); + } + } + } + } + + public static void GetTextFromTabledWordBorders(StringBuilder stringBuilder, List wordBorders, bool isSpaceJoining) + { + List? selectedBorders = wordBorders.Where(w => w.IsSelected).ToList(); + + if (selectedBorders.Count == 0) + { + selectedBorders.AddRange(wordBorders); + } + + List lineList = new(); + int? lastLineNum = 0; + int lastColumnNum = 0; + + if (selectedBorders.FirstOrDefault() != null) + { + lastLineNum = selectedBorders.FirstOrDefault()!.LineNumber; + } + + selectedBorders = selectedBorders.OrderBy(x => x.ResultColumnID).ToList(); + selectedBorders = selectedBorders.OrderBy(x => x.ResultRowID).ToList(); + + int numberOfDistinctRows = selectedBorders.Select(x => x.ResultRowID).Distinct().Count(); + + foreach (WordBorder border in selectedBorders) + { + if (lineList.Count == 0) + { + lastLineNum = border.ResultRowID; + } + + if (border.ResultRowID != lastLineNum) + { + if (isSpaceJoining) + { + stringBuilder.Append(string.Join(' ', lineList)); + } + else + { + stringBuilder.Append(string.Join(string.Empty, lineList)); + } + + stringBuilder.Replace(" \t ", "\t"); + stringBuilder.Replace("\t ", "\t"); + stringBuilder.Replace(" \t", "\t"); + stringBuilder.Append(Environment.NewLine); + lineList.Clear(); + lastLineNum = border.ResultRowID; + } + + if (border.ResultColumnID != lastColumnNum && numberOfDistinctRows > 1) + { + string borderWord = border.Word; + int numberOfOffColumns = border.ResultColumnID - lastColumnNum; + if (numberOfOffColumns < 0) + { + lastColumnNum = 0; + } + + numberOfOffColumns = border.ResultColumnID - lastColumnNum; + + if (numberOfOffColumns > 0) + { + lineList.Add(new string('\t', numberOfOffColumns)); + } + } + + lastColumnNum = border.ResultColumnID; + + lineList.Add(border.Word); + } + + stringBuilder.Append(string.Join(string.Empty, lineList)); + } + + private static void MergeTheseRowIDs(List resultRows, List outlierRowIDs) + { + } + + private void DrawTable() + { + // Draw the lines and bounds of the table + SolidColorBrush tableColor = new(System.Windows.Media.Color.FromArgb(255, 40, 118, 126)); + + TableLines = new Canvas() + { + Tag = "TableLines", + }; + + Border tableOutline = new() + { + Width = this.BoundingRect.Width, + Height = this.BoundingRect.Height, + BorderThickness = new Thickness(3), + BorderBrush = tableColor, + }; + TableLines.Children.Add(tableOutline); + Canvas.SetTop(tableOutline, this.BoundingRect.Y); + Canvas.SetLeft(tableOutline, this.BoundingRect.X); + + foreach (int columnLine in this.ColumnLines) + { + Border vertLine = new() + { + Width = 2, + Height = this.BoundingRect.Height, + Background = tableColor, + }; + TableLines.Children.Add(vertLine); + Canvas.SetTop(vertLine, this.BoundingRect.Y); + Canvas.SetLeft(vertLine, columnLine); + } + + foreach (int rowLine in this.RowLines) + { + Border horizontalLine = new() + { + Height = 2, + Width = this.BoundingRect.Width, + Background = tableColor, + }; + TableLines.Children.Add(horizontalLine); + Canvas.SetTop(horizontalLine, rowLine); + Canvas.SetLeft(horizontalLine, this.BoundingRect.X); + } + } + + public static string GetWordsAsTable(List wordBorders, DpiScale dpiScale, bool isSpaceJoining) + { + List smallerBorders = new(); + foreach (WordBorder originalWB in wordBorders) + { + WordBorder newWB = new() + { + Word = originalWB.Word, + Left = originalWB.Left, + Top = originalWB.Top, + Width = originalWB.Width > 10 ? originalWB.Width - 6 : originalWB.Width, + Height = originalWB.Height > 10 ? originalWB.Height - 6 : originalWB.Height, + ResultRowID = originalWB.ResultRowID, + ResultColumnID = originalWB.ResultColumnID, + }; + smallerBorders.Add(newWB); + } + + ResultTable resultTable = new(ref smallerBorders, dpiScale); + StringBuilder stringBuilder = new(); + GetTextFromTabledWordBorders( + stringBuilder, + smallerBorders, + isSpaceJoining); + return stringBuilder.ToString(); + } +} diff --git a/src/modules/PowerOCR/PowerOCR/Models/WordBorder.cs b/src/modules/PowerOCR/PowerOCR/Models/WordBorder.cs new file mode 100644 index 0000000000..08517837be --- /dev/null +++ b/src/modules/PowerOCR/PowerOCR/Models/WordBorder.cs @@ -0,0 +1,42 @@ +// Copyright (c) Microsoft Corporation +// The Microsoft Corporation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Windows; + +namespace PowerOCR.Models; + +public class WordBorder +{ + public bool IsSelected { get; set; } + + public string Word { get; set; } = string.Empty; + + public double Top { get; set; } + + public double Left { get; set; } + + public double Width { get; set; } + + public double Height { get; set; } + + public int LineNumber { get; set; } + + public double Right => Left + Width; + + public double Bottom => Top + Height; + + public int ResultRowID { get; set; } + + public int ResultColumnID { get; set; } + + public Rect AsRect() + { + return new Rect(Left, Top, Width, Height); + } + + public bool IntersectsWith(Rect rect) + { + return rect.IntersectsWith(AsRect()); + } +} diff --git a/src/modules/PowerOCR/PowerOCR/OCROverlay.xaml b/src/modules/PowerOCR/PowerOCR/OCROverlay.xaml index e026782a25..c22453aae1 100644 --- a/src/modules/PowerOCR/PowerOCR/OCROverlay.xaml +++ b/src/modules/PowerOCR/PowerOCR/OCROverlay.xaml @@ -5,19 +5,40 @@ xmlns:d="http://schemas.microsoft.com/expression/blend/2008" xmlns:local="clr-namespace:PowerOCR" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" + xmlns:ui="http://schemas.lepo.co/wpfui/2022/xaml" Title="TextExtractor" - Width="800" - Height="450" - ShowActivated="False" - ShowInTaskbar="False" + Width="200" + Height="200" AllowsTransparency="True" Background="Transparent" Loaded="Window_Loaded" - Unloaded="Window_Unloaded" ResizeMode="NoResize" + ShowActivated="False" + ShowInTaskbar="False" Topmost="True" + Unloaded="Window_Unloaded" + WindowStartupLocation="Manual" + WindowState="Normal" WindowStyle="None" mc:Ignorable="d"> + + + + + + @@ -45,8 +66,115 @@ Color="Black" /> - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/modules/PowerOCR/PowerOCR/OCROverlay.xaml.cs b/src/modules/PowerOCR/PowerOCR/OCROverlay.xaml.cs index cb6f0b461a..e79d0d3ea4 100644 --- a/src/modules/PowerOCR/PowerOCR/OCROverlay.xaml.cs +++ b/src/modules/PowerOCR/PowerOCR/OCROverlay.xaml.cs @@ -7,8 +7,10 @@ using System.Collections.Generic; using System.Linq; using System.Windows; using System.Windows.Controls; +using System.Windows.Controls.Primitives; using System.Windows.Input; using System.Windows.Media; +using Common.UI; using ManagedCommon; using Microsoft.PowerToys.Telemetry; using PowerOCR.Helpers; @@ -27,37 +29,32 @@ public partial class OCROverlay : Window private bool isShiftDown; private Point clickedPoint; private Point shiftPoint; + private Border selectBorder = new(); + private Language? selectedLanguage; private bool IsSelecting { get; set; } - private Border selectBorder = new(); - - private DpiScale? dpiScale; - - private Point GetMousePos() => PointToScreen(Mouse.GetPosition(this)); - - private Language? selectedLanguage; - private MenuItem cancelMenuItem; - - private System.Windows.Forms.Screen? CurrentScreen - { - get; - set; - } - private double selectLeft; private double selectTop; private double xShiftDelta; private double yShiftDelta; - + private bool isComboBoxReady; private const double ActiveOpacity = 0.4; + private readonly UserSettings userSettings = new(new ThrottledActionInvoker()); - public OCROverlay() + public OCROverlay(System.Drawing.Rectangle screenRectangle) { + Left = screenRectangle.Left >= 0 ? screenRectangle.Left : screenRectangle.Left + (screenRectangle.Width / 2); + Top = screenRectangle.Top >= 0 ? screenRectangle.Top : screenRectangle.Top + (screenRectangle.Height / 2); + InitializeComponent(); - var userSettings = new UserSettings(new ThrottledActionInvoker()); + PopulateLanguageMenu(); + } + + private void PopulateLanguageMenu() + { string? selectedLanguageName = userSettings.PreferredLanguage.Value; // build context menu @@ -68,25 +65,26 @@ public partial class OCROverlay : Window } List possibleOcrLanguages = OcrEngine.AvailableRecognizerLanguages.ToList(); + + int count = 0; + foreach (Language language in possibleOcrLanguages) { MenuItem menuItem = new() { Header = language.NativeName, Tag = language, IsCheckable = true }; menuItem.IsChecked = language.DisplayName.Equals(selectedLanguageName, StringComparison.Ordinal); + LanguagesComboBox.Items.Add(language); if (language.DisplayName.Equals(selectedLanguageName, StringComparison.Ordinal)) { selectedLanguage = language; + LanguagesComboBox.SelectedIndex = count; } menuItem.Click += LanguageMenuItem_Click; CanvasContextMenu.Items.Add(menuItem); + count++; } - CanvasContextMenu.Items.Add(new Separator()); - - // ResourceLoader resourceLoader = ResourceLoader.GetForViewIndependentUse(); // resourceLoader.GetString("TextExtractor_Cancel") - cancelMenuItem = new MenuItem() { Header = "cancel" }; - cancelMenuItem.Click += CancelMenuItem_Click; - CanvasContextMenu.Items.Add(cancelMenuItem); + isComboBoxReady = true; } private void LanguageMenuItem_Click(object sender, RoutedEventArgs e) @@ -101,6 +99,7 @@ public partial class OCROverlay : Window } selectedLanguage = menuItem.Tag as Language; + LanguagesComboBox.SelectedItem = selectedLanguage; } private void Window_Loaded(object sender, RoutedEventArgs e) @@ -112,6 +111,12 @@ public partial class OCROverlay : Window BackgroundImage.Source = ImageMethods.GetWindowBoundsImage(this); BackgroundBrush.Opacity = ActiveOpacity; + + TopButtonsStackPanel.Visibility = Visibility.Visible; + +#if DEBUG + Topmost = false; +#endif } private void Window_Unloaded(object sender, RoutedEventArgs e) @@ -119,9 +124,6 @@ public partial class OCROverlay : Window BackgroundImage.Source = null; BackgroundImage.UpdateLayout(); - CurrentScreen = null; - dpiScale = null; - KeyDown -= MainWindow_KeyDown; KeyUp -= MainWindow_KeyUp; @@ -131,8 +133,6 @@ public partial class OCROverlay : Window RegionClickCanvas.MouseDown -= RegionClickCanvas_MouseDown; RegionClickCanvas.MouseUp -= RegionClickCanvas_MouseUp; RegionClickCanvas.MouseMove -= RegionClickCanvas_MouseMove; - - cancelMenuItem.Click -= CancelMenuItem_Click; } private void MainWindow_KeyUp(object sender, KeyEventArgs e) @@ -151,15 +151,7 @@ public partial class OCROverlay : Window private void MainWindow_KeyDown(object sender, KeyEventArgs e) { - switch (e.Key) - { - case Key.Escape: - WindowUtilities.CloseAllOCROverlays(); - PowerToysTelemetry.Log.WriteEvent(new PowerOCR.Telemetry.PowerOCRCancelledEvent()); - break; - default: - break; - } + WindowUtilities.OcrOverlayKeyDown(e.Key); } private void RegionClickCanvas_MouseDown(object sender, MouseButtonEventArgs e) @@ -169,6 +161,7 @@ public partial class OCROverlay : Window return; } + TopButtonsStackPanel.Visibility = Visibility.Collapsed; RegionClickCanvas.CaptureMouse(); CursorClipper.ClipCursor(this); @@ -176,8 +169,6 @@ public partial class OCROverlay : Window selectBorder.Height = 1; selectBorder.Width = 1; - dpiScale = VisualTreeHelper.GetDpi(this); - try { RegionClickCanvas.Children.Remove(selectBorder); @@ -193,17 +184,6 @@ public partial class OCROverlay : Window Canvas.SetLeft(selectBorder, clickedPoint.X); Canvas.SetTop(selectBorder, clickedPoint.Y); - var screens = System.Windows.Forms.Screen.AllScreens; - System.Drawing.Point formsPoint = new((int)clickedPoint.X, (int)clickedPoint.Y); - foreach (var scr in screens) - { - if (scr.Bounds.Contains(formsPoint)) - { - CurrentScreen = scr; - break; - } - } - IsSelecting = true; } @@ -232,18 +212,6 @@ public partial class OCROverlay : Window double leftValue = selectLeft + xShiftDelta; double topValue = selectTop + yShiftDelta; - if (CurrentScreen is not null && dpiScale is not null) - { - double currentScreenLeft = CurrentScreen.Bounds.Left; // Should always be 0 - double currentScreenRight = CurrentScreen.Bounds.Right / dpiScale.Value.DpiScaleX; - double currentScreenTop = CurrentScreen.Bounds.Top; // Should always be 0 - double currentScreenBottom = CurrentScreen.Bounds.Bottom / dpiScale.Value.DpiScaleY; - - // this is giving issues on different monitors - // leftValue = Math.Clamp(leftValue, currentScreenLeft, currentScreenRight - selectBorder.Width); - // topValue = Math.Clamp(topValue, currentScreenTop, currentScreenBottom - selectBorder.Height); - } - clippingGeometry.Rect = new Rect( new Point(leftValue, topValue), new Size(selectBorder.Width, selectBorder.Height)); @@ -276,14 +244,13 @@ public partial class OCROverlay : Window return; } + TopButtonsStackPanel.Visibility = Visibility.Visible; IsSelecting = false; - CurrentScreen = null; CursorClipper.UnClipCursor(); RegionClickCanvas.ReleaseMouseCapture(); Matrix m = PresentationSource.FromVisual(this).CompositionTarget.TransformToDevice; - Point mPt = GetMousePos(); Point movingPoint = e.GetPosition(this); movingPoint.X *= m.M11; movingPoint.Y *= m.M22; @@ -313,27 +280,47 @@ public partial class OCROverlay : Window if (regionScaled.Width < 3 || regionScaled.Height < 3) { + BackgroundBrush.Opacity = 0; + Logger.LogInfo($"Getting clicked word, {selectedLanguage?.LanguageTag}"); grabbedText = await ImageMethods.GetClickedWord(this, new Point(xDimScaled, yDimScaled), selectedLanguage); } else { - grabbedText = await ImageMethods.GetRegionsText(this, regionScaled, selectedLanguage); + if (TableMenuItem.IsChecked) + { + Logger.LogInfo($"Getting region as table, {selectedLanguage?.LanguageTag}"); + grabbedText = await OcrExtensions.GetRegionsTextAsTableAsync(this, regionScaled, selectedLanguage); + } + else + { + Logger.LogInfo($"Standard region capture, {selectedLanguage?.LanguageTag}"); + grabbedText = await ImageMethods.GetRegionsText(this, regionScaled, selectedLanguage); + + if (SingleLineMenuItem.IsChecked) + { + Logger.LogInfo($"Making grabbed text single line"); + grabbedText = grabbedText.MakeStringSingleLine(); + } + } } - if (string.IsNullOrWhiteSpace(grabbedText) == false) + if (string.IsNullOrWhiteSpace(grabbedText)) { - try - { - Clipboard.SetText(grabbedText); - } - catch (Exception ex) - { - Logger.LogError($"Clipboard.SetText exception: {ex}"); - } - - WindowUtilities.CloseAllOCROverlays(); - PowerToysTelemetry.Log.WriteEvent(new PowerOCR.Telemetry.PowerOCRCaptureEvent()); + BackgroundBrush.Opacity = ActiveOpacity; + return; } + + try + { + Clipboard.SetText(grabbedText); + } + catch (Exception ex) + { + Logger.LogError($"Clipboard.SetText exception: {ex}"); + } + + WindowUtilities.CloseAllOCROverlays(); + PowerToysTelemetry.Log.WriteEvent(new PowerOCR.Telemetry.PowerOCRCaptureEvent()); } private void CancelMenuItem_Click(object sender, RoutedEventArgs e) @@ -341,4 +328,150 @@ public partial class OCROverlay : Window WindowUtilities.CloseAllOCROverlays(); PowerToysTelemetry.Log.WriteEvent(new PowerOCR.Telemetry.PowerOCRCancelledEvent()); } + + private void LanguagesComboBox_SelectionChanged(object sender, SelectionChangedEventArgs e) + { + if (sender is not ComboBox languageComboBox || !isComboBoxReady) + { + return; + } + + // TODO: Set the preferred language based upon what was chosen here + int selection = languageComboBox.SelectedIndex; + selectedLanguage = languageComboBox.SelectedItem as Language; + + Logger.LogError($"Changed language to {selectedLanguage?.LanguageTag}"); + + // Set the language in the context menu + foreach (var item in CanvasContextMenu.Items) + { + if (item is MenuItem menuItemLoop) + { + menuItemLoop.IsChecked = menuItemLoop.Tag as Language == selectedLanguage; + } + } + + switch (selection) + { + case 0: + WindowUtilities.OcrOverlayKeyDown(Key.D1); + break; + case 1: + WindowUtilities.OcrOverlayKeyDown(Key.D2); + break; + case 2: + WindowUtilities.OcrOverlayKeyDown(Key.D3); + break; + case 3: + WindowUtilities.OcrOverlayKeyDown(Key.D4); + break; + case 4: + WindowUtilities.OcrOverlayKeyDown(Key.D5); + break; + case 5: + WindowUtilities.OcrOverlayKeyDown(Key.D6); + break; + case 6: + WindowUtilities.OcrOverlayKeyDown(Key.D7); + break; + case 7: + WindowUtilities.OcrOverlayKeyDown(Key.D8); + break; + case 8: + WindowUtilities.OcrOverlayKeyDown(Key.D9); + break; + default: + break; + } + } + + private void SingleLineMenuItem_Click(object sender, RoutedEventArgs e) + { + bool isActive = CheckIfCheckingOrUnchecking(sender); + WindowUtilities.OcrOverlayKeyDown(Key.S, isActive); + } + + private void TableToggleButton_Click(object sender, RoutedEventArgs e) + { + bool isActive = CheckIfCheckingOrUnchecking(sender); + WindowUtilities.OcrOverlayKeyDown(Key.T, isActive); + } + + private void SettingsMenuItem_Click(object sender, RoutedEventArgs e) + { + WindowUtilities.CloseAllOCROverlays(); + SettingsDeepLink.OpenSettings(SettingsDeepLink.SettingsWindow.PowerOCR, false); + } + + private static bool CheckIfCheckingOrUnchecking(object? sender) + { + if (sender is ToggleButton tb && tb.IsChecked is not null) + { + return tb.IsChecked.Value; + } + + if (sender is MenuItem mi) + { + return mi.IsChecked; + } + + return false; + } + + internal void KeyPressed(Key key, bool? isActive) + { + switch (key) + { + // This case is handled in the WindowUtilities.OcrOverlayKeyDown + // case Key.Escape: + // WindowUtilities.CloseAllFullscreenGrabs(); + // break; + case Key.S: + if (isActive is null) + { + SingleLineMenuItem.IsChecked = !SingleLineMenuItem.IsChecked; + } + else + { + SingleLineMenuItem.IsChecked = isActive.Value; + } + + // Possibly save this in settings later and remember this preference + break; + case Key.T: + if (isActive is null) + { + TableToggleButton.IsChecked = !TableToggleButton.IsChecked; + } + else + { + TableToggleButton.IsChecked = isActive.Value; + } + + break; + case Key.D1: + case Key.D2: + case Key.D3: + case Key.D4: + case Key.D5: + case Key.D6: + case Key.D7: + case Key.D8: + case Key.D9: + int numberPressed = (int)key - 34; // D1 casts to 35, D2 to 36, etc. + int numberOfLanguages = LanguagesComboBox.Items.Count; + + if (numberPressed <= numberOfLanguages + && numberPressed - 1 >= 0 + && numberPressed - 1 != LanguagesComboBox.SelectedIndex + && isComboBoxReady) + { + LanguagesComboBox.SelectedIndex = numberPressed - 1; + } + + break; + default: + break; + } + } } diff --git a/src/modules/PowerOCR/PowerOCR/PowerOCR.csproj b/src/modules/PowerOCR/PowerOCR/PowerOCR.csproj index ddabc2409d..5b59ab9ef8 100644 --- a/src/modules/PowerOCR/PowerOCR/PowerOCR.csproj +++ b/src/modules/PowerOCR/PowerOCR/PowerOCR.csproj @@ -52,6 +52,7 @@ + diff --git a/src/modules/PowerOCR/PowerOCR/Settings/UserSettings.cs b/src/modules/PowerOCR/PowerOCR/Settings/UserSettings.cs index d36bf19faa..d08bc5b020 100644 --- a/src/modules/PowerOCR/PowerOCR/Settings/UserSettings.cs +++ b/src/modules/PowerOCR/PowerOCR/Settings/UserSettings.cs @@ -23,7 +23,7 @@ namespace PowerOCR.Settings private const int SettingsReadOnChangeDelayInMs = 300; private readonly IFileSystemWatcher _watcher; - private readonly object _loadingSettingsLock = new object(); + private readonly object _loadingSettingsLock = new(); [ImportingConstructor] public UserSettings(Helpers.IThrottledActionInvoker throttledActionInvoker) @@ -113,7 +113,7 @@ namespace PowerOCR.Settings // var telemetrySettings = new Telemetry.PowerOcrSettings(properties.VisibleColorFormats) // { // ActivationShortcut = properties.ActivationShortcut.ToString(), - // ActivationBehaviour = properties.ActivationAction.ToString(), + // ActivationBehavior = properties.ActivationAction.ToString(), // ColorFormatForClipboard = properties.CopiedColorRepresentation.ToString(), // ShowColorName = properties.ShowColorName, // }; diff --git a/src/modules/PowerOCR/PowerOCR/Styles/ButtonStyles.xaml b/src/modules/PowerOCR/PowerOCR/Styles/ButtonStyles.xaml new file mode 100644 index 0000000000..fab21fb3ee --- /dev/null +++ b/src/modules/PowerOCR/PowerOCR/Styles/ButtonStyles.xaml @@ -0,0 +1,695 @@ + + + + + + + + + + + + + + M 0,0 L 3.5,4 L 7,0 Z + M 0,4 L 3.5,0 L 7,4 Z + M 0,0 L 4,3.5 L 0,7 Z + F1 M 10.0,1.2 L 4.7,9.1 L 4.5,9.1 L 0,5.2 L 1.3,3.5 L 4.3,6.1L 8.3,0 L 10.0,1.2 Z + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/modules/PowerOCR/PowerOCR/Styles/Colors.xaml b/src/modules/PowerOCR/PowerOCR/Styles/Colors.xaml new file mode 100644 index 0000000000..97fa005753 --- /dev/null +++ b/src/modules/PowerOCR/PowerOCR/Styles/Colors.xaml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file