mirror of
https://github.com/microsoft/PowerToys.git
synced 2026-02-01 17:00:11 +01:00
Compare commits
2 Commits
issue/3300
...
issue/4435
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fb19bb9966 | ||
|
|
33142a556c |
@@ -0,0 +1,167 @@
|
||||
// Copyright (c) Microsoft Corporation
|
||||
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||
// See the LICENSE file in the project root for more information.
|
||||
|
||||
using System.Reflection;
|
||||
using AdvancedPaste.Helpers;
|
||||
using Microsoft.VisualStudio.TestTools.UnitTesting;
|
||||
|
||||
namespace AdvancedPaste.UnitTests.HelpersTests;
|
||||
|
||||
[TestClass]
|
||||
public sealed class MarkdownHelperTests
|
||||
{
|
||||
/// <summary>
|
||||
/// Helper method to invoke the private CleanHtml method for testing.
|
||||
/// </summary>
|
||||
private static string InvokeCleanHtml(string html)
|
||||
{
|
||||
var methodInfo = typeof(MarkdownHelper).GetMethod("CleanHtml", BindingFlags.NonPublic | BindingFlags.Static);
|
||||
return (string)methodInfo!.Invoke(null, [html])!;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Helper method to invoke the private ConvertHtmlToMarkdown method for testing.
|
||||
/// </summary>
|
||||
private static string InvokeConvertHtmlToMarkdown(string html)
|
||||
{
|
||||
var methodInfo = typeof(MarkdownHelper).GetMethod("ConvertHtmlToMarkdown", BindingFlags.NonPublic | BindingFlags.Static);
|
||||
return (string)methodInfo!.Invoke(null, [html])!;
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
public void CleanHtml_GoogleSheetsWrapper_RemovesWrapperPreservesTable()
|
||||
{
|
||||
// Arrange - Google Sheets HTML with wrapper element
|
||||
const string googleSheetsHtml = @"<google-sheets-html-origin>
|
||||
<style type=""text/css""><!--td {border: 1px solid #cccccc;}--></style>
|
||||
<table xmlns=""http://www.w3.org/1999/xhtml"" cellspacing=""0"" cellpadding=""0"" dir=""ltr"" border=""1"" data-sheets-root=""1"">
|
||||
<colgroup><col width=""100""><col width=""100""></colgroup>
|
||||
<tbody><tr><td>A</td><td>B</td></tr><tr><td>1</td><td>2</td></tr></tbody>
|
||||
</table>
|
||||
</google-sheets-html-origin>";
|
||||
|
||||
// Act
|
||||
string cleanedHtml = InvokeCleanHtml(googleSheetsHtml);
|
||||
|
||||
// Assert - wrapper and style should be removed, table preserved
|
||||
Assert.IsFalse(cleanedHtml.Contains("google-sheets-html-origin"), "Google Sheets wrapper should be removed");
|
||||
Assert.IsFalse(cleanedHtml.Contains("<style"), "Style element should be removed");
|
||||
Assert.IsFalse(cleanedHtml.Contains("<colgroup"), "Colgroup element should be removed");
|
||||
Assert.IsTrue(cleanedHtml.Contains("<table"), "Table element should be preserved");
|
||||
Assert.IsTrue(cleanedHtml.Contains("<td>A</td>"), "Table content should be preserved");
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
public void CleanHtml_GoogleSheetsHtml_ConvertsToMarkdownTable()
|
||||
{
|
||||
// Arrange - Google Sheets HTML with wrapper element
|
||||
const string googleSheetsHtml = @"<google-sheets-html-origin>
|
||||
<style type=""text/css""><!--td {border: 1px solid #cccccc;}--></style>
|
||||
<table xmlns=""http://www.w3.org/1999/xhtml"" cellspacing=""0"" cellpadding=""0"" dir=""ltr"" border=""1"" data-sheets-root=""1"">
|
||||
<colgroup><col width=""100""><col width=""100""></colgroup>
|
||||
<tbody><tr><td>A</td><td>B</td></tr><tr><td>1</td><td>2</td></tr></tbody>
|
||||
</table>
|
||||
</google-sheets-html-origin>";
|
||||
|
||||
// Act
|
||||
string cleanedHtml = InvokeCleanHtml(googleSheetsHtml);
|
||||
string markdown = InvokeConvertHtmlToMarkdown(cleanedHtml);
|
||||
|
||||
// Assert - should produce valid Markdown table
|
||||
Assert.IsTrue(markdown.Contains("|"), "Markdown should contain table pipes");
|
||||
Assert.IsTrue(markdown.Contains("A") && markdown.Contains("B"), "Markdown should contain table content");
|
||||
Assert.IsTrue(markdown.Contains("1") && markdown.Contains("2"), "Markdown should contain table data");
|
||||
Assert.IsFalse(markdown.Contains("<google-sheets-html-origin>"), "Markdown should not contain HTML wrapper");
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
public void CleanHtml_ExcelHtml_ConvertsToMarkdownTable()
|
||||
{
|
||||
// Arrange - Typical Excel HTML (for regression testing)
|
||||
const string excelHtml = @"<table border=""0"" cellpadding=""0"" cellspacing=""0"" width=""192"">
|
||||
<tbody><tr height=""20""><td height=""20"" width=""64"">Name</td><td width=""64"">Value</td><td width=""64"">Status</td></tr>
|
||||
<tr height=""20""><td height=""20"">Item1</td><td>100</td><td>Active</td></tr>
|
||||
<tr height=""20""><td height=""20"">Item2</td><td>200</td><td>Inactive</td></tr>
|
||||
</tbody></table>";
|
||||
|
||||
// Act
|
||||
string cleanedHtml = InvokeCleanHtml(excelHtml);
|
||||
string markdown = InvokeConvertHtmlToMarkdown(cleanedHtml);
|
||||
|
||||
// Assert - Excel HTML should still convert correctly
|
||||
Assert.IsTrue(markdown.Contains("|"), "Markdown should contain table pipes");
|
||||
Assert.IsTrue(markdown.Contains("Name"), "Markdown should contain header content");
|
||||
Assert.IsTrue(markdown.Contains("Item1") && markdown.Contains("Item2"), "Markdown should contain row data");
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
public void CleanHtml_StyleElement_IsRemoved()
|
||||
{
|
||||
// Arrange
|
||||
const string htmlWithStyle = @"<html><head><style>body { color: red; }</style></head><body><p>Text</p></body></html>";
|
||||
|
||||
// Act
|
||||
string cleanedHtml = InvokeCleanHtml(htmlWithStyle);
|
||||
|
||||
// Assert
|
||||
Assert.IsFalse(cleanedHtml.Contains("<style"), "Style element should be removed");
|
||||
Assert.IsTrue(cleanedHtml.Contains("<p>Text</p>") || cleanedHtml.Contains("Text"), "Content should be preserved");
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
public void CleanHtml_ColgroupElement_IsRemoved()
|
||||
{
|
||||
// Arrange
|
||||
const string htmlWithColgroup = @"<table><colgroup><col width=""100""></colgroup><tbody><tr><td>Data</td></tr></tbody></table>";
|
||||
|
||||
// Act
|
||||
string cleanedHtml = InvokeCleanHtml(htmlWithColgroup);
|
||||
|
||||
// Assert
|
||||
Assert.IsFalse(cleanedHtml.Contains("<colgroup"), "Colgroup element should be removed");
|
||||
Assert.IsTrue(cleanedHtml.Contains("<table"), "Table element should be preserved");
|
||||
Assert.IsTrue(cleanedHtml.Contains("Data"), "Table content should be preserved");
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
public void CleanHtml_ScriptElement_IsRemoved()
|
||||
{
|
||||
// Arrange
|
||||
const string htmlWithScript = @"<html><body><script>alert('test');</script><p>Content</p></body></html>";
|
||||
|
||||
// Act
|
||||
string cleanedHtml = InvokeCleanHtml(htmlWithScript);
|
||||
|
||||
// Assert
|
||||
Assert.IsFalse(cleanedHtml.Contains("<script"), "Script element should be removed");
|
||||
Assert.IsTrue(cleanedHtml.Contains("Content"), "Content should be preserved");
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
public void CleanHtml_NestedGoogleSheetsTable_PreservesNestedContent()
|
||||
{
|
||||
// Arrange - More complex Google Sheets HTML
|
||||
const string complexGoogleSheetsHtml = @"<google-sheets-html-origin>
|
||||
<style type=""text/css"">td {border: 1px solid #ccc;}</style>
|
||||
<table data-sheets-root=""1"">
|
||||
<colgroup><col width=""100""><col width=""150""><col width=""100""></colgroup>
|
||||
<tbody>
|
||||
<tr><td>Header1</td><td>Header2</td><td>Header3</td></tr>
|
||||
<tr><td>Row1Col1</td><td>Row1Col2</td><td>Row1Col3</td></tr>
|
||||
<tr><td>Row2Col1</td><td>Row2Col2</td><td>Row2Col3</td></tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</google-sheets-html-origin>";
|
||||
|
||||
// Act
|
||||
string cleanedHtml = InvokeCleanHtml(complexGoogleSheetsHtml);
|
||||
string markdown = InvokeConvertHtmlToMarkdown(cleanedHtml);
|
||||
|
||||
// Assert
|
||||
Assert.IsFalse(markdown.Contains("<google-sheets-html-origin>"), "Wrapper should be removed from markdown");
|
||||
Assert.IsTrue(markdown.Contains("Header1"), "Headers should be preserved");
|
||||
Assert.IsTrue(markdown.Contains("Row1Col1"), "Row data should be preserved");
|
||||
Assert.IsTrue(markdown.Contains("Row2Col3"), "All cells should be preserved");
|
||||
}
|
||||
}
|
||||
@@ -57,6 +57,23 @@ namespace AdvancedPaste.Helpers
|
||||
{
|
||||
Logger.LogTrace();
|
||||
|
||||
// Unwrap Google Sheets wrapper element (preserve children, remove wrapper)
|
||||
foreach (var googleSheetsWrapper in node.DescendantsAndSelf("google-sheets-html-origin").ToArray())
|
||||
{
|
||||
var parent = googleSheetsWrapper.ParentNode;
|
||||
if (parent == null)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
foreach (var child in googleSheetsWrapper.ChildNodes.ToArray())
|
||||
{
|
||||
parent.InsertBefore(child, googleSheetsWrapper);
|
||||
}
|
||||
|
||||
googleSheetsWrapper.Remove();
|
||||
}
|
||||
|
||||
// Remove specific elements by tag name, CSS class, or other attributes
|
||||
// Example: Remove all <script> elements
|
||||
foreach (var scriptNode in node.DescendantsAndSelf("script").ToArray())
|
||||
@@ -64,6 +81,18 @@ namespace AdvancedPaste.Helpers
|
||||
scriptNode.Remove();
|
||||
}
|
||||
|
||||
// Remove style elements (CSS not relevant for Markdown)
|
||||
foreach (var styleNode in node.DescendantsAndSelf("style").ToArray())
|
||||
{
|
||||
styleNode.Remove();
|
||||
}
|
||||
|
||||
// Remove colgroup elements (column width info not needed for Markdown)
|
||||
foreach (var colgroupNode in node.DescendantsAndSelf("colgroup").ToArray())
|
||||
{
|
||||
colgroupNode.Remove();
|
||||
}
|
||||
|
||||
// Ignore specific elements like <sup> elements
|
||||
foreach (var ignoredElement in node.DescendantsAndSelf("sup").ToArray())
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user