mirror of
https://github.com/microsoft/PowerToys.git
synced 2025-12-16 11:48:06 +01:00
[AdvPaste][JSON]Improve delimiter handling for CSV and add plain text parser (#33199)
* code changes * rework code * improvement * regex: ignore case * spell fixes * update regex * fixes * more fixes
This commit is contained in:
@@ -4,6 +4,8 @@
|
|||||||
|
|
||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text.RegularExpressions;
|
||||||
using System.Threading.Tasks;
|
using System.Threading.Tasks;
|
||||||
using System.Xml;
|
using System.Xml;
|
||||||
using ManagedCommon;
|
using ManagedCommon;
|
||||||
@@ -14,6 +16,10 @@ namespace AdvancedPaste.Helpers
|
|||||||
{
|
{
|
||||||
internal static class JsonHelper
|
internal static class JsonHelper
|
||||||
{
|
{
|
||||||
|
// List of supported CSV delimiters and Regex to detect separator property
|
||||||
|
private static readonly char[] CsvDelimArry = [',', ';', '\t'];
|
||||||
|
private static readonly Regex CsvSepIdentifierRegex = new Regex(@"^sep=(.)$", RegexOptions.IgnoreCase);
|
||||||
|
|
||||||
internal static string ToJsonFromXmlOrCsv(DataPackageView clipboardData)
|
internal static string ToJsonFromXmlOrCsv(DataPackageView clipboardData)
|
||||||
{
|
{
|
||||||
Logger.LogTrace();
|
Logger.LogTrace();
|
||||||
@@ -53,11 +59,31 @@ namespace AdvancedPaste.Helpers
|
|||||||
{
|
{
|
||||||
var csv = new List<string[]>();
|
var csv = new List<string[]>();
|
||||||
|
|
||||||
foreach (var line in text.Split(new string[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries))
|
string[] lines = text.Split(new string[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries);
|
||||||
|
|
||||||
|
// Detect the csv delimiter and the count of occurrence based on the first two csv lines.
|
||||||
|
GetCsvDelimiter(lines, out char delim, out int delimCount);
|
||||||
|
|
||||||
|
foreach (var line in lines)
|
||||||
{
|
{
|
||||||
csv.Add(line.Split(","));
|
// If line is separator property line, then skip it
|
||||||
|
if (CsvSepIdentifierRegex.IsMatch(line))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// A CSV line is valid, if the delimiter occurs more or equal times in every line compared to the first data line. (More because sometimes the delimiter occurs in a data string.)
|
||||||
|
if (line.Count(x => x == delim) >= delimCount)
|
||||||
|
{
|
||||||
|
csv.Add(line.Split(delim));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
throw new FormatException("Invalid CSV format: Number of delimiters wrong in the current line.");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Logger.LogDebug("Convert from csv.");
|
||||||
jsonText = JsonConvert.SerializeObject(csv, Newtonsoft.Json.Formatting.Indented);
|
jsonText = JsonConvert.SerializeObject(csv, Newtonsoft.Json.Formatting.Indented);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -66,7 +92,79 @@ namespace AdvancedPaste.Helpers
|
|||||||
Logger.LogError("Failed parsing input as csv", ex);
|
Logger.LogError("Failed parsing input as csv", ex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Try convert Plain Text
|
||||||
|
try
|
||||||
|
{
|
||||||
|
if (string.IsNullOrEmpty(jsonText))
|
||||||
|
{
|
||||||
|
var plainText = new List<string>();
|
||||||
|
|
||||||
|
foreach (var line in text.Split(new string[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries))
|
||||||
|
{
|
||||||
|
plainText.Add(line);
|
||||||
|
}
|
||||||
|
|
||||||
|
Logger.LogDebug("Convert from plain text.");
|
||||||
|
jsonText = JsonConvert.SerializeObject(plainText, Newtonsoft.Json.Formatting.Indented);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (Exception ex)
|
||||||
|
{
|
||||||
|
Logger.LogError("Failed parsing input as plain text", ex);
|
||||||
|
}
|
||||||
|
|
||||||
return string.IsNullOrEmpty(jsonText) ? text : jsonText;
|
return string.IsNullOrEmpty(jsonText) ? text : jsonText;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void GetCsvDelimiter(in string[] csvLines, out char delimiter, out int delimiterCount)
|
||||||
|
{
|
||||||
|
delimiter = '\0'; // Unicode "null" character.
|
||||||
|
delimiterCount = 0;
|
||||||
|
|
||||||
|
if (csvLines.Length > 1)
|
||||||
|
{
|
||||||
|
// Try to select the delimiter based on the separator property.
|
||||||
|
Match matchChar = CsvSepIdentifierRegex.Match(csvLines[0]);
|
||||||
|
if (matchChar.Success)
|
||||||
|
{
|
||||||
|
// We can do matchChar[0] as the match only returns one character.
|
||||||
|
// We get the count from the second line, as the first one only contains the character definition and not a CSV data line.
|
||||||
|
char delimChar = matchChar.Groups[1].Value.Trim()[0];
|
||||||
|
delimiter = delimChar;
|
||||||
|
delimiterCount = csvLines[1].Count(x => x == delimChar);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (csvLines.Length > 0 && delimiterCount == 0)
|
||||||
|
{
|
||||||
|
// Try to select the correct delimiter based on the first two CSV lines from a list of predefined delimiters.
|
||||||
|
foreach (char c in CsvDelimArry)
|
||||||
|
{
|
||||||
|
int cntFirstLine = csvLines[0].Count(x => x == c);
|
||||||
|
int cntNextLine = 0; // Default to 0 that the 'second line' check is always true.
|
||||||
|
|
||||||
|
// Additional count if we have more than one line
|
||||||
|
if (csvLines.Length >= 2)
|
||||||
|
{
|
||||||
|
cntNextLine = csvLines[1].Count(x => x == c);
|
||||||
|
}
|
||||||
|
|
||||||
|
// The delimiter is found if the count is bigger as from the last selected delimiter
|
||||||
|
// and if the next csv line does not exist or has the same number or more occurrences of the delimiter.
|
||||||
|
// (We check the next line to prevent false positives.)
|
||||||
|
if (cntFirstLine > delimiterCount && (cntNextLine == 0 || cntNextLine >= cntFirstLine))
|
||||||
|
{
|
||||||
|
delimiter = c;
|
||||||
|
delimiterCount = cntFirstLine;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the delimiter count is 0, we can't detect it and it is no valid CSV.
|
||||||
|
if (delimiterCount == 0)
|
||||||
|
{
|
||||||
|
throw new FormatException("Invalid CSV format: Failed to detect the delimiter.");
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user