mirror of
https://github.com/microsoft/PowerToys.git
synced 2025-12-15 03:07:56 +01:00
[AdvPaste]CSV parser: Handle control characters (quotation marks) correctly (#33986)
## Summary of the Pull Request
This PR fixes the csv parsing related to quotation marks according to
csv standard rules:
- An empty data value can be written as `""`. => Remove both quotation
marks.
- Enclosing data by starting and ending with `"` if they contain the
delimiter. => First and last quotation mark has to be removed.
- Escape quotation mark with second quotation mark. => Replace pairs of
two with a single one.
### Input
```csv
A,B,,"","my ""nice"" string","""zz""","""""double quotes"""""
```
### Before this PR (Wrong result)
```json
[
[
"A",
"B",
"",
"\"\"",
"\"my \"\"nice\"\" string\"",
"\"\"\"zz\"\"\"",
"\"\"\"\"\"double quotes\"\"\"\"\""
]
]
```
### After this PR (Correct result)
```json
[
[
"A",
"B",
"",
"",
"my \"nice\" string",
"\"zz\"",
"\"\"double quotes\"\""
]
]
```
This commit is contained in:
@@ -24,9 +24,14 @@ namespace AdvancedPaste.Helpers
|
||||
private static readonly char[] CsvDelimArry = [',', ';', '\t'];
|
||||
private static readonly Regex CsvSepIdentifierRegex = new Regex(@"^sep=(.)$", RegexOptions.IgnoreCase);
|
||||
|
||||
// Split on every occurrence of the delimiter except if it is enclosed by " and ignore two " as escaped "
|
||||
// CSV: Split on every occurrence of the delimiter except if it is enclosed by " and ignore two " as escaped "
|
||||
private static readonly string CsvDelimSepRegexStr = @"(?=(?:[^""]*""[^""]*"")*(?![^""]*""))";
|
||||
|
||||
// CSV: Regex to remove/replace quotation marks
|
||||
private static readonly Regex CsvRemoveSingleQuotationMarksRegex = new Regex(@"^""(?!"")|(?<!"")""$|^""""$");
|
||||
private static readonly Regex CsvRemoveStartAndEndQuotationMarksRegex = new Regex(@"^""(?=(""{2})+)|(?<=(""{2})+)""$");
|
||||
private static readonly Regex CsvReplaceDoubleQuotationMarksRegex = new Regex(@"""{2}");
|
||||
|
||||
internal static string ToJsonFromXmlOrCsv(DataPackageView clipboardData)
|
||||
{
|
||||
Logger.LogTrace();
|
||||
@@ -134,7 +139,7 @@ namespace AdvancedPaste.Helpers
|
||||
{
|
||||
if (string.IsNullOrEmpty(jsonText))
|
||||
{
|
||||
var csv = new List<string[]>();
|
||||
var csv = new List<IEnumerable<string>>();
|
||||
|
||||
string[] lines = text.Split(new string[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries);
|
||||
|
||||
@@ -153,7 +158,8 @@ namespace AdvancedPaste.Helpers
|
||||
// and if every line contains no or an even count of quotation marks.
|
||||
if (Regex.Count(line, delim + CsvDelimSepRegexStr) == delimCount && int.IsEvenInteger(line.Count(x => x == '"')))
|
||||
{
|
||||
csv.Add(Regex.Split(line, delim + CsvDelimSepRegexStr, RegexOptions.IgnoreCase));
|
||||
string[] dataCells = Regex.Split(line, delim + CsvDelimSepRegexStr, RegexOptions.IgnoreCase);
|
||||
csv.Add(dataCells.Select(x => ReplaceQuotationMarksInCsvData(x)));
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -244,5 +250,26 @@ namespace AdvancedPaste.Helpers
|
||||
throw new FormatException("Invalid CSV format: Failed to detect the delimiter.");
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Remove and replace quotation marks used as control sequences. (Enclosing quotation marks and escaping quotation marks.)
|
||||
/// </summary>
|
||||
/// <param name="str">CSV cell data to manipulate.</param>
|
||||
/// <returns>Manipulated string.</returns>
|
||||
private static string ReplaceQuotationMarksInCsvData(string str)
|
||||
{
|
||||
// Remove first and last single quotation mark (enclosing quotation marks) and remove quotation marks of an empty data set ("").
|
||||
str = CsvRemoveSingleQuotationMarksRegex.Replace(str, string.Empty);
|
||||
|
||||
// Remove first quotation mark if followed by pairs of quotation marks
|
||||
// and remove last quotation mark if precede by pairs of quotation marks.
|
||||
// (Removes enclosing quotation marks around the cell data for data like /"""abc"""/.)
|
||||
str = CsvRemoveStartAndEndQuotationMarksRegex.Replace(str, string.Empty);
|
||||
|
||||
// Replace pairs of two quotation marks with a single quotation mark. (Escaped quotation marks.)
|
||||
str = CsvReplaceDoubleQuotationMarksRegex.Replace(str, "\"");
|
||||
|
||||
return str;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user