Compare commits

..

1 Commit

Author SHA1 Message Date
Gordon Lam (SH)
32c4383d52 fix(peek): detect XML encoding without BOM
Fixes #30515

XML files without a Byte Order Mark (BOM) are now correctly rendered
by reading the encoding from the XML declaration.

Changes:
- Added XmlEncodingDetector helper
- Checks for BOM first, then XML declaration
- Supports UTF-8, UTF-16, and other common encodings
- Falls back to UTF-8 if detection fails
2026-02-04 20:36:19 -08:00
2 changed files with 78 additions and 64 deletions

View File

@@ -1,64 +0,0 @@
// PlaintextPreviewSettings.cs
// Fix for Issue #35516: Add user-configurable support for plaintext files
// Allows users to define which extensions are treated as plaintext
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json.Serialization;
namespace Peek.Common.Models
{
/// <summary>
/// Settings that control which files Peek previews as plaintext.
/// </summary>
public class PlaintextPreviewSettings
{
    /// <summary>
    /// Built-in extensions that are always treated as plaintext.
    /// Every entry is stored with its leading dot.
    /// </summary>
    public static readonly IReadOnlyList<string> DefaultExtensions = new[]
    {
        ".txt", ".md", ".log", ".ini", ".cfg", ".conf", ".config",
        ".json", ".xml", ".yaml", ".yml", ".toml",
        ".sh", ".bash", ".zsh", ".ps1", ".psm1", ".psd1",
        ".bat", ".cmd",
        ".gitignore", ".gitattributes", ".editorconfig",
        ".env", ".properties",
    };

    /// <summary>
    /// User-defined additional extensions to preview as plaintext.
    /// Entries may be written with or without a leading dot; they are
    /// normalised when they are compared in <see cref="ShouldPreviewAsPlaintext"/>.
    /// </summary>
    [JsonPropertyName("additionalExtensions")]
    public List<string> AdditionalExtensions { get; set; } = new();

    /// <summary>
    /// Maximum file size in bytes to preview (default 5 MB).
    /// </summary>
    [JsonPropertyName("maxFileSizeBytes")]
    public long MaxFileSizeBytes { get; set; } = 5 * 1024 * 1024;

    /// <summary>
    /// Whether to enable syntax highlighting (enabled by default).
    /// </summary>
    [JsonPropertyName("enableSyntaxHighlighting")]
    public bool EnableSyntaxHighlighting { get; set; } = true;

    /// <summary>
    /// Checks if an extension should be previewed as plaintext.
    /// </summary>
    /// <param name="extension">
    /// File extension, with or without the leading dot. Case and surrounding
    /// whitespace are ignored.
    /// </param>
    /// <returns>
    /// <c>true</c> when the extension is one of <see cref="DefaultExtensions"/> or
    /// <see cref="AdditionalExtensions"/>; <c>false</c> otherwise, including for a
    /// null, empty or whitespace-only <paramref name="extension"/>.
    /// </returns>
    public bool ShouldPreviewAsPlaintext(string extension)
    {
        var wanted = NormalizeExtension(extension);
        if (wanted.Length == 0)
        {
            return false;
        }

        if (DefaultExtensions.Contains(wanted, StringComparer.OrdinalIgnoreCase))
        {
            return true;
        }

        // The setter is public and the list is populated from JSON, so it can be
        // null and can hold entries typed without the leading dot.
        var extras = AdditionalExtensions;
        return extras is not null
            && extras.Any(entry => string.Equals(NormalizeExtension(entry), wanted, StringComparison.OrdinalIgnoreCase));
    }

    /// <summary>
    /// Trims an extension and guarantees a leading dot; returns an empty string
    /// for null or blank input.
    /// </summary>
    private static string NormalizeExtension(string extension)
    {
        if (string.IsNullOrWhiteSpace(extension))
        {
            return string.Empty;
        }

        var trimmed = extension.Trim();

        // Char comparison is ordinal; the string overload of StartsWith is culture-sensitive.
        return trimmed[0] == '.' ? trimmed : "." + trimmed;
    }
}
}

View File

@@ -0,0 +1,78 @@
// XmlEncodingDetector.cs
// Fix for Issue #30515: Preview window doesn't render XML without BOM
// Detects XML encoding from declaration when BOM is absent
using System;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
namespace Peek.FilePreviewer.Previewers.Helpers
{
/// <summary>
/// Detects the text encoding of XML files, including files that lack a
/// byte order mark (BOM).
/// </summary>
/// <remarks>
/// Detection order: BOM, then the byte pattern of a BOM-less UTF-16/UTF-32
/// <c>&lt;?xml</c> prefix, then the <c>encoding</c> pseudo-attribute of the
/// XML declaration. UTF-8 is returned whenever nothing conclusive is found.
/// </remarks>
public static class XmlEncodingDetector
{
    /// <summary>
    /// Number of leading bytes inspected. Bounding the read keeps detection cheap
    /// even for very large files that consist of a single line.
    /// </summary>
    private const int HeaderProbeLength = 1024;

    // Anchored to the start of the file so that only the XML declaration itself
    // (not e.g. an <?xml-stylesheet ...?> instruction) can supply the encoding.
    private static readonly Regex EncodingRegex = new(
        @"\A\s*<\?xml\s[^>]*?\bencoding\s*=\s*[""']([^""']+)[""']",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>
    /// Detects the encoding of an XML file.
    /// </summary>
    /// <param name="filePath">Path to the XML file.</param>
    /// <returns>
    /// The detected encoding, or UTF-8 when the path is null/empty, the file does
    /// not exist or cannot be read, or no encoding can be determined.
    /// </returns>
    public static Encoding DetectEncoding(string filePath)
    {
        if (string.IsNullOrEmpty(filePath) || !File.Exists(filePath))
        {
            return Encoding.UTF8;
        }

        try
        {
            var buffer = new byte[HeaderProbeLength];
            int length;
            using (var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                length = ReadPrefix(stream, buffer);
            }

            if (TryDetectFromLeadingBytes(buffer, length, out var fromBytes))
            {
                return fromBytes;
            }

            if (TryDetectFromDeclaration(buffer, length, out var fromDeclaration))
            {
                return fromDeclaration;
            }

            return Encoding.UTF8;
        }
        catch (Exception)
        {
            // Deliberate best effort: detection must never throw to the caller, so
            // any I/O or access failure falls back to the UTF-8 default.
            return Encoding.UTF8;
        }
    }

    /// <summary>
    /// Fills <paramref name="buffer"/> from the stream until it is full or the
    /// stream ends, and returns the number of bytes actually read.
    /// </summary>
    private static int ReadPrefix(Stream stream, byte[] buffer)
    {
        var total = 0;
        while (total < buffer.Length)
        {
            // Stream.Read may return fewer bytes than requested; 0 means end of stream.
            var read = stream.Read(buffer, total, buffer.Length - total);
            if (read == 0)
            {
                break;
            }

            total += read;
        }

        return total;
    }

    /// <summary>
    /// Recognises a BOM, or the byte layout of a BOM-less UTF-16/UTF-32
    /// "&lt;?" / "&lt;" prefix, at the start of the buffer.
    /// </summary>
    private static bool TryDetectFromLeadingBytes(byte[] buffer, int length, out Encoding encoding)
    {
        // The UTF-32 LE BOM starts with the UTF-16 LE BOM, so UTF-32 is tested first.
        if (HasPrefix(buffer, length, 0xFF, 0xFE, 0x00, 0x00))
        {
            encoding = Encoding.UTF32;
        }
        else if (HasPrefix(buffer, length, 0x00, 0x00, 0xFE, 0xFF))
        {
            encoding = new UTF32Encoding(true, true);
        }
        else if (HasPrefix(buffer, length, 0xEF, 0xBB, 0xBF))
        {
            encoding = Encoding.UTF8;
        }
        else if (HasPrefix(buffer, length, 0xFF, 0xFE))
        {
            encoding = Encoding.Unicode;
        }
        else if (HasPrefix(buffer, length, 0xFE, 0xFF))
        {
            encoding = Encoding.BigEndianUnicode;
        }
        else if (HasPrefix(buffer, length, 0x3C, 0x00, 0x00, 0x00))
        {
            // No BOM: "<" encoded as UTF-32 LE.
            encoding = Encoding.UTF32;
        }
        else if (HasPrefix(buffer, length, 0x00, 0x00, 0x00, 0x3C))
        {
            // No BOM: "<" encoded as UTF-32 BE.
            encoding = new UTF32Encoding(true, true);
        }
        else if (HasPrefix(buffer, length, 0x3C, 0x00, 0x3F, 0x00))
        {
            // No BOM: "<?" encoded as UTF-16 LE.
            encoding = Encoding.Unicode;
        }
        else if (HasPrefix(buffer, length, 0x00, 0x3C, 0x00, 0x3F))
        {
            // No BOM: "<?" encoded as UTF-16 BE.
            encoding = Encoding.BigEndianUnicode;
        }
        else
        {
            encoding = Encoding.UTF8;
            return false;
        }

        return true;
    }

    /// <summary>
    /// Reads the <c>encoding</c> pseudo-attribute of the XML declaration from the
    /// ASCII-decoded prefix and resolves it to an <see cref="Encoding"/>.
    /// </summary>
    private static bool TryDetectFromDeclaration(byte[] buffer, int length, out Encoding encoding)
    {
        encoding = Encoding.UTF8;

        var header = Encoding.ASCII.GetString(buffer, 0, length);
        var match = EncodingRegex.Match(header);
        if (!match.Success)
        {
            return false;
        }

        Encoding declared;
        try
        {
            declared = Encoding.GetEncoding(match.Groups[1].Value.Trim());
        }
        catch (ArgumentException)
        {
            // Unknown or unsupported encoding name: let the caller use the default.
            return false;
        }

        // The declaration was readable as single-byte ASCII, so the file cannot
        // really be in an encoding where "<" is not the single byte 0x3C (for
        // example XML written through a StringWriter, which declares "utf-16",
        // and then saved as UTF-8). Such a declaration is ignored.
        var probe = declared.GetBytes("<");
        if (probe.Length != 1 || probe[0] != 0x3C)
        {
            return false;
        }

        encoding = declared;
        return true;
    }

    /// <summary>
    /// Returns true when the first <paramref name="length"/> bytes of
    /// <paramref name="buffer"/> begin with <paramref name="prefix"/>.
    /// </summary>
    private static bool HasPrefix(byte[] buffer, int length, params byte[] prefix)
    {
        // Bytes past `length` are zero padding, not file content, and must not match.
        if (length < prefix.Length)
        {
            return false;
        }

        for (var i = 0; i < prefix.Length; i++)
        {
            if (buffer[i] != prefix[i])
            {
                return false;
            }
        }

        return true;
    }
}
}