diff --git a/.github/actions/spell-check/allow/code.txt b/.github/actions/spell-check/allow/code.txt
index a7d02dcb21..3e7341d5c3 100644
--- a/.github/actions/spell-check/allow/code.txt
+++ b/.github/actions/spell-check/allow/code.txt
@@ -330,6 +330,9 @@ HHH
riday
YYY
+# Unicode
+precomposed
+
# GitHub issue/PR commands
azp
feedbackhub
diff --git a/src/modules/powerrename/lib/PowerRenameRegEx.cpp b/src/modules/powerrename/lib/PowerRenameRegEx.cpp
index 567df48606..aabf838a7a 100644
--- a/src/modules/powerrename/lib/PowerRenameRegEx.cpp
+++ b/src/modules/powerrename/lib/PowerRenameRegEx.cpp
@@ -11,6 +11,48 @@
using std::conditional_t;
using std::regex_error;
+/// <summary>
+/// Sanitizes the input string by replacing non-breaking spaces with regular spaces and
+/// normalizes it to Unicode NFC (precomposed) form.
+/// </summary>
+/// <param name="input">The input wide string to sanitize and normalize. If empty, it is
+/// returned unchanged.</param>
+/// <returns>A new std::wstring containing the sanitized and NFC-normalized form of the
+/// input. If normalization fails, the function returns the sanitized string (with non-
+/// breaking spaces replaced) as-is.</returns>
+static std::wstring SanitizeAndNormalize(const std::wstring& input)
+{
+    if (input.empty())
+    {
+        return input;
+    }
+
+    std::wstring sanitized = input;
+    // Replace non-breaking spaces (0xA0) with regular spaces (0x20).
+    std::replace(sanitized.begin(), sanitized.end(), L'\u00A0', L' ');
+
+    // Normalize to NFC (Precomposed).
+    // With a zero-length destination NormalizeString only returns an *estimate* of the
+    // required buffer size (null terminator included), not the exact length.
+    const int estimatedSize = NormalizeString(NormalizationC, sanitized.c_str(), -1, nullptr, 0);
+    if (estimatedSize <= 0)
+    {
+        return sanitized; // Return unaltered if normalization fails.
+    }
+
+    // Perform the normalization. This call can still fail (e.g. invalid Unicode in the
+    // input or an insufficient estimate), and it may write fewer characters than the
+    // estimate, so its return value - not the estimate - decides the final length.
+    std::wstring normalized(static_cast<size_t>(estimatedSize), L'\0');
+    const int writtenSize = NormalizeString(NormalizationC, sanitized.c_str(), -1, &normalized[0], estimatedSize);
+    if (writtenSize <= 0)
+    {
+        return sanitized; // Never hand back a partially written buffer.
+    }
+
+    // Drop the unused tail of the buffer so no padding nulls end up inside the string.
+    normalized.resize(static_cast<size_t>(writtenSize));
+
+    // Remove the explicit null terminator added by NormalizeString.
+    if (!normalized.empty() && normalized.back() == L'\0')
+    {
+        normalized.pop_back();
+    }
+
+    return normalized;
+}
+
IFACEMETHODIMP_(ULONG)
CPowerRenameRegEx::AddRef()
{
@@ -94,18 +136,20 @@ IFACEMETHODIMP CPowerRenameRegEx::PutSearchTerm(_In_ PCWSTR searchTerm, bool for
HRESULT hr = S_OK;
if (searchTerm)
{
+ std::wstring normalizedSearchTerm = SanitizeAndNormalize(searchTerm);
+
CSRWExclusiveAutoLock lock(&m_lock);
- if (m_searchTerm == nullptr || lstrcmp(searchTerm, m_searchTerm) != 0)
+ if (m_searchTerm == nullptr || lstrcmp(normalizedSearchTerm.c_str(), m_searchTerm) != 0)
{
changed = true;
CoTaskMemFree(m_searchTerm);
- if (lstrcmp(searchTerm, L"") == 0)
+ if (normalizedSearchTerm.empty())
{
m_searchTerm = NULL;
}
else
{
- hr = SHStrDup(searchTerm, &m_searchTerm);
+ hr = SHStrDup(normalizedSearchTerm.c_str(), &m_searchTerm);
}
}
}
@@ -238,17 +282,19 @@ IFACEMETHODIMP CPowerRenameRegEx::PutReplaceTerm(_In_ PCWSTR replaceTerm, bool f
HRESULT hr = S_OK;
if (replaceTerm)
{
+ std::wstring normalizedReplaceTerm = SanitizeAndNormalize(replaceTerm);
+
CSRWExclusiveAutoLock lock(&m_lock);
- if (m_replaceTerm == nullptr || lstrcmp(replaceTerm, m_RawReplaceTerm.c_str()) != 0)
+ if (m_replaceTerm == nullptr || lstrcmp(normalizedReplaceTerm.c_str(), m_RawReplaceTerm.c_str()) != 0)
{
changed = true;
CoTaskMemFree(m_replaceTerm);
- m_RawReplaceTerm = replaceTerm;
+ m_RawReplaceTerm = normalizedReplaceTerm;
if ((m_flags & RandomizeItems) || (m_flags & EnumerateItems))
hr = _OnEnumerateOrRandomizeItemsChanged();
else
- hr = SHStrDup(replaceTerm, &m_replaceTerm);
+ hr = SHStrDup(normalizedReplaceTerm.c_str(), &m_replaceTerm);
}
}
@@ -397,7 +443,10 @@ HRESULT CPowerRenameRegEx::Replace(_In_ PCWSTR source, _Outptr_ PWSTR* result, u
{
return hr;
}
- std::wstring res = source;
+
+ std::wstring normalizedSource = SanitizeAndNormalize(source);
+
+ std::wstring res = normalizedSource;
try
{
// TODO: creating the regex could be costly. May want to cache this.
@@ -438,9 +487,8 @@ HRESULT CPowerRenameRegEx::Replace(_In_ PCWSTR source, _Outptr_ PWSTR* result, u
}
}
- std::wstring sourceToUse;
+ std::wstring sourceToUse = normalizedSource;
sourceToUse.reserve(MAX_PATH);
- sourceToUse = source;
std::wstring searchTerm(m_searchTerm);
std::wstring replaceTerm;
@@ -536,7 +584,7 @@ HRESULT CPowerRenameRegEx::Replace(_In_ PCWSTR source, _Outptr_ PWSTR* result, u
replaceTerm = regex_replace(replaceTerm, zeroGroupRegex, L"$1$$$0");
replaceTerm = regex_replace(replaceTerm, otherGroupsRegex, L"$1$0$4");
- res = RegexReplaceDispatch[_useBoostLib](source, m_searchTerm, replaceTerm, m_flags & MatchAllOccurrences, isCaseInsensitive);
+ res = RegexReplaceDispatch[_useBoostLib](sourceToUse, m_searchTerm, replaceTerm, m_flags & MatchAllOccurrences, isCaseInsensitive);
// Use regex search to determine if a match exists. This is the basis for incrementing
// the counter.
@@ -669,17 +717,17 @@ PowerRenameLib::MetadataType CPowerRenameRegEx::_GetMetadataTypeFromFlags() cons
{
if (m_flags & MetadataSourceXMP)
return PowerRenameLib::MetadataType::XMP;
-
+
// Default to EXIF
return PowerRenameLib::MetadataType::EXIF;
}
-// Interface method implementation
+// Interface method implementation
IFACEMETHODIMP CPowerRenameRegEx::GetMetadataType(_Out_ PowerRenameLib::MetadataType* metadataType)
{
if (metadataType == nullptr)
return E_POINTER;
-
+
*metadataType = _GetMetadataTypeFromFlags();
return S_OK;
}
@@ -689,5 +737,3 @@ PowerRenameLib::MetadataType CPowerRenameRegEx::GetMetadataType() const
{
return _GetMetadataTypeFromFlags();
}
-
-
diff --git a/src/modules/powerrename/unittests/CommonRegExTests.h b/src/modules/powerrename/unittests/CommonRegExTests.h
index 1b0ad30b92..4dc078e9b1 100644
--- a/src/modules/powerrename/unittests/CommonRegExTests.h
+++ b/src/modules/powerrename/unittests/CommonRegExTests.h
@@ -647,6 +647,54 @@ TEST_METHOD(VerifyCounterIncrementsWhenResultIsUnchanged)
CoTaskMemFree(result);
}
+// Helper function to verify normalization behavior for the given PowerRename flags.
+// Checks that (1) a decomposed (NFD) source matches a precomposed (NFC) search term and
+// (2) a non-breaking space in the source matches a regular space in the search term.
+void VerifyNormalizationHelper(DWORD flags)
+{
+    CComPtr<IPowerRenameRegEx> renameRegEx;
+    Assert::IsTrue(CPowerRenameRegEx::s_CreateInstance(&renameRegEx) == S_OK);
+    Assert::IsTrue(renameRegEx->PutFlags(flags) == S_OK);
+
+    // 1. Unicode Normalization: NFD source with NFC search term.
+    PWSTR result = nullptr;
+    unsigned long index = 0;
+
+    // Source: "Test" + U+0438 (Cyrillic small letter i) + U+0306 (combining breve).
+    std::wstring sourceNFD = L"Test\u0438\u0306";
+    // Search: "Test" + U+0439 (Cyrillic small letter short i, the precomposed form).
+    std::wstring searchNFC = L"Test\u0439";
+
+    // A match should occur despite different normalization forms.
+    Assert::IsTrue(renameRegEx->PutSearchTerm(searchNFC.c_str()) == S_OK);
+    Assert::IsTrue(renameRegEx->PutReplaceTerm(L"Match") == S_OK);
+    Assert::IsTrue(renameRegEx->Replace(sourceNFD.c_str(), &result, index) == S_OK);
+    Assert::AreEqual(L"Match", result, L"Failed to match NFD source with NFC search term.");
+    CoTaskMemFree(result);
+
+    // 2. Whitespace Normalization: test non-breaking space versus regular space.
+    // The replace term "Match" set above is still in effect for this check.
+    result = nullptr;
+    index = 0;
+
+    // Source: "Hello" + non-breaking space + "World".
+    std::wstring sourceNBSP = L"Hello\u00A0World";
+    // Search: "Hello" + regular space + "World".
+    std::wstring searchSpace = L"Hello World";
+
+    Assert::IsTrue(renameRegEx->PutSearchTerm(searchSpace.c_str()) == S_OK);
+    Assert::IsTrue(renameRegEx->Replace(sourceNBSP.c_str(), &result, index) == S_OK);
+    Assert::AreEqual(L"Match", result, L"Failed to match non-breaking space source with regular space search term.");
+    CoTaskMemFree(result);
+}
+
+// Runs the shared normalization checks with no flags set (flags == 0).
+TEST_METHOD(VerifyUnicodeAndWhitespaceNormalizationSimpleSearch)
+{
+ VerifyNormalizationHelper(0);
+}
+
+// Runs the shared normalization checks with the UseRegularExpressions flag set.
+TEST_METHOD(VerifyUnicodeAndWhitespaceNormalizationRegex)
+{
+ VerifyNormalizationHelper(UseRegularExpressions);
+}
+
#ifndef TESTS_PARTIAL
};
}