evaluation for the semantic search

This commit is contained in:
vanzue
2026-02-06 17:26:24 +08:00
parent 916182e47d
commit ee702f9edf
21 changed files with 1489 additions and 0 deletions

View File

@@ -1059,6 +1059,16 @@
<Platform Solution="*|x64" Project="x64" />
</Project>
</Folder>
<Folder Name="/tools/SettingsSearchEvaluation/">
<Project Path="tools/SettingsSearchEvaluation/SettingsSearchEvaluation.csproj">
<Platform Solution="*|ARM64" Project="ARM64" />
<Platform Solution="*|x64" Project="x64" />
</Project>
<Project Path="tools/SettingsSearchEvaluation.Tests/SettingsSearchEvaluation.Tests.csproj">
<Platform Solution="*|ARM64" Project="ARM64" />
<Platform Solution="*|x64" Project="x64" />
</Project>
</Folder>
<Folder Name="/Solution Items/">
<File Path=".vsconfig" />
<File Path="Cpp.Build.props" />

View File

@@ -0,0 +1,130 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System.IO;
using Microsoft.VisualStudio.TestTools.UnitTesting;
namespace SettingsSearchEvaluation.Tests;
/// <summary>
/// Tests for <see cref="EvaluationDataLoader"/>: header normalization, duplicate-id
/// diagnostics, autogenerated fallback cases and loading of a cases file.
/// </summary>
[TestClass]
public class EvaluationDataLoaderTests
{
    // Index holding one entry with an explicit header; used by both LoadCases tests.
    private const string SingleEntryIndexJson = """
        [
        {
        "type": 0,
        "header": "Fancy Zones",
        "pageTypeName": "FancyZonesPage",
        "elementName": "",
        "elementUid": "FancyZones",
        "parentElementName": "",
        "description": "",
        "icon": null
        }
        ]
        """;

    /// <summary>
    /// Two header-less entries sharing one element uid must both load, receive a header
    /// derived from the uid, and be reported as a single duplicate bucket of size two.
    /// </summary>
    [TestMethod]
    public void LoadEntriesFromJson_NormalizesHeaderAndDetectsDuplicates()
    {
        const string duplicateUidIndexJson = """
            [
            {
            "type": 0,
            "header": null,
            "pageTypeName": "ColorPickerPage",
            "elementName": "",
            "elementUid": "Activation_Shortcut",
            "parentElementName": "",
            "description": null,
            "icon": null
            },
            {
            "type": 0,
            "header": null,
            "pageTypeName": "FancyZonesPage",
            "elementName": "",
            "elementUid": "Activation_Shortcut",
            "parentElementName": "",
            "description": null,
            "icon": null
            }
            ]
            """;

        var loaded = EvaluationDataLoader.LoadEntriesFromJson(duplicateUidIndexJson);

        Assert.AreEqual(2, loaded.Entries.Count);
        Assert.AreEqual("Activation Shortcut", loaded.Entries[0].Header);
        Assert.AreEqual(1, loaded.Diagnostics.DuplicateIdBucketCount);

        var found = loaded.Diagnostics.DuplicateIdCounts.TryGetValue("Activation_Shortcut", out var occurrences);
        Assert.IsTrue(found);
        Assert.AreEqual(2, occurrences);
    }

    /// <summary>
    /// Without a cases file, one case per index entry is generated from the entry header.
    /// </summary>
    [TestMethod]
    public void LoadCases_GeneratesFallbackCases_WhenNoCasesFileSpecified()
    {
        var loaded = EvaluationDataLoader.LoadEntriesFromJson(SingleEntryIndexJson);

        var generated = EvaluationDataLoader.LoadCases(null, loaded.Entries);

        Assert.AreEqual(1, generated.Count);
        var only = generated[0];
        Assert.AreEqual("Fancy Zones", only.Query);
        Assert.AreEqual("FancyZones", only.ExpectedIds[0]);
    }

    /// <summary>
    /// A cases file is trimmed and de-duplicated; cases with a blank query or without any
    /// usable expected id are dropped.
    /// </summary>
    [TestMethod]
    public void LoadCases_LoadsAndNormalizesCasesFile()
    {
        const string casesFileContent = """
            [
            {
            "query": " fancy zones ",
            "expectedIds": [ "FancyZones", " fancyzones ", "" ],
            "notes": "normalization test"
            },
            {
            "query": "",
            "expectedIds": [ "FancyZones" ]
            },
            {
            "query": "missing expected",
            "expectedIds": [ "" ]
            }
            ]
            """;

        var loaded = EvaluationDataLoader.LoadEntriesFromJson(SingleEntryIndexJson);
        var tempCasesPath = Path.GetTempFileName();
        try
        {
            File.WriteAllText(tempCasesPath, casesFileContent);

            var loadedCases = EvaluationDataLoader.LoadCases(tempCasesPath, loaded.Entries);

            Assert.AreEqual(1, loadedCases.Count);
            var survivor = loadedCases[0];
            Assert.AreEqual("fancy zones", survivor.Query);
            Assert.AreEqual(1, survivor.ExpectedIds.Count);
            Assert.AreEqual("FancyZones", survivor.ExpectedIds[0]);
            Assert.AreEqual("normalization test", survivor.Notes);
        }
        finally
        {
            // Always remove the temp file, even when an assertion fails.
            File.Delete(tempCasesPath);
        }
    }
}

View File

@@ -0,0 +1,50 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.Collections.Generic;
using Microsoft.VisualStudio.TestTools.UnitTesting;
namespace SettingsSearchEvaluation.Tests;
/// <summary>
/// Tests for the rank lookup and latency aggregation helpers in <see cref="EvaluationMath"/>.
/// </summary>
[TestClass]
public class EvaluationMathTests
{
    private static readonly double[] LatencySamples = new[] { 10.0, 20.0, 30.0, 40.0, 50.0 };

    // Builds the ranked result list used by the FindBestRank tests.
    private static string[] CreateRankedIds() => new[] { "a", "b", "c", "d" };

    // Builds a case-insensitive set of expected ids.
    private static HashSet<string> CreateExpected(params string[] ids) => new(ids, StringComparer.OrdinalIgnoreCase);

    /// <summary>An expected id sitting in third place yields the 1-based rank 3.</summary>
    [TestMethod]
    public void FindBestRank_ReturnsExpectedRank()
    {
        var actualRank = EvaluationMath.FindBestRank(CreateRankedIds(), CreateExpected("c"));

        Assert.AreEqual(3, actualRank);
    }

    /// <summary>When no expected id is present in the results the rank is 0.</summary>
    [TestMethod]
    public void FindBestRank_ReturnsZero_WhenMissing()
    {
        var actualRank = EvaluationMath.FindBestRank(CreateRankedIds(), CreateExpected("x", "y"));

        Assert.AreEqual(0, actualRank);
    }

    /// <summary>Five evenly spaced samples produce the expected min, P50, P95, max and mean.</summary>
    [TestMethod]
    public void ComputeLatencySummary_ComputesQuantiles()
    {
        var latency = EvaluationMath.ComputeLatencySummary(LatencySamples);

        Assert.AreEqual(5, latency.Samples);
        Assert.AreEqual(10.0, latency.MinMs);
        Assert.AreEqual(30.0, latency.P50Ms);
        Assert.AreEqual(50.0, latency.P95Ms);
        Assert.AreEqual(50.0, latency.MaxMs);
        Assert.AreEqual(30.0, latency.AverageMs, 0.0001);
    }
}

View File

@@ -0,0 +1,69 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.Threading.Tasks;
using Microsoft.VisualStudio.TestTools.UnitTesting;
namespace SettingsSearchEvaluation.Tests;
/// <summary>
/// End-to-end test of <see cref="Evaluator"/> using only the basic search engine.
/// </summary>
[TestClass]
public class EvaluatorTests
{
    /// <summary>
    /// Indexes a single entry, queries it by its exact header and expects a rank-1 hit,
    /// which makes both Recall@K and MRR equal to 1.
    /// </summary>
    [TestMethod]
    public async Task RunAsync_BasicEngine_ReturnsExpectedMetricsForExactSingleEntry()
    {
        const string singleEntryIndexJson = """
            [
            {
            "type": 0,
            "header": "Fancy Zones",
            "pageTypeName": "FancyZonesPage",
            "elementName": "",
            "elementUid": "FancyZones",
            "parentElementName": "",
            "description": "",
            "icon": null
            }
            ]
            """;

        var loaded = EvaluationDataLoader.LoadEntriesFromJson(singleEntryIndexJson);

        var exactQueryCase = new EvaluationCase
        {
            Query = "Fancy Zones",
            ExpectedIds = new[] { "FancyZones" },
            Notes = "Exact query should be rank 1.",
        };
        var evaluationCases = new[] { exactQueryCase };

        // One measured iteration, no warmup, basic engine only.
        var runnerOptions = new RunnerOptions
        {
            IndexJsonPath = "test-index.json",
            CasesJsonPath = null,
            Engines = new[] { SearchEngineKind.Basic },
            MaxResults = 5,
            TopK = 5,
            Iterations = 1,
            WarmupIterations = 0,
            SemanticIndexTimeout = TimeSpan.FromSeconds(1),
            OutputJsonPath = null,
        };

        var report = await Evaluator.RunAsync(runnerOptions, loaded.Entries, loaded.Diagnostics, evaluationCases);

        Assert.AreEqual(1, report.Engines.Count);

        var basicReport = report.Engines[0];
        Assert.AreEqual(SearchEngineKind.Basic, basicReport.Engine);
        Assert.IsTrue(basicReport.IsAvailable);
        Assert.AreEqual(1, basicReport.QueryCount);
        Assert.AreEqual(1.0, basicReport.RecallAtK, 0.0001);
        Assert.AreEqual(1.0, basicReport.Mrr, 0.0001);
        Assert.AreEqual(1, basicReport.CaseResults.Count);

        var onlyCase = basicReport.CaseResults[0];
        Assert.IsTrue(onlyCase.HitAtK);
        Assert.AreEqual(1, onlyCase.BestRank);
    }
}

View File

@@ -0,0 +1,17 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Look at Directory.Build.props in root for common stuff as well -->
<!-- Shared properties imported from the src folder two levels up. -->
<Import Project="..\..\src\Common.Dotnet.CsWinRT.props" />
<PropertyGroup>
<!-- This test project is not packed. -->
<IsPackable>false</IsPackable>
<!-- Test binaries are written under the tests folder of the per-configuration, per-platform output. -->
<OutputPath>..\..\$(Configuration)\$(Platform)\tests\SettingsSearchEvaluationTests\</OutputPath>
</PropertyGroup>
<ItemGroup>
<!-- No version is given here. NOTE(review): presumably managed centrally; confirm. -->
<PackageReference Include="MSTest" />
</ItemGroup>
<ItemGroup>
<!-- The evaluation tool under test. -->
<ProjectReference Include="..\SettingsSearchEvaluation\SettingsSearchEvaluation.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,26 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System.Collections.ObjectModel;
namespace SettingsSearchEvaluation;
/// <summary>
/// Summary statistics about the loaded index entries, focused on duplicated ids.
/// </summary>
internal sealed class DatasetDiagnostics
{
/// <summary>Number of index entries that were loaded.</summary>
public required int TotalEntries { get; init; }
/// <summary>Number of distinct entry ids.</summary>
public required int DistinctIds { get; init; }
/// <summary>Number of ids that are shared by more than one entry.</summary>
public required int DuplicateIdBucketCount { get; init; }
/// <summary>Maps each duplicated id to how many entries use it.</summary>
public required IReadOnlyDictionary<string, int> DuplicateIdCounts { get; init; }
/// <summary>Diagnostics for an empty dataset: all counters are zero and there are no duplicates.</summary>
public static DatasetDiagnostics Empty { get; } = new()
{
TotalEntries = 0,
DistinctIds = 0,
DuplicateIdBucketCount = 0,
DuplicateIdCounts = new ReadOnlyDictionary<string, int>(new Dictionary<string, int>()),
};
}

View File

@@ -0,0 +1,30 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
namespace SettingsSearchEvaluation;
/// <summary>
/// Evaluation outcome for a single search engine: availability, indexing cost,
/// quality metrics and per-case results.
/// </summary>
internal sealed class EngineEvaluationReport
{
/// <summary>Which engine this report describes.</summary>
public required SearchEngineKind Engine { get; init; }
/// <summary>False when the engine could not be initialized or indexed; the metrics are then all zero.</summary>
public required bool IsAvailable { get; init; }
/// <summary>Reason the engine was unavailable; null when it is available.</summary>
public string? AvailabilityError { get; init; }
/// <summary>Human-readable description of the engine's capabilities, if known.</summary>
public string? CapabilitiesSummary { get; init; }
/// <summary>Number of entries handed to the engine for indexing.</summary>
public int IndexedEntries { get; init; }
/// <summary>Number of evaluation cases that were run.</summary>
public int QueryCount { get; init; }
/// <summary>Elapsed indexing time in milliseconds.</summary>
public double IndexingTimeMs { get; init; }
/// <summary>Fraction of cases whose best rank fell within the configured top K.</summary>
public double RecallAtK { get; init; }
/// <summary>Mean reciprocal rank across the cases; a case with no match contributes zero.</summary>
public double Mrr { get; init; }
/// <summary>Latency statistics over all measured search calls, in milliseconds.</summary>
public required LatencySummary SearchLatencyMs { get; init; }
/// <summary>Per-case evaluation results.</summary>
public required IReadOnlyList<QueryEvaluationResult> CaseResults { get; init; }
}

View File

@@ -0,0 +1,14 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
namespace SettingsSearchEvaluation;
/// <summary>
/// A single evaluation query together with the ids that count as a correct result.
/// </summary>
internal sealed class EvaluationCase
{
/// <summary>Search text sent to the engine.</summary>
public required string Query { get; init; }
/// <summary>Ids that are accepted as a correct answer; finding any one of them counts as a match.</summary>
public required IReadOnlyList<string> ExpectedIds { get; init; }
/// <summary>Optional free-form remark that is copied into the case result.</summary>
public string? Notes { get; init; }
}

View File

@@ -0,0 +1,180 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System.Collections.ObjectModel;
using System.Text.Json;
using System.Text.RegularExpressions;
using Settings.UI.Library;
namespace SettingsSearchEvaluation;
/// <summary>
/// Loads the settings search index and the evaluation cases from JSON and normalizes
/// them (trimming, fallback ids and headers, de-duplication of expected ids).
/// </summary>
internal static partial class EvaluationDataLoader
{
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNameCaseInsensitive = true,
        ReadCommentHandling = JsonCommentHandling.Skip,
    };

    /// <summary>
    /// Reads the index file at <paramref name="path"/> and parses it with
    /// <see cref="LoadEntriesFromJson"/>.
    /// </summary>
    /// <param name="path">Path of the index JSON file.</param>
    /// <returns>The normalized entries and their duplicate-id diagnostics.</returns>
    /// <exception cref="ArgumentException"><paramref name="path"/> is null, empty or whitespace.</exception>
    public static (IReadOnlyList<SettingEntry> Entries, DatasetDiagnostics Diagnostics) LoadEntriesFromFile(string path)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(path);
        var json = File.ReadAllText(path);
        return LoadEntriesFromJson(json);
    }

    /// <summary>
    /// Parses a JSON array of index entries and normalizes each one.
    /// </summary>
    /// <remarks>
    /// String fields are trimmed and nulls become empty strings. An entry without an
    /// element uid gets the synthetic id "pageType|elementName". An entry without a header
    /// gets one derived from its original uid, else its element name, else its page type.
    /// Null array elements are skipped.
    /// </remarks>
    /// <param name="json">JSON text; blank input yields an empty result.</param>
    /// <returns>The normalized entries and their duplicate-id diagnostics.</returns>
    public static (IReadOnlyList<SettingEntry> Entries, DatasetDiagnostics Diagnostics) LoadEntriesFromJson(string json)
    {
        if (string.IsNullOrWhiteSpace(json))
        {
            return (Array.Empty<SettingEntry>(), DatasetDiagnostics.Empty);
        }

        var rawEntries = JsonSerializer.Deserialize<List<RawSettingEntry?>>(json, JsonOptions) ?? new List<RawSettingEntry?>();
        var normalized = new List<SettingEntry>(rawEntries.Count);
        foreach (var raw in rawEntries)
        {
            // A literal `null` inside the JSON array deserializes to a null element.
            if (raw is null)
            {
                continue;
            }

            var pageType = raw.PageTypeName?.Trim() ?? string.Empty;
            var elementName = raw.ElementName?.Trim() ?? string.Empty;
            var declaredUid = raw.ElementUid?.Trim() ?? string.Empty;
            var elementUid = string.IsNullOrEmpty(declaredUid)
                ? $"{pageType}|{elementName}"
                : declaredUid;

            var header = raw.Header?.Trim();
            if (string.IsNullOrEmpty(header))
            {
                // Use the uid as declared in the file, not the synthesized "page|element" id:
                // the synthetic id is never blank, so passing it would make the element-name and
                // page-type fallbacks unreachable and leak the '|' separator into the header.
                header = BuildFallbackHeader(declaredUid, elementName, pageType);
            }

            var description = raw.Description?.Trim() ?? string.Empty;
            var parent = raw.ParentElementName?.Trim() ?? string.Empty;
            var icon = raw.Icon?.Trim() ?? string.Empty;

            normalized.Add(new SettingEntry(
                raw.Type,
                header,
                pageType,
                elementName,
                elementUid,
                parent,
                description,
                icon));
        }

        return (normalized, BuildDiagnostics(normalized));
    }

    /// <summary>
    /// Loads evaluation cases from <paramref name="casesPath"/>; when no path is given, or
    /// the file contains no usable case, cases are generated from <paramref name="entries"/>.
    /// </summary>
    /// <remarks>
    /// A case is usable when its query is not blank and at least one expected id is not
    /// blank. Queries and ids are trimmed, and expected ids are de-duplicated ignoring case.
    /// </remarks>
    /// <param name="casesPath">Optional path of the cases JSON file.</param>
    /// <param name="entries">Index entries used to generate fallback cases.</param>
    /// <returns>The cases to evaluate.</returns>
    public static IReadOnlyList<EvaluationCase> LoadCases(string? casesPath, IReadOnlyList<SettingEntry> entries)
    {
        if (!string.IsNullOrWhiteSpace(casesPath))
        {
            var json = File.ReadAllText(casesPath);
            var parsed = JsonSerializer.Deserialize<List<RawEvaluationCase?>>(json, JsonOptions) ?? new List<RawEvaluationCase?>();
            var normalized = parsed
                .OfType<RawEvaluationCase>() // drops null array elements
                .Where(c => !string.IsNullOrWhiteSpace(c.Query))
                .Select(c => new EvaluationCase
                {
                    Query = c.Query!.Trim(),
                    ExpectedIds = c.ExpectedIds?
                        .Where(id => !string.IsNullOrWhiteSpace(id))
                        .Select(id => id.Trim())
                        .Distinct(StringComparer.OrdinalIgnoreCase)
                        .ToArray() ?? Array.Empty<string>(),
                    Notes = c.Notes,
                })
                .Where(c => c.ExpectedIds.Count > 0)
                .ToList();

            if (normalized.Count > 0)
            {
                return normalized;
            }
        }

        return GenerateFallbackCases(entries);
    }

    /// <summary>
    /// Counts entries, distinct ids and duplicated ids, comparing ids case-insensitively.
    /// </summary>
    private static DatasetDiagnostics BuildDiagnostics(IReadOnlyList<SettingEntry> entries)
    {
        // Count each group once instead of re-enumerating it for the filter, the sort and the value.
        var duplicateBuckets = entries
            .GroupBy(x => x.Id, StringComparer.OrdinalIgnoreCase)
            .Select(group => (Id: group.Key, Count: group.Count()))
            .Where(bucket => bucket.Count > 1)
            .OrderByDescending(bucket => bucket.Count)
            .ToDictionary(bucket => bucket.Id, bucket => bucket.Count, StringComparer.OrdinalIgnoreCase);

        return new DatasetDiagnostics
        {
            TotalEntries = entries.Count,
            DistinctIds = entries.Select(x => x.Id).Distinct(StringComparer.OrdinalIgnoreCase).Count(),
            DuplicateIdBucketCount = duplicateBuckets.Count,
            DuplicateIdCounts = new ReadOnlyDictionary<string, int>(duplicateBuckets),
        };
    }

    /// <summary>
    /// Generates up to 40 cases, one per distinct id, that query an entry by its header
    /// and expect that entry's id.
    /// </summary>
    private static IReadOnlyList<EvaluationCase> GenerateFallbackCases(IReadOnlyList<SettingEntry> entries)
    {
        return entries
            .Where(entry => !string.IsNullOrWhiteSpace(entry.Header) && !string.IsNullOrWhiteSpace(entry.Id))
            .GroupBy(entry => entry.Id, StringComparer.OrdinalIgnoreCase)
            .Select(group => group.First())
            .Take(40)
            .Select(entry => new EvaluationCase
            {
                Query = entry.Header,
                ExpectedIds = new[] { entry.Id },
                Notes = "Autogenerated case from index entry header.",
            })
            .ToArray();
    }

    /// <summary>
    /// Derives a readable header from the first non-blank of uid, element name and page
    /// type: underscores become spaces, whitespace runs collapse to one space, and a space
    /// is inserted at lower-case/digit to upper-case boundaries.
    /// </summary>
    private static string BuildFallbackHeader(string elementUid, string elementName, string pageTypeName)
    {
        var candidate = !string.IsNullOrWhiteSpace(elementUid)
            ? elementUid
            : (!string.IsNullOrWhiteSpace(elementName) ? elementName : pageTypeName);

        candidate = candidate.Replace('_', ' ').Trim();
        candidate = ConsecutiveWhitespaceRegex().Replace(candidate, " ");
        candidate = CamelBoundaryRegex().Replace(candidate, "$1 $2");
        return candidate;
    }

    [GeneratedRegex(@"\s+")]
    private static partial Regex ConsecutiveWhitespaceRegex();

    [GeneratedRegex("([a-z0-9])([A-Z])")]
    private static partial Regex CamelBoundaryRegex();

    // Shape of one index entry as stored in the JSON file; every string may be missing.
    private sealed class RawSettingEntry
    {
        public EntryType Type { get; init; }

        public string? Header { get; init; }

        public string? PageTypeName { get; init; }

        public string? ElementName { get; init; }

        public string? ElementUid { get; init; }

        public string? ParentElementName { get; init; }

        public string? Description { get; init; }

        public string? Icon { get; init; }
    }

    // Shape of one evaluation case as stored in the cases JSON file.
    private sealed class RawEvaluationCase
    {
        public string? Query { get; init; }

        public List<string>? ExpectedIds { get; init; }

        public string? Notes { get; init; }
    }
}

View File

@@ -0,0 +1,65 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
namespace SettingsSearchEvaluation;
/// <summary>
/// Pure helpers for ranking and latency statistics used by the evaluator.
/// </summary>
internal static class EvaluationMath
{
    /// <summary>
    /// Returns the 1-based position of the first ranked id contained in
    /// <paramref name="expectedIds"/>, or 0 when none of them appears.
    /// </summary>
    /// <param name="rankedResultIds">Result ids, best first.</param>
    /// <param name="expectedIds">Ids that count as a match.</param>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public static int FindBestRank(IReadOnlyList<string> rankedResultIds, IReadOnlySet<string> expectedIds)
    {
        ArgumentNullException.ThrowIfNull(rankedResultIds);
        ArgumentNullException.ThrowIfNull(expectedIds);

        var resultCount = rankedResultIds.Count;
        if (expectedIds.Count > 0)
        {
            var position = 0;
            while (position < resultCount)
            {
                var candidate = rankedResultIds[position];
                position++;
                if (expectedIds.Contains(candidate))
                {
                    return position;
                }
            }
        }

        return 0;
    }

    /// <summary>
    /// Aggregates latency samples into count, min, P50, P95, max and mean.
    /// </summary>
    /// <param name="samplesMs">Latency samples in milliseconds, in any order.</param>
    /// <returns><see cref="LatencySummary.Empty"/> when there are no samples.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="samplesMs"/> is null.</exception>
    public static LatencySummary ComputeLatencySummary(IReadOnlyList<double> samplesMs)
    {
        ArgumentNullException.ThrowIfNull(samplesMs);

        if (samplesMs.Count == 0)
        {
            return LatencySummary.Empty;
        }

        double[] ascending = samplesMs.OrderBy(sample => sample).ToArray();
        double sum = samplesMs.Sum();
        int sampleCount = ascending.Length;

        return new LatencySummary
        {
            Samples = sampleCount,
            MinMs = ascending[0],
            P50Ms = Percentile(ascending, 0.50),
            P95Ms = Percentile(ascending, 0.95),
            MaxMs = ascending[sampleCount - 1],
            AverageMs = sum / sampleCount,
        };
    }

    /// <summary>
    /// Picks the sample at index ceil(p * n) - 1 of an ascending list, with both the
    /// fraction and the resulting index clamped to their valid ranges.
    /// </summary>
    private static double Percentile(IReadOnlyList<double> sortedSamples, double percentile)
    {
        var count = sortedSamples.Count;
        if (count == 0)
        {
            return 0;
        }

        var fraction = Math.Clamp(percentile, 0, 1);
        var nearestRank = (int)Math.Ceiling(fraction * count);
        var index = Math.Clamp(nearestRank - 1, 0, count - 1);
        return sortedSamples[index];
    }
}

View File

@@ -0,0 +1,18 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
namespace SettingsSearchEvaluation;
/// <summary>
/// Top-level result of an evaluation run across all requested engines.
/// </summary>
internal sealed class EvaluationReport
{
/// <summary>UTC time at which the report was created.</summary>
public required DateTimeOffset GeneratedAtUtc { get; init; }
/// <summary>Path of the index JSON file, taken from the run options.</summary>
public required string IndexJsonPath { get; init; }
/// <summary>Diagnostics about the loaded index entries.</summary>
public required DatasetDiagnostics Dataset { get; init; }
/// <summary>Number of evaluation cases in the run.</summary>
public required int CaseCount { get; init; }
/// <summary>One report per evaluated engine, in the order the engines were requested.</summary>
public required IReadOnlyList<EngineEvaluationReport> Engines { get; init; }
}

View File

@@ -0,0 +1,292 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System.Diagnostics;
using Common.Search;
using Common.Search.FuzzSearch;
using Common.Search.SemanticSearch;
using Settings.UI.Library;
namespace SettingsSearchEvaluation;
/// <summary>
/// Runs the evaluation cases against each requested search engine and gathers recall,
/// mean reciprocal rank and latency figures.
/// </summary>
internal static class Evaluator
{
    /// <summary>
    /// Evaluates every engine listed in <paramref name="options"/> and assembles the report.
    /// </summary>
    /// <param name="options">Run configuration (engines, result limits, iteration counts).</param>
    /// <param name="entries">Index entries to load into each engine.</param>
    /// <param name="dataset">Diagnostics of the loaded dataset, copied into the report.</param>
    /// <param name="cases">Evaluation cases to run.</param>
    /// <param name="cancellationToken">Checked before each engine and before each case.</param>
    /// <exception cref="ArgumentNullException">A required argument is null.</exception>
    /// <exception cref="InvalidOperationException">An unknown engine kind was requested.</exception>
    public static async Task<EvaluationReport> RunAsync(
        RunnerOptions options,
        IReadOnlyList<SettingEntry> entries,
        DatasetDiagnostics dataset,
        IReadOnlyList<EvaluationCase> cases,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(options);
        ArgumentNullException.ThrowIfNull(entries);
        ArgumentNullException.ThrowIfNull(dataset);
        ArgumentNullException.ThrowIfNull(cases);

        var engineReports = new List<EngineEvaluationReport>(options.Engines.Count);
        foreach (var kind in options.Engines)
        {
            cancellationToken.ThrowIfCancellationRequested();

            EngineEvaluationReport engineReport;
            switch (kind)
            {
                case SearchEngineKind.Basic:
                    engineReport = await EvaluateBasicAsync(options, entries, cases, cancellationToken);
                    break;
                case SearchEngineKind.Semantic:
                    engineReport = await EvaluateSemanticAsync(options, entries, cases, cancellationToken);
                    break;
                default:
                    throw new InvalidOperationException($"Unsupported engine '{kind}'.");
            }

            engineReports.Add(engineReport);
        }

        return new EvaluationReport
        {
            GeneratedAtUtc = DateTimeOffset.UtcNow,
            IndexJsonPath = options.IndexJsonPath,
            Dataset = dataset,
            CaseCount = cases.Count,
            Engines = engineReports,
        };
    }

    /// <summary>
    /// Indexes the entries into the fuzzy engine, timing initialization plus indexing,
    /// then runs the query loop.
    /// </summary>
    private static async Task<EngineEvaluationReport> EvaluateBasicAsync(
        RunnerOptions options,
        IReadOnlyList<SettingEntry> entries,
        IReadOnlyList<EvaluationCase> cases,
        CancellationToken cancellationToken)
    {
        using var engine = new FuzzSearchEngine<SettingEntry>();

        var indexingTimer = new Stopwatch();
        indexingTimer.Start();
        await engine.InitializeAsync(cancellationToken);
        await engine.IndexBatchAsync(entries, cancellationToken);
        indexingTimer.Stop();

        var metrics = await EvaluateQueryLoopAsync(
            cases,
            options,
            (query, searchOptions, token) => engine.SearchAsync(query, searchOptions, token),
            cancellationToken);

        var indexingTimeMs = indexingTimer.Elapsed.TotalMilliseconds;
        return new EngineEvaluationReport
        {
            Engine = SearchEngineKind.Basic,
            IsAvailable = true,
            AvailabilityError = null,
            CapabilitiesSummary = "Fuzzy text search engine",
            IndexedEntries = entries.Count,
            QueryCount = cases.Count,
            IndexingTimeMs = indexingTimeMs,
            RecallAtK = metrics.RecallAtK,
            Mrr = metrics.Mrr,
            SearchLatencyMs = metrics.Latency,
            CaseResults = metrics.CaseResults,
        };
    }

    /// <summary>
    /// Evaluates the semantic engine on a uniquely named index. Initialization failure,
    /// indexing failure, or an exception while waiting for indexing to finish yields an
    /// "unavailable" report rather than an exception.
    /// </summary>
    private static async Task<EngineEvaluationReport> EvaluateSemanticAsync(
        RunnerOptions options,
        IReadOnlyList<SettingEntry> entries,
        IReadOnlyList<EvaluationCase> cases,
        CancellationToken cancellationToken)
    {
        // Process id plus a fresh GUID keeps the index name unique per run.
        var indexName = $"PowerToys.Settings.Eval.{Environment.ProcessId}.{Guid.NewGuid():N}";
        using var engine = new SemanticSearchEngine<SettingEntry>(indexName);

        var initResult = await engine.InitializeWithResultAsync(cancellationToken);
        if (initResult.IsFailure || !engine.IsReady)
        {
            return CreateUnavailableSemanticReport(
                FormatError(initResult.Error) ?? "Semantic engine is not ready.",
                null,
                0);
        }

        var indexingTimer = Stopwatch.StartNew();
        var indexResult = await engine.IndexBatchWithResultAsync(entries, cancellationToken);
        if (indexResult.IsFailure)
        {
            return CreateUnavailableSemanticReport(
                FormatError(indexResult.Error) ?? "Semantic indexing failed.",
                BuildCapabilitiesSummary(engine.SemanticCapabilities),
                indexingTimer.Elapsed.TotalMilliseconds);
        }

        try
        {
            await engine.WaitForIndexingCompleteAsync(options.SemanticIndexTimeout);
        }
        catch (Exception ex)
        {
            return CreateUnavailableSemanticReport(
                $"Semantic indexing did not become idle: {ex.Message}",
                BuildCapabilitiesSummary(engine.SemanticCapabilities),
                indexingTimer.Elapsed.TotalMilliseconds);
        }

        indexingTimer.Stop();

        // A search that produced no value is scored as an empty result list.
        async Task<IReadOnlyList<SearchResult<SettingEntry>>> SearchSemanticAsync(string query, SearchOptions searchOptions, CancellationToken token)
        {
            var result = await engine.SearchWithResultAsync(query, searchOptions, token);
            return result.Value ?? Array.Empty<SearchResult<SettingEntry>>();
        }

        var metrics = await EvaluateQueryLoopAsync(cases, options, SearchSemanticAsync, cancellationToken);

        return new EngineEvaluationReport
        {
            Engine = SearchEngineKind.Semantic,
            IsAvailable = true,
            AvailabilityError = null,
            CapabilitiesSummary = BuildCapabilitiesSummary(engine.SemanticCapabilities),
            IndexedEntries = entries.Count,
            QueryCount = cases.Count,
            IndexingTimeMs = indexingTimer.Elapsed.TotalMilliseconds,
            RecallAtK = metrics.RecallAtK,
            Mrr = metrics.Mrr,
            SearchLatencyMs = metrics.Latency,
            CaseResults = metrics.CaseResults,
        };
    }

    /// <summary>
    /// Builds the report returned when the semantic engine cannot be evaluated: all
    /// metrics are zero and there are no case results.
    /// </summary>
    private static EngineEvaluationReport CreateUnavailableSemanticReport(
        string availabilityError,
        string? capabilitiesSummary,
        double indexingTimeMs)
    {
        return new EngineEvaluationReport
        {
            Engine = SearchEngineKind.Semantic,
            IsAvailable = false,
            AvailabilityError = availabilityError,
            CapabilitiesSummary = capabilitiesSummary,
            IndexedEntries = 0,
            QueryCount = 0,
            IndexingTimeMs = indexingTimeMs,
            RecallAtK = 0,
            Mrr = 0,
            SearchLatencyMs = LatencySummary.Empty,
            CaseResults = Array.Empty<QueryEvaluationResult>(),
        };
    }

    /// <summary>
    /// Runs every case through <paramref name="searchAsync"/>: warmup calls first, then
    /// the timed iterations. Ranking quality is scored on the first timed result only;
    /// every timed iteration contributes a latency sample.
    /// </summary>
    private static async Task<QueryRunMetrics> EvaluateQueryLoopAsync(
        IReadOnlyList<EvaluationCase> cases,
        RunnerOptions options,
        Func<string, SearchOptions, CancellationToken, Task<IReadOnlyList<SearchResult<SettingEntry>>>> searchAsync,
        CancellationToken cancellationToken)
    {
        var perCase = new List<QueryEvaluationResult>(cases.Count);
        var timingsMs = new List<double>(Math.Max(1, cases.Count * options.Iterations));
        var hitCount = 0;
        var reciprocalRankTotal = 0.0;
        var searchOptions = new SearchOptions
        {
            MaxResults = options.MaxResults,
            IncludeMatchSpans = false,
        };

        foreach (var evaluationCase in cases)
        {
            cancellationToken.ThrowIfCancellationRequested();
            var queryText = evaluationCase.Query;

            // Untimed warmup calls; their results are discarded.
            var remainingWarmups = options.WarmupIterations;
            while (remainingWarmups > 0)
            {
                _ = await searchAsync(queryText, searchOptions, cancellationToken);
                remainingWarmups--;
            }

            IReadOnlyList<SearchResult<SettingEntry>> scoredResult = Array.Empty<SearchResult<SettingEntry>>();
            for (int iteration = 0; iteration < options.Iterations; iteration++)
            {
                var timer = Stopwatch.StartNew();
                var currentResult = await searchAsync(queryText, searchOptions, cancellationToken);
                timer.Stop();
                timingsMs.Add(timer.Elapsed.TotalMilliseconds);

                if (iteration == 0)
                {
                    scoredResult = currentResult;
                }
            }

            var rankedIds = scoredResult.Select(result => result.Item.Id).ToArray();
            var expectedIds = new HashSet<string>(evaluationCase.ExpectedIds, StringComparer.OrdinalIgnoreCase);
            var bestRank = EvaluationMath.FindBestRank(rankedIds, expectedIds);

            var found = bestRank > 0;
            var withinTopK = found && bestRank <= options.TopK;
            if (withinTopK)
            {
                hitCount++;
            }

            if (found)
            {
                reciprocalRankTotal += 1.0 / bestRank;
            }

            perCase.Add(new QueryEvaluationResult
            {
                Query = queryText,
                ExpectedIds = evaluationCase.ExpectedIds,
                TopResultIds = rankedIds.Take(options.TopK).ToArray(),
                BestRank = bestRank,
                HitAtK = withinTopK,
                Notes = evaluationCase.Notes,
            });
        }

        // Divide by at least one so an empty case list gives zero metrics, not NaN.
        var divisor = Math.Max(1, cases.Count);
        return new QueryRunMetrics
        {
            CaseResults = perCase,
            RecallAtK = hitCount / (double)divisor,
            Mrr = reciprocalRankTotal / divisor,
            Latency = EvaluationMath.ComputeLatencySummary(timingsMs),
        };
    }

    /// <summary>
    /// Formats an error as "Message (Details)", or just the message when there are no
    /// details; returns null for a null error.
    /// </summary>
    private static string? FormatError(SearchError? error)
    {
        if (error is null)
        {
            return null;
        }

        return string.IsNullOrWhiteSpace(error.Details)
            ? error.Message
            : $"{error.Message} ({error.Details})";
    }

    /// <summary>
    /// Renders the capability flags as one comma-separated line, or a placeholder when
    /// no capabilities are available.
    /// </summary>
    private static string BuildCapabilitiesSummary(SemanticSearchCapabilities? capabilities)
    {
        return capabilities is null
            ? "Capabilities unavailable"
            : $"TextLexical={capabilities.TextLexicalAvailable}, TextSemantic={capabilities.TextSemanticAvailable}, ImageSemantic={capabilities.ImageSemanticAvailable}, ImageOcr={capabilities.ImageOcrAvailable}";
    }

    // Aggregated outcome of one query loop.
    private sealed class QueryRunMetrics
    {
        public required IReadOnlyList<QueryEvaluationResult> CaseResults { get; init; }

        public required double RecallAtK { get; init; }

        public required double Mrr { get; init; }

        public required LatencySummary Latency { get; init; }
    }
}

View File

@@ -0,0 +1,22 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
namespace SettingsSearchEvaluation;
/// <summary>
/// Aggregate statistics over a set of latency samples, in milliseconds.
/// </summary>
internal sealed class LatencySummary
{
/// <summary>Number of samples aggregated.</summary>
public int Samples { get; init; }
/// <summary>Smallest sample.</summary>
public double MinMs { get; init; }
/// <summary>50th percentile sample.</summary>
public double P50Ms { get; init; }
/// <summary>95th percentile sample.</summary>
public double P95Ms { get; init; }
/// <summary>Largest sample.</summary>
public double MaxMs { get; init; }
/// <summary>Arithmetic mean of the samples.</summary>
public double AverageMs { get; init; }
/// <summary>Summary with zero samples and every statistic left at zero.</summary>
public static LatencySummary Empty { get; } = new();
}

View File

@@ -0,0 +1,361 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System.Text.Json;
namespace SettingsSearchEvaluation;
internal static class Program
{
// Serializer settings for the report file written to disk; indented output.
private static readonly JsonSerializerOptions OutputJsonOptions = new()
{
WriteIndented = true,
};
/// <summary>
/// Process entry point. Runs the asynchronous workflow to completion and returns its
/// exit code; any exception that escapes is reported on stderr and becomes exit code 99.
/// </summary>
private static int Main(string[] args)
{
    int exitCode;
    try
    {
        var run = MainAsync(args);
        exitCode = run.GetAwaiter().GetResult();
    }
    catch (Exception ex)
    {
        Console.Error.WriteLine($"Unhandled error: {ex.Message}");
        exitCode = 99;
    }

    return exitCode;
}
/// <summary>
/// Parses the arguments, loads the index and cases, runs the evaluation, prints the
/// summary and optionally writes the JSON report.
/// </summary>
/// <returns>
/// 0 on success or when help was requested; 2 for invalid arguments; 3 when an input file
/// is missing or no cases could be loaded; 4 when no engine was available.
/// </returns>
private static async Task<int> MainAsync(string[] args)
{
    static bool IsHelpFlag(string arg) =>
        string.Equals(arg, "--help", StringComparison.OrdinalIgnoreCase) ||
        string.Equals(arg, "-h", StringComparison.OrdinalIgnoreCase);

    if (Array.Exists(args, IsHelpFlag))
    {
        PrintUsage();
        return 0;
    }

    var parsedOk = TryParseArgs(args, out var options, out var parseError);
    if (!parsedOk)
    {
        Console.Error.WriteLine(parseError);
        Console.Error.WriteLine();
        PrintUsage();
        return 2;
    }

    var indexPath = options.IndexJsonPath;
    if (!File.Exists(indexPath))
    {
        Console.Error.WriteLine($"Index file not found: {indexPath}");
        return 3;
    }

    var casesPath = options.CasesJsonPath;
    if (!string.IsNullOrWhiteSpace(casesPath) && !File.Exists(casesPath))
    {
        Console.Error.WriteLine($"Cases file not found: {casesPath}");
        return 3;
    }

    var loaded = EvaluationDataLoader.LoadEntriesFromFile(indexPath);
    var entries = loaded.Entries;
    var dataset = loaded.Diagnostics;
    var cases = EvaluationDataLoader.LoadCases(casesPath, entries);
    if (cases.Count == 0)
    {
        Console.Error.WriteLine("No valid evaluation cases were found.");
        return 3;
    }

    Console.WriteLine($"Loaded {entries.Count} entries from '{indexPath}'.");
    Console.WriteLine($"Cases: {cases.Count}");
    Console.WriteLine($"Duplicate id buckets: {dataset.DuplicateIdBucketCount}");
    if (dataset.DuplicateIdBucketCount > 0)
    {
        // Show only the five most frequently duplicated ids.
        var topDuplicates = string.Join(
            ", ",
            dataset.DuplicateIdCounts
                .OrderByDescending(pair => pair.Value)
                .Take(5)
                .Select(pair => $"{pair.Key} x{pair.Value}"));
        Console.WriteLine($"Top duplicate ids: {topDuplicates}");
    }

    var report = await Evaluator.RunAsync(options, entries, dataset, cases);
    PrintSummary(report, options.TopK);

    var outputPath = options.OutputJsonPath;
    if (!string.IsNullOrWhiteSpace(outputPath))
    {
        var outputDirectory = Path.GetDirectoryName(outputPath);
        if (!string.IsNullOrWhiteSpace(outputDirectory))
        {
            Directory.CreateDirectory(outputDirectory);
        }

        File.WriteAllText(outputPath, JsonSerializer.Serialize(report, OutputJsonOptions));
        Console.WriteLine($"Wrote report to '{outputPath}'.");
    }

    // Success requires at least one engine that could actually be evaluated.
    foreach (var engineReport in report.Engines)
    {
        if (engineReport.IsAvailable)
        {
            return 0;
        }
    }

    return 4;
}
/// <summary>
/// Parses the command line into <see cref="RunnerOptions"/>. Option names are matched
/// case-insensitively and every option takes a value from the following argument.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
/// <param name="options">Parsed options on success; null on failure.</param>
/// <param name="error">Description of the first problem found; empty on success.</param>
/// <returns>True when all arguments were understood and valid.</returns>
private static bool TryParseArgs(string[] args, out RunnerOptions options, out string error)
{
    string defaultIndex = GetDefaultIndexPath();
    string? indexPath = null;
    string? casesPath = null;
    string? outputPath = null;
    int maxResults = 10;
    int topK = 5;
    int iterations = 5;
    int warmup = 1;
    int semanticTimeoutMs = 15000;
    IReadOnlyList<SearchEngineKind> engines = new[] { SearchEngineKind.Basic, SearchEngineKind.Semantic };

    // The first problem found is recorded here and ends the scan.
    string? failure = null;
    for (int i = 0; i < args.Length && failure is null; i++)
    {
        var arg = args[i];
        switch (arg.ToLowerInvariant())
        {
            case "--index-json":
                if (!TryReadValue(args, ref i, out indexPath))
                {
                    failure = "Missing value for --index-json";
                }

                break;

            case "--cases-json":
                if (!TryReadValue(args, ref i, out casesPath))
                {
                    failure = "Missing value for --cases-json";
                }

                break;

            case "--output-json":
                if (!TryReadValue(args, ref i, out outputPath))
                {
                    failure = "Missing value for --output-json";
                }

                break;

            case "--engine":
                if (!TryReadValue(args, ref i, out var engineText))
                {
                    failure = "Missing value for --engine";
                }
                else if (!TryParseEngines(engineText!, out engines))
                {
                    failure = "Invalid --engine value. Allowed values: basic, semantic, both.";
                }

                break;

            case "--max-results":
                if (!TryReadInt(args, ref i, out maxResults) || maxResults <= 0)
                {
                    failure = "Invalid --max-results value. Must be a positive integer.";
                }

                break;

            case "--top-k":
                if (!TryReadInt(args, ref i, out topK) || topK <= 0)
                {
                    failure = "Invalid --top-k value. Must be a positive integer.";
                }

                break;

            case "--iterations":
                if (!TryReadInt(args, ref i, out iterations) || iterations <= 0)
                {
                    failure = "Invalid --iterations value. Must be a positive integer.";
                }

                break;

            case "--warmup":
                if (!TryReadInt(args, ref i, out warmup) || warmup < 0)
                {
                    failure = "Invalid --warmup value. Must be a non-negative integer.";
                }

                break;

            case "--semantic-timeout-ms":
                if (!TryReadInt(args, ref i, out semanticTimeoutMs) || semanticTimeoutMs <= 0)
                {
                    failure = "Invalid --semantic-timeout-ms value. Must be a positive integer.";
                }

                break;

            default:
                failure = $"Unknown argument: {arg}";
                break;
        }
    }

    if (failure is not null)
    {
        options = null!;
        error = failure;
        return false;
    }

    // Paths are made absolute; blank optional paths are treated as not supplied.
    options = new RunnerOptions
    {
        IndexJsonPath = Path.GetFullPath(indexPath ?? defaultIndex),
        CasesJsonPath = string.IsNullOrWhiteSpace(casesPath) ? null : Path.GetFullPath(casesPath),
        Engines = engines,
        MaxResults = maxResults,
        TopK = topK,
        Iterations = iterations,
        WarmupIterations = warmup,
        SemanticIndexTimeout = TimeSpan.FromMilliseconds(semanticTimeoutMs),
        OutputJsonPath = string.IsNullOrWhiteSpace(outputPath) ? null : Path.GetFullPath(outputPath),
    };
    error = string.Empty;
    return true;
}
/// <summary>
/// Returns the absolute path of the settings search index inside the repository. The
/// repository root is searched upwards from the application base directory; when it
/// is not found, the current directory is used as the root.
/// </summary>
private static string GetDefaultIndexPath()
{
    var root = FindRepoRoot(AppContext.BaseDirectory);
    if (root == null)
    {
        root = Environment.CurrentDirectory;
    }

    var indexPath = Path.Combine(root, "src", "settings-ui", "Settings.UI", "Assets", "Settings", "search.index.json");
    return Path.GetFullPath(indexPath);
}
/// <summary>
/// Walks from <paramref name="startingDirectory"/> towards the filesystem root and
/// returns the first directory that contains a "PowerToys.slnx" file.
/// </summary>
/// <param name="startingDirectory">Directory at which the upward search begins.</param>
/// <returns>Full path of the directory holding the marker file, or null if none is found.</returns>
private static string? FindRepoRoot(string startingDirectory)
{
    for (var directory = new DirectoryInfo(startingDirectory); directory != null; directory = directory.Parent)
    {
        var candidate = directory.FullName;
        if (File.Exists(Path.Combine(candidate, "PowerToys.slnx")))
        {
            return candidate;
        }
    }

    return null;
}
/// <summary>
/// Translates the --engine value ("both", "basic" or "semantic", case-insensitive)
/// into the list of engines to evaluate.
/// </summary>
/// <param name="value">Text supplied for --engine.</param>
/// <param name="engines">Selected engines; empty when the value is not recognized.</param>
/// <returns>True when the value was recognized.</returns>
private static bool TryParseEngines(string value, out IReadOnlyList<SearchEngineKind> engines)
{
    bool Matches(string keyword) => string.Equals(value, keyword, StringComparison.OrdinalIgnoreCase);

    if (Matches("both"))
    {
        engines = new[] { SearchEngineKind.Basic, SearchEngineKind.Semantic };
    }
    else if (Matches("basic"))
    {
        engines = new[] { SearchEngineKind.Basic };
    }
    else if (Matches("semantic"))
    {
        engines = new[] { SearchEngineKind.Semantic };
    }
    else
    {
        engines = Array.Empty<SearchEngineKind>();
        return false;
    }

    return true;
}
/// <summary>
/// Reads the argument that follows <paramref name="index"/> as an option value and
/// advances <paramref name="index"/> onto it.
/// </summary>
/// <param name="args">All command-line arguments.</param>
/// <param name="index">Position of the option name; moved to the value on success, unchanged on failure.</param>
/// <param name="value">The value, or null when the option is the last argument.</param>
/// <returns>True when a following argument exists.</returns>
private static bool TryReadValue(string[] args, ref int index, out string? value)
{
    var valueIndex = index + 1;
    if (valueIndex < args.Length)
    {
        index = valueIndex;
        value = args[valueIndex];
        return true;
    }

    value = null;
    return false;
}
/// <summary>
/// Reads the argument that follows the one at <paramref name="index"/> and parses it as an integer.
/// </summary>
/// <param name="args">Command-line arguments.</param>
/// <param name="index">Position of the current argument; advanced when a following argument exists, even if it fails to parse.</param>
/// <param name="value">The parsed integer on success; <c>0</c> otherwise.</param>
/// <returns><c>true</c> when a following argument exists and is a valid integer; otherwise <c>false</c>.</returns>
private static bool TryReadInt(string[] args, ref int index, out int value)
{
    value = 0;
    if (!TryReadValue(args, ref index, out var text))
    {
        return false;
    }

    // Command-line numbers are machine-readable input: parse them with the invariant culture so
    // the accepted syntax does not vary with the user's regional settings.
    return int.TryParse(
        text,
        System.Globalization.NumberStyles.Integer,
        System.Globalization.CultureInfo.InvariantCulture,
        out value);
}
/// <summary>
/// Writes a human-readable summary of an evaluation report to the console: dataset statistics
/// followed by one section per engine with its metrics and up to three sample misses.
/// </summary>
/// <param name="report">Report to summarize.</param>
/// <param name="topK">Cut-off K shown in the <c>Recall@K</c> label.</param>
private static void PrintSummary(EvaluationReport report, int topK)
{
    Console.WriteLine();
    Console.WriteLine("=== Evaluation Summary ===");
    Console.WriteLine($"Generated: {report.GeneratedAtUtc:O}");

    var dataset = report.Dataset;
    Console.WriteLine($"Dataset entries: {dataset.TotalEntries} ({dataset.DistinctIds} distinct ids)");
    Console.WriteLine($"Cases: {report.CaseCount}");
    Console.WriteLine();

    foreach (var engine in report.Engines)
    {
        Console.WriteLine($"[{engine.Engine}]");

        if (engine.IsAvailable)
        {
            Console.WriteLine($" Capabilities: {engine.CapabilitiesSummary}");
            Console.WriteLine($" Indexed entries: {engine.IndexedEntries}");
            Console.WriteLine($" Indexing time (ms): {engine.IndexingTimeMs:F2}");
            Console.WriteLine($" Recall@{topK}: {engine.RecallAtK:F4}");
            Console.WriteLine($" MRR: {engine.Mrr:F4}");

            var latency = engine.SearchLatencyMs;
            Console.WriteLine($" Search latency ms (avg/p50/p95/max): {latency.AverageMs:F2}/{latency.P50Ms:F2}/{latency.P95Ms:F2}/{latency.MaxMs:F2}");

            // Show only the first few queries that missed at the cut-off.
            var sampleMisses = (from result in engine.CaseResults
                                where !result.HitAtK
                                select result).Take(3).ToList();

            if (sampleMisses.Count != 0)
            {
                Console.WriteLine(" Sample misses:");
                foreach (var miss in sampleMisses)
                {
                    var top = miss.TopResultIds.Count > 0 ? string.Join(", ", miss.TopResultIds) : "(none)";
                    var expected = string.Join("|", miss.ExpectedIds);
                    Console.WriteLine($" Query='{miss.Query}', expected='{expected}', top='{top}'");
                }
            }
        }
        else
        {
            // An unavailable engine has no metrics; report only why it could not run.
            Console.WriteLine($" Unavailable: {engine.AvailabilityError}");
        }

        Console.WriteLine();
    }
}
/// <summary>
/// Writes the command-line help text (tool description, usage line and supported options)
/// to the console.
/// </summary>
private static void PrintUsage()
{
    // One entry per output line; empty entries produce blank separator lines.
    string[] helpLines =
    {
        "SettingsSearchEvaluation",
        "Evaluates basic and semantic settings search for recall and performance.",
        string.Empty,
        "Usage:",
        " SettingsSearchEvaluation [options]",
        string.Empty,
        "Options:",
        " --index-json <path> Path to settings search index JSON.",
        " --cases-json <path> Optional path to evaluation cases JSON.",
        " --engine <basic|semantic|both> Engine selection. Default: both.",
        " --max-results <n> Maximum returned results per query. Default: 10.",
        " --top-k <n> Recall cut-off K. Default: 5.",
        " --iterations <n> Measured runs per query. Default: 5.",
        " --warmup <n> Warmup runs per query. Default: 1.",
        " --semantic-timeout-ms <n> Semantic index idle wait timeout in ms. Default: 15000.",
        " --output-json <path> Optional output report file.",
        " --help Show this help.",
    };

    foreach (var line in helpLines)
    {
        Console.WriteLine(line);
    }
}
}

View File

@@ -0,0 +1,7 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System.Runtime.CompilerServices;
// Lets the SettingsSearchEvaluation.Tests assembly access this assembly's internal types and members.
[assembly: InternalsVisibleTo("SettingsSearchEvaluation.Tests")]

View File

@@ -0,0 +1,20 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
namespace SettingsSearchEvaluation;
/// <summary>
/// Outcome of evaluating a single query: what was asked, which ids were expected,
/// which ids came back, and whether the query counted as a hit.
/// </summary>
internal sealed class QueryEvaluationResult
{
/// <summary>Query text that was evaluated.</summary>
public required string Query { get; init; }
/// <summary>Ids regarded as correct answers for the query.</summary>
public required IReadOnlyList<string> ExpectedIds { get; init; }
/// <summary>Ids of the top results returned for the query.</summary>
public required IReadOnlyList<string> TopResultIds { get; init; }
/// <summary>
/// Best rank achieved for the query.
/// NOTE(review): presumably the position of the highest-ranked expected id in the results; the
/// base (0 or 1) and the value used when nothing matched are not visible here. Confirm against the evaluator.
/// </summary>
public required int BestRank { get; init; }
/// <summary>
/// Whether the query counted as a hit at the cut-off K.
/// NOTE(review): presumably true when an expected id appears within the top K results; confirm against the evaluator.
/// </summary>
public required bool HitAtK { get; init; }
/// <summary>Optional free-form notes attached to the query; <c>null</c> when none were given.</summary>
public string? Notes { get; init; }
}

View File

@@ -0,0 +1,51 @@
# Settings Search Evaluation
This tool evaluates Settings search quality and latency for:
- `basic` search (`FuzzSearchEngine`)
- `semantic` search (`SemanticSearchEngine`)
It reports:
- `Recall@K`
- `MRR` (mean reciprocal rank)
- Search latency (`avg`, `p50`, `p95`, `max`)
- Dataset diagnostics including duplicate `SettingEntry.Id` buckets
## Run
Build with Visual Studio `MSBuild.exe` (the project references native components):
```powershell
$vswhere = "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe"
$msbuild = & $vswhere -latest -products * -requires Microsoft.Component.MSBuild -find MSBuild\**\Bin\MSBuild.exe
& $msbuild tools\SettingsSearchEvaluation\SettingsSearchEvaluation.csproj `
/t:Build /p:Configuration=Debug /p:Platform=arm64 /m:1 /nologo
```
Run the built executable:
```powershell
.\tools\SettingsSearchEvaluation\bin\arm64\Debug\net9.0-windows10.0.26100.0\SettingsSearchEvaluation.exe `
--index-json src/settings-ui/Settings.UI/Assets/Settings/search.index.json `
--cases-json tools/SettingsSearchEvaluation/cases/settings-search-cases.sample.json `
--engine both `
--top-k 5 `
--iterations 5 `
--warmup 1 `
--output-json tools/SettingsSearchEvaluation/artifacts/report.json
```
## Case file format
```json
[
{
"query": "color picker",
"expectedIds": ["ColorPicker"],
"notes": "Module entry"
}
]
```
If `--cases-json` is not provided, fallback cases are auto-generated from the index headers.

View File

@@ -0,0 +1,26 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
namespace SettingsSearchEvaluation;
/// <summary>
/// Options that configure one evaluation run.
/// </summary>
internal sealed class RunnerOptions
{
/// <summary>Path to the settings search index JSON file.</summary>
public required string IndexJsonPath { get; init; }
/// <summary>Optional path to the evaluation cases JSON file; <c>null</c> when none is supplied.</summary>
public string? CasesJsonPath { get; init; }
/// <summary>Search engines to evaluate.</summary>
public required IReadOnlyList<SearchEngineKind> Engines { get; init; }
/// <summary>Maximum number of results returned per query. Defaults to 10.</summary>
public int MaxResults { get; init; } = 10;
/// <summary>Recall cut-off K. Defaults to 5.</summary>
public int TopK { get; init; } = 5;
/// <summary>Number of measured runs per query. Defaults to 5.</summary>
public int Iterations { get; init; } = 5;
/// <summary>Number of warmup runs per query. Defaults to 1.</summary>
public int WarmupIterations { get; init; } = 1;
/// <summary>Timeout for the semantic index idle wait. Defaults to 15 seconds.</summary>
public TimeSpan SemanticIndexTimeout { get; init; } = TimeSpan.FromSeconds(15);
/// <summary>Optional path of the output report file; <c>null</c> when none is supplied.</summary>
public string? OutputJsonPath { get; init; }
}

View File

@@ -0,0 +1,11 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
namespace SettingsSearchEvaluation;
/// <summary>
/// Identifies which settings search engine an evaluation targets.
/// </summary>
internal enum SearchEngineKind
{
/// <summary>The basic search engine.</summary>
Basic,
/// <summary>The semantic search engine.</summary>
Semantic,
}

View File

@@ -0,0 +1,18 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Look at Directory.Build.props in root for common stuff as well -->
<Import Project="..\..\src\Common.Dotnet.CsWinRT.props" />
<PropertyGroup>
<!-- Built as an executable named SettingsSearchEvaluation. -->
<OutputType>Exe</OutputType>
<RootNamespace>SettingsSearchEvaluation</RootNamespace>
<AssemblyName>SettingsSearchEvaluation</AssemblyName>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<!-- Keep the artifacts, bin and obj folders out of the default item globs. -->
<DefaultItemExcludes>$(DefaultItemExcludes);artifacts\**\*;bin\**\*;obj\**\*</DefaultItemExcludes>
</PropertyGroup>
<!-- NOTE(review): presumably Common.Search supplies the search engines under evaluation and Settings.UI.Library the settings entry types; confirm. -->
<ItemGroup>
<ProjectReference Include="..\..\src\common\Common.Search\Common.Search.csproj" />
<ProjectReference Include="..\..\src\settings-ui\Settings.UI.Library\Settings.UI.Library.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,72 @@
[
{
"query": "always on top",
"expectedIds": [ "AlwaysOnTop" ],
"notes": "Module entry"
},
{
"query": "awake mode",
"expectedIds": [ "Awake_ModeSettingsCard" ],
"notes": "Feature setting"
},
{
"query": "color picker",
"expectedIds": [ "ColorPicker" ],
"notes": "Module entry"
},
{
"query": "fancy zones",
"expectedIds": [ "FancyZones" ],
"notes": "Module entry"
},
{
"query": "launch cmd pal",
"expectedIds": [ "CmdPal_Launch" ],
"notes": "CmdPal launch setting"
},
{
"query": "image resizer",
"expectedIds": [ "ImageResizer" ],
"notes": "Module entry"
},
{
"query": "keyboard manager",
"expectedIds": [ "KeyboardManager" ],
"notes": "Module entry"
},
{
"query": "mouse jump",
"expectedIds": [ "MouseUtils_Enable_MouseJump" ],
"notes": "Module entry"
},
{
"query": "mouse without borders",
"expectedIds": [ "MouseWithoutBorders" ],
"notes": "Module setting"
},
{
"query": "peek",
"expectedIds": [ "Peek" ],
"notes": "Module entry"
},
{
"query": "power rename",
"expectedIds": [ "PowerRename" ],
"notes": "Module entry"
},
{
"query": "power toys run",
"expectedIds": [ "PowerLauncher" ],
"notes": "Module entry"
},
{
"query": "registry preview",
"expectedIds": [ "RegistryPreview" ],
"notes": "Module entry"
},
{
"query": "workspaces",
"expectedIds": [ "Workspaces_EnableToggleControl_HeaderText" ],
"notes": "Module setting"
}
]