mirror of
https://github.com/microsoft/PowerToys.git
synced 2026-02-24 04:00:02 +01:00
evaluation for the semantic search
This commit is contained in:
@@ -1059,6 +1059,16 @@
|
||||
<Platform Solution="*|x64" Project="x64" />
|
||||
</Project>
|
||||
</Folder>
|
||||
<Folder Name="/tools/SettingsSearchEvaluation/">
|
||||
<Project Path="tools/SettingsSearchEvaluation/SettingsSearchEvaluation.csproj">
|
||||
<Platform Solution="*|ARM64" Project="ARM64" />
|
||||
<Platform Solution="*|x64" Project="x64" />
|
||||
</Project>
|
||||
<Project Path="tools/SettingsSearchEvaluation.Tests/SettingsSearchEvaluation.Tests.csproj">
|
||||
<Platform Solution="*|ARM64" Project="ARM64" />
|
||||
<Platform Solution="*|x64" Project="x64" />
|
||||
</Project>
|
||||
</Folder>
|
||||
<Folder Name="/Solution Items/">
|
||||
<File Path=".vsconfig" />
|
||||
<File Path="Cpp.Build.props" />
|
||||
|
||||
@@ -0,0 +1,130 @@
|
||||
// Copyright (c) Microsoft Corporation
|
||||
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||
// See the LICENSE file in the project root for more information.
|
||||
|
||||
using System.IO;
|
||||
using Microsoft.VisualStudio.TestTools.UnitTesting;
|
||||
|
||||
namespace SettingsSearchEvaluation.Tests;
|
||||
|
||||
[TestClass]
public class EvaluationDataLoaderTests
{
    // Index containing a single entry whose header and elementUid are both populated.
    private const string SingleFancyZonesEntryJson = """
        [
          {
            "type": 0,
            "header": "Fancy Zones",
            "pageTypeName": "FancyZonesPage",
            "elementName": "",
            "elementUid": "FancyZones",
            "parentElementName": "",
            "description": "",
            "icon": null
          }
        ]
        """;

    /// <summary>
    /// Entries with a null header get a header derived from their elementUid, and two entries
    /// sharing the same elementUid are reported as one duplicate bucket with a count of two.
    /// </summary>
    [TestMethod]
    public void LoadEntriesFromJson_NormalizesHeaderAndDetectsDuplicates()
    {
        const string json = """
            [
              {
                "type": 0,
                "header": null,
                "pageTypeName": "ColorPickerPage",
                "elementName": "",
                "elementUid": "Activation_Shortcut",
                "parentElementName": "",
                "description": null,
                "icon": null
              },
              {
                "type": 0,
                "header": null,
                "pageTypeName": "FancyZonesPage",
                "elementName": "",
                "elementUid": "Activation_Shortcut",
                "parentElementName": "",
                "description": null,
                "icon": null
              }
            ]
            """;

        var loaded = EvaluationDataLoader.LoadEntriesFromJson(json);
        var entries = loaded.Entries;
        var diagnostics = loaded.Diagnostics;

        Assert.AreEqual(2, entries.Count);
        Assert.AreEqual("Activation Shortcut", entries[0].Header);
        Assert.AreEqual(1, diagnostics.DuplicateIdBucketCount);

        var duplicateFound = diagnostics.DuplicateIdCounts.TryGetValue("Activation_Shortcut", out var duplicateCount);
        Assert.IsTrue(duplicateFound);
        Assert.AreEqual(2, duplicateCount);
    }

    /// <summary>
    /// With no cases path, one case is generated per entry: the header is the query and the
    /// entry id is the single expected id.
    /// </summary>
    [TestMethod]
    public void LoadCases_GeneratesFallbackCases_WhenNoCasesFileSpecified()
    {
        var entries = EvaluationDataLoader.LoadEntriesFromJson(SingleFancyZonesEntryJson).Entries;

        var generated = EvaluationDataLoader.LoadCases(null, entries);

        Assert.AreEqual(1, generated.Count);

        var only = generated[0];
        Assert.AreEqual("Fancy Zones", only.Query);
        Assert.AreEqual("FancyZones", only.ExpectedIds[0]);
    }

    /// <summary>
    /// A cases file is trimmed and de-duplicated on load; cases with a blank query or without
    /// any usable expected id are dropped.
    /// </summary>
    [TestMethod]
    public void LoadCases_LoadsAndNormalizesCasesFile()
    {
        const string casesJson = """
            [
              {
                "query": " fancy zones ",
                "expectedIds": [ "FancyZones", " fancyzones ", "" ],
                "notes": "normalization test"
              },
              {
                "query": "",
                "expectedIds": [ "FancyZones" ]
              },
              {
                "query": "missing expected",
                "expectedIds": [ "" ]
              }
            ]
            """;

        var entries = EvaluationDataLoader.LoadEntriesFromJson(SingleFancyZonesEntryJson).Entries;
        var tempCasesPath = Path.GetTempFileName();

        try
        {
            File.WriteAllText(tempCasesPath, casesJson);

            var loadedCases = EvaluationDataLoader.LoadCases(tempCasesPath, entries);

            // Only the first case survives normalization.
            Assert.AreEqual(1, loadedCases.Count);

            var survivor = loadedCases[0];
            Assert.AreEqual("fancy zones", survivor.Query);
            Assert.AreEqual(1, survivor.ExpectedIds.Count);
            Assert.AreEqual("FancyZones", survivor.ExpectedIds[0]);
            Assert.AreEqual("normalization test", survivor.Notes);
        }
        finally
        {
            // Always remove the temp file, even when an assertion fails.
            File.Delete(tempCasesPath);
        }
    }
}
|
||||
50
tools/SettingsSearchEvaluation.Tests/EvaluationMathTests.cs
Normal file
50
tools/SettingsSearchEvaluation.Tests/EvaluationMathTests.cs
Normal file
@@ -0,0 +1,50 @@
|
||||
// Copyright (c) Microsoft Corporation
|
||||
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||
// See the LICENSE file in the project root for more information.
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using Microsoft.VisualStudio.TestTools.UnitTesting;
|
||||
|
||||
namespace SettingsSearchEvaluation.Tests;
|
||||
|
||||
[TestClass]
public class EvaluationMathTests
{
    private static readonly double[] LatencySamples = { 10.0, 20.0, 30.0, 40.0, 50.0 };

    /// <summary>
    /// The reported rank is the 1-based position of the expected id in the ranked list.
    /// </summary>
    [TestMethod]
    public void FindBestRank_ReturnsExpectedRank()
    {
        var rankedIds = CreateRankedIds();
        var expectedIds = CreateExpectedSet("c");

        var bestRank = EvaluationMath.FindBestRank(rankedIds, expectedIds);

        Assert.AreEqual(3, bestRank);
    }

    /// <summary>
    /// Zero is returned when none of the expected ids occur in the ranked list.
    /// </summary>
    [TestMethod]
    public void FindBestRank_ReturnsZero_WhenMissing()
    {
        var rankedIds = CreateRankedIds();
        var expectedIds = CreateExpectedSet("x", "y");

        var bestRank = EvaluationMath.FindBestRank(rankedIds, expectedIds);

        Assert.AreEqual(0, bestRank);
    }

    /// <summary>
    /// Checks sample count, min, p50, p95, max and average for five evenly spaced samples.
    /// </summary>
    [TestMethod]
    public void ComputeLatencySummary_ComputesQuantiles()
    {
        var latency = EvaluationMath.ComputeLatencySummary(LatencySamples);

        Assert.AreEqual(5, latency.Samples);
        Assert.AreEqual(10.0, latency.MinMs);
        Assert.AreEqual(30.0, latency.P50Ms);
        Assert.AreEqual(50.0, latency.P95Ms);
        Assert.AreEqual(50.0, latency.MaxMs);
        Assert.AreEqual(30.0, latency.AverageMs, 0.0001);
    }

    // Ranked result ids shared by the FindBestRank tests.
    private static string[] CreateRankedIds()
    {
        return new[] { "a", "b", "c", "d" };
    }

    // Builds a case-insensitive set of expected ids.
    private static HashSet<string> CreateExpectedSet(params string[] ids)
    {
        var set = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        foreach (var id in ids)
        {
            set.Add(id);
        }

        return set;
    }
}
|
||||
69
tools/SettingsSearchEvaluation.Tests/EvaluatorTests.cs
Normal file
69
tools/SettingsSearchEvaluation.Tests/EvaluatorTests.cs
Normal file
@@ -0,0 +1,69 @@
|
||||
// Copyright (c) Microsoft Corporation
|
||||
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||
// See the LICENSE file in the project root for more information.
|
||||
|
||||
using System;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.VisualStudio.TestTools.UnitTesting;
|
||||
|
||||
namespace SettingsSearchEvaluation.Tests;
|
||||
|
||||
[TestClass]
public class EvaluatorTests
{
    /// <summary>
    /// Runs the basic engine over a one-entry index with a query equal to the entry header and
    /// expects a hit at rank 1, which makes both recall@K and MRR equal to 1.
    /// </summary>
    [TestMethod]
    public async Task RunAsync_BasicEngine_ReturnsExpectedMetricsForExactSingleEntry()
    {
        const string json = """
            [
              {
                "type": 0,
                "header": "Fancy Zones",
                "pageTypeName": "FancyZonesPage",
                "elementName": "",
                "elementUid": "FancyZones",
                "parentElementName": "",
                "description": "",
                "icon": null
              }
            ]
            """;

        var loaded = EvaluationDataLoader.LoadEntriesFromJson(json);
        var exactMatchCase = new EvaluationCase
        {
            Query = "Fancy Zones",
            ExpectedIds = new[] { "FancyZones" },
            Notes = "Exact query should be rank 1.",
        };
        var cases = new[] { exactMatchCase };

        var report = await Evaluator.RunAsync(CreateBasicEngineOptions(), loaded.Entries, loaded.Diagnostics, cases);

        Assert.AreEqual(1, report.Engines.Count);

        var basic = report.Engines[0];
        Assert.AreEqual(SearchEngineKind.Basic, basic.Engine);
        Assert.IsTrue(basic.IsAvailable);
        Assert.AreEqual(1, basic.QueryCount);
        Assert.AreEqual(1.0, basic.RecallAtK, 0.0001);
        Assert.AreEqual(1.0, basic.Mrr, 0.0001);
        Assert.AreEqual(1, basic.CaseResults.Count);

        var onlyCase = basic.CaseResults[0];
        Assert.IsTrue(onlyCase.HitAtK);
        Assert.AreEqual(1, onlyCase.BestRank);
    }

    // Options for a single measured iteration of the basic engine only, with no warmup.
    private static RunnerOptions CreateBasicEngineOptions()
    {
        return new RunnerOptions
        {
            IndexJsonPath = "test-index.json",
            CasesJsonPath = null,
            Engines = new[] { SearchEngineKind.Basic },
            MaxResults = 5,
            TopK = 5,
            Iterations = 1,
            WarmupIterations = 0,
            SemanticIndexTimeout = TimeSpan.FromSeconds(1),
            OutputJsonPath = null,
        };
    }
}
|
||||
@@ -0,0 +1,17 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<!-- Look at Directory.Build.props in root for common stuff as well -->
|
||||
<Import Project="..\..\src\Common.Dotnet.CsWinRT.props" />
|
||||
|
||||
<PropertyGroup>
|
||||
<IsPackable>false</IsPackable>
|
||||
<OutputPath>..\..\$(Configuration)\$(Platform)\tests\SettingsSearchEvaluationTests\</OutputPath>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="MSTest" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\SettingsSearchEvaluation\SettingsSearchEvaluation.csproj" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
26
tools/SettingsSearchEvaluation/DatasetDiagnostics.cs
Normal file
26
tools/SettingsSearchEvaluation/DatasetDiagnostics.cs
Normal file
@@ -0,0 +1,26 @@
|
||||
// Copyright (c) Microsoft Corporation
|
||||
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||
// See the LICENSE file in the project root for more information.
|
||||
|
||||
using System.Collections.ObjectModel;
|
||||
|
||||
namespace SettingsSearchEvaluation;
|
||||
|
||||
/// <summary>
/// Id statistics for a loaded settings index: entry count, distinct id count, and the ids
/// that occur more than once.
/// </summary>
internal sealed class DatasetDiagnostics
{
    /// <summary>Number of entries in the index.</summary>
    public required int TotalEntries { get; init; }

    /// <summary>Number of distinct entry ids.</summary>
    public required int DistinctIds { get; init; }

    /// <summary>Number of ids shared by more than one entry.</summary>
    public required int DuplicateIdBucketCount { get; init; }

    /// <summary>Occurrence count for every id shared by more than one entry.</summary>
    public required IReadOnlyDictionary<string, int> DuplicateIdCounts { get; init; }

    /// <summary>Shared instance with every count at zero and no duplicate ids.</summary>
    public static DatasetDiagnostics Empty { get; } = new DatasetDiagnostics
    {
        TotalEntries = 0,
        DistinctIds = 0,
        DuplicateIdBucketCount = 0,
        DuplicateIdCounts = new ReadOnlyDictionary<string, int>(new Dictionary<string, int>()),
    };
}
|
||||
30
tools/SettingsSearchEvaluation/EngineEvaluationReport.cs
Normal file
30
tools/SettingsSearchEvaluation/EngineEvaluationReport.cs
Normal file
@@ -0,0 +1,30 @@
|
||||
// Copyright (c) Microsoft Corporation
|
||||
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||
// See the LICENSE file in the project root for more information.
|
||||
|
||||
namespace SettingsSearchEvaluation;
|
||||
|
||||
/// <summary>
/// Evaluation outcome for a single search engine: availability, indexing cost, quality
/// metrics, search latency and the per-case results.
/// </summary>
internal sealed class EngineEvaluationReport
{
    /// <summary>The engine this report describes.</summary>
    public required SearchEngineKind Engine { get; init; }

    /// <summary>Whether the engine could be initialized and indexed so that queries were run.</summary>
    public required bool IsAvailable { get; init; }

    /// <summary>Reason the engine was unavailable; null when it was available.</summary>
    public string? AvailabilityError { get; init; }

    /// <summary>Human-readable description of the engine's capabilities, when known.</summary>
    public string? CapabilitiesSummary { get; init; }

    /// <summary>Number of entries handed to the engine for indexing.</summary>
    public int IndexedEntries { get; init; }

    /// <summary>Number of evaluation cases run against the engine.</summary>
    public int QueryCount { get; init; }

    /// <summary>Elapsed indexing time in milliseconds.</summary>
    public double IndexingTimeMs { get; init; }

    /// <summary>Fraction of cases whose best rank fell within the configured top K.</summary>
    public double RecallAtK { get; init; }

    /// <summary>Mean reciprocal rank over all cases.</summary>
    public double Mrr { get; init; }

    /// <summary>Latency statistics over the measured search calls.</summary>
    public required LatencySummary SearchLatencyMs { get; init; }

    /// <summary>One result per evaluated case.</summary>
    public required IReadOnlyList<QueryEvaluationResult> CaseResults { get; init; }
}
|
||||
14
tools/SettingsSearchEvaluation/EvaluationCase.cs
Normal file
14
tools/SettingsSearchEvaluation/EvaluationCase.cs
Normal file
@@ -0,0 +1,14 @@
|
||||
// Copyright (c) Microsoft Corporation
|
||||
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||
// See the LICENSE file in the project root for more information.
|
||||
|
||||
namespace SettingsSearchEvaluation;
|
||||
|
||||
/// <summary>
/// A single evaluation case: a query and the entry ids that count as a correct result for it.
/// </summary>
internal sealed class EvaluationCase
{
    /// <summary>Query text sent to the search engine.</summary>
    public required string Query { get; init; }

    /// <summary>Entry ids considered correct for <see cref="Query"/>.</summary>
    public required IReadOnlyList<string> ExpectedIds { get; init; }

    /// <summary>Optional free-form remark carried through to the report.</summary>
    public string? Notes { get; init; }
}
|
||||
180
tools/SettingsSearchEvaluation/EvaluationDataLoader.cs
Normal file
180
tools/SettingsSearchEvaluation/EvaluationDataLoader.cs
Normal file
@@ -0,0 +1,180 @@
|
||||
// Copyright (c) Microsoft Corporation
|
||||
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||
// See the LICENSE file in the project root for more information.
|
||||
|
||||
using System.Collections.ObjectModel;
|
||||
using System.Text.Json;
|
||||
using System.Text.RegularExpressions;
|
||||
using Settings.UI.Library;
|
||||
|
||||
namespace SettingsSearchEvaluation;
|
||||
|
||||
/// <summary>
/// Loads the settings index and the evaluation cases from JSON and normalizes them
/// (trimmed fields, fallback ids/headers, de-duplicated expected ids).
/// </summary>
internal static partial class EvaluationDataLoader
{
    // Upper bound on the number of cases autogenerated from the index.
    private const int MaxFallbackCases = 40;

    // Property names are matched case-insensitively and JSON comments are skipped.
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNameCaseInsensitive = true,
        ReadCommentHandling = JsonCommentHandling.Skip,
    };

    /// <summary>
    /// Reads the file at <paramref name="path"/> and parses it with <see cref="LoadEntriesFromJson"/>.
    /// </summary>
    /// <param name="path">Path of the index JSON file; must not be null or whitespace.</param>
    /// <returns>The normalized entries and their id diagnostics.</returns>
    /// <exception cref="ArgumentException"><paramref name="path"/> is null, empty or whitespace.</exception>
    public static (IReadOnlyList<SettingEntry> Entries, DatasetDiagnostics Diagnostics) LoadEntriesFromFile(string path)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(path);
        var json = File.ReadAllText(path);
        return LoadEntriesFromJson(json);
    }

    /// <summary>
    /// Parses a JSON array of index entries and normalizes each one: all string fields are
    /// trimmed, a missing elementUid is replaced by "pageType|elementName", and a missing
    /// header is derived from the declared uid, else the element name, else the page type.
    /// </summary>
    /// <param name="json">JSON array text; null, empty or whitespace yields an empty result.</param>
    /// <returns>The normalized entries and their id diagnostics.</returns>
    public static (IReadOnlyList<SettingEntry> Entries, DatasetDiagnostics Diagnostics) LoadEntriesFromJson(string json)
    {
        if (string.IsNullOrWhiteSpace(json))
        {
            return (Array.Empty<SettingEntry>(), DatasetDiagnostics.Empty);
        }

        var rawEntries = JsonSerializer.Deserialize<List<RawSettingEntry>>(json, JsonOptions) ?? new List<RawSettingEntry>();
        var normalized = new List<SettingEntry>(rawEntries.Count);

        foreach (var raw in rawEntries)
        {
            // A literal `null` array element deserializes to a null reference; skip it
            // instead of failing with a NullReferenceException.
            if (raw is null)
            {
                continue;
            }

            var pageType = raw.PageTypeName?.Trim() ?? string.Empty;
            var elementName = raw.ElementName?.Trim() ?? string.Empty;
            var declaredUid = raw.ElementUid?.Trim() ?? string.Empty;

            // Entries without a uid still need an id, so synthesize one from page and element.
            var elementUid = string.IsNullOrEmpty(declaredUid)
                ? $"{pageType}|{elementName}"
                : declaredUid;

            var header = raw.Header?.Trim();
            if (string.IsNullOrEmpty(header))
            {
                // Pass the declared uid, not the synthesized one: the synthesized id is never
                // blank, which would make the element-name / page-type fallbacks unreachable
                // and put the "page|element" id into the header.
                header = BuildFallbackHeader(declaredUid, elementName, pageType);
            }

            var description = raw.Description?.Trim() ?? string.Empty;
            var parent = raw.ParentElementName?.Trim() ?? string.Empty;
            var icon = raw.Icon?.Trim() ?? string.Empty;

            normalized.Add(new SettingEntry(
                raw.Type,
                header,
                pageType,
                elementName,
                elementUid,
                parent,
                description,
                icon));
        }

        return (normalized, BuildDiagnostics(normalized));
    }

    /// <summary>
    /// Loads evaluation cases from <paramref name="casesPath"/>. Queries and expected ids are
    /// trimmed, expected ids are de-duplicated case-insensitively, and cases with a blank query
    /// or no usable expected id are dropped. When no path is given, or the file yields no valid
    /// case, cases are autogenerated from <paramref name="entries"/> instead.
    /// </summary>
    /// <param name="casesPath">Path of the cases JSON file, or null/whitespace for none.</param>
    /// <param name="entries">Index entries used to autogenerate cases.</param>
    /// <returns>The normalized cases from the file, or the autogenerated cases.</returns>
    public static IReadOnlyList<EvaluationCase> LoadCases(string? casesPath, IReadOnlyList<SettingEntry> entries)
    {
        if (!string.IsNullOrWhiteSpace(casesPath))
        {
            var json = File.ReadAllText(casesPath);
            var parsed = JsonSerializer.Deserialize<List<RawEvaluationCase>>(json, JsonOptions) ?? new List<RawEvaluationCase>();
            var normalized = parsed
                .Where(c => c is not null && !string.IsNullOrWhiteSpace(c.Query)) // also skips literal null elements
                .Select(c => new EvaluationCase
                {
                    Query = c.Query!.Trim(),
                    ExpectedIds = c.ExpectedIds?
                        .Where(id => !string.IsNullOrWhiteSpace(id))
                        .Select(id => id.Trim())
                        .Distinct(StringComparer.OrdinalIgnoreCase)
                        .ToArray() ?? Array.Empty<string>(),
                    Notes = c.Notes,
                })
                .Where(c => c.ExpectedIds.Count > 0)
                .ToList();

            if (normalized.Count > 0)
            {
                return normalized;
            }
        }

        return GenerateFallbackCases(entries);
    }

    /// <summary>
    /// Counts entries per id (case-insensitive) and reports the total, the number of distinct
    /// ids, and the ids that occur more than once.
    /// </summary>
    private static DatasetDiagnostics BuildDiagnostics(IReadOnlyList<SettingEntry> entries)
    {
        // Group once; both the distinct count and the duplicate buckets come from this list.
        var idCounts = entries
            .GroupBy(x => x.Id, StringComparer.OrdinalIgnoreCase)
            .Select(group => (Id: group.Key, Count: group.Count()))
            .ToList();

        var duplicateBuckets = idCounts
            .Where(bucket => bucket.Count > 1)
            .OrderByDescending(bucket => bucket.Count)
            .ToDictionary(bucket => bucket.Id, bucket => bucket.Count, StringComparer.OrdinalIgnoreCase);

        return new DatasetDiagnostics
        {
            TotalEntries = entries.Count,
            DistinctIds = idCounts.Count,
            DuplicateIdBucketCount = duplicateBuckets.Count,
            DuplicateIdCounts = new ReadOnlyDictionary<string, int>(duplicateBuckets),
        };
    }

    /// <summary>
    /// Builds one case per distinct entry id (first occurrence wins, case-insensitive), limited
    /// to the first <see cref="MaxFallbackCases"/> ids. The entry header is the query and the
    /// entry id the only expected id. Entries with a blank header or id are ignored.
    /// </summary>
    private static IReadOnlyList<EvaluationCase> GenerateFallbackCases(IReadOnlyList<SettingEntry> entries)
    {
        return entries
            .Where(entry => !string.IsNullOrWhiteSpace(entry.Header) && !string.IsNullOrWhiteSpace(entry.Id))
            .DistinctBy(entry => entry.Id, StringComparer.OrdinalIgnoreCase)
            .Take(MaxFallbackCases)
            .Select(entry => new EvaluationCase
            {
                Query = entry.Header,
                ExpectedIds = new[] { entry.Id },
                Notes = "Autogenerated case from index entry header.",
            })
            .ToArray();
    }

    /// <summary>
    /// Derives a display header from the first non-blank value among the uid, the element name
    /// and the page type name: underscores become spaces, whitespace runs collapse to a single
    /// space, and a space is inserted at each lower-case/digit to upper-case boundary.
    /// </summary>
    private static string BuildFallbackHeader(string elementUid, string elementName, string pageTypeName)
    {
        var candidate = !string.IsNullOrWhiteSpace(elementUid)
            ? elementUid
            : (!string.IsNullOrWhiteSpace(elementName) ? elementName : pageTypeName);

        candidate = candidate.Replace('_', ' ').Trim();
        candidate = ConsecutiveWhitespaceRegex().Replace(candidate, " ");
        candidate = CamelBoundaryRegex().Replace(candidate, "$1 $2");
        return candidate;
    }

    // Matches one or more whitespace characters.
    [GeneratedRegex(@"\s+")]
    private static partial Regex ConsecutiveWhitespaceRegex();

    // Matches a lower-case letter or digit immediately followed by an upper-case letter.
    [GeneratedRegex("([a-z0-9])([A-Z])")]
    private static partial Regex CamelBoundaryRegex();

    // Deserialization shape of one index entry; every string may be absent or null in the JSON.
    private sealed class RawSettingEntry
    {
        public EntryType Type { get; init; }

        public string? Header { get; init; }

        public string? PageTypeName { get; init; }

        public string? ElementName { get; init; }

        public string? ElementUid { get; init; }

        public string? ParentElementName { get; init; }

        public string? Description { get; init; }

        public string? Icon { get; init; }
    }

    // Deserialization shape of one evaluation case before normalization.
    private sealed class RawEvaluationCase
    {
        public string? Query { get; init; }

        public List<string>? ExpectedIds { get; init; }

        public string? Notes { get; init; }
    }
}
|
||||
65
tools/SettingsSearchEvaluation/EvaluationMath.cs
Normal file
65
tools/SettingsSearchEvaluation/EvaluationMath.cs
Normal file
@@ -0,0 +1,65 @@
|
||||
// Copyright (c) Microsoft Corporation
|
||||
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||
// See the LICENSE file in the project root for more information.
|
||||
|
||||
namespace SettingsSearchEvaluation;
|
||||
|
||||
/// <summary>
/// Pure helpers for ranking and latency statistics used by the evaluator.
/// </summary>
internal static class EvaluationMath
{
    /// <summary>
    /// Finds the 1-based position of the first id in <paramref name="rankedResultIds"/> that is
    /// contained in <paramref name="expectedIds"/>.
    /// </summary>
    /// <param name="rankedResultIds">Result ids in ranked order, best first.</param>
    /// <param name="expectedIds">Ids that count as a correct result.</param>
    /// <returns>The 1-based rank of the first match, or 0 when there is none.</returns>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public static int FindBestRank(IReadOnlyList<string> rankedResultIds, IReadOnlySet<string> expectedIds)
    {
        ArgumentNullException.ThrowIfNull(rankedResultIds);
        ArgumentNullException.ThrowIfNull(expectedIds);

        // Nothing can match an empty expectation; an empty ranking simply yields no iterations.
        if (expectedIds.Count > 0)
        {
            var position = 0;
            foreach (var candidateId in rankedResultIds)
            {
                position++;
                if (expectedIds.Contains(candidateId))
                {
                    return position;
                }
            }
        }

        return 0;
    }

    /// <summary>
    /// Summarizes latency samples as count, minimum, p50, p95, maximum and arithmetic mean.
    /// </summary>
    /// <param name="samplesMs">Latency samples in milliseconds, in any order.</param>
    /// <returns>The summary, or <see cref="LatencySummary.Empty"/> when there are no samples.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="samplesMs"/> is null.</exception>
    public static LatencySummary ComputeLatencySummary(IReadOnlyList<double> samplesMs)
    {
        ArgumentNullException.ThrowIfNull(samplesMs);

        if (samplesMs.Count == 0)
        {
            return LatencySummary.Empty;
        }

        var ascending = samplesMs.OrderBy(sample => sample).ToArray();
        var sum = samplesMs.Sum();
        var sampleCount = ascending.Length;

        return new LatencySummary
        {
            Samples = sampleCount,
            MinMs = ascending[0],
            P50Ms = Percentile(ascending, 0.50),
            P95Ms = Percentile(ascending, 0.95),
            MaxMs = ascending[sampleCount - 1],
            AverageMs = sum / sampleCount,
        };
    }

    // Picks the sample at index ceil(p * count) - 1 from an ascending list, with p clamped to
    // [0, 1] and the index clamped to the valid range. Returns 0 for an empty list.
    private static double Percentile(IReadOnlyList<double> sortedSamples, double percentile)
    {
        var count = sortedSamples.Count;
        if (count == 0)
        {
            return 0;
        }

        var fraction = Math.Clamp(percentile, 0, 1);
        var index = (int)Math.Ceiling(fraction * count) - 1;
        var lastIndex = count - 1;

        return sortedSamples[Math.Max(0, Math.Min(index, lastIndex))];
    }
}
|
||||
18
tools/SettingsSearchEvaluation/EvaluationReport.cs
Normal file
18
tools/SettingsSearchEvaluation/EvaluationReport.cs
Normal file
@@ -0,0 +1,18 @@
|
||||
// Copyright (c) Microsoft Corporation
|
||||
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||
// See the LICENSE file in the project root for more information.
|
||||
|
||||
namespace SettingsSearchEvaluation;
|
||||
|
||||
/// <summary>
/// Top-level result of an evaluation run: when it was generated, which index was used, the
/// dataset diagnostics, and one report per evaluated engine.
/// </summary>
internal sealed class EvaluationReport
{
    /// <summary>UTC timestamp at which the report was created.</summary>
    public required DateTimeOffset GeneratedAtUtc { get; init; }

    /// <summary>Path of the index JSON the run was configured with.</summary>
    public required string IndexJsonPath { get; init; }

    /// <summary>Id diagnostics of the evaluated dataset.</summary>
    public required DatasetDiagnostics Dataset { get; init; }

    /// <summary>Number of evaluation cases in the run.</summary>
    public required int CaseCount { get; init; }

    /// <summary>Per-engine reports, in the order the engines were evaluated.</summary>
    public required IReadOnlyList<EngineEvaluationReport> Engines { get; init; }
}
|
||||
292
tools/SettingsSearchEvaluation/Evaluator.cs
Normal file
292
tools/SettingsSearchEvaluation/Evaluator.cs
Normal file
@@ -0,0 +1,292 @@
|
||||
// Copyright (c) Microsoft Corporation
|
||||
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||
// See the LICENSE file in the project root for more information.
|
||||
|
||||
using System.Diagnostics;
|
||||
using Common.Search;
|
||||
using Common.Search.FuzzSearch;
|
||||
using Common.Search.SemanticSearch;
|
||||
using Settings.UI.Library;
|
||||
|
||||
namespace SettingsSearchEvaluation;
|
||||
|
||||
/// <summary>
/// Runs the evaluation cases against each requested search engine and collects quality and
/// latency metrics into an <see cref="EvaluationReport"/>.
/// </summary>
internal static class Evaluator
{
    /// <summary>
    /// Evaluates every engine listed in <paramref name="options"/>, in order, against
    /// <paramref name="cases"/>.
    /// </summary>
    /// <param name="options">Run configuration (engines, iteration counts, result limits).</param>
    /// <param name="entries">Index entries to search over.</param>
    /// <param name="dataset">Diagnostics of <paramref name="entries"/>, copied into the report.</param>
    /// <param name="cases">Evaluation cases to run.</param>
    /// <param name="cancellationToken">Checked before each engine and each case.</param>
    /// <returns>A report with one entry per engine.</returns>
    /// <exception cref="ArgumentNullException">A required argument is null.</exception>
    /// <exception cref="InvalidOperationException">An engine kind is not supported.</exception>
    public static async Task<EvaluationReport> RunAsync(
        RunnerOptions options,
        IReadOnlyList<SettingEntry> entries,
        DatasetDiagnostics dataset,
        IReadOnlyList<EvaluationCase> cases,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(options);
        ArgumentNullException.ThrowIfNull(entries);
        ArgumentNullException.ThrowIfNull(dataset);
        ArgumentNullException.ThrowIfNull(cases);

        var engineReports = new List<EngineEvaluationReport>(options.Engines.Count);
        foreach (var kind in options.Engines)
        {
            cancellationToken.ThrowIfCancellationRequested();

            EngineEvaluationReport engineReport;
            switch (kind)
            {
                case SearchEngineKind.Basic:
                    engineReport = await EvaluateBasicAsync(options, entries, cases, cancellationToken);
                    break;
                case SearchEngineKind.Semantic:
                    engineReport = await EvaluateSemanticAsync(options, entries, cases, cancellationToken);
                    break;
                default:
                    throw new InvalidOperationException($"Unsupported engine '{kind}'.");
            }

            engineReports.Add(engineReport);
        }

        return new EvaluationReport
        {
            GeneratedAtUtc = DateTimeOffset.UtcNow,
            IndexJsonPath = options.IndexJsonPath,
            Dataset = dataset,
            CaseCount = cases.Count,
            Engines = engineReports,
        };
    }

    // Indexes the entries with the fuzzy engine (timed), then runs the query loop against it.
    private static async Task<EngineEvaluationReport> EvaluateBasicAsync(
        RunnerOptions options,
        IReadOnlyList<SettingEntry> entries,
        IReadOnlyList<EvaluationCase> cases,
        CancellationToken cancellationToken)
    {
        using var fuzzyEngine = new FuzzSearchEngine<SettingEntry>();

        // Indexing time covers both engine initialization and the batch indexing call.
        var indexingTimer = Stopwatch.StartNew();
        await fuzzyEngine.InitializeAsync(cancellationToken);
        await fuzzyEngine.IndexBatchAsync(entries, cancellationToken);
        indexingTimer.Stop();

        var queryMetrics = await EvaluateQueryLoopAsync(
            cases,
            options,
            (text, searchOpts, ct) => fuzzyEngine.SearchAsync(text, searchOpts, ct),
            cancellationToken);

        return new EngineEvaluationReport
        {
            Engine = SearchEngineKind.Basic,
            IsAvailable = true,
            AvailabilityError = null,
            CapabilitiesSummary = "Fuzzy text search engine",
            IndexedEntries = entries.Count,
            QueryCount = cases.Count,
            IndexingTimeMs = indexingTimer.Elapsed.TotalMilliseconds,
            RecallAtK = queryMetrics.RecallAtK,
            Mrr = queryMetrics.Mrr,
            SearchLatencyMs = queryMetrics.Latency,
            CaseResults = queryMetrics.CaseResults,
        };
    }

    // Initializes and indexes the semantic engine under a per-run index name, waits for indexing
    // to finish, then runs the query loop. Any failure before the query loop produces an
    // "unavailable" report instead of throwing.
    private static async Task<EngineEvaluationReport> EvaluateSemanticAsync(
        RunnerOptions options,
        IReadOnlyList<SettingEntry> entries,
        IReadOnlyList<EvaluationCase> cases,
        CancellationToken cancellationToken)
    {
        // Process id plus a fresh GUID keeps the index name unique per evaluation.
        var indexName = $"PowerToys.Settings.Eval.{Environment.ProcessId}.{Guid.NewGuid():N}";
        using var semanticEngine = new SemanticSearchEngine<SettingEntry>(indexName);

        var initResult = await semanticEngine.InitializeWithResultAsync(cancellationToken);
        if (initResult.IsFailure || !semanticEngine.IsReady)
        {
            return CreateUnavailableSemanticReport(
                FormatError(initResult.Error) ?? "Semantic engine is not ready.",
                null,
                0);
        }

        // Unlike the basic engine, indexing time here excludes initialization.
        var indexingTimer = Stopwatch.StartNew();
        var indexResult = await semanticEngine.IndexBatchWithResultAsync(entries, cancellationToken);
        if (indexResult.IsFailure)
        {
            return CreateUnavailableSemanticReport(
                FormatError(indexResult.Error) ?? "Semantic indexing failed.",
                BuildCapabilitiesSummary(semanticEngine.SemanticCapabilities),
                indexingTimer.Elapsed.TotalMilliseconds);
        }

        try
        {
            await semanticEngine.WaitForIndexingCompleteAsync(options.SemanticIndexTimeout);
        }
        catch (Exception ex)
        {
            return CreateUnavailableSemanticReport(
                $"Semantic indexing did not become idle: {ex.Message}",
                BuildCapabilitiesSummary(semanticEngine.SemanticCapabilities),
                indexingTimer.Elapsed.TotalMilliseconds);
        }

        indexingTimer.Stop();

        var queryMetrics = await EvaluateQueryLoopAsync(
            cases,
            options,
            async (text, searchOpts, ct) =>
            {
                // A result without a value is evaluated as an empty result list.
                var searchResult = await semanticEngine.SearchWithResultAsync(text, searchOpts, ct);
                return searchResult.Value ?? Array.Empty<SearchResult<SettingEntry>>();
            },
            cancellationToken);

        return new EngineEvaluationReport
        {
            Engine = SearchEngineKind.Semantic,
            IsAvailable = true,
            AvailabilityError = null,
            CapabilitiesSummary = BuildCapabilitiesSummary(semanticEngine.SemanticCapabilities),
            IndexedEntries = entries.Count,
            QueryCount = cases.Count,
            IndexingTimeMs = indexingTimer.Elapsed.TotalMilliseconds,
            RecallAtK = queryMetrics.RecallAtK,
            Mrr = queryMetrics.Mrr,
            SearchLatencyMs = queryMetrics.Latency,
            CaseResults = queryMetrics.CaseResults,
        };
    }

    // Report for a semantic engine that could not be evaluated: zero metrics and no case results.
    private static EngineEvaluationReport CreateUnavailableSemanticReport(
        string availabilityError,
        string? capabilitiesSummary,
        double indexingTimeMs)
    {
        return new EngineEvaluationReport
        {
            Engine = SearchEngineKind.Semantic,
            IsAvailable = false,
            AvailabilityError = availabilityError,
            CapabilitiesSummary = capabilitiesSummary,
            IndexedEntries = 0,
            QueryCount = 0,
            IndexingTimeMs = indexingTimeMs,
            RecallAtK = 0,
            Mrr = 0,
            SearchLatencyMs = LatencySummary.Empty,
            CaseResults = Array.Empty<QueryEvaluationResult>(),
        };
    }

    // For each case: runs the warmup searches, then the measured searches (each timed), and
    // scores the result of the first measured search. Recall@K and MRR are averaged over the
    // number of cases (at least 1, to avoid dividing by zero).
    private static async Task<QueryRunMetrics> EvaluateQueryLoopAsync(
        IReadOnlyList<EvaluationCase> cases,
        RunnerOptions options,
        Func<string, SearchOptions, CancellationToken, Task<IReadOnlyList<SearchResult<SettingEntry>>>> searchAsync,
        CancellationToken cancellationToken)
    {
        var searchOptions = new SearchOptions
        {
            MaxResults = options.MaxResults,
            IncludeMatchSpans = false,
        };

        var perCaseResults = new List<QueryEvaluationResult>(cases.Count);
        var timingsMs = new List<double>(Math.Max(1, cases.Count * options.Iterations));
        var hitCount = 0;
        var reciprocalRankTotal = 0.0;

        foreach (var currentCase in cases)
        {
            cancellationToken.ThrowIfCancellationRequested();

            // Warmup searches are neither timed nor scored.
            for (var remaining = options.WarmupIterations; remaining > 0; remaining--)
            {
                _ = await searchAsync(currentCase.Query, searchOptions, cancellationToken);
            }

            IReadOnlyList<SearchResult<SettingEntry>> scoredResult = Array.Empty<SearchResult<SettingEntry>>();
            for (var run = 0; run < options.Iterations; run++)
            {
                var timer = Stopwatch.StartNew();
                var runResult = await searchAsync(currentCase.Query, searchOptions, cancellationToken);
                timer.Stop();
                timingsMs.Add(timer.Elapsed.TotalMilliseconds);

                // Only the first measured run is used for ranking metrics.
                if (run == 0)
                {
                    scoredResult = runResult;
                }
            }

            var rankedIds = scoredResult.Select(found => found.Item.Id).ToArray();
            var expectedIds = new HashSet<string>(currentCase.ExpectedIds, StringComparer.OrdinalIgnoreCase);
            var bestRank = EvaluationMath.FindBestRank(rankedIds, expectedIds);
            var isHit = bestRank >= 1 && bestRank <= options.TopK;

            hitCount += isHit ? 1 : 0;

            // The reciprocal rank counts any match, even one ranked beyond TopK.
            if (bestRank >= 1)
            {
                reciprocalRankTotal += 1.0 / bestRank;
            }

            perCaseResults.Add(new QueryEvaluationResult
            {
                Query = currentCase.Query,
                ExpectedIds = currentCase.ExpectedIds,
                TopResultIds = rankedIds.Take(options.TopK).ToArray(),
                BestRank = bestRank,
                HitAtK = isHit,
                Notes = currentCase.Notes,
            });
        }

        var divisor = Math.Max(1, cases.Count);
        return new QueryRunMetrics
        {
            CaseResults = perCaseResults,
            RecallAtK = hitCount / (double)divisor,
            Mrr = reciprocalRankTotal / divisor,
            Latency = EvaluationMath.ComputeLatencySummary(timingsMs),
        };
    }

    // Formats an error as "Message (Details)" when details are present, else just the message;
    // returns null when there is no error.
    private static string? FormatError(SearchError? error)
    {
        if (error is null)
        {
            return null;
        }

        return string.IsNullOrWhiteSpace(error.Details)
            ? error.Message
            : $"{error.Message} ({error.Details})";
    }

    // One-line listing of the capability flags, or a placeholder when none are reported.
    private static string BuildCapabilitiesSummary(SemanticSearchCapabilities? capabilities)
    {
        return capabilities is null
            ? "Capabilities unavailable"
            : $"TextLexical={capabilities.TextLexicalAvailable}, TextSemantic={capabilities.TextSemanticAvailable}, ImageSemantic={capabilities.ImageSemanticAvailable}, ImageOcr={capabilities.ImageOcrAvailable}";
    }

    // Aggregated output of one query loop.
    private sealed class QueryRunMetrics
    {
        public required IReadOnlyList<QueryEvaluationResult> CaseResults { get; init; }

        public required double RecallAtK { get; init; }

        public required double Mrr { get; init; }

        public required LatencySummary Latency { get; init; }
    }
}
|
||||
22
tools/SettingsSearchEvaluation/LatencySummary.cs
Normal file
22
tools/SettingsSearchEvaluation/LatencySummary.cs
Normal file
@@ -0,0 +1,22 @@
|
||||
// Copyright (c) Microsoft Corporation
|
||||
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||
// See the LICENSE file in the project root for more information.
|
||||
|
||||
namespace SettingsSearchEvaluation;
|
||||
|
||||
/// <summary>
/// Summary statistics for a set of latency samples. The <c>Ms</c> suffix on the
/// property names indicates the values are expressed in milliseconds.
/// </summary>
internal sealed class LatencySummary
{
    /// <summary>Gets a shared instance in which every statistic has its default value of zero.</summary>
    public static LatencySummary Empty { get; } = new();

    /// <summary>Gets the number of samples the summary was computed from.</summary>
    public int Samples { get; init; }

    /// <summary>Gets the smallest sample.</summary>
    public double MinMs { get; init; }

    /// <summary>Gets the 50th-percentile (P50) value.</summary>
    public double P50Ms { get; init; }

    /// <summary>Gets the 95th-percentile (P95) value.</summary>
    public double P95Ms { get; init; }

    /// <summary>Gets the largest sample.</summary>
    public double MaxMs { get; init; }

    /// <summary>Gets the average of the samples.</summary>
    public double AverageMs { get; init; }
}
|
||||
361
tools/SettingsSearchEvaluation/Program.cs
Normal file
361
tools/SettingsSearchEvaluation/Program.cs
Normal file
@@ -0,0 +1,361 @@
|
||||
// Copyright (c) Microsoft Corporation
|
||||
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||
// See the LICENSE file in the project root for more information.
|
||||
|
||||
using System.Text.Json;
|
||||
|
||||
namespace SettingsSearchEvaluation;
|
||||
|
||||
internal static class Program
|
||||
{
|
||||
// Serializer settings for the report file: indented so the JSON is readable by humans.
private static readonly JsonSerializerOptions OutputJsonOptions = new() { WriteIndented = true };
|
||||
|
||||
/// <summary>
/// Process entry point. Runs the asynchronous pipeline to completion and converts any
/// exception that escapes it into a message on stderr plus exit code 99.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
/// <returns>The exit code produced by <c>MainAsync</c>, or 99 on an unhandled exception.</returns>
private static int Main(string[] args)
{
    int exitCode;
    try
    {
        exitCode = MainAsync(args).GetAwaiter().GetResult();
    }
    catch (Exception unhandled)
    {
        Console.Error.WriteLine($"Unhandled error: {unhandled.Message}");
        exitCode = 99;
    }

    return exitCode;
}
|
||||
|
||||
/// <summary>
/// Parses the command line, loads the search index and the evaluation cases, runs the
/// evaluation, prints a summary and optionally writes the full report as JSON.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
/// <returns>
/// The process exit code: 0 on success or when help was requested, 2 when the arguments are
/// invalid, 3 when an input file is missing or no evaluation cases were found, and 4 when none
/// of the evaluated engines was available.
/// </returns>
private static async Task<int> MainAsync(string[] args)
{
    // Help wins over everything else, including otherwise invalid arguments.
    if (args.Any(arg => string.Equals(arg, "--help", StringComparison.OrdinalIgnoreCase) || string.Equals(arg, "-h", StringComparison.OrdinalIgnoreCase)))
    {
        PrintUsage();
        return 0;
    }

    if (!TryParseArgs(args, out var options, out var parseError))
    {
        Console.Error.WriteLine(parseError);
        Console.Error.WriteLine();
        PrintUsage();
        return 2;
    }

    if (!File.Exists(options.IndexJsonPath))
    {
        Console.Error.WriteLine($"Index file not found: {options.IndexJsonPath}");
        return 3;
    }

    // The cases file is optional; only validate it when one was supplied.
    if (!string.IsNullOrWhiteSpace(options.CasesJsonPath) && !File.Exists(options.CasesJsonPath))
    {
        Console.Error.WriteLine($"Cases file not found: {options.CasesJsonPath}");
        return 3;
    }

    var (entries, dataset) = EvaluationDataLoader.LoadEntriesFromFile(options.IndexJsonPath);
    var cases = EvaluationDataLoader.LoadCases(options.CasesJsonPath, entries);
    if (cases.Count == 0)
    {
        Console.Error.WriteLine("No valid evaluation cases were found.");
        return 3;
    }

    Console.WriteLine($"Loaded {entries.Count} entries from '{options.IndexJsonPath}'.");
    Console.WriteLine($"Cases: {cases.Count}");
    Console.WriteLine($"Duplicate id buckets: {dataset.DuplicateIdBucketCount}");
    if (dataset.DuplicateIdBucketCount > 0)
    {
        // Show only the five most frequent duplicates to keep the console output short.
        var largest = dataset.DuplicateIdCounts
            .OrderByDescending(x => x.Value)
            .Take(5)
            .Select(x => $"{x.Key} x{x.Value}");
        Console.WriteLine($"Top duplicate ids: {string.Join(", ", largest)}");
    }

    var report = await Evaluator.RunAsync(options, entries, dataset, cases);
    PrintSummary(report, options.TopK);

    if (!string.IsNullOrWhiteSpace(options.OutputJsonPath))
    {
        // A bare file name has no directory component, in which case there is nothing to create.
        var outputDirectory = Path.GetDirectoryName(options.OutputJsonPath);
        if (!string.IsNullOrWhiteSpace(outputDirectory))
        {
            Directory.CreateDirectory(outputDirectory);
        }

        var json = JsonSerializer.Serialize(report, OutputJsonOptions);

        // Stay asynchronous end to end instead of blocking inside an async method.
        await File.WriteAllTextAsync(options.OutputJsonPath, json);
        Console.WriteLine($"Wrote report to '{options.OutputJsonPath}'.");
    }

    return report.Engines.Any(engine => engine.IsAvailable) ? 0 : 4;
}
|
||||
|
||||
/// <summary>
/// Parses the command-line arguments into a <see cref="RunnerOptions"/> instance.
/// </summary>
/// <param name="args">Raw command-line arguments; option names are matched case-insensitively.</param>
/// <param name="options">The parsed options on success; <see langword="null"/> on failure.</param>
/// <param name="error">A description of the first problem found, or an empty string on success.</param>
/// <returns><see langword="true"/> when every argument was recognized and valid.</returns>
/// <remarks>
/// Defaults when an option is omitted: both engines, 10 max results, top-K of 5, 5 measured
/// iterations, 1 warmup iteration and a 15000 ms semantic index timeout. A missing or blank
/// index path falls back to the index file inside the repository.
/// </remarks>
private static bool TryParseArgs(string[] args, out RunnerOptions options, out string error)
{
    // Start in the failure state so each early exit only has to supply its message.
    options = null!;
    error = string.Empty;

    string? indexPath = null;
    string? casesPath = null;
    string? outputPath = null;
    var maxResults = 10;
    var topK = 5;
    var iterations = 5;
    var warmup = 1;
    var semanticTimeoutMs = 15000;
    IReadOnlyList<SearchEngineKind> engines = new[] { SearchEngineKind.Basic, SearchEngineKind.Semantic };

    for (int i = 0; i < args.Length; i++)
    {
        var arg = args[i];
        switch (arg.ToLowerInvariant())
        {
            case "--index-json":
                if (!TryReadValue(args, ref i, out indexPath))
                {
                    error = "Missing value for --index-json";
                    return false;
                }

                break;
            case "--cases-json":
                if (!TryReadValue(args, ref i, out casesPath))
                {
                    error = "Missing value for --cases-json";
                    return false;
                }

                break;
            case "--output-json":
                if (!TryReadValue(args, ref i, out outputPath))
                {
                    error = "Missing value for --output-json";
                    return false;
                }

                break;
            case "--engine":
                if (!TryReadValue(args, ref i, out var engineText))
                {
                    error = "Missing value for --engine";
                    return false;
                }

                if (!TryParseEngines(engineText!, out engines))
                {
                    error = "Invalid --engine value. Allowed values: basic, semantic, both.";
                    return false;
                }

                break;
            case "--max-results":
                if (!TryReadInt(args, ref i, out maxResults) || maxResults <= 0)
                {
                    error = "Invalid --max-results value. Must be a positive integer.";
                    return false;
                }

                break;
            case "--top-k":
                if (!TryReadInt(args, ref i, out topK) || topK <= 0)
                {
                    error = "Invalid --top-k value. Must be a positive integer.";
                    return false;
                }

                break;
            case "--iterations":
                if (!TryReadInt(args, ref i, out iterations) || iterations <= 0)
                {
                    error = "Invalid --iterations value. Must be a positive integer.";
                    return false;
                }

                break;
            case "--warmup":
                // Zero is allowed: it disables warmup runs entirely.
                if (!TryReadInt(args, ref i, out warmup) || warmup < 0)
                {
                    error = "Invalid --warmup value. Must be a non-negative integer.";
                    return false;
                }

                break;
            case "--semantic-timeout-ms":
                if (!TryReadInt(args, ref i, out semanticTimeoutMs) || semanticTimeoutMs <= 0)
                {
                    error = "Invalid --semantic-timeout-ms value. Must be a positive integer.";
                    return false;
                }

                break;
            default:
                error = $"Unknown argument: {arg}";
                return false;
        }
    }

    // A blank index path is treated like the other optional paths (as "not provided") rather
    // than being handed to Path.GetFullPath, which rejects empty strings. The default is only
    // resolved when needed because locating it walks the directory tree.
    var effectiveIndexPath = string.IsNullOrWhiteSpace(indexPath) ? GetDefaultIndexPath() : indexPath;

    options = new RunnerOptions
    {
        IndexJsonPath = Path.GetFullPath(effectiveIndexPath),
        CasesJsonPath = string.IsNullOrWhiteSpace(casesPath) ? null : Path.GetFullPath(casesPath),
        Engines = engines,
        MaxResults = maxResults,
        TopK = topK,
        Iterations = iterations,
        WarmupIterations = warmup,
        SemanticIndexTimeout = TimeSpan.FromMilliseconds(semanticTimeoutMs),
        OutputJsonPath = string.IsNullOrWhiteSpace(outputPath) ? null : Path.GetFullPath(outputPath),
    };
    return true;
}
|
||||
|
||||
/// <summary>
/// Computes the default location of the settings search index inside the repository.
/// </summary>
/// <returns>
/// The absolute path of <c>search.index.json</c>, rooted at the repository root found from the
/// application base directory, or at the current directory when no root is found.
/// </returns>
private static string GetDefaultIndexPath()
{
    var root = FindRepoRoot(AppContext.BaseDirectory) ?? Environment.CurrentDirectory;
    var indexPath = Path.Combine(root, "src", "settings-ui", "Settings.UI", "Assets", "Settings", "search.index.json");
    return Path.GetFullPath(indexPath);
}
|
||||
|
||||
/// <summary>
/// Walks from <paramref name="startingDirectory"/> up through its ancestors looking for the
/// directory that contains the <c>PowerToys.slnx</c> marker file.
/// </summary>
/// <param name="startingDirectory">The directory where the upward search begins.</param>
/// <returns>The full path of the first directory containing the marker, or <see langword="null"/> if none does.</returns>
private static string? FindRepoRoot(string startingDirectory)
{
    for (var directory = new DirectoryInfo(startingDirectory); directory != null; directory = directory.Parent)
    {
        if (File.Exists(Path.Combine(directory.FullName, "PowerToys.slnx")))
        {
            return directory.FullName;
        }
    }

    return null;
}
|
||||
|
||||
/// <summary>
/// Maps the <c>--engine</c> argument text to the list of engines to evaluate.
/// </summary>
/// <param name="value">One of "basic", "semantic" or "both" (case-insensitive).</param>
/// <param name="engines">The selected engines on success; an empty list on failure.</param>
/// <returns><see langword="true"/> when <paramref name="value"/> is a recognized keyword.</returns>
private static bool TryParseEngines(string value, out IReadOnlyList<SearchEngineKind> engines)
{
    bool Matches(string keyword) => string.Equals(value, keyword, StringComparison.OrdinalIgnoreCase);

    SearchEngineKind[]? selection =
        Matches("both") ? new[] { SearchEngineKind.Basic, SearchEngineKind.Semantic }
        : Matches("basic") ? new[] { SearchEngineKind.Basic }
        : Matches("semantic") ? new[] { SearchEngineKind.Semantic }
        : null;

    engines = selection ?? Array.Empty<SearchEngineKind>();
    return selection != null;
}
|
||||
|
||||
/// <summary>
/// Reads the argument that follows the option at <paramref name="index"/>.
/// </summary>
/// <param name="args">The full argument list.</param>
/// <param name="index">
/// Position of the option being processed; advanced to the value's position on success and
/// left untouched on failure.
/// </param>
/// <param name="value">The value that follows the option, or <see langword="null"/> when there is none.</param>
/// <returns><see langword="true"/> when a following argument exists.</returns>
private static bool TryReadValue(string[] args, ref int index, out string? value)
{
    var valueIndex = index + 1;
    if (valueIndex < args.Length)
    {
        index = valueIndex;
        value = args[valueIndex];
        return true;
    }

    value = null;
    return false;
}
|
||||
|
||||
/// <summary>
/// Reads the argument that follows the option at <paramref name="index"/> and parses it as an integer.
/// </summary>
/// <param name="args">The full argument list.</param>
/// <param name="index">Position of the option being processed; advanced when a following argument exists.</param>
/// <param name="value">The parsed integer, or 0 when the value is missing or not a valid integer.</param>
/// <returns><see langword="true"/> when a following argument exists and is a valid integer.</returns>
private static bool TryReadInt(string[] args, ref int index, out int value)
{
    value = 0;
    if (!TryReadValue(args, ref index, out var text))
    {
        return false;
    }

    // Command-line numbers are machine-readable input, so parse them the same way on every
    // machine instead of depending on the current culture's sign symbols.
    return int.TryParse(
        text,
        System.Globalization.NumberStyles.Integer,
        System.Globalization.CultureInfo.InvariantCulture,
        out value);
}
|
||||
|
||||
/// <summary>
/// Writes a human-readable summary of the evaluation report to the console: dataset totals
/// followed by one section per engine with its metrics and up to three sample misses.
/// </summary>
/// <param name="report">The report to summarize.</param>
/// <param name="topK">The K used in the "Recall@K" label.</param>
private static void PrintSummary(EvaluationReport report, int topK)
{
    Console.WriteLine();
    Console.WriteLine("=== Evaluation Summary ===");
    Console.WriteLine($"Generated: {report.GeneratedAtUtc:O}");
    Console.WriteLine($"Dataset entries: {report.Dataset.TotalEntries} ({report.Dataset.DistinctIds} distinct ids)");
    Console.WriteLine($"Cases: {report.CaseCount}");
    Console.WriteLine();

    foreach (var engine in report.Engines)
    {
        Console.WriteLine($"[{engine.Engine}]");

        if (engine.IsAvailable)
        {
            var latency = engine.SearchLatencyMs;

            Console.WriteLine($" Capabilities: {engine.CapabilitiesSummary}");
            Console.WriteLine($" Indexed entries: {engine.IndexedEntries}");
            Console.WriteLine($" Indexing time (ms): {engine.IndexingTimeMs:F2}");
            Console.WriteLine($" Recall@{topK}: {engine.RecallAtK:F4}");
            Console.WriteLine($" MRR: {engine.Mrr:F4}");
            Console.WriteLine($" Search latency ms (avg/p50/p95/max): {latency.AverageMs:F2}/{latency.P50Ms:F2}/{latency.P95Ms:F2}/{latency.MaxMs:F2}");

            // Only the first three misses are shown; the full list lives in the report itself.
            var sampleMisses = engine.CaseResults
                .Where(result => !result.HitAtK)
                .Take(3)
                .ToArray();

            if (sampleMisses.Length != 0)
            {
                Console.WriteLine(" Sample misses:");
                foreach (var miss in sampleMisses)
                {
                    var top = miss.TopResultIds.Count > 0 ? string.Join(", ", miss.TopResultIds) : "(none)";
                    Console.WriteLine($" Query='{miss.Query}', expected='{string.Join("|", miss.ExpectedIds)}', top='{top}'");
                }
            }
        }
        else
        {
            Console.WriteLine($" Unavailable: {engine.AvailabilityError}");
        }

        // Blank line separates engine sections in both the available and unavailable case.
        Console.WriteLine();
    }
}
|
||||
|
||||
/// <summary>
/// Writes the command-line help text to standard output.
/// </summary>
private static void PrintUsage()
{
    // One array element per output line; empty strings produce the blank separator lines.
    string[] helpLines =
    {
        "SettingsSearchEvaluation",
        "Evaluates basic and semantic settings search for recall and performance.",
        string.Empty,
        "Usage:",
        " SettingsSearchEvaluation [options]",
        string.Empty,
        "Options:",
        " --index-json <path> Path to settings search index JSON.",
        " --cases-json <path> Optional path to evaluation cases JSON.",
        " --engine <basic|semantic|both> Engine selection. Default: both.",
        " --max-results <n> Maximum returned results per query. Default: 10.",
        " --top-k <n> Recall cut-off K. Default: 5.",
        " --iterations <n> Measured runs per query. Default: 5.",
        " --warmup <n> Warmup runs per query. Default: 1.",
        " --semantic-timeout-ms <n> Semantic index idle wait timeout in ms. Default: 15000.",
        " --output-json <path> Optional output report file.",
        " --help Show this help.",
    };

    foreach (var line in helpLines)
    {
        Console.WriteLine(line);
    }
}
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
// Copyright (c) Microsoft Corporation
|
||||
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||
// See the LICENSE file in the project root for more information.
|
||||
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
[assembly: InternalsVisibleTo("SettingsSearchEvaluation.Tests")]
|
||||
20
tools/SettingsSearchEvaluation/QueryEvaluationResult.cs
Normal file
20
tools/SettingsSearchEvaluation/QueryEvaluationResult.cs
Normal file
@@ -0,0 +1,20 @@
|
||||
// Copyright (c) Microsoft Corporation
|
||||
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||
// See the LICENSE file in the project root for more information.
|
||||
|
||||
namespace SettingsSearchEvaluation;
|
||||
|
||||
/// <summary>
/// Outcome of evaluating a single query against its expected result ids.
/// </summary>
internal sealed class QueryEvaluationResult
{
    /// <summary>Gets the query text that was searched.</summary>
    public required string Query { get; init; }

    /// <summary>Gets the ids that count as a correct answer for the query.</summary>
    public required IReadOnlyList<string> ExpectedIds { get; init; }

    /// <summary>Gets the ids of the highest-ranked results that were returned.</summary>
    public required IReadOnlyList<string> TopResultIds { get; init; }

    /// <summary>Gets the best rank at which an expected id was found.</summary>
    public required int BestRank { get; init; }

    /// <summary>Gets a value indicating whether an expected id appeared within the top-K cut-off.</summary>
    public required bool HitAtK { get; init; }

    /// <summary>Gets the optional free-form notes attached to the evaluation case.</summary>
    public string? Notes { get; init; }
}
|
||||
51
tools/SettingsSearchEvaluation/README.md
Normal file
51
tools/SettingsSearchEvaluation/README.md
Normal file
@@ -0,0 +1,51 @@
|
||||
# Settings Search Evaluation
|
||||
|
||||
This tool evaluates Settings search quality and latency for:
|
||||
|
||||
- `basic` search (`FuzzSearchEngine`)
|
||||
- `semantic` search (`SemanticSearchEngine`)
|
||||
|
||||
It reports:
|
||||
|
||||
- `Recall@K`
|
||||
- `MRR` (mean reciprocal rank)
|
||||
- Search latency (`avg`, `p50`, `p95`, `max`)
|
||||
- Dataset diagnostics including duplicate `SettingEntry.Id` buckets
|
||||
|
||||
## Run
|
||||
|
||||
Build with Visual Studio `MSBuild.exe` (the project references native components). The example below targets `arm64`; pass `/p:Platform=x64` instead on x64 machines:
|
||||
|
||||
```powershell
|
||||
$vswhere = "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe"
|
||||
$msbuild = & $vswhere -latest -products * -requires Microsoft.Component.MSBuild -find MSBuild\**\Bin\MSBuild.exe
|
||||
& $msbuild tools\SettingsSearchEvaluation\SettingsSearchEvaluation.csproj `
|
||||
/t:Build /p:Configuration=Debug /p:Platform=arm64 /m:1 /nologo
|
||||
```
|
||||
|
||||
Run the built executable (adjust the `arm64` path segment if you built for a different platform):
|
||||
|
||||
```powershell
|
||||
.\tools\SettingsSearchEvaluation\bin\arm64\Debug\net9.0-windows10.0.26100.0\SettingsSearchEvaluation.exe `
|
||||
--index-json src/settings-ui/Settings.UI/Assets/Settings/search.index.json `
|
||||
--cases-json tools/SettingsSearchEvaluation/cases/settings-search-cases.sample.json `
|
||||
--engine both `
|
||||
--top-k 5 `
|
||||
--iterations 5 `
|
||||
--warmup 1 `
|
||||
--output-json tools/SettingsSearchEvaluation/artifacts/report.json
|
||||
```
|
||||
|
||||
## Case file format
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"query": "color picker",
|
||||
"expectedIds": ["ColorPicker"],
|
||||
"notes": "Module entry"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
If `--cases-json` is not provided, fallback cases are auto-generated from the index headers.
|
||||
26
tools/SettingsSearchEvaluation/RunnerOptions.cs
Normal file
26
tools/SettingsSearchEvaluation/RunnerOptions.cs
Normal file
@@ -0,0 +1,26 @@
|
||||
// Copyright (c) Microsoft Corporation
|
||||
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||
// See the LICENSE file in the project root for more information.
|
||||
|
||||
namespace SettingsSearchEvaluation;
|
||||
|
||||
/// <summary>
/// Settings that control a single evaluation run.
/// </summary>
internal sealed class RunnerOptions
{
    /// <summary>Gets the path of the settings search index JSON file.</summary>
    public required string IndexJsonPath { get; init; }

    /// <summary>Gets the optional path of the evaluation cases JSON file.</summary>
    public string? CasesJsonPath { get; init; }

    /// <summary>Gets the search engines to evaluate.</summary>
    public required IReadOnlyList<SearchEngineKind> Engines { get; init; }

    /// <summary>Gets the maximum number of results returned per query. Defaults to 10.</summary>
    public int MaxResults { get; init; } = 10;

    /// <summary>Gets the recall cut-off K. Defaults to 5.</summary>
    public int TopK { get; init; } = 5;

    /// <summary>Gets the number of measured runs per query. Defaults to 5.</summary>
    public int Iterations { get; init; } = 5;

    /// <summary>Gets the number of warmup runs per query. Defaults to 1.</summary>
    public int WarmupIterations { get; init; } = 1;

    /// <summary>Gets the semantic index wait timeout. Defaults to 15 seconds.</summary>
    public TimeSpan SemanticIndexTimeout { get; init; } = TimeSpan.FromSeconds(15);

    /// <summary>Gets the optional path of the JSON report file to write.</summary>
    public string? OutputJsonPath { get; init; }
}
|
||||
11
tools/SettingsSearchEvaluation/SearchEngineKind.cs
Normal file
11
tools/SettingsSearchEvaluation/SearchEngineKind.cs
Normal file
@@ -0,0 +1,11 @@
|
||||
// Copyright (c) Microsoft Corporation
|
||||
// The Microsoft Corporation licenses this file to you under the MIT license.
|
||||
// See the LICENSE file in the project root for more information.
|
||||
|
||||
namespace SettingsSearchEvaluation;
|
||||
|
||||
/// <summary>
/// Identifies which settings search engine an evaluation run targets.
/// </summary>
internal enum SearchEngineKind
{
    /// <summary>The "basic" search engine.</summary>
    Basic,

    /// <summary>The "semantic" search engine.</summary>
    Semantic,
}
|
||||
@@ -0,0 +1,18 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<!-- Look at Directory.Build.props in root for common stuff as well -->
|
||||
<Import Project="..\..\src\Common.Dotnet.CsWinRT.props" />
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<RootNamespace>SettingsSearchEvaluation</RootNamespace>
|
||||
<AssemblyName>SettingsSearchEvaluation</AssemblyName>
|
||||
<Nullable>enable</Nullable>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<DefaultItemExcludes>$(DefaultItemExcludes);artifacts\**\*;bin\**\*;obj\**\*</DefaultItemExcludes>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\..\src\common\Common.Search\Common.Search.csproj" />
|
||||
<ProjectReference Include="..\..\src\settings-ui\Settings.UI.Library\Settings.UI.Library.csproj" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -0,0 +1,72 @@
|
||||
[
|
||||
{
|
||||
"query": "always on top",
|
||||
"expectedIds": [ "AlwaysOnTop" ],
|
||||
"notes": "Module entry"
|
||||
},
|
||||
{
|
||||
"query": "awake mode",
|
||||
"expectedIds": [ "Awake_ModeSettingsCard" ],
|
||||
"notes": "Feature setting"
|
||||
},
|
||||
{
|
||||
"query": "color picker",
|
||||
"expectedIds": [ "ColorPicker" ],
|
||||
"notes": "Module entry"
|
||||
},
|
||||
{
|
||||
"query": "fancy zones",
|
||||
"expectedIds": [ "FancyZones" ],
|
||||
"notes": "Module entry"
|
||||
},
|
||||
{
|
||||
"query": "launch cmd pal",
|
||||
"expectedIds": [ "CmdPal_Launch" ],
|
||||
"notes": "CmdPal launch setting"
|
||||
},
|
||||
{
|
||||
"query": "image resizer",
|
||||
"expectedIds": [ "ImageResizer" ],
|
||||
"notes": "Module entry"
|
||||
},
|
||||
{
|
||||
"query": "keyboard manager",
|
||||
"expectedIds": [ "KeyboardManager" ],
|
||||
"notes": "Module entry"
|
||||
},
|
||||
{
|
||||
"query": "mouse jump",
|
||||
"expectedIds": [ "MouseUtils_Enable_MouseJump" ],
|
||||
"notes": "Module entry"
|
||||
},
|
||||
{
|
||||
"query": "mouse without borders",
|
||||
"expectedIds": [ "MouseWithoutBorders" ],
|
||||
"notes": "Module setting"
|
||||
},
|
||||
{
|
||||
"query": "peek",
|
||||
"expectedIds": [ "Peek" ],
|
||||
"notes": "Module entry"
|
||||
},
|
||||
{
|
||||
"query": "power rename",
|
||||
"expectedIds": [ "PowerRename" ],
|
||||
"notes": "Module entry"
|
||||
},
|
||||
{
|
||||
"query": "power toys run",
|
||||
"expectedIds": [ "PowerLauncher" ],
|
||||
"notes": "Module entry"
|
||||
},
|
||||
{
|
||||
"query": "registry preview",
|
||||
"expectedIds": [ "RegistryPreview" ],
|
||||
"notes": "Module entry"
|
||||
},
|
||||
{
|
||||
"query": "workspaces",
|
||||
"expectedIds": [ "Workspaces_EnableToggleControl_HeaderText" ],
|
||||
"notes": "Module setting"
|
||||
}
|
||||
]
|
||||
Reference in New Issue
Block a user