Compare commits

...

5 Commits

Author SHA1 Message Date
Shawn Yuan (from Dev Box)
0dc4aebcf4 Fix issue when generating more images 2025-12-22 14:38:48 +08:00
Shawn Yuan (from Dev Box)
4a1f8293de add image preview support
Signed-off-by: Shawn Yuan (from Dev Box) <shuaiyuan@microsoft.com>
2025-12-18 10:35:47 +08:00
Shawn Yuan (from Dev Box)
57bb659f4d implemented text to image
Signed-off-by: Shawn Yuan (from Dev Box) <shuaiyuan@microsoft.com>
2025-12-17 17:03:33 +08:00
Shawn Yuan (from Dev Box)
400865a45f Implemented text to image
Signed-off-by: Shawn Yuan (from Dev Box) <shuaiyuan@microsoft.com>
2025-12-17 15:37:08 +08:00
Shawn Yuan (from Dev Box)
9318e41451 init
Signed-off-by: Shawn Yuan (from Dev Box) <shuaiyuan@microsoft.com>
2025-12-17 14:40:42 +08:00
12 changed files with 326 additions and 40 deletions

View File

@@ -430,12 +430,19 @@
Grid.Row="1"
MinHeight="104"
MaxHeight="320">
<TextBlock
Foreground="{ThemeResource TextFillColorSecondaryBrush}"
IsTextSelectionEnabled="True"
Style="{StaticResource CaptionTextBlockStyle}"
Text="{x:Bind ViewModel.CustomFormatResult, Mode=OneWay}"
TextWrapping="Wrap" />
<StackPanel>
<TextBlock
Foreground="{ThemeResource TextFillColorSecondaryBrush}"
IsTextSelectionEnabled="True"
Style="{StaticResource CaptionTextBlockStyle}"
Text="{x:Bind ViewModel.CustomFormatResult, Mode=OneWay}"
TextWrapping="Wrap"
Visibility="{x:Bind ViewModel.HasCustomFormatText, Mode=OneWay, Converter={StaticResource BoolToVisibilityConverter}}" />
<Image
Source="{x:Bind ViewModel.CustomFormatImageResult, Mode=OneWay}"
Stretch="Uniform"
Visibility="{x:Bind ViewModel.HasCustomFormatImage, Mode=OneWay, Converter={StaticResource BoolToVisibilityConverter}}" />
</StackPanel>
</ScrollViewer>
</Grid>
<Rectangle

View File

@@ -46,6 +46,13 @@ internal static class DataPackageHelpers
return dataPackage;
}
/// <summary>
/// Creates a new <see cref="DataPackage"/> whose bitmap content is the supplied stream reference.
/// </summary>
/// <param name="imageStreamRef">Stream reference passed to <c>DataPackage.SetBitmap</c>.</param>
/// <returns>The newly created data package.</returns>
internal static DataPackage CreateFromImage(RandomAccessStreamReference imageStreamRef)
{
    var imagePackage = new DataPackage();
    imagePackage.SetBitmap(imageStreamRef);

    return imagePackage;
}
internal static async Task<DataPackage> CreateFromFileAsync(string fileName)
{
var storageFile = await StorageFile.GetFileFromPathAsync(fileName);

View File

@@ -0,0 +1,14 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using Windows.ApplicationModel.DataTransfer;
namespace AdvancedPaste.Models;
/// <summary>
/// Pairs a preview item for a generated result with the data package that holds the result itself.
/// </summary>
public class GeneratedResponse
{
/// <summary>
/// Gets or sets the clipboard item used to present this response as a preview.
/// </summary>
public ClipboardItem Preview { get; set; }
/// <summary>
/// Gets or sets the data package associated with this response.
/// </summary>
public DataPackage Data { get; set; }
}

View File

@@ -122,4 +122,15 @@ public enum PasteFormats
KernelFunctionDescription = "Takes input instructions and transforms clipboard text (not TXT files) with these input instructions, putting the result back on the clipboard. This uses AI to accomplish the task.",
RequiresPrompt = true)]
CustomTextTransformation,
[PasteFormatMetadata(
IsCoreAction = false,
ResourceId = "TextToImage",
IconGlyph = "\uE91B",
RequiresAIService = true,
CanPreview = false,
SupportedClipboardFormats = ClipboardFormat.Text,
KernelFunctionDescription = "Generates an image based on the text description in the clipboard.",
RequiresPrompt = true)]
TextToImage,
}

View File

@@ -12,6 +12,7 @@ using Microsoft.PowerToys.Settings.UI.Library;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.OpenAI;
using Microsoft.SemanticKernel.TextToImage;
namespace AdvancedPaste.Services;
@@ -45,24 +46,23 @@ public sealed class AdvancedAIKernelService : KernelServiceBase
protected override PromptExecutionSettings PromptExecutionSettings => CreatePromptExecutionSettings();
protected override void AddChatCompletionService(IKernelBuilder kernelBuilder)
/// <summary>
/// Registers the AI services on the kernel builder: the chat completion service first,
/// followed by the auxiliary services.
/// </summary>
/// <param name="kernelBuilder">Builder that receives the service registrations.</param>
/// <exception cref="ArgumentNullException"><paramref name="kernelBuilder"/> is null.</exception>
protected override void AddAIServices(IKernelBuilder kernelBuilder)
{
ArgumentNullException.ThrowIfNull(kernelBuilder);
// 1. Register the primary Chat Completion Service
RegisterChatService(kernelBuilder);
// 2. Register auxiliary services (e.g., TextToImage) from the configured providers
RegisterAuxiliaryServices(kernelBuilder);
}
private void RegisterChatService(IKernelBuilder kernelBuilder)
{
var runtimeConfig = GetRuntimeConfiguration();
var serviceType = runtimeConfig.ServiceType;
var modelName = runtimeConfig.ModelName;
var requiresApiKey = RequiresApiKey(serviceType);
var apiKey = string.Empty;
if (requiresApiKey)
{
this.credentialsProvider.Refresh();
apiKey = (this.credentialsProvider.GetKey() ?? string.Empty).Trim();
if (string.IsNullOrWhiteSpace(apiKey))
{
throw new InvalidOperationException($"An API key is required for {serviceType} but none was found in the credential vault.");
}
}
var apiKey = GetApiKey(serviceType);
var endpoint = string.IsNullOrWhiteSpace(runtimeConfig.Endpoint) ? null : runtimeConfig.Endpoint.Trim();
var deployment = string.IsNullOrWhiteSpace(runtimeConfig.DeploymentName) ? modelName : runtimeConfig.DeploymentName;
@@ -80,6 +80,76 @@ public sealed class AdvancedAIKernelService : KernelServiceBase
}
}
/// <summary>
/// Registers optional services that sit alongside chat completion. At present the only
/// auxiliary service attempted is image generation.
/// </summary>
/// <param name="kernelBuilder">Builder that receives the service registrations.</param>
private void RegisterAuxiliaryServices(IKernelBuilder kernelBuilder)
{
    // Try to find a dedicated Image Generation provider. Nothing else is registered
    // afterwards, so the success flag is intentionally discarded.
    _ = TryRegisterImageService(kernelBuilder);
}
/// <summary>
/// Attempts to register a text-to-image service using one of the configured Paste AI providers.
/// </summary>
/// <remarks>
/// Every provider whose capabilities include <see cref="AIServiceCapability.TextToImage"/> is
/// considered, in configuration order. A provider is skipped when no API key is stored for it
/// or when its service type is neither OpenAI nor Azure OpenAI, so one unusable entry does not
/// prevent a later, usable entry from being registered.
/// </remarks>
/// <param name="kernelBuilder">Builder that receives the text-to-image registration.</param>
/// <returns><c>true</c> when a text-to-image service was registered; otherwise <c>false</c>.</returns>
private bool TryRegisterImageService(IKernelBuilder kernelBuilder)
{
    var allProviders = this.UserSettings.PasteAIConfiguration?.Providers;
    if (allProviders == null)
    {
        return false;
    }

    var imageProviders = allProviders.Where(p => p.Capabilities.HasFlag(AIServiceCapability.TextToImage));

    foreach (var imageProvider in imageProviders)
    {
        var serviceType = NormalizeServiceType(imageProvider.ServiceTypeKind);

        // Trim the stored key the same way GetApiKey does, so stray whitespace from the
        // credential store is not sent to the service.
        var apiKey = this.credentialsProvider.GetKey(imageProvider.Id, serviceType)?.Trim();
        if (string.IsNullOrWhiteSpace(apiKey))
        {
            // No usable credential for this provider; try the next candidate.
            continue;
        }

        var endpoint = string.IsNullOrWhiteSpace(imageProvider.EndpointUrl) ? null : imageProvider.EndpointUrl.Trim();
        var deployment = string.IsNullOrWhiteSpace(imageProvider.DeploymentName) ? imageProvider.ModelName : imageProvider.DeploymentName;

        switch (serviceType)
        {
            case AIServiceType.OpenAI:
#pragma warning disable SKEXP0010
                kernelBuilder.AddOpenAITextToImage(apiKey, modelId: imageProvider.ModelName);
#pragma warning restore SKEXP0010
                return true;

            case AIServiceType.AzureOpenAI:
                // NOTE(review): RequireEndpoint presumably throws when the endpoint is missing,
                // which would abort kernel creation rather than skip this provider - confirm
                // that this is the intended behavior for an auxiliary service.
#pragma warning disable SKEXP0010
                kernelBuilder.AddAzureOpenAITextToImage(deployment, RequireEndpoint(endpoint, serviceType), apiKey);
#pragma warning restore SKEXP0010
                return true;

            default:
                // Service type has no text-to-image connector here; try the next candidate.
                continue;
        }
    }

    return false;
}
/// <summary>
/// Returns the trimmed API key of the active provider, or an empty string when the
/// service type does not require one.
/// </summary>
/// <param name="serviceType">Service type the key is needed for.</param>
/// <returns>The trimmed key, or <see cref="string.Empty"/> when no key is required.</returns>
/// <exception cref="InvalidOperationException">A key is required but the stored value is missing or blank.</exception>
private string GetApiKey(AIServiceType serviceType)
{
    if (RequiresApiKey(serviceType))
    {
        // Refresh first so the key read below reflects the provider's refreshed state.
        this.credentialsProvider.Refresh();

        var storedKey = this.credentialsProvider.GetKey();
        var trimmedKey = storedKey is null ? string.Empty : storedKey.Trim();

        if (!string.IsNullOrWhiteSpace(trimmedKey))
        {
            return trimmedKey;
        }

        throw new InvalidOperationException($"An API key is required for {serviceType} but none was found in the credential vault.");
    }

    return string.Empty;
}
protected override AIServiceUsage GetAIServiceUsage(ChatMessageContent chatMessage)
{
return AIServiceUsageHelper.GetOpenAIServiceUsage(chatMessage);

View File

@@ -50,6 +50,12 @@ public sealed class EnhancedVaultCredentialsProvider : IAICredentialsProvider
}
}
/// <summary>
/// Loads the credential for a specific provider: the service type is normalized, combined
/// with the provider id into a credential entry, and that entry is loaded.
/// </summary>
/// <param name="providerId">Identifier of the provider whose key is requested.</param>
/// <param name="serviceType">Service type of that provider.</param>
/// <returns>The value produced by <c>LoadKey</c> for the provider's credential entry.</returns>
public string GetKey(string providerId, AIServiceType serviceType) =>
    LoadKey(BuildCredentialEntry(NormalizeServiceType(serviceType), providerId));
public bool IsConfigured()
{
return !string.IsNullOrEmpty(GetKey());

View File

@@ -2,6 +2,8 @@
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using Microsoft.PowerToys.Settings.UI.Library;
namespace AdvancedPaste.Services;
/// <summary>
@@ -21,6 +23,14 @@ public interface IAICredentialsProvider
/// <returns>Credential string or <see cref="string.Empty"/> when missing.</returns>
string GetKey();
/// <summary>
/// Retrieves the credential for a specific AI provider.
/// </summary>
/// <param name="providerId">The unique identifier of the provider.</param>
/// <param name="serviceType">The type of the service.</param>
/// <returns>Credential string or <see cref="string.Empty"/> when missing.</returns>
string GetKey(string providerId, AIServiceType serviceType);
/// <summary>
/// Refreshes the cached credential for the active AI provider.
/// </summary>

View File

@@ -5,6 +5,8 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Runtime.InteropServices.WindowsRuntime;
using System.Threading;
using System.Threading.Tasks;
using AdvancedPaste.Helpers;
@@ -18,7 +20,10 @@ using Microsoft.PowerToys.Settings.UI.Library;
using Microsoft.PowerToys.Telemetry;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.OpenAI;
using Microsoft.SemanticKernel.TextToImage;
using Windows.ApplicationModel.DataTransfer;
using Windows.Storage.Streams;
namespace AdvancedPaste.Services;
@@ -40,7 +45,7 @@ public abstract class KernelServiceBase(
protected abstract PromptExecutionSettings PromptExecutionSettings { get; }
protected abstract void AddChatCompletionService(IKernelBuilder kernelBuilder);
protected abstract void AddAIServices(IKernelBuilder kernelBuilder);
protected abstract AIServiceUsage GetAIServiceUsage(ChatMessageContent chatMessage);
@@ -211,12 +216,19 @@ public abstract class KernelServiceBase(
private Kernel CreateKernel()
{
var kernelBuilder = Kernel.CreateBuilder();
AddChatCompletionService(kernelBuilder);
kernelBuilder.Plugins.AddFromFunctions("Actions", GetKernelFunctions());
AddAIServices(kernelBuilder);
// Check whether AddAIServices registered a text-to-image service by inspecting the builder's
// service descriptors directly; no temporary kernel is built for this check.
// Note: only registrations whose ServiceType is exactly ITextToImageService are detected.
#pragma warning disable SKEXP0001
var hasTextToImageService = kernelBuilder.Services.Any(s => s.ServiceType == typeof(ITextToImageService));
#pragma warning restore SKEXP0001
kernelBuilder.Plugins.AddFromFunctions("Actions", GetKernelFunctions(hasTextToImageService));
return kernelBuilder.Build();
}
private IEnumerable<KernelFunction> GetKernelFunctions()
private IEnumerable<KernelFunction> GetKernelFunctions(bool hasTextToImageService)
{
// Get standard format functions
var standardFunctions =
@@ -224,15 +236,9 @@ public abstract class KernelServiceBase(
let metadata = PasteFormat.MetadataDict[format]
let coreDescription = metadata.KernelFunctionDescription
where !string.IsNullOrEmpty(coreDescription)
let requiresPrompt = metadata.RequiresPrompt
orderby requiresPrompt descending
select KernelFunctionFactory.CreateFromMethod(
method: requiresPrompt ? async (Kernel kernel, string prompt) => await ExecutePromptTransformAsync(kernel, format, prompt)
: async (Kernel kernel) => await ExecuteStandardTransformAsync(kernel, format),
functionName: format.ToString(),
description: requiresPrompt ? coreDescription : $"{coreDescription} Puts the result back on the clipboard.",
parameters: requiresPrompt ? [new(PromptParameterName) { Description = "Input instructions to AI", ParameterType = typeof(string) }] : null,
returnParameter: new() { Description = "Array of available clipboard formats after operation" });
where format != PasteFormats.TextToImage || hasTextToImageService // Filter out TextToImage if the service is not available
orderby metadata.RequiresPrompt descending
select CreateKernelFunctionForFormat(format, metadata, coreDescription);
HashSet<string> usedFunctionNames = new(Enum.GetNames<PasteFormats>(), StringComparer.OrdinalIgnoreCase);
@@ -257,6 +263,32 @@ public abstract class KernelServiceBase(
return standardFunctions.Concat(customActionFunctions);
}
/// <summary>
/// Creates the kernel function that exposes a single paste format to the AI model.
/// </summary>
/// <remarks>
/// <see cref="PasteFormats.TextToImage"/> gets a dedicated function with an optional prompt and
/// its own fixed description; <paramref name="description"/> is not used for it. Any other format
/// becomes either a prompt-taking function or a parameterless one, depending on
/// <c>metadata.RequiresPrompt</c>.
/// </remarks>
/// <param name="format">Paste format the function executes; its name is used as the function name.</param>
/// <param name="metadata">Metadata of <paramref name="format"/>; only read for non-image formats.</param>
/// <param name="description">Function description advertised for non-image formats.</param>
/// <returns>The kernel function for <paramref name="format"/>.</returns>
private KernelFunction CreateKernelFunctionForFormat(PasteFormats format, PasteFormatMetadataAttribute metadata, string description)
{
    var functionName = format.ToString();

    if (format == PasteFormats.TextToImage)
    {
        return KernelFunctionFactory.CreateFromMethod(
            method: async (Kernel kernel, string prompt = null) =>
            {
                await ExecuteTextToImageAsync(kernel, prompt ?? string.Empty);
                return "Image generated successfully using the clipboard text as the description.";
            },
            functionName: functionName,
            description: "Generates an image based on text. If the 'prompt' parameter is not provided, the text currently in the clipboard is used as the image description.",
            parameters: [new(PromptParameterName) { Description = "Optional. Additional instructions for the image. If left empty, the clipboard text is used.", ParameterType = typeof(string), IsRequired = false }],
            returnParameter: new() { Description = "Status message indicating success." });
    }

    if (metadata.RequiresPrompt)
    {
        // Prompt-driven formats pass the model-supplied instructions through to the transform.
        return KernelFunctionFactory.CreateFromMethod(
            method: async (Kernel kernel, string prompt) => await ExecutePromptTransformAsync(kernel, format, prompt),
            functionName: functionName,
            description: description,
            parameters: [new(PromptParameterName) { Description = "Input instructions to AI", ParameterType = typeof(string) }],
            returnParameter: new() { Description = "Array of available clipboard formats after operation" });
    }

    // Standard formats take no arguments; the description notes where the result ends up.
    return KernelFunctionFactory.CreateFromMethod(
        method: async (Kernel kernel) => await ExecuteStandardTransformAsync(kernel, format),
        functionName: functionName,
        description: $"{description} Puts the result back on the clipboard.",
        parameters: null,
        returnParameter: new() { Description = "Array of available clipboard formats after operation" });
}
private static string GetUniqueFunctionName(string baseName, HashSet<string> usedFunctionNames, int customActionId)
{
ArgumentNullException.ThrowIfNull(usedFunctionNames);
@@ -318,6 +350,57 @@ public abstract class KernelServiceBase(
return DataPackageHelpers.CreateFromText(output);
});
/// <summary>
/// Generates an image from the clipboard text (optionally extended by <paramref name="prompt"/>)
/// and makes the resulting bitmap the kernel's current data package.
/// </summary>
/// <remarks>
/// The image description is the clipboard text, followed by the prompt when one is given.
/// The first generated image is used; its bytes are taken from the inline data when present
/// and non-empty, otherwise they are downloaded from the image URI.
/// </remarks>
/// <param name="kernel">Kernel that provides the text-to-image service, clipboard data and cancellation token.</param>
/// <param name="prompt">Optional additional instructions appended to the clipboard text.</param>
/// <returns>The value returned by <c>ExecuteTransformAsync</c> for this action.</returns>
/// <exception cref="InvalidOperationException">No image was generated, or the generated image carries no data.</exception>
private Task<string> ExecuteTextToImageAsync(Kernel kernel, string prompt) =>
    ExecuteTransformAsync(
        kernel,
        new ActionChainItem(PasteFormats.TextToImage, Arguments: new() { { PromptParameterName, prompt } }),
        async dataPackageView =>
        {
            var cancellationToken = kernel.GetCancellationToken();

            // Prompt and clipboard text are user content, so only their lengths are logged.
            Logger.LogDebug($"ExecuteTextToImageAsync started. Prompt length: {prompt?.Length ?? 0}");

            var input = await dataPackageView.GetClipboardTextOrThrowAsync(cancellationToken);
            var imageDescription = string.IsNullOrWhiteSpace(prompt) ? input : $"{input}. {prompt}";
            Logger.LogDebug($"Image description length: {imageDescription?.Length ?? 0}");

#pragma warning disable SKEXP0001
            var imageService = kernel.GetRequiredService<ITextToImageService>();
            var settings = new OpenAITextToImageExecutionSettings
            {
                Size = (1024, 1024),
                ResponseFormat = "b64_json",
            };

            var generatedImages = await imageService.GetImageContentsAsync(new TextContent(imageDescription), settings, cancellationToken: cancellationToken);
            Logger.LogDebug($"Image generation completed. Count: {generatedImages.Count}");

            if (generatedImages.Count == 0)
            {
                throw new InvalidOperationException("No image generated.");
            }

            var imageContent = generatedImages[0];

            // Resolve the bytes before allocating the stream, so a failed download or an
            // empty payload cannot leave an undisposed stream behind.
            byte[] imageBytes;
            if (imageContent.Data is { IsEmpty: false } inlineData)
            {
                imageBytes = inlineData.ToArray();
            }
            else if (imageContent.Uri != null)
            {
                using var client = new HttpClient();
                imageBytes = await client.GetByteArrayAsync(imageContent.Uri, cancellationToken);
            }
            else
            {
                throw new InvalidOperationException("Generated image contains no data.");
            }
#pragma warning restore SKEXP0001

            if (imageBytes.Length == 0)
            {
                throw new InvalidOperationException("Generated image contains no data.");
            }

            // On success the stream is left open because the returned stream reference is
            // created from it; it is disposed only when building the package fails.
            var stream = new InMemoryRandomAccessStream();
            try
            {
                await stream.WriteAsync(imageBytes.AsBuffer());
                stream.Seek(0);
                return DataPackageHelpers.CreateFromImage(RandomAccessStreamReference.CreateFromStream(stream));
            }
            catch
            {
                stream.Dispose();
                throw;
            }
        });
private async Task<string> GetPromptBasedOutput(PasteFormats format, string prompt, string input, CancellationToken cancellationToken, IProgress<double> progress) =>
format switch
{
@@ -333,6 +416,7 @@ public abstract class KernelServiceBase(
private static async Task<string> ExecuteTransformAsync(Kernel kernel, ActionChainItem actionChainItem, Func<DataPackageView, Task<DataPackage>> transformFunc)
{
Logger.LogDebug($"ExecuteTransformAsync started for {actionChainItem.Format}");
kernel.GetOrAddActionChain().Add(actionChainItem);
kernel.SetLastError(null);
@@ -341,10 +425,13 @@ public abstract class KernelServiceBase(
var input = kernel.GetDataPackageView();
var output = await transformFunc(input);
kernel.SetDataPackage(output);
return await kernel.GetDataFormatsAsync();
var formats = await kernel.GetDataFormatsAsync();
Logger.LogDebug($"ExecuteTransformAsync finished. New formats: {formats}");
return formats;
}
catch (Exception ex)
{
Logger.LogError($"ExecuteTransformAsync failed for {actionChainItem.Format}", ex);
kernel.SetLastError(ex);
throw;
}
@@ -370,7 +457,7 @@ public abstract class KernelServiceBase(
kernelContent switch
{
FunctionCallContent functionCallContent => $"{functionCallContent.FunctionName}({FormatKernelArguments(functionCallContent.Arguments)})",
FunctionResultContent functionResultContent => functionResultContent.FunctionName,
FunctionResultContent functionResultContent => $"{functionResultContent.Result} / {functionResultContent.FunctionName}",
_ => kernelContent.ToString(),
};
#pragma warning restore SKEXP0001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.

View File

@@ -553,7 +553,7 @@ namespace AdvancedPaste.ViewModels
}
// List to store generated responses
public ObservableCollection<string> GeneratedResponses { get; set; } = [];
public ObservableCollection<GeneratedResponse> GeneratedResponses { get; set; } = [];
// Index to keep track of the current response
private int _currentResponseIndex;
@@ -566,8 +566,12 @@ namespace AdvancedPaste.ViewModels
if (value >= 0 && value < GeneratedResponses.Count)
{
SetProperty(ref _currentResponseIndex, value);
CustomFormatResult = GeneratedResponses[_currentResponseIndex];
var response = GeneratedResponses[_currentResponseIndex];
CustomFormatResult = response.Preview.Content;
CustomFormatImageResult = response.Preview.Image;
OnPropertyChanged(nameof(CurrentIndexDisplay));
OnPropertyChanged(nameof(HasCustomFormatText));
OnPropertyChanged(nameof(HasCustomFormatImage));
}
}
}
@@ -607,14 +611,21 @@ namespace AdvancedPaste.ViewModels
// Text shown in the custom-format preview. HasCustomFormatText is derived from this value,
// so its change notification is raised whenever this property changes; otherwise bindings
// on HasCustomFormatText would only refresh when some other code raised it manually.
[ObservableProperty]
[NotifyPropertyChangedFor(nameof(HasCustomFormatText))]
private string _customFormatResult;

// Image shown in the custom-format preview. HasCustomFormatImage is derived from this value
// and is notified together with it for the same reason.
[ObservableProperty]
[NotifyPropertyChangedFor(nameof(HasCustomFormatImage))]
private ImageSource _customFormatImageResult;

/// <summary>
/// Gets a value indicating whether <see cref="CustomFormatResult"/> holds non-empty text.
/// </summary>
public bool HasCustomFormatText => !string.IsNullOrEmpty(CustomFormatResult);

/// <summary>
/// Gets a value indicating whether <see cref="CustomFormatImageResult"/> is set.
/// </summary>
public bool HasCustomFormatImage => CustomFormatImageResult != null;
[RelayCommand]
public async Task PasteCustomAsync()
{
var text = GeneratedResponses.ElementAtOrDefault(CurrentResponseIndex);
var response = GeneratedResponses.ElementAtOrDefault(CurrentResponseIndex);
if (!string.IsNullOrEmpty(text))
if (response?.Data != null)
{
await CopyPasteAndHideAsync(DataPackageHelpers.CreateFromText(text));
await CopyPasteAndHideAsync(response.Data);
}
}
@@ -695,11 +706,24 @@ namespace AdvancedPaste.ViewModels
await delayTask;
var outputText = await dataPackage.GetView().GetTextOrEmptyAsync();
bool shouldPreview = pasteFormat.Metadata.CanPreview && _userSettings.ShowCustomPreview && !string.IsNullOrEmpty(outputText) && source != PasteActionSource.GlobalKeyboardShortcut;
var formats = dataPackage.GetView().AvailableFormats;
var clipboardFormat = ClipboardFormat.None;
if (!string.IsNullOrEmpty(outputText))
{
clipboardFormat |= ClipboardFormat.Text;
}
if (formats.Contains(StandardDataFormats.Bitmap))
{
clipboardFormat |= ClipboardFormat.Image;
}
bool shouldPreview = pasteFormat.Metadata.CanPreview && _userSettings.ShowCustomPreview && clipboardFormat != ClipboardFormat.None && source != PasteActionSource.GlobalKeyboardShortcut;
if (shouldPreview)
{
GeneratedResponses.Add(outputText);
var previewItem = await ClipboardItemHelper.CreateFromCurrentClipboardAsync(dataPackage.GetView(), clipboardFormat);
GeneratedResponses.Add(new GeneratedResponse { Preview = previewItem, Data = dataPackage });
CurrentResponseIndex = GeneratedResponses.Count - 1;
PreviewRequested?.Invoke(this, EventArgs.Empty);
}

View File

@@ -10,6 +10,14 @@ using System.Text.Json.Serialization;
namespace Microsoft.PowerToys.Settings.UI.Library
{
/// <summary>
/// Capabilities a Paste AI provider entry can declare. Marked with <see cref="FlagsAttribute"/>,
/// so the values are distinct bits that can be combined.
/// </summary>
[Flags]
public enum AIServiceCapability
{
/// <summary>No capability.</summary>
None = 0,
/// <summary>The provider is used for chat completion.</summary>
ChatCompletion = 1,
/// <summary>The provider is used for generating images from text.</summary>
TextToImage = 2,
}
/// <summary>
/// Represents a single Paste AI provider configuration entry.
/// </summary>
@@ -17,6 +25,7 @@ namespace Microsoft.PowerToys.Settings.UI.Library
{
private string _id = Guid.NewGuid().ToString("N");
private string _serviceType = "OpenAI";
private AIServiceCapability _capabilities = AIServiceCapability.ChatCompletion;
private string _modelName = string.Empty;
private string _endpointUrl = string.Empty;
private string _apiVersion = string.Empty;
@@ -50,6 +59,13 @@ namespace Microsoft.PowerToys.Settings.UI.Library
}
}
/// <summary>
/// Gets or sets the capabilities of this provider entry. Uses the JSON property name
/// "capabilities"; the setter goes through <c>SetProperty</c>.
/// </summary>
[JsonPropertyName("capabilities")]
public AIServiceCapability Capabilities
{
get => _capabilities;
set => SetProperty(ref _capabilities, value);
}
[JsonIgnore]
public AIServiceType ServiceTypeKind
{

View File

@@ -493,6 +493,16 @@
Margin="0,8,0,48"
Orientation="Vertical"
Spacing="16">
<ComboBox
x:Name="PasteAICapabilityComboBox"
x:Uid="AdvancedPaste_Capability"
Header="Capability"
MinWidth="200"
HorizontalAlignment="Stretch"
SelectionChanged="PasteAICapabilityComboBox_SelectionChanged">
<ComboBoxItem Content="Chat Completion" Tag="ChatCompletion" />
<ComboBoxItem Content="Text to Image" Tag="TextToImage" />
</ComboBox>
<TextBox
x:Name="PasteAIModelNameTextBox"
x:Uid="AdvancedPaste_ModelName"

View File

@@ -1138,9 +1138,33 @@ namespace Microsoft.PowerToys.Settings.UI.Views
await UpdateFoundryLocalUIAsync();
RefreshDialogBindings();
PasteAIApiKeyPasswordBox.Password = ViewModel.GetPasteAIApiKey(provider.Id, provider.ServiceType);
// Set Capability ComboBox
PasteAICapabilityComboBox.SelectedItem = PasteAICapabilityComboBox.Items
.OfType<ComboBoxItem>()
.FirstOrDefault(item => item.Tag is string tag &&
Enum.TryParse<AIServiceCapability>(tag, out var capability) &&
capability == provider.Capabilities);
await PasteAIProviderConfigurationDialog.ShowAsync();
}
/// <summary>
/// Copies the capability chosen in the capability combo box onto the provider draft.
/// Does nothing when there is no draft, no selected item with a string tag, or the tag
/// does not parse as an <see cref="AIServiceCapability"/>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Selection change details (unused).</param>
private void PasteAICapabilityComboBox_SelectionChanged(object sender, SelectionChangedEventArgs e)
{
    var hasDraft = ViewModel?.PasteAIProviderDraft != null;

    if (hasDraft
        && PasteAICapabilityComboBox.SelectedItem is ComboBoxItem { Tag: string capabilityTag }
        && Enum.TryParse<AIServiceCapability>(capabilityTag, out var selectedCapability))
    {
        ViewModel.PasteAIProviderDraft.Capabilities = selectedCapability;
    }
}
private void RemovePasteAIProviderButton_Click(object sender, RoutedEventArgs e)
{
// sender is MenuFlyoutItem with PasteAIProviderDefinition Tag