Compare commits

...

13 Commits

Author SHA1 Message Date
Shawn Yuan (from Dev Box)
c978612433 added audio to text support 2025-12-26 13:42:03 +08:00
Shawn Yuan (from Dev Box)
63f5fee089 added text to audio 2025-12-26 11:41:47 +08:00
Shawn Yuan (from Dev Box)
2128505de8 fix xaml format 2025-12-24 15:23:50 +08:00
Shawn Yuan (from Dev Box)
0f4ead7069 update 2025-12-24 15:22:51 +08:00
Shawn Yuan (from Dev Box)
3749f3e87d update 2025-12-24 14:14:05 +08:00
Shawn Yuan (from Dev Box)
d341bd2ca6 update localization 2025-12-24 11:36:10 +08:00
Shawn Yuan (from Dev Box)
20dcb6fb47 add localization support 2025-12-24 11:17:17 +08:00
Shawn Yuan (from Dev Box)
72f84f9652 add model usage tag to ui 2025-12-24 11:01:01 +08:00
Shawn Yuan (from Dev Box)
64dafff7c4 add img size config 2025-12-24 10:20:49 +08:00
Shawn Yuan (from Dev Box)
927d190cf2 fix merge coflicts 2025-12-23 17:40:55 +08:00
Shawn Yuan (from Dev Box)
667800eb86 fix merge conflict 2025-12-23 17:16:43 +08:00
Shawn Yuan (from Dev Box)
35cab47465 Merge branch 'main' into shawn/APImprove2 2025-12-23 17:15:44 +08:00
Shawn Yuan (from Dev Box)
c1603b189f init 2025-12-23 17:12:17 +08:00
22 changed files with 960 additions and 60 deletions

View File

@@ -335,6 +335,7 @@
<converters:CountToVisibilityConverter x:Key="CountToVisibilityConverter" />
<converters:CountToInvertedVisibilityConverter x:Key="CountToInvertedVisibilityConverter" />
<converters:ServiceTypeToIconConverter x:Key="ServiceTypeToIconConverter" />
<converters:PasteAIUsageToStringConverter x:Key="PasteAIUsageToStringConverter" />
</ResourceDictionary>
</UserControl.Resources>
<Grid x:Name="PromptBoxGrid" Loaded="Grid_Loaded">
@@ -430,12 +431,52 @@
Grid.Row="1"
MinHeight="104"
MaxHeight="320">
<TextBlock
Foreground="{ThemeResource TextFillColorSecondaryBrush}"
IsTextSelectionEnabled="True"
Style="{StaticResource CaptionTextBlockStyle}"
Text="{x:Bind ViewModel.CustomFormatResult, Mode=OneWay}"
TextWrapping="Wrap" />
<StackPanel>
<TextBlock
Foreground="{ThemeResource TextFillColorSecondaryBrush}"
IsTextSelectionEnabled="True"
Style="{StaticResource CaptionTextBlockStyle}"
Text="{x:Bind ViewModel.CustomFormatResult, Mode=OneWay}"
TextWrapping="Wrap"
Visibility="{x:Bind ViewModel.HasCustomFormatText, Mode=OneWay, Converter={StaticResource BoolToVisibilityConverter}}" />
<Image
HorizontalAlignment="Left"
Source="{x:Bind ViewModel.CustomFormatImageResult, Mode=OneWay}"
Stretch="Uniform"
Visibility="{x:Bind ViewModel.HasCustomFormatImage, Mode=OneWay, Converter={StaticResource BoolToVisibilityConverter}}" />
<!-- Audio result player: shown only while ViewModel.HasCustomFormatAudio is true. -->
<Grid Visibility="{x:Bind ViewModel.HasCustomFormatAudio, Mode=OneWay, Converter={StaticResource BoolToVisibilityConverter}}">
<Grid.RowDefinitions>
<RowDefinition Height="Auto" />
<RowDefinition Height="Auto" />
<RowDefinition Height="Auto" />
</Grid.RowDefinitions>
<!-- Row 0: name of the audio file. -->
<TextBlock Grid.Row="0" Text="{x:Bind ViewModel.AudioFileName, Mode=OneWay}" HorizontalAlignment="Left" Margin="0,0,0,8" />
<!-- Row 1: current position label, seek slider (two-way bound to AudioPosition), total duration label. -->
<Grid Grid.Row="1" Margin="0,0,0,8">
<Grid.ColumnDefinitions>
<ColumnDefinition Width="Auto" />
<ColumnDefinition Width="*" />
<ColumnDefinition Width="Auto" />
</Grid.ColumnDefinitions>
<TextBlock Grid.Column="0" Text="{x:Bind ViewModel.AudioPositionString, Mode=OneWay}" VerticalAlignment="Center" Margin="0,0,8,0" />
<Slider Grid.Column="1" Minimum="0" Maximum="{x:Bind ViewModel.AudioDuration, Mode=OneWay}" Value="{x:Bind ViewModel.AudioPosition, Mode=TwoWay}" VerticalAlignment="Center" />
<TextBlock Grid.Column="2" Text="{x:Bind ViewModel.AudioDurationString, Mode=OneWay}" VerticalAlignment="Center" Margin="8,0,0,0" />
</Grid>
<!-- Row 2: play/pause toggle on the left, save/delete actions on the right. -->
<Grid Grid.Row="2">
<Grid.ColumnDefinitions>
<ColumnDefinition Width="Auto" />
<ColumnDefinition Width="*" />
<ColumnDefinition Width="Auto" />
</Grid.ColumnDefinitions>
<Button Grid.Column="0" Command="{x:Bind ViewModel.PlayPauseAudioCommand}">
<FontIcon Glyph="{x:Bind ViewModel.AudioPlayPauseGlyph, Mode=OneWay}" />
</Button>
<StackPanel Grid.Column="2" Orientation="Horizontal" Spacing="8">
<!-- NOTE(review): these captions are literal strings; other labels in this control use x:Uid resources. Consider localizing. -->
<Button Command="{x:Bind ViewModel.SaveAudioCommand}" Content="Save" />
<Button Command="{x:Bind ViewModel.DeleteAudioCommand}" Content="Delete" />
</StackPanel>
</Grid>
</Grid>
</StackPanel>
</ScrollViewer>
</Grid>
<Rectangle
@@ -602,20 +643,37 @@
Style="{StaticResource CaptionTextBlockStyle}"
Text="{x:Bind ServiceType, Mode=OneWay}" />
</StackPanel>
<Border
<StackPanel
Grid.Column="2"
Padding="2,0,2,0"
VerticalAlignment="Center"
BorderBrush="{ThemeResource ControlStrokeColorSecondary}"
BorderThickness="1"
CornerRadius="{StaticResource ControlCornerRadius}"
Visibility="{x:Bind IsLocalModel, Mode=OneWay}">
<TextBlock
x:Uid="LocalModelBadge"
AutomationProperties.AccessibilityView="Raw"
FontSize="10"
Foreground="{ThemeResource TextFillColorSecondaryBrush}" />
</Border>
Orientation="Horizontal"
Spacing="4">
<Border
Padding="2,0,2,0"
VerticalAlignment="Center"
BorderBrush="{ThemeResource ControlStrokeColorSecondary}"
BorderThickness="1"
CornerRadius="{StaticResource ControlCornerRadius}">
<TextBlock
AutomationProperties.AccessibilityView="Raw"
FontSize="10"
Foreground="{ThemeResource TextFillColorSecondaryBrush}"
Text="{x:Bind Usage, Mode=OneWay, Converter={StaticResource PasteAIUsageToStringConverter}}" />
</Border>
<Border
Padding="2,0,2,0"
VerticalAlignment="Center"
BorderBrush="{ThemeResource ControlStrokeColorSecondary}"
BorderThickness="1"
CornerRadius="{StaticResource ControlCornerRadius}"
Visibility="{x:Bind IsLocalModel, Mode=OneWay}">
<TextBlock
x:Uid="LocalModelBadge"
AutomationProperties.AccessibilityView="Raw"
FontSize="10"
Foreground="{ThemeResource TextFillColorSecondaryBrush}" />
</Border>
</StackPanel>
<!--<Border
Grid.Column="2"
Padding="2,0,2,0"

View File

@@ -164,7 +164,7 @@ namespace AdvancedPaste.Controls
return;
}
var flyout = FlyoutBase.GetAttachedFlyout(AIProviderButton);
var flyout = AIProviderButton.Flyout;
if (AIProviderListView.SelectedItem is not PasteAIProviderDefinition provider)
{
@@ -180,7 +180,6 @@ namespace AdvancedPaste.Controls
if (ViewModel.SetActiveProviderCommand.CanExecute(provider))
{
await ViewModel.SetActiveProviderCommand.ExecuteAsync(provider);
SyncProviderSelection();
}
flyout?.Hide();

View File

@@ -0,0 +1,30 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using AdvancedPaste.Helpers;
using Microsoft.PowerToys.Settings.UI.Library;
using Microsoft.UI.Xaml.Data;
namespace AdvancedPaste.Converters;
/// <summary>
/// One-way XAML converter that turns a paste AI usage value into its localized display label.
/// </summary>
public sealed partial class PasteAIUsageToStringConverter : IValueConverter
{
    /// <summary>
    /// Resolves <paramref name="value"/> to a <see cref="PasteAIUsage"/> and looks up the
    /// resource string named <c>PasteAIUsage_{usage}_Label</c>.
    /// </summary>
    /// <param name="value">
    /// Either a configuration string (parsed with <c>PasteAIUsageExtensions.FromConfigString</c>)
    /// or a <see cref="PasteAIUsage"/>; anything else is treated as <c>ChatCompletion</c>.
    /// </param>
    /// <param name="targetType">Not used.</param>
    /// <param name="parameter">Not used.</param>
    /// <param name="language">Not used.</param>
    /// <returns>The string returned by the resource loader for the computed key.</returns>
    public object Convert(object value, Type targetType, object parameter, string language)
    {
        PasteAIUsage resolvedUsage;
        if (value is string configValue)
        {
            resolvedUsage = PasteAIUsageExtensions.FromConfigString(configValue);
        }
        else if (value is PasteAIUsage typedUsage)
        {
            resolvedUsage = typedUsage;
        }
        else
        {
            resolvedUsage = PasteAIUsage.ChatCompletion;
        }

        var resourceKey = $"PasteAIUsage_{resolvedUsage}_Label";
        return ResourceLoaderInstance.ResourceLoader.GetString(resourceKey);
    }

    /// <summary>
    /// Not supported: the converter is only used for one-way bindings.
    /// </summary>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public object ConvertBack(object value, Type targetType, object parameter, string language)
        => throw new NotImplementedException();
}

View File

@@ -46,6 +46,13 @@ internal static class DataPackageHelpers
return dataPackage;
}
/// <summary>
/// Builds a new <see cref="DataPackage"/> whose bitmap content is set to the supplied stream reference.
/// </summary>
/// <param name="imageStreamRef">Stream reference handed to <c>DataPackage.SetBitmap</c>.</param>
/// <returns>The newly created data package.</returns>
internal static DataPackage CreateFromImage(RandomAccessStreamReference imageStreamRef)
{
    var imagePackage = new DataPackage();
    imagePackage.SetBitmap(imageStreamRef);

    return imagePackage;
}
internal static async Task<DataPackage> CreateFromFileAsync(string fileName)
{
var storageFile = await StorageFile.GetFileFromPathAsync(fileName);
@@ -243,6 +250,29 @@ internal static class DataPackageHelpers
return memoryStream.ToArray();
}
/// <summary>
/// Reads the clipboard's single storage file as raw bytes when its extension is listed
/// among the supported audio file types.
/// </summary>
/// <param name="dataPackageView">Clipboard view to inspect.</param>
/// <returns>
/// The file's bytes and its reported content type, or <c>(null, null)</c> when the clipboard
/// has no storage items, does not hold exactly one <see cref="StorageFile"/>, or the file's
/// extension is not in the supported audio list.
/// </returns>
internal static async Task<(byte[] Data, string MimeType)> GetAudioBytesAsync(this DataPackageView dataPackageView)
{
    if (!dataPackageView.Contains(StandardDataFormats.StorageItems))
    {
        return (null, null);
    }

    var clipboardItems = await dataPackageView.GetStorageItemsAsync();
    if (clipboardItems.Count != 1 || clipboardItems[0] is not StorageFile audioFile)
    {
        return (null, null);
    }

    var allowedExtensions = SupportedFileTypes.Value.FirstOrDefault(entry => entry.Format == ClipboardFormat.Audio).FileTypes;
    if (allowedExtensions == null || !allowedExtensions.Contains(audioFile.FileType))
    {
        return (null, null);
    }

    // Buffer the whole file in memory so the caller receives a plain byte array.
    using var fileStream = await audioFile.OpenStreamForReadAsync();
    using var buffer = new MemoryStream();
    await fileStream.CopyToAsync(buffer);

    return (buffer.ToArray(), audioFile.ContentType);
}
internal static async Task<SoftwareBitmap> GetImageContentAsync(this DataPackageView dataPackageView)
{
using var stream = await dataPackageView.GetImageStreamAsync();
@@ -279,7 +309,11 @@ internal static class DataPackageHelpers
var file = storageItems.Count == 1 ? storageItems[0] as StorageFile : null;
if (file != null)
{
return await file.OpenReadAsync();
var supportedImageTypes = SupportedFileTypes.Value.FirstOrDefault(x => x.Format == ClipboardFormat.Image).FileTypes;
if (supportedImageTypes != null && supportedImageTypes.Contains(file.FileType))
{
return await file.OpenReadAsync();
}
}
}

View File

@@ -118,8 +118,8 @@ public enum PasteFormats
IconGlyph = "\uE945",
RequiresAIService = true,
CanPreview = true,
SupportedClipboardFormats = ClipboardFormat.Text | ClipboardFormat.Image,
KernelFunctionDescription = "Takes user instructions and applies them to the current clipboard content (text or image). Use this function for image analysis, description, or transformation tasks beyond simple OCR.",
SupportedClipboardFormats = ClipboardFormat.Text | ClipboardFormat.Image | ClipboardFormat.Audio,
KernelFunctionDescription = "Takes user instructions and applies them to the current clipboard content (text, image or audio). Use this function for image analysis, description, or transformation tasks beyond simple OCR.",
RequiresPrompt = true)]
CustomTextTransformation,
}

View File

@@ -40,15 +40,15 @@ namespace AdvancedPaste.Services.CustomActions
this.userSettings = userSettings;
}
public async Task<CustomActionTransformResult> TransformAsync(string prompt, string inputText, byte[] imageBytes, CancellationToken cancellationToken, IProgress<double> progress)
public async Task<CustomActionTransformResult> TransformAsync(string prompt, string inputText, byte[] imageBytes, byte[] audioBytes, string audioMimeType, CancellationToken cancellationToken, IProgress<double> progress)
{
var pasteConfig = userSettings?.PasteAIConfiguration;
var providerConfig = BuildProviderConfig(pasteConfig);
return await TransformAsync(prompt, inputText, imageBytes, providerConfig, cancellationToken, progress);
return await TransformAsync(prompt, inputText, imageBytes, audioBytes, audioMimeType, providerConfig, cancellationToken, progress);
}
private async Task<CustomActionTransformResult> TransformAsync(string prompt, string inputText, byte[] imageBytes, PasteAIConfig providerConfig, CancellationToken cancellationToken, IProgress<double> progress)
private async Task<CustomActionTransformResult> TransformAsync(string prompt, string inputText, byte[] imageBytes, byte[] audioBytes, string audioMimeType, PasteAIConfig providerConfig, CancellationToken cancellationToken, IProgress<double> progress)
{
ArgumentNullException.ThrowIfNull(providerConfig);
@@ -57,7 +57,7 @@ namespace AdvancedPaste.Services.CustomActions
return new CustomActionTransformResult(string.Empty, AIServiceUsage.None);
}
if (string.IsNullOrWhiteSpace(inputText) && imageBytes is null)
if (string.IsNullOrWhiteSpace(inputText) && imageBytes is null && audioBytes is null)
{
Logger.LogWarning("Clipboard has no usable data");
return new CustomActionTransformResult(string.Empty, AIServiceUsage.None);
@@ -82,6 +82,8 @@ namespace AdvancedPaste.Services.CustomActions
InputText = inputText,
ImageBytes = imageBytes,
ImageMimeType = imageBytes != null ? "image/png" : null,
AudioBytes = audioBytes,
AudioMimeType = audioMimeType,
SystemPrompt = systemPrompt,
};
@@ -168,6 +170,10 @@ namespace AdvancedPaste.Services.CustomActions
ModelPath = provider.ModelPath,
SystemPrompt = systemPrompt,
ModerationEnabled = provider.ModerationEnabled,
Usage = provider.UsageKind,
ImageWidth = provider.ImageWidth,
ImageHeight = provider.ImageHeight,
Voice = provider.Voice,
};
return providerConfig;

View File

@@ -12,6 +12,6 @@ namespace AdvancedPaste.Services.CustomActions
{
public interface ICustomActionTransformService
{
Task<CustomActionTransformResult> TransformAsync(string prompt, string inputText, byte[] imageBytes, CancellationToken cancellationToken, IProgress<double> progress);
Task<CustomActionTransformResult> TransformAsync(string prompt, string inputText, byte[] imageBytes, byte[] audioBytes, string audioMimeType, CancellationToken cancellationToken, IProgress<double> progress);
}
}

View File

@@ -28,5 +28,13 @@ namespace AdvancedPaste.Services.CustomActions
public string SystemPrompt { get; set; }
public bool ModerationEnabled { get; set; }
public PasteAIUsage Usage { get; set; }
public string Voice { get; set; }
public int ImageWidth { get; set; }
public int ImageHeight { get; set; }
}
}

View File

@@ -16,6 +16,10 @@ namespace AdvancedPaste.Services.CustomActions
public string ImageMimeType { get; init; }
public byte[] AudioBytes { get; init; }
public string AudioMimeType { get; init; }
public string SystemPrompt { get; init; }
public AIServiceUsage Usage { get; set; } = AIServiceUsage.None;

View File

@@ -4,18 +4,23 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Net.Http;
using System.Threading;
using System.Threading.Tasks;
using AdvancedPaste.Helpers;
using AdvancedPaste.Models;
using Microsoft.PowerToys.Settings.UI.Library;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.AudioToText;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.AzureAIInference;
using Microsoft.SemanticKernel.Connectors.Google;
using Microsoft.SemanticKernel.Connectors.MistralAI;
using Microsoft.SemanticKernel.Connectors.Ollama;
using Microsoft.SemanticKernel.Connectors.OpenAI;
using Microsoft.SemanticKernel.TextToAudio;
using Microsoft.SemanticKernel.TextToImage;
namespace AdvancedPaste.Services.CustomActions
{
@@ -65,14 +70,129 @@ namespace AdvancedPaste.Services.CustomActions
var prompt = request.Prompt;
var inputText = request.InputText;
var imageBytes = request.ImageBytes;
var audioBytes = request.AudioBytes;
if (string.IsNullOrWhiteSpace(prompt) || (string.IsNullOrWhiteSpace(inputText) && imageBytes is null))
if (string.IsNullOrWhiteSpace(prompt) || (string.IsNullOrWhiteSpace(inputText) && imageBytes is null && audioBytes is null))
{
throw new ArgumentException("Prompt and input content must be provided", nameof(request));
}
var executionSettings = CreateExecutionSettings();
var kernel = CreateKernel();
switch (_config.Usage)
{
case PasteAIUsage.TextToImage:
var imageDescription = string.IsNullOrWhiteSpace(prompt) ? inputText : $"{inputText}. {prompt}";
return await ProcessTextToImageAsync(kernel, imageDescription, cancellationToken);
case PasteAIUsage.TextToAudio:
var textToAudioInput = string.IsNullOrWhiteSpace(prompt) ? inputText : $"{inputText}. {prompt}";
return await ProcessTextToAudioAsync(kernel, textToAudioInput, cancellationToken);
case PasteAIUsage.AudioToText:
return await ProcessAudioToTextAsync(kernel, request, cancellationToken);
case PasteAIUsage.ChatCompletion:
default:
var userMessageContent = $"""
User instructions:
{prompt}
Clipboard Content:
{inputText}
Output:
""";
return await ProcessChatCompletionAsync(kernel, request, userMessageContent, systemPrompt, cancellationToken);
}
}
// Shared by every image download: creating a new HttpClient per request can exhaust sockets.
private static readonly HttpClient ImageDownloadClient = new();

/// <summary>
/// Generates an image from a text description and returns it as a base64 data URI.
/// </summary>
/// <param name="kernel">Kernel from which the <see cref="ITextToImageService"/> is resolved.</param>
/// <param name="userMessageContent">Text description sent to the image service.</param>
/// <param name="cancellationToken">Cancels the generation request and any follow-up download.</param>
/// <returns>
/// A <c>data:&lt;mime&gt;;base64,...</c> string. When the service returns inline bytes the
/// reported MIME type is used (falling back to <c>image/png</c>); when it returns only a URI
/// the image is downloaded and labelled <c>image/png</c>.
/// </returns>
/// <exception cref="InvalidOperationException">
/// No image was generated, or the generated image has neither inline data nor a URI.
/// </exception>
private async Task<string> ProcessTextToImageAsync(Kernel kernel, string userMessageContent, CancellationToken cancellationToken)
{
#pragma warning disable SKEXP0001
    var imageService = kernel.GetRequiredService<ITextToImageService>();

    // Non-positive configured dimensions fall back to 1024.
    var width = _config.ImageWidth > 0 ? _config.ImageWidth : 1024;
    var height = _config.ImageHeight > 0 ? _config.ImageHeight : 1024;
    var settings = new OpenAITextToImageExecutionSettings
    {
        Size = (width, height),
    };

    var generatedImages = await imageService.GetImageContentsAsync(new TextContent(userMessageContent), settings, cancellationToken: cancellationToken);
    if (generatedImages.Count == 0)
    {
        throw new InvalidOperationException("No image generated.");
    }

    var imageContent = generatedImages[0];

    // Prefer inline bytes; an empty payload is treated as "no data" so the URI can still be tried.
    if (imageContent.Data is { IsEmpty: false } inlineData)
    {
        var base64 = Convert.ToBase64String(inlineData.ToArray());
        return $"data:{imageContent.MimeType ?? "image/png"};base64,{base64}";
    }

    if (imageContent.Uri != null)
    {
        var imageBytes = await ImageDownloadClient.GetByteArrayAsync(imageContent.Uri, cancellationToken);
        var base64 = Convert.ToBase64String(imageBytes);
        return $"data:image/png;base64,{base64}";
    }

    throw new InvalidOperationException("Generated image contains no data.");
#pragma warning restore SKEXP0001
}
/// <summary>
/// Converts text to speech and writes the result to an MP3 file in the temp directory.
/// </summary>
/// <param name="kernel">Kernel from which the <see cref="ITextToAudioService"/> is resolved.</param>
/// <param name="text">Text to synthesize.</param>
/// <param name="cancellationToken">Cancels the synthesis request and the file write.</param>
/// <returns>Full path of the written <c>.mp3</c> file.</returns>
/// <exception cref="InvalidOperationException">The service returned no (or empty) audio data.</exception>
private async Task<string> ProcessTextToAudioAsync(Kernel kernel, string text, CancellationToken cancellationToken)
{
#pragma warning disable SKEXP0001
    var audioService = kernel.GetRequiredService<ITextToAudioService>();
    var settings = new OpenAITextToAudioExecutionSettings
    {
        Voice = _config.Voice,
        ResponseFormat = "mp3",
    };

    var audioContent = await audioService.GetAudioContentAsync(text, settings, cancellationToken: cancellationToken);

    // Reject an empty payload as well as a missing one, so no zero-byte .mp3 is written.
    if (audioContent.Data is not { IsEmpty: false } audioData)
    {
        throw new InvalidOperationException("Generated audio contains no data.");
    }

    // The file name is machine-facing: format it culture-invariantly so the user's
    // calendar or digit settings cannot change it.
    var timestamp = DateTime.Now.ToString("yyyyMMddHHmmss", System.Globalization.CultureInfo.InvariantCulture);
    var filePath = Path.Combine(Path.GetTempPath(), $"AdvancedPaste_Audio_{timestamp}.mp3");

    await File.WriteAllBytesAsync(filePath, audioData.ToArray(), cancellationToken);
    return filePath;
#pragma warning restore SKEXP0001
}
/// <summary>
/// Transcribes the audio carried by <paramref name="request"/> into text.
/// </summary>
/// <param name="kernel">Kernel from which the <see cref="IAudioToTextService"/> is resolved.</param>
/// <param name="request">Request whose <c>AudioBytes</c> and <c>AudioMimeType</c> are sent to the service.</param>
/// <param name="cancellationToken">Cancels the transcription request.</param>
/// <returns>The text of the content returned by the service.</returns>
/// <exception cref="ArgumentException">The request carries no audio bytes.</exception>
private async Task<string> ProcessAudioToTextAsync(Kernel kernel, PasteAIRequest request, CancellationToken cancellationToken)
{
#pragma warning disable SKEXP0001
    var transcriptionService = kernel.GetRequiredService<IAudioToTextService>();

    var clipboardAudio = request.AudioBytes;
    if (clipboardAudio is null || clipboardAudio.Length == 0)
    {
        throw new ArgumentException("Audio content must be provided", nameof(request));
    }

    // No execution settings are passed (null).
    var transcript = await transcriptionService.GetTextContentAsync(
        new AudioContent(clipboardAudio, request.AudioMimeType),
        null,
        cancellationToken: cancellationToken);

    return transcript.Text;
#pragma warning restore SKEXP0001
}
private async Task<string> ProcessChatCompletionAsync(Kernel kernel, PasteAIRequest request, string userMessageContent, string systemPrompt, CancellationToken cancellationToken)
{
var executionSettings = CreateExecutionSettings();
var modelId = _config.Model;
IChatCompletionService chatService;
@@ -95,29 +215,20 @@ namespace AdvancedPaste.Services.CustomActions
var chatHistory = new ChatHistory();
chatHistory.AddSystemMessage(systemPrompt);
if (imageBytes != null)
if (request.ImageBytes != null)
{
var collection = new ChatMessageContentItemCollection();
if (!string.IsNullOrWhiteSpace(inputText))
if (!string.IsNullOrWhiteSpace(request.InputText))
{
collection.Add(new TextContent($"Clipboard Content:\n{inputText}"));
collection.Add(new TextContent($"Clipboard Content:\n{request.InputText}"));
}
collection.Add(new ImageContent(imageBytes, request.ImageMimeType ?? "image/png"));
collection.Add(new TextContent($"User instructions:\n{prompt}\n\nOutput:"));
collection.Add(new ImageContent(request.ImageBytes, request.ImageMimeType ?? "image/png"));
collection.Add(new TextContent($"User instructions:\n{request.Prompt}\n\nOutput:"));
chatHistory.AddUserMessage(collection);
}
else
{
var userMessageContent = $"""
User instructions:
{prompt}
Clipboard Content:
{inputText}
Output:
""";
chatHistory.AddUserMessage(userMessageContent);
}
@@ -142,11 +253,55 @@ namespace AdvancedPaste.Services.CustomActions
switch (_serviceType)
{
case AIServiceType.OpenAI:
kernelBuilder.AddOpenAIChatCompletion(_config.Model, apiKey, serviceId: _config.Model);
if (_config.Usage == PasteAIUsage.TextToImage)
{
#pragma warning disable SKEXP0010
kernelBuilder.AddOpenAITextToImage(apiKey, modelId: _config.Model);
#pragma warning restore SKEXP0010
}
else if (_config.Usage == PasteAIUsage.TextToAudio)
{
#pragma warning disable SKEXP0010
kernelBuilder.AddOpenAITextToAudio(_config.Model, apiKey);
#pragma warning restore SKEXP0010
}
else if (_config.Usage == PasteAIUsage.AudioToText)
{
#pragma warning disable SKEXP0010
kernelBuilder.AddOpenAIAudioToText(_config.Model, apiKey);
#pragma warning restore SKEXP0010
}
else
{
kernelBuilder.AddOpenAIChatCompletion(_config.Model, apiKey, serviceId: _config.Model);
}
break;
case AIServiceType.AzureOpenAI:
var deploymentName = string.IsNullOrWhiteSpace(_config.DeploymentName) ? _config.Model : _config.DeploymentName;
kernelBuilder.AddAzureOpenAIChatCompletion(deploymentName, RequireEndpoint(endpoint, _serviceType), apiKey, serviceId: _config.Model);
if (_config.Usage == PasteAIUsage.TextToImage)
{
#pragma warning disable SKEXP0010
kernelBuilder.AddAzureOpenAITextToImage(deploymentName, RequireEndpoint(endpoint, _serviceType), apiKey);
#pragma warning restore SKEXP0010
}
else if (_config.Usage == PasteAIUsage.TextToAudio)
{
#pragma warning disable SKEXP0010
kernelBuilder.AddAzureOpenAITextToAudio(deploymentName, RequireEndpoint(endpoint, _serviceType), apiKey);
#pragma warning restore SKEXP0010
}
else if (_config.Usage == PasteAIUsage.AudioToText)
{
#pragma warning disable SKEXP0010
kernelBuilder.AddAzureOpenAIAudioToText(deploymentName, RequireEndpoint(endpoint, _serviceType), apiKey);
#pragma warning restore SKEXP0010
}
else
{
kernelBuilder.AddAzureOpenAIChatCompletion(deploymentName, RequireEndpoint(endpoint, _serviceType), apiKey, serviceId: _config.Model);
}
break;
case AIServiceType.Mistral:
kernelBuilder.AddMistralChatCompletion(_config.Model, apiKey: apiKey);

View File

@@ -341,15 +341,16 @@ public abstract class KernelServiceBase(
async dataPackageView =>
{
var imageBytes = await dataPackageView.GetImageAsPngBytesAsync();
var audio = await dataPackageView.GetAudioBytesAsync();
var input = await dataPackageView.GetTextOrHtmlTextAsync();
if (string.IsNullOrEmpty(input) && imageBytes == null)
if (string.IsNullOrEmpty(input) && imageBytes == null && audio.Data == null)
{
// If we have no text, no image and no audio, try to get text via OCR or throw if nothing exists
input = await dataPackageView.GetClipboardTextOrThrowAsync(kernel.GetCancellationToken());
}
var result = await _customActionTransformService.TransformAsync(fixedPrompt, input, imageBytes, kernel.GetCancellationToken(), kernel.GetProgress());
var result = await _customActionTransformService.TransformAsync(fixedPrompt, input, imageBytes, audio.Data, audio.MimeType, kernel.GetCancellationToken(), kernel.GetProgress());
return DataPackageHelpers.CreateFromText(result?.Content ?? string.Empty);
});
@@ -360,21 +361,22 @@ public abstract class KernelServiceBase(
async dataPackageView =>
{
var imageBytes = await dataPackageView.GetImageAsPngBytesAsync();
var audio = await dataPackageView.GetAudioBytesAsync();
var input = await dataPackageView.GetTextOrHtmlTextAsync();
if (string.IsNullOrEmpty(input) && imageBytes == null)
if (string.IsNullOrEmpty(input) && imageBytes == null && audio.Data == null)
{
input = await dataPackageView.GetClipboardTextOrThrowAsync(kernel.GetCancellationToken());
}
string output = await GetPromptBasedOutput(format, prompt, input, imageBytes, kernel.GetCancellationToken(), kernel.GetProgress());
string output = await GetPromptBasedOutput(format, prompt, input, imageBytes, audio.Data, audio.MimeType, kernel.GetCancellationToken(), kernel.GetProgress());
return DataPackageHelpers.CreateFromText(output);
});
private async Task<string> GetPromptBasedOutput(PasteFormats format, string prompt, string input, byte[] imageBytes, CancellationToken cancellationToken, IProgress<double> progress) =>
private async Task<string> GetPromptBasedOutput(PasteFormats format, string prompt, string input, byte[] imageBytes, byte[] audioBytes, string audioMimeType, CancellationToken cancellationToken, IProgress<double> progress) =>
format switch
{
PasteFormats.CustomTextTransformation => (await _customActionTransformService.TransformAsync(prompt, input, imageBytes, cancellationToken, progress))?.Content ?? string.Empty,
PasteFormats.CustomTextTransformation => (await _customActionTransformService.TransformAsync(prompt, input, imageBytes, audioBytes, audioMimeType, cancellationToken, progress))?.Content ?? string.Empty,
_ => throw new ArgumentException($"Unsupported format {format} for prompt transform", nameof(format)),
};

View File

@@ -34,12 +34,26 @@ public sealed class PasteFormatExecutor(IKernelService kernelService, ICustomAct
// Run on thread-pool; although we use Async routines consistently, some actions still occasionally take a long time without yielding.
return await Task.Run(async () =>
pasteFormat.Format switch
{
if (pasteFormat.Format == PasteFormats.CustomTextTransformation)
{
var audio = await clipboardData.GetAudioBytesAsync();
return DataPackageHelpers.CreateFromText((await _customActionTransformService.TransformAsync(
pasteFormat.Prompt,
await clipboardData.GetTextOrHtmlTextAsync(),
await clipboardData.GetImageAsPngBytesAsync(),
audio.Data,
audio.MimeType,
cancellationToken,
progress))?.Content ?? string.Empty);
}
return pasteFormat.Format switch
{
PasteFormats.KernelQuery => await _kernelService.TransformClipboardAsync(pasteFormat.Prompt, clipboardData, pasteFormat.IsSavedQuery, cancellationToken, progress),
PasteFormats.CustomTextTransformation => DataPackageHelpers.CreateFromText((await _customActionTransformService.TransformAsync(pasteFormat.Prompt, await clipboardData.GetTextOrHtmlTextAsync(), await clipboardData.GetImageAsPngBytesAsync(), cancellationToken, progress))?.Content ?? string.Empty),
_ => await TransformHelpers.TransformAsync(format, clipboardData, cancellationToken, progress),
});
};
});
}
private static void WriteTelemetry(PasteFormats format, PasteActionSource source)

View File

@@ -372,4 +372,16 @@
<value>Unable to load Foundry Local model: {0}</value>
<comment>{0} is the model identifier. Do not translate {0}.</comment>
</data>
<data name="PasteAIUsage_ChatCompletion_Label" xml:space="preserve">
<value>Chat completion</value>
</data>
<data name="PasteAIUsage_TextToImage_Label" xml:space="preserve">
<value>Text to image</value>
</data>
<data name="PasteAIUsage_TextToAudio_Label" xml:space="preserve">
<value>Text to audio</value>
</data>
<data name="PasteAIUsage_AudioToText_Label" xml:space="preserve">
<value>Audio to text</value>
</data>
</root>

View File

@@ -7,6 +7,7 @@ using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.IO.Abstractions;
using System.Linq;
using System.Runtime.InteropServices;
@@ -27,6 +28,8 @@ using Microsoft.UI.Xaml.Media;
using Microsoft.UI.Xaml.Media.Imaging;
using Microsoft.Win32;
using Windows.ApplicationModel.DataTransfer;
using Windows.Media.Core;
using Windows.Media.Playback;
using Windows.System;
using WinUIEx;
@@ -271,6 +274,60 @@ namespace AdvancedPaste.ViewModels
OnPropertyChanged(nameof(CurrentIndexDisplay));
};
PlayPauseAudioCommand = new RelayCommand(PlayPauseAudio);
SaveAudioCommand = new RelayCommand(SaveAudio);
DeleteAudioCommand = new RelayCommand(DeleteAudio);
_audioTimer = new DispatcherTimer { Interval = TimeSpan.FromMilliseconds(100) };
_audioTimer.Tick += (s, e) =>
{
// Notify property change to update UI, but avoid triggering the setter logic
// The setter logic checks for significant difference, so it should be fine,
// but to be safe we are just notifying here.
OnPropertyChanged(nameof(AudioPosition));
OnPropertyChanged(nameof(AudioPositionString));
};
_audioPlayer = new MediaPlayer();
_audioPlayer.MediaOpened += (s, e) =>
{
_ = _dispatcherQueue.TryEnqueue(() =>
{
OnPropertyChanged(nameof(AudioDuration));
OnPropertyChanged(nameof(AudioDurationString));
});
};
_audioPlayer.PlaybackSession.PlaybackStateChanged += (s, e) =>
{
_ = _dispatcherQueue.TryEnqueue(() =>
{
OnPropertyChanged(nameof(IsAudioPlaying));
OnPropertyChanged(nameof(AudioPlayPauseGlyph));
if (s.PlaybackState == MediaPlaybackState.Playing)
{
_audioTimer.Start();
}
else
{
_audioTimer.Stop();
}
});
};
_audioPlayer.MediaEnded += (s, e) =>
{
_ = _dispatcherQueue.TryEnqueue(() =>
{
s.Position = TimeSpan.Zero;
// s.PlaybackState = MediaPlaybackState.Paused; // Read-only
_audioPlayer.Pause();
OnPropertyChanged(nameof(AudioPosition));
OnPropertyChanged(nameof(AudioPositionString));
OnPropertyChanged(nameof(IsAudioPlaying));
OnPropertyChanged(nameof(AudioPlayPauseGlyph));
});
};
ClipboardHistoryEnabled = IsClipboardHistoryEnabled();
UpdateOpenAIKey();
_clipboardTimer = new() { Interval = TimeSpan.FromSeconds(1) };
@@ -424,7 +481,27 @@ namespace AdvancedPaste.ViewModels
/// <summary>
/// Stops timers, detaches the settings handler, releases the cancellation source and the
/// audio player, and deletes any generated <c>.mp3</c> files listed in <c>GeneratedResponses</c>.
/// </summary>
public void Dispose()
{
    _clipboardTimer.Stop();
    _userSettings.Changed -= UserSettings_Changed;
    _pasteActionCancellationTokenSource?.Dispose();

    // Stop the position timer before disposing the player: the timer's tick handler raises
    // change notifications for properties that read the player's playback session.
    _audioTimer?.Stop();
    _audioPlayer?.Dispose();

    // Cleanup any temporary audio files
    foreach (var response in GeneratedResponses)
    {
        if (response.EndsWith(".mp3", StringComparison.OrdinalIgnoreCase) && File.Exists(response))
        {
            try
            {
                File.Delete(response);
            }
            catch (Exception ex)
            {
                // Best effort: a file that cannot be deleted must not fail disposal.
                Logger.LogError($"Failed to delete temporary audio file: {response}", ex);
            }
        }
    }

    GC.SuppressFinalize(this);
}
@@ -558,6 +635,23 @@ namespace AdvancedPaste.ViewModels
}
ClipboardHistoryEnabled = IsClipboardHistoryEnabled();
// Cleanup any temporary audio files from previous session
foreach (var response in GeneratedResponses)
{
if (response.EndsWith(".mp3", StringComparison.OrdinalIgnoreCase) && File.Exists(response))
{
try
{
File.Delete(response);
}
catch (Exception ex)
{
Logger.LogError($"Failed to delete temporary audio file: {response}", ex);
}
}
}
GeneratedResponses.Clear();
}
@@ -614,8 +708,101 @@ namespace AdvancedPaste.ViewModels
}
[ObservableProperty]
[NotifyPropertyChangedFor(nameof(HasCustomFormatImage))]
[NotifyPropertyChangedFor(nameof(HasCustomFormatText))]
[NotifyPropertyChangedFor(nameof(CustomFormatImageResult))]
private string _customFormatResult;
/// <summary>True when <see cref="CustomFormatResult"/> starts with "data:image" (case-insensitive); false when it is null.</summary>
public bool HasCustomFormatImage => CustomFormatResult?.StartsWith("data:image", StringComparison.OrdinalIgnoreCase) ?? false;
/// <summary>True when <see cref="CustomFormatResult"/> ends with ".mp3" (case-insensitive); false when it is null.</summary>
public bool HasCustomFormatAudio => CustomFormatResult?.EndsWith(".mp3", StringComparison.OrdinalIgnoreCase) ?? false;
/// <summary>True when the result is neither an image nor an audio result, which includes a null result.</summary>
public bool HasCustomFormatText => !HasCustomFormatImage && !HasCustomFormatAudio;
/// <summary>
/// Decodes the base64 payload of an image data URI held in <see cref="CustomFormatResult"/>
/// into a bitmap source. Returns null when the result is not an image or decoding fails
/// (the failure is logged).
/// </summary>
public ImageSource CustomFormatImageResult
{
    get
    {
        if (!HasCustomFormatImage || string.IsNullOrEmpty(CustomFormatResult))
        {
            return null;
        }

        try
        {
            // Everything after the first comma of the data URI is the base64 payload.
            var payload = CustomFormatResult.Split(',')[1];
            var decodedBytes = Convert.FromBase64String(payload);
            var pixelStream = new System.IO.MemoryStream(decodedBytes);

            var bitmap = new BitmapImage();
            bitmap.SetSource(pixelStream.AsRandomAccessStream());
            return bitmap;
        }
        catch (Exception ex)
        {
            Logger.LogError("Failed to create image source from data URI", ex);
            return null;
        }
    }
}
// Player used for the generated audio result.
private MediaPlayer _audioPlayer;
// Timer whose tick raises change notifications for the audio position properties.
private DispatcherTimer _audioTimer;
/// <summary>File name portion of <see cref="CustomFormatResult"/> when it is an audio result; otherwise an empty string.</summary>
public string AudioFileName => HasCustomFormatAudio ? Path.GetFileName(CustomFormatResult) : string.Empty;
/// <summary>Natural duration of the player's session in seconds, or 0 when there is no player.</summary>
public double AudioDuration => _audioPlayer?.PlaybackSession.NaturalDuration.TotalSeconds ?? 0;
/// <summary>
/// Playback position in seconds (0 when there is no player). Setting it seeks the player
/// only when the requested value differs from the current position by more than half a second.
/// </summary>
public double AudioPosition
{
    get
    {
        return _audioPlayer?.PlaybackSession.Position.TotalSeconds ?? 0;
    }

    set
    {
        if (_audioPlayer == null)
        {
            return;
        }

        var session = _audioPlayer.PlaybackSession;
        var distanceFromCurrent = Math.Abs(session.Position.TotalSeconds - value);
        if (distanceFromCurrent > 0.5)
        {
            session.Position = TimeSpan.FromSeconds(value);

            // Only notify if we actually changed the position
            OnPropertyChanged(nameof(AudioPosition));
        }

        OnPropertyChanged(nameof(AudioPositionString));
    }
}
/// <summary><see cref="AudioDuration"/> formatted as mm:ss.</summary>
// NOTE(review): the "mm" specifier is the minutes component only, so durations of an hour or more would wrap — confirm whether that can occur.
public string AudioDurationString => TimeSpan.FromSeconds(AudioDuration).ToString(@"mm\:ss", CultureInfo.InvariantCulture);
/// <summary><see cref="AudioPosition"/> formatted as mm:ss.</summary>
public string AudioPositionString => TimeSpan.FromSeconds(AudioPosition).ToString(@"mm\:ss", CultureInfo.InvariantCulture);
/// <summary>True when the player's session reports the Playing state; false otherwise, including when there is no player.</summary>
public bool IsAudioPlaying => _audioPlayer?.PlaybackSession.PlaybackState == MediaPlaybackState.Playing;
/// <summary>Glyph for the play/pause button: "\uE769" while playing, "\uE768" otherwise (presumably the pause and play icons — verify against the icon font).</summary>
public string AudioPlayPauseGlyph => IsAudioPlaying ? "\uE769" : "\uE768";
/// <summary>Command bound to the play/pause button.</summary>
public IRelayCommand PlayPauseAudioCommand { get; }
/// <summary>Command bound to the Save button of the audio result.</summary>
public IRelayCommand SaveAudioCommand { get; }
/// <summary>Command bound to the Delete button of the audio result.</summary>
public IRelayCommand DeleteAudioCommand { get; }
/// <summary>
/// Media source created from the audio result interpreted as a URI.
/// Returns <c>null</c> when the result is not audio or the source cannot be created (the failure is logged).
/// </summary>
public MediaSource CustomFormatAudioResult
{
    get
    {
        if (!HasCustomFormatAudio || string.IsNullOrEmpty(CustomFormatResult))
        {
            return null;
        }

        try
        {
            var location = new Uri(CustomFormatResult);
            return MediaSource.CreateFromUri(location);
        }
        catch (Exception ex)
        {
            Logger.LogError("Failed to create audio source from file path", ex);
            return null;
        }
    }
}
[RelayCommand]
public async Task PasteCustomAsync()
{
@@ -623,7 +810,25 @@ namespace AdvancedPaste.ViewModels
if (!string.IsNullOrEmpty(text))
{
await CopyPasteAndHideAsync(DataPackageHelpers.CreateFromText(text));
if (text.StartsWith("data:image", StringComparison.OrdinalIgnoreCase))
{
try
{
var base64Data = text.Split(',')[1];
var bytes = Convert.FromBase64String(base64Data);
var stream = new System.IO.MemoryStream(bytes);
var dataPackage = DataPackageHelpers.CreateFromImage(Windows.Storage.Streams.RandomAccessStreamReference.CreateFromStream(stream.AsRandomAccessStream()));
await CopyPasteAndHideAsync(dataPackage);
}
catch (Exception ex)
{
Logger.LogError("Failed to paste image from data URI", ex);
}
}
else
{
await CopyPasteAndHideAsync(DataPackageHelpers.CreateFromText(text));
}
}
}
@@ -895,11 +1100,6 @@ namespace AdvancedPaste.ViewModels
Logger.LogError("Failed to activate AI provider", ex);
return;
}
UpdateAIProviderActiveFlags();
OnPropertyChanged(nameof(AIProviders));
NotifyActiveProviderChanged();
EnqueueRefreshPasteFormats();
}
public async Task CancelPasteActionAsync()
@@ -922,5 +1122,119 @@ namespace AdvancedPaste.ViewModels
TransformProgress = value;
});
}
/// <summary>
/// Reacts to a new custom-format result: re-announces the audio-related properties and,
/// when a player exists, points it at the new audio file or clears its source.
/// </summary>
/// <param name="value">The new result value.</param>
partial void OnCustomFormatResultChanged(string value)
{
    OnPropertyChanged(nameof(HasCustomFormatAudio));
    OnPropertyChanged(nameof(CustomFormatAudioResult));
    OnPropertyChanged(nameof(AudioFileName));

    var isAudio = HasCustomFormatAudio;
    var player = _audioPlayer;
    if (player == null)
    {
        // Nothing to update when no player exists, whatever the result kind.
        return;
    }

    if (!isAudio)
    {
        player.Pause();
        player.Source = null;
        return;
    }

    try
    {
        // Ensure we are on the UI thread if needed, though OnCustomFormatResultChanged is likely called on UI thread.
        // Reset player state
        player.Pause();
        player.Source = MediaSource.CreateFromUri(new Uri(value));
    }
    catch (Exception ex)
    {
        Logger.LogError("Failed to set audio source", ex);
    }
}
/// <summary>Toggles playback: pauses when the session is currently playing, otherwise starts playing. No-op without a player.</summary>
private void PlayPauseAudio()
{
    var player = _audioPlayer;
    if (player == null)
    {
        return;
    }

    var currentlyPlaying = player.PlaybackSession.PlaybackState == MediaPlaybackState.Playing;
    if (currentlyPlaying)
    {
        player.Pause();
        return;
    }

    player.Play();
}
/// <summary>
/// Lets the user pick a destination and copies the current audio result there,
/// overwriting an existing file. Does nothing when the result is not audio or no main window is available.
/// </summary>
/// <remarks>
/// This is an <c>async void</c> method, so nothing upstream can observe a failure.
/// Every step after the guards therefore runs inside the try block and failures are logged.
/// </remarks>
private async void SaveAudio()
{
    if (!HasCustomFormatAudio || string.IsNullOrEmpty(CustomFormatResult))
    {
        return;
    }

    var mainWindow = GetMainWindow();
    if (mainWindow == null)
    {
        return;
    }

    // Capture the path now: the result can change (or become null) while the picker is open.
    var sourcePath = CustomFormatResult;

    try
    {
        var savePicker = new Windows.Storage.Pickers.FileSavePicker();
        savePicker.SuggestedStartLocation = Windows.Storage.Pickers.PickerLocationId.Downloads;
        savePicker.FileTypeChoices.Add("Audio", new List<string>() { ".mp3" });
        savePicker.SuggestedFileName = Path.GetFileName(sourcePath);

        // The picker must be associated with the owning window handle before it is shown.
        var hwnd = WinRT.Interop.WindowNative.GetWindowHandle(mainWindow);
        WinRT.Interop.InitializeWithWindow.Initialize(savePicker, hwnd);

        var file = await savePicker.PickSaveFileAsync();
        if (file == null)
        {
            // User cancelled the picker.
            return;
        }

        File.Copy(sourcePath, file.Path, true);
    }
    catch (Exception ex)
    {
        Logger.LogError("Failed to save audio file", ex);
    }
}
/// <summary>
/// Deletes the audio file behind the current result (best effort, failures are logged),
/// removes it from the generated responses, then either steps the response index back by one
/// or, when no responses remain, clears the result and raises <c>PreviewRequested</c>.
/// </summary>
private void DeleteAudio()
{
    if (!HasCustomFormatAudio || string.IsNullOrEmpty(CustomFormatResult))
    {
        return;
    }

    try
    {
        var audioPath = CustomFormatResult;
        if (File.Exists(audioPath))
        {
            File.Delete(audioPath);
        }
    }
    catch (Exception ex)
    {
        Logger.LogError("Failed to delete audio file", ex);
    }

    // The entry is dropped from the list even when the file could not be deleted.
    GeneratedResponses.Remove(CustomFormatResult);

    if (GeneratedResponses.Count <= 0)
    {
        CustomFormatResult = null;
        PreviewRequested?.Invoke(this, EventArgs.Empty);
        return;
    }

    CurrentResponseIndex = Math.Max(0, CurrentResponseIndex - 1);
}
}
}

View File

@@ -17,6 +17,7 @@ namespace Microsoft.PowerToys.Settings.UI.Library
{
private string _id = Guid.NewGuid().ToString("N");
private string _serviceType = "OpenAI";
private string _usage = "ChatCompletion";
private string _modelName = string.Empty;
private string _endpointUrl = string.Empty;
private string _apiVersion = string.Empty;
@@ -27,6 +28,9 @@ namespace Microsoft.PowerToys.Settings.UI.Library
private bool _isActive;
private bool _enableAdvancedAI;
private bool _isLocalModel;
private int _imageWidth = 1024;
private int _imageHeight = 1024;
private string _voice = "alloy";
public event PropertyChangedEventHandler PropertyChanged;
@@ -50,6 +54,20 @@ namespace Microsoft.PowerToys.Settings.UI.Library
}
}
/// <summary>
/// Usage mode as stored in configuration under the <c>usage</c> key.
/// Null, empty or whitespace input is replaced with <c>ChatCompletion</c>.
/// </summary>
[JsonPropertyName("usage")]
public string Usage
{
    get
    {
        return _usage;
    }

    set
    {
        // TODO: Localization support
        var effective = string.IsNullOrWhiteSpace(value) ? "ChatCompletion" : value;
        SetProperty(ref _usage, effective);
    }
}
/// <summary>Typed view over <see cref="Usage"/>; not serialized. Reads and writes go through the config-string conversions.</summary>
[JsonIgnore]
public PasteAIUsage UsageKind
{
    get
    {
        return PasteAIUsageExtensions.FromConfigString(Usage);
    }

    set
    {
        Usage = PasteAIUsageExtensions.ToConfigString(value);
    }
}
[JsonIgnore]
public AIServiceType ServiceTypeKind
{
@@ -126,6 +144,27 @@ namespace Microsoft.PowerToys.Settings.UI.Library
set => SetProperty(ref _isLocalModel, value);
}
/// <summary>Image width setting, serialized as <c>image-width</c>. Stored as given, without range checks.</summary>
[JsonPropertyName("image-width")]
public int ImageWidth
{
    get
    {
        return _imageWidth;
    }

    set
    {
        SetProperty(ref _imageWidth, value);
    }
}

/// <summary>Image height setting, serialized as <c>image-height</c>. Stored as given, without range checks.</summary>
[JsonPropertyName("image-height")]
public int ImageHeight
{
    get
    {
        return _imageHeight;
    }

    set
    {
        SetProperty(ref _imageHeight, value);
    }
}
/// <summary>
/// Voice setting, serialized as <c>voice</c>.
/// Null, empty or whitespace input falls back to <c>alloy</c>, mirroring how <c>Usage</c>
/// normalises blank input, so a cleared text box never stores an empty voice.
/// </summary>
[JsonPropertyName("voice")]
public string Voice
{
    get => _voice;
    set => SetProperty(ref _voice, string.IsNullOrWhiteSpace(value) ? "alloy" : value);
}
[JsonIgnore]
public bool IsActive
{
@@ -142,6 +181,7 @@ namespace Microsoft.PowerToys.Settings.UI.Library
{
Id = Id,
ServiceType = ServiceType,
Usage = Usage,
ModelName = ModelName,
EndpointUrl = EndpointUrl,
ApiVersion = ApiVersion,
@@ -151,6 +191,9 @@ namespace Microsoft.PowerToys.Settings.UI.Library
ModerationEnabled = ModerationEnabled,
EnableAdvancedAI = EnableAdvancedAI,
IsLocalModel = IsLocalModel,
ImageWidth = ImageWidth,
ImageHeight = ImageHeight,
Voice = Voice,
IsActive = IsActive,
};
}

View File

@@ -0,0 +1,14 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
namespace Microsoft.PowerToys.Settings.UI.Library
{
    /// <summary>
    /// Usage modes a paste AI provider can be configured for.
    /// The numeric values are the implicit declaration-order values, written out explicitly.
    /// </summary>
    public enum PasteAIUsage
    {
        /// <summary>Chat completion; also the fallback for unrecognised configuration values.</summary>
        ChatCompletion = 0,

        /// <summary>Text to image.</summary>
        TextToImage = 1,

        /// <summary>Text to audio.</summary>
        TextToAudio = 2,

        /// <summary>Audio to text.</summary>
        AudioToText = 3,
    }
}

View File

@@ -0,0 +1,32 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
namespace Microsoft.PowerToys.Settings.UI.Library
{
    /// <summary>
    /// Conversions between <see cref="PasteAIUsage"/> and the string stored in configuration.
    /// </summary>
    public static class PasteAIUsageExtensions
    {
        private const string ChatCompletionValue = "ChatCompletion";
        private const string TextToImageValue = "TextToImage";
        private const string TextToAudioValue = "TextToAudio";
        private const string AudioToTextValue = "AudioToText";

        /// <summary>
        /// Returns the configuration string for <paramref name="usage"/>.
        /// Values outside the declared members map to <c>ChatCompletion</c>.
        /// </summary>
        /// <param name="usage">The usage mode to convert.</param>
        /// <returns>The canonical configuration string.</returns>
        public static string ToConfigString(this PasteAIUsage usage)
        {
            return usage switch
            {
                PasteAIUsage.ChatCompletion => ChatCompletionValue,
                PasteAIUsage.TextToImage => TextToImageValue,
                PasteAIUsage.TextToAudio => TextToAudioValue,
                PasteAIUsage.AudioToText => AudioToTextValue,
                _ => ChatCompletionValue,
            };
        }

        /// <summary>
        /// Parses a configuration string into a <see cref="PasteAIUsage"/>.
        /// Matching ignores case, so it agrees with callers that compare the stored string
        /// using <c>OrdinalIgnoreCase</c>. Null and unrecognised values map to
        /// <see cref="PasteAIUsage.ChatCompletion"/>.
        /// </summary>
        /// <param name="usage">The stored configuration string; may be null.</param>
        /// <returns>The matching usage mode, or <see cref="PasteAIUsage.ChatCompletion"/> as the fallback.</returns>
        public static PasteAIUsage FromConfigString(string usage)
        {
            if (Matches(usage, TextToImageValue))
            {
                return PasteAIUsage.TextToImage;
            }

            if (Matches(usage, TextToAudioValue))
            {
                return PasteAIUsage.TextToAudio;
            }

            if (Matches(usage, AudioToTextValue))
            {
                return PasteAIUsage.AudioToText;
            }

            return PasteAIUsage.ChatCompletion;
        }

        // Case-insensitive ordinal comparison; a null candidate never matches.
        private static bool Matches(string candidate, string expected)
        {
            return string.Equals(candidate, expected, System.StringComparison.OrdinalIgnoreCase);
        }
    }
}

View File

@@ -0,0 +1,30 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using Microsoft.PowerToys.Settings.UI.Helpers;
using Microsoft.PowerToys.Settings.UI.Library;
using Microsoft.UI.Xaml.Data;
namespace Microsoft.PowerToys.Settings.UI.Converters;
/// <summary>
/// One-way converter that turns a usage value (config string or <see cref="PasteAIUsage"/>)
/// into the string resource named <c>PasteAIUsage_{usage}_Label</c>.
/// </summary>
public sealed partial class PasteAIUsageToStringConverter : IValueConverter
{
    /// <summary>
    /// Resolves the label for <paramref name="value"/>. Anything that is neither a string
    /// nor a <see cref="PasteAIUsage"/> is treated as <see cref="PasteAIUsage.ChatCompletion"/>.
    /// </summary>
    public object Convert(object value, Type targetType, object parameter, string language)
    {
        PasteAIUsage usage;
        if (value is string text)
        {
            usage = PasteAIUsageExtensions.FromConfigString(text);
        }
        else if (value is PasteAIUsage kind)
        {
            usage = kind;
        }
        else
        {
            usage = PasteAIUsage.ChatCompletion;
        }

        var resourceKey = $"PasteAIUsage_{usage}_Label";
        return ResourceLoaderInstance.ResourceLoader.GetString(resourceKey);
    }

    /// <summary>Not implemented; always throws <see cref="NotImplementedException"/>.</summary>
    public object ConvertBack(object value, Type targetType, object parameter, string language)
        => throw new NotImplementedException();
}

View File

@@ -40,6 +40,7 @@
</Style>
<converters:ServiceTypeToIconConverter x:Key="ServiceTypeToIconConverter" />
<converters:PasteAIUsageToStringConverter x:Key="PasteAIUsageToStringConverter" />
<DataTemplate x:Key="AdditionalActionTemplate" x:DataType="models:AdvancedPasteAdditionalAction">
<StackPanel Orientation="Horizontal" Spacing="4">
<controls:ShortcutControl
@@ -118,6 +119,18 @@
Header="{x:Bind ModelName, Mode=OneWay}"
HeaderIcon="{x:Bind ServiceType, Mode=OneWay, Converter={StaticResource ServiceTypeToIconConverter}}">
<StackPanel Orientation="Horizontal" Spacing="8">
<Border
Padding="8,4"
VerticalAlignment="Center"
Background="{ThemeResource CardBackgroundFillColorDefaultBrush}"
BorderBrush="{ThemeResource CardStrokeColorDefaultBrush}"
BorderThickness="1"
CornerRadius="4">
<TextBlock
FontSize="12"
Foreground="{ThemeResource TextFillColorSecondaryBrush}"
Text="{x:Bind Usage, Mode=OneWay, Converter={StaticResource PasteAIUsageToStringConverter}}" />
</Border>
<Button
Padding="8"
Background="Transparent"
@@ -493,6 +506,41 @@
Margin="0,8,0,48"
Orientation="Vertical"
Spacing="16">
<ComboBox
x:Name="PasteAIUsageComboBox"
x:Uid="AdvancedPaste_Usage"
MinWidth="200"
HorizontalAlignment="Stretch"
Header="Usage"
SelectedValue="{x:Bind ViewModel.PasteAIProviderDraft.Usage, Mode=TwoWay}"
SelectedValuePath="Tag"
SelectionChanged="PasteAIUsageComboBox_SelectionChanged"
Visibility="{x:Bind GetUsageVisibility(ViewModel.PasteAIProviderDraft.ServiceType), Mode=OneWay}">
<ComboBoxItem x:Uid="PasteAIUsage_ChatCompletion" Tag="ChatCompletion" />
<ComboBoxItem x:Uid="PasteAIUsage_TextToImage" Tag="TextToImage" />
<ComboBoxItem x:Uid="PasteAIUsage_TextToAudio" Tag="TextToAudio" />
<ComboBoxItem x:Uid="PasteAIUsage_AudioToText" Tag="AudioToText" />
</ComboBox>
<StackPanel
x:Name="PasteAIImageResolutionPanel"
Orientation="Horizontal"
Spacing="8"
Visibility="{x:Bind GetImageResolutionVisibility(ViewModel.PasteAIProviderDraft.Usage), Mode=OneWay}">
<TextBox
x:Name="PasteAIImageWidthTextBox"
x:Uid="AdvancedPaste_ImgOutputWidth"
MinWidth="96"
Text="{x:Bind ViewModel.PasteAIProviderDraft.ImageWidth, Mode=TwoWay}" />
<TextBlock
Margin="0,0,0,8"
VerticalAlignment="Bottom"
Text="x" />
<TextBox
x:Name="PasteAIImageHeightTextBox"
x:Uid="AdvancedPaste_ImgOutputHeight"
MinWidth="96"
Text="{x:Bind ViewModel.PasteAIProviderDraft.ImageHeight, Mode=TwoWay}" />
</StackPanel>
<TextBox
x:Name="PasteAIModelNameTextBox"
x:Uid="AdvancedPaste_ModelName"
@@ -525,6 +573,13 @@
MinWidth="200"
PlaceholderText="gpt-4o"
Text="{x:Bind ViewModel.PasteAIProviderDraft.DeploymentName, Mode=TwoWay}" />
<TextBox
x:Name="PasteAIVoiceTextBox"
MinWidth="200"
Header="Voice"
PlaceholderText="alloy"
Text="{x:Bind ViewModel.PasteAIProviderDraft.Voice, Mode=TwoWay}"
Visibility="{x:Bind GetVoiceVisibility(ViewModel.PasteAIProviderDraft.Usage), Mode=OneWay}" />
<TextBox
x:Name="PasteAISystemPromptTextBox"
x:Uid="AdvancedPaste_SystemPrompt"

View File

@@ -303,6 +303,52 @@ namespace Microsoft.PowerToys.Settings.UI.Views
System.Diagnostics.Debug.WriteLine($"{configType} API key saved successfully");
}
/// <summary>
/// Shows the usage selector only for the <c>OpenAI</c> and <c>AzureOpenAI</c> service types
/// (case-insensitive); null or empty input collapses it.
/// </summary>
/// <param name="serviceType">Service type string of the provider draft.</param>
/// <returns><see cref="Visibility.Visible"/> for the two supported service types, otherwise <see cref="Visibility.Collapsed"/>.</returns>
public Visibility GetUsageVisibility(string serviceType)
{
    var supportsUsage = !string.IsNullOrEmpty(serviceType)
        && (string.Equals(serviceType, "OpenAI", StringComparison.OrdinalIgnoreCase)
            || string.Equals(serviceType, "AzureOpenAI", StringComparison.OrdinalIgnoreCase));

    if (supportsUsage)
    {
        return Visibility.Visible;
    }

    return Visibility.Collapsed;
}
/// <summary>Visible only when <paramref name="usage"/> equals <c>TextToImage</c> (case-insensitive).</summary>
/// <param name="usage">Usage string of the provider draft; may be null.</param>
public Visibility GetImageResolutionVisibility(string usage)
{
    var isTextToImage = string.Equals(usage, "TextToImage", StringComparison.OrdinalIgnoreCase);
    if (isTextToImage)
    {
        return Visibility.Visible;
    }

    return Visibility.Collapsed;
}
/// <summary>Visible only when <paramref name="usage"/> equals <c>TextToAudio</c> (case-insensitive).</summary>
/// <param name="usage">Usage string of the provider draft; may be null.</param>
public Visibility GetVoiceVisibility(string usage)
{
    var isTextToAudio = string.Equals(usage, "TextToAudio", StringComparison.OrdinalIgnoreCase);
    if (isTextToAudio)
    {
        return Visibility.Visible;
    }

    return Visibility.Collapsed;
}
/// <summary>
/// Keeps the "advanced AI" checkbox in step with the selected usage: selecting
/// <c>TextToImage</c> turns the option off on the draft and disables the checkbox;
/// any other selection re-enables the checkbox. Does nothing without a provider draft.
/// </summary>
private void PasteAIUsageComboBox_SelectionChanged(object sender, SelectionChangedEventArgs e)
{
    var draft = ViewModel?.PasteAIProviderDraft;
    if (draft == null)
    {
        return;
    }

    var usageBox = (ComboBox)sender;
    var isTextToImage = usageBox.SelectedValue is string selectedUsage && selectedUsage == "TextToImage";

    if (isTextToImage)
    {
        draft.EnableAdvancedAI = false;
    }

    PasteAIEnableAdvancedAICheckBox.IsEnabled = !isTextToImage;
}
private void UpdatePasteAIUIVisibility()
{
var draft = ViewModel?.PasteAIProviderDraft;
@@ -345,6 +391,17 @@ namespace Microsoft.PowerToys.Settings.UI.Views
PasteAIEnableAdvancedAICheckBox.Visibility = showAdvancedAI ? Visibility.Visible : Visibility.Collapsed;
PasteAIApiKeyPasswordBox.Visibility = requiresApiKey ? Visibility.Visible : Visibility.Collapsed;
PasteAIModelNameTextBox.Visibility = isFoundryLocal ? Visibility.Collapsed : Visibility.Visible;
PasteAIImageResolutionPanel.Visibility = GetImageResolutionVisibility(draft.Usage);
if (draft.Usage == "TextToImage")
{
draft.EnableAdvancedAI = false;
PasteAIEnableAdvancedAICheckBox.IsEnabled = false;
}
else
{
PasteAIEnableAdvancedAICheckBox.IsEnabled = true;
}
if (requiresApiKey)
{

View File

@@ -5769,4 +5769,34 @@ To record a specific window, enter the hotkey with the Alt key in the opposite m
<data name="LightSwitch_FollowNightLightCardMessage.Text" xml:space="preserve">
<value>Following Night Light settings.</value>
</data>
<data name="PasteAIUsage_ChatCompletion_Label" xml:space="preserve">
<value>Chat completion</value>
</data>
<data name="PasteAIUsage_TextToImage_Label" xml:space="preserve">
<value>Text to image</value>
</data>
<data name="PasteAIUsage_ChatCompletion.Content" xml:space="preserve">
<value>Chat completion</value>
</data>
<data name="PasteAIUsage_TextToImage.Content" xml:space="preserve">
<value>Text to image</value>
</data>
<data name="PasteAIUsage_TextToAudio.Content" xml:space="preserve">
<value>Text to audio</value>
</data>
<data name="PasteAIUsage_AudioToText.Content" xml:space="preserve">
<value>Audio to text</value>
</data>
<data name="PasteAIUsage_TextToAudio_Label" xml:space="preserve">
<value>Text to audio</value>
</data>
<data name="PasteAIUsage_AudioToText_Label" xml:space="preserve">
<value>Audio to text</value>
</data>
<data name="AdvancedPaste_ImgOutputWidth.Header" xml:space="preserve">
<value>Image output width</value>
</data>
<data name="AdvancedPaste_ImgOutputHeight.Header" xml:space="preserve">
<value>Image output height</value>
</data>
</root>

View File

@@ -949,6 +949,9 @@ namespace Microsoft.PowerToys.Settings.UI.ViewModels
target.ModerationEnabled = source.ModerationEnabled;
target.EnableAdvancedAI = source.EnableAdvancedAI;
target.IsLocalModel = source.IsLocalModel;
target.Usage = source.Usage;
target.ImageWidth = source.ImageWidth;
target.ImageHeight = source.ImageHeight;
}
private void RemovePasteAICredentials(string providerId, string serviceType)