mirror of
https://github.com/infinilabs/coco-app.git
synced 2025-12-16 11:37:47 +01:00
feat: voice input support for search and chat (#302)
* feat: voice input support for search and chat * chore: add mic-recorder plugin * refactor: check microphone permission before recording * feat: realize sound wave effects * chore: remove mic-recorder plugin
This commit is contained in:
18
package.json
18
package.json
@@ -19,7 +19,7 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"@headlessui/react": "^2.2.0",
|
||||
"@tauri-apps/api": "^2.3.0",
|
||||
"@tauri-apps/api": "^2.4.0",
|
||||
"@tauri-apps/plugin-autostart": "~2.2.0",
|
||||
"@tauri-apps/plugin-deep-link": "^2.2.0",
|
||||
"@tauri-apps/plugin-dialog": "^2.2.0",
|
||||
@@ -28,9 +28,10 @@
|
||||
"@tauri-apps/plugin-os": "^2.2.1",
|
||||
"@tauri-apps/plugin-process": "^2.2.0",
|
||||
"@tauri-apps/plugin-shell": "^2.2.0",
|
||||
"@tauri-apps/plugin-updater": "^2.6.0",
|
||||
"@tauri-apps/plugin-updater": "^2.6.1",
|
||||
"@tauri-apps/plugin-websocket": "~2.3.0",
|
||||
"@tauri-apps/plugin-window": "2.0.0-alpha.1",
|
||||
"@wavesurfer/react": "^1.0.9",
|
||||
"ahooks": "^3.8.4",
|
||||
"clsx": "^2.1.1",
|
||||
"dotenv": "^16.4.7",
|
||||
@@ -39,8 +40,8 @@
|
||||
"i18next-browser-languagedetector": "^8.0.4",
|
||||
"lodash-es": "^4.17.21",
|
||||
"lucide-react": "^0.461.0",
|
||||
"mermaid": "^11.4.1",
|
||||
"nanoid": "^5.1.3",
|
||||
"mermaid": "^11.5.0",
|
||||
"nanoid": "^5.1.5",
|
||||
"react": "^18.3.1",
|
||||
"react-dom": "^18.3.1",
|
||||
"react-hotkeys-hook": "^4.6.1",
|
||||
@@ -54,19 +55,20 @@
|
||||
"remark-gfm": "^4.0.1",
|
||||
"remark-math": "^6.0.0",
|
||||
"tauri-plugin-fs-pro-api": "^2.3.1",
|
||||
"tauri-plugin-macos-permissions-api": "^2.1.1",
|
||||
"tauri-plugin-macos-permissions-api": "^2.2.0",
|
||||
"tauri-plugin-screenshots-api": "^2.1.0",
|
||||
"use-debounce": "^10.0.4",
|
||||
"uuid": "^11.1.0",
|
||||
"wavesurfer.js": "^7.9.3",
|
||||
"zustand": "^5.0.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@tauri-apps/cli": "^2.3.1",
|
||||
"@tauri-apps/cli": "^2.4.0",
|
||||
"@types/dom-speech-recognition": "^0.0.4",
|
||||
"@types/lodash-es": "^4.17.12",
|
||||
"@types/markdown-it": "^14.1.2",
|
||||
"@types/node": "^22.13.10",
|
||||
"@types/react": "^18.3.18",
|
||||
"@types/node": "^22.13.11",
|
||||
"@types/react": "^18.3.19",
|
||||
"@types/react-dom": "^18.3.5",
|
||||
"@types/react-katex": "^3.0.4",
|
||||
"@types/react-window": "^1.8.8",
|
||||
|
||||
830
pnpm-lock.yaml
generated
830
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
487
src-tauri/Cargo.lock
generated
487
src-tauri/Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
180
src/components/AudioRecording/index.tsx
Normal file
180
src/components/AudioRecording/index.tsx
Normal file
@@ -0,0 +1,180 @@
|
||||
import { useAppStore } from "@/stores/appStore";
|
||||
import { useReactive } from "ahooks";
|
||||
import clsx from "clsx";
|
||||
import { Check, Loader, Mic, X } from "lucide-react";
|
||||
import { FC, useEffect, useRef } from "react";
|
||||
import {
|
||||
checkMicrophonePermission,
|
||||
requestMicrophonePermission,
|
||||
} from "tauri-plugin-macos-permissions-api";
|
||||
import { useWavesurfer } from "@wavesurfer/react";
|
||||
import RecordPlugin from "wavesurfer.js/dist/plugins/record.esm.js";
|
||||
import { pick } from "lodash-es";
|
||||
|
||||
/** Props accepted by {@link AudioRecording}. */
interface AudioRecordingProps {
  /** Called with the recognised text once a recording has been converted. */
  onChange?: (text: string) => void;
}

/** Reactive UI state of the recorder. */
interface State {
  /** True while the microphone is being captured and the countdown runs. */
  isRecording: boolean;
  /** True while the finished recording is being converted to text. */
  converting: boolean;
  /** Seconds of recording time left. */
  countdown: number;
}

const INITIAL_STATE: State = {
  isRecording: false,
  converting: false,
  countdown: 30,
};

/** Delay between two permission checks while waiting for the user's answer. */
const PERMISSION_POLL_INTERVAL_MS = 500;

/** Stop waiting for the permission prompt after this long instead of polling forever. */
const PERMISSION_POLL_TIMEOUT_MS = 60000;

/** Duration of the placeholder "converting" phase before `onChange` fires. */
const CONVERT_DELAY_MS = 3000;

/**
 * Microphone button plus a slide-in recording overlay (cancel button, live
 * waveform, countdown, confirm button).
 *
 * Clicking the microphone checks/requests microphone permission, then starts
 * recording through the wavesurfer Record plugin. Recording ends when the user
 * confirms, cancels, or the countdown reaches zero.
 *
 * NOTE(review): speech-to-text is not wired up yet — after a fixed delay the
 * component reports an empty string through `onChange`.
 */
const AudioRecording: FC<AudioRecordingProps> = (props) => {
  const { onChange } = props;
  const state = useReactive({ ...INITIAL_STATE });
  const containerRef = useRef<HTMLDivElement>(null);
  const recordRef = useRef<RecordPlugin>();
  // Timers live in refs (not module scope) so that every component instance
  // owns its own handles and can cancel them on unmount.
  const countdownTimerRef = useRef<ReturnType<typeof setInterval>>();
  const convertTimerRef = useRef<ReturnType<typeof setTimeout>>();
  const mountedRef = useRef(true);
  const startingRef = useRef(false);
  // Always points at the latest callback so timers never call a stale closure.
  const onChangeRef = useRef(onChange);
  onChangeRef.current = onChange;
  const withVisibility = useAppStore((state) => state.withVisibility);

  const { wavesurfer } = useWavesurfer({
    container: containerRef,
    height: 20,
    waveColor: "#0072ff",
    progressColor: "#999",
    barWidth: 4,
    barRadius: 4,
    barGap: 2,
  });

  // Track mount status and make sure no timer outlives the component.
  useEffect(() => {
    mountedRef.current = true;

    return () => {
      mountedRef.current = false;
      clearInterval(countdownTimerRef.current);
      clearTimeout(convertTimerRef.current);
    };
  }, []);

  // Attach the Record plugin as soon as the wavesurfer instance exists.
  useEffect(() => {
    if (!wavesurfer) return;

    const record = wavesurfer.registerPlugin(
      RecordPlugin.create({
        scrollingWaveform: true,
        renderRecordedAudio: false,
      })
    );

    record.on("record-end", (blob) => {
      // NOTE(review): the recorded blob is not consumed yet. No object URL is
      // created here because nothing would ever revoke it.
      console.log("recorded:", blob.size, "bytes");
    });

    recordRef.current = record;

    return () => {
      resetState();
      recordRef.current = undefined;
    };
  }, [wavesurfer]);

  // Run the countdown while recording; confirm automatically at zero.
  useEffect(() => {
    if (!state.isRecording) return;

    const timer = setInterval(() => {
      state.countdown -= 1;

      // Decrement first so the display never drops below zero.
      if (state.countdown <= 0) {
        handleOk();
      }
    }, 1000);

    countdownTimerRef.current = timer;

    return () => clearInterval(timer);
  }, [state.isRecording]);

  /**
   * Stops timers and recording, then restores the initial state.
   *
   * @param otherState - Fields that should differ from `INITIAL_STATE`.
   */
  const resetState = (otherState: Partial<State> = {}) => {
    clearInterval(countdownTimerRef.current);
    clearTimeout(convertTimerRef.current);
    recordRef.current?.stopRecording();
    Object.assign(state, { ...INITIAL_STATE, ...otherState });
  };

  /**
   * Ensures microphone access, prompting the user when necessary.
   *
   * @returns `true` once access is granted, `false` if it was not granted
   * before the timeout elapsed or the component unmounted.
   */
  const checkPermission = async (): Promise<boolean> => {
    if (await checkMicrophonePermission()) return true;

    try {
      await requestMicrophonePermission();
    } catch (error: unknown) {
      console.error("Failed to request microphone permission:", error);
    }

    // Poll with a deadline: a denied prompt must not keep this promise
    // (and therefore `withVisibility`) pending forever.
    const deadline = Date.now() + PERMISSION_POLL_TIMEOUT_MS;

    while (mountedRef.current && Date.now() < deadline) {
      if (await checkMicrophonePermission()) return true;

      await new Promise<void>((resolve) => {
        setTimeout(resolve, PERMISSION_POLL_INTERVAL_MS);
      });
    }

    return false;
  };

  /** Starts a new recording unless one is already starting, running or converting. */
  const startRecording = async () => {
    if (startingRef.current || state.isRecording || state.converting) return;

    startingRef.current = true;

    try {
      const authorized = await withVisibility(checkPermission);
      const record = recordRef.current;

      if (!authorized || !record || !mountedRef.current) return;

      state.isRecording = true;

      await record.startRecording();
    } catch (error: unknown) {
      // E.g. no input device available: fall back to the idle UI.
      console.error("Failed to start audio recording:", error);

      resetState();
    } finally {
      startingRef.current = false;
    }
  };

  /** Stops recording, shows the converting state and reports the result. */
  const handleOk = () => {
    resetState({ converting: true, countdown: state.countdown });

    convertTimerRef.current = setTimeout(() => {
      onChangeRef.current?.("");

      resetState();
    }, CONVERT_DELAY_MS);
  };

  return (
    <>
      <div
        className={clsx(
          "p-1 hover:bg-gray-50 dark:hover:bg-gray-700 rounded-full transition cursor-pointer"
        )}
        onClick={startRecording}
      >
        <Mic className="size-4 text-[#999]" />
      </div>

      <div
        className={clsx(
          "absolute inset-0 flex items-center gap-1 px-1 rounded translate-x-full transition-all bg-[#ededed] dark:bg-[#202126]",
          {
            "!translate-x-0": state.isRecording || state.converting,
          }
        )}
      >
        <button
          type="button"
          disabled={state.converting}
          className={clsx(
            "flex items-center justify-center size-6 bg-white dark:bg-black rounded-full transition cursor-pointer",
            {
              "!cursor-not-allowed opacity-50": state.converting,
            }
          )}
          onClick={() => resetState()}
        >
          <X className="size-4 text-[#0C0C0C] dark:text-[#999999]" />
        </button>

        <div className="flex items-center gap-1 flex-1 h-6 px-2 bg-white dark:bg-black rounded-full transition">
          <div ref={containerRef} className="flex-1"></div>

          <span className="text-xs text-[#333] dark:text-[#999]">
            {state.countdown}
          </span>
        </div>

        <button
          type="button"
          disabled={state.converting}
          className="flex items-center justify-center size-6 text-white bg-[#0072FF] rounded-full transition cursor-pointer"
          onClick={handleOk}
        >
          {state.converting ? (
            <Loader className="size-4 animate-spin" />
          ) : (
            <Check className="size-4" />
          )}
        </button>
      </div>
    </>
  );
};
|
||||
|
||||
export default AudioRecording;
|
||||
@@ -11,8 +11,8 @@ import { useAppStore } from "@/stores/appStore";
|
||||
import { useSearchStore } from "@/stores/searchStore";
|
||||
import { metaOrCtrlKey } from "@/utils/keyboardUtils";
|
||||
import SearchPopover from "./SearchPopover";
|
||||
// import SpeechToText from "../SpeechToText";
|
||||
import { DataSource } from "@/components/Assistant/types";
|
||||
import AudioRecording from "../AudioRecording";
|
||||
|
||||
interface ChatInputProps {
|
||||
onSend: (message: string) => void;
|
||||
@@ -61,16 +61,16 @@ export default function ChatInput({
|
||||
getDataSourcesByServer,
|
||||
setupWindowFocusListener,
|
||||
hideCoco,
|
||||
// checkScreenPermission,
|
||||
// requestScreenPermission,
|
||||
// getScreenMonitors,
|
||||
// getScreenWindows,
|
||||
// captureMonitorScreenshot,
|
||||
// captureWindowScreenshot,
|
||||
// openFileDialog,
|
||||
// getFileMetadata,
|
||||
// getFileIcon,
|
||||
}: ChatInputProps) {
|
||||
}: // checkScreenPermission,
|
||||
// requestScreenPermission,
|
||||
// getScreenMonitors,
|
||||
// getScreenWindows,
|
||||
// captureMonitorScreenshot,
|
||||
// captureWindowScreenshot,
|
||||
// openFileDialog,
|
||||
// getFileMetadata,
|
||||
// getFileIcon,
|
||||
ChatInputProps) {
|
||||
const { t } = useTranslation();
|
||||
|
||||
const showTooltip = useAppStore(
|
||||
@@ -229,11 +229,9 @@ export default function ChatInput({
|
||||
};
|
||||
|
||||
return (
|
||||
<div
|
||||
className={`w-full relative`}
|
||||
>
|
||||
<div className={`w-full relative`}>
|
||||
<div
|
||||
className={`p-2 flex items-center dark:text-[#D8D8D8] bg-[#ededed] dark:bg-[#202126] rounded transition-all relative `}
|
||||
className={`p-2 flex items-center dark:text-[#D8D8D8] bg-[#ededed] dark:bg-[#202126] rounded transition-all relative overflow-hidden`}
|
||||
>
|
||||
<div className="flex flex-wrap gap-2 flex-1 items-center relative">
|
||||
{!isChatMode && !sourceData ? (
|
||||
@@ -294,13 +292,12 @@ export default function ChatInput({
|
||||
) : null}
|
||||
</div>
|
||||
|
||||
{/* {isChatMode && (
|
||||
<SpeechToText
|
||||
onChange={(transcript) => {
|
||||
changeInput(inputValue + transcript);
|
||||
}}
|
||||
/>
|
||||
)} */}
|
||||
<AudioRecording
|
||||
key={isChatMode ? "chat" : "search"}
|
||||
onChange={(text) => {
|
||||
changeInput(inputValue + text);
|
||||
}}
|
||||
/>
|
||||
|
||||
{isChatMode && curChatEnd ? (
|
||||
<button
|
||||
@@ -410,14 +407,10 @@ export default function ChatInput({
|
||||
/>
|
||||
</div>
|
||||
) : (
|
||||
<div data-tauri-drag-region className="w-28 flex gap-2 relative">
|
||||
{/* <SpeechToText
|
||||
Icon={AudioLines}
|
||||
onChange={(transcript) => {
|
||||
changeInput(inputValue + transcript);
|
||||
}}
|
||||
/> */}
|
||||
</div>
|
||||
<div
|
||||
data-tauri-drag-region
|
||||
className="w-28 flex gap-2 relative"
|
||||
></div>
|
||||
)}
|
||||
|
||||
{isChatPage ? null : (
|
||||
|
||||
@@ -40,7 +40,9 @@ interface InputExtraProps {
|
||||
getScreenWindows: () => Promise<any[]>;
|
||||
captureMonitorScreenshot: (id: number) => Promise<string>;
|
||||
captureWindowScreenshot: (id: number) => Promise<string>;
|
||||
openFileDialog: (options: { multiple: boolean }) => Promise<string | string[] | null>;
|
||||
openFileDialog: (options: {
|
||||
multiple: boolean;
|
||||
}) => Promise<string | string[] | null>;
|
||||
getFileMetadata: (path: string) => Promise<any>;
|
||||
getFileIcon: (path: string, size: number) => Promise<string>;
|
||||
}
|
||||
@@ -59,7 +61,7 @@ const InputExtra = ({
|
||||
const { t, i18n } = useTranslation();
|
||||
const uploadFiles = useChatStore((state) => state.uploadFiles);
|
||||
const setUploadFiles = useChatStore((state) => state.setUploadFiles);
|
||||
const setIsPinned = useAppStore((state) => state.setIsPinned);
|
||||
const withVisibility = useAppStore((state) => state.withVisibility);
|
||||
|
||||
const state = useReactive<State>({
|
||||
screenshotableMonitors: [],
|
||||
@@ -98,14 +100,12 @@ const InputExtra = ({
|
||||
{
|
||||
label: t("search.input.uploadFile"),
|
||||
clickEvent: async () => {
|
||||
setIsPinned(true);
|
||||
|
||||
const selectedFiles = await openFileDialog({
|
||||
multiple: true,
|
||||
const selectedFiles = await withVisibility(() => {
|
||||
return openFileDialog({
|
||||
multiple: true,
|
||||
});
|
||||
});
|
||||
|
||||
setIsPinned(false);
|
||||
|
||||
if (isNil(selectedFiles)) return;
|
||||
|
||||
handleUploadFiles(selectedFiles);
|
||||
|
||||
@@ -1,97 +0,0 @@
|
||||
import { useEventListener, useReactive } from "ahooks";
|
||||
import clsx from "clsx";
|
||||
import { LucideIcon, Mic } from "lucide-react";
|
||||
import { FC, useEffect } from "react";
|
||||
|
||||
interface SpeechToTextProps {
|
||||
Icon?: LucideIcon;
|
||||
onChange?: (transcript: string) => void;
|
||||
}
|
||||
|
||||
let recognition: SpeechRecognition | null = null;
|
||||
|
||||
const SpeechToText: FC<SpeechToTextProps> = (props) => {
|
||||
const { Icon = Mic, onChange } = props;
|
||||
|
||||
const state = useReactive({
|
||||
speaking: false,
|
||||
});
|
||||
|
||||
useEffect(() => {
|
||||
return destroyRecognition;
|
||||
}, []);
|
||||
|
||||
useEventListener("focusin", (event) => {
|
||||
const { target } = event;
|
||||
|
||||
const isInputElement =
|
||||
target instanceof HTMLInputElement ||
|
||||
target instanceof HTMLTextAreaElement;
|
||||
|
||||
if (state.speaking && isInputElement) {
|
||||
target.blur();
|
||||
}
|
||||
});
|
||||
|
||||
const handleSpeak = () => {
|
||||
if (state.speaking) {
|
||||
return destroyRecognition();
|
||||
}
|
||||
|
||||
const SpeechRecognition =
|
||||
window.SpeechRecognition || window.webkitSpeechRecognition;
|
||||
|
||||
recognition = new SpeechRecognition();
|
||||
recognition.continuous = true;
|
||||
recognition.interimResults = true;
|
||||
recognition.lang = "zh-CN";
|
||||
|
||||
recognition.onresult = (event) => {
|
||||
const transcript = [...event.results]
|
||||
.map((result) => result[0].transcript)
|
||||
.join("");
|
||||
|
||||
onChange?.(transcript);
|
||||
};
|
||||
|
||||
recognition.onerror = destroyRecognition;
|
||||
|
||||
recognition.onend = destroyRecognition;
|
||||
|
||||
recognition.start();
|
||||
|
||||
state.speaking = true;
|
||||
};
|
||||
|
||||
const destroyRecognition = () => {
|
||||
if (recognition) {
|
||||
recognition.abort();
|
||||
recognition.onresult = null;
|
||||
recognition.onerror = null;
|
||||
recognition.onend = null;
|
||||
recognition = null;
|
||||
}
|
||||
|
||||
state.speaking = false;
|
||||
};
|
||||
|
||||
return (
|
||||
<div
|
||||
className={clsx(
|
||||
"p-1 hover:bg-gray-50 dark:hover:bg-gray-700 rounded-full transition cursor-pointer",
|
||||
{
|
||||
"bg-blue-100 dark:bg-blue-900": state.speaking,
|
||||
}
|
||||
)}
|
||||
>
|
||||
<Icon
|
||||
className={clsx("size-4 text-[#999] dark:text-[#999]", {
|
||||
"text-blue-500 animate-pulse": state.speaking,
|
||||
})}
|
||||
onClick={handleSpeak}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
export default SpeechToText;
|
||||
@@ -1,15 +1,16 @@
|
||||
import { useEffect } from 'react';
|
||||
import { useEffect } from "react";
|
||||
|
||||
import { useAppStore } from '@/stores/appStore';
|
||||
import platformAdapter from '@/utils/platformAdapter';
|
||||
import { useAppStore } from "@/stores/appStore";
|
||||
import platformAdapter from "@/utils/platformAdapter";
|
||||
|
||||
export function useWindowEvents() {
|
||||
const isPinned = useAppStore((state) => state.isPinned);
|
||||
const visible = useAppStore((state) => state.visible);
|
||||
|
||||
useEffect(() => {
|
||||
const handleBlur = async () => {
|
||||
console.log("Window blurred");
|
||||
if (isPinned) {
|
||||
if (isPinned || visible) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -23,5 +24,5 @@ export function useWindowEvents() {
|
||||
return () => {
|
||||
window.removeEventListener("blur", handleBlur);
|
||||
};
|
||||
}, [isPinned]);
|
||||
}
|
||||
}, [isPinned, visible]);
|
||||
}
|
||||
|
||||
@@ -51,6 +51,9 @@ export type IAppStore = {
|
||||
|
||||
showCocoShortcuts: string[];
|
||||
setShowCocoShortcuts: (showCocoShortcuts: string[]) => void;
|
||||
|
||||
visible: boolean;
|
||||
withVisibility: <T>(fn: () => Promise<T>) => Promise<T>;
|
||||
};
|
||||
|
||||
export const useAppStore = create<IAppStore>()(
|
||||
@@ -104,6 +107,16 @@ export const useAppStore = create<IAppStore>()(
|
||||
|
||||
return set({ showCocoShortcuts });
|
||||
},
|
||||
visible: false,
|
||||
/**
 * Runs `fn` with the store's `visible` flag set to `true` and resets the flag
 * to `false` afterwards.
 *
 * The reset happens in `finally`, so a rejected `fn` cannot leave `visible`
 * stuck at `true`; the rejection itself is still propagated to the caller.
 *
 * @param fn - Async operation to run while `visible` is `true`.
 * @returns Whatever `fn` resolves to.
 */
withVisibility: async <T>(fn: () => Promise<T>) => {
  set({ visible: true });

  try {
    return await fn();
  } finally {
    set({ visible: false });
  }
},
|
||||
}),
|
||||
{
|
||||
name: "app-store",
|
||||
|
||||
Reference in New Issue
Block a user