Mirror of https://github.com/infinilabs/coco-app.git
feat: chat supports voice input (#276)
* feat: chat supports voice input
* refactor: hide window out of focus
* feat: search supports voice input
package.json
@@ -57,6 +57,7 @@
   },
   "devDependencies": {
     "@tauri-apps/cli": "^2.3.1",
+    "@types/dom-speech-recognition": "^0.0.4",
     "@types/lodash-es": "^4.17.12",
     "@types/markdown-it": "^14.1.2",
     "@types/node": "^22.13.10",
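The only change here is the new dev dependency. @types/dom-speech-recognition ships ambient TypeScript declarations for the Web Speech API (SpeechRecognition, SpeechRecognitionEvent, and the webkitSpeechRecognition fallback on window), which the SpeechToText component added later in this commit relies on. A minimal illustrative snippet of what those typings let you write without casts (not code from the commit):

// Illustrative only: with @types/dom-speech-recognition installed these
// globals are typed, so no `any` casts are needed.
const Ctor = window.SpeechRecognition || window.webkitSpeechRecognition;

if (Ctor) {
  const recognition = new Ctor();
  recognition.lang = "zh-CN";
  recognition.onresult = (event: SpeechRecognitionEvent) => {
    console.log(event.results[0][0].transcript);
  };
  recognition.start();
}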
pnpm-lock.yaml (generated, 8 changes)
@@ -135,6 +135,9 @@ importers:
       '@tauri-apps/cli':
         specifier: ^2.3.1
         version: 2.3.1
+      '@types/dom-speech-recognition':
+        specifier: ^0.0.4
+        version: 0.0.4
       '@types/lodash-es':
         specifier: ^4.17.12
         version: 4.17.12
@@ -1230,6 +1233,9 @@ packages:
   '@types/debug@4.1.12':
     resolution: {integrity: sha512-vIChWdVG3LG1SMxEvI/AK+FWJthlrqlTu7fbrlywTkkaONwk/UAGaULXRlf8vkzFBLVm0zkMdCquhL5aOjhXPQ==}

+  '@types/dom-speech-recognition@0.0.4':
+    resolution: {integrity: sha512-zf2GwV/G6TdaLwpLDcGTIkHnXf8JEf/viMux+khqKQKDa8/8BAUtXXZS563GnvJ4Fg0PBLGAaFf2GekEVSZ6GQ==}
+
   '@types/estree-jsx@1.0.5':
     resolution: {integrity: sha512-52CcUVNFyfb1A2ALocQw/Dd1BQFNmSdkuC3BkZ6iqhdMfQz7JWOFRuJFloOzjk+6WijU56m9oKXFAXc7o3Towg==}

@@ -4320,6 +4326,8 @@ snapshots:
     dependencies:
       '@types/ms': 2.1.0

+  '@types/dom-speech-recognition@0.0.4': {}
+
   '@types/estree-jsx@1.0.5':
     dependencies:
       '@types/estree': 1.0.6
Info.plist
@@ -31,5 +31,12 @@
     </array>
   </dict>
   </array>
+
+  <key>NSMicrophoneUsageDescription</key>
+  <string>Coco AI needs access to your microphone for voice input and audio recording features.</string>
+  <key>NSCameraUsageDescription</key>
+  <string>Coco AI requires camera access for scanning documents and capturing images.</string>
+  <key>NSSpeechRecognitionUsageDescription</key>
+  <string>Coco AI uses speech recognition to convert your voice into text for a hands-free experience.</string>
   </dict>
 </plist>
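On macOS these usage-description strings are mandatory for the corresponding permissions: the system shows them in the prompt the first time the app touches the microphone, camera, or speech recognition, and an app that accesses the microphone without NSMicrophoneUsageDescription may simply be terminated rather than prompted. Nothing in the commit requests the permission explicitly; the prompt is triggered by the first recognition.start(). A hedged sketch of an optional pre-flight check on the web side, using the standard getUserMedia API (this helper is not part of the commit):

// Hypothetical pre-flight check (not in this commit): requesting an audio
// stream up front surfaces the macOS microphone prompt before dictation
// starts and lets the UI report a denied permission cleanly.
async function ensureMicrophoneAccess(): Promise<boolean> {
  try {
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    // Only the permission prompt was needed; release the device right away.
    stream.getTracks().forEach((track) => track.stop());
    return true;
  } catch {
    return false;
  }
}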
ChatInput component
@@ -1,4 +1,4 @@
-import { ArrowBigLeft, Search, Send, Brain } from "lucide-react";
+import { ArrowBigLeft, Search, Send, Brain, AudioLines } from "lucide-react";
 import { useCallback, useEffect, useRef, useState } from "react";
 import { listen } from "@tauri-apps/api/event";
 import { invoke, isTauri } from "@tauri-apps/api/core";
@@ -13,6 +13,7 @@ import { useAppStore } from "@/stores/appStore";
 import { useSearchStore } from "@/stores/searchStore";
 import { metaOrCtrlKey } from "@/utils/keyboardUtils";
 import SearchPopover from "./SearchPopover";
+import SpeechToText from "../SpeechToText";
 
 interface ChatInputProps {
   onSend: (message: string) => void;
@@ -281,23 +282,13 @@ export default function ChatInput({
         ) : null}
       </div>
 
-      {/* {isChatMode ? (
-        <button
-          className={`p-1 hover:bg-gray-50 dark:hover:bg-gray-700 rounded-full transition-colors ${
-            isListening ? "bg-blue-100 dark:bg-blue-900" : ""
-          }`}
-          type="button"
-          onClick={() => {}}
-        >
-          <Mic
-            className={`w-4 h-4 ${
-              isListening
-                ? "text-blue-500 animate-pulse"
-                : "text-[#999] dark:text-[#999]"
-            }`}
-          />
-        </button>
-      ) : null} */}
+      {isChatMode && (
+        <SpeechToText
+          onChange={(transcript) => {
+            changeInput(inputValue + transcript);
+          }}
+        />
+      )}
 
       {isChatMode && curChatEnd ? (
         <button
@@ -396,7 +387,14 @@ export default function ChatInput({
           />
         </div>
       ) : (
-        <div className="w-28 flex gap-2 relative"></div>
+        <div data-tauri-drag-region className="w-28 flex gap-2 relative">
+          <SpeechToText
+            Icon={AudioLines}
+            onChange={(transcript) => {
+              changeInput(inputValue + transcript);
+            }}
+          />
+        </div>
       )}
 
       {isChatPage ? null : (
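Both call sites wire the component the same way: onChange hands back the recognized text and the input is updated via changeInput(inputValue + transcript); in chat mode the default Mic icon is used, while in search mode the trigger sits inside a data-tauri-drag-region container and swaps in the AudioLines icon. Neither call site checks whether the Web Speech API exists in the current WebView (window.SpeechRecognition and window.webkitSpeechRecognition can both be undefined, for example in Firefox-based engines), in which case clicking the icon throws inside SpeechToText. A hedged sketch of a capability check that could gate rendering of the trigger (speechRecognitionSupported is a hypothetical local, not in the commit):

// Hypothetical guard (not part of this commit): only render the voice
// trigger when the Web Speech API is actually available.
const speechRecognitionSupported =
  typeof window !== "undefined" &&
  Boolean(window.SpeechRecognition || window.webkitSpeechRecognition);

{isChatMode && speechRecognitionSupported && (
  <SpeechToText
    onChange={(transcript) => {
      changeInput(inputValue + transcript);
    }}
  />
)}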
src/components/SpeechToText/index.tsx (new file, 71 lines)
@@ -0,0 +1,71 @@
import { useReactive } from "ahooks";
import clsx from "clsx";
import { Mic } from "lucide-react";
import { ComponentType, FC } from "react";

interface SpeechToTextProps {
  Icon?: ComponentType<any>;
  onChange?: (transcript: string) => void;
}

interface State {
  speaking: boolean;
  transcript: string;
}

let recognition: SpeechRecognition;

const SpeechToText: FC<SpeechToTextProps> = (props) => {
  const { Icon = Mic, onChange } = props;

  const state = useReactive<State>({
    speaking: false,
    transcript: "",
  });

  const handleSpeak = async () => {
    if (state.speaking) {
      state.speaking = false;

      return recognition.stop();
    }

    const SpeechRecognition =
      window.SpeechRecognition || window.webkitSpeechRecognition;

    recognition = new SpeechRecognition();
    recognition.continuous = true;
    recognition.interimResults = true;
    recognition.lang = "zh-CN";

    recognition.onresult = (event) => {
      state.transcript = event.results[0][0].transcript;

      onChange?.(state.transcript);
    };

    recognition.start();

    state.speaking = true;
  };

  return (
    <div
      className={clsx(
        "p-1 hover:bg-gray-50 dark:hover:bg-gray-700 rounded-full transition",
        {
          "bg-blue-100 dark:bg-blue-900": state.speaking,
        }
      )}
    >
      <Icon
        className={clsx("size-4 text-[#999] dark:text-[#999]", {
          "text-blue-500 animate-pulse": state.speaking,
        })}
        onClick={handleSpeak}
      />
    </div>
  );
};

export default SpeechToText;
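A few notes on the component: the recognition instance lives in a module-level variable, state.speaking drives both the stop/start branch and the pulsing styles, and onresult reads only event.results[0][0].transcript. With continuous and interimResults both enabled, the Web Speech API reports each utterance of a session as a separate entry in event.results and delivers interim versions as it goes, so reading index 0 only ever reflects the first utterance, and nothing resets state.speaking when recognition ends on its own (silence timeout or an error). A hedged sketch of an onresult/onend pair that accumulates finalized utterances, offered as a possible refinement rather than what the commit ships:

// Possible refinement (not part of this commit): walk event.results from
// event.resultIndex, accumulate finalized utterances, and keep the UI state
// in sync when recognition stops by itself.
recognition.onresult = (event: SpeechRecognitionEvent) => {
  let interim = "";

  for (let i = event.resultIndex; i < event.results.length; i++) {
    const result = event.results[i];
    if (result.isFinal) {
      state.transcript += result[0].transcript; // finalized text, kept across events
    } else {
      interim += result[0].transcript;          // current in-progress utterance
    }
  }

  onChange?.(state.transcript + interim);
};

recognition.onend = () => {
  state.speaking = false; // recognition can end without the user clicking stop
};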