diff --git a/assets/README.md b/assets/README.md index c6a05f2..004882a 100644 --- a/assets/README.md +++ b/assets/README.md @@ -71,3 +71,32 @@ Output:
Input Example : Generate an image of a horse
Output:
![](t2i.png)
+ +## Sound Detection +First upload your audio(.wav)
+Audio Example :
+
+Input Example : What events does this audio include?
+Output:
+![](detection.png)
+ +## Mono audio to Binaural Audio +First upload your audio(.wav)
+
+Input Example: Transfer the mono speech audio to binaural audio.
+Output:
+![](m2b.png)
+ +## Target Sound Detection +First upload your audio(.wav)
+
+Input Example: Please help me detect the target sound in the audio based on the description: “I want to detect Applause event”
+Output:
+![](tsd.png)
+ +## Sound Extraction +First upload your audio(.wav)
+
+Input Example: Please help me extract the sound events from the audio based on the description: "a person shouts nearby and then emergency vehicle sirens sounds"
+Output:
+![](sound_extraction.png)
diff --git a/assets/detection.png b/assets/detection.png new file mode 100644 index 0000000..e9352a1 Binary files /dev/null and b/assets/detection.png differ diff --git a/assets/m2b.png b/assets/m2b.png new file mode 100644 index 0000000..9c1aeb0 Binary files /dev/null and b/assets/m2b.png differ diff --git a/assets/mix1.wav b/assets/mix1.wav new file mode 100644 index 0000000..021b86b Binary files /dev/null and b/assets/mix1.wav differ diff --git a/assets/sound_extraction.png b/assets/sound_extraction.png new file mode 100644 index 0000000..be04b37 Binary files /dev/null and b/assets/sound_extraction.png differ diff --git a/assets/tsd.png b/assets/tsd.png new file mode 100644 index 0000000..7b08f08 Binary files /dev/null and b/assets/tsd.png differ diff --git a/audio-chatgpt.py b/audio-chatgpt.py index 1314a5e..678d566 100644 --- a/audio-chatgpt.py +++ b/audio-chatgpt.py @@ -843,7 +843,7 @@ class ConversationBot: Tool(name="Extract sound event from mixture audio based on language description", func=self.extraction.inference, description="useful for when you extract target sound from a mixture audio, you can describe the taregt sound by text, receives audio_path and text as input. " "The input to this tool should be a comma seperated string of two, representing mixture audio path and input text."), - Tool(name="Detect the sound event from the audio based on your descriptions", func=self.TSD.inference, + Tool(name="Detect the target sound event from the audio based on your descriptions", func=self.TSD.inference, description="useful for when you want to know the when happens the target sound event in th audio. You can use language descriptions to instruct the model. receives text description and audio_path as input. " "The input to this tool should be a string, representing the answer. ")] self.agent = initialize_agent(