mirror of
https://github.com/AIGC-Audio/AudioGPT.git
synced 2025-12-15 19:37:52 +01:00
update
This commit is contained in:
@@ -4,8 +4,6 @@ sys.path.append(os.path.dirname(os.path.realpath(__file__)))
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
|
||||
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'NeuralSeq'))
|
||||
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'text_to_audio/Make_An_Audio'))
|
||||
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'text_to_audio/Make_An_Audio_img'))
|
||||
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'text_to_audio/Make_An_Audio_inpaint'))
|
||||
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'audio_detection'))
|
||||
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'mono2binaural'))
|
||||
import gradio as gr
|
||||
@@ -217,7 +215,7 @@ class I2A:
|
||||
def __init__(self, device):
|
||||
print("Initializing Make-An-Audio-Image to %s" % device)
|
||||
self.device = device
|
||||
self.sampler = self._initialize_model('text_to_audio/Make_An_Audio_img/configs/img_to_audio/img2audio_args.yaml', 'text_to_audio/Make_An_Audio_img/useful_ckpts/ta54_epoch=000216.ckpt', device=device)
|
||||
self.sampler = self._initialize_model('text_to_audio/Make_An_Audio/configs/img_to_audio/img2audio_args.yaml', 'text_to_audio/Make_An_Audio/useful_ckpts/ta54_epoch=000216.ckpt', device=device)
|
||||
self.vocoder = VocoderBigVGAN('text_to_audio/Make_An_Audio_img/vocoder/logs/bigv16k53w',device=device)
|
||||
|
||||
def _initialize_model(self, config, ckpt, device):
|
||||
@@ -421,8 +419,8 @@ class Inpaint:
|
||||
def __init__(self, device):
|
||||
print("Initializing Make-An-Audio-inpaint to %s" % device)
|
||||
self.device = device
|
||||
self.sampler = self._initialize_model_inpaint('text_to_audio/Make_An_Audio_inpaint/configs/inpaint/txt2audio_args.yaml', 'text_to_audio/Make_An_Audio_inpaint/useful_ckpts/inpaint7_epoch00047.ckpt')
|
||||
self.vocoder = VocoderBigVGAN('./vocoder/logs/bigv16k53w',device=device)
|
||||
self.sampler = self._initialize_model_inpaint('text_to_audio/Make_An_Audio/configs/inpaint/txt2audio_args.yaml', 'text_to_audio/Make_An_Audio/useful_ckpts/inpaint7_epoch00047.ckpt')
|
||||
self.vocoder = VocoderBigVGAN('text_to_audio/Make_An_Audio/vocoder/logs/bigv16k53',device=device)
|
||||
self.cmap_transform = matplotlib.cm.viridis
|
||||
|
||||
def _initialize_model_inpaint(self, config, ckpt):
|
||||
|
||||
@@ -8,14 +8,10 @@ wget -P checkpoints/0109_hifigan_bigpopcs_hop128/ -i https://huggingface.co/spac
|
||||
wget -P checkpoints/0102_xiaoma_pe/ -i https://huggingface.co/spaces/Silentlin/DiffSinger/blob/main/checkpoints/0102_xiaoma_pe/config.yaml https://huggingface.co/spaces/Silentlin/DiffSinger/resolve/main/checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt
|
||||
# Text to audio
|
||||
cd text_to_audio
|
||||
git clone https://huggingface.co/spaces/DiffusionSpeech/Make_An_Audio
|
||||
git clone https://huggingface.co/spaces/DiffusionSpeech/Make_An_Audio_img
|
||||
git clone https://huggingface.co/spaces/DiffusionSpeech/Make_An_Audio_inpaint
|
||||
wget -P text_to_audio/Make_An_Audio/useful_ckpts/ -i https://huggingface.co/spaces/DiffusionSpeech/Make_An_Audio/resolve/main/useful_ckpts/ta40multi_epoch=000085.ckpt
|
||||
wget -P text_to_audio/Make_An_Audio/useful_ckpts/CLAP/ -i https://huggingface.co/spaces/DiffusionSpeech/Make_An_Audio/resolve/main/useful_ckpts/CLAP/CLAP_weights_2022.pth
|
||||
wget -P text_to_audio/Make_An_Audio_img/useful_ckpts/ -i https://huggingface.co/spaces/DiffusionSpeech/Make_An_Audio_img/resolve/main/useful_ckpts/ta54_epoch=000216.ckpt
|
||||
wget -P text_to_audio/Make_An_Audio_img/useful_ckpts/CLAP/ -i https://huggingface.co/spaces/DiffusionSpeech/Make_An_Audio_img/blob/main/useful_ckpts/CLAP/CLAP_weights_2022.pth
|
||||
wget -P text_to_audio/Make_An_Audio_inpaint/useful_ckpts/ -i https://huggingface.co/spaces/DiffusionSpeech/Make_An_Audio_inpaint/resolve/main/useful_ckpts/inpaint7_epoch00047.ckpt
|
||||
wget -P text_to_audio/Make_An_Audio/useful_ckpts/ -i https://huggingface.co/spaces/DiffusionSpeech/Make_An_Audio_img/resolve/main/useful_ckpts/ta54_epoch=000216.ckpt
|
||||
wget -P text_to_audio/Make_An_Audio/useful_ckpts/ -i https://huggingface.co/spaces/DiffusionSpeech/Make_An_Audio_inpaint/resolve/main/useful_ckpts/inpaint7_epoch00047.ckpt
|
||||
# Text to speech
|
||||
wget -P checkpoints/GenerSpeech/ -i https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/blob/main/checkpoints/GenerSpeech/config.yaml https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/checkpoints/GenerSpeech/model_ckpt_steps_300000.ckpt
|
||||
wget -P checkpoints/trainset_hifigan/ -i https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/blob/main/checkpoints/trainset_hifigan/config.yaml https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/checkpoints/trainset_hifigan/model_ckpt_steps_1000000.ckpt
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cu113
|
||||
accelerate
|
||||
addict==2.4.0
|
||||
aiofiles
|
||||
albumentations==1.3.0
|
||||
appdirs==1.4.4
|
||||
basicsr==1.4.2
|
||||
@@ -10,17 +11,23 @@ diffusers
|
||||
einops==0.3.0
|
||||
espnet
|
||||
espnet_model_zoo
|
||||
ffmpeg-python
|
||||
g2p-en==2.1.0
|
||||
google==3.0.0
|
||||
gradio
|
||||
h5py==2.8.0
|
||||
h5py
|
||||
imageio==2.9.0
|
||||
imageio-ffmpeg==0.4.2
|
||||
invisible-watermark>=0.1.5
|
||||
jieba
|
||||
kornia==0.6
|
||||
langchain==0.0.101
|
||||
librosa
|
||||
loguru
|
||||
miditoolkit==0.1.7
|
||||
mmcv==1.5.0
|
||||
mmdet==2.23.0
|
||||
mmengine==0.7.2
|
||||
moviepy==1.0.3
|
||||
numpy==1.23.1
|
||||
omegaconf==2.1.1
|
||||
@@ -56,8 +63,9 @@ torchlibrosa
|
||||
torchmetrics==0.6.0
|
||||
torchvision==0.13.1
|
||||
transformers==4.26.1
|
||||
typing-extensions==3.10.0.2
|
||||
typing-extensions==4.0.0
|
||||
uuid==1.30
|
||||
webdataset==0.2.5
|
||||
webrtcvad==2.0.10
|
||||
yapf==0.32.0
|
||||
git+https://github.com/openai/CLIP.git
|
||||
Reference in New Issue
Block a user