hf model download

This commit is contained in:
jason-on-salt-a40
2024-04-13 15:11:15 -07:00
parent 3a8d5f4aab
commit 57079c44b6
7 changed files with 73 additions and 64 deletions

View File

@@ -17,7 +17,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -26,7 +26,7 @@
"import os\n",
"os.environ[\"CUDA_DEVICE_ORDER\"]=\"PCI_BUS_ID\" \n",
"os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\"\n",
"os.environ[\"USER\"] = \"YOUR_USERNAME\" # TODO change this to your username\n",
"os.environ[\"USER\"] = \"me\" # TODO change this to your username\n",
"\n",
"import torch\n",
"import torchaudio\n",
@@ -37,52 +37,58 @@
"from data.tokenizer import (\n",
" AudioTokenizer,\n",
" TextTokenizer,\n",
")\n"
")\n",
"from huggingface_hub import hf_hub_download"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# install MFA models and dictionaries if you haven't done so already; already done in the dockerfile or environment setup\n",
"!source ~/.bashrc && \\\n",
" conda activate voicecraft && \\\n",
" mfa model download dictionary english_us_arpa && \\\n",
" mfa model download acoustic english_us_arpa"
"# # install MFA models and dictionaries if you haven't done so already; already done in the dockerfile or environment setup\n",
"# !source ~/.bashrc && \\\n",
"# conda activate voicecraft && \\\n",
"# mfa model download dictionary english_us_arpa && \\\n",
"# mfa model download acoustic english_us_arpa"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Dora directory: /tmp/audiocraft_me\n"
]
}
],
"source": [
"# load model, encodec, and phn2num\n",
"# # load model, tokenizer, and other necessary files\n",
"device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
"voicecraft_name=\"giga330M.pth\" # or gigaHalfLibri330M_TTSEnhanced_max16s.pth, giga830M.pth\n",
"\n",
"# the old way of loading the model\n",
"from models import voicecraft\n",
"ckpt_fn =f\"./pretrained_models/{voicecraft_name}\"\n",
"if not os.path.exists(ckpt_fn):\n",
" os.system(f\"wget https://huggingface.co/pyp1/VoiceCraft/resolve/main/{voicecraft_name}\\?download\\=true\")\n",
" os.system(f\"mv {voicecraft_name}\\?download\\=true ./pretrained_models/{voicecraft_name}\")\n",
"ckpt = torch.load(ckpt_fn, map_location=\"cpu\")\n",
"model = voicecraft.VoiceCraft(ckpt[\"config\"])\n",
"model.load_state_dict(ckpt[\"model\"])\n",
"phn2num = ckpt['phn2num']\n",
"config = vars(ckpt['config'])\n",
"# the new way of loading the model, with huggingface, recommended\n",
"from models.voicecraft import VoiceCraftHF\n",
"model = VoiceCraftHF.from_pretrained(f\"pyp1/VoiceCraft_{voicecraft_name.replace('.pth', '')}\")\n",
"phn2num = model.args.phn2num\n",
"config = vars(model.args)\n",
"model.to(device)\n",
"model.eval()\n",
"\n",
"# # the new way of loading the model, with huggingface, this doesn't work yet\n",
"# from models.voicecraft import VoiceCraftHF\n",
"# model = VoiceCraftHF.from_pretrained(f\"pyp1/VoiceCraft_{voicecraft_name.replace('.pth', '')}\")\n",
"# phn2num = model.args.phn2num # or model.args['phn2num']?\n",
"# config = model.config\n",
"\n",
"# # the old way of loading the model\n",
"# from models import voicecraft\n",
"# filepath = hf_hub_download(repo_id=\"pyp1/VoiceCraft\", filename=voicecraft_name, repo_type=\"model\")\n",
"# ckpt = torch.load(filepath, map_location=\"cpu\")\n",
"# model = voicecraft.VoiceCraft(ckpt[\"config\"])\n",
"# model.load_state_dict(ckpt[\"model\"])\n",
"# config = vars(model.args)\n",
"# phn2num = ckpt[\"phn2num\"]\n",
"# model.to(device)\n",
"# model.eval()\n",
"\n",
@@ -98,7 +104,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -159,7 +165,7 @@
"\n",
"# NOTE adjust the below three arguments if the generation is not as good\n",
"stop_repetition = 3 # NOTE if the model generates long silence, reduce the stop_repetition to 3, 2 or even 1\n",
"sample_batch_size = 4 # NOTE: if there are long silences or unnaturally stretched words, increase sample_batch_size to 5 or higher. The model will then generate sample_batch_size examples of the same audio and pick the shortest one; so if the speech rate of the generated audio is too fast, change it to a smaller number.\n",
"sample_batch_size = 5 # NOTE: if there are long silences or unnaturally stretched words, increase sample_batch_size to 5 or higher. The model will then generate sample_batch_size examples of the same audio and pick the shortest one; so if the speech rate of the generated audio is too fast, change it to a smaller number.\n",
"seed = 1 # change seed if you are still unhappy with the result\n",
"\n",
"def seed_everything(seed):\n",