From 6c26fb7b3463334ae9fb4d63dae52f3c29506db0 Mon Sep 17 00:00:00 2001 From: Georg Kucsko Date: Tue, 25 Apr 2023 17:49:35 -0400 Subject: [PATCH 01/13] simplify device placement --- bark/generation.py | 123 ++++++++++++++++++++++++++++++--------------- bark/model.py | 1 - 2 files changed, 82 insertions(+), 42 deletions(-) diff --git a/bark/generation.py b/bark/generation.py index 4aa805e..28d963c 100644 --- a/bark/generation.py +++ b/bark/generation.py @@ -83,6 +83,7 @@ CACHE_DIR = os.path.join(os.getenv("XDG_CACHE_HOME", default_cache_dir), "suno", USE_SMALL_MODELS = os.environ.get("SUNO_USE_SMALL_MODELS", False) +GLOBAL_ENABLE_MPS = os.environ.get("SUNO_ENABLE_MPS", False) REMOTE_BASE_URL = "https://dl.suno-models.io/bark/models/v0/" @@ -114,10 +115,10 @@ REMOTE_MODEL_PATHS = { } -if not hasattr(torch.nn.functional, 'scaled_dot_product_attention'): +if not hasattr(torch.nn.functional, 'scaled_dot_product_attention') and torch.cuda.is_available(): logger.warning( - "torch version does not support flash attention. You will get significantly faster" + - " inference speed by upgrade torch to newest version / nightly." + "torch version does not support flash attention. You will get faster" + + " inference speed by upgrade torch to newest nightly version." ) @@ -141,6 +142,16 @@ def _get_ckpt_path(model_type, use_small=False): return os.path.join(CACHE_DIR, f"{model_name}.pt") +def _grab_best_device(use_gpu=True): + if torch.cuda.device_count() > 0 and use_gpu: + device = "cuda" + elif torch.backends.mps.is_available() and use_gpu and GLOBAL_ENABLE_MPS: + device = "mps" + else: + device = "cpu" + return device + + S3_BUCKET_PATH_RE = r"s3\:\/\/(.+?)\/" @@ -207,8 +218,6 @@ def clean_models(model_key=None): def _load_model(ckpt_path, device, use_small=False, model_type="text"): - if "cuda" not in device: - logger.warning("No GPU being used. Careful, inference might be extremely slow!") if model_type == "text": ConfigClass = GPTConfig ModelClass = GPT @@ -285,30 +294,32 @@ def load_model(use_gpu=True, use_small=False, force_reload=False, model_type="te if model_type not in ("text", "coarse", "fine"): raise NotImplementedError() global models - if torch.cuda.device_count() == 0 or not use_gpu: - device = "cpu" - else: - device = "cuda" - model_key = str(device) + f"__{model_type}" + device = _grab_best_device(use_gpu=use_gpu) + model_key = f"{model_type}" if model_key not in models or force_reload: ckpt_path = _get_ckpt_path(model_type, use_small=use_small) clean_models(model_key=model_key) model = _load_model_f(ckpt_path, device) models[model_key] = model + if model_type == "text": + models[model_key]["model"].to(device) + else: + models[model_key].to(device) return models[model_key] def load_codec_model(use_gpu=True, force_reload=False): global models - if torch.cuda.device_count() == 0 or not use_gpu: + device = _grab_best_device(use_gpu=use_gpu) + if device == "mps": + # encodec doesn't support mps device = "cpu" - else: - device = "cuda" - model_key = str(device) + f"__codec" + model_key = "codec" if model_key not in models or force_reload: clean_models(model_key=model_key) model = _load_codec_model(device) models[model_key] = model + models[model_key].to(device) return models[model_key] @@ -322,6 +333,11 @@ def preload_models( codec_use_gpu=True, force_reload=False, ): + """Load all the necessary models for the pipeline.""" + if _grab_best_device() == "cpu" and ( + text_use_gpu or coarse_use_gpu or fine_use_gpu or codec_use_gpu + ): + logger.warning("No GPU being used. 
Careful, inference might be very slow!") _ = load_model( model_type="text", use_gpu=text_use_gpu, use_small=text_use_small, force_reload=force_reload ) @@ -366,13 +382,11 @@ def generate_text_semantic( temp=0.7, top_k=None, top_p=None, - use_gpu=True, silent=False, min_eos_p=0.2, max_gen_duration_s=None, allow_early_stop=True, - model=None, - use_kv_caching=False + use_kv_caching=False, ): """Generate semantic tokens from text.""" assert isinstance(text, str) @@ -395,12 +409,15 @@ def generate_text_semantic( ) else: semantic_history = None - model_container = load_model(use_gpu=use_gpu, model_type="text") - if model is None: - model = model_container["model"] + # load models if not yet exist + global models + if "text" not in models: + preload_models() + model_container = models["text"] + model = model_container["model"] tokenizer = model_container["tokenizer"] encoded_text = np.array(_tokenize(tokenizer, text)) + TEXT_ENCODING_OFFSET - device = "cuda" if use_gpu and torch.cuda.device_count() > 0 else "cpu" + device = next(model.parameters()).device if len(encoded_text) > 256: p = round((len(encoded_text) - 256) / len(encoded_text) * 100, 1) logger.warning(f"warning, text too long, lopping of last {p}%") @@ -424,7 +441,9 @@ def generate_text_semantic( else: semantic_history = np.array([SEMANTIC_PAD_TOKEN] * 256) x = torch.from_numpy( - np.hstack([encoded_text, semantic_history, np.array([SEMANTIC_INFER_TOKEN])]).astype(np.int64) + np.hstack([ + encoded_text, semantic_history, np.array([SEMANTIC_INFER_TOKEN]) + ]).astype(np.int64) )[None] assert x.shape[1] == 256 + 256 + 1 with _inference_mode(): @@ -440,8 +459,9 @@ def generate_text_semantic( x_input = x[:, [-1]] else: x_input = x - - logits, kv_cache = model(x_input, merge_context=True, use_cache=use_kv_caching, past_kv=kv_cache) + logits, kv_cache = model( + x_input, merge_context=True, use_cache=use_kv_caching, past_kv=kv_cache + ) relevant_logits = logits[0, 0, :SEMANTIC_VOCAB_SIZE] if allow_early_stop: relevant_logits = torch.hstack( @@ -465,7 +485,13 @@ def generate_text_semantic( v, _ = torch.topk(relevant_logits, min(top_k, relevant_logits.size(-1))) relevant_logits[relevant_logits < v[-1]] = -float("Inf") probs = F.softmax(relevant_logits / temp, dim=-1) + # multinomial bugged on mps: shuttle to cpu if necessary + inf_device = probs.device + if probs.device.type == "mps": + probs = probs.to("cpu") item_next = torch.multinomial(probs, num_samples=1) + probs = probs.to(inf_device) + item_next = item_next.to(inf_device) if allow_early_stop and ( item_next == SEMANTIC_VOCAB_SIZE or (min_eos_p is not None and probs[-1] >= min_eos_p) @@ -513,12 +539,10 @@ def generate_coarse( temp=0.7, top_k=None, top_p=None, - use_gpu=True, silent=False, max_coarse_history=630, # min 60 (faster), max 630 (more context) sliding_window_len=60, - model=None, - use_kv_caching=False + use_kv_caching=False, ): """Generate coarse audio codes from semantic tokens.""" assert ( @@ -576,9 +600,12 @@ def generate_coarse( else: x_semantic_history = np.array([], dtype=np.int32) x_coarse_history = np.array([], dtype=np.int32) - if model is None: - model = load_model(use_gpu=use_gpu, model_type="coarse") - device = "cuda" if use_gpu and torch.cuda.device_count() > 0 else "cpu" + # load models if not yet exist + global models + if "coarse" not in models: + preload_models() + model = models["coarse"] + device = next(model.parameters()).device # start loop n_steps = int( round( @@ -650,7 +677,13 @@ def generate_coarse( v, _ = torch.topk(relevant_logits, min(top_k, 
relevant_logits.size(-1))) relevant_logits[relevant_logits < v[-1]] = -float("Inf") probs = F.softmax(relevant_logits / temp, dim=-1) + # multinomial bugged on mps: shuttle to cpu if necessary + inf_device = probs.device + if probs.device.type == "mps": + probs = probs.to("cpu") item_next = torch.multinomial(probs, num_samples=1) + probs = probs.to(inf_device) + item_next = item_next.to(inf_device) item_next += logit_start_idx x_coarse_in = torch.cat((x_coarse_in, item_next[None]), dim=1) x_in = torch.cat((x_in, item_next[None]), dim=1) @@ -672,9 +705,7 @@ def generate_fine( x_coarse_gen, history_prompt=None, temp=0.5, - use_gpu=True, silent=True, - model=None, ): """Generate full audio codes from coarse audio codes.""" assert ( @@ -704,9 +735,12 @@ def generate_fine( else: x_fine_history = None n_coarse = x_coarse_gen.shape[0] - if model is None: - model = load_model(use_gpu=use_gpu, model_type="fine") - device = "cuda" if use_gpu and torch.cuda.device_count() > 0 else "cpu" + # load models if not yet exist + global models + if "fine" not in models: + preload_models() + model = models["fine"] + device = next(model.parameters()).device # make input arr in_arr = np.vstack( [ @@ -754,10 +788,14 @@ def generate_fine( else: relevant_logits = logits[0, :, :CODEBOOK_SIZE] / temp probs = F.softmax(relevant_logits, dim=-1) + # multinomial bugged on mps: shuttle to cpu if necessary + inf_device = probs.device + if probs.device.type == "mps": + probs = probs.to("cpu") codebook_preds = torch.hstack( [ - torch.multinomial(probs[n], num_samples=1) - for n in range(rel_start_fill_idx, 1024) + torch.multinomial(probs[nnn], num_samples=1).to(inf_device) + for nnn in range(rel_start_fill_idx, 1024) ] ) in_buffer[0, rel_start_fill_idx:, nn] = codebook_preds @@ -778,11 +816,14 @@ def generate_fine( return gen_fine_arr -def codec_decode(fine_tokens, model=None, use_gpu=True): +def codec_decode(fine_tokens): """Turn quantized audio codes into audio array using encodec.""" - if model is None: - model = load_codec_model(use_gpu=use_gpu) - device = "cuda" if use_gpu and torch.cuda.device_count() > 0 else "cpu" + # load models if not yet exist + global models + if "codec" not in models: + preload_models() + model = models["codec"] + device = next(model.parameters()).device arr = torch.from_numpy(fine_tokens)[None] arr = arr.to(device) arr = arr.transpose(0, 1) diff --git a/bark/model.py b/bark/model.py index bb99932..457b49e 100644 --- a/bark/model.py +++ b/bark/model.py @@ -200,7 +200,6 @@ class GPT(nn.Module): pos_emb = self.transformer.wpe(position_ids) # position embeddings of shape (1, t, n_embd) - x = self.transformer.drop(tok_emb + pos_emb) new_kv = () if use_cache else None From 8675c23a4246794db0636ee1f5a2b27321cdd918 Mon Sep 17 00:00:00 2001 From: Jairo Correa Date: Tue, 25 Apr 2023 21:21:52 -0300 Subject: [PATCH 02/13] Option to offload to cpu --- bark/generation.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/bark/generation.py b/bark/generation.py index 28d963c..ec313e7 100644 --- a/bark/generation.py +++ b/bark/generation.py @@ -36,6 +36,9 @@ else: global models models = {} +global models_devices +models_devices = {} + CONTEXT_WINDOW_SIZE = 1024 @@ -84,6 +87,7 @@ CACHE_DIR = os.path.join(os.getenv("XDG_CACHE_HOME", default_cache_dir), "suno", USE_SMALL_MODELS = os.environ.get("SUNO_USE_SMALL_MODELS", False) GLOBAL_ENABLE_MPS = os.environ.get("SUNO_ENABLE_MPS", False) +OFFLOAD_CPU = os.environ.get("SUNO_OFFLOAD_CPU", False) REMOTE_BASE_URL = 
"https://dl.suno-models.io/bark/models/v0/" @@ -296,6 +300,9 @@ def load_model(use_gpu=True, use_small=False, force_reload=False, model_type="te global models device = _grab_best_device(use_gpu=use_gpu) model_key = f"{model_type}" + if OFFLOAD_CPU: + models_devices[model_key] = device + device = "cpu" if model_key not in models or force_reload: ckpt_path = _get_ckpt_path(model_type, use_small=use_small) clean_models(model_key=model_key) @@ -315,6 +322,9 @@ def load_codec_model(use_gpu=True, force_reload=False): # encodec doesn't support mps device = "cpu" model_key = "codec" + if OFFLOAD_CPU: + models_devices[model_key] = device + device = "cpu" if model_key not in models or force_reload: clean_models(model_key=model_key) model = _load_codec_model(device) @@ -417,6 +427,8 @@ def generate_text_semantic( model = model_container["model"] tokenizer = model_container["tokenizer"] encoded_text = np.array(_tokenize(tokenizer, text)) + TEXT_ENCODING_OFFSET + if OFFLOAD_CPU: + model.to(models_devices["text"]) device = next(model.parameters()).device if len(encoded_text) > 256: p = round((len(encoded_text) - 256) / len(encoded_text) * 100, 1) @@ -514,6 +526,8 @@ def generate_text_semantic( pbar_state = req_pbar_state pbar.close() out = x.detach().cpu().numpy().squeeze()[256 + 256 + 1 :] + if OFFLOAD_CPU: + model.to("cpu") assert all(0 <= out) and all(out < SEMANTIC_VOCAB_SIZE) _clear_cuda_cache() return out @@ -605,6 +619,8 @@ def generate_coarse( if "coarse" not in models: preload_models() model = models["coarse"] + if OFFLOAD_CPU: + model.to(models_devices["coarse"]) device = next(model.parameters()).device # start loop n_steps = int( @@ -691,6 +707,8 @@ def generate_coarse( n_step += 1 del x_in del x_semantic_in + if OFFLOAD_CPU: + model.to("cpu") gen_coarse_arr = x_coarse_in.detach().cpu().numpy().squeeze()[len(x_coarse_history) :] del x_coarse_in assert len(gen_coarse_arr) == n_steps @@ -740,6 +758,8 @@ def generate_fine( if "fine" not in models: preload_models() model = models["fine"] + if OFFLOAD_CPU: + model.to(models_devices["fine"]) device = next(model.parameters()).device # make input arr in_arr = np.vstack( @@ -808,6 +828,8 @@ def generate_fine( del in_buffer gen_fine_arr = in_arr.detach().cpu().numpy().squeeze().T del in_arr + if OFFLOAD_CPU: + model.to("cpu") gen_fine_arr = gen_fine_arr[:, n_history:] if n_remove_from_end > 0: gen_fine_arr = gen_fine_arr[:, :-n_remove_from_end] @@ -823,6 +845,8 @@ def codec_decode(fine_tokens): if "codec" not in models: preload_models() model = models["codec"] + if OFFLOAD_CPU: + model.to(models_devices["codec"]) device = next(model.parameters()).device arr = torch.from_numpy(fine_tokens)[None] arr = arr.to(device) @@ -831,4 +855,6 @@ def codec_decode(fine_tokens): out = model.decoder(emb) audio_arr = out.detach().cpu().numpy().squeeze() del arr, emb, out + if OFFLOAD_CPU: + model.to("cpu") return audio_arr From dfbe09f00e601168c640b28aac4c5f4a5e782153 Mon Sep 17 00:00:00 2001 From: Jairo Correa Date: Tue, 25 Apr 2023 22:42:21 -0300 Subject: [PATCH 03/13] Add missing global models_devices --- bark/generation.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/bark/generation.py b/bark/generation.py index ec313e7..4ac165c 100644 --- a/bark/generation.py +++ b/bark/generation.py @@ -298,6 +298,7 @@ def load_model(use_gpu=True, use_small=False, force_reload=False, model_type="te if model_type not in ("text", "coarse", "fine"): raise NotImplementedError() global models + global models_devices device = _grab_best_device(use_gpu=use_gpu) model_key = 
f"{model_type}" if OFFLOAD_CPU: @@ -317,6 +318,7 @@ def load_model(use_gpu=True, use_small=False, force_reload=False, model_type="te def load_codec_model(use_gpu=True, force_reload=False): global models + global models_devices device = _grab_best_device(use_gpu=use_gpu) if device == "mps": # encodec doesn't support mps @@ -421,6 +423,7 @@ def generate_text_semantic( semantic_history = None # load models if not yet exist global models + global models_devices if "text" not in models: preload_models() model_container = models["text"] @@ -616,6 +619,7 @@ def generate_coarse( x_coarse_history = np.array([], dtype=np.int32) # load models if not yet exist global models + global models_devices if "coarse" not in models: preload_models() model = models["coarse"] @@ -755,6 +759,7 @@ def generate_fine( n_coarse = x_coarse_gen.shape[0] # load models if not yet exist global models + global models_devices if "fine" not in models: preload_models() model = models["fine"] @@ -842,6 +847,7 @@ def codec_decode(fine_tokens): """Turn quantized audio codes into audio array using encodec.""" # load models if not yet exist global models + global models_devices if "codec" not in models: preload_models() model = models["codec"] From e9ad2d5886388c6188eced86c96331b6a6983984 Mon Sep 17 00:00:00 2001 From: vaibhavs10 Date: Thu, 27 Apr 2023 16:12:54 +0200 Subject: [PATCH 04/13] initial commit --- bark/generation.py | 80 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 59 insertions(+), 21 deletions(-) diff --git a/bark/generation.py b/bark/generation.py index 4ac165c..3b12757 100644 --- a/bark/generation.py +++ b/bark/generation.py @@ -14,6 +14,7 @@ import torch import torch.nn.functional as F import tqdm from transformers import BertTokenizer +from huggingface_hub import hf_hub_download from .model import GPTConfig, GPT from .model_fine import FineGPT, FineGPTConfig @@ -89,31 +90,64 @@ USE_SMALL_MODELS = os.environ.get("SUNO_USE_SMALL_MODELS", False) GLOBAL_ENABLE_MPS = os.environ.get("SUNO_ENABLE_MPS", False) OFFLOAD_CPU = os.environ.get("SUNO_OFFLOAD_CPU", False) -REMOTE_BASE_URL = "https://dl.suno-models.io/bark/models/v0/" +# REMOTE_BASE_URL = "https://dl.suno-models.io/bark/models/v0/" + +# REMOTE_MODEL_PATHS = { +# "text_small": { +# "path": os.path.join(REMOTE_BASE_URL, "text.pt"), +# "checksum": "b3e42bcbab23b688355cd44128c4cdd3", +# }, +# "coarse_small": { +# "path": os.path.join(REMOTE_BASE_URL, "coarse.pt"), +# "checksum": "5fe964825e3b0321f9d5f3857b89194d", +# }, +# "fine_small": { +# "path": os.path.join(REMOTE_BASE_URL, "fine.pt"), +# "checksum": "5428d1befe05be2ba32195496e58dc90", +# }, +# "text": { +# "path": os.path.join(REMOTE_BASE_URL, "text_2.pt"), +# "checksum": "54afa89d65e318d4f5f80e8e8799026a", +# }, +# "coarse": { +# "path": os.path.join(REMOTE_BASE_URL, "coarse_2.pt"), +# "checksum": "8a98094e5e3a255a5c9c0ab7efe8fd28", +# }, +# "fine": { +# "path": os.path.join(REMOTE_BASE_URL, "fine_2.pt"), +# "checksum": "59d184ed44e3650774a2f0503a48a97b", +# }, +# } REMOTE_MODEL_PATHS = { "text_small": { - "path": os.path.join(REMOTE_BASE_URL, "text.pt"), + "repo_id": "reach-vb/bark-small", + "file_name": "text.pt", "checksum": "b3e42bcbab23b688355cd44128c4cdd3", }, "coarse_small": { - "path": os.path.join(REMOTE_BASE_URL, "coarse.pt"), + "repo_id": "reach-vb/bark-small", + "file_name": "coarse.pt", "checksum": "5fe964825e3b0321f9d5f3857b89194d", }, "fine_small": { - "path": os.path.join(REMOTE_BASE_URL, "fine.pt"), + "repo_id": "reach-vb/bark-small", + "file_name": "fine.pt", "checksum": 
"5428d1befe05be2ba32195496e58dc90", }, "text": { - "path": os.path.join(REMOTE_BASE_URL, "text_2.pt"), + "repo_id": "reach-vb/bark", + "file_name": "text_2.pt", "checksum": "54afa89d65e318d4f5f80e8e8799026a", }, "coarse": { - "path": os.path.join(REMOTE_BASE_URL, "coarse_2.pt"), + "repo_id": "reach-vb/bark", + "file_name": "coarse_2.pt", "checksum": "8a98094e5e3a255a5c9c0ab7efe8fd28", }, "fine": { - "path": os.path.join(REMOTE_BASE_URL, "fine_2.pt"), + "repo_id": "reach-vb/bark-small", + "file_name": "fine_2.pt", "checksum": "59d184ed44e3650774a2f0503a48a97b", }, } @@ -165,21 +199,25 @@ def _parse_s3_filepath(s3_filepath): return bucket_name, rel_s3_filepath -def _download(from_s3_path, to_local_path): - os.makedirs(CACHE_DIR, exist_ok=True) - response = requests.get(from_s3_path, stream=True) - total_size_in_bytes = int(response.headers.get("content-length", 0)) - block_size = 1024 - progress_bar = tqdm.tqdm(total=total_size_in_bytes, unit="iB", unit_scale=True) - with open(to_local_path, "wb") as file: - for data in response.iter_content(block_size): - progress_bar.update(len(data)) - file.write(data) - progress_bar.close() - if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes: - raise ValueError("ERROR, something went wrong") +# def _download(from_s3_path, to_local_path): +# os.makedirs(CACHE_DIR, exist_ok=True) +# response = requests.get(from_s3_path, stream=True) +# total_size_in_bytes = int(response.headers.get("content-length", 0)) +# block_size = 1024 +# progress_bar = tqdm.tqdm(total=total_size_in_bytes, unit="iB", unit_scale=True) +# with open(to_local_path, "wb") as file: +# for data in response.iter_content(block_size): +# progress_bar.update(len(data)) +# file.write(data) +# progress_bar.close() +# if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes: +# raise ValueError("ERROR, something went wrong") +def _download(from_hf_path, file_name, to_local_path): + os.makedirs(CACHE_DIR, exist_ok=True) + hf_hub_download(repo_id=from_hf_path, filename=file_name, cache_dir=to_local_path) + class InferenceContext: def __init__(self, benchmark=False): # we can't expect inputs to be the same length, so disable benchmarking by default @@ -243,7 +281,7 @@ def _load_model(ckpt_path, device, use_small=False, model_type="text"): os.remove(ckpt_path) if not os.path.exists(ckpt_path): logger.info(f"{model_type} model not found, downloading into `{CACHE_DIR}`.") - _download(model_info["path"], ckpt_path) + _download(model_info["repo_id"], model_info["file_name"], ckpt_path) checkpoint = torch.load(ckpt_path, map_location=device) # this is a hack model_args = checkpoint["model_args"] From ac3a7568a7e34e2f90e98b9ee6a31425fc9fe66f Mon Sep 17 00:00:00 2001 From: vaibhavs10 Date: Thu, 27 Apr 2023 16:19:58 +0200 Subject: [PATCH 05/13] up --- bark/generation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bark/generation.py b/bark/generation.py index 3b12757..a3c8a0b 100644 --- a/bark/generation.py +++ b/bark/generation.py @@ -90,7 +90,7 @@ USE_SMALL_MODELS = os.environ.get("SUNO_USE_SMALL_MODELS", False) GLOBAL_ENABLE_MPS = os.environ.get("SUNO_ENABLE_MPS", False) OFFLOAD_CPU = os.environ.get("SUNO_OFFLOAD_CPU", False) -# REMOTE_BASE_URL = "https://dl.suno-models.io/bark/models/v0/" +REMOTE_BASE_URL = "https://dl.suno-models.io/bark/models/v0/" # REMOTE_MODEL_PATHS = { # "text_small": { @@ -176,7 +176,7 @@ def _md5(fname): def _get_ckpt_path(model_type, use_small=False): model_key = f"{model_type}_small" if use_small or USE_SMALL_MODELS else 
model_type - model_name = _string_md5(REMOTE_MODEL_PATHS[model_key]["path"]) + model_name = _string_md5(REMOTE_MODEL_PATHS[model_key]["file_name"]) return os.path.join(CACHE_DIR, f"{model_name}.pt") From c26a82a4153fe05486aa593498682348f7d6ed42 Mon Sep 17 00:00:00 2001 From: Vaibhav Srivastav Date: Thu, 27 Apr 2023 17:35:28 +0200 Subject: [PATCH 06/13] up --- bark/generation.py | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/bark/generation.py b/bark/generation.py index a3c8a0b..82994e2 100644 --- a/bark/generation.py +++ b/bark/generation.py @@ -146,7 +146,7 @@ REMOTE_MODEL_PATHS = { "checksum": "8a98094e5e3a255a5c9c0ab7efe8fd28", }, "fine": { - "repo_id": "reach-vb/bark-small", + "repo_id": "reach-vb/bark", "file_name": "fine_2.pt", "checksum": "59d184ed44e3650774a2f0503a48a97b", }, @@ -199,24 +199,12 @@ def _parse_s3_filepath(s3_filepath): return bucket_name, rel_s3_filepath -# def _download(from_s3_path, to_local_path): -# os.makedirs(CACHE_DIR, exist_ok=True) -# response = requests.get(from_s3_path, stream=True) -# total_size_in_bytes = int(response.headers.get("content-length", 0)) -# block_size = 1024 -# progress_bar = tqdm.tqdm(total=total_size_in_bytes, unit="iB", unit_scale=True) -# with open(to_local_path, "wb") as file: -# for data in response.iter_content(block_size): -# progress_bar.update(len(data)) -# file.write(data) -# progress_bar.close() -# if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes: -# raise ValueError("ERROR, something went wrong") - - def _download(from_hf_path, file_name, to_local_path): os.makedirs(CACHE_DIR, exist_ok=True) - hf_hub_download(repo_id=from_hf_path, filename=file_name, cache_dir=to_local_path) + destination_file_name = to_local_path.split("/")[-1] + file_dir = CACHE_DIR + hf_hub_download(repo_id=from_hf_path, filename=file_name, local_dir=file_dir) + os.replace(f"{CACHE_DIR}/{file_name}", to_local_path) class InferenceContext: def __init__(self, benchmark=False): From 035d08e157d57d04bd1d5891c6464b151bc5acab Mon Sep 17 00:00:00 2001 From: Vaibhav Srivastav Date: Thu, 27 Apr 2023 17:45:59 +0200 Subject: [PATCH 07/13] up --- bark/generation.py | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/bark/generation.py b/bark/generation.py index 82994e2..ab23479 100644 --- a/bark/generation.py +++ b/bark/generation.py @@ -90,34 +90,6 @@ USE_SMALL_MODELS = os.environ.get("SUNO_USE_SMALL_MODELS", False) GLOBAL_ENABLE_MPS = os.environ.get("SUNO_ENABLE_MPS", False) OFFLOAD_CPU = os.environ.get("SUNO_OFFLOAD_CPU", False) -REMOTE_BASE_URL = "https://dl.suno-models.io/bark/models/v0/" - -# REMOTE_MODEL_PATHS = { -# "text_small": { -# "path": os.path.join(REMOTE_BASE_URL, "text.pt"), -# "checksum": "b3e42bcbab23b688355cd44128c4cdd3", -# }, -# "coarse_small": { -# "path": os.path.join(REMOTE_BASE_URL, "coarse.pt"), -# "checksum": "5fe964825e3b0321f9d5f3857b89194d", -# }, -# "fine_small": { -# "path": os.path.join(REMOTE_BASE_URL, "fine.pt"), -# "checksum": "5428d1befe05be2ba32195496e58dc90", -# }, -# "text": { -# "path": os.path.join(REMOTE_BASE_URL, "text_2.pt"), -# "checksum": "54afa89d65e318d4f5f80e8e8799026a", -# }, -# "coarse": { -# "path": os.path.join(REMOTE_BASE_URL, "coarse_2.pt"), -# "checksum": "8a98094e5e3a255a5c9c0ab7efe8fd28", -# }, -# "fine": { -# "path": os.path.join(REMOTE_BASE_URL, "fine_2.pt"), -# "checksum": "59d184ed44e3650774a2f0503a48a97b", -# }, -# } REMOTE_MODEL_PATHS = { "text_small": { From c61ee92ee926f9248c8c2fe040a09dc117f96b84 Mon Sep 17 
00:00:00 2001 From: Vaibhav Srivastav Date: Thu, 27 Apr 2023 17:51:43 +0200 Subject: [PATCH 08/13] up --- bark/generation.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/bark/generation.py b/bark/generation.py index ab23479..842b890 100644 --- a/bark/generation.py +++ b/bark/generation.py @@ -162,15 +162,6 @@ def _grab_best_device(use_gpu=True): return device -S3_BUCKET_PATH_RE = r"s3\:\/\/(.+?)\/" - - -def _parse_s3_filepath(s3_filepath): - bucket_name = re.search(S3_BUCKET_PATH_RE, s3_filepath).group(1) - rel_s3_filepath = re.sub(S3_BUCKET_PATH_RE, "", s3_filepath) - return bucket_name, rel_s3_filepath - - def _download(from_hf_path, file_name, to_local_path): os.makedirs(CACHE_DIR, exist_ok=True) destination_file_name = to_local_path.split("/")[-1] From b24dd26d4b18e355c5a425bec794b30216d1d86e Mon Sep 17 00:00:00 2001 From: Vaibhav Srivastav Date: Fri, 28 Apr 2023 16:26:09 +0200 Subject: [PATCH 09/13] add suggestions from code review --- bark/generation.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/bark/generation.py b/bark/generation.py index 842b890..f6980cc 100644 --- a/bark/generation.py +++ b/bark/generation.py @@ -165,9 +165,8 @@ def _grab_best_device(use_gpu=True): def _download(from_hf_path, file_name, to_local_path): os.makedirs(CACHE_DIR, exist_ok=True) destination_file_name = to_local_path.split("/")[-1] - file_dir = CACHE_DIR - hf_hub_download(repo_id=from_hf_path, filename=file_name, local_dir=file_dir) - os.replace(f"{CACHE_DIR}/{file_name}", to_local_path) + hf_hub_download(repo_id=from_hf_path, filename=file_name, local_dir=CACHE_DIR) + os.replace(os.path.join(CACHE_DIR, file_name), to_local_path) class InferenceContext: def __init__(self, benchmark=False): From 27ff4f9db86b5cd22e93872704b7bee5b56e353b Mon Sep 17 00:00:00 2001 From: Vaibhav Srivastav Date: Fri, 28 Apr 2023 17:02:41 +0200 Subject: [PATCH 10/13] new model repo --- bark/generation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bark/generation.py b/bark/generation.py index f6980cc..1e2c8db 100644 --- a/bark/generation.py +++ b/bark/generation.py @@ -93,17 +93,17 @@ OFFLOAD_CPU = os.environ.get("SUNO_OFFLOAD_CPU", False) REMOTE_MODEL_PATHS = { "text_small": { - "repo_id": "reach-vb/bark-small", + "repo_id": "reach-vb/bark", "file_name": "text.pt", "checksum": "b3e42bcbab23b688355cd44128c4cdd3", }, "coarse_small": { - "repo_id": "reach-vb/bark-small", + "repo_id": "reach-vb/bark", "file_name": "coarse.pt", "checksum": "5fe964825e3b0321f9d5f3857b89194d", }, "fine_small": { - "repo_id": "reach-vb/bark-small", + "repo_id": "reach-vb/bark", "file_name": "fine.pt", "checksum": "5428d1befe05be2ba32195496e58dc90", }, From e0f2d117f51eeb6426d02f1ff57e9a2f4ab5f3fa Mon Sep 17 00:00:00 2001 From: Vaibhav Srivastav Date: Fri, 28 Apr 2023 17:54:50 +0200 Subject: [PATCH 11/13] updating model repo organisation reach-vb -> suno --- bark/generation.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/bark/generation.py b/bark/generation.py index 1e2c8db..64b3c47 100644 --- a/bark/generation.py +++ b/bark/generation.py @@ -93,32 +93,32 @@ OFFLOAD_CPU = os.environ.get("SUNO_OFFLOAD_CPU", False) REMOTE_MODEL_PATHS = { "text_small": { - "repo_id": "reach-vb/bark", + "repo_id": "suno/bark", "file_name": "text.pt", "checksum": "b3e42bcbab23b688355cd44128c4cdd3", }, "coarse_small": { - "repo_id": "reach-vb/bark", + "repo_id": "suno/bark", "file_name": "coarse.pt", "checksum": "5fe964825e3b0321f9d5f3857b89194d", }, "fine_small": { - 
"repo_id": "reach-vb/bark", + "repo_id": "suno/bark", "file_name": "fine.pt", "checksum": "5428d1befe05be2ba32195496e58dc90", }, "text": { - "repo_id": "reach-vb/bark", + "repo_id": "suno/bark", "file_name": "text_2.pt", "checksum": "54afa89d65e318d4f5f80e8e8799026a", }, "coarse": { - "repo_id": "reach-vb/bark", + "repo_id": "suno/bark", "file_name": "coarse_2.pt", "checksum": "8a98094e5e3a255a5c9c0ab7efe8fd28", }, "fine": { - "repo_id": "reach-vb/bark", + "repo_id": "suno/bark", "file_name": "fine_2.pt", "checksum": "59d184ed44e3650774a2f0503a48a97b", }, From 9fb3494391d78eb2ef0481b092c6d12a3a488163 Mon Sep 17 00:00:00 2001 From: Keenan Freyberg <32879321+kmfreyberg@users.noreply.github.com> Date: Fri, 28 Apr 2023 17:28:57 -0400 Subject: [PATCH 12/13] Update README.md --- README.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a44887b..ed1b4d1 100644 --- a/README.md +++ b/README.md @@ -13,11 +13,16 @@ Bark is a transformer-based text-to-audio model created by [Suno](https://suno.a
-## 🔊 Demos
+You can try Bark here:
-[![Open in Spaces](https://img.shields.io/badge/🤗-Open%20In%20Spaces-blue.svg)](https://huggingface.co/spaces/suno/bark)
+[![Open in Spaces](https://img.shields.io/badge/%20🤗-Open%20in%20Spaces-blue.svg)](https://huggingface.co/spaces/suno/bark)
+[![Open on Replicate](https://img.shields.io/badge/®️-Open%20on%20Replicate-blue.svg)](https://replicate.com/suno-ai/bark)
 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eJfA2XUa-mXwdMy7DoYKVYHI1iTd9Vkt?usp=sharing)
+## 🚀 Updates
+
+
 ## 🤖 Usage
 ```python

From 39e7305f4ad596032717619a900ec4730d168531 Mon Sep 17 00:00:00 2001
From: Georg Kucsko
Date: Fri, 28 Apr 2023 17:38:54 -0400
Subject: [PATCH 13/13] Revert "Update README.md"

This reverts commit 9fb3494391d78eb2ef0481b092c6d12a3a488163.

---
 README.md | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index ed1b4d1..a44887b 100644
--- a/README.md
+++ b/README.md
@@ -13,16 +13,11 @@ Bark is a transformer-based text-to-audio model created by [Suno](https://suno.a
-You can try Bark here:
+## 🔊 Demos
-[![Open in Spaces](https://img.shields.io/badge/%20🤗-Open%20in%20Spaces-blue.svg)](https://huggingface.co/spaces/suno/bark)
-[![Open on Replicate](https://img.shields.io/badge/®️-Open%20on%20Replicate-blue.svg)](https://replicate.com/suno-ai/bark)
+[![Open in Spaces](https://img.shields.io/badge/🤗-Open%20In%20Spaces-blue.svg)](https://huggingface.co/spaces/suno/bark)
 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eJfA2XUa-mXwdMy7DoYKVYHI1iTd9Vkt?usp=sharing)
-## 🚀 Updates
-
-
 ## 🤖 Usage
 ```python
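
Taken together, these patches make device selection and memory use configurable through environment flags (`SUNO_USE_SMALL_MODELS`, `SUNO_ENABLE_MPS`, `SUNO_OFFLOAD_CPU`) that `bark/generation.py` reads once at import time. A minimal usage sketch follows; it assumes the package's public `SAMPLE_RATE`, `generate_audio`, and `preload_models` exports (not shown in these patches), and the flag values are only illustrative:

```python
import os

# These flags are read at module import time in bark/generation.py,
# so they must be set before the first `import bark`.
os.environ["SUNO_USE_SMALL_MODELS"] = "1"  # load the smaller checkpoints
os.environ["SUNO_ENABLE_MPS"] = "1"        # allow the Apple Silicon "mps" backend
os.environ["SUNO_OFFLOAD_CPU"] = "1"       # park idle models on CPU, move each to the GPU only while it runs

from bark import SAMPLE_RATE, generate_audio, preload_models  # assumed public exports

preload_models()  # fetches checkpoints from the Hugging Face Hub on first use
audio_array = generate_audio("Hello, my name is Suno.")
print(f"generated {audio_array.shape[0] / SAMPLE_RATE:.1f}s of audio")
```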