diff --git a/README.md b/README.md index 737fb59..0dad467 100644 --- a/README.md +++ b/README.md @@ -80,7 +80,7 @@ text_prompt = """ I have a silky smooth voice, and today I will tell you about the exercise regimen of the common sloth. """ -audio_array = generate_audio(text_prompt, history_prompt="man-narrator") +audio_array = generate_audio(text_prompt, history_prompt="speech_0") ``` [sloth.webm](https://user-images.githubusercontent.com/5068315/230684883-a344c619-a560-4ff5-8b99-b4463a34487b.webm) diff --git a/bark/assets/prompts/brylcream.npz b/bark/assets/prompts/brylcream.npz deleted file mode 100644 index a4fb12f..0000000 Binary files a/bark/assets/prompts/brylcream.npz and /dev/null differ diff --git a/bark/assets/prompts/es-woman.npz b/bark/assets/prompts/es-woman.npz deleted file mode 100644 index db7620c..0000000 Binary files a/bark/assets/prompts/es-woman.npz and /dev/null differ diff --git a/bark/assets/prompts/music_0.npz b/bark/assets/prompts/music_0.npz new file mode 100644 index 0000000..4701038 Binary files /dev/null and b/bark/assets/prompts/music_0.npz differ diff --git a/bark/assets/prompts/music_1.npz b/bark/assets/prompts/music_1.npz new file mode 100644 index 0000000..b95f992 Binary files /dev/null and b/bark/assets/prompts/music_1.npz differ diff --git a/bark/assets/prompts/music_2.npz b/bark/assets/prompts/music_2.npz new file mode 100644 index 0000000..94c2577 Binary files /dev/null and b/bark/assets/prompts/music_2.npz differ diff --git a/bark/assets/prompts/music_3.npz b/bark/assets/prompts/music_3.npz new file mode 100644 index 0000000..c79219d Binary files /dev/null and b/bark/assets/prompts/music_3.npz differ diff --git a/bark/assets/prompts/music_4.npz b/bark/assets/prompts/music_4.npz new file mode 100644 index 0000000..5053865 Binary files /dev/null and b/bark/assets/prompts/music_4.npz differ diff --git a/bark/assets/prompts/music_5.npz b/bark/assets/prompts/music_5.npz new file mode 100644 index 0000000..5c96632 Binary files /dev/null and b/bark/assets/prompts/music_5.npz differ diff --git a/bark/assets/prompts/man-narrator.npz b/bark/assets/prompts/speech_0.npz similarity index 88% rename from bark/assets/prompts/man-narrator.npz rename to bark/assets/prompts/speech_0.npz index bc6f150..7bf3bc5 100644 Binary files a/bark/assets/prompts/man-narrator.npz and b/bark/assets/prompts/speech_0.npz differ diff --git a/bark/assets/prompts/speech_1.npz b/bark/assets/prompts/speech_1.npz new file mode 100644 index 0000000..c44d276 Binary files /dev/null and b/bark/assets/prompts/speech_1.npz differ diff --git a/bark/assets/prompts/speech_2.npz b/bark/assets/prompts/speech_2.npz new file mode 100644 index 0000000..56843f3 Binary files /dev/null and b/bark/assets/prompts/speech_2.npz differ diff --git a/bark/assets/prompts/speech_3.npz b/bark/assets/prompts/speech_3.npz new file mode 100644 index 0000000..d821f5d Binary files /dev/null and b/bark/assets/prompts/speech_3.npz differ diff --git a/bark/assets/prompts/speech_4.npz b/bark/assets/prompts/speech_4.npz new file mode 100644 index 0000000..0e82fe0 Binary files /dev/null and b/bark/assets/prompts/speech_4.npz differ diff --git a/bark/assets/prompts/speech_5.npz b/bark/assets/prompts/speech_5.npz new file mode 100644 index 0000000..3f90b5d Binary files /dev/null and b/bark/assets/prompts/speech_5.npz differ diff --git a/bark/assets/prompts/speech_6.npz b/bark/assets/prompts/speech_6.npz new file mode 100644 index 0000000..88c1787 Binary files /dev/null and b/bark/assets/prompts/speech_6.npz differ diff --git a/bark/assets/prompts/speech_7.npz b/bark/assets/prompts/speech_7.npz new file mode 100644 index 0000000..fd32c3b Binary files /dev/null and b/bark/assets/prompts/speech_7.npz differ diff --git a/bark/generation.py b/bark/generation.py index 91b322a..b47d371 100644 --- a/bark/generation.py +++ b/bark/generation.py @@ -49,10 +49,9 @@ COARSE_RATE_HZ = 75 SAMPLE_RATE = 24_000 -ALLOWED_PROMPTS = ( - "brylcream", - "es-woman", - "man-narrator", +ALLOWED_PROMPTS = set( + [f"speech_{n}" for n in range(8)] + + [f"music_{n}" for n in range(6)] ) @@ -307,7 +306,7 @@ def generate_text_semantic( assert (history_prompt in ALLOWED_PROMPTS) semantic_history = np.load( os.path.join(CUR_PATH, "assets", "prompts", f"{history_prompt}.npz") - )["text"] + )["semantic_prompt"] assert ( isinstance(semantic_history, np.ndarray) and len(semantic_history.shape) == 1 @@ -452,8 +451,8 @@ def generate_coarse( x_history = np.load( os.path.join(CUR_PATH, "assets", "prompts", f"{history_prompt}.npz") ) - x_semantic_history = x_history["coarse_1"] - x_coarse_history = x_history["coarse_2"] + x_semantic_history = x_history["semantic_prompt"] + x_coarse_history = x_history["coarse_prompt"] assert ( isinstance(x_semantic_history, np.ndarray) and len(x_semantic_history.shape) == 1 @@ -594,7 +593,7 @@ def generate_fine( assert (history_prompt in ALLOWED_PROMPTS) x_fine_history = np.load( os.path.join(CUR_PATH, "assets", "prompts", f"{history_prompt}.npz") - )["fine"] + )["fine_prompt"] assert ( isinstance(x_fine_history, np.ndarray) and len(x_fine_history.shape) == 2