From 6e4d02bfbf14aebcd22772a0a2a30791f99c4531 Mon Sep 17 00:00:00 2001
From: Mylo <36931363+gitmylo@users.noreply.github.com>
Date: Fri, 26 May 2023 22:38:39 +0200
Subject: [PATCH 1/2] Update customtokenizer.py to latest version.

---
 hubert/customtokenizer.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/hubert/customtokenizer.py b/hubert/customtokenizer.py
index 7f807d3..c1a4a51 100644
--- a/hubert/customtokenizer.py
+++ b/hubert/customtokenizer.py
@@ -1,4 +1,8 @@
-# From https://github.com/gitmylo/bark-voice-cloning-HuBERT-quantizer
+"""
+Custom tokenizer model.
+Author: https://www.github.com/gitmylo/
+License: MIT
+"""
 
 import json
 import os.path
@@ -91,7 +95,7 @@ class CustomTokenizer(nn.Module):
         optimizer.step()
 
     def save(self, path):
-        info_path = os.path.basename(path) + '/.info'
+        info_path = '.'.join(os.path.basename(path).split('.')[:-1]) + '/.info'
         torch.save(self.state_dict(), path)
         data_from_model = Data(self.input_size, self.hidden_size, self.output_size, self.version)
         with ZipFile(path, 'a') as model_zip:
@@ -112,7 +116,9 @@ class CustomTokenizer(nn.Module):
             model = CustomTokenizer()
         else:
             model = CustomTokenizer(data_from_model.hidden_size, data_from_model.input_size, data_from_model.output_size, data_from_model.version)
-        model.load_state_dict(torch.load(path, map_location))
+        model.load_state_dict(torch.load(path))
+        if map_location:
+            model = model.to(map_location)
         return model
 
 

From 05b9106f003b56bb180fd71697a651809cfa6e11 Mon Sep 17 00:00:00 2001
From: Mylo <36931363+gitmylo@users.noreply.github.com>
Date: Fri, 26 May 2023 22:40:09 +0200
Subject: [PATCH 2/2] Update pre_kmeans_hubert.py to latest version

---
 hubert/pre_kmeans_hubert.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/hubert/pre_kmeans_hubert.py b/hubert/pre_kmeans_hubert.py
index 93f82fe..e7a76a4 100644
--- a/hubert/pre_kmeans_hubert.py
+++ b/hubert/pre_kmeans_hubert.py
@@ -1,4 +1,11 @@
-# From https://github.com/gitmylo/bark-voice-cloning-HuBERT-quantizer
+"""
+Modified HuBERT model without kmeans.
+Original author: https://github.com/lucidrains/
+Modified by: https://www.github.com/gitmylo/
+License: MIT
+"""
+
+# Modified code from https://github.com/lucidrains/audiolm-pytorch/blob/main/audiolm_pytorch/hubert_kmeans.py
 
 from pathlib import Path
 
@@ -6,8 +13,6 @@ import torch
 from torch import nn
 from einops import pack, unpack
 
-import joblib
-
 import fairseq
 
 from torchaudio.functional import resample
@@ -37,13 +42,17 @@ class CustomHubert(nn.Module):
         checkpoint_path,
         target_sample_hz=16000,
         seq_len_multiple_of=None,
-        output_layer=9
+        output_layer=9,
+        device=None
     ):
         super().__init__()
         self.target_sample_hz = target_sample_hz
         self.seq_len_multiple_of = seq_len_multiple_of
         self.output_layer = output_layer
 
+        if device is not None:
+            self.to(device)
+
         model_path = Path(checkpoint_path)
 
         assert model_path.exists(), f'path {checkpoint_path} does not exist'
@@ -52,6 +61,9 @@ class CustomHubert(nn.Module):
         load_model_input = {checkpoint_path: checkpoint}
         model, *_ = fairseq.checkpoint_utils.load_model_ensemble_and_task(load_model_input)
 
+        if device is not None:
+            model[0].to(device)
+
         self.model = model[0]
         self.model.eval()