From f695fe60f68d3ef1fd68c44f88635f056bcfe744 Mon Sep 17 00:00:00 2001 From: Scott Date: Sun, 7 May 2023 17:24:13 +0100 Subject: [PATCH 1/3] Add English CHANGELOG (#243) --- Changelog_EN.md | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 Changelog_EN.md diff --git a/Changelog_EN.md b/Changelog_EN.md new file mode 100644 index 0000000..87b95d2 --- /dev/null +++ b/Changelog_EN.md @@ -0,0 +1,34 @@ +### 2023-04-09 +- Fixed training parameters to improve GPU utilization rate: A100 increased from 25% to around 90%, V100: 50% to around 90%, 2060S: 60% to around 85%, P40: 25% to around 95%; significantly improved training speed +- Changed parameter: total batch_size is now per GPU batch_size +- Changed total_epoch: maximum limit increased from 100 to 1000; default increased from 10 to 20 +- Fixed issue of ckpt extraction recognizing pitch incorrectly, causing abnormal inference +- Fixed issue of distributed training saving ckpt for each rank +- Applied nan feature filtering for feature extraction +- Fixed issue with silent input/output producing random consonants or noise (old models need to retrain with a new dataset) + +### 2023-04-16 Update +- Added local real-time voice changing mini-GUI, start by double-clicking go-realtime-gui.bat +- Applied filtering for frequency bands below 50Hz during training and inference +- Lowered pyworld's minimum pitch extraction frequency from the default 80Hz to 50Hz for training and inference, so that low-pitched male voices between 50-80Hz are no longer muted +- WebUI supports changing languages according to system locale (currently supporting en_US, ja_JP, zh_CN, zh_HK, zh_SG, zh_TW; defaults to en_US if not supported) +- Fixed recognition of some GPUs (e.g., V100-16G recognition failure, P4 recognition failure) + +### 2023-04-28 Update +- Upgraded faiss index settings for faster speed and higher quality +- Removed dependency on total_npy; future model sharing will not require total_npy input +- Unlocked 
restrictions for the 16-series GPUs, providing 4GB inference settings for 4GB VRAM GPUs +- Fixed bug in UVR5 vocal accompaniment separation for certain audio formats +- Real-time voice changing mini-GUI now supports non-40k and non-lazy pitch models + +### Future Plans: +Features: +- Add option: extract a small model at each epoch save +- Add option: export additional mp3 to the specified path during inference +- Support multi-person training tab (up to 4 people) + +Base model: +- Collect breathing wav files to add to the training dataset to fix the issue of distorted breath sounds +- We are currently training a base model with an extended singing dataset, which will be released in the future +- Upgrade discriminator +- Upgrade self-supervised feature structure From 5928d5358cd58710a95436d12f0940cb3105c6f9 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Sun, 7 May 2023 17:40:09 +0000 Subject: [PATCH 2/3] Update gui.py --- gui.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gui.py b/gui.py index 3101cf3..f8415c0 100644 --- a/gui.py +++ b/gui.py @@ -1,4 +1,4 @@ -import os, sys +import os, sys,traceback now_dir = os.getcwd() sys.path.append(now_dir) @@ -39,7 +39,7 @@ class RVC: if index_rate != 0: self.index = faiss.read_index(index_path) # self.big_npy = np.load(npy_path) - self.big_npy = index.reconstruct_n(0, self.index.ntotal) + self.big_npy = self.index.reconstruct_n(0, self.index.ntotal) print("index search enabled") self.index_rate = index_rate model_path = hubert_path @@ -64,8 +64,8 @@ class RVC: print(self.net_g.load_state_dict(cpt["weight"], strict=False)) self.net_g.eval().to(device) self.net_g.half() - except Exception as e: - print(e) + except: + print(traceback.format_exc()) def get_f0(self, x, f0_up_key, inp_f0=None): x_pad = 1 From 4a2c9c062f325e9c6b3e0616f682e2cbf91750b0 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Sun, 7 May 2023 
17:42:30 +0000 Subject: [PATCH 3/3] Update gui.py --- gui.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gui.py b/gui.py index f8415c0..2d1fd3c 100644 --- a/gui.py +++ b/gui.py @@ -130,7 +130,7 @@ class RVC: # _, I = self.index.search(npy, 1) # npy = self.big_npy[I.squeeze()].astype("float16") - score, ix = index.search(npy, k=8) + score, ix = self.index.search(npy, k=8) weight = np.square(1 / score) weight /= weight.sum(axis=1, keepdims=True) npy = np.sum(big_npy[ix] * np.expand_dims(weight, axis=2), axis=1).astype(