Change default inference sampling from top-p to top-k (top_p=1, top_k=40); massive performance gain

2025-12-23 07:09:23 +01:00 · 2025-03-15 18:16:27 -05:00
parent 013a21c70d
commit 7121981bb4
6 changed files with 12 additions and 10 deletions
--- a/gradio_app.py
+++ b/gradio_app.py
@@ -512,9 +512,9 @@ def get_app():
                                    info="set to 0 to use less VRAM, but with slower inference")
                left_margin = gr.Number(label="left_margin", value=0.08, info="margin to the left of the editing segment")
                right_margin = gr.Number(label="right_margin", value=0.08, info="margin to the right of the editing segment")
-                top_p = gr.Number(label="top_p", value=0.9, info="0.9 is a good value, 0.8 is also good")
+                top_p = gr.Number(label="top_p", value=1, info="do not do topp sampling therefore set it to 1")
                temperature = gr.Number(label="temperature", value=1, info="haven't try other values, do not recommend to change")
-                top_k = gr.Number(label="top_k", value=0, info="0 means we don't use topk sampling, because we use topp sampling")
+                top_k = gr.Number(label="top_k", value=40, info="40 is a good default, can also try 20, 30")
                codec_audio_sr = gr.Number(label="codec_audio_sr", value=16000, info='encodec specific, Do not change')
                codec_sr = gr.Number(label="codec_sr", value=50, info='encodec specific, Do not change')
                silence_tokens = gr.Textbox(label="silence tokens", value="[1388,1898,131]", info="encodec specific, do not change")