From fd20265324303761e426da85d58fcc7f1a87045c Mon Sep 17 00:00:00 2001
From: pyp_l40 <peng_puyuan@outlook.com>
Date: Thu, 25 Apr 2024 14:38:03 -0500
Subject: [PATCH] fix number bug in whisperx alignment by converting numbers to words

---
 gradio_app.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gradio_app.py b/gradio_app.py
index 3b4c128..bcf220d 100644
--- a/gradio_app.py
+++ b/gradio_app.py
@@ -74,6 +74,8 @@ class WhisperxModel:
 
     def transcribe(self, audio_path):
         segments = self.model.transcribe(audio_path, batch_size=8)["segments"]
+        for segment in segments:
+            segment['text'] = replace_numbers_with_words(segment['text'])
         return self.align_model.align(segments, audio_path)
 
 
@@ -177,7 +179,7 @@ def align(seed, transcript, audio_path):
     if align_model is None:
         raise gr.Error("Align model not loaded")
     seed_everything(seed)
-
+    transcript = replace_numbers_with_words(transcript).replace("  ", " ").replace("  ", " ")
     fragments = align_segments(transcript, audio_path)
     segments = [{
         "start": float(fragment["begin"]),