From 3f91190b8ec44772831bfad72eb149c2393c840c Mon Sep 17 00:00:00 2001
From: memoryunreal <814514103@qq.com>
Date: Wed, 26 Apr 2023 05:04:57 +0000
Subject: [PATCH 1/3] hugging space badge -- li

---
 README.md               |  4 ++--
 app.py                  | 15 ++++++++++++++-
 track_anything.py       |  9 ++++++---
 tracker/base_tracker.py |  1 +
 4 files changed, 23 insertions(+), 6 deletions(-)
diff --git a/README.md b/README.md
index 806ecef..8a5a77b 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,7 @@
 <a src="https://img.shields.io/badge/%F0%9F%A4%97-Open_in_Spaces-informational.svg?style=flat-square" href="https://huggingface.co/spaces/watchtowerss/Track-Anything">
 <img src="https://img.shields.io/badge/%F0%9F%A4%97-Open_in_Spaces-informational.svg?style=flat-square">
 </a>
+ <a href="https://huggingface.co/spaces/watchtowerss/Track-Anything?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a>
 <a src="https://img.shields.io/badge/%F0%9F%9A%80-SUSTech_VIP_Lab-important.svg?style=flat-square" href="https://zhengfenglab.com/">
 <img src="https://img.shields.io/badge/%F0%9F%9A%80-SUSTech_VIP_Lab-important.svg?style=flat-square">
 </a>
@@ -30,8 +31,7 @@
 ## :rocket: Updates
 - 2023/04/25:  We are delighted to introduce [Caption-Anything](https://github.com/ttengwang/Caption-Anything) :writing_hand:, an inventive project from our lab that combines the capabilities of Segment Anything, Visual Captioning, and ChatGPT. 
 
-- 2023/04/20: We deployed [[DEMO]](https://huggingface.co/spaces/watchtowerss/Track-Anything) on Hugging Face :hugs:! 
-
+- 2023/04/20: We deployed [[DEMO]](https://huggingface.co/spaces/watchtowerss/Track-Anything) on Hugging Face :hugs:!
 ## Demo
 
 https://user-images.githubusercontent.com/28050374/232842703-8395af24-b13e-4b8e-aafb-e94b61e6c449.MP4
diff --git a/app.py b/app.py
index 22a7ba7..ade5c50 100644
--- a/app.py
+++ b/app.py
@@ -13,6 +13,8 @@ import requests
 import json
 import torchvision
 import torch 
+from tools.interact_tools import SamControler
+from tracker.base_tracker import BaseTracker
 from tools.painter import mask_painter
 try: 
     from mmcv.cnn import ConvModule
@@ -204,6 +206,7 @@ def show_mask(video_state, interactive_state, mask_dropdown):
 
 # tracking vos
 def vos_tracking_video(video_state, interactive_state, mask_dropdown):
+
     model.xmem.clear_memory()
     if interactive_state["track_end_number"]:
         following_frames = video_state["origin_images"][video_state["select_frame_number"]:interactive_state["track_end_number"]]
@@ -223,6 +226,8 @@ def vos_tracking_video(video_state, interactive_state, mask_dropdown):
         template_mask = video_state["masks"][video_state["select_frame_number"]]
     fps = video_state["fps"]
     masks, logits, painted_images = model.generator(images=following_frames, template_mask=template_mask)
+    # clear GPU memory
+    model.xmem.clear_memory()
 
     if interactive_state["track_end_number"]: 
         video_state["masks"][video_state["select_frame_number"]:interactive_state["track_end_number"]] = masks
@@ -262,6 +267,7 @@ def vos_tracking_video(video_state, interactive_state, mask_dropdown):
 
 # inpaint 
 def inpaint_video(video_state, interactive_state, mask_dropdown):
+
     frames = np.asarray(video_state["origin_images"])
     fps = video_state["fps"]
     inpaint_masks = np.asarray(video_state["masks"])
@@ -342,6 +348,12 @@ e2fgvi_checkpoint = download_checkpoint_from_google_drive(e2fgvi_checkpoint_id,
 # initialize sam, xmem, e2fgvi models
 model = TrackingAnything(SAM_checkpoint, xmem_checkpoint, e2fgvi_checkpoint,args)
 
+
+title = """<p><h1 align="center">Track-Anything</h1></p>
+    """
+description = """<p>Gradio demo for Track Anything, a flexible and interactive tool for video object tracking, segmentation, and inpainting. To use it, simply upload your video, or click one of the examples to load them. Code: <a href="https://github.com/gaomingqi/Track-Anything">https://github.com/gaomingqi/Track-Anything</a> <a href="https://huggingface.co/spaces/watchtowerss/Track-Anything?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a></p>"""
+
+
 with gr.Blocks() as iface:
     """
         state for 
@@ -373,7 +385,8 @@ with gr.Blocks() as iface:
         "fps": 30
         }
     )
-
+    gr.Markdown(title)
+    gr.Markdown(description)
     with gr.Row():
 
         # for user video input
diff --git a/track_anything.py b/track_anything.py
index 360601d..3786b6a 100644
--- a/track_anything.py
+++ b/track_anything.py
@@ -10,9 +10,12 @@ import argparse
 class TrackingAnything():
     def __init__(self, sam_checkpoint, xmem_checkpoint, e2fgvi_checkpoint, args):
         self.args = args
-        self.samcontroler = SamControler(sam_checkpoint, args.sam_model_type, args.device)
-        self.xmem = BaseTracker(xmem_checkpoint, device=args.device)
-        self.baseinpainter = BaseInpainter(e2fgvi_checkpoint, args.device) 
+        self.sam_checkpoint = sam_checkpoint
+        self.xmem_checkpoint = xmem_checkpoint
+        self.e2fgvi_checkpoint = e2fgvi_checkpoint
+        self.samcontroler = SamControler(self.sam_checkpoint, args.sam_model_type, args.device)
+        self.xmem = BaseTracker(self.xmem_checkpoint, device=args.device)
+        self.baseinpainter = BaseInpainter(self.e2fgvi_checkpoint, args.device) 
     # def inference_step(self, first_flag: bool, interact_flag: bool, image: np.ndarray, 
     #                    same_image_flag: bool, points:np.ndarray, labels: np.ndarray, logits: np.ndarray=None, multimask=True):
     #     if first_flag:
diff --git a/tracker/base_tracker.py b/tracker/base_tracker.py
index 00be327..1d47f6b 100644
--- a/tracker/base_tracker.py
+++ b/tracker/base_tracker.py
@@ -126,6 +126,7 @@ class BaseTracker:
     def clear_memory(self):
         self.tracker.clear_memory()
         self.mapper.clear_labels()
+        torch.cuda.empty_cache()
 
 
 ##  how to use:

From 3d9478990389a0e4a6f2255f8f3b4d9655a886f5 Mon Sep 17 00:00:00 2001
From: gaomingqi <im.mingqi@gmail.com>
Date: Wed, 26 Apr 2023 14:30:52 +0800
Subject: [PATCH 2/3] fix resize in base_inpainter --mg

---
 inpainter/base_inpainter.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/inpainter/base_inpainter.py b/inpainter/base_inpainter.py
index 18fdfce..35d6d4e 100644
--- a/inpainter/base_inpainter.py
+++ b/inpainter/base_inpainter.py
@@ -69,10 +69,11 @@ class BaseInpainter:
             size = None
         else:
             size = [int(W*ratio), int(H*ratio)]
-            if size[0] % 2 > 0:
-                size[0] += 1
-            if size[1] % 2 > 0:
-                size[1] += 1
+            size = [si+1 if si%2>0 else si for si in size]  # only consider even values
+            # shortest side should be larger than 50
+            if min(size) < 50:
+                ratio = 50. / min(H, W)
+                size = [int(W*ratio), int(H*ratio)]
         
         masks = np.expand_dims(masks, axis=3)    # expand to T, H, W, 1
         binary_masks = resize_masks(masks, tuple(size))
@@ -156,7 +157,7 @@ if __name__ == '__main__':
     base_inpainter = BaseInpainter(checkpoint, device)
     # 3/3: inpainting (frames: numpy array, T, H, W, 3; masks: numpy array, T, H, W)
     # ratio: (0, 1], ratio for down sample, default value is 1
-    inpainted_frames = base_inpainter.inpaint(frames, masks, ratio=1)   # numpy array, T, H, W, 3
+    inpainted_frames = base_inpainter.inpaint(frames, masks, ratio=0.01)   # numpy array, T, H, W, 3
     # ----------------------------------------------
     # end
     # ----------------------------------------------

From 4c39381de5d1b86e65d31d48ccda906bbe06c198 Mon Sep 17 00:00:00 2001
From: Mingqi Gao <im.mingqi@gmail.com>
Date: Wed, 26 Apr 2023 15:12:37 +0800
Subject: [PATCH 3/3] update space link

---
 README.md | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 8a5a77b..92825f4 100644
--- a/README.md
+++ b/README.md
@@ -8,10 +8,9 @@
 <a src="https://img.shields.io/badge/%F0%9F%93%96-Open_in_Spaces-informational.svg?style=flat-square" href="https://arxiv.org/abs/2304.11968">
 <img src="https://img.shields.io/badge/%F0%9F%93%96-Arxiv_2304.11968-red.svg?style=flat-square">
 </a>
-<a src="https://img.shields.io/badge/%F0%9F%A4%97-Open_in_Spaces-informational.svg?style=flat-square" href="https://huggingface.co/spaces/watchtowerss/Track-Anything">
+<a src="https://img.shields.io/badge/%F0%9F%A4%97-Open_in_Spaces-informational.svg?style=flat-square" href="https://huggingface.co/spaces/watchtowerss/Track-Anything?duplicate=true">
 <img src="https://img.shields.io/badge/%F0%9F%A4%97-Open_in_Spaces-informational.svg?style=flat-square">
 </a>
- <a href="https://huggingface.co/spaces/watchtowerss/Track-Anything?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a>
 <a src="https://img.shields.io/badge/%F0%9F%9A%80-SUSTech_VIP_Lab-important.svg?style=flat-square" href="https://zhengfenglab.com/">
 <img src="https://img.shields.io/badge/%F0%9F%9A%80-SUSTech_VIP_Lab-important.svg?style=flat-square">
 </a>
@@ -31,7 +30,7 @@
 ## :rocket: Updates
 - 2023/04/25:  We are delighted to introduce [Caption-Anything](https://github.com/ttengwang/Caption-Anything) :writing_hand:, an inventive project from our lab that combines the capabilities of Segment Anything, Visual Captioning, and ChatGPT. 
 
-- 2023/04/20: We deployed [[DEMO]](https://huggingface.co/spaces/watchtowerss/Track-Anything) on Hugging Face :hugs:!
+- 2023/04/20: We deployed [[DEMO]](https://huggingface.co/spaces/watchtowerss/Track-Anything?duplicate=true) on Hugging Face :hugs:!
 ## Demo
 
 https://user-images.githubusercontent.com/28050374/232842703-8395af24-b13e-4b8e-aafb-e94b61e6c449.MP4