From 3f91190b8ec44772831bfad72eb149c2393c840c Mon Sep 17 00:00:00 2001
From: memoryunreal <814514103@qq.com>
Date: Wed, 26 Apr 2023 05:04:57 +0000
Subject: [PATCH 1/3] Hugging Face Space badge -- li
---
README.md | 4 ++--
app.py | 15 ++++++++++++++-
track_anything.py | 9 ++++++---
tracker/base_tracker.py | 1 +
4 files changed, 23 insertions(+), 6 deletions(-)
diff --git a/README.md b/README.md
index 806ecef..8a5a77b 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,7 @@
+
@@ -30,8 +31,7 @@
## :rocket: Updates
- 2023/04/25: We are delighted to introduce [Caption-Anything](https://github.com/ttengwang/Caption-Anything) :writing_hand:, an inventive project from our lab that combines the capabilities of Segment Anything, Visual Captioning, and ChatGPT.
-- 2023/04/20: We deployed [[DEMO]](https://huggingface.co/spaces/watchtowerss/Track-Anything) on Hugging Face :hugs:!
-
+- 2023/04/20: We deployed [[DEMO]](https://huggingface.co/spaces/watchtowerss/Track-Anything) on Hugging Face :hugs:!
## Demo
https://user-images.githubusercontent.com/28050374/232842703-8395af24-b13e-4b8e-aafb-e94b61e6c449.MP4
diff --git a/app.py b/app.py
index 22a7ba7..ade5c50 100644
--- a/app.py
+++ b/app.py
@@ -13,6 +13,8 @@ import requests
import json
import torchvision
import torch
+from tools.interact_tools import SamControler
+from tracker.base_tracker import BaseTracker
from tools.painter import mask_painter
try:
from mmcv.cnn import ConvModule
@@ -204,6 +206,7 @@ def show_mask(video_state, interactive_state, mask_dropdown):
# tracking vos
def vos_tracking_video(video_state, interactive_state, mask_dropdown):
+
model.xmem.clear_memory()
if interactive_state["track_end_number"]:
following_frames = video_state["origin_images"][video_state["select_frame_number"]:interactive_state["track_end_number"]]
@@ -223,6 +226,8 @@ def vos_tracking_video(video_state, interactive_state, mask_dropdown):
template_mask = video_state["masks"][video_state["select_frame_number"]]
fps = video_state["fps"]
masks, logits, painted_images = model.generator(images=following_frames, template_mask=template_mask)
+ # clear GPU memory
+ model.xmem.clear_memory()
if interactive_state["track_end_number"]:
video_state["masks"][video_state["select_frame_number"]:interactive_state["track_end_number"]] = masks
@@ -262,6 +267,7 @@ def vos_tracking_video(video_state, interactive_state, mask_dropdown):
# inpaint
def inpaint_video(video_state, interactive_state, mask_dropdown):
+
frames = np.asarray(video_state["origin_images"])
fps = video_state["fps"]
inpaint_masks = np.asarray(video_state["masks"])
@@ -342,6 +348,12 @@ e2fgvi_checkpoint = download_checkpoint_from_google_drive(e2fgvi_checkpoint_id,
# initialize sam, xmem, e2fgvi models
model = TrackingAnything(SAM_checkpoint, xmem_checkpoint, e2fgvi_checkpoint,args)
+
+title = """
+Track-Anything
+ """
+description = """Gradio demo for Track Anything, a flexible and interactive tool for video object tracking, segmentation, and inpainting. To use it, simply upload your video, or click one of the examples to load them. Code: https://github.com/gaomingqi/Track-Anything
"""
+
+
with gr.Blocks() as iface:
"""
state for
@@ -373,7 +385,8 @@ with gr.Blocks() as iface:
"fps": 30
}
)
-
+ gr.Markdown(title)
+ gr.Markdown(description)
with gr.Row():
# for user video input
diff --git a/track_anything.py b/track_anything.py
index 360601d..3786b6a 100644
--- a/track_anything.py
+++ b/track_anything.py
@@ -10,9 +10,12 @@ import argparse
class TrackingAnything():
def __init__(self, sam_checkpoint, xmem_checkpoint, e2fgvi_checkpoint, args):
self.args = args
- self.samcontroler = SamControler(sam_checkpoint, args.sam_model_type, args.device)
- self.xmem = BaseTracker(xmem_checkpoint, device=args.device)
- self.baseinpainter = BaseInpainter(e2fgvi_checkpoint, args.device)
+ self.sam_checkpoint = sam_checkpoint
+ self.xmem_checkpoint = xmem_checkpoint
+ self.e2fgvi_checkpoint = e2fgvi_checkpoint
+ self.samcontroler = SamControler(self.sam_checkpoint, args.sam_model_type, args.device)
+ self.xmem = BaseTracker(self.xmem_checkpoint, device=args.device)
+ self.baseinpainter = BaseInpainter(self.e2fgvi_checkpoint, args.device)
# def inference_step(self, first_flag: bool, interact_flag: bool, image: np.ndarray,
# same_image_flag: bool, points:np.ndarray, labels: np.ndarray, logits: np.ndarray=None, multimask=True):
# if first_flag:
diff --git a/tracker/base_tracker.py b/tracker/base_tracker.py
index 00be327..1d47f6b 100644
--- a/tracker/base_tracker.py
+++ b/tracker/base_tracker.py
@@ -126,6 +126,7 @@ class BaseTracker:
def clear_memory(self):
self.tracker.clear_memory()
self.mapper.clear_labels()
+ torch.cuda.empty_cache()
## how to use:
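
Note on the change above: the patch adds `torch.cuda.empty_cache()` to `BaseTracker.clear_memory()` and has `vos_tracking_video` call `model.xmem.clear_memory()` both before and after `model.generator(...)`, so the XMem memory bank and cached CUDA allocations are released between tracking runs. Below is a minimal, self-contained sketch of that pattern; `MemoryBankTracker` and `track_clip` are hypothetical stand-ins, not the repo's `BaseTracker` or `vos_tracking_video`.

```python
import torch

class MemoryBankTracker:
    """Toy stand-in for an XMem-style tracker whose feature memory grows per frame."""
    def __init__(self, device=None):
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.memory = []  # cached feature tensors accumulate here

    def step(self, frame: torch.Tensor) -> torch.Tensor:
        feat = frame.to(self.device).float().mean(dim=0, keepdim=True)
        self.memory.append(feat)          # memory bank grows with every tracked frame
        return feat

    def clear_memory(self):
        self.memory.clear()               # drop references to cached tensors
        if torch.cuda.is_available():
            torch.cuda.empty_cache()      # return freed blocks to the CUDA allocator

def track_clip(tracker, frames):
    tracker.clear_memory()                # start from an empty memory bank
    outputs = [tracker.step(f) for f in frames]
    tracker.clear_memory()                # release GPU memory once the clip is done
    return outputs

if __name__ == "__main__":
    clip = [torch.rand(3, 64, 64) for _ in range(5)]
    print(len(track_clip(MemoryBankTracker(), clip)))
```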
From 3d9478990389a0e4a6f2255f8f3b4d9655a886f5 Mon Sep 17 00:00:00 2001
From: gaomingqi
Date: Wed, 26 Apr 2023 14:30:52 +0800
Subject: [PATCH 2/3] fix resize in base_inpainter --mg
---
inpainter/base_inpainter.py | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/inpainter/base_inpainter.py b/inpainter/base_inpainter.py
index 18fdfce..35d6d4e 100644
--- a/inpainter/base_inpainter.py
+++ b/inpainter/base_inpainter.py
@@ -69,10 +69,11 @@ class BaseInpainter:
size = None
else:
size = [int(W*ratio), int(H*ratio)]
- if size[0] % 2 > 0:
- size[0] += 1
- if size[1] % 2 > 0:
- size[1] += 1
+ size = [si+1 if si%2>0 else si for si in size] # only consider even values
+ # shortest side should be larger than 50
+ if min(size) < 50:
+ ratio = 50. / min(H, W)
+ size = [int(W*ratio), int(H*ratio)]
masks = np.expand_dims(masks, axis=3) # expand to T, H, W, 1
binary_masks = resize_masks(masks, tuple(size))
@@ -156,7 +157,7 @@ if __name__ == '__main__':
base_inpainter = BaseInpainter(checkpoint, device)
# 3/3: inpainting (frames: numpy array, T, H, W, 3; masks: numpy array, T, H, W)
# ratio: (0, 1], ratio for down sample, default value is 1
- inpainted_frames = base_inpainter.inpaint(frames, masks, ratio=1) # numpy array, T, H, W, 3
+ inpainted_frames = base_inpainter.inpaint(frames, masks, ratio=0.01) # numpy array, T, H, W, 3
# ----------------------------------------------
# end
# ----------------------------------------------
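
Note on the change above: the resize fix rounds each downsampled side up to an even value with a single list comprehension and rescales when the shortest side would fall below 50 pixels, which keeps the inpainter usable at very small ratios such as the `ratio=0.01` used in the example. A rough standalone sketch of that sizing rule follows; `inpaint_size` is an illustrative helper, not a function from `inpainter/base_inpainter.py`.

```python
def inpaint_size(W: int, H: int, ratio: float):
    """Compute the (width, height) used for downsampled inpainting.

    Mirrors the patched logic: scale both sides by `ratio`, round odd
    sides up to even, and rescale if the shortest side would drop below 50 px.
    """
    if ratio == 1:
        return None                                       # no resize needed
    size = [int(W * ratio), int(H * ratio)]
    size = [si + 1 if si % 2 > 0 else si for si in size]  # keep both sides even
    if min(size) < 50:                                     # guard the shortest side
        ratio = 50.0 / min(H, W)
        size = [int(W * ratio), int(H * ratio)]
    return tuple(size)

# A 1920x1080 clip at ratio=0.01 would otherwise collapse to 19x10;
# the guard rescales it so the shortest side is about 50 px.
print(inpaint_size(1920, 1080, 0.01))
```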
From 4c39381de5d1b86e65d31d48ccda906bbe06c198 Mon Sep 17 00:00:00 2001
From: Mingqi Gao
Date: Wed, 26 Apr 2023 15:12:37 +0800
Subject: [PATCH 3/3] update space link
---
README.md | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/README.md b/README.md
index 8a5a77b..92825f4 100644
--- a/README.md
+++ b/README.md
@@ -8,10 +8,9 @@
-
+
-
@@ -31,7 +30,7 @@
## :rocket: Updates
- 2023/04/25: We are delighted to introduce [Caption-Anything](https://github.com/ttengwang/Caption-Anything) :writing_hand:, an inventive project from our lab that combines the capabilities of Segment Anything, Visual Captioning, and ChatGPT.
-- 2023/04/20: We deployed [[DEMO]](https://huggingface.co/spaces/watchtowerss/Track-Anything) on Hugging Face :hugs:!
+- 2023/04/20: We deployed [[DEMO]](https://huggingface.co/spaces/watchtowerss/Track-Anything?duplicate=true) on Hugging Face :hugs:!
## Demo
https://user-images.githubusercontent.com/28050374/232842703-8395af24-b13e-4b8e-aafb-e94b61e6c449.MP4