From 3f91190b8ec44772831bfad72eb149c2393c840c Mon Sep 17 00:00:00 2001 From: memoryunreal <814514103@qq.com> Date: Wed, 26 Apr 2023 05:04:57 +0000 Subject: [PATCH 1/3] hugging space badge -- li --- README.md | 4 ++-- app.py | 15 ++++++++++++++- track_anything.py | 9 ++++++--- tracker/base_tracker.py | 1 + 4 files changed, 23 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 806ecef..8a5a77b 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ + Duplicate Space @@ -30,8 +31,7 @@ ## :rocket: Updates - 2023/04/25: We are delighted to introduce [Caption-Anything](https://github.com/ttengwang/Caption-Anything) :writing_hand:, an inventive project from our lab that combines the capabilities of Segment Anything, Visual Captioning, and ChatGPT. -- 2023/04/20: We deployed [[DEMO]](https://huggingface.co/spaces/watchtowerss/Track-Anything) on Hugging Face :hugs:! - +- 2023/04/20: We deployed [[DEMO]](https://huggingface.co/spaces/watchtowerss/Track-Anything) on Hugging Face :hugs:! 
## Demo https://user-images.githubusercontent.com/28050374/232842703-8395af24-b13e-4b8e-aafb-e94b61e6c449.MP4 diff --git a/app.py b/app.py index 22a7ba7..ade5c50 100644 --- a/app.py +++ b/app.py @@ -13,6 +13,8 @@ import requests import json import torchvision import torch +from tools.interact_tools import SamControler +from tracker.base_tracker import BaseTracker from tools.painter import mask_painter try: from mmcv.cnn import ConvModule @@ -204,6 +206,7 @@ def show_mask(video_state, interactive_state, mask_dropdown): # tracking vos def vos_tracking_video(video_state, interactive_state, mask_dropdown): + model.xmem.clear_memory() if interactive_state["track_end_number"]: following_frames = video_state["origin_images"][video_state["select_frame_number"]:interactive_state["track_end_number"]] @@ -223,6 +226,8 @@ def vos_tracking_video(video_state, interactive_state, mask_dropdown): template_mask = video_state["masks"][video_state["select_frame_number"]] fps = video_state["fps"] masks, logits, painted_images = model.generator(images=following_frames, template_mask=template_mask) + # clear GPU memory + model.xmem.clear_memory() if interactive_state["track_end_number"]: video_state["masks"][video_state["select_frame_number"]:interactive_state["track_end_number"]] = masks @@ -262,6 +267,7 @@ def vos_tracking_video(video_state, interactive_state, mask_dropdown): # inpaint def inpaint_video(video_state, interactive_state, mask_dropdown): + frames = np.asarray(video_state["origin_images"]) fps = video_state["fps"] inpaint_masks = np.asarray(video_state["masks"]) @@ -342,6 +348,12 @@ e2fgvi_checkpoint = download_checkpoint_from_google_drive(e2fgvi_checkpoint_id, # initialize sam, xmem, e2fgvi models model = TrackingAnything(SAM_checkpoint, xmem_checkpoint, e2fgvi_checkpoint,args) + +title = """

Track-Anything

+ """ +description = """

Gradio demo for Track Anything, a flexible and interactive tool for video object tracking, segmentation, and inpainting. To use it, simply upload your video, or click one of the examples to load them. Code: https://github.com/gaomingqi/Track-Anything Duplicate Space

""" + + with gr.Blocks() as iface: """ state for @@ -373,7 +385,8 @@ with gr.Blocks() as iface: "fps": 30 } ) - + gr.Markdown(title) + gr.Markdown(description) with gr.Row(): # for user video input diff --git a/track_anything.py b/track_anything.py index 360601d..3786b6a 100644 --- a/track_anything.py +++ b/track_anything.py @@ -10,9 +10,12 @@ import argparse class TrackingAnything(): def __init__(self, sam_checkpoint, xmem_checkpoint, e2fgvi_checkpoint, args): self.args = args - self.samcontroler = SamControler(sam_checkpoint, args.sam_model_type, args.device) - self.xmem = BaseTracker(xmem_checkpoint, device=args.device) - self.baseinpainter = BaseInpainter(e2fgvi_checkpoint, args.device) + self.sam_checkpoint = sam_checkpoint + self.xmem_checkpoint = xmem_checkpoint + self.e2fgvi_checkpoint = e2fgvi_checkpoint + self.samcontroler = SamControler(self.sam_checkpoint, args.sam_model_type, args.device) + self.xmem = BaseTracker(self.xmem_checkpoint, device=args.device) + self.baseinpainter = BaseInpainter(self.e2fgvi_checkpoint, args.device) # def inference_step(self, first_flag: bool, interact_flag: bool, image: np.ndarray, # same_image_flag: bool, points:np.ndarray, labels: np.ndarray, logits: np.ndarray=None, multimask=True): # if first_flag: diff --git a/tracker/base_tracker.py b/tracker/base_tracker.py index 00be327..1d47f6b 100644 --- a/tracker/base_tracker.py +++ b/tracker/base_tracker.py @@ -126,6 +126,7 @@ class BaseTracker: def clear_memory(self): self.tracker.clear_memory() self.mapper.clear_labels() + torch.cuda.empty_cache() ## how to use: From 3d9478990389a0e4a6f2255f8f3b4d9655a886f5 Mon Sep 17 00:00:00 2001 From: gaomingqi Date: Wed, 26 Apr 2023 14:30:52 +0800 Subject: [PATCH 2/3] fix resize in base_inpainter --mg --- inpainter/base_inpainter.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/inpainter/base_inpainter.py b/inpainter/base_inpainter.py index 18fdfce..35d6d4e 100644 --- a/inpainter/base_inpainter.py +++ 
b/inpainter/base_inpainter.py @@ -69,10 +69,11 @@ class BaseInpainter: size = None else: size = [int(W*ratio), int(H*ratio)] - if size[0] % 2 > 0: - size[0] += 1 - if size[1] % 2 > 0: - size[1] += 1 + size = [si+1 if si%2>0 else si for si in size] # only consider even values + # shortest side should be larger than 50 + if min(size) < 50: + ratio = 50. / min(H, W) + size = [int(W*ratio), int(H*ratio)] masks = np.expand_dims(masks, axis=3) # expand to T, H, W, 1 binary_masks = resize_masks(masks, tuple(size)) @@ -156,7 +157,7 @@ if __name__ == '__main__': base_inpainter = BaseInpainter(checkpoint, device) # 3/3: inpainting (frames: numpy array, T, H, W, 3; masks: numpy array, T, H, W) # ratio: (0, 1], ratio for down sample, default value is 1 - inpainted_frames = base_inpainter.inpaint(frames, masks, ratio=1) # numpy array, T, H, W, 3 + inpainted_frames = base_inpainter.inpaint(frames, masks, ratio=0.01) # numpy array, T, H, W, 3 # ---------------------------------------------- # end # ---------------------------------------------- From 4c39381de5d1b86e65d31d48ccda906bbe06c198 Mon Sep 17 00:00:00 2001 From: Mingqi Gao Date: Wed, 26 Apr 2023 15:12:37 +0800 Subject: [PATCH 3/3] update space link --- README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8a5a77b..92825f4 100644 --- a/README.md +++ b/README.md @@ -8,10 +8,9 @@ - + - Duplicate Space @@ -31,7 +30,7 @@ ## :rocket: Updates - 2023/04/25: We are delighted to introduce [Caption-Anything](https://github.com/ttengwang/Caption-Anything) :writing_hand:, an inventive project from our lab that combines the capabilities of Segment Anything, Visual Captioning, and ChatGPT. -- 2023/04/20: We deployed [[DEMO]](https://huggingface.co/spaces/watchtowerss/Track-Anything) on Hugging Face :hugs:! +- 2023/04/20: We deployed [[DEMO]](https://huggingface.co/spaces/watchtowerss/Track-Anything?duplicate=true) on Hugging Face :hugs:! 
## Demo https://user-images.githubusercontent.com/28050374/232842703-8395af24-b13e-4b8e-aafb-e94b61e6c449.MP4