Merge remote-tracking branch 'origin/master'

zh-plus committed 2023-04-28 11:05:29 +08:00
3 changed files with 28 additions and 16 deletions

README.md

@@ -8,7 +8,7 @@
<a src="https://img.shields.io/badge/%F0%9F%93%96-Open_in_Spaces-informational.svg?style=flat-square" href="https://arxiv.org/abs/2304.11968">
<img src="https://img.shields.io/badge/%F0%9F%93%96-Arxiv_2304.11968-red.svg?style=flat-square">
</a>
<a src="https://img.shields.io/badge/%F0%9F%A4%97-Open_in_Spaces-informational.svg?style=flat-square" href="https://huggingface.co/spaces/watchtowerss/Track-Anything?duplicate=trueg">
<a src="https://img.shields.io/badge/%F0%9F%A4%97-Open_in_Spaces-informational.svg?style=flat-square" href="https://huggingface.co/spaces/watchtowerss/Track-Anything?duplicate=true">
<img src="https://img.shields.io/badge/%F0%9F%A4%97-Hugging_Face_Space-informational.svg?style=flat-square">
</a>
<a src="https://img.shields.io/badge/%F0%9F%9A%80-SUSTech_VIP_Lab-important.svg?style=flat-square" href="https://zhengfenglab.com/">
@@ -31,23 +31,32 @@
- 2023/04/25: We are delighted to introduce [Caption-Anything](https://github.com/ttengwang/Caption-Anything) :writing_hand:, an inventive project from our lab that combines the capabilities of Segment Anything, Visual Captioning, and ChatGPT.
- 2023/04/20: We deployed [[DEMO]](https://huggingface.co/spaces/watchtowerss/Track-Anything?duplicate=trueg) on Hugging Face :hugs:!
## Demo
https://user-images.githubusercontent.com/28050374/232842703-8395af24-b13e-4b8e-aafb-e94b61e6c449.MP4
- 2023/04/14: We made Track-Anything public!
### Multiple Object Tracking and Segmentation (with [XMem](https://github.com/hkchengrex/XMem))
## :world_map: Video Tutorials
https://user-images.githubusercontent.com/30309970/234902447-a4c59718-fcfe-443a-bd18-2f3f775cfc13.mp4
---
### :joystick: Example - Multiple Object Tracking and Segmentation (with [XMem](https://github.com/hkchengrex/XMem))
https://user-images.githubusercontent.com/39208339/233035206-0a151004-6461-4deb-b782-d1dbfe691493.mp4
### Video Object Tracking and Segmentation with Shot Changes (with [XMem](https://github.com/hkchengrex/XMem))
---
### :joystick: Example - Video Object Tracking and Segmentation with Shot Changes (with [XMem](https://github.com/hkchengrex/XMem))
https://user-images.githubusercontent.com/30309970/232848349-f5e29e71-2ea4-4529-ac9a-94b9ca1e7055.mp4
### Video Inpainting (with [E2FGVI](https://github.com/MCG-NKU/E2FGVI))
---
### :joystick: Example - Video Inpainting (with [E2FGVI](https://github.com/MCG-NKU/E2FGVI))
https://user-images.githubusercontent.com/28050374/232959816-07f2826f-d267-4dda-8ae5-a5132173b8f4.mp4
## Get Started
## :computer: Get Started
#### Linux & Windows
```shell
# Clone the repository:
@@ -63,7 +72,7 @@ python app.py --device cuda:0
```
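For context, here is a minimal sketch of how the CLI flags referenced later in app.py (`--device`, plus the commented-out `args.port` and `args.mask_save`) could be parsed. The actual parser in app.py is not shown in this diff, so the argument names, defaults, and helper name below are assumptions:

```python
import argparse

def parse_args():
    # Hypothetical sketch of the CLI behind `python app.py --device cuda:0`.
    # Only --device, --port, and --mask_save appear (directly or commented out) in this diff.
    parser = argparse.ArgumentParser(description="Track-Anything demo (sketch)")
    parser.add_argument("--device", type=str, default="cuda:0")   # e.g. cuda:0 / cpu
    parser.add_argument("--port", type=int, default=12212)        # Gradio server port
    parser.add_argument("--mask_save", action="store_true")       # save predicted masks to disk
    return parser.parse_args()
```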
## Citation
## :book: Citation
If you find this work useful for your research or applications, please cite using this BibTeX:
```bibtex
@misc{yang2023track,
@@ -76,6 +85,6 @@ If you find this work useful for your research or applications, please cite usin
}
```
## Acknowledgements
## :clap: Acknowledgements
The project is based on [Segment Anything](https://github.com/facebookresearch/segment-anything), [XMem](https://github.com/hkchengrex/XMem), and [E2FGVI](https://github.com/MCG-NKU/E2FGVI). Thanks to the authors for their efforts.

app.py

@@ -13,10 +13,9 @@ import requests
import json
import torchvision
import torch
from tools.interact_tools import SamControler
from tracker.base_tracker import BaseTracker
from tools.painter import mask_painter
import psutil
import time
try:
from mmcv.cnn import ConvModule
except:
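The `try`/`except` above guards an optional `mmcv` import; the body of the `except` branch falls outside this hunk. A common shape for such a fallback, shown here only as an assumed sketch (the repository's actual handler may differ):

```python
try:
    from mmcv.cnn import ConvModule
except ImportError:
    # Assumed fallback (not shown in this hunk): install the missing
    # dependency at runtime, then retry the import.
    import subprocess
    import sys
    subprocess.run([sys.executable, "-m", "pip", "install", "mmcv"], check=True)
    from mmcv.cnn import ConvModule
```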
@@ -82,7 +81,7 @@ def get_frames_from_video(video_input, video_state):
"""
video_path = video_input
frames = []
user_name = time.time()
operation_log = [("",""),("Video uploaded. Click the image to add targets to track and inpaint.","Normal")]
try:
cap = cv2.VideoCapture(video_path)
@@ -103,6 +102,7 @@ def get_frames_from_video(video_input, video_state):
image_size = (frames[0].shape[0],frames[0].shape[1])
# initialize video_state
video_state = {
"user_name": user_name,
"video_name": os.path.split(video_path)[-1],
"origin_images": frames,
"painted_images": frames.copy(),
@@ -375,8 +375,8 @@ folder ="./checkpoints"
SAM_checkpoint = download_checkpoint(sam_checkpoint_url, folder, sam_checkpoint)
xmem_checkpoint = download_checkpoint(xmem_checkpoint_url, folder, xmem_checkpoint)
e2fgvi_checkpoint = download_checkpoint_from_google_drive(e2fgvi_checkpoint_id, folder, e2fgvi_checkpoint)
# args.port = 12214
# args.device = "cuda:2"
# args.port = 12212
# args.device = "cuda:1"
# args.mask_save = True
# initialize sam, xmem, e2fgvi models
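`download_checkpoint` itself is not shown in this diff; a hedged sketch of what such a helper typically does (cache the file under `folder`, skip the download if it already exists) might look like the following. The real implementation in the repository may differ.

```python
import os
import requests

def download_checkpoint_sketch(url, folder, filename):
    # Assumed behaviour: keep checkpoints in a local cache directory and
    # only download when the file is missing.
    os.makedirs(folder, exist_ok=True)
    path = os.path.join(folder, filename)
    if not os.path.exists(path):
        with requests.get(url, stream=True) as resp:
            resp.raise_for_status()
            with open(path, "wb") as f:
                for chunk in resp.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
    return path
```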
@@ -409,6 +409,7 @@ with gr.Blocks() as iface:
video_state = gr.State(
{
"user_name": "",
"video_name": "",
"origin_images": None,
"painted_images": None,
@@ -532,6 +533,8 @@ with gr.Blocks() as iface:
video_input.clear(
lambda: (
{
"user_name": "",
"video_name": "",
"origin_images": None,
"painted_images": None,
"masks": None,
@@ -593,5 +596,5 @@ with gr.Blocks() as iface:
# cache_examples=True,
)
iface.queue(concurrency_count=1)
# iface.launch(debug=True, enable_queue=True, server_port=args.port, server_name="0.0.0.0")
iface.launch(debug=True, enable_queue=True)
iface.launch(debug=True, enable_queue=True, server_port=args.port, server_name="0.0.0.0")
# iface.launch(debug=True, enable_queue=True)
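Tying the Gradio-side changes together, here is a minimal, assumption-laden sketch of the session-state pattern and the launch call: the state fields, `queue(concurrency_count=1)`, and the `launch(...)` arguments come from the diff, while the component wiring and event handlers are illustrative only.

```python
import time
import gradio as gr

# Per-session state dict, now carrying "user_name"; reset when the video input is cleared.
EMPTY_STATE = {"user_name": "", "video_name": "", "origin_images": None, "painted_images": None}

with gr.Blocks() as demo:
    video_state = gr.State(dict(EMPTY_STATE))
    video_input = gr.Video()

    def on_change(video_path, state):
        # Stamp the session with a timestamp key, as get_frames_from_video now does.
        return dict(state, user_name=time.time(), video_name=str(video_path or ""))

    video_input.change(on_change, inputs=[video_input, video_state], outputs=[video_state])
    video_input.clear(lambda: dict(EMPTY_STATE), inputs=[], outputs=[video_state])

demo.queue(concurrency_count=1)
# Bind to an explicit port/host, as the un-commented launch line above now does.
demo.launch(debug=True, enable_queue=True, server_port=12212, server_name="0.0.0.0")
```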

Binary file not shown (image asset; 93 KiB after this change).