diff --git a/README.md b/README.md index 3f9016e..1f8d84e 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ - + @@ -31,23 +31,32 @@ - 2023/04/25: We are delighted to introduce [Caption-Anything](https://github.com/ttengwang/Caption-Anything) :writing_hand:, an inventive project from our lab that combines the capabilities of Segment Anything, Visual Captioning, and ChatGPT. - 2023/04/20: We deployed [[DEMO]](https://huggingface.co/spaces/watchtowerss/Track-Anything?duplicate=trueg) on Hugging Face :hugs:! -## Demo -https://user-images.githubusercontent.com/28050374/232842703-8395af24-b13e-4b8e-aafb-e94b61e6c449.MP4 +- 2023/04/14: We made Track-Anything public! -### Multiple Object Tracking and Segmentation (with [XMem](https://github.com/hkchengrex/XMem)) +## :world_map: Video Tutorials + +https://user-images.githubusercontent.com/30309970/234902447-a4c59718-fcfe-443a-bd18-2f3f775cfc13.mp4 + +--- + +### :joystick: Example - Multiple Object Tracking and Segmentation (with [XMem](https://github.com/hkchengrex/XMem)) https://user-images.githubusercontent.com/39208339/233035206-0a151004-6461-4deb-b782-d1dbfe691493.mp4 -### Video Object Tracking and Segmentation with Shot Changes (with [XMem](https://github.com/hkchengrex/XMem)) +--- + +### :joystick: Example - Video Object Tracking and Segmentation with Shot Changes (with [XMem](https://github.com/hkchengrex/XMem)) https://user-images.githubusercontent.com/30309970/232848349-f5e29e71-2ea4-4529-ac9a-94b9ca1e7055.mp4 -### Video Inpainting (with [E2FGVI](https://github.com/MCG-NKU/E2FGVI)) +--- + +### :joystick: Example - Video Inpainting (with [E2FGVI](https://github.com/MCG-NKU/E2FGVI)) https://user-images.githubusercontent.com/28050374/232959816-07f2826f-d267-4dda-8ae5-a5132173b8f4.mp4 -## Get Started +## :computer: Get Started #### Linux & Windows ```shell # Clone the repository: @@ -63,7 +72,7 @@ python app.py --device cuda:0 ``` -## Citation +## :book: Citation If you find this work useful for your research or applications, please cite using this BibTeX: ```bibtex @misc{yang2023track, @@ -76,6 +85,6 @@ If you find this work useful for your research or applications, please cite usin } ``` -## Acknowledgements +## :clap: Acknowledgements The project is based on [Segment Anything](https://github.com/facebookresearch/segment-anything), [XMem](https://github.com/hkchengrex/XMem), and [E2FGVI](https://github.com/MCG-NKU/E2FGVI). Thanks for the authors for their efforts. diff --git a/app.py b/app.py index 5b66e92..0118220 100644 --- a/app.py +++ b/app.py @@ -13,10 +13,9 @@ import requests import json import torchvision import torch -from tools.interact_tools import SamControler -from tracker.base_tracker import BaseTracker from tools.painter import mask_painter import psutil +import time try: from mmcv.cnn import ConvModule except: @@ -82,7 +81,7 @@ def get_frames_from_video(video_input, video_state): """ video_path = video_input frames = [] - + user_name = time.time() operation_log = [("",""),("Upload video already. Try click the image for adding targets to track and inpaint.","Normal")] try: cap = cv2.VideoCapture(video_path) @@ -103,6 +102,7 @@ def get_frames_from_video(video_input, video_state): image_size = (frames[0].shape[0],frames[0].shape[1]) # initialize video_state video_state = { + "user_name": user_name, "video_name": os.path.split(video_path)[-1], "origin_images": frames, "painted_images": frames.copy(), @@ -375,8 +375,8 @@ folder ="./checkpoints" SAM_checkpoint = download_checkpoint(sam_checkpoint_url, folder, sam_checkpoint) xmem_checkpoint = download_checkpoint(xmem_checkpoint_url, folder, xmem_checkpoint) e2fgvi_checkpoint = download_checkpoint_from_google_drive(e2fgvi_checkpoint_id, folder, e2fgvi_checkpoint) -# args.port = 12214 -# args.device = "cuda:2" +# args.port = 12212 +# args.device = "cuda:1" # args.mask_save = True # initialize sam, xmem, e2fgvi models @@ -409,6 +409,7 @@ with gr.Blocks() as iface: video_state = gr.State( { + "user_name": "", "video_name": "", "origin_images": None, "painted_images": None, @@ -532,6 +533,8 @@ with gr.Blocks() as iface: video_input.clear( lambda: ( { + "user_name": "", + "video_name": "", "origin_images": None, "painted_images": None, "masks": None, @@ -593,5 +596,5 @@ with gr.Blocks() as iface: # cache_examples=True, ) iface.queue(concurrency_count=1) -# iface.launch(debug=True, enable_queue=True, server_port=args.port, server_name="0.0.0.0") -iface.launch(debug=True, enable_queue=True) \ No newline at end of file +iface.launch(debug=True, enable_queue=True, server_port=args.port, server_name="0.0.0.0") +# iface.launch(debug=True, enable_queue=True) \ No newline at end of file diff --git a/assets/color_map_with_id.png b/assets/color_map_with_id.png new file mode 100644 index 0000000..8a70aa9 Binary files /dev/null and b/assets/color_map_with_id.png differ