Merge remote-tracking branch 'origin/master'

zh-plus committed 2023-04-28 11:05:29 +08:00
3 changed files with 28 additions and 16 deletions

README.md

@@ -8,7 +8,7 @@
<a src="https://img.shields.io/badge/%F0%9F%93%96-Open_in_Spaces-informational.svg?style=flat-square" href="https://arxiv.org/abs/2304.11968">
<img src="https://img.shields.io/badge/%F0%9F%93%96-Arxiv_2304.11968-red.svg?style=flat-square">
</a>
<a src="https://img.shields.io/badge/%F0%9F%A4%97-Open_in_Spaces-informational.svg?style=flat-square" href="https://huggingface.co/spaces/watchtowerss/Track-Anything?duplicate=trueg">
<a src="https://img.shields.io/badge/%F0%9F%A4%97-Open_in_Spaces-informational.svg?style=flat-square" href="https://huggingface.co/spaces/watchtowerss/Track-Anything?duplicate=true">
<img src="https://img.shields.io/badge/%F0%9F%A4%97-Hugging_Face_Space-informational.svg?style=flat-square">
</a>
<a src="https://img.shields.io/badge/%F0%9F%9A%80-SUSTech_VIP_Lab-important.svg?style=flat-square" href="https://zhengfenglab.com/">
@@ -31,23 +31,32 @@
- 2023/04/25: We are delighted to introduce [Caption-Anything](https://github.com/ttengwang/Caption-Anything) :writing_hand:, an inventive project from our lab that combines the capabilities of Segment Anything, Visual Captioning, and ChatGPT.
- 2023/04/20: We deployed [[DEMO]](https://huggingface.co/spaces/watchtowerss/Track-Anything?duplicate=trueg) on Hugging Face :hugs:!
## Demo
https://user-images.githubusercontent.com/28050374/232842703-8395af24-b13e-4b8e-aafb-e94b61e6c449.MP4
- 2023/04/14: We made Track-Anything public!
### Multiple Object Tracking and Segmentation (with [XMem](https://github.com/hkchengrex/XMem))
## :world_map: Video Tutorials
https://user-images.githubusercontent.com/30309970/234902447-a4c59718-fcfe-443a-bd18-2f3f775cfc13.mp4
---
### :joystick: Example - Multiple Object Tracking and Segmentation (with [XMem](https://github.com/hkchengrex/XMem))
https://user-images.githubusercontent.com/39208339/233035206-0a151004-6461-4deb-b782-d1dbfe691493.mp4
### Video Object Tracking and Segmentation with Shot Changes (with [XMem](https://github.com/hkchengrex/XMem))
---
### :joystick: Example - Video Object Tracking and Segmentation with Shot Changes (with [XMem](https://github.com/hkchengrex/XMem))
https://user-images.githubusercontent.com/30309970/232848349-f5e29e71-2ea4-4529-ac9a-94b9ca1e7055.mp4
### Video Inpainting (with [E2FGVI](https://github.com/MCG-NKU/E2FGVI))
---
### :joystick: Example - Video Inpainting (with [E2FGVI](https://github.com/MCG-NKU/E2FGVI))
https://user-images.githubusercontent.com/28050374/232959816-07f2826f-d267-4dda-8ae5-a5132173b8f4.mp4
## Get Started
## :computer: Get Started
#### Linux & Windows
```shell
# Clone the repository:
@@ -63,7 +72,7 @@ python app.py --device cuda:0
```
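For context, here is a minimal sketch of how the CLI flags referenced later in app.py (`--device`, plus the commented-out `args.port` and `args.mask_save`) could be parsed. The actual parser in app.py is not shown in this diff, so the argument names, defaults, and helper name below are assumptions:

```python
import argparse

def parse_args():
    # Hypothetical sketch of the CLI behind `python app.py --device cuda:0`.
    # Only --device, --port, and --mask_save appear (directly or commented out) in this diff.
    parser = argparse.ArgumentParser(description="Track-Anything demo (sketch)")
    parser.add_argument("--device", type=str, default="cuda:0")   # e.g. cuda:0 / cpu
    parser.add_argument("--port", type=int, default=12212)        # Gradio server port
    parser.add_argument("--mask_save", action="store_true")       # save predicted masks to disk
    return parser.parse_args()
```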
## Citation
## :book: Citation
If you find this work useful for your research or applications, please cite using this BibTeX:
```bibtex
@misc{yang2023track,
@@ -76,6 +85,6 @@ If you find this work useful for your research or applications, please cite usin
}
```
## Acknowledgements
## :clap: Acknowledgements
The project is based on [Segment Anything](https://github.com/facebookresearch/segment-anything), [XMem](https://github.com/hkchengrex/XMem), and [E2FGVI](https://github.com/MCG-NKU/E2FGVI). Thanks to the authors for their efforts.

app.py

@@ -13,10 +13,9 @@ import requests
import json
import torchvision
import torch
from tools.interact_tools import SamControler
from tracker.base_tracker import BaseTracker
from tools.painter import mask_painter
import psutil
import time
try:
from mmcv.cnn import ConvModule
except:
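The `try`/`except` above guards an optional `mmcv` import; the body of the `except` branch falls outside this hunk. A common shape for such a fallback, shown here only as an assumed sketch (the repository's actual handler may differ):

```python
try:
    from mmcv.cnn import ConvModule
except ImportError:
    # Assumed fallback (not shown in this hunk): install the missing
    # dependency at runtime, then retry the import.
    import subprocess
    import sys
    subprocess.run([sys.executable, "-m", "pip", "install", "mmcv"], check=True)
    from mmcv.cnn import ConvModule
```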
@@ -82,7 +81,7 @@ def get_frames_from_video(video_input, video_state):
"""
video_path = video_input
frames = []
user_name = time.time()
operation_log = [("",""),("Video uploaded. Click the image to add targets to track and inpaint.","Normal")]
try:
cap = cv2.VideoCapture(video_path)
@@ -103,6 +102,7 @@ def get_frames_from_video(video_input, video_state):
image_size = (frames[0].shape[0],frames[0].shape[1])
# initialize video_state
video_state = {
"user_name": user_name,
"video_name": os.path.split(video_path)[-1],
"origin_images": frames,
"painted_images": frames.copy(),
@@ -375,8 +375,8 @@ folder ="./checkpoints"
SAM_checkpoint = download_checkpoint(sam_checkpoint_url, folder, sam_checkpoint)
xmem_checkpoint = download_checkpoint(xmem_checkpoint_url, folder, xmem_checkpoint)
e2fgvi_checkpoint = download_checkpoint_from_google_drive(e2fgvi_checkpoint_id, folder, e2fgvi_checkpoint)
# args.port = 12214
# args.device = "cuda:2"
# args.port = 12212
# args.device = "cuda:1"
# args.mask_save = True
# initialize sam, xmem, e2fgvi models
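`download_checkpoint` itself is not shown in this diff; a hedged sketch of what such a helper typically does (cache the file under `folder`, skip the download if it already exists) might look like the following. The real implementation in the repository may differ.

```python
import os
import requests

def download_checkpoint_sketch(url, folder, filename):
    # Assumed behaviour: keep checkpoints in a local cache directory and
    # only download when the file is missing.
    os.makedirs(folder, exist_ok=True)
    path = os.path.join(folder, filename)
    if not os.path.exists(path):
        with requests.get(url, stream=True) as resp:
            resp.raise_for_status()
            with open(path, "wb") as f:
                for chunk in resp.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
    return path
```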
@@ -409,6 +409,7 @@ with gr.Blocks() as iface:
video_state = gr.State(
{
"user_name": "",
"video_name": "",
"origin_images": None,
"painted_images": None,
@@ -532,6 +533,8 @@ with gr.Blocks() as iface:
video_input.clear(
lambda: (
{
"user_name": "",
"video_name": "",
"origin_images": None,
"painted_images": None,
"masks": None,
@@ -593,5 +596,5 @@ with gr.Blocks() as iface:
# cache_examples=True,
)
iface.queue(concurrency_count=1)
# iface.launch(debug=True, enable_queue=True, server_port=args.port, server_name="0.0.0.0")
iface.launch(debug=True, enable_queue=True)
iface.launch(debug=True, enable_queue=True, server_port=args.port, server_name="0.0.0.0")
# iface.launch(debug=True, enable_queue=True)
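Tying the Gradio-side changes together, here is a minimal, assumption-laden sketch of the session-state pattern and the launch call: the state fields, `queue(concurrency_count=1)`, and the `launch(...)` arguments come from the diff, while the component wiring and event handlers are illustrative only.

```python
import time
import gradio as gr

# Per-session state dict, now carrying "user_name"; reset when the video input is cleared.
EMPTY_STATE = {"user_name": "", "video_name": "", "origin_images": None, "painted_images": None}

with gr.Blocks() as demo:
    video_state = gr.State(dict(EMPTY_STATE))
    video_input = gr.Video()

    def on_change(video_path, state):
        # Stamp the session with a timestamp key, as get_frames_from_video now does.
        return dict(state, user_name=time.time(), video_name=str(video_path or ""))

    video_input.change(on_change, inputs=[video_input, video_state], outputs=[video_state])
    video_input.clear(lambda: dict(EMPTY_STATE), inputs=[], outputs=[video_state])

demo.queue(concurrency_count=1)
# Bind to an explicit port/host, as the un-commented launch line above now does.
demo.launch(debug=True, enable_queue=True, server_port=12212, server_name="0.0.0.0")
```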

Binary file not shown (image asset; 93 KiB after this change).