diff --git a/README.md b/README.md index 7a3b820..aaf2847 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,27 @@ You can also use pngs to generate gif: ffmpeg -r 10 -f image2 -i output/img%d.png -s 448x256 -vf "split[s0][s1];[s0]palettegen=stats_mode=single[p];[s1][p]paletteuse=new=1" output/slomo.gif ``` +### Run in docker +Place the pre-trained models in `train_log/*.pkl` (as above) + +Building the container: +``` +docker build -t rife -f docker/Dockerfile . +``` + +Running the container: +``` +docker run --rm -it -v $PWD:/host rife:latest inference_video --exp=1 --video=untitled.mp4 --output=untitled_rife.mp4 +``` +``` +docker run --rm -it -v $PWD:/host rife:latest inference_img --img img0.png img1.png --exp=4 +``` + +Using GPU acceleration (requires proper GPU drivers for Docker): +``` +docker run --rm -it --gpus all -v /dev/dri:/dev/dri -v $PWD:/host rife:latest inference_video --exp=1 --video=untitled.mp4 --output=untitled_rife.mp4 +``` + ## Evaluation Download [RIFE model](https://drive.google.com/file/d/1c1R7iF-ypN6USo-D2YH_ORtaH3tukSlo/view?usp=sharing) or [RIFE2F1.5C model](https://drive.google.com/file/d/1ve9w-cRWotdvvbU1KcgtsSm12l-JUkeT/view?usp=sharing) reported by our paper. 
diff --git a/benchmark/MiddleBury_Other.py b/benchmark/MiddleBury_Other.py index 18b4e16..7c0f77f 100644 --- a/benchmark/MiddleBury_Other.py +++ b/benchmark/MiddleBury_Other.py @@ -12,7 +12,7 @@ from model.RIFE import Model device = torch.device("cuda" if torch.cuda.is_available() else "cpu") model = Model() -model.load_model('./train_log') +model.load_model(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'train_log')) model.eval() model.device() diff --git a/benchmark/Vimeo90K.py b/benchmark/Vimeo90K.py index c984b4c..cd70e48 100644 --- a/benchmark/Vimeo90K.py +++ b/benchmark/Vimeo90K.py @@ -13,7 +13,7 @@ from model.RIFE import Model device = torch.device("cuda" if torch.cuda.is_available() else "cpu") model = Model() -model.load_model('./train_log') +model.load_model(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'train_log')) model.eval() model.device() diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..801dbb7 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,23 @@ +FROM python:3.8-slim + +# install deps +RUN apt-get update && apt-get -y install \ + bash ffmpeg + +# setup RIFE +WORKDIR /rife +COPY . . 
+RUN pip3 install -r requirements.txt + +ADD docker/inference_img /usr/local/bin/inference_img +RUN chmod +x /usr/local/bin/inference_img +ADD docker/inference_video /usr/local/bin/inference_video +RUN chmod +x /usr/local/bin/inference_video + +# add pre-trained models +COPY train_log /rife/train_log + +WORKDIR /host +ENTRYPOINT ["/bin/bash"] + +ENV NVIDIA_DRIVER_CAPABILITIES all \ No newline at end of file diff --git a/docker/inference_img b/docker/inference_img new file mode 100644 index 0000000..5557be4 --- /dev/null +++ b/docker/inference_img @@ -0,0 +1,2 @@ +#!/bin/sh +python3 /rife/inference_img.py $@ diff --git a/docker/inference_video b/docker/inference_video new file mode 100644 index 0000000..d718c5c --- /dev/null +++ b/docker/inference_video @@ -0,0 +1,2 @@ +#!/bin/sh +python3 /rife/inference_video.py $@ diff --git a/inference_img.py b/inference_img.py index 24a9ab0..633bd55 100644 --- a/inference_img.py +++ b/inference_img.py @@ -19,7 +19,7 @@ parser.add_argument('--exp', default=4, type=int) args = parser.parse_args() model = Model() -model.load_model('./train_log', -1) +model.load_model(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'train_log'), -1) model.eval() model.device() diff --git a/inference_video.py b/inference_video.py index 4976851..7b5d56c 100644 --- a/inference_video.py +++ b/inference_video.py @@ -27,25 +27,26 @@ def transferAudio(sourceVideo, targetVideo): os.makedirs("temp") # extract audio from video os.system("ffmpeg -y -i " + sourceVideo + " -c:a copy -vn " + tempAudioFileName) - - os.rename(targetVideo, "noAudio_"+targetVideo) + + targetNoAudio = os.path.splitext(targetVideo)[0] + "_noaudio" + os.path.splitext(targetVideo)[1] + os.rename(targetVideo, targetNoAudio) # combine audio file and new video file - os.system("ffmpeg -y -i " + "noAudio_"+targetVideo + " -i " + tempAudioFileName + " -c copy " + targetVideo) + os.system("ffmpeg -y -i " + targetNoAudio + " -i " + tempAudioFileName + " -c copy " + targetVideo) if 
os.path.getsize(targetVideo) == 0: # if ffmpeg failed to merge the video and audio together try converting the audio to aac tempAudioFileName = "./temp/audio.m4a" os.system("ffmpeg -y -i " + sourceVideo + " -c:a aac -b:a 160k -vn " + tempAudioFileName) - os.system("ffmpeg -y -i " + "noAudio_"+targetVideo + " -i " + tempAudioFileName + " -c copy " + targetVideo) + os.system("ffmpeg -y -i " + targetNoAudio + " -i " + tempAudioFileName + " -c copy " + targetVideo) if (os.path.getsize(targetVideo) == 0): # if aac is not supported by selected format - os.rename("noAudio_"+targetVideo, targetVideo) + os.rename(targetNoAudio, targetVideo) print("Audio transfer failed. Interpolated video will have no audio") else: print("Lossless audio transfer failed. Audio was transcoded to AAC (M4A) instead.") # remove audio-less video - os.remove("noAudio_"+targetVideo) + os.remove(targetNoAudio) else: - os.remove("noAudio_"+targetVideo) + os.remove(targetNoAudio) # remove temp directory shutil.rmtree("temp") @@ -59,6 +60,7 @@ if torch.cuda.is_available(): parser = argparse.ArgumentParser(description='Interpolation for a pair of images') parser.add_argument('--video', dest='video', type=str, default=None) +parser.add_argument('--output', dest='output', type=str, default=None) parser.add_argument('--img', dest='img', type=str, default=None) parser.add_argument('--montage', dest='montage', action='store_true', help='montage origin video') parser.add_argument('--UHD', dest='UHD', action='store_true', help='support 4k video') @@ -74,7 +76,7 @@ if not args.img is None: from model.RIFE_HD import Model model = Model() -model.load_model('./train_log', -1) +model.load_model(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'train_log'), -1) model.eval() model.device() @@ -107,12 +109,17 @@ else: lastframe = cv2.imread(os.path.join(args.img, videogen[0]))[:, :, ::-1].copy() videogen = videogen[1:] h, w, _ = lastframe.shape +vid_out_name = None vid_out = None if args.png: if not 
os.path.exists('vid_out'): os.mkdir('vid_out') else: - vid_out = cv2.VideoWriter('{}_{}X_{}fps.{}'.format(video_path_wo_ext, (2 ** args.exp), int(np.round(args.fps)), args.ext), fourcc, args.fps, (w, h)) + if args.output is not None: + vid_out_name = args.output + else: + vid_out_name = '{}_{}X_{}fps.{}'.format(video_path_wo_ext, (2 ** args.exp), int(np.round(args.fps)), args.ext) + vid_out = cv2.VideoWriter(vid_out_name, fourcc, args.fps, (w, h)) def clear_write_buffer(user_args, write_buffer): cnt = 0 @@ -211,9 +218,9 @@ if not vid_out is None: # move audio to new video file if appropriate if args.png == False and fpsNotAssigned == True and not args.skip and not args.video is None: - outputVideoFileName = '{}_{}X_{}fps.{}'.format(video_path_wo_ext, 2 ** args.exp, int(np.round(args.fps)), args.ext) try: - transferAudio(args.video, outputVideoFileName) + transferAudio(args.video, vid_out_name) except: print("Audio transfer failed. Interpolated video will have no audio") - os.rename("noAudio_"+outputVideoFileName, outputVideoFileName) + targetNoAudio = os.path.splitext(vid_out_name)[0] + "_noaudio" + os.path.splitext(vid_out_name)[1] + os.rename(targetNoAudio, vid_out_name)