# Track-Anything/tools/interact_tools.py

import time
import torch
import cv2
from PIL import Image, ImageDraw, ImageOps
import numpy as np
from typing import Union
from segment_anything import sam_model_registry, SamPredictor, SamAutomaticMaskGenerator
import matplotlib.pyplot as plt
import PIL
from .mask_painter import mask_painter as mask_painter2
from .base_segmenter import BaseSegmenter
from .painter import mask_painter, point_painter
import os
import requests
import sys


# Rendering settings handed to mask_painter / point_painter by
# SamControler.first_frame_click.
# NOTE(review): the integer "color" values are passed straight through to the
# painters; they look like palette indices -- confirm in .painter.

# Module-level default; first_frame_click takes its own ``mask_color`` argument
# (also defaulting to 3), which is what actually reaches mask_painter.
mask_color = 3
mask_alpha = 0.7

# contour_color was previously assigned 1 and then overwritten with 2 a few
# lines later (contour_width was likewise assigned twice); only the effective
# values are kept.
contour_color = 2
contour_width = 5

# point_color_ne is used for points whose label is > 0 and point_color_ps for
# points whose label is < 1 (see first_frame_click).
# NOTE(review): if "ne"/"ps" stand for negative/positive, the names are
# inverted relative to that usage -- confirm before renaming.
point_color_ne = 8
point_color_ps = 50
point_alpha = 0.9
point_radius = 15


class SamControler():
    '''
    Turns click prompts into a segmentation mask plus a painted preview image.

    All prediction work is delegated to a ``BaseSegmenter`` stored in
    ``self.sam_controler``.
    '''

    def __init__(self, SAM_checkpoint, model_type, device):
        '''
        initialize sam controler

        SAM_checkpoint, model_type and device are forwarded unchanged to
        ``BaseSegmenter``.
        '''
        self.sam_controler = BaseSegmenter(SAM_checkpoint, model_type, device)


    # def seg_again(self, image: np.ndarray):
    #     '''
    #     it is used when interact in video
    #     '''
    #     self.sam_controler.reset_image()
    #     self.sam_controler.set_image(image)
    #     return


    @staticmethod
    def _pick_best(masks, scores, logits):
        '''Return the (mask, logit) pair of the highest-scoring candidate.'''
        best = np.argmax(scores)
        return masks[best], logits[best, :, :]

    def first_frame_click(self, image: np.ndarray, points: np.ndarray, labels: np.ndarray, multimask=True, mask_color=3):
        '''
        it is used in first frame in video

        image: picture the mask and the click markers are painted onto.
            NOTE(review): set_image() is not called here, so the segmenter
            presumably must already hold this image -- confirm against caller.
        points: click coordinates, one row per click.
        labels: one label per click (1-D); clicks with label > 0 and clicks
            with label < 1 are painted in different colors.
        multimask: forwarded to the segmenter's predict().
        mask_color: color argument forwarded to mask_painter.

        return: mask, logit, painted image(mask+point); the painted image is
            a PIL Image.
        raises: ValueError if there are no clicks or if points and labels
            differ in length.
        '''
        points = np.asarray(points)
        labels = np.asarray(labels)

        # Validate before running the (expensive) prediction, and with a real
        # exception so the check survives ``python -O``.
        if len(points) != len(labels):
            raise ValueError(
                f'points and labels must have the same length, got {len(points)} and {len(labels)}'
            )
        if len(labels) == 0:
            raise ValueError('at least one click is required')

        # First pass: prompt with the clicks only and keep the best candidate.
        prompts = {
            'point_coords': points,
            'point_labels': labels,
        }
        masks, scores, logits = self.sam_controler.predict(prompts, 'point', multimask)
        mask, logit = self._pick_best(masks, scores, logits)

        # When the most recent click has label 1, run a second pass that feeds
        # the first-pass logit back in as a mask prompt.
        if labels[-1] == 1:
            refine_prompts = {
                'point_coords': points,
                'point_labels': labels,
                'mask_input': logit[None, :, :]
            }
            masks, scores, logits = self.sam_controler.predict(refine_prompts, 'both', multimask)
            mask, logit = self._pick_best(masks, scores, logits)

        # Paint the mask first, then the two groups of clicks on top of it.
        painted_image = mask_painter(image, mask.astype('uint8'), mask_color, mask_alpha, contour_color, contour_width)
        painted_image = point_painter(painted_image, points[labels > 0], point_color_ne, point_alpha, point_radius, contour_color, contour_width)
        painted_image = point_painter(painted_image, points[labels < 1], point_color_ps, point_alpha, point_radius, contour_color, contour_width)
        painted_image = Image.fromarray(painted_image)

        return mask, logit, painted_image

    # def interact_loop(self, image:np.ndarray, same: bool, points:np.ndarray, labels: np.ndarray, logits: np.ndarray=None, multimask=True):
    #     origal_image = self.sam_controler.orignal_image
    #     if same:
    #         '''
    #         true; loop in the same image
    #         '''
    #         prompts = {
    #             'point_coords': points,
    #             'point_labels': labels,
    #             'mask_input': logits[None, :, :]
    #         }
    #         masks, scores, logits = self.sam_controler.predict(prompts, 'both', multimask)
    #         mask, logit = masks[np.argmax(scores)], logits[np.argmax(scores), :, :]

    #         painted_image = mask_painter(image, mask.astype('uint8'), mask_color, mask_alpha, contour_color, contour_width)
    #         painted_image = point_painter(painted_image, np.squeeze(points[np.argwhere(labels>0)],axis = 1), point_color_ne, point_alpha, point_radius, contour_color, contour_width)
    #         painted_image = point_painter(painted_image, np.squeeze(points[np.argwhere(labels<1)],axis = 1), point_color_ps, point_alpha, point_radius, contour_color, contour_width)
    #         painted_image = Image.fromarray(painted_image)

    #         return mask, logit, painted_image
    #     else:
    #         '''
    #         loop in the different image, interact in the video
    #         '''
    #         if image is None:
    #             raise('Image error')
    #         else:
    #             self.seg_again(image)
    #         prompts = {
    #             'point_coords': points,
    #             'point_labels': labels,
    #         }
    #         masks, scores, logits = self.sam_controler.predict(prompts, 'point', multimask)
    #         mask, logit = masks[np.argmax(scores)], logits[np.argmax(scores), :, :]

    #         painted_image = mask_painter(image, mask.astype('uint8'), mask_color, mask_alpha, contour_color, contour_width)
    #         painted_image = point_painter(painted_image, np.squeeze(points[np.argwhere(labels>0)],axis = 1), point_color_ne, point_alpha, point_radius, contour_color, contour_width)
    #         painted_image = point_painter(painted_image, np.squeeze(points[np.argwhere(labels<1)],axis = 1), point_color_ps, point_alpha, point_radius, contour_color, contour_width)
    #         painted_image = Image.fromarray(painted_image)

    #         return mask, logit, painted_image


# def initialize():
#     '''
#     initialize sam controler
#     '''
#     checkpoint_url = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth"
#     folder = "segmenter"
#     SAM_checkpoint= './checkpoints/sam_vit_h_4b8939.pth'
#     download_checkpoint(checkpoint_url, folder, SAM_checkpoint)


#     model_type = 'vit_h'
#     device = "cuda:0"
#     sam_controler = BaseSegmenter(SAM_checkpoint, model_type, device)
#     return sam_controler


# def seg_again(sam_controler, image: np.ndarray):
#     '''
#     it is used when interact in video
#     '''
#     sam_controler.reset_image()
#     sam_controler.set_image(image)
#     return


# def first_frame_click(sam_controler, image: np.ndarray, points:np.ndarray, labels: np.ndarray, multimask=True):
#     '''
#     it is used in first frame in video
#     return: mask, logit, painted image(mask+point)
#     '''
#     sam_controler.set_image(image)
#     prompts = {
#         'point_coords': points,
#         'point_labels': labels,
#     }
#     masks, scores, logits = sam_controler.predict(prompts, 'point', multimask)
#     mask, logit = masks[np.argmax(scores)], logits[np.argmax(scores), :, :]

#     assert len(points)==len(labels)

#     painted_image = mask_painter(image, mask.astype('uint8'), mask_color, mask_alpha, contour_color, contour_width)
#     painted_image = point_painter(painted_image, np.squeeze(points[np.argwhere(labels>0)],axis = 1), point_color_ne, point_alpha, point_radius, contour_color, contour_width)
#     painted_image = point_painter(painted_image, np.squeeze(points[np.argwhere(labels<1)],axis = 1), point_color_ps, point_alpha, point_radius, contour_color, contour_width)
#     painted_image = Image.fromarray(painted_image)

#     return mask, logit, painted_image

# def interact_loop(sam_controler, image:np.ndarray, same: bool, points:np.ndarray, labels: np.ndarray, logits: np.ndarray=None, multimask=True):
#     if same:
#         '''
#         true; loop in the same image
#         '''
#         prompts = {
#             'point_coords': points,
#             'point_labels': labels,
#             'mask_input': logits[None, :, :]
#         }
#         masks, scores, logits = sam_controler.predict(prompts, 'both', multimask)
#         mask, logit = masks[np.argmax(scores)], logits[np.argmax(scores), :, :]

#         painted_image = mask_painter(image, mask.astype('uint8'), mask_color, mask_alpha, contour_color, contour_width)
#         painted_image = point_painter(painted_image, np.squeeze(points[np.argwhere(labels>0)],axis = 1), point_color_ne, point_alpha, point_radius, contour_color, contour_width)
#         painted_image = point_painter(painted_image, np.squeeze(points[np.argwhere(labels<1)],axis = 1), point_color_ps, point_alpha, point_radius, contour_color, contour_width)
#         painted_image = Image.fromarray(painted_image)

#         return mask, logit, painted_image
#     else:
#         '''
#         loop in the different image, interact in the video
#         '''
#         if image is None:
#             raise('Image error')
#         else:
#             seg_again(sam_controler, image)
#         prompts = {
#             'point_coords': points,
#             'point_labels': labels,
#         }
#         masks, scores, logits = sam_controler.predict(prompts, 'point', multimask)
#         mask, logit = masks[np.argmax(scores)], logits[np.argmax(scores), :, :]

#         painted_image = mask_painter(image, mask.astype('uint8'), mask_color, mask_alpha, contour_color, contour_width)
#         painted_image = point_painter(painted_image, np.squeeze(points[np.argwhere(labels>0)],axis = 1), point_color_ne, point_alpha, point_radius, contour_color, contour_width)
#         painted_image = point_painter(painted_image, np.squeeze(points[np.argwhere(labels<1)],axis = 1), point_color_ps, point_alpha, point_radius, contour_color, contour_width)
#         painted_image = Image.fromarray(painted_image)

#         return mask, logit, painted_image


# if __name__ == "__main__":
#     points = np.array([[500, 375], [1125, 625]])
#     labels = np.array([1, 1])
#     image = cv2.imread('/hhd3/gaoshang/truck.jpg')
#     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

#     sam_controler = initialize()
#     mask, logit, painted_image_full = first_frame_click(sam_controler,image, points, labels, multimask=True)
#     painted_image = mask_painter2(image, mask.astype('uint8'), background_alpha=0.8)
#     painted_image = cv2.cvtColor(painted_image, cv2.COLOR_RGB2BGR)  # numpy array (h, w, 3)
#     cv2.imwrite('/hhd3/gaoshang/truck_point.jpg', painted_image)
#     cv2.imwrite('/hhd3/gaoshang/truck_change.jpg', image)
#     painted_image_full.save('/hhd3/gaoshang/truck_point_full.jpg')

#     mask, logit, painted_image_full = interact_loop(sam_controler,image,True, points, np.array([1, 0]), logit, multimask=True)
#     painted_image = mask_painter2(image, mask.astype('uint8'), background_alpha=0.8)
#     painted_image = cv2.cvtColor(painted_image, cv2.COLOR_RGB2BGR)  # numpy array (h, w, 3)
#     cv2.imwrite('/hhd3/gaoshang/truck_same.jpg', painted_image)
#     painted_image_full.save('/hhd3/gaoshang/truck_same_full.jpg')

#     mask, logit, painted_image_full = interact_loop(sam_controler,image, False, points, labels, multimask=True)
#     painted_image = mask_painter2(image, mask.astype('uint8'), background_alpha=0.8)
#     painted_image = cv2.cvtColor(painted_image, cv2.COLOR_RGB2BGR)  # numpy array (h, w, 3)
#     cv2.imwrite('/hhd3/gaoshang/truck_diff.jpg', painted_image)
#     painted_image_full.save('/hhd3/gaoshang/truck_diff_full.jpg')