# Copyright (c) Alibaba, Inc. and its affiliates.
import enum


class Fields(object):
    """ Names for different application fields
    """
    cv = 'cv'
    nlp = 'nlp'
    audio = 'audio'
    multi_modal = 'multi-modal'


class CVTasks(object):
    # ocr
    ocr_detection = 'ocr-detection'
    ocr_recognition = 'ocr-recognition'

    # human face body related
    animal_recognition = 'animal-recognition'
    face_detection = 'face-detection'
    card_detection = 'card-detection'
    face_recognition = 'face-recognition'
    facial_expression_recognition = 'facial-expression-recognition'
    face_2d_keypoints = 'face-2d-keypoints'
    human_detection = 'human-detection'
    human_object_interaction = 'human-object-interaction'
    face_image_generation = 'face-image-generation'
    body_2d_keypoints = 'body-2d-keypoints'
    body_3d_keypoints = 'body-3d-keypoints'
    hand_2d_keypoints = 'hand-2d-keypoints'
    general_recognition = 'general-recognition'
    human_wholebody_keypoint = 'human-wholebody-keypoint'
    image_classification = 'image-classification'
    image_multilabel_classification = 'image-multilabel-classification'
    image_classification_imagenet = 'image-classification-imagenet'
    image_classification_dailylife = 'image-classification-dailylife'
    image_object_detection = 'image-object-detection'
    video_object_detection = 'video-object-detection'
    image_segmentation = 'image-segmentation'
    semantic_segmentation = 'semantic-segmentation'
    portrait_matting = 'portrait-matting'
    text_driven_segmentation = 'text-driven-segmentation'
    shop_segmentation = 'shop-segmentation'
    hand_static = 'hand-static'
    face_human_hand_detection = 'face-human-hand-detection'
    face_emotion = 'face-emotion'
    product_segmentation = 'product-segmentation'
    crowd_counting = 'crowd-counting'

    # image editing
    skin_retouching = 'skin-retouching'
    image_super_resolution = 'image-super-resolution'
    image_colorization = 'image-colorization'
    image_color_enhancement = 'image-color-enhancement'
    image_denoising = 'image-denoising'
    image_portrait_enhancement = 'image-portrait-enhancement'
    image_inpainting = 'image-inpainting'

    # image generation
    image_to_image_translation = 'image-to-image-translation'
    image_to_image_generation = 'image-to-image-generation'
    image_style_transfer = 'image-style-transfer'
    image_portrait_stylization = 'image-portrait-stylization'
    image_body_reshaping = 'image-body-reshaping'
    image_embedding = 'image-embedding'
    product_retrieval_embedding = 'product-retrieval-embedding'

    # video recognition
    live_category = 'live-category'
    action_recognition = 'action-recognition'
    action_detection = 'action-detection'
    video_category = 'video-category'
    video_embedding = 'video-embedding'
    virtual_try_on = 'virtual-try-on'
    movie_scene_segmentation = 'movie-scene-segmentation'

    # video segmentation
    referring_video_object_segmentation = 'referring-video-object-segmentation'

    # video editing
    video_inpainting = 'video-inpainting'

    # reid and tracking
    video_single_object_tracking = 'video-single-object-tracking'
    video_summarization = 'video-summarization'
    image_reid_person = 'image-reid-person'


class NLPTasks(object):
    # nlp tasks
    word_segmentation = 'word-segmentation'
    part_of_speech = 'part-of-speech'
    named_entity_recognition = 'named-entity-recognition'
    nli = 'nli'
    sentiment_classification = 'sentiment-classification'
    sentiment_analysis = 'sentiment-analysis'
    sentence_similarity = 'sentence-similarity'
    text_classification = 'text-classification'
    sentence_embedding = 'sentence-embedding'
    passage_ranking = 'passage-ranking'
    relation_extraction = 'relation-extraction'
    zero_shot = 'zero-shot'
    translation = 'translation'
    token_classification = 'token-classification'
    conversational = 'conversational'
    text_generation = 'text-generation'
    text2text_generation = 'text2text-generation'
    task_oriented_conversation = 'task-oriented-conversation'
    dialog_intent_prediction = 'dialog-intent-prediction'
    dialog_state_tracking = 'dialog-state-tracking'
    table_question_answering = 'table-question-answering'
    fill_mask = 'fill-mask'
    summarization = 'summarization'
    question_answering = 'question-answering'
    zero_shot_classification = 'zero-shot-classification'
    backbone = 'backbone'
    text_error_correction = 'text-error-correction'
    faq_question_answering = 'faq-question-answering'
    conversational_text_to_sql = 'conversational-text-to-sql'
    information_extraction = 'information-extraction'
    document_segmentation = 'document-segmentation'
    feature_extraction = 'feature-extraction'


class AudioTasks(object):
    # audio tasks
    auto_speech_recognition = 'auto-speech-recognition'
    text_to_speech = 'text-to-speech'
    speech_signal_process = 'speech-signal-process'
    acoustic_echo_cancellation = 'acoustic-echo-cancellation'
    acoustic_noise_suppression = 'acoustic-noise-suppression'
    keyword_spotting = 'keyword-spotting'


class MultiModalTasks(object):
    # multi-modal tasks
    image_captioning = 'image-captioning'
    visual_grounding = 'visual-grounding'
    text_to_image_synthesis = 'text-to-image-synthesis'
    multi_modal_embedding = 'multi-modal-embedding'
    generative_multi_modal_embedding = 'generative-multi-modal-embedding'
    multi_modal_similarity = 'multi-modal-similarity'
    visual_question_answering = 'visual-question-answering'
    visual_entailment = 'visual-entailment'
    video_multi_modal_embedding = 'video-multi-modal-embedding'
    image_text_retrieval = 'image-text-retrieval'


class TasksIODescriptions(object):
    image_to_image = 'image_to_image'
    images_to_image = 'images_to_image'
    image_to_text = 'image_to_text'
    seed_to_image = 'seed_to_image'
    text_to_speech = 'text_to_speech'
    text_to_text = 'text_to_text'
    speech_to_text = 'speech_to_text'
    speech_to_speech = 'speech_to_speech'
    speeches_to_speech = 'speeches_to_speech'
    visual_grounding = 'visual_grounding'
    visual_question_answering = 'visual_question_answering'
    visual_entailment = 'visual_entailment'
    generative_multi_modal_embedding = 'generative_multi_modal_embedding'


class Tasks(CVTasks, NLPTasks, AudioTasks, MultiModalTasks):
    """ Names for tasks supported by modelscope.
    Holds the standard task name to use for identifying different tasks.
    This should be used to register models, pipelines, trainers.
    """
    reverse_field_index = {}

    @staticmethod
    def find_field_by_task(task_name):
        if len(Tasks.reverse_field_index) == 0:
            # Lazy init, not thread safe
            field_dict = {
                Fields.cv: [
                    getattr(Tasks, attr) for attr in dir(CVTasks)
                    if not attr.startswith('__')
                ],
                Fields.nlp: [
                    getattr(Tasks, attr) for attr in dir(NLPTasks)
                    if not attr.startswith('__')
                ],
                Fields.audio: [
                    getattr(Tasks, attr) for attr in dir(AudioTasks)
                    if not attr.startswith('__')
                ],
                Fields.multi_modal: [
                    getattr(Tasks, attr) for attr in dir(MultiModalTasks)
                    if not attr.startswith('__')
                ],
            }
            for field, tasks in field_dict.items():
                for task in tasks:
                    if task in Tasks.reverse_field_index:
                        raise ValueError(f'Duplicate task: {task}')
                    Tasks.reverse_field_index[task] = field

        return Tasks.reverse_field_index.get(task_name)
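
# Illustrative sketch: find_field_by_task maps a registered task name back to
# the field that defines it, and returns None for unknown names, e.g.
#   >>> Tasks.find_field_by_task(Tasks.image_classification)
#   'cv'
#   >>> Tasks.find_field_by_task('not-a-real-task') is None
#   True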


class InputFields(object):
    """ Names for the fields of the input data passed to pipelines
    """
    img = 'img'
    text = 'text'
    audio = 'audio'


class Hubs(enum.Enum):
    """ Source hub from which an entity (such as a Dataset or Model) is obtained
    """
    modelscope = 'modelscope'
    huggingface = 'huggingface'


class DownloadMode(enum.Enum):
    """ How to treat existing datasets
    """
    REUSE_DATASET_IF_EXISTS = 'reuse_dataset_if_exists'
    FORCE_REDOWNLOAD = 'force_redownload'
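
# Illustrative sketch of how this enum is typically consumed; it assumes the
# dataset-loading entry point (MsDataset.load) accepts a `download_mode`
# keyword, which is an assumption of this example rather than part of this file:
#   from modelscope.msdatasets import MsDataset
#   ds = MsDataset.load('some-dataset', download_mode=DownloadMode.FORCE_REDOWNLOAD)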


class DatasetFormations(enum.Enum):
    """ How a dataset is organized and interpreted
    """
    # formation compatible with the official huggingface datasets layout, which
    # organizes the whole dataset into a single (zip) file.
    hf_compatible = 1
    # native modelscope formation that supports, among other things,
    # multiple files in a dataset
    native = 2


DatasetMetaFormats = {
    DatasetFormations.native: ['.json'],
    DatasetFormations.hf_compatible: ['.py'],
}
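
# Illustrative lookup: the meta-file extensions expected for each formation, e.g.
#   >>> DatasetMetaFormats[DatasetFormations.native]
#   ['.json']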


class ModelFile(object):
    CONFIGURATION = 'configuration.json'
    README = 'README.md'
    TF_SAVED_MODEL_FILE = 'saved_model.pb'
    TF_GRAPH_FILE = 'tf_graph.pb'
    TF_CHECKPOINT_FOLDER = 'tf_ckpts'
    TF_CKPT_PREFIX = 'ckpt-'
    TORCH_MODEL_FILE = 'pytorch_model.pt'
    TORCH_MODEL_BIN_FILE = 'pytorch_model.bin'
    VOCAB_FILE = 'vocab.txt'
    ONNX_MODEL_FILE = 'model.onnx'
    LABEL_MAPPING = 'label_mapping.json'
    TRAIN_OUTPUT_DIR = 'output'
    TS_MODEL_FILE = 'model.ts'
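
# Illustrative sketch (the model directory below is a hypothetical placeholder):
# these standard file names are joined onto a model's root directory, e.g.
#   import os
#   config_path = os.path.join('/path/to/model', ModelFile.CONFIGURATION)
#   weights_path = os.path.join('/path/to/model', ModelFile.TORCH_MODEL_BIN_FILE)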


class ConfigFields(object):
    """ First-level keys in the configuration file
    """
    framework = 'framework'
    task = 'task'
    pipeline = 'pipeline'
    model = 'model'
    dataset = 'dataset'
    preprocessor = 'preprocessor'
    train = 'train'
    evaluation = 'evaluation'
    postprocessor = 'postprocessor'
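
# Illustrative configuration.json fragment (values are hypothetical) showing
# these first-level keys in use:
#   {
#       "framework": "pytorch",
#       "task": "text-classification",
#       "model": {...},
#       "train": {...}
#   }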


class ConfigKeys(object):
    """Fixed keys in the configuration file"""
    train = 'train'
    val = 'val'


class Requirements(object):
    """Requirement names for each module
    """
    protobuf = 'protobuf'
    sentencepiece = 'sentencepiece'
    sklearn = 'sklearn'
    scipy = 'scipy'
    timm = 'timm'
    tokenizers = 'tokenizers'
    tf = 'tf'
    torch = 'torch'


class Frameworks(object):
    tf = 'tensorflow'
    torch = 'pytorch'
    kaldi = 'kaldi'


DEFAULT_MODEL_REVISION = 'master'
DEFAULT_DATASET_REVISION = 'master'
DEFAULT_DATASET_NAMESPACE = 'modelscope'


class ModeKeys:
    TRAIN = 'train'
    EVAL = 'eval'
    INFERENCE = 'inference'


class LogKeys:
    ITER = 'iter'
    ITER_TIME = 'iter_time'
    EPOCH = 'epoch'
    LR = 'lr'  # learning rate
    MODE = 'mode'
    DATA_LOAD_TIME = 'data_load_time'
    ETA = 'eta'  # estimated time of arrival
    MEMORY = 'memory'
    LOSS = 'loss'


class TrainerStages:
    before_run = 'before_run'
    before_train_epoch = 'before_train_epoch'
    before_train_iter = 'before_train_iter'
    after_train_iter = 'after_train_iter'
    after_train_epoch = 'after_train_epoch'
    before_val_epoch = 'before_val_epoch'
    before_val_iter = 'before_val_iter'
    after_val_iter = 'after_val_iter'
    after_val_epoch = 'after_val_epoch'
    after_run = 'after_run'


class ColorCodes:
    MAGENTA = '\033[95m'
    YELLOW = '\033[93m'
    GREEN = '\033[92m'
    RED = '\033[91m'
    END = '\033[0m'
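
# Illustrative usage: wrap a message in a color code and reset it with END, e.g.
#   print(f'{ColorCodes.GREEN}training finished{ColorCodes.END}')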


class Devices:
    """Devices used for training and inference"""
    cpu = 'cpu'
    gpu = 'gpu'