Merge tag 'v1.9.0' of gitlab.alibaba-inc.com:Ali-MaaS/MaaS-lib into master-gitlab

release 1.9.0
Author: mulin.lyh
Date: 2023-09-05 17:52:56 +08:00
26 changed files with 76 additions and 63 deletions

View File

@@ -9,7 +9,7 @@ cpu_sets_arr=($cpu_sets)
is_get_file_lock=false
CI_COMMAND=${CI_COMMAND:-bash .dev_scripts/ci_container_test.sh python tests/run.py --parallel 2 --run_config tests/run_config.yaml}
echo "ci command: $CI_COMMAND"
PR_CHANGED_FILES="${PR_CHANGED_FILES:-''}"
PR_CHANGED_FILES="${PR_CHANGED_FILES:-}"
echo "PR modified files: $PR_CHANGED_FILES"
PR_CHANGED_FILES=${PR_CHANGED_FILES//[ ]/#}
echo "PR_CHANGED_FILES: $PR_CHANGED_FILES"

View File

@@ -48,10 +48,10 @@ ENV SETUPTOOLS_USE_DISTUTILS=stdlib
RUN CUDA_HOME=/usr/local/cuda TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6" pip install --no-cache-dir 'git+https://github.com/facebookresearch/detectron2.git'
# torchmetrics==0.11.4 for ofa
RUN pip install --no-cache-dir tiktoken torchmetrics==0.11.4 'protobuf<=3.20.0' bitsandbytes basicsr && \
git clone -b v1.0.8 https://github.com/Dao-AILab/flash-attention && \
cd flash-attention && pip install . && \
pip install csrc/layer_norm && \
pip install csrc/rotary && \
cd .. && \
rm -rf flash-attention
RUN pip install --no-cache-dir tiktoken torchmetrics==0.11.4 'transformers<4.31.0' transformers_stream_generator 'protobuf<=3.20.0' bitsandbytes basicsr
COPY docker/scripts/install_flash_attension.sh /tmp/install_flash_attension.sh
RUN if [ "$USE_GPU" = "True" ] ; then \
bash /tmp/install_flash_attension.sh; \
else \
echo 'cpu unsupport flash attention'; \
fi

View File

@@ -0,0 +1,6 @@
git clone -b v1.0.8 https://github.com/Dao-AILab/flash-attention && \
cd flash-attention && pip install . && \
pip install csrc/layer_norm && \
pip install csrc/rotary && \
cd .. && \
rm -rf flash-attention

View File

@@ -77,7 +77,7 @@ def cfg_modify_fn(cfg):
kwargs = dict(
model=training_args.model_id,
model=training_args.model,
model_revision=args.model_revision,
work_dir=training_args.work_dir,
train_dataset=train_dataset,

View File

@@ -65,6 +65,7 @@ class MTTR(nn.Module):
# keep only the valid frames (frames which are annotated):
# (for example, in a2d-sentences only the center frame in each window is annotated).
for layer_out in backbone_out:
valid_indices = valid_indices.to(layer_out.tensors.device)
layer_out.tensors = layer_out.tensors.index_select(
0, valid_indices)
layer_out.mask = layer_out.mask.index_select(0, valid_indices)
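The added `.to(layer_out.tensors.device)` guards against a CPU/CUDA mismatch when the backbone runs on GPU; a minimal sketch of the failure mode this avoids (hypothetical tensors, not MTTR code):

```python
import torch

# index_select requires the index tensor to live on the same device as the
# tensor being indexed, otherwise it raises a RuntimeError under CUDA.
feats = torch.randn(4, 3, device='cuda' if torch.cuda.is_available() else 'cpu')
valid_indices = torch.tensor([1, 3])               # typically built on CPU
valid_indices = valid_indices.to(feats.device)     # the added .to(...) call
picked = feats.index_select(0, valid_indices)
```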

View File

@@ -10,7 +10,8 @@ import torch
import torch.nn as nn
import torch.nn.functional as F
from diffusers.configuration_utils import ConfigMixin, register_to_config
from diffusers.models.cross_attention import CrossAttention, LoRALinearLayer
from diffusers.models.attention_processor import Attention
from diffusers.models.lora import LoRALinearLayer
from diffusers.models.modeling_utils import ModelMixin
from diffusers.models.resnet import (Downsample2D, Upsample2D, downsample_2d,
partial, upsample_2d)
@@ -467,7 +468,7 @@ class ControlLoRACrossAttnProcessor(LoRACrossAttnProcessor):
return control_states
def __call__(self,
attn: CrossAttention,
attn: Attention,
hidden_states,
encoder_hidden_states=None,
attention_mask=None,
@@ -619,7 +620,7 @@ class ControlLoRACrossAttnProcessorV2(LoRACrossAttnProcessor):
return control_states
def __call__(self,
attn: CrossAttention,
attn: Attention,
hidden_states,
encoder_hidden_states=None,
attention_mask=None,
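Both hunks track diffusers' reorganisation: `CrossAttention` from `diffusers.models.cross_attention` is now `Attention` in `diffusers.models.attention_processor`, and `LoRALinearLayer` lives in `diffusers.models.lora`. If this module ever needs to import on older diffusers as well, a hedged compatibility shim would look like the sketch below (not part of this commit):

```python
try:
    # import layout assumed by this commit (newer diffusers)
    from diffusers.models.attention_processor import Attention
    from diffusers.models.lora import LoRALinearLayer
except ImportError:
    # older diffusers exposed the class under its previous name and module
    from diffusers.models.cross_attention import CrossAttention as Attention
    from diffusers.models.cross_attention import LoRALinearLayer
```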

View File

@@ -11,7 +11,7 @@ import torch.nn.functional as F
from diffusers import (AutoencoderKL, DDPMScheduler, DiffusionPipeline,
DPMSolverMultistepScheduler, UNet2DConditionModel,
utils)
from diffusers.models import cross_attention
from diffusers.models import attention
from diffusers.utils import deprecation_utils
from swift import AdapterConfig, LoRAConfig, PromptConfig, Swift
from transformers import CLIPTextModel, CLIPTokenizer
@@ -30,7 +30,7 @@ from .control_sd_lora import ControlLoRATuner
utils.deprecate = lambda *arg, **kwargs: None
deprecation_utils.deprecate = lambda *arg, **kwargs: None
cross_attention.deprecate = lambda *arg, **kwargs: None
attention.deprecate = lambda *arg, **kwargs: None
__tuner_MAP__ = {'lora': LoRATuner, 'control_lora': ControlLoRATuner}
@@ -113,12 +113,10 @@ class EfficientStableDiffusion(TorchModel):
rank = tuner_config[
'rank'] if tuner_config and 'rank' in tuner_config else 4
lora_config = LoRAConfig(
rank=rank,
replace_modules=['to_q', 'to_k', 'to_v', 'to_out.0'],
r=rank,
target_modules=['to_q', 'to_k', 'to_v', 'to_out.0'],
merge_weights=False,
only_lora_trainable=False,
use_merged_linear=False,
pretrained_weights=pretrained_tuner)
use_merged_linear=False)
self.unet = Swift.prepare_model(self.unet, lora_config)
elif tuner_name == 'swift-adapter':
adapter_length = tuner_config[
@@ -126,10 +124,8 @@ class EfficientStableDiffusion(TorchModel):
adapter_config = AdapterConfig(
dim=-1,
hidden_pos=0,
module_name=r'.*ff\.net\.2$',
adapter_length=adapter_length,
only_adapter_trainable=False,
pretrained_weights=pretrained_tuner)
target_modules=r'.*ff\.net\.2$',
adapter_length=adapter_length)
self.unet = Swift.prepare_model(self.unet, adapter_config)
elif tuner_name == 'swift-prompt':
prompt_length = tuner_config[
@@ -139,14 +135,11 @@ class EfficientStableDiffusion(TorchModel):
320, 320, 640, 640, 1280, 1280, 1280, 1280, 1280, 640, 640,
640, 320, 320, 320
],
module_layer_name=
target_modules=
r'.*[down_blocks|up_blocks|mid_block]\.\d+\.attentions\.\d+\.transformer_blocks\.\d+$',
embedding_pos=0,
prompt_length=prompt_length,
only_prompt_trainable=False,
attach_front=False,
pretrained_weights=pretrained_tuner,
extract_embedding=True)
attach_front=False)
self.unet = Swift.prepare_model(self.unet, prompt_config)
elif tuner_name in ('lora', 'control_lora'):
# if not set the config of control-tuner, we add the lora tuner directly to the original framework,
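The tuner configs here are rewritten for Swift's current argument names (`r` and `target_modules` replace `rank`, `replace_modules`, `module_name` and `module_layer_name`), and arguments Swift no longer accepts (`only_*_trainable`, `pretrained_weights`, `extract_embedding`) are dropped. A minimal standalone sketch of the updated LoRA attachment; the rank value is just the default used above:

```python
from swift import LoRAConfig, Swift

lora_config = LoRAConfig(
    r=4,                                                  # default rank from the code above
    target_modules=['to_q', 'to_k', 'to_v', 'to_out.0'],  # attention projections
    merge_weights=False,
    use_merged_linear=False)
# unet = Swift.prepare_model(unet, lora_config)           # as done in the hunk above
```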

View File

@@ -8,7 +8,8 @@ from typing import List, Tuple, Union
import torch
import torch.nn as nn
from diffusers.configuration_utils import ConfigMixin, register_to_config
from diffusers.models.cross_attention import CrossAttention, LoRALinearLayer
from diffusers.models.attention_processor import Attention
from diffusers.models.lora import LoRALinearLayer
from diffusers.models.modeling_utils import ModelMixin
from diffusers.utils.outputs import BaseOutput
@@ -84,7 +85,7 @@ class LoRACrossAttnProcessor(nn.Module):
self.output_states_skipped = is_skipped
def __call__(self,
attn: CrossAttention,
attn: Attention,
hidden_states,
encoder_hidden_states=None,
attention_mask=None,

View File

@@ -112,7 +112,7 @@ class VideoToVideo(TorchModel):
generator.eval()
load_dict = torch.load(cfg.model_path, map_location='cpu')
ret = generator.load_state_dict(load_dict['state_dict'], strict=True)
self.generator = generator
self.generator = generator.half()
logger.info('Load model {} path {}, with local status {}'.format(
cfg.UNet.type, cfg.model_path, ret))
@@ -175,7 +175,7 @@ class VideoToVideo(TorchModel):
video_data = rearrange(video_data, 'b f c h w -> (b f) c h w')
video_data_list = torch.chunk(
video_data, video_data.shape[0] // 2, dim=0)
video_data, video_data.shape[0] // 1, dim=0)
with torch.no_grad():
decode_data = []
for vd_data in video_data_list:
@@ -185,6 +185,7 @@ class VideoToVideo(TorchModel):
video_data_feature = torch.cat(decode_data, dim=0)
video_data_feature = rearrange(
video_data_feature, '(b f) c h w -> b c f h w', b=batch_size)
torch.cuda.empty_cache()
with amp.autocast(enabled=True):
total_noise_levels = 600
@@ -209,6 +210,7 @@ class VideoToVideo(TorchModel):
t_min=0,
discretization='trailing')
torch.cuda.empty_cache()
scale_factor = 0.18215
vid_tensor_feature = 1. / scale_factor * gen_vid
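The VideoToVideo changes trade throughput for memory: the generator is cast to fp16, latent frames are decoded one per chunk (`shape[0] // 1`), and `torch.cuda.empty_cache()` is called between the heavy stages. A rough sketch of the decode loop; `autoencoder.decode` stands in for whatever decoder interface the model actually uses:

```python
import torch
from einops import rearrange

def decode_in_chunks(autoencoder, video_data, batch_size):
    # one frame per chunk keeps peak VRAM low for long clips
    chunks = torch.chunk(video_data, video_data.shape[0] // 1, dim=0)
    decoded = []
    with torch.no_grad():
        for chunk in chunks:
            decoded.append(autoencoder.decode(chunk))
    feature = torch.cat(decoded, dim=0)
    feature = rearrange(feature, '(b f) c h w -> b c f h w', b=batch_size)
    torch.cuda.empty_cache()   # release cached blocks before the diffusion stage
    return feature
```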

View File

@@ -240,7 +240,14 @@ TASK_INPUTS = {
InputType.IMAGE,
Tasks.video_embedding:
InputType.VIDEO,
Tasks.virtual_try_on: (InputType.IMAGE, InputType.IMAGE, InputType.IMAGE),
Tasks.virtual_try_on: [
(InputType.IMAGE, InputType.IMAGE, InputType.IMAGE),
{
'masked_model': InputType.IMAGE,
'pose': InputType.IMAGE,
'cloth': InputType.IMAGE,
}
],
Tasks.text_driven_segmentation: {
InputKeys.IMAGE: InputType.IMAGE,
InputKeys.TEXT: InputType.TEXT
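Listing both shapes means input validation for `virtual_try_on` now accepts the legacy image triple as well as a keyed dict, matching the pipeline test updated later in this commit. A hedged usage sketch; the model id is an assumption, substitute whichever try-on model you actually load:

```python
from PIL import Image
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# model id below is illustrative, not taken from this commit
tryon = pipeline(Tasks.virtual_try_on, model='damo/cv_daflow_virtual-try-on_base')
masked_model = Image.open('data/test/images/virtual_tryon_model.jpg')
pose = Image.open('data/test/images/virtual_tryon_pose.jpg')
cloth = Image.open('data/test/images/virtual_tryon_cloth.jpg')

result_tuple = tryon((masked_model, pose, cloth))   # legacy positional form
result_dict = tryon({'masked_model': masked_model, 'pose': pose, 'cloth': cloth})
```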

View File

@@ -31,7 +31,7 @@ class FaceEmotionPipeline(Pipeline):
logger.info('load model done')
def preprocess(self, input: Input) -> Dict[str, Any]:
img = LoadImage.convert_to_ndarray(input['img_path'])
img = LoadImage.convert_to_ndarray(input)
return img
def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
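This pipeline (and the three below it) stops unwrapping a `{'img_path': ...}` / `{'input_path': ...}` dict and passes the raw input straight to `LoadImage.convert_to_ndarray`, which already accepts a path, URL, PIL image or ndarray. A sketch of the simplified call; the task constant is assumed to be the one registered for this pipeline:

```python
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

face_emotion = pipeline(Tasks.face_emotion, model='damo/cv_face-emotion')
result = face_emotion('data/test/images/face_emotion.jpg')   # no wrapper dict needed
```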

View File

@@ -32,14 +32,13 @@ class NanoDettForFaceHumanHandDetectionPipeline(Pipeline):
logger.info('load model done')
def preprocess(self, input: Input) -> Dict[str, Any]:
img = LoadImage.convert_to_ndarray(input['input_path'])
img = LoadImage.convert_to_ndarray(input)
return img
def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
cls_list, bbox_list, score_list = det_infer.inference(
self.model, self.device, input)
logger.info(cls_list, bbox_list, score_list)
return {
OutputKeys.LABELS: cls_list,
OutputKeys.BOXES: bbox_list,

View File

@@ -30,7 +30,7 @@ class HandStaticPipeline(Pipeline):
logger.info('load model done')
def preprocess(self, input: Input) -> Dict[str, Any]:
img = LoadImage.convert_to_ndarray(input['img_path'])
img = LoadImage.convert_to_ndarray(input)
return img
def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:

View File

@@ -31,7 +31,8 @@ class F3NetForProductSegmentationPipeline(Pipeline):
logger.info('load model done')
def preprocess(self, input: Input) -> Dict[str, Any]:
img = LoadImage.convert_to_ndarray(input['input_path'])
img = LoadImage.convert_to_ndarray(input)
img = img.astype(np.float32)
return img

View File

@@ -448,9 +448,9 @@ class SeqGPTPipeline(Pipeline):
# define the forward pass
def forward(self, prompt: str, **forward_params) -> Dict[str, Any]:
# gen & decode
prompt += '[GEN]'
# prompt += '[GEN]'
input_ids = self.tokenizer(
prompt,
prompt + '[GEN]',
return_tensors='pt',
padding=True,
truncation=True,
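Moving the '[GEN]' suffix into the tokenizer call leaves the caller's `prompt` string unmodified, presumably so later uses of the prompt (logging, stripping it from the decoded output, retries) see the original text; the encoded input is identical either way. Sketch:

```python
def encode_for_generation(tokenizer, prompt: str):
    # the generation marker is appended only to the encoded copy;
    # the caller's `prompt` variable is left untouched
    return tokenizer(prompt + '[GEN]', return_tensors='pt',
                     padding=True, truncation=True)
```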

View File

@@ -40,7 +40,8 @@ class CustomCheckpointProcessor(CheckpointProcessor):
def __init__(self,
modifier_token,
modifier_token_id,
torch_type=torch.float32):
torch_type=torch.float32,
safe_serialization=False):
"""Checkpoint processor for custom diffusion.
Args:

View File

@@ -1,5 +1,5 @@
# Make sure to modify __release_datetime__ to release time when making official release.
__version__ = '1.8.1'
__version__ = '1.9.0'
# default release datetime for branches under active development is set
# to be a time far-far-away-into-the-future
__release_datetime__ = '2099-10-13 08:56:12'
__release_datetime__ = '2023-09-06 00:00:00'

View File

@@ -21,7 +21,7 @@ class TestExportStableDiffusion(unittest.TestCase):
os.makedirs(self.tmp_dir)
self.model_id = 'AI-ModelScope/stable-diffusion-v1-5'
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_export_stable_diffusion(self):
model = Model.from_pretrained(self.model_id)
Exporter.from_model(model).export_onnx(
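Several tests in this commit move from level 0 to level 1, so they drop out of the default CI pass and only run when the test level is raised, presumably via the `TEST_LEVEL` environment variable read by `test_level()` (the variable name and import path are assumptions). A sketch of the gating pattern:

```python
import unittest
from modelscope.utils.test_utils import test_level   # assumed import path

class ExampleTest(unittest.TestCase):

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_runs_only_at_level_one_or_above(self):
        # set TEST_LEVEL=1 (assumed variable) in the environment to opt in
        self.assertTrue(True)
```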

View File

@@ -16,7 +16,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.efficient_diffusion_tuning
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_efficient_diffusion_tuning_swift_lora_run_pipeline(self):
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-lora'
model_revision = 'v1.0.2'
@@ -33,7 +33,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
f'Efficient-diffusion-tuning-swift-lora output: {output_image_path}'
)
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_efficient_diffusion_tuning_swift_lora_load_model_from_pretrained(
self):
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-lora'
@@ -41,7 +41,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
model = Model.from_pretrained(model_id, model_revision=model_revision)
self.assertTrue(model.__class__ == EfficientStableDiffusion)
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_efficient_diffusion_tuning_swift_adapter_run_pipeline(self):
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-adapter'
model_revision = 'v1.0.2'
@@ -58,7 +58,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
f'Efficient-diffusion-tuning-swift-adapter output: {output_image_path}'
)
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_efficient_diffusion_tuning_swift_adapter_load_model_from_pretrained(
self):
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-adapter'
@@ -66,7 +66,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
model = Model.from_pretrained(model_id, model_revision=model_revision)
self.assertTrue(model.__class__ == EfficientStableDiffusion)
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_efficient_diffusion_tuning_swift_prompt_run_pipeline(self):
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-prompt'
model_revision = 'v1.0.2'
@@ -83,7 +83,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
f'Efficient-diffusion-tuning-swift-prompt output: {output_image_path}'
)
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_efficient_diffusion_tuning_swift_prompt_load_model_from_pretrained(
self):
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-prompt'

View File

@@ -11,7 +11,7 @@ class FaceEmotionTest(unittest.TestCase):
def setUp(self) -> None:
self.model = 'damo/cv_face-emotion'
self.img = {'img_path': 'data/test/images/face_emotion.jpg'}
self.img = 'data/test/images/face_emotion.jpg'
def pipeline_inference(self, pipeline: Pipeline, input: str):
result = pipeline(input)

View File

@@ -14,9 +14,7 @@ class FaceHumanHandTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/cv_nanodet_face-human-hand-detection'
self.input = {
'input_path': 'data/test/images/face_human_hand_detection.jpg',
}
self.input = 'data/test/images/face_human_hand_detection.jpg'
def pipeline_inference(self, pipeline: Pipeline, input: str):
result = pipeline(input)

View File

@@ -11,7 +11,7 @@ class HandStaticTest(unittest.TestCase):
def setUp(self) -> None:
self.model = 'damo/cv_mobileface_hand-static'
self.input = {'img_path': 'data/test/images/hand_static.jpg'}
self.input = 'data/test/images/hand_static.jpg'
def pipeline_inference(self, pipeline: Pipeline, input: str):
result = pipeline(input)

View File

@@ -17,9 +17,7 @@ class ProductSegmentationTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/cv_F3Net_product-segmentation'
self.input = {
'input_path': 'data/test/images/product_segmentation.jpg'
}
self.input = 'data/test/images/product_segmentation.jpg'
def pipeline_inference(self, pipeline: Pipeline, input: str):
result = pipeline(input)

View File

@@ -20,7 +20,11 @@ class VirtualTryonTest(unittest.TestCase):
masked_model = Image.open('data/test/images/virtual_tryon_model.jpg')
pose = Image.open('data/test/images/virtual_tryon_pose.jpg')
cloth = Image.open('data/test/images/virtual_tryon_cloth.jpg')
input_imgs = (masked_model, pose, cloth)
input_imgs = {
'masked_model': masked_model,
'pose': pose,
'cloth': cloth,
}
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_run_with_model_name(self):

View File

@@ -126,11 +126,12 @@ def get_current_branch():
def get_modified_files():
if 'PR_CHANGED_FILES' in os.environ and os.environ[
'PR_CHANGED_FILES'] != '':
'PR_CHANGED_FILES'].strip() != '':
logger.info('Getting PR modified files.')
# get modify file from environment
diff_files = os.environ['PR_CHANGED_FILES'].replace('#', '\n')
else:
logger.info('Getting diff of branch.')
cmd = ['git', 'diff', '--name-only', 'origin/master...']
diff_files = run_command_get_output(cmd)
logger.info('Diff files: ')
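This is the consumer of the `.dev_scripts` change at the top of the commit: the shell side packs the PR file list by replacing spaces with `#`, this function unpacks it, and the added `.strip()` also ignores whitespace-only values. A condensed sketch of the round trip (file names are illustrative):

```python
import os

packed = 'modelscope/version.py#tests/run_config.yaml'   # spaces -> '#' on the shell side
os.environ['PR_CHANGED_FILES'] = packed

if os.environ.get('PR_CHANGED_FILES', '').strip() != '':
    diff_files = os.environ['PR_CHANGED_FILES'].replace('#', '\n')
else:
    diff_files = None   # fall back to: git diff --name-only origin/master...
print(diff_files.splitlines())   # ['modelscope/version.py', 'tests/run_config.yaml']
```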

View File

@@ -33,7 +33,7 @@ class TestEfficientDiffusionTuningTrainerSwift(unittest.TestCase):
shutil.rmtree(self.tmp_dir)
super().tearDown()
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_efficient_diffusion_tuning_swift_lora_train(self):
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-lora'
model_revision = 'v1.0.2'
@@ -62,7 +62,7 @@ class TestEfficientDiffusionTuningTrainerSwift(unittest.TestCase):
self.assertIn(f'{trainer.timestamp}.log.json', results_files)
self.assertIn(f'epoch_{self.max_epochs}.pth', results_files)
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_efficient_diffusion_tuning_swift_adapter_train(self):
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-adapter'
model_revision = 'v1.0.2'
@@ -91,7 +91,7 @@ class TestEfficientDiffusionTuningTrainerSwift(unittest.TestCase):
self.assertIn(f'{trainer.timestamp}.log.json', results_files)
self.assertIn(f'epoch_{self.max_epochs}.pth', results_files)
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_efficient_diffusion_tuning_swift_prompt_train(self):
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-prompt'
model_revision = 'v1.0.2'