Merge branch 'master-gitlab' into master-merge-internal20231007

mulin.lyh committed 2023-10-09 17:03:13 +08:00
2 changed files with 85 additions and 63 deletions

View File

@@ -1,15 +1,19 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
from contextlib import contextmanager
from typing import Any, Callable, Dict, Iterator, List, Tuple, Union
import json
import torch
from transformers import PreTrainedTokenizer
from modelscope import AutoTokenizer, Pipeline
from modelscope import (AutoModelForCausalLM, AutoTokenizer, Pipeline,
snapshot_download)
from modelscope.models.base import Model
from modelscope.models.nlp import ChatGLM2Tokenizer, Llama2Tokenizer
from modelscope.pipelines.builder import PIPELINES
from modelscope.pipelines.util import is_model, is_official_hub_path
from modelscope.utils.constant import Invoke, Tasks
from modelscope.utils.constant import Invoke, ModelFile, Tasks
from modelscope.utils.logger import get_logger
logger = get_logger()
@@ -23,13 +27,24 @@ class LLMPipeline(Pipeline):
logger.info(f'initiate model from {model}')
if isinstance(model, str) and is_official_hub_path(model):
logger.info(f'initiate model from location {model}.')
return Model.from_pretrained(
model,
invoked_by=Invoke.PIPELINE,
device_map=self.device_map,
torch_dtype=self.torch_dtype,
ignore_file_pattern=self.ignore_file_pattern) if is_model(
model) else model
if is_model(model):
return Model.from_pretrained(
model,
invoked_by=Invoke.PIPELINE,
device_map=self.device_map,
torch_dtype=self.torch_dtype,
ignore_file_pattern=self.ignore_file_pattern)
else:
model_dir = model if os.path.exists(
model) else snapshot_download(model)
# TODO: Temporary use of AutoModelForCausalLM;
# needs to be replaced with a universal solution
model = AutoModelForCausalLM.from_pretrained(
model_dir,
device_map=self.device_map,
trust_remote_code=True)
model.model_dir = model_dir
return model
else:
return model
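The added branch above falls back to a plain AutoModelForCausalLM load for hub checkpoints that is_model() does not recognize as ModelScope models. A minimal standalone sketch of that fallback path, assuming a downloadable model id (the id below is taken from the tests and is only illustrative, as are the device_map and trust_remote_code settings):

import os

from modelscope import AutoModelForCausalLM, snapshot_download

model = 'qwen/Qwen-7B-Chat'  # illustrative id; a local path would skip the download
model_dir = model if os.path.exists(model) else snapshot_download(model)
model = AutoModelForCausalLM.from_pretrained(
    model_dir, device_map='auto', trust_remote_code=True)
model.model_dir = model_dir  # LLMPipeline reads this attribute afterwards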
@@ -39,9 +54,11 @@ class LLMPipeline(Pipeline):
tokenizer: PreTrainedTokenizer = None,
*args,
**kwargs):
self.device_map = kwargs.pop('device_map', None)
self.torch_dtype = kwargs.pop('torch_dtype', None)
self.ignore_file_pattern = kwargs.pop('ignore_file_pattern', None)
super().__init__(*args, **kwargs)
with self._temp_configuration_file(kwargs):
super().__init__(*args, **kwargs)
tokenizer_class = None
if isinstance(format_messages, str):
@@ -53,14 +70,9 @@ class LLMPipeline(Pipeline):
if format_messages is None:
model_type = self.cfg.safe_get('model.type',
'').lower().split('-')[0]
if model_type in LLM_FORMAT_MAP:
format_messages, format_output, tokenizer_class = LLM_FORMAT_MAP[
model_type]
else:
raise KeyError(
f'model type `{model_type}` is not supported for LLM pipeline!'
)
if format_messages is not None:
self.format_messages = format_messages
@@ -69,6 +81,19 @@ class LLMPipeline(Pipeline):
self.tokenizer = self._get_tokenizer(
tokenizer_class) if tokenizer is None else tokenizer
@contextmanager
def _temp_configuration_file(self, kwargs: Dict[str, Any]):
kwargs['model'] = model = self.initiate_single_model(kwargs['model'])
model_dir = model if isinstance(model, str) else model.model_dir
configuration_path = os.path.join(model_dir, ModelFile.CONFIGURATION)
if os.path.exists(configuration_path):
yield
else:
with open(configuration_path, 'w') as f:
json.dump({'framework': 'pytorch', 'task': 'chat'}, f)
yield
os.remove(configuration_path)
def _process_single(self, inputs, *args, **kwargs) -> Dict[str, Any]:
preprocess_params = kwargs.get('preprocess_params', {})
forward_params = kwargs.get('forward_params', {})
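The _temp_configuration_file helper added above lets super().__init__ run even when a hub checkpoint ships without a ModelScope configuration file: a minimal stub is written before initialization and removed afterwards. A self-contained sketch of just that mechanism, assuming ModelFile.CONFIGURATION resolves to 'configuration.json' (as the replaced skip reason in the tests below suggests) and using a throwaway directory in place of a real model dir:

import json
import os
import tempfile
from contextlib import contextmanager

CONFIGURATION = 'configuration.json'  # assumed value of ModelFile.CONFIGURATION

@contextmanager
def temp_configuration_file(model_dir: str):
    path = os.path.join(model_dir, CONFIGURATION)
    if os.path.exists(path):
        yield  # a real configuration exists, leave it alone
    else:
        with open(path, 'w') as f:
            json.dump({'framework': 'pytorch', 'task': 'chat'}, f)
        yield
        os.remove(path)  # drop the stub once initialization is done

with tempfile.TemporaryDirectory() as d:
    with temp_configuration_file(d):
        print(os.listdir(d))  # ['configuration.json'] while the stub is present
    print(os.listdir(d))      # [] again after cleanup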
@@ -227,7 +252,7 @@ def chatglm2_format_messages(messages, tokenizer, **kwargs):
return prompt
prompt = build_chatglm2_prompt(messages, **kwargs)
return tokenizer(prompt, return_tensors='pt')
return tokenizer(prompt, return_token_type_ids=False, return_tensors='pt')
def chatglm2_format_output(response, **kwargs):
@@ -371,7 +396,7 @@ def wizardlm_format_messages(messages, tokenizer, **kwargs):
return prompts
prompts = build_wizardlm_prompt(messages, tokenizer, **kwargs)
return tokenizer(prompts, return_tensors='pt')
return tokenizer(prompts, return_token_type_ids=False, return_tensors='pt')
def wizardcode_format_messages(messages, tokenizer, **kwargs):
@@ -388,7 +413,11 @@ def wizardcode_format_messages(messages, tokenizer, **kwargs):
prompt = system + '\n\n### Instruction:\n' + user + '\n\n### Response:'
inputs = tokenizer(
prompt, padding=False, add_special_tokens=False, return_tensors='pt')
prompt,
return_token_type_ids=False,
padding=False,
add_special_tokens=False,
return_tensors='pt')
return inputs
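The tokenizer calls changed in chatglm2_format_messages, wizardlm_format_messages and wizardcode_format_messages now pass return_token_type_ids=False. A small illustration of the effect, using an arbitrary Hugging Face checkpoint whose tokenizer emits token_type_ids by default (the checkpoint choice is purely for demonstration; the presumable motivation is that an extra token_type_ids tensor would otherwise be forwarded to generate() as an unused model kwarg):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained('bert-base-uncased')  # demo checkpoint only
default = tok('hello world', return_tensors='pt')
trimmed = tok('hello world', return_token_type_ids=False, return_tensors='pt')
print('token_type_ids' in default)  # True: produced by default for this tokenizer
print('token_type_ids' in trimmed)  # False: excluded, so it never reaches generate()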

View File

@@ -3,11 +3,7 @@ import unittest
import torch
from modelscope import (AutoConfig, AutoModelForCausalLM, Model,
snapshot_download)
from modelscope.pipelines import pipeline
from modelscope.pipelines.nlp.llm_pipeline import LLMPipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.test_utils import test_level
@@ -134,25 +130,25 @@ class LLMPipelineTest(unittest.TestCase):
}
self.gen_cfg = {'do_sample': True, 'max_length': 512}
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_chatglm2(self):
pipe = LLMPipeline(model='ZhipuAI/chatglm2-6b', device_map='auto')
print('messages: ', pipe(self.messages_zh, **self.gen_cfg))
print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg))
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_chatglm2int4(self):
pipe = LLMPipeline(model='ZhipuAI/chatglm2-6b-int4')
print('messages: ', pipe(self.messages_zh, **self.gen_cfg))
print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg))
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_chatglm232k(self):
pipe = LLMPipeline(model='ZhipuAI/chatglm2-6b-32k', device_map='auto')
print('messages: ', pipe(self.messages_zh, **self.gen_cfg))
print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg))
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_llama2(self):
pipe = LLMPipeline(
model='modelscope/Llama-2-7b-ms',
@@ -162,7 +158,7 @@ class LLMPipelineTest(unittest.TestCase):
print('messages: ', pipe(self.messages_en, **self.gen_cfg))
print('prompt: ', pipe(self.prompt_en, **self.gen_cfg))
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_llama2chat(self):
pipe = LLMPipeline(
model='modelscope/Llama-2-7b-chat-ms',
@@ -173,7 +169,7 @@ class LLMPipelineTest(unittest.TestCase):
print('messages: ', pipe(self.messages_en, **self.gen_cfg))
print('prompt: ', pipe(self.prompt_en, **self.gen_cfg))
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_codellama(self):
pipe = LLMPipeline(
model='AI-ModelScope/CodeLlama-7b-Instruct-hf',
@@ -183,7 +179,7 @@ class LLMPipelineTest(unittest.TestCase):
print('messages: ', pipe(self.messages_code, **self.gen_cfg))
print('prompt: ', pipe(self.prompt_code, **self.gen_cfg))
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_baichuan_7b(self):
pipe = LLMPipeline(
model='baichuan-inc/baichuan-7B',
@@ -192,7 +188,7 @@ class LLMPipelineTest(unittest.TestCase):
print('messages: ', pipe(self.messages_zh, **self.gen_cfg))
print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg))
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_baichuan_13b(self):
pipe = LLMPipeline(
model='baichuan-inc/Baichuan-13B-Base',
@@ -201,7 +197,7 @@ class LLMPipelineTest(unittest.TestCase):
print('messages: ', pipe(self.messages_zh, **self.gen_cfg))
print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg))
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_baichuan_13bchat(self):
pipe = LLMPipeline(
model='baichuan-inc/Baichuan-13B-Chat',
@@ -210,7 +206,7 @@ class LLMPipelineTest(unittest.TestCase):
print('messages: ', pipe(self.messages_zh, **self.gen_cfg))
print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg))
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_baichuan2_7b(self):
pipe = LLMPipeline(
model='baichuan-inc/Baichuan2-7B-Base',
@@ -219,7 +215,7 @@ class LLMPipelineTest(unittest.TestCase):
print('messages: ', pipe(self.messages_zh, **self.gen_cfg))
print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg))
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_baichuan2_7bchat(self):
pipe = LLMPipeline(
model='baichuan-inc/Baichuan2-7B-Chat',
@@ -228,7 +224,25 @@ class LLMPipelineTest(unittest.TestCase):
print('messages: ', pipe(self.messages_zh, **self.gen_cfg))
print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg))
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skip('Need bitsandbytes')
def test_baichuan2_7bchat_int4(self):
pipe = LLMPipeline(
model='baichuan-inc/Baichuan2-7B-Chat-4bits',
device_map='auto',
torch_dtype=torch.float16)
print('messages: ', pipe(self.messages_zh, **self.gen_cfg))
print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg))
@unittest.skip('Need bitsandbytes')
def test_baichuan2_13bchat_int4(self):
pipe = LLMPipeline(
model='baichuan-inc/Baichuan2-13B-Chat-4bits',
device_map='auto',
torch_dtype=torch.float16)
print('messages: ', pipe(self.messages_zh, **self.gen_cfg))
print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg))
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_wizardlm_13b(self):
pipe = LLMPipeline(
model='AI-ModelScope/WizardLM-13B-V1.2',
@@ -238,7 +252,7 @@ class LLMPipelineTest(unittest.TestCase):
print('messages: ', pipe(self.messages_en, **self.gen_cfg))
print('prompt: ', pipe(self.prompt_en, **self.gen_cfg))
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_wizardmath(self):
pipe = LLMPipeline(
model='AI-ModelScope/WizardMath-7B-V1.0',
@@ -248,7 +262,7 @@ class LLMPipelineTest(unittest.TestCase):
print('messages: ', pipe(self.message_wizard_math, **self.gen_cfg))
print('prompt: ', pipe(self.prompt_wizard_math, **self.gen_cfg))
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_wizardcode_13b(self):
pipe = LLMPipeline(
model='AI-ModelScope/WizardCoder-Python-13B-V1.0',
@@ -268,42 +282,21 @@ class LLMPipelineTest(unittest.TestCase):
print('messages: ', pipe(self.message_wizard_code, **self.gen_cfg))
print('prompt: ', pipe(self.prompt_wizard_code, **self.gen_cfg))
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_qwen(self):
pipe = LLMPipeline(
model='ccyh123/Qwen-7B-Chat',
device_map='auto',
format_messages='qwen')
pipe = LLMPipeline(model='qwen/Qwen-7B-Chat', device_map='auto')
print('messages: ', pipe(self.messages_zh_with_system, **self.gen_cfg))
print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg))
@unittest.skip('Need AutoGPTQ')
@unittest.skip('Need optimum and auto-gptq')
def test_qwen_int4(self):
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
model_dir = snapshot_download('ccyh123/Qwen-7B-Chat-Int4')
quantize_config = BaseQuantizeConfig(
bits=4, # quantize model to 4-bit
group_size=128, # it is recommended to set the value to 128
desc_act=False,  # set to False to significantly speed up inference; perplexity may be slightly worse
)
model = AutoGPTQForCausalLM.from_pretrained(
model_dir,
quantize_config,
device_map='auto',
trust_remote_code=True,
use_safetensors=True)
model.model_dir = model_dir
pipe = LLMPipeline(model=model, format_messages='qwen')
pipe = LLMPipeline(model='qwen/Qwen-7B-Chat-Int4', device_map='auto')
print('messages: ', pipe(self.messages_zh_with_system, **self.gen_cfg))
print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg))
@unittest.skip('configuration.json does not exist')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_qwen_vl(self):
pipe = LLMPipeline(
model='ccyh123/Qwen-VL-Chat',
device_map='auto',
format_messages='qwen')
pipe = LLMPipeline(model='qwen/Qwen-VL-Chat', device_map='auto')
print('messages: ', pipe(self.messages_mm, **self.gen_cfg))
print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg))
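The remaining changes in this file bump most skip levels from 0 to 1, so the heavyweight download-and-generate tests only run at a higher test level (presumably selected through an environment variable read by test_level(); that mechanism is not shown in this diff). For reference, a minimal usage sketch matching the updated test_qwen. The message structure mirrors the messages fixtures used in this class (a dict carrying a 'messages' list of role/content entries); that shape, and the GPU needed for device_map='auto', are assumptions here because the fixture definitions fall outside these hunks:

from modelscope.pipelines.nlp.llm_pipeline import LLMPipeline

# Mirrors test_qwen: downloading qwen/Qwen-7B-Chat and running generation needs
# network access and enough GPU memory for device_map='auto'.
pipe = LLMPipeline(model='qwen/Qwen-7B-Chat', device_map='auto')
messages = {'messages': [{'role': 'user', 'content': 'Hello!'}]}  # assumed fixture shape
print(pipe(messages, do_sample=True, max_length=512))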