Merge branch 'master-gitlab' into master-merge-internal20231007

modelscope/pipelines/nlp/llm_pipeline.py
@@ -1,15 +1,19 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 import os
+from contextlib import contextmanager
 from typing import Any, Callable, Dict, Iterator, List, Tuple, Union
 
+import json
 import torch
 from transformers import PreTrainedTokenizer
 
-from modelscope import AutoTokenizer, Pipeline
+from modelscope import (AutoModelForCausalLM, AutoTokenizer, Pipeline,
+                        snapshot_download)
 from modelscope.models.base import Model
 from modelscope.models.nlp import ChatGLM2Tokenizer, Llama2Tokenizer
 from modelscope.pipelines.builder import PIPELINES
 from modelscope.pipelines.util import is_model, is_official_hub_path
-from modelscope.utils.constant import Invoke, Tasks
+from modelscope.utils.constant import Invoke, ModelFile, Tasks
 from modelscope.utils.logger import get_logger
 
 logger = get_logger()
@@ -23,13 +27,24 @@ class LLMPipeline(Pipeline):
         logger.info(f'initiate model from {model}')
         if isinstance(model, str) and is_official_hub_path(model):
             logger.info(f'initiate model from location {model}.')
-            return Model.from_pretrained(
-                model,
-                invoked_by=Invoke.PIPELINE,
-                device_map=self.device_map,
-                torch_dtype=self.torch_dtype,
-                ignore_file_pattern=self.ignore_file_pattern) if is_model(
-                    model) else model
+            if is_model(model):
+                return Model.from_pretrained(
+                    model,
+                    invoked_by=Invoke.PIPELINE,
+                    device_map=self.device_map,
+                    torch_dtype=self.torch_dtype,
+                    ignore_file_pattern=self.ignore_file_pattern)
+            else:
+                model_dir = model if os.path.exists(
+                    model) else snapshot_download(model)
+                # TODO: Temporary use of AutoModelForCausalLM
+                # Need to be updated into a universal solution
+                model = AutoModelForCausalLM.from_pretrained(
+                    model_dir,
+                    device_map=self.device_map,
+                    trust_remote_code=True)
+                model.model_dir = model_dir
+                return model
         else:
             return model
 
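
Note on the fallback above: a hub model id without a registered ModelScope model implementation is now resolved with snapshot_download and loaded through AutoModelForCausalLM with trust_remote_code=True. A minimal usage sketch, reusing a model id and the generation kwargs that appear in the tests further down; it is an illustration, not code from this commit:

    from modelscope.pipelines.nlp.llm_pipeline import LLMPipeline

    # 'qwen/Qwen-7B-Chat' and the generation settings are copied from
    # LLMPipelineTest below; ids with a registered ModelScope model go
    # through Model.from_pretrained instead of this fallback.
    pipe = LLMPipeline(model='qwen/Qwen-7B-Chat', device_map='auto')
    print(pipe('Hello!', do_sample=True, max_length=512))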
@@ -39,9 +54,11 @@ class LLMPipeline(Pipeline):
                  tokenizer: PreTrainedTokenizer = None,
                  *args,
                  **kwargs):
         self.device_map = kwargs.pop('device_map', None)
         self.torch_dtype = kwargs.pop('torch_dtype', None)
         self.ignore_file_pattern = kwargs.pop('ignore_file_pattern', None)
-        super().__init__(*args, **kwargs)
+        with self._temp_configuration_file(kwargs):
+            super().__init__(*args, **kwargs)
+
         tokenizer_class = None
         if isinstance(format_messages, str):
@@ -53,14 +70,9 @@ class LLMPipeline(Pipeline):
         if format_messages is None:
             model_type = self.cfg.safe_get('model.type',
                                            '').lower().split('-')[0]
-
             if model_type in LLM_FORMAT_MAP:
                 format_messages, format_output, tokenizer_class = LLM_FORMAT_MAP[
                     model_type]
-            else:
-                raise KeyError(
-                    f'model type `{model_type}` is not supported for LLM pipeline!'
-                )
 
         if format_messages is not None:
             self.format_messages = format_messages
@@ -69,6 +81,19 @@ class LLMPipeline(Pipeline):
         self.tokenizer = self._get_tokenizer(
             tokenizer_class) if tokenizer is None else tokenizer
 
+    @contextmanager
+    def _temp_configuration_file(self, kwargs: Dict[str, Any]):
+        kwargs['model'] = model = self.initiate_single_model(kwargs['model'])
+        model_dir = model if isinstance(model, str) else model.model_dir
+        configuration_path = os.path.join(model_dir, ModelFile.CONFIGURATION)
+        if os.path.exists(configuration_path):
+            yield
+        else:
+            with open(configuration_path, 'w') as f:
+                json.dump({'framework': 'pytorch', 'task': 'chat'}, f)
+            yield
+            os.remove(configuration_path)
+
     def _process_single(self, inputs, *args, **kwargs) -> Dict[str, Any]:
         preprocess_params = kwargs.get('preprocess_params', {})
         forward_params = kwargs.get('forward_params', {})
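
_temp_configuration_file above is what the wrapped super().__init__ call earlier in this diff relies on: if the resolved model directory has no configuration.json, a minimal {'framework': 'pytorch', 'task': 'chat'} file is written for the duration of Pipeline.__init__ and deleted afterwards; an existing file is left untouched. A standalone sketch of the same pattern (the helper name temp_json_file is illustrative, not from the commit):

    import json
    import os
    from contextlib import contextmanager


    @contextmanager
    def temp_json_file(path, payload):
        # Keep an existing file as-is; otherwise create it for the duration
        # of the with-block and remove it again afterwards.
        if os.path.exists(path):
            yield
        else:
            with open(path, 'w') as f:
                json.dump(payload, f)
            yield
            os.remove(path)


    # Same payload as _temp_configuration_file writes:
    with temp_json_file('configuration.json',
                        {'framework': 'pytorch', 'task': 'chat'}):
        pass  # configuration.json exists inside this block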
@@ -227,7 +252,7 @@ def chatglm2_format_messages(messages, tokenizer, **kwargs):
         return prompt
 
     prompt = build_chatglm2_prompt(messages, **kwargs)
-    return tokenizer(prompt, return_tensors='pt')
+    return tokenizer(prompt, return_token_type_ids=False, return_tensors='pt')
 
 
 def chatglm2_format_output(response, **kwargs):
@@ -371,7 +396,7 @@ def wizardlm_format_messages(messages, tokenizer, **kwargs):
         return prompts
 
     prompts = build_wizardlm_prompt(messages, tokenizer, **kwargs)
-    return tokenizer(prompts, return_tensors='pt')
+    return tokenizer(prompts, return_token_type_ids=False, return_tensors='pt')
 
 
 def wizardcode_format_messages(messages, tokenizer, **kwargs):
@@ -388,7 +413,11 @@ def wizardcode_format_messages(messages, tokenizer, **kwargs):
 
     prompt = system + '\n\n### Instruction:\n' + user + '\n\n### Response:'
     inputs = tokenizer(
-        prompt, padding=False, add_special_tokens=False, return_tensors='pt')
+        prompt,
+        return_token_type_ids=False,
+        padding=False,
+        add_special_tokens=False,
+        return_tensors='pt')
     return inputs
 
 
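The return_token_type_ids=False added to the tokenizer calls above (chatglm2, wizardlm, wizardcode) keeps token_type_ids out of the encoded inputs; presumably this avoids forwarding a key that these causal LMs' generate()/forward() do not accept. A hedged, self-contained illustration with a stand-in tokenizer (bert-base-uncased is chosen only because its tokenizer emits token_type_ids by default; it is not used by this pipeline):

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained('bert-base-uncased')  # stand-in only
    enc = tok('print("hello")', return_token_type_ids=False, return_tensors='pt')
    # Without the flag this encoding would contain token_type_ids.
    assert 'token_type_ids' not in enc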

LLMPipelineTest (test file)
@@ -3,11 +3,7 @@ import unittest
 
 import torch
 
-from modelscope import (AutoConfig, AutoModelForCausalLM, Model,
-                        snapshot_download)
-from modelscope.pipelines import pipeline
 from modelscope.pipelines.nlp.llm_pipeline import LLMPipeline
-from modelscope.utils.constant import Tasks
 from modelscope.utils.test_utils import test_level
 
 
@@ -134,25 +130,25 @@ class LLMPipelineTest(unittest.TestCase):
         }
         self.gen_cfg = {'do_sample': True, 'max_length': 512}
 
-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_chatglm2(self):
         pipe = LLMPipeline(model='ZhipuAI/chatglm2-6b', device_map='auto')
         print('messages: ', pipe(self.messages_zh, **self.gen_cfg))
         print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg))
 
-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_chatglm2int4(self):
         pipe = LLMPipeline(model='ZhipuAI/chatglm2-6b-int4')
         print('messages: ', pipe(self.messages_zh, **self.gen_cfg))
         print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg))
 
-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_chatglm232k(self):
         pipe = LLMPipeline(model='ZhipuAI/chatglm2-6b-32k', device_map='auto')
         print('messages: ', pipe(self.messages_zh, **self.gen_cfg))
         print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg))
 
-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_llama2(self):
         pipe = LLMPipeline(
             model='modelscope/Llama-2-7b-ms',
@@ -162,7 +158,7 @@ class LLMPipelineTest(unittest.TestCase):
         print('messages: ', pipe(self.messages_en, **self.gen_cfg))
         print('prompt: ', pipe(self.prompt_en, **self.gen_cfg))
 
-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_llama2chat(self):
         pipe = LLMPipeline(
             model='modelscope/Llama-2-7b-chat-ms',
@@ -173,7 +169,7 @@ class LLMPipelineTest(unittest.TestCase):
         print('messages: ', pipe(self.messages_en, **self.gen_cfg))
         print('prompt: ', pipe(self.prompt_en, **self.gen_cfg))
 
-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_codellama(self):
         pipe = LLMPipeline(
             model='AI-ModelScope/CodeLlama-7b-Instruct-hf',
@@ -183,7 +179,7 @@ class LLMPipelineTest(unittest.TestCase):
         print('messages: ', pipe(self.messages_code, **self.gen_cfg))
         print('prompt: ', pipe(self.prompt_code, **self.gen_cfg))
 
-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_baichuan_7b(self):
         pipe = LLMPipeline(
             model='baichuan-inc/baichuan-7B',
@@ -192,7 +188,7 @@ class LLMPipelineTest(unittest.TestCase):
         print('messages: ', pipe(self.messages_zh, **self.gen_cfg))
         print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg))
 
-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_baichuan_13b(self):
         pipe = LLMPipeline(
             model='baichuan-inc/Baichuan-13B-Base',
@@ -201,7 +197,7 @@ class LLMPipelineTest(unittest.TestCase):
         print('messages: ', pipe(self.messages_zh, **self.gen_cfg))
         print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg))
 
-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_baichuan_13bchat(self):
         pipe = LLMPipeline(
             model='baichuan-inc/Baichuan-13B-Chat',
@@ -210,7 +206,7 @@ class LLMPipelineTest(unittest.TestCase):
         print('messages: ', pipe(self.messages_zh, **self.gen_cfg))
         print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg))
 
-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_baichuan2_7b(self):
         pipe = LLMPipeline(
             model='baichuan-inc/Baichuan2-7B-Base',
@@ -219,7 +215,7 @@ class LLMPipelineTest(unittest.TestCase):
         print('messages: ', pipe(self.messages_zh, **self.gen_cfg))
         print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg))
 
-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_baichuan2_7bchat(self):
         pipe = LLMPipeline(
             model='baichuan-inc/Baichuan2-7B-Chat',
@@ -228,7 +224,25 @@ class LLMPipelineTest(unittest.TestCase):
         print('messages: ', pipe(self.messages_zh, **self.gen_cfg))
         print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg))
 
-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skip('Need bitsandbytes')
+    def test_baichuan2_7bchat_int4(self):
+        pipe = LLMPipeline(
+            model='baichuan-inc/Baichuan2-7B-Chat-4bits',
+            device_map='auto',
+            torch_dtype=torch.float16)
+        print('messages: ', pipe(self.messages_zh, **self.gen_cfg))
+        print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg))
+
+    @unittest.skip('Need bitsandbytes')
+    def test_baichuan2_13bchat_int4(self):
+        pipe = LLMPipeline(
+            model='baichuan-inc/Baichuan2-13B-Chat-4bits',
+            device_map='auto',
+            torch_dtype=torch.float16)
+        print('messages: ', pipe(self.messages_zh, **self.gen_cfg))
+        print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg))
+
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_wizardlm_13b(self):
         pipe = LLMPipeline(
             model='AI-ModelScope/WizardLM-13B-V1.2',
@@ -238,7 +252,7 @@ class LLMPipelineTest(unittest.TestCase):
         print('messages: ', pipe(self.messages_en, **self.gen_cfg))
         print('prompt: ', pipe(self.prompt_en, **self.gen_cfg))
 
-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_wizardmath(self):
         pipe = LLMPipeline(
             model='AI-ModelScope/WizardMath-7B-V1.0',
@@ -248,7 +262,7 @@ class LLMPipelineTest(unittest.TestCase):
         print('messages: ', pipe(self.message_wizard_math, **self.gen_cfg))
         print('prompt: ', pipe(self.prompt_wizard_math, **self.gen_cfg))
 
-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_wizardcode_13b(self):
         pipe = LLMPipeline(
             model='AI-ModelScope/WizardCoder-Python-13B-V1.0',
@@ -268,42 +282,21 @@ class LLMPipelineTest(unittest.TestCase):
         print('messages: ', pipe(self.message_wizard_code, **self.gen_cfg))
         print('prompt: ', pipe(self.prompt_wizard_code, **self.gen_cfg))
 
-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_qwen(self):
-        pipe = LLMPipeline(
-            model='ccyh123/Qwen-7B-Chat',
-            device_map='auto',
-            format_messages='qwen')
+        pipe = LLMPipeline(model='qwen/Qwen-7B-Chat', device_map='auto')
         print('messages: ', pipe(self.messages_zh_with_system, **self.gen_cfg))
         print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg))
 
-    @unittest.skip('Need AutoGPTQ')
+    @unittest.skip('Need optimum and auto-gptq')
     def test_qwen_int4(self):
-        from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
-        model_dir = snapshot_download('ccyh123/Qwen-7B-Chat-Int4')
-        quantize_config = BaseQuantizeConfig(
-            bits=4,  # quantize model to 4-bit
-            group_size=128,  # it is recommended to set the value to 128
-            desc_act=
-            False,  # set to False can significantly speed up inference but the perplexity may slightly bad
-        )
-        model = AutoGPTQForCausalLM.from_pretrained(
-            model_dir,
-            quantize_config,
-            device_map='auto',
-            trust_remote_code=True,
-            use_safetensors=True)
-        model.model_dir = model_dir
-        pipe = LLMPipeline(model=model, format_messages='qwen')
+        pipe = LLMPipeline(model='qwen/Qwen-7B-Chat-Int4', device_map='auto')
         print('messages: ', pipe(self.messages_zh_with_system, **self.gen_cfg))
         print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg))
 
-    @unittest.skip('File does not exists configuration.json')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_qwen_vl(self):
-        pipe = LLMPipeline(
-            model='ccyh123/Qwen-VL-Chat',
-            device_map='auto',
-            format_messages='qwen')
+        pipe = LLMPipeline(model='qwen/Qwen-VL-Chat', device_map='auto')
         print('messages: ', pipe(self.messages_mm, **self.gen_cfg))
         print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg))
 