diff --git a/.dev_scripts/ci_container_test.sh b/.dev_scripts/ci_container_test.sh index 1782693e..31556192 100644 --- a/.dev_scripts/ci_container_test.sh +++ b/.dev_scripts/ci_container_test.sh @@ -31,6 +31,7 @@ if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then python -m spacy download en_core_web_sm pip install faiss-gpu pip install healpy + pip install huggingface-hub==0.25.2 # test with install pip install . else diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py index a2ecc210..1dd6c6d5 100644 --- a/modelscope/pipelines/builder.py +++ b/modelscope/pipelines/builder.py @@ -170,7 +170,7 @@ def pipeline(task: str = None, pipeline_props['device'] = device cfg = ConfigDict(pipeline_props) - clear_llm_info(kwargs) + clear_llm_info(kwargs, pipeline_name) if kwargs: cfg.update(kwargs) @@ -223,7 +223,7 @@ def external_engine_for_llm_checker(model: Union[str, List[str], Model, List[Model]], revision: Optional[str], kwargs: Dict[str, Any]) -> Optional[str]: - from .nlp.llm_pipeline import SWIFT_MODEL_ID_MAPPING, ModelTypeHelper, LLMAdapterRegistry + from .nlp.llm_pipeline import SWIFT_MODEL_ID_MAPPING, init_swift_model_mapping, ModelTypeHelper, LLMAdapterRegistry from ..hub.check_model import get_model_id_from_cache if isinstance(model, list): model = model[0] @@ -236,8 +236,9 @@ def external_engine_for_llm_checker(model: Union[str, List[str], Model, model_id = get_model_id_from_cache(model) else: model_id = model - global SWIFT_MODEL_ID_MAPPING - if model_id in SWIFT_MODEL_ID_MAPPING: + + init_swift_model_mapping() + if model_id.lower() in SWIFT_MODEL_ID_MAPPING: return 'llm' model_type = ModelTypeHelper.get( model, revision, with_adapter=True, split='-', use_cache=True) @@ -245,9 +246,10 @@ def external_engine_for_llm_checker(model: Union[str, List[str], Model, return 'llm' -def clear_llm_info(kwargs: Dict): +def clear_llm_info(kwargs: Dict, pipeline_name: str): from modelscope.utils.model_type_helper import ModelTypeHelper kwargs.pop('external_engine_for_llm', None) - kwargs.pop('llm_framework', None) + if pipeline_name != 'llm': + kwargs.pop('llm_framework', None) ModelTypeHelper.clear_cache() diff --git a/modelscope/pipelines/nlp/llm_pipeline.py b/modelscope/pipelines/nlp/llm_pipeline.py index c46bb46a..3199d7fa 100644 --- a/modelscope/pipelines/nlp/llm_pipeline.py +++ b/modelscope/pipelines/nlp/llm_pipeline.py @@ -33,6 +33,17 @@ SWIFT_MODEL_ID_MAPPING = {} SWIFT_FRAMEWORK = 'swift' +def init_swift_model_mapping(): + from swift.llm.utils import MODEL_MAPPING + + global SWIFT_MODEL_ID_MAPPING + if not SWIFT_MODEL_ID_MAPPING: + SWIFT_MODEL_ID_MAPPING = { + v['model_id_or_path'].lower(): k + for k, v in MODEL_MAPPING.items() + } + + class LLMAdapterRegistry: llm_format_map = {'qwen': [None, None, None]} @@ -216,14 +227,7 @@ class LLMPipeline(Pipeline, PipelineStreamingOutputMixin): def _init_swift(self, model_id, device) -> None: from swift.llm import prepare_model_template - from swift.llm.utils import MODEL_MAPPING, InferArguments - - global SWIFT_MODEL_ID_MAPPING - if not SWIFT_MODEL_ID_MAPPING: - SWIFT_MODEL_ID_MAPPING = { - v['model_id_or_path']: k - for k, v in MODEL_MAPPING.items() - } + from swift.llm.utils import InferArguments def format_messages(messages: Dict[str, List[Dict[str, str]]], tokenizer: PreTrainedTokenizer, @@ -261,9 +265,12 @@ class LLMPipeline(Pipeline, PipelineStreamingOutputMixin): else: return dict(system=system, prompt=prompt, history=history) - assert model_id in SWIFT_MODEL_ID_MAPPING,\ + init_swift_model_mapping() + + assert model_id.lower() in SWIFT_MODEL_ID_MAPPING,\ f'Invalid model id {model_id} or Swift framework does not support this model.' - args = InferArguments(model_type=SWIFT_MODEL_ID_MAPPING[model_id]) + args = InferArguments( + model_type=SWIFT_MODEL_ID_MAPPING[model_id.lower()]) model, template = prepare_model_template( args, device_map=self.device_map) self.model = add_stream_generate(model) diff --git a/modelscope/preprocessors/nlp/fill_mask_preprocessor.py b/modelscope/preprocessors/nlp/fill_mask_preprocessor.py index f43e03ed..c5113f35 100644 --- a/modelscope/preprocessors/nlp/fill_mask_preprocessor.py +++ b/modelscope/preprocessors/nlp/fill_mask_preprocessor.py @@ -213,11 +213,14 @@ class FillMaskPoNetPreprocessor(FillMaskPreprocessorBase): osp.join(model_dir, ModelFile.CONFIGURATION)) self.language = self.cfg.model.get('language', 'en') if self.language == 'en': - from nltk.tokenize import sent_tokenize import nltk - nltk.download('punkt_tab') - # import_external_nltk_data( - # osp.join(model_dir, 'nltk_data'), 'tokenizers/punkt_tab') + from nltk.tokenize import sent_tokenize + from packaging import version + if version.parse(nltk.__version__) >= version.parse('3.8.2'): + nltk.download('punkt_tab') + else: + import_external_nltk_data( + osp.join(model_dir, 'nltk_data'), 'tokenizers/punkt_tab') elif self.language in ['zh', 'cn']: def sent_tokenize(para): diff --git a/modelscope/utils/streaming_output.py b/modelscope/utils/streaming_output.py index 96dad20f..1b93432a 100644 --- a/modelscope/utils/streaming_output.py +++ b/modelscope/utils/streaming_output.py @@ -175,7 +175,11 @@ class PretrainedModelStreamingOutputMixin(StreamingOutputMixin): @contextmanager def _replace_generate(self, model: PreTrainedModel) -> Generator: - if version.parse(transformers.__version__) >= version.parse('4.39.0'): + if version.parse(transformers.__version__) >= version.parse('4.43.0'): + greedy_search_name = 'stream_greedy_search' + sample_name = '_sample' + elif version.parse( + transformers.__version__) >= version.parse('4.39.0'): greedy_search_name = '_greedy_search' sample_name = '_sample' else: @@ -449,6 +453,8 @@ class PretrainedModelStreamingOutputMixin(StreamingOutputMixin): break # prepare model inputs + model_kwargs = self._get_initial_cache_position( + input_ids, model_kwargs) model_inputs = self.prepare_inputs_for_generation( input_ids, **model_kwargs)