diff --git a/.dev_scripts/build_image.sh b/.dev_scripts/build_image.sh
index 5ed5adf8..f52981b2 100644
--- a/.dev_scripts/build_image.sh
+++ b/.dev_scripts/build_image.sh
@@ -159,7 +159,7 @@ docker_file_content=`cat docker/Dockerfile.ubuntu`
BUILD_HASH_ID=$(git rev-parse HEAD)
# install thrid part library
-docker_file_content="${docker_file_content} \nRUN export COMMIT_ID=$BUILD_HASH_ID && pip install --no-cache-dir -U adaseq pai-easycv && pip install --no-cache-dir -U 'ms-swift' 'decord' 'qwen_vl_utils' 'pyav' 'librosa' 'funasr' 'timm>0.9.5' 'accelerate' 'gradio' 'peft' 'optimum' 'trl' 'transformers'"
+docker_file_content="${docker_file_content} \nRUN export COMMIT_ID=$BUILD_HASH_ID && pip install --no-cache-dir -U adaseq pai-easycv && pip install --no-cache-dir -U 'git+https://github.com/modelscope/ms-swift.git@release/2.5' 'decord' 'qwen_vl_utils' 'pyav' 'librosa' 'funasr' 'timm>0.9.5' 'transformers' 'accelerate' 'gradio' 'peft' 'optimum' 'trl'"
docker_file_content="${docker_file_content} \nRUN pip uninstall modelscope -y && export COMMIT_ID=$BUILD_HASH_ID && cd /tmp && GIT_LFS_SKIP_SMUDGE=1 git clone -b $build_branch --single-branch $REPO_URL && cd modelscope && pip install . && cd / && rm -fr /tmp/modelscope && pip cache purge;"
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
index 4fdf7351..f5a42ca4 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -3,7 +3,7 @@ name: Bug report
about: Create a bug report to help us improve
title: ''
labels: ''
-assignees: Firmament-cyou, tastelikefeet, wangxingjun778, wenmengzhou, zzclynn
+assignees: tastelikefeet, wangxingjun778, yingdachen
---
@@ -36,14 +36,14 @@ A clear and concise description of what the bug is.
Please @ corresponding people according to your problem:
-Model related: @wenmengzhou @tastelikefeet
+Model related: @tastelikefeet
-Model hub related: @liuyhwangyh
+Model hub related: @liuyhwangyh @tastelikefeet @wangxingjun778
Dataset releated: @wangxingjun778
Finetune related: @tastelikefeet @Jintao-Huang
-Pipeline related: @Firmament-cyou @wenmengzhou
+Pipeline related: @tastelikefeet @wangxingjun778
-Contribute your model: @zzclynn
+Contribute your model: @yingdachen
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
index 0731f3c1..6eef2aa5 100644
--- a/.github/ISSUE_TEMPLATE/feature_request.md
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -3,7 +3,7 @@ name: Feature request
about: Suggest an idea for this project
title: ''
labels: ''
-assignees: tastelikefeet, wangxingjun778, wenmengzhou, yingdachen, zzclynn
+assignees: yingdachen, wangxingjun778, tastelikefeet
---
diff --git a/.github/ISSUE_TEMPLATE/question.md b/.github/ISSUE_TEMPLATE/question.md
index c7ec7256..3545e543 100644
--- a/.github/ISSUE_TEMPLATE/question.md
+++ b/.github/ISSUE_TEMPLATE/question.md
@@ -3,7 +3,7 @@ name: Question
about: Describe this issue template's purpose here.
title: ''
labels: ''
-assignees: zzclynn,wenmengzhou
+assignees: tastelikefeet, wangxingjun778, yingdachen
---
@@ -18,7 +18,7 @@ Before asking a question, make sure you have:
Please @ corresponding people according to your problem:
-Model related: @wenmengzhou @tastelikefeet
+Model related: @tastelikefeet
Model hub related: @liuyhwangyh
@@ -26,6 +26,6 @@ Dataset releated: @wangxingjun778
Finetune related: @tastelikefeet @Jintao-Huang
-Pipeline related: @Firmament-cyou @wenmengzhou
+Pipeline related: @tastelikefeet @wangxingjun778
-Contribute your model: @zzclynn
+Contribute your model: @yingdachen
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7e6e9b77..a8565f16 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,3 +1,5 @@
+exclude: 'modelscope/preprocessors/templates/'
+
repos:
- repo: https://github.com/pycqa/flake8.git
rev: 4.0.0
diff --git a/.pre-commit-config_local.yaml b/.pre-commit-config_local.yaml
index a68a5b78..869d8fd6 100644
--- a/.pre-commit-config_local.yaml
+++ b/.pre-commit-config_local.yaml
@@ -1,3 +1,5 @@
+exclude: 'modelscope/preprocessors/templates/'
+
repos:
- repo: /home/admin/pre-commit/flake8
rev: 4.0.0
diff --git a/modelscope/cli/clearcache.py b/modelscope/cli/clearcache.py
new file mode 100644
index 00000000..7b89103b
--- /dev/null
+++ b/modelscope/cli/clearcache.py
@@ -0,0 +1,107 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os
+import shutil
+from argparse import ArgumentParser
+from pathlib import Path
+
+from modelscope.cli.base import CLICommand
+from modelscope.hub.constants import TEMPORARY_FOLDER_NAME
+
+
+def subparser_func(args):
+ """ Function which will be called for a specific sub parser.
+ """
+ return ClearCacheCMD(args)
+
+
+class ClearCacheCMD(CLICommand):
+ name = 'clear-cache'
+
+ def __init__(self, args):
+ self.args = args
+ self.cache_dir = os.getenv(
+ 'MODELSCOPE_CACHE',
+ Path.home().joinpath('.cache', 'modelscope'))
+
+ @staticmethod
+ def define_args(parsers: ArgumentParser):
+ """ define args for clear-cache command.
+ """
+ parser = parsers.add_parser(ClearCacheCMD.name)
+ group = parser.add_mutually_exclusive_group()
+ group.add_argument(
+ '--model',
+ type=str,
+ help=
+ 'The id of the model whose cache will be cleared. For clear-cache, '
+ 'if neither model or dataset id is provided, entire cache will be cleared.'
+ )
+ group.add_argument(
+ '--dataset',
+ type=str,
+ help=
+ 'The id of the dataset whose cache will be cleared. For clear-cache, '
+ 'if neither model or dataset id is provided, entire cache will be cleared.'
+ )
+
+ parser.set_defaults(func=subparser_func)
+
+ def execute(self):
+ self._execute_with_confirmation()
+
+ def _execute_with_confirmation(self):
+ all = False
+ single_model = False
+ prompt = '\nYou are about to delete '
+
+ if self.args.model or self.args.dataset:
+ if self.args.model:
+ id = self.args.model
+ single_model = True
+ prompt = prompt + f'local cache for model {id}. '
+ else:
+ id = self.args.dataset
+ prompt = prompt + f'local cache for dataset {id}. '
+ else:
+            prompt = prompt + f'entire ModelScope cache at {self.cache_dir}, including ALL models and datasets.\n'
+ all = True
+ user_input = input(
+ prompt
+ + '\nPlease press Y or y to proceed, any other key to abort.\n'
+ ).strip().upper()
+
+ if user_input == 'Y':
+ if all:
+ self._remove_directory(self.cache_dir)
+ print('Cache cleared.')
+ else:
+ entity_directory = os.path.join(
+ self.cache_dir, 'hub' if single_model else 'datasets', id)
+ temp_directory = os.path.join(
+ self.cache_dir, 'hub' if single_model else 'datasets',
+ TEMPORARY_FOLDER_NAME, id)
+ entity_removed = self._remove_directory(entity_directory)
+ temp_removed = self._remove_directory(temp_directory)
+ if (not entity_removed) and (not temp_removed):
+ if single_model:
+ print(
+ f'Cache for Model {id} not found. Nothing to do.')
+ else:
+ print(
+ f'Cache for Dataset {id} not found. Nothing to do.'
+ )
+ else:
+ print('Cache cleared.')
+ else:
+ print('Operation aborted.')
+ return
+
+ def _remove_directory(self, path):
+ if os.path.exists(path):
+ try:
+ shutil.rmtree(path)
+ print(f'Cache folder {path} removed.')
+ return True
+ except Exception as e:
+ print(f'An error occurred while clearing cache at {path}: {e}')
+ return False
diff --git a/modelscope/cli/cli.py b/modelscope/cli/cli.py
index 5e3fcbfd..74fb05db 100644
--- a/modelscope/cli/cli.py
+++ b/modelscope/cli/cli.py
@@ -3,6 +3,7 @@
import argparse
import logging
+from modelscope.cli.clearcache import ClearCacheCMD
from modelscope.cli.download import DownloadCMD
from modelscope.cli.login import LoginCMD
from modelscope.cli.modelcard import ModelCardCMD
@@ -23,6 +24,7 @@ def run_cmd():
subparsers = parser.add_subparsers(help='modelscope commands helpers')
DownloadCMD.define_args(subparsers)
+ ClearCacheCMD.define_args(subparsers)
PluginsCMD.define_args(subparsers)
PipelineCMD.define_args(subparsers)
ModelCardCMD.define_args(subparsers)
diff --git a/modelscope/msdatasets/utils/hf_datasets_util.py b/modelscope/msdatasets/utils/hf_datasets_util.py
index 36204d93..5b3a8bb7 100644
--- a/modelscope/msdatasets/utils/hf_datasets_util.py
+++ b/modelscope/msdatasets/utils/hf_datasets_util.py
@@ -555,7 +555,7 @@ def get_module_without_script(self) -> DatasetModule:
download_config = self.download_config.copy()
if download_config.download_desc is None:
- download_config.download_desc = 'Downloading readme'
+ download_config.download_desc = 'Downloading [README.md]'
try:
url_or_filename = _ms_api.get_dataset_file_url(
file_name='README.md',
@@ -989,7 +989,6 @@ class DatasetsWrapperHF:
download_config=download_config,
download_mode=download_mode,
verification_mode=verification_mode,
- try_from_hf_gcs=False,
num_proc=num_proc,
storage_options=storage_options,
# base_path=builder_instance.base_path,
diff --git a/modelscope/msdatasets/utils/hf_file_utils.py b/modelscope/msdatasets/utils/hf_file_utils.py
index b2931f7e..863bb196 100644
--- a/modelscope/msdatasets/utils/hf_file_utils.py
+++ b/modelscope/msdatasets/utils/hf_file_utils.py
@@ -5,27 +5,138 @@
import json
import os
import re
+import copy
import shutil
+import time
import warnings
-import inspect
from contextlib import contextmanager
from functools import partial
from pathlib import Path
+from typing import Optional, Union
from urllib.parse import urljoin, urlparse
import requests
+from tqdm import tqdm
from datasets import config
-from datasets.utils.file_utils import hash_url_to_filename, get_authentication_headers_for_url, ftp_head, fsspec_head, \
- http_head, _raise_if_offline_mode_is_enabled, ftp_get, fsspec_get, http_get
+from datasets.utils.file_utils import hash_url_to_filename, \
+ get_authentication_headers_for_url, fsspec_head, fsspec_get
from filelock import FileLock
from modelscope.utils.config_ds import MS_DATASETS_CACHE
from modelscope.utils.logger import get_logger
from modelscope.hub.api import ModelScopeConfig
+from modelscope import __version__
+
logger = get_logger()
+def get_datasets_user_agent_ms(user_agent: Optional[Union[str, dict]] = None) -> str:
+ ua = f'datasets/{__version__}'
+ ua += f'; python/{config.PY_VERSION}'
+ ua += f'; pyarrow/{config.PYARROW_VERSION}'
+ if config.TORCH_AVAILABLE:
+ ua += f'; torch/{config.TORCH_VERSION}'
+ if config.TF_AVAILABLE:
+ ua += f'; tensorflow/{config.TF_VERSION}'
+ if config.JAX_AVAILABLE:
+ ua += f'; jax/{config.JAX_VERSION}'
+ if isinstance(user_agent, dict):
+ ua += f"; {'; '.join(f'{k}/{v}' for k, v in user_agent.items())}"
+ elif isinstance(user_agent, str):
+ ua += '; ' + user_agent
+ return ua
+
+
+def _request_with_retry_ms(
+ method: str,
+ url: str,
+ max_retries: int = 2,
+ base_wait_time: float = 0.5,
+ max_wait_time: float = 2,
+ timeout: float = 10.0,
+ **params,
+) -> requests.Response:
+ """Wrapper around requests to retry in case it fails with a ConnectTimeout, with exponential backoff.
+
+ Note that if the environment variable HF_DATASETS_OFFLINE is set to 1, then a OfflineModeIsEnabled error is raised.
+
+ Args:
+ method (str): HTTP method, such as 'GET' or 'HEAD'.
+ url (str): The URL of the resource to fetch.
+        max_retries (int): Maximum number of retries, defaults to 2.
+ base_wait_time (float): Duration (in seconds) to wait before retrying the first time. Wait time between
+ retries then grows exponentially, capped by max_wait_time.
+ max_wait_time (float): Maximum amount of time between two retries, in seconds.
+ **params (additional keyword arguments): Params to pass to :obj:`requests.request`.
+ """
+ tries, success = 0, False
+ response = None
+ while not success:
+ tries += 1
+ try:
+ response = requests.request(method=method.upper(), url=url, timeout=timeout, **params)
+ success = True
+ except (requests.exceptions.ConnectTimeout, requests.exceptions.ConnectionError) as err:
+ if tries > max_retries:
+ raise err
+ else:
+                logger.info(f'{method} request to {url} timed out, retrying... [{tries}/{max_retries}]')
+ sleep_time = min(max_wait_time, base_wait_time * 2 ** (tries - 1)) # Exponential backoff
+ time.sleep(sleep_time)
+ return response
+
+
+def http_head_ms(
+ url, proxies=None, headers=None, cookies=None, allow_redirects=True, timeout=10.0, max_retries=0
+) -> requests.Response:
+ headers = copy.deepcopy(headers) or {}
+ headers['user-agent'] = get_datasets_user_agent_ms(user_agent=headers.get('user-agent'))
+ response = _request_with_retry_ms(
+ method='HEAD',
+ url=url,
+ proxies=proxies,
+ headers=headers,
+ cookies=cookies,
+ allow_redirects=allow_redirects,
+ timeout=timeout,
+ max_retries=max_retries,
+ )
+ return response
+
+
+def http_get_ms(
+ url, temp_file, proxies=None, resume_size=0, headers=None, cookies=None, timeout=100.0, max_retries=0, desc=None
+) -> Optional[requests.Response]:
+ headers = dict(headers) if headers is not None else {}
+ headers['user-agent'] = get_datasets_user_agent_ms(user_agent=headers.get('user-agent'))
+ if resume_size > 0:
+ headers['Range'] = f'bytes={resume_size:d}-'
+ response = _request_with_retry_ms(
+ method='GET',
+ url=url,
+ stream=True,
+ proxies=proxies,
+ headers=headers,
+ cookies=cookies,
+ max_retries=max_retries,
+ timeout=timeout,
+ )
+ if temp_file is None:
+ return response
+ if response.status_code == 416: # Range not satisfiable
+ return
+ content_length = response.headers.get('Content-Length')
+ total = resume_size + int(content_length) if content_length is not None else None
+
+ progress = tqdm(total=total, initial=resume_size, unit_scale=True, unit='B', desc=desc or 'Downloading')
+ for chunk in response.iter_content(chunk_size=1024):
+ progress.update(len(chunk))
+ temp_file.write(chunk)
+
+ progress.close()
+
+
def get_from_cache_ms(
url,
cache_dir=None,
@@ -42,7 +153,7 @@ def get_from_cache_ms(
ignore_url_params=False,
storage_options=None,
download_desc=None,
- disable_tqdm=False,
+ disable_tqdm=None,
) -> str:
"""
Given a URL, look for the corresponding file in the local cache.
@@ -88,6 +199,8 @@ def get_from_cache_ms(
# if we don't ask for 'force_download' then we spare a request
filename = hash_url_to_filename(cached_url, etag=None)
cache_path = os.path.join(cache_dir, filename)
+ if download_desc is None:
+ download_desc = 'Downloading [' + filename + ']'
if os.path.exists(cache_path) and not force_download and not use_etag:
return cache_path
@@ -100,16 +213,14 @@ def get_from_cache_ms(
# We don't have the file locally or we need an eTag
if not local_files_only:
scheme = urlparse(url).scheme
- if scheme == 'ftp':
- connected = ftp_head(url)
- elif scheme not in ('http', 'https'):
+ if scheme not in ('http', 'https'):
response = fsspec_head(url, storage_options=storage_options)
# s3fs uses "ETag", gcsfs uses "etag"
etag = (response.get('ETag', None) or response.get('etag', None)) if use_etag else None
connected = True
try:
cookies = ModelScopeConfig.get_cookies()
- response = http_head(
+ response = http_head_ms(
url,
allow_redirects=True,
proxies=proxies,
@@ -166,7 +277,6 @@ def get_from_cache_ms(
)
elif response is not None and response.status_code == 404:
raise FileNotFoundError(f"Couldn't find file at {url}")
- _raise_if_offline_mode_is_enabled(f'Tried to reach {url}')
if head_error is not None:
raise ConnectionError(f"Couldn't reach {url} ({repr(head_error)})")
elif response is not None:
@@ -205,48 +315,21 @@ def get_from_cache_ms(
# Download to temporary file, then copy to cache path once finished.
# Otherwise, you get corrupt cache entries if the download gets interrupted.
with temp_file_manager() as temp_file:
- logger.info(f'Downloading to {temp_file.name}')
# GET file object
- if scheme == 'ftp':
- ftp_get(url, temp_file)
- elif scheme not in ('http', 'https'):
- fsspec_get_sig = inspect.signature(fsspec_get)
- if 'disable_tqdm' in fsspec_get_sig.parameters:
- fsspec_get(url,
- temp_file,
- storage_options=storage_options,
- desc=download_desc,
- disable_tqdm=disable_tqdm
- )
- else:
- fsspec_get(url, temp_file, storage_options=storage_options, desc=download_desc)
+ if scheme not in ('http', 'https'):
+ fsspec_get(url, temp_file, storage_options=storage_options, desc=download_desc)
else:
- http_get_sig = inspect.signature(http_get)
-
- if 'disable_tqdm' in http_get_sig.parameters:
- http_get(
- url,
- temp_file=temp_file,
- proxies=proxies,
- resume_size=resume_size,
- headers=headers,
- cookies=cookies,
- max_retries=max_retries,
- desc=download_desc,
- disable_tqdm=disable_tqdm,
- )
- else:
- http_get(
- url,
- temp_file=temp_file,
- proxies=proxies,
- resume_size=resume_size,
- headers=headers,
- cookies=cookies,
- max_retries=max_retries,
- desc=download_desc,
- )
+ http_get_ms(
+ url,
+ temp_file=temp_file,
+ proxies=proxies,
+ resume_size=resume_size,
+ headers=headers,
+ cookies=cookies,
+ max_retries=max_retries,
+ desc=download_desc,
+ )
logger.info(f'storing {url} in cache at {cache_path}')
shutil.move(temp_file.name, cache_path)
diff --git a/modelscope/preprocessors/templates/__init__.py b/modelscope/preprocessors/templates/__init__.py
new file mode 100644
index 00000000..5ac1780d
--- /dev/null
+++ b/modelscope/preprocessors/templates/__init__.py
@@ -0,0 +1,2 @@
+from .base import Template, get_template
+from .template import TemplateType
diff --git a/modelscope/preprocessors/templates/base.py b/modelscope/preprocessors/templates/base.py
new file mode 100644
index 00000000..4504a4bc
--- /dev/null
+++ b/modelscope/preprocessors/templates/base.py
@@ -0,0 +1,1041 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import json
+import re
+from copy import deepcopy
+from typing import Any, Dict, List, Literal, Optional, Tuple, Union
+
+import torch
+import torch.nn.functional as F
+from modelscope import get_logger
+from torch.nn import Module
+from torch.nn.utils.rnn import pad_sequence
+from transformers import PreTrainedTokenizerBase, StoppingCriteria
+from .loss_scale import loss_scale_map
+from .tools_prompt import get_tools_prompt
+from .utils import load_batch, load_image, rescale_image, fetch_one, to_device, decode_base64
+from .utils import History, Prompt, StopWords, Context, Messages
+
+logger = get_logger()
+
+DEFAULT_SYSTEM = 'You are a helpful assistant.'
+
+TEMPLATE_MAPPING: Dict[str, Dict[str, Any]] = {}
+
+
+def get_template(
+ template_type: str,
+ tokenizer: PreTrainedTokenizerBase,
+ default_system: Optional[str] = None,
+ max_length: Optional[int] = None,
+ truncation_strategy: Literal['delete', 'truncation_left'] = 'delete',
+ **kwargs,
+) -> 'Template':
+ template_info = TEMPLATE_MAPPING[template_type]
+ template = deepcopy(template_info['template'])
+ template.init_template(tokenizer, default_system, max_length, truncation_strategy, **kwargs)
+ return template
+
+
+def _findall(token_list: List[int], sub_token_list: Union[int, List[int]]) -> List[int]:
+ """Find the index of a token in the token_list."""
+ if isinstance(sub_token_list, int):
+ sub_token_list = [sub_token_list]
+ res = []
+ idx = -1
+ try:
+ while True:
+ idx = token_list.index(sub_token_list[0], idx + 1)
+ if len(sub_token_list) == 1 or sub_token_list == token_list[idx:idx + len(sub_token_list)]:
+ res.append(idx)
+ except ValueError:
+ pass
+ return res
+
+
+def replace_img_tag(messages: Messages,
+ replace_token: str,
+                    pattern=r'<img>(.+?)</img>') -> Tuple[Messages, List[str]]:
+ images_path = []
+ new_messages = []
+ for i, m in enumerate(messages):
+ m = m.copy()
+ if m['content'] is None or m['role'] in ('tool', 'system', 'assistant'):
+ new_messages.append(m)
+ else:
+ images_path += re.findall(pattern, m['content'])
+ m['content'] = re.sub(pattern, replace_token, m['content'])
+ new_messages.append(m)
+    return new_messages, images_path
+
+
+class StopWordsCriteria(StoppingCriteria):
+ """Adding extra stop words in template to prevent unstoppable generation
+ Like suffixes and chat seps in the template.
+ """
+ def __init__(self, tokenizer: PreTrainedTokenizerBase, stop_words: StopWords, **tokenizer_kwargs) -> None:
+ self.tokenizer = tokenizer
+ self.stop_words = stop_words
+ self.tokenizer_kwargs = tokenizer_kwargs
+ self.start_idx = -1
+
+ def __call__(self, input_ids: torch.Tensor, scores: torch.Tensor, **kwargs) -> bool:
+ if self.start_idx == -1:
+ self.start_idx = len(input_ids[0]) - 1
+ tokenizer = self.tokenizer
+ stop_words = self.stop_words
+ # [-20:]: Assuming the end tokens do not exceed 20 tokens,
+ # to avoid input_ids being too long and affecting efficiency.
+ text = tokenizer.decode(input_ids[0, self.start_idx:][-20:], **self.tokenizer_kwargs)
+ for stop_word in stop_words:
+ if isinstance(stop_word, str):
+ if stop_word in text:
+ return True
+ else: # list
+ if len(stop_word) > 0 and input_ids[0].tolist()[-len(stop_word):] == stop_word:
+ return True
+ return False
+
+
+class Template:
+ """A template class for all supported models.
+
+ Args:
+ prefix: Prefix tokens before the first turn's prompt
+ prompt: A list of elements whose types are str and list of integers. The input query part of every turn.
+ chat_sep: The chat separators between every turn.
+ suffix: The end tokens after the chat finished.
+ default_system: A default system instruction.
+ system_prefix: The prefix if the `system` is not empty.
+ auto_add_bos: By default, the bos_token is not added. The auto_add_bos option will determine
+ whether to add it based on `tokenizer.encode('')`.
+ tools_prompt: The tools prompt name
+ tool_prompt: The tool prompt, usually useful when there is a tool role
+ padding_side: The padding side
+ infer_media_type: The media type supported by the multi-modals
+ Examples:
+ system\nYou are a helpful assistant!\nWho are you?\nassistant:I am a robot\nWho are you?\nassistant:I am a robot # noqa
+ ----------system------------ ---query---- --response- -----chatsep----- ---query--- --response- ----suffix-----
+ ----------------------------system_prefix---------------------------- ---------------------------- prompt ------------------------------------- ---------------------------- prompt -------------------------------------
+
+ """
+
+ special_tokens = ['', '