mirror of
https://github.com/modelscope/modelscope.git
synced 2026-02-24 12:10:09 +01:00
Merge commit 'bd1544bef0945677bfd6c0ac2b24f353f2f0817d' into feat/template
* commit 'bd1544bef0945677bfd6c0ac2b24f353f2f0817d': fix template: llava-llama-3 & yi-1.5 (#1011) add cmd line option of clear-cache (#1009) do not download pt and pth files for autoconfig, autotoknizer and generation config (#1008) Update issue templates Adapt new datasets (#1002) template and ollama in modelscope (#995) Unify dataset download log and remove tqdm disable option (#997) # Conflicts: # modelscope/preprocessors/templates/loader.py # tests/tools/test_to_ollama.py
This commit is contained in:
10
.github/ISSUE_TEMPLATE/bug_report.md
vendored
10
.github/ISSUE_TEMPLATE/bug_report.md
vendored
@@ -3,7 +3,7 @@ name: Bug report
|
||||
about: Create a bug report to help us improve
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: Firmament-cyou, tastelikefeet, wangxingjun778, wenmengzhou, zzclynn
|
||||
assignees: tastelikefeet, wangxingjun778, yingdachen
|
||||
|
||||
---
|
||||
|
||||
@@ -36,14 +36,14 @@ A clear and concise description of what the bug is.
|
||||
|
||||
Please @ corresponding people according to your problem:
|
||||
|
||||
Model related: @wenmengzhou @tastelikefeet
|
||||
Model related: @tastelikefeet
|
||||
|
||||
Model hub related: @liuyhwangyh
|
||||
Model hub related: @liuyhwangyh @tastelikefeet @wangxingjun778
|
||||
|
||||
Dataset related: @wangxingjun778
|
||||
|
||||
Finetune related: @tastelikefeet @Jintao-Huang
|
||||
|
||||
Pipeline related: @Firmament-cyou @wenmengzhou
|
||||
Pipeline related: @tastelikefeet @wangxingjun778
|
||||
|
||||
Contribute your model: @zzclynn
|
||||
Contribute your model: @yingdachen
|
||||
|
||||
2
.github/ISSUE_TEMPLATE/feature_request.md
vendored
2
.github/ISSUE_TEMPLATE/feature_request.md
vendored
@@ -3,7 +3,7 @@ name: Feature request
|
||||
about: Suggest an idea for this project
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: tastelikefeet, wangxingjun778, wenmengzhou, yingdachen, zzclynn
|
||||
assignees: yingdachen, wangxingjun778, tastelikefeet
|
||||
|
||||
---
|
||||
|
||||
|
||||
8
.github/ISSUE_TEMPLATE/question.md
vendored
8
.github/ISSUE_TEMPLATE/question.md
vendored
@@ -3,7 +3,7 @@ name: Question
|
||||
about: Describe this issue template's purpose here.
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: zzclynn,wenmengzhou
|
||||
assignees: tastelikefeet, wangxingjun778, yingdachen
|
||||
|
||||
---
|
||||
|
||||
@@ -18,7 +18,7 @@ Before asking a question, make sure you have:
|
||||
|
||||
Please @ corresponding people according to your problem:
|
||||
|
||||
Model related: @wenmengzhou @tastelikefeet
|
||||
Model related: @tastelikefeet
|
||||
|
||||
Model hub related: @liuyhwangyh
|
||||
|
||||
@@ -26,6 +26,6 @@ Dataset releated: @wangxingjun778
|
||||
|
||||
Finetune related: @tastelikefeet @Jintao-Huang
|
||||
|
||||
Pipeline related: @Firmament-cyou @wenmengzhou
|
||||
Pipeline related: @tastelikefeet @wangxingjun778
|
||||
|
||||
Contribute your model: @zzclynn
|
||||
Contribute your model: @yingdachen
|
||||
|
||||
107
modelscope/cli/clearcache.py
Normal file
107
modelscope/cli/clearcache.py
Normal file
@@ -0,0 +1,107 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
import os
|
||||
import shutil
|
||||
from argparse import ArgumentParser
|
||||
from pathlib import Path
|
||||
|
||||
from modelscope.cli.base import CLICommand
|
||||
from modelscope.hub.constants import TEMPORARY_FOLDER_NAME
|
||||
|
||||
|
||||
def subparser_func(args):
    """Create the command object bound to the ``clear-cache`` sub parser.

    Args:
        args: The parsed command line arguments handed over by argparse.

    Returns:
        A ``ClearCacheCMD`` instance constructed from ``args``.
    """
    command = ClearCacheCMD(args)
    return command
|
||||
|
||||
|
||||
class ClearCacheCMD(CLICommand):
    """Implementation of the ``clear-cache`` command.

    Deletes either the cache of one model, the cache of one dataset, or the
    whole cache directory, after asking the user for confirmation.
    """
    name = 'clear-cache'

    def __init__(self, args):
        """Store the parsed arguments and resolve the cache directory.

        Args:
            args: Parsed arguments; ``args.model`` and ``args.dataset`` are
                read when the command is executed.
        """
        self.args = args
        # MODELSCOPE_CACHE takes precedence over ~/.cache/modelscope.
        self.cache_dir = os.getenv(
            'MODELSCOPE_CACHE',
            Path.home().joinpath('.cache', 'modelscope'))

    @staticmethod
    def define_args(parsers: ArgumentParser):
        """Register the ``clear-cache`` sub parser and its options.

        ``--model`` and ``--dataset`` are mutually exclusive; both are
        optional.

        Args:
            parsers: The sub parsers object on which ``add_parser`` is called.
        """
        parser = parsers.add_parser(ClearCacheCMD.name)
        group = parser.add_mutually_exclusive_group()
        group.add_argument(
            '--model',
            type=str,
            help=(
                'The id of the model whose cache will be cleared. For clear-cache, '
                'if neither model or dataset id is provided, entire cache will be cleared.'
            ))
        group.add_argument(
            '--dataset',
            type=str,
            help=(
                'The id of the dataset whose cache will be cleared. For clear-cache, '
                'if neither model or dataset id is provided, entire cache will be cleared.'
            ))

        parser.set_defaults(func=subparser_func)

    def execute(self):
        """Run the command (always goes through the confirmation prompt)."""
        self._execute_with_confirmation()

    def _execute_with_confirmation(self):
        """Ask for confirmation, then delete the requested cache folders.

        Only an answer of ``Y``/``y`` (surrounding whitespace ignored)
        proceeds; anything else, including end-of-input, aborts.
        """
        model_id = self.args.model
        dataset_id = self.args.dataset
        # Descriptive names instead of shadowing the builtins all() / id().
        clear_all = not (model_id or dataset_id)
        single_model = bool(model_id)
        repo_id = model_id if single_model else dataset_id

        prompt = '\nYou are about to delete '
        if clear_all:
            prompt += f'entire ModelScope cache at {self.cache_dir}, including ALL models and dataset.\n'
        elif single_model:
            prompt += f'local cache for model {repo_id}. '
        else:
            prompt += f'local cache for dataset {repo_id}. '

        try:
            user_input = input(
                prompt
                + '\nPlease press Y or y to proceed, any other key to abort.\n'
            ).strip().upper()
        except EOFError:
            # No interactive stdin: treat it as "not confirmed" rather than
            # crashing with a traceback.
            user_input = ''

        if user_input != 'Y':
            print('Operation aborted.')
            return

        if clear_all:
            self._remove_directory(self.cache_dir)
            # Report success only if the directory is really gone; a failed
            # removal has already printed its own error message.
            if not os.path.exists(self.cache_dir):
                print('Cache cleared.')
            return

        root = os.path.join(self.cache_dir,
                            'hub' if single_model else 'datasets')
        candidates = [
            os.path.join(root, repo_id),
            os.path.join(root, TEMPORARY_FOLDER_NAME, repo_id),
        ]
        existing = [path for path in candidates if os.path.exists(path)]
        if not existing:
            kind = 'Model' if single_model else 'Dataset'
            print(f'Cache for {kind} {repo_id} not found. Nothing to do.')
            return

        # Attempt every existing folder (no short-circuit) so that one failure
        # does not prevent the other folder from being removed.
        outcomes = [self._remove_directory(path) for path in existing]
        if any(outcomes):
            print('Cache cleared.')

    def _remove_directory(self, path):
        """Recursively delete ``path`` if it exists.

        Args:
            path: Directory to delete.

        Returns:
            bool: True when the directory existed and was removed, False when
            it did not exist or the removal failed (the error is printed).
        """
        if not os.path.exists(path):
            return False
        try:
            shutil.rmtree(path)
        except OSError as e:
            print(f'An error occurred while clearing cache at {path}: {e}')
            return False
        print(f'Cache folder {path} removed.')
        return True
|
||||
@@ -3,6 +3,7 @@
|
||||
import argparse
|
||||
import logging
|
||||
|
||||
from modelscope.cli.clearcache import ClearCacheCMD
|
||||
from modelscope.cli.download import DownloadCMD
|
||||
from modelscope.cli.login import LoginCMD
|
||||
from modelscope.cli.modelcard import ModelCardCMD
|
||||
@@ -23,6 +24,7 @@ def run_cmd():
|
||||
subparsers = parser.add_subparsers(help='modelscope commands helpers')
|
||||
|
||||
DownloadCMD.define_args(subparsers)
|
||||
ClearCacheCMD.define_args(subparsers)
|
||||
PluginsCMD.define_args(subparsers)
|
||||
PipelineCMD.define_args(subparsers)
|
||||
ModelCardCMD.define_args(subparsers)
|
||||
|
||||
@@ -555,7 +555,7 @@ def get_module_without_script(self) -> DatasetModule:
|
||||
|
||||
download_config = self.download_config.copy()
|
||||
if download_config.download_desc is None:
|
||||
download_config.download_desc = 'Downloading readme'
|
||||
download_config.download_desc = 'Downloading [README.md]'
|
||||
try:
|
||||
url_or_filename = _ms_api.get_dataset_file_url(
|
||||
file_name='README.md',
|
||||
@@ -989,7 +989,6 @@ class DatasetsWrapperHF:
|
||||
download_config=download_config,
|
||||
download_mode=download_mode,
|
||||
verification_mode=verification_mode,
|
||||
try_from_hf_gcs=False,
|
||||
num_proc=num_proc,
|
||||
storage_options=storage_options,
|
||||
# base_path=builder_instance.base_path,
|
||||
|
||||
@@ -5,27 +5,138 @@
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import copy
|
||||
import shutil
|
||||
import time
|
||||
import warnings
|
||||
import inspect
|
||||
from contextlib import contextmanager
|
||||
from functools import partial
|
||||
from pathlib import Path
|
||||
from typing import Optional, Union
|
||||
from urllib.parse import urljoin, urlparse
|
||||
import requests
|
||||
from tqdm import tqdm
|
||||
|
||||
from datasets import config
|
||||
from datasets.utils.file_utils import hash_url_to_filename, get_authentication_headers_for_url, ftp_head, fsspec_head, \
|
||||
http_head, _raise_if_offline_mode_is_enabled, ftp_get, fsspec_get, http_get
|
||||
from datasets.utils.file_utils import hash_url_to_filename, \
|
||||
get_authentication_headers_for_url, fsspec_head, fsspec_get
|
||||
from filelock import FileLock
|
||||
|
||||
from modelscope.utils.config_ds import MS_DATASETS_CACHE
|
||||
from modelscope.utils.logger import get_logger
|
||||
from modelscope.hub.api import ModelScopeConfig
|
||||
|
||||
from modelscope import __version__
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
def get_datasets_user_agent_ms(user_agent: Optional[Union[str, dict]] = None) -> str:
    """Compose the user-agent string sent with dataset HTTP requests.

    The string is a ``'; '``-separated list of ``name/version`` entries:
    ``datasets`` (using ``__version__``), python and pyarrow, followed by
    torch / tensorflow / jax when ``config`` reports them as available.

    Args:
        user_agent: Extra information to append. A dict is rendered as
            ``key/value`` pairs, a str is appended verbatim, anything else
            is ignored.

    Returns:
        The assembled user-agent string.
    """
    segments = [
        f'datasets/{__version__}',
        f'python/{config.PY_VERSION}',
        f'pyarrow/{config.PYARROW_VERSION}',
    ]

    optional_frameworks = (
        (config.TORCH_AVAILABLE, 'torch', 'TORCH_VERSION'),
        (config.TF_AVAILABLE, 'tensorflow', 'TF_VERSION'),
        (config.JAX_AVAILABLE, 'jax', 'JAX_VERSION'),
    )
    for available, label, version_attr in optional_frameworks:
        if available:
            # Look the version up only for frameworks that are present.
            segments.append(f'{label}/{getattr(config, version_attr)}')

    if isinstance(user_agent, dict):
        # Added as ONE segment so an empty dict still contributes a separator.
        segments.append('; '.join(f'{k}/{v}' for k, v in user_agent.items()))
    elif isinstance(user_agent, str):
        segments.append(user_agent)

    return '; '.join(segments)
|
||||
|
||||
|
||||
def _request_with_retry_ms(
    method: str,
    url: str,
    max_retries: int = 2,
    base_wait_time: float = 0.5,
    max_wait_time: float = 2,
    timeout: float = 10.0,
    **params,
) -> requests.Response:
    """Wrapper around requests that retries on connection failures, with exponential backoff.

    Args:
        method (str): HTTP method, such as 'GET' or 'HEAD'.
        url (str): The URL of the resource to fetch.
        max_retries (int): Maximum number of retries, defaults to 2. With 0 the first failure is raised.
        base_wait_time (float): Duration (in seconds) to wait before retrying the first time. Wait time between
            retries then grows exponentially, capped by max_wait_time.
        max_wait_time (float): Maximum amount of time between two retries, in seconds.
        timeout (float): Timeout in seconds passed to :obj:`requests.request`.
        **params (additional keyword arguments): Params to pass to :obj:`requests.request`.

    Returns:
        requests.Response: The response of the first attempt that did not raise.

    Raises:
        requests.exceptions.ConnectionError: (including ConnectTimeout) when the request still fails after
            ``max_retries`` retries.
    """
    tries = 0
    while True:
        tries += 1
        try:
            return requests.request(method=method.upper(), url=url, timeout=timeout, **params)
        except (requests.exceptions.ConnectTimeout, requests.exceptions.ConnectionError):
            if tries > max_retries:
                # Bare raise keeps the original traceback.
                raise
            # Show progress as "attempt/limit" (the previous f-string divided the two numbers).
            logger.info(f'{method} request to {url} timed out, retrying... [{tries}/{max_retries}]')
            sleep_time = min(max_wait_time, base_wait_time * 2 ** (tries - 1))  # Exponential backoff
            time.sleep(sleep_time)
|
||||
|
||||
|
||||
def http_head_ms(
    url, proxies=None, headers=None, cookies=None, allow_redirects=True, timeout=10.0, max_retries=0
) -> requests.Response:
    """Send a HEAD request to ``url`` through ``_request_with_retry_ms``.

    The caller's ``headers`` are deep-copied (never mutated) and the
    ``user-agent`` entry is replaced by the value built by
    ``get_datasets_user_agent_ms`` from the caller-supplied one.

    Args:
        url: Target URL.
        proxies: Forwarded to the request.
        headers: Optional headers mapping.
        cookies: Forwarded to the request.
        allow_redirects: Forwarded to the request.
        timeout: Forwarded to the request.
        max_retries: Number of retries on connection failure.

    Returns:
        requests.Response: The response to the HEAD request.
    """
    request_headers = copy.deepcopy(headers) or {}
    caller_agent = request_headers.get('user-agent')
    request_headers['user-agent'] = get_datasets_user_agent_ms(user_agent=caller_agent)

    request_kwargs = dict(
        proxies=proxies,
        headers=request_headers,
        cookies=cookies,
        allow_redirects=allow_redirects,
        timeout=timeout,
        max_retries=max_retries,
    )
    return _request_with_retry_ms(method='HEAD', url=url, **request_kwargs)
|
||||
|
||||
|
||||
def http_get_ms(
    url, temp_file, proxies=None, resume_size=0, headers=None, cookies=None, timeout=100.0, max_retries=0, desc=None
) -> Optional[requests.Response]:
    """Stream ``url`` with a GET request into ``temp_file``, showing a progress bar.

    Args:
        url: URL to download.
        temp_file: Writable binary file object receiving the content. When it
            is ``None`` nothing is written and the streaming response is
            returned to the caller instead.
        proxies: Forwarded to the request.
        resume_size: Number of bytes already downloaded; when positive a
            ``Range`` header is sent to request only the remainder.
        headers: Optional headers mapping (copied, not mutated).
        cookies: Forwarded to the request.
        timeout: Forwarded to the request.
        max_retries: Number of retries on connection failure.
        desc: Progress bar description, defaults to 'Downloading'.

    Returns:
        The response when ``temp_file`` is ``None``; otherwise ``None``.
    """
    headers = dict(headers) if headers is not None else {}
    headers['user-agent'] = get_datasets_user_agent_ms(user_agent=headers.get('user-agent'))
    if resume_size > 0:
        headers['Range'] = f'bytes={resume_size:d}-'
    response = _request_with_retry_ms(
        method='GET',
        url=url,
        stream=True,
        proxies=proxies,
        headers=headers,
        cookies=cookies,
        max_retries=max_retries,
        timeout=timeout,
    )
    if temp_file is None:
        return response
    if response.status_code == 416:  # Range not satisfiable
        return None
    content_length = response.headers.get('Content-Length')
    total = resume_size + int(content_length) if content_length is not None else None

    # The context manager closes the bar even when streaming or writing raises,
    # so a failed download does not leave a dangling progress bar.
    with tqdm(total=total, initial=resume_size, unit_scale=True, unit='B', desc=desc or 'Downloading') as progress:
        for chunk in response.iter_content(chunk_size=1024):
            progress.update(len(chunk))
            temp_file.write(chunk)
    return None
|
||||
|
||||
|
||||
def get_from_cache_ms(
|
||||
url,
|
||||
cache_dir=None,
|
||||
@@ -42,7 +153,7 @@ def get_from_cache_ms(
|
||||
ignore_url_params=False,
|
||||
storage_options=None,
|
||||
download_desc=None,
|
||||
disable_tqdm=False,
|
||||
disable_tqdm=None,
|
||||
) -> str:
|
||||
"""
|
||||
Given a URL, look for the corresponding file in the local cache.
|
||||
@@ -88,6 +199,8 @@ def get_from_cache_ms(
|
||||
# if we don't ask for 'force_download' then we spare a request
|
||||
filename = hash_url_to_filename(cached_url, etag=None)
|
||||
cache_path = os.path.join(cache_dir, filename)
|
||||
if download_desc is None:
|
||||
download_desc = 'Downloading [' + filename + ']'
|
||||
|
||||
if os.path.exists(cache_path) and not force_download and not use_etag:
|
||||
return cache_path
|
||||
@@ -100,16 +213,14 @@ def get_from_cache_ms(
|
||||
# We don't have the file locally or we need an eTag
|
||||
if not local_files_only:
|
||||
scheme = urlparse(url).scheme
|
||||
if scheme == 'ftp':
|
||||
connected = ftp_head(url)
|
||||
elif scheme not in ('http', 'https'):
|
||||
if scheme not in ('http', 'https'):
|
||||
response = fsspec_head(url, storage_options=storage_options)
|
||||
# s3fs uses "ETag", gcsfs uses "etag"
|
||||
etag = (response.get('ETag', None) or response.get('etag', None)) if use_etag else None
|
||||
connected = True
|
||||
try:
|
||||
cookies = ModelScopeConfig.get_cookies()
|
||||
response = http_head(
|
||||
response = http_head_ms(
|
||||
url,
|
||||
allow_redirects=True,
|
||||
proxies=proxies,
|
||||
@@ -166,7 +277,6 @@ def get_from_cache_ms(
|
||||
)
|
||||
elif response is not None and response.status_code == 404:
|
||||
raise FileNotFoundError(f"Couldn't find file at {url}")
|
||||
_raise_if_offline_mode_is_enabled(f'Tried to reach {url}')
|
||||
if head_error is not None:
|
||||
raise ConnectionError(f"Couldn't reach {url} ({repr(head_error)})")
|
||||
elif response is not None:
|
||||
@@ -205,48 +315,21 @@ def get_from_cache_ms(
|
||||
# Download to temporary file, then copy to cache path once finished.
|
||||
# Otherwise, you get corrupt cache entries if the download gets interrupted.
|
||||
with temp_file_manager() as temp_file:
|
||||
logger.info(f'Downloading to {temp_file.name}')
|
||||
|
||||
# GET file object
|
||||
if scheme == 'ftp':
|
||||
ftp_get(url, temp_file)
|
||||
elif scheme not in ('http', 'https'):
|
||||
fsspec_get_sig = inspect.signature(fsspec_get)
|
||||
if 'disable_tqdm' in fsspec_get_sig.parameters:
|
||||
fsspec_get(url,
|
||||
temp_file,
|
||||
storage_options=storage_options,
|
||||
desc=download_desc,
|
||||
disable_tqdm=disable_tqdm
|
||||
)
|
||||
else:
|
||||
fsspec_get(url, temp_file, storage_options=storage_options, desc=download_desc)
|
||||
if scheme not in ('http', 'https'):
|
||||
fsspec_get(url, temp_file, storage_options=storage_options, desc=download_desc)
|
||||
else:
|
||||
http_get_sig = inspect.signature(http_get)
|
||||
|
||||
if 'disable_tqdm' in http_get_sig.parameters:
|
||||
http_get(
|
||||
url,
|
||||
temp_file=temp_file,
|
||||
proxies=proxies,
|
||||
resume_size=resume_size,
|
||||
headers=headers,
|
||||
cookies=cookies,
|
||||
max_retries=max_retries,
|
||||
desc=download_desc,
|
||||
disable_tqdm=disable_tqdm,
|
||||
)
|
||||
else:
|
||||
http_get(
|
||||
url,
|
||||
temp_file=temp_file,
|
||||
proxies=proxies,
|
||||
resume_size=resume_size,
|
||||
headers=headers,
|
||||
cookies=cookies,
|
||||
max_retries=max_retries,
|
||||
desc=download_desc,
|
||||
)
|
||||
http_get_ms(
|
||||
url,
|
||||
temp_file=temp_file,
|
||||
proxies=proxies,
|
||||
resume_size=resume_size,
|
||||
headers=headers,
|
||||
cookies=cookies,
|
||||
max_retries=max_retries,
|
||||
desc=download_desc,
|
||||
)
|
||||
|
||||
logger.info(f'storing {url} in cache at {cache_path}')
|
||||
shutil.move(temp_file.name, cache_path)
|
||||
|
||||
@@ -83,7 +83,7 @@ template_info = [
|
||||
TemplateInfo(
|
||||
template=TemplateType.chatml,
|
||||
template_regex=
|
||||
f'.*{cases("yi")}{no_multi_modal()}{no("coder")}.*{chat_suffix}.*',
|
||||
f'.*{cases("yi")}{no_multi_modal()}{no("coder")}.*',
|
||||
modelfile_link=
|
||||
'https://modelscope.oss-cn-beijing.aliyuncs.com/llm_template/ollama/yi-1.5.modelfile',
|
||||
),
|
||||
@@ -110,6 +110,10 @@ template_info = [
|
||||
'https://modelscope.oss-cn-beijing.aliyuncs.com/llm_template/ollama/glm4.modelfile',
|
||||
),
|
||||
|
||||
TemplateInfo(
|
||||
template_regex=f'.*{cases("llava-llama-3")}.*',
|
||||
modelfile_link='https://modelscope.oss-cn-beijing.aliyuncs.com/llm_template/ollama/llava-llama-3.modelfile'),
|
||||
|
||||
# baichuan
|
||||
TemplateInfo(
|
||||
template=TemplateType.baichuan,
|
||||
|
||||
@@ -127,7 +127,9 @@ def _patch_pretrained_class():
|
||||
@classmethod
|
||||
def from_pretrained(cls, pretrained_model_name_or_path, *model_args,
|
||||
**kwargs):
|
||||
ignore_file_pattern = [r'\w+\.bin', r'\w+\.safetensors']
|
||||
ignore_file_pattern = [
|
||||
r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt'
|
||||
]
|
||||
model_dir = get_model_dir(pretrained_model_name_or_path,
|
||||
ignore_file_pattern, **kwargs)
|
||||
return ori_from_pretrained(cls, model_dir, *model_args, **kwargs)
|
||||
@@ -143,14 +145,18 @@ def _patch_pretrained_class():
|
||||
@classmethod
|
||||
def from_pretrained(cls, pretrained_model_name_or_path, *model_args,
|
||||
**kwargs):
|
||||
ignore_file_pattern = [r'\w+\.bin', r'\w+\.safetensors']
|
||||
ignore_file_pattern = [
|
||||
r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt'
|
||||
]
|
||||
model_dir = get_model_dir(pretrained_model_name_or_path,
|
||||
ignore_file_pattern, **kwargs)
|
||||
return ori_from_pretrained(cls, model_dir, *model_args, **kwargs)
|
||||
|
||||
@classmethod
|
||||
def get_config_dict(cls, pretrained_model_name_or_path, **kwargs):
|
||||
ignore_file_pattern = [r'\w+\.bin', r'\w+\.safetensors']
|
||||
ignore_file_pattern = [
|
||||
r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt'
|
||||
]
|
||||
model_dir = get_model_dir(pretrained_model_name_or_path,
|
||||
ignore_file_pattern, **kwargs)
|
||||
return ori_get_config_dict(cls, model_dir, **kwargs)
|
||||
@@ -242,11 +248,20 @@ AutoModelForTokenClassification = get_wrapped_class(
|
||||
AutoModelForTokenClassificationHF)
|
||||
|
||||
AutoTokenizer = get_wrapped_class(
|
||||
AutoTokenizerHF, ignore_file_pattern=[r'\w+\.bin', r'\w+\.safetensors'])
|
||||
AutoTokenizerHF,
|
||||
ignore_file_pattern=[
|
||||
r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt'
|
||||
])
|
||||
AutoConfig = get_wrapped_class(
|
||||
AutoConfigHF, ignore_file_pattern=[r'\w+\.bin', r'\w+\.safetensors'])
|
||||
AutoConfigHF,
|
||||
ignore_file_pattern=[
|
||||
r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt'
|
||||
])
|
||||
GenerationConfig = get_wrapped_class(
|
||||
GenerationConfigHF, ignore_file_pattern=[r'\w+\.bin', r'\w+\.safetensors'])
|
||||
GenerationConfigHF,
|
||||
ignore_file_pattern=[
|
||||
r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt'
|
||||
])
|
||||
GPTQConfig = GPTQConfigHF
|
||||
AwqConfig = AwqConfigHF
|
||||
BitsAndBytesConfig = BitsAndBytesConfigHF
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
addict
|
||||
attrs
|
||||
datasets>=2.18.0,<3.0.0
|
||||
datasets>=3.0.0
|
||||
einops
|
||||
oss2
|
||||
Pillow
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
addict
|
||||
attrs
|
||||
datasets>=2.18.0,<3.0.0
|
||||
datasets>=3.0.0
|
||||
einops
|
||||
oss2
|
||||
Pillow
|
||||
|
||||
@@ -44,6 +44,15 @@ class TestStreamLoad(unittest.TestCase):
|
||||
|
||||
assert sample['question'], f'Failed to load sample from {repo_id}'
|
||||
|
||||
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_stream_swift_jsonl(self):
    """Load the train split in streaming mode and check the first sample has an ``id``."""
    repo_id: str = 'iic/MSAgent-MultiRole'
    stream = MsDataset.load(repo_id, split='train', use_streaming=True)
    # Only the first record is pulled from the stream.
    first_record = next(iter(stream))
    logger.info(first_record)

    assert first_record['id'], f'Failed to load sample from {repo_id}'
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -100,6 +100,11 @@ class TestToOllama(unittest.TestCase):
|
||||
ollama = TemplateLoader.to_ollama(
|
||||
'QuantFactory/Mistral-Nemo-Japanese-Instruct-2408-GGUF')
|
||||
self.assertTrue(ollama is not None)
|
||||
ollama = TemplateLoader.to_ollama('AI-ModelScope/Yi-1.5-9B-32K-GGUF')
|
||||
self.assertTrue(ollama is not None)
|
||||
ollama = TemplateLoader.to_ollama(
|
||||
'AI-ModelScope/llava-llama-3-8b-v1_1-gguf')
|
||||
self.assertTrue(ollama is not None)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
Reference in New Issue
Block a user