Merge branch 'release/1.32' into build_swift_image

This commit is contained in:
Jintao Huang
2025-12-14 16:33:58 +08:00
29 changed files with 1321 additions and 307 deletions

View File

@@ -22,7 +22,7 @@ do
echo "get gpu lock $gpu"
CONTAINER_NAME="modelscope-ci-$idx"
let is_get_file_lock=true
is_get_file_lock=true
# pull image if there are update
docker pull ${IMAGE_NAME}:${IMAGE_VERSION}

View File

@@ -54,7 +54,7 @@ jobs:
- name: Checkout
uses: actions/checkout@v3
with:
lfs: 'true'
lfs: 'false'
submodules: 'true'
fetch-depth: ${{ github.event_name == 'pull_request' && 2 || 0 }}
- name: Get changed files
@@ -65,8 +65,9 @@ jobs:
else
echo "PR_CHANGED_FILES=$(git diff --name-only ${{ github.event.before }} ${{ github.event.after }} | xargs)" >> $GITHUB_ENV
fi
- name: Checkout LFS objects
run: git lfs checkout
- name: Fetch LFS objects
run: |
git lfs pull
- name: Run unittest
shell: bash
run: |

View File

@@ -24,12 +24,16 @@ jobs:
sudo chown -R $USER:$USER $ACTION_RUNNER_DIR
- name: Checkout
uses: actions/checkout@v2
uses: actions/checkout@v4
with:
lfs: 'true'
submodules: 'true'
- name: Checkout LFS objects
run: git lfs checkout
lfs: false
submodules: true
fetch-depth: 0
- name: Fetch LFS objects
run: |
git lfs pull
- name: Run unittest
shell: bash
run: |

View File

@@ -22,7 +22,9 @@ jobs:
- name: Install wheel
run: pip install wheel && pip install -r requirements/framework.txt
- name: Build ModelScope
run: python setup.py sdist bdist_wheel
# Build AST template before packaging
run: |
make whl
- name: Publish package to PyPI
run: |
pip install twine

View File

@@ -1,3 +1,3 @@
recursive-include modelscope/configs *.py *.cu *.h *.cpp
recursive-include modelscope/ops *.py *.cu *.h *.cpp
recursive-include modelscope/cli/template *.tpl
recursive-include modelscope/utils *.json

View File

@@ -18,7 +18,8 @@ test:
.PHONY: whl
whl:
python setup.py sdist bdist_wheel
python -c "from modelscope.utils.ast_utils import generate_ast_template; generate_ast_template()"
python setup.py sdist --dist-dir $(WHL_BUILD_DIR)/dist bdist_wheel --dist-dir $(WHL_BUILD_DIR)/dist
.PHONY: clean
clean:

View File

@@ -16,6 +16,7 @@ from modelscope.cli.server import ServerCMD
from modelscope.cli.upload import UploadCMD
from modelscope.hub.constants import MODELSCOPE_ASCII
from modelscope.utils.logger import get_logger
from modelscope.version import __version__
logger = get_logger(log_level=logging.WARNING)
@@ -24,6 +25,11 @@ def run_cmd():
print(MODELSCOPE_ASCII)
parser = argparse.ArgumentParser(
'ModelScope Command Line tool', usage='modelscope <command> [<args>]')
parser.add_argument(
'-V',
'--version',
action='version',
version=f'ModelScope CLI {__version__}')
parser.add_argument(
'--token', default=None, help='Specify ModelScope SDK token.')
subparsers = parser.add_subparsers(help='modelscope commands helpers')

View File

@@ -122,6 +122,13 @@ class CreateCMD(CLICommand):
type=str,
default='',
help='Path in the repository to upload to.')
aigc_group.add_argument(
'--model_source',
type=str,
default='USER_UPLOAD',
help=
'Source of the AIGC model. `USER_UPLOAD`, `TRAINED_FROM_MODELSCOPE` or `TRAINED_FROM_ALIYUN_FC`.'
)
parser.set_defaults(func=subparser_func)
@@ -179,10 +186,11 @@ class CreateCMD(CLICommand):
model_path=self.args.model_path,
aigc_type=self.args.aigc_type,
base_model_type=self.args.base_model_type,
revision=self.args.revision,
tag=self.args.revision,
description=self.args.description,
base_model_id=self.args.base_model_id,
path_in_repo=self.args.path_in_repo,
model_source=self.args.model_source,
)
# Convert visibility string to int for the API call

View File

@@ -1 +1,2 @@
from .callback import ProgressCallback
from .commit_scheduler import CommitScheduler

View File

@@ -54,8 +54,8 @@ from modelscope.hub.constants import (API_HTTP_CLIENT_MAX_RETRIES,
UPLOAD_MAX_FILE_SIZE,
UPLOAD_NORMAL_FILE_SIZE_TOTAL_LIMIT,
UPLOAD_SIZE_THRESHOLD_TO_ENFORCE_LFS,
DatasetVisibility, Licenses,
ModelVisibility, Visibility,
VALID_SORT_KEYS, DatasetVisibility,
Licenses, ModelVisibility, Visibility,
VisibilityMap)
from modelscope.hub.errors import (InvalidParameter, NotExistError,
NotLoginException, RequestError,
@@ -303,6 +303,7 @@ class HubApi:
'WeightsSize': aigc_model.weight_size,
'ModelPath': aigc_model.model_path,
'TriggerWords': aigc_model.trigger_words,
'ModelSource': aigc_model.model_source,
})
if aigc_model.official_tags:
@@ -912,6 +913,75 @@ class HubApi:
raise_for_http_status(r)
return None
def list_datasets(self,
                  owner_or_group: str,
                  *,
                  page_number: Optional[int] = 1,
                  page_size: Optional[int] = 10,
                  sort: Optional[str] = None,
                  search: Optional[str] = None,
                  endpoint: Optional[str] = None,
                  ) -> dict:
    """List datasets through the OpenAPI endpoint with pagination,
    filtering and sorting.

    Args:
        owner_or_group (str): Filter by dataset author (organization or
            individual); sent as the `author` query parameter when non-empty.
        page_number (int, optional): 1-based page index. Defaults to 1.
        page_size (int, optional): Items per page. Defaults to 10.
        sort (str, optional): One of VALID_SORT_KEYS
            ('default', 'downloads', 'likes', 'last_modified'); server
            default ordering when omitted.
        search (str, optional): Substring keyword matched against the
            dataset's Chinese name, English name and authors.
        endpoint (str, optional): Hub endpoint; falls back to the
            instance's configured endpoint when not given.

    Returns:
        dict: The OpenAPI `data` payload, containing `datasets`,
        `total_count`, `page_number` and `page_size`.

    Raises:
        InvalidParameter: If `sort` is not a supported sort key.
        RequestError: If the response does not match the expected
            success schema.
    """
    base = endpoint or self.endpoint
    url = f'{base}/openapi/v1/datasets'

    # Assemble the query string; optional filters are added only when set.
    query: Dict[str, Any] = {
        'page_number': page_number,
        'page_size': page_size,
    }
    if sort:
        if sort not in VALID_SORT_KEYS:
            raise InvalidParameter(
                f'Invalid sort key: {sort}. Supported sort keys: {list(VALID_SORT_KEYS)}')
        query['sort'] = sort
    if search:
        query['search'] = search
    if owner_or_group:
        query['author'] = owner_or_group

    response = self.session.get(
        url,
        params=query,
        cookies=ModelScopeConfig.get_cookies(),
        headers=self.builder_headers(self.headers))
    raise_for_http_status(response)

    payload = response.json()
    # OpenAPI success schema wraps the result in a `data` field.
    if payload.get('success') is True and 'data' in payload:
        return payload['data']
    # Fallback for unexpected schema.
    raise RequestError(payload.get('message') or 'Failed to list datasets')
def _check_cookie(self, use_cookies: Union[bool, CookieJar] = False) -> CookieJar: # noqa
cookies = None
if isinstance(use_cookies, CookieJar):
@@ -1238,17 +1308,6 @@ class HubApi:
logger.info(f'Create dataset success: {dataset_repo_url}')
return dataset_repo_url
def list_datasets(self, endpoint: Optional[str] = None):
if not endpoint:
endpoint = self.endpoint
path = f'{endpoint}/api/v1/datasets'
params = {}
r = self.session.get(path, params=params,
headers=self.builder_headers(self.headers))
raise_for_http_status(r)
dataset_list = r.json()[API_RESPONSE_FIELD_DATA]
return [x['Name'] for x in dataset_list]
def delete_dataset(self, dataset_id: str, endpoint: Optional[str] = None):
cookies = ModelScopeConfig.get_cookies()
@@ -1802,6 +1861,7 @@ class HubApi:
user_name = os.environ[MODELSCOPE_CLOUD_USERNAME]
download_uv_url = f'{endpoint}/api/v1/datasets/{namespace}/{dataset_name}/download/uv/' \
f'{channel}?user={user_name}'
download_uv_resp = self.session.post(download_uv_url, cookies=cookies,
headers=self.builder_headers(self.headers))
download_uv_resp = download_uv_resp.json()

View File

@@ -0,0 +1,381 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
# Copyright 2022-present, the HuggingFace Inc. team.
import atexit
import contextlib
import os
import time
import types
from concurrent.futures import Future, ThreadPoolExecutor
from io import SEEK_END, SEEK_SET, BytesIO
from pathlib import Path
from threading import Event, Lock, Thread
from typing import Dict, List, Optional, Union

from modelscope.hub.api import HubApi
from modelscope.hub.constants import Visibility
from modelscope.utils.constant import DEFAULT_REPOSITORY_REVISION
from modelscope.utils.logger import get_logger
from modelscope.utils.repo_utils import CommitInfo, RepoUtils
logger = get_logger()
IGNORE_GIT_FOLDER_PATTERNS = ['.git', '.git/*', '*/.git', '**/.git/**']
@contextlib.contextmanager
def patch_upload_folder_for_scheduler(scheduler_instance):
    """Temporarily patch ``HubApi._prepare_upload_folder`` so that a
    CommitScheduler stages only the files whose mtime changed since the
    last successful scheduled commit. The original method is restored on
    exit, even if the upload raises.
    """
    hub_api = scheduler_instance.api
    saved_prepare = hub_api._prepare_upload_folder

    def _incremental_prepare_upload_folder(
        api_self,
        folder_path_or_files: Union[str, Path, List[str], List[Path]],
        path_in_repo: str,
        allow_patterns: Optional[Union[List[str], str]] = None,
        ignore_patterns: Optional[Union[List[str], str]] = None,
    ) -> List[Union[tuple, list]]:
        """Incremental replacement active only inside this context."""
        with scheduler_instance.lock:
            # Scheduled commits watch a single folder; reject other inputs.
            if isinstance(folder_path_or_files, list):
                raise ValueError(
                    'Uploading multiple files or folders is not supported for scheduled commit.'
                )
            if os.path.isfile(folder_path_or_files):
                raise ValueError(
                    'Uploading file is not supported for scheduled commit.')
            root = Path(folder_path_or_files).expanduser().resolve()

            logger.debug('Listing files to upload for scheduled commit.')
            rel_to_abs = {
                abs_path.relative_to(root).as_posix(): abs_path
                for abs_path in sorted(root.glob('**/*'))
                if abs_path.is_file()
            }
            repo_prefix = f"{path_in_repo.strip('/')}/" if path_in_repo else ''

            staged_objects = []
            mtime_updates = {}
            selected = RepoUtils.filter_repo_objects(
                rel_to_abs.keys(),
                allow_patterns=allow_patterns,
                ignore_patterns=ignore_patterns)
            for rel in selected:
                abs_path = rel_to_abs[rel]
                file_stat = abs_path.stat()
                # Stage the file when it is new to the tracker or its mtime
                # differs from the last uploaded snapshot.
                if scheduler_instance.last_uploaded.get(
                        abs_path) != file_stat.st_mtime:
                    staged_objects.append(
                        (repo_prefix + rel,
                         PartialFileIO(abs_path, file_stat.st_size)))
                    mtime_updates[abs_path] = file_stat.st_mtime

            # The scheduler commits these mtimes only after a successful push.
            scheduler_instance._pending_tracker_updates = mtime_updates
            if not staged_objects:
                logger.debug(
                    'No changed files to upload for scheduled commit.')
            return staged_objects

    try:
        hub_api._prepare_upload_folder = types.MethodType(
            _incremental_prepare_upload_folder, hub_api)
        yield
    finally:
        hub_api._prepare_upload_folder = saved_prepare
class CommitScheduler:
    """Periodically upload a local folder to the ModelScope Hub.

    A daemon thread triggers a background commit every ``interval`` minutes;
    only files whose modification time changed since the last successful
    scheduled commit are uploaded (see
    ``patch_upload_folder_for_scheduler``). Use it as a context manager to
    guarantee a final commit on exit, or call :meth:`stop` manually.

    Args:
        repo_id (`str`): The id of the repo to commit to.
        folder_path (`str` or `Path`): Local folder monitored and uploaded
            periodically.
        interval (`int` or `float`, *optional*): Minutes between uploads.
            Defaults to 5.
        path_in_repo (`str`, *optional*): Target directory inside the repo,
            such as `"models/"`; repository root when omitted.
        repo_type (`str`, *optional*): Repository type. Defaults to `model`.
        revision (`str`, *optional*): Branch or revision committed to.
            Defaults to `master`.
        visibility (`str`, *optional*): `public`, `private` or `internal`;
            defaults to `public`.
        token (`str`, *optional*): Access token; defaults to the token
            saved on the machine.
        allow_patterns (`List[str]` or `str`, *optional*): Only matching
            files are uploaded.
        ignore_patterns (`List[str]` or `str`, *optional*): Matching files
            are skipped; `.git` folders are always ignored.
        hub_api (`HubApi`, *optional*): Custom [`HubApi`] instance for Hub
            operations (custom user agent, token settings, ...).

    Example:
        ```py
        >>> from modelscope.hub import CommitScheduler
        >>> with CommitScheduler(
        ...     repo_id="my_experiments",
        ...     repo_type="dataset",
        ...     folder_path="workspace",
        ...     interval=10
        ... ) as scheduler:
        ...     ...  # write files under workspace/
        # scheduler automatically stops and performs a final upload
        ```
    """

    def __init__(
        self,
        *,
        repo_id: str,
        folder_path: Union[str, Path],
        interval: Union[int, float] = 5,
        path_in_repo: Optional[str] = None,
        repo_type: Optional[str] = None,
        revision: Optional[str] = DEFAULT_REPOSITORY_REVISION,
        visibility: Optional[str] = Visibility.PUBLIC,
        token: Optional[str] = None,
        allow_patterns: Optional[Union[List[str], str]] = None,
        ignore_patterns: Optional[Union[List[str], str]] = None,
        hub_api: Optional[HubApi] = None,
    ) -> None:
        # Validate cheap arguments first so bad input fails before any
        # network round-trip (previously checked only after create_repo).
        if interval <= 0:
            raise ValueError(
                f'"interval" must be a positive number, not "{interval}".')

        self.api = hub_api or HubApi()
        self.folder_path = Path(folder_path).expanduser().resolve()
        if not self.folder_path.exists():
            raise ValueError(f'Folder path does not exist: {folder_path}')

        self.path_in_repo = path_in_repo or ''
        self.allow_patterns = allow_patterns
        if ignore_patterns is None:
            ignore_patterns = []
        elif isinstance(ignore_patterns, str):
            ignore_patterns = [ignore_patterns]
        # Never upload git metadata.
        self.ignore_patterns = ignore_patterns + IGNORE_GIT_FOLDER_PATTERNS

        # Create the repo up-front (no-op when it already exists).
        self.repo_url = self.api.create_repo(
            repo_id=repo_id,
            token=token,
            repo_type=repo_type,
            visibility=visibility,
            exist_ok=True,
            create_default_config=False,
        )
        self.repo_id = repo_id
        self.repo_type = repo_type
        self.revision = revision
        self.token = token

        # Maps absolute file path -> mtime at the last successful upload.
        self.last_uploaded: Dict[Path, float] = {}

        self.lock = Lock()
        self.interval = interval
        # Event instead of a plain bool: stop() wakes the sleeping
        # scheduler thread immediately instead of waiting out the interval.
        self._stop_event = Event()
        self.last_future: Optional[Future] = None
        logger.info(
            f'Scheduled job to push {self.folder_path} to {self.repo_id} at an interval of {self.interval} minutes.'
        )
        # Single worker so scheduled commits never overlap.
        self.executor = ThreadPoolExecutor(max_workers=1)
        self._scheduler_thread = Thread(
            target=self._run_scheduler, daemon=True)
        self._scheduler_thread.start()
        # Best-effort final commit when the interpreter exits.
        atexit.register(self.commit_scheduled_changes)

    def stop(self) -> None:
        """Stop the scheduler; takes effect immediately."""
        self._stop_event.set()

    def __enter__(self) -> 'CommitScheduler':
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        # Flush pending changes synchronously, then stop the scheduler.
        self.trigger().result()
        self.stop()
        return

    def _run_scheduler(self) -> None:
        """Daemon loop: trigger a commit, then wait for interval or stop."""
        while not self._stop_event.is_set():
            self.last_future = self.trigger()
            # wait() returns early as soon as stop() sets the event.
            self._stop_event.wait(self.interval * 60)

    def trigger(self) -> Future:
        """Trigger a background commit and return its Future."""
        return self.executor.submit(self._commit_scheduled_changes)

    def _commit_scheduled_changes(self) -> Optional[CommitInfo]:
        """Executor entry point: run a commit unless already stopped."""
        if self._stop_event.is_set():
            return None
        logger.info('(Background) scheduled commit triggered.')
        try:
            return self.commit_scheduled_changes()
        except Exception as e:
            logger.error(f'Error while pushing to Hub: {e}')
            raise

    def commit_scheduled_changes(self) -> Optional[CommitInfo]:
        """Push the folder to the Hub once.

        Returns:
            CommitInfo or None: Commit info when something was uploaded,
            None when there were no changed files.

        Raises:
            Exception: Re-raised from `upload_folder`, except the benign
                'No files to upload' case which is treated as no-change.
        """
        try:
            self._pending_tracker_updates = {}
            with patch_upload_folder_for_scheduler(self):
                commit_info = self.api.upload_folder(
                    repo_id=self.repo_id,
                    folder_path=self.folder_path,
                    path_in_repo=self.path_in_repo,
                    commit_message='Scheduled Commit',
                    token=self.token,
                    repo_type=self.repo_type,
                    allow_patterns=self.allow_patterns,
                    ignore_patterns=self.ignore_patterns,
                    revision=self.revision,
                )
            if commit_info is None:
                logger.debug(
                    'No changed files to upload for scheduled commit.')
                return None
            # Record new mtimes only after the upload fully succeeded.
            with self.lock:
                if hasattr(self, '_pending_tracker_updates'):
                    self.last_uploaded.update(self._pending_tracker_updates)
                    logger.debug(
                        f'Updated modification tracker for {len(self._pending_tracker_updates)} files.'
                    )
                    del self._pending_tracker_updates
            return commit_info
        except Exception as e:
            # Treat "No files to upload" as a normal no-change situation
            # instead of an error.
            if 'No files to upload' in str(e):
                logger.debug(
                    'No changed files to upload for scheduled commit.')
                return None
            if hasattr(self, '_pending_tracker_updates'):
                del self._pending_tracker_updates
            logger.error(f'Error during scheduled commit: {e}')
            raise
class PartialFileIO(BytesIO):
    """A read-only file-like object exposing at most the first
    ``size_limit`` bytes of a file.

    Used by the scheduled-commit machinery so that a file growing while an
    upload is in flight is uploaded exactly as it looked when staged. Only
    ``read``, ``tell``, ``seek``, ``close`` and ``open`` are supported;
    any other public file API raises ``NotImplementedError``.
    """

    def __init__(self, file_path: Union[str, Path], size_limit: int) -> None:
        """
        Args:
            file_path: Path of the file to expose.
            size_limit: Maximum number of bytes readable through this
                object; clamped to the file's actual size on open.
        """
        self._file_path = Path(file_path)
        self._file = None
        self._size_limit = size_limit
        self.open()

    def open(self) -> None:
        """Open the underlying file and clamp the size limit to its size."""
        if self._file is not None:
            return
        try:
            self._file = self._file_path.open('rb')
            # BUGFIX: the previous `self._size_limit or float('inf')`
            # silently treated a limit of 0 as "no limit" (0 is falsy).
            # Only a None limit means unlimited; 0 means "expose no bytes".
            limit = (self._size_limit
                     if self._size_limit is not None else float('inf'))
            self._size_limit = min(limit,
                                   os.fstat(self._file.fileno()).st_size)
        except OSError as e:
            logger.error(f'Failed to open file {self._file_path}: {e}')
            raise

    def close(self) -> None:
        """Close the underlying file if it is open (idempotent)."""
        if self._file is not None:
            self._file.close()
            self._file = None

    def __del__(self) -> None:
        self.close()
        return super().__del__()

    def __repr__(self) -> str:
        return f'<PartialFileIO file_path={self._file_path} size_limit={self._size_limit}>'

    def __len__(self) -> int:
        # Length of the exposed window, not of the whole file.
        return self._size_limit

    def __getattribute__(self, name: str):
        # Whitelist the five supported public methods; private names and
        # dunders pass through so Python internals keep working.
        if name.startswith('_') or name in {
                'read', 'tell', 'seek', 'close', 'open'
        }:  # only 5 public methods supported
            return super().__getattribute__(name)
        raise NotImplementedError(f"PartialFileIO does not support '{name}'.")

    def _ensure_open(self) -> None:
        """Raise ValueError on use-after-close, mirroring io semantics."""
        if self._file is None:
            raise ValueError('I/O operation on closed file.')

    def tell(self) -> int:
        """Return the current position in the underlying file."""
        self._ensure_open()
        return self._file.tell()

    def seek(self, __offset: int, __whence: int = SEEK_SET) -> int:
        """Seek within the exposed window, never beyond ``size_limit``."""
        self._ensure_open()
        if __whence == SEEK_END:
            # SEEK_END is relative to the *limited* length, not file size.
            __offset = len(self) + __offset
            __whence = SEEK_SET
        pos = self._file.seek(__offset, __whence)
        if pos > self._size_limit:
            # Clamp back so reads never start past the window.
            return self._file.seek(self._size_limit)
        return pos

    def read(self, __size: Optional[int] = -1) -> bytes:
        """Read at most ``__size`` bytes, truncated at ``size_limit``."""
        self._ensure_open()
        current = self.tell()
        if __size is None or __size < 0:
            # Read until the window limit.
            truncated_size = self._size_limit - current
        else:
            # Read until the window limit or __size, whichever is smaller.
            truncated_size = min(__size, self._size_limit - current)
        return self._file.read(truncated_size)

View File

@@ -116,3 +116,18 @@ VisibilityMap = {
ModelVisibility.INTERNAL: Visibility.INTERNAL,
ModelVisibility.PUBLIC: Visibility.PUBLIC
}
class SortKey:
    """String constants for the sort keys accepted by the Hub list APIs."""
    # Py3: no need for the legacy explicit `object` base.
    DEFAULT = 'default'
    DOWNLOADS = 'downloads'
    LIKES = 'likes'
    LAST_MODIFIED = 'last_modified'


# All sort keys a caller may pass; membership-checked before issuing
# list requests (e.g. HubApi.list_datasets).
VALID_SORT_KEYS = {
    SortKey.DEFAULT,
    SortKey.DOWNLOADS,
    SortKey.LIKES,
    SortKey.LAST_MODIFIED,
}

View File

@@ -120,11 +120,12 @@ def handle_http_response(response: requests.Response,
http_error_msg = 'The request model: %s does not exist!' % (model_id)
elif 403 == response.status_code:
if cookies is None:
http_error_msg = 'Authentication token does not exist, '
'failed to access model {model_id} which may not exist or may be '
'private. Please login first.'
http_error_msg = f'Authentication token does not exist, \
failed to access model {model_id} which may not exist \
or may be private. Please login first.'
else:
http_error_msg = 'The authentication token is invalid, failed to access model {model_id}.'
http_error_msg = f'The authentication token is invalid, failed to access model {model_id}.'
elif 400 <= response.status_code < 500:
http_error_msg = u'%s Client Error: %s, Request id: %s for url: %s' % (
response.status_code, reason, request_id, response.url)

View File

@@ -25,6 +25,7 @@ from modelscope.hub.constants import (
MODELSCOPE_PARALLEL_DOWNLOAD_THRESHOLD_MB, TEMPORARY_FOLDER_NAME)
from modelscope.utils.constant import (DEFAULT_DATASET_REVISION,
DEFAULT_MODEL_REVISION,
INTRA_CLOUD_ACCELERATION,
REPO_TYPE_DATASET, REPO_TYPE_MODEL,
REPO_TYPE_SUPPORT)
from modelscope.utils.file_utils import (get_dataset_cache_root,
@@ -194,9 +195,22 @@ def _repo_file_download(
" online, set 'local_files_only' to False.")
_api = HubApi()
headers = {
'user-agent': ModelScopeConfig.get_user_agent(user_agent=user_agent, )
'user-agent': ModelScopeConfig.get_user_agent(user_agent=user_agent, ),
'snapshot-identifier': str(uuid.uuid4()),
}
if INTRA_CLOUD_ACCELERATION == 'true':
region_id: str = (
os.getenv('INTRA_CLOUD_ACCELERATION_REGION')
or _api._get_internal_acceleration_domain())
if region_id:
logger.info(
f'Intra-cloud acceleration enabled for downloading from {repo_id}'
)
headers['x-aliyun-region-id'] = region_id
if cookies is None:
cookies = ModelScopeConfig.get_cookies()
repo_files = []

View File

@@ -8,7 +8,7 @@ import requests
from tqdm.auto import tqdm
from modelscope.hub.utils.utils import (MODELSCOPE_URL_SCHEME,
encode_image_to_base64, get_endpoint)
encode_media_to_base64, get_endpoint)
from modelscope.utils.logger import get_logger
logger = get_logger()
@@ -69,17 +69,20 @@ class AigcModel:
'main-strong', 'character-strong'
}
def __init__(self,
aigc_type: str,
base_model_type: str,
model_path: str,
base_model_id: str = '',
tag: Optional[str] = 'v1.0',
description: Optional[str] = 'this is an aigc model',
cover_images: Optional[List[str]] = None,
path_in_repo: Optional[str] = '',
trigger_words: Optional[List[str]] = None,
official_tags: Optional[List[str]] = None):
def __init__(
self,
aigc_type: str,
base_model_type: str,
model_path: str,
base_model_id: str = '',
tag: Optional[str] = 'v1.0',
description: Optional[str] = 'this is an aigc model',
cover_images: Optional[List[str]] = None,
path_in_repo: Optional[str] = '',
trigger_words: Optional[List[str]] = None,
official_tags: Optional[List[str]] = None,
model_source: Optional[str] = 'USER_UPLOAD',
):
"""
Initializes the AigcModel helper.
@@ -94,12 +97,15 @@ class AigcModel:
path_in_repo (str, optional): Path in the repository.
trigger_words (List[str], optional): Trigger words for the AIGC Lora model.
official_tags (List[str], optional): Official tags for the AIGC model. Defaults to None.
model_source (str, optional): Source of the model.
`USER_UPLOAD`, `TRAINED_FROM_MODELSCOPE` or `TRAINED_FROM_ALIYUN_FC`. Defaults to 'USER_UPLOAD'.
"""
self.model_path = model_path
self.aigc_type = aigc_type
self.base_model_type = base_model_type
self.tag = tag
self.description = description
self.model_source = model_source
# Process cover images - convert local paths to base64 data URLs
if cover_images is not None:
processed_cover_images = []
@@ -111,7 +117,7 @@ class AigcModel:
or img.startswith('data:')):
try:
# Convert local path to base64 data URL
processed_img = encode_image_to_base64(img)
processed_img = encode_media_to_base64(img)
processed_cover_images.append(processed_img)
logger.info('Converted local image to base64: %s',
os.path.basename(img))
@@ -383,7 +389,8 @@ class AigcModel:
'weight_sha256': self.weight_sha256,
'weight_size': self.weight_size,
'trigger_words': self.trigger_words,
'official_tags': self.official_tags
'official_tags': self.official_tags,
'model_source': self.model_source,
}
@classmethod

View File

@@ -146,16 +146,15 @@ def compute_hash(file_path):
return sha256_hash.hexdigest()
def file_integrity_validation(file_path, expected_sha256):
def file_integrity_validation(file_path, expected_sha256) -> bool:
"""Validate the file hash is expected, if not, delete the file
Args:
file_path (str): The file to validate
expected_sha256 (str): The expected sha256 hash
Raises:
FileIntegrityError: If file_path hash is not expected.
Returns:
bool: True if the file is valid, False otherwise
"""
file_sha256 = compute_hash(file_path)
if not file_sha256 == expected_sha256:
@@ -163,7 +162,9 @@ def file_integrity_validation(file_path, expected_sha256):
msg = 'File %s integrity check failed, expected sha256 signature is %s, actual is %s, the download may be incomplete, please try again.' % ( # noqa E501
file_path, expected_sha256, file_sha256)
logger.error(msg)
raise FileIntegrityError(msg)
return False
return True
def add_content_to_file(repo,
@@ -380,40 +381,40 @@ def convert_timestamp(time_stamp: Union[int, str, datetime],
)
def encode_image_to_base64(image_path: str) -> str:
def encode_media_to_base64(media_file_path: str) -> str:
"""
Encode image file to base64 string.
Encode image or video file to base64 string.
Args:
image_path (str): Path to the image file
media_file_path (str): Path to the image or video file
Returns:
str: Base64 encoded string with data URL prefix
Raises:
FileNotFoundError: If image file doesn't exist
ValueError: If file is not a valid image format
FileNotFoundError: If image/video file doesn't exist
ValueError: If file is not a valid format
"""
import base64
import mimetypes
# Expand user path
image_path = os.path.expanduser(image_path)
media_file_path = os.path.expanduser(media_file_path)
if not os.path.exists(image_path):
raise FileNotFoundError(f'Image file not found: {image_path}')
if not os.path.exists(media_file_path):
raise FileNotFoundError(f'Image file not found: {media_file_path}')
if not os.path.isfile(image_path):
raise ValueError(f'Path is not a file: {image_path}')
if not os.path.isfile(media_file_path):
raise ValueError(f'Path is not a file: {media_file_path}')
# Get MIME type
mime_type, _ = mimetypes.guess_type(image_path)
if not mime_type or not mime_type.startswith('image/'):
raise ValueError(f'File is not a valid image format: {image_path}')
mime_type, _ = mimetypes.guess_type(media_file_path)
if not mime_type:
raise ValueError(f'File is not a valid format: {media_file_path}')
# Read and encode file
with open(image_path, 'rb') as image_file:
image_data = image_file.read()
with open(media_file_path, 'rb') as media_file:
image_data = media_file.read()
base64_data = base64.b64encode(image_data).decode('utf-8')
# Return data URL format

View File

View File

@@ -741,7 +741,8 @@ def _download_additional_modules(
namespace: str,
revision: str,
imports: Tuple[str, str, str, str],
download_config: Optional[DownloadConfig]
download_config: Optional[DownloadConfig],
trust_remote_code: Optional[bool] = False,
) -> List[Tuple[str, str]]:
"""
Download additional module for a module <name>.py at URL (or local path) <base_path>/<name>.py
@@ -755,6 +756,21 @@ def _download_additional_modules(
"""
local_imports = []
library_imports = []
# Check if we need to execute remote code
has_remote_code = any(
import_type in ('internal', 'external')
for import_type, _, _, _ in imports
)
if has_remote_code and not trust_remote_code:
raise ValueError(
f'Loading {name} requires executing code from the repository. '
'This is disabled by default for security reasons. '
'If you trust the authors of this dataset, you can enable it with '
'`trust_remote_code=True`.'
)
download_config = download_config.copy()
if download_config.download_desc is None:
download_config.download_desc = 'Downloading extra modules'
@@ -863,6 +879,7 @@ def get_module_with_script(self) -> DatasetModule:
revision=revision,
imports=imports,
download_config=self.download_config,
trust_remote_code=self.trust_remote_code,
)
additional_files = []
if dataset_infos_path:

View File

@@ -194,6 +194,10 @@ template_info = [
modelfile_prefix=
'https://modelscope.oss-cn-beijing.aliyuncs.com/llm_template/ollama/deepseek-llm',
),
TemplateInfo(
template_regex=f'.*{cases("DeepSeek-V3.1")}.*',
modelfile_prefix=
'https://modelscope.oss-cn-beijing.aliyuncs.com/llm_template/ollama/deepseek-v3.1'),
TemplateInfo(
template_regex=
f'.*{cases("deepseek")}.*{cases("v3")}.*',
@@ -994,6 +998,14 @@ template_info = [
template_regex=f'.*{cases("deepscaler")}.*',
modelfile_prefix=
'https://modelscope.oss-cn-beijing.aliyuncs.com/llm_template/ollama/deepscaler'),
TemplateInfo(
template_regex=f'.*{cases("granite-4.0")}.*',
modelfile_prefix=
'https://modelscope.oss-cn-beijing.aliyuncs.com/llm_template/ollama/granite4'),
TemplateInfo(
template_regex=f'.*{cases("gpt-oss")}.*',
modelfile_prefix=
'https://modelscope.oss-cn-beijing.aliyuncs.com/llm_template/ollama/gpt-oss'),
]

View File

@@ -768,7 +768,7 @@ def load_from_prebuilt(file_path=None):
if os.path.exists(file_path):
index = _load_index(file_path, with_template=True)
else:
index = None
index = generate_ast_template()
return index

View File

@@ -257,6 +257,7 @@ def get_file_hash(
progress.update(len(byte_chunk))
file_hash = file_hash.hexdigest()
final_chunk_size = buffer_size
file_path_or_obj.seek(0, os.SEEK_SET)
else:
progress.close()

View File

@@ -418,6 +418,7 @@ class UploadInfo:
file_hash_info: dict = file_hash_info or get_file_hash(fileobj)
fileobj.seek(0, os.SEEK_SET)
sample = fileobj.read(512)
fileobj.seek(0, os.SEEK_SET)
return cls(
sha256=file_hash_info['file_hash'],
size=file_hash_info['file_size'],

View File

@@ -1,5 +1,5 @@
# Make sure to modify __release_datetime__ to release time when making official release.
__version__ = '1.31.0'
__version__ = '1.32.0'
# default release datetime for branches under active development is set
# to be a time far-far-away-into-the-future
__release_datetime__ = '2025-10-10 23:59:59'
__release_datetime__ = '2025-11-07 12:00:00'

81
pyproject.toml Normal file
View File

@@ -0,0 +1,81 @@
[project]
name = "modelscope"
dynamic = ["version", "dependencies", "optional-dependencies"]
description = "ModelScope: bring the notion of Model-as-a-Service to life."
readme = {file = "README.md", content-type = "text/markdown"}
license = "Apache-2.0"
license-files = ["LICENSE"]
authors = [
{name = "ModelScope team"},
{email = "contact@modelscope.cn"}
]
keywords = ["python", "nlp", "science", "cv", "speech", "multi-modal"]
requires-python = ">=3.9"
classifiers = [
'Development Status :: 4 - Beta',
'Operating System :: OS Independent',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
]
[project.urls]
Homepage = "https://github.com/modelscope/modelscope"
[project.scripts]
modelscope = "modelscope.cli.cli:run_cmd"
[build-system]
# PEP 639 metadata used above (`license` as an SPDX expression plus
# `license-files`) is only understood by setuptools >= 77; pinning just
# >= 69 lets older environments fail the build or mis-read the metadata.
requires = ["setuptools>=77", "wheel"]
build-backend = "setuptools.build_meta"
[tool.setuptools]
include-package-data = true
[tool.setuptools.packages.find]
where = ["."]
include = ["modelscope*"]
[tool.setuptools.dynamic]
version = {attr = "modelscope.version.__version__"}
dependencies = {file = ["requirements/hub.txt"]}
[tool.setuptools.dynamic.optional-dependencies]
hub = {file = ["requirements/hub.txt"]}
datasets = {file = ["requirements/datasets.txt"]}
framework = {file = ["requirements/framework.txt"]}
server = {file = ["requirements/server.txt"]}
docs = {file = ["requirements/docs.txt"]}
tests = {file = ["requirements/tests.txt"]}
# domain specific with framework base
cv = {file = ["requirements/framework.txt", "requirements/cv.txt"]}
nlp = {file = ["requirements/framework.txt", "requirements/nlp.txt"]}
multi-modal = {file = ["requirements/framework.txt", "requirements/multi-modal.txt"]}
science = {file = ["requirements/framework.txt", "requirements/science.txt"]}
# audio specific with framework base
audio_asr = {file = ["requirements/framework.txt", "requirements/audio/audio_asr.txt"]}
audio_codec = {file = ["requirements/framework.txt", "requirements/audio/audio_codec.txt"]}
audio_tts = {file = ["requirements/framework.txt", "requirements/audio/audio_tts.txt"]}
audio_kws = {file = ["requirements/framework.txt", "requirements/audio/audio_kws.txt"]}
audio_signal = {file = ["requirements/framework.txt", "requirements/audio/audio_signal.txt"]}
audio = {file = ["requirements/framework.txt",
"requirements/audio/audio_asr.txt",
"requirements/audio/audio_codec.txt",
"requirements/audio/audio_tts.txt",
"requirements/audio/audio_kws.txt",
"requirements/audio/audio_signal.txt"]}
# skip audio requirements due to its hard dependency which may cause installation failure
all = {file = [
"requirements/hub.txt",
"requirements/datasets.txt",
"requirements/framework.txt",
"requirements/cv.txt",
"requirements/nlp.txt",
"requirements/multi-modal.txt",
"requirements/science.txt",
"requirements/server.txt",
]}

View File

@@ -2,11 +2,11 @@ accelerate
cloudpickle
decord>=0.6.0
diffusers>=0.25.0
ftfy>=6.0.3
# 0.12.1 has an issue: No such file or directory: 'fairseq/version.txt'
# 0.12.2 does not support py311
#fairseq==0.12.2
https://github.com/liyaodev/fairseq/releases/download/v0.12.3.1/fairseq-0.12.3.1-cp311-cp311-linux_x86_64.whl
fairseq-fixed==0.12.3.1
ftfy>=6.0.3
librosa==0.10.1
opencv-python
pycocoevalcap>=1.2

239
setup.py
View File

@@ -1,239 +1,6 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
# !/usr/bin/env python
import os
import shutil
import subprocess
from setuptools import find_packages, setup
# -*- coding: utf-8 -*-
from modelscope.utils.constant import Fields
from setuptools import setup
def readme():
    """Read README.md and return it as the package's long description."""
    with open('README.md', encoding='utf-8') as readme_file:
        return readme_file.read()
# Path to the module that defines __version__; executed by get_version() below.
version_file = 'modelscope/version.py'
def get_git_hash():
    """Return the full git SHA of HEAD, or ``'unknown'`` if git is unavailable.

    Runs ``git rev-parse HEAD`` in a minimal, locale-neutral environment so
    the output is stable across machines.
    """

    def _run_git(cmd):
        # Construct a minimal environment: keep only the variables git needs.
        env = {}
        for key in ('SYSTEMROOT', 'PATH', 'HOME'):
            value = os.environ.get(key)
            if value is not None:
                env[key] = value
        # LANGUAGE is used on win32; force the C locale for stable output.
        env['LANGUAGE'] = 'C'
        env['LANG'] = 'C'
        env['LC_ALL'] = 'C'
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, env=env)
        return proc.communicate()[0]

    try:
        raw = _run_git(['git', 'rev-parse', 'HEAD'])
        return raw.strip().decode('ascii')
    except OSError:
        # git binary missing or not executable.
        return 'unknown'
def get_hash():
    """Return the short (7-character) git hash of HEAD.

    Must be called from the repository root; asserts that ``.git`` exists.
    """
    assert os.path.exists('.git'), '.git directory does not exist'
    full_sha = get_git_hash()
    return full_sha[:7]
def get_version():
    """Execute ``version_file`` and return the ``__version__`` it defines."""
    with open(version_file, 'r', encoding='utf-8') as fh:
        source = fh.read()
    # exec writes __version__ into this function's locals (CPython behavior
    # the original relied on as well).
    exec(compile(source, version_file, 'exec'))
    return locals()['__version__']
def parse_requirements(fname='requirements.txt', with_version=True):
    """
    Parse the package dependencies listed in a requirements file but strips
    specific versioning information.
    Args:
        fname (str): path to requirements file
        with_version (bool, default=False): if True include version specs
    Returns:
        List[str]: list of requirements items
    CommandLine:
        python -c "import setup; print(setup.parse_requirements())"
    """
    import re
    import sys
    from os.path import exists
    require_fpath = fname

    def parse_line(line):
        """
        Parse information from a line in a requirements text file.

        Yields dicts with at least 'line' and 'package' keys; may also carry
        'version' (operator, version) and 'platform_deps' entries.
        """
        if line.startswith('-r '):
            # Allow specifying requirements in other files
            # (recursively expands the referenced file, relative to fname).
            target = line.split(' ')[1]
            relative_base = os.path.dirname(fname)
            absolute_target = os.path.join(relative_base, target)
            for info in parse_require_file(absolute_target):
                yield info
        else:
            info = {'line': line}
            if line.startswith('-e '):
                # Editable install: take the package name from the #egg= part.
                info['package'] = line.split('#egg=')[1]
            else:
                # Remove versioning from the package
                # ('>=' must precede '>' in the alternation so it wins).
                pat = '(' + '|'.join(['>=', '==', '>']) + ')'
                parts = re.split(pat, line, maxsplit=1)
                parts = [p.strip() for p in parts]
                info['package'] = parts[0]
                if len(parts) > 1:
                    op, rest = parts[1:]
                    if ';' in rest:
                        # Handle platform specific dependencies
                        # http://setuptools.readthedocs.io/en/latest/setuptools.html#declaring-platform-specific-dependencies
                        version, platform_deps = map(str.strip,
                                                     rest.split(';'))
                        info['platform_deps'] = platform_deps
                    else:
                        version = rest  # NOQA
                    info['version'] = (op, version)
            yield info

    def parse_require_file(fpath):
        # Iterate a requirements file line by line, skipping comments,
        # '--' option lines and raw http(s) URLs.
        with open(fpath, 'r', encoding='utf-8') as f:
            for line in f.readlines():
                line = line.strip()
                if line.startswith('http'):
                    print('skip http requirements %s' % line)
                    continue
                if line and not line.startswith('#') and not line.startswith(
                        '--'):
                    for info in parse_line(line):
                        yield info
                elif line and line.startswith('--find-links'):
                    # '--find-links <url>': record the URL as a dependency link.
                    eles = line.split()
                    for e in eles:
                        e = e.strip()
                        if 'http' in e:
                            info = dict(dependency_links=e)
                            yield info

    def gen_packages_items():
        # Assemble the final requirement strings and dependency links.
        items = []
        deps_link = []
        if exists(require_fpath):
            for info in parse_require_file(require_fpath):
                if 'dependency_links' not in info:
                    parts = [info['package']]
                    if with_version and 'version' in info:
                        parts.extend(info['version'])
                    if not sys.version.startswith('3.4'):
                        # apparently package_deps are broken in 3.4
                        platform_deps = info.get('platform_deps')
                        if platform_deps is not None:
                            parts.append(';' + platform_deps)
                    item = ''.join(parts)
                    items.append(item)
                else:
                    deps_link.append(info['dependency_links'])
        return items, deps_link

    return gen_packages_items()
def pack_resource():
    """Assemble a clean ``package/`` staging tree with sources, configs,
    requirements and packaging metadata, rebuilding it from scratch."""
    staging_dir = 'package/'
    if os.path.isdir(staging_dir):
        shutil.rmtree(staging_dir)
    os.makedirs(staging_dir)
    project_dir = staging_dir + 'modelscope/'
    shutil.copytree('./modelscope', project_dir)
    shutil.copytree('./configs', project_dir + 'configs')
    shutil.copytree('./requirements', 'package/requirements')
    for name in ('requirements.txt', 'MANIFEST.in', 'README.md'):
        shutil.copy('./' + name, 'package/' + name)
# NOTE(review): this section is rendered from a diff that replaces the legacy
# setup.py with a minimal pyproject-driven one; both the old setup(...) call
# and the new bare setup() appear below — confirm against the final file.
if __name__ == '__main__':
    # write_version_py()
    # Regenerate the AST index template before packaging.
    from modelscope.utils.ast_utils import generate_ast_template
    generate_ast_template()
    pack_resource()
    os.chdir('package')
    install_requires, deps_link = parse_requirements('requirements.txt')
    extra_requires = {}
    all_requires = []
    # One extras group per field declared on the Fields constant holder.
    for field in dir(Fields):
        if field.startswith('_'):
            continue
        field = getattr(Fields, field)
        extra_requires[field], _ = parse_requirements(
            f'requirements/{field}.txt')
        # skip audio requirements due to its hard dependency which
        # result in mac/windows compatibility problems
        if field != Fields.audio:
            all_requires.append(extra_requires[field])
    # Per-subdomain audio extras (note: 'subfiled'/'filed_name' are typos
    # preserved from the original source; they are local names only).
    for subfiled in ['asr', 'kws', 'signal', 'tts']:
        filed_name = f'audio_{subfiled}'
        extra_requires[filed_name], _ = parse_requirements(
            f'requirements/audio/{filed_name}.txt')
    framework_requires = extra_requires['framework']
    # add framework dependencies to every field
    for field, requires in extra_requires.items():
        if field not in [
                'server', 'framework', 'hub', 'datasets'
        ]:  # server need install model's field dependencies before.
            extra_requires[field] = framework_requires + extra_requires[field]
    extra_requires['all'] = all_requires
    setup(
        name='modelscope',
        version=get_version(),
        description=
        'ModelScope: bring the notion of Model-as-a-Service to life.',
        long_description=readme(),
        long_description_content_type='text/markdown',
        author='ModelScope team',
        author_email='contact@modelscope.cn',
        keywords='python,nlp,science,cv,speech,multi-modal',
        url='https://github.com/modelscope/modelscope',
        packages=find_packages(exclude=('configs', 'demo')),
        include_package_data=True,
        package_data={
            '': ['*.h', '*.cpp', '*.cu'],
        },
        classifiers=[
            'Development Status :: 4 - Beta',
            'License :: OSI Approved :: Apache Software License',
            'Operating System :: OS Independent',
            'Programming Language :: Python :: 3',
            'Programming Language :: Python :: 3.7',
            'Programming Language :: Python :: 3.8',
            'Programming Language :: Python :: 3.9',
            'Programming Language :: Python :: 3.10',
            'Programming Language :: Python :: 3.11',
        ],
        license='Apache License 2.0',
        tests_require=parse_requirements('requirements/tests.txt'),
        install_requires=install_requires,
        extras_require=extra_requires,
        entry_points={
            'console_scripts': ['modelscope=modelscope.cli.cli:run_cmd']
        },
        dependency_links=deps_link,
        zip_safe=False)
# New minimal entry point: all metadata now comes from pyproject.toml.
setup()

View File

@@ -0,0 +1,583 @@
import atexit
import shutil
import tempfile
import time
import unittest
import uuid
from io import SEEK_END
from pathlib import Path
from unittest.mock import MagicMock, patch
from modelscope.hub.api import HubApi
from modelscope.hub.commit_scheduler import CommitScheduler, PartialFileIO
from modelscope.hub.constants import Visibility
from modelscope.hub.errors import NotExistError
from modelscope.hub.file_download import _repo_file_download
from modelscope.utils.constant import DEFAULT_REPOSITORY_REVISION
from modelscope.utils.repo_utils import CommitInfo, CommitOperationAdd
from modelscope.utils.test_utils import (TEST_ACCESS_TOKEN1, TEST_MODEL_ORG,
delete_credential, test_level)
class TestCommitScheduler(unittest.TestCase):
    """Test suite for ModelScope CommitScheduler functionality.

    Mixes fully mocked tests (create_repo / commit_scheduled_changes patched)
    with one live round-trip test; the live test requires network access and
    valid credentials (TEST_ACCESS_TOKEN1).
    """

    def setUp(self) -> None:
        """Set up test environment with temporary directories and mock API."""
        self.api = HubApi()
        # Unique repo name per run to avoid collisions between test runs.
        self.repo_name = f'test-commit-scheduler-{uuid.uuid4().hex[:8]}'
        self.repo_id = f'{TEST_MODEL_ORG}/{self.repo_name}'
        # Create temporary cache directory
        self.cache_dir = Path(tempfile.mkdtemp())
        self.watched_folder = self.cache_dir / 'watched_folder'
        self.watched_folder.mkdir(exist_ok=True, parents=True)
        # Initialize scheduler reference for cleanup
        self.scheduler = None

    def tearDown(self) -> None:
        """Clean up test resources (scheduler, remote repo, temp dirs)."""

        def cleanup(scheduler):
            # Stop the scheduler thread and detach its atexit hook.
            try:
                scheduler.stop()
                remove_atexit_callback(scheduler)
            except Exception as e:
                print(
                    f'Warning: Could not clean up scheduler {scheduler.repo_id}: {e}'
                )

        def remove_atexit_callback(scheduler):
            # Unregister the final-commit hook so a stopped scheduler does not
            # try to push again at interpreter shutdown.
            try:
                atexit.unregister(scheduler.commit_scheduled_changes)
            except Exception as e:
                print(
                    f'Warning: Could not remove atexit callback for scheduler {scheduler.repo_id}: {e}'
                )

        # Stop scheduler if it exists
        if self.scheduler is not None:
            try:
                cleanup(self.scheduler)
            except Exception:
                pass
        # Try to delete test repo (may not exist for mocked tests)
        try:
            if hasattr(self, 'api') and TEST_ACCESS_TOKEN1:
                self.api.login(TEST_ACCESS_TOKEN1)
                self.api.delete_repo(repo_id=self.repo_id, repo_type='dataset')
        except Exception:
            pass
        # Clean up temporary directories
        if self.cache_dir.exists():
            shutil.rmtree(self.cache_dir, ignore_errors=True)
        delete_credential()

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_invalid_folder_path_nonexistent(self) -> None:
        """Test that CommitScheduler raises error for non-existent folder."""
        nonexistent_path = self.cache_dir / 'nonexistent'
        with self.assertRaises(ValueError) as cm:
            CommitScheduler(
                repo_id=self.repo_id,
                folder_path=nonexistent_path,
                interval=1,
                hub_api=self.api,
                repo_type='dataset',
                token=TEST_ACCESS_TOKEN1)
        self.assertIn('does not exist', str(cm.exception))

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_invalid_interval(self) -> None:
        """Test that CommitScheduler raises error for invalid interval values."""
        # Test zero interval
        with self.assertRaises(ValueError) as cm:
            CommitScheduler(
                repo_id=self.repo_id,
                folder_path=self.watched_folder,
                interval=0,
                hub_api=self.api,
                repo_type='dataset',
                token=TEST_ACCESS_TOKEN1)
        self.assertIn('positive', str(cm.exception))
        # Test negative interval
        with self.assertRaises(ValueError) as cm:
            CommitScheduler(
                repo_id=self.repo_id,
                folder_path=self.watched_folder,
                interval=-1,
                hub_api=self.api,
                repo_type='dataset',
                token=TEST_ACCESS_TOKEN1)
        self.assertIn('positive', str(cm.exception))

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_initialization_with_defaults(self) -> None:
        """Test CommitScheduler initialization with default parameters."""
        with patch.object(HubApi, 'create_repo') as mock_create:
            mock_create.return_value = f'https://modelscope.cn/datasets/{self.repo_id}'
            self.scheduler = CommitScheduler(
                repo_id=self.repo_id,
                folder_path=self.watched_folder,
                hub_api=self.api,
                repo_type='dataset',
                token=TEST_ACCESS_TOKEN1)
            # Check default values
            self.assertEqual(self.scheduler.repo_id, self.repo_id)
            self.assertEqual(self.scheduler.folder_path,
                             self.watched_folder.resolve())
            self.assertEqual(self.scheduler.interval, 5)  # default 5 minutes
            self.assertEqual(self.scheduler.path_in_repo, '')
            self.assertEqual(self.scheduler.revision,
                             DEFAULT_REPOSITORY_REVISION)
            self.assertIsInstance(self.scheduler.last_uploaded, dict)
            # Verify create_repo was called
            mock_create.assert_called_once()

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_initialization_with_custom_parameters(self) -> None:
        """Test CommitScheduler initialization with custom parameters."""
        custom_interval = 10
        custom_path_in_repo = 'custom/path'
        custom_revision = 'develop'
        allow_patterns = ['*.txt', '*.json']
        ignore_patterns = ['*.log', 'temp/*']
        with patch.object(HubApi, 'create_repo') as mock_create:
            mock_create.return_value = f'https://modelscope.cn/datasets/{self.repo_id}'
            self.scheduler = CommitScheduler(
                repo_id=self.repo_id,
                folder_path=self.watched_folder,
                interval=custom_interval,
                path_in_repo=custom_path_in_repo,
                revision=custom_revision,
                allow_patterns=allow_patterns,
                ignore_patterns=ignore_patterns,
                visibility=Visibility.PUBLIC,
                hub_api=self.api,
                repo_type='dataset',
                token=TEST_ACCESS_TOKEN1)
            # Check custom values
            self.assertEqual(self.scheduler.interval, custom_interval)
            self.assertEqual(self.scheduler.path_in_repo, custom_path_in_repo)
            self.assertEqual(self.scheduler.revision, custom_revision)
            self.assertEqual(self.scheduler.allow_patterns, allow_patterns)
            # Check ignore patterns include git folders
            expected_ignore = ignore_patterns + [
                '.git', '.git/*', '*/.git', '**/.git/**'
            ]
            self.assertEqual(self.scheduler.ignore_patterns, expected_ignore)
            # Verify create_repo was called with correct visibility
            mock_create.assert_called_once()
            call_args = mock_create.call_args
            self.assertEqual(call_args.kwargs.get('visibility'), 'public')

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    @patch.object(CommitScheduler, 'commit_scheduled_changes')
    def test_mocked_scheduler_execution(self, mock_push: MagicMock) -> None:
        """Test scheduler with mocked commit_scheduled_changes method."""
        mock_push.return_value = CommitInfo(
            commit_url=
            'https://modelscope.cn/datasets/test_scheduler_unit/commit/test123',
            commit_message='Test commit',
            commit_description='',
            oid='test123')
        with patch.object(HubApi, 'create_repo'):
            self.scheduler = CommitScheduler(
                repo_id=self.repo_id,
                folder_path=self.watched_folder,
                interval=1 / 60 / 10,  # every 0.1s for fast testing
                hub_api=self.api,
                repo_type='dataset',
                token=TEST_ACCESS_TOKEN1)
            # Wait for at least a couple of scheduler cycles
            # (timing-based; may be flaky on very loaded machines).
            time.sleep(0.5)
            # Should have been called multiple times
            self.assertGreater(len(mock_push.call_args_list), 1)
            # Check the last future result
            if hasattr(self.scheduler,
                       'last_future') and self.scheduler.last_future:
                result = self.scheduler.last_future.result()
                self.assertEqual(result.oid, 'test123')
                self.assertEqual(result.commit_message, 'Test commit')

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_scheduler_stop(self) -> None:
        """Test stopping the scheduler."""
        with patch.object(HubApi, 'create_repo'):
            self.scheduler = CommitScheduler(
                repo_id=self.repo_id,
                folder_path=self.watched_folder,
                interval=1,  # 1 minute
                hub_api=self.api,
                repo_type='dataset',
                token=TEST_ACCESS_TOKEN1)
            # Scheduler should be running (name-mangled private flag).
            self.assertFalse(self.scheduler._CommitScheduler__stopped)
            # Stop the scheduler
            self.scheduler.stop()
            # Scheduler should be stopped
            self.assertTrue(self.scheduler._CommitScheduler__stopped)

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_context_manager(self) -> None:
        """Test CommitScheduler as context manager."""
        file_path = self.watched_folder / 'test_file.txt'
        with patch.object(HubApi, 'create_repo'), \
                patch.object(CommitScheduler, 'commit_scheduled_changes') as mock_push:
            mock_push.return_value = CommitInfo(
                commit_url=
                'https://modelscope.cn/datasets/test_scheduler_unit/commit/test123',
                commit_message='Test commit',
                commit_description='',
                oid='test123')
            with CommitScheduler(
                    repo_id=self.repo_id,
                    folder_path=self.watched_folder,
                    interval=5,  # 5 minutes - won't trigger during test
                    hub_api=self.api,
                    repo_type='dataset',
                    token=TEST_ACCESS_TOKEN1) as scheduler:
                # Write a file inside the context
                file_path.write_text('test content')
                # Reference for later assertions
                self.scheduler = scheduler
            # After exiting context, scheduler should be stopped
            self.assertTrue(self.scheduler._CommitScheduler__stopped)
            # Should have triggered commit_scheduled_changes on exit
            mock_push.assert_called()

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_trigger_manual_commit(self) -> None:
        """Test manually triggering a commit."""
        with patch.object(HubApi, 'create_repo'), \
                patch.object(CommitScheduler, 'commit_scheduled_changes') as mock_push:
            mock_push.return_value = CommitInfo(
                commit_url=
                'https://modelscope.cn/datasets/test_scheduler_unit/commit/test123',
                commit_message='Manual commit',
                commit_description='',
                oid='manual123')
            self.scheduler = CommitScheduler(
                repo_id=self.repo_id,
                folder_path=self.watched_folder,
                interval=60,  # Long interval to avoid auto-trigger
                hub_api=self.api,
                repo_type='dataset',
                token=TEST_ACCESS_TOKEN1)
            future = self.scheduler.trigger()
            result = future.result()
            # Verify the result
            self.assertEqual(result.oid, 'manual123')
            self.assertEqual(result.commit_message, 'Manual commit')
            mock_push.assert_called()

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_stopped_scheduler_no_push(self) -> None:
        """Test that stopped scheduler doesn't perform push operations."""
        with patch.object(HubApi, 'create_repo'):
            self.scheduler = CommitScheduler(
                repo_id=self.repo_id,
                folder_path=self.watched_folder,
                interval=60,
                hub_api=self.api,
                repo_type='dataset',
                token=TEST_ACCESS_TOKEN1)
            # Stop the scheduler immediately
            self.scheduler.stop()
            # Try to trigger after stopping
            future = self.scheduler.trigger()
            result = future.result()
            # Result should be None for stopped scheduler
            self.assertIsNone(result)

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_sync_local_folder_to_hub(self) -> None:
        """Test sync local folder to remote repo.

        Live end-to-end test: creates a real repo, lets the scheduler push on
        its interval, then downloads files back and compares contents.
        """
        hub_cache = self.cache_dir / 'hub'
        file_path = self.watched_folder / 'file.txt'
        bin_path = self.watched_folder / 'file.bin'
        git_path = self.watched_folder / '.git'
        self.scheduler = CommitScheduler(
            folder_path=self.watched_folder,
            repo_id=self.repo_id,
            interval=1 / 60,
            hub_api=self.api,
            repo_type='dataset',
            token=TEST_ACCESS_TOKEN1)
        # 1 push to hub triggered
        time.sleep(0.5)
        # write content to files
        with file_path.open('a') as f:
            f.write('first line\n')
        with bin_path.open('a') as f:
            f.write('binary content')
        with git_path.open('a') as f:
            f.write('git content\n')
        # 2 push to hub triggered
        time.sleep(2)
        self.scheduler.last_future.result()
        # new content in file
        with file_path.open('a') as f:
            f.write('second line\n')
        # 1 push to hub triggered
        time.sleep(1)
        self.scheduler.last_future.result()
        with bin_path.open('a') as f:
            f.write(' updated')
        # 5 push to hub triggered
        time.sleep(5)  # wait for every threads/uploads to complete
        self.scheduler.stop()
        self.scheduler.last_future.result()
        repo_id = self.scheduler.repo_id

        def _download(filename: str, revision: str) -> Path:
            # Fetch a single file from the hub into the local test cache.
            return Path(
                _repo_file_download(
                    repo_id=repo_id,
                    file_path=filename,
                    revision=revision,
                    cache_dir=hub_cache,
                    repo_type='dataset'))

        # Check file.txt consistency
        txt_push = _download(filename='file.txt', revision='master')
        self.assertEqual(txt_push.read_text(), 'first line\nsecond line\n')
        # Check file.bin consistency
        bin_push = _download(filename='file.bin', revision='master')
        self.assertEqual(bin_push.read_text(), 'binary content updated')
        # Check .git file was not uploaded (should be ignored)
        with self.assertRaises(NotExistError):
            _download(filename='.git', revision='master')
class TestPartialFileIO(unittest.TestCase):
    """Test suite for PartialFileIO functionality.

    PartialFileIO wraps a file and exposes only the first ``size_limit``
    bytes; the effective limit is capped at the file size observed at
    construction time.
    """

    def setUp(self) -> None:
        """Set up test environment with temporary file."""
        self.cache_dir = Path(tempfile.mkdtemp())
        self.test_file = self.cache_dir / 'file.txt'
        self.test_file.write_text('123456789abcdef')  # 15 bytes

    def tearDown(self) -> None:
        """Clean up test resources."""
        if self.cache_dir.exists():
            shutil.rmtree(self.cache_dir, ignore_errors=True)

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_partial_file_read_with_limit(self) -> None:
        """Test reading partial file with size limit."""
        partial_file = PartialFileIO(self.test_file, size_limit=5)
        # Should read only first 5 bytes
        content = partial_file.read()
        self.assertEqual(content, b'12345')
        # Second read should return empty
        content = partial_file.read()
        self.assertEqual(content, b'')
        partial_file.close()

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_partial_file_read_by_chunks(self) -> None:
        """Test reading partial file in chunks."""
        partial_file = PartialFileIO(self.test_file, size_limit=8)
        # Read in chunks
        chunk1 = partial_file.read(3)
        self.assertEqual(chunk1, b'123')
        chunk2 = partial_file.read(3)
        self.assertEqual(chunk2, b'456')
        chunk3 = partial_file.read(3)
        self.assertEqual(chunk3, b'78')  # Only 2 bytes left within limit
        chunk4 = partial_file.read(3)
        self.assertEqual(chunk4, b'')  # Nothing left
        partial_file.close()

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_partial_file_read_oversized_chunk(self) -> None:
        """Test reading more than size limit in one go."""
        partial_file = PartialFileIO(self.test_file, size_limit=5)
        # Request more than limit
        content = partial_file.read(20)
        self.assertEqual(content, b'12345')  # Should return only up to limit
        partial_file.close()

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_partial_file_len(self) -> None:
        """Test __len__ method returns correct size limit."""
        partial_file = PartialFileIO(self.test_file, size_limit=7)
        self.assertEqual(len(partial_file), 7)
        partial_file.close()
        # Size limit larger than actual file should be capped
        large_partial = PartialFileIO(self.test_file, size_limit=100)
        self.assertEqual(len(large_partial), 15)  # Actual file size
        large_partial.close()

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_partial_file_seek_and_tell(self) -> None:
        """Test seek and tell operations."""
        partial_file = PartialFileIO(self.test_file, size_limit=10)
        # Initial position
        self.assertEqual(partial_file.tell(), 0)
        # Read some bytes
        partial_file.read(3)
        self.assertEqual(partial_file.tell(), 3)
        # Seek to beginning
        partial_file.seek(0)
        self.assertEqual(partial_file.tell(), 0)
        # Seek to specific position
        partial_file.seek(5)
        self.assertEqual(partial_file.tell(), 5)
        # Seek beyond limit should be capped
        partial_file.seek(50)
        self.assertEqual(partial_file.tell(), 10)  # Capped at size limit
        # Seek from end (relative to the size limit, not the real file end)
        partial_file.seek(-2, SEEK_END)
        self.assertEqual(partial_file.tell(), 8)  # 10-2
        partial_file.close()

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_partial_file_not_implemented_methods(self) -> None:
        """Test that unsupported methods raise NotImplementedError."""
        partial_file = PartialFileIO(self.test_file, size_limit=5)
        # These methods should not be implemented
        with self.assertRaises(NotImplementedError):
            partial_file.readline()
        with self.assertRaises(NotImplementedError):
            partial_file.write(b'test')
        partial_file.close()

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_partial_file_repr(self) -> None:
        """Test string representation of PartialFileIO."""
        partial_file = PartialFileIO(self.test_file, size_limit=5)
        repr_str = repr(partial_file)
        self.assertEqual(
            repr_str,
            f'<PartialFileIO file_path={self.test_file} size_limit=5>')
        partial_file.close()

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_file_modification_after_creation(self) -> None:
        """Test that file modifications after PartialFileIO creation don't affect size limit."""
        partial_file = PartialFileIO(self.test_file, size_limit=20)
        # Original length is capped at file size
        self.assertEqual(len(partial_file), 15)
        with self.test_file.open('ab') as f:
            f.write(b'additional_content')
        # Size limit should remain the same (captured at creation)
        self.assertEqual(len(partial_file), 15)
        # Content read should still be limited to original size
        content = partial_file.read()
        self.assertEqual(len(content), 15)
        self.assertEqual(content, b'123456789abcdef')
        partial_file.close()

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_high_size_limit(self) -> None:
        """Test size limit larger than file size."""
        file = PartialFileIO(self.test_file, size_limit=20)
        with self.test_file.open('ab') as f:
            f.write(b'ghijkl')
        # File size limit is truncated to the actual file size at instance creation (not on the fly)
        self.assertEqual(len(file), 15)
        self.assertEqual(file._size_limit, 15)

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_with_commit_operation_add(self) -> None:
        """Test with CommitOperationAdd."""
        op_truncated = CommitOperationAdd(
            path_or_fileobj=PartialFileIO(self.test_file, size_limit=5),
            path_in_repo='test_file.txt')
        self.assertEqual(op_truncated.upload_info.size, 5)
        self.assertEqual(op_truncated.upload_info.sample, b'12345')
        with op_truncated.as_file() as f:
            self.assertEqual(f.read(), b'12345')
        # Full file
        op_full = CommitOperationAdd(
            path_or_fileobj=PartialFileIO(self.test_file, size_limit=9),
            path_in_repo='test_file.txt')
        self.assertEqual(op_full.upload_info.size, 9)
        self.assertEqual(op_full.upload_info.sample, b'123456789')
        with op_full.as_file() as f:
            self.assertEqual(f.read(), b'123456789')
        # Truncated file has a different hash than the full file
        self.assertNotEqual(op_truncated.upload_info.sha256,
                            op_full.upload_info.sha256)
# Allow running this test module directly.
if __name__ == '__main__':
    unittest.main()

View File

@@ -0,0 +1,38 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import unittest
from modelscope import HubApi
from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import test_level
logger = get_logger()
default_owner = 'modelscope'
class HubListHubTest(unittest.TestCase):
    """Smoke tests for HubApi list_* endpoints.

    These tests hit the live ModelScope API, so they are gated behind
    test_level() >= 3 like other network-dependent tests.
    """

    def setUp(self):
        self.api = HubApi()

    @unittest.skipUnless(test_level() >= 3, 'skip test in current test level')
    def test_list_datasets(self):
        """List datasets for the default owner using default arguments."""
        result = self.api.list_datasets(owner_or_group=default_owner)
        logger.info(f'List datasets result: {result}')

    @unittest.skipUnless(test_level() >= 3, 'skip test in current test level')
    def test_list_datasets_with_args(self):
        """List datasets with explicit paging, sorting and search arguments."""
        result = self.api.list_datasets(
            owner_or_group=default_owner,
            page_number=1,
            page_size=2,
            sort='downloads',
            search='chinese',
        )
        logger.info(f'List datasets with full result: {result}')

    # Fix: this test was missing the test-level gate its siblings have, so it
    # performed a live network call unconditionally on every test run.
    @unittest.skipUnless(test_level() >= 3, 'skip test in current test level')
    def test_list_models(self):
        """List models for a public organization with explicit paging."""
        result = self.api.list_models(
            owner_or_group='Qwen', page_number=1, page_size=2)
        logger.info(f'List models result: {result}')

View File

@@ -122,6 +122,18 @@ class TestToOllama(unittest.TestCase):
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_check_template_type(self):
_test_check_tmpl_type(
'unsloth/gpt-oss-20b-GGUF',
'gpt-oss',
gguf_meta={'general.name': 'Gpt-Oss-20B'})
_test_check_tmpl_type(
'unsloth/granite-4.0-h-tiny-GGUF',
'granite4',
gguf_meta={'general.name': 'Granite-4.0-H-Tiny'})
_test_check_tmpl_type(
'unsloth/DeepSeek-V3.1-GGUF',
'deepseek-v3.1',
gguf_meta={'general.name': 'Deepseek-V3.1'})
_test_check_tmpl_type('unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF',
'qwen3-coder')
_test_check_tmpl_type(