mirror of
https://github.com/modelscope/modelscope.git
synced 2026-02-24 04:01:10 +01:00
Merge branch 'release/1.32' into build_swift_image
This commit is contained in:
@@ -22,7 +22,7 @@ do
|
||||
echo "get gpu lock $gpu"
|
||||
|
||||
CONTAINER_NAME="modelscope-ci-$idx"
|
||||
let is_get_file_lock=true
|
||||
is_get_file_lock=true
|
||||
|
||||
# pull image if there are update
|
||||
docker pull ${IMAGE_NAME}:${IMAGE_VERSION}
|
||||
|
||||
7
.github/workflows/citest.yaml
vendored
7
.github/workflows/citest.yaml
vendored
@@ -54,7 +54,7 @@ jobs:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
lfs: 'true'
|
||||
lfs: 'false'
|
||||
submodules: 'true'
|
||||
fetch-depth: ${{ github.event_name == 'pull_request' && 2 || 0 }}
|
||||
- name: Get changed files
|
||||
@@ -65,8 +65,9 @@ jobs:
|
||||
else
|
||||
echo "PR_CHANGED_FILES=$(git diff --name-only ${{ github.event.before }} ${{ github.event.after }} | xargs)" >> $GITHUB_ENV
|
||||
fi
|
||||
- name: Checkout LFS objects
|
||||
run: git lfs checkout
|
||||
- name: Fetch LFS objects
|
||||
run: |
|
||||
git lfs pull
|
||||
- name: Run unittest
|
||||
shell: bash
|
||||
run: |
|
||||
|
||||
14
.github/workflows/daily_regression.yaml
vendored
14
.github/workflows/daily_regression.yaml
vendored
@@ -24,12 +24,16 @@ jobs:
|
||||
sudo chown -R $USER:$USER $ACTION_RUNNER_DIR
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
lfs: 'true'
|
||||
submodules: 'true'
|
||||
- name: Checkout LFS objects
|
||||
run: git lfs checkout
|
||||
lfs: false
|
||||
submodules: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Fetch LFS objects
|
||||
run: |
|
||||
git lfs pull
|
||||
|
||||
- name: Run unittest
|
||||
shell: bash
|
||||
run: |
|
||||
|
||||
4
.github/workflows/publish.yaml
vendored
4
.github/workflows/publish.yaml
vendored
@@ -22,7 +22,9 @@ jobs:
|
||||
- name: Install wheel
|
||||
run: pip install wheel && pip install -r requirements/framework.txt
|
||||
- name: Build ModelScope
|
||||
run: python setup.py sdist bdist_wheel
|
||||
# Build AST template before packaging
|
||||
run: |
|
||||
make whl
|
||||
- name: Publish package to PyPI
|
||||
run: |
|
||||
pip install twine
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
recursive-include modelscope/configs *.py *.cu *.h *.cpp
|
||||
recursive-include modelscope/ops *.py *.cu *.h *.cpp
|
||||
recursive-include modelscope/cli/template *.tpl
|
||||
recursive-include modelscope/utils *.json
|
||||
|
||||
3
Makefile
3
Makefile
@@ -18,7 +18,8 @@ test:
|
||||
|
||||
.PHONY: whl
|
||||
whl:
|
||||
python setup.py sdist bdist_wheel
|
||||
python -c "from modelscope.utils.ast_utils import generate_ast_template; generate_ast_template()"
|
||||
python setup.py sdist --dist-dir $(WHL_BUILD_DIR)/dist bdist_wheel --dist-dir $(WHL_BUILD_DIR)/dist
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
|
||||
@@ -16,6 +16,7 @@ from modelscope.cli.server import ServerCMD
|
||||
from modelscope.cli.upload import UploadCMD
|
||||
from modelscope.hub.constants import MODELSCOPE_ASCII
|
||||
from modelscope.utils.logger import get_logger
|
||||
from modelscope.version import __version__
|
||||
|
||||
logger = get_logger(log_level=logging.WARNING)
|
||||
|
||||
@@ -24,6 +25,11 @@ def run_cmd():
|
||||
print(MODELSCOPE_ASCII)
|
||||
parser = argparse.ArgumentParser(
|
||||
'ModelScope Command Line tool', usage='modelscope <command> [<args>]')
|
||||
parser.add_argument(
|
||||
'-V',
|
||||
'--version',
|
||||
action='version',
|
||||
version=f'ModelScope CLI {__version__}')
|
||||
parser.add_argument(
|
||||
'--token', default=None, help='Specify ModelScope SDK token.')
|
||||
subparsers = parser.add_subparsers(help='modelscope commands helpers')
|
||||
|
||||
@@ -122,6 +122,13 @@ class CreateCMD(CLICommand):
|
||||
type=str,
|
||||
default='',
|
||||
help='Path in the repository to upload to.')
|
||||
aigc_group.add_argument(
|
||||
'--model_source',
|
||||
type=str,
|
||||
default='USER_UPLOAD',
|
||||
help=
|
||||
'Source of the AIGC model. `USER_UPLOAD`, `TRAINED_FROM_MODELSCOPE` or `TRAINED_FROM_ALIYUN_FC`.'
|
||||
)
|
||||
|
||||
parser.set_defaults(func=subparser_func)
|
||||
|
||||
@@ -179,10 +186,11 @@ class CreateCMD(CLICommand):
|
||||
model_path=self.args.model_path,
|
||||
aigc_type=self.args.aigc_type,
|
||||
base_model_type=self.args.base_model_type,
|
||||
revision=self.args.revision,
|
||||
tag=self.args.revision,
|
||||
description=self.args.description,
|
||||
base_model_id=self.args.base_model_id,
|
||||
path_in_repo=self.args.path_in_repo,
|
||||
model_source=self.args.model_source,
|
||||
)
|
||||
|
||||
# Convert visibility string to int for the API call
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
from .callback import ProgressCallback
|
||||
from .commit_scheduler import CommitScheduler
|
||||
|
||||
@@ -54,8 +54,8 @@ from modelscope.hub.constants import (API_HTTP_CLIENT_MAX_RETRIES,
|
||||
UPLOAD_MAX_FILE_SIZE,
|
||||
UPLOAD_NORMAL_FILE_SIZE_TOTAL_LIMIT,
|
||||
UPLOAD_SIZE_THRESHOLD_TO_ENFORCE_LFS,
|
||||
DatasetVisibility, Licenses,
|
||||
ModelVisibility, Visibility,
|
||||
VALID_SORT_KEYS, DatasetVisibility,
|
||||
Licenses, ModelVisibility, Visibility,
|
||||
VisibilityMap)
|
||||
from modelscope.hub.errors import (InvalidParameter, NotExistError,
|
||||
NotLoginException, RequestError,
|
||||
@@ -303,6 +303,7 @@ class HubApi:
|
||||
'WeightsSize': aigc_model.weight_size,
|
||||
'ModelPath': aigc_model.model_path,
|
||||
'TriggerWords': aigc_model.trigger_words,
|
||||
'ModelSource': aigc_model.model_source,
|
||||
})
|
||||
|
||||
if aigc_model.official_tags:
|
||||
@@ -912,6 +913,75 @@ class HubApi:
|
||||
raise_for_http_status(r)
|
||||
return None
|
||||
|
||||
def list_datasets(self,
|
||||
owner_or_group: str,
|
||||
*,
|
||||
page_number: Optional[int] = 1,
|
||||
page_size: Optional[int] = 10,
|
||||
sort: Optional[str] = None,
|
||||
search: Optional[str] = None,
|
||||
endpoint: Optional[str] = None,
|
||||
) -> dict:
|
||||
"""List datasets via OpenAPI with pagination, filtering and sorting.
|
||||
|
||||
Args:
|
||||
owner_or_group (str): Search by dataset authors (including organizations and individuals).
|
||||
page_number (int, optional): The page number. Defaults to 1.
|
||||
page_size (int, optional): The page size. Defaults to 10.
|
||||
sort (str, optional): Sort key. If not provided, the server's default sorting is used.
|
||||
choose from ['default', 'downloads', 'likes', 'last_modified'].
|
||||
search (str, optional): Search by substring keywords in the dataset's Chinese name,
|
||||
English name, and authors (including organizations and individuals).
|
||||
endpoint (str, optional): Hub endpoint to use. When None, use the endpoint specified in the class.
|
||||
|
||||
Returns:
|
||||
dict: The OpenAPI data payload, e.g.
|
||||
{
|
||||
"datasets": [...],
|
||||
"total_count": int,
|
||||
"page_number": int,
|
||||
"page_size": int
|
||||
}
|
||||
"""
|
||||
if not endpoint:
|
||||
endpoint = self.endpoint
|
||||
path = f'{endpoint}/openapi/v1/datasets'
|
||||
|
||||
# Build query params
|
||||
params: Dict[str, Any] = {
|
||||
'page_number': page_number,
|
||||
'page_size': page_size,
|
||||
}
|
||||
if sort:
|
||||
if sort not in VALID_SORT_KEYS:
|
||||
raise InvalidParameter(
|
||||
f'Invalid sort key: {sort}. Supported sort keys: {list(VALID_SORT_KEYS)}')
|
||||
params['sort'] = sort
|
||||
if search:
|
||||
params['search'] = search
|
||||
if owner_or_group:
|
||||
params['author'] = owner_or_group
|
||||
|
||||
cookies = ModelScopeConfig.get_cookies()
|
||||
headers = self.builder_headers(self.headers)
|
||||
|
||||
r = self.session.get(
|
||||
path,
|
||||
params=params,
|
||||
cookies=cookies,
|
||||
headers=headers
|
||||
)
|
||||
raise_for_http_status(r)
|
||||
resp = r.json()
|
||||
|
||||
# OpenAPI success schema
|
||||
if resp.get('success') is True and 'data' in resp:
|
||||
return resp['data']
|
||||
else:
|
||||
# Fallback for unexpected schema
|
||||
msg = resp.get('message') or 'Failed to list datasets'
|
||||
raise RequestError(msg)
|
||||
|
||||
def _check_cookie(self, use_cookies: Union[bool, CookieJar] = False) -> CookieJar: # noqa
|
||||
cookies = None
|
||||
if isinstance(use_cookies, CookieJar):
|
||||
@@ -1238,17 +1308,6 @@ class HubApi:
|
||||
logger.info(f'Create dataset success: {dataset_repo_url}')
|
||||
return dataset_repo_url
|
||||
|
||||
def list_datasets(self, endpoint: Optional[str] = None):
|
||||
if not endpoint:
|
||||
endpoint = self.endpoint
|
||||
path = f'{endpoint}/api/v1/datasets'
|
||||
params = {}
|
||||
r = self.session.get(path, params=params,
|
||||
headers=self.builder_headers(self.headers))
|
||||
raise_for_http_status(r)
|
||||
dataset_list = r.json()[API_RESPONSE_FIELD_DATA]
|
||||
return [x['Name'] for x in dataset_list]
|
||||
|
||||
def delete_dataset(self, dataset_id: str, endpoint: Optional[str] = None):
|
||||
|
||||
cookies = ModelScopeConfig.get_cookies()
|
||||
@@ -1802,6 +1861,7 @@ class HubApi:
|
||||
user_name = os.environ[MODELSCOPE_CLOUD_USERNAME]
|
||||
download_uv_url = f'{endpoint}/api/v1/datasets/{namespace}/{dataset_name}/download/uv/' \
|
||||
f'{channel}?user={user_name}'
|
||||
|
||||
download_uv_resp = self.session.post(download_uv_url, cookies=cookies,
|
||||
headers=self.builder_headers(self.headers))
|
||||
download_uv_resp = download_uv_resp.json()
|
||||
|
||||
381
modelscope/hub/commit_scheduler.py
Normal file
381
modelscope/hub/commit_scheduler.py
Normal file
@@ -0,0 +1,381 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
# Copyright 2022-present, the HuggingFace Inc. team.
|
||||
import atexit
|
||||
import contextlib
|
||||
import os
|
||||
import time
|
||||
import types
|
||||
from concurrent.futures import Future, ThreadPoolExecutor
|
||||
from io import SEEK_END, SEEK_SET, BytesIO
|
||||
from pathlib import Path
|
||||
from threading import Lock, Thread
|
||||
from typing import Dict, List, Optional, Union
|
||||
|
||||
from modelscope.hub.api import HubApi
|
||||
from modelscope.hub.constants import Visibility
|
||||
from modelscope.utils.constant import DEFAULT_REPOSITORY_REVISION
|
||||
from modelscope.utils.logger import get_logger
|
||||
from modelscope.utils.repo_utils import CommitInfo, RepoUtils
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
IGNORE_GIT_FOLDER_PATTERNS = ['.git', '.git/*', '*/.git', '**/.git/**']
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def patch_upload_folder_for_scheduler(scheduler_instance):
|
||||
"""Patch upload_folder for CommitScheduler"""
|
||||
api = scheduler_instance.api
|
||||
original_prepare = api._prepare_upload_folder
|
||||
|
||||
def patched_prepare_upload_folder(
|
||||
api_self,
|
||||
folder_path_or_files: Union[str, Path, List[str], List[Path]],
|
||||
path_in_repo: str,
|
||||
allow_patterns: Optional[Union[List[str], str]] = None,
|
||||
ignore_patterns: Optional[Union[List[str], str]] = None,
|
||||
) -> List[Union[tuple, list]]:
|
||||
"""
|
||||
Patched version that supports incremental updates for CommitScheduler.
|
||||
"""
|
||||
with scheduler_instance.lock:
|
||||
if isinstance(folder_path_or_files, list):
|
||||
raise ValueError(
|
||||
'Uploading multiple files or folders is not supported for scheduled commit.'
|
||||
)
|
||||
elif os.path.isfile(folder_path_or_files):
|
||||
raise ValueError(
|
||||
'Uploading file is not supported for scheduled commit.')
|
||||
else:
|
||||
folder_path = Path(folder_path_or_files).expanduser().resolve()
|
||||
|
||||
logger.debug('Listing files to upload for scheduled commit.')
|
||||
relpath_to_abspath = {
|
||||
path.relative_to(folder_path).as_posix(): path
|
||||
for path in sorted(folder_path.glob('**/*')) if path.is_file()
|
||||
}
|
||||
prefix = f"{path_in_repo.strip('/')}/" if path_in_repo else ''
|
||||
|
||||
prepared_repo_objects = []
|
||||
files_to_track = {}
|
||||
|
||||
for relpath in RepoUtils.filter_repo_objects(
|
||||
relpath_to_abspath.keys(),
|
||||
allow_patterns=allow_patterns,
|
||||
ignore_patterns=ignore_patterns):
|
||||
local_path = relpath_to_abspath[relpath]
|
||||
stat = local_path.stat()
|
||||
if scheduler_instance.last_uploaded.get(
|
||||
local_path
|
||||
) is None or scheduler_instance.last_uploaded[
|
||||
local_path] != stat.st_mtime:
|
||||
partial_file = PartialFileIO(local_path, stat.st_size)
|
||||
prepared_repo_objects.append(
|
||||
(prefix + relpath, partial_file))
|
||||
files_to_track[local_path] = stat.st_mtime
|
||||
|
||||
scheduler_instance._pending_tracker_updates = files_to_track
|
||||
|
||||
if not prepared_repo_objects:
|
||||
logger.debug(
|
||||
'No changed files to upload for scheduled commit.')
|
||||
|
||||
return prepared_repo_objects
|
||||
|
||||
try:
|
||||
api._prepare_upload_folder = types.MethodType(
|
||||
patched_prepare_upload_folder, api)
|
||||
yield
|
||||
finally:
|
||||
api._prepare_upload_folder = original_prepare
|
||||
|
||||
|
||||
class CommitScheduler:
|
||||
"""
|
||||
A scheduler that automatically uploads a local folder to ModelScope Hub at
|
||||
specified intervals (e.g., every 5 minutes).
|
||||
|
||||
It's recommended to use the scheduler as a context manager to ensure proper
|
||||
cleanup and final commit execution when your script completes. Alternatively,
|
||||
you can manually stop the scheduler using the `stop` method.
|
||||
|
||||
Args:
|
||||
repo_id (`str`):
|
||||
The id of the repo to commit to.
|
||||
folder_path (`str` or `Path`):
|
||||
Local folder path that will be monitored and uploaded periodically.
|
||||
interval (`int` or `float`, *optional*):
|
||||
Time interval in minutes between each upload operation. Defaults to 5 minutes.
|
||||
path_in_repo (`str`, *optional*):
|
||||
Target directory path within the repository, such as `"models/"`.
|
||||
If not specified, files are uploaded to the repository root.
|
||||
repo_type (`str`, *optional*):
|
||||
Repository type for the target repo. Defaults to `model`.
|
||||
revision (`str`, *optional*):
|
||||
Target branch or revision for commits. Defaults to `master`.
|
||||
visibility (`str`, *optional*):
|
||||
The visibility of the repo,
|
||||
could be `public`, `private`, `internal`, default to `public`.
|
||||
token (`str`, *optional*):
|
||||
The token to use to commit to the repo. Defaults to the token saved on the machine.
|
||||
allow_patterns (`List[str]` or `str`, *optional*):
|
||||
File patterns to include in uploads. Only files matching these patterns will be uploaded.
|
||||
ignore_patterns (`List[str]` or `str`, *optional*):
|
||||
File patterns to exclude from uploads. Files matching these patterns will be skipped.
|
||||
hub_api (`HubApi`, *optional*):
|
||||
Custom [`HubApi`] instance for Hub operations. Allows for customized
|
||||
configurations like user agent or token settings.
|
||||
|
||||
Example:
|
||||
```py
|
||||
>>> from pathlib import Path
|
||||
>>> from modelscope.hub import CommitScheduler
|
||||
|
||||
# Create scheduler with 10-minute intervals
|
||||
>>> data_file = Path("workspace/experiment.log")
|
||||
>>> scheduler = CommitScheduler(
|
||||
... repo_id="my_experiments",
|
||||
... repo_type="dataset",
|
||||
... folder_path=data_file.parent,
|
||||
... interval=10
|
||||
... )
|
||||
|
||||
>>> with data_file.open("a") as f:
|
||||
... f.write("experiment started")
|
||||
|
||||
# Later in the workflow...
|
||||
>>> with data_file.open("a") as f:
|
||||
... f.write("experiment completed")
|
||||
```
|
||||
|
||||
Context manager usage:
|
||||
```py
|
||||
>>> from pathlib import Path
|
||||
>>> from modelscope.hub import CommitScheduler
|
||||
|
||||
>>> with CommitScheduler(
|
||||
... repo_id="my_experiments",
|
||||
... repo_type="dataset",
|
||||
... folder_path="workspace",
|
||||
... interval=10
|
||||
... ) as scheduler:
|
||||
... log_file = Path("workspace/progress.log")
|
||||
... with log_file.open("a") as f:
|
||||
... f.write("starting process")
|
||||
... # ... perform work ...
|
||||
... with log_file.open("a") as f:
|
||||
... f.write("process finished")
|
||||
|
||||
# Scheduler automatically stops and performs final upload
|
||||
```
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
repo_id: str,
|
||||
folder_path: Union[str, Path],
|
||||
interval: Union[int, float] = 5,
|
||||
path_in_repo: Optional[str] = None,
|
||||
repo_type: Optional[str] = None,
|
||||
revision: Optional[str] = DEFAULT_REPOSITORY_REVISION,
|
||||
visibility: Optional[str] = Visibility.PUBLIC,
|
||||
token: Optional[str] = None,
|
||||
allow_patterns: Optional[Union[List[str], str]] = None,
|
||||
ignore_patterns: Optional[Union[List[str], str]] = None,
|
||||
hub_api: Optional[HubApi] = None,
|
||||
) -> None:
|
||||
self.api = hub_api or HubApi()
|
||||
|
||||
self.folder_path = Path(folder_path).expanduser().resolve()
|
||||
if not self.folder_path.exists():
|
||||
raise ValueError(f'Folder path does not exist: {folder_path}')
|
||||
|
||||
self.path_in_repo = path_in_repo or ''
|
||||
self.allow_patterns = allow_patterns
|
||||
|
||||
if ignore_patterns is None:
|
||||
ignore_patterns = []
|
||||
elif isinstance(ignore_patterns, str):
|
||||
ignore_patterns = [ignore_patterns]
|
||||
self.ignore_patterns = ignore_patterns + IGNORE_GIT_FOLDER_PATTERNS
|
||||
|
||||
self.repo_url = self.api.create_repo(
|
||||
repo_id=repo_id,
|
||||
token=token,
|
||||
repo_type=repo_type,
|
||||
visibility=visibility,
|
||||
exist_ok=True,
|
||||
create_default_config=False,
|
||||
)
|
||||
self.repo_id = repo_id
|
||||
self.repo_type = repo_type
|
||||
self.revision = revision
|
||||
self.token = token
|
||||
|
||||
# Keep track of already uploaded files
|
||||
self.last_uploaded: Dict[Path, float] = {}
|
||||
|
||||
if interval <= 0:
|
||||
raise ValueError(
|
||||
f'"interval" must be a positive integer, not "{interval}".')
|
||||
self.lock = Lock()
|
||||
self.interval = interval
|
||||
self.__stopped = False
|
||||
|
||||
logger.info(
|
||||
f'Scheduled job to push {self.folder_path} to {self.repo_id} at an interval of {self.interval} minutes.'
|
||||
)
|
||||
self.executor = ThreadPoolExecutor(max_workers=1)
|
||||
self._scheduler_thread = Thread(
|
||||
target=self._run_scheduler, daemon=True)
|
||||
self._scheduler_thread.start()
|
||||
atexit.register(self.commit_scheduled_changes)
|
||||
|
||||
def stop(self) -> None:
|
||||
"""Stop the scheduler."""
|
||||
self.__stopped = True
|
||||
|
||||
def __enter__(self) -> 'CommitScheduler':
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_value, traceback) -> None:
|
||||
self.trigger().result()
|
||||
self.stop()
|
||||
return
|
||||
|
||||
def _run_scheduler(self) -> None:
|
||||
while not self.__stopped:
|
||||
self.last_future = self.trigger()
|
||||
time.sleep(self.interval * 60)
|
||||
|
||||
def trigger(self) -> Future:
|
||||
"""Trigger a background commit and return a future."""
|
||||
return self.executor.submit(self._commit_scheduled_changes)
|
||||
|
||||
def _commit_scheduled_changes(self) -> Optional[CommitInfo]:
|
||||
if self.__stopped:
|
||||
return None
|
||||
|
||||
logger.info('(Background) scheduled commit triggered.')
|
||||
try:
|
||||
value = self.commit_scheduled_changes()
|
||||
return value
|
||||
except Exception as e:
|
||||
logger.error(f'Error while pushing to Hub: {e}')
|
||||
raise
|
||||
|
||||
def commit_scheduled_changes(self) -> Optional[CommitInfo]:
|
||||
"""Push folder to the Hub and return commit info if changes are found."""
|
||||
try:
|
||||
self._pending_tracker_updates = {}
|
||||
with patch_upload_folder_for_scheduler(self):
|
||||
commit_info = self.api.upload_folder(
|
||||
repo_id=self.repo_id,
|
||||
folder_path=self.folder_path,
|
||||
path_in_repo=self.path_in_repo,
|
||||
commit_message='Scheduled Commit',
|
||||
token=self.token,
|
||||
repo_type=self.repo_type,
|
||||
allow_patterns=self.allow_patterns,
|
||||
ignore_patterns=self.ignore_patterns,
|
||||
revision=self.revision,
|
||||
)
|
||||
|
||||
if commit_info is None:
|
||||
logger.debug(
|
||||
'No changed files to upload for scheduled commit.')
|
||||
return None
|
||||
|
||||
with self.lock:
|
||||
if hasattr(self, '_pending_tracker_updates'):
|
||||
self.last_uploaded.update(self._pending_tracker_updates)
|
||||
logger.debug(
|
||||
f'Updated modification tracker for {len(self._pending_tracker_updates)} files.'
|
||||
)
|
||||
del self._pending_tracker_updates
|
||||
|
||||
return commit_info
|
||||
|
||||
except Exception as e:
|
||||
# Treat "No files to upload" as a normal ― no-change ― situation instead of an error.
|
||||
if 'No files to upload' in str(e):
|
||||
logger.debug(
|
||||
'No changed files to upload for scheduled commit.')
|
||||
return None
|
||||
|
||||
if hasattr(self, '_pending_tracker_updates'):
|
||||
del self._pending_tracker_updates
|
||||
logger.error(f'Error during scheduled commit: {e}')
|
||||
raise
|
||||
|
||||
|
||||
class PartialFileIO(BytesIO):
|
||||
"""A file-like object that reads only the first part of a file."""
|
||||
|
||||
def __init__(self, file_path: Union[str, Path], size_limit: int) -> None:
|
||||
self._file_path = Path(file_path)
|
||||
self._file = None
|
||||
self._size_limit = size_limit
|
||||
self.open()
|
||||
|
||||
def open(self) -> None:
|
||||
"""Open the file and initialize size limit."""
|
||||
if self._file is not None:
|
||||
return
|
||||
try:
|
||||
self._file = self._file_path.open('rb')
|
||||
self._size_limit = min(
|
||||
self._size_limit or float('inf'),
|
||||
os.fstat(self._file.fileno()).st_size)
|
||||
except OSError as e:
|
||||
logger.error(f'Failed to open file {self._file_path}: {e}')
|
||||
raise
|
||||
|
||||
def close(self) -> None:
|
||||
"""Close the file if it's open."""
|
||||
if self._file is not None:
|
||||
self._file.close()
|
||||
self._file = None
|
||||
|
||||
def __del__(self) -> None:
|
||||
self.close()
|
||||
return super().__del__()
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f'<PartialFileIO file_path={self._file_path} size_limit={self._size_limit}>'
|
||||
|
||||
def __len__(self) -> int:
|
||||
return self._size_limit
|
||||
|
||||
def __getattribute__(self, name: str):
|
||||
if name.startswith('_') or name in {
|
||||
'read', 'tell', 'seek', 'close', 'open'
|
||||
}: # only 5 public methods supported
|
||||
return super().__getattribute__(name)
|
||||
raise NotImplementedError(f"PartialFileIO does not support '{name}'.")
|
||||
|
||||
def tell(self) -> int:
|
||||
return self._file.tell()
|
||||
|
||||
def seek(self, __offset: int, __whence: int = SEEK_SET) -> int:
|
||||
"""Seek to a position in the file, but never beyond size_limit."""
|
||||
if __whence == SEEK_END:
|
||||
__offset = len(self) + __offset
|
||||
__whence = SEEK_SET
|
||||
|
||||
pos = self._file.seek(__offset, __whence)
|
||||
if pos > self._size_limit:
|
||||
return self._file.seek(self._size_limit)
|
||||
return pos
|
||||
|
||||
def read(self, __size: Optional[int] = -1) -> bytes:
|
||||
"""Read at most _size bytes from the current position."""
|
||||
current = self.tell()
|
||||
if __size is None or __size < 0:
|
||||
# Read until file limit
|
||||
truncated_size = self._size_limit - current
|
||||
else:
|
||||
# Read until file limit or __size
|
||||
truncated_size = min(__size, self._size_limit - current)
|
||||
return self._file.read(truncated_size)
|
||||
@@ -116,3 +116,18 @@ VisibilityMap = {
|
||||
ModelVisibility.INTERNAL: Visibility.INTERNAL,
|
||||
ModelVisibility.PUBLIC: Visibility.PUBLIC
|
||||
}
|
||||
|
||||
|
||||
class SortKey(object):
|
||||
DEFAULT = 'default'
|
||||
DOWNLOADS = 'downloads'
|
||||
LIKES = 'likes'
|
||||
LAST_MODIFIED = 'last_modified'
|
||||
|
||||
|
||||
VALID_SORT_KEYS = {
|
||||
SortKey.DEFAULT,
|
||||
SortKey.DOWNLOADS,
|
||||
SortKey.LIKES,
|
||||
SortKey.LAST_MODIFIED,
|
||||
}
|
||||
|
||||
@@ -120,11 +120,12 @@ def handle_http_response(response: requests.Response,
|
||||
http_error_msg = 'The request model: %s does not exist!' % (model_id)
|
||||
elif 403 == response.status_code:
|
||||
if cookies is None:
|
||||
http_error_msg = 'Authentication token does not exist, '
|
||||
'failed to access model {model_id} which may not exist or may be '
|
||||
'private. Please login first.'
|
||||
http_error_msg = f'Authentication token does not exist, \
|
||||
failed to access model {model_id} which may not exist \
|
||||
or may be private. Please login first.'
|
||||
|
||||
else:
|
||||
http_error_msg = 'The authentication token is invalid, failed to access model {model_id}.'
|
||||
http_error_msg = f'The authentication token is invalid, failed to access model {model_id}.'
|
||||
elif 400 <= response.status_code < 500:
|
||||
http_error_msg = u'%s Client Error: %s, Request id: %s for url: %s' % (
|
||||
response.status_code, reason, request_id, response.url)
|
||||
|
||||
@@ -25,6 +25,7 @@ from modelscope.hub.constants import (
|
||||
MODELSCOPE_PARALLEL_DOWNLOAD_THRESHOLD_MB, TEMPORARY_FOLDER_NAME)
|
||||
from modelscope.utils.constant import (DEFAULT_DATASET_REVISION,
|
||||
DEFAULT_MODEL_REVISION,
|
||||
INTRA_CLOUD_ACCELERATION,
|
||||
REPO_TYPE_DATASET, REPO_TYPE_MODEL,
|
||||
REPO_TYPE_SUPPORT)
|
||||
from modelscope.utils.file_utils import (get_dataset_cache_root,
|
||||
@@ -194,9 +195,22 @@ def _repo_file_download(
|
||||
" online, set 'local_files_only' to False.")
|
||||
|
||||
_api = HubApi()
|
||||
|
||||
headers = {
|
||||
'user-agent': ModelScopeConfig.get_user_agent(user_agent=user_agent, )
|
||||
'user-agent': ModelScopeConfig.get_user_agent(user_agent=user_agent, ),
|
||||
'snapshot-identifier': str(uuid.uuid4()),
|
||||
}
|
||||
|
||||
if INTRA_CLOUD_ACCELERATION == 'true':
|
||||
region_id: str = (
|
||||
os.getenv('INTRA_CLOUD_ACCELERATION_REGION')
|
||||
or _api._get_internal_acceleration_domain())
|
||||
if region_id:
|
||||
logger.info(
|
||||
f'Intra-cloud acceleration enabled for downloading from {repo_id}'
|
||||
)
|
||||
headers['x-aliyun-region-id'] = region_id
|
||||
|
||||
if cookies is None:
|
||||
cookies = ModelScopeConfig.get_cookies()
|
||||
repo_files = []
|
||||
|
||||
@@ -8,7 +8,7 @@ import requests
|
||||
from tqdm.auto import tqdm
|
||||
|
||||
from modelscope.hub.utils.utils import (MODELSCOPE_URL_SCHEME,
|
||||
encode_image_to_base64, get_endpoint)
|
||||
encode_media_to_base64, get_endpoint)
|
||||
from modelscope.utils.logger import get_logger
|
||||
|
||||
logger = get_logger()
|
||||
@@ -69,17 +69,20 @@ class AigcModel:
|
||||
'main-strong', 'character-strong'
|
||||
}
|
||||
|
||||
def __init__(self,
|
||||
aigc_type: str,
|
||||
base_model_type: str,
|
||||
model_path: str,
|
||||
base_model_id: str = '',
|
||||
tag: Optional[str] = 'v1.0',
|
||||
description: Optional[str] = 'this is an aigc model',
|
||||
cover_images: Optional[List[str]] = None,
|
||||
path_in_repo: Optional[str] = '',
|
||||
trigger_words: Optional[List[str]] = None,
|
||||
official_tags: Optional[List[str]] = None):
|
||||
def __init__(
|
||||
self,
|
||||
aigc_type: str,
|
||||
base_model_type: str,
|
||||
model_path: str,
|
||||
base_model_id: str = '',
|
||||
tag: Optional[str] = 'v1.0',
|
||||
description: Optional[str] = 'this is an aigc model',
|
||||
cover_images: Optional[List[str]] = None,
|
||||
path_in_repo: Optional[str] = '',
|
||||
trigger_words: Optional[List[str]] = None,
|
||||
official_tags: Optional[List[str]] = None,
|
||||
model_source: Optional[str] = 'USER_UPLOAD',
|
||||
):
|
||||
"""
|
||||
Initializes the AigcModel helper.
|
||||
|
||||
@@ -94,12 +97,15 @@ class AigcModel:
|
||||
path_in_repo (str, optional): Path in the repository.
|
||||
trigger_words (List[str], optional): Trigger words for the AIGC Lora model.
|
||||
official_tags (List[str], optional): Official tags for the AIGC model. Defaults to None.
|
||||
model_source (str, optional): Source of the model.
|
||||
`USER_UPLOAD`, `TRAINED_FROM_MODELSCOPE` or `TRAINED_FROM_ALIYUN_FC`. Defaults to 'USER_UPLOAD'.
|
||||
"""
|
||||
self.model_path = model_path
|
||||
self.aigc_type = aigc_type
|
||||
self.base_model_type = base_model_type
|
||||
self.tag = tag
|
||||
self.description = description
|
||||
self.model_source = model_source
|
||||
# Process cover images - convert local paths to base64 data URLs
|
||||
if cover_images is not None:
|
||||
processed_cover_images = []
|
||||
@@ -111,7 +117,7 @@ class AigcModel:
|
||||
or img.startswith('data:')):
|
||||
try:
|
||||
# Convert local path to base64 data URL
|
||||
processed_img = encode_image_to_base64(img)
|
||||
processed_img = encode_media_to_base64(img)
|
||||
processed_cover_images.append(processed_img)
|
||||
logger.info('Converted local image to base64: %s',
|
||||
os.path.basename(img))
|
||||
@@ -383,7 +389,8 @@ class AigcModel:
|
||||
'weight_sha256': self.weight_sha256,
|
||||
'weight_size': self.weight_size,
|
||||
'trigger_words': self.trigger_words,
|
||||
'official_tags': self.official_tags
|
||||
'official_tags': self.official_tags,
|
||||
'model_source': self.model_source,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -146,16 +146,15 @@ def compute_hash(file_path):
|
||||
return sha256_hash.hexdigest()
|
||||
|
||||
|
||||
def file_integrity_validation(file_path, expected_sha256):
|
||||
def file_integrity_validation(file_path, expected_sha256) -> bool:
|
||||
"""Validate the file hash is expected, if not, delete the file
|
||||
|
||||
Args:
|
||||
file_path (str): The file to validate
|
||||
expected_sha256 (str): The expected sha256 hash
|
||||
|
||||
Raises:
|
||||
FileIntegrityError: If file_path hash is not expected.
|
||||
|
||||
Returns:
|
||||
bool: True if the file is valid, False otherwise
|
||||
"""
|
||||
file_sha256 = compute_hash(file_path)
|
||||
if not file_sha256 == expected_sha256:
|
||||
@@ -163,7 +162,9 @@ def file_integrity_validation(file_path, expected_sha256):
|
||||
msg = 'File %s integrity check failed, expected sha256 signature is %s, actual is %s, the download may be incomplete, please try again.' % ( # noqa E501
|
||||
file_path, expected_sha256, file_sha256)
|
||||
logger.error(msg)
|
||||
raise FileIntegrityError(msg)
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def add_content_to_file(repo,
|
||||
@@ -380,40 +381,40 @@ def convert_timestamp(time_stamp: Union[int, str, datetime],
|
||||
)
|
||||
|
||||
|
||||
def encode_image_to_base64(image_path: str) -> str:
|
||||
def encode_media_to_base64(media_file_path: str) -> str:
|
||||
"""
|
||||
Encode image file to base64 string.
|
||||
Encode image or video file to base64 string.
|
||||
|
||||
Args:
|
||||
image_path (str): Path to the image file
|
||||
media_file_path (str): Path to the image or video file
|
||||
|
||||
Returns:
|
||||
str: Base64 encoded string with data URL prefix
|
||||
|
||||
Raises:
|
||||
FileNotFoundError: If image file doesn't exist
|
||||
ValueError: If file is not a valid image format
|
||||
FileNotFoundError: If image/video file doesn't exist
|
||||
ValueError: If file is not a valid format
|
||||
"""
|
||||
import base64
|
||||
import mimetypes
|
||||
|
||||
# Expand user path
|
||||
image_path = os.path.expanduser(image_path)
|
||||
media_file_path = os.path.expanduser(media_file_path)
|
||||
|
||||
if not os.path.exists(image_path):
|
||||
raise FileNotFoundError(f'Image file not found: {image_path}')
|
||||
if not os.path.exists(media_file_path):
|
||||
raise FileNotFoundError(f'Image file not found: {media_file_path}')
|
||||
|
||||
if not os.path.isfile(image_path):
|
||||
raise ValueError(f'Path is not a file: {image_path}')
|
||||
if not os.path.isfile(media_file_path):
|
||||
raise ValueError(f'Path is not a file: {media_file_path}')
|
||||
|
||||
# Get MIME type
|
||||
mime_type, _ = mimetypes.guess_type(image_path)
|
||||
if not mime_type or not mime_type.startswith('image/'):
|
||||
raise ValueError(f'File is not a valid image format: {image_path}')
|
||||
mime_type, _ = mimetypes.guess_type(media_file_path)
|
||||
if not mime_type:
|
||||
raise ValueError(f'File is not a valid format: {media_file_path}')
|
||||
|
||||
# Read and encode file
|
||||
with open(image_path, 'rb') as image_file:
|
||||
image_data = image_file.read()
|
||||
with open(media_file_path, 'rb') as media_file:
|
||||
image_data = media_file.read()
|
||||
base64_data = base64.b64encode(image_data).decode('utf-8')
|
||||
|
||||
# Return data URL format
|
||||
|
||||
0
modelscope/msdatasets.lock
Normal file
0
modelscope/msdatasets.lock
Normal file
@@ -741,7 +741,8 @@ def _download_additional_modules(
|
||||
namespace: str,
|
||||
revision: str,
|
||||
imports: Tuple[str, str, str, str],
|
||||
download_config: Optional[DownloadConfig]
|
||||
download_config: Optional[DownloadConfig],
|
||||
trust_remote_code: Optional[bool] = False,
|
||||
) -> List[Tuple[str, str]]:
|
||||
"""
|
||||
Download additional module for a module <name>.py at URL (or local path) <base_path>/<name>.py
|
||||
@@ -755,6 +756,21 @@ def _download_additional_modules(
|
||||
"""
|
||||
local_imports = []
|
||||
library_imports = []
|
||||
|
||||
# Check if we need to execute remote code
|
||||
has_remote_code = any(
|
||||
import_type in ('internal', 'external')
|
||||
for import_type, _, _, _ in imports
|
||||
)
|
||||
|
||||
if has_remote_code and not trust_remote_code:
|
||||
raise ValueError(
|
||||
f'Loading {name} requires executing code from the repository. '
|
||||
'This is disabled by default for security reasons. '
|
||||
'If you trust the authors of this dataset, you can enable it with '
|
||||
'`trust_remote_code=True`.'
|
||||
)
|
||||
|
||||
download_config = download_config.copy()
|
||||
if download_config.download_desc is None:
|
||||
download_config.download_desc = 'Downloading extra modules'
|
||||
@@ -863,6 +879,7 @@ def get_module_with_script(self) -> DatasetModule:
|
||||
revision=revision,
|
||||
imports=imports,
|
||||
download_config=self.download_config,
|
||||
trust_remote_code=self.trust_remote_code,
|
||||
)
|
||||
additional_files = []
|
||||
if dataset_infos_path:
|
||||
|
||||
@@ -194,6 +194,10 @@ template_info = [
|
||||
modelfile_prefix=
|
||||
'https://modelscope.oss-cn-beijing.aliyuncs.com/llm_template/ollama/deepseek-llm',
|
||||
),
|
||||
TemplateInfo(
|
||||
template_regex=f'.*{cases("DeepSeek-V3.1")}.*',
|
||||
modelfile_prefix=
|
||||
'https://modelscope.oss-cn-beijing.aliyuncs.com/llm_template/ollama/deepseek-v3.1'),
|
||||
TemplateInfo(
|
||||
template_regex=
|
||||
f'.*{cases("deepseek")}.*{cases("v3")}.*',
|
||||
@@ -994,6 +998,14 @@ template_info = [
|
||||
template_regex=f'.*{cases("deepscaler")}.*',
|
||||
modelfile_prefix=
|
||||
'https://modelscope.oss-cn-beijing.aliyuncs.com/llm_template/ollama/deepscaler'),
|
||||
TemplateInfo(
|
||||
template_regex=f'.*{cases("granite-4.0")}.*',
|
||||
modelfile_prefix=
|
||||
'https://modelscope.oss-cn-beijing.aliyuncs.com/llm_template/ollama/granite4'),
|
||||
TemplateInfo(
|
||||
template_regex=f'.*{cases("gpt-oss")}.*',
|
||||
modelfile_prefix=
|
||||
'https://modelscope.oss-cn-beijing.aliyuncs.com/llm_template/ollama/gpt-oss'),
|
||||
]
|
||||
|
||||
|
||||
|
||||
@@ -768,7 +768,7 @@ def load_from_prebuilt(file_path=None):
|
||||
if os.path.exists(file_path):
|
||||
index = _load_index(file_path, with_template=True)
|
||||
else:
|
||||
index = None
|
||||
index = generate_ast_template()
|
||||
return index
|
||||
|
||||
|
||||
|
||||
@@ -257,6 +257,7 @@ def get_file_hash(
|
||||
progress.update(len(byte_chunk))
|
||||
file_hash = file_hash.hexdigest()
|
||||
final_chunk_size = buffer_size
|
||||
file_path_or_obj.seek(0, os.SEEK_SET)
|
||||
|
||||
else:
|
||||
progress.close()
|
||||
|
||||
@@ -418,6 +418,7 @@ class UploadInfo:
|
||||
file_hash_info: dict = file_hash_info or get_file_hash(fileobj)
|
||||
fileobj.seek(0, os.SEEK_SET)
|
||||
sample = fileobj.read(512)
|
||||
fileobj.seek(0, os.SEEK_SET)
|
||||
return cls(
|
||||
sha256=file_hash_info['file_hash'],
|
||||
size=file_hash_info['file_size'],
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Make sure to modify __release_datetime__ to release time when making official release.
|
||||
__version__ = '1.31.0'
|
||||
__version__ = '1.32.0'
|
||||
# default release datetime for branches under active development is set
|
||||
# to be a time far-far-away-into-the-future
|
||||
__release_datetime__ = '2025-10-10 23:59:59'
|
||||
__release_datetime__ = '2025-11-07 12:00:00'
|
||||
|
||||
81
pyproject.toml
Normal file
81
pyproject.toml
Normal file
@@ -0,0 +1,81 @@
|
||||
[project]
|
||||
name = "modelscope"
|
||||
dynamic = ["version", "dependencies", "optional-dependencies"]
|
||||
description = "ModelScope: bring the notion of Model-as-a-Service to life."
|
||||
readme = {file = "README.md", content-type = "text/markdown"}
|
||||
license = "Apache-2.0"
|
||||
license-files = ["LICENSE"]
|
||||
authors = [
|
||||
{name = "ModelScope team"},
|
||||
{email = "contact@modelscope.cn"}
|
||||
]
|
||||
keywords = ["python", "nlp", "science", "cv", "speech", "multi-modal"]
|
||||
requires-python = ">=3.9"
|
||||
classifiers = [
|
||||
'Development Status :: 4 - Beta',
|
||||
'Operating System :: OS Independent',
|
||||
'Programming Language :: Python :: 3',
|
||||
'Programming Language :: Python :: 3.9',
|
||||
'Programming Language :: Python :: 3.10',
|
||||
'Programming Language :: Python :: 3.11',
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://github.com/modelscope/modelscope"
|
||||
|
||||
[project.scripts]
|
||||
modelscope = "modelscope.cli.cli:run_cmd"
|
||||
|
||||
[build-system]
|
||||
requires = ["setuptools>=69", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[tool.setuptools]
|
||||
include-package-data = true
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
where = ["."]
|
||||
include = ["modelscope*"]
|
||||
|
||||
[tool.setuptools.dynamic]
|
||||
version = {attr = "modelscope.version.__version__"}
|
||||
dependencies = {file = ["requirements/hub.txt"]}
|
||||
|
||||
[tool.setuptools.dynamic.optional-dependencies]
|
||||
hub = {file = ["requirements/hub.txt"]}
|
||||
datasets = {file = ["requirements/datasets.txt"]}
|
||||
framework = {file = ["requirements/framework.txt"]}
|
||||
server = {file = ["requirements/server.txt"]}
|
||||
docs = {file = ["requirements/docs.txt"]}
|
||||
tests = {file = ["requirements/tests.txt"]}
|
||||
|
||||
# domain specific with framework base
|
||||
cv = {file = ["requirements/framework.txt", "requirements/cv.txt"]}
|
||||
nlp = {file = ["requirements/framework.txt", "requirements/nlp.txt"]}
|
||||
multi-modal = {file = ["requirements/framework.txt", "requirements/multi-modal.txt"]}
|
||||
science = {file = ["requirements/framework.txt", "requirements/science.txt"]}
|
||||
|
||||
# audio specific with framework base
|
||||
audio_asr = {file = ["requirements/framework.txt", "requirements/audio/audio_asr.txt"]}
|
||||
audio_codec = {file = ["requirements/framework.txt", "requirements/audio/audio_codec.txt"]}
|
||||
audio_tts = {file = ["requirements/framework.txt", "requirements/audio/audio_tts.txt"]}
|
||||
audio_kws = {file = ["requirements/framework.txt", "requirements/audio/audio_kws.txt"]}
|
||||
audio_signal = {file = ["requirements/framework.txt", "requirements/audio/audio_signal.txt"]}
|
||||
audio = {file = ["requirements/framework.txt",
|
||||
"requirements/audio/audio_asr.txt",
|
||||
"requirements/audio/audio_codec.txt",
|
||||
"requirements/audio/audio_tts.txt",
|
||||
"requirements/audio/audio_kws.txt",
|
||||
"requirements/audio/audio_signal.txt"]}
|
||||
|
||||
# skip audio requirements due to its hard dependency which may cause installation failure
|
||||
all = {file = [
|
||||
"requirements/hub.txt",
|
||||
"requirements/datasets.txt",
|
||||
"requirements/framework.txt",
|
||||
"requirements/cv.txt",
|
||||
"requirements/nlp.txt",
|
||||
"requirements/multi-modal.txt",
|
||||
"requirements/science.txt",
|
||||
"requirements/server.txt",
|
||||
]}
|
||||
@@ -2,11 +2,11 @@ accelerate
|
||||
cloudpickle
|
||||
decord>=0.6.0
|
||||
diffusers>=0.25.0
|
||||
ftfy>=6.0.3
|
||||
# 0.12.1 has issue of No such file or directory: 'fairseq/version.txt'
|
||||
# 0.12.2 not support py311
|
||||
#fairseq==0.12.2
|
||||
https://github.com/liyaodev/fairseq/releases/download/v0.12.3.1/fairseq-0.12.3.1-cp311-cp311-linux_x86_64.whl
|
||||
fairseq-fixed==0.12.3.1
|
||||
ftfy>=6.0.3
|
||||
librosa==0.10.1
|
||||
opencv-python
|
||||
pycocoevalcap>=1.2
|
||||
|
||||
239
setup.py
239
setup.py
@@ -1,239 +1,6 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
# !/usr/bin/env python
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
from setuptools import find_packages, setup
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from modelscope.utils.constant import Fields
|
||||
from setuptools import setup
|
||||
|
||||
|
||||
def readme():
|
||||
with open('README.md', encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
return content
|
||||
|
||||
|
||||
version_file = 'modelscope/version.py'
|
||||
|
||||
|
||||
def get_git_hash():
|
||||
|
||||
def _minimal_ext_cmd(cmd):
|
||||
# construct minimal environment
|
||||
env = {}
|
||||
for k in ['SYSTEMROOT', 'PATH', 'HOME']:
|
||||
v = os.environ.get(k)
|
||||
if v is not None:
|
||||
env[k] = v
|
||||
# LANGUAGE is used on win32
|
||||
env['LANGUAGE'] = 'C'
|
||||
env['LANG'] = 'C'
|
||||
env['LC_ALL'] = 'C'
|
||||
out = subprocess.Popen(
|
||||
cmd, stdout=subprocess.PIPE, env=env).communicate()[0]
|
||||
return out
|
||||
|
||||
try:
|
||||
out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD'])
|
||||
sha = out.strip().decode('ascii')
|
||||
except OSError:
|
||||
sha = 'unknown'
|
||||
|
||||
return sha
|
||||
|
||||
|
||||
def get_hash():
|
||||
assert os.path.exists('.git'), '.git directory does not exist'
|
||||
sha = get_git_hash()[:7]
|
||||
return sha
|
||||
|
||||
|
||||
def get_version():
|
||||
with open(version_file, 'r', encoding='utf-8') as f:
|
||||
exec(compile(f.read(), version_file, 'exec'))
|
||||
return locals()['__version__']
|
||||
|
||||
|
||||
def parse_requirements(fname='requirements.txt', with_version=True):
|
||||
"""
|
||||
Parse the package dependencies listed in a requirements file but strips
|
||||
specific versioning information.
|
||||
|
||||
Args:
|
||||
fname (str): path to requirements file
|
||||
with_version (bool, default=False): if True include version specs
|
||||
|
||||
Returns:
|
||||
List[str]: list of requirements items
|
||||
|
||||
CommandLine:
|
||||
python -c "import setup; print(setup.parse_requirements())"
|
||||
"""
|
||||
import re
|
||||
import sys
|
||||
from os.path import exists
|
||||
require_fpath = fname
|
||||
|
||||
def parse_line(line):
|
||||
"""
|
||||
Parse information from a line in a requirements text file
|
||||
"""
|
||||
if line.startswith('-r '):
|
||||
# Allow specifying requirements in other files
|
||||
target = line.split(' ')[1]
|
||||
relative_base = os.path.dirname(fname)
|
||||
absolute_target = os.path.join(relative_base, target)
|
||||
for info in parse_require_file(absolute_target):
|
||||
yield info
|
||||
else:
|
||||
info = {'line': line}
|
||||
if line.startswith('-e '):
|
||||
info['package'] = line.split('#egg=')[1]
|
||||
else:
|
||||
# Remove versioning from the package
|
||||
pat = '(' + '|'.join(['>=', '==', '>']) + ')'
|
||||
parts = re.split(pat, line, maxsplit=1)
|
||||
parts = [p.strip() for p in parts]
|
||||
|
||||
info['package'] = parts[0]
|
||||
if len(parts) > 1:
|
||||
op, rest = parts[1:]
|
||||
if ';' in rest:
|
||||
# Handle platform specific dependencies
|
||||
# http://setuptools.readthedocs.io/en/latest/setuptools.html#declaring-platform-specific-dependencies
|
||||
version, platform_deps = map(str.strip,
|
||||
rest.split(';'))
|
||||
info['platform_deps'] = platform_deps
|
||||
else:
|
||||
version = rest # NOQA
|
||||
info['version'] = (op, version)
|
||||
yield info
|
||||
|
||||
def parse_require_file(fpath):
|
||||
with open(fpath, 'r', encoding='utf-8') as f:
|
||||
for line in f.readlines():
|
||||
line = line.strip()
|
||||
if line.startswith('http'):
|
||||
print('skip http requirements %s' % line)
|
||||
continue
|
||||
if line and not line.startswith('#') and not line.startswith(
|
||||
'--'):
|
||||
for info in parse_line(line):
|
||||
yield info
|
||||
elif line and line.startswith('--find-links'):
|
||||
eles = line.split()
|
||||
for e in eles:
|
||||
e = e.strip()
|
||||
if 'http' in e:
|
||||
info = dict(dependency_links=e)
|
||||
yield info
|
||||
|
||||
def gen_packages_items():
|
||||
items = []
|
||||
deps_link = []
|
||||
if exists(require_fpath):
|
||||
for info in parse_require_file(require_fpath):
|
||||
if 'dependency_links' not in info:
|
||||
parts = [info['package']]
|
||||
if with_version and 'version' in info:
|
||||
parts.extend(info['version'])
|
||||
if not sys.version.startswith('3.4'):
|
||||
# apparently package_deps are broken in 3.4
|
||||
platform_deps = info.get('platform_deps')
|
||||
if platform_deps is not None:
|
||||
parts.append(';' + platform_deps)
|
||||
item = ''.join(parts)
|
||||
items.append(item)
|
||||
else:
|
||||
deps_link.append(info['dependency_links'])
|
||||
return items, deps_link
|
||||
|
||||
return gen_packages_items()
|
||||
|
||||
|
||||
def pack_resource():
|
||||
# pack resource such as configs and tools
|
||||
root_dir = 'package/'
|
||||
if os.path.isdir(root_dir):
|
||||
shutil.rmtree(root_dir)
|
||||
os.makedirs(root_dir)
|
||||
|
||||
proj_dir = root_dir + 'modelscope/'
|
||||
shutil.copytree('./modelscope', proj_dir)
|
||||
shutil.copytree('./configs', proj_dir + 'configs')
|
||||
shutil.copytree('./requirements', 'package/requirements')
|
||||
shutil.copy('./requirements.txt', 'package/requirements.txt')
|
||||
shutil.copy('./MANIFEST.in', 'package/MANIFEST.in')
|
||||
shutil.copy('./README.md', 'package/README.md')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# write_version_py()
|
||||
from modelscope.utils.ast_utils import generate_ast_template
|
||||
generate_ast_template()
|
||||
pack_resource()
|
||||
os.chdir('package')
|
||||
install_requires, deps_link = parse_requirements('requirements.txt')
|
||||
extra_requires = {}
|
||||
all_requires = []
|
||||
for field in dir(Fields):
|
||||
if field.startswith('_'):
|
||||
continue
|
||||
field = getattr(Fields, field)
|
||||
extra_requires[field], _ = parse_requirements(
|
||||
f'requirements/{field}.txt')
|
||||
|
||||
# skip audio requirements due to its hard dependency which
|
||||
# result in mac/windows compatibility problems
|
||||
if field != Fields.audio:
|
||||
all_requires.append(extra_requires[field])
|
||||
for subfiled in ['asr', 'kws', 'signal', 'tts']:
|
||||
filed_name = f'audio_{subfiled}'
|
||||
extra_requires[filed_name], _ = parse_requirements(
|
||||
f'requirements/audio/{filed_name}.txt')
|
||||
framework_requires = extra_requires['framework']
|
||||
# add framework dependencies to every field
|
||||
for field, requires in extra_requires.items():
|
||||
if field not in [
|
||||
'server', 'framework', 'hub', 'datasets'
|
||||
]: # server need install model's field dependencies before.
|
||||
extra_requires[field] = framework_requires + extra_requires[field]
|
||||
extra_requires['all'] = all_requires
|
||||
|
||||
setup(
|
||||
name='modelscope',
|
||||
version=get_version(),
|
||||
description=
|
||||
'ModelScope: bring the notion of Model-as-a-Service to life.',
|
||||
long_description=readme(),
|
||||
long_description_content_type='text/markdown',
|
||||
author='ModelScope team',
|
||||
author_email='contact@modelscope.cn',
|
||||
keywords='python,nlp,science,cv,speech,multi-modal',
|
||||
url='https://github.com/modelscope/modelscope',
|
||||
packages=find_packages(exclude=('configs', 'demo')),
|
||||
include_package_data=True,
|
||||
package_data={
|
||||
'': ['*.h', '*.cpp', '*.cu'],
|
||||
},
|
||||
classifiers=[
|
||||
'Development Status :: 4 - Beta',
|
||||
'License :: OSI Approved :: Apache Software License',
|
||||
'Operating System :: OS Independent',
|
||||
'Programming Language :: Python :: 3',
|
||||
'Programming Language :: Python :: 3.7',
|
||||
'Programming Language :: Python :: 3.8',
|
||||
'Programming Language :: Python :: 3.9',
|
||||
'Programming Language :: Python :: 3.10',
|
||||
'Programming Language :: Python :: 3.11',
|
||||
],
|
||||
license='Apache License 2.0',
|
||||
tests_require=parse_requirements('requirements/tests.txt'),
|
||||
install_requires=install_requires,
|
||||
extras_require=extra_requires,
|
||||
entry_points={
|
||||
'console_scripts': ['modelscope=modelscope.cli.cli:run_cmd']
|
||||
},
|
||||
dependency_links=deps_link,
|
||||
zip_safe=False)
|
||||
setup()
|
||||
|
||||
583
tests/hub/test_commit_scheduler.py
Normal file
583
tests/hub/test_commit_scheduler.py
Normal file
@@ -0,0 +1,583 @@
|
||||
import atexit
|
||||
import shutil
|
||||
import tempfile
|
||||
import time
|
||||
import unittest
|
||||
import uuid
|
||||
from io import SEEK_END
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from modelscope.hub.api import HubApi
|
||||
from modelscope.hub.commit_scheduler import CommitScheduler, PartialFileIO
|
||||
from modelscope.hub.constants import Visibility
|
||||
from modelscope.hub.errors import NotExistError
|
||||
from modelscope.hub.file_download import _repo_file_download
|
||||
from modelscope.utils.constant import DEFAULT_REPOSITORY_REVISION
|
||||
from modelscope.utils.repo_utils import CommitInfo, CommitOperationAdd
|
||||
from modelscope.utils.test_utils import (TEST_ACCESS_TOKEN1, TEST_MODEL_ORG,
|
||||
delete_credential, test_level)
|
||||
|
||||
|
||||
class TestCommitScheduler(unittest.TestCase):
|
||||
"""Test suite for ModelScope CommitScheduler functionality."""
|
||||
|
||||
def setUp(self) -> None:
|
||||
"""Set up test environment with temporary directories and mock API."""
|
||||
self.api = HubApi()
|
||||
self.repo_name = f'test-commit-scheduler-{uuid.uuid4().hex[:8]}'
|
||||
self.repo_id = f'{TEST_MODEL_ORG}/{self.repo_name}'
|
||||
|
||||
# Create temporary cache directory
|
||||
self.cache_dir = Path(tempfile.mkdtemp())
|
||||
self.watched_folder = self.cache_dir / 'watched_folder'
|
||||
self.watched_folder.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
# Initialize scheduler reference for cleanup
|
||||
self.scheduler = None
|
||||
|
||||
def tearDown(self) -> None:
|
||||
"""Clean up test resources."""
|
||||
|
||||
def cleanup(scheduler):
|
||||
try:
|
||||
scheduler.stop()
|
||||
remove_atexit_callback(scheduler)
|
||||
except Exception as e:
|
||||
print(
|
||||
f'Warning: Could not clean up scheduler {scheduler.repo_id}: {e}'
|
||||
)
|
||||
|
||||
def remove_atexit_callback(scheduler):
|
||||
try:
|
||||
atexit.unregister(scheduler.commit_scheduled_changes)
|
||||
except Exception as e:
|
||||
print(
|
||||
f'Warning: Could not remove atexit callback for scheduler {scheduler.repo_id}: {e}'
|
||||
)
|
||||
|
||||
# Stop scheduler if it exists
|
||||
if self.scheduler is not None:
|
||||
try:
|
||||
cleanup(self.scheduler)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Try to delete test repo (may not exist for mocked tests)
|
||||
try:
|
||||
if hasattr(self, 'api') and TEST_ACCESS_TOKEN1:
|
||||
self.api.login(TEST_ACCESS_TOKEN1)
|
||||
self.api.delete_repo(repo_id=self.repo_id, repo_type='dataset')
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Clean up temporary directories
|
||||
if self.cache_dir.exists():
|
||||
shutil.rmtree(self.cache_dir, ignore_errors=True)
|
||||
|
||||
delete_credential()
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
def test_invalid_folder_path_nonexistent(self) -> None:
|
||||
"""Test that CommitScheduler raises error for non-existent folder."""
|
||||
nonexistent_path = self.cache_dir / 'nonexistent'
|
||||
|
||||
with self.assertRaises(ValueError) as cm:
|
||||
CommitScheduler(
|
||||
repo_id=self.repo_id,
|
||||
folder_path=nonexistent_path,
|
||||
interval=1,
|
||||
hub_api=self.api,
|
||||
repo_type='dataset',
|
||||
token=TEST_ACCESS_TOKEN1)
|
||||
self.assertIn('does not exist', str(cm.exception))
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
def test_invalid_interval(self) -> None:
|
||||
"""Test that CommitScheduler raises error for invalid interval values."""
|
||||
# Test zero interval
|
||||
with self.assertRaises(ValueError) as cm:
|
||||
CommitScheduler(
|
||||
repo_id=self.repo_id,
|
||||
folder_path=self.watched_folder,
|
||||
interval=0,
|
||||
hub_api=self.api,
|
||||
repo_type='dataset',
|
||||
token=TEST_ACCESS_TOKEN1)
|
||||
self.assertIn('positive', str(cm.exception))
|
||||
|
||||
# Test negative interval
|
||||
with self.assertRaises(ValueError) as cm:
|
||||
CommitScheduler(
|
||||
repo_id=self.repo_id,
|
||||
folder_path=self.watched_folder,
|
||||
interval=-1,
|
||||
hub_api=self.api,
|
||||
repo_type='dataset',
|
||||
token=TEST_ACCESS_TOKEN1)
|
||||
self.assertIn('positive', str(cm.exception))
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
def test_initialization_with_defaults(self) -> None:
|
||||
"""Test CommitScheduler initialization with default parameters."""
|
||||
with patch.object(HubApi, 'create_repo') as mock_create:
|
||||
mock_create.return_value = f'https://modelscope.cn/datasets/{self.repo_id}'
|
||||
|
||||
self.scheduler = CommitScheduler(
|
||||
repo_id=self.repo_id,
|
||||
folder_path=self.watched_folder,
|
||||
hub_api=self.api,
|
||||
repo_type='dataset',
|
||||
token=TEST_ACCESS_TOKEN1)
|
||||
|
||||
# Check default values
|
||||
self.assertEqual(self.scheduler.repo_id, self.repo_id)
|
||||
self.assertEqual(self.scheduler.folder_path,
|
||||
self.watched_folder.resolve())
|
||||
self.assertEqual(self.scheduler.interval, 5) # default 5 minutes
|
||||
self.assertEqual(self.scheduler.path_in_repo, '')
|
||||
self.assertEqual(self.scheduler.revision,
|
||||
DEFAULT_REPOSITORY_REVISION)
|
||||
self.assertIsInstance(self.scheduler.last_uploaded, dict)
|
||||
|
||||
# Verify create_repo was called
|
||||
mock_create.assert_called_once()
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
def test_initialization_with_custom_parameters(self) -> None:
|
||||
"""Test CommitScheduler initialization with custom parameters."""
|
||||
custom_interval = 10
|
||||
custom_path_in_repo = 'custom/path'
|
||||
custom_revision = 'develop'
|
||||
allow_patterns = ['*.txt', '*.json']
|
||||
ignore_patterns = ['*.log', 'temp/*']
|
||||
|
||||
with patch.object(HubApi, 'create_repo') as mock_create:
|
||||
mock_create.return_value = f'https://modelscope.cn/datasets/{self.repo_id}'
|
||||
|
||||
self.scheduler = CommitScheduler(
|
||||
repo_id=self.repo_id,
|
||||
folder_path=self.watched_folder,
|
||||
interval=custom_interval,
|
||||
path_in_repo=custom_path_in_repo,
|
||||
revision=custom_revision,
|
||||
allow_patterns=allow_patterns,
|
||||
ignore_patterns=ignore_patterns,
|
||||
visibility=Visibility.PUBLIC,
|
||||
hub_api=self.api,
|
||||
repo_type='dataset',
|
||||
token=TEST_ACCESS_TOKEN1)
|
||||
|
||||
# Check custom values
|
||||
self.assertEqual(self.scheduler.interval, custom_interval)
|
||||
self.assertEqual(self.scheduler.path_in_repo, custom_path_in_repo)
|
||||
self.assertEqual(self.scheduler.revision, custom_revision)
|
||||
self.assertEqual(self.scheduler.allow_patterns, allow_patterns)
|
||||
|
||||
# Check ignore patterns include git folders
|
||||
expected_ignore = ignore_patterns + [
|
||||
'.git', '.git/*', '*/.git', '**/.git/**'
|
||||
]
|
||||
self.assertEqual(self.scheduler.ignore_patterns, expected_ignore)
|
||||
|
||||
# Verify create_repo was called with correct visibility
|
||||
mock_create.assert_called_once()
|
||||
call_args = mock_create.call_args
|
||||
self.assertEqual(call_args.kwargs.get('visibility'), 'public')
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
@patch.object(CommitScheduler, 'commit_scheduled_changes')
|
||||
def test_mocked_scheduler_execution(self, mock_push: MagicMock) -> None:
|
||||
"""Test scheduler with mocked commit_scheduled_changes method."""
|
||||
mock_push.return_value = CommitInfo(
|
||||
commit_url=
|
||||
'https://modelscope.cn/datasets/test_scheduler_unit/commit/test123',
|
||||
commit_message='Test commit',
|
||||
commit_description='',
|
||||
oid='test123')
|
||||
|
||||
with patch.object(HubApi, 'create_repo'):
|
||||
self.scheduler = CommitScheduler(
|
||||
repo_id=self.repo_id,
|
||||
folder_path=self.watched_folder,
|
||||
interval=1 / 60 / 10, # every 0.1s for fast testing
|
||||
hub_api=self.api,
|
||||
repo_type='dataset',
|
||||
token=TEST_ACCESS_TOKEN1)
|
||||
|
||||
# Wait for at least a couple of scheduler cycles
|
||||
time.sleep(0.5)
|
||||
|
||||
# Should have been called multiple times
|
||||
self.assertGreater(len(mock_push.call_args_list), 1)
|
||||
|
||||
# Check the last future result
|
||||
if hasattr(self.scheduler,
|
||||
'last_future') and self.scheduler.last_future:
|
||||
result = self.scheduler.last_future.result()
|
||||
self.assertEqual(result.oid, 'test123')
|
||||
self.assertEqual(result.commit_message, 'Test commit')
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
def test_scheduler_stop(self) -> None:
|
||||
"""Test stopping the scheduler."""
|
||||
with patch.object(HubApi, 'create_repo'):
|
||||
self.scheduler = CommitScheduler(
|
||||
repo_id=self.repo_id,
|
||||
folder_path=self.watched_folder,
|
||||
interval=1, # 1 minute
|
||||
hub_api=self.api,
|
||||
repo_type='dataset',
|
||||
token=TEST_ACCESS_TOKEN1)
|
||||
|
||||
# Scheduler should be running
|
||||
self.assertFalse(self.scheduler._CommitScheduler__stopped)
|
||||
|
||||
# Stop the scheduler
|
||||
self.scheduler.stop()
|
||||
|
||||
# Scheduler should be stopped
|
||||
self.assertTrue(self.scheduler._CommitScheduler__stopped)
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
def test_context_manager(self) -> None:
|
||||
"""Test CommitScheduler as context manager."""
|
||||
file_path = self.watched_folder / 'test_file.txt'
|
||||
|
||||
with patch.object(HubApi, 'create_repo'), \
|
||||
patch.object(CommitScheduler, 'commit_scheduled_changes') as mock_push:
|
||||
mock_push.return_value = CommitInfo(
|
||||
commit_url=
|
||||
'https://modelscope.cn/datasets/test_scheduler_unit/commit/test123',
|
||||
commit_message='Test commit',
|
||||
commit_description='',
|
||||
oid='test123')
|
||||
|
||||
with CommitScheduler(
|
||||
repo_id=self.repo_id,
|
||||
folder_path=self.watched_folder,
|
||||
interval=5, # 5 minutes - won't trigger during test
|
||||
hub_api=self.api,
|
||||
repo_type='dataset',
|
||||
token=TEST_ACCESS_TOKEN1) as scheduler:
|
||||
# Write a file inside the context
|
||||
file_path.write_text('test content')
|
||||
|
||||
# Reference for later assertions
|
||||
self.scheduler = scheduler
|
||||
|
||||
# After exiting context, scheduler should be stopped
|
||||
self.assertTrue(self.scheduler._CommitScheduler__stopped)
|
||||
|
||||
# Should have triggered commit_scheduled_changes on exit
|
||||
mock_push.assert_called()
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
def test_trigger_manual_commit(self) -> None:
|
||||
"""Test manually triggering a commit."""
|
||||
with patch.object(HubApi, 'create_repo'), \
|
||||
patch.object(CommitScheduler, 'commit_scheduled_changes') as mock_push:
|
||||
mock_push.return_value = CommitInfo(
|
||||
commit_url=
|
||||
'https://modelscope.cn/datasets/test_scheduler_unit/commit/test123',
|
||||
commit_message='Manual commit',
|
||||
commit_description='',
|
||||
oid='manual123')
|
||||
|
||||
self.scheduler = CommitScheduler(
|
||||
repo_id=self.repo_id,
|
||||
folder_path=self.watched_folder,
|
||||
interval=60, # Long interval to avoid auto-trigger
|
||||
hub_api=self.api,
|
||||
repo_type='dataset',
|
||||
token=TEST_ACCESS_TOKEN1)
|
||||
future = self.scheduler.trigger()
|
||||
result = future.result()
|
||||
|
||||
# Verify the result
|
||||
self.assertEqual(result.oid, 'manual123')
|
||||
self.assertEqual(result.commit_message, 'Manual commit')
|
||||
mock_push.assert_called()
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
def test_stopped_scheduler_no_push(self) -> None:
|
||||
"""Test that stopped scheduler doesn't perform push operations."""
|
||||
with patch.object(HubApi, 'create_repo'):
|
||||
|
||||
self.scheduler = CommitScheduler(
|
||||
repo_id=self.repo_id,
|
||||
folder_path=self.watched_folder,
|
||||
interval=60,
|
||||
hub_api=self.api,
|
||||
repo_type='dataset',
|
||||
token=TEST_ACCESS_TOKEN1)
|
||||
|
||||
# Stop the scheduler immediately
|
||||
self.scheduler.stop()
|
||||
|
||||
# Try to trigger after stopping
|
||||
future = self.scheduler.trigger()
|
||||
result = future.result()
|
||||
|
||||
# Result should be None for stopped scheduler
|
||||
self.assertIsNone(result)
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
def test_sync_local_folder_to_hub(self) -> None:
|
||||
"""Test sync local folder to remote repo."""
|
||||
hub_cache = self.cache_dir / 'hub'
|
||||
file_path = self.watched_folder / 'file.txt'
|
||||
bin_path = self.watched_folder / 'file.bin'
|
||||
git_path = self.watched_folder / '.git'
|
||||
self.scheduler = CommitScheduler(
|
||||
folder_path=self.watched_folder,
|
||||
repo_id=self.repo_id,
|
||||
interval=1 / 60,
|
||||
hub_api=self.api,
|
||||
repo_type='dataset',
|
||||
token=TEST_ACCESS_TOKEN1)
|
||||
|
||||
# 1 push to hub triggered
|
||||
time.sleep(0.5)
|
||||
|
||||
# write content to files
|
||||
with file_path.open('a') as f:
|
||||
f.write('first line\n')
|
||||
with bin_path.open('a') as f:
|
||||
f.write('binary content')
|
||||
with git_path.open('a') as f:
|
||||
f.write('git content\n')
|
||||
|
||||
# 2 push to hub triggered
|
||||
time.sleep(2)
|
||||
self.scheduler.last_future.result()
|
||||
|
||||
# new content in file
|
||||
with file_path.open('a') as f:
|
||||
f.write('second line\n')
|
||||
|
||||
# 1 push to hub triggered
|
||||
time.sleep(1)
|
||||
self.scheduler.last_future.result()
|
||||
|
||||
with bin_path.open('a') as f:
|
||||
f.write(' updated')
|
||||
|
||||
# 5 push to hub triggered
|
||||
time.sleep(5) # wait for every threads/uploads to complete
|
||||
self.scheduler.stop()
|
||||
self.scheduler.last_future.result()
|
||||
|
||||
repo_id = self.scheduler.repo_id
|
||||
|
||||
def _download(filename: str, revision: str) -> Path:
|
||||
return Path(
|
||||
_repo_file_download(
|
||||
repo_id=repo_id,
|
||||
file_path=filename,
|
||||
revision=revision,
|
||||
cache_dir=hub_cache,
|
||||
repo_type='dataset'))
|
||||
|
||||
# Check file.txt consistency
|
||||
txt_push = _download(filename='file.txt', revision='master')
|
||||
self.assertEqual(txt_push.read_text(), 'first line\nsecond line\n')
|
||||
|
||||
# Check file.bin consistency
|
||||
bin_push = _download(filename='file.bin', revision='master')
|
||||
self.assertEqual(bin_push.read_text(), 'binary content updated')
|
||||
|
||||
# Check .git file was not uploaded (should be ignored)
|
||||
with self.assertRaises(NotExistError):
|
||||
_download(filename='.git', revision='master')
|
||||
|
||||
|
||||
class TestPartialFileIO(unittest.TestCase):
    """Tests covering read/seek/len/repr behavior of PartialFileIO."""

    def setUp(self) -> None:
        """Create a temp directory holding a 15-byte fixture file."""
        self.cache_dir = Path(tempfile.mkdtemp())
        self.test_file = self.cache_dir / 'file.txt'
        self.test_file.write_text('123456789abcdef')  # exactly 15 bytes

    def tearDown(self) -> None:
        """Remove the temp directory created in setUp."""
        if self.cache_dir.exists():
            shutil.rmtree(self.cache_dir, ignore_errors=True)

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_partial_file_read_with_limit(self) -> None:
        """A capped reader yields only the first `size_limit` bytes."""
        pf = PartialFileIO(self.test_file, size_limit=5)
        self.assertEqual(pf.read(), b'12345')  # first read hits the cap
        self.assertEqual(pf.read(), b'')  # stream is then exhausted
        pf.close()

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_partial_file_read_by_chunks(self) -> None:
        """Chunked reads stop exactly at the size limit."""
        pf = PartialFileIO(self.test_file, size_limit=8)
        # 3 + 3 + 2 bytes fit inside the 8-byte cap; then nothing remains.
        for want in (b'123', b'456', b'78', b''):
            self.assertEqual(pf.read(3), want)
        pf.close()

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_partial_file_read_oversized_chunk(self) -> None:
        """Requesting more than the limit returns only up to the limit."""
        pf = PartialFileIO(self.test_file, size_limit=5)
        self.assertEqual(pf.read(20), b'12345')
        pf.close()

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_partial_file_len(self) -> None:
        """__len__ reports the effective (possibly clamped) size limit."""
        capped = PartialFileIO(self.test_file, size_limit=7)
        self.assertEqual(len(capped), 7)
        capped.close()

        # A limit beyond the real size is clamped to the file length.
        oversized = PartialFileIO(self.test_file, size_limit=100)
        self.assertEqual(len(oversized), 15)
        oversized.close()

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_partial_file_seek_and_tell(self) -> None:
        """seek/tell operate within the capped window."""
        pf = PartialFileIO(self.test_file, size_limit=10)
        self.assertEqual(pf.tell(), 0)  # fresh handle starts at 0

        pf.read(3)
        self.assertEqual(pf.tell(), 3)  # position advances with reads

        pf.seek(0)
        self.assertEqual(pf.tell(), 0)

        pf.seek(5)
        self.assertEqual(pf.tell(), 5)

        pf.seek(50)  # beyond the limit -> clamped
        self.assertEqual(pf.tell(), 10)

        pf.seek(-2, SEEK_END)  # "end" is the capped end, not the file end
        self.assertEqual(pf.tell(), 8)

        pf.close()

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_partial_file_not_implemented_methods(self) -> None:
        """Write-side and line-oriented APIs are deliberately unsupported."""
        pf = PartialFileIO(self.test_file, size_limit=5)
        self.assertRaises(NotImplementedError, pf.readline)
        self.assertRaises(NotImplementedError, pf.write, b'test')
        pf.close()

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_partial_file_repr(self) -> None:
        """repr() exposes the wrapped path and the size limit."""
        pf = PartialFileIO(self.test_file, size_limit=5)
        expected = f'<PartialFileIO file_path={self.test_file} size_limit=5>'
        self.assertEqual(repr(pf), expected)
        pf.close()

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_file_modification_after_creation(self) -> None:
        """Appending to the file later must not change the captured limit."""
        pf = PartialFileIO(self.test_file, size_limit=20)
        self.assertEqual(len(pf), 15)  # clamped at creation time

        with self.test_file.open('ab') as handle:
            handle.write(b'additional_content')

        # Growth after creation is ignored by the captured limit.
        self.assertEqual(len(pf), 15)

        data = pf.read()
        self.assertEqual(len(data), 15)
        self.assertEqual(data, b'123456789abcdef')

        pf.close()

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_high_size_limit(self) -> None:
        """A too-large limit is frozen to the file size at construction."""
        pf = PartialFileIO(self.test_file, size_limit=20)
        with self.test_file.open('ab') as handle:
            handle.write(b'ghijkl')

        # The limit was clamped to the 15-byte size when the instance
        # was created, not re-evaluated on the fly.
        self.assertEqual(len(pf), 15)
        self.assertEqual(pf._size_limit, 15)

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_with_commit_operation_add(self) -> None:
        """PartialFileIO integrates with CommitOperationAdd uploads."""
        truncated = CommitOperationAdd(
            path_or_fileobj=PartialFileIO(self.test_file, size_limit=5),
            path_in_repo='test_file.txt')
        self.assertEqual(truncated.upload_info.size, 5)
        self.assertEqual(truncated.upload_info.sample, b'12345')

        with truncated.as_file() as fobj:
            self.assertEqual(fobj.read(), b'12345')

        # Same file with a larger window.
        full = CommitOperationAdd(
            path_or_fileobj=PartialFileIO(self.test_file, size_limit=9),
            path_in_repo='test_file.txt')
        self.assertEqual(full.upload_info.size, 9)
        self.assertEqual(full.upload_info.sample, b'123456789')

        with full.as_file() as fobj:
            self.assertEqual(fobj.read(), b'123456789')

        # Different byte windows must produce different content hashes.
        self.assertNotEqual(truncated.upload_info.sha256,
                            full.upload_info.sha256)
|
||||
|
||||
|
||||
# Allow running this test module directly via `python <file>`.
if __name__ == '__main__':
    unittest.main()
|
||||
New file added: tests/hub/test_hub_list.py (38 lines, @@ -0,0 +1,38 @@)
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
import unittest
|
||||
|
||||
from modelscope import HubApi
|
||||
from modelscope.utils.logger import get_logger
|
||||
from modelscope.utils.test_utils import test_level
|
||||
|
||||
# Module-level logger for the hub listing tests.
logger = get_logger()

# Default owner/organization queried when listing datasets.
default_owner = 'modelscope'
|
||||
|
||||
|
||||
class HubListHubTest(unittest.TestCase):
    """Tests for the HubApi listing endpoints (datasets and models)."""

    def setUp(self):
        # Fresh API client per test; no credentials required for listing.
        self.api = HubApi()

    @unittest.skipUnless(test_level() >= 3, 'skip test in current test level')
    def test_list_datasets(self):
        """List datasets for the default owner using default arguments."""
        result = self.api.list_datasets(owner_or_group=default_owner)
        logger.info(f'List datasets result: {result}')

    @unittest.skipUnless(test_level() >= 3, 'skip test in current test level')
    def test_list_datasets_with_args(self):
        """List datasets with paging, sorting and search arguments."""
        result = self.api.list_datasets(
            owner_or_group=default_owner,
            page_number=1,
            page_size=2,
            sort='downloads',
            search='chinese',
        )
        logger.info(f'List datasets with full result: {result}')

    @unittest.skipUnless(test_level() >= 3, 'skip test in current test level')
    def test_list_models(self):
        """List models for a given owner with paging arguments.

        NOTE(review): this test previously had no test-level gate and so
        hit the network at every level, unlike its sibling tests; it is
        now gated at level >= 3 for consistency.
        """
        result = self.api.list_models(
            owner_or_group='Qwen', page_number=1, page_size=2)
        logger.info(f'List models result: {result}')
|
||||
@@ -122,6 +122,18 @@ class TestToOllama(unittest.TestCase):
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
def test_check_template_type(self):
|
||||
_test_check_tmpl_type(
|
||||
'unsloth/gpt-oss-20b-GGUF',
|
||||
'gpt-oss',
|
||||
gguf_meta={'general.name': 'Gpt-Oss-20B'})
|
||||
_test_check_tmpl_type(
|
||||
'unsloth/granite-4.0-h-tiny-GGUF',
|
||||
'granite4',
|
||||
gguf_meta={'general.name': 'Granite-4.0-H-Tiny'})
|
||||
_test_check_tmpl_type(
|
||||
'unsloth/DeepSeek-V3.1-GGUF',
|
||||
'deepseek-v3.1',
|
||||
gguf_meta={'general.name': 'Deepseek-V3.1'})
|
||||
_test_check_tmpl_type('unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF',
|
||||
'qwen3-coder')
|
||||
_test_check_tmpl_type(
|
||||
|
||||
Reference in New Issue
Block a user