mirror of https://github.com/modelscope/modelscope.git
@@ -248,15 +248,15 @@ class MsDataset:
                 break
         target_subset_name, target_dataset_structure = get_target_dataset_structure(
             dataset_json, subset_name, split)
-        meta_map, file_map = get_dataset_files(target_dataset_structure,
-                                               dataset_name, namespace,
-                                               version)
+        meta_map, file_map, args_map = get_dataset_files(
+            target_dataset_structure, dataset_name, namespace, version)
         builder = load_dataset_builder(
             dataset_name,
             subset_name,
             namespace,
             meta_data_files=meta_map,
             zip_data_files=file_map,
+            args_map=args_map,
             cache_dir=MS_DATASETS_CACHE,
             version=version,
             split=list(target_dataset_structure.keys()),
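The point of the extra map is visible in the test changes further down: dataset-specific kwargs no longer have to be repeated at every call site, because they travel with the dataset's own metadata. A comment-only illustration (values are made up, not from a real dataset):

# Before this change, callers passed dataset-specific kwargs explicitly:
#     MsDataset.load('pets_small', split='train', classes=('1', '2'), folder_name='Pets')
# After it, those kwargs can live in the dataset's own metadata and arrive here
# as args_map, keyed by split, e.g. (illustrative):
#     {'train': {'classes': ('1', '2'), 'folder_name': 'Pets'}}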
@@ -60,6 +60,8 @@ class ImageInstanceSegmentationCocoDataset(TorchTaskDataset):
                  classes=None,
                  seg_prefix=None,
                  folder_name=None,
+                 ann_file=None,
+                 img_prefix=None,
                  test_mode=False,
                  filter_empty_gt=True,
                  **kwargs):
@@ -69,11 +71,9 @@ class ImageInstanceSegmentationCocoDataset(TorchTaskDataset):
         self.split = next(iter(split_config.keys()))
         self.preprocessor = preprocessor
 
-        self.ann_file = osp.join(self.data_root,
-                                 DATASET_STRUCTURE[self.split]['annotation'])
+        self.ann_file = osp.join(self.data_root, ann_file)
 
-        self.img_prefix = osp.join(self.data_root,
-                                   DATASET_STRUCTURE[self.split]['images'])
+        self.img_prefix = osp.join(self.data_root, img_prefix)
         self.seg_prefix = seg_prefix
         self.test_mode = test_mode
         self.filter_empty_gt = filter_empty_gt
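With the hard-coded DATASET_STRUCTURE lookup gone, callers now supply the annotation file and image prefix themselves. A minimal sketch, assuming a COCO-style layout under the dataset root; the paths and the split_config mapping below are hypothetical, not taken from the repository:

from modelscope.msdatasets.task_datasets import \
    ImageInstanceSegmentationCocoDataset

ds = ImageInstanceSegmentationCocoDataset(
    split_config={'train': '/path/to/data_root'},  # hypothetical split -> root mapping
    classes=('Cat', 'Dog'),
    ann_file='annotations/instances_train.json',   # now passed in explicitly
    img_prefix='images/train',                     # replaces the DATASET_STRUCTURE lookup
    test_mode=False)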
@@ -1,6 +1,6 @@
 import os
 from collections import defaultdict
-from typing import Mapping, Optional, Sequence, Union
+from typing import Any, Mapping, Optional, Sequence, Union
 
 from datasets.builder import DatasetBuilder
 
@@ -92,6 +92,7 @@ def get_dataset_files(subset_split_into: dict,
     """
     meta_map = defaultdict(dict)
     file_map = defaultdict(dict)
+    args_map = defaultdict(dict)
     from modelscope.hub.api import HubApi
     modelscope_api = HubApi()
     for split, info in subset_split_into.items():
@@ -99,7 +100,8 @@ def get_dataset_files(subset_split_into: dict,
             info.get('meta', ''), dataset_name, namespace, revision)
         if info.get('file'):
             file_map[split] = info['file']
-    return meta_map, file_map
+        args_map[split] = info.get('args')
+    return meta_map, file_map, args_map
 
 
 def load_dataset_builder(dataset_name: str, subset_name: str, namespace: str,
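To make the new plumbing concrete: each split entry in the dataset's structure JSON can now carry an args payload alongside its meta and file references. The entry below is illustrative; only the meta/file/args keys are taken from the code above:

# Assumed shape of one split entry in the dataset structure JSON (illustrative):
info = {
    'meta': 'train_meta.csv',
    'file': 'train_images.zip',
    'args': {'classes': ('Cat', 'Dog'), 'folder_name': 'Pets'},
}
# Per split, get_dataset_files() now yields:
#   meta_map[split]  -> resolved URL for info['meta']
#   file_map[split]  -> info['file']
#   args_map[split]  -> info.get('args'), i.e. None when the JSON has no 'args' key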
@@ -107,12 +109,16 @@ def load_dataset_builder(dataset_name: str, subset_name: str, namespace: str,
                                                              Sequence[str]]],
                          zip_data_files: Mapping[str, Union[str,
                                                             Sequence[str]]],
-                         cache_dir: str, version: Optional[Union[str]],
-                         split: Sequence[str],
+                         args_map: Mapping[str, Any], cache_dir: str,
+                         version: Optional[Union[str]], split: Sequence[str],
                          **config_kwargs) -> DatasetBuilder:
     sub_dir = os.path.join(version, '_'.join(split))
     meta_data_file = next(iter(meta_data_files.values()))
     if not meta_data_file:
+        args_map = next(iter(args_map.values()))
+        if args_map is None:
+            args_map = {}
+        args_map.update(config_kwargs)
         builder_instance = TaskSpecificDatasetBuilder(
             dataset_name=dataset_name,
             namespace=namespace,
@@ -121,7 +127,7 @@ def load_dataset_builder(dataset_name: str, subset_name: str, namespace: str,
             meta_data_files=meta_data_files,
             zip_data_files=zip_data_files,
             hash=sub_dir,
-            **config_kwargs)
+            **args_map)
     elif meta_data_file.endswith('.csv'):
         builder_instance = MsCsvDatasetBuilder(
             dataset_name=dataset_name,
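The fallback branch above merges the metadata-recorded kwargs with whatever the caller passed, then forwards the result to TaskSpecificDatasetBuilder. A plain-Python restatement of that merge (values illustrative):

args_map = {'train': {'classes': ('Cat', 'Dog')}}
config_kwargs = {'test_mode': False}

args = next(iter(args_map.values()))  # per-split kwargs recorded in the dataset JSON
if args is None:                      # dataset JSON had no 'args' entry
    args = {}
args.update(config_kwargs)            # caller-supplied kwargs win on key conflicts
# args == {'classes': ('Cat', 'Dog'), 'test_mode': False}
# -> forwarded as TaskSpecificDatasetBuilder(..., **args)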
@@ -36,9 +36,8 @@ class MsDatasetTest(unittest.TestCase):
         ms_ds_train = MsDataset.load(
             'pets_small',
             namespace=DEFAULT_DATASET_NAMESPACE,
-            split='train',
-            classes=('1', '2'),
-            folder_name='Pets')
+            download_mode=DownloadMode.FORCE_REDOWNLOAD,
+            split='train')
         print(ms_ds_train.config_kwargs)
         assert next(iter(ms_ds_train.config_kwargs['split_config'].values()))
 
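The test now pins the download behaviour instead of relying on whatever is cached. A minimal usage sketch; the default mode (reusing a previously downloaded dataset) is assumed rather than shown here:

from modelscope.msdatasets import MsDataset
from modelscope.utils.constant import DownloadMode

# FORCE_REDOWNLOAD bypasses any locally cached copy, so the test always
# exercises a fresh fetch of 'pets_small' instead of reusing stale data.
ds = MsDataset.load(
    'pets_small',
    split='train',
    download_mode=DownloadMode.FORCE_REDOWNLOAD)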
@@ -20,7 +20,6 @@ class TestFinetuneMPlug(unittest.TestCase):
         self.tmp_dir = tempfile.TemporaryDirectory().name
         if not os.path.exists(self.tmp_dir):
             os.makedirs(self.tmp_dir)
 
-        from modelscope.utils.constant import DownloadMode
         datadict = MsDataset.load(
             'coco_captions_small_slice',
@@ -15,7 +15,7 @@ from modelscope.msdatasets.task_datasets import \
     ImageInstanceSegmentationCocoDataset
 from modelscope.trainers import build_trainer
 from modelscope.utils.config import Config, ConfigDict
-from modelscope.utils.constant import ModelFile
+from modelscope.utils.constant import DownloadMode, ModelFile
 from modelscope.utils.test_utils import test_level
 
 
@@ -41,38 +41,26 @@ class TestImageInstanceSegmentationTrainer(unittest.TestCase):
         if train_data_cfg is None:
             # use default toy data
             train_data_cfg = ConfigDict(
-                name='pets_small',
-                split='train',
-                classes=('Cat', 'Dog'),
-                folder_name='Pets',
-                test_mode=False)
+                name='pets_small', split='train', test_mode=False)
         if val_data_cfg is None:
             val_data_cfg = ConfigDict(
-                name='pets_small',
-                split='validation',
-                classes=('Cat', 'Dog'),
-                folder_name='Pets',
-                test_mode=True)
+                name='pets_small', split='validation', test_mode=True)
 
         self.train_dataset = MsDataset.load(
             dataset_name=train_data_cfg.name,
             split=train_data_cfg.split,
-            classes=train_data_cfg.classes,
-            folder_name=train_data_cfg.folder_name,
-            test_mode=train_data_cfg.test_mode)
-        assert self.train_dataset.config_kwargs[
-            'classes'] == train_data_cfg.classes
+            test_mode=train_data_cfg.test_mode,
+            download_mode=DownloadMode.FORCE_REDOWNLOAD)
+        assert self.train_dataset.config_kwargs['classes']
+        assert next(
+            iter(self.train_dataset.config_kwargs['split_config'].values()))
 
         self.eval_dataset = MsDataset.load(
             dataset_name=val_data_cfg.name,
             split=val_data_cfg.split,
-            classes=val_data_cfg.classes,
-            folder_name=val_data_cfg.folder_name,
-            test_mode=val_data_cfg.test_mode)
-        assert self.eval_dataset.config_kwargs[
-            'classes'] == val_data_cfg.classes
+            test_mode=val_data_cfg.test_mode,
+            download_mode=DownloadMode.FORCE_REDOWNLOAD)
+        assert self.eval_dataset.config_kwargs['classes']
+        assert next(
+            iter(self.eval_dataset.config_kwargs['split_config'].values()))