[to #42322933] MsDataset upload and load support directories.

上传和下载支持多文件操作
This commit is contained in:
xingjun.wxj
2022-10-14 18:32:38 +08:00
committed by yingda.chen
parent 355da866c5
commit 1b4d5ccb9c
9 changed files with 250 additions and 51 deletions

View File

@@ -6,9 +6,13 @@ import unittest
import zipfile
from modelscope.msdatasets import MsDataset
from modelscope.utils.constant import ModelFile
from modelscope.msdatasets.utils.dataset_utils import list_dataset_objects
from modelscope.utils import logger as logging
from modelscope.utils.constant import DEFAULT_DATASET_REVISION, ModelFile
from modelscope.utils.test_utils import test_level
# Module-level logger, obtained from modelscope.utils.logger (imported above
# under the alias `logging`).
logger = logging.get_logger(__name__)
# String constant 'extracted'.
# NOTE(review): its consumers are outside the visible hunks — confirm usage.
KEY_EXTRACTED = 'extracted'
@@ -39,7 +43,8 @@ class DatasetUploadTest(unittest.TestCase):
def tearDown(self):
    """Switch back to the saved working directory and delete the temp dir."""
    os.chdir(self.old_dir)
    # ignore_errors=True: failures while removing the tree are ignored.
    shutil.rmtree(self.temp_dir, ignore_errors=True)
    print('The test dir successfully removed!')
    removal_msg = f'Temporary directory {self.temp_dir} successfully removed!'
    logger.info(removal_msg)
@staticmethod
def get_raw_downloaded_file_path(extracted_path):
@@ -68,6 +73,40 @@ class DatasetUploadTest(unittest.TestCase):
dataset_name=self.dataset_name,
namespace=self.namespace)
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_ds_upload_dir(self):
    """Upload the train and val image directories, then list what was uploaded.

    The local directories are taken from the 'train' entry of the prepared
    dataset's split_config. The object listing is only logged; nothing is
    asserted on it.
    """
    prepared_ds = MsDataset.load(self.prepared_dataset_name, split='train')
    split_config = prepared_ds._hf_ds.config_kwargs.get('split_config')
    extracted_root = split_config.get('train')

    # (object name, directory relative to the extracted root), uploaded in
    # this order.
    upload_plan = (
        ('train', 'Pets/images/train'),
        ('val', 'Pets/images/val'),
    )
    for object_name, relative_dir in upload_plan:
        MsDataset.upload(
            object_name=object_name,
            local_file_path=os.path.join(extracted_root, relative_dir),
            dataset_name=self.dataset_name,
            namespace=self.namespace)

    listing_kwargs = dict(
        hub_api=self.api,
        max_limit=-1,
        is_recursive=True,
        dataset_name=self.dataset_name,
        namespace=self.namespace,
        version=DEFAULT_DATASET_REVISION)
    objects = list_dataset_objects(**listing_kwargs)
    logger.info(f'{len(objects)} objects have been uploaded: {objects}')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_ds_download_dir(self):
    """Load the dataset and check that its split config is not empty.

    The dataset is loaded by ``self.dataset_name`` with ``self.namespace``
    passed as the second positional argument.
    """
    test_ds = MsDataset.load(self.dataset_name, self.namespace)
    # assertTrue instead of a bare ``assert``: the check still runs under
    # ``python -O`` and a failure is reported through unittest.
    # NOTE(review): presumably split_config is a dict of split -> path; an
    # empty values() view is falsy, so this only checks non-emptiness.
    self.assertTrue(test_ds.config_kwargs['split_config'].values())
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_ds_clone_meta(self):
MsDataset.clone_meta(