mirror of
https://github.com/modelscope/modelscope.git
synced 2025-12-18 17:27:43 +01:00
Update asr_dataset.py
Add a download_mode option for when the dataset needs to be re-downloaded
This commit is contained in:
@@ -3,6 +3,8 @@
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
from modelscope.msdatasets.ms_dataset import MsDataset
|
from modelscope.msdatasets.ms_dataset import MsDataset
|
||||||
|
from modelscope.utils.constant import DownloadMode
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
class ASRDataset(MsDataset):
|
class ASRDataset(MsDataset):
|
||||||
@@ -33,16 +35,21 @@ class ASRDataset(MsDataset):
|
|||||||
dataset_name,
|
dataset_name,
|
||||||
namespace='speech_asr',
|
namespace='speech_asr',
|
||||||
train_set='train',
|
train_set='train',
|
||||||
dev_set='validation'):
|
dev_set='validation',
|
||||||
if os.path.exists(dataset_name):
|
download_mode: Optional[DownloadMode] = None):
|
||||||
data_dir = dataset_name
|
if download_mode is not None:
|
||||||
ds_dict = {}
|
ds_dict = MsDataset.load(
|
||||||
ds_dict['train'] = cls.load_core(data_dir, train_set)
|
dataset_name=dataset_name, namespace=namespace, download_mode=download_mode)
|
||||||
ds_dict['validation'] = cls.load_core(data_dir, dev_set)
|
|
||||||
ds_dict['raw_data_dir'] = data_dir
|
|
||||||
return ds_dict
|
return ds_dict
|
||||||
else:
|
else:
|
||||||
from modelscope.msdatasets import MsDataset
|
if os.path.exists(dataset_name):
|
||||||
ds_dict = MsDataset.load(
|
data_dir = dataset_name
|
||||||
dataset_name=dataset_name, namespace=namespace)
|
ds_dict = {}
|
||||||
return ds_dict
|
ds_dict['train'] = cls.load_core(data_dir, train_set)
|
||||||
|
ds_dict['validation'] = cls.load_core(data_dir, dev_set)
|
||||||
|
ds_dict['raw_data_dir'] = data_dir
|
||||||
|
return ds_dict
|
||||||
|
else:
|
||||||
|
ds_dict = MsDataset.load(
|
||||||
|
dataset_name=dataset_name, namespace=namespace)
|
||||||
|
return ds_dict
|
||||||
|
|||||||
Reference in New Issue
Block a user