From e02a260c93d9044bc93300541a76cf3384d9b2a9 Mon Sep 17 00:00:00 2001
From: "xingjun.wxj"
Date: Fri, 10 Mar 2023 09:03:32 +0800
Subject: [PATCH] Refactor the task_datasets module

Refactor the task_datasets module:
1. Add a new module: modelscope.msdatasets.dataset_cls.custom_datasets.
2. Add a new function: modelscope.msdatasets.ms_dataset.MsDataset.to_custom_dataset().
3. Call to_custom_dataset() in MsDataset.load() to adapt to the new custom_datasets module.
4. Refactor the pipeline for loading custom datasets:
   1) Use only the MsDataset.load() function to load custom datasets.
   2) Combine MsDataset.load() with the EpochBasedTrainer class.
5. Add a new entry function for building datasets in EpochBasedTrainer; see modelscope.trainers.trainer.EpochBasedTrainer.build_dataset().
6. Add a new function to build a custom dataset from the model configuration; see modelscope.trainers.trainer.EpochBasedTrainer.build_dataset_from_cfg().
7. Add a new registry function for building custom datasets; see modelscope.msdatasets.dataset_cls.custom_datasets.builder.build_custom_dataset().
8. Refine the SiameseUIETrainer class to adapt to the new custom_datasets module.
9. Add the TorchCustomDataset class as a superclass for custom dataset classes.
10. Move modules/classes/functions:
    1) Move module msdatasets.audio to custom_datasets
    2) Move module msdatasets.cv to custom_datasets
    3) Move module bad_image_detecting to custom_datasets
    4) Move module damoyolo to custom_datasets
    5) Move module face_2d_keypoints to custom_datasets
    6) Move module hand_2d_keypoints to custom_datasets
    7) Move module human_wholebody_keypoint to custom_datasets
    8) Move module image_classification to custom_datasets
    9) Move module image_inpainting to custom_datasets
    10) Move module image_portrait_enhancement to custom_datasets
    11) Move module image_quality_assessment_degradation to custom_datasets
    12) Move module image_quality_assmessment_mos to custom_datasets
    13) Move class LanguageGuidedVideoSummarizationDataset to custom_datasets
    14) Move class MGeoRankingDataset to custom_datasets
    15) Move module movie_scene_segmentation to custom_datasets
    16) Move module object_detection to custom_datasets
    17) Move module referring_video_object_segmentation to custom_datasets
    18) Move module sidd_image_denoising to custom_datasets
    19) Move module video_frame_interpolation to custom_datasets
    20) Move module video_stabilization to custom_datasets
    21) Move module video_super_resolution to custom_datasets
    22) Move class GoproImageDeblurringDataset to custom_datasets
    23) Move class EasyCVBaseDataset to custom_datasets
    24) Move class ImageInstanceSegmentationCocoDataset to custom_datasets
    25) Move class RedsImageDeblurringDataset to custom_datasets
    26) Move class TextRankingDataset to custom_datasets
    27) Move class VecoDataset to custom_datasets
    28) Move class VideoSummarizationDataset to custom_datasets
11. Delete modules/functions/classes:
    1) Delete module task_datasets
    2) Delete to_task_dataset() in EpochBasedTrainer
    3) Delete build_dataset() in EpochBasedTrainer and add a new function with the same name.
12.
Rename class Datasets to CustomDatasets in metainfo.py Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/11872747 --- docs/source/api/modelscope.msdatasets.cv.rst | 14 - ...msdatasets.dataset_cls.custom_datasets.rst | 41 ++ .../api/modelscope.msdatasets.dataset_cls.rst | 15 + .../api/modelscope.msdatasets.ms_dataset.rst | 1 - modelscope/metainfo.py | 2 +- .../damo/apis/detector_evaluater.py | 4 +- .../damo/apis/detector_inference.py | 2 +- modelscope/msdatasets/__init__.py | 1 - modelscope/msdatasets/audio/__init__.py | 0 modelscope/msdatasets/cv/__init__.py | 3 - .../msdatasets/data_loader/data_loader.py | 7 +- modelscope/msdatasets/dataset_cls/__init__.py | 2 + .../dataset_cls/custom_datasets/__init__.py | 84 +++ .../custom_datasets}/audio/__init__.py | 0 .../custom_datasets}/audio/asr_dataset.py | 0 .../audio/kws_farfield_dataset.py | 1 - .../audio/kws_nearfield_dataset.py | 2 +- .../audio/kws_nearfield_processor.py | 0 .../bad_image_detecting/__init__.py | 0 .../bad_image_detecting_dataset.py | 12 +- .../custom_datasets}/builder.py | 12 +- .../custom_datasets}/damoyolo/__init__.py | 1 + .../custom_datasets}/damoyolo/build.py | 0 .../damoyolo/collate_batch.py | 0 .../damoyolo/datasets/__init__.py | 0 .../damoyolo/datasets/coco.py | 0 .../damoyolo/datasets/mosaic_wrapper.py | 0 .../damoyolo/evaluation/__init__.py | 2 +- .../damoyolo/evaluation/coco/__init__.py | 0 .../damoyolo/evaluation/coco/coco_eval.py | 0 .../damoyolo/samplers/__init__.py | 0 .../damoyolo/samplers/distributed.py | 0 .../samplers/grouped_batch_sampler.py | 0 .../samplers/iteration_based_batch_sampler.py | 0 .../damoyolo/transforms/__init__.py | 0 .../damoyolo/transforms/build.py | 0 .../damoyolo/transforms/transforms.py | 0 .../custom_datasets}/easycv_base.py | 0 .../face_2d_keypoins/__init__.py | 0 .../face_2d_keypoints_dataset.py | 11 +- .../gopro_image_deblurring_dataset.py | 17 +- .../hand_2d_keypoints/__init__.py | 22 + .../hand_2d_keypoints_dataset.py | 11 +- .../human_wholebody_keypoint/__init__.py | 0 .../human_wholebody_keypoint_dataset.py | 11 +- .../image_classification/__init__.py | 0 .../classification_dataset.py | 12 +- .../image_inpainting}/__init__.py | 6 +- .../custom_datasets}/image_inpainting/aug.py | 0 .../image_inpainting_dataset.py | 12 +- ...mage_instance_segmentation_coco_dataset.py | 8 +- .../image_portrait_enhancement/__init__.py | 0 .../image_portrait_enhancement/data_utils.py | 0 .../image_portrait_enhancement_dataset.py | 13 +- .../__init__.py | 0 ..._quality_assessment_degradation_dataset.py | 11 +- .../image_quality_assmessment_mos/__init__.py | 0 .../image_quality_assessment_mos_dataset.py | 12 +- .../image_semantic_segmentation/__init__.py | 0 .../segmentation_dataset.py | 11 +- ...uage_guided_video_summarization_dataset.py | 9 +- .../custom_datasets}/mgeo_ranking_dataset.py | 14 +- .../movie_scene_segmentation/__init__.py | 20 + .../movie_scene_segmentation_dataset.py | 11 +- .../movie_scene_segmentation/sampler.py | 0 .../object_detection/__init__.py | 0 .../object_detection/detection_dataset.py | 25 +- .../ocr_detection/__init__.py | 1 + .../ocr_detection/augmenter.py | 0 .../ocr_detection/data_loader.py | 0 .../ocr_detection/image_dataset.py | 0 .../ocr_detection/measures/__init__.py | 0 .../ocr_detection/measures/iou_evaluator.py | 0 .../ocr_detection/measures/quad_measurer.py | 0 .../ocr_detection/processes/__init__.py | 0 .../ocr_detection/processes/augment_data.py | 0 .../ocr_detection/processes/data_process.py | 0 .../processes/make_border_map.py | 0 
.../processes/make_icdar_data.py | 0 .../processes/make_seg_detection_data.py | 0 .../processes/normalize_image.py | 0 .../processes/random_crop_data.py | 0 .../ocr_recognition_dataset.py | 11 +- .../reds_image_deblurring_dataset.py | 17 +- .../__init__.py | 21 + ...rring_video_object_segmentation_dataset.py | 9 +- .../transformers.py | 0 .../sidd_image_denoising/__init__.py | 0 .../sidd_image_denoising/data_utils.py | 0 .../sidd_image_denoising_dataset.py | 9 +- .../sidd_image_denoising/transforms.py | 0 .../custom_datasets}/text_ranking_dataset.py | 16 +- .../custom_datasets/torch_custom_dataset.py | 51 ++ .../custom_datasets}/veco_dataset.py | 8 +- .../video_frame_interpolation/__init__.py | 0 .../video_frame_interpolation/data_utils.py | 0 .../video_frame_interpolation_dataset.py | 13 +- .../video_stabilization/__init__.py | 0 .../video_stabilization_dataset.py | 9 +- .../video_summarization_dataset.py | 6 +- .../video_super_resolution/__init__.py | 0 .../video_super_resolution_dataset.py | 9 +- modelscope/msdatasets/dataset_cls/dataset.py | 27 +- .../msdatasets/meta/data_meta_config.py | 31 +- .../msdatasets/meta/data_meta_manager.py | 5 +- modelscope/msdatasets/ms_dataset.py | 697 ++++++++++-------- .../msdatasets/task_datasets/__init__.py | 51 -- modelscope/msdatasets/task_datasets/base.py | 48 -- .../image_inpainting/__init__.py | 2 - .../movie_scene_segmentation/__init__.py | 2 - .../__init__.py | 3 - .../task_datasets/torch_base_dataset.py | 64 -- modelscope/msdatasets/utils/dataset_utils.py | 4 +- .../trainers/audio/kws_farfield_trainer.py | 3 +- .../trainers/audio/kws_nearfield_trainer.py | 16 +- .../cv/image_detection_damoyolo_trainer.py | 8 +- .../trainers/cv/ocr_detection_db_trainer.py | 6 +- .../trainers/nlp/siamese_uie_trainer.py | 30 +- modelscope/trainers/nlp_trainer.py | 9 +- modelscope/trainers/trainer.py | 221 +++--- modelscope/utils/ast_utils.py | 2 +- modelscope/utils/constant.py | 5 + tests/msdatasets/test_ms_dataset.py | 37 +- .../test_movie_scene_segmentation.py | 90 ++- tests/run_analysis.py | 2 +- tests/taskdataset/test_veco_dataset.py | 3 +- .../trainers/test_action_detection_trainer.py | 2 +- tests/trainers/test_image_deblur_trainer.py | 2 +- tests/trainers/test_image_denoise_trainer.py | 2 +- ...est_image_instance_segmentation_trainer.py | 2 - ...test_image_portrait_enhancement_trainer.py | 8 +- ...uage_guided_video_summarization_trainer.py | 2 +- tests/trainers/test_siamese_uie_trainer.py | 3 +- .../trainers/test_tinynas_damoyolo_trainer.py | 12 +- .../test_video_summarization_trainer.py | 4 +- 135 files changed, 1158 insertions(+), 867 deletions(-) delete mode 100644 docs/source/api/modelscope.msdatasets.cv.rst create mode 100644 docs/source/api/modelscope.msdatasets.dataset_cls.custom_datasets.rst create mode 100644 docs/source/api/modelscope.msdatasets.dataset_cls.rst delete mode 100644 modelscope/msdatasets/audio/__init__.py delete mode 100644 modelscope/msdatasets/cv/__init__.py create mode 100644 modelscope/msdatasets/dataset_cls/custom_datasets/__init__.py rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/audio/__init__.py (100%) rename modelscope/msdatasets/{ => dataset_cls/custom_datasets}/audio/asr_dataset.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/audio/kws_farfield_dataset.py (99%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/audio/kws_nearfield_dataset.py (98%) rename modelscope/msdatasets/{task_datasets => 
dataset_cls/custom_datasets}/audio/kws_nearfield_processor.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/bad_image_detecting/__init__.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/bad_image_detecting/bad_image_detecting_dataset.py (79%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/builder.py (56%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/damoyolo/__init__.py (75%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/damoyolo/build.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/damoyolo/collate_batch.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/damoyolo/datasets/__init__.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/damoyolo/datasets/coco.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/damoyolo/datasets/mosaic_wrapper.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/damoyolo/evaluation/__init__.py (93%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/damoyolo/evaluation/coco/__init__.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/damoyolo/evaluation/coco/coco_eval.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/damoyolo/samplers/__init__.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/damoyolo/samplers/distributed.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/damoyolo/samplers/grouped_batch_sampler.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/damoyolo/samplers/iteration_based_batch_sampler.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/damoyolo/transforms/__init__.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/damoyolo/transforms/build.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/damoyolo/transforms/transforms.py (100%) rename modelscope/msdatasets/{cv => dataset_cls/custom_datasets}/easycv_base.py (100%) rename modelscope/msdatasets/{cv => dataset_cls/custom_datasets}/face_2d_keypoins/__init__.py (100%) rename modelscope/msdatasets/{cv => dataset_cls/custom_datasets}/face_2d_keypoins/face_2d_keypoints_dataset.py (78%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/gopro_image_deblurring_dataset.py (76%) create mode 100644 modelscope/msdatasets/dataset_cls/custom_datasets/hand_2d_keypoints/__init__.py rename modelscope/msdatasets/{cv => dataset_cls/custom_datasets}/hand_2d_keypoints/hand_2d_keypoints_dataset.py (79%) rename modelscope/msdatasets/{cv => dataset_cls/custom_datasets}/human_wholebody_keypoint/__init__.py (100%) rename modelscope/msdatasets/{cv => dataset_cls/custom_datasets}/human_wholebody_keypoint/human_wholebody_keypoint_dataset.py (79%) rename modelscope/msdatasets/{cv => dataset_cls/custom_datasets}/image_classification/__init__.py (100%) rename modelscope/msdatasets/{cv => dataset_cls/custom_datasets}/image_classification/classification_dataset.py (75%) rename modelscope/msdatasets/{cv/hand_2d_keypoints => dataset_cls/custom_datasets/image_inpainting}/__init__.py (75%) rename modelscope/msdatasets/{task_datasets => 
dataset_cls/custom_datasets}/image_inpainting/aug.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/image_inpainting/image_inpainting_dataset.py (97%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/image_instance_segmentation_coco_dataset.py (98%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/image_portrait_enhancement/__init__.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/image_portrait_enhancement/data_utils.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/image_portrait_enhancement/image_portrait_enhancement_dataset.py (77%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/image_quality_assessment_degradation/__init__.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/image_quality_assessment_degradation/image_quality_assessment_degradation_dataset.py (81%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/image_quality_assmessment_mos/__init__.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/image_quality_assmessment_mos/image_quality_assessment_mos_dataset.py (77%) rename modelscope/msdatasets/{cv => dataset_cls/custom_datasets}/image_semantic_segmentation/__init__.py (100%) rename modelscope/msdatasets/{cv => dataset_cls/custom_datasets}/image_semantic_segmentation/segmentation_dataset.py (81%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/language_guided_video_summarization_dataset.py (94%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/mgeo_ranking_dataset.py (93%) create mode 100644 modelscope/msdatasets/dataset_cls/custom_datasets/movie_scene_segmentation/__init__.py rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/movie_scene_segmentation/movie_scene_segmentation_dataset.py (94%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/movie_scene_segmentation/sampler.py (100%) rename modelscope/msdatasets/{cv => dataset_cls/custom_datasets}/object_detection/__init__.py (100%) rename modelscope/msdatasets/{cv => dataset_cls/custom_datasets}/object_detection/detection_dataset.py (85%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/ocr_detection/__init__.py (78%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/ocr_detection/augmenter.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/ocr_detection/data_loader.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/ocr_detection/image_dataset.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/ocr_detection/measures/__init__.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/ocr_detection/measures/iou_evaluator.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/ocr_detection/measures/quad_measurer.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/ocr_detection/processes/__init__.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/ocr_detection/processes/augment_data.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/ocr_detection/processes/data_process.py (100%) rename modelscope/msdatasets/{task_datasets => 
dataset_cls/custom_datasets}/ocr_detection/processes/make_border_map.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/ocr_detection/processes/make_icdar_data.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/ocr_detection/processes/make_seg_detection_data.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/ocr_detection/processes/normalize_image.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/ocr_detection/processes/random_crop_data.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/ocr_recognition_dataset.py (87%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/reds_image_deblurring_dataset.py (74%) create mode 100644 modelscope/msdatasets/dataset_cls/custom_datasets/referring_video_object_segmentation/__init__.py rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/referring_video_object_segmentation/referring_video_object_segmentation_dataset.py (98%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/referring_video_object_segmentation/transformers.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/sidd_image_denoising/__init__.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/sidd_image_denoising/data_utils.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/sidd_image_denoising/sidd_image_denoising_dataset.py (87%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/sidd_image_denoising/transforms.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/text_ranking_dataset.py (92%) create mode 100644 modelscope/msdatasets/dataset_cls/custom_datasets/torch_custom_dataset.py rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/veco_dataset.py (91%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/video_frame_interpolation/__init__.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/video_frame_interpolation/data_utils.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/video_frame_interpolation/video_frame_interpolation_dataset.py (79%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/video_stabilization/__init__.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/video_stabilization/video_stabilization_dataset.py (71%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/video_summarization_dataset.py (94%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/video_super_resolution/__init__.py (100%) rename modelscope/msdatasets/{task_datasets => dataset_cls/custom_datasets}/video_super_resolution/video_super_resolution_dataset.py (89%) delete mode 100644 modelscope/msdatasets/task_datasets/__init__.py delete mode 100644 modelscope/msdatasets/task_datasets/base.py delete mode 100644 modelscope/msdatasets/task_datasets/image_inpainting/__init__.py delete mode 100644 modelscope/msdatasets/task_datasets/movie_scene_segmentation/__init__.py delete mode 100644 modelscope/msdatasets/task_datasets/referring_video_object_segmentation/__init__.py delete mode 100644 modelscope/msdatasets/task_datasets/torch_base_dataset.py diff --git 
a/docs/source/api/modelscope.msdatasets.cv.rst b/docs/source/api/modelscope.msdatasets.cv.rst deleted file mode 100644 index ef0a8a3b..00000000 --- a/docs/source/api/modelscope.msdatasets.cv.rst +++ /dev/null @@ -1,14 +0,0 @@ -modelscope.msdatasets.cv -================================ - -.. automodule:: modelscope.msdatasets.cv - -.. currentmodule:: modelscope.msdatasets.cv - -.. autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - easycv_base.EasyCVBaseDataset - image_classification.ClsDataset diff --git a/docs/source/api/modelscope.msdatasets.dataset_cls.custom_datasets.rst b/docs/source/api/modelscope.msdatasets.dataset_cls.custom_datasets.rst new file mode 100644 index 00000000..b5a4b0f6 --- /dev/null +++ b/docs/source/api/modelscope.msdatasets.dataset_cls.custom_datasets.rst @@ -0,0 +1,41 @@ +modelscope.msdatasets.dataset_cls.custom_datasets +================================================= + +.. automodule:: modelscope.msdatasets.dataset_cls.custom_datasets + +.. currentmodule:: modelscope.msdatasets.dataset_cls.custom_datasets + + +.. autosummary:: + :toctree: generated + :nosignatures: + :template: classtemplate.rst + + EasyCVBaseDataset + TorchCustomDataset + MovieSceneSegmentationDataset + ImageInstanceSegmentationCocoDataset + GoproImageDeblurringDataset + LanguageGuidedVideoSummarizationDataset + MGeoRankingDataset + RedsImageDeblurringDataset + TextRankingDataset + VecoDataset + VideoSummarizationDataset + BadImageDetectingDataset + ImageInpaintingDataset + ImagePortraitEnhancementDataset + ImageQualityAssessmentDegradationDataset + ImageQualityAssessmentMosDataset + ReferringVideoObjectSegmentationDataset + SiddImageDenoisingDataset + VideoFrameInterpolationDataset + VideoStabilizationDataset + VideoSuperResolutionDataset + SegDataset + FaceKeypointDataset + HandCocoWholeBodyDataset + WholeBodyCocoTopDownDataset + ClsDataset + DetImagesMixDataset + DetDataset diff --git a/docs/source/api/modelscope.msdatasets.dataset_cls.rst b/docs/source/api/modelscope.msdatasets.dataset_cls.rst new file mode 100644 index 00000000..d415b800 --- /dev/null +++ b/docs/source/api/modelscope.msdatasets.dataset_cls.rst @@ -0,0 +1,15 @@ +modelscope.msdatasets.dataset_cls +================================= + +.. automodule:: modelscope.msdatasets.dataset_cls + +.. currentmodule:: modelscope.msdatasets.dataset_cls + + +.. autosummary:: + :toctree: generated + :nosignatures: + :template: classtemplate.rst + + ExternalDataset + NativeIterableDataset diff --git a/docs/source/api/modelscope.msdatasets.ms_dataset.rst b/docs/source/api/modelscope.msdatasets.ms_dataset.rst index 03cc8d97..92df1e89 100644 --- a/docs/source/api/modelscope.msdatasets.ms_dataset.rst +++ b/docs/source/api/modelscope.msdatasets.ms_dataset.rst @@ -10,5 +10,4 @@ modelscope.msdatasets.ms_dataset :nosignatures: :template: classtemplate.rst - MsMapDataset MsDataset diff --git a/modelscope/metainfo.py b/modelscope/metainfo.py index 9e7e368a..e4059269 100644 --- a/modelscope/metainfo.py +++ b/modelscope/metainfo.py @@ -1137,7 +1137,7 @@ class LR_Schedulers(object): ExponentialWarmup = 'ExponentialWarmup' -class Datasets(object): +class CustomDatasets(object): """ Names for different datasets.
""" ClsDataset = 'ClsDataset' diff --git a/modelscope/models/cv/tinynas_detection/damo/apis/detector_evaluater.py b/modelscope/models/cv/tinynas_detection/damo/apis/detector_evaluater.py index 82ffb567..6ff194f6 100644 --- a/modelscope/models/cv/tinynas_detection/damo/apis/detector_evaluater.py +++ b/modelscope/models/cv/tinynas_detection/damo/apis/detector_evaluater.py @@ -8,8 +8,8 @@ from modelscope.models.cv.tinynas_detection.damo.apis.detector_inference import inference from modelscope.models.cv.tinynas_detection.damo.detectors.detector import \ build_local_model -from modelscope.msdatasets.task_datasets.damoyolo import (build_dataloader, - build_dataset) +from modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo import ( + build_dataloader, build_dataset) def mkdir(path): diff --git a/modelscope/models/cv/tinynas_detection/damo/apis/detector_inference.py b/modelscope/models/cv/tinynas_detection/damo/apis/detector_inference.py index 47c1fb1b..dcd33834 100644 --- a/modelscope/models/cv/tinynas_detection/damo/apis/detector_inference.py +++ b/modelscope/models/cv/tinynas_detection/damo/apis/detector_inference.py @@ -5,7 +5,7 @@ import os import torch from tqdm import tqdm -from modelscope.msdatasets.task_datasets.damoyolo.evaluation import evaluate +from modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo import evaluate from modelscope.utils.logger import get_logger from modelscope.utils.timer import Timer, get_time_str from modelscope.utils.torch_utils import (all_gather, get_world_size, diff --git a/modelscope/msdatasets/__init__.py b/modelscope/msdatasets/__init__.py index 073f9396..70200e44 100644 --- a/modelscope/msdatasets/__init__.py +++ b/modelscope/msdatasets/__init__.py @@ -1,3 +1,2 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -from . import cv from .ms_dataset import MsDataset diff --git a/modelscope/msdatasets/audio/__init__.py b/modelscope/msdatasets/audio/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/modelscope/msdatasets/cv/__init__.py b/modelscope/msdatasets/cv/__init__.py deleted file mode 100644 index fad91bcf..00000000 --- a/modelscope/msdatasets/cv/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from . import (image_classification, image_semantic_segmentation, - object_detection) diff --git a/modelscope/msdatasets/data_loader/data_loader.py b/modelscope/msdatasets/data_loader/data_loader.py index c97151b0..1ef92372 100644 --- a/modelscope/msdatasets/data_loader/data_loader.py +++ b/modelscope/msdatasets/data_loader/data_loader.py @@ -13,6 +13,7 @@ from modelscope.msdatasets.context.dataset_context_config import \ DatasetContextConfig from modelscope.msdatasets.data_files.data_files_manager import \ DataFilesManager +from modelscope.msdatasets.dataset_cls.dataset import ExternalDataset from modelscope.msdatasets.meta.data_meta_manager import DataMetaManager from modelscope.utils.constant import DatasetFormations @@ -62,7 +63,8 @@ class OssDataLoader(BaseDataLoader): self.data_files_builder: Optional[DataFilesManager] = None self.dataset: Optional[Union[Dataset, IterableDataset, DatasetDict, - IterableDatasetDict]] = None + IterableDatasetDict, + ExternalDataset]] = None self.builder: Optional[DatasetBuilder] = None self.data_files_manager: Optional[DataFilesManager] = None @@ -141,7 +143,8 @@ class OssDataLoader(BaseDataLoader): self.builder) def _post_process(self) -> None: - ... 
+ if isinstance(self.dataset, ExternalDataset): + self.dataset.custom_map = self.dataset_context_config.data_meta_config.meta_type_map class MaxComputeDataLoader(BaseDataLoader): diff --git a/modelscope/msdatasets/dataset_cls/__init__.py b/modelscope/msdatasets/dataset_cls/__init__.py index b937315b..a5b2e73d 100644 --- a/modelscope/msdatasets/dataset_cls/__init__.py +++ b/modelscope/msdatasets/dataset_cls/__init__.py @@ -1 +1,3 @@ # Copyright (c) Alibaba, Inc. and its affiliates. + +from .dataset import ExternalDataset, NativeIterableDataset diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/__init__.py new file mode 100644 index 00000000..c8a94b89 --- /dev/null +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/__init__.py @@ -0,0 +1,84 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .easycv_base import EasyCVBaseDataset + from .builder import CUSTOM_DATASETS, build_custom_dataset + from .torch_custom_dataset import TorchCustomDataset + from .movie_scene_segmentation.movie_scene_segmentation_dataset import MovieSceneSegmentationDataset + from .image_instance_segmentation_coco_dataset import ImageInstanceSegmentationCocoDataset + from .gopro_image_deblurring_dataset import GoproImageDeblurringDataset + from .language_guided_video_summarization_dataset import LanguageGuidedVideoSummarizationDataset + from .mgeo_ranking_dataset import MGeoRankingDataset + from .reds_image_deblurring_dataset import RedsImageDeblurringDataset + from .text_ranking_dataset import TextRankingDataset + from .veco_dataset import VecoDataset + from .video_summarization_dataset import VideoSummarizationDataset + from .audio import KWSDataset, KWSDataLoader, kws_nearfield_dataset + from .bad_image_detecting import BadImageDetectingDataset + from .image_inpainting import ImageInpaintingDataset + from .image_portrait_enhancement import ImagePortraitEnhancementDataset + from .image_quality_assessment_degradation import ImageQualityAssessmentDegradationDataset + from .image_quality_assmessment_mos import ImageQualityAssessmentMosDataset + from .referring_video_object_segmentation import ReferringVideoObjectSegmentationDataset + from .sidd_image_denoising import SiddImageDenoisingDataset + from .video_frame_interpolation import VideoFrameInterpolationDataset + from .video_stabilization import VideoStabilizationDataset + from .video_super_resolution import VideoSuperResolutionDataset + from .image_semantic_segmentation import SegDataset + from .face_2d_keypoins import FaceKeypointDataset + from .hand_2d_keypoints import HandCocoWholeBodyDataset + from .human_wholebody_keypoint import WholeBodyCocoTopDownDataset + from .image_classification import ClsDataset + from .object_detection import DetDataset, DetImagesMixDataset + from .ocr_detection import DataLoader, ImageDataset, QuadMeasurer + from .ocr_recognition_dataset import OCRRecognitionDataset +else: + _import_structure = { + 'easycv_base': ['EasyCVBaseDataset'], + 'builder': ['CUSTOM_DATASETS', 'build_custom_dataset'], + 'torch_custom_dataset': ['TorchCustomDataset'], + 'movie_scene_segmentation_dataset': ['MovieSceneSegmentationDataset'], + 'image_instance_segmentation_coco_dataset': + ['ImageInstanceSegmentationCocoDataset'], + 'gopro_image_deblurring_dataset': ['GoproImageDeblurringDataset'], + 'language_guided_video_summarization_dataset': + 
['LanguageGuidedVideoSummarizationDataset'], + 'mgeo_ranking_dataset': ['MGeoRankingDataset'], + 'reds_image_deblurring_dataset': ['RedsImageDeblurringDataset'], + 'text_ranking_dataset': ['TextRankingDataset'], + 'veco_dataset': ['VecoDataset'], + 'video_summarization_dataset': ['VideoSummarizationDataset'], + 'audio': ['KWSDataset', 'KWSDataLoader', 'kws_nearfield_dataset'], + 'bad_image_detecting': ['BadImageDetectingDataset'], + 'image_inpainting': ['ImageInpaintingDataset'], + 'image_portrait_enhancement': ['ImagePortraitEnhancementDataset'], + 'image_quality_assessment_degradation': + ['ImageQualityAssessmentDegradationDataset'], + 'image_quality_assmessment_mos': ['ImageQualityAssessmentMosDataset'], + 'referring_video_object_segmentation': + ['ReferringVideoObjectSegmentationDataset'], + 'sidd_image_denoising': ['SiddImageDenoisingDataset'], + 'video_frame_interpolation': ['VideoFrameInterpolationDataset'], + 'video_stabilization': ['VideoStabilizationDataset'], + 'video_super_resolution': ['VideoSuperResolutionDataset'], + 'image_semantic_segmentation': ['SegDataset'], + 'face_2d_keypoins': ['FaceKeypointDataset'], + 'hand_2d_keypoints': ['HandCocoWholeBodyDataset'], + 'human_wholebody_keypoint': ['WholeBodyCocoTopDownDataset'], + 'image_classification': ['ClsDataset'], + 'object_detection': ['DetDataset', 'DetImagesMixDataset'], + 'ocr_detection': ['DataLoader', 'ImageDataset', 'QuadMeasurer'], + 'ocr_recognition_dataset': ['OCRRecognitionDataset'], + } + + import sys + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/msdatasets/task_datasets/audio/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/audio/__init__.py similarity index 100% rename from modelscope/msdatasets/task_datasets/audio/__init__.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/audio/__init__.py diff --git a/modelscope/msdatasets/audio/asr_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/audio/asr_dataset.py similarity index 100% rename from modelscope/msdatasets/audio/asr_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/audio/asr_dataset.py diff --git a/modelscope/msdatasets/task_datasets/audio/kws_farfield_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/audio/kws_farfield_dataset.py similarity index 99% rename from modelscope/msdatasets/task_datasets/audio/kws_farfield_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/audio/kws_farfield_dataset.py index d4866204..69c95bbd 100644 --- a/modelscope/msdatasets/task_datasets/audio/kws_farfield_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/audio/kws_farfield_dataset.py @@ -5,7 +5,6 @@ import math import os.path import queue import threading -import time import numpy as np import torch diff --git a/modelscope/msdatasets/task_datasets/audio/kws_nearfield_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/audio/kws_nearfield_dataset.py similarity index 98% rename from modelscope/msdatasets/task_datasets/audio/kws_nearfield_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/audio/kws_nearfield_dataset.py index 43f28e01..1b784410 100644 --- a/modelscope/msdatasets/task_datasets/audio/kws_nearfield_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/audio/kws_nearfield_dataset.py @@ -18,7 +18,7 @@ import torch import torch.distributed as dist from torch.utils.data import 
IterableDataset -import modelscope.msdatasets.task_datasets.audio.kws_nearfield_processor as processor +import modelscope.msdatasets.dataset_cls.custom_datasets.audio.kws_nearfield_processor as processor from modelscope.trainers.audio.kws_utils.file_utils import (make_pair, read_lists) from modelscope.utils.logger import get_logger diff --git a/modelscope/msdatasets/task_datasets/audio/kws_nearfield_processor.py b/modelscope/msdatasets/dataset_cls/custom_datasets/audio/kws_nearfield_processor.py similarity index 100% rename from modelscope/msdatasets/task_datasets/audio/kws_nearfield_processor.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/audio/kws_nearfield_processor.py diff --git a/modelscope/msdatasets/task_datasets/bad_image_detecting/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/bad_image_detecting/__init__.py similarity index 100% rename from modelscope/msdatasets/task_datasets/bad_image_detecting/__init__.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/bad_image_detecting/__init__.py diff --git a/modelscope/msdatasets/task_datasets/bad_image_detecting/bad_image_detecting_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/bad_image_detecting/bad_image_detecting_dataset.py similarity index 79% rename from modelscope/msdatasets/task_datasets/bad_image_detecting/bad_image_detecting_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/bad_image_detecting/bad_image_detecting_dataset.py index f3cd9a2f..539b7b25 100644 --- a/modelscope/msdatasets/task_datasets/bad_image_detecting/bad_image_detecting_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/bad_image_detecting/bad_image_detecting_dataset.py @@ -1,12 +1,8 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -import cv2 -import numpy as np - from modelscope.metainfo import Models -from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS -from modelscope.msdatasets.task_datasets.torch_base_dataset import \ - TorchTaskDataset +from modelscope.msdatasets.dataset_cls.custom_datasets import ( + CUSTOM_DATASETS, TorchCustomDataset) from modelscope.outputs import OutputKeys from modelscope.preprocessors import LoadImage from modelscope.preprocessors.cv.bad_image_detecting_preprocessor import \ @@ -14,9 +10,9 @@ from modelscope.preprocessors.cv.bad_image_detecting_preprocessor import \ from modelscope.utils.constant import Tasks -@TASK_DATASETS.register_module( +@CUSTOM_DATASETS.register_module( Tasks.bad_image_detecting, module_name=Models.bad_image_detecting) -class BadImageDetectingDataset(TorchTaskDataset): +class BadImageDetectingDataset(TorchCustomDataset): """Paired image dataset for bad image detecting. """ diff --git a/modelscope/msdatasets/task_datasets/builder.py b/modelscope/msdatasets/dataset_cls/custom_datasets/builder.py similarity index 56% rename from modelscope/msdatasets/task_datasets/builder.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/builder.py index 683bec8f..a793ea27 100644 --- a/modelscope/msdatasets/task_datasets/builder.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/builder.py @@ -3,13 +3,13 @@ from modelscope.utils.config import ConfigDict from modelscope.utils.registry import Registry, build_from_cfg -TASK_DATASETS = Registry('task_datasets') +CUSTOM_DATASETS = Registry('custom_datasets') -def build_task_dataset(cfg: ConfigDict, - task_name: str = None, - default_args: dict = None): - """ Build task specific dataset processor given model config dict and the task name. 
+def build_custom_dataset(cfg: ConfigDict, + task_name: str, + default_args: dict = None): + """ Build a custom (user-defined) dataset given the model config and task name. Args: cfg (:obj:`ConfigDict`): config dict for model object. @@ -18,4 +18,4 @@ def build_custom_dataset(cfg: ConfigDict, default_args (dict, optional): Default initialization arguments. """ return build_from_cfg( - cfg, TASK_DATASETS, group_key=task_name, default_args=default_args) + cfg, CUSTOM_DATASETS, group_key=task_name, default_args=default_args) diff --git a/modelscope/msdatasets/task_datasets/damoyolo/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/__init__.py similarity index 75% rename from modelscope/msdatasets/task_datasets/damoyolo/__init__.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/__init__.py index 2a3bccdb..dabde7a4 100644 --- a/modelscope/msdatasets/task_datasets/damoyolo/__init__.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/__init__.py @@ -1,2 +1,3 @@ # Copyright (c) Alibaba, Inc. and its affiliates. from .build import build_dataloader, build_dataset +from .evaluation import evaluate diff --git a/modelscope/msdatasets/task_datasets/damoyolo/build.py b/modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/build.py similarity index 100% rename from modelscope/msdatasets/task_datasets/damoyolo/build.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/build.py diff --git a/modelscope/msdatasets/task_datasets/damoyolo/collate_batch.py b/modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/collate_batch.py similarity index 100% rename from modelscope/msdatasets/task_datasets/damoyolo/collate_batch.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/collate_batch.py diff --git a/modelscope/msdatasets/task_datasets/damoyolo/datasets/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/datasets/__init__.py similarity index 100% rename from modelscope/msdatasets/task_datasets/damoyolo/datasets/__init__.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/datasets/__init__.py diff --git a/modelscope/msdatasets/task_datasets/damoyolo/datasets/coco.py b/modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/datasets/coco.py similarity index 100% rename from modelscope/msdatasets/task_datasets/damoyolo/datasets/coco.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/datasets/coco.py diff --git a/modelscope/msdatasets/task_datasets/damoyolo/datasets/mosaic_wrapper.py b/modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/datasets/mosaic_wrapper.py similarity index 100% rename from modelscope/msdatasets/task_datasets/damoyolo/datasets/mosaic_wrapper.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/datasets/mosaic_wrapper.py diff --git a/modelscope/msdatasets/task_datasets/damoyolo/evaluation/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/evaluation/__init__.py similarity index 93% rename from modelscope/msdatasets/task_datasets/damoyolo/evaluation/__init__.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/evaluation/__init__.py index b121b80b..b12fbf69 100644 --- a/modelscope/msdatasets/task_datasets/damoyolo/evaluation/__init__.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/evaluation/__init__.py @@ -1,6 +1,6 @@ # Copyright © Alibaba, Inc. and its affiliates. 
-from modelscope.msdatasets.task_datasets.damoyolo import datasets +from .. import datasets from .coco import coco_evaluation diff --git a/modelscope/msdatasets/task_datasets/damoyolo/evaluation/coco/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/evaluation/coco/__init__.py similarity index 100% rename from modelscope/msdatasets/task_datasets/damoyolo/evaluation/coco/__init__.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/evaluation/coco/__init__.py diff --git a/modelscope/msdatasets/task_datasets/damoyolo/evaluation/coco/coco_eval.py b/modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/evaluation/coco/coco_eval.py similarity index 100% rename from modelscope/msdatasets/task_datasets/damoyolo/evaluation/coco/coco_eval.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/evaluation/coco/coco_eval.py diff --git a/modelscope/msdatasets/task_datasets/damoyolo/samplers/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/samplers/__init__.py similarity index 100% rename from modelscope/msdatasets/task_datasets/damoyolo/samplers/__init__.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/samplers/__init__.py diff --git a/modelscope/msdatasets/task_datasets/damoyolo/samplers/distributed.py b/modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/samplers/distributed.py similarity index 100% rename from modelscope/msdatasets/task_datasets/damoyolo/samplers/distributed.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/samplers/distributed.py diff --git a/modelscope/msdatasets/task_datasets/damoyolo/samplers/grouped_batch_sampler.py b/modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/samplers/grouped_batch_sampler.py similarity index 100% rename from modelscope/msdatasets/task_datasets/damoyolo/samplers/grouped_batch_sampler.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/samplers/grouped_batch_sampler.py diff --git a/modelscope/msdatasets/task_datasets/damoyolo/samplers/iteration_based_batch_sampler.py b/modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/samplers/iteration_based_batch_sampler.py similarity index 100% rename from modelscope/msdatasets/task_datasets/damoyolo/samplers/iteration_based_batch_sampler.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/samplers/iteration_based_batch_sampler.py diff --git a/modelscope/msdatasets/task_datasets/damoyolo/transforms/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/transforms/__init__.py similarity index 100% rename from modelscope/msdatasets/task_datasets/damoyolo/transforms/__init__.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/transforms/__init__.py diff --git a/modelscope/msdatasets/task_datasets/damoyolo/transforms/build.py b/modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/transforms/build.py similarity index 100% rename from modelscope/msdatasets/task_datasets/damoyolo/transforms/build.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/transforms/build.py diff --git a/modelscope/msdatasets/task_datasets/damoyolo/transforms/transforms.py b/modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/transforms/transforms.py similarity index 100% rename from modelscope/msdatasets/task_datasets/damoyolo/transforms/transforms.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/damoyolo/transforms/transforms.py diff --git 
a/modelscope/msdatasets/cv/easycv_base.py b/modelscope/msdatasets/dataset_cls/custom_datasets/easycv_base.py similarity index 100% rename from modelscope/msdatasets/cv/easycv_base.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/easycv_base.py diff --git a/modelscope/msdatasets/cv/face_2d_keypoins/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/face_2d_keypoins/__init__.py similarity index 100% rename from modelscope/msdatasets/cv/face_2d_keypoins/__init__.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/face_2d_keypoins/__init__.py diff --git a/modelscope/msdatasets/cv/face_2d_keypoins/face_2d_keypoints_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/face_2d_keypoins/face_2d_keypoints_dataset.py similarity index 78% rename from modelscope/msdatasets/cv/face_2d_keypoins/face_2d_keypoints_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/face_2d_keypoins/face_2d_keypoints_dataset.py index 2f2e03ef..9f55901f 100644 --- a/modelscope/msdatasets/cv/face_2d_keypoins/face_2d_keypoints_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/face_2d_keypoins/face_2d_keypoints_dataset.py @@ -1,15 +1,16 @@ # Copyright (c) Alibaba, Inc. and its affiliates. from easycv.datasets.face import FaceKeypointDataset as _FaceKeypointDataset -from modelscope.metainfo import Datasets -from modelscope.msdatasets.cv.easycv_base import EasyCVBaseDataset -from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS +from modelscope.metainfo import CustomDatasets +from modelscope.msdatasets.dataset_cls.custom_datasets import CUSTOM_DATASETS +from modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base import \ + EasyCVBaseDataset from modelscope.utils.constant import Tasks -@TASK_DATASETS.register_module( +@CUSTOM_DATASETS.register_module( group_key=Tasks.face_2d_keypoints, - module_name=Datasets.Face2dKeypointsDataset) + module_name=CustomDatasets.Face2dKeypointsDataset) class FaceKeypointDataset(EasyCVBaseDataset, _FaceKeypointDataset): """EasyCV dataset for face 2d keypoints. 
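The hunks above show the new registration pattern end to end. For reference, the following sketch shows how a user-defined dataset would plug into the new registry and builder; it is not part of this patch. MyPairedDataset, its samples argument, and the config values are illustrative placeholders, and the sketch assumes TorchCustomDataset behaves like a plain torch Dataset and that build_from_cfg() instantiates the class named by cfg.type:

    from modelscope.msdatasets.dataset_cls.custom_datasets import (
        CUSTOM_DATASETS, TorchCustomDataset, build_custom_dataset)
    from modelscope.utils.config import ConfigDict
    from modelscope.utils.constant import Tasks


    @CUSTOM_DATASETS.register_module(
        group_key=Tasks.image_deblurring, module_name='MyPairedDataset')
    class MyPairedDataset(TorchCustomDataset):
        """Hypothetical paired-image dataset, for illustration only."""

        def __init__(self, samples=None, **kwargs):
            self.samples = samples or []

        def __len__(self):
            return len(self.samples)

        def __getitem__(self, index):
            return self.samples[index]


    # The builder resolves the class registered under (task_name, cfg.type)
    # and passes the remaining config entries to its constructor.
    cfg = ConfigDict(
        type='MyPairedDataset',
        samples=[{'input': 'blurry.png', 'target': 'sharp.png'}])
    dataset = build_custom_dataset(cfg, task_name=Tasks.image_deblurring)
    assert len(dataset) == 1
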
diff --git a/modelscope/msdatasets/task_datasets/gopro_image_deblurring_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/gopro_image_deblurring_dataset.py similarity index 76% rename from modelscope/msdatasets/task_datasets/gopro_image_deblurring_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/gopro_image_deblurring_dataset.py index fb621551..408b8ffe 100644 --- a/modelscope/msdatasets/task_datasets/gopro_image_deblurring_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/gopro_image_deblurring_dataset.py @@ -3,14 +3,13 @@ import cv2 import numpy as np -from modelscope.metainfo import Datasets -from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS -from modelscope.msdatasets.task_datasets.sidd_image_denoising.data_utils import ( +from modelscope.metainfo import CustomDatasets +from modelscope.msdatasets.dataset_cls.custom_datasets import ( + CUSTOM_DATASETS, TorchCustomDataset) +from modelscope.msdatasets.dataset_cls.custom_datasets.sidd_image_denoising.data_utils import ( img2tensor, padding) -from modelscope.msdatasets.task_datasets.sidd_image_denoising.transforms import ( +from modelscope.msdatasets.dataset_cls.custom_datasets.sidd_image_denoising.transforms import ( augment, paired_random_crop) -from modelscope.msdatasets.task_datasets.torch_base_dataset import \ - TorchTaskDataset from modelscope.utils.constant import Tasks @@ -18,9 +17,9 @@ def default_loader(path): return cv2.imread(path, cv2.IMREAD_UNCHANGED).astype(np.float32) / 255.0 -@TASK_DATASETS.register_module( - Tasks.image_deblurring, module_name=Datasets.PairedDataset) -class GoproImageDeblurringDataset(TorchTaskDataset): +@CUSTOM_DATASETS.register_module( + Tasks.image_deblurring, module_name=CustomDatasets.PairedDataset) +class GoproImageDeblurringDataset(TorchCustomDataset): """Paired image dataset for image restoration. """ diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/hand_2d_keypoints/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/hand_2d_keypoints/__init__.py new file mode 100644 index 00000000..3af670e3 --- /dev/null +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/hand_2d_keypoints/__init__.py @@ -0,0 +1,22 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .hand_2d_keypoints_dataset import HandCocoWholeBodyDataset + +else: + _import_structure = { + 'hand_2d_keypoints_dataset': ['HandCocoWholeBodyDataset'] + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/msdatasets/cv/hand_2d_keypoints/hand_2d_keypoints_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/hand_2d_keypoints/hand_2d_keypoints_dataset.py similarity index 79% rename from modelscope/msdatasets/cv/hand_2d_keypoints/hand_2d_keypoints_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/hand_2d_keypoints/hand_2d_keypoints_dataset.py index 89ee0bb8..c6163715 100644 --- a/modelscope/msdatasets/cv/hand_2d_keypoints/hand_2d_keypoints_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/hand_2d_keypoints/hand_2d_keypoints_dataset.py @@ -2,15 +2,16 @@ from easycv.datasets.pose import \ HandCocoWholeBodyDataset as _HandCocoWholeBodyDataset -from modelscope.metainfo import Datasets -from modelscope.msdatasets.cv.easycv_base import EasyCVBaseDataset -from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS +from modelscope.metainfo import CustomDatasets +from modelscope.msdatasets.dataset_cls.custom_datasets import CUSTOM_DATASETS +from modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base import \ + EasyCVBaseDataset from modelscope.utils.constant import Tasks -@TASK_DATASETS.register_module( +@CUSTOM_DATASETS.register_module( group_key=Tasks.hand_2d_keypoints, - module_name=Datasets.HandCocoWholeBodyDataset) + module_name=CustomDatasets.HandCocoWholeBodyDataset) class HandCocoWholeBodyDataset(EasyCVBaseDataset, _HandCocoWholeBodyDataset): """EasyCV dataset for human hand 2d keypoints. 
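Each migrated subpackage keeps the lazy-import boilerplate shown in the new hand_2d_keypoints/__init__.py above, so importing modelscope.msdatasets.dataset_cls.custom_datasets stays cheap until a class is actually used. A sketch of the same convention for a hypothetical new subpackage; my_new_dataset and MyNewDataset are placeholder names, not part of this patch:

    # my_new_dataset/__init__.py (illustrative only)
    from typing import TYPE_CHECKING

    from modelscope.utils.import_utils import LazyImportModule

    if TYPE_CHECKING:
        from .my_new_dataset import MyNewDataset
    else:
        # Map submodule name -> public symbols, resolved on first access.
        _import_structure = {'my_new_dataset': ['MyNewDataset']}

        import sys

        sys.modules[__name__] = LazyImportModule(
            __name__,
            globals()['__file__'],
            _import_structure,
            module_spec=__spec__,
            extra_objects={},
        )
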
diff --git a/modelscope/msdatasets/cv/human_wholebody_keypoint/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/human_wholebody_keypoint/__init__.py similarity index 100% rename from modelscope/msdatasets/cv/human_wholebody_keypoint/__init__.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/human_wholebody_keypoint/__init__.py diff --git a/modelscope/msdatasets/cv/human_wholebody_keypoint/human_wholebody_keypoint_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/human_wholebody_keypoint/human_wholebody_keypoint_dataset.py similarity index 79% rename from modelscope/msdatasets/cv/human_wholebody_keypoint/human_wholebody_keypoint_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/human_wholebody_keypoint/human_wholebody_keypoint_dataset.py index fc9469f2..59c97af8 100644 --- a/modelscope/msdatasets/cv/human_wholebody_keypoint/human_wholebody_keypoint_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/human_wholebody_keypoint/human_wholebody_keypoint_dataset.py @@ -2,15 +2,16 @@ from easycv.datasets.pose import \ WholeBodyCocoTopDownDataset as _WholeBodyCocoTopDownDataset -from modelscope.metainfo import Datasets -from modelscope.msdatasets.cv.easycv_base import EasyCVBaseDataset -from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS +from modelscope.metainfo import CustomDatasets +from modelscope.msdatasets.dataset_cls.custom_datasets import CUSTOM_DATASETS +from modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base import \ + EasyCVBaseDataset from modelscope.utils.constant import Tasks -@TASK_DATASETS.register_module( +@CUSTOM_DATASETS.register_module( group_key=Tasks.human_wholebody_keypoint, - module_name=Datasets.HumanWholeBodyKeypointDataset) + module_name=CustomDatasets.HumanWholeBodyKeypointDataset) class WholeBodyCocoTopDownDataset(EasyCVBaseDataset, _WholeBodyCocoTopDownDataset): """EasyCV dataset for human whole body 2d keypoints. diff --git a/modelscope/msdatasets/cv/image_classification/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_classification/__init__.py similarity index 100% rename from modelscope/msdatasets/cv/image_classification/__init__.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/image_classification/__init__.py diff --git a/modelscope/msdatasets/cv/image_classification/classification_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_classification/classification_dataset.py similarity index 75% rename from modelscope/msdatasets/cv/image_classification/classification_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/image_classification/classification_dataset.py index ba73e472..386810c7 100644 --- a/modelscope/msdatasets/cv/image_classification/classification_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/image_classification/classification_dataset.py @@ -1,14 +1,16 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
from easycv.datasets.classification import ClsDataset as _ClsDataset -from modelscope.metainfo import Datasets -from modelscope.msdatasets.cv.easycv_base import EasyCVBaseDataset -from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS +from modelscope.metainfo import CustomDatasets +from modelscope.msdatasets.dataset_cls.custom_datasets import CUSTOM_DATASETS +from modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base import \ + EasyCVBaseDataset from modelscope.utils.constant import Tasks -@TASK_DATASETS.register_module( - group_key=Tasks.image_classification, module_name=Datasets.ClsDataset) +@CUSTOM_DATASETS.register_module( + group_key=Tasks.image_classification, + module_name=CustomDatasets.ClsDataset) class ClsDataset(_ClsDataset): """EasyCV dataset for classification. diff --git a/modelscope/msdatasets/cv/hand_2d_keypoints/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_inpainting/__init__.py similarity index 75% rename from modelscope/msdatasets/cv/hand_2d_keypoints/__init__.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/image_inpainting/__init__.py index 5c1c72c1..0c9552bd 100644 --- a/modelscope/msdatasets/cv/hand_2d_keypoints/__init__.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/image_inpainting/__init__.py @@ -4,13 +4,11 @@ from typing import TYPE_CHECKING from modelscope.utils.import_utils import LazyImportModule if TYPE_CHECKING: - from .hand_2d_keypoints_dataset import Hand2DKeypointDataset - + from .image_inpainting_dataset import ImageInpaintingDataset else: _import_structure = { - 'hand_2d_keypoints_dataset': ['Hand2DKeypointDataset'] + 'image_inpainting_dataset': ['ImageInpaintingDataset'], } - import sys sys.modules[__name__] = LazyImportModule( diff --git a/modelscope/msdatasets/task_datasets/image_inpainting/aug.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_inpainting/aug.py similarity index 100% rename from modelscope/msdatasets/task_datasets/image_inpainting/aug.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/image_inpainting/aug.py diff --git a/modelscope/msdatasets/task_datasets/image_inpainting/image_inpainting_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_inpainting/image_inpainting_dataset.py similarity index 97% rename from modelscope/msdatasets/task_datasets/image_inpainting/image_inpainting_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/image_inpainting/image_inpainting_dataset.py index 057b8f88..c7040c86 100644 --- a/modelscope/msdatasets/task_datasets/image_inpainting/image_inpainting_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/image_inpainting/image_inpainting_dataset.py @@ -3,20 +3,16 @@ Part of the implementation is borrowed and modified from LaMa, publicly available at https://github.com/saic-mdal/lama """ import glob -import os import os.path as osp from enum import Enum import albumentations as A import cv2 -import json import numpy as np -import torch from modelscope.metainfo import Models -from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS -from modelscope.msdatasets.task_datasets.torch_base_dataset import \ - TorchTaskDataset +from modelscope.msdatasets.dataset_cls.custom_datasets import ( + CUSTOM_DATASETS, TorchCustomDataset) from modelscope.utils.constant import Tasks from modelscope.utils.logger import get_logger from .aug import IAAAffine2, IAAPerspective2 @@ -296,9 +292,9 @@ def get_transforms(test_mode, out_size): return transform 
-@TASK_DATASETS.register_module( +@CUSTOM_DATASETS.register_module( Tasks.image_inpainting, module_name=Models.image_inpainting) -class ImageInpaintingDataset(TorchTaskDataset): +class ImageInpaintingDataset(TorchCustomDataset): def __init__(self, **kwargs): split_config = kwargs['split_config'] diff --git a/modelscope/msdatasets/task_datasets/image_instance_segmentation_coco_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_instance_segmentation_coco_dataset.py similarity index 98% rename from modelscope/msdatasets/task_datasets/image_instance_segmentation_coco_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/image_instance_segmentation_coco_dataset.py index 1c7bc249..4dd1af5a 100644 --- a/modelscope/msdatasets/task_datasets/image_instance_segmentation_coco_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/image_instance_segmentation_coco_dataset.py @@ -6,9 +6,9 @@ import numpy as np from pycocotools.coco import COCO from modelscope.metainfo import Models +from modelscope.msdatasets.dataset_cls.custom_datasets import ( + CUSTOM_DATASETS, TorchCustomDataset) from modelscope.utils.constant import Tasks -from .builder import TASK_DATASETS -from .torch_base_dataset import TorchTaskDataset DATASET_STRUCTURE = { 'train': { @@ -22,10 +22,10 @@ DATASET_STRUCTURE = { } -@TASK_DATASETS.register_module( +@CUSTOM_DATASETS.register_module( module_name=Models.cascade_mask_rcnn_swin, group_key=Tasks.image_segmentation) -class ImageInstanceSegmentationCocoDataset(TorchTaskDataset): +class ImageInstanceSegmentationCocoDataset(TorchCustomDataset): """Coco-style dataset for image instance segmentation. Args: diff --git a/modelscope/msdatasets/task_datasets/image_portrait_enhancement/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_portrait_enhancement/__init__.py similarity index 100% rename from modelscope/msdatasets/task_datasets/image_portrait_enhancement/__init__.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/image_portrait_enhancement/__init__.py diff --git a/modelscope/msdatasets/task_datasets/image_portrait_enhancement/data_utils.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_portrait_enhancement/data_utils.py similarity index 100% rename from modelscope/msdatasets/task_datasets/image_portrait_enhancement/data_utils.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/image_portrait_enhancement/data_utils.py diff --git a/modelscope/msdatasets/task_datasets/image_portrait_enhancement/image_portrait_enhancement_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_portrait_enhancement/image_portrait_enhancement_dataset.py similarity index 77% rename from modelscope/msdatasets/task_datasets/image_portrait_enhancement/image_portrait_enhancement_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/image_portrait_enhancement/image_portrait_enhancement_dataset.py index 58d40778..d2c03408 100644 --- a/modelscope/msdatasets/task_datasets/image_portrait_enhancement/image_portrait_enhancement_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/image_portrait_enhancement/image_portrait_enhancement_dataset.py @@ -3,10 +3,9 @@ import cv2 import numpy as np -from modelscope.metainfo import Datasets, Models -from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS -from modelscope.msdatasets.task_datasets.torch_base_dataset import \ - TorchTaskDataset +from modelscope.metainfo import CustomDatasets +from 
modelscope.msdatasets.dataset_cls.custom_datasets import ( + CUSTOM_DATASETS, TorchCustomDataset) from modelscope.utils.constant import Tasks from .data_utils import img2tensor @@ -15,9 +14,9 @@ def default_loader(path): return cv2.imread(path, cv2.IMREAD_COLOR).astype(np.float32) / 255.0 -@TASK_DATASETS.register_module( - Tasks.image_portrait_enhancement, module_name=Datasets.PairedDataset) -class ImagePortraitEnhancementDataset(TorchTaskDataset): +@CUSTOM_DATASETS.register_module( + Tasks.image_portrait_enhancement, module_name=CustomDatasets.PairedDataset) +class ImagePortraitEnhancementDataset(TorchCustomDataset): """Paired image dataset for image portrait enhancement. """ diff --git a/modelscope/msdatasets/task_datasets/image_quality_assessment_degradation/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_quality_assessment_degradation/__init__.py similarity index 100% rename from modelscope/msdatasets/task_datasets/image_quality_assessment_degradation/__init__.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/image_quality_assessment_degradation/__init__.py diff --git a/modelscope/msdatasets/task_datasets/image_quality_assessment_degradation/image_quality_assessment_degradation_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_quality_assessment_degradation/image_quality_assessment_degradation_dataset.py similarity index 81% rename from modelscope/msdatasets/task_datasets/image_quality_assessment_degradation/image_quality_assessment_degradation_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/image_quality_assessment_degradation/image_quality_assessment_degradation_dataset.py index 75826065..06f0453e 100644 --- a/modelscope/msdatasets/task_datasets/image_quality_assessment_degradation/image_quality_assessment_degradation_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/image_quality_assessment_degradation/image_quality_assessment_degradation_dataset.py @@ -1,21 +1,18 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -import cv2 -import numpy as np from torchvision import transforms from modelscope.metainfo import Models -from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS -from modelscope.msdatasets.task_datasets.torch_base_dataset import \ - TorchTaskDataset +from modelscope.msdatasets.dataset_cls.custom_datasets import ( + CUSTOM_DATASETS, TorchCustomDataset) from modelscope.preprocessors import LoadImage from modelscope.utils.constant import Tasks -@TASK_DATASETS.register_module( +@CUSTOM_DATASETS.register_module( Tasks.image_quality_assessment_degradation, module_name=Models.image_quality_assessment_degradation) -class ImageQualityAssessmentDegradationDataset(TorchTaskDataset): +class ImageQualityAssessmentDegradationDataset(TorchCustomDataset): """Paired image dataset for image quality assessment degradation. 
""" diff --git a/modelscope/msdatasets/task_datasets/image_quality_assmessment_mos/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_quality_assmessment_mos/__init__.py similarity index 100% rename from modelscope/msdatasets/task_datasets/image_quality_assmessment_mos/__init__.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/image_quality_assmessment_mos/__init__.py diff --git a/modelscope/msdatasets/task_datasets/image_quality_assmessment_mos/image_quality_assessment_mos_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_quality_assmessment_mos/image_quality_assessment_mos_dataset.py similarity index 77% rename from modelscope/msdatasets/task_datasets/image_quality_assmessment_mos/image_quality_assessment_mos_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/image_quality_assmessment_mos/image_quality_assessment_mos_dataset.py index 3d8ed297..28c163eb 100644 --- a/modelscope/msdatasets/task_datasets/image_quality_assmessment_mos/image_quality_assessment_mos_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/image_quality_assmessment_mos/image_quality_assessment_mos_dataset.py @@ -1,20 +1,16 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -import cv2 -import numpy as np - from modelscope.metainfo import Models -from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS -from modelscope.msdatasets.task_datasets.torch_base_dataset import \ - TorchTaskDataset +from modelscope.msdatasets.dataset_cls.custom_datasets import ( + CUSTOM_DATASETS, TorchCustomDataset) from modelscope.preprocessors.cv import ImageQualityAssessmentMosPreprocessor from modelscope.utils.constant import Tasks -@TASK_DATASETS.register_module( +@CUSTOM_DATASETS.register_module( Tasks.image_quality_assessment_mos, module_name=Models.image_quality_assessment_mos) -class ImageQualityAssessmentMosDataset(TorchTaskDataset): +class ImageQualityAssessmentMosDataset(TorchCustomDataset): """Paired image dataset for image quality assessment mos. """ diff --git a/modelscope/msdatasets/cv/image_semantic_segmentation/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_semantic_segmentation/__init__.py similarity index 100% rename from modelscope/msdatasets/cv/image_semantic_segmentation/__init__.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/image_semantic_segmentation/__init__.py diff --git a/modelscope/msdatasets/cv/image_semantic_segmentation/segmentation_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_semantic_segmentation/segmentation_dataset.py similarity index 81% rename from modelscope/msdatasets/cv/image_semantic_segmentation/segmentation_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/image_semantic_segmentation/segmentation_dataset.py index b1316e2e..71e7c42b 100644 --- a/modelscope/msdatasets/cv/image_semantic_segmentation/segmentation_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/image_semantic_segmentation/segmentation_dataset.py @@ -1,14 +1,15 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
from easycv.datasets.segmentation import SegDataset as _SegDataset -from modelscope.metainfo import Datasets -from modelscope.msdatasets.cv.easycv_base import EasyCVBaseDataset -from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS +from modelscope.metainfo import CustomDatasets +from modelscope.msdatasets.dataset_cls.custom_datasets import CUSTOM_DATASETS +from modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base import \ + EasyCVBaseDataset from modelscope.utils.constant import Tasks -@TASK_DATASETS.register_module( - group_key=Tasks.image_segmentation, module_name=Datasets.SegDataset) +@CUSTOM_DATASETS.register_module( + group_key=Tasks.image_segmentation, module_name=CustomDatasets.SegDataset) class SegDataset(EasyCVBaseDataset, _SegDataset): """EasyCV dataset for Semantic segmentation. For more details, please refer to : diff --git a/modelscope/msdatasets/task_datasets/language_guided_video_summarization_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/language_guided_video_summarization_dataset.py similarity index 94% rename from modelscope/msdatasets/task_datasets/language_guided_video_summarization_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/language_guided_video_summarization_dataset.py index 94313e15..756d0050 100644 --- a/modelscope/msdatasets/task_datasets/language_guided_video_summarization_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/language_guided_video_summarization_dataset.py @@ -25,16 +25,15 @@ import numpy as np import torch from modelscope.metainfo import Models -from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS -from modelscope.msdatasets.task_datasets.torch_base_dataset import \ - TorchTaskDataset +from modelscope.msdatasets.dataset_cls.custom_datasets import ( + CUSTOM_DATASETS, TorchCustomDataset) from modelscope.utils.constant import Tasks -@TASK_DATASETS.register_module( +@CUSTOM_DATASETS.register_module( Tasks.language_guided_video_summarization, module_name=Models.language_guided_video_summarization) -class LanguageGuidedVideoSummarizationDataset(TorchTaskDataset): +class LanguageGuidedVideoSummarizationDataset(TorchCustomDataset): def __init__(self, mode, opt, root_dir): self.mode = mode diff --git a/modelscope/msdatasets/task_datasets/mgeo_ranking_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/mgeo_ranking_dataset.py similarity index 93% rename from modelscope/msdatasets/task_datasets/mgeo_ranking_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/mgeo_ranking_dataset.py index 9adccd7c..536451ae 100644 --- a/modelscope/msdatasets/task_datasets/mgeo_ranking_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/mgeo_ranking_dataset.py @@ -1,24 +1,20 @@ # Copyright (c) Alibaba, Inc. and its affiliates.
import random -from dataclasses import dataclass -from typing import Any, Dict, List, Tuple, Union +from typing import Any, List, Union import json import torch -from datasets import Dataset, IterableDataset, concatenate_datasets from torch.utils.data import ConcatDataset -from transformers import DataCollatorWithPadding from modelscope.metainfo import Models +from modelscope.msdatasets.dataset_cls.custom_datasets import ( + CUSTOM_DATASETS, TorchCustomDataset) from modelscope.utils.constant import ModeKeys, Tasks -from .base import TaskDataset -from .builder import TASK_DATASETS -from .torch_base_dataset import TorchTaskDataset -@TASK_DATASETS.register_module( +@CUSTOM_DATASETS.register_module( group_key=Tasks.text_ranking, module_name=Models.mgeo) -class MGeoRankingDataset(TorchTaskDataset): +class MGeoRankingDataset(TorchCustomDataset): def __init__(self, datasets: Union[Any, List[Any]], diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/movie_scene_segmentation/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/movie_scene_segmentation/__init__.py new file mode 100644 index 00000000..6157e9e8 --- /dev/null +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/movie_scene_segmentation/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .movie_scene_segmentation_dataset import MovieSceneSegmentationDataset +else: + _import_structure = { + 'movie_scene_segmentation_dataset': ['MovieSceneSegmentationDataset'], + } + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/msdatasets/task_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py similarity index 94% rename from modelscope/msdatasets/task_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py index 49991b11..041976dd 100644 --- a/modelscope/msdatasets/task_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py @@ -10,9 +10,8 @@ import torch from torchvision.datasets.folder import pil_loader from modelscope.metainfo import Models -from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS -from modelscope.msdatasets.task_datasets.torch_base_dataset import \ - TorchTaskDataset +from modelscope.msdatasets.dataset_cls.custom_datasets.builder import \ + CUSTOM_DATASETS from modelscope.utils.constant import Tasks from . import sampler @@ -30,9 +29,9 @@ DATASET_STRUCTURE = { } -@TASK_DATASETS.register_module( - Tasks.movie_scene_segmentation, module_name=Models.resnet50_bert) -class MovieSceneSegmentationDataset(TorchTaskDataset): +@CUSTOM_DATASETS.register_module( + group_key=Tasks.movie_scene_segmentation, module_name=Models.resnet50_bert) +class MovieSceneSegmentationDataset(torch.utils.data.Dataset): """dataset for movie scene segmentation. 
Args: diff --git a/modelscope/msdatasets/task_datasets/movie_scene_segmentation/sampler.py b/modelscope/msdatasets/dataset_cls/custom_datasets/movie_scene_segmentation/sampler.py similarity index 100% rename from modelscope/msdatasets/task_datasets/movie_scene_segmentation/sampler.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/movie_scene_segmentation/sampler.py diff --git a/modelscope/msdatasets/cv/object_detection/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/object_detection/__init__.py similarity index 100% rename from modelscope/msdatasets/cv/object_detection/__init__.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/object_detection/__init__.py diff --git a/modelscope/msdatasets/cv/object_detection/detection_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/object_detection/detection_dataset.py similarity index 85% rename from modelscope/msdatasets/cv/object_detection/detection_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/object_detection/detection_dataset.py index c7e45eea..66c11f64 100644 --- a/modelscope/msdatasets/cv/object_detection/detection_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/object_detection/detection_dataset.py @@ -1,20 +1,21 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -import os.path as osp from easycv.datasets.detection import DetDataset as _DetDataset from easycv.datasets.detection import \ DetImagesMixDataset as _DetImagesMixDataset -from modelscope.metainfo import Datasets -from modelscope.msdatasets.cv.easycv_base import EasyCVBaseDataset -from modelscope.msdatasets.task_datasets import TASK_DATASETS +from modelscope.metainfo import CustomDatasets +from modelscope.msdatasets.dataset_cls.custom_datasets import CUSTOM_DATASETS +from modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base import \ + EasyCVBaseDataset from modelscope.utils.constant import Tasks -@TASK_DATASETS.register_module( - group_key=Tasks.image_object_detection, module_name=Datasets.DetDataset) -@TASK_DATASETS.register_module( - group_key=Tasks.image_segmentation, module_name=Datasets.DetDataset) +@CUSTOM_DATASETS.register_module( + group_key=Tasks.image_object_detection, + module_name=CustomDatasets.DetDataset) +@CUSTOM_DATASETS.register_module( + group_key=Tasks.image_segmentation, module_name=CustomDatasets.DetDataset) class DetDataset(EasyCVBaseDataset, _DetDataset): """EasyCV dataset for object detection. For more details, please refer to https://github.com/alibaba/EasyCV/blob/master/easycv/datasets/detection/raw.py . @@ -47,12 +48,12 @@ class DetDataset(EasyCVBaseDataset, _DetDataset): _DetDataset.__init__(self, *args, **kwargs) -@TASK_DATASETS.register_module( +@CUSTOM_DATASETS.register_module( group_key=Tasks.image_object_detection, - module_name=Datasets.DetImagesMixDataset) -@TASK_DATASETS.register_module( + module_name=CustomDatasets.DetImagesMixDataset) +@CUSTOM_DATASETS.register_module( group_key=Tasks.domain_specific_object_detection, - module_name=Datasets.DetImagesMixDataset) + module_name=CustomDatasets.DetImagesMixDataset) class DetImagesMixDataset(EasyCVBaseDataset, _DetImagesMixDataset): """EasyCV dataset for object detection, a wrapper of multiple images mixed dataset. 
Suitable for training on multiple images mixed data augmentation like diff --git a/modelscope/msdatasets/task_datasets/ocr_detection/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/__init__.py similarity index 78% rename from modelscope/msdatasets/task_datasets/ocr_detection/__init__.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/__init__.py index 5afd1ded..6a3847b9 100644 --- a/modelscope/msdatasets/task_datasets/ocr_detection/__init__.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/__init__.py @@ -1,3 +1,4 @@ # Copyright (c) Alibaba, Inc. and its affiliates. from .data_loader import DataLoader from .image_dataset import ImageDataset +from .measures import QuadMeasurer diff --git a/modelscope/msdatasets/task_datasets/ocr_detection/augmenter.py b/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/augmenter.py similarity index 100% rename from modelscope/msdatasets/task_datasets/ocr_detection/augmenter.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/augmenter.py diff --git a/modelscope/msdatasets/task_datasets/ocr_detection/data_loader.py b/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/data_loader.py similarity index 100% rename from modelscope/msdatasets/task_datasets/ocr_detection/data_loader.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/data_loader.py diff --git a/modelscope/msdatasets/task_datasets/ocr_detection/image_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/image_dataset.py similarity index 100% rename from modelscope/msdatasets/task_datasets/ocr_detection/image_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/image_dataset.py diff --git a/modelscope/msdatasets/task_datasets/ocr_detection/measures/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/measures/__init__.py similarity index 100% rename from modelscope/msdatasets/task_datasets/ocr_detection/measures/__init__.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/measures/__init__.py diff --git a/modelscope/msdatasets/task_datasets/ocr_detection/measures/iou_evaluator.py b/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/measures/iou_evaluator.py similarity index 100% rename from modelscope/msdatasets/task_datasets/ocr_detection/measures/iou_evaluator.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/measures/iou_evaluator.py diff --git a/modelscope/msdatasets/task_datasets/ocr_detection/measures/quad_measurer.py b/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/measures/quad_measurer.py similarity index 100% rename from modelscope/msdatasets/task_datasets/ocr_detection/measures/quad_measurer.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/measures/quad_measurer.py diff --git a/modelscope/msdatasets/task_datasets/ocr_detection/processes/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/__init__.py similarity index 100% rename from modelscope/msdatasets/task_datasets/ocr_detection/processes/__init__.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/__init__.py diff --git a/modelscope/msdatasets/task_datasets/ocr_detection/processes/augment_data.py b/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/augment_data.py similarity index 100% rename from 
modelscope/msdatasets/task_datasets/ocr_detection/processes/augment_data.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/augment_data.py diff --git a/modelscope/msdatasets/task_datasets/ocr_detection/processes/data_process.py b/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/data_process.py similarity index 100% rename from modelscope/msdatasets/task_datasets/ocr_detection/processes/data_process.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/data_process.py diff --git a/modelscope/msdatasets/task_datasets/ocr_detection/processes/make_border_map.py b/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/make_border_map.py similarity index 100% rename from modelscope/msdatasets/task_datasets/ocr_detection/processes/make_border_map.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/make_border_map.py diff --git a/modelscope/msdatasets/task_datasets/ocr_detection/processes/make_icdar_data.py b/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/make_icdar_data.py similarity index 100% rename from modelscope/msdatasets/task_datasets/ocr_detection/processes/make_icdar_data.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/make_icdar_data.py diff --git a/modelscope/msdatasets/task_datasets/ocr_detection/processes/make_seg_detection_data.py b/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/make_seg_detection_data.py similarity index 100% rename from modelscope/msdatasets/task_datasets/ocr_detection/processes/make_seg_detection_data.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/make_seg_detection_data.py diff --git a/modelscope/msdatasets/task_datasets/ocr_detection/processes/normalize_image.py b/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/normalize_image.py similarity index 100% rename from modelscope/msdatasets/task_datasets/ocr_detection/processes/normalize_image.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/normalize_image.py diff --git a/modelscope/msdatasets/task_datasets/ocr_detection/processes/random_crop_data.py b/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/random_crop_data.py similarity index 100% rename from modelscope/msdatasets/task_datasets/ocr_detection/processes/random_crop_data.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/random_crop_data.py diff --git a/modelscope/msdatasets/task_datasets/ocr_recognition_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_recognition_dataset.py similarity index 87% rename from modelscope/msdatasets/task_datasets/ocr_recognition_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/ocr_recognition_dataset.py index 8be657f0..bc9cd3ca 100644 --- a/modelscope/msdatasets/task_datasets/ocr_recognition_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_recognition_dataset.py @@ -9,9 +9,10 @@ import torch from PIL import Image from modelscope.metainfo import Models -from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS -from modelscope.msdatasets.task_datasets.torch_base_dataset import \ - TorchTaskDataset +from modelscope.msdatasets.dataset_cls.custom_datasets.builder import \ + CUSTOM_DATASETS +from 
modelscope.msdatasets.dataset_cls.custom_datasets.torch_custom_dataset import \ + TorchCustomDataset from modelscope.utils.constant import Tasks from modelscope.utils.logger import get_logger @@ -29,9 +30,9 @@ def Q2B(uchar): return chr(inside_code) -@TASK_DATASETS.register_module( +@CUSTOM_DATASETS.register_module( Tasks.ocr_recognition, module_name=Models.ocr_recognition) -class OCRRecognitionDataset(TorchTaskDataset): +class OCRRecognitionDataset(TorchCustomDataset): def __init__(self, **kwargs): split_config = kwargs['split_config'] diff --git a/modelscope/msdatasets/task_datasets/reds_image_deblurring_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/reds_image_deblurring_dataset.py similarity index 74% rename from modelscope/msdatasets/task_datasets/reds_image_deblurring_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/reds_image_deblurring_dataset.py index 17b731bc..b03c1d06 100644 --- a/modelscope/msdatasets/task_datasets/reds_image_deblurring_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/reds_image_deblurring_dataset.py @@ -3,14 +3,13 @@ import cv2 import numpy as np -from modelscope.metainfo import Datasets -from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS -from modelscope.msdatasets.task_datasets.sidd_image_denoising.data_utils import ( +from modelscope.metainfo import CustomDatasets +from modelscope.msdatasets.dataset_cls.custom_datasets import ( + CUSTOM_DATASETS, TorchCustomDataset) +from modelscope.msdatasets.dataset_cls.custom_datasets.sidd_image_denoising.data_utils import ( img2tensor, padding) -from modelscope.msdatasets.task_datasets.sidd_image_denoising.transforms import ( +from modelscope.msdatasets.dataset_cls.custom_datasets.sidd_image_denoising.transforms import ( augment, paired_random_crop) -from modelscope.msdatasets.task_datasets.torch_base_dataset import \ - TorchTaskDataset from modelscope.utils.constant import Tasks @@ -18,9 +17,9 @@ def default_loader(path): return cv2.imread(path, cv2.IMREAD_UNCHANGED).astype(np.float32) / 255.0 -@TASK_DATASETS.register_module( - Tasks.image_deblurring, module_name=Datasets.PairedDataset) -class RedsImageDeblurringDataset(TorchTaskDataset): +@CUSTOM_DATASETS.register_module( + Tasks.image_deblurring, module_name=CustomDatasets.PairedDataset) +class RedsImageDeblurringDataset(TorchCustomDataset): """Paired image dataset for image restoration. """ diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/referring_video_object_segmentation/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/referring_video_object_segmentation/__init__.py new file mode 100644 index 00000000..7349e494 --- /dev/null +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/referring_video_object_segmentation/__init__.py @@ -0,0 +1,21 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .referring_video_object_segmentation_dataset import ReferringVideoObjectSegmentationDataset +else: + _import_structure = { + 'referring_video_object_segmentation_dataset': + ['ReferringVideoObjectSegmentationDataset'], + } + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/msdatasets/task_datasets/referring_video_object_segmentation/referring_video_object_segmentation_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/referring_video_object_segmentation/referring_video_object_segmentation_dataset.py similarity index 98% rename from modelscope/msdatasets/task_datasets/referring_video_object_segmentation/referring_video_object_segmentation_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/referring_video_object_segmentation/referring_video_object_segmentation_dataset.py index 8b6d22a4..4493fd96 100644 --- a/modelscope/msdatasets/task_datasets/referring_video_object_segmentation/referring_video_object_segmentation_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/referring_video_object_segmentation/referring_video_object_segmentation_dataset.py @@ -18,9 +18,8 @@ from tqdm import tqdm from modelscope.metainfo import Models from modelscope.models.cv.referring_video_object_segmentation.utils import \ nested_tensor_from_videos_list -from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS -from modelscope.msdatasets.task_datasets.torch_base_dataset import \ - TorchTaskDataset +from modelscope.msdatasets.dataset_cls.custom_datasets import ( + CUSTOM_DATASETS, TorchCustomDataset) from modelscope.utils.constant import Tasks from modelscope.utils.logger import get_logger from .
import transformers as T @@ -33,10 +32,10 @@ def get_image_id(video_id, frame_idx, ref_instance_a2d_id): return image_id -@TASK_DATASETS.register_module( +@CUSTOM_DATASETS.register_module( Tasks.referring_video_object_segmentation, module_name=Models.referring_video_object_segmentation) -class ReferringVideoObjectSegmentationDataset(TorchTaskDataset): +class ReferringVideoObjectSegmentationDataset(TorchCustomDataset): def __init__(self, **kwargs): split_config = kwargs['split_config'] diff --git a/modelscope/msdatasets/task_datasets/referring_video_object_segmentation/transformers.py b/modelscope/msdatasets/dataset_cls/custom_datasets/referring_video_object_segmentation/transformers.py similarity index 100% rename from modelscope/msdatasets/task_datasets/referring_video_object_segmentation/transformers.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/referring_video_object_segmentation/transformers.py diff --git a/modelscope/msdatasets/task_datasets/sidd_image_denoising/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/sidd_image_denoising/__init__.py similarity index 100% rename from modelscope/msdatasets/task_datasets/sidd_image_denoising/__init__.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/sidd_image_denoising/__init__.py diff --git a/modelscope/msdatasets/task_datasets/sidd_image_denoising/data_utils.py b/modelscope/msdatasets/dataset_cls/custom_datasets/sidd_image_denoising/data_utils.py similarity index 100% rename from modelscope/msdatasets/task_datasets/sidd_image_denoising/data_utils.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/sidd_image_denoising/data_utils.py diff --git a/modelscope/msdatasets/task_datasets/sidd_image_denoising/sidd_image_denoising_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/sidd_image_denoising/sidd_image_denoising_dataset.py similarity index 87% rename from modelscope/msdatasets/task_datasets/sidd_image_denoising/sidd_image_denoising_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/sidd_image_denoising/sidd_image_denoising_dataset.py index 3f0cdae0..9369b991 100644 --- a/modelscope/msdatasets/task_datasets/sidd_image_denoising/sidd_image_denoising_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/sidd_image_denoising/sidd_image_denoising_dataset.py @@ -4,9 +4,8 @@ import cv2 import numpy as np from modelscope.metainfo import Models -from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS -from modelscope.msdatasets.task_datasets.torch_base_dataset import \ - TorchTaskDataset +from modelscope.msdatasets.dataset_cls.custom_datasets import ( + CUSTOM_DATASETS, TorchCustomDataset) from modelscope.utils.constant import Tasks from .data_utils import img2tensor, padding from .transforms import augment, paired_random_crop @@ -16,9 +15,9 @@ def default_loader(path): return cv2.imread(path, cv2.IMREAD_UNCHANGED).astype(np.float32) / 255.0 -@TASK_DATASETS.register_module( +@CUSTOM_DATASETS.register_module( Tasks.image_denoising, module_name=Models.nafnet) -class SiddImageDenoisingDataset(TorchTaskDataset): +class SiddImageDenoisingDataset(TorchCustomDataset): """Paired image dataset for image restoration. 
""" diff --git a/modelscope/msdatasets/task_datasets/sidd_image_denoising/transforms.py b/modelscope/msdatasets/dataset_cls/custom_datasets/sidd_image_denoising/transforms.py similarity index 100% rename from modelscope/msdatasets/task_datasets/sidd_image_denoising/transforms.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/sidd_image_denoising/transforms.py diff --git a/modelscope/msdatasets/task_datasets/text_ranking_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/text_ranking_dataset.py similarity index 92% rename from modelscope/msdatasets/task_datasets/text_ranking_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/text_ranking_dataset.py index 19f07110..46c64bbf 100644 --- a/modelscope/msdatasets/task_datasets/text_ranking_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/text_ranking_dataset.py @@ -1,25 +1,21 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import random -from dataclasses import dataclass -from typing import Any, Dict, List, Tuple, Union +from typing import Any, List, Union import torch -from datasets import Dataset, IterableDataset, concatenate_datasets from torch.utils.data import ConcatDataset -from transformers import DataCollatorWithPadding from modelscope.metainfo import Models +from modelscope.msdatasets.dataset_cls.custom_datasets import ( + CUSTOM_DATASETS, TorchCustomDataset) from modelscope.utils.constant import ModeKeys, Tasks -from .base import TaskDataset -from .builder import TASK_DATASETS -from .torch_base_dataset import TorchTaskDataset -@TASK_DATASETS.register_module( +@CUSTOM_DATASETS.register_module( group_key=Tasks.text_ranking, module_name=Models.bert) -@TASK_DATASETS.register_module( +@CUSTOM_DATASETS.register_module( group_key=Tasks.sentence_embedding, module_name=Models.bert) -class TextRankingDataset(TorchTaskDataset): +class TextRankingDataset(TorchCustomDataset): def __init__(self, datasets: Union[Any, List[Any]], diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/torch_custom_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/torch_custom_dataset.py new file mode 100644 index 00000000..54ad55b7 --- /dev/null +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/torch_custom_dataset.py @@ -0,0 +1,51 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import Any, List, Union + +import torch.utils.data +from torch.utils.data import ConcatDataset as TorchConcatDataset + +from modelscope.utils.constant import ModeKeys + + +class TorchCustomDataset(torch.utils.data.Dataset): + """The custom dataset base class for all the torch-based task processors. + """ + + def __init__(self, + datasets: Union[Any, List[Any]], + mode=ModeKeys.TRAIN, + preprocessor=None, + **kwargs): + self.trainer = None + self.mode = mode + self.preprocessor = preprocessor + self._inner_dataset = self.prepare_dataset(datasets) + + def __getitem__(self, index) -> Any: + return self.preprocessor( + self._inner_dataset[index] + ) if self.preprocessor else self._inner_dataset[index] + + def __len__(self): + return len(self._inner_dataset) + + def prepare_dataset(self, datasets: Union[Any, List[Any]]) -> Any: + """Prepare a dataset. + + User can process the input datasets in a whole dataset perspective. + This method gives a default implementation of datasets merging, user can override this + method to write custom logics. + + Args: + datasets: The original dataset(s) + + Returns: A single dataset, which may be created after merging. 
+ + """ + if isinstance(datasets, List): + if len(datasets) == 1: + return datasets[0] + elif len(datasets) > 1: + return TorchConcatDataset(datasets) + else: + return datasets diff --git a/modelscope/msdatasets/task_datasets/veco_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/veco_dataset.py similarity index 91% rename from modelscope/msdatasets/task_datasets/veco_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/veco_dataset.py index df7c6483..047849bc 100644 --- a/modelscope/msdatasets/task_datasets/veco_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/veco_dataset.py @@ -5,13 +5,13 @@ import numpy as np from datasets import Dataset, IterableDataset, concatenate_datasets from modelscope.metainfo import Models +from modelscope.msdatasets.dataset_cls.custom_datasets import ( + CUSTOM_DATASETS, TorchCustomDataset) from modelscope.utils.constant import Tasks -from .builder import TASK_DATASETS -from .torch_base_dataset import TorchTaskDataset -@TASK_DATASETS.register_module(module_name=Models.veco, group_key=Tasks.nli) -class VecoDataset(TorchTaskDataset): +@CUSTOM_DATASETS.register_module(module_name=Models.veco, group_key=Tasks.nli) +class VecoDataset(TorchCustomDataset): def __init__(self, datasets: Union[Any, List[Any]], diff --git a/modelscope/msdatasets/task_datasets/video_frame_interpolation/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/video_frame_interpolation/__init__.py similarity index 100% rename from modelscope/msdatasets/task_datasets/video_frame_interpolation/__init__.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/video_frame_interpolation/__init__.py diff --git a/modelscope/msdatasets/task_datasets/video_frame_interpolation/data_utils.py b/modelscope/msdatasets/dataset_cls/custom_datasets/video_frame_interpolation/data_utils.py similarity index 100% rename from modelscope/msdatasets/task_datasets/video_frame_interpolation/data_utils.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/video_frame_interpolation/data_utils.py diff --git a/modelscope/msdatasets/task_datasets/video_frame_interpolation/video_frame_interpolation_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/video_frame_interpolation/video_frame_interpolation_dataset.py similarity index 79% rename from modelscope/msdatasets/task_datasets/video_frame_interpolation/video_frame_interpolation_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/video_frame_interpolation/video_frame_interpolation_dataset.py index 44b965a7..6f47906d 100644 --- a/modelscope/msdatasets/task_datasets/video_frame_interpolation/video_frame_interpolation_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/video_frame_interpolation/video_frame_interpolation_dataset.py @@ -1,16 +1,13 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
-from collections import defaultdict - import cv2 import numpy as np import torch from modelscope.metainfo import Models -from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS -from modelscope.msdatasets.task_datasets.torch_base_dataset import \ - TorchTaskDataset -from modelscope.msdatasets.task_datasets.video_frame_interpolation.data_utils import ( +from modelscope.msdatasets.dataset_cls.custom_datasets import ( + CUSTOM_DATASETS, TorchCustomDataset) +from modelscope.msdatasets.dataset_cls.custom_datasets.video_frame_interpolation.data_utils import ( img2tensor, img_padding) from modelscope.utils.constant import Tasks @@ -19,10 +16,10 @@ def default_loader(path): return cv2.imread(path, cv2.IMREAD_UNCHANGED).astype(np.float32) -@TASK_DATASETS.register_module( +@CUSTOM_DATASETS.register_module( Tasks.video_frame_interpolation, module_name=Models.video_frame_interpolation) -class VideoFrameInterpolationDataset(TorchTaskDataset): +class VideoFrameInterpolationDataset(TorchCustomDataset): """Dataset for video frame-interpolation. """ diff --git a/modelscope/msdatasets/task_datasets/video_stabilization/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/video_stabilization/__init__.py similarity index 100% rename from modelscope/msdatasets/task_datasets/video_stabilization/__init__.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/video_stabilization/__init__.py diff --git a/modelscope/msdatasets/task_datasets/video_stabilization/video_stabilization_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/video_stabilization/video_stabilization_dataset.py similarity index 71% rename from modelscope/msdatasets/task_datasets/video_stabilization/video_stabilization_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/video_stabilization/video_stabilization_dataset.py index b0e6bdef..a0e0604c 100644 --- a/modelscope/msdatasets/task_datasets/video_stabilization/video_stabilization_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/video_stabilization/video_stabilization_dataset.py @@ -1,15 +1,14 @@ # Copyright (c) Alibaba, Inc. and its affiliates. from modelscope.metainfo import Models -from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS -from modelscope.msdatasets.task_datasets.torch_base_dataset import \ - TorchTaskDataset +from modelscope.msdatasets.dataset_cls.custom_datasets import ( + CUSTOM_DATASETS, TorchCustomDataset) from modelscope.utils.constant import Tasks -@TASK_DATASETS.register_module( +@CUSTOM_DATASETS.register_module( Tasks.video_stabilization, module_name=Models.video_stabilization) -class VideoStabilizationDataset(TorchTaskDataset): +class VideoStabilizationDataset(TorchCustomDataset): """Paired video dataset for video stabilization. 
""" diff --git a/modelscope/msdatasets/task_datasets/video_summarization_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/video_summarization_dataset.py similarity index 94% rename from modelscope/msdatasets/task_datasets/video_summarization_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/video_summarization_dataset.py index 02639be8..4d6e0155 100644 --- a/modelscope/msdatasets/task_datasets/video_summarization_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/video_summarization_dataset.py @@ -8,11 +8,11 @@ import json import numpy as np import torch -from modelscope.msdatasets.task_datasets.torch_base_dataset import \ - TorchTaskDataset +from modelscope.msdatasets.dataset_cls.custom_datasets import \ + TorchCustomDataset -class VideoSummarizationDataset(TorchTaskDataset): +class VideoSummarizationDataset(TorchCustomDataset): def __init__(self, mode, opt, root_dir): self.mode = mode diff --git a/modelscope/msdatasets/task_datasets/video_super_resolution/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/video_super_resolution/__init__.py similarity index 100% rename from modelscope/msdatasets/task_datasets/video_super_resolution/__init__.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/video_super_resolution/__init__.py diff --git a/modelscope/msdatasets/task_datasets/video_super_resolution/video_super_resolution_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/video_super_resolution/video_super_resolution_dataset.py similarity index 89% rename from modelscope/msdatasets/task_datasets/video_super_resolution/video_super_resolution_dataset.py rename to modelscope/msdatasets/dataset_cls/custom_datasets/video_super_resolution/video_super_resolution_dataset.py index 69faa527..86e07db1 100644 --- a/modelscope/msdatasets/task_datasets/video_super_resolution/video_super_resolution_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/video_super_resolution/video_super_resolution_dataset.py @@ -7,9 +7,8 @@ import numpy as np import torch from modelscope.metainfo import Models -from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS -from modelscope.msdatasets.task_datasets.torch_base_dataset import \ - TorchTaskDataset +from modelscope.msdatasets.dataset_cls.custom_datasets import ( + CUSTOM_DATASETS, TorchCustomDataset) from modelscope.utils.constant import Tasks @@ -42,9 +41,9 @@ def img2tensor(imgs, bgr2rgb=True, float32=True): return _totensor(imgs, bgr2rgb, float32) -@TASK_DATASETS.register_module( +@CUSTOM_DATASETS.register_module( Tasks.video_super_resolution, module_name=Models.real_basicvsr) -class VideoSuperResolutionDataset(TorchTaskDataset): +class VideoSuperResolutionDataset(TorchCustomDataset): """single video dataset for video super-resolution. 
""" diff --git a/modelscope/msdatasets/dataset_cls/dataset.py b/modelscope/msdatasets/dataset_cls/dataset.py index 57ee8150..4acf51b1 100644 --- a/modelscope/msdatasets/dataset_cls/dataset.py +++ b/modelscope/msdatasets/dataset_cls/dataset.py @@ -14,15 +14,19 @@ logger = get_logger() class ExternalDataset(object): + """Dataset class for custom datasets.""" def __init__(self, split_path_dict, config_kwargs): self.split_path_dict = split_path_dict self.config_kwargs = copy.deepcopy(config_kwargs) self.config_kwargs.update({'split_config': split_path_dict}) - self.ext_dataset = None + # dataset for specific extensions + self.spec_extension_dataset = None self.split_data_files = {k: [] for k, _ in split_path_dict.items()} - file_ext = '' + self.custom_map = {} + # the extension of file + file_ext = '' for split_name, split_dir in split_path_dict.items(): if isinstance(split_dir, str) and os.path.isdir(split_dir): split_file_names = os.listdir(split_dir) @@ -52,25 +56,27 @@ class ExternalDataset(object): if file_ext: file_ext = EXTENSIONS_TO_LOAD.get(file_ext) - self.ext_dataset = datasets.load_dataset( + self.spec_extension_dataset = datasets.load_dataset( file_ext, data_files=self.split_data_files, **config_kwargs) def __len__(self): - return len(self.split_path_dict - ) if not self.ext_dataset else self.ext_dataset.__len__() + return len( + self.split_path_dict + ) if not self.spec_extension_dataset else self.spec_extension_dataset.__len__( + ) def __getitem__(self, item): - if not self.ext_dataset: + if not self.spec_extension_dataset: return self.split_path_dict.get(item) else: - return self.ext_dataset.__getitem__(item) + return self.spec_extension_dataset.__getitem__(item) def __iter__(self): - if not self.ext_dataset: + if not self.spec_extension_dataset: for k, v in self.split_path_dict.items(): yield k, v else: - for k, v in self.ext_dataset.items(): + for k, v in self.spec_extension_dataset.items(): yield k, v @@ -99,3 +105,6 @@ class NativeIterableDataset(IterableDataset): entity = ret yield entity + + def __len__(self): + return 1 diff --git a/modelscope/msdatasets/meta/data_meta_config.py b/modelscope/msdatasets/meta/data_meta_config.py index 401a8e14..7f97108b 100644 --- a/modelscope/msdatasets/meta/data_meta_config.py +++ b/modelscope/msdatasets/meta/data_meta_config.py @@ -2,7 +2,35 @@ class DataMetaConfig(object): - """Modelscope data-meta config class.""" + """Modelscope data-meta config class. + + Attributes: + dataset_scripts(str): The local path of dataset scripts. + dataset_formation(:obj:`enum.Enum`): Dataset formation, refer to modelscope.utils.constant.DatasetFormations. + meta_cache_dir(str): Meta cache path. + meta_data_files(dict): Meta data mapping, Example: {'test': 'https://xxx/mytest.csv'} + zip_data_files(dict): Data files mapping, Example: {'test': 'pictures.zip'} + meta_args_map(dict): Meta arguments mapping, Example: {'test': {'file': 'pictures.zip'}, ...} + target_dataset_structure(dict): Dataset Structure, like + { + "default":{ + "train":{ + "meta":"my_train.csv", + "file":"pictures.zip" + } + }, + "subsetA":{ + "test":{ + "meta":"mytest.csv", + "file":"pictures.zip" + } + } + } + dataset_py_script(str): The python script path of dataset. 
+ meta_type_map(dict): The custom dataset mapping in meta data, + Example: {"type": "MovieSceneSegmentationCustomDataset", + "preprocessor": "movie-scene-segmentation-preprocessor"} + """ def __init__(self): self.dataset_scripts = None @@ -13,3 +41,4 @@ class DataMetaConfig(object): self.meta_args_map = None self.target_dataset_structure = None self.dataset_py_script = None + self.meta_type_map = {} diff --git a/modelscope/msdatasets/meta/data_meta_manager.py b/modelscope/msdatasets/meta/data_meta_manager.py index bba46e84..d90b8d5e 100644 --- a/modelscope/msdatasets/meta/data_meta_manager.py +++ b/modelscope/msdatasets/meta/data_meta_manager.py @@ -75,7 +75,7 @@ class DataMetaManager(object): elif download_mode == DownloadMode.FORCE_REDOWNLOAD: # Clean meta-files if os.path.exists(meta_cache_dir) and os.listdir(meta_cache_dir): - shutil.rmtree(meta_cache_dir) + shutil.rmtree(meta_cache_dir, ignore_errors=True) # Re-download meta-files with FileLock(lock_file=lock_file_path): os.makedirs(meta_cache_dir, exist_ok=True) @@ -129,12 +129,13 @@ class DataMetaManager(object): else: target_subset_name, target_dataset_structure = get_target_dataset_structure( dataset_json, subset_name, split) - meta_map, file_map, args_map = get_dataset_files( + meta_map, file_map, args_map, type_map = get_dataset_files( target_dataset_structure, dataset_name, namespace, version) data_meta_config.meta_data_files = meta_map data_meta_config.zip_data_files = file_map data_meta_config.meta_args_map = args_map + data_meta_config.meta_type_map = type_map data_meta_config.target_dataset_structure = target_dataset_structure self.dataset_context_config.data_meta_config = data_meta_config diff --git a/modelscope/msdatasets/ms_dataset.py b/modelscope/msdatasets/ms_dataset.py index f1c40e12..06f47874 100644 --- a/modelscope/msdatasets/ms_dataset.py +++ b/modelscope/msdatasets/ms_dataset.py @@ -16,19 +16,27 @@ from modelscope.msdatasets.context.dataset_context_config import \ from modelscope.msdatasets.data_loader.data_loader_manager import ( LocalDataLoaderManager, LocalDataLoaderType, RemoteDataLoaderManager, RemoteDataLoaderType) +from modelscope.msdatasets.dataset_cls.custom_datasets.builder import \ + build_custom_dataset from modelscope.msdatasets.dataset_cls.dataset import (ExternalDataset, NativeIterableDataset) -from modelscope.msdatasets.task_datasets.builder import build_task_dataset from modelscope.msdatasets.utils.delete_utils import DatasetDeleteManager from modelscope.msdatasets.utils.upload_utils import DatasetUploadManager -from modelscope.utils.config import ConfigDict +from modelscope.preprocessors import build_preprocessor +from modelscope.utils.config import Config, ConfigDict from modelscope.utils.config_ds import MS_DATASETS_CACHE from modelscope.utils.constant import (DEFAULT_DATASET_NAMESPACE, - DEFAULT_DATASET_REVISION, DownloadMode, - Hubs, UploadMode) + DEFAULT_DATASET_REVISION, ConfigFields, + DownloadMode, Hubs, ModeKeys, Tasks, + UploadMode) from modelscope.utils.import_utils import is_tf_available, is_torch_available from modelscope.utils.logger import get_logger +try: + from tensorflow.data import Dataset as TfDataset +except Exception as e: + print(e) + logger = get_logger() @@ -53,6 +61,7 @@ class MsDataset: """ # the underlying huggingface Dataset _hf_ds = None + _dataset_context_config: DatasetContextConfig = None def __init__(self, ds_instance: Union[Dataset, IterableDataset, ExternalDataset], @@ -63,6 +72,7 @@ class MsDataset: f'"target" must be a column of the 
dataset({list(self._hf_ds.features.keys())}, but got {target}' ) self.target = target + self.is_custom = False def __iter__(self): for item in self._hf_ds: @@ -77,10 +87,10 @@ def __len__(self): if isinstance(self._hf_ds, IterableDataset) or isinstance( self._hf_ds, NativeIterableDataset): - logger.error( - f'object of type `{self._hf_ds.__class__.__name__}` has no __len__()' + logger.warning( + f'object of type `{self._hf_ds.__class__.__name__}` has default length 1' ) - return None + return 1 return len(self._hf_ds) @property @@ -163,6 +173,7 @@ REUSE_DATASET_IF_EXISTS, cache_dir: Optional[str] = MS_DATASETS_CACHE, use_streaming: Optional[bool] = False, + custom_cfg: Optional[Config] = Config(), **config_kwargs, ) -> Union[dict, 'MsDataset', NativeIterableDataset]: """Load a MsDataset from the ModelScope Hub, Hugging Face Hub, urls, or a local dataset. @@ -191,6 +202,8 @@ use_streaming (bool, Optional): If set to True, no need to download all data files. Instead, it streams the data progressively, and returns NativeIterableDataset or a dict of NativeIterableDataset. + custom_cfg (Config, Optional): Model configuration; this can be used for custom datasets. + See https://modelscope.cn/docs/Configuration%E8%AF%A6%E8%A7%A3 **config_kwargs (additional keyword arguments): Keyword arguments to be passed Returns: @@ -245,305 +258,44 @@ dataset_inst = LocalDataLoaderManager( dataset_context_config).load_dataset( LocalDataLoaderType.HF_DATA_LOADER) - return MsDataset.to_ms_dataset(dataset_inst, target=target) + dataset_inst = MsDataset.to_ms_dataset(dataset_inst, target=target) + if isinstance(dataset_inst, MsDataset): + dataset_inst._dataset_context_config = dataset_context_config + if custom_cfg: + dataset_inst.to_custom_dataset( + custom_cfg=custom_cfg, **config_kwargs) + dataset_inst.is_custom = True + return dataset_inst # Load from the huggingface hub elif hub == Hubs.huggingface: dataset_inst = RemoteDataLoaderManager( dataset_context_config).load_dataset( RemoteDataLoaderType.HF_DATA_LOADER) - return MsDataset.to_ms_dataset(dataset_inst, target=target) + dataset_inst = MsDataset.to_ms_dataset(dataset_inst, target=target) + dataset_inst._dataset_context_config = dataset_context_config + if custom_cfg: + dataset_inst.to_custom_dataset( + custom_cfg=custom_cfg, **config_kwargs) + dataset_inst.is_custom = True + return dataset_inst # Load from the modelscope hub elif hub == Hubs.modelscope: - dataset_inst = RemoteDataLoaderManager( - dataset_context_config).load_dataset( - RemoteDataLoaderType.MS_DATA_LOADER) - return MsDataset.to_ms_dataset(dataset_inst, target=target) + remote_dataloader_manager = RemoteDataLoaderManager( + dataset_context_config) + dataset_inst = remote_dataloader_manager.load_dataset( + RemoteDataLoaderType.MS_DATA_LOADER) + dataset_inst = MsDataset.to_ms_dataset(dataset_inst, target=target) + if isinstance(dataset_inst, MsDataset): + dataset_inst._dataset_context_config = remote_dataloader_manager.dataset_context_config + if custom_cfg: + dataset_inst.to_custom_dataset( + custom_cfg=custom_cfg, **config_kwargs) + dataset_inst.is_custom = True + return dataset_inst else: raise 'Please adjust input args to specify a loading mode, we support following scenes: ' \ 'loading from local disk, huggingface hub and modelscope hub.'
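Note on the hunk above: MsDataset.load() is now the single entry point for custom datasets; when custom_cfg is passed, the loaded instance is converted in place through to_custom_dataset() and flagged with is_custom = True. A minimal usage sketch of that flow follows; the dataset id and configuration fields are illustrative placeholders, not part of this patch:

    from modelscope.msdatasets import MsDataset
    from modelscope.utils.config import Config

    # Hypothetical model configuration; in practice this is the model's
    # configuration.json, which names a registered custom dataset type.
    custom_cfg = Config({
        'task': 'movie-scene-segmentation',
        'dataset': {'type': 'MovieSceneSegmentationDataset'},
    })

    # With custom_cfg given, load() calls to_custom_dataset() internally,
    # so the returned object can be handed to the trainer directly.
    ds = MsDataset.load(
        'movie_scene_seg_toydata',  # placeholder dataset id
        split='train',
        custom_cfg=custom_cfg)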
- def to_torch_dataset_with_processors( - self, - preprocessors: Union[Callable, List[Callable]], - columns: Union[str, List[str]] = None, - to_tensor: bool = True, - ): - import torch - preprocessor_list = preprocessors if isinstance( - preprocessors, list) else [preprocessors] - - columns = format_list(columns) - - columns = [ - key for key in self._hf_ds.features.keys() if key in columns - ] - retained_columns = [] - if to_tensor: - sample = next(iter(self._hf_ds)) - - sample_res = {k: np.array(sample[k]) for k in columns} - for processor in preprocessor_list: - sample_res.update( - {k: np.array(v) - for k, v in processor(sample).items()}) - - def is_numpy_number(value): - return np.issubdtype(value.dtype, np.integer) or np.issubdtype( - value.dtype, np.floating) - - for k in sample_res.keys(): - if not is_numpy_number(sample_res[k]): - logger.warning( - f'Data of column {k} is non-numeric, will be removed') - continue - retained_columns.append(k) - - class MsMapDataset(torch.utils.data.Dataset): - - def __init__(self, dataset: Iterable, preprocessor_list, - retained_columns, columns, to_tensor): - super(MsDataset).__init__() - self.dataset = dataset - self.preprocessor_list = preprocessor_list - self.to_tensor = to_tensor - self.retained_columns = retained_columns - self.columns = columns - - def __len__(self): - return len(self.dataset) - - def type_converter(self, x): - import torch - if self.to_tensor and not isinstance(x, torch.Tensor): - return torch.tensor(x) - else: - return x - - def __getitem__(self, index): - item_dict = self.dataset[index] - res = { - k: self.type_converter(item_dict[k]) - for k in self.columns - if (not self.to_tensor) or k in self.retained_columns - } - for preprocessor in self.preprocessor_list: - res.update({ - k: self.type_converter(v) - for k, v in preprocessor(item_dict).items() - if (not self.to_tensor) or k in self.retained_columns - }) - return res - - return MsMapDataset(self._hf_ds, preprocessor_list, retained_columns, - columns, to_tensor) - - def to_torch_dataset( - self, - columns: Union[str, List[str]] = None, - preprocessors: Union[Callable, List[Callable]] = None, - task_name: str = None, - task_data_config: ConfigDict = None, - to_tensor: bool = True, - **format_kwargs, - ): - """Create a torch.utils.data.Dataset from the MS Dataset. The torch.utils.data.Dataset can be passed to - torch.utils.data.DataLoader. - - Args: - preprocessors (Callable or List[Callable], default None): (list of) Preprocessor object used to process - every sample of the dataset. The output type of processors is dict, and each (numeric) field of the dict - will be used as a field of torch.utils.data.Dataset. - columns (str or List[str], default None): Dataset column(s) to be loaded (numeric data only if - `to_tensor` is True). If the preprocessor is None, the arg columns must have at least one column. - If the `preprocessors` is not None, the output fields of processors will also be added. - task_name (str, default None): task name, refer to :obj:`Tasks` for more details - task_data_config (ConfigDict, default None): config dict for model object. - to_tensor (bool, default None): whether convert the data types of dataset column(s) to torch.tensor or not. - format_kwargs: A `dict` of arguments to be passed to the `torch.tensor`. 
- - Returns: - :class:`tf.data.Dataset` - - """ - if not is_torch_available(): - raise ImportError( - 'The function to_torch_dataset requires pytorch to be installed' - ) - if isinstance(self._hf_ds, ExternalDataset): - task_data_config.update({'preprocessor': preprocessors}) - task_data_config.update(self._hf_ds.config_kwargs) - return build_task_dataset(task_data_config, task_name) - if preprocessors is not None: - return self.to_torch_dataset_with_processors( - preprocessors, columns=columns, to_tensor=to_tensor) - else: - self._hf_ds.reset_format() - self._hf_ds.set_format( - type='torch', columns=columns, format_kwargs=format_kwargs) - return self._hf_ds - - def to_tf_dataset_with_processors( - self, - batch_size: int, - shuffle: bool, - preprocessors: Union[Callable, List[Callable]], - drop_remainder: bool = None, - prefetch: bool = True, - label_cols: Union[str, List[str]] = None, - columns: Union[str, List[str]] = None, - ): - preprocessor_list = preprocessors if isinstance( - preprocessors, list) else [preprocessors] - - label_cols = format_list(label_cols) - columns = format_list(columns) - cols_to_retain = list(set(label_cols + columns)) - retained_columns = [ - key for key in self._hf_ds.features.keys() if key in cols_to_retain - ] - import tensorflow as tf - tf_dataset = tf.data.Dataset.from_tensor_slices( - np.arange(len(self._hf_ds), dtype=np.int64)) - if shuffle: - tf_dataset = tf_dataset.shuffle(buffer_size=len(self._hf_ds)) - - def func(i, return_dict=False): - i = int(i) - res = {k: np.array(self._hf_ds[i][k]) for k in retained_columns} - for preprocessor in preprocessor_list: - # TODO preprocessor output may have the same key - res.update({ - k: np.array(v) - for k, v in preprocessor(self._hf_ds[i]).items() - }) - if return_dict: - return res - return tuple(list(res.values())) - - sample_res = func(0, True) - - @tf.function(input_signature=[tf.TensorSpec(None, tf.int64)]) - def fetch_function(i): - output = tf.numpy_function( - func, - inp=[i], - Tout=[ - tf.dtypes.as_dtype(val.dtype) - for val in sample_res.values() - ], - ) - return {key: output[i] for i, key in enumerate(sample_res)} - - from tensorflow.data.experimental import AUTOTUNE - tf_dataset = tf_dataset.map( - fetch_function, num_parallel_calls=AUTOTUNE) - if label_cols: - - def split_features_and_labels(input_batch): - labels = { - key: tensor - for key, tensor in input_batch.items() if key in label_cols - } - if len(input_batch) == 1: - input_batch = next(iter(input_batch.values())) - if len(labels) == 1: - labels = next(iter(labels.values())) - return input_batch, labels - - tf_dataset = tf_dataset.map(split_features_and_labels) - - elif len(columns) == 1: - tf_dataset = tf_dataset.map(lambda x: next(iter(x.values()))) - if batch_size > 1: - tf_dataset = tf_dataset.batch( - batch_size, drop_remainder=drop_remainder) - - if prefetch: - tf_dataset = tf_dataset.prefetch(AUTOTUNE) - return tf_dataset - - def to_tf_dataset( - self, - batch_size: int, - shuffle: bool, - preprocessors: Union[Callable, List[Callable]] = None, - columns: Union[str, List[str]] = None, - collate_fn: Callable = None, - drop_remainder: bool = None, - collate_fn_args: Dict[str, Any] = None, - label_cols: Union[str, List[str]] = None, - prefetch: bool = True, - ): - """Create a tf.data.Dataset from the MS Dataset. This tf.data.Dataset can be passed to tf methods like - model.fit() or model.predict(). - - Args: - batch_size (int): Number of samples in a single batch. - shuffle(bool): Shuffle the dataset order. 
- preprocessors (Callable or List[Callable], default None): (list of) Preprocessor object used to process - every sample of the dataset. The output type of processors is dict, and each field of the dict will be - used as a field of the tf.data. Dataset. If the `preprocessors` is None, the `collate_fn` - shouldn't be None. - columns (str or List[str], default None): Dataset column(s) to be loaded. If the preprocessor is None, - the arg columns must have at least one column. If the `preprocessors` is not None, the output fields of - processors will also be added. - collate_fn(Callable, default None): A callable object used to collect lists of samples into a batch. If - the `preprocessors` is None, the `collate_fn` shouldn't be None. - drop_remainder(bool, default None): Drop the last incomplete batch when loading. - collate_fn_args (Dict, optional): A `dict` of arguments to be passed to the`collate_fn`. - label_cols (str or List[str], defalut None): Dataset column(s) to load as labels. - prefetch (bool, default True): Prefetch data. - - Returns: - :class:`tf.data.Dataset` - - """ - if not is_tf_available(): - raise ImportError( - 'The function to_tf_dataset requires Tensorflow to be installed.' - ) - if preprocessors is not None: - return self.to_tf_dataset_with_processors( - batch_size, - shuffle, - preprocessors, - drop_remainder=drop_remainder, - prefetch=prefetch, - label_cols=label_cols, - columns=columns) - - if collate_fn is None: - logger.error( - 'The `preprocessors` and the `collate_fn` should`t be both None.' - ) - return None - self._hf_ds.reset_format() - return self._hf_ds.to_tf_dataset( - columns, - batch_size, - shuffle, - collate_fn, - drop_remainder=drop_remainder, - collate_fn_args=collate_fn_args, - label_cols=label_cols, - prefetch=prefetch) - - def to_hf_dataset(self) -> Dataset: - self._hf_ds.reset_format() - return self._hf_ds - - def remap_columns(self, column_mapping: Dict[str, str]) -> Dataset: - """ - Rename columns and return the underlying hf dataset directly - TODO: support native MsDataset column rename. - Args: - column_mapping: the mapping of the original and new column names - Returns: - underlying hf dataset - """ - self._hf_ds.reset_format() - return self._hf_ds.rename_columns(column_mapping) - @staticmethod def upload( object_name: str, @@ -695,3 +447,358 @@ class MsDataset: resp_msg = _delete_manager.delete(object_name=object_name) logger.info(f'Object {object_name} successfully removed!') return resp_msg + + def to_torch_dataset( + self, + columns: Union[str, List[str]] = None, + preprocessors: Union[Callable, List[Callable]] = None, + task_name: str = None, + data_config: ConfigDict = None, + to_tensor: bool = True, + **format_kwargs, + ): + """Create a torch.utils.data.Dataset from the MS Dataset. The torch.utils.data.Dataset can be passed to + torch.utils.data.DataLoader. + + Args: + preprocessors (Callable or List[Callable], default None): (list of) Preprocessor object used to process + every sample of the dataset. The output type of processors is dict, and each (numeric) field of the dict + will be used as a field of torch.utils.data.Dataset. + columns (str or List[str], default None): Dataset column(s) to be loaded (numeric data only if + `to_tensor` is True). If the preprocessor is None, the arg columns must have at least one column. + If the `preprocessors` is not None, the output fields of processors will also be added. 
+ task_name (str, default None): task name, refer to :obj:`Tasks` for more details + data_config (ConfigDict, default None): config dict for model object. + Attributes of ConfigDict: + `preprocessor` (Callable, List[Callable], optional): preprocessors to deal with dataset + `type` (str): the type of task + `split_config` (dict, optional): get the split config for ExternalDataset + `test_mode` (bool, optional): whether it is in test mode or not + to_tensor (bool, default True): whether to convert the data types of dataset column(s) to torch.Tensor or not. + format_kwargs: A `dict` of arguments to be passed to `torch.tensor`. + + Returns: + :class:`torch.utils.data.Dataset` + + """ + if not is_torch_available(): + raise ImportError( + 'The function to_torch_dataset requires pytorch to be installed' + ) + if isinstance(self._hf_ds, ExternalDataset): + data_config.update({'preprocessor': preprocessors}) + data_config.update(self._hf_ds.config_kwargs) + return build_custom_dataset(data_config, task_name) + if preprocessors is not None: + return self._to_torch_dataset_with_processors( + preprocessors, columns=columns, to_tensor=to_tensor) + else: + self._hf_ds.reset_format() + self._hf_ds.set_format( + type='torch', columns=columns, format_kwargs=format_kwargs) + return self._hf_ds + + def to_tf_dataset( + self, + batch_size: int, + shuffle: bool, + preprocessors: Union[Callable, List[Callable]] = None, + columns: Union[str, List[str]] = None, + collate_fn: Callable = None, + drop_remainder: bool = None, + collate_fn_args: Dict[str, Any] = None, + label_cols: Union[str, List[str]] = None, + prefetch: bool = True, + ): + """Create a tf.data.Dataset from the MS Dataset. This tf.data.Dataset can be passed to tf methods like + model.fit() or model.predict(). + + Args: + batch_size (int): Number of samples in a single batch. + shuffle(bool): Shuffle the dataset order. + preprocessors (Callable or List[Callable], default None): (list of) Preprocessor object used to process + every sample of the dataset. The output type of processors is dict, and each field of the dict will be + used as a field of the tf.data.Dataset. If the `preprocessors` is None, the `collate_fn` + shouldn't be None. + columns (str or List[str], default None): Dataset column(s) to be loaded. If the preprocessor is None, + the arg columns must have at least one column. If the `preprocessors` is not None, the output fields of + processors will also be added. + collate_fn(Callable, default None): A callable object used to collect lists of samples into a batch. If + the `preprocessors` is None, the `collate_fn` shouldn't be None. + drop_remainder(bool, default None): Drop the last incomplete batch when loading. + collate_fn_args (Dict, optional): A `dict` of arguments to be passed to the `collate_fn`. + label_cols (str or List[str], default None): Dataset column(s) to load as labels. + prefetch (bool, default True): Prefetch data. + + Returns: + :class:`tf.data.Dataset` + + """ + if not is_tf_available(): + raise ImportError( + 'The function to_tf_dataset requires Tensorflow to be installed.' + ) + if preprocessors is not None: + return self._to_tf_dataset_with_processors( + batch_size, + shuffle, + preprocessors, + drop_remainder=drop_remainder, + prefetch=prefetch, + label_cols=label_cols, + columns=columns) + + if collate_fn is None: + logger.error( + 'The `preprocessors` and the `collate_fn` shouldn\'t both be None.' 
+ ) + return None + self._hf_ds.reset_format() + return self._hf_ds.to_tf_dataset( + columns, + batch_size, + shuffle, + collate_fn, + drop_remainder=drop_remainder, + collate_fn_args=collate_fn_args, + label_cols=label_cols, + prefetch=prefetch) + + def to_hf_dataset(self) -> Dataset: + self._hf_ds.reset_format() + return self._hf_ds + + def remap_columns(self, column_mapping: Dict[str, str]) -> Dataset: + """ + Rename columns and return the underlying hf dataset directly + TODO: support native MsDataset column rename. + Args: + column_mapping: the mapping of the original and new column names + Returns: + underlying hf dataset + """ + self._hf_ds.reset_format() + return self._hf_ds.rename_columns(column_mapping) + + def _to_torch_dataset_with_processors( + self, + preprocessors: Union[Callable, List[Callable]], + columns: Union[str, List[str]] = None, + to_tensor: bool = True, + ): + preprocessor_list = preprocessors if isinstance( + preprocessors, list) else [preprocessors] + + columns = format_list(columns) + + columns = [ + key for key in self._hf_ds.features.keys() if key in columns + ] + retained_columns = [] + if to_tensor: + sample = next(iter(self._hf_ds)) + + sample_res = {k: np.array(sample[k]) for k in columns} + for processor in preprocessor_list: + sample_res.update( + {k: np.array(v) + for k, v in processor(sample).items()}) + + def is_numpy_number(value): + return np.issubdtype(value.dtype, np.integer) or np.issubdtype( + value.dtype, np.floating) + + for k in sample_res.keys(): + if not is_numpy_number(sample_res[k]): + logger.warning( + f'Data of column {k} is non-numeric, will be removed') + continue + retained_columns.append(k) + + import torch + + class MsMapDataset(torch.utils.data.Dataset): + + def __init__(self, dataset: Iterable, preprocessor_list, + retained_columns, columns, to_tensor): + super().__init__() + self.dataset = dataset + self.preprocessor_list = preprocessor_list + self.to_tensor = to_tensor + self.retained_columns = retained_columns + self.columns = columns + + def __len__(self): + return len(self.dataset) + + def type_converter(self, x): + if self.to_tensor and not isinstance(x, torch.Tensor): + return torch.tensor(x) + else: + return x + + def __getitem__(self, index): + item_dict = self.dataset[index] + res = { + k: self.type_converter(item_dict[k]) + for k in self.columns + if (not self.to_tensor) or k in self.retained_columns + } + for preprocessor in self.preprocessor_list: + res.update({ + k: self.type_converter(v) + for k, v in preprocessor(item_dict).items() + if (not self.to_tensor) or k in self.retained_columns + }) + return res + + return MsMapDataset(self._hf_ds, preprocessor_list, retained_columns, + columns, to_tensor) + + def _to_tf_dataset_with_processors( + self, + batch_size: int, + shuffle: bool, + preprocessors: Union[Callable, List[Callable]], + drop_remainder: bool = None, + prefetch: bool = True, + label_cols: Union[str, List[str]] = None, + columns: Union[str, List[str]] = None, + ): + preprocessor_list = preprocessors if isinstance( + preprocessors, list) else [preprocessors] + + label_cols = format_list(label_cols) + columns = format_list(columns) + cols_to_retain = list(set(label_cols + columns)) + retained_columns = [ + key for key in self._hf_ds.features.keys() if key in cols_to_retain + ] + import tensorflow as tf + tf_dataset = tf.data.Dataset.from_tensor_slices( + np.arange(len(self._hf_ds), dtype=np.int64)) + if shuffle: + tf_dataset = tf_dataset.shuffle(buffer_size=len(self._hf_ds)) + + def func(i, return_dict=False): + i = int(i) + res = 
{k: np.array(self._hf_ds[i][k]) for k in retained_columns} + for preprocessor in preprocessor_list: + # TODO preprocessor output may have the same key + res.update({ + k: np.array(v) + for k, v in preprocessor(self._hf_ds[i]).items() + }) + if return_dict: + return res + return tuple(list(res.values())) + + sample_res = func(0, True) + + @tf.function(input_signature=[tf.TensorSpec(None, tf.int64)]) + def fetch_function(i): + output = tf.numpy_function( + func, + inp=[i], + Tout=[ + tf.dtypes.as_dtype(val.dtype) + for val in sample_res.values() + ], + ) + return {key: output[i] for i, key in enumerate(sample_res)} + + from tensorflow.data.experimental import AUTOTUNE + tf_dataset = tf_dataset.map( + fetch_function, num_parallel_calls=AUTOTUNE) + if label_cols: + + def split_features_and_labels(input_batch): + labels = { + key: tensor + for key, tensor in input_batch.items() if key in label_cols + } + if len(input_batch) == 1: + input_batch = next(iter(input_batch.values())) + if len(labels) == 1: + labels = next(iter(labels.values())) + return input_batch, labels + + tf_dataset = tf_dataset.map(split_features_and_labels) + + elif len(columns) == 1: + tf_dataset = tf_dataset.map(lambda x: next(iter(x.values()))) + if batch_size > 1: + tf_dataset = tf_dataset.batch( + batch_size, drop_remainder=drop_remainder) + + if prefetch: + tf_dataset = tf_dataset.prefetch(AUTOTUNE) + return tf_dataset + + def to_custom_dataset(self, + custom_cfg: Config, + preprocessor=None, + mode=None, + **kwargs): + """Convert the underlying dataset to a task-specific custom dataset, based on the given model configuration and preprocessor. + + Args: + custom_cfg (Config): The model configuration for custom datasets. + preprocessor (Preprocessor, Optional): Preprocessor for data samples. + mode (str, Optional): See modelscope.utils.constant.ModeKeys + + Returns: + None. The conversion is in place: the converted dataset replaces `self._hf_ds` and can be + accessed via the `ds_instance` property. + """ + + if not is_torch_available(): + raise ImportError( + 'The function to_custom_dataset requires pytorch to be installed' + ) + if not custom_cfg: + return + + # Set the flag that it has been converted to custom dataset + self.is_custom = True + + # Check mode + if mode is None: + if 'mode' in kwargs: + mode = kwargs.get('mode') + + # Parse cfg + ds_cfg_key = 'train' if mode == ModeKeys.TRAIN else 'val' + data_cfg = custom_cfg.safe_get(f'dataset.{ds_cfg_key}') + if data_cfg is None: + data_cfg = ConfigDict(type=custom_cfg.model.type) if hasattr( + custom_cfg, ConfigFields.model) else ConfigDict(type=None) + data_cfg.update(dict(mode=mode)) + + # Get preprocessors from custom_cfg + task_name = custom_cfg.task + if 'task' in kwargs: + task_name = kwargs.pop('task') + field_name = Tasks.find_field_by_task(task_name) + if 'field' in kwargs: + field_name = kwargs.pop('field') + if preprocessor is None and hasattr(custom_cfg, 'preprocessor'): + preprocessor_cfg = custom_cfg.preprocessor + if preprocessor_cfg: + preprocessor = build_preprocessor(preprocessor_cfg, field_name) + + # Build custom dataset + if isinstance(self._hf_ds, ExternalDataset): + data_cfg.update(dict(preprocessor=preprocessor)) + data_cfg.update(self._hf_ds.config_kwargs) + self._hf_ds = build_custom_dataset( + cfg=data_cfg, task_name=custom_cfg.task) + return + + if preprocessor is not None: + to_tensor = kwargs.get('to_tensor', True) + self._hf_ds = self._to_torch_dataset_with_processors( + preprocessors=preprocessor, to_tensor=to_tensor) + else: + self._hf_ds.reset_format() + self._hf_ds.set_format(type='torch') + return diff --git a/modelscope/msdatasets/task_datasets/__init__.py 
b/modelscope/msdatasets/task_datasets/__init__.py deleted file mode 100644 index 8c8cbdf2..00000000 --- a/modelscope/msdatasets/task_datasets/__init__.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from typing import TYPE_CHECKING - -from modelscope.utils.import_utils import LazyImportModule, is_torch_available - -if TYPE_CHECKING: - from .base import TaskDataset - from .builder import TASK_DATASETS, build_task_dataset - from .torch_base_dataset import TorchTaskDataset - from .veco_dataset import VecoDataset - from .image_instance_segmentation_coco_dataset import ImageInstanceSegmentationCocoDataset - from .movie_scene_segmentation import MovieSceneSegmentationDataset - from .video_summarization_dataset import VideoSummarizationDataset - from .language_guided_video_summarization_dataset import LanguageGuidedVideoSummarizationDataset - from .image_inpainting import ImageInpaintingDataset - from .ocr_recognition_dataset import OCRRecognitionDataset - from .text_ranking_dataset import TextRankingDataset - from .referring_video_object_segmentation import ReferringVideoObjectSegmentationDataset - from .bad_image_detecting import BadImageDetectingDataset - -else: - _import_structure = { - 'base': ['TaskDataset'], - 'builder': ['TASK_DATASETS', 'build_task_dataset'], - 'torch_base_dataset': ['TorchTaskDataset'], - 'text_ranking_dataset': ['TextRankingDataset'], - 'veco_dataset': ['VecoDataset'], - 'image_instance_segmentation_coco_dataset': - ['ImageInstanceSegmentationCocoDataset'], - 'video_summarization_dataset': ['VideoSummarizationDataset'], - 'language_guided_video_summarization_dataset': - ['LanguageGuidedVideoSummarizationDataset'], - 'movie_scene_segmentation': ['MovieSceneSegmentationDataset'], - 'image_inpainting': ['ImageInpaintingDataset'], - 'ocr_recognition_dataset': ['OCRRecognitionDataset'], - 'sidd_image_denoising_dataset': ['SiddImageDenoisingDataset'], - 'image_portrait_enhancement_dataset': - ['ImagePortraitEnhancementDataset'], - 'referring_video_object_segmentation': - ['ReferringVideoObjectSegmentationDataset'], - 'bad_image_detecting': ['BadImageDetectingDataset'], - } - import sys - - sys.modules[__name__] = LazyImportModule( - __name__, - globals()['__file__'], - _import_structure, - module_spec=__spec__, - extra_objects={}, - ) diff --git a/modelscope/msdatasets/task_datasets/base.py b/modelscope/msdatasets/task_datasets/base.py deleted file mode 100644 index 39b791b1..00000000 --- a/modelscope/msdatasets/task_datasets/base.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from abc import ABC, abstractmethod -from typing import Any, List, Tuple, Union - - -class TaskDataset(ABC): - """The task dataset base class for all the task specific dataset processors. - """ - - def __init__(self, - datasets: Union[Any, List[Any]], - mode, - preprocessor=None, - **kwargs): - super().__init__() - self.mode = mode - self.preprocessor = preprocessor - self._inner_dataset = self.prepare_dataset(datasets) - - @abstractmethod - def prepare_dataset(self, datasets: Union[Any, List[Any]]) -> Any: - """Prepare a dataset. - - User can process the input datasets in a whole dataset perspective. - This method also helps to merge several datasets to one. - - Args: - datasets: The original dataset(s) - - Returns: A single dataset, which may be created after merging. - - """ - pass - - @abstractmethod - def prepare_sample(self, data): - """Preprocess the data fetched from the inner_dataset. 
- - If the preprocessor is None, the original data will be returned, else the preprocessor will be called. - User can override this method to implement custom logics. - - Args: - data: The data fetched from the dataset. - - Returns: The processed data. - - """ - pass diff --git a/modelscope/msdatasets/task_datasets/image_inpainting/__init__.py b/modelscope/msdatasets/task_datasets/image_inpainting/__init__.py deleted file mode 100644 index 732a1bd7..00000000 --- a/modelscope/msdatasets/task_datasets/image_inpainting/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from .image_inpainting_dataset import ImageInpaintingDataset diff --git a/modelscope/msdatasets/task_datasets/movie_scene_segmentation/__init__.py b/modelscope/msdatasets/task_datasets/movie_scene_segmentation/__init__.py deleted file mode 100644 index b1bc40f8..00000000 --- a/modelscope/msdatasets/task_datasets/movie_scene_segmentation/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from .movie_scene_segmentation_dataset import MovieSceneSegmentationDataset diff --git a/modelscope/msdatasets/task_datasets/referring_video_object_segmentation/__init__.py b/modelscope/msdatasets/task_datasets/referring_video_object_segmentation/__init__.py deleted file mode 100644 index 7c1b724e..00000000 --- a/modelscope/msdatasets/task_datasets/referring_video_object_segmentation/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from .referring_video_object_segmentation_dataset import \ - ReferringVideoObjectSegmentationDataset diff --git a/modelscope/msdatasets/task_datasets/torch_base_dataset.py b/modelscope/msdatasets/task_datasets/torch_base_dataset.py deleted file mode 100644 index 4d82b741..00000000 --- a/modelscope/msdatasets/task_datasets/torch_base_dataset.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from typing import Any, List, Tuple, Union - -from torch.utils.data import ConcatDataset, Dataset - -from .base import TaskDataset - - -class TorchTaskDataset(TaskDataset, Dataset): - """The task dataset base class for all the torch-based task processors. - - This base class is enough for most cases, except there are procedures which can not be executed in - preprocessors and Datasets like dataset merging. - """ - - def __init__(self, - datasets: Union[Any, List[Any]], - mode, - preprocessor=None, - **kwargs): - TaskDataset.__init__(self, datasets, mode, preprocessor, **kwargs) - self.trainer = None - - def __getitem__(self, index) -> Any: - return self.prepare_sample(self._inner_dataset[index]) - - def __len__(self): - return len(self._inner_dataset) - - def prepare_dataset(self, datasets: Union[Any, List[Any]]) -> Any: - """Prepare a dataset. - - User can process the input datasets in a whole dataset perspective. - This method gives a default implementation of datasets merging, user can override this - method to write custom logics. - - Args: - datasets: The original dataset(s) - - Returns: A single dataset, which may be created after merging. - - """ - if isinstance(datasets, List): - if len(datasets) == 1: - return datasets[0] - elif len(datasets) > 1: - return ConcatDataset(datasets) - else: - return datasets - - def prepare_sample(self, data): - """Preprocess the data fetched from the inner_dataset. - - If the preprocessor is None, the original data will be returned, else the preprocessor will be called. - User can override this method to implement custom logics. 
- - Args: - data: The data fetched from the dataset. - - Returns: The processed data. - - """ - return self.preprocessor( - data) if self.preprocessor is not None else data diff --git a/modelscope/msdatasets/utils/dataset_utils.py b/modelscope/msdatasets/utils/dataset_utils.py index 4c80af7d..dde044d5 100644 --- a/modelscope/msdatasets/utils/dataset_utils.py +++ b/modelscope/msdatasets/utils/dataset_utils.py @@ -184,9 +184,11 @@ def get_dataset_files(subset_split_into: dict, meta_map = defaultdict(dict) file_map = defaultdict(dict) args_map = defaultdict(dict) + custom_type_map = defaultdict(dict) modelscope_api = HubApi() for split, info in subset_split_into.items(): + custom_type_map[split] = info.get('custom', '') meta_map[split] = modelscope_api.get_dataset_file_url( info.get('meta', ''), dataset_name, namespace, revision) if info.get('file'): @@ -221,4 +223,4 @@ def get_dataset_files(subset_split_into: dict, if contains_dir(file_map): file_map = get_split_objects_map(file_map, objects) - return meta_map, file_map, args_map + return meta_map, file_map, args_map, custom_type_map diff --git a/modelscope/trainers/audio/kws_farfield_trainer.py b/modelscope/trainers/audio/kws_farfield_trainer.py index 508517a7..205947b7 100644 --- a/modelscope/trainers/audio/kws_farfield_trainer.py +++ b/modelscope/trainers/audio/kws_farfield_trainer.py @@ -12,7 +12,8 @@ from torch import optim as optim from modelscope.metainfo import Trainers from modelscope.models import Model, TorchModel -from modelscope.msdatasets.task_datasets.audio import KWSDataLoader, KWSDataset +from modelscope.msdatasets.dataset_cls.custom_datasets.audio import ( + KWSDataLoader, KWSDataset) from modelscope.trainers.base import BaseTrainer from modelscope.trainers.builder import TRAINERS from modelscope.utils.audio.audio_utils import update_conf diff --git a/modelscope/trainers/audio/kws_nearfield_trainer.py b/modelscope/trainers/audio/kws_nearfield_trainer.py index bf00c435..5e63e87e 100644 --- a/modelscope/trainers/audio/kws_nearfield_trainer.py +++ b/modelscope/trainers/audio/kws_nearfield_trainer.py @@ -1,42 +1,30 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
import copy import datetime -import math import os -import random import re -import sys -from shutil import copyfile from typing import Callable, Dict, Optional -import numpy as np import torch -import torch.distributed as dist -import torch.nn.functional as F import yaml from tensorboardX import SummaryWriter from torch import nn as nn from torch import optim as optim -from torch.distributed import ReduceOp -from torch.nn.utils import clip_grad_norm_ from torch.utils.data import DataLoader from modelscope.metainfo import Trainers from modelscope.models import Model, TorchModel -from modelscope.msdatasets.task_datasets.audio.kws_nearfield_dataset import \ +from modelscope.msdatasets.dataset_cls.custom_datasets.audio.kws_nearfield_dataset import \ kws_nearfield_dataset from modelscope.trainers.base import BaseTrainer from modelscope.trainers.builder import TRAINERS -from modelscope.utils.audio.audio_utils import update_conf from modelscope.utils.checkpoint import load_checkpoint, save_checkpoint from modelscope.utils.config import Config from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModelFile -from modelscope.utils.data_utils import to_device from modelscope.utils.device import create_device from modelscope.utils.logger import get_logger from modelscope.utils.torch_utils import (get_dist_info, get_local_rank, - init_dist, is_master, - set_random_seed) + init_dist, set_random_seed) from .kws_utils.batch_utils import executor_cv, executor_test, executor_train from .kws_utils.det_utils import compute_det from .kws_utils.file_utils import query_tokens_id, read_lexicon, read_token diff --git a/modelscope/trainers/cv/image_detection_damoyolo_trainer.py b/modelscope/trainers/cv/image_detection_damoyolo_trainer.py index 734c8915..c8081ee0 100644 --- a/modelscope/trainers/cv/image_detection_damoyolo_trainer.py +++ b/modelscope/trainers/cv/image_detection_damoyolo_trainer.py @@ -1,11 +1,9 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
-import copy import datetime import math import os -import os.path as osp import time -from typing import Callable, Dict, Optional +from typing import Dict import torch import torch.distributed as dist @@ -25,8 +23,8 @@ from modelscope.models.cv.tinynas_detection.damo.detectors.detector import ( build_ddp_model, build_local_model) from modelscope.models.cv.tinynas_detection.damo.utils import ( cosine_scheduler, ema_model) -from modelscope.msdatasets.task_datasets.damoyolo import (build_dataloader, - build_dataset) +from modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo import ( + build_dataloader, build_dataset) from modelscope.trainers.base import BaseTrainer from modelscope.trainers.builder import TRAINERS from modelscope.utils.checkpoint import save_checkpoint diff --git a/modelscope/trainers/cv/ocr_detection_db_trainer.py b/modelscope/trainers/cv/ocr_detection_db_trainer.py index 2967ffb0..3a9d51aa 100644 --- a/modelscope/trainers/cv/ocr_detection_db_trainer.py +++ b/modelscope/trainers/cv/ocr_detection_db_trainer.py @@ -19,10 +19,8 @@ from modelscope.models.cv.ocr_detection.modules.dbnet import (DBModel, DBModel_v2) from modelscope.models.cv.ocr_detection.utils import (boxes_from_bitmap, polygons_from_bitmap) -from modelscope.msdatasets.task_datasets.ocr_detection import (DataLoader, - ImageDataset) -from modelscope.msdatasets.task_datasets.ocr_detection.measures import \ - QuadMeasurer +from modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection import ( + DataLoader, ImageDataset, QuadMeasurer) from modelscope.trainers.base import BaseTrainer from modelscope.trainers.builder import TRAINERS from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModelFile diff --git a/modelscope/trainers/nlp/siamese_uie_trainer.py b/modelscope/trainers/nlp/siamese_uie_trainer.py index af95006e..e3289976 100644 --- a/modelscope/trainers/nlp/siamese_uie_trainer.py +++ b/modelscope/trainers/nlp/siamese_uie_trainer.py @@ -106,17 +106,21 @@ class SiameseUIETrainer(EpochBasedTrainer): seed=seed, **kwargs) - def to_task_dataset(self, - datasets: Union[Dataset, List[Dataset]], - mode: str, - task_data_config: Config = None, - preprocessor: Optional[Preprocessor] = None, - **kwargs): - if mode == 'train': + def build_dataset(self, + datasets: Union[torch.utils.data.Dataset, MsDataset, + List[torch.utils.data.Dataset]], + model_cfg: Config, + mode: str, + preprocessor: Optional[Preprocessor] = None, + **kwargs): + if mode == ModeKeys.TRAIN: datasets = self.load_dataset(datasets) - # print('****self.train_dataset*******', self.train_dataset[0]) - return super().to_task_dataset(datasets, mode, task_data_config, - preprocessor, **kwargs) + return super(SiameseUIETrainer, self).build_dataset( + datasets=datasets, + model_cfg=self.cfg, + mode=mode, + preprocessor=preprocessor, + **kwargs) def get_train_dataloader(self): """ Builder torch dataloader for training. @@ -125,12 +129,6 @@ class SiameseUIETrainer(EpochBasedTrainer): the config for data.train in configuration file, or subclass and override this method (or `get_train_dataloader` in a subclass. 
""" - if self.train_dataset is None: - train_data = self.cfg.dataset.train - self.train_dataset = self.build_dataset( - train_data, - mode=ModeKeys.TRAIN, - preprocessor=self.train_preprocessor) self.train_dataset.preprocessor = None data_loader = self._build_dataloader_with_dataset( self.train_dataset, diff --git a/modelscope/trainers/nlp_trainer.py b/modelscope/trainers/nlp_trainer.py index bbdd080f..455fc907 100644 --- a/modelscope/trainers/nlp_trainer.py +++ b/modelscope/trainers/nlp_trainer.py @@ -150,7 +150,7 @@ class VecoTrainer(NlpEpochBasedTrainer): """Veco evaluates the datasets one by one. """ - from modelscope.msdatasets.task_datasets import VecoDataset + from modelscope.msdatasets.dataset_cls.custom_datasets import VecoDataset if checkpoint_path is not None: from modelscope.trainers.hooks import LoadCheckpointHook LoadCheckpointHook.load_checkpoint(checkpoint_path, self) @@ -159,9 +159,10 @@ class VecoTrainer(NlpEpochBasedTrainer): metric_values = {} if self.eval_dataset is None: - val_data = self.cfg.dataset.val - self.eval_dataset = self.build_dataset( - val_data, mode=ModeKeys.EVAL) + self.eval_dataset = self.build_dataset_from_cfg( + model_cfg=self.cfg, + mode=self._mode, + preprocessor=self.eval_preprocessor) idx = 0 dataset_cnt = 1 diff --git a/modelscope/trainers/trainer.py b/modelscope/trainers/trainer.py index 8cddbeae..7779b1a5 100644 --- a/modelscope/trainers/trainer.py +++ b/modelscope/trainers/trainer.py @@ -20,10 +20,11 @@ from modelscope.metrics import build_metric, task_default_metrics from modelscope.metrics.prediction_saving_wrapper import \ PredictionSavingWrapper from modelscope.models.base import Model, TorchModel +from modelscope.msdatasets.dataset_cls.custom_datasets import \ + TorchCustomDataset +from modelscope.msdatasets.dataset_cls.custom_datasets.builder import \ + build_custom_dataset from modelscope.msdatasets.ms_dataset import MsDataset -from modelscope.msdatasets.task_datasets.builder import build_task_dataset -from modelscope.msdatasets.task_datasets.torch_base_dataset import \ - TorchTaskDataset from modelscope.outputs import ModelOutputBase from modelscope.preprocessors.base import Preprocessor from modelscope.trainers.hooks.builder import HOOKS @@ -126,6 +127,10 @@ class EpochBasedTrainer(BaseTrainer): self._stop_training = False self._compile = kwargs.get('compile', False) + self.train_dataloader = None + self.eval_dataloader = None + self.data_loader = None + if isinstance(model, str): third_party = kwargs.get(ThirdParty.KEY) if third_party is not None: @@ -178,6 +183,21 @@ class EpochBasedTrainer(BaseTrainer): self.logger = get_logger( log_file=log_file, log_level=self.cfg.get('log_level', 'INFO')) + # Get train datasets + self.train_dataset = self.build_dataset( + datasets=train_dataset, + model_cfg=self.cfg, + mode=ModeKeys.TRAIN, + preprocessor=self.train_preprocessor, + **kwargs) + # Get evaluation datasets + self.eval_dataset = self.build_dataset( + datasets=eval_dataset, + model_cfg=self.cfg, + mode=ModeKeys.EVAL, + preprocessor=self.eval_preprocessor, + **kwargs) + self.train_data_collator, self.eval_data_collator = self.get_data_collator( data_collator, remove_unused_data=kwargs.get('remove_unused_data', False)) @@ -226,19 +246,6 @@ class EpochBasedTrainer(BaseTrainer): self._dist = self.is_dp_group_available() and dist.get_world_size( self.dp_group) > 1 - self.train_dataset = self.to_task_dataset( - train_dataset, - mode=ModeKeys.TRAIN, - task_data_config=self.cfg.safe_get('dataset.train'), - preprocessor=self.train_preprocessor, 
- **kwargs) - self.eval_dataset = self.to_task_dataset( - eval_dataset, - mode=ModeKeys.EVAL, - task_data_config=self.cfg.safe_get('dataset.val'), - preprocessor=self.eval_preprocessor, - **kwargs) - self.metrics = self.get_metrics() if not self.parallel_groups: @@ -466,85 +473,108 @@ class EpochBasedTrainer(BaseTrainer): else: return _get_data_len(self.eval_dataloader) - def to_task_dataset(self, - datasets: Union[Dataset, List[Dataset]], - mode: str, - task_data_config: Config = None, - preprocessor: Optional[Preprocessor] = None, - **kwargs): - """Build the task specific dataset processor for this trainer. + def build_dataset(self, + datasets: Union[Dataset, MsDataset, List[Dataset]], + model_cfg: Config, + mode: str, + preprocessor: Optional[Preprocessor] = None, + **kwargs): + """Build input datasets by given model configuration and preprocessor. - Returns: The task dataset processor for the task. If no result for the very model-type and task, - the default TaskDataset will be returned. + Args: + datasets (Union[Dataset, MsDataset, List[Dataset]]): The input datasets. + model_cfg (Config): The model configuration. + mode (str): `train`, `eval` or `inference`. See modelscope.utils.constant.ModeKeys + preprocessor (Preprocessor, Optional): The preprocessor for input data samples. + + Returns: + Preprocessed datasets. """ try: - to_tensor = kwargs.get('to_tensor', True) if not datasets: - return datasets - if isinstance(datasets, TorchTaskDataset): + return EpochBasedTrainer.build_dataset_from_cfg( + model_cfg=model_cfg, mode=mode, preprocessor=preprocessor) + + if isinstance(datasets, TorchCustomDataset): return datasets elif isinstance(datasets, MsDataset): - if task_data_config is None: - # adapt to some special models - task_data_config = ConfigDict( - type=self.cfg.model.type) if hasattr( - self.cfg, ConfigFields.model) else ConfigDict( - type=None) - task_data_config.update(dict(mode=mode)) - return datasets.to_torch_dataset( - task_data_config=task_data_config, - task_name=self.cfg.task, - preprocessors=preprocessor, - to_tensor=to_tensor) + if not datasets.is_custom: + datasets.to_custom_dataset( + custom_cfg=model_cfg, + preprocessor=preprocessor, + mode=mode, + **kwargs) + return datasets.ds_instance elif isinstance(datasets, List) and isinstance( datasets[0], MsDataset): - if task_data_config is None: - # adapt to some special models - task_data_config = ConfigDict( - type=self.cfg.model.type) if hasattr( - self.cfg, ConfigFields.model) else ConfigDict( - type=None) - task_data_config.update(dict(mode=mode)) - datasets = [ - d.to_torch_dataset( - task_data_config=task_data_config, - task_name=self.cfg.task, - preprocessors=preprocessor, - to_tensor=to_tensor) for d in datasets - ] - cfg = ConfigDict( - type=self.cfg.model.type, mode=mode, datasets=datasets) - task_dataset = build_task_dataset(cfg, self.cfg.task) - task_dataset.trainer = self - return task_dataset + custom_datasets = [] + for dataset in datasets: + if not dataset.is_custom: + dataset.to_custom_dataset( + custom_cfg=model_cfg, + preprocessor=preprocessor, + mode=mode, + **kwargs) + custom_datasets.append(dataset.ds_instance) + torch_custom_dataset = TorchCustomDataset( + datasets=custom_datasets, + mode=mode, + preprocessor=None, + **kwargs) + torch_custom_dataset.trainer = self + return torch_custom_dataset else: - if task_data_config is None: + dataset_mode_key = 'train' if mode == ModeKeys.TRAIN else 'val' + data_config = model_cfg.safe_get(f'dataset.{dataset_mode_key}') + if data_config is None: # adapt to 
some special models - task_data_config = {} + data_config = {} # avoid adding non-string values (datasets, preprocessors) into the cfg - task_data_build_config = ConfigDict( - type=self.cfg.model.type, + data_build_config = ConfigDict( + type=model_cfg.model.type, mode=mode, datasets=datasets, preprocessor=preprocessor) - task_data_build_config.update(task_data_config) - task_dataset = build_task_dataset(task_data_build_config, - self.cfg.task) - task_dataset.trainer = self - return task_dataset - except Exception: + data_build_config.update(data_config) + custom_dataset = build_custom_dataset(data_build_config, + model_cfg.task) + custom_dataset.trainer = self + return custom_dataset + except Exception as e: + self.logger.error(f'build_dataset error: {e}') if isinstance(datasets, (List, Tuple)) or preprocessor is not None: - task_dataset = TorchTaskDataset( + custom_dataset = TorchCustomDataset( datasets, mode=mode, preprocessor=preprocessor, - **(dict(type=self.cfg.model.type) if hasattr( - self.cfg, 'model') else {})) - task_dataset.trainer = self - return task_dataset + **(dict(type=model_cfg.model.type) if hasattr( + model_cfg, 'model') else {})) + custom_dataset.trainer = self + return custom_dataset else: return datasets + @staticmethod + def build_dataset_from_cfg(model_cfg: Config, + mode: str, + preprocessor: Preprocessor = None): + dataset = None + dataset_name = model_cfg.safe_get('dataset.name') + subset_name = model_cfg.safe_get('dataset.subset', default='default') + split_name = model_cfg.safe_get(f'dataset.split_{mode}') + if not dataset_name or not split_name: + return dataset + dataset = MsDataset.load( + dataset_name=dataset_name, + subset_name=subset_name, + split=split_name, + custom_cfg=model_cfg) + if not dataset.is_custom: + dataset.to_custom_dataset( + custom_cfg=model_cfg, preprocessor=preprocessor, mode=mode) + + return dataset.ds_instance + def build_preprocessor(self) -> Tuple[Preprocessor, Preprocessor]: """Build train and eval preprocessor. @@ -667,7 +697,7 @@ class EpochBasedTrainer(BaseTrainer): checkpoint_path, self, strict=strict) self.model.eval() self._mode = ModeKeys.EVAL - predict_dataloader = self.get_predict_data_loader(predict_datasets) + predict_dataloader = self.get_predict_dataloader(predict_datasets) metric_classes = [PredictionSavingWrapper(saving_fn=saving_fn)] for m in metric_classes: @@ -836,11 +866,7 @@ class EpochBasedTrainer(BaseTrainer): (or `get_train_dataloader` in a subclass. """ if self.train_dataset is None: - train_data = self.cfg.dataset.train - self.train_dataset = self.build_dataset( - train_data, - mode=ModeKeys.TRAIN, - preprocessor=self.train_preprocessor) + raise ValueError('The train_dataset cannot be None.') data_loader = self._build_dataloader_with_dataset( self.train_dataset, dist=self._dist, @@ -857,11 +883,7 @@ class EpochBasedTrainer(BaseTrainer): pass """ if self.eval_dataset is None: - val_data = self.cfg.dataset.val - self.eval_dataset = self.build_dataset( - val_data, - mode=ModeKeys.EVAL, - preprocessor=self.eval_preprocessor) + raise ValueError('The eval_dataset cannot be None.') default_config = {'shuffle': False} default_config.update(self.cfg.evaluation.get('dataloader', {})) @@ -873,15 +895,16 @@ class EpochBasedTrainer(BaseTrainer): **default_config) return data_loader - def get_predict_data_loader(self, predict_datasets: Union[Dataset, - List[Dataset]]): + def get_predict_dataloader(self, predict_datasets: Union[Dataset, + List[Dataset]]): """ Builder torch dataloader for prediction with the config of evaluation. 
Args: predict_datasets(Union[Dataset, List[Dataset]]): The datasets used to predict ground truth. """ - dataset = self.to_task_dataset( - predict_datasets, + dataset = self.build_dataset( + datasets=predict_datasets, + model_cfg=self.cfg, mode=ModeKeys.EVAL, preprocessor=self.eval_preprocessor) @@ -895,26 +918,6 @@ class EpochBasedTrainer(BaseTrainer): **default_config) return data_loader - def build_dataset(self, data_cfg, mode, preprocessor=None): - """ Build torch dataset object using data config - """ - # TODO: support MsDataset load for cv - if hasattr(data_cfg, 'name'): - dataset_name = data_cfg.pop('name') - dataset = MsDataset.load( - dataset_name=dataset_name, - **data_cfg, - ) - cfg = ConfigDict(type=self.cfg.model.type, mode=mode) - torch_dataset = dataset.to_torch_dataset( - task_data_config=cfg, - task_name=self.cfg.task, - preprocessors=preprocessor) - else: - torch_dataset = build_task_dataset(data_cfg, self.cfg.task) - dataset = self.to_task_dataset(torch_dataset, mode) - return dataset - def build_optimizer(self, cfg: ConfigDict, default_args: dict = None): try: return build_optimizer( diff --git a/modelscope/utils/ast_utils.py b/modelscope/utils/ast_utils.py index 7f078467..76f15e56 100644 --- a/modelscope/utils/ast_utils.py +++ b/modelscope/utils/ast_utils.py @@ -16,7 +16,7 @@ import json from modelscope import __version__ from modelscope.fileio.file import LocalStorage -from modelscope.metainfo import (Datasets, Heads, Hooks, LR_Schedulers, +from modelscope.metainfo import (CustomDatasets, Heads, Hooks, LR_Schedulers, Metrics, Models, Optimizers, Pipelines, Preprocessors, TaskModels, Trainers) from modelscope.utils.constant import Fields, Tasks diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index bedafa0c..f2623db4 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -526,3 +526,8 @@ class DistributedParallelType(object): DP = 'data_parallel' TP = 'tensor_model_parallel' PP = 'pipeline_model_parallel' + + +class DatasetTensorflowConfig: + BATCH_SIZE = 'batch_size' + DEFAULT_BATCH_SIZE_VALUE = 5 diff --git a/tests/msdatasets/test_ms_dataset.py b/tests/msdatasets/test_ms_dataset.py index 51074bca..8ded9a46 100644 --- a/tests/msdatasets/test_ms_dataset.py +++ b/tests/msdatasets/test_ms_dataset.py @@ -3,12 +3,16 @@ import hashlib import os import unittest +from modelscope.hub.snapshot_download import snapshot_download from modelscope.models import Model from modelscope.msdatasets import MsDataset -from modelscope.msdatasets.audio.asr_dataset import ASRDataset +from modelscope.msdatasets.dataset_cls.custom_datasets.audio.asr_dataset import \ + ASRDataset from modelscope.preprocessors import TextClassificationTransformersPreprocessor from modelscope.preprocessors.base import Preprocessor -from modelscope.utils.constant import DEFAULT_DATASET_NAMESPACE, DownloadMode +from modelscope.utils.config import Config +from modelscope.utils.constant import (DEFAULT_DATASET_NAMESPACE, DownloadMode, + ModelFile) from modelscope.utils.test_utils import require_tf, require_torch, test_level @@ -68,6 +72,7 @@ class MsDatasetTest(unittest.TestCase): ms_ds_train = MsDataset.load('movie_scene_seg_toydata', split='train') print(ms_ds_train._hf_ds.config_kwargs) assert next(iter(ms_ds_train.config_kwargs['split_config'].values())) + assert next(iter(ms_ds_train)) @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_coco(self): @@ -260,6 +265,34 @@ class MsDatasetTest(unittest.TestCase): print(data_example) assert 
data_example.values() + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_to_custom_dataset_movie_scene_toydata(self): + from modelscope.msdatasets.dataset_cls.custom_datasets.movie_scene_segmentation import \ + MovieSceneSegmentationDataset + from modelscope.msdatasets.dataset_cls.dataset import ExternalDataset + + model_id = 'damo/cv_resnet50-bert_video-scene-segmentation_movienet' + cache_path = snapshot_download(model_id) + config_path = os.path.join(cache_path, ModelFile.CONFIGURATION) + cfg = Config.from_file(config_path) + + # ds_test.ds_instance got object 'MovieSceneSegmentationDataset' when the custom_cfg is not none. + ds_test_1 = MsDataset.load( + 'modelscope/movie_scene_seg_toydata', + split='test', + custom_cfg=cfg, + test_mode=True) + assert ds_test_1.is_custom + assert isinstance(ds_test_1.ds_instance, MovieSceneSegmentationDataset) + + # ds_test.ds_instance got object 'ExternalDataset' when the custom_cfg is none. (by default) + ds_test_2 = MsDataset.load( + 'modelscope/movie_scene_seg_toydata', + split='test', + custom_cfg=None) + assert not ds_test_2.is_custom + assert isinstance(ds_test_2.ds_instance, ExternalDataset) + if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_movie_scene_segmentation.py b/tests/pipelines/test_movie_scene_segmentation.py index affd5140..0ac8b716 100644 --- a/tests/pipelines/test_movie_scene_segmentation.py +++ b/tests/pipelines/test_movie_scene_segmentation.py @@ -1,8 +1,15 @@ # Copyright (c) Alibaba, Inc. and its affiliates. +import os +import tempfile import unittest +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset from modelscope.pipelines import pipeline -from modelscope.utils.constant import Tasks +from modelscope.trainers import build_trainer +from modelscope.utils.config import Config, ConfigDict +from modelscope.utils.constant import ModelFile, Tasks from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level @@ -13,6 +20,12 @@ class MovieSceneSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): self.task = Tasks.movie_scene_segmentation self.model_id = 'damo/cv_resnet50-bert_video-scene-segmentation_movienet' + cache_path = snapshot_download(self.model_id) + config_path = os.path.join(cache_path, ModelFile.CONFIGURATION) + self.cfg = Config.from_file(config_path) + + self.tmp_dir = tempfile.TemporaryDirectory().name + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_movie_scene_segmentation(self): input_location = 'data/test/videos/movie_scene_segmentation_test_video.mp4' @@ -24,6 +37,81 @@ class MovieSceneSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): else: raise ValueError('process error') + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_movie_scene_segmentation_finetune(self): + + train_data_cfg = ConfigDict( + name='movie_scene_seg_toydata', + split='train', + cfg=self.cfg.preprocessor, + test_mode=False) + + train_dataset = MsDataset.load( + dataset_name=train_data_cfg.name, + split=train_data_cfg.split, + cfg=train_data_cfg.cfg, + test_mode=train_data_cfg.test_mode) + + test_data_cfg = ConfigDict( + name='movie_scene_seg_toydata', + split='test', + cfg=self.cfg.preprocessor, + test_mode=True) + + test_dataset = MsDataset.load( + dataset_name=test_data_cfg.name, + split=test_data_cfg.split, + cfg=test_data_cfg.cfg, + 
test_mode=test_data_cfg.test_mode) + + kwargs = dict( + model=self.model_id, + train_dataset=train_dataset, + eval_dataset=test_dataset, + work_dir=self.tmp_dir) + + trainer = build_trainer( + name=Trainers.movie_scene_segmentation, default_args=kwargs) + trainer.train() + results_files = os.listdir(trainer.work_dir) + print(results_files) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_movie_scene_segmentation_finetune_with_custom_dataset(self): + + data_cfg = ConfigDict( + dataset_name='movie_scene_seg_toydata', + namespace='modelscope', + train_split='train', + test_split='test', + model_cfg=self.cfg) + + train_dataset = MsDataset.load( + dataset_name=data_cfg.dataset_name, + namespace=data_cfg.namespace, + split=data_cfg.train_split, + custom_cfg=data_cfg.model_cfg, + test_mode=False) + + test_dataset = MsDataset.load( + dataset_name=data_cfg.dataset_name, + namespace=data_cfg.namespace, + split=data_cfg.test_split, + custom_cfg=data_cfg.model_cfg, + test_mode=True) + + kwargs = dict( + model=self.model_id, + train_dataset=train_dataset, + eval_dataset=test_dataset, + work_dir=self.tmp_dir) + + trainer = build_trainer( + name=Trainers.movie_scene_segmentation, default_args=kwargs) + trainer.train() + results_files = os.listdir(trainer.work_dir) + print(results_files) + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_movie_scene_segmentation_with_default_task(self): input_location = 'data/test/videos/movie_scene_segmentation_test_video.mp4' diff --git a/tests/run_analysis.py b/tests/run_analysis.py index d6a526ac..ca0a0018 100644 --- a/tests/run_analysis.py +++ b/tests/run_analysis.py @@ -259,7 +259,7 @@ def get_test_suites_to_run(): affected_trainer_cases.extend( model_trainer_map[model_id]) elif (affected_register_module[0] == 'HOOKS' - or affected_register_module[0] == 'TASK_DATASETS'): + or affected_register_module[0] == 'CUSTOM_DATASETS'): # ["HOOKS", "", "CheckpointHook", "CheckpointHook"] # ["HOOKS", "", hook_name, class_name] # HOOKS, DATASETS modify run all trainer cases diff --git a/tests/taskdataset/test_veco_dataset.py b/tests/taskdataset/test_veco_dataset.py index 76da1681..c220c363 100644 --- a/tests/taskdataset/test_veco_dataset.py +++ b/tests/taskdataset/test_veco_dataset.py @@ -2,7 +2,8 @@ import unittest -from modelscope.msdatasets.task_datasets.veco_dataset import VecoDataset +from modelscope.msdatasets.dataset_cls.custom_datasets.veco_dataset import \ + VecoDataset from modelscope.utils.test_utils import test_level diff --git a/tests/trainers/test_action_detection_trainer.py b/tests/trainers/test_action_detection_trainer.py index 7d0b401f..f2461ebb 100644 --- a/tests/trainers/test_action_detection_trainer.py +++ b/tests/trainers/test_action_detection_trainer.py @@ -43,7 +43,7 @@ class TestActionDetectionTrainer(unittest.TestCase): shutil.rmtree(self.tmp_dir) super().tearDown() - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_trainer(self): def cfg_modify_fn(cfg): diff --git a/tests/trainers/test_image_deblur_trainer.py b/tests/trainers/test_image_deblur_trainer.py index 6ae88726..f07db1bb 100644 --- a/tests/trainers/test_image_deblur_trainer.py +++ b/tests/trainers/test_image_deblur_trainer.py @@ -7,7 +7,7 @@ import unittest from modelscope.hub.snapshot_download import snapshot_download from modelscope.models.cv.image_deblur import NAFNetForImageDeblur from modelscope.msdatasets 
import MsDataset -from modelscope.msdatasets.task_datasets.gopro_image_deblurring_dataset import \ +from modelscope.msdatasets.dataset_cls.custom_datasets.gopro_image_deblurring_dataset import \ GoproImageDeblurringDataset from modelscope.trainers import build_trainer from modelscope.utils.config import Config diff --git a/tests/trainers/test_image_denoise_trainer.py b/tests/trainers/test_image_denoise_trainer.py index 3b5882bd..e2b65b32 100644 --- a/tests/trainers/test_image_denoise_trainer.py +++ b/tests/trainers/test_image_denoise_trainer.py @@ -7,7 +7,7 @@ import unittest from modelscope.hub.snapshot_download import snapshot_download from modelscope.models.cv.image_denoise import NAFNetForImageDenoise from modelscope.msdatasets import MsDataset -from modelscope.msdatasets.task_datasets.sidd_image_denoising import \ +from modelscope.msdatasets.dataset_cls.custom_datasets.sidd_image_denoising import \ SiddImageDenoisingDataset from modelscope.trainers import build_trainer from modelscope.utils.config import Config diff --git a/tests/trainers/test_image_instance_segmentation_trainer.py b/tests/trainers/test_image_instance_segmentation_trainer.py index 03f7eea3..923eca2c 100644 --- a/tests/trainers/test_image_instance_segmentation_trainer.py +++ b/tests/trainers/test_image_instance_segmentation_trainer.py @@ -11,8 +11,6 @@ from modelscope.metainfo import Trainers from modelscope.models.cv.image_instance_segmentation import \ CascadeMaskRCNNSwinModel from modelscope.msdatasets import MsDataset -from modelscope.msdatasets.task_datasets import \ - ImageInstanceSegmentationCocoDataset from modelscope.trainers import build_trainer from modelscope.utils.config import Config, ConfigDict from modelscope.utils.constant import DownloadMode, ModelFile diff --git a/tests/trainers/test_image_portrait_enhancement_trainer.py b/tests/trainers/test_image_portrait_enhancement_trainer.py index a9fc74cb..b556a13b 100644 --- a/tests/trainers/test_image_portrait_enhancement_trainer.py +++ b/tests/trainers/test_image_portrait_enhancement_trainer.py @@ -1,21 +1,15 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
import os -import os.path as osp import shutil import tempfile import unittest -from typing import Callable, List, Optional, Tuple, Union - -import cv2 -import torch -from torch.utils import data as data from modelscope.hub.snapshot_download import snapshot_download from modelscope.metainfo import Trainers from modelscope.models.cv.image_portrait_enhancement import \ ImagePortraitEnhancement from modelscope.msdatasets import MsDataset -from modelscope.msdatasets.task_datasets.image_portrait_enhancement import \ +from modelscope.msdatasets.dataset_cls.custom_datasets.image_portrait_enhancement import \ ImagePortraitEnhancementDataset from modelscope.trainers import build_trainer from modelscope.utils.constant import DownloadMode, ModelFile diff --git a/tests/trainers/test_language_guided_video_summarization_trainer.py b/tests/trainers/test_language_guided_video_summarization_trainer.py index 3ff0e102..2673e4b9 100644 --- a/tests/trainers/test_language_guided_video_summarization_trainer.py +++ b/tests/trainers/test_language_guided_video_summarization_trainer.py @@ -7,7 +7,7 @@ import unittest from modelscope.hub.snapshot_download import snapshot_download from modelscope.models.cv.language_guided_video_summarization import \ ClipItVideoSummarization -from modelscope.msdatasets.task_datasets import \ +from modelscope.msdatasets.dataset_cls.custom_datasets import \ LanguageGuidedVideoSummarizationDataset from modelscope.trainers import build_trainer from modelscope.utils.config import Config diff --git a/tests/trainers/test_siamese_uie_trainer.py b/tests/trainers/test_siamese_uie_trainer.py index c143c562..bf21ece9 100644 --- a/tests/trainers/test_siamese_uie_trainer.py +++ b/tests/trainers/test_siamese_uie_trainer.py @@ -16,8 +16,7 @@ class TestFinetuneSiameseUIE(unittest.TestCase): def setUp(self): print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) self.tmp_dir = tempfile.TemporaryDirectory().name - if not os.path.exists(self.tmp_dir): - os.makedirs(self.tmp_dir) + os.makedirs(self.tmp_dir, exist_ok=True) def tearDown(self): shutil.rmtree(self.tmp_dir) diff --git a/tests/trainers/test_tinynas_damoyolo_trainer.py b/tests/trainers/test_tinynas_damoyolo_trainer.py index d08980da..5dd9e928 100644 --- a/tests/trainers/test_tinynas_damoyolo_trainer.py +++ b/tests/trainers/test_tinynas_damoyolo_trainer.py @@ -1,18 +1,12 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
-import glob -import os -import shutil -import tempfile -import unittest -import torch +import os +import unittest from modelscope.hub.snapshot_download import snapshot_download from modelscope.metainfo import Trainers from modelscope.trainers import build_trainer -from modelscope.utils.config import Config -from modelscope.utils.constant import ModelFile -from modelscope.utils.test_utils import DistributedTestCase, test_level +from modelscope.utils.test_utils import test_level def _setup(): diff --git a/tests/trainers/test_video_summarization_trainer.py b/tests/trainers/test_video_summarization_trainer.py index 1cea1eea..35eee2bc 100644 --- a/tests/trainers/test_video_summarization_trainer.py +++ b/tests/trainers/test_video_summarization_trainer.py @@ -6,7 +6,8 @@ import unittest from modelscope.hub.snapshot_download import snapshot_download from modelscope.models.cv.video_summarization import PGLVideoSummarization -from modelscope.msdatasets.task_datasets import VideoSummarizationDataset +from modelscope.msdatasets.dataset_cls.custom_datasets import \ + VideoSummarizationDataset from modelscope.trainers import build_trainer from modelscope.utils.config import Config from modelscope.utils.constant import ModelFile @@ -17,6 +18,7 @@ logger = get_logger() class VideoSummarizationTrainerTest(unittest.TestCase): + # TODO: To be added to CUSTOM_DATASETS register def setUp(self): print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
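Putting the pieces together, the end-to-end flow after this refactor looks roughly like the sketch below. It is a sketch only: it reuses the movie-scene-segmentation model and toy dataset from the tests above, and the temporary work_dir is illustrative:

    import os
    import tempfile

    from modelscope.hub.snapshot_download import snapshot_download
    from modelscope.metainfo import Trainers
    from modelscope.msdatasets import MsDataset
    from modelscope.trainers import build_trainer
    from modelscope.utils.config import Config
    from modelscope.utils.constant import ModelFile

    model_id = 'damo/cv_resnet50-bert_video-scene-segmentation_movienet'
    cfg = Config.from_file(
        os.path.join(snapshot_download(model_id), ModelFile.CONFIGURATION))

    # MsDataset.load() is now the single entry point for custom datasets:
    # passing custom_cfg converts each split via to_custom_dataset() at load time.
    train_dataset = MsDataset.load(
        'movie_scene_seg_toydata',
        namespace='modelscope',
        split='train',
        custom_cfg=cfg,
        test_mode=False)
    eval_dataset = MsDataset.load(
        'movie_scene_seg_toydata',
        namespace='modelscope',
        split='test',
        custom_cfg=cfg,
        test_mode=True)

    # EpochBasedTrainer.build_dataset() consumes these MsDataset instances in
    # its constructor; no explicit to_task_dataset() call remains.
    trainer = build_trainer(
        name=Trainers.movie_scene_segmentation,
        default_args=dict(
            model=model_id,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
            work_dir=tempfile.mkdtemp()))
    trainer.train()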