modelscope/tests/trainers/easycv/test_easycv_trainer.py

# Copyright (c) Alibaba, Inc. and its affiliates.
import glob
import json
import os
import shutil
import tempfile
import unittest

import requests
import torch

from modelscope.metainfo import Models, Pipelines, Trainers
from modelscope.trainers import build_trainer
from modelscope.utils.config import Config
from modelscope.utils.constant import LogKeys, ModeKeys, Tasks
from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import DistributedTestCase, test_level
from modelscope.utils.torch_utils import is_master


def _download_data(url, save_dir):
    """Download the zip archive at ``url`` into ``save_dir`` and unpack it
    into a directory named after the archive."""
    r = requests.get(url, verify=True)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    zip_name = os.path.split(url)[-1]
    save_path = os.path.join(save_dir, zip_name)
    with open(save_path, 'wb') as f:
        f.write(r.content)
    unpack_dir = os.path.join(save_dir, os.path.splitext(zip_name)[0])
    shutil.unpack_archive(save_path, unpack_dir)


def train_func(work_dir, dist=False, log_config=3, imgs_per_gpu=4):
    import easycv

    config_path = os.path.join(
        os.path.dirname(easycv.__file__),
        'configs/detection/yolox/yolox_s_8xb16_300e_coco.py')
    data_dir = os.path.join(work_dir, 'small_coco_test')
    url = 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/datasets/small_coco.zip'
    if is_master():
        _download_data(url, data_dir)

    # Give the master process a moment to finish the download before other
    # ranks start reading the dataset.
    import time
    time.sleep(1)

    # Shrink the YOLOX-S COCO config to a 2-epoch run on the small test set.
    cfg = Config.from_file(config_path)
    cfg.work_dir = work_dir
    cfg.total_epochs = 2
    cfg.checkpoint_config.interval = 1
    cfg.eval_config.interval = 1
    cfg.log_config = dict(
        interval=log_config,
        hooks=[
            dict(type='TextLoggerHook'),
            dict(type='TensorboardLoggerHook')
        ])
    cfg.data.train.data_source.ann_file = os.path.join(
        data_dir, 'small_coco/small_coco/instances_train2017_20.json')
    cfg.data.train.data_source.img_prefix = os.path.join(
        data_dir, 'small_coco/small_coco/train2017')
    cfg.data.val.data_source.ann_file = os.path.join(
        data_dir, 'small_coco/small_coco/instances_val2017_20.json')
    cfg.data.val.data_source.img_prefix = os.path.join(
        data_dir, 'small_coco/small_coco/val2017')
    cfg.data.imgs_per_gpu = imgs_per_gpu
    cfg.data.workers_per_gpu = 2
    cfg.data.val.imgs_per_gpu = 2

    # Convert the EasyCV config into a ModelScope config file and train with
    # the EasyCV trainer.
    ms_cfg_file = os.path.join(work_dir, 'ms_yolox_s_8xb16_300e_coco.json')
    from easycv.utils.ms_utils import to_ms_config

    if is_master():
        to_ms_config(
            cfg,
            dump=True,
            task=Tasks.image_object_detection,
            ms_model_name=Models.yolox,
            pipeline_name=Pipelines.easycv_detection,
            save_path=ms_cfg_file)

    trainer_name = Trainers.easycv
    kwargs = dict(
        task=Tasks.image_object_detection,
        cfg_file=ms_cfg_file,
        launcher='pytorch' if dist else None)
    trainer = build_trainer(trainer_name, kwargs)
    trainer.train()


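# A minimal standalone run of the training routine above could look like the
# sketch below (assumptions: a CUDA device, EasyCV installed, and network
# access for the small_coco download). The test cases that follow wrap this
# same call with assertions on the emitted logs and checkpoints.
#
#     import tempfile
#     train_func(tempfile.mkdtemp())

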
@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest')
class EasyCVTrainerTestSingleGpu(unittest.TestCase):

    def setUp(self):
        self.logger = get_logger()
        self.logger.info(('Testing %s.%s' %
                          (type(self).__name__, self._testMethodName)))
        self.tmp_dir = tempfile.TemporaryDirectory().name
        if not os.path.exists(self.tmp_dir):
            os.makedirs(self.tmp_dir)

    def tearDown(self):
        super().tearDown()
        shutil.rmtree(self.tmp_dir, ignore_errors=True)

    @unittest.skipIf(
        True, 'The test cases all run in the master process, '
        'which causes registry conflicts; they should run in a subprocess.')
    def test_single_gpu(self):
        # TODO: run in subprocess
        train_func(self.tmp_dir)

        results_files = os.listdir(self.tmp_dir)
        json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
        self.assertEqual(len(json_files), 1)

        with open(json_files[0], 'r') as f:
            lines = [i.strip() for i in f.readlines()]

        self.assertDictContainsSubset(
            {
                LogKeys.MODE: ModeKeys.TRAIN,
                LogKeys.EPOCH: 1,
                LogKeys.ITER: 3,
                LogKeys.LR: 0.00013
            }, json.loads(lines[0]))
        self.assertDictContainsSubset(
            {
                LogKeys.MODE: ModeKeys.EVAL,
                LogKeys.EPOCH: 1,
                LogKeys.ITER: 10
            }, json.loads(lines[1]))
        self.assertDictContainsSubset(
            {
                LogKeys.MODE: ModeKeys.TRAIN,
                LogKeys.EPOCH: 2,
                LogKeys.ITER: 3,
                LogKeys.LR: 0.00157
            }, json.loads(lines[2]))
        self.assertDictContainsSubset(
            {
                LogKeys.MODE: ModeKeys.EVAL,
                LogKeys.EPOCH: 2,
                LogKeys.ITER: 10
            }, json.loads(lines[3]))
        self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files)
        self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files)

        for i in [0, 2]:
            self.assertIn(LogKeys.DATA_LOAD_TIME, lines[i])
            self.assertIn(LogKeys.ITER_TIME, lines[i])
            self.assertIn(LogKeys.MEMORY, lines[i])
            self.assertIn('total_loss', lines[i])
        for i in [1, 3]:
            self.assertIn(
                'CocoDetectionEvaluator_DetectionBoxes_Precision/mAP',
                lines[i])
            self.assertIn('DetectionBoxes_Precision/mAP', lines[i])
            self.assertIn('DetectionBoxes_Precision/mAP@.50IOU', lines[i])
            self.assertIn('DetectionBoxes_Precision/mAP@.75IOU', lines[i])
            self.assertIn('DetectionBoxes_Precision/mAP (small)', lines[i])


@unittest.skipIf(not torch.cuda.is_available()
                 or torch.cuda.device_count() <= 1, 'distributed unittest')
class EasyCVTrainerTestMultiGpus(DistributedTestCase):

    def setUp(self):
        self.logger = get_logger()
        self.logger.info(('Testing %s.%s' %
                          (type(self).__name__, self._testMethodName)))
        self.tmp_dir = tempfile.TemporaryDirectory().name
        if not os.path.exists(self.tmp_dir):
            os.makedirs(self.tmp_dir)

    def tearDown(self):
        super().tearDown()
        shutil.rmtree(self.tmp_dir, ignore_errors=True)

    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    def test_multi_gpus(self):
        self.start(
            train_func,
            num_gpus=2,
            work_dir=self.tmp_dir,
            dist=True,
            log_config=2,
            imgs_per_gpu=5)

        results_files = os.listdir(self.tmp_dir)
        json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
        self.assertEqual(len(json_files), 1)

        with open(json_files[0], 'r') as f:
            lines = [i.strip() for i in f.readlines()]

        self.assertDictContainsSubset(
            {
                LogKeys.MODE: ModeKeys.TRAIN,
                LogKeys.EPOCH: 1,
                LogKeys.ITER: 2,
                LogKeys.LR: 0.0002
            }, json.loads(lines[0]))
        self.assertDictContainsSubset(
            {
                LogKeys.MODE: ModeKeys.EVAL,
                LogKeys.EPOCH: 1,
                LogKeys.ITER: 5
            }, json.loads(lines[1]))
        self.assertDictContainsSubset(
            {
                LogKeys.MODE: ModeKeys.TRAIN,
                LogKeys.EPOCH: 2,
                LogKeys.ITER: 2,
                LogKeys.LR: 0.0018
            }, json.loads(lines[2]))
        self.assertDictContainsSubset(
            {
                LogKeys.MODE: ModeKeys.EVAL,
                LogKeys.EPOCH: 2,
                LogKeys.ITER: 5
            }, json.loads(lines[3]))
        self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files)
        self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files)

        for i in [0, 2]:
            self.assertIn(LogKeys.DATA_LOAD_TIME, lines[i])
            self.assertIn(LogKeys.ITER_TIME, lines[i])
            self.assertIn(LogKeys.MEMORY, lines[i])
            self.assertIn('total_loss', lines[i])
        for i in [1, 3]:
            self.assertIn(
                'CocoDetectionEvaluator_DetectionBoxes_Precision/mAP',
                lines[i])
            self.assertIn('DetectionBoxes_Precision/mAP', lines[i])
            self.assertIn('DetectionBoxes_Precision/mAP@.50IOU', lines[i])
            self.assertIn('DetectionBoxes_Precision/mAP@.75IOU', lines[i])
            self.assertIn('DetectionBoxes_Precision/mAP (small)', lines[i])


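# A sketch of how these tests are typically invoked (assumptions: a
# CUDA-capable machine; the multi-GPU case additionally needs at least two
# GPUs and a test level of 2 or higher), e.g. from the repository root:
#
#     python tests/trainers/easycv/test_easycv_trainer.py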
if __name__ == '__main__':
    unittest.main()