# Copyright (c) OpenMMLab. All rights reserved.
import os
import os.path as osp
import platform
import subprocess
import sys
from typing import List, Optional, Union

import click
import yaml

from import (
from import get_model_info
from mim.utils import (

    help='Config ids to download, such as resnet18_8xb16_cifar10',
    help='dataset name to download, such as coco2017',
    help='Ignore ssl certificate check')
    '--dest', 'dest_root', type=str, help='Destination of saving checkpoints.')
def cli(package: str,
        configs: Optional[List[str]],
        dataset: Optional[str],
        dest_root: Optional[str] = None,
        check_certificate: bool = True) -> None:
    """Download checkpoints from url and parse configs from package.

    \b
    Example:
        > mim download mmcls --config resnet18_8xb16_cifar10
        > mim download mmcls --config resnet18_8xb16_cifar10 --dest .
    """
    # Thin command-line wrapper: the arguments are forwarded to ``download``
    # unchanged. Note that ``download`` takes ``dataset`` as its last
    # positional parameter, hence the different argument order here.
    download(package, configs, dest_root, check_certificate, dataset)

def download(package: str,
             configs: Optional[List[str]] = None,
             dest_root: Optional[str] = None,
             check_certificate: bool = True,
             dataset: Optional[str] = None) -> Union[List[str], None]:
    """Download checkpoints from url and parse configs from package.

    Exactly one of ``configs`` and ``dataset`` must be given.

    Args:
        package (str): Name of package.
        configs (List[str], optional): List of config ids.
        dest_root (str, optional): Destination directory to save checkpoint
            and config. Default: None.
        check_certificate (bool): Whether to check the ssl certificate.
            Default: True.
        dataset (str, optional): The name of dataset.

    Returns:
        List[str] or None: The value returned by ``_download_configs`` when
        ``configs`` is given, otherwise the value returned by
        ``_download_dataset`` (``None``).

    Raises:
        ValueError: If ``package`` cannot be resolved to a unique package
            name, if both ``configs`` and ``dataset`` are given, or if
            neither of them is given.
    """
    # Resolve a possibly abbreviated package name first; an empty result
    # means the abbreviation did not map to a unique package.
    full_name = module_full_name(package)
    if full_name == '':
        msg = f"Can't determine a unique package given abbreviation {package}"
        raise ValueError(highlighted_error(msg))
    package = full_name

    # Fall back to the default cache directory and always work with an
    # absolute destination path.
    if dest_root is None:
        dest_root = DEFAULT_CACHE_DIR

    dest_root = osp.abspath(dest_root)

    if configs is not None and dataset is not None:
        raise ValueError(
            'Cannot download config and dataset at the same time!')
    if configs is None and dataset is None:
        raise ValueError('Please specify config or dataset to download!')

    if configs is not None:
        return _download_configs(package, configs, dest_root,
                                 check_certificate)
    else:
        return _download_dataset(package, dataset, dest_root)  # type: ignore


def _download_configs(package: str,
                      configs: List[str],
                      dest_root: str,
                      check_certificate: bool = True) -> List[str]:
    """Download the checkpoints of the given configs and dump the configs.

    Args:
        package (str): Name of an installed package, without a version
            specifier.
        configs (List[str]): Config ids to process. Each must be a key of
            the model info reported for ``package``.
        dest_root (str): Directory that receives the checkpoints and the
            dumped ``{config}.py`` files. Created if missing.
        check_certificate (bool): Forwarded to ``download_from_file``.
            Default: True.

    Returns:
        List[str]: One entry per dumped config file: the file name of the
        last checkpoint processed for that config.

    Raises:
        ValueError: If ``package`` carries a version, if any requested
            config id is unknown, or if a config file cannot be found in
            the installed package.
        RuntimeError: If ``package`` is not installed.
        ImportError: If neither ``mmengine`` nor ``mmcv`` provides
            ``Config``.
    """
    # Create the destination directory if it does not exist.
    os.makedirs(dest_root, exist_ok=True)

    package, version = split_package_version(package)
    if version:
        raise ValueError(
            highlighted_error('version is not allowed, please type '
                              '"mim download -h" to show the correct way.'))

    if not is_installed(package):
        raise RuntimeError(
            highlighted_error(f'{package} is not installed. Please install it '
                              'first.'))

    checkpoints = []
    model_info = get_model_info(
        package, shown_fields=['weight', 'config'], to_dict=True)
    valid_configs = model_info.keys()
    invalid_configs = set(configs) - set(valid_configs)
    if invalid_configs:
        raise ValueError(
            highlighted_error(f'Expected configs: {valid_configs}, but got '
                              f'{invalid_configs}'))

    # Prefer mmengine's Config and fall back to the one shipped with mmcv.
    try:
        from mmengine import Config
    except ImportError:
        try:
            from mmcv import Config
        except ImportError:
            raise ImportError(
                'Please install mmengine to use the download command: '
                '`mim install mmengine`.')

    for config in configs:
        click.echo(f'processing {config}...')

        # The 'weight' field may hold several comma-separated URLs.
        checkpoint_urls = model_info[config]['weight']
        for checkpoint_url in checkpoint_urls.split(','):
            filename = checkpoint_url.split('/')[-1]
            checkpoint_path = osp.join(dest_root, filename)
            if osp.exists(checkpoint_path):
                echo_success(f'{filename} exists in {dest_root}')
            else:
                # TODO: check checkpoint hash when all the models are ready.
                download_from_file(
                    checkpoint_url,
                    checkpoint_path,
                    check_certificate=check_certificate)

                echo_success(
                    f'Successfully downloaded {filename} to {dest_root}')

        config_paths = model_info[config]['config']
        for config_path in config_paths.split(','):
            installed_path = get_installed_path(package)
            # configs will be put in package/.mim in PR #68
            possible_config_paths = [
                osp.join(installed_path, '.mim', config_path),
                osp.join(installed_path, config_path)
            ]
            # Use a dedicated name for the candidate so the outer loop
            # variable is not shadowed.
            for candidate_path in possible_config_paths:
                if osp.exists(candidate_path):
                    config_obj = Config.fromfile(candidate_path)
                    saved_config_path = osp.join(dest_root, f'{config}.py')
                    config_obj.dump(saved_config_path)
                    echo_success(
                        f'Successfully dumped {config}.py to {dest_root}')
                    checkpoints.append(filename)
                    break
            else:
                # No candidate existed; report the last location tried.
                raise ValueError(
                    highlighted_error(f'{candidate_path} is not found.'))

    return checkpoints


def _download_dataset(package: str, dataset: str, dest_root: str) -> None:
    """Download a dataset with ``odl`` and run its preprocessing script.

    Args:
        package (str): Name of an installed package whose ``.mim``
            directory contains ``dataset-index.yml``.
        dataset (str): Top-level key of the dataset in
            ``dataset-index.yml``.
        dest_root (str): Directory for the prepared data. When it equals
            the absolute default cache directory, the ``data_root`` from
            the dataset index is used instead.

    Raises:
        RuntimeError: If the platform is not Linux or ``package`` is not
            installed.
        FileNotFoundError: If ``dataset-index.yml`` does not exist.
        KeyError: If ``dataset`` is not listed in ``dataset-index.yml``.
        subprocess.CalledProcessError: If ``odl get`` exits with a non-zero
            status.
    """
    if platform.system() != 'Linux':
        raise RuntimeError('downloading dataset is only supported in Linux!')

    if not is_installed(package):
        raise RuntimeError(
            f'Please install {package} by `pip install {package}`')

    installed_path = get_installed_path(package)
    mim_path = osp.join(installed_path, '.mim')
    dataset_index_path = osp.join(mim_path, 'dataset-index.yml')

    if not osp.exists(dataset_index_path):
        raise FileNotFoundError(
            f'Cannot find {dataset_index_path}, '
            f'please update {package} to the latest version! If you have '
            f'already updated it and still get this error, please report an '
            f'issue to {package}')
    with open(dataset_index_path) as f:
        datasets_meta = yaml.load(f, Loader=yaml.SafeLoader)

    if dataset not in datasets_meta:
        # Interpolate with an f-string only: calling ``str.format`` on the
        # already-interpolated message would choke on braces contained in
        # the dataset name or the index path.
        available = '\n'.join(datasets_meta.keys())
        raise KeyError(f'Cannot find {dataset} in {dataset_index_path}. '
                       f'here are the available datasets: {available}')
    dataset_meta = datasets_meta[dataset]

    # OpenMMLab repo will define the `dataset-index.yml` like this:
    #
    # voc2007:
    #   dataset: PASCAL_VOC2007
    #   download_root: data
    #   data_root: data
    #   script: tools/dataset_converters/scripts/
    #
    # In this case, the top level key "voc2007" means the "Dataset Name" passed
    # to `mim download --dataset {Dataset Name}`
    # The nested field "dataset" means the argument passed to `odl get`
    # If the value of "dataset" is the same as the "Dataset Name", downstream
    # repos can skip defining "dataset" and "Dataset Name" will be passed
    # to `odl get`
    src_name = dataset_meta.get('dataset', dataset)

    # `odl get` will download raw dataset to `download_root`, and the script
    # will process the raw data and put the prepared data to the `data_root`
    download_root = dataset_meta['download_root']
    os.makedirs(download_root, exist_ok=True)

    color_echo(f'Start downloading {dataset} to {download_root}...', 'blue')
    subprocess.check_call(['odl', 'get', src_name, '-d', download_root],
                          stdin=sys.stdin,
                          stdout=sys.stdout)

    if not osp.exists(download_root):
        return

    # Preprocessing is optional: datasets without a script are done here.
    script_path = dataset_meta.get('script')
    if script_path is None:
        return

    script_path = osp.join(mim_path, script_path)
    color_echo('Preprocess data ...', 'blue')
    if dest_root == osp.abspath(DEFAULT_CACHE_DIR):
        data_root = dataset_meta['data_root']
    else:
        data_root = dest_root
    os.makedirs(data_root, exist_ok=True)
    # Make sure the script is executable before invoking it directly.
    call_command(['chmod', '+x', script_path])
    call_command([script_path, download_root, data_root])
    echo_success('Finished!')
