#! /usr/bin/env python
# -*- coding: utf-8 -*-

# COPYRIGHT NOTICE STARTS HERE

# Copyright 2019 © Samsung Electronics Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# COPYRIGHT NOTICE ENDS HERE

"""Download a list of http files into a local directory tree."""

import argparse
import concurrent.futures
import logging
import os
import sys

from retrying import retry

import base

log = logging.getLogger(__name__)

# URI prefixes that mean "a scheme is already present". Checking the full
# prefix (not just 'http') keeps hosts such as 'httpbin.org' from being
# mistaken for complete URLs.
_HTTP_SCHEMES = ('http://', 'https://')


def _destination_path(file_uri, dst_dir):
    """
    Map a list entry to the path it is stored under inside dst_dir.

    Shared by download_file() and missing() so that both agree on where a
    given entry lives, whether or not the entry carries a scheme.
    :param file_uri: address of file, with or without scheme
    :param dst_dir: destination directory
    :return: path of the local copy
    """
    # Keep only what follows the last '//' (i.e. drop 'http://').
    # NOTE(review): a '//' occurring inside the path truncates everything
    # before it as well - confirm list entries never contain one.
    return '{}/{}'.format(dst_dir, file_uri.rsplit('//')[-1])


# Retried up to 5 times with a fixed 2000 ms pause between attempts.
@retry(stop_max_attempt_number=5, wait_fixed=2000)
def get_file(file_uri):
    """
    Get file from the Internet
    :param file_uri: address of file; 'http://' is prepended when the
                     address has no http/https scheme
    :return: byte content of file
    """
    if not file_uri.lower().startswith(_HTTP_SCHEMES):
        file_uri = 'http://' + file_uri
    file_req = base.make_get_request(file_uri)
    return file_req.content


def download_file(file_uri, dst_dir):
    """
    Download http file and save it to file.
    :param file_uri: http address of file
    :param dst_dir: directory where file will be saved
    :raises Exception: whatever fetching or saving raised, after the
                       partially written file (if any) has been removed
    """
    log.info('Downloading: %s', file_uri)
    dst_path = _destination_path(file_uri, dst_dir)
    try:
        file_content = get_file(file_uri)
        base.save_to_file(dst_path, file_content)
    except Exception as err:
        # Do not leave a truncated file behind: it would be taken for a
        # finished download on the next run.
        if os.path.isfile(dst_path):
            os.remove(dst_path)
        log.error('Error downloading: %s: %s', file_uri, err)
        # Bare raise keeps the original traceback intact.
        raise
    log.info('Downloaded: %s', file_uri)


def missing(file_set, dst_dir):
    """
    Select the entries that have no local copy yet.
    :param file_set: set of file addresses
    :param dst_dir: destination directory
    :return: set of addresses whose target file does not exist in dst_dir
    """
    return {file_uri for file_uri in file_set
            if not os.path.isfile(_destination_path(file_uri, dst_dir))}


def download(data_list, dst_dir, check, progress, workers=None):
    """
    Download files specified in data list
    :param data_list: path to file with list
    :param dst_dir: destination directory
    :param check: boolean check mode; only a summary table is logged and
                  nothing is downloaded
    :param progress: progressbar.ProgressBar to monitor progress
    :param workers: workers to use for parallel execution
    :return: None
    :raises RuntimeError: if at least one file could not be downloaded
    """
    file_set = base.load_list(data_list)
    missing_files = missing(file_set, dst_dir)

    if check:
        log.info(base.simple_check_table(file_set, missing_files))
        return

    # Entries already present locally are reported as skipped.
    skipping = file_set - missing_files

    base.start_progress(progress, len(file_set), skipping, log)

    error_count = base.run_concurrent(workers, progress, download_file,
                                      missing_files, dst_dir)

    base.finish_progress(progress, error_count, log)
    if error_count > 0:
        log.error('%s files were not downloaded. '
                  'Check log for specific failures.', error_count)
        raise RuntimeError(
            '{} files were not downloaded'.format(error_count))


def run_cli():
    """
    Run as cli tool

    Exits with status 1 when download() raises RuntimeError.
    """
    parser = argparse.ArgumentParser(
        description='Download http files from list')
    parser.add_argument('file_list', metavar='file-list',
                        help='File with list of http files to download')
    parser.add_argument('--output-dir', '-o', default=os.getcwd(),
                        help='Destination directory for saving')
    parser.add_argument('--check', '-c', action='store_true', default=False,
                        help='Check mode')
    parser.add_argument('--debug', action='store_true', default=False,
                        help='Turn on debug output')
    parser.add_argument('--workers', type=int, default=None,
                        help='Set maximum workers for parallel download '
                             '(default: cores * 5)')

    args = parser.parse_args()

    if args.debug:
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    else:
        logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                            format='%(message)s')

    # No progress bar in check mode: nothing is downloaded there.
    progress = base.init_progress('http files') if not args.check else None

    try:
        download(args.file_list, args.output_dir, args.check, progress,
                 args.workers)
    except RuntimeError:
        sys.exit(1)


if __name__ == '__main__':
    run_cli()