From 455be472dfdd4b3c9d2e1cc3c4962115760383f4 Mon Sep 17 00:00:00 2001 From: Milan Verespej Date: Thu, 23 May 2019 14:21:19 +0200 Subject: Add base download script This script is supposed to be used for convenience when downloading data from multiple lists at once. Issue-ID: OOM-1803 Change-Id: I4031ed3650f7880883e299b43c79e6bfd08c886c Signed-off-by: Milan Verespej --- build/download/base.py | 3 +- build/download/docker_images.py | 22 +++--- build/download/download.py | 158 ++++++++++++++++++++++++++++++++++++++++ build/download/git_repos.py | 17 +++-- build/download/http_files.py | 12 +-- build/download/npm_packages.py | 8 +- build/download/rpm_packages.py | 15 ++-- 7 files changed, 199 insertions(+), 36 deletions(-) create mode 100755 build/download/download.py (limited to 'build/download') diff --git a/build/download/base.py b/build/download/base.py index 5bcd0ef6..d8b44839 100644 --- a/build/download/base.py +++ b/build/download/base.py @@ -38,7 +38,8 @@ def load_list(item_list): :return: set of items from file """ with open(item_list, 'r') as f: - return {item for item in (line.strip() for line in f) if item} + return {item for item in (line.strip() for line in f) + if item and not item.startswith('#')} def init_progress(items_name): diff --git a/build/download/docker_images.py b/build/download/docker_images.py index e4e742b3..d8138dd6 100755 --- a/build/download/docker_images.py +++ b/build/download/docker_images.py @@ -180,7 +180,7 @@ def download_docker_image(image, save, output_dir, docker_client): if save: save_image(image, pulled_image, output_dir) except Exception as err: - log.error('Error downloading {}: {}'.format(image, err)) + log.exception('Error downloading {}: {}'.format(image, err)) raise err @@ -195,10 +195,10 @@ def download(image_list, save, output_dir, check_mode, progress, workers=3): :return: None """ try: - docker_client = docker.client.DockerClient(version='auto') + # big timeout in case of massive images like pnda-mirror-container:5.0.0 (11.4GB) + docker_client = docker.client.DockerClient(version='auto', timeout=300) except docker.errors.DockerException as err: - log.error(err) - log.error('Error creating docker client. Check if is docker installed and running' + log.exception('Error creating docker client. Check if is docker installed and running' ' or if you have right permissions.') raise err @@ -221,14 +221,12 @@ def download(image_list, save, output_dir, check_mode, progress, workers=3): missing_images['not_saved'] - missing_images['not_pulled'], None, output_dir, docker_client) + base.finish_progress(progress, error_count, log) if error_count > 0: log.error('{} images were not downloaded'.format(error_count)) missing_images = missing(docker_client, target_images, save, output_dir) log.info(check_table(merge_dict_sets(missing_images), missing_images, save)) - - base.finish_progress(progress, error_count, log) - - return error_count + raise RuntimeError() def run_cli(): @@ -256,11 +254,13 @@ def run_cli(): progress = base.init_progress('Docker images') if not args.check else None try: - sys.exit(download(args.image_list, args.save, args.output_dir, args.check, - progress, args.workers)) + download(args.image_list, args.save, args.output_dir, args.check, + progress, args.workers) except docker.errors.DockerException: - log.error('Irrecoverable error detected.') + log.exception('Irrecoverable error detected.') sys.exit(1) + except RuntimeError as err: + log.exception(err) if __name__ == '__main__': diff --git a/build/download/download.py b/build/download/download.py new file mode 100755 index 00000000..ebce931a --- /dev/null +++ b/build/download/download.py @@ -0,0 +1,158 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- + +# COPYRIGHT NOTICE STARTS HERE + +# Copyright 2019 © Samsung Electronics Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# COPYRIGHT NOTICE ENDS HERE + +import argparse +import logging +import sys +import datetime +import timeit + +import base +import docker_images +import git_repos +import http_files +import npm_packages +import rpm_packages + +log = logging.getLogger(name=__name__) + +def parse_args(): + parser=argparse.ArgumentParser(description='Download data from lists') + list_group = parser.add_argument_group() + list_group.add_argument('--docker', action='append', nargs='+', default=[], + metavar=('list', 'dir-name'), + help='Docker type list. If second argument is specified ' + 'it is treated as directory where images will be saved ' + 'otherwise only pull operation is executed') + list_group.add_argument('--http', action='append', nargs=2, default=[], + metavar=('list', 'dir-name'), + help='Http type list and directory to save downloaded files') + list_group.add_argument('--npm', action='append', nargs=2, default=[], + metavar=('list', 'dir-name'), + help='npm type list and directory to save downloaded files') + list_group.add_argument('--rpm', action='append', nargs=2, default=[], + metavar=('list', 'dir-name'), + help='rpm type list and directory to save downloaded files') + list_group.add_argument('--git', action='append', nargs=2, default=[], + metavar=('list', 'dir-name'), + help='git repo type list and directory to save downloaded files') + parser.add_argument('--npm-registry', default='https://registry.npmjs.org', + help='npm registry to use (default: https://registry.npmjs.org)') + parser.add_argument('--check', '-c', action='store_true', default=False, + help='Check what is missing. No download.') + parser.add_argument('--debug', action='store_true', default=False, + help='Turn on debug output') + + args = parser.parse_args() + + for arg in ('docker', 'npm', 'http', 'rpm', 'git'): + if getattr(args, arg): + return args + + parser.error('One of --docker, --npm, --http, --rpm, --git must be specified') + + +def run_cli(): + args = parse_args() + + console_handler = logging.StreamHandler(sys.stdout) + console_formatter = logging.Formatter('%(message)s') + console_handler.setFormatter(console_formatter) + now = datetime.datetime.now().strftime('%Y%m%d%H%M%S') + log_file = 'download_data-{}.log'.format(now) + file_format = "%(asctime)s: %(filename)s: %(levelname)s: %(message)s" + + if args.debug: + logging.basicConfig(level=logging.DEBUG, filename=log_file, format=file_format) + else: + logging.basicConfig(level=logging.INFO, filename=log_file, format=file_format) + root_logger = logging.getLogger() + root_logger.addHandler(console_handler) + + list_with_errors = [] + timer_start = timeit.default_timer() + + for docker_list in args.docker: + log.info('Processing {}.'.format(docker_list[0])) + progress = None if args.check else base.init_progress('docker images') + save = False + if len(docker_list) > 1: + save = True + else: + docker_list.append(None) + try: + docker_images.download(docker_list[0], save, + docker_list[1], args.check, progress) + except RuntimeError: + list_with_errors.append(docker_list[0]) + + for http_list in args.http: + progress = None if args.check else base.init_progress('http files') + log.info('Processing {}.'.format(http_list[0])) + try: + http_files.download(http_list[0], http_list[1], args.check, + progress) + except RuntimeError: + list_with_errors.append(http_list[0]) + + for npm_list in args.npm: + progress = None if args.check else base.init_progress('npm packages') + log.info('Processing {}.'.format(npm_list[0])) + try: + npm_packages.download(npm_list[0], args.npm_registry, npm_list[1], + args.check, progress) + except RuntimeError: + list_with_errors.append(npm_list[0]) + + for rpm_list in args.rpm: + if args.check: + log.info('Check mode for rpm packages is not implemented') + break + log.info('Processing {}.'.format(rpm_list[0])) + try: + rpm_packages.download(rpm_list[0], rpm_list[1]) + except RuntimeError: + list_with_errors.append(rpm_list[0]) + + for git_list in args.git: + if args.check: + log.info('Check mode for git repositories is not implemented') + break + progress = None if args.check else base.init_progress('git repositories') + log.info('Processing {}.'.format(git_list[0])) + try: + git_repos.download(git_list[0], git_list[1], progress) + except RuntimeError: + list_with_errors.append(git_list[0]) + + e_time = datetime.timedelta(seconds=timeit.default_timer() - timer_start) + log.info(timeit.default_timer() - timer_start) + log.info('Execution ended. Total elapsed time {}'.format(e_time)) + + if list_with_errors: + log.error('Errors encountered while processing these lists:' + '\n{}'.format('\n'.join(list_with_errors))) + sys.exit(1) + + + +if __name__ == '__main__': + run_cli() diff --git a/build/download/git_repos.py b/build/download/git_repos.py index e388e94c..aff01b80 100755 --- a/build/download/git_repos.py +++ b/build/download/git_repos.py @@ -45,10 +45,9 @@ def download(git_list, dst_dir, progress): if not base.check_tool('git'): log.error('ERROR: git is not installed') progress.finish(dirty=True) - return 1 + raise RuntimeError('git missing') - git_set = {tuple(item.split()) for item in base.load_list(git_list) - if not item.startswith('#')} + git_set = {tuple(item.split()) for item in base.load_list(git_list)} error_count = 0 @@ -64,14 +63,13 @@ def download(git_list, dst_dir, progress): clone_repo(dst, *repo) progress.update(progress.value + 1) except subprocess.CalledProcessError as err: - log.error(err.output.decode()) + log.exception(err.output.decode()) error_count += 1 base.finish_progress(progress, error_count, log) if error_count > 0: log.error('{} were not downloaded. Check logs for details'.format(error_count)) - return error_count - + raise RuntimeError('Download unsuccesfull') def run_cli(): parser = argparse.ArgumentParser(description='Download git repositories from list') @@ -85,8 +83,11 @@ def run_cli(): logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s') progress = base.init_progress('git repositories') - - sys.exit(download(args.git_list, args.output_dir, progress)) + try: + download(args.git_list, args.output_dir, progress) + except RuntimeError as err: + log.exception(err) + sys.exit(1) if __name__ == '__main__': diff --git a/build/download/http_files.py b/build/download/http_files.py index f5b1e59a..c83158d6 100755 --- a/build/download/http_files.py +++ b/build/download/http_files.py @@ -83,7 +83,7 @@ def download(data_list, dst_dir, check, progress, workers=None): if check: log.info(base.simple_check_table(file_set, missing_files)) - return 0 + return skipping = file_set - missing_files @@ -91,12 +91,11 @@ def download(data_list, dst_dir, check, progress, workers=None): error_count = base.run_concurrent(workers, progress, download_file, missing_files, dst_dir) + base.finish_progress(progress, error_count, log) if error_count > 0: log.error('{} files were not downloaded. Check log for specific failures.'.format(error_count)) + raise RuntimeError() - base.finish_progress(progress, error_count, log) - - return error_count def run_cli(): """ @@ -123,7 +122,10 @@ def run_cli(): progress = base.init_progress('http files') if not args.check else None - sys.exit(download(args.file_list, args.output_dir, args.check, progress, args.workers)) + try: + download(args.file_list, args.output_dir, args.check, progress, args.workers) + except RuntimeError: + sys.exit(1) if __name__ == '__main__': diff --git a/build/download/npm_packages.py b/build/download/npm_packages.py index c174e2c1..70c03ad8 100755 --- a/build/download/npm_packages.py +++ b/build/download/npm_packages.py @@ -57,7 +57,7 @@ def download_npm(npm, registry, dst_dir): except Exception as err: if os.path.isfile(dst_path): os.remove(dst_path) - log.error('Failed: {}: {}'.format(npm, err)) + log.exception('Failed: {}'.format(npm)) raise err log.info('Downloaded: {}'.format(npm)) @@ -81,12 +81,10 @@ def download(npm_list, registry, dst_dir, check_mode, progress=None, workers=Non base.start_progress(progress, len(npm_set), skipping, log) error_count = base.run_concurrent(workers, progress, download_npm, missing_npms, registry, dst_dir) + base.finish_progress(progress, error_count, log) if error_count > 0: log.error('{} packages were not downloaded. Check log for specific failures.'.format(error_count)) - - base.finish_progress(progress, error_count, log) - - return error_count + raise RuntimeError() def run_cli(): diff --git a/build/download/rpm_packages.py b/build/download/rpm_packages.py index 7f9700a3..732af0ea 100755 --- a/build/download/rpm_packages.py +++ b/build/download/rpm_packages.py @@ -33,7 +33,7 @@ log = logging.getLogger(name=__name__) def download(rpm_list, dst_dir): if not base.check_tool('yumdownloader'): log.error('ERROR: yumdownloader is not installed') - return 1 + raise RuntimeError('yumdownloader missing') rpm_set = base.load_list(rpm_list) @@ -41,11 +41,10 @@ def download(rpm_list, dst_dir): log.info('Running command: {}'.format(command)) try: subprocess.check_call(command.split()) - log.info('Downloaded') except subprocess.CalledProcessError as err: - log.error(err.output) - return err.returncode - + log.exception(err.output) + raise err + log.info('Downloaded') def run_cli(): @@ -59,7 +58,11 @@ def run_cli(): logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s') - sys.exit(download(args.rpm_list, args.output_dir)) + try: + download(args.rpm_list, args.output_dir) + except (subprocess.CalledProcessError, RuntimeError): + sys.exit(1) + if __name__ == '__main__': -- cgit 1.2.3-korg