diff options
author | Milan Verespej <m.verespej@partner.samsung.com> | 2019-06-18 13:46:48 +0200 |
---|---|---|
committer | Milan Verespej <m.verespej@partner.samsung.com> | 2019-06-18 18:43:48 +0200 |
commit | 4c7e8924320654de60efd5096d6678549ca5ac05 (patch) | |
tree | 62ca36db448547be5036da3321f3f54204bd1787 /build/download | |
parent | 2e1328a8867190f203043fb5758dc8117ba3d673 (diff) |
Refactor npm packages download
Series of commits to improve the Python download scripts.
Issue-ID: OOM-1803
Change-Id: I199660ab121038780c34065f78538bb4e7b07eb5
Signed-off-by: Milan Verespej <m.verespej@partner.samsung.com>
Diffstat (limited to 'build/download')
-rw-r--r-- | build/download/http_downloader.py | 4 | ||||
-rwxr-xr-x | build/download/npm_downloader.py | 134 |
2 files changed, 136 insertions, 2 deletions
diff --git a/build/download/http_downloader.py b/build/download/http_downloader.py index 69adc4dd..ba2c0f7e 100644 --- a/build/download/http_downloader.py +++ b/build/download/http_downloader.py @@ -36,8 +36,8 @@ log = logging.getLogger(__name__) class HttpDownloader(ConcurrentDownloader): - def __init__(self, *list_args, workers=None): - super().__init__('http files', *list_args, workers=workers) + def __init__(self, *list_args, list_type='http_files', workers=None): + super().__init__(list_type, *list_args, workers=workers) @property def check_table(self): diff --git a/build/download/npm_downloader.py b/build/download/npm_downloader.py new file mode 100755 index 00000000..369af72a --- /dev/null +++ b/build/download/npm_downloader.py @@ -0,0 +1,134 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- + +# COPYRIGHT NOTICE STARTS HERE + +# Copyright 2019 © Samsung Electronics Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# COPYRIGHT NOTICE ENDS HERE

import argparse
import datetime
import hashlib
import logging
import os
import sys
import timeit

from retrying import retry

import http_downloader
import http_file

log = logging.getLogger(__name__)


class NpmDownloader(http_downloader.HttpDownloader):
    """
    Downloader of npm package tarballs.

    Packages are identified by ``name@version`` strings; every tarball is
    stored as ``<dst_dir>/<name>-<version>.tgz``.
    """

    def __init__(self, npm_registry, *list_args, workers=None):
        """
        :param npm_registry: base url of the npm registry to query
        :param list_args: forwarded unchanged to HttpDownloader
        :param workers: forwarded unchanged to HttpDownloader
        """
        super().__init__(*list_args, list_type='npm packages', workers=workers)
        self._registry = npm_registry

    @staticmethod
    def _split_spec(npm_spec):
        """
        Split package specification into name and version
        :param npm_spec: package specification (npm_name@version)
        :return: tuple: (npm_name, npm_version)
        :raises ValueError: if the specification contains no '@'
        """
        # Split on the LAST '@' only, so the leading '@' of a scoped name
        # (@scope/pkg@1.0.0) is not mistaken for the version separator.
        # NOTE(review): whether the registry url and the destination path built
        # from a scoped name work end-to-end is not verified here - confirm.
        npm_name, npm_version = npm_spec.rsplit('@', 1)
        return npm_name, npm_version

    @classmethod
    def _tarball_path(cls, dst_dir, npm_spec):
        """
        Build path of the tarball file for given package
        :param dst_dir: directory the tarball is stored in
        :param npm_spec: package specification (npm_name@version)
        :return: path of the tarball file
        """
        # Single source of truth for the file name, so that download and
        # "is missing" check can never disagree about it.
        return '{}/{}-{}.tgz'.format(dst_dir, *cls._split_spec(npm_spec))

    def _download_item(self, item):
        """
        Download npm package
        :param item: http file to be downloaded (tuple: (npm_name@version, dst_dir))
        """
        npm_spec, dst_dir = item[0], item[1]
        log.info('Downloading: %s', npm_spec)
        npm_name, npm_version = self._split_spec(npm_spec)
        dst_path = self._tarball_path(dst_dir, npm_spec)
        try:
            tarball = http_file.HttpFile(npm_spec, self._get_npm(npm_name, npm_version), dst_path)
            tarball.save_to_file()
        except Exception:
            log.exception('Failed: %s', npm_spec)
            # Do not leave a partially written tarball behind.
            if os.path.isfile(dst_path):
                os.remove(dst_path)
            # Bare raise re-raises the active exception with its traceback intact.
            raise
        log.info('Downloaded: %s', npm_spec)

    @retry(stop_max_attempt_number=5, wait_fixed=5000)
    def _get_npm(self, npm_name, npm_version):
        """
        Fetch npm tarball from registry and verify its checksum
        (the content is only returned, saving to disk is up to the caller)
        :param npm_name: name of npm package
        :param npm_version: version of npm package
        :return: content of the tarball (bytes)
        :raises Exception: if sha1 of the tarball differs from registry shasum
        """
        # Version metadata provides both the tarball url and its sha1 checksum.
        npm_url = '{}/{}/{}'.format(self._registry, npm_name, npm_version)
        npm_json = self._make_get_request(npm_url).json()
        tarball_url = npm_json['dist']['tarball']
        shasum = npm_json['dist']['shasum']
        tarball = self._make_get_request(tarball_url).content
        if hashlib.sha1(tarball).hexdigest() != shasum:
            # Any exception makes the retry decorator try again.
            raise Exception(
                '{}@{}: Wrong checksum. Retrying...'.format(npm_name, npm_version))
        return tarball

    def _is_missing(self, item):
        """
        Check if item is missing (not downloaded)
        :param item: item to check (npm_name@version)
        :return: boolean
        """
        # self._data_list[item] is used as the destination directory of the item.
        return not os.path.isfile(self._tarball_path(self._data_list[item], item))


def run_cli():
    """
    Run as cli tool
    """
    parser = argparse.ArgumentParser(description='Download npm packages from list')
    parser.add_argument('npm_list', metavar='npm-list',
                        help='File with list of npm packages to download.')
    parser.add_argument('--registry', '-r', default='https://registry.npmjs.org',
                        help='Url of npm registry to download from')
    parser.add_argument('--output-dir', '-o', default=os.getcwd(),
                        help='Download destination')
    parser.add_argument('--check', '-c', action='store_true', default=False,
                        help='Check what is missing. No download.')
    parser.add_argument('--debug', action='store_true', default=False,
                        help='Turn on debug output')
    parser.add_argument('--workers', type=int, default=None,
                        help='Set maximum workers for parallel download (default: cores * 5)')

    args = parser.parse_args()

    if args.debug:
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    else:
        logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')

    downloader = NpmDownloader(args.registry, [args.npm_list, args.output_dir], workers=args.workers)

    if args.check:
        log.info('Check mode. No download will be executed.')
        log.info(downloader.check_table)
        sys.exit(0)

    timer_start = timeit.default_timer()
    try:
        downloader.download()
    except RuntimeError:
        log.error('Error occurred.')
        sys.exit(1)
    finally:
        # Runs on success and on failure alike, so elapsed time is always reported.
        log.info('Downloading finished in %s',
                 datetime.timedelta(seconds=timeit.default_timer() - timer_start))


if __name__ == '__main__':
    run_cli()