diff options
-rw-r--r-- | build/download/base.py | 115 | ||||
-rwxr-xr-x | build/download/command_downloader.py | 59 | ||||
-rw-r--r-- | build/download/concurrent_downloader.py | 77 | ||||
-rwxr-xr-x | build/download/docker_downloader.py | 242 | ||||
-rwxr-xr-x | build/download/docker_images.py | 268 | ||||
-rwxr-xr-x | build/download/download.py | 224 | ||||
-rw-r--r-- | build/download/downloader.py | 129 | ||||
-rwxr-xr-x | build/download/git_downloader.py | 137 | ||||
-rwxr-xr-x | build/download/git_repos.py | 97 | ||||
-rw-r--r-- | build/download/http_downloader.py | 144 | ||||
-rw-r--r-- | build/download/http_file.py | 49 | ||||
-rwxr-xr-x | build/download/http_files.py | 133 | ||||
-rwxr-xr-x | build/download/npm_downloader.py | 134 | ||||
-rwxr-xr-x | build/download/npm_packages.py | 119 | ||||
-rwxr-xr-x | build/download/pypi_downloader.py | 101 | ||||
-rwxr-xr-x | build/download/pypi_packages.py | 88 | ||||
-rw-r--r-- | build/download/requirements.txt | 2 | ||||
-rwxr-xr-x | build/download/rpm_downloader.py | 130 | ||||
-rwxr-xr-x | build/download/rpm_packages.py | 69 | ||||
-rw-r--r-- | docs/BuildGuide.rst | 5 |
20 files changed, 1336 insertions, 986 deletions
diff --git a/build/download/base.py b/build/download/base.py deleted file mode 100644 index d8b44839..00000000 --- a/build/download/base.py +++ /dev/null @@ -1,115 +0,0 @@ -#! /usr/bin/env python -# -*- coding: utf-8 -*- - -# COPYRIGHT NOTICE STARTS HERE - -# Copyright 2019 © Samsung Electronics Co., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# COPYRIGHT NOTICE ENDS HERE - - -import concurrent.futures -import os -import progressbar -import prettytable -import requests -from distutils.spawn import find_executable - -progressbar.streams.wrap_stdout() -progressbar.streams.wrap_stderr() - - -def load_list(item_list): - """ - Parse list with items to be downloaded. - :param item_list: File with list of items (1 line per item) - :return: set of items from file - """ - with open(item_list, 'r') as f: - return {item for item in (line.strip() for line in f) - if item and not item.startswith('#')} - - -def init_progress(items_name): - progress_widgets = ['Downloading {}: '.format(items_name), - progressbar.Bar(), ' ', - progressbar.Percentage(), ' ', - '(', progressbar.SimpleProgress(), ')'] - - progress = progressbar.ProgressBar(widgets=progress_widgets, - poll_rate=1.0, - redirect_stdout=True) - return progress - - -def start_progress(progress, target_count, skipping, log): - log_skipping(skipping, log) - log.info("Initializing download. 
Takes a while.") - - progress.max_value = target_count - progress.start() - progress.update(len(skipping)) - - -def log_skipping(skipping_iterable, logger): - for skipped in skipping_iterable: - logger.info('Skipping: {}'.format(skipped)) - - -def run_concurrent(workers, progress, fn, iterable, *args): - with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor: - futures = [executor.submit(fn, item, *args) for item in iterable] - error_count = 0 - for future in concurrent.futures.as_completed(futures): - error = future.exception() - if error: - error_count += 1 - progress.update() - else: - progress.update(progress.value +1) - return error_count - - -def finish_progress(progress, error_count, log): - progress.finish(dirty=error_count > 0) - log.info('Download ended. Elapsed time {}'.format(progress.data()['time_elapsed'])) - -def check_tool(name): - return find_executable(name) - -def save_to_file(dst, content): - """ - Save downloaded byte content to file - :param dst: path to file to save content to - :param content: byte content of file - """ - dst_dir = os.path.dirname(dst) - if not os.path.exists(dst_dir): - os.makedirs(dst_dir) - with open(dst, 'wb') as dst_file: - dst_file.write(content) - -def make_get_request(url): - req = requests.get(url) - req.raise_for_status() - return req - -def simple_check_table(target, missing): - table = prettytable.PrettyTable(['Name', 'Downloaded']) - table.align['Name'] = 'l' - for item in sorted(target): - table.add_row([item, item not in missing]) - return table - diff --git a/build/download/command_downloader.py b/build/download/command_downloader.py new file mode 100755 index 00000000..5efc8b0f --- /dev/null +++ b/build/download/command_downloader.py @@ -0,0 +1,59 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- + +# COPYRIGHT NOTICE STARTS HERE + +# Copyright 2019 © Samsung Electronics Co., Ltd. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# COPYRIGHT NOTICE ENDS HERE + +import logging +import subprocess +from abc import ABC +from distutils.spawn import find_executable + +from downloader import AbstractDownloader + +log = logging.getLogger(__name__) + + +class CommandDownloader(AbstractDownloader, ABC): + def __init__(self, list_type, cli_tool, *list_args): + super().__init__(list_type, *list_args) + if not find_executable(cli_tool): + raise FileNotFoundError(cli_tool) + + def download(self): + """ + Download items from list + """ + if not self._initial_log(): + return + items_left = len(self._missing) + error_occurred = False + for item, dst_dir in self._data_list.items(): + try: + self._download_item((item, dst_dir)) + except subprocess.CalledProcessError as err: + log.exception(err.output.decode()) + error_occurred = True + items_left -= 1 + log.info('{} {} left to download.'.format(items_left, self._list_type)) + if error_occurred: + log.error('{} {} were not downloaded.'.format(items_left, self._list_type)) + raise RuntimeError('One or more errors occurred') + + def _download_item(self, item): + pass diff --git a/build/download/concurrent_downloader.py b/build/download/concurrent_downloader.py new file mode 100644 index 00000000..c84dac86 --- /dev/null +++ b/build/download/concurrent_downloader.py @@ -0,0 +1,77 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- + +# COPYRIGHT NOTICE STARTS HERE + +# Copyright 2019 © Samsung Electronics Co., Ltd. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# COPYRIGHT NOTICE ENDS HERE + +import concurrent.futures +import logging +from abc import ABC, abstractmethod + +from downloader import AbstractDownloader + +log = logging.getLogger(__name__) + + +class ConcurrentDownloader(AbstractDownloader, ABC): + def __init__(self, list_type, *list_args, workers=None): + super().__init__(list_type, *list_args) + self._workers = workers + + @abstractmethod + def _download_item(self, item): + """ + Download item from list + :param item: item to be downloaded + """ + pass + + def download(self): + """ + Download images concurrently from data lists. 
+ """ + if not self._initial_log(): + return + items_left = len(self._missing) + try: + for _ in self.run_concurrent(self._download_item, self._missing.items()): + items_left -= 1 + log.info('{} {} left to download.'.format(items_left, self._list_type)) + except RuntimeError as err: + log.error('{} {} were not downloaded.'.format(items_left, self._list_type)) + raise err + + def run_concurrent(self, fn, iterable, *args): + """ + Run function concurrently for iterable + :param fn: function to run + :param iterable: iterable to process + :param args: arguments for function (fn) + """ + with concurrent.futures.ThreadPoolExecutor(max_workers=self._workers) as executor: + futures = [executor.submit(fn, item, *args) for item in iterable] + error_occurred = False + + for future in concurrent.futures.as_completed(futures): + error = future.exception() + if error: + error_occurred = True + else: + yield + if error_occurred: + raise RuntimeError('One or more errors occurred') diff --git a/build/download/docker_downloader.py b/build/download/docker_downloader.py new file mode 100755 index 00000000..13323d3b --- /dev/null +++ b/build/download/docker_downloader.py @@ -0,0 +1,242 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- + +# COPYRIGHT NOTICE STARTS HERE + +# Copyright 2019 © Samsung Electronics Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# COPYRIGHT NOTICE ENDS HERE + +import argparse +import datetime +import itertools +import logging +import os +import sys +import timeit + +import docker +from retrying import retry + +from concurrent_downloader import ConcurrentDownloader + +log = logging.getLogger(__name__) + + +class DockerDownloader(ConcurrentDownloader): + def __init__(self, save, *list_args, workers=3): + self._save = save + try: + # big timeout in case of massive images like pnda-mirror-container:5.0.0 (11.4GB) + self._docker_client = docker.client.DockerClient(version='auto', timeout=300) + except docker.errors.DockerException as err: + log.exception( + 'Error creating docker client. Check if is docker installed and running' + ' or if you have right permissions.') + raise err + self._pulled_images = set(itertools.chain.from_iterable((image.tags for image + in self._docker_client.images.list()))) + list_args = ([*x, None] if len(x) < 2 else x for x in list_args) + super().__init__('docker images', *list_args, workers=workers) + + @staticmethod + def image_registry_name(image_name): + """ + Get the name as shown in local registry. Since some strings are not part of name + when using default registry e.g. 
docker.io + :param image_name: name of the image from the list + :return: name of the image as it is shown by docker + """ + name = image_name + + if name.startswith('docker.io/'): + name = name.replace('docker.io/', '') + + if name.startswith('library/'): + name = name.replace('library/', '') + + if ':' not in name.rsplit('/')[-1]: + name = '{}:latest'.format(name) + + return name + + @property + def check_table(self): + """ + Table showing information of which images are pulled/saved + """ + self.missing() + return self._table(self._data_list) + + @property + def fail_table(self): + """ + Table showing information about state of download of images + that encountered problems while downloading + """ + return self._table(self.missing()) + + @staticmethod + def _image_filename(image_name): + """ + Get a name of a file where image will be saved. + :param image_name: Name of the image from list + :return: Filename of the image + """ + return '{}.tar'.format(image_name.replace(':', '_').replace('/', '_')) + + def _table(self, images): + """ + Get table in format for images + :param images: images to put into table + :return: check table format with specified images + """ + header = ['Name', 'Pulled', 'Saved'] + data = [] + for item in images: + if item not in self._missing: + data.append((item, True, True if self._save else 'N/A')) + else: + data.append((item, self._missing[item]['pulled'], self._missing[item]['saved'])) + return self._check_table(header, {'Name': 'l'}, data) + + def _is_pulled(self, image): + return self.image_registry_name(image) in self._pulled_images + + def _is_saved(self, image): + dst = '{}/{}'.format(self._data_list[image], self._image_filename(image)) + return os.path.isfile(dst) + + def _is_missing(self, item): + """ + Missing docker images are checked slightly differently. + """ + pass + + def missing(self): + """ + Get dictionary of images not present locally. 
+ """ + missing = dict() + for image, dst in self._data_list.items(): + pulled = self._is_pulled(image) + if self._save: + # if pulling and save is True. Save every pulled image to assure parity + saved = False if not pulled else self._is_saved(image) + else: + saved = 'N/A' + if not pulled or not saved: + missing[image] = {'dst': dst, 'pulled': pulled, 'saved': saved} + self._missing = missing + return self._missing + + @retry(stop_max_attempt_number=5, wait_fixed=5000) + def _pull_image(self, image_name): + """ + Pull docker image. + :param image_name: name of the image to be pulled + :return: pulled image (image object) + :raises docker.errors.APIError: after unsuccessful retries + """ + if ':' not in image_name.rsplit('/')[-1]: + image_name = '{}:latest'.format(image_name) + try: + image = self._docker_client.images.pull(image_name) + log.info('Image {} pulled'.format(image_name)) + return image + except docker.errors.APIError as err: + log.warning('Failed: {}: {}. Retrying...'.format(image_name, err)) + raise err + + def _save_image(self, image_name, image, output_dir): + """ + Save image to tar. 
+ :param output_dir: path to destination directory + :param image: image object from pull_image function + :param image_name: name of the image from list + """ + dst = '{}/{}'.format(output_dir, self._image_filename(image_name)) + if not os.path.exists(output_dir): + os.makedirs(output_dir) + try: + with open(dst, 'wb') as f: + for chunk in image.save(named=self.image_registry_name(image_name)): + f.write(chunk) + log.info('Image {} saved as {}'.format(image_name, dst)) + except Exception as err: + if os.path.isfile(dst): + os.remove(dst) + raise err + + def _download_item(self, image): + """ Pull and save docker image from specified docker registry + :param image: image to be downloaded + """ + image_name, image_dict = image + log.info('Downloading image: {}'.format(image_name)) + try: + if image_dict['pulled']: + image_to_save = self._docker_client.images.get(image_name) + else: + image_to_save = self._pull_image(image_name) + if self._save: + self._save_image(image_name, image_to_save, image_dict['dst']) + except Exception as err: + log.exception('Error downloading {}: {}'.format(image_name, err)) + raise err + + +def run_cli(): + parser = argparse.ArgumentParser(description='Download docker images from list') + parser.add_argument('image_list', metavar='image-list', + help='File with list of images to download.') + parser.add_argument('--save', '-s', action='store_true', default=False, + help='Save images (without it only pull is executed)') + parser.add_argument('--output-dir', '-o', default=os.getcwd(), + help='Download destination') + parser.add_argument('--check', '-c', action='store_true', default=False, + help='Check what is missing. No download.' 
+ 'Use with combination with -s to check saved images as well.') + parser.add_argument('--debug', action='store_true', default=False, + help='Turn on debug output') + parser.add_argument('--workers', type=int, default=3, + help='Set maximum workers for parallel download (default: 3)') + + args = parser.parse_args() + + if args.debug: + logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) + else: + logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s') + + downloader = DockerDownloader(args.save, [args.file_list, args.output_dir], workers=args.workers) + + if args.check: + log.info('Check mode. No download will be executed.') + log.info(downloader.check_table) + sys.exit(0) + + timer_start = timeit.default_timer() + try: + downloader.download() + except RuntimeError: + sys.exit(1) + finally: + log.info('Downloading finished in {}'.format( + datetime.timedelta(seconds=timeit.default_timer() - timer_start))) + + +if __name__ == '__main__': + run_cli() diff --git a/build/download/docker_images.py b/build/download/docker_images.py deleted file mode 100755 index d8138dd6..00000000 --- a/build/download/docker_images.py +++ /dev/null @@ -1,268 +0,0 @@ -#! /usr/bin/env python -# -*- coding: utf-8 -*- - -# COPYRIGHT NOTICE STARTS HERE - -# Copyright 2019 © Samsung Electronics Co., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# COPYRIGHT NOTICE ENDS HERE - - -import argparse -import concurrent.futures -import docker -import itertools -import json -import logging -import os -import prettytable -import sys -import threading -from retrying import retry - -import base - -log = logging.getLogger(__name__) - - -def image_filename(image_name): - """ - Get a name of a file where image will be saved. - :param image_name: Name of the image from list - :return: Filename of the image - """ - return '{}.tar'.format(image_name.replace(':', '_').replace('/', '_')) - - -def image_registry_name(image_name): - """ - Get the name as shown in local registry. Since some strings are not part of name - when using default registry e.g. docker.io - :param image_name: name of the image from the list - :return: name of the image as it is shown by docker - """ - name = image_name - - if name.startswith('docker.io/'): - name = name.replace('docker.io/', '') - - if name.startswith('library/'): - name = name.replace('library/', '') - - if ':' not in name.rsplit('/')[-1]: - name = '{}:latest'.format(name) - - return name - - -def not_pulled_images(docker_client, target_list): - """ - Get set of images that are not pulled on local system. 
- :param docker_client: docker.client.DockerClient - :param target_list: list of images to look for - :return: (set) images that are not present on local system - """ - pulled = set(itertools.chain.from_iterable((image.tags for image - in docker_client.images.list()))) - return {image for image in target_list if image_registry_name(image) not in pulled} - - -def not_saved(target_images, target_dir): - """ - Get set of images that are not saved in target directory - :param target_images: List of images to check for - :param target_dir: Directory where those images should be - :return: (set) Images that are missing from target directory - """ - return set(image for image in target_images - if not os.path.isfile('/'.join((target_dir, image_filename(image))))) - - -def missing(docker_client, target_list, save, target_dir): - """ - Get dictionary of images not present locally. - :param docker_client: docker.client.DockerClient for communication with docker - :param target_list: list of desired images - :param save: (boolean) check for saved images - :param target_dir: target directory for saved images - :return: Dictionary of missing images ('not_pulled', 'not_saved') - """ - return {'not_pulled': not_pulled_images(docker_client, target_list), - 'not_saved': not_saved(target_list, target_dir) if save else set()} - - -def merge_dict_sets(dictionary): - return set.union(*dictionary.values()) - - -def check_table(check_list, missing, save): - table = prettytable.PrettyTable(['Image', 'Pulled', 'Saved']) - table.align['Image'] = 'l' - for image in sorted(check_list): - pulled = not image in missing['not_pulled'] - download_state = [pulled] - if save: - # if not pulled save anyway - download_state.append(pulled and not image in missing['not_saved']) - else: - download_state.append('Not checked') - table.add_row([image] + download_state) - return table - - -@retry(stop_max_attempt_number=5, wait_fixed=5000) -def pull_image(docker_client, image_name): - """ - Pull docker 
image. - :param docker_client: docker.client.DockerClient for communication with docker - :param image_name: name of the image to be pulled - :return: pulled image (image object) - :raises docker.errors.APIError: after unsuccessful retries - """ - if ':' not in image_name.rsplit('/')[-1]: - image_name = '{}:latest'.format(image_name) - try: - image = docker_client.images.pull(image_name) - log.info('Image {} pulled'.format(image_name)) - return image - except docker.errors.APIError as err: - log.warning('Failed: {}: {}. Retrying...'.format(image_name, err)) - raise err - - -def save_image(image_name, image, output_dir, docker_client=None): - """ - Save image to tar. - :param output_dir: path to destination directory - :param image: image object from pull_image function - :param image_name: name of the image from list - :param docker_client: docker.client.DockerClient for communication with docker - :return: None - """ - dst = '{}/{}'.format(output_dir, image_filename(image_name)) - if not os.path.exists(output_dir): - os.makedirs(output_dir) - if not isinstance(image, docker.models.images.Image): - image = docker_client.images.get(image_name) - try: - with open(dst, 'wb') as f: - for chunk in image.save(named=image_registry_name(image_name)): - f.write(chunk) - log.info('Image {} saved as {}'.format(image_name, dst)) - except Exception as err: - os.remove(dst) - raise err - - -def download_docker_image(image, save, output_dir, docker_client): - """ Pull and save docker image from specified docker registry - :param docker_client: docker.client.DockerClient for communication with docker - :param image: image to be downloaded - :param save: boolean - save image to disk or skip saving - :param output_dir: directory where image will be saved - :return: None - """ - log.info('Downloading image: {}'.format(image)) - try: - pulled_image = pull_image(docker_client, image) - if save: - save_image(image, pulled_image, output_dir) - except Exception as err: - 
log.exception('Error downloading {}: {}'.format(image, err)) - raise err - - -def download(image_list, save, output_dir, check_mode, progress, workers=3): - """ - Download images from list - :param image_list: list of images to be downloaded - :param save: whether images should be saved to disk - :param output_dir: directory where images will be saved - :param check_mode: only check for missing images. No download - :param progress_bar: progressbar.ProgressBar to show how far download is - :return: None - """ - try: - # big timeout in case of massive images like pnda-mirror-container:5.0.0 (11.4GB) - docker_client = docker.client.DockerClient(version='auto', timeout=300) - except docker.errors.DockerException as err: - log.exception('Error creating docker client. Check if is docker installed and running' - ' or if you have right permissions.') - raise err - - target_images = base.load_list(image_list) - missing_images = missing(docker_client, target_images, save, output_dir) - - if check_mode: - log.info(check_table(target_images, missing_images, save)) - return - - skipping = target_images - merge_dict_sets(missing_images) - - base.start_progress(progress, len(target_images), skipping, log) - - # if pulling and save is True. 
Save every pulled image to assure parity - error_count = base.run_concurrent(workers, progress, download_docker_image, missing_images['not_pulled'], - save, output_dir, docker_client) - # only save those that are pulled already but not saved - error_count += base.run_concurrent(workers, progress, save_image, - missing_images['not_saved'] - missing_images['not_pulled'], - None, output_dir, docker_client) - - base.finish_progress(progress, error_count, log) - if error_count > 0: - log.error('{} images were not downloaded'.format(error_count)) - missing_images = missing(docker_client, target_images, save, output_dir) - log.info(check_table(merge_dict_sets(missing_images), missing_images, save)) - raise RuntimeError() - - -def run_cli(): - parser = argparse.ArgumentParser(description='Download docker images from list') - parser.add_argument('image_list', metavar='image-list', - help='File with list of images to download.') - parser.add_argument('--save', '-s', action='store_true', default=False, - help='Save images (without it only pull is executed)') - parser.add_argument('--output-dir', '-o', default=os.getcwd(), - help='Download destination') - parser.add_argument('--check', '-c', action='store_true', default=False, - help='Check what is missing. No download.' 
- 'Use with combination with -s to check saved images as well.') - parser.add_argument('--debug', action='store_true', default=False, - help='Turn on debug output') - parser.add_argument('--workers', type=int, default=3, - help='Set maximum workers for parallel download (default: 3)') - - args = parser.parse_args() - - if args.debug: - logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) - else: - logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s') - - progress = base.init_progress('Docker images') if not args.check else None - try: - download(args.image_list, args.save, args.output_dir, args.check, - progress, args.workers) - except docker.errors.DockerException: - log.exception('Irrecoverable error detected.') - sys.exit(1) - except RuntimeError as err: - log.exception(err) - - -if __name__ == '__main__': - run_cli() - diff --git a/build/download/download.py b/build/download/download.py index 0d8912ee..0af12989 100755 --- a/build/download/download.py +++ b/build/download/download.py @@ -20,44 +20,51 @@ # COPYRIGHT NOTICE ENDS HERE import argparse +import datetime import logging import sys -import datetime import timeit -import base -import docker_images -import git_repos -import http_files -import npm_packages -import pypi_packages -import rpm_packages +import docker_downloader +import git_downloader +import http_downloader +import npm_downloader +import pypi_downloader +import rpm_downloader log = logging.getLogger(name=__name__) + def parse_args(): - parser=argparse.ArgumentParser(description='Download data from lists') + """ + Parse command line arguments + :return: arguments + """ + parser = argparse.ArgumentParser(description='Download data from lists') list_group = parser.add_argument_group() list_group.add_argument('--docker', action='append', nargs='+', default=[], - metavar=('list', 'dir-name'), - help='Docker type list. 
If second argument is specified ' - 'it is treated as directory where images will be saved ' - 'otherwise only pull operation is executed') + metavar=('list', 'dir-name'), + help='Docker type list. If second argument is specified ' + 'it is treated as directory where images will be saved ' + 'otherwise only pull operation is executed this can\'t ' + 'be mixed between multiple docker list specifications. ' + 'if one of the list does not have directory specified ' + 'all lists are only pulled!!!') list_group.add_argument('--http', action='append', nargs=2, default=[], - metavar=('list', 'dir-name'), - help='Http type list and directory to save downloaded files') + metavar=('list', 'dir-name'), + help='Http type list and directory to save downloaded files') list_group.add_argument('--npm', action='append', nargs=2, default=[], - metavar=('list', 'dir-name'), - help='npm type list and directory to save downloaded files') + metavar=('list', 'dir-name'), + help='npm type list and directory to save downloaded files') list_group.add_argument('--rpm', action='append', nargs=2, default=[], - metavar=('list', 'dir-name'), - help='rpm type list and directory to save downloaded files') + metavar=('list', 'dir-name'), + help='rpm type list and directory to save downloaded files') list_group.add_argument('--git', action='append', nargs=2, default=[], - metavar=('list', 'dir-name'), - help='git repo type list and directory to save downloaded files') + metavar=('list', 'dir-name'), + help='git repo type list and directory to save downloaded files') list_group.add_argument('--pypi', action='append', nargs=2, default=[], - metavar=('list', 'dir-name'), - help='pypi packages type list and directory to save downloaded files') + metavar=('list', 'dir-name'), + help='pypi packages type list and directory to save downloaded files') parser.add_argument('--npm-registry', default='https://registry.npmjs.org', help='npm registry to use (default: https://registry.npmjs.org)') 
parser.add_argument('--check', '-c', action='store_true', default=False, @@ -71,10 +78,77 @@ def parse_args(): if getattr(args, arg): return args - parser.error('One of --docker, --npm, --http, --rpm, --git must be specified') + parser.error('One of --docker, --npm, --http, --rpm, --git or --pypi must be specified') + + +def log_start(item_type): + """ + Log starting message + :param item_type: type of resources + :return: + """ + log.info('Starting download of {}.'.format(item_type)) + + +def handle_download(downloader, check_mode, errorred_lists, start_time): + """ + Handle download of resources + :param downloader: downloader to use + :param check_mode: run in check mode (boolean) + :param errorred_lists: list of data types of failed lists + :param start_time: timeit.default_timer() right before download + :return: timeit.default_timer() at the end of download + """ + if check_mode: + print(downloader.check_table) + else: + log_start(downloader.list_type) + try: + downloader.download() + except RuntimeError: + errorred_lists.append(downloader.list_type) + return log_time_interval(start_time, downloader.list_type) + + +def handle_command_download(downloader_class, check_mode, errorred_lists, start_time, *args): + """ + Handle download of resources where shell command is used + :param downloader_class: Class of command_downloader.CommandDownloader to use + :param check_mode: run in check mode (boolean) + :param errorred_lists: list of data types of failed lists + :param start_time: timeit.default_timer() right before download + :param args: arguments for downloader class initialization + :return: timeit.default_timer() at the end of download + """ + try: + downloader = downloader_class(*args) + return handle_download(downloader, check_mode, errorred_lists, start_time) + except FileNotFoundError as err: + classname = type(downloader_class).__name__ + log.exception('Error initializing: {}: {}'.format(classname, err)) + return timeit.default_timer() + + +def 
log_time_interval(start, resource_type=''): + """ + Log how long the download took + :param start: timeit.default_timer() when interval started + :param resource_type: type of data that was downloaded. (empty string for whole download) + :return: timeit.default_timer() after logging + """ + e_time = datetime.timedelta(seconds=timeit.default_timer() - start) + if resource_type: + msg = 'Download of {} took {}\n'.format(resource_type, e_time) + else: + msg = 'Execution ended. Total elapsed time {}'.format(e_time) + log.info(msg) + return timeit.default_timer() def run_cli(): + if sys.version_info.major < 3: + log.error('Unfortunately Python 2 is not supported for data download.') + sys.exit(1) args = parse_args() console_handler = logging.StreamHandler(sys.stdout) @@ -91,82 +165,44 @@ def run_cli(): root_logger = logging.getLogger() root_logger.addHandler(console_handler) - list_with_errors = [] - timer_start = timeit.default_timer() - - for docker_list in args.docker: - log.info('Processing {}.'.format(docker_list[0])) - progress = None if args.check else base.init_progress('docker images') - save = False - if len(docker_list) > 1: - save = True - else: - docker_list.append(None) - try: - docker_images.download(docker_list[0], save, - docker_list[1], args.check, progress) - except RuntimeError: - list_with_errors.append(docker_list[0]) + errorred_lists = [] + timer_start = interval_start = timeit.default_timer() - for http_list in args.http: - progress = None if args.check else base.init_progress('http files') - log.info('Processing {}.'.format(http_list[0])) - try: - http_files.download(http_list[0], http_list[1], args.check, - progress) - except RuntimeError: - list_with_errors.append(http_list[0]) + if args.check: + log.info('Check mode. 
No download will be executed.') - for npm_list in args.npm: - progress = None if args.check else base.init_progress('npm packages') - log.info('Processing {}.'.format(npm_list[0])) - try: - npm_packages.download(npm_list[0], args.npm_registry, npm_list[1], - args.check, progress) - except RuntimeError: - list_with_errors.append(npm_list[0]) + if args.docker: + save = True if len(list(filter(lambda x: len(x) == 2, args.docker))) == len(args.docker) else False + docker = docker_downloader.DockerDownloader(save, *args.docker, workers=3) + interval_start = handle_download(docker, args.check, errorred_lists, interval_start) - for rpm_list in args.rpm: - if args.check: - log.info('Check mode for rpm packages is not implemented') - break - log.info('Processing {}.'.format(rpm_list[0])) - try: - rpm_packages.download(rpm_list[0], rpm_list[1]) - except RuntimeError: - list_with_errors.append(rpm_list[0]) - - for git_list in args.git: - if args.check: - log.info('Check mode for git repositories is not implemented') - break - progress = None if args.check else base.init_progress('git repositories') - log.info('Processing {}.'.format(git_list[0])) - try: - git_repos.download(git_list[0], git_list[1], progress) - except RuntimeError: - list_with_errors.append(git_list[0]) - - for pypi_list in args.pypi: - if args.check: - log.info('Check mode for pypi packages is not implemented') - break - progress = None if args.check else base.init_progress('pypi packages') - log.info('Processing {}.'.format(pypi_list[0])) - try: - pypi_packages.download(pypi_list[0], pypi_list[1], progress) - except RuntimeError: - list_with_errors.append(pypi_list[0]) + if args.http: + http = http_downloader.HttpDownloader(*args.http) + interval_start = handle_download(http, args.check, errorred_lists, interval_start) - e_time = datetime.timedelta(seconds=timeit.default_timer() - timer_start) - log.info(timeit.default_timer() - timer_start) - log.info('Execution ended. 
Total elapsed time {}'.format(e_time)) + if args.npm: + npm = npm_downloader.NpmDownloader(args.npm_registry, *args.npm) + interval_start = handle_download(npm, args.check, errorred_lists, interval_start) - if list_with_errors: - log.error('Errors encountered while processing these lists:' - '\n{}'.format('\n'.join(list_with_errors))) - sys.exit(1) + if args.rpm: + interval_start = handle_command_download(rpm_downloader.RpmDownloader, args.check, errorred_lists, + interval_start, *args.rpm) + + if args.git: + interval_start = handle_command_download(git_downloader.GitDownloader, args.check, errorred_lists, + interval_start, *args.git) + if args.pypi: + handle_command_download(pypi_downloader.PyPiDownloader, args.check, errorred_lists, + interval_start, *args.pypi) + + if not args.check: + log_time_interval(timer_start) + + if errorred_lists: + log.error('Errors encountered while processing these types:' + '\n{}'.format('\n'.join(errorred_lists))) + sys.exit(1) if __name__ == '__main__': diff --git a/build/download/downloader.py b/build/download/downloader.py new file mode 100644 index 00000000..64403300 --- /dev/null +++ b/build/download/downloader.py @@ -0,0 +1,129 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- + +# COPYRIGHT NOTICE STARTS HERE + +# Copyright 2019 © Samsung Electronics Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import logging
from abc import ABC, abstractmethod

import prettytable

log = logging.getLogger(__name__)


class AbstractDownloader(ABC):
    """
    Common base for all list-driven downloaders.

    Loads one or more list files, tracks which items are still missing on
    disk and provides table rendering helpers for check mode.
    """

    def __init__(self, list_type, *list_args):
        # Human readable type of the resources (e.g. 'docker images').
        self._list_type = list_type
        # Map each item from every list file to its destination directory.
        self._data_list = {}
        for list_arg in list_args:
            for entry in self._load_list(list_arg[0]):
                self._data_list[entry] = list_arg[1]
        self._missing = self.missing()

    @property
    def list_type(self):
        """
        Type of resource in list
        """
        return self._list_type

    @staticmethod
    def _load_list(path):
        """
        Load list from file.
        :param path: path to file
        :return: set of items in list (blank lines and '#' comments skipped)
        """
        with open(path, 'r') as f:
            entries = set()
            for line in f:
                stripped = line.strip()
                if stripped and not stripped.startswith('#'):
                    entries.add(stripped)
            return entries

    @staticmethod
    def _check_table(header, alignment_dict, data):
        """
        General method to generate table
        :param header: header of the table
        :param alignment_dict: dictionary with alignment for columns
        :param data: iterable of rows of table
        :return: table formatted data
        """
        table = prettytable.PrettyTable(header)
        for column, alignment in alignment_dict.items():
            table.align[column] = alignment
        for row in sorted(data):
            table.add_row(row)
        return table

    @abstractmethod
    def download(self):
        """
        Download resources from lists
        """
        pass

    @abstractmethod
    def _is_missing(self, item):
        """
        Check if item is not downloaded
        """
        pass

    def missing(self):
        """
        Check for missing data (not downloaded)
        :return: dictionary of missing items
        """
        self._missing = {entry: dest
                         for entry, dest in self._data_list.items()
                         if self._is_missing(entry)}
        return self._missing

    def _log_existing(self):
        """
        Log items that are already downloaded.
        """
        for entry in self._merged_lists():
            if entry in self._missing:
                continue
            # Docker images get a dedicated message; everything else is a path.
            if type(self).__name__ == 'DockerDownloader':
                log.info('Docker image present: {}'.format(entry))
            else:
                log.info('File or directory present: {}'.format(entry))

    def _merged_lists(self):
        """
        Get all item names in one set
        :return: set with all items
        """
        return set(self._data_list)

    def _initial_log(self):
        """
        Log initial info for download.
        :return: True if download is necessary False if everything is already downloaded
        """
        self._log_existing()
        items_left = len(self._missing)
        class_name = type(self).__name__
        if items_left == 0:
            log.info('{}: Everything seems to be present no download necessary.'.format(class_name))
            return False
        log.info('{}: Initializing download {} {} are not present.'.format(class_name, items_left,
                                                                           self._list_type))
        return True
import argparse
import datetime
import logging
import os
import shutil
import subprocess
import sys
import timeit

from command_downloader import CommandDownloader

log = logging.getLogger(name=__name__)


class GitDownloader(CommandDownloader):
    """Downloader cloning git repositories (as bare repos) from list files."""

    def __init__(self, *list_args):
        super().__init__('git repositories', 'git', *list_args)

    @property
    def check_table(self):
        """
        Table with information which items from lists are downloaded
        """
        self.missing()
        header = ['Name', 'Branch', 'Downloaded']
        return self._check_table(header, {'Name': 'l'},
                                 ((*item.split(), self._downloaded(item)) for item
                                  in self._data_list))

    @staticmethod
    def _download_item(item):
        """
        Clone one repository as a bare repo.
        :param item: tuple ('<repo> <branch>', dst_dir)
        """
        repo, branch = item[0].split()
        dst = '{}/{}'.format(item[1], repo)
        command = 'git clone -b {} --single-branch https://{} --bare {}'.format(branch,
                                                                                repo,
                                                                                dst)
        if os.path.exists(dst):
            # A bare repo gives no cheap way to verify its state, so re-clone.
            log.warning('File or directory exists {} removing and cloning'
                        ' to be sure it is latest.'.format(dst))
            if os.path.isfile(dst):
                os.remove(dst)
            elif os.path.isdir(dst):
                shutil.rmtree(dst)

        log.info('Running: {}'.format(command))
        log.info(
            subprocess.check_output(command.split(), stderr=subprocess.STDOUT).decode())
        log.info('Downloaded: {}'.format(repo))

    def _is_missing(self, item):
        """
        Check if item is missing (not cloned)
        :param item: item to check
        :return: True if not present, 'maybe' if directory exists
        """
        dst = '{}/{}'.format(self._data_list[item], item.split()[0])
        if os.path.exists(dst):
            # it is bare repo who knows
            return 'maybe'
        return True

    def _downloaded(self, item):
        """
        Check if item is present (cloned)
        :param item: item to check
        :return: False if not cloned, 'maybe' if the directory exists
        """
        # DOC FIX: original docstring said "True if not cloned" which inverted
        # the actual return values below.
        missing = self._is_missing(item)
        if missing != 'maybe':
            return False
        # It is bare repo so who knows if it is latest version
        return 'maybe'

    def missing(self):
        """
        Check for missing data (not downloaded)
        :return: dictionary of missing items
        """
        # Every repo is always treated as missing so it is re-cloned to the
        # newest state on each run (bare repos cannot be verified cheaply).
        self._missing = dict(self._data_list)
        return self._missing


def run_cli():
    """
    Run as cli tool
    """
    parser = argparse.ArgumentParser(description='Download git repositories from list')
    # BUG FIX: the help text was copy-pasted from the npm downloader and claimed
    # this argument is a list of npm packages.
    parser.add_argument('git_list', metavar='git-list',
                        help='File with list of git repositories to download.')
    parser.add_argument('--output-dir', '-o', default=os.getcwd(),
                        help='Download destination')
    parser.add_argument('--check', '-c', action='store_true', default=False,
                        help='Check mode')

    args = parser.parse_args()

    logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')

    downloader = GitDownloader([args.git_list, args.output_dir])
    if args.check:
        log.info('Check mode. No download will be executed.')
        log.info(downloader.check_table)
        sys.exit(0)

    timer_start = timeit.default_timer()
    try:
        downloader.download()
    except RuntimeError:
        sys.exit(1)
    finally:
        log.info('Downloading finished in {}'.format(
            datetime.timedelta(seconds=timeit.default_timer() - timer_start)))


if __name__ == '__main__':
    run_cli()
-# See the License for the specific language governing permissions and -# limitations under the License. - -# COPYRIGHT NOTICE ENDS HEREE - -import argparse -import subprocess -import shutil -import logging -import sys -import os -from retrying import retry - -import base - -log = logging.getLogger(name=__name__) - -@retry(stop_max_attempt_number=5, wait_fixed=5000) -def clone_repo(dst, repo, branch=None): - if branch: - command = 'git clone -b {} --single-branch https://{} --bare {}'.format(branch, repo, dst) - else: - command = 'git clone https://{} --bare {}'.format(repo, dst) - log.info('Running: {}'.format(command)) - log.info(subprocess.check_output(command.split(), stderr=subprocess.STDOUT).decode()) - log.info('Downloaded: {}'.format(repo)) - - -def download(git_list, dst_dir, progress): - if not base.check_tool('git'): - log.error('ERROR: git is not installed') - progress.finish(dirty=True) - raise RuntimeError('git missing') - - git_set = {tuple(item.split()) for item in base.load_list(git_list)} - - error_count = 0 - - base.start_progress(progress, len(git_set), [], log) - - for repo in git_set: - dst = '{}/{}'.format(dst_dir, repo[0]) - if os.path.isdir(dst): - log.warning('Directory {} already exists. Repo probably present'.format(dst)) - progress.update(progress.value + 1) - continue - try: - clone_repo(dst, *repo) - progress.update(progress.value + 1) - except subprocess.CalledProcessError as err: - if os.path.isdir(dst): - shutil.rmtree(dst) - log.exception(err.output.decode()) - error_count += 1 - - base.finish_progress(progress, error_count, log) - if error_count > 0: - log.error('{} were not downloaded. 
Check logs for details'.format(error_count)) - raise RuntimeError('Download unsuccessful') - -def run_cli(): - parser = argparse.ArgumentParser(description='Download git repositories from list') - parser.add_argument('git_list', metavar='git-list', - help='File with list of git repos to download.') - parser.add_argument('--output-dir', '-o', default=os.getcwd(), - help='Download destination') - - args = parser.parse_args() - - logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s') - - progress = base.init_progress('git repositories') - try: - download(args.git_list, args.output_dir, progress) - except RuntimeError as err: - log.exception(err) - sys.exit(1) - - -if __name__ == '__main__': - run_cli() diff --git a/build/download/http_downloader.py b/build/download/http_downloader.py new file mode 100644 index 00000000..ba2c0f7e --- /dev/null +++ b/build/download/http_downloader.py @@ -0,0 +1,144 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- + +# COPYRIGHT NOTICE STARTS HERE + +# Copyright 2019 © Samsung Electronics Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import argparse
import datetime
import logging
import os
import sys
import timeit

import requests
from retrying import retry

import http_file
from concurrent_downloader import ConcurrentDownloader

log = logging.getLogger(__name__)


class HttpDownloader(ConcurrentDownloader):
    """Concurrent downloader for plain http(s) files."""

    def __init__(self, *list_args, list_type='http_files', workers=None):
        super().__init__(list_type, *list_args, workers=workers)

    @property
    def check_table(self):
        """
        Table with information what items from lists are downloaded
        """
        self.missing()
        header = ['Name', 'Downloaded']
        return self._check_table(header, {'Name': 'l'},
                                 ((item, item not in self._missing) for item
                                  in self._data_list))

    @staticmethod
    def _make_get_request(url):
        """
        Run http get request
        :param url: url to request
        :return: requests.Response
        :raises requests.HTTPError: for non-2xx responses
        """
        req = requests.get(url)
        req.raise_for_status()
        return req

    def _is_missing(self, item):
        """
        Check if item is missing (not downloaded)
        :param item: item to check
        :return: boolean
        """
        # Destination filename mirrors the uri part after the scheme separator.
        return not os.path.isfile(
            '{}/{}'.format(self._data_list[item], item.rsplit('//')[-1]))

    @retry(stop_max_attempt_number=5, wait_fixed=2000)
    def _get_file(self, file_uri):
        """
        Get http file from uri
        :param file_uri: uri of the file (scheme defaults to http:// when absent)
        :return: file content (bytes)
        """
        if not file_uri.startswith('http'):
            file_uri = 'http://' + file_uri
        file_req = self._make_get_request(file_uri)
        return file_req.content

    def _download_item(self, item):
        """
        Download http file
        :param item: http file to be downloaded (tuple: (uri, dst_dir))
        """
        log.info('Downloading: {}'.format(item[0]))
        dst_path = '{}/{}'.format(item[1], item[0].rsplit('//')[-1])
        try:
            f = http_file.HttpFile(item[0], self._get_file(item[0]), dst_path)
            f.save_to_file()
        except Exception as err:
            log.exception('Error downloading: {}: {}'.format(item[0], err))
            # Remove a partial file so the next run does not treat it as present.
            if os.path.isfile(dst_path):
                os.remove(dst_path)
            # bare raise preserves the original traceback
            raise
        log.info('Downloaded: {}'.format(f.name))


def run_cli():
    """
    Run as cli tool
    """
    parser = argparse.ArgumentParser(description='Download http files from list')
    parser.add_argument('file_list', metavar='file-list',
                        help='File with list of http files to download')
    parser.add_argument('--output-dir', '-o', default=os.getcwd(),
                        help='Destination directory for saving')
    parser.add_argument('--check', '-c', action='store_true', default=False,
                        help='Check mode')
    parser.add_argument('--debug', action='store_true', default=False,
                        help='Turn on debug output')
    parser.add_argument('--workers', type=int, default=None,
                        help='Set maximum workers for parallel download (default: cores * 5)')

    args = parser.parse_args()

    if args.debug:
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    else:
        logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')

    downloader = HttpDownloader([args.file_list, args.output_dir], workers=args.workers)

    if args.check:
        log.info('Check mode. No download will be executed.')
        log.info(downloader.check_table)
        sys.exit(0)

    timer_start = timeit.default_timer()
    try:
        downloader.download()
    except RuntimeError:
        sys.exit(1)
    finally:
        log.info('Downloading finished in {}'.format(
            datetime.timedelta(seconds=timeit.default_timer() - timer_start)))


if __name__ == '__main__':
    run_cli()
import os


class HttpFile:
    """
    In-memory downloaded file to be saved to disk.
    """

    def __init__(self, name, content, dst):
        """
        :param name: display name of the file (used by callers for logging)
        :param content: bytes content to write
        :param dst: destination path to save to
        """
        self._name = name
        self._content = content
        self._dst = dst

    @property
    def name(self):
        """
        Name of the file
        """
        return self._name

    def save_to_file(self):
        """
        Save content to the destination path, creating parent directories as needed.
        """
        dst_dir = os.path.dirname(self._dst)
        # BUG FIX: the original `if not os.path.exists(dst_dir): os.makedirs(dst_dir)`
        # raised FileNotFoundError for a bare filename (dirname == '') and was racy
        # when concurrent workers saved into the same new directory. makedirs with
        # exist_ok=True is atomic with respect to concurrent creation.
        if dst_dir:
            os.makedirs(dst_dir, exist_ok=True)
        with open(self._dst, 'wb') as dst_file:
            dst_file.write(self._content)
- -# COPYRIGHT NOTICE ENDS HERE - - -import argparse -import concurrent.futures -import logging -import os -import sys -from retrying import retry - -import base - -log = logging.getLogger(__name__) - -@retry(stop_max_attempt_number=5, wait_fixed=2000) -def get_file(file_uri): - """ - Get file from the Internet - :param file_uri: address of file - :return: byte content of file - """ - if not file_uri.startswith('http'): - file_uri = 'http://' + file_uri - file_req = base.make_get_request(file_uri) - return file_req.content - - -def download_file(file_uri, dst_dir): - """ - Download http file and save it to file. - :param file_uri: http address of file - :param dst_dir: directory where file will be saved - """ - log.info('Downloading: {}'.format(file_uri)) - dst_path = '{}/{}'.format(dst_dir, file_uri.rsplit('//')[-1]) - try: - file_content = get_file(file_uri) - base.save_to_file(dst_path, file_content) - except Exception as err: - if os.path.isfile(dst_path): - os.remove(dst_path) - log.error('Error downloading: {}: {}'.format(file_uri, err)) - raise err - log.info('Downloaded: {}'.format(file_uri)) - - -def missing(file_set, dst_dir): - return {file for file in file_set if not os.path.isfile('{}/{}'.format(dst_dir, file))} - - -def download(data_list, dst_dir, check, progress, workers=None): - """ - Download files specified in data list - :param data_list: path to file with list - :param dst_dir: destination directory - :param check: boolean check mode - :param progress: progressbar.ProgressBar to monitor progress - :param workers: workers to use for parallel execution - :return: 0 if success else number of errors - """ - file_set = base.load_list(data_list) - missing_files = missing(file_set, dst_dir) - target_count = len(file_set) - - if check: - log.info(base.simple_check_table(file_set, missing_files)) - return - - skipping = file_set - missing_files - - base.start_progress(progress, len(file_set), skipping, log) - - error_count = base.run_concurrent(workers, 
progress, download_file, missing_files, dst_dir) - - base.finish_progress(progress, error_count, log) - if error_count > 0: - log.error('{} files were not downloaded. Check log for specific failures.'.format(error_count)) - raise RuntimeError() - - -def run_cli(): - """ - Run as cli tool - """ - parser = argparse.ArgumentParser(description='Download http files from list') - parser.add_argument('file_list', metavar='file-list', - help='File with list of http files to download') - parser.add_argument('--output-dir', '-o', default=os.getcwd(), - help='Destination directory for saving') - parser.add_argument('--check', '-c', action='store_true', default=False, - help='Check mode') - parser.add_argument('--debug', action='store_true', default=False, - help='Turn on debug output') - parser.add_argument('--workers', type=int, default=None, - help='Set maximum workers for parallel download (default: cores * 5)') - - args = parser.parse_args() - - if args.debug: - logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) - else: - logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s') - - progress = base.init_progress('http files') if not args.check else None - - try: - download(args.file_list, args.output_dir, args.check, progress, args.workers) - except RuntimeError: - sys.exit(1) - - -if __name__ == '__main__': - run_cli() - diff --git a/build/download/npm_downloader.py b/build/download/npm_downloader.py new file mode 100755 index 00000000..369af72a --- /dev/null +++ b/build/download/npm_downloader.py @@ -0,0 +1,134 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- + +# COPYRIGHT NOTICE STARTS HERE + +# Copyright 2019 © Samsung Electronics Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
import argparse
import datetime
import hashlib
import logging
import os
import sys
import timeit

from retrying import retry

import http_downloader
import http_file

log = logging.getLogger(__name__)


class NpmDownloader(http_downloader.HttpDownloader):
    """Downloader for npm package tarballs fetched via an npm registry."""

    def __init__(self, npm_registry, *list_args, workers=None):
        super().__init__(*list_args, list_type='npm packages', workers=workers)
        self._registry = npm_registry

    def _download_item(self, item):
        """
        Download npm package
        :param item: npm package to be downloaded (tuple: (npm_name@version, dst_dir))
        """
        log.info('Downloading: {}'.format(item[0]))
        npm_name, npm_version = item[0].split('@')
        dst_path = '{}/{}-{}.tgz'.format(item[1], npm_name, npm_version)
        try:
            # reuse the already-split name/version instead of re-splitting item[0]
            tarball = http_file.HttpFile(item[0], self._get_npm(npm_name, npm_version),
                                         dst_path)
            tarball.save_to_file()
        except Exception:
            log.exception('Failed: {}'.format(item[0]))
            # Drop a partial file so the next run retries instead of skipping it.
            if os.path.isfile(dst_path):
                os.remove(dst_path)
            raise
        log.info('Downloaded: {}'.format(item[0]))

    @retry(stop_max_attempt_number=5, wait_fixed=5000)
    def _get_npm(self, npm_name, npm_version):
        """
        Fetch npm tarball content from the registry and verify its sha1 checksum.
        (DOC FIX: the original docstring claimed this saves to disk; it returns bytes.)
        :param npm_name: name of npm package
        :param npm_version: version of npm package
        :return: tarball content (bytes)
        :raises Exception: on checksum mismatch (triggers retry)
        """
        npm_url = '{}/{}/{}'.format(self._registry, npm_name, npm_version)
        npm_req = self._make_get_request(npm_url)
        npm_json = npm_req.json()
        tarball_url = npm_json['dist']['tarball']
        shasum = npm_json['dist']['shasum']
        tarball_req = self._make_get_request(tarball_url)
        tarball = tarball_req.content
        if hashlib.sha1(tarball).hexdigest() == shasum:
            return tarball
        raise Exception(
            '{}@{}: Wrong checksum. Retrying...'.format(npm_name, npm_version))

    def _is_missing(self, item):
        """
        Check if item is missing (not downloaded)
        :param item: item to check
        :return: boolean
        """
        return not os.path.isfile('{}/{}-{}.tgz'.format(self._data_list[item], *item.split('@')))


def run_cli():
    """
    Run as cli tool
    """
    parser = argparse.ArgumentParser(description='Download npm packages from list')
    parser.add_argument('npm_list', metavar='npm-list',
                        help='File with list of npm packages to download.')
    # BUG FIX: help text said 'Download destination' (copy-pasted from --output-dir).
    parser.add_argument('--registry', '-r', default='https://registry.npmjs.org',
                        help='npm registry to download packages from')
    parser.add_argument('--output-dir', '-o', default=os.getcwd(),
                        help='Download destination')
    parser.add_argument('--check', '-c', action='store_true', default=False,
                        help='Check what is missing. No download.')
    parser.add_argument('--debug', action='store_true', default=False,
                        help='Turn on debug output')
    parser.add_argument('--workers', type=int, default=None,
                        help='Set maximum workers for parallel download (default: cores * 5)')

    args = parser.parse_args()

    if args.debug:
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    else:
        logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')

    downloader = NpmDownloader(args.registry, [args.npm_list, args.output_dir],
                               workers=args.workers)

    if args.check:
        log.info('Check mode. No download will be executed.')
        log.info(downloader.check_table)
        sys.exit(0)

    timer_start = timeit.default_timer()
    try:
        downloader.download()
    except RuntimeError:
        log.error('Error occurred.')
        sys.exit(1)
    finally:
        log.info('Downloading finished in {}'.format(
            datetime.timedelta(seconds=timeit.default_timer() - timer_start)))


if __name__ == '__main__':
    run_cli()
- -# COPYRIGHT NOTICE ENDS HERE - -import argparse -import concurrent.futures -import hashlib -import logging -import os -import sys -from retrying import retry - -import base - -log = logging.getLogger(name=__name__) - - -@retry(stop_max_attempt_number=5, wait_fixed=5000) -def get_npm(registry, npm_name, npm_version): - npm_url = '{}/{}/{}'.format(registry, npm_name, npm_version) - npm_req = base.make_get_request(npm_url) - npm_json = npm_req.json() - tarball_url = npm_json['dist']['tarball'] - shasum = npm_json['dist']['shasum'] - tarball_req = base.make_get_request(tarball_url) - tarball = tarball_req.content - if hashlib.sha1(tarball).hexdigest() == shasum: - return tarball - else: - raise Exception('{}@{}: Wrong checksum. Retrying...'.format(npm_name, npm_version)) - - -def download_npm(npm, registry, dst_dir): - log.info('Downloading: {}'.format(npm)) - npm_name, npm_version = npm.split('@') - dst_path = '{}/{}-{}.tgz'.format(dst_dir, npm_name, npm_version) - try: - tarball = get_npm(registry, *npm.split('@')) - base.save_to_file(dst_path, tarball) - except Exception as err: - if os.path.isfile(dst_path): - os.remove(dst_path) - log.exception('Failed: {}'.format(npm)) - raise err - log.info('Downloaded: {}'.format(npm)) - - -def missing(npm_set, dst_dir): - return {npm for npm in npm_set - if not os.path.isfile('{}/{}-{}.tgz'.format(dst_dir, *npm.split('@')))} - - -def download(npm_list, registry, dst_dir, check_mode, progress=None, workers=None): - npm_set = base.load_list(npm_list) - target_count = len(npm_set) - missing_npms = missing(npm_set, dst_dir) - - if check_mode: - log.info(base.simple_check_table(npm_set, missing_npms)) - return 0 - - skipping = npm_set - missing_npms - - base.start_progress(progress, len(npm_set), skipping, log) - error_count = base.run_concurrent(workers, progress, download_npm, missing_npms, registry, dst_dir) - - base.finish_progress(progress, error_count, log) - if error_count > 0: - log.error('{} packages were not 
downloaded. Check log for specific failures.'.format(error_count)) - raise RuntimeError() - - -def run_cli(): - parser = argparse.ArgumentParser(description='Download npm packages from list') - parser.add_argument('npm_list', metavar='npm-list', - help='File with list of npm packages to download.') - parser.add_argument('--registry', '-r', default='https://registry.npmjs.org', - help='Download destination') - parser.add_argument('--output-dir', '-o', default=os.getcwd(), - help='Download destination') - parser.add_argument('--check', '-c', action='store_true', default=False, - help='Check what is missing. No download.') - parser.add_argument('--debug', action='store_true', default=False, - help='Turn on debug output') - parser.add_argument('--workers', type=int, default=None, - help='Set maximum workers for parallel download (default: cores * 5)') - - args = parser.parse_args() - - if args.debug: - logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) - else: - logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s') - - progress = base.init_progress('npm packages') if not args.check else None - sys.exit(download(args.npm_list, args.registry, args.output_dir, args.check, progress, - args.workers)) - - -if __name__ == '__main__': - run_cli() - diff --git a/build/download/pypi_downloader.py b/build/download/pypi_downloader.py new file mode 100755 index 00000000..4ab6b1f4 --- /dev/null +++ b/build/download/pypi_downloader.py @@ -0,0 +1,101 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- + +# COPYRIGHT NOTICE STARTS HERE + +# Copyright 2019 © Samsung Electronics Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# COPYRIGHT NOTICE ENDS HERE
+
+import argparse
+import datetime
+import logging
+import os
+import subprocess
+import sys
+import timeit
+
+from retrying import retry
+
+from command_downloader import CommandDownloader
+
+log = logging.getLogger(name=__name__)
+
+
+class PyPiDownloader(CommandDownloader):
+    def __init__(self, *list_args):
+        super().__init__('pypi packages', 'pip', *list_args)
+
+    @property
+    def check_table(self):
+        """
+        Return check table for pypi packages
+        :return: '' not implemented
+        """
+        log.warning('Check mode for pypi is not implemented.')
+        return ''
+
+    def _is_missing(self, item):
+        """
+        Check if item is missing
+        :param item: item to check
+        :return: True since don't know the actual filename
+        """
+        # always true don't know the name
+        return True
+
+    @retry(stop_max_attempt_number=5, wait_fixed=5000)
+    def _download_item(self, item):
+        """
+        Download pip package using pip
+        :param item: tuple(package_name, dst_dir) (name possibly with version specification)
+        """
+        package_name, dst_dir = item
+        command = 'pip download --dest {} {}'.format(dst_dir, package_name)
+        log.info('Running: {}'.format(command))
+        log.info(
+            subprocess.check_output(command.split(), stderr=subprocess.STDOUT).decode())
+        log.info('Downloaded: {}'.format(package_name))
+
+
+def run_cli():
+    """
+    Run as cli tool
+    """
+    parser = argparse.ArgumentParser(description='Download pypi packages from list')
+    parser.add_argument('pypi_list', metavar='pypi-list',
+                        help='File with list of pypi packages to download.')
+    parser.add_argument('--output-dir', '-o', default=os.getcwd(),
+                        help='Download destination')
+
+    args = parser.parse_args()
+
+    logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')
+
+    downloader = PyPiDownloader([args.pypi_list, args.output_dir])
+
+    timer_start = timeit.default_timer()
+    try:
+        downloader.download()
+    except RuntimeError as err:
+        log.exception(err)
+        sys.exit(1)
+    finally:
+        log.info('Downloading finished in {}'.format(
+            datetime.timedelta(seconds=timeit.default_timer() - timer_start)))
+
+
+if __name__ == '__main__':
+    run_cli()
diff --git a/build/download/pypi_packages.py b/build/download/pypi_packages.py
deleted file mode 100755
index 951003c5..00000000
--- a/build/download/pypi_packages.py
+++ /dev/null
@@ -1,88 +0,0 @@
-#! /usr/bin/env python
-# -*- coding: utf-8 -*-
-
-# COPYRIGHT NOTICE STARTS HERE
-
-# Copyright 2019 © Samsung Electronics Co., Ltd.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -# COPYRIGHT NOTICE ENDS HERE - -import argparse -import logging -import sys -import subprocess -import os -from retrying import retry - -import base - -log = logging.getLogger(name=__name__) - -@retry(stop_max_attempt_number=5, wait_fixed=5000) -def download_package(package_name, dst_dir): - command = 'pip download --dest {} {}'.format(dst_dir, package_name) - log.info('Running: {}'.format(command)) - log.info(subprocess.check_output(command.split(), stderr=subprocess.STDOUT).decode()) - log.info('Downloaded: {}'.format(package_name)) - - -def download(pypi_list, dst_dir, progress): - if not base.check_tool('pip'): - log.error('ERROR: pip is not installed') - progress.finish(dirty=True) - raise RuntimeError('pip missing') - - pypi_set = base.load_list(pypi_list) - - error_count = 0 - - base.start_progress(progress, len(pypi_set), [], log) - - for package in pypi_set: - try: - download_package(package, dst_dir) - except subprocess.CalledProcessError as err: - log.exception(err.output.decode()) - error_count += 1 - - progress.update(progress.value + 1) - - base.finish_progress(progress, error_count, log) - if error_count > 0: - log.error('{} packages were not downloaded. 
Check logs for details'.format(error_count)) - raise RuntimeError('Download unsuccesfull') - - -def run_cli(): - parser = argparse.ArgumentParser(description='Download git repositories from list') - parser.add_argument('pypi_list', metavar='pypi-list', - help='File with list of pypi packages to download.') - parser.add_argument('--output-dir', '-o', default=os.getcwd(), - help='Download destination') - - args = parser.parse_args() - - logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s') - - progress = base.init_progress('pypi packages') - try: - download(args.pypi_list, args.output_dir, progress) - except RuntimeError as err: - log.exception(err) - sys.exit(1) - - -if __name__ == '__main__': - run_cli() diff --git a/build/download/requirements.txt b/build/download/requirements.txt index 875f0c67..3eee2a2f 100644 --- a/build/download/requirements.txt +++ b/build/download/requirements.txt @@ -1,5 +1,3 @@ docker==3.7.2 -futures==3.2.0; python_version == '2.7' prettytable==0.7.2 -progressbar2==3.39.3 retrying==1.3.3 diff --git a/build/download/rpm_downloader.py b/build/download/rpm_downloader.py new file mode 100755 index 00000000..92ae6a78 --- /dev/null +++ b/build/download/rpm_downloader.py @@ -0,0 +1,130 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- + +# COPYRIGHT NOTICE STARTS HERE + +# Copyright 2019 © Samsung Electronics Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+# COPYRIGHT NOTICE ENDS HERE
+
+import argparse
+import datetime
+import logging
+import os
+import subprocess
+import sys
+import timeit
+from collections import defaultdict
+
+from command_downloader import CommandDownloader
+
+log = logging.getLogger(name=__name__)
+
+
+class RpmDownloader(CommandDownloader):
+    def __init__(self, *list_args):
+        super().__init__('rpm packages', 'yumdownloader', *list_args)
+        # beneficial to have it in same format
+
+    @property
+    def check_table(self):
+        """
+        Return check table for rpm packages
+        :return: '' not implemented
+        """
+        log.warning('Check mode for rpms is not implemented.')
+        return ''
+
+    @staticmethod
+    def _download_rpm_set(dst, rpms):
+        command = 'yumdownloader --destdir={} {}'.format(dst, ' '.join(rpms))
+        log.info('Running command: {}'.format(command))
+        log.info(
+            subprocess.check_output(command.split(), stderr=subprocess.STDOUT).decode())
+        log.info('Downloaded: {}'.format(', '.join(sorted(rpms))))
+
+    def missing(self):
+        """
+        Check for missing rpms (not downloaded)
+        :return: dictionary of missing items grouped by dst dir
+        """
+        # we need slightly different format for yumdownloader
+        self._missing = defaultdict(set)
+        for item, dst in self._data_list.items():
+            self._missing[dst].add(item)
+        return self._missing
+
+    def _is_missing(self, item):
+        """
+        Check if item is missing
+        :param item: item to check
+        :return: it is always missing because not sure about downloaded filename
+        """
+        # don't know file names so always missing
+        return True
+
+    def _initial_log(self):
+        """
+        Simpler than in parent
+        """
+        class_name = type(self).__name__
+        log.info('{}: Initializing download. {} {} are not present.'.format(class_name,
+                                                                            len(self._data_list),
+                                                                            self._list_type))
+
+    def download(self):
+        """
+        Download rpm packages from lists
+        """
+        self._initial_log()
+        error_occurred = False
+
+        for dst, rpm_set in self._missing.items():
+            try:
+                self._download_rpm_set(dst, rpm_set)
+            except subprocess.CalledProcessError as err:
+                log.exception(err.output)
+                error_occurred = True
+        if error_occurred:
+            log.error('Download failed')
+            raise RuntimeError('Download unsuccessful')
+
+
+def run_cli():
+    """
+    Run as cli tool
+    """
+    parser = argparse.ArgumentParser(description='Download rpm packages from list')
+    parser.add_argument('rpm_list', metavar='rpm-list',
+                        help='File with list of rpm packages to download.')
+    parser.add_argument('--output-dir', '-o', default=os.getcwd(),
+                        help='Download destination')
+
+    args = parser.parse_args()
+
+    logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')
+
+    timer_start = timeit.default_timer()
+    try:
+        downloader = RpmDownloader([args.rpm_list, args.output_dir])
+        downloader.download()
+    except RuntimeError:
+        sys.exit(1)
+    finally:
+        log.info('Downloading finished in {}'.format(
+            datetime.timedelta(seconds=timeit.default_timer() - timer_start)))
+
+
+if __name__ == '__main__':
+    run_cli()
diff --git a/build/download/rpm_packages.py b/build/download/rpm_packages.py
deleted file mode 100755
index 732af0ea..00000000
--- a/build/download/rpm_packages.py
+++ /dev/null
@@ -1,69 +0,0 @@
-#! /usr/bin/env python
-# -*- coding: utf-8 -*-
-
-# COPYRIGHT NOTICE STARTS HERE
-
-# Copyright 2019 © Samsung Electronics Co., Ltd.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -# COPYRIGHT NOTICE ENDS HERE - -import argparse -import subprocess -import logging -import sys -import os - -import base - -log = logging.getLogger(name=__name__) - - -def download(rpm_list, dst_dir): - if not base.check_tool('yumdownloader'): - log.error('ERROR: yumdownloader is not installed') - raise RuntimeError('yumdownloader missing') - - rpm_set = base.load_list(rpm_list) - - command = 'yumdownloader --destdir={} {}'.format(dst_dir, ' '.join(rpm_set)) - log.info('Running command: {}'.format(command)) - try: - subprocess.check_call(command.split()) - except subprocess.CalledProcessError as err: - log.exception(err.output) - raise err - log.info('Downloaded') - - -def run_cli(): - parser = argparse.ArgumentParser(description='Download rpm packages from list') - parser.add_argument('rpm_list', metavar='rpm-list', - help='File with list of npm packages to download.') - parser.add_argument('--output-dir', '-o', default=os.getcwd(), - help='Download destination') - - args = parser.parse_args() - - logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s') - - try: - download(args.rpm_list, args.output_dir) - except (subprocess.CalledProcessError, RuntimeError): - sys.exit(1) - - - -if __name__ == '__main__': - run_cli() diff --git a/docs/BuildGuide.rst b/docs/BuildGuide.rst index 6b44f230..798da9f9 100644 --- a/docs/BuildGuide.rst +++ b/docs/BuildGuide.rst @@ -61,6 +61,9 @@ Subsequent steps are the same on both platforms: # install following packages yum install -y docker-ce-18.09.5 python-pip git createrepo expect nodejs npm jq + # install Python 3 (download scripts don't support Python 2 anymore) + yum install -y python36 + # twine package is needed by nexus blob build script pip install twine @@ -77,7 +80,7 @@ Then it is necessary to clone all installer and build related repositories and p cd onap-offline # install required pip packages for download scripts - pip install -r ./build/download/requirements.txt + pip3 install -r 
./build/download/requirements.txt Part 2. Download artifacts for offline installer ------------------------------------------------ |