#! /usr/bin/env python
# -*- coding: utf-8 -*-

# COPYRIGHT NOTICE STARTS HERE

# Copyright 2019 © Samsung Electronics Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# COPYRIGHT NOTICE ENDS HERE

# Provenance: recovered from git patch da9e6b9c5a0a06d30078e44051f18ee3ba02968c
#   From: Milan Verespej, Tue, 18 Jun 2019 15:09:41 +0200
#   Subject: Refactor and improve docker image downloading
#   Issue-ID: OOM-1803
#   Change-Id: I4e648d49835faa60165725d1ca4ec22ba1e3e12b
#   Signed-off-by: Milan Verespej
#   Adds build/download/docker_downloader.py (new file, mode 100755).
# The same patch also changes build/download/downloader.py so that
# AbstractDownloader logs 'Docker image present: {}' instead of
# 'File or directory present: {}' when type(self).__name__ is
# 'DockerDownloader'.

"""Pull docker images named in a list and optionally save them as tar files."""

import argparse
import datetime
import itertools
import logging
import os
import sys
import timeit

import docker
from retrying import retry

from concurrent_downloader import ConcurrentDownloader

log = logging.getLogger(__name__)


class DockerDownloader(ConcurrentDownloader):
    """
    Downloader that pulls docker images through the docker daemon and, when
    requested, saves every image into a tar file.
    """

    def __init__(self, save, *list_args, workers=3):
        """
        :param save: when true, images are saved to tar files in addition to being pulled
        :param list_args: sequences describing what to download; the CLI passes
                          [image list file, output directory]. A sequence with fewer
                          than two items gets a single None appended.
        :param workers: passed through to ConcurrentDownloader
        :raises docker.errors.DockerException: when the docker client can not be created
        """
        self._save = save
        try:
            # big timeout in case of massive images like pnda-mirror-container:5.0.0 (11.4GB)
            self._docker_client = docker.client.DockerClient(version='auto', timeout=300)
        except docker.errors.DockerException:
            log.exception(
                'Error creating docker client. Check if is docker installed and running'
                ' or if you have right permissions.')
            raise
        # Snapshot of every tag the daemon knows at construction time;
        # _is_pulled() is answered from this set.
        self._pulled_images = set(itertools.chain.from_iterable(
            image.tags for image in self._docker_client.images.list()))
        list_args = ([*x, None] if len(x) < 2 else x for x in list_args)
        super().__init__('docker images', *list_args, workers=workers)

    @staticmethod
    def _with_default_tag(name):
        """
        Append ':latest' when the last path component of the name has no ':'.
        :param name: image name, possibly including registry and repository path
        :return: name that always carries a tag (or digest) on its last component
        """
        # Only the last component is inspected so that a registry port
        # (e.g. host:5000/image) is not mistaken for a tag.
        if ':' not in name.rsplit('/', 1)[-1]:
            return '{}:latest'.format(name)
        return name

    @staticmethod
    def image_registry_name(image_name):
        """
        Get the name as shown in local registry. Since some strings are not part of name
        when using default registry e.g. docker.io
        :param image_name: name of the image from the list
        :return: name of the image as it is shown by docker
        """
        name = image_name

        # Strip the default-registry prefixes only where they lead the name;
        # identical text deeper in the path belongs to the image name.
        for prefix in ('docker.io/', 'library/'):
            if name.startswith(prefix):
                name = name[len(prefix):]

        return DockerDownloader._with_default_tag(name)

    @property
    def check_table(self):
        """
        Table showing information of which images are pulled/saved
        """
        # Called for its side effect: refreshes self._missing, which _table() reads.
        self.missing()
        return self._table(self._data_list)

    @property
    def fail_table(self):
        """
        Table showing information about state of download of images
        that encountered problems while downloading
        """
        return self._table(self.missing())

    @staticmethod
    def _image_filename(image_name):
        """
        Get a name of a file where image will be saved.
        :param image_name: Name of the image from list
        :return: Filename of the image
        """
        return '{}.tar'.format(image_name.replace(':', '_').replace('/', '_'))

    def _table(self, images):
        """
        Get table in format for images
        :param images: images to put into table
        :return: check table format with specified images
        """
        header = ['Name', 'Pulled', 'Saved']
        data = []
        for item in images:
            if item not in self._missing:
                # Not missing means pulled, and saved too whenever saving is enabled.
                data.append((item, True, True if self._save else 'N/A'))
            else:
                data.append((item, self._missing[item]['pulled'], self._missing[item]['saved']))
        # _check_table comes from the base class; {'Name': 'l'} is presumably
        # a per-column alignment hint - confirm against its definition.
        return self._check_table(header, {'Name': 'l'}, data)

    def _is_pulled(self, image):
        """
        :param image: name of the image from the list
        :return: True when the image was known to docker when this object was created
        """
        return self.image_registry_name(image) in self._pulled_images

    def _is_saved(self, image):
        """
        :param image: name of the image from the list
        :return: True when the tar file of the image exists in its destination directory
        """
        dst = '{}/{}'.format(self._data_list[image], self._image_filename(image))
        return os.path.isfile(dst)

    def _is_missing(self, item):
        """
        Missing docker images are checked slightly differently.
        This method does nothing; missing() performs the check for all images.
        """
        pass

    def missing(self):
        """
        Get dictionary of images not present locally.
        Also stores the result in self._missing.
        :return: dict mapping image name to {'dst': ..., 'pulled': ..., 'saved': ...}
                 where 'saved' is 'N/A' when saving is disabled
        """
        missing = dict()
        for image, dst in self._data_list.items():
            pulled = self._is_pulled(image)
            if self._save:
                # if pulling and save is True. Save every pulled image to assure parity
                saved = False if not pulled else self._is_saved(image)
            else:
                # 'N/A' is truthy, so it never marks an image as missing below.
                saved = 'N/A'
            if not pulled or not saved:
                missing[image] = {'dst': dst, 'pulled': pulled, 'saved': saved}
        self._missing = missing
        return self._missing

    @retry(stop_max_attempt_number=5, wait_fixed=5000)
    def _pull_image(self, image_name):
        """
        Pull docker image.
        :param image_name: name of the image to be pulled
        :return: pulled image (image object)
        :raises docker.errors.APIError: after unsuccessful retries
        """
        image_name = self._with_default_tag(image_name)
        try:
            image = self._docker_client.images.pull(image_name)
            log.info('Image {} pulled'.format(image_name))
            return image
        except docker.errors.APIError as err:
            log.warning('Failed: {}: {}. Retrying...'.format(image_name, err))
            raise

    def _save_image(self, image_name, image, output_dir):
        """
        Save image to tar.
        A partially written file is removed when saving fails.
        :param output_dir: path to destination directory
        :param image: image object from pull_image function
        :param image_name: name of the image from list
        """
        dst = '{}/{}'.format(output_dir, self._image_filename(image_name))
        # exist_ok avoids a race between workers creating the same directory.
        os.makedirs(output_dir, exist_ok=True)
        try:
            with open(dst, 'wb') as f:
                for chunk in image.save(named=self.image_registry_name(image_name)):
                    f.write(chunk)
            log.info('Image {} saved as {}'.format(image_name, dst))
        except Exception:
            # Do not leave a truncated archive behind; _is_saved() only checks existence.
            if os.path.isfile(dst):
                os.remove(dst)
            raise

    def _download_item(self, image):
        """ Pull and save docker image from specified docker registry
        :param image: image to be downloaded, as a (name, info) pair where info is
                      a dict with 'dst' and 'pulled' keys as built by missing()
        """
        image_name, image_dict = image
        log.info('Downloading image: {}'.format(image_name))
        try:
            if image_dict['pulled']:
                # Already present locally (only the tar file is missing).
                image_to_save = self._docker_client.images.get(image_name)
            else:
                image_to_save = self._pull_image(image_name)
            if self._save:
                self._save_image(image_name, image_to_save, image_dict['dst'])
        except Exception as err:
            log.exception('Error downloading {}: {}'.format(image_name, err))
            raise


def run_cli():
    """
    Command line entry point.
    Exits with status 0 after printing the check table in --check mode and
    with status 1 when the download raises RuntimeError.
    """
    parser = argparse.ArgumentParser(description='Download docker images from list')
    parser.add_argument('image_list', metavar='image-list',
                        help='File with list of images to download.')
    parser.add_argument('--save', '-s', action='store_true', default=False,
                        help='Save images (without it only pull is executed)')
    parser.add_argument('--output-dir', '-o', default=os.getcwd(),
                        help='Download destination')
    parser.add_argument('--check', '-c', action='store_true', default=False,
                        help='Check what is missing. No download. '
                             'Use with combination with -s to check saved images as well.')
    parser.add_argument('--debug', action='store_true', default=False,
                        help='Turn on debug output')
    parser.add_argument('--workers', type=int, default=3,
                        help='Set maximum workers for parallel download (default: 3)')

    args = parser.parse_args()

    if args.debug:
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    else:
        logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')

    # The positional argument is stored under 'image_list' (its name), not 'file_list'.
    downloader = DockerDownloader(args.save, [args.image_list, args.output_dir],
                                  workers=args.workers)

    if args.check:
        log.info('Check mode. No download will be executed.')
        log.info(downloader.check_table)
        sys.exit(0)

    timer_start = timeit.default_timer()
    try:
        downloader.download()
    except RuntimeError:
        sys.exit(1)
    finally:
        # Runs on success and on the sys.exit(1) path alike.
        log.info('Downloading finished in {}'.format(
            datetime.timedelta(seconds=timeit.default_timer() - timer_start)))


if __name__ == '__main__':
    run_cli()