summary refs log tree commit diff stats
path: root/build
diff options
context:
space:
mode:
author Milan Verespej <m.verespej@partner.samsung.com> 2019-04-18 14:37:51 +0200
committer Milan Verespej <m.verespej@partner.samsung.com> 2019-05-24 10:26:50 +0200
commit 8786632a6a9cf3be20e7b1d6a047751fa26fcf8b (patch)
tree fb94be2d71b97bdbb3acbff309f8d3c63568f4ab /build
parent b1fe78553ea7d1caf90c1d52e195cd8f2a082008 (diff)
Improve docker image download script
This is part of the series of changes that will improve download scripts. Issue-ID: OOM-1803 Change-Id: I6606762f8e05c25132257e5612997418a9d6d701 Signed-off-by: Milan Verespej <m.verespej@partner.samsung.com>
Diffstat (limited to 'build')
-rw-r--r-- build/download/__init__.py 22
-rw-r--r-- build/download/base.py 83
-rwxr-xr-x build/download/docker_images.py 268
-rw-r--r-- build/download/requirements.txt 5
4 files changed, 378 insertions, 0 deletions
diff --git a/build/download/__init__.py b/build/download/__init__.py
new file mode 100644
index 00000000..f0efbc15
--- /dev/null
+++ b/build/download/__init__.py
@@ -0,0 +1,22 @@
+
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# COPYRIGHT NOTICE STARTS HERE
+
+# Copyright 2019 © Samsung Electronics Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# COPYRIGHT NOTICE ENDS HERE
+
diff --git a/build/download/base.py b/build/download/base.py
new file mode 100644
index 00000000..fcf60242
--- /dev/null
+++ b/build/download/base.py
@@ -0,0 +1,83 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# COPYRIGHT NOTICE STARTS HERE
+
+# Copyright 2019 © Samsung Electronics Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# COPYRIGHT NOTICE ENDS HERE
+
+
+import progressbar
+import concurrent.futures
+
+progressbar.streams.wrap_stdout()
+progressbar.streams.wrap_stderr()
+
+
def load_list(item_list):
    """
    Read the list of items to be downloaded from a text file.

    Surrounding whitespace is stripped from every line and lines that end up
    empty are ignored. Duplicates collapse because a set is returned.
    :param item_list: path to file with list of items (1 line per item)
    :return: set of non-empty, stripped lines from the file
    """
    entries = set()
    with open(item_list, 'r') as list_file:
        for raw_line in list_file:
            entry = raw_line.strip()
            if entry:
                entries.add(entry)
    return entries
+
+
def init_progress(items_name):
    """
    Build the progress bar shown while downloading. The bar is not started.
    :param items_name: label of the downloaded items, used in the
                       'Downloading <items_name>: ' prefix
    :return: progressbar.ProgressBar with Bar, Percentage and SimpleProgress
             widgets
    """
    label = 'Downloading {}: '.format(items_name)
    widgets = [
        label,
        progressbar.Bar(),
        ' ',
        progressbar.Percentage(),
        ' ',
        '(',
        progressbar.SimpleProgress(),
        ')',
    ]
    return progressbar.ProgressBar(widgets=widgets,
                                   poll_rate=1.0,
                                   redirect_stdout=True)
+
+
def start_progress(progress, target_count, skipping, log):
    """
    Log what is skipped and start the progress bar.
    :param progress: progress bar object (max_value, start() and update()
                     are used)
    :param target_count: total number of items, used as the bar maximum
    :param skipping: sized collection of items that will not be downloaded
    :param log: logger used for the informational messages
    :return: None
    """
    log_skipping(skipping, log)
    log.info("Initializing download. Takes a while.")

    # The total is only known now, so it is set right before starting.
    progress.max_value = target_count
    progress.start()
    # Skipped items are counted as already done.
    already_done = len(skipping)
    progress.update(already_done)
+
+
def log_skipping(skipping_iterable, logger):
    """
    Write one 'Skipping: <item>' info message per skipped item.
    :param skipping_iterable: iterable of skipped items
    :param logger: logger (only its info() method is used)
    :return: None
    """
    template = 'Skipping: {}'
    for name in skipping_iterable:
        logger.info(template.format(name))
+
+
def run_concurrent(workers, progress, fn, iterable, *args):
    """
    Call fn(item, *args) for every item in a thread pool and track progress.

    Each call that finishes without an exception advances the progress value
    by one. For a call that raised, progress.update() is invoked without a
    value and the failure is counted; the exception itself is not re-raised.
    :param workers: maximum number of worker threads
    :param progress: progress bar object (value and update() are used)
    :param fn: callable executed for every item
    :param iterable: items passed to fn as first argument
    :param args: additional positional arguments passed to every fn call
    :return: number of calls that ended with an exception
    """
    failures = 0
    with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as pool:
        pending = []
        for item in iterable:
            pending.append(pool.submit(fn, item, *args))
        # Progress is only touched here, in the calling thread.
        for finished in concurrent.futures.as_completed(pending):
            if finished.exception():
                failures += 1
                progress.update()
                continue
            progress.update(progress.value + 1)
    return failures
+
+
def finish_progress(progress, error_count, log):
    """
    Finish the progress bar and log the elapsed time.
    :param progress: progress bar object (finish() and data() are used)
    :param error_count: number of failed downloads; the bar is finished with
                        dirty=True when it is greater than zero
    :param log: logger used for the final message
    :return: None
    """
    had_errors = error_count > 0
    progress.finish(dirty=had_errors)
    elapsed = progress.data()['time_elapsed']
    log.info('Download ended. Elapsed time {}'.format(elapsed))
+
diff --git a/build/download/docker_images.py b/build/download/docker_images.py
new file mode 100755
index 00000000..e4e742b3
--- /dev/null
+++ b/build/download/docker_images.py
@@ -0,0 +1,268 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# COPYRIGHT NOTICE STARTS HERE
+
+# Copyright 2019 © Samsung Electronics Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# COPYRIGHT NOTICE ENDS HERE
+
+
+import argparse
+import concurrent.futures
+import docker
+import itertools
+import json
+import logging
+import os
+import prettytable
+import sys
+import threading
+from retrying import retry
+
+import base
+
+log = logging.getLogger(__name__)
+
+
def image_filename(image_name):
    """
    Build the name of the tar file an image is saved to.

    Every ':' and '/' of the image name is replaced with '_' and '.tar' is
    appended.
    :param image_name: Name of the image from list
    :return: Filename of the image
    """
    flattened = image_name
    for separator in (':', '/'):
        flattened = flattened.replace(separator, '_')
    return flattened + '.tar'
+
+
def image_registry_name(image_name):
    """
    Get the name as shown in local registry. Since some strings are not part of name
    when using default registry e.g. docker.io

    Only a leading 'docker.io/' and then a leading 'library/' are dropped;
    the same text appearing later in the name is left untouched. ':latest'
    is appended when the last path component carries no tag.
    :param image_name: name of the image from the list
    :return: name of the image as it is shown by docker
    """
    name = image_name

    # Order matters: 'docker.io/library/x' must lose both prefixes.
    for prefix in ('docker.io/', 'library/'):
        if name.startswith(prefix):
            # Slice instead of str.replace so only the prefix is removed.
            name = name[len(prefix):]

    # Look only at the last component, a registry host may contain ':port'.
    if ':' not in name.rsplit('/', 1)[-1]:
        name = '{}:latest'.format(name)

    return name
+
+
def not_pulled_images(docker_client, target_list):
    """
    Find the images from the list that are not present on the local system.

    An image counts as present when its registry name (see
    image_registry_name) equals a tag of any image listed by docker.
    :param docker_client: docker.client.DockerClient
    :param target_list: list of images to look for
    :return: (set) images that are not present on local system
    """
    local_tags = set()
    for local_image in docker_client.images.list():
        local_tags.update(local_image.tags)
    return {wanted for wanted in target_list
            if image_registry_name(wanted) not in local_tags}
+
+
def not_saved(target_images, target_dir):
    """
    Get set of images that are not saved in target directory

    An image counts as saved when a regular file named image_filename(image)
    exists directly in target_dir.
    :param target_images: List of images to check for
    :param target_dir: Directory where those images should be
    :return: (set) Images that are missing from target directory
    """
    # os.path.join keeps an empty target_dir relative to the current
    # directory instead of turning the path into '/<file>'.
    return {image for image in target_images
            if not os.path.isfile(os.path.join(target_dir, image_filename(image)))}
+
+
def missing(docker_client, target_list, save, target_dir):
    """
    Collect the images that are not present locally.

    Saved files are inspected only when save is true; otherwise the
    'not_saved' entry is an empty set.
    :param docker_client: docker.client.DockerClient for communication with docker
    :param target_list: list of desired images
    :param save: (boolean) check for saved images
    :param target_dir: target directory for saved images
    :return: Dictionary of missing images ('not_pulled', 'not_saved')
    """
    absent = {'not_pulled': not_pulled_images(docker_client, target_list)}
    if save:
        absent['not_saved'] = not_saved(target_list, target_dir)
    else:
        absent['not_saved'] = set()
    return absent
+
+
def merge_dict_sets(dictionary):
    """
    Merge all set values of a dictionary into one new set.
    :param dictionary: dictionary whose values are sets
    :return: (set) union of all values; empty set for an empty dictionary
    """
    # Starting from a fresh set() keeps the call valid for an empty dict,
    # where set.union(*[]) would fail for lack of a first argument.
    return set().union(*dictionary.values())
+
+
def check_table(check_list, missing, save):
    """
    Build a table with the download state of every image.
    :param check_list: images to be listed (rows are sorted by image name)
    :param missing: dictionary of missing images ('not_pulled', 'not_saved')
    :param save: (boolean) whether saved files were checked; when false the
                 'Saved' column contains 'Not checked'
    :return: prettytable.PrettyTable with columns 'Image', 'Pulled', 'Saved'
    """
    table = prettytable.PrettyTable(['Image', 'Pulled', 'Saved'])
    table.align['Image'] = 'l'
    for image in sorted(check_list):
        is_pulled = image not in missing['not_pulled']
        if save:
            # An image that is not pulled is reported as not saved as well,
            # it is going to be saved anyway once it gets pulled.
            saved_state = is_pulled and image not in missing['not_saved']
        else:
            saved_state = 'Not checked'
        table.add_row([image, is_pulled, saved_state])
    return table
+
+
@retry(stop_max_attempt_number=5, wait_fixed=5000)
def pull_image(docker_client, image_name):
    """
    Pull docker image.

    The retry decorator repeats the call (stop_max_attempt_number=5,
    wait_fixed=5000) whenever an exception leaves this function.
    :param docker_client: docker.client.DockerClient for communication with docker
    :param image_name: name of the image to be pulled
    :return: pulled image (image object)
    :raises docker.errors.APIError: after unsuccessful retries
    """
    # Always pull an explicit tag; only the last path component is checked
    # because a registry host may contain ':port'.
    if ':' not in image_name.rsplit('/', 1)[-1]:
        image_name = '{}:latest'.format(image_name)
    try:
        image = docker_client.images.pull(image_name)
    except docker.errors.APIError as err:
        log.warning('Failed: {}: {}. Retrying...'.format(image_name, err))
        # Bare raise keeps the original traceback for the retry wrapper.
        raise
    log.info('Image {} pulled'.format(image_name))
    return image
+
+
def save_image(image_name, image, output_dir, docker_client=None):
    """
    Save image to tar.

    The output directory is created when missing. A partially written file
    is removed when saving fails.
    :param output_dir: path to destination directory
    :param image: image object from pull_image function; anything that is not
                  a docker Image (e.g. None) makes the function look the
                  image up by name using docker_client
    :param image_name: name of the image from list
    :param docker_client: docker.client.DockerClient for communication with docker
                          (required when image is not an Image object)
    :return: None
    """
    dst = os.path.join(output_dir, image_filename(image_name))
    # This function runs in several threads at once, so another worker may
    # create the directory between a check and makedirs. Create first and
    # accept failure only when the directory exists afterwards.
    try:
        os.makedirs(output_dir)
    except OSError:
        if not os.path.isdir(output_dir):
            raise
    if not isinstance(image, docker.models.images.Image):
        image = docker_client.images.get(image_name)
    try:
        with open(dst, 'wb') as f:
            for chunk in image.save(named=image_registry_name(image_name)):
                f.write(chunk)
    except Exception:
        # Do not leave a truncated tar behind. The file may not exist at all
        # when open() itself failed, so removing it must not mask the error.
        if os.path.exists(dst):
            os.remove(dst)
        raise
    log.info('Image {} saved as {}'.format(image_name, dst))
+
+
def download_docker_image(image, save, output_dir, docker_client):
    """
    Pull and save docker image from specified docker registry
    :param docker_client: docker.client.DockerClient for communication with docker
    :param image: image to be downloaded
    :param save: boolean - save image to disk or skip saving
    :param output_dir: directory where image will be saved
    :return: None
    :raises Exception: any error from pulling or saving, after it was logged
    """
    log.info('Downloading image: {}'.format(image))
    try:
        pulled_image = pull_image(docker_client, image)
        if save:
            # Hand the client over as well so save_image can look the image
            # up by name if the pulled object is not an Image instance.
            save_image(image, pulled_image, output_dir, docker_client)
    except Exception as err:
        log.error('Error downloading {}: {}'.format(image, err))
        # Bare raise keeps the original traceback.
        raise
+
+
def download(image_list, save, output_dir, check_mode, progress, workers=3):
    """
    Download images from list
    :param image_list: path to file with list of images to be downloaded
    :param save: whether images should be saved to disk
    :param output_dir: directory where images will be saved
    :param check_mode: only check for missing images. No download
    :param progress: progressbar.ProgressBar to show how far download is
                     (not used in check mode)
    :param workers: maximum number of parallel download threads
    :return: number of images that failed to download; None in check mode
    :raises docker.errors.DockerException: when docker client can not be created
    """
    try:
        docker_client = docker.client.DockerClient(version='auto')
    except docker.errors.DockerException as err:
        log.error(err)
        log.error('Error creating docker client. Check if is docker installed and running'
                  ' or if you have right permissions.')
        # Bare raise keeps the original traceback.
        raise

    target_images = base.load_list(image_list)
    missing_images = missing(docker_client, target_images, save, output_dir)

    if check_mode:
        log.info(check_table(target_images, missing_images, save))
        return

    # Images that are neither "not pulled" nor "not saved" need no work.
    skipping = target_images - merge_dict_sets(missing_images)

    base.start_progress(progress, len(target_images), skipping, log)

    # if pulling and save is True. Save every pulled image to assure parity
    error_count = base.run_concurrent(workers, progress, download_docker_image,
                                      missing_images['not_pulled'],
                                      save, output_dir, docker_client)
    # only save those that are pulled already but not saved
    # (image is passed as None, save_image gets it from docker by name)
    error_count += base.run_concurrent(workers, progress, save_image,
                                       missing_images['not_saved'] - missing_images['not_pulled'],
                                       None, output_dir, docker_client)

    if error_count > 0:
        log.error('{} images were not downloaded'.format(error_count))
        # Re-check to report what is still missing after the run.
        missing_images = missing(docker_client, target_images, save, output_dir)
        log.info(check_table(merge_dict_sets(missing_images), missing_images, save))

    base.finish_progress(progress, error_count, log)

    return error_count
+
+
def run_cli():
    """
    Command line entry point: parse arguments, set up logging, run download.

    The process exits with the value returned by download() (number of
    failed images, None in check mode) or with 1 when a docker error
    propagates out of download().
    :return: None (the function always ends with sys.exit)
    """
    parser = argparse.ArgumentParser(description='Download docker images from list')
    parser.add_argument('image_list', metavar='image-list',
                        help='File with list of images to download.')
    parser.add_argument('--save', '-s', action='store_true', default=False,
                        help='Save images (without it only pull is executed)')
    parser.add_argument('--output-dir', '-o', default=os.getcwd(),
                        help='Download destination')
    # The trailing space keeps the two concatenated literals from running
    # together in the rendered help text.
    parser.add_argument('--check', '-c', action='store_true', default=False,
                        help='Check what is missing. No download. '
                             'Use with combination with -s to check saved images as well.')
    parser.add_argument('--debug', action='store_true', default=False,
                        help='Turn on debug output')
    parser.add_argument('--workers', type=int, default=3,
                        help='Set maximum workers for parallel download (default: 3)')

    args = parser.parse_args()

    # A thread pool can not be created with less than one worker; report it
    # as a usage error instead of failing later with a traceback.
    if args.workers < 1:
        parser.error('--workers must be a positive integer')

    if args.debug:
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    else:
        logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')

    # No progress bar is needed when only checking.
    progress = base.init_progress('Docker images') if not args.check else None
    try:
        sys.exit(download(args.image_list, args.save, args.output_dir, args.check,
                          progress, args.workers))
    except docker.errors.DockerException:
        log.error('Irrecoverable error detected.')
        sys.exit(1)


if __name__ == '__main__':
    run_cli()
+
diff --git a/build/download/requirements.txt b/build/download/requirements.txt
new file mode 100644
index 00000000..875f0c67
--- /dev/null
+++ b/build/download/requirements.txt
@@ -0,0 +1,5 @@
+docker==3.7.2
+futures==3.2.0; python_version == '2.7'
+prettytable==0.7.2
+progressbar2==3.39.3
+retrying==1.3.3