path: root/build/download/http_files.py
#! /usr/bin/env python
# -*- coding: utf-8 -*-

#   COPYRIGHT NOTICE STARTS HERE

#   Copyright 2019 © Samsung Electronics Co., Ltd.
#
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

#   COPYRIGHT NOTICE ENDS HERE


import argparse
import logging
import os
import sys
from retrying import retry

import base
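# 'base' is the local helper module shipped alongside this script; it supplies
# the request, save, list-loading, progress and concurrency utilities used below.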

log = logging.getLogger(__name__)

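# Retry transient fetch failures: up to 5 attempts with a fixed 2000 ms wait
# between attempts (the retrying library's @retry decorator).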
@retry(stop_max_attempt_number=5, wait_fixed=2000)
def get_file(file_uri):
    """
    Get a file over HTTP(S).
    :param file_uri: address of the file; 'http://' is prepended when no scheme is given
    :return: byte content of the file
    """
    if not file_uri.startswith('http'):
        file_uri = 'http://' + file_uri
    file_req = base.make_get_request(file_uri)
    return file_req.content


def download_file(file_uri, dst_dir):
    """
    Download an http file and save it under dst_dir.
    :param file_uri: http address of the file
    :param dst_dir: directory where the file will be saved
    """
    log.info('Downloading: {}'.format(file_uri))
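    # Strip any scheme prefix so both 'http://host/dir/file' and
    # 'host/dir/file' end up as '<dst_dir>/host/dir/file', matching
    # the paths that missing() checks for.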
    dst_path = '{}/{}'.format(dst_dir, file_uri.rsplit('//')[-1])
    try:
        file_content = get_file(file_uri)
        base.save_to_file(dst_path, file_content)
    except Exception as err:
        if os.path.isfile(dst_path):
            os.remove(dst_path)
        log.error('Error downloading: {}: {}'.format(file_uri, err))
        raise
    log.info('Downloaded: {}'.format(file_uri))


def missing(file_set, dst_dir):
    """
    Return the subset of file_set not already present under dst_dir.
    """
    return {file for file in file_set if not os.path.isfile('{}/{}'.format(dst_dir, file))}


def download(data_list, dst_dir, check, progress, workers=None):
    """
    Download the files named in a data list.
    :param data_list: path to the file containing the list
    :param dst_dir: destination directory
    :param check: boolean check mode (only report missing files, download nothing)
    :param progress: progressbar.ProgressBar to monitor progress
    :param workers: maximum number of workers for parallel execution
    :raises RuntimeError: if one or more files could not be downloaded
    """
    file_set = base.load_list(data_list)
    missing_files = missing(file_set, dst_dir)

    if check:
        log.info(base.simple_check_table(file_set, missing_files))
        return

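    # Files already present under dst_dir are only reported as skipped;
    # just the missing ones are handed to the concurrent downloader.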
    skipping = file_set - missing_files

    base.start_progress(progress, len(file_set), skipping, log)

    error_count = base.run_concurrent(workers, progress, download_file, missing_files, dst_dir)

    base.finish_progress(progress, error_count, log)
    if error_count > 0:
        log.error('{} files were not downloaded. Check log for specific failures.'.format(error_count))
        raise RuntimeError('{} files were not downloaded'.format(error_count))


def run_cli():
    """
    Run as a CLI tool.
    """
    parser = argparse.ArgumentParser(description='Download http files from list')
    parser.add_argument('file_list', metavar='file-list',
                        help='File with list of http files to download')
    parser.add_argument('--output-dir', '-o', default=os.getcwd(),
                        help='Destination directory for saving')
    parser.add_argument('--check', '-c', action='store_true', default=False,
                        help='Check mode: only report missing files, do not download')
    parser.add_argument('--debug', action='store_true', default=False,
                        help='Turn on debug output')
    parser.add_argument('--workers', type=int, default=None,
                        help='Set maximum workers for parallel download (default: cores * 5)')

    args = parser.parse_args()

    if args.debug:
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    else:
        logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')

    progress = base.init_progress('http files') if not args.check else None

    try:
        download(args.file_list, args.output_dir, args.check, progress, args.workers)
    except RuntimeError:
        sys.exit(1)


if __name__ == '__main__':
    run_cli()
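
# Example usage (a minimal sketch; the list file is assumed to hold one entry
# per line, e.g. 'example.com/resources/foo.tar.gz' -- the exact format is
# whatever base.load_list() accepts):
#
#   ./http_files.py files.list --output-dir ./downloads --workers 8
#
# Check mode only prints which of the listed files are present or missing
# under the output directory, downloading nothing:
#
#   ./http_files.py files.list --output-dir ./downloads --check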