author    | Pavel Aharoni <pa0916@att.com> | 2017-03-29 13:35:45 +0300
committer | Pavel Aharoni <pa0916@att.com> | 2017-03-29 13:35:45 +0300
commit    | e2cc2530fc6d54ebc975c01a4ff887ce12f0a736 (patch)
tree      | 38385867295c8a09fb0d7f8eaf5fa78179e5b13a /jython-tosca-parser/src/main/resources/Lib/site-packages/pip/download.py
parent    | bccebaa9888906f8ff78172f62ec592956066d82 (diff)
[SDC-6] sdc-distribution-client 1707 rebasing
Change-Id: I322a05fd79beb6ba4fee4d32afffecf531b86e98
Signed-off-by: Pavel Aharoni <pa0916@att.com>
Diffstat (limited to 'jython-tosca-parser/src/main/resources/Lib/site-packages/pip/download.py')
-rw-r--r-- | jython-tosca-parser/src/main/resources/Lib/site-packages/pip/download.py | 729
1 file changed, 729 insertions, 0 deletions
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/download.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/download.py
new file mode 100644
index 0000000..4c23f5e
--- /dev/null
+++ b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/download.py
@@ -0,0 +1,729 @@
+import cgi
+import email.utils
+import hashlib
+import getpass
+import mimetypes
+import os
+import platform
+import re
+import shutil
+import sys
+import tempfile
+
+import pip
+
+from pip.backwardcompat import urllib, urlparse, raw_input
+from pip.exceptions import InstallationError, HashMismatch
+from pip.util import (splitext, rmtree, format_size, display_path,
+                      backup_dir, ask_path_exists, unpack_file,
+                      create_download_cache_folder, cache_download)
+from pip.vcs import vcs
+from pip.log import logger
+from pip._vendor import requests, six
+from pip._vendor.requests.adapters import BaseAdapter
+from pip._vendor.requests.auth import AuthBase, HTTPBasicAuth
+from pip._vendor.requests.compat import IncompleteRead
+from pip._vendor.requests.exceptions import InvalidURL, ChunkedEncodingError
+from pip._vendor.requests.models import Response
+from pip._vendor.requests.structures import CaseInsensitiveDict
+
+__all__ = ['get_file_content',
+           'is_url', 'url_to_path', 'path_to_url',
+           'is_archive_file', 'unpack_vcs_link',
+           'unpack_file_url', 'is_vcs_url', 'is_file_url', 'unpack_http_url']
+
+
+def user_agent():
+    """Return a string representing the user agent."""
+    _implementation = platform.python_implementation()
+
+    if _implementation == 'CPython':
+        _implementation_version = platform.python_version()
+    elif _implementation == 'PyPy':
+        _implementation_version = '%s.%s.%s' % (sys.pypy_version_info.major,
+                                                sys.pypy_version_info.minor,
+                                                sys.pypy_version_info.micro)
+        if sys.pypy_version_info.releaselevel != 'final':
+            _implementation_version = ''.join([
+                _implementation_version,
+                sys.pypy_version_info.releaselevel,
+            ])
+    elif _implementation == 'Jython':
+        _implementation_version = platform.python_version()  # Complete Guess
+    elif _implementation == 'IronPython':
+        _implementation_version = platform.python_version()  # Complete Guess
+    else:
+        _implementation_version = 'Unknown'
+
+    try:
+        p_system = platform.system()
+        p_release = platform.release()
+    except IOError:
+        p_system = 'Unknown'
+        p_release = 'Unknown'
+
+    return " ".join(['pip/%s' % pip.__version__,
+                     '%s/%s' % (_implementation, _implementation_version),
+                     '%s/%s' % (p_system, p_release)])
+
+
+class MultiDomainBasicAuth(AuthBase):
+
+    def __init__(self, prompting=True):
+        self.prompting = prompting
+        self.passwords = {}
+
+    def __call__(self, req):
+        parsed = urlparse.urlparse(req.url)
+
+        # Get the netloc without any embedded credentials
+        netloc = parsed.netloc.split("@", 1)[-1]
+
+        # Set the url of the request to the url without any credentials
+        req.url = urlparse.urlunparse(parsed[:1] + (netloc,) + parsed[2:])
+
+        # Use any stored credentials that we have for this netloc
+        username, password = self.passwords.get(netloc, (None, None))
+
+        # Extract credentials embedded in the url if we have none stored
+        if username is None:
+            username, password = self.parse_credentials(parsed.netloc)
+
+        if username or password:
+            # Store the username and password
+            self.passwords[netloc] = (username, password)
+
+            # Send the basic auth with this request
+            req = HTTPBasicAuth(username or "", password or "")(req)
+
+        # Attach a hook to handle 401 responses
+        req.register_hook("response", self.handle_401)
+
+        return req
+
+    def handle_401(self, resp, **kwargs):
+        # We only care about 401 responses, anything else we want to just
+        # pass through the actual response
+        if resp.status_code != 401:
+            return resp
+
+        # We are not able to prompt the user, so simply return the response
+        if not self.prompting:
+            return resp
+
+        parsed = urlparse.urlparse(resp.url)
+
+        # Prompt the user for a new username and password
+        username = raw_input("User for %s: " % parsed.netloc)
+        password = getpass.getpass("Password: ")
+
+        # Store the new username and password to use for future requests
+        if username or password:
+            self.passwords[parsed.netloc] = (username, password)
+
+        # Consume content and release the original connection to allow our new
+        # request to reuse the same one.
+        resp.content
+        resp.raw.release_conn()
+
+        # Add our new username and password to the request
+        req = HTTPBasicAuth(username or "", password or "")(resp.request)
+
+        # Send our new request
+        new_resp = resp.connection.send(req, **kwargs)
+        new_resp.history.append(resp)
+
+        return new_resp
+
+    def parse_credentials(self, netloc):
+        if "@" in netloc:
+            userinfo = netloc.rsplit("@", 1)[0]
+            if ":" in userinfo:
+                return userinfo.split(":", 1)
+            return userinfo, None
+        return None, None
+
+
+class LocalFSResponse(object):
+
+    def __init__(self, fileobj):
+        self.fileobj = fileobj
+
+    def __getattr__(self, name):
+        return getattr(self.fileobj, name)
+
+    def read(self, amt=None, decode_content=None, cache_content=False):
+        return self.fileobj.read(amt)
+
+    # Insert Hacks to Make Cookie Jar work w/ Requests
+    @property
+    def _original_response(self):
+        class FakeMessage(object):
+            def getheaders(self, header):
+                return []
+
+            def get_all(self, header, default):
+                return []
+
+        class FakeResponse(object):
+            @property
+            def msg(self):
+                return FakeMessage()
+
+        return FakeResponse()
+
+
+class LocalFSAdapter(BaseAdapter):
+
+    def send(self, request, stream=None, timeout=None, verify=None, cert=None,
+             proxies=None):
+        parsed_url = urlparse.urlparse(request.url)
+
+        # We only work for requests with a host of localhost
+        if parsed_url.netloc.lower() != "localhost":
+            raise InvalidURL(
+                "Invalid URL %r: Only localhost is allowed" %
+                request.url
+            )
+
+        real_url = urlparse.urlunparse(parsed_url[:1] + ("",) + parsed_url[2:])
+        pathname = url_to_path(real_url)
+
+        resp = Response()
+        resp.status_code = 200
+        resp.url = real_url
+
+        stats = os.stat(pathname)
+        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
+        resp.headers = CaseInsensitiveDict({
+            "Content-Type": mimetypes.guess_type(pathname)[0] or "text/plain",
+            "Content-Length": stats.st_size,
+            "Last-Modified": modified,
+        })
+
+        resp.raw = LocalFSResponse(open(pathname, "rb"))
+        resp.close = resp.raw.close
+
+        return resp
+
+    def close(self):
+        pass
+
+
+class PipSession(requests.Session):
+
+    timeout = None
+
+    def __init__(self, *args, **kwargs):
+        retries = kwargs.pop('retries', None)
+
+        super(PipSession, self).__init__(*args, **kwargs)
+
+        # Attach our User Agent to the request
+        self.headers["User-Agent"] = user_agent()
+
+        # Attach our Authentication handler to the session
+        self.auth = MultiDomainBasicAuth()
+
+        # Configure retries
+        if retries:
+            http_adapter = requests.adapters.HTTPAdapter(max_retries=retries)
+            self.mount("http://", http_adapter)
+            self.mount("https://", http_adapter)
+
+        # Enable file:// urls
+        self.mount("file://", LocalFSAdapter())
+
+    def request(self, method, url, *args, **kwargs):
+        # Make file:// urls not fail due to lack of a hostname
+        parsed = urlparse.urlparse(url)
+        if parsed.scheme == "file":
+            url = urlparse.urlunparse(parsed[:1] + ("localhost",) + parsed[2:])
+
+        # Allow setting a default timeout on a session
+        kwargs.setdefault("timeout", self.timeout)
+
+        # Dispatch the actual request
+        return super(PipSession, self).request(method, url, *args, **kwargs)
+
+
+def get_file_content(url, comes_from=None, session=None):
+    """Gets the content of a file; it may be a filename, file: URL, or
+    http: URL. Returns (location, content). Content is unicode."""
+    if session is None:
+        session = PipSession()
+
+    match = _scheme_re.search(url)
+    if match:
+        scheme = match.group(1).lower()
+        if (scheme == 'file' and comes_from
+                and comes_from.startswith('http')):
+            raise InstallationError(
+                'Requirements file %s references URL %s, which is local'
+                % (comes_from, url))
+        if scheme == 'file':
+            path = url.split(':', 1)[1]
+            path = path.replace('\\', '/')
+            match = _url_slash_drive_re.match(path)
+            if match:
+                path = match.group(1) + ':' + path.split('|', 1)[1]
+            path = urllib.unquote(path)
+            if path.startswith('/'):
+                path = '/' + path.lstrip('/')
+            url = path
+        else:
+            # FIXME: catch some errors
+            resp = session.get(url)
+            resp.raise_for_status()
+
+            if six.PY3:
+                return resp.url, resp.text
+            else:
+                return resp.url, resp.content
+    try:
+        f = open(url)
+        content = f.read()
+    except IOError as exc:
+        raise InstallationError(
+            'Could not open requirements file: %s' % str(exc)
+        )
+    else:
+        f.close()
+    return url, content
+
+
+_scheme_re = re.compile(r'^(http|https|file):', re.I)
+_url_slash_drive_re = re.compile(r'/*([a-z])\|', re.I)
+
+
+def is_url(name):
+    """Returns true if the name looks like a URL"""
+    if ':' not in name:
+        return False
+    scheme = name.split(':', 1)[0].lower()
+    return scheme in ['http', 'https', 'file', 'ftp'] + vcs.all_schemes
+
+
+def url_to_path(url):
+    """
+    Convert a file: URL to a path.
+    """
+    assert url.startswith('file:'), (
+        "You can only turn file: urls into filenames (not %r)" % url)
+    path = url[len('file:'):].lstrip('/')
+    path = urllib.unquote(path)
+    if _url_drive_re.match(path):
+        path = path[0] + ':' + path[2:]
+    else:
+        path = '/' + path
+    return path
+
+
+_drive_re = re.compile('^([a-z]):', re.I)
+_url_drive_re = re.compile('^([a-z])[:|]', re.I)
+
+
+def path_to_url(path):
+    """
+    Convert a path to a file: URL. The path will be made absolute and have
+    quoted path parts.
+    """
+    path = os.path.normpath(os.path.abspath(path))
+    drive, path = os.path.splitdrive(path)
+    filepath = path.split(os.path.sep)
+    url = '/'.join([urllib.quote(part) for part in filepath])
+    if not drive:
+        url = url.lstrip('/')
+    return 'file:///' + drive + url
+
+
+def is_archive_file(name):
+    """Return True if `name` is considered an archive file."""
+    archives = (
+        '.zip', '.tar.gz', '.tar.bz2', '.tgz', '.tar', '.pybundle', '.whl'
+    )
+    ext = splitext(name)[1].lower()
+    if ext in archives:
+        return True
+    return False
+
+
+def unpack_vcs_link(link, location, only_download=False):
+    vcs_backend = _get_used_vcs_backend(link)
+    if only_download:
+        vcs_backend.export(location)
+    else:
+        vcs_backend.unpack(location)
+
+
+def _get_used_vcs_backend(link):
+    for backend in vcs.backends:
+        if link.scheme in backend.schemes:
+            vcs_backend = backend(link.url)
+            return vcs_backend
+
+
+def is_vcs_url(link):
+    return bool(_get_used_vcs_backend(link))
+
+
+def is_file_url(link):
+    return link.url.lower().startswith('file:')
+
+
+def _check_hash(download_hash, link):
+    if download_hash.digest_size != hashlib.new(link.hash_name).digest_size:
+        logger.fatal(
+            "Hash digest size of the package %d (%s) doesn't match the "
+            "expected hash name %s!" %
+            (download_hash.digest_size, link, link.hash_name)
+        )
+        raise HashMismatch('Hash name mismatch for package %s' % link)
+    if download_hash.hexdigest() != link.hash:
+        logger.fatal(
+            "Hash of the package %s (%s) doesn't match the expected hash %s!" %
+            (link, download_hash.hexdigest(), link.hash)
+        )
+        raise HashMismatch(
+            'Bad %s hash for package %s' % (link.hash_name, link)
+        )
+
+
+def _get_hash_from_file(target_file, link):
+    try:
+        download_hash = hashlib.new(link.hash_name)
+    except (ValueError, TypeError):
+        logger.warn(
+            "Unsupported hash name %s for package %s" % (link.hash_name, link)
+        )
+        return None
+
+    fp = open(target_file, 'rb')
+    while True:
+        chunk = fp.read(4096)
+        if not chunk:
+            break
+        download_hash.update(chunk)
+    fp.close()
+    return download_hash
+
+
+def _download_url(resp, link, temp_location):
+    fp = open(temp_location, 'wb')
+    download_hash = None
+    if link.hash and link.hash_name:
+        try:
+            download_hash = hashlib.new(link.hash_name)
+        except ValueError:
+            logger.warn(
+                "Unsupported hash name %s for package %s" %
+                (link.hash_name, link)
+            )
+    try:
+        total_length = int(resp.headers['content-length'])
+    except (ValueError, KeyError, TypeError):
+        total_length = 0
+    downloaded = 0
+    show_progress = total_length > 40 * 1000 or not total_length
+    show_url = link.show_url
+    try:
+        if show_progress:
+            # FIXME: the URL can get really long in this message:
+            if total_length:
+                logger.start_progress(
+                    'Downloading %s (%s): ' %
+                    (show_url, format_size(total_length))
+                )
+            else:
+                logger.start_progress(
+                    'Downloading %s (unknown size): ' % show_url
+                )
+        else:
+            logger.notify('Downloading %s' % show_url)
+        logger.info('Downloading from URL %s' % link)
+
+        def resp_read(chunk_size):
+            try:
+                # Special case for urllib3.
+                try:
+                    for chunk in resp.raw.stream(
+                            chunk_size,
+                            # We use decode_content=False here because we do
+                            # not want urllib3 to mess with the raw bytes we
+                            # get from the server. If we decompress inside of
+                            # urllib3 then we cannot verify the checksum
+                            # because the checksum will be of the compressed
+                            # file. This breakage will only occur if the
+                            # server adds a Content-Encoding header, which
+                            # depends on how the server was configured:
+                            # - Some servers will notice that the file isn't a
+                            #   compressible file and will leave the file alone
+                            #   and with an empty Content-Encoding
+                            # - Some servers will notice that the file is
+                            #   already compressed and will leave the file
+                            #   alone and will add a Content-Encoding: gzip
+                            #   header
+                            # - Some servers won't notice anything at all and
+                            #   will take a file that's already been compressed
+                            #   and compress it again and set the
+                            #   Content-Encoding: gzip header
+                            #
+                            # By setting this not to decode automatically we
+                            # hope to eliminate problems with the second case.
+                            decode_content=False):
+                        yield chunk
+                except IncompleteRead as e:
+                    raise ChunkedEncodingError(e)
+            except AttributeError:
+                # Standard file-like object.
+                while True:
+                    chunk = resp.raw.read(chunk_size)
+                    if not chunk:
+                        break
+                    yield chunk
+
+        for chunk in resp_read(4096):
+            downloaded += len(chunk)
+            if show_progress:
+                if not total_length:
+                    logger.show_progress('%s' % format_size(downloaded))
+                else:
+                    logger.show_progress(
+                        '%3i%% %s' %
+                        (
+                            100 * downloaded / total_length,
+                            format_size(downloaded)
+                        )
+                    )
+            if download_hash is not None:
+                download_hash.update(chunk)
+            fp.write(chunk)
+        fp.close()
+    finally:
+        if show_progress:
+            logger.end_progress('%s downloaded' % format_size(downloaded))
+    return download_hash
+
+
+def _copy_file(filename, location, content_type, link):
+    copy = True
+    download_location = os.path.join(location, link.filename)
+    if os.path.exists(download_location):
+        response = ask_path_exists(
+            'The file %s exists. (i)gnore, (w)ipe, (b)ackup ' %
+            display_path(download_location), ('i', 'w', 'b'))
+        if response == 'i':
+            copy = False
+        elif response == 'w':
+            logger.warn('Deleting %s' % display_path(download_location))
+            os.remove(download_location)
+        elif response == 'b':
+            dest_file = backup_dir(download_location)
+            logger.warn(
+                'Backing up %s to %s' %
+                (display_path(download_location), display_path(dest_file))
+            )
+            shutil.move(download_location, dest_file)
+    if copy:
+        shutil.copy(filename, download_location)
+        logger.notify('Saved %s' % display_path(download_location))
+
+
+def unpack_http_url(link, location, download_cache, download_dir=None,
+                    session=None):
+    if session is None:
+        session = PipSession()
+
+    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
+    temp_location = None
+    target_url = link.url.split('#', 1)[0]
+    already_cached = False
+    cache_file = None
+    cache_content_type_file = None
+    download_hash = None
+
+    # If a download cache is specified, is the file cached there?
+    if download_cache:
+        cache_file = os.path.join(
+            download_cache,
+            urllib.quote(target_url, '')
+        )
+        cache_content_type_file = cache_file + '.content-type'
+        already_cached = (
+            os.path.exists(cache_file) and
+            os.path.exists(cache_content_type_file)
+        )
+        if not os.path.isdir(download_cache):
+            create_download_cache_folder(download_cache)
+
+    # If a download dir is specified, is the file already downloaded there?
+    already_downloaded = None
+    if download_dir:
+        already_downloaded = os.path.join(download_dir, link.filename)
+        if not os.path.exists(already_downloaded):
+            already_downloaded = None
+
+    # If already downloaded, does its hash match?
+    if already_downloaded:
+        temp_location = already_downloaded
+        content_type = mimetypes.guess_type(already_downloaded)[0]
+        logger.notify('File was already downloaded %s' % already_downloaded)
+        if link.hash:
+            download_hash = _get_hash_from_file(temp_location, link)
+            try:
+                _check_hash(download_hash, link)
+            except HashMismatch:
+                logger.warn(
+                    'Previously-downloaded file %s has bad hash, '
+                    're-downloading.' % temp_location
+                )
+                temp_location = None
+                os.unlink(already_downloaded)
+                already_downloaded = None
+
+    # If not a valid download, let's confirm the cached file is valid
+    if already_cached and not temp_location:
+        with open(cache_content_type_file) as fp:
+            content_type = fp.read().strip()
+        temp_location = cache_file
+        logger.notify('Using download cache from %s' % cache_file)
+        if link.hash and link.hash_name:
+            download_hash = _get_hash_from_file(cache_file, link)
+            try:
+                _check_hash(download_hash, link)
+            except HashMismatch:
+                logger.warn(
+                    'Cached file %s has bad hash, '
+                    're-downloading.' % temp_location
+                )
+                temp_location = None
+                os.unlink(cache_file)
+                os.unlink(cache_content_type_file)
+                already_cached = False
+
+    # We don't have either a cached or a downloaded copy
+    # let's download to a tmp dir
+    if not temp_location:
+        try:
+            resp = session.get(
+                target_url,
+                # We use Accept-Encoding: identity here because requests
+                # defaults to accepting compressed responses. This breaks in
+                # a variety of ways depending on how the server is configured.
+                # - Some servers will notice that the file isn't a compressible
+                #   file and will leave the file alone and with an empty
+                #   Content-Encoding
+                # - Some servers will notice that the file is already
+                #   compressed and will leave the file alone and will add a
+                #   Content-Encoding: gzip header
+                # - Some servers won't notice anything at all and will take
+                #   a file that's already been compressed and compress it again
+                #   and set the Content-Encoding: gzip header
+                # By setting this to request only the identity encoding we're
+                # hoping to eliminate the third case. Hopefully there does not
+                # exist a server which when given a file will notice it is
+                # already compressed and that you're not asking for a
+                # compressed file and will then decompress it before sending
+                # because if that's the case I don't think it'll ever be
+                # possible to make this work.
+                headers={"Accept-Encoding": "identity"},
+                stream=True,
+            )
+            resp.raise_for_status()
+        except requests.HTTPError as exc:
+            logger.fatal("HTTP error %s while getting %s" %
+                         (exc.response.status_code, link))
+            raise
+
+        content_type = resp.headers.get('content-type', '')
+        filename = link.filename  # fallback
+        # Have a look at the Content-Disposition header for a better guess
+        content_disposition = resp.headers.get('content-disposition')
+        if content_disposition:
+            type, params = cgi.parse_header(content_disposition)
+            # We use ``or`` here because we don't want to use an "empty" value
+            # from the filename param.
+            filename = params.get('filename') or filename
+        ext = splitext(filename)[1]
+        if not ext:
+            ext = mimetypes.guess_extension(content_type)
+            if ext:
+                filename += ext
+        if not ext and link.url != resp.url:
+            ext = os.path.splitext(resp.url)[1]
+            if ext:
+                filename += ext
+        temp_location = os.path.join(temp_dir, filename)
+        download_hash = _download_url(resp, link, temp_location)
+        if link.hash and link.hash_name:
+            _check_hash(download_hash, link)
+
+    # a download dir is specified; let's copy the archive there
+    if download_dir and not already_downloaded:
+        _copy_file(temp_location, download_dir, content_type, link)
+
+    # unpack the archive to the build dir location. even when only downloading
+    # archives, they have to be unpacked to parse dependencies
+    unpack_file(temp_location, location, content_type, link)
+
+    # if using a download cache, cache it, if needed
+    if cache_file and not already_cached:
+        cache_download(cache_file, temp_location, content_type)
+
+    if not (already_cached or already_downloaded):
+        os.unlink(temp_location)
+
+    os.rmdir(temp_dir)
+
+
+def unpack_file_url(link, location, download_dir=None):
+
+    link_path = url_to_path(link.url_without_fragment)
+    already_downloaded = False
+
+    # If it's a url to a local directory
+    if os.path.isdir(link_path):
+        if os.path.isdir(location):
+            rmtree(location)
+        shutil.copytree(link_path, location, symlinks=True)
+        return
+
+    # if link has a hash, let's confirm it matches
+    if link.hash:
+        link_path_hash = _get_hash_from_file(link_path, link)
+        _check_hash(link_path_hash, link)
+
+    # If a download dir is specified, is the file already there and valid?
+    if download_dir:
+        download_path = os.path.join(download_dir, link.filename)
+        if os.path.exists(download_path):
+            content_type = mimetypes.guess_type(download_path)[0]
+            logger.notify('File was already downloaded %s' % download_path)
+            if link.hash:
+                download_hash = _get_hash_from_file(download_path, link)
+                try:
+                    _check_hash(download_hash, link)
+                    already_downloaded = True
+                except HashMismatch:
+                    logger.warn(
+                        'Previously-downloaded file %s has bad hash, '
+                        're-downloading.' % link_path
+                    )
+                    os.unlink(download_path)
+            else:
+                already_downloaded = True
+
+    if already_downloaded:
+        from_path = download_path
+    else:
+        from_path = link_path
+
+    content_type = mimetypes.guess_type(from_path)[0]
+
+    # unpack the archive to the build dir location. even when only downloading
+    # archives, they have to be unpacked to parse dependencies
+    unpack_file(from_path, location, content_type, link)
+
+    # a download dir is specified and not already downloaded
+    if download_dir and not already_downloaded:
+        _copy_file(from_path, download_dir, content_type, link)
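
For context on how the pieces in this vendored module fit together, here is a minimal usage sketch. It is not part of the commit: it assumes a pip 1.5-era layout where `pip.download` is importable with its vendored `requests`, and the file path is a hypothetical placeholder.

```python
# Minimal sketch (assumed pip 1.5-era internals; not from this commit).
from pip.download import PipSession, get_file_content, path_to_url

# PipSession wires together the pieces defined above: the user_agent()
# header, MultiDomainBasicAuth for per-netloc credentials, optional
# HTTPAdapter retries on http(s)://, and a LocalFSAdapter mounted so
# file:// URLs (with "localhost" injected as the hostname) work at all.
session = PipSession(retries=3)

# A hypothetical local requirements file, converted to a file:// URL.
url = path_to_url('/tmp/requirements.txt')

# get_file_content() recognizes the file: scheme and reads from disk;
# an http(s): URL would instead be fetched through the session, which
# would also prompt for credentials on a 401 via handle_401().
location, content = get_file_content(url, session=session)
print(location, len(content))
```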