# ============LICENSE_START=======================================================
# org.onap.vvp/image-scanner
# ===================================================================
# Copyright © 2017 AT&T Intellectual Property. All rights reserved.
# ===================================================================
#
# Unless otherwise specified, all software contained herein is licensed
# under the Apache License, Version 2.0 (the “License”);
# you may not use this software except in compliance with the License.
# You may obtain a copy of the License at
#
#             http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
#
# Unless otherwise specified, all documentation contained herein is licensed
# under the Creative Commons License, Attribution 4.0 Intl. (the “License”);
# you may not use this documentation except in compliance with the License.
# You may obtain a copy of the License at
#
#             https://creativecommons.org/licenses/by/4.0/
#
# Unless required by applicable law or agreed to in writing, documentation
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ============LICENSE_END============================================
#
# ECOMP is a trademark and service mark of AT&T Intellectual Property.
#
import os
import re
import hashlib
import datetime
from subprocess import run
from xml.etree import ElementTree

from celery import Celery
import requests

from . import config
from .in_temp_dir import in_temp_dir
from .regexdispatch import regexdispatch

celery_app = Celery(
    broker='redis://redis',
    backend='redis://redis',
    )

# image_re matches filenames that look like disk images (.img, .iso, .qcow,
# .qcow2), optionally gzip-compressed. It is used to pick image files out of
# cloned git repositories and radosgw (S3) bucket listings.
#
image_re = re.compile(r'.*\.(?:img|iso|qcow2?)(?:\.gz)?$')

SLACK_TOKEN = os.getenv('SLACK_TOKEN')
DOMAIN = os.getenv('DOMAIN')


def sha256(path):
    """Return the SHA256 checksum of the file at path"""
    h = hashlib.new('sha256')
    with open(path, 'rb') as fd:
        for chunk in iter((lambda: fd.read(4096)), b''):
            h.update(chunk)
    return h.hexdigest()
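
# Usage sketch for sha256() (illustrative only; the path below is
# hypothetical). request_scan() below uses the digest both to name the scan
# log and to build the result URL included in notifications:
#
#   digest = sha256('repo/images/base.qcow2')
#   logfile = config.LOGS_PATH / 'SecurityValidation-{}.txt'.format(digest)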

@celery_app.task(queue='scans', ignore_result=True)
@in_temp_dir()
def request_scan(source, path, recipients=None, jenkins_job_name=None,
                 checklist_uuid=None):
    """Retrieve and scan all partitions of (an) image(s), and notify of the
    results.

    source: A git URL referencing a repository containing one or more images,
        or an HTTP(S) URL referencing a single image.

    path: If source is a git url, this specifies a path within that repo to an
        image. If omitted, all images found in the repo will be scanned. If
        source is an http url, this is ignored.

    recipients: A list of places to deliver a notification when the image scan
        is complete. Currently, this may include Slack usernames and Slack
        channels.

    jenkins_job_name: The name of the jenkins job that should be built to
        process the scan results.

    checklist_uuid: The UUID of the checklist that should be passed to the
        jenkins job.

    This function assumes the current working directory is a safe workarea for
    retrieving and manipulating images, but is decorated with in_temp_dir,
    which changes to a new temporary directory upon invocation.

    """
    # TODO printing to a status file is archaic and messy; let's use the
    # python logging framework or store status in redis instead.
    with config.STATUSFILE.open('w') as statusfile:
        print(
            "Processing request {source} {path} in {workspace}".format(
                source=source, path=path, workspace=os.getcwd()),
            file=statusfile, flush=True)

        for image in retrieve_images(source, path):
            print(
                "- Image file: {}...".format(image),
                file=statusfile, flush=True)

            if not os.path.exists(image):
                raise ValueError("Path not found: {}".format(image))

            print("-- Checksumming...", file=statusfile, flush=True)
            checksum = sha256(image)

            print("-- Scanning...", file=statusfile, flush=True)
            logfile = config.LOGS_PATH / 'SecurityValidation-{}.txt'.format(
                checksum)

            # for partition in image_partitions():
            #     result = scan_partition(partition)

            with open(logfile, 'w') as fd:
                print(datetime.datetime.utcnow().ctime(), "UTC", file=fd)
                print("Launching image scan for {} from {} {}".format(
                    image, source, path), file=fd)
                print("SHA256 checksum:", checksum, file=fd, flush=True)
                result = run(
                    ['/usr/local/bin/imagescanner-image', image],
                    stdout=fd,
                    stderr=fd,
                    )

            if recipients:
                print(
                    "-- Scheduling notification (exit code: {})..."
                    .format(result.returncode),
                    file=statusfile, flush=True)
                slack_notify.delay(
                    status="Success" if result.returncode == 0 else "Failure",
                    source=source,
                    filename=image,
                    checksum=checksum,
                    recipients=recipients,
                    )
            elif checklist_uuid and jenkins_job_name:
                print(
                    "-- Triggering Jenkins job {} for checklist {}"
                    .format(jenkins_job_name, checklist_uuid),
                    file=statusfile, flush=True)
                jenkins_notify.delay(
                    jenkins_job_name,
                    status=result.returncode,
                    checksum=checksum,
                    checklist_uuid=checklist_uuid,
                    )
            else:
                print(
                    "-- Skipping notification (exit code was: {})."
                    .format(result.returncode),
                    file=statusfile, flush=True)

            print("-- Done.", file=statusfile, flush=True)

        print("- All images processed.", file=statusfile, flush=True)


@regexdispatch
def retrieve_images(source, path):
    """Generate the filenames of one or more disk images as they are
    retrieved from _source_.

    Source may be one of several types, so we dispatch to an appropriate
    function to deal with it:

    - a git url to a repo containing disk images
    - a normal https url directly to a single disk image
    - an https url directly to a single disk image in a radosgw (s3) bucket
    - an https url to a radosgw (s3) bucket containing disk images

    See the docstring for request_scan for documentation of the source and
    path arguments.

    This function assumes the current working directory is a safe workarea for
    retrieving and manipulating images.

    """
    raise ValueError("Unknown source type %s" % source)


@retrieve_images.register(r'.*\.git$')
def _ri_git(source, path, **kwargs):
    # Shallow-clone the repository (and its submodules) into ./repo/ using the
    # worker's deploy key, then yield either the requested path or every file
    # whose name looks like a disk image.
    run(['/usr/bin/git', 'clone', '--depth', '1', '--single-branch',
         '--recursive', source, 'repo/'],
        env={"GIT_SSH_COMMAND": " ".join([
            "ssh",
            "-i /root/.ssh/id_ed25519",
            "-o StrictHostKeyChecking=no"])},
        check=True,
        )
    if path:
        yield os.path.join("repo", path)
        return
    for root, dirs, files in os.walk('repo'):
        for name in files:
            if image_re.match(name):
                yield os.path.join(root, name)
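
# Dispatch sketch (illustrative only; the URLs below are hypothetical, and we
# assume regexdispatch passes named groups from the matching pattern to the
# handler as keyword arguments, as the handler signatures suggest):
#
#   retrieve_images('git@gerrit.example.org:demo/images.git', None)  # _ri_git
#   retrieve_images('https://rgw.example.org/images/', None)         # _ri_bucket
#   retrieve_images('https://example.org/images/base.qcow2', None)
#       # _ri_direct, with hostname='example.org', filename='base.qcow2'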

# FIXME this regex won't properly detect URLs with query strings.
@retrieve_images.register(r'''(?x)      # this is a "verbose" regex
    https?://                   # match an http or https url
    (?P<hostname>               # capture the hostname:
        [^/:]*                  # anything up to the first / or :
    )
    .*                          # any number of path components
    /(?P<filename>              # capture the filename after the last /
        [^/]*                   # anything not a /
        \.(?:img|iso|qcow2?)    # with one of these image extensions
        (?:\.gz)?               # optionally also compressed
    )$''')
def _ri_direct(source, path=None, hostname=None, filename=None, **kwargs):
    # Stream the image to disk in chunks, using per-host credentials from
    # config.AUTHS when any are defined for this hostname.
    auth = config.AUTHS.get(hostname)
    with open(filename, 'wb') as fd:
        r = requests.get(source, stream=True, auth=auth)
        for chunk in r.iter_content(chunk_size=4096):
            fd.write(chunk)
    yield filename


@retrieve_images.register(r'''(?x)      # this is a "verbose" regex
    https?://                   # match an http or https url
    (?P<hostname>               # capture the hostname:
        [^/:]*                  # anything up to the first / or :
    )
    .*                          # any number of path components
    /$                          # ending with a slash
    ''')
def _ri_bucket(source, path=None, hostname=None, filename=None, **kwargs):
    """We assume that an HTTP(S) URL ending in / is a radosgw bucket."""
    auth = config.AUTHS.get(hostname)
    # We could request ?format=json but the output is malformed; all but one
    # filename is truncated.
    response = requests.get(source, {'format': 'xml'}, auth=auth)
    keys = ElementTree.fromstring(response.text).iter(
        '{http://s3.amazonaws.com/doc/2006-03-01/}Key')
    filenames = [x.text for x in keys]
    for filename in filenames:
        if image_re.match(filename):
            yield from retrieve_images(source + filename)


@celery_app.task(ignore_result=True)
def slack_notify(status, source, filename, checksum, recipients):
    if not SLACK_TOKEN:
        print("No Slack token defined; skipping notification.")
        return
    if not recipients:
        print("No recipients specified; skipping notification.")
        return
    # TODO replace this handrolled code with a nice slack client library
    link = "http://{}/imagescanner/result/{}".format(DOMAIN, checksum)
    if filename.startswith('repo/'):
        filename = filename[5:]
    payload = {
        "username": "Disk Image Scanning Robot",
        "icon_emoji": ":robot_face:",
        "attachments": [{
            "fallback": "Image scan log: {}".format(link),
            "pretext": "Disk image scan completed",
            "color": "#00ff00" if status.lower() == 'success' else "#ff0000",
            "title": "Scan {} for {}".format(status, filename),
            "title_link": link,
            "fields": [
                {"title": t, "value": v, "short": s}
                for t, v, s in [
                    ("Source", source, True),
                    ("Filename", filename, True),
                    ("Checksum", checksum, False),
                    ]],
            }],
        }
    for recipient in recipients:
        requests.post(
            "https://hooks.slack.com/services/%s" % SLACK_TOKEN,
            json=dict(payload, channel=recipient),
            )


@celery_app.task(ignore_result=True)
def jenkins_notify(name, status, checksum, checklist_uuid):
    # The frontend does not need the jenkins library, so we import it from
    # within the worker task.
    from jenkins import Jenkins
    server = Jenkins(**config.JENKINS)
    logurl = "http://{}/imagescanner/result/{}".format(DOMAIN, checksum)
    server.build_job(name, {
        "checklist_uuid": checklist_uuid,
        "status": status,
        "logurl": logurl,
        })
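
# End-to-end usage sketch (illustrative only): a client sharing the Redis
# broker configured above can enqueue a scan on the 'scans' queue and have the
# result posted to Slack. The repository URL, path, and channel below are
# hypothetical placeholders; the block only runs when this module is executed
# directly, and it assumes the broker is reachable.
if __name__ == '__main__':
    request_scan.delay(
        source='ssh://git@gerrit.example.org/demo/heat-images.git',
        path='images/base.qcow2',
        recipients=['#image-scans'],
    )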