diff options
Diffstat (limited to 'imagescanner/imagescanner/tasks.py')
-rw-r--r-- | imagescanner/imagescanner/tasks.py | 236 |
1 files changed, 236 insertions, 0 deletions
diff --git a/imagescanner/imagescanner/tasks.py b/imagescanner/imagescanner/tasks.py new file mode 100644 index 0000000..3610373 --- /dev/null +++ b/imagescanner/imagescanner/tasks.py @@ -0,0 +1,236 @@ +# ============LICENSE_START======================================================= +# org.onap.vvp/image-scanner +# =================================================================== +# Copyright © 2017 AT&T Intellectual Property. All rights reserved. +# =================================================================== +# +# Unless otherwise specified, all software contained herein is licensed +# under the Apache License, Version 2.0 (the “License”); +# you may not use this software except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# +# +# Unless otherwise specified, all documentation contained herein is licensed +# under the Creative Commons License, Attribution 4.0 Intl. (the “License”); +# you may not use this documentation except in compliance with the License. +# You may obtain a copy of the License at +# +# https://creativecommons.org/licenses/by/4.0/ +# +# Unless required by applicable law or agreed to in writing, documentation +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# ============LICENSE_END============================================ +# +# ECOMP is a trademark and service mark of AT&T Intellectual Property. +# + +import os +import re +import hashlib +import datetime +from subprocess import run +from celery import Celery +import requests +from . import STATUSFILE, LOGS_PATH +from .in_temp_dir import in_temp_dir + + +celery_app = Celery( + broker='redis://redis', + backend='redis://redis', + ) +repo_re = re.compile(r'.*\.git$') +direct_re = re.compile(r'http.*\.(?:img|iso|qcow2)(?:\.gz)?$') +image_re = re.compile(r'.*\.(?:img|iso|qcow2)(?:\.gz)?$') +SLACK_TOKEN = os.getenv('SLACK_TOKEN') +DOMAIN = os.getenv('DOMAIN') + + +def sha256(path): + """Return the SHA256 checksum of the file at path""" + h = hashlib.new('sha256') + with open(path, 'rb') as fd: + for chunk in iter((lambda: fd.read(4096)), b''): + h.update(chunk) + return h.hexdigest() + + +@celery_app.task(queue='scans', ignore_result=True) +@in_temp_dir() +def request_scan(source, path, recipients): + """Retrieve and scan all partitions of (an) image(s), and notify of the + results. + + source: + A git URL referencing a repository containing one or more images, or an + HTTP(S) URL referencing a single image. + + path: + If source is a git url, this specifies a path within that repo to an + image. If omitted, all images found in the repo will be scanned. + If source is an http url, this is ignored. + + recipients: + A list of places to deliver a notification when the image scan is + complete. Currently, this may include Slack usernames and Slack + channels. + + This function assumes the current working directory is a safe workarea for + retrieving and manipulating images, but is decorated with in_temp_dir which + changes to a new temporary directory upon invocation. + + """ + + # TODO printing to a status file is archaic and messy; let's use the python + # logging framework or storing status in redis instead. + with STATUSFILE.open('w') as statusfile: + + print( + "Processing request {source} {path} in {workspace}".format( + source=source, path=path, workspace=os.getcwd()), + file=statusfile, + flush=True) + + for image in retrieve_images(source, path): + print("- Image file: {}...".format(image), + file=statusfile, flush=True) + if not os.path.exists(image): + raise ValueError("Path not found: {}".format(image)) + + print("-- Checksumming...", file=statusfile, flush=True) + checksum = sha256(image) + + print("-- Scanning...", + file=statusfile, flush=True) + logfile = LOGS_PATH / 'SecurityValidation-{}.txt'.format(checksum) + + #for partition in image_partitions(): + # result = scan_partition(partition) + with open(logfile, 'w') as fd: + print(datetime.datetime.utcnow().ctime(), "UTC", file=fd) + print("Launching image scan for {} from {} {}".format( + image, source, path), file=fd) + print("SHA256 checksum:", checksum, file=fd, flush=True) + result = run( + ['/usr/local/bin/imagescanner-image', image], + stdout=fd, + stderr=fd, + ) + + print("-- Scheduling notification (exit code:{})...".format(result.returncode), + file=statusfile, flush=True) + + slack_notify.delay( + status="Success" if result.returncode == 0 else "Failure", + source=source, + filename=image, + checksum=checksum, + recipients=recipients, + ) + + print("-- Done.", file=statusfile, flush=True) + + print("- All images processed.", file=statusfile, flush=True) + +def retrieve_images(source, path): + """Generate the filenames of one or multiple disk images as they are + retrieved from _source_. + + See the docstring for request_scan for documentation of the source and path + arguments. + + This function assumes the current working directory is a safe workarea for + retrieving and manipulating images. + + """ + if repo_re.match(source): + return retrieve_images_git(source, path) + elif direct_re.match(source): + return retrieve_image_direct(source) + else: + raise ValueError("Unknown source format {}".format(source)) + + +def retrieve_images_git(source, path): + run(['/usr/bin/git', 'clone', + '--depth', '1', + '--single-branch', + '--recursive', + source, + 'repo/'], + env={"GIT_SSH_COMMAND": " ".join([ + "ssh", + "-i /root/.ssh/id_ed25519", + "-o StrictHostKeyChecking=no"])}, + check=True, + ) + + if path: + yield os.path.join("repo", path) + return + + for root, dirs, files in os.walk('repo'): + for name in files: + if image_re.match(name): + yield os.path.join(root, name) + + +def retrieve_image_direct(source): + filename = re.search(r'[^/]*$', source).group(0) + with open(filename, 'wb') as fd: + r = requests.get(source, stream=True) + for chunk in r.iter_content(chunk_size=4096): + fd.write(chunk) + yield filename + + +# FIXME the slack notification should go into a different queue than the image +# requests so they don't get blocked by the scans. +@celery_app.task(ignore_result=True) +def slack_notify(status, source, filename, checksum, recipients): + if not SLACK_TOKEN: + print("No Slack token defined; skipping notification.") + return + + # TODO replace this handrolled code with a nice slack client library + + link = "http://{}/imagescanner/result/{}".format(DOMAIN, checksum) + + if filename.startswith('repo/'): + filename = filename[5:] + + payload = { + "username": "Disk Image Scanning Robot", + "icon_emoji": ":robot_face:", + "attachments": [{ + "fallback": "Image scan log: {}".format(link), + "pretext": "Disk image scan completed", + "color": "#00ff00" if status.lower() == 'success' else "#ff0000", + "title": "Scan {} for {}".format(status, filename), + "title_link": link, + "fields": [{"title": t, "value": v, "short": s} for t, v, s in [ + ("Source", source, True), + ("Filename", filename, True), + ("Checksum", checksum, False), + ]] + }] + } + + for recipient in recipients: + requests.post( + "https://hooks.slack.com/services/%s" % SLACK_TOKEN, + json=dict(payload, channel=recipient), + ) |