summaryrefslogtreecommitdiffstats
path: root/imagescanner/imagescanner/tasks.py
diff options
context:
space:
mode:
Diffstat (limited to 'imagescanner/imagescanner/tasks.py')
-rw-r--r--imagescanner/imagescanner/tasks.py236
1 files changed, 236 insertions, 0 deletions
diff --git a/imagescanner/imagescanner/tasks.py b/imagescanner/imagescanner/tasks.py
new file mode 100644
index 0000000..3610373
--- /dev/null
+++ b/imagescanner/imagescanner/tasks.py
@@ -0,0 +1,236 @@
+# ============LICENSE_START=======================================================
+# org.onap.vvp/image-scanner
+# ===================================================================
+# Copyright © 2017 AT&T Intellectual Property. All rights reserved.
+# ===================================================================
+#
+# Unless otherwise specified, all software contained herein is licensed
+# under the Apache License, Version 2.0 (the “License”);
+# you may not use this software except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#
+#
+# Unless otherwise specified, all documentation contained herein is licensed
+# under the Creative Commons License, Attribution 4.0 Intl. (the “License”);
+# you may not use this documentation except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://creativecommons.org/licenses/by/4.0/
+#
+# Unless required by applicable law or agreed to in writing, documentation
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ============LICENSE_END============================================
+#
+# ECOMP is a trademark and service mark of AT&T Intellectual Property.
+#
+
+import os
+import re
+import hashlib
+import datetime
+from subprocess import run
+from celery import Celery
+import requests
+from . import STATUSFILE, LOGS_PATH
+from .in_temp_dir import in_temp_dir
+
+
+celery_app = Celery(
+ broker='redis://redis',
+ backend='redis://redis',
+ )
+repo_re = re.compile(r'.*\.git$')
+direct_re = re.compile(r'http.*\.(?:img|iso|qcow2)(?:\.gz)?$')
+image_re = re.compile(r'.*\.(?:img|iso|qcow2)(?:\.gz)?$')
+SLACK_TOKEN = os.getenv('SLACK_TOKEN')
+DOMAIN = os.getenv('DOMAIN')
+
+
+def sha256(path):
+ """Return the SHA256 checksum of the file at path"""
+ h = hashlib.new('sha256')
+ with open(path, 'rb') as fd:
+ for chunk in iter((lambda: fd.read(4096)), b''):
+ h.update(chunk)
+ return h.hexdigest()
+
+
+@celery_app.task(queue='scans', ignore_result=True)
+@in_temp_dir()
+def request_scan(source, path, recipients):
+ """Retrieve and scan all partitions of (an) image(s), and notify of the
+ results.
+
+ source:
+ A git URL referencing a repository containing one or more images, or an
+ HTTP(S) URL referencing a single image.
+
+ path:
+ If source is a git url, this specifies a path within that repo to an
+ image. If omitted, all images found in the repo will be scanned.
+ If source is an http url, this is ignored.
+
+ recipients:
+ A list of places to deliver a notification when the image scan is
+ complete. Currently, this may include Slack usernames and Slack
+ channels.
+
+ This function assumes the current working directory is a safe workarea for
+ retrieving and manipulating images, but is decorated with in_temp_dir which
+ changes to a new temporary directory upon invocation.
+
+ """
+
+ # TODO printing to a status file is archaic and messy; let's use the python
+ # logging framework or storing status in redis instead.
+ with STATUSFILE.open('w') as statusfile:
+
+ print(
+ "Processing request {source} {path} in {workspace}".format(
+ source=source, path=path, workspace=os.getcwd()),
+ file=statusfile,
+ flush=True)
+
+ for image in retrieve_images(source, path):
+ print("- Image file: {}...".format(image),
+ file=statusfile, flush=True)
+ if not os.path.exists(image):
+ raise ValueError("Path not found: {}".format(image))
+
+ print("-- Checksumming...", file=statusfile, flush=True)
+ checksum = sha256(image)
+
+ print("-- Scanning...",
+ file=statusfile, flush=True)
+ logfile = LOGS_PATH / 'SecurityValidation-{}.txt'.format(checksum)
+
+ #for partition in image_partitions():
+ # result = scan_partition(partition)
+ with open(logfile, 'w') as fd:
+ print(datetime.datetime.utcnow().ctime(), "UTC", file=fd)
+ print("Launching image scan for {} from {} {}".format(
+ image, source, path), file=fd)
+ print("SHA256 checksum:", checksum, file=fd, flush=True)
+ result = run(
+ ['/usr/local/bin/imagescanner-image', image],
+ stdout=fd,
+ stderr=fd,
+ )
+
+ print("-- Scheduling notification (exit code:{})...".format(result.returncode),
+ file=statusfile, flush=True)
+
+ slack_notify.delay(
+ status="Success" if result.returncode == 0 else "Failure",
+ source=source,
+ filename=image,
+ checksum=checksum,
+ recipients=recipients,
+ )
+
+ print("-- Done.", file=statusfile, flush=True)
+
+ print("- All images processed.", file=statusfile, flush=True)
+
+def retrieve_images(source, path):
+ """Generate the filenames of one or multiple disk images as they are
+ retrieved from _source_.
+
+ See the docstring for request_scan for documentation of the source and path
+ arguments.
+
+ This function assumes the current working directory is a safe workarea for
+ retrieving and manipulating images.
+
+ """
+ if repo_re.match(source):
+ return retrieve_images_git(source, path)
+ elif direct_re.match(source):
+ return retrieve_image_direct(source)
+ else:
+ raise ValueError("Unknown source format {}".format(source))
+
+
+def retrieve_images_git(source, path):
+ run(['/usr/bin/git', 'clone',
+ '--depth', '1',
+ '--single-branch',
+ '--recursive',
+ source,
+ 'repo/'],
+ env={"GIT_SSH_COMMAND": " ".join([
+ "ssh",
+ "-i /root/.ssh/id_ed25519",
+ "-o StrictHostKeyChecking=no"])},
+ check=True,
+ )
+
+ if path:
+ yield os.path.join("repo", path)
+ return
+
+ for root, dirs, files in os.walk('repo'):
+ for name in files:
+ if image_re.match(name):
+ yield os.path.join(root, name)
+
+
+def retrieve_image_direct(source):
+ filename = re.search(r'[^/]*$', source).group(0)
+ with open(filename, 'wb') as fd:
+ r = requests.get(source, stream=True)
+ for chunk in r.iter_content(chunk_size=4096):
+ fd.write(chunk)
+ yield filename
+
+
+# FIXME the slack notification should go into a different queue than the image
+# requests so they don't get blocked by the scans.
+@celery_app.task(ignore_result=True)
+def slack_notify(status, source, filename, checksum, recipients):
+ if not SLACK_TOKEN:
+ print("No Slack token defined; skipping notification.")
+ return
+
+ # TODO replace this handrolled code with a nice slack client library
+
+ link = "http://{}/imagescanner/result/{}".format(DOMAIN, checksum)
+
+ if filename.startswith('repo/'):
+ filename = filename[5:]
+
+ payload = {
+ "username": "Disk Image Scanning Robot",
+ "icon_emoji": ":robot_face:",
+ "attachments": [{
+ "fallback": "Image scan log: {}".format(link),
+ "pretext": "Disk image scan completed",
+ "color": "#00ff00" if status.lower() == 'success' else "#ff0000",
+ "title": "Scan {} for {}".format(status, filename),
+ "title_link": link,
+ "fields": [{"title": t, "value": v, "short": s} for t, v, s in [
+ ("Source", source, True),
+ ("Filename", filename, True),
+ ("Checksum", checksum, False),
+ ]]
+ }]
+ }
+
+ for recipient in recipients:
+ requests.post(
+ "https://hooks.slack.com/services/%s" % SLACK_TOKEN,
+ json=dict(payload, channel=recipient),
+ )