summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ansible/application.yml12
-rwxr-xr-xansible/group_vars/infrastructure.yml2
-rw-r--r--ansible/kube_prometheus.yml9
-rw-r--r--ansible/nfs.yml12
-rw-r--r--ansible/roles/k8s-persistent-volume/.yamllint12
-rw-r--r--ansible/roles/k8s-persistent-volume/defaults/main.yml10
-rw-r--r--ansible/roles/k8s-persistent-volume/molecule/default/converge.yml12
-rw-r--r--ansible/roles/k8s-persistent-volume/molecule/default/molecule.yml23
-rw-r--r--ansible/roles/k8s-persistent-volume/molecule/default/vars.yml2
-rw-r--r--ansible/roles/k8s-persistent-volume/tasks/main.yml19
-rw-r--r--ansible/roles/k8s-persistent-volume/templates/pv.yaml.j215
-rw-r--r--ansible/roles/kube-prometheus-stack/.yamllint12
-rw-r--r--ansible/roles/kube-prometheus-stack/defaults/main.yml49
-rw-r--r--ansible/roles/kube-prometheus-stack/files/grafana_dashboard.json663
-rw-r--r--ansible/roles/kube-prometheus-stack/molecule/default/Dockerfile.j214
-rw-r--r--ansible/roles/kube-prometheus-stack/molecule/default/converge.yml10
-rw-r--r--ansible/roles/kube-prometheus-stack/molecule/default/molecule.yml28
-rw-r--r--ansible/roles/kube-prometheus-stack/molecule/default/prepare.yml10
-rw-r--r--ansible/roles/kube-prometheus-stack/tasks/main.yml27
-rw-r--r--ansible/site.yml4
-rw-r--r--ansible/test/roles/prepare-kube-prometheus-stack/tasks/main.yml11
-rwxr-xr-xbuild/build_nexus_blob.sh3
-rw-r--r--build/data_lists/kube_prometheus_stack_docker_images.list10
-rw-r--r--build/data_lists/kube_prometheus_stack_http.list1
-rw-r--r--build/data_lists/onap_rpm.list1
-rwxr-xr-xbuild/package.py3
-rw-r--r--docs/BuildGuide.rst6
-rw-r--r--docs/InstallGuide.rst77
-rw-r--r--docs/images/grafana-dashboards.pngbin0 -> 84655 bytes
-rw-r--r--docs/images/grafana-signin.pngbin0 -> 139666 bytes
-rwxr-xr-xtools/helm-healer.sh8
31 files changed, 1044 insertions, 21 deletions
diff --git a/ansible/application.yml b/ansible/application.yml
index 02c654f2..6647f3b3 100644
--- a/ansible/application.yml
+++ b/ansible/application.yml
@@ -1,16 +1,4 @@
---
-- name: Setup nfs server
- hosts: nfs-server
- roles:
- - role: nfs
- when: groups.kubernetes | length > 1
-
-- name: Setup nfs mounts
- hosts: kubernetes:!nfs-server
- roles:
- - role: nfs
- when: groups.kubernetes | length > 1
-
- name: Install Helm application {{ app_name }} into offline Kubernetes cluster
hosts: infrastructure
roles:
diff --git a/ansible/group_vars/infrastructure.yml b/ansible/group_vars/infrastructure.yml
index b351697b..69b89587 100755
--- a/ansible/group_vars/infrastructure.yml
+++ b/ansible/group_vars/infrastructure.yml
@@ -27,3 +27,5 @@ nexus3_image: sonatype/nexus3:3.15.2
dns_server_image: andyshinn/dnsmasq:2.76
nginx_server_image: own_nginx:2.0.0
chartmuseum_server_image: chartmuseum/chartmuseum
+kube_prometheus_stack_enabled: false
+kube_prometheus_stack_version: 16.1.2
diff --git a/ansible/kube_prometheus.yml b/ansible/kube_prometheus.yml
new file mode 100644
index 00000000..d30004b9
--- /dev/null
+++ b/ansible/kube_prometheus.yml
@@ -0,0 +1,9 @@
+---
+- name: Deploy kube-prometheus stack
+  hosts: infrastructure
+  pre_tasks:
+    - name: Include "kubernetes" group variables  # k8s-persistent-volume role needs 'nfs_mount_path'
+      include_vars: group_vars/kubernetes.yml
+  roles:
+    - k8s-persistent-volume
+    - kube-prometheus-stack
diff --git a/ansible/nfs.yml b/ansible/nfs.yml
new file mode 100644
index 00000000..380d29b1
--- /dev/null
+++ b/ansible/nfs.yml
@@ -0,0 +1,12 @@
+---
+- name: Setup nfs server
+  hosts: nfs-server
+  roles:
+    - role: nfs
+      when: groups.kubernetes | length > 1  # role is skipped unless the kubernetes group has more than one host
+
+- name: Setup nfs mounts
+  hosts: kubernetes:!nfs-server
+  roles:
+    - role: nfs
+      when: groups.kubernetes | length > 1  # same guard as the server play above
diff --git a/ansible/roles/k8s-persistent-volume/.yamllint b/ansible/roles/k8s-persistent-volume/.yamllint
new file mode 100644
index 00000000..c5ae64be
--- /dev/null
+++ b/ansible/roles/k8s-persistent-volume/.yamllint
@@ -0,0 +1,12 @@
+---
+extends: default
+
+rules:
+  braces:
+    max-spaces-inside: 1
+    level: error
+  brackets:
+    max-spaces-inside: 1
+    level: error
+  line-length: disable
+  truthy: disable
diff --git a/ansible/roles/k8s-persistent-volume/defaults/main.yml b/ansible/roles/k8s-persistent-volume/defaults/main.yml
new file mode 100644
index 00000000..de387323
--- /dev/null
+++ b/ansible/roles/k8s-persistent-volume/defaults/main.yml
@@ -0,0 +1,10 @@
+---
+k8s_volumes:
+  - name: kube-prometheus-prometheus
+    capacity: "6Gi"
+    path_prefix: "{{ nfs_mount_path }}/kube-prometheus"
+    owner: 1000  # derived from prometheus.prometheusSpec.securityContext.runAsUser
+    group: 2000  # derived from prometheus.prometheusSpec.securityContext.fsGroup
+  - name: kube-prometheus-grafana
+    capacity: "4Gi"
+    path_prefix: "{{ nfs_mount_path }}/kube-prometheus"
diff --git a/ansible/roles/k8s-persistent-volume/molecule/default/converge.yml b/ansible/roles/k8s-persistent-volume/molecule/default/converge.yml
new file mode 100644
index 00000000..4e15b3c8
--- /dev/null
+++ b/ansible/roles/k8s-persistent-volume/molecule/default/converge.yml
@@ -0,0 +1,12 @@
+---
+- name: Converge
+  hosts: all
+  pre_tasks:
+    - name: Include kubernetes group variables
+      include_vars: ../../../../group_vars/kubernetes.yml
+    - name: Include test scenario variables  # vars.yml overrides k8s_volumes for the scenario
+      include_vars: vars.yml
+  tasks:
+    - name: "Include k8s-persistent-volume"
+      include_role:
+        name: "k8s-persistent-volume"
diff --git a/ansible/roles/k8s-persistent-volume/molecule/default/molecule.yml b/ansible/roles/k8s-persistent-volume/molecule/default/molecule.yml
new file mode 100644
index 00000000..dbbf0d81
--- /dev/null
+++ b/ansible/roles/k8s-persistent-volume/molecule/default/molecule.yml
@@ -0,0 +1,23 @@
+---
+dependency:
+  name: galaxy
+driver:
+  name: docker
+lint: |
+  set -e
+  yamllint .
+  ansible-lint .
+  flake8
+platforms:
+  - name: infrastructure-k8s-persistent-volume
+    image: molecule-${PREBUILD_PLATFORM_DISTRO:-centos}:${PREBUILD_DISTRO_VERSION:-centos7.6}
+    pre_build_image: true
+    groups:
+      - infrastructure
+provisioner:
+  name: ansible
+  env:
+    ANSIBLE_ROLES_PATH: ../../../../test/roles
+    ANSIBLE_LIBRARY: ../../../../library
+verifier:
+  name: testinfra
diff --git a/ansible/roles/k8s-persistent-volume/molecule/default/vars.yml b/ansible/roles/k8s-persistent-volume/molecule/default/vars.yml
new file mode 100644
index 00000000..ce473376
--- /dev/null
+++ b/ansible/roles/k8s-persistent-volume/molecule/default/vars.yml
@@ -0,0 +1,2 @@
+---
+k8s_volumes: []
diff --git a/ansible/roles/k8s-persistent-volume/tasks/main.yml b/ansible/roles/k8s-persistent-volume/tasks/main.yml
new file mode 100644
index 00000000..8428857f
--- /dev/null
+++ b/ansible/roles/k8s-persistent-volume/tasks/main.yml
@@ -0,0 +1,19 @@
+---
+- name: Install OpenShift Python client library  # community.kubernetes.k8s module dependency
+  package:
+    name: 'python2-openshift'
+    state: present
+
+- name: Create k8s persistent volumes
+  community.kubernetes.k8s:
+    wait: true
+    template: pv.yaml.j2  # rendered once per entry of k8s_volumes (exposed as 'item')
+  loop: "{{ k8s_volumes }}"
+
+- name: Create host paths for PVs and set their permissions
+  file:
+    path: "{{ item.path_prefix }}/{{ item.name }}"
+    state: directory
+    owner: "{{ item.owner | default(0) }}"  # falls back to uid 0 when the volume defines no owner
+    group: "{{ item.group | default(0) }}"  # falls back to gid 0 when the volume defines no group
+  loop: "{{ k8s_volumes }}"
diff --git a/ansible/roles/k8s-persistent-volume/templates/pv.yaml.j2 b/ansible/roles/k8s-persistent-volume/templates/pv.yaml.j2
new file mode 100644
index 00000000..979ef2c6
--- /dev/null
+++ b/ansible/roles/k8s-persistent-volume/templates/pv.yaml.j2
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: "{{ item.name }}"
+spec:
+  capacity:
+    storage: "{{ item.capacity }}"
+  volumeMode: Filesystem
+  accessModes:
+    - ReadWriteOnce
+  persistentVolumeReclaimPolicy: Retain
+  storageClassName: "{{ item.name }}"
+  hostPath:
+    path: "{{ item.path_prefix }}/{{ item.name }}"
+    type: ""
diff --git a/ansible/roles/kube-prometheus-stack/.yamllint b/ansible/roles/kube-prometheus-stack/.yamllint
new file mode 100644
index 00000000..c5ae64be
--- /dev/null
+++ b/ansible/roles/kube-prometheus-stack/.yamllint
@@ -0,0 +1,12 @@
+---
+extends: default
+
+rules:
+  braces:
+    max-spaces-inside: 1
+    level: error
+  brackets:
+    max-spaces-inside: 1
+    level: error
+  line-length: disable
+  truthy: disable
diff --git a/ansible/roles/kube-prometheus-stack/defaults/main.yml b/ansible/roles/kube-prometheus-stack/defaults/main.yml
new file mode 100644
index 00000000..bd82be49
--- /dev/null
+++ b/ansible/roles/kube-prometheus-stack/defaults/main.yml
@@ -0,0 +1,49 @@
+---
+kube_prometheus_stack:
+  k8s_namespace: kube-prometheus
+  helm_release_name: kube-prometheus-stack
+  helm_timeout: "240s"
+  helm_values_file: "{{ app_data_path }}/kube_prometheus_values.yaml"
+  helm_values:
+    grafana:
+      adminPassword: admin  # default credential; override through kube_prometheus_helm_values
+      dashboardProviders:
+        dashboardproviders.yaml:
+          apiVersion: 1
+          providers:
+            - name: 'custom'
+              orgId: 1
+              folder: 'Custom'
+              type: file
+              disableDeletion: false
+              editable: true
+              updateIntervalSeconds: 60
+              options:
+                path: /var/lib/grafana/dashboards/custom
+      persistence:
+        type: pvc
+        enabled: true
+        storageClassName: kube-prometheus-grafana
+        accessModes:
+          - ReadWriteOnce
+        size: 4Gi
+      service:
+        enabled: true
+        type: NodePort
+        port: 80
+        targetPort: 3000
+        annotations: {}
+        labels: {}
+        portName: service
+    prometheus:
+      prometheusSpec:
+        scrapeInterval: 60s
+        storageSpec:
+          disableMountSubPath: true
+          volumeClaimTemplate:
+            spec:
+              storageClassName: kube-prometheus-prometheus
+              accessModes: ["ReadWriteOnce"]
+              resources:
+                requests:
+                  storage: 6Gi
diff --git a/ansible/roles/kube-prometheus-stack/files/grafana_dashboard.json b/ansible/roles/kube-prometheus-stack/files/grafana_dashboard.json
new file mode 100644
index 00000000..1ef8138b
--- /dev/null
+++ b/ansible/roles/kube-prometheus-stack/files/grafana_dashboard.json
@@ -0,0 +1,663 @@
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "id": 27,
+ "links": [],
+ "panels": [
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 19,
+ "panels": [],
+ "title": "Node readiness status",
+ "type": "row"
+ },
+ {
+ "datasource": null,
+ "description": "Shows the node readiness status",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "from": "",
+ "id": 1,
+ "text": "NotReady",
+ "to": "",
+ "type": 1,
+ "value": "0"
+ },
+ {
+ "from": "",
+ "id": 2,
+ "text": "Ready",
+ "to": "",
+ "type": 1,
+ "value": "1"
+ }
+ ],
+ "max": 1,
+ "min": 0,
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "dark-red",
+ "value": 0
+ },
+ {
+ "color": "semi-dark-green",
+ "value": 1
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 24,
+ "x": 0,
+ "y": 1
+ },
+ "id": 6,
+ "options": {
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showThresholdLabels": false,
+ "showThresholdMarkers": true,
+ "text": {}
+ },
+ "pluginVersion": "7.5.5",
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "kube_node_status_condition{condition=\"Ready\",status=\"true\"}",
+ "interval": "",
+ "legendFormat": "{{node}}",
+ "refId": "A"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "K8S node status",
+ "type": "gauge"
+ },
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 6
+ },
+ "id": 17,
+ "panels": [],
+ "title": "Pod status in Onap namespace",
+ "type": "row"
+ },
+ {
+ "cacheTimeout": null,
+ "datasource": null,
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "none"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 6,
+ "x": 0,
+ "y": 7
+ },
+ "id": 15,
+ "links": [],
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "center",
+ "orientation": "horizontal",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "text": {},
+ "textMode": "value"
+ },
+ "pluginVersion": "7.5.5",
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "sum(kube_pod_status_phase{namespace=\"onap\", phase=\"Running\"})",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Running",
+ "type": "stat"
+ },
+ {
+ "cacheTimeout": null,
+ "datasource": null,
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "none"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 6,
+ "x": 6,
+ "y": 7
+ },
+ "id": 14,
+ "links": [],
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "center",
+ "orientation": "horizontal",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "text": {},
+ "textMode": "value"
+ },
+ "pluginVersion": "7.5.5",
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "sum(kube_pod_status_phase{namespace=\"onap\", phase=\"Succeeded\"})",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Succeeded",
+ "type": "stat"
+ },
+ {
+ "cacheTimeout": null,
+ "datasource": null,
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "none"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 6,
+ "x": 12,
+ "y": 7
+ },
+ "id": 10,
+ "links": [],
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "center",
+ "orientation": "horizontal",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "text": {},
+ "textMode": "value"
+ },
+ "pluginVersion": "7.5.5",
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "sum(kube_pod_status_phase{namespace=\"onap\", phase=\"Pending\"})",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Pending",
+ "type": "stat"
+ },
+ {
+ "cacheTimeout": null,
+ "datasource": null,
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "dark-red",
+ "value": null
+ }
+ ]
+ },
+ "unit": "none"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 6,
+ "x": 18,
+ "y": 7
+ },
+ "id": 13,
+ "links": [],
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "center",
+ "orientation": "horizontal",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "text": {},
+ "textMode": "value"
+ },
+ "pluginVersion": "7.5.5",
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "sum(kube_pod_status_phase{namespace=\"onap\", phase=\"Failed\"})",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Failed",
+ "type": "stat"
+ },
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 10
+ },
+ "id": 12,
+ "panels": [],
+ "title": "Open file descriptors",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "description": "Number of currently opened file descriptors",
+ "fieldConfig": {
+ "defaults": {},
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 11,
+ "w": 12,
+ "x": 0,
+ "y": 11
+ },
+ "hiddenSeries": false,
+ "id": 4,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": true,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.5.5",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "node_filefd_allocated",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Open file descriptors",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:279",
+ "decimals": null,
+ "format": "short",
+ "label": "open fds",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:280",
+ "format": "short",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "alert": {
+ "alertRuleTags": {},
+ "conditions": [
+ {
+ "evaluator": {
+ "params": [
+ 75
+ ],
+ "type": "gt"
+ },
+ "operator": {
+ "type": "and"
+ },
+ "query": {
+ "params": [
+ "A",
+ "5m",
+ "now"
+ ]
+ },
+ "reducer": {
+ "params": [],
+ "type": "last"
+ },
+ "type": "query"
+ }
+ ],
+ "executionErrorState": "alerting",
+ "for": "5m",
+ "frequency": "1m",
+ "handler": 1,
+ "name": "Open file descriptors usage high",
+ "noDataState": "no_data",
+ "notifications": []
+ },
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "description": "Percentage of total available file descriptors allocated",
+ "fieldConfig": {
+ "defaults": {},
+ "overrides": []
+ },
+ "fill": 2,
+ "fillGradient": 5,
+ "gridPos": {
+ "h": 11,
+ "w": 12,
+ "x": 12,
+ "y": 11
+ },
+ "hiddenSeries": false,
+ "id": 8,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.5.5",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "(node_filefd_allocated/node_filefd_maximum)*100",
+ "interval": "",
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+ {
+ "colorMode": "critical",
+ "fill": true,
+ "line": true,
+ "op": "gt",
+ "value": 75,
+ "visible": true
+ }
+ ],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Open file descriptors usage",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:655",
+ "format": "percent",
+ "label": "open fds",
+ "logBase": 1,
+ "max": "100",
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:656",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "schemaVersion": 27,
+ "style": "dark",
+ "tags": [],
+ "templating": {
+ "list": []
+ },
+ "time": {
+ "from": "now-6h",
+ "to": "now"
+ },
+ "timepicker": {},
+ "timezone": "",
+ "title": "Custom Dashboard",
+ "uid": "9Q97TIqGt",
+ "version": 24
+}
diff --git a/ansible/roles/kube-prometheus-stack/molecule/default/Dockerfile.j2 b/ansible/roles/kube-prometheus-stack/molecule/default/Dockerfile.j2
new file mode 100644
index 00000000..e6aa95d3
--- /dev/null
+++ b/ansible/roles/kube-prometheus-stack/molecule/default/Dockerfile.j2
@@ -0,0 +1,14 @@
+# Molecule managed
+
+{% if item.registry is defined %}
+FROM {{ item.registry.url }}/{{ item.image }}
+{% else %}
+FROM {{ item.image }}
+{% endif %}
+
+RUN if [ $(command -v apt-get) ]; then apt-get update && apt-get install -y python sudo bash ca-certificates && apt-get clean; \
+ elif [ $(command -v dnf) ]; then dnf makecache && dnf --assumeyes install python sudo python-devel python*-dnf bash && dnf clean all; \
+ elif [ $(command -v yum) ]; then yum makecache fast && yum install -y python sudo yum-plugin-ovl bash && sed -i 's/plugins=0/plugins=1/g' /etc/yum.conf && yum clean all; \
+ elif [ $(command -v zypper) ]; then zypper refresh && zypper install -y python sudo bash python-xml && zypper clean -a; \
+ elif [ $(command -v apk) ]; then apk update && apk add --no-cache python sudo bash ca-certificates; \
+ elif [ $(command -v xbps-install) ]; then xbps-install -Syu && xbps-install -y python sudo bash ca-certificates && xbps-remove -O; fi
diff --git a/ansible/roles/kube-prometheus-stack/molecule/default/converge.yml b/ansible/roles/kube-prometheus-stack/molecule/default/converge.yml
new file mode 100644
index 00000000..c921d638
--- /dev/null
+++ b/ansible/roles/kube-prometheus-stack/molecule/default/converge.yml
@@ -0,0 +1,10 @@
+---
+- name: Converge
+  hosts: all
+  pre_tasks:
+    - name: Include infrastructure group variables
+      include_vars: ../../../../group_vars/infrastructure.yml
+  tasks:
+    - name: "Include kube-prometheus-stack"
+      include_role:
+        name: "kube-prometheus-stack"
diff --git a/ansible/roles/kube-prometheus-stack/molecule/default/molecule.yml b/ansible/roles/kube-prometheus-stack/molecule/default/molecule.yml
new file mode 100644
index 00000000..c1eafa3b
--- /dev/null
+++ b/ansible/roles/kube-prometheus-stack/molecule/default/molecule.yml
@@ -0,0 +1,28 @@
+---
+dependency:
+  name: galaxy
+driver:
+  name: docker
+lint: |
+  set -e
+  yamllint .
+  ansible-lint .
+  flake8
+platforms:
+  - name: infrastructure-kube-prometheus-stack
+    image: centos:7
+    groups:
+      - infrastructure
+provisioner:
+  name: ansible
+  env:
+    ANSIBLE_ROLES_PATH: ../../../../test/roles
+    ANSIBLE_LIBRARY: ../../../../library
+  inventory:
+    group_vars:
+      all:
+        app_name: onap
+        app_data_path: "/opt/{{ app_name }}"
+        nfs_mount_path: "/nfs"
+verifier:
+  name: testinfra
diff --git a/ansible/roles/kube-prometheus-stack/molecule/default/prepare.yml b/ansible/roles/kube-prometheus-stack/molecule/default/prepare.yml
new file mode 100644
index 00000000..dc7e0401
--- /dev/null
+++ b/ansible/roles/kube-prometheus-stack/molecule/default/prepare.yml
@@ -0,0 +1,10 @@
+---
+- name: Prepare infra for kube-prometheus-stack role
+  hosts: all
+  pre_tasks:
+    - name: Include infrastructure group variables
+      include_vars: ../../../../group_vars/infrastructure.yml
+  tasks:
+    - name: "Include prepare-kube-prometheus-stack"
+      include_role:
+        name: "prepare-kube-prometheus-stack"
diff --git a/ansible/roles/kube-prometheus-stack/tasks/main.yml b/ansible/roles/kube-prometheus-stack/tasks/main.yml
new file mode 100644
index 00000000..e190b758
--- /dev/null
+++ b/ansible/roles/kube-prometheus-stack/tasks/main.yml
@@ -0,0 +1,27 @@
+---
+- name: "Check {{ kube_prometheus_stack.helm_release_name }} helm package exists"
+  stat:
+    path: "{{ app_data_path }}/downloads/kube-prometheus-stack-{{ kube_prometheus_stack_version }}.tgz"
+  register: kube_prometheus_package_stat
+  failed_when: not kube_prometheus_package_stat.stat.exists  # fail early when the chart archive is missing
+
+- name: Generate helm values file
+  copy:
+    dest: "{{ kube_prometheus_stack.helm_values_file }}"
+    content: "{{ kube_prometheus_stack.helm_values | combine(kube_prometheus_helm_values | default({}), recursive=True) | to_nice_yaml }}"
+
+- name: Upload custom Grafana dashboard
+  copy:
+    src: grafana_dashboard.json
+    dest: "{{ nfs_mount_path }}/kube-prometheus/kube-prometheus-grafana/dashboards/custom/"
+
+- name: "Install Helm release {{ kube_prometheus_stack.helm_release_name }}"
+  community.kubernetes.helm:
+    release_name: "{{ kube_prometheus_stack.helm_release_name }}"
+    release_namespace: "{{ kube_prometheus_stack.k8s_namespace }}"
+    create_namespace: true
+    chart_ref: "{{ app_data_path }}/downloads/kube-prometheus-stack-{{ kube_prometheus_stack_version }}.tgz"
+    values_files: ["{{ kube_prometheus_stack.helm_values_file }}"]
+    wait: true
+    wait_timeout: "{{ kube_prometheus_stack.helm_timeout }}"
+  tags: molecule-notest
diff --git a/ansible/site.yml b/ansible/site.yml
index 7e22c5b2..3f08ec65 100644
--- a/ansible/site.yml
+++ b/ansible/site.yml
@@ -16,5 +16,7 @@
- import_playbook: resources.yml
- import_playbook: infrastructure.yml
- import_playbook: rke.yml
+- import_playbook: nfs.yml
+- import_playbook: kube_prometheus.yml
+ when: kube_prometheus_stack_enabled
- import_playbook: application.yml
-
diff --git a/ansible/test/roles/prepare-kube-prometheus-stack/tasks/main.yml b/ansible/test/roles/prepare-kube-prometheus-stack/tasks/main.yml
new file mode 100644
index 00000000..3d472c2e
--- /dev/null
+++ b/ansible/test/roles/prepare-kube-prometheus-stack/tasks/main.yml
@@ -0,0 +1,11 @@
+---
+- name: "Ensure {{ app_data_path }}/downloads directory exists"
+  file:
+    path: "{{ app_data_path }}/downloads"
+    recurse: true
+    state: directory
+
+- name: "Download kube-prometheus-stack-{{ kube_prometheus_stack_version }}.tgz"
+  get_url:
+    url: "https://github.com/prometheus-community/helm-charts/releases/download/kube-prometheus-stack-{{ kube_prometheus_stack_version }}/kube-prometheus-stack-{{ kube_prometheus_stack_version }}.tgz"
+    dest: "{{ app_data_path }}/downloads"
diff --git a/build/build_nexus_blob.sh b/build/build_nexus_blob.sh
index 9c4b5e69..ddc3145b 100755
--- a/build/build_nexus_blob.sh
+++ b/build/build_nexus_blob.sh
@@ -267,6 +267,7 @@ NXS_INFRA_LIST="${LISTS_DIR}/infra_docker_images.list"
NXS_DOCKER_IMG_LIST="${LISTS_DIR}/onap_docker_images.list"
NXS_RKE_DOCKER_IMG_LIST="${LISTS_DIR}/rke_docker_images.list"
NXS_K8S_DOCKER_IMG_LIST="${LISTS_DIR}/k8s_docker_images.list"
+NXS_PROMETHEUS_DOCKER_IMG_LIST="${LISTS_DIR}/kube_prometheus_stack_docker_images.list"
# Setup Nexus image used for build and install infra
NEXUS_IMAGE="$(grep sonatype/nexus3 ${NXS_INFRA_LIST})"
@@ -274,7 +275,7 @@ NEXUS_IMAGE_TAR="${DATA_DIR}/offline_data/docker_images_infra/$(sed 's/\//\_/ ;
# Set default lists if nothing specific defined by user
if [ ${#NXS_DOCKER_IMG_LISTS[@]} -eq 0 ]; then
- NXS_DOCKER_IMG_LISTS=("${NXS_DOCKER_IMG_LIST}" "${NXS_RKE_DOCKER_IMG_LIST}" "${NXS_K8S_DOCKER_IMG_LIST}")
+ NXS_DOCKER_IMG_LISTS=("${NXS_DOCKER_IMG_LIST}" "${NXS_RKE_DOCKER_IMG_LIST}" "${NXS_K8S_DOCKER_IMG_LIST}" "${NXS_PROMETHEUS_DOCKER_IMG_LIST}")
fi
# Create Docker client config dir
diff --git a/build/data_lists/kube_prometheus_stack_docker_images.list b/build/data_lists/kube_prometheus_stack_docker_images.list
new file mode 100644
index 00000000..320b488b
--- /dev/null
+++ b/build/data_lists/kube_prometheus_stack_docker_images.list
@@ -0,0 +1,10 @@
+quay.io/prometheus-operator/prometheus-config-reloader:v0.48.0
+quay.io/prometheus-operator/prometheus-operator:v0.48.0
+quay.io/prometheus/alertmanager:v0.22.0
+quay.io/prometheus/prometheus:v2.27.1
+grafana/grafana:7.5.5
+k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.0.0
+jettech/kube-webhook-certgen:v1.5.2
+quay.io/kiwigrid/k8s-sidecar:1.10.7
+quay.io/prometheus/node-exporter:v1.1.2
+busybox:1.31.1
diff --git a/build/data_lists/kube_prometheus_stack_http.list b/build/data_lists/kube_prometheus_stack_http.list
new file mode 100644
index 00000000..07176556
--- /dev/null
+++ b/build/data_lists/kube_prometheus_stack_http.list
@@ -0,0 +1 @@
+https://github.com/prometheus-community/helm-charts/releases/download/kube-prometheus-stack-16.1.2/kube-prometheus-stack-16.1.2.tgz
diff --git a/build/data_lists/onap_rpm.list b/build/data_lists/onap_rpm.list
index 689d5772..47139cf3 100644
--- a/build/data_lists/onap_rpm.list
+++ b/build/data_lists/onap_rpm.list
@@ -20,3 +20,4 @@ python-chardet-2.2.1-3.el7.noarch
python-ipaddress-1.0.16-2.el7.noarch
python-jsonpointer-1.9-2.el7.noarch
python-websocket-client-0.56.0-3.git3c25814.el7.noarch
+python2-openshift-0.11.2-1.el7.noarch
diff --git a/build/package.py b/build/package.py
index ce603be5..a87f5a18 100755
--- a/build/package.py
+++ b/build/package.py
@@ -246,7 +246,8 @@ def build_offline_deliverables(build_version,
bin_pattern_list = ['**/rke_linux-amd64',
'**/helm-*-linux-amd64.tar.gz',
'**/kubectl',
- '**/helm-push_*_linux_amd64.tar.gz']
+ '**/helm-push_*_linux_amd64.tar.gz',
+ '**/kube-prometheus-stack-*.tgz']
for pattern in bin_pattern_list:
for bin_file in glob.glob(os.path.join('.', pattern), recursive=True):
diff --git a/docs/BuildGuide.rst b/docs/BuildGuide.rst
index ae5a792a..6c6463ae 100644
--- a/docs/BuildGuide.rst
+++ b/docs/BuildGuide.rst
@@ -130,12 +130,14 @@ Download all required binaries and docker images. Run download.py twice (as show
# all data lists are taken from ./build/data_lists/ folder by default
# all resources will be stored in expected folder structure within "../resources" folder
./build/download/download.py --docker ./build/data_lists/infra_docker_images.list ../resources/offline_data/docker_images_infra \
- --http ./build/data_lists/infra_bin_utils.list ../resources/downloads
+ --http ./build/data_lists/infra_bin_utils.list ../resources/downloads \
+ --http ./build/data_lists/kube_prometheus_stack_http.list ../resources/downloads
# second argument for --docker is not present, images are just pulled and cached
./build/download/download.py --docker ./build/data_lists/rke_docker_images.list \
--docker ./build/data_lists/k8s_docker_images.list \
- --docker ./build/data_lists/onap_docker_images.list
+ --docker ./build/data_lists/onap_docker_images.list \
+ --docker ./build/data_lists/kube_prometheus_stack_docker_images.list
diff --git a/docs/InstallGuide.rst b/docs/InstallGuide.rst
index 61c52cc2..30f5c404 100644
--- a/docs/InstallGuide.rst
+++ b/docs/InstallGuide.rst
@@ -601,6 +601,79 @@ For additional information concerning the Kubernetes Dashboard please refer to t
-----
+Appendix 3. Running kube-prometheus stack
+-----------------------------------------
+
+`Kube-prometheus stack`_ is a collection of Kubernetes manifests, Grafana dashboards, and Prometheus rules combined with documentation and scripts to provide easy-to-operate, end-to-end Kubernetes cluster monitoring with Prometheus using the `Prometheus Operator`_.
+
+The Stack is not deployed by default in the Offline ONAP Platform, but all artifacts it requires are downloaded by the relevant scripts in the package build phase (see `Build Guide`_).
+
+Setup (optional)
+~~~~~~~~~~~~~~~~
+
+Kube-prometheus stack itself is a Kubernetes native application provisioned using Helm Charts. As such it can be configured using Helm values. Offline Installer provides a handy way for passing those values to the helm installation process.
+
+Any values for the Stack should be defined as subkeys of the **kube_prometheus_helm_values** variable in **application_configuration.yml**. For instance, in order to override the default Grafana password, insert the structure below into application_configuration.yml::
+
+ kube_prometheus_helm_values:
+ grafana:
+ adminPassword: <password>
+
+Another example - to set a custom storage size for the Prometheus TSDB::
+
+
+ kube_prometheus_helm_values:
+ prometheus:
+ prometheusSpec:
+ storageSpec:
+ volumeClaimTemplate:
+ spec:
+ resources:
+ requests:
+ storage: 6Gi
+
+A comprehensive list of Helm values for the Stack can be found on the `Kube-prometheus stack`_ project site, in the `values.yaml`_ file. Additional values for Grafana can be found on the `Grafana`_ project site in the *charts/grafana/values.yaml* file.
+
+Installation
+~~~~~~~~~~~~
+
+In order to actually install this tool it's required to set the following variable in application_configuration.yml::
+
+ kube_prometheus_stack_enabled: true
+
+After the Offline Platform installation process is complete, the Stack will be deployed into its own kubernetes and helm namespace **kube-prometheus**.
+
+Accessing Grafana dashboard
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The most straightforward way to access the Grafana UI is by leveraging the *port-forward* k8s facility. Issue the following command on the Infra host::
+
+ kubectl -n kube-prometheus port-forward --address 0.0.0.0 svc/kube-prometheus-stack-grafana 8081:80
+
+Then navigate to http://<infra IP>:8081 to access the UI:
+
+.. image:: images/grafana-signin.png
+ :alt: Grafana Login page
+
+Default username is *admin* and the default password is *prom-operator*.
+
+In the left pane, navigate to *Dashboards -> Manage* to see the various pre-defined dashboards that come bundled with the kube-prometheus stack. There is also the *Custom* folder, which holds a few additional dashboards defined by the Offline Installer authors:
+
+.. image:: images/grafana-dashboards.png
+ :alt: Grafana dashboards
+
+An alternative way of accessing the UI is to use the NodePort type service, which exposes the Grafana UI directly on a public port of the Infra host. To do so, get the port number first::
+
+ kubectl -n kube-prometheus get service/kube-prometheus-stack-grafana -o custom-columns=PORTS:.spec.ports[].nodePort
+
+Then navigate to http://<infra IP>:<nodePort> to access the UI.
+
+
+Caveats
+~~~~~~~
+
+The Kube-prometheus stack bundled with the Offline Installer requires at least release v3 of Helm. If you wish to deploy the Stack, you need to configure a relevant v3 release of Helm. Please see **Part 2. Configuration** for details.
+
.. _Build Guide: ./BuildGuide.rst
.. _Software requirements: https://docs.onap.org/projects/onap-oom/en/latest/oom_cloud_setup_guide.html#software-requirements
.. _Hardware requirements: https://docs.onap.org/projects/onap-oom/en/latest/oom_cloud_setup_guide.html#minimum-hardware-configuration
@@ -608,3 +681,7 @@ For additional information concerning the Kubernetes Dashboard please refer to t
.. _Offline installer: https://gerrit.onap.org/r/q/oom/offline-installer
.. _RKE: https://rancher.com/products/rke/
.. _Helm: https://helm.sh/
+.. _Kube-prometheus stack: https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack
+.. _Prometheus Operator: https://github.com/prometheus-operator/prometheus-operator
+.. _values.yaml: https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/values.yaml
+.. _Grafana: https://github.com/grafana/helm-charts
diff --git a/docs/images/grafana-dashboards.png b/docs/images/grafana-dashboards.png
new file mode 100644
index 00000000..9cedeabb
--- /dev/null
+++ b/docs/images/grafana-dashboards.png
Binary files differ
diff --git a/docs/images/grafana-signin.png b/docs/images/grafana-signin.png
new file mode 100644
index 00000000..73bc28af
--- /dev/null
+++ b/docs/images/grafana-signin.png
Binary files differ
diff --git a/tools/helm-healer.sh b/tools/helm-healer.sh
index ab79be8e..9395e079 100755
--- a/tools/helm-healer.sh
+++ b/tools/helm-healer.sh
@@ -57,17 +57,17 @@ USAGE
EXAMPLES
Usage 1: (simple heuristics - redeploy failed components):
- ${CMD} -n onap -f /some/override1.yml -s /dockerdata-nfs
+ ${CMD} -n onap -f /some/override1.yml -s /dockerdata-nfs/onap
Usage 2: (redeploy ONLY explicitly listed components):
- ${CMD} -n onap -f /some/override1.yml -s /dockerdata-nfs \\
+ ${CMD} -n onap -f /some/override1.yml -s /dockerdata-nfs/onap \\
-c onap-aaf -c onap-sdc -c onap-portal
Usage 3: (delete EVERYTHING and redeploy):
- ${CMD} -n onap -f /some/override1.yml -s /dockerdata-nfs --delete-all
+ ${CMD} -n onap -f /some/override1.yml -s /dockerdata-nfs/onap --delete-all
Usage 4: (delete EVERYTHING and DO NOT redeploy - clean env.)
- ${CMD} -n onap -s /dockerdata-nfs --delete-all --clean-only
+ ${CMD} -n onap -s /dockerdata-nfs/onap --delete-all --clean-only
NOTES