From 4a9ca697710e8ab87e3e286a852d5413c4adc037 Mon Sep 17 00:00:00 2001
From: Akhila Kishore
Date: Mon, 23 Sep 2019 14:57:43 -0700
Subject: Adding SRIOV Network Device Plugin to KuD

Integrating SRIOV as an add-on to KuD.
A device should have X700 series NIC for this Add-on to work.
Getting the device driver, build and installing it is a part of this patch.
Followed by running the SRIOV CNI Daemonset, and NetworkAttachmentDefinition.
Reworked the way SRIOV check happens. Previously ran on installer.sh.
Now the script is injected into kube-nodes and playbook will run only if
the hardware check is true by creating a conf file.
Removed unwanted comments and nit changes.

Signed-off-by: Akhila Kishore
Issue-ID: MULTICLOUD-832
Change-Id: I1701a50bc717ddca0d332d6a42d329eaf4c03820
---
 kud/deployment_infra/images/sriov-cni.yml          |  45 ++++++++
 kud/deployment_infra/images/sriov-daemonset.yml    |  82 +++++++++++++++
 kud/deployment_infra/playbooks/configure-sriov.yml |  36 +++++++
 .../playbooks/install_iavf_drivers.sh              |  68 ++++++++++++
 kud/deployment_infra/playbooks/kud-vars.yml        |   6 ++
 .../playbooks/preconfigure-sriov.yml               | 116 +++++++++++++++++++++
 kud/deployment_infra/playbooks/sriov-nad.yml       |  19 ++++
 .../playbooks/sriov_hardware_check.sh              |  26 +++++
 kud/hosting_providers/vagrant/clean_sriov.sh       |  16 +++
 kud/hosting_providers/vagrant/installer.sh         |   9 +-
 kud/tests/sriov.sh                                 |  72 +++++++++++++
 11 files changed, 493 insertions(+), 2 deletions(-)
 create mode 100644 kud/deployment_infra/images/sriov-cni.yml
 create mode 100644 kud/deployment_infra/images/sriov-daemonset.yml
 create mode 100644 kud/deployment_infra/playbooks/configure-sriov.yml
 create mode 100755 kud/deployment_infra/playbooks/install_iavf_drivers.sh
 create mode 100644 kud/deployment_infra/playbooks/preconfigure-sriov.yml
 create mode 100644 kud/deployment_infra/playbooks/sriov-nad.yml
 create mode 100644 kud/deployment_infra/playbooks/sriov_hardware_check.sh
 create mode 100644 kud/hosting_providers/vagrant/clean_sriov.sh
 create mode 100755 kud/tests/sriov.sh

diff --git a/kud/deployment_infra/images/sriov-cni.yml b/kud/deployment_infra/images/sriov-cni.yml
new file mode 100644
index 00000000..bd943d04
--- /dev/null
+++ b/kud/deployment_infra/images/sriov-cni.yml
@@ -0,0 +1,45 @@
+# SRIOV-CNI Release v1
+# Based on:
+# https://github.com/intel/sriov-cni/blob/master/images/sriov-cni-daemonset.yaml
+---
+apiVersion: extensions/v1beta1
+kind: DaemonSet
+metadata:
+  name: kube-sriov-cni-ds-amd64
+  namespace: kube-system
+  labels:
+    tier: node
+    app: sriov-cni
+spec:
+  template:
+    metadata:
+      labels:
+        tier: node
+        app: sriov-cni
+    spec:
+      hostNetwork: true
+      nodeSelector:
+        beta.kubernetes.io/arch: amd64
+      tolerations:
+      - key: node-role.kubernetes.io/master
+        operator: Exists
+        effect: NoSchedule
+      containers:
+      - name: kube-sriov-cni
+        image: nfvpe/sriov-cni
+        securityContext:
+          privileged: true
+        resources:
+          requests:
+            cpu: "100m"
+            memory: "50Mi"
+          limits:
+            cpu: "100m"
+            memory: "50Mi"
+        volumeMounts:
+        - name: cnibin
+          mountPath: /host/opt/cni/bin
+      volumes:
+        - name: cnibin
+          hostPath:
+            path: /opt/cni/bin
diff --git a/kud/deployment_infra/images/sriov-daemonset.yml b/kud/deployment_infra/images/sriov-daemonset.yml
new file mode 100644
index 00000000..1edbc6c3
--- /dev/null
+++ b/kud/deployment_infra/images/sriov-daemonset.yml
@@ -0,0 +1,82 @@
+# SRIOV device CNI plugin
+# Based on:
+# https://github.com/intel/sriov-network-device-plugin/blob/master/images/sriovdp-daemonset.yaml
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: sriovdp-config
+  namespace: kube-system
+data:
+  config.json: |
+    {
+        "resourceList": [{
+                "resourceName": "intel_sriov_700",
+                "selectors": {
+                    "vendors": ["8086"],
+                    "devices": ["37cd"]
+                }
+        }]
+    }
+
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: sriov-device-plugin
+  namespace: kube-system
+
+---
+apiVersion: extensions/v1beta1
+kind: DaemonSet
+metadata:
+  name: kube-sriov-device-plugin-amd64
+  namespace: kube-system
+  labels:
+    tier: node
+    app: sriovdp
+spec:
+  template:
+    metadata:
+      labels:
+        tier: node
+        app: sriovdp
+    spec:
+      hostNetwork: true
+      hostPID: true
+      nodeSelector:
+        beta.kubernetes.io/arch: amd64
+      tolerations:
+      - key: node-role.kubernetes.io/master
+        operator: Exists
+        effect: NoSchedule
+      serviceAccountName: sriov-device-plugin
+      containers:
+      - name: kube-sriovdp
+        image: nfvpe/sriov-device-plugin
+        args:
+        - --log-dir=sriovdp
+        - --log-level=10
+        securityContext:
+          privileged: true
+        volumeMounts:
+        - name: devicesock
+          mountPath: /var/lib/kubelet/
+          readOnly: false
+        - name: log
+          mountPath: /var/log
+        - name: config-volume
+          mountPath: /etc/pcidp
+      volumes:
+        - name: devicesock
+          hostPath:
+            path: /var/lib/kubelet/
+        - name: log
+          hostPath:
+            path: /var/log
+        - name: config-volume
+          configMap:
+            name: sriovdp-config
+            items:
+            - key: config.json
+              path: config.json
diff --git a/kud/deployment_infra/playbooks/configure-sriov.yml b/kud/deployment_infra/playbooks/configure-sriov.yml
new file mode 100644
index 00000000..8ba6cf48
--- /dev/null
+++ b/kud/deployment_infra/playbooks/configure-sriov.yml
@@ -0,0 +1,36 @@
+---
+# SPDX-license-identifier: Apache-2.0
+##############################################################################
+# Copyright (c) 2018
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+- import_playbook: preconfigure-sriov.yml
+
+- hosts: localhost
+  become: yes
+  pre_tasks:
+    - block:
+      - name: "End play if SRIOV is False"
+        debug:
+          msg: "SRIOV option not available, ending play"
+      - meta: end_play
+      when: SRIOV_NODE == "False"
+  tasks:
+    - debug:
+        var: SRIOV_NODE
+    - name: Apply Multus
+      shell: "/usr/local/bin/kubectl apply -f {{ playbook_dir }}/../images/multus-daemonset.yml"
+      when: SRIOV_NODE==True
+    - name: Apply SRIOV CNI
+      shell: "/usr/local/bin/kubectl apply -f {{ playbook_dir }}/../images/sriov-cni.yml"
+      when: SRIOV_NODE==True
+    - name: Apply SRIOV DaemonSet
+      shell: "/usr/local/bin/kubectl apply -f {{ playbook_dir }}/../images/sriov-daemonset.yml"
+      when: SRIOV_NODE==True
+    - name: Apply SRIOV Network Attachment definition
+      shell: "/usr/local/bin/kubectl apply -f {{ playbook_dir }}/sriov-nad.yml"
+      when: SRIOV_NODE==True
diff --git a/kud/deployment_infra/playbooks/install_iavf_drivers.sh b/kud/deployment_infra/playbooks/install_iavf_drivers.sh
new file mode 100755
index 00000000..d44483de
--- /dev/null
+++ b/kud/deployment_infra/playbooks/install_iavf_drivers.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+# Based on:
+# https://gerrit.akraino.org/r/#/c/icn/+/1359/1/deploy/kud-plugin-addons/device-plugins/sriov/driver/install_iavf_drivers.sh
+
+function install_iavf_driver {  # $1: interface name; installs ./iavf.ko and writes 8 to its sriov_numvfs
+    local ifname=$1
+
+    echo "Installing modules..."
+    echo "Installing i40evf blacklist file..."
+    mkdir -p "/etc/modprobe.d/"
+    echo "blacklist i40evf" > "/etc/modprobe.d/iavf-blacklist-i40evf.conf"
+
+    kver=$(uname -r)
+    install_mod_dir=/lib/modules/$kver/updates/drivers/net/ethernet/intel/iavf/
+    echo "Installing driver in $install_mod_dir"
+    mkdir -p "$install_mod_dir"
+    cp iavf.ko "$install_mod_dir"
+
+    echo "Installing kernel module i40evf..."
+    depmod -a
+    modprobe i40evf
+    modprobe iavf
+
+    echo "Enabling VF on interface $ifname..."
+    echo "/sys/class/net/$ifname/device/sriov_numvfs"
+    echo '8' > "/sys/class/net/$ifname/device/sriov_numvfs"
+}
+
+function is_used {  # returns 0 when interface $1 has NO entry in the routing table, 1 otherwise
+    local ifname=$1
+    route_info=$(ip route show | grep -w -- "$ifname")  # -w: eth1 must not match eth10
+    if [ -z "$route_info" ]; then
+        return 0
+    else
+        return 1
+    fi
+}
+
+function get_sriov_ifname {  # prints the first interface exposing sriov_numvfs that has no route, else ''
+    for net_device in /sys/class/net/*/ ; do
+        if [ -e "$net_device/device/sriov_numvfs" ] ; then
+            ifname=$(basename "$net_device")
+            is_used "$ifname"
+            if [ "$?" = "0" ]; then
+                echo "$ifname"
+                return
+            fi
+        fi
+    done
+    echo ''
+}
+
+if [ $# -ne 1 ] ; then
+    ifname=$(get_sriov_ifname)
+    if [ -z "$ifname" ]; then
+        echo "Cannot find Nic with SRIOV support."
+    else
+        install_iavf_driver "$ifname"
+    fi
+else
+    ifname=$1
+    if [ ! -e "/sys/class/net/$ifname/device/sriov_numvfs" ] ; then
+        echo "${ifname} is not a valid sriov interface"
+    else
+        install_iavf_driver "$ifname"
+    fi
+fi
diff --git a/kud/deployment_infra/playbooks/kud-vars.yml b/kud/deployment_infra/playbooks/kud-vars.yml
index a9910f8d..316ec890 100644
--- a/kud/deployment_infra/playbooks/kud-vars.yml
+++ b/kud/deployment_infra/playbooks/kud-vars.yml
@@ -39,6 +39,12 @@ istio_source_type: "tarball"
 istio_version: 1.0.3
 istio_url: "https://github.com/istio/istio/releases/download/{{ istio_version }}/istio-{{ istio_version }}-linux.tar.gz"
 
+sriov_dest: "{{ base_dest }}/sriov"
+driver_source_type: "tarball"
+driver_version: 3.7.34
+driver_url: "https://downloadmirror.intel.com/28943/eng/iavf-{{ driver_version }}.tar.gz"
+package: iavf-3.7.34
+
 go_version: '1.12.5'
 kubespray_version: 2.10.4
 helm_client_version: 2.9.1
diff --git a/kud/deployment_infra/playbooks/preconfigure-sriov.yml b/kud/deployment_infra/playbooks/preconfigure-sriov.yml
new file mode 100644
index 00000000..c4276e1b
--- /dev/null
+++ b/kud/deployment_infra/playbooks/preconfigure-sriov.yml
@@ -0,0 +1,116 @@
+---
+# SPDX-license-identifier: Apache-2.0
+##############################################################################
+# Copyright (c) 2018
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+- hosts: kube-node
+  become: yes
+  pre_tasks:
+    - name: Create SRIOV driver folder in the target destination
+      file:
+        state: directory
+        path: "{{ item }}"
+      with_items:
+        - sriov
+    - copy:
+        src: "{{ playbook_dir }}/sriov_hardware_check.sh"
+        dest: sriov
+    - name: Changing perm of "sh", adding "+x"
+      shell: "chmod +x sriov_hardware_check.sh"
+      args:
+        chdir: "sriov"
+        warn: False
+    - name: Register SRIOV
+      shell: "echo {{ SRIOV | default(False) }}"
+    - name: Run the script and Re-evaluate the variable
+      command: sriov/sriov_hardware_check.sh
+      register: output
+    - set_fact:
+        SRIOV: "{{ output.stdout }}"
+    - name: Recreate the conf file for every host
+      file:
+        path: /tmp/sriov.conf
+        state: absent
+      delegate_to: localhost
+    - lineinfile : >
+        dest=/tmp/sriov.conf
+        create=yes
+        line='{{SRIOV}}'
+      delegate_to: localhost
+    - name: Clean the script and folder.
+      file:
+        path: sriov
+        state: absent
+
+# Run the following task only if the SRIOV is set to True
+# i.e when SRIOV hardware is available
+- hosts: localhost
+  become: yes
+  pre_tasks:
+    - name: Read SRIOV value from the conf file.
+      command: cat /tmp/sriov.conf
+      register: installer_output
+      become: yes
+    - set_fact:
+        SRIOV_NODE: "{{ installer_output.stdout }}"
+    - meta: end_play
+      when: SRIOV_NODE == "False"
+    - name: Load kud variables
+      include_vars:
+        file: kud-vars.yml
+      when: SRIOV_NODE == "True"
+  tasks:
+    - name: Create sriov folder
+      file:
+        state: directory
+        path: "{{ sriov_dest }}"
+      when: SRIOV_NODE == "True"
+      ignore_errors: yes
+    - name: Get SRIOV compatible driver
+      get_url: "url={{ driver_url }} dest=/tmp/{{ package }}.tar.gz"
+      when: SRIOV_NODE == "True"
+    - name: Extract sriov source code
+      unarchive:
+        src: "/tmp/{{ package }}.tar.gz"
+        dest: "{{ sriov_dest }}"
+      when: SRIOV_NODE == "True"
+    - name: Build the default target
+      make:
+        chdir: "{{ sriov_dest }}/{{ package }}/src"  # same directory the archive was unpacked into
+      become: yes
+      when: SRIOV_NODE == "True"
+# Copy all the driver and install script into target node
+- hosts: kube-node
+  become: yes
+  pre_tasks:
+    - name: Load kud variables
+      include_vars:
+        file: kud-vars.yml
+      when: SRIOV == "True"
+  tasks:
+    - name: create SRIOV driver folder in the target destination
+      file:
+        state: directory
+        path: "{{ item }}"
+      with_items:
+        - sriov_driver
+      when: SRIOV == "True"
+    - name: Copy SRIOV driver to target destination
+      command: "cp {{ sriov_dest }}/{{ package }}/src/iavf.ko /root/sriov_driver/"
+      when: SRIOV == "True"
+    - name: Copy SRIOV driver install script to target folder
+      command: "cp {{ playbook_dir }}/install_iavf_drivers.sh /root/sriov_driver/install.sh"
+      when: SRIOV == "True"
+    - name: Changing perm of "install.sh", adding "+x"
+      file: dest=/root/sriov_driver/install.sh mode=a+x
+      when: SRIOV == "True"
+    - name: Run a script with arguments
+      shell: ./install.sh
+      args:
+        chdir: "/root/sriov_driver"
+      when: SRIOV == "True"
diff --git a/kud/deployment_infra/playbooks/sriov-nad.yml b/kud/deployment_infra/playbooks/sriov-nad.yml
new file mode 100644
index 00000000..7670b700
--- /dev/null
+++ b/kud/deployment_infra/playbooks/sriov-nad.yml
@@ -0,0 +1,19 @@
+apiVersion: "k8s.cni.cncf.io/v1"
+kind: NetworkAttachmentDefinition
+metadata:
+  name: sriov-eno2
+  annotations:
+    k8s.v1.cni.cncf.io/resourceName: intel.com/intel_sriov_700
+spec:
+  config: '{
+    "type": "sriov",
+    "cniVersion": "0.3.1",
+    "ipam": {
+            "type": "host-local",
+            "subnet": "10.56.206.0/24",
+            "routes": [
+                    { "dst": "0.0.0.0/0" }
+            ],
+            "gateway": "10.56.206.1"
+    }
+    }'
diff --git a/kud/deployment_infra/playbooks/sriov_hardware_check.sh b/kud/deployment_infra/playbooks/sriov_hardware_check.sh
new file mode 100644
index 00000000..ea1b7b0c
--- /dev/null
+++ b/kud/deployment_infra/playbooks/sriov_hardware_check.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# SPDX-license-identifier: Apache-2.0
+##############################################################################
+# Copyright (c) 2018
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+set -o pipefail
+
+source /etc/environment
+
+ethernet_adapter_version=$( lspci | grep "Ethernet Controller X710" | head -n 1 | cut -d " " -f 8 )
+if [ -z "$ethernet_adapter_version" ]; then
+    echo "False"
+    exit 0
+fi
+# stdout is captured by preconfigure-sriov.yml: print exactly "True" or "False"
+#checking for the right hardware version of NIC on the machine
+if [ "$ethernet_adapter_version" == "X710" ]; then
+    echo "True"
+else
+    echo "False"
+fi
diff --git a/kud/hosting_providers/vagrant/clean_sriov.sh b/kud/hosting_providers/vagrant/clean_sriov.sh
new file mode 100644
index 00000000..76b8a960
--- /dev/null
+++ b/kud/hosting_providers/vagrant/clean_sriov.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# SPDX-license-identifier: Apache-2.0
+##############################################################################
+# Copyright (c) 2018
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+modprobe -r iavf
+kver=$(uname -r)
+rm -f "/lib/modules/${kver:?}/updates/drivers/net/ethernet/intel/iavf/iavf.ko"
+depmod -a
+sudo rm -rf /tmp/sriov
+sudo rm -f /tmp/iavf-3.7.34.tar.gz  # preconfigure-sriov.yml downloads the tarball to /tmp
diff --git a/kud/hosting_providers/vagrant/installer.sh b/kud/hosting_providers/vagrant/installer.sh
index 41b21f64..94023524 100755
--- a/kud/hosting_providers/vagrant/installer.sh
+++ b/kud/hosting_providers/vagrant/installer.sh
@@ -159,6 +159,13 @@ function install_addons {
             popd
         fi
     done
+    ansible-playbook $verbose -i $kud_inventory $kud_playbooks/configure-sriov.yml | sudo tee $log_folder/setup-sriov.log
+    if [[ "${testing_enabled}" == "true" ]]; then
+        pushd $kud_tests
+        bash sriov.sh
+        popd
+    fi
+    echo "Add-ons deployment complete..."
 }
 
 # install_plugin() - Install ONAP Multicloud Kubernetes plugin
@@ -229,11 +236,9 @@ kud_playbooks=$kud_infra_folder/playbooks
 kud_tests=$kud_folder/../../tests
 k8s_info_file=$kud_folder/k8s_info.log
 testing_enabled=${KUD_ENABLE_TESTS:-false}
-
 sudo mkdir -p $log_folder
 sudo mkdir -p /opt/csar
 sudo chown -R $USER /opt/csar
-
 # Install dependencies
 # Setup proxy variables
 if [ -f $kud_folder/sources.list ]; then
diff --git a/kud/tests/sriov.sh b/kud/tests/sriov.sh
new file mode 100755
index 00000000..c66f5db8
--- /dev/null
+++ b/kud/tests/sriov.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+# SPDX-license-identifier: Apache-2.0
+##############################################################################
+# Copyright (c) 2018
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+set -o pipefail
+
+ethernet_adapter_version=$( lspci | grep "Ethernet Controller X710" | head -n 1 | cut -d " " -f 8 )
+if [ -z "$ethernet_adapter_version" ]; then
+    echo " Ethernet adapator version is not set. SRIOV test case cannot run on this machine"
+    exit 0
+fi
+#checking for the right hardware version of NIC on the machine
+if [ "$ethernet_adapter_version" == "X710" ]; then
+    echo "NIC card specs match. SRIOV option avaiable for this version."
+else
+    echo -e "Failed. The version supplied does not match.\nTest cannot be executed."
+    exit 0
+fi
+
+pod_name=pod-case-01
+rm -f "$HOME/$pod_name.yaml"
+kubectl delete pod "$pod_name" --ignore-not-found=true --now --wait
+allocated_node_resource=$(kubectl describe node | grep "intel.com/intel_sriov_700" | tail -n1 |awk '{print $(NF)}')
+
+echo "The allocated resource of the node is: " "$allocated_node_resource"
+cat << POD > "$HOME/$pod_name.yaml"
+apiVersion: v1
+kind: Pod
+metadata:
+  name: $pod_name
+  annotations:
+    k8s.v1.cni.cncf.io/networks: sriov-eno2
+spec:
+  containers:
+  - name: test-pod
+    image: docker.io/centos/tools:latest
+    command:
+    - /sbin/init
+    resources:
+      requests:
+        intel.com/intel_sriov_700: '1'
+      limits:
+        intel.com/intel_sriov_700: '1'
+POD
+kubectl create -f "$HOME/$pod_name.yaml" --validate=false
+    for pod in $pod_name; do
+        status_phase=""
+        while [[ "$status_phase" != "Running" ]]; do
+            new_phase=$(kubectl get pods "$pod" | awk 'NR==2{print $3}')
+            if [[ "$new_phase" != "$status_phase" ]]; then
+                echo "$(date +%H:%M:%S) - $pod : $new_phase"
+                status_phase=$new_phase
+            fi
+            # Pause between polls so the loop does not hammer the API server.
+            case "$new_phase" in
+                Running) echo "Pod is up and running.." ;;
+                Err*) exit 1 ;;
+                *) sleep 1 ;;
+            esac
+        done
+    done
+allocated_node_resource=$(kubectl describe node | grep "intel.com/intel_sriov_700" | tail -n1 |awk '{print $(NF)}')
+
+echo " The current resource allocation after the pod creation is: " "$allocated_node_resource"
+kubectl delete pod "$pod_name" --now
+echo "Test complete."
-- 
cgit 1.2.3-korg