aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTodd Malsbary <todd.malsbary@intel.com>2021-06-17 17:10:38 -0700
committerTodd Malsbary <todd.malsbary@intel.com>2021-06-21 12:14:48 -0700
commit2cfc1f2d67103726140b32b5a5c555f7a66636be (patch)
tree268feaaf39d4d2cda0750790ec45d32e139f3ad0
parentdc925231a978d1ae4dce969cb4c386d4a1b0bb87 (diff)
Move topology-manager configuration to kubespray
The steps performed by the existing ansible playbook can be performed directly by kubespray. In addition, fix and enable the topology-manager.sh test. Issue-ID: MULTICLOUD-1324 Signed-off-by: Todd Malsbary <todd.malsbary@intel.com> Change-Id: Iee2197c1fc3e35288796399cccff0d3ae0925a6c
-rw-r--r--kud/deployment_infra/playbooks/configure-topology-manager.yml66
-rw-r--r--kud/deployment_infra/playbooks/kud-vars.yml7
-rwxr-xr-xkud/hosting_providers/containerized/installer.sh2
-rw-r--r--kud/hosting_providers/containerized/inventory/group_vars/k8s-cluster.yml5
-rwxr-xr-xkud/hosting_providers/vagrant/installer.sh6
-rw-r--r--kud/hosting_providers/vagrant/inventory/group_vars/k8s-cluster.yml5
-rwxr-xr-xkud/tests/topology-manager-sriov.sh112
-rwxr-xr-xkud/tests/topology-manager.sh16
8 files changed, 135 insertions, 84 deletions
diff --git a/kud/deployment_infra/playbooks/configure-topology-manager.yml b/kud/deployment_infra/playbooks/configure-topology-manager.yml
deleted file mode 100644
index 012bc8b0..00000000
--- a/kud/deployment_infra/playbooks/configure-topology-manager.yml
+++ /dev/null
@@ -1,66 +0,0 @@
----
-# SPDX-license-identifier: Apache-2.0
-##############################################################################
-# Copyright (c) 2020
-# All rights reserved. This program and the accompanying materials
-# are made available under the terms of the Apache License, Version 2.0
-# which accompanies this distribution, and is available at
-# http://www.apache.org/licenses/LICENSE-2.0
-##############################################################################
-
-- hosts: kube-node
- tasks:
- - name: Load kud variables
- include_vars:
- file: kud-vars.yml
-
- - name: creating kubelet config
- become: yes
- blockinfile:
- path: "{{ kubernetes_config_file }}"
- marker: "# {mark} OpenNESS configuration - General"
- create: yes
- block: |
- featureGates:
- TopologyManager: {{ False if topology_manager.policy == 'none' else True }}
- notify:
- - enable and restart kubelet
-
- - name: customize kubelet config - CPU Manager
- become: yes
- blockinfile:
- path: "{{ kubernetes_config_file }}"
- marker: "# {mark} OpenNESS configuration - CPU Manager"
- block: |
- cpuManagerPolicy: {{ cpu_manager.policy }}
- state: "{{ 'present' if cpu_manager.policy == 'static' else 'absent' }}"
- notify:
- - remove cpu manager checkpoint file
- - enable and restart kubelet
-
- - name: customize kubelet config - Topology Manager
- become: yes
- blockinfile:
- path: "{{ kubernetes_config_file }}"
- marker: "# {mark} OpenNESS configuration - Topology Manager"
- block: |
- topologyManagerPolicy: {{ topology_manager.policy }}
- state: "{{ 'absent' if topology_manager.policy == 'none' else 'present' }}"
- notify:
- - enable and restart kubelet
-
- handlers:
- - name: enable and restart kubelet
- become: yes
- systemd:
- name: kubelet
- daemon_reload: yes
- enabled: yes
- masked: no
- state: restarted
-
- - name: remove cpu manager checkpoint file
- become: yes
- file:
- path: "{{ cpu_manager.checkpoint_file }}"
- state: absent
diff --git a/kud/deployment_infra/playbooks/kud-vars.yml b/kud/deployment_infra/playbooks/kud-vars.yml
index 24a9ef98..35057f5b 100644
--- a/kud/deployment_infra/playbooks/kud-vars.yml
+++ b/kud/deployment_infra/playbooks/kud-vars.yml
@@ -78,13 +78,6 @@ optane_ipmctl_version: 02.00.00.3474
optane_ipmctl_url: "https://launchpad.net/ubuntu/+archive/primary/+sourcefiles/ipmctl/{{ optane_package }}.tar.xz"
optane_ipmctl_package: ipmctl_02.00.00.3474+really01.00.00.3469.orig
-kubernetes_config_file: "/etc/kubernetes/kubelet-config.yaml"
-cpu_manager:
- policy: "static" # Options: none (disabled), static (default)
- checkpoint_file: "/var/lib/kubelet/cpu_manager_state"
-topology_manager:
- policy: "best-effort" # Options: none (disabled), best-effort (default), restricted, single-numa-node
-
emco_git_url: "https://github.com/open-ness/EMCO.git"
emco_repository: "integratedcloudnative/"
emco_version: "openness-21.03"
diff --git a/kud/hosting_providers/containerized/installer.sh b/kud/hosting_providers/containerized/installer.sh
index 427850ab..7365a14f 100755
--- a/kud/hosting_providers/containerized/installer.sh
+++ b/kud/hosting_providers/containerized/installer.sh
@@ -230,7 +230,7 @@ function install_host_artifacts {
done
mkdir -p ${host_addons_dir}/tests
- for test in _common _common_test _functions multus ovn4nfv nfd sriov-network qat cmk; do
+ for test in _common _common_test _functions topology-manager-sriov multus ovn4nfv nfd sriov-network qat cmk; do
cp ${kud_tests}/${test}.sh ${host_addons_dir}/tests
done
diff --git a/kud/hosting_providers/containerized/inventory/group_vars/k8s-cluster.yml b/kud/hosting_providers/containerized/inventory/group_vars/k8s-cluster.yml
index 7d0404a5..a13d8412 100644
--- a/kud/hosting_providers/containerized/inventory/group_vars/k8s-cluster.yml
+++ b/kud/hosting_providers/containerized/inventory/group_vars/k8s-cluster.yml
@@ -124,3 +124,8 @@ podsecuritypolicy_restricted_spec:
# This will fail if allowed-unsafe-sysctls is not set accordingly in kubelet flags
allowedUnsafeSysctls:
- '*'
+
+# Customize kubelet config of CPU and topology manager
+kubelet_node_config_extra_args:
+ cpuManagerPolicy: "static" # Options: none (disabled), static (default)
+ topologyManagerPolicy: "best-effort" # Options: none (disabled), best-effort (default), restricted, single-numa-node
diff --git a/kud/hosting_providers/vagrant/installer.sh b/kud/hosting_providers/vagrant/installer.sh
index c88dc9e6..39da50e7 100755
--- a/kud/hosting_providers/vagrant/installer.sh
+++ b/kud/hosting_providers/vagrant/installer.sh
@@ -186,7 +186,7 @@ function install_addons {
# The order of KUD_ADDONS is important: some plugins (sriov, qat)
# require nfd to be enabled. Some addons are not currently supported with containerd
if [ "${container_runtime}" == "docker" ]; then
- kud_addons=${KUD_ADDONS:-topology-manager virtlet ovn4nfv nfd sriov \
+ kud_addons=${KUD_ADDONS:-virtlet ovn4nfv nfd sriov \
qat optane cmk}
elif [ "${container_runtime}" == "containerd" ]; then
kud_addons=${KUD_ADDONS:-ovn4nfv nfd}
@@ -216,7 +216,9 @@ function install_addons {
popd
fi
# Run other plugin tests
- for addon in ${kud_addons}; do
+ # The topology-manager is added to the tests here as it is
+ # enabled via kubelet config, not an addon
+ for addon in topology-manager ${kud_addons}; do
pushd $kud_tests
bash ${addon}.sh || failed_kud_tests="${failed_kud_tests} ${addon}"
popd
diff --git a/kud/hosting_providers/vagrant/inventory/group_vars/k8s-cluster.yml b/kud/hosting_providers/vagrant/inventory/group_vars/k8s-cluster.yml
index 7803f27a..bf6f8c84 100644
--- a/kud/hosting_providers/vagrant/inventory/group_vars/k8s-cluster.yml
+++ b/kud/hosting_providers/vagrant/inventory/group_vars/k8s-cluster.yml
@@ -121,3 +121,8 @@ podsecuritypolicy_restricted_spec:
# This will fail if allowed-unsafe-sysctls is not set accordingly in kubelet flags
allowedUnsafeSysctls:
- '*'
+
+# Customize kubelet config of CPU and topology manager
+kubelet_node_config_extra_args:
+ cpuManagerPolicy: "static" # Options: none (disabled), static (default)
+ topologyManagerPolicy: "best-effort" # Options: none (disabled), best-effort (default), restricted, single-numa-node
diff --git a/kud/tests/topology-manager-sriov.sh b/kud/tests/topology-manager-sriov.sh
new file mode 100755
index 00000000..447a7c83
--- /dev/null
+++ b/kud/tests/topology-manager-sriov.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+# SPDX-license-identifier: Apache-2.0
+##############################################################################
+# Copyright (c) 2020
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+set -o errexit
+set -o nounset
+set -o pipefail
+
+source _common.sh
+source _functions.sh
+
+sriov_capable_nodes=$(kubectl get nodes -o json | jq -r '.items[] | select((.status.capacity."intel.com/intel_sriov_nic"!=null) and ((.status.capacity."intel.com/intel_sriov_nic"|tonumber)>=2)) | .metadata.name')
+if [ -z "$sriov_capable_nodes" ]; then
+ echo "Ethernet adaptor version is not set. Topology manager test case cannot run on this machine"
+ exit 0
+else
+ echo "NIC card specs match. Topology manager option avaiable for this version."
+fi
+
+pod_name=pod-topology-manager
+csar_id=bd55cccc-bf34-11ea-b3de-0242ac130004
+
+function create_pod_yaml {
+ local csar_id=$1
+ _checks_args $csar_id
+ pushd ${CSAR_DIR}/${csar_id}
+
+ cat << POD > $pod_name.yaml
+kind: Pod
+apiVersion: v1
+metadata:
+ name: $pod_name
+ annotations:
+ k8s.v1.cni.cncf.io/networks: sriov-intel
+spec:
+ containers:
+ - name: $pod_name
+ image: docker.io/centos/tools:latest
+ command:
+ - /sbin/init
+ resources:
+ limits:
+ cpu: "1"
+ memory: "500Mi"
+ intel.com/intel_sriov_nic: '1'
+ requests:
+ cpu: "1"
+ memory: "500Mi"
+ intel.com/intel_sriov_nic: '1'
+POD
+ popd
+}
+
+create_pod_yaml ${csar_id}
+kubectl delete pod $pod_name --ignore-not-found=true --now --wait
+kubectl create -f ${CSAR_DIR}/${csar_id}/$pod_name.yaml --validate=false
+
+status_phase=""
+while [[ $status_phase != "Running" ]]; do
+ new_phase=$(kubectl get pods $pod_name | awk 'NR==2{print $3}')
+ if [[ $new_phase != $status_phase ]]; then
+ echo "$(date +%H:%M:%S) - $pod_name : $new_phase"
+ status_phase=$new_phase
+ fi
+ if [[ $new_phase == "Running" ]]; then
+ echo "Pod is up and running.."
+ fi
+ if [[ $new_phase == "Err"* ]]; then
+ exit 1
+ fi
+done
+
+uid=$(kubectl get pod pod-topology-manager -o jsonpath='{.metadata.uid}')
+node_name=$(kubectl get pod $pod_name -o jsonpath='{.spec.nodeName}')
+node_ip=$(kubectl get node $node_name -o jsonpath='{.status.addresses[].address}')
+
+apt-get install -y jq
+cpu_core=$(ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null $node_ip -- cat /var/lib/kubelet/cpu_manager_state | jq -r --arg UID "${uid}" --arg POD_NAME "${pod_name}" '.entries[$UID][$POD_NAME]')
+numa_node_number=$(ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null $node_ip -- lscpu | grep "NUMA node(s)" | awk -F ':' '{print $2}')
+for (( node=0; node<$numa_node_number; node++ )); do
+ ranges=$(ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null $node_ip -- lscpu | grep "NUMA node"$node | awk -F ':' '{print $2}')
+ ranges=(${ranges//,/ })
+ for range in ${ranges[@]}; do
+ min=$(echo $range | awk -F '-' '{print $1}')
+ max=$(echo $range | awk -F '-' '{print $2}')
+ if [ $cpu_core -ge $min ] && [ $cpu_core -le $max ]; then
+ cpu_numa_node=$node
+ fi
+ done
+done
+
+vf_pci=$(kubectl exec -it $pod_name -- env | grep PCIDEVICE_INTEL_COM_INTEL_SRIOV_NIC | awk -F '=' '{print $2}' | sed 's/\r//g')
+vf_numa_node=$(ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null $node_ip -- cat /sys/bus/pci/devices/$vf_pci/numa_node)
+
+echo "The allocated cpu core is:" $cpu_core
+echo "The numa node of the allocated cpu core is:" $cpu_numa_node
+echo "The PCI address of the allocated vf is:" $vf_pci
+echo "The numa node of the allocated vf is:" $vf_numa_node
+if [ $cpu_numa_node == $vf_numa_node ]; then
+ echo "The allocated cpu core and vf are on the same numa node"
+else
+ echo "The allocated cpu core and vf are on different numa nodes"
+fi
+
+kubectl delete pod $pod_name --now
+echo "Test complete."
diff --git a/kud/tests/topology-manager.sh b/kud/tests/topology-manager.sh
index 5c9f900d..b1126aac 100755
--- a/kud/tests/topology-manager.sh
+++ b/kud/tests/topology-manager.sh
@@ -76,15 +76,15 @@ while [[ $status_phase != "Running" ]]; do
fi
done
-container_id=$(kubectl describe pod $pod_name | grep "Container ID" | awk '{print $3}' )
-container_id=${container_id#docker://}
-container_id=${container_id:0:12}
+uid=$(kubectl get pod pod-topology-manager -o jsonpath='{.metadata.uid}')
+node_name=$(kubectl get pod $pod_name -o jsonpath='{.spec.nodeName}')
+node_ip=$(kubectl get node $node_name -o jsonpath='{.status.addresses[].address}')
apt-get install -y jq
-cpu_core=$(cat /var/lib/kubelet/cpu_manager_state | jq -r .| grep ${container_id} | awk -F ':' '{print $2}'| awk -F '"' '{print $2}')
-numa_node_number=$(lscpu | grep "NUMA node(s)" | awk -F ':' '{print $2}')
+cpu_core=$(ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null $node_ip -- cat /var/lib/kubelet/cpu_manager_state | jq -r --arg UID "${uid}" --arg POD_NAME "${pod_name}" '.entries[$UID][$POD_NAME]')
+numa_node_number=$(ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null $node_ip -- lscpu | grep "NUMA node(s)" | awk -F ':' '{print $2}')
for (( node=0; node<$numa_node_number; node++ )); do
- ranges=$(lscpu | grep "NUMA node"$node | awk -F ':' '{print $2}')
+ ranges=$(ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null $node_ip -- lscpu | grep "NUMA node"$node | awk -F ':' '{print $2}')
ranges=(${ranges//,/ })
for range in ${ranges[@]}; do
min=$(echo $range | awk -F '-' '{print $1}')
@@ -95,8 +95,8 @@ for (( node=0; node<$numa_node_number; node++ )); do
done
done
-vf_pci=$(kubectl exec -it $pod_name env | grep PCIDEVICE_INTEL_COM_INTEL_SRIOV_700 | awk -F '=' '{print $2}' | sed 's/\r//g')
-vf_numa_node=$(cat /sys/bus/pci/devices/$vf_pci/numa_node)
+vf_pci=$(kubectl exec -it $pod_name -- env | grep PCIDEVICE_INTEL_COM_INTEL_SRIOV_700 | awk -F '=' '{print $2}' | sed 's/\r//g')
+vf_numa_node=$(ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null $node_ip -- cat /sys/bus/pci/devices/$vf_pci/numa_node)
echo "The allocated cpu core is:" $cpu_core
echo "The numa node of the allocated cpu core is:" $cpu_numa_node