From bbd116114760abd2895293010a9df55c645e8bc6 Mon Sep 17 00:00:00 2001
From: Tristan
Date: Mon, 22 Aug 2022 10:37:44 +0100
Subject: [PATCH] 9035: Make Cilium rolling-restart delay/timeout configurable (#9176)

See #9035
---
 docs/cilium.md                                | 29 +++++++++++++++++++
 roles/network_plugin/cilium/defaults/main.yml |  4 +++
 roles/network_plugin/cilium/tasks/apply.yml   |  4 +--
 3 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/docs/cilium.md b/docs/cilium.md
index 665e319ae..e9c3e0d2b 100644
--- a/docs/cilium.md
+++ b/docs/cilium.md
@@ -153,3 +153,32 @@ cilium_hubble_metrics:
 ```
 
 [More](https://docs.cilium.io/en/v1.9/operations/metrics/#hubble-exported-metrics)
+
+## Upgrade considerations
+
+### Rolling-restart timeouts
+
+Cilium relies on the kernel's BPF support, which is extremely fast at runtime but incurs a compilation penalty on initialization and update.
+
+As a result, the Cilium DaemonSet pods can take a significant time to start, which scales with the number of nodes and endpoints in your cluster.
+
+As part of cluster.yml, this DaemonSet is restarted, and Kubespray's [default timeouts for this operation](../roles/network_plugin/cilium/defaults/main.yml)
+are not appropriate for large clusters.
+
+This means that you will likely want to update these timeouts to a value more in line with your cluster's number of nodes and their respective CPU performance.
+This is configured by the following values:
+
+```yaml
+# Configure how long to wait for the Cilium DaemonSet to be ready again
+cilium_rolling_restart_wait_retries_count: 30
+cilium_rolling_restart_wait_retries_delay_seconds: 10
+```
+
+The total time allowed (count * delay) should be at least `($number_of_nodes_in_cluster * $cilium_pod_start_time)` for successful rolling updates. There are no
+drawbacks to making it higher and giving yourself a time buffer to accommodate transient slowdowns.
+
+Note: To find the `$cilium_pod_start_time` for your cluster, you can simply restart a Cilium pod on a node of your choice and look at how long it takes for it
+to become ready.
+
+Note 2: The default CPU requests/limits for Cilium pods are set to a very conservative 100m:500m, which will likely yield very slow startup for Cilium pods. You
+probably want to significantly increase the CPU limit, specifically if short bursts of CPU from Cilium are acceptable to you.
diff --git a/roles/network_plugin/cilium/defaults/main.yml b/roles/network_plugin/cilium/defaults/main.yml
index e244735d9..c590637dc 100644
--- a/roles/network_plugin/cilium/defaults/main.yml
+++ b/roles/network_plugin/cilium/defaults/main.yml
@@ -236,3 +236,7 @@ cilium_enable_bpf_clock_probe: true
 
 # -- Whether to enable CNP status updates.
 cilium_disable_cnp_status_updates: true
+
+# Configure how long to wait for the Cilium DaemonSet to be ready again
+cilium_rolling_restart_wait_retries_count: 30
+cilium_rolling_restart_wait_retries_delay_seconds: 10
diff --git a/roles/network_plugin/cilium/tasks/apply.yml b/roles/network_plugin/cilium/tasks/apply.yml
index ac323a4aa..b977c2177 100644
--- a/roles/network_plugin/cilium/tasks/apply.yml
+++ b/roles/network_plugin/cilium/tasks/apply.yml
@@ -14,8 +14,8 @@
   command: "{{ kubectl }} -n kube-system get pods -l k8s-app=cilium -o jsonpath='{.items[?(@.status.containerStatuses[0].ready==false)].metadata.name}'"  # noqa 601
   register: pods_not_ready
   until: pods_not_ready.stdout.find("cilium")==-1
-  retries: 30
-  delay: 10
+  retries: "{{ cilium_rolling_restart_wait_retries_count | int }}"
+  delay: "{{ cilium_rolling_restart_wait_retries_delay_seconds | int }}"
   failed_when: false
   when: inventory_hostname == groups['kube_control_plane'][0]
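
For sizing the new variables, a rough worked example with hypothetical numbers (a 200-node cluster whose Cilium pods take roughly 30 seconds to become ready); the inventory path shown is the usual sample-inventory location for Cilium overrides and is only illustrative:

```yaml
# inventory/mycluster/group_vars/k8s_cluster/k8s-net-cilium.yml (illustrative values)
# Required total wait >= number_of_nodes * cilium_pod_start_time = 200 * 30s = 6000s.
# retries * delay = 600 * 10s = 6000s, which just meets that minimum;
# a larger retries count only adds buffer and costs nothing on healthy clusters.
cilium_rolling_restart_wait_retries_count: 600
cilium_rolling_restart_wait_retries_delay_seconds: 10
```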