From a583a2d9aa773554948cabddf2953e85998898c7 Mon Sep 17 00:00:00 2001
From: Utku Ozdemir
Date: Wed, 20 Oct 2021 12:52:52 +0300
Subject: [PATCH] Implement drain fallback with --disable-eviction to ignore PDBs

Add an optional second drain attempt to the pre-upgrade role for nodes
whose regular "kubectl drain" returns a non-zero exit code.

New defaults in roles/upgrade/pre-upgrade/defaults/main.yml:

  drain_fallback_enabled               false
  drain_fallback_grace_period          300
  drain_fallback_timeout               360s
  drain_fallback_retries               0
  drain_fallback_retry_delay_seconds   10

Changes in roles/upgrade/pre-upgrade/tasks/main.yml:

- "Drain node" gets a failed_when that requires both a non-zero return
  code and "not drain_fallback_enabled".
- "Drain node" takes --grace-period and --timeout from the facts
  drain_grace_period_after_failure and drain_timeout_after_failure on
  localhost when they exist, and from drain_grace_period and
  drain_timeout otherwise.
- A new "Drain fallback" block runs when drain_nodes and
  drain_fallback_enabled are set and the regular drain returned a
  non-zero code. It first stores drain_fallback_grace_period and
  drain_fallback_timeout as the two facts above (delegated to
  localhost, run once), then runs kubectl drain again with
  --disable-eviction, using the drain_fallback_* grace period,
  timeout, retries and delay.

With the default drain_fallback_enabled: false the added failed_when
reduces to "result.rc != 0" and the fallback block is skipped.

Signed-off-by: Utku Ozdemir
---
NOTE(review): this patch was restored from a copy whose line breaks and
indentation had been collapsed. The YAML indentation inside the hunks is
reconstructed from the task structure; verify it against the target tree
before applying.

NOTE(review): "Drain node" combines failed_when with until/retries.
Confirm that exhausting the retries does not still mark the task as
failed on the supported Ansible versions; if it does, the play goes to
rescue and the fallback block is never reached.

 roles/upgrade/pre-upgrade/defaults/main.yml |  6 ++++
 roles/upgrade/pre-upgrade/tasks/main.yml    | 37 +++++++++++++++++++--
 2 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/roles/upgrade/pre-upgrade/defaults/main.yml b/roles/upgrade/pre-upgrade/defaults/main.yml
index ddff1ea55..48ba51299 100644
--- a/roles/upgrade/pre-upgrade/defaults/main.yml
+++ b/roles/upgrade/pre-upgrade/defaults/main.yml
@@ -6,6 +6,12 @@ drain_nodes: true
 drain_retries: 3
 drain_retry_delay_seconds: 10
 
+drain_fallback_enabled: false
+drain_fallback_grace_period: 300
+drain_fallback_timeout: 360s
+drain_fallback_retries: 0
+drain_fallback_retry_delay_seconds: 10
+
 upgrade_node_uncordon_after_drain_failure: true
 upgrade_node_fail_if_drain_fails: true
 
diff --git a/roles/upgrade/pre-upgrade/tasks/main.yml b/roles/upgrade/pre-upgrade/tasks/main.yml
index d969175e3..dd6d6aadc 100644
--- a/roles/upgrade/pre-upgrade/tasks/main.yml
+++ b/roles/upgrade/pre-upgrade/tasks/main.yml
@@ -73,15 +73,48 @@
         {{ bin_dir }}/kubectl drain
         --force
         --ignore-daemonsets
-        --grace-period {{ drain_grace_period }}
-        --timeout {{ drain_timeout }}
+        --grace-period {{ hostvars['localhost']['drain_grace_period_after_failure'] | default(drain_grace_period) }}
+        --timeout {{ hostvars['localhost']['drain_timeout_after_failure'] | default(drain_timeout) }}
         --delete-local-data {{ kube_override_hostname|default(inventory_hostname) }}
         {% if drain_pod_selector %}--pod-selector '{{ drain_pod_selector }}'{% endif %}
       when: drain_nodes
       register: result
+      failed_when:
+        - result.rc != 0
+        - not drain_fallback_enabled
       until: result.rc == 0
       retries: "{{ drain_retries }}"
       delay: "{{ drain_retry_delay_seconds }}"
+
+    - name: Drain fallback
+      block:
+        - name: Set facts after regular drain has failed
+          set_fact:
+            drain_grace_period_after_failure: "{{ drain_fallback_grace_period }}"
+            drain_timeout_after_failure: "{{ drain_fallback_timeout }}"
+          delegate_to: localhost
+          delegate_facts: true
+          run_once: true
+
+        - name: Drain node - fallback with disabled eviction
+          command: >-
+            {{ bin_dir }}/kubectl drain
+            --force
+            --ignore-daemonsets
+            --grace-period {{ drain_fallback_grace_period }}
+            --timeout {{ drain_fallback_timeout }}
+            --delete-local-data {{ kube_override_hostname|default(inventory_hostname) }}
+            {% if drain_pod_selector %}--pod-selector '{{ drain_pod_selector }}'{% endif %}
+            --disable-eviction
+          register: drain_fallback_result
+          until: drain_fallback_result.rc == 0
+          retries: "{{ drain_fallback_retries }}"
+          delay: "{{ drain_fallback_retry_delay_seconds }}"
+      when:
+        - drain_nodes
+        - drain_fallback_enabled
+        - result.rc != 0
+
   rescue:
     - name: Set node back to schedulable
       command: "{{ bin_dir }}/kubectl --kubeconfig /etc/kubernetes/admin.conf uncordon {{ inventory_hostname }}"