diff --git a/roles/upgrade/pre-upgrade/defaults/main.yml b/roles/upgrade/pre-upgrade/defaults/main.yml
index 0179ebbc6..ddff1ea55 100644
--- a/roles/upgrade/pre-upgrade/defaults/main.yml
+++ b/roles/upgrade/pre-upgrade/defaults/main.yml
@@ -3,6 +3,11 @@
 drain_grace_period: 300
 drain_timeout: 360s
 drain_pod_selector: ""
 drain_nodes: true
+drain_retries: 3
+drain_retry_delay_seconds: 10
+
+upgrade_node_uncordon_after_drain_failure: true
+upgrade_node_fail_if_drain_fails: true
 upgrade_node_confirm: false
 upgrade_node_pause_seconds: 0
diff --git a/roles/upgrade/pre-upgrade/tasks/main.yml b/roles/upgrade/pre-upgrade/tasks/main.yml
index 18f44470a..bf436d360 100644
--- a/roles/upgrade/pre-upgrade/tasks/main.yml
+++ b/roles/upgrade/pre-upgrade/tasks/main.yml
@@ -77,14 +77,19 @@
         --timeout {{ drain_timeout }}
         --delete-local-data {{ kube_override_hostname|default(inventory_hostname) }}
         {% if drain_pod_selector %}--pod-selector '{{ drain_pod_selector }}'{% endif %}
-      when:
-        - drain_nodes
+      when: drain_nodes
+      register: result
+      until: result.rc == 0
+      retries: "{{ drain_retries }}"
+      delay: "{{ drain_retry_delay_seconds }}"
   rescue:
     - name: Set node back to schedulable
       command: "{{ bin_dir }}/kubectl --kubeconfig /etc/kubernetes/admin.conf uncordon {{ inventory_hostname }}"
+      when: upgrade_node_uncordon_after_drain_failure
     - name: Fail after rescue
       fail:
         msg: "Failed to drain node {{ inventory_hostname }}"
+      when: upgrade_node_fail_if_drain_fails
   delegate_to: "{{ groups['kube-master'][0] }}"
   when:
     - needs_cordoning
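
A minimal usage sketch of how the new variables compose, assuming a standard Kubespray inventory layout; the file path and values below are illustrative, not part of this patch:

# inventory/mycluster/group_vars/all/upgrade.yml (hypothetical path)
# Retry each node drain up to drain_retries times, waiting
# drain_retry_delay_seconds between attempts:
drain_retries: 5
drain_retry_delay_seconds: 30
# If all drain attempts fail, skip the rescue uncordon so the node
# stays cordoned for inspection...
upgrade_node_uncordon_after_drain_failure: false
# ...and skip the rescue fail task so the upgrade play continues
# to the remaining nodes instead of aborting:
upgrade_node_fail_if_drain_fails: false

With the defaults (both rescue toggles true) the pre-upgrade role keeps its previous behavior: on a failed drain the node is uncordoned and the play fails, just with retries added in front.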