Add retries to drain during upgrade. Allow leaving nodes cordoned after drain failure. Allow continuing upgrade if drain fails. (#7206)
This commit is contained in:
parent
9007d6621a
commit
d378d789cf
2 changed files with 12 additions and 2 deletions
|
@@ -3,6 +3,11 @@ drain_grace_period: 300
|
|||
drain_timeout: 360s
|
||||
drain_pod_selector: ""
|
||||
drain_nodes: true
|
||||
drain_retries: 3
|
||||
drain_retry_delay_seconds: 10
|
||||
|
||||
upgrade_node_uncordon_after_drain_failure: true
|
||||
upgrade_node_fail_if_drain_fails: true
|
||||
|
||||
upgrade_node_confirm: false
|
||||
upgrade_node_pause_seconds: 0
|
||||
|
|
|
@@ -77,14 +77,19 @@
|
|||
--timeout {{ drain_timeout }}
|
||||
--delete-local-data {{ kube_override_hostname|default(inventory_hostname) }}
|
||||
{% if drain_pod_selector %}--pod-selector '{{ drain_pod_selector }}'{% endif %}
|
||||
when:
|
||||
- drain_nodes
|
||||
when: drain_nodes
|
||||
register: result
|
||||
until: result.rc == 0
|
||||
retries: "{{ drain_retries }}"
|
||||
delay: "{{ drain_retry_delay_seconds }}"
|
||||
rescue:
|
||||
- name: Set node back to schedulable
|
||||
command: "{{ bin_dir }}/kubectl --kubeconfig /etc/kubernetes/admin.conf uncordon {{ inventory_hostname }}"
|
||||
when: upgrade_node_uncordon_after_drain_failure
|
||||
- name: Fail after rescue
|
||||
fail:
|
||||
msg: "Failed to drain node {{ inventory_hostname }}"
|
||||
when: upgrade_node_fail_if_drain_fails
|
||||
delegate_to: "{{ groups['kube-master'][0] }}"
|
||||
when:
|
||||
- needs_cordoning
|
||||
|
|
Loading…
Reference in a new issue