Remove ignore_errors from drain tasks and enable retries (#7151)
* Remove ignore_errors from drain tasks and enable retries
* Fix lint error by checking that the stdout length is not 0, i.e. the string is not empty.
(cherry picked from commit ccd3aeebbc)
This commit is contained in:
parent
0f7b9363f9
commit
26183c2523
2 changed files with 26 additions and 11 deletions
|
@ -2,3 +2,5 @@
|
||||||
allow_ungraceful_removal: false
|
allow_ungraceful_removal: false
|
||||||
drain_grace_period: 300
|
drain_grace_period: 300
|
||||||
drain_timeout: 360s
|
drain_timeout: 360s
|
||||||
|
drain_retries: 3
|
||||||
|
drain_retry_delay_seconds: 10
|
||||||
|
|
|
@ -1,14 +1,26 @@
|
||||||
---
|
---
|
||||||
- name: cordon-node | Mark all nodes as unschedulable before drain # noqa 301
|
- name: remove-node | Set `nodes_to_drain` as empty list
|
||||||
command: >-
|
set_fact:
|
||||||
{{ bin_dir }}/kubectl cordon {{ hostvars[item]['kube_override_hostname']|default(item) }}
|
nodes_to_drain: []
|
||||||
with_items:
|
|
||||||
- "{{ node.split(',') | default(groups['kube-node']) }}"
|
- name: remove-node | Identify nodes to drain, ignore non-cluster nodes
|
||||||
register: result
|
shell: |
|
||||||
failed_when: result.rc != 0 and not allow_ungraceful_removal
|
set -o pipefail
|
||||||
|
{{ bin_dir }}/kubectl get nodes -o json \
|
||||||
|
| jq .items[].metadata.name \
|
||||||
|
| jq "select(. | test(\"^{{ hostvars[item]['kube_override_hostname']|default(item) }}$\"))"
|
||||||
|
loop: "{{ node.split(',') | default(groups['kube-node']) }}"
|
||||||
|
register: nodes
|
||||||
delegate_to: "{{ groups['kube-master']|first }}"
|
delegate_to: "{{ groups['kube-master']|first }}"
|
||||||
|
changed_when: false
|
||||||
|
run_once: true
|
||||||
|
|
||||||
|
- name: remove-node | Generate list of nodes to drain
|
||||||
|
set_fact:
|
||||||
|
nodes_to_drain: "{{ nodes_to_drain }} + [ '{{ item.stdout | regex_replace('\"', '') }}' ]"
|
||||||
|
loop: "{{ nodes.results }}"
|
||||||
|
when: item.stdout | length != 0
|
||||||
run_once: true
|
run_once: true
|
||||||
ignore_errors: yes
|
|
||||||
|
|
||||||
- name: remove-node | Drain node except daemonsets resource # noqa 301
|
- name: remove-node | Drain node except daemonsets resource # noqa 301
|
||||||
command: >-
|
command: >-
|
||||||
|
@ -18,10 +30,11 @@
|
||||||
--grace-period {{ drain_grace_period }}
|
--grace-period {{ drain_grace_period }}
|
||||||
--timeout {{ drain_timeout }}
|
--timeout {{ drain_timeout }}
|
||||||
--delete-local-data {{ hostvars[item]['kube_override_hostname']|default(item) }}
|
--delete-local-data {{ hostvars[item]['kube_override_hostname']|default(item) }}
|
||||||
with_items:
|
loop: "{{ nodes_to_drain }}"
|
||||||
- "{{ node.split(',') | default(groups['kube-node']) }}"
|
|
||||||
register: result
|
register: result
|
||||||
failed_when: result.rc != 0 and not allow_ungraceful_removal
|
failed_when: result.rc != 0 and not allow_ungraceful_removal
|
||||||
delegate_to: "{{ groups['kube-master']|first }}"
|
delegate_to: "{{ groups['kube-master']|first }}"
|
||||||
run_once: true
|
run_once: true
|
||||||
ignore_errors: yes
|
until: result.rc == 0 or allow_ungraceful_removal
|
||||||
|
retries: "{{ drain_retries }}"
|
||||||
|
delay: "{{ drain_retry_delay_seconds }}"
|
||||||
|
|
Loading…
Reference in a new issue