Remove ignore_errors from drain tasks and enable retries (#7151)

* Remove ignore_errors from drain tasks and enable retries

* Fix lint error by checking that stdout length is not 0, i.e. the string is not empty.

(cherry picked from commit ccd3aeebbc)
This commit is contained in:
David Louks 2021-01-15 15:17:43 -06:00 committed by Kubernetes Prow Robot
parent 0f7b9363f9
commit 26183c2523
2 changed files with 26 additions and 11 deletions

View file

@ -2,3 +2,5 @@
# Defaults consumed by the node drain task.
# When true, a non-zero kubectl return code does not fail the task.
allow_ungraceful_removal: false
# Value passed as `--grace-period` in the drain command.
drain_grace_period: 300
# Value passed as `--timeout` in the drain command.
drain_timeout: 360s
# Used as the `retries` count on the drain task.
drain_retries: 3
# Used as the `delay` between drain retries.
drain_retry_delay_seconds: 10

View file

@ -1,14 +1,26 @@
---
# Runs `kubectl cordon` for each entry of the comma-separated `node` variable,
# using the host's kube_override_hostname when set, else the item itself.
# A non-zero return code fails the task unless allow_ungraceful_removal is true.
# NOTE(review): this text is a flattened diff; these lines may be the removed
# (pre-change) side of the hunk — confirm against the actual task file.
- name: cordon-node | Mark all nodes as unschedulable before drain # noqa 301
command: >-
{{ bin_dir }}/kubectl cordon {{ hostvars[item]['kube_override_hostname']|default(item) }}
with_items:
- "{{ node.split(',') | default(groups['kube-node']) }}"
register: result
failed_when: result.rc != 0 and not allow_ungraceful_removal
# Start from an empty list; the "Generate list of nodes to drain" task
# appends to `nodes_to_drain`.
- name: remove-node | Set `nodes_to_drain` as empty list
set_fact:
nodes_to_drain: []
# For each requested host, lists the cluster's node names with kubectl and
# uses jq to keep only the name matching that host (its
# kube_override_hostname when set, otherwise the loop item), anchored ^...$.
# An empty stdout therefore means the host was not found among the nodes.
# Runs once, delegated to the first kube-master host; per-item results are
# registered in `nodes`, and the task never reports "changed".
- name: remove-node | Identify nodes to drain, ignore non-cluster nodes
shell: |
set -o pipefail
{{ bin_dir }}/kubectl get nodes -o json \
| jq .items[].metadata.name \
| jq "select(. | test(\"^{{ hostvars[item]['kube_override_hostname']|default(item) }}$\"))"
loop: "{{ node.split(',') | default(groups['kube-node']) }}"
register: nodes
delegate_to: "{{ groups['kube-master']|first }}"
changed_when: false
run_once: true
# Append each matched node name to `nodes_to_drain`, skipping loop results
# whose stdout is empty (no matching node was printed for that host).
# The concatenation happens inside a single Jinja expression so the fact is
# built as a real list, not a rendered string that has to be re-parsed.
# regex_replace strips double-quote characters from the captured stdout.
- name: remove-node | Generate list of nodes to drain
  set_fact:
    nodes_to_drain: "{{ nodes_to_drain + [item.stdout | regex_replace('\"', '')] }}"
  loop: "{{ nodes.results }}"
  when: item.stdout | length != 0
  run_once: true
ignore_errors: yes
- name: remove-node | Drain node except daemonsets resource # noqa 301
command: >-
@ -18,10 +30,11 @@
--grace-period {{ drain_grace_period }}
--timeout {{ drain_timeout }}
--delete-local-data {{ hostvars[item]['kube_override_hostname']|default(item) }}
with_items:
- "{{ node.split(',') | default(groups['kube-node']) }}"
loop: "{{ nodes_to_drain }}"
register: result
failed_when: result.rc != 0 and not allow_ungraceful_removal
delegate_to: "{{ groups['kube-master']|first }}"
run_once: true
ignore_errors: yes
until: result.rc == 0 or allow_ungraceful_removal
retries: "{{ drain_retries }}"
delay: "{{ drain_retry_delay_seconds }}"