diff --git a/roles/remove-node/pre-remove/defaults/main.yml b/roles/remove-node/pre-remove/defaults/main.yml
index 1e5b849cc..deaa8afb7 100644
--- a/roles/remove-node/pre-remove/defaults/main.yml
+++ b/roles/remove-node/pre-remove/defaults/main.yml
@@ -2,3 +2,7 @@
 allow_ungraceful_removal: false
 drain_grace_period: 300
 drain_timeout: 360s
+# Used as `retries` of the drain task in tasks/main.yml
+drain_retries: 3
+# Used as `delay` (seconds between attempts) of the drain task
+drain_retry_delay_seconds: 10
diff --git a/roles/remove-node/pre-remove/tasks/main.yml b/roles/remove-node/pre-remove/tasks/main.yml
index 563fa036e..42316e209 100644
--- a/roles/remove-node/pre-remove/tasks/main.yml
+++ b/roles/remove-node/pre-remove/tasks/main.yml
@@ -1,14 +1,34 @@
 ---
-- name: cordon-node | Mark all nodes as unschedulable before drain  # noqa 301
-  command: >-
-    {{ bin_dir }}/kubectl cordon {{ hostvars[item]['kube_override_hostname']|default(item) }}
-  with_items:
-    - "{{ node.split(',') | default(groups['kube-node']) }}"
-  register: result
-  failed_when: result.rc != 0 and not allow_ungraceful_removal
+# Start from an empty list so the set_fact loop below can append to it.
+- name: remove-node | Set `nodes_to_drain` as empty list
+  set_fact:
+    nodes_to_drain: []
+
+# Compare every requested host with the node names returned by kubectl.
+# stdout stays empty for a host whose name is not in that list.
+- name: remove-node | Identify nodes to drain, ignore non-cluster nodes
+  shell: |
+    set -o pipefail
+    {{ bin_dir }}/kubectl get nodes -o json \
+    | jq .items[].metadata.name \
+    | jq "select(. == \"{{ hostvars[item]['kube_override_hostname']|default(item) }}\")"
+  args:
+    # pipefail is a bash option; do not rely on the default /bin/sh having it
+    executable: /bin/bash
+  # Test `node` explicitly: `default` is only applied after `.split` has run
+  loop: "{{ node.split(',') if node is defined else groups['kube-node'] }}"
+  register: nodes
   delegate_to: "{{ groups['kube-master']|first }}"
+  changed_when: false
+  run_once: true
+
+# jq prints names as JSON strings; strip the quotes before collecting them.
+- name: remove-node | Generate list of nodes to drain
+  set_fact:
+    nodes_to_drain: "{{ nodes_to_drain + [item.stdout | regex_replace('\"', '')] }}"
+  loop: "{{ nodes.results }}"
+  when: item.stdout | length != 0
   run_once: true
-  ignore_errors: yes
 
 - name: remove-node | Drain node except daemonsets resource  # noqa 301
   command: >-
@@ -18,10 +38,12 @@
       --grace-period {{ drain_grace_period }}
       --timeout {{ drain_timeout }}
       --delete-local-data {{ hostvars[item]['kube_override_hostname']|default(item) }}
-  with_items:
-    - "{{ node.split(',') | default(groups['kube-node']) }}"
+  loop: "{{ nodes_to_drain }}"
   register: result
   failed_when: result.rc != 0 and not allow_ungraceful_removal
   delegate_to: "{{ groups['kube-master']|first }}"
   run_once: true
-  ignore_errors: yes
+  # Retry a failed drain; a single attempt when ungraceful removal is allowed
+  until: result.rc == 0 or allow_ungraceful_removal
+  retries: "{{ drain_retries }}"
+  delay: "{{ drain_retry_delay_seconds }}"