Refactor remove node to allow removing dead nodes and etcd members (#5009)

Change-Id: I1c59249f08f16d0f6fd60df6ab61f17a0a7df189
Matthew Mosesohn 2019-08-07 14:46:50 +03:00 committed by Kubernetes Prow Robot
parent 7abf6a6958
commit a44235d11b
3 changed files with 70 additions and 17 deletions


@@ -51,20 +51,27 @@ You may want to add worker, master or etcd nodes to your existing cluster. This
 Remove nodes
 ------------
-You may want to remove **worker** nodes to your existing cluster. This can be done by re-running the `remove-node.yml` playbook. First, all nodes will be drained, then stop some kubernetes services and delete some certificates, and finally execute the kubectl command to delete these nodes. This can be combined with the add node function, This is generally helpful when doing something like autoscaling your clusters. Of course if a node is not working, you can remove the node and install it again.
+You may want to remove **master**, **worker**, or **etcd** nodes from your
+existing cluster. This can be done by re-running the `remove-node.yml`
+playbook. First, all specified nodes are drained, then some Kubernetes
+services are stopped and some certificates are deleted, and finally
+kubectl is executed to delete these nodes.
+This can be combined with the add node function. This is generally helpful
+when doing something like autoscaling your clusters. Of course, if a node
+is not working, you can remove the node and install it again.
 
 Add worker nodes to the list under kube-node if you want to delete them (or utilize a [dynamic inventory](https://docs.ansible.com/ansible/intro_dynamic_inventory.html)).
 
     ansible-playbook -i inventory/mycluster/hosts.yml remove-node.yml -b -v \
       --private-key=~/.ssh/private_key
 
-Use `--extra-vars "node=<nodename>,<nodename2>"` to select the node you want to delete.
+Use `--extra-vars "node=<nodename>,<nodename2>"` to select the node(s) you want to delete.
 ```
 ansible-playbook -i inventory/mycluster/hosts.yml remove-node.yml -b -v \
   --private-key=~/.ssh/private_key \
   --extra-vars "node=nodename,nodename2"
 ```
+If a node is completely unreachable over SSH, add `--extra-vars reset_nodes=no`
+to skip the node reset step. If one node is unavailable but the others you
+wish to remove can be reached over SSH, you can set `reset_nodes=no` as a
+host var in the inventory.
 
 Connecting to Kubernetes
 ------------------------
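
A note on the `reset_nodes` host var mentioned above: the playbook applies it through a per-host `when: reset_nodes|default(True)` condition (see the `remove-node.yml` diff below), so it can be set on just the unreachable machine. A minimal sketch, assuming the standard `hosts.yml` inventory layout; the node name and address here are made up:

```
all:
  hosts:
    node3:
      ansible_host: 10.0.0.13   # illustrative address
      reset_nodes: no           # dead host: skip the reset role for it only
```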

remove-node.yml

@@ -1,6 +1,7 @@
 ---
 - hosts: localhost
+  become: no
   gather_facts: no
   tasks:
     - name: "Check ansible version >=2.7.8"
       assert:
@@ -12,12 +13,8 @@
   vars:
     ansible_connection: local
 
-- hosts: all
-  vars:
-    ansible_ssh_pipelining: true
-  gather_facts: true
-
+- hosts: "{{ node | default('etcd:k8s-cluster:calico-rr') }}"
   gather_facts: no
   vars_prompt:
     name: "delete_nodes_confirmation"
     prompt: "Are you sure you want to delete nodes state? Type 'yes' to delete nodes."
@@ -31,16 +28,20 @@
   when: delete_nodes_confirmation != "yes"
 
 - hosts: kube-master
   gather_facts: no
   roles:
     - { role: kubespray-defaults }
     - { role: remove-node/pre-remove, tags: pre-remove }
 
 - hosts: "{{ node | default('kube-node') }}"
   gather_facts: no
   roles:
     - { role: kubespray-defaults }
-    - { role: reset, tags: reset }
+    - { role: reset, tags: reset, when: reset_nodes|default(True) }
 
-- hosts: kube-master
+# Currently cannot remove first master or etcd
+- hosts: "{{ node | default('kube-master[1:]:etcd[:1]') }}"
   gather_facts: no
   roles:
     - { role: kubespray-defaults }
     - { role: remove-node/post-remove, tags: post-remove }
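
Taken together, these plays mean a dead etcd node can be removed in one run: the reset role is skipped for it, and the post-remove play deletes both the Kubernetes Node object and the etcd member. A hypothetical invocation (the node name is made up):

```
ansible-playbook -i inventory/mycluster/hosts.yml remove-node.yml -b -v \
  --private-key=~/.ssh/private_key \
  --extra-vars "node=etcd3 reset_nodes=no"
```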

roles/remove-node/post-remove/tasks/main.yml

@@ -1,9 +1,54 @@
 ---
+- name: Lookup node IP in kubernetes
+  shell: >-
+    {{ bin_dir }}/kubectl get nodes {{ node }}
+    -o jsonpath='{range.status.addresses[?(@.type=="InternalIP")]}{.address}{"\n"}{end}'
+  register: remove_node_ip
+  when:
+    - inventory_hostname in groups['etcd']
+    - ip is not defined
+    - access_ip is not defined
+  delegate_to: "{{ groups['etcd']|first }}"
+  failed_when: false
+
+- name: Set node IP
+  set_fact:
+    node_ip: "{{ ip | default(access_ip | default(remove_node_ip.stdout)) | trim }}"
+
 - name: Delete node
-  command: "{{ bin_dir }}/kubectl delete node {{ item }}"
-  with_items:
-    - "{{ node.split(',') | default(groups['kube-node']) }}"
+  command: "{{ bin_dir }}/kubectl delete node {{ inventory_hostname }}"
   delegate_to: "{{ groups['kube-master']|first }}"
+  run_once: true
   ignore_errors: yes
+
+- name: Lookup etcd member id
+  shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_access_addresses }} member list | grep {{ node_ip }} | cut -d: -f1"
+  register: etcd_member_id
+  ignore_errors: true
+  changed_when: false
+  check_mode: no
+  tags:
+    - facts
+  environment:
+    ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/admin-{{ groups['etcd']|first }}.pem"
+    ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/admin-{{ groups['etcd']|first }}-key.pem"
+    ETCDCTL_CA_FILE: "{{ etcd_cert_dir }}/ca.pem"
+  delegate_to: "{{ groups['etcd']|first }}"
+  when: inventory_hostname in groups['etcd']
+
+- name: Remove etcd member from cluster
+  shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_access_addresses }} member remove {{ etcd_member_id.stdout }}"
+  register: etcd_member_in_cluster
+  ignore_errors: true
+  changed_when: false
+  check_mode: no
+  tags:
+    - facts
+  environment:
+    ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/admin-{{ groups['etcd']|first }}.pem"
+    ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/admin-{{ groups['etcd']|first }}-key.pem"
+    ETCDCTL_CA_FILE: "{{ etcd_cert_dir }}/ca.pem"
+  delegate_to: "{{ groups['etcd']|first }}"
+  when:
+    - inventory_hostname in groups['etcd']
+    - etcd_member_id.stdout
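
For context on the `member list | grep | cut` pipeline: etcdctl (v2 API, as implied by `--no-sync`) prints one line per member, beginning with the member id followed by a colon, so filtering on the node's IP and cutting at the first `:` yields the id that the remove task needs. Illustrative output, with invented ids and addresses:

```
$ etcdctl --no-sync --endpoints=https://10.0.0.1:2379 member list
1609b5a3a1303755: name=etcd1 peerURLs=https://10.0.0.1:2380 clientURLs=https://10.0.0.1:2379 isLeader=true
cf1d15c5d194b5b9: name=etcd2 peerURLs=https://10.0.0.2:2380 clientURLs=https://10.0.0.2:2379 isLeader=false

$ etcdctl --no-sync --endpoints=https://10.0.0.1:2379 member list | grep 10.0.0.2 | cut -d: -f1
cf1d15c5d194b5b9
```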