From 514359e55674a662d2ff5b1beec247231a193abe Mon Sep 17 00:00:00 2001
From: Matthew Mosesohn
Date: Fri, 20 Oct 2017 08:02:31 +0100
Subject: [PATCH] Improve etcd scale up (#1846)

Now adding unjoined members to existing etcd cluster occurs one at a time so that the cluster does not lose quorum.
---
# Reviewer notes. Text between the three-dash line and the diff is ignored
# when the patch is applied and is not part of the commit message.
#
# roles/etcd/tasks/configure.yml
#   * Removes the task that ran `etcdctl member add` directly, guarded by
#     is_etcd_master, etcd_member_in_cluster.rc != 0 and
#     etcd_cluster_is_healthy.rc == 0.
#   * Appends an include of join_member.yml that loops over groups['etcd'],
#     passes the current item as target_node and sets loop_control pause: 10
#     between items. The include is conditioned on inventory_hostname == item,
#     etcd_member_in_cluster.rc != 0 and etcd_cluster_is_healthy.rc == 0.
#
# roles/etcd/tasks/join_member.yml (new file)
#   Every task carries `when: target_node == inventory_hostname`. In order:
#   1. `etcdctl member add`, retried until rc == 0 (retries: 4,
#      delay: retry_stagger | random + 3).
#   2. Include refresh_config.yml with etcd_peer_addresses overridden: one
#      "etcd<loop.index>=https://<address>:2380," entry per host in
#      groups['etcd'] whose registered etcd_member_in_cluster.rc is 0, then
#      "<etcd_member_name>=<etcd_peer_url>" appended on the last iteration.
#   3. `systemctl daemon-reload`.
#   4. Ensure the etcd service is started and enabled.
#   5. Re-register etcd_member_in_cluster from `etcdctl member list | grep -q`.
#   NOTE(review): step 2 names existing members by their position in
#   groups['etcd']; presumably this matches how etcd_member_name is defined
#   elsewhere in the role -- confirm.
#   NOTE(review): step 2 reads hostvars[host]['etcd_member_in_cluster'].rc for
#   every host in the group; assumes that result is registered with an rc on
#   all of them -- confirm.
#
# roles/etcd/templates/etcd-docker.service.j2
#   * EnvironmentFile=/etc/etcd.env becomes EnvironmentFile=-/etc/etcd.env.
#     Per systemd.exec(5) the leading "-" means a missing file is not treated
#     as an error.

 roles/etcd/tasks/configure.yml              | 13 +++++--
 roles/etcd/tasks/join_member.yml            | 41 +++++++++++++++++++++
 roles/etcd/templates/etcd-docker.service.j2 |  2 +-
 3 files changed, 51 insertions(+), 5 deletions(-)
 create mode 100644 roles/etcd/tasks/join_member.yml

diff --git a/roles/etcd/tasks/configure.yml b/roles/etcd/tasks/configure.yml
index 5f8756e71..a0a4b5cf4 100644
--- a/roles/etcd/tasks/configure.yml
+++ b/roles/etcd/tasks/configure.yml
@@ -9,10 +9,6 @@
   tags:
     - facts
 
-- name: Configure | Add member to the cluster if it is not there
-  when: is_etcd_master and etcd_member_in_cluster.rc != 0 and etcd_cluster_is_healthy.rc == 0
-  shell: "{{ bin_dir }}/etcdctl --peers={{ etcd_access_addresses }} member add {{ etcd_member_name }} {{ etcd_peer_url }}"
-
 - name: Install etcd launch script
   template:
     src: etcd.j2
@@ -29,3 +25,12 @@
     backup: yes
   when: is_etcd_master
   notify: restart etcd
+
+- name: Configure | Join member(s) to cluster one at a time
+  include: join_member.yml
+  vars:
+    target_node: "{{ item }}"
+  loop_control:
+    pause: 10
+  with_items: "{{ groups['etcd'] }}"
+  when: inventory_hostname == item and etcd_member_in_cluster.rc != 0 and etcd_cluster_is_healthy.rc == 0
diff --git a/roles/etcd/tasks/join_member.yml b/roles/etcd/tasks/join_member.yml
new file mode 100644
index 000000000..47c2fbd6e
--- /dev/null
+++ b/roles/etcd/tasks/join_member.yml
@@ -0,0 +1,41 @@
+---
+- name: Join Member | Add member to cluster
+  shell: "{{ bin_dir }}/etcdctl --peers={{ etcd_access_addresses }} member add {{ etcd_member_name }} {{ etcd_peer_url }}"
+  register: member_add_result
+  until: member_add_result.rc == 0
+  retries: 4
+  delay: "{{ retry_stagger | random + 3 }}"
+  when: target_node == inventory_hostname
+
+- include: refresh_config.yml
+  vars:
+    etcd_peer_addresses: >-
+      {% for host in groups['etcd'] -%}
+        {%- if hostvars[host]['etcd_member_in_cluster'].rc == 0 -%}
+          {{ "etcd"+loop.index|string }}=https://{{ hostvars[host].access_ip | default(hostvars[host].ip | default(hostvars[host].ansible_default_ipv4['address'])) }}:2380,
+        {%- endif -%}
+        {%- if loop.last -%}
+          {{ etcd_member_name }}={{ etcd_peer_url }}
+        {%- endif -%}
+      {%- endfor -%}
+  when: target_node == inventory_hostname
+
+- name: Join Member | reload systemd
+  command: systemctl daemon-reload
+  when: target_node == inventory_hostname
+
+- name: Join Member | Ensure etcd is running
+  service:
+    name: etcd
+    state: started
+    enabled: yes
+  when: target_node == inventory_hostname
+
+- name: Join Member | Ensure member is in cluster
+  shell: "{{ bin_dir }}/etcdctl --no-sync --peers={{ etcd_access_addresses }} member list | grep -q {{ etcd_access_address }}"
+  register: etcd_member_in_cluster
+  changed_when: false
+  check_mode: no
+  tags:
+    - facts
+  when: target_node == inventory_hostname
diff --git a/roles/etcd/templates/etcd-docker.service.j2 b/roles/etcd/templates/etcd-docker.service.j2
index 18deee1d9..4dfbd723d 100644
--- a/roles/etcd/templates/etcd-docker.service.j2
+++ b/roles/etcd/templates/etcd-docker.service.j2
@@ -6,7 +6,7 @@ After=docker.service
 [Service]
 User=root
 PermissionsStartOnly=true
-EnvironmentFile=/etc/etcd.env
+EnvironmentFile=-/etc/etcd.env
 ExecStart={{ bin_dir }}/etcd
 ExecStartPre=-{{ docker_bin_dir }}/docker rm -f {{ etcd_member_name | default("etcd") }}
 ExecStop={{ docker_bin_dir }}/docker stop {{ etcd_member_name | default("etcd") }}