Improve etcd scale up (#1846)

Unjoined members are now added to an existing etcd cluster
one at a time so that the cluster does not
lose quorum.
This commit is contained in:
Matthew Mosesohn 2017-10-20 08:02:31 +01:00 committed by GitHub
parent 55b9d02a99
commit 514359e556
3 changed files with 51 additions and 5 deletions

View file

@ -9,10 +9,6 @@
tags: tags:
- facts - facts
# Registers this host as a member of the etcd cluster via `etcdctl member add`.
# Runs only when the host is an etcd master, its membership check exited
# non-zero, and the cluster health check exited zero.
- name: Configure | Add member to the cluster if it is not there
  shell: >-
    {{ bin_dir }}/etcdctl --peers={{ etcd_access_addresses }}
    member add {{ etcd_member_name }} {{ etcd_peer_url }}
  when: >-
    is_etcd_master
    and etcd_member_in_cluster.rc != 0
    and etcd_cluster_is_healthy.rc == 0
- name: Install etcd launch script - name: Install etcd launch script
template: template:
src: etcd.j2 src: etcd.j2
@ -29,3 +25,12 @@
backup: yes backup: yes
when: is_etcd_master when: is_etcd_master
notify: restart etcd notify: restart etcd
# Iterates over every host in the `etcd` group and includes join_member.yml
# with target_node set to the current item. The include only fires on the host
# whose inventory_hostname equals that item, and only if its membership check
# exited non-zero while the cluster health check exited zero. A 10 second
# pause separates loop iterations.
- name: Configure | Join member(s) to cluster one at a time
  include: join_member.yml
  with_items: "{{ groups['etcd'] }}"
  loop_control:
    pause: 10
  vars:
    target_node: "{{ item }}"
  # List entries are AND-ed together, matching the original single expression.
  when:
    - inventory_hostname == item
    - etcd_member_in_cluster.rc != 0
    - etcd_cluster_is_healthy.rc == 0

View file

@ -0,0 +1,41 @@
---
# Joins one etcd node to an existing cluster.
#
# Expects the variable `target_node` to be set by the including task. Every
# task below is guarded by `target_node == inventory_hostname`, so only the
# host named by target_node does any work.

# Announce the new member to the cluster. `command` is used instead of `shell`
# because the invocation needs no pipes, redirects or other shell features.
# The call is retried up to 4 times until etcdctl exits 0, with a delay of
# 3 seconds plus a random amount below retry_stagger.
- name: Join Member | Add member to cluster
  command: >-
    {{ bin_dir }}/etcdctl --peers={{ etcd_access_addresses }}
    member add {{ etcd_member_name }} {{ etcd_peer_url }}
  register: member_add_result
  until: member_add_result.rc == 0
  retries: 4
  delay: "{{ retry_stagger | random + 3 }}"
  when: target_node == inventory_hostname

# Include refresh_config.yml with etcd_peer_addresses overridden. The value is
# a comma-separated list of `name=url` entries: one per host in the etcd group
# whose registered etcd_member_in_cluster check exited 0 (named "etcd" plus the
# host's 1-based position in the group), followed by this node's own
# `etcd_member_name=etcd_peer_url` entry, appended on the last loop iteration.
# The Jinja whitespace-control markers (`-%}` / `{%-`) strip the line breaks,
# so the rendered value is a single line.
# NOTE(review): refresh_config.yml is not visible here; presumably it
# re-renders the etcd configuration from this value - confirm.
- include: refresh_config.yml
  vars:
    etcd_peer_addresses: >-
      {% for host in groups['etcd'] -%}
      {%- if hostvars[host]['etcd_member_in_cluster'].rc == 0 -%}
      {{ "etcd"+loop.index|string }}=https://{{ hostvars[host].access_ip | default(hostvars[host].ip | default(hostvars[host].ansible_default_ipv4['address'])) }}:2380,
      {%- endif -%}
      {%- if loop.last -%}
      {{ etcd_member_name }}={{ etcd_peer_url }}
      {%- endif -%}
      {%- endfor -%}
  when: target_node == inventory_hostname

- name: Join Member | reload systemd
  command: systemctl daemon-reload
  when: target_node == inventory_hostname

- name: Join Member | Ensure etcd is running
  service:
    name: etcd
    state: started
    enabled: true
  when: target_node == inventory_hostname

# Verify the join by looking for this node's access address in the member
# list; the task fails if grep finds no match. The result is registered as
# etcd_member_in_cluster, the same variable the peer-list template above reads
# through hostvars, so a later run of this file for another node should see
# this node as joined. The check never reports a change and still runs in
# check mode.
# NOTE(review): the address is interpolated unquoted and treated as a regular
# expression, so `.` matches any character; consider `grep -qF` if the other
# membership checks in this role are changed the same way.
- name: Join Member | Ensure member is in cluster
  shell: "{{ bin_dir }}/etcdctl --no-sync --peers={{ etcd_access_addresses }} member list | grep -q {{ etcd_access_address }}"
  register: etcd_member_in_cluster
  changed_when: false
  check_mode: false
  tags:
    - facts
  when: target_node == inventory_hostname

View file

@ -6,7 +6,7 @@ After=docker.service
[Service] [Service]
User=root User=root
PermissionsStartOnly=true PermissionsStartOnly=true
EnvironmentFile=/etc/etcd.env EnvironmentFile=-/etc/etcd.env
ExecStart={{ bin_dir }}/etcd ExecStart={{ bin_dir }}/etcd
ExecStartPre=-{{ docker_bin_dir }}/docker rm -f {{ etcd_member_name | default("etcd") }} ExecStartPre=-{{ docker_bin_dir }}/docker rm -f {{ etcd_member_name | default("etcd") }}
ExecStop={{ docker_bin_dir }}/docker stop {{ etcd_member_name | default("etcd") }} ExecStop={{ docker_bin_dir }}/docker stop {{ etcd_member_name | default("etcd") }}