Etcd cluster setup makeover

The current way to set up the etcd cluster is messy and buggy.

- It checks whether the cluster is healthy before the cluster is even created.
- The unit files are started by handlers, not in the tasks, so you have to mess with "flush handlers".
- The join_member.yml is not used.
- The etcd-events cluster is not configured for kubeadm.
- remove duplicate runs between running the role on etcd nodes and k8s nodes
This commit is contained in:
woopstar 2018-04-01 18:58:08 +02:00 committed by Andreas Kruger
parent 92fc2df214
commit 86e3506ae6
15 changed files with 135 additions and 172 deletions

View file

@ -51,13 +51,13 @@
any_errors_fatal: "{{ any_errors_fatal | default(true) }}" any_errors_fatal: "{{ any_errors_fatal | default(true) }}"
roles: roles:
- { role: kubespray-defaults} - { role: kubespray-defaults}
- { role: etcd, tags: etcd, etcd_cluster_setup: true } - { role: etcd, tags: etcd }
- hosts: k8s-cluster:calico-rr - hosts: k8s-cluster:calico-rr
any_errors_fatal: "{{ any_errors_fatal | default(true) }}" any_errors_fatal: "{{ any_errors_fatal | default(true) }}"
roles: roles:
- { role: kubespray-defaults} - { role: kubespray-defaults}
- { role: etcd, tags: etcd, etcd_cluster_setup: false } - { role: etcd, tags: etcd }
- hosts: etcd:k8s-cluster:vault:calico-rr - hosts: etcd:k8s-cluster:vault:calico-rr
any_errors_fatal: "{{ any_errors_fatal | default(true) }}" any_errors_fatal: "{{ any_errors_fatal | default(true) }}"

View file

@ -1,6 +1,7 @@
--- ---
# Set to false to only do certificate management # Set to false to only do certificate management
etcd_cluster_setup: true etcd_cluster_setup: true
etcd_events_cluster_setup: false
etcd_backup_prefix: "/var/backups" etcd_backup_prefix: "/var/backups"
etcd_data_dir: "/var/lib/etcd" etcd_data_dir: "/var/lib/etcd"

View file

@ -10,7 +10,7 @@
- name: restart etcd-events - name: restart etcd-events
command: /bin/true command: /bin/true
notify: notify:
- etcd-events | reload systemd - etcd | reload systemd
- reload etcd-events - reload etcd-events
- wait for etcd-events up - wait for etcd-events up
@ -19,9 +19,6 @@
- name: etcd | reload systemd - name: etcd | reload systemd
command: systemctl daemon-reload command: systemctl daemon-reload
- name: etcd-events | reload systemd
command: systemctl daemon-reload
- name: reload etcd - name: reload etcd
service: service:
name: etcd name: etcd

View file

@ -1,11 +1,104 @@
--- ---
- name: Configure | Check if etcd cluster is healthy
shell: "{{ bin_dir }}/etcdctl --endpoints={{ etcd_access_addresses }} cluster-health | grep -q 'cluster is healthy'"
register: etcd_cluster_is_healthy
ignore_errors: true
changed_when: false
check_mode: no
when: is_etcd_master and etcd_cluster_setup
tags:
- facts
environment:
ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem"
ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem"
- name: Configure | Check if etcd-events cluster is healthy
shell: "{{ bin_dir }}/etcdctl --endpoints={{ etcd_events_access_addresses }} cluster-health | grep -q 'cluster is healthy'"
register: etcd_events_cluster_is_healthy
ignore_errors: true
changed_when: false
check_mode: no
when: is_etcd_master and etcd_events_cluster_setup
tags:
- facts
environment:
ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem"
ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem"
- include_tasks: refresh_config.yml
when: is_etcd_master
- name: Configure | Copy etcd.service systemd file
template:
src: "etcd-{{ etcd_deployment_type }}.service.j2"
dest: /etc/systemd/system/etcd.service
backup: yes
when: is_etcd_master and etcd_cluster_setup
- name: Configure | Copy etcd-events.service systemd file
template:
src: "etcd-events-{{ etcd_deployment_type }}.service.j2"
dest: /etc/systemd/system/etcd-events.service
backup: yes
when: is_etcd_master and etcd_events_cluster_setup
- name: Configure | reload systemd
command: systemctl daemon-reload
when: is_etcd_master
- name: Configure | Ensure etcd is running
service:
name: etcd
state: started
enabled: yes
when: is_etcd_master and etcd_cluster_setup
- name: Configure | Ensure etcd-events is running
service:
name: etcd-events
state: started
enabled: yes
when: is_etcd_master and etcd_events_cluster_setup
- name: Configure | Check if etcd cluster is healthy
shell: "{{ bin_dir }}/etcdctl --endpoints={{ etcd_access_addresses }} cluster-health | grep -q 'cluster is healthy'"
register: etcd_cluster_is_healthy
until: etcd_cluster_is_healthy.rc == 0
retries: 4
delay: "{{ retry_stagger | random + 3 }}"
ignore_errors: false
changed_when: false
check_mode: no
when: is_etcd_master and etcd_cluster_setup
tags:
- facts
environment:
ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem"
ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem"
- name: Configure | Check if etcd-events cluster is healthy
shell: "{{ bin_dir }}/etcdctl --endpoints={{ etcd_events_access_addresses }} cluster-health | grep -q 'cluster is healthy'"
register: etcd_events_cluster_is_healthy
until: etcd_events_cluster_is_healthy.rc == 0
retries: 4
delay: "{{ retry_stagger | random + 3 }}"
ignore_errors: false
changed_when: false
check_mode: no
when: is_etcd_master and etcd_events_cluster_setup
tags:
- facts
environment:
ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem"
ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem"
- name: Configure | Check if member is in etcd cluster - name: Configure | Check if member is in etcd cluster
shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_access_addresses }} member list | grep -q {{ etcd_access_address }}" shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_access_addresses }} member list | grep -q {{ etcd_access_address }}"
register: etcd_member_in_cluster register: etcd_member_in_cluster
ignore_errors: true ignore_errors: true
changed_when: false changed_when: false
check_mode: no check_mode: no
when: is_etcd_master when: is_etcd_master and etcd_cluster_setup
tags: tags:
- facts - facts
environment: environment:
@ -25,44 +118,16 @@
ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem" ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem"
ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem" ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem"
- name: Configure | Copy etcd.service systemd file
template:
src: "etcd-{{ etcd_deployment_type }}.service.j2"
dest: /etc/systemd/system/etcd.service
backup: yes
when: is_etcd_master
notify: restart etcd
- name: Configure | Copy etcd-events.service systemd file
template:
src: "etcd-events-host.service.j2"
dest: /etc/systemd/system/etcd-events.service
backup: yes
when: is_etcd_master and etcd_deployment_type == "host" and etcd_events_cluster_setup
notify: restart etcd-events
- name: Configure | Copy etcd-events.service systemd file
template:
src: "etcd-events-docker.service.j2"
dest: /etc/systemd/system/etcd-events.service
backup: yes
when: is_etcd_master and etcd_deployment_type == "docker" and etcd_events_cluster_setup
notify: restart etcd-events
- name: Configure | Join member(s) to etcd cluster one at a time - name: Configure | Join member(s) to etcd cluster one at a time
include_tasks: join_etcd_member.yml include_tasks: join_etcd_member.yml
vars: vars:
target_node: "{{ item }}" target_node: "{{ item }}"
loop_control:
pause: 10
with_items: "{{ groups['etcd'] }}" with_items: "{{ groups['etcd'] }}"
when: inventory_hostname == item and etcd_member_in_cluster.rc != 0 and etcd_cluster_is_healthy.rc == 0 when: inventory_hostname == item and etcd_cluster_setup and etcd_member_in_cluster.rc != 0 and etcd_cluster_is_healthy.rc == 0
- name: Configure | Join member(s) to etcd-events cluster one at a time - name: Configure | Join member(s) to etcd-events cluster one at a time
include_tasks: join_etcd-evetns_member.yml include_tasks: join_etcd-events_member.yml
vars: vars:
target_node: "{{ item }}" target_node: "{{ item }}"
loop_control:
pause: 10
with_items: "{{ groups['etcd'] }}" with_items: "{{ groups['etcd'] }}"
when: inventory_hostname == item and etcd_events_cluster_setup and etcd_events_member_in_cluster.rc != 0 and etcd_events_cluster_is_healthy.rc == 0 when: inventory_hostname == item and etcd_events_cluster_setup and etcd_events_member_in_cluster.rc != 0 and etcd_events_cluster_is_healthy.rc == 0

View file

@ -15,6 +15,7 @@
owner: root owner: root
mode: 0700 mode: 0700
run_once: yes run_once: yes
when: inventory_hostname == groups['etcd'][0]
delegate_to: "{{groups['etcd'][0]}}" delegate_to: "{{groups['etcd'][0]}}"
- name: "Gen_certs | create etcd cert dir (on {{groups['etcd'][0]}})" - name: "Gen_certs | create etcd cert dir (on {{groups['etcd'][0]}})"
@ -26,6 +27,7 @@
recurse: yes recurse: yes
mode: 0700 mode: 0700
run_once: yes run_once: yes
when: inventory_hostname == groups['etcd'][0]
delegate_to: "{{groups['etcd'][0]}}" delegate_to: "{{groups['etcd'][0]}}"
- name: Gen_certs | write openssl config - name: Gen_certs | write openssl config
@ -34,7 +36,9 @@
dest: "{{ etcd_config_dir }}/openssl.conf" dest: "{{ etcd_config_dir }}/openssl.conf"
run_once: yes run_once: yes
delegate_to: "{{groups['etcd'][0]}}" delegate_to: "{{groups['etcd'][0]}}"
when: gen_certs|default(false) when:
- gen_certs|default(false)
- inventory_hostname == groups['etcd'][0]
- name: Gen_certs | copy certs generation script - name: Gen_certs | copy certs generation script
copy: copy:
@ -43,8 +47,9 @@
mode: 0700 mode: 0700
run_once: yes run_once: yes
delegate_to: "{{groups['etcd'][0]}}" delegate_to: "{{groups['etcd'][0]}}"
when: gen_certs|default(false) when:
- gen_certs|default(false)
- inventory_hostname == groups['etcd'][0]
- name: Gen_certs | run cert generation script - name: Gen_certs | run cert generation script
command: "bash -x {{ etcd_script_dir }}/make-ssl-etcd.sh -f {{ etcd_config_dir }}/openssl.conf -d {{ etcd_cert_dir }}" command: "bash -x {{ etcd_script_dir }}/make-ssl-etcd.sh -f {{ etcd_config_dir }}/openssl.conf -d {{ etcd_cert_dir }}"
@ -61,7 +66,9 @@
{% endfor %}" {% endfor %}"
run_once: yes run_once: yes
delegate_to: "{{groups['etcd'][0]}}" delegate_to: "{{groups['etcd'][0]}}"
when: gen_certs|default(false) when:
- gen_certs|default(false)
- inventory_hostname == groups['etcd'][0]
notify: set etcd_secret_changed notify: set etcd_secret_changed
- set_fact: - set_fact:
@ -160,5 +167,5 @@
group: "{{ etcd_cert_group }}" group: "{{ etcd_cert_group }}"
state: directory state: directory
owner: kube owner: kube
mode: "u=rwX,g-rwx,o-rwx" mode: "640"
recurse: yes recurse: yes

View file

@ -9,22 +9,22 @@
retries: 4 retries: 4
delay: "{{ retry_stagger | random + 3 }}" delay: "{{ retry_stagger | random + 3 }}"
changed_when: false changed_when: false
when: etcd_cluster_setup
- name: Install etcd launch script - name: Install etcd launch script
template: template:
src: etcd.j2 src: etcd.j2
dest: "{{ bin_dir }}/etcd" dest: "{{ bin_dir }}/etcd"
owner: 'root' owner: 'root'
mode: 0755 mode: 0750
backup: yes backup: yes
notify: restart etcd when: etcd_cluster_setup
- name: Install etcd-events launch script - name: Install etcd-events launch script
template: template:
src: etcd-events.j2 src: etcd-events.j2
dest: "{{ bin_dir }}/etcd-events" dest: "{{ bin_dir }}/etcd-events"
owner: 'root' owner: 'root'
mode: 0755 mode: 0750
backup: yes backup: yes
when: etcd_events_cluster_setup when: etcd_events_cluster_setup
notify: restart etcd-events

View file

@ -10,3 +10,4 @@
retries: 4 retries: 4
delay: "{{ retry_stagger | random + 3 }}" delay: "{{ retry_stagger | random + 3 }}"
changed_when: false changed_when: false
when: etcd_cluster_setup

View file

@ -11,6 +11,7 @@
delay: "{{ retry_stagger | random + 3 }}" delay: "{{ retry_stagger | random + 3 }}"
changed_when: false changed_when: false
environment: "{{proxy_env}}" environment: "{{proxy_env}}"
when: etcd_cluster_setup
- name: Install | Copy etcdctl binary from rkt container - name: Install | Copy etcdctl binary from rkt container
command: >- command: >-
@ -26,3 +27,4 @@
delay: "{{ retry_stagger | random + 3 }}" delay: "{{ retry_stagger | random + 3 }}"
changed_when: false changed_when: false
environment: "{{proxy_env}}" environment: "{{proxy_env}}"
when: etcd_cluster_setup

View file

@ -1,5 +1,5 @@
--- ---
- name: Join Member | Add member to cluster - name: Join Member | Add member to etcd-events cluster
shell: "{{ bin_dir }}/etcdctl --endpoints={{ etcd_events_access_addresses }} member add {{ etcd_member_name }} {{ etcd_events_peer_url }}" shell: "{{ bin_dir }}/etcdctl --endpoints={{ etcd_events_access_addresses }} member add {{ etcd_member_name }} {{ etcd_events_peer_url }}"
register: member_add_result register: member_add_result
until: member_add_result.rc == 0 until: member_add_result.rc == 0
@ -23,17 +23,6 @@
{%- endfor -%} {%- endfor -%}
when: target_node == inventory_hostname when: target_node == inventory_hostname
- name: Join Member | reload systemd
command: systemctl daemon-reload
when: target_node == inventory_hostname
- name: Join Member | Ensure etcd-events is running
service:
name: etcd-events
state: started
enabled: yes
when: target_node == inventory_hostname
- name: Join Member | Ensure member is in etcd-events cluster - name: Join Member | Ensure member is in etcd-events cluster
shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_events_access_addresses }} member list | grep -q {{ etcd_events_access_address }}" shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_events_access_addresses }} member list | grep -q {{ etcd_events_access_address }}"
register: etcd_events_member_in_cluster register: etcd_events_member_in_cluster

View file

@ -1,5 +1,5 @@
--- ---
- name: Join Member | Add member to cluster - name: Join Member | Add member to etcd cluster
shell: "{{ bin_dir }}/etcdctl --endpoints={{ etcd_access_addresses }} member add {{ etcd_member_name }} {{ etcd_peer_url }}" shell: "{{ bin_dir }}/etcdctl --endpoints={{ etcd_access_addresses }} member add {{ etcd_member_name }} {{ etcd_peer_url }}"
register: member_add_result register: member_add_result
until: member_add_result.rc == 0 until: member_add_result.rc == 0
@ -23,18 +23,7 @@
{%- endfor -%} {%- endfor -%}
when: target_node == inventory_hostname when: target_node == inventory_hostname
- name: Join Member | reload systemd - name: Join Member | Ensure member is in etcd cluster
command: systemctl daemon-reload
when: target_node == inventory_hostname
- name: Join Member | Ensure etcd is running
service:
name: etcd
state: started
enabled: yes
when: target_node == inventory_hostname
- name: Join Member | Ensure member is in cluster
shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_access_addresses }} member list | grep -q {{ etcd_access_address }}" shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_access_addresses }} member list | grep -q {{ etcd_access_address }}"
register: etcd_member_in_cluster register: etcd_member_in_cluster
changed_when: false changed_when: false

View file

@ -1,47 +0,0 @@
---
- name: Join Member | Add member to cluster
shell: "{{ bin_dir }}/etcdctl --endpoints={{ etcd_access_addresses }} member add {{ etcd_member_name }} {{ etcd_peer_url }}"
register: member_add_result
until: member_add_result.rc == 0
retries: 4
delay: "{{ retry_stagger | random + 3 }}"
when: target_node == inventory_hostname
environment:
ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem"
ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem"
- include_tasks: refresh_config.yml
vars:
etcd_peer_addresses: >-
{% for host in groups['etcd'] -%}
{%- if hostvars[host]['etcd_member_in_cluster'].rc == 0 -%}
{{ "etcd"+loop.index|string }}=https://{{ hostvars[host].access_ip | default(hostvars[host].ip | default(hostvars[host].ansible_default_ipv4['address'])) }}:2380,
{%- endif -%}
{%- if loop.last -%}
{{ etcd_member_name }}={{ etcd_peer_url }}
{%- endif -%}
{%- endfor -%}
when: target_node == inventory_hostname
- name: Join Member | reload systemd
command: systemctl daemon-reload
when: target_node == inventory_hostname
- name: Join Member | Ensure etcd is running
service:
name: etcd
state: started
enabled: yes
when: target_node == inventory_hostname
- name: Join Member | Ensure member is in cluster
shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_access_addresses }} member list | grep -q {{ etcd_access_address }}"
register: etcd_member_in_cluster
changed_when: false
check_mode: no
tags:
- facts
when: target_node == inventory_hostname
environment:
ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem"
ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem"

View file

@ -6,6 +6,7 @@
- facts - facts
- include_tasks: "gen_certs_{{ cert_management }}.yml" - include_tasks: "gen_certs_{{ cert_management }}.yml"
when:
tags: tags:
- etcd-secrets - etcd-secrets
@ -29,47 +30,28 @@
tags: tags:
- upgrade - upgrade
- include_tasks: set_cluster_health.yml
when: is_etcd_master and etcd_cluster_setup
- include_tasks: configure.yml - include_tasks: configure.yml
when: is_etcd_master and etcd_cluster_setup when: is_etcd_master
- include_tasks: refresh_config.yml - include_tasks: refresh_config.yml
when: is_etcd_master and etcd_cluster_setup when: is_etcd_master
- name: Restart etcd if certs changed - name: Restart etcd if certs changed
command: /bin/true
notify: restart etcd
when: is_etcd_master and etcd_secret_changed|default(false)
- name: Restart etcd-events if certs changed
command: /bin/true
notify: restart etcd
when: is_etcd_master and etcd_events_cluster_setup and etcd_secret_changed|default(false)
# reload-systemd
- meta: flush_handlers
- name: Ensure etcd is running
service: service:
name: etcd name: etcd
state: started state: restarted
enabled: yes enabled: yes
when: is_etcd_master and etcd_cluster_setup when: is_etcd_master and etcd_cluster_setup and etcd_secret_changed|default(false)
- name: Ensure etcd-events is running - name: Restart etcd-events if certs changed
service: service:
name: etcd-events name: etcd-events
state: started state: restarted
enabled: yes enabled: yes
when: is_etcd_master and etcd_events_cluster_setup when: is_etcd_master and etcd_events_cluster_setup and etcd_secret_changed|default(false)
# After etcd cluster is assembled, make sure that # After etcd cluster is assembled, make sure that
# initial state of the cluster is in `existing` # initial state of the cluster is in `existing`
# state insted of `new`. # state insted of `new`.
- include_tasks: set_cluster_health.yml
when: is_etcd_master and etcd_cluster_setup
- include_tasks: refresh_config.yml - include_tasks: refresh_config.yml
when: is_etcd_master and etcd_cluster_setup when: is_etcd_master

View file

@ -4,7 +4,7 @@
src: etcd.env.j2 src: etcd.env.j2
dest: /etc/etcd.env dest: /etc/etcd.env
notify: restart etcd notify: restart etcd
when: is_etcd_master when: is_etcd_master and etcd_cluster_setup
- name: Refresh config | Create etcd-events config file - name: Refresh config | Create etcd-events config file
template: template:

View file

@ -1,26 +0,0 @@
---
- name: Configure | Check if etcd cluster is healthy
shell: "{{ bin_dir }}/etcdctl --endpoints={{ etcd_access_addresses }} cluster-health | grep -q 'cluster is healthy'"
register: etcd_cluster_is_healthy
ignore_errors: true
changed_when: false
check_mode: no
when: is_etcd_master
tags:
- facts
environment:
ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem"
ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem"
- name: Configure | Check if etcd-events cluster is healthy
shell: "{{ bin_dir }}/etcdctl --endpoints={{ etcd_events_access_addresses }} cluster-health | grep -q 'cluster is healthy'"
register: etcd_events_cluster_is_healthy
ignore_errors: true
changed_when: false
check_mode: no
when: is_etcd_master and etcd_events_cluster_setup
tags:
- facts
environment:
ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem"
ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem"

View file

@ -38,6 +38,9 @@ apiServerExtraArgs:
apiserver-count: "{{ kube_apiserver_count }}" apiserver-count: "{{ kube_apiserver_count }}"
{% if kube_version | version_compare('v1.9', '>=') %} {% if kube_version | version_compare('v1.9', '>=') %}
endpoint-reconciler-type: lease endpoint-reconciler-type: lease
{% endif %}
{% if etcd_events_cluster_setup %}
etcd-servers-overrides: "/events#{{ etcd_events_access_addresses }}"
{% endif %} {% endif %}
service-node-port-range: {{ kube_apiserver_node_port_range }} service-node-port-range: {{ kube_apiserver_node_port_range }}
kubelet-preferred-address-types: "{{ kubelet_preferred_address_types }}" kubelet-preferred-address-types: "{{ kubelet_preferred_address_types }}"