Add etcd-events cluster for kube-apiserver (#2385)

Add etcd-events cluster for kube-apiserver
This commit is contained in:
RongZhang 2018-03-01 02:39:14 -06:00 committed by Matthew Mosesohn
parent af7edf4dff
commit 67ffd8e923
17 changed files with 309 additions and 4 deletions

View file

@ -4,6 +4,7 @@ etcd_cluster_setup: true
etcd_backup_prefix: "/var/backups" etcd_backup_prefix: "/var/backups"
etcd_data_dir: "/var/lib/etcd" etcd_data_dir: "/var/lib/etcd"
etcd_events_data_dir: "/var/lib/etcd-events"
etcd_config_dir: /etc/ssl/etcd etcd_config_dir: /etc/ssl/etcd
etcd_cert_dir: "{{ etcd_config_dir }}/ssl" etcd_cert_dir: "{{ etcd_config_dir }}/ssl"

View file

@ -7,17 +7,33 @@
- reload etcd - reload etcd
- wait for etcd up - wait for etcd up
# Umbrella handler for the etcd-events service: tasks notify this one name and
# it chains to the systemd reload, the service restart and the health wait.
- name: restart etcd-events
  command: /bin/true
  notify:
    - etcd-events | reload systemd
    - reload etcd-events
    - wait for etcd-events up
- import_tasks: backup.yml - import_tasks: backup.yml
- name: etcd | reload systemd - name: etcd | reload systemd
command: systemctl daemon-reload command: systemctl daemon-reload
# Has systemd re-read its unit files; notified through "restart etcd-events".
- name: etcd-events | reload systemd
  command: systemctl daemon-reload
- name: reload etcd - name: reload etcd
service: service:
name: etcd name: etcd
state: restarted state: restarted
when: is_etcd_master when: is_etcd_master
# Restarts the etcd-events unit; only hosts in the etcd group run it.
- name: reload etcd-events
  when: is_etcd_master
  service:
    name: etcd-events
    state: restarted
- name: wait for etcd up - name: wait for etcd up
uri: uri:
url: "https://{% if is_etcd_master %}{{ etcd_address }}{% else %}127.0.0.1{% endif %}:2379/health" url: "https://{% if is_etcd_master %}{{ etcd_address }}{% else %}127.0.0.1{% endif %}:2379/health"
@ -29,6 +45,17 @@
retries: 10 retries: 10
delay: 5 delay: 5
# Polls the etcd-events client port (2381) until /health answers HTTP 200,
# trying up to 10 times with 5 seconds between attempts.
- name: wait for etcd-events up
  register: result
  until: result.status is defined and result.status == 200
  retries: 10
  delay: 5
  uri:
    url: "https://{% if is_etcd_master %}{{ etcd_address }}{% else %}127.0.0.1{% endif %}:2381/health"
    validate_certs: false
    client_cert: "{{ etcd_cert_dir }}/member-{{ inventory_hostname }}.pem"
    client_key: "{{ etcd_cert_dir }}/member-{{ inventory_hostname }}-key.pem"
- name: set etcd_secret_changed - name: set etcd_secret_changed
set_fact: set_fact:
etcd_secret_changed: true etcd_secret_changed: true

View file

@ -1,5 +1,5 @@
--- ---
- name: Configure | Check if member is in cluster - name: Configure | Check if member is in etcd cluster
shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_access_addresses }} member list | grep -q {{ etcd_access_address }}" shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_access_addresses }} member list | grep -q {{ etcd_access_address }}"
register: etcd_member_in_cluster register: etcd_member_in_cluster
ignore_errors: true ignore_errors: true
@ -12,6 +12,19 @@
ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}.pem" ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}.pem"
ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}-key.pem" ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}-key.pem"
# Read-only probe: rc == 0 when this host's access address shows up in the
# etcd-events member list. Failures are tolerated so that later tasks can
# branch on etcd_events_member_in_cluster.rc.
- name: Configure | Check if member is in etcd-events cluster
  when:
    - is_etcd_master
    - etcd_events_cluster_setup
  tags:
    - facts
  environment:
    ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}.pem"
    ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}-key.pem"
  shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_events_access_addresses }} member list | grep -q {{ etcd_access_address }}"
  register: etcd_events_member_in_cluster
  ignore_errors: true
  changed_when: false
  check_mode: false
- name: Configure | Copy etcd.service systemd file - name: Configure | Copy etcd.service systemd file
template: template:
src: "etcd-{{ etcd_deployment_type }}.service.j2" src: "etcd-{{ etcd_deployment_type }}.service.j2"
@ -20,11 +33,36 @@
when: is_etcd_master when: is_etcd_master
notify: restart etcd notify: restart etcd
- name: Configure | Join member(s) to cluster one at a time - name: Configure | Copy etcd-events.service systemd file
include_tasks: join_member.yml template:
src: "etcd-events-host.service.j2"
dest: /etc/systemd/system/etcd-events.service
backup: yes
when: is_etcd_master and etcd_deployment_type == "host" and etcd_events_cluster_setup
notify: restart etcd-events
# Docker flavour of the etcd-events unit; applied only when
# etcd_deployment_type is "docker" and the events cluster is enabled.
- name: Configure | Copy etcd-events.service systemd file
  when:
    - is_etcd_master
    - etcd_deployment_type == "docker"
    - etcd_events_cluster_setup
  template:
    src: "etcd-events-docker.service.j2"
    dest: /etc/systemd/system/etcd-events.service
    backup: true
  notify: restart etcd-events
- name: Configure | Join member(s) to etcd cluster one at a time
include_tasks: join_etcd_member.yml
vars: vars:
target_node: "{{ item }}" target_node: "{{ item }}"
loop_control: loop_control:
pause: 10 pause: 10
with_items: "{{ groups['etcd'] }}" with_items: "{{ groups['etcd'] }}"
when: inventory_hostname == item and etcd_member_in_cluster.rc != 0 and etcd_cluster_is_healthy.rc == 0 when: inventory_hostname == item and etcd_member_in_cluster.rc != 0 and etcd_cluster_is_healthy.rc == 0
# Walks the etcd group with a 10 second pause between hosts and runs the join
# tasks only for the current host, and only when it is not yet a member of an
# already healthy etcd-events cluster.
# NOTE(review): "evetns" looks like a misspelling of "events". The include
# only resolves if the task file on disk carries the same spelling - confirm
# before renaming either side.
- name: Configure | Join member(s) to etcd-events cluster one at a time
  include_tasks: join_etcd-evetns_member.yml
  with_items: "{{ groups['etcd'] }}"
  loop_control:
    pause: 10
  vars:
    target_node: "{{ item }}"
  when:
    - inventory_hostname == item
    - etcd_events_cluster_setup
    - etcd_events_member_in_cluster.rc != 0
    - etcd_events_cluster_is_healthy.rc == 0

View file

@ -18,3 +18,13 @@
mode: 0755 mode: 0755
backup: yes backup: yes
notify: restart etcd notify: restart etcd
# Renders the wrapper script that starts the etcd-events container and
# schedules a service restart whenever the rendered script changes.
- name: Install etcd-events launch script
  template:
    src: etcd-events.j2
    dest: "{{ bin_dir }}/etcd-events"
    owner: 'root'
    # Quoted so the mode is always read as an octal string, never as a number.
    mode: "0755"
    backup: true
  when: etcd_events_cluster_setup
  notify: restart etcd-events

View file

@ -0,0 +1,47 @@
---
# Joins a single host to an existing etcd-events cluster. The includer passes
# target_node; every task is gated on it so only the joining host acts.

# The member is registered under "<etcd_member_name>-events", the same name
# the etcd-events service uses for ETCD_NAME.
- name: Join Member | Add member to etcd-events cluster
  shell: "{{ bin_dir }}/etcdctl --endpoints={{ etcd_events_access_addresses }} member add {{ etcd_member_name }}-events {{ etcd_events_peer_url }}"
  register: member_add_result
  until: member_add_result.rc == 0
  retries: 4
  delay: "{{ retry_stagger | random + 3 }}"
  when: target_node == inventory_hostname
  environment:
    ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}.pem"
    ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}-key.pem"

# Re-render the env file with an initial-cluster list made of the hosts that
# are already members plus the host being joined. Names carry the "-events"
# suffix so the local ETCD_NAME can be found in the list.
- include_tasks: refresh_config.yml
  vars:
    etcd_events_peer_addresses: >-
      {% for host in groups['etcd'] -%}
        {%- if hostvars[host]['etcd_events_member_in_cluster'].rc == 0 -%}
          {{ "etcd"+loop.index|string }}-events=https://{{ hostvars[host].access_ip | default(hostvars[host].ip | default(hostvars[host].ansible_default_ipv4['address'])) }}:2382,
        {%- endif -%}
        {%- if loop.last -%}
          {{ etcd_member_name }}-events={{ etcd_events_peer_url }}
        {%- endif -%}
      {%- endfor -%}
  when: target_node == inventory_hostname

- name: Join Member | reload systemd
  command: systemctl daemon-reload
  when: target_node == inventory_hostname

- name: Join Member | Ensure etcd-events is running
  service:
    name: etcd-events
    state: started
    enabled: true
  when: target_node == inventory_hostname

# Uses etcd_access_address, like the pre-join membership check does; the
# events member is reachable on the same address, only the ports differ.
- name: Join Member | Ensure member is in etcd-events cluster
  shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_events_access_addresses }} member list | grep -q {{ etcd_access_address }}"
  register: etcd_events_member_in_cluster
  changed_when: false
  check_mode: false
  tags:
    - facts
  when: target_node == inventory_hostname
  environment:
    ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}.pem"
    ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}-key.pem"

View file

@ -0,0 +1,47 @@
---
# Joins a single host to an existing etcd cluster. The includer passes
# target_node; every task is gated on it so only the joining host acts.

- name: Join Member | Add member to cluster
  when: target_node == inventory_hostname
  environment:
    ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}.pem"
    ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}-key.pem"
  shell: "{{ bin_dir }}/etcdctl --endpoints={{ etcd_access_addresses }} member add {{ etcd_member_name }} {{ etcd_peer_url }}"
  register: member_add_result
  until: member_add_result.rc == 0
  retries: 4
  delay: "{{ retry_stagger | random + 3 }}"

# Re-render the env file with an initial-cluster list made of the hosts that
# are already members plus the host being joined.
- include_tasks: refresh_config.yml
  when: target_node == inventory_hostname
  vars:
    etcd_peer_addresses: >-
      {% for host in groups['etcd'] -%}
        {%- if hostvars[host]['etcd_member_in_cluster'].rc == 0 -%}
          {{ "etcd"+loop.index|string }}=https://{{ hostvars[host].access_ip | default(hostvars[host].ip | default(hostvars[host].ansible_default_ipv4['address'])) }}:2380,
        {%- endif -%}
        {%- if loop.last -%}
          {{ etcd_member_name }}={{ etcd_peer_url }}
        {%- endif -%}
      {%- endfor -%}

- name: Join Member | reload systemd
  when: target_node == inventory_hostname
  command: systemctl daemon-reload

- name: Join Member | Ensure etcd is running
  when: target_node == inventory_hostname
  service:
    name: etcd
    state: started
    enabled: true

# Re-registers the membership fact so later hosts in the loop see this one.
- name: Join Member | Ensure member is in cluster
  when: target_node == inventory_hostname
  tags:
    - facts
  environment:
    ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}.pem"
    ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}-key.pem"
  shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_access_addresses }} member list | grep -q {{ etcd_access_address }}"
  register: etcd_member_in_cluster
  changed_when: false
  check_mode: false

View file

@ -43,6 +43,11 @@
notify: restart etcd notify: restart etcd
when: is_etcd_master and etcd_secret_changed|default(false) when: is_etcd_master and etcd_secret_changed|default(false)
# etcd-events uses the member certificates from etcd_cert_dir, so a changed
# secret has to restart the etcd-events service itself. This must notify the
# etcd-events handler, not the main etcd one.
- name: Restart etcd-events if certs changed
  command: /bin/true
  notify: restart etcd-events
  when: is_etcd_master and etcd_events_cluster_setup and etcd_secret_changed|default(false)
# reload-systemd # reload-systemd
- meta: flush_handlers - meta: flush_handlers
@ -53,6 +58,13 @@
enabled: yes enabled: yes
when: is_etcd_master and etcd_cluster_setup when: is_etcd_master and etcd_cluster_setup
# Starts the etcd-events unit and enables it at boot on etcd hosts when the
# events cluster is switched on.
- name: Ensure etcd-events is running
  when:
    - is_etcd_master
    - etcd_events_cluster_setup
  service:
    name: etcd-events
    state: started
    enabled: true
# After etcd cluster is assembled, make sure that # After etcd cluster is assembled, make sure that
# initial state of the cluster is in `existing` # initial state of the cluster is in `existing`
# state insted of `new`. # state insted of `new`.

View file

@ -5,3 +5,10 @@
dest: /etc/etcd.env dest: /etc/etcd.env
notify: restart etcd notify: restart etcd
when: is_etcd_master when: is_etcd_master
# Renders /etc/etcd-events.env, the environment file read by the etcd-events
# unit and launch script; a change triggers a service restart.
- name: Refresh config | Create etcd-events config file
  when:
    - is_etcd_master
    - etcd_events_cluster_setup
  template:
    src: etcd-events.env.j2
    dest: /etc/etcd-events.env
  notify: restart etcd-events

View file

@ -1,5 +1,5 @@
--- ---
- name: Configure | Check if cluster is healthy - name: Configure | Check if etcd cluster is healthy
shell: "{{ bin_dir }}/etcdctl --endpoints={{ etcd_access_addresses }} cluster-health | grep -q 'cluster is healthy'" shell: "{{ bin_dir }}/etcdctl --endpoints={{ etcd_access_addresses }} cluster-health | grep -q 'cluster is healthy'"
register: etcd_cluster_is_healthy register: etcd_cluster_is_healthy
ignore_errors: true ignore_errors: true
@ -11,3 +11,16 @@
environment: environment:
ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}.pem" ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}.pem"
ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}-key.pem" ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}-key.pem"
# Read-only probe: rc == 0 when etcdctl reports the etcd-events cluster as
# healthy. Failures are tolerated so that later tasks and templates can branch
# on etcd_events_cluster_is_healthy.rc.
- name: Configure | Check if etcd-events cluster is healthy
  when:
    - is_etcd_master
    - etcd_events_cluster_setup
  tags:
    - facts
  environment:
    ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}.pem"
    ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}-key.pem"
  shell: "{{ bin_dir }}/etcdctl --endpoints={{ etcd_events_access_addresses }} cluster-health | grep -q 'cluster is healthy'"
  register: etcd_events_cluster_is_healthy
  ignore_errors: true
  changed_when: false
  check_mode: false

View file

@ -0,0 +1,18 @@
# systemd unit that runs the etcd-events member through the Docker wrapper
# script installed at {{ bin_dir }}/etcd-events.
[Unit]
Description=etcd-events docker wrapper
Wants=docker.socket
After=docker.service

[Service]
User=root
PermissionsStartOnly=true
# Leading "-" makes a missing env file non-fatal.
EnvironmentFile=-/etc/etcd-events.env
ExecStart={{ bin_dir }}/etcd-events
# Remove any leftover container of the same name; "-" ignores the failure
# when there is none.
ExecStartPre=-{{ docker_bin_dir }}/docker rm -f {{ etcd_member_name }}-events
ExecStop={{ docker_bin_dir }}/docker stop {{ etcd_member_name }}-events
Restart=always
RestartSec=15s
TimeoutStartSec=30s

[Install]
WantedBy=multi-user.target

View file

@ -0,0 +1,16 @@
# systemd unit that runs the etcd-events member directly on the host. It
# starts the regular etcd binary; everything that makes it the events member
# (name, ports, data dir) comes from /etc/etcd-events.env.
[Unit]
Description=etcd-events
After=network.target

[Service]
Type=notify
User=root
EnvironmentFile=/etc/etcd-events.env
ExecStart={{ bin_dir }}/etcd
NotifyAccess=all
Restart=always
RestartSec=10s
LimitNOFILE=40000

[Install]
WantedBy=multi-user.target

View file

@ -0,0 +1,26 @@
# Environment for the etcd-events member (client port 2381, peer port 2382).
ETCD_DATA_DIR={{ etcd_events_data_dir }}
ETCD_ADVERTISE_CLIENT_URLS={{ etcd_events_client_url }}
ETCD_INITIAL_ADVERTISE_PEER_URLS={{ etcd_events_peer_url }}
# Decided from the health of the etcd-events cluster itself, not the main
# etcd cluster. Written as an inline expression so that no block tag ends the
# line and swallows the following newline.
ETCD_INITIAL_CLUSTER_STATE={{ 'new' if etcd_events_cluster_is_healthy.rc != 0 else 'existing' }}
ETCD_METRICS={{ etcd_metrics }}
ETCD_LISTEN_CLIENT_URLS=https://{{ etcd_address }}:2381,https://127.0.0.1:2381
ETCD_ELECTION_TIMEOUT={{ etcd_election_timeout }}
ETCD_HEARTBEAT_INTERVAL={{ etcd_heartbeat_interval }}
ETCD_INITIAL_CLUSTER_TOKEN=k8s_events_etcd
ETCD_LISTEN_PEER_URLS=https://{{ etcd_address }}:2382
ETCD_NAME={{ etcd_member_name }}-events
ETCD_PROXY=off
ETCD_INITIAL_CLUSTER={{ etcd_events_peer_addresses }}
ETCD_AUTO_COMPACTION_RETENTION={{ etcd_compaction_retention }}

# TLS settings
ETCD_TRUSTED_CA_FILE={{ etcd_cert_dir }}/ca.pem
ETCD_CERT_FILE={{ etcd_cert_dir }}/member-{{ inventory_hostname }}.pem
ETCD_KEY_FILE={{ etcd_cert_dir }}/member-{{ inventory_hostname }}-key.pem
ETCD_CLIENT_CERT_AUTH={{ etcd_secure_client | lower}}

ETCD_PEER_TRUSTED_CA_FILE={{ etcd_cert_dir }}/ca.pem
ETCD_PEER_CERT_FILE={{ etcd_cert_dir }}/member-{{ inventory_hostname }}.pem
ETCD_PEER_KEY_FILE={{ etcd_cert_dir }}/member-{{ inventory_hostname }}-key.pem
ETCD_PEER_CLIENT_CERT_AUTH={{ etcd_peer_client_auth }}

View file

@ -0,0 +1,22 @@
#!/bin/bash
# Starts the etcd-events member as a Docker container on the host network.
# Configuration comes from /etc/etcd-events.env; the certificate directory is
# mounted read-only and the events data directory read-write. The optional
# memory, CPU and blkio limits are emitted only when the matching variable is
# defined. Any arguments given to this script are passed on to etcd.
{{ docker_bin_dir }}/docker run \
--restart=on-failure:5 \
--env-file=/etc/etcd-events.env \
--net=host \
-v /etc/ssl/certs:/etc/ssl/certs:ro \
-v {{ etcd_cert_dir }}:{{ etcd_cert_dir }}:ro \
-v {{ etcd_events_data_dir }}:{{ etcd_events_data_dir }}:rw \
{% if etcd_memory_limit is defined %}
--memory={{ etcd_memory_limit|regex_replace('Mi', 'M') }} \
{% endif %}
--oom-kill-disable \
{% if etcd_cpu_limit is defined %}
--cpu-shares={{ etcd_cpu_limit|regex_replace('m', '') }} \
{% endif %}
{% if etcd_blkio_weight is defined %}
--blkio-weight={{ etcd_blkio_weight }} \
{% endif %}
--name={{ etcd_member_name }}-events \
{{ etcd_image_repo }}:{{ etcd_image_tag }} \
/usr/local/bin/etcd \
"$@"

View file

@ -30,6 +30,9 @@ spec:
- apiserver - apiserver
- --advertise-address={{ ip | default(ansible_default_ipv4.address) }} - --advertise-address={{ ip | default(ansible_default_ipv4.address) }}
- --etcd-servers={{ etcd_access_addresses }} - --etcd-servers={{ etcd_access_addresses }}
{% if etcd_events_cluster_setup %}
- --etcd-servers-overrides=/events#{{ etcd_events_access_addresses }}
{% endif %}
{% if kube_version | version_compare('v1.9', '<') %} {% if kube_version | version_compare('v1.9', '<') %}
- --etcd-quorum-read=true - --etcd-quorum-read=true
{% endif %} {% endif %}

View file

@ -289,16 +289,25 @@ kube_apiserver_client_key: |-
{{ kube_cert_dir }}/apiserver-key.pem {{ kube_cert_dir }}/apiserver-key.pem
{%- endif %} {%- endif %}
# Set to true to deploy a separate etcd-events cluster on the etcd hosts.
# When enabled, kube-apiserver is pointed at it for /events through
# --etcd-servers-overrides.
etcd_events_cluster_setup: false
# Vars for pointing to etcd endpoints # Vars for pointing to etcd endpoints
is_etcd_master: "{{ inventory_hostname in groups['etcd'] }}" is_etcd_master: "{{ inventory_hostname in groups['etcd'] }}"
etcd_address: "{{ ip | default(ansible_default_ipv4['address']) }}" etcd_address: "{{ ip | default(ansible_default_ipv4['address']) }}"
etcd_access_address: "{{ access_ip | default(etcd_address) }}" etcd_access_address: "{{ access_ip | default(etcd_address) }}"
etcd_peer_url: "https://{{ etcd_access_address }}:2380" etcd_peer_url: "https://{{ etcd_access_address }}:2380"
etcd_client_url: "https://{{ etcd_access_address }}:2379" etcd_client_url: "https://{{ etcd_access_address }}:2379"
# etcd-events shares the host address with etcd and differs only by port:
# 2382 for peer traffic and 2381 for clients.
etcd_events_peer_url: "https://{{ etcd_access_address }}:2382"
etcd_events_client_url: "https://{{ etcd_access_address }}:2381"
etcd_access_addresses: |- etcd_access_addresses: |-
{% for item in groups['etcd'] -%} {% for item in groups['etcd'] -%}
https://{{ hostvars[item]['access_ip'] | default(hostvars[item]['ip'] | default(hostvars[item]['ansible_default_ipv4']['address'])) }}:2379{% if not loop.last %},{% endif %} https://{{ hostvars[item]['access_ip'] | default(hostvars[item]['ip'] | default(hostvars[item]['ansible_default_ipv4']['address'])) }}:2379{% if not loop.last %},{% endif %}
{%- endfor %} {%- endfor %}
# Comma-separated client URLs (port 2381) of every host in the etcd group.
etcd_events_access_addresses: |-
  {% for host in groups['etcd'] -%}
    https://{{ hostvars[host]['access_ip'] | default(hostvars[host]['ip'] | default(hostvars[host]['ansible_default_ipv4']['address'])) }}:2381{% if not loop.last %},{% endif %}
  {%- endfor %}
etcd_member_name: |- etcd_member_name: |-
{% for host in groups['etcd'] %} {% for host in groups['etcd'] %}
{% if inventory_hostname == host %}{{"etcd"+loop.index|string }}{% endif %} {% if inventory_hostname == host %}{{"etcd"+loop.index|string }}{% endif %}
@ -307,3 +316,7 @@ etcd_peer_addresses: |-
{% for item in groups['etcd'] -%} {% for item in groups['etcd'] -%}
{{ "etcd"+loop.index|string }}=https://{{ hostvars[item].access_ip | default(hostvars[item].ip | default(hostvars[item].ansible_default_ipv4['address'])) }}:2380{% if not loop.last %},{% endif %} {{ "etcd"+loop.index|string }}=https://{{ hostvars[item].access_ip | default(hostvars[item].ip | default(hostvars[item].ansible_default_ipv4['address'])) }}:2380{% if not loop.last %},{% endif %}
{%- endfor %} {%- endfor %}
# Comma-separated "etcdN-events=<peer URL>" pairs (port 2382) for every host
# in the etcd group; N is the host's 1-based position in that group.
etcd_events_peer_addresses: |-
  {% for host in groups['etcd'] -%}
    {{ "etcd"+loop.index|string }}-events=https://{{ hostvars[host].access_ip | default(hostvars[host].ip | default(hostvars[host].ansible_default_ipv4['address'])) }}:2382{% if not loop.last %},{% endif %}
  {%- endfor %}

View file

@ -8,6 +8,7 @@
- kubelet - kubelet
- vault - vault
- etcd - etcd
- etcd-events
failed_when: false failed_when: false
tags: tags:
- services - services
@ -19,6 +20,7 @@
with_items: with_items:
- kubelet - kubelet
- etcd - etcd
- etcd-events
- vault - vault
- calico-node - calico-node
register: services_removed register: services_removed
@ -95,6 +97,7 @@
- /root/.kube - /root/.kube
- /root/.helm - /root/.helm
- "{{ etcd_data_dir }}" - "{{ etcd_data_dir }}"
- /var/lib/etcd-events
- /etc/ssl/etcd - /etc/ssl/etcd
- /var/log/calico - /var/log/calico
- /etc/cni - /etc/cni
@ -125,6 +128,7 @@
- "{{ bin_dir }}/kubelet" - "{{ bin_dir }}/kubelet"
- "{{ bin_dir }}/etcd-scripts" - "{{ bin_dir }}/etcd-scripts"
- "{{ bin_dir }}/etcd" - "{{ bin_dir }}/etcd"
- "{{ bin_dir }}/etcd-events"
- "{{ bin_dir }}/etcdctl" - "{{ bin_dir }}/etcdctl"
- "{{ bin_dir }}/kubernetes-scripts" - "{{ bin_dir }}/kubernetes-scripts"
- "{{ bin_dir }}/kubectl" - "{{ bin_dir }}/kubectl"

View file

@ -9,6 +9,7 @@ kube_network_plugin: flannel
helm_enabled: true helm_enabled: true
istio_enabled: true istio_enabled: true
efk_enabled: true efk_enabled: true
etcd_events_cluster_setup: true
local_volume_provisioner_enabled: true local_volume_provisioner_enabled: true
etcd_deployment_type: host etcd_deployment_type: host
deploy_netchecker: true deploy_netchecker: true