# Patch overview: adds an optional "etcd-events" cluster (toggled by
# etcd_events_cluster_setup) that runs on the etcd hosts beside the main
# cluster, using client port 2381, peer port 2382 and its own data directory.
# Text outside the "diff --git" sections is commentary, not patch content.

# roles/etcd/defaults: default data directory for the etcd-events member.
diff --git a/roles/etcd/defaults/main.yml b/roles/etcd/defaults/main.yml
index 4e122e719..4986ad257 100644
--- a/roles/etcd/defaults/main.yml
+++ b/roles/etcd/defaults/main.yml
@@ -4,6 +4,7 @@ etcd_cluster_setup: true
 
 etcd_backup_prefix: "/var/backups"
 etcd_data_dir: "/var/lib/etcd"
+etcd_events_data_dir: "/var/lib/etcd-events"
 
 etcd_config_dir: /etc/ssl/etcd
 etcd_cert_dir: "{{ etcd_config_dir }}/ssl"
# roles/etcd/handlers: restart chain for etcd-events (systemd daemon-reload,
# service restart, then poll /health on port 2381 until it answers 200).
diff --git a/roles/etcd/handlers/main.yml b/roles/etcd/handlers/main.yml
index f6666ee94..a72cbd515 100644
--- a/roles/etcd/handlers/main.yml
+++ b/roles/etcd/handlers/main.yml
@@ -7,17 +7,33 @@
     - reload etcd
     - wait for etcd up
 
+- name: restart etcd-events
+  command: /bin/true
+  notify:
+    - etcd-events | reload systemd
+    - reload etcd-events
+    - wait for etcd-events up
+
 - import_tasks: backup.yml
 
 - name: etcd | reload systemd
   command: systemctl daemon-reload
 
+- name: etcd-events | reload systemd
+  command: systemctl daemon-reload
+
 - name: reload etcd
   service:
     name: etcd
     state: restarted
   when: is_etcd_master
 
+- name: reload etcd-events
+  service:
+    name: etcd-events
+    state: restarted
+  when: is_etcd_master
+
 - name: wait for etcd up
   uri:
     url: "https://{% if is_etcd_master %}{{ etcd_address }}{% else %}127.0.0.1{% endif %}:2379/health"
@@ -29,6 +45,17 @@
   retries: 10
   delay: 5
 
+- name: wait for etcd-events up
+  uri:
+    url: "https://{% if is_etcd_master %}{{ etcd_address }}{% else %}127.0.0.1{% endif %}:2381/health"
+    validate_certs: no
+    client_cert: "{{ etcd_cert_dir }}/member-{{ inventory_hostname }}.pem"
+    client_key: "{{ etcd_cert_dir }}/member-{{ inventory_hostname }}-key.pem"
+  register: result
+  until: result.status is defined and result.status == 200
+  retries: 10
+  delay: 5
+
 - name: set etcd_secret_changed
   set_fact:
     etcd_secret_changed: true
# roles/etcd/tasks/configure.yml: membership checks, systemd unit install and
# member join steps, now split per cluster (etcd / etcd-events).
diff --git a/roles/etcd/tasks/configure.yml b/roles/etcd/tasks/configure.yml
index 7af17f69e..d7d3920c6 100644
--- a/roles/etcd/tasks/configure.yml
+++ b/roles/etcd/tasks/configure.yml
@@ -1,5 +1,5 @@
 ---
-- name:
Configure | Check if member is in cluster
+- name: Configure | Check if member is in etcd cluster
   shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_access_addresses }} member list | grep -q {{ etcd_access_address }}"
   register: etcd_member_in_cluster
   ignore_errors: true
@@ -12,6 +12,19 @@
     ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}.pem"
     ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}-key.pem"
 
+- name: Configure | Check if member is in etcd-events cluster
+  shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_events_access_addresses }} member list | grep -q {{ etcd_access_address }}"
+  register: etcd_events_member_in_cluster
+  ignore_errors: true
+  changed_when: false
+  check_mode: no
+  when: is_etcd_master and etcd_events_cluster_setup
+  tags:
+    - facts
+  environment:
+    ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}.pem"
+    ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}-key.pem"
+
 - name: Configure | Copy etcd.service systemd file
   template:
     src: "etcd-{{ etcd_deployment_type }}.service.j2"
@@ -20,11 +33,36 @@
   when: is_etcd_master
   notify: restart etcd
 
-- name: Configure | Join member(s) to cluster one at a time
-  include_tasks: join_member.yml
+- name: Configure | Copy etcd-events.service systemd file
+  template:
+    src: "etcd-events-host.service.j2"
+    dest: /etc/systemd/system/etcd-events.service
+    backup: yes
+  when: is_etcd_master and etcd_deployment_type == "host" and etcd_events_cluster_setup
+  notify: restart etcd-events
+
+- name: Configure | Copy etcd-events.service systemd file
+  template:
+    src: "etcd-events-docker.service.j2"
+    dest: /etc/systemd/system/etcd-events.service
+    backup: yes
+  when: is_etcd_master and etcd_deployment_type == "docker" and etcd_events_cluster_setup
+  notify: restart etcd-events
+
+- name: Configure | Join member(s) to etcd cluster one at a time
+  include_tasks: join_etcd_member.yml
   vars:
     target_node: "{{ item }}"
   loop_control:
     pause: 10
   with_items: "{{ groups['etcd'] }}"
   when: inventory_hostname == item and etcd_member_in_cluster.rc != 0 and etcd_cluster_is_healthy.rc == 0
+
+- name: Configure | Join member(s) to etcd-events cluster one at a time
+  include_tasks: join_etcd-events_member.yml
+  vars:
+    target_node: "{{ item }}"
+  loop_control:
+    pause: 10
+  with_items: "{{ groups['etcd'] }}"
+  when: inventory_hostname == item and etcd_events_cluster_setup and etcd_events_member_in_cluster.rc != 0 and etcd_events_cluster_is_healthy.rc == 0
# Review note (configure.yml hunk above): the etcd-events join task must include
# join_etcd-events_member.yml, the file this patch creates; the include path
# was misspelled ("evetns") and would fail at run time.

# roles/etcd/tasks/install_docker.yml: installs the etcd-events launch script
# when etcd_events_cluster_setup is set.
diff --git a/roles/etcd/tasks/install_docker.yml b/roles/etcd/tasks/install_docker.yml
index 291bb5f25..58e1485a5 100644
--- a/roles/etcd/tasks/install_docker.yml
+++ b/roles/etcd/tasks/install_docker.yml
@@ -18,3 +18,13 @@
     mode: 0755
     backup: yes
   notify: restart etcd
+
+- name: Install etcd-events launch script
+  template:
+    src: etcd-events.j2
+    dest: "{{ bin_dir }}/etcd-events"
+    owner: 'root'
+    mode: 0755
+    backup: yes
+  when: etcd_events_cluster_setup
+  notify: restart etcd-events
# roles/etcd/tasks/join_etcd-events_member.yml (new file). Review fixes:
#  - member names carry the "-events" suffix so the ETCD_INITIAL_CLUSTER keys
#    agree with ETCD_NAME from etcd-events.env.j2;
#  - the final membership check greps for etcd_access_address, the same variable
#    the configure.yml check uses (etcd_events_access_address is not defined
#    anywhere in this patch);
#  - YAML comments were added, so the hunk length is 53 and the blob id on the
#    "index" line no longer reflects the content.
diff --git a/roles/etcd/tasks/join_etcd-events_member.yml b/roles/etcd/tasks/join_etcd-events_member.yml
new file mode 100644
index 000000000..104ef22df
--- /dev/null
+++ b/roles/etcd/tasks/join_etcd-events_member.yml
@@ -0,0 +1,53 @@
+---
+# Adds the host named by target_node to the existing etcd-events cluster,
+# refreshes its env config, starts the service and re-checks membership.
+- name: Join Member | Add member to cluster
+  shell: "{{ bin_dir }}/etcdctl --endpoints={{ etcd_events_access_addresses }} member add {{ etcd_member_name }}-events {{ etcd_events_peer_url }}"
+  register: member_add_result
+  until: member_add_result.rc == 0
+  retries: 4
+  delay: "{{ retry_stagger | random + 3 }}"
+  when: target_node == inventory_hostname
+  environment:
+    ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}.pem"
+    ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}-key.pem"
+
+# ETCD_INITIAL_CLUSTER must contain an entry keyed by this member's
+# ETCD_NAME, which etcd-events.env.j2 sets to "<etcd_member_name>-events".
+- include_tasks: refresh_config.yml
+  vars:
+    etcd_events_peer_addresses: >-
+      {% for host in groups['etcd'] -%}
+        {%- if hostvars[host]['etcd_events_member_in_cluster'].rc == 0 -%}
+          {{ "etcd"+loop.index|string }}-events=https://{{ hostvars[host].access_ip | default(hostvars[host].ip | default(hostvars[host].ansible_default_ipv4['address'])) }}:2382,
+        {%- endif -%}
+        {%- if loop.last -%}
+          {{ etcd_member_name }}-events={{ etcd_events_peer_url }}
+        {%- endif -%}
+      {%- endfor -%}
+  when: target_node == inventory_hostname
+
+- name: Join Member | reload systemd
+  command: systemctl daemon-reload
+  when: target_node == inventory_hostname
+
+- name: Join Member | Ensure etcd-events is running
+  service:
+    name: etcd-events
+    state: started
+    enabled: yes
+  when: target_node == inventory_hostname
+
+# The etcd-events URLs are built from etcd_access_address, so membership is
+# matched on that address.
+- name: Join Member | Ensure member is in etcd-events cluster
+  shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_events_access_addresses }} member list | grep -q {{ etcd_access_address }}"
+  register: etcd_events_member_in_cluster
+  changed_when: false
+  check_mode: no
+  tags:
+    - facts
+  when: target_node == inventory_hostname
+  environment:
+    ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}.pem"
+    ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}-key.pem"
# roles/etcd/tasks/join_etcd_member.yml (new file): join steps for the main
# etcd cluster; configure.yml now includes it in place of join_member.yml.
diff --git a/roles/etcd/tasks/join_etcd_member.yml b/roles/etcd/tasks/join_etcd_member.yml
new file mode 100644
index 000000000..b7801f0c9
--- /dev/null
+++ b/roles/etcd/tasks/join_etcd_member.yml
@@ -0,0 +1,47 @@
+---
+- name: Join Member | Add member to cluster
+  shell: "{{ bin_dir }}/etcdctl --endpoints={{ etcd_access_addresses }} member add {{ etcd_member_name }} {{ etcd_peer_url }}"
+  register: member_add_result
+  until: member_add_result.rc == 0
+  retries: 4
+  delay: "{{ retry_stagger | random + 3 }}"
+  when: target_node == inventory_hostname
+  environment:
+    ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}.pem"
+    ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}-key.pem"
+
+- include_tasks: refresh_config.yml
+  vars:
+    etcd_peer_addresses: >-
+      {% for host in groups['etcd'] -%}
+        {%- if
hostvars[host]['etcd_member_in_cluster'].rc == 0 -%}
+          {{ "etcd"+loop.index|string }}=https://{{ hostvars[host].access_ip | default(hostvars[host].ip | default(hostvars[host].ansible_default_ipv4['address'])) }}:2380,
+        {%- endif -%}
+        {%- if loop.last -%}
+          {{ etcd_member_name }}={{ etcd_peer_url }}
+        {%- endif -%}
+      {%- endfor -%}
+  when: target_node == inventory_hostname
+
+- name: Join Member | reload systemd
+  command: systemctl daemon-reload
+  when: target_node == inventory_hostname
+
+- name: Join Member | Ensure etcd is running
+  service:
+    name: etcd
+    state: started
+    enabled: yes
+  when: target_node == inventory_hostname
+
+- name: Join Member | Ensure member is in cluster
+  shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_access_addresses }} member list | grep -q {{ etcd_access_address }}"
+  register: etcd_member_in_cluster
+  changed_when: false
+  check_mode: no
+  tags:
+    - facts
+  when: target_node == inventory_hostname
+  environment:
+    ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}.pem"
+    ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}-key.pem"
# roles/etcd/tasks/main.yml: restarts and starts etcd-events alongside etcd.
# Review fix: "Restart etcd-events if certs changed" now notifies the
# "restart etcd-events" handler; it previously notified "restart etcd", so a
# certificate change never restarted the etcd-events service.
diff --git a/roles/etcd/tasks/main.yml b/roles/etcd/tasks/main.yml
index a8a9f23ad..bb299126b 100644
--- a/roles/etcd/tasks/main.yml
+++ b/roles/etcd/tasks/main.yml
@@ -43,6 +43,11 @@
   notify: restart etcd
   when: is_etcd_master and etcd_secret_changed|default(false)
 
+- name: Restart etcd-events if certs changed
+  command: /bin/true
+  notify: restart etcd-events
+  when: is_etcd_master and etcd_events_cluster_setup and etcd_secret_changed|default(false)
+
 # reload-systemd
 - meta: flush_handlers
 
@@ -53,6 +58,13 @@
     enabled: yes
   when: is_etcd_master and etcd_cluster_setup
 
+- name: Ensure etcd-events is running
+  service:
+    name: etcd-events
+    state: started
+    enabled: yes
+  when: is_etcd_master and etcd_events_cluster_setup
+
 # After etcd cluster is assembled, make sure that
 # initial state of the cluster is in `existing`
 # state insted of `new`.
# roles/etcd/tasks/refresh_config.yml: also renders /etc/etcd-events.env from
# etcd-events.env.j2 on etcd hosts when etcd_events_cluster_setup is set.
diff --git a/roles/etcd/tasks/refresh_config.yml b/roles/etcd/tasks/refresh_config.yml
index 0691d1df9..927663301 100644
--- a/roles/etcd/tasks/refresh_config.yml
+++ b/roles/etcd/tasks/refresh_config.yml
@@ -5,3 +5,10 @@
     dest: /etc/etcd.env
   notify: restart etcd
   when: is_etcd_master
+
+- name: Refresh config | Create etcd-events config file
+  template:
+    src: etcd-events.env.j2
+    dest: /etc/etcd-events.env
+  notify: restart etcd-events
+  when: is_etcd_master and etcd_events_cluster_setup
# roles/etcd/tasks/set_cluster_health.yml: adds a second cluster-health probe
# against the etcd-events endpoints, registered as
# etcd_events_cluster_is_healthy.
diff --git a/roles/etcd/tasks/set_cluster_health.yml b/roles/etcd/tasks/set_cluster_health.yml
index 955208633..68e738031 100644
--- a/roles/etcd/tasks/set_cluster_health.yml
+++ b/roles/etcd/tasks/set_cluster_health.yml
@@ -1,5 +1,5 @@
 ---
-- name: Configure | Check if cluster is healthy
+- name: Configure | Check if etcd cluster is healthy
   shell: "{{ bin_dir }}/etcdctl --endpoints={{ etcd_access_addresses }} cluster-health | grep -q 'cluster is healthy'"
   register: etcd_cluster_is_healthy
   ignore_errors: true
@@ -11,3 +11,16 @@
   environment:
     ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}.pem"
     ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}-key.pem"
+
+- name: Configure | Check if etcd-events cluster is healthy
+  shell: "{{ bin_dir }}/etcdctl --endpoints={{ etcd_events_access_addresses }} cluster-health | grep -q 'cluster is healthy'"
+  register: etcd_events_cluster_is_healthy
+  ignore_errors: true
+  changed_when: false
+  check_mode: no
+  when: is_etcd_master and etcd_events_cluster_setup
+  tags:
+    - facts
+  environment:
+    ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}.pem"
+    ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}-key.pem"
# roles/etcd/templates/etcd-events-docker.service.j2 (new file): systemd unit
# that runs the etcd-events launch script for the docker deployment type.
diff --git a/roles/etcd/templates/etcd-events-docker.service.j2 b/roles/etcd/templates/etcd-events-docker.service.j2
new file mode 100644
index 000000000..271980ab7
--- /dev/null
+++ b/roles/etcd/templates/etcd-events-docker.service.j2
@@ -0,0 +1,18 @@
+[Unit]
+Description=etcd docker wrapper
+Wants=docker.socket
+After=docker.service
+
+[Service]
+User=root
+PermissionsStartOnly=true
+EnvironmentFile=-/etc/etcd-events.env
+ExecStart={{ bin_dir }}/etcd-events
+ExecStartPre=-{{ docker_bin_dir }}/docker rm -f {{ etcd_member_name }}-events
+ExecStop={{ docker_bin_dir }}/docker stop {{ etcd_member_name }}-events
+Restart=always
+RestartSec=15s
+TimeoutStartSec=30s
+
+[Install]
+WantedBy=multi-user.target
# roles/etcd/templates/etcd-events-host.service.j2 (new file): systemd unit for
# the host deployment type; runs the etcd binary with /etc/etcd-events.env.
diff --git a/roles/etcd/templates/etcd-events-host.service.j2 b/roles/etcd/templates/etcd-events-host.service.j2
new file mode 100644
index 000000000..6e0167a8c
--- /dev/null
+++ b/roles/etcd/templates/etcd-events-host.service.j2
@@ -0,0 +1,16 @@
+[Unit]
+Description=etcd
+After=network.target
+
+[Service]
+Type=notify
+User=root
+EnvironmentFile=/etc/etcd-events.env
+ExecStart={{ bin_dir }}/etcd
+NotifyAccess=all
+Restart=always
+RestartSec=10s
+LimitNOFILE=40000
+
+[Install]
+WantedBy=multi-user.target
# roles/etcd/templates/etcd-events.env.j2 (new file): environment for the
# etcd-events member (client port 2381, peer port 2382).
# Review fix: ETCD_INITIAL_CLUSTER_STATE is now derived from the etcd-events
# health probe (etcd_events_cluster_is_healthy). It previously read the main
# cluster's etcd_cluster_is_healthy, so enabling etcd-events next to a healthy
# etcd would render "existing" for a cluster that has never been bootstrapped.
diff --git a/roles/etcd/templates/etcd-events.env.j2 b/roles/etcd/templates/etcd-events.env.j2
new file mode 100644
index 000000000..c168ab03e
--- /dev/null
+++ b/roles/etcd/templates/etcd-events.env.j2
@@ -0,0 +1,26 @@
+ETCD_DATA_DIR={{ etcd_events_data_dir }}
+ETCD_ADVERTISE_CLIENT_URLS={{ etcd_events_client_url }}
+ETCD_INITIAL_ADVERTISE_PEER_URLS={{ etcd_events_peer_url }}
+ETCD_INITIAL_CLUSTER_STATE={% if etcd_events_cluster_is_healthy.rc != 0 | bool %}new{% else %}existing{% endif %}
+
+ETCD_METRICS={{ etcd_metrics }}
+ETCD_LISTEN_CLIENT_URLS=https://{{ etcd_address }}:2381,https://127.0.0.1:2381
+ETCD_ELECTION_TIMEOUT={{ etcd_election_timeout }}
+ETCD_HEARTBEAT_INTERVAL={{ etcd_heartbeat_interval }}
+ETCD_INITIAL_CLUSTER_TOKEN=k8s_events_etcd
+ETCD_LISTEN_PEER_URLS=https://{{ etcd_address }}:2382
+ETCD_NAME={{ etcd_member_name }}-events
+ETCD_PROXY=off
+ETCD_INITIAL_CLUSTER={{ etcd_events_peer_addresses }}
+ETCD_AUTO_COMPACTION_RETENTION={{ etcd_compaction_retention }}
+
+# TLS settings
+ETCD_TRUSTED_CA_FILE={{ etcd_cert_dir
}}/ca.pem
+ETCD_CERT_FILE={{ etcd_cert_dir }}/member-{{ inventory_hostname }}.pem
+ETCD_KEY_FILE={{ etcd_cert_dir }}/member-{{ inventory_hostname }}-key.pem
+ETCD_CLIENT_CERT_AUTH={{ etcd_secure_client | lower}}
+
+ETCD_PEER_TRUSTED_CA_FILE={{ etcd_cert_dir }}/ca.pem
+ETCD_PEER_CERT_FILE={{ etcd_cert_dir }}/member-{{ inventory_hostname }}.pem
+ETCD_PEER_KEY_FILE={{ etcd_cert_dir }}/member-{{ inventory_hostname }}-key.pem
+ETCD_PEER_CLIENT_CERT_AUTH={{ etcd_peer_client_auth }}
# roles/etcd/templates/etcd-events.j2 (new file): launch script that starts the
# etcd-events container with /etc/etcd-events.env, host networking and the
# etcd-events data directory mounted read-write.
diff --git a/roles/etcd/templates/etcd-events.j2 b/roles/etcd/templates/etcd-events.j2
new file mode 100644
index 000000000..3b25eaaff
--- /dev/null
+++ b/roles/etcd/templates/etcd-events.j2
@@ -0,0 +1,22 @@
+#!/bin/bash
+{{ docker_bin_dir }}/docker run \
+  --restart=on-failure:5 \
+  --env-file=/etc/etcd-events.env \
+  --net=host \
+  -v /etc/ssl/certs:/etc/ssl/certs:ro \
+  -v {{ etcd_cert_dir }}:{{ etcd_cert_dir }}:ro \
+  -v {{ etcd_events_data_dir }}:{{ etcd_events_data_dir }}:rw \
+  {% if etcd_memory_limit is defined %}
+  --memory={{ etcd_memory_limit|regex_replace('Mi', 'M') }} \
+  {% endif %}
+  --oom-kill-disable \
+  {% if etcd_cpu_limit is defined %}
+  --cpu-shares={{ etcd_cpu_limit|regex_replace('m', '') }} \
+  {% endif %}
+  {% if etcd_blkio_weight is defined %}
+  --blkio-weight={{ etcd_blkio_weight }} \
+  {% endif %}
+  --name={{ etcd_member_name }}-events \
+  {{ etcd_image_repo }}:{{ etcd_image_tag }} \
+  /usr/local/bin/etcd \
+  "$@"
# roles/kubernetes/master kube-apiserver manifest: passes the etcd-events
# endpoints to the apiserver when etcd_events_cluster_setup is set.
diff --git a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2
index 2ec231f4c..bee13b4ec 100644
--- a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2
+++ b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2
@@ -30,6 +30,9 @@ spec:
     - apiserver
     - --advertise-address={{ ip | default(ansible_default_ipv4.address) }}
     - --etcd-servers={{ etcd_access_addresses }}
+{% if etcd_events_cluster_setup %}
+    -
--etcd-servers-overrides=/events#{{ etcd_events_access_addresses | replace(',', ';') }}
+{% endif %}
 {% if kube_version | version_compare('v1.9', '<') %}
     - --etcd-quorum-read=true
 {% endif %}
# Review note (kube-apiserver.manifest.j2 hunk above): in
# --etcd-servers-overrides a comma separates whole overrides, while the server
# URLs inside one override are separated by semicolons. The comma-joined
# etcd_events_access_addresses list is therefore converted with replace(',', ';')
# so that a multi-member etcd-events cluster yields a single valid override.

# roles/kubespray-defaults: the etcd_events_cluster_setup switch (default
# false) plus the etcd-events peer/client URLs and endpoint list.
diff --git a/roles/kubespray-defaults/defaults/main.yaml b/roles/kubespray-defaults/defaults/main.yaml
index 0a4429e05..7c0a0f12c 100644
--- a/roles/kubespray-defaults/defaults/main.yaml
+++ b/roles/kubespray-defaults/defaults/main.yaml
@@ -289,16 +289,25 @@ kube_apiserver_client_key: |-
   {{ kube_cert_dir }}/apiserver-key.pem
   {%- endif %}
 
+# Set to true to deploy etcd-events cluster
+etcd_events_cluster_setup: false
+
 # Vars for pointing to etcd endpoints
 is_etcd_master: "{{ inventory_hostname in groups['etcd'] }}"
 etcd_address: "{{ ip | default(ansible_default_ipv4['address']) }}"
 etcd_access_address: "{{ access_ip | default(etcd_address) }}"
 etcd_peer_url: "https://{{ etcd_access_address }}:2380"
 etcd_client_url: "https://{{ etcd_access_address }}:2379"
+etcd_events_peer_url: "https://{{ etcd_access_address }}:2382"
+etcd_events_client_url: "https://{{ etcd_access_address }}:2381"
 etcd_access_addresses: |-
   {% for item in groups['etcd'] -%}
     https://{{ hostvars[item]['access_ip'] | default(hostvars[item]['ip'] | default(hostvars[item]['ansible_default_ipv4']['address'])) }}:2379{% if not loop.last %},{% endif %}
   {%- endfor %}
+etcd_events_access_addresses: |-
+  {% for item in groups['etcd'] -%}
+    https://{{ hostvars[item]['access_ip'] | default(hostvars[item]['ip'] | default(hostvars[item]['ansible_default_ipv4']['address'])) }}:2381{% if not loop.last %},{% endif %}
+  {%- endfor %}
 etcd_member_name: |-
   {% for host in groups['etcd'] %}
   {% if inventory_hostname == host %}{{"etcd"+loop.index|string }}{% endif %}
@@ -307,3 +316,7 @@ etcd_peer_addresses: |-
   {% for item in groups['etcd'] -%}
     {{ "etcd"+loop.index|string }}=https://{{ hostvars[item].access_ip | default(hostvars[item].ip | default(hostvars[item].ansible_default_ipv4['address'])) }}:2380{% if not loop.last %},{% endif %}
   {%- endfor %}
+etcd_events_peer_addresses: |-
+  {% for item in groups['etcd'] -%}
+    {{ "etcd"+loop.index|string }}-events=https://{{ hostvars[item].access_ip | default(hostvars[item].ip | default(hostvars[item].ansible_default_ipv4['address'])) }}:2382{% if not loop.last %},{% endif %}
+  {%- endfor %}
# roles/reset: stops and removes the etcd-events service and deletes its data
# directory and launch script.
# Review fix: the data directory entry follows etcd_events_data_dir, matching
# the templated etcd_data_dir entry next to it, and falls back to the previous
# literal /var/lib/etcd-events when that variable is not visible to this role.
diff --git a/roles/reset/tasks/main.yml b/roles/reset/tasks/main.yml
index 88e9065c8..9b87a1a13 100644
--- a/roles/reset/tasks/main.yml
+++ b/roles/reset/tasks/main.yml
@@ -8,6 +8,7 @@
     - kubelet
     - vault
     - etcd
+    - etcd-events
   failed_when: false
   tags:
     - services
@@ -19,6 +20,7 @@
   with_items:
     - kubelet
     - etcd
+    - etcd-events
     - vault
     - calico-node
   register: services_removed
@@ -95,6 +97,7 @@
     - /root/.kube
     - /root/.helm
     - "{{ etcd_data_dir }}"
+    - "{{ etcd_events_data_dir | default('/var/lib/etcd-events') }}"
     - /etc/ssl/etcd
     - /var/log/calico
     - /etc/cni
@@ -125,6 +128,7 @@
     - "{{ bin_dir }}/kubelet"
     - "{{ bin_dir }}/etcd-scripts"
     - "{{ bin_dir }}/etcd"
+    - "{{ bin_dir }}/etcd-events"
     - "{{ bin_dir }}/etcdctl"
     - "{{ bin_dir }}/kubernetes-scripts"
     - "{{ bin_dir }}/kubectl"
# tests/files: enables the etcd-events cluster in the centos7 flannel addons
# CI scenario (host deployment type).
diff --git a/tests/files/gce_centos7-flannel-addons.yml b/tests/files/gce_centos7-flannel-addons.yml
index 272c5e7ae..0e4346f67 100644
--- a/tests/files/gce_centos7-flannel-addons.yml
+++ b/tests/files/gce_centos7-flannel-addons.yml
@@ -9,6 +9,7 @@ kube_network_plugin: flannel
 helm_enabled: true
 istio_enabled: true
 efk_enabled: true
+etcd_events_cluster_setup: true
 local_volume_provisioner_enabled: true
 etcd_deployment_type: host
 deploy_netchecker: true