---
# Recovery tasks for etcd members listed in the `broken_etcd` inventory group.
#
# Flow, as implemented below:
#   1. Query endpoint health and derive the `healthy` and `has_quorum` facts.
#   2. If quorum is lost, hand over to recover_lost_quorum.yml.
#   3. Otherwise wipe the data dir on every broken host, delete that host's
#      certificates from the local certificate directory, and remove the
#      matching members from the cluster.
#
# Every task is guarded on `broken_etcd` being non-empty, so the file does
# nothing when there are no broken members.

- name: Get etcd endpoint health
  command: >-
    {{ bin_dir }}/etcdctl
    --cacert {{ etcd_cert_dir }}/ca.pem
    --cert {{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem
    --key {{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem
    --endpoints={{ etcd_access_addresses }}
    endpoint health
  register: etcd_endpoint_health
  # Failures are tolerated here: stdout/stderr are inspected by the tasks below.
  ignore_errors: true
  changed_when: false
  check_mode: false
  environment:
    ETCDCTL_API: "3"
  when:
    - groups['broken_etcd'] | length > 0

- name: Set healthy fact
  set_fact:
    # `is match` anchors at the start of stderr. The `| match(...)` filter form
    # used previously was removed in Ansible 2.9.
    # NOTE(review): this fact is true when stderr *matches* the "unhealthy"
    # message, which reads inverted relative to its name. The semantics are
    # kept unchanged because later tasks gate on `not healthy` -- confirm the
    # intent before changing it.
    healthy: "{{ etcd_endpoint_health.stderr is match('Error: unhealthy cluster') }}"
  when:
    - groups['broken_etcd'] | length > 0

- name: Set has_quorum fact
  set_fact:
    # Compares the number of "is healthy" lines on stdout with the number of
    # "is unhealthy" lines on stderr.
    # NOTE(review): `>=` treats a tie as "has quorum" -- confirm that is
    # intended for clusters with an even number of members.
    has_quorum: >-
      {{ (etcd_endpoint_health.stdout_lines | select('match', '.*is healthy.*') | list | length)
      >= (etcd_endpoint_health.stderr_lines | select('match', '.*is unhealthy.*') | list | length) }}
  # Guarded like its neighbours: when the health check above is skipped,
  # `etcd_endpoint_health` has no stdout_lines/stderr_lines to read.
  when:
    - groups['broken_etcd'] | length > 0

- name: Recover from lost quorum
  include_tasks: recover_lost_quorum.yml
  when:
    - groups['broken_etcd'] | length > 0
    - not has_quorum

- name: Remove etcd data dir
  file:
    path: "{{ etcd_data_dir }}"
    state: absent
  # Runs on each broken host in turn rather than on the current host.
  delegate_to: "{{ item }}"
  loop: "{{ groups['broken_etcd'] }}"
  when:
    - groups['broken_etcd'] | length > 0
    - has_quorum

- name: Delete old certificates
  # noqa 302 - rm is ok here for now
  # `shell` is required for the glob. Errors are ignored here and evaluated by
  # the next task, which tolerates "No such file or directory".
  # The registered name keeps its original spelling in case other files read it.
  shell: "rm {{ etcd_cert_dir }}/*{{ item }}*"
  loop: "{{ groups['broken_etcd'] }}"
  register: delete_old_cerificates
  ignore_errors: true
  when:
    - groups['broken_etcd'] | length > 0

- name: Fail if unable to delete old certificates
  fail:
    msg: "Unable to delete old certificates for: {{ item.item }}"
  loop: "{{ delete_old_cerificates.results }}"
  when:
    - groups['broken_etcd'] | length > 0
    - "item.rc != 0 and 'No such file or directory' not in item.stderr"

- name: Get etcd cluster members
  # NOTE(review): unlike the other etcdctl calls in this file, no --endpoints
  # is passed here, so etcdctl falls back to its default endpoint -- confirm
  # this is intended.
  command: >-
    {{ bin_dir }}/etcdctl
    --cacert {{ etcd_cert_dir }}/ca.pem
    --cert {{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem
    --key {{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem
    member list
  register: member_list
  changed_when: false
  check_mode: false
  environment:
    ETCDCTL_API: "3"
  when:
    - groups['broken_etcd'] | length > 0
    - not healthy
    - has_quorum

- name: Remove broken cluster members
  # Each loop item is a pair: item[0] is a broken host, item[1] is one line of
  # `member list` output. After stripping spaces and splitting on commas,
  # field 0 is passed to `member remove` and field 2 is compared with the
  # host's `etcd_member_name`.
  command: >-
    {{ bin_dir }}/etcdctl
    --cacert {{ etcd_cert_dir }}/ca.pem
    --cert {{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem
    --key {{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem
    --endpoints={{ etcd_access_addresses }}
    member remove {{ item[1].replace(' ', '').split(',')[0] }}
  environment:
    ETCDCTL_API: "3"
  loop: "{{ groups['broken_etcd'] | product(member_list.stdout_lines) | list }}"
  when:
    - groups['broken_etcd'] | length > 0
    - not healthy
    - has_quorum
    - "hostvars[item[0]]['etcd_member_name'] == item[1].replace(' ', '').split(',')[2]"