--- - name: Get etcd endpoint health command: "{{ bin_dir }}/etcdctl endpoint health" register: etcd_endpoint_health ignore_errors: true # noqa ignore-errors changed_when: false check_mode: no environment: ETCDCTL_API: 3 ETCDCTL_ENDPOINTS: "{{ etcd_access_addresses }}" ETCDCTL_CERT: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem" ETCDCTL_KEY: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem" ETCDCTL_CACERT: "{{ etcd_cert_dir }}/ca.pem" when: - groups['broken_etcd'] - name: Set healthy fact set_fact: healthy: "{{ etcd_endpoint_health.stderr is match('Error: unhealthy cluster') }}" when: - groups['broken_etcd'] - name: Set has_quorum fact set_fact: has_quorum: "{{ etcd_endpoint_health.stdout_lines | select('match', '.*is healthy.*') | list | length >= etcd_endpoint_health.stderr_lines | select('match', '.*is unhealthy.*') | list | length }}" when: - groups['broken_etcd'] - include_tasks: recover_lost_quorum.yml when: - groups['broken_etcd'] - not has_quorum - name: Remove etcd data dir file: path: "{{ etcd_data_dir }}" state: absent delegate_to: "{{ item }}" with_items: "{{ groups['broken_etcd'] }}" ignore_errors: true # noqa ignore-errors when: - groups['broken_etcd'] - has_quorum - name: Delete old certificates # noqa 302 ignore-error - rm is ok here for now shell: "rm {{ etcd_cert_dir }}/*{{ item }}*" with_items: "{{ groups['broken_etcd'] }}" register: delete_old_cerificates ignore_errors: true when: groups['broken_etcd'] - name: Fail if unable to delete old certificates fail: msg: "Unable to delete old certificates for: {{ item.item }}" loop: "{{ delete_old_cerificates.results }}" changed_when: false when: - groups['broken_etcd'] - "item.rc != 0 and not 'No such file or directory' in item.stderr" - name: Get etcd cluster members command: "{{ bin_dir }}/etcdctl member list" register: member_list changed_when: false check_mode: no environment: ETCDCTL_API: 3 ETCDCTL_ENDPOINTS: "{{ etcd_access_addresses }}" ETCDCTL_CERT: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem" ETCDCTL_KEY: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem" ETCDCTL_CACERT: "{{ etcd_cert_dir }}/ca.pem" when: - groups['broken_etcd'] - not healthy - has_quorum - name: Remove broken cluster members command: "{{ bin_dir }}/etcdctl member remove {{ item[1].replace(' ','').split(',')[0] }}" environment: ETCDCTL_API: 3 ETCDCTL_ENDPOINTS: "{{ etcd_access_addresses }}" ETCDCTL_CERT: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem" ETCDCTL_KEY: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem" ETCDCTL_CACERT: "{{ etcd_cert_dir }}/ca.pem" with_nested: - "{{ groups['broken_etcd'] }}" - "{{ member_list.stdout_lines }}" when: - groups['broken_etcd'] - not healthy - has_quorum - hostvars[item[0]]['etcd_member_name'] == item[1].replace(' ','').split(',')[2]