From 9eacde212f7a322b3c61b8ef1074435c1d02f4bd Mon Sep 17 00:00:00 2001
From: Florian Ruynat <16313165+floryut@users.noreply.github.com>
Date: Wed, 27 Oct 2021 00:23:09 +0200
Subject: [PATCH] Fix quorum check when recovering broken etcd cluster (#8126)

---

Notes:
    roles/etcd/tasks/join_etcd_member.yml: the task that registers
    etcd_member_in_cluster gains retries/delay/until, so it is retried
    (up to etcd_retries times) until its return code is 0.

    roles/recover_control_plane/etcd/tasks/main.yml: has_quorum now counts
    "is healthy" lines from stdout_lines; "is unhealthy" lines are still
    counted from stderr_lines. The comment above the task is removed.

 roles/etcd/tasks/join_etcd_member.yml           | 3 +++
 roles/recover_control_plane/etcd/tasks/main.yml | 3 +--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/roles/etcd/tasks/join_etcd_member.yml b/roles/etcd/tasks/join_etcd_member.yml
index 28d259ccc..22440394f 100644
--- a/roles/etcd/tasks/join_etcd_member.yml
+++ b/roles/etcd/tasks/join_etcd_member.yml
@@ -32,6 +32,9 @@
   register: etcd_member_in_cluster
   changed_when: false
   check_mode: no
+  retries: "{{ etcd_retries }}"
+  delay: "{{ retry_stagger | random + 3 }}"
+  until: etcd_member_in_cluster.rc == 0
   tags:
     - facts
   environment:
diff --git a/roles/recover_control_plane/etcd/tasks/main.yml b/roles/recover_control_plane/etcd/tasks/main.yml
index e3dc33930..45e2c65e4 100644
--- a/roles/recover_control_plane/etcd/tasks/main.yml
+++ b/roles/recover_control_plane/etcd/tasks/main.yml
@@ -20,10 +20,9 @@
   when:
     - groups['broken_etcd']
 
-# When there is an error, everything is printed in stderr_lines, even "is healthy" messages.
 - name: Set has_quorum fact
   set_fact:
-    has_quorum: "{{ etcd_endpoint_health.stderr_lines | select('match', '.*is healthy.*') | list | length >= etcd_endpoint_health.stderr_lines | select('match', '.*is unhealthy.*') | list | length }}"
+    has_quorum: "{{ etcd_endpoint_health.stdout_lines | select('match', '.*is healthy.*') | list | length >= etcd_endpoint_health.stderr_lines | select('match', '.*is unhealthy.*') | list | length }}"
   when:
     - groups['broken_etcd']
 