From dd4bc5fbfe92611de9021bfed319f6c8b8016565 Mon Sep 17 00:00:00 2001 From: Samuel Liu Date: Fri, 9 Sep 2022 16:29:22 +0800 Subject: [PATCH] [etcd] Sometimes, we do not need to run etcd role on all nodes. (#9173) * WIP: sometimes,we not run etcd * fix ansible lint * like calico(kdd) cni, no need run etcd --- cluster.yml | 7 +- roles/etcd/defaults/main.yml | 2 +- roles/etcd/tasks/check_certs.yml | 38 ++++++++-- roles/etcd/tasks/gen_certs_script.yml | 78 ++++++++++----------- roles/etcd/tasks/gen_nodes_certs_script.yml | 32 +++++++++ roles/etcd/tasks/main.yml | 18 ++++- scale.yml | 16 ++++- upgrade-cluster.yml | 7 +- 8 files changed, 140 insertions(+), 58 deletions(-) create mode 100644 roles/etcd/tasks/gen_nodes_certs_script.yml diff --git a/cluster.yml b/cluster.yml index cc169f80b..5f163de6a 100644 --- a/cluster.yml +++ b/cluster.yml @@ -35,7 +35,7 @@ - { role: "container-engine", tags: "container-engine", when: deploy_container_engine } - { role: download, tags: download, when: "not skip_downloads" } -- hosts: etcd +- hosts: etcd:kube_control_plane gather_facts: False any_errors_fatal: "{{ any_errors_fatal | default(true) }}" environment: "{{ proxy_disable_env }}" @@ -59,7 +59,10 @@ vars: etcd_cluster_setup: false etcd_events_cluster_setup: false - when: etcd_deployment_type != "kubeadm" + when: + - etcd_deployment_type != "kubeadm" + - kube_network_plugin in ["calico", "flannel", "canal", "cilium"] or cilium_deploy_additionally | default(false) | bool + - kube_network_plugin != "calico" or calico_datastore == "etcd" - hosts: k8s_cluster gather_facts: False diff --git a/roles/etcd/defaults/main.yml b/roles/etcd/defaults/main.yml index 1f11e8ddc..bf38acee5 100644 --- a/roles/etcd/defaults/main.yml +++ b/roles/etcd/defaults/main.yml @@ -66,7 +66,7 @@ etcd_memory_limit: "{% if ansible_memtotal_mb < 4096 %}512M{% else %}0{% endif % etcd_blkio_weight: 1000 -etcd_node_cert_hosts: "{{ groups['k8s_cluster'] | union(groups.get('calico_rr', [])) }}" +etcd_node_cert_hosts: "{{ groups['k8s_cluster'] }}" etcd_compaction_retention: "8" diff --git a/roles/etcd/tasks/check_certs.yml b/roles/etcd/tasks/check_certs.yml index ed0580b55..c688c16d8 100644 --- a/roles/etcd/tasks/check_certs.yml +++ b/roles/etcd/tasks/check_certs.yml @@ -33,14 +33,13 @@ stat: path: "{{ etcd_cert_dir }}/{{ item }}" register: etcd_node_certs - when: (('calico_rr' in groups and inventory_hostname in groups['calico_rr']) or - inventory_hostname in groups['k8s_cluster']) + when: inventory_hostname in groups['k8s_cluster'] with_items: - ca.pem - node-{{ inventory_hostname }}.pem - node-{{ inventory_hostname }}-key.pem -- name: "Check_certs | Set 'gen_certs' to true if expected certificates are not on the first etcd node" +- name: "Check_certs | Set 'gen_certs' to true if expected certificates are not on the first etcd node(1/2)" set_fact: gen_certs: true when: force_etcd_cert_refresh or not item in etcdcert_master.files|map(attribute='path') | list @@ -56,13 +55,39 @@ '{{ etcd_cert_dir }}/member-{{ host }}.pem', '{{ etcd_cert_dir }}/member-{{ host }}-key.pem', {% endfor %} - {% set k8s_nodes = groups['k8s_cluster']|union(groups['calico_rr']|default([]))|unique|sort %} + {% set k8s_nodes = groups['kube_control_plane'] %} {% for host in k8s_nodes %} '{{ etcd_cert_dir }}/node-{{ host }}.pem', '{{ etcd_cert_dir }}/node-{{ host }}-key.pem' {% if not loop.last %}{{','}}{% endif %} {% endfor %}] +- name: "Check_certs | Set 'gen_certs' to true if expected certificates are not on the first etcd node(2/2)" + set_fact: + gen_certs: true + run_once: true + with_items: "{{ expected_files }}" + vars: + expected_files: >- + ['{{ etcd_cert_dir }}/ca.pem', + {% set etcd_members = groups['etcd'] %} + {% for host in etcd_members %} + '{{ etcd_cert_dir }}/admin-{{ host }}.pem', + '{{ etcd_cert_dir }}/admin-{{ host }}-key.pem', + '{{ etcd_cert_dir }}/member-{{ host }}.pem', + '{{ etcd_cert_dir }}/member-{{ host }}-key.pem', + {% endfor %} + {% set k8s_nodes = groups['k8s_cluster']|unique|sort %} + {% for host in k8s_nodes %} + '{{ etcd_cert_dir }}/node-{{ host }}.pem', + '{{ etcd_cert_dir }}/node-{{ host }}-key.pem' + {% if not loop.last %}{{','}}{% endif %} + {% endfor %}] + when: + - kube_network_plugin in ["calico", "flannel", "canal", "cilium"] or cilium_deploy_additionally | default(false) | bool + - kube_network_plugin != "calico" or calico_datastore == "etcd" + - force_etcd_cert_refresh or not item in etcdcert_master.files|map(attribute='path') | list + - name: "Check_certs | Set 'gen_master_certs' object to track whether member and admin certs exist on first etcd node" set_fact: gen_master_certs: |- @@ -89,7 +114,7 @@ set_fact: gen_node_certs: |- { - {% set k8s_nodes = groups['k8s_cluster']|union(groups['calico_rr']|default([]))|unique|sort -%} + {% set k8s_nodes = groups['k8s_cluster'] -%} {% set existing_certs = etcdcert_master.files|map(attribute='path')|list|sort %} {% for host in k8s_nodes -%} {% set host_cert = "%s/node-%s.pem"|format(etcd_cert_dir, host) %} @@ -125,8 +150,7 @@ set_fact: kubernetes_host_requires_sync: true when: - - (('calico_rr' in groups and inventory_hostname in groups['calico_rr']) or - inventory_hostname in groups['k8s_cluster']) and + - inventory_hostname in groups['k8s_cluster'] and inventory_hostname not in groups['etcd'] - (not etcd_node_certs.results[0].stat.exists|default(false)) or (not etcd_node_certs.results[1].stat.exists|default(false)) or diff --git a/roles/etcd/tasks/gen_certs_script.yml b/roles/etcd/tasks/gen_certs_script.yml index fb619bdb0..eb97a824d 100644 --- a/roles/etcd/tasks/gen_certs_script.yml +++ b/roles/etcd/tasks/gen_certs_script.yml @@ -38,7 +38,7 @@ - gen_certs|default(false) - inventory_hostname == groups['etcd'][0] -- name: Gen_certs | run cert generation script +- name: Gen_certs | run cert generation script for etcd and kube control plane nodes command: "bash -x {{ etcd_script_dir }}/make-ssl-etcd.sh -f {{ etcd_config_dir }}/openssl.conf -d {{ etcd_cert_dir }}" environment: - MASTERS: "{% for m in groups['etcd'] %} @@ -46,7 +46,7 @@ {{ m }} {% endif %} {% endfor %}" - - HOSTS: "{% for h in (groups['k8s_cluster'] + groups['calico_rr']|default([]))|unique %} + - HOSTS: "{% for h in groups['kube_control_plane'] %} {% if gen_node_certs[h] %} {{ h }} {% endif %} @@ -56,7 +56,23 @@ when: gen_certs|default(false) notify: set etcd_secret_changed -- name: Gen_certs | Gather etcd member and admin certs from first etcd node +- name: Gen_certs | run cert generation script for all clients + command: "bash -x {{ etcd_script_dir }}/make-ssl-etcd.sh -f {{ etcd_config_dir }}/openssl.conf -d {{ etcd_cert_dir }}" + environment: + - HOSTS: "{% for h in groups['k8s_cluster'] %} + {% if gen_node_certs[h] %} + {{ h }} + {% endif %} + {% endfor %}" + run_once: yes + delegate_to: "{{ groups['etcd'][0] }}" + when: + - kube_network_plugin in ["calico", "flannel", "canal", "cilium"] or cilium_deploy_additionally | default(false) | bool + - kube_network_plugin != "calico" or calico_datastore == "etcd" + - gen_certs|default(false) + notify: set etcd_secret_changed + +- name: Gen_certs | Gather etcd member/admin and kube_control_plane clinet certs from first etcd node slurp: src: "{{ item }}" register: etcd_master_certs @@ -69,6 +85,10 @@ '{{ etcd_cert_dir }}/member-{{ node }}.pem', '{{ etcd_cert_dir }}/member-{{ node }}-key.pem', {% endfor %}]" + - "[{% for node in (groups['kube_control_plane']) %} + '{{ etcd_cert_dir }}/node-{{ node }}.pem', + '{{ etcd_cert_dir }}/node-{{ node }}-key.pem', + {% endfor %}]" delegate_to: "{{ groups['etcd'][0] }}" when: - inventory_hostname in groups['etcd'] @@ -76,7 +96,7 @@ - inventory_hostname != groups['etcd'][0] notify: set etcd_secret_changed -- name: Gen_certs | Write etcd member and admin certs to other etcd nodes +- name: Gen_certs | Write etcd member/admin and kube_control_plane clinet certs to other etcd nodes copy: dest: "{{ item.item }}" content: "{{ item.content | b64decode }}" @@ -96,7 +116,7 @@ src: "{{ item }}" register: etcd_master_node_certs with_items: - - "[{% for node in (groups['k8s_cluster'] + groups['calico_rr']|default([]))|unique %} + - "[{% for node in groups['k8s_cluster'] %} '{{ etcd_cert_dir }}/node-{{ node }}.pem', '{{ etcd_cert_dir }}/node-{{ node }}-key.pem', {% endfor %}]" @@ -104,6 +124,8 @@ when: - inventory_hostname in groups['etcd'] - inventory_hostname != groups['etcd'][0] + - kube_network_plugin in ["calico", "flannel", "canal", "cilium"] or cilium_deploy_additionally | default(false) | bool + - kube_network_plugin != "calico" or calico_datastore == "etcd" notify: set etcd_secret_changed - name: Gen_certs | Write node certs to other etcd nodes @@ -117,47 +139,21 @@ when: - inventory_hostname in groups['etcd'] - inventory_hostname != groups['etcd'][0] + - kube_network_plugin in ["calico", "flannel", "canal", "cilium"] or cilium_deploy_additionally | default(false) | bool + - kube_network_plugin != "calico" or calico_datastore == "etcd" loop_control: label: "{{ item.item }}" -- name: Gen_certs | Set cert names per node - set_fact: - my_etcd_node_certs: [ 'ca.pem', - 'node-{{ inventory_hostname }}.pem', - 'node-{{ inventory_hostname }}-key.pem'] - tags: - - facts - -- name: "Check_certs | Set 'sync_certs' to true on nodes" - set_fact: - sync_certs: true - when: (('calico_rr' in groups and inventory_hostname in groups['calico_rr']) or - inventory_hostname in groups['k8s_cluster']) and - inventory_hostname not in groups['etcd'] - with_items: - - "{{ my_etcd_node_certs }}" - -- name: Gen_certs | Gather node certs - shell: "set -o pipefail && tar cfz - -C {{ etcd_cert_dir }} {{ my_etcd_node_certs|join(' ') }} | base64 --wrap=0" - args: - executable: /bin/bash - warn: false - no_log: "{{ not (unsafe_show_logs|bool) }}" - register: etcd_node_certs - check_mode: no - delegate_to: "{{ groups['etcd'][0] }}" - when: (('calico_rr' in groups and inventory_hostname in groups['calico_rr']) or - inventory_hostname in groups['k8s_cluster']) and +- include_tasks: gen_nodes_certs_script.yml + when: + - inventory_hostname in groups['kube_control_plane'] and sync_certs|default(false) and inventory_hostname not in groups['etcd'] -- name: Gen_certs | Copy certs on nodes - shell: "set -o pipefail && base64 -d <<< '{{ etcd_node_certs.stdout|quote }}' | tar xz -C {{ etcd_cert_dir }}" - args: - executable: /bin/bash - no_log: "{{ not (unsafe_show_logs|bool) }}" - changed_when: false - when: (('calico_rr' in groups and inventory_hostname in groups['calico_rr']) or - inventory_hostname in groups['k8s_cluster']) and +- include_tasks: gen_nodes_certs_script.yml + when: + - kube_network_plugin in ["calico", "flannel", "canal", "cilium"] or cilium_deploy_additionally | default(false) | bool + - kube_network_plugin != "calico" or calico_datastore == "etcd" + - inventory_hostname in groups['k8s_cluster'] and sync_certs|default(false) and inventory_hostname not in groups['etcd'] - name: Gen_certs | check certificate permissions diff --git a/roles/etcd/tasks/gen_nodes_certs_script.yml b/roles/etcd/tasks/gen_nodes_certs_script.yml new file mode 100644 index 000000000..d176e01aa --- /dev/null +++ b/roles/etcd/tasks/gen_nodes_certs_script.yml @@ -0,0 +1,32 @@ +--- +- name: Gen_certs | Set cert names per node + set_fact: + my_etcd_node_certs: [ 'ca.pem', + 'node-{{ inventory_hostname }}.pem', + 'node-{{ inventory_hostname }}-key.pem'] + tags: + - facts + +- name: "Check_certs | Set 'sync_certs' to true on nodes" + set_fact: + sync_certs: true + with_items: + - "{{ my_etcd_node_certs }}" + +- name: Gen_certs | Gather node certs + shell: "set -o pipefail && tar cfz - -C {{ etcd_cert_dir }} {{ my_etcd_node_certs|join(' ') }} | base64 --wrap=0" + args: + executable: /bin/bash + warn: false + no_log: "{{ not (unsafe_show_logs|bool) }}" + register: etcd_node_certs + check_mode: no + delegate_to: "{{ groups['etcd'][0] }}" + changed_when: false + +- name: Gen_certs | Copy certs on nodes + shell: "set -o pipefail && base64 -d <<< '{{ etcd_node_certs.stdout|quote }}' | tar xz -C {{ etcd_cert_dir }}" + args: + executable: /bin/bash + no_log: "{{ not (unsafe_show_logs|bool) }}" + changed_when: false diff --git a/roles/etcd/tasks/main.yml b/roles/etcd/tasks/main.yml index 465be73fa..fb593dbae 100644 --- a/roles/etcd/tasks/main.yml +++ b/roles/etcd/tasks/main.yml @@ -12,6 +12,16 @@ - etcd-secrets - include_tasks: upd_ca_trust.yml + when: + - inventory_hostname in groups['etcd']|union(groups['kube_control_plane'])|unique|sort + tags: + - etcd-secrets + +- include_tasks: upd_ca_trust.yml + when: + - kube_network_plugin in ["calico", "flannel", "canal", "cilium"] or cilium_deploy_additionally | default(false) | bool + - kube_network_plugin != "calico" or calico_datastore == "etcd" + - inventory_hostname in groups['k8s_cluster'] tags: - etcd-secrets @@ -21,7 +31,9 @@ changed_when: false check_mode: no when: - - inventory_hostname in groups['k8s_cluster']|union(groups['calico_rr']|default([]))|unique|sort + - kube_network_plugin in ["calico", "flannel", "canal", "cilium"] or cilium_deploy_additionally | default(false) | bool + - kube_network_plugin != "calico" or calico_datastore == "etcd" + - inventory_hostname in groups['k8s_cluster'] tags: - master - network @@ -30,7 +42,9 @@ set_fact: etcd_client_cert_serial: "{{ etcd_client_cert_serial_result.stdout.split('=')[1] }}" when: - - inventory_hostname in groups['k8s_cluster']|union(groups['calico_rr']|default([]))|unique|sort + - kube_network_plugin in ["calico", "flannel", "canal", "cilium"] or cilium_deploy_additionally | default(false) | bool + - kube_network_plugin != "calico" or calico_datastore == "etcd" + - inventory_hostname in groups['k8s_cluster'] tags: - master - network diff --git a/scale.yml b/scale.yml index 533b97727..8e79bfa03 100644 --- a/scale.yml +++ b/scale.yml @@ -27,7 +27,7 @@ import_playbook: facts.yml - name: Generate the etcd certificates beforehand - hosts: etcd + hosts: etcd:kube_control_plane gather_facts: False any_errors_fatal: "{{ any_errors_fatal | default(true) }}" environment: "{{ proxy_disable_env }}" @@ -38,7 +38,10 @@ vars: etcd_cluster_setup: false etcd_events_cluster_setup: false - when: etcd_deployment_type != "kubeadm" + when: + - etcd_deployment_type != "kubeadm" + - kube_network_plugin in ["calico", "flannel", "canal", "cilium"] or cilium_deploy_additionally | default(false) | bool + - kube_network_plugin != "calico" or calico_datastore == "etcd" - name: Download images to ansible host cache via first kube_control_plane node hosts: kube_control_plane[0] @@ -60,7 +63,14 @@ - { role: kubernetes/preinstall, tags: preinstall } - { role: container-engine, tags: "container-engine", when: deploy_container_engine } - { role: download, tags: download, when: "not skip_downloads" } - - { role: etcd, tags: etcd, etcd_cluster_setup: false, when: "etcd_deployment_type != 'kubeadm'" } + - role: etcd + tags: etcd + vars: + etcd_cluster_setup: false + when: + - etcd_deployment_type != "kubeadm" + - kube_network_plugin in ["calico", "flannel", "canal", "cilium"] or cilium_deploy_additionally | default(false) | bool + - kube_network_plugin != "calico" or calico_datastore == "etcd" - name: Target only workers to get kubelet installed and checking in on any new nodes(node) hosts: kube_node diff --git a/upgrade-cluster.yml b/upgrade-cluster.yml index 35064ec60..39dd95a01 100644 --- a/upgrade-cluster.yml +++ b/upgrade-cluster.yml @@ -59,7 +59,7 @@ - { role: kubespray-defaults } - { role: container-engine, tags: "container-engine", when: deploy_container_engine } -- hosts: etcd +- hosts: etcd:kube_control_plane gather_facts: False any_errors_fatal: "{{ any_errors_fatal | default(true) }}" environment: "{{ proxy_disable_env }}" @@ -83,7 +83,10 @@ vars: etcd_cluster_setup: false etcd_events_cluster_setup: false - when: etcd_deployment_type != "kubeadm" + when: + - etcd_deployment_type != "kubeadm" + - kube_network_plugin in ["calico", "flannel", "canal", "cilium"] or cilium_deploy_additionally | default(false) | bool + - kube_network_plugin != "calico" or calico_datastore == "etcd" - name: Handle upgrades to master components first to maintain backwards compat. gather_facts: False