From 4348e78b24f15e3ca8b136262297bd3244222602 Mon Sep 17 00:00:00 2001 From: Matthew Mosesohn Date: Thu, 20 Jun 2019 11:12:51 -0700 Subject: [PATCH] Enable kubeadm etcd mode (#4818) * Enable kubeadm etcd mode Uses cert commands from kubeadm experimental control plane to enable non-master nodes to obtain etcd certs. Related story: PROD-29434 Change-Id: Idafa1d223e5c6ceadf819b6f9c06adf4c4f74178 * Add validation checks and exclude calico kdd mode Change-Id: Ic234f5e71261d33191376e70d438f9f6d35f358c * Move etcd mode test to ubuntu flannel HA job Change-Id: I9af6fd80a1bbb1692ab10d6da095eb368f6bc732 * rename etcd_mode to etcd_kubeadm_enabled Change-Id: Ib196d6c8a52f48cae370b026f7687ff9ca69c172 --- cluster.yml | 14 +++- inventory/sample/group_vars/all/all.yml | 3 + roles/etcd/tasks/main.yml | 7 ++ roles/kubernetes/kubeadm/defaults/main.yml | 6 ++ .../kubeadm/tasks/kubeadm_etcd_node.yml | 74 +++++++++++++++++++ roles/kubernetes/kubeadm/tasks/main.yml | 10 +++ .../kubernetes/master/defaults/main/etcd.yml | 32 ++++++++ .../kubernetes/master/defaults/main/main.yml | 6 ++ .../kubernetes/master/tasks/kubeadm-etcd.yml | 18 +++++ .../tasks/kubeadm-secondary-experimental.yml | 8 +- roles/kubernetes/master/tasks/main.yml | 4 + .../templates/kubeadm-config.v1beta1.yaml.j2 | 41 ++++++++++ .../preinstall/tasks/0020-verify-settings.yml | 12 +++ .../preinstall/tasks/0040-set_facts.yml | 10 +++ roles/kubespray-defaults/defaults/main.yaml | 4 + roles/network_plugin/calico/tasks/install.yml | 4 +- tests/files/packet_ubuntu-flannel-ha.yml | 3 + upgrade-cluster.yml | 14 +++- 18 files changed, 263 insertions(+), 7 deletions(-) create mode 100644 roles/kubernetes/kubeadm/tasks/kubeadm_etcd_node.yml create mode 100644 roles/kubernetes/master/defaults/main/etcd.yml create mode 100644 roles/kubernetes/master/tasks/kubeadm-etcd.yml diff --git a/cluster.yml b/cluster.yml index 1ee5fc2b7..ebc093a21 100644 --- a/cluster.yml +++ b/cluster.yml @@ -52,13 +52,23 @@ any_errors_fatal: "{{ any_errors_fatal 
| default(true) }}" roles: - { role: kubespray-defaults} - - { role: etcd, tags: etcd, etcd_cluster_setup: true, etcd_events_cluster_setup: "{{ etcd_events_cluster_enabled }}" } + - role: etcd + tags: etcd + vars: + etcd_cluster_setup: true + etcd_events_cluster_setup: "{{ etcd_events_cluster_enabled }}" + when: not etcd_kubeadm_enabled| default(false) - hosts: k8s-cluster:calico-rr any_errors_fatal: "{{ any_errors_fatal | default(true) }}" roles: - { role: kubespray-defaults} - - { role: etcd, tags: etcd, etcd_cluster_setup: false, etcd_events_cluster_setup: false } + - role: etcd + tags: etcd + vars: + etcd_cluster_setup: false + etcd_events_cluster_setup: false + when: not etcd_kubeadm_enabled| default(false) - hosts: k8s-cluster any_errors_fatal: "{{ any_errors_fatal | default(true) }}" diff --git a/inventory/sample/group_vars/all/all.yml b/inventory/sample/group_vars/all/all.yml index 4b45b6601..7ba22d43c 100644 --- a/inventory/sample/group_vars/all/all.yml +++ b/inventory/sample/group_vars/all/all.yml @@ -2,6 +2,9 @@ ## Directory where etcd data stored etcd_data_dir: /var/lib/etcd +## Experimental kubeadm etcd deployment mode. Available only for new deployment +etcd_kubeadm_enabled: false + ## Directory where the binaries will be installed bin_dir: /usr/local/bin diff --git a/roles/etcd/tasks/main.yml b/roles/etcd/tasks/main.yml index 30112176e..873263fdf 100644 --- a/roles/etcd/tasks/main.yml +++ b/roles/etcd/tasks/main.yml @@ -79,3 +79,10 @@ # state instead of `new`. 
- include_tasks: refresh_config.yml when: is_etcd_master + +- name: Install etcdctl binary from etcd role + include_tasks: "{{ role_path }}/../../etcd/tasks/install_host.yml" + vars: + etcd_cluster_setup: true + when: + - etcd_kubeadm_enabled diff --git a/roles/kubernetes/kubeadm/defaults/main.yml b/roles/kubernetes/kubeadm/defaults/main.yml index c98716458..1f1169ad7 100644 --- a/roles/kubernetes/kubeadm/defaults/main.yml +++ b/roles/kubernetes/kubeadm/defaults/main.yml @@ -10,3 +10,9 @@ kube_override_hostname: >- {%- else -%} {{ inventory_hostname }} {%- endif -%} + +# Requests a fresh upload of certificates from first master +kubeadm_etcd_refresh_cert_key: false + +# Experimental kubeadm etcd deployment mode. Available only for new deployment +etcd_kubeadm_enabled: false diff --git a/roles/kubernetes/kubeadm/tasks/kubeadm_etcd_node.yml b/roles/kubernetes/kubeadm/tasks/kubeadm_etcd_node.yml new file mode 100644 index 000000000..f43d20165 --- /dev/null +++ b/roles/kubernetes/kubeadm/tasks/kubeadm_etcd_node.yml @@ -0,0 +1,74 @@ +--- +- name: Refresh certificates so they are fresh and not expired + command: >- + {{ bin_dir }}/kubeadm init phase + --config {{ kube_config_dir }}/kubeadm-config.yaml + upload-certs --experimental-upload-certs + {% if kubeadm_certificate_key is defined %} + --certificate-key={{ kubeadm_certificate_key }} + {% endif %} + register: kubeadm_upload_cert + delegate_to: "{{ groups['kube-master'][0] }}" + when: kubeadm_etcd_refresh_cert_key + run_once: yes + +- name: Parse certificate key if not set + set_fact: + kubeadm_certificate_key: "{{ hostvars[groups['kube-master'][0]]['kubeadm_upload_cert'].stdout_lines[-1] | trim }}" + when: kubeadm_certificate_key is undefined + +- name: Pull control plane certs down + shell: >- + {{ bin_dir }}/kubeadm join phase + control-plane-prepare download-certs + --certificate-key {{ kubeadm_certificate_key }} + --experimental-control-plane + --token {{ kubeadm_token }} + 
--discovery-token-unsafe-skip-ca-verification + {{ kubeadm_discovery_address }} + && + {{ bin_dir }}/kubeadm join phase + control-plane-prepare certs + --experimental-control-plane + --token {{ kubeadm_token }} + --discovery-token-unsafe-skip-ca-verification + {{ kubeadm_discovery_address }} + args: + creates: "{{ kube_cert_dir }}/apiserver-etcd-client.key" + +- name: Delete unneeded certificates + file: + path: "{{ item }}" + state: absent + with_items: + - "{{ kube_cert_dir }}/apiserver.crt" + - "{{ kube_cert_dir }}/apiserver.key" + - "{{ kube_cert_dir }}/ca.key" + - "{{ kube_cert_dir }}/etcd/ca.key" + - "{{ kube_cert_dir }}/etcd/healthcheck-client.crt" + - "{{ kube_cert_dir }}/etcd/healthcheck-client.key" + - "{{ kube_cert_dir }}/etcd/peer.crt" + - "{{ kube_cert_dir }}/etcd/peer.key" + - "{{ kube_cert_dir }}/etcd/server.crt" + - "{{ kube_cert_dir }}/etcd/server.key" + - "{{ kube_cert_dir }}/front-proxy-ca.crt" + - "{{ kube_cert_dir }}/front-proxy-ca.key" + - "{{ kube_cert_dir }}/front-proxy-client.crt" + - "{{ kube_cert_dir }}/front-proxy-client.key" + - "{{ kube_cert_dir }}/sa.key" + - "{{ kube_cert_dir }}/sa.pub" + +- name: Calculate etcd cert serial + command: "openssl x509 -in {{ kube_cert_dir }}/apiserver-etcd-client.crt -noout -serial" + register: "etcd_client_cert_serial_result" + changed_when: false + when: + - inventory_hostname in groups['k8s-cluster']|union(groups['calico-rr']|default([]))|unique|sort + tags: + - network + +- name: Set etcd_client_cert_serial + set_fact: + etcd_client_cert_serial: "{{ etcd_client_cert_serial_result.stdout.split('=')[1] }}" + tags: + - network diff --git a/roles/kubernetes/kubeadm/tasks/main.yml b/roles/kubernetes/kubeadm/tasks/main.yml index 2e5c8dda7..7c967569f 100644 --- a/roles/kubernetes/kubeadm/tasks/main.yml +++ b/roles/kubernetes/kubeadm/tasks/main.yml @@ -10,6 +10,7 @@ tags: - facts + - name: Check if kubelet.conf exists stat: path: "{{ kube_config_dir }}/kubelet.conf" @@ -168,3 +169,12 @@ - 
kubeadm_discovery_address != kube_apiserver_endpoint tags: - kube-proxy + +- name: Extract etcd certs from control plane if using etcd kubeadm mode + include_tasks: kubeadm_etcd_node.yml + when: + - etcd_kubeadm_enabled + - kubeadm_control_plane + - inventory_hostname not in groups['kube-master'] + - kube_network_plugin in ["calico", "flannel", "canal", "cilium"] + - kube_network_plugin != "calico" or calico_datastore == "etcd" diff --git a/roles/kubernetes/master/defaults/main/etcd.yml b/roles/kubernetes/master/defaults/main/etcd.yml new file mode 100644 index 000000000..1cced21b7 --- /dev/null +++ b/roles/kubernetes/master/defaults/main/etcd.yml @@ -0,0 +1,32 @@ +--- +# Note: This does not set up DNS entries. It simply adds the following DNS +# entries to the certificate +etcd_cert_alt_names: + - "etcd.kube-system.svc.{{ dns_domain }}" + - "etcd.kube-system.svc" + - "etcd.kube-system" + - "etcd" +etcd_cert_alt_ips: [] + +etcd_heartbeat_interval: "250" +etcd_election_timeout: "5000" + +# etcd_snapshot_count: "10000" + +# Parameters for ionice +# -c takes an integer between 0 and 3 or one of the strings none, realtime, best-effort or idle. 
+# -n takes an integer between 0 (highest priority) and 7 (lowest priority) +# etcd_ionice: "-c2 -n0" + +etcd_metrics: "basic" + +## A dictionary of extra environment variables to add to etcd.env, formatted like: +## etcd_extra_vars: +## var1: "value1" +## var2: "value2" +## Note this is different from the etcd role with ETCD_ prefix, caps, and underscores +etcd_extra_vars: {} + +# etcd_quota_backend_bytes: "2G" + +etcd_compaction_retention: "8" diff --git a/roles/kubernetes/master/defaults/main/main.yml b/roles/kubernetes/master/defaults/main/main.yml index b2578e102..195bbf82a 100644 --- a/roles/kubernetes/master/defaults/main/main.yml +++ b/roles/kubernetes/master/defaults/main/main.yml @@ -2,6 +2,12 @@ # disable upgrade cluster upgrade_cluster_setup: false +# Enable kubeadm experimental control plane +kubeadm_control_plane: false + +# Experimental kubeadm etcd deployment mode. Available only for new deployment +etcd_kubeadm_enabled: false + # An experimental dev/test only dynamic volumes provisioner, # for PetSets. Works for kube>=v1.3 only. 
kube_hostpath_dynamic_provisioner: "false" diff --git a/roles/kubernetes/master/tasks/kubeadm-etcd.yml b/roles/kubernetes/master/tasks/kubeadm-etcd.yml new file mode 100644 index 000000000..bfcb88d2b --- /dev/null +++ b/roles/kubernetes/master/tasks/kubeadm-etcd.yml @@ -0,0 +1,18 @@ +--- +- name: Calculate etcd cert serial + command: "openssl x509 -in {{ kube_cert_dir }}/apiserver-etcd-client.crt -noout -serial" + register: "etcd_client_cert_serial_result" + changed_when: false + tags: + - network + +- name: Set etcd_client_cert_serial + set_fact: + etcd_client_cert_serial: "{{ etcd_client_cert_serial_result.stdout.split('=')[1] }}" + tags: + - network + +- name: Ensure etcdctl binary is installed + include_tasks: "{{ role_path }}/../../etcd/tasks/install_host.yml" + vars: + etcd_cluster_setup: true diff --git a/roles/kubernetes/master/tasks/kubeadm-secondary-experimental.yml b/roles/kubernetes/master/tasks/kubeadm-secondary-experimental.yml index e1dfef01c..fd52389b3 100644 --- a/roles/kubernetes/master/tasks/kubeadm-secondary-experimental.yml +++ b/roles/kubernetes/master/tasks/kubeadm-secondary-experimental.yml @@ -43,6 +43,10 @@ kubeadm_certificate_key: "{{ hostvars[groups['kube-master'][0]]['kubeadm_upload_cert'].stdout_lines[-1] | trim }}" when: kubeadm_certificate_key is undefined +- name: check already run + debug: + msg: "{{ kubeadm_already_run.stat.exists }}" + - name: Joining control plane node to the cluster. 
command: >- {{ bin_dir }}/kubeadm join @@ -52,9 +56,11 @@ --certificate-key={{ kubeadm_certificate_key }} {% endif %} register: kubeadm_join_control_plane + retries: 3 + until: kubeadm_join_control_plane is succeeded when: - inventory_hostname != groups['kube-master']|first - - not kubeadm_already_run.stat.exists + - kubeadm_already_run is not defined or not kubeadm_already_run.stat.exists environment: PATH: "{{ bin_dir }}:{{ ansible_env.PATH }}" diff --git a/roles/kubernetes/master/tasks/main.yml b/roles/kubernetes/master/tasks/main.yml index 9094ab6cb..6cb14f534 100644 --- a/roles/kubernetes/master/tasks/main.yml +++ b/roles/kubernetes/master/tasks/main.yml @@ -75,3 +75,7 @@ - name: Include kubeadm setup import_tasks: kubeadm-setup.yml + +- name: Include kubeadm etcd extra tasks + include_tasks: kubeadm-etcd.yml + when: etcd_kubeadm_enabled diff --git a/roles/kubernetes/master/templates/kubeadm-config.v1beta1.yaml.j2 b/roles/kubernetes/master/templates/kubeadm-config.v1beta1.yaml.j2 index ae30993a5..619ee829b 100644 --- a/roles/kubernetes/master/templates/kubeadm-config.v1beta1.yaml.j2 +++ b/roles/kubernetes/master/templates/kubeadm-config.v1beta1.yaml.j2 @@ -24,6 +24,7 @@ apiVersion: kubeadm.k8s.io/v1beta1 kind: ClusterConfiguration clusterName: {{ cluster_name }} etcd: +{% if not etcd_kubeadm_enabled %} external: endpoints: {% for endpoint in etcd_access_addresses.split(',') %} @@ -32,6 +33,46 @@ etcd: caFile: {{ etcd_cert_dir }}/{{ kube_etcd_cacert_file }} certFile: {{ etcd_cert_dir }}/{{ kube_etcd_cert_file }} keyFile: {{ etcd_cert_dir }}/{{ kube_etcd_key_file }} +{% elif etcd_kubeadm_enabled %} + local: + imageRepository: "{{ etcd_image_repo | regex_replace("/etcd$","") }}" + imageTag: "{{ etcd_image_tag }}" + dataDir: "/var/lib/etcd" + extraArgs: + metrics: {{ etcd_metrics }} + election-timeout: "{{ etcd_election_timeout }}" + heartbeat-interval: "{{ etcd_heartbeat_interval }}" + auto-compaction-retention: "{{ etcd_compaction_retention }}" +{% if 
etcd_snapshot_count is defined %} + snapshot-count: "{{ etcd_snapshot_count }}" +{% endif %} +{% if etcd_quota_backend_bytes is defined %} + quota-backend-bytes: "{{ etcd_quota_backend_bytes }}" +{% endif %} +{% if etcd_log_package_levels is defined %} + log-package_levels: "{{ etcd_log_package_levels }}" +{% endif %} +{% for key, value in etcd_extra_vars.items() %} + {{ key }}: "{{ value }}" +{% endfor %} +{% if host_architecture != "amd64" -%} + etcd-unsupported-arch: {{host_architecture}} +{% endif %} + serverCertSANs: +{% for san in etcd_cert_alt_names %} + - {{ san }} +{% endfor %} +{% for san in etcd_cert_alt_ips %} + - {{ san }} +{% endfor %} + peerCertSANs: +{% for san in etcd_cert_alt_names %} + - {{ san }} +{% endfor %} +{% for san in etcd_cert_alt_ips %} + - {{ san }} +{% endfor %} +{% endif %} networking: dnsDomain: {{ dns_domain }} serviceSubnet: {{ kube_service_addresses }} diff --git a/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml b/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml index 5e4480775..02112c7ed 100644 --- a/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml +++ b/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml @@ -212,3 +212,15 @@ msg: "resolvconf_mode can only be 'docker_dns', 'host_resolvconf' or 'none'" when: resolvconf_mode is defined run_once: true + +- name: Stop if k8s version is too low for kubeadm etcd mode + assert: + that: kube_version is version('v1.14.0', '>=') + msg: "kubeadm etcd mode requires k8s version >= v1.14.0" + when: etcd_kubeadm_enabled + +- name: Stop if kubeadm etcd mode is enabled but experimental control plane is not + assert: + that: kubeadm_control_plane + msg: "kubeadm etcd mode requires experimental control plane" + when: etcd_kubeadm_enabled diff --git a/roles/kubernetes/preinstall/tasks/0040-set_facts.yml b/roles/kubernetes/preinstall/tasks/0040-set_facts.yml index 796f21d7f..939da53a4 100644 --- a/roles/kubernetes/preinstall/tasks/0040-set_facts.yml +++ 
b/roles/kubernetes/preinstall/tasks/0040-set_facts.yml @@ -168,3 +168,13 @@ tags: - facts - kube-proxy + +- name: set etcd vars if using kubeadm mode + set_fact: + etcd_cert_dir: "{{ kube_cert_dir }}" + kube_etcd_cacert_file: "etcd/ca.crt" + kube_etcd_cert_file: "apiserver-etcd-client.crt" + kube_etcd_key_file: "apiserver-etcd-client.key" + etcd_deployment_type: host + when: + - etcd_kubeadm_enabled diff --git a/roles/kubespray-defaults/defaults/main.yaml b/roles/kubespray-defaults/defaults/main.yaml index 4620713ab..d9848b64f 100644 --- a/roles/kubespray-defaults/defaults/main.yaml +++ b/roles/kubespray-defaults/defaults/main.yaml @@ -255,7 +255,11 @@ docker_options: >- --userland-proxy-path=/usr/libexec/docker/docker-proxy-current --signature-verification=false {%- endif -%} +# Experimental kubeadm etcd deployment mode. Available only for new deployment +etcd_kubeadm_enabled: false + # Settings for containerized control plane (etcd/kubelet/secrets) +# deployment type for legacy etcd mode etcd_deployment_type: docker cert_management: script diff --git a/roles/network_plugin/calico/tasks/install.yml b/roles/network_plugin/calico/tasks/install.yml index b4923cec8..aa5f1f729 100644 --- a/roles/network_plugin/calico/tasks/install.yml +++ b/roles/network_plugin/calico/tasks/install.yml @@ -45,8 +45,8 @@ uri: url: "{{ etcd_access_addresses.split(',') | first }}/health" validate_certs: no - client_cert: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}.pem" - client_key: "{{ etcd_cert_dir }}/node-{{ inventory_hostname }}-key.pem" + client_cert: "{{ calico_cert_dir }}/cert.crt" + client_key: "{{ calico_cert_dir }}/key.pem" register: result until: result.status == 200 or result.status == 401 retries: 10 diff --git a/tests/files/packet_ubuntu-flannel-ha.yml b/tests/files/packet_ubuntu-flannel-ha.yml index 60c66575f..8642bd6d9 100644 --- a/tests/files/packet_ubuntu-flannel-ha.yml +++ b/tests/files/packet_ubuntu-flannel-ha.yml @@ -6,6 +6,9 @@ mode: ha # Kubespray settings 
kube_network_plugin: flannel kubeadm_enabled: true +etcd_kubeadm_enabled: true +kubeadm_control_plane: true +kubeadm_certificate_key: 3998c58db6497dd17d909394e62d515368c06ec617710d02edea31c06d741085 skip_non_kubeadm_warning: true deploy_netchecker: true dns_min_replicas: 1 diff --git a/upgrade-cluster.yml b/upgrade-cluster.yml index 5ea8da37d..1ba298e69 100644 --- a/upgrade-cluster.yml +++ b/upgrade-cluster.yml @@ -56,13 +56,23 @@ any_errors_fatal: "{{ any_errors_fatal | default(true) }}" roles: - { role: kubespray-defaults} - - { role: etcd, tags: etcd, etcd_cluster_setup: true } + - role: etcd + tags: etcd + vars: + etcd_cluster_setup: true + etcd_events_cluster_setup: false + when: not etcd_kubeadm_enabled | default(false) - hosts: k8s-cluster any_errors_fatal: "{{ any_errors_fatal | default(true) }}" roles: - { role: kubespray-defaults} - - { role: etcd, tags: etcd, etcd_cluster_setup: false } + - role: etcd + tags: etcd + vars: + etcd_cluster_setup: false + etcd_events_cluster_setup: false + when: not etcd_kubeadm_enabled | default(false) - name: Handle upgrades to master components first to maintain backwards compat. hosts: kube-master