Upgrade to kubeadm (#1667)

* Enable upgrade to kubeadm

* fix kubedns upgrade

* try upgrade route

* use init/upgrade strategy for kubeadm and ignore kubedns svc

* Use bin_dir for kubeadm

* delete more secrets

* fix waiting for terminating pods

* Manually enforce kube-proxy for kubeadm deploy

* remove proxy. update to kubeadm 1.8.0rc1
This commit is contained in:
Matthew Mosesohn 2017-09-26 10:38:58 +01:00 committed by GitHub
parent 1067595b5c
commit bd272e0b3c
17 changed files with 210 additions and 42 deletions

View file

@ -299,12 +299,24 @@ before_script:
UPGRADE_TEST: "graceful"
STARTUP_SCRIPT: ""
.centos_weave_kubeadm_variables: &centos_weave_kubeadm_variables
# stage: deploy-gce-part1
KUBE_NETWORK_PLUGIN: weave
AUTHORIZATION_MODES: "{ 'authorization_modes': [ 'RBAC' ] }"
CLOUD_IMAGE: centos-7
CLOUD_MACHINE_TYPE: "n1-standard-1"
CLOUD_REGION: us-central1-b
CLUSTER_MODE: ha
KUBEADM_ENABLED: "true"
UPGRADE_TEST: "graceful"
STARTUP_SCRIPT: ""
.ubuntu_canal_kubeadm_variables: &ubuntu_canal_kubeadm_variables
# stage: deploy-gce-part1
KUBE_NETWORK_PLUGIN: canal
AUTHORIZATION_MODES: "{ 'authorization_modes': [ 'RBAC' ] }"
CLOUD_IMAGE: ubuntu-1604-xenial
CLOUD_MACHINE_TYPE: "n1-standard-2"
CLOUD_MACHINE_TYPE: "n1-standard-1"
CLOUD_REGION: europe-west1-b
CLUSTER_MODE: ha
KUBEADM_ENABLED: "true"
@ -521,6 +533,27 @@ ubuntu-canal-kubeadm-triggers:
when: on_success
only: ['triggers']
centos-weave-kubeadm-rbac:
stage: deploy-gce-part1
<<: *job
<<: *gce
variables:
<<: *gce_variables
<<: *centos_weave_kubeadm_variables
when: manual
except: ['triggers']
only: ['master', /^pr-.*$/]
centos-weave-kubeadm-triggers:
stage: deploy-gce-part1
<<: *job
<<: *gce
variables:
<<: *gce_variables
<<: *centos_weave_kubeadm_variables
when: on_success
only: ['triggers']
rhel7-weave:
stage: deploy-gce-part1
<<: *job

View file

@ -80,6 +80,7 @@
any_errors_fatal: "{{ any_errors_fatal | default(true) }}"
roles:
- { role: kubespray-defaults}
- { role: kubernetes-apps/rotate_tokens, tags: rotate_tokens, when: "secret_changed|default(false)" }
- { role: kubernetes-apps/network_plugin, tags: network }
- { role: kubernetes-apps/policy_controller, tags: policy-controller }
- { role: kubernetes/client, tags: client }

View file

@ -67,3 +67,17 @@ follows:
* network_plugin (such as Calico or Weave)
* kube-apiserver, kube-scheduler, and kube-controller-manager
* Add-ons (such as KubeDNS)
#### Upgrade considerations
Kubespray supports rotating certificates used for etcd and Kubernetes
components, but some manual steps may be required. If you have a pod that
requires use of a service token and is deployed in a namespace other than
`kube-system`, you will need to manually delete the affected pods after
rotating certificates. This is because all service account tokens are dependent
on the apiserver token that is used to generate them. When the certificate
rotates, all service account tokens must be rotated as well. During the
kubernetes-apps/rotate_tokens role, only pods in kube-system are destroyed and
recreated. All other invalidated service account tokens are cleaned up
automatically, but other pods are not deleted out of an abundance of caution
for impact to user deployed pods.

View file

@ -20,7 +20,7 @@ download_always_pull: False
# Versions
kube_version: v1.7.5
# Change to kube_version after v1.8.0 release
kubeadm_version: "v1.8.0-beta.1"
kubeadm_version: "v1.8.0-rc.1"
etcd_version: v3.2.4
# TODO(mattymo): Move calico versions to roles/network_plugins/calico/defaults
# after migration to container download
@ -37,7 +37,7 @@ pod_infra_version: 3.0
kubeadm_download_url: "https://storage.googleapis.com/kubernetes-release/release/{{ kubeadm_version }}/bin/linux/amd64/kubeadm"
# Checksums
kubeadm_checksum: "ddd5949699d6bdbc0b90b379e7e534f137b1058db1acc8f26cc54843f017ffbf"
kubeadm_checksum: "8f6ceb26b8503bfc36a99574cf6f853be1c55405aa31669561608ad8099bf5bf"
# Containers
etcd_image_repo: "quay.io/coreos/etcd"
@ -123,7 +123,7 @@ downloads:
container: true
repo: "{{ etcd_image_repo }}"
tag: "{{ etcd_image_tag }}"
sha256: "{{etcd_digest_checksum|default(None)}}"
sha256: "{{ etcd_digest_checksum|default(None) }}"
kubeadm:
version: "{{ kubeadm_version }}"
dest: "kubeadm"

View file

@ -8,7 +8,17 @@
delay: 6
when: inventory_hostname == groups['kube-master'][0]
- name: kubeadm | Delete kubeadm kubedns
- name: Kubernetes Apps | Delete old kubedns resources
kube:
name: "kubedns"
namespace: "{{ system_namespace }}"
kubectl: "{{bin_dir}}/kubectl"
resource: "{{ item }}"
state: absent
with_items: ['deploy', 'svc']
tags: upgrade
- name: Kubernetes Apps | Delete kubeadm kubedns
kube:
name: "kubedns"
namespace: "{{ system_namespace }}"
@ -25,9 +35,9 @@
src: "{{item.file}}"
dest: "{{kube_config_dir}}/{{item.file}}"
with_items:
- {name: kubedns, file: kubedns-sa.yml, type: sa}
- {name: kubedns, file: kubedns-deploy.yml.j2, type: deployment}
- {name: kubedns, file: kubedns-svc.yml, type: svc}
- {name: kube-dns, file: kubedns-sa.yml, type: sa}
- {name: kube-dns, file: kubedns-deploy.yml.j2, type: deployment}
- {name: kube-dns, file: kubedns-svc.yml, type: svc}
- {name: kubedns-autoscaler, file: kubedns-autoscaler-sa.yml, type: sa}
- {name: kubedns-autoscaler, file: kubedns-autoscaler-clusterrole.yml, type: clusterrole}
- {name: kubedns-autoscaler, file: kubedns-autoscaler-clusterrolebinding.yml, type: clusterrolebinding}

View file

@ -1,15 +1,4 @@
---
# FIXME: remove if kubernetes/features#124 is implemented
- name: Weave | Purge old weave daemonset
kube:
name: "weave-net"
kubectl: "{{ bin_dir }}/kubectl"
filename: "{{ kube_config_dir }}/weave-net.yml"
resource: "ds"
namespace: "{{system_namespace}}"
state: absent
when: inventory_hostname == groups['kube-master'][0] and weave_manifest.changed
- name: Weave | Start Resources
kube:
name: "weave-net"

View file

@ -0,0 +1,20 @@
---
#FIXME(mattymo): Exclude built in secrets that were automatically rotated,
#instead of filtering manually
- name: Rotate Tokens | Get all serviceaccount tokens to expire
shell: >-
{{ bin_dir }}/kubectl get secrets --all-namespaces
-o 'jsonpath={range .items[*]}{"\n"}{.metadata.namespace}{" "}{.metadata.name}{" "}{.type}{end}'
| grep kubernetes.io/service-account-token
| egrep 'default-token|kube-proxy|kube-dns|dnsmasq|netchecker|weave|calico|canal|flannel|dashboard|cluster-proportional-autoscaler|efk|tiller'
register: tokens_to_delete
run_once: true
- name: Rotate Tokens | Delete expired tokens
command: "{{ bin_dir }}/kubectl delete secrets -n {{ item.split(' ')[0] }} {{ item.split(' ')[1] }}"
with_items: "{{ tokens_to_delete.stdout_lines }}"
run_once: true
- name: Rotate Tokens | Delete pods in system namespace
command: "{{ bin_dir }}/kubectl delete pods -n {{ system_namespace }} --all"
run_once: true

View file

@ -24,7 +24,7 @@
register: kubeadm_client_conf
- name: Join to cluster if needed
command: kubeadm join --config {{ kube_config_dir}}/kubeadm-client.conf --skip-preflight-checks
command: "{{ bin_dir }}/kubeadm join --config {{ kube_config_dir}}/kubeadm-client.conf --skip-preflight-checks"
register: kubeadm_join
when: not is_kube_master and (kubeadm_client_conf.changed or not kubelet_conf.stat.exists)

View file

@ -0,0 +1,3 @@
---
- name: kubeadm | Purge old certs
command: "rm -f {{kube_cert_dir }}/*.pem"

View file

@ -0,0 +1,12 @@
---
- name: Copy old certs to the kubeadm expected path
copy:
src: "{{ kube_cert_dir }}/{{ item.src }}"
dest: "{{ kube_cert_dir }}/{{ item.dest }}"
remote_src: yes
with_items:
- {src: apiserver.pem, dest: apiserver.crt}
- {src: apiserver.pem, dest: apiserver.key}
- {src: ca.pem, dest: ca.crt}
- {src: ca-key.pem, dest: ca.key}
register: kubeadm_copy_old_certs

View file

@ -1,4 +1,35 @@
---
- name: kubeadm | Check if old apiserver cert exists on host
stat:
path: "{{ kube_cert_dir }}/apiserver.pem"
register: old_apiserver_cert
delegate_to: "{{groups['kube-master']|first}}"
run_once: true
- name: kubeadm | Check service account key
stat:
path: "{{ kube_cert_dir }}/sa.key"
register: sa_key_before
delegate_to: "{{groups['kube-master']|first}}"
run_once: true
- name: kubeadm | Check if kubeadm has already run
stat:
path: "{{ kube_config_dir }}/admin.conf"
register: admin_conf
- name: kubeadm | Delete old static pods
file:
path: "{{ kube_config_dir }}/manifests/{{item}}.manifest"
state: absent
with_items: ["kube-apiserver", "kube-controller-manager", "kube-scheduler", "kube-proxy"]
when: old_apiserver_cert.stat.exists
- name: kubeadm | Forcefully delete old static pods
shell: "docker ps -f name=k8s_{{item}} -q | xargs --no-run-if-empty docker rm -f"
with_items: ["kube-apiserver", "kube-controller-manager", "kube-scheduler"]
when: old_apiserver_cert.stat.exists
- name: kubeadm | aggregate all SANs
set_fact:
apiserver_sans: >-
@ -29,18 +60,29 @@
dest: "{{ kube_config_dir }}/kubeadm-config.yaml"
register: kubeadm_config
- name: Check if kubeadm has already run
stat:
path: "{{ kube_config_dir }}/admin.conf"
register: admin_conf
- name: kubeadm | Initialize first master
command: timeout -k 240s 240s kubeadm init --config={{ kube_config_dir }}/kubeadm-config.yaml --skip-preflight-checks
command: timeout -k 240s 240s {{ bin_dir }}/kubeadm init --config={{ kube_config_dir }}/kubeadm-config.yaml --skip-preflight-checks
register: kubeadm_init
#Retry is because upload config sometimes fails
retries: 3
when: inventory_hostname == groups['kube-master']|first and (kubeadm_config.changed or not admin_conf.stat.exists)
when: inventory_hostname == groups['kube-master']|first and not admin_conf.stat.exists
failed_when: kubeadm_init.rc != 0 and "field is immutable" not in kubeadm_init.stderr
notify: Master | restart kubelet
- name: kubeadm | Upgrade first master
command: timeout -k 240s 240s {{ bin_dir }}/kubeadm upgrade apply --config={{ kube_config_dir }}/kubeadm-config.yaml {{ kube_version }} --skip-preflight-checks
register: kubeadm_upgrade
#Retry is because upload config sometimes fails
retries: 3
when: inventory_hostname == groups['kube-master']|first and (kubeadm_config.changed and admin_conf.stat.exists)
failed_when: kubeadm_upgrade.rc != 0 and "field is immutable" not in kubeadm_upgrade.stderr
notify: Master | restart kubelet
# FIXME(mattymo): remove when https://github.com/kubernetes/kubeadm/issues/433 is fixed
- name: kubeadm | Enable kube-proxy
command: "{{ bin_dir }}/kubeadm alpha phase addon kube-proxy --config={{ kube_config_dir }}/kubeadm-config.yaml"
when: inventory_hostname == groups['kube-master']|first
changed_when: false
- name: slurp kubeadm certs
slurp:
@ -62,7 +104,7 @@
delegate_to: "{{ groups['kube-master']|first }}"
run_once: true
- name: write out kubeadm certs
- name: kubeadm | write out kubeadm certs
copy:
dest: "{{ item.item }}"
content: "{{ item.content | b64decode }}"
@ -74,9 +116,32 @@
with_items: "{{ kubeadm_certs.results }}"
when: inventory_hostname != groups['kube-master']|first
- name: kubeadm | Initialize other masters
command: timeout -k 240s 240s kubeadm init --config={{ kube_config_dir }}/kubeadm-config.yaml --skip-preflight-checks
- name: kubeadm | Init other uninitialized masters
command: timeout -k 240s 240s {{ bin_dir }}/kubeadm init --config={{ kube_config_dir }}/kubeadm-config.yaml --skip-preflight-checks
register: kubeadm_init
#Retry is because upload config sometimes fails
retries: 3
when: inventory_hostname != groups['kube-master']|first and (kubeadm_config.changed or not admin_conf.stat.exists or copy_kubeadm_certs.changed)
when: inventory_hostname != groups['kube-master']|first and not admin_conf.stat.exists
failed_when: kubeadm_init.rc != 0 and "field is immutable" not in kubeadm_init.stderr
notify: Master | restart kubelet
- name: kubeadm | Upgrade other masters
command: timeout -k 240s 240s {{ bin_dir }}/kubeadm upgrade apply --config={{ kube_config_dir }}/kubeadm-config.yaml {{ kube_version }} --skip-preflight-checks
register: kubeadm_upgrade
when: inventory_hostname != groups['kube-master']|first and (kubeadm_config.changed and admin_conf.stat.exists)
failed_when: kubeadm_upgrade.rc != 0 and "field is immutable" not in kubeadm_upgrade.stderr
notify: Master | restart kubelet
- name: kubeadm | Check service account key again
stat:
path: "{{ kube_cert_dir }}/sa.key"
register: sa_key_after
delegate_to: "{{groups['kube-master']|first}}"
run_once: true
- name: kubeadm | Set secret_changed if service account key was updated
command: /bin/true
notify: Master | set secret_changed
when: sa_key_before.stat.checksum|default("") != sa_key_after.stat.checksum
- name: kubeadm | cleanup old certs if necessary
include: kubeadm-cleanup-old-certs.yml
when: old_apiserver_cert.stat.exists

View file

@ -17,7 +17,6 @@ networking:
podSubnet: {{ kube_pods_subnet }}
kubernetesVersion: {{ kube_version }}
cloudProvider: {{ cloud_provider|default('') }}
#TODO: cloud provider conf file
authorizationModes:
- Node
{% for mode in authorization_modes %}
@ -53,7 +52,6 @@ apiServerExtraArgs:
runtime-config: {{ kube_api_runtime_config }}
{% endif %}
allow-privileged: "true"
#TODO: Custom flags compatible with kubeadm
controllerManagerExtraArgs:
node-monitor-grace-period: {{ kube_controller_node_monitor_grace_period }}
node-monitor-period: {{ kube_controller_node_monitor_period }}
@ -61,7 +59,6 @@ controllerManagerExtraArgs:
{% if kube_feature_gates %}
feature-gates: {{ kube_feature_gates|join(',') }}
{% endif %}
#schedulerExtraArgs:
apiServerCertSANs:
{% for san in apiserver_sans.split(' ') | unique %}
- {{ san }}

View file

@ -19,12 +19,20 @@
when: kubeadm_enabled
tags: kubeadm
- name: install | Copy binary from download dir
- name: install | Copy kubeadm binary from download dir
command: rsync -piu "{{ local_release_dir }}/kubeadm" "{{ bin_dir }}/kubeadm"
changed_when: false
when: kubeadm_enabled
tags: kubeadm
- name: install | Set kubeadm binary permissions
file:
path: "{{ bin_dir }}/kubeadm"
mode: "0755"
state: file
when: kubeadm_enabled
tags: kubeadm
- include: "install_{{ kubelet_deployment_type }}.yml"
- name: install | Write kubelet systemd init file

View file

@ -96,6 +96,13 @@
when: not kubeadm_enabled
tags: kube-proxy
- name: Purge proxy manifest for kubeadm
file:
path: "{{ kube_manifest_dir }}/kube-proxy.manifest"
state: absent
when: kubeadm_enabled
tags: kube-proxy
# reload-systemd
- meta: flush_handlers

View file

@ -75,6 +75,7 @@
with_items:
- "{{kube_config_dir}}"
- /var/lib/kubelet
- /root/.kube
- "{{ etcd_data_dir }}"
- /etc/ssl/etcd
- /var/log/calico

View file

@ -13,12 +13,20 @@
when: not ansible_os_family in ["CoreOS", "Container Linux by CoreOS"]
- name: Wait for pods to be ready
shell: "{{bin_dir}}/kubectl get pods"
register: pods
until:
- '"ContainerCreating" not in pods.stdout'
- '"Pending" not in pods.stdout'
- '"Terminating" not in pods.stdout'
retries: 60
delay: 2
no_log: true
- name: Get pod names
shell: "{{bin_dir}}/kubectl get pods -o json"
register: pods
until: '"ContainerCreating" not in pods.stdout and "Terminating" not in pods.stdout'
retries: 60
delay: 2
no_log: true
- name: Get hostnet pods

View file

@ -67,7 +67,6 @@
- { role: kubernetes/node, tags: node }
- { role: kubernetes/master, tags: master }
- { role: network_plugin, tags: network }
- { role: kubernetes/kubeadm, tags: kubeadm, when: "kubeadm_enabled" }
- { role: upgrade/post-upgrade, tags: post-upgrade }
#Finally handle worker upgrades, based on given batch size
@ -87,6 +86,7 @@
any_errors_fatal: true
roles:
- { role: kubespray-defaults}
- { role: kubernetes-apps/rotate_tokens, tags: rotate_tokens, when: "secret_changed|default(false)" }
- { role: kubernetes-apps/network_plugin, tags: network }
- { role: kubernetes-apps/policy_controller, tags: policy-controller }
- { role: kubernetes/client, tags: client }