From de51d071894d723fa1886f2bab3a5d010de0a821 Mon Sep 17 00:00:00 2001 From: Matthew Mosesohn Date: Tue, 14 Feb 2017 19:08:44 +0300 Subject: [PATCH] Add graceful upgrade process Based on #718 introduced by rsmitty. Includes all roles and all options to support deployment of new hosts in case they were added to inventory. Main difference here is that master role is evaluated first so that master components get upgraded first. Fixes #694 --- .gitlab-ci.yml | 15 +++-- docs/upgrades.md | 16 ++++- roles/upgrade/post-upgrade/tasks/main.yml | 2 +- roles/upgrade/pre-upgrade/tasks/main.yml | 4 +- upgrade-cluster.yml | 81 ++++++++++++++++++++--- 5 files changed, 96 insertions(+), 22 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 2d281cd72..305b69575 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -101,8 +101,8 @@ before_script: # Check out latest tag if testing upgrade # Uncomment when gitlab kargo repo has tags - #- test "${UPGRADE_TEST}" = "true" && git fetch --all && git checkout $(git describe --tags $(git rev-list --tags --max-count=1)) - - test "${UPGRADE_TEST}" = "true" && git checkout 031cf565ec3ccd3ebbe80eeef3454c3780e5c598 && pip install ansible==2.2.0 + #- test "${UPGRADE_TEST}" != "false" && git fetch --all && git checkout $(git describe --tags $(git rev-list --tags --max-count=1)) + - test "${UPGRADE_TEST}" != "false" && git checkout 031cf565ec3ccd3ebbe80eeef3454c3780e5c598 && pip install ansible==2.2.0 # Create cluster @@ -127,9 +127,10 @@ before_script: cluster.yml # Repeat deployment if testing upgrade - #FIXME(mattymo): repeat "Create cluster" above without duplicating code - > - if [ "${UPGRADE_TEST}" = "true" ]; then + if [ "${UPGRADE_TEST}" != "false" ]; then + test "${UPGRADE_TEST}" == "basic" && PLAYBOOK="cluster.yml"; + test "${UPGRADE_TEST}" == "graceful" && PLAYBOOK="upgrade-cluster.yml"; pip install ansible==2.2.1.0; git checkout "${CI_BUILD_REF}"; ansible-playbook -i inventory/inventory.ini -b --become-user=root 
--private-key=${HOME}/.ssh/id_rsa -u $SSH_USER @@ -149,7 +150,7 @@ before_script: -e resolvconf_mode=${RESOLVCONF_MODE} -e weave_cpu_requests=${WEAVE_CPU_LIMIT} -e weave_cpu_limit=${WEAVE_CPU_LIMIT} - cluster.yml; + $PLAYBOOK; fi # Tests Cases @@ -253,7 +254,7 @@ before_script: KUBE_NETWORK_PLUGIN: canal CLOUD_IMAGE: debian-8-kubespray CLOUD_REGION: us-east1-b - UPGRADE_TEST: "true" + UPGRADE_TEST: "basic" CLUSTER_MODE: ha .rhel7_weave_variables: &rhel7_weave_variables @@ -261,7 +262,7 @@ before_script: KUBE_NETWORK_PLUGIN: weave CLOUD_IMAGE: rhel-7 CLOUD_REGION: europe-west1-b - UPGRADE_TEST: "true" + UPGRADE_TEST: "graceful" CLUSTER_MODE: default .centos7_flannel_variables: &centos7_flannel_variables diff --git a/docs/upgrades.md b/docs/upgrades.md index 4a25bd622..9a57f43ac 100644 --- a/docs/upgrades.md +++ b/docs/upgrades.md @@ -18,7 +18,7 @@ versions. Here are all version vars for each component: * flannel_version * kubedns_version -#### Example +#### Unsafe upgrade example If you wanted to upgrade just kube_version from v1.4.3 to v1.4.6, you could deploy the following way: @@ -33,6 +33,20 @@ And then repeat with v1.4.6 as kube_version: ansible-playbook cluster.yml -i inventory/inventory.cfg -e kube_version=v1.4.6 ``` +#### Graceful upgrade + +Kargo also supports cordoning, draining and uncordoning of nodes when performing +a cluster upgrade. There is a separate playbook used for this purpose. It is +important to note that upgrade-cluster.yml can only be used for upgrading an +existing cluster. That means there must be at least 1 kube-master already +deployed.
+ +``` +git fetch origin +git checkout origin/master +ansible-playbook upgrade-cluster.yml -i inventory/inventory.cfg +``` + #### Upgrade order As mentioned above, components are upgraded in the order in which they were diff --git a/roles/upgrade/post-upgrade/tasks/main.yml b/roles/upgrade/post-upgrade/tasks/main.yml index b576b0947..d8243d04d 100644 --- a/roles/upgrade/post-upgrade/tasks/main.yml +++ b/roles/upgrade/post-upgrade/tasks/main.yml @@ -1,5 +1,5 @@ --- - name: Uncordon node - command: kubectl uncordon {{ ansible_hostname }} + command: "{{ bin_dir }}/kubectl uncordon {{ ansible_hostname }}" delegate_to: "{{ groups['kube-master'][0] }}" diff --git a/roles/upgrade/pre-upgrade/tasks/main.yml b/roles/upgrade/pre-upgrade/tasks/main.yml index 390e9e559..90b535d2e 100644 --- a/roles/upgrade/pre-upgrade/tasks/main.yml +++ b/roles/upgrade/pre-upgrade/tasks/main.yml @@ -1,11 +1,11 @@ --- - name: Cordon node - command: kubectl cordon {{ ansible_hostname }} + command: "{{ bin_dir }}/kubectl cordon {{ ansible_hostname }}" delegate_to: "{{ groups['kube-master'][0] }}" - name: Drain node - command: kubectl drain --force --ignore-daemonsets --grace-period 30 --delete-local-data {{ ansible_hostname }} + command: "{{ bin_dir }}/kubectl drain --force --ignore-daemonsets --grace-period 30 --delete-local-data {{ ansible_hostname }}" delegate_to: "{{ groups['kube-master'][0] }}" - name: Sleep for grace period for draining diff --git a/upgrade-cluster.yml b/upgrade-cluster.yml index d7089fc49..1be9c9cab 100644 --- a/upgrade-cluster.yml +++ b/upgrade-cluster.yml @@ -1,33 +1,92 @@ --- -- hosts: all +- hosts: localhost + gather_facts: False + roles: + - bastion-ssh-config + tags: [localhost, bastion] + +- hosts: k8s-cluster:etcd:calico-rr any_errors_fatal: true + gather_facts: false + vars: + # Need to disable pipelining for bootstrap-os as some systems have requiretty in sudoers set, which makes pipelining + # fail.
bootstrap-os fixes this on these systems, so in later plays it can be enabled. + ansible_ssh_pipelining: false + roles: + - bootstrap-os + tags: + - bootstrap-os + +- hosts: k8s-cluster:etcd:calico-rr + any_errors_fatal: true + vars: + ansible_ssh_pipelining: true gather_facts: true -- hosts: all:!network-storage +- hosts: k8s-cluster:etcd:calico-rr any_errors_fatal: true roles: + - { role: kernel-upgrade, tags: kernel-upgrade, when: kernel_upgrade is defined and kernel_upgrade } - { role: kubernetes/preinstall, tags: preinstall } + - { role: docker, tags: docker } + - role: rkt + tags: rkt + when: "'rkt' in [etcd_deployment_type, kubelet_deployment_type, vault_deployment_type]" + +- hosts: etcd:k8s-cluster:vault + any_errors_fatal: true + roles: + - { role: vault, tags: vault, vault_bootstrap: true, when: "cert_management == 'vault'" } - hosts: etcd:!k8s-cluster any_errors_fatal: true - serial: 1 roles: - { role: etcd, tags: etcd } -- hosts: kube-node +- hosts: k8s-cluster any_errors_fatal: true - serial: 1 roles: - { role: etcd, tags: etcd } - - { role: upgrade/pre-upgrade, tags: upgrade/pre-upgrade } - - { role: kubernetes/node, tags: node } - - { role: network_plugin, tags: network } - - { role: upgrade/post-upgrade, tags: upgrade/post-upgrade } +- hosts: etcd:k8s-cluster:vault + any_errors_fatal: true + roles: + - { role: vault, tags: vault, when: "cert_management == 'vault'"} + +#Handle upgrades to master components first to maintain backwards compat. 
- hosts: kube-master any_errors_fatal: true serial: 1 roles: - - { role: etcd, tags: etcd } + - { role: upgrade/pre-upgrade, tags: pre-upgrade } - { role: kubernetes/node, tags: node } - - { role: kubernetes/master, tags: master } \ No newline at end of file + - { role: kubernetes/master, tags: master } + - { role: network_plugin, tags: network } + - { role: upgrade/post-upgrade, tags: post-upgrade } + +#Finally handle worker upgrades, based on given batch size +- hosts: kube-node:!kube-master + any_errors_fatal: true + serial: "{{ serial | default('20%') }}" + roles: + - { role: upgrade/pre-upgrade, tags: pre-upgrade } + - { role: kubernetes/node, tags: node } + - { role: network_plugin, tags: network } + - { role: upgrade/post-upgrade, tags: post-upgrade } + - { role: kubernetes-apps/network_plugin, tags: network } + +- hosts: calico-rr + any_errors_fatal: true + roles: + - { role: network_plugin/calico/rr, tags: network } + +- hosts: k8s-cluster + any_errors_fatal: true + roles: + - { role: dnsmasq, when: "dns_mode == 'dnsmasq_kubedns'", tags: dnsmasq } + - { role: kubernetes/preinstall, when: "dns_mode != 'none' and resolvconf_mode == 'host_resolvconf'", tags: resolvconf } + +- hosts: kube-master[0] + any_errors_fatal: true + roles: + - { role: kubernetes-apps, tags: apps }