Merge pull request #1029 from mattymo/graceful

Add graceful upgrade process
Antoine Legrand · 2017-02-17 21:24:32 +01:00 · committed by GitHub
commit b84cc14694
7 changed files with 158 additions and 34 deletions

.gitlab-ci.yml

@@ -101,8 +101,8 @@ before_script:
   # Check out latest tag if testing upgrade
   # Uncomment when gitlab kargo repo has tags
-  #- test "${UPGRADE_TEST}" = "true" && git fetch --all && git checkout $(git describe --tags $(git rev-list --tags --max-count=1))
-  - test "${UPGRADE_TEST}" = "true" && git checkout 031cf565ec3ccd3ebbe80eeef3454c3780e5c598 && pip install ansible==2.2.0
+  #- test "${UPGRADE_TEST}" != "false" && git fetch --all && git checkout $(git describe --tags $(git rev-list --tags --max-count=1))
+  - test "${UPGRADE_TEST}" != "false" && git checkout 031cf565ec3ccd3ebbe80eeef3454c3780e5c598 && pip install ansible==2.2.0

   # Create cluster

@@ -127,9 +127,10 @@ before_script:
     cluster.yml

   # Repeat deployment if testing upgrade
+  #FIXME(mattymo): repeat "Create cluster" above without duplicating code
   - >
-    if [ "${UPGRADE_TEST}" = "true" ]; then
+    if [ "${UPGRADE_TEST}" != "false" ]; then
+      test "${UPGRADE_TEST}" == "basic" && PLAYBOOK="cluster.yml";
+      test "${UPGRADE_TEST}" == "graceful" && PLAYBOOK="upgrade-cluster.yml";
       pip install ansible==2.2.1.0;
       git checkout "${CI_BUILD_REF}";
       ansible-playbook -i inventory/inventory.ini -b --become-user=root --private-key=${HOME}/.ssh/id_rsa -u $SSH_USER

@@ -149,7 +150,7 @@ before_script:
     -e resolvconf_mode=${RESOLVCONF_MODE}
     -e weave_cpu_requests=${WEAVE_CPU_LIMIT}
     -e weave_cpu_limit=${WEAVE_CPU_LIMIT}
-    cluster.yml;
+    $PLAYBOOK;
     fi

  # Tests Cases

@@ -254,7 +255,7 @@ before_script:
   KUBE_NETWORK_PLUGIN: canal
   CLOUD_IMAGE: debian-8-kubespray
   CLOUD_REGION: us-east1-b
-  UPGRADE_TEST: "true"
+  UPGRADE_TEST: "basic"
   CLUSTER_MODE: ha

 .rhel7_weave_variables: &rhel7_weave_variables

@@ -262,7 +263,6 @@ before_script:
   KUBE_NETWORK_PLUGIN: weave
   CLOUD_IMAGE: rhel-7
   CLOUD_REGION: europe-west1-b
-  UPGRADE_TEST: "true"
   CLUSTER_MODE: default

 .centos7_flannel_variables: &centos7_flannel_variables

@@ -278,6 +278,7 @@ before_script:
   CLOUD_IMAGE: debian-8-kubespray
   CLOUD_REGION: us-central1-b
   CLUSTER_MODE: default
+  UPGRADE_TEST: "graceful"

 .coreos_canal_variables: &coreos_canal_variables
   # stage: deploy-gce-part2

@@ -444,7 +445,7 @@ rhel7-weave-triggers:
   when: on_success
   only: ['triggers']

-debian8-calico:
+debian8-calico-upgrade:
   stage: deploy-gce-part2
   <<: *job
   <<: *gce

@@ -540,7 +541,7 @@ coreos-alpha-weave-ha:
   except: ['triggers']
   only: ['master', /^pr-.*$/]

-ubuntu-rkt-sep:
+ubuntu-rkt-sep-upgrade:
   stage: deploy-gce-part1
   <<: *job
   <<: *gce

docs/upgrades.md

@@ -18,7 +18,7 @@ versions. Here are all version vars for each component:
 * flannel_version
 * kubedns_version

-#### Example
+#### Unsafe upgrade example

 If you wanted to upgrade just kube_version from v1.4.3 to v1.4.6, you could
 deploy the following way:

@@ -33,6 +33,20 @@ And then repeat with v1.4.6 as kube_version:
 ansible-playbook cluster.yml -i inventory/inventory.cfg -e kube_version=v1.4.6
 ```

+#### Graceful upgrade
+
+Kargo also supports cordoning, draining and uncordoning of nodes when performing
+a cluster upgrade. There is a separate playbook used for this purpose. It is
+important to note that upgrade-cluster.yml can only be used for upgrading an
+existing cluster. That means there must be at least 1 kube-master already
+deployed.
+
+```
+git fetch origin
+git checkout origin/master
+ansible-playbook upgrade-cluster.yml -i inventory/inventory.cfg
+```
+
 #### Upgrade order

 As mentioned above, components are upgraded in the order in which they were
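A note on batch sizing: the worker play in the new upgrade-cluster.yml (shown at the bottom of this diff) batches nodes with `serial: "{{ serial | default('20%') }}"`, so the number of nodes drained and upgraded at once can be overridden at run time. A minimal sketch, reusing the inventory path from the docs above:

```
# Cordon, drain and upgrade one worker at a time
# instead of the default 20% batch.
ansible-playbook upgrade-cluster.yml -i inventory/inventory.cfg -e serial=1
```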

roles/kubernetes-apps/network_plugin/weave/tasks/main.yml

@@ -1,6 +1,5 @@
 #FIXME: remove if kubernetes/features#124 is implemented
 - name: Weave | Purge old weave daemonset
-  run_once: true
   kube:
     name: "weave-net"
     kubectl: "{{ bin_dir }}/kubectl"

@@ -12,7 +11,6 @@
 - name: Weave | Start Resources
-  run_once: true
   kube:
     name: "weave-net"
     kubectl: "{{ bin_dir }}/kubectl"

@@ -21,17 +19,16 @@
     namespace: "{{system_namespace}}"
     state: "{{ item | ternary('latest','present') }}"
   with_items: "{{ weave_manifest.changed }}"
-  delegate_to: "{{groups['kube-master'][0]}}"
+  when: inventory_hostname == groups['kube-master'][0]

 - name: "Weave | wait for weave to become available"
   uri:
     url: http://127.0.0.1:6784/status
     return_content: yes
-  run_once: true
   register: weave_status
-  retries: 12
+  retries: 180
   delay: 10
   until: "{{ weave_status.status == 200 and
          'Status: ready' in weave_status.content }}"
-  delegate_to: "{{groups['kube-master'][0]}}"
+  when: inventory_hostname == groups['kube-master'][0]

roles/upgrade/post-upgrade/tasks/main.yml (new file)

@@ -0,0 +1,5 @@
+---
+- name: Uncordon node
+  command: "{{ bin_dir }}/kubectl uncordon {{ ansible_hostname }}"
+  delegate_to: "{{ groups['kube-master'][0] }}"

roles/upgrade/pre-upgrade/tasks/main.yml (new file)

@@ -0,0 +1,12 @@
+---
+- name: Cordon node
+  command: "{{ bin_dir }}/kubectl cordon {{ ansible_hostname }}"
+  delegate_to: "{{ groups['kube-master'][0] }}"
+
+- name: Drain node
+  command: "{{ bin_dir }}/kubectl drain --force --ignore-daemonsets --grace-period 30 --delete-local-data {{ ansible_hostname }}"
+  delegate_to: "{{ groups['kube-master'][0] }}"
+
+- name: Sleep for grace period for draining
+  pause: seconds=30
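For anyone verifying these tasks by hand: a cordoned and drained node reports `SchedulingDisabled` in its status, and only DaemonSet-managed pods should remain on it. A quick check from the first master (a sketch; `<node-name>` is a placeholder for the node being upgraded):

```
kubectl get nodes                  # drained node shows STATUS "Ready,SchedulingDisabled"
kubectl get pods --all-namespaces -o wide | grep <node-name>   # only DaemonSet pods should remain
```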

tests/templates/inventory-gce.j2

@@ -1,48 +1,51 @@
-node1 ansible_ssh_host={{gce.instance_data[0].public_ip}}
-node2 ansible_ssh_host={{gce.instance_data[1].public_ip}}
+{% set node1 = gce.instance_data[0].name %}
+{% set node2 = gce.instance_data[1].name %}
+{{node1}} ansible_ssh_host={{gce.instance_data[0].public_ip}}
+{{node2}} ansible_ssh_host={{gce.instance_data[1].public_ip}}
 {% if mode is defined and mode in ["separate", "ha"] %}
-node3 ansible_ssh_host={{gce.instance_data[2].public_ip}}
+{% set node3 = gce.instance_data[2].name %}
+{{node3}} ansible_ssh_host={{gce.instance_data[2].public_ip}}
 {% endif %}
 {% if mode is defined and mode == "separate" %}
 [kube-master]
-node1
+{{node1}}
 [kube-node]
-node2
+{{node2}}
 [etcd]
-node3
+{{node3}}
 [vault]
-node3
+{{node3}}
 {% elif mode is defined and mode == "ha" %}
 [kube-master]
-node1
-node2
+{{node1}}
+{{node2}}
 [kube-node]
-node3
+{{node3}}
 [etcd]
-node2
-node3
+{{node2}}
+{{node3}}
 [vault]
-node2
-node3
+{{node2}}
+{{node3}}
 {% else %}
 [kube-master]
-node1
+{{node1}}
 [kube-node]
-node2
+{{node2}}
 [etcd]
-node1
+{{node1}}
 [vault]
-node1
+{{node1}}
 {% endif %}
 [k8s-cluster:children]

upgrade-cluster.yml (new file, 92 lines)

@@ -0,0 +1,92 @@
+---
+- hosts: localhost
+  gather_facts: False
+  roles:
+    - bastion-ssh-config
+  tags: [localhost, bastion]
+
+- hosts: k8s-cluster:etcd:calico-rr
+  any_errors_fatal: true
+  gather_facts: false
+  vars:
+    # Need to disable pipelining for bootstrap-os as some systems have requiretty in sudoers set, which makes pipelining
+    # fail. bootstrap-os fixes this on these systems, so in later plays it can be enabled.
+    ansible_ssh_pipelining: false
+  roles:
+    - bootstrap-os
+  tags:
+    - bootstrap-os
+
+- hosts: k8s-cluster:etcd:calico-rr
+  any_errors_fatal: true
+  vars:
+    ansible_ssh_pipelining: true
+  gather_facts: true
+
+- hosts: k8s-cluster:etcd:calico-rr
+  any_errors_fatal: true
+  roles:
+    - { role: kernel-upgrade, tags: kernel-upgrade, when: kernel_upgrade is defined and kernel_upgrade }
+    - { role: kubernetes/preinstall, tags: preinstall }
+    - { role: docker, tags: docker }
+    - role: rkt
+      tags: rkt
+      when: "'rkt' in [etcd_deployment_type, kubelet_deployment_type, vault_deployment_type]"
+
+- hosts: etcd:k8s-cluster:vault
+  any_errors_fatal: true
+  roles:
+    - { role: vault, tags: vault, vault_bootstrap: true, when: "cert_management == 'vault'" }
+
+- hosts: etcd:!k8s-cluster
+  any_errors_fatal: true
+  roles:
+    - { role: etcd, tags: etcd }
+
+- hosts: k8s-cluster
+  any_errors_fatal: true
+  roles:
+    - { role: etcd, tags: etcd }
+
+- hosts: etcd:k8s-cluster:vault
+  any_errors_fatal: true
+  roles:
+    - { role: vault, tags: vault, when: "cert_management == 'vault'" }
+
+#Handle upgrades to master components first to maintain backwards compat.
+- hosts: kube-master
+  any_errors_fatal: true
+  serial: 1
+  roles:
+    - { role: upgrade/pre-upgrade, tags: pre-upgrade }
+    - { role: kubernetes/node, tags: node }
+    - { role: kubernetes/master, tags: master }
+    - { role: network_plugin, tags: network }
+    - { role: upgrade/post-upgrade, tags: post-upgrade }
+
+#Finally handle worker upgrades, based on given batch size
+- hosts: kube-node:!kube-master
+  any_errors_fatal: true
+  serial: "{{ serial | default('20%') }}"
+  roles:
+    - { role: upgrade/pre-upgrade, tags: pre-upgrade }
+    - { role: kubernetes/node, tags: node }
+    - { role: network_plugin, tags: network }
+    - { role: upgrade/post-upgrade, tags: post-upgrade }
+    - { role: kubernetes-apps/network_plugin, tags: network }
+
+- hosts: calico-rr
+  any_errors_fatal: true
+  roles:
+    - { role: network_plugin/calico/rr, tags: network }
+
+- hosts: k8s-cluster
+  any_errors_fatal: true
+  roles:
+    - { role: dnsmasq, when: "dns_mode == 'dnsmasq_kubedns'", tags: dnsmasq }
+    - { role: kubernetes/preinstall, when: "dns_mode != 'none' and resolvconf_mode == 'host_resolvconf'", tags: resolvconf }
+
+- hosts: kube-master[0]
+  any_errors_fatal: true
+  roles:
+    - { role: kubernetes-apps, tags: apps }
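Putting the pieces together, the graceful CI path above (UPGRADE_TEST="graceful") ends up invoking this playbook roughly as follows; a sketch assembled from the CI job's flags, with the job's `-e` extra-vars omitted for brevity ($SSH_USER and the key path are values the CI environment provides):

```
ansible-playbook -i inventory/inventory.ini -b --become-user=root \
  --private-key=${HOME}/.ssh/id_rsa -u $SSH_USER \
  upgrade-cluster.yml
```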