Merge pull request #1029 from mattymo/graceful
Add graceful upgrade process
This commit is contained in:
commit
a4076b8ee6
7 changed files with 158 additions and 34 deletions
|
@ -101,8 +101,8 @@ before_script:
|
|||
|
||||
# Check out latest tag if testing upgrade
|
||||
# Uncomment when gitlab kargo repo has tags
|
||||
#- test "${UPGRADE_TEST}" = "true" && git fetch --all && git checkout $(git describe --tags $(git rev-list --tags --max-count=1))
|
||||
- test "${UPGRADE_TEST}" = "true" && git checkout 031cf565ec3ccd3ebbe80eeef3454c3780e5c598 && pip install ansible==2.2.0
|
||||
#- test "${UPGRADE_TEST}" != "false" && git fetch --all && git checkout $(git describe --tags $(git rev-list --tags --max-count=1))
|
||||
- test "${UPGRADE_TEST}" != "false" && git checkout 031cf565ec3ccd3ebbe80eeef3454c3780e5c598 && pip install ansible==2.2.0
|
||||
|
||||
|
||||
# Create cluster
|
||||
|
@ -127,9 +127,10 @@ before_script:
|
|||
cluster.yml
|
||||
|
||||
# Repeat deployment if testing upgrade
|
||||
#FIXME(mattymo): repeat "Create cluster" above without duplicating code
|
||||
- >
|
||||
if [ "${UPGRADE_TEST}" = "true" ]; then
|
||||
if [ "${UPGRADE_TEST}" != "false" ]; then
|
||||
test "${UPGRADE_TEST}" == "basic" && PLAYBOOK="cluster.yml";
|
||||
test "${UPGRADE_TEST}" == "graceful" && PLAYBOOK="upgrade-cluster.yml";
|
||||
pip install ansible==2.2.1.0;
|
||||
git checkout "${CI_BUILD_REF}";
|
||||
ansible-playbook -i inventory/inventory.ini -b --become-user=root --private-key=${HOME}/.ssh/id_rsa -u $SSH_USER
|
||||
|
@ -149,7 +150,7 @@ before_script:
|
|||
-e resolvconf_mode=${RESOLVCONF_MODE}
|
||||
-e weave_cpu_requests=${WEAVE_CPU_LIMIT}
|
||||
-e weave_cpu_limit=${WEAVE_CPU_LIMIT}
|
||||
cluster.yml;
|
||||
$PLAYBOOK;
|
||||
fi
|
||||
|
||||
# Test Cases
|
||||
|
@ -254,7 +255,7 @@ before_script:
|
|||
KUBE_NETWORK_PLUGIN: canal
|
||||
CLOUD_IMAGE: debian-8-kubespray
|
||||
CLOUD_REGION: us-east1-b
|
||||
UPGRADE_TEST: "true"
|
||||
UPGRADE_TEST: "basic"
|
||||
CLUSTER_MODE: ha
|
||||
|
||||
.rhel7_weave_variables: &rhel7_weave_variables
|
||||
|
@ -262,7 +263,6 @@ before_script:
|
|||
KUBE_NETWORK_PLUGIN: weave
|
||||
CLOUD_IMAGE: rhel-7
|
||||
CLOUD_REGION: europe-west1-b
|
||||
UPGRADE_TEST: "true"
|
||||
CLUSTER_MODE: default
|
||||
|
||||
.centos7_flannel_variables: &centos7_flannel_variables
|
||||
|
@ -278,6 +278,7 @@ before_script:
|
|||
CLOUD_IMAGE: debian-8-kubespray
|
||||
CLOUD_REGION: us-central1-b
|
||||
CLUSTER_MODE: default
|
||||
UPGRADE_TEST: "graceful"
|
||||
|
||||
.coreos_canal_variables: &coreos_canal_variables
|
||||
# stage: deploy-gce-part2
|
||||
|
@ -444,7 +445,7 @@ rhel7-weave-triggers:
|
|||
when: on_success
|
||||
only: ['triggers']
|
||||
|
||||
debian8-calico:
|
||||
debian8-calico-upgrade:
|
||||
stage: deploy-gce-part2
|
||||
<<: *job
|
||||
<<: *gce
|
||||
|
@ -540,7 +541,7 @@ coreos-alpha-weave-ha:
|
|||
except: ['triggers']
|
||||
only: ['master', /^pr-.*$/]
|
||||
|
||||
ubuntu-rkt-sep:
|
||||
ubuntu-rkt-sep-upgrade:
|
||||
stage: deploy-gce-part1
|
||||
<<: *job
|
||||
<<: *gce
|
||||
|
|
|
@ -18,7 +18,7 @@ versions. Here are all version vars for each component:
|
|||
* flannel_version
|
||||
* kubedns_version
|
||||
|
||||
#### Example
|
||||
#### Unsafe upgrade example
|
||||
|
||||
If you wanted to upgrade just kube_version from v1.4.3 to v1.4.6, you could
|
||||
deploy the following way:
|
||||
|
@ -33,6 +33,20 @@ And then repeat with v1.4.6 as kube_version:
|
|||
ansible-playbook cluster.yml -i inventory/inventory.cfg -e kube_version=v1.4.6
|
||||
```
|
||||
|
||||
#### Graceful upgrade
|
||||
|
||||
Kargo also supports cordoning, draining and uncordoning of nodes when performing
|
||||
a cluster upgrade. There is a separate playbook used for this purpose. It is
|
||||
important to note that upgrade-cluster.yml can only be used for upgrading an
|
||||
existing cluster. That means there must be at least 1 kube-master already
|
||||
deployed.
|
||||
|
||||
```
|
||||
git fetch origin
|
||||
git checkout origin/master
|
||||
ansible-playbook upgrade-cluster.yml -i inventory/inventory.cfg
|
||||
```
|
||||
|
||||
#### Upgrade order
|
||||
|
||||
As mentioned above, components are upgraded in the order in which they were
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
#FIXME: remove if kubernetes/features#124 is implemented
|
||||
- name: Weave | Purge old weave daemonset
|
||||
run_once: true
|
||||
kube:
|
||||
name: "weave-net"
|
||||
kubectl: "{{ bin_dir }}/kubectl"
|
||||
|
@ -12,7 +11,6 @@
|
|||
|
||||
|
||||
- name: Weave | Start Resources
|
||||
run_once: true
|
||||
kube:
|
||||
name: "weave-net"
|
||||
kubectl: "{{ bin_dir }}/kubectl"
|
||||
|
@ -21,17 +19,16 @@
|
|||
namespace: "{{system_namespace}}"
|
||||
state: "{{ item | ternary('latest','present') }}"
|
||||
with_items: "{{ weave_manifest.changed }}"
|
||||
delegate_to: "{{groups['kube-master'][0]}}"
|
||||
when: inventory_hostname == groups['kube-master'][0]
|
||||
|
||||
|
||||
- name: "Weave | wait for weave to become available"
|
||||
uri:
|
||||
url: http://127.0.0.1:6784/status
|
||||
return_content: yes
|
||||
run_once: true
|
||||
register: weave_status
|
||||
retries: 12
|
||||
retries: 180
|
||||
delay: 10
|
||||
until: "{{ weave_status.status == 200 and
|
||||
'Status: ready' in weave_status.content }}"
|
||||
delegate_to: "{{groups['kube-master'][0]}}"
|
||||
when: inventory_hostname == groups['kube-master'][0]
|
||||
|
|
5
roles/upgrade/post-upgrade/tasks/main.yml
Normal file
5
roles/upgrade/post-upgrade/tasks/main.yml
Normal file
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
|
||||
- name: Uncordon node
|
||||
command: "{{ bin_dir }}/kubectl uncordon {{ ansible_hostname }}"
|
||||
delegate_to: "{{ groups['kube-master'][0] }}"
|
12
roles/upgrade/pre-upgrade/tasks/main.yml
Normal file
12
roles/upgrade/pre-upgrade/tasks/main.yml
Normal file
|
@ -0,0 +1,12 @@
|
|||
---
|
||||
|
||||
- name: Cordon node
|
||||
command: "{{ bin_dir }}/kubectl cordon {{ ansible_hostname }}"
|
||||
delegate_to: "{{ groups['kube-master'][0] }}"
|
||||
|
||||
- name: Drain node
|
||||
command: "{{ bin_dir }}/kubectl drain --force --ignore-daemonsets --grace-period 30 --delete-local-data {{ ansible_hostname }}"
|
||||
delegate_to: "{{ groups['kube-master'][0] }}"
|
||||
|
||||
- name: Sleep for grace period for draining
|
||||
pause: seconds=30
|
|
@ -1,48 +1,51 @@
|
|||
node1 ansible_ssh_host={{gce.instance_data[0].public_ip}}
|
||||
node2 ansible_ssh_host={{gce.instance_data[1].public_ip}}
|
||||
{% set node1 = gce.instance_data[0].name %}
|
||||
{% set node2 = gce.instance_data[1].name %}
|
||||
{{node1}} ansible_ssh_host={{gce.instance_data[0].public_ip}}
|
||||
{{node2}} ansible_ssh_host={{gce.instance_data[1].public_ip}}
|
||||
{% if mode is defined and mode in ["separate", "ha"] %}
|
||||
node3 ansible_ssh_host={{gce.instance_data[2].public_ip}}
|
||||
{% set node3 = gce.instance_data[2].name %}
|
||||
{{node3}} ansible_ssh_host={{gce.instance_data[2].public_ip}}
|
||||
{% endif %}
|
||||
|
||||
{% if mode is defined and mode == "separate" %}
|
||||
[kube-master]
|
||||
node1
|
||||
{{node1}}
|
||||
|
||||
[kube-node]
|
||||
node2
|
||||
{{node2}}
|
||||
|
||||
[etcd]
|
||||
node3
|
||||
{{node3}}
|
||||
|
||||
[vault]
|
||||
node3
|
||||
{{node3}}
|
||||
{% elif mode is defined and mode == "ha" %}
|
||||
[kube-master]
|
||||
node1
|
||||
node2
|
||||
{{node1}}
|
||||
{{node2}}
|
||||
|
||||
[kube-node]
|
||||
node3
|
||||
{{node3}}
|
||||
|
||||
[etcd]
|
||||
node2
|
||||
node3
|
||||
{{node2}}
|
||||
{{node3}}
|
||||
|
||||
[vault]
|
||||
node2
|
||||
node3
|
||||
{{node2}}
|
||||
{{node3}}
|
||||
{% else %}
|
||||
[kube-master]
|
||||
node1
|
||||
{{node1}}
|
||||
|
||||
[kube-node]
|
||||
node2
|
||||
{{node2}}
|
||||
|
||||
[etcd]
|
||||
node1
|
||||
{{node1}}
|
||||
|
||||
[vault]
|
||||
node1
|
||||
{{node1}}
|
||||
{% endif %}
|
||||
|
||||
[k8s-cluster:children]
|
||||
|
|
92
upgrade-cluster.yml
Normal file
92
upgrade-cluster.yml
Normal file
|
@ -0,0 +1,92 @@
|
|||
---
|
||||
- hosts: localhost
|
||||
gather_facts: False
|
||||
roles:
|
||||
- bastion-ssh-config
|
||||
tags: [localhost, bastion]
|
||||
|
||||
- hosts: k8s-cluster:etcd:calico-rr
|
||||
any_errors_fatal: true
|
||||
gather_facts: false
|
||||
vars:
|
||||
# Need to disable pipelining for bootstrap-os as some systems have requiretty in sudoers set, which makes pipelining
|
||||
# fail. bootstrap-os fixes this on these systems, so in later plays it can be enabled.
|
||||
ansible_ssh_pipelining: false
|
||||
roles:
|
||||
- bootstrap-os
|
||||
tags:
|
||||
- bootstrap-os
|
||||
|
||||
- hosts: k8s-cluster:etcd:calico-rr
|
||||
any_errors_fatal: true
|
||||
vars:
|
||||
ansible_ssh_pipelining: true
|
||||
gather_facts: true
|
||||
|
||||
- hosts: k8s-cluster:etcd:calico-rr
|
||||
any_errors_fatal: true
|
||||
roles:
|
||||
- { role: kernel-upgrade, tags: kernel-upgrade, when: kernel_upgrade is defined and kernel_upgrade }
|
||||
- { role: kubernetes/preinstall, tags: preinstall }
|
||||
- { role: docker, tags: docker }
|
||||
- role: rkt
|
||||
tags: rkt
|
||||
when: "'rkt' in [etcd_deployment_type, kubelet_deployment_type, vault_deployment_type]"
|
||||
|
||||
- hosts: etcd:k8s-cluster:vault
|
||||
any_errors_fatal: true
|
||||
roles:
|
||||
- { role: vault, tags: vault, vault_bootstrap: true, when: "cert_management == 'vault'" }
|
||||
|
||||
- hosts: etcd:!k8s-cluster
|
||||
any_errors_fatal: true
|
||||
roles:
|
||||
- { role: etcd, tags: etcd }
|
||||
|
||||
- hosts: k8s-cluster
|
||||
any_errors_fatal: true
|
||||
roles:
|
||||
- { role: etcd, tags: etcd }
|
||||
|
||||
- hosts: etcd:k8s-cluster:vault
|
||||
any_errors_fatal: true
|
||||
roles:
|
||||
- { role: vault, tags: vault, when: "cert_management == 'vault'"}
|
||||
|
||||
#Handle upgrades to master components first to maintain backwards compat.
|
||||
- hosts: kube-master
|
||||
any_errors_fatal: true
|
||||
serial: 1
|
||||
roles:
|
||||
- { role: upgrade/pre-upgrade, tags: pre-upgrade }
|
||||
- { role: kubernetes/node, tags: node }
|
||||
- { role: kubernetes/master, tags: master }
|
||||
- { role: network_plugin, tags: network }
|
||||
- { role: upgrade/post-upgrade, tags: post-upgrade }
|
||||
|
||||
#Finally handle worker upgrades, based on given batch size
|
||||
- hosts: kube-node:!kube-master
|
||||
any_errors_fatal: true
|
||||
serial: "{{ serial | default('20%') }}"
|
||||
roles:
|
||||
- { role: upgrade/pre-upgrade, tags: pre-upgrade }
|
||||
- { role: kubernetes/node, tags: node }
|
||||
- { role: network_plugin, tags: network }
|
||||
- { role: upgrade/post-upgrade, tags: post-upgrade }
|
||||
- { role: kubernetes-apps/network_plugin, tags: network }
|
||||
|
||||
- hosts: calico-rr
|
||||
any_errors_fatal: true
|
||||
roles:
|
||||
- { role: network_plugin/calico/rr, tags: network }
|
||||
|
||||
- hosts: k8s-cluster
|
||||
any_errors_fatal: true
|
||||
roles:
|
||||
- { role: dnsmasq, when: "dns_mode == 'dnsmasq_kubedns'", tags: dnsmasq }
|
||||
- { role: kubernetes/preinstall, when: "dns_mode != 'none' and resolvconf_mode == 'host_resolvconf'", tags: resolvconf }
|
||||
|
||||
- hosts: kube-master[0]
|
||||
any_errors_fatal: true
|
||||
roles:
|
||||
- { role: kubernetes-apps, tags: apps }
|
Loading…
Reference in a new issue