diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 2560894ca..9c6630306 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -101,8 +101,8 @@ before_script: # Check out latest tag if testing upgrade # Uncomment when gitlab kargo repo has tags - #- test "${UPGRADE_TEST}" = "true" && git fetch --all && git checkout $(git describe --tags $(git rev-list --tags --max-count=1)) - - test "${UPGRADE_TEST}" = "true" && git checkout 031cf565ec3ccd3ebbe80eeef3454c3780e5c598 && pip install ansible==2.2.0 + #- test "${UPGRADE_TEST}" != "false" && git fetch --all && git checkout $(git describe --tags $(git rev-list --tags --max-count=1)) + - test "${UPGRADE_TEST}" != "false" && git checkout 031cf565ec3ccd3ebbe80eeef3454c3780e5c598 && pip install ansible==2.2.0 # Create cluster @@ -127,9 +127,10 @@ before_script: cluster.yml # Repeat deployment if testing upgrade - #FIXME(mattymo): repeat "Create cluster" above without duplicating code - > - if [ "${UPGRADE_TEST}" = "true" ]; then + if [ "${UPGRADE_TEST}" != "false" ]; then + test "${UPGRADE_TEST}" == "basic" && PLAYBOOK="cluster.yml"; + test "${UPGRADE_TEST}" == "graceful" && PLAYBOOK="upgrade-cluster.yml"; pip install ansible==2.2.1.0; git checkout "${CI_BUILD_REF}"; ansible-playbook -i inventory/inventory.ini -b --become-user=root --private-key=${HOME}/.ssh/id_rsa -u $SSH_USER @@ -149,7 +150,7 @@ before_script: -e resolvconf_mode=${RESOLVCONF_MODE} -e weave_cpu_requests=${WEAVE_CPU_LIMIT} -e weave_cpu_limit=${WEAVE_CPU_LIMIT} - cluster.yml; + $PLAYBOOK; fi # Tests Cases @@ -254,7 +255,7 @@ before_script: KUBE_NETWORK_PLUGIN: canal CLOUD_IMAGE: debian-8-kubespray CLOUD_REGION: us-east1-b - UPGRADE_TEST: "true" + UPGRADE_TEST: "basic" CLUSTER_MODE: ha .rhel7_weave_variables: &rhel7_weave_variables @@ -262,7 +263,6 @@ before_script: KUBE_NETWORK_PLUGIN: weave CLOUD_IMAGE: rhel-7 CLOUD_REGION: europe-west1-b - UPGRADE_TEST: "true" CLUSTER_MODE: default .centos7_flannel_variables: &centos7_flannel_variables @@ -278,6 +278,7 @@ 
before_script: CLOUD_IMAGE: debian-8-kubespray CLOUD_REGION: us-central1-b CLUSTER_MODE: default + UPGRADE_TEST: "graceful" .coreos_canal_variables: &coreos_canal_variables # stage: deploy-gce-part2 @@ -444,7 +445,7 @@ rhel7-weave-triggers: when: on_success only: ['triggers'] -debian8-calico: +debian8-calico-upgrade: stage: deploy-gce-part2 <<: *job <<: *gce @@ -540,7 +541,7 @@ coreos-alpha-weave-ha: except: ['triggers'] only: ['master', /^pr-.*$/] -ubuntu-rkt-sep: +ubuntu-rkt-sep-upgrade: stage: deploy-gce-part1 <<: *job <<: *gce diff --git a/docs/upgrades.md b/docs/upgrades.md index 4a25bd622..9a57f43ac 100644 --- a/docs/upgrades.md +++ b/docs/upgrades.md @@ -18,7 +18,7 @@ versions. Here are all version vars for each component: * flannel_version * kubedns_version -#### Example +#### Unsafe upgrade example If you wanted to upgrade just kube_version from v1.4.3 to v1.4.6, you could deploy the following way: @@ -33,6 +33,20 @@ And then repeat with v1.4.6 as kube_version: ansible-playbook cluster.yml -i inventory/inventory.cfg -e kube_version=v1.4.6 ``` +#### Graceful upgrade + +Kargo also supports cordon, drain and uncordoning of nodes when performing +a cluster upgrade. There is a separate playbook used for this purpose. It is +important to note that upgrade-cluster.yml can only be used for upgrading an +existing cluster. That means there must be at least 1 kube-master already +deployed. 
+ +``` +git fetch origin +git checkout origin/master +ansible-playbook upgrade-cluster.yml -i inventory/inventory.cfg +``` + #### Upgrade order As mentioned above, components are upgraded in the order in which they were diff --git a/roles/kubernetes-apps/network_plugin/weave/tasks/main.yml b/roles/kubernetes-apps/network_plugin/weave/tasks/main.yml index 1c216fd92..93be1602b 100644 --- a/roles/kubernetes-apps/network_plugin/weave/tasks/main.yml +++ b/roles/kubernetes-apps/network_plugin/weave/tasks/main.yml @@ -1,6 +1,5 @@ #FIXME: remove if kubernetes/features#124 is implemented - name: Weave | Purge old weave daemonset - run_once: true kube: name: "weave-net" kubectl: "{{ bin_dir }}/kubectl" @@ -12,7 +11,6 @@ - name: Weave | Start Resources - run_once: true kube: name: "weave-net" kubectl: "{{ bin_dir }}/kubectl" @@ -21,17 +19,16 @@ namespace: "{{system_namespace}}" state: "{{ item | ternary('latest','present') }}" with_items: "{{ weave_manifest.changed }}" - delegate_to: "{{groups['kube-master'][0]}}" + when: inventory_hostname == groups['kube-master'][0] - name: "Weave | wait for weave to become available" uri: url: http://127.0.0.1:6784/status return_content: yes - run_once: true register: weave_status - retries: 12 + retries: 180 delay: 10 until: "{{ weave_status.status == 200 and 'Status: ready' in weave_status.content }}" - delegate_to: "{{groups['kube-master'][0]}}" + when: inventory_hostname == groups['kube-master'][0] diff --git a/roles/upgrade/post-upgrade/tasks/main.yml b/roles/upgrade/post-upgrade/tasks/main.yml new file mode 100644 index 000000000..d8243d04d --- /dev/null +++ b/roles/upgrade/post-upgrade/tasks/main.yml @@ -0,0 +1,5 @@ +--- + +- name: Uncordon node + command: "{{ bin_dir }}/kubectl uncordon {{ ansible_hostname }}" + delegate_to: "{{ groups['kube-master'][0] }}" diff --git a/roles/upgrade/pre-upgrade/tasks/main.yml b/roles/upgrade/pre-upgrade/tasks/main.yml new file mode 100644 index 000000000..90b535d2e --- /dev/null +++ 
b/roles/upgrade/pre-upgrade/tasks/main.yml @@ -0,0 +1,12 @@ +--- + +- name: Cordon node + command: "{{ bin_dir }}/kubectl cordon {{ ansible_hostname }}" + delegate_to: "{{ groups['kube-master'][0] }}" + +- name: Drain node + command: "{{ bin_dir }}/kubectl drain --force --ignore-daemonsets --grace-period 30 --delete-local-data {{ ansible_hostname }}" + delegate_to: "{{ groups['kube-master'][0] }}" + +- name: Sleep for grace period for draining + pause: seconds=30 diff --git a/tests/templates/inventory-gce.j2 b/tests/templates/inventory-gce.j2 index 015bdb6a4..f5326229c 100644 --- a/tests/templates/inventory-gce.j2 +++ b/tests/templates/inventory-gce.j2 @@ -1,48 +1,51 @@ -node1 ansible_ssh_host={{gce.instance_data[0].public_ip}} -node2 ansible_ssh_host={{gce.instance_data[1].public_ip}} +{% set node1 = gce.instance_data[0].name %} +{% set node2 = gce.instance_data[1].name %} +{{node1}} ansible_ssh_host={{gce.instance_data[0].public_ip}} +{{node2}} ansible_ssh_host={{gce.instance_data[1].public_ip}} {% if mode is defined and mode in ["separate", "ha"] %} -node3 ansible_ssh_host={{gce.instance_data[2].public_ip}} +{% set node3 = gce.instance_data[2].name %} +{{node3}} ansible_ssh_host={{gce.instance_data[2].public_ip}} {% endif %} {% if mode is defined and mode == "separate" %} [kube-master] -node1 +{{node1}} [kube-node] -node2 +{{node2}} [etcd] -node3 +{{node3}} [vault] -node3 +{{node3}} {% elif mode is defined and mode == "ha" %} [kube-master] -node1 -node2 +{{node1}} +{{node2}} [kube-node] -node3 +{{node3}} [etcd] -node2 -node3 +{{node2}} +{{node3}} [vault] -node2 -node3 +{{node2}} +{{node3}} {% else %} [kube-master] -node1 +{{node1}} [kube-node] -node2 +{{node2}} [etcd] -node1 +{{node1}} [vault] -node1 +{{node1}} {% endif %} [k8s-cluster:children] diff --git a/upgrade-cluster.yml b/upgrade-cluster.yml new file mode 100644 index 000000000..1be9c9cab --- /dev/null +++ b/upgrade-cluster.yml @@ -0,0 +1,92 @@ +--- +- hosts: localhost + gather_facts: False + roles: + - 
bastion-ssh-config + tags: [localhost, bastion] + +- hosts: k8s-cluster:etcd:calico-rr + any_errors_fatal: true + gather_facts: false + vars: + # Need to disable pipelining for bootstrap-os as some systems have requiretty in sudoers set, which makes pipelining + # fail. bootstrap-os fixes this on these systems, so in later plays it can be enabled. + ansible_ssh_pipelining: false + roles: + - bootstrap-os + tags: + - bootstrap-os + +- hosts: k8s-cluster:etcd:calico-rr + any_errors_fatal: true + vars: + ansible_ssh_pipelining: true + gather_facts: true + +- hosts: k8s-cluster:etcd:calico-rr + any_errors_fatal: true + roles: + - { role: kernel-upgrade, tags: kernel-upgrade, when: kernel_upgrade is defined and kernel_upgrade } + - { role: kubernetes/preinstall, tags: preinstall } + - { role: docker, tags: docker } + - role: rkt + tags: rkt + when: "'rkt' in [etcd_deployment_type, kubelet_deployment_type, vault_deployment_type]" + +- hosts: etcd:k8s-cluster:vault + any_errors_fatal: true + roles: + - { role: vault, tags: vault, vault_bootstrap: true, when: "cert_management == 'vault'" } + +- hosts: etcd:!k8s-cluster + any_errors_fatal: true + roles: + - { role: etcd, tags: etcd } + +- hosts: k8s-cluster + any_errors_fatal: true + roles: + - { role: etcd, tags: etcd } + +- hosts: etcd:k8s-cluster:vault + any_errors_fatal: true + roles: + - { role: vault, tags: vault, when: "cert_management == 'vault'"} + +#Handle upgrades to master components first to maintain backwards compat. 
+- hosts: kube-master + any_errors_fatal: true + serial: 1 + roles: + - { role: upgrade/pre-upgrade, tags: pre-upgrade } + - { role: kubernetes/node, tags: node } + - { role: kubernetes/master, tags: master } + - { role: network_plugin, tags: network } + - { role: upgrade/post-upgrade, tags: post-upgrade } + +#Finally handle worker upgrades, based on given batch size +- hosts: kube-node:!kube-master + any_errors_fatal: true + serial: "{{ serial | default('20%') }}" + roles: + - { role: upgrade/pre-upgrade, tags: pre-upgrade } + - { role: kubernetes/node, tags: node } + - { role: network_plugin, tags: network } + - { role: upgrade/post-upgrade, tags: post-upgrade } + - { role: kubernetes-apps/network_plugin, tags: network } + +- hosts: calico-rr + any_errors_fatal: true + roles: + - { role: network_plugin/calico/rr, tags: network } + +- hosts: k8s-cluster + any_errors_fatal: true + roles: + - { role: dnsmasq, when: "dns_mode == 'dnsmasq_kubedns'", tags: dnsmasq } + - { role: kubernetes/preinstall, when: "dns_mode != 'none' and resolvconf_mode == 'host_resolvconf'", tags: resolvconf } + +- hosts: kube-master[0] + any_errors_fatal: true + roles: + - { role: kubernetes-apps, tags: apps }