Merge pull request #1153 from mattymo/graceful_drain
Move graceful upgrade test to Ubuntu canal HA, adjust drain
This commit is contained in:
commit
380ad9815f
21 changed files with 127 additions and 37 deletions
|
@ -47,6 +47,7 @@ before_script:
|
|||
PRIVATE_KEY: $GCE_PRIVATE_KEY
|
||||
GS_ACCESS_KEY_ID: $GS_KEY
|
||||
GS_SECRET_ACCESS_KEY: $GS_SECRET
|
||||
CLOUD_MACHINE_TYPE: "g1-small"
|
||||
ANSIBLE_KEEP_REMOTE_FILES: "1"
|
||||
ANSIBLE_CONFIG: ./tests/ansible.cfg
|
||||
BOOTSTRAP_OS: none
|
||||
|
@ -97,6 +98,7 @@ before_script:
|
|||
-e gce_credentials_file=${HOME}/.ssh/gce.json
|
||||
-e gce_project_id=${GCE_PROJECT_ID}
|
||||
-e gce_service_account_email=${GCE_ACCOUNT}
|
||||
-e cloud_machine_type=${CLOUD_MACHINE_TYPE}
|
||||
-e inventory_path=${PWD}/inventory/inventory.ini
|
||||
-e kube_network_plugin=${KUBE_NETWORK_PLUGIN}
|
||||
-e mode=${CLUSTER_MODE}
|
||||
|
@ -260,13 +262,15 @@ before_script:
|
|||
BOOTSTRAP_OS: coreos
|
||||
RESOLVCONF_MODE: host_resolvconf # This is required as long as the CoreOS stable channel uses docker < 1.12
|
||||
|
||||
.debian8_canal_ha_variables: &debian8_canal_ha_variables
|
||||
.ubuntu_canal_ha_variables: &ubuntu_canal_ha_variables
|
||||
# stage: deploy-gce-part1
|
||||
KUBE_NETWORK_PLUGIN: canal
|
||||
CLOUD_IMAGE: debian-8-kubespray
|
||||
CLOUD_REGION: us-east1-b
|
||||
CLOUD_IMAGE: ubuntu-1604-xenial
|
||||
CLOUD_REGION: europe-west1-b
|
||||
CLOUD_MACHINE_TYPE: "n1-standard-2"
|
||||
UPGRADE_TEST: "basic"
|
||||
CLUSTER_MODE: ha
|
||||
UPGRADE_TEST: "graceful"
|
||||
|
||||
.rhel7_weave_variables: &rhel7_weave_variables
|
||||
# stage: deploy-gce-part1
|
||||
|
@ -288,7 +292,6 @@ before_script:
|
|||
CLOUD_IMAGE: debian-8-kubespray
|
||||
CLOUD_REGION: us-central1-b
|
||||
CLUSTER_MODE: default
|
||||
UPGRADE_TEST: "graceful"
|
||||
|
||||
.coreos_canal_variables: &coreos_canal_variables
|
||||
# stage: deploy-gce-part2
|
||||
|
@ -416,24 +419,24 @@ ubuntu-weave-sep-triggers:
|
|||
only: ['triggers']
|
||||
|
||||
# More builds for PRs/merges (manual) and triggers (auto)
|
||||
debian8-canal-ha:
|
||||
ubuntu-canal-ha:
|
||||
stage: deploy-gce-part1
|
||||
<<: *job
|
||||
<<: *gce
|
||||
variables:
|
||||
<<: *gce_variables
|
||||
<<: *debian8_canal_ha_variables
|
||||
<<: *ubuntu_canal_ha_variables
|
||||
when: manual
|
||||
except: ['triggers']
|
||||
only: ['master', /^pr-.*$/]
|
||||
|
||||
debian8-canal-ha-triggers:
|
||||
ubuntu-canal-ha-triggers:
|
||||
stage: deploy-gce-part1
|
||||
<<: *job
|
||||
<<: *gce
|
||||
variables:
|
||||
<<: *gce_variables
|
||||
<<: *debian8_canal_ha_variables
|
||||
<<: *ubuntu_canal_ha_variables
|
||||
when: on_success
|
||||
only: ['triggers']
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ dnsmasq_image_tag: "{{ dnsmasq_version }}"
|
|||
# Limits for dnsmasq/kubedns apps
|
||||
dns_cpu_limit: 100m
|
||||
dns_memory_limit: 170Mi
|
||||
dns_cpu_requests: 70m
|
||||
dns_cpu_requests: 40m
|
||||
dns_memory_requests: 50Mi
|
||||
|
||||
# Autoscaler parameters
|
||||
|
|
|
@ -9,10 +9,10 @@ PermissionsStartOnly=true
|
|||
EnvironmentFile=/etc/etcd.env
|
||||
ExecStart={{ bin_dir }}/etcd
|
||||
ExecStartPre=-{{ docker_bin_dir }}/docker rm -f {{ etcd_member_name | default("etcd") }}
|
||||
ExecReload={{ docker_bin_dir }}/docker restart {{ etcd_member_name | default("etcd") }}
|
||||
ExecStop={{ docker_bin_dir }}/docker stop {{ etcd_member_name | default("etcd") }}
|
||||
Restart=always
|
||||
RestartSec=15s
|
||||
TimeoutStartSec=30s
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
|
|
@ -16,10 +16,13 @@ etcd_cert_dir: "{{ etcd_config_dir }}/ssl"
|
|||
# ETCD backend for k8s data
|
||||
kube_apiserver_storage_backend: etcd3
|
||||
|
||||
# By default, force back to etcd2. Set to true to force etcd3 (experimental!)
|
||||
force_etcd3: false
|
||||
|
||||
# Limits for kube components
|
||||
kube_controller_memory_limit: 512M
|
||||
kube_controller_cpu_limit: 250m
|
||||
kube_controller_memory_requests: 170M
|
||||
kube_controller_memory_requests: 100M
|
||||
kube_controller_cpu_requests: 100m
|
||||
kube_controller_node_monitor_grace_period: 40s
|
||||
kube_controller_node_monitor_period: 5s
|
||||
|
@ -27,11 +30,11 @@ kube_controller_pod_eviction_timeout: 5m0s
|
|||
kube_scheduler_memory_limit: 512M
|
||||
kube_scheduler_cpu_limit: 250m
|
||||
kube_scheduler_memory_requests: 170M
|
||||
kube_scheduler_cpu_requests: 100m
|
||||
kube_scheduler_cpu_requests: 80m
|
||||
kube_apiserver_memory_limit: 2000M
|
||||
kube_apiserver_cpu_limit: 800m
|
||||
kube_apiserver_memory_requests: 256M
|
||||
kube_apiserver_cpu_requests: 300m
|
||||
kube_apiserver_cpu_requests: 100m
|
||||
|
||||
|
||||
## Enable/Disable Kube API Server Authentication Methods
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
url: http://localhost:10251/healthz
|
||||
register: scheduler_result
|
||||
until: scheduler_result.status == 200
|
||||
retries: 15
|
||||
retries: 60
|
||||
delay: 5
|
||||
|
||||
- name: Master | wait for kube-controller-manager
|
||||
|
@ -42,5 +42,5 @@
|
|||
url: http://localhost:8080/healthz
|
||||
register: result
|
||||
until: result.status == 200
|
||||
retries: 10
|
||||
retries: 20
|
||||
delay: 6
|
||||
|
|
|
@ -52,6 +52,10 @@
|
|||
|
||||
- name: Create kube system namespace
|
||||
command: "{{ bin_dir }}/kubectl create -f {{kube_config_dir}}/{{system_namespace}}-ns.yml"
|
||||
retries: 4
|
||||
delay: "{{ retry_stagger | random + 3 }}"
|
||||
register: create_system_ns
|
||||
until: create_system_ns.rc == 0
|
||||
changed_when: False
|
||||
when: kubesystem|failed and inventory_hostname == groups['kube-master'][0]
|
||||
tags: apps
|
||||
|
|
|
@ -1,6 +1,29 @@
|
|||
---
|
||||
- name: "Post-upgrade | stop kubelet on all masters"
|
||||
service:
|
||||
name: kubelet
|
||||
state: stopped
|
||||
delegate_to: "{{item}}"
|
||||
with_items: "{{groups['kube-master']}}"
|
||||
when: needs_etcd_migration|bool
|
||||
|
||||
- name: "Post-upgrade | Pause for kubelet stop"
|
||||
pause:
|
||||
seconds: 10
|
||||
when: needs_etcd_migration|bool
|
||||
|
||||
- name: "Post-upgrade | start kubelet on all masters"
|
||||
service:
|
||||
name: kubelet
|
||||
state: started
|
||||
delegate_to: "{{item}}"
|
||||
with_items: "{{groups['kube-master']}}"
|
||||
when: needs_etcd_migration|bool
|
||||
|
||||
- name: "Post-upgrade | etcd3 upgrade | purge etcd2 k8s data"
|
||||
command: "{{ bin_dir }}/etcdctl --endpoints={{ etcd_access_addresses }} rm -r /registry"
|
||||
environment:
|
||||
ETCDCTL_API: 2
|
||||
delegate_to: "{{groups['etcd'][0]}}"
|
||||
run_once: true
|
||||
# default() must run before bool so an undefined needs_etcd_migration falls back to false.
when: kube_apiserver_storage_backend == "etcd3" and needs_etcd_migration|default(false)|bool
|
||||
|
|
|
@ -38,10 +38,15 @@
|
|||
environment:
|
||||
ETCDCTL_API: 2
|
||||
register: old_data_exists
|
||||
delegate_to: "{{groups['kube-master'][0]}}"
|
||||
delegate_to: "{{groups['etcd'][0]}}"
|
||||
when: kube_apiserver_storage_backend == "etcd3"
|
||||
failed_when: false
|
||||
|
||||
- name: "Pre-upgrade | etcd3 upgrade | use etcd2 unless forced to etcd3"
|
||||
set_fact:
|
||||
kube_apiserver_storage_backend: "etcd2"
|
||||
when: old_data_exists.rc == 0 and not force_etcd3|bool
|
||||
|
||||
- name: "Pre-upgrade | etcd3 upgrade | see if data was already migrated"
|
||||
command: "{{ bin_dir }}/etcdctl --endpoints={{ etcd_access_addresses }} get --limit=1 --prefix=true /registry/minions"
|
||||
environment:
|
||||
|
@ -53,21 +58,27 @@
|
|||
|
||||
- name: "Pre-upgrade | etcd3 upgrade | set needs_etcd_migration"
|
||||
set_fact:
|
||||
needs_etcd_migration: "{{ kube_apiserver_storage_backend == 'etcd3' and data_migrated.stdout_lines|length == 0 and old_data_exists.rc == 0 }}"
|
||||
needs_etcd_migration: "{{ force_etcd3|default(false) and kube_apiserver_storage_backend == 'etcd3' and data_migrated.stdout_lines|length == 0 and old_data_exists.rc == 0 }}"
|
||||
|
||||
- name: "Pre-upgrade | Write invalid image to kube-apiserver manifest if necessary"
|
||||
replace:
|
||||
dest: /etc/kubernetes/manifests/kube-apiserver.manifest
|
||||
regexp: '(\s+)image:\s+.*?$'
|
||||
replace: '\1image: kill.apiserver.using.fake.image.in:manifest'
|
||||
- name: "Pre-upgrade | Delete master manifests on all kube-masters"
|
||||
file:
|
||||
path: "/etc/kubernetes/manifests/{{item[1]}}.manifest"
|
||||
state: absent
|
||||
delegate_to: "{{item[0]}}"
|
||||
with_nested:
|
||||
- "{{groups['kube-master']}}"
|
||||
- ["kube-apiserver", "kube-controller-manager", "kube-scheduler"]
|
||||
register: kube_apiserver_manifest_replaced
|
||||
when: (secret_changed|default(false) or etcd_secret_changed|default(false) or needs_etcd_migration|bool) and kube_apiserver_manifest.stat.exists
|
||||
|
||||
- name: "Pre-upgrade | Pause while waiting for kubelet to delete kube-apiserver pod"
|
||||
pause:
|
||||
seconds: 20
|
||||
when: kube_apiserver_manifest_replaced.changed
|
||||
tags: kube-apiserver
|
||||
- name: "Pre-upgrade | Delete master containers forcefully on all kube-masters"
|
||||
# with_nested yields [host, component] pairs: item[0] is the delegate host, item[1] the component name.
shell: "docker ps -f name=k8s-{{item[1]}}* -q | xargs --no-run-if-empty docker rm -f"
|
||||
delegate_to: "{{item[0]}}"
|
||||
with_nested:
|
||||
- "{{groups['kube-master']}}"
|
||||
- ["kube-apiserver", "kube-controller-manager", "kube-scheduler"]
|
||||
register: kube_apiserver_manifest_replaced
|
||||
when: (secret_changed|default(false) or etcd_secret_changed|default(false) or needs_etcd_migration|bool) and kube_apiserver_manifest.stat.exists
|
||||
|
||||
- name: "Pre-upgrade | etcd3 upgrade | stop etcd"
|
||||
service:
|
||||
|
|
|
@ -21,7 +21,7 @@ kube_proxy_cpu_requests: 150m
|
|||
nginx_memory_limit: 512M
|
||||
nginx_cpu_limit: 300m
|
||||
nginx_memory_requests: 32M
|
||||
nginx_cpu_requests: 50m
|
||||
nginx_cpu_requests: 25m
|
||||
|
||||
# kube_api_runtime_config:
|
||||
# - extensions/v1beta1/daemonsets=true
|
||||
|
|
|
@ -4,6 +4,9 @@
|
|||
{%- if inventory_hostname in groups['kube-master'] and inventory_hostname not in groups['kube-node'] -%}true{%- else -%}false{%- endif -%}
|
||||
tags: facts
|
||||
|
||||
- include: pre_upgrade.yml
|
||||
tags: kubelet
|
||||
|
||||
- include: install.yml
|
||||
tags: kubelet
|
||||
|
||||
|
|
6
roles/kubernetes/node/tasks/pre_upgrade.yml
Normal file
6
roles/kubernetes/node/tasks/pre_upgrade.yml
Normal file
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
- name: "Pre-upgrade | copy /var/lib/cni from kubelet"
|
||||
command: docker cp kubelet:/var/lib/cni /var/lib/cni
|
||||
args:
|
||||
creates: "/var/lib/cni"
|
||||
failed_when: false
|
|
@ -21,6 +21,7 @@
|
|||
-v {{ docker_daemon_graph }}:/var/lib/docker:rw \
|
||||
-v /var/log:/var/log:rw \
|
||||
-v /var/lib/kubelet:/var/lib/kubelet:shared \
|
||||
-v /var/lib/cni:/var/lib/cni:shared \
|
||||
-v /var/run:/var/run:rw \
|
||||
-v {{kube_config_dir}}:{{kube_config_dir}}:ro \
|
||||
{{ hyperkube_image_repo }}:{{ hyperkube_image_tag}} \
|
||||
|
|
|
@ -34,8 +34,10 @@ ExecStart=/usr/bin/rkt run \
|
|||
{% if kube_network_plugin in ["calico", "weave", "canal"] %}
|
||||
--volume etc-cni,kind=host,source=/etc/cni,readOnly=true \
|
||||
--volume opt-cni,kind=host,source=/opt/cni,readOnly=true \
|
||||
--volume var-lib-cni,kind=host,source=/var/lib/cni,readOnly=false \
|
||||
--mount volume=etc-cni,target=/etc/cni \
|
||||
--mount volume=opt-cni,target=/opt/cni \
|
||||
--mount volume=var-lib-cni,target=/var/lib/cni \
|
||||
{% endif %}
|
||||
--mount volume=dns,target=/etc/resolv.conf \
|
||||
--mount volume=etc-kubernetes,target={{ kube_config_dir }} \
|
||||
|
|
|
@ -27,9 +27,9 @@
|
|||
sync_tokens: true
|
||||
when: >-
|
||||
{%- set tokens = {'sync': False} -%}
|
||||
{%- for server in groups['kube-master'] | intersect(ansible_play_hosts)
|
||||
{%- for server in groups['kube-master'] | intersect(ansible_play_batch)
|
||||
if (not hostvars[server].known_tokens.stat.exists) or
|
||||
(hostvars[server].known_tokens.stat.checksum != known_tokens_master.stat.checksum|default('')) -%}
|
||||
(hostvars[server].known_tokens.stat.checksum|default('') != known_tokens_master.stat.checksum|default('')) -%}
|
||||
{%- set _ = tokens.update({'sync': True}) -%}
|
||||
{%- endfor -%}
|
||||
{{ tokens.sync }}
|
||||
|
|
|
@ -21,13 +21,13 @@ canal_policy_dir: /etc/kubernetes/policy
|
|||
calico_node_memory_limit: 500M
|
||||
calico_node_cpu_limit: 200m
|
||||
calico_node_memory_requests: 64M
|
||||
calico_node_cpu_requests: 100m
|
||||
calico_node_cpu_requests: 50m
|
||||
flannel_memory_limit: 500M
|
||||
flannel_cpu_limit: 200m
|
||||
flannel_memory_requests: 64M
|
||||
flannel_cpu_requests: 100m
|
||||
flannel_cpu_requests: 50m
|
||||
calicoctl_memory_limit: 170M
|
||||
calicoctl_cpu_limit: 100m
|
||||
calicoctl_memory_requests: 32M
|
||||
calicoctl_cpu_requests: 50m
|
||||
calicoctl_cpu_requests: 25m
|
||||
|
||||
|
|
|
@ -3,3 +3,4 @@
|
|||
- name: Uncordon node
|
||||
command: "{{ bin_dir }}/kubectl uncordon {{ ansible_hostname }}"
|
||||
delegate_to: "{{ groups['kube-master'][0] }}"
|
||||
when: needs_cordoning|default(false)
|
||||
|
|
3
roles/upgrade/pre-upgrade/defaults/main.yml
Normal file
3
roles/upgrade/pre-upgrade/defaults/main.yml
Normal file
|
@ -0,0 +1,3 @@
|
|||
drain_grace_period: 30
|
||||
drain_timeout: 40s
|
||||
|
|
@ -1,12 +1,30 @@
|
|||
---
|
||||
- name: See if node is in ready state
|
||||
# Use the managed kubectl path like the cordon/drain tasks; failed_when: false would otherwise hide a PATH miss.
shell: "{{ bin_dir }}/kubectl get nodes | grep {{ inventory_hostname }}"
|
||||
register: kubectl_nodes
|
||||
delegate_to: "{{ groups['kube-master'][0] }}"
|
||||
failed_when: false
|
||||
|
||||
- set_fact:
|
||||
needs_cordoning: >-
|
||||
{% if " Ready" in kubectl_nodes.stdout %}
|
||||
true
|
||||
{% else %}
|
||||
false
|
||||
{% endif %}
|
||||
|
||||
- name: Cordon node
|
||||
command: "{{ bin_dir }}/kubectl cordon {{ ansible_hostname }}"
|
||||
delegate_to: "{{ groups['kube-master'][0] }}"
|
||||
when: needs_cordoning
|
||||
|
||||
- name: Drain node
|
||||
command: "{{ bin_dir }}/kubectl drain --force --ignore-daemonsets --grace-period 30 --delete-local-data {{ ansible_hostname }}"
|
||||
command: >-
|
||||
{{ bin_dir }}/kubectl drain
|
||||
--force
|
||||
--ignore-daemonsets
|
||||
--grace-period {{ drain_grace_period }}
|
||||
--timeout {{ drain_timeout }}
|
||||
--delete-local-data {{ ansible_hostname }}
|
||||
delegate_to: "{{ groups['kube-master'][0] }}"
# Only drain nodes that were Ready and cordoned, matching the cordon/sleep/uncordon guards.
when: needs_cordoning
|
||||
|
||||
- name: Sleep for grace period for draining
|
||||
pause: seconds=30
|
||||
when: needs_cordoning
|
||||
|
|
|
@ -27,10 +27,12 @@
|
|||
{{node3}}
|
||||
|
||||
[etcd]
|
||||
{{node1}}
|
||||
{{node2}}
|
||||
{{node3}}
|
||||
|
||||
[vault]
|
||||
{{node1}}
|
||||
{{node2}}
|
||||
{{node3}}
|
||||
{% else %}
|
||||
|
|
|
@ -21,6 +21,9 @@
|
|||
- name: Get pod names
|
||||
shell: "{{bin_dir}}/kubectl get pods -o json"
|
||||
register: pods
|
||||
until: '"ContainerCreating" not in pods.stdout'
|
||||
retries: 60
|
||||
delay: 2
|
||||
no_log: true
|
||||
|
||||
- name: Get hostnet pods
|
||||
|
|
|
@ -79,7 +79,14 @@
|
|||
- { role: kubernetes/node, tags: node }
|
||||
- { role: network_plugin, tags: network }
|
||||
- { role: upgrade/post-upgrade, tags: post-upgrade }
|
||||
- { role: kargo-defaults}
|
||||
|
||||
- hosts: kube-master
|
||||
any_errors_fatal: true
|
||||
roles:
|
||||
- { role: kargo-defaults}
|
||||
- { role: kubernetes-apps/network_plugin, tags: network }
|
||||
- { role: kubernetes-apps/policy_controller, tags: policy-controller }
|
||||
|
||||
- hosts: calico-rr
|
||||
any_errors_fatal: "{{ any_errors_fatal | default(true) }}"
|
||||
|
|
Loading…
Reference in a new issue