c12s-kubespray/roles/reset/tasks/main.yml
Etienne Champetier 557139a8cf Fix reset when using containerd (#7308)
Signed-off-by: Etienne Champetier <e.champetier@ateme.com>
(cherry picked from commit ed2b4b805e)
2021-03-02 08:33:19 -08:00

345 lines
8.5 KiB
YAML

---
- name: reset | stop services
service:
name: "{{ item }}"
state: stopped
with_items:
- kubelet
- vault
failed_when: false
tags:
- services
- name: reset | remove services
file:
path: "/etc/systemd/system/{{ item }}"
state: absent
with_items:
- kubelet.service
- vault.service
- calico-node.service
- containerd.service.d/http-proxy.conf
- crio.service.d/http-proxy.conf
- vault.service.d/http-proxy.conf
register: services_removed
tags:
- services
- containerd
- crio
- name: reset | remove docker dropins
file:
path: "/etc/systemd/system/docker.service.d/{{ item }}"
state: absent
with_items:
- docker-dns.conf
- docker-options.conf
- http-proxy.conf
- docker-orphan-cleanup.conf
register: docker_dropins_removed
tags:
- docker
- name: reset | systemctl daemon-reload # noqa 503
systemd:
daemon_reload: true
when: services_removed.changed or docker_dropins_removed.changed
- name: reset | remove all containers
shell: "set -o pipefail && {{ docker_bin_dir }}/docker ps -aq | xargs -r docker rm -fv"
args:
executable: /bin/bash
register: remove_all_containers
retries: 4
until: remove_all_containers.rc == 0
delay: 5
when: container_manager == "docker"
tags:
- docker
- name: reset | restart docker if needed # noqa 503
service:
name: docker
state: restarted
when: docker_dropins_removed.changed and container_manager == "docker"
tags:
- docker
- name: reset | check if crictl is present
stat:
path: "{{ bin_dir }}/crictl"
register: crictl
- name: reset | stop all cri containers
shell: "set -o pipefail && {{ bin_dir }}/crictl ps -q | xargs -r {{ bin_dir }}/crictl -t 60s stop"
args:
executable: /bin/bash
register: remove_all_cri_containers
retries: 5
until: remove_all_cri_containers.rc == 0
delay: 5
tags:
- crio
- containerd
when:
- crictl.stat.exists
- container_manager in ["crio", "containerd"]
ignore_errors: true
- name: reset | force remove all cri containers
command: "{{ bin_dir }}/crictl rm -a -f"
register: remove_all_cri_containers
retries: 5
until: remove_all_cri_containers.rc == 0
delay: 5
tags:
- crio
- containerd
when:
- crictl.stat.exists
- container_manager in ["crio", "containerd"]
- deploy_container_engine|default(true)
- name: reset | stop and disable crio service
service:
name: crio
state: stopped
enabled: false
failed_when: false
tags: [ crio ]
when: container_manager == "crio"
- name: reset | forcefully wipe CRI-O's container and image storage
command: "crio wipe -f"
failed_when: false
tags: [ crio ]
when: container_manager == "crio"
- name: reset | stop all cri pods
shell: "set -o pipefail && {{ bin_dir }}/crictl pods -q | xargs -r {{ bin_dir }}/crictl -t 60s stopp"
args:
executable: /bin/bash
register: remove_all_cri_containers
retries: 5
until: remove_all_cri_containers.rc == 0
delay: 5
tags: [ containerd ]
when:
- crictl.stat.exists
- container_manager == "containerd"
ignore_errors: true
- block:
- name: reset | force remove all cri pods
command: "{{ bin_dir }}/crictl rmp -a -f"
register: remove_all_cri_containers
retries: 5
until: remove_all_cri_containers.rc == 0
delay: 5
tags: [ containerd ]
when:
- crictl.stat.exists
- container_manager == "containerd"
rescue:
- name: reset | force remove all cri pods (rescue)
shell: "ip netns list | cut -d' ' -f 1 | xargs -n1 ip netns delete && {{ bin_dir }}/crictl rmp -a -f"
- name: reset | stop etcd services
service:
name: "{{ item }}"
state: stopped
with_items:
- etcd
- etcd-events
failed_when: false
tags:
- services
- name: reset | remove etcd services
file:
path: "/etc/systemd/system/{{ item }}.service"
state: absent
with_items:
- etcd
- etcd-events
register: services_removed
tags:
- services
- name: reset | gather mounted kubelet dirs # noqa 301
shell: set -o pipefail && mount | grep /var/lib/kubelet/ | awk '{print $3}' | tac
args:
executable: /bin/bash
warn: false
check_mode: no
register: mounted_dirs
failed_when: false
tags:
- mounts
- name: reset | unmount kubelet dirs # noqa 301
command: umount -f {{ item }}
with_items: "{{ mounted_dirs.stdout_lines }}"
register: umount_dir
when: mounted_dirs
retries: 4
until: umount_dir.rc == 0
delay: 5
tags:
- mounts
- name: flush iptables
iptables:
table: "{{ item }}"
flush: yes
with_items:
- filter
- nat
- mangle
when: flush_iptables|bool
tags:
- iptables
- name: Clear IPVS virtual server table
command: "ipvsadm -C"
when:
- kube_proxy_mode == 'ipvs' and inventory_hostname in groups['k8s-cluster']
- name: reset | check kube-ipvs0 network device
stat:
path: /sys/class/net/kube-ipvs0
register: kube_ipvs0
- name: reset | Remove kube-ipvs0
command: "ip link del kube-ipvs0"
when:
- kube_proxy_mode == 'ipvs'
- kube_ipvs0.stat.exists
- name: reset | check nodelocaldns network device
stat:
path: /sys/class/net/nodelocaldns
register: nodelocaldns_device
- name: reset | Remove nodelocaldns
command: "ip link del nodelocaldns"
when:
- enable_nodelocaldns|default(false)|bool
- nodelocaldns_device.stat.exists
- name: reset | delete some files and directories
file:
path: "{{ item }}"
state: absent
with_items:
- "{{ kube_config_dir }}"
- /var/lib/kubelet
- "{{ ansible_env.HOME | default('/root') }}/.kube"
- "{{ ansible_env.HOME | default('/root') }}/.helm"
- "{{ etcd_data_dir }}"
- "{{ etcd_events_data_dir }}"
- "{{ etcd_config_dir }}"
- /var/log/calico
- /etc/cni
- "{{ nginx_config_dir }}"
- /etc/dnsmasq.d
- /etc/dnsmasq.conf
- /etc/dnsmasq.d-available
- /etc/etcd.env
- /etc/calico
- /etc/weave.env
- /opt/cni
- /etc/dhcp/dhclient.d/zdnsupdate.sh
- /etc/dhcp/dhclient-exit-hooks.d/zdnsupdate
- /run/flannel
- /etc/flannel
- /run/kubernetes
- /usr/local/share/ca-certificates/etcd-ca.crt
- /usr/local/share/ca-certificates/kube-ca.crt
- /usr/local/share/ca-certificates/vault-ca.crt
- /etc/ssl/certs/etcd-ca.pem
- /etc/ssl/certs/kube-ca.pem
- /etc/ssl/certs/vault-ca.crt
- /etc/pki/ca-trust/source/anchors/etcd-ca.crt
- /etc/pki/ca-trust/source/anchors/kube-ca.crt
- /etc/pki/ca-trust/source/anchors/vault-ca.crt
- /etc/vault
- /var/log/pods/
- "{{ bin_dir }}/kubelet"
- "{{ bin_dir }}/etcd-scripts"
- "{{ bin_dir }}/etcd"
- "{{ bin_dir }}/etcd-events"
- "{{ bin_dir }}/etcdctl"
- "{{ bin_dir }}/kubernetes-scripts"
- "{{ bin_dir }}/kubectl"
- "{{ bin_dir }}/kubeadm"
- "{{ bin_dir }}/helm"
- "{{ bin_dir }}/calicoctl"
- "{{ bin_dir }}/calicoctl.sh"
- "{{ bin_dir }}/calico-upgrade"
- "{{ bin_dir }}/weave"
- "{{ bin_dir }}/crictl"
- "{{ bin_dir }}/netctl"
- /var/lib/cni
- /etc/vault
- /etc/openvswitch
- /run/openvswitch
- /var/lib/kube-router
- /var/lib/calico
- /etc/cilium
- /run/calico
ignore_errors: yes
tags:
- files
- name: reset | remove dns settings from dhclient.conf
blockinfile:
path: "{{ item }}"
state: absent
marker: "# Ansible entries {mark}"
failed_when: false
with_items:
- /etc/dhclient.conf
- /etc/dhcp/dhclient.conf
tags:
- files
- dns
- name: reset | remove host entries from /etc/hosts
blockinfile:
path: "/etc/hosts"
state: absent
marker: "# Ansible inventory hosts {mark}"
tags:
- files
- dns
- name: reset | include file with reset tasks specific to the network_plugin if exists
include_tasks: "{{ (role_path + '/../network_plugin/' + kube_network_plugin + '/tasks/reset.yml') | realpath }}"
when:
- kube_network_plugin in ['flannel', 'cilium', 'kube-router', 'calico']
tags:
- network
- name: reset | Restart network
service:
name: >-
{% if ansible_os_family == "RedHat" -%}
{%- if ansible_distribution_major_version|int == 8 or is_fedora_coreos -%}
NetworkManager
{%- else -%}
network
{%- endif -%}
{%- elif ansible_distribution == "Ubuntu" and ansible_distribution_release in ["bionic", "focal"] -%}
systemd-networkd
{%- elif ansible_os_family == "Debian" -%}
networking
{%- endif %}
state: restarted
when:
- ansible_os_family not in ["Flatcar Container Linux by Kinvolk"]
- reset_restart_network
tags:
- services
- network