From 9988513ec3360f3be86a985d781ae353c98f0692 Mon Sep 17 00:00:00 2001 From: Matthew Mosesohn Date: Fri, 15 Sep 2017 07:35:36 +0100 Subject: [PATCH] fix kubedns upgrade --- .gitlab-ci.yml | 36 ++++++++++++++- cluster.yml | 1 + roles/kubernetes-apps/ansible/tasks/main.yml | 18 ++++++-- .../rotate_tokens/tasks/rotate_tokens.yml | 45 +++++++++++++++++++ .../tasks/kubeadm-cleanup-old-certs.yml | 3 +- .../kubernetes/master/tasks/kubeadm-setup.yml | 35 ++++++++++----- roles/kubernetes/node/tasks/main.yml | 7 +++ upgrade-cluster.yml | 1 + 8 files changed, 127 insertions(+), 19 deletions(-) create mode 100644 roles/kubernetes-apps/rotate_tokens/tasks/rotate_tokens.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index dc0f6c7d3..f682c2726 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -296,12 +296,25 @@ before_script: UPGRADE_TEST: "graceful" STARTUP_SCRIPT: "" +.coreos_weave_kubeadm_variables: &coreos_weave_kubeadm_variables +# stage: deploy-gce-part1 + KUBE_NETWORK_PLUGIN: weave + AUTHORIZATION_MODES: "{ 'authorization_modes': [ 'RBAC' ] }" + CLOUD_IMAGE: coreos-stable-1465-6-0-v20170817 + CLOUD_MACHINE_TYPE: "n1-standard-1" + CLOUD_REGION: us-central1-b + BOOTSTRAP_OS: coreos + CLUSTER_MODE: ha + KUBEADM_ENABLED: "true" + UPGRADE_TEST: "graceful" + STARTUP_SCRIPT: 'systemctl disable locksmithd && systemctl stop locksmithd' + .ubuntu_canal_kubeadm_variables: &ubuntu_canal_kubeadm_variables # stage: deploy-gce-part1 KUBE_NETWORK_PLUGIN: canal AUTHORIZATION_MODES: "{ 'authorization_modes': [ 'RBAC' ] }" CLOUD_IMAGE: ubuntu-1604-xenial - CLOUD_MACHINE_TYPE: "n1-standard-2" + CLOUD_MACHINE_TYPE: "n1-standard-1" CLOUD_REGION: europe-west1-b CLUSTER_MODE: ha KUBEADM_ENABLED: "true" @@ -518,6 +531,27 @@ ubuntu-canal-kubeadm-triggers: when: on_success only: ['triggers'] +coreos-weave-kubeadm-rbac: + stage: deploy-gce-part1 + <<: *job + <<: *gce + variables: + <<: *gce_variables + <<: *coreos_weave_kubeadm_variables + when: manual + except: ['triggers'] + only: ['master', /^pr-.*$/] + +coreos-weave-kubeadm-triggers: + stage: deploy-gce-part1 + <<: *job + <<: *gce + variables: + <<: *gce_variables + <<: *coreos_weave_kubeadm_variables + when: on_success + only: ['triggers'] + rhel7-weave: stage: deploy-gce-part1 <<: *job diff --git a/cluster.yml b/cluster.yml index d9240ac97..db26058a4 100644 --- a/cluster.yml +++ b/cluster.yml @@ -80,6 +80,7 @@ any_errors_fatal: "{{ any_errors_fatal | default(true) }}" roles: - { role: kubespray-defaults} + - { role: kubernetes-apps/rotate_tokens, tags: rotate_tokens, when: "secret_changed|default(false)" } - { role: kubernetes-apps/network_plugin, tags: network } - { role: kubernetes-apps/policy_controller, tags: policy-controller } - { role: kubernetes/client, tags: client } diff --git a/roles/kubernetes-apps/ansible/tasks/main.yml b/roles/kubernetes-apps/ansible/tasks/main.yml index 7771a91c3..9e9a30382 100644 --- a/roles/kubernetes-apps/ansible/tasks/main.yml +++ b/roles/kubernetes-apps/ansible/tasks/main.yml @@ -8,7 +8,17 @@ delay: 6 when: inventory_hostname == groups['kube-master'][0] -- name: kubeadm | Delete kubeadm kubedns +- name: Kubernetes Apps | Delete old kubedns resources + kube: + name: "kubedns" + namespace: "{{ system_namespace }}" + kubectl: "{{bin_dir}}/kubectl" + resource: "{{ item }}" + state: absent + with_items: ['deploy', 'svc'] + tags: upgrade + +- name: Kubernetes Apps | Delete kubeadm kubedns kube: name: "kubedns" namespace: "{{ system_namespace }}" @@ -25,9 +35,9 @@ src: "{{item.file}}" dest: "{{kube_config_dir}}/{{item.file}}" with_items: - - {name: kubedns, file: kubedns-sa.yml, type: sa} - - {name: kubedns, file: kubedns-deploy.yml.j2, type: deployment} - - {name: kubedns, file: kubedns-svc.yml, type: svc} + - {name: kube-dns, file: kubedns-sa.yml, type: sa} + - {name: kube-dns, file: kubedns-deploy.yml.j2, type: deployment} + - {name: kube-dns, file: kubedns-svc.yml, type: svc} - {name: kubedns-autoscaler, file: kubedns-autoscaler-sa.yml, type: sa} - {name: kubedns-autoscaler, file: kubedns-autoscaler-clusterrole.yml, type: clusterrole} - {name: kubedns-autoscaler, file: kubedns-autoscaler-clusterrolebinding.yml, type: clusterrolebinding} diff --git a/roles/kubernetes-apps/rotate_tokens/tasks/rotate_tokens.yml b/roles/kubernetes-apps/rotate_tokens/tasks/rotate_tokens.yml new file mode 100644 index 000000000..0800c78c7 --- /dev/null +++ b/roles/kubernetes-apps/rotate_tokens/tasks/rotate_tokens.yml @@ -0,0 +1,45 @@ +--- +- name: Rotate Tokens | Get list of pods and their current secrets + command: >- + {{ bin_dir }}/kubectl get pods --all-namespaces + -o 'jsonpath={range .items[*]}{.metadata.namespace}{" "}{.metadata.name}{" "}{.spec.volumes[*].name}{"\n"}{end}' + + register: pods_secrets + run_once: true + +- name: Rotate Tokens | Get default tokens to expire + shell: >- + {{ bin_dir }}/kubectl get secrets --all-namespaces + -o 'jsonpath={range .items[*]}{"\n"}{.metadata.namespace}{" "}{.metadata.name}{end}' + | grep default-token + register: tokens_to_delete + run_once: true + +- name: view pods_secrets + debug: msg="{{ pods_secrets.stdout_lines }}" + +- name: view pods_secrets2 + #debug: msg="{{ item.split(" ")[0] }}" + debug: msg="{{ item.split(" ")[0] }} {{ item.split(" ")[1] }}" + with_items: "{{ tokens_to_delete.stdout_lines }}" + +- name: Rotate Tokens | Delete expired tokens + command: "{{ bin_dir }}/kubectl delete secrets -n {{ item.split(' ')[0] }} {{ item.split(' ')[1] }}" + with_items: "{{ tokens_to_delete.stdout_lines }}" + run_once: true + +- set_fact: + t2d: |- + ["default default-token-38nh5", + "kube-public default-token-cx54r", + "kube-system default-token-d6dfh", + "default default-token-b58hs" + ] + +- name: Rotate Tokens | Delete pods with default tokens + command: "{{ bin_dir }}/kubectl delete pod -n {{ item.split(' ')[0] }} {{ item.split(' ')[1] }}" + with_items: "{{ pods_secrets.stdout_lines }}" + register: delete_pods + when: item.split(" ")[0] + " " + item.split(" ")[2] in tokens_to_delete.stdout + failed_when: delete_pods.rc != 0 and "not found" not in delete_pods.stderr + run_once: true diff --git a/roles/kubernetes/master/tasks/kubeadm-cleanup-old-certs.yml b/roles/kubernetes/master/tasks/kubeadm-cleanup-old-certs.yml index 5ec19046d..e1e85e205 100644 --- a/roles/kubernetes/master/tasks/kubeadm-cleanup-old-certs.yml +++ b/roles/kubernetes/master/tasks/kubeadm-cleanup-old-certs.yml @@ -1,4 +1,3 @@ --- - name: kubeadm | Purge old certs - file: - command: "rm -f {{kube_cert_dir }}/*.pem" + command: "rm -f {{kube_cert_dir }}/*.pem" diff --git a/roles/kubernetes/master/tasks/kubeadm-setup.yml b/roles/kubernetes/master/tasks/kubeadm-setup.yml index 6ca909339..ab1ceeea8 100644 --- a/roles/kubernetes/master/tasks/kubeadm-setup.yml +++ b/roles/kubernetes/master/tasks/kubeadm-setup.yml @@ -6,26 +6,27 @@ delegate_to: "{{groups['kube-master']|first}}" run_once: true +- name: kubeadm | Check service account key + stat: + path: "{{ kube_cert_dir }}/sa.key" + register: sa_key_before + delegate_to: "{{groups['kube-master']|first}}" + run_once: true + - name: kubeadm | Check if kubeadm has already run stat: path: "{{ kube_config_dir }}/admin.conf" register: admin_conf -- name: kubeadm | Migrate certificates to prepare for kubeadm - include: kubeadm-migrate-certs.yml - when: - - inventory_hostname == groups['kube-master']|first - - old_apiserver_cert.stat.exists - - name: kubeadm | Delete old static pods file: path: "{{ kube_config_dir }}/manifests/{{item}}.manifest" state: absent - with_items: ["kube-apiserver", "kube-controller-manager", "kube-scheduler"] + with_items: ["kube-apiserver", "kube-controller-manager", "kube-scheduler", "kube-proxy"] when: old_apiserver_cert.stat.exists - name: kubeadm | Forcefully delete old static pods - shell: "docker ps -f name=k8s-{{item}}* -q | xargs --no-run-if-empty docker rm -f" + shell: "docker ps -f name=k8s_{{item}} -q | xargs --no-run-if-empty docker rm -f" with_items: ["kube-apiserver", "kube-controller-manager", "kube-scheduler"] when: old_apiserver_cert.stat.exists @@ -59,8 +60,6 @@ dest: "{{ kube_config_dir }}/kubeadm-config.yaml" register: kubeadm_config - - - name: kubeadm | Initialize first master command: timeout -k 240s 240s kubeadm init --config={{ kube_config_dir }}/kubeadm-config.yaml --skip-preflight-checks register: kubeadm_init @@ -88,7 +87,7 @@ delegate_to: "{{ groups['kube-master']|first }}" run_once: true -- name: write out kubeadm certs +- name: kubeadm | write out kubeadm certs copy: dest: "{{ item.item }}" content: "{{ item.content | b64decode }}" @@ -107,6 +106,18 @@ retries: 3 when: inventory_hostname != groups['kube-master']|first and (kubeadm_config.changed or not admin_conf.stat.exists or copy_kubeadm_certs.changed) +- name: kubeadm | Check service account key again + stat: + path: "{{ kube_cert_dir }}/sa.key" + register: sa_key_after + delegate_to: "{{groups['kube-master']|first}}" + run_once: true + +- name: kubeadm | Set secret_changed if service account key was updated + command: /bin/true + notify: Master | set secret_changed + when: sa_key_before.stat.checksum|default("") != sa_key_after.stat.checksum + - name: kubeadm | cleanup old certs if necessary - include: kubeadm_cleanup_old_certs.yml + include: kubeadm-cleanup-old-certs.yml when: old_apiserver_cert.stat.exists diff --git a/roles/kubernetes/node/tasks/main.yml b/roles/kubernetes/node/tasks/main.yml index 04b5132cb..3f38bc773 100644 --- a/roles/kubernetes/node/tasks/main.yml +++ b/roles/kubernetes/node/tasks/main.yml @@ -98,6 +98,13 @@ when: not kubeadm_enabled tags: kube-proxy +- name: Purge proxy manifest for kubeadm + file: + path: "{{ kube_manifest_dir }}/kube-proxy.manifest" + state: absent + when: kubeadm_enabled + tags: kube-proxy + # reload-systemd - meta: flush_handlers diff --git a/upgrade-cluster.yml b/upgrade-cluster.yml index b83681525..a20123ec7 100644 --- a/upgrade-cluster.yml +++ b/upgrade-cluster.yml @@ -87,6 +87,7 @@ any_errors_fatal: true roles: - { role: kubespray-defaults} + - { role: kubernetes-apps/rotate_tokens, tags: rotate_tokens, when: "secret_changed|default(false)" } - { role: kubernetes-apps/network_plugin, tags: network } - { role: kubernetes-apps/policy_controller, tags: policy-controller } - { role: kubernetes/client, tags: client }