From 45262da7267aea1162c5196ab826f2f7226b98bc Mon Sep 17 00:00:00 2001 From: Cristian Calin <6627509+cristicalin@users.noreply.github.com> Date: Thu, 14 Apr 2022 11:08:46 +0300 Subject: [PATCH] [calico] call calico checks early on to prevent altering the cluster with bad configuration (#8707) --- docs/calico.md | 39 ++++--- .../preinstall/tasks/0020-verify-settings.yml | 85 --------------- roles/kubernetes/preinstall/tasks/main.yml | 8 ++ roles/network_plugin/calico/tasks/check.yml | 102 ++++++++++++++++++ roles/network_plugin/calico/tasks/main.yml | 2 - 5 files changed, 136 insertions(+), 100 deletions(-) diff --git a/docs/calico.md b/docs/calico.md index 51c2ec655..1cec1392c 100644 --- a/docs/calico.md +++ b/docs/calico.md @@ -214,6 +214,13 @@ Calico supports two types of encapsulation: [VXLAN and IP in IP](https://docs.pr *IP in IP* and *VXLAN* is mutualy exclusive modes. +Kubespray defaults have changed after version 2.18 from auto-enabling `ipip` mode to auto-enabling `vxlan`. This was done to facilitate wider deployment scenarios including those where vxlan acceleration is provided by the underlying network devices. + +If you are running your cluster with the default calico settings and are upgrading to a release post 2.18.x (i.e. 2.19 and later or `master` branch) then you have two options: + +* perform a manual migration to vxlan before upgrading kubespray (see migrating from IP in IP to VXLAN below) +* pin the pre-2.19 settings in your ansible inventory (see IP in IP mode settings below) + ### IP in IP mode To configure Ip in Ip mode you need to use the bird network backend. @@ -224,19 +231,6 @@ calico_vxlan_mode: 'Never' calico_network_backend: 'bird' ``` -### VXLAN mode (default) - -To configure VXLAN mode you can use the default settings, the example below is provided for your reference. - -```yml -calico_ipip_mode: 'Never' -calico_vxlan_mode: 'Always' # Possible values is `Always`, `CrossSubnet`, `Never`. 
-calico_network_backend: 'vxlan' -``` - -In VXLAN mode BGP networking is not required. -We disable BGP to reduce the moving parts in your cluster by `calico_network_backend: vxlan` - ### BGP mode To enable BGP no-encapsulation mode: @@ -247,6 +241,25 @@ calico_vxlan_mode: 'Never' calico_network_backend: 'bird' ``` +### Migrating from IP in IP to VXLAN + +If you would like to migrate from the old IP in IP with `bird` network backends default to the new VXLAN based encapsulation you need to perform this change before running an upgrade of your cluster; the `cluster.yml` and `upgrade-cluster.yml` playbooks will refuse to continue if they detect incompatible settings. + +Execute the following steps on one of the control plane nodes, ensure the cluster is healthy before proceeding. + +```shell +calicoctl.sh patch felixconfig default -p '{"spec":{"vxlanEnabled":true}}' +calicoctl.sh patch ippool default-pool -p '{"spec":{"ipipMode":"Never", "vxlanMode":"Always"}}' +``` + +**Note:** if you created multiple ippools you will need to patch all of them individually to change their encapsulation. The kubespray playbooks only handle the default ippool created by kubespray. + +Wait for the `vxlan.calico` interfaces to be created on all cluster nodes and traffic to be routed through it then you can disable `ipip`. + +```shell +calicoctl.sh patch felixconfig default -p '{"spec":{"ipipEnabled":false}}' +``` + ## Configuring interface MTU This is an advanced topic and should usually not be modified unless you know exactly what you are doing. Calico is smart enough to deal with the defaults and calculate the proper MTU. 
If you do need to set up a custom MTU you can change `calico_veth_mtu` as follows: diff --git a/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml b/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml index 08f4eaeb1..eae32a4c6 100644 --- a/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml +++ b/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml @@ -36,44 +36,6 @@ - kube_network_plugin is defined - not ignore_assert_errors -- name: Stop if legacy encapsulation variables are detected (ipip) - assert: - that: - - ipip is not defined - msg: "'ipip' configuration variable is deprecated, please configure your inventory with 'calico_ipip_mode' set to 'Always' or 'CrossSubnet' according to your specific needs" - when: - - kube_network_plugin == 'calico' - - not ignore_assert_errors - -- name: Stop if legacy encapsulation variables are detected (ipip_mode) - assert: - that: - - ipip_mode is not defined - msg: "'ipip_mode' configuration variable is deprecated, please configure your inventory with 'calico_ipip_mode' set to 'Always' or 'CrossSubnet' according to your specific needs" - when: - - kube_network_plugin == 'calico' - - not ignore_assert_errors - -- name: Stop if incompatible network plugin and cloudprovider - assert: - that: - - calico_ipip_mode == 'Never' - - calico_vxlan_mode in ['Always', 'CrossSubnet'] - msg: "When using cloud_provider azure and network_plugin calico calico_ipip_mode must be 'Never' and calico_vxlan_mode 'Always' or 'CrossSubnet'" - when: - - cloud_provider is defined and cloud_provider == 'azure' - - kube_network_plugin == 'calico' - - not ignore_assert_errors - -- name: Stop if supported Calico versions - assert: - that: - - "calico_version in calico_crds_archive_checksums.keys()" - msg: "Calico version not supported {{ calico_version }} not in {{ calico_crds_archive_checksums.keys() }}" - when: - - kube_network_plugin == 'calico' - - not ignore_assert_errors - - name: Stop if unsupported version of Kubernetes 
assert: that: kube_version is version(kube_version_min_required, '>=') @@ -200,53 +162,6 @@ - cloud-provider - facts -- name: Get current calico cluster version - shell: "set -o pipefail && {{ bin_dir }}/calicoctl.sh version | grep 'Cluster Version:' | awk '{ print $3}'" - args: - executable: /bin/bash - register: calico_version_on_server - async: 10 - poll: 3 - run_once: yes - changed_when: false - failed_when: false - when: - - kube_network_plugin == 'calico' - -- name: Check that current calico version is enough for upgrade - assert: - that: - - calico_version_on_server.stdout is version(calico_min_version_required, '>=') - msg: > - Your version of calico is not fresh enough for upgrade. - Minimum version is {{ calico_min_version_required }} supported by the previous kubespray release. - when: - - kube_network_plugin == 'calico' - - 'calico_version_on_server.stdout is defined' - - calico_version_on_server.stdout - - inventory_hostname == groups['kube_control_plane'][0] - run_once: yes - -- name: "Check that cluster_id is set if calico_rr enabled" - assert: - that: - - cluster_id is defined - msg: "A unique cluster_id is required if using calico_rr" - when: - - kube_network_plugin == 'calico' - - peer_with_calico_rr - - inventory_hostname == groups['kube_control_plane'][0] - run_once: yes - -- name: "Check that calico_rr nodes are in k8s_cluster group" - assert: - that: - - '"k8s_cluster" in group_names' - msg: "calico_rr must be a child group of k8s_cluster group" - when: - - kube_network_plugin == 'calico' - - '"calico_rr" in group_names' - - name: "Check that kube_service_addresses is a network range" assert: that: diff --git a/roles/kubernetes/preinstall/tasks/main.yml b/roles/kubernetes/preinstall/tasks/main.yml index 718f36092..495be4abb 100644 --- a/roles/kubernetes/preinstall/tasks/main.yml +++ b/roles/kubernetes/preinstall/tasks/main.yml @@ -117,3 +117,11 @@ - ansible_os_family == "RedHat" tags: - bootstrap-os + +- name: Run calico checks + include_role: 
+ name: network_plugin/calico + tasks_from: check + when: + - kube_network_plugin == 'calico' + - not ignore_assert_errors diff --git a/roles/network_plugin/calico/tasks/check.yml b/roles/network_plugin/calico/tasks/check.yml index 41bf77cd3..0b164534a 100644 --- a/roles/network_plugin/calico/tasks/check.yml +++ b/roles/network_plugin/calico/tasks/check.yml @@ -1,16 +1,102 @@ --- +- name: Stop if legacy encapsulation variables are detected (ipip) + assert: + that: + - ipip is not defined + msg: "'ipip' configuration variable is deprecated, please configure your inventory with 'calico_ipip_mode' set to 'Always' or 'CrossSubnet' according to your specific needs" + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" + +- name: Stop if legacy encapsulation variables are detected (ipip_mode) + assert: + that: + - ipip_mode is not defined + msg: "'ipip_mode' configuration variable is deprecated, please configure your inventory with 'calico_ipip_mode' set to 'Always' or 'CrossSubnet' according to your specific needs" + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" + +- name: Stop if incompatible network plugin and cloudprovider + assert: + that: + - calico_ipip_mode == 'Never' + - calico_vxlan_mode in ['Always', 'CrossSubnet'] + msg: "When using cloud_provider azure and network_plugin calico calico_ipip_mode must be 'Never' and calico_vxlan_mode 'Always' or 'CrossSubnet'" + when: + - cloud_provider is defined and cloud_provider == 'azure' + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" + +- name: Stop if supported Calico versions + assert: + that: + - "calico_version in calico_crds_archive_checksums.keys()" + msg: "Calico version not supported {{ calico_version }} not in {{ calico_crds_archive_checksums.keys() }}" + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" + +- name: Get current calico cluster version + shell: "set -o pipefail && {{ bin_dir }}/calicoctl.sh version | grep 
'Cluster Version:' | awk '{ print $3}'" + args: + executable: /bin/bash + register: calico_version_on_server + async: 10 + poll: 3 + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" + changed_when: false + failed_when: false + +- name: Check that current calico version is enough for upgrade + assert: + that: + - calico_version_on_server.stdout is version(calico_min_version_required, '>=') + msg: > + Your version of calico is not fresh enough for upgrade. + Minimum version is {{ calico_min_version_required }} supported by the previous kubespray release. + when: + - 'calico_version_on_server.stdout is defined' + - calico_version_on_server.stdout + - inventory_hostname == groups['kube_control_plane'][0] + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" + +- name: "Check that cluster_id is set if calico_rr enabled" + assert: + that: + - cluster_id is defined + msg: "A unique cluster_id is required if using calico_rr" + when: + - peer_with_calico_rr + - inventory_hostname == groups['kube_control_plane'][0] + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" + +- name: "Check that calico_rr nodes are in k8s_cluster group" + assert: + that: + - '"k8s_cluster" in group_names' + msg: "calico_rr must be a child group of k8s_cluster group" + when: + - '"calico_rr" in group_names' + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" + - name: "Check vars defined correctly" assert: that: - "calico_pool_name is defined" - "calico_pool_name is match('^[a-zA-Z0-9-_\\\\.]{2,63}$')" msg: "calico_pool_name contains invalid characters" + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" - name: "Check calico network backend defined correctly" assert: that: - "calico_network_backend in ['bird', 'vxlan', 'none']" msg: "calico network backend is not 'bird', 'vxlan' or 'none'" + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" - name: "Check ipip and vxlan mode 
defined correctly" assert: @@ -18,6 +104,8 @@ - "calico_ipip_mode in ['Always', 'CrossSubnet', 'Never']" - "calico_vxlan_mode in ['Always', 'CrossSubnet', 'Never']" msg: "calico inter host encapsulation mode is not 'Always', 'CrossSubnet' or 'Never'" + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" - name: "Check ipip and vxlan mode if simultaneously enabled" assert: @@ -26,6 +114,8 @@ msg: "IP in IP and VXLAN mode is mutualy exclusive modes" when: - "calico_ipip_mode in ['Always', 'CrossSubnet']" + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" - name: "Check ipip and vxlan mode if simultaneously enabled" assert: @@ -34,6 +124,8 @@ msg: "IP in IP and VXLAN mode is mutualy exclusive modes" when: - "calico_vxlan_mode in ['Always', 'CrossSubnet']" + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" - name: "Get Calico {{ calico_pool_name }} configuration" command: calicoctl.sh get ipPool {{ calico_pool_name }} -o json @@ -48,6 +140,8 @@ set_fact: calico_pool_conf: '{{ calico.stdout | from_json }}' when: calico.rc == 0 and calico.stdout + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" - name: "Check if inventory match current cluster configuration" assert: @@ -59,15 +153,23 @@ msg: "Your inventory doesn't match the current cluster configuration" when: - calico_pool_conf is defined + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" - name: "Check kdd calico_datastore if calico_apiserver_enabled" assert: that: calico_datastore == "kdd" + msg: "When using calico apiserver you need to use the kubernetes datastore" when: - calico_apiserver_enabled + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" - name: "Check kdd calico_datastore if typha_enabled" assert: that: calico_datastore == "kdd" + msg: "When using typha you need to use the kubernetes datastore" when: - typha_enabled + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] 
}}" diff --git a/roles/network_plugin/calico/tasks/main.yml b/roles/network_plugin/calico/tasks/main.yml index df5812036..81844fa4f 100644 --- a/roles/network_plugin/calico/tasks/main.yml +++ b/roles/network_plugin/calico/tasks/main.yml @@ -1,6 +1,4 @@ --- -- import_tasks: check.yml - - import_tasks: pre.yml - import_tasks: repos.yml