Refactor calico route reflector to run in k8s cluster (#4975)

* Refactor calico-rr to run in k8s cluster with taint

Change-Id: I75a3169ff5b36ce8302fc7ef1c32d3eb697b5afa

* add preinstall checks

* rework calico/rr role

Change-Id: I2f0a7e6cb77cf91ad4a615923680760d2e5d9ca8

* add empty calico-rr group

Change-Id: I006c0a60db9b72d02245bf8fdfabcf982144a5ad
This commit is contained in:
Matthew Mosesohn 2019-08-08 17:37:22 +03:00 committed by Kubernetes Prow Robot
parent 75d1be8272
commit 023108a733
19 changed files with 170 additions and 230 deletions

View file

@ -19,14 +19,14 @@
- { role: kubespray-defaults} - { role: kubespray-defaults}
- { role: bastion-ssh-config, tags: ["localhost", "bastion"]} - { role: bastion-ssh-config, tags: ["localhost", "bastion"]}
- hosts: k8s-cluster:etcd:calico-rr - hosts: k8s-cluster:etcd
any_errors_fatal: "{{ any_errors_fatal | default(true) }}" any_errors_fatal: "{{ any_errors_fatal | default(true) }}"
gather_facts: false gather_facts: false
roles: roles:
- { role: kubespray-defaults} - { role: kubespray-defaults}
- { role: bootstrap-os, tags: bootstrap-os} - { role: bootstrap-os, tags: bootstrap-os}
- hosts: k8s-cluster:etcd:calico-rr - hosts: k8s-cluster:etcd
any_errors_fatal: "{{ any_errors_fatal | default(true) }}" any_errors_fatal: "{{ any_errors_fatal | default(true) }}"
roles: roles:
- { role: kubespray-defaults} - { role: kubespray-defaults}
@ -46,7 +46,7 @@
etcd_events_cluster_setup: "{{ etcd_events_cluster_enabled }}" etcd_events_cluster_setup: "{{ etcd_events_cluster_enabled }}"
when: not etcd_kubeadm_enabled| default(false) when: not etcd_kubeadm_enabled| default(false)
- hosts: k8s-cluster:calico-rr - hosts: k8s-cluster
any_errors_fatal: "{{ any_errors_fatal | default(true) }}" any_errors_fatal: "{{ any_errors_fatal | default(true) }}"
roles: roles:
- { role: kubespray-defaults} - { role: kubespray-defaults}
@ -79,6 +79,12 @@
- { role: kubernetes/kubeadm, tags: kubeadm} - { role: kubernetes/kubeadm, tags: kubeadm}
- { role: network_plugin, tags: network } - { role: network_plugin, tags: network }
# Play for hosts in the calico-rr group: applies the shared defaults role and
# then the route reflector role, which is tagged both "network" and "calico_rr".
- hosts: calico-rr
  any_errors_fatal: "{{ any_errors_fatal | default(true) }}"
  roles:
    - role: kubespray-defaults
    - role: network_plugin/calico/rr
      tags:
        - network
        - calico_rr
- hosts: kube-master[0] - hosts: kube-master[0]
any_errors_fatal: "{{ any_errors_fatal | default(true) }}" any_errors_fatal: "{{ any_errors_fatal | default(true) }}"
roles: roles:
@ -95,12 +101,6 @@
- { role: kubernetes-apps/ingress_controller, tags: ingress-controller } - { role: kubernetes-apps/ingress_controller, tags: ingress-controller }
- { role: kubernetes-apps/external_provisioner, tags: external-provisioner } - { role: kubernetes-apps/external_provisioner, tags: external-provisioner }
# Play for hosts in the calico-rr group: applies kubespray-defaults, then the
# network_plugin/calico/rr role (tagged "network").
- hosts: calico-rr
any_errors_fatal: "{{ any_errors_fatal | default(true) }}"
roles:
- { role: kubespray-defaults}
- { role: network_plugin/calico/rr, tags: network }
- hosts: kube-master - hosts: kube-master
any_errors_fatal: "{{ any_errors_fatal | default(true) }}" any_errors_fatal: "{{ any_errors_fatal | default(true) }}"
roles: roles:

View file

@ -119,13 +119,13 @@ recommended here:
You need to edit your inventory and add: You need to edit your inventory and add:
* `calico-rr` group with nodes in it. At the moment it's incompatible with * `calico-rr` group with nodes in it. `calico-rr` can be combined with
`kube-node` due to BGP port conflict with `calico-node` container. So you `kube-node` and/or `kube-master`. `calico-rr` group also must be a child
should not have nodes in both `calico-rr` and `kube-node` groups. group of `k8s-cluster` group.
* `cluster_id` by route reflector node/group (see details * `cluster_id` by route reflector node/group (see details
[here](https://hub.docker.com/r/calico/routereflector/)) [here](https://hub.docker.com/r/calico/routereflector/))
Here's an example of Kubespray inventory with route reflectors: Here's an example of Kubespray inventory with standalone route reflectors:
``` ```
[all] [all]
@ -154,6 +154,7 @@ node5
[k8s-cluster:children] [k8s-cluster:children]
kube-node kube-node
kube-master kube-master
calico-rr
[calico-rr] [calico-rr]
rr0 rr0

View file

@ -12,3 +12,4 @@ node1
[k8s-cluster:children] [k8s-cluster:children]
kube-node kube-node
kube-master kube-master
calico-rr

View file

@ -28,6 +28,9 @@
# node5 # node5
# node6 # node6
[calico-rr]
[k8s-cluster:children] [k8s-cluster:children]
kube-master kube-master
kube-node kube-node
calico-rr

View file

@ -17,3 +17,8 @@ discoveryTokenUnsafeSkipCAVerification: true
nodeRegistration: nodeRegistration:
name: {{ kube_override_hostname }} name: {{ kube_override_hostname }}
criSocket: {{ cri_socket }} criSocket: {{ cri_socket }}
{# Hosts that are in the calico-rr group but not in kube-node get a NoSchedule taint keyed node-role.kubernetes.io/calico-rr. #}
{% if 'calico-rr' in group_names and 'kube-node' not in group_names %}
taints:
- effect: NoSchedule
key: node-role.kubernetes.io/calico-rr
{% endif %}

View file

@ -21,3 +21,8 @@ caCertPath: {{ kube_cert_dir }}/ca.crt
nodeRegistration: nodeRegistration:
name: {{ kube_override_hostname }} name: {{ kube_override_hostname }}
criSocket: {{ cri_socket }} criSocket: {{ cri_socket }}
{# Taint emitted only for hosts in calico-rr that are not also in kube-node: effect NoSchedule, key node-role.kubernetes.io/calico-rr. #}
{% if 'calico-rr' in group_names and 'kube-node' not in group_names %}
taints:
- effect: NoSchedule
key: node-role.kubernetes.io/calico-rr
{% endif %}

View file

@ -166,6 +166,26 @@
- inventory_hostname == groups['kube-master'][0] - inventory_hostname == groups['kube-master'][0]
run_once: yes run_once: yes
# Fail early when route reflector peering is enabled without a cluster_id.
# NOTE(review): this runs once, on the first kube-master only, while the
# calico/rr role reads cluster_id on the calico-rr hosts - confirm the variable
# is defined for both.
- name: "Check that cluster_id is set if calico_rr enabled"
  assert:
    that:
      - cluster_id is defined
    msg: "A unique cluster_id is required if using calico_rr"
  when:
    - kube_network_plugin == 'calico'
    # default(false) matches the other peer_with_calico_rr conditions and lets
    # the check be skipped, rather than error out, when the variable is unset.
    - peer_with_calico_rr | default(false)
    - inventory_hostname == groups['kube-master'][0]
  run_once: true
# Runs on every host that is a member of calico-rr (when the network plugin is
# calico) and fails if that host is not also a member of k8s-cluster.
- name: "Check that calico_rr nodes are in k8s-cluster group"
  when:
    - kube_network_plugin == 'calico'
    - "'calico-rr' in group_names"
  assert:
    that:
      - "'k8s-cluster' in group_names"
    msg: "calico-rr must be a child group of k8s-cluster group"
- name: "Check that kube_service_addresses is a network range" - name: "Check that kube_service_addresses is a network range"
assert: assert:
that: that:

View file

@ -2,15 +2,4 @@
# Global as_num (/calico/bgp/v1/global/as_num) # Global as_num (/calico/bgp/v1/global/as_num)
# should be the same as in calico role # should be the same as in calico role
global_as_num: "64512" global_as_num: "64512"
# Node name: kube_override_hostname when it is set, else the inventory hostname
calico_baremetal_nodename: "{{ kube_override_hostname | default(inventory_hostname) }}"
# Directory the role creates and hard-links the etcd certificates into
calico_cert_dir: /etc/calico/certs
# Limits for apps
calico_rr_memory_limit: 1000M
calico_rr_cpu_limit: 300m
calico_rr_memory_requests: 128M
calico_rr_cpu_requests: 150m
# File names (inside etcd_cert_dir) of the etcd CA, and of this host's node
# certificate and key, used as link sources by the role's tasks
kube_etcd_cacert_file: ca.pem
kube_etcd_cert_file: node-{{ inventory_hostname }}.pem
kube_etcd_key_file: node-{{ inventory_hostname }}-key.pem

View file

@ -1,15 +0,0 @@
---
# "restart calico-rr" does no work itself (/bin/true); it only notifies the
# two handlers defined below it.
- name: restart calico-rr
  command: /bin/true
  notify:
    - "Calico-rr | reload systemd"
    - "Calico-rr | reload calico-rr"

# Have systemd re-read its unit files.
- name: Calico-rr | reload systemd
  systemd:
    daemon_reload: true

# Restart the calico-rr service.
- name: Calico-rr | reload calico-rr
  service:
    state: restarted
    name: calico-rr

View file

@ -1,82 +1,29 @@
---
# Turns the Calico node object of each calico-rr host into a route reflector:
# clean up the old host service (pre.yml), set spec.bgp.routeReflectorClusterID
# to cluster_id on the node object, and label the node so the BGPPeer selectors
# (has(i-am-a-route-reflector)) match it.
- name: Calico-rr | Pre-upgrade tasks
  include_tasks: pre.yml

# JSON output is requested so the object can be parsed and patched as data
# instead of being edited as text.
- name: Calico-rr | Fetch current node object
  command: "{{ bin_dir }}/calicoctl.sh get node {{ inventory_hostname }} -ojson"
  register: calico_rr_node
  changed_when: false

# The patched object goes into its own fact so the fetched result is never
# overwritten, and the recursive merge sets the key without duplicating it
# when the task is run again.
- name: Calico-rr | Set route reflector cluster ID
  set_fact:
    calico_rr_node_patched: >-
      {{ calico_rr_node.stdout | from_json | combine({'spec': {'bgp':
      {'routeReflectorClusterID': cluster_id | string}}}, recursive=True) }}

# The object is passed on stdin, so no shell quoting of its content is needed.
# "until" is what makes retries/delay take effect.
- name: Calico-rr | Configure route reflector
  command: "{{ bin_dir }}/calicoctl.sh replace -f-"
  args:
    stdin: "{{ calico_rr_node_patched | to_json }}"
  register: calico_rr_replace
  retries: 4
  delay: "{{ retry_stagger | random + 3 }}"
  until: calico_rr_replace.rc == 0

- name: Calico-rr | Set label for route reflector
  command: >-
    {{ bin_dir }}/calicoctl.sh label node {{ inventory_hostname }}
    'i-am-a-route-reflector=true' --overwrite
  register: calico_rr_label
  retries: 4
  delay: "{{ retry_stagger | random + 3 }}"
  until: calico_rr_label.rc == 0

View file

@ -0,0 +1,15 @@
---
# Clean-up for a host-level calico-rr service: stop and disable it, then
# delete its environment file and systemd unit file.
- name: Calico-rr | Disable calico-rr service if it exists
  # Best effort: this task is never reported as failed.
  failed_when: false
  service:
    name: calico-rr
    enabled: false
    state: stopped

# NOTE(review): no systemd daemon-reload follows the unit file removal -
# confirm whether one is wanted here.
- name: Calico-rr | Delete obsolete files
  with_items:
    - /etc/calico/calico-rr.env
    - /etc/systemd/system/calico-rr.service
  file:
    state: absent
    path: "{{ item }}"

View file

@ -1,27 +0,0 @@
{# systemd unit template for calico-rr under containerd. Reads its settings from /etc/calico/calico-rr.env, deletes any previous calico-rr task before start, and runs the image through `ctr run` with host networking and privileged mode, bind-mounting the log directory (rw) and the certificate directory (ro). #}
[Unit]
Description=calico-rr
After=containerd.service
Requires=containerd.service
[Service]
EnvironmentFile=/etc/calico/calico-rr.env
ExecStartPre=-{{ containerd_bin_dir }}/ctr t delete -f calico-rr
ExecStart={{ containerd_bin_dir }}/ctr run --net-host --privileged \
--env IP=${IP} \
--env IP6=${IP6} \
--env ETCD_ENDPOINTS=${ETCD_ENDPOINTS} \
--env ETCD_CA_CERT_FILE=${ETCD_CA_CERT_FILE} \
--env ETCD_CERT_FILE=${ETCD_CERT_FILE} \
--env ETCD_KEY_FILE=${ETCD_KEY_FILE} \
--mount type=bind,src=/var/log/calico-rr,dst=/var/log/calico,options=rbind:rw \
--mount type=bind,src={{ calico_cert_dir }},dst={{ calico_cert_dir }},options=rbind:ro \
{{ calico_rr_image_repo }}:{{ calico_rr_image_tag }} \
calico-rr
Restart=always
RestartSec=10s
ExecStop=-{{ containerd_bin_dir }}/ctr c rm calico-rr
[Install]
WantedBy=multi-user.target

View file

@ -1,28 +0,0 @@
{# systemd unit template for calico-rr under docker. Reads its settings from /etc/calico/calico-rr.env, force-removes any previous calico-rr container before start, and runs the image with host networking and privileged mode. --memory and --cpu-shares are derived from calico_rr_memory_limit ("Mi" rewritten to "M") and calico_rr_cpu_limit ("m" removed). #}
[Unit]
Description=calico-rr
After=docker.service
Requires=docker.service
[Service]
EnvironmentFile=/etc/calico/calico-rr.env
ExecStartPre=-{{ docker_bin_dir }}/docker rm -f calico-rr
ExecStart={{ docker_bin_dir }}/docker run --net=host --privileged \
--name=calico-rr \
-e IP=${IP} \
-e IP6=${IP6} \
-e ETCD_ENDPOINTS=${ETCD_ENDPOINTS} \
-e ETCD_CA_CERT_FILE=${ETCD_CA_CERT_FILE} \
-e ETCD_CERT_FILE=${ETCD_CERT_FILE} \
-e ETCD_KEY_FILE=${ETCD_KEY_FILE} \
-v /var/log/calico-rr:/var/log/calico \
-v {{ calico_cert_dir }}:{{ calico_cert_dir }}:ro \
--memory={{ calico_rr_memory_limit|regex_replace('Mi', 'M') }} --cpu-shares={{ calico_rr_cpu_limit|regex_replace('m', '') }} \
{{ calico_rr_image_repo }}:{{ calico_rr_image_tag }}
Restart=always
RestartSec=10s
ExecStop=-{{ docker_bin_dir }}/docker stop calico-rr
[Install]
WantedBy=multi-user.target

View file

@ -1,6 +0,0 @@
{# Environment file template written to /etc/calico/calico-rr.env and loaded by the calico-rr systemd unit: etcd endpoints, paths of the CA/cert/key files under calico_cert_dir, and the route reflector IP (IP6 is left empty). #}
ETCD_ENDPOINTS="{{ etcd_access_addresses }}"
ETCD_CA_CERT_FILE="{{ calico_cert_dir }}/ca_cert.crt"
ETCD_CERT_FILE="{{ calico_cert_dir }}/cert.crt"
ETCD_KEY_FILE="{{ calico_cert_dir }}/key.pem"
IP="{{ rr_ip }}"
IP6=""

View file

@ -181,6 +181,46 @@
- inventory_hostname == groups['kube-master'][0] - inventory_hostname == groups['kube-master'][0]
- peer_with_router|default(false) - peer_with_router|default(false)
# Creates the global BGPPeer "peer-to-rrs": every node without the
# i-am-a-route-reflector label peers with every node that has it.
# Runs on the first kube-master only, when peer_with_calico_rr is enabled and
# the calico-rr group is not empty. The resource does not depend on the
# individual route reflectors, so it is created once rather than once per
# group member.
- name: Calico | Configure peering with route reflectors at global scope
  shell: |
    echo '{
      "apiVersion": "projectcalico.org/v3",
      "kind": "BGPPeer",
      "metadata": {
        "name": "peer-to-rrs"
      },
      "spec": {
        "nodeSelector": "!has(i-am-a-route-reflector)",
        "peerSelector": "has(i-am-a-route-reflector)"
      }}' | {{ bin_dir }}/calicoctl.sh create --skip-exists -f -
  register: calico_peer_to_rrs
  retries: 4
  delay: "{{ retry_stagger | random + 3 }}"
  # "until" is what makes retries/delay take effect
  until: calico_peer_to_rrs.rc == 0
  when:
    - inventory_hostname == groups['kube-master'][0]
    - peer_with_calico_rr|default(false)
    - groups['calico-rr'] | default([]) | length > 0
# Creates the global BGPPeer "rr-mesh": every node carrying the
# i-am-a-route-reflector label peers with every other node carrying it.
# Runs on the first kube-master only, when peer_with_calico_rr is enabled and
# the calico-rr group is not empty. The resource does not depend on the
# individual route reflectors, so it is created once rather than once per
# group member.
- name: Calico | Configure route reflectors to peer with each other
  shell: |
    echo '{
      "apiVersion": "projectcalico.org/v3",
      "kind": "BGPPeer",
      "metadata": {
        "name": "rr-mesh"
      },
      "spec": {
        "nodeSelector": "has(i-am-a-route-reflector)",
        "peerSelector": "has(i-am-a-route-reflector)"
      }}' | {{ bin_dir }}/calicoctl.sh create --skip-exists -f -
  register: calico_rr_mesh
  retries: 4
  delay: "{{ retry_stagger | random + 3 }}"
  # "until" is what makes retries/delay take effect
  until: calico_rr_mesh.rc == 0
  when:
    - inventory_hostname == groups['kube-master'][0]
    - peer_with_calico_rr|default(false)
    - groups['calico-rr'] | default([]) | length > 0
- name: Calico | Create calico manifests - name: Calico | Create calico manifests
template: template:
src: "{{ item.file }}.j2" src: "{{ item.file }}.j2"
@ -274,25 +314,3 @@
when: when:
- peer_with_router|default(false) - peer_with_router|default(false)
- inventory_hostname in groups['k8s-cluster'] - inventory_hostname in groups['k8s-cluster']
# Per-node peering: on each k8s-cluster host, loops over the calico-rr group
# and creates one BGPPeer per route reflector whose cluster_id equals this
# host's cluster_id. The peer IP is the reflector's calico_rr_ip, falling back
# to its "ip" hostvar and then to fallback_ips; the AS number is local_as or,
# when unset, global_as_num.
- name: Calico | Configure peering with route reflectors
shell: >
echo '{
"apiVersion": "projectcalico.org/v3",
"kind": "BGPPeer",
"metadata": {
"name": "{{ inventory_hostname }}-{{ hostvars[item]["calico_rr_ip"]|default(hostvars[item]["ip"])|default(fallback_ips[item]) }}"
},
"spec": {
"asNumber": "{{ local_as | default(global_as_num) }}",
"node": "{{ inventory_hostname }}",
"peerIP": "{{ hostvars[item]["calico_rr_ip"]|default(hostvars[item]["ip"])|default(fallback_ips[item]) }}"
}}' | {{ bin_dir }}/calicoctl.sh create --skip-exists -f -
retries: 4
delay: "{{ retry_stagger | random + 3 }}"
with_items:
- "{{ groups['calico-rr'] | default([]) }}"
when:
- peer_with_calico_rr|default(false)
- inventory_hostname in groups['k8s-cluster']
- hostvars[item]['cluster_id'] == cluster_id

View file

@ -50,5 +50,8 @@ instance-1
[k8s-cluster:children] [k8s-cluster:children]
kube-node kube-node
kube-master kube-master
calico-rr
[calico-rr]
[fake_hosts] [fake_hosts]

View file

@ -22,3 +22,6 @@ node2
[k8s-cluster:children] [k8s-cluster:children]
kube-node kube-node
kube-master kube-master
calico-rr
[calico-rr]

View file

@ -43,6 +43,9 @@
{{droplets.results[0].droplet.name}} {{droplets.results[0].droplet.name}}
{% endif %} {% endif %}
[calico-rr]
[k8s-cluster:children] [k8s-cluster:children]
kube-node kube-node
kube-master kube-master
calico-rr

View file

@ -66,6 +66,9 @@
[k8s-cluster:children] [k8s-cluster:children]
kube-node kube-node
kube-master kube-master
calico-rr
[calico-rr]
{% if mode is defined and mode in ["scale", "separate-scale", "ha-scale"] %} {% if mode is defined and mode in ["scale", "separate-scale", "ha-scale"] %}
[fake_hosts] [fake_hosts]