Add Calico BGP route reflector (calico-rr) support.

Adds a `calico-rr` play and role, a `global_as_num` default, node-to-RR
peering tasks (matched on `cluster_id`), the route reflector image download
entry, and the accompanying documentation.

diff --git a/cluster.yml b/cluster.yml
index 6f8e63505..cf7efb4bb 100644
--- a/cluster.yml
+++ b/cluster.yml
@@ -41,6 +41,11 @@
     - { role: kubernetes-apps/lib, tags: apps }
     - { role: kubernetes-apps/network_plugin, tags: network }
 
+- hosts: calico-rr
+  any_errors_fatal: true
+  roles:
+    - { role: network_plugin/calico/rr, tags: network }
+
 - hosts: k8s-cluster
   any_errors_fatal: true
   roles:
diff --git a/docs/calico.md b/docs/calico.md
index 81bf4c5b4..954cce0c6 100644
--- a/docs/calico.md
+++ b/docs/calico.md
@@ -50,7 +50,7 @@ or for versions prior *v1.0.0*:
 calicoctl endpoint show --detail
 ```
 
-##### Optionnal : Define network backend
+##### Optional : Define network backend
 
 In some cases you may want to define Calico network backend. Allowed values are 'bird', 'gobgp' or 'none'. Bird is a default value.
 
@@ -60,7 +60,7 @@ To re-define you need to edit the inventory and add a group variable `calico_net
 calico_network_backend: none
 ```
 
-##### Optionnal : BGP Peering with border routers
+##### Optional : BGP Peering with border routers
 
 In some cases you may want to route the pods subnet and so NAT is not needed on the nodes.
 For instance if you have a cluster spread on different locations and you want your pods to talk each other no matter where they are located.
@@ -72,6 +72,81 @@ you'll need to edit the inventory and add a and a hostvar `local_as` by node.
 node1 ansible_ssh_host=95.54.0.12 local_as=xxxxxx
 ```
 
+##### Optional : Define global AS number
+
+Optional parameter `global_as_num` defines Calico global AS number (`/calico/bgp/v1/global/as_num` etcd key).
+It defaults to "64512".
+
+##### Optional : BGP Peering with route reflectors
+
+At large scale you may want to disable full node-to-node mesh in order to
+optimize your BGP topology and improve `calico-node` containers' start times.
+
+To do so you can deploy BGP route reflectors and peer `calico-node` with them as
+recommended here:
+
+* https://hub.docker.com/r/calico/routereflector/
+* http://docs.projectcalico.org/v2.0/reference/private-cloud/l3-interconnect-fabric
+
+You need to edit your inventory and add:
+
+* `calico-rr` group with nodes in it. At the moment it's incompatible with
+  `kube-node` due to BGP port conflict with `calico-node` container. So you
+  should not have nodes in both `calico-rr` and `kube-node` groups.
+* `cluster_id` by route reflector node/group (see details
+[here](https://hub.docker.com/r/calico/routereflector/))
+
+Here's an example of Kargo inventory with route reflectors:
+
+```
+[all]
+rr0 ansible_ssh_host=10.210.1.10 ip=10.210.1.10
+rr1 ansible_ssh_host=10.210.1.11 ip=10.210.1.11
+node2 ansible_ssh_host=10.210.1.12 ip=10.210.1.12
+node3 ansible_ssh_host=10.210.1.13 ip=10.210.1.13
+node4 ansible_ssh_host=10.210.1.14 ip=10.210.1.14
+node5 ansible_ssh_host=10.210.1.15 ip=10.210.1.15
+
+[kube-master]
+node2
+node3
+
+[etcd]
+node2
+node3
+node4
+
+[kube-node]
+node2
+node3
+node4
+node5
+
+[k8s-cluster:children]
+kube-node
+kube-master
+
+[calico-rr]
+rr0
+rr1
+
+[rack0]
+rr0
+rr1
+node2
+node3
+node4
+node5
+
+[rack0:vars]
+cluster_id="1.0.0.1"
+```
+
+The inventory above will deploy the following topology assuming that calico's
+`global_as_num` is set to `65400`:
+
+![Image](figures/kargo-calico-rr.png?raw=true)
+
 Cloud providers configuration
 =============================
 
diff --git a/docs/figures/kargo-calico-rr.png b/docs/figures/kargo-calico-rr.png
new file mode 100644
index 000000000..2dacdb515
Binary files /dev/null and b/docs/figures/kargo-calico-rr.png differ
diff --git a/roles/download/defaults/main.yml b/roles/download/defaults/main.yml
index 0fdb97897..3e3d42d08 100644
--- a/roles/download/defaults/main.yml
+++ b/roles/download/defaults/main.yml
@@ -42,13 +42,17 @@ flannel_image_tag: "{{ flannel_version }}"
 calicoctl_image_repo: "calico/ctl"
 # TODO(apanchenko): v1.0.0-beta can't execute `node run` from Docker container
 # for details see https://github.com/projectcalico/calico-containers/issues/1291
-calicoctl_image_tag: "v1.0.0-beta-18-gf72bc1d"
+calicoctl_image_tag: "v1.0.0-rc3"
 calico_node_image_repo: "calico/node"
 calico_node_image_tag: "{{ calico_version }}"
 calico_cni_image_repo: "calico/cni"
 calico_cni_image_tag: "{{ calico_cni_version }}"
 calico_policy_image_repo: "calico/kube-policy-controller"
 calico_policy_image_tag: latest
+# TODO(adidenko): switch to "calico/routereflector" when
+# https://github.com/projectcalico/calico-bird/pull/27 is merged
+calico_rr_image_repo: "quay.io/l23network/routereflector"
+calico_rr_image_tag: "v0.1"
 exechealthz_version: 1.1
 exechealthz_image_repo: "gcr.io/google_containers/exechealthz-amd64"
 exechealthz_image_tag: "{{ exechealthz_version }}"
@@ -142,6 +146,11 @@ downloads:
     repo: "{{ calico_policy_image_repo }}"
     tag: "{{ calico_policy_image_tag }}"
     enabled: "{{ kube_network_plugin == 'canal' }}"
+  calico_rr:
+    container: true
+    repo: "{{ calico_rr_image_repo }}"
+    tag: "{{ calico_rr_image_tag }}"
+    enabled: "{{ peer_with_calico_rr|default(false) and kube_network_plugin == 'calico' }}"
   pod_infra:
     container: true
     repo: "{{ pod_infra_image_repo }}"
diff --git a/roles/kubernetes/preinstall/tasks/set_facts.yml b/roles/kubernetes/preinstall/tasks/set_facts.yml
index cbe4d9203..f57cd702e 100644
--- a/roles/kubernetes/preinstall/tasks/set_facts.yml
+++ b/roles/kubernetes/preinstall/tasks/set_facts.yml
@@ -49,6 +49,8 @@
     etcd_after_v3: etcd_version | version_compare("v3.0.0", ">=")
 - set_fact:
     etcd_container_bin_dir: "{% if etcd_after_v3 %}/usr/local/bin/{% else %}/{% endif %}"
+- set_fact:
+    peer_with_calico_rr: "{{ 'calico-rr' in groups and groups['calico-rr']|length > 0 }}"
 
 - include: set_resolv_facts.yml
   tags: [bootstrap-os, resolvconf, facts]
diff --git a/roles/kubernetes/secrets/tasks/gen_certs.yml b/roles/kubernetes/secrets/tasks/gen_certs.yml
index ace2a3ba4..f951bb368 100644
--- a/roles/kubernetes/secrets/tasks/gen_certs.yml
+++ b/roles/kubernetes/secrets/tasks/gen_certs.yml
@@ -51,8 +51,10 @@
 - name: Gen_certs | Copy certs on nodes
   shell: "echo '{{node_cert_data.stdout|quote}}' | base64 -d | tar xz -C {{ kube_cert_dir }}"
   changed_when: false
-  when: inventory_hostname in groups['kube-node'] and sync_certs|default(false) and
-        inventory_hostname != groups['kube-master'][0]
+  when: (inventory_hostname in groups['kube-node'] or
+        (peer_with_calico_rr|default(false) and inventory_hostname in groups['calico-rr'])) and
+        sync_certs|default(false) and
+        inventory_hostname != groups['kube-master'][0]
 
 - name: Gen_certs | check certificate permissions
   file:
diff --git a/roles/network_plugin/calico/defaults/main.yml b/roles/network_plugin/calico/defaults/main.yml
index f0f91d39e..391e7c53e 100644
--- a/roles/network_plugin/calico/defaults/main.yml
+++ b/roles/network_plugin/calico/defaults/main.yml
@@ -12,6 +12,9 @@ overwrite_hyperkube_cni: true
 calico_cert_dir: /etc/calico/certs
 etcd_cert_dir: /etc/ssl/etcd/ssl
 
+# Global as_num (/calico/bgp/v1/global/as_num)
+global_as_num: "64512"
+
 # You can set MTU value here. If left undefined or empty, it will
 # not be specified in calico CNI config, so Calico will use built-in
 # defaults. The value should be a number, not a string.
diff --git a/roles/network_plugin/calico/rr/defaults/main.yml b/roles/network_plugin/calico/rr/defaults/main.yml
new file mode 100644
index 000000000..116b9cc8c
--- /dev/null
+++ b/roles/network_plugin/calico/rr/defaults/main.yml
@@ -0,0 +1,7 @@
+---
+# Global as_num (/calico/bgp/v1/global/as_num)
+# should be the same as in calico role
+global_as_num: "64512"
+
+calico_cert_dir: /etc/calico/certs
+etcd_cert_dir: /etc/ssl/etcd/ssl
diff --git a/roles/network_plugin/calico/rr/handlers/main.yml b/roles/network_plugin/calico/rr/handlers/main.yml
new file mode 100644
index 000000000..edfb1ad71
--- /dev/null
+++ b/roles/network_plugin/calico/rr/handlers/main.yml
@@ -0,0 +1,15 @@
+---
+- name: restart calico-rr
+  command: /bin/true
+  notify:
+    - Calico-rr | reload systemd
+    - Calico-rr | reload calico-rr
+
+- name: Calico-rr | reload systemd
+  shell: systemctl daemon-reload
+  when: ansible_service_mgr == "systemd"
+
+- name: Calico-rr | reload calico-rr
+  service:
+    name: calico-rr
+    state: restarted
diff --git a/roles/network_plugin/calico/rr/meta/main.yml b/roles/network_plugin/calico/rr/meta/main.yml
new file mode 100644
index 000000000..38fc506cc
--- /dev/null
+++ b/roles/network_plugin/calico/rr/meta/main.yml
@@ -0,0 +1,6 @@
+dependencies:
+  - role: kubernetes/secrets
+  - role: docker
+    when: ansible_os_family != "CoreOS"
+  - role: download
+    file: "{{ downloads.calico_rr }}"
diff --git a/roles/network_plugin/calico/rr/tasks/main.yml b/roles/network_plugin/calico/rr/tasks/main.yml
new file mode 100644
index 000000000..c43851f84
--- /dev/null
+++ b/roles/network_plugin/calico/rr/tasks/main.yml
@@ -0,0 +1,63 @@
+---
+# Inventory vars: cluster_id (required, route reflector cluster ID) and
+# calico_rr_ip (optional, IP to use for RR; defaults to "ip" from
+# inventory or "ansible_default_ipv4.address")
+
+- name: Calico-rr | Set IP fact
+  set_fact:
+    rr_ip: "{{ calico_rr_ip | default(ip) | default(ansible_default_ipv4.address) }}"
+
+- name: Calico-rr | Create calico certs directory
+  file:
+    dest: "{{ calico_cert_dir }}"
+    state: directory
+    mode: "0750"
+    owner: root
+    group: root
+
+- name: Calico-rr | Link etcd certificates for calico-rr
+  file:
+    src: "{{ kube_cert_dir }}/{{ item.s }}"
+    dest: "{{ calico_cert_dir }}/{{ item.d }}"
+    state: hard
+    force: true
+  with_items:
+    - {s: "ca.pem", d: "ca_cert.crt"}
+    - {s: "node.pem", d: "cert.crt"}
+    - {s: "node-key.pem", d: "key.pem"}
+
+- name: Calico-rr | Create dir for logs
+  file:
+    path: /var/log/calico-rr
+    state: directory
+    mode: "0755"
+    owner: root
+    group: root
+
+- name: Calico-rr | Write calico-rr.env for systemd init file
+  template: src=calico-rr.env.j2 dest=/etc/calico/calico-rr.env
+  when: ansible_service_mgr == "systemd"
+  notify: restart calico-rr
+
+- name: Calico-rr | Write calico-rr systemd init file
+  template: src=calico-rr.service.j2 dest=/etc/systemd/system/calico-rr.service
+  when: ansible_service_mgr == "systemd"
+  notify: restart calico-rr
+
+- name: Calico-rr | Configure route reflector
+  command: |-
+    {{ bin_dir }}/etcdctl --peers={{ etcd_access_addresses }} \
+    set /calico/bgp/v1/rr_v4/{{ rr_ip }} \
+    '{
+       "ip": "{{ rr_ip }}",
+       "cluster_id": "{{ cluster_id }}"
+     }'
+  delegate_to: "{{groups['etcd'][0]}}"
+
+- meta: flush_handlers
+
+- name: Calico-rr | Enable calico-rr
+  service:
+    name: calico-rr
+    state: started
+    enabled: true
diff --git a/roles/network_plugin/calico/rr/templates/calico-rr.env.j2 b/roles/network_plugin/calico/rr/templates/calico-rr.env.j2
new file mode 100644
index 000000000..201caecfe
--- /dev/null
+++ b/roles/network_plugin/calico/rr/templates/calico-rr.env.j2
@@ -0,0 +1,6 @@
+ETCD_ENDPOINTS="{{ etcd_access_endpoint }}"
+ETCD_CA_CERT_FILE="{{ calico_cert_dir }}/ca_cert.crt"
+ETCD_CERT_FILE="{{ calico_cert_dir }}/cert.crt"
+ETCD_KEY_FILE="{{ calico_cert_dir }}/key.pem"
+IP="{{ rr_ip }}"
+IP6=""
diff --git a/roles/network_plugin/calico/rr/templates/calico-rr.service.j2 b/roles/network_plugin/calico/rr/templates/calico-rr.service.j2
new file mode 100644
index 000000000..1a4b3e977
--- /dev/null
+++ b/roles/network_plugin/calico/rr/templates/calico-rr.service.j2
@@ -0,0 +1,27 @@
+[Unit]
+Description=calico-rr
+After=docker.service
+Requires=docker.service
+
+[Service]
+EnvironmentFile=/etc/calico/calico-rr.env
+ExecStartPre=-/usr/bin/docker rm -f calico-rr
+ExecStart=/usr/bin/docker run --net=host --privileged \
+ --name=calico-rr \
+ -e IP=${IP} \
+ -e IP6=${IP6} \
+ -e ETCD_ENDPOINTS=${ETCD_ENDPOINTS} \
+ -e ETCD_CA_CERT_FILE=${ETCD_CA_CERT_FILE} \
+ -e ETCD_CERT_FILE=${ETCD_CERT_FILE} \
+ -e ETCD_KEY_FILE=${ETCD_KEY_FILE} \
+ -v /var/log/calico-rr:/var/log/calico \
+ -v {{ calico_cert_dir }}:{{ calico_cert_dir }}:ro \
+ {{ calico_rr_image_repo }}:{{ calico_rr_image_tag }}
+
+Restart=always
+RestartSec=10s
+
+ExecStop=-/usr/bin/docker stop calico-rr
+
+[Install]
+WantedBy=multi-user.target
diff --git a/roles/network_plugin/calico/tasks/main.yml b/roles/network_plugin/calico/tasks/main.yml
index 0480354e8..19d74759c 100644
--- a/roles/network_plugin/calico/tasks/main.yml
+++ b/roles/network_plugin/calico/tasks/main.yml
@@ -152,6 +152,16 @@
   run_once: true
   tags: facts
 
+- name: Calico | Set global as_num
+  command: "{{ bin_dir}}/calicoctl config set asNumber {{ global_as_num }}"
+  run_once: true
+  when: not legacy_calicoctl
+
+- name: Calico (old) | Set global as_num
+  command: "{{ bin_dir}}/calicoctl bgp default-node-as {{ global_as_num }}"
+  run_once: true
+  when: legacy_calicoctl
+
 - name: Calico | Write /etc/network-environment
   template: src=network-environment.j2 dest=/etc/network-environment
   when: ansible_service_mgr in ["sysvinit","upstart"]
@@ -191,8 +201,9 @@
 
 - name: Calico | Disable node mesh
   shell: "{{ bin_dir }}/calicoctl config set nodeToNodeMesh off"
-  when: (not legacy_calicoctl and
-         peer_with_router|default(false) and inventory_hostname in groups['kube-node'])
+  when: ((peer_with_router|default(false) or peer_with_calico_rr|default(false))
+          and inventory_hostname in groups['kube-node']
+          and not legacy_calicoctl)
   run_once: true
 
 - name: Calico | Configure peering with router(s)
@@ -208,10 +219,27 @@
   when: (not legacy_calicoctl and
          peer_with_router|default(false) and inventory_hostname in groups['kube-node'])
 
+- name: Calico | Configure peering with route reflectors
+  shell: >
+   echo '{
+   "kind": "bgpPeer",
+   "spec": {"asNumber": "{{ local_as | default(global_as_num)}}"},
+   "apiVersion": "v1",
+   "metadata": {"node": "{{ inventory_hostname }}",
+   "scope": "node",
+   "peerIP": "{{ hostvars[item]["calico_rr_ip"]|default(hostvars[item]["ip"]) }}"}
+   }'
+   | {{ bin_dir }}/calicoctl create --skip-exists -f -
+  with_items: "{{ groups['calico-rr'] | default([]) }}"
+  when: (not legacy_calicoctl and
+         peer_with_calico_rr|default(false) and inventory_hostname in groups['kube-node']
+         and hostvars[item]['cluster_id'] == cluster_id)
+
 - name: Calico (old) | Disable node mesh
   shell: "{{ bin_dir }}/calicoctl bgp node-mesh off"
-  when: (legacy_calicoctl and
-         peer_with_router|default(false) and inventory_hostname in groups['kube-node'])
+  when: ((peer_with_router|default(false) or peer_with_calico_rr|default(false))
+          and inventory_hostname in groups['kube-node']
+          and legacy_calicoctl)
   run_once: true
 
 - name: Calico (old) | Configure peering with router(s)
@@ -219,3 +247,10 @@
   with_items: "{{ peers|default([]) }}"
   when: (legacy_calicoctl and
          peer_with_router|default(false) and inventory_hostname in groups['kube-node'])
+
+- name: Calico (old) | Configure peering with route reflectors
+  shell: "{{ bin_dir }}/calicoctl node bgp peer add {{ hostvars[item]['calico_rr_ip']|default(hostvars[item]['ip']) }} as {{ local_as | default(global_as_num) }}"
+  with_items: "{{ groups['calico-rr'] | default([]) }}"
+  when: (legacy_calicoctl and
+         peer_with_calico_rr|default(false) and inventory_hostname in groups['kube-node']
+         and hostvars[item]['cluster_id'] == cluster_id)