From a5edd0d709f28507635771cefae43790f4265f20 Mon Sep 17 00:00:00 2001 From: JuanJo Ciarlante Date: Tue, 16 Oct 2018 11:15:05 -0300 Subject: [PATCH] [jjo] add kube-router support (#3339) * [jjo] add kube-router support Fixes cloudnativelabs/kube-router#147. * add kube-router as another network_plugin choice * support most used kube-router flags via `kube_router_foo` vars as other plugins * implement replacing kube-proxy (--run-service-proxy=true) via `kube_proxy_mode: none`, verified in a _non kubeadm_enabled_ install, should also work for recent kubeadm releases via `skipKubeProxyInstall: true` config * [jjo] address PR#3339 review from @woopstar * add busybox image used by kube-router to downloads * fix busybox download groups key * rework kubeadm_enabled + kube_router_run_service_proxy - verify it working ok w/the kubeadm_enabled and kube_router_run_service_proxy true or false - introduce `kube_proxy_remove` fact, to decouple logic from kube_proxy_mode (which affects kubeadm configmap settings, thus no-good to ab-use it to 'none') * improve kube-router.md re: kubeadm_enabled and kube_router_run_service_proxy * address @woopstar latest review * add inventory/sample/group_vars/k8s-cluster/k8s-net-kube-router.yml * fix kube_router_run_service_proxy conditional for kube-proxy removal * fix kube_proxy_remove fact (w/ |bool), add some needed kube-proxy tags on my and existing changes * update kube-router tolerations for 1.12 compatibility * add PriorityClass to kube-router DaemonSet --- .gitlab-ci.yml | 45 ++++ README.md | 6 + docs/kube-router.md | 91 +++++++ docs/openstack.md | 16 +- .../k8s-cluster/k8s-net-kube-router.yml | 37 +++ roles/download/defaults/main.yml | 23 ++ .../network_plugin/kube-router/tasks/main.yml | 22 ++ .../network_plugin/meta/main.yml | 5 + roles/kubernetes/kubeadm/tasks/main.yml | 19 ++ .../kube-controller-manager.manifest.j2 | 2 +- roles/kubernetes/node/tasks/main.yml | 17 +- .../node/templates/kubelet.kubeadm.env.j2 | 2 +- 
.../node/templates/kubelet.rkt.service.j2 | 2 +- .../node/templates/kubelet.standard.env.j2 | 2 +- .../preinstall/tasks/0020-verify-settings.yml | 2 +- .../preinstall/tasks/0040-set_facts.yml | 17 ++ .../tasks/0050-create_directories.yml | 3 +- roles/kubernetes/secrets/tasks/main.yml | 7 +- .../kube-router/defaults/main.yml | 36 +++ .../network_plugin/kube-router/tasks/main.yml | 17 ++ .../kube-router/tasks/reset.yml | 9 + .../kube-router/templates/kube-router.yml.j2 | 225 ++++++++++++++++++ roles/network_plugin/meta/main.yml | 5 + roles/reset/tasks/main.yml | 3 +- .../win_nodes/kubernetes_patch/tasks/main.yml | 2 + tests/files/gce_centos7-kube-router.yml | 12 + tests/files/gce_coreos-kube-router.yml | 13 + tests/files/gce_ubuntu-kube-router-sep.yml | 11 + 28 files changed, 634 insertions(+), 17 deletions(-) create mode 100644 docs/kube-router.md create mode 100644 inventory/sample/group_vars/k8s-cluster/k8s-net-kube-router.yml create mode 100644 roles/kubernetes-apps/network_plugin/kube-router/tasks/main.yml create mode 100644 roles/network_plugin/kube-router/defaults/main.yml create mode 100644 roles/network_plugin/kube-router/tasks/main.yml create mode 100644 roles/network_plugin/kube-router/tasks/reset.yml create mode 100644 roles/network_plugin/kube-router/templates/kube-router.yml.j2 create mode 100644 tests/files/gce_centos7-kube-router.yml create mode 100644 tests/files/gce_coreos-kube-router.yml create mode 100644 tests/files/gce_ubuntu-kube-router-sep.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 77a9fd41a..4fc458239 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -296,10 +296,18 @@ before_script: # stage: deploy-special MOVED_TO_GROUP_VARS: "true" +.centos7_kube_router_variables: &centos7_kube_router_variables +# stage: deploy-special + MOVED_TO_GROUP_VARS: "true" + .coreos_alpha_weave_ha_variables: &coreos_alpha_weave_ha_variables # stage: deploy-special MOVED_TO_GROUP_VARS: "true" +.coreos_kube_router_variables: &coreos_kube_router_variables
+# stage: deploy-special + MOVED_TO_GROUP_VARS: "true" + .ubuntu_rkt_sep_variables: &ubuntu_rkt_sep_variables # stage: deploy-part1 MOVED_TO_GROUP_VARS: "true" @@ -316,6 +324,10 @@ before_script: # stage: deploy-special MOVED_TO_GROUP_VARS: "true" +.ubuntu_kube_router_variables: &ubuntu_kube_router_variables +# stage: deploy-special + MOVED_TO_GROUP_VARS: "true" + .opensuse_canal_variables: &opensuse_canal_variables # stage: deploy-part2 MOVED_TO_GROUP_VARS: "true" @@ -615,6 +627,17 @@ gce_centos7-calico-ha-triggers: when: on_success only: ['triggers'] +gce_centos7-kube-router: + stage: deploy-special + <<: *job + <<: *gce + variables: + <<: *gce_variables + <<: *centos7_kube_router_variables + when: manual + except: ['triggers'] + only: ['master', /^pr-.*$/] + gce_opensuse-canal: stage: deploy-part2 <<: *job @@ -638,6 +661,17 @@ gce_coreos-alpha-weave-ha: except: ['triggers'] only: ['master', /^pr-.*$/] +gce_coreos-kube-router: + stage: deploy-special + <<: *job + <<: *gce + variables: + <<: *gce_variables + <<: *coreos_kube_router_variables + when: manual + except: ['triggers'] + only: ['master', /^pr-.*$/] + gce_ubuntu-rkt-sep: stage: deploy-part2 <<: *job @@ -682,6 +716,17 @@ gce_ubuntu-flannel-sep: except: ['triggers'] only: ['master', /^pr-.*$/] +gce_ubuntu-kube-router-sep: + stage: deploy-special + <<: *job + <<: *gce + variables: + <<: *gce_variables + <<: *ubuntu_kube_router_variables + when: manual + except: ['triggers'] + only: ['master', /^pr-.*$/] + # Premoderated with manual actions ci-authorized: <<: *job diff --git a/README.md b/README.md index 99a24c1ad..0d2cd5913 100644 --- a/README.md +++ b/README.md @@ -117,6 +117,7 @@ Supported Components - [contiv](https://github.com/contiv/install) v1.1.7 - [flanneld](https://github.com/coreos/flannel) v0.10.0 - [weave](https://github.com/weaveworks/weave) v2.4.1 + - [kube-router](https://github.com/cloudnativelabs/kube-router) v0.2.0 - Application - 
[cephfs-provisioner](https://github.com/kubernetes-incubator/external-storage) v2.1.0-k8s1.11 - [cert-manager](https://github.com/jetstack/cert-manager) v0.5.0 @@ -164,6 +165,11 @@ You can choose between 6 network plugins. (default: `calico`, except Vagrant use - [weave](docs/weave.md): Weave is a lightweight container overlay network that doesn't require an external K/V database cluster. (Please refer to `weave` [troubleshooting documentation](http://docs.weave.works/weave/latest_release/troubleshooting.html)). +- [kube-router](docs/kube-router.md): Kube-router is a L3 CNI for Kubernetes networking aiming to provide operational + simplicity and high performance: it uses IPVS to provide Kube Services Proxy (if setup to replace kube-proxy), + iptables for network policies, and BGP for Pods L3 networking (with optionally BGP peering with out-of-cluster BGP peers). + It can also optionally advertise routes to Kubernetes cluster Pods CIDRs, ClusterIPs, ExternalIPs and LoadBalancerIPs. + The choice is defined with the variable `kube_network_plugin`. There is also an option to leverage built-in cloud provider networking instead. See also [Network checker](docs/netcheck.md). diff --git a/docs/kube-router.md b/docs/kube-router.md new file mode 100644 index 000000000..dca749023 --- /dev/null +++ b/docs/kube-router.md @@ -0,0 +1,91 @@ +Kube-router +=========== + +Kube-router is a L3 CNI provider, as such it will setup IPv4 routing between +nodes to provide Pods' networks reachability. + +See [kube-router documentation](https://www.kube-router.io/).
+ +## Verifying kube-router install + +Kube-router runs its pods as a `DaemonSet` in the `kube-system` namespace: + +* Check the status of kube-router pods + +``` +# From the CLI +kubectl get pod --namespace=kube-system -l k8s-app=kube-router -owide + +# output +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE +kube-router-4f679 1/1 Running 0 2d 192.168.186.4 mykube-k8s-node-nf-2 +kube-router-5slf8 1/1 Running 0 2d 192.168.186.11 mykube-k8s-node-nf-3 +kube-router-lb6k2 1/1 Running 0 20h 192.168.186.14 mykube-k8s-node-nf-6 +kube-router-rzvrb 1/1 Running 0 20h 192.168.186.17 mykube-k8s-node-nf-4 +kube-router-v6n56 1/1 Running 0 2d 192.168.186.6 mykube-k8s-node-nf-1 +kube-router-wwhg8 1/1 Running 0 20h 192.168.186.16 mykube-k8s-node-nf-5 +kube-router-x2xs7 1/1 Running 0 2d 192.168.186.10 mykube-k8s-master-1 +``` + +* Peek at kube-router container logs: + +``` +# From the CLI +kubectl logs --namespace=kube-system -l k8s-app=kube-router | grep Peer.Up + +# output +time="2018-09-17T16:47:14Z" level=info msg="Peer Up" Key=192.168.186.6 State=BGP_FSM_OPENCONFIRM Topic=Peer +time="2018-09-17T16:47:16Z" level=info msg="Peer Up" Key=192.168.186.11 State=BGP_FSM_OPENCONFIRM Topic=Peer +time="2018-09-17T16:47:46Z" level=info msg="Peer Up" Key=192.168.186.10 State=BGP_FSM_OPENCONFIRM Topic=Peer +time="2018-09-18T19:12:24Z" level=info msg="Peer Up" Key=192.168.186.14 State=BGP_FSM_OPENCONFIRM Topic=Peer +time="2018-09-18T19:12:28Z" level=info msg="Peer Up" Key=192.168.186.17 State=BGP_FSM_OPENCONFIRM Topic=Peer +time="2018-09-18T19:12:38Z" level=info msg="Peer Up" Key=192.168.186.16 State=BGP_FSM_OPENCONFIRM Topic=Peer +[...] +``` + +## Gathering kube-router state + +Kube-router Pods come bundled with a "Pod Toolbox" which provides very +useful internal state views for: + +* IPVS: via `ipvsadm` +* BGP peering and routing info: via `gobgp` + +You need to `kubectl exec -it ...` into a kube-router container to use these, see + for details. 
+ +## Kube-router configuration + + +You can change the default configuration by overriding `kube_router_...` variables +(as found at `roles/network_plugin/kube-router/defaults/main.yml`), +these are named to follow `kube-router` command-line options as per +<https://github.com/cloudnativelabs/kube-router/blob/master/docs/user-guide.md>. + +## Caveats + +### kubeadm_enabled: true + +If you want to set `kube-router` to replace `kube-proxy` +(`--run-service-proxy=true`) while using `kubeadm_enabled`, +then `kube-proxy` DaemonSet will be removed *after* kubeadm finishes +running, as it's not possible to skip kube-proxy install in kubeadm flags +and/or config, see https://github.com/kubernetes/kubeadm/issues/776. + +Given above, if `--run-service-proxy=true` is needed it would be +better to avoid `kubeadm_enabled` i.e. set: + +``` +kubeadm_enabled: false +kube_router_run_service_proxy: true + +``` + +If for some reason you do want/need to set `kubeadm_enabled`, removing +it afterwards behaves better if kube-proxy is set to ipvs mode, i.e. set: + +``` +kubeadm_enabled: true +kube_router_run_service_proxy: true +kube_proxy_mode: ipvs +``` diff --git a/docs/openstack.md b/docs/openstack.md index ef2d1dcf8..5d07fb33f 100644 --- a/docs/openstack.md +++ b/docs/openstack.md @@ -8,15 +8,15 @@ After that make sure to source in your OpenStack credentials like you would do w The next step is to make sure the hostnames in your `inventory` file are identical to your instance names in OpenStack. Otherwise [cinder](https://wiki.openstack.org/wiki/Cinder) won't work as expected. -Unless you are using calico you can now run the playbook. +Unless you are using calico or kube-router you can now run the playbook. -**Additional step needed when using calico:** +**Additional step needed when using calico or kube-router:** -Calico does not encapsulate all packages with the hosts' ip addresses. Instead the packages will be routed with the PODs ip addresses directly. +Being L3 CNI, calico and kube-router do not encapsulate all packets with the hosts' ip addresses.
Instead the packets will be routed with the PODs ip addresses directly. -OpenStack will filter and drop all packages from ips it does not know to prevent spoofing. +OpenStack will filter and drop all packets from ips it does not know to prevent spoofing. -In order to make calico work on OpenStack you will need to tell OpenStack to allow calico's packages by allowing the network it uses. +In order to make L3 CNIs work on OpenStack you will need to tell OpenStack to allow pods packets by allowing the network they use. First you will need the ids of your OpenStack instances that will run kubernetes: @@ -36,10 +36,14 @@ Then you can use the instance ids to find the connected [neutron](https://wiki.o | 5662a4e0-e646-47f0-bf88-d80fbd2d99ef | e1f48aad-df96-4bce-bf61-62ae12bf3f95 | | e5ae2045-a1e1-4e99-9aac-4353889449a7 | 725cd548-6ea3-426b-baaa-e7306d3c8052 | -Given the port ids on the left, you can set the two `allowed_address`(es) in OpenStack. Note that you have to allow both `kube_service_addresses` (default `10.233.0.0/18`) and `kube_pods_subnet` (default `10.233.64.0/18`.) +Given the port ids on the left, you can set the two `allowed-address`(es) in OpenStack. Note that you have to allow both `kube_service_addresses` (default `10.233.0.0/18`) and `kube_pods_subnet` (default `10.233.64.0/18`.) # allow kube_service_addresses and kube_pods_subnet network openstack port set 5662a4e0-e646-47f0-bf88-d80fbd2d99ef --allowed-address ip-address=10.233.0.0/18 --allowed-address ip-address=10.233.64.0/18 openstack port set e5ae2045-a1e1-4e99-9aac-4353889449a7 --allowed-address ip-address=10.233.0.0/18 --allowed-address ip-address=10.233.64.0/18 +If all the VMs in the tenant correspond to kubespray deployment, you can "sweep run" above with: + + openstack port list --device-owner=compute:nova -c ID -f value | xargs -tI@ openstack port set @ --allowed-address ip-address=10.233.0.0/18 --allowed-address ip-address=10.233.64.0/18 + Now you can finally run the playbook. 
diff --git a/inventory/sample/group_vars/k8s-cluster/k8s-net-kube-router.yml b/inventory/sample/group_vars/k8s-cluster/k8s-net-kube-router.yml new file mode 100644 index 000000000..3d5cb7256 --- /dev/null +++ b/inventory/sample/group_vars/k8s-cluster/k8s-net-kube-router.yml @@ -0,0 +1,37 @@ +# See roles/network_plugin/kube-router//defaults/main.yml + +# Enables Pod Networking -- Advertises and learns the routes to Pods via iBGP +# kube_router_run_router: true + +# Enables Network Policy -- sets up iptables to provide ingress firewall for pods +# kube_router_run_firewall: true + +# Enables Service Proxy -- sets up IPVS for Kubernetes Services +# see docs/kube-router.md "Caveats" section +# kube_router_run_service_proxy: false + +# Add Cluster IP of the service to the RIB so that it gets advertises to the BGP peers. +# kube_router_advertise_cluster_ip: false + +# Add External IP of service to the RIB so that it gets advertised to the BGP peers. +# kube_router_advertise_external_ip: false + +# Add LoadbBalancer IP of service status as set by the LB provider to the RIB so that it gets advertised to the BGP peers. +# kube_router_advertise_loadbalancer_ip: false + +# Array of arbitrary extra arguments to kube-router, see +# https://github.com/cloudnativelabs/kube-router/blob/master/docs/user-guide.md +# kube_router_extra_args: [] + +# ASN numbers of the BGP peer to which cluster nodes will advertise cluster ip and node's pod cidr. +# kube_router_peer_router_asns: ~ + +# The ip address of the external router to which all nodes will peer and advertise the cluster ip and pod cidr's. +# kube_router_peer_router_ips: ~ + +# The remote port of the external BGP to which all nodes will peer. If not set, default BGP port (179) will be used. 
+# kube_router_peer_router_ports: ~ + +# Setups node CNI to allow hairpin mode, requires node reboots, see +# https://github.com/cloudnativelabs/kube-router/blob/master/docs/user-guide.md#hairpin-mode +# kube_router_support_hairpin_mode: false diff --git a/roles/download/defaults/main.yml b/roles/download/defaults/main.yml index 854aa81f6..918f3eab6 100644 --- a/roles/download/defaults/main.yml +++ b/roles/download/defaults/main.yml @@ -55,6 +55,7 @@ weave_version: "2.4.1" pod_infra_version: 3.1 contiv_version: 1.2.1 cilium_version: "v1.2.0" +kube_router_version: "v0.2.0" # Download URLs kubeadm_download_url: "https://storage.googleapis.com/kubernetes-release/release/{{ kubeadm_version }}/bin/linux/{{ image_arch }}/kubeadm" @@ -152,6 +153,8 @@ contiv_ovs_image_repo: "contiv/ovs" contiv_ovs_image_tag: "latest" cilium_image_repo: "docker.io/cilium/cilium" cilium_image_tag: "{{ cilium_version }}" +kube_router_image_repo: "cloudnativelabs/kube-router" +kube_router_image_tag: "{{ kube_router_version }}" nginx_image_repo: nginx nginx_image_tag: 1.13 dnsmasq_version: 2.78 @@ -178,6 +181,8 @@ kubednsautoscaler_image_repo: "gcr.io/google_containers/cluster-proportional-aut kubednsautoscaler_image_tag: "{{ kubednsautoscaler_version }}" test_image_repo: busybox test_image_tag: latest +busybox_image_repo: busybox +busybox_image_tag: 1.29.2 helm_version: "v2.9.1" helm_image_repo: "lachlanevenson/k8s-helm" helm_image_tag: "{{ helm_version }}" @@ -391,6 +396,15 @@ downloads: groups: - k8s-cluster + kube_router: + enabled: "{{ kube_network_plugin == 'kube-router' }}" + container: true + repo: "{{ kube_router_image_repo }}" + tag: "{{ kube_router_image_tag }}" + sha256: "{{ kube_router_digest_checksum|default(None) }}" + groups: + - k8s-cluster + pod_infra: enabled: true container: true @@ -472,6 +486,15 @@ downloads: groups: - kube-node + busybox: + enabled: "{{ kube_network_plugin in ['kube-router'] }}" + container: true + repo: "{{ busybox_image_repo }}" + tag: "{{ 
busybox_image_tag }}" + sha256: "{{ busybox_digest_checksum|default(None) }}" + groups: + - k8s-cluster + testbox: enabled: false container: true diff --git a/roles/kubernetes-apps/network_plugin/kube-router/tasks/main.yml b/roles/kubernetes-apps/network_plugin/kube-router/tasks/main.yml new file mode 100644 index 000000000..77f5b8bae --- /dev/null +++ b/roles/kubernetes-apps/network_plugin/kube-router/tasks/main.yml @@ -0,0 +1,22 @@ +--- + +- name: kube-router | Start Resources + kube: + name: "kube-router" + kubectl: "{{ bin_dir }}/kubectl" + filename: "{{ kube_config_dir }}/kube-router.yml" + resource: "ds" + namespace: "kube-system" + state: "latest" + when: + - inventory_hostname == groups['kube-master'][0] + +- name: kube-router | Wait for kube-router pods to be ready + command: "{{bin_dir}}/kubectl -n kube-system get pods -l k8s-app=kube-router -o jsonpath='{.items[?(@.status.containerStatuses[0].ready==false)].metadata.name}'" + register: pods_not_ready + until: pods_not_ready.stdout.find("kube-router")==-1 + retries: 30 + delay: 10 + ignore_errors: yes + when: + - inventory_hostname == groups['kube-master'][0] diff --git a/roles/kubernetes-apps/network_plugin/meta/main.yml b/roles/kubernetes-apps/network_plugin/meta/main.yml index 7876401b8..c88dbf015 100644 --- a/roles/kubernetes-apps/network_plugin/meta/main.yml +++ b/roles/kubernetes-apps/network_plugin/meta/main.yml @@ -29,3 +29,8 @@ dependencies: when: kube_network_plugin == 'weave' tags: - weave + + - role: kubernetes-apps/network_plugin/kube-router + when: kube_network_plugin == 'kube-router' + tags: + - kube-router diff --git a/roles/kubernetes/kubeadm/tasks/main.yml b/roles/kubernetes/kubeadm/tasks/main.yml index 55dbf29a9..c15b0699b 100644 --- a/roles/kubernetes/kubeadm/tasks/main.yml +++ b/roles/kubernetes/kubeadm/tasks/main.yml @@ -96,6 +96,9 @@ - kubeadm_config_api_fqdn is not defined - is_kube_master - kubeadm_discovery_address != kube_apiserver_endpoint + - not kube_proxy_remove + tags: + - 
kube-proxy # FIXME(mattymo): Reconcile kubelet kubeconfig filename for both deploy modes - name: Symlink kubelet kubeconfig for calico/canal @@ -114,3 +117,19 @@ - kubeadm_config_api_fqdn is not defined - is_kube_master - kubeadm_discovery_address != kube_apiserver_endpoint + - not kube_proxy_remove + tags: + - kube-proxy + +# FIXME(jjo): need to post-remove kube-proxy until https://github.com/kubernetes/kubeadm/issues/776 +# is fixed +- name: Delete kube-proxy daemonset if kube_proxy_remove set, e.g. kube_network_plugin providing proxy services + shell: "{{ bin_dir }}/kubectl delete daemonset -n kube-system kube-proxy" + delegate_to: "{{groups['kube-master']|first}}" + run_once: true + when: + - kube_proxy_remove + - is_kube_master + - kubeadm_discovery_address != kube_apiserver_endpoint + tags: + - kube-proxy diff --git a/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 index 23a690ce4..c2208a9e0 100644 --- a/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 +++ b/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 @@ -55,7 +55,7 @@ spec: {% if kube_network_plugin is defined and kube_network_plugin == 'cloud' %} - --configure-cloud-routes=true {% endif %} -{% if kube_network_plugin is defined and kube_network_plugin in ["cloud", "flannel", "canal", "cilium"] %} +{% if kube_network_plugin is defined and kube_network_plugin in ["cloud", "flannel", "canal", "cilium", "kube-router"] %} - --allocate-node-cidrs=true - --cluster-cidr={{ kube_pods_subnet }} - --service-cluster-ip-range={{ kube_service_addresses }} diff --git a/roles/kubernetes/node/tasks/main.yml b/roles/kubernetes/node/tasks/main.yml index 785849f20..83454f0c7 100644 --- a/roles/kubernetes/node/tasks/main.yml +++ b/roles/kubernetes/node/tasks/main.yml @@ -146,15 +146,26 @@ template: src: manifests/kube-proxy.manifest.j2 dest: "{{ 
kube_manifest_dir }}/kube-proxy.manifest" - when: not kubeadm_enabled + when: + - not (kubeadm_enabled or kube_proxy_remove) tags: - kube-proxy -- name: Purge proxy manifest for kubeadm +- name: Purge proxy manifest for kubeadm or if proxy services being provided by other means, e.g. network_plugin file: path: "{{ kube_manifest_dir }}/kube-proxy.manifest" state: absent - when: kubeadm_enabled + when: + - kubeadm_enabled or kube_proxy_remove + tags: + - kube-proxy + +- name: Cleanup kube-proxy leftovers from node + command: "{{ docker_bin_dir }}/docker run --rm --privileged -v /lib/modules:/lib/modules --net=host {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} kube-proxy --cleanup" + when: + - kube_proxy_remove + # `kube-proxy --cleanup`, being Ok as per shown WARNING, still returns 255 from above run (?) + ignore_errors: true tags: - kube-proxy diff --git a/roles/kubernetes/node/templates/kubelet.kubeadm.env.j2 b/roles/kubernetes/node/templates/kubelet.kubeadm.env.j2 index 903544b60..96ed45fde 100644 --- a/roles/kubernetes/node/templates/kubelet.kubeadm.env.j2 +++ b/roles/kubernetes/node/templates/kubelet.kubeadm.env.j2 @@ -97,7 +97,7 @@ KUBELET_HOSTNAME="--hostname-override={{ kube_override_hostname }}" {% set all_node_labels = role_node_labels + inventory_node_labels %} KUBELET_ARGS="{{ kubelet_args_base }} {{ kubelet_args_dns }} {{ kube_reserved }} --node-labels={{ all_node_labels | join(',') }} {% if kubelet_custom_flags is string %} {{kubelet_custom_flags}} {% else %}{% for flag in kubelet_custom_flags %} {{flag}} {% endfor %}{% endif %}{% if inventory_hostname in groups['kube-node'] %}{% if kubelet_node_custom_flags is string %} {{kubelet_node_custom_flags}} {% else %}{% for flag in kubelet_node_custom_flags %} {{flag}} {% endfor %}{% endif %}{% endif %}" -{% if kube_network_plugin is defined and kube_network_plugin in ["calico", "canal", "flannel", "weave", "contiv", "cilium"] %} +{% if kube_network_plugin is defined and kube_network_plugin in 
["calico", "canal", "flannel", "weave", "contiv", "cilium", "kube-router"] %} KUBELET_NETWORK_PLUGIN="--network-plugin=cni --cni-conf-dir=/etc/cni/net.d --cni-bin-dir=/opt/cni/bin" {% elif kube_network_plugin is defined and kube_network_plugin == "cloud" %} KUBELET_NETWORK_PLUGIN="--hairpin-mode=promiscuous-bridge --network-plugin=kubenet" diff --git a/roles/kubernetes/node/templates/kubelet.rkt.service.j2 b/roles/kubernetes/node/templates/kubelet.rkt.service.j2 index ee1eaa1b1..ec1dc4975 100644 --- a/roles/kubernetes/node/templates/kubelet.rkt.service.j2 +++ b/roles/kubernetes/node/templates/kubelet.rkt.service.j2 @@ -33,7 +33,7 @@ ExecStart=/usr/bin/rkt run \ --volume var-lib-docker,kind=host,source={{ docker_daemon_graph }},readOnly=false \ --volume var-lib-kubelet,kind=host,source=/var/lib/kubelet,readOnly=false,recursive=true \ --volume var-log,kind=host,source=/var/log \ -{% if kube_network_plugin in ["calico", "weave", "canal", "flannel", "contiv", "cilium"] %} +{% if kube_network_plugin in ["calico", "weave", "canal", "flannel", "contiv", "cilium", "kube-router"] %} --volume etc-cni,kind=host,source=/etc/cni,readOnly=true \ --volume opt-cni,kind=host,source=/opt/cni,readOnly=true \ --volume var-lib-cni,kind=host,source=/var/lib/cni,readOnly=false \ diff --git a/roles/kubernetes/node/templates/kubelet.standard.env.j2 b/roles/kubernetes/node/templates/kubelet.standard.env.j2 index f649859fe..3af478344 100644 --- a/roles/kubernetes/node/templates/kubelet.standard.env.j2 +++ b/roles/kubernetes/node/templates/kubelet.standard.env.j2 @@ -124,7 +124,7 @@ KUBELET_HOSTNAME="--hostname-override={{ kube_override_hostname }}" KUBELET_ARGS="{{ kubelet_args_base }} {{ kubelet_args_dns }} {{ kubelet_args_kubeconfig }} {{ kube_reserved }} --node-labels={{ all_node_labels | join(',') }} {% if kube_feature_gates %} --feature-gates={{ kube_feature_gates|join(',') }} {% endif %} {% if kubelet_custom_flags is string %} {{kubelet_custom_flags}} {% else %}{% for flag in 
kubelet_custom_flags %} {{flag}} {% endfor %}{% endif %}{% if inventory_hostname in groups['kube-node'] %}{% if kubelet_node_custom_flags is string %} {{kubelet_node_custom_flags}} {% else %}{% for flag in kubelet_node_custom_flags %} {{flag}} {% endfor %}{% endif %}{% endif %}" -{% if kube_network_plugin is defined and kube_network_plugin in ["calico", "canal", "flannel", "weave", "contiv", "cilium"] %} +{% if kube_network_plugin is defined and kube_network_plugin in ["calico", "canal", "flannel", "weave", "contiv", "cilium", "kube-router"] %} KUBELET_NETWORK_PLUGIN="--network-plugin=cni --cni-conf-dir=/etc/cni/net.d --cni-bin-dir=/opt/cni/bin" {% elif kube_network_plugin is defined and kube_network_plugin == "weave" %} DOCKER_SOCKET="--docker-endpoint=unix:/var/run/weave/weave.sock" diff --git a/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml b/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml index 9b16442fe..72f2bf528 100644 --- a/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml +++ b/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml @@ -26,7 +26,7 @@ - name: Stop if unknown network plugin assert: - that: kube_network_plugin in ['calico', 'canal', 'flannel', 'weave', 'cloud', 'cilium', 'contiv'] + that: kube_network_plugin in ['calico', 'canal', 'flannel', 'weave', 'cloud', 'cilium', 'contiv', 'kube-router'] when: kube_network_plugin is defined ignore_errors: "{{ ignore_assert_errors }}" diff --git a/roles/kubernetes/preinstall/tasks/0040-set_facts.yml b/roles/kubernetes/preinstall/tasks/0040-set_facts.yml index 1fddb7de5..f0d3001de 100644 --- a/roles/kubernetes/preinstall/tasks/0040-set_facts.yml +++ b/roles/kubernetes/preinstall/tasks/0040-set_facts.yml @@ -158,3 +158,20 @@ paths: - ../vars skip: true + +- name: force kube_proxy removal if proxy services are replaced by other means + set_fact: + kube_proxy_remove: "{{ (kube_network_plugin == 'kube-router') and (kube_router_run_service_proxy is defined and 
kube_router_run_service_proxy)| bool }}" + tags: + - facts + - kube-proxy + +- name: override kube_proxy_mode to ipvs if kube_proxy_remove is set, as ipvs won't require kube-proxy cleanup when kube-proxy daemonset gets deleted + set_fact: + kube_proxy_mode: 'ipvs' + when: + - kubeadm_enabled + - kube_proxy_remove + tags: + - facts + - kube-proxy diff --git a/roles/kubernetes/preinstall/tasks/0050-create_directories.yml b/roles/kubernetes/preinstall/tasks/0050-create_directories.yml index 307116033..c508af4c9 100644 --- a/roles/kubernetes/preinstall/tasks/0050-create_directories.yml +++ b/roles/kubernetes/preinstall/tasks/0050-create_directories.yml @@ -33,7 +33,7 @@ - "/opt/cni/bin" - "/var/lib/calico" when: - - kube_network_plugin in ["calico", "weave", "canal", "flannel", "contiv", "cilium"] + - kube_network_plugin in ["calico", "weave", "canal", "flannel", "contiv", "cilium", "kube-router"] - inventory_hostname in groups['k8s-cluster'] tags: - network @@ -42,6 +42,7 @@ - weave - canal - contiv + - kube-router - bootstrap-os - name: Create local volume provisioner directories diff --git a/roles/kubernetes/secrets/tasks/main.yml b/roles/kubernetes/secrets/tasks/main.yml index d36c3a057..232474f67 100644 --- a/roles/kubernetes/secrets/tasks/main.yml +++ b/roles/kubernetes/secrets/tasks/main.yml @@ -113,7 +113,11 @@ with_items: - "node-{{ inventory_hostname }}.pem" - "kube-proxy-{{ inventory_hostname }}.pem" - when: inventory_hostname in groups['k8s-cluster'] + when: + - inventory_hostname in groups['k8s-cluster'] + tags: + - node + - kube-proxy - name: "Gen_certs | set kube node certificate serial facts" set_fact: @@ -123,6 +127,7 @@ tags: - kubelet - node + - kube-proxy - import_tasks: gen_tokens.yml tags: diff --git a/roles/network_plugin/kube-router/defaults/main.yml b/roles/network_plugin/kube-router/defaults/main.yml new file mode 100644 index 000000000..5aacbc2b0 --- /dev/null +++ b/roles/network_plugin/kube-router/defaults/main.yml @@ -0,0 +1,36 @@ +--- +# 
Enables Pod Networking -- Advertises and learns the routes to Pods via iBGP +kube_router_run_router: true + +# Enables Network Policy -- sets up iptables to provide ingress firewall for pods +kube_router_run_firewall: true + +# Enables Service Proxy -- sets up IPVS for Kubernetes Services +# see docs/kube-router.md "Caveats" section +kube_router_run_service_proxy: false + +# Add Cluster IP of the service to the RIB so that it gets advertised to the BGP peers. +kube_router_advertise_cluster_ip: false + +# Add External IP of service to the RIB so that it gets advertised to the BGP peers. +kube_router_advertise_external_ip: false + +# Add LoadBalancer IP of service status as set by the LB provider to the RIB so that it gets advertised to the BGP peers. +kube_router_advertise_loadbalancer_ip: false + +# Array of arbitrary extra arguments to kube-router, see +# https://github.com/cloudnativelabs/kube-router/blob/master/docs/user-guide.md +kube_router_extra_args: [] + +# ASN numbers of the BGP peer to which cluster nodes will advertise cluster ip and node's pod cidr. +kube_router_peer_router_asns: ~ + +# The ip address of the external router to which all nodes will peer and advertise the cluster ip and pod cidr's. +kube_router_peer_router_ips: ~ + +# The remote port of the external BGP to which all nodes will peer. If not set, default BGP port (179) will be used.
+kube_router_peer_router_ports: ~ + +# Setups node CNI to allow hairpin mode, requires node reboots, see +# https://github.com/cloudnativelabs/kube-router/blob/master/docs/user-guide.md#hairpin-mode +kube_router_support_hairpin_mode: false diff --git a/roles/network_plugin/kube-router/tasks/main.yml b/roles/network_plugin/kube-router/tasks/main.yml new file mode 100644 index 000000000..9fc5b0a7d --- /dev/null +++ b/roles/network_plugin/kube-router/tasks/main.yml @@ -0,0 +1,17 @@ +--- + +- name: kube-router | Copy cni plugins from hyperkube + command: "{{ docker_bin_dir }}/docker run --rm -v /opt/cni/bin:/cnibindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /bin/cp -prf /opt/cni/bin/. /cnibindir/" + register: cni_task_result + until: cni_task_result.rc == 0 + retries: 4 + delay: "{{ retry_stagger | random + 3 }}" + changed_when: false + tags: + - hyperkube + - upgrade + +- name: kube-router | Create manifest + template: + src: kube-router.yml.j2 + dest: "{{ kube_config_dir }}/kube-router.yml" diff --git a/roles/network_plugin/kube-router/tasks/reset.yml b/roles/network_plugin/kube-router/tasks/reset.yml new file mode 100644 index 000000000..7c3fa5347 --- /dev/null +++ b/roles/network_plugin/kube-router/tasks/reset.yml @@ -0,0 +1,9 @@ +--- +- name: reset | check kube-dummy-if network device + stat: + path: /sys/class/net/kube-dummy-if + register: kube_dummy_if + +- name: reset | remove the network device created by kube-router + command: ip link del kube-dummy-if + when: kube_dummy_if.stat.exists diff --git a/roles/network_plugin/kube-router/templates/kube-router.yml.j2 b/roles/network_plugin/kube-router/templates/kube-router.yml.j2 new file mode 100644 index 000000000..eb150daf1 --- /dev/null +++ b/roles/network_plugin/kube-router/templates/kube-router.yml.j2 @@ -0,0 +1,225 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: kube-router-cfg + namespace: kube-system + labels: + tier: node + k8s-app: kube-router +data: + cni-conf.json: | + { + 
"name":"kubernetes", + "type":"bridge", + "bridge":"kube-bridge", + "isDefaultGateway":true, +{% if kube_router_support_hairpin_mode %} + "hairpinMode":true, +{% endif %} + "ipam": { + "type":"host-local" + } + } + kubeconfig: | + apiVersion: v1 + kind: Config + clusterCIDR: {{ kube_pods_subnet }} + clusters: + - name: cluster + cluster: + certificate-authority: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + server: {{ kube_apiserver_endpoint }} + users: + - name: kube-router + user: + tokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + contexts: + - context: + cluster: cluster + user: kube-router + name: kube-router-context + current-context: kube-router-context + +--- +apiVersion: extensions/v1beta1 +kind: DaemonSet +metadata: + labels: + k8s-app: kube-router + tier: node + name: kube-router + namespace: kube-system +spec: + minReadySeconds: 3 + updateStrategy: + rollingUpdate: + maxUnavailable: 1 + type: RollingUpdate + template: + metadata: + labels: + k8s-app: kube-router + tier: node + annotations: + scheduler.alpha.kubernetes.io/critical-pod: '' + spec: +{% if kube_version|version_compare('v1.11.1', '>=') %} + priorityClassName: system-cluster-critical +{% endif %} + serviceAccountName: kube-router + containers: + - name: kube-router + image: {{ kube_router_image_repo }}:{{ kube_router_image_tag }} + imagePullPolicy: IfNotPresent + args: + - --run-router={{ kube_router_run_router | bool }} + - --run-firewall={{ kube_router_run_firewall | bool }} + - --run-service-proxy={{ kube_router_run_service_proxy | bool }} + - --kubeconfig=/var/lib/kube-router/kubeconfig +{% if kube_router_advertise_cluster_ip %} + - --advertise-cluster-ip +{% endif %} +{% if kube_router_advertise_external_ip %} + - --advertise-external-ip +{% endif %} +{% if kube_router_advertise_loadbalancer_ip %} + - --advertise-loadbalancer-ip +{% endif %} +{% if kube_router_peer_router_asns %} + - --peer-router-asns {{ kube_router_peer_router_asns }} +{% endif %} +{% if 
kube_router_peer_router_ips %} + - --peer-router-ips {{ kube_router_peer_router_ips }} +{% endif %} +{% if kube_router_peer_router_ports %} + - --peer-router-ports {{ kube_router_peer_router_ports }} +{% endif %} +{% for arg in kube_router_extra_args %} + - "{{ arg }}" +{% endfor %} + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + livenessProbe: + httpGet: + path: /healthz + port: 20244 + initialDelaySeconds: 10 + periodSeconds: 3 + resources: + requests: + cpu: 250m + memory: 250Mi + securityContext: + privileged: true + volumeMounts: + - name: lib-modules + mountPath: /lib/modules + readOnly: true + - name: cni-conf-dir + mountPath: /etc/cni/net.d + - name: kubeconfig + mountPath: /var/lib/kube-router + readOnly: true + initContainers: + - name: install-cni + image: {{ busybox_image_repo }}:{{ busybox_image_tag }} + imagePullPolicy: IfNotPresent + command: + - /bin/sh + - -c + - set -e -x; + if [ ! -f /etc/cni/net.d/10-kuberouter.conf ]; then + TMP=/etc/cni/net.d/.tmp-kuberouter-cfg; + cp /etc/kube-router/cni-conf.json ${TMP}; + mv ${TMP} /etc/cni/net.d/10-kuberouter.conf; + fi; + if [ ! 
-f /var/lib/kube-router/kubeconfig ]; then + TMP=/var/lib/kube-router/.tmp-kubeconfig; + cp /etc/kube-router/kubeconfig ${TMP}; + mv ${TMP} /var/lib/kube-router/kubeconfig; + fi + volumeMounts: + - mountPath: /etc/cni/net.d + name: cni-conf-dir + - mountPath: /etc/kube-router + name: kube-router-cfg + - name: kubeconfig + mountPath: /var/lib/kube-router + hostNetwork: true + tolerations: + - operator: Exists + # Mark pod as critical for rescheduling (Will have no effect starting with kubernetes 1.12) + - key: CriticalAddonsOnly + operator: "Exists" + volumes: + - name: lib-modules + hostPath: + path: /lib/modules + - name: cni-conf-dir + hostPath: + path: /etc/cni/net.d + - name: kube-router-cfg + configMap: + name: kube-router-cfg + - name: kubeconfig + hostPath: + path: /var/lib/kube-router + +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kube-router + namespace: kube-system + +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1beta1 +metadata: + name: kube-router + namespace: kube-system +rules: + - apiGroups: + - "" + resources: + - namespaces + - pods + - services + - nodes + - endpoints + verbs: + - list + - get + - watch + - apiGroups: + - "networking.k8s.io" + resources: + - networkpolicies + verbs: + - list + - get + - watch + - apiGroups: + - extensions + resources: + - networkpolicies + verbs: + - get + - list + - watch +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1beta1 +metadata: + name: kube-router +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kube-router +subjects: +- kind: ServiceAccount + name: kube-router + namespace: kube-system diff --git a/roles/network_plugin/meta/main.yml b/roles/network_plugin/meta/main.yml index 4a1a73062..a0fae7207 100644 --- a/roles/network_plugin/meta/main.yml +++ b/roles/network_plugin/meta/main.yml @@ -30,5 +30,10 @@ dependencies: tags: - contiv + - role: network_plugin/kube-router + when: kube_network_plugin == 'kube-router' + tags: + 
- kube-router + - role: network_plugin/cloud when: kube_network_plugin == 'cloud' diff --git a/roles/reset/tasks/main.yml b/roles/reset/tasks/main.yml index 8122ada08..1fff14a27 100644 --- a/roles/reset/tasks/main.yml +++ b/roles/reset/tasks/main.yml @@ -165,6 +165,7 @@ - /run/contiv - /etc/openvswitch - /run/openvswitch + - /var/lib/kube-router ignore_errors: yes tags: - files @@ -196,7 +197,7 @@ - name: reset | include file with reset tasks specific to the network_plugin if exists include_tasks: "{{ (role_path + '/../network_plugin/' + kube_network_plugin + '/tasks/reset.yml') | realpath }}" when: - - kube_network_plugin in ['flannel', 'cilium', 'contiv'] + - kube_network_plugin in ['flannel', 'cilium', 'contiv', 'kube-router'] tags: - network diff --git a/roles/win_nodes/kubernetes_patch/tasks/main.yml b/roles/win_nodes/kubernetes_patch/tasks/main.yml index a6af1fd90..5e08d88bf 100644 --- a/roles/win_nodes/kubernetes_patch/tasks/main.yml +++ b/roles/win_nodes/kubernetes_patch/tasks/main.yml @@ -32,3 +32,5 @@ - debug: msg={{ patch_kube_proxy_state.stderr_lines }} when: patch_kube_proxy_state is not skipped tags: init + when: + - not kube_proxy_remove diff --git a/tests/files/gce_centos7-kube-router.yml b/tests/files/gce_centos7-kube-router.yml new file mode 100644 index 000000000..c210d853f --- /dev/null +++ b/tests/files/gce_centos7-kube-router.yml @@ -0,0 +1,12 @@ +# Instance settings +cloud_image_family: centos-7 +cloud_region: us-central1-c +cloud_machine_type: "n1-standard-1" +mode: default + +# Deployment settings +kube_network_plugin: kube-router +deploy_netchecker: true +enable_network_policy: true +kubedns_min_replicas: 1 +cloud_provider: gce diff --git a/tests/files/gce_coreos-kube-router.yml b/tests/files/gce_coreos-kube-router.yml new file mode 100644 index 000000000..655ca2dd5 --- /dev/null +++ b/tests/files/gce_coreos-kube-router.yml @@ -0,0 +1,13 @@ +# Instance settings +cloud_image_family: coreos-stable +cloud_region: us-central1-c +mode: default 
+startup_script: 'systemctl disable locksmithd && systemctl stop locksmithd' + +# Deployment settings +kube_network_plugin: kube-router +bootstrap_os: coreos +resolvconf_mode: host_resolvconf # this is required as long as the coreos stable channel uses docker < 1.12 +deploy_netchecker: true +kubedns_min_replicas: 1 +cloud_provider: gce diff --git a/tests/files/gce_ubuntu-kube-router-sep.yml b/tests/files/gce_ubuntu-kube-router-sep.yml new file mode 100644 index 000000000..fde781ff2 --- /dev/null +++ b/tests/files/gce_ubuntu-kube-router-sep.yml @@ -0,0 +1,11 @@ +# Instance settings +cloud_image_family: ubuntu-1604-lts +cloud_region: us-central1-c +mode: separate + +# Deployment settings +bootstrap_os: ubuntu +kube_network_plugin: kube-router +deploy_netchecker: true +kubedns_min_replicas: 1 +cloud_provider: gce