From 731d32afda257c4713231c477685824f889dfa45 Mon Sep 17 00:00:00 2001
From: Bogdan Dobrelya
Date: Wed, 13 Jul 2016 17:13:47 +0200
Subject: [PATCH] Add HA/LB endpoints for kube-apiserver

* Add HA docs for the API server.
* Add auto-evaluated internal endpoints and clarify the
  loadbalancer_apiserver vars and use cases.
* Use facts for kube_apiserver to not repeat code and to enable the use
  of LB endpoints.
* Use the /healthz check for the wait-for apiserver task.
* Use the single endpoint for kubelet instead of the list of apiservers.
* Specify kube_apiserver_count for the HA layout.

Signed-off-by: Bogdan Dobrelya
---
 docs/ha-mode.md                                 | 92 ++++++++++++++++++-
 inventory/group_vars/all.yml                    | 27 +-----
 roles/kubernetes/master/tasks/main.yml          |  9 +-
 .../templates/kubectl-kubeconfig.yaml.j2        |  2 +-
 .../manifests/kube-apiserver.manifest.j2        |  3 +-
 .../kube-controller-manager.manifest.j2         |  2 +-
 .../manifests/kube-scheduler.manifest.j2        |  2 +-
 roles/kubernetes/node/templates/kubelet.j2      |  2 +-
 .../manifests/kube-proxy.manifest.j2            | 10 +-
 roles/kubernetes/preinstall/tasks/set_facts.yml | 22 +++++
 .../calico/templates/network-environment.j2     |  2 +-
 11 files changed, 130 insertions(+), 43 deletions(-)

diff --git a/docs/ha-mode.md b/docs/ha-mode.md
index b3d752cc9..587d5ea46 100644
--- a/docs/ha-mode.md
+++ b/docs/ha-mode.md
@@ -31,4 +31,94 @@ flannel networking plugin always uses a single `--etcd-server` endpoint!
 
 Kube-apiserver
 --------------
-TODO(bogdando) TBD
+
+K8s components require a loadbalancer to access the apiservers via a reverse
+proxy. Kube-proxy does not support multiple apiservers for the time being, so
+you will need to configure your own loadbalancer to achieve HA. Note that
+deploying a loadbalancer is up to the user and is not covered by the Ansible
+roles in Kargo. By default, Kargo only configures a non-HA endpoint, which
+points to the `access_ip` or IP address of the first server node in the
+`kube-master` group. It can also configure clients to use endpoints for a
+given loadbalancer type.
+
+A loadbalancer (LB) may be an external or an internal one. An external LB
+provides access for external clients, while the internal LB accepts client
+connections only on the localhost, similarly to the etcd-proxy HA endpoints.
+Given a frontend `VIP` address and `IP1, IP2` addresses of the backends, here
+is an example configuration for a HAProxy service acting as an external LB:
+```
+listen kubernetes-apiserver-https
+  bind <VIP>:8383
+  option ssl-hello-chk
+  mode tcp
+  timeout client 3h
+  timeout server 3h
+  server master1 <IP1>:443
+  server master2 <IP2>:443
+  balance roundrobin
+```
+
+And the corresponding example global vars config:
+```
+apiserver_loadbalancer_domain_name: "lb-apiserver.kubernetes.local"
+loadbalancer_apiserver:
+  address: <VIP>
+  port: 8383
+```
+
+This domain name, or the default "lb-apiserver.kubernetes.local", will be
+inserted into the `/etc/hosts` file of all servers in the `k8s-cluster`
+group. Note that the HAProxy service itself should be HA as well and requires
+VIP management, which is out of the scope of this doc.
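+For example, assuming the VIP is `10.99.0.21` (an illustrative address), each
+of those servers would get a hosts entry like:
+```
+10.99.0.21 lb-apiserver.kubernetes.local
+```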
+
+An internal LB is the way to go if you do not want to operate a VIP
+management HA stack and require no external and no secure access to the K8s
+API. The group var `loadbalancer_apiserver_localhost` (defaults to `false`)
+controls that deployment layout. When enabled, each node in the `k8s-cluster`
+group is expected to run a loadbalancer that listens on the localhost
+frontend and has all of the apiservers as backends. Here is an example
+configuration for a HAProxy service acting as an internal LB:
+
+```
+listen kubernetes-apiserver-http
+  bind localhost:8080
+  mode tcp
+  timeout client 3h
+  timeout server 3h
+  server master1 <IP1>:8080
+  server master2 <IP2>:8080
+  balance leastconn
+```
+
+And the corresponding example global vars config:
+```
+loadbalancer_apiserver_localhost: true
+```
+
+This var overrides an external LB configuration, if any. Note that for this
+example, the `kubernetes-apiserver-http` endpoint has backends receiving
+unencrypted traffic, which may be a security issue when interconnecting
+different nodes, or may not be, if those nodes belong to an isolated
+management network without external access.
+
+In order to achieve HA for the HAProxy instances, those must be running on
+each node in the `k8s-cluster` group as well, but they require no VIP, thus
+no VIP management.
+
+Access endpoints are evaluated automagically, as follows:
+
+| Endpoint type            | kube-master   | non-master          |
+|--------------------------|---------------|---------------------|
+| Local LB (overrides ext) | http://lc:p   | http://lc:p         |
+| External LB, no internal | https://lb:lp | https://lb:lp       |
+| No ext/int LB (default)  | http://lc:p   | https://m[0].aip:sp |
+
+Where:
+* `m[0]` - the first node in the `kube-master` group;
+* `lb` - the LB FQDN, `apiserver_loadbalancer_domain_name`;
+* `lc` - localhost;
+* `p` - the insecure port, `kube_apiserver_insecure_port`;
+* `sp` - the secure port, `kube_apiserver_port`;
+* `lp` - the LB port, `loadbalancer_apiserver.port`, defaults to the secure port;
+* `ip` - the node IP, defaults to the ansible IP;
+* `aip` - `access_ip`, defaults to the ip.
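+
+For example, with the illustrative addresses used above and the default ports
+(8080 insecure, 443 secure, assuming the Kargo defaults), the evaluated
+access endpoint would be:
+```
+Local LB:                 http://127.0.0.1:8080
+External LB on port 8383: https://lb-apiserver.kubernetes.local:8383
+No LB, non-master node:   https://10.99.0.26:443
+```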
diff --git a/inventory/group_vars/all.yml b/inventory/group_vars/all.yml
index e9b138c92..547a9d612 100644
--- a/inventory/group_vars/all.yml
+++ b/inventory/group_vars/all.yml
@@ -51,22 +51,14 @@ cluster_name: cluster.local
 # but don't know about that address themselves.
 # access_ip: 1.1.1.1
 
-# Service endpoints. May be a VIP or a load balanced frontend IP, like one
-# that a HAProxy or Nginx provides, or just a local service endpoint.
-#
-# Etcd endpoints use a local etcd-proxies to reach the etcd cluster via
-# auto-evaluated endpoints. Those will reuse the access_ip for etcd cluster,
-# if specified, or defer to the localhost:2379 as well.
-
 # Etcd access modes:
 # Enable multiaccess to configure clients to access all of the etcd members directly
 # as the "http://hostX:port, http://hostY:port, ..." and ignore the proxy loadbalancers.
 # This may be the case if clients support and loadbalance multiple etcd servers natively.
 etcd_multiaccess: false
-#
-# TODO apiserver localhost:8080 and localhost:443 endpoints for kubelets and
-# (hyper)kube-* and networking components.
+
+# Assume there are no internal loadbalancers for apiservers
+loadbalancer_apiserver_localhost: false
 
 # Choose network plugin (calico, weave or flannel)
 kube_network_plugin: flannel
@@ -126,21 +118,6 @@ dns_server: "{{ kube_service_addresses|ipaddr('net')|ipaddr(2)|ipaddr('address') }}"
 # like you would do when using nova-client before starting the playbook.
 # cloud_provider:
 
-# For multi masters architecture:
-# kube-proxy doesn't support multiple apiservers for the time being so you'll need to configure your own loadbalancer
-# This domain name will be inserted into the /etc/hosts file of all servers
-# configuration example with haproxy :
-# listen kubernetes-apiserver-https
-# bind 10.99.0.21:8383
-# option ssl-hello-chk
-# mode tcp
-# timeout client 3h
-# timeout server 3h
-# server master1 10.99.0.26:443
-# server master2 10.99.0.27:443
-# balance roundrobin
-# apiserver_loadbalancer_domain_name: "lb-apiserver.kubernetes.local"
-
 ## Set these proxy values in order to update docker daemon to use proxies
 # http_proxy: ""
 # https_proxy: ""

diff --git a/roles/kubernetes/master/tasks/main.yml b/roles/kubernetes/master/tasks/main.yml
index 2d9221ba1..b055b35df 100644
--- a/roles/kubernetes/master/tasks/main.yml
+++ b/roles/kubernetes/master/tasks/main.yml
@@ -26,9 +26,12 @@
   when: apiserver_manifest.changed
 
 - name: wait for the apiserver to be running
-  wait_for:
-    port: "{{kube_apiserver_insecure_port}}"
-    timeout: 60
+  uri: url=http://localhost:{{kube_apiserver_insecure_port}}/healthz
+  register: result
+  until: result.status == 200
+  retries: 10
+  delay: 6
+
 # Create kube-system namespace
 - name: copy 'kube-system' namespace manifest

diff --git a/roles/kubernetes/master/templates/kubectl-kubeconfig.yaml.j2 b/roles/kubernetes/master/templates/kubectl-kubeconfig.yaml.j2
index 5cc74cf9e..a9800d3ac 100644
--- a/roles/kubernetes/master/templates/kubectl-kubeconfig.yaml.j2
+++ b/roles/kubernetes/master/templates/kubectl-kubeconfig.yaml.j2
@@ -5,7 +5,7 @@ preferences: {}
 clusters:
 - cluster:
     certificate-authority-data: {{ kube_node_cert|b64encode }}
-    server: https://{{ groups['kube-master'][0] }}:{{ kube_apiserver_port }}
+    server: {{ kube_apiserver_endpoint }}
   name: {{ cluster_name }}
 contexts:
 - context:

diff --git a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2
index eaecb011b..7961ac36f 100644
--- a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2
+++ b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2
@@ -13,7 +13,8 @@ spec:
     - apiserver
     - --advertise-address={{ ip | default(ansible_default_ipv4.address) }}
     - --etcd-servers={{ etcd_access_endpoint }}
-    - --insecure-bind-address={{ kube_apiserver_insecure_bind_address | default('127.0.0.1') }}
+    - --insecure-bind-address={{ kube_apiserver_insecure_bind_address }}
+    - --apiserver-count={{ kube_apiserver_count }}
     - --admission-control=NamespaceLifecycle,NamespaceExists,LimitRanger,SecurityContextDeny,ServiceAccount,ResourceQuota
     - --service-cluster-ip-range={{ kube_service_addresses }}
     - --client-ca-file={{ kube_cert_dir }}/ca.pem

diff --git a/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2
index 29c56e6a8..f22f49de8 100644
--- a/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2
+++ b/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2
@@ -11,7 +11,7 @@ spec:
     command:
     - /hyperkube
     - controller-manager
-    - --master=http://127.0.0.1:{{kube_apiserver_insecure_port}}
+    - --master={{ kube_apiserver_endpoint }}
     - --leader-elect=true
     - --service-account-private-key-file={{ kube_cert_dir }}/apiserver-key.pem
     - --root-ca-file={{ kube_cert_dir }}/ca.pem
diff --git a/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2
index f642fbf70..024ddbfaa 100644
--- a/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2
+++ b/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2
@@ -12,7 +12,7 @@ spec:
     - /hyperkube
     - scheduler
     - --leader-elect=true
-    - --master=http://127.0.0.1:{{kube_apiserver_insecure_port}}
+    - --master={{ kube_apiserver_endpoint }}
     - --v={{ kube_log_level | default('2') }}
   livenessProbe:
     httpGet:

diff --git a/roles/kubernetes/node/templates/kubelet.j2 b/roles/kubernetes/node/templates/kubelet.j2
index 7eee44993..bab27305e 100644
--- a/roles/kubernetes/node/templates/kubelet.j2
+++ b/roles/kubernetes/node/templates/kubelet.j2
@@ -7,7 +7,7 @@ KUBE_LOGGING="--logtostderr=true"
 {% endif %}
 KUBE_LOG_LEVEL="--v={{ kube_log_level | default('2') }}"
 {% if inventory_hostname in groups['kube-node'] %}
-KUBELET_API_SERVER="--api_servers={% for host in groups['kube-master'] %}https://{{ hostvars[host]['access_ip'] | default(hostvars[host]['ip'] | default(hostvars[host]['ansible_default_ipv4']['address'])) }}:{{ kube_apiserver_port }}{% if not loop.last %},{% endif %}{% endfor %}"
+KUBELET_API_SERVER="--api_servers={{ kube_apiserver_endpoint }}"
 {% endif %}
 # The address for the info server to serve on (set to 0.0.0.0 or "" for all interfaces)
 KUBELET_ADDRESS="--address={{ ip | default("0.0.0.0") }}"

diff --git a/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2 b/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2
index d094766b5..296658cbd 100644
--- a/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2
+++ b/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2
@@ -12,14 +12,8 @@ spec:
     - /hyperkube
     - proxy
     - --v={{ kube_log_level | default('2') }}
-{% if inventory_hostname in groups['kube-master'] %}
-    - --master=http://127.0.0.1:{{kube_apiserver_insecure_port}}
-{% else %}
-{% if loadbalancer_apiserver is defined and apiserver_loadbalancer_domain_name is defined %}
-    - --master=https://{{ apiserver_loadbalancer_domain_name }}:{{ loadbalancer_apiserver.port }}
-{% else %}
-    - --master=https://{{ hostvars[groups['kube-master'][0]]['access_ip'] | default(hostvars[groups['kube-master'][0]]['ip'] | default(hostvars[groups['kube-master'][0]]['ansible_default_ipv4']['address'])) }}:{{ kube_apiserver_port }}
-{% endif%}
+    - --master={{ kube_apiserver_endpoint }}
+{% if not is_kube_master %}
     - --kubeconfig=/etc/kubernetes/node-kubeconfig.yaml
 {% endif %}
     - --bind-address={{ ip | default(ansible_default_ipv4.address) }}

diff --git a/roles/kubernetes/preinstall/tasks/set_facts.yml b/roles/kubernetes/preinstall/tasks/set_facts.yml
index 5dd283fef..527c29b32 100644
--- a/roles/kubernetes/preinstall/tasks/set_facts.yml
+++ b/roles/kubernetes/preinstall/tasks/set_facts.yml
@@ -1,4 +1,26 @@
 ---
+- set_fact: kube_apiserver_count="{{ groups['kube-master'] | length }}"
+- set_fact: kube_apiserver_address="{{ ip | default(ansible_default_ipv4['address']) }}"
+- set_fact: kube_apiserver_access_address="{{ access_ip | default(kube_apiserver_address) }}"
+- set_fact: is_kube_master="{{ inventory_hostname in groups['kube-master'] }}"
+- set_fact: first_kube_master="{{ hostvars[groups['kube-master'][0]]['access_ip'] | default(hostvars[groups['kube-master'][0]]['ip'] | default(hostvars[groups['kube-master'][0]]['ansible_default_ipv4']['address'])) }}"
+- set_fact:
+    kube_apiserver_insecure_bind_address: |-
+      {% if loadbalancer_apiserver_localhost %}{{ kube_apiserver_address }}{% else %}127.0.0.1{% endif %}
+- set_fact:
+    kube_apiserver_endpoint: |-
+      {% if loadbalancer_apiserver_localhost -%}
+      http://127.0.0.1:{{ kube_apiserver_insecure_port }}
+      {%- elif is_kube_master and loadbalancer_apiserver is not defined -%}
+      http://127.0.0.1:{{ kube_apiserver_insecure_port }}
+      {%- else -%}
+      {%- if loadbalancer_apiserver is defined and loadbalancer_apiserver.port is defined -%}
+      https://{{ apiserver_loadbalancer_domain_name|default('lb-apiserver.kubernetes.local') }}:{{ loadbalancer_apiserver.port|default(kube_apiserver_port) }}
+      {%- else -%}
+      https://{{ first_kube_master }}:{{ kube_apiserver_port }}
+      {%- endif -%}
+      {%- endif %}
+
 - set_fact: etcd_address="{{ ip | default(ansible_default_ipv4['address']) }}"
 - set_fact: etcd_access_address="{{ access_ip | default(etcd_address) }}"
 - set_fact: etcd_peer_url="http://{{ etcd_access_address }}:2380"

diff --git a/roles/network_plugin/calico/templates/network-environment.j2 b/roles/network_plugin/calico/templates/network-environment.j2
index 99d592a5e..086803d1b 100644
--- a/roles/network_plugin/calico/templates/network-environment.j2
+++ b/roles/network_plugin/calico/templates/network-environment.j2
@@ -3,7 +3,7 @@
 DEFAULT_IPV4={{ip | default(ansible_default_ipv4.address) }}
 
 # The Kubernetes master IP
-KUBERNETES_MASTER={{ hostvars[groups['kube-master'][0]]['access_ip'] | default(hostvars[groups['kube-master'][0]]['ip'] | default(hostvars[groups['kube-master'][0]]['ansible_default_ipv4']['address'])) }}
+KUBERNETES_MASTER={{ first_kube_master }}
 
 # IP and port of etcd instance used by Calico
 ETCD_AUTHORITY={{ etcd_authority }}
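
For reference, a minimal group vars sketch exercising the endpoint logic above (the addresses reuse the illustrative values from the docs examples and are not part of the patch):

```
# External LB fronting two masters, as in the docs example:
apiserver_loadbalancer_domain_name: "lb-apiserver.kubernetes.local"
loadbalancer_apiserver:
  address: 10.99.0.21
  port: 8383

# Or instead, a localhost LB on every node (overrides the external LB):
# loadbalancer_apiserver_localhost: true
```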