From 7a9161d46234d919002e1e4b3e09758ed52ebd5a Mon Sep 17 00:00:00 2001 From: Matthew Mosesohn Date: Fri, 3 Feb 2017 18:26:30 +0300 Subject: [PATCH 1/3] Prevent dynamic port allocation in nodePort range kube_apiserver_node_port_range should be accessible only to kube-proxy and must not be taken by dynamic port allocation. This is potentially temporary, pending a fix for https://github.com/kubernetes/kubernetes/issues/40920. --- roles/kubernetes/node/defaults/main.yml | 4 ++++ roles/kubernetes/node/tasks/main.yml | 10 ++++++++++ 2 files changed, 14 insertions(+) diff --git a/roles/kubernetes/node/defaults/main.yml b/roles/kubernetes/node/defaults/main.yml index a74e52b77..d60b76208 100644 --- a/roles/kubernetes/node/defaults/main.yml +++ b/roles/kubernetes/node/defaults/main.yml @@ -29,3 +29,7 @@ nginx_image_repo: nginx nginx_image_tag: 1.11.4-alpine etcd_config_dir: /etc/ssl/etcd + +# A port range to reserve for services with NodePort visibility. +# Inclusive at both ends of the range. +kube_apiserver_node_port_range: "30000-32767" diff --git a/roles/kubernetes/node/tasks/main.yml b/roles/kubernetes/node/tasks/main.yml index 3e0c095e1..2c18937c9 100644 --- a/roles/kubernetes/node/tasks/main.yml +++ b/roles/kubernetes/node/tasks/main.yml @@ -21,6 +21,16 @@ notify: restart kubelet tags: kubelet +- name: Ensure nodePort range is reserved + sysctl: + name: net.ipv4.ip_local_reserved_ports + value: "{{ kube_apiserver_node_port_range }}" + sysctl_set: yes + state: present + reload: yes + when: kube_apiserver_node_port_range is defined + tags: kube-proxy + - name: Write proxy manifest template: src: manifests/kube-proxy.manifest.j2 From 3b816ee660f2fc3e5ec73dd9f418c552dc77c7cb Mon Sep 17 00:00:00 2001 From: Aleksandr Didenko Date: Thu, 19 Jan 2017 11:56:14 +0100 Subject: [PATCH 2/3] Update playbooks to support new netchecker Netchecker has been rewritten in Go and takes some new args instead of env variables. Also, netchecker-server no longer requires a kubectl container. 
Updating playbooks accordingly. --- docs/netcheck.md | 6 +++--- roles/download/defaults/main.yml | 14 +++----------- roles/kubernetes-apps/ansible/defaults/main.yml | 5 ----- .../ansible/templates/netchecker-agent-ds.yml | 7 +++++-- .../templates/netchecker-agent-hostnet-ds.yml | 7 +++++-- .../ansible/templates/netchecker-server-pod.yml | 15 ++++----------- roles/kubernetes-apps/meta/main.yaml | 4 ---- roles/kubernetes/node/meta/main.yml | 4 ---- 8 files changed, 20 insertions(+), 42 deletions(-) diff --git a/docs/netcheck.md b/docs/netcheck.md index 408b0fd8c..bee04cbb3 100644 --- a/docs/netcheck.md +++ b/docs/netcheck.md @@ -2,7 +2,7 @@ Network Checker Application =========================== With the ``deploy_netchecker`` var enabled (defaults to false), Kargo deploys a -Network Checker Application from the 3rd side `l23network/mcp-netchecker` docker +Network Checker Application from the 3rd side `l23network/k8s-netchecker` docker images. It consists of the server and agents trying to reach the server by usual for Kubernetes applications network connectivity meanings. 
Therefore, this automagically verifies a pod to pod connectivity via the cluster IP and checks @@ -25,8 +25,8 @@ There are related application specifc variables: netchecker_port: 31081 agent_report_interval: 15 netcheck_namespace: default -agent_img: "quay.io/l23network/mcp-netchecker-agent:v0.1" -server_img: "quay.io/l23network/mcp-netchecker-server:v0.1" +agent_img: "quay.io/l23network/k8s-netchecker-agent:v1.0" +server_img: "quay.io/l23network/k8s-netchecker-server:v1.0" ``` Note that the application verifies DNS resolve for FQDNs comprising only the diff --git a/roles/download/defaults/main.yml b/roles/download/defaults/main.yml index 7e06ac574..ef3038d83 100644 --- a/roles/download/defaults/main.yml +++ b/roles/download/defaults/main.yml @@ -59,11 +59,9 @@ hyperkube_image_repo: "quay.io/coreos/hyperkube" hyperkube_image_tag: "{{ kube_version }}_coreos.0" pod_infra_image_repo: "gcr.io/google_containers/pause-amd64" pod_infra_image_tag: "{{ pod_infra_version }}" -netcheck_tag: v0.1 -netcheck_kubectl_tag: v0.18.0-120-gaeb4ac55ad12b1-dirty -netcheck_agent_img_repo: "quay.io/l23network/mcp-netchecker-agent" -netcheck_server_img_repo: "quay.io/l23network/mcp-netchecker-server" -netcheck_kubectl_img_repo: "gcr.io/google_containers/kubectl" +netcheck_tag: "v1.0" +netcheck_agent_img_repo: "quay.io/l23network/k8s-netchecker-agent" +netcheck_server_img_repo: "quay.io/l23network/k8s-netchecker-server" weave_kube_image_repo: "weaveworks/weave-kube" weave_kube_image_tag: "{{ weave_version }}" weave_npc_image_repo: "weaveworks/weave-npc" @@ -96,12 +94,6 @@ downloads: tag: "{{ netcheck_tag }}" sha256: "{{ netcheck_agent_digest_checksum|default(None) }}" enabled: "{{ deploy_netchecker|bool }}" - netcheck_kubectl: - container: true - repo: "{{ netcheck_kubectl_img_repo }}" - tag: "{{ netcheck_kubectl_tag }}" - sha256: "{{ netcheck_kubectl_digest_checksum|default(None) }}" - enabled: "{{ deploy_netchecker|bool }}" etcd: version: "{{etcd_version}}" dest: "etcd/etcd-{{ 
etcd_version }}-linux-amd64.tar.gz" diff --git a/roles/kubernetes-apps/ansible/defaults/main.yml b/roles/kubernetes-apps/ansible/defaults/main.yml index 14deb333d..dcd54c9fe 100644 --- a/roles/kubernetes-apps/ansible/defaults/main.yml +++ b/roles/kubernetes-apps/ansible/defaults/main.yml @@ -31,7 +31,6 @@ agent_report_interval: 15 netcheck_namespace: default agent_img: "{{ netcheck_agent_img_repo }}:{{ netcheck_tag }}" server_img: "{{ netcheck_server_img_repo }}:{{ netcheck_tag }}" -kubectl_image: "{{ netcheck_kubectl_img_repo }}:{{ netcheck_kubectl_tag }}" # Limits for netchecker apps netchecker_agent_cpu_limit: 30m @@ -42,10 +41,6 @@ netchecker_server_cpu_limit: 100m netchecker_server_memory_limit: 256M netchecker_server_cpu_requests: 50m netchecker_server_memory_requests: 128M -netchecker_kubectl_cpu_limit: 30m -netchecker_kubectl_memory_limit: 128M -netchecker_kubectl_cpu_requests: 15m -netchecker_kubectl_memory_requests: 64M # SSL etcd_cert_dir: "/etc/ssl/etcd/ssl" diff --git a/roles/kubernetes-apps/ansible/templates/netchecker-agent-ds.yml b/roles/kubernetes-apps/ansible/templates/netchecker-agent-ds.yml index 41900ab33..139498733 100644 --- a/roles/kubernetes-apps/ansible/templates/netchecker-agent-ds.yml +++ b/roles/kubernetes-apps/ansible/templates/netchecker-agent-ds.yml @@ -20,8 +20,11 @@ spec: valueFrom: fieldRef: fieldPath: metadata.name - - name: REPORT_INTERVAL - value: '{{ agent_report_interval }}' + args: + - "-v=5" + - "-alsologtostderr=true" + - "-serverendpoint=netchecker-service:8081" + - "-reportinterval={{ agent_report_interval }}" imagePullPolicy: {{ k8s_image_pull_policy }} resources: limits: diff --git a/roles/kubernetes-apps/ansible/templates/netchecker-agent-hostnet-ds.yml b/roles/kubernetes-apps/ansible/templates/netchecker-agent-hostnet-ds.yml index 5a6a63f36..834321e87 100644 --- a/roles/kubernetes-apps/ansible/templates/netchecker-agent-hostnet-ds.yml +++ b/roles/kubernetes-apps/ansible/templates/netchecker-agent-hostnet-ds.yml @@ 
-21,8 +21,11 @@ spec: valueFrom: fieldRef: fieldPath: metadata.name - - name: REPORT_INTERVAL - value: '{{ agent_report_interval }}' + args: + - "-v=5" + - "-alsologtostderr=true" + - "-serverendpoint=netchecker-service:8081" + - "-reportinterval={{ agent_report_interval }}" imagePullPolicy: {{ k8s_image_pull_policy }} resources: limits: diff --git a/roles/kubernetes-apps/ansible/templates/netchecker-server-pod.yml b/roles/kubernetes-apps/ansible/templates/netchecker-server-pod.yml index c1d8ddb9f..06aea406a 100644 --- a/roles/kubernetes-apps/ansible/templates/netchecker-server-pod.yml +++ b/roles/kubernetes-apps/ansible/templates/netchecker-server-pod.yml @@ -21,15 +21,8 @@ spec: ports: - containerPort: 8081 hostPort: 8081 - - name: kubectl-proxy - image: "{{ kubectl_image }}" - imagePullPolicy: {{ k8s_image_pull_policy }} - resources: - limits: - cpu: {{ netchecker_kubectl_cpu_limit }} - memory: {{ netchecker_kubectl_memory_limit }} - requests: - cpu: {{ netchecker_kubectl_cpu_requests }} - memory: {{ netchecker_kubectl_memory_requests }} args: - - proxy + - "-v=5" + - "-logtostderr" + - "-kubeproxyinit" + - "-endpoint=0.0.0.0:8081" diff --git a/roles/kubernetes-apps/meta/main.yaml b/roles/kubernetes-apps/meta/main.yaml index 2ddb435de..e847f471e 100644 --- a/roles/kubernetes-apps/meta/main.yaml +++ b/roles/kubernetes-apps/meta/main.yaml @@ -12,9 +12,5 @@ dependencies: file: "{{ downloads.netcheck_agent }}" when: deploy_netchecker tags: [download, netchecker] - - role: download - file: "{{ downloads.netcheck_kubectl }}" - when: deploy_netchecker - tags: [download, netchecker] - {role: kubernetes-apps/ansible, tags: apps} - {role: kubernetes-apps/kpm, tags: [apps, kpm]} diff --git a/roles/kubernetes/node/meta/main.yml b/roles/kubernetes/node/meta/main.yml index 62720257d..2ef549c90 100644 --- a/roles/kubernetes/node/meta/main.yml +++ b/roles/kubernetes/node/meta/main.yml @@ -22,10 +22,6 @@ dependencies: file: "{{ downloads.netcheck_agent }}" when: 
deploy_netchecker tags: [download, netchecker] - - role: download - file: "{{ downloads.netcheck_kubectl }}" - when: deploy_netchecker - tags: [download, netchecker] - role: download file: "{{ downloads.kubednsmasq }}" tags: [download, dnsmasq] From ad2e1e10bf9b1d42b30ebbebc462494f393fe3c5 Mon Sep 17 00:00:00 2001 From: Matthew Mosesohn Date: Tue, 7 Feb 2017 17:46:02 +0300 Subject: [PATCH 3/3] Re-tune ETCD performance params Reduce election timeout to 5000ms (was 10000ms) Raise heartbeat interval to 250ms (was 100ms) Remove etcd cpu share (was 300) Make etcd_cpu_limit and etcd_memory_limit optional. --- roles/etcd/defaults/main.yml | 7 ++++++- roles/etcd/templates/etcd-docker.service.j2 | 7 ++++++- roles/etcd/templates/etcd.j2 | 3 ++- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/roles/etcd/defaults/main.yml b/roles/etcd/defaults/main.yml index e733fe56d..9f117da76 100644 --- a/roles/etcd/defaults/main.yml +++ b/roles/etcd/defaults/main.yml @@ -7,6 +7,11 @@ etcd_cert_group: root etcd_script_dir: "{{ bin_dir }}/etcd-scripts" +etcd_heartbeat_interval: "250" +etcd_election_timeout: "5000" + # Limits etcd_memory_limit: 512M -etcd_cpu_limit: 300m + +# Uncomment to set CPU share for etcd +#etcd_cpu_limit: 300m diff --git a/roles/etcd/templates/etcd-docker.service.j2 b/roles/etcd/templates/etcd-docker.service.j2 index 223d2d842..09bd8aacc 100644 --- a/roles/etcd/templates/etcd-docker.service.j2 +++ b/roles/etcd/templates/etcd-docker.service.j2 @@ -14,7 +14,12 @@ ExecStart={{ docker_bin_dir }}/docker run --restart=on-failure:5 \ -v /etc/ssl/certs:/etc/ssl/certs:ro \ -v {{ etcd_cert_dir }}:{{ etcd_cert_dir }}:ro \ -v /var/lib/etcd:/var/lib/etcd:rw \ ---memory={{ etcd_memory_limit|regex_replace('Mi', 'M') }} --cpu-shares={{ etcd_cpu_limit|regex_replace('m', '') }} \ +{% if etcd_memory_limit is defined %} +--memory={{ etcd_memory_limit|regex_replace('Mi', 'M') }} \ +{% endif %} +{% if etcd_cpu_limit is defined %} +--cpu-shares={{ 
etcd_cpu_limit|regex_replace('m', '') }} \ +{% endif %} --name={{ etcd_member_name | default("etcd") }} \ {{ etcd_image_repo }}:{{ etcd_image_tag }} \ {% if etcd_after_v3 %} diff --git a/roles/etcd/templates/etcd.j2 b/roles/etcd/templates/etcd.j2 index efa5a51bd..a4bb8d36a 100644 --- a/roles/etcd/templates/etcd.j2 +++ b/roles/etcd/templates/etcd.j2 @@ -4,7 +4,8 @@ ETCD_INITIAL_ADVERTISE_PEER_URLS={{ etcd_peer_url }} ETCD_INITIAL_CLUSTER_STATE={% if etcd_cluster_is_healthy.rc != 0 | bool %}new{% else %}existing{% endif %} ETCD_LISTEN_CLIENT_URLS=https://{{ etcd_address }}:2379,https://127.0.0.1:2379 -ETCD_ELECTION_TIMEOUT=10000 +ETCD_ELECTION_TIMEOUT={{ etcd_election_timeout }} +ETCD_HEARTBEAT_INTERVAL={{ etcd_heartbeat_interval }} ETCD_INITIAL_CLUSTER_TOKEN=k8s_etcd ETCD_LISTEN_PEER_URLS=https://{{ etcd_address }}:2380 ETCD_NAME={{ etcd_member_name }}