From d588532c9ba4e6e70c7328ce8672e44cc26cd6b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20Kr=C3=BCger?= Date: Tue, 23 Apr 2019 23:46:02 +0200 Subject: [PATCH] Update probe timeouts, delays etc. (#4612) * Fix merge conflict * Add check delay * Add more liveness and readiness options to metrics-server --- .../provision/templates/glusterfs-daemonset.json.j2 | 4 ++-- .../provision/templates/heketi-bootstrap.json.j2 | 2 +- .../provision/templates/heketi-deployment.json.j2 | 2 +- .../ansible/templates/coredns-deployment.yml.j2 | 6 ++---- .../ansible/templates/dashboard.yml.j2 | 2 -- .../ansible/templates/nodelocaldns-daemonset.yml.j2 | 12 ++++++++++-- .../templates/ds-ingress-nginx-controller.yml.j2 | 10 +++++----- .../templates/metrics-server-deployment.yaml.j2 | 10 ++++------ .../calico/templates/calico-node.yml.j2 | 5 ++--- .../canal/templates/canal-node.yaml.j2 | 2 +- .../network_plugin/cilium/templates/cilium-ds.yml.j2 | 5 +---- .../kube-router/templates/kube-router.yml.j2 | 2 +- tests/files/gce_centos7-flannel-addons.yml | 1 + tests/files/packet_centos7-flannel-addons.yml | 3 +-- 14 files changed, 32 insertions(+), 34 deletions(-) diff --git a/contrib/network-storage/heketi/roles/provision/templates/glusterfs-daemonset.json.j2 b/contrib/network-storage/heketi/roles/provision/templates/glusterfs-daemonset.json.j2 index eddd57eb8..74c031ffe 100644 --- a/contrib/network-storage/heketi/roles/provision/templates/glusterfs-daemonset.json.j2 +++ b/contrib/network-storage/heketi/roles/provision/templates/glusterfs-daemonset.json.j2 @@ -69,7 +69,7 @@ }, "readinessProbe": { "timeoutSeconds": 3, - "initialDelaySeconds": 60, + "initialDelaySeconds": 3, "exec": { "command": [ "/bin/bash", @@ -80,7 +80,7 @@ }, "livenessProbe": { "timeoutSeconds": 3, - "initialDelaySeconds": 60, + "initialDelaySeconds": 10, "exec": { "command": [ "/bin/bash", diff --git a/contrib/network-storage/heketi/roles/provision/templates/heketi-bootstrap.json.j2 b/contrib/network-storage/heketi/roles/provision/templates/heketi-bootstrap.json.j2 index bdcf3e958..43048c6b6 100644 --- a/contrib/network-storage/heketi/roles/provision/templates/heketi-bootstrap.json.j2 +++ b/contrib/network-storage/heketi/roles/provision/templates/heketi-bootstrap.json.j2 @@ -106,7 +106,7 @@ }, "livenessProbe": { "timeoutSeconds": 3, - "initialDelaySeconds": 30, + "initialDelaySeconds": 10, "httpGet": { "path": "/hello", "port": 8080 diff --git a/contrib/network-storage/heketi/roles/provision/templates/heketi-deployment.json.j2 b/contrib/network-storage/heketi/roles/provision/templates/heketi-deployment.json.j2 index 5eb71718c..247f1fd9d 100644 --- a/contrib/network-storage/heketi/roles/provision/templates/heketi-deployment.json.j2 +++ b/contrib/network-storage/heketi/roles/provision/templates/heketi-deployment.json.j2 @@ -122,7 +122,7 @@ }, "livenessProbe": { "timeoutSeconds": 3, - "initialDelaySeconds": 30, + "initialDelaySeconds": 10, "httpGet": { "path": "/hello", "port": 8080 diff --git a/roles/kubernetes-apps/ansible/templates/coredns-deployment.yml.j2 b/roles/kubernetes-apps/ansible/templates/coredns-deployment.yml.j2 index fd7bfc9fa..c20e6cf16 100644 --- a/roles/kubernetes-apps/ansible/templates/coredns-deployment.yml.j2 +++ b/roles/kubernetes-apps/ansible/templates/coredns-deployment.yml.j2 @@ -91,19 +91,17 @@ spec: path: /health port: 8080 scheme: HTTP - initialDelaySeconds: 60 timeoutSeconds: 5 successThreshold: 1 - failureThreshold: 5 + failureThreshold: 10 readinessProbe: httpGet: path: /health port: 8080 scheme: HTTP - initialDelaySeconds: 60 timeoutSeconds: 5 successThreshold: 1 - failureThreshold: 5 + failureThreshold: 10 dnsPolicy: Default volumes: - name: config-volume diff --git a/roles/kubernetes-apps/ansible/templates/dashboard.yml.j2 b/roles/kubernetes-apps/ansible/templates/dashboard.yml.j2 index f079bf122..c14d65af1 100644 --- a/roles/kubernetes-apps/ansible/templates/dashboard.yml.j2 +++ b/roles/kubernetes-apps/ansible/templates/dashboard.yml.j2 @@ -184,8 +184,6 @@ spec: scheme: HTTPS path: / port: 8443 - initialDelaySeconds: 30 - timeoutSeconds: 30 volumes: - name: kubernetes-dashboard-certs secret: diff --git a/roles/kubernetes-apps/ansible/templates/nodelocaldns-daemonset.yml.j2 b/roles/kubernetes-apps/ansible/templates/nodelocaldns-daemonset.yml.j2 index 204a2838b..96e404edc 100644 --- a/roles/kubernetes-apps/ansible/templates/nodelocaldns-daemonset.yml.j2 +++ b/roles/kubernetes-apps/ansible/templates/nodelocaldns-daemonset.yml.j2 @@ -60,10 +60,18 @@ spec: path: /health port: 8080 scheme: HTTP - initialDelaySeconds: 60 timeoutSeconds: 5 successThreshold: 1 - failureThreshold: 3 + failureThreshold: 10 + readinessProbe: + httpGet: + host: {{ nodelocaldns_ip }} + path: /health + port: 8080 + scheme: HTTP + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 10 volumeMounts: - name: config-volume mountPath: /etc/coredns diff --git a/roles/kubernetes-apps/ingress_controller/ingress_nginx/templates/ds-ingress-nginx-controller.yml.j2 b/roles/kubernetes-apps/ingress_controller/ingress_nginx/templates/ds-ingress-nginx-controller.yml.j2 index 792a3f55c..06a9a9018 100644 --- a/roles/kubernetes-apps/ingress_controller/ingress_nginx/templates/ds-ingress-nginx-controller.yml.j2 +++ b/roles/kubernetes-apps/ingress_controller/ingress_nginx/templates/ds-ingress-nginx-controller.yml.j2 @@ -76,16 +76,16 @@ spec: path: /healthz port: 10254 scheme: HTTP - initialDelaySeconds: 10 - periodSeconds: 10 + initialDelaySeconds: 5 + timeoutSeconds: 5 successThreshold: 1 - timeoutSeconds: 1 + failureThreshold: 10 readinessProbe: failureThreshold: 3 httpGet: path: /healthz port: 10254 scheme: HTTP - periodSeconds: 10 + timeoutSeconds: 5 successThreshold: 1 - timeoutSeconds: 1 + failureThreshold: 10 diff --git a/roles/kubernetes-apps/metrics_server/templates/metrics-server-deployment.yaml.j2 b/roles/kubernetes-apps/metrics_server/templates/metrics-server-deployment.yaml.j2 index aa54bd373..69bb0f7ab 100644 --- a/roles/kubernetes-apps/metrics_server/templates/metrics-server-deployment.yaml.j2 +++ b/roles/kubernetes-apps/metrics_server/templates/metrics-server-deployment.yaml.j2 @@ -44,24 +44,22 @@ spec: name: https protocol: TCP livenessProbe: - failureThreshold: 3 httpGet: path: /healthz port: https scheme: HTTPS - initialDelaySeconds: 30 - periodSeconds: 30 successThreshold: 1 + initialDelaySeconds: 20 + failureThreshold: 3 timeoutSeconds: 10 readinessProbe: - failureThreshold: 3 httpGet: path: /healthz port: 443 scheme: HTTPS - initialDelaySeconds: 30 - periodSeconds: 30 successThreshold: 1 + initialDelaySeconds: 20 + failureThreshold: 3 timeoutSeconds: 10 securityContext: # Currently non root is not supported: diff --git a/roles/network_plugin/calico/templates/calico-node.yml.j2 b/roles/network_plugin/calico/templates/calico-node.yml.j2 index f851d3930..d8ea78a44 100644 --- a/roles/network_plugin/calico/templates/calico-node.yml.j2 +++ b/roles/network_plugin/calico/templates/calico-node.yml.j2 @@ -218,10 +218,10 @@ spec: host: 127.0.0.1 path: /liveness port: 9099 - periodSeconds: 10 - initialDelaySeconds: 10 + initialDelaySeconds: 5 failureThreshold: 6 readinessProbe: + failureThreshold: 6 {% if calico_version is version('v3.3.0', '<') %} httpGet: host: 127.0.0.1 @@ -234,7 +234,6 @@ spec: - -bird-ready - -felix-ready {% endif %} - periodSeconds: 10 volumeMounts: - mountPath: /lib/modules name: lib-modules diff --git a/roles/network_plugin/canal/templates/canal-node.yaml.j2 b/roles/network_plugin/canal/templates/canal-node.yaml.j2 index 7d9da1cf2..7b3cba83e 100644 --- a/roles/network_plugin/canal/templates/canal-node.yaml.j2 +++ b/roles/network_plugin/canal/templates/canal-node.yaml.j2 @@ -253,7 +253,7 @@ spec: path: /liveness port: 9099 periodSeconds: 10 - initialDelaySeconds: 10 + initialDelaySeconds: 5 failureThreshold: 6 readinessProbe: {% if calico_version is version('v3.3.0', '<')%} diff --git a/roles/network_plugin/cilium/templates/cilium-ds.yml.j2 b/roles/network_plugin/cilium/templates/cilium-ds.yml.j2 index 21b167724..073da965d 100755 --- a/roles/network_plugin/cilium/templates/cilium-ds.yml.j2 +++ b/roles/network_plugin/cilium/templates/cilium-ds.yml.j2 @@ -132,10 +132,7 @@ spec: command: - cilium - status - # The initial delay for the liveness probe is intentionally large to - # avoid an endless kill & restart cycle if in the event that the initial - # bootstrapping takes longer than expected. - initialDelaySeconds: 120 + initialDelaySeconds: 15 failureThreshold: 10 periodSeconds: 10 readinessProbe: diff --git a/roles/network_plugin/kube-router/templates/kube-router.yml.j2 b/roles/network_plugin/kube-router/templates/kube-router.yml.j2 index 2e50fd171..52fd47ae1 100644 --- a/roles/network_plugin/kube-router/templates/kube-router.yml.j2 +++ b/roles/network_plugin/kube-router/templates/kube-router.yml.j2 @@ -104,7 +104,7 @@ spec: httpGet: path: /healthz port: 20244 - initialDelaySeconds: 10 + initialDelaySeconds: 5 periodSeconds: 3 resources: requests: diff --git a/tests/files/gce_centos7-flannel-addons.yml b/tests/files/gce_centos7-flannel-addons.yml index d430d8436..cf9fc5d7a 100644 --- a/tests/files/gce_centos7-flannel-addons.yml +++ b/tests/files/gce_centos7-flannel-addons.yml @@ -22,6 +22,7 @@ kube_encrypt_secret_data: true cert_manager_enabled: true # Disabled temporarily metrics_server_enabled: false +metrics_server_kubelet_insecure_tls: true kube_token_auth: true kube_basic_auth: true enable_nodelocaldns: false diff --git a/tests/files/packet_centos7-flannel-addons.yml b/tests/files/packet_centos7-flannel-addons.yml index 451f414e8..2979e6b14 100644 --- a/tests/files/packet_centos7-flannel-addons.yml +++ b/tests/files/packet_centos7-flannel-addons.yml @@ -19,8 +19,7 @@ ingress_nginx_enabled: true cert_manager_enabled: true # Disabled temporarily metrics_server_enabled: false +metrics_server_kubelet_insecure_tls: true kube_token_auth: true kube_basic_auth: true enable_nodelocaldns: false - -vm_memory: 6144Mi