nodelocaldns: allow a secondary pod for nodelocaldns for local-HA (#8100)

* nodelocaldns: allow a secondary pod for nodelocaldns for local-HA

* CI: add job to test nodelocaldns secondary
This commit is contained in:
Cristian Calin 2021-11-09 19:57:47 +02:00 committed by GitHub
parent 801268d5c1
commit 039205560a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 281 additions and 17 deletions

View file

@ -194,6 +194,11 @@ packet_amazon-linux-2-aio:
extends: .packet_pr
when: manual
# Manually triggered PR job; per its name it covers CentOS 8 with Calico
# and the secondary nodelocaldns pod.
packet_centos8-calico-nodelocaldns-secondary:
  extends: .packet_pr
  stage: deploy-part2
  when: manual
packet_fedora34-kube-ovn-containerd:
stage: deploy-part2
extends: .packet_periodic

View file

@ -212,6 +212,22 @@ nodelocaldns_external_zones:
See [dns_etchosts](#dns_etchosts-coredns) above.
### Nodelocal DNS HA
Under some circumstances the single-pod nodelocaldns implementation may not be replaced quickly enough, so a cluster upgrade or a nodelocaldns upgrade can cause DNS requests to time out for short intervals. If for any reason your applications cannot tolerate this behavior, you can enable a redundant nodelocal DNS pod on each node:
```yaml
enable_nodelocaldns_secondary: true
```
**Note:** when the nodelocaldns secondary is enabled, the primary is instructed to no longer tear down the iptables rules it sets up to direct traffic to itself. In case both daemonsets have failing pods on the same node, this can cause a DNS blackout with traffic no longer being forwarded to the coredns central service as a fallback. Please ensure you account for this also if you decide to disable the nodelocaldns cache.
There is a time delta (in seconds) allowed for the secondary nodelocaldns to survive in case both primary and secondary daemonsets are updated at the same time. It is advised to tune this variable after you have performed some tests in your own environment.
```yaml
nodelocaldns_secondary_skew_seconds: 5
```
## Limitations
* Kubespray has yet ways to configure Kubedns addon to forward requests SkyDns can

View file

@ -166,9 +166,12 @@ dns_mode: coredns
# manual_dns_server: 10.x.x.x
# Enable nodelocal dns cache
enable_nodelocaldns: true
# Run a second, redundant nodelocaldns pod on every node (local HA)
enable_nodelocaldns_secondary: false
nodelocaldns_ip: 169.254.25.10
# Port of the CoreDNS health endpoint served by the primary nodelocaldns
nodelocaldns_health_port: 9254
# Port of the CoreDNS health endpoint served by the secondary nodelocaldns
nodelocaldns_second_health_port: 9256
# When true, the prometheus listener is bound to the node's host IP (MY_HOST_IP)
nodelocaldns_bind_metrics_host_ip: false
# Seconds the secondary pod sleeps in its preStop hook; also used as its termination grace period
nodelocaldns_secondary_skew_seconds: 5
# nodelocaldns_external_zones:
# - zones:
# - example.com

View file

@ -610,7 +610,7 @@ coredns_image_is_namespaced: "{{ (kube_version is version('v1.21.0','>=')) or (c
coredns_image_repo: "{{ kube_image_repo }}{{'/coredns/coredns' if (coredns_image_is_namespaced | bool) else '/coredns' }}"
coredns_image_tag: "{{ coredns_version if (coredns_image_is_namespaced | bool) else (coredns_version | regex_replace('^v', '')) }}"
nodelocaldns_version: "1.17.1"
nodelocaldns_version: "1.21.1"
nodelocaldns_image_repo: "{{ kube_image_repo }}/dns/k8s-dns-node-cache"
nodelocaldns_image_tag: "{{ nodelocaldns_version }}"

View file

@ -17,6 +17,8 @@ nodelocaldns_cpu_requests: 100m
nodelocaldns_memory_limit: 170Mi
nodelocaldns_memory_requests: 70Mi
nodelocaldns_ds_nodeselector: "kubernetes.io/os: linux"
# Port the primary nodelocaldns Corefile binds its prometheus listener to
nodelocaldns_prometheus_port: 9253
# Prometheus listener port for the secondary nodelocaldns; kept distinct from the
# primary's port (the secondary pod runs with hostNetwork)
nodelocaldns_secondary_prometheus_port: 9255
# Limits for dns-autoscaler
dns_autoscaler_cpu_requests: 20m

View file

@ -48,6 +48,7 @@
- "{{ coredns_manifests.results | default({}) }}"
- "{{ coredns_secondary_manifests.results | default({}) }}"
- "{{ nodelocaldns_manifests.results | default({}) }}"
- "{{ nodelocaldns_second_manifests.results | default({}) }}"
when:
- dns_mode != 'none'
- inventory_hostname == groups['kube_control_plane'][0]

View file

@ -43,3 +43,31 @@
tags:
- nodelocaldns
- coredns
# Renders the secondary nodelocaldns DaemonSet manifest into kube_config_dir.
# Runs only on the first control-plane host, and only when both the primary
# and the secondary nodelocaldns are enabled. The rendered result is registered
# so a later task can pick up the manifest.
- name: Kubernetes Apps | Lay Down nodelocaldns-secondary Template
  when:
    - enable_nodelocaldns
    - enable_nodelocaldns_secondary
    - inventory_hostname == groups['kube_control_plane'] | first
  tags:
    - nodelocaldns
    - coredns
  vars:
    # Cluster DNS service IP(s) the node-local cache forwards cluster zones to;
    # includes the secondary coredns service in 'coredns_dual' mode.
    forwardTarget: >-
      {%- if secondaryclusterIP is defined and dns_mode == 'coredns_dual' -%}
      {{ primaryClusterIP }} {{ secondaryclusterIP }}
      {%- else -%}
      {{ primaryClusterIP }}
      {%- endif -%}
    # Upstream resolvers for everything else: the configured upstream servers
    # in host_resolvconf mode, otherwise the node's /etc/resolv.conf.
    upstreamForwardTarget: >-
      {%- if resolvconf_mode == 'host_resolvconf' and upstream_dns_servers is defined and upstream_dns_servers|length > 0 -%}
      {{ upstream_dns_servers|join(' ') }}
      {%- else -%}
      /etc/resolv.conf
      {%- endif -%}
  template:
    src: "{{ item.file }}.j2"
    dest: "{{ kube_config_dir }}/{{ item.file }}"
  loop:
    - name: nodelocaldns
      file: nodelocaldns-second-daemonset.yml
      type: daemonset
  register: nodelocaldns_second_manifests

View file

@ -17,7 +17,7 @@ data:
loop
bind {{ nodelocaldns_ip }}
forward . {{ block['nameservers'] | join(' ') }}
prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:9253
prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:{{ nodelocaldns_prometheus_port }}
log
{% if dns_etchosts | default(None) %}
hosts /etc/coredns/hosts {
@ -39,7 +39,7 @@ data:
forward . {{ forwardTarget }} {
force_tcp
}
prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:9253
prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:{{ nodelocaldns_prometheus_port }}
health {{ nodelocaldns_ip }}:{{ nodelocaldns_health_port }}
{% if dns_etchosts | default(None) %}
hosts /etc/coredns/hosts {
@ -56,7 +56,7 @@ data:
forward . {{ forwardTarget }} {
force_tcp
}
prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:9253
prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:{{ nodelocaldns_prometheus_port }}
}
ip6.arpa:53 {
errors
@ -67,7 +67,7 @@ data:
forward . {{ forwardTarget }} {
force_tcp
}
prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:9253
prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:{{ nodelocaldns_prometheus_port }}
}
.:53 {
errors
@ -76,13 +76,91 @@ data:
loop
bind {{ nodelocaldns_ip }}
forward . {{ upstreamForwardTarget }}
prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:9253
prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:{{ nodelocaldns_prometheus_port }}
{% if dns_etchosts | default(None) %}
hosts /etc/coredns/hosts {
fallthrough
}
{% endif %}
}
{% if enable_nodelocaldns_secondary %}
Corefile-second: |
{% if nodelocaldns_external_zones is defined and nodelocaldns_external_zones|length > 0 %}
{% for block in nodelocaldns_external_zones %}
{{ block['zones'] | join(' ') }} {
errors
cache {{ block['cache'] | default(30) }}
reload
loop
bind {{ nodelocaldns_ip }}
forward . {{ block['nameservers'] | join(' ') }}
prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:{{ nodelocaldns_secondary_prometheus_port }}
log
{% if dns_etchosts | default(None) %}
hosts /etc/coredns/hosts {
fallthrough
}
{% endif %}
}
{% endfor %}
{% endif %}
{{ dns_domain }}:53 {
errors
cache {
success 9984 30
denial 9984 5
}
reload
loop
bind {{ nodelocaldns_ip }}
forward . {{ forwardTarget }} {
force_tcp
}
prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:{{ nodelocaldns_secondary_prometheus_port }}
health {{ nodelocaldns_ip }}:{{ nodelocaldns_second_health_port }}
{% if dns_etchosts | default(None) %}
hosts /etc/coredns/hosts {
fallthrough
}
{% endif %}
}
in-addr.arpa:53 {
errors
cache 30
reload
loop
bind {{ nodelocaldns_ip }}
forward . {{ forwardTarget }} {
force_tcp
}
prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:{{ nodelocaldns_secondary_prometheus_port }}
}
ip6.arpa:53 {
errors
cache 30
reload
loop
bind {{ nodelocaldns_ip }}
forward . {{ forwardTarget }} {
force_tcp
}
prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:{{ nodelocaldns_secondary_prometheus_port }}
}
.:53 {
errors
cache 30
reload
loop
bind {{ nodelocaldns_ip }}
forward . {{ upstreamForwardTarget }}
prometheus {% if nodelocaldns_bind_metrics_host_ip %}{$MY_HOST_IP}{% endif %}:{{ nodelocaldns_secondary_prometheus_port }}
{% if dns_etchosts | default(None) %}
hosts /etc/coredns/hosts {
fallthrough
}
{% endif %}
}
{% endif %}
{% if dns_etchosts | default(None) %}
hosts: |
{{ dns_etchosts | indent(width=4, indentfirst=None) }}

View file

@ -16,7 +16,7 @@ spec:
k8s-app: nodelocaldns
annotations:
prometheus.io/scrape: 'true'
prometheus.io/port: '9253'
prometheus.io/port: '{{ nodelocaldns_prometheus_port }}'
spec:
nodeSelector:
{{ nodelocaldns_ds_nodeselector }}
@ -38,16 +38,16 @@ spec:
requests:
cpu: {{ nodelocaldns_cpu_requests }}
memory: {{ nodelocaldns_memory_requests }}
args: [ "-localip", "{{ nodelocaldns_ip }}", "-conf", "/etc/coredns/Corefile", "-upstreamsvc", "coredns" ]
securityContext:
privileged: true
{% if nodelocaldns_bind_metrics_host_ip %}
env:
- name: MY_HOST_IP
valueFrom:
fieldRef:
fieldPath: status.hostIP
{% endif %}
args:
- -localip
- {{ nodelocaldns_ip }}
- -conf
- /etc/coredns/Corefile
- -upstreamsvc
- coredns
{% if enable_nodelocaldns_secondary %}
- -skipteardown
{% else %}
ports:
- containerPort: 53
name: dns
@ -58,6 +58,16 @@ spec:
# Follow the configurable metrics port rather than a fixed 9253, so the declared
# port stays in step with the Corefile prometheus listener and scrape annotation.
- containerPort: {{ nodelocaldns_prometheus_port }}
  name: metrics
  protocol: TCP
{% endif %}
securityContext:
privileged: true
{% if nodelocaldns_bind_metrics_host_ip %}
env:
- name: MY_HOST_IP
valueFrom:
fieldRef:
fieldPath: status.hostIP
{% endif %}
livenessProbe:
httpGet:
host: {{ nodelocaldns_ip }}

View file

@ -0,0 +1,103 @@
# Secondary (redundant) nodelocaldns DaemonSet.
# Runs next to the primary nodelocaldns on every selected node, binding the same
# node-local IP but reading the "Corefile-second" key of the shared ConfigMap,
# so DNS keeps being served while one of the two pods is being replaced.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nodelocaldns-second
  namespace: kube-system
  labels:
    k8s-app: kube-dns
    addonmanager.kubernetes.io/mode: Reconcile
spec:
  selector:
    matchLabels:
      k8s-app: nodelocaldns-second
  template:
    metadata:
      labels:
        k8s-app: nodelocaldns-second
      annotations:
        prometheus.io/scrape: 'true'
        prometheus.io/port: '{{ nodelocaldns_secondary_prometheus_port }}'
    spec:
      nodeSelector:
        {{ nodelocaldns_ds_nodeselector }}
      priorityClassName: system-cluster-critical
      serviceAccountName: nodelocaldns
      hostNetwork: true
      dnsPolicy: Default  # Don't use cluster DNS.
      tolerations:
        - effect: NoSchedule
          operator: "Exists"
        - effect: NoExecute
          operator: "Exists"
      containers:
        - name: node-cache
          image: "{{ nodelocaldns_image_repo }}:{{ nodelocaldns_image_tag }}"
          resources:
            limits:
              memory: {{ nodelocaldns_memory_limit }}
            requests:
              cpu: {{ nodelocaldns_cpu_requests }}
              memory: {{ nodelocaldns_memory_requests }}
          # -skipteardown: leave the shared interface/iptables setup in place on
          # exit, since the other nodelocaldns pod on the node still relies on it.
          args:
            - "-localip"
            - "{{ nodelocaldns_ip }}"
            - "-conf"
            - "/etc/coredns/Corefile"
            - "-upstreamsvc"
            - "coredns"
            - "-skipteardown"
          securityContext:
            privileged: true
{% if nodelocaldns_bind_metrics_host_ip %}
          env:
            - name: MY_HOST_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.hostIP
{% endif %}
          # Probe this pod's own health listener: Corefile-second serves health
          # on the secondary health port. Probing the primary's port would mark
          # the secondary unhealthy exactly when the primary is down.
          livenessProbe:
            httpGet:
              host: {{ nodelocaldns_ip }}
              path: /health
              port: {{ nodelocaldns_second_health_port }}
              scheme: HTTP
            timeoutSeconds: 5
            successThreshold: 1
            failureThreshold: 10
          readinessProbe:
            httpGet:
              host: {{ nodelocaldns_ip }}
              path: /health
              port: {{ nodelocaldns_second_health_port }}
              scheme: HTTP
            timeoutSeconds: 5
            successThreshold: 1
            failureThreshold: 10
          volumeMounts:
            - name: config-volume
              mountPath: /etc/coredns
            - name: xtables-lock
              mountPath: /run/xtables.lock
          lifecycle:
            # Delay shutdown by the configured skew, then kill PID 1.
            preStop:
              exec:
                command:
                  - sh
                  - -c
                  - sleep {{ nodelocaldns_secondary_skew_seconds }} && kill -9 1
      volumes:
        - name: config-volume
          configMap:
            name: nodelocaldns
            items:
              # Mount the secondary Corefile under the path node-cache is told to read.
              - key: Corefile-second
                path: Corefile
{% if dns_etchosts | default(None) %}
              - key: hosts
                path: hosts
{% endif %}
        - name: xtables-lock
          hostPath:
            path: /run/xtables.lock
            type: FileOrCreate
      # Implement a time skew between the main nodelocaldns and this secondary.
      # Since the two nodelocaldns instances share the :53 port, we want to keep
      # at least one running at any time even if the manifests are replaced simultaneously
      terminationGracePeriodSeconds: {{ nodelocaldns_secondary_skew_seconds }}
  updateStrategy:
    rollingUpdate:
      maxUnavailable: {{ serial | default('20%') }}
    type: RollingUpdate

View file

@ -93,9 +93,12 @@ dns_mode: coredns
# Enable nodelocal dns cache
enable_nodelocaldns: true
# Run a second, redundant nodelocaldns pod on every node (local HA)
enable_nodelocaldns_secondary: false
nodelocaldns_ip: 169.254.25.10
# Port of the CoreDNS health endpoint served by the primary nodelocaldns
nodelocaldns_health_port: 9254
# Port of the CoreDNS health endpoint served by the secondary nodelocaldns
nodelocaldns_second_health_port: 9256
# When true, the prometheus listener is bound to the node's host IP (MY_HOST_IP)
nodelocaldns_bind_metrics_host_ip: false
# Seconds the secondary pod sleeps in its preStop hook; also used as its termination grace period
nodelocaldns_secondary_skew_seconds: 5
# Should be set to a cluster IP if using a custom cluster DNS
manual_dns_server: ""

View file

@ -0,0 +1,15 @@
---
# Test instance provisioning
cloud_image: centos-8
mode: default
vm_memory: 3072Mi

# Kubespray options exercised by this scenario: Calico networking with the
# redundant (secondary) nodelocaldns pod switched on.
kube_network_plugin: calico
enable_nodelocaldns_secondary: true
dns_min_replicas: 1
deploy_netchecker: true
loadbalancer_apiserver_type: haproxy

# Marked as required by the scenario author (reason not stated here)
calico_iptables_backend: 'Auto'