Add advanced net check for DNS K8s app
* Add an option to deploy K8s app to test e2e network connectivity and cluster DNS resolve via Kubedns for nethost/simple pods (defaults to false). * Parametrize existing k8s apps templates with kube_namespace and kube_config_dir instead of hardcode. * For CoreOS, ensure nameservers from inventory to be put in the first place to allow hostnet pods connectivity via short names or FQDN and hostnet agents to pass as well, if netchecker deployed. Signed-off-by: Bogdan Dobrelya <bdobrelia@mirantis.com>
This commit is contained in:
parent
5bc9b9e349
commit
d5b21b34c2
16 changed files with 220 additions and 11 deletions
41
docs/netcheck.md
Normal file
41
docs/netcheck.md
Normal file
|
@ -0,0 +1,41 @@
|
|||
Network Checker Application
|
||||
===========================
|
||||
|
||||
With the ``deploy_netchecker`` var enabled (defaults to false), Kargo deploys a
|
||||
Network Checker Application from the third-party `l23network/mcp-netchecker` docker
|
||||
images. It consists of a server and agents that try to reach the server by the
|
||||
usual network connectivity means for Kubernetes applications. Therefore, this
|
||||
automatically verifies pod-to-pod connectivity via the cluster IP and checks
|
||||
whether DNS resolution is functioning as well.
|
||||
|
||||
The checks are run by agents on a periodic basis and cover standard and host network
|
||||
pods as well. The history of performed checks may be found in the agents' application
|
||||
logs.
|
||||
|
||||
To get the most recent and cluster-wide network connectivity report, run from
|
||||
any of the cluster nodes:
|
||||
```
|
||||
curl http://localhost:31081/api/v1/connectivity_check
|
||||
```
|
||||
Note that Kargo does not invoke the check but only deploys the application, if
|
||||
requested.
|
||||
|
||||
There are related application-specific variables:
|
||||
```
|
||||
netchecker_port: 31081
|
||||
agent_report_interval: 15
|
||||
netcheck_namespace: default
|
||||
agent_img: "quay.io/l23network/mcp-netchecker-agent:v0.1"
|
||||
server_img: "quay.io/l23network/mcp-netchecker-server:v0.1"
|
||||
```
|
||||
|
||||
Note that the application verifies DNS resolution for FQDNs comprising only the
|
||||
combination of the ``netcheck_namespace.dns_domain`` vars, for example the
|
||||
``netchecker-service.default.cluster.local``. If you want to deploy the application
|
||||
to a non-default namespace, make sure to also adjust the ``searchdomains`` var
|
||||
so that the resulting search domain records contain that namespace, like:
|
||||
|
||||
```
|
||||
search: foospace.cluster.local default.cluster.local ...
|
||||
nameserver: ...
|
||||
```
|
|
@ -35,6 +35,8 @@ kube_users:
|
|||
cluster_name: cluster.local
|
||||
# Subdomains of DNS domain to be resolved via /etc/resolv.conf
|
||||
ndots: 5
|
||||
# Deploy netchecker app to verify DNS resolution as an HTTP service
|
||||
deploy_netchecker: false
|
||||
|
||||
# For some environments, each node has a publicly accessible
|
||||
# address and an address it should bind services to. These are
|
||||
|
|
|
@ -48,7 +48,20 @@
|
|||
when: resolvconf.rc == 0
|
||||
notify: Dnsmasq | update resolvconf
|
||||
|
||||
- name: Add search domains to resolv.conf
|
||||
- name: Remove search and nameserver options from resolvconf cloud init temporary file
|
||||
lineinfile:
|
||||
dest: "{{resolvconffile}}"
|
||||
state: absent
|
||||
regexp: "^{{ item }}.*$"
|
||||
backup: yes
|
||||
follow: yes
|
||||
with_items:
|
||||
- search
|
||||
- nameserver
|
||||
when: ansible_os_family == "CoreOS"
|
||||
notify: Dnsmasq | update resolvconf for CoreOS
|
||||
|
||||
- name: Add search domains to resolvconf file
|
||||
lineinfile:
|
||||
line: "search {{searchentries}}"
|
||||
dest: "{{resolvconffile}}"
|
||||
|
@ -66,7 +79,7 @@
|
|||
nameserver {{ item }}
|
||||
{% endfor %}
|
||||
state: present
|
||||
insertafter: "^search.*$"
|
||||
insertafter: "^search default.svc.*$"
|
||||
create: yes
|
||||
backup: yes
|
||||
follow: yes
|
||||
|
|
|
@ -58,6 +58,12 @@ hyperkube_image_repo: "quay.io/coreos/hyperkube"
|
|||
hyperkube_image_tag: "{{ kube_version }}_coreos.0"
|
||||
pod_infra_image_repo: "gcr.io/google_containers/pause-amd64"
|
||||
pod_infra_image_tag: "{{ pod_infra_version }}"
|
||||
netcheck_tag: v0.1
|
||||
netcheck_kubectl_tag: v0.18.0-120-gaeb4ac55ad12b1-dirty
|
||||
netcheck_agent_img_repo: "quay.io/l23network/mcp-netchecker-agent"
|
||||
netcheck_server_img_repo: "quay.io/l23network/mcp-netchecker-server"
|
||||
netcheck_kubectl_img_repo: "gcr.io/google_containers/kubectl"
|
||||
|
||||
nginx_image_repo: nginx
|
||||
nginx_image_tag: 1.11.4-alpine
|
||||
dnsmasq_version: 2.72
|
||||
|
@ -73,6 +79,21 @@ test_image_repo: busybox
|
|||
test_image_tag: latest
|
||||
|
||||
downloads:
|
||||
netcheck_server:
|
||||
container: true
|
||||
repo: "{{ netcheck_server_img_repo }}"
|
||||
tag: "{{ netcheck_tag }}"
|
||||
enabled: "{{ deploy_netchecker|bool }}"
|
||||
netcheck_agent:
|
||||
container: true
|
||||
repo: "{{ netcheck_agent_img_repo }}"
|
||||
tag: "{{ netcheck_tag }}"
|
||||
enabled: "{{ deploy_netchecker|bool }}"
|
||||
netcheck_kubectl:
|
||||
container: true
|
||||
repo: "{{ netcheck_kubectl_img_repo }}"
|
||||
tag: "{{ netcheck_kubectl_tag }}"
|
||||
enabled: "{{ deploy_netchecker|bool }}"
|
||||
weave:
|
||||
dest: weave/bin/weave
|
||||
version: "{{weave_version}}"
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
kube_config_dir: /etc/kubernetes
|
||||
kube_namespace: kube-system
|
||||
|
||||
# Versions
|
||||
kubedns_version: 1.7
|
||||
kubednsmasq_version: 1.3
|
||||
|
@ -13,5 +16,14 @@ exechealthz_image_tag: "{{ exechealthz_version }}"
|
|||
calico_policy_image_repo: "calico/kube-policy-controller"
|
||||
calico_policy_image_tag: latest
|
||||
|
||||
# Netchecker
|
||||
deploy_netchecker: false
|
||||
netchecker_port: 31081
|
||||
agent_report_interval: 15
|
||||
netcheck_namespace: default
|
||||
agent_img: "quay.io/l23network/mcp-netchecker-agent:v0.1"
|
||||
server_img: "quay.io/l23network/mcp-netchecker-server:v0.1"
|
||||
kubectl_image: "gcr.io/google_containers/kubectl:v0.18.0-120-gaeb4ac55ad12b1-dirty"
|
||||
|
||||
# SSL
|
||||
etcd_cert_dir: "/etc/ssl/etcd/ssl"
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
- name: Write calico-policy-controller yaml
|
||||
template: src=calico-policy-controller.yml.j2 dest=/etc/kubernetes/calico-policy-controller.yml
|
||||
template: src=calico-policy-controller.yml.j2 dest={{kube_config_dir}}/calico-policy-controller.yml
|
||||
when: inventory_hostname == groups['kube-master'][0]
|
||||
|
||||
|
||||
|
@ -7,7 +7,7 @@
|
|||
kube:
|
||||
name: "calico-policy-controller"
|
||||
kubectl: "{{bin_dir}}/kubectl"
|
||||
filename: "/etc/kubernetes/calico-policy-controller.yml"
|
||||
namespace: "kube-system"
|
||||
filename: "{{kube_config_dir}}/calico-policy-controller.yml"
|
||||
namespace: "{{kube_namespace}}"
|
||||
resource: "rs"
|
||||
when: inventory_hostname == groups['kube-master'][0]
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
---
|
||||
- name: Kubernetes Apps | Lay Down KubeDNS Template
|
||||
template: src={{item.file}} dest=/etc/kubernetes/{{item.file}}
|
||||
template: src={{item.file}} dest={{kube_config_dir}}/{{item.file}}
|
||||
with_items:
|
||||
- {file: kubedns-rc.yml, type: rc}
|
||||
- {file: kubedns-svc.yml, type: svc}
|
||||
|
@ -10,10 +10,10 @@
|
|||
- name: Kubernetes Apps | Start Resources
|
||||
kube:
|
||||
name: kubedns
|
||||
namespace: kube-system
|
||||
namespace: "{{ kube_namespace }}"
|
||||
kubectl: "{{bin_dir}}/kubectl"
|
||||
resource: "{{item.item.type}}"
|
||||
filename: /etc/kubernetes/{{item.item.file}}
|
||||
filename: "{{kube_config_dir}}/{{item.item.file}}"
|
||||
state: "{{item.changed | ternary('latest','present') }}"
|
||||
with_items: "{{ manifests.results }}"
|
||||
when: inventory_hostname == groups['kube-master'][0]
|
||||
|
@ -21,3 +21,7 @@
|
|||
- include: tasks/calico-policy-controller.yml
|
||||
when: ( enable_network_policy is defined and enable_network_policy == True ) or
|
||||
( kube_network_plugin == 'canal' )
|
||||
|
||||
- name: Kubernetes Apps | Netchecker
|
||||
include: tasks/netchecker.yml
|
||||
when: deploy_netchecker
|
||||
|
|
20
roles/kubernetes-apps/ansible/tasks/netchecker.yml
Normal file
20
roles/kubernetes-apps/ansible/tasks/netchecker.yml
Normal file
|
@ -0,0 +1,20 @@
|
|||
- name: Kubernetes Apps | Lay Down Netchecker Template
|
||||
template: src={{item.file}} dest={{kube_config_dir}}/{{item.file}}
|
||||
with_items:
|
||||
- {file: netchecker-agent-ds.yml, type: ds, name: netchecker-agent}
|
||||
- {file: netchecker-agent-hostnet-ds.yml, type: ds, name: netchecker-agent-hostnet}
|
||||
- {file: netchecker-server-pod.yml, type: po, name: netchecker-server}
|
||||
- {file: netchecker-server-svc.yml, type: svc, name: netchecker-service}
|
||||
register: manifests
|
||||
when: inventory_hostname == groups['kube-master'][0]
|
||||
|
||||
- name: Kubernetes Apps | Start Netchecker Resources
|
||||
kube:
|
||||
name: "{{item.item.name}}"
|
||||
namespace: "{{netcheck_namespace}}"
|
||||
kubectl: "{{bin_dir}}/kubectl"
|
||||
resource: "{{item.item.type}}"
|
||||
filename: "{{kube_config_dir}}/{{item.item.file}}"
|
||||
state: "{{item.changed | ternary('latest','present') }}"
|
||||
with_items: "{{ manifests.results }}"
|
||||
when: inventory_hostname == groups['kube-master'][0]
|
|
@ -2,7 +2,7 @@ apiVersion: extensions/v1beta1
|
|||
kind: ReplicaSet
|
||||
metadata:
|
||||
name: calico-policy-controller
|
||||
namespace: kube-system
|
||||
namespace: {{ kube_namespace }}
|
||||
labels:
|
||||
k8s-app: calico-policy
|
||||
kubernetes.io/cluster-service: "true"
|
||||
|
|
|
@ -2,7 +2,7 @@ apiVersion: v1
|
|||
kind: ReplicationController
|
||||
metadata:
|
||||
name: kubedns
|
||||
namespace: kube-system
|
||||
namespace: {{ kube_namespace }}
|
||||
labels:
|
||||
k8s-app: kubedns
|
||||
version: v19
|
||||
|
|
|
@ -2,7 +2,7 @@ apiVersion: v1
|
|||
kind: Service
|
||||
metadata:
|
||||
name: kubedns
|
||||
namespace: kube-system
|
||||
namespace: {{ kube_namespace }}
|
||||
labels:
|
||||
k8s-app: kubedns
|
||||
kubernetes.io/cluster-service: "true"
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
apiVersion: extensions/v1beta1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
labels:
|
||||
app: netchecker-agent
|
||||
name: netchecker-agent
|
||||
namespace: {{ netcheck_namespace }}
|
||||
spec:
|
||||
template:
|
||||
metadata:
|
||||
name: netchecker-agent
|
||||
labels:
|
||||
app: netchecker-agent
|
||||
spec:
|
||||
containers:
|
||||
- name: netchecker-agent
|
||||
image: "{{ agent_img }}"
|
||||
env:
|
||||
- name: MY_POD_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.name
|
||||
- name: REPORT_INTERVAL
|
||||
value: '{{ agent_report_interval }}'
|
||||
imagePullPolicy: {{ k8s_image_pull_policy }}
|
|
@ -0,0 +1,26 @@
|
|||
apiVersion: extensions/v1beta1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
labels:
|
||||
app: netchecker-agent-hostnet
|
||||
name: netchecker-agent-hostnet
|
||||
namespace: {{ netcheck_namespace }}
|
||||
spec:
|
||||
template:
|
||||
metadata:
|
||||
name: netchecker-agent-hostnet
|
||||
labels:
|
||||
app: netchecker-agent-hostnet
|
||||
spec:
|
||||
hostNetwork: True
|
||||
containers:
|
||||
- name: netchecker-agent
|
||||
image: "{{ agent_img }}"
|
||||
env:
|
||||
- name: MY_POD_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.name
|
||||
- name: REPORT_INTERVAL
|
||||
value: '{{ agent_report_interval }}'
|
||||
imagePullPolicy: {{ k8s_image_pull_policy }}
|
|
@ -0,0 +1,21 @@
|
|||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: netchecker-server
|
||||
labels:
|
||||
app: netchecker-server
|
||||
namespace: {{ netcheck_namespace }}
|
||||
spec:
|
||||
containers:
|
||||
- name: netchecker-server
|
||||
image: "{{ server_img }}"
|
||||
env:
|
||||
imagePullPolicy: {{ k8s_image_pull_policy }}
|
||||
ports:
|
||||
- containerPort: 8081
|
||||
hostPort: 8081
|
||||
- name: kubectl-proxy
|
||||
image: "{{ kubectl_image }}"
|
||||
imagePullPolicy: {{ k8s_image_pull_policy }}
|
||||
args:
|
||||
- proxy
|
|
@ -0,0 +1,15 @@
|
|||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: netchecker-service
|
||||
namespace: {{ netcheck_namespace }}
|
||||
spec:
|
||||
selector:
|
||||
app: netchecker-server
|
||||
ports:
|
||||
-
|
||||
protocol: TCP
|
||||
port: 8081
|
||||
targetPort: 8081
|
||||
nodePort: {{ netchecker_port }}
|
||||
type: NodePort
|
|
@ -9,6 +9,15 @@ dependencies:
|
|||
file: "{{ downloads.nginx }}"
|
||||
- role: download
|
||||
file: "{{ downloads.testbox }}"
|
||||
- role: download
|
||||
file: "{{ downloads.netcheck_server }}"
|
||||
when: deploy_netchecker
|
||||
- role: download
|
||||
file: "{{ downloads.netcheck_agent }}"
|
||||
when: deploy_netchecker
|
||||
- role: download
|
||||
file: "{{ downloads.netcheck_kubectl }}"
|
||||
when: deploy_netchecker
|
||||
- role: download
|
||||
file: "{{ downloads.kubednsmasq }}"
|
||||
when: not skip_dnsmasq_k8s|default(false)
|
||||
|
|
Loading…
Reference in a new issue