Merge pull request #529 from bogdando/netcheck
Add a k8s app for advanced e2e netcheck for DNS
This commit is contained in:
commit
f75e2c5119
16 changed files with 220 additions and 11 deletions
41
docs/netcheck.md
Normal file
41
docs/netcheck.md
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
Network Checker Application
|
||||||
|
===========================
|
||||||
|
|
||||||
|
With the ``deploy_netchecker`` var enabled (defaults to false), Kargo deploys a
|
||||||
|
Network Checker Application from the third-party `l23network/mcp-netchecker` docker
|
||||||
|
images. It consists of a server and agents that try to reach the server by the
|
||||||
|
usual network connectivity means for Kubernetes applications. Therefore, this
|
||||||
|
automagically verifies pod-to-pod connectivity via the cluster IP and checks
|
||||||
|
if DNS resolution is functioning as well.
|
||||||
|
|
||||||
|
The checks are run by agents on a periodic basis and cover standard and host network
|
||||||
|
pods as well. The history of performed checks may be found in the agents' application
|
||||||
|
logs.
|
||||||
|
|
||||||
|
To get the most recent and cluster-wide network connectivity report, run from
|
||||||
|
any of the cluster nodes:
|
||||||
|
```
|
||||||
|
curl http://localhost:31081/api/v1/connectivity_check
|
||||||
|
```
|
||||||
|
Note that Kargo does not invoke the check but only deploys the application, if
|
||||||
|
requested.
|
||||||
|
|
||||||
|
There are related application-specific variables:
|
||||||
|
```
|
||||||
|
netchecker_port: 31081
|
||||||
|
agent_report_interval: 15
|
||||||
|
netcheck_namespace: default
|
||||||
|
agent_img: "quay.io/l23network/mcp-netchecker-agent:v0.1"
|
||||||
|
server_img: "quay.io/l23network/mcp-netchecker-server:v0.1"
|
||||||
|
```
|
||||||
|
|
||||||
|
Note that the application verifies DNS resolution for FQDNs comprising only the
|
||||||
|
combination of the ``netcheck_namespace.dns_domain`` vars, for example the
|
||||||
|
``netchecker-service.default.cluster.local``. If you want to deploy the application
|
||||||
|
to a non-default namespace, make sure to also adjust the ``searchdomains`` var
|
||||||
|
so that the resulting search domain records contain that namespace, like:
|
||||||
|
|
||||||
|
```
|
||||||
|
search: foospace.cluster.local default.cluster.local ...
|
||||||
|
nameserver: ...
|
||||||
|
```
|
|
@ -35,6 +35,8 @@ kube_users:
|
||||||
cluster_name: cluster.local
|
cluster_name: cluster.local
|
||||||
# Subdomains of DNS domain to be resolved via /etc/resolv.conf
|
# Subdomains of DNS domain to be resolved via /etc/resolv.conf
|
||||||
ndots: 5
|
ndots: 5
|
||||||
|
# Deploy netchecker app to verify DNS resolve as an HTTP service
|
||||||
|
deploy_netchecker: false
|
||||||
|
|
||||||
# For some environments, each node has a publicly accessible
|
# For some environments, each node has a publicly accessible
|
||||||
# address and an address it should bind services to. These are
|
# address and an address it should bind services to. These are
|
||||||
|
|
|
@ -48,7 +48,20 @@
|
||||||
when: resolvconf.rc == 0
|
when: resolvconf.rc == 0
|
||||||
notify: Dnsmasq | update resolvconf
|
notify: Dnsmasq | update resolvconf
|
||||||
|
|
||||||
- name: Add search domains to resolv.conf
|
- name: Remove search and nameserver options from resolvconf cloud init temporary file
|
||||||
|
lineinfile:
|
||||||
|
dest: "{{resolvconffile}}"
|
||||||
|
state: absent
|
||||||
|
regexp: "^{{ item }}.*$"
|
||||||
|
backup: yes
|
||||||
|
follow: yes
|
||||||
|
with_items:
|
||||||
|
- search
|
||||||
|
- nameserver
|
||||||
|
when: ansible_os_family == "CoreOS"
|
||||||
|
notify: Dnsmasq | update resolvconf for CoreOS
|
||||||
|
|
||||||
|
- name: Add search domains to resolvconf file
|
||||||
lineinfile:
|
lineinfile:
|
||||||
line: "search {{searchentries}}"
|
line: "search {{searchentries}}"
|
||||||
dest: "{{resolvconffile}}"
|
dest: "{{resolvconffile}}"
|
||||||
|
@ -66,7 +79,7 @@
|
||||||
nameserver {{ item }}
|
nameserver {{ item }}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
state: present
|
state: present
|
||||||
insertafter: "^search.*$"
|
insertafter: "^search default.svc.*$"
|
||||||
create: yes
|
create: yes
|
||||||
backup: yes
|
backup: yes
|
||||||
follow: yes
|
follow: yes
|
||||||
|
|
|
@ -58,6 +58,12 @@ hyperkube_image_repo: "quay.io/coreos/hyperkube"
|
||||||
hyperkube_image_tag: "{{ kube_version }}_coreos.0"
|
hyperkube_image_tag: "{{ kube_version }}_coreos.0"
|
||||||
pod_infra_image_repo: "gcr.io/google_containers/pause-amd64"
|
pod_infra_image_repo: "gcr.io/google_containers/pause-amd64"
|
||||||
pod_infra_image_tag: "{{ pod_infra_version }}"
|
pod_infra_image_tag: "{{ pod_infra_version }}"
|
||||||
|
netcheck_tag: v0.1
|
||||||
|
netcheck_kubectl_tag: v0.18.0-120-gaeb4ac55ad12b1-dirty
|
||||||
|
netcheck_agent_img_repo: "quay.io/l23network/mcp-netchecker-agent"
|
||||||
|
netcheck_server_img_repo: "quay.io/l23network/mcp-netchecker-server"
|
||||||
|
netcheck_kubectl_img_repo: "gcr.io/google_containers/kubectl"
|
||||||
|
|
||||||
nginx_image_repo: nginx
|
nginx_image_repo: nginx
|
||||||
nginx_image_tag: 1.11.4-alpine
|
nginx_image_tag: 1.11.4-alpine
|
||||||
dnsmasq_version: 2.72
|
dnsmasq_version: 2.72
|
||||||
|
@ -73,6 +79,21 @@ test_image_repo: busybox
|
||||||
test_image_tag: latest
|
test_image_tag: latest
|
||||||
|
|
||||||
downloads:
|
downloads:
|
||||||
|
netcheck_server:
|
||||||
|
container: true
|
||||||
|
repo: "{{ netcheck_server_img_repo }}"
|
||||||
|
tag: "{{ netcheck_tag }}"
|
||||||
|
enabled: "{{ deploy_netchecker|bool }}"
|
||||||
|
netcheck_agent:
|
||||||
|
container: true
|
||||||
|
repo: "{{ netcheck_agent_img_repo }}"
|
||||||
|
tag: "{{ netcheck_tag }}"
|
||||||
|
enabled: "{{ deploy_netchecker|bool }}"
|
||||||
|
netcheck_kubectl:
|
||||||
|
container: true
|
||||||
|
repo: "{{ netcheck_kubectl_img_repo }}"
|
||||||
|
tag: "{{ netcheck_kubectl_tag }}"
|
||||||
|
enabled: "{{ deploy_netchecker|bool }}"
|
||||||
weave:
|
weave:
|
||||||
dest: weave/bin/weave
|
dest: weave/bin/weave
|
||||||
version: "{{weave_version}}"
|
version: "{{weave_version}}"
|
||||||
|
|
|
@ -1,3 +1,6 @@
|
||||||
|
kube_config_dir: /etc/kubernetes
|
||||||
|
kube_namespace: kube-system
|
||||||
|
|
||||||
# Versions
|
# Versions
|
||||||
kubedns_version: 1.7
|
kubedns_version: 1.7
|
||||||
kubednsmasq_version: 1.3
|
kubednsmasq_version: 1.3
|
||||||
|
@ -20,5 +23,14 @@ exechealthz_image_tag: "{{ exechealthz_version }}"
|
||||||
calico_policy_image_repo: "calico/kube-policy-controller"
|
calico_policy_image_repo: "calico/kube-policy-controller"
|
||||||
calico_policy_image_tag: latest
|
calico_policy_image_tag: latest
|
||||||
|
|
||||||
|
# Netchecker
|
||||||
|
deploy_netchecker: false
|
||||||
|
netchecker_port: 31081
|
||||||
|
agent_report_interval: 15
|
||||||
|
netcheck_namespace: default
|
||||||
|
agent_img: "quay.io/l23network/mcp-netchecker-agent:v0.1"
|
||||||
|
server_img: "quay.io/l23network/mcp-netchecker-server:v0.1"
|
||||||
|
kubectl_image: "gcr.io/google_containers/kubectl:v0.18.0-120-gaeb4ac55ad12b1-dirty"
|
||||||
|
|
||||||
# SSL
|
# SSL
|
||||||
etcd_cert_dir: "/etc/ssl/etcd/ssl"
|
etcd_cert_dir: "/etc/ssl/etcd/ssl"
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
- name: Write calico-policy-controller yaml
|
- name: Write calico-policy-controller yaml
|
||||||
template: src=calico-policy-controller.yml.j2 dest=/etc/kubernetes/calico-policy-controller.yml
|
template: src=calico-policy-controller.yml.j2 dest={{kube_config_dir}}/calico-policy-controller.yml
|
||||||
when: inventory_hostname == groups['kube-master'][0]
|
when: inventory_hostname == groups['kube-master'][0]
|
||||||
|
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
||||||
kube:
|
kube:
|
||||||
name: "calico-policy-controller"
|
name: "calico-policy-controller"
|
||||||
kubectl: "{{bin_dir}}/kubectl"
|
kubectl: "{{bin_dir}}/kubectl"
|
||||||
filename: "/etc/kubernetes/calico-policy-controller.yml"
|
filename: "{{kube_config_dir}}/calico-policy-controller.yml"
|
||||||
namespace: "kube-system"
|
namespace: "{{kube_namespace}}"
|
||||||
resource: "rs"
|
resource: "rs"
|
||||||
when: inventory_hostname == groups['kube-master'][0]
|
when: inventory_hostname == groups['kube-master'][0]
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
---
|
---
|
||||||
- name: Kubernetes Apps | Lay Down KubeDNS Template
|
- name: Kubernetes Apps | Lay Down KubeDNS Template
|
||||||
template: src={{item.file}} dest=/etc/kubernetes/{{item.file}}
|
template: src={{item.file}} dest={{kube_config_dir}}/{{item.file}}
|
||||||
with_items:
|
with_items:
|
||||||
- {file: kubedns-rc.yml, type: rc}
|
- {file: kubedns-rc.yml, type: rc}
|
||||||
- {file: kubedns-svc.yml, type: svc}
|
- {file: kubedns-svc.yml, type: svc}
|
||||||
|
@ -10,10 +10,10 @@
|
||||||
- name: Kubernetes Apps | Start Resources
|
- name: Kubernetes Apps | Start Resources
|
||||||
kube:
|
kube:
|
||||||
name: kubedns
|
name: kubedns
|
||||||
namespace: kube-system
|
namespace: "{{ kube_namespace }}"
|
||||||
kubectl: "{{bin_dir}}/kubectl"
|
kubectl: "{{bin_dir}}/kubectl"
|
||||||
resource: "{{item.item.type}}"
|
resource: "{{item.item.type}}"
|
||||||
filename: /etc/kubernetes/{{item.item.file}}
|
filename: "{{kube_config_dir}}/{{item.item.file}}"
|
||||||
state: "{{item.changed | ternary('latest','present') }}"
|
state: "{{item.changed | ternary('latest','present') }}"
|
||||||
with_items: "{{ manifests.results }}"
|
with_items: "{{ manifests.results }}"
|
||||||
when: inventory_hostname == groups['kube-master'][0]
|
when: inventory_hostname == groups['kube-master'][0]
|
||||||
|
@ -21,3 +21,7 @@
|
||||||
- include: tasks/calico-policy-controller.yml
|
- include: tasks/calico-policy-controller.yml
|
||||||
when: ( enable_network_policy is defined and enable_network_policy == True ) or
|
when: ( enable_network_policy is defined and enable_network_policy == True ) or
|
||||||
( kube_network_plugin == 'canal' )
|
( kube_network_plugin == 'canal' )
|
||||||
|
|
||||||
|
- name: Kubernetes Apps | Netchecker
|
||||||
|
include: tasks/netchecker.yml
|
||||||
|
when: deploy_netchecker
|
||||||
|
|
20
roles/kubernetes-apps/ansible/tasks/netchecker.yml
Normal file
20
roles/kubernetes-apps/ansible/tasks/netchecker.yml
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
- name: Kubernetes Apps | Lay Down Netchecker Template
|
||||||
|
template: src={{item.file}} dest={{kube_config_dir}}/{{item.file}}
|
||||||
|
with_items:
|
||||||
|
- {file: netchecker-agent-ds.yml, type: ds, name: netchecker-agent}
|
||||||
|
- {file: netchecker-agent-hostnet-ds.yml, type: ds, name: netchecker-agent-hostnet}
|
||||||
|
- {file: netchecker-server-pod.yml, type: po, name: netchecker-server}
|
||||||
|
- {file: netchecker-server-svc.yml, type: svc, name: netchecker-service}
|
||||||
|
register: manifests
|
||||||
|
when: inventory_hostname == groups['kube-master'][0]
|
||||||
|
|
||||||
|
- name: Kubernetes Apps | Start Netchecker Resources
|
||||||
|
kube:
|
||||||
|
name: "{{item.item.name}}"
|
||||||
|
namespace: "{{netcheck_namespace}}"
|
||||||
|
kubectl: "{{bin_dir}}/kubectl"
|
||||||
|
resource: "{{item.item.type}}"
|
||||||
|
filename: "{{kube_config_dir}}/{{item.item.file}}"
|
||||||
|
state: "{{item.changed | ternary('latest','present') }}"
|
||||||
|
with_items: "{{ manifests.results }}"
|
||||||
|
when: inventory_hostname == groups['kube-master'][0]
|
|
@ -2,7 +2,7 @@ apiVersion: extensions/v1beta1
|
||||||
kind: ReplicaSet
|
kind: ReplicaSet
|
||||||
metadata:
|
metadata:
|
||||||
name: calico-policy-controller
|
name: calico-policy-controller
|
||||||
namespace: kube-system
|
namespace: {{ kube_namespace }}
|
||||||
labels:
|
labels:
|
||||||
k8s-app: calico-policy
|
k8s-app: calico-policy
|
||||||
kubernetes.io/cluster-service: "true"
|
kubernetes.io/cluster-service: "true"
|
||||||
|
|
|
@ -2,7 +2,7 @@ apiVersion: v1
|
||||||
kind: ReplicationController
|
kind: ReplicationController
|
||||||
metadata:
|
metadata:
|
||||||
name: kubedns
|
name: kubedns
|
||||||
namespace: kube-system
|
namespace: {{ kube_namespace }}
|
||||||
labels:
|
labels:
|
||||||
k8s-app: kubedns
|
k8s-app: kubedns
|
||||||
version: v19
|
version: v19
|
||||||
|
|
|
@ -2,7 +2,7 @@ apiVersion: v1
|
||||||
kind: Service
|
kind: Service
|
||||||
metadata:
|
metadata:
|
||||||
name: kubedns
|
name: kubedns
|
||||||
namespace: kube-system
|
namespace: {{ kube_namespace }}
|
||||||
labels:
|
labels:
|
||||||
k8s-app: kubedns
|
k8s-app: kubedns
|
||||||
kubernetes.io/cluster-service: "true"
|
kubernetes.io/cluster-service: "true"
|
||||||
|
|
|
@ -0,0 +1,25 @@
|
||||||
|
apiVersion: extensions/v1beta1
|
||||||
|
kind: DaemonSet
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: netchecker-agent
|
||||||
|
name: netchecker-agent
|
||||||
|
namespace: {{ netcheck_namespace }}
|
||||||
|
spec:
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
name: netchecker-agent
|
||||||
|
labels:
|
||||||
|
app: netchecker-agent
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: netchecker-agent
|
||||||
|
image: "{{ agent_img }}"
|
||||||
|
env:
|
||||||
|
- name: MY_POD_NAME
|
||||||
|
valueFrom:
|
||||||
|
fieldRef:
|
||||||
|
fieldPath: metadata.name
|
||||||
|
- name: REPORT_INTERVAL
|
||||||
|
value: '{{ agent_report_interval }}'
|
||||||
|
imagePullPolicy: {{ k8s_image_pull_policy }}
|
|
@ -0,0 +1,26 @@
|
||||||
|
apiVersion: extensions/v1beta1
|
||||||
|
kind: DaemonSet
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: netchecker-agent-hostnet
|
||||||
|
name: netchecker-agent-hostnet
|
||||||
|
namespace: {{ netcheck_namespace }}
|
||||||
|
spec:
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
name: netchecker-agent-hostnet
|
||||||
|
labels:
|
||||||
|
app: netchecker-agent-hostnet
|
||||||
|
spec:
|
||||||
|
hostNetwork: True
|
||||||
|
containers:
|
||||||
|
- name: netchecker-agent
|
||||||
|
image: "{{ agent_img }}"
|
||||||
|
env:
|
||||||
|
- name: MY_POD_NAME
|
||||||
|
valueFrom:
|
||||||
|
fieldRef:
|
||||||
|
fieldPath: metadata.name
|
||||||
|
- name: REPORT_INTERVAL
|
||||||
|
value: '{{ agent_report_interval }}'
|
||||||
|
imagePullPolicy: {{ k8s_image_pull_policy }}
|
|
@ -0,0 +1,21 @@
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Pod
|
||||||
|
metadata:
|
||||||
|
name: netchecker-server
|
||||||
|
labels:
|
||||||
|
app: netchecker-server
|
||||||
|
namespace: {{ netcheck_namespace }}
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: netchecker-server
|
||||||
|
image: "{{ server_img }}"
|
||||||
|
env:
|
||||||
|
imagePullPolicy: {{ k8s_image_pull_policy }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 8081
|
||||||
|
hostPort: 8081
|
||||||
|
- name: kubectl-proxy
|
||||||
|
image: "{{ kubectl_image }}"
|
||||||
|
imagePullPolicy: {{ k8s_image_pull_policy }}
|
||||||
|
args:
|
||||||
|
- proxy
|
|
@ -0,0 +1,15 @@
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: netchecker-service
|
||||||
|
namespace: {{ netcheck_namespace }}
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
app: netchecker-server
|
||||||
|
ports:
|
||||||
|
-
|
||||||
|
protocol: TCP
|
||||||
|
port: 8081
|
||||||
|
targetPort: 8081
|
||||||
|
nodePort: {{ netchecker_port }}
|
||||||
|
type: NodePort
|
|
@ -9,6 +9,15 @@ dependencies:
|
||||||
file: "{{ downloads.nginx }}"
|
file: "{{ downloads.nginx }}"
|
||||||
- role: download
|
- role: download
|
||||||
file: "{{ downloads.testbox }}"
|
file: "{{ downloads.testbox }}"
|
||||||
|
- role: download
|
||||||
|
file: "{{ downloads.netcheck_server }}"
|
||||||
|
when: deploy_netchecker
|
||||||
|
- role: download
|
||||||
|
file: "{{ downloads.netcheck_agent }}"
|
||||||
|
when: deploy_netchecker
|
||||||
|
- role: download
|
||||||
|
file: "{{ downloads.netcheck_kubectl }}"
|
||||||
|
when: deploy_netchecker
|
||||||
- role: download
|
- role: download
|
||||||
file: "{{ downloads.kubednsmasq }}"
|
file: "{{ downloads.kubednsmasq }}"
|
||||||
when: not skip_dnsmasq_k8s|default(false)
|
when: not skip_dnsmasq_k8s|default(false)
|
||||||
|
|
Loading…
Reference in a new issue