diff --git a/cluster.yml b/cluster.yml index 18e458652..9bb149fd2 100644 --- a/cluster.yml +++ b/cluster.yml @@ -16,6 +16,7 @@ any_errors_fatal: true roles: - { role: kubernetes/preinstall, tags: preinstall } + - { role: docker, tags: docker } - hosts: etcd:!k8s-cluster any_errors_fatal: true @@ -40,6 +41,7 @@ any_errors_fatal: true roles: - { role: dnsmasq, tags: dnsmasq } + - { role: kubernetes/preinstall, tags: resolvconf } - hosts: kube-master[0] any_errors_fatal: true diff --git a/docs/ansible.md b/docs/ansible.md index 513ed0dce..d3ecde468 100644 --- a/docs/ansible.md +++ b/docs/ansible.md @@ -63,6 +63,7 @@ The following tags are defined in playbooks: | canal | Network plugin Canal | cloud-provider | Cloud-provider related tasks | dnsmasq | Configuring DNS stack for hosts and K8s apps +| docker | Configuring docker for hosts | download | Fetching container images | etcd | Configuring etcd cluster | etcd-pre-upgrade | Upgrading etcd cluster diff --git a/docs/dns-stack.md b/docs/dns-stack.md index e6df11b73..9905eb496 100644 --- a/docs/dns-stack.md +++ b/docs/dns-stack.md @@ -1,15 +1,24 @@ K8s DNS stack by Kargo ====================== -Kargo configures a [Kubernetes DNS](http://kubernetes.io/docs/admin/dns/) +For K8s cluster nodes, Kargo configures a [Kubernetes DNS](http://kubernetes.io/docs/admin/dns/) [cluster add-on](http://releases.k8s.io/master/cluster/addons/README.md) to serve as an authoritative DNS server for a given ``dns_domain`` and its ``svc, default.svc`` default subdomains (a total of ``ndots: 5`` max levels). -Note, additional search (sub)domains may be defined in the ``searchdomains`` +Other nodes in the inventory, like external storage nodes or a separate etcd cluster +node group, are considered non-cluster, and configuring DNS resolution for them is left up to the user. + +Note that custom ``ndots`` values affect only the dnsmasq daemon set (explained below). 
+The kubedns app, however, has ``ndots=5`` hardcoded, which is not recommended due to +[DNS performance reasons](https://github.com/kubernetes/kubernetes/issues/14051). +You can use config maps for the kubedns app to work around the issue, which is +not yet in the Kargo scope. + +Additional search (sub)domains may be defined in the ``searchdomains`` and ``ndots`` vars. And additional recursive DNS resolvers in the `` upstream_dns_servers``, -``nameservers`` vars. Intranet DNS resolvers should be specified in the first -place, followed by external resolvers, for example: +``nameservers`` vars. Intranet/cloud provider DNS resolvers should be specified +in the first place, followed by external resolvers, for example: ``` skip_dnsmasq: true @@ -21,7 +30,13 @@ or skip_dnsmasq: false upstream_dns_servers: [172.18.32.6, 172.18.32.7, 8.8.8.8, 8.8.8.4] ``` -The vars are explained below as well. +The vars are explained below. For the early cluster deployment stage, when there +is no K8s cluster and no apps exist yet, a user may expect local repos to be +accessible via authoritative intranet resolvers. For that case, if no custom vars +were specified, the default resolver is set to either the cloud provider default +or `8.8.8.8`, and the domain is set to the default ``dns_domain`` value as well. +Later, the nameservers will be reconfigured to the DNS service IP that Kargo +configures for the K8s cluster. 
DNS configuration details ------------------------- diff --git a/inventory/group_vars/all.yml b/inventory/group_vars/all.yml index f64cab5aa..0d985d23c 100644 --- a/inventory/group_vars/all.yml +++ b/inventory/group_vars/all.yml @@ -33,8 +33,8 @@ kube_users: # Kubernetes cluster name, also will be used as DNS domain cluster_name: cluster.local -# Subdomains of DNS domain to be resolved via /etc/resolv.conf -ndots: 5 +# Subdomains of DNS domain to be resolved via /etc/resolv.conf for hostnet pods +ndots: 2 # Deploy netchecker app to verify DNS resolve as an HTTP service deploy_netchecker: false diff --git a/roles/dnsmasq/defaults/main.yml b/roles/dnsmasq/defaults/main.yml index 0d3d30200..b867820bf 100644 --- a/roles/dnsmasq/defaults/main.yml +++ b/roles/dnsmasq/defaults/main.yml @@ -11,10 +11,6 @@ #nameservers: # - 127.0.0.1 -# CoreOS cloud init config file to define /etc/resolv.conf content -# for hostnet pods and infra needs -resolveconf_cloud_init_conf: /etc/resolveconf_cloud_init.conf - # Versions dnsmasq_version: 2.72 @@ -25,9 +21,6 @@ dnsmasq_image_tag: "{{ dnsmasq_version }}" # Skip dnsmasq setup skip_dnsmasq: false -# Skip setting up dnsmasq daemonset -skip_dnsmasq_k8s: "{{ skip_dnsmasq }}" - # Limits for dnsmasq/kubedns apps dns_cpu_limit: 100m dns_memory_limit: 170Mi diff --git a/roles/dnsmasq/tasks/dnsmasq.yml b/roles/dnsmasq/tasks/dnsmasq.yml deleted file mode 100644 index bc37580de..000000000 --- a/roles/dnsmasq/tasks/dnsmasq.yml +++ /dev/null @@ -1,58 +0,0 @@ ---- -- name: ensure dnsmasq.d directory exists - file: - path: /etc/dnsmasq.d - state: directory - -- name: ensure dnsmasq.d-available directory exists - file: - path: /etc/dnsmasq.d-available - state: directory - -- name: Write dnsmasq configuration - template: - src: 01-kube-dns.conf.j2 - dest: /etc/dnsmasq.d-available/01-kube-dns.conf - mode: 0755 - backup: yes - -- name: Stat dnsmasq configuration - stat: path=/etc/dnsmasq.d/01-kube-dns.conf - register: sym - -- name: Move previous 
configuration - command: mv /etc/dnsmasq.d/01-kube-dns.conf /etc/dnsmasq.d-available/01-kube-dns.conf.bak - changed_when: False - when: sym.stat.islnk is defined and sym.stat.islnk == False - -- name: Enable dnsmasq configuration - file: - src: /etc/dnsmasq.d-available/01-kube-dns.conf - dest: /etc/dnsmasq.d/01-kube-dns.conf - state: link - -- name: Create dnsmasq manifests - template: src={{item.file}} dest=/etc/kubernetes/{{item.file}} - with_items: - - {file: dnsmasq-ds.yml, type: ds} - - {file: dnsmasq-svc.yml, type: svc} - register: manifests - when: inventory_hostname == groups['kube-master'][0] - -- name: Start Resources - kube: - name: dnsmasq - namespace: kube-system - kubectl: "{{bin_dir}}/kubectl" - resource: "{{item.item.type}}" - filename: /etc/kubernetes/{{item.item.file}} - state: "{{item.changed | ternary('latest','present') }}" - with_items: "{{ manifests.results }}" - when: inventory_hostname == groups['kube-master'][0] - -- name: Check for dnsmasq port (pulling image and running container) - wait_for: - host: "{{dns_server}}" - port: 53 - delay: 5 - when: inventory_hostname == groups['kube-node'][0] diff --git a/roles/dnsmasq/tasks/main.yml b/roles/dnsmasq/tasks/main.yml index c82e83218..fa89d6c6a 100644 --- a/roles/dnsmasq/tasks/main.yml +++ b/roles/dnsmasq/tasks/main.yml @@ -1,7 +1,61 @@ --- -- include: dnsmasq.yml - when: "{{ not skip_dnsmasq_k8s|bool }}" - tags: dnsmasq +- name: ensure dnsmasq.d directory exists + file: + path: /etc/dnsmasq.d + state: directory + tags: bootstrap-os -- include: resolvconf.yml - tags: resolvconf +- name: ensure dnsmasq.d-available directory exists + file: + path: /etc/dnsmasq.d-available + state: directory + tags: bootstrap-os + +- name: Write dnsmasq configuration + template: + src: 01-kube-dns.conf.j2 + dest: /etc/dnsmasq.d-available/01-kube-dns.conf + mode: 0755 + backup: yes + +- name: Stat dnsmasq configuration + stat: path=/etc/dnsmasq.d/01-kube-dns.conf + register: sym + +- name: Move previous 
configuration + command: mv /etc/dnsmasq.d/01-kube-dns.conf /etc/dnsmasq.d-available/01-kube-dns.conf.bak + changed_when: False + when: sym.stat.islnk is defined and sym.stat.islnk == False + +- name: Enable dnsmasq configuration + file: + src: /etc/dnsmasq.d-available/01-kube-dns.conf + dest: /etc/dnsmasq.d/01-kube-dns.conf + state: link + +- name: Create dnsmasq manifests + template: src={{item.file}} dest=/etc/kubernetes/{{item.file}} + with_items: + - {file: dnsmasq-ds.yml, type: ds} + - {file: dnsmasq-svc.yml, type: svc} + register: manifests + when: inventory_hostname == groups['kube-master'][0] + +- name: Start Resources + kube: + name: dnsmasq + namespace: kube-system + kubectl: "{{bin_dir}}/kubectl" + resource: "{{item.item.type}}" + filename: /etc/kubernetes/{{item.item.file}} + state: "{{item.changed | ternary('latest','present') }}" + with_items: "{{ manifests.results }}" + when: inventory_hostname == groups['kube-master'][0] + +- name: Check for dnsmasq port (pulling image and running container) + wait_for: + host: "{{dns_server}}" + port: 53 + delay: 5 + when: inventory_hostname == groups['kube-node'][0] + tags: facts diff --git a/roles/dnsmasq/templates/01-kube-dns.conf.j2 b/roles/dnsmasq/templates/01-kube-dns.conf.j2 index 0aa2a6bcf..562b4bbcc 100644 --- a/roles/dnsmasq/templates/01-kube-dns.conf.j2 +++ b/roles/dnsmasq/templates/01-kube-dns.conf.j2 @@ -13,11 +13,8 @@ server=/{{ dns_domain }}/{{ skydns_server }} {% for srv in upstream_dns_servers %} server={{ srv }} {% endfor %} -{% elif cloud_provider is defined and cloud_provider == "gce" %} -server=169.254.169.254 {% else %} - server=8.8.8.8 - server=8.8.4.4 +server={{ default_resolver }} {% endif %} {% if kube_log_level == 4 %} diff --git a/roles/docker/handlers/main.yml b/roles/docker/handlers/main.yml index 04d761796..fd866a109 100644 --- a/roles/docker/handlers/main.yml +++ b/roles/docker/handlers/main.yml @@ -12,17 +12,17 @@ shell: systemctl daemon-reload when: ansible_service_mgr == 
"systemd" -- name: Docker | reload docker - service: - name: docker - state: restarted - - name: Docker | reload docker.socket service: name: docker.socket state: restarted when: ansible_os_family == 'CoreOS' +- name: Docker | reload docker + service: + name: docker + state: restarted + - name: Docker | pause while Docker restarts pause: seconds=10 prompt="Waiting for docker restart" diff --git a/roles/etcd/meta/main.yml b/roles/etcd/meta/main.yml index c8a1c8c34..744eb9fe2 100644 --- a/roles/etcd/meta/main.yml +++ b/roles/etcd/meta/main.yml @@ -3,8 +3,6 @@ dependencies: - role: adduser user: "{{ addusers.etcd }}" when: ansible_os_family != 'CoreOS' - - role: docker - when: (ansible_os_family != "CoreOS" and etcd_deployment_type == "docker" or inventory_hostname in groups['k8s-cluster']) - role: download file: "{{ downloads.etcd }}" tags: download diff --git a/roles/kubernetes/node/meta/main.yml b/roles/kubernetes/node/meta/main.yml index 8959ebe20..62720257d 100644 --- a/roles/kubernetes/node/meta/main.yml +++ b/roles/kubernetes/node/meta/main.yml @@ -28,13 +28,10 @@ dependencies: tags: [download, netchecker] - role: download file: "{{ downloads.kubednsmasq }}" - when: not skip_dnsmasq_k8s|default(false) tags: [download, dnsmasq] - role: download file: "{{ downloads.kubedns }}" - when: not skip_dnsmasq_k8s|default(false) tags: [download, dnsmasq] - role: download file: "{{ downloads.exechealthz }}" - when: not skip_dnsmasq_k8s|default(false) tags: [download, dnsmasq] diff --git a/roles/kubernetes/preinstall/defaults/main.yml b/roles/kubernetes/preinstall/defaults/main.yml index c02a32e29..64f0ff24b 100644 --- a/roles/kubernetes/preinstall/defaults/main.yml +++ b/roles/kubernetes/preinstall/defaults/main.yml @@ -48,3 +48,7 @@ openstack_tenant_id: "{{ lookup('env','OS_TENANT_ID') }}" # All clients access each node individually, instead of using a load balancer. 
etcd_multiaccess: true + +# CoreOS cloud init config file to define /etc/resolv.conf content +# for hostnet pods and infra needs +resolveconf_cloud_init_conf: /etc/resolveconf_cloud_init.conf diff --git a/roles/dnsmasq/files/dhclient_nodnsupdate b/roles/kubernetes/preinstall/files/dhclient_nodnsupdate similarity index 100% rename from roles/dnsmasq/files/dhclient_nodnsupdate rename to roles/kubernetes/preinstall/files/dhclient_nodnsupdate diff --git a/roles/dnsmasq/handlers/main.yml b/roles/kubernetes/preinstall/handlers/main.yml similarity index 54% rename from roles/dnsmasq/handlers/main.yml rename to roles/kubernetes/preinstall/handlers/main.yml index b939189d7..e760a8d8b 100644 --- a/roles/dnsmasq/handlers/main.yml +++ b/roles/kubernetes/preinstall/handlers/main.yml @@ -1,11 +1,11 @@ -- name: Dnsmasq | restart network +- name: Preinstall | restart network command: /bin/true notify: - - Dnsmasq | reload network - - Dnsmasq | update resolvconf + - Preinstall | reload network + - Preinstall | update resolvconf when: ansible_os_family != "CoreOS" -- name: Dnsmasq | reload network +- name: Preinstall | reload network service: name: >- {% if ansible_os_family == "RedHat" -%} @@ -16,31 +16,30 @@ state: restarted when: ansible_os_family != "RedHat" and ansible_os_family != "CoreOS" -- name: Dnsmasq | update resolvconf +- name: Preinstall | update resolvconf command: /bin/true notify: - - Dnsmasq | reload resolvconf - - Dnsmasq | reload kubelet + - Preinstall | reload resolvconf + - Preinstall | reload kubelet when: ansible_os_family != "CoreOS" -- name: Dnsmasq | update resolvconf for CoreOS +- name: Preinstall | update resolvconf for CoreOS command: /bin/true notify: - - Dnsmasq | apply resolvconf cloud-init - - Dnsmasq | reload kubelet + - Preinstall | apply resolvconf cloud-init + - Preinstall | reload kubelet when: ansible_os_family == "CoreOS" -- name: Dnsmasq | reload resolvconf +- name: Preinstall | reload resolvconf command: /sbin/resolvconf -u ignore_errors: 
true -- name: Dnsmasq | apply resolvconf cloud-init +- name: Preinstall | apply resolvconf cloud-init command: /usr/bin/coreos-cloudinit --from-file {{ resolveconf_cloud_init_conf }} when: ansible_os_family == "CoreOS" -- name: Dnsmasq | reload kubelet +- name: Preinstall | reload kubelet service: name: kubelet state: restarted - when: "{{ inventory_hostname in groups['kube-master'] }}" - ignore_errors: true + when: inventory_hostname in groups['kube-master'] and not dns_early|bool diff --git a/roles/kubernetes/preinstall/tasks/main.yml b/roles/kubernetes/preinstall/tasks/main.yml index 07dc53fec..fd8a808a3 100644 --- a/roles/kubernetes/preinstall/tasks/main.yml +++ b/roles/kubernetes/preinstall/tasks/main.yml @@ -177,3 +177,6 @@ - include: etchosts.yml tags: [bootstrap-os, etchosts] + +- include: resolvconf.yml + tags: [bootstrap-os, resolvconf] diff --git a/roles/dnsmasq/tasks/resolvconf.yml b/roles/kubernetes/preinstall/tasks/resolvconf.yml similarity index 72% rename from roles/dnsmasq/tasks/resolvconf.yml rename to roles/kubernetes/preinstall/tasks/resolvconf.yml index ba367ac48..f82e9ddfb 100644 --- a/roles/dnsmasq/tasks/resolvconf.yml +++ b/roles/kubernetes/preinstall/tasks/resolvconf.yml @@ -4,17 +4,33 @@ register: resolvconf ignore_errors: yes changed_when: false + tags: facts + +- name: check kubelet + stat: + path: "{{ bin_dir }}/kubelet" + register: kubelet + changed_when: false + tags: facts + +- name: check if early DNS configuration stage + set_fact: + dns_early: >- + {%- if kubelet.stat.exists -%}false{%- else -%}true{%- endif -%} + tags: facts - name: target resolv.conf file set_fact: resolvconffile: >- {%- if resolvconf.rc == 0 -%}/etc/resolvconf/resolv.conf.d/head{%- else -%}/etc/resolv.conf{%- endif -%} when: ansible_os_family != "CoreOS" + tags: facts - name: target temporary resolvconf cloud init file set_fact: resolvconffile: /tmp/resolveconf_cloud_init_conf when: ansible_os_family == "CoreOS" + tags: facts - name: create temporary 
resolveconf cloud init file command: cp -f /etc/resolv.conf "{{ resolvconffile }}" @@ -24,16 +40,31 @@ set_fact: searchentries: "{{ ([ 'default.svc.' + dns_domain, 'svc.' + dns_domain ] + searchdomains|default([])) | join(' ') }}" + tags: facts -- name: pick dnsmasq cluster IP +- name: decide on dns server IP set_fact: - dnsmasq_server: >- - {%- if skip_dnsmasq|bool -%}{{ [ skydns_server ] + upstream_dns_servers|default([]) }}{%- else -%}{{ [ dns_server ] }}{%- endif -%} + dns_server_real: >- + {%- if dns_early|bool -%}{{default_resolver}}{%- else -%}{{dns_server}}{%- endif -%} + tags: facts # dns_server_real is read by the facts-tagged task below + +- name: pick dnsmasq cluster IP or default resolver + set_fact: + dnsmasq_server: |- + {%- if skip_dnsmasq|bool and not dns_early|bool -%} + {{ [ skydns_server ] + upstream_dns_servers|default([]) }} + {%- elif dns_early|bool -%} + {{ [ dns_server_real ] + upstream_dns_servers|default([]) }} + {%- else -%} + {{ [ dns_server ] }} + {%- endif -%} + tags: facts - name: generate nameservers to resolvconf set_fact: nameserverentries: "{{ dnsmasq_server|default([]) + nameservers|default([]) }}" + tags: facts - name: Remove search and nameserver options from resolvconf head lineinfile: @@ -46,7 +77,7 @@ - search - nameserver when: resolvconf.rc == 0 - notify: Dnsmasq | update resolvconf + notify: Preinstall | update resolvconf - name: Remove search and nameserver options from resolvconf cloud init temporary file lineinfile: @@ -59,7 +90,7 @@ - search - nameserver when: ansible_os_family == "CoreOS" - notify: Dnsmasq | update resolvconf for CoreOS + notify: Preinstall | update resolvconf for CoreOS - name: Add search domains to resolvconf file lineinfile: @@ -69,7 +100,7 @@ insertbefore: BOF backup: yes follow: yes - notify: Dnsmasq | update resolvconf + notify: Preinstall | update resolvconf - name: Add nameservers to resolv.conf blockinfile: @@ -84,7 +115,7 @@ backup: yes follow: yes marker: "# Ansible nameservers {mark}" - notify: Dnsmasq | update resolvconf + notify: Preinstall | update 
resolvconf - name: Add options to resolv.conf lineinfile: @@ -99,7 +130,7 @@ - ndots:{{ ndots }} - timeout:2 - attempts:2 - notify: Dnsmasq | update resolvconf + notify: Preinstall | update resolvconf - name: Remove search and nameserver options from resolvconf base lineinfile: @@ -112,16 +143,16 @@ - search - nameserver when: resolvconf.rc == 0 - notify: Dnsmasq | update resolvconf + notify: Preinstall | update resolvconf - name: disable resolv.conf modification by dhclient copy: src=dhclient_nodnsupdate dest=/etc/dhcp/dhclient-enter-hooks.d/znodnsupdate mode=0755 - notify: Dnsmasq | restart network + notify: Preinstall | restart network when: ansible_os_family == "Debian" - name: disable resolv.conf modification by dhclient copy: src=dhclient_nodnsupdate dest=/etc/dhcp/dhclient.d/nodnsupdate mode=u+x - notify: Dnsmasq | restart network + notify: Preinstall | restart network when: ansible_os_family == "RedHat" - name: get temporary resolveconf cloud init file content @@ -135,5 +166,5 @@ src: resolvconf.j2 owner: root mode: 0644 - notify: Dnsmasq | update resolvconf for CoreOS + notify: Preinstall | update resolvconf for CoreOS when: ansible_os_family == "CoreOS" diff --git a/roles/kubernetes/preinstall/tasks/set_facts.yml b/roles/kubernetes/preinstall/tasks/set_facts.yml index bd0d68aff..0f66acd14 100644 --- a/roles/kubernetes/preinstall/tasks/set_facts.yml +++ b/roles/kubernetes/preinstall/tasks/set_facts.yml @@ -49,3 +49,6 @@ etcd_after_v3: etcd_version | version_compare("v3.0.0", ">=") - set_fact: etcd_container_bin_dir: "{% if etcd_after_v3 %}/usr/local/bin/{% else %}/{% endif %}" +- set_fact: + default_resolver: >- + {%- if cloud_provider is defined and cloud_provider == 'gce' -%}169.254.169.254{%- else -%}8.8.8.8{%- endif -%} diff --git a/roles/dnsmasq/templates/resolvconf.j2 b/roles/kubernetes/preinstall/templates/resolvconf.j2 similarity index 100% rename from roles/dnsmasq/templates/resolvconf.j2 rename to 
roles/kubernetes/preinstall/templates/resolvconf.j2 diff --git a/roles/network_plugin/flannel/handlers/main.yml b/roles/network_plugin/flannel/handlers/main.yml index a503569f6..8fbb7f704 100644 --- a/roles/network_plugin/flannel/handlers/main.yml +++ b/roles/network_plugin/flannel/handlers/main.yml @@ -1,8 +1,42 @@ --- -- name: delete default docker bridge +- name: Flannel | delete default docker bridge command: ip link delete docker0 ignore_errors: yes - notify: restart docker + notify: Flannel | restart docker + +- name: Flannel | restart docker + command: /bin/true + notify: + - Flannel | reload systemd + - Flannel | reload docker.socket + - Flannel | reload docker + - Flannel | pause while Docker restarts + - Flannel | wait for docker + +- name: Flannel | reload systemd + shell: systemctl daemon-reload + when: ansible_service_mgr == "systemd" + +- name: Flannel | reload docker.socket + service: + name: docker.socket + state: restarted + when: ansible_os_family == 'CoreOS' + +- name: Flannel | reload docker + service: + name: docker + state: restarted + +- name: Flannel | pause while Docker restarts + pause: seconds=10 prompt="Waiting for docker restart" + +- name: Flannel | wait for docker + command: /usr/bin/docker images + register: docker_ready + retries: 10 + delay: 5 + until: docker_ready.rc == 0 - name: Flannel | reload kubelet service: diff --git a/roles/network_plugin/flannel/tasks/main.yml b/roles/network_plugin/flannel/tasks/main.yml index 3e5eff8e8..f4ca65d12 100644 --- a/roles/network_plugin/flannel/tasks/main.yml +++ b/roles/network_plugin/flannel/tasks/main.yml @@ -11,7 +11,7 @@ template: src: flannel-pod.yml dest: /etc/kubernetes/manifests/flannel-pod.manifest - notify: delete default docker bridge + notify: Flannel | delete default docker bridge - name: Flannel | Wait for flannel subnet.env file presence wait_for: @@ -67,7 +67,7 @@ group: root mode: 0644 notify: - - restart docker + - Flannel | restart docker when: ansible_service_mgr in 
["sysvinit","upstart"] - name: Flannel | Create docker network systemd drop-in @@ -75,7 +75,7 @@ src: flannel-options.conf.j2 dest: "/etc/systemd/system/docker.service.d/flannel-options.conf" notify: - - restart docker + - Flannel | restart docker when: ansible_service_mgr == "systemd" - meta: flush_handlers