diff --git a/.travis.yml b/.travis.yml index 863374c26..e2a9f9f07 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,6 +10,8 @@ env: TEST_ID=$TRAVIS_JOB_NUMBER CONTAINER_ENGINE=docker PRIVATE_KEY=$GCE_PRIVATE_KEY + GS_ACCESS_KEY_ID=$GS_KEY + GS_SECRET_ACCESS_KEY=$GS_SECRET ANSIBLE_KEEP_REMOTE_FILES=1 CLUSTER_MODE=default matrix: @@ -101,11 +103,11 @@ env: before_install: # Install Ansible. - - pip install --user boto -U - pip install --user ansible - pip install --user netaddr # W/A https://github.com/ansible/ansible-modules-core/issues/5196#issuecomment-253766186 - pip install --user apache-libcloud==0.20.1 + - pip install --user boto==2.9.0 -U cache: - directories: @@ -122,8 +124,6 @@ before_script: - $HOME/.local/bin/ansible-playbook --version - cp tests/ansible.cfg . # - "echo $HOME/.local/bin/ansible-playbook -i inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root -e '{\"cloud_provider\": true}' $LOG_LEVEL -e kube_network_plugin=${KUBE_NETWORK_PLUGIN} setup-kubernetes/cluster.yml" - ## Configure ansible deployment logs to be collected as an artifact. 
Enable when GCS configured, see https://docs.travis-ci.com/user/deployment/gcs -# - $HOME/.local/bin/ansible-playbook -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root scripts/configure-logs.yaml script: - > @@ -147,8 +147,21 @@ script: - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root tests/testcases/020_check-create-pod.yml $LOG_LEVEL ## Ping the between 2 pod - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root tests/testcases/030_check-network.yml $LOG_LEVEL - ## Collect env info, enable it once GCS configured, see https://docs.travis-ci.com/user/deployment/gcs -# - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root scripts/collect-info.yaml + +after_failure: + - > + $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER + -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root -e dir=$HOME + scripts/collect-info.yaml + - > + $HOME/.local/bin/ansible-playbook tests/cloud_playbooks/upload-logs-gcs.yml -i "localhost," -c local + -e kube_network_plugin=${KUBE_NETWORK_PLUGIN} + -e gce_project_id=${GCE_PROJECT_ID} + -e gs_key=${GS_ACCESS_KEY_ID} + -e gs_skey=${GS_SECRET_ACCESS_KEY} + -e ostype=${CLOUD_IMAGE} + -e commit=${TRAVIS_COMMIT} + -e dir=${HOME} after_script: - > diff --git a/cluster.yml b/cluster.yml index 295bb668a..12c090169 100644 --- a/cluster.yml +++ b/cluster.yml @@ -27,6 +27,8 @@ - hosts: kube-master roles: - { role: kubernetes/master, tags: master } + - { role: kubernetes-apps/lib, tags: apps } + - { role: kubernetes-apps/network_plugin, tags: network } - hosts: k8s-cluster roles: @@ -34,4 +36,5 @@ - hosts: kube-master[0] roles: + - { role: kubernetes-apps/lib, tags: apps } - { role: kubernetes-apps, tags: apps } diff --git 
a/contrib/terraform/openstack/README.md b/contrib/terraform/openstack/README.md index ec611a499..2840bde9c 100644 --- a/contrib/terraform/openstack/README.md +++ b/contrib/terraform/openstack/README.md @@ -5,14 +5,13 @@ Openstack. ## Status -This will install a Kubernetes cluster on an Openstack Cloud. It is tested on a -OpenStack Cloud provided by [BlueBox](https://www.blueboxcloud.com/) and -should work on most modern installs of OpenStack that support the basic +This will install a Kubernetes cluster on an Openstack Cloud. It has been tested on an +OpenStack Cloud provided by [BlueBox](https://www.blueboxcloud.com/) and on OpenStack at [EMBL-EBI's](http://www.ebi.ac.uk/) [EMBASSY Cloud](http://www.embassycloud.org/). This should work on most modern installs of OpenStack that support the basic services. There are some assumptions made to try and ensure it will work on your openstack cluster. -* floating-ips are used for access +* floating-ips are used for access, but you can have masters and nodes that don't use floating-ips if needed. You currently need at least one floating ip, which we suggest assigning to a master. * you already have a suitable OS image in glance * you already have both an internal network and a floating-ip pool created * you have security-groups enabled @@ -24,16 +23,14 @@ There are some assumptions made to try and ensure it will work on your openstack ## Terraform -Terraform will be used to provision all of the OpenStack resources required to -run Docker Swarm. It is also used to deploy and provision the software +Terraform will be used to provision all of the OpenStack resources. It is also used to deploy and provision the software requirements. ### Prep #### OpenStack -Ensure your OpenStack credentials are loaded in environment variables. This is -how I do it: +Ensure your OpenStack credentials are loaded in environment variables. 
This can be done by downloading a credentials .rc file from your OpenStack dashboard and sourcing it: ``` $ source ~/.stackrc @@ -46,7 +43,7 @@ differences between OpenStack installs the Terraform does not attempt to create these for you. By default Terraform will expect that your networks are called `internal` and -`external`. You can change this by altering the Terraform variables `network_name` and `floatingip_pool`. +`external`. You can change this by altering the Terraform variables `network_name` and `floatingip_pool`. This can be done on a new variables file or through environment variables. A full list of variables you can change can be found at [variables.tf](variables.tf). @@ -76,8 +73,21 @@ $ echo Setting up Terraform creds && \ export TF_VAR_auth_url=${OS_AUTH_URL} ``` +If you want to provision master or node VMs that don't use floating ips, write on a `my-terraform-vars.tfvars` file, for example: + +``` +number_of_k8s_masters = "1" +number_of_k8s_masters_no_floating_ip = "2" +number_of_k8s_nodes_no_floating_ip = "1" +number_of_k8s_nodes = "0" +``` +This will provision one VM as master using a floating ip, two additional masters using no floating ips (these will only have private ips inside your tenancy) and one VM as node, again without a floating ip. + + + # Provision a Kubernetes Cluster on OpenStack +If not using a tfvars file for your setup, then execute: ``` terraform apply -state=contrib/terraform/openstack/terraform.tfstate contrib/terraform/openstack openstack_compute_secgroup_v2.k8s_master: Creating... @@ -96,6 +106,13 @@ use the `terraform show` command. 
State path: contrib/terraform/openstack/terraform.tfstate ``` +Alternatively, if you wrote your terraform variables in a file `my-terraform-vars.tfvars`, your command would look like: +``` +terraform apply -state=contrib/terraform/openstack/terraform.tfstate -var-file=my-terraform-vars.tfvars contrib/terraform/openstack +``` + +If you choose to add masters or nodes without floating ips (only internal ips on your OpenStack tenancy), this script will also create a file `contrib/terraform/openstack/group_vars/k8s-cluster.yml` with an ssh command that lets ansible access your machines by tunneling through the first floating ip used. If you want to handle the ssh tunneling to these machines manually, please delete or move that file. If you want to use this, just leave it there, as ansible will pick it up automatically. + Make sure you can connect to the hosts: ``` @@ -114,6 +131,8 @@ example-k8s-master-1 | SUCCESS => { } ``` +If you are deploying a system that needs bootstrapping, like CoreOS, these might have a state `FAILED` due to CoreOS not having python. As long as the state is not `UNREACHABLE`, this is fine. + if it fails try to connect manually via SSH ... it could be somthing as simple as a stale host key. 
Deploy kubernetes: diff --git a/contrib/terraform/openstack/ansible_bastion_template.txt b/contrib/terraform/openstack/ansible_bastion_template.txt new file mode 100644 index 000000000..cdf012066 --- /dev/null +++ b/contrib/terraform/openstack/ansible_bastion_template.txt @@ -0,0 +1 @@ +ansible_ssh_common_args: '-o ProxyCommand="ssh -o StrictHostKeyChecking=no -W %h:%p -q USER@BASTION_ADDRESS"' diff --git a/contrib/terraform/openstack/group_vars/all.yml b/contrib/terraform/openstack/group_vars/all.yml index b73fb66b2..8b0cd2bcd 100644 --- a/contrib/terraform/openstack/group_vars/all.yml +++ b/contrib/terraform/openstack/group_vars/all.yml @@ -1,9 +1,14 @@ +# Valid bootstrap options (required): xenial, coreos, none +bootstrap_os: "none" + # Directory where the binaries will be installed bin_dir: /usr/local/bin # Where the binaries will be downloaded. # Note: ensure that you've enough disk space (about 1G) local_release_dir: "/tmp/releases" +# Random shifts for retrying failed ops like pushing/downloading +retry_stagger: 5 # Uncomment this line for CoreOS only. # Directory where python binary is installed @@ -28,6 +33,8 @@ kube_users: # Kubernetes cluster name, also will be used as DNS domain cluster_name: cluster.local +# Subdomains of DNS domain to be resolved via /etc/resolv.conf +ndots: 5 # For some environments, each node has a pubilcally accessible # address and an address it should bind services to. These are @@ -51,6 +58,16 @@ cluster_name: cluster.local # but don't know about that address themselves. # access_ip: 1.1.1.1 +# Etcd access modes: +# Enable multiaccess to configure clients to access all of the etcd members directly +# as the "http://hostX:port, http://hostY:port, ..." and ignore the proxy loadbalancers. +# This may be the case if clients support and loadbalance multiple etcd servers natively. 
+etcd_multiaccess: false + +# Assume there are no internal loadbalancers for apiservers exist and listen on +# kube_apiserver_port (default 443) +loadbalancer_apiserver_localhost: true + # Choose network plugin (calico, weave or flannel) kube_network_plugin: flannel @@ -89,10 +106,12 @@ kube_apiserver_insecure_port: 8080 # (http) # You still must manually configure all your containers to use this DNS server, # Kubernetes won't do this for you (yet). +# Do not install additional dnsmasq +skip_dnsmasq: false # Upstream dns servers used by dnsmasq -upstream_dns_servers: - - 8.8.8.8 - - 8.8.4.4 +#upstream_dns_servers: +# - 8.8.8.8 +# - 8.8.4.4 # # # Use dns server : https://github.com/ansibl8s/k8s-skydns/blob/master/skydns-README.md dns_setup: true @@ -109,21 +128,6 @@ dns_server: "{{ kube_service_addresses|ipaddr('net')|ipaddr(2)|ipaddr('address') # like you would do when using nova-client before starting the playbook. # cloud_provider: -# For multi masters architecture: -# kube-proxy doesn't support multiple apiservers for the time being so you'll need to configure your own loadbalancer -# This domain name will be inserted into the /etc/hosts file of all servers -# configuration example with haproxy : -# listen kubernetes-apiserver-https -# bind 10.99.0.21:8383 -# option ssl-hello-chk -# mode tcp -# timeout client 3h -# timeout server 3h -# server master1 10.99.0.26:443 -# server master2 10.99.0.27:443 -# balance roundrobin -# apiserver_loadbalancer_domain_name: "lb-apiserver.kubernetes.local" - ## Set these proxy values in order to update docker daemon to use proxies # http_proxy: "" # https_proxy: "" @@ -134,3 +138,7 @@ dns_server: "{{ kube_service_addresses|ipaddr('net')|ipaddr(2)|ipaddr('address') ## An obvious use case is allowing insecure-registry access ## to self hosted registries like so: docker_options: "--insecure-registry={{ kube_service_addresses }}" + +# default packages to install within the cluster +kpm_packages: [] +# - name: kube-system/grafana diff 
--git a/contrib/terraform/openstack/kubespray.tf b/contrib/terraform/openstack/kubespray.tf index 27217d08b..ba526b3e0 100644 --- a/contrib/terraform/openstack/kubespray.tf +++ b/contrib/terraform/openstack/kubespray.tf @@ -70,6 +70,28 @@ resource "openstack_compute_instance_v2" "k8s_master" { ssh_user = "${var.ssh_user}" kubespray_groups = "etcd,kube-master,kube-node,k8s-cluster" } + +} + + +resource "openstack_compute_instance_v2" "k8s_master_no_floating_ip" { + name = "${var.cluster_name}-k8s-master-nf-${count.index+1}" + count = "${var.number_of_k8s_masters_no_floating_ip}" + image_name = "${var.image}" + flavor_id = "${var.flavor_k8s_master}" + key_pair = "${openstack_compute_keypair_v2.k8s.name}" + network { + name = "${var.network_name}" + } + security_groups = [ "${openstack_compute_secgroup_v2.k8s_master.name}", + "${openstack_compute_secgroup_v2.k8s.name}" ] + metadata = { + ssh_user = "${var.ssh_user}" + kubespray_groups = "etcd,kube-master,kube-node,k8s-cluster" + } + provisioner "local-exec" { + command = "sed s/USER/${var.ssh_user}/ contrib/terraform/openstack/ansible_bastion_template.txt | sed s/BASTION_ADDRESS/${element(openstack_networking_floatingip_v2.k8s_master.*.address, 0)}/ > contrib/terraform/openstack/group_vars/k8s-cluster.yml" + } } resource "openstack_compute_instance_v2" "k8s_node" { @@ -89,6 +111,28 @@ resource "openstack_compute_instance_v2" "k8s_node" { } } +resource "openstack_compute_instance_v2" "k8s_node_no_floating_ip" { + name = "${var.cluster_name}-k8s-node-nf-${count.index+1}" + count = "${var.number_of_k8s_nodes_no_floating_ip}" + image_name = "${var.image}" + flavor_id = "${var.flavor_k8s_node}" + key_pair = "${openstack_compute_keypair_v2.k8s.name}" + network { + name = "${var.network_name}" + } + security_groups = ["${openstack_compute_secgroup_v2.k8s.name}" ] + metadata = { + ssh_user = "${var.ssh_user}" + kubespray_groups = "kube-node,k8s-cluster" + } + provisioner "local-exec" { + command = "sed s/USER/${var.ssh_user}/ 
contrib/terraform/openstack/ansible_bastion_template.txt | sed s/BASTION_ADDRESS/${element(openstack_networking_floatingip_v2.k8s_master.*.address, 0)}/ > contrib/terraform/openstack/group_vars/k8s-cluster.yml" + } +} + + + + #output "msg" { # value = "Your hosts are ready to go!\nYour ssh hosts are: ${join(", ", openstack_networking_floatingip_v2.k8s_master.*.address )}" #} diff --git a/contrib/terraform/openstack/variables.tf b/contrib/terraform/openstack/variables.tf index 6c1fc767d..8be38aed5 100644 --- a/contrib/terraform/openstack/variables.tf +++ b/contrib/terraform/openstack/variables.tf @@ -6,10 +6,18 @@ variable "number_of_k8s_masters" { default = 2 } +variable "number_of_k8s_masters_no_floating_ip" { + default = 2 +} + variable "number_of_k8s_nodes" { default = 1 } +variable "number_of_k8s_nodes_no_floating_ip" { + default = 1 +} + variable "public_key_path" { description = "The path of the ssh pub key" default = "~/.ssh/id_rsa.pub" diff --git a/docs/calico.md b/docs/calico.md index 50744f63f..a8bffc0db 100644 --- a/docs/calico.md +++ b/docs/calico.md @@ -10,18 +10,42 @@ docker ps | grep calico The **calicoctl** command allows to check the status of the network workloads. * Check the status of Calico nodes +``` +calicoctl node status +``` + +or for versions prior *v1.0.0*: + ``` calicoctl status ``` * Show the configured network subnet for containers +``` + calicoctl get ippool -o wide +``` + +or for versions prior *v1.0.0*: + ``` calicoctl pool show ``` * Show the workloads (ip addresses of containers and their located) +``` +calicoctl get workloadEndpoint -o wide +``` + +and + +``` +calicoctl get hostEndpoint -o wide +``` + +or for versions prior *v1.0.0*: + ``` calicoctl endpoint show --detail ``` diff --git a/docs/ha-mode.md b/docs/ha-mode.md index 792c18a19..8ec5c93a1 100644 --- a/docs/ha-mode.md +++ b/docs/ha-mode.md @@ -5,10 +5,6 @@ The following components require a highly available endpoints: * etcd cluster, * kube-apiserver service instances. 
-The former provides the -[etcd-proxy](https://coreos.com/etcd/docs/latest/proxy.html) service to access -the cluster members in HA fashion. - The latter relies on a 3rd side reverse proxies, like Nginx or HAProxy, to achieve the same goal. @@ -57,7 +53,7 @@ type. The following diagram shows how traffic to the apiserver is directed. A user may opt to use an external loadbalancer (LB) instead. An external LB provides access for external clients, while the internal LB accepts client -connections only to the localhost, similarly to the etcd-proxy HA endpoints. +connections only to the localhost. Given a frontend `VIP` address and `IP1, IP2` addresses of backends, here is an example configuration for a HAProxy service acting as an external LB: ``` diff --git a/inventory/group_vars/all.yml b/inventory/group_vars/all.yml index cbf2e63a2..f72276ae6 100644 --- a/inventory/group_vars/all.yml +++ b/inventory/group_vars/all.yml @@ -62,7 +62,7 @@ ndots: 5 # Enable multiaccess to configure clients to access all of the etcd members directly # as the "http://hostX:port, http://hostY:port, ..." and ignore the proxy loadbalancers. # This may be the case if clients support and loadbalance multiple etcd servers natively. 
-etcd_multiaccess: false +etcd_multiaccess: true # Assume there are no internal loadbalancers for apiservers exist and listen on # kube_apiserver_port (default 443) diff --git a/roles/download/defaults/main.yml b/roles/download/defaults/main.yml index cbe053fa0..1ea220fd1 100644 --- a/roles/download/defaults/main.yml +++ b/roles/download/defaults/main.yml @@ -10,7 +10,7 @@ kube_version: v1.4.3 etcd_version: v3.0.6 #TODO(mattymo): Move calico versions to roles/network_plugins/calico/defaults # after migration to container download -calico_version: v0.22.0 +calico_version: v1.0.0-beta calico_cni_version: v1.4.2 weave_version: v1.6.1 flannel_version: v0.6.2 @@ -39,9 +39,13 @@ flannel_server_helper_image_tag: "{{ flannel_server_helper_version }}" flannel_image_repo: "quay.io/coreos/flannel" flannel_image_tag: "{{ flannel_version }}" calicoctl_image_repo: "calico/ctl" -calicoctl_image_tag: "{{ calico_version }}" +# TODO(apanchenko): v1.0.0-beta can't execute `node run` from Docker container +# for details see https://github.com/projectcalico/calico-containers/issues/1291 +calicoctl_image_tag: "v0.22.0" calico_node_image_repo: "calico/node" calico_node_image_tag: "{{ calico_version }}" +calico_cni_image_repo: "calico/cni" +calico_cni_image_tag: "{{ calico_cni_version }}" hyperkube_image_repo: "quay.io/coreos/hyperkube" hyperkube_image_tag: "{{ kube_version }}_coreos.0" pod_infra_image_repo: "gcr.io/google_containers/pause-amd64" @@ -56,7 +60,7 @@ downloads: url: "{{ calico_cni_download_url }}" owner: "root" mode: "0755" - enabled: "{{ kube_network_plugin == 'calico' }}" + enabled: "{{ kube_network_plugin == 'calico' or kube_network_plugin == 'canal' }}" calico_cni_plugin_ipam: dest: calico/bin/calico-ipam version: "{{calico_cni_version}}" @@ -95,22 +99,27 @@ downloads: container: true repo: "{{ flannel_image_repo }}" tag: "{{ flannel_image_tag }}" - enabled: "{{ kube_network_plugin == 'flannel' }}" + enabled: "{{ kube_network_plugin == 'flannel' or kube_network_plugin == 
'canal' }}" flannel_server_helper: container: true repo: "{{ flannel_server_helper_image_repo }}" tag: "{{ flannel_server_helper_image_tag }}" - enabled: "{{ kube_network_plugin == 'flannel' }}" + enabled: "{{ kube_network_plugin == 'flannel' or kube_network_plugin == 'canal' }}" calicoctl: container: true repo: "{{ calicoctl_image_repo }}" tag: "{{ calicoctl_image_tag }}" - enabled: "{{ kube_network_plugin == 'calico' }}" + enabled: "{{ kube_network_plugin == 'calico' or kube_network_plugin == 'canal' }}" calico_node: container: true repo: "{{ calico_node_image_repo }}" tag: "{{ calico_node_image_tag }}" - enabled: "{{ kube_network_plugin == 'calico' }}" + enabled: "{{ kube_network_plugin == 'calico' or kube_network_plugin == 'canal' }}" + calico_cni: + container: true + repo: "{{ calico_cni_image_repo }}" + tag: "{{ calico_cni_image_tag }}" + enabled: "{{ kube_network_plugin == 'canal' }}" pod_infra: container: true repo: "{{ pod_infra_image_repo }}" diff --git a/roles/etcd/defaults/main.yml b/roles/etcd/defaults/main.yml index 02234a2fe..2df4ba165 100644 --- a/roles/etcd/defaults/main.yml +++ b/roles/etcd/defaults/main.yml @@ -1,2 +1,8 @@ --- etcd_bin_dir: "{{ local_release_dir }}/etcd/etcd-{{ etcd_version }}-linux-amd64/" + +etcd_config_dir: /etc/ssl/etcd +etcd_cert_dir: "{{ etcd_config_dir }}/ssl" +etcd_cert_group: root + +etcd_script_dir: "{{ bin_dir }}/etcd-scripts" diff --git a/roles/etcd/files/make-ssl-etcd.sh b/roles/etcd/files/make-ssl-etcd.sh new file mode 100755 index 000000000..4c7db9430 --- /dev/null +++ b/roles/etcd/files/make-ssl-etcd.sh @@ -0,0 +1,80 @@ +#!/bin/bash + +# Author: Smana smainklh@gmail.com +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -o errexit +set -o pipefail + +usage() +{ + cat << EOF +Create self signed certificates + +Usage : $(basename $0) -f [-d ] + -h | --help : Show this message + -f | --config : Openssl configuration file + -d | --ssldir : Directory where the certificates will be installed + + ex : + $(basename $0) -f openssl.conf -d /srv/ssl +EOF +} + +# Options parsing +while (($#)); do + case "$1" in + -h | --help) usage; exit 0;; + -f | --config) CONFIG=${2}; shift 2;; + -d | --ssldir) SSLDIR="${2}"; shift 2;; + *) + usage + echo "ERROR : Unknown option" + exit 3 + ;; + esac +done + +if [ -z ${CONFIG} ]; then + echo "ERROR: the openssl configuration file is missing. 
option -f" + exit 1 +fi +if [ -z ${SSLDIR} ]; then + SSLDIR="/etc/ssl/etcd" +fi + +tmpdir=$(mktemp -d /tmp/etcd_cacert.XXXXXX) +trap 'rm -rf "${tmpdir}"' EXIT +cd "${tmpdir}" + +mkdir -p "${SSLDIR}" + +# Root CA +openssl genrsa -out ca-key.pem 2048 > /dev/null 2>&1 +openssl req -x509 -new -nodes -key ca-key.pem -days 10000 -out ca.pem -subj "/CN=etcd-ca" > /dev/null 2>&1 + +# ETCD member +openssl genrsa -out member-key.pem 2048 > /dev/null 2>&1 +openssl req -new -key member-key.pem -out member.csr -subj "/CN=etcd-member" -config ${CONFIG} > /dev/null 2>&1 +openssl x509 -req -in member.csr -CA ca.pem -CAkey ca-key.pem -CAcreateserial -out member.pem -days 365 -extensions ssl_client -extfile ${CONFIG} > /dev/null 2>&1 + +# Nodes and Admin +for i in node admin; do + openssl genrsa -out ${i}-key.pem 2048 > /dev/null 2>&1 + openssl req -new -key ${i}-key.pem -out ${i}.csr -subj "/CN=kube-${i}" > /dev/null 2>&1 + openssl x509 -req -in ${i}.csr -CA ca.pem -CAkey ca-key.pem -CAcreateserial -out ${i}.pem -days 365 -extensions ssl_client -extfile ${CONFIG} > /dev/null 2>&1 +done + +# Install certs +mv *.pem ${SSLDIR}/ diff --git a/roles/etcd/handlers/main.yml b/roles/etcd/handlers/main.yml index 693754a06..badf0bd79 100644 --- a/roles/etcd/handlers/main.yml +++ b/roles/etcd/handlers/main.yml @@ -6,21 +6,14 @@ - reload etcd - wait for etcd up -- name: restart etcd-proxy - command: /bin/true - notify: - - etcd | reload systemd - - reload etcd-proxy - - wait for etcd up - - name: etcd | reload systemd command: systemctl daemon-reload when: ansible_service_mgr == "systemd" - name: wait for etcd up - uri: url="http://{% if is_etcd_master %}{{ etcd_address }}{% else %}127.0.0.1{% endif %}:2379/health" + uri: url="https://{% if is_etcd_master %}{{ etcd_address }}{% else %}127.0.0.1{% endif %}:2379/health" validate_certs=no register: result - until: result.status == 200 + until: result.status is defined and result.status == 200 retries: 10 delay: 5 @@ -30,8 +23,7 @@ state: restarted 
when: is_etcd_master -- name: reload etcd-proxy - service: - name: etcd-proxy - state: restarted - when: is_etcd_proxy +- name: set etcd_secret_changed + set_fact: + etcd_secret_changed: true + diff --git a/roles/etcd/tasks/check_certs.yml b/roles/etcd/tasks/check_certs.yml new file mode 100644 index 000000000..03a875517 --- /dev/null +++ b/roles/etcd/tasks/check_certs.yml @@ -0,0 +1,36 @@ +--- +- name: "Check_certs | check if the certs have already been generated on first master" + stat: + path: "{{ etcd_cert_dir }}/ca.pem" + delegate_to: "{{groups['etcd'][0]}}" + register: etcdcert_master + run_once: true + +- name: "Check_certs | Set default value for 'sync_certs' and 'gen_certs' to false" + set_fact: + sync_certs: false + gen_certs: false + +- name: "Check_certs | Set 'gen_certs' to true" + set_fact: + gen_certs: true + when: not etcdcert_master.stat.exists + run_once: true + +- name: "Check_certs | check if a cert already exists" + stat: + path: "{{ etcd_cert_dir }}/ca.pem" + register: etcdcert + +- name: "Check_certs | Set 'sync_certs' to true" + set_fact: + sync_certs: true + when: >- + {%- set certs = {'sync': False} -%} + {%- for server in play_hosts + if (not hostvars[server].etcdcert.stat.exists|default(False)) or + (hostvars[server].etcdcert.stat.checksum|default('') != etcdcert_master.stat.checksum|default('')) -%} + {%- set _ = certs.update({'sync': True}) -%} + {%- endfor -%} + {{ certs.sync }} + run_once: true diff --git a/roles/etcd/tasks/configure.yml b/roles/etcd/tasks/configure.yml index 514a79d73..a2ef38f2c 100644 --- a/roles/etcd/tasks/configure.yml +++ b/roles/etcd/tasks/configure.yml @@ -26,19 +26,3 @@ mode: 0755 when: ansible_service_mgr in ["sysvinit","upstart"] and ansible_os_family == "Debian" and is_etcd_master notify: restart etcd - -- name: Configure | Copy etcd-proxy.service systemd file - template: - src: "etcd-proxy-{{ etcd_deployment_type }}.service.j2" - dest: /etc/systemd/system/etcd-proxy.service - backup: yes 
- when: ansible_service_mgr == "systemd" and is_etcd_proxy - notify: restart etcd-proxy -- name: Configure | Write etcd-proxy initd script - template: - src: "deb-etcd-proxy-{{ etcd_deployment_type }}.initd.j2" - dest: /etc/init.d/etcd-proxy - owner: root - mode: 0755 - when: ansible_service_mgr in ["sysvinit","upstart"] and ansible_os_family == "Debian" and is_etcd_proxy - notify: restart etcd-proxy diff --git a/roles/etcd/tasks/gen_certs.yml b/roles/etcd/tasks/gen_certs.yml new file mode 100644 index 000000000..8d1d34b74 --- /dev/null +++ b/roles/etcd/tasks/gen_certs.yml @@ -0,0 +1,112 @@ +--- + +- name: Gen_certs | create etcd script dir + file: + path: "{{ etcd_script_dir }}" + state: directory + owner: root + when: inventory_hostname == groups['etcd'][0] + +- name: Gen_certs | create etcd cert dir + file: + path={{ etcd_cert_dir }} + group={{ etcd_cert_group }} + state=directory + owner=root + recurse=yes + +- name: Gen_certs | write openssl config + template: + src: "openssl.conf.j2" + dest: "{{ etcd_config_dir }}/openssl.conf" + run_once: yes + delegate_to: "{{groups['etcd'][0]}}" + when: gen_certs|default(false) + +- name: Gen_certs | copy certs generation script + copy: + src: "make-ssl-etcd.sh" + dest: "{{ etcd_script_dir }}/make-ssl-etcd.sh" + mode: 0700 + run_once: yes + delegate_to: "{{groups['etcd'][0]}}" + when: gen_certs|default(false) + +- name: Gen_certs | run cert generation script + command: "{{ etcd_script_dir }}/make-ssl-etcd.sh -f {{ etcd_config_dir }}/openssl.conf -d {{ etcd_cert_dir }}" + run_once: yes + delegate_to: "{{groups['etcd'][0]}}" + when: gen_certs|default(false) + notify: set etcd_secret_changed + +- set_fact: + master_certs: ['ca-key.pem', 'admin.pem', 'admin-key.pem', 'member.pem', 'member-key.pem'] + node_certs: ['ca.pem', 'node.pem', 'node-key.pem'] + +- name: Gen_certs | Gather etcd master certs + shell: "tar cfz - -C {{ etcd_cert_dir }} {{ master_certs|join(' ') }} {{ node_certs|join(' ') }}| base64 --wrap=0" + register: 
etcd_master_cert_data + delegate_to: "{{groups['etcd'][0]}}" + run_once: true + when: sync_certs|default(false) + notify: set etcd_secret_changed + +- name: Gen_certs | Gather etcd node certs + shell: "tar cfz - -C {{ etcd_cert_dir }} {{ node_certs|join(' ') }} | base64 --wrap=0" + register: etcd_node_cert_data + delegate_to: "{{groups['etcd'][0]}}" + run_once: true + when: sync_certs|default(false) + notify: set etcd_secret_changed + +- name: Gen_certs | Copy certs on masters + shell: "echo '{{etcd_master_cert_data.stdout|quote}}' | base64 -d | tar xz -C {{ etcd_cert_dir }}" + changed_when: false + when: inventory_hostname in groups['etcd'] and sync_certs|default(false) and + inventory_hostname != groups['etcd'][0] + +- name: Gen_certs | Copy certs on nodes + shell: "echo '{{etcd_node_cert_data.stdout|quote}}' | base64 -d | tar xz -C {{ etcd_cert_dir }}" + changed_when: false + when: inventory_hostname in groups['k8s-cluster'] and sync_certs|default(false) and + inventory_hostname not in groups['etcd'] + +- name: Gen_certs | check certificate permissions + file: + path={{ etcd_cert_dir }} + group={{ etcd_cert_group }} + state=directory + owner=kube + recurse=yes + +- name: Gen_certs | set permissions on keys + shell: chmod 0600 {{ etcd_cert_dir}}/*key.pem + when: inventory_hostname in groups['etcd'] + changed_when: false + +- name: Gen_certs | target ca-certificate store file + set_fact: + ca_cert_path: |- + {% if ansible_os_family == "Debian" -%} + /usr/local/share/ca-certificates/etcd-ca.crt + {%- elif ansible_os_family == "RedHat" -%} + /etc/pki/ca-trust/source/anchors/etcd-ca.crt + {%- elif ansible_os_family == "CoreOS" -%} + /etc/ssl/certs/etcd-ca.pem + {%- endif %} + +- name: Gen_certs | add CA to trusted CA dir + copy: + src: "{{ etcd_cert_dir }}/ca.pem" + dest: "{{ ca_cert_path }}" + remote_src: true + register: etcd_ca_cert + +- name: Gen_certs | update ca-certificates (Debian/Ubuntu/CoreOS) + command: update-ca-certificates + when: etcd_ca_cert.changed 
and ansible_os_family in ["Debian", "CoreOS"] + +- name: Gen_certs | update ca-certificates (RedHat) + command: update-ca-trust extract + when: etcd_ca_cert.changed and ansible_os_family == "RedHat" + diff --git a/roles/etcd/tasks/main.yml b/roles/etcd/tasks/main.yml index 3ecaa00e6..15be1a769 100644 --- a/roles/etcd/tasks/main.yml +++ b/roles/etcd/tasks/main.yml @@ -1,8 +1,15 @@ --- +- include: pre_upgrade.yml +- include: check_certs.yml +- include: gen_certs.yml - include: install.yml + when: is_etcd_master - include: set_cluster_health.yml + when: is_etcd_master - include: configure.yml + when: is_etcd_master - include: refresh_config.yml + when: is_etcd_master - name: Ensure etcd is running service: @@ -11,23 +18,11 @@ enabled: yes when: is_etcd_master -- name: Ensure etcd-proxy is running - service: - name: etcd-proxy - state: started - enabled: yes - when: is_etcd_proxy - - name: Restart etcd if binary changed command: /bin/true notify: restart etcd when: etcd_deployment_type == "host" and etcd_copy.stdout_lines and is_etcd_master -- name: Restart etcd-proxy if binary changed - command: /bin/true - notify: restart etcd-proxy - when: etcd_deployment_type == "host" and etcd_copy.stdout_lines and is_etcd_proxy - # Reload systemd before starting service - meta: flush_handlers @@ -35,4 +30,6 @@ # initial state of the cluster is in `existing` # state insted of `new`. 
- include: set_cluster_health.yml + when: is_etcd_master - include: refresh_config.yml + when: is_etcd_master diff --git a/roles/etcd/tasks/pre_upgrade.yml b/roles/etcd/tasks/pre_upgrade.yml new file mode 100644 index 000000000..d1962ea92 --- /dev/null +++ b/roles/etcd/tasks/pre_upgrade.yml @@ -0,0 +1,34 @@ +- name: "Pre-upgrade | check for etcd-proxy unit file" + stat: + path: /etc/systemd/system/etcd-proxy.service + register: etcd_proxy_service_file + +- name: "Pre-upgrade | check for etcd-proxy init script" + stat: + path: /etc/init.d/etcd-proxy + register: etcd_proxy_init_script + +- name: "Pre-upgrade | stop etcd-proxy if service defined" + service: + name: etcd-proxy + state: stopped + when: (etcd_proxy_service_file.stat.exists|default(False) or etcd_proxy_init_script.stat.exists|default(False)) + +- name: "Pre-upgrade | remove etcd-proxy service definition" + file: + path: "{{ item }}" + state: absent + when: (etcd_proxy_service_file.stat.exists|default(False) or etcd_proxy_init_script.stat.exists|default(False)) + with_items: + - /etc/systemd/system/etcd-proxy.service + - /etc/init.d/etcd-proxy + +- name: "Pre-upgrade | find etcd-proxy container" + command: docker ps -aq --filter "name=etcd-proxy*" + register: etcd_proxy_container + ignore_errors: true + +- name: "Pre-upgrade | remove etcd-proxy if it exists" + command: "docker rm -f {{item}}" + with_items: "{{etcd_proxy_container.stdout_lines|default([])}}" + diff --git a/roles/etcd/tasks/refresh_config.yml b/roles/etcd/tasks/refresh_config.yml index 178466153..80a03a7d6 100644 --- a/roles/etcd/tasks/refresh_config.yml +++ b/roles/etcd/tasks/refresh_config.yml @@ -5,10 +5,3 @@ dest: /etc/etcd.env notify: restart etcd when: is_etcd_master - -- name: Refresh config | Create etcd-proxy config file - template: - src: etcd-proxy.j2 - dest: /etc/etcd-proxy.env - notify: restart etcd-proxy - when: is_etcd_proxy diff --git a/roles/etcd/templates/deb-etcd-docker.initd.j2 
b/roles/etcd/templates/deb-etcd-docker.initd.j2 index a83aae184..4457b37b9 100644 --- a/roles/etcd/templates/deb-etcd-docker.initd.j2 +++ b/roles/etcd/templates/deb-etcd-docker.initd.j2 @@ -19,8 +19,9 @@ DAEMON={{ docker_bin_dir | default("/usr/bin") }}/docker DAEMON_EXEC=`basename $DAEMON` DAEMON_ARGS="run --restart=always --env-file=/etc/etcd.env \ --net=host \ --v /usr/share/ca-certificates/:/etc/ssl/certs:ro \ +-v /etc/ssl/certs:/etc/ssl/certs:ro \ -v /var/lib/etcd:/var/lib/etcd:rw \ +-v {{ etcd_cert_dir }}:{{ etcd_cert_dir }}:ro \ --name={{ etcd_member_name | default("etcd") }} \ {{ etcd_image_repo }}:{{ etcd_image_tag }} \ {% if etcd_after_v3 %} diff --git a/roles/etcd/templates/deb-etcd-proxy-docker.initd.j2 b/roles/etcd/templates/deb-etcd-proxy-docker.initd.j2 deleted file mode 100644 index ad0338a09..000000000 --- a/roles/etcd/templates/deb-etcd-proxy-docker.initd.j2 +++ /dev/null @@ -1,120 +0,0 @@ -#!/bin/sh -set -a - -### BEGIN INIT INFO -# Provides: etcd-proxy -# Required-Start: $local_fs $network $syslog -# Required-Stop: -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: etcd-proxy -# Description: -# etcd-proxy is a proxy for etcd: distributed, consistent key-value store for shared configuration and service discovery -### END INIT INFO - -PATH=/sbin:/usr/sbin:/bin/:/usr/bin -DESC="etcd-proxy" -NAME=etcd-proxy -DAEMON={{ docker_bin_dir | default("/usr/bin") }}/docker -DAEMON_EXEC=`basename $DAEMON` -DAEMON_ARGS="run --restart=always --env-file=/etc/etcd-proxy.env \ ---net=host \ ---stop-signal=SIGKILL \ --v /usr/share/ca-certificates/:/etc/ssl/certs:ro \ ---name={{ etcd_proxy_member_name | default("etcd-proxy") }} \ -{{ etcd_image_repo }}:{{ etcd_image_tag }} \ -{% if etcd_after_v3 %} -{{ etcd_container_bin_dir }}etcd -{% endif %}" - - -SCRIPTNAME=/etc/init.d/$NAME -DAEMON_USER=root -STOP_SCHEDULE="${STOP_SCHEDULE:-QUIT/5/TERM/5/KILL/5}" -PID=/var/run/etcd-proxy.pid - -# Exit if the binary is not present -[ -x "$DAEMON" ] || exit 0 
- -# Define LSB log_* functions. -# Depend on lsb-base (>= 3.2-14) to ensure that this file is present -# and status_of_proc is working. -. /lib/lsb/init-functions - -do_status() -{ - status_of_proc -p $PID "$DAEMON" "$NAME" && exit 0 || exit $? -} - -# Function that starts the daemon/service -# -do_start() -{ - {{ docker_bin_dir | default("/usr/bin") }}/docker rm -f {{ etcd_proxy_member_name | default("etcd-proxy") }} &>/dev/null || true - sleep 1 - start-stop-daemon --background --start --quiet --make-pidfile --pidfile $PID --user $DAEMON_USER --exec $DAEMON -- \ - $DAEMON_ARGS \ - || return 2 -} - -# -# Function that stops the daemon/service -# -do_stop() -{ - start-stop-daemon --stop --quiet --retry=$STOP_SCHEDULE --pidfile $PID --name $DAEMON_EXEC - RETVAL="$?" - - sleep 1 - return "$RETVAL" -} - - -case "$1" in - start) - log_daemon_msg "Starting $DESC" "$NAME" - do_start - case "$?" in - 0|1) log_end_msg 0 || exit 0 ;; - 2) log_end_msg 1 || exit 1 ;; - esac - ;; - stop) - log_daemon_msg "Stopping $DESC" "$NAME" - if do_stop; then - log_end_msg 0 - else - log_failure_msg "Can't stop etcd-proxy" - log_end_msg 1 - fi - ;; - status) - if do_status; then - log_end_msg 0 - else - log_failure_msg "etcd-proxy is not running" - log_end_msg 1 - fi - ;; - - restart|force-reload) - log_daemon_msg "Restarting $DESC" "$NAME" - if do_stop; then - if do_start; then - log_end_msg 0 - exit 0 - else - rc="$?" - fi - else - rc="$?" 
- fi - log_failure_msg "Can't restart etcd-proxy" - log_end_msg ${rc} - ;; - *) - echo "Usage: $SCRIPTNAME {start|stop|status|restart|force-reload}" >&2 - exit 3 - ;; -esac - diff --git a/roles/etcd/templates/deb-etcd-proxy-host.initd.j2 b/roles/etcd/templates/deb-etcd-proxy-host.initd.j2 deleted file mode 100644 index d0858bb2f..000000000 --- a/roles/etcd/templates/deb-etcd-proxy-host.initd.j2 +++ /dev/null @@ -1,110 +0,0 @@ -#!/bin/sh -set -a - -### BEGIN INIT INFO -# Provides: etcd-proxy -# Required-Start: $local_fs $network $syslog -# Required-Stop: -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: etcd-proxy -# Description: -# etcd-proxy is a proxy for etcd: distributed, consistent key-value store for shared configuration and service discovery -### END INIT INFO - -PATH=/sbin:/usr/sbin:/bin:/usr/bin -DESC="etcd-proxy" -NAME=etcd-proxy -DAEMON={{ bin_dir }}/etcd -DAEMON_ARGS="" -SCRIPTNAME=/etc/init.d/$NAME -DAEMON_USER=etcd -STOP_SCHEDULE="${STOP_SCHEDULE:-QUIT/5/TERM/5/KILL/5}" -PID=/var/run/etcd-proxy.pid - -# Exit if the binary is not present -[ -x "$DAEMON" ] || exit 0 - -# Read configuration variable file if it is present -[ -f /etc/etcd-proxy.env ] && . /etc/etcd-proxy.env - -# Define LSB log_* functions. -# Depend on lsb-base (>= 3.2-14) to ensure that this file is present -# and status_of_proc is working. -. /lib/lsb/init-functions - -do_status() -{ - status_of_proc -p $PID "$DAEMON" "$NAME" && exit 0 || exit $? -} - -# Function that starts the daemon/service -# -do_start() -{ - start-stop-daemon --background --start --quiet --make-pidfile --pidfile $PID --user $DAEMON_USER --exec $DAEMON -- \ - $DAEMON_ARGS \ - || return 2 -} - -# -# Function that stops the daemon/service -# -do_stop() -{ - start-stop-daemon --stop --quiet --retry=$STOP_SCHEDULE --pidfile $PID --name $NAME - RETVAL="$?" - - sleep 1 - return "$RETVAL" -} - - -case "$1" in - start) - log_daemon_msg "Starting $DESC" "$NAME" - do_start - case "$?" 
in - 0|1) log_end_msg 0 || exit 0 ;; - 2) log_end_msg 1 || exit 1 ;; - esac - ;; - stop) - log_daemon_msg "Stopping $DESC" "$NAME" - if do_stop; then - log_end_msg 0 - else - log_failure_msg "Can't stop etcd-proxy" - log_end_msg 1 - fi - ;; - status) - if do_status; then - log_end_msg 0 - else - log_failure_msg "etcd-proxy is not running" - log_end_msg 1 - fi - ;; - - restart|force-reload) - log_daemon_msg "Restarting $DESC" "$NAME" - if do_stop; then - if do_start; then - log_end_msg 0 - exit 0 - else - rc="$?" - fi - else - rc="$?" - fi - log_failure_msg "Can't restart etcd-proxy" - log_end_msg ${rc} - ;; - *) - echo "Usage: $SCRIPTNAME {start|stop|status|restart|force-reload}" >&2 - exit 3 - ;; -esac - diff --git a/roles/etcd/templates/etcd-docker.service.j2 b/roles/etcd/templates/etcd-docker.service.j2 index a37759fec..ff40b5b59 100644 --- a/roles/etcd/templates/etcd-docker.service.j2 +++ b/roles/etcd/templates/etcd-docker.service.j2 @@ -11,7 +11,8 @@ ExecStart={{ docker_bin_dir | default("/usr/bin") }}/docker run --restart=always {# TODO(mattymo): Allow docker IP binding and disable in envfile -p 2380:2380 -p 2379:2379 #} --net=host \ --v /usr/share/ca-certificates/:/etc/ssl/certs:ro \ +-v /etc/ssl/certs:/etc/ssl/certs:ro \ +-v {{ etcd_cert_dir }}:{{ etcd_cert_dir }}:ro \ -v /var/lib/etcd:/var/lib/etcd:rw \ --name={{ etcd_member_name | default("etcd") }} \ {{ etcd_image_repo }}:{{ etcd_image_tag }} \ diff --git a/roles/etcd/templates/etcd-proxy-docker.service.j2 b/roles/etcd/templates/etcd-proxy-docker.service.j2 deleted file mode 100644 index bf70f0e7f..000000000 --- a/roles/etcd/templates/etcd-proxy-docker.service.j2 +++ /dev/null @@ -1,28 +0,0 @@ -[Unit] -Description=etcd-proxy docker wrapper -Wants=docker.socket -After=docker.service - -[Service] -User=root -PermissionsStartOnly=true -ExecStart={{ docker_bin_dir | default("/usr/bin") }}/docker run --restart=always \ ---env-file=/etc/etcd-proxy.env \ -{# TODO(mattymo): Allow docker IP binding and disable in 
envfile - -p 2380:2380 -p 2379:2379 #} ---net=host \ ---stop-signal=SIGKILL \ --v /usr/share/ca-certificates/:/etc/ssl/certs:ro \ ---name={{ etcd_proxy_member_name | default("etcd-proxy") }} \ -{{ etcd_image_repo }}:{{ etcd_image_tag }} \ -{% if etcd_after_v3 %} -{{ etcd_container_bin_dir }}etcd -{% endif %} -ExecStartPre=-{{ docker_bin_dir | default("/usr/bin") }}/docker rm -f {{ etcd_proxy_member_name | default("etcd-proxy") }} -ExecReload={{ docker_bin_dir | default("/usr/bin") }}/docker restart {{ etcd_proxy_member_name | default("etcd-proxy") }} -ExecStop={{ docker_bin_dir | default("/usr/bin") }}/docker stop {{ etcd_proxy_member_name | default("etcd-proxy") }} -Restart=always -RestartSec=15s - -[Install] -WantedBy=multi-user.target diff --git a/roles/etcd/templates/etcd-proxy-host.service.j2 b/roles/etcd/templates/etcd-proxy-host.service.j2 deleted file mode 100644 index 4ea5f7bc9..000000000 --- a/roles/etcd/templates/etcd-proxy-host.service.j2 +++ /dev/null @@ -1,19 +0,0 @@ -[Unit] -Description=etcd-proxy -After=network.target - -[Service] -Type=notify -User=etcd -PermissionsStartOnly=true -EnvironmentFile=/etc/etcd-proxy.env -ExecStart={{ bin_dir }}/etcd -ExecStartPre=/bin/mkdir -p /var/lib/etcd-proxy -ExecStartPre=/bin/chown -R etcd: /var/lib/etcd-proxy -NotifyAccess=all -Restart=always -RestartSec=10s -LimitNOFILE=40000 - -[Install] -WantedBy=multi-user.target diff --git a/roles/etcd/templates/etcd-proxy.j2 b/roles/etcd/templates/etcd-proxy.j2 deleted file mode 100644 index 0a1492a37..000000000 --- a/roles/etcd/templates/etcd-proxy.j2 +++ /dev/null @@ -1,5 +0,0 @@ -ETCD_DATA_DIR=/var/lib/etcd-proxy -ETCD_PROXY=on -ETCD_LISTEN_CLIENT_URLS={{ etcd_access_endpoint }} -ETCD_NAME={{ etcd_proxy_member_name | default("etcd-proxy") }} -ETCD_INITIAL_CLUSTER={{ etcd_peer_addresses }} diff --git a/roles/etcd/templates/etcd.j2 b/roles/etcd/templates/etcd.j2 index b82116612..0b7e1eb9f 100644 --- a/roles/etcd/templates/etcd.j2 +++ b/roles/etcd/templates/etcd.j2 @@ 
-3,14 +3,19 @@ ETCD_ADVERTISE_CLIENT_URLS={{ etcd_client_url }} ETCD_INITIAL_ADVERTISE_PEER_URLS={{ etcd_peer_url }} ETCD_INITIAL_CLUSTER_STATE={% if etcd_cluster_is_healthy.rc != 0 | bool %}new{% else %}existing{% endif %} -{% if not is_etcd_proxy %} -ETCD_LISTEN_CLIENT_URLS=http://{{ etcd_address }}:2379,http://127.0.0.1:2379 -{% else %} -ETCD_LISTEN_CLIENT_URLS=http://{{ etcd_address }}:2379 -{% endif %} +ETCD_LISTEN_CLIENT_URLS=https://{{ etcd_address }}:2379,https://127.0.0.1:2379 ETCD_ELECTION_TIMEOUT=10000 ETCD_INITIAL_CLUSTER_TOKEN=k8s_etcd -ETCD_LISTEN_PEER_URLS=http://{{ etcd_address }}:2380 +ETCD_LISTEN_PEER_URLS=https://{{ etcd_address }}:2380 ETCD_NAME={{ etcd_member_name }} ETCD_PROXY=off ETCD_INITIAL_CLUSTER={{ etcd_peer_addresses }} + +# TLS settings +ETCD_TRUSTED_CA_FILE={{ etcd_cert_dir }}/ca.pem +ETCD_CERT_FILE={{ etcd_cert_dir }}/node.pem +ETCD_KEY_FILE={{ etcd_cert_dir }}/node-key.pem +ETCD_PEER_TRUSTED_CA_FILE={{ etcd_cert_dir }}/ca.pem +ETCD_PEER_CERT_FILE={{ etcd_cert_dir }}/member.pem +ETCD_PEER_KEY_FILE={{ etcd_cert_dir }}/member-key.pem +ETCD_PEER_CLIENT_CERT_AUTH=true diff --git a/roles/etcd/templates/openssl.conf.j2 b/roles/etcd/templates/openssl.conf.j2 new file mode 100644 index 000000000..3ea328289 --- /dev/null +++ b/roles/etcd/templates/openssl.conf.j2 @@ -0,0 +1,39 @@ +[req] +req_extensions = v3_req +distinguished_name = req_distinguished_name + +[req_distinguished_name] + +[ v3_req ] +basicConstraints = CA:FALSE +keyUsage = nonRepudiation, digitalSignature, keyEncipherment +subjectAltName = @alt_names + +[ ssl_client ] +extendedKeyUsage = clientAuth, serverAuth +basicConstraints = CA:FALSE +subjectKeyIdentifier=hash +authorityKeyIdentifier=keyid,issuer +subjectAltName = @alt_names + +[ v3_ca ] +basicConstraints = CA:TRUE +keyUsage = nonRepudiation, digitalSignature, keyEncipherment +subjectAltName = @alt_names +authorityKeyIdentifier=keyid:always,issuer + +[alt_names] +DNS.1 = localhost +{% for host in groups['etcd'] %} +DNS.{{ 1 
+ loop.index }} = {{ host }} +{% endfor %} +{% if loadbalancer_apiserver is defined and apiserver_loadbalancer_domain_name is defined %} +{% set idx = groups['etcd'] | length | int + 1 %} +DNS.{{ idx | string }} = {{ apiserver_loadbalancer_domain_name }} +{% endif %} +{% for host in groups['etcd'] %} +IP.{{ 2 * loop.index - 1 }} = {{ hostvars[host]['access_ip'] | default(hostvars[host]['ansible_default_ipv4']['address']) }} +IP.{{ 2 * loop.index }} = {{ hostvars[host]['ip'] | default(hostvars[host]['ansible_default_ipv4']['address']) }} +{% endfor %} +{% set idx = groups['etcd'] | length | int * 2 + 1 %} +IP.{{ idx }} = 127.0.0.1 diff --git a/roles/kubernetes-apps/ansible/defaults/main.yml b/roles/kubernetes-apps/ansible/defaults/main.yml index b1086aa0d..d39d146fd 100644 --- a/roles/kubernetes-apps/ansible/defaults/main.yml +++ b/roles/kubernetes-apps/ansible/defaults/main.yml @@ -9,4 +9,7 @@ kubedns_image_tag: "{{ kubedns_version }}" kubednsmasq_image_repo: "gcr.io/google_containers/kube-dnsmasq-amd64" kubednsmasq_image_tag: "{{ kubednsmasq_version }}" exechealthz_image_repo: "gcr.io/google_containers/exechealthz-amd64" -exechealthz_image_tag: "{{ exechealthz_version }}" \ No newline at end of file +exechealthz_image_tag: "{{ exechealthz_version }}" + +# SSL +etcd_cert_dir: "/etc/ssl/etcd/ssl" diff --git a/roles/kubernetes-apps/ansible/tasks/calico-policy-controller.yml b/roles/kubernetes-apps/ansible/tasks/calico-policy-controller.yml index f4ac65aeb..6ad8dd220 100644 --- a/roles/kubernetes-apps/ansible/tasks/calico-policy-controller.yml +++ b/roles/kubernetes-apps/ansible/tasks/calico-policy-controller.yml @@ -5,6 +5,9 @@ - name: Start of Calico policy controller kube: + name: "calico-policy-controller" kubectl: "{{bin_dir}}/kubectl" - filename: /etc/kubernetes/calico-policy-controller.yml + filename: "/etc/kubernetes/calico-policy-controller.yml" + namespace: "kube-system" + resource: "rs" when: inventory_hostname == groups['kube-master'][0] diff --git 
a/roles/kubernetes-apps/ansible/tasks/main.yaml b/roles/kubernetes-apps/ansible/tasks/main.yaml index f31eb442b..130a17a6f 100644 --- a/roles/kubernetes-apps/ansible/tasks/main.yaml +++ b/roles/kubernetes-apps/ansible/tasks/main.yaml @@ -18,6 +18,6 @@ with_items: "{{ manifests.results }}" when: inventory_hostname == groups['kube-master'][0] - - include: tasks/calico-policy-controller.yml - when: enable_network_policy is defined and enable_network_policy == True + when: ( enable_network_policy is defined and enable_network_policy == True ) or + ( kube_network_plugin == 'canal' ) diff --git a/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 b/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 index 7c0a21cfa..698710b95 100644 --- a/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 +++ b/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 @@ -26,7 +26,13 @@ spec: image: calico/kube-policy-controller:latest env: - name: ETCD_ENDPOINTS - value: "{{ etcd_endpoint }}" + value: "{{ etcd_access_endpoint }}" + - name: ETCD_CA_CERT_FILE + value: "{{ etcd_cert_dir }}/ca.pem" + - name: ETCD_CERT_FILE + value: "{{ etcd_cert_dir }}/node.pem" + - name: ETCD_KEY_FILE + value: "{{ etcd_cert_dir }}/node-key.pem" # Location of the Kubernetes API - this shouldn't need to be # changed so long as it is used in conjunction with # CONFIGURE_ETC_HOSTS="true". @@ -38,3 +44,11 @@ spec: # This removes the need for KubeDNS to resolve the Service. 
- name: CONFIGURE_ETC_HOSTS value: "true" + volumeMounts: + - mountPath: {{ etcd_cert_dir }} + name: etcd-certs + readOnly: true + volumes: + - hostPath: + path: {{ etcd_cert_dir }} + name: etcd-certs diff --git a/roles/kubernetes-apps/ansible/library/kube.py b/roles/kubernetes-apps/lib/library/kube.py similarity index 100% rename from roles/kubernetes-apps/ansible/library/kube.py rename to roles/kubernetes-apps/lib/library/kube.py diff --git a/roles/kubernetes-apps/network_plugin/canal/tasks/main.yaml b/roles/kubernetes-apps/network_plugin/canal/tasks/main.yaml new file mode 100644 index 000000000..c6bcd6992 --- /dev/null +++ b/roles/kubernetes-apps/network_plugin/canal/tasks/main.yaml @@ -0,0 +1,17 @@ +- name: Create canal ConfigMap + run_once: true + kube: + name: "canal-config" + kubectl: "{{bin_dir}}/kubectl" + filename: "/etc/kubernetes/canal-config.yaml" + resource: "configmap" + namespace: "kube-system" + +- name: Start flannel and calico-node + run_once: true + kube: + name: "canal-node" + kubectl: "{{bin_dir}}/kubectl" + filename: "/etc/kubernetes/canal-node.yaml" + resource: "ds" + namespace: "kube-system" diff --git a/roles/kubernetes-apps/network_plugin/meta/main.yml b/roles/kubernetes-apps/network_plugin/meta/main.yml new file mode 100644 index 000000000..14a59e5c5 --- /dev/null +++ b/roles/kubernetes-apps/network_plugin/meta/main.yml @@ -0,0 +1,4 @@ +--- +dependencies: + - role: kubernetes-apps/network_plugin/canal + when: kube_network_plugin == 'canal' diff --git a/roles/kubernetes/master/defaults/main.yml b/roles/kubernetes/master/defaults/main.yml index ee32ccf57..269ed3714 100644 --- a/roles/kubernetes/master/defaults/main.yml +++ b/roles/kubernetes/master/defaults/main.yml @@ -28,3 +28,9 @@ kube_apiserver_insecure_bind_address: 127.0.0.1 # Logging directory (sysvinit systems) kube_log_dir: "/var/log/kubernetes" + +# ETCD cert dir for connecting apiserver to etcd +etcd_config_dir: /etc/ssl/etcd +etcd_cert_dir: "{{ etcd_config_dir }}/ssl" + + diff 
--git a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 index ddd6f2085..97e71716a 100644 --- a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 +++ b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 @@ -3,6 +3,8 @@ kind: Pod metadata: name: kube-apiserver namespace: kube-system + labels: + k8s-app: kube-apiserver spec: hostNetwork: true containers: @@ -14,9 +16,12 @@ spec: - --advertise-address={{ ip | default(ansible_default_ipv4.address) }} - --etcd-servers={{ etcd_access_endpoint }} - --etcd-quorum-read=true + - --etcd-cafile={{ etcd_cert_dir }}/ca.pem + - --etcd-certfile={{ etcd_cert_dir }}/node.pem + - --etcd-keyfile={{ etcd_cert_dir }}/node-key.pem - --insecure-bind-address={{ kube_apiserver_insecure_bind_address }} - --apiserver-count={{ kube_apiserver_count }} - - --admission-control=NamespaceLifecycle,NamespaceExists,LimitRanger,ServiceAccount,ResourceQuota + - --admission-control=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,ResourceQuota - --service-cluster-ip-range={{ kube_service_addresses }} - --client-ca-file={{ kube_cert_dir }}/ca.pem - --basic-auth-file={{ kube_users_dir }}/known_users.csv @@ -50,6 +55,9 @@ spec: - mountPath: /etc/ssl/certs name: ssl-certs-host readOnly: true + - mountPath: {{ etcd_cert_dir }} + name: etcd-certs + readOnly: true - mountPath: /var/log/ name: logfile volumes: @@ -59,6 +67,9 @@ spec: - hostPath: path: /etc/ssl/certs/ name: ssl-certs-host + - hostPath: + path: {{ etcd_cert_dir }} + name: etcd-certs - hostPath: path: /var/log/ name: logfile diff --git a/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 index 3a9e1ef1b..a528f361e 100644 --- a/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 +++ 
b/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 @@ -3,6 +3,8 @@ kind: Pod metadata: name: kube-controller-manager namespace: kube-system + labels: + k8s-app: kube-controller spec: hostNetwork: true containers: diff --git a/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2 index 024ddbfaa..15a705937 100644 --- a/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2 +++ b/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2 @@ -3,6 +3,8 @@ kind: Pod metadata: name: kube-scheduler namespace: kube-system + labels: + k8s-app: kube-scheduler spec: hostNetwork: true containers: diff --git a/roles/kubernetes/node/tasks/main.yml b/roles/kubernetes/node/tasks/main.yml index a8cb6ce5a..700f7eb75 100644 --- a/roles/kubernetes/node/tasks/main.yml +++ b/roles/kubernetes/node/tasks/main.yml @@ -11,6 +11,13 @@ owner: kube when: kube_network_plugin == "calico" +- name: Write Canal cni config + template: + src: "cni-canal.conf.j2" + dest: "/etc/cni/net.d/10-canal.conf" + owner: kube + when: kube_network_plugin == "canal" + - name: Write kubelet config file template: src=kubelet.j2 dest={{ kube_config_dir }}/kubelet.env backup=yes notify: diff --git a/roles/kubernetes/node/templates/cni-calico.conf.j2 b/roles/kubernetes/node/templates/cni-calico.conf.j2 index 4615cdabd..4e9752ef4 100644 --- a/roles/kubernetes/node/templates/cni-calico.conf.j2 +++ b/roles/kubernetes/node/templates/cni-calico.conf.j2 @@ -1,6 +1,10 @@ { "name": "calico-k8s-network", "type": "calico", + "etcd_endpoints": "{{ etcd_access_endpoint }}", + "etcd_cert_file": "{{ etcd_cert_dir }}/node.pem", + "etcd_key_file": "{{ etcd_cert_dir }}/node-key.pem", + "etcd_ca_cert_file": "{{ etcd_cert_dir }}/ca.pem", "log_level": "info", "ipam": { "type": "calico-ipam" diff --git a/roles/kubernetes/node/templates/cni-canal.conf.j2 
b/roles/kubernetes/node/templates/cni-canal.conf.j2 new file mode 100644 index 000000000..b835443c7 --- /dev/null +++ b/roles/kubernetes/node/templates/cni-canal.conf.j2 @@ -0,0 +1,15 @@ +{ + "name": "canal-k8s-network", + "type": "flannel", + "delegate": { + "type": "calico", + "etcd_endpoints": "{{ etcd_access_endpoint }}", + "log_level": "info", + "policy": { + "type": "k8s" + }, + "kubernetes": { + "kubeconfig": "{{ kube_config_dir }}/node-kubeconfig.yaml" + } + } +} diff --git a/roles/kubernetes/node/templates/kubelet.j2 b/roles/kubernetes/node/templates/kubelet.j2 index 53f2915d9..46678691a 100644 --- a/roles/kubernetes/node/templates/kubelet.j2 +++ b/roles/kubernetes/node/templates/kubelet.j2 @@ -26,7 +26,7 @@ KUBELET_ARGS="--cluster_dns={{ dns_server }} --cluster_domain={{ dns_domain }} - {% else %} KUBELET_ARGS="--kubeconfig={{ kube_config_dir}}/kubelet.kubeconfig --config={{ kube_manifest_dir }} --pod-infra-container-image={{ pod_infra_image_repo }}:{{ pod_infra_image_tag }}" {% endif %} -{% if kube_network_plugin is defined and kube_network_plugin in ["calico", "weave"] %} +{% if kube_network_plugin is defined and kube_network_plugin in ["calico", "weave", "canal"] %} KUBELET_NETWORK_PLUGIN="--network-plugin=cni --network-plugin-dir=/etc/cni/net.d" {% elif kube_network_plugin is defined and kube_network_plugin == "weave" %} DOCKER_SOCKET="--docker-endpoint=unix:/var/run/weave/weave.sock" diff --git a/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2 b/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2 index 7abffe053..86d1e6f9e 100644 --- a/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2 +++ b/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2 @@ -3,6 +3,8 @@ kind: Pod metadata: name: kube-proxy namespace: kube-system + labels: + k8s-app: kube-proxy spec: hostNetwork: true containers: diff --git a/roles/kubernetes/node/templates/manifests/nginx-proxy.manifest.j2 
b/roles/kubernetes/node/templates/manifests/nginx-proxy.manifest.j2 index 50e054268..8e5dfcc11 100644 --- a/roles/kubernetes/node/templates/manifests/nginx-proxy.manifest.j2 +++ b/roles/kubernetes/node/templates/manifests/nginx-proxy.manifest.j2 @@ -3,6 +3,8 @@ kind: Pod metadata: name: nginx-proxy namespace: kube-system + labels: + k8s-app: kube-nginx spec: hostNetwork: true containers: diff --git a/roles/kubernetes/preinstall/defaults/main.yml b/roles/kubernetes/preinstall/defaults/main.yml index 3eae9757d..c02a32e29 100644 --- a/roles/kubernetes/preinstall/defaults/main.yml +++ b/roles/kubernetes/preinstall/defaults/main.yml @@ -45,3 +45,6 @@ openstack_username: "{{ lookup('env','OS_USERNAME') }}" openstack_password: "{{ lookup('env','OS_PASSWORD') }}" openstack_region: "{{ lookup('env','OS_REGION_NAME') }}" openstack_tenant_id: "{{ lookup('env','OS_TENANT_ID') }}" + +# All clients access each node individually, instead of using a load balancer. +etcd_multiaccess: true diff --git a/roles/kubernetes/preinstall/tasks/main.yml b/roles/kubernetes/preinstall/tasks/main.yml index 49e69a907..5c6520ed3 100644 --- a/roles/kubernetes/preinstall/tasks/main.yml +++ b/roles/kubernetes/preinstall/tasks/main.yml @@ -74,7 +74,7 @@ with_items: - "/etc/cni/net.d" - "/opt/cni/bin" - when: kube_network_plugin in ["calico", "weave"] and "{{ inventory_hostname in groups['k8s-cluster'] }}" + when: kube_network_plugin in ["calico", "weave", "canal"] and "{{ inventory_hostname in groups['k8s-cluster'] }}" - name: Update package management cache (YUM) yum: update_cache=yes name='*' diff --git a/roles/kubernetes/preinstall/tasks/set_facts.yml b/roles/kubernetes/preinstall/tasks/set_facts.yml index 2dd947dda..aec296c6e 100644 --- a/roles/kubernetes/preinstall/tasks/set_facts.yml +++ b/roles/kubernetes/preinstall/tasks/set_facts.yml @@ -23,14 +23,14 @@ - set_fact: etcd_address="{{ ip | default(ansible_default_ipv4['address']) }}" - set_fact: etcd_access_address="{{ access_ip | 
default(etcd_address) }}" -- set_fact: etcd_peer_url="http://{{ etcd_access_address }}:2380" -- set_fact: etcd_client_url="http://{{ etcd_access_address }}:2379" +- set_fact: etcd_peer_url="https://{{ etcd_access_address }}:2380" +- set_fact: etcd_client_url="https://{{ etcd_access_address }}:2379" - set_fact: etcd_authority="127.0.0.1:2379" -- set_fact: etcd_endpoint="http://{{ etcd_authority }}" +- set_fact: etcd_endpoint="https://{{ etcd_authority }}" - set_fact: etcd_access_addresses: |- {% for item in groups['etcd'] -%} - http://{{ hostvars[item].access_ip | default(hostvars[item].ip | default(hostvars[item].ansible_default_ipv4['address'])) }}:2379{% if not loop.last %},{% endif %} + https://{{ item }}:2379{% if not loop.last %},{% endif %} {%- endfor %} - set_fact: etcd_access_endpoint="{% if etcd_multiaccess %}{{ etcd_access_addresses }}{% else %}{{ etcd_endpoint }}{% endif %}" - set_fact: @@ -41,15 +41,8 @@ - set_fact: etcd_peer_addresses: |- {% for item in groups['etcd'] -%} - {{ "etcd"+loop.index|string }}=http://{{ hostvars[item].access_ip | default(hostvars[item].ip | default(hostvars[item].ansible_default_ipv4['address'])) }}:2380{% if not loop.last %},{% endif %} + {{ "etcd"+loop.index|string }}=https://{{ hostvars[item].access_ip | default(hostvars[item].ip | default(hostvars[item].ansible_default_ipv4['address'])) }}:2380{% if not loop.last %},{% endif %} {%- endfor %} -- set_fact: - etcd_proxy_member_name: |- - {% for host in groups['k8s-cluster'] %} - {% if inventory_hostname == host %}{{"etcd-proxy"+loop.index|string }}{% endif %} - {% endfor %} -- set_fact: - is_etcd_proxy: "{{ inventory_hostname in groups['k8s-cluster'] }}" - set_fact: is_etcd_master: "{{ inventory_hostname in groups['etcd'] }}" - set_fact: diff --git a/roles/kubernetes/secrets/tasks/gen_certs.yml b/roles/kubernetes/secrets/tasks/gen_certs.yml index bec1d9f16..28ae04892 100644 --- a/roles/kubernetes/secrets/tasks/gen_certs.yml +++ b/roles/kubernetes/secrets/tasks/gen_certs.yml 
@@ -65,21 +65,21 @@ when: inventory_hostname in groups['kube-master'] changed_when: false -- name: Gen_certs | target ca-certificates directory +- name: Gen_certs | target ca-certificates path set_fact: - ca_cert_dir: |- + ca_cert_path: |- {% if ansible_os_family == "Debian" -%} - /usr/local/share/ca-certificates + /usr/local/share/ca-certificates/kube-ca.crt {%- elif ansible_os_family == "RedHat" -%} - /etc/pki/ca-trust/source/anchors + /etc/pki/ca-trust/source/anchors/kube-ca.crt {%- elif ansible_os_family == "CoreOS" -%} - /etc/ssl/certs + /etc/ssl/certs/kube-ca.pem {%- endif %} - name: Gen_certs | add CA to trusted CA dir copy: src: "{{ kube_cert_dir }}/ca.pem" - dest: "{{ ca_cert_dir }}/kube-ca.crt" + dest: "{{ ca_cert_path }}" remote_src: true register: kube_ca_cert @@ -87,7 +87,7 @@ command: update-ca-certificates when: kube_ca_cert.changed and ansible_os_family in ["Debian", "CoreOS"] -- name: Gen_certs | update ca-certificatesa (RedHat) +- name: Gen_certs | update ca-certificates (RedHat) command: update-ca-trust extract when: kube_ca_cert.changed and ansible_os_family == "RedHat" diff --git a/roles/network_plugin/calico/defaults/main.yml b/roles/network_plugin/calico/defaults/main.yml index aec7a5e15..7b608ab7e 100644 --- a/roles/network_plugin/calico/defaults/main.yml +++ b/roles/network_plugin/calico/defaults/main.yml @@ -8,3 +8,6 @@ ipip: false # Set to true if you want your calico cni binaries to overwrite the # ones from hyperkube while leaving other cni plugins intact. 
overwrite_hyperkube_cni: true + +calico_cert_dir: /etc/calico/certs +etcd_cert_dir: /etc/ssl/etcd/ssl diff --git a/roles/network_plugin/calico/tasks/main.yml b/roles/network_plugin/calico/tasks/main.yml index 46f729883..6563a1f65 100644 --- a/roles/network_plugin/calico/tasks/main.yml +++ b/roles/network_plugin/calico/tasks/main.yml @@ -12,6 +12,24 @@ - meta: flush_handlers +- name: Calico | Create calico certs directory + file: + dest: "{{ calico_cert_dir }}" + state: directory + mode: 0750 + owner: root + group: root + +- name: Calico | Link etcd certificates for calico-node + file: + src: "{{ etcd_cert_dir }}/{{ item.s }}" + dest: "{{ calico_cert_dir }}/{{ item.d }}" + state: hard + with_items: + - {s: "ca.pem", d: "ca_cert.crt"} + - {s: "node.pem", d: "cert.crt"} + - {s: "node-key.pem", d: "key.pem"} + - name: Calico | Install calicoctl container script template: src: calicoctl-container.j2 @@ -41,59 +59,95 @@ when: "{{ overwrite_hyperkube_cni|bool }}" - name: Calico | wait for etcd - uri: url=http://localhost:2379/health + uri: url=https://localhost:2379/health validate_certs=no register: result - until: result.status == 200 + until: result.status == 200 or result.status == 401 retries: 10 delay: 5 - when: inventory_hostname in groups['kube-master'] - -- name: Calico | Check if calico network pool has already been configured - uri: - url: "{{ etcd_endpoint }}/v2/keys/calico/v1/ipam/v4/pool" - return_content: yes - status_code: 200,404 - register: calico_conf + delegate_to: "{{groups['etcd'][0]}}" run_once: true -- name: Calico | Define ipip pool argument +- name: Calico | Check if calico network pool has already been configured + command: |- + curl \ + --cacert {{ etcd_cert_dir }}/ca.pem \ + --cert {{ etcd_cert_dir}}/admin.pem \ + --key {{ etcd_cert_dir }}/admin-key.pem \ + https://localhost:2379/v2/keys/calico/v1/ipam/v4/pool + register: calico_conf + delegate_to: "{{groups['etcd'][0]}}" + run_once: true + +- name: Calico | Check calicoctl version + run_once: 
true + set_fact: + legacy_calicoctl: "{{ calicoctl_image_tag | version_compare('v1.0.0', '<') }}" + +- name: Calico | Configure calico network pool + shell: > + echo '{ + "kind": "ipPool", + "spec": {"disabled": false, "ipip": {"enabled": {{ cloud_provider is defined or ipip }}}, + "nat-outgoing": {{ nat_outgoing|default(false) and not peer_with_router|default(false) }}}, + "apiVersion": "v1", + "metadata": {"cidr": "{{ kube_pods_subnet }}"} + }' + | {{ bin_dir }}/calicoctl create -f - + environment: + NO_DEFAULT_POOLS: true + run_once: true + when: (not legacy_calicoctl and + "Key not found" in calico_conf.stdout or "nodes" not in calico_conf.stdout) + +- name: Calico (old) | Define ipip pool argument run_once: true set_fact: ipip_arg: "--ipip" - when: cloud_provider is defined or ipip|default(false) + when: (legacy_calicoctl and + cloud_provider is defined or ipip) -- name: Calico | Define nat-outgoing pool argument +- name: Calico (old) | Define nat-outgoing pool argument run_once: true set_fact: nat_arg: "--nat-outgoing" - when: nat_outgoing|default(false) and not peer_with_router|default(false) + when: (legacy_calicoctl and + nat_outgoing|default(false) and not peer_with_router|default(false)) -- name: Calico | Define calico pool task name +- name: Calico (old) | Define calico pool task name run_once: true set_fact: pool_task_name: "with options {{ ipip_arg|default('') }} {{ nat_arg|default('') }}" - when: ipip_arg|default(false) or nat_arg|default(false) + when: (legacy_calicoctl and ipip_arg|default(false) or nat_arg|default(false)) -- name: Calico | Configure calico network pool {{ pool_task_name|default('') }} +- name: Calico (old) | Configure calico network pool {{ pool_task_name|default('') }} command: "{{ bin_dir}}/calicoctl pool add {{ kube_pods_subnet }} {{ ipip_arg|default('') }} {{ nat_arg|default('') }}" environment: NO_DEFAULT_POOLS: true run_once: true - when: calico_conf.status == 404 or "nodes" not in calico_conf.content + when: 
(legacy_calicoctl and + "Key not found" in calico_conf.stdout or "nodes" not in calico_conf.stdout) - name: Calico | Get calico configuration from etcd - uri: - url: "{{ etcd_endpoint }}/v2/keys/calico/v1/ipam/v4/pool" - return_content: yes - register: calico_pools + command: |- + curl \ + --cacert {{ etcd_cert_dir }}/ca.pem \ + --cert {{ etcd_cert_dir}}/admin.pem \ + --key {{ etcd_cert_dir }}/admin-key.pem \ + https://localhost:2379/v2/keys/calico/v1/ipam/v4/pool + register: calico_pools_raw + delegate_to: "{{groups['etcd'][0]}}" + run_once: true + +- set_fact: + calico_pools: "{{ calico_pools_raw.stdout | from_json }}" run_once: true - name: Calico | Check if calico pool is properly configured fail: msg: 'Only one network pool must be configured and it must be the subnet {{ kube_pods_subnet }}. Please erase calico configuration and run the playbook again ("etcdctl rm --recursive /calico/v1/ipam/v4/pool")' - when: ( calico_pools.json['node']['nodes'] | length > 1 ) or - ( not calico_pools.json['node']['nodes'][0]['key'] | search(".*{{ kube_pods_subnet | ipaddr('network') }}.*") ) + when: ( calico_pools['node']['nodes'] | length > 1 ) or + ( not calico_pools['node']['nodes'][0]['key'] | search(".*{{ kube_pods_subnet | ipaddr('network') }}.*") ) run_once: true - name: Calico | Write /etc/network-environment @@ -124,11 +178,30 @@ enabled: yes - name: Calico | Disable node mesh - shell: "{{ bin_dir }}/calicoctl bgp node-mesh off" - when: peer_with_router|default(false) and inventory_hostname in groups['kube-node'] + shell: "{{ bin_dir }}/calicoctl config set nodeToNodeMesh off" + when: (not legacy_calicoctl and + peer_with_router|default(false) and inventory_hostname in groups['kube-node']) - name: Calico | Configure peering with router(s) + shell: > + echo '{ + "kind": "bgppeer", + "spec": {"asNumber": {{ item.as }}}, + "apiVersion": "v1", + "metadata": {"node": "rack1-host1", "scope": "node", "peerIP": "{{ item.router_id }}"} + }' + | {{ bin_dir }}/calicoctl create 
-f - + with_items: peers + when: (not legacy_calicoctl and + peer_with_router|default(false) and inventory_hostname in groups['kube-node']) + +- name: Calico (old) | Disable node mesh + shell: "{{ bin_dir }}/calicoctl bgp node-mesh off" + when: (legacy_calicoctl and + peer_with_router|default(false) and inventory_hostname in groups['kube-node']) + +- name: Calico (old) | Configure peering with router(s) shell: "{{ bin_dir }}/calicoctl node bgp peer add {{ item.router_id }} as {{ item.as }}" with_items: peers - when: peer_with_router|default(false) and inventory_hostname in groups['kube-node'] - + when: (legacy_calicoctl and + peer_with_router|default(false) and inventory_hostname in groups['kube-node']) diff --git a/roles/network_plugin/calico/templates/calico-node.service.j2 b/roles/network_plugin/calico/templates/calico-node.service.j2 index a7f7e4bab..87a51fac8 100644 --- a/roles/network_plugin/calico/templates/calico-node.service.j2 +++ b/roles/network_plugin/calico/templates/calico-node.service.j2 @@ -1,17 +1,25 @@ [Unit] Description=Calico per-node agent Documentation=https://github.com/projectcalico/calico-docker -After=docker.service docker.socket etcd-proxy.service -Wants=docker.socket etcd-proxy.service +After=docker.service docker.socket +Wants=docker.socket [Service] User=root PermissionsStartOnly=true +{% if legacy_calicoctl %} {% if inventory_hostname in groups['kube-node'] and peer_with_router|default(false)%} ExecStart={{ bin_dir }}/calicoctl node --ip={{ip | default(ansible_default_ipv4.address) }} --as={{ local_as }} --detach=false --node-image={{ calico_node_image_repo }}:{{ calico_node_image_tag }} -{% else %} +{% else %} ExecStart={{ bin_dir }}/calicoctl node --ip={{ip | default(ansible_default_ipv4.address) }} --detach=false --node-image={{ calico_node_image_repo }}:{{ calico_node_image_tag }} -{% endif %} +{% endif %} +{% else %} +{% if inventory_hostname in groups['kube-node'] and peer_with_router|default(false)%} +ExecStart={{ bin_dir 
}}/calicoctl node run --ip={{ip | default(ansible_default_ipv4.address) }} --as={{ local_as }} --node-image={{ calico_node_image_repo }}:{{ calico_node_image_tag }} +{% else %} +ExecStart={{ bin_dir }}/calicoctl node run --ip={{ip | default(ansible_default_ipv4.address) }} --node-image={{ calico_node_image_repo }}:{{ calico_node_image_tag }} +{% endif %} +{% endif %} Restart=always RestartSec=10s diff --git a/roles/network_plugin/calico/templates/calicoctl-container.j2 b/roles/network_plugin/calico/templates/calicoctl-container.j2 index 466f1df93..7be30928a 100644 --- a/roles/network_plugin/calico/templates/calicoctl-container.j2 +++ b/roles/network_plugin/calico/templates/calicoctl-container.j2 @@ -1,8 +1,13 @@ #!/bin/bash -/usr/bin/docker run --privileged --rm \ ---net=host --pid=host -e ETCD_AUTHORITY={{ etcd_authority }} \ +/usr/bin/docker run -i --privileged --rm \ +--net=host --pid=host \ +-e ETCD_ENDPOINTS={{ etcd_access_endpoint }} \ +-e ETCD_CA_CERT_FILE=/etc/calico/certs/ca_cert.crt \ +-e ETCD_CERT_FILE=/etc/calico/certs/cert.crt \ +-e ETCD_KEY_FILE=/etc/calico/certs/key.pem \ -v /usr/bin/docker:/usr/bin/docker \ -v /var/run/docker.sock:/var/run/docker.sock \ -v /var/run/calico:/var/run/calico \ +-v /etc/calico/certs:/etc/calico/certs:ro \ {{ calicoctl_image_repo }}:{{ calicoctl_image_tag}} \ $@ diff --git a/roles/network_plugin/calico/templates/deb-calico.initd.j2 b/roles/network_plugin/calico/templates/deb-calico.initd.j2 index ddbc22959..e155cae9c 100644 --- a/roles/network_plugin/calico/templates/deb-calico.initd.j2 +++ b/roles/network_plugin/calico/templates/deb-calico.initd.j2 @@ -37,7 +37,7 @@ DAEMON_USER=root do_status() { - if [ $($DOCKER ps | awk '{ print $2 }' | grep calico/node | wc -l) -eq 1 ]; then + if [ $($DOCKER ps --format "{{.Image}}" | grep -cw 'calico/node') -eq 1 ]; then return 0 else return 1 @@ -51,7 +51,11 @@ do_start() do_status retval=$? 
if [ $retval -ne 0 ]; then +{% if legacy_calicoctl %} ${DAEMON} node --ip=${DEFAULT_IPV4} >>/dev/null && return 0 || return 2 +{% else %} + ${DAEMON} node run --ip=${DEFAULT_IPV4} >>/dev/null && return 0 || return 2 +{% endif %} else return 1 fi @@ -62,7 +66,12 @@ do_start() # do_stop() { +{% if legacy_calicoctl %} ${DAEMON} node stop >> /dev/null || ${DAEMON} node stop --force >> /dev/null +{% else %} + echo "Current version of ${DAEMON} doesn't support 'node stop' command!" + return 1 +{% endif %} } diff --git a/roles/network_plugin/calico/templates/network-environment.j2 b/roles/network_plugin/calico/templates/network-environment.j2 index 086803d1b..8fd13d36c 100644 --- a/roles/network_plugin/calico/templates/network-environment.j2 +++ b/roles/network_plugin/calico/templates/network-environment.j2 @@ -3,7 +3,10 @@ DEFAULT_IPV4={{ip | default(ansible_default_ipv4.address) }} # The Kubernetes master IP -KUBERNETES_MASTER={{ first_kube_master }} +KUBERNETES_MASTER={{ kube_apiserver_endpoint }} # IP and port of etcd instance used by Calico -ETCD_AUTHORITY={{ etcd_authority }} +ETCD_ENDPOINTS={{ etcd_access_endpoint }} +ETCD_CA_CERT_FILE=/etc/calico/certs/ca_cert.crt +ETCD_CERT_FILE=/etc/calico/certs/cert.crt +ETCD_KEY_FILE=/etc/calico/certs/key.pem diff --git a/roles/network_plugin/calico/templates/rh-calico.initd.j2 b/roles/network_plugin/calico/templates/rh-calico.initd.j2 index 6fb870652..7fea72521 100644 --- a/roles/network_plugin/calico/templates/rh-calico.initd.j2 +++ b/roles/network_plugin/calico/templates/rh-calico.initd.j2 @@ -31,7 +31,7 @@ logfile="/var/log/$prog" do_status() { - if [ $($dockerexec ps | awk '{ print $2 }' | grep calico/node | wc -l) -ne 1 ]; then + if [ $($dockerexec ps --format "{{.Image}}" | grep -cw 'calico/node') -ne 1 ]; then return 1 fi } @@ -53,7 +53,11 @@ do_start() { if [ $retval -ne 0 ]; then printf "Starting $prog:\t" echo "\n$(date)\n" >> $logfile - $exec node --ip=${DEFAULT_IPV4} &>>$logfile +{% if legacy_calicoctl %} + $exec 
node --ip=${DEFAULT_IPV4} &>>$logfile +{% else %} + $exec node run --ip=${DEFAULT_IPV4} &>>$logfile +{% endif %} success echo else @@ -65,7 +69,12 @@ do_start() { do_stop() { echo -n $"Stopping $prog: " +{% if legacy_calicoctl %} $exec node stop >> /dev/null || $exec node stop --force >> /dev/null +{% else %} + echo "Current version of ${exec} doesn't support 'node stop' command!" + return 1 +{% endif %} retval=$? echo return $retval diff --git a/roles/network_plugin/canal/defaults/main.yml b/roles/network_plugin/canal/defaults/main.yml new file mode 100644 index 000000000..24f7c789b --- /dev/null +++ b/roles/network_plugin/canal/defaults/main.yml @@ -0,0 +1,11 @@ +# The interface used by canal for host <-> host communication. +# If left blank, then the interface is chosing using the node's +# default route. +canal_iface: "" + +# Whether or not to masquerade traffic to destinations not within +# the pod network. +canal_masquerade: "true" + +# Log-level +canal_log_level: "info" diff --git a/roles/network_plugin/canal/meta/main.yml b/roles/network_plugin/canal/meta/main.yml new file mode 100644 index 000000000..5b8d38d37 --- /dev/null +++ b/roles/network_plugin/canal/meta/main.yml @@ -0,0 +1,12 @@ +--- +dependencies: + - role: download + file: "{{ downloads.flannel_server_helper }}" + - role: download + file: "{{ downloads.flannel }}" + - role: download + file: "{{ downloads.calico_node }}" + - role: download + file: "{{ downloads.calicoctl }}" + - role: download + file: "{{ downloads.calico_cni }}" diff --git a/roles/network_plugin/canal/tasks/main.yml b/roles/network_plugin/canal/tasks/main.yml new file mode 100644 index 000000000..e88cfad7e --- /dev/null +++ b/roles/network_plugin/canal/tasks/main.yml @@ -0,0 +1,34 @@ +--- +- name: Canal | Set Flannel etcd configuration + command: |- + {{ bin_dir }}/etcdctl --peers={{ etcd_access_addresses }} \ + set /{{ cluster_name }}/network/config \ + '{ "Network": "{{ kube_pods_subnet }}", "SubnetLen": {{ 
kube_network_node_prefix }}, "Backend": { "Type": "{{ flannel_backend_type }}" } }' + delegate_to: "{{groups['etcd'][0]}}" + run_once: true + +- name: Canal | Write canal configmap + template: + src: canal-config.yml.j2 + dest: /etc/kubernetes/canal-config.yaml + +- name: Canal | Write canal node configuration + template: + src: canal-node.yml.j2 + dest: /etc/kubernetes/canal-node.yaml + +- name: Canal | Copy cni plugins from hyperkube + command: "/usr/bin/docker run --rm -v /opt/cni/bin:/cnibindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /usr/bin/rsync -a /opt/cni/bin/ /cnibindir/" + register: cni_task_result + until: cni_task_result.rc == 0 + retries: 4 + delay: "{{ retry_stagger | random + 3 }}" + changed_when: false + +- name: Canal | Copy cni plugins from calico/cni + command: "/usr/bin/docker run --rm -v /opt/cni/bin:/cnibindir {{ calico_cni_image_repo }}:{{ calico_cni_image_tag }} sh -c 'cp -a /opt/cni/bin/* /cnibindir/'" + register: cni_task_result + until: cni_task_result.rc == 0 + retries: 4 + delay: "{{ retry_stagger | random + 3 }}" + changed_when: false diff --git a/roles/network_plugin/canal/templates/canal-config.yml.j2 b/roles/network_plugin/canal/templates/canal-config.yml.j2 new file mode 100644 index 000000000..34f3faedb --- /dev/null +++ b/roles/network_plugin/canal/templates/canal-config.yml.j2 @@ -0,0 +1,22 @@ +# This ConfigMap can be used to configure a self-hosted Canal installation. +# See `canal.yaml` for an example of a Canal deployment which uses +# the config in this ConfigMap. +kind: ConfigMap +apiVersion: v1 +metadata: + name: canal-config +data: + # Configure this with the location of your etcd cluster. + etcd_endpoints: "{{ etcd_access_endpoint }}" + + # The interface used by canal for host <-> host communication. + # If left blank, then the interface is chosing using the node's + # default route. 
+ flanneld_iface: "{{ canal_iface }}" + + # Whether or not to masquerade traffic to destinations not within + # the pod network. + masquerade: "{{ canal_masquerade }}" + + # Cluster name for Flannel etcd path + cluster_name: "{{ cluster_name }}" diff --git a/roles/network_plugin/canal/templates/canal-node.yml.j2 b/roles/network_plugin/canal/templates/canal-node.yml.j2 new file mode 100644 index 000000000..ef6793f30 --- /dev/null +++ b/roles/network_plugin/canal/templates/canal-node.yml.j2 @@ -0,0 +1,119 @@ +--- +kind: DaemonSet +apiVersion: extensions/v1beta1 +metadata: + name: canal-node + labels: + k8s-app: canal-node +spec: + selector: + matchLabels: + k8s-app: canal-node + template: + metadata: + annotations: + scheduler.alpha.kubernetes.io/critical-pod: '' + scheduler.alpha.kubernetes.io/tolerations: '[{"key":"CriticalAddonsOnly", "operator":"Exists"}]' + labels: + k8s-app: canal-node + spec: + hostNetwork: true + volumes: + # Used by calico/node. + - name: lib-modules + hostPath: + path: /lib/modules + - name: var-run-calico + hostPath: + path: /var/run/calico + # Used to install CNI. + - name: cni-bin-dir + hostPath: + path: /opt/cni/bin + - name: cni-net-dir + hostPath: + path: /etc/cni/net.d + # Used by flannel daemon. + - name: run-flannel + hostPath: + path: /run/flannel + - name: resolv + hostPath: + path: /etc/resolv.conf + containers: + # Runs the flannel daemon to enable vxlan networking between + # container hosts. + - name: flannel + image: "{{ flannel_image_repo }}:{{ flannel_image_tag }}" + env: + # Cluster name + - name: CLUSTER_NAME + valueFrom: + configMapKeyRef: + name: canal-config + key: cluster_name + # The location of the etcd cluster. + - name: FLANNELD_ETCD_ENDPOINTS + valueFrom: + configMapKeyRef: + name: canal-config + key: etcd_endpoints + # The interface flannel should run on. + - name: FLANNELD_IFACE + valueFrom: + configMapKeyRef: + name: canal-config + key: flanneld_iface + # Perform masquerade on traffic leaving the pod cidr. 
+ - name: FLANNELD_IP_MASQ + valueFrom: + configMapKeyRef: + name: canal-config + key: masquerade + # Set etcd-prefix + - name: DOCKER_OPT_ETCD_PREFIX + value: "-etcd-prefix=/$(CLUSTER_NAME)/network" + # Write the subnet.env file to the mounted directory. + - name: FLANNELD_SUBNET_FILE + value: "/run/flannel/subnet.env" + command: + - "/bin/sh" + - "-c" + - "/opt/bin/flanneld -etcd-prefix /$(CLUSTER_NAME)/network" + ports: + - hostPort: 10253 + containerPort: 10253 + securityContext: + privileged: true + volumeMounts: + - name: "resolv" + mountPath: "/etc/resolv.conf" + - name: "run-flannel" + mountPath: "/run/flannel" + # Runs calico/node container on each Kubernetes node. This + # container programs network policy and local routes on each + # host. + - name: calico-node + image: "{{ calico_node_image_repo }}:{{ calico_node_image_tag }}" + env: + # The location of the etcd cluster. + - name: ETCD_ENDPOINTS + valueFrom: + configMapKeyRef: + name: canal-config + key: etcd_endpoints + # Disable Calico BGP. Calico is simply enforcing policy. + - name: CALICO_NETWORKING + value: "false" + # Disable file logging so `kubectl logs` works. 
+ - name: CALICO_DISABLE_FILE_LOGGING + value: "true" + securityContext: + privileged: true + volumeMounts: + - mountPath: /lib/modules + name: lib-modules + readOnly: true + - mountPath: /var/run/calico + name: var-run-calico + readOnly: false diff --git a/roles/network_plugin/flannel/tasks/main.yml b/roles/network_plugin/flannel/tasks/main.yml index a6fa183ef..8581d2ce7 100644 --- a/roles/network_plugin/flannel/tasks/main.yml +++ b/roles/network_plugin/flannel/tasks/main.yml @@ -1,9 +1,11 @@ --- -- name: Flannel | Write flannel configuration - template: - src: network.json - dest: /etc/flannel-network.json - backup: yes +- name: Flannel | Set Flannel etcd configuration + command: |- + {{ bin_dir }}/etcdctl --peers={{ etcd_access_addresses }} \ + set /{{ cluster_name }}/network/config \ + '{ "Network": "{{ kube_pods_subnet }}", "SubnetLen": {{ kube_network_node_prefix }}, "Backend": { "Type": "{{ flannel_backend_type }}" } }' + delegate_to: "{{groups['etcd'][0]}}" + run_once: true - name: Flannel | Create flannel pod manifest template: diff --git a/roles/network_plugin/flannel/templates/flannel-pod.yml b/roles/network_plugin/flannel/templates/flannel-pod.yml index 15523bdde..02c41e18b 100644 --- a/roles/network_plugin/flannel/templates/flannel-pod.yml +++ b/roles/network_plugin/flannel/templates/flannel-pod.yml @@ -12,26 +12,16 @@ - name: "subnetenv" hostPath: path: "/run/flannel" - - name: "networkconfig" + - name: "etcd-certs" hostPath: - path: "/etc/flannel-network.json" + path: "{{ etcd_cert_dir }}" containers: - - name: "flannel-server-helper" - image: "{{ flannel_server_helper_image_repo }}:{{ flannel_server_helper_image_tag }}" - args: - - "--network-config=/etc/flannel-network.json" - - "--etcd-prefix=/{{ cluster_name }}/network" - - "--etcd-server={{ etcd_endpoint }}" - volumeMounts: - - name: "networkconfig" - mountPath: "/etc/flannel-network.json" - imagePullPolicy: "Always" - name: "flannel-container" image: "{{ flannel_image_repo }}:{{ 
flannel_image_tag }}" command: - "/bin/sh" - "-c" - - "/opt/bin/flanneld -etcd-endpoints {{ etcd_access_endpoint }} -etcd-prefix /{{ cluster_name }}/network {% if flannel_interface is defined %}-iface {{ flannel_interface }}{% endif %} {% if flannel_public_ip is defined %}-public-ip {{ flannel_public_ip }}{% endif %}" + - "/opt/bin/flanneld -etcd-endpoints {{ etcd_access_endpoint }} -etcd-prefix /{{ cluster_name }}/network -etcd-cafile {{ etcd_cert_dir }}/ca.pem -etcd-certfile {{ etcd_cert_dir }}/node.pem -etcd-keyfile {{ etcd_cert_dir }}/node-key.pem {% if flannel_interface is defined %}-iface {{ flannel_interface }}{% endif %} {% if flannel_public_ip is defined %}-public-ip {{ flannel_public_ip }}{% endif %}" ports: - hostPort: 10253 containerPort: 10253 @@ -41,6 +31,8 @@ volumeMounts: - name: "subnetenv" mountPath: "/run/flannel" + - name: "etcd-certs" + mountPath: "{{ etcd_cert_dir }}" securityContext: privileged: true hostNetwork: true diff --git a/roles/network_plugin/flannel/templates/network.json b/roles/network_plugin/flannel/templates/network.json deleted file mode 100644 index cbbec3841..000000000 --- a/roles/network_plugin/flannel/templates/network.json +++ /dev/null @@ -1 +0,0 @@ -{ "Network": "{{ kube_pods_subnet }}", "SubnetLen": {{ kube_network_node_prefix }}, "Backend": { "Type": "{{ flannel_backend_type }}" } } diff --git a/roles/network_plugin/meta/main.yml b/roles/network_plugin/meta/main.yml index 736262ab0..8596c9d70 100644 --- a/roles/network_plugin/meta/main.yml +++ b/roles/network_plugin/meta/main.yml @@ -6,3 +6,5 @@ dependencies: when: kube_network_plugin == 'flannel' - role: network_plugin/weave when: kube_network_plugin == 'weave' + - role: network_plugin/canal + when: kube_network_plugin == 'canal' diff --git a/roles/uploads/defaults/main.yml b/roles/uploads/defaults/main.yml index 0774d324c..7b5797881 100644 --- a/roles/uploads/defaults/main.yml +++ b/roles/uploads/defaults/main.yml @@ -5,7 +5,7 @@ local_release_dir: /tmp kube_version: 
v1.4.3 etcd_version: v3.0.6 -calico_version: v0.22.0 +calico_version: v0.23.0 calico_cni_version: v1.4.2 weave_version: v1.6.1 diff --git a/scripts/collect-info.yaml b/scripts/collect-info.yaml index 67d4c8b35..0ba47866e 100644 --- a/scripts/collect-info.yaml +++ b/scripts/collect-info.yaml @@ -1,21 +1,14 @@ --- - hosts: all - become: true + become: false gather_facts: no vars: - debug: false commands: - - name: git_info - cmd: find . -type d -name .git -execdir sh -c 'gen-gitinfos.sh global|head -12' \; - name: timedate_info cmd: timedatectl status - - name: space_info - cmd: df -h - name: kernel_info cmd: uname -r - - name: distro_info - cmd: cat /etc/issue.net - name: docker_info cmd: docker info - name: ip_info @@ -24,23 +17,66 @@ cmd: ip ro - name: proc_info cmd: ps auxf | grep -v ]$ - - name: systemctl_info - cmd: systemctl status - name: systemctl_failed_info cmd: systemctl --state=failed --no-pager - name: k8s_info cmd: kubectl get all --all-namespaces -o wide - name: errors_info cmd: journalctl -p err --utc --no-pager + - name: etcd_info + cmd: etcdctl --debug cluster-health + - name: weave_info + cmd: weave report | jq "." 
+ - name: weave_logs + cmd: docker logs weave > weave.log + - name: kubedns_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kubedns -o name`; + do kubectl logs $i --namespace kube-system kubedns > kubedns.log; done" + - name: apiserver_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kube-apiserver -o name`; + do kubectl logs $i --namespace kube-system > kube-apiserver.log; done" + - name: controller_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kube-controller -o name`; + do kubectl logs $i --namespace kube-system > kube-controller.log; done" + - name: scheduler_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kube-scheduler -o name`; + do kubectl logs $i --namespace kube-system > kube-scheduler.log; done" + - name: proxy_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kube-proxy -o name`; + do kubectl logs $i --namespace kube-system > kube-proxy.log; done" + - name: nginx_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kube-nginx -o name`; + do kubectl logs $i --namespace kube-system > kube-nginx.log; done" + - name: flannel_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l app=flannel -o name`; + do kubectl logs $i --namespace kube-system flannel-container > flannel.log; done" + - name: canal_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=canal-node -o name`; + do kubectl logs $i --namespace kube-system flannel > flannel.log; done" + - name: calico_policy_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=calico-policy -o name`; + do kubectl logs $i --namespace kube-system calico-policy-controller > calico-policy-controller.log; done" logs: - - /var/log/ansible.log - - /var/log/ansible/ansible.log - /var/log/syslog - /var/log/daemon.log - /var/log/kern.log - - inventory/inventory.ini - - cluster.yml + - /var/log/dpkg.log + - 
/var/log/apt/history.log + - /var/log/yum.log + - /var/log/calico/bird/current + - /var/log/calico/bird6/current + - /var/log/calico/felix/current + - /var/log/calico/confd/current + - weave.log + - kubedns.log + - kube-apiserver.log + - kube-controller.log + - kube-scheduler.log + - kube-proxy.log + - kube-nginx.log + - flannel.log + - calico-policy-controller.log tasks: - name: Storing commands output @@ -48,10 +84,7 @@ register: output ignore_errors: true with_items: "{{commands}}" - - - debug: var=item - with_items: output.results - when: debug + no_log: True - name: Fetch results fetch: src={{ item.name }} dest=/tmp/collect-info/commands @@ -62,7 +95,7 @@ with_items: "{{logs}}" - name: Pack results and logs - local_action: shell GZIP=-9 tar --remove-files -cvzf logs.tar.gz -C /tmp collect-info + local_action: shell GZIP=-9 tar --remove-files -cvzf {{dir|default(".")}}/logs.tar.gz -C /tmp collect-info run_once: true - name: Clean up collected command outputs diff --git a/scripts/configure-logs.yaml b/scripts/configure-logs.yaml deleted file mode 100644 index d093e9279..000000000 --- a/scripts/configure-logs.yaml +++ /dev/null @@ -1,39 +0,0 @@ ---- -- hosts: localhost - become: true - gather_facts: no - - vars: - log_path: /var/log/ansible/ - conf_file: /etc/ansible/ansible.cfg - human_readable_plugin: false - callback_plugin_path: /usr/share/ansible/plugins/callback - - tasks: - - name: LOGS | ensure log path - file: path="{{log_path}}" state=directory owner={{ansible_ssh_user}} - - - name: LOGS | ensure plugin path - file: path="{{callback_plugin_path}}" state=directory owner={{ansible_ssh_user}} - when: human_readable_plugin - - - name: LOGS | get plugin - git: repo=https://gist.github.com/cd706de198c85a8255f6.git dest=/tmp/cd706de198c85a8255f6 - when: human_readable_plugin - - - name: LOGS | install plugin - copy: src=/tmp/cd706de198c85a8255f6/human_log.py dest="{{callback_plugin_path}}" - when: human_readable_plugin - - - name: LOGS | config - lineinfile: - 
line: "log_path={{log_path}}/ansible.log" - regexp: "^#log_path|^log_path" - dest: "{{conf_file}}" - - - name: LOGS | callback plugin - lineinfile: - line: "callback_plugins={{callback_plugin_path}}" - regexp: "^#callback_plugins|^callback_plugins" - dest: "{{conf_file}}" - when: human_readable_plugin diff --git a/tests/cloud_playbooks/templates/boto.j2 b/tests/cloud_playbooks/templates/boto.j2 new file mode 100644 index 000000000..660f1a0a3 --- /dev/null +++ b/tests/cloud_playbooks/templates/boto.j2 @@ -0,0 +1,11 @@ +[Credentials] +gs_access_key_id = {{ gs_key }} +gs_secret_access_key = {{ gs_skey }} +[Boto] +https_validate_certificates = True +[GoogleCompute] +[GSUtil] +default_project_id = {{ gce_project_id }} +content_language = en +default_api_version = 2 +[OAuth2] diff --git a/tests/cloud_playbooks/templates/gcs_life.json.j2 b/tests/cloud_playbooks/templates/gcs_life.json.j2 new file mode 100644 index 000000000..a666c8fef --- /dev/null +++ b/tests/cloud_playbooks/templates/gcs_life.json.j2 @@ -0,0 +1,9 @@ +{ + "rule": + [ + { + "action": {"type": "Delete"}, + "condition": {"age": {{expire_days}}} + } + ] +} diff --git a/tests/cloud_playbooks/upload-logs-gcs.yml b/tests/cloud_playbooks/upload-logs-gcs.yml new file mode 100644 index 000000000..80d651ba4 --- /dev/null +++ b/tests/cloud_playbooks/upload-logs-gcs.yml @@ -0,0 +1,75 @@ +--- +- hosts: localhost + become: false + gather_facts: no + + vars: + expire_days: 2 + + tasks: + - name: Generate uniq bucket name prefix + shell: date +%Y%m%d + register: out + + - name: replace_test_id + set_fact: + test_name: "kargo-ci-{{ out.stdout }}" + + - set_fact: + file_name: "{{ostype}}-{{kube_network_plugin}}-{{commit}}-logs.tar.gz" + + - name: Create a bucket + gc_storage: + bucket: "{{ test_name }}" + mode: create + permission: public-read + gs_access_key: "{{ gs_key }}" + gs_secret_key: "{{ gs_skey }}" + no_log: True + + - name: Create a lifecycle template for the bucket + template: + src: gcs_life.json.j2 + dest: 
"{{dir}}/gcs_life.json" + + - name: Create a boto config to access GCS + template: + src: boto.j2 + dest: "{{dir}}/.boto" + no_log: True + + - name: Download gsutil cp installer + get_url: + url: https://dl.google.com/dl/cloudsdk/channels/rapid/install_google_cloud_sdk.bash + dest: "{{dir}}/gcp-installer.sh" + + - name: Get gsutil tool + script: "{{dir}}/gcp-installer.sh" + environment: + CLOUDSDK_CORE_DISABLE_PROMPTS: 1 + CLOUDSDK_INSTALL_DIR: "{{dir}}" + no_log: True + ignore_errors: true + + - name: Apply the lifecycle rules + command: "{{dir}}/google-cloud-sdk/bin/gsutil lifecycle set {{dir}}/gcs_life.json gs://{{test_name}}" + environment: + BOTO_CONFIG: "{{dir}}/.boto" + no_log: True + + - name: Upload collected diagnostic info + gc_storage: + bucket: "{{ test_name }}" + mode: put + permission: public-read + object: "{{ file_name }}" + src: "{{dir}}/logs.tar.gz" + headers: '{"Content-Encoding": "x-gzip"}' + gs_access_key: "{{ gs_key }}" + gs_secret_key: "{{ gs_skey }}" + expiration: "{{expire_days * 36000|int}}" + ignore_errors: true + no_log: True + + - debug: + msg: "A public url https://storage.googleapis.com/{{test_name}}/{{file_name}}"