From 3b2554217b1bf73126bfc2251e70e4679664ff48 Mon Sep 17 00:00:00 2001 From: Bogdan Dobrelya Date: Thu, 3 Nov 2016 13:57:33 +0100 Subject: [PATCH 01/16] Upload logs to GCS after failure Delete configure logs script as not needed Rework collect info script defaults Signed-off-by: Bogdan Dobrelya --- .travis.yml | 14 +++++--- scripts/collect-info.yaml | 23 ++++++------- scripts/configure-logs.yaml | 39 ----------------------- tests/cloud_playbooks/upload-logs-gcs.yml | 28 ++++++++++++++++ 4 files changed, 48 insertions(+), 56 deletions(-) delete mode 100644 scripts/configure-logs.yaml create mode 100644 tests/cloud_playbooks/upload-logs-gcs.yml diff --git a/.travis.yml b/.travis.yml index 863374c26..42c32ae5b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,6 +10,8 @@ env: TEST_ID=$TRAVIS_JOB_NUMBER CONTAINER_ENGINE=docker PRIVATE_KEY=$GCE_PRIVATE_KEY + GS_ACCESS_KEY_ID=$GS_KEY + GS_SECRET_ACCESS_KEY=$GS_SECRET ANSIBLE_KEEP_REMOTE_FILES=1 CLUSTER_MODE=default matrix: @@ -122,8 +124,6 @@ before_script: - $HOME/.local/bin/ansible-playbook --version - cp tests/ansible.cfg . # - "echo $HOME/.local/bin/ansible-playbook -i inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root -e '{\"cloud_provider\": true}' $LOG_LEVEL -e kube_network_plugin=${KUBE_NETWORK_PLUGIN} setup-kubernetes/cluster.yml" - ## Configure ansible deployment logs to be collected as an artifact. 
Enable when GCS configured, see https://docs.travis-ci.com/user/deployment/gcs -# - $HOME/.local/bin/ansible-playbook -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root scripts/configure-logs.yaml script: - > @@ -147,8 +147,14 @@ script: - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root tests/testcases/020_check-create-pod.yml $LOG_LEVEL ## Ping the between 2 pod - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root tests/testcases/030_check-network.yml $LOG_LEVEL - ## Collect env info, enable it once GCS configured, see https://docs.travis-ci.com/user/deployment/gcs -# - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root scripts/collect-info.yaml + +after_failure: + - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root scripts/collect-info.yaml >/dev/null + - > + $HOME/.local/bin/ansible-playbook tests/cloud_playbooks/upload-logs-gcs.yml -i "localhost," -c local + -e test_id=${TEST_ID} + -e kube_network_plugin=${KUBE_NETWORK_PLUGIN} + >/dev/null after_script: - > diff --git a/scripts/collect-info.yaml b/scripts/collect-info.yaml index 67d4c8b35..877b5bf36 100644 --- a/scripts/collect-info.yaml +++ b/scripts/collect-info.yaml @@ -6,16 +6,10 @@ vars: debug: false commands: - - name: git_info - cmd: find . 
-type d -name .git -execdir sh -c 'gen-gitinfos.sh global|head -12' \; - name: timedate_info cmd: timedatectl status - - name: space_info - cmd: df -h - name: kernel_info cmd: uname -r - - name: distro_info - cmd: cat /etc/issue.net - name: docker_info cmd: docker info - name: ip_info @@ -24,23 +18,26 @@ cmd: ip ro - name: proc_info cmd: ps auxf | grep -v ]$ - - name: systemctl_info - cmd: systemctl status - name: systemctl_failed_info cmd: systemctl --state=failed --no-pager - name: k8s_info cmd: kubectl get all --all-namespaces -o wide - name: errors_info cmd: journalctl -p err --utc --no-pager + - name: etcd_info + cmd: etcdctl --debug cluster-health logs: - - /var/log/ansible.log - - /var/log/ansible/ansible.log - /var/log/syslog - /var/log/daemon.log - /var/log/kern.log - - inventory/inventory.ini - - cluster.yml + - /var/log/dpkg.log + - /var/log/apt/history.log + - /var/log/yum.log + - /var/log/calico/bird/current + - /var/log/calico/bird6/current + - /var/log/calico/felix/current + - /var/log/calico/confd/current tasks: - name: Storing commands output @@ -50,7 +47,7 @@ with_items: "{{commands}}" - debug: var=item - with_items: output.results + with_items: "{{output.results}}" when: debug - name: Fetch results diff --git a/scripts/configure-logs.yaml b/scripts/configure-logs.yaml deleted file mode 100644 index d093e9279..000000000 --- a/scripts/configure-logs.yaml +++ /dev/null @@ -1,39 +0,0 @@ ---- -- hosts: localhost - become: true - gather_facts: no - - vars: - log_path: /var/log/ansible/ - conf_file: /etc/ansible/ansible.cfg - human_readable_plugin: false - callback_plugin_path: /usr/share/ansible/plugins/callback - - tasks: - - name: LOGS | ensure log path - file: path="{{log_path}}" state=directory owner={{ansible_ssh_user}} - - - name: LOGS | ensure plugin path - file: path="{{callback_plugin_path}}" state=directory owner={{ansible_ssh_user}} - when: human_readable_plugin - - - name: LOGS | get plugin - git: 
repo=https://gist.github.com/cd706de198c85a8255f6.git dest=/tmp/cd706de198c85a8255f6 - when: human_readable_plugin - - - name: LOGS | install plugin - copy: src=/tmp/cd706de198c85a8255f6/human_log.py dest="{{callback_plugin_path}}" - when: human_readable_plugin - - - name: LOGS | config - lineinfile: - line: "log_path={{log_path}}/ansible.log" - regexp: "^#log_path|^log_path" - dest: "{{conf_file}}" - - - name: LOGS | callback plugin - lineinfile: - line: "callback_plugins={{callback_plugin_path}}" - regexp: "^#callback_plugins|^callback_plugins" - dest: "{{conf_file}}" - when: human_readable_plugin diff --git a/tests/cloud_playbooks/upload-logs-gcs.yml b/tests/cloud_playbooks/upload-logs-gcs.yml new file mode 100644 index 000000000..7a7a022c9 --- /dev/null +++ b/tests/cloud_playbooks/upload-logs-gcs.yml @@ -0,0 +1,28 @@ +--- +- hosts: localhost + become: false + gather_facts: no + + vars: + expire: 72000 + + tasks: + - name: replace_test_id + set_fact: + test_name: "{{ test_id | regex_replace('\\.', '-') }}" + + - name: Create a bucket + gc_storage: + bucket: "{{ test_name }}" + mode: create + expiration: "{{ expire }}" + permission: private + + - name: Upload collected diagnostic info + gc_storage: + bucket: "{{ test_name }}" + mode: put + permission: private + expiration: "{{ expire }}" + object: "build-{{ test_name }}-{{ kube_network_plugin }}-logs.tar.gz" + src: logs.tar.gz From 8f20d90f8814cb9be0624321101419bd8fa291b6 Mon Sep 17 00:00:00 2001 From: Spencer Smith Date: Fri, 4 Nov 2016 12:54:35 -0400 Subject: [PATCH 02/16] update admission controllers for > 1.4 --- .../master/templates/manifests/kube-apiserver.manifest.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 index ddd6f2085..4100e8a34 100644 --- a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 +++ 
b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 @@ -16,7 +16,7 @@ spec: - --etcd-quorum-read=true - --insecure-bind-address={{ kube_apiserver_insecure_bind_address }} - --apiserver-count={{ kube_apiserver_count }} - - --admission-control=NamespaceLifecycle,NamespaceExists,LimitRanger,ServiceAccount,ResourceQuota + - --admission-control=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,ResourceQuota - --service-cluster-ip-range={{ kube_service_addresses }} - --client-ca-file={{ kube_cert_dir }}/ca.pem - --basic-auth-file={{ kube_users_dir }}/known_users.csv From a6bc284abd7e3909f57ff34288e6cc5f5b1c7985 Mon Sep 17 00:00:00 2001 From: Pablo Moreno Date: Sat, 5 Nov 2016 09:12:40 +0000 Subject: [PATCH 03/16] adds ability to have hosts with no floating ips on terraform/openstack --- contrib/terraform/openstack/README.md | 37 ++++++++++++---- .../openstack/ansible_bastion_template.txt | 1 + .../terraform/openstack/group_vars/all.yml | 44 +++++++++++-------- contrib/terraform/openstack/kubespray.tf | 44 +++++++++++++++++++ contrib/terraform/openstack/variables.tf | 8 ++++ 5 files changed, 107 insertions(+), 27 deletions(-) create mode 100644 contrib/terraform/openstack/ansible_bastion_template.txt diff --git a/contrib/terraform/openstack/README.md b/contrib/terraform/openstack/README.md index ec611a499..2840bde9c 100644 --- a/contrib/terraform/openstack/README.md +++ b/contrib/terraform/openstack/README.md @@ -5,14 +5,13 @@ Openstack. ## Status -This will install a Kubernetes cluster on an Openstack Cloud. It is tested on a -OpenStack Cloud provided by [BlueBox](https://www.blueboxcloud.com/) and -should work on most modern installs of OpenStack that support the basic +This will install a Kubernetes cluster on an Openstack Cloud. It has been tested on a +OpenStack Cloud provided by [BlueBox](https://www.blueboxcloud.com/) and on OpenStack at [EMBL-EBI's](http://www.ebi.ac.uk/) [EMBASSY Cloud](http://www.embassycloud.org/). 
This should work on most modern installs of OpenStack that support the basic services. There are some assumptions made to try and ensure it will work on your openstack cluster. -* floating-ips are used for access +* floating-ips are used for access, but you can have masters and nodes that don't use floating-ips if needed. You currently need at least 1 floating ip, which we suggest assigning to a master. * you already have a suitable OS image in glance * you already have both an internal network and a floating-ip pool created * you have security-groups enabled @@ -24,16 +23,14 @@ There are some assumptions made to try and ensure it will work on your openstack ## Terraform -Terraform will be used to provision all of the OpenStack resources required to -run Docker Swarm. It is also used to deploy and provision the software +Terraform will be used to provision all of the OpenStack resources. It is also used to deploy and provision the software requirements. ### Prep #### OpenStack -Ensure your OpenStack credentials are loaded in environment variables. This is -how I do it: +Ensure your OpenStack credentials are loaded in environment variables. This can be done by downloading a credentials .rc file from your OpenStack dashboard and sourcing it: ``` $ source ~/.stackrc @@ -46,7 +43,7 @@ differences between OpenStack installs the Terraform does not attempt to create these for you. By default Terraform will expect that your networks are called `internal` and -`external`. You can change this by altering the Terraform variables `network_name` and `floatingip_pool`. +`external`. You can change this by altering the Terraform variables `network_name` and `floatingip_pool`. This can be done in a new variables file or through environment variables. A full list of variables you can change can be found at [variables.tf](variables.tf). 
@@ -76,8 +73,21 @@ $ echo Setting up Terraform creds && \ export TF_VAR_auth_url=${OS_AUTH_URL} ``` +If you want to provision master or node VMs that don't use floating ips, set the counts in a `my-terraform-vars.tfvars` file, for example: + +``` +number_of_k8s_masters = "1" +number_of_k8s_masters_no_floating_ip = "2" +number_of_k8s_nodes_no_floating_ip = "1" +number_of_k8s_nodes = "0" +``` +This will provision one VM as master using a floating ip, two additional masters using no floating ips (these will only have private ips inside your tenancy) and one VM as node, again without a floating ip. + + + # Provision a Kubernetes Cluster on OpenStack +If not using a tfvars file for your setup, then execute: ``` terraform apply -state=contrib/terraform/openstack/terraform.tfstate contrib/terraform/openstack openstack_compute_secgroup_v2.k8s_master: Creating... @@ -96,6 +106,13 @@ use the `terraform show` command. State path: contrib/terraform/openstack/terraform.tfstate ``` +Alternatively, if you wrote your terraform variables in a file `my-terraform-vars.tfvars`, your command would look like: +``` +terraform apply -state=contrib/terraform/openstack/terraform.tfstate -var-file=my-terraform-vars.tfvars contrib/terraform/openstack +``` + +If you choose to add masters or nodes without floating ips (only internal ips on your OpenStack tenancy), this script will also create a file `contrib/terraform/openstack/group_vars/k8s-cluster.yml` with an ssh command for ansible to be able to access your machines tunneling through the first floating ip used. If you want to handle the ssh tunneling to these machines manually, please delete or move that file. If you want to use this, just leave it there, as ansible will pick it up automatically. + Make sure you can connect to the hosts: ``` @@ -114,6 +131,8 @@ example-k8s-master-1 | SUCCESS => { } ``` +If you are deploying a system that needs bootstrapping, like CoreOS, these might have a state `FAILED` due to CoreOS not having python. 
As long as the state is not `UNREACHABLE`, this is fine. + if it fails try to connect manually via SSH ... it could be somthing as simple as a stale host key. Deploy kubernetes: diff --git a/contrib/terraform/openstack/ansible_bastion_template.txt b/contrib/terraform/openstack/ansible_bastion_template.txt new file mode 100644 index 000000000..cdf012066 --- /dev/null +++ b/contrib/terraform/openstack/ansible_bastion_template.txt @@ -0,0 +1 @@ +ansible_ssh_common_args: '-o ProxyCommand="ssh -o StrictHostKeyChecking=no -W %h:%p -q USER@BASTION_ADDRESS"' diff --git a/contrib/terraform/openstack/group_vars/all.yml b/contrib/terraform/openstack/group_vars/all.yml index b73fb66b2..8b0cd2bcd 100644 --- a/contrib/terraform/openstack/group_vars/all.yml +++ b/contrib/terraform/openstack/group_vars/all.yml @@ -1,9 +1,14 @@ +# Valid bootstrap options (required): xenial, coreos, none +bootstrap_os: "none" + # Directory where the binaries will be installed bin_dir: /usr/local/bin # Where the binaries will be downloaded. # Note: ensure that you've enough disk space (about 1G) local_release_dir: "/tmp/releases" +# Random shifts for retrying failed ops like pushing/downloading +retry_stagger: 5 # Uncomment this line for CoreOS only. # Directory where python binary is installed @@ -28,6 +33,8 @@ kube_users: # Kubernetes cluster name, also will be used as DNS domain cluster_name: cluster.local +# Subdomains of DNS domain to be resolved via /etc/resolv.conf +ndots: 5 # For some environments, each node has a pubilcally accessible # address and an address it should bind services to. These are @@ -51,6 +58,16 @@ cluster_name: cluster.local # but don't know about that address themselves. # access_ip: 1.1.1.1 +# Etcd access modes: +# Enable multiaccess to configure clients to access all of the etcd members directly +# as the "http://hostX:port, http://hostY:port, ..." and ignore the proxy loadbalancers. 
+# This may be the case if clients support and loadbalance multiple etcd servers natively. +etcd_multiaccess: false + +# Assume that no internal loadbalancers for apiservers exist and listen on +# kube_apiserver_port (default 443) +loadbalancer_apiserver_localhost: true + # Choose network plugin (calico, weave or flannel) kube_network_plugin: flannel @@ -89,10 +106,12 @@ kube_apiserver_insecure_port: 8080 # (http) # You still must manually configure all your containers to use this DNS server, # Kubernetes won't do this for you (yet). +# Do not install additional dnsmasq +skip_dnsmasq: false # Upstream dns servers used by dnsmasq -upstream_dns_servers: - - 8.8.8.8 - - 8.8.4.4 +#upstream_dns_servers: +# - 8.8.8.8 +# - 8.8.4.4 # # # Use dns server : https://github.com/ansibl8s/k8s-skydns/blob/master/skydns-README.md dns_setup: true @@ -109,21 +128,6 @@ dns_server: "{{ kube_service_addresses|ipaddr('net')|ipaddr(2)|ipaddr('address') # like you would do when using nova-client before starting the playbook. 
# cloud_provider: -# For multi masters architecture: -# kube-proxy doesn't support multiple apiservers for the time being so you'll need to configure your own loadbalancer -# This domain name will be inserted into the /etc/hosts file of all servers -# configuration example with haproxy : -# listen kubernetes-apiserver-https -# bind 10.99.0.21:8383 -# option ssl-hello-chk -# mode tcp -# timeout client 3h -# timeout server 3h -# server master1 10.99.0.26:443 -# server master2 10.99.0.27:443 -# balance roundrobin -# apiserver_loadbalancer_domain_name: "lb-apiserver.kubernetes.local" - ## Set these proxy values in order to update docker daemon to use proxies # http_proxy: "" # https_proxy: "" @@ -134,3 +138,7 @@ dns_server: "{{ kube_service_addresses|ipaddr('net')|ipaddr(2)|ipaddr('address') ## An obvious use case is allowing insecure-registry access ## to self hosted registries like so: docker_options: "--insecure-registry={{ kube_service_addresses }}" + +# default packages to install within the cluster +kpm_packages: [] +# - name: kube-system/grafana diff --git a/contrib/terraform/openstack/kubespray.tf b/contrib/terraform/openstack/kubespray.tf index 27217d08b..ba526b3e0 100644 --- a/contrib/terraform/openstack/kubespray.tf +++ b/contrib/terraform/openstack/kubespray.tf @@ -70,6 +70,28 @@ resource "openstack_compute_instance_v2" "k8s_master" { ssh_user = "${var.ssh_user}" kubespray_groups = "etcd,kube-master,kube-node,k8s-cluster" } + +} + + +resource "openstack_compute_instance_v2" "k8s_master_no_floating_ip" { + name = "${var.cluster_name}-k8s-master-nf-${count.index+1}" + count = "${var.number_of_k8s_masters_no_floating_ip}" + image_name = "${var.image}" + flavor_id = "${var.flavor_k8s_master}" + key_pair = "${openstack_compute_keypair_v2.k8s.name}" + network { + name = "${var.network_name}" + } + security_groups = [ "${openstack_compute_secgroup_v2.k8s_master.name}", + "${openstack_compute_secgroup_v2.k8s.name}" ] + metadata = { + ssh_user = "${var.ssh_user}" + 
kubespray_groups = "etcd,kube-master,kube-node,k8s-cluster" + } + provisioner "local-exec" { + command = "sed s/USER/${var.ssh_user}/ contrib/terraform/openstack/ansible_bastion_template.txt | sed s/BASTION_ADDRESS/${element(openstack_networking_floatingip_v2.k8s_master.*.address, 0)}/ > contrib/terraform/openstack/group_vars/k8s-cluster.yml" + } } resource "openstack_compute_instance_v2" "k8s_node" { @@ -89,6 +111,28 @@ resource "openstack_compute_instance_v2" "k8s_node" { } } +resource "openstack_compute_instance_v2" "k8s_node_no_floating_ip" { + name = "${var.cluster_name}-k8s-node-nf-${count.index+1}" + count = "${var.number_of_k8s_nodes_no_floating_ip}" + image_name = "${var.image}" + flavor_id = "${var.flavor_k8s_node}" + key_pair = "${openstack_compute_keypair_v2.k8s.name}" + network { + name = "${var.network_name}" + } + security_groups = ["${openstack_compute_secgroup_v2.k8s.name}" ] + metadata = { + ssh_user = "${var.ssh_user}" + kubespray_groups = "kube-node,k8s-cluster" + } + provisioner "local-exec" { + command = "sed s/USER/${var.ssh_user}/ contrib/terraform/openstack/ansible_bastion_template.txt | sed s/BASTION_ADDRESS/${element(openstack_networking_floatingip_v2.k8s_master.*.address, 0)}/ > contrib/terraform/openstack/group_vars/k8s-cluster.yml" + } +} + + + + #output "msg" { # value = "Your hosts are ready to go!\nYour ssh hosts are: ${join(", ", openstack_networking_floatingip_v2.k8s_master.*.address )}" #} diff --git a/contrib/terraform/openstack/variables.tf b/contrib/terraform/openstack/variables.tf index 6c1fc767d..8be38aed5 100644 --- a/contrib/terraform/openstack/variables.tf +++ b/contrib/terraform/openstack/variables.tf @@ -6,10 +6,18 @@ variable "number_of_k8s_masters" { default = 2 } +variable "number_of_k8s_masters_no_floating_ip" { + default = 2 +} + variable "number_of_k8s_nodes" { default = 1 } +variable "number_of_k8s_nodes_no_floating_ip" { + default = 1 +} + variable "public_key_path" { description = "The path of the ssh pub key" 
default = "~/.ssh/id_rsa.pub" From 39b8336f3fff9b43d1bd35c52085cd8196f342cd Mon Sep 17 00:00:00 2001 From: Bogdan Dobrelya Date: Fri, 4 Nov 2016 17:53:34 +0100 Subject: [PATCH 04/16] Fix upload logs Signed-off-by: Bogdan Dobrelya --- .travis.yml | 3 ++- tests/cloud_playbooks/upload-logs-gcs.yml | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 42c32ae5b..ac922fd80 100644 --- a/.travis.yml +++ b/.travis.yml @@ -154,7 +154,8 @@ after_failure: $HOME/.local/bin/ansible-playbook tests/cloud_playbooks/upload-logs-gcs.yml -i "localhost," -c local -e test_id=${TEST_ID} -e kube_network_plugin=${KUBE_NETWORK_PLUGIN} - >/dev/null + -e gs_key=${GS_ACCESS_KEY_ID} + -e gs_skey=${GS_SECRET_ACCESS_KEY} after_script: - > diff --git a/tests/cloud_playbooks/upload-logs-gcs.yml b/tests/cloud_playbooks/upload-logs-gcs.yml index 7a7a022c9..ba31ab56f 100644 --- a/tests/cloud_playbooks/upload-logs-gcs.yml +++ b/tests/cloud_playbooks/upload-logs-gcs.yml @@ -17,6 +17,8 @@ mode: create expiration: "{{ expire }}" permission: private + gs_access_key: gs_key + gs_secret_key: gs_skey - name: Upload collected diagnostic info gc_storage: @@ -26,3 +28,16 @@ expiration: "{{ expire }}" object: "build-{{ test_name }}-{{ kube_network_plugin }}-logs.tar.gz" src: logs.tar.gz + gs_access_key: gs_key + gs_secret_key: gs_skey + + - name: Get a link + gc_storage: + bucket: "{{ test_name }}" + object: "build-{{ test_name }}-{{ kube_network_plugin }}-logs.tar.gz" + mode: get_url + register: url + gs_access_key: gs_key + gs_secret_key: gs_skey + + - debug: msg="Download URL {{get_url}}" From f106bf5bc47742a34318dadb8f5681be5ff7a63f Mon Sep 17 00:00:00 2001 From: Matthew Mosesohn Date: Wed, 26 Oct 2016 17:56:15 +0300 Subject: [PATCH 05/16] adds ability to have hosts with no floating ips on terraform/openstack (+8 squashed commits) Squashed commits: [f9355ea] Swap order in which we reload docker/socket [2ca6819] Reload docker.socket after installing 
flannel on coreos Workaround for #569 [9f976e5] Vagrantfile: setup proxy inside virtual machines In corporate networks, it is good to pre-configure proxy variables. [9d7142f] Vagrantfile: use Ubuntu 16.04 LTS Use recent supported version of Ubuntu for local development setup with Vagrant. [50f77cc] Add CI test layouts * Drop Wily from test matrix * Replace the Wily cases dropped with extra cases to test separate roles deployment Signed-off-by: Bogdan Dobrelya [03e162b] Update OWNERS [c7b00ca] Use tar+register instead of copy/slurp for distributing tokens and certs Related bug: https://github.com/ansible/ansible/issues/15405 Uses tar and register because synchronize module cannot sudo on the remote side correctly and copy is too slow. This patch dramatically cuts down the number of tasks to process for cert synchronization. [2778ac6] Add new var skip_dnsmasq_k8s If skip_dnsmasq is set, it will still not set up dnsmasq k8s pod. This enables independent setup of resolvconf section before kubelet is up. 
--- .travis.yml | 34 +++++++++----- OWNERS | 3 ++ Vagrantfile | 15 ++++++- contrib/terraform/openstack/README.md | 37 ++++++++++++---- .../openstack/ansible_bastion_template.txt | 1 + .../terraform/openstack/group_vars/all.yml | 44 +++++++++++-------- contrib/terraform/openstack/kubespray.tf | 44 +++++++++++++++++++ contrib/terraform/openstack/variables.tf | 8 ++++ roles/dnsmasq/defaults/main.yml | 8 +++- roles/dnsmasq/tasks/main.yml | 2 +- roles/docker/handlers/main.yml | 7 +++ roles/kubernetes/secrets/tasks/gen_certs.yml | 33 +++++++------- roles/kubernetes/secrets/tasks/gen_tokens.yml | 17 +++---- tests/cloud_playbooks/create-gce.yml | 2 +- tests/cloud_playbooks/delete-gce.yml | 2 +- tests/templates/inventory-gce.j2 | 11 +++++ 16 files changed, 198 insertions(+), 70 deletions(-) create mode 100644 contrib/terraform/openstack/ansible_bastion_template.txt diff --git a/.travis.yml b/.travis.yml index 13604765e..863374c26 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,80 +11,92 @@ env: CONTAINER_ENGINE=docker PRIVATE_KEY=$GCE_PRIVATE_KEY ANSIBLE_KEEP_REMOTE_FILES=1 + CLUSTER_MODE=default matrix: # Debian Jessie - >- KUBE_NETWORK_PLUGIN=flannel CLOUD_IMAGE=debian-8-kubespray CLOUD_REGION=europe-west1-b + CLUSTER_MODE=default - >- KUBE_NETWORK_PLUGIN=calico CLOUD_IMAGE=debian-8-kubespray CLOUD_REGION=us-central1-c + CLUSTER_MODE=default - >- KUBE_NETWORK_PLUGIN=weave CLOUD_IMAGE=debian-8-kubespray CLOUD_REGION=us-east1-d + CLUSTER_MODE=default # Centos 7 - >- KUBE_NETWORK_PLUGIN=flannel CLOUD_IMAGE=centos-7-sudo CLOUD_REGION=asia-east1-c - + CLUSTER_MODE=default - >- KUBE_NETWORK_PLUGIN=calico CLOUD_IMAGE=centos-7-sudo CLOUD_REGION=europe-west1-b - + CLUSTER_MODE=default - >- KUBE_NETWORK_PLUGIN=weave CLOUD_IMAGE=centos-7-sudo CLOUD_REGION=us-central1-c + CLUSTER_MODE=default # Redhat 7 - >- KUBE_NETWORK_PLUGIN=flannel CLOUD_IMAGE=rhel-7-sudo CLOUD_REGION=us-east1-d - + CLUSTER_MODE=default - >- KUBE_NETWORK_PLUGIN=calico CLOUD_IMAGE=rhel-7-sudo 
CLOUD_REGION=asia-east1-c - + CLUSTER_MODE=default - >- KUBE_NETWORK_PLUGIN=weave CLOUD_IMAGE=rhel-7-sudo CLOUD_REGION=europe-west1-b + CLUSTER_MODE=default # Ubuntu 16.04 - >- KUBE_NETWORK_PLUGIN=flannel CLOUD_IMAGE=ubuntu-1604-xenial CLOUD_REGION=us-central1-c + CLUSTER_MODE=default - >- KUBE_NETWORK_PLUGIN=calico CLOUD_IMAGE=ubuntu-1604-xenial CLOUD_REGION=us-east1-d + CLUSTER_MODE=default - >- KUBE_NETWORK_PLUGIN=weave CLOUD_IMAGE=ubuntu-1604-xenial CLOUD_REGION=asia-east1-c + CLUSTER_MODE=default - # Ubuntu 15.10 + # Extra cases for separated roles - >- KUBE_NETWORK_PLUGIN=flannel - CLOUD_IMAGE=ubuntu-1510-wily + CLOUD_IMAGE=rhel-7-sudo CLOUD_REGION=europe-west1-b + CLUSTER_MODE=separate - >- KUBE_NETWORK_PLUGIN=calico - CLOUD_IMAGE=ubuntu-1510-wily + CLOUD_IMAGE=ubuntu-1604-xenial CLOUD_REGION=us-central1-a + CLUSTER_MODE=separate - >- KUBE_NETWORK_PLUGIN=weave - CLOUD_IMAGE=ubuntu-1510-wily + CLOUD_IMAGE=debian-8-kubespray CLOUD_REGION=us-east1-d + CLUSTER_MODE=separate before_install: @@ -92,7 +104,8 @@ before_install: - pip install --user boto -U - pip install --user ansible - pip install --user netaddr - - pip install --user apache-libcloud + # W/A https://github.com/ansible/ansible-modules-core/issues/5196#issuecomment-253766186 + - pip install --user apache-libcloud==0.20.1 cache: - directories: @@ -114,7 +127,8 @@ before_script: script: - > - $HOME/.local/bin/ansible-playbook tests/cloud_playbooks/create-gce.yml -i tests/local_inventory/hosts -c local $LOG_LEVEL + $HOME/.local/bin/ansible-playbook tests/cloud_playbooks/create-gce.yml -i tests/local_inventory/hosts.cfg -c local $LOG_LEVEL + -e mode=${CLUSTER_MODE} -e test_id=${TEST_ID} -e kube_network_plugin=${KUBE_NETWORK_PLUGIN} -e gce_project_id=${GCE_PROJECT_ID} diff --git a/OWNERS b/OWNERS index 583a0314b..6ecbee5c9 100644 --- a/OWNERS +++ b/OWNERS @@ -4,3 +4,6 @@ owners: - Smana - ant31 + - bogdando + - mattymo + - rsmitty diff --git a/Vagrantfile b/Vagrantfile index 44f80db8c..73f812bdf 100644 
--- a/Vagrantfile +++ b/Vagrantfile @@ -16,7 +16,7 @@ $vm_cpus = 1 $shared_folders = {} $forwarded_ports = {} $subnet = "172.17.8" -$box = "bento/ubuntu-14.04" +$box = "bento/ubuntu-16.04" host_vars = {} @@ -38,6 +38,13 @@ if ! File.exist?(File.join(File.dirname($inventory), "hosts")) end end +if Vagrant.has_plugin?("vagrant-proxyconf") + $no_proxy = ENV['NO_PROXY'] || ENV['no_proxy'] || "127.0.0.1,localhost" + (1..$num_instances).each do |i| + $no_proxy += ",#{$subnet}.#{i+100}" + end +end + Vagrant.configure("2") do |config| # always use Vagrants insecure key config.ssh.insert_key = false @@ -52,6 +59,12 @@ Vagrant.configure("2") do |config| config.vm.define vm_name = "%s-%02d" % [$instance_name_prefix, i] do |config| config.vm.hostname = vm_name + if Vagrant.has_plugin?("vagrant-proxyconf") + config.proxy.http = ENV['HTTP_PROXY'] || ENV['http_proxy'] || "" + config.proxy.https = ENV['HTTPS_PROXY'] || ENV['https_proxy'] || "" + config.proxy.no_proxy = $no_proxy + end + if $expose_docker_tcp config.vm.network "forwarded_port", guest: 2375, host: ($expose_docker_tcp + i - 1), auto_correct: true end diff --git a/contrib/terraform/openstack/README.md b/contrib/terraform/openstack/README.md index ec611a499..2840bde9c 100644 --- a/contrib/terraform/openstack/README.md +++ b/contrib/terraform/openstack/README.md @@ -5,14 +5,13 @@ Openstack. ## Status -This will install a Kubernetes cluster on an Openstack Cloud. It is tested on a -OpenStack Cloud provided by [BlueBox](https://www.blueboxcloud.com/) and -should work on most modern installs of OpenStack that support the basic +This will install a Kubernetes cluster on an Openstack Cloud. It has been tested on a +OpenStack Cloud provided by [BlueBox](https://www.blueboxcloud.com/) and on OpenStack at [EMBL-EBI's](http://www.ebi.ac.uk/) [EMBASSY Cloud](http://www.embassycloud.org/). This should work on most modern installs of OpenStack that support the basic services. 
There are some assumptions made to try and ensure it will work on your openstack cluster. -* floating-ips are used for access +* floating-ips are used for access, but you can have masters and nodes that don't use floating-ips if needed. You currently need at least 1 floating ip, which we suggest assigning to a master. * you already have a suitable OS image in glance * you already have both an internal network and a floating-ip pool created * you have security-groups enabled @@ -24,16 +23,14 @@ There are some assumptions made to try and ensure it will work on your openstack ## Terraform -Terraform will be used to provision all of the OpenStack resources required to -run Docker Swarm. It is also used to deploy and provision the software +Terraform will be used to provision all of the OpenStack resources. It is also used to deploy and provision the software requirements. ### Prep #### OpenStack -Ensure your OpenStack credentials are loaded in environment variables. This is -how I do it: +Ensure your OpenStack credentials are loaded in environment variables. This can be done by downloading a credentials .rc file from your OpenStack dashboard and sourcing it: ``` $ source ~/.stackrc @@ -46,7 +43,7 @@ differences between OpenStack installs the Terraform does not attempt to create these for you. By default Terraform will expect that your networks are called `internal` and -`external`. You can change this by altering the Terraform variables `network_name` and `floatingip_pool`. +`external`. You can change this by altering the Terraform variables `network_name` and `floatingip_pool`. This can be done in a new variables file or through environment variables. A full list of variables you can change can be found at [variables.tf](variables.tf). 
@@ -76,8 +73,21 @@ $ echo Setting up Terraform creds && \ export TF_VAR_auth_url=${OS_AUTH_URL} ``` +If you want to provision master or node VMs that don't use floating ips, write them in a `my-terraform-vars.tfvars` file, for example: + +``` +number_of_k8s_masters = "1" +number_of_k8s_masters_no_floating_ip = "2" +number_of_k8s_nodes_no_floating_ip = "1" +number_of_k8s_nodes = "0" +``` +This will provision one VM as master using a floating ip, two additional masters using no floating ips (these will only have private ips inside your tenancy) and one VM as node, again without a floating ip. + + + # Provision a Kubernetes Cluster on OpenStack +If not using a tfvars file for your setup, then execute: ``` terraform apply -state=contrib/terraform/openstack/terraform.tfstate contrib/terraform/openstack openstack_compute_secgroup_v2.k8s_master: Creating... @@ -96,6 +106,13 @@ use the `terraform show` command. State path: contrib/terraform/openstack/terraform.tfstate ``` +Alternatively, if you wrote your terraform variables in a file `my-terraform-vars.tfvars`, your command would look like: +``` +terraform apply -state=contrib/terraform/openstack/terraform.tfstate -var-file=my-terraform-vars.tfvars contrib/terraform/openstack +``` + +If you choose to add masters or nodes without floating ips (only internal ips on your OpenStack tenancy), this script will also create a file `contrib/terraform/openstack/group_vars/k8s-cluster.yml` with an ssh command that lets ansible access your machines by tunneling through the first floating ip used. If you want to handle the ssh tunneling to these machines manually, please delete or move that file. If you want to use this, just leave it there, as ansible will pick it up automatically. + Make sure you can connect to the hosts: ``` @@ -114,6 +131,8 @@ example-k8s-master-1 | SUCCESS => { } ``` +If you are deploying a system that needs bootstrapping, like CoreOS, these might have a state `FAILED` due to CoreOS not having python. 
As long as the state is not `UNREACHABLE`, this is fine. + if it fails try to connect manually via SSH ... it could be somthing as simple as a stale host key. Deploy kubernetes: diff --git a/contrib/terraform/openstack/ansible_bastion_template.txt b/contrib/terraform/openstack/ansible_bastion_template.txt new file mode 100644 index 000000000..cdf012066 --- /dev/null +++ b/contrib/terraform/openstack/ansible_bastion_template.txt @@ -0,0 +1 @@ +ansible_ssh_common_args: '-o ProxyCommand="ssh -o StrictHostKeyChecking=no -W %h:%p -q USER@BASTION_ADDRESS"' diff --git a/contrib/terraform/openstack/group_vars/all.yml b/contrib/terraform/openstack/group_vars/all.yml index b73fb66b2..8b0cd2bcd 100644 --- a/contrib/terraform/openstack/group_vars/all.yml +++ b/contrib/terraform/openstack/group_vars/all.yml @@ -1,9 +1,14 @@ +# Valid bootstrap options (required): xenial, coreos, none +bootstrap_os: "none" + # Directory where the binaries will be installed bin_dir: /usr/local/bin # Where the binaries will be downloaded. # Note: ensure that you've enough disk space (about 1G) local_release_dir: "/tmp/releases" +# Random shifts for retrying failed ops like pushing/downloading +retry_stagger: 5 # Uncomment this line for CoreOS only. # Directory where python binary is installed @@ -28,6 +33,8 @@ kube_users: # Kubernetes cluster name, also will be used as DNS domain cluster_name: cluster.local +# Subdomains of DNS domain to be resolved via /etc/resolv.conf +ndots: 5 # For some environments, each node has a pubilcally accessible # address and an address it should bind services to. These are @@ -51,6 +58,16 @@ cluster_name: cluster.local # but don't know about that address themselves. # access_ip: 1.1.1.1 +# Etcd access modes: +# Enable multiaccess to configure clients to access all of the etcd members directly +# as the "http://hostX:port, http://hostY:port, ..." and ignore the proxy loadbalancers. 
+# This may be the case if clients support and loadbalance multiple etcd servers natively. +etcd_multiaccess: false + +# Assume no internal loadbalancers for apiservers exist and listen on +# kube_apiserver_port (default 443) +loadbalancer_apiserver_localhost: true + # Choose network plugin (calico, weave or flannel) kube_network_plugin: flannel @@ -89,10 +106,12 @@ kube_apiserver_insecure_port: 8080 # (http) # You still must manually configure all your containers to use this DNS server, # Kubernetes won't do this for you (yet). +# Set to true to skip installing the additional dnsmasq +skip_dnsmasq: false # Upstream dns servers used by dnsmasq -upstream_dns_servers: - - 8.8.8.8 - - 8.8.4.4 +#upstream_dns_servers: +# - 8.8.8.8 +# - 8.8.4.4 # # # Use dns server : https://github.com/ansibl8s/k8s-skydns/blob/master/skydns-README.md dns_setup: true @@ -109,21 +128,6 @@ dns_server: "{{ kube_service_addresses|ipaddr('net')|ipaddr(2)|ipaddr('address') # like you would do when using nova-client before starting the playbook. 
# cloud_provider: -# For multi masters architecture: -# kube-proxy doesn't support multiple apiservers for the time being so you'll need to configure your own loadbalancer -# This domain name will be inserted into the /etc/hosts file of all servers -# configuration example with haproxy : -# listen kubernetes-apiserver-https -# bind 10.99.0.21:8383 -# option ssl-hello-chk -# mode tcp -# timeout client 3h -# timeout server 3h -# server master1 10.99.0.26:443 -# server master2 10.99.0.27:443 -# balance roundrobin -# apiserver_loadbalancer_domain_name: "lb-apiserver.kubernetes.local" - ## Set these proxy values in order to update docker daemon to use proxies # http_proxy: "" # https_proxy: "" @@ -134,3 +138,7 @@ dns_server: "{{ kube_service_addresses|ipaddr('net')|ipaddr(2)|ipaddr('address') ## An obvious use case is allowing insecure-registry access ## to self hosted registries like so: docker_options: "--insecure-registry={{ kube_service_addresses }}" + +# default packages to install within the cluster +kpm_packages: [] +# - name: kube-system/grafana diff --git a/contrib/terraform/openstack/kubespray.tf b/contrib/terraform/openstack/kubespray.tf index 27217d08b..ba526b3e0 100644 --- a/contrib/terraform/openstack/kubespray.tf +++ b/contrib/terraform/openstack/kubespray.tf @@ -70,6 +70,28 @@ resource "openstack_compute_instance_v2" "k8s_master" { ssh_user = "${var.ssh_user}" kubespray_groups = "etcd,kube-master,kube-node,k8s-cluster" } + +} + + +resource "openstack_compute_instance_v2" "k8s_master_no_floating_ip" { + name = "${var.cluster_name}-k8s-master-nf-${count.index+1}" + count = "${var.number_of_k8s_masters_no_floating_ip}" + image_name = "${var.image}" + flavor_id = "${var.flavor_k8s_master}" + key_pair = "${openstack_compute_keypair_v2.k8s.name}" + network { + name = "${var.network_name}" + } + security_groups = [ "${openstack_compute_secgroup_v2.k8s_master.name}", + "${openstack_compute_secgroup_v2.k8s.name}" ] + metadata = { + ssh_user = "${var.ssh_user}" + 
kubespray_groups = "etcd,kube-master,kube-node,k8s-cluster" + } + provisioner "local-exec" { + command = "sed s/USER/${var.ssh_user}/ contrib/terraform/openstack/ansible_bastion_template.txt | sed s/BASTION_ADDRESS/${element(openstack_networking_floatingip_v2.k8s_master.*.address, 0)}/ > contrib/terraform/openstack/group_vars/k8s-cluster.yml" + } } resource "openstack_compute_instance_v2" "k8s_node" { @@ -89,6 +111,28 @@ resource "openstack_compute_instance_v2" "k8s_node" { } } +resource "openstack_compute_instance_v2" "k8s_node_no_floating_ip" { + name = "${var.cluster_name}-k8s-node-nf-${count.index+1}" + count = "${var.number_of_k8s_nodes_no_floating_ip}" + image_name = "${var.image}" + flavor_id = "${var.flavor_k8s_node}" + key_pair = "${openstack_compute_keypair_v2.k8s.name}" + network { + name = "${var.network_name}" + } + security_groups = ["${openstack_compute_secgroup_v2.k8s.name}" ] + metadata = { + ssh_user = "${var.ssh_user}" + kubespray_groups = "kube-node,k8s-cluster" + } + provisioner "local-exec" { + command = "sed s/USER/${var.ssh_user}/ contrib/terraform/openstack/ansible_bastion_template.txt | sed s/BASTION_ADDRESS/${element(openstack_networking_floatingip_v2.k8s_master.*.address, 0)}/ > contrib/terraform/openstack/group_vars/k8s-cluster.yml" + } +} + + + + #output "msg" { # value = "Your hosts are ready to go!\nYour ssh hosts are: ${join(", ", openstack_networking_floatingip_v2.k8s_master.*.address )}" #} diff --git a/contrib/terraform/openstack/variables.tf b/contrib/terraform/openstack/variables.tf index 6c1fc767d..8be38aed5 100644 --- a/contrib/terraform/openstack/variables.tf +++ b/contrib/terraform/openstack/variables.tf @@ -6,10 +6,18 @@ variable "number_of_k8s_masters" { default = 2 } +variable "number_of_k8s_masters_no_floating_ip" { + default = 2 +} + variable "number_of_k8s_nodes" { default = 1 } +variable "number_of_k8s_nodes_no_floating_ip" { + default = 1 +} + variable "public_key_path" { description = "The path of the ssh pub key" 
default = "~/.ssh/id_rsa.pub" diff --git a/roles/dnsmasq/defaults/main.yml b/roles/dnsmasq/defaults/main.yml index 7a1e77023..89ab02ab8 100644 --- a/roles/dnsmasq/defaults/main.yml +++ b/roles/dnsmasq/defaults/main.yml @@ -16,4 +16,10 @@ dnsmasq_version: 2.72 # Images dnsmasq_image_repo: "andyshinn/dnsmasq" -dnsmasq_image_tag: "{{ dnsmasq_version }}" \ No newline at end of file +dnsmasq_image_tag: "{{ dnsmasq_version }}" + +# Skip dnsmasq setup +skip_dnsmasq: false + +# Skip setting up dnsmasq daemonset +skip_dnsmasq_k8s: "{{ skip_dnsmasq }}" diff --git a/roles/dnsmasq/tasks/main.yml b/roles/dnsmasq/tasks/main.yml index 46c1604f6..6b271a1e2 100644 --- a/roles/dnsmasq/tasks/main.yml +++ b/roles/dnsmasq/tasks/main.yml @@ -1,5 +1,5 @@ --- - include: dnsmasq.yml - when: "{{ not skip_dnsmasq|bool }}" + when: "{{ not skip_dnsmasq_k8s|bool }}" - include: resolvconf.yml diff --git a/roles/docker/handlers/main.yml b/roles/docker/handlers/main.yml index 6f54f33d5..04d761796 100644 --- a/roles/docker/handlers/main.yml +++ b/roles/docker/handlers/main.yml @@ -3,6 +3,7 @@ command: /bin/true notify: - Docker | reload systemd + - Docker | reload docker.socket - Docker | reload docker - Docker | pause while Docker restarts - Docker | wait for docker @@ -16,6 +17,12 @@ name: docker state: restarted +- name: Docker | reload docker.socket + service: + name: docker.socket + state: restarted + when: ansible_os_family == 'CoreOS' + - name: Docker | pause while Docker restarts pause: seconds=10 prompt="Waiting for docker restart" diff --git a/roles/kubernetes/secrets/tasks/gen_certs.yml b/roles/kubernetes/secrets/tasks/gen_certs.yml index 6057c0676..bec1d9f16 100644 --- a/roles/kubernetes/secrets/tasks/gen_certs.yml +++ b/roles/kubernetes/secrets/tasks/gen_certs.yml @@ -27,31 +27,30 @@ master_certs: ['ca-key.pem', 'admin.pem', 'admin-key.pem', 'apiserver-key.pem', 'apiserver.pem'] node_certs: ['ca.pem', 'node.pem', 'node-key.pem'] -- name: Gen_certs | Get the certs from first master - 
slurp: - src: "{{ kube_cert_dir }}/{{ item }}" +- name: Gen_certs | Gather master certs + shell: "tar cfz - -C {{ kube_cert_dir }} {{ master_certs|join(' ') }} {{ node_certs|join(' ') }} | base64 --wrap=0" + register: master_cert_data delegate_to: "{{groups['kube-master'][0]}}" - register: slurp_certs - with_items: '{{ master_certs + node_certs }}' - when: sync_certs|default(false) run_once: true - notify: set secret_changed + when: sync_certs|default(false) + +- name: Gen_certs | Gather node certs + shell: "tar cfz - -C {{ kube_cert_dir }} {{ node_certs|join(' ') }} | base64 --wrap=0" + register: node_cert_data + delegate_to: "{{groups['kube-master'][0]}}" + run_once: true + when: sync_certs|default(false) - name: Gen_certs | Copy certs on masters - copy: - content: "{{ item.content|b64decode }}" - dest: "{{ item.source }}" - with_items: '{{slurp_certs.results}}' + shell: "echo '{{master_cert_data.stdout|quote}}' | base64 -d | tar xz -C {{ kube_cert_dir }}" + changed_when: false when: inventory_hostname in groups['kube-master'] and sync_certs|default(false) and inventory_hostname != groups['kube-master'][0] - name: Gen_certs | Copy certs on nodes - copy: - content: "{{ item.content|b64decode }}" - dest: "{{ item.source }}" - with_items: '{{slurp_certs.results}}' - when: item.item in node_certs and - inventory_hostname in groups['kube-node'] and sync_certs|default(false) and + shell: "echo '{{node_cert_data.stdout|quote}}' | base64 -d | tar xz -C {{ kube_cert_dir }}" + changed_when: false + when: inventory_hostname in groups['kube-node'] and sync_certs|default(false) and inventory_hostname != groups['kube-master'][0] - name: Gen_certs | check certificate permissions diff --git a/roles/kubernetes/secrets/tasks/gen_tokens.yml b/roles/kubernetes/secrets/tasks/gen_tokens.yml index 796657f65..dbe35811b 100644 --- a/roles/kubernetes/secrets/tasks/gen_tokens.yml +++ b/roles/kubernetes/secrets/tasks/gen_tokens.yml @@ -43,20 +43,15 @@ delegate_to: 
"{{groups['kube-master'][0]}}" when: sync_tokens|default(false) -- name: Gen_tokens | Get the tokens from first master - slurp: - src: "{{ item }}" - register: slurp_tokens - with_items: '{{tokens_list.stdout_lines}}' - run_once: true +- name: Gen_tokens | Gather tokens + shell: "tar cfz - {{ tokens_list.stdout_lines | join(' ') }} | base64 --wrap=0" + register: tokens_data delegate_to: "{{groups['kube-master'][0]}}" + run_once: true when: sync_tokens|default(false) - notify: set secret_changed - name: Gen_tokens | Copy tokens on masters - copy: - content: "{{ item.content|b64decode }}" - dest: "{{ item.source }}" - with_items: '{{slurp_tokens.results}}' + shell: "echo '{{ tokens_data.stdout|quote }}' | base64 -d | tar xz -C /" + changed_when: false when: inventory_hostname in groups['kube-master'] and sync_tokens|default(false) and inventory_hostname != groups['kube-master'][0] diff --git a/tests/cloud_playbooks/create-gce.yml b/tests/cloud_playbooks/create-gce.yml index 840cf2e7c..b2c3e3020 100644 --- a/tests/cloud_playbooks/create-gce.yml +++ b/tests/cloud_playbooks/create-gce.yml @@ -1,6 +1,6 @@ --- - hosts: localhost - sudo: False + become: false gather_facts: no vars: cloud_machine_type: g1-small diff --git a/tests/cloud_playbooks/delete-gce.yml b/tests/cloud_playbooks/delete-gce.yml index d42c6cc91..54902fb6f 100644 --- a/tests/cloud_playbooks/delete-gce.yml +++ b/tests/cloud_playbooks/delete-gce.yml @@ -1,6 +1,6 @@ --- - hosts: localhost - sudo: False + become: false gather_facts: no vars: cloud_machine_type: f1-micro diff --git a/tests/templates/inventory-gce.j2 b/tests/templates/inventory-gce.j2 index 72ad469de..418910771 100644 --- a/tests/templates/inventory-gce.j2 +++ b/tests/templates/inventory-gce.j2 @@ -2,6 +2,16 @@ node1 ansible_ssh_host={{gce.instance_data[0].public_ip}} node2 ansible_ssh_host={{gce.instance_data[1].public_ip}} node3 ansible_ssh_host={{gce.instance_data[2].public_ip}} +{% if mode is defined and mode == "separate" %} +[kube-master] 
+node1 + +[kube-node] +node2 + +[etcd] +node3 +{% else %} [kube-master] node1 node2 @@ -14,6 +24,7 @@ node3 [etcd] node1 node2 +{% endif %} [k8s-cluster:children] kube-node From 309240cd6fb7168828354027d58557e2e059b36c Mon Sep 17 00:00:00 2001 From: Aleksandr Didenko Date: Thu, 3 Nov 2016 16:57:11 +0100 Subject: [PATCH 06/16] Adding support for canal network plugin This patch provides support for Canal network plugin installation as a self-hosted app, see the following link for details: https://github.com/tigera/canal/tree/master/k8s-install --- cluster.yml | 3 + roles/download/defaults/main.yml | 17 ++- roles/kubernetes-apps/ansible/tasks/main.yaml | 1 - .../{ansible => lib}/library/kube.py | 0 .../network_plugin/canal/tasks/main.yaml | 8 ++ .../network_plugin/meta/main.yml | 4 + roles/kubernetes/node/tasks/main.yml | 7 ++ .../node/templates/cni-canal.conf.j2 | 17 +++ roles/kubernetes/node/templates/kubelet.j2 | 2 +- roles/kubernetes/preinstall/tasks/main.yml | 2 +- roles/network_plugin/canal/defaults/main.yml | 11 ++ roles/network_plugin/canal/meta/main.yml | 12 ++ roles/network_plugin/canal/tasks/main.yml | 27 +++++ .../canal/templates/canal-node.yml.j2 | 112 ++++++++++++++++++ .../canal/templates/network.json.j2 | 1 + roles/network_plugin/meta/main.yml | 2 + 16 files changed, 218 insertions(+), 8 deletions(-) rename roles/kubernetes-apps/{ansible => lib}/library/kube.py (100%) create mode 100644 roles/kubernetes-apps/network_plugin/canal/tasks/main.yaml create mode 100644 roles/kubernetes-apps/network_plugin/meta/main.yml create mode 100644 roles/kubernetes/node/templates/cni-canal.conf.j2 create mode 100644 roles/network_plugin/canal/defaults/main.yml create mode 100644 roles/network_plugin/canal/meta/main.yml create mode 100644 roles/network_plugin/canal/tasks/main.yml create mode 100644 roles/network_plugin/canal/templates/canal-node.yml.j2 create mode 100644 roles/network_plugin/canal/templates/network.json.j2 diff --git a/cluster.yml b/cluster.yml index 
295bb668a..12c090169 100644 --- a/cluster.yml +++ b/cluster.yml @@ -27,6 +27,8 @@ - hosts: kube-master roles: - { role: kubernetes/master, tags: master } + - { role: kubernetes-apps/lib, tags: apps } + - { role: kubernetes-apps/network_plugin, tags: network } - hosts: k8s-cluster roles: @@ -34,4 +36,5 @@ - hosts: kube-master[0] roles: + - { role: kubernetes-apps/lib, tags: apps } - { role: kubernetes-apps, tags: apps } diff --git a/roles/download/defaults/main.yml b/roles/download/defaults/main.yml index cbe053fa0..caf37c444 100644 --- a/roles/download/defaults/main.yml +++ b/roles/download/defaults/main.yml @@ -42,6 +42,8 @@ calicoctl_image_repo: "calico/ctl" calicoctl_image_tag: "{{ calico_version }}" calico_node_image_repo: "calico/node" calico_node_image_tag: "{{ calico_version }}" +calico_cni_image_repo: "calico/cni" +calico_cni_image_tag: "{{ calico_cni_version }}" hyperkube_image_repo: "quay.io/coreos/hyperkube" hyperkube_image_tag: "{{ kube_version }}_coreos.0" pod_infra_image_repo: "gcr.io/google_containers/pause-amd64" @@ -56,7 +58,7 @@ downloads: url: "{{ calico_cni_download_url }}" owner: "root" mode: "0755" - enabled: "{{ kube_network_plugin == 'calico' }}" + enabled: "{{ kube_network_plugin == 'calico' or kube_network_plugin == 'canal' }}" calico_cni_plugin_ipam: dest: calico/bin/calico-ipam version: "{{calico_cni_version}}" @@ -95,22 +97,27 @@ downloads: container: true repo: "{{ flannel_image_repo }}" tag: "{{ flannel_image_tag }}" - enabled: "{{ kube_network_plugin == 'flannel' }}" + enabled: "{{ kube_network_plugin == 'flannel' or kube_network_plugin == 'canal' }}" flannel_server_helper: container: true repo: "{{ flannel_server_helper_image_repo }}" tag: "{{ flannel_server_helper_image_tag }}" - enabled: "{{ kube_network_plugin == 'flannel' }}" + enabled: "{{ kube_network_plugin == 'flannel' or kube_network_plugin == 'canal' }}" calicoctl: container: true repo: "{{ calicoctl_image_repo }}" tag: "{{ calicoctl_image_tag }}" - enabled: "{{ 
kube_network_plugin == 'calico' }}" + enabled: "{{ kube_network_plugin == 'calico' or kube_network_plugin == 'canal' }}" calico_node: container: true repo: "{{ calico_node_image_repo }}" tag: "{{ calico_node_image_tag }}" - enabled: "{{ kube_network_plugin == 'calico' }}" + enabled: "{{ kube_network_plugin == 'calico' or kube_network_plugin == 'canal' }}" + calico_cni: + container: true + repo: "{{ calico_cni_image_repo }}" + tag: "{{ calico_cni_image_tag }}" + enabled: "{{ kube_network_plugin == 'canal' }}" pod_infra: container: true repo: "{{ pod_infra_image_repo }}" diff --git a/roles/kubernetes-apps/ansible/tasks/main.yaml b/roles/kubernetes-apps/ansible/tasks/main.yaml index f31eb442b..ccbca3c80 100644 --- a/roles/kubernetes-apps/ansible/tasks/main.yaml +++ b/roles/kubernetes-apps/ansible/tasks/main.yaml @@ -18,6 +18,5 @@ with_items: "{{ manifests.results }}" when: inventory_hostname == groups['kube-master'][0] - - include: tasks/calico-policy-controller.yml when: enable_network_policy is defined and enable_network_policy == True diff --git a/roles/kubernetes-apps/ansible/library/kube.py b/roles/kubernetes-apps/lib/library/kube.py similarity index 100% rename from roles/kubernetes-apps/ansible/library/kube.py rename to roles/kubernetes-apps/lib/library/kube.py diff --git a/roles/kubernetes-apps/network_plugin/canal/tasks/main.yaml b/roles/kubernetes-apps/network_plugin/canal/tasks/main.yaml new file mode 100644 index 000000000..46105126b --- /dev/null +++ b/roles/kubernetes-apps/network_plugin/canal/tasks/main.yaml @@ -0,0 +1,8 @@ +- name: Start flannel and calico-node + run_once: true + kube: + name: "canal-node" + kubectl: "{{bin_dir}}/kubectl" + filename: "/etc/kubernetes/canal-node.yaml" + resource: "ds" + namespace: "kube-system" diff --git a/roles/kubernetes-apps/network_plugin/meta/main.yml b/roles/kubernetes-apps/network_plugin/meta/main.yml new file mode 100644 index 000000000..14a59e5c5 --- /dev/null +++ 
b/roles/kubernetes-apps/network_plugin/meta/main.yml @@ -0,0 +1,4 @@ +--- +dependencies: + - role: kubernetes-apps/network_plugin/canal + when: kube_network_plugin == 'canal' diff --git a/roles/kubernetes/node/tasks/main.yml b/roles/kubernetes/node/tasks/main.yml index a8cb6ce5a..700f7eb75 100644 --- a/roles/kubernetes/node/tasks/main.yml +++ b/roles/kubernetes/node/tasks/main.yml @@ -11,6 +11,13 @@ owner: kube when: kube_network_plugin == "calico" +- name: Write Canal cni config + template: + src: "cni-canal.conf.j2" + dest: "/etc/cni/net.d/10-canal.conf" + owner: kube + when: kube_network_plugin == "canal" + - name: Write kubelet config file template: src=kubelet.j2 dest={{ kube_config_dir }}/kubelet.env backup=yes notify: diff --git a/roles/kubernetes/node/templates/cni-canal.conf.j2 b/roles/kubernetes/node/templates/cni-canal.conf.j2 new file mode 100644 index 000000000..db3e9231c --- /dev/null +++ b/roles/kubernetes/node/templates/cni-canal.conf.j2 @@ -0,0 +1,17 @@ +{ + "name": "canal-k8s-network", + "type": "flannel", + "delegate": { + "type": "calico", + "etcd_endpoints": "{{ etcd_access_endpoint }}", + "log_level": "info", + {% if enable_network_policy is defined and enable_network_policy == True %} + "policy": { + "type": "k8s" + }, + {% endif %} + "kubernetes": { + "kubeconfig": "{{ kube_config_dir }}/node-kubeconfig.yaml" + } + } +} diff --git a/roles/kubernetes/node/templates/kubelet.j2 b/roles/kubernetes/node/templates/kubelet.j2 index 53f2915d9..46678691a 100644 --- a/roles/kubernetes/node/templates/kubelet.j2 +++ b/roles/kubernetes/node/templates/kubelet.j2 @@ -26,7 +26,7 @@ KUBELET_ARGS="--cluster_dns={{ dns_server }} --cluster_domain={{ dns_domain }} - {% else %} KUBELET_ARGS="--kubeconfig={{ kube_config_dir}}/kubelet.kubeconfig --config={{ kube_manifest_dir }} --pod-infra-container-image={{ pod_infra_image_repo }}:{{ pod_infra_image_tag }}" {% endif %} -{% if kube_network_plugin is defined and kube_network_plugin in ["calico", "weave"] %} +{% if 
kube_network_plugin is defined and kube_network_plugin in ["calico", "weave", "canal"] %} KUBELET_NETWORK_PLUGIN="--network-plugin=cni --network-plugin-dir=/etc/cni/net.d" {% elif kube_network_plugin is defined and kube_network_plugin == "weave" %} DOCKER_SOCKET="--docker-endpoint=unix:/var/run/weave/weave.sock" diff --git a/roles/kubernetes/preinstall/tasks/main.yml b/roles/kubernetes/preinstall/tasks/main.yml index 49e69a907..5c6520ed3 100644 --- a/roles/kubernetes/preinstall/tasks/main.yml +++ b/roles/kubernetes/preinstall/tasks/main.yml @@ -74,7 +74,7 @@ with_items: - "/etc/cni/net.d" - "/opt/cni/bin" - when: kube_network_plugin in ["calico", "weave"] and "{{ inventory_hostname in groups['k8s-cluster'] }}" + when: kube_network_plugin in ["calico", "weave", "canal"] and "{{ inventory_hostname in groups['k8s-cluster'] }}" - name: Update package management cache (YUM) yum: update_cache=yes name='*' diff --git a/roles/network_plugin/canal/defaults/main.yml b/roles/network_plugin/canal/defaults/main.yml new file mode 100644 index 000000000..24f7c789b --- /dev/null +++ b/roles/network_plugin/canal/defaults/main.yml @@ -0,0 +1,11 @@ +# The interface used by canal for host <-> host communication. +# If left blank, then the interface is chosen using the node's +# default route. +canal_iface: "" + +# Whether or not to masquerade traffic to destinations not within +# the pod network. 
+canal_masquerade: "true" + +# Log-level +canal_log_level: "info" diff --git a/roles/network_plugin/canal/meta/main.yml b/roles/network_plugin/canal/meta/main.yml new file mode 100644 index 000000000..5b8d38d37 --- /dev/null +++ b/roles/network_plugin/canal/meta/main.yml @@ -0,0 +1,12 @@ +--- +dependencies: + - role: download + file: "{{ downloads.flannel_server_helper }}" + - role: download + file: "{{ downloads.flannel }}" + - role: download + file: "{{ downloads.calico_node }}" + - role: download + file: "{{ downloads.calicoctl }}" + - role: download + file: "{{ downloads.calico_cni }}" diff --git a/roles/network_plugin/canal/tasks/main.yml b/roles/network_plugin/canal/tasks/main.yml new file mode 100644 index 000000000..eb833bf0a --- /dev/null +++ b/roles/network_plugin/canal/tasks/main.yml @@ -0,0 +1,27 @@ +--- +- name: Canal | Write flannel configuration + template: + src: network.json.j2 + dest: /etc/flannel-network.json + backup: yes + +- name: Canal | Write canal configuration + template: + src: canal-node.yml.j2 + dest: /etc/kubernetes/canal-node.yaml + +- name: Canal | Copy cni plugins from hyperkube + command: "/usr/bin/docker run --rm -v /opt/cni/bin:/cnibindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /usr/bin/rsync -a /opt/cni/bin/ /cnibindir/" + register: cni_task_result + until: cni_task_result.rc == 0 + retries: 4 + delay: "{{ retry_stagger | random + 3 }}" + changed_when: false + +- name: Canal | Copy cni plugins from calico/cni + command: "/usr/bin/docker run --rm -v /opt/cni/bin:/cnibindir {{ calico_cni_image_repo }}:{{ calico_cni_image_tag }} sh -c 'cp -a /opt/cni/bin/* /cnibindir/'" + register: cni_task_result + until: cni_task_result.rc == 0 + retries: 4 + delay: "{{ retry_stagger | random + 3 }}" + changed_when: false diff --git a/roles/network_plugin/canal/templates/canal-node.yml.j2 b/roles/network_plugin/canal/templates/canal-node.yml.j2 new file mode 100644 index 000000000..0b4ea24e4 --- /dev/null +++ 
b/roles/network_plugin/canal/templates/canal-node.yml.j2 @@ -0,0 +1,112 @@ +--- +kind: DaemonSet +apiVersion: extensions/v1beta1 +metadata: + name: canal-node + labels: + k8s-app: canal-node +spec: + selector: + matchLabels: + k8s-app: canal-node + template: + metadata: + annotations: + scheduler.alpha.kubernetes.io/critical-pod: '' + scheduler.alpha.kubernetes.io/tolerations: '[{"key":"CriticalAddonsOnly", "operator":"Exists"}]' + labels: + k8s-app: canal-node + spec: + hostNetwork: true + volumes: + # Used by flannel-server-helper + - name: "networkconfig" + hostPath: + path: "/etc/flannel-network.json" + # Used by calico/node. + - name: lib-modules + hostPath: + path: /lib/modules + - name: var-run-calico + hostPath: + path: /var/run/calico + # Used to install CNI. + - name: cni-bin-dir + hostPath: + path: /opt/cni/bin + - name: cni-net-dir + hostPath: + path: /etc/cni/net.d + # Used by flannel daemon. + - name: run-flannel + hostPath: + path: /run/flannel + - name: resolv + hostPath: + path: /etc/resolv.conf + containers: + - name: "flannel-server-helper" + image: "{{ flannel_server_helper_image_repo }}:{{ flannel_server_helper_image_tag }}" + args: + - "--network-config=/etc/flannel-network.json" + - "--etcd-prefix=/{{ cluster_name }}/network" + - "--etcd-server={{ etcd_endpoint }}" + volumeMounts: + - name: "networkconfig" + mountPath: "/etc/flannel-network.json" + imagePullPolicy: "Always" + # Runs the flannel daemon to enable vxlan networking between + # container hosts. + - name: flannel + image: "{{ flannel_image_repo }}:{{ flannel_image_tag }}" + env: + # The location of the etcd cluster. + - name: FLANNELD_ETCD_ENDPOINTS + value: "{{ etcd_access_endpoint }}" + # The interface flannel should run on. + - name: FLANNELD_IFACE + value: "{{ canal_iface }}" + # Perform masquerade on traffic leaving the pod cidr. + - name: FLANNELD_IP_MASQ + value: "{{ canal_masquerade }}" + # Write the subnet.env file to the mounted directory. 
+ - name: FLANNELD_SUBNET_FILE + value: "/run/flannel/subnet.env" + command: + - "/bin/sh" + - "-c" + - "/opt/bin/flanneld -etcd-endpoints {{ etcd_access_endpoint }} -etcd-prefix /{{ cluster_name }}/network {% if canal_iface %}-iface {{ canal_iface }}{% endif %}" + ports: + - hostPort: 10253 + containerPort: 10253 + securityContext: + privileged: true + volumeMounts: + - name: "resolv" + mountPath: "/etc/resolv.conf" + - name: "run-flannel" + mountPath: "/run/flannel" + # Runs calico/node container on each Kubernetes node. This + # container programs network policy and local routes on each + # host. + - name: calico-node + image: "{{ calico_node_image_repo }}:{{ calico_node_image_tag }}" + env: + # The location of the etcd cluster. + - name: ETCD_ENDPOINTS + value: "{{ etcd_access_endpoint }}" + # Disable Calico BGP. Calico is simply enforcing policy. + - name: CALICO_NETWORKING + value: "false" + # Disable file logging so `kubectl logs` works. + - name: CALICO_DISABLE_FILE_LOGGING + value: "true" + securityContext: + privileged: true + volumeMounts: + - mountPath: /lib/modules + name: lib-modules + readOnly: true + - mountPath: /var/run/calico + name: var-run-calico + readOnly: false diff --git a/roles/network_plugin/canal/templates/network.json.j2 b/roles/network_plugin/canal/templates/network.json.j2 new file mode 100644 index 000000000..cbbec3841 --- /dev/null +++ b/roles/network_plugin/canal/templates/network.json.j2 @@ -0,0 +1 @@ +{ "Network": "{{ kube_pods_subnet }}", "SubnetLen": {{ kube_network_node_prefix }}, "Backend": { "Type": "{{ flannel_backend_type }}" } } diff --git a/roles/network_plugin/meta/main.yml b/roles/network_plugin/meta/main.yml index 736262ab0..8596c9d70 100644 --- a/roles/network_plugin/meta/main.yml +++ b/roles/network_plugin/meta/main.yml @@ -6,3 +6,5 @@ dependencies: when: kube_network_plugin == 'flannel' - role: network_plugin/weave when: kube_network_plugin == 'weave' + - role: network_plugin/canal + when: kube_network_plugin == 
'canal' From 60a217766fee54f68fae58dc9a69069c7571c625 Mon Sep 17 00:00:00 2001 From: Aleksandr Didenko Date: Tue, 8 Nov 2016 12:13:20 +0100 Subject: [PATCH 07/16] Add ConfigMap for basic configuration options Container settings moved from daemonset yaml to a separate configmap. --- roles/kubernetes-apps/ansible/tasks/main.yaml | 3 +- .../network_plugin/canal/tasks/main.yaml | 9 ++++ .../node/templates/cni-canal.conf.j2 | 8 ++-- roles/network_plugin/canal/tasks/main.yml | 7 ++- .../canal/templates/canal-config.yml.j2 | 22 +++++++++ .../canal/templates/canal-node.yml.j2 | 48 ++++++++++++++++--- 6 files changed, 83 insertions(+), 14 deletions(-) create mode 100644 roles/network_plugin/canal/templates/canal-config.yml.j2 diff --git a/roles/kubernetes-apps/ansible/tasks/main.yaml b/roles/kubernetes-apps/ansible/tasks/main.yaml index ccbca3c80..130a17a6f 100644 --- a/roles/kubernetes-apps/ansible/tasks/main.yaml +++ b/roles/kubernetes-apps/ansible/tasks/main.yaml @@ -19,4 +19,5 @@ when: inventory_hostname == groups['kube-master'][0] - include: tasks/calico-policy-controller.yml - when: enable_network_policy is defined and enable_network_policy == True + when: ( enable_network_policy is defined and enable_network_policy == True ) or + ( kube_network_plugin == 'canal' ) diff --git a/roles/kubernetes-apps/network_plugin/canal/tasks/main.yaml b/roles/kubernetes-apps/network_plugin/canal/tasks/main.yaml index 46105126b..c6bcd6992 100644 --- a/roles/kubernetes-apps/network_plugin/canal/tasks/main.yaml +++ b/roles/kubernetes-apps/network_plugin/canal/tasks/main.yaml @@ -1,3 +1,12 @@ +- name: Create canal ConfigMap + run_once: true + kube: + name: "canal-config" + kubectl: "{{bin_dir}}/kubectl" + filename: "/etc/kubernetes/canal-config.yaml" + resource: "configmap" + namespace: "kube-system" + - name: Start flannel and calico-node run_once: true kube: diff --git a/roles/kubernetes/node/templates/cni-canal.conf.j2 b/roles/kubernetes/node/templates/cni-canal.conf.j2 index 
db3e9231c..b835443c7 100644 --- a/roles/kubernetes/node/templates/cni-canal.conf.j2 +++ b/roles/kubernetes/node/templates/cni-canal.conf.j2 @@ -5,11 +5,9 @@ "type": "calico", "etcd_endpoints": "{{ etcd_access_endpoint }}", "log_level": "info", - {% if enable_network_policy is defined and enable_network_policy == True %} - "policy": { - "type": "k8s" - }, - {% endif %} + "policy": { + "type": "k8s" + }, "kubernetes": { "kubeconfig": "{{ kube_config_dir }}/node-kubeconfig.yaml" } diff --git a/roles/network_plugin/canal/tasks/main.yml b/roles/network_plugin/canal/tasks/main.yml index eb833bf0a..ba83edee8 100644 --- a/roles/network_plugin/canal/tasks/main.yml +++ b/roles/network_plugin/canal/tasks/main.yml @@ -5,7 +5,12 @@ dest: /etc/flannel-network.json backup: yes -- name: Canal | Write canal configuration +- name: Canal | Write canal configmap + template: + src: canal-config.yml.j2 + dest: /etc/kubernetes/canal-config.yaml + +- name: Canal | Write canal node configuration template: src: canal-node.yml.j2 dest: /etc/kubernetes/canal-node.yaml diff --git a/roles/network_plugin/canal/templates/canal-config.yml.j2 b/roles/network_plugin/canal/templates/canal-config.yml.j2 new file mode 100644 index 000000000..34f3faedb --- /dev/null +++ b/roles/network_plugin/canal/templates/canal-config.yml.j2 @@ -0,0 +1,22 @@ +# This ConfigMap can be used to configure a self-hosted Canal installation. +# See `canal.yaml` for an example of a Canal deployment which uses +# the config in this ConfigMap. +kind: ConfigMap +apiVersion: v1 +metadata: + name: canal-config +data: + # Configure this with the location of your etcd cluster. + etcd_endpoints: "{{ etcd_access_endpoint }}" + + # The interface used by canal for host <-> host communication. + # If left blank, then the interface is chosen using the node's + # default route. + flanneld_iface: "{{ canal_iface }}" + + # Whether or not to masquerade traffic to destinations not within + # the pod network.
+ masquerade: "{{ canal_masquerade }}" + + # Cluster name for Flannel etcd path + cluster_name: "{{ cluster_name }}" diff --git a/roles/network_plugin/canal/templates/canal-node.yml.j2 b/roles/network_plugin/canal/templates/canal-node.yml.j2 index 0b4ea24e4..bdeae6cfd 100644 --- a/roles/network_plugin/canal/templates/canal-node.yml.j2 +++ b/roles/network_plugin/canal/templates/canal-node.yml.j2 @@ -47,10 +47,23 @@ spec: containers: - name: "flannel-server-helper" image: "{{ flannel_server_helper_image_repo }}:{{ flannel_server_helper_image_tag }}" + env: + # Cluster name + - name: CLUSTER_NAME + valueFrom: + configMapKeyRef: + name: canal-config + key: cluster_name + # The location of the etcd cluster. + - name: FLANNELD_ETCD_ENDPOINTS + valueFrom: + configMapKeyRef: + name: canal-config + key: etcd_endpoints args: - "--network-config=/etc/flannel-network.json" - - "--etcd-prefix=/{{ cluster_name }}/network" - - "--etcd-server={{ etcd_endpoint }}" + - "--etcd-prefix=/$(CLUSTER_NAME)/network" + - "--etcd-server=$(FLANNELD_ETCD_ENDPOINTS)" volumeMounts: - name: "networkconfig" mountPath: "/etc/flannel-network.json" @@ -60,22 +73,40 @@ spec: - name: flannel image: "{{ flannel_image_repo }}:{{ flannel_image_tag }}" env: + # Cluster name + - name: CLUSTER_NAME + valueFrom: + configMapKeyRef: + name: canal-config + key: cluster_name # The location of the etcd cluster. - name: FLANNELD_ETCD_ENDPOINTS - value: "{{ etcd_access_endpoint }}" + valueFrom: + configMapKeyRef: + name: canal-config + key: etcd_endpoints # The interface flannel should run on. - name: FLANNELD_IFACE - value: "{{ canal_iface }}" + valueFrom: + configMapKeyRef: + name: canal-config + key: flanneld_iface # Perform masquerade on traffic leaving the pod cidr. 
- name: FLANNELD_IP_MASQ - value: "{{ canal_masquerade }}" + valueFrom: + configMapKeyRef: + name: canal-config + key: masquerade + # Set etcd-prefix + - name: DOCKER_OPT_ETCD_PREFIX + value: "-etcd-prefix=/$(CLUSTER_NAME)/network" # Write the subnet.env file to the mounted directory. - name: FLANNELD_SUBNET_FILE value: "/run/flannel/subnet.env" command: - "/bin/sh" - "-c" - - "/opt/bin/flanneld -etcd-endpoints {{ etcd_access_endpoint }} -etcd-prefix /{{ cluster_name }}/network {% if canal_iface %}-iface {{ canal_iface }}{% endif %}" + - "/opt/bin/flanneld -etcd-prefix /$(CLUSTER_NAME)/network" ports: - hostPort: 10253 containerPort: 10253 @@ -94,7 +125,10 @@ spec: env: # The location of the etcd cluster. - name: ETCD_ENDPOINTS - value: "{{ etcd_access_endpoint }}" + valueFrom: + configMapKeyRef: + name: canal-config + key: etcd_endpoints # Disable Calico BGP. Calico is simply enforcing policy. - name: CALICO_NETWORKING value: "false" From 4ece73d4327c72b836ff57aa489d48887ec7996a Mon Sep 17 00:00:00 2001 From: Aleksandr Didenko Date: Tue, 8 Nov 2016 12:20:55 +0100 Subject: [PATCH 08/16] Fix idempotency of calico-policy-controller rs We need to specify kube resource type and name in order to avoid playbook errors related to k8s resource duplication. 
--- .../ansible/tasks/calico-policy-controller.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/roles/kubernetes-apps/ansible/tasks/calico-policy-controller.yml b/roles/kubernetes-apps/ansible/tasks/calico-policy-controller.yml index f4ac65aeb..6ad8dd220 100644 --- a/roles/kubernetes-apps/ansible/tasks/calico-policy-controller.yml +++ b/roles/kubernetes-apps/ansible/tasks/calico-policy-controller.yml @@ -5,6 +5,9 @@ - name: Start of Calico policy controller kube: + name: "calico-policy-controller" kubectl: "{{bin_dir}}/kubectl" - filename: /etc/kubernetes/calico-policy-controller.yml + filename: "/etc/kubernetes/calico-policy-controller.yml" + namespace: "kube-system" + resource: "rs" when: inventory_hostname == groups['kube-master'][0] From d197130148c65c286175b192e2a2a81bae502e13 Mon Sep 17 00:00:00 2001 From: Bogdan Dobrelya Date: Mon, 7 Nov 2016 16:00:49 +0100 Subject: [PATCH 09/16] Fix uploading CI logs to GCS * Use gsutil to configure the logs bucket's lifecycle, which is not in the gc_storage module yet. (See https://cloud.google.com/storage/docs/gsutil_install). * Generate unique bucket names extended with the build's OS type info as well. * Ignore boto related errors for the gc_storage module.
* Use no_log when needed to suppress noise/secrets in output Signed-off-by: Bogdan Dobrelya --- .travis.yml | 5 +- scripts/collect-info.yaml | 1 + tests/cloud_playbooks/files/gcs_life.json | 9 +++ tests/cloud_playbooks/upload-logs-gcs.yml | 68 +++++++++++++++-------- 4 files changed, 60 insertions(+), 23 deletions(-) create mode 100644 tests/cloud_playbooks/files/gcs_life.json diff --git a/.travis.yml b/.travis.yml index ac922fd80..3bbb46d1c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -149,13 +149,16 @@ script: - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root tests/testcases/030_check-network.yml $LOG_LEVEL after_failure: - - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root scripts/collect-info.yaml >/dev/null + - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root scripts/collect-info.yaml - > $HOME/.local/bin/ansible-playbook tests/cloud_playbooks/upload-logs-gcs.yml -i "localhost," -c local -e test_id=${TEST_ID} -e kube_network_plugin=${KUBE_NETWORK_PLUGIN} -e gs_key=${GS_ACCESS_KEY_ID} -e gs_skey=${GS_SECRET_ACCESS_KEY} + -e ostype=${CLOUD_IMAGE} + -e commit=${TRAVIS_COMMIT} + -e pr=${TRAVIS_PULL_REQUEST} after_script: - > diff --git a/scripts/collect-info.yaml b/scripts/collect-info.yaml index 877b5bf36..685b8b787 100644 --- a/scripts/collect-info.yaml +++ b/scripts/collect-info.yaml @@ -45,6 +45,7 @@ register: output ignore_errors: true with_items: "{{commands}}" + no_log: True - debug: var=item with_items: "{{output.results}}" diff --git a/tests/cloud_playbooks/files/gcs_life.json b/tests/cloud_playbooks/files/gcs_life.json new file mode 100644 index 000000000..eaab30b4f --- /dev/null +++ b/tests/cloud_playbooks/files/gcs_life.json @@ -0,0 +1,9 @@ +{ + "rule": + [ + { + "action": {"type": "Delete"}, +
"condition": {"age": 2} + } + ] +} diff --git a/tests/cloud_playbooks/upload-logs-gcs.yml b/tests/cloud_playbooks/upload-logs-gcs.yml index ba31ab56f..12013798d 100644 --- a/tests/cloud_playbooks/upload-logs-gcs.yml +++ b/tests/cloud_playbooks/upload-logs-gcs.yml @@ -3,41 +3,65 @@ become: false gather_facts: no - vars: - expire: 72000 - tasks: + - name: Generate uniq bucket name prefix + shell: date +%s | sha256sum | base64 | head -c 32 + register: out + - name: replace_test_id set_fact: - test_name: "{{ test_id | regex_replace('\\.', '-') }}" + test_name: "kargo-{{ commit }}-{{ pr }}-{{ out.stdout|lower }}-{{ test_id | regex_replace('\\.', '-') }}" - name: Create a bucket gc_storage: bucket: "{{ test_name }}" mode: create - expiration: "{{ expire }}" permission: private - gs_access_key: gs_key - gs_secret_key: gs_skey + gs_access_key: "{{ gs_key }}" + gs_secret_key: "{{ gs_skey }}" + no_log: True + + - name: Download gsutil cp installer + get_url: + url: https://dl.google.com/dl/cloudsdk/channels/rapid/install_google_cloud_sdk.bash + dest: /tmp/gcp-installer.sh + + - name: Get gsutil tool + script: /tmp/gcp-installer.sh + environment: + CLOUDSDK_CORE_DISABLE_PROMPTS: 1 + no_log: True + + - name: Create a lifecycle template for the bucket + file: src=gcs_life.json path=/tmp/gcs_life.json + + - name: Hack the boto config for GCS access keys + lineinfile: + dest: .boto + line: "gs_access_key_id = {{ gs_key }}" + regexp: "^#gs_access_key_id = .*$" + no_log: True + + - name: Hack the boto config for GCS secret access keys + lineinfile: + dest: .boto + line: "gs_secret_access_key = {{ gs_skey }}" + regexp: "^#gs_secret_access_key = .*$" + no_log: True + + - name: Apply the lifecycle rules + shell: bash google-cloud-sdk/bin/gsutil lifecycle set /tmp/gcs_life.json gs://{{ test_name }} + environment: + BOTO_CONFIG: .boto - name: Upload collected diagnostic info gc_storage: bucket: "{{ test_name }}" mode: put permission: private - expiration: "{{ expire }}" - object: 
"build-{{ test_name }}-{{ kube_network_plugin }}-logs.tar.gz" + object: "build-{{ ostype }}-{{ kube_network_plugin }}-logs.tar.gz" src: logs.tar.gz - gs_access_key: gs_key - gs_secret_key: gs_skey - - - name: Get a link - gc_storage: - bucket: "{{ test_name }}" - object: "build-{{ test_name }}-{{ kube_network_plugin }}-logs.tar.gz" - mode: get_url - register: url - gs_access_key: gs_key - gs_secret_key: gs_skey - - - debug: msg="Download URL {{get_url}}" + headers: '{"Content-Encoding": "x-gzip"}' + gs_access_key: "{{ gs_key }}" + gs_secret_key: "{{ gs_skey }}" + ignore_errors: true From 95b460ae9489b38fa681597352ab2602b503c4e2 Mon Sep 17 00:00:00 2001 From: Matthew Mosesohn Date: Wed, 9 Nov 2016 13:31:12 +0300 Subject: [PATCH 10/16] Remove etcd-proxy from all nodes and use etcd multiaccess --- docs/ha-mode.md | 6 +- inventory/group_vars/all.yml | 2 +- roles/etcd/handlers/main.yml | 17 +-- roles/etcd/tasks/configure.yml | 16 --- roles/etcd/tasks/main.yml | 19 +-- roles/etcd/tasks/refresh_config.yml | 7 - .../templates/deb-etcd-proxy-docker.initd.j2 | 120 ------------------ .../templates/deb-etcd-proxy-host.initd.j2 | 110 ---------------- .../templates/etcd-proxy-docker.service.j2 | 28 ---- .../etcd/templates/etcd-proxy-host.service.j2 | 19 --- roles/etcd/templates/etcd-proxy.j2 | 5 - roles/etcd/templates/etcd.j2 | 4 - .../templates/calico-policy-controller.yml.j2 | 11 +- roles/kubernetes/master/tasks/pre-upgrade.yml | 9 -- .../manifests/kube-apiserver.manifest.j2 | 1 + .../node/templates/cni-calico.conf.j2 | 1 + roles/kubernetes/preinstall/defaults/main.yml | 3 + .../kubernetes/preinstall/tasks/set_facts.yml | 9 +- roles/network_plugin/calico/tasks/main.yml | 35 +++-- .../calico/templates/calico-node.service.j2 | 4 +- .../calico/templates/calicoctl-container.j2 | 3 +- .../calico/templates/network-environment.j2 | 4 +- .../flannel/templates/flannel-pod.yml | 2 +- 23 files changed, 55 insertions(+), 380 deletions(-) delete mode 100644 
roles/etcd/templates/deb-etcd-proxy-docker.initd.j2 delete mode 100644 roles/etcd/templates/deb-etcd-proxy-host.initd.j2 delete mode 100644 roles/etcd/templates/etcd-proxy-docker.service.j2 delete mode 100644 roles/etcd/templates/etcd-proxy-host.service.j2 delete mode 100644 roles/etcd/templates/etcd-proxy.j2 diff --git a/docs/ha-mode.md b/docs/ha-mode.md index 792c18a19..8ec5c93a1 100644 --- a/docs/ha-mode.md +++ b/docs/ha-mode.md @@ -5,10 +5,6 @@ The following components require a highly available endpoints: * etcd cluster, * kube-apiserver service instances. -The former provides the -[etcd-proxy](https://coreos.com/etcd/docs/latest/proxy.html) service to access -the cluster members in HA fashion. - The latter relies on a 3rd side reverse proxies, like Nginx or HAProxy, to achieve the same goal. @@ -57,7 +53,7 @@ type. The following diagram shows how traffic to the apiserver is directed. A user may opt to use an external loadbalancer (LB) instead. An external LB provides access for external clients, while the internal LB accepts client -connections only to the localhost, similarly to the etcd-proxy HA endpoints. +connections only to the localhost. Given a frontend `VIP` address and `IP1, IP2` addresses of backends, here is an example configuration for a HAProxy service acting as an external LB: ``` diff --git a/inventory/group_vars/all.yml b/inventory/group_vars/all.yml index cbf2e63a2..f72276ae6 100644 --- a/inventory/group_vars/all.yml +++ b/inventory/group_vars/all.yml @@ -62,7 +62,7 @@ ndots: 5 # Enable multiaccess to configure clients to access all of the etcd members directly # as the "http://hostX:port, http://hostY:port, ..." and ignore the proxy loadbalancers. # This may be the case if clients support and loadbalance multiple etcd servers natively. 
-etcd_multiaccess: false +etcd_multiaccess: true # Assume there are no internal loadbalancers for apiservers exist and listen on # kube_apiserver_port (default 443) diff --git a/roles/etcd/handlers/main.yml b/roles/etcd/handlers/main.yml index 693754a06..8104ff1a8 100644 --- a/roles/etcd/handlers/main.yml +++ b/roles/etcd/handlers/main.yml @@ -6,21 +6,14 @@ - reload etcd - wait for etcd up -- name: restart etcd-proxy - command: /bin/true - notify: - - etcd | reload systemd - - reload etcd-proxy - - wait for etcd up - - name: etcd | reload systemd command: systemctl daemon-reload when: ansible_service_mgr == "systemd" - name: wait for etcd up - uri: url="http://{% if is_etcd_master %}{{ etcd_address }}{% else %}127.0.0.1{% endif %}:2379/health" + uri: url="http://{% if is_etcd_master %}{{ etcd_address }}{% else %}127.0.0.1{% endif %}:2379/health" register: result - until: result.status == 200 + until: result.status is defined and result.status == 200 retries: 10 delay: 5 @@ -29,9 +22,3 @@ name: etcd state: restarted when: is_etcd_master - -- name: reload etcd-proxy - service: - name: etcd-proxy - state: restarted - when: is_etcd_proxy diff --git a/roles/etcd/tasks/configure.yml b/roles/etcd/tasks/configure.yml index 514a79d73..a2ef38f2c 100644 --- a/roles/etcd/tasks/configure.yml +++ b/roles/etcd/tasks/configure.yml @@ -26,19 +26,3 @@ mode: 0755 when: ansible_service_mgr in ["sysvinit","upstart"] and ansible_os_family == "Debian" and is_etcd_master notify: restart etcd - -- name: Configure | Copy etcd-proxy.service systemd file - template: - src: "etcd-proxy-{{ etcd_deployment_type }}.service.j2" - dest: /etc/systemd/system/etcd-proxy.service - backup: yes - when: ansible_service_mgr == "systemd" and is_etcd_proxy - notify: restart etcd-proxy -- name: Configure | Write etcd-proxy initd script - template: - src: "deb-etcd-proxy-{{ etcd_deployment_type }}.initd.j2" - dest: /etc/init.d/etcd-proxy - owner: root - mode: 0755 - when: ansible_service_mgr in 
["sysvinit","upstart"] and ansible_os_family == "Debian" and is_etcd_proxy - notify: restart etcd-proxy diff --git a/roles/etcd/tasks/main.yml b/roles/etcd/tasks/main.yml index 3ecaa00e6..88dfe59d8 100644 --- a/roles/etcd/tasks/main.yml +++ b/roles/etcd/tasks/main.yml @@ -1,8 +1,13 @@ --- +- include: pre_upgrade.yml - include: install.yml + when: is_etcd_master - include: set_cluster_health.yml + when: is_etcd_master - include: configure.yml + when: is_etcd_master - include: refresh_config.yml + when: is_etcd_master - name: Ensure etcd is running service: @@ -11,23 +16,11 @@ enabled: yes when: is_etcd_master -- name: Ensure etcd-proxy is running - service: - name: etcd-proxy - state: started - enabled: yes - when: is_etcd_proxy - - name: Restart etcd if binary changed command: /bin/true notify: restart etcd when: etcd_deployment_type == "host" and etcd_copy.stdout_lines and is_etcd_master -- name: Restart etcd-proxy if binary changed - command: /bin/true - notify: restart etcd-proxy - when: etcd_deployment_type == "host" and etcd_copy.stdout_lines and is_etcd_proxy - # Reload systemd before starting service - meta: flush_handlers @@ -35,4 +28,6 @@ # initial state of the cluster is in `existing` # state insted of `new`. 
- include: set_cluster_health.yml + when: is_etcd_master - include: refresh_config.yml + when: is_etcd_master diff --git a/roles/etcd/tasks/refresh_config.yml b/roles/etcd/tasks/refresh_config.yml index 178466153..80a03a7d6 100644 --- a/roles/etcd/tasks/refresh_config.yml +++ b/roles/etcd/tasks/refresh_config.yml @@ -5,10 +5,3 @@ dest: /etc/etcd.env notify: restart etcd when: is_etcd_master - -- name: Refresh config | Create etcd-proxy config file - template: - src: etcd-proxy.j2 - dest: /etc/etcd-proxy.env - notify: restart etcd-proxy - when: is_etcd_proxy diff --git a/roles/etcd/templates/deb-etcd-proxy-docker.initd.j2 b/roles/etcd/templates/deb-etcd-proxy-docker.initd.j2 deleted file mode 100644 index ad0338a09..000000000 --- a/roles/etcd/templates/deb-etcd-proxy-docker.initd.j2 +++ /dev/null @@ -1,120 +0,0 @@ -#!/bin/sh -set -a - -### BEGIN INIT INFO -# Provides: etcd-proxy -# Required-Start: $local_fs $network $syslog -# Required-Stop: -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: etcd-proxy -# Description: -# etcd-proxy is a proxy for etcd: distributed, consistent key-value store for shared configuration and service discovery -### END INIT INFO - -PATH=/sbin:/usr/sbin:/bin/:/usr/bin -DESC="etcd-proxy" -NAME=etcd-proxy -DAEMON={{ docker_bin_dir | default("/usr/bin") }}/docker -DAEMON_EXEC=`basename $DAEMON` -DAEMON_ARGS="run --restart=always --env-file=/etc/etcd-proxy.env \ ---net=host \ ---stop-signal=SIGKILL \ --v /usr/share/ca-certificates/:/etc/ssl/certs:ro \ ---name={{ etcd_proxy_member_name | default("etcd-proxy") }} \ -{{ etcd_image_repo }}:{{ etcd_image_tag }} \ -{% if etcd_after_v3 %} -{{ etcd_container_bin_dir }}etcd -{% endif %}" - - -SCRIPTNAME=/etc/init.d/$NAME -DAEMON_USER=root -STOP_SCHEDULE="${STOP_SCHEDULE:-QUIT/5/TERM/5/KILL/5}" -PID=/var/run/etcd-proxy.pid - -# Exit if the binary is not present -[ -x "$DAEMON" ] || exit 0 - -# Define LSB log_* functions. 
-# Depend on lsb-base (>= 3.2-14) to ensure that this file is present -# and status_of_proc is working. -. /lib/lsb/init-functions - -do_status() -{ - status_of_proc -p $PID "$DAEMON" "$NAME" && exit 0 || exit $? -} - -# Function that starts the daemon/service -# -do_start() -{ - {{ docker_bin_dir | default("/usr/bin") }}/docker rm -f {{ etcd_proxy_member_name | default("etcd-proxy") }} &>/dev/null || true - sleep 1 - start-stop-daemon --background --start --quiet --make-pidfile --pidfile $PID --user $DAEMON_USER --exec $DAEMON -- \ - $DAEMON_ARGS \ - || return 2 -} - -# -# Function that stops the daemon/service -# -do_stop() -{ - start-stop-daemon --stop --quiet --retry=$STOP_SCHEDULE --pidfile $PID --name $DAEMON_EXEC - RETVAL="$?" - - sleep 1 - return "$RETVAL" -} - - -case "$1" in - start) - log_daemon_msg "Starting $DESC" "$NAME" - do_start - case "$?" in - 0|1) log_end_msg 0 || exit 0 ;; - 2) log_end_msg 1 || exit 1 ;; - esac - ;; - stop) - log_daemon_msg "Stopping $DESC" "$NAME" - if do_stop; then - log_end_msg 0 - else - log_failure_msg "Can't stop etcd-proxy" - log_end_msg 1 - fi - ;; - status) - if do_status; then - log_end_msg 0 - else - log_failure_msg "etcd-proxy is not running" - log_end_msg 1 - fi - ;; - - restart|force-reload) - log_daemon_msg "Restarting $DESC" "$NAME" - if do_stop; then - if do_start; then - log_end_msg 0 - exit 0 - else - rc="$?" - fi - else - rc="$?" 
- fi - log_failure_msg "Can't restart etcd-proxy" - log_end_msg ${rc} - ;; - *) - echo "Usage: $SCRIPTNAME {start|stop|status|restart|force-reload}" >&2 - exit 3 - ;; -esac - diff --git a/roles/etcd/templates/deb-etcd-proxy-host.initd.j2 b/roles/etcd/templates/deb-etcd-proxy-host.initd.j2 deleted file mode 100644 index d0858bb2f..000000000 --- a/roles/etcd/templates/deb-etcd-proxy-host.initd.j2 +++ /dev/null @@ -1,110 +0,0 @@ -#!/bin/sh -set -a - -### BEGIN INIT INFO -# Provides: etcd-proxy -# Required-Start: $local_fs $network $syslog -# Required-Stop: -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: etcd-proxy -# Description: -# etcd-proxy is a proxy for etcd: distributed, consistent key-value store for shared configuration and service discovery -### END INIT INFO - -PATH=/sbin:/usr/sbin:/bin:/usr/bin -DESC="etcd-proxy" -NAME=etcd-proxy -DAEMON={{ bin_dir }}/etcd -DAEMON_ARGS="" -SCRIPTNAME=/etc/init.d/$NAME -DAEMON_USER=etcd -STOP_SCHEDULE="${STOP_SCHEDULE:-QUIT/5/TERM/5/KILL/5}" -PID=/var/run/etcd-proxy.pid - -# Exit if the binary is not present -[ -x "$DAEMON" ] || exit 0 - -# Read configuration variable file if it is present -[ -f /etc/etcd-proxy.env ] && . /etc/etcd-proxy.env - -# Define LSB log_* functions. -# Depend on lsb-base (>= 3.2-14) to ensure that this file is present -# and status_of_proc is working. -. /lib/lsb/init-functions - -do_status() -{ - status_of_proc -p $PID "$DAEMON" "$NAME" && exit 0 || exit $? -} - -# Function that starts the daemon/service -# -do_start() -{ - start-stop-daemon --background --start --quiet --make-pidfile --pidfile $PID --user $DAEMON_USER --exec $DAEMON -- \ - $DAEMON_ARGS \ - || return 2 -} - -# -# Function that stops the daemon/service -# -do_stop() -{ - start-stop-daemon --stop --quiet --retry=$STOP_SCHEDULE --pidfile $PID --name $NAME - RETVAL="$?" - - sleep 1 - return "$RETVAL" -} - - -case "$1" in - start) - log_daemon_msg "Starting $DESC" "$NAME" - do_start - case "$?" 
in - 0|1) log_end_msg 0 || exit 0 ;; - 2) log_end_msg 1 || exit 1 ;; - esac - ;; - stop) - log_daemon_msg "Stopping $DESC" "$NAME" - if do_stop; then - log_end_msg 0 - else - log_failure_msg "Can't stop etcd-proxy" - log_end_msg 1 - fi - ;; - status) - if do_status; then - log_end_msg 0 - else - log_failure_msg "etcd-proxy is not running" - log_end_msg 1 - fi - ;; - - restart|force-reload) - log_daemon_msg "Restarting $DESC" "$NAME" - if do_stop; then - if do_start; then - log_end_msg 0 - exit 0 - else - rc="$?" - fi - else - rc="$?" - fi - log_failure_msg "Can't restart etcd-proxy" - log_end_msg ${rc} - ;; - *) - echo "Usage: $SCRIPTNAME {start|stop|status|restart|force-reload}" >&2 - exit 3 - ;; -esac - diff --git a/roles/etcd/templates/etcd-proxy-docker.service.j2 b/roles/etcd/templates/etcd-proxy-docker.service.j2 deleted file mode 100644 index bf70f0e7f..000000000 --- a/roles/etcd/templates/etcd-proxy-docker.service.j2 +++ /dev/null @@ -1,28 +0,0 @@ -[Unit] -Description=etcd-proxy docker wrapper -Wants=docker.socket -After=docker.service - -[Service] -User=root -PermissionsStartOnly=true -ExecStart={{ docker_bin_dir | default("/usr/bin") }}/docker run --restart=always \ ---env-file=/etc/etcd-proxy.env \ -{# TODO(mattymo): Allow docker IP binding and disable in envfile - -p 2380:2380 -p 2379:2379 #} ---net=host \ ---stop-signal=SIGKILL \ --v /usr/share/ca-certificates/:/etc/ssl/certs:ro \ ---name={{ etcd_proxy_member_name | default("etcd-proxy") }} \ -{{ etcd_image_repo }}:{{ etcd_image_tag }} \ -{% if etcd_after_v3 %} -{{ etcd_container_bin_dir }}etcd -{% endif %} -ExecStartPre=-{{ docker_bin_dir | default("/usr/bin") }}/docker rm -f {{ etcd_proxy_member_name | default("etcd-proxy") }} -ExecReload={{ docker_bin_dir | default("/usr/bin") }}/docker restart {{ etcd_proxy_member_name | default("etcd-proxy") }} -ExecStop={{ docker_bin_dir | default("/usr/bin") }}/docker stop {{ etcd_proxy_member_name | default("etcd-proxy") }} -Restart=always -RestartSec=15s - 
-[Install] -WantedBy=multi-user.target diff --git a/roles/etcd/templates/etcd-proxy-host.service.j2 b/roles/etcd/templates/etcd-proxy-host.service.j2 deleted file mode 100644 index 4ea5f7bc9..000000000 --- a/roles/etcd/templates/etcd-proxy-host.service.j2 +++ /dev/null @@ -1,19 +0,0 @@ -[Unit] -Description=etcd-proxy -After=network.target - -[Service] -Type=notify -User=etcd -PermissionsStartOnly=true -EnvironmentFile=/etc/etcd-proxy.env -ExecStart={{ bin_dir }}/etcd -ExecStartPre=/bin/mkdir -p /var/lib/etcd-proxy -ExecStartPre=/bin/chown -R etcd: /var/lib/etcd-proxy -NotifyAccess=all -Restart=always -RestartSec=10s -LimitNOFILE=40000 - -[Install] -WantedBy=multi-user.target diff --git a/roles/etcd/templates/etcd-proxy.j2 b/roles/etcd/templates/etcd-proxy.j2 deleted file mode 100644 index 0a1492a37..000000000 --- a/roles/etcd/templates/etcd-proxy.j2 +++ /dev/null @@ -1,5 +0,0 @@ -ETCD_DATA_DIR=/var/lib/etcd-proxy -ETCD_PROXY=on -ETCD_LISTEN_CLIENT_URLS={{ etcd_access_endpoint }} -ETCD_NAME={{ etcd_proxy_member_name | default("etcd-proxy") }} -ETCD_INITIAL_CLUSTER={{ etcd_peer_addresses }} diff --git a/roles/etcd/templates/etcd.j2 b/roles/etcd/templates/etcd.j2 index b82116612..c6dc4c28b 100644 --- a/roles/etcd/templates/etcd.j2 +++ b/roles/etcd/templates/etcd.j2 @@ -3,11 +3,7 @@ ETCD_ADVERTISE_CLIENT_URLS={{ etcd_client_url }} ETCD_INITIAL_ADVERTISE_PEER_URLS={{ etcd_peer_url }} ETCD_INITIAL_CLUSTER_STATE={% if etcd_cluster_is_healthy.rc != 0 | bool %}new{% else %}existing{% endif %} -{% if not is_etcd_proxy %} ETCD_LISTEN_CLIENT_URLS=http://{{ etcd_address }}:2379,http://127.0.0.1:2379 -{% else %} -ETCD_LISTEN_CLIENT_URLS=http://{{ etcd_address }}:2379 -{% endif %} ETCD_ELECTION_TIMEOUT=10000 ETCD_INITIAL_CLUSTER_TOKEN=k8s_etcd ETCD_LISTEN_PEER_URLS=http://{{ etcd_address }}:2380 diff --git a/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 b/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 index 7c0a21cfa..ff69b5ec6 
100644 --- a/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 +++ b/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 @@ -26,7 +26,7 @@ spec: image: calico/kube-policy-controller:latest env: - name: ETCD_ENDPOINTS - value: "{{ etcd_endpoint }}" + value: "{{ etcd_access_endpoint }}" # Location of the Kubernetes API - this shouldn't need to be # changed so long as it is used in conjunction with # CONFIGURE_ETC_HOSTS="true". @@ -38,3 +38,12 @@ spec: # This removes the need for KubeDNS to resolve the Service. - name: CONFIGURE_ETC_HOSTS value: "true" + volumeMounts: + - mountPath: {{ etcd_cert_dir }} + name: etcd-certs + readOnly: true + volumes: + - hostPath: + path: {{ etcd_cert_dir }} + name: etcd-certs + diff --git a/roles/kubernetes/master/tasks/pre-upgrade.yml b/roles/kubernetes/master/tasks/pre-upgrade.yml index 3b9f26de1..239c46be9 100644 --- a/roles/kubernetes/master/tasks/pre-upgrade.yml +++ b/roles/kubernetes/master/tasks/pre-upgrade.yml @@ -14,12 +14,3 @@ name: kube-apiserver state: stopped when: (kube_apiserver_service_file.stat.exists|default(False) or kube_apiserver_init_script.stat.exists|default(False)) - -- name: "Pre-upgrade | remove kube-apiserver service definition" - file: - path: "{{ item }}" - state: absent - when: (kube_apiserver_service_file.stat.exists|default(False) or kube_apiserver_init_script.stat.exists|default(False)) - with_items: - - /etc/systemd/system/kube-apiserver.service - - /etc/init.d/kube-apiserver diff --git a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 index 4100e8a34..bcf9f22d4 100644 --- a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 +++ b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 @@ -62,3 +62,4 @@ spec: - hostPath: path: /var/log/ name: logfile + diff --git a/roles/kubernetes/node/templates/cni-calico.conf.j2 
b/roles/kubernetes/node/templates/cni-calico.conf.j2 index 4615cdabd..a6558deaa 100644 --- a/roles/kubernetes/node/templates/cni-calico.conf.j2 +++ b/roles/kubernetes/node/templates/cni-calico.conf.j2 @@ -1,6 +1,7 @@ { "name": "calico-k8s-network", "type": "calico", + "etcd_endpoints": "{{ etcd_access_endpoint }}", "log_level": "info", "ipam": { "type": "calico-ipam" diff --git a/roles/kubernetes/preinstall/defaults/main.yml b/roles/kubernetes/preinstall/defaults/main.yml index 3eae9757d..c02a32e29 100644 --- a/roles/kubernetes/preinstall/defaults/main.yml +++ b/roles/kubernetes/preinstall/defaults/main.yml @@ -45,3 +45,6 @@ openstack_username: "{{ lookup('env','OS_USERNAME') }}" openstack_password: "{{ lookup('env','OS_PASSWORD') }}" openstack_region: "{{ lookup('env','OS_REGION_NAME') }}" openstack_tenant_id: "{{ lookup('env','OS_TENANT_ID') }}" + +# All clients access each node individually, instead of using a load balancer. +etcd_multiaccess: true diff --git a/roles/kubernetes/preinstall/tasks/set_facts.yml b/roles/kubernetes/preinstall/tasks/set_facts.yml index 2dd947dda..d51bcbed4 100644 --- a/roles/kubernetes/preinstall/tasks/set_facts.yml +++ b/roles/kubernetes/preinstall/tasks/set_facts.yml @@ -30,7 +30,7 @@ - set_fact: etcd_access_addresses: |- {% for item in groups['etcd'] -%} - http://{{ hostvars[item].access_ip | default(hostvars[item].ip | default(hostvars[item].ansible_default_ipv4['address'])) }}:2379{% if not loop.last %},{% endif %} + http://{{ item }}:2379{% if not loop.last %},{% endif %} {%- endfor %} - set_fact: etcd_access_endpoint="{% if etcd_multiaccess %}{{ etcd_access_addresses }}{% else %}{{ etcd_endpoint }}{% endif %}" - set_fact: @@ -43,13 +43,6 @@ {% for item in groups['etcd'] -%} {{ "etcd"+loop.index|string }}=http://{{ hostvars[item].access_ip | default(hostvars[item].ip | default(hostvars[item].ansible_default_ipv4['address'])) }}:2380{% if not loop.last %},{% endif %} {%- endfor %} -- set_fact: - etcd_proxy_member_name: |- - {% 
for host in groups['k8s-cluster'] %} - {% if inventory_hostname == host %}{{"etcd-proxy"+loop.index|string }}{% endif %} - {% endfor %} -- set_fact: - is_etcd_proxy: "{{ inventory_hostname in groups['k8s-cluster'] }}" - set_fact: is_etcd_master: "{{ inventory_hostname in groups['etcd'] }}" - set_fact: diff --git a/roles/network_plugin/calico/tasks/main.yml b/roles/network_plugin/calico/tasks/main.yml index 46f729883..ce43d5224 100644 --- a/roles/network_plugin/calico/tasks/main.yml +++ b/roles/network_plugin/calico/tasks/main.yml @@ -43,17 +43,21 @@ - name: Calico | wait for etcd uri: url=http://localhost:2379/health register: result - until: result.status == 200 + until: result.status == 200 or result.status == 401 retries: 10 delay: 5 - when: inventory_hostname in groups['kube-master'] + delegate_to: "{{groups['etcd'][0]}}" + run_once: true - name: Calico | Check if calico network pool has already been configured - uri: - url: "{{ etcd_endpoint }}/v2/keys/calico/v1/ipam/v4/pool" - return_content: yes - status_code: 200,404 + command: |- + curl \ + --cacert {{ etcd_cert_dir }}/ca.pem \ + --cert {{ etcd_cert_dir}}/admin.pem \ + --key {{ etcd_cert_dir }}/admin-key.pem \ + https://localhost:2379/v2/keys/calico/v1/ipam/v4/pool register: calico_conf + delegate_to: "{{groups['etcd'][0]}}" run_once: true - name: Calico | Define ipip pool argument @@ -79,21 +83,25 @@ environment: NO_DEFAULT_POOLS: true run_once: true - when: calico_conf.status == 404 or "nodes" not in calico_conf.content + when: '"Key not found" in calico_conf.stdout or "nodes" not in calico_conf.stdout' - name: Calico | Get calico configuration from etcd - uri: - url: "{{ etcd_endpoint }}/v2/keys/calico/v1/ipam/v4/pool" - return_content: yes - register: calico_pools + command: |- + curl http://localhost:2379/v2/keys/calico/v1/ipam/v4/pool + register: calico_pools_raw + delegate_to: "{{groups['etcd'][0]}}" + run_once: true + +- set_fact: + calico_pools: "{{ calico_pools_raw.stdout | from_json }}" 
run_once: true - name: Calico | Check if calico pool is properly configured fail: msg: 'Only one network pool must be configured and it must be the subnet {{ kube_pods_subnet }}. Please erase calico configuration and run the playbook again ("etcdctl rm --recursive /calico/v1/ipam/v4/pool")' - when: ( calico_pools.json['node']['nodes'] | length > 1 ) or - ( not calico_pools.json['node']['nodes'][0]['key'] | search(".*{{ kube_pods_subnet | ipaddr('network') }}.*") ) + when: ( calico_pools['node']['nodes'] | length > 1 ) or + ( not calico_pools['node']['nodes'][0]['key'] | search(".*{{ kube_pods_subnet | ipaddr('network') }}.*") ) run_once: true - name: Calico | Write /etc/network-environment @@ -131,4 +139,3 @@ shell: "{{ bin_dir }}/calicoctl node bgp peer add {{ item.router_id }} as {{ item.as }}" with_items: peers when: peer_with_router|default(false) and inventory_hostname in groups['kube-node'] - diff --git a/roles/network_plugin/calico/templates/calico-node.service.j2 b/roles/network_plugin/calico/templates/calico-node.service.j2 index a7f7e4bab..2a7775fd4 100644 --- a/roles/network_plugin/calico/templates/calico-node.service.j2 +++ b/roles/network_plugin/calico/templates/calico-node.service.j2 @@ -1,8 +1,8 @@ [Unit] Description=Calico per-node agent Documentation=https://github.com/projectcalico/calico-docker -After=docker.service docker.socket etcd-proxy.service -Wants=docker.socket etcd-proxy.service +After=docker.service docker.socket +Wants=docker.socket [Service] User=root diff --git a/roles/network_plugin/calico/templates/calicoctl-container.j2 b/roles/network_plugin/calico/templates/calicoctl-container.j2 index 466f1df93..c8ac759de 100644 --- a/roles/network_plugin/calico/templates/calicoctl-container.j2 +++ b/roles/network_plugin/calico/templates/calicoctl-container.j2 @@ -1,6 +1,7 @@ #!/bin/bash /usr/bin/docker run --privileged --rm \ ---net=host --pid=host -e ETCD_AUTHORITY={{ etcd_authority }} \ +--net=host --pid=host \ +-e ETCD_ENDPOINTS={{ 
etcd_access_endpoint }} \ -v /usr/bin/docker:/usr/bin/docker \ -v /var/run/docker.sock:/var/run/docker.sock \ -v /var/run/calico:/var/run/calico \ diff --git a/roles/network_plugin/calico/templates/network-environment.j2 b/roles/network_plugin/calico/templates/network-environment.j2 index 086803d1b..0da2db904 100644 --- a/roles/network_plugin/calico/templates/network-environment.j2 +++ b/roles/network_plugin/calico/templates/network-environment.j2 @@ -3,7 +3,7 @@ DEFAULT_IPV4={{ip | default(ansible_default_ipv4.address) }} # The Kubernetes master IP -KUBERNETES_MASTER={{ first_kube_master }} +KUBERNETES_MASTER={{ kube_apiserver_endpoint }} # IP and port of etcd instance used by Calico -ETCD_AUTHORITY={{ etcd_authority }} +ETCD_ENDPOINTS={{ etcd_access_endpoint }} diff --git a/roles/network_plugin/flannel/templates/flannel-pod.yml b/roles/network_plugin/flannel/templates/flannel-pod.yml index 15523bdde..70353f11a 100644 --- a/roles/network_plugin/flannel/templates/flannel-pod.yml +++ b/roles/network_plugin/flannel/templates/flannel-pod.yml @@ -21,7 +21,7 @@ args: - "--network-config=/etc/flannel-network.json" - "--etcd-prefix=/{{ cluster_name }}/network" - - "--etcd-server={{ etcd_endpoint }}" + - "--etcd-endpoints={{ etcd_access_endpoint }}" volumeMounts: - name: "networkconfig" mountPath: "/etc/flannel-network.json" From a32cd85eb7f0fa4c70ead3670fbc73d9e541e26a Mon Sep 17 00:00:00 2001 From: Matthew Mosesohn Date: Wed, 9 Nov 2016 13:44:41 +0300 Subject: [PATCH 11/16] Add etcd TLS support --- roles/download/defaults/main.yml | 5 +- roles/etcd/defaults/main.yml | 6 + roles/etcd/files/make-ssl-etcd.sh | 80 +++++++++++++ roles/etcd/handlers/main.yml | 7 +- roles/etcd/tasks/check_certs.yml | 36 ++++++ roles/etcd/tasks/gen_certs.yml | 111 ++++++++++++++++++ roles/etcd/tasks/main.yml | 2 + roles/etcd/tasks/pre_upgrade.yml | 34 ++++++ roles/etcd/templates/deb-etcd-docker.initd.j2 | 3 +- roles/etcd/templates/etcd-docker.service.j2 | 3 +- roles/etcd/templates/etcd.j2 | 13 
+- roles/etcd/templates/openssl.conf.j2 | 39 ++++++ .../templates/calico-policy-controller.yml.j2 | 6 + roles/kubernetes/master/defaults/main.yml | 6 + .../manifests/kube-apiserver.manifest.j2 | 10 +- .../node/templates/cni-calico.conf.j2 | 3 + .../kubernetes/preinstall/tasks/set_facts.yml | 10 +- roles/network_plugin/calico/defaults/main.yml | 3 + roles/network_plugin/calico/tasks/main.yml | 26 +++- .../calico/templates/calicoctl-container.j2 | 4 + .../calico/templates/network-environment.j2 | 3 + roles/network_plugin/flannel/tasks/main.yml | 12 +- .../flannel/templates/flannel-pod.yml | 18 +-- .../flannel/templates/network.json | 1 - roles/uploads/defaults/main.yml | 2 +- 25 files changed, 408 insertions(+), 35 deletions(-) create mode 100755 roles/etcd/files/make-ssl-etcd.sh create mode 100644 roles/etcd/tasks/check_certs.yml create mode 100644 roles/etcd/tasks/gen_certs.yml create mode 100644 roles/etcd/tasks/pre_upgrade.yml create mode 100644 roles/etcd/templates/openssl.conf.j2 delete mode 100644 roles/network_plugin/flannel/templates/network.json diff --git a/roles/download/defaults/main.yml b/roles/download/defaults/main.yml index cbe053fa0..8f33ebd15 100644 --- a/roles/download/defaults/main.yml +++ b/roles/download/defaults/main.yml @@ -10,7 +10,7 @@ kube_version: v1.4.3 etcd_version: v3.0.6 #TODO(mattymo): Move calico versions to roles/network_plugins/calico/defaults # after migration to container download -calico_version: v0.22.0 +calico_version: v1.0.0-beta calico_cni_version: v1.4.2 weave_version: v1.6.1 flannel_version: v0.6.2 @@ -39,7 +39,8 @@ flannel_server_helper_image_tag: "{{ flannel_server_helper_version }}" flannel_image_repo: "quay.io/coreos/flannel" flannel_image_tag: "{{ flannel_version }}" calicoctl_image_repo: "calico/ctl" -calicoctl_image_tag: "{{ calico_version }}" +# TODO(mattymo): v1.0.0-beta has different syntax. 
Needs work to upgrade +calicoctl_image_tag: "v0.22.0" calico_node_image_repo: "calico/node" calico_node_image_tag: "{{ calico_version }}" hyperkube_image_repo: "quay.io/coreos/hyperkube" diff --git a/roles/etcd/defaults/main.yml b/roles/etcd/defaults/main.yml index 02234a2fe..2df4ba165 100644 --- a/roles/etcd/defaults/main.yml +++ b/roles/etcd/defaults/main.yml @@ -1,2 +1,8 @@ --- etcd_bin_dir: "{{ local_release_dir }}/etcd/etcd-{{ etcd_version }}-linux-amd64/" + +etcd_config_dir: /etc/ssl/etcd +etcd_cert_dir: "{{ etcd_config_dir }}/ssl" +etcd_cert_group: root + +etcd_script_dir: "{{ bin_dir }}/etcd-scripts" diff --git a/roles/etcd/files/make-ssl-etcd.sh b/roles/etcd/files/make-ssl-etcd.sh new file mode 100755 index 000000000..4c7db9430 --- /dev/null +++ b/roles/etcd/files/make-ssl-etcd.sh @@ -0,0 +1,80 @@ +#!/bin/bash + +# Author: Smana smainklh@gmail.com +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -o errexit +set -o pipefail + +usage() +{ + cat << EOF +Create self signed certificates + +Usage : $(basename $0) -f [-d ] + -h | --help : Show this message + -f | --config : Openssl configuration file + -d | --ssldir : Directory where the certificates will be installed + + ex : + $(basename $0) -f openssl.conf -d /srv/ssl +EOF +} + +# Options parsing +while (($#)); do + case "$1" in + -h | --help) usage; exit 0;; + -f | --config) CONFIG="${2}"; shift 2;; + -d | --ssldir) SSLDIR="${2}"; shift 2;; + *) + usage + echo "ERROR : Unknown option" + exit 3 + ;; + esac +done + +if [ -z "${CONFIG}" ]; then + echo "ERROR: the openssl configuration file is missing. option -f" + exit 1 +fi +if [ -z "${SSLDIR}" ]; then + SSLDIR="/etc/ssl/etcd" +fi + +tmpdir=$(mktemp -d /tmp/etcd_cacert.XXXXXX) +trap 'rm -rf "${tmpdir}"' EXIT +cd "${tmpdir}" + +mkdir -p "${SSLDIR}" + +# Root CA +openssl genrsa -out ca-key.pem 2048 > /dev/null 2>&1 +openssl req -x509 -new -nodes -key ca-key.pem -days 10000 -out ca.pem -subj "/CN=etcd-ca" > /dev/null 2>&1 + +# ETCD member +openssl genrsa -out member-key.pem 2048 > /dev/null 2>&1 +openssl req -new -key member-key.pem -out member.csr -subj "/CN=etcd-member" -config "${CONFIG}" > /dev/null 2>&1 +openssl x509 -req -in member.csr -CA ca.pem -CAkey ca-key.pem -CAcreateserial -out member.pem -days 365 -extensions ssl_client -extfile "${CONFIG}" > /dev/null 2>&1 + +# Nodes and Admin +for i in node admin; do + openssl genrsa -out ${i}-key.pem 2048 > /dev/null 2>&1 + openssl req -new -key ${i}-key.pem -out ${i}.csr -subj "/CN=kube-${i}" > /dev/null 2>&1 + openssl x509 -req -in ${i}.csr -CA ca.pem -CAkey ca-key.pem -CAcreateserial -out ${i}.pem -days 365 -extensions ssl_client -extfile "${CONFIG}" > /dev/null 2>&1 +done + +# Install certs +mv *.pem "${SSLDIR}/" diff --git a/roles/etcd/handlers/main.yml b/roles/etcd/handlers/main.yml index 8104ff1a8..badf0bd79 100644 --- a/roles/etcd/handlers/main.yml +++ b/roles/etcd/handlers/main.yml @@ -11,7 +11,7 @@ when:
ansible_service_mgr == "systemd" - name: wait for etcd up - uri: url="http://{% if is_etcd_master %}{{ etcd_address }}{% else %}127.0.0.1{% endif %}:2379/health" + uri: url="https://{% if is_etcd_master %}{{ etcd_address }}{% else %}127.0.0.1{% endif %}:2379/health" validate_certs=no register: result until: result.status is defined and result.status == 200 retries: 10 @@ -22,3 +22,8 @@ name: etcd state: restarted when: is_etcd_master + +- name: set etcd_secret_changed + set_fact: + etcd_secret_changed: true + diff --git a/roles/etcd/tasks/check_certs.yml b/roles/etcd/tasks/check_certs.yml new file mode 100644 index 000000000..03a875517 --- /dev/null +++ b/roles/etcd/tasks/check_certs.yml @@ -0,0 +1,36 @@ +--- +- name: "Check_certs | check if the certs have already been generated on first master" + stat: + path: "{{ etcd_cert_dir }}/ca.pem" + delegate_to: "{{groups['etcd'][0]}}" + register: etcdcert_master + run_once: true + +- name: "Check_certs | Set default value for 'sync_certs' and 'gen_certs' to false" + set_fact: + sync_certs: false + gen_certs: false + +- name: "Check_certs | Set 'sync_certs' and 'gen_certs' to true" + set_fact: + gen_certs: true + when: not etcdcert_master.stat.exists + run_once: true + +- name: "Check certs | check if a cert already exists" + stat: + path: "{{ etcd_cert_dir }}/ca.pem" + register: etcdcert + +- name: "Check_certs | Set 'sync_certs' to true" + set_fact: + sync_certs: true + when: >- + {%- set certs = {'sync': False} -%} + {%- for server in play_hosts + if (not hostvars[server].etcdcert.stat.exists|default(False)) or + (hostvars[server].etcdcert.stat.checksum|default('') != etcdcert_master.stat.checksum|default('')) -%} + {%- set _ = certs.update({'sync': True}) -%} + {%- endfor -%} + {{ certs.sync }} + run_once: true diff --git a/roles/etcd/tasks/gen_certs.yml b/roles/etcd/tasks/gen_certs.yml new file mode 100644 index 000000000..e0aad58a2 --- /dev/null +++ b/roles/etcd/tasks/gen_certs.yml @@ -0,0 +1,111 @@ +--- + +- name: 
Gen_certs | create etcd script dir + file: + path: "{{ etcd_script_dir }}" + state: directory + owner: root + when: inventory_hostname == groups['etcd'][0] + +- name: Gen_certs | create etcd cert dir + file: + path={{ etcd_cert_dir }} + group={{ etcd_cert_group }} + state=directory + owner=root + recurse=yes + +- name: Gen_certs | write openssl config + template: + src: "openssl.conf.j2" + dest: "{{ etcd_config_dir }}/openssl.conf" + run_once: yes + delegate_to: "{{groups['etcd'][0]}}" + when: gen_certs|default(false) + +- name: Gen_certs | copy certs generation script + copy: + src: "make-ssl-etcd.sh" + dest: "{{ etcd_script_dir }}/make-ssl-etcd.sh" + mode: 0700 + run_once: yes + delegate_to: "{{groups['etcd'][0]}}" + when: gen_certs|default(false) + +- name: Gen_certs | run cert generation script + command: "{{ etcd_script_dir }}/make-ssl-etcd.sh -f {{ etcd_config_dir }}/openssl.conf -d {{ etcd_cert_dir }}" + run_once: yes + delegate_to: "{{groups['etcd'][0]}}" + when: gen_certs|default(false) + notify: set etcd_secret_changed + +- set_fact: + master_certs: ['ca-key.pem', 'admin.pem', 'admin-key.pem', 'member.pem', 'member-key.pem'] + node_certs: ['ca.pem', 'node.pem', 'node-key.pem'] + +- name: Gen_certs | Gather etcd master certs + shell: "tar cfz - -C {{ etcd_cert_dir }} {{ master_certs|join(' ') }} {{ node_certs|join(' ') }}| base64 --wrap=0" + register: etcd_master_cert_data + delegate_to: "{{groups['etcd'][0]}}" + run_once: true + when: sync_certs|default(false) + notify: set etcd_secret_changed + +- name: Gen_certs | Gather etcd node certs + shell: "tar cfz - -C {{ etcd_cert_dir }} {{ node_certs|join(' ') }} | base64 --wrap=0" + register: etcd_node_cert_data + delegate_to: "{{groups['etcd'][0]}}" + run_once: true + when: sync_certs|default(false) + notify: set etcd_secret_changed + +- name: Gen_certs | Copy certs on masters + shell: "echo '{{etcd_master_cert_data.stdout|quote}}' | base64 -d | tar xz -C {{ etcd_cert_dir }}" + changed_when: false + when: 
inventory_hostname in groups['etcd'] and sync_certs|default(false) and + inventory_hostname != groups['etcd'][0] + +- name: Gen_certs | Copy certs on nodes + shell: "echo '{{etcd_node_cert_data.stdout|quote}}' | base64 -d | tar xz -C {{ etcd_cert_dir }}" + changed_when: false + when: inventory_hostname in groups['k8s-cluster'] and sync_certs|default(false) and + inventory_hostname not in groups['etcd'] + +- name: Gen_certs | check certificate permissions + file: + path={{ etcd_cert_dir }} + group={{ etcd_cert_group }} + state=directory + owner=kube + recurse=yes + +- name: Gen_certs | set permissions on keys + shell: chmod 0600 {{ etcd_cert_dir}}/*key.pem + when: inventory_hostname in groups['etcd'] + changed_when: false + +- name: Gen_certs | target ca-certificates directory + set_fact: + ca_cert_dir: |- + {% if ansible_os_family == "Debian" -%} + /usr/local/share/ca-certificates + {%- elif ansible_os_family == "RedHat" -%} + /etc/pki/ca-trust/source/anchors + {%- elif ansible_os_family == "CoreOS" -%} + /etc/ssl/certs + {%- endif %} + +- name: Gen_certs | add CA to trusted CA dir + copy: + src: "{{ etcd_cert_dir }}/ca.pem" + dest: "{{ ca_cert_dir }}/etcd-ca.crt" + remote_src: true + register: etcd_ca_cert + +- name: Gen_certs | update ca-certificates (Debian/Ubuntu/CoreOS) + command: update-ca-certificates + when: etcd_ca_cert.changed and ansible_os_family in ["Debian", "CoreOS"] + +- name: Gen_certs | update ca-certificates (RedHat) + command: update-ca-trust extract + when: etcd_ca_cert.changed and ansible_os_family == "RedHat" diff --git a/roles/etcd/tasks/main.yml b/roles/etcd/tasks/main.yml index 88dfe59d8..15be1a769 100644 --- a/roles/etcd/tasks/main.yml +++ b/roles/etcd/tasks/main.yml @@ -1,5 +1,7 @@ --- - include: pre_upgrade.yml +- include: check_certs.yml +- include: gen_certs.yml - include: install.yml when: is_etcd_master - include: set_cluster_health.yml diff --git a/roles/etcd/tasks/pre_upgrade.yml b/roles/etcd/tasks/pre_upgrade.yml new file mode
100644 index 000000000..d1962ea92 --- /dev/null +++ b/roles/etcd/tasks/pre_upgrade.yml @@ -0,0 +1,34 @@ +- name: "Pre-upgrade | check for etcd-proxy unit file" + stat: + path: /etc/systemd/system/etcd-proxy.service + register: kube_apiserver_service_file + +- name: "Pre-upgrade | check for etcd-proxy init script" + stat: + path: /etc/init.d/etcd-proxy + register: kube_apiserver_init_script + +- name: "Pre-upgrade | stop etcd-proxy if service defined" + service: + name: etcd-proxy + state: stopped + when: (kube_apiserver_service_file.stat.exists|default(False) or kube_apiserver_init_script.stat.exists|default(False)) + +- name: "Pre-upgrade | remove etcd-proxy service definition" + file: + path: "{{ item }}" + state: absent + when: (kube_apiserver_service_file.stat.exists|default(False) or kube_apiserver_init_script.stat.exists|default(False)) + with_items: + - /etc/systemd/system/etcd-proxy.service + - /etc/init.d/etcd-proxy + +- name: "Pre-upgrade | find etcd-proxy container" + command: docker ps -aq --filter "name=etcd-proxy*" + register: etcd_proxy_container + ignore_errors: true + +- name: "Pre-upgrade | remove etcd-proxy if it exists" + command: "docker rm -f {{item}}" + with_items: "{{etcd_proxy_container.stdout_lines|default([])}}" + diff --git a/roles/etcd/templates/deb-etcd-docker.initd.j2 b/roles/etcd/templates/deb-etcd-docker.initd.j2 index a83aae184..4457b37b9 100644 --- a/roles/etcd/templates/deb-etcd-docker.initd.j2 +++ b/roles/etcd/templates/deb-etcd-docker.initd.j2 @@ -19,8 +19,9 @@ DAEMON={{ docker_bin_dir | default("/usr/bin") }}/docker DAEMON_EXEC=`basename $DAEMON` DAEMON_ARGS="run --restart=always --env-file=/etc/etcd.env \ --net=host \ --v /usr/share/ca-certificates/:/etc/ssl/certs:ro \ +-v /etc/ssl/certs:/etc/ssl/certs:ro \ -v /var/lib/etcd:/var/lib/etcd:rw \ +-v {{ etcd_cert_dir }}:{{ etcd_cert_dir }}:ro \ --name={{ etcd_member_name | default("etcd") }} \ {{ etcd_image_repo }}:{{ etcd_image_tag }} \ {% if etcd_after_v3 %} diff --git
a/roles/etcd/templates/etcd-docker.service.j2 b/roles/etcd/templates/etcd-docker.service.j2 index a37759fec..ff40b5b59 100644 --- a/roles/etcd/templates/etcd-docker.service.j2 +++ b/roles/etcd/templates/etcd-docker.service.j2 @@ -11,7 +11,8 @@ ExecStart={{ docker_bin_dir | default("/usr/bin") }}/docker run --restart=always {# TODO(mattymo): Allow docker IP binding and disable in envfile -p 2380:2380 -p 2379:2379 #} --net=host \ --v /usr/share/ca-certificates/:/etc/ssl/certs:ro \ +-v /etc/ssl/certs:/etc/ssl/certs:ro \ +-v {{ etcd_cert_dir }}:{{ etcd_cert_dir }}:ro \ -v /var/lib/etcd:/var/lib/etcd:rw \ --name={{ etcd_member_name | default("etcd") }} \ {{ etcd_image_repo }}:{{ etcd_image_tag }} \ diff --git a/roles/etcd/templates/etcd.j2 b/roles/etcd/templates/etcd.j2 index c6dc4c28b..0b7e1eb9f 100644 --- a/roles/etcd/templates/etcd.j2 +++ b/roles/etcd/templates/etcd.j2 @@ -3,10 +3,19 @@ ETCD_ADVERTISE_CLIENT_URLS={{ etcd_client_url }} ETCD_INITIAL_ADVERTISE_PEER_URLS={{ etcd_peer_url }} ETCD_INITIAL_CLUSTER_STATE={% if etcd_cluster_is_healthy.rc != 0 | bool %}new{% else %}existing{% endif %} -ETCD_LISTEN_CLIENT_URLS=http://{{ etcd_address }}:2379,http://127.0.0.1:2379 +ETCD_LISTEN_CLIENT_URLS=https://{{ etcd_address }}:2379,https://127.0.0.1:2379 ETCD_ELECTION_TIMEOUT=10000 ETCD_INITIAL_CLUSTER_TOKEN=k8s_etcd -ETCD_LISTEN_PEER_URLS=http://{{ etcd_address }}:2380 +ETCD_LISTEN_PEER_URLS=https://{{ etcd_address }}:2380 ETCD_NAME={{ etcd_member_name }} ETCD_PROXY=off ETCD_INITIAL_CLUSTER={{ etcd_peer_addresses }} + +# TLS settings +ETCD_TRUSTED_CA_FILE={{ etcd_cert_dir }}/ca.pem +ETCD_CERT_FILE={{ etcd_cert_dir }}/node.pem +ETCD_KEY_FILE={{ etcd_cert_dir }}/node-key.pem +ETCD_PEER_TRUSTED_CA_FILE={{ etcd_cert_dir }}/ca.pem +ETCD_PEER_CERT_FILE={{ etcd_cert_dir }}/member.pem +ETCD_PEER_KEY_FILE={{ etcd_cert_dir }}/member-key.pem +ETCD_PEER_CLIENT_CERT_AUTH=true diff --git a/roles/etcd/templates/openssl.conf.j2 b/roles/etcd/templates/openssl.conf.j2 new file mode 100644 
index 000000000..3ea328289 --- /dev/null +++ b/roles/etcd/templates/openssl.conf.j2 @@ -0,0 +1,39 @@ +[req] +req_extensions = v3_req +distinguished_name = req_distinguished_name + +[req_distinguished_name] + +[ v3_req ] +basicConstraints = CA:FALSE +keyUsage = nonRepudiation, digitalSignature, keyEncipherment +subjectAltName = @alt_names + +[ ssl_client ] +extendedKeyUsage = clientAuth, serverAuth +basicConstraints = CA:FALSE +subjectKeyIdentifier=hash +authorityKeyIdentifier=keyid,issuer +subjectAltName = @alt_names + +[ v3_ca ] +basicConstraints = CA:TRUE +keyUsage = nonRepudiation, digitalSignature, keyEncipherment +subjectAltName = @alt_names +authorityKeyIdentifier=keyid:always,issuer + +[alt_names] +DNS.1 = localhost +{% for host in groups['etcd'] %} +DNS.{{ 1 + loop.index }} = {{ host }} +{% endfor %} +{% if loadbalancer_apiserver is defined and apiserver_loadbalancer_domain_name is defined %} +{% set idx = groups['etcd'] | length | int + 2 %} +DNS.{{ idx | string }} = {{ apiserver_loadbalancer_domain_name }} +{% endif %} +{% for host in groups['etcd'] %} +IP.{{ 2 * loop.index - 1 }} = {{ hostvars[host]['access_ip'] | default(hostvars[host]['ansible_default_ipv4']['address']) }} +IP.{{ 2 * loop.index }} = {{ hostvars[host]['ip'] | default(hostvars[host]['ansible_default_ipv4']['address']) }} +{% endfor %} +{% set idx = groups['etcd'] | length | int * 2 + 1 %} +IP.{{ idx }} = 127.0.0.1 diff --git a/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 b/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 index ff69b5ec6..5e0586e16 100644 --- a/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 +++ b/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 @@ -27,6 +27,12 @@ spec: env: - name: ETCD_ENDPOINTS value: "{{ etcd_access_endpoint }}" + - name: ETCD_CA_CERT_FILE + value: "{{ etcd_cert_dir }}/ca.pem" + - name: ETCD_CERT_FILE + value: "{{ etcd_cert_dir }}/node.pem" + - name:
ETCD_KEY_FILE + value: "{{ etcd_cert_dir }}/node-key.pem" # Location of the Kubernetes API - this shouldn't need to be # changed so long as it is used in conjunction with # CONFIGURE_ETC_HOSTS="true". diff --git a/roles/kubernetes/master/defaults/main.yml b/roles/kubernetes/master/defaults/main.yml index ee32ccf57..269ed3714 100644 --- a/roles/kubernetes/master/defaults/main.yml +++ b/roles/kubernetes/master/defaults/main.yml @@ -28,3 +28,9 @@ kube_apiserver_insecure_bind_address: 127.0.0.1 # Logging directory (sysvinit systems) kube_log_dir: "/var/log/kubernetes" + +# ETCD cert dir for connecting apiserver to etcd +etcd_config_dir: /etc/ssl/etcd +etcd_cert_dir: "{{ etcd_config_dir }}/ssl" + + diff --git a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 index bcf9f22d4..192e6021b 100644 --- a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 +++ b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 @@ -14,6 +14,9 @@ spec: - --advertise-address={{ ip | default(ansible_default_ipv4.address) }} - --etcd-servers={{ etcd_access_endpoint }} - --etcd-quorum-read=true + - --etcd-cafile={{ etcd_cert_dir }}/ca.pem + - --etcd-certfile={{ etcd_cert_dir }}/node.pem + - --etcd-keyfile={{ etcd_cert_dir }}/node-key.pem - --insecure-bind-address={{ kube_apiserver_insecure_bind_address }} - --apiserver-count={{ kube_apiserver_count }} - --admission-control=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,ResourceQuota @@ -50,6 +53,9 @@ spec: - mountPath: /etc/ssl/certs name: ssl-certs-host readOnly: true + - mountPath: {{ etcd_cert_dir }} + name: etcd-certs + readOnly: true - mountPath: /var/log/ name: logfile volumes: @@ -59,7 +65,9 @@ spec: - hostPath: path: /etc/ssl/certs/ name: ssl-certs-host + - hostPath: + path: {{ etcd_cert_dir }} + name: etcd-certs - hostPath: path: /var/log/ name: logfile - diff --git 
a/roles/kubernetes/node/templates/cni-calico.conf.j2 b/roles/kubernetes/node/templates/cni-calico.conf.j2 index a6558deaa..4e9752ef4 100644 --- a/roles/kubernetes/node/templates/cni-calico.conf.j2 +++ b/roles/kubernetes/node/templates/cni-calico.conf.j2 @@ -2,6 +2,9 @@ "name": "calico-k8s-network", "type": "calico", "etcd_endpoints": "{{ etcd_access_endpoint }}", + "etcd_cert_file": "{{ etcd_cert_dir }}/node.pem", + "etcd_key_file": "{{ etcd_cert_dir }}/node-key.pem", + "etcd_ca_cert_file": "{{ etcd_cert_dir }}/ca.pem", "log_level": "info", "ipam": { "type": "calico-ipam" diff --git a/roles/kubernetes/preinstall/tasks/set_facts.yml b/roles/kubernetes/preinstall/tasks/set_facts.yml index d51bcbed4..aec296c6e 100644 --- a/roles/kubernetes/preinstall/tasks/set_facts.yml +++ b/roles/kubernetes/preinstall/tasks/set_facts.yml @@ -23,14 +23,14 @@ - set_fact: etcd_address="{{ ip | default(ansible_default_ipv4['address']) }}" - set_fact: etcd_access_address="{{ access_ip | default(etcd_address) }}" -- set_fact: etcd_peer_url="http://{{ etcd_access_address }}:2380" -- set_fact: etcd_client_url="http://{{ etcd_access_address }}:2379" +- set_fact: etcd_peer_url="https://{{ etcd_access_address }}:2380" +- set_fact: etcd_client_url="https://{{ etcd_access_address }}:2379" - set_fact: etcd_authority="127.0.0.1:2379" -- set_fact: etcd_endpoint="http://{{ etcd_authority }}" +- set_fact: etcd_endpoint="https://{{ etcd_authority }}" - set_fact: etcd_access_addresses: |- {% for item in groups['etcd'] -%} - http://{{ item }}:2379{% if not loop.last %},{% endif %} + https://{{ item }}:2379{% if not loop.last %},{% endif %} {%- endfor %} - set_fact: etcd_access_endpoint="{% if etcd_multiaccess %}{{ etcd_access_addresses }}{% else %}{{ etcd_endpoint }}{% endif %}" - set_fact: @@ -41,7 +41,7 @@ - set_fact: etcd_peer_addresses: |- {% for item in groups['etcd'] -%} - {{ "etcd"+loop.index|string }}=http://{{ hostvars[item].access_ip | default(hostvars[item].ip | 
default(hostvars[item].ansible_default_ipv4['address'])) }}:2380{% if not loop.last %},{% endif %} + {{ "etcd"+loop.index|string }}=https://{{ hostvars[item].access_ip | default(hostvars[item].ip | default(hostvars[item].ansible_default_ipv4['address'])) }}:2380{% if not loop.last %},{% endif %} {%- endfor %} - set_fact: is_etcd_master: "{{ inventory_hostname in groups['etcd'] }}" diff --git a/roles/network_plugin/calico/defaults/main.yml b/roles/network_plugin/calico/defaults/main.yml index aec7a5e15..7b608ab7e 100644 --- a/roles/network_plugin/calico/defaults/main.yml +++ b/roles/network_plugin/calico/defaults/main.yml @@ -8,3 +8,6 @@ ipip: false # Set to true if you want your calico cni binaries to overwrite the # ones from hyperkube while leaving other cni plugins intact. overwrite_hyperkube_cni: true + +calico_cert_dir: /etc/calico/certs +etcd_cert_dir: /etc/ssl/etcd/ssl diff --git a/roles/network_plugin/calico/tasks/main.yml b/roles/network_plugin/calico/tasks/main.yml index ce43d5224..60a728ba0 100644 --- a/roles/network_plugin/calico/tasks/main.yml +++ b/roles/network_plugin/calico/tasks/main.yml @@ -12,6 +12,24 @@ - meta: flush_handlers +- name: Calico | Create calico certs directory + file: + dest: "{{ calico_cert_dir }}" + state: directory + mode: 0750 + owner: root + group: root + +- name: Calico | Link etcd certificates for calico-node + file: + src: "{{ etcd_cert_dir }}/{{ item.s }}" + dest: "{{ calico_cert_dir }}/{{ item.d }}" + state: hard + with_items: + - {s: "ca.pem", d: "ca_cert.crt"} + - {s: "node.pem", d: "cert.crt"} + - {s: "node-key.pem", d: "key.pem"} + - name: Calico | Install calicoctl container script template: src: calicoctl-container.j2 @@ -41,7 +59,7 @@ when: "{{ overwrite_hyperkube_cni|bool }}" - name: Calico | wait for etcd - uri: url=http://localhost:2379/health + uri: url=https://localhost:2379/health validate_certs=no register: result until: result.status == 200 or result.status == 401 retries: 10 @@ -87,7 +105,11 @@ - name: 
Calico | Get calico configuration from etcd command: |- - curl http://localhost:2379/v2/keys/calico/v1/ipam/v4/pool + curl \ + --cacert {{ etcd_cert_dir }}/ca.pem \ + --cert {{ etcd_cert_dir}}/admin.pem \ + --key {{ etcd_cert_dir }}/admin-key.pem \ + https://localhost:2379/v2/keys/calico/v1/ipam/v4/pool register: calico_pools_raw delegate_to: "{{groups['etcd'][0]}}" run_once: true diff --git a/roles/network_plugin/calico/templates/calicoctl-container.j2 b/roles/network_plugin/calico/templates/calicoctl-container.j2 index c8ac759de..9d47c73ca 100644 --- a/roles/network_plugin/calico/templates/calicoctl-container.j2 +++ b/roles/network_plugin/calico/templates/calicoctl-container.j2 @@ -2,8 +2,12 @@ /usr/bin/docker run --privileged --rm \ --net=host --pid=host \ -e ETCD_ENDPOINTS={{ etcd_access_endpoint }} \ +-e ETCD_CA_CERT_FILE=/etc/calico/certs/ca_cert.crt \ +-e ETCD_CERT_FILE=/etc/calico/certs/cert.crt \ +-e ETCD_KEY_FILE=/etc/calico/certs/key.pem \ -v /usr/bin/docker:/usr/bin/docker \ -v /var/run/docker.sock:/var/run/docker.sock \ -v /var/run/calico:/var/run/calico \ +-v /etc/calico/certs:/etc/calico/certs:ro \ {{ calicoctl_image_repo }}:{{ calicoctl_image_tag}} \ $@ diff --git a/roles/network_plugin/calico/templates/network-environment.j2 b/roles/network_plugin/calico/templates/network-environment.j2 index 0da2db904..8fd13d36c 100644 --- a/roles/network_plugin/calico/templates/network-environment.j2 +++ b/roles/network_plugin/calico/templates/network-environment.j2 @@ -7,3 +7,6 @@ KUBERNETES_MASTER={{ kube_apiserver_endpoint }} # IP and port of etcd instance used by Calico ETCD_ENDPOINTS={{ etcd_access_endpoint }} +ETCD_CA_CERT_FILE=/etc/calico/certs/ca_cert.crt +ETCD_CERT_FILE=/etc/calico/certs/cert.crt +ETCD_KEY_FILE=/etc/calico/certs/key.pem diff --git a/roles/network_plugin/flannel/tasks/main.yml b/roles/network_plugin/flannel/tasks/main.yml index a6fa183ef..8581d2ce7 100644 --- a/roles/network_plugin/flannel/tasks/main.yml +++ 
b/roles/network_plugin/flannel/tasks/main.yml @@ -1,9 +1,11 @@ --- -- name: Flannel | Write flannel configuration - template: - src: network.json - dest: /etc/flannel-network.json - backup: yes +- name: Flannel | Set Flannel etcd configuration + command: |- + {{ bin_dir }}/etcdctl --peers={{ etcd_access_addresses }} \ + set /{{ cluster_name }}/network/config \ + '{ "Network": "{{ kube_pods_subnet }}", "SubnetLen": {{ kube_network_node_prefix }}, "Backend": { "Type": "{{ flannel_backend_type }}" } }' + delegate_to: "{{groups['etcd'][0]}}" + run_once: true - name: Flannel | Create flannel pod manifest template: diff --git a/roles/network_plugin/flannel/templates/flannel-pod.yml b/roles/network_plugin/flannel/templates/flannel-pod.yml index 70353f11a..02c41e18b 100644 --- a/roles/network_plugin/flannel/templates/flannel-pod.yml +++ b/roles/network_plugin/flannel/templates/flannel-pod.yml @@ -12,26 +12,16 @@ - name: "subnetenv" hostPath: path: "/run/flannel" - - name: "networkconfig" + - name: "etcd-certs" hostPath: - path: "/etc/flannel-network.json" + path: "{{ etcd_cert_dir }}" containers: - - name: "flannel-server-helper" - image: "{{ flannel_server_helper_image_repo }}:{{ flannel_server_helper_image_tag }}" - args: - - "--network-config=/etc/flannel-network.json" - - "--etcd-prefix=/{{ cluster_name }}/network" - - "--etcd-endpoints={{ etcd_access_endpoint }}" - volumeMounts: - - name: "networkconfig" - mountPath: "/etc/flannel-network.json" - imagePullPolicy: "Always" - name: "flannel-container" image: "{{ flannel_image_repo }}:{{ flannel_image_tag }}" command: - "/bin/sh" - "-c" - - "/opt/bin/flanneld -etcd-endpoints {{ etcd_access_endpoint }} -etcd-prefix /{{ cluster_name }}/network {% if flannel_interface is defined %}-iface {{ flannel_interface }}{% endif %} {% if flannel_public_ip is defined %}-public-ip {{ flannel_public_ip }}{% endif %}" + - "/opt/bin/flanneld -etcd-endpoints {{ etcd_access_endpoint }} -etcd-prefix /{{ cluster_name }}/network -etcd-cafile 
{{ etcd_cert_dir }}/ca.pem -etcd-certfile {{ etcd_cert_dir }}/node.pem -etcd-keyfile {{ etcd_cert_dir }}/node-key.pem {% if flannel_interface is defined %}-iface {{ flannel_interface }}{% endif %} {% if flannel_public_ip is defined %}-public-ip {{ flannel_public_ip }}{% endif %}" ports: - hostPort: 10253 containerPort: 10253 @@ -41,6 +31,8 @@ volumeMounts: - name: "subnetenv" mountPath: "/run/flannel" + - name: "etcd-certs" + mountPath: "{{ etcd_cert_dir }}" securityContext: privileged: true hostNetwork: true diff --git a/roles/network_plugin/flannel/templates/network.json b/roles/network_plugin/flannel/templates/network.json deleted file mode 100644 index cbbec3841..000000000 --- a/roles/network_plugin/flannel/templates/network.json +++ /dev/null @@ -1 +0,0 @@ -{ "Network": "{{ kube_pods_subnet }}", "SubnetLen": {{ kube_network_node_prefix }}, "Backend": { "Type": "{{ flannel_backend_type }}" } } diff --git a/roles/uploads/defaults/main.yml b/roles/uploads/defaults/main.yml index 0774d324c..7b5797881 100644 --- a/roles/uploads/defaults/main.yml +++ b/roles/uploads/defaults/main.yml @@ -5,7 +5,7 @@ local_release_dir: /tmp kube_version: v1.4.3 etcd_version: v3.0.6 -calico_version: v0.22.0 +calico_version: v0.23.0 calico_cni_version: v1.4.2 weave_version: v1.6.1 From fe16fecd8f09dbf09a30300cdc1420755ec34cae Mon Sep 17 00:00:00 2001 From: Matthew Mosesohn Date: Thu, 10 Nov 2016 12:49:47 +0300 Subject: [PATCH 12/16] Fix canal's calico networking config for ETCD TLS Also fixes kube-apiserver upgrade that was erroneously deleted in a previous commit. 
--- roles/kubernetes/master/tasks/pre-upgrade.yml | 9 +++++++ roles/network_plugin/canal/tasks/main.yml | 12 +++++---- .../canal/templates/canal-node.yml.j2 | 27 ------------------- .../canal/templates/network.json.j2 | 1 - 4 files changed, 16 insertions(+), 33 deletions(-) delete mode 100644 roles/network_plugin/canal/templates/network.json.j2 diff --git a/roles/kubernetes/master/tasks/pre-upgrade.yml b/roles/kubernetes/master/tasks/pre-upgrade.yml index 239c46be9..3b9f26de1 100644 --- a/roles/kubernetes/master/tasks/pre-upgrade.yml +++ b/roles/kubernetes/master/tasks/pre-upgrade.yml @@ -14,3 +14,12 @@ name: kube-apiserver state: stopped when: (kube_apiserver_service_file.stat.exists|default(False) or kube_apiserver_init_script.stat.exists|default(False)) + +- name: "Pre-upgrade | remove kube-apiserver service definition" + file: + path: "{{ item }}" + state: absent + when: (kube_apiserver_service_file.stat.exists|default(False) or kube_apiserver_init_script.stat.exists|default(False)) + with_items: + - /etc/systemd/system/kube-apiserver.service + - /etc/init.d/kube-apiserver diff --git a/roles/network_plugin/canal/tasks/main.yml b/roles/network_plugin/canal/tasks/main.yml index ba83edee8..e88cfad7e 100644 --- a/roles/network_plugin/canal/tasks/main.yml +++ b/roles/network_plugin/canal/tasks/main.yml @@ -1,9 +1,11 @@ --- -- name: Canal | Write flannel configuration - template: - src: network.json.j2 - dest: /etc/flannel-network.json - backup: yes +- name: Canal | Set Flannel etcd configuration + command: |- + {{ bin_dir }}/etcdctl --peers={{ etcd_access_addresses }} \ + set /{{ cluster_name }}/network/config \ + '{ "Network": "{{ kube_pods_subnet }}", "SubnetLen": {{ kube_network_node_prefix }}, "Backend": { "Type": "{{ flannel_backend_type }}" } }' + delegate_to: "{{groups['etcd'][0]}}" + run_once: true - name: Canal | Write canal configmap template: diff --git a/roles/network_plugin/canal/templates/canal-node.yml.j2 
b/roles/network_plugin/canal/templates/canal-node.yml.j2 index bdeae6cfd..ef6793f30 100644 --- a/roles/network_plugin/canal/templates/canal-node.yml.j2 +++ b/roles/network_plugin/canal/templates/canal-node.yml.j2 @@ -19,10 +19,6 @@ spec: spec: hostNetwork: true volumes: - # Used by flannel-server-helper - - name: "networkconfig" - hostPath: - path: "/etc/flannel-network.json" # Used by calico/node. - name: lib-modules hostPath: @@ -45,29 +41,6 @@ spec: hostPath: path: /etc/resolv.conf containers: - - name: "flannel-server-helper" - image: "{{ flannel_server_helper_image_repo }}:{{ flannel_server_helper_image_tag }}" - env: - # Cluster name - - name: CLUSTER_NAME - valueFrom: - configMapKeyRef: - name: canal-config - key: cluster_name - # The location of the etcd cluster. - - name: FLANNELD_ETCD_ENDPOINTS - valueFrom: - configMapKeyRef: - name: canal-config - key: etcd_endpoints - args: - - "--network-config=/etc/flannel-network.json" - - "--etcd-prefix=/$(CLUSTER_NAME)/network" - - "--etcd-server=$(FLANNELD_ETCD_ENDPOINTS)" - volumeMounts: - - name: "networkconfig" - mountPath: "/etc/flannel-network.json" - imagePullPolicy: "Always" # Runs the flannel daemon to enable vxlan networking between # container hosts. - name: flannel diff --git a/roles/network_plugin/canal/templates/network.json.j2 b/roles/network_plugin/canal/templates/network.json.j2 deleted file mode 100644 index cbbec3841..000000000 --- a/roles/network_plugin/canal/templates/network.json.j2 +++ /dev/null @@ -1 +0,0 @@ -{ "Network": "{{ kube_pods_subnet }}", "SubnetLen": {{ kube_network_node_prefix }}, "Backend": { "Type": "{{ flannel_backend_type }}" } } From 251800eb16777cf5c6b2edcf7362330352bb5a39 Mon Sep 17 00:00:00 2001 From: Aleksandr Didenko Date: Thu, 10 Nov 2016 13:13:03 +0100 Subject: [PATCH 13/16] Fix policy controller 'etcd_cert_dir' variable is missing from 'kubernetes-apps/ansible' role which breaks Calico policy controller deployment. Also fixing calico-policy-controller.yml. 
--- roles/kubernetes-apps/ansible/defaults/main.yml | 5 ++++- .../templates/calico-policy-controller.yml.j2 | 17 ++++++++--------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/roles/kubernetes-apps/ansible/defaults/main.yml b/roles/kubernetes-apps/ansible/defaults/main.yml index b1086aa0d..d39d146fd 100644 --- a/roles/kubernetes-apps/ansible/defaults/main.yml +++ b/roles/kubernetes-apps/ansible/defaults/main.yml @@ -9,4 +9,7 @@ kubedns_image_tag: "{{ kubedns_version }}" kubednsmasq_image_repo: "gcr.io/google_containers/kube-dnsmasq-amd64" kubednsmasq_image_tag: "{{ kubednsmasq_version }}" exechealthz_image_repo: "gcr.io/google_containers/exechealthz-amd64" -exechealthz_image_tag: "{{ exechealthz_version }}" \ No newline at end of file +exechealthz_image_tag: "{{ exechealthz_version }}" + +# SSL +etcd_cert_dir: "/etc/ssl/etcd/ssl" diff --git a/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 b/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 index 5e0586e16..698710b95 100644 --- a/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 +++ b/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 @@ -44,12 +44,11 @@ spec: # This removes the need for KubeDNS to resolve the Service. 
- name: CONFIGURE_ETC_HOSTS value: "true" - volumeMounts: - - mountPath: {{ etcd_cert_dir }} - name: etcd-certs - readOnly: true - volumes: - - hostPath: - path: {{ etcd_cert_dir }} - name: etcd-certs - + volumeMounts: + - mountPath: {{ etcd_cert_dir }} + name: etcd-certs + readOnly: true + volumes: + - hostPath: + path: {{ etcd_cert_dir }} + name: etcd-certs From cf7c60029bd2ff5bfa37aa89e19fc88ace68b0d8 Mon Sep 17 00:00:00 2001 From: Bogdan Dobrelya Date: Wed, 9 Nov 2016 14:15:27 +0100 Subject: [PATCH 14/16] Label k8s apps, adjust collect/upload info steps - Drop debugs from collect-info playbook - Drop sudo from collect-info step and add target dir var (required for travis jobs) - Label all k8s apps, including static manifests - Add logs for K8s apps to be collected as well - Fix upload to GCS as a public-read tarball Signed-off-by: Bogdan Dobrelya --- .travis.yml | 11 ++-- .../manifests/kube-apiserver.manifest.j2 | 2 + .../kube-controller-manager.manifest.j2 | 2 + .../manifests/kube-scheduler.manifest.j2 | 2 + .../manifests/kube-proxy.manifest.j2 | 2 + .../manifests/nginx-proxy.manifest.j2 | 2 + scripts/collect-info.yaml | 49 ++++++++++++--- tests/cloud_playbooks/templates/boto.j2 | 11 ++++ .../gcs_life.json.j2} | 2 +- tests/cloud_playbooks/upload-logs-gcs.yml | 62 +++++++++++-------- 10 files changed, 106 insertions(+), 39 deletions(-) create mode 100644 tests/cloud_playbooks/templates/boto.j2 rename tests/cloud_playbooks/{files/gcs_life.json => templates/gcs_life.json.j2} (59%) diff --git a/.travis.yml b/.travis.yml index 3bbb46d1c..e2a9f9f07 100644 --- a/.travis.yml +++ b/.travis.yml @@ -103,11 +103,11 @@ env: before_install: # Install Ansible. 
- - pip install --user boto -U - pip install --user ansible - pip install --user netaddr # W/A https://github.com/ansible/ansible-modules-core/issues/5196#issuecomment-253766186 - pip install --user apache-libcloud==0.20.1 + - pip install --user boto==2.9.0 -U cache: - directories: @@ -149,16 +149,19 @@ script: - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root tests/testcases/030_check-network.yml $LOG_LEVEL after_failure: - - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root scripts/collect-info.yaml + - > + $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER + -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root -e dir=$HOME + scripts/collect-info.yaml - > $HOME/.local/bin/ansible-playbook tests/cloud_playbooks/upload-logs-gcs.yml -i "localhost," -c local - -e test_id=${TEST_ID} -e kube_network_plugin=${KUBE_NETWORK_PLUGIN} + -e gce_project_id=${GCE_PROJECT_ID} -e gs_key=${GS_ACCESS_KEY_ID} -e gs_skey=${GS_SECRET_ACCESS_KEY} -e ostype=${CLOUD_IMAGE} -e commit=${TRAVIS_COMMIT} - -e pr=${TRAVIS_PULL_REQUEST} + -e dir=${HOME} after_script: - > diff --git a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 index 4100e8a34..a6718f9e5 100644 --- a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 +++ b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 @@ -3,6 +3,8 @@ kind: Pod metadata: name: kube-apiserver namespace: kube-system + labels: + k8s-app: kube-apiserver spec: hostNetwork: true containers: diff --git a/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 index 3a9e1ef1b..a528f361e 100644 --- 
a/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 +++ b/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 @@ -3,6 +3,8 @@ kind: Pod metadata: name: kube-controller-manager namespace: kube-system + labels: + k8s-app: kube-controller spec: hostNetwork: true containers: diff --git a/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2 index 024ddbfaa..15a705937 100644 --- a/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2 +++ b/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2 @@ -3,6 +3,8 @@ kind: Pod metadata: name: kube-scheduler namespace: kube-system + labels: + k8s-app: kube-scheduler spec: hostNetwork: true containers: diff --git a/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2 b/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2 index 7abffe053..86d1e6f9e 100644 --- a/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2 +++ b/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2 @@ -3,6 +3,8 @@ kind: Pod metadata: name: kube-proxy namespace: kube-system + labels: + k8s-app: kube-proxy spec: hostNetwork: true containers: diff --git a/roles/kubernetes/node/templates/manifests/nginx-proxy.manifest.j2 b/roles/kubernetes/node/templates/manifests/nginx-proxy.manifest.j2 index 50e054268..8e5dfcc11 100644 --- a/roles/kubernetes/node/templates/manifests/nginx-proxy.manifest.j2 +++ b/roles/kubernetes/node/templates/manifests/nginx-proxy.manifest.j2 @@ -3,6 +3,8 @@ kind: Pod metadata: name: nginx-proxy namespace: kube-system + labels: + k8s-app: kube-nginx spec: hostNetwork: true containers: diff --git a/scripts/collect-info.yaml b/scripts/collect-info.yaml index 685b8b787..0ba47866e 100644 --- a/scripts/collect-info.yaml +++ b/scripts/collect-info.yaml @@ -1,10 +1,9 @@ --- - hosts: all - become: true + become: false 
gather_facts: no vars: - debug: false commands: - name: timedate_info cmd: timedatectl status @@ -26,6 +25,37 @@ cmd: journalctl -p err --utc --no-pager - name: etcd_info cmd: etcdctl --debug cluster-health + - name: weave_info + cmd: weave report | jq "." + - name: weave_logs + cmd: docker logs weave > weave.log + - name: kubedns_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kubedns -o name`; + do kubectl logs $i --namespace kube-system kubedns > kubedns.log; done" + - name: apiserver_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kube-apiserver -o name`; + do kubectl logs $i --namespace kube-system > kube-apiserver.log; done" + - name: controller_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kube-controller -o name`; + do kubectl logs $i --namespace kube-system > kube-controller.log; done" + - name: scheduler_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kube-scheduler -o name`; + do kubectl logs $i --namespace kube-system > kube-scheduler.log; done" + - name: proxy_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kube-proxy -o name`; + do kubectl logs $i --namespace kube-system > kube-proxy.log; done" + - name: nginx_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kube-nginx -o name`; + do kubectl logs $i --namespace kube-system > kube-nginx.log; done" + - name: flannel_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l app=flannel -o name`; + do kubectl logs $i --namespace kube-system flannel-container > flannel.log; done" + - name: canal_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=canal-node -o name`; + do kubectl logs $i --namespace kube-system flannel > flannel.log; done" + - name: calico_policy_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=calico-policy -o name`; + do kubectl logs $i --namespace kube-system 
calico-policy-controller > calico-policy-controller.log; done" logs: - /var/log/syslog @@ -38,6 +68,15 @@ - /var/log/calico/bird6/current - /var/log/calico/felix/current - /var/log/calico/confd/current + - weave.log + - kubedns.log + - kube-apiserver.log + - kube-controller.log + - kube-scheduler.log + - kube-proxy.log + - kube-nginx.log + - flannel.log + - calico-policy-controller.log tasks: - name: Storing commands output @@ -47,10 +86,6 @@ with_items: "{{commands}}" no_log: True - - debug: var=item - with_items: "{{output.results}}" - when: debug - - name: Fetch results fetch: src={{ item.name }} dest=/tmp/collect-info/commands with_items: "{{commands}}" @@ -60,7 +95,7 @@ with_items: "{{logs}}" - name: Pack results and logs - local_action: shell GZIP=-9 tar --remove-files -cvzf logs.tar.gz -C /tmp collect-info + local_action: shell GZIP=-9 tar --remove-files -cvzf {{dir|default(".")}}/logs.tar.gz -C /tmp collect-info run_once: true - name: Clean up collected command outputs diff --git a/tests/cloud_playbooks/templates/boto.j2 b/tests/cloud_playbooks/templates/boto.j2 new file mode 100644 index 000000000..660f1a0a3 --- /dev/null +++ b/tests/cloud_playbooks/templates/boto.j2 @@ -0,0 +1,11 @@ +[Credentials] +gs_access_key_id = {{ gs_key }} +gs_secret_access_key = {{ gs_skey }} +[Boto] +https_validate_certificates = True +[GoogleCompute] +[GSUtil] +default_project_id = {{ gce_project_id }} +content_language = en +default_api_version = 2 +[OAuth2] diff --git a/tests/cloud_playbooks/files/gcs_life.json b/tests/cloud_playbooks/templates/gcs_life.json.j2 similarity index 59% rename from tests/cloud_playbooks/files/gcs_life.json rename to tests/cloud_playbooks/templates/gcs_life.json.j2 index eaab30b4f..a666c8fef 100644 --- a/tests/cloud_playbooks/files/gcs_life.json +++ b/tests/cloud_playbooks/templates/gcs_life.json.j2 @@ -3,7 +3,7 @@ [ { "action": {"type": "Delete"}, - "condition": {"age": 2} + "condition": {"age": {{expire_days}}} } ] } diff --git 
a/tests/cloud_playbooks/upload-logs-gcs.yml b/tests/cloud_playbooks/upload-logs-gcs.yml index 12013798d..80d651ba4 100644 --- a/tests/cloud_playbooks/upload-logs-gcs.yml +++ b/tests/cloud_playbooks/upload-logs-gcs.yml @@ -3,65 +3,73 @@ become: false gather_facts: no + vars: + expire_days: 2 + tasks: - name: Generate uniq bucket name prefix - shell: date +%s | sha256sum | base64 | head -c 32 + shell: date +%Y%m%d register: out - name: replace_test_id set_fact: - test_name: "kargo-{{ commit }}-{{ pr }}-{{ out.stdout|lower }}-{{ test_id | regex_replace('\\.', '-') }}" + test_name: "kargo-ci-{{ out.stdout }}" + + - set_fact: + file_name: "{{ostype}}-{{kube_network_plugin}}-{{commit}}-logs.tar.gz" - name: Create a bucket gc_storage: bucket: "{{ test_name }}" mode: create - permission: private + permission: public-read gs_access_key: "{{ gs_key }}" gs_secret_key: "{{ gs_skey }}" no_log: True + - name: Create a lifecycle template for the bucket + template: + src: gcs_life.json.j2 + dest: "{{dir}}/gcs_life.json" + + - name: Create a boto config to access GCS + template: + src: boto.j2 + dest: "{{dir}}/.boto" + no_log: True + - name: Download gsutil cp installer get_url: url: https://dl.google.com/dl/cloudsdk/channels/rapid/install_google_cloud_sdk.bash - dest: /tmp/gcp-installer.sh + dest: "{{dir}}/gcp-installer.sh" - name: Get gsutil tool - script: /tmp/gcp-installer.sh + script: "{{dir}}/gcp-installer.sh" environment: CLOUDSDK_CORE_DISABLE_PROMPTS: 1 + CLOUDSDK_INSTALL_DIR: "{{dir}}" no_log: True - - - name: Create a lifecycle template for the bucket - file: src=gcs_life.json path=/tmp/gcs_life.json - - - name: Hack the boto config for GCS access keys - lineinfile: - dest: .boto - line: "gs_access_key_id = {{ gs_key }}" - regexp: "^#gs_access_key_id = .*$" - no_log: True - - - name: Hack the boto config for GCS secret access keys - lineinfile: - dest: .boto - line: "gs_secret_access_key = {{ gs_skey }}" - regexp: "^#gs_secret_access_key = .*$" - no_log: True + 
ignore_errors: true - name: Apply the lifecycle rules - shell: bash google-cloud-sdk/bin/gsutil lifecycle set /tmp/gcs_life.json gs://{{ test_name }} + command: "{{dir}}/google-cloud-sdk/bin/gsutil lifecycle set {{dir}}/gcs_life.json gs://{{test_name}}" environment: - BOTO_CONFIG: .boto + BOTO_CONFIG: "{{dir}}/.boto" + no_log: True - name: Upload collected diagnostic info gc_storage: bucket: "{{ test_name }}" mode: put - permission: private - object: "build-{{ ostype }}-{{ kube_network_plugin }}-logs.tar.gz" - src: logs.tar.gz + permission: public-read + object: "{{ file_name }}" + src: "{{dir}}/logs.tar.gz" headers: '{"Content-Encoding": "x-gzip"}' gs_access_key: "{{ gs_key }}" gs_secret_key: "{{ gs_skey }}" + expiration: "{{expire_days * 36000|int}}" ignore_errors: true + no_log: True + + - debug: + msg: "A public url https://storage.googleapis.com/{{test_name}}/{{file_name}}" From c58bd33af7d7d236cb8056099a7a33a1c6bc7c8f Mon Sep 17 00:00:00 2001 From: Artem Panchenko Date: Mon, 7 Nov 2016 22:37:12 +0200 Subject: [PATCH 15/16] Support new version of 'calicoctl' (>=v1.0.0) Since version 'v1.0.0-beta' calicoctl is written in Go and its API differs from old Python based utility. Added support of both old and new version of the utility. --- docs/calico.md | 24 +++++++ roles/download/defaults/main.yml | 3 +- roles/network_plugin/calico/tasks/main.yml | 66 +++++++++++++++---- .../calico/templates/calico-node.service.j2 | 12 +++- .../calico/templates/calicoctl-container.j2 | 2 +- .../calico/templates/deb-calico.initd.j2 | 11 +++- .../calico/templates/rh-calico.initd.j2 | 13 +++- 7 files changed, 113 insertions(+), 18 deletions(-) diff --git a/docs/calico.md b/docs/calico.md index 50744f63f..a8bffc0db 100644 --- a/docs/calico.md +++ b/docs/calico.md @@ -10,18 +10,42 @@ docker ps | grep calico The **calicoctl** command allows to check the status of the network workloads. 
* Check the status of Calico nodes +``` +calicoctl node status +``` + +or for versions prior *v1.0.0*: + ``` calicoctl status ``` * Show the configured network subnet for containers +``` + calicoctl get ippool -o wide +``` + +or for versions prior *v1.0.0*: + ``` calicoctl pool show ``` * Show the workloads (ip addresses of containers and their located) +``` +calicoctl get workloadEndpoint -o wide +``` + +and + +``` +calicoctl get hostEndpoint -o wide +``` + +or for versions prior *v1.0.0*: + ``` calicoctl endpoint show --detail ``` diff --git a/roles/download/defaults/main.yml b/roles/download/defaults/main.yml index bf8f8e7c2..1ea220fd1 100644 --- a/roles/download/defaults/main.yml +++ b/roles/download/defaults/main.yml @@ -39,7 +39,8 @@ flannel_server_helper_image_tag: "{{ flannel_server_helper_version }}" flannel_image_repo: "quay.io/coreos/flannel" flannel_image_tag: "{{ flannel_version }}" calicoctl_image_repo: "calico/ctl" -# TODO(mattymo): v1.0.0-beta has different syntax. Needs work to upgrade +# TODO(apanchenko): v1.0.0-beta can't execute `node run` from Docker container +# for details see https://github.com/projectcalico/calico-containers/issues/1291 calicoctl_image_tag: "v0.22.0" calico_node_image_repo: "calico/node" calico_node_image_tag: "{{ calico_version }}" diff --git a/roles/network_plugin/calico/tasks/main.yml b/roles/network_plugin/calico/tasks/main.yml index 60a728ba0..6563a1f65 100644 --- a/roles/network_plugin/calico/tasks/main.yml +++ b/roles/network_plugin/calico/tasks/main.yml @@ -78,30 +78,54 @@ delegate_to: "{{groups['etcd'][0]}}" run_once: true -- name: Calico | Define ipip pool argument +- name: Calico | Check calicoctl version + run_once: true + set_fact: + legacy_calicoctl: "{{ calicoctl_image_tag | version_compare('v1.0.0', '<') }}" + +- name: Calico | Configure calico network pool + shell: > + echo '{ + "kind": "ipPool", + "spec": {"disabled": false, "ipip": {"enabled": {{ cloud_provider is defined or ipip }}}, + "nat-outgoing": {{ 
nat_outgoing|default(false) and not peer_with_router|default(false) }}}, + "apiVersion": "v1", + "metadata": {"cidr": "{{ kube_pods_subnet }}"} + }' + | {{ bin_dir }}/calicoctl create -f - + environment: + NO_DEFAULT_POOLS: true + run_once: true + when: (not legacy_calicoctl and + "Key not found" in calico_conf.stdout or "nodes" not in calico_conf.stdout) + +- name: Calico (old) | Define ipip pool argument run_once: true set_fact: ipip_arg: "--ipip" - when: cloud_provider is defined or ipip|default(false) + when: (legacy_calicoctl and + cloud_provider is defined or ipip) -- name: Calico | Define nat-outgoing pool argument +- name: Calico (old) | Define nat-outgoing pool argument run_once: true set_fact: nat_arg: "--nat-outgoing" - when: nat_outgoing|default(false) and not peer_with_router|default(false) + when: (legacy_calicoctl and + nat_outgoing|default(false) and not peer_with_router|default(false)) -- name: Calico | Define calico pool task name +- name: Calico (old) | Define calico pool task name run_once: true set_fact: pool_task_name: "with options {{ ipip_arg|default('') }} {{ nat_arg|default('') }}" - when: ipip_arg|default(false) or nat_arg|default(false) + when: (legacy_calicoctl and ipip_arg|default(false) or nat_arg|default(false)) -- name: Calico | Configure calico network pool {{ pool_task_name|default('') }} +- name: Calico (old) | Configure calico network pool {{ pool_task_name|default('') }} command: "{{ bin_dir}}/calicoctl pool add {{ kube_pods_subnet }} {{ ipip_arg|default('') }} {{ nat_arg|default('') }}" environment: NO_DEFAULT_POOLS: true run_once: true - when: '"Key not found" in calico_conf.stdout or "nodes" not in calico_conf.stdout' + when: (legacy_calicoctl and + "Key not found" in calico_conf.stdout or "nodes" not in calico_conf.stdout) - name: Calico | Get calico configuration from etcd command: |- @@ -154,10 +178,30 @@ enabled: yes - name: Calico | Disable node mesh - shell: "{{ bin_dir }}/calicoctl bgp node-mesh off" - when: 
peer_with_router|default(false) and inventory_hostname in groups['kube-node'] + shell: "{{ bin_dir }}/calicoctl config set nodeToNodeMesh off" + when: (not legacy_calicoctl and + peer_with_router|default(false) and inventory_hostname in groups['kube-node']) - name: Calico | Configure peering with router(s) + shell: > + echo '{ + "kind": "bgppeer", + "spec": {"asNumber": {{ item.as }}}, + "apiVersion": "v1", + "metadata": {"node": "rack1-host1", "scope": "node", "peerIP": "{{ item.router_id }}"} + }' + | {{ bin_dir }}/calicoctl create -f - + with_items: peers + when: (not legacy_calicoctl and + peer_with_router|default(false) and inventory_hostname in groups['kube-node']) + +- name: Calico (old) | Disable node mesh + shell: "{{ bin_dir }}/calicoctl bgp node-mesh off" + when: (legacy_calicoctl and + peer_with_router|default(false) and inventory_hostname in groups['kube-node']) + +- name: Calico (old) | Configure peering with router(s) shell: "{{ bin_dir }}/calicoctl node bgp peer add {{ item.router_id }} as {{ item.as }}" with_items: peers - when: peer_with_router|default(false) and inventory_hostname in groups['kube-node'] + when: (legacy_calicoctl and + peer_with_router|default(false) and inventory_hostname in groups['kube-node']) diff --git a/roles/network_plugin/calico/templates/calico-node.service.j2 b/roles/network_plugin/calico/templates/calico-node.service.j2 index 2a7775fd4..87a51fac8 100644 --- a/roles/network_plugin/calico/templates/calico-node.service.j2 +++ b/roles/network_plugin/calico/templates/calico-node.service.j2 @@ -7,11 +7,19 @@ Wants=docker.socket [Service] User=root PermissionsStartOnly=true +{% if legacy_calicoctl %} {% if inventory_hostname in groups['kube-node'] and peer_with_router|default(false)%} ExecStart={{ bin_dir }}/calicoctl node --ip={{ip | default(ansible_default_ipv4.address) }} --as={{ local_as }} --detach=false --node-image={{ calico_node_image_repo }}:{{ calico_node_image_tag }} -{% else %} +{% else %} ExecStart={{ bin_dir 
}}/calicoctl node --ip={{ip | default(ansible_default_ipv4.address) }} --detach=false --node-image={{ calico_node_image_repo }}:{{ calico_node_image_tag }} -{% endif %} +{% endif %} +{% else %} +{% if inventory_hostname in groups['kube-node'] and peer_with_router|default(false)%} +ExecStart={{ bin_dir }}/calicoctl node run --ip={{ip | default(ansible_default_ipv4.address) }} --as={{ local_as }} --node-image={{ calico_node_image_repo }}:{{ calico_node_image_tag }} +{% else %} +ExecStart={{ bin_dir }}/calicoctl node run --ip={{ip | default(ansible_default_ipv4.address) }} --node-image={{ calico_node_image_repo }}:{{ calico_node_image_tag }} +{% endif %} +{% endif %} Restart=always RestartSec=10s diff --git a/roles/network_plugin/calico/templates/calicoctl-container.j2 b/roles/network_plugin/calico/templates/calicoctl-container.j2 index 9d47c73ca..7be30928a 100644 --- a/roles/network_plugin/calico/templates/calicoctl-container.j2 +++ b/roles/network_plugin/calico/templates/calicoctl-container.j2 @@ -1,5 +1,5 @@ #!/bin/bash -/usr/bin/docker run --privileged --rm \ +/usr/bin/docker run -i --privileged --rm \ --net=host --pid=host \ -e ETCD_ENDPOINTS={{ etcd_access_endpoint }} \ -e ETCD_CA_CERT_FILE=/etc/calico/certs/ca_cert.crt \ diff --git a/roles/network_plugin/calico/templates/deb-calico.initd.j2 b/roles/network_plugin/calico/templates/deb-calico.initd.j2 index ddbc22959..e155cae9c 100644 --- a/roles/network_plugin/calico/templates/deb-calico.initd.j2 +++ b/roles/network_plugin/calico/templates/deb-calico.initd.j2 @@ -37,7 +37,7 @@ DAEMON_USER=root do_status() { - if [ $($DOCKER ps | awk '{ print $2 }' | grep calico/node | wc -l) -eq 1 ]; then + if [ $($DOCKER ps --format "{{.Image}}" | grep -cw 'calico/node') -eq 1 ]; then return 0 else return 1 @@ -51,7 +51,11 @@ do_start() do_status retval=$? 
if [ $retval -ne 0 ]; then +{% if legacy_calicoctl %} ${DAEMON} node --ip=${DEFAULT_IPV4} >>/dev/null && return 0 || return 2 +{% else %} + ${DAEMON} node run --ip=${DEFAULT_IPV4} >>/dev/null && return 0 || return 2 +{% endif %} else return 1 fi @@ -62,7 +66,12 @@ do_start() # do_stop() { +{% if legacy_calicoctl %} ${DAEMON} node stop >> /dev/null || ${DAEMON} node stop --force >> /dev/null +{% else %} + echo "Current version of ${DAEMON} doesn't support 'node stop' command!" + return 1 +{% endif %} } diff --git a/roles/network_plugin/calico/templates/rh-calico.initd.j2 b/roles/network_plugin/calico/templates/rh-calico.initd.j2 index 6fb870652..7fea72521 100644 --- a/roles/network_plugin/calico/templates/rh-calico.initd.j2 +++ b/roles/network_plugin/calico/templates/rh-calico.initd.j2 @@ -31,7 +31,7 @@ logfile="/var/log/$prog" do_status() { - if [ $($dockerexec ps | awk '{ print $2 }' | grep calico/node | wc -l) -ne 1 ]; then + if [ $($dockerexec ps --format "{{.Image}}" | grep -cw 'calico/node') -ne 1 ]; then return 1 fi } @@ -53,7 +53,11 @@ do_start() { if [ $retval -ne 0 ]; then printf "Starting $prog:\t" echo "\n$(date)\n" >> $logfile - $exec node --ip=${DEFAULT_IPV4} &>>$logfile +{% if legacy_calicoctl %} + $exec node --ip=${DEFAULT_IPV4} &>>$logfile +{% else %} + $exec node run --ip=${DEFAULT_IPV4} &>>$logfile +{% endif %} success echo else @@ -65,7 +69,12 @@ do_start() { do_stop() { echo -n $"Stopping $prog: " +{% if legacy_calicoctl %} $exec node stop >> /dev/null || $exec node stop --force >> /dev/null +{% else %} + echo "Current version of ${exec} doesn't support 'node stop' command!" + return 1 +{% endif %} retval=$? 
echo return $retval From 46ee9faca91e7a4312ce1d31eb31b2e5e22d8e9c Mon Sep 17 00:00:00 2001 From: Matthew Mosesohn Date: Fri, 11 Nov 2016 18:39:22 +0300 Subject: [PATCH 16/16] Fix ca certificate loading on CoreOS --- roles/etcd/tasks/gen_certs.yml | 15 ++++++++------- roles/kubernetes/secrets/tasks/gen_certs.yml | 14 +++++++------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/roles/etcd/tasks/gen_certs.yml b/roles/etcd/tasks/gen_certs.yml index e0aad58a2..8d1d34b74 100644 --- a/roles/etcd/tasks/gen_certs.yml +++ b/roles/etcd/tasks/gen_certs.yml @@ -84,21 +84,21 @@ when: inventory_hostname in groups['etcd'] changed_when: false -- name: Gen_certs | target ca-certificates directory +- name: Gen_certs | target ca-certificate store file set_fact: - ca_cert_dir: |- + ca_cert_path: |- {% if ansible_os_family == "Debian" -%} - /usr/local/share/ca-certificates + /usr/local/share/ca-certificates/etcd-ca.crt {%- elif ansible_os_family == "RedHat" -%} - /etc/pki/ca-trust/source/anchors + /etc/pki/ca-trust/source/anchors/etcd-ca.crt {%- elif ansible_os_family == "CoreOS" -%} - /etc/ssl/certs + /etc/ssl/certs/etcd-ca.pem {%- endif %} - name: Gen_certs | add CA to trusted CA dir copy: src: "{{ etcd_cert_dir }}/ca.pem" - dest: "{{ ca_cert_dir }}/etcd-ca.crt" + dest: "{{ ca_cert_path }}" remote_src: true register: etcd_ca_cert @@ -106,6 +106,7 @@ command: update-ca-certificates when: etcd_ca_cert.changed and ansible_os_family in ["Debian", "CoreOS"] -- name: Gen_certs | update ca-certificatesa (RedHat) +- name: Gen_certs | update ca-certificates (RedHat) command: update-ca-trust extract when: etcd_ca_cert.changed and ansible_os_family == "RedHat" + diff --git a/roles/kubernetes/secrets/tasks/gen_certs.yml b/roles/kubernetes/secrets/tasks/gen_certs.yml index bec1d9f16..28ae04892 100644 --- a/roles/kubernetes/secrets/tasks/gen_certs.yml +++ b/roles/kubernetes/secrets/tasks/gen_certs.yml @@ -65,21 +65,21 @@ when: inventory_hostname in groups['kube-master'] 
changed_when: false -- name: Gen_certs | target ca-certificates directory +- name: Gen_certs | target ca-certificates path set_fact: - ca_cert_dir: |- + ca_cert_path: |- {% if ansible_os_family == "Debian" -%} - /usr/local/share/ca-certificates + /usr/local/share/ca-certificates/kube-ca.crt {%- elif ansible_os_family == "RedHat" -%} - /etc/pki/ca-trust/source/anchors + /etc/pki/ca-trust/source/anchors/kube-ca.crt {%- elif ansible_os_family == "CoreOS" -%} - /etc/ssl/certs + /etc/ssl/certs/kube-ca.pem {%- endif %} - name: Gen_certs | add CA to trusted CA dir copy: src: "{{ kube_cert_dir }}/ca.pem" - dest: "{{ ca_cert_dir }}/kube-ca.crt" + dest: "{{ ca_cert_path }}" remote_src: true register: kube_ca_cert @@ -87,7 +87,7 @@ command: update-ca-certificates when: kube_ca_cert.changed and ansible_os_family in ["Debian", "CoreOS"] -- name: Gen_certs | update ca-certificatesa (RedHat) +- name: Gen_certs | update ca-certificates (RedHat) command: update-ca-trust extract when: kube_ca_cert.changed and ansible_os_family == "RedHat"