Remove etcd-proxy from all nodes and use etcd multiaccess

This commit is contained in:
Matthew Mosesohn 2016-11-09 13:31:12 +03:00
parent 6b0d26ddf0
commit 95b460ae94
23 changed files with 55 additions and 380 deletions

View file

@ -5,10 +5,6 @@ The following components require a highly available endpoints:
* etcd cluster,
* kube-apiserver service instances.
The former provides the
[etcd-proxy](https://coreos.com/etcd/docs/latest/proxy.html) service to access
the cluster members in HA fashion.
The latter relies on a 3rd side reverse proxies, like Nginx or HAProxy, to
achieve the same goal.
@ -57,7 +53,7 @@ type. The following diagram shows how traffic to the apiserver is directed.
A user may opt to use an external loadbalancer (LB) instead. An external LB
provides access for external clients, while the internal LB accepts client
connections only to the localhost, similarly to the etcd-proxy HA endpoints.
connections only to the localhost.
Given a frontend `VIP` address and `IP1, IP2` addresses of backends, here is
an example configuration for a HAProxy service acting as an external LB:
```

View file

@ -62,7 +62,7 @@ ndots: 5
# Enable multiaccess to configure clients to access all of the etcd members directly
# as the "http://hostX:port, http://hostY:port, ..." and ignore the proxy loadbalancers.
# This may be the case if clients support and loadbalance multiple etcd servers natively.
etcd_multiaccess: false
etcd_multiaccess: true
# Assume there are no internal loadbalancers for apiservers exist and listen on
# kube_apiserver_port (default 443)

View file

@ -6,21 +6,14 @@
- reload etcd
- wait for etcd up
- name: restart etcd-proxy
command: /bin/true
notify:
- etcd | reload systemd
- reload etcd-proxy
- wait for etcd up
- name: etcd | reload systemd
command: systemctl daemon-reload
when: ansible_service_mgr == "systemd"
- name: wait for etcd up
uri: url="http://{% if is_etcd_master %}{{ etcd_address }}{% else %}127.0.0.1{% endif %}:2379/health"
uri: url="http://{% if is_etcd_master %}{{ etcd_address }}{% else %}127.0.0.1{% endif %}:2379/health"
register: result
until: result.status == 200
until: result.status is defined and result.status == 200
retries: 10
delay: 5
@ -29,9 +22,3 @@
name: etcd
state: restarted
when: is_etcd_master
- name: reload etcd-proxy
service:
name: etcd-proxy
state: restarted
when: is_etcd_proxy

View file

@ -26,19 +26,3 @@
mode: 0755
when: ansible_service_mgr in ["sysvinit","upstart"] and ansible_os_family == "Debian" and is_etcd_master
notify: restart etcd
- name: Configure | Copy etcd-proxy.service systemd file
template:
src: "etcd-proxy-{{ etcd_deployment_type }}.service.j2"
dest: /etc/systemd/system/etcd-proxy.service
backup: yes
when: ansible_service_mgr == "systemd" and is_etcd_proxy
notify: restart etcd-proxy
- name: Configure | Write etcd-proxy initd script
template:
src: "deb-etcd-proxy-{{ etcd_deployment_type }}.initd.j2"
dest: /etc/init.d/etcd-proxy
owner: root
mode: 0755
when: ansible_service_mgr in ["sysvinit","upstart"] and ansible_os_family == "Debian" and is_etcd_proxy
notify: restart etcd-proxy

View file

@ -1,8 +1,13 @@
---
- include: pre_upgrade.yml
- include: install.yml
when: is_etcd_master
- include: set_cluster_health.yml
when: is_etcd_master
- include: configure.yml
when: is_etcd_master
- include: refresh_config.yml
when: is_etcd_master
- name: Ensure etcd is running
service:
@ -11,23 +16,11 @@
enabled: yes
when: is_etcd_master
- name: Ensure etcd-proxy is running
service:
name: etcd-proxy
state: started
enabled: yes
when: is_etcd_proxy
- name: Restart etcd if binary changed
command: /bin/true
notify: restart etcd
when: etcd_deployment_type == "host" and etcd_copy.stdout_lines and is_etcd_master
- name: Restart etcd-proxy if binary changed
command: /bin/true
notify: restart etcd-proxy
when: etcd_deployment_type == "host" and etcd_copy.stdout_lines and is_etcd_proxy
# Reload systemd before starting service
- meta: flush_handlers
@ -35,4 +28,6 @@
# initial state of the cluster is in `existing`
# state insted of `new`.
- include: set_cluster_health.yml
when: is_etcd_master
- include: refresh_config.yml
when: is_etcd_master

View file

@ -5,10 +5,3 @@
dest: /etc/etcd.env
notify: restart etcd
when: is_etcd_master
- name: Refresh config | Create etcd-proxy config file
template:
src: etcd-proxy.j2
dest: /etc/etcd-proxy.env
notify: restart etcd-proxy
when: is_etcd_proxy

View file

@ -1,120 +0,0 @@
#!/bin/sh
set -a
### BEGIN INIT INFO
# Provides: etcd-proxy
# Required-Start: $local_fs $network $syslog
# Required-Stop:
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: etcd-proxy
# Description:
# etcd-proxy is a proxy for etcd: distributed, consistent key-value store for shared configuration and service discovery
### END INIT INFO
PATH=/sbin:/usr/sbin:/bin/:/usr/bin
DESC="etcd-proxy"
NAME=etcd-proxy
DAEMON={{ docker_bin_dir | default("/usr/bin") }}/docker
DAEMON_EXEC=`basename $DAEMON`
DAEMON_ARGS="run --restart=always --env-file=/etc/etcd-proxy.env \
--net=host \
--stop-signal=SIGKILL \
-v /usr/share/ca-certificates/:/etc/ssl/certs:ro \
--name={{ etcd_proxy_member_name | default("etcd-proxy") }} \
{{ etcd_image_repo }}:{{ etcd_image_tag }} \
{% if etcd_after_v3 %}
{{ etcd_container_bin_dir }}etcd
{% endif %}"
SCRIPTNAME=/etc/init.d/$NAME
DAEMON_USER=root
STOP_SCHEDULE="${STOP_SCHEDULE:-QUIT/5/TERM/5/KILL/5}"
PID=/var/run/etcd-proxy.pid
# Exit if the binary is not present
[ -x "$DAEMON" ] || exit 0
# Define LSB log_* functions.
# Depend on lsb-base (>= 3.2-14) to ensure that this file is present
# and status_of_proc is working.
. /lib/lsb/init-functions
do_status()
{
status_of_proc -p $PID "$DAEMON" "$NAME" && exit 0 || exit $?
}
# Function that starts the daemon/service
#
do_start()
{
{{ docker_bin_dir | default("/usr/bin") }}/docker rm -f {{ etcd_proxy_member_name | default("etcd-proxy") }} &>/dev/null || true
sleep 1
start-stop-daemon --background --start --quiet --make-pidfile --pidfile $PID --user $DAEMON_USER --exec $DAEMON -- \
$DAEMON_ARGS \
|| return 2
}
#
# Function that stops the daemon/service
#
do_stop()
{
start-stop-daemon --stop --quiet --retry=$STOP_SCHEDULE --pidfile $PID --name $DAEMON_EXEC
RETVAL="$?"
sleep 1
return "$RETVAL"
}
case "$1" in
start)
log_daemon_msg "Starting $DESC" "$NAME"
do_start
case "$?" in
0|1) log_end_msg 0 || exit 0 ;;
2) log_end_msg 1 || exit 1 ;;
esac
;;
stop)
log_daemon_msg "Stopping $DESC" "$NAME"
if do_stop; then
log_end_msg 0
else
log_failure_msg "Can't stop etcd-proxy"
log_end_msg 1
fi
;;
status)
if do_status; then
log_end_msg 0
else
log_failure_msg "etcd-proxy is not running"
log_end_msg 1
fi
;;
restart|force-reload)
log_daemon_msg "Restarting $DESC" "$NAME"
if do_stop; then
if do_start; then
log_end_msg 0
exit 0
else
rc="$?"
fi
else
rc="$?"
fi
log_failure_msg "Can't restart etcd-proxy"
log_end_msg ${rc}
;;
*)
echo "Usage: $SCRIPTNAME {start|stop|status|restart|force-reload}" >&2
exit 3
;;
esac

View file

@ -1,110 +0,0 @@
#!/bin/sh
set -a
### BEGIN INIT INFO
# Provides: etcd-proxy
# Required-Start: $local_fs $network $syslog
# Required-Stop:
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: etcd-proxy
# Description:
# etcd-proxy is a proxy for etcd: distributed, consistent key-value store for shared configuration and service discovery
### END INIT INFO
PATH=/sbin:/usr/sbin:/bin:/usr/bin
DESC="etcd-proxy"
NAME=etcd-proxy
DAEMON={{ bin_dir }}/etcd
DAEMON_ARGS=""
SCRIPTNAME=/etc/init.d/$NAME
DAEMON_USER=etcd
STOP_SCHEDULE="${STOP_SCHEDULE:-QUIT/5/TERM/5/KILL/5}"
PID=/var/run/etcd-proxy.pid
# Exit if the binary is not present
[ -x "$DAEMON" ] || exit 0
# Read configuration variable file if it is present
[ -f /etc/etcd-proxy.env ] && . /etc/etcd-proxy.env
# Define LSB log_* functions.
# Depend on lsb-base (>= 3.2-14) to ensure that this file is present
# and status_of_proc is working.
. /lib/lsb/init-functions
do_status()
{
status_of_proc -p $PID "$DAEMON" "$NAME" && exit 0 || exit $?
}
# Function that starts the daemon/service
#
do_start()
{
start-stop-daemon --background --start --quiet --make-pidfile --pidfile $PID --user $DAEMON_USER --exec $DAEMON -- \
$DAEMON_ARGS \
|| return 2
}
#
# Function that stops the daemon/service
#
do_stop()
{
start-stop-daemon --stop --quiet --retry=$STOP_SCHEDULE --pidfile $PID --name $NAME
RETVAL="$?"
sleep 1
return "$RETVAL"
}
case "$1" in
start)
log_daemon_msg "Starting $DESC" "$NAME"
do_start
case "$?" in
0|1) log_end_msg 0 || exit 0 ;;
2) log_end_msg 1 || exit 1 ;;
esac
;;
stop)
log_daemon_msg "Stopping $DESC" "$NAME"
if do_stop; then
log_end_msg 0
else
log_failure_msg "Can't stop etcd-proxy"
log_end_msg 1
fi
;;
status)
if do_status; then
log_end_msg 0
else
log_failure_msg "etcd-proxy is not running"
log_end_msg 1
fi
;;
restart|force-reload)
log_daemon_msg "Restarting $DESC" "$NAME"
if do_stop; then
if do_start; then
log_end_msg 0
exit 0
else
rc="$?"
fi
else
rc="$?"
fi
log_failure_msg "Can't restart etcd-proxy"
log_end_msg ${rc}
;;
*)
echo "Usage: $SCRIPTNAME {start|stop|status|restart|force-reload}" >&2
exit 3
;;
esac

View file

@ -1,28 +0,0 @@
[Unit]
Description=etcd-proxy docker wrapper
Wants=docker.socket
After=docker.service
[Service]
User=root
PermissionsStartOnly=true
ExecStart={{ docker_bin_dir | default("/usr/bin") }}/docker run --restart=always \
--env-file=/etc/etcd-proxy.env \
{# TODO(mattymo): Allow docker IP binding and disable in envfile
-p 2380:2380 -p 2379:2379 #}
--net=host \
--stop-signal=SIGKILL \
-v /usr/share/ca-certificates/:/etc/ssl/certs:ro \
--name={{ etcd_proxy_member_name | default("etcd-proxy") }} \
{{ etcd_image_repo }}:{{ etcd_image_tag }} \
{% if etcd_after_v3 %}
{{ etcd_container_bin_dir }}etcd
{% endif %}
ExecStartPre=-{{ docker_bin_dir | default("/usr/bin") }}/docker rm -f {{ etcd_proxy_member_name | default("etcd-proxy") }}
ExecReload={{ docker_bin_dir | default("/usr/bin") }}/docker restart {{ etcd_proxy_member_name | default("etcd-proxy") }}
ExecStop={{ docker_bin_dir | default("/usr/bin") }}/docker stop {{ etcd_proxy_member_name | default("etcd-proxy") }}
Restart=always
RestartSec=15s
[Install]
WantedBy=multi-user.target

View file

@ -1,19 +0,0 @@
[Unit]
Description=etcd-proxy
After=network.target
[Service]
Type=notify
User=etcd
PermissionsStartOnly=true
EnvironmentFile=/etc/etcd-proxy.env
ExecStart={{ bin_dir }}/etcd
ExecStartPre=/bin/mkdir -p /var/lib/etcd-proxy
ExecStartPre=/bin/chown -R etcd: /var/lib/etcd-proxy
NotifyAccess=all
Restart=always
RestartSec=10s
LimitNOFILE=40000
[Install]
WantedBy=multi-user.target

View file

@ -1,5 +0,0 @@
ETCD_DATA_DIR=/var/lib/etcd-proxy
ETCD_PROXY=on
ETCD_LISTEN_CLIENT_URLS={{ etcd_access_endpoint }}
ETCD_NAME={{ etcd_proxy_member_name | default("etcd-proxy") }}
ETCD_INITIAL_CLUSTER={{ etcd_peer_addresses }}

View file

@ -3,11 +3,7 @@ ETCD_ADVERTISE_CLIENT_URLS={{ etcd_client_url }}
ETCD_INITIAL_ADVERTISE_PEER_URLS={{ etcd_peer_url }}
ETCD_INITIAL_CLUSTER_STATE={% if etcd_cluster_is_healthy.rc != 0 | bool %}new{% else %}existing{% endif %}
{% if not is_etcd_proxy %}
ETCD_LISTEN_CLIENT_URLS=http://{{ etcd_address }}:2379,http://127.0.0.1:2379
{% else %}
ETCD_LISTEN_CLIENT_URLS=http://{{ etcd_address }}:2379
{% endif %}
ETCD_ELECTION_TIMEOUT=10000
ETCD_INITIAL_CLUSTER_TOKEN=k8s_etcd
ETCD_LISTEN_PEER_URLS=http://{{ etcd_address }}:2380

View file

@ -26,7 +26,7 @@ spec:
image: calico/kube-policy-controller:latest
env:
- name: ETCD_ENDPOINTS
value: "{{ etcd_endpoint }}"
value: "{{ etcd_access_endpoint }}"
# Location of the Kubernetes API - this shouldn't need to be
# changed so long as it is used in conjunction with
# CONFIGURE_ETC_HOSTS="true".
@ -38,3 +38,12 @@ spec:
# This removes the need for KubeDNS to resolve the Service.
- name: CONFIGURE_ETC_HOSTS
value: "true"
volumeMounts:
- mountPath: {{ etcd_cert_dir }}
name: etcd-certs
readOnly: true
volumes:
- hostPath:
path: {{ etcd_cert_dir }}
name: etcd-certs

View file

@ -14,12 +14,3 @@
name: kube-apiserver
state: stopped
when: (kube_apiserver_service_file.stat.exists|default(False) or kube_apiserver_init_script.stat.exists|default(False))
- name: "Pre-upgrade | remove kube-apiserver service definition"
file:
path: "{{ item }}"
state: absent
when: (kube_apiserver_service_file.stat.exists|default(False) or kube_apiserver_init_script.stat.exists|default(False))
with_items:
- /etc/systemd/system/kube-apiserver.service
- /etc/init.d/kube-apiserver

View file

@ -62,3 +62,4 @@ spec:
- hostPath:
path: /var/log/
name: logfile

View file

@ -1,6 +1,7 @@
{
"name": "calico-k8s-network",
"type": "calico",
"etcd_endpoints": "{{ etcd_access_endpoint }}",
"log_level": "info",
"ipam": {
"type": "calico-ipam"

View file

@ -45,3 +45,6 @@ openstack_username: "{{ lookup('env','OS_USERNAME') }}"
openstack_password: "{{ lookup('env','OS_PASSWORD') }}"
openstack_region: "{{ lookup('env','OS_REGION_NAME') }}"
openstack_tenant_id: "{{ lookup('env','OS_TENANT_ID') }}"
# All clients access each node individually, instead of using a load balancer.
etcd_multiaccess: true

View file

@ -30,7 +30,7 @@
- set_fact:
etcd_access_addresses: |-
{% for item in groups['etcd'] -%}
http://{{ hostvars[item].access_ip | default(hostvars[item].ip | default(hostvars[item].ansible_default_ipv4['address'])) }}:2379{% if not loop.last %},{% endif %}
http://{{ item }}:2379{% if not loop.last %},{% endif %}
{%- endfor %}
- set_fact: etcd_access_endpoint="{% if etcd_multiaccess %}{{ etcd_access_addresses }}{% else %}{{ etcd_endpoint }}{% endif %}"
- set_fact:
@ -43,13 +43,6 @@
{% for item in groups['etcd'] -%}
{{ "etcd"+loop.index|string }}=http://{{ hostvars[item].access_ip | default(hostvars[item].ip | default(hostvars[item].ansible_default_ipv4['address'])) }}:2380{% if not loop.last %},{% endif %}
{%- endfor %}
- set_fact:
etcd_proxy_member_name: |-
{% for host in groups['k8s-cluster'] %}
{% if inventory_hostname == host %}{{"etcd-proxy"+loop.index|string }}{% endif %}
{% endfor %}
- set_fact:
is_etcd_proxy: "{{ inventory_hostname in groups['k8s-cluster'] }}"
- set_fact:
is_etcd_master: "{{ inventory_hostname in groups['etcd'] }}"
- set_fact:

View file

@ -43,17 +43,21 @@
- name: Calico | wait for etcd
uri: url=http://localhost:2379/health
register: result
until: result.status == 200
until: result.status == 200 or result.status == 401
retries: 10
delay: 5
when: inventory_hostname in groups['kube-master']
delegate_to: "{{groups['etcd'][0]}}"
run_once: true
- name: Calico | Check if calico network pool has already been configured
uri:
url: "{{ etcd_endpoint }}/v2/keys/calico/v1/ipam/v4/pool"
return_content: yes
status_code: 200,404
command: |-
curl \
--cacert {{ etcd_cert_dir }}/ca.pem \
--cert {{ etcd_cert_dir}}/admin.pem \
--key {{ etcd_cert_dir }}/admin-key.pem \
https://localhost:2379/v2/keys/calico/v1/ipam/v4/pool
register: calico_conf
delegate_to: "{{groups['etcd'][0]}}"
run_once: true
- name: Calico | Define ipip pool argument
@ -79,21 +83,25 @@
environment:
NO_DEFAULT_POOLS: true
run_once: true
when: calico_conf.status == 404 or "nodes" not in calico_conf.content
when: '"Key not found" in calico_conf.stdout or "nodes" not in calico_conf.stdout'
- name: Calico | Get calico configuration from etcd
uri:
url: "{{ etcd_endpoint }}/v2/keys/calico/v1/ipam/v4/pool"
return_content: yes
register: calico_pools
command: |-
curl http://localhost:2379/v2/keys/calico/v1/ipam/v4/pool
register: calico_pools_raw
delegate_to: "{{groups['etcd'][0]}}"
run_once: true
- set_fact:
calico_pools: "{{ calico_pools_raw.stdout | from_json }}"
run_once: true
- name: Calico | Check if calico pool is properly configured
fail:
msg: 'Only one network pool must be configured and it must be the subnet {{ kube_pods_subnet }}.
Please erase calico configuration and run the playbook again ("etcdctl rm --recursive /calico/v1/ipam/v4/pool")'
when: ( calico_pools.json['node']['nodes'] | length > 1 ) or
( not calico_pools.json['node']['nodes'][0]['key'] | search(".*{{ kube_pods_subnet | ipaddr('network') }}.*") )
when: ( calico_pools['node']['nodes'] | length > 1 ) or
( not calico_pools['node']['nodes'][0]['key'] | search(".*{{ kube_pods_subnet | ipaddr('network') }}.*") )
run_once: true
- name: Calico | Write /etc/network-environment
@ -131,4 +139,3 @@
shell: "{{ bin_dir }}/calicoctl node bgp peer add {{ item.router_id }} as {{ item.as }}"
with_items: peers
when: peer_with_router|default(false) and inventory_hostname in groups['kube-node']

View file

@ -1,8 +1,8 @@
[Unit]
Description=Calico per-node agent
Documentation=https://github.com/projectcalico/calico-docker
After=docker.service docker.socket etcd-proxy.service
Wants=docker.socket etcd-proxy.service
After=docker.service docker.socket
Wants=docker.socket
[Service]
User=root

View file

@ -1,6 +1,7 @@
#!/bin/bash
/usr/bin/docker run --privileged --rm \
--net=host --pid=host -e ETCD_AUTHORITY={{ etcd_authority }} \
--net=host --pid=host \
-e ETCD_ENDPOINTS={{ etcd_access_endpoint }} \
-v /usr/bin/docker:/usr/bin/docker \
-v /var/run/docker.sock:/var/run/docker.sock \
-v /var/run/calico:/var/run/calico \

View file

@ -3,7 +3,7 @@
DEFAULT_IPV4={{ip | default(ansible_default_ipv4.address) }}
# The Kubernetes master IP
KUBERNETES_MASTER={{ first_kube_master }}
KUBERNETES_MASTER={{ kube_apiserver_endpoint }}
# IP and port of etcd instance used by Calico
ETCD_AUTHORITY={{ etcd_authority }}
ETCD_ENDPOINTS={{ etcd_access_endpoint }}

View file

@ -21,7 +21,7 @@
args:
- "--network-config=/etc/flannel-network.json"
- "--etcd-prefix=/{{ cluster_name }}/network"
- "--etcd-server={{ etcd_endpoint }}"
- "--etcd-endpoints={{ etcd_access_endpoint }}"
volumeMounts:
- name: "networkconfig"
mountPath: "/etc/flannel-network.json"