Control delayed startup/restart for kubelet/etcd

In order to mitigate sporadic data races in etcd
("publish error: etcdserver: request timed out"):
- Add etcd_start_delay and kubelet_start_delay (both default to 5 seconds).
- Increase the default start sleep times from 1 second to the respective foo_start_delay value.
- Add a sleep on restart as well.
- Add missing start sleep commands as appropriate.

Closes: https://github.com/kubespray/kargo/issues/342

Signed-off-by: Bogdan Dobrelya <bdobrelia@mirantis.com>
This commit is contained in:
Bogdan Dobrelya 2016-10-18 12:15:33 +02:00
parent 1de127470f
commit 13f5217d88
18 changed files with 28 additions and 4 deletions

View file

@ -30,5 +30,6 @@
service: service:
name: kubelet name: kubelet
state: restarted state: restarted
sleep: "{{ kubelet_start_delay|int }}"
when: "{{ inventory_hostname in groups['kube-master'] }}" when: "{{ inventory_hostname in groups['kube-master'] }}"
ignore_errors: true ignore_errors: true

View file

@ -28,10 +28,12 @@
service: service:
name: etcd name: etcd
state: restarted state: restarted
sleep: "{{ etcd_start_delay|int }}"
when: is_etcd_master when: is_etcd_master
- name: reload etcd-proxy - name: reload etcd-proxy
service: service:
name: etcd-proxy name: etcd-proxy
state: restarted state: restarted
sleep: "{{ etcd_start_delay|int }}"
when: is_etcd_proxy when: is_etcd_proxy

View file

@ -50,7 +50,7 @@ do_status()
do_start() do_start()
{ {
{{ docker_bin_dir | default("/usr/bin") }}/docker rm -f {{ etcd_member_name | default("etcd-proxy") }} &>/dev/null || true {{ docker_bin_dir | default("/usr/bin") }}/docker rm -f {{ etcd_member_name | default("etcd-proxy") }} &>/dev/null || true
sleep 1 sleep {{ etcd_start_delay }}
start-stop-daemon --background --start --quiet --make-pidfile --pidfile $PID --user $DAEMON_USER --exec $DAEMON -- \ start-stop-daemon --background --start --quiet --make-pidfile --pidfile $PID --user $DAEMON_USER --exec $DAEMON -- \
$DAEMON_ARGS \ $DAEMON_ARGS \
|| return 2 || return 2

View file

@ -41,6 +41,7 @@ do_status()
# #
do_start() do_start()
{ {
sleep {{ etcd_start_delay }}
start-stop-daemon --background --start --quiet --make-pidfile --pidfile $PID --user $DAEMON_USER --exec $DAEMON -- \ start-stop-daemon --background --start --quiet --make-pidfile --pidfile $PID --user $DAEMON_USER --exec $DAEMON -- \
$DAEMON_ARGS \ $DAEMON_ARGS \
|| return 2 || return 2

View file

@ -51,7 +51,7 @@ do_status()
do_start() do_start()
{ {
{{ docker_bin_dir | default("/usr/bin") }}/docker rm -f {{ etcd_proxy_member_name | default("etcd-proxy") }} &>/dev/null || true {{ docker_bin_dir | default("/usr/bin") }}/docker rm -f {{ etcd_proxy_member_name | default("etcd-proxy") }} &>/dev/null || true
sleep 1 sleep {{ etcd_start_delay }}
start-stop-daemon --background --start --quiet --make-pidfile --pidfile $PID --user $DAEMON_USER --exec $DAEMON -- \ start-stop-daemon --background --start --quiet --make-pidfile --pidfile $PID --user $DAEMON_USER --exec $DAEMON -- \
$DAEMON_ARGS \ $DAEMON_ARGS \
|| return 2 || return 2

View file

@ -42,6 +42,7 @@ do_status()
# #
do_start() do_start()
{ {
sleep {{ etcd_start_delay }}
start-stop-daemon --background --start --quiet --make-pidfile --pidfile $PID --user $DAEMON_USER --exec $DAEMON -- \ start-stop-daemon --background --start --quiet --make-pidfile --pidfile $PID --user $DAEMON_USER --exec $DAEMON -- \
$DAEMON_ARGS \ $DAEMON_ARGS \
|| return 2 || return 2

View file

@ -23,6 +23,8 @@ ExecReload={{ docker_bin_dir | default("/usr/bin") }}/docker restart {{ etcd_mem
ExecStop={{ docker_bin_dir | default("/usr/bin") }}/docker stop {{ etcd_member_name | default("etcd-proxy") }} ExecStop={{ docker_bin_dir | default("/usr/bin") }}/docker stop {{ etcd_member_name | default("etcd-proxy") }}
Restart=always Restart=always
RestartSec=15s RestartSec=15s
OnStartupSec={{ etcd_start_delay }}
OnBootSec={{ etcd_start_delay }}
[Install] [Install]
WantedBy=multi-user.target WantedBy=multi-user.target

View file

@ -11,6 +11,8 @@ NotifyAccess=all
Restart=always Restart=always
RestartSec=10s RestartSec=10s
LimitNOFILE=40000 LimitNOFILE=40000
OnStartupSec={{ etcd_start_delay }}
OnBootSec={{ etcd_start_delay }}
[Install] [Install]
WantedBy=multi-user.target WantedBy=multi-user.target

View file

@ -23,6 +23,8 @@ ExecReload={{ docker_bin_dir | default("/usr/bin") }}/docker restart {{ etcd_pro
ExecStop={{ docker_bin_dir | default("/usr/bin") }}/docker stop {{ etcd_proxy_member_name | default("etcd-proxy") }} ExecStop={{ docker_bin_dir | default("/usr/bin") }}/docker stop {{ etcd_proxy_member_name | default("etcd-proxy") }}
Restart=always Restart=always
RestartSec=15s RestartSec=15s
OnStartupSec={{ etcd_start_delay }}
OnBootSec={{ etcd_start_delay }}
[Install] [Install]
WantedBy=multi-user.target WantedBy=multi-user.target

View file

@ -14,6 +14,8 @@ NotifyAccess=all
Restart=always Restart=always
RestartSec=10s RestartSec=10s
LimitNOFILE=40000 LimitNOFILE=40000
OnStartupSec={{ etcd_start_delay }}
OnBootSec={{ etcd_start_delay }}
[Install] [Install]
WantedBy=multi-user.target WantedBy=multi-user.target

View file

@ -21,6 +21,7 @@
service: service:
name: kubelet name: kubelet
state: restarted state: restarted
sleep: "{{ kubelet_start_delay|int }}"
- name: Master | wait for kube-scheduler - name: Master | wait for kube-scheduler
uri: url=http://localhost:10251/healthz uri: url=http://localhost:10251/healthz

View file

@ -13,3 +13,4 @@
service: service:
name: kubelet name: kubelet
state: restarted state: restarted
sleep: "{{ kubelet_start_delay|int }}"

View file

@ -40,7 +40,7 @@ DAEMON_USER=root
do_start() do_start()
{ {
/usr/bin/docker rm -f kubelet &>/dev/null || true /usr/bin/docker rm -f kubelet &>/dev/null || true
sleep 1 sleep {{ kubelet_start_delay }}
# Return # Return
# 0 if daemon has been started # 0 if daemon has been started
# 1 if daemon was already running # 1 if daemon was already running

View file

@ -1,4 +1,5 @@
#!/bin/bash #!/bin/bash
sleep {{ kubelet_start_delay }}
/usr/bin/docker run --privileged --rm \ /usr/bin/docker run --privileged --rm \
--net=host --pid=host --name=kubelet \ --net=host --pid=host --name=kubelet \
-v /etc/cni:/etc/cni:ro \ -v /etc/cni:/etc/cni:ro \

View file

@ -28,6 +28,8 @@ ExecStartPre=-/usr/bin/docker rm -f kubelet
ExecReload=/usr/bin/docker restart kubelet ExecReload=/usr/bin/docker restart kubelet
Restart=always Restart=always
RestartSec=10s RestartSec=10s
OnStartupSec={{ kubelet_start_delay }}
OnBootSec={{ kubelet_start_delay }}
[Install] [Install]
WantedBy=multi-user.target WantedBy=multi-user.target

View file

@ -44,6 +44,7 @@ start() {
if ! [ -f $pidfile ]; then if ! [ -f $pidfile ]; then
printf "Starting $prog:\t" printf "Starting $prog:\t"
echo "\n$(date)\n" >> $logfile echo "\n$(date)\n" >> $logfile
sleep {{ kubelet_start_delay }}
$exec $DAEMON_ARGS &>> $logfile & $exec $DAEMON_ARGS &>> $logfile &
pid=$! pid=$!
echo $pid >> $pidfile echo $pid >> $pidfile

View file

@ -1,6 +1,10 @@
--- ---
run_gitinfos: false run_gitinfos: false
# Control delayed startup/restart for services, in seconds
etcd_start_delay: 5
kubelet_start_delay: 5
# This directory is where all the additional scripts go # This directory is where all the additional scripts go
# that Kubernetes normally puts in /srv/kubernetes. # that Kubernetes normally puts in /srv/kubernetes.
# This puts them in a sane location # This puts them in a sane location

View file

@ -8,3 +8,4 @@
service: service:
name: kubelet name: kubelet
state: restarted state: restarted
sleep: "{{ kubelet_start_delay|int }}"