From 806f12804bb2c7382b9aa75882d70ec70c5e1b02 Mon Sep 17 00:00:00 2001 From: Josh Lothian Date: Wed, 22 Mar 2017 10:12:57 -0500 Subject: [PATCH 1/6] Fix docker restart in atomic In atomic, containers are left running when docker is restarted. When docker is restarted after the flannel config is put in place, the docker0 interface isn't re-IPed because docker sees the running containers and won't update the previous config. This patch kills all the running containers after docker is stopped. We can't simply `docker stop` the running containers, as they respawn before we've got a chance to stop the docker daemon, so we need to use runc to do this after dockerd is stopped. --- roles/network_plugin/flannel/handlers/main.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/roles/network_plugin/flannel/handlers/main.yml b/roles/network_plugin/flannel/handlers/main.yml index 98c93a53a..9a87e2ec2 100644 --- a/roles/network_plugin/flannel/handlers/main.yml +++ b/roles/network_plugin/flannel/handlers/main.yml @@ -10,6 +10,7 @@ - Flannel | reload systemd - Flannel | reload docker.socket - Flannel | reload docker + - Flannel | reload docker (atomic) - Flannel | pause while Docker restarts - Flannel | wait for docker @@ -26,6 +27,11 @@ service: name: docker state: restarted + when: not is_atomic + +- name: Flannel | reload docker (atomic) + shell: systemctl stop docker && runc list | awk '!/ID/ {print $1}' | xargs -n 1 -I ID runc kill ID KILL && systemctl start docker + when: is_atomic - name: Flannel | pause while Docker restarts pause: From 8b5072b291ebf4ca1fb0e30f64f61b45304e1835 Mon Sep 17 00:00:00 2001 From: Josh Lothian Date: Thu, 30 Mar 2017 17:57:40 -0500 Subject: [PATCH 2/6] Reconfigure docker restart behavior on atomic Before restarting docker, instruct it to kill running containers when it restarts. 
Needs a second docker restart after we restore the original behavior, otherwise the next time docker is restarted by an operator, it will unexpectedly bring down all running containers. --- .../network_plugin/flannel/handlers/main.yml | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/roles/network_plugin/flannel/handlers/main.yml b/roles/network_plugin/flannel/handlers/main.yml index 9a87e2ec2..e3e937a1f 100644 --- a/roles/network_plugin/flannel/handlers/main.yml +++ b/roles/network_plugin/flannel/handlers/main.yml @@ -9,7 +9,9 @@ notify: - Flannel | reload systemd - Flannel | reload docker.socket + - Flannel | reconfigure docker restart behavior (atomic) - Flannel | reload docker + - Flannel | restore docker restart behavior (atomic) - Flannel | reload docker (atomic) - Flannel | pause while Docker restarts - Flannel | wait for docker @@ -23,14 +25,29 @@ state: restarted when: ansible_os_family in ['CoreOS', 'Container Linux by CoreOS'] +- name: Flannel | reconfigure docker restart behavior (atomic) + replace: + name: /etc/docker/daemon.json + regexp: '"live-restore":.*true' + replace: '"live-restore": false' + when: is_atomic + - name: Flannel | reload docker service: name: docker state: restarted - when: not is_atomic + +- name: Flannel | restore docker restart behavior (atomic) + replace: + name: /etc/docker/daemon.json + regexp: '"live-restore": false' + replace: '"live-restore": true' + when: is_atomic - name: Flannel | reload docker (atomic) - shell: systemctl stop docker && runc list | awk '!/ID/ {print $1}' | xargs -n 1 -I ID runc kill ID KILL && systemctl start docker + service: + name: docker + state: restarted when: is_atomic - name: Flannel | pause while Docker restarts From 5e9f27f95b55c15a4d8557a75c87fd5bd5382fcf Mon Sep 17 00:00:00 2001 From: Josh Lothian Date: Fri, 31 Mar 2017 07:46:21 -0500 Subject: [PATCH 3/6] Update handler names and explanation --- roles/network_plugin/flannel/handlers/main.yml | 12 
++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/roles/network_plugin/flannel/handlers/main.yml b/roles/network_plugin/flannel/handlers/main.yml index e3e937a1f..412563394 100644 --- a/roles/network_plugin/flannel/handlers/main.yml +++ b/roles/network_plugin/flannel/handlers/main.yml @@ -4,14 +4,18 @@ failed_when: false notify: Flannel | restart docker +# special cases for atomic because it defaults to live-restore: true +# So we disable live-restore to pickup the new flannel IP. After +# we enable it, we have to restart docker again to pickup the new +# setting and restore the original behavior - name: Flannel | restart docker command: /bin/true notify: - Flannel | reload systemd - Flannel | reload docker.socket - - Flannel | reconfigure docker restart behavior (atomic) + - Flannel | configure docker live-restore true (atomic) - Flannel | reload docker - - Flannel | restore docker restart behavior (atomic) + - Flannel | configure docker live-restore false (atomic) - Flannel | reload docker (atomic) - Flannel | pause while Docker restarts - Flannel | wait for docker @@ -25,7 +29,7 @@ state: restarted when: ansible_os_family in ['CoreOS', 'Container Linux by CoreOS'] -- name: Flannel | reconfigure docker restart behavior (atomic) +- name: Flannel | configure docker live-restore true (atomic) replace: name: /etc/docker/daemon.json regexp: '"live-restore":.*true' @@ -37,7 +41,7 @@ name: docker state: restarted -- name: Flannel | restore docker restart behavior (atomic) +- name: Flannel | configure docker live-restore false (atomic) replace: name: /etc/docker/daemon.json regexp: '"live-restore": false' From 7ae917edd9ce982a82f26a08f15d5ca68af063d3 Mon Sep 17 00:00:00 2001 From: Josh Lothian Date: Wed, 5 Apr 2017 15:41:46 -0500 Subject: [PATCH 4/6] Leave 'live-restore' false Leave live-restore false so that updates always pick up the new network configuration --- roles/network_plugin/flannel/handlers/main.yml | 13 ------------- 1 file changed, 13 
deletions(-) diff --git a/roles/network_plugin/flannel/handlers/main.yml b/roles/network_plugin/flannel/handlers/main.yml index 412563394..8fbb6a1fd 100644 --- a/roles/network_plugin/flannel/handlers/main.yml +++ b/roles/network_plugin/flannel/handlers/main.yml @@ -41,19 +41,6 @@ name: docker state: restarted -- name: Flannel | configure docker live-restore false (atomic) - replace: - name: /etc/docker/daemon.json - regexp: '"live-restore": false' - replace: '"live-restore": true' - when: is_atomic - -- name: Flannel | reload docker (atomic) - service: - name: docker - state: restarted - when: is_atomic - - name: Flannel | pause while Docker restarts pause: seconds: 10 From 03159bb29dec08f391d33c708cf8c169096b9337 Mon Sep 17 00:00:00 2001 From: Josh Lothian Date: Fri, 19 May 2017 09:45:46 -0500 Subject: [PATCH 5/6] Remove unused handler Previous patch removed the step that sets live-restore back to true, so don't try to notify that handler any more --- roles/network_plugin/flannel/handlers/main.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/roles/network_plugin/flannel/handlers/main.yml b/roles/network_plugin/flannel/handlers/main.yml index 8fbb6a1fd..a84d70c70 100644 --- a/roles/network_plugin/flannel/handlers/main.yml +++ b/roles/network_plugin/flannel/handlers/main.yml @@ -15,7 +15,6 @@ - Flannel | reload docker.socket - Flannel | configure docker live-restore true (atomic) - Flannel | reload docker - - Flannel | configure docker live-restore false (atomic) - Flannel | reload docker (atomic) - Flannel | pause while Docker restarts - Flannel | wait for docker From 61d3ebfc04b33af9748c70821d25a679ea9ad6b1 Mon Sep 17 00:00:00 2001 From: Josh Lothian Date: Fri, 19 May 2017 09:50:10 -0500 Subject: [PATCH 6/6] Removed the other unused handler With live-restore: false, we don't need a special docker restart --- roles/network_plugin/flannel/handlers/main.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/roles/network_plugin/flannel/handlers/main.yml 
b/roles/network_plugin/flannel/handlers/main.yml index a84d70c70..bd4058976 100644 --- a/roles/network_plugin/flannel/handlers/main.yml +++ b/roles/network_plugin/flannel/handlers/main.yml @@ -15,7 +15,6 @@ - Flannel | reload docker.socket - Flannel | configure docker live-restore true (atomic) - Flannel | reload docker - - Flannel | reload docker (atomic) - Flannel | pause while Docker restarts - Flannel | wait for docker