diff --git a/docs/upgrades.md b/docs/upgrades.md index 0c9731e15..09a11d258 100644 --- a/docs/upgrades.md +++ b/docs/upgrades.md @@ -58,10 +58,17 @@ Client Version: version.Info{Major:"1", Minor:"19", GitVersion:"v1.19.7", GitCom Server Version: version.Info{Major:"1", Minor:"19", GitVersion:"v1.19.7", GitCommit:"1dd5338295409edcfff11505e7bb246f0d325d15", GitTreeState:"clean", BuildDate:"2021-01-13T13:15:20Z", GoVersion:"go1.15.5", Compiler:"gc", Platform:"linux/amd64"} ``` -If you want to manually control the upgrade procedure, you can use the variables `upgrade_node_confirm` or `upgrade_node_pause_seconds`: +### Pausing the upgrade -`upgrade_node_confirm: true` - waiting to confirmation to upgrade next node -`upgrade_node_pause_seconds: 60` - pause 60 seconds before upgrade next node +If you want to manually control the upgrade procedure, you can set some variables to pause the upgrade playbook. Pausing *before* upgrading each node may be useful for inspecting pods running on that node, or performing manual actions on the node: + +* `upgrade_node_confirm: true` - This will pause the playbook execution prior to upgrading each node. The play will resume when manually approved by typing "yes" at the terminal. +* `upgrade_node_pause_seconds: 60` - This will pause the playbook execution for 60 seconds prior to upgrading each node. The play will resume automatically after 60 seconds. + +Pausing *after* upgrading each node may be useful for rebooting the node to apply kernel updates, or testing the still-cordoned node: + +* `upgrade_node_post_upgrade_confirm: true` - This will pause the playbook execution after upgrading each node, but before the node is uncordoned. The play will resume when manually approved by typing "yes" at the terminal. +* `upgrade_node_post_upgrade_pause_seconds: 60` - This will pause the playbook execution for 60 seconds after upgrading each node, but before the node is uncordoned. The play will resume automatically after 60 seconds. 
## Node-based upgrade diff --git a/roles/upgrade/post-upgrade/defaults/main.yml b/roles/upgrade/post-upgrade/defaults/main.yml index c3574aed4..aa7284358 100644 --- a/roles/upgrade/post-upgrade/defaults/main.yml +++ b/roles/upgrade/post-upgrade/defaults/main.yml @@ -1,3 +1,5 @@ --- # how long to wait for cilium after upgrade before uncordoning upgrade_post_cilium_wait_timeout: 120s +upgrade_node_post_upgrade_confirm: false +upgrade_node_post_upgrade_pause_seconds: 0 diff --git a/roles/upgrade/post-upgrade/tasks/main.yml b/roles/upgrade/post-upgrade/tasks/main.yml index f460d0863..d1b1af0be 100644 --- a/roles/upgrade/post-upgrade/tasks/main.yml +++ b/roles/upgrade/post-upgrade/tasks/main.yml @@ -11,6 +11,20 @@ --timeout={{ upgrade_post_cilium_wait_timeout }} delegate_to: "{{ groups['kube_control_plane'][0] }}" +- name: Confirm node uncordon + pause: + echo: yes + prompt: "Ready to uncordon node?" + when: + - upgrade_node_post_upgrade_confirm + +- name: Wait before uncordoning node + pause: + seconds: "{{ upgrade_node_post_upgrade_pause_seconds }}" + when: + - not upgrade_node_post_upgrade_confirm + - upgrade_node_post_upgrade_pause_seconds != 0 + - name: Uncordon node command: "{{ kubectl }} uncordon {{ kube_override_hostname|default(inventory_hostname) }}" delegate_to: "{{ groups['kube_control_plane'][0] }}"