diff --git a/docs/vsphere-csi.md b/docs/vsphere-csi.md
index 24076efb0..d975f7749 100644
--- a/docs/vsphere-csi.md
+++ b/docs/vsphere-csi.md
@@ -34,6 +34,9 @@ You need to source the vSphere credentials you use to deploy your machines that
 | vsphere_csi_node_driver_registrar_image_tag | TRUE | string | | "v1.1.0" | CSI node driver registrat image tag to use |
 | vsphere_csi_driver_image_tag | TRUE | string | | "v1.0.2" | CSI driver image tag to use |
 | vsphere_csi_resizer_tag | TRUE | string | | "v1.1.0" | CSI resizer image tag to use
+| vsphere_csi_aggressive_node_drain | FALSE | boolean | | false | Enable aggressive node drain strategy |
+| vsphere_csi_aggressive_node_unreachable_timeout | FALSE | int | | 300 | Seconds the CSI controller pod stays on an unreachable node before it is evicted |
+| vsphere_csi_aggressive_node_not_ready_timeout | FALSE | int | | 300 | Seconds the CSI controller pod stays on a not-ready node before it is evicted |
 
 ## Usage example
 
diff --git a/roles/kubernetes-apps/csi_driver/vsphere/defaults/main.yml b/roles/kubernetes-apps/csi_driver/vsphere/defaults/main.yml
index cedd2eac9..612ad4384 100644
--- a/roles/kubernetes-apps/csi_driver/vsphere/defaults/main.yml
+++ b/roles/kubernetes-apps/csi_driver/vsphere/defaults/main.yml
@@ -16,3 +16,7 @@ vsphere_csi_resizer_tag: "v1.0.0"
 vsphere_csi_controller_replicas: 1
 
 csi_endpoint: '{% if external_vsphere_version >= "7.0u1" %}/csi{% else %}/var/lib/csi/sockets/pluginproxy{% endif %}'
+
+vsphere_csi_aggressive_node_drain: false
+vsphere_csi_aggressive_node_unreachable_timeout: 300
+vsphere_csi_aggressive_node_not_ready_timeout: 300
diff --git a/roles/kubernetes-apps/csi_driver/vsphere/tasks/main.yml b/roles/kubernetes-apps/csi_driver/vsphere/tasks/main.yml
index c4e3755ef..2015b6326 100644
--- a/roles/kubernetes-apps/csi_driver/vsphere/tasks/main.yml
+++ b/roles/kubernetes-apps/csi_driver/vsphere/tasks/main.yml
@@ -23,6 +23,7 @@
     dest: "{{ kube_config_dir }}/{{ item }}"
   with_items:
     - vsphere-csi-controller-rbac.yml
+    - vsphere-csi-node-rbac.yaml
     - "{{ controller_spec }}"
     - vsphere-csi-node.yml
   register: vsphere_csi_manifests
diff --git a/roles/kubernetes-apps/csi_driver/vsphere/templates/vsphere-csi-controller-deployment.yml.j2 b/roles/kubernetes-apps/csi_driver/vsphere/templates/vsphere-csi-controller-deployment.yml.j2
index 83972fb8b..cc1681d23 100644
--- a/roles/kubernetes-apps/csi_driver/vsphere/templates/vsphere-csi-controller-deployment.yml.j2
+++ b/roles/kubernetes-apps/csi_driver/vsphere/templates/vsphere-csi-controller-deployment.yml.j2
@@ -28,6 +28,18 @@ spec:
         - operator: "Exists"
           key: node-role.kubernetes.io/control-plane
           effect: NoSchedule
+{% if vsphere_csi_aggressive_node_drain %}
+        # Evict the controller pod sooner (or later) than the Kubernetes default of
+        # 300 seconds once its node becomes not-ready or unreachable.
+        - key: node.kubernetes.io/not-ready
+          operator: Exists
+          effect: NoExecute
+          tolerationSeconds: {{ vsphere_csi_aggressive_node_not_ready_timeout }}
+        - key: node.kubernetes.io/unreachable
+          operator: Exists
+          effect: NoExecute
+          tolerationSeconds: {{ vsphere_csi_aggressive_node_unreachable_timeout }}
+{% endif %}
       dnsPolicy: "Default"
       containers:
         - name: csi-attacher
@@ -51,6 +63,11 @@ spec:
             - "--timeout=300s"
             - "--csi-address=$(ADDRESS)"
             - "--leader-election"
+{% if vsphere_csi_controller is version('v2.2.0', '>=') %}
+            - "--handle-volume-inuse-error=false"
+            - "--kube-api-qps=100"
+            - "--kube-api-burst=100"
+{% endif %}
           env:
             - name: ADDRESS
               value: /csi/csi.sock
@@ -97,6 +114,9 @@ spec:
             - name: healthz
               containerPort: 9808
               protocol: TCP
+            - name: prometheus
+              containerPort: 2112
+              protocol: TCP
           livenessProbe:
             httpGet:
               path: /healthz
@@ -125,6 +145,10 @@ spec:
             - "--fss-namespace=$(CSI_NAMESPACE)"
 {% endif %}
           imagePullPolicy: {{ k8s_image_pull_policy }}
+          ports:
+            - containerPort: 2113
+              name: prometheus
+              protocol: TCP
           env:
             - name: FULL_SYNC_INTERVAL_MINUTES
               value: "30"
@@ -154,6 +178,10 @@ spec:
             - "--csi-address=$(ADDRESS)"
             - "--leader-election"
             - "--default-fstype=ext4"
+{% if vsphere_csi_controller is version('v2.2.0', '>=') %}
+            - "--kube-api-qps=100"
+            - "--kube-api-burst=100"
+{% endif %}
             # needed only for topology aware setup
             #- "--feature-gates=Topology=true"
             #- "--strict-topology"
@@ -173,6 +201,8 @@ spec:
 apiVersion: v1
 data:
   "csi-migration": "false" # csi-migration feature is only available for vSphere 7.0U1
+  "csi-auth-check": "true"
+  "online-volume-extend": "true"
 kind: ConfigMap
 metadata:
   name: internal-feature-states.csi.vsphere.vmware.com
@@ -185,4 +215,24 @@ metadata:
 spec:
   attachRequired: true
   podInfoOnMount: false
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: vsphere-csi-controller
+  namespace: kube-system
+  labels:
+    app: vsphere-csi-controller
+spec:
+  ports:
+    - name: ctlr
+      port: 2112
+      targetPort: 2112
+      protocol: TCP
+    - name: syncer
+      port: 2113
+      targetPort: 2113
+      protocol: TCP
+  selector:
+    app: vsphere-csi-controller
 
diff --git a/roles/kubernetes-apps/csi_driver/vsphere/templates/vsphere-csi-node-rbac.yaml.j2 b/roles/kubernetes-apps/csi_driver/vsphere/templates/vsphere-csi-node-rbac.yaml.j2
new file mode 100644
index 000000000..34aa1ed6f
--- /dev/null
+++ b/roles/kubernetes-apps/csi_driver/vsphere/templates/vsphere-csi-node-rbac.yaml.j2
@@ -0,0 +1,30 @@
+---
+kind: ServiceAccount
+apiVersion: v1
+metadata:
+  name: vsphere-csi-node
+  namespace: kube-system
+---
+kind: Role
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: vsphere-csi-node-role
+  namespace: kube-system
+rules:
+  - apiGroups: [""]
+    resources: ["configmaps"]
+    verbs: ["get", "list", "watch"]
+---
+kind: RoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: vsphere-csi-node-binding
+  namespace: kube-system
+subjects:
+  - kind: ServiceAccount
+    name: vsphere-csi-node
+    namespace: kube-system
+roleRef:
+  kind: Role
+  name: vsphere-csi-node-role
+  apiGroup: rbac.authorization.k8s.io
diff --git a/roles/kubernetes-apps/csi_driver/vsphere/templates/vsphere-csi-node.yml.j2 b/roles/kubernetes-apps/csi_driver/vsphere/templates/vsphere-csi-node.yml.j2
index 779f9e555..9c61a0907 100644
--- a/roles/kubernetes-apps/csi_driver/vsphere/templates/vsphere-csi-node.yml.j2
+++ b/roles/kubernetes-apps/csi_driver/vsphere/templates/vsphere-csi-node.yml.j2
@@ -39,8 +39,10 @@ spec:
               value: /csi/csi.sock
             - name: DRIVER_REG_SOCK_PATH
               value: /var/lib/kubelet/plugins/csi.vsphere.vmware.com/csi.sock
+{% if vsphere_csi_controller is version('v2.2.0', '<') %}
           securityContext:
             privileged: true
+{% endif %}
           volumeMounts:
             - name: plugin-dir
               mountPath: /csi
@@ -109,6 +111,12 @@ spec:
               mountPropagation: "Bidirectional"
             - name: device-dir
               mountPath: /dev
+{% if vsphere_csi_controller is version('v2.2.0', '>=') %}
+            - name: blocks-dir
+              mountPath: /sys/block
+            - name: sys-devices-dir
+              mountPath: /sys/devices
+{% endif %}
           ports:
             - containerPort: 9808
               name: healthz
@@ -150,6 +158,16 @@ spec:
         - name: device-dir
           hostPath:
             path: /dev
+{% if vsphere_csi_controller is version('v2.2.0', '>=') %}
+        - name: blocks-dir
+          hostPath:
+            path: /sys/block
+            type: Directory
+        - name: sys-devices-dir
+          hostPath:
+            path: /sys/devices
+            type: Directory
+{% endif %}
       tolerations:
         - effect: NoExecute
           operator: Exists