Compare commits

...

5 commits

Author SHA1 Message Date
rtsp
c91a05f330 debian: Fix test failure after bullseye release (#7888)
(cherry picked from commit 79166496f3)
2021-10-29 07:46:51 -07:00
Utku Ozdemir
a583a2d9aa Implement drain fallback with --disable-eviction to ignore PDBs
Signed-off-by: Utku Ozdemir <uoz@protonmail.com>
2021-10-29 07:46:51 -07:00
Vitaliy D
713abf29ca
Update vSphere CPI (#7840)
Backport of #7838

Changes:
  * ClusterRole updated according to the latest manifests from
    https://github.com/kubernetes/cloud-provider-vsphere
  * vSphere CPI/CSI default versions bumped and
    tested successfully on K8S 1.21.1
  * vSphere documentation updated

Signed-off-by: Vitaliy D <vi7alya@gmail.com>
2021-07-30 06:03:37 -07:00
Kenichi Omichi
247d062c02
[2.16] Fix how to get image ID on offline deployment (#7829)
* Add error handling for registering images (#7787)

When running the script, I faced the following error, but it was difficult
to identify the root cause due to the lack of error handling.

  docker tag" requires exactly 2 arguments.
  See 'docker tag --help'.

  Usage:  docker tag SOURCE_IMAGE[:TAG] TARGET_IMAGE[:TAG]

  Create a tag TARGET_IMAGE that refers to SOURCE_IMAGE

To make such errors easier to investigate, this adds error handling.

* Fix how to get image ID on offline deployment (#7808)

Previously, container image IDs were taken from the image tar files, but that
approach was wrong: if a tar file contains multiple json files, the script picked
up multiple IDs and passed them all to the `docker tag` command, which then failed.

This updates the script to get image IDs from the `docker image inspect` command
to fix this issue.
In addition, it now checks whether a registry container already exists before
deploying one, to avoid a container conflict failure.
2021-07-28 00:01:35 -07:00
Kenichi Omichi
9fa051780e
[2.16] Disable OVH CI until voucher situation is cleared up (#7824) (#7831)
* Disable OVH CI until voucher situation is cleared up (#7824)

* Allow failure on tf-elastx_ubuntu18-calico (#7814)

tf-elastx_ubuntu18-calico is very flaky today. The test job fails with an SSH
connectivity check error after deploying the virtual machines used as
Kubernetes nodes.
This allows failure on the job so the test situation can be observed without
blocking pull request merges.

Co-authored-by: Maxime Guyot <Miouge1@users.noreply.github.com>
2021-07-27 06:16:45 -07:00
8 changed files with 181 additions and 75 deletions


@@ -207,6 +207,7 @@ tf-elastx_ubuntu18-calico:
extends: .terraform_apply
stage: deploy-part3
when: on_success
allow_failure: true
variables:
<<: *elastx_variables
TF_VERSION: $TERRAFORM_14_VERSION
@@ -235,44 +236,45 @@ tf-elastx_ubuntu18-calico:
TF_VAR_image: ubuntu-18.04-server-latest
TF_VAR_k8s_allowed_remote_ips: '["0.0.0.0/0"]'
# OVH voucher expired, commenting job until things are sorted out
tf-ovh_cleanup:
stage: unit-tests
tags: [light]
image: python
environment: ovh
variables:
<<: *ovh_variables
before_script:
- pip install -r scripts/openstack-cleanup/requirements.txt
script:
- ./scripts/openstack-cleanup/main.py
# tf-ovh_cleanup:
# stage: unit-tests
# tags: [light]
# image: python
# environment: ovh
# variables:
# <<: *ovh_variables
# before_script:
# - pip install -r scripts/openstack-cleanup/requirements.txt
# script:
# - ./scripts/openstack-cleanup/main.py
tf-ovh_ubuntu18-calico:
extends: .terraform_apply
when: on_success
environment: ovh
variables:
<<: *ovh_variables
TF_VERSION: $TERRAFORM_14_VERSION
PROVIDER: openstack
CLUSTER: $CI_COMMIT_REF_NAME
ANSIBLE_TIMEOUT: "60"
SSH_USER: ubuntu
TF_VAR_number_of_k8s_masters: "0"
TF_VAR_number_of_k8s_masters_no_floating_ip: "1"
TF_VAR_number_of_k8s_masters_no_floating_ip_no_etcd: "0"
TF_VAR_number_of_etcd: "0"
TF_VAR_number_of_k8s_nodes: "0"
TF_VAR_number_of_k8s_nodes_no_floating_ip: "1"
TF_VAR_number_of_gfs_nodes_no_floating_ip: "0"
TF_VAR_number_of_bastions: "0"
TF_VAR_number_of_k8s_masters_no_etcd: "0"
TF_VAR_use_neutron: "0"
TF_VAR_floatingip_pool: "Ext-Net"
TF_VAR_external_net: "6011fbc9-4cbf-46a4-8452-6890a340b60b"
TF_VAR_network_name: "Ext-Net"
TF_VAR_flavor_k8s_master: "defa64c3-bd46-43b4-858a-d93bbae0a229" # s1-8
TF_VAR_flavor_k8s_node: "defa64c3-bd46-43b4-858a-d93bbae0a229" # s1-8
TF_VAR_image: "Ubuntu 18.04"
TF_VAR_k8s_allowed_remote_ips: '["0.0.0.0/0"]'
# tf-ovh_ubuntu18-calico:
# extends: .terraform_apply
# when: on_success
# environment: ovh
# variables:
# <<: *ovh_variables
# TF_VERSION: $TERRAFORM_14_VERSION
# PROVIDER: openstack
# CLUSTER: $CI_COMMIT_REF_NAME
# ANSIBLE_TIMEOUT: "60"
# SSH_USER: ubuntu
# TF_VAR_number_of_k8s_masters: "0"
# TF_VAR_number_of_k8s_masters_no_floating_ip: "1"
# TF_VAR_number_of_k8s_masters_no_floating_ip_no_etcd: "0"
# TF_VAR_number_of_etcd: "0"
# TF_VAR_number_of_k8s_nodes: "0"
# TF_VAR_number_of_k8s_nodes_no_floating_ip: "1"
# TF_VAR_number_of_gfs_nodes_no_floating_ip: "0"
# TF_VAR_number_of_bastions: "0"
# TF_VAR_number_of_k8s_masters_no_etcd: "0"
# TF_VAR_use_neutron: "0"
# TF_VAR_floatingip_pool: "Ext-Net"
# TF_VAR_external_net: "6011fbc9-4cbf-46a4-8452-6890a340b60b"
# TF_VAR_network_name: "Ext-Net"
# TF_VAR_flavor_k8s_master: "defa64c3-bd46-43b4-858a-d93bbae0a229" # s1-8
# TF_VAR_flavor_k8s_node: "defa64c3-bd46-43b4-858a-d93bbae0a229" # s1-8
# TF_VAR_image: "Ubuntu 18.04"
# TF_VAR_k8s_allowed_remote_ips: '["0.0.0.0/0"]'


@@ -100,15 +100,35 @@ function register_container_images() {
tar -zxvf ${IMAGE_TAR_FILE}
sudo docker load -i ${IMAGE_DIR}/registry-latest.tar
sudo docker run --restart=always -d -p 5000:5000 --name registry registry:latest
set +e
sudo docker container inspect registry >/dev/null 2>&1
if [ $? -ne 0 ]; then
sudo docker run --restart=always -d -p 5000:5000 --name registry registry:latest
fi
set -e
while read -r line; do
file_name=$(echo ${line} | awk '{print $1}')
org_image=$(echo ${line} | awk '{print $2}')
new_image="${LOCALHOST_NAME}:5000/${org_image}"
image_id=$(tar -tf ${IMAGE_DIR}/${file_name} | grep "\.json" | grep -v manifest.json | sed s/"\.json"//)
raw_image=$(echo ${line} | awk '{print $2}')
new_image="${LOCALHOST_NAME}:5000/${raw_image}"
org_image=$(sudo docker load -i ${IMAGE_DIR}/${file_name} | head -n1 | awk '{print $3}')
image_id=$(sudo docker image inspect ${org_image} | grep "\"Id\":" | awk -F: '{print $3}'| sed s/'\",'//)
if [ -z "${file_name}" ]; then
echo "Failed to get file_name for line ${line}"
exit 1
fi
if [ -z "${raw_image}" ]; then
echo "Failed to get raw_image for line ${line}"
exit 1
fi
if [ -z "${org_image}" ]; then
echo "Failed to get org_image for line ${line}"
exit 1
fi
if [ -z "${image_id}" ]; then
echo "Failed to get image_id for file ${file_name}"
exit 1
fi
sudo docker load -i ${IMAGE_DIR}/${file_name}
sudo docker tag ${image_id} ${new_image}
sudo docker push ${new_image}
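
To illustrate why the script now asks Docker for the image ID instead of parsing the tar listing, here is a rough sketch; it reuses the registry image from the script above, and the exact tar contents will vary:

```bash
# A tar created by `docker save` can contain several *.json files, so grepping
# the archive listing may return more than one candidate ID:
tar -tf ${IMAGE_DIR}/registry-latest.tar | grep '\.json' | grep -v manifest.json

# After `docker load`, inspecting the loaded image returns exactly one ID,
# which is what the script now passes to `docker tag`:
sudo docker image inspect --format '{{.Id}}' registry:latest
```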


@@ -2,32 +2,38 @@
The vSphere CSI driver allows you to provision volumes on a vSphere deployment. The historic Kubernetes in-tree cloud provider is deprecated and will be removed in future versions.
## Prerequisites
The vSphere user for the CSI driver requires a set of privileges to perform Cloud Native Storage operations. Follow the [official guide](https://vsphere-csi-driver.sigs.k8s.io/driver-deployment/prerequisites.html#roles_and_privileges) to configure them.
## Kubespray configuration
To enable the vSphere CSI driver, uncomment the `vsphere_csi_enabled` option in `group_vars/all/vsphere.yml` and set it to `true`.
To set the number of replicas for the vSphere CSI controller, change the `vsphere_csi_controller_replicas` option in `group_vars/all/vsphere.yml`.
You need to provide the vSphere credentials used to deploy the machines that will host Kubernetes.
| Variable | Required | Type | Choices | Default | Comment |
|---------------------------------------------|----------|---------|----------------------------|---------------------------|----------------------------------------------------------------|
| external_vsphere_vcenter_ip | TRUE | string | | | IP/URL of the vCenter |
| external_vsphere_vcenter_port | TRUE | string | | "443" | Port of the vCenter API |
| external_vsphere_insecure | TRUE | string | "true", "false" | "true" | set to "true" if the host above uses a self-signed cert |
| external_vsphere_user | TRUE | string | | | User name for vCenter with required privileges |
| external_vsphere_password | TRUE | string | | | Password for vCenter |
| external_vsphere_datacenter | TRUE | string | | | Datacenter name to use |
| external_vsphere_kubernetes_cluster_id | TRUE | string | | "kubernetes-cluster-id" | Kubernetes cluster ID to use |
| external_vsphere_version | TRUE | string | | "6.7u3" | VMware vSphere version where all VMs are located |
| vsphere_cloud_controller_image_tag | TRUE | string | | "latest" | Cloud controller image tag to use |
| vsphere_syncer_image_tag | TRUE | string | | "v1.0.2" | Syncer image tag to use |
| vsphere_csi_attacher_image_tag | TRUE | string | | "v1.1.1" | CSI attacher image tag to use |
| vsphere_csi_controller | TRUE | string | | "v1.0.2" | CSI controller image tag to use |
| vsphere_csi_controller_replicas | TRUE | integer | | 1 | Number of pods Kubernetes should deploy for the CSI controller |
| vsphere_csi_liveness_probe_image_tag | TRUE | string | | "v1.1.0" | CSI liveness probe image tag to use |
| vsphere_csi_provisioner_image_tag | TRUE | string | | "v1.2.2" | CSI provisioner image tag to use |
| vsphere_csi_node_driver_registrar_image_tag | TRUE | string | | "v1.1.0" | CSI node driver registrar image tag to use |
| vsphere_csi_driver_image_tag | TRUE | string | | "v1.0.2" | CSI driver image tag to use |
| vsphere_csi_resizer_tag | TRUE | string | | "v1.0.0" | CSI resizer image tag to use |
| Variable | Required | Type | Choices | Default | Comment |
|---------------------------------------------|----------|---------|----------------------------|---------------------------|---------------------------------------------------------------------------------------------------------------------|
| external_vsphere_vcenter_ip | TRUE | string | | | IP/URL of the vCenter |
| external_vsphere_vcenter_port | TRUE | string | | "443" | Port of the vCenter API |
| external_vsphere_insecure | TRUE | string | "true", "false" | "true" | set to "true" if the host above uses a self-signed cert |
| external_vsphere_user | TRUE | string | | | User name for vCenter with required privileges |
| external_vsphere_password | TRUE | string | | | Password for vCenter |
| external_vsphere_datacenter | TRUE | string | | | Datacenter name to use |
| external_vsphere_kubernetes_cluster_id | TRUE | string | | "kubernetes-cluster-id" | Kubernetes cluster ID to use |
| external_vsphere_version | TRUE | string | | "6.7u3" | VMware vSphere version where all VMs are located |
| external_vsphere_cloud_controller_image_tag | TRUE | string | | "latest" | Cloud controller image tag to use |
| vsphere_syncer_image_tag | TRUE | string | | "v2.2.1" | Syncer image tag to use |
| vsphere_csi_attacher_image_tag | TRUE | string | | "v3.1.0" | CSI attacher image tag to use |
| vsphere_csi_controller | TRUE | string | | "v2.2.1" | CSI controller image tag to use |
| vsphere_csi_controller_replicas | TRUE | integer | | 1 | Number of pods Kubernetes should deploy for the CSI controller |
| vsphere_csi_liveness_probe_image_tag | TRUE | string | | "v2.2.0" | CSI liveness probe image tag to use |
| vsphere_csi_provisioner_image_tag | TRUE | string | | "v2.1.0" | CSI provisioner image tag to use |
| vsphere_csi_node_driver_registrar_image_tag | TRUE | string | | "v1.1.0" | CSI node driver registrar image tag to use |
| vsphere_csi_driver_image_tag | TRUE | string | | "v1.0.2" | CSI driver image tag to use |
| vsphere_csi_resizer_tag | TRUE | string | | "v1.1.0" | CSI resizer image tag to use |
## Usage example
@@ -61,7 +67,7 @@ spec:
- containerPort: 80
protocol: TCP
volumeMounts:
- mountPath: /var/lib/www/html
- mountPath: /usr/share/nginx/html
name: csi-data-vsphere
volumes:
- name: csi-data-vsphere
@@ -83,8 +89,8 @@ csi-pvc-vsphere Bound pvc-dc7b1d21-ee41-45e1-98d9-e877cc1533ac 1Gi
And the volume mounted to the Nginx Pod (wait until the Pod is Running):
```ShellSession
kubectl exec -it nginx -- df -h | grep /var/lib/www/html
/dev/sdb 976M 2.6M 907M 1% /var/lib/www/html
kubectl exec -it nginx -- df -h | grep /usr/share/nginx/html
/dev/sdb 976M 2.6M 907M 1% /usr/share/nginx/html
```
## More info


@@ -11,13 +11,21 @@
# external_vsphere_version: "6.7u3"
## Tags for the external vSphere Cloud Provider images
## gcr.io/cloud-provider-vsphere/cpi/release/manager
# external_vsphere_cloud_controller_image_tag: "latest"
# vsphere_syncer_image_tag: "v1.0.2"
# vsphere_csi_attacher_image_tag: "v1.1.1"
# vsphere_csi_controller: "v1.0.2"
# vsphere_csi_liveness_probe_image_tag: "v1.1.0"
# vsphere_csi_provisioner_image_tag: "v1.2.2"
# vsphere_csi_resizer_tag: "v1.0.0"
## gcr.io/cloud-provider-vsphere/csi/release/syncer
# vsphere_syncer_image_tag: "v2.2.1"
## quay.io/k8scsi/csi-attacher
# vsphere_csi_attacher_image_tag: "v3.1.0"
## gcr.io/cloud-provider-vsphere/csi/release/driver
# vsphere_csi_controller: "v2.2.1"
## quay.io/k8scsi/livenessprobe
# vsphere_csi_liveness_probe_image_tag: "v2.2.0"
## quay.io/k8scsi/csi-provisioner
# vsphere_csi_provisioner_image_tag: "v2.1.0"
## quay.io/k8scsi/csi-resizer
## makes sense only for vSphere version >=7.0
# vsphere_csi_resizer_tag: "v1.1.0"
## To use vSphere CSI plugin to provision volumes set this value to true
# vsphere_csi_enabled: true
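
The defaults above stay commented out until overridden in the inventory. As a hedged sketch (the inventory path and playbook invocation are illustrative, not part of this change), the CSI driver could also be enabled for a single run with an extra var:

```bash
# Normally you would uncomment vsphere_csi_enabled in group_vars/all/vsphere.yml;
# passing it as an extra var has the same effect for one run (paths are examples).
ansible-playbook -i inventory/mycluster/hosts.yaml -b cluster.yml \
  -e vsphere_csi_enabled=true
```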


@@ -51,6 +51,21 @@
when:
- need_bootstrap.rc != 0
- name: Update Apt cache
raw: apt-get update --allow-releaseinfo-change
become: true
when:
- '''ID=debian'' in os_release.stdout_lines'
- (
'''VERSION="10'' in os_release.stdout_lines' or
'''VERSION="11'' in os_release.stdout_lines'
)
register: bootstrap_update_apt_result
changed_when:
- '"changed its" in bootstrap_update_apt_result.stdout'
- '"value from" in bootstrap_update_apt_result.stdout'
ignore_errors: true
- name: Set the ansible_python_interpreter fact
set_fact:
ansible_python_interpreter: "/usr/bin/python3"
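
The new "Update Apt cache" task above exists because, once a new Debian stable is released, the release metadata of existing repositories changes and a plain `apt-get update` refuses to continue. A minimal manual equivalent on an affected Debian 10/11 host would be roughly:

```bash
# Accept the changed release info explicitly; this is the condition the task's
# changed_when strings ("changed its", "value from") are matching on.
sudo apt-get update --allow-releaseinfo-change
```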


@@ -34,6 +34,12 @@ items:
- patch
- update
- watch
- apiGroups:
- ""
resources:
- services/status
verbs:
- patch
- apiGroups:
- ""
resources:
@@ -71,5 +77,15 @@ items:
- get
- list
- watch
- apiGroups:
- "coordination.k8s.io"
resources:
- leases
verbs:
- get
- list
- watch
- create
- update
kind: List
metadata: {}


@@ -6,6 +6,12 @@ drain_nodes: true
drain_retries: 3
drain_retry_delay_seconds: 10
drain_fallback_enabled: false
drain_fallback_grace_period: 300
drain_fallback_timeout: 360s
drain_fallback_retries: 0
drain_fallback_retry_delay_seconds: 10
upgrade_node_uncordon_after_drain_failure: true
upgrade_node_fail_if_drain_fails: true


@@ -73,15 +73,48 @@
{{ bin_dir }}/kubectl drain
--force
--ignore-daemonsets
--grace-period {{ drain_grace_period }}
--timeout {{ drain_timeout }}
--grace-period {{ hostvars['localhost']['drain_grace_period_after_failure'] | default(drain_grace_period) }}
--timeout {{ hostvars['localhost']['drain_timeout_after_failure'] | default(drain_timeout) }}
--delete-local-data {{ kube_override_hostname|default(inventory_hostname) }}
{% if drain_pod_selector %}--pod-selector '{{ drain_pod_selector }}'{% endif %}
when: drain_nodes
register: result
failed_when:
- result.rc != 0
- not drain_fallback_enabled
until: result.rc == 0
retries: "{{ drain_retries }}"
delay: "{{ drain_retry_delay_seconds }}"
- name: Drain fallback
block:
- name: Set facts after regular drain has failed
set_fact:
drain_grace_period_after_failure: "{{ drain_fallback_grace_period }}"
drain_timeout_after_failure: "{{ drain_fallback_timeout }}"
delegate_to: localhost
delegate_facts: yes
run_once: yes
- name: Drain node - fallback with disabled eviction
command: >-
{{ bin_dir }}/kubectl drain
--force
--ignore-daemonsets
--grace-period {{ drain_fallback_grace_period }}
--timeout {{ drain_fallback_timeout }}
--delete-local-data {{ kube_override_hostname|default(inventory_hostname) }}
{% if drain_pod_selector %}--pod-selector '{{ drain_pod_selector }}'{% endif %}
--disable-eviction
register: drain_fallback_result
until: drain_fallback_result.rc == 0
retries: "{{ drain_fallback_retries }}"
delay: "{{ drain_fallback_retry_delay_seconds }}"
when:
- drain_nodes
- drain_fallback_enabled
- result.rc != 0
rescue:
- name: Set node back to schedulable
command: "{{ bin_dir }}/kubectl --kubeconfig /etc/kubernetes/admin.conf uncordon {{ inventory_hostname }}"