From dec11c73387a5dee71e4acaea301228f6aaf3f9e Mon Sep 17 00:00:00 2001
From: shelmingsong
Date: Tue, 23 Aug 2022 10:32:49 +0800
Subject: [PATCH] optimize cgroups settings for node reserved

Add a `kube_reserved` switch (default: false) plus slice/cgroup name
variables for the kube-reserved and system-reserved cgroups:

* node defaults: declare `kube_reserved`, `kube_reserved_cgroups*` and
  `system_reserved_cgroups*`; derive the systemd-driver values of
  `kubelet_runtime_cgroups` / `kubelet_kubelet_cgroups` from
  `kube_reserved_cgroups`.
* kubelet config: render `kubeReservedCgroup` + `kubeReserved` only when
  `kube_reserved` is true, and `systemReservedCgroup` + `systemReserved`
  only when `system_reserved` is true.
* kubelet unit: create the per-controller cgroup directories with
  `ExecStartPre=/bin/mkdir -p` for each enabled reservation.
* containerd unit: set `Slice=` to the kube-reserved slice when
  `kube_reserved` is true.

The containerd template belongs to a different role than the one whose
defaults declare `kube_reserved` and
`kube_reserved_cgroups_for_service_slice`, so it guards with
`is defined` and falls back to `kube.slice` (the value declared in the
node role defaults) instead of failing on an undefined variable.
---
 .../containerd/templates/containerd.service.j2 |  3 +++
 roles/kubernetes/node/defaults/main.yml        | 10 ++++++++--
 .../templates/kubelet-config.v1beta1.yaml.j2   |  6 +++++-
 .../node/templates/kubelet.service.j2          | 18 ++++++++++++++++++
 4 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/roles/container-engine/containerd/templates/containerd.service.j2 b/roles/container-engine/containerd/templates/containerd.service.j2
index 09f9a3b2a..0107dc822 100644
--- a/roles/container-engine/containerd/templates/containerd.service.j2
+++ b/roles/container-engine/containerd/templates/containerd.service.j2
@@ -35,6 +35,9 @@ LimitNOFILE=infinity
 # Only systemd 226 and above support this version.
 TasksMax=infinity
 OOMScoreAdjust=-999
+{% if kube_reserved is defined and kube_reserved|bool %}
+Slice={{ kube_reserved_cgroups_for_service_slice | default('kube.slice') }}
+{% endif %}
 
 [Install]
 WantedBy=multi-user.target
diff --git a/roles/kubernetes/node/defaults/main.yml b/roles/kubernetes/node/defaults/main.yml
index f1657c0e6..82c416b03 100644
--- a/roles/kubernetes/node/defaults/main.yml
+++ b/roles/kubernetes/node/defaults/main.yml
@@ -12,8 +12,8 @@ kube_resolv_conf: "/etc/resolv.conf"
 kubelet_enforce_node_allocatable: "\"\""
 
 # Set runtime and kubelet cgroups when using systemd as cgroup driver (default)
-kubelet_runtime_cgroups: "/systemd/system.slice"
-kubelet_kubelet_cgroups: "/systemd/system.slice"
+kubelet_runtime_cgroups: "{{ kube_reserved_cgroups }}/containerd.service"
+kubelet_kubelet_cgroups: "{{ kube_reserved_cgroups }}/kubelet.service"
 
 # Set runtime and kubelet cgroups when using cgroupfs as cgroup driver
 kubelet_runtime_cgroups_cgroupfs: "/system.slice/containerd.service"
@@ -23,6 +23,10 @@ kubelet_kubelet_cgroups_cgroupfs: "/system.slice/kubelet.service"
 kubelet_fail_swap_on: true
 
 # Reserve this space for kube resources
+# Set to true to reserve resources for kube daemons
+kube_reserved: false
+kube_reserved_cgroups_for_service_slice: kube.slice
+kube_reserved_cgroups: "/{{ kube_reserved_cgroups_for_service_slice }}"
 kube_memory_reserved: 256Mi
 kube_cpu_reserved: 100m
 # kube_ephemeral_storage_reserved: 2Gi
@@ -35,6 +39,8 @@ kube_master_cpu_reserved: 200m
 
 # Set to true to reserve resources for system daemons
 system_reserved: false
+system_reserved_cgroups_for_service_slice: system.slice
+system_reserved_cgroups: "/{{ system_reserved_cgroups_for_service_slice }}"
 system_memory_reserved: 512Mi
 system_cpu_reserved: 500m
 # system_ephemeral_storage_reserved: 2Gi
diff --git a/roles/kubernetes/node/templates/kubelet-config.v1beta1.yaml.j2 b/roles/kubernetes/node/templates/kubelet-config.v1beta1.yaml.j2
index 9982f62aa..885fc2ed7 100644
--- a/roles/kubernetes/node/templates/kubelet-config.v1beta1.yaml.j2
+++ b/roles/kubernetes/node/templates/kubelet-config.v1beta1.yaml.j2
@@ -60,6 +60,8 @@ clusterDNS:
 - {{ dns_address }}
 {% endfor %}
 {# Node reserved CPU/memory #}
+{% if kube_reserved|bool %}
+kubeReservedCgroup: {{ kube_reserved_cgroups }}
 kubeReserved:
 {% if is_kube_master|bool %}
   cpu: {{ kube_master_cpu_reserved }}
@@ -80,7 +82,9 @@ kubeReserved:
   pid: "{{ kube_pid_reserved }}"
 {% endif %}
 {% endif %}
-{% if system_reserved is defined and system_reserved %}
+{% endif %}
+{% if system_reserved|bool %}
+systemReservedCgroup: {{ system_reserved_cgroups }}
 systemReserved:
 {% if is_kube_master|bool %}
   cpu: {{ system_master_cpu_reserved }}
diff --git a/roles/kubernetes/node/templates/kubelet.service.j2 b/roles/kubernetes/node/templates/kubelet.service.j2
index 38360c8df..7513d7e92 100644
--- a/roles/kubernetes/node/templates/kubelet.service.j2
+++ b/roles/kubernetes/node/templates/kubelet.service.j2
@@ -10,6 +10,24 @@ Wants={{ container_manager }}.service
 
 [Service]
 EnvironmentFile=-{{ kube_config_dir }}/kubelet.env
+{% if system_reserved|bool %}
+ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/cpu/{{ system_reserved_cgroups_for_service_slice }}
+ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/cpuacct/{{ system_reserved_cgroups_for_service_slice }}
+ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/cpuset/{{ system_reserved_cgroups_for_service_slice }}
+ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/hugetlb/{{ system_reserved_cgroups_for_service_slice }}
+ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/memory/{{ system_reserved_cgroups_for_service_slice }}
+ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/pids/{{ system_reserved_cgroups_for_service_slice }}
+ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/systemd/{{ system_reserved_cgroups_for_service_slice }}
+{% endif %}
+{% if kube_reserved|bool %}
+ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/cpu/{{ kube_reserved_cgroups_for_service_slice }}
+ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/cpuacct/{{ kube_reserved_cgroups_for_service_slice }}
+ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/cpuset/{{ kube_reserved_cgroups_for_service_slice }}
+ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/hugetlb/{{ kube_reserved_cgroups_for_service_slice }}
+ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/memory/{{ kube_reserved_cgroups_for_service_slice }}
+ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/pids/{{ kube_reserved_cgroups_for_service_slice }}
+ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/systemd/{{ kube_reserved_cgroups_for_service_slice }}
+{% endif %}
 ExecStart={{ bin_dir }}/kubelet \
 		$KUBE_LOGTOSTDERR \
 		$KUBE_LOG_LEVEL \