[containerd] Simplify limiting number of open files per container (#9319)

by setting a default runtime spec with a patch for RLIMIT_NOFILE.

- Introduces containerd_base_runtime_spec_rlimit_nofile.
- Generates base_runtime_spec on-the-fly, to use the containerd version
  of the node.
This commit is contained in:
Ilya Margolin 2022-11-08 15:44:32 +01:00 committed by GitHub
parent 5c25b57989
commit 5a8cf824f6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 31 additions and 223 deletions

View file

@ -64,14 +64,17 @@ is a list of such dictionaries.
Default runtime can be changed by setting `containerd_default_runtime`.
#### base_runtime_spec
#### Base runtime specs and limiting number of open files
`base_runtime_spec` key in a runtime dictionary can be used to explicitly
specify a runtime spec json file. We ship the default one which is generated
with `ctr oci spec > /etc/containerd/cri-base.json`. It will be used if you set
`base_runtime_spec: cri-base.json`. The main advantage of doing so is the presence of
`rlimits` section in this configuration, which will restrict the maximum number
of file descriptors(open files) per container to 1024.
`base_runtime_spec` key in a runtime dictionary is used to explicitly
specify a runtime spec json file. `runc` runtime has it set to `cri-base.json`,
which is generated with `ctr oci spec > /etc/containerd/cri-base.json` and
updated to include a custom setting for maximum number of file descriptors per
container.
You can change maximum number of file descriptors per container for the default
`runc` runtime by setting the `containerd_base_runtime_spec_rlimit_nofile`
variable.
You can tune many more [settings][runtime-spec] by supplying your own file name and content with `containerd_base_runtime_specs`:

View file

@ -15,7 +15,7 @@ containerd_runc_runtime:
type: "io.containerd.runc.v2"
engine: ""
root: ""
# base_runtime_spec: cri-base.json # use this to limit number of file descriptors per container
base_runtime_spec: cri-base.json
options:
systemdCgroup: "{{ containerd_use_systemd_cgroup | ternary('true', 'false') }}"
@ -26,8 +26,17 @@ containerd_additional_runtimes: []
# engine: ""
# root: ""
containerd_base_runtime_spec_rlimit_nofile: 16384
containerd_default_base_runtime_spec_patch:
process:
rlimits:
- type: RLIMIT_NOFILE
hard: "{{ containerd_base_runtime_spec_rlimit_nofile }}"
soft: "{{ containerd_base_runtime_spec_rlimit_nofile }}"
containerd_base_runtime_specs:
cri-base.json: "{{ lookup('file', 'cri-base.json') }}"
cri-base.json: "{{ containerd_default_base_runtime_spec | combine(containerd_default_base_runtime_spec_patch,recursive=1) }}"
containerd_grpc_max_recv_message_size: 16777216
containerd_grpc_max_send_message_size: 16777216

View file

@ -1,214 +0,0 @@
{
"ociVersion": "1.0.2-dev",
"process": {
"user": {
"uid": 0,
"gid": 0
},
"cwd": "/",
"capabilities": {
"bounding": [
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE"
],
"effective": [
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE"
],
"inheritable": [
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE"
],
"permitted": [
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE"
]
},
"rlimits": [
{
"type": "RLIMIT_NOFILE",
"hard": 1024,
"soft": 1024
}
],
"noNewPrivileges": true
},
"root": {
"path": "rootfs"
},
"mounts": [
{
"destination": "/proc",
"type": "proc",
"source": "proc",
"options": [
"nosuid",
"noexec",
"nodev"
]
},
{
"destination": "/dev",
"type": "tmpfs",
"source": "tmpfs",
"options": [
"nosuid",
"strictatime",
"mode=755",
"size=65536k"
]
},
{
"destination": "/dev/pts",
"type": "devpts",
"source": "devpts",
"options": [
"nosuid",
"noexec",
"newinstance",
"ptmxmode=0666",
"mode=0620",
"gid=5"
]
},
{
"destination": "/dev/shm",
"type": "tmpfs",
"source": "shm",
"options": [
"nosuid",
"noexec",
"nodev",
"mode=1777",
"size=65536k"
]
},
{
"destination": "/dev/mqueue",
"type": "mqueue",
"source": "mqueue",
"options": [
"nosuid",
"noexec",
"nodev"
]
},
{
"destination": "/sys",
"type": "sysfs",
"source": "sysfs",
"options": [
"nosuid",
"noexec",
"nodev",
"ro"
]
},
{
"destination": "/run",
"type": "tmpfs",
"source": "tmpfs",
"options": [
"nosuid",
"strictatime",
"mode=755",
"size=65536k"
]
}
],
"linux": {
"resources": {
"devices": [
{
"allow": false,
"access": "rwm"
}
]
},
"cgroupsPath": "/default",
"namespaces": [
{
"type": "pid"
},
{
"type": "ipc"
},
{
"type": "uts"
},
{
"type": "mount"
},
{
"type": "network"
}
],
"maskedPaths": [
"/proc/acpi",
"/proc/asound",
"/proc/kcore",
"/proc/keys",
"/proc/latency_stats",
"/proc/timer_list",
"/proc/timer_stats",
"/proc/sched_debug",
"/sys/firmware",
"/proc/scsi"
],
"readonlyPaths": [
"/proc/bus",
"/proc/fs",
"/proc/irq",
"/proc/sys",
"/proc/sysrq-trigger"
]
}
}

View file

@ -84,6 +84,16 @@
notify: restart containerd
when: http_proxy is defined or https_proxy is defined
- name: containerd | Generate default base_runtime_spec
register: ctr_oci_spec
command: "{{ containerd_bin_dir }}/ctr oci spec"
check_mode: false
changed_when: false
- name: containerd | Store generated default base_runtime_spec
set_fact:
containerd_default_base_runtime_spec: "{{ ctr_oci_spec.stdout | from_json }}"
- name: containerd | Write base_runtime_specs
copy:
content: "{{ item.value }}"