From 57c949588e887cd7cf77733d9493bf7b4af6595f Mon Sep 17 00:00:00 2001 From: Antti Kervinen Date: Wed, 26 Mar 2025 17:11:37 +0200 Subject: [PATCH] Add support for Linux memory policy Enable setting a NUMA memory policy for the container. New linux.memoryPolicy object contains inputs to the set_mempolicy(2) syscall. Signed-off-by: Antti Kervinen --- config-linux.md | 40 +++++++++++++++++++++++++++++++++++ features-linux.md | 31 +++++++++++++++++++++++++++ features.md | 16 ++++++++++++++ schema/config-linux.json | 17 +++++++++++++++ schema/defs-linux.json | 20 ++++++++++++++++++ specs-go/config.go | 35 ++++++++++++++++++++++++++++++ specs-go/features/features.go | 9 ++++++++ 7 files changed, 168 insertions(+) diff --git a/config-linux.md b/config-linux.md index 504f6c203..a8cc65cc8 100644 --- a/config-linux.md +++ b/config-linux.md @@ -791,6 +791,45 @@ and may use a maximum memory bandwidth of 20% on socket 0 and 70% on socket 1. } ``` +## Memory policy + +**`memoryPolicy`** (object, OPTIONAL) sets the NUMA memory policy for the container. +For more information see the [set_mempolicy(2)][set_mempolicy.2] man page. + +* **`mode`** *(string, REQUIRED)* - memory policy mode to use with set_mempolicy(2). + + A valid list of constants is shown below. + + * `MPOL_DEFAULT` + * `MPOL_BIND` + * `MPOL_INTERLEAVE` + * `MPOL_WEIGHTED_INTERLEAVE` + * `MPOL_PREFERRED` + * `MPOL_PREFERRED_MANY` + * `MPOL_LOCAL` + +* **`nodes`** *(string, REQUIRED)* - list of memory nodes from which the nodemask passed to set_mempolicy(2) is constructed. This is a comma-separated list, with dashes to represent ranges. For example, `0-3,7` represents memory nodes 0, 1, 2, 3, and 7. + +* **`flags`** *(array of strings, OPTIONAL)* - list of flags to use with set_mempolicy(2). + + A valid list of constants is shown below. 
+ + * `MPOL_F_NUMA_BALANCING` + * `MPOL_F_RELATIVE_NODES` + * `MPOL_F_STATIC_NODES` + +### Example + +```json +"linux": { + "memoryPolicy": { + "mode": "MPOL_INTERLEAVE", + "nodes": "2-3", + "flags": ["MPOL_F_STATIC_NODES"] + } +} +``` + ## Sysctl **`sysctl`** (object, OPTIONAL) allows kernel parameters to be modified at runtime for the container. @@ -1073,6 +1112,7 @@ subset of the available options. [tmpfs]: https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt [full.4]: https://man7.org/linux/man-pages/man4/full.4.html +[set_mempolicy.2]: https://man7.org/linux/man-pages/man2/set_mempolicy.2.html [mknod.1]: https://man7.org/linux/man-pages/man1/mknod.1.html [mknod.2]: https://man7.org/linux/man-pages/man2/mknod.2.html [namespaces.7_2]: https://man7.org/linux/man-pages/man7/namespaces.7.html diff --git a/features-linux.md b/features-linux.md index a3488e5a7..96b369281 100644 --- a/features-linux.md +++ b/features-linux.md @@ -195,6 +195,37 @@ Irrelevant to the availability of SELinux on the host operating system. } ``` +## MemoryPolicy + +**`memoryPolicy`** (object, OPTIONAL) represents the runtime's implementation status of memoryPolicy. + +* **`modes`** (array of strings, OPTIONAL). Recognized memory policies. Includes policies that may not be supported by the host operating system. + The runtime MUST recognize the elements in this array as the [`mode` of `linux.memoryPolicy` objects in `config.json`](config-linux.md#memory-policy). + +* **`flags`** (array of strings, OPTIONAL). Recognized flags for memory policies. Includes flags that may not be supported by the host operating system. 
+ The runtime MUST recognize the elements in this array in the [`flags` property of the `linux.memoryPolicy` object in `config.json`](config-linux.md#memory-policy). + +### Example + +```json +"memoryPolicy": { + "modes": [ + "MPOL_DEFAULT", + "MPOL_BIND", + "MPOL_INTERLEAVE", + "MPOL_WEIGHTED_INTERLEAVE", + "MPOL_PREFERRED", + "MPOL_PREFERRED_MANY", + "MPOL_LOCAL" + ], + "flags": [ + "MPOL_F_NUMA_BALANCING", + "MPOL_F_RELATIVE_NODES", + "MPOL_F_STATIC_NODES" + ] +} +``` + ## Intel RDT **`intelRdt`** (object, OPTIONAL) represents the runtime's implementation status of Intel RDT. diff --git a/features.md b/features.md index 24cd65c3b..2c65456a8 100644 --- a/features.md +++ b/features.md @@ -354,6 +354,22 @@ Here is a full example for reference. "selinux": { "enabled": true }, + "memoryPolicy": { + "modes": [ + "MPOL_DEFAULT", + "MPOL_BIND", + "MPOL_INTERLEAVE", + "MPOL_WEIGHTED_INTERLEAVE", + "MPOL_PREFERRED", + "MPOL_PREFERRED_MANY", + "MPOL_LOCAL" + ], + "flags": [ + "MPOL_F_NUMA_BALANCING", + "MPOL_F_RELATIVE_NODES", + "MPOL_F_STATIC_NODES" + ] + }, "intelRdt": { "enabled": true } diff --git a/schema/config-linux.json b/schema/config-linux.json index add4cf0e4..ba567f6e7 100644 --- a/schema/config-linux.json +++ b/schema/config-linux.json @@ -283,6 +283,23 @@ } } }, + "memoryPolicy": { + "type": "object", + "properties": { + "mode": { + "$ref": "defs-linux.json#/definitions/MemoryPolicyMode" + }, + "nodes": { + "type": "string" + }, + "flags": { + "type": "array", + "items": { + "$ref": "defs-linux.json#/definitions/MemoryPolicyFlag" + } + } + } + }, "personality": { "type": "object", "$ref": "defs-linux.json#/definitions/Personality" diff --git a/schema/defs-linux.json b/schema/defs-linux.json index 4bf73d0fb..ec34445e0 100644 --- a/schema/defs-linux.json +++ b/schema/defs-linux.json @@ -272,6 +272,26 @@ "allow" ] }, + "MemoryPolicyMode": { + "type": "string", + "enum": [ + "MPOL_DEFAULT", + "MPOL_BIND", + "MPOL_INTERLEAVE", + "MPOL_WEIGHTED_INTERLEAVE", + 
"MPOL_PREFERRED", + "MPOL_PREFERRED_MANY", + "MPOL_LOCAL" + ] + }, + "MemoryPolicyFlag": { + "type": "string", + "enum": [ + "MPOL_F_NUMA_BALANCING", + "MPOL_F_RELATIVE_NODES", + "MPOL_F_STATIC_NODES" + ] + }, "NetworkInterfacePriority": { "type": "object", "properties": { diff --git a/specs-go/config.go b/specs-go/config.go index 854290da2..01e003310 100644 --- a/specs-go/config.go +++ b/specs-go/config.go @@ -251,6 +251,8 @@ type Linux struct { // IntelRdt contains Intel Resource Director Technology (RDT) information for // handling resource constraints and monitoring metrics (e.g., L3 cache, memory bandwidth) for the container IntelRdt *LinuxIntelRdt `json:"intelRdt,omitempty"` + // MemoryPolicy contains NUMA memory policy for the container. + MemoryPolicy *LinuxMemoryPolicy `json:"memoryPolicy,omitempty"` // Personality contains configuration for the Linux personality syscall Personality *LinuxPersonality `json:"personality,omitempty"` // TimeOffsets specifies the offset for supporting time namespaces. @@ -855,6 +857,19 @@ type LinuxIntelRdt struct { EnableMBM bool `json:"enableMBM,omitempty"` } +// LinuxMemoryPolicy represents input for the set_mempolicy syscall. +type LinuxMemoryPolicy struct { + // Mode for the set_mempolicy syscall. + Mode MemoryPolicyModeType `json:"mode"` + + // Nodes representing the nodemask for the set_mempolicy syscall in comma separated ranges format. + // Format: "<node0>-<node1>,<node2>,<node3>-<node4>,..." + Nodes string `json:"nodes"` + + // Flags for the set_mempolicy syscall. + Flags []MemoryPolicyFlagType `json:"flags,omitempty"` +} + // ZOS contains platform-specific configuration for z/OS based containers. 
type ZOS struct { // Namespaces contains the namespaces that are created and/or joined by the container @@ -884,6 +899,26 @@ const ( ZOSUTSNamespace ZOSNamespaceType = "uts" ) +type MemoryPolicyModeType string + +const ( + MpolDefault MemoryPolicyModeType = "MPOL_DEFAULT" + MpolBind MemoryPolicyModeType = "MPOL_BIND" + MpolInterleave MemoryPolicyModeType = "MPOL_INTERLEAVE" + MpolWeightedInterleave MemoryPolicyModeType = "MPOL_WEIGHTED_INTERLEAVE" + MpolPreferred MemoryPolicyModeType = "MPOL_PREFERRED" + MpolPreferredMany MemoryPolicyModeType = "MPOL_PREFERRED_MANY" + MpolLocal MemoryPolicyModeType = "MPOL_LOCAL" +) + +type MemoryPolicyFlagType string + +const ( + MpolFNumaBalancing MemoryPolicyFlagType = "MPOL_F_NUMA_BALANCING" + MpolFRelativeNodes MemoryPolicyFlagType = "MPOL_F_RELATIVE_NODES" + MpolFStaticNodes MemoryPolicyFlagType = "MPOL_F_STATIC_NODES" +) + // LinuxSchedulerPolicy represents different scheduling policies used with the Linux Scheduler type LinuxSchedulerPolicy string diff --git a/specs-go/features/features.go b/specs-go/features/features.go index d8eb169dc..8271ded8a 100644 --- a/specs-go/features/features.go +++ b/specs-go/features/features.go @@ -47,6 +47,7 @@ type Linux struct { Apparmor *Apparmor `json:"apparmor,omitempty"` Selinux *Selinux `json:"selinux,omitempty"` IntelRdt *IntelRdt `json:"intelRdt,omitempty"` + MemoryPolicy *MemoryPolicy `json:"memoryPolicy,omitempty"` MountExtensions *MountExtensions `json:"mountExtensions,omitempty"` NetDevices *NetDevices `json:"netDevices,omitempty"` } @@ -132,6 +133,14 @@ type IntelRdt struct { Enabled *bool `json:"enabled,omitempty"` } +// MemoryPolicy represents the "memoryPolicy" field. +type MemoryPolicy struct { + // modes is the list of known memory policy modes, e.g., "MPOL_INTERLEAVE". + Modes []string `json:"modes,omitempty"` + // flags is the list of known memory policy mode flags, e.g., "MPOL_F_STATIC_NODES". 
+ Flags []string `json:"flags,omitempty"` +} + // MountExtensions represents the "mountExtensions" field. type MountExtensions struct { // IDMap represents the status of idmap mounts support.