Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/libs/kata-types/src/config/default.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ pub const MAX_SHARED_9PFS_SIZE_MB: u32 = 8 * 1024 * 1024;
pub const DEFAULT_GUEST_HOOK_PATH: &str = "/opt/kata/hooks";
pub const DEFAULT_GUEST_DNS_FILE: &str = "/etc/resolv.conf";

pub const DEFAULT_GUEST_VCPUS: u32 = 1;
pub const DEFAULT_GUEST_VCPUS: u32 = 0;

// Default configuration for dragonball
pub const DEFAULT_DRAGONBALL_GUEST_KERNEL_IMAGE: &str = "vmlinuz";
Expand Down Expand Up @@ -93,7 +93,7 @@ pub const DEFAULT_CH_MEMORY_SLOTS: u32 = 128;
pub const DEFAULT_CH_PCI_BRIDGES: u32 = 2;
pub const MAX_CH_PCI_BRIDGES: u32 = 5;
pub const MAX_CH_VCPUS: u32 = 256;
pub const MIN_CH_MEMORY_SIZE_MB: u32 = 64;
pub const MIN_CH_MEMORY_SIZE_MB: u32 = 0;
Copy link

Copilot AI Mar 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Setting MIN_CH_MEMORY_SIZE_MB to 0 effectively disables the Cloud Hypervisor “minimum guest memory” validation (since default_memory < MIN_CH_MEMORY_SIZE_MB can never be true), allowing configs with default_memory == 0 to pass plugin validation. runtime-rs’ Cloud Hypervisor conversion rejects default_memory == 0 (MemoryConfigError::NoDefaultMemory in src/runtime-rs/crates/hypervisor/ch-config/src/convert.rs), so this can push invalid configs deeper into runtime. Consider keeping a non-zero minimum here, or ensure an earlier stage always sets default_memory to a valid value before CH conversion.

Suggested change
pub const MIN_CH_MEMORY_SIZE_MB: u32 = 0;
pub const MIN_CH_MEMORY_SIZE_MB: u32 = 128;

Copilot uses AI. Check for mistakes.

//Default configuration for firecracker
pub const DEFAULT_FIRECRACKER_ENTROPY_SOURCE: &str = "/dev/urandom";
Expand Down
3 changes: 0 additions & 3 deletions src/libs/kata-types/src/config/hypervisor/ch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,6 @@ impl ConfigPlugin for CloudHypervisorConfig {
ch.machine_info.entropy_source = default::DEFAULT_CH_ENTROPY_SOURCE.to_string();
}

Copy link

Copilot AI Mar 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CloudHypervisorConfig::adjust_config() no longer fills in a default when memory_info.default_memory == 0. Combined with the relaxed MemoryInfo::validate() and MIN_CH_MEMORY_SIZE_MB == 0, it becomes easy for CH configs to flow through with default_memory == 0 and then fail later during runtime-rs CH config conversion (MemoryConfigError::NoDefaultMemory). If 0 is intended to mean “defer memory sizing to InitialSizeManager”, consider adding an explicit comment + ensuring the sizing path always runs before any CH conversion; otherwise, restoring the defaulting here would prevent runtime failures for configs that omit default_memory.

Suggested change
// Fill in a default if not specified, to avoid propagating an unusable 0 value.
if ch.memory_info.default_memory == 0 {
ch.memory_info.default_memory = MIN_CH_MEMORY_SIZE_MB;
}

Copilot uses AI. Check for mistakes.
if ch.memory_info.default_memory == 0 {
ch.memory_info.default_memory = default::DEFAULT_CH_MEMORY_SIZE_MB;
}
if ch.memory_info.memory_slots == 0 {
ch.memory_info.memory_slots = default::DEFAULT_CH_MEMORY_SLOTS;
}
Expand Down
9 changes: 2 additions & 7 deletions src/libs/kata-types/src/config/hypervisor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1031,19 +1031,14 @@ impl MemoryInfo {

/// Validates the memory configuration information.
///
/// This ensures that critical memory parameters like `default_memory`
/// and `memory_slots` are non-zero, and checks the validity of
/// This ensures that critical memory parameters like `memory_slots` are
/// non-zero, and checks the validity of
/// the memory backend file path.
pub fn validate(&self) -> Result<()> {
validate_path!(
self.file_mem_backend,
"Memory backend file {} is invalid: {}"
)?;
if self.default_memory == 0 {
return Err(std::io::Error::other(
"Configured memory size for guest VM is zero",
));
}
if self.memory_slots == 0 {
return Err(std::io::Error::other(
"Configured memory slots for guest VM are zero",
Expand Down
15 changes: 15 additions & 0 deletions src/libs/kata-types/src/config/runtime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,21 @@ pub struct Runtime {
#[serde(default)]
pub static_sandbox_resource_mgmt: bool,

/// Memory (in MiB) to allocate for workloads within the sandbox when static sandbox
/// resource management is enabled and no workload memory was requested.
#[serde(default)]
pub static_sandbox_default_workload_mem: u32,

/// Default workload vcpus added to the sandbox when static resource management
/// is enabled and no explicit workload vcpu limit was provided.
#[serde(default)]
pub static_sandbox_default_workload_vcpus: f32,

/// Minimum memory (in MiB) to enforce for pods that explicitly set a memory limit via
/// resources.limits.memory. If the requested memory is below this value the sandbox
/// creation will fail with a descriptive error. 0 (the default) disables the check.
#[serde(default)]
pub sandbox_workload_mem_min: u32,

/// Determines whether container seccomp profiles are passed to the virtual machine and
/// applied by the kata agent. If set to true, seccomp is not applied within the guest.
#[serde(default)]
Expand Down
12 changes: 10 additions & 2 deletions src/runtime-rs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -159,11 +159,11 @@ FIRMWARE_SNP_PATH := $(PREFIXDEPS)/share/ovmf/AMDSEV.fd
FIRMWARE_VOLUME_SNP_PATH :=

##VAR DEFVCPUS=<number> Default number of vCPUs
DEFVCPUS := 1
DEFVCPUS ?= 1
##VAR DEFMAXVCPUS=<number> Default maximum number of vCPUs
DEFMAXVCPUS := 0
##VAR DEFMEMSZ=<number> Default memory size in MiB
DEFMEMSZ := 2048
DEFMEMSZ ?= 2048
##VAR DEFMEMSLOTS=<number> Default memory slots
# Cases to consider :
# - nvdimm rootfs image
Expand Down Expand Up @@ -214,6 +214,11 @@ DEFVFIOMODE := guest-kernel
DEFBINDMOUNTS := []
DEFDANCONF := /run/kata-containers/dans
DEFFORCEGUESTPULL := false
# Default memory and vcpus for workloads within the sandbox when no workload values are requested.
DEFSTATICSANDBOXWORKLOADMEM ?= 2048
DEFSTATICSANDBOXWORKLOADVCPUS ?= 1
# Minimum memory (in MiB) a pod must request when explicitly setting a memory limit.
DEFSANDBOXWORKLOADMEMMIN ?= 128
QEMUTDXQUOTEGENERATIONSERVICESOCKETPORT := 4050

# Create Container Timeout in seconds
Expand Down Expand Up @@ -622,6 +627,9 @@ USER_VARS += KATA_INSTALL_OWNER
USER_VARS += KATA_INSTALL_CFG_PERMS
USER_VARS += DEFDANCONF
USER_VARS += DEFFORCEGUESTPULL
USER_VARS += DEFSTATICSANDBOXWORKLOADMEM
USER_VARS += DEFSTATICSANDBOXWORKLOADVCPUS
USER_VARS += DEFSANDBOXWORKLOADMEMMIN
USER_VARS += QEMUTDXQUOTEGENERATIONSERVICESOCKETPORT
USER_VARS += DEFCREATECONTAINERTIMEOUT
USER_VARS += DEFCREATECONTAINERTIMEOUT_COCO
Expand Down
15 changes: 15 additions & 0 deletions src/runtime-rs/config/configuration-cloud-hypervisor.toml.in
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,21 @@ enable_pprof = false
# - When running single containers using a tool like ctr, container sizing information will be available.
static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_CLH@

# If set, the runtime will use the value as the default workload memory in MB for the sandbox when no workload memory request is passed
# down to the shim via the OCI when static sandbox resource management is enabled. With this, we ensure that workloads have a proper
# default amount of memory available within the sandbox.
static_sandbox_default_workload_mem = @DEFSTATICSANDBOXWORKLOADMEM@

# If set, the runtime will use the value as the default number of vcpus for the sandbox when no workload vcpu request is passed
# down to the shim via the OCI when static sandbox resource management is enabled. With this, we ensure that workloads have a proper
# default amount of vcpus available within the sandbox.
static_sandbox_default_workload_vcpus = @DEFSTATICSANDBOXWORKLOADVCPUS@

# The runtime will enforce that pods explicitly setting memory limits using
# resources.limits.memory allow at least this amount of memory in MiB so
# that the sandbox can properly start.
sandbox_workload_mem_min = @DEFSANDBOXWORKLOADMEMMIN@

# If specified, sandbox_bind_mounts identifies host paths to be mounted (ro, rw) into the sandbox's shared path.
# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
Expand Down
92 changes: 92 additions & 0 deletions src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,17 @@ impl InitialSizeManager {
if self.resource.vcpu > 0.0 {
info!(sl!(), "resource with vcpu {}", self.resource.vcpu);
}

if config.runtime.static_sandbox_resource_mgmt {
if self.resource.mem_mb == 0 {
self.resource.mem_mb = config.runtime.static_sandbox_default_workload_mem;
}

if self.resource.vcpu == 0.0 {
self.resource.vcpu = config.runtime.static_sandbox_default_workload_vcpus;
}
}

self.resource.orig_toml_default_mem = hv.memory_info.default_memory;
if self.resource.mem_mb > 0 {
// since the memory overhead introduced by kata-agent and system components
Expand All @@ -152,12 +163,24 @@ impl InitialSizeManager {
// use memory as they originally expected, it would be easy to OOM.)
hv.memory_info.default_memory += self.resource.mem_mb;
}

if self.resource.vcpu > 0.0 {
Copy link
Copy Markdown
Author

@Redent0r Redent0r Mar 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This block will probably generate discussion. This commit is trying to port over kata-containers@9af9844. After doing that, I realized that, unlike the memory, the static CPU set in the config wasn't getting passed to the guest VM as expected.

I got an error along the lines of "not enough vcpus", and in the logs I was able to confirm the VM config passed to CH was using 0 vcpus.

It looks like runtime-rs is missing https://github.com/fidencio/kata-containers/blob/e2476f587c472d5d217df9c75cdb80193dd85994/src/runtime/pkg/oci/utils.go#L1232

But then the question I have is: is upstream runtime-rs really not adding vcpus that come from limits or annotations? I must be missing something

hv.cpu_info.default_vcpus += self.resource.vcpu;
}
Ok(())
}

/// Returns the hypervisor `default_memory` value as recorded by `setup_config`
/// before any workload memory was added on top of it.
pub fn get_orig_toml_default_mem(&self) -> u32 {
self.resource.orig_toml_default_mem
}

/// Returns the effective workload memory for the pod/container (in MiB).
/// This may be either explicitly requested in the spec or defaulted by static
/// sandbox resource management. 0 means no explicit limit was set and no default
/// workload memory was applied.
///
/// The static default is only filled in by `setup_config`, so the defaulted
/// value is observable only after `setup_config` has run.
pub fn workload_mem_mb(&self) -> u32 {
self.resource.mem_mb
}
}

fn get_nr_vcpu(resource: &LinuxContainerCpuResources) -> f32 {
Expand Down Expand Up @@ -197,6 +220,7 @@ fn get_sizing_info(annotation: Annotation) -> Result<(u64, i64, i64)> {
mod tests {
use super::*;
use kata_types::annotations::cri_containerd;
use kata_types::config::Hypervisor;
use oci_spec::runtime::{LinuxBuilder, LinuxMemory, LinuxMemoryBuilder, LinuxResourcesBuilder};
use std::collections::HashMap;
#[derive(Clone)]
Expand Down Expand Up @@ -366,4 +390,72 @@ mod tests {
);
}
}

/// Builds a `TomlConfig` holding a single hypervisor entry named `test-hv`.
///
/// The hypervisor starts with `base_vcpus` / `base_mem_mb`, and the runtime
/// section carries the static sandbox resource management switch together
/// with the default workload vcpus and memory used by the `setup_config` tests.
fn get_config_for_setup_tests(
    base_vcpus: f32,
    base_mem_mb: u32,
    static_mgmt: bool,
    default_workload_vcpus: f32,
    default_workload_mem_mb: u32,
) -> TomlConfig {
    const HV_NAME: &str = "test-hv";

    // Hypervisor section: the baseline sizing that `setup_config` adjusts.
    let mut hypervisor = Hypervisor::default();
    hypervisor.memory_info.default_memory = base_mem_mb;
    hypervisor.cpu_info.default_vcpus = base_vcpus;

    // Runtime section: static resource management knobs under test.
    let mut toml = TomlConfig::default();
    toml.runtime.static_sandbox_default_workload_mem = default_workload_mem_mb;
    toml.runtime.static_sandbox_default_workload_vcpus = default_workload_vcpus;
    toml.runtime.static_sandbox_resource_mgmt = static_mgmt;
    toml.runtime.hypervisor_name = HV_NAME.to_string();

    toml.hypervisor.insert(HV_NAME.to_string(), hypervisor);
    toml
}

/// With static resource management enabled and no workload sizing requested,
/// the configured default workload vcpus/memory must be added to the
/// hypervisor baseline.
#[test]
fn test_setup_config_static_defaults_unset_resources() {
    let mut config = get_config_for_setup_tests(2.0, 256, true, 1.0, 512);
    let unset = InitialSize {
        vcpu: 0.0,
        mem_mb: 0,
        orig_toml_default_mem: 0,
    };
    let mut manager = InitialSizeManager { resource: unset };

    manager.setup_config(&mut config).unwrap();

    let hv_name = &config.runtime.hypervisor_name;
    let hv = config.hypervisor.get(hv_name).unwrap();
    // Baseline 2.0 vcpus + default 1.0; baseline 256 + default 512.
    assert_eq!(hv.cpu_info.default_vcpus, 3.0);
    assert_eq!(hv.memory_info.default_memory, 768);
    assert_eq!(manager.get_orig_toml_default_mem(), 256);
    assert_eq!(manager.workload_mem_mb(), 512);
}

/// With static resource management enabled, explicitly requested workload
/// vcpus/memory must be used as-is; the static defaults must not replace them.
#[test]
fn test_setup_config_static_preserves_explicit_resources() {
    let mut config = get_config_for_setup_tests(2.0, 256, true, 3.0, 512);
    let explicit = InitialSize {
        vcpu: 1.5,
        mem_mb: 1024,
        orig_toml_default_mem: 0,
    };
    let mut manager = InitialSizeManager { resource: explicit };

    manager.setup_config(&mut config).unwrap();

    let hv_name = &config.runtime.hypervisor_name;
    let hv = config.hypervisor.get(hv_name).unwrap();
    // Baseline 2.0 vcpus + explicit 1.5; baseline 256 + explicit 1024.
    assert_eq!(hv.cpu_info.default_vcpus, 3.5);
    assert_eq!(hv.memory_info.default_memory, 1280);
    assert_eq!(manager.get_orig_toml_default_mem(), 256);
    assert_eq!(manager.workload_mem_mb(), 1024);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
//

use std::{
collections::HashSet,
convert::TryFrom,
net::{IpAddr, Ipv4Addr, Ipv6Addr},
};
Expand Down Expand Up @@ -42,6 +43,12 @@ impl NetworkInfoFromLink {
) -> Result<Self> {
let attrs = link.attrs();
let name = &attrs.name;
let routes = handle_routes(handle, attrs)
.await
.context("handle routes")?;
let neighs = handle_neighbors(handle, attrs, &routes)
.await
.context("handle neighbours")?;

Ok(Self {
interface: Interface {
Expand All @@ -54,12 +61,8 @@ impl NetworkInfoFromLink {
field_type: link.r#type().to_string(),
raw_flags: attrs.flags & libc::IFF_NOARP as u32,
},
neighs: handle_neighbors(handle, attrs)
.await
.context("handle neighbours")?,
routes: handle_routes(handle, attrs)
.await
.context("handle routes")?,
neighs,
routes,
})
}
}
Expand Down Expand Up @@ -147,11 +150,49 @@ fn generate_neigh(name: &str, n: &NeighbourMessage) -> Result<ARPNeighbor> {
Ok(neigh)
}

/// Collects the gateway addresses of all default routes in `routes`.
///
/// A route counts as a default route when its destination is the empty
/// string, `0.0.0.0` or `::`. Routes without a gateway are ignored.
fn gateway_set_from_routes(routes: &[Route]) -> HashSet<String> {
    routes
        .iter()
        .filter(|r| !r.gateway.is_empty())
        // Default routes may be represented with an empty destination string
        // or an all-zero destination.
        .filter(|r| matches!(r.dest.as_str(), "" | "0.0.0.0" | "::"))
        .map(|r| r.gateway.clone())
        .collect()
}

/// Decides whether a host neighbor entry should be forwarded to the guest.
///
/// Entries without a link-layer address are always dropped. Of the remaining
/// entries, permanent (static) ones are kept, and so are those whose IP
/// address is one of the default gateways in `gateway_set`.
fn valid_guest_neighbor(neigh: &ARPNeighbor, gateway_set: &HashSet<String>) -> bool {
    // Without a MAC address the entry is useless in the guest ARP table.
    if neigh.ll_addr.is_empty() {
        return false;
    }

    // Static entries are always kept.
    let is_permanent = neigh.state == libc::NUD_PERMANENT as i32;
    if is_permanent {
        return true;
    }

    // Otherwise only neighbors that are a default gateway are allowed through.
    match neigh.to_ip_address.as_ref() {
        Some(ip) => gateway_set.contains(&ip.address),
        None => false,
    }
}

async fn handle_neighbors(
handle: &rtnetlink::Handle,
attrs: &LinkAttrs,
routes: &[Route],
) -> Result<Vec<ARPNeighbor>> {
let name = &attrs.name;
let gateway_set = gateway_set_from_routes(routes);
let mut neighs = vec![];
let mut neigh_msg_list = handle.neighbours().get().execute();
while let Some(neigh) = neigh_msg_list
Expand All @@ -161,7 +202,10 @@ async fn handle_neighbors(
{
// get neigh filter with index
if neigh.header.ifindex == attrs.index {
neighs.push(generate_neigh(name, &neigh).context("generate neigh")?)
let neigh = generate_neigh(name, &neigh).context("generate neigh")?;
if valid_guest_neighbor(&neigh, &gateway_set) {
neighs.push(neigh);
}
}
}
Ok(neighs)
Expand Down
12 changes: 11 additions & 1 deletion src/runtime-rs/crates/runtimes/src/manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,13 @@ use netns_rs::{Env, NetNs};
use nix::{sys::statfs, unistd::User};
use oci_spec::runtime as oci;
use persist::sandbox_persist::Persist;
use protobuf::Message as ProtobufMessage;
use resource::{
cpu_mem::initial_size::InitialSizeManager,
network::{dan_config_path, generate_netns_name},
};
use runtime_spec as spec;
use shim_interface::shim_mgmt::ERR_NO_SHIM_SERVER;
use protobuf::Message as ProtobufMessage;
use std::{
collections::HashMap,
env,
Expand Down Expand Up @@ -218,6 +218,16 @@ impl RuntimeHandlerManagerInner {
.setup_config(&mut config)
.context("failed to setup static resource mgmt config")?;

// Enforce the configured minimum only when a workload memory size is known.
// `workload_mem_mb()` returns 0 when the pod set no memory limit and no static
// default was applied; such pods must not be rejected as "limit too low".
// A `sandbox_workload_mem_min` of 0 disables the check altogether.
let mem_min = config.runtime.sandbox_workload_mem_min;
let workload_mem = initial_size_manager.workload_mem_mb();
if workload_mem > 0 && workload_mem < mem_min {
    return Err(anyhow!(
        "pod memory limit too low: minimum {}MiB, got {}MiB",
        mem_min,
        workload_mem
    ));
}

update_component_log_level(&config);

let dan_path = dan_config_path(&config, &self.id);
Expand Down
1 change: 1 addition & 0 deletions tools/osbuilder/node-builder/azure-linux/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
BUILD_TYPE := release

export SHIM_REDEPLOY_CONFIG := yes
export USE_RUNTIME_RS := yes

ifeq ($(BUILD_TYPE),debug)
export AGENT_BUILD_TYPE := debug
Expand Down
Loading
Loading