-
Notifications
You must be signed in to change notification settings - Fork 14
Fix get available memory to walk cgroup v2 ancestor chain #1824
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,6 +22,10 @@ pub(crate) mod proto; | |
| pub mod target; | ||
| pub mod target_metrics; | ||
|
|
||
| use std::fs::read_to_string; | ||
| use std::io::Error; | ||
| use std::path::PathBuf; | ||
|
|
||
| use byte_unit::Byte; | ||
| use sysinfo::System; | ||
|
|
||
|
|
@@ -34,21 +38,277 @@ pub(crate) fn full<T: Into<bytes::Bytes>>( | |
| .boxed() | ||
| } | ||
|
|
||
| /// Get available memory for the process, checking cgroup v2 limits first, | ||
| /// then falling back to system memory. | ||
| /// Determine the available memory for this process by inspecting cgroup | ||
| /// limits, falling back to total system memory. | ||
| /// | ||
| /// Resolution order: | ||
| /// 1. cgroup v2 -- walk from process cgroup up to root, return minimum | ||
| /// `memory.max` across the ancestor chain | ||
| /// 2. cgroup v2 root (`/sys/fs/cgroup/memory.max`) | ||
| /// 3. Total system memory via `sysinfo` | ||
| /// | ||
| /// Step 1 uses `/proc/self/cgroup` to resolve the process path. | ||
| /// Step 2 is the fallback when `/proc/self/cgroup` is unavailable or shows root (`/`). | ||
| /// | ||
| /// References: | ||
| /// - cgroup v2: <https://docs.kernel.org/admin-guide/cgroup-v2.html> | ||
| /// - `/proc/$PID/cgroup`: <https://docs.kernel.org/admin-guide/cgroup-v1/cgroups.html> | ||
| #[must_use] | ||
| pub fn get_available_memory() -> Byte { | ||
| if let Ok(content) = std::fs::read_to_string("/sys/fs/cgroup/memory.max") { | ||
| let content = content.trim(); | ||
| if content == "max" { | ||
| return Byte::from_u64(u64::MAX); | ||
| get_available_memory_with(read_to_string) | ||
| } | ||
|
|
||
| fn parse_cgroup_v2_limit(content: &str) -> Option<Byte> { | ||
| let content = content.trim(); | ||
| if content == "max" { | ||
| return None; | ||
| } | ||
| let bytes: u64 = content.parse().ok()?; | ||
| Some(Byte::from_u64(bytes)) | ||
| } | ||
|
|
||
| /// Extract the cgroup v2 path from `/proc/self/cgroup` content (`0::/path`). | ||
| fn resolve_cgroup_v2_path(proc_cgroup_content: &str) -> Option<&str> { | ||
| for line in proc_cgroup_content.lines() { | ||
| // cgroup v2 unified entries start with "0::" | ||
| if let Some(path) = line.strip_prefix("0::") { | ||
| let path = path.trim(); | ||
| if !path.is_empty() && path != "/" { | ||
| return Some(path); | ||
| } | ||
| } | ||
| let ignore_case = true; | ||
| if let Ok(limit) = Byte::parse_str(content.trim(), ignore_case) { | ||
| } | ||
| None | ||
| } | ||
|
|
||
| /// Inner implementation of [`get_available_memory`] with an injectable file | ||
| /// reader for testing. | ||
| fn get_available_memory_with<F>(read_file: F) -> Byte | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I could see an argument for an info log in this function for what it resolved from where. Let me know if that's something you find desirable. |
||
| where | ||
| F: Fn(PathBuf) -> Result<String, Error>, | ||
| { | ||
| let proc_cgroup = read_file(PathBuf::from("/proc/self/cgroup")).ok(); | ||
|
|
||
| // Try cgroup v2: walk from the process-specific path up to the root, | ||
| // taking the minimum memory.max across the ancestor chain. | ||
| if let Some(ref proc_cgroup) = proc_cgroup | ||
| && let Some(cgroup_path) = resolve_cgroup_v2_path(proc_cgroup) | ||
| { | ||
| let cgroup_root = PathBuf::from("/sys/fs/cgroup"); | ||
| let mut path = cgroup_root.join(&cgroup_path[1..]); // strip leading '/' | ||
| let mut min_limit: Option<Byte> = None; | ||
| loop { | ||
| let memory_max = path.join("memory.max"); | ||
| if let Ok(content) = read_file(memory_max) | ||
| && let Some(limit) = parse_cgroup_v2_limit(&content) | ||
| { | ||
| min_limit = Some(match min_limit { | ||
| Some(prev) if prev.as_u64() < limit.as_u64() => prev, | ||
| _ => limit, | ||
| }); | ||
| } | ||
| if path == cgroup_root { | ||
| break; | ||
| } | ||
| if !path.pop() { | ||
| break; | ||
| } | ||
| } | ||
| if let Some(limit) = min_limit { | ||
| return limit; | ||
| } | ||
| } | ||
|
|
||
| // Try cgroup v2 root (works when container has its own cgroup namespace) | ||
| if let Ok(content) = read_file(PathBuf::from("/sys/fs/cgroup/memory.max")) | ||
| && let Some(limit) = parse_cgroup_v2_limit(&content) | ||
| { | ||
| return limit; | ||
| } | ||
|
|
||
| let sys = System::new_all(); | ||
| Byte::from_u64(sys.total_memory()) | ||
| } | ||
|
|
||
| #[cfg(test)] | ||
| mod tests { | ||
| use std::io::ErrorKind; | ||
|
|
||
| use super::*; | ||
|
|
||
| fn mock_reader( | ||
| files: &'static [(&'static str, &'static str)], | ||
| ) -> impl Fn(PathBuf) -> Result<String, Error> { | ||
| move |path| { | ||
| for (p, content) in files { | ||
| if PathBuf::from(p) == path { | ||
| return Ok((*content).to_string()); | ||
| } | ||
| } | ||
| Err(Error::new(ErrorKind::NotFound, "not found")) | ||
| } | ||
| } | ||
|
|
||
| #[test] | ||
| fn cgroup_v2_max_falls_back_to_system_memory() { | ||
| let read = mock_reader(&[("/sys/fs/cgroup/memory.max", "max\n")]); | ||
| let mem = get_available_memory_with(read); | ||
| let sys = System::new_all(); | ||
| assert_eq!(mem, Byte::from_u64(sys.total_memory())); | ||
| } | ||
|
|
||
| #[test] | ||
| fn cgroup_v2_numeric_limit() { | ||
| let read = mock_reader(&[("/sys/fs/cgroup/memory.max", "12582912\n")]); | ||
| let mem = get_available_memory_with(read); | ||
| assert_eq!(mem, Byte::from_u64(12_582_912)); | ||
| } | ||
|
|
||
| #[test] | ||
| fn cgroup_v2_garbage_falls_back_to_system_memory() { | ||
| let read = mock_reader(&[("/sys/fs/cgroup/memory.max", "not-a-number\n")]); | ||
| let mem = get_available_memory_with(read); | ||
| let sys = System::new_all(); | ||
| assert_eq!(mem, Byte::from_u64(sys.total_memory())); | ||
| } | ||
|
|
||
| #[test] | ||
| fn no_cgroup_file_falls_back_to_system_memory() { | ||
| let read = mock_reader(&[]); | ||
| let mem = get_available_memory_with(read); | ||
| let sys = System::new_all(); | ||
| assert_eq!(mem, Byte::from_u64(sys.total_memory())); | ||
| } | ||
|
|
||
| #[test] | ||
| fn process_specific_cgroup_v2_path() { | ||
| // Simulates host cgroup namespace: /proc/self/cgroup points to a | ||
| // nested cgroup, and the memory limit is at that nested path. | ||
| let read = mock_reader(&[ | ||
| ( | ||
| "/proc/self/cgroup", | ||
| "0::/system.slice/docker-abc123.scope\n", | ||
| ), | ||
| ( | ||
| "/sys/fs/cgroup/system.slice/docker-abc123.scope/memory.max", | ||
| "12582912\n", | ||
| ), | ||
| // Root cgroup has no limit | ||
| ("/sys/fs/cgroup/memory.max", "max\n"), | ||
| ]); | ||
| let mem = get_available_memory_with(read); | ||
| assert_eq!(mem, Byte::from_u64(12_582_912)); | ||
| } | ||
|
|
||
| #[test] | ||
| fn process_specific_path_takes_precedence_over_root() { | ||
| let read = mock_reader(&[ | ||
| ( | ||
| "/proc/self/cgroup", | ||
| "0::/system.slice/docker-abc123.scope\n", | ||
| ), | ||
| ( | ||
| "/sys/fs/cgroup/system.slice/docker-abc123.scope/memory.max", | ||
| "8388608\n", | ||
| ), | ||
| ("/sys/fs/cgroup/memory.max", "16777216\n"), | ||
| ]); | ||
| let mem = get_available_memory_with(read); | ||
| assert_eq!(mem, Byte::from_u64(8_388_608)); | ||
| } | ||
|
|
||
| #[test] | ||
| fn process_specific_path_max_falls_through_to_root() { | ||
| let read = mock_reader(&[ | ||
| ( | ||
| "/proc/self/cgroup", | ||
| "0::/system.slice/docker-abc123.scope\n", | ||
| ), | ||
| ( | ||
| "/sys/fs/cgroup/system.slice/docker-abc123.scope/memory.max", | ||
| "max\n", | ||
| ), | ||
| ("/sys/fs/cgroup/memory.max", "16777216\n"), | ||
| ]); | ||
| let mem = get_available_memory_with(read); | ||
| assert_eq!(mem, Byte::from_u64(16_777_216)); | ||
| } | ||
|
|
||
| #[test] | ||
| fn proc_cgroup_root_path_skipped() { | ||
| // When /proc/self/cgroup shows root path "/", skip it and fall | ||
| // through to the root cgroup check. | ||
| let read = mock_reader(&[ | ||
| ("/proc/self/cgroup", "0::/\n"), | ||
| ("/sys/fs/cgroup/memory.max", "12582912\n"), | ||
| ]); | ||
| let mem = get_available_memory_with(read); | ||
| assert_eq!(mem, Byte::from_u64(12_582_912)); | ||
| } | ||
|
|
||
| #[test] | ||
| fn resolve_cgroup_v2_path_parses_correctly() { | ||
| assert_eq!( | ||
| resolve_cgroup_v2_path("0::/system.slice/docker-abc.scope\n"), | ||
| Some("/system.slice/docker-abc.scope") | ||
| ); | ||
| assert_eq!(resolve_cgroup_v2_path("0::/\n"), None); | ||
| assert_eq!(resolve_cgroup_v2_path(""), None); | ||
| // Non-v2 lines are ignored | ||
| assert_eq!( | ||
| resolve_cgroup_v2_path("1:memory:/docker/abc\n0::/system.slice\n"), | ||
| Some("/system.slice") | ||
| ); | ||
| } | ||
|
|
||
| #[test] | ||
| fn cgroup_v2_ancestor_limit_is_tighter() { | ||
| // Nested v2 cgroup where the parent has a tighter limit than the | ||
| // child. The effective limit is the minimum up the ancestor chain. | ||
| let read = mock_reader(&[ | ||
| ("/proc/self/cgroup", "0::/parent/child\n"), | ||
| // child: 16 MB | ||
| ("/sys/fs/cgroup/parent/child/memory.max", "16777216\n"), | ||
| // parent: 8 MB (tighter) | ||
| ("/sys/fs/cgroup/parent/memory.max", "8388608\n"), | ||
| // root: no limit | ||
| ("/sys/fs/cgroup/memory.max", "max\n"), | ||
| ]); | ||
| let mem = get_available_memory_with(read); | ||
| assert_eq!(mem, Byte::from_u64(8_388_608)); | ||
| } | ||
|
|
||
| #[test] | ||
| fn cgroup_v2_ancestor_limit_three_levels() { | ||
| // Three-level hierarchy: grandparent is tightest. | ||
| let read = mock_reader(&[ | ||
| ("/proc/self/cgroup", "0::/a/b/c\n"), | ||
| ("/sys/fs/cgroup/a/b/c/memory.max", "33554432\n"), // 32 MB | ||
| ("/sys/fs/cgroup/a/b/memory.max", "16777216\n"), // 16 MB | ||
| ("/sys/fs/cgroup/a/memory.max", "8388608\n"), // 8 MB (tightest) | ||
| ("/sys/fs/cgroup/memory.max", "max\n"), | ||
| ]); | ||
| let mem = get_available_memory_with(read); | ||
| assert_eq!(mem, Byte::from_u64(8_388_608)); | ||
| } | ||
|
|
||
| #[test] | ||
| fn cgroup_v2_ancestor_middle_is_tightest() { | ||
| // Middle ancestor is tightest. | ||
| let read = mock_reader(&[ | ||
| ("/proc/self/cgroup", "0::/a/b/c\n"), | ||
| ("/sys/fs/cgroup/a/b/c/memory.max", "33554432\n"), // 32 MB | ||
| ("/sys/fs/cgroup/a/b/memory.max", "4194304\n"), // 4 MB (tightest) | ||
| ("/sys/fs/cgroup/a/memory.max", "16777216\n"), // 16 MB | ||
| ("/sys/fs/cgroup/memory.max", "max\n"), | ||
| ]); | ||
| let mem = get_available_memory_with(read); | ||
| assert_eq!(mem, Byte::from_u64(4_194_304)); | ||
| } | ||
|
|
||
| #[test] | ||
| fn get_available_memory_returns_nonzero() { | ||
| let mem = get_available_memory(); | ||
| assert!(mem.as_u64() > 0); | ||
| } | ||
| } | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: This reduced the wait time for GH runners from ~15 min to seconds.