[Kernl Bench] Fix run all, avoid system crash (#43)

rengolin · web-flow · commit b5e82505b757 · 2026-01-08T22:20:43.000Z
Run all was broken by shadowing of the `tasks` variable: ProcessPoolExecutor
was crashing when trying to pick the next item from a generator on multiple
threads. Renamed the variable, and now the conflict is gone.

Running all processes at the same time was crashing systems with less
available memory, since some tasks take between 12GB and 16GB each. The
number of workers is now limited based on the memory available at the
start of the script.
diff --git a/examples/ingress/convert-kernel-bench-to-mlir.py b/examples/ingress/convert-kernel-bench-to-mlir.py
@@ -8,6 +8,7 @@
 # there's an ingore list. Runs the conversion in parallel.
 
 import sys
+import psutil
 
 from concurrent.futures import ProcessPoolExecutor
 from dataclasses import dataclass
@@ -20,6 +21,10 @@
 project_root = Path(__file__).parent.parent.parent
 torch_kernels_dir = project_root / "third_party" / "KernelBench" / "KernelBench"
 mlir_kernels_dir = project_root / "cache" / "ingress" / "KernelBench"
+free_mem_gb = psutil.virtual_memory().available // (1024**3)
+print(f"Available memory: {free_mem_gb} GB")
+max_workers = min(free_mem_gb // 12, psutil.cpu_count())  # some workers need 12~16GB
+print(f"Using max_workers={max_workers} based on available memory")
 
 if not torch_kernels_dir.is_dir():
     print(
@@ -196,12 +201,12 @@ def process_task(task: KernelConversionTask):
         print(mlir_kernel, file=f)
 
 
-tasks = sorted(all_tasks(), key=lambda t: (t.level, t.id))
+sorted_tasks = sorted(all_tasks(), key=lambda t: (t.level, t.id))
 
 if len(sys.argv) == 1:
 
     def tasks_():
-        for task in tasks:
+        for task in sorted_tasks:
             if task.ignore_by_default:
                 print(
                     f"Skipping: {task.torch_path.parent}/{task.torch_path.name}",
@@ -217,9 +222,9 @@ def tasks_():
         lhs, rhs = arg.split(",")
         level_id, kernel_id = int(lhs), int(rhs)
         overall_idx = 100 * (level_id - 1) + (kernel_id - 1)
-        tasks_.append(tasks[overall_idx])
+        tasks_.append(sorted_tasks[overall_idx])
     tasks = tasks_
 
 print("Output directory:", mlir_kernels_dir)
-for _ in ProcessPoolExecutor().map(process_task, tasks):
+for _ in ProcessPoolExecutor(max_workers=max_workers).map(process_task, tasks):
     pass  # NB: obtain each result so that exceptions are propagated to the main process
diff --git a/pyproject.toml b/pyproject.toml
@@ -12,6 +12,7 @@ dev = [
   "ruff==0.14.5", # Python linter and formatter
   "pre-commit", # Tool to manage and apply pre-commit hooks
   "pytest>=8.0.0",
+  "psutil",
 ]
 
 [project.optional-dependencies]

Original file line number	Diff line number	Diff line change
`@@ -12,6 +12,7 @@ dev = [`
`12`	`12`	`"ruff==0.14.5", # Python linter and formatter`
`13`	`13`	`"pre-commit", # Tool to manage and apply pre-commit hooks`
`14`	`14`	`"pytest>=8.0.0",`
	`15`	`+ "psutil",`
`15`	`16`	`]`
`16`	`17`
`17`	`18`	`[project.optional-dependencies]`