Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions .agents/projects/iris-jobquery-job-tree.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Iris JobQuery Job Tree

## Problem

The dashboard was building job trees from a flat `ListJobs` response and
inferring hierarchy from slash-delimited names in
`lib/iris/dashboard/src/components/controller/JobsTab.vue` and
`lib/iris/dashboard/src/components/controller/JobDetail.vue`.

That conflicted with the wider `ListJobs` contract used by
`lib/iris/src/iris/client/client.py`, which expects broad job listing behavior
rather than dashboard-specific tree semantics. The result was fragile nested
expansion logic and no explicit server contract for direct child queries.

## Approach

Keep `ListJobs` as the single RPC, but add a typed `JobQuery` envelope in
`lib/iris/src/iris/rpc/controller.proto`:

- `scope=ALL` preserves the current client behavior
- `scope=ROOTS` returns top-level jobs for the dashboard jobs table
- `scope=CHILDREN` returns direct children for a parent job

Also add `has_children` to `iris.job.JobStatus` in
`lib/iris/src/iris/rpc/job.proto` so the UI does not guess whether an expand
button should exist.

## Key Code

```python
if scope == controller_pb2.Controller.JOB_QUERY_SCOPE_ROOTS:
    jobs, total_count = _jobs_paginated(...)
elif scope == controller_pb2.Controller.JOB_QUERY_SCOPE_CHILDREN:
    if not parent_job_id:
        raise ConnectError(Code.INVALID_ARGUMENT, ...)
    jobs, total_count = _child_jobs_paginated(...)
else:
    jobs, total_count = _jobs_all_filtered(...)

has_children = _parent_ids_with_children(self._db, [j.job_id for j in jobs])
all_jobs = self._jobs_to_protos(
    jobs, task_summaries, autoscaler_pending_hints, has_children=has_children
)
```

On the frontend, `JobsTab.vue` and `JobDetail.vue` now cache loaded child rows
by parent job id and track expanded state by job id.

## Tests

- `tests/cluster/controller/test_service.py -k list_jobs`
covers legacy all-jobs behavior and new `ROOTS` / `CHILDREN` scopes.
- `tests/cluster/controller/test_dashboard.py -k "ListJobs or list_jobs"`
covers the dashboard JSON/RPC surface.
- `lib/iris/tests/e2e/test_smoke.py::test_dashboard_job_expand_nested`
still validates nested expansion behavior through the UI path.
136 changes: 68 additions & 68 deletions lib/iris/dashboard/src/components/controller/JobDetail.vue
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ import { controllerRpcCall } from '@/composables/useRpc'
import { useAutoRefresh } from '@/composables/useAutoRefresh'
import { stateToName, stateDisplayName } from '@/types/status'
import type {
JobStatus, TaskStatus, LaunchJobRequest,
JobStatus, TaskStatus, LaunchJobRequest, JobQuery,
GetJobStatusResponse, ListTasksResponse, ListJobsResponse,
ResourceUsage,
} from '@/types/rpc'
import { timestampMs, formatTimestamp, formatDuration, formatRelativeTime, formatBytes, formatDeviceConfig } from '@/utils/formatting'
import { flattenJobTree, getLeafJobName, getParentJobName, jobsWithChildren } from '@/utils/jobTree'
import { timestampMs, formatTimestamp, formatDuration, formatBytes, formatDeviceConfig } from '@/utils/formatting'
import { getLeafJobName } from '@/utils/jobTree'
import PageShell from '@/components/layout/PageShell.vue'
import StatusBadge from '@/components/shared/StatusBadge.vue'
import InfoCard from '@/components/shared/InfoCard.vue'
Expand All @@ -29,8 +29,9 @@ const TERMINAL_STATES = new Set(['succeeded', 'failed', 'killed', 'worker_failed
const job = ref<JobStatus | null>(null)
const jobRequest = ref<LaunchJobRequest | null>(null)
const tasks = ref<TaskStatus[]>([])
const descendantJobs = ref<JobStatus[]>([])
// Direct children keyed by parent job id; the entry under props.jobId holds this job's own children.
const childJobsByParent = ref<Map<string, JobStatus[]>>(new Map())
// Job ids of child rows that are currently expanded in the child-jobs table.
const expandedChildJobs = ref<Set<string>>(new Set())
// Job ids whose children are being fetched after an expand click (shown as '…' on the toggle).
const loadingChildJobs = ref<Set<string>>(new Set())
const loading = ref(true)
const error = ref<string | null>(null)
const profilingTaskId = ref<string | null>(null)
Expand All @@ -40,11 +41,9 @@ const stateFilter = ref('')

type SortColumn = 'task' | 'state' | 'mem' | 'cpu' | 'duration'
type SortDir = 'asc' | 'desc'
type ChildJobsView = 'direct' | 'all'

const sortColumn = ref<SortColumn | null>(null)
const sortDir = ref<SortDir>('asc')
const childJobsView = ref<ChildJobsView>('direct')

type ChildSortColumn = 'name' | 'state' | 'duration'
const childSortColumn = ref<ChildSortColumn | null>(null)
Expand Down Expand Up @@ -82,6 +81,16 @@ async function copyJobName() {

let fetchGeneration = 0

/**
 * Load the direct children of a single job.
 *
 * Sends a `ListJobs` request whose query is scoped to CHILDREN of `parentJobId`.
 *
 * @param parentJobId - id of the job whose children are requested.
 * @returns the jobs carried by the response, or an empty array when it has none.
 */
async function fetchChildJobs(parentJobId: string): Promise<JobStatus[]> {
  const query = {
    scope: 'JOB_QUERY_SCOPE_CHILDREN',
    parentJobId,
  } satisfies JobQuery
  const { jobs } = await controllerRpcCall<ListJobsResponse>('ListJobs', { query })
  return jobs ?? []
}

async function fetchData() {
const gen = ++fetchGeneration
error.value = null
Expand All @@ -99,29 +108,12 @@ async function fetchData() {
jobRequest.value = jobResp.request ?? null
tasks.value = tasksResp.tasks ?? []

// Fetch all descendants by walking the job tree level by level via parentJobId
const jobId = jobResp.job.jobId
if (jobId) {
const result: JobStatus[] = []
const queue = [jobId]
while (queue.length > 0) {
const parentId = queue.shift()!
const resp = await controllerRpcCall<ListJobsResponse>('ListJobs', {
parentJobId: parentId,
})
if (gen !== fetchGeneration) return // superseded by a newer fetchData()
const children = resp.jobs ?? []
result.push(...children)
for (const child of children) {
if (child.hasChildren) {
queue.push(child.jobId)
}
}
}
descendantJobs.value = result
} else {
descendantJobs.value = []
}
const parentIds = [props.jobId, ...expandedChildJobs.value]
const childEntries = await Promise.all(
parentIds.map(async parentJobId => [parentJobId, await fetchChildJobs(parentJobId)] as const),
)
if (gen !== fetchGeneration) return
childJobsByParent.value = new Map(childEntries)
} catch (e) {
if (gen !== fetchGeneration) return // superseded by a newer fetchData()
error.value = e instanceof Error ? e.message : String(e)
Expand Down Expand Up @@ -153,9 +145,9 @@ watch(() => props.jobId, () => {
job.value = null
jobRequest.value = null
tasks.value = []
descendantJobs.value = []
childJobsByParent.value = new Map()
expandedChildJobs.value = new Set()
childJobsView.value = 'direct'
loadingChildJobs.value = new Set()
error.value = null
fetchData()
startRefresh()
Expand Down Expand Up @@ -197,13 +189,6 @@ function taskIndex(taskId: string): string {

// -- Child job helpers --

const visibleChildJobs = computed(() => {
if (childJobsView.value === 'all') return descendantJobs.value
const parentName = job.value?.name
if (!parentName) return []
return descendantJobs.value.filter(child => getParentJobName(child.name) === parentName)
})

function childJobDurationMs(j: JobStatus): number {
const started = timestampMs(j.startedAt)
if (!started) return 0
Expand Down Expand Up @@ -232,17 +217,52 @@ const childJobComparator = computed<((a: JobStatus, b: JobStatus) => number) | u
}
})

const flattenedChildJobs = computed(() => flattenJobTree(visibleChildJobs.value, expandedChildJobs.value, childJobComparator.value))
const expandableChildJobs = computed(() => jobsWithChildren(visibleChildJobs.value))
/**
 * Rows of the child-jobs table in display order.
 *
 * Starting from this job's cached children (depth 0), each child is emitted and,
 * when its id is in the expanded set, immediately followed by its own cached
 * children one level deeper. Siblings are ordered by the active child comparator
 * when one is set; otherwise they keep the cached order.
 */
const flattenedChildJobs = computed(() => {
  const rows: Array<{ job: JobStatus; depth: number }> = []
  const compare = childJobComparator.value
  const expanded = expandedChildJobs.value
  const byParent = childJobsByParent.value

  const visit = (parentId: string, depth: number): void => {
    const cached = byParent.get(parentId) ?? []
    // Sort a copy so the cached array is never reordered in place.
    const ordered = compare ? [...cached].sort(compare) : cached
    ordered.forEach(job => {
      rows.push({ job, depth })
      if (expanded.has(job.jobId)) {
        visit(job.jobId, depth + 1)
      }
    })
  }

  visit(props.jobId, 0)
  return rows
})

/**
 * Expand or collapse one row of the child-jobs table.
 *
 * Collapsing only removes the job id from the expanded set. Expanding adds it
 * and, when the job's children are neither cached nor already being fetched,
 * loads them with `fetchChildJobs` and stores them under the job's id.
 *
 * @param jobStatus - the child job whose row was toggled.
 * @throws rethrows any `fetchChildJobs` failure after undoing the expansion, so
 *   the row does not stay open with nothing under it and the caller still sees
 *   the error.
 */
async function toggleExpandedChildJob(jobStatus: JobStatus): Promise<void> {
  const { jobId } = jobStatus
  const wasExpanded = expandedChildJobs.value.has(jobId)

  // Replace the Set instead of mutating it so the ref sees a new value.
  const nextExpanded = new Set(expandedChildJobs.value)
  if (wasExpanded) {
    nextExpanded.delete(jobId)
  } else {
    nextExpanded.add(jobId)
  }
  expandedChildJobs.value = nextExpanded

  if (wasExpanded) return
  // Children already cached: nothing to load.
  if (childJobsByParent.value.has(jobId)) return
  // A fetch for this row is still in flight (expand -> collapse -> expand); don't start another.
  if (loadingChildJobs.value.has(jobId)) return

  const nextLoading = new Set(loadingChildJobs.value)
  nextLoading.add(jobId)
  loadingChildJobs.value = nextLoading
  try {
    const children = await fetchChildJobs(jobId)
    const nextChildren = new Map(childJobsByParent.value)
    nextChildren.set(jobId, children)
    childJobsByParent.value = nextChildren
  } catch (e) {
    // Undo the optimistic expansion so the toggle reflects reality and a retry is one click away.
    const rolledBack = new Set(expandedChildJobs.value)
    rolledBack.delete(jobId)
    expandedChildJobs.value = rolledBack
    throw e
  } finally {
    const doneLoading = new Set(loadingChildJobs.value)
    doneLoading.delete(jobId)
    loadingChildJobs.value = doneLoading
  }
}

const SEGMENT_COLORS: Record<string, string> = {
Expand Down Expand Up @@ -580,26 +600,6 @@ async function handleProfile(taskId: string, profilerType: string, format: strin
<h3 class="text-sm font-semibold uppercase tracking-wider text-text-secondary">
Child Jobs
</h3>
<div class="inline-flex rounded-md border border-surface-border bg-surface p-0.5">
<button
class="px-2.5 py-1 text-xs rounded transition-colors"
:class="childJobsView === 'direct'
? 'bg-accent text-white'
: 'text-text-secondary hover:bg-surface-raised hover:text-text'"
@click="childJobsView = 'direct'"
>
Direct only
</button>
<button
class="px-2.5 py-1 text-xs rounded transition-colors"
:class="childJobsView === 'all'
? 'bg-accent text-white'
: 'text-text-secondary hover:bg-surface-raised hover:text-text'"
@click="childJobsView = 'all'"
>
All descendants
</button>
</div>
</div>
<table class="w-full border-collapse">
<thead>
Expand Down Expand Up @@ -629,11 +629,11 @@ async function handleProfile(taskId: string, profilerType: string, format: strin
>
<span class="inline-flex items-center gap-1">
<button
v-if="expandableChildJobs.has(node.job.name)"
v-if="node.job.hasChildren"
class="text-text-muted hover:text-text select-none w-4 text-center text-xs"
@click.stop="toggleExpandedChildJob(node.job.name)"
@click.stop="toggleExpandedChildJob(node.job)"
>
{{ expandedChildJobs.has(node.job.name) ? '▼' : '▶' }}
{{ loadingChildJobs.has(node.job.jobId) ? '…' : (expandedChildJobs.has(node.job.jobId) ? '▼' : '▶') }}
</button>
<span v-else class="w-4" />
<RouterLink
Expand Down
Loading
Loading