Skip to content

Commit c4edcac

Browse files
yaooqinn and Copilot committed
Add summary command for concise application overview
Aggregates app details, resource config (driver/executor/shuffle), and workload stats (jobs/stages/tasks/SQL) into a single view. Uses 6 API calls: app, env, jobs, stages, executors, sql. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 9642be6 commit c4edcac

4 files changed

Lines changed: 134 additions & 0 deletions

File tree

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ spark-history-cli --app-id <id> stages
7171
spark-history-cli --app-id <id> executors --all
7272
spark-history-cli --app-id <id> sql
7373
spark-history-cli --app-id <id> env
74+
spark-history-cli --app-id <id> summary
7475

7576
# SQL execution plans
7677
spark-history-cli --app-id <id> sql-plan <exec-id> # full plan
@@ -106,6 +107,7 @@ executors [--all] List executors
106107
sql [id] List or show SQL executions
107108
sql-plan <id> [opts] Show SQL plan (--view, --dot, -o)
108109
sql-jobs <id> Show jobs for a SQL execution
110+
summary Application overview (config + workload)
109111
rdds List cached RDDs
110112
env Show environment/config
111113
logs [path] Download event logs

spark_history_cli/cli.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,18 @@ def repl(state: CliState):
241241
output_status_block(skin, info, title="Application")
242242
skin.hint(f"Context set to {app_id}")
243243

244+
elif cmd == "summary":
245+
app_id = state.resolve_app_id(None)
246+
app = client.get_application(app_id)
247+
env = client.get_environment(app_id)
248+
jobs = client.list_jobs(app_id)
249+
stages = client.list_stages(app_id)
250+
executors = client.list_all_executors(app_id)
251+
sqls = client.list_sql(app_id, length=100000)
252+
sections = fmt.format_summary(app, env, jobs, stages, executors, sqls)
253+
for title, info in sections.items():
254+
output_status_block(skin, info, title=title)
255+
244256
elif cmd == "jobs":
245257
app_id = state.resolve_app_id(args[0] if args else None)
246258
status_filter = None
@@ -442,6 +454,39 @@ def cmd_app(state: CliState, app_id: str):
442454
output_status_block(skin, info, title="Application")
443455

444456

457+
@cli.command("summary")
@pass_state
def cmd_summary(state: CliState):
    """Show a concise summary of an application.

    Aggregates application details, resource config, and workload stats
    into a single overview.

    Examples:

      spark-history-cli -a <app> summary

      spark-history-cli -a <app> --json summary
    """
    # NOTE: the docstring above is rendered by click as the command's help
    # text, so its wording is user-facing.
    client = state.ensure_client()
    app_id = state.resolve_app_id(None)

    # Six API calls, evaluated left to right, in the positional order that
    # fmt.format_summary expects: app, env, jobs, stages, executors, sqls.
    responses = (
        client.get_application(app_id),
        client.get_environment(app_id),
        client.list_jobs(app_id),
        client.list_stages(app_id),
        client.list_all_executors(app_id),
        client.list_sql(app_id, length=100000),
    )

    if state.json_mode:
        # JSON mode emits the sections mapping as a single document.
        output_json(fmt.format_summary(*responses))
        return

    # Human-readable mode: one status block per summary section.
    from spark_history_cli.utils.repl_skin import ReplSkin

    skin = ReplSkin("spark_history", version=__version__)
    for title, info in fmt.format_summary(*responses).items():
        output_status_block(skin, info, title=title)
488+
489+
445490
@cli.command("jobs")
446491
@click.option("--status", type=click.Choice(
447492
["running", "succeeded", "failed", "unknown"], case_sensitive=False))

spark_history_cli/core/formatters.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,91 @@ def format_app_detail(app: dict) -> dict[str, str]:
115115
return info
116116

117117

118+
def format_summary(
    app: dict,
    env: dict,
    jobs: list[dict],
    stages: list[dict],
    executors: list[dict],
    sqls: list[dict],
) -> dict[str, dict[str, str]]:
    """Build a multi-section summary from several API responses.

    Args:
        app: Application record; its ``attempts`` list is read and the first
            entry is used as the attempt to report on.
        env: Environment record; ``runtime`` and ``sparkProperties`` are read.
            ``sparkProperties`` must be convertible with ``dict(...)`` (for
            example a list of ``[key, value]`` pairs).
        jobs: Job records; ``status``, ``numTasks`` and ``numCompletedTasks``
            are read.
        stages: Stage records; ``status`` is read.
        executors: Executor records; ``isActive`` is read.
        sqls: SQL execution records; ``status`` is read.

    Returns:
        An ordered dict of {section_title: {key: value}} pairs with the
        sections "Application", "Resources" and "Workload".
    """
    from collections import Counter

    # Treat missing or null containers as empty so a sparse response still
    # produces a summary instead of raising.
    attempts = app.get("attempts") or []
    latest = attempts[0] if attempts else {}
    status = "RUNNING" if not latest.get("completed", True) else "COMPLETED"
    runtime = env.get("runtime") or {}
    sp = dict(env.get("sparkProperties") or [])

    # ── Application ──
    application = {
        "App ID": app.get("id", ""),
        "Name": app.get("name", ""),
        "Status": f"{_status_icon(status)} {status}",
        "Duration": _duration(latest.get("duration")),
        "Spark Version": (
            f"{latest.get('appSparkVersion', 'N/A')} "
            f"(Scala {runtime.get('scalaVersion', 'N/A').replace('version ', '')}, "
            f"Java {runtime.get('javaVersion', 'N/A')})"
        ),
        "Master": sp.get("spark.master", "N/A"),
        "User": latest.get("sparkUser", ""),
        "Started": _ts(latest.get("startTimeEpoch")),
        "Ended": _ts(latest.get("endTimeEpoch")),
    }

    # ── Resources ──
    driver_mem = sp.get("spark.driver.memory", "N/A")
    driver_cores = sp.get("spark.driver.cores", "N/A")
    exec_mem = sp.get("spark.executor.memory", "N/A")
    exec_cores = sp.get("spark.executor.cores", "N/A")
    exec_instances = sp.get("spark.executor.instances", "N/A")
    active_execs = sum(1 for e in executors if e.get("isActive"))
    total_execs = len(executors)
    dyn_alloc = sp.get("spark.dynamicAllocation.enabled", "false")

    resources = {
        "Driver": f"{driver_mem} / {driver_cores} cores",
        "Executors": f"{exec_instances} × {exec_mem} / {exec_cores} cores ({total_execs} total, {active_execs} active)",
        "Dynamic Allocation": dyn_alloc,
        "Shuffle Partitions": sp.get("spark.sql.shuffle.partitions", "200"),
        # Show only the class name, not the fully qualified path.
        "Serializer": sp.get("spark.serializer", "JavaSerializer").rsplit(".", 1)[-1],
    }

    # ── Workload ──
    # A missing or null status is bucketed under "UNKNOWN".
    job_statuses = Counter(j.get("status") or "UNKNOWN" for j in jobs)
    stage_statuses = Counter(s.get("status") or "UNKNOWN" for s in stages)
    sql_statuses = Counter(s.get("status") or "UNKNOWN" for s in sqls)

    total_tasks = sum(j.get("numTasks", 0) for j in jobs)
    completed_tasks = sum(j.get("numCompletedTasks", 0) for j in jobs)

    # Preferred display order. "ACTIVE" is included because stages report it
    # while running; without it those stages were counted in the total but
    # omitted from the breakdown.
    known_order = (
        "SUCCEEDED", "COMPLETED", "COMPLETE", "RUNNING", "ACTIVE",
        "FAILED", "SKIPPED", "KILLED", "PENDING", "UNKNOWN",
    )

    def _status_summary(counts: Counter) -> str:
        """Render ``total (n status, ...)`` so the parts always add up."""
        total = sum(counts.values())
        # Known statuses first, then any unrecognised ones in sorted order,
        # so no counted item is silently dropped from the breakdown.
        extras = sorted((s for s in counts if s not in known_order), key=str)
        parts = [
            f"{counts[s]} {str(s).lower()}"
            for s in (*known_order, *extras)
            if counts.get(s)
        ]
        return f"{total} ({', '.join(parts)})" if parts else str(total)

    workload = {
        "Jobs": _status_summary(job_statuses),
        "Stages": _status_summary(stage_statuses),
        "Tasks": f"{completed_tasks:,}/{total_tasks:,} completed",
        "SQL Executions": _status_summary(sql_statuses),
    }

    return {
        "Application": application,
        "Resources": resources,
        "Workload": workload,
    }
202+
118203
# ── Job Formatters ────────────────────────────────────────────────────
119204

120205

spark_history_cli/skills/SKILL.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ spark-history-cli --json --server http://localhost:18080 --app-id <app-id> sql
3838
spark-history-cli --json --server http://localhost:18080 --app-id <app-id> sql-plan <exec-id> --view final
3939
spark-history-cli --server http://localhost:18080 --app-id <app-id> sql-plan <exec-id> --dot -o plan.dot
4040
spark-history-cli --json --server http://localhost:18080 --app-id <app-id> sql-jobs <exec-id>
41+
spark-history-cli --json --server http://localhost:18080 --app-id <app-id> summary
4142
spark-history-cli --json --server http://localhost:18080 --app-id <app-id> env
4243
spark-history-cli --server http://localhost:18080 --app-id <app-id> logs output.zip
4344
```
@@ -64,6 +65,7 @@ python -m spark_history_cli --json apps
6465
- `--json` + `--view`: structured JSON with `isAdaptive`, `sectionCount`, `plan`, and `sections`
6566
- `-o <file>`: write output to file instead of stdout
6667
- `sql-jobs <id>` for jobs associated with a SQL execution (fetches all linked jobs by ID)
68+
- `summary` for a concise application overview: app info, resource config (driver/executor/shuffle), and workload stats (jobs/stages/tasks/SQL)
6769
- `env` for Spark config/runtime context
6870
- `logs` only when the user explicitly wants the event log archive saved locally
6971

0 commit comments

Comments
 (0)