Skip to content

Commit 1353ffe

Browse files
authored
Merge pull request #2 from timzhong1024/codex/permission-onboarding
[codex] Add permission onboarding flow
2 parents 567ef75 + 6c8c75e commit 1353ffe

7 files changed

Lines changed: 369 additions & 23 deletions

File tree

README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,14 @@ It is designed around three defaults:
1717

1818
Use `macos-cua doctor` to inspect current readiness.
1919

20+
Use `macos-cua onboard` to trigger the native prompts, open the relevant System Settings panes, and guide a human through granting both permissions. In a TTY session it waits by default; in non-TTY mode it triggers the flow and returns immediately unless you pass `--wait`. When Screen Recording appears to have been granted but the process has not yet been restarted, `onboard` surfaces a targeted restart hint rather than a generic enable instruction. Add `--json` for structured output, including per-permission `granted`, `waited`, and `likelyNeedsRestart` fields.
21+
2022
## Commands
2123

2224
```text
2325
macos-cua [--json] <command> [args...]
2426
27+
onboard [--wait|--no-wait] [--timeout <seconds>] [--no-request] [--no-open]
2528
doctor
2629
state
2730
record enable|disable|status
@@ -42,6 +45,9 @@ window frontmost|list|activate|minimize|maximize|close
4245

4346
```bash
4447
swift run macos-cua doctor
48+
swift run macos-cua onboard
49+
swift run macos-cua onboard --wait --timeout 180
50+
swift run macos-cua --json onboard --no-wait
4551
swift run macos-cua record enable
4652
swift run macos-cua --json state
4753
swift run macos-cua screenshot /tmp/frontmost.png
@@ -66,6 +72,7 @@ swift run macos-cua window list
6672
- If no usable frontmost window is available, default coordinate-taking commands fall back to screen coordinates and report that fallback in output.
6773
- `window list` is AX-first when Accessibility is available, then falls back to CoreGraphics window discovery.
6874
- `window list`, `window frontmost`, and `state.frontmostWindow.bounds` remain screen-global diagnostics; they are not window-local action coordinates.
75+
- Missing-permission errors point back to `macos-cua onboard` so that agent and human flows land on the same recovery path.
6976
- Browser DOM/ref actions are intentionally out of scope for this repo.
7077
- `record enable` starts a persistent session under `~/Library/Application Support/macos-cua/records/`; each subsequent command appends an action log entry, a full-screen timeline screenshot, failure-only snapshots, and a replayable `replay.sh` trace until `record disable`.
7178
- A shareable VS Code debug example lives at `.vscode/launch.example.json`; local `.vscode/launch.json` stays ignored.

Sources/macos-cua/Commands/CLI.swift

Lines changed: 71 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ enum CLI {
66
macos-cua [--json] <command> [args...]
77
88
Commands:
9+
onboard [--wait|--no-wait] [--timeout <seconds>] [--no-request] [--no-open]
910
doctor
1011
state
1112
record enable|disable|status
@@ -47,6 +48,8 @@ enum CLI {
4748
let output = CLIOutput(json: json)
4849
try Recorder.executeInvocation(arguments: arguments, command: command, output: output) {
4950
switch command {
51+
case "onboard", "onboarding":
52+
try onboard(args: Array(args.dropFirst()), output: output)
5053
case "doctor":
5154
try doctor(output: output)
5255
case "state":
@@ -81,6 +84,60 @@ enum CLI {
8184
}
8285
}
8386

87+
/// Handles the `onboard` command: parses its flags, runs the permission
/// onboarding flow, and emits the flow's result.
///
/// Flags are applied left to right, so a later flag overrides an earlier one:
/// - `--wait`: enables waiting; a timeout of 0 is raised to 120 seconds.
/// - `--no-wait`: disables waiting and resets the timeout to 0.
/// - `--timeout <seconds>`: sets the timeout; waiting stays enabled only when it is > 0.
/// - `--no-request`: passes `requestPrompts: false` to the onboarding flow.
/// - `--no-open`: passes `openSettingsPane: false` to the onboarding flow.
///
/// Without flags, waiting (with a 120-second timeout) is enabled only when
/// `PermissionSupport.isInteractiveSession()` reports true.
///
/// - Parameters:
///   - args: The flag arguments for this command.
///   - output: Receives the final payload and lines; no progress logger is
///     installed when `output.json` is true.
/// - Throws: `CUAError` for an unrecognised flag, a `--timeout` without a value,
///   or a negative timeout; rethrows errors from `parseInt` and `output.emit`.
static func onboard(args: [String], output: CLIOutput) throws {
    let usage = "usage: macos-cua onboard [--wait|--no-wait] [--timeout <seconds>] [--no-request] [--no-open]"
    let defaultWaitSeconds = 120

    var shouldWait = PermissionSupport.isInteractiveSession()
    var timeout = shouldWait ? defaultWaitSeconds : 0
    var shouldRequest = true
    var shouldOpen = true

    // Walk the flags with an iterator so `--timeout` can pull its value
    // without manual index bookkeeping.
    var remaining = args.makeIterator()
    while let flag = remaining.next() {
        switch flag {
        case "--wait":
            shouldWait = true
            if timeout == 0 {
                timeout = defaultWaitSeconds
            }
        case "--no-wait":
            shouldWait = false
            timeout = 0
        case "--timeout":
            guard let rawValue = remaining.next() else {
                throw CUAError(message: usage)
            }
            timeout = try parseInt(rawValue, name: "timeout")
            if timeout < 0 {
                throw CUAError(message: "timeout must be >= 0")
            }
            // A zero timeout means "do not wait at all".
            shouldWait = timeout > 0
        case "--no-request":
            shouldRequest = false
        case "--no-open":
            shouldOpen = false
        default:
            throw CUAError(message: usage)
        }
    }

    // Progress lines are printed only for non-JSON output in an interactive
    // session, and only when the flow will actually do something.
    let printsText = !output.json
    let interactive = PermissionSupport.isInteractiveSession()
    var logger: ((String) -> Void)?
    if printsText && interactive && (shouldWait || shouldRequest || shouldOpen) {
        logger = { message in
            print(message)
        }
    }

    let outcome = PermissionSupport.onboarding(
        requestPrompts: shouldRequest,
        openSettingsPane: shouldOpen,
        waitForReady: shouldWait,
        timeoutSeconds: timeout,
        log: logger
    )
    try output.emit(outcome.payload, lines: outcome.lines)
}
140+
84141
static func doctor(output: CLIOutput) throws {
85142
let accessibility = WindowSupport.isAccessibilityTrusted()
86143
let screenRecording = ScreenshotSupport.screenCaptureAccess()
@@ -113,20 +170,26 @@ enum CLI {
113170
"screenRecording": screenRecording,
114171
"syntheticInputReady": accessibility,
115172
"screenshotReady": screenshotCheck,
173+
"allReady": accessibility && screenRecording,
174+
"onboardCommand": "macos-cua onboard",
116175
"frontmostApp": frontmostApp as Any,
117176
"frontmostWindow": frontmostWindow as Any,
118177
"actionSpace": actionSpace,
119178
]
179+
var lines = [
180+
"Accessibility: \(accessibility ? "ready" : "missing")",
181+
"Screen Recording: \(screenRecording ? "ready" : "missing")",
182+
"Synthetic input: \(accessibility ? "ready" : "missing")",
183+
"Screenshot check: \((screenshotCheck["ok"] as? Bool) == true ? "ok" : "failed")",
184+
"Frontmost app: \((frontmostApp?["name"] as? String) ?? "n/a")",
185+
"Frontmost window: \((frontmostWindow?["title"] as? String).flatMap { $0.isEmpty ? nil : $0 } ?? "<untitled>")",
186+
]
187+
if !accessibility || !screenRecording {
188+
lines.append("Next: run `macos-cua onboard` to request missing permissions.")
189+
}
120190
try output.emit(
121191
payload,
122-
lines: [
123-
"Accessibility: \(accessibility ? "ready" : "missing")",
124-
"Screen Recording: \(screenRecording ? "ready" : "missing")",
125-
"Synthetic input: \(accessibility ? "ready" : "missing")",
126-
"Screenshot check: \((screenshotCheck["ok"] as? Bool) == true ? "ok" : "failed")",
127-
"Frontmost app: \((frontmostApp?["name"] as? String) ?? "n/a")",
128-
"Frontmost window: \((frontmostWindow?["title"] as? String).flatMap { $0.isEmpty ? nil : $0 } ?? "<untitled>")",
129-
]
192+
lines: lines
130193
)
131194
}
132195

Sources/macos-cua/Core/Recorder.swift

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,13 @@ struct RecorderEnvironment {
6060
return payload
6161
}
6262
var captureScreenshot: (_ target: ScreenshotTarget, _ path: String) throws -> [String: Any] = { target, path in
63-
try ScreenshotSupport.capture(target: target, path: path)
63+
try ScreenshotSupport.capture(
64+
target: target,
65+
path: path,
66+
coordinateSpace: .screen,
67+
coordinateFallback: false,
68+
reportedBounds: ScreenshotSupport.bounds(for: target)
69+
)
6470
}
6571
}
6672

Sources/macos-cua/Desktop/InputSupport.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ enum InputSupport {
7272
}
7373

7474
static func post(_ event: CGEvent?) throws {
75+
try PermissionSupport.require(.accessibility, for: "synthetic input")
7576
guard let event else {
7677
throw CUAError(message: "failed to create CGEvent")
7778
}

0 commit comments

Comments
 (0)