Skip to content

Commit f72631b

Browse files
Improve the standalone symbol extractor so it no longer requires a JSON crash log
1 parent f8ec8d1 commit f72631b

File tree

1 file changed

+60
-69
lines changed

1 file changed

+60
-69
lines changed
Lines changed: 60 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,45 @@
1-
import json
1+
#!/usr/bin/env python3
2+
23
import sqlite3
34
import re
45
import argparse
56
from pathlib import Path
67

7-
def replace_redacted_in_crash_log(text_log: str, resolved_json: dict) -> str:
8-
# Build symbol lookup table from resolved_json
9-
addr_to_symbol = {}
10-
11-
threads = resolved_json.get("crash", {}).get("threads", [])
12-
for thread in threads:
13-
for frame in thread.get("backtrace", {}).get("contents", []):
14-
addr = frame.get("instruction_addr")
15-
symbol = frame.get("symbol_name")
16-
lib = frame.get("object_name")
17-
if addr is not None and symbol and symbol != "<redacted>" and lib is not None:
18-
# print(f"{addr=}, {symbol=}, {lib=}")
19-
addr_to_symbol[lib] = addr_to_symbol.get(lib, {})
20-
addr_to_symbol[lib][addr] = symbol
8+
def extract_crash_metadata(crash: str) -> dict:
    """Pull OS and CPU-architecture details out of an Apple crash log header.

    Scans every line of *crash* for the ``OS Version:`` and ``Code Type:``
    header fields. If a field appears more than once, the last occurrence
    wins; a field that is absent or malformed leaves its values as ``None``.

    Args:
        crash: Full text of a ``.crash`` report.

    Returns:
        A dict with the keys ``osType``, ``osVersion``, ``buildNumber`` and
        ``cpuArch``. Each value is a string, or ``None`` when the
        corresponding header field could not be parsed.
    """
    # Map Apple-style arch names to canonical strings. Unknown names are
    # passed through lower-cased.
    # NOTE(review): "arm-64" is mapped to "arm64e" (not "arm64"); presumably
    # this matches the arch value stored in the symbols database - confirm.
    cpu_arch_map = {
        "arm-64": "arm64e",
        "arm64": "arm64",
        "x86-64": "x86_64",
    }

    os_type = os_version = build_number = cpu_arch = None

    for line in crash.splitlines():
        line = line.strip()

        # Example: "OS Version: iOS 18.6.2 (22G100)"
        # The OS name may contain spaces ("iPhone OS", "Mac OS X"), so it is
        # captured lazily up to the dotted version number instead of as a
        # single \w+ word.
        if line.startswith("OS Version:"):
            match = re.match(
                r"OS Version:\s+(\S.*?)\s+([\d.]+)\s+\(([^)]+)\)", line
            )
            if match:
                os_type, os_version, build_number = match.groups()
        # Example: "Code Type: ARM-64 (Native)"
        elif line.startswith("Code Type:"):
            match = re.search(r"Code Type:\s+([A-Za-z0-9\-]+)", line)
            if match:
                arch = match.group(1).lower()
                cpu_arch = cpu_arch_map.get(arch, arch)

    return {
        "osType": os_type,
        "osVersion": os_version,
        "buildNumber": build_number,
        "cpuArch": cpu_arch,
    }
38+
39+
def replace_redacted_in_crash_log(crash: str, sqlite_db_path: str) -> str:
40+
metadata = extract_crash_metadata(crash)
41+
conn = sqlite3.connect(sqlite_db_path)
42+
cursor = conn.cursor()
2143

2244
# Match stack frame lines, matches:
2345
# 7 Foundation 0x0000000199bc8500 0x199b11000 + 750848 (<redacted> + 212)
@@ -32,75 +54,44 @@ def replace_redacted_in_crash_log(text_log: str, resolved_json: dict) -> str:
3254
)
3355

3456
def replacer(match):
57+
offset = int(match.group("offset")) - int(match.group("delta"))
3558
lib = match.group("lib")
36-
absaddr = int(match.group("absaddr"), 16)
37-
symbol_name = addr_to_symbol.get(lib).get(absaddr)
38-
if symbol_name:
39-
return match.group(0).replace("<redacted>", symbol_name)
59+
query = """
60+
SELECT symbols.name
61+
FROM symbols
62+
JOIN files ON symbols.file_id = files.id
63+
JOIN builds ON files.build_id = builds.id
64+
WHERE symbols.address = ?
65+
AND files.name = ?
66+
AND builds.build = ?
67+
AND builds.arch = ?
68+
LIMIT 1;
69+
"""
70+
# print("Searching for %s, %s, %s, %s" % (offset, lib, metadata["buildNumber"], metadata["cpuArch"]))
71+
cursor.execute(query, (offset, lib, metadata["buildNumber"], metadata["cpuArch"]))
72+
result = cursor.fetchone()
73+
if result:
74+
return match.group(0).replace("<redacted>", result[0])
4075
return match.group(0)
4176

42-
return frame_regex.sub(replacer, text_log)
43-
44-
def resolve_redacted_symbols(data, sqlite_db_path):
45-
# Connect to the SQLite database
46-
conn = sqlite3.connect(sqlite_db_path)
47-
cursor = conn.cursor()
48-
49-
os_version = data["system"]["os_version"]
50-
51-
for thread in data["crash"]["threads"]:
52-
if "backtrace" not in thread:
53-
continue
54-
for frame in thread["backtrace"]["contents"]:
55-
if frame.get("symbol_name") == "<redacted>":
56-
symbol_addr = frame["symbol_addr"]
57-
object_addr = frame["object_addr"]
58-
object_name = frame["object_name"]
59-
60-
offset = symbol_addr - object_addr
61-
62-
query = """
63-
SELECT symbols.name
64-
FROM symbols
65-
JOIN files ON symbols.file_id = files.id
66-
JOIN builds ON files.build_id = builds.id
67-
WHERE symbols.address = ?
68-
AND files.name = ?
69-
AND builds.build = ?
70-
LIMIT 1;
71-
"""
72-
73-
cursor.execute(query, (offset, object_name, os_version))
74-
result = cursor.fetchone()
75-
76-
if result:
77-
frame["symbol_name"] = result[0]
78-
else:
79-
frame["symbol_name"] = "<unresolved>"
80-
77+
retval = frame_regex.sub(replacer, crash)
8178
conn.close()
82-
return data
79+
return retval
8380

8481

85-
parser = argparse.ArgumentParser(description="Replace '<redacted>' symbols in Apple crash logs using JSON crash data")
86-
parser.add_argument("json_file", help="Path to JSON crash report (*.json)")
82+
parser = argparse.ArgumentParser(description="Replace '<redacted>' symbols in Apple crash logs using symbols.db")
8783
parser.add_argument("crash_log", help="Path to Apple crash log (*.crash)")
8884
parser.add_argument("symbols_db", help="Path to symbols.db")
8985
args = parser.parse_args()
9086

91-
json_path = Path(args.json_file)
9287
crash_log_path = Path(args.crash_log)
9388
symbols_db_path = Path(args.symbols_db)
94-
if not json_path.exists():
95-
print(f"Error: JSON file not found: {json_path}", file=sys.stderr)
96-
sys.exit(1)
9789
if not crash_log_path.exists():
9890
print(f"Error: Crash log file not found: {crash_log_path}", file=sys.stderr)
9991
sys.exit(1)
10092
if not symbols_db_path.exists():
10193
print(f"Warning: symbols.db file not found: {symbols_db_path}", file=sys.stderr)
10294

103-
with open(json_path, 'r') as f:
104-
data =resolve_redacted_symbols(json.load(f), symbols_db_path)
105-
with open(crash_log_path, 'r') as t:
106-
print(replace_redacted_in_crash_log(t.read(), data))
95+
with open(crash_log_path, 'r') as t:
96+
symbolicated_crash = replace_redacted_in_crash_log(t.read(), symbols_db_path)
97+
print(symbolicated_crash)

0 commit comments

Comments
 (0)