 import re
 from rich.progress import Progress
 
-# ------------------------------
-# Helper functions
-# ------------------------------
-
 def clean_signature(str_sig: str) -> str:
-    """
-    Remove compiler-generated patterns like b__xxx_y in a method signature.
-    We keep the original class/generic info intact.
-    """
-    str_sig = re.sub(r"(<[^>]+>)b__\d+_\d+", r"\1", str_sig)
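+    # Normalise the C# signature so the same method matches across dumps:
+    # drop access/storage modifiers and the return type, collapse generic
+    # arguments to "<>", and make sure it ends with a parameter list.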
+    str_sig = re.sub(r"^(?:public|private|protected|internal|static|virtual|sealed|extern|override|\s)+", "", str_sig)
+    str_sig = re.sub(r"^(?:System\.)?\w+\s+", "", str_sig, count=1)
+    str_sig = re.sub(r"<[^>]*>", "<>", str_sig)
+    str_sig = str_sig.strip().rstrip(";")
     if not str_sig.endswith(")"):
-        str_sig = str_sig.rstrip(";") + "()"
+        str_sig += "()"
     return str_sig.strip()
 
-
 def parse_dump(str_path: str):
-    """
-    Parse a dump.cs file:
-    - g_offset_to_info: dict mapping offset -> (full_class, signature)
-    - g_info_to_offset: dict mapping (full_class, signature) -> offset
-    """
     g_offset_to_info = {}
     g_info_to_offset = {}
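+    # stack of enclosing class names, used to build nested names like "Outer+Inner"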
+    g_class_stack = []
 
-    g_current_class = None
-
-    # Match full class name like A.B.C
-    m_class_pattern = re.compile(r"^\s*(?:public|private|internal)\s+class\s+([\w\.]+)")
-    m_method_pattern = re.compile(
-        r"^\s*(public|private|protected|internal).*?\)\s*;\s*//\s*(0x[0-9a-fA-F]+)"
-    )
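+    # class declarations (access modifier optional, sealed/static/abstract allowed)
+    # and method lines of the form "...); // 0x<offset>"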
+    m_class_pattern = re.compile(r"^\s*(?:public|private|internal|protected)?\s*(?:sealed\s+|static\s+|abstract\s+)?class\s+([\w\.<>\+]+)")
+    m_method_pattern = re.compile(r"^\s*(?:public|private|protected|internal).*?\)\s*;\s*//\s*(0x[0-9a-fA-F]+)")
 
     with open(str_path, "r", encoding="utf-8", errors="ignore") as f:
         g_lines = f.readlines()
 
-    # Show progress bar for user feedback
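+    # running count of open braces, used to detect when a class body ends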
+    brace_depth = 0
+
     with Progress() as g_progress:
         task = g_progress.add_task(f"[cyan]Parsing {str_path}...", total=len(g_lines))
-
         for str_line in g_lines:
             m_cls = m_class_pattern.search(str_line)
             if m_cls:
-                g_current_class = m_cls.group(1)
-            else:
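+                # nested classes are recorded as "Outer+Inner" (.NET nested-type naming)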
+                cls_name = m_cls.group(1)
+                full_cls = f"{g_class_stack[-1]}+{cls_name}" if g_class_stack else cls_name
+                g_class_stack.append(full_cls)
+
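+            # heuristic scope tracking: count braces and pop the innermost class once
+            # its closing brace brings the depth back below the number of open classes
+            # (assumes braces in the dump only delimit class bodies, not namespaces)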
+            brace_depth += str_line.count("{") - str_line.count("}")
+            for _ in range(str_line.count("}")):
+                if g_class_stack and brace_depth < len(g_class_stack):
+                    g_class_stack.pop()
+
+            if g_class_stack:
                 m_method = m_method_pattern.search(str_line)
-                if m_method and g_current_class:
-                    str_sig = str_line.split("//")[0].strip()
-                    str_sig = clean_signature(str_sig)
-                    g_offset = m_method.group(2)
+                if m_method:
+                    g_current_class = g_class_stack[-1]
+                    str_sig = clean_signature(str_line.split("//")[0].strip())
+                    g_offset = m_method.group(1)
                     g_offset_to_info[g_offset] = (g_current_class, str_sig)
                     g_info_to_offset[(g_current_class, str_sig)] = g_offset
 
             g_progress.advance(task)
 
     return g_offset_to_info, g_info_to_offset
 
-
-# ------------------------------
-# Mapping old offsets to new
-# ------------------------------
-
 def map_offsets(str_old_dump: str, str_new_dump: str, g_offsets: list):
     g_old_map, _ = parse_dump(str_old_dump)
     _, g_new_map = parse_dump(str_new_dump)
-
     g_results = {}
 
     for g_off in g_offsets:
         if g_off not in g_old_map:
             g_results[g_off] = (None, None, None)
             continue
-
         g_cls, str_sig = g_old_map[g_off]
         g_new_off = g_new_map.get((g_cls, str_sig))
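+        # compiler-generated lambda names carry indices (b__X_Y) that change
+        # between builds, so retry with those indices normalised on both sides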
+        if not g_new_off:
+            fallback_sig = re.sub(r"b__\d+_\d+", "b__", str_sig)
+            g_new_off = next((off for (cls, sig), off in g_new_map.items()
+                              if cls == g_cls and re.sub(r"b__\d+_\d+", "b__", sig) == fallback_sig), None)
         g_results[g_off] = (g_new_off, g_cls, str_sig)
 
     return g_results
 
-
-# ------------------------------
-# Main processing
-# ------------------------------
-
 def process_input(str_input_file="INPUT.txt", str_output_file="OUTPUT.txt",
                   str_old_dump="dump_old.cs", str_new_dump="dump.cs"):
-
-    # Read all offsets from input
     with open(str_input_file, "r", encoding="utf-8", errors="ignore") as f:
         g_input_lines = f.readlines()
 
-    g_all_offsets = re.findall(r"0x[0-9a-fA-F]+", "".join(g_input_lines))
-    g_all_offsets = list(dict.fromkeys(g_all_offsets))  # remove duplicates, keep order
-
-    # Map old offsets to new offsets
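+    # collect the unique hex offsets from the input, preserving first-seen order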
+    g_all_offsets = list(dict.fromkeys(re.findall(r"0x[0-9a-fA-F]+", "".join(g_input_lines))))
     g_mapped_offsets = map_offsets(str_old_dump, str_new_dump, g_all_offsets)
 
-    # Replace offsets in each line with progress
     with Progress() as g_progress, open(str_output_file, "w", encoding="utf-8") as f_out:
         task = g_progress.add_task("[green]Processing input file...", total=len(g_input_lines))
-
         for str_line in g_input_lines:
-
             def replace_offset(m):
                 g_off = m.group(0)
                 g_new_off, _, _ = g_mapped_offsets.get(g_off, (None, None, None))
-                # if not found, keep original offset
                 return g_new_off if g_new_off else g_off
-
-            str_new_line = re.sub(r"0x[0-9a-fA-F]+", replace_offset, str_line)
-            f_out.write(str_new_line)
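+            # rewrite every offset in the line; unmapped offsets are left untouched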
+            f_out.write(re.sub(r"0x[0-9a-fA-F]+", replace_offset, str_line))
             g_progress.advance(task)
 
-    # Show mapping result for user clarity
-    print("=== Offset Mapping Result ===")
     for g_old, (g_new, g_cls, str_sig) in g_mapped_offsets.items():
-        if g_new:
-            print(f"{g_old} -> {g_new} [{g_cls}] {str_sig}")
-        else:
-            print(f"{g_old} -> NOT FOUND in new dump")
+        print(f"{g_old} -> {g_new if g_new else 'NOT FOUND'} [{g_cls}] {str_sig}")
 
     print(f"\nDone! Output saved to {str_output_file}")
 
-
-# ------------------------------
-# Entry point
-# ------------------------------
-
 if __name__ == "__main__":
     str_old_dump = input("Enter old dump file (default dump_old.cs): ").strip() or "dump_old.cs"
     str_new_dump = input("Enter new dump file (default dump.cs): ").strip() or "dump.cs"
-
     process_input("INPUT.txt", "OUTPUT.txt", str_old_dump, str_new_dump)