|
1 | 1 | package com.abmo.util |
2 | 2 |
|
3 | 3 | import com.abmo.common.Logger |
4 | | -import java.util.regex.Pattern |
5 | 4 |
|
/**
 * Extracts selected fragments from an obfuscated JavaScript response and
 * reassembles them into a smaller script.
 *
 * The assembled script ends with a `java.lang.System.out.println(JSON.stringify(b))`
 * call, so it is presumably meant to be evaluated by a JVM-hosted JavaScript
 * engine (NOTE(review): confirm against the caller).
 */
class AbyssJsCodeExtractor {

    /**
     * Builds the reduced script from [jsResponse].
     *
     * Each required fragment is looked up in turn; the first one that cannot
     * be found is reported through [Logger.error] and aborts the extraction.
     *
     * @param jsResponse raw JavaScript text; may be null or blank.
     * @return the assembled script, or null when the input is null/blank or a
     *   required fragment is missing.
     */
    fun getCompleteJsCode(jsResponse: String?): String? {
        if (jsResponse.isNullOrBlank()) {
            return reportFailure("Empty response received")
        }

        val varName = extractVariableName(jsResponse)
            ?: return reportFailure("Variable name not found")

        val sourcesEncodedDeclaration = extractComplexConcatenation(jsResponse, varName)
            ?: return reportFailure("Function chain not found")

        val objectAssignmentPattern = extractObjectAssignmentPattern(jsResponse)
            ?: return reportFailure("Object assignment pattern not found")

        val commaVariableName = extractCommaVariableName(jsResponse)
            ?: return reportFailure("Comma variable name not found")

        return buildCompleteCode(
            jsResponse = jsResponse,
            varName = varName,
            sourcesEncodedDeclaration = sourcesEncodedDeclaration,
            objectAssignmentPattern = objectAssignmentPattern,
            commaVariableName = commaVariableName,
        )
    }

    /** Logs [message] as an error and yields the null result used by [getCompleteJsCode]. */
    private fun reportFailure(message: String): String? {
        Logger.error(message)
        return null
    }

    /**
     * Concatenates the extracted fragments, one per line, in the order the
     * generated script needs them. Optional fragments that are not found are
     * skipped rather than written as the text "null".
     */
    private fun buildCompleteCode(
        jsResponse: String,
        varName: String,
        sourcesEncodedDeclaration: String,
        objectAssignmentPattern: String,
        commaVariableName: String,
    ): String = buildString {
        extractShiftPushFunctions(jsResponse).forEach { appendLine(it) }
        extractSelfAssigningFunction(jsResponse)?.let { appendLine(it) }
        extractFunction(jsResponse, varName)?.let { appendLine(it) }
        appendLine("var $commaVariableName = {};")
        extractVariableDeclarations(jsResponse).forEach { appendLine(it) }
        appendLine(sourcesEncodedDeclaration)
        appendLine(replaceWithObjectAssign(objectAssignmentPattern))
        // `b` is the variable introduced by replaceWithObjectAssign.
        appendLine("java.lang.System.out.println(JSON.stringify(b))")
    }

    /** Returns the right-hand identifier of the first `var a = b;` single-letter alias. */
    private fun extractVariableName(jsCode: String): String? =
        VARIABLE_ASSIGNMENT_REGEX.find(jsCode)?.groupValues?.get(1 + 1)

    /** Returns the name of the first `, name = {}` declaration in [jsCode]. */
    private fun extractCommaVariableName(jsCode: String): String? =
        COMMA_VARIABLE_ASSIGNMENT_REGEX.find(jsCode)?.groupValues?.get(1)

    /**
     * Finds the `var x = {}; ... var y = {...x,` fragment located between the
     * first "JSON" and the last "..." in [jsCode], and completes it with
     * `sourcesEncoded: sourcesEncoded}`.
     *
     * Returns null when either marker is missing, when they appear in the
     * wrong order, or when the fragment is not found. Without the index guard
     * `substring` would throw for such input instead of letting the caller
     * report a missing pattern.
     */
    private fun extractObjectAssignmentPattern(jsCode: String): String? {
        val startIndex = jsCode.indexOf("JSON")
        val endIndex = jsCode.lastIndexOf("...")
        if (startIndex < 0 || endIndex < startIndex) return null

        val relevantSection = jsCode.substring(startIndex, endIndex)
        return OBJECT_ASSIGNMENT_PATTERN_REGEX.find(relevantSection)
            ?.value
            ?.plus("sourcesEncoded: sourcesEncoded}")
    }

    /**
     * Picks the longest `X(0x..) + ...` concatenation chain in [jsCode] and
     * turns it into two declarations: an alias from the chain's leading
     * function name to [varName], and `sourcesEncoded` holding the chain.
     */
    private fun extractComplexConcatenation(jsCode: String, varName: String): String? {
        val longestMatch = COMPLEX_CONCATENATION_REGEX.findAll(jsCode)
            .maxByOrNull { it.value.length }
            ?: return null

        // Group 1 is the single-letter function name that starts the chain.
        val oldName = longestMatch.groupValues[1]
        return "var $oldName = $varName;\nvar sourcesEncoded = ${longestMatch.value}"
    }

    /** Returns every parenthesised `(function (...) {...})` expression that mentions both shift and push. */
    private fun extractShiftPushFunctions(jsCode: String): List<String> =
        FUNCTION_WITH_SHIFT_PUSH_REGEX.findAll(jsCode)
            .map { it.value }
            .filter { functionCode ->
                SHIFT_USAGE_REGEX.containsMatchIn(functionCode) &&
                    PUSH_USAGE_REGEX.containsMatchIn(functionCode)
            }
            .toList()

    /** Returns the first function that stores an array, reassigns itself to return it, and calls itself. */
    private fun extractSelfAssigningFunction(jsCode: String): String? =
        SELF_ASSIGNING_FUNCTION_REGEX.find(jsCode)?.value

    /**
     * Returns the full text of `function <functionName>(...) {...}`, matching
     * bodies with up to two levels of nested braces.
     *
     * [functionName] is escaped so that characters that are legal in
     * JavaScript identifiers but special in regexes (such as `$`) match literally.
     */
    private fun extractFunction(jsCode: String, functionName: String): String? {
        val pattern = Regex(
            """function\s+(${Regex.escape(functionName)})\s*\([^)]*\)\s*\{((?:[^{}]++|\{(?:[^{}]++|\{[^{}]*+})*+})*+)}"""
        )
        return pattern.find(jsCode)?.value
    }

    /**
     * Rewrites the `var y = {...x, sourcesEncoded: sourcesEncoded}` part of
     * [code] into an `Object.assign` based declaration of `b`.
     *
     * The spread operand is taken after the LAST "..." because the fragment
     * produced by [extractObjectAssignmentPattern] always ends with the wanted
     * spread, while earlier text in the fragment may contain other spreads.
     */
    private fun replaceWithObjectAssign(code: String): String {
        val variableName = code.substringAfterLast("...")
            .substringBefore(",sourcesEncoded: sourcesEncoded")
        val replacement = "var b = Object.assign({}, $variableName);\nb.sourcesEncoded = sourcesEncoded;"

        // Lambda form keeps the replacement literal (no `$` group expansion).
        return OBJECT_ASSIGN_REPLACEMENT_REGEX.replace(code) { replacement }
    }

    /** Returns every `var name = {};` statement found in [jsCode], in order of appearance. */
    private fun extractVariableDeclarations(jsCode: String): List<String> =
        EMPTY_OBJECT_DECLARATION_REGEX.findAll(jsCode)
            .map { it.value }
            .toList()

    companion object {
        /** Parenthesised function expression, optionally invoked, with up to two nested brace levels. */
        private val FUNCTION_WITH_SHIFT_PUSH_REGEX = Regex(
            """\(function\s*\([^)]*\)\s*\{(?:[^{}]++|\{(?:[^{}]++|\{[^{}]*+})*+})*+}(?:\([^)]*\))?\s*;?\)""",
            RegexOption.DOT_MATCHES_ALL
        )

        /** Function that holds an array, replaces itself with a getter for it, then calls itself. */
        private val SELF_ASSIGNING_FUNCTION_REGEX = Regex(
            """function\s+(\w+)\s*\(\)\s*\{\s*var\s+(\w+)\s*=\s*\[.*?];\s*\1\s*=\s*function\s*\(\)\s*\{\s*return\s+\2;\s*};\s*return\s+\1\(\);\s*}""",
            RegexOption.DOT_MATCHES_ALL
        )

        /** Single-letter alias declaration `var a = b;`. */
        private val VARIABLE_ASSIGNMENT_REGEX = Regex("""var\s+([a-zA-Z])\s*=\s*([a-zA-Z]);""")

        /** Empty-object declaration that follows a comma: `, name = {}`. */
        private val COMMA_VARIABLE_ASSIGNMENT_REGEX = Regex(",\\s*([a-zA-Z_]\\w*)\\s*=\\s*\\{\\s*}")

        /** `var x = {};` followed later by `var y = {...x,`. */
        private val OBJECT_ASSIGNMENT_PATTERN_REGEX = Regex(
            "var\\s+([a-zA-Z_]\\w*)\\s*=\\s*\\{\\s*};.*?var\\s+\\w+\\s*=\\s*\\{\\s*\\.\\.\\.\\1\\s*,",
            RegexOption.DOT_MATCHES_ALL
        )

        /** Chain of `X(0x..)` calls and quoted strings joined with `+`. */
        private val COMPLEX_CONCATENATION_REGEX = Regex(
            """([A-Za-z])\(0x[a-fA-F0-9]+\)(?:\s*\+\s*(?:\([A-Za-z]\(0x[a-fA-F0-9]+\)(?:\s*\+\s*[A-Za-z]\(0x[a-fA-F0-9]+\))*(?:\s*\+\s*'[^']*')?\)|[A-Za-z]\(0x[a-fA-F0-9]+\)|'[^']*'))*""",
            RegexOption.DOT_MATCHES_ALL
        )

        /** The spread-based declaration that [replaceWithObjectAssign] rewrites. */
        private val OBJECT_ASSIGN_REPLACEMENT_REGEX = Regex(
            """var\s+[a-zA-Z]\s*=\s*\{\.\.\.[a-zA-Z],\s*sourcesEncoded:\s*sourcesEncoded\s*}"""
        )

        /** Any mention of `shift` as a word, bracket property or method call. */
        private val SHIFT_USAGE_REGEX = Regex("""\bshift\b|\['shift']|\.shift\(""")

        /** Any mention of `push` as a word, bracket property or method call. */
        private val PUSH_USAGE_REGEX = Regex("""\bpush\b|\['push']|\.push\(""")

        /** Stand-alone empty-object declaration `var name = {};`. */
        private val EMPTY_OBJECT_DECLARATION_REGEX =
            Regex("""var\s+([a-zA-Z_$][a-zA-Z0-9_$]*)\s*=\s*\{\s*}\s*;""")
    }
}
135 | 160 |
|
136 | 161 |
|
|
0 commit comments