Skip to content

Commit 344014d

Browse files
fix(security): prevent ReDoS in extractTagContent (#9)
* fix(security): prevent ReDoS in extractTagContent Escape special regex characters in tagName parameter before constructing RegExp to prevent Regular Expression Denial-of-Service attacks from malicious input. * fix(security): eliminate ReDoS in extractTagContent via string-based parsing (#10) * Initial plan * fix(security): replace RegExp with string-based parsing in extractTagContent - Remove dynamic RegExp construction to prevent ReDoS vulnerability - Replace with safe string-based indexOf parsing - Add input validation: only allow alphanumeric, hyphens, underscores - Add length limit (100 chars) for tag names - Maintain all existing functionality and test compatibility Co-authored-by: theagenticguy <9553966+theagenticguy@users.noreply.github.com> * refactor: improve extractTagContent performance and validation - Replace regex validation with character-by-character check - Implement custom case-insensitive search to avoid lowercasing large texts - More efficient for large inputs (no full text copy) - Still prevents ReDoS and maintains all functionality Co-authored-by: theagenticguy <9553966+theagenticguy@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: theagenticguy <9553966+theagenticguy@users.noreply.github.com> * fix: regex type guard --------- Co-authored-by: Copilot <198982749+Copilot@users.noreply.github.com>
1 parent 833c8f7 commit 344014d

1 file changed

Lines changed: 72 additions & 5 deletions

File tree

src/schemas/session.ts

Lines changed: 72 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -158,18 +158,85 @@ function sanitizeText(text: string): string {
158158
}
159159

160160
/**
161-
* Extracts content between XML-like tags.
161+
* Case-insensitive string search helper.
162162
* @param text - The text to search in
163-
* @param tagName - The tag name to look for
163+
* @param searchStr - The string to search for
164+
* @param startIndex - The index to start searching from
165+
* @returns The index where searchStr is found, or -1 if not found
166+
*/
167+
function indexOfCaseInsensitive(
168+
text: string,
169+
searchStr: string,
170+
startIndex = 0,
171+
): number {
172+
const lowerSearchStr = searchStr.toLowerCase();
173+
const textLen = text.length;
174+
const searchLen = searchStr.length;
175+
176+
for (let i = startIndex; i <= textLen - searchLen; i++) {
177+
let match = true;
178+
for (let j = 0; j < searchLen; j++) {
179+
if (text[i + j]?.toLowerCase() !== lowerSearchStr[j]) {
180+
match = false;
181+
break;
182+
}
183+
}
184+
if (match) return i;
185+
}
186+
return -1;
187+
}
188+
189+
/**
190+
* Extracts content between XML-like tags using string-based parsing.
191+
* Avoids RegExp construction with user input to prevent ReDoS attacks.
192+
* @param text - The text to search in
193+
* @param tagName - The tag name to look for (validated for safety)
164194
* @returns The content between tags, or null if not found
165195
*/
166196
export function extractTagContent(
167197
text: string,
168198
tagName: string,
169199
): string | null {
170-
const pattern = new RegExp(`<${tagName}>([\\s\\S]*?)</${tagName}>`, "i");
171-
const match = text.match(pattern);
172-
const content = match?.[1]?.trim() ?? null;
200+
// Validate tagName character by character to avoid even simple regex
201+
// Only allow alphanumeric, hyphens, underscores (common XML/HTML tag patterns)
202+
if (tagName.length === 0 || tagName.length > 100) {
203+
return null;
204+
}
205+
206+
for (let i = 0; i < tagName.length; i++) {
207+
const char = tagName[i];
208+
if (char === undefined) {
209+
return null;
210+
}
211+
const isValid =
212+
(char >= "a" && char <= "z") ||
213+
(char >= "A" && char <= "Z") ||
214+
(char >= "0" && char <= "9") ||
215+
char === "_" ||
216+
char === "-";
217+
if (!isValid) {
218+
return null;
219+
}
220+
}
221+
222+
// Use string-based parsing with case-insensitive search
223+
const openTag = `<${tagName}>`;
224+
const closeTag = `</${tagName}>`;
225+
226+
const startIdx = indexOfCaseInsensitive(text, openTag);
227+
if (startIdx === -1) {
228+
return null;
229+
}
230+
231+
const contentStart = startIdx + openTag.length;
232+
const endIdx = indexOfCaseInsensitive(text, closeTag, contentStart);
233+
if (endIdx === -1) {
234+
return null;
235+
}
236+
237+
// Extract content from original text to preserve case
238+
const content = text.substring(contentStart, endIdx).trim();
239+
173240
// Sanitize to remove any binary/control characters
174241
return content ? sanitizeText(content) : null;
175242
}

0 commit comments

Comments
 (0)