|
1 | 1 | import json
|
2 |
| -import re |
3 | 2 |
|
| 3 | +import regex as re |
4 | 4 | import structlog
|
5 | 5 | from litellm import ChatCompletionRequest
|
6 | 6 |
|
|
19 | 19 | logger = structlog.get_logger("codegate")
|
20 | 20 |
|
21 | 21 |
|
| 22 | +# Pre-compiled regex patterns for performance |
| 23 | +markdown_code_block = re.compile(r"```.*?```", flags=re.DOTALL) |
| 24 | +markdown_file_listing = re.compile(r"⋮...*?⋮...\n\n", flags=re.DOTALL) |
| 25 | +environment_details = re.compile(r"<environment_details>.*?</environment_details>", flags=re.DOTALL) |
| 26 | + |
| 27 | + |
22 | 28 | class CodegateContextRetriever(PipelineStep):
|
23 | 29 | """
|
24 | 30 | Pipeline step that adds a context message to the completion request when it detects
|
@@ -95,11 +101,9 @@ async def process( # noqa: C901
|
95 | 101 |
|
96 | 102 | # Remove code snippets and file listing from the user messages and search for bad packages
|
97 | 103 | # in the rest of the user query/messages
|
98 |
| - user_messages = re.sub(r"```.*?```", "", user_message, flags=re.DOTALL) |
99 |
| - user_messages = re.sub(r"⋮...*?⋮...\n\n", "", user_messages, flags=re.DOTALL) |
100 |
| - user_messages = re.sub( |
101 |
| - r"<environment_details>.*?</environment_details>", "", user_messages, flags=re.DOTALL |
102 |
| - ) |
| 104 | + user_messages = markdown_code_block.sub("", user_message) |
| 105 | + user_messages = markdown_file_listing.sub("", user_messages) |
| 106 | + user_messages = environment_details.sub("", user_messages) |
103 | 107 |
|
104 | 108 | # split messages on <task> tags and newlines, to avoid passing so much content in the search
|
105 | 109 | split_messages = re.split(r"</?task>|\n|\\n", user_messages)
|
|
0 commit comments