Skip to content

Commit 5749abf

Browse files
authored
Merge branch 'main' into alert-autofix-59
2 parents 15eccb4 + 6c9afd1 commit 5749abf

File tree

8 files changed

+63
-41
lines changed

8 files changed

+63
-41
lines changed

api/apps/mcp_server_app.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515
#
16+
import asyncio
17+
1618
from quart import Response, request
1719
from api.apps import current_user, login_required
1820

@@ -106,7 +108,7 @@ async def create() -> Response:
106108
return get_data_error_result(message="Tenant not found.")
107109

108110
mcp_server = MCPServer(id=server_name, name=server_name, url=url, server_type=server_type, variables=variables, headers=headers)
109-
server_tools, err_message = get_mcp_tools([mcp_server], timeout)
111+
server_tools, err_message = await asyncio.to_thread(get_mcp_tools, [mcp_server], timeout)
110112
if err_message:
111113
return get_data_error_result(err_message)
112114

@@ -158,7 +160,7 @@ async def update() -> Response:
158160
req["id"] = mcp_id
159161

160162
mcp_server = MCPServer(id=server_name, name=server_name, url=url, server_type=server_type, variables=variables, headers=headers)
161-
server_tools, err_message = get_mcp_tools([mcp_server], timeout)
163+
server_tools, err_message = await asyncio.to_thread(get_mcp_tools, [mcp_server], timeout)
162164
if err_message:
163165
return get_data_error_result(err_message)
164166

@@ -242,7 +244,7 @@ async def import_multiple() -> Response:
242244
headers = {"authorization_token": config["authorization_token"]} if "authorization_token" in config else {}
243245
variables = {k: v for k, v in config.items() if k not in {"type", "url", "headers"}}
244246
mcp_server = MCPServer(id=new_name, name=new_name, url=config["url"], server_type=config["type"], variables=variables, headers=headers)
245-
server_tools, err_message = get_mcp_tools([mcp_server], timeout)
247+
server_tools, err_message = await asyncio.to_thread(get_mcp_tools, [mcp_server], timeout)
246248
if err_message:
247249
results.append({"server": base_name, "success": False, "message": err_message})
248250
continue
@@ -322,7 +324,7 @@ async def list_tools() -> Response:
322324
tool_call_sessions.append(tool_call_session)
323325

324326
try:
325-
tools = tool_call_session.get_tools(timeout)
327+
tools = await asyncio.to_thread(tool_call_session.get_tools, timeout)
326328
except Exception as e:
327329
tools = []
328330
return get_data_error_result(message=f"MCP list tools error: {e}")
@@ -340,7 +342,7 @@ async def list_tools() -> Response:
340342
return server_error_response(e)
341343
finally:
342344
# PERF: blocking call to close sessions — consider moving to background thread or task queue
343-
close_multiple_mcp_toolcall_sessions(tool_call_sessions)
345+
await asyncio.to_thread(close_multiple_mcp_toolcall_sessions, tool_call_sessions)
344346

345347

346348
@manager.route("/test_tool", methods=["POST"]) # noqa: F821
@@ -367,10 +369,10 @@ async def test_tool() -> Response:
367369

368370
tool_call_session = MCPToolCallSession(mcp_server, mcp_server.variables)
369371
tool_call_sessions.append(tool_call_session)
370-
result = tool_call_session.tool_call(tool_name, arguments, timeout)
372+
result = await asyncio.to_thread(tool_call_session.tool_call, tool_name, arguments, timeout)
371373

372374
# PERF: blocking call to close sessions — consider moving to background thread or task queue
373-
close_multiple_mcp_toolcall_sessions(tool_call_sessions)
375+
await asyncio.to_thread(close_multiple_mcp_toolcall_sessions, tool_call_sessions)
374376
return get_json_result(data=result)
375377
except Exception as e:
376378
return server_error_response(e)
@@ -424,13 +426,13 @@ async def test_mcp() -> Response:
424426
tool_call_session = MCPToolCallSession(mcp_server, mcp_server.variables)
425427

426428
try:
427-
tools = tool_call_session.get_tools(timeout)
429+
tools = await asyncio.to_thread(tool_call_session.get_tools, timeout)
428430
except Exception as e:
429431
tools = []
430432
return get_data_error_result(message=f"Test MCP error: {e}")
431433
finally:
432434
# PERF: blocking call to close sessions — consider moving to background thread or task queue
433-
close_multiple_mcp_toolcall_sessions([tool_call_session])
435+
await asyncio.to_thread(close_multiple_mcp_toolcall_sessions, [tool_call_session])
434436

435437
for tool in tools:
436438
tool_dict = tool.model_dump()

api/utils/api_utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ def process_args(input_arguments):
163163
if error_arguments:
164164
error_string += "required argument values: {}".format(",".join(["{}={}".format(a[0], a[1]) for a in error_arguments]))
165165
return error_string
166+
return None
166167

167168
def wrapper(func):
168169
@wraps(func)
@@ -409,7 +410,7 @@ def get_parser_config(chunk_method, parser_config):
409410
if default_config is None:
410411
return deep_merge(base_defaults, parser_config)
411412

412-
# Ensure raptor and graphrag fields have default values if not provided
413+
# Ensure raptor and graph_rag fields have default values if not provided
413414
merged_config = deep_merge(base_defaults, default_config)
414415
merged_config = deep_merge(merged_config, parser_config)
415416

common/data_source/confluence_connector.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ def _renew_credentials(self) -> tuple[dict[str, Any], bool]:
186186
# between the db and redis everywhere the credentials might be updated
187187
new_credential_str = json.dumps(new_credentials)
188188
self.redis_client.set(
189-
self.credential_key, new_credential_str, nx=True, ex=self.CREDENTIAL_TTL
189+
self.credential_key, new_credential_str, exp=self.CREDENTIAL_TTL
190190
)
191191
self._credentials_provider.set_credentials(new_credentials)
192192

@@ -1599,8 +1599,8 @@ def _convert_page_to_document(
15991599
semantic_identifier=semantic_identifier,
16001600
extension=".html", # Confluence pages are HTML
16011601
blob=page_content.encode("utf-8"), # Encode page content as bytes
1602-
size_bytes=len(page_content.encode("utf-8")), # Calculate size in bytes
16031602
doc_updated_at=datetime_from_string(page["version"]["when"]),
1603+
size_bytes=len(page_content.encode("utf-8")), # Calculate size in bytes
16041604
primary_owners=primary_owners if primary_owners else None,
16051605
metadata=metadata if metadata else None,
16061606
)

common/data_source/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ class Document(BaseModel):
9494
blob: bytes
9595
doc_updated_at: datetime
9696
size_bytes: int
97+
primary_owners: list
9798
metadata: Optional[dict[str, Any]] = None
9899

99100

common/data_source/slack_connector.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,6 @@ def get_latest_message_time(thread: ThreadType) -> datetime:
167167

168168

169169
def _build_doc_id(channel_id: str, thread_ts: str) -> str:
170-
"""构建文档ID"""
171170
return f"{channel_id}__{thread_ts}"
172171

173172

@@ -179,7 +178,6 @@ def thread_to_doc(
179178
user_cache: dict[str, BasicExpertInfo | None],
180179
channel_access: Any | None,
181180
) -> Document:
182-
"""将线程转换为文档"""
183181
channel_id = channel["id"]
184182

185183
initial_sender_expert_info = expert_info_from_slack_id(
@@ -237,7 +235,6 @@ def filter_channels(
237235
channels_to_connect: list[str] | None,
238236
regex_enabled: bool,
239237
) -> list[ChannelType]:
240-
"""过滤频道"""
241238
if not channels_to_connect:
242239
return all_channels
243240

@@ -381,7 +378,6 @@ def _process_message(
381378
[MessageType], SlackMessageFilterReason | None
382379
] = default_msg_filter,
383380
) -> ProcessedSlackMessage:
384-
"""处理消息"""
385381
thread_ts = message.get("thread_ts")
386382
thread_or_message_ts = thread_ts or message["ts"]
387383
try:
@@ -536,7 +532,6 @@ def retrieve_all_slim_docs_perm_sync(
536532
end: SecondsSinceUnixEpoch | None = None,
537533
callback: Any = None,
538534
) -> GenerateSlimDocumentOutput:
539-
"""获取所有简化文档(带权限同步)"""
540535
if self.client is None:
541536
raise ConnectorMissingCredentialError("Slack")
542537

common/http_client.py

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import os
1717
import time
1818
from typing import Any, Dict, Optional
19-
from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
19+
from urllib.parse import urlparse, urlunparse
2020

2121
from common import settings
2222
import httpx
@@ -58,21 +58,34 @@ def _get_delay(backoff_factor: float, attempt: int) -> float:
5858
_SENSITIVE_QUERY_KEYS = {"client_secret", "secret", "code", "access_token", "refresh_token", "password", "token", "app_secret"}
5959

6060
def _redact_sensitive_url_params(url: str) -> str:
61+
"""
62+
Return a version of the URL that is safe to log.
63+
64+
We intentionally drop query parameters and userinfo to avoid leaking
65+
credentials or tokens via logs. Only scheme, host, port and path
66+
are preserved.
67+
"""
6168
try:
6269
parsed = urlparse(url)
63-
if not parsed.query:
64-
return url
65-
clean_query = []
66-
for k, v in parse_qsl(parsed.query, keep_blank_values=True):
67-
if k.lower() in _SENSITIVE_QUERY_KEYS:
68-
clean_query.append((k, "***REDACTED***"))
69-
else:
70-
clean_query.append((k, v))
71-
new_query = urlencode(clean_query, doseq=True)
72-
redacted_url = urlunparse(parsed._replace(query=new_query))
73-
return redacted_url
70+
# Remove any potential userinfo (username:password@)
71+
netloc = parsed.hostname or ""
72+
if parsed.port:
73+
netloc = f"{netloc}:{parsed.port}"
74+
# Reconstruct URL without query, params, fragment, or userinfo.
75+
safe_url = urlunparse(
76+
(
77+
parsed.scheme,
78+
netloc,
79+
parsed.path,
80+
"", # params
81+
"", # query
82+
"", # fragment
83+
)
84+
)
85+
return safe_url
7486
except Exception:
75-
return url
87+
# If parsing fails, fall back to omitting the URL entirely.
88+
return "<redacted-url>"
7689

7790
def _is_sensitive_url(url: str) -> bool:
7891
"""Return True if URL is one of the configured OAuth endpoints."""
@@ -151,9 +164,15 @@ async def async_request(
151164
except httpx.RequestError as exc:
152165
last_exc = exc
153166
if attempt >= retries:
154-
# Do not log the full URL here to avoid leaking sensitive data.
167+
if not _is_sensitive_url(url):
168+
log_url = _redact_sensitive_url_params(url)
169+
logger.warning(f"async_request exhausted retries for {method}")
170+
raise
171+
delay = _get_delay(backoff_factor, attempt)
172+
if not _is_sensitive_url(url):
173+
log_url = _redact_sensitive_url_params(url)
155174
logger.warning(
156-
f"async_request exhausted retries for {method}; last error: {exc}"
175+
f"async_request attempt {attempt + 1}/{retries + 1} failed for {method}; retrying in {delay:.2f}s"
157176
)
158177
raise
159178
delay = _get_delay(backoff_factor, attempt)

deepdoc/parser/ppt_parser.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -88,12 +88,9 @@ def __call__(self, fnm, from_page, to_page, callback=None):
8888
texts = []
8989
for shape in sorted(
9090
slide.shapes, key=lambda x: ((x.top if x.top is not None else 0) // 10, x.left if x.left is not None else 0)):
91-
try:
92-
txt = self.__extract(shape)
93-
if txt:
94-
texts.append(txt)
95-
except Exception as e:
96-
logging.exception(e)
91+
txt = self.__extract(shape)
92+
if txt:
93+
texts.append(txt)
9794
txts.append("\n".join(texts))
9895

9996
return txts

rag/utils/opendal_conn.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,16 @@ def get_opendal_config():
4545
# Only include non-sensitive keys in logs. Do NOT
4646
# add 'password' or any key containing embedded credentials
4747
# (like 'connection_string').
48-
SAFE_LOG_KEYS = ['scheme', 'host', 'port', 'database', 'table'] # explicitly non-sensitive
49-
loggable_kwargs = {k: v for k, v in kwargs.items() if k in SAFE_LOG_KEYS}
50-
logging.info("Loaded OpenDAL configuration (non sensitive fields only): %s", loggable_kwargs)
48+
safe_log_info = {
49+
"scheme": kwargs.get("scheme"),
50+
"host": kwargs.get("host"),
51+
"port": kwargs.get("port"),
52+
"database": kwargs.get("database"),
53+
"table": kwargs.get("table"),
54+
# indicate presence of credentials without logging them
55+
"has_credentials": any(k in kwargs for k in ("password", "connection_string")),
56+
}
57+
logging.info("Loaded OpenDAL configuration (non sensitive fields only): %s", safe_log_info)
5158

5259
# For safety, explicitly remove sensitive keys from kwargs after use
5360
if "password" in kwargs:

0 commit comments

Comments
 (0)