Skip to content

Commit d38f8a1

Browse files
JinHai-CN and KevinHuSh authored
Add license and Fix IDE warnings (#11985)
### What problem does this PR solve? - Add license - Fix IDE warnings ### Type of change - [x] Refactoring --------- Signed-off-by: Jin Hai <haijin.chn@gmail.com> Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
1 parent 8e4d011 commit d38f8a1

File tree

10 files changed

+41
-27
lines changed

10 files changed

+41
-27
lines changed

agent/component/message.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -204,10 +204,10 @@ def thoughts(self) -> str:
204204

205205
def _parse_markdown_table_lines(self, table_lines: list):
206206
"""
207-
Parse a list of markdown table lines into a pandas DataFrame.
207+
Parse a list of Markdown table lines into a pandas DataFrame.
208208
209209
Args:
210-
table_lines: List of strings, each representing a row in the markdown table
210+
table_lines: List of strings, each representing a row in the Markdown table
211211
(excluding separator lines like |---|---|)
212212
213213
Returns:
@@ -278,7 +278,7 @@ def _convert_content(self, content):
278278
# Debug: log the content being parsed
279279
logging.info(f"XLSX Parser: Content length={len(content) if content else 0}, first 500 chars: {content[:500] if content else 'None'}")
280280

281-
# Try to parse ALL markdown tables from the content
281+
# Try to parse ALL Markdown tables from the content
282282
# Each table will be written to a separate sheet
283283
tables = [] # List of (sheet_name, dataframe)
284284

common/data_source/__init__.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,26 @@
11

22
"""
33
Thanks to https://github.com/onyx-dot-app/onyx
4+
5+
Content of this directory is under the "MIT Expat" license as defined below.
6+
7+
Permission is hereby granted, free of charge, to any person obtaining a copy
8+
of this software and associated documentation files (the "Software"), to deal
9+
in the Software without restriction, including without limitation the rights
10+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
copies of the Software, and to permit persons to whom the Software is
12+
furnished to do so, subject to the following conditions:
13+
14+
The above copyright notice and this permission notice shall be included in all
15+
copies or substantial portions of the Software.
16+
17+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23+
SOFTWARE.
424
"""
525

626
from .blob_connector import BlobStorageConnector

common/data_source/confluence_connector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -717,7 +717,7 @@ def paginated_cql_user_retrieval(
717717
"""
718718
The search/user endpoint can be used to fetch users.
719719
It's a separate endpoint from the content/search endpoint used only for users.
720-
Otherwise it's very similar to the content/search endpoint.
720+
It's very similar to the content/search endpoint.
721721
"""
722722

723723
# this is needed since there is a live bug with Confluence Server/Data Center

common/data_source/discord_connector.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -233,8 +233,8 @@ class DiscordConnector(LoadConnector, PollConnector):
233233

234234
def __init__(
235235
self,
236-
server_ids: list[str] = [],
237-
channel_names: list[str] = [],
236+
server_ids: list[str] | None = None,
237+
channel_names: list[str] | None = None,
238238
# YYYY-MM-DD
239239
start_date: str | None = None,
240240
batch_size: int = INDEX_BATCH_SIZE,

common/data_source/gmail_connector.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import logging
2-
import os
32
from typing import Any
43
from google.oauth2.credentials import Credentials as OAuthCredentials
54
from google.oauth2.service_account import Credentials as ServiceAccountCredentials

common/data_source/google_drive/connector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1210,7 +1210,7 @@ def yield_all_docs_from_checkpoint_connector(
12101210
creds = get_credentials_from_env(email, oauth=True)
12111211
print("Credentials loaded successfully")
12121212
print(f"{creds=}")
1213-
sys.exit(0)
1213+
# sys.exit(0)
12141214
connector = GoogleDriveConnector(
12151215
include_shared_drives=False,
12161216
shared_drive_urls=None,

common/data_source/google_drive/file_retrieval.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,6 @@ def get_all_files_for_oauth(
341341

342342
# Just in case we need to get the root folder id
343343
def get_root_folder_id(service: Resource) -> str:
344-
# we dont paginate here because there is only one root folder per user
344+
# we don't paginate here because there is only one root folder per user
345345
# https://developers.google.com/drive/api/guides/v2-to-v3-reference
346346
return service.files().get(fileId="root", fields=GoogleFields.ID.value).execute()[GoogleFields.ID.value]

deepdoc/parser/resume/step_two.py

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -147,15 +147,11 @@ def forEdu(cv):
147147
edu_nst.append(e)
148148

149149
cv["sch_rank_kwd"] = []
150-
if cv["school_rank_int"] <= 20 \
151-
or ("海外名校" in fea and cv["school_rank_int"] <= 200):
150+
if cv["school_rank_int"] <= 20 or ("海外名校" in fea and cv["school_rank_int"] <= 200):
152151
cv["sch_rank_kwd"].append("顶尖学校")
153-
elif cv["school_rank_int"] <= 50 and cv["school_rank_int"] > 20 \
154-
or ("海外名校" in fea and cv["school_rank_int"] <= 500 and \
155-
cv["school_rank_int"] > 200):
152+
elif 50 >= cv["school_rank_int"] > 20 or ("海外名校" in fea and 500 >= cv["school_rank_int"] > 200):
156153
cv["sch_rank_kwd"].append("精英学校")
157-
elif cv["school_rank_int"] > 50 and ("985" in fea or "211" in fea) \
158-
or ("海外名校" in fea and cv["school_rank_int"] > 500):
154+
elif cv["school_rank_int"] > 50 and ("985" in fea or "211" in fea) or ("海外名校" in fea and cv["school_rank_int"] > 500):
159155
cv["sch_rank_kwd"].append("优质学校")
160156
else:
161157
cv["sch_rank_kwd"].append("一般学校")
@@ -208,8 +204,7 @@ def forEdu(cv):
208204
cv["tag_kwd"].append("好学校")
209205
cv["tag_kwd"].append("好学历")
210206
break
211-
if (len(cv.get("degree_kwd", [])) >= 1 and \
212-
"本科" in cv["degree_kwd"] and \
207+
if (len(cv.get("degree_kwd", [])) >= 1 and "本科" in cv["degree_kwd"] and
213208
any([d.lower() in ["硕士", "博士", "mba", "博士"] for d in cv.get("degree_kwd", [])])) \
214209
or all([d.lower() in ["硕士", "博士", "mba", "博士后"] for d in cv.get("degree_kwd", [])]) \
215210
or any([d in ["mba", "emba", "博士后"] for d in cv.get("degree_kwd", [])]):
@@ -406,7 +401,7 @@ def forWork(cv):
406401

407402
def turnTm2Dt(b):
408403
if not b:
409-
return
404+
return None
410405
b = str(b).strip()
411406
if re.match(r"[0-9]{10,}", b):
412407
b = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(b[:10])))
@@ -416,7 +411,7 @@ def turnTm2Dt(b):
416411
def getYMD(b):
417412
y, m, d = "", "", "01"
418413
if not b:
419-
return (y, m, d)
414+
return y, m, d
420415
b = turnTm2Dt(b)
421416
if re.match(r"[0-9]{4}", b):
422417
y = int(b[:4])
@@ -430,7 +425,7 @@ def getYMD(b):
430425
d = "1"
431426
if not m or int(m) > 12 or int(m) < 1:
432427
m = "1"
433-
return (y, m, d)
428+
return y, m, d
434429

435430

436431
def birth(cv):
@@ -480,22 +475,22 @@ def parse(cv):
480475
for k in rmkeys:
481476
del cv[k]
482477

483-
integerity = 0.
478+
integrity = 0.
484479
flds_num = 0.
485480

486481
def hasValues(flds):
487-
nonlocal integerity, flds_num
482+
nonlocal integrity, flds_num
488483
flds_num += len(flds)
489484
for f in flds:
490485
v = str(cv.get(f, ""))
491486
if len(v) > 0 and v != '0' and v != '[]':
492-
integerity += 1
487+
integrity += 1
493488

494489
hasValues(tks_fld)
495490
hasValues(small_tks_fld)
496491
hasValues(kwd_fld)
497492
hasValues(num_fld)
498-
cv["integerity_flt"] = integerity / flds_num
493+
cv["integerity_flt"] = integrity / flds_num
499494

500495
if cv.get("corporation_type"):
501496
for p, r in [(r"(公司|企业|其它|其他|Others*|\n|未填写|Enterprises|Company|companies)", ""),

docs/guides/agent/agent_component_reference/docs_generator.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ In the **Message** component, reference the `download` output variable from the
4040

4141
### Content
4242

43-
The main text content to include in the document. Supports markdown formatting:
43+
The main text content to include in the document. Supports Markdown formatting:
4444

4545
- **Bold**: `**text**` or `__text__`
4646
- **Italic**: `*text*` or `_text_`

rag/prompts/analyze_task_system.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ Scale depth to match complexity. Always stop once success criteria are met.
4141

4242
**For HIGH (150–250 words for analysis only):**
4343
- Comprehensive objective analysis; Intent & Scope
44-
- 5–8 step Plan with dependencies/parallelism
44+
- 5–8 steps Plan with dependencies/parallelism
4545
- **Uncertainty & Probes** (key unknowns → probe → stop condition)
4646
- Measurable Success Criteria; Failure detectors & fallbacks
4747
- **Source Plan** (evidence acquisition & validation)

0 commit comments

Comments (0)