xueyulinn
diff --git a/codehawk/__init__.py b/codehawk/__init__.py
Lines changed: 2 additions & 0 deletions
diff --git a/codehawk/agents/code_review_planning_agent.py b/codehawk/agents/code_review_planning_agent.py
Lines changed: 4 additions & 4 deletions
diff --git a/codehawk/agents/code_review_writer_agent.py b/codehawk/agents/code_review_writer_agent.py
Lines changed: 12 additions & 12 deletions
diff --git a/codehawk/agents/convention_evidence_agent.py b/codehawk/agents/convention_evidence_agent.py
Lines changed: 4 additions & 4 deletions
diff --git a/codehawk/agents/pr_summary_agent.py b/codehawk/agents/pr_summary_agent.py
Lines changed: 7 additions & 7 deletions
diff --git a/codehawk/agents/prompts.py b/codehawk/agents/prompts.py
Lines changed: 64 additions & 53 deletions
diff --git a/codehawk/agents/sat_summary_agent.py b/codehawk/agents/sat_summary_agent.py
Lines changed: 6 additions & 6 deletions
@@ -1 +1,3 @@
 """Codehawk application package."""
+
+__version__ = "0.1.0"
@@ -28,14 +28,14 @@ class CodeReviewPlanning(BaseModel):
     questions: list[CodeReviewQuestion]
 
 
-agent = Agent(
+_agent = Agent(
     name="Code Review Planning Agent",
     instructions=PR_CODE_REVIEW_PLANNING_SYSTEM_PROMPT,
     output_type=CodeReviewPlanning,
 )
 
 
-def construct_user_prompt(
+def _construct_user_prompt(
     sat_summary: str,
     pr_files_context: PullRequestFilesContext,
     pr_info_context: PullRequestInfoContext,
@@ -119,12 +119,12 @@ async def generate_code_review_planning(
     pr_info_context: PullRequestInfoContext,
     pr_commits_context: PullRequestCommitsContext,
 ) -> CodeReviewPlanning:
-    prompt = construct_user_prompt(
+    prompt = _construct_user_prompt(
         sat_summary,
         pr_files_context,
         pr_info_context,
         pr_commits_context,
     )
     with trace("CODE_REVIEW_PLANNING"):
-        result = await Runner.run(agent, input=prompt)
+        result = await Runner.run(_agent, input=prompt)
         return result.final_output_as(CodeReviewPlanning)
@@ -23,14 +23,14 @@ class PRReviewOutput(BaseModel):
         description="The inline review comments to create on changed lines.")
 
 
-agent = Agent(
+_agent = Agent(
     name="Code Review Writer Agent",
     instructions=PR_CODE_REVIEW_WRITER_SYSTEM_PROMPT,
     output_type=PRReviewOutput
 )
 
 
-def construct_user_prompt(
+def _construct_user_prompt(
     sat_summary: str,
     pr_files_context: PullRequestFilesContext,
     structual_questions: Any,
@@ -64,12 +64,12 @@ def construct_user_prompt(
             ),
         },
         "planned_questions": {
-            "structural": to_jsonable(structual_questions),
-            "convention": to_jsonable(convention_questions),
+            "structural": _to_jsonable(structual_questions),
+            "convention": _to_jsonable(convention_questions),
         },
         "evidence": {
-            "structural": to_jsonable(structual_evidence),
-            "convention": to_jsonable(conventional_evidence),
+            "structural": _to_jsonable(structual_evidence),
+            "convention": _to_jsonable(conventional_evidence),
         },
     }
     context_json = json.dumps(prompt_context, ensure_ascii=False, indent=2)
@@ -90,7 +90,7 @@ async def generate_code_review_comments(
     conventional_evidence,
     pr_ctx: PRReviewContext,
 ) -> PRReviewOutput:
-    prompt = construct_user_prompt(
+    prompt = _construct_user_prompt(
         sat_summary,
         pr_files_context,
         structual_questions,
@@ -100,17 +100,17 @@ async def generate_code_review_comments(
         pr_ctx,
     )
     with trace("CODE_REVIEW_COMMENTS"):
-        result = await Runner.run(agent, input=prompt, context=pr_ctx)
+        result = await Runner.run(_agent, input=prompt, context=pr_ctx)
         return result.final_output_as(PRReviewOutput)
 
 
-def to_jsonable(value: Any) -> Any:
+def _to_jsonable(value: Any) -> Any:
     if hasattr(value, "model_dump"):
         return value.model_dump(mode="json")
     if isinstance(value, list):
-        return [to_jsonable(item) for item in value]
+        return [_to_jsonable(item) for item in value]
     if isinstance(value, tuple):
-        return [to_jsonable(item) for item in value]
+        return [_to_jsonable(item) for item in value]
     if isinstance(value, dict):
-        return {key: to_jsonable(item) for key, item in value.items()}
+        return {key: _to_jsonable(item) for key, item in value.items()}
     return value
@@ -58,15 +58,15 @@ async def find_similar_code_chunks(
     return json.dumps({"similar_code_chunks": results}, ensure_ascii=False)
 
 
-agent = Agent(
+_agent = Agent(
     name="Conventional Evidence Agent",
     instructions=PR_CONVENTION_EVIDENCE_SYSTEM_PROMPT,
     tools=[find_similar_code_chunks],
     output_type=CodeReviewEvidence,
 )
 
 
-def construct_user_prompt(
+def _construct_user_prompt(
     questions: list[CodeReviewQuestion],
     ctx: PRReviewContext,
 ) -> str:
@@ -109,7 +109,7 @@ async def generate_conventional_evidence(
     questions: list[CodeReviewQuestion],
     ctx: PRReviewContext,
 ) -> CodeReviewEvidence:
-    prompt = construct_user_prompt(questions, ctx)
+    prompt = _construct_user_prompt(questions, ctx)
     with trace("CONVENTIONAL_EVIDENCE"):
-        result = await Runner.run(agent, input=prompt, context=ctx)
+        result = await Runner.run(_agent, input=prompt, context=ctx)
         return result.final_output_as(CodeReviewEvidence)
@@ -10,15 +10,15 @@
     PullRequestInfoContext,
 )
 
-pr_summary_agent = Agent(
+_pr_summary_agent = Agent(
     name="Github Pull Request Summary Agent",
     instructions=PR_SUMMARY_SYSTEM_PROMPT,
     model="gpt-5.5",
     model_settings=ModelSettings(verbosity="low")
 )
 
 
-def construct_pr_full_prompt(
+def _construct_pr_full_prompt(
     pr_info_context: PullRequestInfoContext,
     pr_commits_context: PullRequestCommitsContext,
     pr_files_context: PullRequestFilesContext,
@@ -120,7 +120,7 @@ def construct_pr_full_prompt(
     )
 
 
-def construct_pr_incremental_prompt(
+def _construct_pr_incremental_prompt(
     pr_incremental_context: PullRequestIncrementalCompareContext
 ) -> str:
     prompt_context = {
@@ -171,23 +171,23 @@ async def generate_pr_full_summary(
     pr_files_context: PullRequestFilesContext,
     reason: str | None = None,
 ) -> str:
-    pr_full_summary_prompt = construct_pr_full_prompt(
+    pr_full_summary_prompt = _construct_pr_full_prompt(
         pr_info_context,
         pr_commits_context,
         pr_files_context,
         reason
     )
     with trace("PR Full Summary Workflow"):
-        pr_summary = await Runner.run(pr_summary_agent, pr_full_summary_prompt)
+        pr_summary = await Runner.run(_pr_summary_agent, pr_full_summary_prompt)
         return pr_summary.final_output_as(str)
 
 
 async def generate_pr_incremental_summary(
     pr_incremental_context: PullRequestIncrementalCompareContext
 ) -> str:
-    pr_incremental_summary_prompt = construct_pr_incremental_prompt(
+    pr_incremental_summary_prompt = _construct_pr_incremental_prompt(
         pr_incremental_context,
     )
     with trace("PR Incremental Summary Workflow"):
-        pr_summary = await Runner.run(pr_summary_agent, pr_incremental_summary_prompt)
+        pr_summary = await Runner.run(_pr_summary_agent, pr_incremental_summary_prompt)
         return pr_summary.final_output_as(str)
@@ -71,59 +71,6 @@
 """
 
 
-PR_CODE_REVIEW_SYSTEM_PROMPT = """# Role
-You are a senior software engineer performing pull request code review.
-Your job is to identify concrete, actionable issues that a human reviewer should consider before merging.
-
-# Goal
-Produce a structured pull request review that helps the author and reviewers understand whether the change is safe to merge.
-Prioritize correctness, regressions, security issues, data loss, concurrency problems, API contract breaks, edge cases, and missing tests.
-
-# Success criteria
-Before finishing, the review must:
-- Ground every finding in the provided PR context, changed file patches, or tool results.
-- Prefer a small number of high-signal findings over exhaustive commentary.
-- Include inline comments only for actionable issues tied to specific changed lines.
-- Use the most recent PR head commit as the review commit when available.
-- Choose `REQUEST_CHANGES` only when at least one issue should block merge.
-- Choose `COMMENT` for all other outcomes, including non-blocking concerns, incomplete evidence, or no actionable issues.
-- Never approve the pull request.
-
-# Evidence rules
-- Treat changed file patches as the primary evidence.
-- Treat `static_analysis.summary_markdown` as supporting evidence only. It can suggest areas to inspect, but it is not a substitute for changed file patches or fetched source context.
-- Do not create inline comments based only on static analysis summary text. Verify the issue against the changed patch or `get_full_file` first.
-- Use `get_full_file` when the patch omits context needed to verify a likely issue.
-- Use `find_code_chunks` to inspect indexed functions, classes, modules, signatures, and content previews for the current PR head commit.
-- Use `find_code_references` when call sites, imports, inheritance, decorators, or attribute access can confirm whether a changed symbol has affected callers or dependencies.
-- Use `find_usage_patterns` when a finding depends on how a symbol is normally called or consumed across the repository.
-- Use `find_exception_patterns` when a finding depends on repository exception handling conventions.
-- Use `find_naming_patterns` when a finding depends on similar function, method, or class naming patterns.
-- Do not present repository convention findings as confirmed unless a pattern tool returns concrete supporting examples.
-- Do not call tools just to browse casually; call them when extra context can change the review outcome.
-- Do not invent code behavior, tests, dependencies, or runtime guarantees that are not present in the provided context or tool output.
-- Distinguish confirmed issues from uncertainty. If evidence is incomplete, explain the uncertainty in the review body rather than presenting it as fact.
-
-# Review guidance
-- Focus on issues introduced or exposed by this PR.
-- Do not comment on unchanged code unless it is necessary to explain a changed-line issue.
-- Avoid style, naming, formatting, or preference comments unless they create a real maintainability or correctness risk.
-- Avoid duplicate comments for the same underlying issue.
-- Keep comment bodies concise and specific: state the problem, why it matters, and what change would address it.
-- Do not mention hidden reasoning or internal process.
-
-# Output
-Return the structured review object required by the application.
-Use the structured output schema for fields and types; do not include extra fields.
-
-# Stop rules
-- If the context and tool results are sufficient, produce the review directly.
-- If no actionable issues are found, return a neutral `COMMENT` review with an empty comments list.
-- If a suspected issue cannot be verified from the available evidence, do not create an inline comment for it; mention the uncertainty in the review body if it is important.
-- Do not ask follow-up questions.
-"""
-
-
 PR_CODE_REVIEW_PLANNING_SYSTEM_PROMPT = """# Role
 You are a senior software engineer planning evidence gathering for pull request code review.
 Your job is to turn the provided PR context, changed file patches, and static analysis summary into a compact list of targeted review questions for downstream evidence agents.
@@ -403,3 +350,67 @@
 - If a detail is missing, state the limitation briefly and continue.
 - If the input is malformed or empty, say that no usable static analysis report was provided.
 """
+
+
+SAT_SUMMARY_JSON_SYSTEM_PROMPT = """# Role
+You are a senior software engineer converting static analysis tool results into structured reviewer-facing findings.
+Your job is to normalize SAT tool output into a compact `Report` object for downstream code review workflows.
+
+# Input
+You will receive one or more static analysis tool results from `ToolResultDTO` data.
+Each tool result may include:
+- `status`: `passed`, `findings`, or `failed`.
+- `tool`: the analyzer name, such as `ruff` or `bandit`.
+- `command`: the command that ran.
+- `repo_root`: the analyzed repository root.
+- `exitCode`: the analyzer exit code.
+- `findings`: raw tool findings.
+- `stderr`: analyzer stderr.
+
+# Goal
+Return only concrete static-analysis findings as structured `Item` objects.
+The output is used by later review agents, so every field must be grounded in the provided SAT context.
+
+# Output schema
+Return a `Report` object with:
+- `items`: a list of `Item` objects.
+
+Each `Item` must contain:
+- `tool`: the tool that produced the finding.
+- `rule_id`: the tool rule, code, test ID, or check name when available; otherwise `null`.
+- `file_path`: the reported file path. Use an empty string only when the tool did not provide a file.
+- `description`: a concise reviewer-facing description of the finding.
+- `start_line`, `end_line`, `start_col`, `end_col`: reported location values when available; otherwise `null`.
+- `raw_severity`: the original severity value from the tool when available; otherwise `null`.
+- `normalized_severity`: one of `Low`, `Medium`, `High`, or `Unknown`.
+- `severity_reason`: a short explanation for the normalized severity.
+
+# Severity rules
+- Preserve the original tool severity in `raw_severity` exactly when present. Do not rewrite it.
+- Use `normalized_severity` for CodeHawk's reviewer-facing severity.
+- If the tool provides a clear severity scale, map it conservatively into `Low`, `Medium`, or `High` using the tool's own meaning. Preserve the original value in `raw_severity`.
+- If different tools use different words for comparable severity, normalize by meaning rather than spelling. For example, values that mean fatal, blocker, critical, serious, or high-impact should generally map higher than warning, minor, info, or style.
+- Use `High` for findings whose message indicates the code may fail to parse, fail to import, fail to run, expose a security-sensitive behavior, or break a required runtime contract.
+- Use `Medium` for findings whose message indicates a reliability, maintainability, error-handling, data-handling, or security-review concern, but does not clearly show a build blocker, runtime failure, or high-confidence security issue.
+- Use `Low` for style, formatting, import ordering, unused imports, unused variables, naming, and other cleanup findings unless the message itself indicates a concrete correctness, reliability, or security risk.
+- Use `Unknown` when the input does not provide enough evidence to map severity responsibly.
+- The `severity_reason` must briefly explain the mapping using the tool's raw severity, rule ID, message, or finding category. Do not rely on a hardcoded rule ID alone.
+
+# Evidence rules
+- Use only the provided SAT tool results.
+- Do not invent source code behavior, exploitability, test coverage, business impact, or merge impact.
+- Do not create items for tools that passed with no findings.
+- Do not create code finding items for tools that failed to run. Failed tools have no verified findings.
+- Preserve tool names, rule IDs, file paths, messages, line numbers, columns, and raw severity exactly when available.
+- If a finding is missing optional location or severity fields, set those output fields to `null` instead of guessing.
+- If the report is empty, malformed, or all tools passed with no findings, return `items: []`.
+
+# Field extraction guidance
+- Ruff commonly uses `code`, `filename`, `message`, `location.row`, `location.column`, `end_location.row`, and `end_location.column`.
+- Bandit commonly uses `test_id`, `filename`, `issue_text`, `line_number`, `line_range`, `col_offset`, `end_col_offset`, `issue_severity`, and `issue_confidence`.
+- For unknown tools, use the closest obvious fields for rule ID, file, message, location, and severity, but do not fabricate missing values.
+
+# Stop rules
+- Return only the structured `Report` output required by the application.
+- Do not include Markdown, prose outside the schema, raw JSON dumps, or follow-up questions.
+"""
@@ -5,13 +5,13 @@
 from codehawk.agents.prompts import SAT_SUMMARY_SYSTEM_PROMPT
 from codehawk.models.sat import SATReportDTO, ToolResultDTO
 
-sat_summary_agent = Agent(
+_agent = Agent(
     name="Static Check Agent",
     instructions=SAT_SUMMARY_SYSTEM_PROMPT,
 )
-
-
-def construct_sat_summary_prompt(report: SATReportDTO | ToolResultDTO) -> str:
+
+
+def _construct_user_prompt(report: SATReportDTO | ToolResultDTO) -> str:
     if isinstance(report, SATReportDTO):
         prompt_context = {
             "report_type": "sat_report",
@@ -39,7 +39,7 @@ def construct_sat_summary_prompt(report: SATReportDTO | ToolResultDTO) -> str:
 async def run_sat_summary(report: SATReportDTO | ToolResultDTO) -> str:
     with trace("SAT Summary Workflow"):
         res = await Runner.run(
-            sat_summary_agent,
-            input=construct_sat_summary_prompt(report),
+            _agent,
+            input=_construct_user_prompt(report),
         )
         return res.final_output_as(str)
Original file line number	Diff line number	Diff line change
`@@ -1 +1,3 @@`
`1`	`1`	`"""Codehawk application package."""`
	`2`	`+`
	`3`	`+__version__ = "0.1.0"`