Skip to content

Commit 3be9ba6

Browse files
mrT23EmbeddedDevops1qododavid
authored
Auto scan entire repo ! (#202)
* scan entire repo * fix tests * increased coverage * Added tree_sitter as hidden import. * Incrementing version * dw/debug scan (#203) * scan works on python fastapi test repo * keep original entrypoint * algo cleaning and improvements --------- Co-authored-by: Embedded DevOps <[email protected]> Co-authored-by: David Wurtz <[email protected]>
1 parent 738bf47 commit 3be9ba6

24 files changed

+1135
-417
lines changed

.github/workflows/ci_pipeline.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ jobs:
135135
136136
- name: Install Dependencies
137137
run: |
138-
pip install poetry wandb
138+
pip install poetry wandb tree_sitter
139139
poetry install
140140
- name: Build Executable
141141
run: make installer

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ installer:
2828
--hidden-import=tiktoken_ext.openai_public \
2929
--hidden-import=tiktoken_ext \
3030
--hidden-import=wandb \
31+
--hidden-import=tree_sitter \
3132
--hidden-import=wandb_gql \
3233
--onefile \
3334
--name cover-agent \

README.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,31 @@ CodiumAI Cover Agent aims to help efficiently increasing code coverage, by autom
3232

3333
## News and Updates
3434

35+
### 2024-11-05:
36+
New mode: scan an entire repo, automatically identify the test files, automatically collect context for each test file, and extend the test suite with new tests.
37+
How to run:
38+
39+
1) Create a branch in your repo
40+
2) cd to your repo
41+
3) Run the following command:
42+
```shell
43+
poetry run cover-agent \
44+
--project-language="python" \
45+
--project-root="<path_to_your_repo>" \
46+
--code-coverage-report-path="<path_to_your_repo>/coverage.xml" \
47+
--test-command="coverage run -m pytest <relative_path_to_unittest_folder> --cov=<path_to_your_repo> --cov-report=xml --cov-report=term --log-cli-level=INFO --timeout=30" \
48+
--model=bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0
49+
```
50+
51+
Notes:
52+
- `<relative_path_to_unittest_folder>` is optional, but specifying it prevents running e2e test files (if any exist), which may take a long time.
53+
- You can use other models, like 'gpt-4o' or 'o1-mini', but we recommend using 'sonnet-3.5', as it is currently the best code model in the world.
54+
55+
Additional configuration options:
56+
- `--max-test-files-allowed-to-analyze` - The maximum number of test files to analyze. Default is 20 (to avoid long running times).
57+
- `--look-for-oldest-unchanged-test-files` - If set, the tool will sort the test files by the last modified date and analyze the oldest ones first. This is useful to find the test files that are most likely to be outdated, and for multiple runs. Default is False.
58+
59+
3560
### 2024-09-29:
3661
We are excited to announce the latest series of updates to CoverAgent, delivering significant improvements to functionality, documentation, and testing frameworks. These updates reflect our ongoing commitment to enhancing the developer experience, improving error handling, and refining the testing processes.
3762

@@ -119,6 +144,7 @@ After downloading the executable or installing the Pip package you can run the C
119144
cover-agent \
120145
--source-file-path "<path_to_source_file>" \
121146
--test-file-path "<path_to_test_file>" \
147+
--project-root "<path_to_project_root>" \
122148
--code-coverage-report-path "<path_to_coverage_report>" \
123149
--test-command "<test_command_to_run>" \
124150
--test-command-dir "<directory_to_run_test_command>" \
@@ -138,6 +164,7 @@ Follow the steps in the README.md file located in the `templated_tests/python_fa
138164
cover-agent \
139165
--source-file-path "templated_tests/python_fastapi/app.py" \
140166
--test-file-path "templated_tests/python_fastapi/test_app.py" \
167+
--project-root "templated_tests/python_fastapi" \
141168
--code-coverage-report-path "templated_tests/python_fastapi/coverage.xml" \
142169
--test-command "pytest --cov=. --cov-report=xml --cov-report=term" \
143170
--test-command-dir "templated_tests/python_fastapi" \

cover_agent/AICaller.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -103,17 +103,20 @@ def call_model(self, prompt: dict, max_tokens=4096, stream=True):
103103
completion_tokens = int(usage.completion_tokens)
104104

105105
if "WANDB_API_KEY" in os.environ:
106-
root_span = Trace(
107-
name="inference_"
108-
+ datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"),
109-
kind="llm", # kind can be "llm", "chain", "agent", or "tool"
110-
inputs={
111-
"user_prompt": prompt["user"],
112-
"system_prompt": prompt["system"],
113-
},
114-
outputs={"model_response": content},
115-
)
116-
root_span.log(name="inference")
106+
try:
107+
root_span = Trace(
108+
name="inference_"
109+
+ datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"),
110+
kind="llm", # kind can be "llm", "chain", "agent", or "tool"
111+
inputs={
112+
"user_prompt": prompt["user"],
113+
"system_prompt": prompt["system"],
114+
},
115+
outputs={"model_response": content},
116+
)
117+
root_span.log(name="inference")
118+
except Exception as e:
119+
print(f"Error logging to W&B: {e}")
117120

118121
# Returns: Response, Prompt token count, and Completion token count
119122
return content, prompt_tokens, completion_tokens

cover_agent/CoverAgent.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ def __init__(self, args):
2929
self.test_gen = UnitTestGenerator(
3030
source_file_path=args.source_file_path,
3131
test_file_path=args.test_file_output_path,
32+
project_root=args.project_root,
3233
code_coverage_report_path=args.code_coverage_report_path,
3334
test_command=args.test_command,
3435
test_command_dir=args.test_command_dir,
@@ -58,6 +59,13 @@ def _validate_paths(self):
5859
raise FileNotFoundError(
5960
f"Test file not found at {self.args.test_file_path}"
6061
)
62+
63+
# Ensure the project root exists
64+
if self.args.project_root and not os.path.isdir(self.args.project_root):
65+
raise FileNotFoundError(
66+
f"Project root not found at {self.args.project_root}"
67+
)
68+
6169
# Create default DB file if not provided
6270
if not self.args.log_db_path:
6371
self.args.log_db_path = "cover_agent_unit_test_runs.db"

cover_agent/PromptBuilder.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ def __init__(
4343
failed_test_runs: str = "",
4444
language: str = "python",
4545
testing_framework: str = "NOT KNOWN",
46+
project_root: str = "",
4647
):
4748
"""
4849
The `PromptBuilder` class is responsible for building a formatted prompt string by replacing placeholders with the actual content of files read during initialization. It takes in various paths and settings as parameters and provides a method to generate the prompt.
@@ -67,8 +68,11 @@ def __init__(
6768
build_prompt(self)
6869
Replaces placeholders with the actual content of files read during initialization and returns the formatted prompt string.
6970
"""
70-
self.source_file_name = os.path.basename(source_file_path)
71-
self.test_file_name = os.path.basename(test_file_path)
71+
self.project_root = project_root
72+
self.source_file_path = source_file_path
73+
self.test_file_path = test_file_path
74+
self.source_file_name_rel = os.path.relpath(source_file_path, project_root)
75+
self.test_file_name_rel = os.path.relpath(test_file_path, project_root)
7276
self.source_file = self._read_file(source_file_path)
7377
self.test_file = self._read_file(test_file_path)
7478
self.code_coverage_report = code_coverage_report
@@ -123,8 +127,8 @@ def _read_file(self, file_path):
123127

124128
def build_prompt(self) -> dict:
125129
variables = {
126-
"source_file_name": self.source_file_name,
127-
"test_file_name": self.test_file_name,
130+
"source_file_name": self.source_file_name_rel,
131+
"test_file_name": self.test_file_name_rel,
128132
"source_file_numbered": self.source_file_numbered,
129133
"test_file_numbered": self.test_file_numbered,
130134
"source_file": self.source_file,
@@ -165,8 +169,8 @@ def build_prompt_custom(self, file) -> dict:
165169
dict: A dictionary containing the system and user prompts.
166170
"""
167171
variables = {
168-
"source_file_name": self.source_file_name,
169-
"test_file_name": self.test_file_name,
172+
"source_file_name": self.source_file_name_rel,
173+
"test_file_name": self.test_file_name_rel,
170174
"source_file_numbered": self.source_file_numbered,
171175
"test_file_numbered": self.test_file_numbered,
172176
"source_file": self.source_file,

cover_agent/UnitTestGenerator.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ def __init__(
3030
desired_coverage: int = 90, # Default to 90% coverage if not specified
3131
additional_instructions: str = "",
3232
use_report_coverage_feature_flag: bool = False,
33+
project_root: str = "",
3334
):
3435
"""
3536
Initialize the UnitTestGenerator class with the provided parameters.
@@ -57,6 +58,7 @@ def __init__(
5758
self.relevant_line_number_to_insert_imports_after = None
5859
self.relevant_line_number_to_insert_tests_after = None
5960
self.test_headers_indentation = None
61+
self.project_root = project_root
6062
self.source_file_path = source_file_path
6163
self.test_file_path = test_file_path
6264
self.code_coverage_report_path = code_coverage_report_path
@@ -303,6 +305,7 @@ def build_prompt(self) -> dict:
303305
failed_test_runs=failed_test_runs_value,
304306
language=self.language,
305307
testing_framework=self.testing_framework,
308+
project_root=self.project_root,
306309
)
307310

308311
return self.prompt_builder.build_prompt()

cover_agent/lsp_logic/logic.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
from cover_agent.lsp_logic.utils.utils import uri_to_path, is_forbidden_directory
44

55

6-
async def get_direct_context(captures, language, lsp, project_dir, rel_file, target_file):
6+
async def get_direct_context(captures, language, lsp, project_dir, rel_file):
7+
target_file = str(os.path.join(project_dir, rel_file))
78
skip_found_symbols = True
89
context_files = set()
910
context_symbols = set()
@@ -30,14 +31,15 @@ async def get_direct_context(captures, language, lsp, project_dir, rel_file, tar
3031
if project_dir not in d_path:
3132
continue
3233
if not is_forbidden_directory(d_path, language):
33-
print(f"Context definition: \'{name_symbol}\' at line {line} from file \'{rel_d_path}\'")
34+
# print(f"Context definition: \'{name_symbol}\' at line {line} from file \'{rel_d_path}\'")
3435
context_files.add(d_path)
3536
context_symbols.add(name_symbol)
3637
context_symbols_and_files.add((name_symbol, rel_d_path))
3738
return context_files, context_symbols
3839

3940

40-
async def get_reverse_context(captures, lsp, project_dir, rel_file, target_file):
41+
async def get_reverse_context(captures, lsp, project_dir, rel_file):
42+
target_file = str(os.path.join(project_dir, rel_file))
4143
skip_found_symbols = True
4244
reverse_context_files = set()
4345
reverse_context_symbols = set()

cover_agent/lsp_logic/scripts/main.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,16 +58,14 @@ async def run():
5858
language,
5959
lsp,
6060
project_dir,
61-
rel_file,
62-
target_file)
61+
rel_file)
6362
print("Getting context done.")
6463

6564
print("\nGetting reverse context ...")
6665
reverse_context_files, reverse_context_symbols = await get_reverse_context(captures,
6766
lsp,
6867
project_dir,
69-
rel_file,
70-
target_file)
68+
rel_file)
7169
print("Getting reverse context done.")
7270

7371
print("\n\n================")
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
import os
2+
from time import sleep
3+
4+
from jinja2 import Environment, StrictUndefined
5+
6+
from cover_agent.lsp_logic.file_map.file_map import FileMap
7+
from cover_agent.lsp_logic.logic import get_direct_context
8+
from cover_agent.lsp_logic.multilspy import LanguageServer
9+
from cover_agent.lsp_logic.multilspy.multilspy_config import MultilspyConfig
10+
from cover_agent.lsp_logic.multilspy.multilspy_logger import MultilspyLogger
11+
12+
from cover_agent.settings.config_loader import get_settings
13+
from cover_agent.utils import load_yaml
14+
15+
16+
async def analyze_context(test_file, context_files, args, ai_caller):
    """
    Analyze a test file against its context files using the AI model.

    The test file content and the project-relative names of the context files are
    rendered into the 'analyze_test_against_context' prompt. The YAML response is
    then used to determine:
    1. Whether this test file is a unit test file (`is_this_a_unit_test` == 1).
    2. Which of the context files is the main source file for this test file
       (`main_file`), i.e. the file for which we want to increase coverage.
    3. All other context files, which are returned as additional 'included_files'.

    Args:
        test_file: Path to the test file to analyze.
        context_files: Paths of the context files collected for the test file.
        args: Object providing `project_root` and `project_language`.
        ai_caller: Object whose `call_model(prompt=...)` returns a tuple of
            (response, prompt token count, response token count).

    Returns:
        A tuple `(source_file, context_files_include)`. `source_file` is the main
        source file path joined onto `args.project_root`, or None when the test file
        was not recognized as a unit test or no main file was named.
        `context_files_include` is `context_files` without the main source file.
        Any exception is caught and printed (best effort); in that case the values
        determined so far are returned, by default `(None, context_files)`.
    """
    source_file = None
    context_files_include = context_files
    try:
        test_file_rel_str = os.path.relpath(test_file, args.project_root)
        # One backtick-quoted relative path per line. The closing backtick must come
        # before the newline, otherwise every entry after the first is mis-quoted.
        context_files_rel_filtered_list_str = "".join(
            f"`{os.path.relpath(context_file, args.project_root)}`\n"
            for context_file in context_files
        )
        # Read through a context manager so the file handle is always closed.
        with open(test_file, 'r') as f:
            test_file_content = f.read()
        variables = {"language": args.project_language,
                     "test_file_name_rel": test_file_rel_str,
                     "test_file_content": test_file_content,
                     "context_files_names_rel": context_files_rel_filtered_list_str
                     }
        prompt_name = 'analyze_test_against_context'
        environment = Environment(undefined=StrictUndefined)
        settings = get_settings().get(prompt_name)
        system_prompt = environment.from_string(settings.system).render(variables)
        user_prompt = environment.from_string(settings.user).render(variables)
        response, prompt_token_count, response_token_count = (
            ai_caller.call_model(prompt={"system": system_prompt, "user": user_prompt})
        )
        response_dict = load_yaml(response)
        if int(response_dict.get('is_this_a_unit_test', 0)) == 1:
            # `main_file` may be missing or null in the YAML; treat both as "not named".
            source_file_rel = (response_dict.get('main_file') or "").strip().strip('`')
            if source_file_rel:
                source_file = os.path.join(args.project_root, source_file_rel)
                # Everything except the main source file stays as additional context.
                context_files_include = [
                    f for f in context_files
                    if os.path.relpath(f, args.project_root) != source_file_rel
                ]

        if source_file:
            print(f"Test file: `{test_file}` is a unit test file for source file: `{source_file}`")
        else:
            print(f"Test file: `{test_file}` is not a unit test file")
    except Exception as e:
        print(f"Error while analyzing test file {test_file} against context files: {e}")

    return source_file, context_files_include
60+
61+
62+
async def find_test_file_context(args, lsp, test_file):
    """
    Collect the non-empty context files referenced by a test file.

    Builds a `FileMap` for the test file, takes the captures from its query results
    and passes them to `get_direct_context`. Context files whose content is empty
    (after stripping whitespace) are dropped.

    Args:
        args: Object providing `project_root` and `project_language`.
        lsp: Language server instance forwarded to `get_direct_context`.
        test_file: Path to the test file.

    Returns:
        A list of context file paths. On any exception the error is printed and an
        empty list is returned.
    """
    try:
        root = args.project_root
        test_file_rel = os.path.relpath(test_file, root)

        # Tree-sitter query results for the test file; only the captures are used.
        file_map = FileMap(
            test_file,
            parent_context=False,
            child_context=False,
            header_max=0,
            project_base_path=root,
        )
        _, captures = file_map.get_query_results()

        # Direct context of the test file; the returned symbols are not needed here.
        direct_files, _ = await get_direct_context(
            captures, args.project_language, lsp, root, test_file_rel
        )

        # Keep only files that have some non-whitespace content.
        non_empty_files = []
        for path in direct_files:
            with open(path, 'r') as handle:
                content = handle.read()
            if content.strip():
                non_empty_files.append(path)
    except Exception as e:
        print(f"Error while getting context for test file {test_file}: {e}")
        non_empty_files = []

    return non_empty_files
93+
94+
95+
async def initialize_language_server(args):
    """
    Create a language server for the project described by `args`.

    Args:
        args: Object providing `project_language` (used as the `code_language`
            config entry) and `project_root` (passed to `LanguageServer.create`).

    Returns:
        The object returned by `LanguageServer.create`.
    """
    # Function-scope import so this coroutine does not depend on a module-level import.
    import asyncio

    logger = MultilspyLogger()
    config = MultilspyConfig.from_dict({"code_language": args.project_language})
    lsp = LanguageServer.create(config, logger, args.project_root)
    # Short pause after creation (presumably to let the server settle - TODO confirm).
    # Awaited rather than time.sleep() so the event loop is not blocked.
    await asyncio.sleep(0.1)
    return lsp

0 commit comments

Comments
 (0)