
Commit e9f9467

Detailed logging using databases (#151)
* Adding initial database concept.
* Initial passing of unit tests.
* Refactored db call. Still WIP.
* Fixed db name.
* Updated DB path.
* LogDB working locally but not updating from the Docker container.
* Migrated DB inserts to Cover Agent.
* Added documentation for DB.
* Fixed UnitTestDB tests.
* Fixed default DB creation.
* Reverting test.
* Resolved empty DB path assertion.
* Added before/after tests into DB.
* Fixed arg call for validate test.
1 parent 9634b99 commit e9f9467

16 files changed (+1704, -1148 lines)

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@ __pycache__
 generated_prompt.md
 test_results.html
 run.log
+*.db

 # PyInstaller Generated files
 /build/

cover_agent/CoverAgent.py

Lines changed: 9 additions & 2 deletions
@@ -7,7 +7,7 @@
 from cover_agent.CustomLogger import CustomLogger
 from cover_agent.ReportGenerator import ReportGenerator
 from cover_agent.UnitTestGenerator import UnitTestGenerator
-
+from cover_agent.UnitTestDB import UnitTestDB

 class CoverAgent:
     def __init__(self, args):
@@ -41,6 +41,10 @@ def _validate_paths(self):
             raise FileNotFoundError(
                 f"Test file not found at {self.args.test_file_path}"
             )
+        if not self.args.log_db_path:
+            # Create default DB file if not provided
+            self.args.log_db_path = "cover_agent_unit_test_runs.db"
+        self.test_db = UnitTestDB(db_connection_string=f"sqlite:///{self.args.log_db_path}")

     def _duplicate_test_file(self):
         if self.args.test_file_output_path != "":
@@ -73,10 +77,13 @@ def run(self):

         for generated_test in generated_tests_dict.get("new_tests", []):
             test_result = self.test_gen.validate_test(
-                generated_test, generated_tests_dict, self.args.run_tests_multiple_times
+                generated_test, self.args.run_tests_multiple_times
             )
             test_results_list.append(test_result)

+            # Insert the test result into the database
+            self.test_db.insert_attempt(test_result)
+
         iteration_count += 1

         if self.test_gen.current_coverage < (self.test_gen.desired_coverage / 100):
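For context, a minimal sketch of the new wiring outside the class: when `--log-db-path` is empty, `_validate_paths` falls back to a default SQLite file, and `run()` hands every validated result to `insert_attempt`. The `SimpleNamespace` stand-in and the sample record below are illustrative, not taken from a real run.

```
from types import SimpleNamespace

from cover_agent.UnitTestDB import UnitTestDB

# Illustrative stand-in for the parsed CLI arguments (the real object comes from argparse).
args = SimpleNamespace(log_db_path="")

# Same fallback as _validate_paths: default to a local SQLite file when no path is given.
if not args.log_db_path:
    args.log_db_path = "cover_agent_unit_test_runs.db"

test_db = UnitTestDB(db_connection_string=f"sqlite:///{args.log_db_path}")

# In run(), each result returned by validate_test() is persisted like this.
test_db.insert_attempt({"status": "PASS", "exit_code": 0, "test": {}})  # minimal illustrative record
```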

cover_agent/UnitTestDB.py

Lines changed: 85 additions & 0 deletions
@@ -0,0 +1,85 @@
+from datetime import datetime
+from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import sessionmaker, scoped_session, load_only
+from sqlalchemy.orm.exc import NoResultFound
+
+Base = declarative_base()
+
+class UnitTestGenerationAttempt(Base):
+    __tablename__ = 'unit_test_generation_attempts'
+    id = Column(Integer, primary_key=True)
+    run_time = Column(DateTime, default=datetime.now)  # Use local time
+    status = Column(String)
+    reason = Column(Text)
+    exit_code = Column(Integer)
+    stderr = Column(Text)
+    stdout = Column(Text)
+    test_code = Column(Text)
+    imports = Column(Text)
+    original_test_file = Column(Text)
+    processed_test_file = Column(Text)
+
+class UnitTestDB:
+    def __init__(self, db_connection_string):
+        self.engine = create_engine(db_connection_string)
+        Base.metadata.create_all(self.engine)
+        self.Session = scoped_session(sessionmaker(bind=self.engine))
+
+    def insert_attempt(self, test_result: dict):
+        with self.Session() as session:
+            new_attempt = UnitTestGenerationAttempt(
+                run_time=datetime.now(),  # Use local time
+                status=test_result.get("status"),
+                reason=test_result.get("reason"),
+                exit_code=test_result.get("exit_code"),
+                stderr=test_result.get("stderr"),
+                stdout=test_result.get("stdout"),
+                test_code=test_result.get("test", {}).get("test_code", ""),
+                imports=test_result.get("test", {}).get("new_imports_code", ""),
+                original_test_file=test_result.get("original_test_file"),
+                processed_test_file=test_result.get("processed_test_file"),
+            )
+            session.add(new_attempt)
+            session.commit()
+            return new_attempt.id
+
+    def select_all_attempts(self):
+        with self.Session() as session:
+            return session.query(UnitTestGenerationAttempt).all()
+
+    def select_attempt(self, attempt_id):
+        with self.Session() as session:
+            try:
+                return session.query(UnitTestGenerationAttempt).filter_by(id=attempt_id).one()
+            except NoResultFound:
+                return None
+
+    def select_attempt_in_range(self, start: datetime, end: datetime):
+        with self.Session() as session:
+            return session.query(UnitTestGenerationAttempt).filter(
+                UnitTestGenerationAttempt.run_time >= start,
+                UnitTestGenerationAttempt.run_time <= end
+            ).all()
+
+    def select_attempt_flat(self, attempt_id):
+        with self.Session() as session:
+            try:
+                result = session.query(UnitTestGenerationAttempt).filter_by(id=attempt_id).options(
+                    load_only(
+                        UnitTestGenerationAttempt.id,
+                        UnitTestGenerationAttempt.run_time,
+                        UnitTestGenerationAttempt.status,
+                        UnitTestGenerationAttempt.reason,
+                        UnitTestGenerationAttempt.exit_code,
+                        UnitTestGenerationAttempt.stderr,
+                        UnitTestGenerationAttempt.stdout,
+                        UnitTestGenerationAttempt.test_code,
+                        UnitTestGenerationAttempt.imports,
+                        UnitTestGenerationAttempt.original_test_file,
+                        UnitTestGenerationAttempt.processed_test_file,
+                    )
+                ).one().__dict__
+                return result
+            except NoResultFound:
+                return None
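A short usage sketch for the new class: the database file name and the sample values are illustrative, but the methods and the dict keys are the ones defined above.

```
from datetime import datetime, timedelta

from cover_agent.UnitTestDB import UnitTestDB

# Connect to (and, if needed, create) a local SQLite database.
db = UnitTestDB(db_connection_string="sqlite:///run_tests.db")

# insert_attempt returns the primary key of the new row.
attempt_id = db.insert_attempt(
    {
        "status": "FAIL",                 # illustrative values throughout
        "reason": "Assertion failed",
        "exit_code": 1,
        "stderr": "AssertionError",
        "stdout": "1 failed",
        "test": {"test_code": "def test_example():\n    assert False", "new_imports_code": ""},
        "original_test_file": "# file before insertion",
        "processed_test_file": "# file after insertion",
    }
)

# Read it back as an ORM object or as a plain dict.
attempt = db.select_attempt(attempt_id)
flat = db.select_attempt_flat(attempt_id)
print(attempt.status, flat["run_time"])

# Or query a time window, e.g. everything recorded in the last hour.
recent = db.select_attempt_in_range(datetime.now() - timedelta(hours=1), datetime.now())
print(f"{len(recent)} attempts in the last hour")
```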

cover_agent/UnitTestGenerator.py

Lines changed: 35 additions & 7 deletions
@@ -66,6 +66,7 @@ def __init__(
         self.language = self.get_code_language(source_file_path)
         self.use_report_coverage_feature_flag = use_report_coverage_feature_flag
         self.last_coverage_percentages = {}
+        self.llm_model = llm_model

         # Objects to instantiate
         self.ai_caller = AICaller(model=llm_model, api_base=api_base)
@@ -421,13 +422,12 @@ def generate_tests(self, max_tokens=4096, dry_run=False):

         return tests_dict

-    def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_attempts=1):
+    def validate_test(self, generated_test: dict, num_attempts=1):
         """
         Validate a generated test by inserting it into the test file, running the test, and checking for pass/fail.

         Parameters:
             generated_test (dict): The generated test to validate, containing test code and additional imports.
-            generated_tests_dict (dict): A dictionary containing information about the generated tests.
             num_attempts (int, optional): The number of attempts to run the test. Defaults to 1.

         Returns:
@@ -449,6 +449,10 @@ def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_at
         12. Handle any exceptions that occur during the validation process, log the errors, and roll back the test file if necessary.
         13. Log additional details and error messages for failed tests, and optionally, use the Trace class for detailed logging if 'WANDB_API_KEY' is present in the environment variables.
         """
+        # Store original content of the test file
+        with open(self.test_file_path, "r") as test_file:
+            original_content = test_file.read()
+
         try:
             # Step 0: no pre-process.
             # We asked the model that each generated test should be a self-contained independent test
@@ -482,12 +486,10 @@ def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_at
                 [delta_indent * " " + line for line in test_code.split("\n")]
             )
             test_code_indented = "\n" + test_code_indented.strip("\n") + "\n"
-            if test_code_indented and relevant_line_number_to_insert_tests_after:

+            if test_code_indented and relevant_line_number_to_insert_tests_after:
                 # Step 1: Insert the generated test to the relevant line in the test file
                 additional_imports_lines = ""
-                with open(self.test_file_path, "r") as test_file:
-                    original_content = test_file.read()  # Store original content
                 original_content_lines = original_content.split("\n")
                 test_code_lines = test_code_indented.split("\n")
                 # insert the test code at the relevant line
@@ -546,6 +548,8 @@ def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_at
                         "stderr": stderr,
                         "stdout": stdout,
                         "test": generated_test,
+                        "original_test_file": original_content,
+                        "processed_test_file": processed_test,
                     }

                     error_message = extract_error_message_python(fail_details["stdout"])
@@ -624,6 +628,8 @@ def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_at
                         "stderr": stderr,
                         "stdout": stdout,
                         "test": generated_test,
+                        "original_test_file": original_content,
+                        "processed_test_file": processed_test,
                     }
                     self.failed_test_runs.append(
                         {
@@ -658,6 +664,8 @@ def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_at
                         "stderr": stderr,
                         "stdout": stdout,
                         "test": generated_test,
+                        "original_test_file": original_content,
+                        "processed_test_file": processed_test,
                     }
                     self.failed_test_runs.append(
                         {
@@ -675,7 +683,6 @@ def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_at

                     self.current_coverage = new_percentage_covered

-
                     for key in coverage_percentages:
                         if key not in self.last_coverage_percentages:
                             self.last_coverage_percentages[key] = 0
@@ -699,6 +706,8 @@ def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_at
                         "stderr": stderr,
                         "stdout": stdout,
                         "test": generated_test,
+                        "original_test_file": original_content,
+                        "processed_test_file": processed_test,
                     }
         except Exception as e:
             self.logger.error(f"Error validating test: {e}")
@@ -709,8 +718,27 @@ def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_at
                 "stderr": str(e),
                 "stdout": "",
                 "test": generated_test,
+                "original_test_file": original_content,
+                "processed_test_file": "N/A",
             }

+    def to_dict(self):
+        return {
+            "source_file_path": self.source_file_path,
+            "test_file_path": self.test_file_path,
+            "code_coverage_report_path": self.code_coverage_report_path,
+            "test_command": self.test_command,
+            "llm_model": self.llm_model,
+            "test_command_dir": self.test_command_dir,
+            "included_files": self.included_files,
+            "coverage_type": self.coverage_type,
+            "desired_coverage": self.desired_coverage,
+            "additional_instructions": self.additional_instructions,
+        }
+
+    def to_json(self):
+        return json.dumps(self.to_dict())
+

 def extract_error_message_python(fail_message):
     """
@@ -738,4 +766,4 @@ def extract_error_message_python(fail_message):
         return ""
     except Exception as e:
         logging.error(f"Error extracting error message: {e}")
-        return ""
+        return ""
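For reference, a rough sketch of the dict that `validate_test` now returns and `UnitTestDB.insert_attempt` persists. Only the keys are taken from the diff above; every value here is made up for illustration.

```
# Illustrative shape of a validate_test result (values are invented).
test_result = {
    "status": "FAIL",                      # e.g. "FAIL" for a test that did not pass
    "reason": "Test failed",
    "exit_code": 1,
    "stderr": "AssertionError",
    "stdout": "1 failed in 0.12s",
    "test": {                              # the generated test that was validated
        "test_code": "def test_example():\n    assert add(1, 2) == 3",
        "new_imports_code": "from app import add",
    },
    "original_test_file": "# test file before the generated test was inserted",
    "processed_test_file": "# test file after the generated test was inserted",
}
```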

cover_agent/main.py

Lines changed: 5 additions & 0 deletions
@@ -96,6 +96,11 @@ def parse_args():
         action="store_true",
         help="Setting this to True considers the coverage of all the files in the coverage report. This means we consider a test as good if it increases coverage for a different file other than the source file. Default: False.",
     )
+    parser.add_argument(
+        "--log-db-path",
+        default="",
+        help="Path to optional log database. Default: %(default)s.",
+    )
     return parser.parse_args()


cover_agent/version.txt

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-0.1.48
+0.1.49
File renamed without changes.

docs/database_usage.md

Lines changed: 66 additions & 0 deletions
@@ -0,0 +1,66 @@
+# Using a Test Database with Cover Agent
+Note: This feature is still in beta.
+
+## Requirements
+Currently, only SQLite is supported. [SQLite](https://www.sqlite.org/) uses a local `.db` file to write to and read from (versus a server-based database). The long-term goal is to support any type of database that [SQLAlchemy](https://www.sqlalchemy.org/) supports.
+
+You'll need SQLite installed in order to view the tables, but to get started you can simply create an empty `.db` file using the `touch` command. For example:
+```
+touch run_tests.db
+```
+
+## Running with an external DB
+You can run Cover Agent using the `--log-db-path` option. For example:
+```
+cover-agent \
+  --source-file-path "templated_tests/python_fastapi/app.py" \
+  --test-file-path "templated_tests/python_fastapi/test_app.py" \
+  --code-coverage-report-path "templated_tests/python_fastapi/coverage.xml" \
+  --test-command "pytest --cov=. --cov-report=xml --cov-report=term" \
+  --test-command-dir "templated_tests/python_fastapi" \
+  --coverage-type "cobertura" \
+  --desired-coverage 70 \
+  --max-iterations 10 \
+  --log-db-path "run_tests.db"
+```
+
+Cover Agent will create a table called `unit_test_generation_attempts` within the database.
+
+## Integration Tests
+You can run the integration test suite and pass the local `.db` file into each Docker container with the following (example) command from the root of this repository:
+```
+LOG_DB_PATH="<full_path_to_root_folder>/run_tests.db" tests_integration/test_all.sh
+```
+
+## Observing the test data
+You can look at the test results using an external database reader or the basic SQLite command line tool:
+```
+sqlite3 run_tests.db
+```
+
+Once in SQLite, you can list the tables and observe that, after running some tests, a table called `unit_test_generation_attempts` has been created:
+```
+sqlite> .tables
+unit_test_generation_attempts
+```
+
+To get the definition of the table we can run:
+```
+sqlite> PRAGMA table_info(unit_test_generation_attempts);
+0|id|INTEGER|1||1
+1|run_time|DATETIME|0||0
+2|status|VARCHAR|0||0
+3|reason|TEXT|0||0
+4|exit_code|INTEGER|0||0
+5|stderr|TEXT|0||0
+6|stdout|TEXT|0||0
+7|test_code|TEXT|0||0
+8|imports|TEXT|0||0
+```
+
+A simple `select * from unit_test_generation_attempts;` query will display all test results (which include formatted carriage returns). This may be difficult to read from the command line, so using a GUI will probably serve you better.
+
+You can also filter the results to show only failed tests, for example:
+```
+sqlite> select * from unit_test_generation_attempts where status = 'FAIL';
+```
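The results can also be read programmatically through the `UnitTestDB` class added in this commit; a minimal sketch, reusing the `run_tests.db` file from the examples above:

```
from cover_agent.UnitTestDB import UnitTestDB

# Open the same database that was passed via --log-db-path.
db = UnitTestDB(db_connection_string="sqlite:///run_tests.db")

# Print a one-line summary of every recorded generation attempt.
for attempt in db.select_all_attempts():
    print(attempt.id, attempt.run_time, attempt.status, attempt.reason)
```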
File renamed without changes.
