Skip to content

Commit cf9b5cd

Browse files
committed
feat: implement trailmark (@tob-scott-a, @pbottine)
0 parents  commit cf9b5cd

File tree

98 files changed

+34170
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

98 files changed

+34170
-0
lines changed

.github/dependabot.yml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
# Dependabot configuration: weekly, grouped update PRs for Python packages
# and GitHub Actions, each with a 7-day cooldown.
version: 2

updates:
  # Python packages, looked up from the repository root.
  - package-ecosystem: pip
    directory: /
    schedule:
      interval: weekly
    cooldown:
      default-days: 7
    groups:
      # A single group named "all" whose pattern matches every dependency.
      all:
        patterns:
          - "*"

  # GitHub Actions, same cadence and grouping as above.
  - package-ecosystem: github-actions
    directory: /
    schedule:
      interval: weekly
    cooldown:
      default-days: 7
    groups:
      all:
        patterns:
          - "*"

.github/workflows/ci.yml

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
# CI: lint, type-check, and test on pushes and pull requests targeting main.
name: CI

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

# One concurrency group per workflow + ref; a newer run cancels the one
# still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

# Workflow token is limited to reading repository contents.
permissions:
  contents: read

jobs:
  lint:
    name: Lint
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: false

      # Lint rules.
      - uses: astral-sh/ruff-action@4919ec5cf1f49eff0871dbcea0da843445b837e6 # v3.6.1
        with:
          args: check src/ tests/

      # Formatting, check-only (no files are rewritten).
      - uses: astral-sh/ruff-action@4919ec5cf1f49eff0871dbcea0da843445b837e6 # v3.6.1
        with:
          args: format --check src/ tests/

  type-check:
    name: Type Check
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: false

      - uses: astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b # v7.3.0

      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: "3.13"

      - run: uv sync --all-groups

      # NOTE(review): AGENTS.md and CONTRIBUTING.md document this check as
      # `uv run ty check`; here ty is installed separately as a uv tool.
      # Confirm whether ty is a project dependency and align the two.
      - run: uv tool install ty && ty check

  test:
    name: Test
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version:
          - "3.13"
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: false

      - uses: astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b # v7.3.0

      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: ${{ matrix.python-version }}

      - run: uv sync --all-groups

      - run: uv run pytest -q tests/

.github/workflows/mutation.yml

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
# Mutation testing with mutmut on pushes and pull requests targeting main.
name: Mutation Testing

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

# One concurrency group per workflow + ref; a newer run cancels the one
# still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

# Workflow token is limited to reading repository contents.
permissions:
  contents: read

jobs:
  mutmut:
    name: Mutation Testing
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: false

      - uses: astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b # v7.3.0

      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: "3.13"

      - run: uv sync --all-groups

      - name: Run mutation testing
        run: uv run mutmut run

      # always(): print the results summary even when the run step fails.
      - name: Show results
        if: always()
        run: uv run mutmut results

.github/workflows/semgrep.yml

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
# Semgrep static analysis on pushes and pull requests targeting main.
name: Semgrep

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

# One concurrency group per workflow + ref; a newer run cancels the one
# still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

# Workflow token is limited to reading repository contents.
permissions:
  contents: read

jobs:
  semgrep:
    name: Semgrep Scan
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: false

      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: "3.13"

      - name: Install Semgrep
        run: pip install semgrep

      # Folded scalar: every line below sits at the same indent so the lines
      # join into one command. Indenting a line further would keep its
      # newline and split the command.
      - name: Run Semgrep
        run: >
          semgrep scan
          --metrics=off
          --error
          --config p/python
          --config p/security-audit
          --config p/secrets
          --include="*.py"
          .

.github/workflows/zizmor.yml

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
# Zizmor audit of the GitHub Actions workflows. Triggered only when files
# under .github/workflows/ change on pushes or pull requests targeting main.
name: Zizmor

on:
  push:
    branches:
      - main
    paths:
      - ".github/workflows/**"
  pull_request:
    branches:
      - main
    paths:
      - ".github/workflows/**"

# One concurrency group per workflow + ref; a newer run cancels the one
# still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

# Read-only access to repository contents and Actions metadata.
permissions:
  contents: read
  actions: read

jobs:
  zizmor:
    name: Workflow Security Audit
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: false

      - uses: zizmorcore/zizmor-action@0dce2577a4760a2749d8cfb7a84b7d5585ebcb7d # v0.5.0
        with:
          version: latest
          advanced-security: false

.gitignore

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
# Tool caches and local tool state
.coverage
.hypothesis
.pytest_cache
.ruff_cache
.serena
.supply-chain-risk-auditor

# Editor / IDE
.idea

# Python bytecode
*.pyc
/src/trailmark/__pycache__
/src/trailmark/*/__pycache__
/src/trailmark/*/*/__pycache__
/tests/__pycache__

# uv lockfile
uv.lock

# mutmut working directory
mutants/

# Compiled binaries under the custom tree-sitter package
src/trailmark/tree_sitter_custom/**/*.so
src/trailmark/tree_sitter_custom/**/*.pyd

AGENTS.md

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# Agents
2+
3+
Development context for AI agents working on this codebase.
4+
5+
## Project Overview
6+
7+
Trailmark parses source code into directed graphs of functions, classes,
8+
calls, and semantic metadata for security analysis. It supports 16
9+
languages via tree-sitter (plus a bundled Circom grammar) and uses
10+
rustworkx for graph traversal.
11+
12+
## Architecture
13+
14+
```
15+
CodeGraph (data) -> GraphStore (indexed storage) -> QueryEngine (facade)
16+
```
17+
18+
- **CodeGraph** holds raw nodes, edges, annotations, entrypoints. Mutable.
19+
- **GraphStore** wraps CodeGraph in a rustworkx PyDiGraph. Validates
20+
node existence. Returns model objects.
21+
- **QueryEngine** resolves names to node IDs, delegates to GraphStore,
22+
returns plain dicts for JSON serialization.
23+
24+
## Key Conventions
25+
26+
- All public methods return `False` or `[]` for missing nodes. Never raise.
27+
- QueryEngine returns dicts; GraphStore returns model objects.
28+
- Node IDs follow `module:function`, `module:Class`, `module:Class.method`.
29+
- Edge confidence: `certain`, `inferred`, `uncertain`.
30+
- Annotation sources: `"llm"`, `"docstring"`, `"manual"`.
31+
- Frozen dataclasses for immutable data. `CodeGraph` is mutable.
32+
- No relative (`..`) imports. Use absolute imports from `trailmark.*`.
33+
34+
## File Layout
35+
36+
```
37+
src/trailmark/
  models/               # Data classes: CodeUnit, CodeEdge, Annotation, CodeGraph
    graph.py            # CodeGraph with add_annotation, clear_annotations, merge
    nodes.py            # CodeUnit, Parameter, TypeRef, BranchInfo
    edges.py            # CodeEdge, EdgeKind, EdgeConfidence
    annotations.py      # Annotation, AnnotationKind, EntrypointTag
  parsers/              # Language-specific tree-sitter parsers
    base.py             # BaseParser protocol
    _common.py          # Shared parser utilities
    python/             # One subpackage per language
    javascript/
    ...
  storage/
    graph_store.py      # GraphStore: rustworkx-backed indexed storage
  query/
    api.py              # QueryEngine: high-level facade
  cli.py                # CLI entry point
tests/                  # pytest test suite
55+
```
56+
57+
## Running Checks
58+
59+
```bash
60+
uv run ruff check --fix src/ tests/
61+
uv run ruff format src/ tests/
62+
uv run ty check
63+
uv run pytest -q
64+
```
65+
66+
## Mutation Testing
67+
68+
```bash
69+
uv run mutmut run
70+
uv run mutmut results
71+
```
72+
73+
### macOS Fork Safety
74+
75+
mutmut uses `fork()` which segfaults with rustworkx on macOS. Set:
76+
77+
```bash
78+
export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES
79+
```
80+
81+
This is not needed on Linux/CI (Ubuntu).
82+
83+
## Adding Features
84+
85+
- Follow the three-layer pattern: add to CodeGraph first, then GraphStore
86+
(with validation), then QueryEngine (with name resolution and dict
87+
conversion).
88+
- Add tests at each layer: `test_models.py`, `test_storage.py`,
89+
`test_query.py`.
90+
- Update `README.md` if adding user-facing API.

CONTRIBUTING.md

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# Contributing to Trailmark
2+
3+
## Setup
4+
5+
Requires Python >= 3.13 and [uv](https://docs.astral.sh/uv/).
6+
7+
```bash
8+
uv sync --all-groups
9+
```
10+
11+
## Running Checks
12+
13+
Run all three before submitting changes:
14+
15+
```bash
16+
# Lint and format
17+
uv run ruff check --fix src/ tests/
18+
uv run ruff format src/ tests/
19+
20+
# Type check
21+
uv run ty check
22+
23+
# Tests
24+
uv run pytest -q
25+
```
26+
27+
## Mutation Testing
28+
29+
Trailmark uses [mutmut](https://mutmut.readthedocs.io/) to verify test
30+
suite quality. Mutmut generates source code mutations and confirms that
31+
tests catch each one.
32+
33+
```bash
34+
uv run mutmut run
35+
uv run mutmut results
36+
```
37+
38+
### macOS: Fork Safety
39+
40+
mutmut uses `fork()` to isolate mutation runs. On macOS, this conflicts
41+
with the Objective-C runtime and with native extensions like rustworkx
42+
(a Rust/C extension). You **must** set this environment variable:
43+
44+
```bash
45+
export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES
46+
```
47+
48+
Without it, mutmut will segfault on every mutant. This is not needed on
49+
Linux (CI runs on Ubuntu without issue).
50+
51+
## Architecture
52+
53+
Trailmark has a three-layer architecture:
54+
55+
1. **CodeGraph** (`src/trailmark/models/graph.py`) -- mutable data
56+
container holding nodes, edges, annotations, and entrypoints.
57+
2. **GraphStore** (`src/trailmark/storage/graph_store.py`) -- wraps
58+
CodeGraph in a rustworkx `PyDiGraph` with bidirectional ID/index
59+
mappings. Validates node existence before mutations.
60+
3. **QueryEngine** (`src/trailmark/query/api.py`) -- high-level facade
61+
that resolves names, delegates to GraphStore, and returns plain dicts.
62+
63+
### Conventions
64+
65+
- **No exceptions for missing nodes.** All methods return `False` or `[]`
66+
when a node is not found.
67+
- **QueryEngine returns dicts**, GraphStore returns model objects.
68+
- **Helper functions** like `_unit_to_dict()`, `_edge_to_dict()`,
69+
`_annotation_to_dict()` live at module level alongside their class.
70+
- **Frozen dataclasses** for immutable data (`CodeUnit`, `CodeEdge`,
71+
`Annotation`). `CodeGraph` is mutable (not frozen).
72+
- **No relative imports.** Use absolute imports from `trailmark.*`.
73+
74+
## Adding a New Language Parser
75+
76+
1. Create `src/trailmark/parsers/<lang>/parser.py` implementing the
77+
`BaseParser` protocol from `src/trailmark/parsers/base.py`.
78+
2. Register the language in `src/trailmark/parsers/__init__.py`.
79+
3. Add tests in `tests/test_<lang>_parser.py`.
80+
4. Update the language table in `README.md`.

0 commit comments

Comments
 (0)