Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
127 commits
Select commit Hold shift + click to select a range
4e7c2c2
feat(v2): implement P0-01 strict schema promotion
caviri Feb 23, 2026
e60a3ca
feat(v2): implement P0-02 agent schema promotion
caviri Feb 23, 2026
25cad65
chore: update dependencies and .gitignore
caviri Feb 23, 2026
8fa9881
feat(v2): implement P0-03 test infrastructure
caviri Feb 23, 2026
5f3d34c
chore: Remove organization enrichment tests from the test suite
caviri Feb 23, 2026
f7cd949
feat(v2): implement P0-04 strict schema validation tests
caviri Feb 23, 2026
35f1751
test(v2): implement P0-05 agent schema valid-fixture checks
caviri Feb 23, 2026
911c3fd
test(v2): add P0-06 strict negative schema validation
caviri Feb 23, 2026
7e6f4cb
feat(v2): add mock github provider fixtures and interface
caviri Feb 23, 2026
26be017
feat(v2): add mock infoscience and ror providers
caviri Feb 23, 2026
9c8a15e
feat(v2): add deterministic mock dataset generator
caviri Feb 23, 2026
3fb2fe8
chore(v2): normalize mock generator constants to ascii
caviri Feb 23, 2026
9da90b3
feat(v2): add mock ORCID provider for P0-08
caviri Feb 23, 2026
5b97d65
feat(v2): add cross-reference validation for P0-12
caviri Feb 23, 2026
2c20d23
test(v2): add red-phase golden tests for P0-13 and P0-14
caviri Feb 23, 2026
c910e35
docs(v2): advance entry task and log P0-08/P0-12/P0-14
caviri Feb 23, 2026
48100e5
feat(v2): scaffold phase-1 package skeleton
caviri Feb 23, 2026
ee27544
docs(v2): advance task pointer and record p1-01 validation
caviri Feb 23, 2026
1135813
feat(v2): add config module and github url classifier
caviri Feb 23, 2026
fcd687d
docs(v2): advance phase-1 task tracker and changelog
caviri Feb 23, 2026
2a74c71
feat(v2): add P1-05 response contracts
caviri Feb 23, 2026
a0a9dc1
feat(v2): add P1-06 error models
caviri Feb 23, 2026
54b68b3
feat(v2): implement P1-07 and P1-08 stub endpoints
caviri Feb 23, 2026
ed2de49
docs(v2): advance entry task and log P1-05 to P1-08
caviri Feb 23, 2026
62f6f6e
feat(v2): mount v2 router in main api
caviri Feb 23, 2026
0f208ad
feat(v2): add v2 health check endpoint
caviri Feb 23, 2026
e4a8ea7
docs(v2): advance entry task and log P1-09 P1-10
caviri Feb 23, 2026
56dfc87
feat(v2): implement phase-2 provider interfaces and agent wrappers
caviri Feb 23, 2026
d80dda5
docs(v2): advance phase entry task and log phase-2 testing
caviri Feb 23, 2026
44d5ebd
feat(v2): implement phase 2 tasks p2-05 through p2-09
caviri Feb 24, 2026
9229b77
fix(api): migrate pydantic v2 validators and fastapi lifespan
caviri Feb 24, 2026
648d5cd
feat(v2): add strict and shacl validation gates
caviri Feb 24, 2026
680045e
feat(v2): add reconciliation pipeline and enum-aligned models
caviri Feb 24, 2026
8065082
chore(dependencies): update rdflib and pyshacl versions, add griffe d…
caviri Feb 24, 2026
7395e98
refactor(v2): canonicalize infoscience ids to core items endpoint
caviri Feb 24, 2026
7b61757
docs: record infoscience canonical id policy and validation evidence
caviri Feb 24, 2026
77b5622
docs(v2): record phase-3 validation evidence and phase-4 handoff
caviri Feb 24, 2026
4fc7613
feat(v2): add sqlite graph store migrations and CRUD
caviri Feb 24, 2026
f8f0f90
docs: update v2 graph-store handoff and changelog
caviri Feb 24, 2026
46e6e70
chore(v2): guard destructive rollback and align docs
caviri Feb 24, 2026
f8e08a7
feat(v2-graph): add run tracking, provenance-aware upsert, and RDF sync
caviri Feb 24, 2026
3783961
feat(graph): normalize rdf entity types and refresh phase handoff docs
caviri Feb 24, 2026
72e7743
feat(graph): add org alias resolution and sqlite write contention saf…
caviri Feb 24, 2026
18eee02
feat(v2): add phase-8 live provider capture and validation tooling
caviri Feb 24, 2026
63d9f04
feat(v2): implement JSON-LD graph export and graph API filtering
caviri Feb 24, 2026
83be5d9
chore: update .gitignore to include .internal/RISKS.md
caviri Feb 24, 2026
d422776
feat(v2): add shared intermediates envelopes and stats stages
caviri Feb 24, 2026
a20c4f7
feat: add v2 logfire bootstrap module
caviri Feb 24, 2026
99912e4
feat(logfire): enhance connectivity checks and credential handling fo…
caviri Feb 24, 2026
cdb5104
feat(v2): instrument request, agent, and pipeline tracing
caviri Feb 24, 2026
83a2a17
feat(v2): add run correlation metrics and structured error events
caviri Feb 24, 2026
a0b5389
feat(v2): add generated model freshness and ttl-schema CI gates
caviri Feb 24, 2026
e11b73e
feat: add v2 migration gates, provider throttling, and parity docs
caviri Feb 24, 2026
63d5edf
chore: update .gitignore to include .logfire
caviri Feb 25, 2026
10d34d1
chore: clean up .gitignore by removing unnecessary entries
caviri Feb 25, 2026
1e5c164
feat(v2): introduce cache-bypass controls and update documentation fo…
caviri Feb 25, 2026
0e6296f
feat(v2): restrict GitHub repository-mode traversal to direct entitie…
caviri Feb 25, 2026
ab38032
feat(v2): add support for optional GitHub author inclusion and enhanc…
caviri Feb 25, 2026
84fa6e5
feat(v2): add typed entity buckets and publication contracts
caviri Feb 25, 2026
84625ee
feat(v2): add deterministic class agents for article links
caviri Feb 25, 2026
6476eae
feat(v2): orchestrate six-class agent stages
caviri Feb 25, 2026
b3f65fa
feat(v2): integrate reconciliation-first strict and SHACL extract gates
caviri Feb 25, 2026
af6ad22
feat(v2): finalize extract output contracts and jsonld build stage
caviri Feb 25, 2026
9d12c63
feat(v2): integrate graph store writes and intermediates APIs
caviri Feb 25, 2026
955f50e
test(v2): tighten extract and graph regression contracts
caviri Feb 25, 2026
463386d
fix(v2): normalize repository creation timestamps for strict extract …
caviri Feb 25, 2026
b654919
v2: enforce uuid4 agent ids and disable synthetic fallbacks by default
caviri Feb 26, 2026
d4346b5
chore: update package dependencies in uv.lock and modify test script …
caviri Feb 26, 2026
4bfdc90
chore(v2): update AGENTS.md to reflect new entry task and adjust Info…
caviri Feb 26, 2026
da352b8
feat(v2): enhance person fanout orchestration to skip GitHub organiza…
caviri Feb 26, 2026
abd7c8c
feat(v2): enhance organization and membership resolution with alterna…
caviri Feb 26, 2026
ea9a199
chore(v2): update AGENTS.md with new entry task and enhance RDF coerc…
caviri Feb 26, 2026
59c9e75
feat(v2): implement JSON-LD context enhancements and organization ide…
caviri Feb 26, 2026
56ba704
Canonicalize pre-resolved v2 entity IDs by idSource
caviri Feb 26, 2026
2f81bd4
Aggregate class entities from stats and relax class empty retries
caviri Feb 26, 2026
51edb7c
Fail fast on required GitHub errors and tighten person/org linking
caviri Feb 26, 2026
e26839b
Promote JSON-LD IRI typing for license citation and org hierarchy
caviri Feb 26, 2026
d7ca8b4
Improve org alias resolution and harden org-account person filtering
caviri Feb 26, 2026
00d6775
Reduce non-actionable warning noise in person and class agents
caviri Feb 26, 2026
f3e9fae
Reduce actionable GIMIE warnings via aliasing and author filtering
caviri Feb 27, 2026
b4ae7c9
fix(v2): Enforce ontology parity by removing `schema:alternateName` f…
caviri Feb 27, 2026
b8dc349
v2 article agent: normalize year-only dates and enrich warning context
caviri Feb 27, 2026
500bc63
v2 article warnings: dedupe unresolved-id notice and add match counts
caviri Feb 27, 2026
559f698
feat(v2): Remove v2 dependency on the v1 TTL cache system, eliminatin…
caviri Feb 27, 2026
30df232
feat(v2): Introduce LLM repository agent with runtime selection and t…
caviri Mar 3, 2026
e3007d4
feat(v2): Add logging configuration to LLM repository agent for impro…
caviri Mar 3, 2026
e762af4
feat(v2): Enhance testing workflows and documentation. Add `pytest-te…
caviri Mar 3, 2026
acd44a5
feat(v2): Introduce LLMPersonAgentV2 for person metadata extraction. …
caviri Mar 4, 2026
6cbfe4d
refactor(justfile): Update linting and type-check commands to run wit…
caviri Mar 4, 2026
f47b34f
chore: Remove debug cache manager database file to clean up temporary…
caviri Mar 4, 2026
537570b
feat(v2): Implement runtime prompt context propagation and enforce pe…
caviri Mar 4, 2026
ca719c5
chore: Remove the RISKS.md file, which contained a risk register for …
caviri Mar 4, 2026
122fa0a
feat(v2): Introduce LLMOrganizationAgentV2 for organization metadata …
caviri Mar 4, 2026
acedc6c
refactor(v2): Remove `schema:alternateName` from organization handlin…
caviri Mar 4, 2026
ff83160
feat(v2): Introduce LLMArticleAgentV2, LLMContributionAgentV2, and LL…
caviri Mar 4, 2026
941ffc9
feat(v2): Introduce LLMLinkVeracityAgentV2 for link relationship veri…
caviri Mar 4, 2026
a8e8765
feat(v2): Enhance entity reconciliation and validation processes. Imp…
caviri Mar 4, 2026
152e0e9
feat(v2): Enhance API and pipeline with link veracity verification an…
caviri Mar 5, 2026
47bc3c9
fix(tests): Update expected triples count in GitHub repository extrac…
caviri Mar 5, 2026
4039e41
feat(v2): Enhance organization identity reconciliation and prompt con…
caviri Mar 5, 2026
1d1d44c
feat(v2): Introduce LLM deduplication and critic stages in extraction…
caviri Mar 5, 2026
08ea271
feat(v2): Enhance LLM extraction pipeline with new tools and link val…
caviri Mar 6, 2026
7661c2b
feat(v2): Add context summary feature to LLM extraction pipeline. Int…
caviri Mar 6, 2026
b7d97de
fix: Update base URL for OpenAI-compatible model configurations to co…
caviri Mar 6, 2026
6c7829d
feat(v2): Add prototype body-based extract endpoint and related enhan…
caviri Mar 6, 2026
641050e
refactor: Improve error handling and logging in GIMIE analysis. Enhan…
caviri Mar 25, 2026
151d2f6
feat(devcontainer): Add SSH feature and password setup script. Update…
caviri Mar 25, 2026
e5547c2
chore(env): Update .env.example to include optional devcontainer SSH …
caviri Mar 25, 2026
89989da
feat(devcontainer): Introduce docker-compose setup for development en…
caviri Mar 25, 2026
03a3e27
feat(devcontainer): Update environment configuration for caching and …
caviri Mar 25, 2026
8ac08d9
feat(devcontainer): Enhance .env.example and docker-compose.yml with …
caviri Mar 25, 2026
5357a67
chore: Update .env.example and .gitignore for improved development se…
caviri Apr 29, 2026
0116416
feat(refactoring): Introduce new agents and modules for EPFL relation…
caviri Apr 29, 2026
34294a7
refactor(v2): Remove deprecated compatibility layer and legacy import…
caviri Apr 29, 2026
629c42e
refactor(simplifying v2): Replace JSONLDExporter with load_jsonld_con…
caviri Apr 29, 2026
4a66f0d
refactor(v2): Remove Logfire integration and related observability co…
caviri Apr 29, 2026
2c9fd90
refactor(v2): Remove obsolete scripts and testing utilities. Delete c…
caviri Apr 29, 2026
997a691
feat(v2 post endpoint): Enhance environment configuration and API doc…
caviri Apr 30, 2026
ff9139e
feat(org relationships): Implement LLM-based organization hierarchy d…
caviri Apr 30, 2026
1b7b467
feat(api): Add link veracity and max concurrent agents configuration.…
caviri Apr 30, 2026
f8f7c3c
feat(ownership check): Implement guarantee_repo_author function to ha…
caviri Apr 30, 2026
6ec99ee
feat(devcontainer): Add Qdrant service to Docker Compose configuratio…
caviri May 1, 2026
cbba58e
feat(rag system and docker compose): Remove obsolete .env.dist file a…
caviri May 1, 2026
cfd7656
feat(RAGs): Enhance .env.example with detailed settings for SWISSUbas…
caviri May 3, 2026
9777306
feat(auth): Implement bearer token authentication for all `/v1/*` rou…
caviri May 4, 2026
e53c63b
feat(env and documentation): Update .env.example to remove deprecated…
caviri May 5, 2026
ce4eb7b
feat(github integration): Enhance GitHub provider with retry logic fo…
caviri May 6, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
18 changes: 18 additions & 0 deletions .devcontainer/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copy to `.devcontainer/.env` for docker-compose variable substitution.
# Compose reads this file from the `.devcontainer/` directory (not repo-root `.env` for these keys).
#
# Host port mappings (optional):
# SSH_PORT=2220
# APP_PORT=1234
# DEV_PORT=8888
#
# DNS inside the container (optional; defaults are 1.1.1.1 + 8.8.8.8 in docker-compose.yml).
# Use your corporate resolvers if public DNS is blocked:
# DEVCONTAINER_DNS_1=10.0.0.1
# DEVCONTAINER_DNS_2=10.0.0.2
#
# Selenium (standalone Firefox service `gme-selenium-firefox` in docker-compose.yml;
# the default URL is http://gme-selenium-firefox:4444). Override the URL only if you use an external grid, e.g.:
# SELENIUM_REMOTE_URL=http://selenium-standalone-firefox:4444
# Host ports if 4444 / 7900 are already in use:
# SELENIUM_GRID_PORT=4445
# SELENIUM_VNC_PORT=7901
11 changes: 2 additions & 9 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
FROM ghcr.io/astral-sh/uv:python3.12-bookworm

# Set locale to avoid warnings
ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8

# Install just and other system dependencies
RUN apt-get update && apt-get install -y \
sudo \
curl \
Expand All @@ -13,16 +11,11 @@ RUN apt-get update && apt-get install -y \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# Create a non-root user with the UID/GID that VS Code typically uses (1000:1000).
# TODO: Take this user out of sudoers if you want to use this in fully agentic mode.
RUN useradd -ms /bin/bash -u 1000 vscode \
&& apt-get update && apt-get install -y sudo \
&& apt-get update \
&& apt-get install -y sudo \
&& echo "vscode ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers

# Gemini CLI
# Please login outside of the container and copy your credentials to ~/.gemini/...
RUN curl -fsSL https://deb.nodesource.com/setup_24.x | sudo -E bash - && sudo apt-get install -y nodejs
RUN npm install -g @google/gemini-cli

RUN mkdir -p /app/data \
&& chown -R 1000:1000 /app/data \
Expand Down
28 changes: 14 additions & 14 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
{
"name": "git-metadata-extractor-dev",
"build": {
"dockerfile": "Dockerfile"
"dockerComposeFile": "docker-compose.yml",
"service": "devcontainer",
"workspaceFolder": "/workspaces/project",
"containerEnv": {
"UV_CACHE_DIR": "/workspaces/project/.uv-cache"
},
"overrideCommand": false,
"features": {
"ghcr.io/devcontainers/features/sshd:1": {
"version": "latest"
}
},
"runArgs": [
"--env-file",
"${localWorkspaceFolder}/.env",
"--network",
"dev"
],
"remoteUser": "vscode",
"workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}",
"customizations": {
"vscode": {
"settings": {
Expand All @@ -26,8 +28,6 @@
]
}
},
"forwardPorts": [
1234
],
"postCreateCommand": "rm -rf .venv && uv venv && uv pip install -e .[dev] && echo '. $PWD/.venv/bin/activate' >> /home/vscode/.bashrc"
}
"postCreateCommand": "mkdir -p .uv-cache && rm -rf .venv && uv venv && uv pip install -e .[dev] && echo '. $PWD/.venv/bin/activate' >> /home/vscode/.bashrc",
"postStartCommand": "bash .devcontainer/set-vscode-password.sh"
}
61 changes: 61 additions & 0 deletions .devcontainer/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Dev container stack. Compose publishes ports on the host (more reliable than
# devcontainer forwardPorts in some setups). Interpolation vars (SSH_PORT, etc.)
# can be set in `.devcontainer/.env` (see `.devcontainer/.env.example`).
#
# Internal SSH: devcontainers `sshd` feature listens on 2222, not 22 — map host:2222.
#
# Explicit DNS: containers on external networks (e.g. `dev`) sometimes get no working resolver
# and `uv pip` fails with "dns error" / "failed to lookup address information".
# Override in `.devcontainer/.env`: DEVCONTAINER_DNS_1 / DEVCONTAINER_DNS_2.
services:
  # Main development container: built from .devcontainer/Dockerfile with the
  # repository root as build context, and the repository bind-mounted at
  # /workspaces/project.
  devcontainer:
    build:
      context: ..
      dockerfile: .devcontainer/Dockerfile
    # Explicit resolvers; override via DEVCONTAINER_DNS_1 / DEVCONTAINER_DNS_2.
    dns:
      - "${DEVCONTAINER_DNS_1:-1.1.1.1}"
      - "${DEVCONTAINER_DNS_2:-8.8.8.8}"
    # Repo-root .env (path is relative to this compose file) is injected into
    # the container environment.
    env_file:
      - ../.env
    environment:
      # Avoid ~/.cache/uv (often root-owned after sshd/common-utils); workspace is bind-mounted as vscode.
      UV_CACHE_DIR: /workspaces/project/.uv-cache
      # Defaults to the bundled `gme-selenium-firefox` service on the shared network.
      SELENIUM_REMOTE_URL: "${SELENIUM_REMOTE_URL:-http://gme-selenium-firefox:4444}"
    # Host ports are overridable; container ports are fixed.
    ports:
      - "${SSH_PORT:-2222}:2222"
      - "${APP_PORT:-1234}:1234"
      - "${DEV_PORT:-8888}:8888"
    volumes:
      - ..:/workspaces/project:cached
    # Keep the container alive so the dev tooling can attach to it.
    command: sleep infinity
    networks:
      - dev

  # Qdrant instance; storage is persisted on the host under data/qdrant/storage.
  # NOTE(review): `latest` here (and the untagged Selenium image below) are
  # unpinned; consider pinning versions for reproducible dev environments.
  gme-qdrant:
    image: qdrant/qdrant:latest
    container_name: gme-qdrant
    ports:
      - "6333:6333"
      - "6334:6334"
    volumes:
      - ../data/qdrant/storage:/qdrant/storage
    restart: unless-stopped
    networks:
      - dev

  # README "Option B": multi-session standalone Firefox (ORCID, Selenium-backed tools).
  gme-selenium-firefox:
    image: selenium/standalone-firefox
    container_name: gme-selenium-firefox
    # Host ports are overridable via SELENIUM_GRID_PORT / SELENIUM_VNC_PORT.
    ports:
      - "${SELENIUM_GRID_PORT:-4444}:4444"
      - "${SELENIUM_VNC_PORT:-7900}:7900"
    shm_size: "2g"
    environment:
      SE_NODE_MAX_SESSIONS: "5"
      SE_NODE_SESSION_TIMEOUT: "300"
    restart: unless-stopped
    networks:
      - dev

# `dev` is declared external, so Compose does not create it: the network must
# already exist (e.g. `docker network create dev`) before bringing the stack up.
networks:
  dev:
    external: true
8 changes: 8 additions & 0 deletions .devcontainer/set-vscode-password.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Set the login password of user `vscode` from VSCODE_PASSWORD when the
# container starts, so the password is never baked into the image.
# VSCODE_PASSWORD is read from the container environment (e.g. the repo-root
# .env that .devcontainer/docker-compose.yml passes in through `env_file`).
set -euo pipefail

# No password configured (unset or empty): leave the account untouched.
[[ -n "${VSCODE_PASSWORD:-}" ]] || exit 0

# chpasswd reads `user:password` pairs from stdin.
printf 'vscode:%s\n' "${VSCODE_PASSWORD}" | sudo chpasswd
12 changes: 0 additions & 12 deletions .env.dist

This file was deleted.

Loading
Loading