Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
192 changes: 192 additions & 0 deletions .github/workflows/cleanup_images.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
name: Cleanup Development Images

on:
  # A pull request was closed, so its image is no longer needed.
  pull_request:
    types:
      - closed

  # A ref was deleted; the job filters for branch deletions.
  delete:

  # Weekly sweep of stale images.
  schedule:
    - cron: '0 2 * * 0'  # Sundays, 02:00 UTC

  # On-demand run with tunable age threshold and tag pattern.
  workflow_dispatch:
    inputs:
      days_old:
        type: string
        required: false
        default: '100'
        description: 'Delete images older than X days'
      tag_pattern:
        type: string
        required: false
        default: 'pr-*'
        description: 'Tag pattern to clean (e.g., pr-*, develop, feature-*)'

jobs:
cleanup-pr-images:
  # Only run for closed PRs
  if: github.event_name == 'pull_request' && github.event.action == 'closed'
  runs-on: ubuntu-latest
  permissions:
    packages: write

  steps:
    - name: Delete PR image
      uses: actions/github-script@v7
      with:
        script: |
          // Deletes the container image version tagged pr-<number> for the
          // pull request that triggered this run. Best-effort: any failure is
          // logged and the job still succeeds.
          const prNumber = context.payload.pull_request.number;
          // Images are published as ghcr.io/<owner>/<repo>, so the package is
          // named after the repository (GHCR image names are lowercase).
          const packageName = context.repo.repo.toLowerCase();
          const tag = `pr-${prNumber}`;

          try {
            // Walk every page of results: a single request returns only one
            // page, so the PR tag could otherwise be missed.
            const versions = await github.paginate(
              github.rest.packages.getAllPackageVersionsForPackageOwnedByOrg,
              {
                package_type: 'container',
                package_name: packageName,
                org: context.repo.owner,
                per_page: 100
              }
            );

            const prVersion = versions.find(v =>
              v.metadata?.container?.tags?.includes(tag)
            );

            if (prVersion) {
              await github.rest.packages.deletePackageVersionForOrg({
                package_type: 'container',
                package_name: packageName,
                org: context.repo.owner,
                package_version_id: prVersion.id
              });
              console.log(`✅ Deleted image with tag: ${tag}`);
            } else {
              console.log(`ℹ️ No image found with tag: ${tag}`);
            }
          } catch (error) {
            console.log(`⚠️ Error cleaning up image ${tag}:`, error.message);
            // Don't fail the workflow if cleanup fails
          }

cleanup-branch-images:
  # Only run when branches are deleted
  if: github.event_name == 'delete' && github.event.ref_type == 'branch'
  runs-on: ubuntu-latest
  permissions:
    packages: write

  steps:
    - name: Delete branch image
      uses: actions/github-script@v7
      with:
        script: |
          // Deletes the container image version whose tag corresponds to the
          // branch that was just deleted. Best-effort: any failure is logged
          // and the job still succeeds.
          const branchName = context.payload.ref;
          // Images are published as ghcr.io/<owner>/<repo>, so the package is
          // named after the repository (GHCR image names are lowercase).
          const packageName = context.repo.repo.toLowerCase();
          // Sanitize branch name to match Docker tag format
          const tag = branchName.replace(/[^a-zA-Z0-9._-]/g, '-').toLowerCase();

          try {
            // Walk every page of results: a single request returns only one
            // page, so the branch tag could otherwise be missed.
            const versions = await github.paginate(
              github.rest.packages.getAllPackageVersionsForPackageOwnedByOrg,
              {
                package_type: 'container',
                package_name: packageName,
                org: context.repo.owner,
                per_page: 100
              }
            );

            const branchVersion = versions.find(v =>
              v.metadata?.container?.tags?.includes(tag)
            );

            if (branchVersion) {
              await github.rest.packages.deletePackageVersionForOrg({
                package_type: 'container',
                package_name: packageName,
                org: context.repo.owner,
                package_version_id: branchVersion.id
              });
              console.log(`✅ Deleted image with tag: ${tag} (branch: ${branchName})`);
            } else {
              console.log(`ℹ️ No image found with tag: ${tag} (branch: ${branchName})`);
            }
          } catch (error) {
            // Cleanup is best-effort; log and let the job succeed.
            console.log(`⚠️ Error cleaning up image ${tag}:`, error.message);
          }

cleanup-old-images:
  # Run on schedule or manual trigger
  if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
  runs-on: ubuntu-latest
  permissions:
    packages: write

  steps:
    - name: Cleanup old development images
      uses: actions/github-script@v7
      env:
        # Inputs are handed over as environment variables instead of being
        # interpolated into the script text, so a quote in a value cannot
        # break or inject code. Both are empty on scheduled runs.
        DAYS_OLD: ${{ inputs.days_old }}
        TAG_PATTERN: ${{ inputs.tag_pattern }}
      with:
        script: |
          // Deletes image versions that are older than the cutoff, carry at
          // least one tag matching the pattern, and carry no protected tag
          // (latest or a version number). Individual delete failures are
          // logged; a failure to list versions fails the job.

          // Images are published as ghcr.io/<owner>/<repo>, so the package is
          // named after the repository (GHCR image names are lowercase).
          const packageName = context.repo.repo.toLowerCase();

          // Reject anything that is not a non-negative integer. A NaN here
          // would produce an invalid cutoff date, the age check below would
          // never skip anything, and every matching image would be deleted.
          const rawDays = process.env.DAYS_OLD || '100';
          const daysOld = Number(rawDays);
          if (!Number.isInteger(daysOld) || daysOld < 0) {
            throw new Error(`days_old must be a non-negative integer, got "${rawDays}"`);
          }

          const tagPattern = process.env.TAG_PATTERN || 'pr-*';
          // Treat the pattern as an anchored glob: every '*' matches any run
          // of characters and everything else is literal. 'pr-*' matches tags
          // starting with 'pr-'; 'develop' matches only 'develop'.
          const escapeRegex = text => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
          const tagRegex = new RegExp(
            '^' + tagPattern.split('*').map(escapeRegex).join('.*') + '$'
          );

          const cutoffDate = new Date();
          cutoffDate.setDate(cutoffDate.getDate() - daysOld);

          console.log(`🧹 Cleaning up images older than ${daysOld} days (before ${cutoffDate.toISOString()})`);
          console.log(`🎯 Tag pattern: ${tagPattern}`);

          try {
            // Walk every page so versions beyond the first 100 are seen too.
            const versions = await github.paginate(
              github.rest.packages.getAllPackageVersionsForPackageOwnedByOrg,
              {
                package_type: 'container',
                package_name: packageName,
                org: context.repo.owner,
                per_page: 100
              }
            );

            let deletedCount = 0;

            for (const version of versions) {
              const createdAt = new Date(version.created_at);
              const tags = version.metadata?.container?.tags || [];

              // Skip if not old enough
              if (createdAt > cutoffDate) continue;

              // Check if any tag matches the pattern
              const matchesPattern = tags.some(tag => tagRegex.test(tag));

              // Skip protected tags (latest, version numbers)
              const hasProtectedTag = tags.some(tag =>
                tag === 'latest' ||
                /^\d+\.\d+\.\d+/.test(tag)
              );

              if (matchesPattern && !hasProtectedTag) {
                try {
                  await github.rest.packages.deletePackageVersionForOrg({
                    package_type: 'container',
                    package_name: packageName,
                    org: context.repo.owner,
                    package_version_id: version.id
                  });
                  console.log(`✅ Deleted old image: ${tags.join(', ')} (created: ${createdAt.toISOString()})`);
                  deletedCount++;
                } catch (error) {
                  // One failed delete must not stop the rest of the sweep.
                  console.log(`⚠️ Error deleting image ${tags.join(', ')}:`, error.message);
                }
              }
            }

            console.log(`🎉 Cleanup completed! Deleted ${deletedCount} images.`);

          } catch (error) {
            console.error('❌ Error during cleanup:', error.message);
            throw error;
          }
83 changes: 83 additions & 0 deletions .github/workflows/publish_image_in_GHCR.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
name: Build and Publish Docker Images

on:
  push:
    branches: ["main", "develop"]
  pull_request:
    branches: ["main", "develop"]
    # ready_for_review is listed explicitly: the job skips draft PRs, and
    # without this type a draft that is marked ready would not be built
    # until its next push.
    types: [opened, synchronize, reopened, ready_for_review]

  workflow_dispatch:

jobs:
  build-and-publish:
    runs-on: ubuntu-latest
    permissions:
      contents: write  # needed to create the release
      packages: write  # needed to publish the image

    # Build on pushes and manual runs. For pull requests, skip drafts and
    # PRs coming from forks: fork runs receive a read-only token and would
    # fail when pushing the image.
    if: |
      github.event_name == 'push' ||
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'pull_request' &&
      github.event.pull_request.draft == false &&
      github.event.pull_request.head.repo.full_name == github.repository)

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Extract version from pyproject.toml
        id: project_version
        run: |
          VERSION=$(sed -n '/^\[project\]/,/^\[/p' pyproject.toml | grep '^version =' | sed -E 's/version = "([^"]+)"/\1/')
          # Fail early instead of publishing an empty tag or a "v" release.
          if [ -z "${VERSION}" ]; then
            echo "::error::Could not read [project].version from pyproject.toml"
            exit 1
          fi
          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata for Docker
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ghcr.io/${{ github.repository }}
          tags: |
            # For main branch: latest and version tags
            type=raw,value=latest,enable={{is_default_branch}}
            type=raw,value=${{ steps.project_version.outputs.version }},enable={{is_default_branch}}
            # For develop branch: develop tag
            type=raw,value=develop,enable=${{ github.ref == 'refs/heads/develop' }}
            # For PRs: pr-{number} tag
            type=ref,event=pr,prefix=pr-
            # For feature branches: branch name (sanitized)
            type=ref,event=branch,enable=${{ github.ref != 'refs/heads/main' && github.ref != 'refs/heads/develop' }}
          labels: |
            org.opencontainers.image.title=${{ github.repository }}
            org.opencontainers.image.description=${{ github.event.repository.description }}
            org.opencontainers.image.url=${{ github.event.repository.html_url }}
            org.opencontainers.image.source=${{ github.event.repository.clone_url }}
            org.opencontainers.image.revision=${{ github.sha }}
            org.opencontainers.image.licenses=${{ github.event.repository.license.spdx_id }}

      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: tools/image/Dockerfile
          platforms: linux/amd64
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}

      - name: Create GitHub Release
        # Only create releases for main branch pushes
        if: github.ref == 'refs/heads/main' && github.event_name == 'push'
        uses: softprops/action-gh-release@v2
        with:
          tag_name: v${{ steps.project_version.outputs.version }}
          name: Release v${{ steps.project_version.outputs.version }}
          body_path: CHANGELOG.md
          fail_on_unmatched_files: true
8 changes: 6 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,20 @@ authors = [
{ name = "Martin Fontanet", email = "martin.fontanet@epfl.ch" },
]
description = "This repository turns strings into things"
readme = "README.md"
requires-python = ">=3.13"
requires-python = ">=3.11"
version = "0.0.1"

dependencies = [
"dotenv>=0.9.9",
"fastapi>=0.116.1",
"openai-agents>=0.0.8",
"pydantic>=2.10.6",
"pydantic-settings>=2.8.1",
"pyfuzon>=0.3.0",
"pytest>=8.4.1",
"python-multipart>=0.0.20",
"rdflib>=7.1.4",
"SPARQLWrapper>=2.0.0",
]
[dependency-groups]
dev = [
Expand Down
2 changes: 2 additions & 0 deletions src/strings2things/app/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# app/__init__.py
# Initialize app module
2 changes: 2 additions & 0 deletions src/strings2things/app/api/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# app/api/__init__.py
# Initialize API module
43 changes: 43 additions & 0 deletions src/strings2things/app/api/endpoints.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# app/api/endpoints.py

from fastapi import APIRouter, UploadFile, File, Form
from fastapi.responses import Response
from rdflib import Graph
from src.strings2things.app.core.rdf_transformer import RDFTransformer
from src.strings2things.app.core.ontology_manager import (
OntologyManager,
) # Assume this exists
from src.strings2things.app.utils.rdf_utils import (
parse_rdf,
serialize_rdf,
) # Also assume or create
import logging

# Router on which this module's endpoints are registered.
router = APIRouter()

# Created and loaded once, when this module is imported.
ontology_manager = OntologyManager()
ontology_manager.load_ontologies()

# Single transformer instance built from the manager's label map.
# NOTE(review): presumably shared by every request — confirm RDFTransformer is safe to reuse.
transformer = RDFTransformer(ontology_manager.get_label_map())


@router.post("/transform")
async def transform_rdf(
    file: UploadFile = File(...), serialization: str = Form("turtle")
) -> Response:
    """
    Accept an RDF file upload, transform it, and return the serialized result.

    Args:
        file: Uploaded RDF document; its full content is read before parsing.
        serialization: Output format name passed to ``serialize_rdf`` as
            ``output_format``. Defaults to ``"turtle"``.

    Returns:
        On success, the serialized graph as ``text/plain``. On any failure
        while parsing, transforming or serializing, a 400 response whose body
        is the exception message, also as ``text/plain``.
    """
    content = await file.read()

    try:
        input_graph = parse_rdf(content)
        transformed_graph = transformer.transform(input_graph)
        serialized = serialize_rdf(transformed_graph, output_format=serialization)
    except Exception as e:
        logging.exception("Transformation failed")
        # Label the error body explicitly; without a media type the response
        # carries no Content-Type header.
        # NOTE(review): str(e) may expose internal details to the client, and
        # server-side failures are reported as 400 too — confirm this is intended.
        return Response(content=str(e), status_code=400, media_type="text/plain")

    # The success response is built outside the try block so that only the
    # processing steps above are covered by the broad exception handler.
    # NOTE(review): a format-specific media type (e.g. text/turtle) may be
    # more accurate than text/plain — confirm what clients expect.
    return Response(content=serialized, media_type="text/plain")
26 changes: 26 additions & 0 deletions src/strings2things/app/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from typing import List
from pydantic import PrivateAttr
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """
    Application settings, read from the environment and from a ``.env`` file.

    ``ONTOLOGY_GRAPH_IRIS`` is kept as the raw comma-separated string; the
    parsed list is available through :meth:`get_graph_iris`.
    """

    GRAPHDB_USERNAME: str
    GRAPHDB_PASSWORD: str
    ONTOLOGY_SPARQL_ENDPOINT: str
    ONTOLOGY_GRAPH_IRIS: str  # raw string from .env
    FAIL_ON_AMBIGUOUS_LABELS: bool = True

    # Parsed form of ONTOLOGY_GRAPH_IRIS, filled in by __init__.
    _graph_iris: List[str] = PrivateAttr()

    def __init__(self, **kwargs):
        """Load the settings, then split ONTOLOGY_GRAPH_IRIS into a list."""
        super().__init__(**kwargs)
        # Drop blank entries so a trailing comma, a doubled comma or an empty
        # value does not produce "" as a graph IRI.
        stripped = (part.strip() for part in self.ONTOLOGY_GRAPH_IRIS.split(","))
        self._graph_iris = [iri for iri in stripped if iri]

    def get_graph_iris(self) -> List[str]:
        """Return the graph IRIs parsed from ONTOLOGY_GRAPH_IRIS."""
        return self._graph_iris

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )
2 changes: 2 additions & 0 deletions src/strings2things/app/core/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# app/core/__init__.py
# Initialize core logic module
Loading
Loading