Skip to content

Commit 6207a80

Browse files
authored
Merge pull request #8 from Imaging-Plaza/develop
v1.0.0
2 parents 6d94c22 + eb3b72c commit 6207a80

17 files changed

Lines changed: 1840 additions & 253 deletions

.env.dist

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@ OPENROUTER_API_KEY=
33
GITHUB_TOKEN=
44
GITLAB_TOKEN=
55
MODEL=
6-
PROVIDER=
6+
PROVIDER=
7+
SELENIUM_REMOTE_URL=

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,14 @@
22

33
All notable changes to this project will be documented in this file.
44

5+
## [1.0.0] - 2025-08-06
6+
7+
### Added
8+
- Users and Organization compatibility
9+
- Endpoints refactoring
10+
- Parallel calling
11+
- Multi-worker entrypoint
12+
513
## [0.1.0] - 2025-06-25
614

715
### Added

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,4 @@ COPY . .
1515

1616
ENV PYTHONUNBUFFERED=1
1717

18-
ENTRYPOINT ["uvicorn", "src.api:app", "--host", "0.0.0.0", "--port", "1234"]
18+
ENTRYPOINT ["gunicorn", "src.api:app", "--workers", "4", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:1234"]

README.md

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -84,19 +84,19 @@ If no arguments are provided, it will use the default repository and output path
8484
1. You need to build the image.
8585

8686
``` bash
87-
docker build -t llm-software-finder .
87+
docker build -t git-metadata-extractor .
8888
```
8989

9090
2. Run the image.
9191

9292
``` bash
93-
docker run -it --env-file .env -p 1234:1234 --entrypoint bash llm-software-finder
93+
docker run -it --env-file .env -p 1234:1234 --entrypoint bash git-metadata-extractor
9494
```
9595

9696
If you are developing the application, it's useful to mount the app volume.
9797
9898
``` bash
99-
docker run -it --env-file .env -p 1234:1234 -v .:/app --entrypoint bash llm-software-finder
99+
docker run -it --env-file .env -p 1234:1234 -v .:/app --entrypoint bash git-metadata-extractor
100100
```
101101
102102
3. Then you can run the tool via
@@ -105,12 +105,18 @@ If no arguments are provided, it will use the default repository and output path
105105
python src/main.py --url https://github.com/qchapp/lungs-segmentation --output_path output_file.json
106106
```
107107
108+
4. Optional. If you are planning to use the ORCID functionality, you need to start a remote browser and configure the `.env` file.
109+
110+
``` bash
111+
docker run --rm -d -p 4444:4444 -p 7900:7900 --shm-size="2g" selenium/standalone-firefox
112+
```
113+
108114
## How to develop using Docker?
109115
110116
To facilitate development, we can mount the app folder in the Docker container. By doing this, all changes made locally will be accessible from the running container.
111117
112118
```bash
113-
docker run -it --env-file .env -p 1234:1234 -v .:/app llm-software-finder
119+
docker run -it --env-file .env -p 1234:1234 -v .:/app git-metadata-extractor
114120
```
115121
116122
@@ -119,7 +125,7 @@ docker run -it --env-file .env -p 1234:1234 -v .:/app llm-software-finder
119125
Simply run:
120126
121127
```
122-
docker run -it --env-file .env -p 1234:1234 llm-software-finder
128+
docker run -it --env-file .env -p 1234:1234 git-metadata-extractor
123129
```
124130
125131
and go to `localhost:1234`
@@ -128,7 +134,7 @@ and go to `localhost:1234`
128134
Or, if you are running the container with `bash` as the entrypoint, execute:
129135
130136
```bash
131-
uvicorn src.api:app --host 0.0.0.0 --port 1234 --reload
137+
uvicorn src.api:app --host 0.0.0.0 --workers 4 --port 1234 --reload
132138
```
133139
134140
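`--reload` lets you modify the files and automatically reloads the API endpoint, which is excellent for development. Note that uvicorn ignores `--workers` when `--reload` is enabled, so the server runs a single worker in this mode.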
@@ -138,3 +144,6 @@ uvicorn src.api:app --host 0.0.0.0 --port 1234 --reload
138144
Quentin Chappuis - EPFL Center for Imaging
139145
Robin Franken - SDSC
140146
Carlos Vivar Rios - SDSC / EPFL Center for Imaging
147+
148+
149+
docker run --network open-pulse --rm -d -p 4444:4444 -p 7900:7900 --shm-size="2g" selenium/standalone-firefox

docs/docker-cleanup-strategy.md

Lines changed: 0 additions & 108 deletions
This file was deleted.

pyproject.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "git-metadata-extractor"
7-
version = "0.1.0"
7+
version = "1.0.0"
88
description = "This project is designed to classify imaging software repositories and extract relevant information using AI models."
99
readme = "README.md"
1010
requires-python = ">=3.9"
@@ -41,6 +41,10 @@ dependencies = [
4141
"rdflib==6.2.0",
4242
"rdflib-jsonld==0.6.2",
4343
"PyYAML==6.0.2",
44+
"selenium==4.34.2",
45+
"beautifulsoup4==4.13.4",
46+
"aiohttp==3.12.15",
47+
"uvicorn-worker==0.3.0"
4448
]
4549

4650
[project.urls]

requirements.txt

Lines changed: 0 additions & 13 deletions
This file was deleted.

src/api.py

Lines changed: 78 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,46 +3,47 @@
33
import os
44
from .core.gimie_methods import extract_gimie
55
from .core.models import convert_jsonld_to_pydantic, convert_pydantic_to_zod_form_dict
6-
from .core.genai_model import llm_request_repo_infos
6+
from .core.genai_model import llm_request_repo_infos, llm_request_userorg_infos
7+
from .core.users_parser import parse_github_user
8+
from .core.orgs_parser import parse_github_organization
79
from .utils.utils import merge_jsonld
810

11+
from pprint import pprint
912

1013

1114
app = FastAPI()
1215

1316
@app.get("/")
1417
def index():
15-
return {"title": "Hello, welcome to the Git Metadata Extractor v0.1.0. Gimie Version 0.7.2. "}
18+
return {"title": f"Hello, welcome to the Git Metadata Extractor v0.2.0. Gimie Version 0.7.2. LLM Model {os.environ['MODEL']}"}
1619

1720
@app.get("/v1/extract/json/{full_path:path}")
1821
async def extract(full_path:str):
1922

2023
jsonld_gimie_data = extract_gimie(full_path, format="json-ld")
2124

2225
try:
23-
llm_result = llm_request_repo_infos(str(full_path))
24-
except Exception as e:
25-
raise HTTPException(
26-
status_code=424,
27-
detail=f"Error from LLM service: {e}"
28-
)
26+
llm_result = await llm_request_repo_infos(str(full_path), output_format="json-ld", max_tokens=30000)
27+
merged_results = merge_jsonld(jsonld_gimie_data, llm_result)
28+
pydantic_data = convert_jsonld_to_pydantic(merged_results["@graph"])
2929

30-
merged_results = merge_jsonld(jsonld_gimie_data, llm_result)
30+
except Exception as e:
3131

32-
pydantic_data = convert_jsonld_to_pydantic(merged_results["@graph"])
32+
pydantic_data = convert_jsonld_to_pydantic(jsonld_gimie_data["@graph"])
33+
print(f"Warning: LLM service failed, using fallback data: {e}")
3334

3435
zod_data = convert_pydantic_to_zod_form_dict(pydantic_data)
3536

3637
return {"link": full_path,
3738
"output": zod_data}
3839

3940
@app.get("/v1/extract/json-ld/{full_path:path}")
40-
async def extract(full_path:str):
41+
async def extract_jsonld(full_path:str):
4142

4243
jsonld_gimie_data = extract_gimie(full_path, format="json-ld")
4344

4445
try:
45-
llm_result = llm_request_repo_infos(str(full_path))
46+
llm_result = await llm_request_repo_infos(str(full_path), max_tokens=20000)
4647
except Exception as e:
4748
raise HTTPException(
4849
status_code=424,
@@ -53,26 +54,82 @@ async def extract(full_path:str):
5354

5455
return {"link": full_path,
5556
"output": merged_results}
57+
58+
@app.get("/v1/org/llm/json/{full_path:path}")
59+
async def get_org_json(full_path: str):
60+
61+
try:
62+
org_metadata = parse_github_organization(full_path.split("/")[-1])
63+
64+
parsed_org_metadata = await llm_request_userorg_infos(org_metadata, item_type="org")
65+
66+
org_metadata_dict = org_metadata.model_dump()
67+
org_metadata_dict.update(parsed_org_metadata)
68+
69+
except Exception as e:
70+
raise HTTPException(
71+
status_code=424,
72+
detail=f"Error from Organization JSON service: {e}"
73+
)
74+
75+
return {"link": full_path,
76+
"output": org_metadata_dict}
77+
78+
@app.get("/v1/user/llm/json/{full_path:path}")
79+
async def get_user_json(full_path: str):
80+
81+
try:
82+
user_metadata = parse_github_user(full_path.split("/")[-1])
83+
84+
parsed_user_metadata = await llm_request_userorg_infos(user_metadata, item_type="user")
85+
86+
user_metadata_dict = user_metadata.model_dump()
87+
88+
user_metadata_dict.update(parsed_user_metadata)
89+
90+
except Exception as e:
91+
raise HTTPException(
92+
status_code=424,
93+
detail=f"Error from Get User service: {e}"
94+
)
95+
96+
return {"link": full_path,
97+
"output": user_metadata_dict}
5698

57-
@app.get("/v1/gimie/{full_path:path}")
58-
async def gimie(full_path:str,
59-
format:str = "json-ld"):
99+
@app.get("/v1/repository/gimie/json-ld/{full_path:path}")
100+
async def gimie(full_path:str):
60101
try:
61-
gimie_output = extract_gimie(full_path, format=format)
102+
gimie_output = extract_gimie(full_path, format="json-ld")
62103
except Exception as e:
63104
raise HTTPException(
64-
status_code=424, #?
65-
detail=f"Error from LLM service: {e}"
105+
status_code=424,
106+
detail=f"Error from Gimie service: {e}"
66107
)
67108

68109
return {"link": full_path,
69110
"output": gimie_output}
70111

71-
@app.get("/v1/llm/{full_path:path}")
72-
async def llm(full_path:str):
112+
@app.get("/v1/repository/llm/json-ld/{full_path:path}")
113+
async def llm_jsonld(full_path:str):
114+
115+
try:
116+
llm_result = await llm_request_repo_infos(str(full_path), max_tokens=20000)
117+
except Exception as e:
118+
raise HTTPException(
119+
status_code=424,
120+
detail=f"Error from LLM service: {e}"
121+
)
122+
123+
return {"link": full_path,
124+
"output": llm_result}
125+
126+
@app.get("/v1/repository/llm/json/{full_path:path}")
127+
async def llm_json(full_path:str):
128+
129+
jsonld_gimie_data = extract_gimie(full_path, format="json-ld")
73130

74131
try:
75-
llm_result = llm_request_repo_infos(str(full_path))
132+
llm_result = await llm_request_repo_infos(str(full_path), gimie_output=jsonld_gimie_data, output_format="json", max_tokens=20000)
76133
except Exception as e:
77134
raise HTTPException(
78135
status_code=424,

0 commit comments

Comments
 (0)