Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .env.dist
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ OPENROUTER_API_KEY=
GITHUB_TOKEN=
GITLAB_TOKEN=
MODEL=
PROVIDER=
PROVIDER=
SELENIUM_REMOTE_URL=
2 changes: 2 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,6 @@ COPY . .

ENV PYTHONUNBUFFERED=1



ENTRYPOINT ["uvicorn", "src.api:app", "--host", "0.0.0.0", "--port", "1234"]
21 changes: 15 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,19 +84,19 @@ If no arguments are provided, it will use the default repository and output path
1. You need to build the image.

``` bash
docker build -t llm-software-finder .
docker build -t git-metadata-extractor .
```

2. Run the image.

``` bash
docker run -it --env-file .env -p 1234:1234 --entrypoint bash llm-software-finder
docker run -it --env-file .env -p 1234:1234 --entrypoint bash git-metadata-extractor
```

If you are developing the application, it's useful to mount the app volume.

``` bash
docker run -it --env-file .env -p 1234:1234 -v .:/app --entrypoint bash llm-software-finder
docker run -it --env-file .env -p 1234:1234 -v .:/app --entrypoint bash git-metadata-extractor
```

3. Then you can run the tool via
Expand All @@ -105,12 +105,18 @@ If no arguments are provided, it will use the default repository and output path
python src/main.py --url https://github.com/qchapp/lungs-segmentation --output_path output_file.json
```

4. (Optional) If you plan to use the ORCID functionality, you need to start a remote browser and configure the `.env` file (set `SELENIUM_REMOTE_URL` to point to it).

``` bash
docker run --rm -d -p 4444:4444 -p 7900:7900 --shm-size="2g" selenium/standalone-firefox
```

## How to develop using Docker?

To facilitate development, we can mount the app folder in the Docker container. By doing this, all changes made locally will be accessible from the running container.

```bash
docker run -it --env-file .env -p 1234:1234 -v .:/app llm-software-finder
docker run -it --env-file .env -p 1234:1234 -v .:/app git-metadata-extractor
```


Expand All @@ -119,7 +125,7 @@ docker run -it --env-file .env -p 1234:1234 -v .:/app llm-software-finder
Simply run:

```
docker run -it --env-file .env -p 1234:1234 llm-software-finder
docker run -it --env-file .env -p 1234:1234 git-metadata-extractor
```

and go to `localhost:1234`
Expand All @@ -128,7 +134,7 @@ and go to `localhost:1234`
Or, if you are running the container with `bash` as the entrypoint, execute:

```bash
uvicorn src.api:app --host 0.0.0.0 --port 1234 --reload
uvicorn src.api:app --host 0.0.0.0 --workers 4 --port 1234 --reload
```

`--reload` lets you modify the files and automatically reloads the API endpoint, which is excellent for development.
Expand All @@ -138,3 +144,6 @@ uvicorn src.api:app --host 0.0.0.0 --port 1234 --reload
Quentin Chappuis - EPFL Center for Imaging
Robin Franken - SDSC
Carlos Vivar Rios - SDSC / EPFL Center for Imaging


docker run --network open-pulse --rm -d -p 4444:4444 -p 7900:7900 --shm-size="2g" selenium/standalone-firefox
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "git-metadata-extractor"
version = "0.1.0"
version = "0.2.0"
description = "This project is designed to classify imaging software repositories and extract relevant information using AI models."
readme = "README.md"
requires-python = ">=3.9"
Expand Down Expand Up @@ -41,6 +41,9 @@ dependencies = [
"rdflib==6.2.0",
"rdflib-jsonld==0.6.2",
"PyYAML==6.0.2",
"selenium",
"beautifulsoup4",
"aiohttp",
]

[project.urls]
Expand Down
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,6 @@ fastapi
uvicorn
gimie==0.7.2
pyyaml
openai
openai
beautifulsoup4
selenium
79 changes: 62 additions & 17 deletions src/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,27 @@
import os
from .core.gimie_methods import extract_gimie
from .core.models import convert_jsonld_to_pydantic, convert_pydantic_to_zod_form_dict
from .core.genai_model import llm_request_repo_infos
from .core.genai_model import llm_request_repo_infos, llm_request_userorg_infos
from .core.users_parser import parse_github_user
from .core.orgs_parser import parse_github_organization
from .utils.utils import merge_jsonld

from pprint import pprint


app = FastAPI()

@app.get("/")
def index():
return {"title": "Hello, welcome to the Git Metadata Extractor v0.1.0. Gimie Version 0.7.2. "}
return {"title": f"Hello, welcome to the Git Metadata Extractor v0.2.0. Gimie Version 0.7.2. LLM Model {os.environ['MODEL']}"}

@app.get("/v1/extract/json/{full_path:path}")
async def extract(full_path:str):

jsonld_gimie_data = extract_gimie(full_path, format="json-ld")

try:
llm_result = llm_request_repo_infos(str(full_path))
llm_result = await llm_request_repo_infos(str(full_path), output_format="json-ld", max_tokens=20000)
except Exception as e:
raise HTTPException(
status_code=424,
Expand All @@ -37,12 +40,12 @@ async def extract(full_path:str):
"output": zod_data}

@app.get("/v1/extract/json-ld/{full_path:path}")
async def extract(full_path:str):
async def extract_jsonld(full_path:str):

jsonld_gimie_data = extract_gimie(full_path, format="json-ld")

try:
llm_result = llm_request_repo_infos(str(full_path))
llm_result = await llm_request_repo_infos(str(full_path), max_tokens=20000)
except Exception as e:
raise HTTPException(
status_code=424,
Expand All @@ -53,26 +56,66 @@ async def extract(full_path:str):

return {"link": full_path,
"output": merged_results}

@app.get("/v1/org/llm/json/{full_path:path}")
async def get_org_json(full_path: str):
    """Return GitHub organization metadata enriched with LLM-derived fields.

    The organization name is taken from the last path segment of
    ``full_path``. It is passed to ``parse_github_organization``, and the
    parsed metadata is then sent to ``llm_request_userorg_infos`` with
    ``item_type="org"``. The LLM result is merged on top of the parsed
    metadata, so LLM keys override parser keys on conflict.

    Args:
        full_path: Organization URL or path; only its last non-empty
            segment is used as the organization name.

    Returns:
        A dict with ``"link"`` (the untouched ``full_path``) and ``"output"``
        (the merged metadata dictionary).

    Raises:
        HTTPException: With status 424 if parsing, the LLM request, or the
            merge step fails for any reason.
    """
    # Strip trailing slashes so ".../my-org/" resolves to "my-org" instead of
    # an empty string being handed to the parser.
    org_name = full_path.rstrip("/").split("/")[-1]

    try:
        org_metadata = parse_github_organization(org_name)

        parsed_org_metadata = await llm_request_userorg_infos(org_metadata, item_type="org")

        # NOTE(review): model_dump() suggests org_metadata is a Pydantic model
        # and the LLM result a mapping -- confirm against the parser/LLM modules.
        org_metadata_dict = org_metadata.model_dump()
        org_metadata_dict.update(parsed_org_metadata)

    except Exception as e:
        # Chain the original exception so the root cause stays in tracebacks.
        raise HTTPException(
            status_code=424,
            detail=f"Error from Organization JSON service: {e}"
        ) from e

    return {"link": full_path,
            "output": org_metadata_dict}

@app.get("/v1/user/llm/json/{full_path:path}")
async def get_user_json(full_path: str):
    """Return GitHub user metadata enriched with LLM-derived fields.

    The user name is taken from the last path segment of ``full_path``. It
    is passed to ``parse_github_user``, and the parsed metadata is then sent
    to ``llm_request_userorg_infos`` with ``item_type="user"``. The LLM
    result is merged on top of the parsed metadata, so LLM keys override
    parser keys on conflict.

    Args:
        full_path: User profile URL or path; only its last non-empty
            segment is used as the user name.

    Returns:
        A dict with ``"link"`` (the untouched ``full_path``) and ``"output"``
        (the merged metadata dictionary).

    Raises:
        HTTPException: With status 424 if parsing, the LLM request, or the
            merge step fails for any reason.
    """
    # Strip trailing slashes so ".../some-user/" resolves to "some-user"
    # instead of an empty string being handed to the parser.
    user_name = full_path.rstrip("/").split("/")[-1]

    try:
        user_metadata = parse_github_user(user_name)

        parsed_user_metadata = await llm_request_userorg_infos(user_metadata, item_type="user")

        # NOTE(review): model_dump() suggests user_metadata is a Pydantic model
        # and the LLM result a mapping -- confirm against the parser/LLM modules.
        user_metadata_dict = user_metadata.model_dump()
        user_metadata_dict.update(parsed_user_metadata)

    except Exception as e:
        # Chain the original exception so the root cause stays in tracebacks.
        raise HTTPException(
            status_code=424,
            detail=f"Error from Get User service: {e}"
        ) from e

    return {"link": full_path,
            "output": user_metadata_dict}

@app.get("/v1/gimie/{full_path:path}")
async def gimie(full_path:str,
format:str = "json-ld"):
@app.get("/v1/repository/gimie/json-ld/{full_path:path}")
async def gimie(full_path:str):
try:
gimie_output = extract_gimie(full_path, format=format)
gimie_output = extract_gimie(full_path, format="json-ld")
except Exception as e:
raise HTTPException(
status_code=424, #?
detail=f"Error from LLM service: {e}"
status_code=424,
detail=f"Error from Gimie service: {e}"
)

return {"link": full_path,
"output": gimie_output}

@app.get("/v1/llm/json-ld/{full_path:path}")
async def llm(full_path:str):
@app.get("/v1/repository/llm/json-ld/{full_path:path}")
async def llm_jsonld(full_path:str):

try:
llm_result = llm_request_repo_infos(str(full_path))
llm_result = await llm_request_repo_infos(str(full_path), max_tokens=20000)
except Exception as e:
raise HTTPException(
status_code=424,
Expand All @@ -82,11 +125,13 @@ async def llm(full_path:str):
return {"link": full_path,
"output": llm_result}

@app.get("/v1/llm/json/{full_path:path}")
async def llm(full_path:str):
@app.get("/v1/repository/llm/json/{full_path:path}")
async def llm_json(full_path:str):

jsonld_gimie_data = extract_gimie(full_path, format="json-ld")

try:
llm_result = llm_request_repo_infos(str(full_path), output_format="json")
llm_result = await llm_request_repo_infos(str(full_path), gimie_output=jsonld_gimie_data, output_format="json", max_tokens=20000)
except Exception as e:
raise HTTPException(
status_code=424,
Expand Down
Loading