Skip to content

Commit 25ea9ea

Browse files
authored
Merge pull request #5 from Imaging-Plaza/feat-discipline-repository-type
Feat discipline repository type
2 parents 6d94c22 + c840daf commit 25ea9ea

5 files changed

Lines changed: 92 additions & 17 deletions

File tree

src/api.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ async def gimie(full_path:str,
6868
return {"link": full_path,
6969
"output": gimie_output}
7070

71-
@app.get("/v1/llm/{full_path:path}")
71+
@app.get("/v1/llm/json-ld/{full_path:path}")
7272
async def llm(full_path:str):
7373

7474
try:
@@ -82,6 +82,20 @@ async def llm(full_path:str):
8282
return {"link": full_path,
8383
"output": llm_result}
8484

@app.get("/v1/llm/json/{full_path:path}")
async def llm(full_path: str):
    """Return the LLM-extracted repository information as plain JSON.

    Delegates to ``llm_request_repo_infos`` with ``output_format="json"``.

    Args:
        full_path: Repository location captured from the URL path.

    Returns:
        A dict with the requested ``link`` and the LLM ``output``.

    Raises:
        HTTPException: Status 424 when the LLM request raises any exception.
    """
    # NOTE(review): this handler reuses the function name `llm` of the
    # `/v1/llm/json-ld/...` handler, so the module-level name is rebound to
    # this function. Consider a distinct name (e.g. `llm_json`) once it is
    # confirmed that nothing refers to `llm` by name.
    try:
        llm_result = llm_request_repo_infos(str(full_path), output_format="json")
    except Exception as e:
        # Chain the cause so the original traceback is preserved.
        raise HTTPException(
            status_code=424,
            detail=f"Error from LLM service: {e}"
        ) from e

    return {"link": full_path,
            "output": llm_result}
98+
8599
@app.exception_handler(ValueError)
86100
async def value_error_exception_handler(request: Request, exc: ValueError):
87101
return JSONResponse(

src/core/genai_model.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ def clone_repo(repo_url):
110110
return None
111111

112112

113-
def llm_request_repo_infos(repo_url):
113+
def llm_request_repo_infos(repo_url, output_format="json-ld"):
114114
# Clone the GitHub repository into a temporary folder
115115
with tempfile.TemporaryDirectory() as temp_dir:
116116
logger.info(f"Cloning {repo_url} into {temp_dir}...")
@@ -146,7 +146,6 @@ def llm_request_repo_infos(repo_url):
146146
raw_result = response.json()["choices"][0]["message"]["content"]
147147
parsed_result = clean_json_string(raw_result)
148148
json_data = json.loads(parsed_result)
149-
pprint(json_data)
150149

151150
logger.info("Successfully parsed API response")
152151

@@ -161,7 +160,13 @@ def llm_request_repo_infos(repo_url):
161160
# TODO. This is hardcoded. Not good.
162161
context_path = "src/files/json-ld-context.json"
163162
# Now convert cleaned data to JSON-LD
164-
return json_to_jsonLD(cleaned_json, context_path)
163+
if output_format == "json-ld":
164+
return json_to_jsonLD(cleaned_json, context_path)
165+
elif output_format == "json":
166+
return cleaned_json
167+
else:
168+
logger.error(f"Unsupported output format: {output_format}")
169+
return None
165170

166171
except Exception as e:
167172
logger.error(f"Error parsing response: {e}")

src/core/models.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,62 @@ class Image(BaseModel):
5757
contentUrl: HttpUrl = None
5858
keywords: ImageKeyword = ImageKeyword.ILLUSTRATIVE_IMAGE
5959

60+
class Discipline(str, Enum):
    """Closed set of discipline labels accepted for a repository.

    Members are ``str`` subclasses, so each one compares equal to its
    human-readable value (e.g. ``Discipline.LAW == "Law"``) and can be
    looked up from that value (``Discipline("Law")``).
    """

    # NOTE(review): the order looks like a broad area followed by its
    # sub-disciplines (e.g. "Social sciences" then "Anthropology", ...);
    # the hierarchy is not encoded anywhere — confirm if it matters.
    SOCIAL_SCIENCES = "Social sciences"
    ANTHROPOLOGY = "Anthropology"
    COMMUNICATION_STUDIES = "Communication studies"
    EDUCATION = "Education"
    LINGUISTICS = "Linguistics"
    RESEARCH = "Research"
    SOCIOLOGY = "Sociology"
    GEOGRAPHY = "Geography"
    PSYCHOLOGY = "Psychology"
    POLITICS = "Politics"
    ECONOMICS = "Economics"
    APPLIED_SCIENCES = "Applied sciences"
    HEALTH_SCIENCES = "Health sciences"
    ELECTRICAL_ENGINEERING = "Electrical engineering"
    CHEMICAL_ENGINEERING = "Chemical engineering"
    CIVIL_ENGINEERING = "Civil engineering"
    ARCHITECTURE = "Architecture"
    COMPUTER_ENGINEERING = "Computer engineering"
    ENERGY_ENGINEERING = "Energy engineering"
    MILITARY_SCIENCE = "Military science"
    INDUSTRIAL_PRODUCTION_ENGINEERING = "Industrial and production engineering"
    MECHANICAL_ENGINEERING = "Mechanical engineering"
    BIOLOGICAL_ENGINEERING = "Biological engineering"
    ENVIRONMENTAL_SCIENCE = "Environmental science"
    SYSTEMS_SCIENCE_ENGINEERING = "Systems science and engineering"
    INFORMATION_ENGINEERING = "Information engineering"
    AGRICULTURAL_FOOD_SCIENCES = "Agricultural and food sciences"
    BUSINESS = "Business"
    HUMANITIES = "Humanities"
    HISTORY = "History"
    LITERATURE = "Literature"
    ART = "Art"
    RELIGION = "Religion"
    PHILOSOPHY = "Philosophy"
    LAW = "Law"
    FORMAL_SCIENCES = "Formal sciences"
    MATHEMATICS = "Mathematics"
    LOGIC = "Logic"
    STATISTICS = "Statistics"
    THEORETICAL_COMPUTER_SCIENCE = "Theoretical computer science"
    NATURAL_SCIENCES = "Natural sciences"
    PHYSICS = "Physics"
    ASTRONOMY = "Astronomy"
    BIOLOGY = "Biology"
    CHEMISTRY = "Chemistry"
    EARTH_SCIENCE = "Earth science"
108+
class RepositoryType(str, Enum):
    """Closed set of labels describing what kind of content a repository holds.

    Members are ``str`` subclasses, so each one compares equal to its
    lowercase value (e.g. ``RepositoryType.DATA == "data"``).
    """

    SOFTWARE = "software"
    EDUCATIONAL_RESOURCE = "educational resource"
    DOCUMENTATION = "documentation"
    DATA = "data"
    OTHER = "other"
115+
60116
class SoftwareSourceCode(BaseModel):
61117
name: Optional[str] = None
62118
applicationCategory: Optional[List[str]] = None
@@ -74,6 +130,7 @@ class SoftwareSourceCode(BaseModel):
74130
license: Annotated[str, StringConstraints(pattern=r"spdx\.org.*")] = None
75131
author: List[Union[Person, Organization]] = None
76132
relatedToOrganization: Optional[List[str]] = None
133+
relatedToOrganizationJustification: Optional[List[str]] = None
77134
operatingSystem: Optional[List[str]] = None
78135
programmingLanguage: Optional[List[str]] = None
79136
softwareRequirements: Optional[List[str]] = None
@@ -94,6 +151,10 @@ class SoftwareSourceCode(BaseModel):
94151
imagingModality: Optional[List[str]] = None
95152
fairLevel: Optional[str] = None
96153
graph: Optional[str] = None
154+
discipline: Optional[List[Discipline]] = None
155+
disciplineJustification: Optional[List[str]] = None
156+
repositoryType: Optional[RepositoryType] = None
157+
respositoryTypeJustification: Optional[List[str]] = None
97158

98159

99160
############################################################

src/core/prompts.py

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
- `orcidId` (valid URL, **optional**)
3737
- `affiliation` (list of strings, **optional**): Institutions the author is affiliated with. Do not mention Imaging Plaza unless it is explicitly mentioned.
3838
- `relatedToOrganization` (list of strings, **optional**): Institutions associated with the software. Do not mention Imaging Plaza unless it is explicitly mentioned.
39+
- `relatedToOrganizationJustification` (list of strings, **optional**): Justification for the related organizations.
3940
- `softwareRequirements` (list of strings, **optional**): Dependencies or prerequisites for running the software.
4041
- `operatingSystem` (list of strings, **optional**): Compatible operating systems. Use only Windows, Linux, MacOS, or Other.
4142
- `programmingLanguage` (list of strings, **optional**): Programming languages used in the software.
@@ -92,18 +93,10 @@
9293
- `hasExecutableInstructions` (string, **optional**): Any executable instructions related to the software. This should point to a URL where the installation is explained. If this is the README file, please provide the full URL.
9394
- `readme` (valid URL, **optional**): README url of the software (at the root of the repo)
9495
- `imagingModality` (list of strings, **optional**): imaging modalities accepted by the software.
95-
96-
97-
When dealing with Organization pay attention to
98-
-
99-
-
100-
-
101-
102-
When parsing Persons note:
103-
-
104-
-
105-
-
106-
96+
- `discipline` (list of strings, **optional**): Scientific disciplines the software belongs to. Base your response on the README and other documentation files content.
97+
- `disciplineJustification` (list of strings, **optional**): Justification for the discipline classification.
98+
- `repositoryType` (string, **optional**): Type of repository (e.g., software, educational resource, documentation, data, other).
99+
- `respositoryTypeJustification` (list of strings, **optional**): Justification for the repository type classification.
107100
108101
PLEASE PROVIDE THE OUTPUT IN JSON FORMAT ONLY, WITHOUT ANY EXPLANATION OR ADDITIONAL TEXT. ALIGN THE RESPONSE TO THE SCHEMA SPECIFICATION.
109102
"""

src/files/json-ld-context.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,9 @@
7676
"hasRorId": "md4i:hasRorId",
7777
"legalName": "schema:legalName",
7878
"fundingGrant": "sd:fundingGrant",
79-
"fundingSource": "sd:fundingSource"
79+
"fundingSource": "sd:fundingSource",
80+
"discipline": "pulse:discipline",
81+
"repositoryType": "pulse:repositoryType"
8082
}
8183
}
8284

0 commit comments

Comments
 (0)