1414
1515
1616def build_scholarly_article_profile (qid : str , entity : Dict [str , Any ]) -> Dict [str , Any ]:
17+ claims = entity .get ("claims" , {})
18+
19+ arxiv_id = extract_string_claim (claims , "P21" )
20+ pdf_url = f"https://arxiv.org/pdf/{ arxiv_id } .pdf" if arxiv_id else None
1721
22+ # build the profile as before
1823 label = entity .get ("labels" , {}).get ("en" , {}).get ("value" , qid )
1924 description = entity .get ("descriptions" , {}).get ("en" , {}).get ("value" , "" )
20- claims = entity .get ("claims" , {})
21-
2225 author_ids = extract_item_ids (claims , "P16" )
2326 citation_ids = extract_item_ids (claims , "P223" )
2427 container_ids = extract_item_ids (claims , "P1433" )
@@ -27,7 +30,6 @@ def build_scholarly_article_profile(qid: str, entity: Dict[str, Any]) -> Dict[st
2730 license_ids = extract_item_ids (claims , "P275" )
2831 language_ids = extract_item_ids (claims , "P407" )
2932 keyword_ids = extract_item_ids (claims , "1450" )
30-
3133 publication_date = extract_time_claim (claims , "P28" ) or ""
3234 doi_value = extract_string_claim (claims , "P27" ) or ""
3335 page_range = extract_string_claim (claims , "P304" )
@@ -40,8 +42,6 @@ def build_scholarly_article_profile(qid: str, entity: Dict[str, Any]) -> Dict[st
4042 profile = {
4143 "@context" : "https://schema.org" ,
4244 "@type" : "ScholarlyArticle" ,
43-
44- # Schema.org identity stays in the Wikibase namespace
4545 "@id" : f"{ ENTITY_IRI } { qid } " ,
4646 "name" : label ,
4747 "headline" : label ,
@@ -66,7 +66,7 @@ def build_scholarly_article_profile(qid: str, entity: Dict[str, Any]) -> Dict[st
6666 "@type" : "PropertyValue" ,
6767 "propertyID" : "doi" ,
6868 "value" : doi_value ,
69- "url" : f"https://doi.org/{ doi_value } "
69+ "url" : f"https://doi.org/{ doi_value } " ,
7070 }
7171 profile ["sameAs" ] = [f"https://doi.org/{ doi_value } " ]
7272
@@ -86,4 +86,14 @@ def build_scholarly_article_profile(qid: str, entity: Dict[str, Any]) -> Dict[st
8686 if citation_ids :
8787 profile ["citation" ] = schema_refs_from_ids (citation_ids )
8888
89- return profile
89+ if pdf_url :
90+ profile ["encoding" ] = [
91+ {
92+ "@type" : "MediaObject" ,
93+ "contentUrl" : pdf_url ,
94+ "encodingFormat" : "application/pdf"
95+ }
96+ ]
97+
98+ return profile , pdf_url
99+
0 commit comments