1212SYSTEM_PROMPT_WITH_REASONING = """You are a categorization judge. Your task is to
1313 evaluate whether a given concept satisfies a specific predicate.
1414
15+ Be careful with concepts from adjacent domains such as physics, computer science,
16+ or engineering. A concept should only be classified as mathematical if it is
17+ primarily mathematical in nature. Concepts that merely use mathematics as a tool
18+ (e.g. quantum mechanics, signal processing) should not be considered mathematical
19+ concepts. When in doubt, consider whether the concept originates from or is
20+ primarily studied within mathematics.
21+
1522You must respond with a structured answer containing:
16231. answer: yes or no
17242. confidence: a number from 0 to 100 (representing your confidence percentage)
2431"""
2532
2633
27- def build_categorization_prompt (item , predicate , with_reasoning = False ):
34+ def build_categorization_prompt (
35+ item , predicate , with_reasoning = False , use_other_ids = True
36+ ):
2837 """
2938 Build a prompt for evaluating a concept against a predicate.
3039
3140 Args:
3241 item: Item instance to categorize
3342 predicate: The question/predicate to evaluate
3443 with_reasoning: If True, ask for reasoning in the response
44+ use_other_ids: If True, include external IDs from item.meta
45+ (only applies to Wikidata items)
3546
3647 Returns:
3748 Formatted prompt string
@@ -54,6 +65,11 @@ def build_categorization_prompt(item, predicate, with_reasoning=False):
5465 article_text = item .article_text [:1000 ]
5566 item_info_parts .append (f"Article text: { article_text } " )
5667
68+ if use_other_ids :
69+ other_ids = _get_other_ids (item )
70+ if other_ids :
71+ item_info_parts .append (f"External IDs: { other_ids } " )
72+
5773 item_info = "\n " .join (item_info_parts )
5874
5975 prompt = f"""{ system_prompt }
@@ -73,3 +89,33 @@ def build_categorization_prompt(item, predicate, with_reasoning=False):
7389Please provide your evaluation in the format specified above."""
7490
7591 return prompt
92+
93+
94+ _OTHER_ID_KEYS = {
95+ "mathworld_id" : "MathWorld ID" ,
96+ "nlab_id" : "nLab ID" ,
97+ "proofwiki_id" : "ProofWiki ID" ,
98+ "eom_id" : "Encyclopedia of Mathematics ID" ,
99+ }
100+
101+
def _get_other_ids(item):
    """
    Summarize the external reference IDs stored in an item's metadata.

    Only Wikidata items are considered. ``item.meta`` is parsed as JSON
    (or used directly when it is already a dict), the keys listed in
    ``_OTHER_ID_KEYS`` are looked up, and every non-empty value is rendered
    as "<label>: <value>".

    Args:
        item: Item instance; its ``source`` and ``meta`` attributes are read

    Returns:
        The "<label>: <value>" pairs joined by ", ", or None when the item is
        not from Wikidata, its metadata is missing or is not a JSON object,
        or none of the known IDs are set
    """
    import json

    # Kept as a function-level import, as in the original code
    # (presumably to avoid a circular import -- confirm).
    from concepts.models import Item

    if item.source != Item.Source.WIKIDATA:
        return None

    raw_meta = item.meta
    if not raw_meta:
        return None

    if isinstance(raw_meta, dict):
        # Already-decoded metadata: use it directly instead of letting
        # json.loads raise TypeError and silently dropping the IDs.
        meta = raw_meta
    else:
        try:
            meta = json.loads(raw_meta)
        except (TypeError, ValueError):
            # ValueError covers json.JSONDecodeError as well as
            # UnicodeDecodeError raised for undecodable bytes input.
            return None
        if not isinstance(meta, dict):
            # Valid JSON that is not an object (list, string, number, ...)
            # has no ``.get``; treat it as "no external IDs".
            return None

    parts = [
        f"{label}: {meta[meta_key]}"
        for meta_key, label in _OTHER_ID_KEYS.items()
        if meta.get(meta_key)
    ]
    return ", ".join(parts) if parts else None
0 commit comments