@@ -29,7 +29,12 @@ class AcronymCache(CacheBase):
2929
3030 # ------------------------------------------------------------------ lookup
3131
32- def get_full_name_for_acronym (self , acronym : str , entity_type : str ) -> str | None :
32+ def get_full_name_for_acronym (
33+ self ,
34+ acronym : str ,
35+ entity_type : str ,
36+ min_confidence : float = 0.0 ,
37+ ) -> str | None :
3338 """Return the canonical name for an acronym, or None if not found.
3439
3540 Args:
@@ -44,8 +49,9 @@ def get_full_name_for_acronym(self, acronym: str, entity_type: str) -> str | Non
4449 cursor = conn .cursor ()
4550 cursor .execute (
4651 "SELECT canonical FROM venue_acronyms "
47- "WHERE acronym = ? COLLATE NOCASE AND entity_type = ?" ,
48- (acronym .strip (), entity_type ),
52+ "WHERE acronym = ? COLLATE NOCASE AND entity_type = ? "
53+ "AND confidence_score >= ?" ,
54+ (acronym .strip (), entity_type , min_confidence ),
4955 )
5056 row = cursor .fetchone ()
5157 if row :
@@ -56,46 +62,80 @@ def get_full_name_for_acronym(self, acronym: str, entity_type: str) -> str | Non
5662 detail_logger .debug (f"No entry found for '{ acronym } ' ({ entity_type } )" )
5763 return None
5864
59- def get_canonical_for_variant (self , variant : str , entity_type : str ) -> str | None :
60- """Return the canonical name for a venue variant (abbreviated) form.
61-
62- Looks up the variant in the ``venue_acronym_variants`` table and returns
63- the canonical name of the parent acronym entry. Enables lookup of
64- abbreviated forms such as "ieee trans. pattern anal. mach. intell.".
65+ def get_variant_match (
66+ self ,
67+ variant : str ,
68+ entity_type : str ,
69+ min_confidence : float = 0.0 ,
70+ ) -> dict [str , str | float ] | None :
71+ """Return canonical, acronym and confidence-score match data for a variant lookup.
6572
6673 Args:
67- variant: An abbreviated or alternative venue name to look up
68- entity_type: VenueType value (e.g., 'journal', 'conference')
74+ variant: An abbreviated or alternative venue name.
75+ entity_type: VenueType value (e.g., 'journal', 'conference').
6976
7077 Returns:
71- Canonical name string , or None if no matching variant found .
78+ Dict with keys ``canonical``, ``acronym`` and ``confidence_score``, or None if no entry with confidence_score >= ``min_confidence`` matches.
7279 """
7380 detail_logger .debug (f"Looking up variant '{ variant } ' ({ entity_type } )" )
7481 with self .get_connection_with_row_factory () as conn :
7582 cursor = conn .cursor ()
7683 cursor .execute (
7784 """
78- SELECT va.canonical, va.acronym
85+ SELECT va.canonical, va.acronym, va.confidence_score
7986 FROM venue_acronyms va
8087 JOIN venue_acronym_variants vav ON va.id = vav.venue_acronym_id
8188 WHERE vav.variant = ? COLLATE NOCASE
8289 AND va.entity_type = ?
90+ AND va.confidence_score >= ?
8391 LIMIT 1
8492 """ ,
85- (variant .strip (), entity_type ),
93+ (variant .strip (), entity_type , min_confidence ),
8694 )
8795 row = cursor .fetchone ()
8896 if row :
97+ canonical = str (row ["canonical" ])
98+ acronym = str (row ["acronym" ])
8999 detail_logger .debug (
90100 f"Found canonical for variant '{ variant } ' "
91- f"(acronym: '{ row [ ' acronym' ] } ') -> '{ row [ ' canonical' ] } '"
101+ f"(acronym: '{ acronym } ') -> '{ canonical } '"
92102 )
93- return str (row ["canonical" ])
103+ return {
104+ "canonical" : canonical ,
105+ "acronym" : acronym ,
106+ "confidence_score" : float (row ["confidence_score" ]),
107+ }
94108 detail_logger .debug (
95109 f"No variant match found for '{ variant } ' ({ entity_type } )"
96110 )
97111 return None
98112
113+ def get_canonical_for_variant (
114+ self ,
115+ variant : str ,
116+ entity_type : str ,
117+ min_confidence : float = 0.0 ,
118+ ) -> str | None :
119+ """Return the canonical name for a venue variant (abbreviated) form.
120+
121+ Looks up the variant in the ``venue_acronym_variants`` table and returns
122+ the canonical name of the parent acronym entry. Enables lookup of
123+ abbreviated forms such as "ieee trans. pattern anal. mach. intell.".
124+
125+ Args:
126+ variant: An abbreviated or alternative venue name to look up
127+ entity_type: VenueType value (e.g., 'journal', 'conference')
128+
129+ Returns:
130+ Canonical name string, or None if no matching variant with confidence_score >= ``min_confidence`` is found.
131+ """
132+ match = self .get_variant_match (
133+ variant , entity_type , min_confidence = min_confidence
134+ )
135+ if match :
136+ return str (match ["canonical" ])
137+ return None
138+
99139 def get_variants (self , acronym : str , entity_type : str ) -> list [str ]:
100140 """Return all known name variants for an acronym.
101141
@@ -120,42 +160,76 @@ def get_variants(self, acronym: str, entity_type: str) -> list[str]:
120160 )
121161 return [str (row ["variant" ]) for row in cursor .fetchall ()]
122162
123- def get_canonical_for_issn (self , issn : str ) -> str | None :
124- """Return the canonical name for a venue identified by ISSN.
125-
126- Searches the ``venue_acronym_issns`` table. No entity_type filter is
127- applied because ISSNs are globally unique across venue types.
163+ def get_issn_match (
164+ self ,
165+ issn : str ,
166+ min_confidence : float = 0.0 ,
167+ ) -> dict [str , str | float ] | None :
168+ """Return canonical, acronym and confidence-score match data for an ISSN lookup.
128169
129170 Args:
130- issn: ISSN string (e.g. '1550-4859')
171+ issn: ISSN string (e.g. '1550-4859').
131172
132173 Returns:
133- Canonical name string , or None if not found .
174+ Dict with keys ``canonical``, ``acronym`` and ``confidence_score``, or None if no entry with confidence_score >= ``min_confidence`` matches.
134175 """
135176 detail_logger .debug (f"Looking up ISSN '{ issn } '" )
136177 with self .get_connection_with_row_factory () as conn :
137178 cursor = conn .cursor ()
138179 cursor .execute (
139180 """
140- SELECT va.canonical, va.acronym
181+ SELECT va.canonical, va.acronym, va.confidence_score
141182 FROM venue_acronyms va
142183 JOIN venue_acronym_issns vai ON va.id = vai.venue_acronym_id
143184 WHERE vai.issn = ?
185+ AND va.confidence_score >= ?
144186 LIMIT 1
145187 """ ,
146- (issn .strip (),),
188+ (issn .strip (), min_confidence ),
147189 )
148190 row = cursor .fetchone ()
149191 if row :
192+ canonical = str (row ["canonical" ])
193+ acronym = str (row ["acronym" ])
150194 detail_logger .debug (
151195 f"Found canonical for ISSN '{ issn } ' "
152- f"(acronym: '{ row [ ' acronym' ] } ') -> '{ row [ ' canonical' ] } '"
196+ f"(acronym: '{ acronym } ') -> '{ canonical } '"
153197 )
154- return str (row ["canonical" ])
198+ return {
199+ "canonical" : canonical ,
200+ "acronym" : acronym ,
201+ "confidence_score" : float (row ["confidence_score" ]),
202+ }
155203 detail_logger .debug (f"No entry found for ISSN '{ issn } '" )
156204 return None
157205
158- def get_issns (self , acronym : str , entity_type : str ) -> list [str ]:
206+ def get_canonical_for_issn (
207+ self ,
208+ issn : str ,
209+ min_confidence : float = 0.0 ,
210+ ) -> str | None :
211+ """Return the canonical name for a venue identified by ISSN.
212+
213+ Searches the ``venue_acronym_issns`` table. No entity_type filter is
214+ applied because ISSNs are globally unique across venue types.
215+
216+ Args:
217+ issn: ISSN string (e.g. '1550-4859')
218+
219+ Returns:
220+ Canonical name string, or None if no entry with confidence_score >= ``min_confidence`` is found.
221+ """
222+ match = self .get_issn_match (issn , min_confidence = min_confidence )
223+ if match :
224+ return str (match ["canonical" ])
225+ return None
226+
227+ def get_issns (
228+ self ,
229+ acronym : str ,
230+ entity_type : str ,
231+ min_confidence : float = 0.0 ,
232+ ) -> list [str ]:
159233 """Return all known ISSNs for an acronym.
160234
161235 Args:
@@ -172,10 +246,12 @@ def get_issns(self, acronym: str, entity_type: str) -> list[str]:
172246 SELECT vai.issn
173247 FROM venue_acronym_issns vai
174248 JOIN venue_acronyms va ON va.id = vai.venue_acronym_id
175- WHERE va.acronym = ? COLLATE NOCASE AND va.entity_type = ?
249+ WHERE va.acronym = ? COLLATE NOCASE
250+ AND va.entity_type = ?
251+ AND va.confidence_score >= ?
176252 ORDER BY vai.id
177253 """ ,
178- (acronym .strip (), entity_type ),
254+ (acronym .strip (), entity_type , min_confidence ),
179255 )
180256 return [str (row ["issn" ]) for row in cursor .fetchall ()]
181257
0 commit comments