55import datetime
66from dataclasses import dataclass
77
8- from typing import Dict , List , Union , Set
8+ from typing import Dict , List , NoReturn , Optional , Union , Set
99
1010from ._data import (
1111 _PART3_TO_CODES ,
2626)
2727
2828
29+ _STRING_CLEANING_FUNCS = [
30+ lambda x : x .strip ().lower (),
31+ lambda x : x .strip ().title (),
32+ ]
33+
34+
class LanguageNotFoundError(Exception):
    """Raised when user input isn't an ISO language code or name."""
3137
@@ -100,13 +106,17 @@ def __eq__(self, other) -> bool:
100106 return isinstance (other , Language ) and self .part3 == other .part3
101107
102108 @classmethod
103- def match (cls , user_input : str , / ) -> Language :
109+ def match (cls , user_input : str , / , * , exact : bool = False ) -> Language :
104110 """Return a ``Language`` instance by matching on the user input.
105111
106112 Parameters
107113 ----------
108114 user_input : str
109115 A language code or name.
116+ exact : bool, optional
117+ Whether to enforce exact matching against the user input.
118+ Defaults to `False`. If `False`, matching is case-insensitive
119+ and ignores leading/trailing whitespace.
110120
111121 Returns
112122 -------
@@ -140,29 +150,29 @@ def match(cls, user_input: str, /) -> Language:
140150 _NameIndexColumn .PRINT_NAME ,
141151 _NameIndexColumn .INVERTED_NAME ,
142152 ]
143- return _PART3_TO_LANGUAGES [_get_part3 (user_input , query_order )]
153+ return _PART3_TO_LANGUAGES [_get_part3 (user_input , query_order , exact )]
144154
@classmethod
def from_part3(cls, user_input: str, /) -> Language:
    """Return a ``Language`` instance from an ISO 639-3 code.

    The input is handed to ``_get_part3_exact`` unchanged (no string
    cleaning), querying ``_CodesColumn.ID`` first and then
    ``_RetirementsColumn.ID``.
    """
    part3 = _get_part3_exact(user_input, [_CodesColumn.ID, _RetirementsColumn.ID])
    return _PART3_TO_LANGUAGES[part3]
151161
@classmethod
def from_part2b(cls, user_input: str, /) -> Language:
    """Return a ``Language`` instance from an ISO 639-2 (bibliographic) code.

    The input is handed to ``_get_part3_exact`` unchanged (no string
    cleaning), querying only ``_CodesColumn.PART2B``.
    """
    part3 = _get_part3_exact(user_input, [_CodesColumn.PART2B])
    return _PART3_TO_LANGUAGES[part3]
156166
@classmethod
def from_part2t(cls, user_input: str, /) -> Language:
    """Return a ``Language`` instance from an ISO 639-2 (terminological) code.

    The input is handed to ``_get_part3_exact`` unchanged (no string
    cleaning), querying only ``_CodesColumn.PART2T``.
    """
    part3 = _get_part3_exact(user_input, [_CodesColumn.PART2T])
    return _PART3_TO_LANGUAGES[part3]
161171
@classmethod
def from_part1(cls, user_input: str, /) -> Language:
    """Return a ``Language`` instance from an ISO 639-1 code.

    The input is handed to ``_get_part3_exact`` unchanged (no string
    cleaning), querying only ``_CodesColumn.PART1``.
    """
    part3 = _get_part3_exact(user_input, [_CodesColumn.PART1])
    return _PART3_TO_LANGUAGES[part3]
166176
167177 @classmethod
168178 def from_name (cls , user_input : str , / ) -> Language :
@@ -172,10 +182,57 @@ def from_name(cls, user_input: str, /) -> Language:
172182 _NameIndexColumn .PRINT_NAME ,
173183 _NameIndexColumn .INVERTED_NAME ,
174184 ]
175- return _PART3_TO_LANGUAGES [_get_part3 (user_input , query_order )]
185+ return _PART3_TO_LANGUAGES [_get_part3_exact (user_input , query_order )]
186+
176187
def _raise_language_not_found_error(user_input: str) -> NoReturn:
    """Raise ``LanguageNotFoundError`` with a message quoting ``user_input``.

    Parameters
    ----------
    user_input : str
        The string to quote (via ``repr``) in the error message.

    Raises
    ------
    LanguageNotFoundError
        Always.
    """
    raise LanguageNotFoundError(f"{user_input!r} isn't an ISO language code or name")
190+
191+
def _get_part3(
    user_input: str, query_order: List[_COLUMN_TYPE], exact: bool = True
) -> str:
    """Get the part 3 code of a language.

    An exact lookup of ``user_input`` is always attempted first. If that
    fails and ``exact`` is ``False``, each function in
    ``_STRING_CLEANING_FUNCS`` is applied to the original input in turn and
    the cleaned string is looked up; the first successful lookup is returned.

    Parameters
    ----------
    user_input : str
        The user-provided language code or name.
    query_order : List[_COLUMN_TYPE]
        A list of columns to specify query order.
    exact : bool, optional
        Whether to enforce exact matching against the user input. Defaults to `True`.
        If `False`, basic string cleaning is applied to the user input.

    Returns
    -------
    str

    Raises
    ------
    LanguageNotFoundError
        If `user_input` isn't a language name or code. The message quotes
        the original, uncleaned input.
    """
    try:
        return _get_part3_exact(user_input, query_order)
    except LanguageNotFoundError:
        if exact:
            # Bare ``raise`` re-raises the active exception unchanged.
            raise

    # The retries deliberately run after the ``except`` block has finished, so
    # a final failure is not reported as having occurred "during handling of"
    # the first failed lookup.
    for clean in _STRING_CLEANING_FUNCS:
        try:
            # Pass the raw input along so any error quotes what the user typed.
            return _get_part3_exact(clean(user_input), query_order, user_input)
        except LanguageNotFoundError:
            continue

    _raise_language_not_found_error(user_input)
229+
230+
231+ def _get_part3_exact (
232+ user_input : str ,
233+ query_order : List [_COLUMN_TYPE ],
234+ original_user_input : Optional [str ] = None ,
235+ ) -> str :
179236 """Get the part 3 code of a language.
180237
181238 Parameters
@@ -184,6 +241,9 @@ def _get_part3(user_input: str, query_order: List[_COLUMN_TYPE]) -> str:
184241 The user-provided language code or name.
185242 query_order : List[_COLUMN_TYPE]
186243 A list of columns to specify query order.
244+ original_user_input : str, optional
245+ The original user input. Default is `None`.
246+ This argument is used when the user input has been cleaned.
187247
188248 Returns
189249 -------
@@ -198,8 +258,7 @@ def _get_part3(user_input: str, query_order: List[_COLUMN_TYPE]) -> str:
198258 for column in query_order :
199259 if column == _CodesColumn .ID :
200260 if user_input in _PART3_TO_CODES :
201- part3 = user_input
202- break
261+ return user_input
203262 elif column == _CodesColumn .PART2B :
204263 part3 = _PART2B_TO_PART3 .get (user_input )
205264 elif column == _CodesColumn .PART2T :
@@ -208,8 +267,7 @@ def _get_part3(user_input: str, query_order: List[_COLUMN_TYPE]) -> str:
208267 part3 = _PART1_TO_PART3 .get (user_input )
209268 elif column == _RetirementsColumn .ID :
210269 if user_input in _PART3_TO_RETIREMENTS :
211- part3 = user_input
212- break
270+ return user_input
213271 elif column == _CodesColumn .REF_NAME :
214272 part3 = _REF_NAME_TO_PART3 .get (user_input )
215273 elif column == _NameIndexColumn .PRINT_NAME :
@@ -222,9 +280,7 @@ def _get_part3(user_input: str, query_order: List[_COLUMN_TYPE]) -> str:
222280 break
223281
224282 if part3 is None :
225- raise LanguageNotFoundError (
226- f"{ user_input !r} isn't an ISO language code or name"
227- )
283+ _raise_language_not_found_error (original_user_input or user_input )
228284
229285 return part3
230286
0 commit comments