|
21 | 21 | from lingua_nostra.lang.common_data_de import _DE_NUMBERS
|
22 | 22 | from lingua_nostra.lang.format_de import pronounce_number_de
|
23 | 23 | from lingua_nostra.time import now_local
|
| 24 | +from lingua_nostra.parse import normalize_decimals |
24 | 25 |
|
25 | 26 |
|
26 | 27 | de_numbers = {
|
@@ -143,20 +144,28 @@ def repl(match):
|
143 | 144 | return (duration, text)
|
144 | 145 |
|
145 | 146 |
|
146 |
| -def extract_number_de(text, short_scale=True, ordinals=False): |
| 147 | +def extract_number_de(text, short_scale=True, ordinals=False, decimal='.'): |
147 | 148 | """
|
148 |
| - This function prepares the given text for parsing by making |
149 |
| - numbers consistent, getting rid of contractions, etc. |
| 149 | + This function extracts a number from a text string, |
| 150 | + handles pronunciations in long scale and short scale |
| 151 | +
|
| 152 | + https://en.wikipedia.org/wiki/Names_of_large_numbers |
| 153 | +
|
150 | 154 | Args:
|
151 | 155 | text (str): the string to extract a number from
|
| 156 | + short_scale (bool): currently ignored; kept for API compatibility |
| 157 | + ordinals (bool): currently ignored; kept for API compatibility |
| 158 | + decimal (str): character to use as decimal point. defaults to '.' |
152 | 159 | Returns:
|
153 |
| - (int) or (float): The value of extracted number |
154 |
| -
|
155 |
| -
|
156 |
| - undefined articles cannot be suppressed in German: |
157 |
| - 'ein Pferd' means 'one horse' and 'a horse' |
| 160 | + (int) or (float) or False: The extracted number or False if no number |
| 161 | + was found |
| 162 | + Note: |
| 163 | + will always extract numbers formatted with a decimal dot/full stop, |
| 164 | + such as '3.5', even if 'decimal' is specified. |
158 | 165 |
|
159 | 166 | """
|
| 167 | + if decimal != '.': |
| 168 | + text = normalize_decimals(text, decimal) |
160 | 169 | # TODO: short_scale and ordinals don't do anything here.
|
161 | 170 | # The parameters are present in the function signature for API compatibility
|
162 | 171 | # reasons.
|
@@ -1003,20 +1012,28 @@ def normalize_de(text, remove_articles=True):
|
1003 | 1012 | return normalized[1:] # strip the initial space
|
1004 | 1013 |
|
1005 | 1014 |
|
1006 |
| -def extract_numbers_de(text, short_scale=True, ordinals=False): |
1007 |
| - """ |
1008 |
| - Takes in a string and extracts a list of numbers. |
1009 |
| -
|
1010 |
| - Args: |
1011 |
| - text (str): the string to extract a number from |
1012 |
| - short_scale (bool): Use "short scale" or "long scale" for large |
1013 |
| - numbers -- over a million. The default is short scale, which |
1014 |
| - is now common in most English speaking countries. |
1015 |
| - See https://en.wikipedia.org/wiki/Names_of_large_numbers |
1016 |
| - ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3 |
1017 |
| - Returns: |
1018 |
| - list: list of extracted numbers as floats |
| 1015 | +def extract_numbers_de(text, short_scale=True, ordinals=False, decimal='.'): |
1019 | 1016 | """
|
| 1017 | + This function extracts a list of numbers from a text string, |
| 1018 | + handles pronunciations in long scale and short scale |
| 1019 | +
|
| 1020 | + https://en.wikipedia.org/wiki/Names_of_large_numbers |
| 1021 | +
|
| 1022 | + Args: |
| 1023 | + text (str): the string to extract numbers from |
| 1024 | + short_scale (bool): use short scale if True, long scale if False |
| 1025 | + ordinals (bool): consider ordinal numbers, third=3 instead of 1/3 |
| 1026 | + decimal (str): character to use as decimal point. defaults to '.' |
| 1027 | + Returns: |
| 1028 | + list: list of extracted numbers as floats, as returned by |
| 1029 | + extract_numbers_generic |
| 1030 | + Note: |
| 1031 | + will always extract numbers formatted with a decimal dot/full stop, |
| 1032 | + such as '3.5', even if 'decimal' is specified. |
| 1033 | +
|
| 1034 | + """ |
| 1035 | + if decimal != '.': |
| 1036 | + text = normalize_decimals(text, decimal) |
1020 | 1037 | return extract_numbers_generic(text, pronounce_number_de, extract_number_de,
|
1021 | 1038 | short_scale=short_scale, ordinals=ordinals)
|
1022 | 1039 |
|
|
0 commit comments