29
29
from lingua_franca .lang .parse_common import is_numeric , look_for_fractions , \
30
30
invert_dict , ReplaceableNumber , partition_list , tokenize , Token , Normalizer
31
31
from lingua_franca .time import now_local
32
+ from lingua_franca .lang .parse_common import normalize_decimals
32
33
33
34
34
35
def _convert_words_to_numbers_en (text , short_scale = True , ordinals = False ):
@@ -529,7 +530,7 @@ def _initialize_number_data_en(short_scale, speech=True):
529
530
return multiplies , string_num_ordinal_en , string_num_scale_en
530
531
531
532
532
- def extract_number_en (text , short_scale = True , ordinals = False ):
533
+ def extract_number_en (text , short_scale = True , ordinals = False , decimal = '.' ):
533
534
"""
534
535
This function extracts a number from a text string,
535
536
handles pronunciations in long scale and short scale
@@ -540,11 +541,17 @@ def extract_number_en(text, short_scale=True, ordinals=False):
540
541
text (str): the string to normalize
541
542
short_scale (bool): use short scale if True, long scale if False
542
543
ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
544
+ decimal (str): character used as the decimal point in `text`. Defaults to '.'.
543
545
Returns:
544
546
(int) or (float) or False: The extracted number or False if no number
545
547
was found
548
+ Note:
549
+ Numbers formatted with a decimal dot/full stop, such as '3.5',
550
+ are always extracted, even if a different 'decimal' is specified.
546
551
547
552
"""
553
+ if decimal != '.' :
554
+ text = normalize_decimals (text , decimal )
548
555
return _extract_number_with_text_en (tokenize (text .lower ()),
549
556
short_scale , ordinals ).value
550
557
@@ -1655,7 +1662,7 @@ def is_fractional_en(input_str, short_scale=True, spoken=True):
1655
1662
return False
1656
1663
1657
1664
1658
- def extract_numbers_en (text , short_scale = True , ordinals = False ):
1665
+ def extract_numbers_en (text , short_scale = True , ordinals = False , decimal = '.' ):
1659
1666
"""
1660
1667
Takes in a string and extracts a list of numbers.
1661
1668
@@ -1666,9 +1673,15 @@ def extract_numbers_en(text, short_scale=True, ordinals=False):
1666
1673
is now common in most English speaking countries.
1667
1674
See https://en.wikipedia.org/wiki/Names_of_large_numbers
1668
1675
ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
1676
+ decimal (str): character used as the decimal point in `text`. Defaults to '.'.
1669
1677
Returns:
1670
1678
list: list of extracted numbers as floats
1679
+ Note:
1680
+ Numbers formatted with a decimal dot/full stop, such as '3.5',
1681
+ are always extracted, even if a different 'decimal' is specified.
1671
1682
"""
1683
+ if decimal != '.' :
1684
+ text = normalize_decimals (text , decimal )
1672
1685
results = _extract_numbers_with_text_en (tokenize (text ),
1673
1686
short_scale , ordinals )
1674
1687
return [float (result .value ) for result in results ]
0 commit comments