Skip to content

Commit 678f000

Browse files
committed
split datetime and timedelta into different extractors
1 parent 544528e commit 678f000

File tree

5 files changed

+194
-159
lines changed

5 files changed

+194
-159
lines changed

examples/date.py

+13-38
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,6 @@
11
from simple_NER.annotators.datetime_ner import DateTimeNER
2-
ner = DateTimeNER()
32

4-
for r in ner.extract_entities("The movie is one hour, fifty seven and a half minutes long"):
5-
assert r.value == '1 hour, 57.5 minutes'
6-
if r.entity_type == "duration":
7-
assert r.total_seconds == 7050
8-
"""
9-
{'confidence': 1,
10-
'data': {'days': 0,
11-
'microseconds': 0,
12-
'seconds': 7050,
13-
'spoken': 'one hour fifty seven minutes thirty seconds',
14-
'total_seconds': 7050.0},
15-
'entity_type': 'duration',
16-
'rules': [],
17-
'source_text': 'The movie is one hour, fifty seven and a half minutes long',
18-
'spans': [(13, 53)],
19-
'value': 'one hour, fifty seven and a half minutes'}
20-
21-
"""
22-
else:
23-
assert r.entity_type == "relative_date"
24-
"""
25-
{'confidence': 1,
26-
'data': {'day': 29,
27-
'hour': 1,
28-
'isoformat': '2020-11-29T01:44:29',
29-
'minute': 44,
30-
'month': 11,
31-
'spoken': 'tomorrow',
32-
'timestamp': 1606614269.0,
33-
'weekday': 7,
34-
'year': 2020},
35-
'entity_type': 'relative_date',
36-
'rules': [],
37-
'source_text': 'the movie is 1 hour, 57.5 minutes long',
38-
'spans': [(13, 33)],
39-
'value': '1 hour, 57.5 minutes'}
40-
"""
3+
ner = DateTimeNER()
414

425
for r in ner.extract_entities("my birthday is on december 5th"):
436
assert r.entity_type == "relative_date"
@@ -53,3 +16,15 @@
5316
"""
5417
day: 4 month: 1 year: 2017 hour: 20 minute: 30
5518
"""
19+
20+
for r in ner.extract_entities(
21+
"tomorrow is X yesterday was Y in 10 days it will be Z"):
22+
assert r.entity_type == "relative_date"
23+
print(r.value,
24+
"day:", r.day, "month:", r.month, "year:", r.year,
25+
"hour:", r.hour, "minute:", r.minute)
26+
"""
27+
tomorrow day: 30 month: 11 year: 2020 hour: 0 minute: 0
28+
yesterday day: 28 month: 11 year: 2020 hour: 0 minute: 0
29+
in 10 days day: 9 month: 12 year: 2020 hour: 0 minute: 0
30+
"""

examples/timedelta.py

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
from simple_NER.annotators.datetime_ner import TimedeltaNER
2+
3+
ner = TimedeltaNER()
4+
5+
for r in ner.extract_entities(
6+
"5 minutes ago was X 10 minutes from now is Y in 19 hours will "
7+
"be N"):
8+
assert r.entity_type == "duration"
9+
print(r.value, r.total_seconds)
10+
"""
11+
5 minutes 300.0
12+
10 minutes 600.0
13+
19 hours 68400.0
14+
"""
15+
16+
for r in ner.extract_entities(
17+
"What President served for five years six months 2 days"):
18+
# NOTE: months/years are not supported because their length is ambiguous
19+
# how many days is 1 month? how many days is 1 year?
20+
assert r.entity_type == "duration"
21+
print(r.value, r.total_seconds)
22+
"""2 days 172800.0"""
23+
24+
for r in ner.extract_entities("starts in 5 minutes"):
25+
assert r.entity_type == "duration"
26+
print(r.value, r.total_seconds)
27+
"""5 minutes 300.0"""
28+
29+
for r in ner.extract_entities("starts in five minutes"):
30+
assert r.entity_type == "duration"
31+
print(r.value, r.total_seconds)
32+
"""5 minutes 300.0"""
33+
34+

readme.md

+59-23
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ simple rule based named entity recognition
1111
- [Email](#email)
1212
- [Names](#names)
1313
- [Locations](#locations)
14-
- [Date Time](#date-time)
14+
- [Datetime](#date-time)
15+
- [Timedelta](#durations)
1516
- [Units](#units)
1617
- [Keywords](#keywords)
1718
- [Numbers](#numbers)
@@ -26,15 +27,10 @@ simple rule based named entity recognition
2627

2728
Available on pip
2829

29-
pip install simple_NER
30-
31-
from source
30+
```bash
31+
pip install simple_NER
32+
```
3233

33-
git clone https://github.com/JarbasAl/simple_NER
34-
cd simple_NER
35-
pip install -r requirements.txt
36-
pip install .
37-
3834
## Usage
3935

4036
### Rule Based NER
@@ -121,7 +117,7 @@ for ent in ner.extract_entities("name is kevin"):
121117

122118
#### Email
123119

124-
Emails can be annotated using regex rules
120+
Extracting emails using regex rules
125121

126122
```python
127123
from simple_NER.annotators.email_ner import EmailNER
@@ -232,42 +228,81 @@ for r in ner.extract_entities(text):
232228
"""
233229
```
234230

235-
236231
#### Date Time
237232

238-
dates and durations can be annotated using [lingua_franca](https://github.com/MycroftAI/lingua-franca)
233+
Datetime extraction is powered by [lingua_franca](https://github.com/MycroftAI/lingua-franca)
239234

240235
```python
241236
from simple_NER.annotators.datetime_ner import DateTimeNER
242237

243238
ner = DateTimeNER()
244239

245-
for r in ner.extract_entities("The movie is one hour, fifty seven and a half minutes long"):
246-
assert r.value == 'one hour, fifty seven and a half minutes'
247-
assert r.entity_type == "duration"
248-
assert r.total_seconds == 7050
249-
assert r.spoken == 'one hour fifty seven minutes thirty seconds'
250-
251240
for r in ner.extract_entities("my birthday is on december 5th"):
252-
assert r.value == 'december 5'
253241
assert r.entity_type == "relative_date"
254242
print("day:", r.day, "month:", r.month, "year:", r.year)
255243
"""
256-
december 5th
257244
day: 5 month: 12 year: 2019
258245
"""
259246

260247
for r in ner.extract_entities("entries are due by January 4th, 2017 at 8:30pm"):
261-
print(r.value)
262248
assert r.entity_type == "relative_date"
263249
print("day:", r.day,"month:", r.month, "year:", r.year, "hour:", r.hour,
264250
"minute:", r.minute)
265251
"""
266-
January 4th, 2017 at 8:30pm
267252
day: 4 month: 1 year: 2017 hour: 20 minute: 30
253+
"""
254+
255+
for r in ner.extract_entities(
256+
"tomorrow is X yesterday was Y in 10 days it will be Z"):
257+
assert r.entity_type == "relative_date"
258+
print(r.value,
259+
"day:", r.day, "month:", r.month, "year:", r.year,
260+
"hour:", r.hour, "minute:", r.minute)
261+
"""
262+
tomorrow day: 30 month: 11 year: 2020 hour: 0 minute: 0
263+
yesterday day: 28 month: 11 year: 2020 hour: 0 minute: 0
264+
in 10 days day: 9 month: 12 year: 2020 hour: 0 minute: 0
265+
"""
266+
```
267+
268+
#### Durations
268269

270+
Duration/timedelta extraction is powered by [lingua_franca](https://github.com/MycroftAI/lingua-franca)
271+
272+
```python
273+
from simple_NER.annotators.datetime_ner import TimedeltaNER
274+
275+
ner = TimedeltaNER()
276+
277+
for r in ner.extract_entities(
278+
"5 minutes ago was X 10 minutes from now is Y in 19 hours will "
279+
"be N"):
280+
assert r.entity_type == "duration"
281+
print(r.value, r.total_seconds)
282+
"""
283+
5 minutes 300.0
284+
10 minutes 600.0
285+
19 hours 68400.0
269286
"""
270287

288+
for r in ner.extract_entities(
289+
"What President served for five years six months 2 days"):
290+
# NOTE: months/years are not supported because their length is ambiguous
291+
# how many days is 1 month? how many days is 1 year?
292+
assert r.entity_type == "duration"
293+
print(r.value, r.total_seconds)
294+
"""2 days 172800.0"""
295+
296+
for r in ner.extract_entities("starts in 5 minutes"):
297+
assert r.entity_type == "duration"
298+
print(r.value, r.total_seconds)
299+
"""5 minutes 300.0"""
300+
301+
for r in ner.extract_entities("starts in five minutes"):
302+
assert r.entity_type == "duration"
303+
print(r.value, r.total_seconds)
304+
"""5 minutes 300.0"""
305+
271306
```
272307

273308
#### Units
@@ -343,7 +378,7 @@ assert sorted(keywords) == [('free', 1.0),
343378

344379
#### Numbers
345380

346-
Detection of written numbers
381+
Extraction of written numbers is powered by [lingua_franca](https://github.com/MycroftAI/lingua-franca)
347382

348383
```python
349384
from simple_NER.annotators.numbers_ner import NumberNER
@@ -448,6 +483,7 @@ for r in ner.extract_entities("London was founded by the Romans"):
448483
Romans DBpedia:Country http://dbpedia.org/resource/Ancient_Rome
449484
"""
450485
```
486+
451487
### NER wrappers
452488

453489
Wrappers are also provided for performing NER with external libraries.

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def package_files(directory):
1414

1515
setup(
1616
name='simple_NER',
17-
version='0.6.1',
17+
version='0.7.0',
1818
packages=['simple_NER', 'simple_NER.rules', 'simple_NER.annotators',
1919
'simple_NER.annotators.remote', 'simple_NER.utils',
2020
'simple_NER.keywords'],

0 commit comments

Comments
 (0)