@@ -11,7 +11,8 @@ simple rule based named entity recognition
11
11
- [ Email] ( #email )
12
12
- [ Names] ( #names )
13
13
- [ Locations] ( #locations )
14
- - [ Date Time] ( #date-time )
14
+ - [ Datetime] ( #date-time )
15
+ - [ Durations] ( #durations )
15
16
- [ Units] ( #units )
16
17
- [ Keywords] ( #keywords )
17
18
- [ Numbers] ( #numbers )
@@ -26,15 +27,10 @@ simple rule based named entity recognition
26
27
27
28
Available on pip
28
29
29
- pip install simple_NER
30
-
31
- from source
30
+ ``` bash
31
+ pip install simple_NER
32
+ ```
32
33
33
- git clone https://github.com/JarbasAl/simple_NER
34
- cd simple_NER
35
- pip install -r requirements.txt
36
- pip install .
37
-
38
34
## Usage
39
35
40
36
### Rule Based NER
@@ -121,7 +117,7 @@ for ent in ner.extract_entities("name is kevin"):
121
117
122
118
#### Email
123
119
124
- Emails can be annotated using regex rules
120
+ Emails are extracted using regex rules
125
121
126
122
``` python
127
123
from simple_NER.annotators.email_ner import EmailNER
@@ -232,42 +228,81 @@ for r in ner.extract_entities(text):
232
228
"""
233
229
```
234
230
235
-
236
231
#### Date Time
237
232
238
- dates and durations can be annotated using [ lingua_franca] ( https://github.com/MycroftAI/lingua-franca )
233
+ Datetime extraction is powered by [ lingua_franca] ( https://github.com/MycroftAI/lingua-franca )
239
234
240
235
``` python
241
236
from simple_NER.annotators.datetime_ner import DateTimeNER
242
237
243
238
ner = DateTimeNER()
244
239
245
- for r in ner.extract_entities(" The movie is one hour, fifty seven and a half minutes long" ):
246
- assert r.value == ' one hour, fifty seven and a half minutes'
247
- assert r.entity_type == " duration"
248
- assert r.total_seconds == 7050
249
- assert r.spoken == ' one hour fifty seven minutes thirty seconds'
250
-
251
240
for r in ner.extract_entities(" my birthday is on december 5th" ):
252
- assert r.value == ' december 5'
253
241
assert r.entity_type == " relative_date"
254
242
print (" day:" , r.day, " month:" , r.month, " year:" , r.year)
255
243
"""
256
- december 5th
257
244
day: 5 month: 12 year: 2019
258
245
"""
259
246
260
247
for r in ner.extract_entities(" entries are due by January 4th, 2017 at 8:30pm" ):
261
- print (r.value)
262
248
assert r.entity_type == " relative_date"
263
249
print (" day:" , r.day," month:" , r.month, " year:" , r.year, " hour:" , r.hour,
264
250
" minute:" , r.minute)
265
251
"""
266
- January 4th, 2017 at 8:30pm
267
252
day: 4 month: 1 year: 2017 hour: 20 minute: 30
253
+ """
254
+
255
+ for r in ner.extract_entities(
256
+ " tomorrow is X yesterday was Y in 10 days it will be Z" ):
257
+ assert r.entity_type == " relative_date"
258
+ print (r.value,
259
+ " day:" , r.day, " month:" , r.month, " year:" , r.year,
260
+ " hour:" , r.hour, " minute:" , r.minute)
261
+ """
262
+ tomorrow day: 30 month: 11 year: 2020 hour: 0 minute: 0
263
+ yesterday day: 28 month: 11 year: 2020 hour: 0 minute: 0
264
+ in 10 days day: 9 month: 12 year: 2020 hour: 0 minute: 0
265
+ """
266
+ ```
267
+
268
+ #### Durations
268
269
270
+ Duration (timedelta) extraction is powered by [ lingua_franca] ( https://github.com/MycroftAI/lingua-franca )
271
+
272
+ ``` python
273
+ from simple_NER.annotators.datetime_ner import TimedeltaNER
274
+
275
+ ner = TimedeltaNER()
276
+
277
+ for r in ner.extract_entities(
278
+ " 5 minutes ago was X 10 minutes from now is Y in 19 hours will "
279
+ " be N" ):
280
+ assert r.entity_type == " duration"
281
+ print (r.value, r.total_seconds)
282
+ """
283
+ 5 minutes 300.0
284
+ 10 minutes 600.0
285
+ 19 hours 68400.0
269
286
"""
270
287
288
+ for r in ner.extract_entities(
289
+ " What President served for five years six months 2 days" ):
290
+ # NOTE: months/years are not supported because their length is ambiguous
291
+ # how many days is 1 month? how many days is 1 year?
292
+ assert r.entity_type == " duration"
293
+ print (r.value, r.total_seconds)
294
+ """ 2 days 172800.0"""
295
+
296
+ for r in ner.extract_entities(" starts in 5 minutes" ):
297
+ assert r.entity_type == " duration"
298
+ print (r.value, r.total_seconds)
299
+ """ 5 minutes 300.0"""
300
+
301
+ for r in ner.extract_entities(" starts in five minutes" ):
302
+ assert r.entity_type == " duration"
303
+ print (r.value, r.total_seconds)
304
+ """ 5 minutes 300.0"""
305
+
271
306
```
272
307
273
308
#### Units
@@ -343,7 +378,7 @@ assert sorted(keywords) == [('free', 1.0),
343
378
344
379
#### Numbers
345
380
346
- Detection of written numbers
381
+ Extraction of written numbers is powered by [ lingua_franca ] ( https://github.com/MycroftAI/lingua-franca )
347
382
348
383
``` python
349
384
from simple_NER.annotators.numbers_ner import NumberNER
@@ -448,6 +483,7 @@ for r in ner.extract_entities("London was founded by the Romans"):
448
483
Romans DBpedia:Country http://dbpedia.org/resource/Ancient_Rome
449
484
"""
450
485
```
486
+
451
487
### NER wrappers
452
488
453
489
Wrappers are also provided for performing NER with external libraries
0 commit comments