@@ -5,34 +5,87 @@ use gliner::model::{input::text::TextInput, params::Parameters, pipeline::span::
55
/// Known relation verbs for extracting predicates from text between entities.
///
/// Every entry is a single lowercase word. The visible matching code
/// lowercases each candidate word and accepts it when it equals an entry or
/// merely *ends with* one, so longer words sharing a suffix also match
/// (for example, "networks" ends with "works").
const RELATION_VERBS: &[&str] = &[
    // Employment, leadership and organisational structure.
    "works",
    "founded",
    "leads",
    "manages",
    "joined",
    "acquired",
    "owns",
    // Location and affiliation.
    "based",
    "located",
    "headquartered",
    "reports",
    "partnered",
    "uses",
    // Building and market activity.
    "built",
    "developed",
    "created",
    "provides",
    "competes",
    "invested",
    // Staffing and service.
    "launched",
    "serves",
    "appointed",
    "hired",
    "employs",
    "supervises",
    // Commerce.
    "sold",
    "purchased",
    "ordered",
    "shipped",
    "reviewed",
    "paid",
    // Rules and technical dependencies.
    "governs",
    "applies",
    "requires",
    "depends",
    "integrates",
    "runs",
    // Oversight and operations.
    "heads",
    "oversees",
    "established",
    "operates",
    "supports",
    "maintains",
    // Production and supply chain.
    "produces",
    "manufactures",
    "supplies",
    "distributes",
    "sponsors",
];
1757
/// Prepositions that attach to verbs to form compound predicates.
///
/// Every entry is a single lowercase word.
/// NOTE(review): the lookup against this list is not visible in this view;
/// presumably the word following a matched verb is lowercased and compared
/// with these entries. Confirm against `extract_verb_predicate`.
const PREPOSITIONS: &[&str] = &[
    "at", "in", "for", "with", "to", "by", "of", "from", "on",
];
2260
/// Pronouns and noise words that should never be entities.
///
/// Every entry is lowercase with no surrounding whitespace, so a padded
/// entry cannot silently fail to match.
/// NOTE(review): the filter that consumes this list is not visible in this
/// view; presumably it compares against trimmed, lowercased entity text.
/// Confirm against the caller.
const GARBAGE_WORDS: &[&str] = &[
    // Personal and possessive pronouns.
    "our", "we", "us", "they", "my", "your", "his", "her", "i", "me",
    // Demonstratives and remaining pronouns.
    "it", "this", "that", "these", "those", "its", "their", "he", "she",
    // Interrogatives.
    "who", "which", "what", "where", "when", "how", "why",
];
2967
/// Section header words that GliNER sometimes picks up as entities.
///
/// Every entry is a single lowercase word.
/// NOTE(review): the code that checks entity text against this list is not
/// visible in this view; presumably it lowercases the text first. Confirm
/// against the caller.
const SECTION_HEADERS: &[&str] = &[
    "overview",
    "summary",
    "conclusion",
    "introduction",
    "background",
    "features",
    "pricing",
    "security",
    "details",
    "description",
    "requirements",
    "objectives",
    "scope",
    "methodology",
    "results",
    "abstract",
    "appendix",
    "references",
    "contents",
];
3790
3891/// Check if an entity text is garbage that should be filtered out.
@@ -65,7 +118,10 @@ fn extract_verb_predicate(between_text: &str) -> String {
65118
66119 for ( i, word) in words. iter ( ) . enumerate ( ) {
67120 let lower = word. to_lowercase ( ) ;
68- if RELATION_VERBS . iter ( ) . any ( |v| lower == * v || lower. ends_with ( v) ) {
121+ if RELATION_VERBS
122+ . iter ( )
123+ . any ( |v| lower == * v || lower. ends_with ( v) )
124+ {
69125 // Check if next word is a preposition to attach
70126 if i + 1 < words. len ( ) {
71127 let next_lower = words[ i + 1 ] . to_lowercase ( ) ;
0 commit comments