Skip to content

Commit 5eafc3a

Browse files
committed
Merge branch 'main' into plant_hypo_mero
2 parents f37abf2 + 0266433 commit 5eafc3a

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

51 files changed

+11452
-991
lines changed

scripts/validate.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,13 @@
99
from collections import Counter
1010
from from_yaml import load
1111

12+
# This is a temporary list of exceptions for linking where a taxon name is
13+
# used as a generic name. In this case, Wikidata has only a single entry
14+
# while OEWN distinguishes between the taxon and the generic name
15+
# (e.g., hydrangea vs genus Hydrangea).
16+
WIKIDATA_DUPLICATION_EXCEPTIONS = [
17+
"Q134842", "Q138789", "Q161142", "Q644312", "Q1423091", "Q5309794", "Q1024025", "Q133876", "Q1783190", "Q1974044", "Q2704518", "Q2708653", "Q1362995", "Q858999", "Q26949", "Q2511051", "Q5230439", "Q4006525", "Q150242", "Q94815", "Q2719974", "Q908118", "Q2589976", "Q355080", "Q138842", "Q5222959", "Q157905", "Q2975317", "Q2496817", "Q2574724", "Q2707277", "Q161075", "Q2720105", "Q157748", "Q156851", "Q156699", "Q159077", "Q133285", "Q1754165", "Q205265", "Q133827", "Q1425929", "Q158975", "Q1969330", "Q310801", "Q648056", "Q1514169", "Q2452570", "Q1354632", "Q3338824", "Q2213704", "Q135537", "Q7840756", "Q134827", "Q199695", "Q1061596", "Q139000", "Q199456", "Q180597", "Q133017", "Q795375", "Q130948", "Q1708883", "Q2304642", "Q4795779", "Q74083", "Q990101", "Q41317", "Q1137414", "Q2136293", "Q905053", "Q300923", "Q2500468", "Q734870", "Q1518954", "Q40621", "Q132672", "Q309466", "Q1060870", "Q130902", "Q14400", "Q130988", "Q319520", "Q131688", "Q179204", "Q595983", "Q19119", "Q1329239", "Q213536", "Q752529", "Q734720", "Q788536", "Q621861", "Q46316", "Q25336", "Q3388845", "Q344662", "Q2706095", "Q185231", "Q190701", "Q756089", "Q899799", "Q369761", "Q811633", "Q185167", "Q310869", "Q132950", "Q1307559", "Q133259", "Q2166073", "Q329334", "Q1136219" ]
18+
1219
def check_symmetry(wn, fix):
1320
errors = []
1421
for synset in wn.synsets:
@@ -393,7 +400,7 @@ def main():
393400
if synset.wikidata:
394401
ss_wikidatas = synset.wikidata if isinstance(synset.wikidata, list) else [synset.wikidata]
395402
for wikidata in ss_wikidatas:
396-
if wikidata in wikidatas:
403+
if wikidata in wikidatas and wikidata not in WIKIDATA_DUPLICATION_EXCEPTIONS:
397404
print(f"ERROR: QID {wikidata} is duplicated")
398405
errors += 1
399406
else:

src/deprecations.csv

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,3 +199,27 @@
199199
"ewn-85556310-n","","ewn-81448123-n","","Duplicate (#1134)"
200200
"ewn-10410299-n","i91815","ewn-10300973-n","i91146","Duplicate (#1150)"
201201
"ewn-09750685-n","i87839","ewn-09769084-n","i87974","Duplicate (#1199)"
202+
"ewn-09714775-n","i87600","ewn-09750913-n","i87841","Does not exist (#1202)"
203+
"ewn-06930168-n","i72891","ewn-06919215-n","i72806","Does not exist (#1202)"
204+
"ewn-06928089-n","i72876","ewn-06931172-n","i72899","Does not exist (#1202)"
205+
"ewn-06944501-n","i72998","ewn-06943976-n","i72995","Does not exist (#1202)"
206+
"ewn-06944668-n","i72999","ewn-06943976-n","i72995","Does not exist (#1202)"
207+
"ewn-06946323-n","i73012","ewn-86620955-n","","Split into Mikir and Meitei (#1202)"
208+
"ewn-06949238-n","i73037","ewn-06949067-n","i73035","Not a distinct language (#1202)"
209+
"ewn-06955789-n","i73081","ewn-06954406-n","i73074","Does not exist (#1202)"
210+
"ewn-06961022-n","i73109","ewn-06961158-n","i73110","Duplicate (#1202)"
211+
"ewn-06962811-n","i73122","ewn-06961158-n","i73110","Duplicate (#1202)"
212+
"ewn-06990608-n","i73293","ewn-06990094-n","i73290","Does not exist (#1202)"
213+
"ewn-06970264-n","i73163","ewn-06969284-n","i73156","Does not exist (#1202)"
214+
"ewn-06969880-n","i73160","ewn-06969284-n","i73156","Does not exist (#1202)"
215+
"ewn-07006307-n","i73399","ewn-07008452-n","i73418","Duplicate (#1202)"
216+
"ewn-06934169-n","i72922","ewn-06934283-n","i72923","Does not exist (#1202)"
217+
"ewn-06935458-n","i72932","ewn-06935652-n","i72934","Duplicate (#1202)"
218+
"ewn-06936357-n","i72941","ewn-06936569-n","i72943","Duplicate (#1202)"
219+
"ewn-06948847-n","i73033","ewn-06948688-n","i73031","Duplicate (#1202)"
220+
"ewn-06974800-n","i73194","ewn-06975194-n","i73197","Duplicate (#1202)"
221+
"ewn-06986165-n","i73263","ewn-06985752-n","i73261","Duplicate (#1202)"
222+
"ewn-07005467-n","i73391","ewn-07007795-n","i73413","Duplicate (#1202)"
223+
"ewn-07008186-n","i73416","ewn-07007882-n","i73414","Duplicate (#1202)"
224+
"ewn-06978186-n","i73213","ewn-06978970-n","i73219","Duplicate (#1202)"
225+
"ewn-06978029-n","i73212","ewn-06978360-n","i73214","Duplicate (#1202)"

src/yaml/adj.all.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22714,6 +22714,7 @@
2271422714
ili: i1809
2271522715
members:
2271622716
- fabian
22717+
- Fabian
2271722718
partOfSpeech: s
2271822719
similar:
2271922720
- 00326179-a
@@ -31044,6 +31045,7 @@
3104431045
- streetwise
3104531046
- street smart
3104631047
- with-it
31048+
- street-smart
3104731049
partOfSpeech: s
3104831050
similar:
3104931051
- 00440447-a
@@ -47477,7 +47479,7 @@
4747747479
members:
4747847480
- circular
4747947481
- rotary
47480-
- orbitual
47482+
- orbital
4748147483
partOfSpeech: s
4748247484
similar:
4748347485
- 00678969-a
@@ -118563,6 +118565,7 @@
118563118565
ili: i9297
118564118566
members:
118565118567
- thickspread
118568+
- thick-spread
118566118569
partOfSpeech: s
118567118570
similar:
118568118571
- 01698676-a
@@ -163342,6 +163345,7 @@
163342163345
ili: i12845
163343163346
members:
163344163347
- straightarrow
163348+
- straight-arrow
163345163349
partOfSpeech: s
163346163350
similar:
163347163351
- 02326142-a

src/yaml/adj.pert.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4957,7 +4957,7 @@
49574957
- cellulosic fibers
49584958
ili: i15048
49594959
members:
4960-
- cellulosid
4960+
- cellulosic
49614961
partOfSpeech: a
49624962
02697176-a:
49634963
definition:
@@ -14289,6 +14289,7 @@
1428914289
ili: i16168
1429014290
members:
1429114291
- doric
14292+
- Doric
1429214293
partOfSpeech: a
1429314294
02858358-a:
1429414295
definition:

src/yaml/adv.all.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1651,6 +1651,7 @@
16511651
- yet
16521652
- til now
16531653
- until now
1654+
- till now
16541655
partOfSpeech: r
16551656
00028594-r:
16561657
definition:
@@ -6433,6 +6434,7 @@
64336434
- so far
64346435
- to that extent
64356436
- to that degree
6437+
- in so far as
64366438
partOfSpeech: r
64376439
00099778-r:
64386440
definition:
@@ -18044,6 +18046,7 @@
1804418046
ili: i19962
1804518047
members:
1804618048
- scot free
18049+
- scot-free
1804718050
partOfSpeech: r
1804818051
00262350-r:
1804918052
definition:

0 commit comments

Comments
 (0)