Skip to content

Commit 7b02e27

Browse files
committed
UA: PressMint preview for PR #65
1 parent 5f7d1df commit 7b02e27

99 files changed

Lines changed: 17472 additions & 27 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Text_ID ID Date Newspaper Article Publisher Volume Issue URN URL Lang Quality Image
2+
PressMint-UA_1910-10-29-3C3C1DE6 PressMint-UA_1910-10-29-3C3C1DE6.ana.p1 1910-10-29 Рада Кінець «Земщины» 245 Ukrainian
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Text_ID ID Date Newspaper Article Publisher Volume Issue URN URL Lang Quality Image
2+
PressMint-UA_1910-10-29-3C3C1DE6 PressMint-UA_1910-10-29-3C3C1DE6.ana.p1 1910-10-29 Рада Кінець «Земщины» 245 українська
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# newdoc id = PressMint-UA_1910-10-29-3C3C1DE6.ana
2+
# newpar id = PressMint-UA_1910-10-29-3C3C1DE6.ana.p1
3+
# lang = uk
4+
# sent_id = PressMint-UA_1910-10-29-3C3C1DE6.ana.p1.s1
5+
# text = ПЕТЕРБУРГ.
6+
1 ПЕТЕРБУРГ петербур NOUN _ Animacy=Inan|Case=Nom|Gender=Fem|Number=Plur 0 _ _ NER=B-LOC|SpaceAfter=No
7+
2 . . PUNCT _ _ 0 _ _ NER=O
8+
9+
# sent_id = PressMint-UA_1910-10-29-3C3C1DE6.ana.p1.s2
10+
# text = В „Русскомъ Знамени “надруковано звістку, що „Земщина “незабаром закривається.
11+
1 В в ADP _ Case=Loc 0 _ _ NER=O
12+
2 PUNCT _ _ 0 _ _ NER=O|SpaceAfter=No
13+
3 Русскомъ русский X _ Foreign=Yes 0 _ _ NER=O
14+
4 Знамени Знамен PROPN _ Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing 0 _ _ NER=B-PER
15+
5 PUNCT _ _ 0 _ _ NER=O|SpaceAfter=No
16+
6 надруковано надрукувати VERB _ Aspect=Perf|Mood=Ind|Person=0|VerbForm=Fin 0 _ _ NER=O
17+
7 звістку звістка NOUN _ Animacy=Inan|Case=Acc|Gender=Fem|Number=Sing 0 _ _ NER=O|SpaceAfter=No
18+
8 , , PUNCT _ _ 0 _ _ NER=O
19+
9 що що SCONJ _ _ 0 _ _ NER=O
20+
10 PUNCT _ _ 0 _ _ NER=O|SpaceAfter=No
21+
11 Земщина Земщина PROPN _ Animacy=Inan|Case=Nom|Gender=Fem|Number=Sing 0 _ _ NER=O
22+
12 PUNCT _ _ 0 _ _ NER=O|SpaceAfter=No
23+
13 незабаром незабаром ADV _ _ 0 _ _ NER=O
24+
14 закривається закриватися VERB _ Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Reflex=Yes|Tense=Pres|VerbForm=Fin 0 _ _ NER=O|SpaceAfter=No
25+
15 . . PUNCT _ _ 0 _ _ NER=O
26+
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# newdoc id = PressMint-UA_1910-10-29-3C3C1DE6.ana
2+
# newpar id = PressMint-UA_1910-10-29-3C3C1DE6.ana.p1
3+
# lang = uk
4+
# sent_id = PressMint-UA_1910-10-29-3C3C1DE6.ana.p1.s1
5+
# text = ПЕТЕРБУРГ.
6+
1 ПЕТЕРБУРГ петербур NOUN _ Animacy=Inan|Case=Nom|Gender=Fem|Number=Plur 0 _ _ NER=B-LOC|SpaceAfter=No
7+
2 . . PUNCT _ _ 0 _ _ NER=O
8+
9+
# sent_id = PressMint-UA_1910-10-29-3C3C1DE6.ana.p1.s2
10+
# text = В „Русскомъ Знамени “надруковано звістку, що „Земщина “незабаром закривається.
11+
1 В в ADP _ Case=Loc 0 _ _ NER=O
12+
2 PUNCT _ _ 0 _ _ NER=O|SpaceAfter=No
13+
3 Русскомъ русский X _ Foreign=Yes 0 _ _ NER=O
14+
4 Знамени Знамен PROPN _ Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing 0 _ _ NER=B-PER
15+
5 PUNCT _ _ 0 _ _ NER=O|SpaceAfter=No
16+
6 надруковано надрукувати VERB _ Aspect=Perf|Mood=Ind|Person=0|VerbForm=Fin 0 _ _ NER=O
17+
7 звістку звістка NOUN _ Animacy=Inan|Case=Acc|Gender=Fem|Number=Sing 0 _ _ NER=O|SpaceAfter=No
18+
8 , , PUNCT _ _ 0 _ _ NER=O
19+
9 що що SCONJ _ _ 0 _ _ NER=O
20+
10 PUNCT _ _ 0 _ _ NER=O|SpaceAfter=No
21+
11 Земщина Земщина PROPN _ Animacy=Inan|Case=Nom|Gender=Fem|Number=Sing 0 _ _ NER=O
22+
12 PUNCT _ _ 0 _ _ NER=O|SpaceAfter=No
23+
13 незабаром незабаром ADV _ _ 0 _ _ NER=O
24+
14 закривається закриватися VERB _ Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Reflex=Yes|Tense=Pres|VerbForm=Fin 0 _ _ NER=O|SpaceAfter=No
25+
15 . . PUNCT _ _ 0 _ _ NER=O
26+
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<TEI xmlns="http://www.tei-c.org/ns/1.0" xml:id="PressMint-UA_1910-10-29-3C3C1DE6.ana" xml:lang="uk">
3+
<teiHeader>
4+
<fileDesc>
5+
<titleStmt>
6+
<title xml:lang="uk">Корпус українських історичних газет PressMint-UA (PluG), «Кінець «Земщины»», Рада, №245 29.10.1910 [PressMint.ana SAMPLE]</title>
7+
<title xml:lang="en">Ukrainian historical newspaper corpus PressMint-UA, "Кінець «Земщины»", Рада, 1910-10-29, no. 245 [PressMint.ana SAMPLE]</title>
8+
</titleStmt>
9+
<editionStmt>
10+
<edition>1.0</edition>
11+
</editionStmt>
12+
<extent><!--These numbers do not reflect the size of the sample!-->
13+
<measure unit="paragraphs" quantity="1" xml:lang="en">1 paragraphs</measure>
14+
<measure unit="words" quantity="10" xml:lang="en">10 words</measure>
15+
</extent>
16+
<publicationStmt>
17+
<publisher>
18+
<orgName xml:lang="uk">Дослідницька інфраструктура CLARIN</orgName>
19+
<orgName xml:lang="en">CLARIN research infrastructure</orgName>
20+
<ref target="https://www.clarin.eu/">www.clarin.eu</ref>
21+
</publisher>
22+
<availability status="free">
23+
<licence>http://creativecommons.org/licenses/by/4.0/</licence>
24+
<p xml:lang="en">This work is licensed under the <ref target="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International License</ref>.</p>
25+
</availability>
26+
<date when="2026-06-11">2026-06-11</date>
27+
</publicationStmt>
28+
<sourceDesc>
29+
<bibl>
30+
<title level="j">Рада</title>
31+
<title level="a">Кінець «Земщины»</title>
32+
<pubPlace>Київ</pubPlace>
33+
<date when="1910-10-29">№245 29.10.1910</date>
34+
<biblScope unit="issue">245</biblScope>
35+
<idno type="local" subtype="PluG-path">R/Rada_1910_245/Rada_1910_245_Kinec_Zemschiny.txt</idno>
36+
</bibl>
37+
</sourceDesc>
38+
</fileDesc>
39+
<encodingDesc>
40+
<projectDesc>
41+
<p xml:lang="en"><ref target="https://www.clarin.eu/pressmint">PressMint</ref> is a project that aims to (1) create a multilingual set of corpora of historical newspapers uniformly encoded according to the <ref target="https://clarin-eric.github.io/PressMint/">PressMint encoding guidelines</ref>; (2) add linguistic annotations to the corpora; and (3) make the corpora available through concordancers.</p>
42+
</projectDesc>
43+
<tagsDecl><!--These numbers do not reflect the size of the sample!-->
44+
<namespace name="http://www.tei-c.org/ns/1.0">
45+
<tagUsage gi="body" occurs="1"/>
46+
<tagUsage gi="name" occurs="2"/>
47+
<tagUsage gi="p" occurs="1"/>
48+
<tagUsage gi="pc" occurs="7"/>
49+
<tagUsage gi="s" occurs="2"/>
50+
<tagUsage gi="text" occurs="1"/>
51+
<tagUsage gi="w" occurs="10"/>
52+
</namespace>
53+
</tagsDecl>
54+
</encodingDesc>
55+
<revisionDesc>
56+
<change when="2026-06-11">
57+
<name>Tomaž Erjavec</name>: Made sample.</change>
58+
<change when="2026-04-17">Конвертація PluG -&gt; PressMint TEI XML .ana.</change>
59+
</revisionDesc>
60+
</teiHeader>
61+
<text xml:lang="uk">
62+
<body>
63+
<p xml:id="PressMint-UA_1910-10-29-3C3C1DE6.ana.p1">
64+
<s xml:id="PressMint-UA_1910-10-29-3C3C1DE6.ana.p1.s1">
65+
<name type="LOC">
66+
<w lemma="петербур" msd="UPosTag=NOUN|Animacy=Inan|Case=Nom|Gender=Fem|Number=Plur" join="right">ПЕТЕРБУРГ</w>
67+
</name>
68+
<pc msd="UPosTag=PUNCT">.</pc>
69+
</s>
70+
<s xml:id="PressMint-UA_1910-10-29-3C3C1DE6.ana.p1.s2">
71+
<w lemma="в" msd="UPosTag=ADP|Case=Loc">В</w>
72+
<pc msd="UPosTag=PUNCT" join="right">„</pc>
73+
<w lemma="русский" msd="UPosTag=X|Foreign=Yes">Русскомъ</w>
74+
<name type="PER">
75+
<w lemma="Знамен" msd="UPosTag=PROPN|Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing">Знамени</w>
76+
</name>
77+
<pc msd="UPosTag=PUNCT" join="right">“</pc>
78+
<w lemma="надрукувати" msd="UPosTag=VERB|Aspect=Perf|Mood=Ind|Person=0|VerbForm=Fin">надруковано</w>
79+
<w lemma="звістка" msd="UPosTag=NOUN|Animacy=Inan|Case=Acc|Gender=Fem|Number=Sing" join="right">звістку</w>
80+
<pc msd="UPosTag=PUNCT">,</pc>
81+
<w lemma="що" msd="UPosTag=SCONJ">що</w>
82+
<pc msd="UPosTag=PUNCT" join="right">„</pc>
83+
<w lemma="Земщина" msd="UPosTag=PROPN|Animacy=Inan|Case=Nom|Gender=Fem|Number=Sing">Земщина</w>
84+
<pc msd="UPosTag=PUNCT" join="right">“</pc>
85+
<w lemma="незабаром" msd="UPosTag=ADV">незабаром</w>
86+
<w lemma="закриватися" msd="UPosTag=VERB|Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Reflex=Yes|Tense=Pres|VerbForm=Fin" join="right">закривається</w>
87+
<pc msd="UPosTag=PUNCT">.</pc>
88+
</s>
89+
</p>
90+
<gap reason="editorial">
91+
<desc xml:lang="en">SAMPLING</desc>
92+
</gap>
93+
</body>
94+
</text>
95+
</TEI>
96+
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# newdoc id = PressMint-UA_1910-10-29-3C3C1DE6.ana
2+
# newpar id = PressMint-UA_1910-10-29-3C3C1DE6.ana.p1
3+
# lang = uk
4+
# sent_id = PressMint-UA_1910-10-29-3C3C1DE6.ana.p1.s1
5+
# text = ПЕТЕРБУРГ.
6+
1 ПЕТЕРБУРГ петербур NOUN _ Animacy=Inan|Case=Nom|Gender=Fem|Number=Plur 0 _ _ NER=B-LOC|SpaceAfter=No
7+
2 . . PUNCT _ _ 0 _ _ NER=O
8+
9+
# sent_id = PressMint-UA_1910-10-29-3C3C1DE6.ana.p1.s2
10+
# text = В „Русскомъ Знамени “надруковано звістку, що „Земщина “незабаром закривається.
11+
1 В в ADP _ Case=Loc 0 _ _ NER=O
12+
2 PUNCT _ _ 0 _ _ NER=O|SpaceAfter=No
13+
3 Русскомъ русский X _ Foreign=Yes 0 _ _ NER=O
14+
4 Знамени Знамен PROPN _ Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing 0 _ _ NER=B-PER
15+
5 PUNCT _ _ 0 _ _ NER=O|SpaceAfter=No
16+
6 надруковано надрукувати VERB _ Aspect=Perf|Mood=Ind|Person=0|VerbForm=Fin 0 _ _ NER=O
17+
7 звістку звістка NOUN _ Animacy=Inan|Case=Acc|Gender=Fem|Number=Sing 0 _ _ NER=O|SpaceAfter=No
18+
8 , , PUNCT _ _ 0 _ _ NER=O
19+
9 що що SCONJ _ _ 0 _ _ NER=O
20+
10 PUNCT _ _ 0 _ _ NER=O|SpaceAfter=No
21+
11 Земщина Земщина PROPN _ Animacy=Inan|Case=Nom|Gender=Fem|Number=Sing 0 _ _ NER=O
22+
12 PUNCT _ _ 0 _ _ NER=O|SpaceAfter=No
23+
13 незабаром незабаром ADV _ _ 0 _ _ NER=O
24+
14 закривається закриватися VERB _ Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Reflex=Yes|Tense=Pres|VerbForm=Fin 0 _ _ NER=O|SpaceAfter=No
25+
15 . . PUNCT _ _ 0 _ _ NER=O
26+
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
PressMint-UA_1910-10-29-3C3C1DE6.p1 ПЕТЕРБУРГ. В „Русскомъ Знамени “надруковано звістку, що „Земщина “незабаром закривається.
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
<text id="PressMint-UA_1910-10-29-3C3C1DE6" lang="українська" date="1910-10-29" year="1910" newspaper="Рада" publisher="" source_url="" article="Кінець «Земщины»" issue="245">
2+
<p id="PressMint-UA_1910-10-29-3C3C1DE6.p1" lang="українська" quality="" image="">
3+
<s id="PressMint-UA_1910-10-29-3C3C1DE6.ana.p1.s1">
4+
<name type="LOC">
5+
ПЕТЕРБУРГ ПЕТЕРБУРГ петербур NOUN Animacy=Inan Case=Nom Gender=Fem Number=Plur
6+
<g/>
7+
</name>
8+
. . . PUNCT -
9+
</s>
10+
<s id="PressMint-UA_1910-10-29-3C3C1DE6.ana.p1.s2">
11+
В В в ADP Case=Loc
12+
„ „ „ PUNCT -
13+
<g/>
14+
Русскомъ Русскомъ русский X Foreign=Yes
15+
<name type="PER">
16+
Знамени Знамени Знамен PROPN Animacy=Inan Case=Loc Gender=Masc Number=Sing
17+
</name>
18+
“ “ “ PUNCT -
19+
<g/>
20+
надруковано надруковано надрукувати VERB Aspect=Perf Mood=Ind Person=0 VerbForm=Fin
21+
звістку звістку звістка NOUN Animacy=Inan Case=Acc Gender=Fem Number=Sing
22+
<g/>
23+
, , , PUNCT -
24+
що що що SCONJ -
25+
„ „ „ PUNCT -
26+
<g/>
27+
Земщина Земщина Земщина PROPN Animacy=Inan Case=Nom Gender=Fem Number=Sing
28+
“ “ “ PUNCT -
29+
<g/>
30+
незабаром незабаром незабаром ADV -
31+
закривається закривається закриватися VERB Aspect=Imp Mood=Ind Number=Sing Person=3 Reflex=Yes Tense=Pres VerbForm=Fin
32+
<g/>
33+
. . . PUNCT -
34+
</s>
35+
</p>
36+
</text>
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<TEI xmlns="http://www.tei-c.org/ns/1.0" xml:id="PressMint-UA_1910-10-29-3C3C1DE6" xml:lang="uk">
3+
<teiHeader>
4+
<fileDesc>
5+
<titleStmt>
6+
<title xml:lang="uk">Корпус українських історичних газет PressMint-UA (PluG), «Кінець «Земщины»», Рада, №245 29.10.1910 [PressMint SAMPLE]</title>
7+
<title xml:lang="en">Ukrainian historical newspaper corpus PressMint-UA, "Кінець «Земщины»", Рада, 1910-10-29, no. 245 [PressMint SAMPLE]</title>
8+
</titleStmt>
9+
<editionStmt>
10+
<edition>1.0</edition>
11+
</editionStmt>
12+
<extent><!--These numbers do not reflect the size of the sample!-->
13+
<measure unit="paragraphs" quantity="1" xml:lang="en">1 paragraphs</measure>
14+
<measure unit="words" quantity="10" xml:lang="en">10 words</measure>
15+
</extent>
16+
<publicationStmt>
17+
<publisher>
18+
<orgName xml:lang="uk">Дослідницька інфраструктура CLARIN</orgName>
19+
<orgName xml:lang="en">CLARIN research infrastructure</orgName>
20+
<ref target="https://www.clarin.eu/">www.clarin.eu</ref>
21+
</publisher>
22+
<availability status="free">
23+
<licence>http://creativecommons.org/licenses/by/4.0/</licence>
24+
<p xml:lang="en">This work is licensed under the <ref target="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International License</ref>.</p>
25+
</availability>
26+
<date when="2026-06-11">2026-06-11</date>
27+
</publicationStmt>
28+
<sourceDesc>
29+
<bibl>
30+
<title level="j">Рада</title>
31+
<title level="a">Кінець «Земщины»</title>
32+
<pubPlace>Київ</pubPlace>
33+
<date when="1910-10-29">№245 29.10.1910</date>
34+
<biblScope unit="issue">245</biblScope>
35+
<idno type="local" subtype="PluG-path">R/Rada_1910_245/Rada_1910_245_Kinec_Zemschiny.txt</idno>
36+
</bibl>
37+
</sourceDesc>
38+
</fileDesc>
39+
<encodingDesc>
40+
<projectDesc>
41+
<p xml:lang="en"><ref target="https://www.clarin.eu/pressmint">PressMint</ref> is a project that aims to (1) create a multilingual set of corpora of historical newspapers uniformly encoded according to the <ref target="https://clarin-eric.github.io/PressMint/">PressMint encoding guidelines</ref>; (2) add linguistic annotations to the corpora; and (3) make the corpora available through concordancers.</p>
42+
</projectDesc>
43+
<tagsDecl><!--These numbers do not reflect the size of the sample!-->
44+
<namespace name="http://www.tei-c.org/ns/1.0">
45+
<tagUsage gi="body" occurs="1"/>
46+
<tagUsage gi="p" occurs="1"/>
47+
<tagUsage gi="text" occurs="1"/>
48+
</namespace>
49+
</tagsDecl>
50+
</encodingDesc>
51+
<revisionDesc>
52+
<change when="2026-06-11">
53+
<name>Tomaž Erjavec</name>: Made sample.</change>
54+
<change when="2026-04-17">Конвертація PluG -&gt; PressMint TEI XML.</change>
55+
</revisionDesc>
56+
</teiHeader>
57+
<text xml:lang="uk">
58+
<body>
59+
<p xml:id="PressMint-UA_1910-10-29-3C3C1DE6.p1">ПЕТЕРБУРГ. В „Русскомъ Знамени “надруковано звістку, що „Земщина “незабаром закривається.</p>
60+
<gap reason="editorial">
61+
<desc xml:lang="en">SAMPLING</desc>
62+
</gap>
63+
</body>
64+
</text>
65+
</TEI>
66+
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Text_ID ID Date Newspaper Article Publisher Volume Issue URN URL Lang Quality Image
2+
PressMint-UA_1910-10-29-8330D80B PressMint-UA_1910-10-29-8330D80B.ana.p1 1910-10-29 Рада Галичина 245 Ukrainian
3+
PressMint-UA_1910-10-29-8330D80B PressMint-UA_1910-10-29-8330D80B.ana.p2 1910-10-29 Рада Галичина 245 Ukrainian
4+
PressMint-UA_1910-10-29-8330D80B PressMint-UA_1910-10-29-8330D80B.ana.p9 1910-10-29 Рада Галичина 245 Ukrainian
5+
PressMint-UA_1910-10-29-8330D80B PressMint-UA_1910-10-29-8330D80B.ana.p10 1910-10-29 Рада Галичина 245 Ukrainian

0 commit comments

Comments
 (0)