-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathaddTerminalTimeInfo.pl
More file actions
executable file
·155 lines (131 loc) · 5.79 KB
/
addTerminalTimeInfo.pl
File metadata and controls
executable file
·155 lines (131 loc) · 5.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#!/usr/bin/perl
use lib $ENV{TDT};
require "format.pl";
require "resources.pl";
# Version string shown in the banner printed by printVersionHeader().
my $version = "v.43";
printVersionHeader("AddTerminalTimeInfo $version");

# Main driver.
#
# For every factor named in @factornames (a column holding word-level xml IDs
# of the form "sw<conversation>_s<n>_<n>"), this adds 14 new columns to the
# factor table and fills them with timing, speech-rate-window and prosodic
# attributes found in the per-conversation word info returned by
# getWordTimeInfo(). The updated table is written back with writeFactorHash().
#
# NOTE(review): $help, $corpus, $num_newfactors, @factornames and $warnings are
# not set anywhere in this file; presumably they are package globals populated
# by the required format.pl / resources.pl -- confirm there.
if ($help) { printHelp("addTerminalTimeInfo"); }
elsif ($corpus eq "" || $num_newfactors) { printAbort(); }
else {
    my %cases = parseFactorHash();

    # Each entry pairs the suffix of a new output factor with the XML
    # attribute its value is read from. The order of the entries is
    # significant twice over: it is the order in which the columns are
    # created, and (for the first two groups) the order in which the
    # attributes must appear in the word info for the group to match.

    # Group 1: all four must be present, in this order, or the case counts
    # as "missing" and nothing at all is filled in for it.
    my @phonword_attrs = (
        [ '_duration',       'phonwordDuration' ],
        [ '_syllables',      'phonwordSyllables' ],
        [ '_precedingPause', 'phonwordPrecedingPause' ],
        [ '_followingPause', 'phonwordFollowingPause' ],
    );

    # Group 2: all five must be present, in this order, or none is filled in.
    my @window_attrs = (
        [ '_spWindow',                 'spWindow' ],
        [ '_spWindowSyllablePosition', 'spWindowSyllablePosition' ],
        [ '_spWindowSyllables',        'spWindowSyllables' ],
        [ '_spWindowSyllableDuration', 'spWindowSyllableDuration' ],
        [ '_spWindowTotalDuration',    'spWindowTotalDuration' ],
    );

    # Group 3: each attribute is looked up and filled in independently.
    my @prosody_attrs = (
        [ '_BreakIndex',     'prosPhraseIndex' ],
        [ '_PhraseTone',     'prosPhraseTone' ],
        [ '_BoundaryTone',   'prosPhraseBoundaryTone' ],
        [ '_AccentStrength', 'prosAccentStrength' ],
        [ '_AccentType',     'prosAccentType' ],
    );

    # Compiles a pattern that matches the given attributes in order (with at
    # least one character between consecutive attributes) and captures each
    # quoted value. Built once here because it does not depend on the factor.
    my $attrs_in_order = sub {
        my $pattern = join '.+', map { $_->[1] . '="([^"]+)"' } @_;
        return qr/$pattern/;
    };
    my $phonword_re = $attrs_in_order->(@phonword_attrs);
    my $window_re   = $attrs_in_order->(@window_attrs);

    # $factor and $id are intentionally left as package-global loop
    # variables (no "my"): helpers in the required files cannot be seen from
    # here and might read them.
    foreach $factor (@factornames) {
        # ID of variable in the output file (i.e. column)
        my $fid = getFactorID($factor, %cases);
        print "Factor ID for factor '$factor': $fid\n\n";

        ######################
        # creating new factors
        ######################
        # Maps each suffix to the column ID of "<factor><suffix>".
        my %fid_of;
        foreach my $attr (@phonword_attrs, @window_attrs, @prosody_attrs) {
            my $new_factor = $factor . $attr->[0];
            %cases = createFactor($new_factor, %cases);
            $fid_of{ $attr->[0] } = getFactorID($new_factor, %cases);
        }

        # can run out memory here!
        # switch this back on to load all time info at once (costs memory)
        # my %words = getWordTimeInfoLines();

        my $missing  = 0;    # well-formed xml IDs without phonword timing info
        my $empty    = 0;    # cells with an empty xml ID
        my $illegals = 0;    # cells whose content is not a valid xml ID

        # Word info is (re)loaded only when the conversation changes, so
        # only one conversation is held in %words at a time.
        my %words;
        my $oldconversation = '';

        foreach $id (sort sortTGrep2ID keys %cases) {
            my $value = $cases{$id}[$fid];

            # Skip the row whose cell is the factor name itself
            # (presumably the header row -- confirm against parseFactorHash).
            next if $value eq $factor;

            # In list context the match returns its captures, or the empty
            # list on failure, so $xmlid stays undefined for malformed cells.
            my ($xmlid, $conversation) = $value =~ /((sw\d+)_s\d+_\d+)/;

            if (!defined $xmlid) {
                if ($value eq "") {
                    $empty++;
                    # Fixed: this used to test $warning, which is never the
                    # flag used elsewhere ($warnings), so the warning could
                    # not be emitted.
                    if ($warnings) { warn "Factor $factor for $id has empty xml ID value.\n"; }
                }
                else {
                    print "Factor $factor for $id is not in right format: $value\n";
                    $illegals++;
                }
                next;
            }

            if ($oldconversation ne $conversation) { %words = getWordTimeInfo($conversation); }
            $oldconversation = $conversation;

            my $info = $words{$xmlid};

            # Group 1 gates everything else: without it the case is
            # reported as missing and no column is filled in.
            my @phonword_values = $info =~ $phonword_re;
            if (!@phonword_values) {
                if ($warnings) { warn formatWarning("No duration information found for xml ID $xmlid"); }
                $missing++;
                next;
            }
            for my $i (0 .. $#phonword_attrs) {
                $cases{$id}[ $fid_of{ $phonword_attrs[$i][0] } ] = $phonword_values[$i];
            }

            # Group 2: an empty capture list means no match, so nothing
            # is stored.
            my @window_values = $info =~ $window_re;
            if (@window_values) {
                for my $i (0 .. $#window_attrs) {
                    $cases{$id}[ $fid_of{ $window_attrs[$i][0] } ] = $window_values[$i];
                }
            }

            # Group 3: every prosodic attribute is optional on its own.
            foreach my $attr (@prosody_attrs) {
                my ($suffix, $tag) = @$attr;
                if ($info =~ /$tag=\"([^\"]+)\"/) { $cases{$id}[ $fid_of{$suffix} ] = $1; }
            }
        }

        # Per-factor summary.
        if ($missing > 0) { printLine("No duration information found for ${missing} cases (see -w for more detail)."); }
        print "NB: Due to annotation inconsistencies in the current XML version, there may be missing speechrate and prosodic information.\n";
        if ($empty > 0) { printLine("There were $empty cells with empty xml ID values."); }
        if ($illegals > 0) { printLine("There were $illegals cells with invalid xml IDs."); }
        printLine();
    }

    writeFactorHash(%cases);
}
printFooter();