Skip to content

Commit a01fb9b

Browse files
authored
Merge pull request #131 from pettarin/devel
Doc review and two bugs fixed
2 parents 7c58b5c + a7d500f commit a01fb9b

29 files changed

+137
-112
lines changed

README.md

+3-2
Original file line numberDiff line numberDiff line change
@@ -269,8 +269,9 @@ for languages with good ASR models,
269269
**aeneas** offers some options to improve
270270
the quality of the alignment at word-level:
271271
272-
* multilevel text (since v1.5.1), and/or
273-
* MFCC nonspeech masking (since v1.7.0, disabled by default).
272+
* multilevel text (since v1.5.1),
273+
* MFCC nonspeech masking (since v1.7.0, disabled by default),
274+
* better TTS engines, like Festival or the AWS/Nuance TTS APIs (since v1.5.0).
274275
275276
If you use the ``aeneas.tools.execute_task`` command line tool,
276277
you can add the ``--presets-word`` switch to enable MFCC nonspeech masking, for example:

README.rst

+4-2
Original file line numberDiff line numberDiff line change
@@ -299,8 +299,10 @@ aligners <https://github.com/pettarin/forced-alignment-tools>`__ for
299299
languages with good ASR models, **aeneas** offers some options to
300300
improve the quality of the alignment at word-level:
301301

302-
- multilevel text (since v1.5.1), and/or
303-
- MFCC nonspeech masking (since v1.7.0, disabled by default).
302+
- multilevel text (since v1.5.1),
303+
- MFCC nonspeech masking (since v1.7.0, disabled by default),
304+
- better TTS engines, like Festival or the AWS/Nuance TTS APIs (since
305+
v1.5.0).
304306

305307
If you use the ``aeneas.tools.execute_task`` command line tool, you can
306308
add the ``--presets-word`` switch to enable MFCC nonspeech masking, for

aeneas/syncmap/format.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ class SyncMapFormat(object):
108108
* Multiple lines: no
109109
110110
Please note that the text is assumed to be contained
111-
in double quotes (``"..."``),
111+
in double quotes ("..."),
112112
which are stripped when reading from file,
113113
and added back when writing to file.
114114
@@ -128,7 +128,7 @@ class SyncMapFormat(object):
128128
* Multiple lines: no
129129
130130
Please note that the text is assumed to be contained
131-
in double quotes (``"..."``),
131+
in double quotes ("..."),
132132
which are stripped when reading from file,
133133
and added back when writing to file.
134134
@@ -180,7 +180,7 @@ class SyncMapFormat(object):
180180
* Multiple levels: no
181181
* Multiple lines: no
182182
183-
See also https://tla.mpi.nl/tla-news/documentation-of-eaf-elan-annotation-format/https://tla.mpi.nl/tla-news/documentation-of-eaf-elan-annotation-format/
183+
See also https://tla.mpi.nl/tla-news/documentation-of-eaf-elan-annotation-format/
184184
185185
.. versionadded:: 1.5.0
186186
"""
@@ -252,7 +252,7 @@ class SyncMapFormat(object):
252252
* Multiple levels: no
253253
* Multiple lines: no
254254
255-
See also https://github.com/pettarin/rb_smil_emulatorhttps://github.com/pettarin/rb_smil_emulator
255+
See also https://github.com/pettarin/rb_smil_emulator
256256
257257
Deprecated, it will be removed in v2.0.0.
258258
@@ -750,7 +750,7 @@ class = "IntervalTier"
750750
* Multiple lines: no
751751
752752
Deprecated, it will be removed in v2.0.0.
753-
Use XML instead.
753+
Use ``XML`` instead.
754754
755755
.. deprecated:: 1.2.0
756756
"""

aeneas/syncmap/fragmentlist.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -459,9 +459,18 @@ def fragments_ending_inside_nonspeech_intervals(
459459
# | *********** | nsi
460460
# | ***X | frag (X=frag.end)
461461
#
462+
# NOTE: this case can arise as follows:
463+
#
464+
# *************** nsi shadow
465+
# | *** | nsi
466+
# | **X | frag (X=frag.end)
467+
#
468+
# so we must invalidate the nsi if this happens
469+
#
470+
nsi_counter[nsi_index] = (None, [])
462471
nsi_index += 1
463472
frag_index += 1
464-
self.log(u" nsi_shadow entirely contains frag => skip to next fragment, nsi")
473+
self.log(u" nsi_shadow entirely contains frag => invalidate nsi, and skip to next fragment, nsi")
465474
else:
466475
#
467476
# *************** nsi shadow

aeneas/syncmap/smfaudacity.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,16 @@
2121
# You should have received a copy of the GNU Affero General Public License
2222
# along with this program. If not, see <http://www.gnu.org/licenses/>.
2323

24-
"""
25-
TBW
26-
"""
27-
2824
from __future__ import absolute_import
2925
from __future__ import print_function
3026

3127
from aeneas.syncmap.smfgtabular import SyncMapFormatGenericTabular
3228

3329

3430
class SyncMapFormatAudacity(SyncMapFormatGenericTabular):
31+
"""
32+
Handler for Audacity I/O format.
33+
"""
3534

3635
TAG = u"SyncMapFormatAudacity"
3736

aeneas/syncmap/smfbase.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,6 @@
2121
# You should have received a copy of the GNU Affero General Public License
2222
# along with this program. If not, see <http://www.gnu.org/licenses/>.
2323

24-
"""
25-
TBW
26-
"""
27-
2824
from __future__ import absolute_import
2925
from __future__ import print_function
3026

@@ -34,6 +30,9 @@
3430

3531

3632
class SyncMapFormatBase(Loggable):
33+
"""
34+
Base class for I/O handlers.
35+
"""
3736

3837
TAG = u"SyncMapFormatBase"
3938

aeneas/syncmap/smfcsv.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,16 @@
2121
# You should have received a copy of the GNU Affero General Public License
2222
# along with this program. If not, see <http://www.gnu.org/licenses/>.
2323

24-
"""
25-
TBW
26-
"""
27-
2824
from __future__ import absolute_import
2925
from __future__ import print_function
3026

3127
from aeneas.syncmap.smfgtabular import SyncMapFormatGenericTabular
3228

3329

3430
class SyncMapFormatCSV(SyncMapFormatGenericTabular):
31+
"""
32+
Handler for comma-separated values (CSV) I/O format.
33+
"""
3534

3635
TAG = u"SyncMapFormatCSV"
3736

aeneas/syncmap/smfeaf.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,6 @@
2121
# You should have received a copy of the GNU Affero General Public License
2222
# along with this program. If not, see <http://www.gnu.org/licenses/>.
2323

24-
"""
25-
TBW
26-
"""
27-
2824
from __future__ import absolute_import
2925
from __future__ import print_function
3026

@@ -36,6 +32,9 @@
3632

3733

3834
class SyncMapFormatEAF(SyncMapFormatGenericXML):
35+
"""
36+
Handler for ELAN I/O format (EAF).
37+
"""
3938

4039
TAG = u"SyncMapFormatEAF"
4140

aeneas/syncmap/smfgsubtitles.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,6 @@
2121
# You should have received a copy of the GNU Affero General Public License
2222
# along with this program. If not, see <http://www.gnu.org/licenses/>.
2323

24-
"""
25-
TBW
26-
"""
27-
2824
from __future__ import absolute_import
2925
from __future__ import print_function
3026

@@ -33,6 +29,9 @@
3329

3430

3531
class SyncMapFormatGenericSubtitles(SyncMapFormatBase):
32+
"""
33+
Base class for subtitles-like I/O format handlers.
34+
"""
3635

3736
TAG = u"SyncMapFormatGenericSubtitles"
3837

aeneas/syncmap/smfgtabular.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,6 @@
2121
# You should have received a copy of the GNU Affero General Public License
2222
# along with this program. If not, see <http://www.gnu.org/licenses/>.
2323

24-
"""
25-
TBW
26-
"""
27-
2824
from __future__ import absolute_import
2925
from __future__ import print_function
3026

@@ -33,6 +29,9 @@
3329

3430

3531
class SyncMapFormatGenericTabular(SyncMapFormatBase):
32+
"""
33+
Base class for tabular-like I/O format handlers.
34+
"""
3635

3736
TAG = u"SyncMapFormatGenericTabular"
3837

aeneas/syncmap/smfgxml.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,6 @@
2121
# You should have received a copy of the GNU Affero General Public License
2222
# along with this program. If not, see <http://www.gnu.org/licenses/>.
2323

24-
"""
25-
TBW
26-
"""
27-
2824
from __future__ import absolute_import
2925
from __future__ import print_function
3026
from itertools import chain
@@ -34,6 +30,9 @@
3430

3531

3632
class SyncMapFormatGenericXML(SyncMapFormatBase):
33+
"""
34+
Base class for XML-like I/O format handlers.
35+
"""
3736

3837
TAG = u"SyncMapFormatGenericTabular"
3938

aeneas/syncmap/smfjson.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,6 @@
2121
# You should have received a copy of the GNU Affero General Public License
2222
# along with this program. If not, see <http://www.gnu.org/licenses/>.
2323

24-
"""
25-
TBW
26-
"""
27-
2824
from __future__ import absolute_import
2925
from __future__ import print_function
3026
import json
@@ -34,6 +30,9 @@
3430

3531

3632
class SyncMapFormatJSON(SyncMapFormatBase):
33+
"""
34+
Handler for JSON I/O format.
35+
"""
3736

3837
TAG = u"SyncMapFormatJSON"
3938

aeneas/syncmap/smfrbse.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,6 @@
2121
# You should have received a copy of the GNU Affero General Public License
2222
# along with this program. If not, see <http://www.gnu.org/licenses/>.
2323

24-
"""
25-
TBW
26-
"""
27-
2824
from __future__ import absolute_import
2925
from __future__ import print_function
3026
import json
@@ -34,6 +30,11 @@
3430

3531

3632
class SyncMapFormatRBSE(SyncMapFormatBase):
33+
"""
34+
Handler for RBSE
35+
(i.e., JSON compatible with ``rb_smil_emulator.js``)
36+
I/O format.
37+
"""
3738

3839
TAG = u"SyncMapFormatRBSE"
3940

aeneas/syncmap/smfsmil.py

+13-10
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,6 @@
2121
# You should have received a copy of the GNU Affero General Public License
2222
# along with this program. If not, see <http://www.gnu.org/licenses/>.
2323

24-
"""
25-
TBW
26-
"""
27-
2824
from __future__ import absolute_import
2925
from __future__ import print_function
3026

@@ -35,6 +31,9 @@
3531

3632

3733
class SyncMapFormatSMIL(SyncMapFormatGenericXML):
34+
"""
35+
Handler for SMIL for EPUB 3 I/O format.
36+
"""
3837

3938
TAG = u"SyncMapFormatSMIL"
4039

@@ -70,12 +69,16 @@ def parse(self, input_text, syncmap):
7069
if child.tag == (smil_ns + "text"):
7170
identifier = gf.safe_unicode(gf.split_url(child.get("src"))[1])
7271
elif child.tag == (smil_ns + "audio"):
73-
begin = gf.time_from_hhmmssmmm(child.get("clipBegin"))
74-
if begin is None:
75-
begin = gf.time_from_ssmmm(child.get("clipBegin"))
76-
end = gf.time_from_hhmmssmmm(child.get("clipEnd"))
77-
if end is None:
78-
end = gf.time_from_ssmmm(child.get("clipEnd"))
72+
begin_text = child.get("clipBegin")
73+
if ":" in begin_text:
74+
begin = gf.time_from_hhmmssmmm(begin_text)
75+
else:
76+
begin = gf.time_from_ssmmm(begin_text)
77+
end_text = child.get("clipEnd")
78+
if ":" in end_text:
79+
end = gf.time_from_hhmmssmmm(end_text)
80+
else:
81+
end = gf.time_from_ssmmm(end_text)
7982
# TODO read text from additional text_file?
8083
self._add_fragment(
8184
syncmap=syncmap,

aeneas/syncmap/smfsrt.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,6 @@
2121
# You should have received a copy of the GNU Affero General Public License
2222
# along with this program. If not, see <http://www.gnu.org/licenses/>.
2323

24-
"""
25-
TBW
26-
"""
27-
2824
from __future__ import absolute_import
2925
from __future__ import print_function
3026

@@ -33,6 +29,9 @@
3329

3430

3531
class SyncMapFormatSRT(SyncMapFormatGenericSubtitles):
32+
"""
33+
Handler for SubRip (SRT) I/O format.
34+
"""
3635

3736
TAG = u"SyncMapFormatSRT"
3837

aeneas/syncmap/smfssv.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,16 @@
2121
# You should have received a copy of the GNU Affero General Public License
2222
# along with this program. If not, see <http://www.gnu.org/licenses/>.
2323

24-
"""
25-
TBW
26-
"""
27-
2824
from __future__ import absolute_import
2925
from __future__ import print_function
3026

3127
from aeneas.syncmap.smfgtabular import SyncMapFormatGenericTabular
3228

3329

3430
class SyncMapFormatSSV(SyncMapFormatGenericTabular):
31+
"""
32+
Handler for space-separated plain text (SSV) I/O format.
33+
"""
3534

3635
TAG = u"SyncMapFormatSSV"
3736

aeneas/syncmap/smfsub.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,6 @@
2121
# You should have received a copy of the GNU Affero General Public License
2222
# along with this program. If not, see <http://www.gnu.org/licenses/>.
2323

24-
"""
25-
TBW
26-
"""
27-
2824
from __future__ import absolute_import
2925
from __future__ import print_function
3026

@@ -33,6 +29,9 @@
3329

3430

3531
class SyncMapFormatSUB(SyncMapFormatGenericSubtitles):
32+
"""
33+
Handler for SubViewer (SUB) I/O format.
34+
"""
3635

3736
TAG = u"SyncMapFormatSUB"
3837

0 commit comments

Comments
 (0)