Skip to content
This repository was archived by the owner on Dec 13, 2021. It is now read-only.

Commit 9d6f84d

Browse files
committed
Rebased with JOSHUA-252 and resolved merge conflicts
2 parents 1586853 + 02f3ef1 commit 9d6f84d

File tree

669 files changed

+2280
-27642
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

669 files changed

+2280
-27642
lines changed

.gitmodules

-7
Original file line numberDiff line numberDiff line change
@@ -1,7 +0,0 @@
1-
[submodule "berkeleylm"]
2-
path = ext/berkeleylm
3-
url = https://github.com/joshua-decoder/berkeleylm.git
4-
[submodule "ext/kenlm"]
5-
path = ext/kenlm
6-
url = https://github.com/kpu/kenlm.git
7-
branch = 56fdb5c44fca34d5a2e07d96139c28fb163983c5

bin/bleu

+15
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,20 @@
11
#!/usr/bin/env bash
22

3+
# Licensed to the Apache Software Foundation (ASF) under one or more
4+
# contributor license agreements. See the NOTICE file distributed with
5+
# this work for additional information regarding copyright ownership.
6+
# The ASF licenses this file to You under the Apache License, Version 2.0
7+
# (the "License"); you may not use this file except in compliance with
8+
# the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
318
if [[ -z $2 ]]; then
419
echo "Usage: bleu output reference"
520
exit 1

bin/extract-1best

+15
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,18 @@
11
#!/bin/bash
22

3+
# Licensed to the Apache Software Foundation (ASF) under one or more
4+
# contributor license agreements. See the NOTICE file distributed with
5+
# this work for additional information regarding copyright ownership.
6+
# The ASF licenses this file to You under the Apache License, Version 2.0
7+
# (the "License"); you may not use this file except in compliance with
8+
# the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
318
java -Xmx500m -cp $JOSHUA/class -Dfile.encoding=utf8 joshua.util.ExtractTopCand $1 - $2

bin/joshua-decoder

+16
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,20 @@
11
#!/bin/bash
2+
3+
# Licensed to the Apache Software Foundation (ASF) under one or more
4+
# contributor license agreements. See the NOTICE file distributed with
5+
# this work for additional information regarding copyright ownership.
6+
# The ASF licenses this file to You under the Apache License, Version 2.0
7+
# (the "License"); you may not use this file except in compliance with
8+
# the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
218
#
319
# Joshua decoder invocation script.
420
#

bin/meteor

+15
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,20 @@
11
#!/usr/bin/env bash
22

3+
# Licensed to the Apache Software Foundation (ASF) under one or more
4+
# contributor license agreements. See the NOTICE file distributed with
5+
# this work for additional information regarding copyright ownership.
6+
# The ASF licenses this file to You under the Apache License, Version 2.0
7+
# (the "License"); you may not use this file except in compliance with
8+
# the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
318
if [[ -z $3 ]]; then
419
echo "Usage: meteor output reference lang"
520
exit 1
File renamed without changes.

examples/README.md

+37-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Examples
1+
# Joshua Examples
22

33
The examples in this directory demonstrate how to exercise different
44
Joshua features. If you have any comments or questions please submit
@@ -10,4 +10,39 @@ Bugs or source code issues should be logged in our
1010
The decoding examples and model training examples in the subdirectories of this
1111
directory assume you have downloaded the Fisher Spanish--English dataset, which
1212
contains speech-recognizer output paired with English translations. This data
13-
can be downloaded by running the [download.sh](https://github.com/apache/incubator-joshua/blob/master/examples/download.sh) script.
13+
can be downloaded by running the [download.sh](https://github.com/apache/incubator-joshua/blob/master/src/examples/resources/download.sh) script.
14+
15+
# Building a Spanish --> English Translation Model using the Fisher Spanish CALLHOME corpus
16+
17+
The following steps show how to build a translation model using the Fisher Spanish CALLHOME corpus.
18+
19+
A) Download the corpus:
20+
1) mkdir $HOME/git
21+
2) cd $HOME/git
22+
3) curl -o fisher-callhome-corpus.zip https://codeload.github.com/joshua-decoder/fisher-callhome-corpus/legacy.zip/master
23+
4) unzip fisher-callhome-corpus.zip
24+
5) # Set environment variable SPANISH=$HOME/git/fisher-callhome-corpus
25+
6) mv joshua-decoder-*/ fisher-callhome-corpus
26+
27+
B) Download and install Joshua:
28+
1) cd /directory/to/install/
29+
2) git clone https://github.com/apache/incubator-joshua.git
30+
3) cd incubator-joshua
31+
4) # Set environment variable JAVA_HOME=/path/to/java # Try $(readlink -f /usr/bin/javac | sed "s:/bin/javac::")
32+
5) # Set environment variable JOSHUA=/directory/to/install/incubator-joshua
33+
6) mvn install
34+
35+
C) Train the model:
36+
1) mkdir -p $HOME/expts/joshua && cd $HOME/expts/joshua
37+
2) $JOSHUA/bin/pipeline.pl \
38+
--rundir 1 \
39+
--readme "Baseline Hiero run" \
40+
--source es \
41+
--target en \
42+
--lm-gen srilm \
43+
--witten-bell \
44+
--corpus $SPANISH/corpus/asr/callhome_train \
45+
--corpus $SPANISH/corpus/asr/fisher_train \
46+
--tune $SPANISH/corpus/asr/fisher_dev \
47+
--test $SPANISH/corpus/asr/callhome_devtest \
48+
--lm-order 3

examples/README.sp_to_en

-32
This file was deleted.

ext/berkeleylm

-1
This file was deleted.

ext/giza-pp/GIZA++-v2/ATables.cpp

-119
This file was deleted.

0 commit comments

Comments
 (0)