delta2323 · delta2323 · Jun 22, 2016 · Jun 22, 2016 · Jun 22, 2016 · Jun 22, 2016
diff --git a/deepmark_chainer/net/big_lstm.py b/deepmark_chainer/net/big_lstm.py
@@ -16,19 +16,23 @@ class BigLSTM(link.Chain):
 
     """
 
-    def __init__(self, vocab_size=10, rnn_unit='LSTM'):
-        embed = embed_id.EmbedID(vocab_size, 10)
+    def __init__(self, vocab_size=793471, embed_dim=8192, rnn_unit='LSTM'):
+        embed = embed_id.EmbedID(vocab_size, embed_dim)
+        # Both recurrent stacks map embed_dim -> 8192 -> 1024.
         if rnn_unit == 'LSTM':
-            rnns = link.ChainList(lstm.LSTM(10, 20),
-                                  lstm.LSTM(20, 20))
+            rnns = link.ChainList(lstm.LSTM(embed_dim, 8192),
+                                  lstm.LSTM(8192, 1024))
         elif rnn_unit == 'GRU':
-            rnns = link.ChainList(gru.StatefulGRU(20, 10),
-                                  gru.StatefulGRU(20, 20))
+            # StatefulGRU takes (in_size, out_size), same order as LSTM.
+            rnns = link.ChainList(gru.StatefulGRU(embed_dim, 8192),
+                                  gru.StatefulGRU(8192, 1024))
         else:
             raise ValueError('Invalid RNN unit:{}'.format(rnn_unit))
 
-        linears = link.ChainList(linear.Linear(20, 10),
-                                 linear.Linear(10, vocab_size))
+        # NOTE(review): the final layer emits 1024 units, not vocab_size;
+        # confirm this is intended when vocab_size > 1024.
+        linears = link.ChainList(linear.Linear(1024, 8192),
+                                 linear.Linear(8192, 1024))
         super(BigLSTM, self).__init__(embed=embed, rnns=rnns,
                                       linears=linears)
         self.train = True

diff --git a/evaluate/train_text.py b/evaluate/train_text.py
@@ -21,7 +21,7 @@
                     help='Random seed')
 parser.add_argument('--iteration', '-i', type=int, default=10,
                     help='The number of iteration to be averaged over.')
-parser.add_argument('--seq-length', '-t', type=int, default=200,
+parser.add_argument('--seq-length', '-t', type=int, default=20,
                     help='Sequence length')
 parser.add_argument('--gpu', '-g', type=int, default=-1, help='GPU to use. Negative value to use CPU')
 parser.add_argument('--cudnn', '-c', action='store_true', help='If this flag is set, cuDNN is enabled.')
@@ -37,19 +37,20 @@
                     'This iteration is not included in the mean elapsed time.'
                     'If we do not use GPU, we do not clear cache at all regardless of the value of '
                     'this option.')
+parser.add_argument('--vocab-size', type=int, default=10,
+                    help='Vocabulary size (number of distinct token IDs).')
 parser.add_argument('--batchsize', '-b', type=int, default=50, help='Batchsize')
 args = parser.parse_args()
 
 numpy.random.seed(args.seed)
 if args.gpu >= 0:
     cuda.cupy.random.seed(args.seed)
 
-vocab_size = 10
 
 if args.predictor == 'small-lstm':
-    predictor = net.small_lstm.SmallLSTM(vocab_size)
+    predictor = net.small_lstm.SmallLSTM(args.vocab_size)
 elif args.predictor == 'big-lstm':
-    predictor = net.big_lstm.BigLSTM(vocab_size)
+    predictor = net.big_lstm.BigLSTM(args.vocab_size)
 else:
     raise ValueError('Invalid architector:{}'.format(args.predictor))
 model = L.Classifier(predictor)
@@ -74,11 +75,11 @@
         cache.clear_cache(args.cache_level)
 
     # data generation
-    data = numpy.random.randint(0, vocab_size,
+    data = numpy.random.randint(0, args.vocab_size,
                                 (args.batchsize, args.seq_length)
                                 ).astype(numpy.int32)
     data = chainer.Variable(xp.asarray(data))
-    label = numpy.random.randint(0, vocab_size,
+    label = numpy.random.randint(0, args.vocab_size,
                                  (args.batchsize, args.seq_length)
                                  ).astype(numpy.int32)
     label = chainer.Variable(xp.asarray(label))

diff --git a/tests/net_tests/test_big_lstm.py b/tests/net_tests/test_big_lstm.py
@@ -13,8 +13,8 @@
 class TestBigLSTM(unittest.TestCase):
 
     def setUp(self):
-        self.x = numpy.random.randint(0, 10, (10, 20)).astype(numpy.int32)
-        self.l = big_lstm.BigLSTM()
+        self.x = numpy.random.randint(0, 10, (1, 20)).astype(numpy.int32)
+        self.l = big_lstm.BigLSTM(10, 10)
 
     def check_forward(self, xp):
         x = chainer.Variable(xp.asarray(self.x))