
Commit e947583

Merge pull request #28 from dbc148/master
removed any unnecessary parameter() calls, other small fixes
2 parents 23ea8ee + 2a22be7 commit e947583
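
The change applied across all files follows one pattern: dy.Model() is replaced by its DyNet 2.x name dy.ParameterCollection(), and explicit dy.parameter() wrapping is dropped, since recent DyNet versions implicitly add a Parameters object to the active computation graph when it is used in an expression. A minimal before/after sketch (shapes are illustrative, not from the commit):

import dynet as dy

model = dy.ParameterCollection()   # formerly dy.Model()
W = model.add_parameters((4, 8))   # illustrative weight matrix
b = model.add_parameters((4,))     # illustrative bias vector

dy.renew_cg()
x = dy.inputVector([0.5] * 8)

# Old style, removed by this commit:
#   W_exp = dy.parameter(W)
#   b_exp = dy.parameter(b)
#   y = W_exp * x + b_exp
# New style: Parameters convert to expressions automatically.
y = W * x + b
print(y.npvalue())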

18 files changed: 61 additions, 103 deletions

01-intro/bow.py

Lines changed: 5 additions & 6 deletions

@@ -22,22 +22,21 @@ def read_dataset(filename):
 ntags = len(t2i)

 # Start DyNet and define trainer
-model = dy.Model()
+model = dy.ParameterCollection()
 trainer = dy.AdamTrainer(model)

 # Define the model
-W_sm = model.add_lookup_parameters((nwords, ntags)) # Word weights
-b_sm = model.add_parameters((ntags)) # Softmax bias
+W = model.add_lookup_parameters((nwords, ntags)) # Word weights
+b = model.add_parameters((ntags)) # Softmax bias

 # A function to calculate scores for one value
 def calc_scores(words):
   # Create a computation graph, and add parameters
   dy.renew_cg()
-  b_sm_exp = dy.parameter(b_sm)
   # Take the sum of all the embedding vectors for each word
-  score = dy.esum([dy.lookup(W_sm, x) for x in words])
+  score = dy.esum([dy.lookup(W, x) for x in words])
   # Add the bias vector and return
-  return score + b_sm_exp
+  return score + b

 for ITER in range(100):
   # Perform training
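
For context, the training loop below this hunk (unchanged by the commit, so not shown) consumes calc_scores roughly as follows; a sketch assuming the file's usual train list of (word-ids, tag) pairs:

train_loss = 0.0
for words, tag in train:
  my_loss = dy.pickneglogsoftmax(calc_scores(words), tag)  # softmax + NLL of the gold tag
  train_loss += my_loss.value()                            # forces the forward pass
  my_loss.backward()                                       # backprop into W and b
  trainer.update()                                         # Adam step on the ParameterCollection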

01-intro/cbow.py

Lines changed: 2 additions & 4 deletions

@@ -22,7 +22,7 @@ def read_dataset(filename):
 ntags = len(t2i)

 # Start DyNet and define trainer
-model = dy.Model()
+model = dy.ParameterCollection()
 trainer = dy.AdamTrainer(model)

 # Define the model
@@ -35,9 +35,7 @@ def read_dataset(filename):
 def calc_scores(words):
   dy.renew_cg()
   cbow = dy.esum([dy.lookup(W_emb, x) for x in words])
-  W_sm_exp = dy.parameter(W_sm)
-  b_sm_exp = dy.parameter(b_sm)
-  return W_sm_exp * cbow + b_sm_exp
+  return W_sm * cbow + b_sm

 for ITER in range(100):
   # Perform training

01-intro/deep-cbow.py

Lines changed: 3 additions & 3 deletions

@@ -22,7 +22,7 @@ def read_dataset(filename):
 ntags = len(t2i)

 # Start DyNet and define trainer
-model = dy.Model()
+model = dy.ParameterCollection()
 trainer = dy.AdamTrainer(model)

 # Define the model
@@ -40,8 +40,8 @@ def calc_scores(words):
   dy.renew_cg()
   h = dy.esum([dy.lookup(W_emb, x) for x in words])
   for W_h_i, b_h_i in zip(W_h, b_h):
-    h = dy.tanh( dy.parameter(W_h_i) * h + dy.parameter(b_h_i) )
-  return dy.parameter(W_sm) * h + dy.parameter(b_sm)
+    h = dy.tanh( W_h_i * h + b_h_i )
+  return W_sm * h + b_sm

 for ITER in range(100):
   # Perform training
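
Here W_h and b_h are per-layer lists of parameters, so the loop multiplies each Parameters object directly. A sketch of the assumed definitions, which the diff does not show (names and shapes inferred from the file's HID_SIZE/EMB_SIZE/nlayers setup):

W_h = [model.add_parameters((HID_SIZE, EMB_SIZE if i == 0 else HID_SIZE))
       for i in range(nlayers)]                                    # one weight matrix per hidden layer
b_h = [model.add_parameters((HID_SIZE,)) for i in range(nlayers)]  # one bias vector per hidden layer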

02-lm/loglin-lm.py

Lines changed: 2 additions & 2 deletions

@@ -29,7 +29,7 @@ def read_dataset(filename):
 nwords = len(w2i)

 # Start DyNet and define trainer
-model = dy.Model()
+model = dy.ParameterCollection()
 trainer = dy.SimpleSGDTrainer(model, learning_rate=0.1)

 # Define the model
@@ -39,7 +39,7 @@ def read_dataset(filename):
 # A function to calculate scores for one value
 def calc_score_of_history(words):
   # Create a list of things to sum up with only the bias vector at first
-  score_vecs = [dy.parameter(b_sm)]
+  score_vecs = [b_sm]
   for word_id, lookup_param in zip(words, W_sm):
     score_vecs.append(lookup_param[word_id])
   return dy.esum(score_vecs)
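
dy.esum sums a list of expressions in a single graph node, and after this commit the bias Parameters object can sit in that list directly, exactly as score_vecs = [b_sm] does above. A tiny self-contained sketch with illustrative sizes:

import dynet as dy

model = dy.ParameterCollection()
b = model.add_parameters((3,))                # bias, used directly as an expression
lookup = model.add_lookup_parameters((10, 3)) # 10 rows of 3-dim weights

dy.renew_cg()
score = dy.esum([b, lookup[0], lookup[4]])    # bias + two looked-up rows
print(score.npvalue())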

02-lm/nn-lm-batch.py

Lines changed: 5 additions & 9 deletions

@@ -30,15 +30,15 @@ def read_dataset(filename):
 nwords = len(w2i)

 # Start DyNet and define trainer
-model = dy.Model()
+model = dy.ParameterCollection()
 trainer = dy.AdamTrainer(model, alpha=0.001)

 # Define the model
 W_emb = model.add_lookup_parameters((nwords, EMB_SIZE)) # Word weights at each position
-W_h_p = model.add_parameters((HID_SIZE, EMB_SIZE * N)) # Weights of the softmax
-b_h_p = model.add_parameters((HID_SIZE)) # Weights of the softmax
-W_sm_p = model.add_parameters((nwords, HID_SIZE)) # Weights of the softmax
-b_sm_p = model.add_parameters((nwords)) # Softmax bias
+W_h = model.add_parameters((HID_SIZE, EMB_SIZE * N)) # Weights of the softmax
+b_h = model.add_parameters((HID_SIZE)) # Weights of the softmax
+W_sm = model.add_parameters((nwords, HID_SIZE)) # Weights of the softmax
+b_sm = model.add_parameters((nwords)) # Softmax bias

 # A function to calculate scores for one value
 def calc_score_of_histories(words, dropout=0.0):
@@ -47,15 +47,11 @@ def calc_score_of_histories(words, dropout=0.0):
   # Lookup the embeddings and concatenate them
   emb = dy.concatenate([dy.lookup_batch(W_emb, x) for x in words])
   # Create the hidden layer
-  W_h = dy.parameter(W_h_p)
-  b_h = dy.parameter(b_h_p)
   h = dy.tanh(dy.affine_transform([b_h, W_h, emb]))
   # Perform dropout
   if dropout != 0.0:
     h = dy.dropout(h, dropout)
   # Calculate the score and return
-  W_sm = dy.parameter(W_sm_p)
-  b_sm = dy.parameter(b_sm_p)
   return dy.affine_transform([b_sm, W_sm, h])

 # Calculate the loss value for the entire sentence
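
dy.affine_transform([b, W, x]) computes b + W*x as a single fused graph node, which is why the Parameters b_h, W_h, b_sm, W_sm can now feed it directly. A self-contained equivalence sketch with illustrative shapes:

import dynet as dy

model = dy.ParameterCollection()
W = model.add_parameters((2, 3))
b = model.add_parameters((2,))

dy.renew_cg()
x = dy.inputVector([1.0, 2.0, 3.0])
fused = dy.affine_transform([b, W, x])   # b + W*x in one operation
naive = b + W * x                        # same value, built from separate nodes
assert abs(fused.npvalue()[0] - naive.npvalue()[0]) < 1e-6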

02-lm/nn-lm-optim.py

Lines changed: 5 additions & 9 deletions

@@ -30,32 +30,28 @@ def read_dataset(filename):
 nwords = len(w2i)

 # Start DyNet and define trainer
-model = dy.Model()
+model = dy.ParameterCollection()

 # CHANGE 1: Use Adam instead of Simple SGD
 trainer = dy.AdamTrainer(model, alpha=0.001)

 # Define the model
 W_emb = model.add_lookup_parameters((nwords, EMB_SIZE)) # Word weights at each position
-W_h_p = model.add_parameters((HID_SIZE, EMB_SIZE * N)) # Weights of the softmax
-b_h_p = model.add_parameters((HID_SIZE)) # Weights of the softmax
-W_sm_p = model.add_parameters((nwords, HID_SIZE)) # Weights of the softmax
-b_sm_p = model.add_parameters((nwords)) # Softmax bias
+W_h = model.add_parameters((HID_SIZE, EMB_SIZE * N)) # Weights of the softmax
+b_h = model.add_parameters((HID_SIZE)) # Weights of the softmax
+W_sm = model.add_parameters((nwords, HID_SIZE)) # Weights of the softmax
+b_sm = model.add_parameters((nwords)) # Softmax bias

 # A function to calculate scores for one value
 def calc_score_of_history(words, dropout=0.0):
   # Lookup the embeddings and concatenate them
   emb = dy.concatenate([W_emb[x] for x in words])
   # Create the hidden layer
-  W_h = dy.parameter(W_h_p)
-  b_h = dy.parameter(b_h_p)
   h = dy.tanh(dy.affine_transform([b_h, W_h, emb]))
   # CHANGE 2: perform dropout
   if dropout != 0.0:
     h = dy.dropout(h, dropout)
   # Calculate the score and return
-  W_sm = dy.parameter(W_sm_p)
-  b_sm = dy.parameter(b_sm_p)
   return dy.affine_transform([b_sm, W_sm, h])

 # Calculate the loss value for the entire sentence

02-lm/nn-lm.py

Lines changed: 5 additions & 9 deletions

@@ -30,27 +30,23 @@ def read_dataset(filename):
 nwords = len(w2i)

 # Start DyNet and define trainer
-model = dy.Model()
+model = dy.ParameterCollection()
 trainer = dy.SimpleSGDTrainer(model, learning_rate=0.1)

 # Define the model
 W_emb = model.add_lookup_parameters((nwords, EMB_SIZE)) # Word weights at each position
-W_h_p = model.add_parameters((HID_SIZE, EMB_SIZE * N)) # Weights of the softmax
-b_h_p = model.add_parameters((HID_SIZE)) # Weights of the softmax
-W_sm_p = model.add_parameters((nwords, HID_SIZE)) # Weights of the softmax
-b_sm_p = model.add_parameters((nwords)) # Softmax bias
+W_h = model.add_parameters((HID_SIZE, EMB_SIZE * N)) # Weights of the softmax
+b_h = model.add_parameters((HID_SIZE)) # Weights of the softmax
+W_sm = model.add_parameters((nwords, HID_SIZE)) # Weights of the softmax
+b_sm = model.add_parameters((nwords)) # Softmax bias

 # A function to calculate scores for one value
 def calc_score_of_history(words):
   # Lookup the embeddings and concatenate them
   emb = dy.concatenate([W_emb[x] for x in words])
   # Create the hidden layer
-  W_h = dy.parameter(W_h_p)
-  b_h = dy.parameter(b_h_p)
   h = dy.tanh(dy.affine_transform([b_h, W_h, emb]))
   # Calculate the score and return
-  W_sm = dy.parameter(W_sm_p)
-  b_sm = dy.parameter(b_sm_p)
   return dy.affine_transform([b_sm, W_sm, h])

 # Calculate the loss value for the entire sentence

03-wordemb/wordemb-cbow.py

Lines changed: 4 additions & 6 deletions

@@ -32,12 +32,12 @@ def read_dataset(filename):
   labels_file.write(i2w[i] + '\n')

 # Start DyNet and define trainer
-model = dy.Model()
+model = dy.ParameterCollection()
 trainer = dy.SimpleSGDTrainer(model, learning_rate=0.1)

 # Define the model
 W_c_p = model.add_lookup_parameters((nwords, EMB_SIZE)) # Word weights at each position
-W_w_p = model.add_parameters((nwords, EMB_SIZE)) # Weights of the softmax
+W_w = model.add_parameters((nwords, EMB_SIZE)) # Weights of the softmax

 # Calculate the loss value for the entire sentence
 def calc_sent_loss(sent):
@@ -49,8 +49,6 @@ def calc_sent_loss(sent):
   padded_sent = [S] * N + sent + [S] * N
   padded_emb = [W_c_p[x] for x in padded_sent]

-  W_w = dy.parameter(W_w_p)
-
   # Step through the sentence
   all_losses = []
   for i in range(N,len(sent)+N):
@@ -88,7 +86,7 @@ def calc_sent_loss(sent):

 print("saving embedding files")
 with open(embeddings_location, 'w') as embeddings_file:
-  W_w_np = W_w_p.as_array()
+  W_w_a = W_w.as_array()
   for i in range(nwords):
-    ith_embedding = '\t'.join(map(str, W_w_np[i]))
+    ith_embedding = '\t'.join(map(str, W_w_a[i]))
     embeddings_file.write(ith_embedding + '\n')

03-wordemb/wordemb-skip.py

Lines changed: 5 additions & 7 deletions

@@ -32,12 +32,12 @@ def read_dataset(filename):
   labels_file.write(i2w[i] + '\n')

 # Start DyNet and define trainer
-model = dy.Model()
+model = dy.ParameterCollection()
 trainer = dy.SimpleSGDTrainer(model, learning_rate=0.1)

 # Define the model
 W_c_p = model.add_lookup_parameters((nwords, EMB_SIZE)) # Word weights at each position
-W_w_p = model.add_parameters((nwords, EMB_SIZE)) # Weights of the softmax
+W_w = model.add_parameters((nwords, EMB_SIZE)) # Weights of the softmax

 # Calculate the loss value for the entire sentence
 def calc_sent_loss(sent):
@@ -48,8 +48,6 @@ def calc_sent_loss(sent):
   # as we need to predict the eos as well, the future window at that point is N past it
   emb = [W_c_p[x] for x in sent]

-  W_w = dy.parameter(W_w_p)
-
   # Step through the sentence
   all_losses = []
   for i, my_emb in enumerate(emb):
@@ -74,7 +72,7 @@ def calc_sent_loss(sent):
     train_words += len(sent)
     my_loss.backward()
     trainer.update()
-    if (sent_id+1) % 5000 == 0:
+    if (sent_id+1) % 500 == 0:
      print("--finished %r sentences" % (sent_id+1))
   print("iter %r: train loss/word=%.4f, ppl=%.4f, time=%.2fs" % (ITER, train_loss/train_words, math.exp(train_loss/train_words), time.time()-start))
   # Evaluate on dev set
@@ -89,7 +87,7 @@ def calc_sent_loss(sent):

 print("saving embedding files")
 with open(embeddings_location, 'w') as embeddings_file:
-  W_w_np = W_w_p.as_array()
+  W_w_a = W_w.as_array()
   for i in range(nwords):
-    ith_embedding = '\t'.join(map(str, W_w_np[i]))
+    ith_embedding = '\t'.join(map(str, W_w_a[i]))
     embeddings_file.write(ith_embedding + '\n')
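
The save-embeddings change works because Parameters.as_array() returns the current weights as a NumPy array straight from the parameter object, so no expression (and hence no dy.parameter() call) is needed. A small sketch with illustrative sizes:

import dynet as dy
import numpy as np

model = dy.ParameterCollection()
W_w = model.add_parameters((5, 3))   # illustrative: 5 "words", 3-dim embeddings
W_w_a = W_w.as_array()               # current weights as a NumPy array
assert isinstance(W_w_a, np.ndarray) and W_w_a.shape == (5, 3)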

04-efficiency/slow-impl.py

Lines changed: 1 addition & 2 deletions

@@ -10,7 +10,7 @@
 # Create the model
 model = dy.ParameterCollection()
 trainer = dy.SimpleSGDTrainer(model)
-W_p = model.add_parameters((100,100))
+W = model.add_parameters((100,100))

 # Create the "training data"
 x_vecs = []
@@ -22,7 +22,6 @@
 # Do the processing
 for my_iter in range(1000):
   dy.renew_cg()
-  W = dy.parameter(W_p)
   total = 0
   for x in x_vecs:
     for y in y_vecs:
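
The same Parameters object stays valid across dy.renew_cg() calls in DyNet 2.x, so nothing needs to be re-wrapped each iteration. A minimal sketch of the pattern this file now uses (shape as in the file, input data made up):

import dynet as dy

model = dy.ParameterCollection()
W = model.add_parameters((100, 100))

for my_iter in range(3):
  dy.renew_cg()                     # fresh computation graph each iteration
  x = dy.inputVector([1.0] * 100)   # made-up input in place of the file's data
  y = W * x                         # W is usable directly; no dy.parameter(W) needed
  print(y.npvalue()[:3])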
