forked from brandonrobertz/sentence-autosegmentation
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: fasttext_skipgram_embedding.py
More file actions
48 lines (40 loc) · 1.17 KB
/
fasttext_skipgram_embedding.py
File metadata and controls
48 lines (40 loc) · 1.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# coding: utf-8
from __future__ import print_function
import numpy as np
from keras.models import Sequential
from keras.layers import Embedding
window_size = 1
# using skipgram embeddings built using fasttext:
# fasttext skipgram -input dataset -output dataset.skipgram
with open('data/dataset.skipgram.vec', 'r') as f:
data = f.readlines()
word_vectors = {}
samples, dim = data[0].split()
for line in data[1:]:
word, vec = line.split(' ', 1)
word_vectors[word] = np.array([
float(i) for i in vec.split()
], dtype='float32')
E = np.zeros(shape=(int(samples), int(dim)), dtype='float32')
word_index = word_vectors.keys()
for ix in range(len(word_index)):
word = word_index[ix]
vec = word_vectors[word]
for j in range(int(dim)):
E[ix][j] = vec[j]
embedding = Embedding(
len(word_index),
int(dim),
weights=[E],
input_length=window_size,
trainable=False
)
model = Sequential()
model.add(embedding)
model.compile('sgd', 'mse', ['accuracy'])
pred = model.predict(np.array([[0]]))
p = pred[0][0]
a = word_vectors[word_index[0]]
print( "Predicted embedding vector", p)
print( "Actual embedding vector", a)
print( "Equal?", p == a)