Skip to content

Commit 6cc1b47

Browse files
committed
Adding README.md and a simple python script to check the nearest words
1 parent d83de07 commit 6cc1b47

File tree

2 files changed

+79
-0
lines changed

2 files changed

+79
-0
lines changed

README.md

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# fnlp
2+
3+
This repo contains scripts to gather finance data and train NLP models using the text data.
4+
5+
## Word Vectors
6+
7+
Trained word vectors are available on the [releases](https://github.com/hardikp/fnlp/releases) page.
8+
9+
Let's check if the closest words make sense.
10+
11+
```bash
12+
$ python3 test_word_vectors.py --word IRA
13+
Roth
14+
SEP
15+
IRAs
16+
401
17+
retirement
18+
19+
$ python3 test_word_vectors.py --word option
20+
call
21+
put
22+
options
23+
exercise
24+
underlying
25+
26+
$ python3 test_word_vectors.py --word stock
27+
shares
28+
market
29+
stocks
30+
share
31+
price
32+
```

test_word_vectors.py

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
from __future__ import absolute_import, division, print_function
2+
3+
from argparse import ArgumentParser
4+
from collections import Counter
5+
from sklearn.metrics.pairwise import cosine_similarity
6+
import numpy as np
7+
8+
9+
def _load_embeddings(path):
    """Read a space-separated word-vector text file into a dict.

    Each usable line holds a word followed by its float coefficients,
    separated by single spaces. If a word occurs more than once, the last
    occurrence wins.

    Args:
        path: Path of the word-vector text file.

    Returns:
        Dict mapping each word to a 1-D float32 numpy array.
    """
    embeddings_index = {}
    # `with` closes the handle even when a malformed line raises.
    with open(path) as f:
        for line in f:
            # Strip the line terminator (and trailing blanks) so the last
            # coefficient is a clean number; keep splitting on a single
            # space so tokens containing other whitespace stay intact.
            fields = line.rstrip().split(' ')
            if len(fields) < 2:
                # Blank line or a word without coefficients: nothing to
                # compare against, and an empty vector would break the
                # similarity computation below.
                continue
            embeddings_index[fields[0]] = np.asarray(fields[1:], dtype='float32')
    return embeddings_index


def print_nearest_words(args):
    """Print the words whose vectors are most cosine-similar to ``args.word``.

    Args:
        args: Namespace with the attributes ``vectors`` (path of the
            word-vector text file), ``word`` (query word), ``num_words``
            (number of neighbours to print) and ``verbose`` (when truthy,
            each printed word is preceded by its similarity score).

    Raises:
        KeyError: If ``args.word`` has no vector in the file.
        ValueError: If the vectors in the file do not all have the same
            number of coefficients, or a coefficient is not a number.
    """
    embeddings_index = _load_embeddings(args.vectors)

    if args.word not in embeddings_index:
        raise KeyError('{!r} not found in {}'.format(args.word, args.vectors))

    query = embeddings_index[args.word]
    candidates = [w for w in embeddings_index if w != args.word]
    if not candidates:
        return

    # Score every candidate in one vectorized pass instead of one library
    # call (plus two reshapes) per vocabulary word.
    matrix = np.vstack([embeddings_index[w] for w in candidates])
    denom = np.linalg.norm(matrix, axis=1) * np.linalg.norm(query)
    # An all-zero vector has no direction; give it similarity 0, not NaN.
    denom[denom == 0] = 1.0
    scores = matrix.dot(query) / denom

    score_dict = dict(zip(candidates, scores))
    closest = Counter(score_dict).most_common(args.num_words)

    for word, score in closest:
        if args.verbose:
            print(score, word)
        else:
            print(word)
36+
37+
38+
def parse_bool(value):
    """Convert a command-line string to a bool.

    ``type=bool`` cannot be used with argparse for this: ``bool('False')``
    is ``True`` because every non-empty string is truthy.

    Args:
        value: Text given on the command line.

    Returns:
        True for 1/true/t/yes/y/on, False for 0/false/f/no/n/off or the
        empty string (case-insensitive).

    Raises:
        ValueError: For any other text; argparse reports it as an invalid
            argument value.
    """
    text = value.strip().lower()
    if text in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    if text in ('', '0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise ValueError('expected a boolean, got {!r}'.format(value))


def build_parser():
    """Build the command-line parser for this script.

    Returns:
        An ArgumentParser exposing --vectors, --vocab, --word, --verbose
        and --num_words.
    """
    parser = ArgumentParser()
    parser.add_argument('--vectors', default='vectors.txt', help='Word vector file')
    # --vocab is accepted, but print_nearest_words does not read it.
    parser.add_argument('--vocab', default='vocab.txt', help='Vocab file')
    parser.add_argument('--word', default='dollar', help='Input word')
    # nargs='?' + const=True lets a bare `--verbose` switch scores on while
    # `--verbose True` / `--verbose False` keep working and now mean what
    # they say.
    parser.add_argument('--verbose', type=parse_bool, nargs='?', const=True, default=False, help='Print score')
    parser.add_argument('--num_words', type=int, default=5, help='Number of closest words to print')
    return parser


if __name__ == '__main__':
    args = build_parser().parse_args()

    print_nearest_words(args)

0 commit comments

Comments
 (0)