diff --git a/README.md b/README.md index 1c54594..934011b 100644 --- a/README.md +++ b/README.md @@ -101,7 +101,7 @@ embeddings = model.get_embeddings()# get embedding vectors ```python G = nx.read_edgelist('../data/flight/brazil-airports.edgelist',create_using=nx.DiGraph(),nodetype=None,data=[('weight',int)])#read graph -model = model = Struc2Vec(G, 10, 80, workers=4, verbose=40, ) #init model +model = Struc2Vec(G, 10, 80, workers=4, verbose=40, ) #init model model.train(window_size = 5, iter = 3)# train model embeddings = model.get_embeddings()# get embedding vectors ``` diff --git a/ge/classify.py b/ge/classify.py index eb2bc67..6381284 100644 --- a/ge/classify.py +++ b/ge/classify.py @@ -5,6 +5,7 @@ from sklearn.metrics import f1_score, accuracy_score from sklearn.multiclass import OneVsRestClassifier from sklearn.preprocessing import MultiLabelBinarizer +import pandas as pd class TopKRanker(OneVsRestClassifier): @@ -69,17 +70,9 @@ def split_train_evaluate(self, X, Y, train_precent, seed=0): def read_node_label(filename, skip_head=False): - fin = open(filename, 'r') - X = [] - Y = [] - while 1: - if skip_head: - fin.readline() - l = fin.readline() - if l == '': - break - vec = l.strip().split(' ') - X.append(vec[0]) - Y.append(vec[1:]) - fin.close() + fin = pd.read_csv(filename) + cols=fin.columns + X = fin[cols[0]] + Y = fin[cols[1]] + return X, Y