
Fix encoding error during setup, and parameter error due to the gensim version… #64


Open · wants to merge 5 commits into master
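For context on the PR title (not part of the diff): gensim 4.0 renamed several Word2Vec constructor keywords, which is the parameter error this PR fixes. A minimal sketch of the old-to-new mapping, assuming gensim >= 4.0 is installed (the toy walk corpus is illustrative only):

from gensim.models import Word2Vec

walks = [["a", "b", "c"], ["b", "c", "d"]]  # toy random-walk corpus

# gensim < 4.0 spelling:  Word2Vec(walks, size=128, iter=5, ...)
# gensim >= 4.0 spelling: `size` -> `vector_size`, `iter` -> `epochs`
model = Word2Vec(
    sentences=walks,
    vector_size=128,   # was `size`
    window=5,
    min_count=0,
    sg=1,              # skip-gram
    hs=1,              # hierarchical softmax
    workers=3,
    epochs=5,          # was `iter`
)
print(model.wv["a"].shape)  # -> (128,)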
3 changes: 2 additions & 1 deletion ge/models/__init__.py
@@ -3,6 +3,7 @@
 from .line import LINE
 from .sdne import SDNE
 from .struc2vec import Struc2Vec
+from .bfswalk import BFSWalk


-__all__ = ["DeepWalk", "Node2Vec", "LINE", "SDNE", "Struc2Vec"]
+__all__ = ["DeepWalk", "Node2Vec", "LINE", "SDNE", "Struc2Vec", "BFSWalk"]
67 changes: 67 additions & 0 deletions ge/models/alibaba-eges.py
@@ -0,0 +1,67 @@
# -*- coding:utf-8 -*-

"""

Author:

    Chengliang Zhao, [email protected]

Reference:

    [1] Jizhe Wang, Pipei Huang, Huan Zhao, Zhibo Zhang, Binqiang Zhao, and Dik Lun Lee. 2018. Billion-scale Commodity Embedding for E-commerce Recommendation in Alibaba. In Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining (KDD '18). ACM, New York, NY, USA, 839-848. (https://doi.org/10.1145/3219819.3219869)

"""
from ..walker import RandomWalker
from gensim.models import Word2Vec
import pandas as pd
import numpy as np


class EGES:
    def __init__(self, graph, walk_length, num_walks, workers=1):

        self.graph = graph
        self.w2v_model = None
        self._embeddings = {}

        self.walker = RandomWalker(
            graph, p=1, q=1, )
        self.sentences = self.walker.simulate_walks(
            num_walks=num_walks, walk_length=walk_length, workers=workers, verbose=1)

    def train(self, embed_size=128, window_size=5, workers=3, iter=5, **kwargs):

        kwargs["sentences"] = self.sentences
        kwargs["min_count"] = kwargs.get("min_count", 0)
        kwargs["vector_size"] = embed_size
        kwargs["sg"] = 1  # skip-gram
        kwargs["hs"] = 1  # hierarchical softmax, as in DeepWalk
        kwargs["workers"] = workers
        kwargs["window"] = window_size
        kwargs["epochs"] = iter

        print("Learning embedding vectors...")
        model = Word2Vec(**kwargs)
        print("Learning embedding vectors done!")

        self.w2v_model = model
        return model
    def get_embeddings(self,):
        if self.w2v_model is None:
            print("model not trained")
            return {}

        self._embeddings = {}
        for word in self.graph.nodes():
            self._embeddings[word] = self.w2v_model.wv[word]

        return self._embeddings
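A hedged usage sketch for the new class (everything below is illustrative, not from the PR). As committed, EGES only reproduces the random-walk + skip-gram pipeline; the side-information embedding weighting from the KDD '18 paper is not implemented yet. Two caveats worth flagging: the file name alibaba-eges.py contains a hyphen, so a plain import statement cannot reach it (the sketch assumes it is renamed alibaba_eges.py), and EGES still calls simulate_walks with the old signature even though this PR adds a mode/outlier parameter pair to the other models.

import networkx as nx
from ge.models.alibaba_eges import EGES  # assumes the file is renamed to alibaba_eges.py

# String node ids so gensim's vocabulary keys match graph.nodes().
G = nx.relabel_nodes(nx.karate_club_graph(), str)

model = EGES(G, walk_length=10, num_walks=80, workers=1)
w2v = model.train(embed_size=128, window_size=5, iter=5)
embeddings = model.get_embeddings()  # {node: 128-dim numpy vector}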
67 changes: 67 additions & 0 deletions ge/models/bfswalk.py
@@ -0,0 +1,67 @@
# -*- coding:utf-8 -*-

"""

Author:

    Weichen Shen, [email protected]

Reference:

    [1] Perozzi B, Al-Rfou R, Skiena S. Deepwalk: Online learning of social representations[C]//Proceedings of the 20th ACM SIGKDD international conference on Knowledge discovery and data mining. ACM, 2014: 701-710. (http://www.perozzi.net/publications/14_kdd_deepwalk.pdf)

"""
from ..walker import RandomWalker
from gensim.models import Word2Vec, word2vec
import pandas as pd


class BFSWalk:
    def __init__(self, graph, outlier, walk_length, num_walks, workers=1, weight=False):

        self.graph = graph
        self.w2v_model = None
        self._embeddings = {}

        self.walker = RandomWalker(
            graph, p=1, q=1, )
        self.sentences = self.walker.simulate_walks("bfs", outlier,
            num_walks=num_walks, walk_length=walk_length, workers=workers, verbose=1, weight=weight)

    def train(self, walkfile, embed_size=128, window_size=5, workers=3, iter=5, sg=1, hs=1, **kwargs):

        kwargs["sentences"] = word2vec.Text8Corpus(walkfile)
        kwargs["min_count"] = kwargs.get("min_count", 1)
        kwargs["vector_size"] = embed_size
        kwargs["sg"] = sg  # skip-gram
        kwargs["hs"] = hs  # hierarchical softmax, as in DeepWalk
        kwargs["workers"] = workers
        kwargs["window"] = window_size
        kwargs["epochs"] = iter

        print("Learning embedding vectors...")
        model = Word2Vec(**kwargs)
        print("Learning embedding vectors done!")

        self.w2v_model = model
        return model

    def get_embeddings(self,):
        if self.w2v_model is None:
            print("model not trained")
            return {}

        self._embeddings = {}
        for word in self.graph.nodes():
            self._embeddings[word] = self.w2v_model.wv[word]

        return self._embeddings

    def get_sentences(self):
        return self.sentences
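The new train signature no longer consumes self.sentences directly; it reads walks back from disk through word2vec.Text8Corpus. A sketch of the intended round trip, assuming outlier accepts an empty list and that walks are serialized as space-separated node ids (both assumptions; the modified RandomWalker is not shown in this diff):

import networkx as nx
from ge.models import BFSWalk

# String node ids so the tokens written to disk round-trip as dict keys.
G = nx.relabel_nodes(nx.karate_club_graph(), str)
model = BFSWalk(G, outlier=[], walk_length=10, num_walks=80, workers=1)

# Serialize the walks so Text8Corpus can read them back.
with open("walks.txt", "w", encoding="utf-8") as f:
    for walk in model.get_sentences():
        f.write(" ".join(walk) + "\n")

w2v = model.train("walks.txt", embed_size=128, window_size=5, iter=5)
embeddings = model.get_embeddings()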
25 changes: 14 additions & 11 deletions ge/models/deepwalk.py
@@ -18,32 +18,32 @@

 """
 from ..walker import RandomWalker
-from gensim.models import Word2Vec
+from gensim.models import Word2Vec, word2vec
 import pandas as pd


 class DeepWalk:
-    def __init__(self, graph, walk_length, num_walks, workers=1):
+    def __init__(self, graph, outlier, walk_length, num_walks, workers=1, weight=False):

         self.graph = graph
         self.w2v_model = None
         self._embeddings = {}

         self.walker = RandomWalker(
             graph, p=1, q=1, )
-        self.sentences = self.walker.simulate_walks(
-            num_walks=num_walks, walk_length=walk_length, workers=workers, verbose=1)
+        self.sentences = self.walker.simulate_walks("deep", outlier,
+            num_walks=num_walks, walk_length=walk_length, workers=workers, verbose=1, weight=weight)

-    def train(self, embed_size=128, window_size=5, workers=3, iter=5, **kwargs):
+    def train(self, walkfile, embed_size=128, window_size=5, workers=3, iter=5, sg=1, hs=1, **kwargs):

-        kwargs["sentences"] = self.sentences
-        kwargs["min_count"] = kwargs.get("min_count", 0)
-        kwargs["size"] = embed_size
-        kwargs["sg"] = 1  # skip-gram
-        kwargs["hs"] = 1  # DeepWalk uses hierarchical softmax
+        kwargs["sentences"] = word2vec.Text8Corpus(walkfile)
+        kwargs["min_count"] = kwargs.get("min_count", 1)
+        kwargs["vector_size"] = embed_size
+        kwargs["sg"] = sg  # skip-gram
+        kwargs["hs"] = hs  # DeepWalk uses hierarchical softmax
         kwargs["workers"] = workers
         kwargs["window"] = window_size
-        kwargs["iter"] = iter
+        kwargs["epochs"] = iter

         print("Learning embedding vectors...")
         model = Word2Vec(**kwargs)
@@ -62,3 +62,6 @@ def get_embeddings(self,):
             self._embeddings[word] = self.w2v_model.wv[word]

         return self._embeddings
+
+    def get_sentences(self):
+        return self.sentences
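A caveat on the new corpus plumbing (an observation, not part of the PR): word2vec.Text8Corpus treats the whole file as one whitespace-separated stream and re-chunks it into sentences of up to 10,000 tokens, so walks written one per line get concatenated and skip-gram windows can straddle walk boundaries. If that matters, word2vec.LineSentence preserves one walk per line:

from gensim.models import word2vec

# Text8Corpus: single stream, re-chunked at 10,000 tokens; walk
# boundaries are lost.
# LineSentence: one sentence per line; walk boundaries preserved.
sentences = word2vec.LineSentence("walks.txt")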
2 changes: 1 addition & 1 deletion ge/models/line.py
@@ -92,7 +92,7 @@ def __init__(self, graph, embedding_size=8, negative_ratio=5, order='second',):
        self.node_size = graph.number_of_nodes()
        self.edge_size = graph.number_of_edges()
        self.samples_per_epoch = self.edge_size*(1+negative_ratio)

        self._gen_sampling_table()
        self.reset_model()
25 changes: 14 additions & 11 deletions ge/models/node2vec.py
@@ -18,15 +18,15 @@

 """

-from gensim.models import Word2Vec
+from gensim.models import Word2Vec, word2vec
 import pandas as pd

 from ..walker import RandomWalker


 class Node2Vec:

-    def __init__(self, graph, walk_length, num_walks, p=1.0, q=1.0, workers=1, use_rejection_sampling=0):
+    def __init__(self, graph, outlier, walk_length, num_walks, p=1.0, q=1.0, workers=1, use_rejection_sampling=0):

         self.graph = graph
         self._embeddings = {}
@@ -36,19 +36,19 @@ def __init__(self, graph, walk_length, num_walks, p=1.0, q=1.0, workers=1, use_rejection_sampling=0):
         print("Preprocess transition probs...")
         self.walker.preprocess_transition_probs()

-        self.sentences = self.walker.simulate_walks(
-            num_walks=num_walks, walk_length=walk_length, workers=workers, verbose=1)
+        self.sentences = self.walker.simulate_walks("node", outlier,
+            num_walks=num_walks, walk_length=walk_length, workers=workers, verbose=1, weight=False)

-    def train(self, embed_size=128, window_size=5, workers=3, iter=5, **kwargs):
+    def train(self, walkfile, embed_size=128, window_size=5, workers=3, iter=5, sg=1, hs=1, **kwargs):

-        kwargs["sentences"] = self.sentences
-        kwargs["min_count"] = kwargs.get("min_count", 0)
-        kwargs["size"] = embed_size
-        kwargs["sg"] = 1
-        kwargs["hs"] = 0  # node2vec does not use hierarchical softmax
+        kwargs["sentences"] = word2vec.Text8Corpus(walkfile)
+        kwargs["min_count"] = kwargs.get("min_count", 1)
+        kwargs["vector_size"] = embed_size
+        kwargs["sg"] = sg
+        kwargs["hs"] = hs  # hs=0 in the original node2vec (negative sampling)
         kwargs["workers"] = workers
         kwargs["window"] = window_size
-        kwargs["iter"] = iter
+        kwargs["epochs"] = iter

         print("Learning embedding vectors...")
         model = Word2Vec(**kwargs)
@@ -68,3 +68,6 @@ def get_embeddings(self,):
             self._embeddings[word] = self.w2v_model.wv[word]

         return self._embeddings
+
+    def get_sentences(self):
+        return self.sentences
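One behavioral change to flag (an observation, not stated in the diff): the original node2vec code hard-coded hs=0, but the new default is hs=1, which silently switches node2vec to hierarchical softmax. Callers can pass the flag explicitly to keep the original objective:

# Keep the original node2vec objective (negative sampling) under the
# new signature; "walks.txt" is the walk file from the earlier sketch.
w2v = model.train("walks.txt", embed_size=128, window_size=5, iter=5, sg=1, hs=0)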
4 changes: 2 additions & 2 deletions ge/models/struc2vec.py
@@ -112,8 +112,8 @@ def train(self, embed_size=128, window_size=5, workers=3, iter=5):
         sentences = self.sentences

         print("Learning representation...")
-        model = Word2Vec(sentences, size=embed_size, window=window_size, min_count=0, hs=1, sg=1, workers=workers,
-                         iter=iter)
+        model = Word2Vec(sentences, vector_size=embed_size, window=window_size, min_count=0, hs=1, sg=1, workers=workers,
+                         epochs=iter)
         print("Learning representation done!")
         self.w2v_model = model
