Skip to content

fix(interactive): fix ldbc data importing in Groot #4583

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes (50 additions & 50 deletions) in interactive_engine/groot-server/src/main/resources/import_data.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -92,11 +92,11 @@ def create_ldbc_graph_schema(graph):
schema.add_vertex_label('PERSON').add_primary_key('id', 'long').add_property('firstName', 'str').add_property('lastName', 'str').add_property('gender', 'str').add_property('birthday', 'long').add_property('creationDate', 'long').add_property('locationIP', 'str').add_property('browserUsed', 'str').add_property('language', 'str').add_property('email', 'str')
schema.add_vertex_label('COMMENT').add_primary_key('id', 'long').add_property('creationDate', 'long').add_property('locationIP', 'str').add_property('browserUsed', 'str').add_property('content', 'str').add_property('length','int')
schema.add_vertex_label('POST').add_primary_key('id', 'long').add_property('imageFile', 'str').add_property('creationDate', 'long').add_property('locationIP', 'str').add_property('browserUsed', 'str').add_property('language', 'str').add_property('content', 'str').add_property('length', 'int')
schema.add_vertex_label('FORUM').add_primary_key('id', 'long').add_property('title', 'str').add_property('creationDate', 'str')
schema.add_vertex_label('FORUM').add_primary_key('id', 'long').add_property('title', 'str').add_property('creationDate', 'long')
schema.add_vertex_label('ORGANISATION').add_primary_key('id', 'long').add_property('type', 'str').add_property('name', 'str').add_property('url', 'str')
schema.add_vertex_label('TAGCLASS').add_primary_key('id', 'long').add_property('name', 'str').add_property('url', 'str')
schema.add_vertex_label('TAG').add_primary_key('id', 'long').add_property('name', 'str').add_property('url', 'str')
schema.add_edge_label('HASCREATOR').source('COMMENT').destination('PERSON').source('POST').destination('PERSON')
schema.add_edge_label('HASCREATOR').source('COMMENT').destination('PERSON').source('POST').destination('PERSON').add_property('creationDate','long')
schema.add_edge_label('HASTAG').source('COMMENT').destination('TAG').source('POST').destination('TAG').source('FORUM').destination('TAG')
schema.add_edge_label('ISLOCATEDIN').source('COMMENT').destination('PLACE').source('POST').destination('PLACE').source('PERSON').destination('PLACE').source('ORGANISATION').destination('PLACE')
schema.add_edge_label('REPLYOF').source('COMMENT').destination('COMMENT').source('COMMENT').destination('POST')
Expand All @@ -106,8 +106,8 @@ def create_ldbc_graph_schema(graph):
schema.add_edge_label('HASINTEREST').source('PERSON').destination('TAG')
schema.add_edge_label('KNOWS').source('PERSON').destination('PERSON').add_property('creationDate','long')
schema.add_edge_label('LIKES').source('PERSON').destination('COMMENT').source('PERSON').destination('POST').add_property('creationDate','long')
schema.add_edge_label('STUDYAT').source('PERSON').destination('ORGANISATION').add_property('classYear','long')
schema.add_edge_label('WORKAT').source('PERSON').destination('ORGANISATION').add_property('workFrom','long')
schema.add_edge_label('STUDYAT').source('PERSON').destination('ORGANISATION').add_property('classYear','int')
schema.add_edge_label('WORKAT').source('PERSON').destination('ORGANISATION').add_property('workFrom','int')
schema.add_edge_label('ISPARTOF').source('PLACE').destination('PLACE')
schema.add_edge_label('ISSUBCLASSOF').source('TAGCLASS').destination('TAGCLASS')
schema.add_edge_label('HASTYPE').source('TAG').destination('TAGCLASS')
Expand All @@ -127,10 +127,10 @@ def create_movie_graph_schema(graph):
schema.update()

def load_data_of_modern_graph(conn, graph, prefix):
person = pd.read_csv(os.path.join(prefix, "person.csv"), sep="|")
software = pd.read_csv(os.path.join(prefix, "software.csv"), sep="|")
knows = pd.read_csv(os.path.join(prefix, "knows.csv"), sep="|")
created = pd.read_csv(os.path.join(prefix, "created.csv"), sep="|")
person = pd.read_csv(os.path.join(prefix, "person.csv"), sep="|", na_filter=False)
software = pd.read_csv(os.path.join(prefix, "software.csv"), sep="|", na_filter=False)
knows = pd.read_csv(os.path.join(prefix, "knows.csv"), sep="|", na_filter=False)
created = pd.read_csv(os.path.join(prefix, "created.csv"), sep="|", na_filter=False)
vertices = []
vertices.extend(
[
Expand Down Expand Up @@ -279,37 +279,37 @@ def prepare_edges(edges, data, edge_type, source_type, destination_type, propert


def load_data_of_ldbc_graph(conn, graph, prefix):
place = pd.read_csv(os.path.join(prefix, "place_0_0.csv"), sep="|")
person = pd.read_csv(os.path.join(prefix, "person_0_0.csv"), sep="|")
comment = pd.read_csv(os.path.join(prefix, "comment_0_0.csv"), sep="|")
post = pd.read_csv(os.path.join(prefix, "post_0_0.csv"), sep="|")
forum = pd.read_csv(os.path.join(prefix, "forum_0_0.csv"), sep="|")
organisation = pd.read_csv(os.path.join(prefix, "organisation_0_0.csv"), sep="|")
tagclass = pd.read_csv(os.path.join(prefix, "tagclass_0_0.csv"), sep="|")
tag = pd.read_csv(os.path.join(prefix, "tag_0_0.csv"), sep="|")
comment_hascreator = pd.read_csv(os.path.join(prefix, "comment_hasCreator_person_0_0.csv"), sep="|")
post_hascreator = pd.read_csv(os.path.join(prefix, "post_hasCreator_person_0_0.csv"), sep="|")
comment_hastag = pd.read_csv(os.path.join(prefix, "comment_hasTag_tag_0_0.csv"), sep="|")
post_hastag = pd.read_csv(os.path.join(prefix, "post_hasTag_tag_0_0.csv"), sep="|")
forum_hastag = pd.read_csv(os.path.join(prefix, "forum_hasTag_tag_0_0.csv"), sep="|")
comment_islocatedin = pd.read_csv(os.path.join(prefix, "comment_isLocatedIn_place_0_0.csv"), sep="|")
post_islocatedin = pd.read_csv(os.path.join(prefix, "post_isLocatedIn_place_0_0.csv"), sep="|")
person_islocatedin = pd.read_csv(os.path.join(prefix, "person_isLocatedIn_place_0_0.csv"), sep="|")
organisation_islocatedin = pd.read_csv(os.path.join(prefix, "organisation_isLocatedIn_place_0_0.csv"), sep="|")
comment_replyof_comment = pd.read_csv(os.path.join(prefix, "comment_replyOf_comment_0_0.csv"), sep="|")
comment_replyof_post = pd.read_csv(os.path.join(prefix, "comment_replyOf_post_0_0.csv"), sep="|")
forum_containerof_post = pd.read_csv(os.path.join(prefix, "forum_containerOf_post_0_0.csv"), sep="|")
forum_hasmember_person = pd.read_csv(os.path.join(prefix, "forum_hasMember_person_0_0.csv"), sep="|")
forum_hasmoderator_person = pd.read_csv(os.path.join(prefix, "forum_hasModerator_person_0_0.csv"), sep="|")
person_hasinterest_tag = pd.read_csv(os.path.join(prefix, "person_hasInterest_tag_0_0.csv"), sep="|")
person_knows_person = pd.read_csv(os.path.join(prefix, "person_knows_person_0_0.csv"), sep="|")
person_likes_comment = pd.read_csv(os.path.join(prefix, "person_likes_comment_0_0.csv"), sep="|")
person_likes_post = pd.read_csv(os.path.join(prefix, "person_likes_post_0_0.csv"), sep="|")
person_studyat_organisation = pd.read_csv(os.path.join(prefix, "person_studyAt_organisation_0_0.csv"), sep="|")
person_workat_organisation = pd.read_csv(os.path.join(prefix, "person_workAt_organisation_0_0.csv"), sep="|")
place_ispartof_place = pd.read_csv(os.path.join(prefix, "place_isPartOf_place_0_0.csv"), sep="|")
tagclass_isSubclassOf_tagclass = pd.read_csv(os.path.join(prefix, "tagclass_isSubclassOf_tagclass_0_0.csv"), sep="|")
tag_hastype_tagclass = pd.read_csv(os.path.join(prefix, "tag_hasType_tagclass_0_0.csv"), sep="|")
place = pd.read_csv(os.path.join(prefix, "place_0_0.csv"), sep="|", na_filter=False)
person = pd.read_csv(os.path.join(prefix, "person_0_0.csv"), sep="|", na_filter=False)
comment = pd.read_csv(os.path.join(prefix, "comment_0_0.csv"), sep="|", na_filter=False)
post = pd.read_csv(os.path.join(prefix, "post_0_0.csv"), sep="|", na_filter=False)
forum = pd.read_csv(os.path.join(prefix, "forum_0_0.csv"), sep="|", na_filter=False)
organisation = pd.read_csv(os.path.join(prefix, "organisation_0_0.csv"), sep="|", na_filter=False)
tagclass = pd.read_csv(os.path.join(prefix, "tagclass_0_0.csv"), sep="|", na_filter=False)
tag = pd.read_csv(os.path.join(prefix, "tag_0_0.csv"), sep="|", na_filter=False)
comment_hascreator = pd.read_csv(os.path.join(prefix, "comment_hasCreator_person_0_0.csv"), sep="|", na_filter=False)
post_hascreator = pd.read_csv(os.path.join(prefix, "post_hasCreator_person_0_0.csv"), sep="|", na_filter=False)
comment_hastag = pd.read_csv(os.path.join(prefix, "comment_hasTag_tag_0_0.csv"), sep="|", na_filter=False)
post_hastag = pd.read_csv(os.path.join(prefix, "post_hasTag_tag_0_0.csv"), sep="|", na_filter=False)
forum_hastag = pd.read_csv(os.path.join(prefix, "forum_hasTag_tag_0_0.csv"), sep="|", na_filter=False)
comment_islocatedin = pd.read_csv(os.path.join(prefix, "comment_isLocatedIn_place_0_0.csv"), sep="|", na_filter=False)
post_islocatedin = pd.read_csv(os.path.join(prefix, "post_isLocatedIn_place_0_0.csv"), sep="|", na_filter=False)
person_islocatedin = pd.read_csv(os.path.join(prefix, "person_isLocatedIn_place_0_0.csv"), sep="|", na_filter=False)
organisation_islocatedin = pd.read_csv(os.path.join(prefix, "organisation_isLocatedIn_place_0_0.csv"), sep="|", na_filter=False)
comment_replyof_comment = pd.read_csv(os.path.join(prefix, "comment_replyOf_comment_0_0.csv"), sep="|", na_filter=False)
comment_replyof_post = pd.read_csv(os.path.join(prefix, "comment_replyOf_post_0_0.csv"), sep="|", na_filter=False)
forum_containerof_post = pd.read_csv(os.path.join(prefix, "forum_containerOf_post_0_0.csv"), sep="|", na_filter=False)
forum_hasmember_person = pd.read_csv(os.path.join(prefix, "forum_hasMember_person_0_0.csv"), sep="|", na_filter=False)
forum_hasmoderator_person = pd.read_csv(os.path.join(prefix, "forum_hasModerator_person_0_0.csv"), sep="|", na_filter=False)
person_hasinterest_tag = pd.read_csv(os.path.join(prefix, "person_hasInterest_tag_0_0.csv"), sep="|", na_filter=False)
person_knows_person = pd.read_csv(os.path.join(prefix, "person_knows_person_0_0.csv"), sep="|", na_filter=False)
person_likes_comment = pd.read_csv(os.path.join(prefix, "person_likes_comment_0_0.csv"), sep="|", na_filter=False)
person_likes_post = pd.read_csv(os.path.join(prefix, "person_likes_post_0_0.csv"), sep="|", na_filter=False)
person_studyat_organisation = pd.read_csv(os.path.join(prefix, "person_studyAt_organisation_0_0.csv"), sep="|", na_filter=False)
person_workat_organisation = pd.read_csv(os.path.join(prefix, "person_workAt_organisation_0_0.csv"), sep="|", na_filter=False)
place_ispartof_place = pd.read_csv(os.path.join(prefix, "place_isPartOf_place_0_0.csv"), sep="|", na_filter=False)
tagclass_isSubclassOf_tagclass = pd.read_csv(os.path.join(prefix, "tagclass_isSubclassOf_tagclass_0_0.csv"), sep="|", na_filter=False)
tag_hastype_tagclass = pd.read_csv(os.path.join(prefix, "tag_hasType_tagclass_0_0.csv"), sep="|", na_filter=False)
vertices = []
prepare_vertices(vertices, place, "PLACE", ["name", "url", "type"])
prepare_vertices(vertices, person, "PERSON", ["firstName", "lastName", "gender", "birthday", "creationDate", "locationIP", "browserUsed", "language", "email"])
Expand All @@ -320,8 +320,8 @@ def load_data_of_ldbc_graph(conn, graph, prefix):
prepare_vertices(vertices, tagclass, "TAGCLASS", ["name", "url"])
prepare_vertices(vertices, tag, "TAG", ["name", "url"])
edges = []
prepare_edges(edges, comment_hascreator, "HASCREATOR", "COMMENT", "PERSON", [])
prepare_edges(edges, post_hascreator, "HASCREATOR", "POST", "PERSON", [])
prepare_edges(edges, comment_hascreator, "HASCREATOR", "COMMENT", "PERSON", ["creationDate"])
prepare_edges(edges, post_hascreator, "HASCREATOR", "POST", "PERSON", ["creationDate"])
prepare_edges(edges, comment_hastag, "HASTAG", "COMMENT", "TAG", [])
prepare_edges(edges, post_hastag, "HASTAG", "POST", "TAG", [])
prepare_edges(edges, forum_hastag, "HASTAG", "FORUM", "TAG", [])
Expand Down Expand Up @@ -349,15 +349,15 @@ def load_data_of_ldbc_graph(conn, graph, prefix):
print("load ldbc graph done")

def load_data_of_movie_graph(conn, graph, prefix):
movie = pd.read_csv(os.path.join(prefix, "Movie.csv"), sep="|")
person = pd.read_csv(os.path.join(prefix, "Person.csv"), sep="|")
user = pd.read_csv(os.path.join(prefix, "User.csv"), sep="|")
acted_in = pd.read_csv(os.path.join(prefix, "ACTED_IN.csv"), sep="|")
directed = pd.read_csv(os.path.join(prefix, "DIRECTED.csv"), sep="|")
review = pd.read_csv(os.path.join(prefix, "REVIEWED.csv"), sep="|")
follows = pd.read_csv(os.path.join(prefix, "FOLLOWS.csv"), sep="|")
wrote = pd.read_csv(os.path.join(prefix, "WROTE.csv"), sep="|")
produced = pd.read_csv(os.path.join(prefix, "PRODUCED.csv"), sep="|")
movie = pd.read_csv(os.path.join(prefix, "Movie.csv"), sep="|", na_filter=False)
person = pd.read_csv(os.path.join(prefix, "Person.csv"), sep="|", na_filter=False)
user = pd.read_csv(os.path.join(prefix, "User.csv"), sep="|", na_filter=False)
acted_in = pd.read_csv(os.path.join(prefix, "ACTED_IN.csv"), sep="|", na_filter=False)
directed = pd.read_csv(os.path.join(prefix, "DIRECTED.csv"), sep="|", na_filter=False)
review = pd.read_csv(os.path.join(prefix, "REVIEWED.csv"), sep="|", na_filter=False)
follows = pd.read_csv(os.path.join(prefix, "FOLLOWS.csv"), sep="|", na_filter=False)
wrote = pd.read_csv(os.path.join(prefix, "WROTE.csv"), sep="|", na_filter=False)
produced = pd.read_csv(os.path.join(prefix, "PRODUCED.csv"), sep="|", na_filter=False)
vertices = []
prepare_vertices(vertices, movie, "Movie", ["released", "tagline", "title"])
prepare_vertices(vertices, person, "Person", ["born", "name"])
Expand Down
Loading