-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsearch.py
91 lines (68 loc) · 4.24 KB
/
search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
def searchDatabase(database, startID, endID):
if startID == endID:
return [[startID]]
paths = []
unvisited_forward = {startID: [None]}
unvisited_backward = {endID: [None]}
visited_forward = {}
visited_backward = {}
forward_depth = 0
backward_depth = 0
while (len(paths) == 0) and ((len(unvisited_forward) != 0) and (len(unvisited_backward) != 0)):
forward_links_count = database.getLinksCount('out', unvisited_forward.keys())
backward_links_count = database.getLinksCount('in', unvisited_backward.keys())
#-------------------- FORWARD BREADTH FIRST SEARCH --------------------#
if forward_links_count < backward_links_count:
forward_depth += 1
print(unvisited_forward)
outgoing_links = database.getLinks('out', unvisited_forward.keys()) # Fetch the pages which can be reached from the currently unvisited forward pages.
for page_id in unvisited_forward: # Mark all of the unvisited forward pages as visited.
visited_forward[page_id] = unvisited_forward[page_id]
unvisited_forward.clear() # Clear the unvisited forward dictionary.
for source_page_id, target_page_ids in outgoing_links:
for target_page_id in target_page_ids.split('|'):
if target_page_id:
target_page_id = int(target_page_id)
if (target_page_id not in visited_forward) and (target_page_id not in unvisited_forward): # If the target page is in neither visited forward nor unvisited forward, add it to unvisited forward.
unvisited_forward[target_page_id] = [source_page_id]
elif target_page_id in unvisited_forward: # If the target page is in unvisited forward, add the source page as another one of its parents.
unvisited_forward[target_page_id].append(source_page_id)
#-------------------- BACKWARD BREADTH FIRST SEARCH --------------------#
else:
backward_depth += 1
incoming_links = database.getLinks('in', unvisited_backward.keys()) # Fetch the pages which can reach the currently unvisited backward pages.
for page_id in unvisited_backward: # Mark all of the unvisited backward pages as visited.
visited_backward[page_id] = unvisited_backward[page_id]
unvisited_backward.clear() # Clear the unvisited backward dictionary.
for target_page_id, source_page_ids in incoming_links:
for source_page_id in source_page_ids.split('|'):
if source_page_id:
source_page_id = int(source_page_id)
if (source_page_id not in visited_backward) and (source_page_id not in unvisited_backward): # If the source page is in neither visited backward nor unvisited backward, add it to unvisited backward.
unvisited_backward[source_page_id] = [target_page_id]
elif source_page_id in unvisited_backward: # If the source page is in unvisited backward, add the target page as another one of its parents.
unvisited_backward[source_page_id].append(target_page_id)
#-------------------- CHECK FOR PATH COMPLETION --------------------#
for page_id in unvisited_forward: # The search is complete if any of the pages are in both unvisited backward and unvisited backward, so find the resulting paths.
if page_id in unvisited_backward:
paths_from_source = get_paths(unvisited_forward[page_id], visited_forward)
paths_from_target = get_paths(unvisited_backward[page_id], visited_backward)
for path_from_source in paths_from_source:
for path_from_target in paths_from_target:
current_path = list(path_from_source) + [page_id] + list(reversed(path_from_target))
# TODO: This line shouldn't be required, but there are some unexpected duplicates.
if current_path not in paths:
paths.append(current_path)
return paths
def get_paths(page_ids, visited_dict):
paths = []
for page_id in page_ids:
if page_id is None:
return [[]]
else: # recursively get the paths for the current page's children and append them to paths.
current_paths = get_paths(visited_dict[page_id], visited_dict)
for current_path in current_paths:
new_path = list(current_path)
new_path.append(page_id)
paths.append(new_path)
return paths