Skip to content

Commit 00696a0

Browse files
Merge pull request #10 from FranciscoKloganB/develop
Ready for Metropolis Hastings insertion
2 parents d7ae781 + d17cbef commit 00696a0

14 files changed

Lines changed: 455 additions & 171 deletions

File tree

hive/app/basic_simulation.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ def usage():
88
print(" -------------------------------------------------------------------------")
99
print(" Francisco Barros (francisco.teixeira.de.barros@tecnico.ulisboa.pt\n")
1010
print(" Run a simulation for Markov Chain Based Swarm Guidance algorithm on a P2P Network that persists files\n")
11-
print(" Typical usage: basic_simulation.py --simfile=sim01.json --filepath=yourfile.txt\n")
11+
print(" Typical usage: basic_simulation.py --simfile=sim01.json\n")
1212
print(" Display all optional flags and other important notices: basic_simulation.py --help\n")
1313
print(" -------------------------------------------------------------------------\n")
1414
sys.exit(" ")
@@ -25,8 +25,7 @@ def help():
2525
def main():
2626

2727
try:
28-
options, args = getopt.getopt(sys.argv[1:], "uhs:f:", ["usage", "help", "simfile=", "filepath="])
29-
28+
options, args = getopt.getopt(sys.argv[1:], "uhs:", ["usage", "help", "simfile="])
3029
for options, args in options:
3130
if options in ("-u", "--usage"):
3231
usage()
@@ -36,10 +35,6 @@ def main():
3635
sim_file_path = str(args).strip()
3736
if not sim_file_path:
3837
sys.exit("Invalid simulation filepath. A simulation file is required for execution rules.")
39-
if options in ("-f", "--filepath"):
40-
file_path = str(args)
41-
if not file_path:
42-
sys.exit("Invalid share file. Simulation requires file to simulate sharing.")
4338

4439
except getopt.GetoptError:
4540
usage()

hive/app/domain/Enums.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,4 @@ class HttpCodes(Enum):
1111
OK = 200
1212
NOT_FOUND = 404
1313
TIME_OUT = 408
14+
SERVER_DOWN = 521

hive/app/domain/Hivemind.py

Lines changed: 253 additions & 112 deletions
Large diffs are not rendered by default.

hive/app/domain/MarkovMatrix.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,17 @@
33

44

55
class MarkovMatrix:
6+
# region docstrings
67
"""
78
Implements a matrix which adheres to markov chain theory and implements some basic markov chains' behaviour
89
:ivar states: identifiers for the buckets existing on network passed in matching order to their transition arrays
910
:type list<str>
1011
:ivar transition_matrix: concrete markov matrix data structure with named rows and columns according to passed states
1112
:type 2D pandas.DataFrame
1213
"""
14+
# endregion
1315

16+
# region class variables, instance variables and constructors
1417
def __init__(self, states, transition_arrays):
1518
"""
1619
Initialize the Markov Chain instance.
@@ -26,7 +29,9 @@ def __init__(self, states, transition_arrays):
2629
columns=states,
2730
index=states
2831
)
32+
# endregion
2933

34+
# region instance methods
3035
def next_state(self, current_state):
3136
"""
3237
Choose list variable given probability of each variable
@@ -36,4 +41,5 @@ def next_state(self, current_state):
3641
:type str
3742
"""
3843
# https://docs.scipy.org/doc/numpy-1.14.0/reference/generated/numpy.random.choice.html
39-
return np.random.choice(self.states, p=self.transition_matrix[current_state])
44+
return np.random.choice(a=self.states, p=self.transition_matrix[current_state])
45+
# endregion

hive/app/domain/SharedFilePart.py

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66

77
class SharedFilePart:
8+
# region docstrings
89
"""
910
Represents a simulation over the P2P Network that tries to persist a file using stochastic swarm guidance
1011
:ivar part_name: original name of the file this part belongs to
@@ -22,7 +23,9 @@ class SharedFilePart:
2223
:ivar markov_matrix: container object describing and implementing Markov Chain behaviour
2324
:type hive.domain.MarkovChain
2425
"""
26+
# endregion
2527

28+
# region class variables, instance variables and constructors
2629
def __init__(self, part_name, part_number, part_data, ddv=None, transition_matrix_definition=None):
2730
"""
2831
:param part_name: original name of the file this part belongs to
@@ -41,9 +44,9 @@ def __init__(self, part_name, part_number, part_data, ddv=None, transition_matri
4144
self.__part_id = part_name + "_#_" + str(part_number)
4245
self.__part_data = ConvertUtils.bytes_to_base64_string(part_data)
4346
self.__sha256 = CryptoUtils.sha256(part_data)
44-
self.__desired_distribution = np.array(ddv).transpose() if ddv is not None else ddv
45-
self.__markov_matrix = MarkovMatrix(transition_matrix_definition[0], transition_matrix_definition[1])
47+
# endregion
4648

49+
# region properties
4750
@property
4851
def part_name(self):
4952
return self.__part_name
@@ -63,14 +66,4 @@ def part_data(self):
6366
@property
6467
def sha256(self):
6568
return self.__sha256
66-
67-
@property
68-
def desired_distribution(self):
69-
return self.__desired_distribution
70-
71-
@property
72-
def markov_matrix(self):
73-
return self.__markov_matrix
74-
75-
def get_next_state(self, worker_id):
76-
return self.__markov_matrix.next_state(worker_id)
69+
# endregion

hive/app/domain/Worker.py

Lines changed: 72 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,71 +1,125 @@
1+
import numpy as np
2+
13
from utils import CryptoUtils
24
from utils.ResourceTracker import ResourceTracker as rT
35
from domain.Enums import HttpCodes
46

57

68
class Worker:
9+
# region docstrings
710
"""
811
Defines a node on the P2P network. Workers are subject to constraints imposed by Hivemind, constraints they inflict
912
on themselves based on available computing power (CPU, RAM, etc...) and can have [0, N] shared file parts. Workers
1013
have the ability to reconstruct lost file parts when needed.
11-
:ivar hivemind: coordinator of the unstructured Hybrid P2P network that enlisted this worker for a Hive
12-
:type str
14+
:ivar file_parts: key part_name maps to a dict of part_id keys whose values are SharedFilePart
15+
:type dict<str, dict<str, SharedFilePart>
1316
:ivar name: id of this worker node that uniquely identifies him in the network
1417
:type str
15-
:ivar file_parts: part_id is a key to a SharedFilePart
16-
:type dict<string, SharedFilePart>
18+
:ivar hivemind: coordinator of the unstructured Hybrid P2P network that enlisted this worker for a Hive
19+
:type str
20+
:ivar routing_table: maps file name with state transition probabilities, from this worker to other workers
21+
:type dict<str, pandas.DataFrame>
1722
"""
23+
# endregion
1824

25+
# region class variables, instance variables and constructors
1926
def __init__(self, hivemind, name):
    """
    Create a worker that starts without any file parts and without routing information.
    :param hivemind: coordinator of the network that enlisted this worker
    :param name: id of this worker node
    """
    # per-file state, filled in later by receive_part and set_file_routing
    self.file_parts = dict()
    self.__routing_table = dict()
    # identity of the node and the coordinator it answers to
    self.name, self.hivemind = name, hivemind
31+
# endregion
2332

33+
# region overriden class methods
2434
def __hash__(self):
    """
    Hash a worker by the string form of its name.
    :return: hash of str(self.name), i.e. the same hash the name string itself has
    :type int
    """
    # allows a worker object to be used as a dictionary key
    return hash(str(self.name))

def __eq__(self, other):
    """
    Compare this worker with another worker or with a bare worker name.
    :param other: a worker-like object (has hivemind and name attributes) or a str holding a worker name
    :return: True/False for supported operands, NotImplemented for anything else
    """
    if isinstance(other, str):
        # a plain string is treated as a worker name
        return self.name == other
    if not (hasattr(other, "hivemind") and hasattr(other, "name")):
        # unrelated operand (None, int, ...): let Python fall back to its default
        # comparison instead of failing with AttributeError
        return NotImplemented
    return (self.hivemind, self.name) == (other.hivemind, other.name)

def __ne__(self, other):
    """
    Negation of the equality comparison.
    :param other: any object accepted by the equality comparison
    :type bool
    """
    return not (self == other)
45+
# endregion
3346

47+
# region file recovery methods
3448
def __init_recovery_protocol(self, part):
    """
    Placeholder for the recovery of a corrupted file part; the body currently does nothing.
    When a corrupt file is received initiate recovery protocol, if this is the node with the most file parts.
    The recovery protocol consists of reconstructing the damaged file part from other parts on the system, it may be
    necessary to obtain other files from other nodes to initiate reconstruction.
    Note: this is not important right now, only after MCMC with Metropolis-Hastings works.
    For now assume that when a node dies, if it had less than N-K parts, its parts are given to someone else.
    :param part: the file part that triggered the recovery (presumably a SharedFilePart - TODO confirm)
    """
    # TODO:
    # corrupted or missing file recovery algorithm
    pass
59+
# endregion
60+
61+
# region instance methods
62+
def set_file_routing(self, file_name, labeled_transition_vector):
    """
    Store the transition probabilities used to route parts of the given file, replacing any previous entry.
    :param file_name: a file name that is being shared on the hive
    :type str
    :param labeled_transition_vector: probability vector indicating transitions to other states for the given file
    :type presumably a single-column pandas.DataFrame rather than a bare numpy array, since get_next_state reads
     its .index and selects the column labeled with this worker's name - TODO confirm against the caller
    """
    self.__routing_table[file_name] = labeled_transition_vector
4270

43-
def receive_part(self, part):
44-
if CryptoUtils.sha256(part.part_data) == part.sha256:
45-
self.file_parts[part.part_id] = part
71+
def receive_part(self, part, no_check=False):
    """
    Store a received file part under the name of the file it belongs to.
    A part whose sha256 does not match its data is reported and handed to the recovery protocol instead.
    :param part: the file part delivered to this worker; part_name, part_id, part_number, part_data and sha256 are read
    :type SharedFilePart (presumably - TODO confirm)
    :param no_check: when True the integrity check is skipped and the part is always stored
    :type bool
    """
    if no_check or CryptoUtils.sha256(part.part_data) == part.sha256:
        # parts are grouped per file name; the inner dict is created on the first part of a file
        # (the attribute is part_name, as used by the corruption message below, not name)
        self.file_parts.setdefault(part.part_name, {})[part.part_id] = part
    else:
        print("part_name: {}, part_number: {} - corrupted".format(part.part_name, str(part.part_number)))
        self.__init_recovery_protocol(part)
4981

5082
def send_part(self):
    """
    Route every held file part once.
    For each part a destination is drawn with get_next_state; the part is kept by this worker when the destination
    is the worker itself or when the hivemind does not answer the transmission with HttpCodes.OK, otherwise it is
    dropped from file_parts.
    """
    for file_name, held_parts in self.file_parts.items():
        kept = {}
        for part_id, shared_part in held_parts.items():
            destination = self.get_next_state(file_name=file_name)
            stays_here = destination == self.name
            if not stays_here:
                status = self.hivemind.simulate_transmission(destination, shared_part)
                # TODO:
                # make use of the HttpCode responses with more than a binary behaviour
                stays_here = status != HttpCodes.OK
            if stays_here:
                kept[part_id] = shared_part
        # rebinding an existing key does not resize the dict, so this is safe while iterating it
        self.file_parts[file_name] = kept
6196

6297
def leave_hive(self, orderly=True):
    """
    Detach this worker from its hive by clearing hivemind, name and file_parts.
    :param orderly: when True the hivemind is first asked to redistribute the file parts held by this worker
    :type bool
    """
    if orderly:
        self.hivemind.simulate_redistribution(self.file_parts)
    # cleared in the same order as before: coordinator, identity, held parts
    for field in ("hivemind", "name", "file_parts"):
        setattr(self, field, None)
68108

109+
def get_next_state(self, file_name):
    """
    Draw the next holder for a part of the given file from this worker's routing table.
    :param file_name: the name of the file the part to be routed belongs to
    :type: str
    :return: the label of the worker to whom the part should be routed
    :type: str
    """
    transitions = self.__routing_table[file_name]
    # row labels are the candidate destinations, the column named after this worker holds their probabilities
    candidates = list(transitions.index.values)
    weights = list(transitions[self.name])
    return np.random.choice(a=candidates, p=weights)
120+
# endregion
121+
122+
# region static methods
69123
@staticmethod
70124
def get_resource_utilization(*args):
71125
"""
@@ -82,6 +136,4 @@ def get_resource_utilization(*args):
82136
for arg in args:
83137
results[arg] = rT.get_value(arg)
84138
return results
85-
86-
87-
139+
# endregion
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
class ConvergenceData:
    # region docstrings
    """
    Holds data that helps a domain.Hivemind keep track of convergence in a simulation
    :cvar __DEVIATION_TOLERANCE: fraction by which a value on a distribution vector can deviate from another in eq cmp
    :type float
    :cvar MIN_CONVERGENCE_THRESHOLD: how many consecutive convergent stages must a file have to be considered converged
    :type int
    :ivar cswc: indicates how many consecutive steps a file has in convergence
    :type int
    :ivar largest_convergence_set: size of the biggest convergence set seen throughout the simulation for this file
    :type int
    :ivar convergence_set: list registering stages in which a file has seen convergence. Registers only when above min conv. threshold
    :type list<int>
    :ivar convergence_sets: list with all convergence sets found for this file during a simulation
    :type list<list<int>>
    """
    # endregion

    # region class variables, instance variables and constructors
    __DEVIATION_TOLERANCE = 0.01
    MIN_CONVERGENCE_THRESHOLD = 3

    def __init__(self):
        self.cswc = 0
        self.convergence_set = []
        self.convergence_sets = []
        self.largest_convergence_set = 0
    # endregion

    # region instance methods
    def cswc_increment_and_get(self, increment):
        """
        Add to the count of consecutive convergent steps.
        :param increment: amount added to cswc
        :type int
        :return: the updated cswc
        :type int
        """
        self.cswc += increment
        return self.cswc

    def try_set_largest(self):
        """
        Record the size of the current convergence set when it is the biggest seen so far.
        """
        current_size = len(self.convergence_set)
        if current_size > self.largest_convergence_set:
            # store the same quantity that is compared; cswc may already have been reset by the caller
            self.largest_convergence_set = current_size

    def try_update_convergence_set(self, stage):
        """
        Register a stage in the current convergence set once cswc has reached MIN_CONVERGENCE_THRESHOLD.
        :param stage: identifier of the simulation stage to register
        :type int
        :return: True when the stage was registered, False otherwise
        :type bool
        """
        if self.cswc < ConvergenceData.MIN_CONVERGENCE_THRESHOLD:
            return False
        self.convergence_set.append(stage)
        return True

    def save_sets_and_reset_data(self):
        """
        Close the current convergence streak: archive a non-empty convergence set, update the largest size
        and reset cswc and the current set.
        """
        if self.convergence_set:
            self.try_set_largest()
            # archive the stages themselves so convergence_sets stays a list of lists
            self.convergence_sets.append(self.convergence_set)
        # rebind instead of clearing, the archived list must keep its content
        self.convergence_set = []
        self.cswc = 0
    # endregion

    # region static methods
    @staticmethod
    def equal_distributions(one, another):
        """
        Compare two distribution vectors entry by entry, allowing a relative deviation of __DEVIATION_TOLERANCE.
        :param one: vector being tested
        :param another: reference vector, the allowed deviation is relative to its entries
        :return: True when both have the same length and every entry of one is within tolerance of another
        :type bool
        """
        row_count = len(one)
        if row_count != len(another):
            return False
        # positional indexing is kept on purpose so containers are accessed exactly as before
        for i in range(row_count):
            # absolute tolerance with inclusive bounds: a zero or negative reference entry can still be matched
            allowed = abs(another[i]) * ConvergenceData.__DEVIATION_TOLERANCE
            if abs(one[i] - another[i]) > allowed:
                return False
        return True
    # endregion

hive/app/domain/helpers/__init__.py

Whitespace-only changes.

hive/app/static/powerglove_reloaded/powerglove.ini

Lines changed: 0 additions & 7 deletions
This file was deleted.
6.57 MB
Loading

0 commit comments

Comments
 (0)