Skip to content

Commit b2a8b0c

Browse files
committed
Added: Finished documentation using docstrings.
1 parent d187844 commit b2a8b0c

File tree

1 file changed

+81
-2
lines changed

1 file changed

+81
-2
lines changed

Code/cmangoes.py

Lines changed: 81 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,28 @@ def plot_molecule_graph(G, labels, folder_name='graph', graph_num=None):
159159

160160

161161
def encode_molecule(mol, plot_molecule=None, level=None, folder_name='graph'):
162+
"""
163+
encode_molecule function traverses molecules one level at a time and
164+
creates a graph representation of that molecule.
165+
166+
Args:
167+
mol (networkx.Graph): A graph describing a molecule. Nodes will have an
168+
'element', 'aromatic' and a 'charge', and if `explicit_hydrogen` is
169+
False a 'hcount'. Depending on the input, they will also have 'isotope'
170+
and 'class' information. Edges will have an 'order'.
171+
plot_molecule (int, optional): This argument contains the number of the
172+
sequence from the input for which the molecule representation (image)
173+
should be generated. If set to 1, the algorithm will generate an image
174+
for the first sequence of the input file. Defaults to None.
175+
level (int, optional): Describes the level for the traversing
176+
algorithm. Defaults to None.
177+
folder_name (str, optional): This variable contains the name of the
178+
directory for encoding images. Defaults to 'graph'.
179+
180+
Returns:
181+
pd.DataFrame: The columns of this DataFrame are carbon atoms in the
182+
molecule. Each row holds neighbors of all carbon atoms in columns.
183+
"""
162184

163185
elements = mol.nodes(data="element")
164186
G = create_graph_for_molecule(mol)
@@ -340,6 +362,29 @@ def get_unique_atoms(mol):
340362
def dummy_encode_molecules(smiles, binary_encoding=True, print_progress=False,
341363
plot_molecule=None, level=None,
342364
folder_name='graph'):
365+
"""
366+
dummy_encode_molecules dummy encodes the traversed molecule.
367+
368+
Args:
369+
smiles (list): This list contains smiles strings as elements.
370+
binary_encoding (bool, optional): If this flag is True, the binary
371+
encoding is calculated. If it is False, discretized encoding is
372+
calculated. Defaults to True.
373+
print_progress (bool, optional): If True, the progress of the
374+
calculation will be shown to the user. Defaults to False.
375+
plot_molecule (int, optional): This argument contains the number of the
376+
sequence from the input for which the molecule representation (image)
377+
should be generated. If set to 1, the algorithm will generate an image
378+
for the first sequence of the input file. Defaults to None.
379+
level (int, optional): Describes the level for the traversing
380+
algorithm. Defaults to None.
381+
folder_name (str, optional): This variable contains the name of the
382+
directory for encoding images. Defaults to 'graph'.
383+
384+
Returns:
385+
list: The elements of this list are pd.DataFrames that represent dummy
386+
encodings of each input file.
387+
"""
343388
res = []
344389
number_of_elements = len(smiles)
345390

@@ -391,6 +436,22 @@ def dummy_encode_molecules(smiles, binary_encoding=True, print_progress=False,
391436

392437
# Function to normalize dummy encoding
393438
def normalize_encodings(dummy_encodings, names, center_encoding=True):
439+
"""
440+
normalize_encodings either centers or shifts the encodings by padding them
441+
with zeroes.
442+
443+
Args:
444+
dummy_encodings (list): The elements of this list are pd.DataFrames
445+
that represent dummy encodings of each input file.
446+
names (list): This list contains strings of atoms as elements.
447+
center_encoding (bool, optional): If this flag is True, the encoding
448+
is centered. If it is False, the encoding is shifted to the right.
449+
Defaults to True.
450+
451+
Returns:
452+
dict: This dictionary contains the normalized encodings for each input
453+
file.
454+
"""
394455

395456
max_dim = 0
396457
squared_matrices = []
@@ -435,7 +496,7 @@ def generate_imgs_from_encoding(normalized_encoding, binary_encoding=True,
435496
436497
Args:
437498
normalized_encoding (dict): This dictionary contains the normalized
438-
encodings for each atom in the molecule.
499+
encodings for each input file.
439500
binary_encoding (bool, optional): If this flag is True, the binary
440501
encoding is calculated. If it is False, discretized encoding is
441502
calculated. Defaults to True.
@@ -568,7 +629,7 @@ def csv_export(normalized_encoding, classes=pd.DataFrame(),
568629
569630
Args:
570631
normalized_encoding (dict): This dictionary contains the normalized
571-
encodings for each atom in the molecule.
632+
encodings for each input file.
572633
classes (pd.DataFrame, optional): This DataFrame contains one column
573634
that holds the prediction class for each sequence. Defaults to
574635
pd.DataFrame.
@@ -597,6 +658,24 @@ def csv_export(normalized_encoding, classes=pd.DataFrame(),
597658
# Helper function to generate all permutations of encodings
598659
def generate_all_encodings(smiles, names, data_set_identifier, level,
599660
classes=pd.DataFrame()):
661+
"""
662+
generate_all_encodings is a helper function used to generate encodings for
663+
all data presented in the original paper.
664+
665+
Args:
666+
smiles (list): This list contains smiles strings as elements.
667+
names (list): This list contains strings of atoms as elements.
668+
data_set_identifier (str): This string is used to generate a directory
669+
for the result of a specific data set.
670+
level (int): Describes the level for the traversing
670+
algorithm.
672+
classes (pd.DataFrame, optional): This DataFrame contains one column
673+
that holds the prediction class for each sequence. Defaults to
674+
pd.DataFrame.
675+
676+
Returns:
677+
None: None.
678+
"""
600679

601680
# Hard-coded paths for testing purposes
602681
root_test_path = os.path.join('..', 'Test', 'Paper')

0 commit comments

Comments
 (0)