@@ -159,6 +159,28 @@ def plot_molecule_graph(G, labels, folder_name='graph', graph_num=None):
159159
160160
161161def encode_molecule (mol , plot_molecule = None , level = None , folder_name = 'graph' ):
162+ """
163+ encode_molecule function traverses molecules one level at a time and
164+ creates a graph representation of that molecule.
165+
166+ Args:
167+ mol (networkx.Graph): A graph describing a molecule. Nodes will have an
168+ 'element', 'aromatic' and a 'charge', and if `explicit_hydrogen` is
169+ False a 'hcount'. Depending on the input, they will also have 'isotope'
170+ and 'class' information. Edges will have an 'order'.
171+ plot_molecule (int, optional): This argument contains the number of the
172+ sequence from the input for which the molecule representation (image)
173+ should be generated. If set to 1, the algorithm will generate an image
174+ for the first sequence of the input file. Defaults to None.
175+ level (int, optional): Describes the level for the traversing
176+ algorithm. Defaults to None.
177+ folder_name (str, optional): This variable contains the name of the
178+ directory for encoding images. Defaults to 'graph'.
179+
180+ Returns:
181+ pd.DataFrame: The columns of this DataFrame are carbon atoms in the
182+ molecule. Each row holds neighbors of all carbon atoms in columns.
183+ """
162184
163185 elements = mol .nodes (data = "element" )
164186 G = create_graph_for_molecule (mol )
@@ -340,6 +362,29 @@ def get_unique_atoms(mol):
340362def dummy_encode_molecules (smiles , binary_encoding = True , print_progress = False ,
341363 plot_molecule = None , level = None ,
342364 folder_name = 'graph' ):
365+ """
366+ dummy_encode_molecules dummy encodes the traversed molecule.
367+
368+ Args:
369+ smiles (list): This list contains smiles strings as elements.
370+ binary_encoding (bool, optional): If this flag is True, the binary
371+ encoding is calculated. If it is False, discretized encoding is
372+ calculated. Defaults to True.
373+ print_progress (bool, optional): If True, the progress of the
374+ calculation will be shown to the user. Defaults to False.
375+ plot_molecule (int, optional): This argument contains the number of the
376+ sequence from the input for which the molecule representation (image)
377+ should be generated. If set to 1, the algorithm will generate an image
378+ for the first sequence of the input file. Defaults to None.
379+ level (int, optional): Describes the level for the traversing
380+ algorithm. Defaults to None.
381+ folder_name (str, optional): This variable contains the name of the
382+ directory for encoding images. Defaults to 'graph'.
383+
384+ Returns:
385+ list: The elements of this list are pd.DataFrames that represent dummy
386+ encodings of each input file.
387+ """
343388 res = []
344389 number_of_elements = len (smiles )
345390
@@ -391,6 +436,22 @@ def dummy_encode_molecules(smiles, binary_encoding=True, print_progress=False,
391436
392437# Function to normalize dummy encoding
393438def normalize_encodings (dummy_encodings , names , center_encoding = True ):
439+ """
440+ normalize_encodings either centers or shifts the encodings by padding them
441+ with zeroes.
442+
443+ Args:
444+ dummy_encodings (list): The elements of this list are pd.DataFrames
445+ that represent dummy encodings of each input file.
446+ names (list): This list contains strings of atoms as elements.
447+ center_encoding (bool, optional): If this flag is True, the encoding
448+ is centered. If it is False, the encoding is shifted to the right.
449+ Defaults to True.
450+
451+ Returns:
452+ dict: This dictionary contains the normalized encodings for each input
453+ file.
454+ """
394455
395456 max_dim = 0
396457 squared_matrices = []
@@ -435,7 +496,7 @@ def generate_imgs_from_encoding(normalized_encoding, binary_encoding=True,
435496
436497 Args:
437498 normalized_encoding (dict): This dictionary contains the normalized
438- encodings for each atom in the molecule .
499+ encodings for each input file .
439500 binary_encoding (bool, optional): If this flag is True, the binary
440501 encoding is calculated. If it is False, discretized encoding is
441502 calculated. Defaults to True.
@@ -568,7 +629,7 @@ def csv_export(normalized_encoding, classes=pd.DataFrame(),
568629
569630 Args:
570631 normalized_encoding (dict): This dictionary contains the normalized
571- encodings for each atom in the molecule .
632+ encodings for each input file .
572633 classes (pd.DataFrame, optional): This DataFrame contains one column
573634 that holds the prediction class for each sequence. Defaults to
574635 pd.DataFrame.
@@ -597,6 +658,24 @@ def csv_export(normalized_encoding, classes=pd.DataFrame(),
597658# Helper function to generate all permutations of encodings
598659def generate_all_encodings (smiles , names , data_set_identifier , level ,
599660 classes = pd .DataFrame ()):
661+ """
662+ generate_all_encodings is a helper function used to generate encodings for
663+ all data presented in the original paper.
664+
665+ Args:
666+ smiles (list): This list contains smiles strings as elements.
667+ names (list): This list contains strings of atoms as elements.
668+ data_set_identifier (str): This string is used to generate a directory
669+ for the result of a specific data set.
670+ level (int): Describes the level for the traversing
671+ algorithm.
672+ classes (pd.DataFrame, optional): This DataFrame contains one column
673+ that holds the prediction class for each sequence. Defaults to
674+ an empty pd.DataFrame.
675+
676+ Returns:
677+ None: None.
678+ """
600679
601680 # Hard-coded paths for testing purposes
602681 root_test_path = os .path .join ('..' , 'Test' , 'Paper' )
0 commit comments