diff --git a/config/config.yaml b/config/config.yaml
index b7cc9fe0..1aa6c8d3 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -22,7 +22,7 @@ container_registry:
   base_url: docker.io
   # The owner or project of the registry
   # For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs
-  owner: reedcompbio
+  owner: gabeah
 
 # This list of algorithms should be generated by a script which checks the filesystem for installs.
 # It shouldn't be changed by mere mortals. (alternatively, we could add a path to executable for each algorithm
@@ -43,6 +43,15 @@ container_registry:
 # careful: too many parameters might make your runs take a long time.
 
 algorithms:
+  - name: "responsenet"
+    params:
+      include: true
+      run1:
+        gamma: [10]
+        _include_st: [false]
+        _verbose: [false]
+        _output_log: [false]
+
   - name: "pathlinker"
     params:
       include: true
@@ -51,7 +60,7 @@ algorithms:
 
   - name: "omicsintegrator1"
     params:
-      include: true
+      include: false
       run1:
         b: [5, 6]
         w: np.linspace(0,5,2)
@@ -59,7 +68,7 @@
 
   - name: "omicsintegrator2"
     params:
-      include: true
+      include: false
       run1:
         b: [4]
         g: [0]
@@ -69,7 +78,7 @@
 
   - name: "meo"
     params:
-      include: true
+      include: false
       run1:
         max_path_length: [3]
         local_search: ["Yes"]
@@ -77,18 +86,18 @@
 
   - name: "mincostflow"
     params:
-      include: true
+      include: false
       run1:
         flow: [1] # The flow must be an int
         capacity: [1]
 
   - name: "allpairs"
     params:
-      include: true
+      include: false
 
   - name: "domino"
     params:
-      include: true
+      include: false
       run1:
         slice_threshold: [0.3]
         module_threshold: [0.05]
diff --git a/docker-wrappers/ResponseNet/Dockerfile b/docker-wrappers/ResponseNet/Dockerfile
new file mode 100644
index 00000000..0284da30
--- /dev/null
+++ b/docker-wrappers/ResponseNet/Dockerfile
@@ -0,0 +1,10 @@
+# Uses the official python:3.10.7 image as the base
+FROM python:3.10.7
+
+WORKDIR /ResponseNet
+
+RUN pip install ortools && pip install networkx
+
+# Last built 7/26/24 @ 2:45 PM PST
+RUN wget https://raw.githubusercontent.com/Reed-CompBio/ResponseNet/40f3ff9b0db1391b0709c49cc7140dced0ed9102/responsenet.py
+
diff --git a/docker-wrappers/ResponseNet/README.md b/docker-wrappers/ResponseNet/README.md
new file mode 100644
index 00000000..f9fe059a
--- /dev/null
+++ b/docker-wrappers/ResponseNet/README.md
@@ -0,0 +1 @@
+# The README for ResponseNet
\ No newline at end of file
diff --git a/spras/responsenet.py b/spras/responsenet.py
new file mode 100644
index 00000000..3540193e
--- /dev/null
+++ b/spras/responsenet.py
@@ -0,0 +1,150 @@
+import warnings
+from pathlib import Path
+
+import pandas as pd
+
+from spras.containers import prepare_volume, run_container
+from spras.interactome import (
+    convert_undirected_to_directed,
+    reinsert_direction_col_undirected,
+)
+from spras.prm import PRM
+from spras.util import add_rank_column
+
+__all__ = ['ResponseNet']
+
+"""
+ResponseNet constructs a fully directed graph from the provided input file
+- an edge is represented with a head and tail node, which captures the direction of the interaction between two nodes
+- uses the networkx DiGraph() object
+
+Expected raw input format:
+Interactor1   Interactor2   Weight
+- the raw input file should have node pairs in the 1st and 2nd columns, with a weight in the 3rd column
+- it can include bidirectional edges, but only one copy of repeated edges is kept
+"""
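+# For illustration only: the generated edges file is tab-separated with no header, e.g.
+# (hypothetical node names and weights, not real data):
+#   A    B    0.5
+#   B    C    0.75
+# The generated sources and targets files list one node identifier per line.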
+class ResponseNet(PRM):
+    required_inputs = ['sources', 'targets', 'edges']
+
+    @staticmethod
+    def generate_inputs(data, filename_map):
+        """
+        Access fields from the dataset and write the required input files
+        @param data: dataset
+        @param filename_map: a dict mapping file types in the required_inputs to the filename for that type
+        """
+
+        # Ensure the required inputs are all present in the filename_map
+        for input_type in ResponseNet.required_inputs:
+            if input_type not in filename_map:
+                raise ValueError(f"{input_type} filename is missing")
+
+        # Select the sources and write them to a file, then repeat for the targets
+        for node_type in ['sources', 'targets']:
+            nodes = data.request_node_columns([node_type])
+            if nodes is None:
+                raise ValueError(f'No {node_type} found in the node files')
+            # Keep only the rows flagged as the current node type
+            nodes = nodes.loc[nodes[node_type]]
+            # Write the node identifiers, one per line, without a header
+            nodes.to_csv(filename_map[node_type], index=False, columns=['NODEID'], header=False)
+
+        # Create the network of edges
+        # ResponseNet expects a directed graph
+        edges = data.get_interactome()
+        edges = convert_undirected_to_directed(edges)
+
+        # Write the edge file with the head and tail nodes followed by the edge weight
+        edges.to_csv(filename_map['edges'], sep='\t', index=False, columns=["Interactor1", "Interactor2", "Weight"],
+                     header=False)
+
+    @staticmethod
+    def run(sources=None, targets=None, edges=None, output_file=None, gamma=10, _include_st=False, _verbose=False, _output_log=False, container_framework="docker"):
+        """
+        Run ResponseNet with Docker (or Singularity)
+        @param sources: input sources (required)
+        @param targets: input targets (required)
+        @param edges: input network file (required)
+        @param output_file: output file name (required)
+        @param gamma: integer representing gamma (optional, default is 10)
+        @param _include_st: whether to pass the -st flag to responsenet.py (optional, default False)
+        @param _verbose: whether to pass the -v (verbose) flag to responsenet.py (optional, default False)
+        @param _output_log: whether to pass the -o flag to responsenet.py (optional, default False)
+        @param container_framework: choose the container runtime framework, currently supports "docker" or "singularity" (optional)
+        """
+
+        # Ensure that the required arguments were provided
+        if not sources or not targets or not edges or not output_file:
+            raise ValueError('Required ResponseNet arguments are missing')
+
+        # The data files will be mapped to this directory within the container
+        work_dir = '/ResponseNet'
+
+        # Map the sources, targets, edges, and output directory into the container
+        volumes = list()
+
+        bind_path, sources_file = prepare_volume(sources, work_dir)
+        volumes.append(bind_path)
+
+        bind_path, targets_file = prepare_volume(targets, work_dir)
+        volumes.append(bind_path)
+
+        bind_path, edges_file = prepare_volume(edges, work_dir)
+        volumes.append(bind_path)
+
+        # Create a prefix for the output filename and ensure the directory exists
+        out_dir = Path(output_file).parent
+        out_dir.mkdir(parents=True, exist_ok=True)
+        bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir)
+        volumes.append(bind_path)
+        mapped_out_prefix = mapped_out_dir + '/out'
+
+        # Build the Python command to run within the container
+        # gamma is converted to a string so every element of the command list is a string
+        command = ['python',
+                   'responsenet.py',
+                   '--edges_file', edges_file,
+                   '--sources_file', sources_file,
+                   '--targets_file', targets_file,
+                   '--output', mapped_out_prefix,
+                   '--gamma', str(gamma)]
+
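+        # For reference (illustrative, paths are container-side), the assembled call resembles:
+        #   python responsenet.py --edges_file <edges> --sources_file <sources> --targets_file <targets>
+        #          --output <mapped_out_dir>/out --gamma <gamma>
+        # plus any of the optional flags appended below
+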
+        # Add the optional flags; their values can be changed in config/config.yaml
+        if _include_st:
+            command.append('-st')
+        if _verbose:
+            command.append('-v')
+        if _output_log:
+            command.append('-o')
+
+        # Suffix used to select the ResponseNet container image
+        container_suffix = "responsenet"
+
+        # Run the ResponseNet container with the assembled command and volume mappings
+        out = run_container(container_framework,
+                            container_suffix,
+                            command,
+                            volumes,
+                            work_dir)
+        print(out)
+
+        # TODO: Make sure we get an output file and a log file for user inspection, see DOMINO
+
+    @staticmethod
+    def parse_output(raw_pathway_file, standardized_pathway_file):
+        """
+        Convert a predicted pathway into the universal format
+
+        Although the algorithm constructs a directed network, the resulting network is treated as undirected.
+        This is because the flow within the network doesn't imply causal relationships between nodes.
+        The primary goal of the algorithm is node identification, not the identification of directional edges.
+
+        @param raw_pathway_file: pathway file produced by an algorithm's run function
+        @param standardized_pathway_file: the same pathway written in the universal format
+        """
+
+        df = pd.read_csv(raw_pathway_file, sep='\t', header=None)
+        df = add_rank_column(df)
+        # Directed edges in the input are converted to undirected edges in the output
+        df = reinsert_direction_col_undirected(df)
+        df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')
diff --git a/spras/runner.py b/spras/runner.py
index 6ef26496..a0fc9509 100644
--- a/spras/runner.py
+++ b/spras/runner.py
@@ -7,6 +7,7 @@
 from spras.omicsintegrator1 import OmicsIntegrator1 as omicsintegrator1
 from spras.omicsintegrator2 import OmicsIntegrator2 as omicsintegrator2
 from spras.pathlinker import PathLinker as pathlinker
+from spras.responsenet import ResponseNet as responsenet
 
 
 def run(algorithm, params):
diff --git a/test/ResponseNet/test_rn.py b/test/ResponseNet/test_rn.py
new file mode 100644
index 00000000..32f9981e
--- /dev/null
+++ b/test/ResponseNet/test_rn.py
@@ -0,0 +1,55 @@
+from pathlib import Path
+
+import pytest
+
+import spras.config as config
+from spras.responsenet import ResponseNet
+
+config.init_from_file("config/config.yaml")
+
+TEST_DIR = 'test/ResponseNet/'
+OUT_FILE = TEST_DIR + 'output/responsenet-output.txt'
+
+
+class TestResponseNet:
+
+    # All tests currently run only on graph1; additional graphs can be added to the parametrize lists
+    @pytest.mark.parametrize('graph', ['graph1'])
+    def test_responsenet_required(self, graph):
+        out_path = Path(OUT_FILE)
+        out_path.unlink(missing_ok=True)
+
+        ResponseNet.run(sources=TEST_DIR + 'input/' + graph + '/sources.txt',
+                        targets=TEST_DIR + 'input/' + graph + '/targets.txt',
+                        edges=TEST_DIR + 'input/' + graph + '/edges.txt',
+                        output_file=OUT_FILE)
+        assert out_path.exists()
+        # TODO: assert that the output .equals expected_output instead of only testing
+        # that the output file exists
+
+    @pytest.mark.parametrize('graph', ['graph1'])
+    def test_responsenet_all_optional(self, graph):
+        out_path = Path(OUT_FILE)
+        out_path.unlink(missing_ok=True)
+        # Include all optional arguments
+        ResponseNet.run(sources=TEST_DIR + 'input/' + graph + '/sources.txt',
+                        targets=TEST_DIR + 'input/' + graph + '/targets.txt',
+                        edges=TEST_DIR + 'input/' + graph + '/edges.txt',
+                        output_file=OUT_FILE,
+                        gamma=10,
+                        _verbose=True,
+                        _output_log=True,
+                        _include_st=True)
+        assert out_path.exists()
+
+    @pytest.mark.parametrize('graph', ['graph1'])
+    def test_responsenet_missing(self, graph):
+        # Test the expected error is raised when required arguments are missing
+        with pytest.raises(ValueError):
+            # No edges file
+            ResponseNet.run(sources=TEST_DIR + 'input/' + graph + '/sources.txt',
+                            targets=TEST_DIR + 'input/' + graph + '/targets.txt',
+                            output_file=OUT_FILE)