Skip to content

Responsenet Implementation into SPRAS #176

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 9 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 16 additions & 7 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ container_registry:
base_url: docker.io
# The owner or project of the registry
# For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs
owner: reedcompbio
owner: gabeah

# This list of algorithms should be generated by a script which checks the filesystem for installs.
# It shouldn't be changed by mere mortals. (alternatively, we could add a path to executable for each algorithm
Expand All @@ -43,6 +43,15 @@ container_registry:
# careful: too many parameters might make your runs take a long time.

algorithms:
# ResponseNet wrapper settings (consumed by ResponseNet.run in spras/responsenet.py)
- name: "responsenet"
params:
include: true
run1:
# Forwarded to responsenet.py as the value of --gamma
gamma: [10]
# When true, the wrapper appends the -st flag to the responsenet.py command
_include_st: [false]
# When true, the wrapper appends the -v flag to the responsenet.py command
_verbose: [false]
# When true, the wrapper appends the -o flag to the responsenet.py command
_output_log: [false]

- name: "pathlinker"
params:
include: true
Expand All @@ -51,15 +60,15 @@ algorithms:

- name: "omicsintegrator1"
params:
include: true
include: false
run1:
b: [5, 6]
w: np.linspace(0,5,2)
d: [10]

- name: "omicsintegrator2"
params:
include: true
include: false
run1:
b: [4]
g: [0]
Expand All @@ -69,26 +78,26 @@ algorithms:

- name: "meo"
params:
include: true
include: false
run1:
max_path_length: [3]
local_search: ["Yes"]
rand_restarts: [10]

- name: "mincostflow"
params:
include: true
include: false
run1:
flow: [1] # The flow must be an int
capacity: [1]

- name: "allpairs"
params:
include: true
include: false

- name: "domino"
params:
include: true
include: false
run1:
slice_threshold: [0.3]
module_threshold: [0.05]
Expand Down
10 changes: 10 additions & 0 deletions docker-wrappers/ResponseNet/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Uses the python:3.10.7 image as the base
FROM python:3.10.7

WORKDIR /ResponseNet

# Install both Python dependencies in a single layer and skip the pip cache to keep the image small
RUN pip install --no-cache-dir ortools networkx

# Last built 7/26/24 @ 2:45 PM PST
# Download responsenet.py pinned to a specific commit so that rebuilds fetch the same script
RUN wget https://raw.githubusercontent.com/Reed-CompBio/ResponseNet/40f3ff9b0db1391b0709c49cc7140dced0ed9102/responsenet.py

1 change: 1 addition & 0 deletions docker-wrappers/ResponseNet/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# The README for ResponseNet
150 changes: 150 additions & 0 deletions spras/responsenet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
import warnings
from pathlib import Path

import pandas as pd

from spras.containers import prepare_volume, run_container
from spras.interactome import (
    convert_undirected_to_directed,
    reinsert_direction_col_directed,
)
from spras.prm import PRM
from spras.util import add_rank_column

__all__ = ['ResponseNet']

"""
ResponseNet will construct a fully directed graph from the provided input file
- an edge is represented with a head and tail node, which represents the direction of the interaction between two nodes
- uses networkx Digraph() object

Expected raw input format:
Interactor1 Interactor2 Weight
- the expected raw input file should have node pairs in the 1st and 2nd columns, with a weight in the 3rd column
- it can include bidirectional edges, but will only keep one copy of repeated edges
"""
class ResponseNet(PRM):
    """
    SPRAS wrapper that prepares inputs for, runs, and parses the output of the
    containerized ResponseNet script (responsenet.py).
    """
    required_inputs = ['sources', 'targets', 'edges']

    @staticmethod
    def generate_inputs(data, filename_map):
        """
        Access fields from the dataset and write the required input files
        @param data: dataset
        @param filename_map: a dict mapping file types in the required_inputs to the filename for that type
        @raise ValueError: if a required filename is missing or the dataset has no sources/targets
        """
        # Every required input type needs a destination filename
        for input_type in ResponseNet.required_inputs:
            if input_type not in filename_map:
                raise ValueError(f"{input_type} filename is missing")

        # Write the sources file, then the targets file: one node id per line, no header
        for node_type in ['sources', 'targets']:
            nodes = data.request_node_columns([node_type])
            if nodes is None:
                raise ValueError(f'No {node_type} found in the node files')
            # Keep only the rows flagged in the node_type column (presumably a boolean column - confirm)
            nodes = nodes.loc[nodes[node_type]]
            nodes.to_csv(filename_map[node_type], index=False, columns=['NODEID'], header=False)

        # ResponseNet should be receiving a directed graph, so convert the interactome first
        edges = data.get_interactome()
        edges = convert_undirected_to_directed(edges)

        # Tab-separated edge list without a header: Interactor1, Interactor2, Weight
        edges.to_csv(filename_map['edges'], sep='\t', index=False,
                     columns=["Interactor1", "Interactor2", "Weight"], header=False)

    @staticmethod
    def run(sources=None, targets=None, edges=None, output_file=None, gamma=10, _include_st=False, _verbose=False,
            _output_log=False, container_framework="docker"):
        """
        Run ResponseNet with Docker (or singularity)
        @param sources: input sources (required)
        @param targets: input targets (required)
        @param edges: input network file (required)
        @param output_file: output file name (required)
        @param gamma: integer representing gamma (optional, default is 10)
        @param _include_st: if True, pass the -st flag to responsenet.py (optional, default is False)
        @param _verbose: if True, pass the -v flag to responsenet.py (optional, default is False)
        @param _output_log: if True, pass the -o flag to responsenet.py (optional, default is False)
        @param container_framework: choose the container runtime framework, currently supports "docker" or "singularity" (optional)
        @raise ValueError: if any of sources, targets, edges, or output_file is missing
        """
        # All four file arguments are required even though they default to None
        if not sources or not targets or not edges or not output_file:
            raise ValueError('Required ResponseNet arguments are missing')

        # The data files will be mapped within this directory within the container
        work_dir = '/ResponseNet'

        # Collects one volume binding per input file plus one for the output directory
        volumes = list()

        bind_path, sources_file = prepare_volume(sources, work_dir)
        volumes.append(bind_path)

        bind_path, targets_file = prepare_volume(targets, work_dir)
        volumes.append(bind_path)

        bind_path, edges_file = prepare_volume(edges, work_dir)
        volumes.append(bind_path)

        # Create a prefix for the output filename and ensure the directory exists
        out_dir = Path(output_file).parent
        out_dir.mkdir(parents=True, exist_ok=True)
        bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir)
        volumes.append(bind_path)
        mapped_out_prefix = mapped_out_dir + '/out'

        # Build the Python command to run within the container
        # Every element must be a string, so gamma is converted explicitly
        command = ['python',
                   'responsenet.py',
                   '--edges_file', edges_file,
                   '--sources_file', sources_file,
                   '--targets_file', targets_file,
                   '--output', mapped_out_prefix,
                   '--gamma', str(gamma)]

        # Add optional flags, values can be changed in config/config.yaml
        if _include_st:
            command.append('-st')
        if _verbose:
            command.append('-v')
        if _output_log:
            command.append('-o')

        # Suffix of the container image name for this algorithm
        container_suffix = "responsenet"

        # Run the command in the chosen container framework and echo what it returns
        out = run_container(container_framework,
                            container_suffix,
                            command,
                            volumes,
                            work_dir)
        print(out)

        # TODO: Make sure we get an output file and a log file for user inspection, see DOMINO
        # NOTE(review): the container writes using the prefix '<out_dir>/out', and nothing here moves or
        # renames that result to output_file, so output_file is not created by this function yet

    @staticmethod
    def parse_output(raw_pathway_file, standardized_pathway_file):
        """
        Convert a predicted pathway into the universal format

        The raw pathway is read as a headerless tab-separated file, a rank column is added, and a
        direction column is inserted with reinsert_direction_col_directed.

        NOTE(review): earlier documentation of this wrapper stated that the resulting network is treated
        as undirected because flow does not imply causal relationships between nodes, but the code
        calls the *directed* helper. Confirm which behavior is intended.

        @param raw_pathway_file: pathway file produced by an algorithm's run function
        @param standardized_pathway_file: the same pathway written in the universal format
        """
        df = pd.read_csv(raw_pathway_file, sep='\t', header=None)
        df = add_rank_column(df)
        df = reinsert_direction_col_directed(df)
        df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')

1 change: 1 addition & 0 deletions spras/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from spras.omicsintegrator1 import OmicsIntegrator1 as omicsintegrator1
from spras.omicsintegrator2 import OmicsIntegrator2 as omicsintegrator2
from spras.pathlinker import PathLinker as pathlinker
from spras.responsenet import ResponseNet as responsenet


def run(algorithm, params):
Expand Down
55 changes: 55 additions & 0 deletions test/ResponseNet/test_rn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import shutil
from pathlib import Path

import pytest

import spras.config as config
from spras.responsenet import ResponseNet

# Initialize the SPRAS configuration at import time
# The path is relative, presumably to the repository root - run pytest from there
config.init_from_file("config/config.yaml")

# Directory prefix for the ResponseNet test inputs and outputs
TEST_DIR = 'test/ResponseNet/'
# File whose existence the tests check after calling ResponseNet.run
OUT_FILE = TEST_DIR + 'output/responsenet-output.txt'


class TestResponseNet:
    """
    Tests for the ResponseNet wrapper's run function.
    """

    # Parametrized so additional input graphs can be added; currently only graph1 is run
    @pytest.mark.parametrize('graph', ['graph1'])
    def test_responsenet_required(self, graph):
        """Run with only the required arguments and check that the output file is created."""
        out_path = Path(OUT_FILE)
        # Remove any stale output so the existence check below is meaningful
        out_path.unlink(missing_ok=True)

        ResponseNet.run(sources=TEST_DIR + 'input/' + graph + '/sources.txt',
                        targets=TEST_DIR + 'input/' + graph + '/targets.txt',
                        edges=TEST_DIR + 'input/' + graph + '/edges.txt',
                        output_file=OUT_FILE)
        assert out_path.exists()
        # TODO: assert for the output .equals expected_output instead of only testing
        # that the output file exists

    @pytest.mark.parametrize('graph', ['graph1'])
    def test_responsenet_all_optional(self, graph):
        """Run with every optional argument set and check that the output file is created."""
        out_path = Path(OUT_FILE)
        out_path.unlink(missing_ok=True)
        # Include all optional arguments
        ResponseNet.run(sources=TEST_DIR + 'input/' + graph + '/sources.txt',
                        targets=TEST_DIR + 'input/' + graph + '/targets.txt',
                        edges=TEST_DIR + 'input/' + graph + '/edges.txt',
                        output_file=OUT_FILE,
                        gamma=10,
                        _verbose=True,
                        _output_log=True,
                        _include_st=True)
        assert out_path.exists()

    @pytest.mark.parametrize('graph', ['graph1'])
    def test_responsenet_missing(self, graph):
        """Check that omitting a required argument (edges) raises ValueError."""
        # Test the expected error is raised when required arguments are missing
        with pytest.raises(ValueError):
            ResponseNet.run(sources=TEST_DIR + 'input/' + graph + '/sources.txt',
                            targets=TEST_DIR + 'input/' + graph + '/targets.txt',
                            output_file=OUT_FILE)