Skip to content
Open
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ polychrom/_polymer_math.cpp
.idea
dist
polychrom/*.so
.vscode
63 changes: 63 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Project Overview

Polychrom is an Open2C polymer simulation library designed to build mechanistic models of chromosomes. It simulates biological processes subject to forces or constraints, which are then compared to Hi-C maps, microscopy, and other data sources.

## Development Commands

### Installation
```bash
pip install cython # Required dependency
pip install -r requirements.txt
pip install -e . # Install in development mode
```

### Testing
```bash
pytest # Run all tests
```

### Building Cython Extensions
```bash
python setup.py build_ext --inplace
```

## Architecture

### Core Components

**Simulation Module** (`polychrom/simulation.py`)
- Central `Simulation` class manages the entire simulation lifecycle
- Handles platform setup (CUDA/OpenCL/CPU), integrators, and parameters
- Methods: `set_data()` for loading conformations, `add_force()` for adding forces, `doBlock()` for running simulation steps

**Forces System** (`polychrom/forces.py`, `polychrom/forcekits.py`)
- Forces define polymer behavior: connectivity, confinement, crosslinks, tethering
- Individual forces in `forces.py` are functions that create OpenMM force objects
- Complex force combinations are packaged as "forcekits" (e.g., `polymer_chains`)
- Legacy forces available in `polychrom/legacy/forces.py`

**Data Storage** (`polychrom/hdf5_format.py`)
- HDF5Reporter handles simulation output in HDF5 format
- Backwards compatibility with legacy format via legacy reporter
- `polymerutils.load()` function reads both new and old formats

**Starting Conformations** (`polychrom/starting_conformations.py`)
- Functions to generate initial polymer configurations
- Example: `grow_cubic()` creates a cubic lattice conformation

### Key Design Patterns

1. **Force Architecture**: Forces are simple functions that wrap OpenMM force objects, returning the force with a `.name` attribute
2. **Simulation Flow**: Initialize Simulation → Load data → Add forces → Run blocks in loop → Save via reporter
3. **Extensibility**: Users can define custom forces in their scripts following the pattern in `forces.py`

## Important Notes

- OpenMM is the underlying engine (required dependency not in requirements.txt)
- Cython extensions in `_polymer_math.pyx` require compilation
- Main use case is loop extrusion simulations (see `examples/loopExtrusion/`)
- Testing uses pytest with configuration in `pytest.ini`
1 change: 1 addition & 0 deletions examples/customIntegrators/corr_noise.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
>>> python corr_noise.py [gpuid]
"""

import os
import sys
import time
Expand Down
1 change: 0 additions & 1 deletion examples/example/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
In this simulation, a simple polymer chain of 10,000 monomers is simulated.
"""


import os
import sys

Expand Down
1 change: 1 addition & 0 deletions examples/loopExtrusion/directionalModel/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This folder contains legacy code for the 2016 loop extrusion paper. Not for use in production.
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,25 @@

matplotlib.use("Agg")

import ctypes
import multiprocessing as mp
import os
import numpy as np

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import ctypes
from mirnylib.plotting import nicePlot
import multiprocessing as mp
import pyximport

from mirnylib.plotting import nicePlot
from openmmlib import polymerutils

from openmmlib.polymerutils import scanBlocks

pyximport.install()
import sys
from contextlib import closing

from mirnylib.h5dict import h5dict
from mirnylib.numutils import coarsegrain, completeIC, zoomArray
from mirnylib.systemutils import fmap, setExceptionHook
from mirnylib.numutils import coarsegrain, completeIC
from mirnylib.numutils import zoomArray
from contextlib import closing
import sys
from smcTranslocator import smcTranslocatorDirectional

filename = "/net/levsha/share/nezar/ctcf_sites/GM12878.ctcf_narrowPeak.loj.encodeMotif.rad21.txt"
Expand Down Expand Up @@ -70,9 +69,7 @@ def averageContacts(contactFunction, inValues, N, **kwargs):

filenameChunks = [filenames[i::nproc] for i in range(nproc)]

with closing(
mp.Pool(processes=nproc, initializer=init, initargs=sharedARrays_)
) as p:
with closing(mp.Pool(processes=nproc, initializer=init, initargs=sharedARrays_)) as p:
p.map(worker, filenameChunks)


Expand Down Expand Up @@ -201,9 +198,7 @@ def doSim(i):
hicdata = np.clip(hicdata, 0, np.percentile(hicdata, 99.99))
hicdata /= np.mean(np.sum(hicdata, axis=1))

fmap(
doSim, range(30), n=20
) # number of threads to use. On a 20-core machine I use 20.
fmap(doSim, range(30), n=20) # number of threads to use. On a 20-core machine I use 20.

arr = coarsegrain(arr, 20)
arr = np.clip(arr, 0, np.percentile(arr, 99.9))
Expand Down Expand Up @@ -249,9 +244,10 @@ def calculateAverageLoop():


def doPolymerSimulation(steps, dens, stiff, folder):
import time

from openmmlib.openmmlib import Simulation
from openmmlib.polymerutils import grow_rw
import time

SMCTran = initModel()

Expand Down Expand Up @@ -322,7 +318,5 @@ def doPolymerSimulation(steps, dens, stiff, folder):
steps=5000,
stiff=2,
dens=0.2,
folder="flagshipLifetime{1}Mu{2}_try={0}".format(
sys.argv[1], sys.argv[2], sys.argv[3]
),
folder="flagshipLifetime{1}Mu{2}_try={0}".format(sys.argv[1], sys.argv[2], sys.argv[3]),
)
Original file line number Diff line number Diff line change
@@ -1,23 +1,20 @@
import matplotlib

# matplotlib.use("Agg")

from mirnylib.plotting import nicePlot
import os
import pickle
from openmmlib import contactmaps
from mirnylib.numutils import zoomArray
from openmmlib import polymerutils

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from mirnylib.h5dict import h5dict
import pandas as pd
from mirnylib.genome import Genome
from mirnylib.numutils import completeIC, coarsegrain
from mirnylib.h5dict import h5dict
from mirnylib.numutils import coarsegrain, completeIC, zoomArray
from mirnylib.plotting import nicePlot
from mirnylib.systemutils import setExceptionHook
from openmmlib import contactmaps, polymerutils
from openmmlib.contactmapManager import averageContacts
import pandas as pd
from mirnylib.numutils import coarsegrain

# matplotlib.use("Agg")


setExceptionHook()

Expand All @@ -33,8 +30,8 @@ def __init__(self, i, forw, rev, blocks, steps):
import pyximport

pyximport.install()
from smcTranslocatorDirectional import smcTranslocator
import numpy as np
from smcTranslocatorDirectional import smcTranslocator

N = len(forw)
birthArray = np.zeros(N, dtype=np.double) + 0.1
Expand Down Expand Up @@ -365,16 +362,8 @@ def showCmapNew():
if end > 75000:
continue
plt.xlim([st, end])
plt.savefig(
os.path.join(
"heatmaps", "{0}_st={1}_end={2}_r=2.png".format(fname, st, end)
)
)
plt.savefig(
os.path.join(
"heatmaps", "{0}_st={1}_end={2}_r=2.pdf".format(fname, st, end)
)
)
plt.savefig(os.path.join("heatmaps", "{0}_st={1}_end={2}_r=2.png".format(fname, st, end)))
plt.savefig(os.path.join("heatmaps", "{0}_st={1}_end={2}_r=2.pdf".format(fname, st, end)))
plt.clf()

plt.show()
Expand Down
37 changes: 26 additions & 11 deletions polychrom/contactmaps.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""
r"""
Building contact maps
=====================

Expand Down Expand Up @@ -47,6 +47,7 @@
import random
import warnings
from contextlib import closing
from functools import partial

import numpy as np

Expand Down Expand Up @@ -75,6 +76,7 @@ def tonumpyarray(mp_arr):

def findN(filenames, loadFunction, exceptions):
"Finds length of data in filenames, handling the fact that files could be not loadable"
N = -1
for i in range(30):
if i == 29:
raise ValueError("Could not load any of the 30 randomly selected files")
Expand Down Expand Up @@ -281,6 +283,10 @@ def worker(x):
return


def identity(x):
return x


def averageContacts(contactIterator, inValues, N, **kwargs):
"""
A main workhorse for averaging contacts on multiple cores into one shared contact
Expand Down Expand Up @@ -352,7 +358,7 @@ def next(self): # actual realization of the self.next method
contactBlock = kwargs.get("contactBlock", 5000000)
classInitArgs = kwargs.get("classInitArgs", [])
classInitKwargs = kwargs.get("classInitKwargs", {})
contactProcessing = kwargs.get("contactProcessing", lambda x: x)
contactProcessing = kwargs.get("contactProcessing", identity)
finalSize = N * (N + 1) // 2
boundaries = np.linspace(0, finalSize, bucketNum + 1, dtype=int)
chunks = zip(boundaries[:-1], boundaries[1:])
Expand Down Expand Up @@ -399,6 +405,10 @@ def __init__(
"""
self.filenames = filenames
self.cutoff = cutoff
if loadFunction is None:
loadFunction = polymerutils.load
if contactFunction is None:
contactFunction = polymer_analyses.calculate_contacts
self.exceptionsToIgnore = exceptionsToIgnore
self.contactFunction = contactFunction
self.loadFunction = loadFunction
Expand Down Expand Up @@ -447,6 +457,15 @@ def monomerResolutionContactMap(
)


def contactAction(contacts, myBins):
contacts = np.asarray(contacts, order="C")
cshape = contacts.shape
contacts = contacts.reshape(-1,)
contacts = np.searchsorted(myBins[0], contacts) - 1
contacts = contacts.reshape(cshape)
return contacts


def binnedContactMap(
filenames,
chains=None,
Expand Down Expand Up @@ -480,14 +499,6 @@ def binnedContactMap(
chromosomeStarts = np.cumsum(chainBinNums)
chromosomeStarts = np.hstack((0, chromosomeStarts))

def contactAction(contacts, myBins=[bins]):
contacts = np.asarray(contacts, order="C")
cshape = contacts.shape
contacts.shape = (-1,)
contacts = np.searchsorted(myBins[0], contacts) - 1
contacts.shape = cshape
return contacts

args = [cutoff, loadFunction, exceptionsToIgnore, contactFinder]
values = [filenames[i::n] for i in range(n)]
mymap = averageContacts(
Expand All @@ -496,7 +507,7 @@ def contactAction(contacts, myBins=[bins]):
Nbase,
classInitArgs=args,
useFmap=useFmap,
contactProcessing=contactAction,
contactProcessing=partial(contactAction, myBins=[bins]),
nproc=n,
)
return mymap, chromosomeStarts
Expand Down Expand Up @@ -526,6 +537,10 @@ def __init__(
When initialized, the iterator should store these args properly and create
all necessary constructs
"""
if loadFunction is None:
loadFunction = polymerutils.load
if contactFunction is None:
contactFunction = polymer_analyses.calculate_contacts
self.filenames = filenames
self.cutoff = cutoff
self.exceptionsToIgnore = exceptionsToIgnore
Expand Down
1 change: 1 addition & 0 deletions polychrom/contrib/integrators.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
integrators use 0.1 or below).
"""

import numpy as np
import openmm as mm
from openmmtools import utils
Expand Down
3 changes: 3 additions & 0 deletions polychrom/forcekits.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,9 @@ def polymer_chains(
# for pair in exc:
# nb_force.addExclusion(int(pair[0]), int(pair[1]))
num_exc = nb_force.getNumExclusions()
else:
print("Could not find a way to exclude neighbouring chain particles from {}".format(nb_force.name))
num_exc = 0

print("Number of exceptions:", num_exc)

Expand Down
Loading