Skip to content

Commit 8c0d3fa

Browse files
committed
Add Docker/Singularity container support for CLI; fix EBI API one-seq-at-a-time submission
1 parent 47652a7 commit 8c0d3fa

5 files changed

Lines changed: 270 additions & 32 deletions

File tree

packages/cli/src/docker-runner.ts

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
import { spawn } from 'node:child_process'
2+
import * as fs from 'node:fs'
3+
import * as os from 'node:os'
4+
import path from 'node:path'
5+
6+
import { toFasta } from './util.ts'
7+
8+
import type { InterProScanResponse, InterProScanResults } from 'msa-parsers'
9+
10+
// Docker image reference handed to `docker run` when launching InterProScan.
const INTERPROSCAN_IMAGE = 'interpro/interproscan:latest'
11+
12+
/**
 * Runs InterProScan inside a Docker container and returns the parsed results.
 *
 * The sequences are written as FASTA into a fresh temporary directory that is
 * bind-mounted into the container at `/data`. The container is asked to write
 * `/data/output.json`, which is read back and parsed after the container
 * exits. The temporary directory is removed afterwards whether or not the run
 * succeeded.
 *
 * @param sequences - Sequences to analyse. An empty list returns `[]` without
 *   creating a temporary directory or starting a container.
 * @param programs - Analysis names; joined with commas and passed to `-appl`.
 * @returns The `results` array from the JSON output file.
 * @throws Error if Docker cannot be started, if the container exits with a
 *   non-zero code or is killed by a signal, or if the output file is missing,
 *   is not valid JSON, or has no `results` array.
 */
export async function runDockerInterProScan(
  sequences: { id: string; seq: string }[],
  programs: string[],
): Promise<InterProScanResults[]> {
  // Nothing to analyse: avoid paying for a container start-up.
  if (sequences.length === 0) {
    return []
  }

  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'interproscan-'))
  const inputFile = path.join(tmpDir, 'input.fasta')
  const outputFile = path.join(tmpDir, 'output.json')

  try {
    fs.writeFileSync(inputFile, toFasta(sequences), 'utf8')

    console.log(
      ` Running InterProScan via Docker on ${sequences.length} sequences...`,
    )
    console.log(` Image: ${INTERPROSCAN_IMAGE}`)

    await runDockerCommand([
      'run',
      '--rm',
      '-v',
      `${tmpDir}:/data`,
      INTERPROSCAN_IMAGE,
      '-i',
      '/data/input.fasta',
      '-o',
      '/data/output.json',
      '-f',
      'JSON',
      '-appl',
      programs.join(','),
    ])

    if (!fs.existsSync(outputFile)) {
      throw new Error('InterProScan did not produce output file')
    }

    const outputContent = fs.readFileSync(outputFile, 'utf8')

    let parsed: unknown
    try {
      parsed = JSON.parse(outputContent)
    } catch (e: unknown) {
      const reason = e instanceof Error ? e.message : String(e)
      throw new Error(`InterProScan output is not valid JSON: ${reason}`)
    }

    // Check the shape at runtime instead of trusting the file, so a malformed
    // output fails here with a clear message rather than as `undefined` in a
    // caller.
    if (
      typeof parsed !== 'object' ||
      parsed === null ||
      !Array.isArray((parsed as { results?: unknown }).results)
    ) {
      throw new Error('InterProScan output is missing a "results" array')
    }

    return (parsed as InterProScanResponse).results
  } finally {
    try {
      fs.rmSync(tmpDir, { recursive: true, force: true })
    } catch {
      // ignore cleanup errors
    }
  }
}

/**
 * Spawns `docker` with the given arguments, echoes its stdout and stderr to
 * the console, and resolves once the process exits with code 0.
 */
function runDockerCommand(args: string[]): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    console.log(` docker ${args.join(' ')}`)

    const proc = spawn('docker', args, {
      stdio: ['ignore', 'pipe', 'pipe'],
    })

    // Decode at the stream level so a multi-byte UTF-8 character that is split
    // across two chunks is not corrupted.
    proc.stdout.setEncoding('utf8')
    proc.stderr.setEncoding('utf8')

    const echo = (chunk: string) => {
      const line = chunk.trim()
      if (line) {
        console.log(` ${line}`)
      }
    }

    proc.stdout.on('data', echo)

    // stderr is echoed live and also kept for the failure message.
    let stderr = ''
    proc.stderr.on('data', (chunk: string) => {
      stderr += chunk
      echo(chunk)
    })

    // A promise settles only once, so if both 'close' and 'error' fire the
    // first one wins and the second is a no-op.
    proc.on('close', (code, signal) => {
      if (code === 0) {
        resolve()
      } else if (code === null) {
        // `code` is null when the process was terminated by a signal.
        reject(
          new Error(
            `Docker InterProScan was terminated by signal ${signal}: ${stderr}`,
          ),
        )
      } else {
        reject(
          new Error(`Docker InterProScan failed with code ${code}: ${stderr}`),
        )
      }
    })

    proc.on('error', err => {
      reject(
        new Error(
          `Failed to run Docker: ${err.message}. Is Docker installed and running?`,
        ),
      )
    })
  })
}

packages/cli/src/ebi-api.ts

Lines changed: 12 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
1-
import { toFasta } from './util.ts'
2-
31
import type { InterProScanResponse, InterProScanResults } from 'msa-parsers'
42

53
const BASE_URL = 'https://www.ebi.ac.uk/Tools/services/rest/iprscan5'
64

75
async function submitJob(
8-
sequences: { id: string; seq: string }[],
6+
sequence: { id: string; seq: string },
97
programs: string[],
108
email: string,
119
): Promise<string> {
@@ -16,13 +14,14 @@ async function submitJob(
1614
},
1715
body: new URLSearchParams({
1816
email,
19-
sequence: toFasta(sequences),
17+
sequence: `>${sequence.id}\n${sequence.seq}`,
2018
appl: programs.join(','),
2119
}),
2220
})
2321

2422
if (!response.ok) {
25-
throw new Error(`Failed to submit job: ${response.statusText}`)
23+
const text = await response.text()
24+
throw new Error(`Failed to submit job: ${response.statusText} - ${text}`)
2625
}
2726

2827
return response.text()
@@ -45,9 +44,8 @@ async function getResults(jobId: string): Promise<InterProScanResponse> {
4544
}
4645

4746
async function waitForJob(jobId: string): Promise<void> {
48-
console.log(` Waiting for job ${jobId}...`)
4947
let attempts = 0
50-
const maxAttempts = 300 // 5 minutes max wait
48+
const maxAttempts = 300
5149

5250
while (attempts < maxAttempts) {
5351
const status = await checkStatus(jobId)
@@ -74,25 +72,16 @@ export async function runEbiInterProScan(
7472
sequences: { id: string; seq: string }[],
7573
programs: string[],
7674
email: string,
77-
batchSize: number,
75+
_batchSize: number,
7876
): Promise<InterProScanResults[]> {
7977
const allResults: InterProScanResults[] = []
80-
const batches: { id: string; seq: string }[][] = []
81-
82-
for (let i = 0; i < sequences.length; i += batchSize) {
83-
batches.push(sequences.slice(i, i + batchSize))
84-
}
85-
86-
console.log(` Submitting ${batches.length} batch(es)...`)
8778

88-
for (let i = 0; i < batches.length; i++) {
89-
const batch = batches[i]!
90-
console.log(
91-
` Processing batch ${i + 1}/${batches.length} (${batch.length} sequences)...`,
92-
)
79+
for (let i = 0; i < sequences.length; i++) {
80+
const seq = sequences[i]!
81+
console.log(` [${i + 1}/${sequences.length}] Submitting ${seq.id}...`)
9382

94-
const jobId = await submitJob(batch, programs, email)
95-
console.log(` Job submitted: ${jobId}`)
83+
const jobId = await submitJob(seq, programs, email)
84+
console.log(` Job: ${jobId}`)
9685

9786
await waitForJob(jobId)
9887

@@ -101,7 +90,7 @@ export async function runEbiInterProScan(
10190
allResults.push(r)
10291
}
10392

104-
console.log(` Batch ${i + 1} complete`)
93+
console.log(` [${i + 1}/${sequences.length}] Done`)
10594
}
10695

10796
return allResults

packages/cli/src/index.ts

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,25 @@ const { values, positionals } = parseArgs({
1616
type: 'boolean',
1717
default: false,
1818
},
19+
docker: {
20+
type: 'boolean',
21+
default: false,
22+
},
23+
singularity: {
24+
type: 'boolean',
25+
default: false,
26+
},
27+
'singularity-image': {
28+
type: 'string',
29+
default: 'docker://interpro/interproscan:latest',
30+
},
1931
'interproscan-path': {
2032
type: 'string',
2133
default: 'interproscan.sh',
2234
},
2335
programs: {
2436
type: 'string',
25-
default: 'Pfam',
37+
default: 'PfamA,CDD',
2638
},
2739
email: {
2840
type: 'string',
@@ -53,21 +65,30 @@ COMMANDS:
5365
OPTIONS:
5466
-o, --output <file> Output GFF file (default: domains.gff)
5567
--local Use local InterProScan installation
68+
--docker Use Docker (interpro/interproscan image)
69+
--singularity Use Singularity/Apptainer container
70+
--singularity-image <image> Singularity image (default: docker://interpro/interproscan:latest)
5671
--interproscan-path <path> Path to interproscan.sh (default: interproscan.sh)
57-
--programs <list> Comma-separated list of programs (default: Pfam)
72+
--programs <list> Comma-separated list of programs (default: PfamA,CDD)
5873
--email <email> Email for EBI API (default: user@example.com)
5974
--batch-size <n> Number of sequences per API batch (default: 30)
6075
-h, --help Show this help message
6176
6277
EXAMPLES:
63-
# Run InterProScan using EBI API
78+
# Run InterProScan using EBI API (one sequence at a time)
6479
react-msaview-cli interproscan alignment.fasta -o domains.gff
6580
66-
# Run with local InterProScan
81+
# Run with Docker (processes all sequences locally, much faster)
82+
react-msaview-cli interproscan alignment.fasta -o domains.gff --docker
83+
84+
# Run with local InterProScan installation
6785
react-msaview-cli interproscan alignment.fasta -o domains.gff --local
6886
69-
# Specify programs
70-
react-msaview-cli interproscan alignment.clustal -o domains.gff --programs Pfam,SMART
87+
# Run with Singularity using a local .sif file
88+
react-msaview-cli interproscan alignment.fasta -o domains.gff --singularity --singularity-image /path/to/interproscan.sif
89+
90+
# Run with Singularity pulling from Docker Hub (requires network)
91+
react-msaview-cli interproscan alignment.fasta -o domains.gff --singularity
7192
`)
7293
}
7394

@@ -90,6 +111,9 @@ async function main() {
90111
inputFile,
91112
outputFile: values.output,
92113
useLocal: values.local,
114+
useDocker: values.docker,
115+
useSingularity: values.singularity,
116+
singularityImage: values['singularity-image'],
93117
interproscanPath: values['interproscan-path'],
94118
programs: values.programs.split(','),
95119
email: values.email,

packages/cli/src/interproscan-msa.ts

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,20 @@ import {
66
parseMSA,
77
} from 'msa-parsers'
88

9+
import { runDockerInterProScan } from './docker-runner'
910
import { runEbiInterProScan } from './ebi-api'
1011
import { runLocalInterProScan } from './local-runner'
12+
import { runSingularityInterProScan } from './singularity-runner'
1113

1214
import type { InterProScanResults } from 'msa-parsers'
1315

1416
export interface InterProScanOptions {
1517
inputFile: string
1618
outputFile: string
1719
useLocal: boolean
20+
useDocker: boolean
21+
useSingularity: boolean
22+
singularityImage: string
1823
interproscanPath: string
1924
programs: string[]
2025
email: string
@@ -26,6 +31,9 @@ export async function runInterProScan(options: InterProScanOptions) {
2631
inputFile,
2732
outputFile,
2833
useLocal,
34+
useDocker,
35+
useSingularity,
36+
singularityImage,
2937
interproscanPath,
3038
programs,
3139
email,
@@ -52,19 +60,29 @@ export async function runInterProScan(options: InterProScanOptions) {
5260

5361
let allResults: InterProScanResults[]
5462

55-
if (useLocal) {
63+
if (useSingularity) {
64+
console.log('Running InterProScan via Singularity...')
65+
allResults = await runSingularityInterProScan(
66+
sequences,
67+
programs,
68+
singularityImage,
69+
)
70+
} else if (useDocker) {
71+
console.log('Running InterProScan via Docker...')
72+
allResults = await runDockerInterProScan(sequences, programs)
73+
} else if (useLocal) {
5674
console.log(`Running local InterProScan at ${interproscanPath}...`)
5775
allResults = await runLocalInterProScan(
5876
sequences,
5977
interproscanPath,
6078
programs,
6179
)
6280
} else {
63-
console.log(`Running InterProScan via EBI API...`)
81+
console.log('Running InterProScan via EBI API...')
6482
allResults = await runEbiInterProScan(sequences, programs, email, batchSize)
6583
}
6684

67-
console.log(`Converting results to GFF...`)
85+
console.log('Converting results to GFF...')
6886
const gff = interProResponseToGFF(allResults)
6987

7088
console.log(`Writing output to ${outputFile}...`)

0 commit comments

Comments
 (0)