Commit c5fe184

Include ancestry estimation and trace
1 parent d15dded commit c5fe184

File tree

4 files changed: +709, -0 lines
Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
package genepi.imputationserver.steps;

import cloudgene.sdk.internal.WorkflowContext;
import cloudgene.sdk.internal.WorkflowStep;
import genepi.imputationserver.steps.ancestry.PopulationPredictor;

public class PopulationPredictorStep extends WorkflowStep {

    @Override
    public boolean run(WorkflowContext context) {

        // Configure the predictor with the PCA input files and the number of
        // PCs taken from the workflow context, then write the predicted
        // populations to the output folder.
        PopulationPredictor predictor = new PopulationPredictor();
        predictor.setSamplesFile(context.get("samples"));
        predictor.setReferenceFile(context.get("reference_pc"));
        predictor.setStudyFile(context.get("study_pc"));
        int pcs = Integer.parseInt(context.get("max_pcs"));
        predictor.setMaxPcs(pcs);
        predictor.predictPopulation(context.get("output"));

        return true;
    }

}
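For orientation, the step above relies on only a small surface of PopulationPredictor: four String setters, an int setter for the number of PCs, and predictPopulation. The actual class is added elsewhere in this commit and is not part of this diff; the skeleton below is purely a hedged sketch of that inferred contract, with the ancestry logic omitted.

package genepi.imputationserver.steps.ancestry;

// Hypothetical skeleton inferred from the calls in PopulationPredictorStep;
// the real implementation performs the PCA-based population assignment.
public class PopulationPredictor {

    private String samplesFile;    // sample list ("samples")
    private String referenceFile;  // reference principal components ("reference_pc")
    private String studyFile;      // study principal components ("study_pc")
    private int maxPcs;            // number of PCs used for the prediction ("max_pcs")

    public void setSamplesFile(String samplesFile) { this.samplesFile = samplesFile; }
    public void setReferenceFile(String referenceFile) { this.referenceFile = referenceFile; }
    public void setStudyFile(String studyFile) { this.studyFile = studyFile; }
    public void setMaxPcs(int maxPcs) { this.maxPcs = maxPcs; }

    public void predictPopulation(String outputFolder) {
        // Placeholder: compare study PCs against reference PCs and write the
        // predicted populations to outputFolder.
    }
}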
Lines changed: 237 additions & 0 deletions
@@ -0,0 +1,237 @@
package genepi.imputationserver.steps;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;

import cloudgene.sdk.internal.WorkflowContext;
import cloudgene.sdk.internal.WorkflowStep;
import genepi.imputationserver.steps.ancestry.TraceInputValidation;
import genepi.imputationserver.steps.ancestry.TraceInputValidation.TraceInputValidationResult;
import genepi.imputationserver.steps.fastqc.ITask;
import genepi.imputationserver.steps.fastqc.ITaskProgressListener;
import genepi.imputationserver.steps.fastqc.LiftOverTask;
import genepi.imputationserver.steps.fastqc.TaskResults;
import genepi.imputationserver.steps.vcf.VcfFileUtil;
import genepi.imputationserver.util.DefaultPreferenceStore;
import genepi.io.FileUtil;
import genepi.io.text.LineWriter;
import htsjdk.variant.vcf.VCFFileReader;
import htsjdk.variant.vcf.VCFHeader;

public class TraceStep extends WorkflowStep {

    public static String BUILD = "hg19";

    public static final int MIN_VARIANTS = 100;

    @Override
    public boolean run(WorkflowContext context) {
        prepareTraceJobs(context);
        return true;
    }

    protected void setupTabix(String folder) {
        VcfFileUtil.setTabixBinary(FileUtil.path(folder, "bin", "tabix"));
    }

    public boolean prepareTraceJobs(WorkflowContext context) {

        String folder = getFolder(FastQualityControl.class);
        setupTabix(folder);

        String genotypes = context.get("files");
        String buildGwas = context.get("build");

        int batchSize = Integer.parseInt(context.get("batch_size"));
        String output = context.get("output");

        String[] files = FileUtil.getFiles(genotypes, "*.vcf.gz$|*.vcf$");

        try {

            // load job.config
            File jobConfig = new File(FileUtil.path(folder, "job.config"));
            DefaultPreferenceStore store = new DefaultPreferenceStore();
            if (jobConfig.exists()) {
                store.load(jobConfig);
            } else {
                context.log("Configuration file '" + jobConfig.getAbsolutePath()
                        + "' not available. Using default values.");
            }

            // check if liftover is needed
            if (!buildGwas.equals(BUILD)) {

                context.warning("Uploaded data is " + buildGwas + " and reference is " + BUILD + ".");
                String chainFile = store.getString(buildGwas + "To" + BUILD);
                if (chainFile == null) {
                    context.error("Currently we do not support liftOver from " + buildGwas + " to " + BUILD);
                    return false;
                }

                String fullPathChainFile = FileUtil.path(folder, chainFile);
                if (!new File(fullPathChainFile).exists()) {
                    context.error("Chain file " + fullPathChainFile + " not found.");
                    return false;
                }

                String chunksDir = FileUtil.path(context.getLocalTemp(), "liftover");
                FileUtil.createDirectory(chunksDir);

                LiftOverTask task = new LiftOverTask();
                task.setVcfFilenames(files);
                task.setChainFile(fullPathChainFile);
                task.setChunksDir(chunksDir);
                task.setExcludedSnpsWriter(null);

                TaskResults results = runTask(context, task);

                if (results.isSuccess()) {
                    files = task.getNewVcfFilenames();
                } else {
                    return false;
                }

            }

            String mergedFile = FileUtil.path(output, "study.merged.vcf.gz");

            if (!checkDataAndMerge(context, files, mergedFile)) {
                return false;
            }

            context.beginTask("Preparing TRACE jobs");

            // read the sample names from the merged vcf file
            VCFFileReader reader = new VCFFileReader(new File(mergedFile), false);
            VCFHeader header = reader.getFileHeader();
            reader.close();
            ArrayList<String> samples = header.getSampleNamesInOrder();
            int nIndividuals = samples.size();
            int batch = 0;
            int start = 1;
            int end;

            // split the samples into batches of at most batchSize and write
            // one <batch>.batch file per group
            while (start <= nIndividuals) {
                end = start + batchSize - 1;
                if (end > nIndividuals) {
                    end = nIndividuals;
                }

                LineWriter lineWriter = new LineWriter(FileUtil.path(output, batch + ".batch"));
                for (int i = start - 1; i <= end - 1; i++) {
                    lineWriter.write(samples.get(i));
                }
                lineWriter.close();

                start = end + 1;
                batch++;

                context.log("\t- Created batch No. " + batch);
            }

            context.endTask("Prepared " + batch + " batch job" + ((batch > 1) ? "s." : "."), WorkflowContext.OK);

            return true;

        } catch (IOException e) {
            context.error("An internal server error occurred.");
            e.printStackTrace();
        }

        context.error("Execution failed. Please contact the administrator.");

        return false;
    }

    public boolean checkDataAndMerge(WorkflowContext context, String[] files, String mergedFile) {

        try {

            context.beginTask("Input Validation");

            String referenceSites = context.get("reference_sites");

            TraceInputValidation validation = new TraceInputValidation();

            TraceInputValidationResult result = validation.mergeAndCheckSites(files, referenceSites, mergedFile);

            String message = "Loaded " + result.getTotal() + " variants" + "\n" + "Variants with different alleles: "
                    + result.getAlleleMissmatch() + "\n" + "Variants with allele switches: "
                    + result.getAlleleSwitches() + "\n" + "Variants not found in reference: " + result.getNotFound()
                    + "\n" + "Overlapping variants used by LASER: " + result.getFound();

            context.endTask(message, WorkflowContext.OK);

            if (result.getFound() <= MIN_VARIANTS) {
                context.error("Number of variants shared with the reference is too small (&le;" + MIN_VARIANTS
                        + ").\nPlease check if the input data is correct or try another ancestry reference panel.");
                return false;
            }

            return true;

        } catch (IOException e) {
            context.error("Input Validation failed: " + e);
            e.printStackTrace();
            return false;
        }
    }

    protected TaskResults runTask(final WorkflowContext context, ITask task) {
        context.beginTask("Running " + task.getName() + "...");
        TaskResults results;
        try {
            results = task.run(new ITaskProgressListener() {

                @Override
                public void progress(String message) {
                    context.updateTask(message, WorkflowContext.RUNNING);
                }
            });

            if (results.isSuccess()) {
                context.endTask(task.getName(), WorkflowContext.OK);
            } else {
                context.endTask(task.getName() + "\n" + results.getMessage(), WorkflowContext.ERROR);
            }
            return results;
        } catch (InterruptedException e) {
            e.printStackTrace();
            TaskResults result = new TaskResults();
            result.setSuccess(false);
            result.setMessage(e.getMessage());
            StringWriter s = new StringWriter();
            e.printStackTrace(new PrintWriter(s));
            context.println("Task '" + task.getName() + "' failed.\nException:" + s.toString());
            context.endTask(e.getMessage(), WorkflowContext.ERROR);
            return result;
        } catch (Exception e) {
            e.printStackTrace();
            TaskResults result = new TaskResults();
            result.setSuccess(false);
            result.setMessage(e.getMessage());
            StringWriter s = new StringWriter();
            e.printStackTrace(new PrintWriter(s));
            context.println("Task '" + task.getName() + "' failed.\nException:" + s.toString());
            context.endTask(task.getName() + " failed.", WorkflowContext.ERROR);
            return result;
        } catch (Error e) {
            e.printStackTrace();
            TaskResults result = new TaskResults();
            result.setSuccess(false);
            result.setMessage(e.getMessage());
            StringWriter s = new StringWriter();
            e.printStackTrace(new PrintWriter(s));
            context.println("Task '" + task.getName() + "' failed.\nException:" + s.toString());
            context.endTask(task.getName() + " failed.", WorkflowContext.ERROR);
            return result;
        }

    }

}
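The while loop in prepareTraceJobs splits the merged sample list into consecutive groups of at most batch_size samples and writes one <batch>.batch file per group. The following self-contained sketch (hypothetical class and sample names, no Cloudgene or genepi dependencies) reproduces only that splitting arithmetic:

import java.util.ArrayList;
import java.util.List;

// Standalone illustration of the batch-splitting bookkeeping used in
// TraceStep.prepareTraceJobs: samples are assigned to consecutive batches of
// at most batchSize entries; the last batch may be smaller.
public class BatchSplitExample {

    public static List<List<String>> split(List<String> samples, int batchSize) {
        List<List<String>> batches = new ArrayList<>();
        int start = 1;
        while (start <= samples.size()) {
            // same 1-based start/end indices as in TraceStep
            int end = Math.min(start + batchSize - 1, samples.size());
            batches.add(new ArrayList<>(samples.subList(start - 1, end)));
            start = end + 1;
        }
        return batches;
    }

    public static void main(String[] args) {
        List<String> samples = List.of("S1", "S2", "S3", "S4", "S5");
        // prints [[S1, S2], [S3, S4], [S5]]
        System.out.println(split(samples, 2));
    }
}

With five samples and a batch size of 2 this yields three batches, matching the count that the step reports in its "Prepared N batch jobs" message.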

0 commit comments
