Skip to content

Commit

Permalink
Merge branch 'features/msav-support' of https://github.com/genepi/imp…
Browse files Browse the repository at this point in the history
…utationserver into fix/2023-10-24-minimac41
  • Loading branch information
abought committed Oct 24, 2023
2 parents bf8ec3c + 8a03f7c commit 73d647e
Show file tree
Hide file tree
Showing 37 changed files with 88 additions and 98 deletions.
Binary file removed files/bin/Minimac4
Binary file not shown.
Binary file added files/bin/minimac4
Binary file not shown.
3 changes: 0 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,8 @@

<groupId>genepi</groupId>
<artifactId>imputationserver</artifactId>

<version>1.7.4</version>

<packaging>jar</packaging>

<name>University of Michigan Imputation Server</name>
<url>http://maven.apache.org</url>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ protected void setup(Context context) throws IOException, InterruptedException {
String referenceName = parameters.get(ImputationJob.REF_PANEL);
imputationParameters.setPhasing(phasingEngine);
imputationParameters.setReferencePanelName(referenceName);
imputationParameters.setMinR2(minR2);
imputationParameters.setPhasingRequired(phasingRequired);

// get cached files
Expand Down Expand Up @@ -153,7 +152,7 @@ protected void setup(Context context) throws IOException, InterruptedException {
mapBeagleFilename = cache.getFile(mapBeagle);
}

String minimacCommand = cache.getFile("Minimac4");
String minimacCommand = cache.getFile("minimac4");
String eagleCommand = cache.getFile("eagle");
String beagleCommand = cache.getFile("beagle.jar");
String tabixCommand = cache.getFile("tabix");
Expand Down Expand Up @@ -226,6 +225,7 @@ protected void setup(Context context) throws IOException, InterruptedException {
pipeline.setPhasingWindow(phasingWindow);
pipeline.setBuild(build);
pipeline.setMinimacWindow(window);
pipeline.setMinR2(minR2);

}

Expand Down Expand Up @@ -289,16 +289,8 @@ public void map(LongWritable key, Text value, Context context) throws IOExceptio
statistics.setImportTime((end - start) / 1000);

} else {
if (imputationParameters.getMinR2() > 0) {
// filter by r2
String filteredInfoFilename = outputChunk.getInfoFilename() + "_filtered";
filterInfoFileByR2(outputChunk.getInfoFilename(), filteredInfoFilename,
imputationParameters.getMinR2());
HdfsUtil.put(filteredInfoFilename, HdfsUtil.path(output, chunk + ".info"));

} else {
HdfsUtil.put(outputChunk.getInfoFilename(), HdfsUtil.path(output, chunk + ".info"));
}

HdfsUtil.put(outputChunk.getInfoFilename(), HdfsUtil.path(output, chunk + ".info"));

long start = System.currentTimeMillis();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ public class ImputationPipeline {
private int minimacWindow;

private int phasingWindow;

private double minR2;

private String refFilename;

Expand Down Expand Up @@ -288,6 +290,16 @@ public boolean phaseWithBeagle(VcfChunk input, VcfChunkOutput output, String ref
public boolean imputeVCF(VcfChunkOutput output)
throws InterruptedException, IOException, CompilationFailedException {

// create tabix index
Command tabix = new Command(tabixCommand);
tabix.setSilent(false);
tabix.setParams(output.getPhasedVcfFilename());
System.out.println("Command: " + tabix.getExecutedCommand());
if (tabix.execute() != 0) {
System.out.println("Error during index creation: " + tabix.getStdOut());
return false;
}

String chr = "";
if (build.equals("hg38")) {
chr = "chr" + output.getChromosome();
Expand All @@ -306,6 +318,7 @@ public boolean imputeVCF(VcfChunkOutput output)
binding.put("chr", chr);
binding.put("unphased", false);
binding.put("mapMinimac", mapMinimac);
binding.put("minR2", minR2);

String[] params = createParams(minimacParams, binding);

Expand Down Expand Up @@ -474,4 +487,8 @@ public void setMapBeagleFilename(String mapBeagleFilename) {
this.mapBeagleFilename = mapBeagleFilename;
}

public void setMinR2(double minR2) {
this.minR2 = minR2;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ public static Properties defaults() {
defaults.setProperty("server.url", "https://imputationserver.sph.umich.edu");
defaults.setProperty("minimac.tmp", "/tmp");
defaults.setProperty("minimac.command",
"--refHaps ${ref} --haps ${vcf} --start ${start} --end ${end} --window ${window} --prefix ${prefix} --chr ${chr} --cpus 1 --noPhoneHome --format GT,DS,GP --allTypedSites --meta --minRatio 0.00001 ${chr =='MT' ? '--myChromosome ' + chr : ''} ${unphased ? '--unphasedOutput' : ''} ${mapMinimac != null ? '--referenceEstimates --map ' + mapMinimac : ''}");
"--region ${chr}:${start}-${end} --overlap ${window} --output ${prefix}.dose.vcf.gz --output-format vcf.gz --format GT,DS,GP,HDS --min-ratio 0.00001 --all-typed-sites --sites ${prefix}.info --empirical-output ${prefix}.empiricalDose.vcf.gz ${minR2 != 0 ? '--min-r2 ' + minR2 : ''} ${mapMinimac != null ? '--map ' + mapMinimac : ''} ${ref} ${vcf}");
defaults.setProperty("eagle.command",
"--vcfRef ${ref} --vcfTarget ${vcf} --geneticMapFile ${map} --outPrefix ${prefix} --bpStart ${start} --bpEnd ${end} --allowRefAltSwap --vcfOutFormat z --keepMissingPloidyX");
defaults.setProperty("beagle.command",
Expand Down
51 changes: 20 additions & 31 deletions src/main/java/genepi/imputationserver/util/FileMerger.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,35 +24,18 @@ public static void splitIntoHeaderAndData(String input, OutputStream outHeader,

while (reader.next()) {
String line = reader.get();

if (!line.startsWith("#")) {
if (parameters.getMinR2() > 0) {
// rsq set. parse line and check rsq
String info = parseInfo(line);
if (info != null) {
boolean keep = keepVcfLineByInfo(info, R2_FLAG, parameters.getMinR2());
if (keep) {
outData.write(line.getBytes());
outData.write("\n".getBytes());
}
} else {
// no valid vcf line. keep line
outData.write(line.getBytes());
outData.write("\n".getBytes());
}
} else {
// no rsq set. keep all lines without parsing
outData.write(line.getBytes());
outData.write("\n".getBytes());
}
outData.write(line.getBytes());
outData.write("\n".getBytes());
} else {

// write filter command before ID List starting with #CHROM
if (line.startsWith("#CHROM")) {
outHeader.write(("##pipeline=" + ImputationPipeline.PIPELINE_VERSION + "\n").getBytes());
outHeader.write(("##imputation=" + ImputationPipeline.IMPUTATION_VERSION + "\n").getBytes());
outHeader.write(("##phasing=" + parameters.getPhasingMethod() + "\n").getBytes());
outHeader.write(("##panel=" + parameters.getReferencePanelName() + "\n").getBytes());
outHeader.write(("##r2Filter=" + parameters.getMinR2() + "\n").getBytes());
outHeader.write(("##mis_pipeline=" + ImputationPipeline.PIPELINE_VERSION + "\n").getBytes());
outHeader.write(("##mis_imputation=" + ImputationPipeline.IMPUTATION_VERSION + "\n").getBytes());
outHeader.write(("##mis_phasing=" + parameters.getPhasingMethod() + "\n").getBytes());
outHeader.write(("##mis_panel=" + parameters.getReferencePanelName() + "\n").getBytes());
}

// write all headers except minimac4 command
Expand Down Expand Up @@ -85,9 +68,9 @@ public static void splitPhasedIntoHeaderAndData(String input, OutputStream outHe

// write filter command before ID List starting with #CHROM
if (line.startsWith("#CHROM")) {
outHeader.write(("##pipeline=" + ImputationPipeline.PIPELINE_VERSION + "\n").getBytes());
outHeader.write(("##phasing=" + parameters.getPhasingMethod() + "\n").getBytes());
outHeader.write(("##panel=" + parameters.getReferencePanelName() + "\n").getBytes());
outHeader.write(("##mis_pipeline=" + ImputationPipeline.PIPELINE_VERSION + "\n").getBytes());
outHeader.write(("##mis_phasing=" + parameters.getPhasingMethod() + "\n").getBytes());
outHeader.write(("##mis_panel=" + parameters.getReferencePanelName() + "\n").getBytes());
}

// write all headers except eagle command
Expand Down Expand Up @@ -129,24 +112,30 @@ public static void mergeAndGzInfo(List<String> hdfs, String local) throws IOExce

LineReader reader = new LineReader(in);

boolean header = true;
boolean lineBreak = false;

while (reader.next()) {

String line = reader.get();

if (header) {
if (line.startsWith("#")) {

if (firstFile) {

if (lineBreak) {
out.write('\n');
}
out.write(line.toString().getBytes());
firstFile = false;
lineBreak = true;
}
header = false;
} else {
out.write('\n');
out.write(line.toString().getBytes());
}
}

firstFile = false;

in.close();

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ public class ImputationParameters {

private String referencePanelName;

private double minR2;

private String phasing;

private boolean phasingRequired;
Expand All @@ -20,14 +18,6 @@ public void setReferencePanelName(String referencePanelName) {
this.referencePanelName = referencePanelName;
}

public double getMinR2() {
return minR2;
}

public void setMinR2(double minR2) {
this.minR2 = minR2;
}

public String getPhasing() {
return phasing;
}
Expand Down
53 changes: 29 additions & 24 deletions src/test/java/genepi/imputationserver/steps/ImputationTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ public void testPipelineWithEagle() throws IOException, ZipException {
assertEquals(true, file.isPhased());
assertEquals(TOTAL_REFPANEL_CHR20_B37, file.getNoSnps());

int snpInInfo = getLineCount("test-data/tmp/chr20.info.gz") - 1;
int snpInInfo = getLineCount("test-data/tmp/chr20.info.gz");
assertEquals(snpInInfo, file.getNoSnps());

FileUtil.deleteDirectory("test-data/tmp");
Expand Down Expand Up @@ -358,10 +358,10 @@ public void testValidatePanelWithEagle() throws IOException, ZipException {

VCFFileReader reader = new VCFFileReader(new File("test-data/tmp/chr20.dose.vcf.gz"), false);
VCFHeader header = reader.getFileHeader();
assertEquals("hapmap2", header.getOtherHeaderLine("panel").getValue());
assertEquals(ImputationPipeline.EAGLE_VERSION, header.getOtherHeaderLine("phasing").getValue());
assertEquals(ImputationPipeline.IMPUTATION_VERSION, header.getOtherHeaderLine("imputation").getValue());
assertEquals(ImputationPipeline.PIPELINE_VERSION, header.getOtherHeaderLine("pipeline").getValue());
assertEquals("hapmap2", header.getOtherHeaderLine("mis_panel").getValue());
assertEquals(ImputationPipeline.EAGLE_VERSION, header.getOtherHeaderLine("mis_phasing").getValue());
assertEquals(ImputationPipeline.IMPUTATION_VERSION, header.getOtherHeaderLine("mis_imputation").getValue());
assertEquals(ImputationPipeline.PIPELINE_VERSION, header.getOtherHeaderLine("mis_pipeline").getValue());

FileUtil.deleteDirectory("test-data/tmp");

Expand Down Expand Up @@ -404,10 +404,10 @@ public void testValidatePanelWithBeagle() throws IOException, ZipException {

VCFFileReader reader = new VCFFileReader(new File("test-data/tmp/chr20.dose.vcf.gz"), false);
VCFHeader header = reader.getFileHeader();
assertEquals("hapmap2", header.getOtherHeaderLine("panel").getValue());
assertEquals(ImputationPipeline.BEAGLE_VERSION, header.getOtherHeaderLine("phasing").getValue());
assertEquals(ImputationPipeline.IMPUTATION_VERSION, header.getOtherHeaderLine("imputation").getValue());
assertEquals(ImputationPipeline.PIPELINE_VERSION, header.getOtherHeaderLine("pipeline").getValue());
assertEquals("hapmap2", header.getOtherHeaderLine("mis_panel").getValue());
assertEquals(ImputationPipeline.BEAGLE_VERSION, header.getOtherHeaderLine("mis_phasing").getValue());
assertEquals(ImputationPipeline.IMPUTATION_VERSION, header.getOtherHeaderLine("mis_imputation").getValue());
assertEquals(ImputationPipeline.PIPELINE_VERSION, header.getOtherHeaderLine("mis_pipeline").getValue());

FileUtil.deleteDirectory("test-data/tmp");

Expand Down Expand Up @@ -451,9 +451,9 @@ public void testValidatePanelPhasingOnly() throws IOException, ZipException {

VCFFileReader reader = new VCFFileReader(new File("test-data/tmp/chr20.phased.vcf.gz"), false);
VCFHeader header = reader.getFileHeader();
assertEquals("hapmap2", header.getOtherHeaderLine("panel").getValue());
assertEquals(ImputationPipeline.EAGLE_VERSION, header.getOtherHeaderLine("phasing").getValue());
assertEquals(ImputationPipeline.PIPELINE_VERSION, header.getOtherHeaderLine("pipeline").getValue());
assertEquals("hapmap2", header.getOtherHeaderLine("mis_panel").getValue());
assertEquals(ImputationPipeline.EAGLE_VERSION, header.getOtherHeaderLine("mis_phasing").getValue());
assertEquals(ImputationPipeline.PIPELINE_VERSION, header.getOtherHeaderLine("mis_pipeline").getValue());

FileUtil.deleteDirectory("test-data/tmp");

Expand Down Expand Up @@ -497,9 +497,9 @@ public void testValidatePanelPhasedInput() throws IOException, ZipException {

VCFFileReader reader = new VCFFileReader(new File("test-data/tmp/chr20.dose.vcf.gz"), false);
VCFHeader header = reader.getFileHeader();
assertEquals("hapmap2", header.getOtherHeaderLine("panel").getValue());
assertEquals("n/a", header.getOtherHeaderLine("phasing").getValue());
assertEquals(ImputationPipeline.PIPELINE_VERSION, header.getOtherHeaderLine("pipeline").getValue());
assertEquals("hapmap2", header.getOtherHeaderLine("mis_panel").getValue());
assertEquals("n/a", header.getOtherHeaderLine("mis_phasing").getValue());
assertEquals(ImputationPipeline.PIPELINE_VERSION, header.getOtherHeaderLine("mis_pipeline").getValue());

// FileUtil.deleteDirectory("test-data/tmp");

Expand Down Expand Up @@ -611,7 +611,7 @@ public void testPipelineWithEagleAndScores() throws IOException, ZipException {
assertEquals(true, file.isPhased());
assertEquals(TOTAL_REFPANEL_CHR20_B37, file.getNoSnps());

int snpInInfo = getLineCount("test-data/tmp/chr20.info.gz") - 1;
int snpInInfo = getLineCount("test-data/tmp/chr20.info.gz");
assertEquals(snpInInfo, file.getNoSnps());

String[] args = { "test-data/tmp/chr20.dose.vcf.gz", "--ref", "PGS000018,PGS000027", "--out",
Expand Down Expand Up @@ -704,7 +704,7 @@ public void testPipelineWithEagleAndScoresAndFormat() throws IOException, ZipExc
assertEquals(true, file.isPhased());
assertEquals(TOTAL_REFPANEL_CHR20_B37, file.getNoSnps());

int snpInInfo = getLineCount("test-data/tmp/chr20.info.gz") - 1;
int snpInInfo = getLineCount("test-data/tmp/chr20.info.gz");
assertEquals(snpInInfo, file.getNoSnps());

String[] args = { "test-data/tmp/chr20.dose.vcf.gz", "--ref", score1, "--out", "test-data/tmp/expected.txt" };
Expand Down Expand Up @@ -962,7 +962,7 @@ public void testPipelineWithEagleAndR2Filter() throws IOException, ZipException
// TODO: update SNPS_WITH_R2_BELOW_05
assertTrue(TOTAL_REFPANEL_CHR20_B37 > file.getNoSnps());

int snpInInfo = getLineCount("test-data/tmp/chr20.info.gz") - 1;
int snpInInfo = getLineCount("test-data/tmp/chr20.info.gz");
assertEquals(snpInInfo, file.getNoSnps());

FileUtil.deleteDirectory("test-data/tmp");
Expand All @@ -973,7 +973,12 @@ private int getLineCount(String filename) throws IOException {
LineReader reader = new LineReader(filename);
int lines = 0;
while (reader.next()) {
lines++;

String line = reader.get();
{
if (!line.startsWith("#"))
lines++;
}
}
return lines;
}
Expand Down Expand Up @@ -1001,12 +1006,12 @@ private boolean checkSortPositionInfo(String filename) throws IOException {

String line = reader.get();

if (!line.startsWith("SNP")) {
String snp = line.split("\t")[0];
if (Integer.valueOf(snp.split(":")[1]) <= pos) {
if (!line.startsWith("#")) {
String snp = line.split("\\s+")[1];
if (Integer.valueOf(snp) <= pos) {
return false;
}
pos = Integer.valueOf(snp.split(":")[1]);
pos = Integer.valueOf(snp);
}

}
Expand Down Expand Up @@ -1081,7 +1086,7 @@ public void testCompareInfoAndDosageSize() throws IOException, ZipException {

// subtract header
int infoCount = getLineCount("test-data/tmp/chr20.info.gz");
assertEquals(infoCount - 1, file.getNoSnps());
assertEquals(infoCount, file.getNoSnps());
FileUtil.deleteDirectory("test-data/tmp");

}
Expand Down
2 changes: 1 addition & 1 deletion test-data/configs/beagle/panels.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
panels:

- id: hapmap2
hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.m3vcf.gz
hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav
legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz
refBeagle: ref-panels/hapmap_r22.chr$chr.CEU.hg19.recode.bref3
mapBeagle: ref-panels/plink.chr$chr.GRCh37.map
Expand Down
Binary file not shown.
Loading

0 comments on commit 73d647e

Please sign in to comment.