diff --git a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java
index f31d6231c2..39341fad0c 100644
--- a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java
+++ b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java
@@ -131,6 +131,7 @@ public abstract class TargetMetricsCollector
     private final Histogram<Integer> unfilteredBaseQHistogram = new Histogram<>("baseq", "unfiltered_baseq_count");
+    private final Histogram<Integer> unCappedUnfilteredBaseQHistogram = new Histogram<>("baseq", "unfiltered_baseq_count");
 
     private static final double LOG_ODDS_THRESHOLD = 3.0;
@@ -367,6 +368,8 @@ public class PerUnitTargetMetricCollector implements PerUnitMetricCollector
+        final long[] unCappedBaseQHistogramArray = new long[Byte.MAX_VALUE];
+
         private final Map<Interval, Coverage> highQualityCoverageByTarget;
@@ -374,6 +377,8 @@ public class PerUnitTargetMetricCollector implements PerUnitMetricCollector
         private final Map<Interval, Coverage> unfilteredCoverageByTarget;
+        private long hqMaxDepth = 0;
+
         private final TargetMetrics metrics = new TargetMetrics();
         private final int minimumBaseQuality;
         private final CountingAdapterFilter adapterFilter;
@@ -631,11 +636,13 @@ public void acceptRecord(final SAMRecord record) {
                 if (ufCoverage.getDepths()[targetOffset] <= coverageCap) {
                     baseQHistogramArray[qual]++;
                 }
+                unCappedBaseQHistogramArray[qual]++;
 
                 // Then filtered
                 if (highQual) {
                     final Coverage hqCoverage = highQualityCoverageByTarget.get(target);
                     hqCoverage.addBase(targetOffset);
+                    hqMaxDepth = Math.max(hqMaxDepth, hqCoverage.getDepths()[targetOffset]);
 
                     if (coveredTargets.add(target)) {
                         hqCoverage.incrementReadCount();
@@ -699,16 +706,14 @@ public void finish() {
 
         /** Calculates how much additional sequencing is needed to raise 80% of bases to the mean for the lane. */
         private void calculateTargetCoverageMetrics() {
-            final long[] highQualityCoverageHistogramArray = new long[coverageCap+1];
+
+            LongStream.range(0, hqMaxDepth).forEach(i -> highQualityDepthHistogram.increment((int) i, 0));
+
             int zeroCoverageTargets = 0;
 
             // the number of bases we counted towards the depth histogram plus those that got thrown out by the coverage cap
             long totalCoverage = 0;
 
-            // the maximum depth at any target base
-            long maxDepth = 0;
-
-            // the minimum depth at any target base
             long minDepth = Long.MAX_VALUE;
 
             // The "how many target bases at at-least X" calculations.
@@ -723,7 +728,7 @@ private void calculateTargetCoverageMetrics() {
             for (final Coverage c : this.highQualityCoverageByTarget.values()) {
                 if (!c.hasCoverage()) {
                     zeroCoverageTargets++;
-                    highQualityCoverageHistogramArray[0] += c.interval.length();
+                    highQualityDepthHistogram.increment(0, c.interval.length());
                     targetBases[0] += c.interval.length();
                     minDepth = 0;
                     continue;
@@ -731,8 +736,7 @@ private void calculateTargetCoverageMetrics() {
 
                 for (final int depth : c.getDepths()) {
                     totalCoverage += depth;
-                    highQualityCoverageHistogramArray[Math.min(depth, coverageCap)]++;
-                    maxDepth = Math.max(maxDepth, depth);
+                    highQualityDepthHistogram.increment(depth, 1);
                     minDepth = Math.min(minDepth, depth);
 
                     // Add to the "how many target bases at at-least X" calculations.
@@ -747,16 +751,11 @@ private void calculateTargetCoverageMetrics() {
                 throw new PicardException("the number of target bases with at least 0x coverage does not equal the number of target bases");
             }
 
-            for (int i = 0; i < highQualityCoverageHistogramArray.length; ++i) {
-                highQualityDepthHistogram.increment(i, highQualityCoverageHistogramArray[i]);
-            }
-
-            // we do this instead of highQualityDepthHistogram.getMean() because the histogram imposes a coverage cap
             metrics.MEAN_TARGET_COVERAGE = (double) totalCoverage / metrics.TARGET_TERRITORY;
             metrics.MEDIAN_TARGET_COVERAGE = highQualityDepthHistogram.getMedian();
-            metrics.MAX_TARGET_COVERAGE = maxDepth;
+            metrics.MAX_TARGET_COVERAGE = hqMaxDepth;
             // Use Math.min() to account for edge case where highQualityCoverageByTarget is empty (minDepth=Long.MAX_VALUE)
-            metrics.MIN_TARGET_COVERAGE = Math.min(minDepth, maxDepth);
+            metrics.MIN_TARGET_COVERAGE = Math.min(minDepth, hqMaxDepth);
 
             // compute the coverage value such that 80% of target bases have better coverage than it i.e. 20th percentile
             // this roughly measures how much we must sequence extra such that 80% of target bases have coverage at least as deep as the current mean coverage
@@ -783,32 +782,33 @@ private void calculateTargetCoverageMetrics() {
             metrics.PCT_TARGET_BASES_100000X = (double) targetBases[17] / (double) targetBases[0];
         }
+
         private void calculateTheoreticalHetSensitivity(){
-            final long[] unfilteredDepthHistogramArray = new long[coverageCap + 1];
+            LongStream.range(0, coverageCap).forEach(i -> unfilteredDepthHistogram.increment((int) i, 0));
 
             // collect the unfiltered coverages (i.e. only quality 2 bases excluded) for all targets into a histogram array
             for (final Coverage c : this.unfilteredCoverageByTarget.values()) {
                 if (!c.hasCoverage()) {
-                    unfilteredDepthHistogramArray[0] += c.interval.length();
+                    unfilteredDepthHistogram.increment(0, c.interval.length());
                    continue;
                 }
 
                 for (final int depth : c.getDepths()) {
-                    unfilteredDepthHistogramArray[Math.min(depth, coverageCap)]++;
+                    unfilteredDepthHistogram.increment(Math.min(depth, coverageCap), 1);
                 }
             }
 
-            if (LongStream.of(baseQHistogramArray).sum() != LongStream.rangeClosed(0, coverageCap).map(i -> i * unfilteredDepthHistogramArray[(int)i]).sum()) {
+            if (LongStream.of(baseQHistogramArray).sum() != unfilteredDepthHistogram.getSum()) {
                 throw new PicardException("numbers of bases in the base quality histogram and the coverage histogram are not equal");
             }
 
-            // TODO: normalize the arrays directly. then we don't have to convert to Histograms
+            //TODO this is used elsewhere, so can't remove from here just yet - consider moving to accessor.
             for (int i=0; i < baseQHistogramArray.length; ++i) {
                 unfilteredBaseQHistogram.increment(i, baseQHistogramArray[i]);
             }
@@ ... @@
         public void addMetricsToFile(final MetricsFile<METRIC_TYPE, Integer> hsMetricsComparableMetricsFile) {
             hsMetricsComparableMetricsFile.addMetric(convertMetric(this.metrics));
             hsMetricsComparableMetricsFile.addHistogram(highQualityDepthHistogram);
-            hsMetricsComparableMetricsFile.addHistogram(unfilteredBaseQHistogram);
+            hsMetricsComparableMetricsFile.addHistogram(unCappedUnfilteredBaseQHistogram);
         }
     }
diff --git a/src/test/java/picard/analysis/directed/CollectHsMetricsTest.java b/src/test/java/picard/analysis/directed/CollectHsMetricsTest.java
index 363eab1aa8..90ce6eb274 100644
--- a/src/test/java/picard/analysis/directed/CollectHsMetricsTest.java
+++ b/src/test/java/picard/analysis/directed/CollectHsMetricsTest.java
@@ -217,6 +217,41 @@ public void testHsMetricsHandlesIndelsAppropriately() throws IOException {
         Assert.assertEquals(insWithIndelHandling.PCT_USABLE_BASES_ON_TARGET, 1.0d); // inserted bases are counted as on target
     }
 
+    @Test
+    public void testHsMetricsF80DoesNotUseCovCap() throws IOException {
+        final SAMRecordSetBuilder highCoverage = new SAMRecordSetBuilder(true, SortOrder.coordinate);
+        final IntervalList targets = new IntervalList(highCoverage.getHeader());
+        final IntervalList baits = new IntervalList(highCoverage.getHeader());
+        targets.add(new Interval("chr1", 1000, 1199, false, "t1"));
+        baits.add(new Interval("chr1", 950, 1049, false, "b1"));
+        baits.add(new Interval("chr1", 1050, 1149, false, "b2"));
+        baits.add(new Interval("chr1", 1150, 1249, false, "b3"));
+
+        // Generate 100000 reads that fully cover the target in each BAM
+        for (int i=0; i<100000; ++i) {
+            highCoverage.addFrag("r" + i, 0, 1000, false, false, "100M100M", null, 30);
+        }
+
+        // Write things out to file
+        final File dir = IOUtil.createTempDir("hsmetrics.test").toFile();
+        final File bs = new File(dir, "baits.interval_list").getAbsoluteFile();
+        final File ts = new File(dir, "targets.interval_list").getAbsoluteFile();
+        baits.write(bs);
+        targets.write(ts);
+        final File withHighCovBam = writeBam(highCoverage, new File(dir, "fold_80.bam"));
+
+        // Now run CollectHsMetrics
+        final File out = Files.createTempFile("hsmetrics_high_coverage.", ".txt").toFile();
+        runPicardCommandLine(Arrays.asList("COVERAGE_CAP=10", "SAMPLE_SIZE=0", "TI=" + ts.getPath(), "BI=" + bs.getPath(), "O=" + out.getPath(), "I=" + withHighCovBam.getAbsolutePath()));
+        final HsMetrics highCoverageMetrics = readMetrics(out);
+
+        IOUtil.deleteDirectoryTree(dir);
+        // actual coverage should not be impacted by coverage_cap
+        Assert.assertEquals(highCoverageMetrics.MEAN_TARGET_COVERAGE, 100000);
+        Assert.assertEquals(highCoverageMetrics.MEDIAN_TARGET_COVERAGE, 100000);
+        Assert.assertEquals(highCoverageMetrics.FOLD_80_BASE_PENALTY, 1);
+    }
+
     @Test
     public void testHsMetricsHighTargetCoverage() throws IOException {
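
For context on the fix above: FOLD_80_BASE_PENALTY divides the mean target coverage (taken from the uncapped totalCoverage in this patch) by the coverage at the 20th percentile of target bases, so the histogram that supplies the percentile must not be clamped to COVERAGE_CAP. The standalone sketch below illustrates the arithmetic and the pre-fix failure mode the new test guards against; Fold80Sketch and fold80BasePenalty are hypothetical names, the percentile convention is simplified, and this is not Picard's implementation.

import java.util.Arrays;

// Standalone sketch of the FOLD_80_BASE_PENALTY arithmetic (hypothetical names,
// simplified percentile convention; not Picard's implementation).
public class Fold80Sketch {

    // fold-80 = mean coverage / 20th-percentile coverage over all target bases.
    static double fold80BasePenalty(final long[] perBaseDepths) {
        final long[] sorted = perBaseDepths.clone();
        Arrays.sort(sorted);

        double total = 0;
        for (final long d : sorted) {
            total += d;
        }
        final double mean = total / sorted.length;

        // Coverage value that roughly 80% of target bases meet or exceed.
        final long p20 = sorted[(int) Math.floor(0.2 * (sorted.length - 1))];
        return mean / p20;
    }

    public static void main(final String[] args) {
        // Uniform 100000x coverage over a 200 bp target: the penalty is exactly 1.0.
        final long[] depths = new long[200];
        Arrays.fill(depths, 100_000L);
        System.out.println(fold80BasePenalty(depths)); // 1.0

        // Pre-fix failure mode: the mean came from uncapped totals while the percentile
        // was read from depths clamped to COVERAGE_CAP (10 here), inflating the penalty.
        final long[] clamped = new long[200];
        Arrays.fill(clamped, Math.min(100_000L, 10L));
        final double uncappedMean = 100_000d;
        final long p20FromClamped = clamped[(int) Math.floor(0.2 * (clamped.length - 1))];
        System.out.println(uncappedMean / p20FromClamped); // 10000.0 instead of 1.0
    }
}

With COVERAGE_CAP=10 and uniform 100000x coverage, a capped depth histogram would put the 20th percentile at the cap, so the reported penalty would have been around 100000 / 10 = 10000 rather than the 1 that testHsMetricsF80DoesNotUseCovCap now asserts.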