From 347c0ac571be29d7aa59fea3090947d9dcc9f8f0 Mon Sep 17 00:00:00 2001 From: michaelgatzen Date: Wed, 21 Sep 2022 13:31:18 -0400 Subject: [PATCH] Fix EdgeReadIterator (#1616) * Fixing bug in EdgeReadIterator - Adding tests to catch previously faulty behavior - Also fixing other bugs regarding sorting of IntervalLists in AbstractLocusIterator and IntervalListReferenceSequenceMask --- .../samtools/util/AbstractLocusIterator.java | 25 ++- .../samtools/util/EdgeReadIterator.java | 176 +++++++++++++----- .../IntervalListReferenceSequenceMask.java | 7 +- .../samtools/util/EdgeReadIteratorTest.java | 127 ++++++++++++- 4 files changed, 264 insertions(+), 71 deletions(-) diff --git a/src/main/java/htsjdk/samtools/util/AbstractLocusIterator.java b/src/main/java/htsjdk/samtools/util/AbstractLocusIterator.java index 79cc39e044..2a530ab6a4 100644 --- a/src/main/java/htsjdk/samtools/util/AbstractLocusIterator.java +++ b/src/main/java/htsjdk/samtools/util/AbstractLocusIterator.java @@ -142,11 +142,6 @@ public abstract class AbstractLocusIterator locusComparator = new LocusComparator<>(); - /** - * Last processed interval, relevant only if list of intervals is defined. - */ - private int lastInterval = 0; - public SAMFileHeader getHeader() { @@ -159,7 +154,7 @@ public SAMFileHeader getHeader() { * * @param samReader must be coordinate sorted * @param intervalList Either the list of desired intervals, or null. Note that if an intervalList is - * passed in that is not coordinate sorted, it will eventually be coordinated sorted by this class. + * passed in that is not coordinate sorted, it will eventually be coordinated sorted here. * @param useIndex If true, do indexed lookup to improve performance. Not relevant if intervalList == null. * It is no longer the case the useIndex==true can make performance worse. It should always perform at least * as well as useIndex==false, and generally will be much faster. @@ -176,8 +171,14 @@ public AbstractLocusIterator(final SamReader samReader, final IntervalList inter this.samReader = samReader; this.useIndex = useIndex; if (intervalList != null) { - intervals = intervalList.uniqued().getIntervals(); - this.referenceSequenceMask = new IntervalListReferenceSequenceMask(intervalList); + if (!intervalList.getHeader().getSequenceDictionary().isSameDictionary(getHeader().getSequenceDictionary())) { + throw new SAMException("The sequence dictionary of the interval list file differs from the sequence dictionary of the input SAM file."); + } + + final boolean intervalListIsSorted = intervalList.getHeader().getSortOrder() == SAMFileHeader.SortOrder.coordinate; + final IntervalList sortedIntervalList = intervalListIsSorted ? intervalList : intervalList.sorted(); + intervals = sortedIntervalList.uniqued().getIntervals(); + this.referenceSequenceMask = new IntervalListReferenceSequenceMask(sortedIntervalList); } else { intervals = null; this.referenceSequenceMask = new WholeGenomeReferenceSequenceMask(samReader.getFileHeader()); @@ -264,6 +265,9 @@ private boolean hasRemainingMaskBases() { */ @Override public K next() { + if (this.samIterator == null) { + iterator(); + } // if we don't have any completed entries to return, try and make some! while (complete.isEmpty() && samHasMore()) { final SAMRecord rec = samIterator.peek(); @@ -576,11 +580,6 @@ protected List getIntervals() { return intervals; } - protected Interval getCurrentInterval() { - if (intervals == null) return null; - return intervals.get(lastInterval); - } - public boolean isIncludeIndels() { return includeIndels; } diff --git a/src/main/java/htsjdk/samtools/util/EdgeReadIterator.java b/src/main/java/htsjdk/samtools/util/EdgeReadIterator.java index 4ed47b4de8..7fd7a69a5f 100644 --- a/src/main/java/htsjdk/samtools/util/EdgeReadIterator.java +++ b/src/main/java/htsjdk/samtools/util/EdgeReadIterator.java @@ -24,10 +24,7 @@ package htsjdk.samtools.util; -import htsjdk.samtools.AlignmentBlock; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SAMSequenceRecord; -import htsjdk.samtools.SamReader; +import htsjdk.samtools.*; /** * Iterator that traverses a SAM File, accumulating information on a per-locus basis. @@ -42,6 +39,12 @@ * */ public class EdgeReadIterator extends AbstractLocusIterator> { + // These variables are required to perform the detection of overlap between reads and intervals + private Interval currentInterval = null; + private final PeekableIterator intervalListIterator; + private final IntervalCoordinateComparator intervalCoordinateComparator; + + private final OverlapDetector overlapDetector; /** * Prepare to iterate through the given SAM records, skipping non-primary alignments. Do not use @@ -58,7 +61,7 @@ public EdgeReadIterator(final SamReader samReader) { * * @param samReader must be coordinate sorted * @param intervalList Either the list of desired intervals, or null. Note that if an intervalList is - * passed in that is not coordinate sorted, it will eventually be coordinated sorted by this class. + * passed in that is not coordinate sorted, it will be coordinated sorted by this class. */ public EdgeReadIterator(final SamReader samReader, final IntervalList intervalList) { this(samReader, intervalList, samReader.hasIndex()); @@ -69,13 +72,52 @@ public EdgeReadIterator(final SamReader samReader, final IntervalList intervalLi * * @param samReader must be coordinate sorted * @param intervalList Either the list of desired intervals, or null. Note that if an intervalList is - * passed in that is not coordinate sorted, it will eventually be coordinated sorted by this class. + * passed in that is not coordinate sorted, it will be coordinated sorted here. * @param useIndex If true, do indexed lookup to improve performance. Not relevant if intervalList == null. * It is no longer the case the useIndex==true can make performance worse. It should always perform at least * as well as useIndex==false, and generally will be much faster. */ public EdgeReadIterator(final SamReader samReader, final IntervalList intervalList, final boolean useIndex) { super(samReader, intervalList, useIndex); + + if (getIntervals() == null) { + intervalListIterator = null; + intervalCoordinateComparator = null; + overlapDetector = null; + } else { + // For the easy case of a read being fully contained in an interval, we use an iterator to keep track + // of the current interval. + intervalListIterator = new PeekableIterator<>(getIntervals().iterator()); + if (intervalListIterator.hasNext()) { + currentInterval = intervalListIterator.next(); + } + // We also need this comparator to keep track of the different contigs. The constructor of + // AbstractLocusInfo ensures that the sequence dictionaries of the SAM file and the interval list match. + intervalCoordinateComparator = new IntervalCoordinateComparator(getHeader()); + + // For more complicated cases, we need an OverlapDetector. + overlapDetector = OverlapDetector.create(getIntervals()); + } + } + + /** + * This function updates currentInterval according to the position of the record that it is + * presented and determines if the current read is fully contained in the currentInterval. + * @param rec The record we want to consider + * @return True, if rec is fully contained in the current interval, otherwise false + */ + protected boolean advanceCurrentIntervalAndCheckIfIntervalContainsRead(final SAMRecord rec) { + // currentInterval should never be null when calling this method, but we have to check it just to make sure, + // so that we don't get a NullPointerException in the return statement. + if (currentInterval == null) { + return false; + } + // Here we need to update the currentInterval. We have to do this using an + // IntervalCoordinateComparator to take factor in the order in the sequence dictionary. + while (intervalListIterator.peek() != null && intervalCoordinateComparator.compare(new Interval(rec), intervalListIterator.peek()) > 0) { + currentInterval = intervalListIterator.next(); + } + return currentInterval.contains(rec); } /** @@ -88,19 +130,48 @@ public EdgeReadIterator(final SamReader samReader, final IntervalList intervalLi */ @Override void accumulateSamRecord(SAMRecord rec) { + // In the case that no intervals are passed, or that the current interval completely contains + // the current read (which is the most common case for WGS), set needToConsiderIntervals to false, so we don't + // have to find intersections and can later emit the read right away. + final boolean needToConsiderIntervals = intervals != null && !advanceCurrentIntervalAndCheckIfIntervalContainsRead(rec); + // interpret the CIGAR string and add the base info for (final AlignmentBlock alignmentBlock : rec.getAlignmentBlocks()) { + // General concept + // + // Example: Read (or more accurately, AlignmentBlock) from position 101 to 108 + // + // accumulator (ArrayList) 30 31 32 33 34 35 36 37 38 (has one LocusInfo at each position, corresponding to the genomic position) + // LocusInfo objects (with genomic pos.) 100 101 102 103 104 105 106 107 108 (the LocusInfo objects can contain EdgingRecordAndOffset objects, if a record starts or ends there) + // ^ ^ + // | | + // EdgingRecordAndOffset objects B E + + // Steps: + // 1. Fill accumulator with enough LocusInfos to the end of the read + // 2. Determine which parts of the read are covered by the specified intervals + // (this step will be skipped if the read is completely contained within the + // interval, i.e. needToConsiderIntervals is false, otherwise each overlap + // will be considered individually) + // 2. Create a EdgingRecordAndOffset object with the type BEGIN and add it + // to the LocusInfo at the position in the accumulator corresponding to + // the start position of the read (or overlap) + // 3. Create a EdgingRecordAndOffset object with the type END and add it + // to the LocusInfo at the position in the accumulator corresponding to + // the end position of the read (or overlap) + // 0-based offset into the read of the current base - final int readOffset = alignmentBlock.getReadStart() - 1; + final int offsetStartOfAlignmentBlockInRead = alignmentBlock.getReadStart() - 1; // 1-based reference position that the current base aligns to - final int refPos = alignmentBlock.getReferenceStart(); + final int referencePositionStartOfAlignmentBlock = alignmentBlock.getReferenceStart(); + // Here we add the first entry to the accumulator, which is the start of this AlignmentBlock. if (accumulator.isEmpty()) { accumulator.add(createLocusInfo(getReferenceSequence(rec.getReferenceIndex()), rec.getAlignmentStart())); } - // The accumulator should always have LocusInfos that correspond to one consecutive segment of loci from one reference - // sequence. So + // The accumulator should always have LocusInfos that correspond to one consecutive segment of loci from + // one reference sequence. So // accumulator.get(0).getPosition() + accumulator.size() == accumulator.get(accumulator.size()-1).getPosition()+1 final int accumulatorNextPosition = accumulator.get(0).getPosition() + accumulator.size(); @@ -108,53 +179,60 @@ void accumulateSamRecord(SAMRecord rec) { throw new IllegalStateException("The accumulator has gotten into a funk. Cannot continue"); } - // Ensure there are AbstractLocusInfos up to and including this position - for (int locusPos = accumulatorNextPosition; locusPos <= refPos + alignmentBlock.getLength(); ++locusPos) { + // Ensure there are consecutive AbstractLocusInfos up to and including the end of the AlignmentBlock + for (int locusPos = accumulatorNextPosition; locusPos <= referencePositionStartOfAlignmentBlock + alignmentBlock.getLength(); ++locusPos) { accumulator.add(createLocusInfo(getReferenceSequence(rec.getReferenceIndex()), locusPos)); } - /* Let's assume an alignment block starts in some locus. - * We put two records to the accumulator. The first one has the "begin" type which corresponds to the locus - * where the block starts. The second one has the "end" type which corresponds to the other locus where the block ends. - */ + // Let's assume an alignment block starts in some locus. + // We put two records to the accumulator. The first one has the "begin" type which corresponds to the locus + // where the block starts. The second one has the "end" type which corresponds to the other locus where the block ends. // 0-based offset from the aligned position of the first base in the read to the aligned position // of the current base. - int refOffsetInterval = refPos - rec.getAlignmentStart(); // corresponds to the beginning of the alignment block - int refOffsetEndInterval = refOffsetInterval + alignmentBlock.getLength();; - int startShift = 0; - - // intersect intervals and alignment block - if (getIntervals() != null) { - // get the current interval we're processing - Interval interval = getCurrentInterval(); - if (interval != null) { - final int intervalEnd = interval.getEnd(); - final int intervalStart = interval.getStart(); - // check if an interval and the alignment block overlap - if (!CoordMath.overlaps(refPos, refPos + alignmentBlock.getLength(), intervalStart, intervalEnd)) { - continue; - } - // if the alignment block starts out of an interval, shift the starting position - if (refPos < intervalStart) { - startShift = intervalStart - refPos; - refOffsetInterval = refOffsetInterval + startShift; - } - // if the alignment block ends out of an interval, shift the ending position - final int readEnd = refPos + alignmentBlock.getLength(); - if (readEnd > intervalEnd) { - refOffsetEndInterval = refOffsetEndInterval - (readEnd - intervalEnd) + 1; - } + final int offsetStartOfAlignmentBlockOnReference = referencePositionStartOfAlignmentBlock - rec.getAlignmentStart(); + // Similar for the end of the alignment block. We can simply add the length of the block, since by + // definition all bases in an AlignmentBlock match the reference alignment + final int offsetEndOfAlignmentBlockOnReference = offsetStartOfAlignmentBlockOnReference + alignmentBlock.getLength(); + + if (needToConsiderIntervals) { + // If the read isn't fully contained within the currentInterval, we need to manually handle each of the overlaps. + + for (final Interval interval : overlapDetector.getOverlaps(new Interval(rec.getContig(), referencePositionStartOfAlignmentBlock, referencePositionStartOfAlignmentBlock + alignmentBlock.getLength()))) { + // In case the start position is smaller than the start of the interval, we need to determine the offset (we need this later)... + final int offsetStartOfIntervalInAlignmentBlock = referencePositionStartOfAlignmentBlock < interval.getStart() ? interval.getStart() - referencePositionStartOfAlignmentBlock : 0; + // ... and add it to the start position to get the actual position from where we want to count. + final int offsetStartOfActualSequenceOnReference = offsetStartOfAlignmentBlockOnReference + offsetStartOfIntervalInAlignmentBlock; + + // Similarly, we need to determine the actual end of the sequence we want to consider. + final int referencePositionEndOfAlignmentBlock = referencePositionStartOfAlignmentBlock + alignmentBlock.getLength(); + // For that, we find the difference between the end position of the AlignmentBlock and the end of the interval, and subtract it from the offset of end of the AlignmentBlock + final int offsetEndOfActualSequenceOnReference = offsetEndOfAlignmentBlockOnReference - (referencePositionEndOfAlignmentBlock > interval.getEnd() ? referencePositionEndOfAlignmentBlock - interval.getEnd() - 1 : 0); + + final int length = offsetEndOfActualSequenceOnReference - offsetStartOfActualSequenceOnReference; + + // accumulate start of the overlap block + final EdgingRecordAndOffset recordAndOffset = createRecordAndOffset(rec, offsetStartOfAlignmentBlockInRead + offsetStartOfIntervalInAlignmentBlock, length, referencePositionStartOfAlignmentBlock + offsetStartOfIntervalInAlignmentBlock); + accumulator.get(offsetStartOfActualSequenceOnReference).add(recordAndOffset); + + // accumulate end of the overlap block + final EdgingRecordAndOffset recordAndOffsetEnd = createRecordAndOffset(recordAndOffset); + accumulator.get(offsetEndOfActualSequenceOnReference).add(recordAndOffsetEnd); } + } else { + // If the read is fully contained within the interval, then we don't need to determine the overlaps, + // which will speed this up significantly. + + final int length = offsetEndOfAlignmentBlockOnReference - offsetStartOfAlignmentBlockOnReference; + + // accumulate start of the alignment block + final EdgingRecordAndOffset recordAndOffset = createRecordAndOffset(rec, offsetStartOfAlignmentBlockInRead, length, referencePositionStartOfAlignmentBlock); + accumulator.get(offsetStartOfAlignmentBlockOnReference).add(recordAndOffset); + + // accumulate end of the alignment block + final EdgingRecordAndOffset recordAndOffsetEnd = createRecordAndOffset(recordAndOffset); + accumulator.get(offsetEndOfAlignmentBlockOnReference).add(recordAndOffsetEnd); } - final int length = refOffsetEndInterval - refOffsetInterval; - // add the alignment block to the accumulator when it starts and when it ends - final EdgingRecordAndOffset recordAndOffset = createRecordAndOffset(rec, readOffset + startShift, length, refPos + startShift); - // accumulate start of the alignment block - accumulator.get(refOffsetInterval).add(recordAndOffset); - final EdgingRecordAndOffset recordAndOffsetEnd = createRecordAndOffset(recordAndOffset); - // accumulate end of the alignment block - accumulator.get(refOffsetEndInterval).add(recordAndOffsetEnd); } } diff --git a/src/main/java/htsjdk/samtools/util/IntervalListReferenceSequenceMask.java b/src/main/java/htsjdk/samtools/util/IntervalListReferenceSequenceMask.java index 08c2dd5e19..bf65c288f2 100644 --- a/src/main/java/htsjdk/samtools/util/IntervalListReferenceSequenceMask.java +++ b/src/main/java/htsjdk/samtools/util/IntervalListReferenceSequenceMask.java @@ -46,10 +46,9 @@ public class IntervalListReferenceSequenceMask implements ReferenceSequenceMask public IntervalListReferenceSequenceMask(final IntervalList intervalList) { this.header = intervalList.getHeader(); - if (intervalList.getHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) { - intervalList.sorted(); - } - final List uniqueIntervals = intervalList.uniqued().getIntervals(); + final boolean isSorted = intervalList.getHeader().getSortOrder() == SAMFileHeader.SortOrder.coordinate; + final IntervalList sortedIntervalList = isSorted ? intervalList : intervalList.sorted(); + final List uniqueIntervals = sortedIntervalList.uniqued().getIntervals(); if (uniqueIntervals.isEmpty()) { lastSequenceIndex = -1; lastPosition = 0; diff --git a/src/test/java/htsjdk/samtools/util/EdgeReadIteratorTest.java b/src/test/java/htsjdk/samtools/util/EdgeReadIteratorTest.java index 4f5d953171..2e83876edc 100644 --- a/src/test/java/htsjdk/samtools/util/EdgeReadIteratorTest.java +++ b/src/test/java/htsjdk/samtools/util/EdgeReadIteratorTest.java @@ -23,9 +23,7 @@ */ package htsjdk.samtools.util; -import htsjdk.samtools.SAMRecordSetBuilder; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SamReaderFactory; +import htsjdk.samtools.*; import org.testng.Assert; import org.testng.annotations.Test; @@ -252,7 +250,8 @@ public void testSetIncludeIndels() { */ @Test public void testNotIntersectingInterval() { - SamReader samReader = createSamFileReader(); + SamReader samReader = createSamFileReader(createSamFileHeader("@HD\tSO:coordinate\tVN:1.0\n" + + "@SQ\tSN:chrM\tLN:100\n")); IntervalList intervals = createIntervalList("@HD\tSO:coordinate\tVN:1.0\n" + "@SQ\tSN:chrM\tLN:100\n" + @@ -273,7 +272,8 @@ public void testNotIntersectingInterval() { */ @Test public void testIntersectingInterval() { - SamReader samReader = createSamFileReader(); + SamReader samReader = createSamFileReader(createSamFileHeader("@HD\tSO:coordinate\tVN:1.0\n" + + "@SQ\tSN:chrM\tLN:100\n")); IntervalList intervals = createIntervalList("@HD\tSO:coordinate\tVN:1.0\n" + "@SQ\tSN:chrM\tLN:100\n" + "chrM\t5\t15\t+\ttest"); @@ -302,6 +302,8 @@ public void testIntersectingInterval() { public void testIntersectingAndNotInterval() { final SAMRecordSetBuilder builder = getRecordBuilder(); + builder.setHeader(createSamFileHeader("@HD\tSO:coordinate\tVN:1.0\n" + + "@SQ\tSN:chrM\tLN:100\n")); // add records up to coverage for the test in that position final int startPosition = 40; // Were it not for the gap, these two reads would not overlap @@ -361,6 +363,8 @@ public void testNoGapsInLocusAccumulator() { public void testIntersectingIntervalWithComplicatedCigar() { final SAMRecordSetBuilder builder = getRecordBuilder(); + builder.setHeader(createSamFileHeader("@HD\tSO:coordinate\tVN:1.0\n" + + "@SQ\tSN:chrM\tLN:100\n")); // add records up to coverage for the test in that position final int startPosition = 1; // Were it not for the gap, these two reads would not overlap @@ -398,6 +402,108 @@ public void testIntersectingIntervalWithComplicatedCigar() { assertEquals(21, locusPosition); } + /** + * Test for handling multiple intervals + */ + @Test + public void testMultipleIntervals() { + String samHeaderString = "@HD\tSO:coordinate\tVN:1.0\n" + + "@SQ\tSN:chr1\tLN:100\n" + + "@SQ\tSN:chr2\tLN:100\n" + + "@SQ\tSN:chr3\tLN:100\n" + + "@SQ\tSN:chr4\tLN:100\n"; + + final SAMRecordSetBuilder builder = getRecordBuilder(); + builder.setHeader(createSamFileHeader(samHeaderString)); + + builder.addFrag("fullyContainedInChr1", 0, 10, false, false, "10M10D10M", null, 10); + builder.addFrag("startOutsideChr2", 1, 15, false, false, "10M10D10M", null, 10); + builder.addFrag("endOutsideChr2", 1, 65, false, false, "10M10D10M", null, 10); + builder.addFrag("spanningThreeIntervalsChr4", 3, 1, false, false, "100M", null, 10); + + IntervalList intervals = createIntervalList(samHeaderString + + "chr1\t1\t100\t+\ttest\n" + + "chr2\t20\t70\t+\ttest\n" + + "chr4\t20\t30\t+\ttest\n" + + "chr4\t40\t50\t+\ttest\n" + + "chr4\t60\t70\t+\ttest\n"); + + // These are the covered intervals that we expect to be covered + final Interval[] intervalsCovered = { + new Interval("chr1", 10, 19), // fullyContainedInChr1: first alignment block + new Interval("chr1", 30, 39), // fullyContainedInChr1: second alignment block + new Interval("chr2", 20, 24), // startOutsideChr2: first alignment block + new Interval("chr2", 35, 44), // startOutsideChr2: second alignment block + new Interval("chr2", 65, 70), // endOutsideChr2: first alignment block (second isn't covered at all) + new Interval("chr4", 20, 30), // spanningThreeIntervalsChr4: first interval + new Interval("chr4", 40, 50), // spanningThreeIntervalsChr4: second interval + new Interval("chr4", 60, 70), // spanningThreeIntervalsChr4: third interval + }; + + EdgeReadIterator iterator = new EdgeReadIterator(builder.getSamReader(), intervals); + AbstractLocusInfo currentLocusInfo = iterator.next(); + for (final Interval interval : intervalsCovered) { + // Continue iterating over the LocusInfos if there is no RecordAndOffsets (size == 0) or it isn't a BEGIN record. + while(currentLocusInfo.getRecordAndOffsets().size() < 1 || currentLocusInfo.getRecordAndOffsets().get(0).getType() != EdgingRecordAndOffset.Type.BEGIN) { + currentLocusInfo = iterator.next(); + } + EdgingRecordAndOffset currentEdgingRecordAndOffset = currentLocusInfo.getRecordAndOffsets().get(0); + + assertEquals(currentLocusInfo.getContig(), interval.getContig(), "Read: " + currentEdgingRecordAndOffset.getReadName()); + assertEquals(currentLocusInfo.getPosition(), interval.getStart(), "Read: " + currentEdgingRecordAndOffset.getReadName()); + assertEquals(currentLocusInfo.getPosition() + currentEdgingRecordAndOffset.getLength() - 1, interval.getEnd(), "Read: " + currentEdgingRecordAndOffset.getReadName()); + + currentLocusInfo = iterator.next(); + } + } + + /** + * Test for handling multiple intervals + */ + @Test + public void testIntervalCompletelyContainsRead() { + String samHeaderString = "@HD\tSO:coordinate\tVN:1.0\n" + + "@SQ\tSN:Z_AlphabeticallyOutOfOrderContig\tLN:100\n" + + "@SQ\tSN:chr1\tLN:100\n" + + "@SQ\tSN:chr2\tLN:100\n" + + "@SQ\tSN:chr3\tLN:100\n" + + "@SQ\tSN:chr4\tLN:100\n" + + "@SQ\tSN:chr5\tLN:100\n" + + "@SQ\tSN:chr6\tLN:100\n"; + + final SAMRecordSetBuilder builder = getRecordBuilder(); + builder.setHeader(createSamFileHeader(samHeaderString)); + + builder.addFrag("containedInChr1", 1, 1, false, false, "10M", null, 10); + builder.addFrag("notContainedInChr2", 2, 42, false, false, "10M", null, 10); + builder.addFrag("containedInChr4", 4, 41, false, false, "10M", null, 10); + builder.addFrag("containedInChr5", 5, 2, false, false, "10M", null, 10); + builder.addFrag("notContainedInChr6", 6, 1, false, false, "10M", null, 10); + + IntervalList intervals = createIntervalList(samHeaderString + + "chr1\t1\t50\t+\ttest\n" + + "chr2\t1\t50\t+\ttest\n" + + "chr3\t1\t50\t+\ttest\n" + + "chr4\t1\t50\t+\ttest\n" + + "chr5\t1\t50\t+\ttest\n"); + + final boolean[] expectedResults = { + true, + false, + true, + true, + false + }; + + EdgeReadIterator iterator = new EdgeReadIterator(builder.getSamReader(), intervals); + int i = 0; + for (final SAMRecord record : builder.getRecords()) { + assertEquals(iterator.advanceCurrentIntervalAndCheckIfIntervalContainsRead(record), expectedResults[i], "Read: " + record.getReadName()); + i += 1; + } + assertEquals(i, expectedResults.length); // Make sure we checked all reads + } + private void fillEmptyLocus(int[] expectedReferencePositions, int[] expectedDepths, int[][] expectedReadOffsets, int i) { expectedReferencePositions[i] = i + 1; expectedDepths[i] = 0; @@ -405,7 +511,14 @@ private void fillEmptyLocus(int[] expectedReferencePositions, int[] expectedDept } private SamReader createSamFileReader() { + return createSamFileReader(null); + } + + private SamReader createSamFileReader(final SAMFileHeader header) { final SAMRecordSetBuilder builder = getRecordBuilder(); + if (header != null) { + builder.setHeader(header); + } // add records up to coverage for the test in that position final int startPosition = 1; for (int i = 0; i < coverage; i++) { @@ -422,4 +535,8 @@ private IntervalList createIntervalList(final String s) { throw new RuntimeException("Trouble closing reader: " + s, e); } } + + private SAMFileHeader createSamFileHeader(final String s) { + return new SAMTextHeaderCodec().decode(BufferedLineReader.fromString(s), null); + } }