Skip to content

Commit

Permalink
fix: preserve add_pair backwards compatibility
Browse files Browse the repository at this point in the history
  • Loading branch information
msto committed Oct 17, 2023
1 parent e5bf00f commit a79899a
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 41 deletions.
15 changes: 11 additions & 4 deletions fgpyo/sam/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,8 @@ def add_pair(
bases2: Optional[str] = None,
quals1: Optional[List[int]] = None,
quals2: Optional[List[int]] = None,
chrom1: str = sam.NO_REF_NAME,
chrom: str = sam.NO_REF_NAME,
chrom1: Optional[str] = None,
chrom2: Optional[str] = None,
start1: int = sam.NO_REF_POS,
start2: int = sam.NO_REF_POS,
Expand Down Expand Up @@ -380,8 +381,9 @@ def add_pair(
bases2: The bases for R2. If None is given a random sequence is generated.
quals1: The list of int qualities for R1. If None, the default base quality is used.
quals2: The list of int qualities for R2. If None, the default base quality is used.
chrom1: The chromosome to which R1 is mapped. Defaults to the unmapped value.
chrom2: The chromosome to which R2 is mapped. If None, `chrom1` is used.
chrom: The chromosome to which both reads are mapped. Defaults to the unmapped value.
chrom1: The chromosome to which R1 is mapped. If None, `chrom` is used.
chrom2: The chromosome to which R2 is mapped. If None, `chrom` is used.
start1: The start position of R1. Defaults to the unmapped value.
start2: The start position of R2. Defaults to the unmapped value.
cigar1: The cigar string for R1. Defaults to None for unmapped reads, otherwise all M.
Expand All @@ -406,7 +408,12 @@ def add_pair(
raise ValueError(f"Invalid value for strand2: {strand2}")

name = name if name is not None else self._next_name()
chrom2 = chrom2 if chrom2 is not None else chrom1

# Use `chrom` for both reads if `chrom1` and `chrom2` are not specified.
if chrom1 is None and chrom2 is None:
chrom1 = chrom2 = chrom
elif chrom1 is None or chrom2 is None:
raise ValueError("When using chrom1 or chrom2, both must be specified.")

# Setup R1
r1 = self._new_rec(name=name, chrom=chrom1, start=start1, mapq=mapq1, attrs=attrs)
Expand Down
64 changes: 34 additions & 30 deletions fgpyo/sam/tests/test_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def test_add_pair_all_fields() -> None:
builder = SamBuilder()
builder.add_pair(
name="q1",
chrom1="chr1",
chrom="chr1",
bases1="ACGTG",
quals1=[20, 21, 22, 23, 24],
start1=10000,
Expand Down Expand Up @@ -52,7 +52,7 @@ def test_add_pair_all_fields() -> None:

def test_add_pair_minimal() -> None:
builder = SamBuilder(r1_len=10, r2_len=5, base_quality=25, mapping_quality=20)
r1, r2 = builder.add_pair(chrom1="chr1", start1=1000, start2=1200)
r1, r2 = builder.add_pair(chrom="chr1", start1=1000, start2=1200)
assert r1.query_name == r2.query_name
assert r1.reference_name == r2.reference_name == "chr1"
assert r1.reference_start == 1000
Expand All @@ -71,22 +71,20 @@ def test_add_pair_minimal() -> None:

def test_add_pair_mix_and_match() -> None:
builder = SamBuilder(r1_len=100, r2_len=100, base_quality=30)
r1, r2 = builder.add_pair(
chrom1="chr1", start1=500, start2=700, cigar1="75M", cigar2="9M1I30M"
)
r1, r2 = builder.add_pair(chrom="chr1", start1=500, start2=700, cigar1="75M", cigar2="9M1I30M")
assert len(r1.query_sequence) == len(r1.query_qualities) == 75
assert len(r2.query_sequence) == len(r2.query_qualities) == 40

r1, r2 = builder.add_pair(
chrom1="chr1", start1=500, start2=700, bases1="ACGTGCATGC", bases2="ACGAC"
chrom="chr1", start1=500, start2=700, bases1="ACGTGCATGC", bases2="ACGAC"
)
assert len(r1.query_sequence) == len(r1.query_qualities) == 10
assert len(r2.query_sequence) == len(r2.query_qualities) == 5
assert r1.cigarstring == "10M"
assert r2.cigarstring == "5M"

r1, r2 = builder.add_pair(
chrom1="chr1", start1=500, start2=700, quals1=[30] * 20, quals2=[20] * 10
chrom="chr1", start1=500, start2=700, quals1=[30] * 20, quals2=[20] * 10
)
assert len(r1.query_sequence) == len(r1.query_qualities) == 20
assert len(r2.query_sequence) == len(r2.query_qualities) == 10
Expand All @@ -95,18 +93,18 @@ def test_add_pair_mix_and_match() -> None:

# Now what if we provide multiple values that are inconsistent
with pytest.raises(ValueError, match="not length compatible"):
builder.add_pair(chrom1="chr1", start1=10, start2=99, bases1="ACGTG", cigar1="10M")
builder.add_pair(chrom="chr1", start1=10, start2=99, bases1="ACGTG", cigar1="10M")

with pytest.raises(ValueError, match="not length compatible"):
builder.add_pair(chrom1="chr1", start1=10, start2=99, bases1="ACGTG", quals1=[2, 2])
builder.add_pair(chrom="chr1", start1=10, start2=99, bases1="ACGTG", quals1=[2, 2])

with pytest.raises(ValueError, match="not length compatible"):
builder.add_pair(chrom1="chr1", start1=10, start2=99, quals1=[2, 2], cigar1="5M")
builder.add_pair(chrom="chr1", start1=10, start2=99, quals1=[2, 2], cigar1="5M")


def test_unmapped_reads() -> None:
builder = SamBuilder()
r1, r2 = builder.add_pair(chrom1="chr1", start1=1000)
r1, r2 = builder.add_pair(chrom="chr1", start1=1000)
assert not r1.is_unmapped
assert r1.mate_is_unmapped
assert r2.is_unmapped
Expand All @@ -117,7 +115,7 @@ def test_unmapped_reads() -> None:
assert rec.next_reference_name == "chr1"
assert rec.next_reference_start == 1000

r1, r2 = builder.add_pair(chrom1="chr1", start2=2000)
r1, r2 = builder.add_pair(chrom="chr1", start2=2000)
assert r1.is_unmapped
assert not r1.mate_is_unmapped
assert not r2.is_unmapped
Expand All @@ -128,7 +126,7 @@ def test_unmapped_reads() -> None:
assert rec.next_reference_name == "chr1"
assert rec.next_reference_start == 2000

r1, r2 = builder.add_pair(chrom1=sam.NO_REF_NAME)
r1, r2 = builder.add_pair(chrom=sam.NO_REF_NAME)
assert r1.is_unmapped
assert r1.mate_is_unmapped
assert r2.is_unmapped
Expand All @@ -142,34 +140,34 @@ def test_unmapped_reads() -> None:

def test_invalid_strand() -> None:
with pytest.raises(ValueError, match="strand"):
SamBuilder().add_pair(chrom1="chr1", start1=100, start2=200, strand1="F", strand2="R")
SamBuilder().add_pair(chrom="chr1", start1=100, start2=200, strand1="F", strand2="R")


def test_proper_pair() -> None:
builder = SamBuilder()

# Regular innies
for rec in builder.add_pair(chrom1="chr1", start1=5000, start2=5200, strand1="+", strand2="-"):
for rec in builder.add_pair(chrom="chr1", start1=5000, start2=5200, strand1="+", strand2="-"):
assert rec.is_proper_pair
for rec in builder.add_pair(chrom1="chr1", start1=5200, start2=5000, strand1="-", strand2="+"):
for rec in builder.add_pair(chrom="chr1", start1=5200, start2=5000, strand1="-", strand2="+"):
assert rec.is_proper_pair

# Outies
for rec in builder.add_pair(chrom1="chr1", start1=5000, start2=5200, strand1="-", strand2="+"):
for rec in builder.add_pair(chrom="chr1", start1=5000, start2=5200, strand1="-", strand2="+"):
assert not rec.is_proper_pair
for rec in builder.add_pair(chrom1="chr1", start1=5200, start2=5000, strand1="+", strand2="-"):
for rec in builder.add_pair(chrom="chr1", start1=5200, start2=5000, strand1="+", strand2="-"):
assert not rec.is_proper_pair

# Unmapped
for rec in builder.add_pair(chrom1="chr1", start1=5000, strand1="+"):
for rec in builder.add_pair(chrom="chr1", start1=5000, strand1="+"):
assert not rec.is_proper_pair
for rec in builder.add_pair(chrom1="chr1", start2=5000, strand2="+"):
for rec in builder.add_pair(chrom="chr1", start2=5000, strand2="+"):
assert not rec.is_proper_pair
for rec in builder.add_pair():
assert not rec.is_proper_pair


def test_discordant_pair() -> None:
def test_chrom1_chrom2() -> None:
builder = SamBuilder()
r1, r2 = builder.add_pair(chrom1="chr1", start1=1000, chrom2="chr2", start2=1000)

Expand All @@ -181,6 +179,12 @@ def test_discordant_pair() -> None:
assert not r1.is_reverse
assert r2.is_reverse

with pytest.raises(ValueError, match="When using chrom1 or chrom2, both must be specified."):
r1, r2 = builder.add_pair(chrom1="chr1", start1=1000, start2=1000)

with pytest.raises(ValueError, match="When using chrom1 or chrom2, both must be specified."):
r1, r2 = builder.add_pair(chrom2="chr1", start1=1000, start2=1000)


def test_add_single() -> None:
builder = SamBuilder(r1_len=25, r2_len=50)
Expand Down Expand Up @@ -214,10 +218,10 @@ def test_add_single() -> None:

def test_sorting() -> None:
builder = SamBuilder()
builder.add_pair(chrom1="chr1", start1=5000, start2=4700, strand1="-", strand2="+")
builder.add_pair(chrom1="chr1", start1=4000, start2=4300)
builder.add_pair(chrom1="chr5", start1=4000, start2=4300)
builder.add_pair(chrom1="chr2", start1=4000, start2=4300)
builder.add_pair(chrom="chr1", start1=5000, start2=4700, strand1="-", strand2="+")
builder.add_pair(chrom="chr1", start1=4000, start2=4300)
builder.add_pair(chrom="chr5", start1=4000, start2=4300)
builder.add_pair(chrom="chr2", start1=4000, start2=4300)

last_ref_id = -1
last_start = -1
Expand All @@ -232,17 +236,17 @@ def test_sorting() -> None:
def test_custom_sd() -> None:
builder1 = SamBuilder()
builder2 = SamBuilder(sd=[{"SN": "hi", "LN": 999}, {"SN": "bye", "LN": 888}])
builder1.add_pair(chrom1="chr1", start1=200, start2=400)
builder2.add_pair(chrom1="hi", start1=200, start2=400)
builder1.add_pair(chrom="chr1", start1=200, start2=400)
builder2.add_pair(chrom="hi", start1=200, start2=400)

with pytest.raises(ValueError, match="not a valid chromosome name"):
builder1.add_pair(chrom1="hi", start1=200, start2=400)
builder1.add_pair(chrom="hi", start1=200, start2=400)

with pytest.raises(ValueError, match="not a valid chromosome name"):
builder2.add_pair(chrom1="chr1", start1=200, start2=400)
builder2.add_pair(chrom="chr1", start1=200, start2=400)


def test_custom_rg() -> None:
builder = SamBuilder(rg={"ID": "novel", "SM": "custom_rg", "LB": "foo", "PL": "ILLUMINA"})
for rec in builder.add_pair(chrom1="chr1", start1=100, start2=200):
for rec in builder.add_pair(chrom="chr1", start1=100, start2=200):
assert rec.get_tag("RG") == "novel"
4 changes: 2 additions & 2 deletions fgpyo/sam/tests/test_clipping.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@ def r(start: Optional[int], cigar: Optional[str], strand: Optional[str] = "+") -
"""Constructs a read for testing."""
builder = SamBuilder()
if start:
r1, r2 = builder.add_pair(chrom1="chr1", start1=start, cigar1=cigar, strand1=strand)
r1, r2 = builder.add_pair(chrom="chr1", start1=start, cigar1=cigar, strand1=strand)
else:
r1, r2 = builder.add_pair()
return r1


def test_make_read_unmapped() -> None:
builder = SamBuilder()
r1, r2 = builder.add_pair(chrom1="chr1", start1=100, start2=250)
r1, r2 = builder.add_pair(chrom="chr1", start1=100, start2=250)

clipping._make_read_unmapped(r1)
assert r1.is_unmapped
Expand Down
2 changes: 1 addition & 1 deletion fgpyo/sam/tests/test_sam.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ def test_is_clipping() -> None:

def test_isize() -> None:
builder = SamBuilder()
r1, r2 = builder.add_pair(chrom1="chr1", start1=100, cigar1="115M", start2=250, cigar2="40M")
r1, r2 = builder.add_pair(chrom="chr1", start1=100, cigar1="115M", start2=250, cigar2="40M")
assert sam.isize(r1, r2) == 190
assert sam.isize(r2, r1) == -190

Expand Down
4 changes: 2 additions & 2 deletions fgpyo/sam/tests/test_template_iterator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

def test_template_init_function() -> None:
builder: SamBuilder = SamBuilder()
builder.add_pair(name="x", chrom1="chr1", start1=1, start2=2)
builder.add_pair(name="x", chrom="chr1", start1=1, start2=2)
r1 = builder.to_sorted_list()[0]
r2 = builder.to_sorted_list()[1]
template = Template(
Expand All @@ -26,7 +26,7 @@ def test_to_templates() -> None:
builder = SamBuilder()

# Series of alignments for one template
builder.add_pair(name="q1", chrom1="chr1", start1=1, start2=2)
builder.add_pair(name="q1", chrom="chr1", start1=1, start2=2)
builder.add_single(name="q1", read_num=1, chrom="chr1", start=1, supplementary=True)
builder.add_single(name="q1", read_num=1, chrom="chr1", start=11, supplementary=True)
builder.add_single(name="q1", read_num=2, chrom="chr1", start=2, supplementary=True)
Expand Down
4 changes: 2 additions & 2 deletions fgpyo/util/tests/test_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ def test_progress_logger_as_context_manager() -> None:


builder = SamBuilder()
r1_mapped_named, r2_unmapped_named = builder.add_pair(chrom1="chr1", start1=1000)
r1_unmapped_un_named, r2_unmapped_un_named = builder.add_pair(chrom1=sam.NO_REF_NAME)
r1_mapped_named, r2_unmapped_named = builder.add_pair(chrom="chr1", start1=1000)
r1_unmapped_un_named, r2_unmapped_un_named = builder.add_pair(chrom=sam.NO_REF_NAME)


@pytest.mark.parametrize(
Expand Down

0 comments on commit a79899a

Please sign in to comment.