-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
bugfixes and adding code to simulate primer-directed read1
- Loading branch information
Showing
1 changed file
with
24 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,12 @@ | ||
#!/usr/bin/env python | ||
from Bio import SeqIO | ||
from Bio import Alphabet | ||
from Bio.Restriction import * | ||
from collections import OrderedDict | ||
import numpy | ||
import sys | ||
import re | ||
from Bio.Seq import Seq | ||
|
||
# restriction enzymes used in reaction | ||
cut4 = Restriction.NlaIII | ||
|
@@ -15,14 +18,27 @@ def findRestrictionSites(enzyme, seq): | |
""" | ||
return enzyme.search(seq,linear=False) | ||
|
||
def findPrimingSites(oligo, seq): | ||
"""For supplied priming sequence, find positions of all matches in a given sequence | ||
returns list of sites. | ||
""" | ||
array = [] | ||
for m in re.finditer(oligo, seq.tostring()): | ||
array.append(m.end()) | ||
rc_oligo = Seq(oligo) | ||
rc_oligo.reverse_complement() | ||
for m in re.finditer(rc_oligo.tostring(), seq.tostring()): | ||
array.append(m.end()) | ||
return array | ||
|
||
def drawDelta(minLength,maxLength): | ||
"""Draws from a distribution only accepting values between min and max.""" | ||
# as this relates to a circular chromosome, the min and max could be considered one value. | ||
# could do this modulo length of chromosome. | ||
geometricProbability = 1.0e-4 | ||
geometricProbability = 1.0e-5 | ||
This comment has been minimized.
Sorry, something went wrong.
This comment has been minimized.
Sorry, something went wrong.
cerebis
Collaborator
|
||
|
||
delta = numpy.random.geometric(p=geometricProbability,size=1) | ||
while (delta < minLength and delta > maxLength): | ||
while (delta < minLength or delta > maxLength): | ||
delta = numpy.random.geometric(p=geometricProbability,size=1) | ||
return int(delta) | ||
|
||
|
@@ -136,6 +152,7 @@ def __init__(self, name, parentCell, sequence): | |
self.parentCell = parentCell | ||
self.sequence = sequence | ||
self.cutSites = {} | ||
self.cutSites['515F'] = numpy.array(findPrimingSites("GTGCCAGC[AC]GCCGCGGTAA",sequence.seq)) | ||
self.cutSites['4cut'] = numpy.array(findRestrictionSites(cut4, sequence.seq)) | ||
self.cutSites['6cut'] = numpy.array(findRestrictionSites(cut6, sequence.seq)) | ||
|
||
|
@@ -390,6 +407,7 @@ def getCDF(self): | |
def makeUnconstrainedPartA(): | ||
rep = comm.getRepliconByIndex(comm.pickReplicon()) | ||
pos6c = rep.randomCutSite('6cut') | ||
# pos6c = rep.randomCutSite('515F') | ||
fwd = comm.isForwardStrand() | ||
pos4c = rep.nearestCutSiteAbove('4cut',pos6c) | ||
seq = rep.subSeq(pos6c,pos4c, True) | ||
|
@@ -460,6 +478,9 @@ def makeConstrainedPartB(partA): | |
if len(fragment) < 200: # fudge minimum length of total fragment | ||
skipCount += 1 | ||
continue | ||
if len(fragment) > 1000: # fudge maximum length of total fragment | ||
This comment has been minimized.
Sorry, something went wrong.
This comment has been minimized.
Sorry, something went wrong. |
||
skipCount += 1 | ||
continue | ||
|
||
read1 = makeRead(fragment, True, readLength) | ||
read1.id = "frg" + str(fragCount) + "fwd" | ||
|
@@ -476,4 +497,4 @@ def makeConstrainedPartB(partA): | |
# Close output file before exit | ||
hOutput.close() | ||
|
||
print "Ignored " + str(skipCount) + " fragments due to short length" | ||
print "Ignored " + str(skipCount) + " fragments due to length restrictions" |
looking at the results this appeared to be a better match to the Beitel et al data