diff --git a/SeqLib b/SeqLib index 0e3cee9..b0da251 160000 --- a/SeqLib +++ b/SeqLib @@ -1 +1 @@ -Subproject commit 0e3cee95e083980b230e5119146a113bd2e8e520 +Subproject commit b0da25148837b444568309714db3df5c95eb951b diff --git a/src/CommandLineRegion.h b/src/CommandLineRegion.h index c32f4fa..74ec9f6 100644 --- a/src/CommandLineRegion.h +++ b/src/CommandLineRegion.h @@ -32,9 +32,9 @@ CommandLineRegion(const std::string& mf, int t) : f(mf), type(t), pad(0), i_flag }; -static SeqLib::ReadFilter BuildReadFilterFromCommandLineRegion(const CommandLineRegion& c, const SeqLib::BamHeader& hdr) { +static SeqLib::Filter::ReadFilter BuildReadFilterFromCommandLineRegion(const CommandLineRegion& c, const SeqLib::BamHeader& hdr) { - SeqLib::ReadFilter r; + SeqLib::Filter::ReadFilter r; //r.m_region_file = c.f; // set a whole genome ALL rule @@ -48,7 +48,7 @@ static SeqLib::ReadFilter BuildReadFilterFromCommandLineRegion(const CommandLine } // add the abstract rule - SeqLib::AbstractRule ar; + SeqLib::Filter::AbstractRule ar; // set the flag if (c.i_flag || c.e_flag) { @@ -58,28 +58,26 @@ static SeqLib::ReadFilter BuildReadFilterFromCommandLineRegion(const CommandLine // set the other fields if (c.len) - ar.len = SeqLib::Range(c.len, INT_MAX, false); + ar.len = SeqLib::Filter::Range(c.len, INT_MAX, false); if (c.nbases != INT_MAX) - ar.nbases = SeqLib::Range(0, c.nbases, false); + ar.nbases = SeqLib::Filter::Range(0, c.nbases, false); if (c.phred) - ar.phred = SeqLib::Range(c.phred, INT_MAX, false); + ar.phred = SeqLib::Filter::Range(c.phred, INT_MAX, false); if (c.mapq) - ar.mapq = SeqLib::Range(c.mapq, INT_MAX, false); + ar.mapq = SeqLib::Filter::Range(c.mapq, INT_MAX, false); if (c.clip) - ar.clip = SeqLib::Range(c.clip, INT_MAX, false); + ar.clip = SeqLib::Filter::Range(c.clip, INT_MAX, false); if (c.del) - ar.del = SeqLib::Range(c.del, INT_MAX, false); + ar.del = SeqLib::Filter::Range(c.del, INT_MAX, false); if (c.ins) - ar.ins = SeqLib::Range(c.ins, INT_MAX, false); + ar.ins = SeqLib::Filter::Range(c.ins, INT_MAX, false); // set the id //ar.id = id + "_CMD_RULE"; -#ifdef HAVE_AHO_CORASICK // add a motif rule if (!c.motif.empty()) ar.addMotifRule(c.motif, false); -#endif // add read group rule ar.SetReadGroup(c.rg); diff --git a/src/STCoverage.cpp b/src/STCoverage.cpp index bed968e..07c5ba5 100644 --- a/src/STCoverage.cpp +++ b/src/STCoverage.cpp @@ -9,12 +9,12 @@ void STCoverage::settleCoverage() { SeqLib::GRC tmp = m_grc; - m_grc.mergeOverlappingIntervals(); + m_grc.MergeOverlappingIntervals(); } STCoverage::STCoverage(const SeqLib::GenomicRegion& gr) { m_gr = gr; - v = uint16_sp(new std::vector(gr.width(),0)); + v = uint16_sp(new std::vector(gr.Width(),0)); } uint16_t STCoverage::maxCov() const { diff --git a/src/VariantBamWalker.cpp b/src/VariantBamWalker.cpp index 2aac90d..cee7fe3 100644 --- a/src/VariantBamWalker.cpp +++ b/src/VariantBamWalker.cpp @@ -16,7 +16,7 @@ void VariantBamWalker::writeVariantBam() SeqLib::BamRecordVector buffer; // check if the BAM is sorted by looking at the header - std::string hh = m_hdr.AsString(); //std::string(header()->text); + std::string hh = Header().AsString(); //std::string(header()->text); bool sorted = hh.find("SO:coord") != std::string::npos; if (!sorted && max_cov > 0) { @@ -27,8 +27,8 @@ void VariantBamWalker::writeVariantBam() // check that regions are sufficient size for (auto& k : m_region) - if (k.width() < 1000) - k.pad(1000); + if (k.Width() < 1000) + k.Pad(1000); while (GetNextRecord(r)) { @@ -37,7 +37,6 @@ void VariantBamWalker::writeVariantBam() // prepare for case of long reads buffer_size = std::max((int32_t)r.Length() * 5, buffer_size); - //std::cerr << "...r " << r << " rule " << rule << std::endl; TrackSeenRead(r); // add coverage @@ -160,15 +159,12 @@ void VariantBamWalker::printMessage(const SeqLib::BamRecord &r) const if (rc_main.total <= 0) { std::sprintf(buffer, "NO READS FOUND at %2s:%-11s",chrname.c_str(), posstring.c_str()); - std::string outr(buffer); - //std::cerr << outr << SnowTools::displayRuntime(start) << std::endl; + std::cerr << std::string(buffer) << std::endl; return; } std::sprintf (buffer, "Read %11s at %2s:%-11s. Kept %10s (%2d%%) -- ", rc_main.totalString().c_str(), chrname.c_str(), posstring.c_str(), rc_main.keepString().c_str(), rc_main.percent()); - std::string outr(buffer); - //std::cerr << outr << SnowTools::displayRuntime(start) << std::endl;; - + std::cerr << std::string(buffer) << std::endl; } diff --git a/src/VariantBamWalker.h b/src/VariantBamWalker.h index 01918e3..5a00ba1 100644 --- a/src/VariantBamWalker.h +++ b/src/VariantBamWalker.h @@ -12,9 +12,7 @@ class VariantBamWalker: public SeqLib::BamReader { public: - VariantBamWalker() {} - - VariantBamWalker(const std::string in) : SeqLib::BamReader(in) {} + VariantBamWalker() {} void writeVariantBam(); @@ -37,7 +35,7 @@ class VariantBamWalker: public SeqLib::BamReader bool m_verbose = false; - SeqLib::ReadFilterCollection m_mr; + SeqLib::Filter::ReadFilterCollection m_mr; SeqLib::BamWriter m_writer; diff --git a/src/variant.cpp b/src/variant.cpp index ceda948..0e7f4c6 100644 --- a/src/variant.cpp +++ b/src/variant.cpp @@ -36,10 +36,10 @@ static const char *VARIANT_BAM_USAGE_MESSAGE = " Description: Filter a BAM/SAM/CRAM/STDIN according to hierarchical rules\n" "\n" " General options\n" -" --help Display this help and exit\n" +" -h, --help Display this help and exit\n" " -v, --verbose Verbose output\n" //" -c, --counts-file File to place read counts per rule / region\n" -" -x, --no-output Don't output reads (used for profiling with -q and/or counting with -c)\n" +" -x, --no-output Don't output reads (used for profiling with -q)\n" " -r, --rules JSON ecript for the rules.\n" " -k, --proc-regions-file Samtools-style region string (e.g. 1:1,000-2,000) or BED of regions to process\n" " Output options\n" @@ -63,9 +63,9 @@ static const char *VARIANT_BAM_USAGE_MESSAGE = " --min-clip Minimum number of quality clipped bases\n" " --max-nbases Maximum number of N bases\n" " --min-mapq Minimum mapping quality\n" -" --min-del Minimum number of inserted bases\n" -" --min-ins Minimum number of deleted bases\n" -" --min-readlength Minimum read length (after base-quality trimming)\n" +" --min-del Minimum number of deleted bases\n" +" --min-ins Minimum number of inserted bases\n" +" --min-length Minimum read length (after base-quality trimming)\n" " --motif Motif file\n" " -R, --read-group Limit to just a single read group\n" " -f, --include-aln-flag Flags to include (like samtools -f)\n" @@ -178,7 +178,7 @@ int main(int argc, char** argv) { #ifndef __APPLE__ // start the timer - //clock_gettime(CLOCK_MONOTONIC, &start); + // clock_gettime(CLOCK_MONOTONIC, &start); #endif // parse the command line @@ -217,7 +217,7 @@ int main(int argc, char** argv) { } else if (opt::proc_regions.find(":") != std::string::npos) { grv_proc_regions.add(SeqLib::GenomicRegion(opt::proc_regions, reader.Header())); } - grv_proc_regions.createTreeMap(); + grv_proc_regions.CreateTreeMap(); } // open for writing @@ -269,10 +269,10 @@ int main(int argc, char** argv) { std::cerr << "Rules script: " << str << std::endl; } - SeqLib::ReadFilterCollection rfc; + SeqLib::Filter::ReadFilterCollection rfc; if (!opt::rules.empty()) - rfc = SeqLib::ReadFilterCollection(opt::rules, reader.Header()); + rfc = SeqLib::Filter::ReadFilterCollection(opt::rules, reader.Header()); // make sure command_line_reigons makes sense if (command_line_regions.size() == 2 && command_line_regions[1].all()) { @@ -289,7 +289,7 @@ int main(int argc, char** argv) { // add specific mini rules from command-line for (auto& i : command_line_regions) { - SeqLib::ReadFilter rf = BuildReadFilterFromCommandLineRegion(i, reader.Header()); + SeqLib::Filter::ReadFilter rf = BuildReadFilterFromCommandLineRegion(i, reader.Header()); //SeqLib::MiniRules mr(i, walk.header()); //SeqLib::ReadFilter rf(i, reader.Header()); //mr.pad = i.pad; @@ -318,7 +318,7 @@ int main(int argc, char** argv) { SeqLib::GRC rules_rg = grv_proc_regions; //reader.GetMiniRulesCollection().getAllRegions(); - rules_rg.createTreeMap(); + rules_rg.CreateTreeMap(); if (grv_proc_regions.size() && rules_rg.size()) { // intersect rules regions with mask regions. // dont incorporate rules regions if there are any mate-linked regions