Skip to content

Commit

Permalink
Merge pull request #38 from jaebeom-kim/master
Browse files Browse the repository at this point in the history
Support *.fna.gz and *.fq.gz query files in classify module
  • Loading branch information
jaebeom-kim authored Aug 10, 2023
2 parents 4da2f6d + 21a5122 commit 38d969c
Show file tree
Hide file tree
Showing 7 changed files with 198 additions and 265 deletions.
419 changes: 172 additions & 247 deletions src/commons/Classifier.cpp

Large diffs are not rendered by default.

21 changes: 10 additions & 11 deletions src/commons/Classifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <cmath>
#include "Match.h"
#include <unordered_set>

// Buffer size constant: 16 * 1024 * 1024 = 16'777'216.
// NOTE(review): the unit (bytes vs. elements) is not visible from this header excerpt — confirm at the use sites.
#define BufferSize 16'777'216
using namespace std;

Expand Down Expand Up @@ -131,23 +132,21 @@ class Classifier {
{3, 2, 3, 3, 4, 4, 1, 0}};

// Index reads in query file
static void splitFASTQ(vector<SequenceBlock> & seqSegments, const string & queryPath);
static void splitFASTA(vector<SequenceBlock> & seqSegments, const string & queryPath);
static void splitQueryFile(vector<SequenceBlock> & seqSegments, const string & queryPath);

// Extract query k-mer
void fillQueryKmerBufferParallel(QueryKmerBuffer &kmerBuffer,
MmapedData<char> &seqFile,
const vector<SequenceBlock> &seqs,
void fillQueryKmerBufferParallel(KSeqWrapper* kseq1,
QueryKmerBuffer &kmerBuffer,
vector<Query> & queryList,
const pair<size_t, size_t> & currentSplit,
const LocalParameters &par);

void fillQueryKmerBufferParallel(QueryKmerBuffer &kmerBuffer,
const vector<SequenceBlock> &seqs,
const vector<SequenceBlock> &seqs2,
vector<Query> & queryList,
const pair<size_t, size_t> & currentSplit,
const LocalParameters &par);
void fillQueryKmerBufferParallel_paired(KSeqWrapper* kseq1,
KSeqWrapper* kseq2,
QueryKmerBuffer &kmerBuffer,
vector<Query> &queryList,
const pair<size_t, size_t> &currentSplit,
const LocalParameters &par);

static int getMaxCoveredLength(int queryLength);

Expand Down
7 changes: 3 additions & 4 deletions src/commons/IndexCreator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,8 @@ IndexCreator::IndexCreator(const LocalParameters &par, string dbDir, string fnaL
}

/**
 * Destructor: deletes the objects referenced by the `taxonomy` and `subMat`
 * member pointers.
 *
 * Each pointer is deleted exactly once. `delete` on a null pointer is a
 * no-op, so no explicit null check is required; deleting `taxonomy` both
 * inside a null check and again unconditionally would be a double delete
 * (undefined behaviour).
 *
 * NOTE(review): assumes both members are either null or were allocated with
 * scalar `new` — confirm against the constructors.
 */
IndexCreator::~IndexCreator() {
    delete taxonomy;
    delete subMat;
}

void IndexCreator::createIndex(const LocalParameters &par) {
Expand Down Expand Up @@ -115,7 +114,6 @@ void IndexCreator::createIndex(const LocalParameters &par) {
delete[] uniqKmerIdx;
}
delete[] splitChecker;

}

void IndexCreator::updateIndex(const LocalParameters &par) {
Expand Down Expand Up @@ -483,6 +481,7 @@ void IndexCreator::reduceRedundancy(TargetKmerBuffer & kmerBuffer, size_t * uniq
for(size_t i = 0; i < splits.size(); i++){
delete[] idxOfEachSplit[i];
}
delete[] idxOfEachSplit;
delete[] cntOfEachSplit;
}

Expand Down
2 changes: 1 addition & 1 deletion src/commons/SeqIterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -777,7 +777,7 @@ void SeqIterator::generateIntergenicKmerList(_gene *genes, _node *nodes, int num
free(kmer);
}

void SeqIterator::maskLowComplexityRegions(char *seq, char *maskedSeq, ProbabilityMatrix & probMat,
void SeqIterator::maskLowComplexityRegions(const char *seq, char *maskedSeq, ProbabilityMatrix & probMat,
float maskProb, const BaseMatrix * subMat) {
unsigned int seqLen = 0;
while (seq[seqLen] != '\0') {
Expand Down
2 changes: 1 addition & 1 deletion src/commons/SeqIterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ class SeqIterator {
int fillBufferWithKmerFromBlock(const PredictedBlock &block, const char *seq, TargetKmerBuffer &kmerBuffer,
size_t &posToWrite, int seqID, int taxIdAtRank);

static void maskLowComplexityRegions(char * seq, char * maskedSeq, ProbabilityMatrix & probMat,
static void maskLowComplexityRegions(const char * seq, char * maskedSeq, ProbabilityMatrix & probMat,
float maskProb, const BaseMatrix * subMat);

void printKmerInDNAsequence(uint64_t kmer);
Expand Down
10 changes: 10 additions & 0 deletions src/workflow/classify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "Parameters.h"
#include "LocalParameters.h"
#include "NcbiTaxonomy.h"
#include "FileUtil.h"

void setClassifyDefaults(LocalParameters & par){
par.virusTaxId = 10239;// Taxonomy ID of virus taxon in NCBI
Expand Down Expand Up @@ -34,6 +35,15 @@ int classify(int argc, const char **argv, const Command& command)
setClassifyDefaults(par);
par.parseParameters(argc, argv, command, true, Parameters::PARSE_ALLOW_EMPTY, 0);

if (par.seqMode == 2) {
if (!FileUtil::directoryExists(par.filenames[3].c_str())) {
FileUtil::makeDir(par.filenames[3].c_str());
}
} else {
if (!FileUtil::directoryExists(par.filenames[2].c_str())) {
FileUtil::makeDir(par.filenames[2].c_str());
}
}

#ifdef OPENMP
omp_set_num_threads(par.threads);
Expand Down

0 comments on commit 38d969c

Please sign in to comment.