From 797e7d67af7e47263e6ebcc05b6958fe9819fe85 Mon Sep 17 00:00:00 2001
From: Jaebeom Kim <jbeom0731@gmail.com>
Date: Thu, 22 Aug 2024 09:38:13 +0900
Subject: [PATCH 01/21] Query k-mer extraction process is optimized.

previous
1. Load thread num of sequence
2. Each thread process one sequence

Now
1. One thread keeps loading sequences for other threads to process
 - for each thread, 1000 sequences are loaded
2. When 1000 seuquences are loaded, one of idle threads take them to process
---
 src/commons/IndexCreator.cpp    |  19 +-
 src/commons/KmerExtractor.cpp   | 435 +++++++++++++++++++++++---------
 src/commons/KmerExtractor.h     |   8 +
 src/commons/ProdigalWrapper.cpp | 225 ++++++++++++++++-
 src/commons/ProdigalWrapper.h   |   6 +
 src/commons/SeqIterator.cpp     | 256 ++-----------------
 src/commons/SeqIterator.h       |  31 +--
 src/commons/common.h            |  12 +
 8 files changed, 599 insertions(+), 393 deletions(-)
diff --git a/src/commons/IndexCreator.cpp b/src/commons/IndexCreator.cpp
index c89ae373..8a584c00 100644
--- a/src/commons/IndexCreator.cpp
+++ b/src/commons/IndexCreator.cpp
@@ -866,6 +866,7 @@ size_t IndexCreator::fillTargetKmerBuffer(TargetKmerBuffer &kmerBuffer,
         kseq_buffer_t buffer;
         kseq_t *seq;
         vector<uint64_t> intergenicKmers;
+        vector<int> aaSeq;
 #pragma omp for schedule(dynamic, 1)
         for (size_t i = 0; i < fnaSplits.size(); i++) {
             if (!checker[i] && !hasOverflow) {
@@ -917,7 +918,7 @@ size_t IndexCreator::fillTargetKmerBuffer(TargetKmerBuffer &kmerBuffer,
                     prodigal->getPredictedGenes(seq->seq.s);
                     seqIterator.generateIntergenicKmerList(prodigal->genes, prodigal->nodes,
                                                            prodigal->getNumberOfPredictedGenes(),
-                                                           intergenicKmers,seq->seq.s);
+                                                           intergenicKmers, seq->seq.s);
 
                     // Get min k-mer hash list for determining strandness
                     seqIterator.getMinHashList(standardList, seq->seq.s);
@@ -941,7 +942,7 @@ size_t IndexCreator::fillTargetKmerBuffer(TargetKmerBuffer &kmerBuffer,
                             // Get extended ORFs
                             prodigal->getPredictedGenes(seq->seq.s);
                             prodigal->removeCompletelyOverlappingGenes();
-                            seqIterator.getExtendedORFs(prodigal->finalGenes, prodigal->nodes, extendedORFs,
+                            prodigal->getExtendedORFs(prodigal->finalGenes, prodigal->nodes, extendedORFs,
                                                              prodigal->fng, strlen(seq->seq.s),
                                                         orfNum, intergenicKmers, seq->seq.s);
                             // Get masked sequence
@@ -955,14 +956,16 @@ size_t IndexCreator::fillTargetKmerBuffer(TargetKmerBuffer &kmerBuffer,
 
                             // Get k-mers from extended ORFs
                             for (size_t orfCnt = 0; orfCnt < orfNum; orfCnt++) {
-                                seqIterator.translateBlock(maskedSeq, extendedORFs[orfCnt]);
+                                aaSeq.clear();
+                                seqIterator.translateBlock(maskedSeq, extendedORFs[orfCnt], aaSeq);
                                 tempCheck = seqIterator.fillBufferWithKmerFromBlock(
                                         extendedORFs[orfCnt],
                                         maskedSeq,
                                         kmerBuffer,
                                         posToWrite,
                                         int(processedSeqCnt[fnaSplits[i].file_idx] + fnaSplits[i].offset + s_cnt),
-                                        fnaSplits[i].speciesID);
+                                        fnaSplits[i].speciesID,
+                                        aaSeq);
                                 if (tempCheck == -1) {
                                     cout << "ERROR: Buffer overflow " << seq->name.s << seq->seq.l << endl;
                                 }
@@ -976,7 +979,7 @@ size_t IndexCreator::fillTargetKmerBuffer(TargetKmerBuffer &kmerBuffer,
                             // Get extended ORFs
                             prodigal->getPredictedGenes(reverseCompliment);
                             prodigal->removeCompletelyOverlappingGenes();
-                            seqIterator.getExtendedORFs(prodigal->finalGenes, prodigal->nodes, extendedORFs,
+                            prodigal->getExtendedORFs(prodigal->finalGenes, prodigal->nodes, extendedORFs,
                                                              prodigal->fng, strlen(reverseCompliment),
                                                         orfNum, intergenicKmers, reverseCompliment);
 
@@ -990,14 +993,16 @@ size_t IndexCreator::fillTargetKmerBuffer(TargetKmerBuffer &kmerBuffer,
                             }
 
                             for (size_t orfCnt = 0; orfCnt < orfNum; orfCnt++) {
-                                seqIterator.translateBlock(maskedSeq, extendedORFs[orfCnt]);
+                                aaSeq.clear();
+                                seqIterator.translateBlock(maskedSeq, extendedORFs[orfCnt], aaSeq);
                                 tempCheck = seqIterator.fillBufferWithKmerFromBlock(
                                         extendedORFs[orfCnt],
                                         maskedSeq,
                                         kmerBuffer,
                                         posToWrite,
                                         int(processedSeqCnt[fnaSplits[i].file_idx] + fnaSplits[i].offset + s_cnt),
-                                        fnaSplits[i].speciesID);
+                                        fnaSplits[i].speciesID,
+                                        aaSeq);
                                 if (tempCheck == -1) {
                                     cout << "ERROR: Buffer overflow " << seq->name.s << seq->seq.l << endl;
                                 }
diff --git a/src/commons/KmerExtractor.cpp b/src/commons/KmerExtractor.cpp
index c9edff9b..81f90495 100644
--- a/src/commons/KmerExtractor.cpp
+++ b/src/commons/KmerExtractor.cpp
@@ -1,8 +1,8 @@
 #include "KmerExtractor.h"
-#include "common.h"
 #include <unordered_map>
 
 KmerExtractor::KmerExtractor(const LocalParameters &par) {
+    seqIterator = new SeqIterator(par);
     spaceNum = 0;
     maskMode = par.maskMode;
     maskProb = par.maskProb;
@@ -11,6 +11,7 @@ KmerExtractor::KmerExtractor(const LocalParameters &par) {
 }
 
 KmerExtractor::~KmerExtractor() {
+    delete seqIterator;
     delete probMatrix;
     delete subMat;
 }
@@ -46,72 +47,112 @@ void KmerExtractor::extractQueryKmers(QueryKmerBuffer &kmerBuffer,
     cout << "Time spent for sorting query metamer list: " << double(time(nullptr) - beforeQueryKmerSort) << endl;
 }
 
-void KmerExtractor::fillQueryKmerBufferParallel(KSeqWrapper *kseq1,
+void KmerExtractor::fillQueryKmerBufferParallel(KSeqWrapper *kseq,
                                                 QueryKmerBuffer &kmerBuffer,
-                                                vector<Query> &queryList,
+                                                std::vector<Query> &queryList,
                                                 const QuerySplit &currentSplit,
-                                                const LocalParameters &par) {                                                   
+                                                const LocalParameters &par) {   
+    size_t readLength = 1000;
     size_t processedQueryNum = 0;
+    size_t chunkSize = 1000;
  
-     // Array to store reads of thread number
-     vector<string> reads1(par.threads);
- 
-    while (processedQueryNum < currentSplit.readCnt) {
-        size_t currentQueryNum = min(currentSplit.readCnt - processedQueryNum, (size_t) par.threads);
-        size_t count = 0;
-        while (count < currentQueryNum) {
-            // Read query
-            kseq1->ReadEntry();
-            const KSeqWrapper::KSeqEntry & e1 = kseq1->entry;
-
-            // Get k-mer count
-            int kmerCnt = LocalUtil::getQueryKmerNumber<int>((int) e1.sequence.l, spaceNum);
-
-            // Query Info
-            queryList[processedQueryNum].queryLength = LocalUtil::getMaxCoveredLength((int) e1.sequence.l);
-            queryList[processedQueryNum].name = string(e1.name.s);
-            queryList[processedQueryNum].kmerCnt = (int) (kmerCnt);
-
-            // Store reads
-            reads1[count] = string(kseq1->entry.sequence.s);
-
-            processedQueryNum ++;
-            count ++;
+    // Reserve memory for each read of the chunk for each thread
+    std::vector<std::vector<string>> chunkReads_thread(par.threads);
+    for (size_t i = 0; i < par.threads; ++i) {
+        chunkReads_thread[i].resize(chunkSize);
+        for (size_t j = 0; j < chunkSize; ++j) {
+            chunkReads_thread[i][j].reserve(readLength);
         }
-#pragma omp parallel default(none), shared(par, kmerBuffer, cout, processedQueryNum, queryList, currentQueryNum, count, reads1)
+    }
+
+    // Initialize atomic variable for active tasks
+    std::vector<std::atomic<bool>> busyThreads(par.threads);
+    for (size_t i = 0; i < par.threads; ++i) {
+        busyThreads[i].store(false);
+    }
+
+    // OpenMP parallel region with tasks
+#pragma omp parallel default(none) shared(par, readLength, kmerBuffer, \
+queryList, currentSplit, processedQueryNum, kseq, chunkSize, chunkReads_thread, busyThreads)
+    {
+        char *maskedSeq = new char[readLength];
+        char *seq = nullptr;
+        vector<int> aaFrames_read[6];
+        // reserve memory for each frame
+        for (int i = 0; i < 6; ++i) {
+            aaFrames_read[i].reserve(readLength/3);
+        }     
+#pragma omp single nowait
         {
-            SeqIterator seqIterator(par);
-            size_t posToWrite;
-#pragma omp for schedule(dynamic, 1)
-            for (size_t i = 0; i < currentQueryNum; i ++) {
-                size_t queryIdx = processedQueryNum - currentQueryNum + i;
-                // Get k-mer count
-                int kmerCnt = LocalUtil::getQueryKmerNumber<int>(reads1[i].length(), spaceNum);
+            int masterThread = omp_get_thread_num();
+            size_t count = 0;
+            while (processedQueryNum < currentSplit.readCnt) {
+                // Find an idle thread
+                int threadId;
+                for (threadId = 0; threadId < par.threads; ++threadId) {
+                    if (threadId == masterThread) { continue; }
+                    if (!busyThreads[threadId].load()) {
+                        busyThreads[threadId].store(true);
+                        break;
+                    }
+                }
                 
-                // Ignore short read
-                if (kmerCnt < 1) { continue; }
-
-                // Get masked sequence
-                char *maskedSeq1 = nullptr;
-                if (maskMode) {
-                    maskedSeq1 = new char[reads1[i].length() + 1];
-                    SeqIterator::maskLowComplexityRegions(reads1[i].c_str(),maskedSeq1, *probMatrix, maskProb, subMat);
-                } else {
-                    maskedSeq1 = const_cast<char *>(reads1[i].c_str());
+                if (threadId == par.threads) {
+                    continue;
                 }
 
-                posToWrite = kmerBuffer.reserveMemory(kmerCnt);
-
-                // Process Read 1
-                seqIterator.sixFrameTranslation(maskedSeq1, (int) reads1[i].length());
-                seqIterator.fillQueryKmerBuffer(maskedSeq1, (int) reads1[i].length(), kmerBuffer, posToWrite,
-                                                (uint32_t) queryIdx);
+                size_t chunkEnd = min(processedQueryNum + chunkSize, currentSplit.readCnt);
+                count = 0;
 
-                if (maskMode) {
-                    delete[] maskedSeq1;
+                for (size_t i = 0; i < chunkSize && processedQueryNum < chunkEnd; ++i, ++processedQueryNum) {
+                    kseq->ReadEntry();
+                    queryList[processedQueryNum].name = string(kseq->entry.name.s);
+                    queryList[processedQueryNum].queryLength = LocalUtil::getMaxCoveredLength((int) kseq->entry.sequence.l);
+                    
+                    // Check if the read is too short
+                    int kmerCnt = LocalUtil::getQueryKmerNumber<int>((int) kseq->entry.sequence.l, spaceNum);
+                    if (kmerCnt < 1) {
+                        chunkReads_thread[threadId][i] = "";
+                        queryList[processedQueryNum].kmerCnt = 0;
+                    } else {
+                        chunkReads_thread[threadId][i] = string(kseq->entry.sequence.s);
+                        queryList[processedQueryNum].kmerCnt = kmerCnt;
+                    }
+                    count++;
                 }
+                
+                // Process each chunk by idle threads
+                #pragma omp task firstprivate(count, processedQueryNum, threadId)
+                {
+                    for (size_t i = 0; i < count; ++i) {
+                        size_t queryIdx = processedQueryNum - count + i;
+                        if (chunkReads_thread[threadId][i].empty()) { continue; }
+                        
+                        // Get masked sequence
+                        if (maskMode) {
+                            if (readLength < chunkReads_thread[threadId][i].length() + 1) {
+                                readLength = chunkReads_thread[threadId][i].length() + 1;
+                                delete[] maskedSeq;
+                                maskedSeq = new char[readLength];
+                            }
+                            SeqIterator::maskLowComplexityRegions(chunkReads_thread[threadId][i].c_str(), maskedSeq, *probMatrix, maskProb, subMat);
+                            seq = maskedSeq;
+                        } else {
+                            seq = const_cast<char *>(chunkReads_thread[threadId][i].c_str());
+                        }
+
+                        size_t posToWrite = kmerBuffer.reserveMemory(queryList[queryIdx].kmerCnt);
+                        
+                        // Process Read 1
+                        seqIterator->sixFrameTranslation(seq, (int) chunkReads_thread[threadId][i].length(), aaFrames_read);
+                        seqIterator->fillQueryKmerBuffer(seq, (int) chunkReads_thread[threadId][i].length(), kmerBuffer, posToWrite, 
+                                                        (uint32_t) queryIdx, aaFrames_read);
+                    }
+                    busyThreads[threadId].store(false);
+                }   
             }
         }
+        delete[] maskedSeq;
     }
 }
 
@@ -121,89 +162,231 @@ void KmerExtractor::fillQueryKmerBufferParallel_paired(KSeqWrapper *kseq1,
                                                        vector<Query> &queryList,
                                                        const QuerySplit &currentSplit,
                                                        const LocalParameters &par) {
+    size_t readLength = 1000;
     size_t processedQueryNum = 0;
-
-    // Array to store reads of thread number
-    vector<string> reads1(par.threads);
-    vector<string> reads2(par.threads);
-
-    while (processedQueryNum < currentSplit.readCnt) {
-        size_t currentQueryNum = min(currentSplit.readCnt - processedQueryNum, (size_t) par.threads);
-        size_t count = 0;
-
-        // Fill reads in sequential
-        while (count < currentQueryNum) {
-            // Read query
-            kseq1->ReadEntry();
-            kseq2->ReadEntry();
-            const KSeqWrapper::KSeqEntry & e1 = kseq1->entry;
-            const KSeqWrapper::KSeqEntry & e2 = kseq2->entry;
-
-            // Get k-mer count
-            int kmerCnt = LocalUtil::getQueryKmerNumber<int>((int) e1.sequence.l, spaceNum);
-            int kmerCnt2 = LocalUtil::getQueryKmerNumber<int>((int) e2.sequence.l, spaceNum);
-
-            // Query Info
-            queryList[processedQueryNum].queryLength = LocalUtil::getMaxCoveredLength((int) e1.sequence.l);
-            queryList[processedQueryNum].queryLength2 = LocalUtil::getMaxCoveredLength((int) e2.sequence.l);
-            queryList[processedQueryNum].name = string(e1.name.s);
-            queryList[processedQueryNum].kmerCnt = (int) (kmerCnt + kmerCnt2);
-
-            // Store reads
-            reads1[count] = string(kseq1->entry.sequence.s);
-            reads2[count] = string(kseq2->entry.sequence.s);
-
-            processedQueryNum ++;
-            count ++;
+    size_t chunkSize = 1000;
+    // Reserve memory for each read of the chunk for each thread
+    std::vector<std::vector<string>> chunkReads1_thread(par.threads);
+    std::vector<std::vector<string>> chunkReads2_thread(par.threads);
+    for (size_t i = 0; i < par.threads; ++i) {
+        chunkReads1_thread[i].resize(chunkSize);
+        chunkReads2_thread[i].resize(chunkSize);
+        for (size_t j = 0; j < chunkSize; ++j) {
+            chunkReads1_thread[i][j].reserve(readLength);
+            chunkReads2_thread[i][j].reserve(readLength);
         }
+    }
+
+    // Initialize atomic variable for active tasks
+    std::vector<std::atomic<bool>> busyThreads(par.threads);
+    for (size_t i = 0; i < par.threads; ++i) {
+        busyThreads[i].store(false);
+    }
 
-        // Process reads in parallel
-#pragma omp parallel default(none), shared(par, kmerBuffer, cout, processedQueryNum, queryList, currentQueryNum, currentSplit, count, reads1, reads2)
+    // OpenMP parallel region with tasks
+#pragma omp parallel default(none) shared(par, readLength, \
+kmerBuffer, queryList, currentSplit, processedQueryNum, kseq1, kseq2, \
+chunkSize, chunkReads1_thread, chunkReads2_thread, busyThreads, cout)
+    {
+        char *maskedSeq1 = new char[readLength];
+        char *maskedSeq2 = new char[readLength];   
+        char *seq1 = nullptr;
+        char *seq2 = nullptr;
+        vector<int> aaFrames_read1[6];
+        vector<int> aaFrames_read2[6];
+        // reserve memory for each frame
+        for (int i = 0; i < 6; ++i) {
+            aaFrames_read1[i].reserve(readLength/3);
+            aaFrames_read2[i].reserve(readLength/3);
+        }     
+#pragma omp single nowait
         {
-            SeqIterator seqIterator(par);
-            SeqIterator seqIterator2(par);
-            size_t posToWrite;
-#pragma omp for schedule(dynamic, 1)
-            for (size_t i = 0; i < currentQueryNum; i ++) {
-                size_t queryIdx = processedQueryNum - currentQueryNum + i;
-                // Get k-mer count
-                int kmerCnt = LocalUtil::getQueryKmerNumber<int>(reads1[i].length(), spaceNum);
-                int kmerCnt2 = LocalUtil::getQueryKmerNumber<int>(reads2[i].length(), spaceNum);
-
-                // Ignore short read
-                if (kmerCnt2 < 1 || kmerCnt < 1) { continue; }
-
-                // Get masked sequence
-                char *maskedSeq1 = nullptr;
-                char *maskedSeq2 = nullptr;
-                if (maskMode) {
-                    maskedSeq1 = new char[reads1[i].length() + 1];
-                    maskedSeq2 = new char[reads2[i].length() + 1];
-                    SeqIterator::maskLowComplexityRegions(reads1[i].c_str(),maskedSeq1, *probMatrix, maskProb, subMat);
-                    SeqIterator::maskLowComplexityRegions(reads2[i].c_str(),maskedSeq2, *probMatrix, maskProb, subMat);
-                } else {
-                    maskedSeq1 = const_cast<char *>(reads1[i].c_str());
-                    maskedSeq2 = const_cast<char *>(reads2[i].c_str());
+            int masterThread = omp_get_thread_num();
+            size_t count = 0;
+            while (processedQueryNum < currentSplit.readCnt) {
+                // Find an idle thread
+                int threadId;
+                for (threadId = 0; threadId < par.threads; ++threadId) {
+                    if (threadId == masterThread) { continue; }
+                    if (!busyThreads[threadId].load()) {
+                        busyThreads[threadId].store(true);
+                        break;
+                    }
                 }
 
-                posToWrite = kmerBuffer.reserveMemory(kmerCnt + kmerCnt2);
-
-                // Process Read 1
-                seqIterator.sixFrameTranslation(maskedSeq1, (int) reads1[i].length());
-                seqIterator.fillQueryKmerBuffer(maskedSeq1, (int) reads1[i].length(), kmerBuffer, posToWrite,
-                                                (uint32_t) queryIdx);
+                if (threadId == par.threads) {
+                    continue;
+                }
 
-                // Process Read 2
-                seqIterator2.sixFrameTranslation(maskedSeq2, (int) reads2[i].length());
-                seqIterator2.fillQueryKmerBuffer(maskedSeq2, (int) reads2[i].length(), kmerBuffer, posToWrite,
-                                                 (uint32_t) queryIdx, queryList[queryIdx].queryLength+3);
+                size_t chunkEnd = min(processedQueryNum + chunkSize, currentSplit.readCnt);
+                count = 0;
 
-                if (maskMode) {
-                    delete[] maskedSeq1;
-                    delete[] maskedSeq2;
+                for (size_t i = 0; i < chunkSize && processedQueryNum < chunkEnd; ++i, ++processedQueryNum) {
+                    kseq1->ReadEntry();
+                    kseq2->ReadEntry();
+                    queryList[processedQueryNum].name = string(kseq1->entry.name.s);
+                    queryList[processedQueryNum].queryLength = LocalUtil::getMaxCoveredLength((int) kseq1->entry.sequence.l);
+                    queryList[processedQueryNum].queryLength2 = LocalUtil::getMaxCoveredLength((int) kseq2->entry.sequence.l);
+                    
+                    // Check if the read is too short
+                    int kmerCnt1 = LocalUtil::getQueryKmerNumber<int>((int) kseq1->entry.sequence.l, spaceNum);
+                    int kmerCnt2 = LocalUtil::getQueryKmerNumber<int>((int) kseq2->entry.sequence.l, spaceNum);
+                    if (kmerCnt1 < 1 || kmerCnt2 < 1) {
+                        chunkReads1_thread[threadId][i] = "";
+                        chunkReads2_thread[threadId][i] = "";
+                        queryList[processedQueryNum].kmerCnt = 0;
+                    } else {
+                        // cout << threadId << endl
+                        chunkReads1_thread[threadId][i] = string(kseq1->entry.sequence.s);
+                        chunkReads2_thread[threadId][i] = string(kseq2->entry.sequence.s);
+                        queryList[processedQueryNum].kmerCnt = kmerCnt1 + kmerCnt2;
+                    }
+                    count++;
                 }
+                
+
+                // Process each chunk by idle threads
+                #pragma omp task firstprivate(count, processedQueryNum, threadId)
+                {
+                    for (size_t i = 0; i < count; ++i) {
+                        size_t queryIdx = processedQueryNum - count + i;
+                        if (chunkReads1_thread[threadId][i].empty() || chunkReads2_thread[threadId][i].empty()) { continue; }
+                        
+                        // Get masked sequence
+                        if (maskMode) {
+                            if (readLength < chunkReads1_thread[threadId][i].length() + 1 || readLength < chunkReads2_thread[threadId][i].length() + 1) {
+                                readLength = max(chunkReads1_thread[threadId][i].length() + 1, chunkReads2_thread[threadId][i].length() + 1);
+                                delete[] maskedSeq1;
+                                delete[] maskedSeq2;
+                                maskedSeq1 = new char[readLength];
+                                maskedSeq2 = new char[readLength];
+                            }
+                            SeqIterator::maskLowComplexityRegions(chunkReads1_thread[threadId][i].c_str(),maskedSeq1, *probMatrix, maskProb, subMat);
+                            SeqIterator::maskLowComplexityRegions(chunkReads2_thread[threadId][i].c_str(),maskedSeq2, *probMatrix, maskProb, subMat);
+                            seq1 = maskedSeq1;
+                            seq2 = maskedSeq2;
+                        } else {
+                            seq1 = const_cast<char *>(chunkReads1_thread[threadId][i].c_str());
+                            seq2 = const_cast<char *>(chunkReads2_thread[threadId][i].c_str());
+                        }
+
+                        size_t posToWrite = kmerBuffer.reserveMemory(queryList[queryIdx].kmerCnt);
+                        
+                        // Process Read 1
+                        seqIterator->sixFrameTranslation(seq1, (int) chunkReads1_thread[threadId][i].length(), aaFrames_read1);
+                        seqIterator->fillQueryKmerBuffer(seq1, (int) chunkReads1_thread[threadId][i].length(), kmerBuffer, posToWrite, 
+                                                        (uint32_t) queryIdx, aaFrames_read1);
+
+                        // Process Read 2
+                        seqIterator->sixFrameTranslation(seq2, (int) chunkReads2_thread[threadId][i].length(), aaFrames_read2);
+                        seqIterator->fillQueryKmerBuffer(seq2, (int) chunkReads2_thread[threadId][i].length(), kmerBuffer, posToWrite,
+                                                        (uint32_t) queryIdx, aaFrames_read2, queryList[queryIdx].queryLength+3);
+                    }
+                    busyThreads[threadId].store(false);
+                }   
             }
         }
+        delete[] maskedSeq1;
+        delete[] maskedSeq2;
     }
 }
+    
+
+
+// void KmerExtractor::fillQueryKmerBufferParallel_paired2(KSeqWrapper *kseq1,
+//                                                        KSeqWrapper *kseq2,
+//                                                        QueryKmerBuffer &kmerBuffer,
+//                                                        vector<Query> &queryList,
+//                                                        const QuerySplit &currentSplit,
+//                                                        const LocalParameters &par) {
+//     size_t processedQueryNum = 0;
+
+//     // Array to store reads of thread number
+//     vector<string> reads1(par.threads);
+//     vector<string> reads2(par.threads);
+
+//     while (processedQueryNum < currentSplit.readCnt) {
+//         size_t currentQueryNum = min(currentSplit.readCnt - processedQueryNum, (size_t) par.threads);
+//         size_t count = 0;
+
+//         // Fill reads in sequential
+//         while (count < currentQueryNum) {
+//             // Read query
+//             kseq1->ReadEntry();
+//             kseq2->ReadEntry();
+//             const KSeqWrapper::KSeqEntry & e1 = kseq1->entry;
+//             const KSeqWrapper::KSeqEntry & e2 = kseq2->entry;
+
+//             // Get k-mer count
+//             int kmerCnt = LocalUtil::getQueryKmerNumber<int>((int) e1.sequence.l, spaceNum);
+//             int kmerCnt2 = LocalUtil::getQueryKmerNumber<int>((int) e2.sequence.l, spaceNum);
+
+//             // Query Info
+//             queryList[processedQueryNum].queryLength = LocalUtil::getMaxCoveredLength((int) e1.sequence.l);
+//             queryList[processedQueryNum].queryLength2 = LocalUtil::getMaxCoveredLength((int) e2.sequence.l);
+//             queryList[processedQueryNum].name = string(e1.name.s);
+//             queryList[processedQueryNum].kmerCnt = (int) (kmerCnt + kmerCnt2);
+
+//             // Store reads
+//             reads1[count] = string(kseq1->entry.sequence.s);
+//             reads2[count] = string(kseq2->entry.sequence.s);
+
+//             processedQueryNum ++;
+//             count ++;
+//         }
+
+//         // Process reads in parallel
+// #pragma omp parallel default(none), shared(par, kmerBuffer, cout, processedQueryNum, queryList, currentQueryNum, currentSplit, count, reads1, reads2)
+//         {
+//             size_t posToWrite;
+//             size_t readLength = 1000;
+//             char *maskedSeq1 = new char[readLength];
+//             char *maskedSeq2 = new char[readLength];
+//             char *seq1 = nullptr;
+//             char *seq2 = nullptr;            
+// #pragma omp for schedule(dynamic, 1)
+//             for (size_t i = 0; i < currentQueryNum; i ++) {
+//                 size_t queryIdx = processedQueryNum - currentQueryNum + i;
+//                 // Get k-mer count
+//                 int kmerCnt = LocalUtil::getQueryKmerNumber<int>(reads1[i].length(), spaceNum);
+//                 int kmerCnt2 = LocalUtil::getQueryKmerNumber<int>(reads2[i].length(), spaceNum);
+
+//                 // Ignore short read
+//                 if (kmerCnt2 < 1 || kmerCnt < 1) { continue; }
+
+//                 // Get masked sequence
+//                 if (maskMode) {
+//                     if (readLength < reads1[i].length() + 1 || readLength < reads2[i].length() + 1) {
+//                         readLength = max(reads1[i].length() + 1, reads2[i].length() + 1);
+//                         delete[] maskedSeq1;
+//                         delete[] maskedSeq2;
+//                         maskedSeq1 = new char[readLength];
+//                         maskedSeq2 = new char[readLength];
+//                     }
+//                     SeqIterator::maskLowComplexityRegions(reads1[i].c_str(),maskedSeq1, *probMatrix, maskProb, subMat);
+//                     SeqIterator::maskLowComplexityRegions(reads2[i].c_str(),maskedSeq2, *probMatrix, maskProb, subMat);
+//                     seq1 = maskedSeq1;
+//                     seq2 = maskedSeq2;
+//                 } else {
+//                     seq1 = const_cast<char *>(reads1[i].c_str());
+//                     seq2 = const_cast<char *>(reads2[i].c_str());
+//                 }
+
+//                 posToWrite = kmerBuffer.reserveMemory(kmerCnt + kmerCnt2);
+
+//                 // Process Read 1
+//                 seqIterator->sixFrameTranslation(seq1, (int) reads1[i].length());
+//                 seqIterator->fillQueryKmerBuffer(seq1, (int) reads1[i].length(), kmerBuffer, posToWrite,
+//                                                 (uint32_t) queryIdx);
+
+//                 // Process Read 2
+//                 seqIterator->sixFrameTranslation(seq2, (int) reads2[i].length());
+//                 seqIterator->fillQueryKmerBuffer(seq2, (int) reads2[i].length(), kmerBuffer, posToWrite,
+//                                                  (uint32_t) queryIdx, queryList[queryIdx].queryLength+3);
+//             }
+//             delete[] maskedSeq1;
+//             delete[] maskedSeq2;
+//         }
+//     }
+// }
 
diff --git a/src/commons/KmerExtractor.h b/src/commons/KmerExtractor.h
index bac04a27..b77aa122 100644
--- a/src/commons/KmerExtractor.h
+++ b/src/commons/KmerExtractor.h
@@ -8,6 +8,7 @@
 
 class KmerExtractor {
 private:
+    SeqIterator * seqIterator;
     // Parameters
     int spaceNum;
     int maskMode;
@@ -31,6 +32,13 @@ class KmerExtractor {
                                             const QuerySplit & currentSplit,
                                             const LocalParameters &par);
 
+    void fillQueryKmerBufferParallel_paired2(KSeqWrapper* kseq1,
+                                            KSeqWrapper* kseq2,
+                                            QueryKmerBuffer &kmerBuffer,
+                                            vector<Query> &queryList,
+                                            const QuerySplit & currentSplit,
+                                            const LocalParameters &par);                                        
+
 public:
     explicit KmerExtractor(const LocalParameters & par);
     ~KmerExtractor();
diff --git a/src/commons/ProdigalWrapper.cpp b/src/commons/ProdigalWrapper.cpp
index 9e9c6aec..ca6c381d 100644
--- a/src/commons/ProdigalWrapper.cpp
+++ b/src/commons/ProdigalWrapper.cpp
@@ -1,4 +1,5 @@
 #include "ProdigalWrapper.h"
+#include "SeqIterator.h"
 #include <iostream>
 ProdigalWrapper::~ProdigalWrapper() {
     for(size_t i = 0; i < NUM_META; i++){
@@ -333,4 +334,226 @@ void ProdigalWrapper::setTrainingInfo(_training &tinf) {
 
 void ProdigalWrapper::updateTrainingInfo(_training &tinf2) {
     this->tinf = tinf2;
-}
\ No newline at end of file
+}
+
+// It makes the blocks for translation
+// Each block has a predicted gene part and an intergenic region. When another gene shows up, new block starts.
+void ProdigalWrapper::getExtendedORFs(struct _gene *genes, struct _node *nodes, vector<PredictedBlock> &blocks,
+                                       size_t numOfGene, size_t length,
+                                       size_t &blockIdx, vector<uint64_t> &intergenicKmerList, const char *seq) {
+
+    //Exceptional case 1: 0 prdicted gene
+    if (numOfGene == 0) {
+        blocks.emplace_back(0, length - 1, 1);
+        blockIdx++;
+        return;
+    }
+
+    //Exceptional case 2: Only 1 prdicted gene
+    int frame;
+    int rightEnd = 0;
+    int leftEnd = 0;
+    if (numOfGene == 1) {
+        if (nodes[genes[0].start_ndx].strand == 1) { //forward
+            frame = (genes[0].begin - 1) % 3;
+            leftEnd = 0;
+            while (leftEnd % 3 != frame) leftEnd++; // y - (y - x) % 3 .. which would be faster?
+            blocks.emplace_back(leftEnd, length - 1, 1);
+            blockIdx++;
+        } else { //reverse
+            frame = (genes[0].end - 1) % 3;
+            rightEnd = length - 1;
+            while (rightEnd % 3 != frame) rightEnd--;
+            blocks.emplace_back(0, rightEnd, -1);
+            blockIdx++;
+        }
+        return;
+    }
+
+
+    /* Main routine */
+
+    bool hasBeenExtendedToLeft = false;
+    int k = 23;
+    char *newIntergenicKmer = (char *) malloc(sizeof(char) * (k + 1));
+    char *leftKmer = (char *) malloc(sizeof(char) * (k + 1));
+    char *rightKmer = (char *) malloc(sizeof(char) * (k + 1));
+    char *leftKmerReverse = (char *) malloc(sizeof(char) * (k + 1));
+    char *rightKmerReverse = (char *) malloc(sizeof(char) * (k + 1));
+    bool isReverse = false;
+    uint64_t leftKmerHash = 0, rightKmerHash = 0;
+
+
+    // Extend the first gene to cover intergenic regions
+    if (nodes[genes[0].start_ndx].strand == 1) { //forward
+        frame = (genes[0].begin - 1) % 3;
+        leftEnd = 0;
+        while (leftEnd % 3 != frame) leftEnd++;
+        blocks.emplace_back(leftEnd, genes[1].begin - 1 + 22, 1);
+        blockIdx++;
+    } else {
+        frame = (genes[0].end - 1) % 3;
+        rightEnd = genes[1].begin - 1 + 22;
+        while (rightEnd % 3 != frame) rightEnd--;
+        blocks.emplace_back(0, rightEnd, -1);
+        blockIdx++;
+    }
+
+    // From the second gene to the second last gene
+    for (size_t geneIdx = 1; geneIdx < numOfGene - 1; geneIdx++) {
+        isReverse = false;
+
+        // Make two k-mer hash; each from left and right of current gene. They are used for choosing extension direction.
+        strncpy(leftKmer, seq + genes[geneIdx].begin - 1 - k, k);
+        strncpy(rightKmer, seq + genes[geneIdx].end, k);
+        if (nodes[genes[geneIdx].start_ndx].strand == 1) {
+            leftKmerHash = XXH64(leftKmer, k, 0);
+            rightKmerHash = XXH64(rightKmer, k, 0);
+        } else {
+            isReverse = true;
+            for (int j = k - 1; j >= 0; j--) {
+                leftKmerReverse[k - j - 1] = SeqIterator::iRCT[leftKmer[j]];
+                rightKmerReverse[k - j - 1] = SeqIterator::iRCT[rightKmer[j]];
+            }
+            leftKmerHash = XXH64(leftKmerReverse, k, 0);
+            rightKmerHash = XXH64(rightKmerReverse, k, 0);
+        }
+
+        // Extend genes to cover intergenic regions
+        if (find(intergenicKmerList.begin(), intergenicKmerList.end(), leftKmerHash) !=
+            intergenicKmerList.end()) { //Extension to left
+            if (!hasBeenExtendedToLeft) {
+                if (!isReverse) { // Forward
+                    blocks.emplace_back(genes[geneIdx].begin - 1, genes[geneIdx].end - 1, 1);
+                    blockIdx++;
+                } else { // Reverse
+                    blocks.emplace_back(genes[geneIdx].begin - 1, genes[geneIdx].end - 1, -1);
+                    blockIdx++;
+                }
+            } else {
+                if (!isReverse) { //forward
+                    frame = (genes[geneIdx].begin - 1) % 3;
+                    leftEnd = genes[geneIdx - 1].end -1 -22;
+                    while (leftEnd % 3 != frame) leftEnd++;
+                    blocks.emplace_back(leftEnd, genes[geneIdx].end - 1, 1);
+                    blockIdx++;
+                } else { // reverse
+                    blocks.emplace_back(genes[geneIdx - 1].end - 22 - 1, genes[geneIdx].end - 1, -1);
+                    blockIdx++;
+                }
+            }
+            hasBeenExtendedToLeft = true;
+        } else { // Extension to right
+            if (hasBeenExtendedToLeft) {
+                if (!isReverse) { //forward
+                    frame = (genes[geneIdx].begin - 1) % 3;
+                    leftEnd = genes[geneIdx - 1].end - 1 - 22;
+                    while (leftEnd % 3 != frame) leftEnd++;
+                    blocks.emplace_back(leftEnd, genes[geneIdx + 1].begin - 1 + 22, 1);
+                    blockIdx++;
+                } else {
+                    frame = (genes[geneIdx].end - 1) % 3;
+                    rightEnd = genes[geneIdx + 1].begin - 1 + 22;
+                    while (rightEnd % 3 != frame) rightEnd--;
+                    blocks.emplace_back(genes[geneIdx - 1].end - 1 - 22, rightEnd, -1);
+                    blockIdx++;
+                }
+            } else {
+                if (!isReverse) { //forward
+                    blocks.emplace_back(genes[geneIdx].begin - 1, genes[geneIdx + 1].begin - 1 + 22, 1);
+                    blockIdx++;
+                } else {
+                    frame = (genes[geneIdx].end - 1) % 3;
+                    rightEnd = genes[geneIdx + 1].begin - 1 + 22;
+                    while (rightEnd % 3 != frame) rightEnd--;
+                    blocks.emplace_back(genes[geneIdx].begin - 1, rightEnd, -1);
+                    blockIdx++;
+                }
+            }
+            hasBeenExtendedToLeft = false;
+
+            if (find(intergenicKmerList.begin(), intergenicKmerList.end(), rightKmerHash) == intergenicKmerList.end()) {
+                intergenicKmerList.push_back(rightKmerHash);
+            }
+        }
+    }
+
+    // // For the last gene
+    // // Extend to the end of the genome
+    // isReverse = !(nodes[genes[numOfGene - 1].start_ndx].strand == 1);    
+    // rightEnd = length - 1;
+    // if (isReverse) {
+    //     frame = (genes[numOfGene - 1].end - 1) % 3;
+    //     while (rightEnd % 3 != frame) rightEnd--;
+    // }
+    // // If left region is not covered, cover it.
+    // leftEnd = genes[numOfGene - 1].begin - 1;
+    // if (hasBeenExtendedToLeft) {
+    //     leftEnd = genes[numOfGene - 2].end - 1 - 22;
+    //     if (!isReverse) {
+    //         frame = (genes[numOfGene - 1].begin - 1) % 3;
+    //         while (leftEnd % 3 != frame) leftEnd++;
+    //     }
+    // }
+    // blocks.emplace_back(leftEnd, rightEnd, isReverse ? -1 : 1);
+    // if (find(intergenicKmerList.begin(), intergenicKmerList.end(), rightKmerHash) == intergenicKmerList.end()) {
+    //         intergenicKmerList.push_back(rightKmerHash);
+    // }
+
+    //For the last gene
+    if (find(intergenicKmerList.begin(), intergenicKmerList.end(), leftKmerHash) !=
+        intergenicKmerList.end()) { //extension to left
+        if (!isReverse) { //forward
+            frame = (genes[numOfGene - 1].begin - 1) % 3;
+            leftEnd = genes[numOfGene - 2].end - 1 - 22;
+            while (leftEnd % 3 != frame) leftEnd++;
+            blocks.emplace_back(leftEnd, length - 1, 1);
+            blockIdx++;
+        } else { // reverse
+            frame = (genes[numOfGene - 1].end - 1) % 3;
+            rightEnd = length - 1;
+            while (rightEnd % 3 != frame) rightEnd--;
+            blocks.emplace_back(genes[numOfGene - 2].end - 22 - 1, rightEnd, -1);
+            blockIdx++;
+        }
+    } else { //extension to right
+        if (hasBeenExtendedToLeft) {
+            if (!isReverse) { //forward
+                frame = (genes[numOfGene - 1].begin - 1) % 3;
+                leftEnd = genes[numOfGene - 2].end - 1 - 22;
+                while (leftEnd % 3 != frame) leftEnd++;
+                blocks.emplace_back(leftEnd, length - 1, 1);
+                blockIdx++;
+            } else {
+                frame = (genes[numOfGene - 1].end - 1) % 3;
+                rightEnd = length - 1;
+                while (rightEnd % 3 != frame) rightEnd--;
+                blocks.emplace_back(genes[numOfGene - 2].end - 22 - 1, rightEnd, -1);
+                blockIdx++;
+            }
+        } else {
+            if (!isReverse) {
+                blocks.emplace_back(genes[numOfGene - 1].begin, length - 1, 1);
+                blockIdx++;
+            } else {
+                frame = (genes[numOfGene - 1].end - 1) % 3;
+                rightEnd = length - 1;
+                while (rightEnd % 3 != frame) rightEnd--;
+                blocks.emplace_back(genes[numOfGene - 1].begin - 1, rightEnd, -1);
+                blockIdx++;
+            }
+        }
+
+        //If current intergenic sequences is new, update intergenicKmerList.
+        if (find(intergenicKmerList.begin(), intergenicKmerList.end(), rightKmerHash) == intergenicKmerList.end()) {
+            intergenicKmerList.push_back(rightKmerHash);
+        }
+    }
+
+
+    free(newIntergenicKmer);
+    free(leftKmer);
+    free(rightKmer);
+    free(leftKmerReverse);
+    free(rightKmerReverse);
+}
diff --git a/src/commons/ProdigalWrapper.h b/src/commons/ProdigalWrapper.h
index 15d5c219..8570d1c4 100644
--- a/src/commons/ProdigalWrapper.h
+++ b/src/commons/ProdigalWrapper.h
@@ -16,6 +16,10 @@
 #include "prodigalsequence.h"
 #include "training.h"
 #include "printBinary.h"
+#include "common.h"
+#include <vector>
+#include "xxhash.h"
+#include "SeqIterator.h"
 
 #define MIN_SINGLE_GENOME 20000
 #define IDEAL_SINGLE_GENOME 100000
@@ -60,5 +64,7 @@ class ProdigalWrapper{
     void printGenes();
     ProdigalWrapper();
     ~ProdigalWrapper();
+    void getExtendedORFs(struct _gene *genes, struct _node *nodes, std::vector<PredictedBlock> &blocks, size_t numOfGene,
+            size_t length, size_t &numOfBlocks, std::vector<uint64_t> &intergenicKmerList, const char *seq);    
 };
 #endif //ADCLASSIFIER2_PRODIGALWRAPPER_H
diff --git a/src/commons/SeqIterator.cpp b/src/commons/SeqIterator.cpp
index ddb1b132..1f42dbd0 100644
--- a/src/commons/SeqIterator.cpp
+++ b/src/commons/SeqIterator.cpp
@@ -10,8 +10,6 @@ const string SeqIterator::iRCT = "..............................................
                                  "................................................................"
                                  "................................................................";
 
-
-
 SeqIterator::~SeqIterator() {
     delete[] mask;
     delete[] mask_int;
@@ -255,7 +253,7 @@ SeqIterator::SeqIterator(const LocalParameters &par) {
 }
 
 // It translates a DNA sequence to amino acid sequences with six frames
-void SeqIterator::sixFrameTranslation(const char *seq, int len) {
+void SeqIterator::sixFrameTranslation(const char *seq, int len, vector<int> *aaFrames) {
     for (int i = 0; i < 6; i++) { aaFrames[i].clear(); }
 
     size_t end = len - 1;
@@ -314,8 +312,15 @@ void SeqIterator::sixFrameTranslation(const char *seq, int len) {
     }
 }
 
-void SeqIterator::fillQueryKmerBuffer(const char *seq, int seqLen, QueryKmerBuffer &kmerBuffer, size_t &posToWrite, uint32_t seqID,
-                                 uint32_t offset) {
+void SeqIterator::fillQueryKmerBuffer(
+    const char *seq,
+    int seqLen, 
+    QueryKmerBuffer &kmerBuffer, 
+    size_t &posToWrite, 
+    uint32_t seqID, 
+    vector<int> *aaFrames,
+    uint32_t offset) 
+    {
     int forOrRev;
     uint64_t tempKmer = 0;
     int checkN;
@@ -386,18 +391,18 @@ SeqIterator::addDNAInfo_QueryKmer(uint64_t &kmer, const char *seq, int forOrRev,
     }
 }
 
-bool SeqIterator::translateBlock(const char *seq, PredictedBlock block) {
-    aaFrames[0].clear();
-    if(aaFrames->capacity() < strlen(seq) / 3 + 1) {
-        aaFrames->reserve(strlen(seq) / 3 + 1);
+bool SeqIterator::translateBlock(const char *seq, PredictedBlock block, vector<int> & aaSeq) {
+    aaSeq.clear();
+    if(aaSeq.capacity() < strlen(seq) / 3 + 1) {
+        aaSeq.reserve(strlen(seq) / 3 + 1);
     }
     if (block.strand == 1) {
         for (int i = block.start; i + 2 <= block.end; i = i + 3) {
-            aaFrames[0].push_back(nuc2aa[nuc2int(atcg[seq[i]])][nuc2int(atcg[seq[i + 1]])][nuc2int(atcg[seq[i + 2]])]);
+            aaSeq.push_back(nuc2aa[nuc2int(atcg[seq[i]])][nuc2int(atcg[seq[i + 1]])][nuc2int(atcg[seq[i + 2]])]);
         }
     } else {
         for (int i = block.end; i >= (int) block.start + 2; i = i - 3) {
-            aaFrames[0].push_back(nuc2aa[nuc2int(iRCT[atcg[seq[i]]])][nuc2int(iRCT[atcg[seq[i - 1]]])][
+            aaSeq.push_back(nuc2aa[nuc2int(iRCT[atcg[seq[i]]])][nuc2int(iRCT[atcg[seq[i - 1]]])][
                                           nuc2int(iRCT[atcg[seq[i - 2]]])]);
         }
     }
@@ -427,19 +432,19 @@ char *SeqIterator::reverseCompliment(char *read, size_t length) const {
 // It extracts kmers from amino acid sequence with DNA information and fill the kmerBuffer with them.
 int
 SeqIterator::fillBufferWithKmerFromBlock(const PredictedBlock &block, const char *seq, TargetKmerBuffer &kmerBuffer,
-                                         size_t &posToWrite, int seqID, int taxIdAtRank) {
+                                         size_t &posToWrite, int seqID, int taxIdAtRank, const vector<int> & aaSeq) {
     uint64_t tempKmer = 0;
-    int len = (int) aaFrames[0].size();
+    int len = (int) aaSeq.size();
     int checkN;
     for (int kmerCnt = 0; kmerCnt < len - kmerLength - spaceNum_int + 1; kmerCnt++) {
         tempKmer = 0;
         checkN = 0;
         for (uint32_t i = 0, j = 0; i < kmerLength + spaceNum; i++, j += mask[i]) {
-            if (-1 == aaFrames[0][kmerCnt + i]) {
+            if (-1 == aaSeq[kmerCnt + i]) {
                 checkN = 1;
                 break;
             }
-            tempKmer += aaFrames[0][kmerCnt + i] * powers[j] * mask[i];
+            tempKmer += aaSeq[kmerCnt + i] * powers[j] * mask[i];
         }
         if (checkN == 1) {
             kmerBuffer.buffer[posToWrite] = {UINT64_MAX, -1, 0, false};
@@ -485,227 +490,6 @@ size_t SeqIterator::getNumOfKmerForBlock(const PredictedBlock &block) {
     return len / 3 - 7;
 }
 
-// It makes the blocks for translation
-// Each block has a predicted gene part and an intergenic region. When another gene shows up, new block starts.
-void SeqIterator::getExtendedORFs(struct _gene *genes, struct _node *nodes, vector<PredictedBlock> &blocks,
-                                       size_t numOfGene, size_t length,
-                                       size_t &blockIdx, vector<uint64_t> &intergenicKmerList, const char *seq) {
-
-    //Exceptional case 1: 0 prdicted gene
-    if (numOfGene == 0) {
-        blocks.emplace_back(0, length - 1, 1);
-        blockIdx++;
-        return;
-    }
-
-    //Exceptional case 2: Only 1 prdicted gene
-    int frame;
-    int rightEnd = 0;
-    int leftEnd = 0;
-    if (numOfGene == 1) {
-        if (nodes[genes[0].start_ndx].strand == 1) { //forward
-            frame = (genes[0].begin - 1) % 3;
-            leftEnd = 0;
-            while (leftEnd % 3 != frame) leftEnd++; // y - (y - x) % 3 .. which would be faster?
-            blocks.emplace_back(leftEnd, length - 1, 1);
-            blockIdx++;
-        } else { //reverse
-            frame = (genes[0].end - 1) % 3;
-            rightEnd = length - 1;
-            while (rightEnd % 3 != frame) rightEnd--;
-            blocks.emplace_back(0, rightEnd, -1);
-            blockIdx++;
-        }
-        return;
-    }
-
-
-    /* Main routine */
-
-    bool hasBeenExtendedToLeft = false;
-    int k = 23;
-    char *newIntergenicKmer = (char *) malloc(sizeof(char) * (k + 1));
-    char *leftKmer = (char *) malloc(sizeof(char) * (k + 1));
-    char *rightKmer = (char *) malloc(sizeof(char) * (k + 1));
-    char *leftKmerReverse = (char *) malloc(sizeof(char) * (k + 1));
-    char *rightKmerReverse = (char *) malloc(sizeof(char) * (k + 1));
-    bool isReverse = false;
-    uint64_t leftKmerHash = 0, rightKmerHash = 0;
-
-
-    // Extend the first gene to cover intergenic regions
-    if (nodes[genes[0].start_ndx].strand == 1) { //forward
-        frame = (genes[0].begin - 1) % 3;
-        leftEnd = 0;
-        while (leftEnd % 3 != frame) leftEnd++;
-        blocks.emplace_back(leftEnd, genes[1].begin - 1 + 22, 1);
-        blockIdx++;
-    } else {
-        frame = (genes[0].end - 1) % 3;
-        rightEnd = genes[1].begin - 1 + 22;
-        while (rightEnd % 3 != frame) rightEnd--;
-        blocks.emplace_back(0, rightEnd, -1);
-        blockIdx++;
-    }
-
-    // From the second gene to the second last gene
-    for (size_t geneIdx = 1; geneIdx < numOfGene - 1; geneIdx++) {
-        isReverse = false;
-
-        // Make two k-mer hash; each from left and right of current gene. They are used for choosing extension direction.
-        strncpy(leftKmer, seq + genes[geneIdx].begin - 1 - k, k);
-        strncpy(rightKmer, seq + genes[geneIdx].end, k);
-        if (nodes[genes[geneIdx].start_ndx].strand == 1) {
-            leftKmerHash = XXH64(leftKmer, k, 0);
-            rightKmerHash = XXH64(rightKmer, k, 0);
-        } else {
-            isReverse = true;
-            for (int j = k - 1; j >= 0; j--) {
-                leftKmerReverse[k - j - 1] = iRCT[leftKmer[j]];
-                rightKmerReverse[k - j - 1] = iRCT[rightKmer[j]];
-            }
-            leftKmerHash = XXH64(leftKmerReverse, k, 0);
-            rightKmerHash = XXH64(rightKmerReverse, k, 0);
-        }
-
-        // Extend genes to cover intergenic regions
-        if (find(intergenicKmerList.begin(), intergenicKmerList.end(), leftKmerHash) !=
-            intergenicKmerList.end()) { //Extension to left
-            if (!hasBeenExtendedToLeft) {
-                if (!isReverse) { // Forward
-                    blocks.emplace_back(genes[geneIdx].begin - 1, genes[geneIdx].end - 1, 1);
-                    blockIdx++;
-                } else { // Reverse
-                    blocks.emplace_back(genes[geneIdx].begin - 1, genes[geneIdx].end - 1, -1);
-                    blockIdx++;
-                }
-            } else {
-                if (!isReverse) { //forward
-                    frame = (genes[geneIdx].begin - 1) % 3;
-                    leftEnd = genes[geneIdx - 1].end -1 -22;
-                    while (leftEnd % 3 != frame) leftEnd++;
-                    blocks.emplace_back(leftEnd, genes[geneIdx].end - 1, 1);
-                    blockIdx++;
-                } else { // reverse
-                    blocks.emplace_back(genes[geneIdx - 1].end - 22 - 1, genes[geneIdx].end - 1, -1);
-                    blockIdx++;
-                }
-            }
-            hasBeenExtendedToLeft = true;
-        } else { // Extension to right
-            if (hasBeenExtendedToLeft) {
-                if (!isReverse) { //forward
-                    frame = (genes[geneIdx].begin - 1) % 3;
-                    leftEnd = genes[geneIdx - 1].end - 1 - 22;
-                    while (leftEnd % 3 != frame) leftEnd++;
-                    blocks.emplace_back(leftEnd, genes[geneIdx + 1].begin - 1 + 22, 1);
-                    blockIdx++;
-                } else {
-                    frame = (genes[geneIdx].end - 1) % 3;
-                    rightEnd = genes[geneIdx + 1].begin - 1 + 22;
-                    while (rightEnd % 3 != frame) rightEnd--;
-                    blocks.emplace_back(genes[geneIdx - 1].end - 1 - 22, rightEnd, -1);
-                    blockIdx++;
-                }
-            } else {
-                if (!isReverse) { //forward
-                    blocks.emplace_back(genes[geneIdx].begin - 1, genes[geneIdx + 1].begin - 1 + 22, 1);
-                    blockIdx++;
-                } else {
-                    frame = (genes[geneIdx].end - 1) % 3;
-                    rightEnd = genes[geneIdx + 1].begin - 1 + 22;
-                    while (rightEnd % 3 != frame) rightEnd--;
-                    blocks.emplace_back(genes[geneIdx].begin - 1, rightEnd, -1);
-                    blockIdx++;
-                }
-            }
-            hasBeenExtendedToLeft = false;
-
-            if (find(intergenicKmerList.begin(), intergenicKmerList.end(), rightKmerHash) == intergenicKmerList.end()) {
-                intergenicKmerList.push_back(rightKmerHash);
-            }
-        }
-    }
-
-    // // For the last gene
-    // // Extend to the end of the genome
-    // isReverse = !(nodes[genes[numOfGene - 1].start_ndx].strand == 1);    
-    // rightEnd = length - 1;
-    // if (isReverse) {
-    //     frame = (genes[numOfGene - 1].end - 1) % 3;
-    //     while (rightEnd % 3 != frame) rightEnd--;
-    // }
-    // // If left region is not covered, cover it.
-    // leftEnd = genes[numOfGene - 1].begin - 1;
-    // if (hasBeenExtendedToLeft) {
-    //     leftEnd = genes[numOfGene - 2].end - 1 - 22;
-    //     if (!isReverse) {
-    //         frame = (genes[numOfGene - 1].begin - 1) % 3;
-    //         while (leftEnd % 3 != frame) leftEnd++;
-    //     }
-    // }
-    // blocks.emplace_back(leftEnd, rightEnd, isReverse ? -1 : 1);
-    // if (find(intergenicKmerList.begin(), intergenicKmerList.end(), rightKmerHash) == intergenicKmerList.end()) {
-    //         intergenicKmerList.push_back(rightKmerHash);
-    // }
-
-    //For the last gene
-    if (find(intergenicKmerList.begin(), intergenicKmerList.end(), leftKmerHash) !=
-        intergenicKmerList.end()) { //extension to left
-        if (!isReverse) { //forward
-            frame = (genes[numOfGene - 1].begin - 1) % 3;
-            leftEnd = genes[numOfGene - 2].end - 1 - 22;
-            while (leftEnd % 3 != frame) leftEnd++;
-            blocks.emplace_back(leftEnd, length - 1, 1);
-            blockIdx++;
-        } else { // reverse
-            frame = (genes[numOfGene - 1].end - 1) % 3;
-            rightEnd = length - 1;
-            while (rightEnd % 3 != frame) rightEnd--;
-            blocks.emplace_back(genes[numOfGene - 2].end - 22 - 1, rightEnd, -1);
-            blockIdx++;
-        }
-    } else { //extension to right
-        if (hasBeenExtendedToLeft) {
-            if (!isReverse) { //forward
-                frame = (genes[numOfGene - 1].begin - 1) % 3;
-                leftEnd = genes[numOfGene - 2].end - 1 - 22;
-                while (leftEnd % 3 != frame) leftEnd++;
-                blocks.emplace_back(leftEnd, length - 1, 1);
-                blockIdx++;
-            } else {
-                frame = (genes[numOfGene - 1].end - 1) % 3;
-                rightEnd = length - 1;
-                while (rightEnd % 3 != frame) rightEnd--;
-                blocks.emplace_back(genes[numOfGene - 2].end - 22 - 1, rightEnd, -1);
-                blockIdx++;
-            }
-        } else {
-            if (!isReverse) {
-                blocks.emplace_back(genes[numOfGene - 1].begin, length - 1, 1);
-                blockIdx++;
-            } else {
-                frame = (genes[numOfGene - 1].end - 1) % 3;
-                rightEnd = length - 1;
-                while (rightEnd % 3 != frame) rightEnd--;
-                blocks.emplace_back(genes[numOfGene - 1].begin - 1, rightEnd, -1);
-                blockIdx++;
-            }
-        }
-
-        //If current intergenic sequences is new, update intergenicKmerList.
-        if (find(intergenicKmerList.begin(), intergenicKmerList.end(), rightKmerHash) == intergenicKmerList.end()) {
-            intergenicKmerList.push_back(rightKmerHash);
-        }
-    }
-
-
-    free(newIntergenicKmer);
-    free(leftKmer);
-    free(rightKmer);
-    free(leftKmerReverse);
-    free(rightKmerReverse);
-}
 
 bool SeqIterator::compareMinHashList(priority_queue <uint64_t> list1, priority_queue <uint64_t> &list2, size_t length1,
                                      size_t length2) {
diff --git a/src/commons/SeqIterator.h b/src/commons/SeqIterator.h
index b3be8638..c53093e0 100644
--- a/src/commons/SeqIterator.h
+++ b/src/commons/SeqIterator.h
@@ -33,23 +33,9 @@ KSEQ_INIT(kseq_buffer_t*, kseq_buffer_reader)
 
 using namespace std;
 
-typedef struct PredictedBlock {
-    PredictedBlock(int start, int end, int strand) : start(start), end(end), strand(strand) {}
-
-    void printPredictedBlock() {
-        cout << strand << " " << start << " " << end << endl;
-    }
-
-    int start;
-    int end;
-    int strand; //true for forward
-} PredictedBlock;
-
 class SeqIterator {
 private:
-    static const string iRCT;
-    static const string atcg;
-    vector<int> aaFrames[6];
+    // vector<int> aaFrames[6];
     uint64_t powers[10];
     int nuc2aa[8][8][8];
     uint64_t nuc2num[4][4][4];
@@ -60,30 +46,29 @@ class SeqIterator {
     int bitsForCodon;
     int bitsFor8Codons;
 
-
     void addDNAInfo_QueryKmer(uint64_t &kmer, const char *seq, int forOrRev, uint32_t kmerCnt, uint32_t frame,
                               int readLength);
 
     void addDNAInfo_TargetKmer(uint64_t &kmer, const char *seq, const PredictedBlock &block, const int &kmerCnt);
 
 public:
+    static const string iRCT;
+    static const string atcg;
+
     void fillQueryKmerBuffer(const char *seq, int seqLen, QueryKmerBuffer &kmerBuffer, size_t &posToWrite,
-                             uint32_t seqID, uint32_t offset = 0);
+                             uint32_t seqID, vector<int> *aaFrames, uint32_t offset = 0);
 
     string reverseCompliment(string &read) const;
 
     char *reverseCompliment(char *read, size_t length) const;
 
-    void sixFrameTranslation(const char *seq, int seqLen);
+    void sixFrameTranslation(const char *seq, int seqLen, vector<int> *aaFrames);
 
-    bool translateBlock(const char *seq, PredictedBlock block);
+    bool translateBlock(const char *seq, PredictedBlock block, vector<int> & aaSeq);
 
     void generateIntergenicKmerList(struct _gene *genes, struct _node *nodes, int numberOfGenes,
                                     vector<uint64_t> &intergenicKmerList, const char *seq);
 
-    void getExtendedORFs(struct _gene *genes, struct _node *nodes, vector<PredictedBlock> &blocks, size_t numOfGene,
-            size_t length, size_t &numOfBlocks, vector<uint64_t> &intergenicKmerList, const char *seq);
-
     void getMinHashList(priority_queue<uint64_t> &sortedHashQue, const char *seq);
 
     bool
@@ -94,7 +79,7 @@ class SeqIterator {
     size_t getNumOfKmerForBlock(const PredictedBlock &block);
 
     int fillBufferWithKmerFromBlock(const PredictedBlock &block, const char *seq, TargetKmerBuffer &kmerBuffer,
-                                     size_t &posToWrite, int seqID, int taxIdAtRank);
+                                     size_t &posToWrite, int seqID, int taxIdAtRank, const vector<int> & aaSeq);
 
     static void maskLowComplexityRegions(const char * seq, char * maskedSeq, ProbabilityMatrix & probMat,
                                          float maskProb, const BaseMatrix * subMat);
diff --git a/src/commons/common.h b/src/commons/common.h
index dc847fcb..2961060d 100644
--- a/src/commons/common.h
+++ b/src/commons/common.h
@@ -27,6 +27,18 @@ struct SequenceBlock{
     size_t seqLength;
 };
 
+typedef struct PredictedBlock {
+    PredictedBlock(int start, int end, int strand) : start(start), end(end), strand(strand) {}
+
+    void printPredictedBlock() {
+        std::cout << strand << " " << start << " " << end << std::endl;
+    }
+
+    int start;
+    int end;
+    int strand; //true for forward
+} PredictedBlock;
+
 struct Query{
     int queryId;
     int classification;

From 60c62900319b74b3d8c82cace1936561cb88b600 Mon Sep 17 00:00:00 2001
From: Jaebeom Kim <jbeom0731@gmail.com>
Date: Thu, 22 Aug 2024 13:45:36 +0900
Subject: [PATCH 02/21] Working on optimizing match analysis process

---
 src/commons/Classifier.cpp    |  53 +++++++++++++++--
 src/commons/Classifier.h      |  11 +++-
 src/commons/KmerExtractor.cpp | 106 +---------------------------------
 src/commons/KmerExtractor.h   |  10 +---
 src/commons/Taxonomer.cpp     |  25 ++++----
 src/commons/Taxonomer.h       |  17 +++---
 6 files changed, 85 insertions(+), 137 deletions(-)

diff --git a/src/commons/Classifier.cpp b/src/commons/Classifier.cpp
index ee49ce41..6531fcfe 100644
--- a/src/commons/Classifier.cpp
+++ b/src/commons/Classifier.cpp
@@ -23,7 +23,7 @@ Classifier::Classifier(LocalParameters & par) {
     } else {
         kmerMatcher = new KmerMatcher(par, taxonomy);
     }
-    taxonomer = new Taxonomer(par, taxonomy);
+    // taxonomer = new Taxonomer(par, taxonomy);
     reporter = new Reporter(par, taxonomy);
 }
 
@@ -32,7 +32,7 @@ Classifier::~Classifier() {
     delete queryIndexer;
     delete kmerExtractor;
     delete kmerMatcher;
-    delete taxonomer;
+    // delete taxonomer;
     delete reporter;
 }
 
@@ -113,7 +113,7 @@ void Classifier::startClassify(const LocalParameters &par) {
                 kmerMatcher->sortMatches(&matchBuffer);
                 
                 // Classify queries based on the matches.
-                taxonomer->assignTaxonomy(matchBuffer.buffer, matchBuffer.startIndexOfReserve, queryList, par);
+                assignTaxonomy(matchBuffer.buffer, matchBuffer.startIndexOfReserve, queryList, par);
 
                 // Write classification results
                 reporter->writeReadClassification(queryList);
@@ -148,8 +148,53 @@ void Classifier::startClassify(const LocalParameters &par) {
     reporter->closeReadClassificationFile();
 
     // Write report files
-    reporter->writeReportFile(totalSeqCnt, taxonomer->getTaxCounts());
+    reporter->writeReportFile(totalSeqCnt, taxCounts);
 
     // Memory deallocation
     free(matchBuffer.buffer);
 }
+
+void Classifier::assignTaxonomy(const Match *matchList,
+                               size_t numOfMatches,
+                               std::vector<Query> &queryList,
+                               const LocalParameters &par) {
+    time_t beforeAnalyze = time(nullptr);
+    cout << "Analyzing matches ..." << endl;
+
+    // Divide matches into blocks for multi threading
+    size_t seqNum = queryList.size();
+    MatchBlock *matchBlocks = new MatchBlock[seqNum];
+    size_t matchIdx = 0;
+    size_t blockIdx = 0;
+    uint32_t currentQuery;
+    while (matchIdx < numOfMatches) {
+        currentQuery = matchList[matchIdx].qInfo.sequenceID;
+        matchBlocks[blockIdx].id = currentQuery;
+        matchBlocks[blockIdx].start = matchIdx;
+        while ((currentQuery == matchList[matchIdx].qInfo.sequenceID) && (matchIdx < numOfMatches)) ++matchIdx;
+        matchBlocks[blockIdx].end = matchIdx - 1;
+        blockIdx++;
+    }
+
+    // Process each block
+#pragma omp parallel default(none), shared(cout, matchBlocks, matchList, seqNum, queryList, blockIdx, par)
+    {
+        Taxonomer taxonomer(par, taxonomy);
+#pragma omp for schedule(dynamic, 1)
+        for (size_t i = 0; i < blockIdx; ++i) {
+            taxonomer.chooseBestTaxon(matchBlocks[i].id,
+                            matchBlocks[i].start,
+                            matchBlocks[i].end,
+                            matchList,
+                            queryList,
+                            par);
+        }
+    }
+
+    for (size_t i = 0; i < seqNum; i++) {
+        ++taxCounts[queryList[i].classification];
+    }
+    delete[] matchBlocks;
+    cout << "Time spent for analyzing: " << double(time(nullptr) - beforeAnalyze) << endl;
+
+}
\ No newline at end of file
diff --git a/src/commons/Classifier.h b/src/commons/Classifier.h
index 81a0b027..c2bd9962 100644
--- a/src/commons/Classifier.h
+++ b/src/commons/Classifier.h
@@ -45,17 +45,26 @@ class Classifier {
     QueryIndexer * queryIndexer;
     KmerExtractor * kmerExtractor;
     KmerMatcher * kmerMatcher;
-    Taxonomer * taxonomer;
+    // Taxonomer * taxonomer;
     Reporter * reporter;
     NcbiTaxonomy * taxonomy;
 
+    unordered_map<TaxID, unsigned int> taxCounts;
+
 public:
     void startClassify(const LocalParameters &par);
 
+    void assignTaxonomy(const Match *matchList,
+                        size_t numOfMatches,
+                        std::vector<Query> & queryList,
+                        const LocalParameters &par);
+
     explicit Classifier(LocalParameters & par);
 
     virtual ~Classifier();
 
+    unordered_map<TaxID, unsigned int> & getTaxCounts() { return taxCounts; }
+
 };
 
 
diff --git a/src/commons/KmerExtractor.cpp b/src/commons/KmerExtractor.cpp
index 81f90495..0559748c 100644
--- a/src/commons/KmerExtractor.cpp
+++ b/src/commons/KmerExtractor.cpp
@@ -58,7 +58,7 @@ void KmerExtractor::fillQueryKmerBufferParallel(KSeqWrapper *kseq,
  
     // Reserve memory for each read of the chunk for each thread
     std::vector<std::vector<string>> chunkReads_thread(par.threads);
-    for (size_t i = 0; i < par.threads; ++i) {
+    for (int i = 0; i < par.threads; ++i) {
         chunkReads_thread[i].resize(chunkSize);
         for (size_t j = 0; j < chunkSize; ++j) {
             chunkReads_thread[i][j].reserve(readLength);
@@ -66,7 +66,7 @@ void KmerExtractor::fillQueryKmerBufferParallel(KSeqWrapper *kseq,
     }
 
     // Initialize atomic variable for active tasks
-    std::vector<std::atomic<bool>> busyThreads(par.threads);
+    std::vector<atomic<bool>> busyThreads(par.threads);
     for (size_t i = 0; i < par.threads; ++i) {
         busyThreads[i].store(false);
     }
@@ -289,104 +289,4 @@ chunkSize, chunkReads1_thread, chunkReads2_thread, busyThreads, cout)
         delete[] maskedSeq1;
         delete[] maskedSeq2;
     }
-}
-    
-
-
-// void KmerExtractor::fillQueryKmerBufferParallel_paired2(KSeqWrapper *kseq1,
-//                                                        KSeqWrapper *kseq2,
-//                                                        QueryKmerBuffer &kmerBuffer,
-//                                                        vector<Query> &queryList,
-//                                                        const QuerySplit &currentSplit,
-//                                                        const LocalParameters &par) {
-//     size_t processedQueryNum = 0;
-
-//     // Array to store reads of thread number
-//     vector<string> reads1(par.threads);
-//     vector<string> reads2(par.threads);
-
-//     while (processedQueryNum < currentSplit.readCnt) {
-//         size_t currentQueryNum = min(currentSplit.readCnt - processedQueryNum, (size_t) par.threads);
-//         size_t count = 0;
-
-//         // Fill reads in sequential
-//         while (count < currentQueryNum) {
-//             // Read query
-//             kseq1->ReadEntry();
-//             kseq2->ReadEntry();
-//             const KSeqWrapper::KSeqEntry & e1 = kseq1->entry;
-//             const KSeqWrapper::KSeqEntry & e2 = kseq2->entry;
-
-//             // Get k-mer count
-//             int kmerCnt = LocalUtil::getQueryKmerNumber<int>((int) e1.sequence.l, spaceNum);
-//             int kmerCnt2 = LocalUtil::getQueryKmerNumber<int>((int) e2.sequence.l, spaceNum);
-
-//             // Query Info
-//             queryList[processedQueryNum].queryLength = LocalUtil::getMaxCoveredLength((int) e1.sequence.l);
-//             queryList[processedQueryNum].queryLength2 = LocalUtil::getMaxCoveredLength((int) e2.sequence.l);
-//             queryList[processedQueryNum].name = string(e1.name.s);
-//             queryList[processedQueryNum].kmerCnt = (int) (kmerCnt + kmerCnt2);
-
-//             // Store reads
-//             reads1[count] = string(kseq1->entry.sequence.s);
-//             reads2[count] = string(kseq2->entry.sequence.s);
-
-//             processedQueryNum ++;
-//             count ++;
-//         }
-
-//         // Process reads in parallel
-// #pragma omp parallel default(none), shared(par, kmerBuffer, cout, processedQueryNum, queryList, currentQueryNum, currentSplit, count, reads1, reads2)
-//         {
-//             size_t posToWrite;
-//             size_t readLength = 1000;
-//             char *maskedSeq1 = new char[readLength];
-//             char *maskedSeq2 = new char[readLength];
-//             char *seq1 = nullptr;
-//             char *seq2 = nullptr;            
-// #pragma omp for schedule(dynamic, 1)
-//             for (size_t i = 0; i < currentQueryNum; i ++) {
-//                 size_t queryIdx = processedQueryNum - currentQueryNum + i;
-//                 // Get k-mer count
-//                 int kmerCnt = LocalUtil::getQueryKmerNumber<int>(reads1[i].length(), spaceNum);
-//                 int kmerCnt2 = LocalUtil::getQueryKmerNumber<int>(reads2[i].length(), spaceNum);
-
-//                 // Ignore short read
-//                 if (kmerCnt2 < 1 || kmerCnt < 1) { continue; }
-
-//                 // Get masked sequence
-//                 if (maskMode) {
-//                     if (readLength < reads1[i].length() + 1 || readLength < reads2[i].length() + 1) {
-//                         readLength = max(reads1[i].length() + 1, reads2[i].length() + 1);
-//                         delete[] maskedSeq1;
-//                         delete[] maskedSeq2;
-//                         maskedSeq1 = new char[readLength];
-//                         maskedSeq2 = new char[readLength];
-//                     }
-//                     SeqIterator::maskLowComplexityRegions(reads1[i].c_str(),maskedSeq1, *probMatrix, maskProb, subMat);
-//                     SeqIterator::maskLowComplexityRegions(reads2[i].c_str(),maskedSeq2, *probMatrix, maskProb, subMat);
-//                     seq1 = maskedSeq1;
-//                     seq2 = maskedSeq2;
-//                 } else {
-//                     seq1 = const_cast<char *>(reads1[i].c_str());
-//                     seq2 = const_cast<char *>(reads2[i].c_str());
-//                 }
-
-//                 posToWrite = kmerBuffer.reserveMemory(kmerCnt + kmerCnt2);
-
-//                 // Process Read 1
-//                 seqIterator->sixFrameTranslation(seq1, (int) reads1[i].length());
-//                 seqIterator->fillQueryKmerBuffer(seq1, (int) reads1[i].length(), kmerBuffer, posToWrite,
-//                                                 (uint32_t) queryIdx);
-
-//                 // Process Read 2
-//                 seqIterator->sixFrameTranslation(seq2, (int) reads2[i].length());
-//                 seqIterator->fillQueryKmerBuffer(seq2, (int) reads2[i].length(), kmerBuffer, posToWrite,
-//                                                  (uint32_t) queryIdx, queryList[queryIdx].queryLength+3);
-//             }
-//             delete[] maskedSeq1;
-//             delete[] maskedSeq2;
-//         }
-//     }
-// }
-
+}
\ No newline at end of file
diff --git a/src/commons/KmerExtractor.h b/src/commons/KmerExtractor.h
index b77aa122..a2699139 100644
--- a/src/commons/KmerExtractor.h
+++ b/src/commons/KmerExtractor.h
@@ -5,6 +5,7 @@
 #include "KSeqWrapper.h"
 #include "common.h"
 #include <unordered_map>
+#include <atomic>
 
 class KmerExtractor {
 private:
@@ -31,14 +32,7 @@ class KmerExtractor {
                                             vector<Query> &queryList,
                                             const QuerySplit & currentSplit,
                                             const LocalParameters &par);
-
-    void fillQueryKmerBufferParallel_paired2(KSeqWrapper* kseq1,
-                                            KSeqWrapper* kseq2,
-                                            QueryKmerBuffer &kmerBuffer,
-                                            vector<Query> &queryList,
-                                            const QuerySplit & currentSplit,
-                                            const LocalParameters &par);                                        
-
+                                      
 public:
     explicit KmerExtractor(const LocalParameters & par);
     ~KmerExtractor();
diff --git a/src/commons/Taxonomer.cpp b/src/commons/Taxonomer.cpp
index 60202c51..7a65481a 100644
--- a/src/commons/Taxonomer.cpp
+++ b/src/commons/Taxonomer.cpp
@@ -97,12 +97,11 @@ void Taxonomer::chooseBestTaxon(uint32_t currentQuery,
     speciesMatches.reserve(end - offset + 1);
     TaxonScore speciesScore(0, 0, 0, 0, 0);
     speciesScore = getBestSpeciesMatches(speciesMatches,
-                                            matchList,
-                                            end,
-                                            offset,                        
-                                            queryList[currentQuery].queryLength + queryList[currentQuery].queryLength2);
+                                         matchList,
+                                         end,
+                                         offset,                        
+                                         queryList[currentQuery].queryLength + queryList[currentQuery].queryLength2);
     
-
     // If there is no proper species for current query, it is un-classified.
     if (speciesScore.score == 0 || speciesScore.score < par.minScore) {
         queryList[currentQuery].isClassified = false;
@@ -128,18 +127,18 @@ void Taxonomer::chooseBestTaxon(uint32_t currentQuery,
     unordered_map<TaxID, unsigned int> taxCnt;
     filterRedundantMatches(speciesMatches, taxCnt);
     for (auto & tax : taxCnt) {
-        queryList[currentQuery].taxCnt[tax.first] = tax.second;    
+      queryList[currentQuery].taxCnt[tax.first] = tax.second;    
     }
     
     // If score is not enough, classify to the parent of the selected species
     if (speciesScore.score < par.minSpScore) {
-        queryList[currentQuery].isClassified = true;
-        queryList[currentQuery].classification = taxonomy->taxonNode(
-                taxonomy->getTaxIdAtRank(speciesScore.taxId, "species"))->parentTaxId;
-        queryList[currentQuery].score = speciesScore.score;
-        queryList[currentQuery].coverage = speciesScore.coverage;
-        queryList[currentQuery].hammingDist = speciesScore.hammingDist;
-        return;
+      queryList[currentQuery].isClassified = true;
+      queryList[currentQuery].classification = taxonomy->taxonNode(
+              taxonomy->getTaxIdAtRank(speciesScore.taxId, "species"))->parentTaxId;
+      queryList[currentQuery].score = speciesScore.score;
+      queryList[currentQuery].coverage = speciesScore.coverage;
+      queryList[currentQuery].hammingDist = speciesScore.hammingDist;
+      return;
     }
 
     // Lower rank classification
diff --git a/src/commons/Taxonomer.h b/src/commons/Taxonomer.h
index 3cf4f9fa..fbe3d08f 100644
--- a/src/commons/Taxonomer.h
+++ b/src/commons/Taxonomer.h
@@ -45,6 +45,13 @@ struct MatchPath {
 
 };
 
+struct MatchBlock {
+    MatchBlock(size_t start, size_t end, int id) : start(start), end(end), id(id) {}
+    MatchBlock() : start(0), end(0), id(0) {}
+    size_t start;
+    size_t end;
+    uint32_t id;
+};
 
 class Taxonomer {
 private:
@@ -67,18 +74,12 @@ class Taxonomer {
     // Internal
     int denominator;
 
-    struct MatchBlock {
-        MatchBlock(size_t start, size_t end, int id) : start(start), end(end), id(id) {}
-        MatchBlock() : start(0), end(0), id(0) {}
-        size_t start;
-        size_t end;
-        uint32_t id;
-    };
+
 
 
 
     // Output
-    unordered_map<TaxID, unsigned int> taxCounts;
+   unordered_map<TaxID, unsigned int> taxCounts;
 
 
 public:

From a12882cd3bff63111ea4aaf888320d0c4b0d0f1e Mon Sep 17 00:00:00 2001
From: Jaebeom Kim <jbeom0731@gmail.com>
Date: Thu, 22 Aug 2024 14:30:12 +0900
Subject: [PATCH 03/21] vector containers generated in functions became member
 variables to reuse

---
 src/commons/Taxonomer.cpp | 481 ++------------------------------------
 src/commons/Taxonomer.h   |  11 +
 2 files changed, 28 insertions(+), 464 deletions(-)

diff --git a/src/commons/Taxonomer.cpp b/src/commons/Taxonomer.cpp
index 7a65481a..e2611ae6 100644
--- a/src/commons/Taxonomer.cpp
+++ b/src/commons/Taxonomer.cpp
@@ -36,6 +36,14 @@ Taxonomer::Taxonomer(const LocalParameters &par, NcbiTaxonomy *taxonomy) : taxon
     } else {
         denominator = 1000;
     }
+
+    // Reserve memoory for internal containers
+    speciesMatches.reserve(1000);
+    curFrameMatches.reserve(1000);
+    matchPaths.reserve(1000);
+    maxSpecies.reserve(1000);
+    curPosMatches.reserve(1000);
+    nextPosMatches.reserve(1000);
 }
 
 Taxonomer::~Taxonomer() {
@@ -93,8 +101,11 @@ void Taxonomer::chooseBestTaxon(uint32_t currentQuery,
                                 vector<Query> & queryList,
                                 const LocalParameters &par) {
     // Get the best species and its matches for the current query
-    vector<Match> speciesMatches;
-    speciesMatches.reserve(end - offset + 1);
+    if (speciesMatches.size() < end - offset + 1) {
+        speciesMatches.reserve(end - offset + 1);
+    }
+    speciesMatches.clear();
+
     TaxonScore speciesScore(0, 0, 0, 0, 0);
     speciesScore = getBestSpeciesMatches(speciesMatches,
                                          matchList,
@@ -251,10 +262,8 @@ TaxonScore Taxonomer::getBestSpeciesMatches(vector<Match> & speciesMatches,
                                             size_t end,
                                             size_t offset,
                                             int queryLength) {
-    vector<vector<const Match *>> matchesForEachSpecies;
     TaxonScore bestScore;
-    vector<const Match *> curFrameMatches;
-    vector<MatchPath> matchPaths;
+    matchPaths.clear();
     unordered_map<TaxID, float> species2score;
     unordered_map<TaxID, vector<MatchPath>> species2matchPaths;
     float bestSpScore = 0;
@@ -298,7 +307,7 @@ TaxonScore Taxonomer::getBestSpeciesMatches(vector<Match> & speciesMatches,
         return bestScore;
     }
 
-    vector<TaxID> maxSpecies;
+    maxSpecies.clear();
     for (auto & spScore : species2score) {
         if (spScore.second >= bestSpScore * tieRatio) {
             maxSpecies.push_back(spScore.first);
@@ -344,103 +353,6 @@ TaxonScore Taxonomer::getBestSpeciesMatches(vector<Match> & speciesMatches,
     return bestScore;                                  
 }
 
-// TaxonScore Taxonomer::getBestSpeciesMatches(vector<Match> & speciesMatches,
-//                                             const Match *matchList,
-//                                             size_t end,
-//                                             size_t offset,
-//                                             Query & currentQuery) {
-//     vector<vector<const Match *>> matchesForEachSpecies;
-//     TaxonScore bestScore;    
-//     vector<const Match *> curFrameMatches;
-//     vector<MatchPath> matchPaths;
-//     unordered_map<TaxID, float> species2score;
-//     unordered_map<TaxID, vector<MatchPath>> species2matchPaths;
-//     float bestSpScore = 0;
-//     unordered_map<TaxID, pair<size_t, size_t>> speciesMatchRange;
-    
-//     size_t i = offset;
-//     while (i  < end + 1) {
-//         TaxID currentSpecies = matchList[i].speciesId;
-//         size_t start = i;
-//         // For current species
-//         while ((i < end + 1) && currentSpecies == matchList[i].speciesId) {
-//             uint8_t curFrame = matchList[i].qInfo.frame;
-//             curFrameMatches.clear();
-//             // For current frame
-//             while ((i < end + 1) && currentSpecies == matchList[i].speciesId && curFrame == matchList[i].qInfo.frame) {
-//                 curFrameMatches.push_back(&matchList[i]);
-//                 i ++;
-//             }
-//             if (curFrameMatches.size() > 1) {
-//                 remainConsecutiveMatches(curFrameMatches, matchPaths, currentSpecies);
-//             }
-//         }
-//         speciesMatchRange[currentSpecies] = make_pair(start, i);
-
-//         // Combine MatchPaths
-//         if (!matchPaths.empty()) {
-//             float score = combineMatchPaths(matchPaths, species2matchPaths[currentSpecies], currentQuery.queryLength + currentQuery.queryLength2);
-
-//             score = min(score, 1.0f);
-//             if (score > 0.f) {
-//                 species2score[currentSpecies] = score;
-//             }
-//             if (score > bestSpScore) {
-//                 bestSpScore = score;
-//             }
-//         }
-//         matchPaths.clear();
-//     }
-//     // If there are no meaningful species
-//     if (species2score.empty()) {
-//         bestScore.score = 0;
-//         return bestScore;
-//     }
-//     vector<TaxID> maxSpecies;
-//     for (auto & spScore : species2score) {
-//         if (spScore.second >= bestSpScore * tieRatio) {
-//             maxSpecies.push_back(spScore.first);
-//         }
-//     }
-
-//     // More than one species --> LCA
-//     float coveredLength = 0.f;
-//     if (maxSpecies.size() > 1) {
-//         bestScore.LCA = true;
-//         bestScore.taxId = taxonomy->LCA(maxSpecies)->taxId;
-//         for (auto & sp : maxSpecies) {
-//             bestScore.score += species2score[sp];
-//             coveredLength = 0;
-//             for (auto & matchPath : species2matchPaths[maxSpecies[0]]) {
-//                 coveredLength += matchPath.end - matchPath.start + 1;
-//             }
-//             bestScore.coverage += coveredLength / (currentQuery.queryLength + currentQuery.queryLength2);
-//         }
-//         bestScore.score /= maxSpecies.size();
-//         bestScore.coverage /= maxSpecies.size();
-//         return bestScore;
-//     }
-    
-//     // One species
-//     bestScore.taxId = maxSpecies[0];
-//     bestScore.score = species2score[maxSpecies[0]];
-    
-//     int hammingDist = 0;
-//     for (auto & matchPath : species2matchPaths[maxSpecies[0]]) {
-//         coveredLength += matchPath.end - matchPath.start + 1;
-//         hammingDist += matchPath.hammingDist;
-//     }
-//     speciesMatches.reserve(speciesMatchRange[bestScore.taxId].second
-//                         - speciesMatchRange[bestScore.taxId].first + 1);
-
-//     for (size_t i = speciesMatchRange[bestScore.taxId].first; i < speciesMatchRange[bestScore.taxId].second; i++) {
-//         speciesMatches.push_back(matchList[i]);
-//     }
-//     bestScore.coverage = coveredLength / (currentQuery.queryLength + currentQuery.queryLength2);
-//     bestScore.hammingDist = hammingDist;
-
-//     return bestScore;                    
-// }
 
 float Taxonomer::combineMatchPaths(vector<MatchPath> & matchPaths,
                                    vector<MatchPath> & combinedMatchPaths,
@@ -520,8 +432,8 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
                                          TaxID speciesId) {
     size_t i = 0;
     size_t end = curFrameMatches.size();
-    vector<const Match *> curPosMatches;
-    vector<const Match *> nextPosMatches;
+    curPosMatches.clear();
+    nextPosMatches.clear();
     map<const Match *, vector<const Match *>> linkedMatches; 
 
     size_t currPos = curFrameMatches[0]->qInfo.pos;
@@ -572,9 +484,6 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
                         if (isConsecutive(nextPosMatch, curPosMatch)){
                             linkedMatches[curPosMatch].push_back(nextPosMatch);
                         }
-                        // if (nextPosMatch->isConsecutive_DNA(curPosMatch)) {
-                        //     linkedMatches[curPosMatch].push_back(nextPosMatch);
-                        // }
                     }
                 }
 
@@ -681,360 +590,6 @@ depthScore Taxonomer::DFS(const vector<const Match *> &matches,
     return bestDepthScore;
 }
 
-TaxonScore Taxonomer::scoreTaxon(vector<const Match *> &filteredMatches,
-                                 TaxID taxId,
-                                 int queryLength) {
-    // Calculate Hamming distance & covered length
-    int coveredPosCnt = 0;
-    uint16_t currHammings;
-    int aminoAcidNum = (int) queryLength / 3;
-    int currPos;
-    size_t matchNum = filteredMatches.size();
-    size_t f = 0;
-
-    // Get the smallest hamming distance at each position of query
-    auto *hammingsAtEachPos = new signed char[aminoAcidNum + 1];
-    memset(hammingsAtEachPos, 24, (aminoAcidNum + 1));
-    while (f < matchNum) {
-        currPos = filteredMatches[f]->qInfo.pos / 3;
-        currHammings = filteredMatches[f]->rightEndHamming;
-        if (GET_2_BITS(currHammings) < hammingsAtEachPos[currPos + unmaskedPos[0]])
-            hammingsAtEachPos[currPos + unmaskedPos[0]] = GET_2_BITS(currHammings);
-        if (GET_2_BITS(currHammings >> 2) < hammingsAtEachPos[currPos + unmaskedPos[1]])
-            hammingsAtEachPos[currPos + unmaskedPos[1]] = GET_2_BITS(currHammings >> 2);
-        if (GET_2_BITS(currHammings >> 4) < hammingsAtEachPos[currPos + unmaskedPos[2]])
-            hammingsAtEachPos[currPos + unmaskedPos[2]] = GET_2_BITS(currHammings >> 4);
-        if (GET_2_BITS(currHammings >> 6) < hammingsAtEachPos[currPos + unmaskedPos[3]])
-            hammingsAtEachPos[currPos + unmaskedPos[3]] = GET_2_BITS(currHammings >> 6);
-        if (GET_2_BITS(currHammings >> 8) < hammingsAtEachPos[currPos + unmaskedPos[4]])
-            hammingsAtEachPos[currPos + unmaskedPos[4]] = GET_2_BITS(currHammings >> 8);
-        if (GET_2_BITS(currHammings >> 10) < hammingsAtEachPos[currPos + unmaskedPos[5]])
-            hammingsAtEachPos[currPos + unmaskedPos[5]] = GET_2_BITS(currHammings >> 10);
-        if (GET_2_BITS(currHammings >> 12) < hammingsAtEachPos[currPos + unmaskedPos[6]])
-            hammingsAtEachPos[currPos + unmaskedPos[6]] = GET_2_BITS(currHammings >> 12);
-        if (GET_2_BITS(currHammings >> 14) < hammingsAtEachPos[currPos + unmaskedPos[7]])
-            hammingsAtEachPos[currPos + unmaskedPos[7]] = GET_2_BITS(currHammings >> 14);
-        f++;
-    }
-
-    // Sum up hamming distances and count the number of position covered by the matches.
-    float hammingSum = 0;
-    for (int h = 0; h < aminoAcidNum; h++) {
-        if (hammingsAtEachPos[h] == 0) { // Add 0 for 0 hamming dist.
-            coveredPosCnt++;
-        } else if (hammingsAtEachPos[h] != 24) { // Add 1.5, 2, 2.5 for 1, 2, 3 hamming dist. respectively
-            hammingSum += 1.0f + (0.5f * hammingsAtEachPos[h]);
-            coveredPosCnt++;
-        }
-    }
-    delete[] hammingsAtEachPos;
-
-    // Score current genus
-    int coveredLength = coveredPosCnt * 3;
-    if (coveredLength > queryLength) coveredLength = queryLength;
-    float score = ((float) coveredLength - hammingSum) / (float) queryLength;
-    float coverage = (float) (coveredLength) / (float) (queryLength);
-
-    return {taxId, score, coverage, (int) hammingSum, 0};
-}
-
-TaxonScore Taxonomer::scoreTaxon(vector<const Match *> &filteredMatches,
-                                 TaxID taxId,       
-                                 int readLength1,
-                                 int readLength2) {
-
-    // Calculate Hamming distance & covered length
-    int aminoAcidNum_total = ((int) readLength1 / 3) + ((int) readLength2 / 3);
-    int aminoAcidNum_read1 = ((int) readLength1 / 3);
-    int currPos;
-    size_t matchNum = filteredMatches.size();
-    size_t f = 0;
-
-    // Get the smallest hamming distance at each position of query
-    auto *hammingsAtEachPos = new signed char[aminoAcidNum_total + 3];
-    memset(hammingsAtEachPos, 24, (aminoAcidNum_total + 3));
-    while (f < matchNum) {
-        uint8_t minHammingDist = 24;
-        uint16_t currHammings = 0;
-        currPos = (int) filteredMatches[f]->qInfo.pos / 3;
-        // Find the closest match at current position
-        while ((f < matchNum) && currPos == (int) filteredMatches[f]->qInfo.pos / 3) {
-            if (filteredMatches[f]->hamming < minHammingDist) {
-                minHammingDist = filteredMatches[f]->hamming;
-                currHammings = filteredMatches[f]->rightEndHamming;
-            }
-            f++;
-        }
-        // Update hamming distance at each position
-        if (GET_2_BITS(currHammings) < hammingsAtEachPos[currPos + unmaskedPos[0]])
-            hammingsAtEachPos[currPos + unmaskedPos[0]] = GET_2_BITS(currHammings);
-        if (GET_2_BITS(currHammings >> 2) < hammingsAtEachPos[currPos + unmaskedPos[1]])
-            hammingsAtEachPos[currPos + unmaskedPos[1]] = GET_2_BITS(currHammings >> 2);
-        if (GET_2_BITS(currHammings >> 4) < hammingsAtEachPos[currPos + unmaskedPos[2]])
-            hammingsAtEachPos[currPos + unmaskedPos[2]] = GET_2_BITS(currHammings >> 4);
-        if (GET_2_BITS(currHammings >> 6) < hammingsAtEachPos[currPos + unmaskedPos[3]])
-            hammingsAtEachPos[currPos + unmaskedPos[3]] = GET_2_BITS(currHammings >> 6);
-        if (GET_2_BITS(currHammings >> 8) < hammingsAtEachPos[currPos + unmaskedPos[4]])
-            hammingsAtEachPos[currPos + unmaskedPos[4]] = GET_2_BITS(currHammings >> 8);
-        if (GET_2_BITS(currHammings >> 10) < hammingsAtEachPos[currPos + unmaskedPos[5]])
-            hammingsAtEachPos[currPos + unmaskedPos[5]] = GET_2_BITS(currHammings >> 10);
-        if (GET_2_BITS(currHammings >> 12) < hammingsAtEachPos[currPos + unmaskedPos[6]])
-            hammingsAtEachPos[currPos + unmaskedPos[6]] = GET_2_BITS(currHammings >> 12);
-        if (GET_2_BITS(currHammings >> 14) < hammingsAtEachPos[currPos + unmaskedPos[7]])
-            hammingsAtEachPos[currPos + unmaskedPos[7]] = GET_2_BITS(currHammings >> 14);
-        f++;
-    }
-
-    // Sum up hamming distances and count the number of position covered by the matches.
-    float hammingSum = 0;
-    int coveredPosCnt_read1 = 0;
-    int coveredPosCnt_read2 = 0;
-    for (int h = 0; h < aminoAcidNum_total; h++) {
-        // Read 1
-        if (h < aminoAcidNum_read1) {
-            if (hammingsAtEachPos[h] == 0) { // Add 0 for 0 hamming dist.
-                coveredPosCnt_read1++;
-            } else if (hammingsAtEachPos[h] != 24) { // Add 1.5, 2, 2.5 for 1, 2, 3 hamming dist. respectively
-                hammingSum += 1.0f + (0.5f * (float) hammingsAtEachPos[h]);
-                coveredPosCnt_read1++;
-            }
-        }
-            // Read 2
-        else {
-            if (hammingsAtEachPos[h] == 0) { // Add 0 for 0 hamming dist.
-                coveredPosCnt_read2++;
-            } else if (hammingsAtEachPos[h] != 24) { // Add 1.5, 2, 2.5 for 1, 2, 3 hamming dist. respectively
-                hammingSum += 1.0f + (0.5f * (float) hammingsAtEachPos[h]);
-                coveredPosCnt_read2++;
-            }
-        }
-    }
-    delete[] hammingsAtEachPos;
-
-    // Score current genus
-    int coveredLength_read1 = coveredPosCnt_read1 * 3;
-    int coveredLength_read2 = coveredPosCnt_read2 * 3;
-    if (coveredLength_read1 > readLength1) coveredLength_read1 = readLength1;
-    if (coveredLength_read2 > readLength2) coveredLength_read2 = readLength2;
-    float score =
-            ((float) (coveredLength_read1 + coveredLength_read2) - hammingSum) / (float) (readLength1 + readLength2);
-    float coverage = (float) (coveredLength_read1 + coveredLength_read2) / (float) (readLength1 + readLength2);
-
-//    matchesForEachGenus.push_back(move(filteredMatches));
-    return {taxId, score, coverage, (int) hammingSum, 0};
-}
-
-TaxonScore Taxonomer::chooseSpecies(const vector<Match> &matches,
-                                     int queryLength,
-                                     vector<TaxID> &species,
-                                     unordered_map<TaxID, pair<int, int>> & speciesMatchRange) {
-    // Score each species
-    std::unordered_map<TaxID, TaxonScore> speciesScores;
-    size_t i = 0;
-    TaxID currentSpeices;
-    size_t numOfMatch = matches.size();
-    size_t speciesBegin, speciesEnd;
-    while (i < numOfMatch) {
-        currentSpeices = matches[i].speciesId;
-        speciesBegin = i;
-        while ((i < numOfMatch) && currentSpeices == matches[i].speciesId) {
-            i++;
-        }
-        speciesEnd = i;
-        speciesScores[currentSpeices] = scoreSpecies(matches, speciesBegin, speciesEnd, queryLength);
-        speciesMatchRange[currentSpeices] = {(int) speciesBegin, (int) speciesEnd};
-        speciesScores[currentSpeices].taxId = currentSpeices;
-    }
-
-    // Get the best species
-    TaxonScore bestScore;
-    for (auto & sp : speciesScores) {
-        if (sp.second.score > bestScore.score) {
-            species.clear();
-            species.push_back(sp.first);
-            bestScore = sp.second;
-        } else if (sp.second.coverage == bestScore.coverage) {
-            species.push_back(sp.first);
-        }
-    }
-    return bestScore;
-}
-
-TaxonScore Taxonomer::chooseSpecies(const vector<Match> &matches,
-                                     int read1Length,
-                                     int read2Length,
-                                     vector<TaxID> &species,
-                                     unordered_map<TaxID, pair<int, int>> & speciesMatchRange) {
-    // Score each species
-    std::unordered_map<TaxID, TaxonScore> speciesScores;
-
-    size_t i = 0;
-    TaxID currentSpeices;
-    size_t numOfMatch = matches.size();
-    size_t speciesBegin, speciesEnd;
-    while (i < numOfMatch) {
-        currentSpeices = matches[i].speciesId;
-        speciesBegin = i;
-        while ((i < numOfMatch) && currentSpeices == matches[i].speciesId) {
-            i++;
-        }
-        speciesEnd = i;
-        speciesScores[currentSpeices] = scoreSpecies(matches, speciesBegin, speciesEnd, read1Length, read2Length);
-        speciesMatchRange[currentSpeices] = {(int) speciesBegin, (int) speciesEnd};
-        speciesScores[currentSpeices].taxId = currentSpeices;
-    }
-
-    // Get the best species
-    TaxonScore bestScore;
-    for (auto & sp : speciesScores) {
-        if (sp.second.score > bestScore.score) {
-            species.clear();
-            species.push_back(sp.first);
-            bestScore = sp.second;
-        } else if (sp.second.coverage == bestScore.coverage) {
-            species.push_back(sp.first);
-        }
-    }
-    return bestScore;
-}
-
-TaxonScore Taxonomer::scoreSpecies(const vector<Match> &matches,
-                                    size_t begin,
-                                    size_t end,
-                                    int queryLength) {
-
-    // Get the largest hamming distance at each position of query
-    int aminoAcidNum = queryLength / 3;
-    auto *hammingsAtEachPos = new signed char[aminoAcidNum + 1];
-    memset(hammingsAtEachPos, -1, (aminoAcidNum + 1));
-    int currPos;
-    size_t walker = begin;
-    uint16_t currHammings;
-    while (walker < end) {
-        currPos = matches[walker].qInfo.pos / 3;
-        currHammings = matches[walker].rightEndHamming;
-        if (GET_2_BITS(currHammings) > hammingsAtEachPos[currPos + unmaskedPos[0]])
-            hammingsAtEachPos[currPos + unmaskedPos[0]] = GET_2_BITS(currHammings);
-        if (GET_2_BITS(currHammings >> 2) > hammingsAtEachPos[currPos + unmaskedPos[1]])
-            hammingsAtEachPos[currPos + unmaskedPos[1]] = GET_2_BITS(currHammings >> 2);
-        if (GET_2_BITS(currHammings >> 4) > hammingsAtEachPos[currPos + unmaskedPos[2]])
-            hammingsAtEachPos[currPos + unmaskedPos[2]] = GET_2_BITS(currHammings >> 4);
-        if (GET_2_BITS(currHammings >> 6) > hammingsAtEachPos[currPos + unmaskedPos[3]])
-            hammingsAtEachPos[currPos + unmaskedPos[3]] = GET_2_BITS(currHammings >> 6);
-        if (GET_2_BITS(currHammings >> 8) > hammingsAtEachPos[currPos + unmaskedPos[4]])
-            hammingsAtEachPos[currPos + unmaskedPos[4]] = GET_2_BITS(currHammings >> 8);
-        if (GET_2_BITS(currHammings >> 10) > hammingsAtEachPos[currPos + unmaskedPos[5]])
-            hammingsAtEachPos[currPos + unmaskedPos[5]] = GET_2_BITS(currHammings >> 10);
-        if (GET_2_BITS(currHammings >> 12) > hammingsAtEachPos[currPos + unmaskedPos[6]])
-            hammingsAtEachPos[currPos + unmaskedPos[6]] = GET_2_BITS(currHammings >> 12);
-        if (GET_2_BITS(currHammings >> 14) > hammingsAtEachPos[currPos + unmaskedPos[7]])
-            hammingsAtEachPos[currPos + unmaskedPos[7]] = GET_2_BITS(currHammings >> 14);
-        walker++;
-    }
-
-    // Sum up hamming distances and count the number of position covered by the matches.
-    float hammingSum = 0;
-    int hammingDist = 0;
-    int coveredPosCnt = 0;
-    for (int h = 0; h < aminoAcidNum; h++) {
-        if (hammingsAtEachPos[h] == 0) { // Add 0 for 0 hamming dist.
-            coveredPosCnt++;
-        } else if (hammingsAtEachPos[h] != -1) { // Add 1.5, 2, 2.5 for 1, 2, 3 hamming dist. respectively
-            hammingSum += 1.0f + (0.5f * (float) hammingsAtEachPos[h]);
-            hammingDist += hammingsAtEachPos[h];
-            coveredPosCnt++;
-        }
-    }
-    delete[] hammingsAtEachPos;
-    // Score
-    int coveredLength = coveredPosCnt * 3;
-    if (coveredLength >= queryLength) coveredLength = queryLength;
-
-    float score = ((float)coveredLength - hammingSum) / (float) queryLength;
-    float coverage = (float) coveredLength / (float) (queryLength);
-
-    return {0, score, coverage, hammingDist, 0};
-}
-
-TaxonScore Taxonomer::scoreSpecies(const vector<Match> &matches,
-                                    size_t begin,
-                                    size_t end,
-                                    int queryLength,
-                                    int queryLength2) {
-
-    // Get the largest hamming distance at each position of query
-    int aminoAcidNum_total = queryLength / 3 + queryLength2 / 3;
-    int aminoAcidNum_read1 = queryLength / 3;
-    auto *hammingsAtEachPos = new signed char[aminoAcidNum_total + 3];
-    memset(hammingsAtEachPos, -1, (aminoAcidNum_total + 3));
-
-    int currPos;
-    size_t walker = begin;
-    uint16_t currHammings;
-
-    while (walker < end) {
-        currPos = matches[walker].qInfo.pos / 3;
-        currHammings = matches[walker].rightEndHamming;
-        if (GET_2_BITS(currHammings) > hammingsAtEachPos[currPos + unmaskedPos[0]])
-            hammingsAtEachPos[currPos + unmaskedPos[0]] = GET_2_BITS(currHammings);
-        if (GET_2_BITS(currHammings >> 2) > hammingsAtEachPos[currPos + unmaskedPos[1]])
-            hammingsAtEachPos[currPos + unmaskedPos[1]] = GET_2_BITS(currHammings >> 2);
-        if (GET_2_BITS(currHammings >> 4) > hammingsAtEachPos[currPos + unmaskedPos[2]])
-            hammingsAtEachPos[currPos + unmaskedPos[2]] = GET_2_BITS(currHammings >> 4);
-        if (GET_2_BITS(currHammings >> 6) > hammingsAtEachPos[currPos + unmaskedPos[3]])
-            hammingsAtEachPos[currPos + unmaskedPos[3]] = GET_2_BITS(currHammings >> 6);
-        if (GET_2_BITS(currHammings >> 8) > hammingsAtEachPos[currPos + unmaskedPos[4]])
-            hammingsAtEachPos[currPos + unmaskedPos[4]] = GET_2_BITS(currHammings >> 8);
-        if (GET_2_BITS(currHammings >> 10) > hammingsAtEachPos[currPos + unmaskedPos[5]])
-            hammingsAtEachPos[currPos + unmaskedPos[5]] = GET_2_BITS(currHammings >> 10);
-        if (GET_2_BITS(currHammings >> 12) > hammingsAtEachPos[currPos + unmaskedPos[6]])
-            hammingsAtEachPos[currPos + unmaskedPos[6]] = GET_2_BITS(currHammings >> 12);
-        if (GET_2_BITS(currHammings >> 14) > hammingsAtEachPos[currPos + unmaskedPos[7]])
-            hammingsAtEachPos[currPos + unmaskedPos[7]] = GET_2_BITS(currHammings >> 14);
-        walker++;
-    }
-
-    // Sum up hamming distances and count the number of position covered by the matches.
-    float hammingSum = 0;
-    int hammingDist = 0;
-    int coveredPosCnt_read1 = 0;
-    int coveredPosCnt_read2 = 0;
-    for (int h = 0; h < aminoAcidNum_total; h++) {
-        // Read 1
-        if (h < aminoAcidNum_read1) {
-            if (hammingsAtEachPos[h] == 0) { // Add 0 for 0 hamming dist.
-                coveredPosCnt_read1++;
-            } else if (hammingsAtEachPos[h] != -1) { // Add 1.5, 2, 2.5 for 1, 2, 3 hamming dist. respectively
-                hammingSum += 1.0f + (0.5f * (float) hammingsAtEachPos[h]);
-                hammingDist += hammingsAtEachPos[h];
-                coveredPosCnt_read1++;
-            }
-        }
-            // Read 2
-        else {
-            if (hammingsAtEachPos[h] == 0) { // Add 0 for 0 hamming dist.
-                coveredPosCnt_read2++;
-            } else if (hammingsAtEachPos[h] != -1) { // Add 1.5, 2, 2.5 for 1, 2, 3 hamming dist. respectively
-                hammingSum += 1.0f + (0.5f * (float) hammingsAtEachPos[h]);
-                hammingDist += hammingsAtEachPos[h];
-                coveredPosCnt_read2++;
-            }
-        }
-    }
-    delete[] hammingsAtEachPos;
-
-    // Score
-    int coveredLength_read1 = coveredPosCnt_read1 * 3;
-    int coveredLength_read2 = coveredPosCnt_read2 * 3;
-    if (coveredLength_read1 >= queryLength) coveredLength_read1 = queryLength;
-    if (coveredLength_read2 >= queryLength2) coveredLength_read2 = queryLength2;
-
-    float score = ((float) (coveredLength_read1 + coveredLength_read2) - hammingSum) / (float) (queryLength + queryLength2);
-    float coverage = (float) (coveredLength_read1 + coveredLength_read2) / (float) (queryLength + queryLength2);
-
-    return {0, score, coverage, hammingDist, 0};
-}
-
 bool Taxonomer::isConsecutive(const Match * match1, const Match * match2) {
     // match1 87654321 -> 08765432
     // match2 98765432 -> 08765432
@@ -1044,8 +599,6 @@ bool Taxonomer::isConsecutive(const Match * match1, const Match * match2) {
 bool Taxonomer::isConsecutive_diffFrame(const Match * match1, const Match * match2) {
     // int hamming1 = match1->hamming - GET_2_BITS(match1->rightEndHamming);
     // int hamming2 = match2->hamming - GET_2_BITS(match2->rightEndHamming >> 14);
-    // cout << match1->rightEndHamming << " " << match2->rightEndHamming << endl;
-    // cout << hamming1 << " " << hamming2 << endl;
     // match1 87654321 -> 08765432
     // match2 98765432 -> 08765432
     return (match1->hamming - GET_2_BITS(match1->rightEndHamming)) == (match2->hamming - GET_2_BITS(match2->rightEndHamming >> 14));
diff --git a/src/commons/Taxonomer.h b/src/commons/Taxonomer.h
index fbe3d08f..36452f01 100644
--- a/src/commons/Taxonomer.h
+++ b/src/commons/Taxonomer.h
@@ -73,6 +73,16 @@ class Taxonomer {
 
     // Internal
     int denominator;
+    vector<Match> speciesMatches;
+
+    // getBestSpeciesMatches
+    vector<const Match *> curFrameMatches;
+    vector<MatchPath> matchPaths;
+    vector<TaxID> maxSpecies;
+
+    // remainConsecutiveMatches
+    vector<const Match *> curPosMatches;
+    vector<const Match *> nextPosMatches;
 
 
 
@@ -86,6 +96,7 @@ class Taxonomer {
     Taxonomer(const LocalParameters & par, NcbiTaxonomy * taxonomy);
     ~Taxonomer();
 
+
     void assignTaxonomy(const Match *matchList,
                         size_t numOfMatches,
                         std::vector<Query> & queryList,

From a50671c61fe45e59c88b9304c3ed1a99cb8fee17 Mon Sep 17 00:00:00 2001
From: Jaebeom Kim <jbeom0731@gmail.com>
Date: Thu, 22 Aug 2024 15:03:00 +0900
Subject: [PATCH 04/21] unordered maps and sets generated in functions became
 member variables to reuse

---
 src/commons/Taxonomer.cpp | 40 +++++++++++++++++++++++----------------
 src/commons/Taxonomer.h   | 13 ++++++++++---
 2 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/src/commons/Taxonomer.cpp b/src/commons/Taxonomer.cpp
index e2611ae6..70f8bf05 100644
--- a/src/commons/Taxonomer.cpp
+++ b/src/commons/Taxonomer.cpp
@@ -38,12 +38,20 @@ Taxonomer::Taxonomer(const LocalParameters &par, NcbiTaxonomy *taxonomy) : taxon
     }
 
     // Reserve memoory for internal containers
-    speciesMatches.reserve(1000);
-    curFrameMatches.reserve(1000);
-    matchPaths.reserve(1000);
-    maxSpecies.reserve(1000);
-    curPosMatches.reserve(1000);
-    nextPosMatches.reserve(1000);
+    speciesMatches.reserve(4096);
+    curFrameMatches.reserve(4096);
+    matchPaths.reserve(4096);
+    maxSpecies.reserve(4096);
+    curPosMatches.reserve(4096);
+    nextPosMatches.reserve(4096);
+    
+    species2score.reserve(4096);
+    species2matchPaths.reserve(4096);
+    speciesMatchRange.reserve(4096);
+    taxCnt.reserve(4096);
+    cladeCnt.reserve(4096);
+    used.reserve(4096);
+    idx2depthScore.reserve(4096);    
 }
 
 Taxonomer::~Taxonomer() {
@@ -135,7 +143,7 @@ void Taxonomer::chooseBestTaxon(uint32_t currentQuery,
     }
 
     // Filter redundant matches
-    unordered_map<TaxID, unsigned int> taxCnt;
+    taxCnt.clear();
     filterRedundantMatches(speciesMatches, taxCnt);
     for (auto & tax : taxCnt) {
       queryList[currentQuery].taxCnt[tax.first] = tax.second;    
@@ -195,7 +203,7 @@ void Taxonomer::filterRedundantMatches(vector<Match> & speciesMatches,
 
 TaxID Taxonomer::lowerRankClassification(const unordered_map<TaxID, unsigned int> & taxCnt, TaxID spTaxId, int queryLength) {
     unsigned int maxCnt = (queryLength - 1)/denominator + 1;
-    unordered_map<TaxID, TaxonCounts> cladeCnt;
+    cladeCnt.clear();
     getSpeciesCladeCounts(taxCnt, cladeCnt, spTaxId);
     if (accessionLevel == 2) { // Don't do accession-level classification
         // Remove leaf nodes
@@ -262,13 +270,13 @@ TaxonScore Taxonomer::getBestSpeciesMatches(vector<Match> & speciesMatches,
                                             size_t end,
                                             size_t offset,
                                             int queryLength) {
-    TaxonScore bestScore;
     matchPaths.clear();
-    unordered_map<TaxID, float> species2score;
-    unordered_map<TaxID, vector<MatchPath>> species2matchPaths;
+    species2score.clear();
+    species2matchPaths.clear();
+    speciesMatchRange.clear();
+    
+    TaxonScore bestScore;
     float bestSpScore = 0;
-    unordered_map<TaxID, pair<size_t, size_t>> speciesMatchRange;
-
     size_t i = offset;
     while (i  < end + 1) {
         TaxID currentSpecies = matchList[i].speciesId;
@@ -501,9 +509,9 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
     if (taxonomy->IsAncestor(eukaryotaTaxId, speciesId)) {
         MIN_DEPTH = minConsCntEuk - 1;
     }
-    unordered_set<const Match *> used;
-    unordered_map<const Match *, depthScore> idx2depthScore;
-    
+
+    used.clear();
+    idx2depthScore.clear();
     for (const auto& entry : linkedMatches) {
         if (!used.count(entry.first)) {
             used.insert(entry.first);
diff --git a/src/commons/Taxonomer.h b/src/commons/Taxonomer.h
index 36452f01..84145d8e 100644
--- a/src/commons/Taxonomer.h
+++ b/src/commons/Taxonomer.h
@@ -75,18 +75,25 @@ class Taxonomer {
     int denominator;
     vector<Match> speciesMatches;
 
+    // chooseBestTaxon
+    unordered_map<TaxID, unsigned int> taxCnt;
+
     // getBestSpeciesMatches
     vector<const Match *> curFrameMatches;
     vector<MatchPath> matchPaths;
     vector<TaxID> maxSpecies;
+    unordered_map<TaxID, float> species2score;
+    unordered_map<TaxID, vector<MatchPath>> species2matchPaths;
+    unordered_map<TaxID, pair<size_t, size_t>> speciesMatchRange;
 
     // remainConsecutiveMatches
     vector<const Match *> curPosMatches;
     vector<const Match *> nextPosMatches;
+    unordered_set<const Match *> used;
+    unordered_map<const Match *, depthScore> idx2depthScore;
 
-
-
-
+    // lowerRankClassification
+    unordered_map<TaxID, TaxonCounts> cladeCnt;
 
     // Output
    unordered_map<TaxID, unsigned int> taxCounts;

From a1145c6495419d2dd86e4440381db3893c0bbcbb Mon Sep 17 00:00:00 2001
From: Jaebeom Kim <jbeom0731@gmail.com>
Date: Thu, 22 Aug 2024 15:22:04 +0900
Subject: [PATCH 05/21] skip lowerRankClassification to measure time

---
 src/commons/Taxonomer.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/commons/Taxonomer.cpp b/src/commons/Taxonomer.cpp
index 70f8bf05..db641d23 100644
--- a/src/commons/Taxonomer.cpp
+++ b/src/commons/Taxonomer.cpp
@@ -161,9 +161,11 @@ void Taxonomer::chooseBestTaxon(uint32_t currentQuery,
     }
 
     // Lower rank classification
-    TaxID result = lowerRankClassification(taxCnt,
-                                         speciesScore.taxId,
-                                          queryList[currentQuery].queryLength + queryList[currentQuery].queryLength2);
+    // TaxID result = lowerRankClassification(taxCnt,
+    //                                      speciesScore.taxId,
+    //                                       queryList[currentQuery].queryLength + queryList[currentQuery].queryLength2);
+    
+    TaxID result = speciesScore.taxId;
 
     // Store classification results
     queryList[currentQuery].isClassified = true;

From 4bdd089ab40b95b1507028cbf56351dbeecf7c89 Mon Sep 17 00:00:00 2001
From: Jaebeom Kim <jbeom0731@gmail.com>
Date: Thu, 22 Aug 2024 15:40:01 +0900
Subject: [PATCH 06/21] measure time spent for spliting matches

---
 src/commons/Taxonomer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/commons/Taxonomer.cpp b/src/commons/Taxonomer.cpp
index db641d23..07e16524 100644
--- a/src/commons/Taxonomer.cpp
+++ b/src/commons/Taxonomer.cpp
@@ -79,7 +79,7 @@ void Taxonomer::assignTaxonomy(const Match *matchList,
         matchBlocks[blockIdx].end = matchIdx - 1;
         blockIdx++;
     }
-
+    cout << "Time spent for spliting matches: " << double(time(nullptr) - beforeAnalyze) << endl;
     // Process each block
 #pragma omp parallel default(none), shared(cout, matchBlocks, matchList, seqNum, queryList, blockIdx, par)
     {

From 8df0188e9d68ea5c71992e8c78c610e09b13eb6f Mon Sep 17 00:00:00 2001
From: Jaebeom Kim <jbeom0731@gmail.com>
Date: Thu, 22 Aug 2024 16:17:46 +0900
Subject: [PATCH 07/21] measure time spent for spliting matches

---
 src/commons/Classifier.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/commons/Classifier.cpp b/src/commons/Classifier.cpp
index 6531fcfe..eecf1837 100644
--- a/src/commons/Classifier.cpp
+++ b/src/commons/Classifier.cpp
@@ -175,7 +175,7 @@ void Classifier::assignTaxonomy(const Match *matchList,
         matchBlocks[blockIdx].end = matchIdx - 1;
         blockIdx++;
     }
-
+    cout << "Time spent for spliting matches: " << double(time(nullptr) - beforeAnalyze) << endl;
     // Process each block
 #pragma omp parallel default(none), shared(cout, matchBlocks, matchList, seqNum, queryList, blockIdx, par)
     {

From 6cc6ea58f6ea8af795aae50ce0a5a8d5174e8ef1 Mon Sep 17 00:00:00 2001
From: Jaebeom Kim <jbeom0731@gmail.com>
Date: Thu, 22 Aug 2024 16:43:58 +0900
Subject: [PATCH 08/21] Skip filterRedundantMatches to measure time

---
 src/commons/Taxonomer.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/commons/Taxonomer.cpp b/src/commons/Taxonomer.cpp
index 07e16524..d151b98d 100644
--- a/src/commons/Taxonomer.cpp
+++ b/src/commons/Taxonomer.cpp
@@ -143,11 +143,11 @@ void Taxonomer::chooseBestTaxon(uint32_t currentQuery,
     }
 
     // Filter redundant matches
-    taxCnt.clear();
-    filterRedundantMatches(speciesMatches, taxCnt);
-    for (auto & tax : taxCnt) {
-      queryList[currentQuery].taxCnt[tax.first] = tax.second;    
-    }
+    // taxCnt.clear();
+    // filterRedundantMatches(speciesMatches, taxCnt);
+    // for (auto & tax : taxCnt) {
+    //   queryList[currentQuery].taxCnt[tax.first] = tax.second;    
+    // }
     
     // If score is not enough, classify to the parent of the selected species
     if (speciesScore.score < par.minSpScore) {
@@ -162,8 +162,8 @@ void Taxonomer::chooseBestTaxon(uint32_t currentQuery,
 
     // Lower rank classification
     // TaxID result = lowerRankClassification(taxCnt,
-    //                                      speciesScore.taxId,
-    //                                       queryList[currentQuery].queryLength + queryList[currentQuery].queryLength2);
+    //                                        speciesScore.taxId,
+    //                                        queryList[currentQuery].queryLength + queryList[currentQuery].queryLength2);
     
     TaxID result = speciesScore.taxId;
 

From d96e7fbf41c81f5d6833e90b77dd8b325d2b3346 Mon Sep 17 00:00:00 2001
From: Jaebeom Kim <jbeom0731@gmail.com>
Date: Fri, 23 Aug 2024 13:37:16 +0900
Subject: [PATCH 09/21] Use std::move() instead of
 vectorA=vectorB;vectorB.clear()

---
 src/commons/Classifier.cpp | 1 +
 src/commons/Taxonomer.cpp  | 6 ++----
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/commons/Classifier.cpp b/src/commons/Classifier.cpp
index eecf1837..1c509777 100644
--- a/src/commons/Classifier.cpp
+++ b/src/commons/Classifier.cpp
@@ -194,6 +194,7 @@ void Classifier::assignTaxonomy(const Match *matchList,
     for (size_t i = 0; i < seqNum; i++) {
         ++taxCounts[queryList[i].classification];
     }
+    
     delete[] matchBlocks;
     cout << "Time spent for analyzing: " << double(time(nullptr) - beforeAnalyze) << endl;
 
diff --git a/src/commons/Taxonomer.cpp b/src/commons/Taxonomer.cpp
index d151b98d..13abb7c9 100644
--- a/src/commons/Taxonomer.cpp
+++ b/src/commons/Taxonomer.cpp
@@ -471,8 +471,7 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
                 }
             }
             // Update curPosMatches and nextPosMatches
-            curPosMatches = nextPosMatches;
-            nextPosMatches.clear();
+            curPosMatches = std::move(nextPosMatches);
             currPos = nextPos;
         }
     } else {
@@ -499,8 +498,7 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
 
             }
             // Update curPosMatches and nextPosMatches
-            curPosMatches = nextPosMatches;
-            nextPosMatches.clear();
+            curPosMatches = std::move(nextPosMatches);
             currPos = nextPos;
         }
     }

From 43f1fcb5747515cfc405fa14630120a05a158c11 Mon Sep 17 00:00:00 2001
From: Jaebeom Kim <jbeom0731@gmail.com>
Date: Mon, 26 Aug 2024 18:18:47 +0900
Subject: [PATCH 10/21] Remove std::map<Match, vector<Match>>, and Use two
 parallel vectors for key and values and a vector for starting value idx for
 each key.

---
 src/commons/Taxonomer.cpp | 127 ++++++++++++++++++++++++--------------
 src/commons/Taxonomer.h   |  19 ++++--
 2 files changed, 95 insertions(+), 51 deletions(-)

diff --git a/src/commons/Taxonomer.cpp b/src/commons/Taxonomer.cpp
index 13abb7c9..28d7bbb0 100644
--- a/src/commons/Taxonomer.cpp
+++ b/src/commons/Taxonomer.cpp
@@ -44,6 +44,9 @@ Taxonomer::Taxonomer(const LocalParameters &par, NcbiTaxonomy *taxonomy) : taxon
     maxSpecies.reserve(4096);
     curPosMatches.reserve(4096);
     nextPosMatches.reserve(4096);
+    linkedMatchKeys.reserve(4096);
+    linkedMatchValues.reserve(4096);
+    linkedMatchValuesIdx.reserve(4096);
     
     species2score.reserve(4096);
     species2matchPaths.reserve(4096);
@@ -143,11 +146,11 @@ void Taxonomer::chooseBestTaxon(uint32_t currentQuery,
     }
 
     // Filter redundant matches
-    // taxCnt.clear();
-    // filterRedundantMatches(speciesMatches, taxCnt);
-    // for (auto & tax : taxCnt) {
-    //   queryList[currentQuery].taxCnt[tax.first] = tax.second;    
-    // }
+    taxCnt.clear();
+    filterRedundantMatches(speciesMatches, taxCnt);
+    for (auto & tax : taxCnt) {
+      queryList[currentQuery].taxCnt[tax.first] = tax.second;    
+    }
     
     // If score is not enough, classify to the parent of the selected species
     if (speciesScore.score < par.minSpScore) {
@@ -161,11 +164,11 @@ void Taxonomer::chooseBestTaxon(uint32_t currentQuery,
     }
 
     // Lower rank classification
-    // TaxID result = lowerRankClassification(taxCnt,
-    //                                        speciesScore.taxId,
-    //                                        queryList[currentQuery].queryLength + queryList[currentQuery].queryLength2);
+    TaxID result = lowerRankClassification(taxCnt,
+                                           speciesScore.taxId,
+                                           queryList[currentQuery].queryLength + queryList[currentQuery].queryLength2);
     
-    TaxID result = speciesScore.taxId;
+    // TaxID result = speciesScore.taxId;
 
     // Store classification results
     queryList[currentQuery].isClassified = true;
@@ -444,7 +447,9 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
     size_t end = curFrameMatches.size();
     curPosMatches.clear();
     nextPosMatches.clear();
-    map<const Match *, vector<const Match *>> linkedMatches; 
+    linkedMatchKeys.clear();
+    linkedMatchValues.clear();
+    linkedMatchValuesIdx.clear();
 
     size_t currPos = curFrameMatches[0]->qInfo.pos;
     uint64_t frame = curFrameMatches[0]->qInfo.frame;
@@ -463,11 +468,19 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
             if (currPos + 3 == nextPos) {
                 // Compare curPosMatches and nextPosMatches
                 for (auto &curPosMatch: curPosMatches) {
+                    size_t startIdx = linkedMatchValues.size();
+                    bool found = false;
                     for (auto &nextPosMatch: nextPosMatches) {
                         if (isConsecutive(curPosMatch, nextPosMatch)){
-                            linkedMatches[curPosMatch].push_back(nextPosMatch);
+                            linkedMatchValues.push_back(nextPosMatch);
+                            found = true;
+                            // linkedMatches[curPosMatch].push_back(nextPosMatch);
                         }
                     }
+                    if (found) {
+                        linkedMatchKeys.push_back(curPosMatch);
+                        linkedMatchValuesIdx.push_back(startIdx);
+                    }
                 }
             }
             // Update curPosMatches and nextPosMatches
@@ -489,11 +502,19 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
             if (currPos + 3 == nextPos) {
                 // Compare curPosMatches and nextPosMatches
                 for (auto &curPosMatch: curPosMatches) {
+                    size_t startIdx = linkedMatchValues.size();
+                    bool found = false;
                     for (auto &nextPosMatch: nextPosMatches) {
                         if (isConsecutive(nextPosMatch, curPosMatch)){
-                            linkedMatches[curPosMatch].push_back(nextPosMatch);
+                            linkedMatchValues.push_back(nextPosMatch);
+                            found = true;
+                            // linkedMatches[curPosMatch].push_back(nextPosMatch);
                         }
                     }
+                    if (found) {
+                        linkedMatchKeys.push_back(curPosMatch);
+                        linkedMatchValuesIdx.push_back(startIdx);
+                    }
                 }
 
             }
@@ -512,85 +533,101 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
 
     used.clear();
     idx2depthScore.clear();
-    for (const auto& entry : linkedMatches) {
-        if (!used.count(entry.first)) {
-            used.insert(entry.first);
+
+    for (size_t k = 0; k < linkedMatchKeys.size(); k++) {
+        const Match * key = linkedMatchKeys[k];
+        size_t startIdx = linkedMatchValuesIdx[k];
+        size_t endIdx = (k + 1 < linkedMatchKeys.size()) ? linkedMatchValuesIdx[k + 1] : linkedMatchValues.size();
+
+        if (!used.count(key)) {
+            used.insert(key);
             depthScore bestPath{};
-            for (size_t j = 0; j < entry.second.size(); j++) {
-                used.insert(entry.second[j]);
+            for (size_t j = startIdx; j < endIdx; j++) {
+                used.insert(linkedMatchValues[j]);
                 depthScore curPath = DFS(curFrameMatches,
-                                         entry.second[j],
-                                         linkedMatches,
-                                        1,
+                                         linkedMatchValues[j],
+                                         linkedMatchKeys,
+                                         linkedMatchValues,
+                                         linkedMatchValuesIdx,
+                                         1,
                                          MIN_DEPTH, 
                                          used, 
                                          idx2depthScore,
-                                         entry.first->getScore(),
-                                         entry.first->hamming);
+                                         key->getScore(),
+                                         key->hamming);
                 if (curPath.score > bestPath.score && curPath.depth > MIN_DEPTH) {
                     bestPath = curPath;
                 }
             }
             // Store the best path
             if (bestPath.depth > MIN_DEPTH) {
-                matchPaths.emplace_back(entry.first->qInfo.pos, // start coordinate on query
-                                        entry.first->qInfo.pos + bestPath.depth * 3 + 20, // end coordinate on query
+                matchPaths.emplace_back(key->qInfo.pos, // start coordinate on query
+                                        key->qInfo.pos + bestPath.depth * 3 + 20, // end coordinate on query
                                         bestPath.score, bestPath.hammingDist,
-                                        entry.first,
+                                        key,
                                         bestPath.endMatch);
             }
         }
     }
 }
 
-depthScore Taxonomer::DFS(const vector<const Match *> &matches,
-                          const Match * curMatch,
-                          const map<const Match *, vector<const Match *>> &linkedMatches,
-                          size_t depth, size_t MIN_DEPTH,
-                          unordered_set<const Match *> &used,
-                          unordered_map<const Match *, depthScore> & match2depthScore,
-                          float score, int hammingDist) {
+depthScore Taxonomer::DFS(
+    const vector<const Match *> &matches,
+    const Match *curMatch,
+    const vector<const Match *> &linkedMatchesKeys,
+    const vector<const Match *> &linkedMatchesValues,
+    const vector<size_t> &linkedMatchesIndices,
+    size_t depth, size_t MIN_DEPTH,
+    unordered_set<const Match *> &used,
+    unordered_map<const Match *, depthScore> &match2depthScore,
+    float score, int hammingDist) 
+{
     depth++;
-    depthScore bestDepthScore = depthScore{};
+    depthScore bestDepthScore{};
     depthScore returnDepthScore;
     depthScore curDepthScore;
-    float recievedScore = score;
-    if (linkedMatches.find(curMatch) == linkedMatches.end()) { // reached a leaf node
+    float receivedScore = score;
+
+    auto it = find(linkedMatchesKeys.begin(), linkedMatchesKeys.end(), curMatch);
+    if (it == linkedMatchesKeys.end()) { // Reached a leaf node
         uint8_t lastEndHamming = (curMatch->rightEndHamming >> 14);
         if (lastEndHamming == 0) {
             score += 3.0f;
         } else {
             score += 2.0f - 0.5f * lastEndHamming;
         }
-        match2depthScore[curMatch] = depthScore(1, score - recievedScore, lastEndHamming, curMatch);
+        match2depthScore[curMatch] = depthScore(1, score - receivedScore, lastEndHamming, curMatch);
         return depthScore(depth, score, hammingDist + lastEndHamming, curMatch);
-    } else { // not a leaf node
+    } else { // Not a leaf node
+        size_t index = it - linkedMatchesKeys.begin();
+        size_t startIdx = linkedMatchesIndices[index];
+        size_t endIdx = (index + 1 < linkedMatchesIndices.size()) ? linkedMatchesIndices[index + 1] : linkedMatchesValues.size();
+
         uint8_t lastEndHamming = (curMatch->rightEndHamming >> 14);
         if (lastEndHamming == 0) {
             score += 3.0f;
         } else {
             score += 2.0f - 0.5f * lastEndHamming;
         }
-        for (const Match * nextMatch: linkedMatches.at(curMatch)) {
+        for (size_t i = startIdx; i < endIdx; ++i) {
+            const Match *nextMatch = linkedMatchesValues[i];
             used.insert(nextMatch);
-            // Reuse the depth score of nextMatchIdx if it has been calculated
-            if (match2depthScore.find(nextMatch) != match2depthScore.end()){
+            if (match2depthScore.find(nextMatch) != match2depthScore.end()) {
                 returnDepthScore = match2depthScore[nextMatch];
                 curDepthScore = depthScore(returnDepthScore.depth + depth,
                                            returnDepthScore.score + score,
                                            returnDepthScore.hammingDist + hammingDist + lastEndHamming,
                                            returnDepthScore.endMatch);
             } else {
-                curDepthScore = DFS(matches, nextMatch, linkedMatches, depth, MIN_DEPTH, used, match2depthScore, score, hammingDist + lastEndHamming);
+                curDepthScore = DFS(matches, nextMatch, linkedMatchesKeys, linkedMatchesValues, linkedMatchesIndices, depth, MIN_DEPTH, used, match2depthScore, score, hammingDist + lastEndHamming);
             }
-            if (curDepthScore.score > bestDepthScore.score
-                && curDepthScore.depth > MIN_DEPTH) {
+            if (curDepthScore.score > bestDepthScore.score && curDepthScore.depth > MIN_DEPTH) {
                 bestDepthScore = curDepthScore;
             }
-        }    
+        }
         if (bestDepthScore.depth > MIN_DEPTH) {
             match2depthScore[curMatch] = depthScore(bestDepthScore.depth - depth + 1,
-                                                     bestDepthScore.score - recievedScore,
+                                                     bestDepthScore.score - receivedScore,
                                                      bestDepthScore.hammingDist - hammingDist,
                                                      bestDepthScore.endMatch);
         }
diff --git a/src/commons/Taxonomer.h b/src/commons/Taxonomer.h
index 84145d8e..24995f41 100644
--- a/src/commons/Taxonomer.h
+++ b/src/commons/Taxonomer.h
@@ -89,6 +89,9 @@ class Taxonomer {
     // remainConsecutiveMatches
     vector<const Match *> curPosMatches;
     vector<const Match *> nextPosMatches;
+    vector<const Match *> linkedMatchKeys;
+    vector<const Match *> linkedMatchValues;
+    vector<size_t> linkedMatchValuesIdx;
     unordered_set<const Match *> used;
     unordered_map<const Match *, depthScore> idx2depthScore;
 
@@ -98,6 +101,16 @@ class Taxonomer {
     // Output
    unordered_map<TaxID, unsigned int> taxCounts;
 
+   depthScore DFS(const vector<const Match *> &matches,
+                                         const Match *curMatch,
+                                         const vector<const Match *> &linkedMatchesKeys,
+                                         const vector<const Match *> &linkedMatchesValues,
+                                         const vector<size_t> &linkedMatchesIndices,
+                                         size_t depth, size_t MIN_DEPTH,
+                                         unordered_set<const Match *> &used,
+                                         unordered_map<const Match *, depthScore> &match2depthScore,
+                                         float score, int hammingDist);
+
 
 public:
     Taxonomer(const LocalParameters & par, NcbiTaxonomy * taxonomy);
@@ -135,12 +148,6 @@ class Taxonomer {
     void filterRedundantMatches(vector<Match> & speciesMatches,
                                 unordered_map<TaxID, unsigned int> & taxCnt);
 
-    depthScore DFS(const vector<const Match *> &matches, const Match * curMatchIdx,
-                   const map<const Match *, vector<const Match *>> &linkedMatches,
-                   size_t depth, size_t MIN_DEPTH, unordered_set<const Match *> &used,
-                   unordered_map<const Match *, depthScore> &match2depthScore,
-                   float score, int hammingDist);
-
     static bool isConsecutive(const Match * match1, const Match * match2);
 
     static bool isConsecutive_diffFrame(const Match * match1, const Match * match2);

From c750443b29f579853946f0bfbc6f2ed7eb005279 Mon Sep 17 00:00:00 2001
From: Jaebeom Kim <jbeom0731@gmail.com>
Date: Tue, 27 Aug 2024 10:43:14 +0900
Subject: [PATCH 11/21] use unordered_set to check existing keys

---
 src/commons/Classifier.cpp | 1 -
 src/commons/Taxonomer.cpp  | 9 ++++++---
 src/commons/Taxonomer.h    | 2 ++
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/commons/Classifier.cpp b/src/commons/Classifier.cpp
index 1c509777..4dc01285 100644
--- a/src/commons/Classifier.cpp
+++ b/src/commons/Classifier.cpp
@@ -175,7 +175,6 @@ void Classifier::assignTaxonomy(const Match *matchList,
         matchBlocks[blockIdx].end = matchIdx - 1;
         blockIdx++;
     }
-    cout << "Time spent for spliting matches: " << double(time(nullptr) - beforeAnalyze) << endl;
     // Process each block
 #pragma omp parallel default(none), shared(cout, matchBlocks, matchList, seqNum, queryList, blockIdx, par)
     {
diff --git a/src/commons/Taxonomer.cpp b/src/commons/Taxonomer.cpp
index 28d7bbb0..4bad2d06 100644
--- a/src/commons/Taxonomer.cpp
+++ b/src/commons/Taxonomer.cpp
@@ -45,6 +45,7 @@ Taxonomer::Taxonomer(const LocalParameters &par, NcbiTaxonomy *taxonomy) : taxon
     curPosMatches.reserve(4096);
     nextPosMatches.reserve(4096);
     linkedMatchKeys.reserve(4096);
+    linkedMatchKeySet.reserve(4096);
     linkedMatchValues.reserve(4096);
     linkedMatchValuesIdx.reserve(4096);
     
@@ -82,7 +83,6 @@ void Taxonomer::assignTaxonomy(const Match *matchList,
         matchBlocks[blockIdx].end = matchIdx - 1;
         blockIdx++;
     }
-    cout << "Time spent for spliting matches: " << double(time(nullptr) - beforeAnalyze) << endl;
     // Process each block
 #pragma omp parallel default(none), shared(cout, matchBlocks, matchList, seqNum, queryList, blockIdx, par)
     {
@@ -448,6 +448,7 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
     curPosMatches.clear();
     nextPosMatches.clear();
     linkedMatchKeys.clear();
+    linkedMatchKeySet.clear();
     linkedMatchValues.clear();
     linkedMatchValuesIdx.clear();
 
@@ -479,6 +480,7 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
                     }
                     if (found) {
                         linkedMatchKeys.push_back(curPosMatch);
+                        linkedMatchKeySet.insert(curPosMatch);
                         linkedMatchValuesIdx.push_back(startIdx);
                     }
                 }
@@ -513,6 +515,7 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
                     }
                     if (found) {
                         linkedMatchKeys.push_back(curPosMatch);
+                        linkedMatchKeySet.insert(curPosMatch);
                         linkedMatchValuesIdx.push_back(startIdx);
                     }
                 }
@@ -575,6 +578,7 @@ depthScore Taxonomer::DFS(
     const vector<const Match *> &matches,
     const Match *curMatch,
     const vector<const Match *> &linkedMatchesKeys,
+    const unordered_set<const Match *> &linkedMatchKeySet,
     const vector<const Match *> &linkedMatchesValues,
     const vector<size_t> &linkedMatchesIndices,
     size_t depth, size_t MIN_DEPTH,
@@ -588,8 +592,7 @@ depthScore Taxonomer::DFS(
     depthScore curDepthScore;
     float receivedScore = score;
 
-    auto it = find(linkedMatchesKeys.begin(), linkedMatchesKeys.end(), curMatch);
-    if (it == linkedMatchesKeys.end()) { // Reached a leaf node
+    if (linkedMatchKeySet.find(curMatch) == linkedMatchKeySet.end()) { // Reached a leaf node
         uint8_t lastEndHamming = (curMatch->rightEndHamming >> 14);
         if (lastEndHamming == 0) {
             score += 3.0f;
diff --git a/src/commons/Taxonomer.h b/src/commons/Taxonomer.h
index 24995f41..93484a3b 100644
--- a/src/commons/Taxonomer.h
+++ b/src/commons/Taxonomer.h
@@ -90,6 +90,7 @@ class Taxonomer {
     vector<const Match *> curPosMatches;
     vector<const Match *> nextPosMatches;
     vector<const Match *> linkedMatchKeys;
+    unordered_set<const Match *> linkedMatchKeySet;
     vector<const Match *> linkedMatchValues;
     vector<size_t> linkedMatchValuesIdx;
     unordered_set<const Match *> used;
@@ -104,6 +105,7 @@ class Taxonomer {
    depthScore DFS(const vector<const Match *> &matches,
                                          const Match *curMatch,
                                          const vector<const Match *> &linkedMatchesKeys,
+                                         const unordered_set<const Match *> &linkedMatchKeySet,
                                          const vector<const Match *> &linkedMatchesValues,
                                          const vector<size_t> &linkedMatchesIndices,
                                          size_t depth, size_t MIN_DEPTH,

From feccf12e6b6da32a0baa02cc0c5fc99a77d6cd13 Mon Sep 17 00:00:00 2001
From: Jaebeom Kim <jbeom0731@gmail.com>
Date: Tue, 27 Aug 2024 10:58:47 +0900
Subject: [PATCH 12/21] use unordered_map<Key Match, Key idx>

---
 src/commons/Taxonomer.cpp | 20 ++++++++++++--------
 src/commons/Taxonomer.h   |  4 ++--
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/src/commons/Taxonomer.cpp b/src/commons/Taxonomer.cpp
index 4bad2d06..2122023d 100644
--- a/src/commons/Taxonomer.cpp
+++ b/src/commons/Taxonomer.cpp
@@ -45,9 +45,9 @@ Taxonomer::Taxonomer(const LocalParameters &par, NcbiTaxonomy *taxonomy) : taxon
     curPosMatches.reserve(4096);
     nextPosMatches.reserve(4096);
     linkedMatchKeys.reserve(4096);
-    linkedMatchKeySet.reserve(4096);
     linkedMatchValues.reserve(4096);
     linkedMatchValuesIdx.reserve(4096);
+    keyIndexMap.reserve(4096);
     
     species2score.reserve(4096);
     species2matchPaths.reserve(4096);
@@ -448,9 +448,9 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
     curPosMatches.clear();
     nextPosMatches.clear();
     linkedMatchKeys.clear();
-    linkedMatchKeySet.clear();
     linkedMatchValues.clear();
     linkedMatchValuesIdx.clear();
+    keyIndexMap.clear();
 
     size_t currPos = curFrameMatches[0]->qInfo.pos;
     uint64_t frame = curFrameMatches[0]->qInfo.frame;
@@ -480,7 +480,6 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
                     }
                     if (found) {
                         linkedMatchKeys.push_back(curPosMatch);
-                        linkedMatchKeySet.insert(curPosMatch);
                         linkedMatchValuesIdx.push_back(startIdx);
                     }
                 }
@@ -515,7 +514,6 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
                     }
                     if (found) {
                         linkedMatchKeys.push_back(curPosMatch);
-                        linkedMatchKeySet.insert(curPosMatch);
                         linkedMatchValuesIdx.push_back(startIdx);
                     }
                 }
@@ -527,6 +525,10 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
         }
     }
 
+    for (size_t i = 0; i < linkedMatchKeys.size(); ++i) {
+        keyIndexMap[linkedMatchKeys[i]] = i;
+    }
+
     // Iterate linkedMatches to get filteredMatches 
     // (ignore matches not enoughly consecutive)
     size_t MIN_DEPTH = minConsCnt - 1;
@@ -550,6 +552,7 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
                 depthScore curPath = DFS(curFrameMatches,
                                          linkedMatchValues[j],
                                          linkedMatchKeys,
+                                         keyIndexMap,
                                          linkedMatchValues,
                                          linkedMatchValuesIdx,
                                          1,
@@ -578,7 +581,7 @@ depthScore Taxonomer::DFS(
     const vector<const Match *> &matches,
     const Match *curMatch,
     const vector<const Match *> &linkedMatchesKeys,
-    const unordered_set<const Match *> &linkedMatchKeySet,
+    const unordered_map<const Match *, size_t> & keyIndexMap,
     const vector<const Match *> &linkedMatchesValues,
     const vector<size_t> &linkedMatchesIndices,
     size_t depth, size_t MIN_DEPTH,
@@ -592,7 +595,8 @@ depthScore Taxonomer::DFS(
     depthScore curDepthScore;
     float receivedScore = score;
 
-    if (linkedMatchKeySet.find(curMatch) == linkedMatchKeySet.end()) { // Reached a leaf node
+    auto it = keyIndexMap.find(curMatch);
+    if (it == keyIndexMap.end()) { // Reached a leaf node
         uint8_t lastEndHamming = (curMatch->rightEndHamming >> 14);
         if (lastEndHamming == 0) {
             score += 3.0f;
@@ -602,7 +606,7 @@ depthScore Taxonomer::DFS(
         match2depthScore[curMatch] = depthScore(1, score - receivedScore, lastEndHamming, curMatch);
         return depthScore(depth, score, hammingDist + lastEndHamming, curMatch);
     } else { // Not a leaf node
-        size_t index = it - linkedMatchesKeys.begin();
+        size_t index = it->second;
         size_t startIdx = linkedMatchesIndices[index];
         size_t endIdx = (index + 1 < linkedMatchesIndices.size()) ? linkedMatchesIndices[index + 1] : linkedMatchesValues.size();
 
@@ -622,7 +626,7 @@ depthScore Taxonomer::DFS(
                                            returnDepthScore.hammingDist + hammingDist + lastEndHamming,
                                            returnDepthScore.endMatch);
             } else {
-                curDepthScore = DFS(matches, nextMatch, linkedMatchesKeys, linkedMatchesValues, linkedMatchesIndices, depth, MIN_DEPTH, used, match2depthScore, score, hammingDist + lastEndHamming);
+                curDepthScore = DFS(matches, nextMatch, linkedMatchesKeys, keyIndexMap, linkedMatchesValues, linkedMatchesIndices, depth, MIN_DEPTH, used, match2depthScore, score, hammingDist + lastEndHamming);
             }
             if (curDepthScore.score > bestDepthScore.score && curDepthScore.depth > MIN_DEPTH) {
                 bestDepthScore = curDepthScore;
diff --git a/src/commons/Taxonomer.h b/src/commons/Taxonomer.h
index 93484a3b..8e29e193 100644
--- a/src/commons/Taxonomer.h
+++ b/src/commons/Taxonomer.h
@@ -90,7 +90,7 @@ class Taxonomer {
     vector<const Match *> curPosMatches;
     vector<const Match *> nextPosMatches;
     vector<const Match *> linkedMatchKeys;
-    unordered_set<const Match *> linkedMatchKeySet;
+    unordered_map<const Match *, size_t> keyIndexMap;
     vector<const Match *> linkedMatchValues;
     vector<size_t> linkedMatchValuesIdx;
     unordered_set<const Match *> used;
@@ -105,7 +105,7 @@ class Taxonomer {
    depthScore DFS(const vector<const Match *> &matches,
                                          const Match *curMatch,
                                          const vector<const Match *> &linkedMatchesKeys,
-                                         const unordered_set<const Match *> &linkedMatchKeySet,
+                                         const unordered_map<const Match *, size_t> & keyIndexMap,
                                          const vector<const Match *> &linkedMatchesValues,
                                          const vector<size_t> &linkedMatchesIndices,
                                          size_t depth, size_t MIN_DEPTH,

From fb87be73d4afeac6fe53c9f796e539f157282f31 Mon Sep 17 00:00:00 2001
From: Jaebeom Kim <jbeom0731@gmail.com>
Date: Tue, 27 Aug 2024 11:55:53 +0900
Subject: [PATCH 13/21] back to 43f1fcb

---
 src/commons/Taxonomer.cpp | 17 +++++------------
 src/commons/Taxonomer.h   | 20 +++++++++-----------
 2 files changed, 14 insertions(+), 23 deletions(-)

diff --git a/src/commons/Taxonomer.cpp b/src/commons/Taxonomer.cpp
index 2122023d..28d7bbb0 100644
--- a/src/commons/Taxonomer.cpp
+++ b/src/commons/Taxonomer.cpp
@@ -47,7 +47,6 @@ Taxonomer::Taxonomer(const LocalParameters &par, NcbiTaxonomy *taxonomy) : taxon
     linkedMatchKeys.reserve(4096);
     linkedMatchValues.reserve(4096);
     linkedMatchValuesIdx.reserve(4096);
-    keyIndexMap.reserve(4096);
     
     species2score.reserve(4096);
     species2matchPaths.reserve(4096);
@@ -83,6 +82,7 @@ void Taxonomer::assignTaxonomy(const Match *matchList,
         matchBlocks[blockIdx].end = matchIdx - 1;
         blockIdx++;
     }
+    cout << "Time spent for spliting matches: " << double(time(nullptr) - beforeAnalyze) << endl;
     // Process each block
 #pragma omp parallel default(none), shared(cout, matchBlocks, matchList, seqNum, queryList, blockIdx, par)
     {
@@ -450,7 +450,6 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
     linkedMatchKeys.clear();
     linkedMatchValues.clear();
     linkedMatchValuesIdx.clear();
-    keyIndexMap.clear();
 
     size_t currPos = curFrameMatches[0]->qInfo.pos;
     uint64_t frame = curFrameMatches[0]->qInfo.frame;
@@ -525,10 +524,6 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
         }
     }
 
-    for (size_t i = 0; i < linkedMatchKeys.size(); ++i) {
-        keyIndexMap[linkedMatchKeys[i]] = i;
-    }
-
     // Iterate linkedMatches to get filteredMatches 
     // (ignore matches not enoughly consecutive)
     size_t MIN_DEPTH = minConsCnt - 1;
@@ -552,7 +547,6 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
                 depthScore curPath = DFS(curFrameMatches,
                                          linkedMatchValues[j],
                                          linkedMatchKeys,
-                                         keyIndexMap,
                                          linkedMatchValues,
                                          linkedMatchValuesIdx,
                                          1,
@@ -581,7 +575,6 @@ depthScore Taxonomer::DFS(
     const vector<const Match *> &matches,
     const Match *curMatch,
     const vector<const Match *> &linkedMatchesKeys,
-    const unordered_map<const Match *, size_t> & keyIndexMap,
     const vector<const Match *> &linkedMatchesValues,
     const vector<size_t> &linkedMatchesIndices,
     size_t depth, size_t MIN_DEPTH,
@@ -595,8 +588,8 @@ depthScore Taxonomer::DFS(
     depthScore curDepthScore;
     float receivedScore = score;
 
-    auto it = keyIndexMap.find(curMatch);
-    if (it == keyIndexMap.end()) { // Reached a leaf node
+    auto it = find(linkedMatchesKeys.begin(), linkedMatchesKeys.end(), curMatch);
+    if (it == linkedMatchesKeys.end()) { // Reached a leaf node
         uint8_t lastEndHamming = (curMatch->rightEndHamming >> 14);
         if (lastEndHamming == 0) {
             score += 3.0f;
@@ -606,7 +599,7 @@ depthScore Taxonomer::DFS(
         match2depthScore[curMatch] = depthScore(1, score - receivedScore, lastEndHamming, curMatch);
         return depthScore(depth, score, hammingDist + lastEndHamming, curMatch);
     } else { // Not a leaf node
-        size_t index = it->second;
+        size_t index = it - linkedMatchesKeys.begin();
         size_t startIdx = linkedMatchesIndices[index];
         size_t endIdx = (index + 1 < linkedMatchesIndices.size()) ? linkedMatchesIndices[index + 1] : linkedMatchesValues.size();
 
@@ -626,7 +619,7 @@ depthScore Taxonomer::DFS(
                                            returnDepthScore.hammingDist + hammingDist + lastEndHamming,
                                            returnDepthScore.endMatch);
             } else {
-                curDepthScore = DFS(matches, nextMatch, linkedMatchesKeys, keyIndexMap, linkedMatchesValues, linkedMatchesIndices, depth, MIN_DEPTH, used, match2depthScore, score, hammingDist + lastEndHamming);
+                curDepthScore = DFS(matches, nextMatch, linkedMatchesKeys, linkedMatchesValues, linkedMatchesIndices, depth, MIN_DEPTH, used, match2depthScore, score, hammingDist + lastEndHamming);
             }
             if (curDepthScore.score > bestDepthScore.score && curDepthScore.depth > MIN_DEPTH) {
                 bestDepthScore = curDepthScore;
diff --git a/src/commons/Taxonomer.h b/src/commons/Taxonomer.h
index 8e29e193..91082cdd 100644
--- a/src/commons/Taxonomer.h
+++ b/src/commons/Taxonomer.h
@@ -90,7 +90,6 @@ class Taxonomer {
     vector<const Match *> curPosMatches;
     vector<const Match *> nextPosMatches;
     vector<const Match *> linkedMatchKeys;
-    unordered_map<const Match *, size_t> keyIndexMap;
     vector<const Match *> linkedMatchValues;
     vector<size_t> linkedMatchValuesIdx;
     unordered_set<const Match *> used;
@@ -103,15 +102,14 @@ class Taxonomer {
    unordered_map<TaxID, unsigned int> taxCounts;
 
    depthScore DFS(const vector<const Match *> &matches,
-                                         const Match *curMatch,
-                                         const vector<const Match *> &linkedMatchesKeys,
-                                         const unordered_map<const Match *, size_t> & keyIndexMap,
-                                         const vector<const Match *> &linkedMatchesValues,
-                                         const vector<size_t> &linkedMatchesIndices,
-                                         size_t depth, size_t MIN_DEPTH,
-                                         unordered_set<const Match *> &used,
-                                         unordered_map<const Match *, depthScore> &match2depthScore,
-                                         float score, int hammingDist);
+                  const Match *curMatch,
+                  const vector<const Match *> &linkedMatchesKeys,
+                  const vector<const Match *> &linkedMatchesValues,
+                  const vector<size_t> &linkedMatchesIndices,
+                  size_t depth, size_t MIN_DEPTH,
+                  unordered_set<const Match *> &used,
+                  unordered_map<const Match *, depthScore> &match2depthScore,
+                  float score, int hammingDist);
 
 
 public:
@@ -222,4 +220,4 @@ class Taxonomer {
 };
 
 
-#endif //METABULI_TAXONOMER_H
+#endif //METABULI_TAXONOMER_H
\ No newline at end of file

From dd0ca5f67faa59fe0c863a5c70434cee54afc9f8 Mon Sep 17 00:00:00 2001
From: Jaebeom Kim <jbeom0731@gmail.com>
Date: Tue, 27 Aug 2024 14:34:19 +0900
Subject: [PATCH 14/21] Reducing use of unordered_map

---
 src/commons/Taxonomer.cpp | 120 +++++++++++++++++++++-----------------
 src/commons/Taxonomer.h   |  19 +++---
 2 files changed, 77 insertions(+), 62 deletions(-)

diff --git a/src/commons/Taxonomer.cpp b/src/commons/Taxonomer.cpp
index 28d7bbb0..aea9f42b 100644
--- a/src/commons/Taxonomer.cpp
+++ b/src/commons/Taxonomer.cpp
@@ -37,24 +37,35 @@ Taxonomer::Taxonomer(const LocalParameters &par, NcbiTaxonomy *taxonomy) : taxon
         denominator = 1000;
     }
 
-    // Reserve memoory for internal containers
+
+    // chooseBestTaxon
+    taxCnt.reserve(4096);
+
+    // getBestSpeciesMatches
     speciesMatches.reserve(4096);
     curFrameMatches.reserve(4096);
     matchPaths.reserve(4096);
+    combinedMatchPaths.reserve(4096);
     maxSpecies.reserve(4096);
+    speciesList.reserve(4096);
+    speciesPathIdx.reserve(4096);
+    speciesCombPathIdx.reserve(4096);
+    speciesScores.reserve(4096);
+
+    // remainConsecutiveMatches
     curPosMatches.reserve(4096);
     nextPosMatches.reserve(4096);
     linkedMatchKeys.reserve(4096);
     linkedMatchValues.reserve(4096);
     linkedMatchValuesIdx.reserve(4096);
-    
-    species2score.reserve(4096);
-    species2matchPaths.reserve(4096);
-    speciesMatchRange.reserve(4096);
-    taxCnt.reserve(4096);
-    cladeCnt.reserve(4096);
     used.reserve(4096);
-    idx2depthScore.reserve(4096);    
+    idx2depthScore.reserve(4096); 
+
+    // lowerRankClassification
+    cladeCnt.reserve(4096);
+
+    // Output
+    taxCounts.reserve(4096);
 }
 
 Taxonomer::~Taxonomer() {
@@ -179,16 +190,16 @@ void Taxonomer::chooseBestTaxon(uint32_t currentQuery,
     queryList[currentQuery].newSpecies = false;
 }
 
-void Taxonomer::filterRedundantMatches(vector<Match> & speciesMatches,
+void Taxonomer::filterRedundantMatches(vector<const Match *> & speciesMatches,
                                         unordered_map<TaxID, unsigned int> & taxCnt) {
     // Sort matches by the coordinate on the query
     sort(speciesMatches.begin(), speciesMatches.end(),
-         [](const Match & a, const Match & b) { return a.qInfo.pos < b.qInfo.pos; });
+         [](const Match * a, const Match * b) { return a->qInfo.pos < b->qInfo.pos; });
     
     // Remove redundant matches
     size_t matchNum = speciesMatches.size();
     for (size_t i = 0; i < matchNum;) {
-        size_t currQuotient = speciesMatches[i].qInfo.pos / 3;
+        size_t currQuotient = speciesMatches[i]->qInfo.pos / 3;
         uint8_t minHamming = speciesMatches[i].hamming;
         Match minHammingMatch = speciesMatches[i];
         TaxID minHammingTaxId = minHammingMatch.targetId;
@@ -276,16 +287,21 @@ TaxonScore Taxonomer::getBestSpeciesMatches(vector<Match> & speciesMatches,
                                             size_t offset,
                                             int queryLength) {
     matchPaths.clear();
-    species2score.clear();
-    species2matchPaths.clear();
-    speciesMatchRange.clear();
+    combinedMatchPaths.clear();
+    speciesList.clear();
+    speciesPathIdx.clear();
+    speciesCombPathIdx.clear();
+    speciesScores.clear();
     
+    pair<size_t, size_t> bestSpeciesRange;
     TaxonScore bestScore;
     float bestSpScore = 0;
     size_t i = offset;
+    size_t meaningfulSpecies = 0;
     while (i  < end + 1) {
         TaxID currentSpecies = matchList[i].speciesId;
         size_t start = i;
+        size_t previousPathSize = matchPaths.size();
         // For current species
         while ((i < end + 1) && currentSpecies == matchList[i].speciesId) {
             uint8_t curFrame = matchList[i].qInfo.frame;
@@ -299,81 +315,77 @@ TaxonScore Taxonomer::getBestSpeciesMatches(vector<Match> & speciesMatches,
                 remainConsecutiveMatches(curFrameMatches, matchPaths, currentSpecies);
             }
         }
-        speciesMatchRange[currentSpecies] = make_pair(start, i);
+        size_t pathSize = matchPaths.size();
         // Combine MatchPaths
-        if (!matchPaths.empty()) {
-            float score = combineMatchPaths(matchPaths, species2matchPaths[currentSpecies], queryLength);
+        if (pathSize > previousPathSize) {
+            speciesList.push_back(currentSpecies);
+            speciesPathIdx.push_back(previousPathSize);
+            speciesCombPathIdx.push_back(combinedMatchPaths.size());
+            float score = combineMatchPaths(matchPaths, previousPathSize, combinedMatchPaths, combinedMatchPaths.size(), queryLength);
             score = min(score, 1.0f);
+            speciesScores.push_back(score);
             if (score > 0.f) {
-                species2score[currentSpecies] = score;
+                meaningfulSpecies++;
             }
             if (score > bestSpScore) {
                 bestSpScore = score;
+                bestSpeciesRange = make_pair(start, i);
             }
         }
-        matchPaths.clear();
+        // matchPaths.clear();
     }
     
     // If there are no meaningful species
-    if (species2score.empty()) {
+    if (meaningfulSpecies == 0) {
         bestScore.score = 0;
         return bestScore;
     }
 
     maxSpecies.clear();
-    for (auto & spScore : species2score) {
-        if (spScore.second >= bestSpScore * tieRatio) {
-            maxSpecies.push_back(spScore.first);
+    float coveredLength = 0.f;
+    for (size_t i = 0; i < speciesList.size(); i++) {
+        if (speciesScores[i] >= bestSpScore * tieRatio) {
+            maxSpecies.push_back(speciesList[i]);
+            bestScore.score += speciesScores[i];
+            // Calculate coverage
+            // size_t start = speciesCombPathIdx[i];
+            // size_t end = (i + 1 < speciesCombPathIdx.size()) ? speciesCombPathIdx[i + 1] : combinedMatchPaths.size();
+            // coveredLength = 0;
+            // for (size_t j = start; j < end; j++) {
+            //     coveredLength += combinedMatchPaths[j].end - combinedMatchPaths[j].start + 1;
+            // }
+            // bestScore.coverage += coveredLength / queryLength;    
         }
     }
-
-    // More than one species --> LCA
-    float coveredLength = 0.f;
+    
+    // More than one species --> LCA    
     if (maxSpecies.size() > 1) {
         bestScore.LCA = true;
         bestScore.taxId = taxonomy->LCA(maxSpecies)->taxId;
-        for (auto & sp : maxSpecies) {
-            bestScore.score += species2score[sp];
-            coveredLength = 0;
-            for (auto & matchPath : species2matchPaths[maxSpecies[0]]) {
-                coveredLength += matchPath.end - matchPath.start + 1;
-            }
-            bestScore.coverage += coveredLength / queryLength;
-        }
         bestScore.score /= maxSpecies.size();
-        bestScore.coverage /= maxSpecies.size();
+        // bestScore.coverage /= maxSpecies.size();
         return bestScore;
     }
     
-
     // One species
     bestScore.taxId = maxSpecies[0];
-    bestScore.score = species2score[maxSpecies[0]];
-    int hammingDist = 0;
-    for (auto & matchPath : species2matchPaths[maxSpecies[0]]) {
-        coveredLength += matchPath.end - matchPath.start + 1;
-        hammingDist += matchPath.hammingDist;
-    }
-    speciesMatches.reserve(speciesMatchRange[bestScore.taxId].second
-                        - speciesMatchRange[bestScore.taxId].first + 1);
+    speciesMatches.reserve(bestSpeciesRange.second - bestSpeciesRange.first + 1);
 
-    for (size_t j = speciesMatchRange[bestScore.taxId].first; j < speciesMatchRange[bestScore.taxId].second; j++) {
-        speciesMatches.push_back(matchList[j]);
+    for (size_t j = bestSpeciesRange.first; j < bestSpeciesRange.second; j++) {
+        speciesMatches.push_back(&matchList[j]);
     }
-    bestScore.coverage = coveredLength / queryLength;
-    bestScore.hammingDist = hammingDist;
     
     return bestScore;                                  
 }
 
 
 float Taxonomer::combineMatchPaths(vector<MatchPath> & matchPaths,
+                                   size_t matchPathStart,
                                    vector<MatchPath> & combinedMatchPaths,
+                                   size_t combMatchPathStart,
                                    int readLength) {
-    combinedMatchPaths.clear();
-
     // Sort matchPaths by the their score
-    sort(matchPaths.begin(), matchPaths.end(),
+    sort(matchPaths.begin() + matchPathStart, matchPaths.end(),
          [](const MatchPath &a, const MatchPath &b) {
            if (a.score != b.score) {
              return a.score > b.score;
@@ -389,13 +401,13 @@ float Taxonomer::combineMatchPaths(vector<MatchPath> & matchPaths,
     // 2. Add the matchPath with the highest score that is not overlapped with the matchPath in combinedMatchPaths
     // 3. Repeat 2 until no matchPath can be added
     float score = 0;
-    for (size_t i = 0; i < matchPaths.size(); i++) {  
-        if (combinedMatchPaths.empty()) {
+    for (size_t i = matchPathStart; i < matchPaths.size(); i++) {  
+        if (combMatchPathStart == combinedMatchPaths.size()) {
             combinedMatchPaths.push_back(matchPaths[i]);
             score += matchPaths[i].score;
         } else {
             bool isOverlapped = false;
-            for (size_t j = 0; j < combinedMatchPaths.size(); j++) {
+            for (size_t j = combMatchPathStart; j < combinedMatchPaths.size(); j++) {
                 if (isMatchPathOverlapped(matchPaths[i], combinedMatchPaths[j])) { // overlap!
                     int overlappedLength = min(matchPaths[i].end, combinedMatchPaths[j].end) 
                                             - max(matchPaths[i].start, combinedMatchPaths[j].start) + 1;
diff --git a/src/commons/Taxonomer.h b/src/commons/Taxonomer.h
index 91082cdd..789a6358 100644
--- a/src/commons/Taxonomer.h
+++ b/src/commons/Taxonomer.h
@@ -42,7 +42,6 @@ struct MatchPath {
     int hammingDist;
     const Match * startMatch;
     const Match * endMatch;
-
 };
 
 struct MatchBlock {
@@ -73,7 +72,7 @@ class Taxonomer {
 
     // Internal
     int denominator;
-    vector<Match> speciesMatches;
+    vector<const Match *> speciesMatches;
 
     // chooseBestTaxon
     unordered_map<TaxID, unsigned int> taxCnt;
@@ -81,10 +80,12 @@ class Taxonomer {
     // getBestSpeciesMatches
     vector<const Match *> curFrameMatches;
     vector<MatchPath> matchPaths;
+    vector<MatchPath> combinedMatchPaths;
     vector<TaxID> maxSpecies;
-    unordered_map<TaxID, float> species2score;
-    unordered_map<TaxID, vector<MatchPath>> species2matchPaths;
-    unordered_map<TaxID, pair<size_t, size_t>> speciesMatchRange;
+    vector<TaxID> speciesList;
+    vector<size_t> speciesPathIdx;
+    vector<size_t> speciesCombPathIdx;
+    vector<float> speciesScores;
 
     // remainConsecutiveMatches
     vector<const Match *> curPosMatches;
@@ -134,8 +135,10 @@ class Taxonomer {
                                   TaxID speciesID);
     
     float combineMatchPaths(vector<MatchPath> & matchPaths,
-                           vector<MatchPath> & combinedMatchPaths,
-                           int readLength);
+                            size_t matchPathStart,
+                            vector<MatchPath> & combMatchPaths,
+                            size_t combMatchPathStart,
+                            int readLength);
 
     bool isMatchPathOverlapped(const MatchPath & matchPath1, const MatchPath & matchPath2);
 
@@ -152,7 +155,7 @@ class Taxonomer {
 
     static bool isConsecutive_diffFrame(const Match * match1, const Match * match2);
 
-    TaxonScore getBestSpeciesMatches(vector<Match> &speciesMatches, const Match *matchList, size_t end,
+    TaxonScore getBestSpeciesMatches(vector<const Match *> &speciesMatches, const Match *matchList, size_t end,
                                      size_t offset, int queryLength);
     
     // TaxonScore getBestSpeciesMatches(vector<Match> &speciesMatches, const Match *matchList, size_t end,

From 9aa02a25c6d53c833727ecebe64d4bfb149c9e28 Mon Sep 17 00:00:00 2001
From: Jaebeom Kim <jbeom0731@gmail.com>
Date: Tue, 27 Aug 2024 14:38:43 +0900
Subject: [PATCH 15/21] from vector<Match> speciesMathces to vector<const Match
 *> speciesMatches

---
 src/commons/Taxonomer.cpp | 18 +++++++++---------
 src/commons/Taxonomer.h   |  2 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/commons/Taxonomer.cpp b/src/commons/Taxonomer.cpp
index aea9f42b..316c6f77 100644
--- a/src/commons/Taxonomer.cpp
+++ b/src/commons/Taxonomer.cpp
@@ -200,16 +200,16 @@ void Taxonomer::filterRedundantMatches(vector<const Match *> & speciesMatches,
     size_t matchNum = speciesMatches.size();
     for (size_t i = 0; i < matchNum;) {
         size_t currQuotient = speciesMatches[i]->qInfo.pos / 3;
-        uint8_t minHamming = speciesMatches[i].hamming;
-        Match minHammingMatch = speciesMatches[i];
+        uint8_t minHamming = speciesMatches[i]->hamming;
+        Match minHammingMatch = *speciesMatches[i];
         TaxID minHammingTaxId = minHammingMatch.targetId;
-        while ((i < matchNum) && (currQuotient == speciesMatches[i].qInfo.pos / 3)) {
-            if (speciesMatches[i].hamming < minHamming) {
-                minHamming = speciesMatches[i].hamming;
-                minHammingMatch = speciesMatches[i];
+        while ((i < matchNum) && (currQuotient == speciesMatches[i]->qInfo.pos / 3)) {
+            if (speciesMatches[i]->hamming < minHamming) {
+                minHamming = speciesMatches[i]->hamming;
+                minHammingMatch = *speciesMatches[i];
                 minHammingTaxId = minHammingMatch.targetId;
-            } else if (speciesMatches[i].hamming == minHamming) {
-                minHammingTaxId = taxonomy->LCA(minHammingTaxId, speciesMatches[i].targetId);
+            } else if (speciesMatches[i]->hamming == minHamming) {
+                minHammingTaxId = taxonomy->LCA(minHammingTaxId, speciesMatches[i]->targetId);
             }
             i++;
         }
@@ -281,7 +281,7 @@ TaxID Taxonomer::BFS(const unordered_map<TaxID, TaxonCounts> & cladeCnt, TaxID r
     }
 }
 
-TaxonScore Taxonomer::getBestSpeciesMatches(vector<Match> & speciesMatches,
+TaxonScore Taxonomer::getBestSpeciesMatches(vector<const Match * > & speciesMatches,
                                             const Match *matchList,
                                             size_t end,
                                             size_t offset,
diff --git a/src/commons/Taxonomer.h b/src/commons/Taxonomer.h
index 789a6358..d47ff861 100644
--- a/src/commons/Taxonomer.h
+++ b/src/commons/Taxonomer.h
@@ -148,7 +148,7 @@ class Taxonomer {
 
     void trimMatchPath(MatchPath & path1, const MatchPath & path2, int overlapLength);
 
-    void filterRedundantMatches(vector<Match> & speciesMatches,
+    void filterRedundantMatches(vector<const Match *> & speciesMatches,
                                 unordered_map<TaxID, unsigned int> & taxCnt);
 
     static bool isConsecutive(const Match * match1, const Match * match2);

From 7aebb7ad0a69e18d0593c1d74c095fe7262c7d54 Mon Sep 17 00:00:00 2001
From: Jaebeom Kim <jbeom0731@gmail.com>
Date: Tue, 27 Aug 2024 19:08:09 +0900
Subject: [PATCH 16/21] Removed std::unordered_set<const Match *> visited.

---
 src/commons/Taxonomer.cpp | 23 ++++++++---------------
 src/commons/Taxonomer.h   | 23 ++++++++++-------------
 2 files changed, 18 insertions(+), 28 deletions(-)

diff --git a/src/commons/Taxonomer.cpp b/src/commons/Taxonomer.cpp
index 316c6f77..378f2b93 100644
--- a/src/commons/Taxonomer.cpp
+++ b/src/commons/Taxonomer.cpp
@@ -58,8 +58,7 @@ Taxonomer::Taxonomer(const LocalParameters &par, NcbiTaxonomy *taxonomy) : taxon
     linkedMatchKeys.reserve(4096);
     linkedMatchValues.reserve(4096);
     linkedMatchValuesIdx.reserve(4096);
-    used.reserve(4096);
-    idx2depthScore.reserve(4096); 
+    match2depthScore.reserve(4096); 
 
     // lowerRankClassification
     cladeCnt.reserve(4096);
@@ -239,8 +238,8 @@ TaxID Taxonomer::lowerRankClassification(const unordered_map<TaxID, unsigned int
 }
 
 void Taxonomer::getSpeciesCladeCounts(const unordered_map<TaxID, unsigned int> &taxCnt,
-                                       unordered_map<TaxID, TaxonCounts> & cladeCount,
-                                       TaxID speciesTaxID) {
+                                      unordered_map<TaxID, TaxonCounts> & cladeCount,
+                                      TaxID speciesTaxID) {
     for (auto it = taxCnt.begin(); it != taxCnt.end(); ++it) {
         TaxonNode const * taxon = taxonomy->taxonNode(it->first);
         cladeCount[taxon->taxId].taxCount = it->second;
@@ -543,19 +542,16 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
         MIN_DEPTH = minConsCntEuk - 1;
     }
 
-    used.clear();
-    idx2depthScore.clear();
+    match2depthScore.clear();
 
     for (size_t k = 0; k < linkedMatchKeys.size(); k++) {
         const Match * key = linkedMatchKeys[k];
         size_t startIdx = linkedMatchValuesIdx[k];
         size_t endIdx = (k + 1 < linkedMatchKeys.size()) ? linkedMatchValuesIdx[k + 1] : linkedMatchValues.size();
 
-        if (!used.count(key)) {
-            used.insert(key);
+        if (match2depthScore.find(key) == match2depthScore.end()) {
             depthScore bestPath{};
             for (size_t j = startIdx; j < endIdx; j++) {
-                used.insert(linkedMatchValues[j]);
                 depthScore curPath = DFS(curFrameMatches,
                                          linkedMatchValues[j],
                                          linkedMatchKeys,
@@ -563,15 +559,14 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
                                          linkedMatchValuesIdx,
                                          1,
                                          MIN_DEPTH, 
-                                         used, 
-                                         idx2depthScore,
+                                         match2depthScore,
                                          key->getScore(),
                                          key->hamming);
                 if (curPath.score > bestPath.score && curPath.depth > MIN_DEPTH) {
                     bestPath = curPath;
                 }
             }
-            // Store the best path
+            match2depthScore[key] = bestPath;
             if (bestPath.depth > MIN_DEPTH) {
                 matchPaths.emplace_back(key->qInfo.pos, // start coordinate on query
                                         key->qInfo.pos + bestPath.depth * 3 + 20, // end coordinate on query
@@ -590,7 +585,6 @@ depthScore Taxonomer::DFS(
     const vector<const Match *> &linkedMatchesValues,
     const vector<size_t> &linkedMatchesIndices,
     size_t depth, size_t MIN_DEPTH,
-    unordered_set<const Match *> &used,
     unordered_map<const Match *, depthScore> &match2depthScore,
     float score, int hammingDist) 
 {
@@ -623,7 +617,6 @@ depthScore Taxonomer::DFS(
         }
         for (size_t i = startIdx; i < endIdx; ++i) {
             const Match *nextMatch = linkedMatchesValues[i];
-            used.insert(nextMatch);
             if (match2depthScore.find(nextMatch) != match2depthScore.end()) {
                 returnDepthScore = match2depthScore[nextMatch];
                 curDepthScore = depthScore(returnDepthScore.depth + depth,
@@ -631,7 +624,7 @@ depthScore Taxonomer::DFS(
                                            returnDepthScore.hammingDist + hammingDist + lastEndHamming,
                                            returnDepthScore.endMatch);
             } else {
-                curDepthScore = DFS(matches, nextMatch, linkedMatchesKeys, linkedMatchesValues, linkedMatchesIndices, depth, MIN_DEPTH, used, match2depthScore, score, hammingDist + lastEndHamming);
+                curDepthScore = DFS(matches, nextMatch, linkedMatchesKeys, linkedMatchesValues, linkedMatchesIndices, depth, MIN_DEPTH, match2depthScore, score, hammingDist + lastEndHamming);
             }
             if (curDepthScore.score > bestDepthScore.score && curDepthScore.depth > MIN_DEPTH) {
                 bestDepthScore = curDepthScore;
diff --git a/src/commons/Taxonomer.h b/src/commons/Taxonomer.h
index d47ff861..dd9b50c4 100644
--- a/src/commons/Taxonomer.h
+++ b/src/commons/Taxonomer.h
@@ -93,31 +93,28 @@ class Taxonomer {
     vector<const Match *> linkedMatchKeys;
     vector<const Match *> linkedMatchValues;
     vector<size_t> linkedMatchValuesIdx;
-    unordered_set<const Match *> used;
-    unordered_map<const Match *, depthScore> idx2depthScore;
+    unordered_map<const Match *, depthScore> match2depthScore;
 
     // lowerRankClassification
     unordered_map<TaxID, TaxonCounts> cladeCnt;
 
     // Output
-   unordered_map<TaxID, unsigned int> taxCounts;
+    unordered_map<TaxID, unsigned int> taxCounts;
 
-   depthScore DFS(const vector<const Match *> &matches,
-                  const Match *curMatch,
-                  const vector<const Match *> &linkedMatchesKeys,
-                  const vector<const Match *> &linkedMatchesValues,
-                  const vector<size_t> &linkedMatchesIndices,
-                  size_t depth, size_t MIN_DEPTH,
-                  unordered_set<const Match *> &used,
-                  unordered_map<const Match *, depthScore> &match2depthScore,
-                  float score, int hammingDist);
+    depthScore DFS(const vector<const Match *> &matches,
+                   const Match *curMatch,
+                   const vector<const Match *> &linkedMatchesKeys,
+                   const vector<const Match *> &linkedMatchesValues,
+                   const vector<size_t> &linkedMatchesIndices,
+                   size_t depth, size_t MIN_DEPTH,
+                   unordered_map<const Match *, depthScore> &match2depthScore,
+                   float score, int hammingDist);
 
 
 public:
     Taxonomer(const LocalParameters & par, NcbiTaxonomy * taxonomy);
     ~Taxonomer();
 
-
     void assignTaxonomy(const Match *matchList,
                         size_t numOfMatches,
                         std::vector<Query> & queryList,

From a40311218cdf7f78292dc89560cd688b87c5a7da Mon Sep 17 00:00:00 2001
From: Jaebeom Kim <jbeom0731@gmail.com>
Date: Tue, 27 Aug 2024 19:52:56 +0900
Subject: [PATCH 17/21] removed a vector<const Match *> curFrameMatches

---
 src/commons/Taxonomer.cpp | 47 +++++++++++++++++----------------------
 src/commons/Taxonomer.h   | 10 ++++-----
 2 files changed, 26 insertions(+), 31 deletions(-)

diff --git a/src/commons/Taxonomer.cpp b/src/commons/Taxonomer.cpp
index 378f2b93..508e4e99 100644
--- a/src/commons/Taxonomer.cpp
+++ b/src/commons/Taxonomer.cpp
@@ -43,7 +43,6 @@ Taxonomer::Taxonomer(const LocalParameters &par, NcbiTaxonomy *taxonomy) : taxon
 
     // getBestSpeciesMatches
     speciesMatches.reserve(4096);
-    curFrameMatches.reserve(4096);
     matchPaths.reserve(4096);
     combinedMatchPaths.reserve(4096);
     maxSpecies.reserve(4096);
@@ -304,14 +303,13 @@ TaxonScore Taxonomer::getBestSpeciesMatches(vector<const Match * > & speciesMatc
         // For current species
         while ((i < end + 1) && currentSpecies == matchList[i].speciesId) {
             uint8_t curFrame = matchList[i].qInfo.frame;
-            curFrameMatches.clear();
+            size_t start = i;
             // For current frame
             while ((i < end + 1) && currentSpecies == matchList[i].speciesId && curFrame == matchList[i].qInfo.frame) {
-                curFrameMatches.push_back(&matchList[i]);
                 i ++;
             }
-            if (curFrameMatches.size() > 1) {
-                remainConsecutiveMatches(curFrameMatches, matchPaths, currentSpecies);
+            if (i - start >= minConsCnt) {
+                remainConsecutiveMatches(matchList, start, i, matchPaths, currentSpecies);
             }
         }
         size_t pathSize = matchPaths.size();
@@ -451,28 +449,29 @@ void Taxonomer::trimMatchPath(MatchPath & path1, const MatchPath & path2, int ov
     }
 }
 
-void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameMatches,
+void Taxonomer::remainConsecutiveMatches(const Match * matchList,
+                                         size_t start,
+                                         size_t end,
                                          vector<MatchPath> & matchPaths,
                                          TaxID speciesId) {
-    size_t i = 0;
-    size_t end = curFrameMatches.size();
+    size_t i = start;
     curPosMatches.clear();
     nextPosMatches.clear();
     linkedMatchKeys.clear();
     linkedMatchValues.clear();
     linkedMatchValuesIdx.clear();
 
-    size_t currPos = curFrameMatches[0]->qInfo.pos;
-    uint64_t frame = curFrameMatches[0]->qInfo.frame;
+    size_t currPos = matchList[start].qInfo.pos;
+    uint64_t frame = matchList[start].qInfo.frame;
     if (frame < 3) { // Forward frame
-        while ( i < end && curFrameMatches[i]->qInfo.pos == currPos) {
-            curPosMatches.emplace_back(curFrameMatches[i]);
+        while ( i < end && matchList[i].qInfo.pos == currPos) {
+            curPosMatches.emplace_back(matchList + i);
             i++;
         }
         while (i < end) {
-            uint32_t nextPos = curFrameMatches[i]->qInfo.pos;
-            while (i < end  && nextPos == curFrameMatches[i]->qInfo.pos) {
-                nextPosMatches.emplace_back(curFrameMatches[i]);
+            uint32_t nextPos = matchList[i].qInfo.pos;
+            while (i < end  && nextPos == matchList[i].qInfo.pos) {
+                nextPosMatches.emplace_back(matchList + i);
                 ++ i;
             }
             // Check if current position and next position are consecutive
@@ -485,7 +484,6 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
                         if (isConsecutive(curPosMatch, nextPosMatch)){
                             linkedMatchValues.push_back(nextPosMatch);
                             found = true;
-                            // linkedMatches[curPosMatch].push_back(nextPosMatch);
                         }
                     }
                     if (found) {
@@ -499,14 +497,14 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
             currPos = nextPos;
         }
     } else {
-        while ( i < end && curFrameMatches[i]->qInfo.pos == currPos) {
-            curPosMatches.emplace_back(curFrameMatches[i]);
+        while ( i < end && matchList[i].qInfo.pos == currPos) {
+            curPosMatches.emplace_back(matchList + i);
             i++;
         }
         while (i < end) {
-            uint32_t nextPos = curFrameMatches[i]->qInfo.pos;
-            while (i < end  && nextPos == curFrameMatches[i]->qInfo.pos) {
-                nextPosMatches.emplace_back(curFrameMatches[i]);
+            uint32_t nextPos = matchList[i].qInfo.pos;
+            while (i < end  && nextPos == matchList[i].qInfo.pos) {
+                nextPosMatches.emplace_back(matchList + i);
                 ++ i;
             }
             // Check if current position and next position are consecutive
@@ -519,7 +517,6 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
                         if (isConsecutive(nextPosMatch, curPosMatch)){
                             linkedMatchValues.push_back(nextPosMatch);
                             found = true;
-                            // linkedMatches[curPosMatch].push_back(nextPosMatch);
                         }
                     }
                     if (found) {
@@ -552,8 +549,7 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
         if (match2depthScore.find(key) == match2depthScore.end()) {
             depthScore bestPath{};
             for (size_t j = startIdx; j < endIdx; j++) {
-                depthScore curPath = DFS(curFrameMatches,
-                                         linkedMatchValues[j],
+                depthScore curPath = DFS(linkedMatchValues[j],
                                          linkedMatchKeys,
                                          linkedMatchValues,
                                          linkedMatchValuesIdx,
@@ -579,7 +575,6 @@ void Taxonomer::remainConsecutiveMatches(const vector<const Match *> & curFrameM
 }
 
 depthScore Taxonomer::DFS(
-    const vector<const Match *> &matches,
     const Match *curMatch,
     const vector<const Match *> &linkedMatchesKeys,
     const vector<const Match *> &linkedMatchesValues,
@@ -624,7 +619,7 @@ depthScore Taxonomer::DFS(
                                            returnDepthScore.hammingDist + hammingDist + lastEndHamming,
                                            returnDepthScore.endMatch);
             } else {
-                curDepthScore = DFS(matches, nextMatch, linkedMatchesKeys, linkedMatchesValues, linkedMatchesIndices, depth, MIN_DEPTH, match2depthScore, score, hammingDist + lastEndHamming);
+                curDepthScore = DFS(nextMatch, linkedMatchesKeys, linkedMatchesValues, linkedMatchesIndices, depth, MIN_DEPTH, match2depthScore, score, hammingDist + lastEndHamming);
             }
             if (curDepthScore.score > bestDepthScore.score && curDepthScore.depth > MIN_DEPTH) {
                 bestDepthScore = curDepthScore;
diff --git a/src/commons/Taxonomer.h b/src/commons/Taxonomer.h
index dd9b50c4..2a9ced39 100644
--- a/src/commons/Taxonomer.h
+++ b/src/commons/Taxonomer.h
@@ -78,7 +78,6 @@ class Taxonomer {
     unordered_map<TaxID, unsigned int> taxCnt;
 
     // getBestSpeciesMatches
-    vector<const Match *> curFrameMatches;
     vector<MatchPath> matchPaths;
     vector<MatchPath> combinedMatchPaths;
     vector<TaxID> maxSpecies;
@@ -101,8 +100,7 @@ class Taxonomer {
     // Output
     unordered_map<TaxID, unsigned int> taxCounts;
 
-    depthScore DFS(const vector<const Match *> &matches,
-                   const Match *curMatch,
+    depthScore DFS(const Match *curMatch,
                    const vector<const Match *> &linkedMatchesKeys,
                    const vector<const Match *> &linkedMatchesValues,
                    const vector<size_t> &linkedMatchesIndices,
@@ -127,9 +125,11 @@ class Taxonomer {
                          vector<Query> & queryList,
                          const LocalParameters &par);
 
-    void remainConsecutiveMatches(const vector<const Match *> & curFrameMatches,
+    void remainConsecutiveMatches(const Match * matchList,
+                                  size_t start,
+                                  size_t end,
                                   vector<MatchPath> & matchPaths,
-                                  TaxID speciesID);
+                                  TaxID speciesId);
     
     float combineMatchPaths(vector<MatchPath> & matchPaths,
                             size_t matchPathStart,

From 5f754bf19948c12a0b7cf17640fbc5f9c6ff7755 Mon Sep 17 00:00:00 2001
From: Jaebeom Kim <jbeom0731@gmail.com>
Date: Tue, 27 Aug 2024 20:19:29 +0900
Subject: [PATCH 18/21] removed vectors (curPosMatches, nextPosMatches) in
 remainConsecutiveMatches

---
 src/commons/Taxonomer.cpp | 50 +++++++++++++++++++++++----------------
 src/commons/Taxonomer.h   |  2 --
 2 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/src/commons/Taxonomer.cpp b/src/commons/Taxonomer.cpp
index 508e4e99..e1c07c6a 100644
--- a/src/commons/Taxonomer.cpp
+++ b/src/commons/Taxonomer.cpp
@@ -52,8 +52,6 @@ Taxonomer::Taxonomer(const LocalParameters &par, NcbiTaxonomy *taxonomy) : taxon
     speciesScores.reserve(4096);
 
     // remainConsecutiveMatches
-    curPosMatches.reserve(4096);
-    nextPosMatches.reserve(4096);
     linkedMatchKeys.reserve(4096);
     linkedMatchValues.reserve(4096);
     linkedMatchValuesIdx.reserve(4096);
@@ -455,8 +453,6 @@ void Taxonomer::remainConsecutiveMatches(const Match * matchList,
                                          vector<MatchPath> & matchPaths,
                                          TaxID speciesId) {
     size_t i = start;
-    curPosMatches.clear();
-    nextPosMatches.clear();
     linkedMatchKeys.clear();
     linkedMatchValues.clear();
     linkedMatchValuesIdx.clear();
@@ -464,70 +460,82 @@ void Taxonomer::remainConsecutiveMatches(const Match * matchList,
     size_t currPos = matchList[start].qInfo.pos;
     uint64_t frame = matchList[start].qInfo.frame;
     if (frame < 3) { // Forward frame
-        while ( i < end && matchList[i].qInfo.pos == currPos) {
-            curPosMatches.emplace_back(matchList + i);
-            i++;
+        size_t curPosMatchStart = i;        
+        while (i < end && matchList[i].qInfo.pos == currPos) {
+            ++ i;
         }
+        size_t curPosMatchEnd = i; // exclusive
+
         while (i < end) {
             uint32_t nextPos = matchList[i].qInfo.pos;
+            size_t nextPosMatchStart = i;
             while (i < end  && nextPos == matchList[i].qInfo.pos) {
-                nextPosMatches.emplace_back(matchList + i);
                 ++ i;
             }
+            size_t nextPosMatchEnd = i; // exclusive
+
             // Check if current position and next position are consecutive
             if (currPos + 3 == nextPos) {
                 // Compare curPosMatches and nextPosMatches
-                for (auto &curPosMatch: curPosMatches) {
+                for (size_t curIdx = curPosMatchStart; curIdx < curPosMatchEnd; ++curIdx) {
                     size_t startIdx = linkedMatchValues.size();
                     bool found = false;
-                    for (auto &nextPosMatch: nextPosMatches) {
-                        if (isConsecutive(curPosMatch, nextPosMatch)){
-                            linkedMatchValues.push_back(nextPosMatch);
+                    for (size_t nextIdx = nextPosMatchStart; nextIdx < nextPosMatchEnd; nextIdx++) {
+                        if (isConsecutive(matchList + curIdx, matchList + nextIdx)){
+                            linkedMatchValues.push_back(matchList + nextIdx);
                             found = true;
                         }
                     }
                     if (found) {
-                        linkedMatchKeys.push_back(curPosMatch);
+                        linkedMatchKeys.push_back(matchList + curIdx);
                         linkedMatchValuesIdx.push_back(startIdx);
                     }
                 }
             }
             // Update curPosMatches and nextPosMatches
-            curPosMatches = std::move(nextPosMatches);
+            curPosMatchStart = nextPosMatchStart;
+            curPosMatchEnd = nextPosMatchEnd;
             currPos = nextPos;
         }
     } else {
+        size_t curPosMatchStart = i;
+
         while ( i < end && matchList[i].qInfo.pos == currPos) {
             curPosMatches.emplace_back(matchList + i);
             i++;
         }
+        size_t curPosMatchEnd = i; // exclusive
+
         while (i < end) {
             uint32_t nextPos = matchList[i].qInfo.pos;
+            size_t nextPosMatchStart = i;
             while (i < end  && nextPos == matchList[i].qInfo.pos) {
                 nextPosMatches.emplace_back(matchList + i);
                 ++ i;
             }
+            size_t nextPosMatchEnd = i; // exclusive
+
             // Check if current position and next position are consecutive
             if (currPos + 3 == nextPos) {
                 // Compare curPosMatches and nextPosMatches
-                for (auto &curPosMatch: curPosMatches) {
+                for (size_t curIdx = curPosMatchStart; curIdx < curPosMatchEnd; ++curIdx) {
                     size_t startIdx = linkedMatchValues.size();
                     bool found = false;
-                    for (auto &nextPosMatch: nextPosMatches) {
-                        if (isConsecutive(nextPosMatch, curPosMatch)){
-                            linkedMatchValues.push_back(nextPosMatch);
+                    for (size_t nextIdx = nextPosMatchStart; nextIdx < nextPosMatchEnd; nextIdx++) {
+                        if (isConsecutive(matchList + nextIdx, matchList + curIdx)){
+                            linkedMatchValues.push_back(matchList + nextIdx);
                             found = true;
                         }
                     }
                     if (found) {
-                        linkedMatchKeys.push_back(curPosMatch);
+                        linkedMatchKeys.push_back(matchList + curIdx);
                         linkedMatchValuesIdx.push_back(startIdx);
                     }
                 }
-
             }
             // Update curPosMatches and nextPosMatches
-            curPosMatches = std::move(nextPosMatches);
+            curPosMatchStart = nextPosMatchStart;
+            curPosMatchEnd = nextPosMatchEnd;
             currPos = nextPos;
         }
     }
diff --git a/src/commons/Taxonomer.h b/src/commons/Taxonomer.h
index 2a9ced39..ac5c2abe 100644
--- a/src/commons/Taxonomer.h
+++ b/src/commons/Taxonomer.h
@@ -87,8 +87,6 @@ class Taxonomer {
     vector<float> speciesScores;
 
     // remainConsecutiveMatches
-    vector<const Match *> curPosMatches;
-    vector<const Match *> nextPosMatches;
     vector<const Match *> linkedMatchKeys;
     vector<const Match *> linkedMatchValues;
     vector<size_t> linkedMatchValuesIdx;

From 8dfd406acafa54d18062feb1058d2aa14475ff72 Mon Sep 17 00:00:00 2001
From: Jaebeom Kim <jbeom0731@gmail.com>
Date: Tue, 27 Aug 2024 20:29:00 +0900
Subject: [PATCH 19/21] removed vectors (curPosMatches, nextPosMatches) in
 remainConsecutiveMatches

---
 src/commons/Taxonomer.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/commons/Taxonomer.cpp b/src/commons/Taxonomer.cpp
index e1c07c6a..5808a472 100644
--- a/src/commons/Taxonomer.cpp
+++ b/src/commons/Taxonomer.cpp
@@ -501,7 +501,6 @@ void Taxonomer::remainConsecutiveMatches(const Match * matchList,
         size_t curPosMatchStart = i;
 
         while ( i < end && matchList[i].qInfo.pos == currPos) {
-            curPosMatches.emplace_back(matchList + i);
             i++;
         }
         size_t curPosMatchEnd = i; // exclusive
@@ -510,7 +509,6 @@ void Taxonomer::remainConsecutiveMatches(const Match * matchList,
             uint32_t nextPos = matchList[i].qInfo.pos;
             size_t nextPosMatchStart = i;
             while (i < end  && nextPos == matchList[i].qInfo.pos) {
-                nextPosMatches.emplace_back(matchList + i);
                 ++ i;
             }
             size_t nextPosMatchEnd = i; // exclusive

From e990156549ae22dc3c036656e79df850c928525e Mon Sep 17 00:00:00 2001
From: Jaebeom Kim <jbeom0731@gmail.com>
Date: Wed, 28 Aug 2024 14:00:55 +0900
Subject: [PATCH 20/21] stop using 'vector<const Match *> speciesMatches' +
 optimize filterRedundantMatches

---
 src/commons/Taxonomer.cpp | 101 ++++++++++++++++++++++++--------------
 src/commons/Taxonomer.h   |  22 ++++++---
 2 files changed, 80 insertions(+), 43 deletions(-)

diff --git a/src/commons/Taxonomer.cpp b/src/commons/Taxonomer.cpp
index 5808a472..911b975d 100644
--- a/src/commons/Taxonomer.cpp
+++ b/src/commons/Taxonomer.cpp
@@ -42,7 +42,6 @@ Taxonomer::Taxonomer(const LocalParameters &par, NcbiTaxonomy *taxonomy) : taxon
     taxCnt.reserve(4096);
 
     // getBestSpeciesMatches
-    speciesMatches.reserve(4096);
     matchPaths.reserve(4096);
     combinedMatchPaths.reserve(4096);
     maxSpecies.reserve(4096);
@@ -60,12 +59,21 @@ Taxonomer::Taxonomer(const LocalParameters &par, NcbiTaxonomy *taxonomy) : taxon
     // lowerRankClassification
     cladeCnt.reserve(4096);
 
+    // filterRedundantMatches
+    arraySize_filterRedundantMatches = 4096;
+    bestMatchForQuotient = new const Match*[arraySize_filterRedundantMatches]();
+    bestMatchTaxIdForQuotient = new TaxID[arraySize_filterRedundantMatches];
+    minHammingForQuotient = new uint8_t[arraySize_filterRedundantMatches];
+
+
     // Output
     taxCounts.reserve(4096);
 }
 
 Taxonomer::~Taxonomer() {
-
+    delete[] bestMatchForQuotient;
+    delete[] bestMatchTaxIdForQuotient;
+    delete[] minHammingForQuotient;
 }
 
 void Taxonomer::assignTaxonomy(const Match *matchList,
@@ -119,13 +127,9 @@ void Taxonomer::chooseBestTaxon(uint32_t currentQuery,
                                 vector<Query> & queryList,
                                 const LocalParameters &par) {
     // Get the best species and its matches for the current query
-    if (speciesMatches.size() < end - offset + 1) {
-        speciesMatches.reserve(end - offset + 1);
-    }
-    speciesMatches.clear();
-
     TaxonScore speciesScore(0, 0, 0, 0, 0);
-    speciesScore = getBestSpeciesMatches(speciesMatches,
+    std::pair<size_t, size_t> bestSpeciesRange;
+    speciesScore = getBestSpeciesMatches(bestSpeciesRange,
                                          matchList,
                                          end,
                                          offset,                        
@@ -154,7 +158,10 @@ void Taxonomer::chooseBestTaxon(uint32_t currentQuery,
 
     // Filter redundant matches
     taxCnt.clear();
-    filterRedundantMatches(speciesMatches, taxCnt);
+    filterRedundantMatches(matchList,
+                           bestSpeciesRange,
+                           taxCnt,
+                           queryList[currentQuery].queryLength + queryList[currentQuery].queryLength2);
     for (auto & tax : taxCnt) {
       queryList[currentQuery].taxCnt[tax.first] = tax.second;    
     }
@@ -186,30 +193,41 @@ void Taxonomer::chooseBestTaxon(uint32_t currentQuery,
     queryList[currentQuery].newSpecies = false;
 }
 
-void Taxonomer::filterRedundantMatches(vector<const Match *> & speciesMatches,
-                                        unordered_map<TaxID, unsigned int> & taxCnt) {
-    // Sort matches by the coordinate on the query
-    sort(speciesMatches.begin(), speciesMatches.end(),
-         [](const Match * a, const Match * b) { return a->qInfo.pos < b->qInfo.pos; });
+void Taxonomer::filterRedundantMatches(const Match *matchList,
+                                       const std::pair<size_t, size_t> & bestSpeciesRange,
+                                       unordered_map<TaxID, unsigned int> & taxCnt,
+                                       int queryLength) {    
+    // Determine the maximum quotient we need to handle
+    size_t maxQuotient = (queryLength + 3) / 3;
     
-    // Remove redundant matches
-    size_t matchNum = speciesMatches.size();
-    for (size_t i = 0; i < matchNum;) {
-        size_t currQuotient = speciesMatches[i]->qInfo.pos / 3;
-        uint8_t minHamming = speciesMatches[i]->hamming;
-        Match minHammingMatch = *speciesMatches[i];
-        TaxID minHammingTaxId = minHammingMatch.targetId;
-        while ((i < matchNum) && (currQuotient == speciesMatches[i]->qInfo.pos / 3)) {
-            if (speciesMatches[i]->hamming < minHamming) {
-                minHamming = speciesMatches[i]->hamming;
-                minHammingMatch = *speciesMatches[i];
-                minHammingTaxId = minHammingMatch.targetId;
-            } else if (speciesMatches[i]->hamming == minHamming) {
-                minHammingTaxId = taxonomy->LCA(minHammingTaxId, speciesMatches[i]->targetId);
+    ensureArraySize(maxQuotient + 1);
+
+    // std::fill_n(bestMatchTaxIdForQuotient, maxQuotient + 1, 0);
+
+    for (size_t i = bestSpeciesRange.first; i < bestSpeciesRange.second; i ++) {
+        size_t currQuotient = matchList[i].qInfo.pos / 3;
+        uint8_t hamming = matchList[i].hamming;
+
+        if (bestMatchForQuotient[currQuotient] == nullptr) {
+            bestMatchForQuotient[currQuotient] = matchList + i;
+            bestMatchTaxIdForQuotient[currQuotient] = matchList[i].targetId;
+            minHammingForQuotient[currQuotient] = hamming;
+        } else {
+            if (hamming < minHammingForQuotient[currQuotient]) {
+                bestMatchForQuotient[currQuotient] = matchList + i;
+                bestMatchTaxIdForQuotient[currQuotient] = matchList[i].targetId;
+                minHammingForQuotient[currQuotient] = hamming;
+            } else if (hamming == minHammingForQuotient[currQuotient]) {
+                bestMatchTaxIdForQuotient[currQuotient] = taxonomy->LCA(
+                    bestMatchTaxIdForQuotient[currQuotient], matchList[i].targetId);
             }
-            i++;
         }
-        taxCnt[minHammingTaxId]++;
+    }
+
+    for (size_t i = 0; i <= maxQuotient; ++i) {
+        if (bestMatchForQuotient[i] != nullptr) {
+            taxCnt[bestMatchTaxIdForQuotient[i]]++;
+        }
     }
 }
 
@@ -277,7 +295,7 @@ TaxID Taxonomer::BFS(const unordered_map<TaxID, TaxonCounts> & cladeCnt, TaxID r
     }
 }
 
-TaxonScore Taxonomer::getBestSpeciesMatches(vector<const Match * > & speciesMatches,
+TaxonScore Taxonomer::getBestSpeciesMatches(std::pair<size_t, size_t> & bestSpeciesRange,
                                             const Match *matchList,
                                             size_t end,
                                             size_t offset,
@@ -289,7 +307,6 @@ TaxonScore Taxonomer::getBestSpeciesMatches(vector<const Match * > & speciesMatc
     speciesCombPathIdx.clear();
     speciesScores.clear();
     
-    pair<size_t, size_t> bestSpeciesRange;
     TaxonScore bestScore;
     float bestSpScore = 0;
     size_t i = offset;
@@ -364,11 +381,6 @@ TaxonScore Taxonomer::getBestSpeciesMatches(vector<const Match * > & speciesMatc
     
     // One species
     bestScore.taxId = maxSpecies[0];
-    speciesMatches.reserve(bestSpeciesRange.second - bestSpeciesRange.first + 1);
-
-    for (size_t j = bestSpeciesRange.first; j < bestSpeciesRange.second; j++) {
-        speciesMatches.push_back(&matchList[j]);
-    }
     
     return bestScore;                                  
 }
@@ -653,4 +665,19 @@ bool Taxonomer::isConsecutive_diffFrame(const Match * match1, const Match * matc
     // match1 87654321 -> 08765432
     // match2 98765432 -> 08765432
     return (match1->hamming - GET_2_BITS(match1->rightEndHamming)) == (match2->hamming - GET_2_BITS(match2->rightEndHamming >> 14));
+}
+
+
+void Taxonomer::ensureArraySize(size_t newSize) {
+    if (newSize > arraySize_filterRedundantMatches) {
+        delete[] bestMatchForQuotient;
+        delete[] bestMatchTaxIdForQuotient;
+        delete[] minHammingForQuotient;
+        bestMatchForQuotient = new const Match*[newSize]();
+        bestMatchTaxIdForQuotient = new TaxID[newSize]();
+        minHammingForQuotient = new uint8_t[newSize];
+        arraySize_filterRedundantMatches = newSize;
+    }
+    std::memset(bestMatchForQuotient, 0, newSize * sizeof(const Match*));
+    std::memset(minHammingForQuotient, std::numeric_limits<uint8_t>::max(), newSize * sizeof(uint8_t));
 }
\ No newline at end of file
diff --git a/src/commons/Taxonomer.h b/src/commons/Taxonomer.h
index ac5c2abe..29f3f36c 100644
--- a/src/commons/Taxonomer.h
+++ b/src/commons/Taxonomer.h
@@ -72,7 +72,7 @@ class Taxonomer {
 
     // Internal
     int denominator;
-    vector<const Match *> speciesMatches;
+    // vector<const Match *> speciesMatches;
 
     // chooseBestTaxon
     unordered_map<TaxID, unsigned int> taxCnt;
@@ -95,6 +95,13 @@ class Taxonomer {
     // lowerRankClassification
     unordered_map<TaxID, TaxonCounts> cladeCnt;
 
+    // filterRedundantMatches
+    const Match **bestMatchForQuotient;
+    TaxID *bestMatchTaxIdForQuotient;
+    uint8_t *minHammingForQuotient;
+    size_t arraySize_filterRedundantMatches;
+
+
     // Output
     unordered_map<TaxID, unsigned int> taxCounts;
 
@@ -106,6 +113,8 @@ class Taxonomer {
                    unordered_map<const Match *, depthScore> &match2depthScore,
                    float score, int hammingDist);
 
+    void ensureArraySize(size_t newSize);
+
 
 public:
     Taxonomer(const LocalParameters & par, NcbiTaxonomy * taxonomy);
@@ -143,18 +152,19 @@ class Taxonomer {
 
     void trimMatchPath(MatchPath & path1, const MatchPath & path2, int overlapLength);
 
-    void filterRedundantMatches(vector<const Match *> & speciesMatches,
-                                unordered_map<TaxID, unsigned int> & taxCnt);
+    void filterRedundantMatches(const Match *matchList,
+                                const std::pair<size_t, size_t> & bestSpeciesRange,
+                                unordered_map<TaxID, unsigned int> & taxCnt,
+                                int queryLength);
 
     static bool isConsecutive(const Match * match1, const Match * match2);
 
     static bool isConsecutive_diffFrame(const Match * match1, const Match * match2);
 
-    TaxonScore getBestSpeciesMatches(vector<const Match *> &speciesMatches, const Match *matchList, size_t end,
+    TaxonScore getBestSpeciesMatches(std::pair<size_t, size_t> & bestSpeciesRange,
+                                     const Match *matchList, size_t end,
                                      size_t offset, int queryLength);
     
-    // TaxonScore getBestSpeciesMatches(vector<Match> &speciesMatches, const Match *matchList, size_t end,
-    //                                  size_t offset, Query & currentQuery);
     // TaxonScore getBestGenusMatches_spaced(vector<Match> &matchesForMajorityLCA, const Match *matchList, size_t end, size_t offset,
     //                                       int readLength1, int readLength2);
     // TaxonScore getBestGenusMatches_spaced(vector<Match> &matchesForMajorityLCA, const Match *matchList, size_t end, size_t offset,

From ac245735ba209d73943ae08a961c73ab52bcd6ea Mon Sep 17 00:00:00 2001
From: Jaebeom Kim <jbeom0731@gmail.com>
Date: Wed, 28 Aug 2024 18:35:57 +0900
Subject: [PATCH 21/21] Now single thread is okay. Minor optimizations in
 remainConsecutiveMatches

---
 src/commons/KmerExtractor.cpp | 313 ++++++++++++++++++++--------------
 src/commons/KmerExtractor.h   |  24 +++
 src/commons/KmerMatcher.cpp   |   8 +-
 src/commons/Taxonomer.cpp     |  25 ++-
 src/commons/Taxonomer.h       |   1 +
 src/commons/common.h          |   7 +-
 src/util/apply-threshold.cpp  |   6 +-
 src/util/binning2report.cpp   |   2 +-
 8 files changed, 241 insertions(+), 145 deletions(-)

diff --git a/src/commons/KmerExtractor.cpp b/src/commons/KmerExtractor.cpp
index 0559748c..593ebe16 100644
--- a/src/commons/KmerExtractor.cpp
+++ b/src/commons/KmerExtractor.cpp
@@ -65,15 +65,24 @@ void KmerExtractor::fillQueryKmerBufferParallel(KSeqWrapper *kseq,
         }
     }
 
+    // Vector to check empty reads
+    std::vector<std::vector<bool>> emptyReads(par.threads);
+    for (int i = 0; i < par.threads; ++i) {
+        emptyReads[i].resize(chunkSize);
+        for (size_t j = 0; j < chunkSize; ++j) {
+            emptyReads[i][j] = false;
+        }
+    }
+
     // Initialize atomic variable for active tasks
     std::vector<atomic<bool>> busyThreads(par.threads);
-    for (size_t i = 0; i < par.threads; ++i) {
+    for (int i = 0; i < par.threads; ++i) {
         busyThreads[i].store(false);
     }
 
     // OpenMP parallel region with tasks
 #pragma omp parallel default(none) shared(par, readLength, kmerBuffer, \
-queryList, currentSplit, processedQueryNum, kseq, chunkSize, chunkReads_thread, busyThreads)
+queryList, currentSplit, processedQueryNum, kseq, chunkSize, chunkReads_thread, busyThreads, emptyReads)
     {
         char *maskedSeq = new char[readLength];
         char *seq = nullptr;
@@ -89,11 +98,15 @@ queryList, currentSplit, processedQueryNum, kseq, chunkSize, chunkReads_thread,
             while (processedQueryNum < currentSplit.readCnt) {
                 // Find an idle thread
                 int threadId;
-                for (threadId = 0; threadId < par.threads; ++threadId) {
-                    if (threadId == masterThread) { continue; }
-                    if (!busyThreads[threadId].load()) {
-                        busyThreads[threadId].store(true);
-                        break;
+                if (par.threads == 1) {
+                    threadId = 0;
+                } else {
+                    for (threadId = 0; threadId < par.threads; ++threadId) {
+                        if (threadId == masterThread) { continue; }
+                        if (!busyThreads[threadId].load()) {
+                            busyThreads[threadId].store(true);
+                            break;
+                        }
                     }
                 }
                 
@@ -104,52 +117,24 @@ queryList, currentSplit, processedQueryNum, kseq, chunkSize, chunkReads_thread,
                 size_t chunkEnd = min(processedQueryNum + chunkSize, currentSplit.readCnt);
                 count = 0;
 
-                for (size_t i = 0; i < chunkSize && processedQueryNum < chunkEnd; ++i, ++processedQueryNum) {
-                    kseq->ReadEntry();
-                    queryList[processedQueryNum].name = string(kseq->entry.name.s);
-                    queryList[processedQueryNum].queryLength = LocalUtil::getMaxCoveredLength((int) kseq->entry.sequence.l);
-                    
-                    // Check if the read is too short
-                    int kmerCnt = LocalUtil::getQueryKmerNumber<int>((int) kseq->entry.sequence.l, spaceNum);
-                    if (kmerCnt < 1) {
-                        chunkReads_thread[threadId][i] = "";
-                        queryList[processedQueryNum].kmerCnt = 0;
-                    } else {
-                        chunkReads_thread[threadId][i] = string(kseq->entry.sequence.s);
-                        queryList[processedQueryNum].kmerCnt = kmerCnt;
-                    }
-                    count++;
-                }
-                
-                // Process each chunk by idle threads
-                #pragma omp task firstprivate(count, processedQueryNum, threadId)
-                {
-                    for (size_t i = 0; i < count; ++i) {
-                        size_t queryIdx = processedQueryNum - count + i;
-                        if (chunkReads_thread[threadId][i].empty()) { continue; }
-                        
-                        // Get masked sequence
-                        if (maskMode) {
-                            if (readLength < chunkReads_thread[threadId][i].length() + 1) {
-                                readLength = chunkReads_thread[threadId][i].length() + 1;
-                                delete[] maskedSeq;
-                                maskedSeq = new char[readLength];
-                            }
-                            SeqIterator::maskLowComplexityRegions(chunkReads_thread[threadId][i].c_str(), maskedSeq, *probMatrix, maskProb, subMat);
-                            seq = maskedSeq;
-                        } else {
-                            seq = const_cast<char *>(chunkReads_thread[threadId][i].c_str());
-                        }
-
-                        size_t posToWrite = kmerBuffer.reserveMemory(queryList[queryIdx].kmerCnt);
-                        
-                        // Process Read 1
-                        seqIterator->sixFrameTranslation(seq, (int) chunkReads_thread[threadId][i].length(), aaFrames_read);
-                        seqIterator->fillQueryKmerBuffer(seq, (int) chunkReads_thread[threadId][i].length(), kmerBuffer, posToWrite, 
-                                                        (uint32_t) queryIdx, aaFrames_read);
+                loadChunkOfReads(kseq,
+                                 queryList,
+                                 processedQueryNum,
+                                 chunkSize,
+                                 chunkEnd,
+                                 chunkReads_thread[threadId],
+                                 emptyReads[threadId],
+                                 count,
+                                 false);
+                if (par.threads == 1) {
+                    processSequence(count, processedQueryNum, chunkReads_thread[threadId], emptyReads[threadId], seq, maskedSeq, readLength, kmerBuffer, queryList, aaFrames_read, false);
+                } else {
+                    #pragma omp task firstprivate(count, processedQueryNum, threadId)
+                    {
+                        processSequence(count, processedQueryNum, chunkReads_thread[threadId], emptyReads[threadId], seq, maskedSeq, readLength, kmerBuffer, queryList, aaFrames_read, false);
+                        busyThreads[threadId].store(false);
                     }
-                    busyThreads[threadId].store(false);
-                }   
+                }  
             }
         }
         delete[] maskedSeq;
@@ -168,7 +153,7 @@ void KmerExtractor::fillQueryKmerBufferParallel_paired(KSeqWrapper *kseq1,
     // Reserve memory for each read of the chunk for each thread
     std::vector<std::vector<string>> chunkReads1_thread(par.threads);
     std::vector<std::vector<string>> chunkReads2_thread(par.threads);
-    for (size_t i = 0; i < par.threads; ++i) {
+    for (int i = 0; i < par.threads; ++i) {
         chunkReads1_thread[i].resize(chunkSize);
         chunkReads2_thread[i].resize(chunkSize);
         for (size_t j = 0; j < chunkSize; ++j) {
@@ -176,20 +161,30 @@ void KmerExtractor::fillQueryKmerBufferParallel_paired(KSeqWrapper *kseq1,
             chunkReads2_thread[i][j].reserve(readLength);
         }
     }
+    // Vector to check empty reads
+    std::vector<std::vector<bool>> emptyReads(par.threads);
+    for (int i = 0; i < par.threads; ++i) {
+        emptyReads[i].resize(chunkSize);
+        for (size_t j = 0; j < chunkSize; ++j) {
+            emptyReads[i][j] = false;
+        }
+    }
 
     // Initialize atomic variable for active tasks
     std::vector<std::atomic<bool>> busyThreads(par.threads);
-    for (size_t i = 0; i < par.threads; ++i) {
+    for (int i = 0; i < par.threads; ++i) {
         busyThreads[i].store(false);
     }
 
     // OpenMP parallel region with tasks
 #pragma omp parallel default(none) shared(par, readLength, \
 kmerBuffer, queryList, currentSplit, processedQueryNum, kseq1, kseq2, \
-chunkSize, chunkReads1_thread, chunkReads2_thread, busyThreads, cout)
+chunkSize, chunkReads1_thread, chunkReads2_thread, busyThreads, cout, emptyReads)
     {
-        char *maskedSeq1 = new char[readLength];
-        char *maskedSeq2 = new char[readLength];   
+        size_t maxReadLength1 = 1000;
+        size_t maxReadLength2 = 1000;
+        char *maskedSeq1 = new char[maxReadLength1];
+        char *maskedSeq2 = new char[maxReadLength2];   
         char *seq1 = nullptr;
         char *seq2 = nullptr;
         vector<int> aaFrames_read1[6];
@@ -202,91 +197,159 @@ chunkSize, chunkReads1_thread, chunkReads2_thread, busyThreads, cout)
 #pragma omp single nowait
         {
             int masterThread = omp_get_thread_num();
-            size_t count = 0;
+           
             while (processedQueryNum < currentSplit.readCnt) {
                 // Find an idle thread
                 int threadId;
-                for (threadId = 0; threadId < par.threads; ++threadId) {
-                    if (threadId == masterThread) { continue; }
-                    if (!busyThreads[threadId].load()) {
-                        busyThreads[threadId].store(true);
-                        break;
+                if (par.threads == 1) {
+                    threadId = 0;
+                } else {
+                    for (threadId = 0; threadId < par.threads; ++threadId) {
+                        if (threadId == masterThread) { continue; }
+                        if (!busyThreads[threadId].load()) {
+                            busyThreads[threadId].store(true);
+                            break;
+                        }
                     }
                 }
-
                 if (threadId == par.threads) {
                     continue;
                 }
 
                 size_t chunkEnd = min(processedQueryNum + chunkSize, currentSplit.readCnt);
+                
+                size_t count = 0;
+                size_t processedQueryNumCopy = processedQueryNum;
+                loadChunkOfReads(kseq1,
+                                 queryList,
+                                 processedQueryNum,
+                                 chunkSize,
+                                 chunkEnd,
+                                 chunkReads1_thread[threadId],
+                                 emptyReads[threadId],
+                                 count,
+                                 false);
+
                 count = 0;
+                processedQueryNum = processedQueryNumCopy;
+                loadChunkOfReads(kseq2,
+                                 queryList,
+                                 processedQueryNum,
+                                 chunkSize,
+                                 chunkEnd,
+                                 chunkReads2_thread[threadId],
+                                 emptyReads[threadId],
+                                 count,
+                                 true);
 
-                for (size_t i = 0; i < chunkSize && processedQueryNum < chunkEnd; ++i, ++processedQueryNum) {
-                    kseq1->ReadEntry();
-                    kseq2->ReadEntry();
-                    queryList[processedQueryNum].name = string(kseq1->entry.name.s);
-                    queryList[processedQueryNum].queryLength = LocalUtil::getMaxCoveredLength((int) kseq1->entry.sequence.l);
-                    queryList[processedQueryNum].queryLength2 = LocalUtil::getMaxCoveredLength((int) kseq2->entry.sequence.l);
-                    
-                    // Check if the read is too short
-                    int kmerCnt1 = LocalUtil::getQueryKmerNumber<int>((int) kseq1->entry.sequence.l, spaceNum);
-                    int kmerCnt2 = LocalUtil::getQueryKmerNumber<int>((int) kseq2->entry.sequence.l, spaceNum);
-                    if (kmerCnt1 < 1 || kmerCnt2 < 1) {
-                        chunkReads1_thread[threadId][i] = "";
-                        chunkReads2_thread[threadId][i] = "";
-                        queryList[processedQueryNum].kmerCnt = 0;
-                    } else {
-                        // cout << threadId << endl
-                        chunkReads1_thread[threadId][i] = string(kseq1->entry.sequence.s);
-                        chunkReads2_thread[threadId][i] = string(kseq2->entry.sequence.s);
-                        queryList[processedQueryNum].kmerCnt = kmerCnt1 + kmerCnt2;
+                if (par.threads == 1) {
+                    processSequence(count, processedQueryNum, chunkReads1_thread[threadId], emptyReads[threadId], seq1, maskedSeq1, maxReadLength1, kmerBuffer, queryList, aaFrames_read1, false);
+                    processSequence(count, processedQueryNum, chunkReads2_thread[threadId], emptyReads[threadId], seq2, maskedSeq2, maxReadLength2, kmerBuffer, queryList, aaFrames_read2, true);
+                } 
+                else {
+                    #pragma omp task firstprivate(count, processedQueryNum, threadId)
+                    {
+                        processSequence(count, processedQueryNum, chunkReads1_thread[threadId], emptyReads[threadId], seq1, maskedSeq1, maxReadLength1, kmerBuffer, queryList, aaFrames_read1, false);
+                        processSequence(count, processedQueryNum, chunkReads2_thread[threadId], emptyReads[threadId], seq2, maskedSeq2, maxReadLength2, kmerBuffer, queryList, aaFrames_read2, true);
+                        busyThreads[threadId].store(false);
                     }
-                    count++;
-                }
-                
+                }   
+            }   
+        }   
+        delete[] maskedSeq1;
+        delete[] maskedSeq2;
+    }
+}
 
-                // Process each chunk by idle threads
-                #pragma omp task firstprivate(count, processedQueryNum, threadId)
-                {
-                    for (size_t i = 0; i < count; ++i) {
-                        size_t queryIdx = processedQueryNum - count + i;
-                        if (chunkReads1_thread[threadId][i].empty() || chunkReads2_thread[threadId][i].empty()) { continue; }
-                        
-                        // Get masked sequence
-                        if (maskMode) {
-                            if (readLength < chunkReads1_thread[threadId][i].length() + 1 || readLength < chunkReads2_thread[threadId][i].length() + 1) {
-                                readLength = max(chunkReads1_thread[threadId][i].length() + 1, chunkReads2_thread[threadId][i].length() + 1);
-                                delete[] maskedSeq1;
-                                delete[] maskedSeq2;
-                                maskedSeq1 = new char[readLength];
-                                maskedSeq2 = new char[readLength];
-                            }
-                            SeqIterator::maskLowComplexityRegions(chunkReads1_thread[threadId][i].c_str(),maskedSeq1, *probMatrix, maskProb, subMat);
-                            SeqIterator::maskLowComplexityRegions(chunkReads2_thread[threadId][i].c_str(),maskedSeq2, *probMatrix, maskProb, subMat);
-                            seq1 = maskedSeq1;
-                            seq2 = maskedSeq2;
-                        } else {
-                            seq1 = const_cast<char *>(chunkReads1_thread[threadId][i].c_str());
-                            seq2 = const_cast<char *>(chunkReads2_thread[threadId][i].c_str());
-                        }
+void KmerExtractor::processSequence(size_t count,
+                                    size_t processedQueryNum,
+                                    const vector<string> & reads,
+                                    const vector<bool> & emptyReads,
+                                    char *seq,
+                                    char *maskedSeq,
+                                    size_t & maxReadLength,
+                                    QueryKmerBuffer &kmerBuffer,
+                                    const vector<Query> & queryList,
+                                    vector<int> *aaFrames,
+                                    bool isReverse) {
+    for (size_t i = 0; i < count; ++i) {
+        size_t queryIdx = processedQueryNum - count + i;
+        if (emptyReads[i]) { continue; }
+        // Get masked sequence
+        if (maskMode) {
+            if (maxReadLength < reads[i].length() + 1) {
+                maxReadLength = reads[i].length() + 1;
+                delete[] maskedSeq;
+                maskedSeq = new char[maxReadLength];
+            }
+            SeqIterator::maskLowComplexityRegions(reads[i].c_str(), maskedSeq, *probMatrix, maskProb, subMat);
+            seq = maskedSeq;
+        } else {
+            seq = const_cast<char *>(reads[i].c_str());
+        }
 
-                        size_t posToWrite = kmerBuffer.reserveMemory(queryList[queryIdx].kmerCnt);
-                        
-                        // Process Read 1
-                        seqIterator->sixFrameTranslation(seq1, (int) chunkReads1_thread[threadId][i].length(), aaFrames_read1);
-                        seqIterator->fillQueryKmerBuffer(seq1, (int) chunkReads1_thread[threadId][i].length(), kmerBuffer, posToWrite, 
-                                                        (uint32_t) queryIdx, aaFrames_read1);
+        size_t posToWrite = 0;
+        if (isReverse) {
+            posToWrite = kmerBuffer.reserveMemory(queryList[queryIdx].kmerCnt2);
+            seqIterator->sixFrameTranslation(seq, (int) reads[i].length(), aaFrames);
+            seqIterator->fillQueryKmerBuffer(seq, (int) reads[i].length(), kmerBuffer, posToWrite, 
+                                            (uint32_t) queryIdx, aaFrames, queryList[queryIdx].queryLength+3);
+        } else {
+            posToWrite = kmerBuffer.reserveMemory(queryList[queryIdx].kmerCnt);
+            seqIterator->sixFrameTranslation(seq, (int) reads[i].length(), aaFrames);
+            seqIterator->fillQueryKmerBuffer(seq, (int) reads[i].length(), kmerBuffer, posToWrite, 
+                                            (uint32_t) queryIdx, aaFrames);
+        }
+    }
+}
 
-                        // Process Read 2
-                        seqIterator->sixFrameTranslation(seq2, (int) chunkReads2_thread[threadId][i].length(), aaFrames_read2);
-                        seqIterator->fillQueryKmerBuffer(seq2, (int) chunkReads2_thread[threadId][i].length(), kmerBuffer, posToWrite,
-                                                        (uint32_t) queryIdx, aaFrames_read2, queryList[queryIdx].queryLength+3);
-                    }
-                    busyThreads[threadId].store(false);
-                }   
+void KmerExtractor::loadChunkOfReads(KSeqWrapper *kseq,
+                                     vector<Query> & queryList,
+                                     size_t & processedQueryNum,
+                                     size_t chunkSize,
+                                     size_t chunkEnd,
+                                     vector<string> & reads,
+                                     vector<bool> & emptyReads,
+                                     size_t & count,
+                                     bool isReverse) {
+    if (isReverse) {
+        for (size_t i = 0; i < chunkSize && processedQueryNum < chunkEnd; ++i, ++processedQueryNum) {
+            kseq->ReadEntry();
+            queryList[processedQueryNum].queryLength2 = LocalUtil::getMaxCoveredLength((int) kseq->entry.sequence.l);   
+            
+            if (emptyReads[i]) { continue; }
+
+            // Check if the read is too short
+            int kmerCnt = LocalUtil::getQueryKmerNumber<int>((int) kseq->entry.sequence.l, spaceNum);
+            if (kmerCnt < 1) {
+                reads[i] = "";
+                emptyReads[i] = true;
+                queryList[processedQueryNum].kmerCnt2 = 0;
+            } else {
+                reads[i] = string(kseq->entry.sequence.s);
+                // emptyReads[i] = false; // already set to false
+                queryList[processedQueryNum].kmerCnt2 = kmerCnt;
             }
+            count++;
+        }
+    } else {
+        for (size_t i = 0; i < chunkSize && processedQueryNum < chunkEnd; ++i, ++processedQueryNum) {
+            kseq->ReadEntry();
+            queryList[processedQueryNum].name = string(kseq->entry.name.s);
+            queryList[processedQueryNum].queryLength = LocalUtil::getMaxCoveredLength((int) kseq->entry.sequence.l);
+
+            // Check if the read is too short
+            int kmerCnt = LocalUtil::getQueryKmerNumber<int>((int) kseq->entry.sequence.l, spaceNum);
+            if (kmerCnt < 1) {
+                reads[i] = "";
+                emptyReads[i] = true;
+                queryList[processedQueryNum].kmerCnt = 0;
+            } else {
+                reads[i] = string(kseq->entry.sequence.s);
+                emptyReads[i] = false;
+                queryList[processedQueryNum].kmerCnt = kmerCnt;
+            }
+            count++;
         }
-        delete[] maskedSeq1;
-        delete[] maskedSeq2;
     }
 }
\ No newline at end of file
diff --git a/src/commons/KmerExtractor.h b/src/commons/KmerExtractor.h
index a2699139..650f03f3 100644
--- a/src/commons/KmerExtractor.h
+++ b/src/commons/KmerExtractor.h
@@ -32,6 +32,28 @@ class KmerExtractor {
                                             vector<Query> &queryList,
                                             const QuerySplit & currentSplit,
                                             const LocalParameters &par);
+
+    void loadChunkOfReads(KSeqWrapper *kseq,
+                          vector<Query> & queryList,
+                          size_t & processedQueryNum,
+                          size_t chunkSize,
+                          size_t chunkEnd,
+                          vector<string> & reads,
+                          vector<bool> & emptyReads,
+                          size_t & count,
+                          bool isReverse);
+
+    void processSequence(size_t count,
+                         size_t processedQueryNum,
+                         const vector<string> & reads,
+                         const vector<bool> & emptyReads,
+                         char *seq,
+                         char *maskedSeq,
+                         size_t & maxReadLength,
+                         QueryKmerBuffer &kmerBuffer,
+                         const vector<Query> & queryList,
+                         vector<int> *aaFrames,
+                         bool isReverse);
                                       
 public:
     explicit KmerExtractor(const LocalParameters & par);
@@ -44,6 +66,8 @@ class KmerExtractor {
                            KSeqWrapper* kseq2 = nullptr);
 
 
+
+
 };
 
 static inline bool compareForLinearSearch(const QueryKmer &a, const QueryKmer &b) {
diff --git a/src/commons/KmerMatcher.cpp b/src/commons/KmerMatcher.cpp
index 46bdae08..9f9a2e9a 100644
--- a/src/commons/KmerMatcher.cpp
+++ b/src/commons/KmerMatcher.cpp
@@ -210,23 +210,23 @@ querySplits, queryKmerList, matchBuffer, cout, targetDiffIdxFileName, numOfDiffI
             size_t kmerInfoBufferIdx = 0;
             size_t diffIdxBufferIdx = 0;
 
-            //query variables
+            // Query variables
             uint64_t currentQuery = UINT64_MAX;
             uint64_t currentQueryAA = UINT64_MAX;
             QueryKmerInfo currentQueryInfo;
             
-            //target variables
+            // Target variables
             size_t diffIdxPos = 0;
             std::vector<uint64_t> candidateTargetKmers; //vector for candidate target k-mer, some of which are selected after based on hamming distance
             std::vector<TargetKmerInfo> candidateKmerInfos;
             uint64_t currentTargetKmer;
 
-            //Match buffer for each thread
+            // Match buffer for each thread
             int localBufferSize = 2'000'000; // 32 Mb
             auto *matches = new Match[localBufferSize]; // 16 * 2'000'000 = 32 Mb
             int matchCnt = 0;
 
-            //vectors for selected target k-mers
+            // Vectors for selected target k-mers
             std::vector<uint8_t> selectedHammingSum;
             std::vector<size_t> selectedMatches;
             std::vector<uint16_t> selectedHammings;
diff --git a/src/commons/Taxonomer.cpp b/src/commons/Taxonomer.cpp
index 911b975d..85e537e0 100644
--- a/src/commons/Taxonomer.cpp
+++ b/src/commons/Taxonomer.cpp
@@ -344,7 +344,6 @@ TaxonScore Taxonomer::getBestSpeciesMatches(std::pair<size_t, size_t> & bestSpec
                 bestSpeciesRange = make_pair(start, i);
             }
         }
-        // matchPaths.clear();
     }
     
     // If there are no meaningful species
@@ -468,6 +467,12 @@ void Taxonomer::remainConsecutiveMatches(const Match * matchList,
     linkedMatchKeys.clear();
     linkedMatchValues.clear();
     linkedMatchValuesIdx.clear();
+    
+    linkedMatchKeys.resize(end - start);
+    linkedMatchValuesIdx.resize(end - start);
+    size_t linkedMatchKeysIdx = 0;
+
+    
 
     size_t currPos = matchList[start].qInfo.pos;
     uint64_t frame = matchList[start].qInfo.frame;
@@ -499,8 +504,8 @@ void Taxonomer::remainConsecutiveMatches(const Match * matchList,
                         }
                     }
                     if (found) {
-                        linkedMatchKeys.push_back(matchList + curIdx);
-                        linkedMatchValuesIdx.push_back(startIdx);
+                        linkedMatchKeys[linkedMatchKeysIdx] = matchList + curIdx;
+                        linkedMatchValuesIdx[linkedMatchKeysIdx++] = startIdx;
                     }
                 }
             }
@@ -538,8 +543,8 @@ void Taxonomer::remainConsecutiveMatches(const Match * matchList,
                         }
                     }
                     if (found) {
-                        linkedMatchKeys.push_back(matchList + curIdx);
-                        linkedMatchValuesIdx.push_back(startIdx);
+                        linkedMatchKeys[linkedMatchKeysIdx] = matchList + curIdx;
+                        linkedMatchValuesIdx[linkedMatchKeysIdx++] = startIdx;
                     }
                 }
             }
@@ -559,16 +564,17 @@ void Taxonomer::remainConsecutiveMatches(const Match * matchList,
 
     match2depthScore.clear();
 
-    for (size_t k = 0; k < linkedMatchKeys.size(); k++) {
+    for (size_t k = 0; k < linkedMatchKeysIdx; k++) {
         const Match * key = linkedMatchKeys[k];
         size_t startIdx = linkedMatchValuesIdx[k];
-        size_t endIdx = (k + 1 < linkedMatchKeys.size()) ? linkedMatchValuesIdx[k + 1] : linkedMatchValues.size();
+        size_t endIdx = (k + 1 < linkedMatchKeysIdx) ? linkedMatchValuesIdx[k + 1] : linkedMatchValues.size();
 
         if (match2depthScore.find(key) == match2depthScore.end()) {
             depthScore bestPath{};
             for (size_t j = startIdx; j < endIdx; j++) {
                 depthScore curPath = DFS(linkedMatchValues[j],
                                          linkedMatchKeys,
+                                         linkedMatchKeysIdx,
                                          linkedMatchValues,
                                          linkedMatchValuesIdx,
                                          1,
@@ -595,6 +601,7 @@ void Taxonomer::remainConsecutiveMatches(const Match * matchList,
 depthScore Taxonomer::DFS(
     const Match *curMatch,
     const vector<const Match *> &linkedMatchesKeys,
+    size_t linkedMatchKeysIdx,
     const vector<const Match *> &linkedMatchesValues,
     const vector<size_t> &linkedMatchesIndices,
     size_t depth, size_t MIN_DEPTH,
@@ -620,7 +627,7 @@ depthScore Taxonomer::DFS(
     } else { // Not a leaf node
         size_t index = it - linkedMatchesKeys.begin();
         size_t startIdx = linkedMatchesIndices[index];
-        size_t endIdx = (index + 1 < linkedMatchesIndices.size()) ? linkedMatchesIndices[index + 1] : linkedMatchesValues.size();
+        size_t endIdx = (index + 1 < linkedMatchKeysIdx) ? linkedMatchesIndices[index + 1] : linkedMatchesValues.size();
 
         uint8_t lastEndHamming = (curMatch->rightEndHamming >> 14);
         if (lastEndHamming == 0) {
@@ -637,7 +644,7 @@ depthScore Taxonomer::DFS(
                                            returnDepthScore.hammingDist + hammingDist + lastEndHamming,
                                            returnDepthScore.endMatch);
             } else {
-                curDepthScore = DFS(nextMatch, linkedMatchesKeys, linkedMatchesValues, linkedMatchesIndices, depth, MIN_DEPTH, match2depthScore, score, hammingDist + lastEndHamming);
+                curDepthScore = DFS(nextMatch, linkedMatchesKeys, linkedMatchKeysIdx, linkedMatchesValues, linkedMatchesIndices, depth, MIN_DEPTH, match2depthScore, score, hammingDist + lastEndHamming);
             }
             if (curDepthScore.score > bestDepthScore.score && curDepthScore.depth > MIN_DEPTH) {
                 bestDepthScore = curDepthScore;
diff --git a/src/commons/Taxonomer.h b/src/commons/Taxonomer.h
index 29f3f36c..5c84c8c1 100644
--- a/src/commons/Taxonomer.h
+++ b/src/commons/Taxonomer.h
@@ -107,6 +107,7 @@ class Taxonomer {
 
     depthScore DFS(const Match *curMatch,
                    const vector<const Match *> &linkedMatchesKeys,
+                   size_t linkedMatchKeysIdx,
                    const vector<const Match *> &linkedMatchesValues,
                    const vector<size_t> &linkedMatchesIndices,
                    size_t depth, size_t MIN_DEPTH,
diff --git a/src/commons/common.h b/src/commons/common.h
index 2961060d..16f6ff09 100644
--- a/src/commons/common.h
+++ b/src/commons/common.h
@@ -48,6 +48,7 @@ struct Query{
     int queryLength;
     int queryLength2;
     int kmerCnt;
+    int kmerCnt2;
     bool isClassified;
     bool newSpecies; // 36 byte
 
@@ -58,13 +59,13 @@ struct Query{
     bool operator==(int id) const { return queryId == id;}
 
     Query(int queryId, int classification, float score, float coverage, int hammingDist, int queryLength,
-          int queryLength2, int kmerCnt, bool isClassified, bool newSpecies, std::string name)
+          int queryLength2, int kmerCnt, int kmerCnt2, bool isClassified, bool newSpecies, std::string name)
             : queryId(queryId), classification(classification), score(score), coverage(coverage),
-              hammingDist(hammingDist), queryLength(queryLength), queryLength2(queryLength2), kmerCnt(kmerCnt),
+              hammingDist(hammingDist), queryLength(queryLength), queryLength2(queryLength2), kmerCnt(kmerCnt), kmerCnt2(kmerCnt2),
               isClassified(isClassified), newSpecies(newSpecies), name(std::move(name)) {}
 
     Query() : queryId(0), classification(0), score(0), coverage(0), hammingDist(0), queryLength(0),
-              queryLength2(0), kmerCnt(0), isClassified(false), newSpecies(false) {}
+              queryLength2(0), kmerCnt(0), kmerCnt2(0), isClassified(false), newSpecies(false) {}
 };
 
 template<typename T>
diff --git a/src/util/apply-threshold.cpp b/src/util/apply-threshold.cpp
index 4b3a2d49..f54c52c3 100644
--- a/src/util/apply-threshold.cpp
+++ b/src/util/apply-threshold.cpp
@@ -61,7 +61,7 @@ int applyThreshold(int argc, const char **argv, const Command &command) {
             // Low coverage or low score
             if (stof(columns[par.coverageCol]) < par.minCoverage || stof(columns[par.scoreCol]) < par.minScore) {
                 newResults.emplace_back(lineCnt, 0, stof(columns[par.scoreCol]), stof(columns[par.coverageCol]), stoi(columns[6]),
-                                        stoi(columns[3]),0, 0, false, false, columns[1]);
+                                        stoi(columns[3]),0, 0, 0, false, false, columns[1]);
                 //int queryId, int classification, float score, float coverage, int hammingDist, int queryLength,
                         //          int queryLength2, int kmerCnt, bool isClassified, bool newSpecies, std::string name
                 taxonCounts[0]++;
@@ -70,11 +70,11 @@ int applyThreshold(int argc, const char **argv, const Command &command) {
             else if (stof(columns[par.scoreCol]) < par.minSpScore && ncbiTaxonomy.taxonNode(ncbiTaxonomy.getTaxIdAtRank(stoi(columns[2]), "species"))->rankIdx == 4) {
                 TaxID parentTaxId = ncbiTaxonomy.taxonNode(ncbiTaxonomy.getTaxIdAtRank(stoi(columns[2]), "species"))->parentTaxId;
                 newResults.emplace_back(lineCnt, parentTaxId, stof(columns[4]), stof(columns[5]), stoi(columns[6]),
-                                        stoi(columns[3]),0, 0, true, false, columns[1]);
+                                        stoi(columns[3]),0, 0, 0, true, false, columns[1]);
                 taxonCounts[parentTaxId]++;
             } else {
                 newResults.emplace_back(lineCnt, stoi(columns[2]), stof(columns[4]), stof(columns[5]), stoi(columns[6]),
-                                        stoi(columns[3]),0, 0, stoi(columns[0]), false, columns[1]);
+                                        stoi(columns[3]),0, 0, 0, stoi(columns[0]), false, columns[1]);
                 taxonCounts[stoi(columns[2])]++;
             }
             lineCnt++;
diff --git a/src/util/binning2report.cpp b/src/util/binning2report.cpp
index 33920ed1..bf55ccb6 100644
--- a/src/util/binning2report.cpp
+++ b/src/util/binning2report.cpp
@@ -44,7 +44,7 @@ int binning2report(int argc, const char **argv, const Command &command){
                 columns.push_back(eachItem);
             }
             binnings.emplace_back(lineCnt, stoi(columns[par.taxidCol]), 0, 0, 0,
-                                  0,0, 0, stoi(columns[par.taxidCol]), false, columns[par.readIdCol]);
+                                  0, 0, 0, 0, stoi(columns[par.taxidCol]), false, columns[par.readIdCol]);
             taxonCounts[stoi(columns[par.taxidCol])]++;
             lineCnt++;
         }