Skip to content

Commit

Permalink
Update unit tests and remove unused code
Browse files Browse the repository at this point in the history
  • Loading branch information
jonperdomo committed Oct 25, 2024
1 parent c061b0a commit 866fa6a
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 114 deletions.
8 changes: 0 additions & 8 deletions __main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,13 +165,6 @@ def main():
required=False,
help=argparse.SUPPRESS
)

# Chromosome mean coverage values passed in as a comma-separated list (e.g. chr1:100,chr2:200,chr3:300)
parser.add_argument(
"--chr-cov",
required=False,
help=argparse.SUPPRESS
)

# ----------------------------------------------------------------------- #

Expand Down Expand Up @@ -229,7 +222,6 @@ def main():
input_data.setThreadCount(args.threads)
input_data.setChromosome(args.chr)
input_data.setRegion(args.region)
input_data.setMeanChromosomeCoverage(args.chr_cov)
input_data.setAlleleFreqFilepaths(args.pfb)
input_data.setHMMFilepath(args.hmm)
input_data.setOutputDir(args.output)
Expand Down
5 changes: 0 additions & 5 deletions include/input_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,6 @@ class InputData {
std::pair<int32_t, int32_t> getRegion();
bool isRegionSet();

// Set entire-chromosome mean coverage values to speed up the log2 ratio calculations.
void setMeanChromosomeCoverage(std::string chr_cov);
double getMeanChromosomeCoverage(std::string chr);

// Set the output directory where the results will be written.
void setOutputDir(std::string dirpath);
std::string getOutputDir();
Expand Down Expand Up @@ -122,7 +118,6 @@ class InputData {
std::string chr; // Chromosome to analyze
std::pair<int32_t, int32_t> start_end; // Region to analyze
bool region_set; // True if a region is set
std::map<std::string, double> chr_cov; // Map of pre-calculated mean coverage values for each chromosome
int thread_count;
std::string hmm_filepath;
std::string cnv_filepath;
Expand Down
13 changes: 0 additions & 13 deletions src/cnv_caller.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -581,19 +581,6 @@ void CNVCaller::loadChromosomeData(std::string chr)
printMessage("Calculating mean chromosome coverage for " + chr + "...");
mean_chr_cov = calculateMeanChromosomeCoverage(chr);
printMessage("Mean chromosome coverage for " + chr + ": " + std::to_string(mean_chr_cov));
// double mean_chr_cov = 0;
// try
// {
// mean_chr_cov = this->input_data->getMeanChromosomeCoverage(chr);
// printMessage("User-provided mean chromosome coverage for " + chr + ": " + std::to_string(mean_chr_cov));
// }
// catch(const std::out_of_range& e)
// {
// // No user-provided mean chromosome coverage
// printMessage("Calculating mean chromosome coverage for " + chr + "...");
// mean_chr_cov = calculateMeanChromosomeCoverage(chr);
// printMessage("Mean chromosome coverage for " + chr + ": " + std::to_string(mean_chr_cov));
// }
this->mean_chr_cov = mean_chr_cov;

// Read the SNP positions and B-allele frequency values from the VCF file
Expand Down
59 changes: 0 additions & 59 deletions src/input_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,65 +212,6 @@ bool InputData::isRegionSet()
return this->region_set;
}

void InputData::setMeanChromosomeCoverage(std::string chr_cov)
{
// Update the chromosome coverage map if the string is not empty
if (chr_cov != "")
{
// Split the string by commas
std::istringstream ss(chr_cov);
std::string token;
std::vector<std::string> chr_cov_pairs;

while (std::getline(ss, token, ','))
{
chr_cov_pairs.push_back(token);
}

// Iterate over the pairs
for (auto const &pair : chr_cov_pairs)
{
// Split the pair by colon
std::istringstream ss(pair);
std::string token;
std::vector<std::string> chr_cov;

while (std::getline(ss, token, ':'))
{
chr_cov.push_back(token);
}

// Check if the pair is valid
if (chr_cov.size() == 2)
{
// Get the chromosome and coverage
std::string chr = chr_cov[0];
double cov = std::stod(chr_cov[1]);

// Add the pair to the map
this->chr_cov[chr] = cov;

// Print the pair
std::cout << "Set mean coverage for " << chr << " to " << cov << std::endl;
}
}
}
}

double InputData::getMeanChromosomeCoverage(std::string chr)
{
// Using find to check if the key exists
auto it = chr_cov.find(chr);

// If key is not found, throw an error
if (it == chr_cov.end()) {
throw std::out_of_range("Key not found in the map.");
}

// Key exists, return the corresponding double value
return it->second;
}

void InputData::setAlleleFreqFilepaths(std::string filepath)
{
// this->pfb_filepath = filepath;
Expand Down
61 changes: 32 additions & 29 deletions tests/test_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,18 @@
TEST_SNPS_FILE = os.path.join(test_dir_path, 'snps_21.vcf.gz')


# Test the main function.
def test_run():
"""Test the run function of the contextsv module with a small dataset."""

# Set input parameters.
input_data = contextsv.InputData()
input_data.setShortReadBam(TEST_BAM_FILE)
input_data.setLongReadBam(TEST_BAM_FILE)
input_data.setRefGenome(TEST_REF_FILE)
input_data.setSNPFilepath(TEST_SNPS_FILE)
# input_data.setRegion("21:14486099-14515105")
input_data.setChromosome("21")
input_data.setRegion("14486099-14515105")
input_data.setThreadCount(1)
input_data.setMeanChromosomeCoverage("21:80.6292")
input_data.setAlleleFreqFilepaths("")
input_data.setHMMFilepath("")
input_data.setOutputDir(TEST_OUTDIR)
Expand All @@ -57,29 +55,34 @@ def test_run():
# Run the analysis.
contextsv.run(input_data)

# ========================================================================

# TODO: Create new tests after testing and updating the CNV output format
# # Check that the output file exists.
# output_file = os.path.join(TEST_OUTDIR, 'cnv_data.tsv')
# assert os.path.exists(output_file)

# # Check that the output file is not empty.
# assert os.path.getsize(output_file) > 0

# # Check that the output file has the correct number of lines.
# with open(output_file, 'r', encoding='utf-8') as f:
# assert len(f.readlines()) == 55

# # Check that the output file has the correct header.
# with open(output_file, 'r', encoding='utf-8') as f:
# assert f.readline().strip() == "chromosome\tposition\tsnp\tb_allele_freq\tlog2_ratio\tcnv_state\tpopulation_freq"

# # Check that the output file has the correct SNP values in the last line
# with open(output_file, 'r', encoding='utf-8') as f:
# last_line = f.readlines()[-1].strip('\n')
# print("The last line of the output file is: ")
# print(last_line)
# actual_line="21\t14508888\t0\t0.5\t0.0522005\t6\t0.5"
# print(actual_line)
# assert last_line == actual_line
# Check that the output file exists.
output_file = os.path.join(TEST_OUTDIR, 'output.vcf')
assert os.path.exists(output_file)

# Check that the VCF file is not empty.
assert os.path.getsize(output_file) > 0

# Check that the VCF file has the correct number of lines.
with open(output_file, 'r', encoding='utf-8') as f:
assert len(f.readlines()) == 21

# Check that the VCF file has the correct header, and the correct
# VCF CHROM, POS, and INFO fields in the next 2 lines.
header_line = 18
with open(output_file, 'r', encoding='utf-8') as f:
for i, line in enumerate(f):
if i == header_line:
assert line.strip() == "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSAMPLE"
elif i == header_line + 1:
# Get the first, second, and eighth fields from the line
fields = line.strip().split('\t')
assert fields[0] == "21"
assert fields[1] == "14458394"
assert fields[7] == "END=14458394;SVTYPE=INS;SVLEN=1341;SUPPORT=1;SVMETHOD=CONTEXTSVv0.1;ALN=CIGARINS,;CLIPSUP=0;REPTYPE=NA;HMM=0.000000"
elif i == header_line + 2:
fields = line.strip().split('\t')
assert fields[0] == "21"
assert fields[1] == "14458394"
assert fields[7] == "END=14458394;SVTYPE=INS;SVLEN=1344;SUPPORT=1;SVMETHOD=CONTEXTSVv0.1;ALN=CIGARINS,;CLIPSUP=0;REPTYPE=NA;HMM=0.000000"
break

0 comments on commit 866fa6a

Please sign in to comment.