Skip to content

Commit

Permalink
Allow testing only within a given chrom:start-end
Browse files Browse the repository at this point in the history
This limits both single variant and group test results to only include the range specified by the user

Implements #26
  • Loading branch information
welchr committed May 9, 2020
1 parent 4d970b4 commit 7d2994e
Show file tree
Hide file tree
Showing 3 changed files with 161 additions and 158 deletions.
258 changes: 140 additions & 118 deletions raremetal/src/Meta.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@ Meta::Meta() {
RegionStatus = false;
Region = "";
cond = "";
Chr = "";
Start = -1;
End = -1;
region_chrom = "";
region_start = -1;
region_end = -1;
useExactMetaMethod = false; // use Jingjing's exact method
normPop = false; // correct for population stratification
simplifyCovLoad = false; // only load cov matrix between selected marker for group test
Expand Down Expand Up @@ -122,9 +122,9 @@ void Meta::Prepare() {
printf("Restrict analysis to region %s!\n", Region.c_str());
StringArray tf;
tf.AddTokens(Region, ":-");
Chr = tf[0];
Start = tf[1].AsInteger();
End = tf[2].AsInteger();
region_chrom = tf[0];
region_start = tf[1].AsInteger();
region_end = tf[2].AsInteger();
}

SampleSize.Dimension(scorefile.Length());
Expand Down Expand Up @@ -673,48 +673,66 @@ void Meta::PoolSummaryStat(GroupFromAnnotation &group)
buffer.ReadLine(file);
bool adjust;
tellRvOrRmw(buffer, adjust, marker_col, cov_col);
// if (!status)
// error("File %s is neither RMW or rvtest score file!\n", filename.c_str());
ifclose(file);
FormatAdjust.Push(adjust);

file = ifopen(filename, "r");
bool pass_header = 0;
while (!ifeof(file))
{
buffer.ReadLine(file);
if (buffer.Length() < 2)
{
continue;
}
if (!pass_header)
{
if (SampleSize[study] == -1)
{
if (buffer.Find("##AnalyzedSamples") != -1)
{
StringArray tokens;
tokens.AddTokens(buffer, "=");
SampleSize[study] = tokens[1].AsInteger();
continue;
}
}
if (buffer.FindChar('#') == -1 && buffer.Find("CHROM") == -1)
{
pass_header = 1;
}
}
if (!pass_header)
{
continue;
}
double current_chisq;
bool status = poolSingleRecord(study, current_chisq, duplicateSNP, adjust, buffer, covReader);
if (status)
{
chisq_study_i.Push(current_chisq);

while (!ifeof(file)) {
buffer.ReadLine(file);
if (buffer.Length() < 2) {
continue;
}

if (SampleSize[study] == -1) {
if (buffer.Find("##AnalyzedSamples") != -1) {
StringArray tokens;
tokens.AddTokens(buffer, "=");
SampleSize[study] = tokens[1].AsInteger();
continue;
}
}
if (buffer.FindChar('#') == -1 && buffer.Find("CHROM") == -1) {
break;
}
}

// If the user requested a specific region, then use tabix to seek the file forward to the line with
// the starting position of the region.
if (RegionStatus) {
Tabix tabix;
String score_index = filename + ".tbi";
StatGenStatus::Status libstatus = tabix.readIndex(score_index.c_str());

if (libstatus != StatGenStatus::SUCCESS) {
error("Cannot open tabix index for score statistic file %s\n", score_index.c_str());
}

bool status = SetIfilePosition(file, tabix, region_chrom, region_start);
if (!status) {
error("Cannot find position %s:%d-%d in score statistic file %s!\n", region_chrom.c_str(), region_start, region_end, filename.c_str());
}
}

bool region_done;
bool status;
while (!ifeof(file)) {
buffer.ReadLine(file);

double current_chisq;
region_done = false;
status = false;
poolSingleRecord(study, current_chisq, duplicateSNP, adjust, buffer, covReader, status, region_done);

if (region_done) {
break;
}

if (status) {
chisq_study_i.Push(current_chisq);
}
}

ifclose(file);

total_N += SampleSize[study];
Expand Down Expand Up @@ -1956,22 +1974,31 @@ int Meta::MatchOneAllele(String ref_current, int &idx)

// pool single record from score file
// if u2/v2 is generated, return true
bool Meta::poolSingleRecord(int study, double &current_chisq, int &duplicateSNP, bool adjust, String &buffer,
SummaryFileReader &covReader)
void Meta::poolSingleRecord(int study, double &current_chisq, int &duplicateSNP, bool adjust, String &buffer,
SummaryFileReader &covReader, bool &status, bool &region_done)
{
StringArray tokens;
tokens.AddTokens(buffer, SEPARATORS);
if (tokens[0].Find("#") != -1)
{
return 0;
return;
}

if (tokens[0].Find("chr") != -1)
{
tokens[0] = tokens[0].SubStr(3);
}

String chr_pos = tokens[0] + ":" + tokens[1];
String chrom = tokens[0];
int pos = atoi(tokens[1].c_str());

if (RegionStatus && (pos > region_end || chrom != region_chrom)) {
status = false;
region_done = true;
return;
}

String chr_pos = chrom + ":" + tokens[1];
std::string std_chrpos(chr_pos.c_str());
int direction_idx = directionByChrPos.Integer(chr_pos);

Expand All @@ -1982,7 +2009,7 @@ bool Meta::poolSingleRecord(int study, double &current_chisq, int &duplicateSNP,
duplicateSNP++;
fprintf(log, "Warning: variant %s from study %s is skipped because of duplicate records in the same study.\n",
chr_pos.c_str(), scorefile[study].c_str());
return 0;
return;
}

//POOLING STEP1: if fail HWE or CALLRATE then skip this record
Expand Down Expand Up @@ -2073,7 +2100,7 @@ bool Meta::poolSingleRecord(int study, double &current_chisq, int &duplicateSNP,
UpdateExcludedMarker(study, chr_pos, filter_type,
tokens[0] + ":" + tokens[1] + ":" + tokens[2] + ":" + tokens[3]);
fprintf(log,"Warning: variant %s from study %s is skipped because of call rate, HWE, or missing alleles.\n",chr_pos.c_str(),scorefile[study].c_str());
return 0;
return;
}

//STEP2: check if this position has been hashed. If yes, match alleles; if not, hash position and ref alt alleles.
Expand Down Expand Up @@ -2157,7 +2184,7 @@ bool Meta::poolSingleRecord(int study, double &current_chisq, int &duplicateSNP,
char direct = '!';
UpdateDirection(direction_idx, study, direct, chr_pos, true);
fprintf(log,"Warning: variant %s from study %s is skipped because alleles did not match previous study.\n",chr_pos.c_str(),scorefile[study].c_str());
return 0;
return;
}
if (match == 1)
{
Expand All @@ -2184,7 +2211,7 @@ bool Meta::poolSingleRecord(int study, double &current_chisq, int &duplicateSNP,
char direct = '!';
UpdateDirection(direction_idx, study, direct, chr_pos, true);
fprintf(log,"Warning: variant %s from study %s is skipped because alleles did not match previous study.\n",chr_pos.c_str(),scorefile[study].c_str());
return 0;
return;
}
UpdateStrIntHash(chr_pos, current_N, recSize);
if (match == 1)
Expand Down Expand Up @@ -2259,7 +2286,7 @@ bool Meta::poolSingleRecord(int study, double &current_chisq, int &duplicateSNP,
if (c1 + c2 == 0 || c2 + c3 == 0)
{
fprintf(log,"Warning: variant %s from study %s is skipped because variant is monomorphic.\n",chr_pos.c_str(),scorefile[study].c_str());
return 0;
return;
}

//push the chisq statistics for GC calculation
Expand All @@ -2278,7 +2305,8 @@ bool Meta::poolSingleRecord(int study, double &current_chisq, int &duplicateSNP,
updateSNPcond(study, flip, adjust, chr_pos, tokens, covReader);
}
}
return 1;

status = true;
}

// if dup marker, return true
Expand Down Expand Up @@ -3108,81 +3136,75 @@ void Meta::loadSingleCovInGroup(GroupFromAnnotation &group)
String filename = covfile[study];
IFILE covfile_;
covfile_ = ifopen(filename, "r");
if (covfile_ == NULL)
{
error("ERROR! Cannot open file: %s! Input cov file has to be bgzipped and tabix indexed using the following command:\n bgzip yourfile.singlevar.cov.txt; tabix -c \"#\" -s 1 -b 2 -e 2 yourprefix.singlevar.cov.txt.gz\n",
filename.c_str());
if (covfile_ == NULL) {
error("ERROR! Cannot open file: %s! Input cov file has to be bgzipped and tabix indexed using the following command:\n bgzip yourfile.singlevar.cov.txt; tabix -c \"#\" -s 1 -b 2 -e 2 yourprefix.singlevar.cov.txt.gz\n",filename.c_str());
}

bool pass_header = false;
bool newFormat = false;
while (!ifeof(covfile_)) {
String buffer;
buffer.ReadLine(covfile_);
if (!pass_header) {
if (buffer.Find("CHROM") == -1) {
continue;
}

// now check new or old format
StringArray tokens;
tokens.AddTokens(buffer, "\t ");
if (tokens[2] == "MARKERS_IN_WINDOW" && tokens[3] == "COV_MATRICES") {
newFormat = false;
}
else if (tokens[2] == "EXP" && tokens[3] == "COV_MATRICES") {
newFormat = true;
}
else if (tokens[3] == "NUM_MARKER" && tokens[4] == "MARKER_POS" && tokens[5] == "COV") {
newFormat = false;
}
else {
error("Covariance matrix is neither new or old format...are you using the right file?\n");
}

pass_header = true;
}
}

Tabix covtabix;
String tabix_name = filename + ".tbi";
StatGenStatus::Status libstatus = covtabix.readIndex(tabix_name.c_str());
if (RegionStatus)
{
if (libstatus != StatGenStatus::SUCCESS)
{
error("Cannot open tabix file %s!\n", tabix_name.c_str());
}
bool status = SetIfilePosition(covfile_, covtabix, Chr, Start);
if (!status)
{
error("Cannot find position %s:%d-%d in cov file %s!\n", Chr.c_str(), Start, End, filename.c_str());
}
if (RegionStatus) {
if (libstatus != StatGenStatus::SUCCESS) {
error("Cannot open tabix file %s!\n", tabix_name.c_str());
}
bool status = SetIfilePosition(covfile_, covtabix, region_chrom, region_start);
if (!status) {
error("Cannot find position %s:%d-%d in cov file %s!\n", region_chrom.c_str(), region_start, region_end, filename.c_str());
}
}

int m = 0;
bool pass_header = 0;
bool newFormat = 0;
while (!ifeof(covfile_))
{
String buffer;
buffer.ReadLine(covfile_);
if (!pass_header)
{
if (buffer.Find("CHROM") == -1)
{
continue;
}
// now check new or old format
StringArray tokens;
tokens.AddTokens(buffer, "\t ");
if (tokens[2] == "MARKERS_IN_WINDOW" && tokens[3] == "COV_MATRICES")
{
newFormat = 0;
} else if (tokens[2] == "EXP" && tokens[3] == "COV_MATRICES")
{
newFormat = 1;
} else if (tokens[3] == "NUM_MARKER" && tokens[4] == "MARKER_POS" && tokens[5] == "COV")
{
newFormat = 0;
} else
{
error("Covariance matrix is neither new or old format...are you using the right file?\n");
}
pass_header = 1;
continue;
while (!ifeof(covfile_)) {
String buffer;
buffer.ReadLine(covfile_);

StringArray tokens;
tokens.AddTokens(buffer, "\t ");
if (RegionStatus) {
if (tokens[1].AsInteger() > region_end || tokens[0] != region_chrom) { // out of this region or into another chromosome
break;
}
StringArray tokens;
tokens.AddTokens(buffer, "\t ");
if (RegionStatus)
{
if (tokens[1].AsInteger() > End || tokens[0] != Chr)
{ // out of this region or into another chromosome
break;
}
}
if (FormatAdjust[study])
{ // rvtest
readCovOldFormatLine(study, tokens, m);
} else
{
if (newFormat)
{
readCovNewFormatLine(study, tokens, m);
} else
{
readCovOldFormatLine(study, tokens, m);
}
}

if (FormatAdjust[study]) { // rvtest
readCovOldFormatLine(study, tokens, m);
} else {
if (newFormat) {
readCovNewFormatLine(study, tokens, m);
} else {
readCovOldFormatLine(study, tokens, m);
}
}
}
ifclose(covfile_);
printf("done\n");
Expand Down
12 changes: 6 additions & 6 deletions raremetal/src/Meta.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,11 @@ class Meta
int marker_col;
int cov_col;
bool altMAF; // if TRUE, exclude size of studies that do not contain that variant
bool RegionStatus; // if TRUE, restrict gene-based test to the specified region
bool RegionStatus; // if TRUE, restrict single-variant and gene-based test to the specified region
String Region; // raw region option
String Chr;
int Start;
int End; // 3 variables to define region
String region_chrom;
int region_start;
int region_end; // 3 variables to define region
FILE *log;
bool skipOutput;

Expand Down Expand Up @@ -229,8 +229,8 @@ class Meta

int MatchOneAllele(String refalt_current, int &marker_idx);

bool poolSingleRecord(int study, double &current_chisq, int &duplicateSNP, bool adjust, String &buffer,
SummaryFileReader &covReader);
void poolSingleRecord(int study, double &current_chisq, int &duplicateSNP, bool adjust, String &buffer,
SummaryFileReader &covReader, bool &status, bool &region_done);

void poolHeterogeneity(int study, bool adjust, String &buffer, SummaryFileReader &covReader);

Expand Down
Loading

0 comments on commit 7d2994e

Please sign in to comment.