Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DO NOT MERGE UNTIL TESTED Feature/add threadpools #27

Open
wants to merge 4 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 23 additions & 21 deletions c/bam2bedgraph.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,15 @@
#include <getopt.h>
#include <stdlib.h>
#include <string.h>
#include "htslib/thread_pool.h"
#include "bam_access.h"
#include "utils.h"

// Command-line option state for bam2bedgraph.
// Input path taken from -i; "-" is the special name htslib recognises, and any
// other value is checked for existence during option handling.
static char *input_file = NULL;
// Output path; NULL or "/dev/stdout" is normalised to "-" during option handling.
static char *output_file = NULL;
// Optional region string: when NULL main() calls process_bam_file, otherwise
// it is handed to process_bam_region.
static char *region = NULL;
// Read-filter value parsed from -f with sscanf("%i") and forwarded to the
// process_bam_* calls.
static int filter = 0;
// Thread count forwarded to process_bam_file / process_bam_region (0 by default).
// NOTE(review): "@:" is in the getopt string, but no `case '@'` assigning this
// variable is visible in this view — confirm the option is actually parsed.
int nthreads = 0; // shared pool
// presumably set by -a/--overlap to select the overlap-aware pileup callback — TODO confirm
uint8_t is_overlap = 0;

KHASH_MAP_INIT_STR(strh,uint8_t)
Expand All @@ -49,19 +51,20 @@ void print_usage (int exit_code){

printf ("Usage: bam2bedgraph -i input.[cr|b]am -o file [-r region] [-h] [-v]\n\n");
printf ("Create a BEDGraph file of genomic coverage. BAM file must be sorted.\n");
printf ("-i --input Path to bam/cram input file. [default: stdin]\n");
printf ("-o --output File path for output. [default: stdout]\n\n");
printf ("-i --input Path to bam/cram input file. [default: stdin]\n");
printf ("-o --output File path for output. [default: stdout]\n\n");
printf ("Optional:\n");
printf ("-r --region Region in bam to access.\n");
printf ("-f --filter Ignore reads with the filter flags [int].\n");
printf ("-a --overlap Use overlapping read check.\n\n");
printf ("-r --region Region in bam to access.\n");
printf ("-f --filter [int] Ignore reads with the filter flags.\n");
printf ("-@ --num_threads [int] Use thread pool with specified number of threads.\n");
printf ("-a --overlap Use overlapping read check.\n\n");
printf ("Other:\n");
printf ("-h --help Display this usage information.\n");
printf ("-v --version Prints the version number.\n\n");
printf ("-h --help Display this usage information.\n");
printf ("-v --version Prints the version number.\n\n");
exit(exit_code);
}

void options(int argc, char *argv[]){
int options(int argc, char *argv[]){
const struct option long_opts[] =
{
{"version", no_argument, 0, 'v'},
Expand All @@ -71,6 +74,7 @@ void options(int argc, char *argv[]){
{"filter",required_argument,0,'f'},
{"output",required_argument,0,'o'},
{"overlap", no_argument, 0, 'a'},
{"num_threads",required_argument,0,'@'},
{"rna",no_argument,0, 'a'},
{ NULL, 0, NULL, 0}

Expand All @@ -80,7 +84,7 @@ void options(int argc, char *argv[]){
int iarg = 0;

//Iterate through options
while((iarg = getopt_long(argc, argv, "i:o:r:f:avh", long_opts, &index)) != -1){
while((iarg = getopt_long(argc, argv, "i:o:r:f:@:avh", long_opts, &index)) != -1){
switch(iarg){
case 'i':
input_file = optarg;
Expand All @@ -95,10 +99,7 @@ void options(int argc, char *argv[]){
break;

case 'f':
if(sscanf(optarg, "%i", &filter) != 1){
printf("Error parsing -f argument '%s'. Should be an integer > 0",optarg);
print_usage(1);
}
check(sscanf(optarg, "%i", &filter) == 1, "Error parsing -f argument '%s'. Should be an integer > 0",optarg);
break;

case 'a':
Expand Down Expand Up @@ -129,16 +130,16 @@ void options(int argc, char *argv[]){
input_file = "-"; // htslib recognises this as a special case
}
if (strcmp(input_file,"-") != 0) {
if(check_exist(input_file) != 1){
printf("Input file (-i) %s does not exist.\n",input_file);
print_usage(1);
}
check(check_exist(input_file) == 1, "Input file (-i) %s does not exist.\n",input_file);
}
if (output_file==NULL || strcmp(output_file,"/dev/stdout")==0) {
output_file = "-"; // we recognise this as a special case
}

return;
return 0;
error:
print_usage(1);
return -1;
}

// callback for bam_plbuf_init()
Expand Down Expand Up @@ -203,7 +204,8 @@ static int pileup_func_overlap(uint32_t tid, uint32_t position, int n, const bam
}

int main(int argc, char *argv[]){
options(argc, argv);
int chk_opt = options(argc, argv);
check(chk_opt==0,"Error parsing options.");
tmpstruct_t tmp;
FILE *out = NULL;
if (strcmp(output_file,"-")==0) {
Expand All @@ -221,10 +223,10 @@ int main(int argc, char *argv[]){
func_reg = &pileup_func_overlap;
}
if(region == NULL){
check = process_bam_file(input_file, func, &tmp, filter, NULL);
check = process_bam_file(input_file, func, &tmp, filter, nthreads,NULL);
check(check==1,"Error parsing bam file.");
}else{
check = process_bam_region(input_file, func_reg, &tmp, filter, region, NULL);
check = process_bam_region(input_file, func_reg, &tmp, filter, region, nthreads, NULL);
check(check==1,"Error parsing bam region.");
}
fprintf(out,"%s\t%d\t%d\t%d\n", tmp.head->target_name[tmp.ltid], tmp.lstart,tmp.lpos+1, tmp.lcoverage);
Expand Down
50 changes: 23 additions & 27 deletions c/bam2bw.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ int filter = 4;
// NOTE(review): no use of `base` is visible in this view — purpose to be confirmed.
char base = 0;
// presumably set by -a/--overlap ("Use overlapping read check") — TODO confirm
uint8_t is_overlap = 0;
// presumably set by -z/--include-zeroes to also emit zero-coverage regions — TODO confirm
int include_zeroes = 0;
// Thread count parsed from -@/--num_threads with sscanf("%i") and forwarded to
// process_bam_region; stays 0 when the option is not given.
int nthreads = 0; // shared pool
// NOTE(review): the roles of `single` and `last_contig` are not visible in this view.
uint32_t single = 1;
char *last_contig = "";

Expand All @@ -67,6 +68,7 @@ void print_usage (int exit_code){
printf("-c --region [file] A samtools style region (contig:start-stop) or a bed file of regions over which to produce the bigwig file\n");
printf("-z --include-zeroes Include zero coverage regions as additional entries to the bw file\n");
printf("-r --reference [file] Path to reference genome.fa file (required for cram if ref_path cannot be resolved)\n");
printf("-@ --num_threads [int] Use thread pool with specified number of threads.\n");
printf("-a --overlap Use overlapping read check\n\n");
printf ("Other:\n");
printf("-h --help Display this usage information.\n");
Expand All @@ -93,7 +95,7 @@ int get_int_length(int input){
return (input == 0 ? 1 : (int)(log10(input)+1));
}

void setup_options(int argc, char *argv[]){
int setup_options(int argc, char *argv[]){
const struct option long_opts[] =
{
{"input", required_argument, 0, 'i'},
Expand All @@ -103,6 +105,7 @@ void setup_options(int argc, char *argv[]){
{"reference",required_argument, 0, 'r'},
{"include-zeroes",no_argument, 0, 'z'},
{"overlap", no_argument, 0, 'a'},
{"num_threads",required_argument,0,'@'},
{"help", no_argument, 0, 'h'},
{"version", no_argument, 0, 'v'},
{ NULL, 0, NULL, 0}
Expand All @@ -113,24 +116,21 @@ void setup_options(int argc, char *argv[]){
int iarg = 0;

//Iterate through options
while((iarg = getopt_long(argc, argv, "F:i:o:c:r:azhv",long_opts, &index)) != -1){
while((iarg = getopt_long(argc, argv, "F:i:o:c:r:@:azhv",long_opts, &index)) != -1){
switch(iarg){
case 'F':
if(sscanf(optarg, "%i", &filter) != 1){
fprintf(stderr,"Error parsing -F|--filter argument '%s'. Should be an integer > 0",optarg);
print_usage(1);
}
check(sscanf(optarg, "%i", &filter)==1,"Error parsing -F|--filter argument '%s'. Should be an integer > 0",optarg);
break;
case 'i':
input_file = optarg;
if(check_exist(input_file) != 1){
fprintf(stderr,"Input bam file %s does not appear to exist.\n",input_file);
print_usage(1);
}
check(check_exist(input_file)==1, "Input bam file %s does not appear to exist.\n",input_file);
break;
case 'o':
out_file = optarg;
break;
case '@':
check(sscanf(optarg, "%i", &nthreads)==1, "Error parsing -@ argument '%s'. Should be an integer > 0", optarg);
break;
case 'h':
print_usage (0);
break;
Expand All @@ -141,10 +141,7 @@ void setup_options(int argc, char *argv[]){
region_store = optarg;
//First check for a region format
int res = check_region_string(region_store);
if(res<0){
fprintf(stderr,"Region %s is not in correct format or not an existing bed file.\n",region_store);
print_usage(1);
}
check(res>=0,"Region %s is not in correct format or not an existing bed file.\n",region_store);
break;
case 'r':
reference = optarg;
Expand All @@ -164,17 +161,16 @@ void setup_options(int argc, char *argv[]){

}//End of iteration through options

if(input_file==NULL){
fprintf(stderr,"Required option -i|--input-bam not defined.\n");
print_usage(1);
}
check(input_file!=NULL, "Required option -i|--input-bam not defined.\n");

if(is_base && region_store==NULL){
fprintf(stderr,"Option -r|--region must be used with the -b|--base option.\n");
print_usage(1);
}
if(is_base){
check(region_store!=NULL, "Option -r|--region must be used with the -b|--base option.\n");
}

return;
return 0;
error:
print_usage(1);
return -1;
}

// callback for bam_plbuf_init()
Expand Down Expand Up @@ -301,7 +297,8 @@ uint32_t getContigLength(char *contig,chromList_t *chromList){
}

int main(int argc, char *argv[]){
setup_options(argc, argv);
int check_opt = setup_options(argc, argv);
check(check_opt==0,"Error parsing options.");
tmpstruct_t tmp;
int no_of_regions = 0;

Expand Down Expand Up @@ -350,8 +347,7 @@ int main(int argc, char *argv[]){
char *contig = malloc(sizeof(char)*1024);
check_mem(contig);
int beg,end;
int chk = sscanf(region_store,"%[^:]:%d-%d",contig,&beg,&end);
check(chk==3,"Error reading line '%s' from regions bed file.",region_store);
check(sscanf(region_store,"%[^:]:%d-%d",contig,&beg,&end)==3,"Error reading line '%s' from regions bed file.",region_store);

//Attempt to add contig as key to hash
int absent;
Expand Down Expand Up @@ -435,7 +431,7 @@ int main(int argc, char *argv[]){
}
int i=0;
for(i=0;i<no_of_regions;i++){
chck = process_bam_region(input_file, func_reg, &tmp, filter, our_region_list[i], reference);
chck = process_bam_region(input_file, func_reg, &tmp, filter, our_region_list[i], nthreads, reference);
check(chck==1,"Error parsing bam region.");
start = tmp.lstart;
stop = tmp.lpos+1;
Expand Down
42 changes: 21 additions & 21 deletions c/bam2bwbases.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ uint8_t is_base = 0;
// Set to 1 by the -a/--overlap option in setup_options().
uint8_t is_overlap = 0;
// Read-filter value; defaults to 4, can be overridden with -F (parsed via
// sscanf("%i")), and is forwarded to process_bam_region_bases.
int filter = 4;
// NOTE(review): no use of `base` is visible in this view — purpose to be confirmed.
char base = 0;
// Thread count parsed from -@/--num_threads with sscanf("%i") and forwarded to
// process_bam_region_bases; stays 0 when the option is not given.
int nthreads = 0; // shared pool
// NOTE(review): the role of `single` is not visible in this view.
uint32_t single = 1;

void print_usage (int exit_code){
Expand All @@ -64,6 +65,7 @@ void print_usage (int exit_code){
printf("Optional: \n");
printf("-c --region [file] A samtools style region (contig:start-stop) or a bed file of regions over which to produce the bigwig file\n");
printf("-r --reference [file] Path to reference genome.fa file (required for cram if ref_path cannot be resolved)\n");
printf("-@ --num_threads [int] Use thread pool with specified number of threads.\n");
printf("-a --overlap Use overlapping read check\n\n");
printf ("Other:\n");
printf("-h --help Display this usage information.\n");
Expand All @@ -90,7 +92,7 @@ int get_int_length(int input){
return (input == 0 ? 1 : (int)(log10(input)+1));
}

void setup_options(int argc, char *argv[]){
int setup_options(int argc, char *argv[]){
const struct option long_opts[] =
{
{"input", required_argument, 0, 'i'},
Expand All @@ -99,6 +101,7 @@ void setup_options(int argc, char *argv[]){
{"region",required_argument, 0, 'c'},
{"reference",required_argument, 0, 'r'},
{"overlap", no_argument, 0, 'a'},
{"num_threads",required_argument,0,'@'},
{"help", no_argument, 0, 'h'},
{"version", no_argument, 0, 'v'},
{ NULL, 0, NULL, 0}
Expand All @@ -109,24 +112,21 @@ void setup_options(int argc, char *argv[]){
int iarg = 0;

//Iterate through options
while((iarg = getopt_long(argc, argv, "F:i:o:c:r:ahv",long_opts, &index)) != -1){
while((iarg = getopt_long(argc, argv, "F:i:o:c:r:@:ahv",long_opts, &index)) != -1){
switch(iarg){
case 'F':
if(sscanf(optarg, "%i", &filter) != 1){
fprintf(stderr,"Error parsing -F|--filter argument '%s'. Should be an integer > 0",optarg);
print_usage(1);
}
check(sscanf(optarg, "%i", &filter)==1, "Error parsing -F|--filter argument '%s'. Should be an integer > 0",optarg);
break;
case 'i':
input_file = optarg;
if(check_exist(input_file) != 1){
fprintf(stderr,"Input bam file %s does not appear to exist.\n",input_file);
print_usage(1);
}
check(check_exist(input_file)==1, "Input bam file %s does not appear to exist.\n",input_file);
break;
case 'o':
out_file = optarg;
break;
case '@':
check(sscanf(optarg, "%i", &nthreads)==1, "Error parsing -@ argument '%s'. Should be an integer > 0", optarg);
break;
case 'h':
print_usage (0);
break;
Expand All @@ -137,10 +137,7 @@ void setup_options(int argc, char *argv[]){
region_store = optarg;
//First check for a region format
int res = check_region_string(region_store);
if(res<0){
fprintf(stderr,"Region %s is not in correct format or not an existing bed file.\n",region_store);
print_usage(1);
}
check(res>=0, "Region %s is not in correct format or not an existing bed file.\n",region_store);
break;
case 'a':
is_overlap = 1;
Expand All @@ -157,15 +154,17 @@ void setup_options(int argc, char *argv[]){

}//End of iteration through options

if(input_file==NULL){
fprintf(stderr,"Required option -i|--input-bam not defined.\n");
print_usage(1);
}
check(input_file!=NULL, "Required option -i|--input-bam not defined.\n");

if(out_file == NULL){
out_file = "output.bam.bw";
}
return;

return 0;

error:
print_usage (1);
return -1;
}

// callback for bam_plbuf_init()
Expand Down Expand Up @@ -296,7 +295,8 @@ bigWigFile_t *initialise_bw_output(char *out_file, chromList_t *chromList){
}

int main(int argc, char *argv[]){
setup_options(argc, argv);
int chk_opt = setup_options(argc, argv);
check(chk_opt==0,"Error parsing options");
tmpstruct_t *perbase = NULL;
int no_of_regions = 0;
int sq_lines = get_no_of_SQ_lines(input_file);
Expand Down Expand Up @@ -435,7 +435,7 @@ int main(int argc, char *argv[]){
perbase[k].reg_start = sta;
perbase[k].reg_stop = sto;
}
chck = process_bam_region_bases(input_file, func_reg, perbase, filter, our_region_list[i], reference);
chck = process_bam_region_bases(input_file, func_reg, perbase, filter, our_region_list[i], nthreads, reference);
check(chck==1,"Error processing bam region.");
int b=0;
for(b=0;b<4;b++){
Expand Down
Loading