diff --git a/sumstats/load.py b/sumstats/load.py index 21c7063..df9e3f8 100644 --- a/sumstats/load.py +++ b/sumstats/load.py @@ -186,7 +186,6 @@ def load_study_info(self): """ sql = sq.sqlClient(self.sqldb) identifier = self.study + "+" + self.qtl_group + "+" + self.quant_method - print(self.trait_file) trait_file_id = os.path.basename(self.trait_file) data = [self.study, identifier, self.qtl_group, self.tissue, trait_file_id, self.tissue_ont, self.treatment, self.treatment_ont, self.quant_method ] sql.cur.execute("insert or ignore into study_info values (?,?,?,?,?,?,?,?,?)", data) diff --git a/sumstats/utils/argument_utils.py b/sumstats/utils/argument_utils.py index 7b824e2..12e4c8b 100644 --- a/sumstats/utils/argument_utils.py +++ b/sumstats/utils/argument_utils.py @@ -37,8 +37,6 @@ def convert_search_args(args): paginate = args.paginate chromosome = args.chr - if chromosome is not None: - chromosome = int(chromosome) pval_interval = args.pval pval_interval = FloatInterval().set_string_tuple(pval_interval) diff --git a/sumstats/utils/sqlite_client.py b/sumstats/utils/sqlite_client.py index 596dc6b..2e3e706 100644 --- a/sumstats/utils/sqlite_client.py +++ b/sumstats/utils/sqlite_client.py @@ -86,19 +86,19 @@ def get_study_context_meta(self, identifier): "condition_label": None } - #self.cur.execute("SELECT * FROM study_info where identifier =?", (identifier,)) + self.cur.execute("SELECT * FROM study_info where identifier =?", (identifier,)) - self.cur.execute(""" - SELECT s.study, s.identifier, q.qtl_group, q.cell_type, s.trait_file, q.ontology_term, q.condition, q.condition_label - FROM qtl_context_mapping AS q - JOIN study_info AS s - ON q.study = s.study AND q.qtl_group = s.qtl_group - WHERE s.identifier =? - """, (identifier,)) + #self.cur.execute(""" + # SELECT s.study, s.identifier, q.qtl_group, q.cell_type, s.trait_file, q.ontology_term, q.condition, q.condition_label + # FROM qtl_context_mapping AS q + # JOIN study_info AS s + # ON q.study = s.study AND q.qtl_group = s.qtl_group + # WHERE s.identifier =? + # """, (identifier,)) data = self.cur.fetchone() if data: - data_dict["study"], data_dict["identifier"], data_dict["qtl_group"], data_dict["tissue_label"], data_dict["phen"], data_dict["tissue_ont"], data_dict["condition"], data_dict["condition_label"] = data + data_dict["study"], data_dict["identifier"], data_dict["qtl_group"], data_dict["tissue_label"], data_dict["phen"], data_dict["tissue_ont"], data_dict["condition"], _ , data_dict["quant_method"], data_dict["condition_label"] = data return data_dict def get_traits(self): @@ -305,7 +305,8 @@ def commit(self): self.cur.execute("COMMIT") def drop_rsid_index(self): - self.cur.execute("DROP INDEX rsid_idx") + self.cur.execute("DROP INDEX IF EXISTS rsid_idx") + def create_rsid_index(self): self.cur.execute("CREATE INDEX rsid_idx on snp (rsid)") diff --git a/sumstats/utils/vcf_to_sqlite.py b/sumstats/utils/vcf_to_sqlite.py index 850e00c..a5033f9 100755 --- a/sumstats/utils/vcf_to_sqlite.py +++ b/sumstats/utils/vcf_to_sqlite.py @@ -6,31 +6,36 @@ def main(): argparser = argparse.ArgumentParser() - argparser.add_argument('-vcf', help='The name of the vcf to be processed', required=True) + argparser.add_argument('-vcf', help='The name of the vcf to be processed', required=False) argparser.add_argument('-db', help='The name of the database to load to', required=True) + argparser.add_argument('-index', help='create index on the rsid', required=False, action='store_true') args = argparser.parse_args() db = args.db - vcf = args.vcf + if args.vcf: + vcf = args.vcf - vcfdf = pd.read_csv(vcf, sep='\t', - comment='#', - header=None, - dtype=str, - usecols=[0, 1, 2], - names=['CHROM', 'POS', 'RSID'] - ) - - vcfdf.RSID = vcfdf.RSID.str.replace("rs","") - vcfdf.CHROM =vcfdf.CHROM.replace({'X': 23, 'Y': 24, 'MT': 25}) - - sql = sq.sqlClient(db) - sql.drop_rsid_index() - list_of_tuples = list(vcfdf.itertuples(index=False, name=None)) - sql.cur.execute('BEGIN TRANSACTION') - sql.cur.executemany("insert or ignore into snp(chr, position, rsid) values (?, ?, ?)", list_of_tuples) - sql.cur.execute('COMMIT') - sql.create_rsid_index() + vcfdf = pd.read_csv(vcf, sep='\t', + comment='#', + header=None, + dtype=str, + usecols=[0, 1, 2], + names=['CHROM', 'POS', 'RSID'] + ) + + vcfdf.RSID = vcfdf.RSID.str.replace("rs","") + sql = sq.sqlClient(db) + sql.drop_rsid_index() + list_of_tuples = list(vcfdf.itertuples(index=False, name=None)) + sql.cur.execute('BEGIN TRANSACTION') + sql.cur.executemany("insert or ignore into snp(chr, position, rsid) values (?, ?, ?)", list_of_tuples) + sql.cur.execute('COMMIT') + if args.index: + sql = sq.sqlClient(db) + sql.drop_rsid_index() + sql.create_rsid_index() + else: + print("nothing left to do") if __name__ == '__main__':