-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #15 from biomedbigdata/shuffle_pheno
added phenotype shuffle option to epiJSON
- Loading branch information
Showing
6 changed files
with
105 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
CompileFlags: | ||
Add: [-std=c++20] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -128,4 +128,7 @@ docker/data | |
__pycache__ | ||
|
||
|
||
test_out | ||
test_out | ||
debug_scripts | ||
.cache | ||
compile_commands.json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
// | ||
// Created by juli on 08.11.23. | ||
// | ||
|
||
#include "PlinkShufflePhenotype.hpp" | ||
#include "../util/TimeLogger.hpp" | ||
|
||
namespace epi { | ||
|
||
/**
 * Stores the two path prefixes used by run().
 *
 * @param input_path  prefix of the file set to read; run() appends ".fam", ".bed" and ".bim" to it.
 * @param output_path prefix of the file set to write; run() appends the same three extensions.
 */
PlinkShufflePhenotype::PlinkShufflePhenotype(std::string input_path, std::string output_path)
    : input_path(input_path), output_path(output_path) {}
|
||
|
||
void PlinkShufflePhenotype::run(std::shared_ptr<DataModel> data) { | ||
TimeLogger logger("shufffle phenotype"); | ||
|
||
Logger::logLine("Read fam file"); | ||
CSVParser ind_parser; | ||
ind_parser.parse(input_path + ".fam", ' '); | ||
if (ind_parser.num_columns() < 6) ind_parser.parse(input_path + ".fam", '\t'); | ||
|
||
// create a list of indices and shuffle them -> output file will get the phenotypes in that order | ||
std::vector<size_t> index_list; | ||
index_list.reserve(ind_parser.num_rows()); | ||
for(size_t i = 0; i < ind_parser.num_rows();++i) { | ||
index_list.push_back(i); | ||
} | ||
std::shuffle(index_list.begin(), index_list.end(), data->random_device[omp_get_thread_num()]); | ||
|
||
|
||
std::ofstream pheno_file(output_path + ".fam"); | ||
|
||
for (size_t i = 0; i < ind_parser.num_rows(); ++i) { | ||
for (size_t col = 0; col < 5; ++col) { | ||
pheno_file << ind_parser.cell(i, col) << '\t'; | ||
} | ||
pheno_file << ind_parser.cell(index_list[i], 5) << '\n'; | ||
} | ||
pheno_file.close(); | ||
|
||
// copy bim and bed file | ||
std::filesystem::copy_file(input_path + ".bed", output_path + ".bed"); | ||
std::filesystem::copy_file(input_path + ".bim", output_path + ".bim"); | ||
|
||
|
||
logger.stop(); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
// | ||
// Created by juli on 08.11.23. | ||
// | ||
|
||
#ifndef GENEPISEEKER_PLINKSHUFFLEPHENOTYPE_HPP | ||
#define GENEPISEEKER_PLINKSHUFFLEPHENOTYPE_HPP | ||
|
||
#include "Job.hpp" | ||
|
||
namespace epi { | ||
|
||
class PlinkShufflePhenotype : public Job { | ||
public: | ||
PlinkShufflePhenotype(std::string input_path, std::string output_path); | ||
void run(std::shared_ptr<DataModel> data) override; | ||
|
||
private: | ||
std::string input_path; | ||
std::string output_path; | ||
}; | ||
|
||
} // epi | ||
|
||
#ifdef HEADER_ONLY | ||
#include "PlinkShufflePhenotype.cpp" | ||
#endif | ||
|
||
#endif //GENEPISEEKER_PLINKSHUFFLEPHENOTYPE_HPP |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters