Skip to content

Commit

Permalink
feat: split main() to record::process_record
Browse files Browse the repository at this point in the history
  • Loading branch information
dongspy committed Nov 2, 2023
1 parent f85d5b6 commit 70d41bf
Show file tree
Hide file tree
Showing 8 changed files with 342 additions and 40 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

95 changes: 91 additions & 4 deletions src/bin/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use tracing::info;
use legoseq::blockinfo::{get_block_info_fasta_from_file, BLOCKFLAGS};
use legoseq::readblockalign::ReadBlockAlign;
use legoseq::utils::get_reader;
use legoseq::record::{process_record, FastxRecord};

#[derive(Parser)]
#[command(version, author, about, long_about = None)]
Expand All @@ -28,7 +29,7 @@ struct Cli {
#[arg(long, value_name = "FILE")]
in2: Option<String>,
/// input type, fasta or fastq, default is fastq
#[arg(long, value_name = "INPUT_TYPE", default_value="fastq")]
#[arg(long, value_name = "INPUT_TYPE", default_value = "fastq")]
input_type: Option<String>,
/// fasta file
#[arg(long, value_name = "FILE")]
Expand All @@ -51,6 +52,7 @@ struct Cli {
template: Option<String>,
}


fn main() {
let cli = Cli::parse();
let threads = &cli.threads.to_owned();
Expand Down Expand Up @@ -80,7 +82,92 @@ fn main() {

let outdir = Path::new(outdir);
let read_info_file = outdir.join(format!("{}.{}", prefix, "read_info.stat.tsv"));
let read_info_handle = Arc::new(Mutex::new(File::create(read_info_file.clone()).unwrap()));
let read_info_handle: Arc<Mutex<File>> = Arc::new(Mutex::new(File::create(read_info_file.clone()).unwrap()));
BLOCKFLAGS.lock().unwrap().iter().for_each(|(k, v)| {
writeln!(read_info_handle.lock().unwrap(), "#idx:flag={}:{}", k, v).unwrap();
});
// 统计所有 flag 的数目
let flag_stat_hash: Arc<Mutex<HashMap<usize, usize>>> = Arc::new(Mutex::new(HashMap::new()));

let record_r1 = fastq::Reader::new(get_reader(r1_file)).records();
if let Some(r2_file) = r2_file {
// output read info statistics

let out_fq_file_r1 = outdir.join(format!("{}.{}", prefix, "template.r1.fastq"));
let out_fq_file_r2 = outdir.join(format!("{}.{}", prefix, "template.r2.fastq"));
let ud_fq_file_r1 = outdir.join(format!("{}.{}", prefix, "undetermined.r1.fastq"));
let ud_fq_file_r2 = outdir.join(format!("{}.{}", prefix, "undetermined.r2.fastq"));

let block_info_list = get_block_info_fasta_from_file(block_info_file, fasta_file).unwrap();
let out_fq_handle_vec: Arc<Mutex<Vec<File>>> = Arc::new(Mutex::new(vec![
File::create(out_fq_file_r1.clone()).unwrap(),
File::create(out_fq_file_r2.clone()).unwrap(),
]));
let ud_fq_handle_vec: Arc<Mutex<Vec<File>>> = Arc::new(Mutex::new(vec![
File::create(ud_fq_file_r1.clone()).unwrap(),
File::create(ud_fq_file_r2.clone()).unwrap(),
]));

let barcode_handle_hash: DashMap<String, Arc<Mutex<Vec<File>>>> = DashMap::new();

let record_r2 = fastq::Reader::new(get_reader(r2_file)).records();
record_r1
.into_iter()
.zip(record_r2.into_iter())
.par_bridge()
.for_each(|(record_r1, record_r2)| {
let recordx = FastxRecord{
record1: record_r1.unwrap(),
record2: Some(record_r2.unwrap()),
};
let barcode_handle_hash = barcode_handle_hash.clone();
process_record(
recordx,
&block_info_list,
prefix,
outdir,
barcode_handle_hash,
template.clone(),
out_fq_handle_vec.clone(),
ud_fq_handle_vec.clone(),
read_info_handle.clone(),
flag_stat_hash.clone(),
)
})
}
}


fn main0() {
let cli = Cli::parse();
let threads = &cli.threads.to_owned();
let outdir = &cli.outdir;
let r1_file = &cli.in1;
let r2_file = &cli.in2;
let fasta_file = &cli.fasta;
let block_info_file = &cli.block_info;
let prefix = &cli.prefix;
let template: &Option<String> = &cli.template;
tracing_subscriber::fmt::init();
info!("Start");

rayon::ThreadPoolBuilder::new()
.num_threads(*threads)
.build_global()
.unwrap();

//minijinja
let template_string = fs::read_to_string(template.clone().unwrap()).expect("无法读取模板文件");
// 创建一个新的 MiniJinja 环境
let env = Environment::new();
// 从字符串创建一个模板
let template: Template<'_, '_> = env
.template_from_str(&template_string)
.expect("无法从字符串创建模板");

let outdir = Path::new(outdir);
let read_info_file = outdir.join(format!("{}.{}", prefix, "read_info.stat.tsv"));
let read_info_handle: Arc<Mutex<File>> = Arc::new(Mutex::new(File::create(read_info_file.clone()).unwrap()));
BLOCKFLAGS.lock().unwrap().iter().for_each(|(k, v)| {
writeln!(read_info_handle.lock().unwrap(), "#idx:flag={}:{}", k, v).unwrap();
});
Expand All @@ -97,11 +184,11 @@ fn main() {
let ud_fq_file_r2 = outdir.join(format!("{}.{}", prefix, "undetermined.r2.fastq"));

let block_info_list = get_block_info_fasta_from_file(block_info_file, fasta_file).unwrap();
let out_fq_handle_vec = Arc::new(Mutex::new(vec![
let out_fq_handle_vec: Arc<Mutex<Vec<File>>> = Arc::new(Mutex::new(vec![
File::create(out_fq_file_r1.clone()).unwrap(),
File::create(out_fq_file_r2.clone()).unwrap(),
]));
let ud_fq_handle_vec = Arc::new(Mutex::new(vec![
let ud_fq_handle_vec: Arc<Mutex<Vec<File>>> = Arc::new(Mutex::new(vec![
File::create(ud_fq_file_r1.clone()).unwrap(),
File::create(ud_fq_file_r2.clone()).unwrap(),
]));
Expand Down
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ pub mod utils;
// pub mod wapper;
pub mod readblockalign;
pub mod wasm;

pub mod record;
26 changes: 15 additions & 11 deletions src/readblockalign.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,27 @@
#![allow(unused_assignments)]
use std::collections::HashMap;

use bio::io::fastq::Record;
// use bio::io::fastq::Record;
use bio_types::sequence::SequenceRead;
use minijinja::value::Value;
use minijinja::Template;
use serde::{Deserialize, Serialize};
use bio::io::{fasta, fastq};

use crate::aligner::Alignment;
use crate::blockinfo::get_block_info_fasta;
// use crate::blockinfo::get_block_info_fasta;
use crate::utils::Strand;
use crate::utils::{check_vec_equal, dna_to_spans};
use crate::{blockalign::BlockAlign, blockinfo::BlockInfo};
use crate::record::Record;


/// save the fastq record and its block align information
#[derive(Debug, Clone)]
pub struct ReadBlockAlign {
pub struct ReadBlockAlign<R:Record+Clone> {
pub block_idx_list: Vec<String>,
pub record: Record,
pub record: R,
pub block_align: HashMap<String, Option<BlockAlign>>, // blockname: block_align
strand: Strand,
}
Expand Down Expand Up @@ -48,10 +52,10 @@ pub fn block_align_read(read: &[u8], block_info_list: &[BlockInfo]) -> Vec<Optio
block_align_list
}

impl ReadBlockAlign {
impl<R:Record + Clone> ReadBlockAlign<R> where R: Clone {
pub fn new(
block_idx_list: &[String],
record: &Record,
record: &R,
block_align: &HashMap<String, Option<BlockAlign>>,
strand: Strand,
) -> Self {
Expand Down Expand Up @@ -91,7 +95,7 @@ impl ReadBlockAlign {
}

/// 获取 blockinfo 各个 block 的比对情况,包括 fix 和 variable
pub fn read_block_info(record: &Record, block_info_list: &[BlockInfo]) -> Self {
pub fn read_block_info(record: &R, block_info_list: &[BlockInfo]) -> Self {
let read = record.seq();
let mut block_align_hash: HashMap<String, Option<BlockAlign>> = HashMap::new();
let read_len = read.len();
Expand Down Expand Up @@ -265,7 +269,7 @@ impl ReadBlockAlign {
&self,
block_info_list: &[BlockInfo],
export_block: &[String],
) -> Option<Record> {
) -> Option<fastq::Record> {
let record = &self.record;
let seq_len = record.seq().len();

Expand Down Expand Up @@ -310,7 +314,7 @@ impl ReadBlockAlign {
}

let new_record =
Record::with_attrs(record.id(), record.desc(), &new_seq_vec, &new_qual_vec);
fastq::Record::with_attrs(record.id(), record.desc(), &new_seq_vec, &new_qual_vec);
Some(new_record)
}

Expand Down Expand Up @@ -348,7 +352,7 @@ impl ReadBlockAlign {
seq_hash.insert(
"read".to_string(),
JinjaSeq {
name: String::from_utf8(self.record.name().to_vec()).unwrap_or("".to_string()),
name: self.record.id().to_string(),
seq: String::from_utf8(self.record.seq().to_vec()).unwrap_or("".to_string()),
qual: String::from_utf8(self.record.qual().to_vec()).unwrap_or("".to_string()),
..Default::default()
Expand Down Expand Up @@ -420,7 +424,7 @@ pub fn to_str(s: Option<String>) -> String {
}

impl JinjaSeq {
pub fn new(name: &str, record: &Record, start: usize, end: usize, strand: &Strand) -> Self {
pub fn new<R:Record>(name: &str, record: &R, start: usize, end: usize, strand: &Strand) -> Self {
let seq_len = record.seq().len();
let seq = &record.seq().to_vec()[start.min(seq_len)..end.min(seq_len)];
let qual = &record.qual().to_vec()[start.min(seq_len)..end.min(seq_len)];
Expand Down Expand Up @@ -463,7 +467,7 @@ CCCCCCCCCCCCC";
let read =
b"CTGGGGGGGGGGGGGGCGATCGATCGTAAACCCCCCCCCCCCCCAACGCTTTTTTTTTTTTTTCTCGCTATATCGTATCGATGTAC";
let read = b"AAAAAAAAAAAAAATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGCCCCCCCCCCCCC";
let record = Record::with_attrs("read01_rev", None, read, read);
let record = fastq::Record::with_attrs("read01_rev", None, read, read);
let read_block_align = ReadBlockAlign::read_block_info(&record, &blockinfo_vec);
let block_align = &read_block_align.block_align;
for (k, v) in block_align {
Expand Down
Loading

0 comments on commit 70d41bf

Please sign in to comment.