Skip to content

Latest commit

 

History

History
1594 lines (1350 loc) · 54.9 KB

work_download-process-concatenate-genomes_S-cerevisiae-S-pombe.printed.md

File metadata and controls

1594 lines (1350 loc) · 54.9 KB

# work_download-process_genomes-S-cerevisiae-S-pombe.printed.md

Table of Contents
  1. Get situated
  2. Download S. pombe fastas, gff3s
    1. Printed
    2. Notes
  3. Download S. cerevisiae fastas, gff3
    1. Printed
  4. Prepare S. pombe fasta, gff3 for concatenation with S. cerevisiae
    1. Printed
  5. Prepare S. cerevisiae fasta, gff3 for concatenation with S. pombe
    1. Printed
  6. Concatenate processed fastas and gff3s in new directory combined_SC_SP/
    1. Printed
  7. Create bowtie2 indices for "combined_SC_SP.fa.gz"
    1. Printed
  8. Copy files to Rina and Rachel
    1. Printed


Get situated



Download S. pombe fastas, gff3s

Printed

Printed: Download *S. pombe* fastas, gff3s
❯ if [[ ! -d "fasta/" ]]; then mkdir "fasta/"; fi
mkdir: created directory 'fasta/'


❯ cd "fasta/"
/home/kalavatt/tsukiyamalab/kalavatt/genomes/Schizosaccharomyces_pombe/fasta


❯ u_fa="https://www.pombase.org/data/genome_sequence_and_features/genome_sequence"


❯ f_fa=(
>    Schizosaccharomyces_pombe_all_chromosomes.fa.gz
>    Schizosaccharomyces_pombe_chr_II_telomeric_gap.fa.gz
>    Schizosaccharomyces_pombe_chromosome_I.fa.gz
>    Schizosaccharomyces_pombe_chromosome_II.fa.gz
>    Schizosaccharomyces_pombe_chromosome_III.fa.gz
>    Schizosaccharomyces_pombe_mating_type_region.fa.gz
>    Schizosaccharomyces_pombe_mitochondrial_chromosome.fa.gz
>)


❯ for i in "${f_fa[@]}"; do curl "${u_fa}/${i}" > "${i}"; done
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 3685k  100 3685k    0     0  1848k      0  0:00:01  0:00:01 --:--:-- 1847k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  6568  100  6568    0     0  13295      0 --:--:-- --:--:-- --:--:-- 13295
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1633k  100 1633k    0     0  1031k      0  0:00:01  0:00:01 --:--:-- 1030k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1325k  100 1325k    0     0   923k      0  0:00:01  0:00:01 --:--:--  923k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  710k  100  710k    0     0   555k      0  0:00:01  0:00:01 --:--:--  555k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  6563  100  6563    0     0  13285      0 --:--:-- --:--:-- --:--:-- 13285
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  6229  100  6229    0     0  12609      0 --:--:-- --:--:-- --:--:-- 12660


❯ .,
total 9.3M
drwxrwx--- 2 kalavatt  466 May 29 08:48 ./
drwxrwx--- 3 kalavatt   23 May 29 08:47 ../
-rw-rw---- 1 kalavatt 3.6M May 29 08:48 Schizosaccharomyces_pombe_all_chromosomes.fa.gz
-rw-rw---- 1 kalavatt 6.5K May 29 08:48 Schizosaccharomyces_pombe_chr_II_telomeric_gap.fa.gz
-rw-rw---- 1 kalavatt 1.6M May 29 08:48 Schizosaccharomyces_pombe_chromosome_I.fa.gz
-rw-rw---- 1 kalavatt 1.3M May 29 08:48 Schizosaccharomyces_pombe_chromosome_II.fa.gz
-rw-rw---- 1 kalavatt 711K May 29 08:48 Schizosaccharomyces_pombe_chromosome_III.fa.gz
-rw-rw---- 1 kalavatt 6.5K May 29 08:48 Schizosaccharomyces_pombe_mating_type_region.fa.gz
-rw-rw---- 1 kalavatt 6.1K May 29 08:48 Schizosaccharomyces_pombe_mitochondrial_chromosome.fa.gz


❯ zcat Schizosaccharomyces_pombe_all_chromosomes.fa.gz | grep "^>"
>chr_II_telomeric_gap Schizosaccharomyces_pombe
>I Schizosaccharomyces_pombe
>II Schizosaccharomyces_pombe
>III Schizosaccharomyces_pombe
>mating_type_region Schizosaccharomyces_pombe
>mitochondrial Schizosaccharomyces_pombe


❯ if [[ ! -d "gff3" ]]; then mkdir "gff3"; fi
mkdir: created directory 'gff3'


❯ cd "gff3/"
/home/kalavatt/tsukiyamalab/kalavatt/genomes/Schizosaccharomyces_pombe/gff3


❯ u_gff3="https://www.pombase.org/data/genome_sequence_and_features/gff3/"


❯ f_gff3=(
>    Schizosaccharomyces_pombe_all_chromosomes.gff3.gz
>    Schizosaccharomyces_pombe_chr_II_telomeric_gap.gff3.gz
>    Schizosaccharomyces_pombe_chromosome_I.gff3.gz
>    Schizosaccharomyces_pombe_chromosome_II.gff3.gz
>    Schizosaccharomyces_pombe_chromosome_III.gff3.gz
>    Schizosaccharomyces_pombe_mating_type_region.gff3.gz
>    Schizosaccharomyces_pombe_mitochondrial_chromosome.gff3.gz
>)


❯ for i in "${f_gff3[@]}"; do curl "${u_gff3}/${i}" > "${i}"; done
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  600k  100  600k    0     0   393k      0  0:00:01  0:00:01 --:--:--  393k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   297  100   297    0     0    602      0 --:--:-- --:--:-- --:--:--   602
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  266k  100  266k    0     0   238k      0  0:00:01  0:00:01 --:--:--  237k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  220k  100  220k    0     0   197k      0  0:00:01  0:00:01 --:--:--  197k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  113k  100  113k    0     0   118k      0 --:--:-- --:--:-- --:--:--  118k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   399  100   399    0     0    810      0 --:--:-- --:--:-- --:--:--   810
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  1478  100  1478    0     0   2991      0 --:--:-- --:--:-- --:--:--  2991


❯ .,
total 2.6M
drwxrwx--- 2 kalavatt  480 May 29 09:02 ./
drwxrwx--- 4 kalavatt   45 May 29 09:01 ../
-rw-rw---- 1 kalavatt 601K May 29 09:02 Schizosaccharomyces_pombe_all_chromosomes.gff3.gz
-rw-rw---- 1 kalavatt  297 May 29 09:02 Schizosaccharomyces_pombe_chr_II_telomeric_gap.gff3.gz
-rw-rw---- 1 kalavatt 267K May 29 09:02 Schizosaccharomyces_pombe_chromosome_I.gff3.gz
-rw-rw---- 1 kalavatt 221K May 29 09:02 Schizosaccharomyces_pombe_chromosome_II.gff3.gz
-rw-rw---- 1 kalavatt 114K May 29 09:02 Schizosaccharomyces_pombe_chromosome_III.gff3.gz
-rw-rw---- 1 kalavatt  399 May 29 09:02 Schizosaccharomyces_pombe_mating_type_region.gff3.gz
-rw-rw---- 1 kalavatt 1.5K May 29 09:02 Schizosaccharomyces_pombe_mitochondrial_chromosome.gff3.gz


❯ zcat Schizosaccharomyces_pombe_all_chromosomes.gff3.gz \
>    | cut -f 1 \
>    | sort \
>    | uniq
chr_II_telomeric_gap
##gff-version 3
I
II
III
mating_type_region
mitochondrial

Notes

Notes: Download *S. pombe* fastas, gff3s

Fasta file information as of 2023-0529, the date of downloading (no README in directory):

  • Schizosaccharomyces_pombe_all_chromosomes.fa.gz 2023-05-11 02:56 3.6M
  • Schizosaccharomyces_pombe_chr_II_telomeric_gap.fa.gz 2023-05-11 02:56 6.4K
  • Schizosaccharomyces_pombe_chromosome_I.fa.gz 2023-05-11 02:56 1.6M
  • Schizosaccharomyces_pombe_chromosome_II.fa.gz 2023-05-11 02:56 1.3M
  • Schizosaccharomyces_pombe_chromosome_III.fa.gz 2023-05-11 02:56 711K
  • Schizosaccharomyces_pombe_mating_type_region.fa.gz 2023-05-11 02:56 6.4K
  • Schizosaccharomyces_pombe_mitochondrial_chromosome.fa.gz 2023-05-11 02:56 6.1K

Gff3 file information as of 2023-0529, the date of downloading (no README in directory):

  • Schizosaccharomyces_pombe_all_chromosomes.gff3.gz 2023-05-28 02:12 601K
  • Schizosaccharomyces_pombe_chr_II_telomeric_gap.gff3.gz 2023-05-28 02:12 297
  • Schizosaccharomyces_pombe_chromosome_I.gff3.gz 2023-05-28 02:12 267K
  • Schizosaccharomyces_pombe_chromosome_II.gff3.gz 2023-05-28 02:12 220K
  • Schizosaccharomyces_pombe_chromosome_III.gff3.gz 2023-05-28 02:12 114K
  • Schizosaccharomyces_pombe_mating_type_region.gff3.gz 2023-05-28 02:12 401
  • Schizosaccharomyces_pombe_mitochondrial_chromosome.gff3.gz 2023-05-28 02:12 1.4K


Download S. cerevisiae fastas, gff3

Printed

Printed: Download *S. cerevisiae* fastas, gff3
❯ cd "${d_cerevisiae}" ||
>    echo "cd'ing failed; check on this..."
/home/kalavatt/genomes/Saccharomyces_cerevisiae


❯ u_tgz="http://sgd-archive.yeastgenome.org/sequence/S288C_reference/genome_releases"


❯ f_tgz="S288C_reference_genome_R64-3-1_20210421.tgz"


❯ curl "${u_tgz}/${f_tgz}" > "${f_tgz}"
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 19.6M  100 19.6M    0     0  28.2M      0 --:--:-- --:--:-- --:--:-- 28.2M


❯ tar -xvzf "${f_tgz}"
S288C_reference_genome_R64-3-1_20210421/
S288C_reference_genome_R64-3-1_20210421/orf_coding_all_R64-3-1_20210421.fasta.gz
S288C_reference_genome_R64-3-1_20210421/other_features_genomic_R64-3-1_20210421.fasta.gz
S288C_reference_genome_R64-3-1_20210421/orf_trans_all_R64-3-1_20210421.fasta.gz
S288C_reference_genome_R64-3-1_20210421/S288C_reference_sequence_R64-3-1_20210421.fsa.gz
S288C_reference_genome_R64-3-1_20210421/NotFeature_R64-3-1_20210421.fasta.gz
S288C_reference_genome_R64-3-1_20210421/gene_association_R64-3-1_20210421.sgd.gz
S288C_reference_genome_R64-3-1_20210421/saccharomyces_cerevisiae_R64-3-1_20210421.gff.gz
S288C_reference_genome_R64-3-1_20210421/rna_coding_R64-3-1_20210421.fasta.gz


❯ mv S288C_reference_genome_R64-3-1_20210421/ fasta/
renamed 'S288C_reference_genome_R64-3-1_20210421/' -> 'fasta/'


❯ mkdir gff3/ && \
>    mv fasta/saccharomyces_cerevisiae_R64-3-1_20210421.gff.gz gff3/
mkdir: created directory 'gff3/'
renamed 'fasta/saccharomyces_cerevisiae_R64-3-1_20210421.gff.gz' -> 'gff3/saccharomyces_cerevisiae_R64-3-1_20210421.gff.gz'


❯ .,s
-rw-rw---- 1 kalavatt 20M May 29 08:53 ./S288C_reference_genome_R64-3-1_20210421.tgz

./fasta:
total 19M
drwxr-x--- 2 kalavatt  413 May 29 08:54 ./
drwxrwx--- 4 kalavatt  106 May 29 08:54 ../
-rw-r----- 1 kalavatt 3.6M Apr 27  2021 gene_association_R64-3-1_20210421.sgd.gz
-rw-r----- 1 kalavatt 1.1M Apr 21  2021 NotFeature_R64-3-1_20210421.fasta.gz
-rw-r----- 1 kalavatt 3.7M Apr 21  2021 orf_coding_all_R64-3-1_20210421.fasta.gz
-rw-r----- 1 kalavatt 2.6M Apr 21  2021 orf_trans_all_R64-3-1_20210421.fasta.gz
-rw-r----- 1 kalavatt 187K Apr 21  2021 other_features_genomic_R64-3-1_20210421.fasta.gz
-rw-r----- 1 kalavatt  42K Apr 27  2021 rna_coding_R64-3-1_20210421.fasta.gz
-rw-r----- 1 kalavatt 3.7M Apr 21  2021 S288C_reference_sequence_R64-3-1_20210421.fsa.gz

./gff3:
total 6.0M
drwxrwx--- 2 kalavatt   66 May 29 08:54 ./
drwxrwx--- 4 kalavatt  106 May 29 08:54 ../
-rw-r----- 1 kalavatt 5.1M Apr 27  2021 saccharomyces_cerevisiae_R64-3-1_20210421.gff.gz


❯ .,
total 23M
drwxrwx---  4 kalavatt 106 May 29 08:54 ./
drwxrwx--- 16 kalavatt 578 May 29 08:46 ../
drwxr-x---  2 kalavatt 413 May 29 08:54 fasta/
drwxrwx---  2 kalavatt  66 May 29 08:54 gff3/
-rw-rw----  1 kalavatt 20M May 29 08:53 S288C_reference_genome_R64-3-1_20210421.tgz


Prepare S. pombe fasta, gff3 for concatenation with S. cerevisiae

Printed

Printed: Prepare S. pombe fasta, gff3 for concatenation with S. cerevisiae
❯ cd "${d_pombe}" ||
>    echo "cd'ing failed; check on this..."
/home/kalavatt/genomes/Schizosaccharomyces_pombe


❯ if [[ ! -d "fasta-processed" ]]; then mkdir "fasta-processed"; fi
mkdir: created directory 'fasta-processed'


❯ cp \
>    "fasta/Schizosaccharomyces_pombe_all_chromosomes.fa.gz" \
>    "fasta-processed/"
'fasta/Schizosaccharomyces_pombe_all_chromosomes.fa.gz' -> 'fasta-processed/Schizosaccharomyces_pombe_all_chromosomes.fa.gz'


❯ cd "fasta-processed" ||
>    echo "cd'ing failed; check on this..."
/home/kalavatt/genomes/Schizosaccharomyces_pombe/fasta-processed


❯ zgrep "^>" "Schizosaccharomyces_pombe_all_chromosomes.fa.gz"
>chr_II_telomeric_gap Schizosaccharomyces_pombe
>I Schizosaccharomyces_pombe
>II Schizosaccharomyces_pombe
>III Schizosaccharomyces_pombe
>mating_type_region Schizosaccharomyces_pombe
>mitochondrial Schizosaccharomyces_pombe


❯ gzip -cd "Schizosaccharomyces_pombe_all_chromosomes.fa.gz" \
>    > "Schizosaccharomyces_pombe_all_chromosomes.fa"


❯ if [[ -f "tmp.fa" ]]; then rm "tmp.fa"; fi


❯ sed 's/^>chr_II_telomeric_gap\ Schizosaccharomyces_pombe/>SP_II_TG/g;s/^>I\ Schizosaccharomyces_pombe/>SP_I/g;s/^>II\ Schizosaccharomyces_pombe/>SP_II/g;s/^>III\ Schizosaccharomyces_pombe/>SP_III/g;s/^>mating_type_region\ Schizosaccharomyces_pombe/>SP_MTR/g;s/^>mitochondrial\ Schizosaccharomyces_pombe/>SP_Mito/g' "Schizosaccharomyces_pombe_all_chromosomes.fa" \
>    > "tmp.fa"


❯ cat "tmp.fa" | grep "^>"
>SP_II_TG
>SP_I
>SP_II
>SP_III
>SP_MTR
>SP_Mito


❯ mv -f "tmp.fa" "Schizosaccharomyces_pombe_all_chromosomes.fa"
renamed 'tmp.fa' -> 'Schizosaccharomyces_pombe_all_chromosomes.fa'


❯ cat "Schizosaccharomyces_pombe_all_chromosomes.fa" | grep "^>"
>SP_II_TG
>SP_I
>SP_II
>SP_III
>SP_MTR
>SP_Mito


❯ rm *.gz


❯ gzip *.fa


❯ .,
total 4.6M
drwxrwx--- 2 kalavatt   65 May 29 09:25 ./
drwxrwx--- 5 kalavatt   78 May 29 09:12 ../
-rw-rw---- 1 kalavatt 3.8M May 29 09:21 Schizosaccharomyces_pombe_all_chromosomes.fa.gz


❯ zcat "Schizosaccharomyces_pombe_all_chromosomes.fa.gz" | grep "^>"
>SP_II_TG
>SP_I
>SP_II
>SP_III
>SP_MTR
>SP_Mito


❯ cd .. && pwd
/home/kalavatt/genomes/Schizosaccharomyces_pombe


❯ if [[ ! -d "gff3-processed/" ]]; then mkdir "gff3-processed/"; fi
mkdir: created directory 'gff3-processed/'


❯ cp "gff3/Schizosaccharomyces_pombe_all_chromosomes.gff3.gz" "gff3-processed/"
'gff3/Schizosaccharomyces_pombe_all_chromosomes.gff3.gz' -> 'gff3-processed/Schizosaccharomyces_pombe_all_chromosomes.gff3.gz'


❯ cd "gff3-processed/" ||
>    echo "cd'ing failed; check on this..."
/home/kalavatt/genomes/Schizosaccharomyces_pombe/gff3-processed


❯ zcat "Schizosaccharomyces_pombe_all_chromosomes.gff3.gz" \
>    | cut -f 1 \
>    | sort \
>    | uniq
chr_II_telomeric_gap
##gff-version 3
I
II
III
mating_type_region
mitochondrial


❯ zcat "Schizosaccharomyces_pombe_all_chromosomes.gff3.gz" \
>    | sed 's/^chr_II_telomeric_gap/SP_II_TG/g;s/^I/SP_I/g;s/^II/SP_II/g;s/^III/SP_III/g;s/^mating_type_region/SP_MTR/g;s/^mitochondrial/SP_Mito/g' \
>        > "tmp.gff3"


❯ head "tmp.gff3"
##gff-version 3
SP_I    PomBase gene    1798347 1798835 .   +   .   ID=SPAC1002.01;Name=mrx11
SP_I    PomBase mRNA    1798347 1798835 .   +   .   ID=SPAC1002.01.1;Parent=SPAC1002.01
SP_I    PomBase CDS 1798347 1798835 .   +   0   ID=SPAC1002.01.1:exon:1;Parent=SPAC1002.01.1
SP_I    PomBase gene    1799014 1800053 .   +   .   ID=SPAC1002.02;Name=pom34
SP_I    PomBase mRNA    1799014 1800053 .   +   .   ID=SPAC1002.02.1;Parent=SPAC1002.02
SP_I    PomBase five_prime_UTR  1799014 1799127 .   +   .   ID=SPAC1002.02.1:five_prime_UTR:1;Parent=SPAC1002.02.1
SP_I    PomBase CDS 1799128 1799817 .   +   0   ID=SPAC1002.02.1:exon:1;Parent=SPAC1002.02.1
SP_I    PomBase three_prime_UTR 1799818 1800053 .   +   .   ID=SPAC1002.02.1:three_prime_UTR:1;Parent=SPAC1002.02.1
SP_I    PomBase gene    1799915 1803070 .   -   .   ID=SPAC1002.03c;Name=gls2


❯ tail "tmp.gff3"
SP_I    PomBase TR_box  4514798 4514815 .   -   .   ID=CU329670_TR_box_4514798..4514815
SP_III  PomBase long_terminal_repeat    2319921 2320269 .   -   .   ID=SPLTRC.71
SP_II   PomBase long_terminal_repeat    2339945 2340297 .   -   .   ID=SPLTRB.34
SP_I    PomBase long_terminal_repeat    4525577 4525926 .   +   .   ID=SPLTRA.71
SP_III  PomBase gene_group  2450422 2452883 .   +   .   ID=CU329672_gene_group_2450422..2452883
SP_III  PomBase long_terminal_repeat    2108288 2108631 .   -   .   ID=SPLTRC.58
SP_III  PomBase long_terminal_repeat    782301  782649  .   -   .   ID=SPLTRC.27
SP_III  PomBase region  814799  814896  .   -   .   ID=CU329672_region_814799..814896
SP_III  PomBase dh_repeat   1087567 1091508 .   +   .   ID=SPRPTCENC.9
SP_I    PomBase long_terminal_repeat    32863   33057   .   -   .   ID=SPLTRA.6


❯ zcat "../gff3/Schizosaccharomyces_pombe_all_chromosomes.gff3.gz" | tail
I   PomBase TR_box  4514798 4514815 .   -   .   ID=CU329670_TR_box_4514798..4514815
III PomBase long_terminal_repeat    2319921 2320269 .   -   .   ID=SPLTRC.71
II  PomBase long_terminal_repeat    2339945 2340297 .   -   .   ID=SPLTRB.34
I   PomBase long_terminal_repeat    4525577 4525926 .   +   .   ID=SPLTRA.71
III PomBase gene_group  2450422 2452883 .   +   .   ID=CU329672_gene_group_2450422..2452883
III PomBase long_terminal_repeat    2108288 2108631 .   -   .   ID=SPLTRC.58
III PomBase long_terminal_repeat    782301  782649  .   -   .   ID=SPLTRC.27
III PomBase region  814799  814896  .   -   .   ID=CU329672_region_814799..814896
III PomBase dh_repeat   1087567 1091508 .   +   .   ID=SPRPTCENC.9
I   PomBase long_terminal_repeat    32863   33057   .   -   .   ID=SPLTRA.6


❯ cat "tmp.gff3" \
>    | cut -f 1 \
>    | sort \
>    | uniq
##gff-version 3
SP_I
SP_II
SP_III
SP_II_TG
SP_Mito
SP_MTR


❯ mv "tmp.gff3" "Schizosaccharomyces_pombe_all_chromosomes.gff3"
renamed 'tmp.gff3' -> 'Schizosaccharomyces_pombe_all_chromosomes.gff3'


❯ rm "Schizosaccharomyces_pombe_all_chromosomes.gff3.gz"


❯ gzip "Schizosaccharomyces_pombe_all_chromosomes.gff3"


❯ .,
total 984K
drwxrwx--- 2 kalavatt   67 May 29 09:39 ./
drwxrwx--- 6 kalavatt  110 May 29 09:31 ../
-rw-rw---- 1 kalavatt 648K May 29 09:36 Schizosaccharomyces_pombe_all_chromosomes.gff3.gz


❯ zcat "Schizosaccharomyces_pombe_all_chromosomes.gff3.gz" \
>    | cut -f 1 \
>    | sort \
>    | uniq
##gff-version 3
SP_I
SP_II
SP_III
SP_II_TG
SP_Mito
SP_MTR


Prepare S. cerevisiae fasta, gff3 for concatenation with S. pombe

Printed

Printed: Prepare S. cerevisiae fasta, gff3 for concatenation with S. pombe
❯ cd "${HOME}/genomes/" ||
>    echo "cd'ing failed; check on this..."


❯ cd "${d_cerevisiae}" ||
>    echo "cd'ing failed; check on this..."
/home/kalavatt/genomes/Saccharomyces_cerevisiae


❯ if [[ ! -d "fasta-processed" ]]; then mkdir "fasta-processed"; fi
mkdir: created directory 'fasta-processed'


❯ cp \
>    "fasta/S288C_reference_sequence_R64-3-1_20210421.fsa.gz" \
>    "fasta-processed/"
'fasta/S288C_reference_sequence_R64-3-1_20210421.fsa.gz' -> 'fasta-processed/S288C_reference_sequence_R64-3-1_20210421.fsa.gz'


❯ cd "fasta-processed" ||
>    echo "cd'ing failed; check on this..."
/home/kalavatt/genomes/Saccharomyces_cerevisiae/fasta-processed


❯ zcat "S288C_reference_sequence_R64-3-1_20210421.fsa.gz" | grep "^>"
>ref|NC_001133| [org=Saccharomyces cerevisiae] [strain=S288C] [moltype=genomic] [chromosome=I]
>ref|NC_001134| [org=Saccharomyces cerevisiae] [strain=S288C] [moltype=genomic] [chromosome=II]
>ref|NC_001135| [org=Saccharomyces cerevisiae] [strain=S288C] [moltype=genomic] [chromosome=III]
>ref|NC_001136| [org=Saccharomyces cerevisiae] [strain=S288C] [moltype=genomic] [chromosome=IV]
>ref|NC_001137| [org=Saccharomyces cerevisiae] [strain=S288C] [moltype=genomic] [chromosome=V]
>ref|NC_001138| [org=Saccharomyces cerevisiae] [strain=S288C] [moltype=genomic] [chromosome=VI]
>ref|NC_001139| [org=Saccharomyces cerevisiae] [strain=S288C] [moltype=genomic] [chromosome=VII]
>ref|NC_001140| [org=Saccharomyces cerevisiae] [strain=S288C] [moltype=genomic] [chromosome=VIII]
>ref|NC_001141| [org=Saccharomyces cerevisiae] [strain=S288C] [moltype=genomic] [chromosome=IX]
>ref|NC_001142| [org=Saccharomyces cerevisiae] [strain=S288C] [moltype=genomic] [chromosome=X]
>ref|NC_001143| [org=Saccharomyces cerevisiae] [strain=S288C] [moltype=genomic] [chromosome=XI]
>ref|NC_001144| [org=Saccharomyces cerevisiae] [strain=S288C] [moltype=genomic] [chromosome=XII]
>ref|NC_001145| [org=Saccharomyces cerevisiae] [strain=S288C] [moltype=genomic] [chromosome=XIII]
>ref|NC_001146| [org=Saccharomyces cerevisiae] [strain=S288C] [moltype=genomic] [chromosome=XIV]
>ref|NC_001147| [org=Saccharomyces cerevisiae] [strain=S288C] [moltype=genomic] [chromosome=XV]
>ref|NC_001148| [org=Saccharomyces cerevisiae] [strain=S288C] [moltype=genomic] [chromosome=XVI]
>ref|NC_001224| [org=Saccharomyces cerevisiae] [strain=S288C] [moltype=genomic] [location=mitochondrion] [top=circular]


❯ zcat "S288C_reference_sequence_R64-3-1_20210421.fsa.gz" \
>    | sed 's/^>ref|NC_001133|\ \[org=Saccharomyces\ cerevisiae\]\ \[strain=S288C\]\ \[moltype=genomic\]\ \[chromosome=I\]/>I/g;s/^>ref|NC_001134|\ \[org=Saccharomyces\ cerevisiae\]\ \[strain=S288C\]\ \[moltype=genomic\]\ \[chromosome=II\]/>II/g;s/^>ref|NC_001135|\ \[org=Saccharomyces\ cerevisiae\]\ \[strain=S288C\]\ \[moltype=genomic\]\ \[chromosome=III\]/>III/g;s/^>ref|NC_001136|\ \[org=Saccharomyces\ cerevisiae\]\ \[strain=S288C\]\ \[moltype=genomic\]\ \[chromosome=IV\]/>IV/g;s/^>ref|NC_001137|\ \[org=Saccharomyces\ cerevisiae\]\ \[strain=S288C\]\ \[moltype=genomic\]\ \[chromosome=V\]/>V/g;s/^>ref|NC_001138|\ \[org=Saccharomyces\ cerevisiae\]\ \[strain=S288C\]\ \[moltype=genomic\]\ \[chromosome=VI\]/>VI/g;s/^>ref|NC_001139|\ \[org=Saccharomyces\ cerevisiae\]\ \[strain=S288C\]\ \[moltype=genomic\]\ \[chromosome=VII\]/>VII/g;s/^>ref|NC_001140|\ \[org=Saccharomyces\ cerevisiae\]\ \[strain=S288C\]\ \[moltype=genomic\]\ \[chromosome=VIII\]/>VIII/g;s/^>ref|NC_001141|\ \[org=Saccharomyces\ cerevisiae\]\ \[strain=S288C\]\ \[moltype=genomic\]\ \[chromosome=IX\]/>IX/g;s/^>ref|NC_001142|\ \[org=Saccharomyces\ cerevisiae\]\ \[strain=S288C\]\ \[moltype=genomic\]\ \[chromosome=X\]/>X/g;s/^>ref|NC_001143|\ \[org=Saccharomyces\ cerevisiae\]\ \[strain=S288C\]\ \[moltype=genomic\]\ \[chromosome=XI\]/>XI/g;s/^>ref|NC_001144|\ \[org=Saccharomyces\ cerevisiae\]\ \[strain=S288C\]\ \[moltype=genomic\]\ \[chromosome=XII\]/>XII/g;s/^>ref|NC_001145|\ \[org=Saccharomyces\ cerevisiae\]\ \[strain=S288C\]\ \[moltype=genomic\]\ \[chromosome=XIII\]/>XIII/g;s/^>ref|NC_001146|\ \[org=Saccharomyces\ cerevisiae\]\ \[strain=S288C\]\ \[moltype=genomic\]\ \[chromosome=XIV\]/>XIV/g;s/^>ref|NC_001147|\ \[org=Saccharomyces\ cerevisiae\]\ \[strain=S288C\]\ \[moltype=genomic\]\ \[chromosome=XV\]/>XV/g;s/^>ref|NC_001148|\ \[org=Saccharomyces\ cerevisiae\]\ \[strain=S288C\]\ \[moltype=genomic\]\ \[chromosome=XVI\]/>XVI/g;s/^>ref|NC_001224|\ \[org=Saccharomyces\ cerevisiae\]\ \[strain=S288C\]\ \[moltype=genomic\]\ \[location=mitochondrion\]\ \[top=circular\]/>Mito/g' \
>        > "tmp.fa"


❯ cat "tmp.fa" | grep "^>"
>I
>II
>III
>IV
>V
>VI
>VII
>VIII
>IX
>X
>XI
>XII
>XIII
>XIV
>XV
>XVI
>Mito


❯ rm "S288C_reference_sequence_R64-3-1_20210421.fsa.gz"


❯ mv -f "tmp.fa" "S288C_reference_sequence_R64-3-1_20210421.fa"
renamed 'tmp.fa' -> 'S288C_reference_sequence_R64-3-1_20210421.fa'


❯ gzip "S288C_reference_sequence_R64-3-1_20210421.fa"


❯ .,
total 4.5M
drwxrwx--- 2 kalavatt   65 May 29 10:07 ./
drwxrwx--- 5 kalavatt  139 May 29 09:53 ../
-rw-rw---- 1 kalavatt 3.7M May 29 09:58 S288C_reference_sequence_R64-3-1_20210421.fa.gz


❯ zcat "S288C_reference_sequence_R64-3-1_20210421.fa.gz" | grep "^>"
>I
>II
>III
>IV
>V
>VI
>VII
>VIII
>IX
>X
>XI
>XII
>XIII
>XIV
>XV
>XVI
>Mito


❯ cd .. && pwd
/home/kalavatt/genomes/Saccharomyces_cerevisiae


❯ if [[ ! -d "gff3-processed/" ]]; then mkdir "gff3-processed/"; fi
mkdir: created directory 'gff3-processed/'


❯ cp "gff3/saccharomyces_cerevisiae_R64-3-1_20210421.gff.gz" "gff3-processed/"
'gff3/saccharomyces_cerevisiae_R64-3-1_20210421.gff.gz' -> 'gff3-processed/saccharomyces_cerevisiae_R64-3-1_20210421.gff.gz'


❯ cd "gff3-processed/" ||
>    echo "cd'ing failed; check on this..."
/home/kalavatt/genomes/Saccharomyces_cerevisiae/gff3-processed


❯ gzip -cd "saccharomyces_cerevisiae_R64-3-1_20210421.gff.gz" \
>    > "saccharomyces_cerevisiae_R64-3-1_20210421.gff"


❯ if [[ -f "tmp.gff3" ]]; then rm "tmp.gff3"; fi


❯ sed -n '/###/q;p' < saccharomyces_cerevisiae_R64-3-1_20210421.gff > tmp.gff3


❯ cat "tmp.gff3" \
>    | cut -f 1 \
>    | sort \
>    | uniq
#
#!assembly R64-3-1
chrI
chrII
chrIII
chrIV
chrIX
chrmt
chrV
chrVI
chrVII
chrVIII
chrX
chrXI
chrXII
chrXIII
chrXIV
chrXV
chrXVI
# Created by Saccharomyces Genome Database (http://www.yeastgenome.org/)
#!data-source SGD
#!date-produced 2021-04-27 10:49:32
# Features from the 16 nuclear chromosomes labeled chrI to chrXVI,
##gff-version 3
# https://downloads.yeastgenome.org/latest/saccharomyces_cerevisiae.gff.gz
# Please send comments and suggestions to sgd-helpdesk@lists.stanford.edu
# plus the mitochondrial genome labeled chrmt.
#!refseq-version GCF_000146045.2
# Saccharomyces cerevisiae S288C genome (version=R64-3-1)
# SGD is funded as a National Human Genome Research Institute Biomedical Informatics Resource from
# the U. S. National Institutes of Health to Stanford University.
# Weekly updates of this file are available for download from:


❯ cat "tmp.gff3" \
>    | sed 's/^chr//g;s/^mt/Mito/g' \
>        > "tmp.2.gff3"


❯ cat "tmp.2.gff3" \
>    | cut -f 1 \
>    | sort \
>    | uniq
#
#!assembly R64-3-1
# Created by Saccharomyces Genome Database (http://www.yeastgenome.org/)
#!data-source SGD
#!date-produced 2021-04-27 10:49:32
# Features from the 16 nuclear chromosomes labeled chrI to chrXVI,
##gff-version 3
# https://downloads.yeastgenome.org/latest/saccharomyces_cerevisiae.gff.gz
I
II
III
IV
IX
Mito
# Please send comments and suggestions to sgd-helpdesk@lists.stanford.edu
# plus the mitochondrial genome labeled chrmt.
#!refseq-version GCF_000146045.2
# Saccharomyces cerevisiae S288C genome (version=R64-3-1)
# SGD is funded as a National Human Genome Research Institute Biomedical Informatics Resource from
# the U. S. National Institutes of Health to Stanford University.
V
VI
VII
VIII
# Weekly updates of this file are available for download from:
X
XI
XII
XIII
XIV
XV
XVI


❯ head "tmp.2.gff3"
##gff-version 3
#!date-produced 2021-04-27 10:49:32
#!data-source SGD
#!assembly R64-3-1
#!refseq-version GCF_000146045.2
#
# Saccharomyces cerevisiae S288C genome (version=R64-3-1)
#
# Features from the 16 nuclear chromosomes labeled chrI to chrXVI,
# plus the mitochondrial genome labeled chrmt.


❯ tail "tmp.2.gff3"
Mito    SGD origin_of_replication   82329   82600   .   +   .   ID=ORI5;Name=ORI5;gene=ORI5;Alias=ORI5;Note=Mitochondrial%20origin%20of%20replication;display=Mitochondrial%20origin%20of%20replication;dbxref=SGD:S000029671;curie=SGD:S000029671
Mito    SGD tRNA_gene   85035   85112   .   +   .   ID=YNCQ0026W;Name=YNCQ0026W;Alias=tM%28CAU%29Q2,tRNA-fMet;Ontology_term=GO:0005739,GO:0030533,GO:0070125,SO:0000704;Note=Mitochondrial%20formylated%20methionine%20tRNA%20%28tRNA-fMet%29%3B%20predicted%20by%20tRNAscan-SE%20analysis;display=Mitochondrial%20formylated%20methionine%20tRNA%20%28tRNA-fMet%29;dbxref=SGD:S000007326;curie=SGD:S000007326
Mito    SGD noncoding_exon  85035   85112   .   +   .   Parent=YNCQ0026W_tRNA;Name=YNCQ0026W_noncoding_exon
Mito    SGD tRNA    85035   85112   .   +   .   ID=YNCQ0026W_tRNA;Name=YNCQ0026W_tRNA;Parent=YNCQ0026W
Mito    SGD ncRNA_gene  85295   85777   .   +   .   ID=YNCQ0027W;Name=YNCQ0027W;gene=RPM1;Alias=RPM1,Q0285;Ontology_term=GO:0001682,GO:0004526,GO:0005739,GO:0008033,GO:0030678,SO:0000704;Note=RNA%20component%20of%20mitochondrial%20RNase%20P%3B%20mitochondrial%20RNase%20P%20also%20contains%20the%20protein%20subunit%20Rpm2p%3B%20RNase%20P%20removes%205'%20extensions%20from%20mitochondrial%20tRNA%20precursors%3B%20RPM1%20is%20conserved%20in%20bacteria%2C%20fungi%2C%20and%20protozoa;display=RNA%20component%20of%20mitochondrial%20RNase%20P;dbxref=SGD:S000029023;curie=SGD:S000029023
Mito    SGD noncoding_exon  85295   85777   .   +   .   Parent=YNCQ0027W_ncRNA;Name=YNCQ0027W_noncoding_exon
Mito    SGD ncRNA   85295   85777   .   +   .   ID=YNCQ0027W_ncRNA;Name=YNCQ0027W_ncRNA;Parent=YNCQ0027W
Mito    SGD gene    85554   85709   .   +   .   ID=Q0297;Name=Q0297;Alias=ORF12;Ontology_term=GO:0003674,GO:0005575,GO:0008150,SO:0000704;Note=Dubious%20open%20reading%20frame%3B%20unlikely%20to%20encode%20a%20functional%20protein%2C%20based%20on%20available%20experimental%20and%20comparative%20sequence%20data%3B%20partially%20overlaps%20the%20verified%20gene%20RPM1;display=Dubious%20open%20reading%20frame;dbxref=SGD:S000007284;orf_classification=Dubious;curie=SGD:S000007284
Mito    SGD CDS 85554   85709   .   +   0   Parent=Q0297_mRNA;Name=Q0297_CDS;orf_classification=Dubious;protein_id=UniProtKB:Q9ZZV8
Mito    SGD mRNA    85554   85709   .   +   .   ID=Q0297_mRNA;Name=Q0297_mRNA;Parent=Q0297


❯ mv "tmp.2.gff3" "saccharomyces_cerevisiae_R64-3-1_20210421.gff3"
renamed 'tmp.2.gff3' -> 'saccharomyces_cerevisiae_R64-3-1_20210421.gff3'


❯ gzip "saccharomyces_cerevisiae_R64-3-1_20210421.gff3"


❯ .,
total 34M
drwxrwx--- 2 kalavatt  222 May 29 10:41 ./
drwxrwx--- 6 kalavatt  171 May 29 10:15 ../
-rw-rw---- 1 kalavatt  20M May 29 10:21 saccharomyces_cerevisiae_R64-3-1_20210421.gff
-rw-rw---- 1 kalavatt 1.6M May 29 10:37 saccharomyces_cerevisiae_R64-3-1_20210421.gff3.gz
-rw-r----- 1 kalavatt 5.1M May 29 10:16 saccharomyces_cerevisiae_R64-3-1_20210421.gff.gz
-rw-rw---- 1 kalavatt 7.4M May 29 10:34 tmp.gff3
 

❯ rm \
>    "saccharomyces_cerevisiae_R64-3-1_20210421.gff" \
>    "saccharomyces_cerevisiae_R64-3-1_20210421.gff.gz" \
>    "tmp.gff3"


❯ .,
total 1.9M
drwxrwx--- 2 kalavatt   67 May 29 10:42 ./
drwxrwx--- 6 kalavatt  171 May 29 10:15 ../
-rw-rw---- 1 kalavatt 1.6M May 29 10:37 saccharomyces_cerevisiae_R64-3-1_20210421.gff3.gz


❯ zcat "saccharomyces_cerevisiae_R64-3-1_20210421.gff3.gz" \
>    | cut -f 1 \
>    | sort \
>    | uniq
#
#!assembly R64-3-1
# Created by Saccharomyces Genome Database (http://www.yeastgenome.org/)
#!data-source SGD
#!date-produced 2021-04-27 10:49:32
# Features from the 16 nuclear chromosomes labeled chrI to chrXVI,
##gff-version 3
# https://downloads.yeastgenome.org/latest/saccharomyces_cerevisiae.gff.gz
I
II
III
IV
IX
Mito
# Please send comments and suggestions to sgd-helpdesk@lists.stanford.edu
# plus the mitochondrial genome labeled chrmt.
#!refseq-version GCF_000146045.2
# Saccharomyces cerevisiae S288C genome (version=R64-3-1)
# SGD is funded as a National Human Genome Research Institute Biomedical Informatics Resource from
# the U. S. National Institutes of Health to Stanford University.
V
VI
VII
VIII
# Weekly updates of this file are available for download from:
X
XI
XII
XIII
XIV
XV
XVI


Concatenate processed fastas and gff3s in new directory combined_SC_SP/

Printed

Printed: Concatenate processed fastas and gff3s in new directory combined_SC_SP/
❯ cd "${HOME}/tsukiyamalab/kalavatt/genomes" ||
>    echo "cd'ing failed; check on this..."


❯ if [[ ! -d "combined_SC_SP" ]]; then mkdir -p combined_SC_SP/{fasta,gff3}; fi
mkdir: created directory 'combined_SC_SP'
mkdir: created directory 'combined_SC_SP/fasta'
mkdir: created directory 'combined_SC_SP/gff3'


❯ cp \
>    "Saccharomyces_cerevisiae/gff3-processed/saccharomyces_cerevisiae_R64-3-1_20210421.gff3.gz" \
>    "combined_SC_SP/gff3/"
'Saccharomyces_cerevisiae/gff3-processed/saccharomyces_cerevisiae_R64-3-1_20210421.gff3.gz' -> 'combined_SC_SP/gff3/saccharomyces_cerevisiae_R64-3-1_20210421.gff3.gz'


❯ cp \
>    "Schizosaccharomyces_pombe/gff3-processed/Schizosaccharomyces_pombe_all_chromosomes.gff3.gz" \
>    "combined_SC_SP/gff3/"
'Schizosaccharomyces_pombe/gff3-processed/Schizosaccharomyces_pombe_all_chromosomes.gff3.gz' -> 'combined_SC_SP/gff3/Schizosaccharomyces_pombe_all_chromosomes.gff3.gz'


❯ cp \
>    "Saccharomyces_cerevisiae/fasta-processed/S288C_reference_sequence_R64-3-1_20210421.fa.gz" \
>    "combined_SC_SP/fasta/"
'Saccharomyces_cerevisiae/fasta-processed/S288C_reference_sequence_R64-3-1_20210421.fa.gz' -> 'combined_SC_SP/fasta/S288C_reference_sequence_R64-3-1_20210421.fa.gz'


❯ cp \
>    "Schizosaccharomyces_pombe/fasta-processed/Schizosaccharomyces_pombe_all_chromosomes.fa.gz" \
>    "combined_SC_SP/fasta/"
'Schizosaccharomyces_pombe/fasta-processed/Schizosaccharomyces_pombe_all_chromosomes.fa.gz' -> 'combined_SC_SP/fasta/Schizosaccharomyces_pombe_all_chromosomes.fa.gz'


❯ cd "combined_SC_SP/" ||
>    echo "cd'ing failed; check on this..."
/home/kalavatt/tsukiyamalab/kalavatt/genomes/combined_SC_SP


❯ cd "gff3/" ||
>    echo "cd'ing failed; check on this..."
/home/kalavatt/tsukiyamalab/kalavatt/genomes/combined_SC_SP/gff3


❯ cat \
>    "saccharomyces_cerevisiae_R64-3-1_20210421.gff3.gz" \
>    "Schizosaccharomyces_pombe_all_chromosomes.gff3.gz" \
>        > "combined_SC_SP.gff3.gz"


❯ .,
total 5.9M
drwxrwx--- 2 kalavatt  174 May 29 10:56 ./
drwxrwx--- 4 kalavatt   45 May 29 10:52 ../
-rw-rw---- 1 kalavatt 2.2M May 29 10:56 combined_SC_SP.gff3.gz
-rw-rw---- 1 kalavatt 1.6M May 29 10:52 saccharomyces_cerevisiae_R64-3-1_20210421.gff3.gz
-rw-rw---- 1 kalavatt 648K May 29 10:52 Schizosaccharomyces_pombe_all_chromosomes.gff3.gz


❯ zcat "combined_SC_SP.gff3.gz" \
>    | cut -f 1 \
>    | sort \
>    | uniq
#
#!assembly R64-3-1
# Created by Saccharomyces Genome Database (http://www.yeastgenome.org/)
#!data-source SGD
#!date-produced 2021-04-27 10:49:32
# Features from the 16 nuclear chromosomes labeled chrI to chrXVI,
##gff-version 3
# https://downloads.yeastgenome.org/latest/saccharomyces_cerevisiae.gff.gz
I
II
III
IV
IX
Mito
# Please send comments and suggestions to sgd-helpdesk@lists.stanford.edu
# plus the mitochondrial genome labeled chrmt.
#!refseq-version GCF_000146045.2
# Saccharomyces cerevisiae S288C genome (version=R64-3-1)
# SGD is funded as a National Human Genome Research Institute Biomedical Informatics Resource from
SP_I
SP_II
SP_III
SP_II_TG
SP_Mito
SP_MTR
# the U. S. National Institutes of Health to Stanford University.
V
VI
VII
VIII
# Weekly updates of this file are available for download from:
X
XI
XII
XIII
XIV
XV
XVI


❯ cd "../fasta" ||
>    echo "cd'ing failed; check on this..."


❯ cat \
>    "S288C_reference_sequence_R64-3-1_20210421.fa.gz" \
>    "Schizosaccharomyces_pombe_all_chromosomes.fa.gz" \
>        > "combined_SC_SP.fa.gz"


❯ .,
total 18M
drwxrwx--- 2 kalavatt  168 May 29 11:01 ./
drwxrwx--- 4 kalavatt   45 May 29 10:52 ../
-rw-rw---- 1 kalavatt 7.4M May 29 11:01 combined_SC_SP.fa.gz
-rw-rw---- 1 kalavatt 3.7M May 29 10:54 S288C_reference_sequence_R64-3-1_20210421.fa.gz
-rw-rw---- 1 kalavatt 3.8M May 29 10:54 Schizosaccharomyces_pombe_all_chromosomes.fa.gz


❯ zcat "combined_SC_SP.fa.gz" | grep "^>"
>I
>II
>III
>IV
>V
>VI
>VII
>VIII
>IX
>X
>XI
>XII
>XIII
>XIV
>XV
>XVI
>Mito
>SP_II_TG
>SP_I
>SP_II
>SP_III
>SP_MTR
>SP_Mito


Create bowtie2 indices for "combined_SC_SP.fa.gz"

Printed

Printed: Create bowtie2 indices for "combined_SC_SP.fa.gz"
❯ cd "${HOME}/genomes/combined_SC_SP" ||
>    echo "cd'ing failed; check on this..."


❯ if [[ ! -d "bowtie2/" ]]; then mkdir "bowtie2/"; fi
mkdir: created directory 'bowtie2/'


❯ #  Index the fasta file


❯ cd "fasta/"
/home/kalavatt/genomes/combined_SC_SP/fasta


❯ gzip -cd "combined_SC_SP.fa.gz" > "combined_SC_SP.fa"


❯ ml SAMtools/1.16.1-GCC-11.2.0 Bowtie2/2.4.4-GCC-11.2.0


❯ cat "combined_SC_SP.fa" | grep "^>"
>I
>II
>III
>IV
>V
>VI
>VII
>VIII
>IX
>X
>XI
>XII
>XIII
>XIV
>XV
>XVI
>Mito
>SP_II_TG
>SP_I
>SP_II
>SP_III
>SP_MTR
>SP_Mito


❯ samtools faidx "combined_SC_SP.fa"


❯ #  Create a "chrom-info" file


❯ cut -f 1,2 "combined_SC_SP.fa.fai" > "combined_SC_SP.chrom-info.tsv"


❯ #  Build the indices


❯ cd .. && pwd
/home/kalavatt/genomes/combined_SC_SP


❯ bowtie2-build fasta/combined_SC_SP.fa bowtie2/combined_SC_SP \
>    1> >(tee -a bowtie2/combined_SC_SP.stdout.txt) \
>    2> >(tee -a bowtie2/combined_SC_SP.stderr.txt)
Settings:
  Output files: "bowtie2/combined_SC_SP.*.bt2"
  Line rate: 6 (line is 64 bytes)
  Lines per side: 1 (side is 64 bytes)
  Offset rate: 4 (one in 16)
  FTable chars: 10
  Strings: unpacked
  Max bucket size: default
  Max bucket size, sqrt multiplier: default
  Max bucket size, len divisor: 4
  Difference-cover sample period: 1024
  Endianness: little
  Actual local endianness: little
  Sanity checking: disabled
  Assertions: disabled
  Random seed: 0
  Sizeofs: void*:8, int:4, long:8, size_t:8
Input files DNA, FASTA:
  fasta/combined_SC_SP.fa
Reading reference sizes
Building a SMALL index
  Time reading reference sizes: 00:00:00
Calculating joined length
Writing header
Reserving space for joined string
Joining reference sequences
  Time to join reference sequences: 00:00:00
bmax according to bmaxDivN setting: 6197021
Using parameters --bmax 4647766 --dcv 1024
  Doing ahead-of-time memory usage test
  Passed!  Constructing with these parameters: --bmax 4647766 --dcv 1024
Constructing suffix-array element generator
Building DifferenceCoverSample
  Building sPrime
  Building sPrimeOrder
  V-Sorting samples
  V-Sorting samples time: 00:00:00
  Allocating rank array
  Ranking v-sort output
  Ranking v-sort output time: 00:00:00
  Invoking Larsson-Sadakane on ranks
  Invoking Larsson-Sadakane on ranks time: 00:00:00
  Sanity-checking and returning
Building samples
Reserving space for 12 sample suffixes
Generating random suffixes
QSorting 12 sample offsets, eliminating duplicates
QSorting sample offsets, eliminating duplicates time: 00:00:00
Multikey QSorting 12 samples
  (Using difference cover)
  Multikey QSorting samples time: 00:00:00
Calculating bucket sizes
Splitting and merging
  Splitting and merging time: 00:00:00
Avg bucket size: 2.75423e+06 (target: 4647765)
Converting suffix-array elements to index image
Allocating ftab, absorbFtab
Entering Ebwt loop
Getting block 1 of 9
  Reserving size (4647766) for bucket 1
  Calculating Z arrays for bucket 1
  Entering block accumulator loop for bucket 1:
  bucket 1: 10%
  bucket 1: 20%
  bucket 1: 30%
  bucket 1: 40%
  bucket 1: 50%
  bucket 1: 60%
  bucket 1: 70%
  bucket 1: 80%
  bucket 1: 90%
  bucket 1: 100%
  Sorting block of length 3260001 for bucket 1
  (Using difference cover)
  Sorting block time: 00:00:01
Returning block of 3260002 for bucket 1
Getting block 2 of 9
  Reserving size (4647766) for bucket 2
  Calculating Z arrays for bucket 2
  Entering block accumulator loop for bucket 2:
  bucket 2: 10%
  bucket 2: 20%
  bucket 2: 30%
  bucket 2: 40%
  bucket 2: 50%
  bucket 2: 60%
  bucket 2: 70%
  bucket 2: 80%
  bucket 2: 90%
  bucket 2: 100%
  Sorting block of length 2867770 for bucket 2
  (Using difference cover)
  Sorting block time: 00:00:00
Returning block of 2867771 for bucket 2
Getting block 3 of 9
  Reserving size (4647766) for bucket 3
  Calculating Z arrays for bucket 3
  Entering block accumulator loop for bucket 3:
  bucket 3: 10%
  bucket 3: 20%
  bucket 3: 30%
  bucket 3: 40%
  bucket 3: 50%
  bucket 3: 60%
  bucket 3: 70%
  bucket 3: 80%
  bucket 3: 90%
  bucket 3: 100%
  Sorting block of length 2560185 for bucket 3
  (Using difference cover)
  Sorting block time: 00:00:00
Returning block of 2560186 for bucket 3
Getting block 4 of 9
  Reserving size (4647766) for bucket 4
  Calculating Z arrays for bucket 4
  Entering block accumulator loop for bucket 4:
  bucket 4: 10%
  bucket 4: 20%
  bucket 4: 30%
  bucket 4: 40%
  bucket 4: 50%
  bucket 4: 60%
  bucket 4: 70%
  bucket 4: 80%
  bucket 4: 90%
  bucket 4: 100%
  Sorting block of length 2972413 for bucket 4
  (Using difference cover)
  Sorting block time: 00:00:00
Returning block of 2972414 for bucket 4
Getting block 5 of 9
  Reserving size (4647766) for bucket 5
  Calculating Z arrays for bucket 5
  Entering block accumulator loop for bucket 5:
  bucket 5: 10%
  bucket 5: 20%
  bucket 5: 30%
  bucket 5: 40%
  bucket 5: 50%
  bucket 5: 60%
  bucket 5: 70%
  bucket 5: 80%
  bucket 5: 90%
  bucket 5: 100%
  Sorting block of length 2671929 for bucket 5
  (Using difference cover)
  Sorting block time: 00:00:01
Returning block of 2671930 for bucket 5
Getting block 6 of 9
  Reserving size (4647766) for bucket 6
  Calculating Z arrays for bucket 6
  Entering block accumulator loop for bucket 6:
  bucket 6: 10%
  bucket 6: 20%
  bucket 6: 30%
  bucket 6: 40%
  bucket 6: 50%
  bucket 6: 60%
  bucket 6: 70%
  bucket 6: 80%
  bucket 6: 90%
  bucket 6: 100%
  Sorting block of length 4280047 for bucket 6
  (Using difference cover)
  Sorting block time: 00:00:01
Returning block of 4280048 for bucket 6
Getting block 7 of 9
  Reserving size (4647766) for bucket 7
  Calculating Z arrays for bucket 7
  Entering block accumulator loop for bucket 7:
  bucket 7: 10%
  bucket 7: 20%
  bucket 7: 30%
  bucket 7: 40%
  bucket 7: 50%
  bucket 7: 60%
  bucket 7: 70%
  bucket 7: 80%
  bucket 7: 90%
  bucket 7: 100%
  Sorting block of length 736213 for bucket 7
  (Using difference cover)
  Sorting block time: 00:00:00
Returning block of 736214 for bucket 7
Getting block 8 of 9
  Reserving size (4647766) for bucket 8
  Calculating Z arrays for bucket 8
  Entering block accumulator loop for bucket 8:
  bucket 8: 10%
  bucket 8: 20%
  bucket 8: 30%
  bucket 8: 40%
  bucket 8: 50%
  bucket 8: 60%
  bucket 8: 70%
  bucket 8: 80%
  bucket 8: 90%
  bucket 8: 100%
  Sorting block of length 4277836 for bucket 8
  (Using difference cover)
  Sorting block time: 00:00:01
Returning block of 4277837 for bucket 8
Getting block 9 of 9
  Reserving size (4647766) for bucket 9
  Calculating Z arrays for bucket 9
  Entering block accumulator loop for bucket 9:
  bucket 9: 10%
  bucket 9: 20%
  bucket 9: 30%
  bucket 9: 40%
  bucket 9: 50%
  bucket 9: 60%
  bucket 9: 70%
  bucket 9: 80%
  bucket 9: 90%
  bucket 9: 100%
  Sorting block of length 1161682 for bucket 9
  (Using difference cover)
  Sorting block time: 00:00:00
Returning block of 1161683 for bucket 9
Exited Ebwt loop
fchr[A]: 0
fchr[C]: 7802696
fchr[G]: 12398259
fchr[T]: 16992544
fchr[$]: 24788084
Exiting Ebwt::buildToDisk()
Returning from initFromVector
Wrote 12457710 bytes to primary EBWT file: bowtie2/combined_SC_SP.1.bt2
Wrote 6197028 bytes to secondary EBWT file: bowtie2/combined_SC_SP.2.bt2
Re-opening _in1 and _in2 as input streams
Returning from Ebwt constructor
Headers:
    len: 24788084
    bwtLen: 24788085
    sz: 6197021
    bwtSz: 6197022
    lineRate: 6
    offRate: 4
    offMask: 0xfffffff0
    ftabChars: 10
    eftabLen: 20
    eftabSz: 80
    ftabLen: 1048577
    ftabSz: 4194308
    offsLen: 1549256
    offsSz: 6197024
    lineSz: 64
    sideSz: 64
    sideBwtSz: 48
    sideBwtLen: 192
    numSides: 129105
    numLines: 129105
    ebwtTotLen: 8262720
    ebwtTotSz: 8262720
    color: 0
    reverse: 0
Total time for call to driver() for forward index: 00:00:11
Reading reference sizes
  Time reading reference sizes: 00:00:00
Calculating joined length
Writing header
Reserving space for joined string
Joining reference sequences
  Time to join reference sequences: 00:00:01
  Time to reverse reference sequence: 00:00:00
bmax according to bmaxDivN setting: 6197021
Using parameters --bmax 4647766 --dcv 1024
  Doing ahead-of-time memory usage test
  Passed!  Constructing with these parameters: --bmax 4647766 --dcv 1024
Constructing suffix-array element generator
Building DifferenceCoverSample
  Building sPrime
  Building sPrimeOrder
  V-Sorting samples
  V-Sorting samples time: 00:00:00
  Allocating rank array
  Ranking v-sort output
  Ranking v-sort output time: 00:00:00
  Invoking Larsson-Sadakane on ranks
  Invoking Larsson-Sadakane on ranks time: 00:00:00
  Sanity-checking and returning
Building samples
Reserving space for 12 sample suffixes
Generating random suffixes
QSorting 12 sample offsets, eliminating duplicates
QSorting sample offsets, eliminating duplicates time: 00:00:00
Multikey QSorting 12 samples
  (Using difference cover)
  Multikey QSorting samples time: 00:00:00
Calculating bucket sizes
Splitting and merging
  Splitting and merging time: 00:00:00
Split 1, merged 7; iterating...
Splitting and merging
  Splitting and merging time: 00:00:00
Avg bucket size: 3.54115e+06 (target: 4647765)
Converting suffix-array elements to index image
Allocating ftab, absorbFtab
Entering Ebwt loop
Getting block 1 of 7
  Reserving size (4647766) for bucket 1
  Calculating Z arrays for bucket 1
  Entering block accumulator loop for bucket 1:
  bucket 1: 10%
  bucket 1: 20%
  bucket 1: 30%
  bucket 1: 40%
  bucket 1: 50%
  bucket 1: 60%
  bucket 1: 70%
  bucket 1: 80%
  bucket 1: 90%
  bucket 1: 100%
  Sorting block of length 4364643 for bucket 1
  (Using difference cover)
  Sorting block time: 00:00:00
Returning block of 4364644 for bucket 1
Getting block 2 of 7
  Reserving size (4647766) for bucket 2
  Calculating Z arrays for bucket 2
  Entering block accumulator loop for bucket 2:
  bucket 2: 10%
  bucket 2: 20%
  bucket 2: 30%
  bucket 2: 40%
  bucket 2: 50%
  bucket 2: 60%
  bucket 2: 70%
  bucket 2: 80%
  bucket 2: 90%
  bucket 2: 100%
  Sorting block of length 3463592 for bucket 2
  (Using difference cover)
  Sorting block time: 00:00:01
Returning block of 3463593 for bucket 2
Getting block 3 of 7
  Reserving size (4647766) for bucket 3
  Calculating Z arrays for bucket 3
  Entering block accumulator loop for bucket 3:
  bucket 3: 10%
  bucket 3: 20%
  bucket 3: 30%
  bucket 3: 40%
  bucket 3: 50%
  bucket 3: 60%
  bucket 3: 70%
  bucket 3: 80%
  bucket 3: 90%
  bucket 3: 100%
  Sorting block of length 3603783 for bucket 3
  (Using difference cover)
  Sorting block time: 00:00:00
Returning block of 3603784 for bucket 3
Getting block 4 of 7
  Reserving size (4647766) for bucket 4
  Calculating Z arrays for bucket 4
  Entering block accumulator loop for bucket 4:
  bucket 4: 10%
  bucket 4: 20%
  bucket 4: 30%
  bucket 4: 40%
  bucket 4: 50%
  bucket 4: 60%
  bucket 4: 70%
  bucket 4: 80%
  bucket 4: 90%
  bucket 4: 100%
  Sorting block of length 4546286 for bucket 4
  (Using difference cover)
  Sorting block time: 00:00:01
Returning block of 4546287 for bucket 4
Getting block 5 of 7
  Reserving size (4647766) for bucket 5
  Calculating Z arrays for bucket 5
  Entering block accumulator loop for bucket 5:
  bucket 5: 10%
  bucket 5: 20%
  bucket 5: 30%
  bucket 5: 40%
  bucket 5: 50%
  bucket 5: 60%
  bucket 5: 70%
  bucket 5: 80%
  bucket 5: 90%
  bucket 5: 100%
  Sorting block of length 1146309 for bucket 5
  (Using difference cover)
  Sorting block time: 00:00:00
Returning block of 1146310 for bucket 5
Getting block 6 of 7
  Reserving size (4647766) for bucket 6
  Calculating Z arrays for bucket 6
  Entering block accumulator loop for bucket 6:
  bucket 6: 10%
  bucket 6: 20%
  bucket 6: 30%
  bucket 6: 40%
  bucket 6: 50%
  bucket 6: 60%
  bucket 6: 70%
  bucket 6: 80%
  bucket 6: 90%
  bucket 6: 100%
  Sorting block of length 3789355 for bucket 6
  (Using difference cover)
  Sorting block time: 00:00:01
Returning block of 3789356 for bucket 6
Getting block 7 of 7
  Reserving size (4647766) for bucket 7
  Calculating Z arrays for bucket 7
  Entering block accumulator loop for bucket 7:
  bucket 7: 10%
  bucket 7: 20%
  bucket 7: 30%
  bucket 7: 40%
  bucket 7: 50%
  bucket 7: 60%
  bucket 7: 70%
  bucket 7: 80%
  bucket 7: 90%
  bucket 7: 100%
  Sorting block of length 3874110 for bucket 7
  (Using difference cover)
  Sorting block time: 00:00:01
Returning block of 3874111 for bucket 7
Exited Ebwt loop
fchr[A]: 0
fchr[C]: 7802696
fchr[G]: 12398259
fchr[T]: 16992544
fchr[$]: 24788084
Exiting Ebwt::buildToDisk()
Returning from initFromVector
Wrote 12457710 bytes to primary EBWT file: bowtie2/combined_SC_SP.rev.1.bt2
Wrote 6197028 bytes to secondary EBWT file: bowtie2/combined_SC_SP.rev.2.bt2
Re-opening _in1 and _in2 as input streams
Returning from Ebwt constructor
Headers:
    len: 24788084
    bwtLen: 24788085
    sz: 6197021
    bwtSz: 6197022
    lineRate: 6
    offRate: 4
    offMask: 0xfffffff0
    ftabChars: 10
    eftabLen: 20
    eftabSz: 80
    ftabLen: 1048577
    ftabSz: 4194308
    offsLen: 1549256
    offsSz: 6197024
    lineSz: 64
    sideSz: 64
    sideBwtSz: 48
    sideBwtLen: 192
    numSides: 129105
    numLines: 129105
    ebwtTotLen: 8262720
    ebwtTotSz: 8262720
    color: 0
    reverse: 1
Total time for backward call to driver() for mirror index: 00:00:12


Copy files to Rina and Rachel

Printed

Printed: Copy files to Rina and Rachel
❯ cd "${HOME}/tsukiyamalab/kalavatt/genomes" ||
>    echo "cd'ing failed; check on this..."


❯ dir_SC_SP="combined_SC_SP/"
❯ dir_Rachel="${HOME}/tsukiyamalab/Rachel"
❯ dir_Rina="${HOME}/tsukiyamalab/Rina"


❯ cp -r "${dir_SC_SP}" "${dir_Rachel}"
'combined_SC_SP/' -> '/home/kalavatt/tsukiyamalab/Rachel/combined_SC_SP'
'combined_SC_SP/gff3' -> '/home/kalavatt/tsukiyamalab/Rachel/combined_SC_SP/gff3'
'combined_SC_SP/gff3/saccharomyces_cerevisiae_R64-3-1_20210421.gff3.gz' -> '/home/kalavatt/tsukiyamalab/Rachel/combined_SC_SP/gff3/saccharomyces_cerevisiae_R64-3-1_20210421.gff3.gz'
'combined_SC_SP/gff3/combined_SC_SP.gff3.gz' -> '/home/kalavatt/tsukiyamalab/Rachel/combined_SC_SP/gff3/combined_SC_SP.gff3.gz'
'combined_SC_SP/gff3/Schizosaccharomyces_pombe_all_chromosomes.gff3.gz' -> '/home/kalavatt/tsukiyamalab/Rachel/combined_SC_SP/gff3/Schizosaccharomyces_pombe_all_chromosomes.gff3.gz'
'combined_SC_SP/bowtie2' -> '/home/kalavatt/tsukiyamalab/Rachel/combined_SC_SP/bowtie2'
'combined_SC_SP/bowtie2/combined_SC_SP.stdout.txt' -> '/home/kalavatt/tsukiyamalab/Rachel/combined_SC_SP/bowtie2/combined_SC_SP.stdout.txt'
'combined_SC_SP/bowtie2/combined_SC_SP.stderr.txt' -> '/home/kalavatt/tsukiyamalab/Rachel/combined_SC_SP/bowtie2/combined_SC_SP.stderr.txt'
'combined_SC_SP/bowtie2/combined_SC_SP.3.bt2' -> '/home/kalavatt/tsukiyamalab/Rachel/combined_SC_SP/bowtie2/combined_SC_SP.3.bt2'
'combined_SC_SP/bowtie2/combined_SC_SP.4.bt2' -> '/home/kalavatt/tsukiyamalab/Rachel/combined_SC_SP/bowtie2/combined_SC_SP.4.bt2'
'combined_SC_SP/bowtie2/combined_SC_SP.1.bt2' -> '/home/kalavatt/tsukiyamalab/Rachel/combined_SC_SP/bowtie2/combined_SC_SP.1.bt2'
'combined_SC_SP/bowtie2/combined_SC_SP.2.bt2' -> '/home/kalavatt/tsukiyamalab/Rachel/combined_SC_SP/bowtie2/combined_SC_SP.2.bt2'
'combined_SC_SP/bowtie2/combined_SC_SP.rev.1.bt2' -> '/home/kalavatt/tsukiyamalab/Rachel/combined_SC_SP/bowtie2/combined_SC_SP.rev.1.bt2'
'combined_SC_SP/bowtie2/combined_SC_SP.rev.2.bt2' -> '/home/kalavatt/tsukiyamalab/Rachel/combined_SC_SP/bowtie2/combined_SC_SP.rev.2.bt2'
'combined_SC_SP/fasta' -> '/home/kalavatt/tsukiyamalab/Rachel/combined_SC_SP/fasta'
'combined_SC_SP/fasta/combined_SC_SP.fa' -> '/home/kalavatt/tsukiyamalab/Rachel/combined_SC_SP/fasta/combined_SC_SP.fa'
'combined_SC_SP/fasta/S288C_reference_sequence_R64-3-1_20210421.fa.gz' -> '/home/kalavatt/tsukiyamalab/Rachel/combined_SC_SP/fasta/S288C_reference_sequence_R64-3-1_20210421.fa.gz'
'combined_SC_SP/fasta/Schizosaccharomyces_pombe_all_chromosomes.fa.gz' -> '/home/kalavatt/tsukiyamalab/Rachel/combined_SC_SP/fasta/Schizosaccharomyces_pombe_all_chromosomes.fa.gz'
'combined_SC_SP/fasta/combined_SC_SP.fa.gz' -> '/home/kalavatt/tsukiyamalab/Rachel/combined_SC_SP/fasta/combined_SC_SP.fa.gz'
'combined_SC_SP/fasta/combined_SC_SP.fa.fai' -> '/home/kalavatt/tsukiyamalab/Rachel/combined_SC_SP/fasta/combined_SC_SP.fa.fai'


❯ cp -r "${dir_SC_SP}" "${dir_Rina}"
'combined_SC_SP/' -> '/home/kalavatt/tsukiyamalab/Rina/combined_SC_SP'
'combined_SC_SP/gff3' -> '/home/kalavatt/tsukiyamalab/Rina/combined_SC_SP/gff3'
'combined_SC_SP/gff3/saccharomyces_cerevisiae_R64-3-1_20210421.gff3.gz' -> '/home/kalavatt/tsukiyamalab/Rina/combined_SC_SP/gff3/saccharomyces_cerevisiae_R64-3-1_20210421.gff3.gz'
'combined_SC_SP/gff3/combined_SC_SP.gff3.gz' -> '/home/kalavatt/tsukiyamalab/Rina/combined_SC_SP/gff3/combined_SC_SP.gff3.gz'
'combined_SC_SP/gff3/Schizosaccharomyces_pombe_all_chromosomes.gff3.gz' -> '/home/kalavatt/tsukiyamalab/Rina/combined_SC_SP/gff3/Schizosaccharomyces_pombe_all_chromosomes.gff3.gz'
'combined_SC_SP/bowtie2' -> '/home/kalavatt/tsukiyamalab/Rina/combined_SC_SP/bowtie2'
'combined_SC_SP/bowtie2/combined_SC_SP.stdout.txt' -> '/home/kalavatt/tsukiyamalab/Rina/combined_SC_SP/bowtie2/combined_SC_SP.stdout.txt'
'combined_SC_SP/bowtie2/combined_SC_SP.stderr.txt' -> '/home/kalavatt/tsukiyamalab/Rina/combined_SC_SP/bowtie2/combined_SC_SP.stderr.txt'
'combined_SC_SP/bowtie2/combined_SC_SP.3.bt2' -> '/home/kalavatt/tsukiyamalab/Rina/combined_SC_SP/bowtie2/combined_SC_SP.3.bt2'
'combined_SC_SP/bowtie2/combined_SC_SP.4.bt2' -> '/home/kalavatt/tsukiyamalab/Rina/combined_SC_SP/bowtie2/combined_SC_SP.4.bt2'
'combined_SC_SP/bowtie2/combined_SC_SP.1.bt2' -> '/home/kalavatt/tsukiyamalab/Rina/combined_SC_SP/bowtie2/combined_SC_SP.1.bt2'
'combined_SC_SP/bowtie2/combined_SC_SP.2.bt2' -> '/home/kalavatt/tsukiyamalab/Rina/combined_SC_SP/bowtie2/combined_SC_SP.2.bt2'
'combined_SC_SP/bowtie2/combined_SC_SP.rev.1.bt2' -> '/home/kalavatt/tsukiyamalab/Rina/combined_SC_SP/bowtie2/combined_SC_SP.rev.1.bt2'
'combined_SC_SP/bowtie2/combined_SC_SP.rev.2.bt2' -> '/home/kalavatt/tsukiyamalab/Rina/combined_SC_SP/bowtie2/combined_SC_SP.rev.2.bt2'
'combined_SC_SP/fasta' -> '/home/kalavatt/tsukiyamalab/Rina/combined_SC_SP/fasta'
'combined_SC_SP/fasta/combined_SC_SP.fa' -> '/home/kalavatt/tsukiyamalab/Rina/combined_SC_SP/fasta/combined_SC_SP.fa'
'combined_SC_SP/fasta/S288C_reference_sequence_R64-3-1_20210421.fa.gz' -> '/home/kalavatt/tsukiyamalab/Rina/combined_SC_SP/fasta/S288C_reference_sequence_R64-3-1_20210421.fa.gz'
'combined_SC_SP/fasta/Schizosaccharomyces_pombe_all_chromosomes.fa.gz' -> '/home/kalavatt/tsukiyamalab/Rina/combined_SC_SP/fasta/Schizosaccharomyces_pombe_all_chromosomes.fa.gz'
'combined_SC_SP/fasta/combined_SC_SP.fa.gz' -> '/home/kalavatt/tsukiyamalab/Rina/combined_SC_SP/fasta/combined_SC_SP.fa.gz'
'combined_SC_SP/fasta/combined_SC_SP.fa.fai' -> '/home/kalavatt/tsukiyamalab/Rina/combined_SC_SP/fasta/combined_SC_SP.fa.fai'