diff --git a/bin/download_proteins_entrez.py b/bin/download_proteins_entrez.py index e719b92f..d55cad4f 100755 --- a/bin/download_proteins_entrez.py +++ b/bin/download_proteins_entrez.py @@ -142,7 +142,7 @@ def main(args=None): + "It needs to be a tsv file containing 'taxon_id' and/or optionally 'assembly_id' and 'abundance." ) - taxIds = list(set(taxIds)) + taxIds = sorted(list(set(taxIds))) #################################################################################################### # Process TaxIDs @@ -307,7 +307,7 @@ def main(args=None): # some proteins, such as 487413233, occur within multiple sequences of the assembly! # -> assembly only listed once! - proteinIds = list(dict_proteinId_assemblyIds.keys()) + proteinIds = sorted(list(dict_proteinId_assemblyIds.keys())) print("# proteins (unique): ", len(proteinIds)) # -> # proteins with refseq source (<= # IPG proteins) @@ -394,7 +394,7 @@ def main(args=None): for proteinId in proteinIds: accVersion = dict_protein_uid_acc[proteinId] # write out protein_tmp_id, entity_name (taxon_id) - for assemblyId in dict_proteinId_assemblyIds[proteinId]: + for assemblyId in sorted(dict_proteinId_assemblyIds[proteinId]): taxId = dict_assemblyId_taxId[assemblyId] print(accVersion, taxId, sep="\t", file=args.entities_proteins, flush=True) diff --git a/bin/fasta_to_tsv.py b/bin/fasta_to_tsv.py index 62dce641..4c780943 100755 --- a/bin/fasta_to_tsv.py +++ b/bin/fasta_to_tsv.py @@ -14,9 +14,13 @@ ) args = parser.parse_args() +records_out = [] with gzip.open(args.input, "rt") as handle: for record in SeqIO.parse(handle, "fasta"): if args.remove_asterisk and record.seq[-1] == "*": - print(f"{record.id}\t{record.seq[:-1]}") + records_out.append([str(record.id),"\t",str(record.seq[:-1]),"\n"]) else: - print(f"{record.id}\t{record.seq}") + records_out.append([str(record.id),"\t",str(record.seq),"\n"]) +# Two dimensional array to enable sorting +records_out = sorted(records_out, key=lambda x: x[0]) +print("".join(["".join(rec) for rec in records_out])) diff --git a/bin/generate_peptides.py b/bin/generate_peptides.py index c8524741..097f7c70 100755 --- a/bin/generate_peptides.py +++ b/bin/generate_peptides.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 import argparse -from gzip import GzipFile -from io import TextIOWrapper +import gzip import sys import pandas as pd @@ -127,7 +126,7 @@ def main(args=None): #################### # generate peptides - with TextIOWrapper(GzipFile(args.peptides, 'w', mtime=0), encoding='utf-8') as pep_handle: + with gzip.open(args.peptides, "wt") as pep_handle: print_header = True id_counter = 0 @@ -158,21 +157,23 @@ def main(args=None): results = results.groupby(["protein_id", "peptide_sequence"]).size().reset_index(name="count") # -> protein_id, peptide_sequence, count results["count"] = pd.to_numeric(results["count"], downcast="unsigned") + # prepare df for joining + results.set_index("peptide_sequence", inplace=True) + results.sort_index(inplace=True, kind="stable") - results = results.sort_values(by="peptide_sequence") - pep_ids = results.groupby("peptide_sequence").ngroup() - results["peptide_id"] = pep_ids + id_counter - id_counter = id_counter + len(pep_ids) - + unique_peptides = pd.DataFrame(index=results.index.drop_duplicates()) + unique_peptides["peptide_id"] = range(id_counter, id_counter + len(unique_peptides)) + id_counter += len(unique_peptides) # -> peptide_sequence, peptide_id - results[["peptide_id", "peptide_sequence"]].drop_duplicates().sort_values(by=["peptide_sequence","peptide_id"]).to_csv(pep_handle, mode="a", sep="\t", index=False, header=print_header) + unique_peptides.to_csv(pep_handle, mode="a", sep="\t", index=True, header=print_header) + results = results.join(unique_peptides) # -> protein_id, peptide_sequence, count, peptide_id print("\nInfo: results (['protein_id','peptide_sequence','peptide_id','count'])", flush=True) results.info(verbose=False, memory_usage=print_mem) - results[["protein_id", "peptide_id", "count"]].drop_duplicates().sort_values(by=["protein_id", "peptide_id"]).to_csv( + results[["protein_id", "peptide_id", "count"]].to_csv( args.proteins_peptides, mode="a", sep="\t", index=False, header=print_header ) diff --git a/bin/generate_protein_and_entity_ids.py b/bin/generate_protein_and_entity_ids.py index fcb1876d..793fe1c3 100755 --- a/bin/generate_protein_and_entity_ids.py +++ b/bin/generate_protein_and_entity_ids.py @@ -6,8 +6,7 @@ # proteins from 'proteins' input type: not known if unique or not, handle separately for now (in case of unique ids this causes unnecessary redundancy; could add parameter for this in future) import argparse -from gzip import GzipFile -from io import TextIOWrapper +import gzip import sys import pandas as pd @@ -86,14 +85,24 @@ def parse_args(args=None): def main(args=None): args = parse_args(args) + next_protein_id = 0 + next_entity_id = 0 + proteins_columns = ["protein_id", "protein_orig_id", "protein_sequence"] entities_proteins_columns = ["entity_id", "protein_id"] entities_columns = ["entity_id", "entity_name"] microbiomes_entities_columns = ["microbiome_id", "entity_id"] - with TextIOWrapper(GzipFile(args.out_proteins, 'w', mtime=0), encoding='utf-8') as outfile_proteins: + with gzip.open(args.out_proteins, "wt") as outfile_proteins, open( + args.out_entities_proteins, "w" + ) as outfile_entities_proteins, open(args.out_entities, "w") as outfile_entities, open( + args.out_microbiomes_entities, "w" + ) as outfile_microbiomes_entities: + # HEADERS + print("\t".join(proteins_columns), file=outfile_proteins) + print("\t".join(entities_proteins_columns), file=outfile_entities_proteins) + print("\t".join(entities_columns), file=outfile_entities) + print("\t".join(microbiomes_entities_columns), file=outfile_microbiomes_entities) - entities_dfs = [] - proteins_dfs = [] # # PREDICTED PROTEINS # @@ -118,7 +127,6 @@ def main(args=None): # Read all provided files while checking in each microbiome_bare_id # Bins contain multiple files within one filepath (gzipped) corresponding to one microbiome_bare_id check_in_microbiome_bare_id = set() - for microbiome_bare_id, bin_basename, inpath in zip( args.predicted_proteins_microbiome_ids, args.predicted_proteins_bin_basenames, args.predicted_proteins ): @@ -132,6 +140,9 @@ def main(args=None): else: proteins["entity_name"] = bin_basename + proteins["protein_id"] = range(next_protein_id, next_protein_id + len(proteins)) + next_protein_id += len(proteins) + # Check if microbiome is coassembly if len(microbiomes.groupby("microbiome_bare_id").get_group(microbiome_bare_id)) != 1: all_entities = [] @@ -139,24 +150,39 @@ def main(args=None): # (i.e. microbiome_bare_id) and assign the corresponding microbiome_id to the entities for microbiome_id in microbiomes.groupby("microbiome_bare_id").get_group(microbiome_bare_id)[ "microbiome_id" - ]: + ].sort_values(): entities = pd.DataFrame() entities = proteins[["entity_name"]].drop_duplicates() - + entities["entity_id"] = range(next_entity_id, next_entity_id + len(entities)) # Instead of microbiome_bare_id append microbiome_id entities["microbiome_id"] = microbiome_id all_entities.append(entities) + next_entity_id += len(entities) entities = pd.concat(all_entities) else: entities = proteins[["entity_name"]].drop_duplicates() + entities["entity_id"] = range(next_entity_id, next_entity_id + len(entities)) + next_entity_id += len(entities) # If not coassembled microbiome_id = microbiome_bare_id entities["microbiome_id"] = microbiome_bare_id - # Merge all dfs of all microbiomes - proteins_dfs.append(proteins) - entities_dfs.append(entities) + # Write proteins + proteins.rename(columns={"protein_tmp_id": "protein_orig_id"}, inplace=True) + proteins[proteins_columns].to_csv(outfile_proteins, sep="\t", header=False, index=False) + # Write entities_proteins + proteins.merge(entities)[["entity_id", "protein_id"]].drop_duplicates().to_csv( + outfile_entities_proteins, sep="\t", header=False, index=False + ) + # Write entities + entities[entities_columns].drop_duplicates().to_csv( + outfile_entities, sep="\t", header=False, index=False + ) + # Write microbiomes - entities + entities[microbiomes_entities_columns].to_csv( + outfile_microbiomes_entities, sep="\t", index=False, header=False + ) # # ENTREZ PROTEINS @@ -174,43 +200,37 @@ def main(args=None): args.entrez_microbiomes_entities, "\t" ) # entity_name, microbiome_id, entity_weight - # Collect Entities - entities = microbiomes_entities[["entity_name", "microbiome_id"]].drop_duplicates() - proteins = proteins.merge(entities_proteins) - - entities_dfs.append(entities) - proteins_dfs.append(proteins) - - proteins = pd.concat(proteins_dfs, ignore_index=True) - entities = pd.concat(entities_dfs, ignore_index=True) - - # Collect Entities and sort them - entities["entity_name"] = entities["entity_name"].astype(str) # Taxids are read as numeric and cannot be compared to bins/assembly ids - entities = entities.sort_values(by="entity_name") - entities["entity_id"] = entities.groupby("entity_name").ngroup() - - # Collect Proteins ans sort them - proteins.rename(columns={"protein_tmp_id": "protein_orig_id"}, inplace=True) - proteins["entity_name"] = proteins["entity_name"].astype(str) # Taxids are read as numeric and cannot be compared to bins/assembly ids - proteins = proteins.sort_values(by="protein_orig_id") - proteins["protein_id"] = proteins.groupby("protein_orig_id").ngroup() - - - # Write Proteins - proteins[proteins_columns].drop_duplicates().sort_values(by="protein_id").to_csv(outfile_proteins, sep="\t", index=False) - - # Write Entities-Proteins - proteins.merge(entities)[entities_proteins_columns].drop_duplicates().sort_values(by=entities_proteins_columns).to_csv( - args.out_entities_proteins, sep="\t", index=False - ) - - # Write entities - entities[entities_columns].sort_values(by=entities_columns).drop_duplicates().to_csv(args.out_entities, sep="\t", index=False) + # Assign protein_id + proteins["protein_id"] = range(next_protein_id, next_protein_id + len(proteins)) + next_protein_id += len(proteins) + + # Assign entity_id + entities = microbiomes_entities[["entity_name"]].drop_duplicates() + entities["entity_id"] = range(next_entity_id, next_entity_id + len(entities)) + next_entity_id += len(entities) + + # Write proteins + proteins.rename(columns={"protein_tmp_id": "protein_orig_id"})[proteins_columns].to_csv( + outfile_proteins, sep="\t", header=False, index=False + ) + + entities_microbiomes_proteins = ( + entities_proteins.merge(proteins) + .merge(entities) + .merge(microbiomes_entities)[["entity_id", "protein_id", "microbiome_id", "entity_weight"]] + ) + + # Write entities_proteins: 'entity_id', 'protein_id' + entities_microbiomes_proteins[entities_proteins_columns].to_csv( + outfile_entities_proteins, sep="\t", header=False, index=False + ) + # Write entities: 'entity_id', 'entity_name' + entities[entities_columns].to_csv(outfile_entities, sep="\t", header=False, index=False) + # Write microbiomes - entities: 'microbiome_id', 'entity_id' + entities_microbiomes_proteins[microbiomes_entities_columns].drop_duplicates().to_csv( + outfile_microbiomes_entities, sep="\t", header=False, index=False + ) - # Write microbiomes - entities - entities[microbiomes_entities_columns].sort_values(by=microbiomes_entities_columns).to_csv( - args.out_microbiomes_entities, sep="\t", index=False - ) if __name__ == "__main__": sys.exit(main()) diff --git a/testdata/input.assembly_only.csv b/testdata/input.assembly_only.csv deleted file mode 100644 index e085e35d..00000000 --- a/testdata/input.assembly_only.csv +++ /dev/null @@ -1,3 +0,0 @@ -condition,type,microbiome_path,alleles,weights_path -cond_1,assembly,https://github.com/skrakau/metapep/raw/dev/testdata/test_minigut.tiny.contigs.fa.gz,A*01:01,https://github.com/skrakau/metapep/raw/dev/testdata/test_minigut.contig_weights.tsv -cond_2,assembly,https://github.com/skrakau/metapep/raw/dev/testdata/test_minigut.tiny.copy.contigs.fa.gz,A*01:01 B*07:02,https://github.com/skrakau/metapep/raw/dev/testdata/test_minigut.copy.contig_weights.tsv diff --git a/testdata/input.bins.csv b/testdata/input.bins.csv deleted file mode 100644 index 4378b96d..00000000 --- a/testdata/input.bins.csv +++ /dev/null @@ -1,3 +0,0 @@ -condition,type,microbiome_path,alleles,weights_path -cond_1,assembly,https://github.com/skrakau/metapep/raw/dev/testdata/test_minigut.tiny.contigs.fa.gz,A*01:01, -cond_2,bins,https://github.com/skrakau/metapep/raw/dev/testdata/test_bins.tar.gz,A*01:01, diff --git a/testdata/input.csv b/testdata/input.csv deleted file mode 100644 index e15020b3..00000000 --- a/testdata/input.csv +++ /dev/null @@ -1,5 +0,0 @@ -condition,type,microbiome_path,alleles,weights_path -cond_1,taxa,https://github.com/skrakau/metapep/raw/dev/testdata/taxids.txt,A*01:01, -cond_2,taxa,https://github.com/skrakau/metapep/raw/dev/testdata/taxids.tiny.txt,A*01:01 B*07:02, -cond_3,taxa,https://github.com/skrakau/metapep/raw/dev/testdata/taxids.tiny.txt,A*01:01, -cond_4,assembly,https://github.com/skrakau/metapep/raw/dev/testdata/test_minigut.contigs.fa.gz,A*01:01,https://github.com/skrakau/metapep/raw/dev/testdata/test_minigut.contig_weights.tsv diff --git a/testdata/input.mouse.csv b/testdata/input.mouse.csv deleted file mode 100644 index 4c634052..00000000 --- a/testdata/input.mouse.csv +++ /dev/null @@ -1,5 +0,0 @@ -condition,type,microbiome_path,alleles,weights_path -cond_1,taxa,https://github.com/skrakau/metapep/raw/dev/testdata/taxids.txt,H2-Db, -cond_2,taxa,https://github.com/skrakau/metapep/raw/dev/testdata/taxids.tiny.txt,H2-Db H2-Ld, -cond_3,taxa,https://github.com/skrakau/metapep/raw/dev/testdata/taxids.tiny.txt,H2-Db, -cond_4,assembly,https://github.com/skrakau/metapep/raw/dev/testdata/test_minigut.contigs.fa.gz,H2-Db,https://github.com/skrakau/metapep/raw/dev/testdata/test_minigut.contig_weights.tsv diff --git a/testdata/input.tiny.csv b/testdata/input.tiny.csv deleted file mode 100644 index e0548e4b..00000000 --- a/testdata/input.tiny.csv +++ /dev/null @@ -1,4 +0,0 @@ -condition,type,microbiome_path,alleles,weights_path -cond_1,taxa,https://github.com/skrakau/metapep/raw/dev/testdata/taxids.tiny.txt,A*01:01, -cond_2,taxa,https://github.com/skrakau/metapep/raw/dev/testdata/taxids.tiny.txt,A*01:01 B*07:02, -cond_3,assembly,https://github.com/skrakau/metapep/raw/dev/testdata/test_minigut.contigs.fa.gz,A*01:01,https://github.com/skrakau/metapep/raw/dev/testdata/test_minigut.contig_weights.tsv diff --git a/testdata/input.tmp.csv b/testdata/input.tmp.csv deleted file mode 100644 index abdda374..00000000 --- a/testdata/input.tmp.csv +++ /dev/null @@ -1,4 +0,0 @@ -condition,type,microbiome_path,alleles,weights_path -cond_1,taxa,/Users/skrakau/Development/nf-core-metapep/testdata/taxids.small.txt,A*01:01, -cond_2,taxa,/Users/skrakau/Development/nf-core-metapep/testdata/taxids.small.txt,A*01:01 B*07:02, -cond_3,assembly,/Users/skrakau/Workspaces/test_metapep/test_stuff/test_minigut.contigs.fa.gz,A*01:01,/Users/skrakau/Workspaces/test_metapep/test_stuff/MEGAHIT-test_minigut.contig_weights.tsv diff --git a/testdata/taxids.tiny.txt b/testdata/taxids.tiny.txt deleted file mode 100644 index 8569b7e2..00000000 --- a/testdata/taxids.tiny.txt +++ /dev/null @@ -1,3 +0,0 @@ -taxon_id -243273 -662945 diff --git a/testdata/taxids.txt b/testdata/taxids.txt deleted file mode 100644 index bf9c8bf5..00000000 --- a/testdata/taxids.txt +++ /dev/null @@ -1,3 +0,0 @@ -taxon_id -637912 -1081888 diff --git a/testdata/test_bins.tar.gz b/testdata/test_bins.tar.gz deleted file mode 100644 index f4a6c353..00000000 Binary files a/testdata/test_bins.tar.gz and /dev/null differ diff --git a/testdata/test_minigut.contig_weights.tsv b/testdata/test_minigut.contig_weights.tsv deleted file mode 100644 index d8913d3b..00000000 --- a/testdata/test_minigut.contig_weights.tsv +++ /dev/null @@ -1,273 +0,0 @@ -contig_name weight -k141_0 6.74424 -k141_137 5.63812 -k141_1 5.13475 -k141_2 6.26019 -k141_138 6.26537 -k141_139 5.4537 -k141_140 6.48591 -k141_141 7.82164 -k141_142 7.54511 -k141_3 8.03122 -k141_4 4.21446 -k141_5 6.18518 -k141_6 8.00176 -k141_7 8.06771 -k141_8 6.24218 -k141_9 3.94043 -k141_10 6.26622 -k141_11 6.56613 -k141_143 8.224 -k141_12 7.196 -k141_144 4.01949 -k141_13 1.36429 -k141_14 5.92391 -k141_15 8.21073 -k141_145 8.87006 -k141_16 5.08484 -k141_17 16.5312 -k141_18 6.60731 -k141_146 6.32272 -k141_19 6.09608 -k141_147 5.57835 -k141_20 6.04471 -k141_148 8.85447 -k141_21 6.01372 -k141_149 5.35103 -k141_22 7.57596 -k141_23 7.15174 -k141_150 6.62459 -k141_151 6.22894 -k141_24 8.1403 -k141_152 6.19072 -k141_25 5.75677 -k141_153 3.83624 -k141_154 5.91983 -k141_155 6.95204 -k141_156 6.21418 -k141_157 6.8985 -k141_26 7.84097 -k141_158 6.07905 -k141_159 5.31532 -k141_160 7.63433 -k141_161 6.07454 -k141_162 6.28119 -k141_27 6.26562 -k141_28 5.18841 -k141_163 4.83505 -k141_29 6.32939 -k141_30 6.59875 -k141_31 6.9585 -k141_32 6.15337 -k141_33 5.61638 -k141_164 8.08622 -k141_34 7.13686 -k141_35 5.34294 -k141_165 5.4473 -k141_36 7.15481 -k141_166 6.67325 -k141_37 6.19655 -k141_38 5.67073 -k141_39 6.16419 -k141_40 5.70121 -k141_167 8.06466 -k141_168 6.34049 -k141_41 6.40006 -k141_169 5.95954 -k141_42 4.76633 -k141_170 7.54573 -k141_43 5.41221 -k141_44 3.91612 -k141_171 5.84296 -k141_172 6.21475 -k141_173 6.11367 -k141_174 5.89914 -k141_175 8.82378 -k141_45 8.25848 -k141_176 5.76024 -k141_46 6.35841 -k141_177 6.45911 -k141_178 5.32 -k141_179 5.38 -k141_47 7.16025 -k141_48 0.995868 -k141_49 7.78206 -k141_50 5.04692 -k141_180 8.82958 -k141_181 8.65542 -k141_51 6.80488 -k141_182 0.977901 -k141_52 6.96639 -k141_53 6.81356 -k141_183 5.71648 -k141_54 6.49538 -k141_55 5.59054 -k141_56 5.53857 -k141_57 5.53521 -k141_184 6.43829 -k141_185 9.23188 -k141_58 8.34178 -k141_186 5.90975 -k141_59 5.63797 -k141_187 6.78346 -k141_60 5.69253 -k141_61 5.15721 -k141_62 4.80528 -k141_63 6.11612 -k141_64 5.19155 -k141_65 5.25629 -k141_188 6.56876 -k141_66 6.11422 -k141_189 8.43669 -k141_67 5.54607 -k141_190 6.7437 -k141_68 5.76612 -k141_69 6.98683 -k141_191 8.32525 -k141_70 6.96257 -k141_71 6.5997 -k141_192 6.06382 -k141_193 6.03742 -k141_194 6.44722 -k141_195 5.67824 -k141_72 7.68784 -k141_73 5.46361 -k141_74 4.68507 -k141_75 5.77059 -k141_196 8.33135 -k141_76 9.02828 -k141_197 6.92132 -k141_198 5.77227 -k141_77 6.02446 -k141_199 5.45649 -k141_78 5.98127 -k141_200 6.47908 -k141_202 6.50996 -k141_79 9.34575 -k141_203 5.61734 -k141_204 4.96987 -k141_205 7.58779 -k141_80 8.79558 -k141_81 6.42808 -k141_82 5.46116 -k141_83 6.12879 -k141_84 14.3868 -k141_206 8.30131 -k141_85 6.2087 -k141_207 6.70115 -k141_208 8.00432 -k141_86 6.08348 -k141_209 6.2269 -k141_210 5.94384 -k141_87 5.94532 -k141_88 5.38292 -k141_211 5.84733 -k141_212 5.94232 -k141_213 5.45939 -k141_89 6.71062 -k141_90 0.977901 -k141_91 7.68034 -k141_214 8.56502 -k141_92 8.37851 -k141_215 6.24277 -k141_216 4.97482 -k141_93 6.04966 -k141_217 6.30995 -k141_94 8.68481 -k141_218 6.21844 -k141_219 6.48897 -k141_95 6.29813 -k141_96 6.1084 -k141_97 8.20518 -k141_98 4.86494 -k141_99 5.56487 -k141_220 8.03064 -k141_221 5.63985 -k141_222 4.61996 -k141_223 5.61852 -k141_224 5.36939 -k141_225 5.88412 -k141_226 6.37173 -k141_100 8.13328 -k141_227 5.6295 -k141_101 6.09361 -k141_228 6.45449 -k141_102 5.57076 -k141_229 5.69979 -k141_103 6.56256 -k141_104 3.79913 -k141_105 0.584071 -k141_106 7.88 -k141_107 4.0662 -k141_230 7.41893 -k141_108 5.81238 -k141_231 8.12647 -k141_109 6.05948 -k141_232 5.92621 -k141_110 5.88202 -k141_233 6.23165 -k141_111 7.03262 -k141_234 5.76316 -k141_112 6.5161 -k141_113 5.88775 -k141_114 6.15621 -k141_115 6.19383 -k141_116 8.73807 -k141_117 6.77007 -k141_118 7.8975 -k141_119 6.64174 -k141_120 6.4399 -k141_121 7.90724 -k141_122 1.52841 -k141_123 5.72168 -k141_124 6.91227 -k141_125 6.2406 -k141_126 5.62465 -k141_127 5.49895 -k141_235 8.27024 -k141_236 7.4459 -k141_128 5.84584 -k141_237 6.92484 -k141_129 8.36755 -k141_238 8.25159 -k141_239 6.78299 -k141_240 4.90039 -k141_130 6.20316 -k141_241 8.60358 -k141_131 8.0373 -k141_242 6.20794 -k141_132 6.24063 -k141_133 7.94217 -k141_243 6.24523 -k141_244 5.663 -k141_245 5.75412 -k141_246 3.61922 -k141_247 5.63403 -k141_134 8.57562 -k141_248 8.12917 -k141_249 5.24283 -k141_135 6.28727 -k141_250 5.88162 -k141_251 5.78735 -k141_252 7.00744 -k141_253 5.82059 -k141_255 7.02128 -k141_136 6.49917 -k141_256 6.56353 -k141_257 7.45489 -k141_258 1.19255 -k141_259 6.53909 -k141_260 5.81143 -k141_261 8.3397 -k141_262 4.97564 -k141_263 7.11907 -k141_264 1.04908 -k141_265 8.59483 -k141_266 6.27781 -k141_267 5.3324 -k141_268 5.60783 -k141_269 8.08235 -k141_270 8.86567 -k141_271 6.30739 -k141_272 5.51562 -k141_273 5.71984 diff --git a/testdata/test_minigut.contigs.fa.gz b/testdata/test_minigut.contigs.fa.gz deleted file mode 100644 index 2c99e7c1..00000000 Binary files a/testdata/test_minigut.contigs.fa.gz and /dev/null differ diff --git a/testdata/test_minigut.copy.contig_weights.tsv b/testdata/test_minigut.copy.contig_weights.tsv deleted file mode 100644 index d8913d3b..00000000 --- a/testdata/test_minigut.copy.contig_weights.tsv +++ /dev/null @@ -1,273 +0,0 @@ -contig_name weight -k141_0 6.74424 -k141_137 5.63812 -k141_1 5.13475 -k141_2 6.26019 -k141_138 6.26537 -k141_139 5.4537 -k141_140 6.48591 -k141_141 7.82164 -k141_142 7.54511 -k141_3 8.03122 -k141_4 4.21446 -k141_5 6.18518 -k141_6 8.00176 -k141_7 8.06771 -k141_8 6.24218 -k141_9 3.94043 -k141_10 6.26622 -k141_11 6.56613 -k141_143 8.224 -k141_12 7.196 -k141_144 4.01949 -k141_13 1.36429 -k141_14 5.92391 -k141_15 8.21073 -k141_145 8.87006 -k141_16 5.08484 -k141_17 16.5312 -k141_18 6.60731 -k141_146 6.32272 -k141_19 6.09608 -k141_147 5.57835 -k141_20 6.04471 -k141_148 8.85447 -k141_21 6.01372 -k141_149 5.35103 -k141_22 7.57596 -k141_23 7.15174 -k141_150 6.62459 -k141_151 6.22894 -k141_24 8.1403 -k141_152 6.19072 -k141_25 5.75677 -k141_153 3.83624 -k141_154 5.91983 -k141_155 6.95204 -k141_156 6.21418 -k141_157 6.8985 -k141_26 7.84097 -k141_158 6.07905 -k141_159 5.31532 -k141_160 7.63433 -k141_161 6.07454 -k141_162 6.28119 -k141_27 6.26562 -k141_28 5.18841 -k141_163 4.83505 -k141_29 6.32939 -k141_30 6.59875 -k141_31 6.9585 -k141_32 6.15337 -k141_33 5.61638 -k141_164 8.08622 -k141_34 7.13686 -k141_35 5.34294 -k141_165 5.4473 -k141_36 7.15481 -k141_166 6.67325 -k141_37 6.19655 -k141_38 5.67073 -k141_39 6.16419 -k141_40 5.70121 -k141_167 8.06466 -k141_168 6.34049 -k141_41 6.40006 -k141_169 5.95954 -k141_42 4.76633 -k141_170 7.54573 -k141_43 5.41221 -k141_44 3.91612 -k141_171 5.84296 -k141_172 6.21475 -k141_173 6.11367 -k141_174 5.89914 -k141_175 8.82378 -k141_45 8.25848 -k141_176 5.76024 -k141_46 6.35841 -k141_177 6.45911 -k141_178 5.32 -k141_179 5.38 -k141_47 7.16025 -k141_48 0.995868 -k141_49 7.78206 -k141_50 5.04692 -k141_180 8.82958 -k141_181 8.65542 -k141_51 6.80488 -k141_182 0.977901 -k141_52 6.96639 -k141_53 6.81356 -k141_183 5.71648 -k141_54 6.49538 -k141_55 5.59054 -k141_56 5.53857 -k141_57 5.53521 -k141_184 6.43829 -k141_185 9.23188 -k141_58 8.34178 -k141_186 5.90975 -k141_59 5.63797 -k141_187 6.78346 -k141_60 5.69253 -k141_61 5.15721 -k141_62 4.80528 -k141_63 6.11612 -k141_64 5.19155 -k141_65 5.25629 -k141_188 6.56876 -k141_66 6.11422 -k141_189 8.43669 -k141_67 5.54607 -k141_190 6.7437 -k141_68 5.76612 -k141_69 6.98683 -k141_191 8.32525 -k141_70 6.96257 -k141_71 6.5997 -k141_192 6.06382 -k141_193 6.03742 -k141_194 6.44722 -k141_195 5.67824 -k141_72 7.68784 -k141_73 5.46361 -k141_74 4.68507 -k141_75 5.77059 -k141_196 8.33135 -k141_76 9.02828 -k141_197 6.92132 -k141_198 5.77227 -k141_77 6.02446 -k141_199 5.45649 -k141_78 5.98127 -k141_200 6.47908 -k141_202 6.50996 -k141_79 9.34575 -k141_203 5.61734 -k141_204 4.96987 -k141_205 7.58779 -k141_80 8.79558 -k141_81 6.42808 -k141_82 5.46116 -k141_83 6.12879 -k141_84 14.3868 -k141_206 8.30131 -k141_85 6.2087 -k141_207 6.70115 -k141_208 8.00432 -k141_86 6.08348 -k141_209 6.2269 -k141_210 5.94384 -k141_87 5.94532 -k141_88 5.38292 -k141_211 5.84733 -k141_212 5.94232 -k141_213 5.45939 -k141_89 6.71062 -k141_90 0.977901 -k141_91 7.68034 -k141_214 8.56502 -k141_92 8.37851 -k141_215 6.24277 -k141_216 4.97482 -k141_93 6.04966 -k141_217 6.30995 -k141_94 8.68481 -k141_218 6.21844 -k141_219 6.48897 -k141_95 6.29813 -k141_96 6.1084 -k141_97 8.20518 -k141_98 4.86494 -k141_99 5.56487 -k141_220 8.03064 -k141_221 5.63985 -k141_222 4.61996 -k141_223 5.61852 -k141_224 5.36939 -k141_225 5.88412 -k141_226 6.37173 -k141_100 8.13328 -k141_227 5.6295 -k141_101 6.09361 -k141_228 6.45449 -k141_102 5.57076 -k141_229 5.69979 -k141_103 6.56256 -k141_104 3.79913 -k141_105 0.584071 -k141_106 7.88 -k141_107 4.0662 -k141_230 7.41893 -k141_108 5.81238 -k141_231 8.12647 -k141_109 6.05948 -k141_232 5.92621 -k141_110 5.88202 -k141_233 6.23165 -k141_111 7.03262 -k141_234 5.76316 -k141_112 6.5161 -k141_113 5.88775 -k141_114 6.15621 -k141_115 6.19383 -k141_116 8.73807 -k141_117 6.77007 -k141_118 7.8975 -k141_119 6.64174 -k141_120 6.4399 -k141_121 7.90724 -k141_122 1.52841 -k141_123 5.72168 -k141_124 6.91227 -k141_125 6.2406 -k141_126 5.62465 -k141_127 5.49895 -k141_235 8.27024 -k141_236 7.4459 -k141_128 5.84584 -k141_237 6.92484 -k141_129 8.36755 -k141_238 8.25159 -k141_239 6.78299 -k141_240 4.90039 -k141_130 6.20316 -k141_241 8.60358 -k141_131 8.0373 -k141_242 6.20794 -k141_132 6.24063 -k141_133 7.94217 -k141_243 6.24523 -k141_244 5.663 -k141_245 5.75412 -k141_246 3.61922 -k141_247 5.63403 -k141_134 8.57562 -k141_248 8.12917 -k141_249 5.24283 -k141_135 6.28727 -k141_250 5.88162 -k141_251 5.78735 -k141_252 7.00744 -k141_253 5.82059 -k141_255 7.02128 -k141_136 6.49917 -k141_256 6.56353 -k141_257 7.45489 -k141_258 1.19255 -k141_259 6.53909 -k141_260 5.81143 -k141_261 8.3397 -k141_262 4.97564 -k141_263 7.11907 -k141_264 1.04908 -k141_265 8.59483 -k141_266 6.27781 -k141_267 5.3324 -k141_268 5.60783 -k141_269 8.08235 -k141_270 8.86567 -k141_271 6.30739 -k141_272 5.51562 -k141_273 5.71984 diff --git a/testdata/test_minigut.copy.contigs.fa.gz b/testdata/test_minigut.copy.contigs.fa.gz deleted file mode 100644 index 2c99e7c1..00000000 Binary files a/testdata/test_minigut.copy.contigs.fa.gz and /dev/null differ diff --git a/testdata/test_minigut.tiny.contigs.fa.gz b/testdata/test_minigut.tiny.contigs.fa.gz deleted file mode 100644 index 05293b37..00000000 Binary files a/testdata/test_minigut.tiny.contigs.fa.gz and /dev/null differ diff --git a/testdata/test_minigut.tiny.copy.contigs.fa.gz b/testdata/test_minigut.tiny.copy.contigs.fa.gz deleted file mode 100644 index 05293b37..00000000 Binary files a/testdata/test_minigut.tiny.copy.contigs.fa.gz and /dev/null differ diff --git a/tests/pipeline/test.nf.test.snap b/tests/pipeline/test.nf.test.snap index f91ab948..eded0993 100644 --- a/tests/pipeline/test.nf.test.snap +++ b/tests/pipeline/test.nf.test.snap @@ -4,23 +4,23 @@ "alleles.tsv:md5,77994270b1eab3e8363c90d8d09780fa", "conditions_alleles.tsv:md5,e865774e32935a69306461ff47428aa8", "conditions.tsv:md5,a6fe5daaf11a90b61c83c9934969c52c", - "entities_proteins.tsv:md5,40559463e23017a527de4a0d44029762", - "entities.tsv:md5,57b7ddd2bbc0ae2f577e53ebfee0bed9", + "entities_proteins.tsv:md5,fd4cbfeca8e8f176c9e2f7aff39921bf", + "entities.tsv:md5,beb0a0fca43b5511cdabaf24bcc9113a", "microbiomes_entities.no_weights.tsv:md5,4d45837586c7c6dc4a19b41735215b01", "microbiomes_entities.nucl.tsv:md5,f5a3ff3cc1f8100f56238b9fab405e66", - "microbiomes_entities.tsv:md5,f8d9569231eeee796b23e250b37ea2ff", + "microbiomes_entities.tsv:md5,ea5c9629387ee19ccc485af0157a922c", "microbiomes.tsv:md5,20f141ba0c97325e9d99a9c0765f9a37", - "proteins_peptides.tsv:md5,40b02b3372c1cbfe7197f5440e724a24", - "peptides.tsv.gz:md5,b776a0ad7d110f8984dc43f74f2f5f2a", - "proteins.tsv.gz:md5,2a3e88df7aaa7c92c043671d4c823b75", - "predictions.tsv.gz:md5,3c86baab1aa53a295c9fe14f65aa1259", + "proteins_peptides.tsv:md5,e4d6b0199769075ff20c8eeca1485852", + "peptides.tsv.gz:md5,66f6ceef1d5212d061b46c90703282d9", + "proteins.tsv.gz:md5,3b397fc51074066efe2d31a5aff2abe3", + "predictions.tsv.gz:md5,a50679d98e43e09858bf5f29ed222cd2", "stats.txt:md5,83204291158696ce5d19c4e976f38efe" ], "meta": { "nf-test": "0.8.4", "nextflow": "24.04.3" }, - "timestamp": "2024-07-18T11:22:51.147921797" + "timestamp": "2024-08-02T07:24:38.491278622" }, "logs": { "content": [ @@ -33,4 +33,4 @@ }, "timestamp": "2024-06-26T08:14:54+0000" } -} \ No newline at end of file +} diff --git a/tests/pipeline/test_all.nf.test.snap b/tests/pipeline/test_all.nf.test.snap index 11141483..6f0da13a 100644 --- a/tests/pipeline/test_all.nf.test.snap +++ b/tests/pipeline/test_all.nf.test.snap @@ -14,23 +14,23 @@ "alleles.tsv:md5,77994270b1eab3e8363c90d8d09780fa", "conditions_alleles.tsv:md5,0e992c19471bb56c9b6e6a6f71302742", "conditions.tsv:md5,87707cd0dee14b0ca4face090ba9dce9", - "entities_proteins.tsv:md5,6eb14bc6f5a22a56244897e3c37f7698", - "entities.tsv:md5,243f199fd2cb7d9ff8e0c416865f8334", - "microbiomes_entities.no_weights.tsv:md5,b62d2213adce7c6111056d8181fe6b2c", + "entities_proteins.tsv:md5,57a704e833a65a1809c369452864e241", + "entities.tsv:md5,b7d3d019c258859c93bbc15095d304f1", + "microbiomes_entities.no_weights.tsv:md5,977da228f562b386b0f84fc718cc22e9", "microbiomes_entities.nucl.tsv:md5,aa0d4cc3546508c8e5b81bc9b4b16e39", - "microbiomes_entities.tsv:md5,64b5e7a2c23009f9ab54b5667cd45efb", + "microbiomes_entities.tsv:md5,03309271d7a983c7b4a5c73418676bbc", "microbiomes.tsv:md5,70871c8b3023f5a8412c05f4a69dda2f", - "proteins_peptides.tsv:md5,37a4c26c385c2189457aa37430c15eb4", - "peptides.tsv.gz:md5,2956eeef0ce98f2af1e94ec3f48caccf", - "proteins.tsv.gz:md5,5628f70522e8696a90d49ccf989e0d1b", - "predictions.tsv.gz:md5,024939bcd5a845c02d28b27668e69c91", + "proteins_peptides.tsv:md5,aff145bbff9f962891db2c2a540242c4", + "peptides.tsv.gz:md5,36aba6d50b1aad95859a547b06d04fc5", + "proteins.tsv.gz:md5,027823577635c591b58326410b039b32", + "predictions.tsv.gz:md5,d3b36995f3b4027d1a14e726a0acc6c2", "stats.txt:md5,896155e0f043f30bd71c38c8ee7b466c" ], "meta": { "nf-test": "0.8.4", "nextflow": "24.04.3" }, - "timestamp": "2024-07-18T14:41:01.231189333" + "timestamp": "2024-08-02T07:59:44.990798256" }, "logs": { "content": [ @@ -44,4 +44,4 @@ }, "timestamp": "2024-07-17T15:58:41.467969153" } -} \ No newline at end of file +} diff --git a/tests/pipeline/test_assembly_only.nf.test.snap b/tests/pipeline/test_assembly_only.nf.test.snap index 06b985db..b9e44f64 100644 --- a/tests/pipeline/test_assembly_only.nf.test.snap +++ b/tests/pipeline/test_assembly_only.nf.test.snap @@ -4,23 +4,23 @@ "alleles.tsv:md5,77994270b1eab3e8363c90d8d09780fa", "conditions_alleles.tsv:md5,9db7e0afc739b23c430877e336b0143f", "conditions.tsv:md5,a6fe5daaf11a90b61c83c9934969c52c", - "entities_proteins.tsv:md5,20fa4e2263edf5875a55f668bfd54255", - "entities.tsv:md5,03eb30795257d5e6a0a52bdfc09481fd", + "entities_proteins.tsv:md5,2ba821a04755841dc6f116965f853f40", + "entities.tsv:md5,61f364c97c883f45663ae27d115b229f", "microbiomes_entities.no_weights.tsv:md5,bd449dafeb86c374552f2e22a9780b19", "microbiomes_entities.nucl.tsv:md5,3888070b634251c0ae3e9fd4857140d7", - "microbiomes_entities.tsv:md5,f93b40a413fde8dd0a10712270c1a5f0", + "microbiomes_entities.tsv:md5,6cef150e7f5fc2f3320d9a93da00f5de", "microbiomes.tsv:md5,3d597aec25752f7ca83436b31bfaa128", - "proteins_peptides.tsv:md5,d3643f39837bc11a4984fa5260c2820f", - "peptides.tsv.gz:md5,afb2ba3ea67d4affb72d6e7f6451d45c", + "proteins_peptides.tsv:md5,c2a16b2d30c9cf5ea52207dd799b4603", + "peptides.tsv.gz:md5,dd0f94bea5079df8ef06895de255038b", "proteins.tsv.gz:md5,104b786dfa89e183a46e29f1f37f17de", - "predictions.tsv.gz:md5,bfff151d9a6dc15006a91c8fd7feddcc", + "predictions.tsv.gz:md5,98eeb5672f338ad5e8de7e2047261037", "stats.txt:md5,d138e30d814db5203a789b7eddd04732" ], "meta": { "nf-test": "0.8.4", "nextflow": "24.04.3" }, - "timestamp": "2024-07-18T08:16:13.284036932" + "timestamp": "2024-08-01T12:28:55.298980309" }, "logs": { "content": [ @@ -33,4 +33,4 @@ }, "timestamp": "2024-06-26T08:14:54+0000" } -} \ No newline at end of file +} diff --git a/tests/pipeline/test_bins_only.nf.test.snap b/tests/pipeline/test_bins_only.nf.test.snap index 825ff822..3566d8c8 100644 --- a/tests/pipeline/test_bins_only.nf.test.snap +++ b/tests/pipeline/test_bins_only.nf.test.snap @@ -4,23 +4,23 @@ "alleles.tsv:md5,77994270b1eab3e8363c90d8d09780fa", "conditions_alleles.tsv:md5,9db7e0afc739b23c430877e336b0143f", "conditions.tsv:md5,5b04349198a406fc16760c7e8808bb23", - "entities_proteins.tsv:md5,58c51a20d57c93252d1bdf1469916a6d", - "entities.tsv:md5,4a739da6f90ba23b83f37e655147569e", + "entities_proteins.tsv:md5,20e495ff06ccacb0f81dbd65e556fc83", + "entities.tsv:md5,7a472316b8e96e9b12d7ca3ff6a88d39", "microbiomes_entities.no_weights.tsv:md5,bf3ff9d0390b49c6ecafd89c93909b67", "microbiomes_entities.nucl.tsv:md5,84b6b907695caaddc959682504749832", "microbiomes_entities.tsv:md5,2f40573eb52f3d823aad2f09a639eee2", "microbiomes.tsv:md5,3f58c3029d21a59316861c28af4361d1", - "proteins_peptides.tsv:md5,1efa2eb7dcf90e4b6496d83543f86ecc", - "peptides.tsv.gz:md5,eb5ce3f59f7536ad1ecc96b136d0a10f", - "proteins.tsv.gz:md5,3771dbafc5d6dffa6381ec8dc5b2f69e", - "predictions.tsv.gz:md5,d15c308bf9783a3008a2cc4125665dd3", + "proteins_peptides.tsv:md5,50df6a9278679870190795d7b7968113", + "peptides.tsv.gz:md5,34497a1a57af3c3e48fceb762cf94f75", + "proteins.tsv.gz:md5,1ae4d6be0e8bc4beb3d225aed3379d1c", + "predictions.tsv.gz:md5,1dd8ab58446c326432beadb44a0b327a", "stats.txt:md5,62d598a845aff61f2be843daa250f485" ], "meta": { "nf-test": "0.8.4", "nextflow": "24.04.3" }, - "timestamp": "2024-07-18T11:59:48.935446889" + "timestamp": "2024-08-01T13:39:59.996820893" }, "logs": { "content": [ @@ -33,4 +33,4 @@ }, "timestamp": "2024-06-26T08:14:54+0000" } -} \ No newline at end of file +} diff --git a/tests/pipeline/test_coassembly.nf.test.snap b/tests/pipeline/test_coassembly.nf.test.snap index ac9d7708..3d3b3719 100644 --- a/tests/pipeline/test_coassembly.nf.test.snap +++ b/tests/pipeline/test_coassembly.nf.test.snap @@ -4,23 +4,23 @@ "alleles.tsv:md5,77994270b1eab3e8363c90d8d09780fa", "conditions_alleles.tsv:md5,ad6438cc0e62033f39629d7d002169b5", "conditions.tsv:md5,092aba068d94feb11b61a22d694359d7", - "entities_proteins.tsv:md5,40559463e23017a527de4a0d44029762", - "entities.tsv:md5,57b7ddd2bbc0ae2f577e53ebfee0bed9", - "microbiomes_entities.no_weights.tsv:md5,4f01d93b99cae67bc21a646ea4ee0a06", + "entities_proteins.tsv:md5,522e78ac27cbbc30e0a40fa316ee93c5", + "entities.tsv:md5,a3a3d919964f1dec57abbae3c74fb8f7", + "microbiomes_entities.no_weights.tsv:md5,45512c04e01b2f652b60c5438377411e", "microbiomes_entities.nucl.tsv:md5,2f884256b54995a265af83b2eb68e773", - "microbiomes_entities.tsv:md5,6e0af270f7dc6a245eb7f75f6cd475f5", + "microbiomes_entities.tsv:md5,134661aa3d07b62ccd0cd242c4e8fe6b", "microbiomes.tsv:md5,b89503022fcbd9b7d7f9eefe7cbccf0f", - "proteins_peptides.tsv:md5,40b02b3372c1cbfe7197f5440e724a24", - "peptides.tsv.gz:md5,b776a0ad7d110f8984dc43f74f2f5f2a", - "proteins.tsv.gz:md5,2a3e88df7aaa7c92c043671d4c823b75", - "predictions.tsv.gz:md5,227c6c6dd1b92e40757e1f12bd60512b", + "proteins_peptides.tsv:md5,ce3ba728c26a7a6866ecbfc11946c1cc", + "peptides.tsv.gz:md5,66f6ceef1d5212d061b46c90703282d9", + "proteins.tsv.gz:md5,e8a2211019ca7b516ccfa5313cb3d2de", + "predictions.tsv.gz:md5,bf3a26621094771540cd9b4642eb2f81", "stats.txt:md5,f2957aced6994532212f0d25b3efba98" ], "meta": { "nf-test": "0.8.4", "nextflow": "24.04.3" }, - "timestamp": "2024-07-18T14:04:13.49212786" + "timestamp": "2024-08-02T10:23:41.546325795" }, "logs": { "content": [ @@ -33,4 +33,4 @@ }, "timestamp": "2024-06-26T08:14:54+0000" } -} \ No newline at end of file +} diff --git a/tests/pipeline/test_mhcflurry.nf.test.snap b/tests/pipeline/test_mhcflurry.nf.test.snap index 67120f10..0a6e3cf8 100644 --- a/tests/pipeline/test_mhcflurry.nf.test.snap +++ b/tests/pipeline/test_mhcflurry.nf.test.snap @@ -4,23 +4,23 @@ "alleles.tsv:md5,77994270b1eab3e8363c90d8d09780fa", "conditions_alleles.tsv:md5,9db7e0afc739b23c430877e336b0143f", "conditions.tsv:md5,a6fe5daaf11a90b61c83c9934969c52c", - "entities_proteins.tsv:md5,20fa4e2263edf5875a55f668bfd54255", - "entities.tsv:md5,03eb30795257d5e6a0a52bdfc09481fd", + "entities_proteins.tsv:md5,2ba821a04755841dc6f116965f853f40", + "entities.tsv:md5,61f364c97c883f45663ae27d115b229f", "microbiomes_entities.no_weights.tsv:md5,bd449dafeb86c374552f2e22a9780b19", "microbiomes_entities.nucl.tsv:md5,3888070b634251c0ae3e9fd4857140d7", - "microbiomes_entities.tsv:md5,f93b40a413fde8dd0a10712270c1a5f0", + "microbiomes_entities.tsv:md5,6cef150e7f5fc2f3320d9a93da00f5de", "microbiomes.tsv:md5,3d597aec25752f7ca83436b31bfaa128", - "proteins_peptides.tsv:md5,d3643f39837bc11a4984fa5260c2820f", - "peptides.tsv.gz:md5,afb2ba3ea67d4affb72d6e7f6451d45c", + "proteins_peptides.tsv:md5,c2a16b2d30c9cf5ea52207dd799b4603", + "peptides.tsv.gz:md5,dd0f94bea5079df8ef06895de255038b", "proteins.tsv.gz:md5,104b786dfa89e183a46e29f1f37f17de", - "predictions.tsv.gz:md5,14e08243d25907c53182bfeff23361f1", + "predictions.tsv.gz:md5,ac12cfb575078de97c490487d86813ef", "stats.txt:md5,d138e30d814db5203a789b7eddd04732" ], "meta": { "nf-test": "0.8.4", "nextflow": "24.04.3" }, - "timestamp": "2024-07-18T08:42:10.642128393" + "timestamp": "2024-08-01T13:55:42.000764934" }, "logs": { "content": [ @@ -32,4 +32,4 @@ }, "timestamp": "2024-06-26T08:14:54+0000" } -} \ No newline at end of file +} diff --git a/tests/pipeline/test_mhcnuggets_1.nf.test.snap b/tests/pipeline/test_mhcnuggets_1.nf.test.snap index 8fff9d5f..7cf89733 100644 --- a/tests/pipeline/test_mhcnuggets_1.nf.test.snap +++ b/tests/pipeline/test_mhcnuggets_1.nf.test.snap @@ -4,23 +4,23 @@ "alleles.tsv:md5,77994270b1eab3e8363c90d8d09780fa", "conditions_alleles.tsv:md5,9db7e0afc739b23c430877e336b0143f", "conditions.tsv:md5,a6fe5daaf11a90b61c83c9934969c52c", - "entities_proteins.tsv:md5,20fa4e2263edf5875a55f668bfd54255", - "entities.tsv:md5,03eb30795257d5e6a0a52bdfc09481fd", + "entities_proteins.tsv:md5,2ba821a04755841dc6f116965f853f40", + "entities.tsv:md5,61f364c97c883f45663ae27d115b229f", "microbiomes_entities.no_weights.tsv:md5,bd449dafeb86c374552f2e22a9780b19", "microbiomes_entities.nucl.tsv:md5,3888070b634251c0ae3e9fd4857140d7", - "microbiomes_entities.tsv:md5,f93b40a413fde8dd0a10712270c1a5f0", + "microbiomes_entities.tsv:md5,6cef150e7f5fc2f3320d9a93da00f5de", "microbiomes.tsv:md5,3d597aec25752f7ca83436b31bfaa128", - "proteins_peptides.tsv:md5,d3643f39837bc11a4984fa5260c2820f", - "peptides.tsv.gz:md5,afb2ba3ea67d4affb72d6e7f6451d45c", + "proteins_peptides.tsv:md5,c2a16b2d30c9cf5ea52207dd799b4603", + "peptides.tsv.gz:md5,dd0f94bea5079df8ef06895de255038b", "proteins.tsv.gz:md5,104b786dfa89e183a46e29f1f37f17de", - "predictions.tsv.gz:md5,29f3565a2c200753d0d890009ea68c8e", + "predictions.tsv.gz:md5,956fd0987dd9132a01559cc9c89ff4ca", "stats.txt:md5,d138e30d814db5203a789b7eddd04732" ], "meta": { "nf-test": "0.8.4", "nextflow": "24.04.3" }, - "timestamp": "2024-07-18T08:43:42.547130099" + "timestamp": "2024-08-01T13:57:14.849262708" }, "logs": { "content": [ @@ -32,4 +32,4 @@ }, "timestamp": "2024-06-26T08:14:54+0000" } -} \ No newline at end of file +} diff --git a/tests/pipeline/test_mhcnuggets_2.nf.test.snap b/tests/pipeline/test_mhcnuggets_2.nf.test.snap index 52d30f44..a9a02e5a 100644 --- a/tests/pipeline/test_mhcnuggets_2.nf.test.snap +++ b/tests/pipeline/test_mhcnuggets_2.nf.test.snap @@ -4,23 +4,23 @@ "alleles.tsv:md5,ab0e4dbbb55b8154d37256018c19d67c", "conditions_alleles.tsv:md5,769c299e94c7df654a5a14da2771c95d", "conditions.tsv:md5,a6fe5daaf11a90b61c83c9934969c52c", - "entities_proteins.tsv:md5,20fa4e2263edf5875a55f668bfd54255", - "entities.tsv:md5,03eb30795257d5e6a0a52bdfc09481fd", + "entities_proteins.tsv:md5,2ba821a04755841dc6f116965f853f40", + "entities.tsv:md5,61f364c97c883f45663ae27d115b229f", "microbiomes_entities.no_weights.tsv:md5,bd449dafeb86c374552f2e22a9780b19", "microbiomes_entities.nucl.tsv:md5,3888070b634251c0ae3e9fd4857140d7", - "microbiomes_entities.tsv:md5,f93b40a413fde8dd0a10712270c1a5f0", + "microbiomes_entities.tsv:md5,6cef150e7f5fc2f3320d9a93da00f5de", "microbiomes.tsv:md5,3d597aec25752f7ca83436b31bfaa128", - "proteins_peptides.tsv:md5,d3643f39837bc11a4984fa5260c2820f", - "peptides.tsv.gz:md5,afb2ba3ea67d4affb72d6e7f6451d45c", + "proteins_peptides.tsv:md5,c2a16b2d30c9cf5ea52207dd799b4603", + "peptides.tsv.gz:md5,dd0f94bea5079df8ef06895de255038b", "proteins.tsv.gz:md5,104b786dfa89e183a46e29f1f37f17de", - "predictions.tsv.gz:md5,409c7af0f77acb5f57d26ec985d6753f", + "predictions.tsv.gz:md5,d88dfc746b74451b1cd00eb06ea834bb", "stats.txt:md5,d138e30d814db5203a789b7eddd04732" ], "meta": { "nf-test": "0.8.4", "nextflow": "24.04.3" }, - "timestamp": "2024-07-18T08:45:26.411151617" + "timestamp": "2024-08-01T13:58:50.730628977" }, "logs": { "content": [ @@ -32,4 +32,4 @@ }, "timestamp": "2024-06-26T08:14:54+0000" } -} \ No newline at end of file +} diff --git a/tests/pipeline/test_mouse.nf.test.snap b/tests/pipeline/test_mouse.nf.test.snap index 3fcb155a..02f19085 100644 --- a/tests/pipeline/test_mouse.nf.test.snap +++ b/tests/pipeline/test_mouse.nf.test.snap @@ -14,23 +14,23 @@ "alleles.tsv:md5,314847ac5a69fe93ce75c6db0c2496d8", "conditions_alleles.tsv:md5,e865774e32935a69306461ff47428aa8", "conditions.tsv:md5,a6fe5daaf11a90b61c83c9934969c52c", - "entities_proteins.tsv:md5,490726a86418ba9eb6d248d439c690df", - "entities.tsv:md5,8c4aea9d11b4e00b539098743a70686c", - "microbiomes_entities.no_weights.tsv:md5,4d45837586c7c6dc4a19b41735215b01", + "entities_proteins.tsv:md5,72713ba58d4c87e54b015afdb2e4c856", + "entities.tsv:md5,02424da04f3ca8ddd587f63d2202acf6", + "microbiomes_entities.no_weights.tsv:md5,55eae24113e41d074d4b2e91912d7be6", "microbiomes_entities.nucl.tsv:md5,f5a3ff3cc1f8100f56238b9fab405e66", - "microbiomes_entities.tsv:md5,f8d9569231eeee796b23e250b37ea2ff", + "microbiomes_entities.tsv:md5,c8e318f1ff72f1469a1359085ff7bbef", "microbiomes.tsv:md5,2130005595bdc7ffc770d8304fd9a936", - "proteins_peptides.tsv:md5,033de254de014518dc3954700bf45979", - "peptides.tsv.gz:md5,1669dfbfd06c54ea2595934268698d0e", - "proteins.tsv.gz:md5,10935afd27899a048eed2c374ef52ccd", - "predictions.tsv.gz:md5,02f424dcbef3bbda87dfe15f82cacf01", + "proteins_peptides.tsv:md5,4179b229762eac3862de4d407db105a8", + "peptides.tsv.gz:md5,260d711f2ea11fb3327f3eff6b6a7516", + "proteins.tsv.gz:md5,3314974c69464fd88fb4a7de16811436", + "predictions.tsv.gz:md5,b9edf43eb41ac84bf129001fdc70b852", "stats.txt:md5,6e5ecca241fb1cd3fea7ea66c4b81e2a" ], "meta": { "nf-test": "0.8.4", "nextflow": "24.04.3" }, - "timestamp": "2024-07-18T14:43:57.443165259" + "timestamp": "2024-08-02T11:37:25.76942764" }, "logs": { "content": [ @@ -44,4 +44,4 @@ }, "timestamp": "2024-07-18T05:18:07.866546467" } -} \ No newline at end of file +} diff --git a/tests/pipeline/test_mouse_all_pep_lengths.nf.test.snap b/tests/pipeline/test_mouse_all_pep_lengths.nf.test.snap index aebe3932..c2e92dbc 100644 --- a/tests/pipeline/test_mouse_all_pep_lengths.nf.test.snap +++ b/tests/pipeline/test_mouse_all_pep_lengths.nf.test.snap @@ -4,23 +4,23 @@ "alleles.tsv:md5,314847ac5a69fe93ce75c6db0c2496d8", "conditions_alleles.tsv:md5,9db7e0afc739b23c430877e336b0143f", "conditions.tsv:md5,a6fe5daaf11a90b61c83c9934969c52c", - "entities_proteins.tsv:md5,20fa4e2263edf5875a55f668bfd54255", - "entities.tsv:md5,03eb30795257d5e6a0a52bdfc09481fd", + "entities_proteins.tsv:md5,2ba821a04755841dc6f116965f853f40", + "entities.tsv:md5,61f364c97c883f45663ae27d115b229f", "microbiomes_entities.no_weights.tsv:md5,bd449dafeb86c374552f2e22a9780b19", "microbiomes_entities.nucl.tsv:md5,3888070b634251c0ae3e9fd4857140d7", - "microbiomes_entities.tsv:md5,f93b40a413fde8dd0a10712270c1a5f0", + "microbiomes_entities.tsv:md5,6cef150e7f5fc2f3320d9a93da00f5de", "microbiomes.tsv:md5,3d597aec25752f7ca83436b31bfaa128", - "proteins_peptides.tsv:md5,a36f320c292c459266d487c09527369c", - "peptides.tsv.gz:md5,4c05dc85b7de0a165f0c6745aaaeb60b", + "proteins_peptides.tsv:md5,101da74f4d060728f471d130490c1e83", + "peptides.tsv.gz:md5,d3120ff2e6c4599883e958605ea7089a", "proteins.tsv.gz:md5,104b786dfa89e183a46e29f1f37f17de", - "predictions.tsv.gz:md5,e9030a6180bc8492ba989e361b6f48d9", + "predictions.tsv.gz:md5,84ef89b536bae027616716ff387ac395", "stats.txt:md5,2675aa8f40b92d9df9c1537d77e8ade1" ], "meta": { "nf-test": "0.8.4", "nextflow": "24.04.3" }, - "timestamp": "2024-07-18T07:19:25.909691429" + "timestamp": "2024-08-01T13:02:54.575826204" }, "logs": { "content": [ @@ -32,4 +32,4 @@ }, "timestamp": "2024-06-26T08:14:54+0000" } -} \ No newline at end of file +} diff --git a/tests/pipeline/test_taxa_only.nf.test.snap b/tests/pipeline/test_taxa_only.nf.test.snap index 87825762..813eca0e 100644 --- a/tests/pipeline/test_taxa_only.nf.test.snap +++ b/tests/pipeline/test_taxa_only.nf.test.snap @@ -14,23 +14,23 @@ "alleles.tsv:md5,77994270b1eab3e8363c90d8d09780fa", "conditions_alleles.tsv:md5,9db7e0afc739b23c430877e336b0143f", "conditions.tsv:md5,a6fe5daaf11a90b61c83c9934969c52c", - "entities_proteins.tsv:md5,ca52fb4e86fc2bf79ac6196f16b20b39", + "entities_proteins.tsv:md5,25ca7e495a9b98a194ce71384820803f", "entities.tsv:md5,c766a12bbb1d4ef4707be7077f774191", - "microbiomes_entities.no_weights.tsv:md5,bf602ff424a227edcf8b0b51e33c3af5", + "microbiomes_entities.no_weights.tsv:md5,2d18c7210b304eb7593106073bc3686f", "microbiomes_entities.nucl.tsv:md5,84b6b907695caaddc959682504749832", "microbiomes_entities.tsv:md5,9fc7c15404073c65179dd908dc9fde78", "microbiomes.tsv:md5,05bafa72c953e6cb0bd72c296d6fe707", - "proteins_peptides.tsv:md5,4b388b4c053192bcc6b95e54cf1d8bd0", - "peptides.tsv.gz:md5,70d25f2b10438f46431995a18f23bdf2", - "proteins.tsv.gz:md5,6f8d47a842ce55855f0075fd48455397", - "predictions.tsv.gz:md5,a1747e6748fed828131ad44ce88bb424", - "stats.txt:md5,0c00426cf413b8bd729a9ce723211e39" + "proteins_peptides.tsv:md5,3cc6b7bdc6b91016cce5a94ff4a0fa6e", + "peptides.tsv.gz:md5,0e3c1e87feb2aac34de1ba3b246f27ca", + "proteins.tsv.gz:md5,e51aef45c63fd5047e8d21eab26038a5", + "predictions.tsv.gz:md5,6a931de30c25a2d984193d2586a7eb4b", + "stats.txt:md5,c10c27a1ba080ff59dcb5b62467129b2" ], "meta": { "nf-test": "0.8.4", "nextflow": "24.04.3" }, - "timestamp": "2024-07-18T14:49:36.279842816" + "timestamp": "2024-08-02T06:34:04.10122" }, "logs": { "content": [ @@ -44,4 +44,4 @@ }, "timestamp": "2024-07-18T14:49:36.871618584" } -} \ No newline at end of file +} diff --git a/tests/pipeline/test_taxa_specific_assembly.nf.test.snap b/tests/pipeline/test_taxa_specific_assembly.nf.test.snap index 19c05ab7..ddb1ec49 100644 --- a/tests/pipeline/test_taxa_specific_assembly.nf.test.snap +++ b/tests/pipeline/test_taxa_specific_assembly.nf.test.snap @@ -14,23 +14,23 @@ "alleles.tsv:md5,77994270b1eab3e8363c90d8d09780fa", "conditions_alleles.tsv:md5,9db7e0afc739b23c430877e336b0143f", "conditions.tsv:md5,a6fe5daaf11a90b61c83c9934969c52c", - "entities_proteins.tsv:md5,ca52fb4e86fc2bf79ac6196f16b20b39", + "entities_proteins.tsv:md5,25ca7e495a9b98a194ce71384820803f", "entities.tsv:md5,c766a12bbb1d4ef4707be7077f774191", - "microbiomes_entities.no_weights.tsv:md5,bf602ff424a227edcf8b0b51e33c3af5", + "microbiomes_entities.no_weights.tsv:md5,2d18c7210b304eb7593106073bc3686f", "microbiomes_entities.nucl.tsv:md5,84b6b907695caaddc959682504749832", "microbiomes_entities.tsv:md5,9fc7c15404073c65179dd908dc9fde78", "microbiomes.tsv:md5,02bf660f1c06ace8993249ef6b527385", - "proteins_peptides.tsv:md5,4b388b4c053192bcc6b95e54cf1d8bd0", - "peptides.tsv.gz:md5,70d25f2b10438f46431995a18f23bdf2", - "proteins.tsv.gz:md5,6f8d47a842ce55855f0075fd48455397", - "predictions.tsv.gz:md5,a1747e6748fed828131ad44ce88bb424", - "stats.txt:md5,0c00426cf413b8bd729a9ce723211e39" + "proteins_peptides.tsv:md5,3cc6b7bdc6b91016cce5a94ff4a0fa6e", + "peptides.tsv.gz:md5,0e3c1e87feb2aac34de1ba3b246f27ca", + "proteins.tsv.gz:md5,e51aef45c63fd5047e8d21eab26038a5", + "predictions.tsv.gz:md5,6a931de30c25a2d984193d2586a7eb4b", + "stats.txt:md5,c10c27a1ba080ff59dcb5b62467129b2" ], "meta": { "nf-test": "0.8.4", "nextflow": "24.04.3" }, - "timestamp": "2024-07-18T09:29:51.608097827" + "timestamp": "2024-08-02T08:13:36.245359697" }, "logs": { "content": [ @@ -44,4 +44,4 @@ }, "timestamp": "2024-07-17T16:08:40.670649421" } -} \ No newline at end of file +}