diff --git a/bin/retrieve_archives_from_lts.py b/bin/retrieve_archives_from_lts.py
index d248210..7e6e3c3 100644
--- a/bin/retrieve_archives_from_lts.py
+++ b/bin/retrieve_archives_from_lts.py
@@ -56,11 +56,11 @@ def main():
     # Load the config_file from default location
     load_config()

-    with ELOADRetrieval() as eload_retrieval:
-        eload_retrieval.retrieve_eloads_and_projects(args.eload, args.retrieve_associated_project, args.update_path,
-                                                     args.eload_dirs_files, args.project, args.project_dirs_files,
-                                                     args.eload_lts_dir, args.project_lts_dir, args.eload_retrieval_dir,
-                                                     args.project_retrieval_dir)
+    eload_retrieval = ELOADRetrieval()
+    eload_retrieval.retrieve_eloads_and_projects(args.eload, args.retrieve_associated_project, args.update_path,
+                                                 args.eload_dirs_files, args.project, args.project_dirs_files,
+                                                 args.eload_lts_dir, args.project_lts_dir, args.eload_retrieval_dir,
+                                                 args.project_retrieval_dir)


 if __name__ == "__main__":
diff --git a/bin/submission_status.py b/bin/submission_status.py
new file mode 100755
index 0000000..139d040
--- /dev/null
+++ b/bin/submission_status.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+
+# Copyright 2020 EMBL - European Bioinformatics Institute
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from argparse import ArgumentParser
+
+from ebi_eva_common_pyutils.logger import logging_config as log_cfg
+
+from eva_submission.eload_status import EloadStatus
+from eva_submission.submission_config import load_config
+
+logger = log_cfg.get_logger(__name__)
+
+
+def main():
+    argparse = ArgumentParser(description='Provide a submission status for any ELOAD based on what has been accessioned and loaded.')
+    argparse.add_argument('--eload', required=True, type=int, help='The ELOAD number for this submission')
+    argparse.add_argument('--debug', action='store_true', default=False,
+                          help='Set the script to output logging information at debug level')
+
+    args = argparse.parse_args()
+
+    log_cfg.add_stdout_handler()
+    if args.debug:
+        log_cfg.set_log_level(logging.DEBUG)
+
+    # Load the config_file from default location
+    load_config()
+
+    # EloadStatus is not a context manager, so instantiate it directly
+    eload = EloadStatus(args.eload)
+    eload.status()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/eva_submission/eload_brokering.py b/eva_submission/eload_brokering.py
index 3062052..ef8dfc7 100644
--- a/eva_submission/eload_brokering.py
+++ b/eva_submission/eload_brokering.py
@@ -1,3 +1,4 @@
+import csv
 import os
 import shutil
 import subprocess
@@ -144,10 +145,26 @@ def _get_valid_vcf_files(self):
             valid_vcf_files.extend(files) if files else None
         return valid_vcf_files

+    def _generate_csv_mappings(self):
+        vcf_files_mapping_csv = os.path.join(self.eload_dir, 'brokering_vcf_files_mapping.csv')
+        with open(vcf_files_mapping_csv, 'w', newline='') as file:
+            writer = csv.writer(file)
+            writer.writerow(['vcf', 'fasta', 'report', 'assembly_accession'])
+            sub_analyses = self.eload_cfg.query('submission', 'analyses')
+            valid_analyses = self.eload_cfg.query('validation', 'valid', 'analyses')
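+            # each data row pairs one VCF with its genome support files, e.g.
+            # (hypothetical paths): /eloads/ELOAD_1/file1.vcf.gz,/genomes/genome.fa,/genomes/report.txt,GCA_000001405.1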
+            for analysis_alias in valid_analyses:
+                fasta = sub_analyses[analysis_alias]['assembly_fasta']
+                report = sub_analyses[analysis_alias]['assembly_report']
+                assembly_accession = sub_analyses[analysis_alias]['assembly_accession']
+                for vcf_file in valid_analyses[analysis_alias]['vcf_files']:
+                    writer.writerow([vcf_file, fasta, report, assembly_accession])
+        return vcf_files_mapping_csv
+
     def _run_brokering_prep_workflow(self):
         output_dir = self.create_nextflow_temp_output_directory()
+        cfg['executable']['python']['script_path'] = os.path.dirname(os.path.dirname(__file__))
         brokering_config = {
-            'vcf_files': self._get_valid_vcf_files(),
+            'vcf_files_mapping': self._generate_csv_mappings(),
             'output_dir': output_dir,
             'executable': cfg['executable']
         }
diff --git a/eva_submission/eload_status.py b/eva_submission/eload_status.py
new file mode 100644
index 0000000..e59f808
--- /dev/null
+++ b/eva_submission/eload_status.py
@@ -0,0 +1,358 @@
+import csv
+import glob
+import gzip
+import os
+import subprocess
+import sys
+import tempfile
+from functools import cached_property
+
+from ebi_eva_common_pyutils.logger import AppLogger
+# get_metadata_connection_handle is assumed here: the metadata_connection_handle
+# property defined below needs it, but no definition was included in this file
+from ebi_eva_common_pyutils.metadata_utils import get_metadata_connection_handle, resolve_variant_warehouse_db_name
+from ebi_eva_common_pyutils.mongo_utils import get_mongo_connection_handle
+from ebi_eva_common_pyutils.pg_utils import get_all_results_for_query
+from ebi_eva_common_pyutils.config import cfg
+from eva_submission.retrieve_eload_and_project_from_lts import ELOADRetrieval
+from eva_submission.submission_config import EloadConfig
+
+
+class EloadStatus(AppLogger):
+
+    def __init__(self, eload_number: int):
+        self.eload_num = eload_number
+        self.eload = f'ELOAD_{eload_number}'
+        self.eload_dir = os.path.abspath(os.path.join(cfg['eloads_dir'], self.eload))
+        self.config_path = os.path.join(self.eload_dir, '.' + self.eload + '_config.yml')
+
+    @cached_property
+    def tmp_dir(self):
+        # mkdtemp: a TemporaryDirectory object would delete the directory once garbage-collected
+        return tempfile.mkdtemp()
+
+    @cached_property
+    def eload_config_file(self):
+        if not self.eload_dir_exists():
+            eload_config_file = self.eload + '/.' + self.eload + '_config.yml'
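+            # the ELOAD directory is not on the filesystem any more: pull the
+            # archived config back from long-term storage into a temporary directory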
+            compressed_eload_config = eload_config_file + '.gz'
+            try:
+                eload_retrieval = ELOADRetrieval()
+                eload_retrieval.retrieve_eloads_and_projects(
+                    self.eload_num, retrieve_associated_project=False, update_path=False,
+                    eload_dirs_files=[compressed_eload_config], project=None, project_dirs_files=None,
+                    eload_lts_dir=None, project_lts_dir=None, eload_retrieval_dir=self.tmp_dir,
+                    project_retrieval_dir=None
+                )
+                eload_cfg = EloadConfig(os.path.join(self.tmp_dir, eload_config_file))
+            except subprocess.CalledProcessError:
+                return
+        else:
+            eload_cfg = EloadConfig(self.config_path)
+        return eload_cfg
+
+    @cached_property
+    def project_from_config(self):
+        if self.eload_config_file:
+            return self.eload_config_file.query('brokering', 'ena', 'PROJECT')
+
+    @cached_property
+    def analysis_from_config(self):
+        if self.eload_config_file:
+            return self.eload_config_file.query('brokering', 'ena', 'ANALYSIS')
+
+    @cached_property
+    def taxonomy_from_config(self):
+        if self.eload_config_file:
+            return self.eload_config_file.query('submission', 'taxonomy_id')
+
+    def source_assembly_from_config(self, analysis):
+        if self.eload_config_file:
+            alias = [alias for alias, a in self.analysis_from_config.items() if analysis == a][0]
+            return self.eload_config_file.query('submission', 'analyses', alias, 'assembly_accession')
+
+    @cached_property
+    def metadata_connection_handle(self):
+        # assumed definition, mirroring mongo_conn below: this property is used
+        # throughout the class but was not defined anywhere in it
+        return get_metadata_connection_handle(cfg['maven']['environment'], cfg['maven']['settings_file'])
+
+    def retrieve_project_from_metadata(self):
+        with self.metadata_connection_handle as conn:
+            query = f"select project_accession from evapro.project_eva_submission where eload_id={self.eload_num};"
+            rows = get_all_results_for_query(conn, query)
+        if len(rows) != 1:
+            self.info(f'No project accession for {self.eload} found in metadata DB.')
+            return
+        return rows[0][0]
+
+    def eload_dir_exists(self):
+        return os.path.exists(self.eload_dir)
+
+    @cached_property
+    def project(self):
+        project_accession = self.project_from_config
+        if not project_accession:
+            project_accession = self.retrieve_project_from_metadata()
+        return project_accession
+
+    @cached_property
+    def analyses(self):
+        analysis_dict = self.analysis_from_config
+        if analysis_dict:
+            return analysis_dict.values()
+
+    def status(self):
+        header = [
+            "eload", "project", "analysis", "taxonomy", "source_assembly", "target_assembly", "metadata_load_status",
+            "accessioning_status", "remapping_status", "clustering_status", "variant_load_status",
+            "statistics_status", "annotation_status"
+        ]
+        all_status = []
+        if self.analyses:
+            for analysis in self.analyses:
+                status = self.status_per_analysis(analysis)
+                if not status:
+                    all_status.append(self.build_status(
+                        analysis=analysis,
+                        taxonomy=self.taxonomy_from_config,
+                        source_assembly=self.source_assembly_from_config(analysis),
+                        target_assembly=self.find_current_target_assembly_for_taxonomy(self.taxonomy_from_config)
+                    ))
+                else:
+                    all_status.extend(status)
+        else:
+            all_status = self.status_per_analysis()
+        writer = csv.DictWriter(sys.stdout, fieldnames=header, delimiter='\t')
+        # writer.writeheader()
+        for st in all_status:
+            writer.writerow(st)
+
+    @cached_property
+    def mongo_conn(self):
+        return get_mongo_connection_handle(cfg['maven']['environment'], cfg['maven']['settings_file'])
+
+    def build_status(self, analysis='Not found', taxonomy='Not found', source_assembly='Not found',
+                     target_assembly='Not found', metadata_load_status='Pending', accessioning_status='Pending',
+                     remapping_status='Pending', clustering_status='Pending', variant_load_status='Pending',
+                     statistics_status='Pending', annotation_status='Pending'):
+        return {
+            "eload": self.eload,
+            "project": str(self.project),
+            "analysis": analysis,
+            "taxonomy": taxonomy,
+            "source_assembly": source_assembly,
+            "target_assembly": target_assembly,
+            "metadata_load_status": metadata_load_status,
+            "accessioning_status": accessioning_status,
+            "remapping_status": remapping_status,
+            "clustering_status": clustering_status,
+            "variant_load_status": variant_load_status,
+            "statistics_status": statistics_status,
+            "annotation_status": annotation_status
+        }
+
+    def status_per_analysis(self, analysis_provided=None):
+        st_per_analysis = []
+        for analysis, source_assembly, taxonomy, filenames in self.project_information(analysis_provided):
+            # initialise results with default values
+            accessioning_status = remapping_status = clustering_status = target_assembly = 'Not found'
+            if not taxonomy:
+                self.error(f'No Assembly set present in the metadata for project: {self.project}:{analysis}')
+                taxonomy = self.get_taxonomy_for_project()
+                if not taxonomy:
+                    self.error(f'Project {self.project}:{analysis} has no taxonomy associated and the metadata '
+                               f'should be checked.')
+                    return [self.build_status(analysis=analysis)]
+
+            # accessioning, remapping and clustering do not apply to human (taxonomy 9606) data
+            if taxonomy != 9606:
+                list_ssid_accessioned = self.check_accessioning_was_done(analysis, filenames)
+                accessioning_status = 'Done' if len(list_ssid_accessioned) > 0 else 'Pending'
+                target_assembly = self.find_current_target_assembly_for_taxonomy(taxonomy)
+                remapping_status = 'Required' if source_assembly != target_assembly else 'Not_required'
+                if source_assembly != target_assembly:
+                    list_ssid_remapped = self.check_remapping_was_done(target_assembly, list_ssid_accessioned)
+                    if list_ssid_remapped:
+                        remapping_status = 'Done'
+                    assembly = target_assembly
+                else:
+                    assembly = source_assembly
+                list_ssid_clustered = self.check_clustering_was_done(assembly, list_ssid_accessioned)
+                clustering_status = 'Done' if list_ssid_clustered else 'Pending'
+
+            study_loaded, statistics_loaded, annotation_loaded = self.find_loaded_study_in_variant_warehouse(
+                source_assembly, taxonomy, analysis
+            )
+            variant_load_status = 'Done' if study_loaded else 'Pending'
+            statistics_status = 'Done' if statistics_loaded else 'Pending'
+            annotation_status = 'Done' if annotation_loaded else 'Pending'
+            st_per_analysis.append(self.build_status(
+                analysis=analysis,
+                taxonomy=taxonomy,
+                source_assembly=source_assembly,
+                target_assembly=target_assembly,
+                metadata_load_status="Done",
+                accessioning_status=accessioning_status,
+                remapping_status=remapping_status,
+                clustering_status=clustering_status,
+                variant_load_status=variant_load_status,
+                statistics_status=statistics_status,
+                annotation_status=annotation_status
+            ))
+        return st_per_analysis
+
+    def project_information(self, analysis):
+        """Retrieve project information from the metadata. Information retrieved includes
+        the analysis and associated taxonomy, genome and file names that are included in this project."""
+        query = (
+            "select distinct pa.project_accession, pa.analysis_accession, a.vcf_reference_accession, at.taxonomy_id, f.filename "
+            "from project_analysis pa "
+            "join analysis a on pa.analysis_accession=a.analysis_accession "
+            "left join assembly_set at on at.assembly_set_id=a.assembly_set_id "
+            "left join analysis_file af on af.analysis_accession=a.analysis_accession "
+            "join file f on f.file_id=af.file_id "
+            f"where f.file_type='VCF' and pa.project_accession='{self.project}' "
+        )
+        if analysis:
+            query += f"and pa.analysis_accession='{analysis}'"
+        else:
+            query += "order by pa.project_accession, pa.analysis_accession"
+        filenames = []
+        current_analysis = current_assembly = current_tax_id = None
+        with self.metadata_connection_handle as conn:
+            for project, analysis, assembly, tax_id, filename in get_all_results_for_query(conn, query):
+                if analysis != current_analysis:
+                    if current_analysis:
+                        yield current_analysis, current_assembly, current_tax_id, filenames
+                    current_analysis = analysis
+                    current_assembly = assembly
+                    current_tax_id = tax_id
+                    filenames = []
+                filenames.append(filename)
+        # guard: a project with no VCF files would otherwise yield an empty entry
+        if current_analysis:
+            yield current_analysis, current_assembly, current_tax_id, filenames
+
+    def get_taxonomy_for_project(self):
+        taxonomies = []
+        query = f"select distinct taxonomy_id from evapro.project_taxonomy where project_accession='{self.project}'"
+        with self.metadata_connection_handle as conn:
+            for tax_id, in get_all_results_for_query(conn, query):
+                taxonomies.append(tax_id)
+        if len(taxonomies) == 1:
+            return taxonomies[0]
+        else:
+            self.error(f'Cannot retrieve a single taxonomy for project {self.project}. Found {len(taxonomies)}.')
+
+    def check_accessioning_was_done(self, analysis, filenames):
+        """
+        Check that an accessioning report can be found on Codon, then parse it
+        and return the first 1000 submitted variant accessions from that project.
+ """ + accessioning_reports = self.get_accession_reports_for_study() + accessioned_filenames = [self.get_accession_file(f) for f in filenames] + if len(accessioning_reports) == 1: + # Only one accessioning report + accessioning_report = accessioning_reports[0] + elif len([r for r in accessioning_reports if os.path.basename(r) in accessioned_filenames]) == 1: + # Only one accessioning report name present in the database for this analysis + accessioning_report = [r for r in accessioning_reports if os.path.basename(r) in accessioned_filenames][0] + elif len([r for r in accessioning_reports if analysis in r]) == 1: + # Only one accessioning report that contains the analysis accession in its name + accessioning_report = [r for r in accessioning_reports if analysis in r][0] + elif accessioning_reports: + # Multiple accessioning reports and we cannot figure out which one is for this analysis + # Assume that the first one can be used + self.warning( + f'Assume all accessioning reports are from project {self.project}:{analysis} ' + f'and only use the first one: {accessioning_reports[0]}\n' + f'All reports: {", ".join(accessioning_reports)}' + ) + accessioning_report = accessioning_reports[0] + else: + # No reports + self.error(f'Cannot assign accessioning report to project {self.project} analysis {analysis} ' + f'for files {accessioning_reports}') + return [] + return self.get_accessioning_info_from_file(accessioning_report) + + def get_accession_file(self, filename): + basefile = '' + if filename.endswith('.vcf.gz'): + basefile = filename[:-7] + elif filename.endswith('.vcf'): + basefile = filename[:-4] + return basefile + '.accessioned.vcf.gz ' + + def get_accessioning_info_from_file(self, path): + """ + Read the accessioning report to retrieve the first 1000 Submitted variant accessions + """ + no_of_ss_ids_in_file = 0 + first_1000_ids = [] + with gzip.open(path, 'rt') as f: + for line in f: + if line.startswith('#'): + continue + elif line.split("\t")[2].startswith("ss"): + no_of_ss_ids_in_file = no_of_ss_ids_in_file + 1 + if no_of_ss_ids_in_file <= 1000: + first_1000_ids.append(int(line.split("\t")[2][2:])) + else: + break + return first_1000_ids + + def get_accession_reports_for_study(self): + """ + Given a study, find the accessioning report path for that study on codon and the FTP. + Look for files ending with accessioned.vcf.gz. 
+ """ + local_files = glob.glob(os.path.join(cfg['projects_dir'], self.project, '60_eva_public', '*accessioned.vcf.gz')) + ftp_files = glob.glob(os.path.join(cfg['public_ftp_dir'], self.project, '*accessioned.vcf.gz')) + local_file_base_names = [os.path.basename(f) for f in local_files] + # remove duplicates based on file base names + accessioning_reports = local_files + [f for f in ftp_files if os.path.basename(f) not in local_file_base_names] + if not accessioning_reports: + self.error(f"Could not find any file in FTP or Codon for Study {self.project}") + return accessioning_reports + + def check_remapping_was_done(self, target_assembly, list_ssid): + ss_variants = self.find_submitted_variant_in_assembly(target_assembly, list_ssid) + self.info(f'Found {len(ss_variants)} remapped variants out of {len(list_ssid)} in {target_assembly}') + return [ss_variant['accession'] for ss_variant in ss_variants] + + def check_clustering_was_done(self, assembly, list_ssid): + ss_variants = self.find_submitted_variant_in_assembly(assembly, list_ssid) + ss_variants = [ss_variant['accession'] for ss_variant in ss_variants if 'rs' in ss_variant] + self.info(f'Found {len(ss_variants)} clustered variants out of {len(list_ssid)} in {assembly}') + return ss_variants + + def find_submitted_variant_in_assembly(self, assembly, list_ssid): + filters = {'seq': assembly, 'accession': {'$in': list_ssid}} + cursor = self.mongo_conn['eva_accession_sharded']['submittedVariantEntity'].find(filters) + variants = [] + for variant in cursor: + variants.append(variant) + return variants + + def find_loaded_study_in_variant_warehouse(self, assembly, taxonomy, analysis): + with self.metadata_connection_handle as conn: + db_name = resolve_variant_warehouse_db_name(conn, assembly, taxonomy) + filters = {'sid': self.project, 'fid': analysis} + cursor = self.mongo_conn[db_name]['files_2_0'].find(filters) + studies = list(cursor) + study_loaded = statistics_loaded = annotation_loaded = False + if len(studies) > 0: + study_loaded = True + if 'st' in studies[0]: + statistics_loaded = True + # Searching for variants when they don't exist is Slow ! 
+        if study_loaded:
+            filters = {"files.sid": self.project, "files.fid": analysis}
+            variant = self.mongo_conn[db_name]['variants_2_0'].find_one(filters)
+            if variant:
+                filters = {"_id": {"$regex": "^" + variant["_id"] + ".*"}}
+                annotation = self.mongo_conn[db_name]['annotations_2_0'].find_one(filters)
+                if annotation:
+                    annotation_loaded = True
+        return study_loaded, statistics_loaded, annotation_loaded
+
+    def find_current_target_assembly_for_taxonomy(self, taxonomy):
+        query = f"select assembly_id from evapro.supported_assembly_tracker where taxonomy_id={taxonomy} and current=true"
+        assemblies = []
+        with self.metadata_connection_handle as conn:
+            for asm, in get_all_results_for_query(conn, query):
+                assemblies.append(asm)
+        assert len(assemblies) < 2, f'Multiple target assemblies found for taxonomy {taxonomy}'
+        if assemblies:
+            return assemblies[0]
diff --git a/eva_submission/eload_submission.py b/eva_submission/eload_submission.py
index e007098..6c0e72c 100755
--- a/eva_submission/eload_submission.py
+++ b/eva_submission/eload_submission.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 import os
 import random
 import string
diff --git a/eva_submission/eload_validation.py b/eva_submission/eload_validation.py
index b354333..0fcc1d2 100755
--- a/eva_submission/eload_validation.py
+++ b/eva_submission/eload_validation.py
@@ -229,18 +229,17 @@ def parse_sv_check_log(self, sv_check_log):
         return nb_sv

     def _generate_csv_mappings(self):
-        vcf_files_mapping_csv = os.path.join(self.eload_dir, 'vcf_files_mapping.csv')
+        vcf_files_mapping_csv = os.path.join(self.eload_dir, 'validation_vcf_files_mapping.csv')
         with open(vcf_files_mapping_csv, 'w', newline='') as file:
             writer = csv.writer(file)
-            writer.writerow(['vcf', 'fasta', 'report', 'assembly_accession'])
+            writer.writerow(['vcf', 'fasta', 'report'])
             analyses = self.eload_cfg.query('submission', 'analyses')
             for analysis_alias, analysis_data in analyses.items():
                 fasta = analysis_data['assembly_fasta']
                 report = analysis_data['assembly_report']
-                assembly_accession = analysis_data['assembly_accession']
                 if analysis_data['vcf_files']:
                     for vcf_file in analysis_data['vcf_files']:
-                        writer.writerow([vcf_file, fasta, report, assembly_accession])
+                        writer.writerow([vcf_file, fasta, report])
                 else:
                     self.warning(f"VCF files for analysis {analysis_alias} not found")
         return vcf_files_mapping_csv
diff --git a/eva_submission/retrieve_eload_and_project_from_lts.py b/eva_submission/retrieve_eload_and_project_from_lts.py
index c609aee..3e16f4e 100644
--- a/eva_submission/retrieve_eload_and_project_from_lts.py
+++ b/eva_submission/retrieve_eload_and_project_from_lts.py
@@ -1,16 +1,13 @@
 import fileinput
-import logging
 import os

 import yaml
 from ebi_eva_common_pyutils import command_utils
 from ebi_eva_common_pyutils.config import cfg
-from ebi_eva_common_pyutils.logger import logging_config as log_cfg
+from ebi_eva_common_pyutils.logger import logging_config as log_cfg, AppLogger

-logger = log_cfg.get_logger(__name__)
-

-class ELOADRetrieval():
+class ELOADRetrieval(AppLogger):

     def create_dir_if_not_exist(self, dir_path):
         if not os.path.exists(dir_path):
@@ -44,9 +41,9 @@ def get_project_from_eload_config(self, retrieved_dir, archive_name):
                 project_acc = eload_content['brokering']['ena']['PROJECT']
                 return project_acc
             except:
-                logging.warning(f'No Project accession found in ELOAD config for ELOAD {archive_name}')
+                self.warning(f'No Project accession found in ELOAD config for ELOAD {archive_name}')
         else:
-            logger.warning(f"No ELOAD config file found for eload: {archive_name}")
{archive_name}") + self.warning(f"No ELOAD config file found for eload: {archive_name}") def update_path_in_eload_config(self, retrieved_dir, archive_name): config_file_path = os.path.join(retrieved_dir, archive_name, f'.{archive_name}_config.yml') @@ -62,7 +59,7 @@ def retrieve_archive(self, archive_path, retrieval_output_path, files_dirs_to_re command = f"tar -xf {archive_path} -C {retrieval_output_path} {files_dirs_to_retrieve}" command_utils.run_command_with_output('Retrieve files/dir from tar', command) else: - logger.error(f'Archive path {archive_path} does not exist') + self.error(f'Archive path {archive_path} does not exist') def get_files_or_dirs_to_retrieve_from_archive(self, archive): files_dirs_to_retrieve = [] @@ -76,7 +73,7 @@ def get_files_or_dirs_to_retrieve_from_archive(self, archive): def retrieve_eload(self, eload, retrieve_associated_project, update_path, eload_dirs_files, eload_lts_dir, eload_output_dir): self.create_dir_if_not_exist(eload_output_dir) - logging.info(f"Retrieving Eloads") + self.info(f"Retrieving Eloads") # Retrieve eload eload_tar = f'{eload}.tar' @@ -97,7 +94,7 @@ def retrieve_eload(self, eload, retrieve_associated_project, update_path, eload_ return self.get_project_from_eload_config(eload_output_dir, eload) def retrieve_project(self, project, project_dirs_files, project_lts_dir, project_output_dir): - logging.info(f"Retrieving Project") + self.info(f"Retrieving Project") self.create_dir_if_not_exist(project_output_dir) project_tar = f'{project}.tar' @@ -113,7 +110,6 @@ def retrieve_project(self, project, project_dirs_files, project_lts_dir, project def retrieve_eloads_and_projects(self, eload, retrieve_associated_project, update_path, eload_dirs_files, project, project_dirs_files, eload_lts_dir, project_lts_dir, eload_retrieval_dir, project_retrieval_dir): - eload_lts_dir = eload_lts_dir or cfg['eloads_lts_dir'] project_lts_dir = project_lts_dir or cfg['projects_lts_dir'] eload_retrieval_dir = eload_retrieval_dir or cfg['eloads_dir'] diff --git a/requirements.txt b/requirements.txt index ecd9141..ddc9662 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ cached-property cerberus -ebi-eva-common-pyutils>=0.5.5 +ebi-eva-common-pyutils>=0.5.6 eva-vcf-merge>=0.0.6 humanize lxml diff --git a/tests/test_eload_brokering.py b/tests/test_eload_brokering.py index fac9501..ad83690 100644 --- a/tests/test_eload_brokering.py +++ b/tests/test_eload_brokering.py @@ -86,11 +86,18 @@ def test_run_brokering_prep_workflow(self): self.eload.eload_cfg.set('validation', 'valid', 'analyses', value={ 'analysis_alias1': { 'vcf_files': [os.path.join(self.resources_folder, 'vcf_file.vcf')], + }, + }) + self.eload.eload_cfg.set('submission', 'analyses', value={ + 'analysis_alias1': { 'assembly_fasta': os.path.join(self.resources_folder, 'reference_genome.fa'), + 'assembly_report': os.path.join(self.resources_folder, 'reference_genome_assembly_report.tx'), + 'assembly_accession': 'GCA000000.1' }, }) cfg.content['executable'] = { - 'nextflow': 'path_to_nextflow' + 'nextflow': 'path_to_nextflow', + 'python': {'script_path': 'path_to_script'} } temp_dir = 'temporary_directory' nf_script = os.path.join(NEXTFLOW_DIR, 'prepare_brokering.nf')