Skip to content

Commit

Permalink
Fix from submissions (#166)
Browse files Browse the repository at this point in the history
* Reformat the archival notification text
* Make the brokering results overwrite previous results for everything but PROJECT
  • Loading branch information
tcezard authored Jul 17, 2023
1 parent 1484843 commit 5121dbe
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 16 deletions.
32 changes: 22 additions & 10 deletions eva_submission/eload_brokering.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,12 @@ def broker_to_ena(self, force=False, existing_project=None, async_upload=False,
# Upload XML to ENA
ena_uploader.upload_xml_files_to_ena(dry_ena_upload)
if not dry_ena_upload:
# Update the accessions in case we're working with existing project
accessions = ena_uploader.results
accessions.update(self.eload_cfg.query('brokering', 'ena', ret_default={}))
self.eload_cfg.set('brokering', 'ena', value=accessions)
# Update the project accession in case we're working with existing project
# We should not be uploading additional analyses in the same ELOAD so no need to update
pre_existing_project = self.eload_cfg.query('brokering', 'ena', 'PROJECT')
if pre_existing_project and 'PROJECT' not in ena_uploader.results:
ena_uploader.results['PROJECT'] = pre_existing_project
self.eload_cfg.set('brokering', 'ena', value=ena_uploader.results)
self.eload_cfg.set('brokering', 'ena', 'date', value=self.now)
self.eload_cfg.set('brokering', 'ena', 'hold_date', value=ena_uploader.converter.hold_date)
self.eload_cfg.set('brokering', 'ena', 'pass', value=not bool(ena_uploader.results['errors']))
Expand Down Expand Up @@ -292,8 +294,9 @@ def _ena_report(self):
return '\n'.join(reports)

def _archival_confirmation_text(self):
if not self._brokering_complete():
return 'NA'
study_title = self.eload_cfg.query('submission', 'project_title')

hold_date = self.eload_cfg.query('brokering', 'ena', 'hold_date')
brokering_date_from_config = self.eload_cfg.query('brokering', 'brokering_date')
try:
Expand All @@ -302,18 +305,24 @@ def _archival_confirmation_text(self):
brokering_date = datetime.date.today()
brokering_date_plus_3 = brokering_date + datetime.timedelta(days=3)
available_date = hold_date if hold_date is not None else brokering_date_plus_3

if isinstance(available_date, datetime.datetime) or isinstance(available_date, datetime.date):
available_date_str = available_date.strftime("%Y-%m-%d")
else:
available_date_str = available_date.split(" ")[0]
project_accession = self.eload_cfg.query('brokering', 'ena', 'PROJECT')
analysis_accession = self.eload_cfg.query('brokering', 'ena', 'ANALYSIS')
analysis_accession = self.eload_cfg.query('brokering', 'ena', 'ANALYSIS', ret_default={})

taxonomy_id = self.eload_cfg.query('submission', 'taxonomy_id')
non_human_study_text = 'Please allow at least 48 hours from the initial release date provided for the data to be made available through this link. Each variant will be issued a unique SS# ID which will be made available to download via the "browsable files" link on the EVA study page.' if taxonomy_id!=9606 else ""

archival_text_data = {
'study_title': study_title,
'available_date': available_date,
'available_date': available_date_str,
'project_accession': project_accession,
'analysis_accession': analysis_accession,
'analysis_accession': ', '.join([
f'{self._undo_unique_alias(alias)}=>{accession}'
for alias, accession in analysis_accession.items()
]),
'non_human_study': non_human_study_text
}

Expand All @@ -326,7 +335,7 @@ def _archival_confirmation_text(self):
You can also notify us when your paper has been assigned a PMID. We will add this to your study page in the EVA. If there is anything else you need please do not hesitate to notify me. Archived data can be referenced using the project accession & associated URL e.g. The variant data for this study have been deposited in the European Variation Archive (EVA) at EMBL-EBI under accession number {project_accession} (https://www.ebi.ac.uk/eva/?eva-study={project_accession})
The EVA can be cited directly using the associated literature:
Cezard T, Cunningham F, Hunt SE, Koylass B, Kumar N, Saunders G, Shen A, Silva AF, Tsukanov K, Venkataraman S, Flicek P, Parkinson H, Keane TM. The European Variation Archive: a FAIR resource of genomic variation for all species. Nucleic Acids Res. 2021 Oct 28:gkab960. doi: 10.1093/nar/gkab960. PMID: 34718739.
"""
"""

return archival_text.format(**archival_text_data)

Expand Down Expand Up @@ -357,3 +366,6 @@ def report(self):
{archival_confirmation_text}
"""
print(report.format(**report_data))

def _brokering_complete(self):
    """Tell whether both brokering stages are flagged as passed.

    Looks up the ``pass`` value stored under ``brokering`` for ``ena`` and
    for ``Biosamples`` in ``self.eload_cfg`` and returns True only when
    both values are truthy.
    """
    stages = ['ena', 'Biosamples']
    # Both stages are always queried before the results are combined.
    stage_passed = [
        self.eload_cfg.query('brokering', stage, 'pass')
        for stage in stages
    ]
    return all(stage_passed)
5 changes: 5 additions & 0 deletions eva_submission/eload_submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@ def _unique_alias(self, alias):
return f'{self.eload}_{alias}'
return alias

def _undo_unique_alias(self, alias):
    """Strip the leading ``<eload>_`` prefix from ``alias`` when present.

    An alias that does not start with ``self.eload`` followed by an
    underscore is returned unchanged.
    """
    eload_prefix = self.eload + '_'
    if not alias.startswith(eload_prefix):
        return alias
    return alias[len(eload_prefix):]

def create_log_file(self):
logfile_name = os.path.join(self.eload_dir, str(self.eload) + "_submission.log")
if logfile_name not in eload_logging_files:
Expand Down
Binary file modified tests/resources/brokering/metadata_sheet_fail.xlsx
Binary file not shown.
3 changes: 2 additions & 1 deletion tests/resources/eloads/ELOAD_4/.ELOAD_4_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ brokering:
pass: true
brokering_date: 2021-01-01 12:20:.0
ena:
ANALYSIS: ERZ0000001
ANALYSIS:
alias1: ERZ0000001
PROJECT: PRJEB00001
SUBMISSION: ERA0000001
errors: []
Expand Down
25 changes: 22 additions & 3 deletions tests/test_eload_brokering.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def test_report(self):
- Hold date:
- Accessions: PROJECT: PRJEB00001
SUBMISSION: ERA0000001
ANALYSIS: ERZ0000001
ANALYSIS: {'alias1': 'ERZ0000001'}
- Errors:
- receipt: <?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="receipt.xsl"?>
Expand All @@ -202,13 +202,13 @@ def test_report(self):
Your EVA submission "Greatest project ever" has now been archived and will be made available to the public on 2021-01-04. The accessions associated with your submission are:
Project: PRJEB00001
Analyses: ERZ0000001
Analyses: alias1=>ERZ0000001
If you wish your data to be held private beyond the date specified above, please let us know. Once released, the data will be made available to download from this link: https://www.ebi.ac.uk/eva/?eva-study=PRJEB00001
Please allow at least 48 hours from the initial release date provided for the data to be made available through this link. Each variant will be issued a unique SS# ID which will be made available to download via the "browsable files" link on the EVA study page.
You can also notify us when your paper has been assigned a PMID. We will add this to your study page in the EVA. If there is anything else you need please do not hesitate to notify me. Archived data can be referenced using the project accession & associated URL e.g. The variant data for this study have been deposited in the European Variation Archive (EVA) at EMBL-EBI under accession number PRJEB00001 (https://www.ebi.ac.uk/eva/?eva-study=PRJEB00001)
The EVA can be cited directly using the associated literature:
Cezard T, Cunningham F, Hunt SE, Koylass B, Kumar N, Saunders G, Shen A, Silva AF, Tsukanov K, Venkataraman S, Flicek P, Parkinson H, Keane TM. The European Variation Archive: a FAIR resource of genomic variation for all species. Nucleic Acids Res. 2021 Oct 28:gkab960. doi: 10.1093/nar/gkab960. PMID: 34718739.
'''
with patch('builtins.print') as mprint:
self.existing_eload.eload_cfg.set('submission', 'metadata_spreadsheet', value=os.path.join(self.existing_eload.eload_dir, '10_submitted/metadata_file/metadata_sheet.xlsx'))
Expand Down Expand Up @@ -257,3 +257,22 @@ def test_update_metadata_from_config_for_files(self):
{'Analysis Alias': 'GAE2', 'File Name': 'ELOAD_3/GAE2.vcf.gz', 'File Type': 'vcf', 'MD5': None, 'row_num': 4},
{'Analysis Alias': 'GAE2', 'File Name': 'ELOAD_3/GAE2.vcf.gz.csi', 'File Type': 'csi', 'MD5': None, 'row_num': 5}
]

def test_archival_confirmation_text(self):
# Checks the text returned by _archival_confirmation_text() for a config in which
# both brokering stages are marked as passed.
# Populate the config with the title, pass flags, hold date and accessions used below.
self.eload.eload_cfg.set('submission', 'project_title', value='Great project')
self.eload.eload_cfg.set('brokering', 'Biosamples', 'pass', value=True)
self.eload.eload_cfg.set('brokering', 'ena', 'pass', value=True)
# The hold date is given as a timestamp string; the expected text shows only its date part.
self.eload.eload_cfg.set('brokering', 'ena', 'hold_date', value='2022-10-21 09:14:53.389160')
self.eload.eload_cfg.set('brokering', 'ena', 'PROJECT', value='PRJEB00001')
# Analysis aliases carry an 'ELOAD_3_' prefix here; the expected text lists them without it.
self.eload.eload_cfg.set('brokering', 'ena', 'ANALYSIS', value={'ELOAD_3_alias1': 'ERZ00000001', 'ELOAD_3_alias2': 'ERZ00000002'})
expected_text = '''
Your EVA submission "Great project" has now been archived and will be made available to the public on 2022-10-21. The accessions associated with your submission are:
Project: PRJEB00001
Analyses: alias1=>ERZ00000001, alias2=>ERZ00000002
If you wish your data to be held private beyond the date specified above, please let us know. Once released, the data will be made available to download from this link: https://www.ebi.ac.uk/eva/?eva-study=PRJEB00001
Please allow at least 48 hours from the initial release date provided for the data to be made available through this link. Each variant will be issued a unique SS# ID which will be made available to download via the "browsable files" link on the EVA study page.
You can also notify us when your paper has been assigned a PMID. We will add this to your study page in the EVA. If there is anything else you need please do not hesitate to notify me. Archived data can be referenced using the project accession & associated URL e.g. The variant data for this study have been deposited in the European Variation Archive (EVA) at EMBL-EBI under accession number PRJEB00001 (https://www.ebi.ac.uk/eva/?eva-study=PRJEB00001)
The EVA can be cited directly using the associated literature:
Cezard T, Cunningham F, Hunt SE, Koylass B, Kumar N, Saunders G, Shen A, Silva AF, Tsukanov K, Venkataraman S, Flicek P, Parkinson H, Keane TM. The European Variation Archive: a FAIR resource of genomic variation for all species. Nucleic Acids Res. 2021 Oct 28:gkab960. doi: 10.1093/nar/gkab960. PMID: 34718739.
'''
# The generated text must match the expected text exactly.
assert self.eload._archival_confirmation_text() == expected_text
3 changes: 1 addition & 2 deletions tests/test_xlsx_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ def setUp(self) -> None:
def tearDown(self) -> None:
# Delete the file whose path is stored in self.metadata_file_wrong_sc_name_copy.
# NOTE(review): presumably this copy is created in setUp, which is not visible here — confirm.
os.remove(self.metadata_file_wrong_sc_name_copy)


def test_cerberus_validation(self):
self.validator.cerberus_validation()
self.assertEqual(self.validator.error_list, [])
Expand All @@ -44,7 +43,7 @@ def test_complex_validation_failure(self):
expected_errors = [
'Check Analysis Alias vs Samples: GAE2,None present in Analysis Alias not in Samples',
'Check Analysis Alias vs Files: GAE2,None present in Analysis Alias not in Files',
'In row 102, collection_date is not a date or "not provided": it is "Dote of collection"'
'In row 102, collection_date is not a date or "not provided": it is set to "Date of collection"'
]
self.assertEqual(self.validator_fail.error_list, expected_errors)

Expand Down

0 comments on commit 5121dbe

Please sign in to comment.