EBIvariation · nitin-ebi · Sep 17, 2023 · Sep 17, 2023 · Sep 17, 2023 · Sep 20, 2023
diff --git a/eva_submission/ENA_submission/upload_to_ENA.py b/eva_submission/ENA_submission/upload_to_ENA.py
@@ -71,9 +71,10 @@ def _post_xml_file_to_ena(self, url, file_dict):
         )
         return response
 
-    def upload_xml_files_to_ena(self, dry_ena_upload=False):
+    def upload_xml_files_to_ena(self, private_config_xml_file, profile, dry_ena_upload=False):
         """Upload the xml files to the webin submission endpoint and parse the receipt."""
-        submission_file, project_file, analysis_file = self.converter.create_submission_files(self.eload)
+        submission_file, project_file, analysis_file = self.converter.create_submission_files(self.eload,
+                                                                                    private_config_xml_file, profile)
         file_dict = {
             'SUBMISSION': (os.path.basename(submission_file), get_file_content(submission_file), 'application/xml'),
             'ANALYSIS': (os.path.basename(analysis_file), get_file_content(analysis_file), 'application/xml')
@@ -114,10 +115,10 @@ def parse_ena_receipt(self, ena_xml_receipt):
 
 class ENAUploaderAsync(ENAUploader):
 
-    def upload_xml_files_to_ena(self, dry_ena_upload=False):
+    def upload_xml_files_to_ena(self, private_config_xml_file, profile, dry_ena_upload=False):
         """Upload the xml file to the asynchronous endpoint and monitor the results from the poll endpoint."""
 
-        webin_file = self.converter.create_single_submission_file(self.eload)
+        webin_file = self.converter.create_single_submission_file(self.eload, private_config_xml_file, profile)
         file_dict = {
             'file': (os.path.basename(webin_file), get_file_content(webin_file), 'application/xml'),
         }

diff --git a/eva_submission/ENA_submission/xlsx_to_ENA_xml.py b/eva_submission/ENA_submission/xlsx_to_ENA_xml.py
@@ -7,7 +7,7 @@
 from xml.etree.ElementTree import Element, ElementTree
 
 from ebi_eva_common_pyutils.logger import AppLogger
-from ebi_eva_common_pyutils.taxonomy.taxonomy import get_scientific_name_from_ensembl
+from ebi_eva_common_pyutils.taxonomy.taxonomy import get_scientific_name_from_taxonomy
 
 from eva_submission.eload_utils import check_existing_project_in_ena
 from eva_submission.xlsx.xlsx_parser_eva import EvaXlsxReader
@@ -109,7 +109,7 @@ def existing_project(self):
             return prj_title
         return None
 
-    def _create_project_xml(self):
+    def _create_project_xml(self, private_config_xml_file, profile):
         """
         This function read the project row from the XLS parser then create and populate an XML element following ENA
         data model.
@@ -151,7 +151,9 @@ def _create_project_xml(self):
         if 'Tax ID' in project_row:
             org_elemt = add_element(sub_project_elemt, 'ORGANISM')
             add_element(org_elemt, 'TAXON_ID', element_text=str(project_row.get('Tax ID')).strip())
-            scientific_name = get_scientific_name_from_ensembl(str(project_row.get('Tax ID')).strip())
+            scientific_name = get_scientific_name_from_taxonomy(str(project_row.get('Tax ID')).strip(),
+                                                                private_config_xml_file=private_config_xml_file,
+                                                                profile=profile)
             add_element(org_elemt, 'SCIENTIFIC_NAME', element_text=scientific_name)
 
             add_element(org_elemt, 'STRAIN', element_text=project_row.get('Strain', ''), content_required=True)
@@ -339,13 +341,13 @@ def write_xml_to_file(xml_element, output_file):
         with open(output_file, 'bw') as open_file:
             open_file.write(prettify(etree))
 
-    def create_submission_files(self, eload):
+    def create_submission_files(self, eload, private_config_xml_file, profile):
         files_to_submit = []
         if not self.is_existing_project:
             files_to_submit.append(
                 {'file_name': os.path.basename(self.project_file), 'schema': 'project'}
             )
-            projects_elemt = self._create_project_xml()
+            projects_elemt = self._create_project_xml(private_config_xml_file, profile)
             self.write_xml_to_file(projects_elemt, self.project_file)
             project_file = self.project_file
         else:
@@ -363,7 +365,7 @@ def create_submission_files(self, eload):
 
         return self.submission_file, project_file, self.analysis_file
 
-    def create_single_submission_file(self, eload):
+    def create_single_submission_file(self, eload, private_config_xml_file, profile):
         root = Element('WEBIN')
         # Submission ELEMENT
         action = 'ADD'
@@ -372,7 +374,7 @@ def create_single_submission_file(self, eload):
 
         # Project ELEMENT
         if not self.is_existing_project:
-            projects_elemt = self._create_project_xml()
+            projects_elemt = self._create_project_xml(private_config_xml_file, profile)
             root.append(projects_elemt)
 
         # Analysis ELEMENT

diff --git a/eva_submission/eload_brokering.py b/eva_submission/eload_brokering.py
@@ -82,7 +82,8 @@ def broker_to_ena(self, force=False, existing_project=None, async_upload=False,
             else:
                 ena_uploader.upload_vcf_files_to_ena_ftp(files_to_upload)
             # Upload XML to ENA
-            ena_uploader.upload_xml_files_to_ena(dry_ena_upload)
+            ena_uploader.upload_xml_files_to_ena(cfg['maven']['settings_file'], cfg['maven']['environment'],
+                                                 dry_ena_upload)
             if not dry_ena_upload:
                 # Update the project accession in case we're working with existing project
                 # We should not be uploading additional analysis in th same ELOAD so no need to update

diff --git a/eva_submission/eload_preparation.py b/eva_submission/eload_preparation.py
@@ -5,7 +5,7 @@
 
 from retry import retry
 
-from ebi_eva_common_pyutils.taxonomy.taxonomy import get_scientific_name_from_ensembl
+from ebi_eva_common_pyutils.taxonomy.taxonomy import get_scientific_name_from_taxonomy
 from ebi_eva_common_pyutils.config import cfg
 from ebi_eva_common_pyutils.config_utils import get_contig_alias_db_creds_for_profile
 
@@ -151,7 +151,9 @@ def detect_metadata_attributes(self):
         taxonomy_id = eva_metadata.project.get('Tax ID')
         if taxonomy_id and (isinstance(taxonomy_id, int) or taxonomy_id.isdigit()):
             self.eload_cfg.set('submission', 'taxonomy_id', value=int(taxonomy_id))
-            scientific_name = get_scientific_name_from_ensembl(taxonomy_id)
+            scientific_name = get_scientific_name_from_taxonomy(taxonomy_id,
+                                                                private_config_xml_file=cfg['maven']['settings_file'],
+                                                                profile=cfg['maven']['environment'])
             self.eload_cfg.set('submission', 'scientific_name', value=scientific_name)
         else:
             if taxonomy_id:

diff --git a/eva_submission/eload_validation.py b/eva_submission/eload_validation.py
@@ -87,7 +87,7 @@ def _get_valid_vcf_files_by_analysis(self):
 
     def _validate_metadata_format(self):
         validator = EvaXlsxValidator(self.eload_cfg['submission']['metadata_spreadsheet'])
-        validator.validate()
+        validator.validate(cfg['maven']['settings_file'], cfg['maven']['environment'])
         self.eload_cfg['validation']['metadata_check']['metadata_spreadsheet'] = self.eload_cfg['submission']['metadata_spreadsheet']
         self.eload_cfg['validation']['metadata_check']['errors'] = validator.error_list
         self.eload_cfg['validation']['metadata_check']['pass'] = len(validator.error_list) == 0

diff --git a/eva_submission/xlsx/xlsx_validation.py b/eva_submission/xlsx/xlsx_validation.py
@@ -4,7 +4,7 @@
 import yaml
 from cerberus import Validator
 from ebi_eva_common_pyutils.logger import AppLogger
-from ebi_eva_common_pyutils.taxonomy.taxonomy import get_scientific_name_from_ensembl
+from ebi_eva_common_pyutils.taxonomy.taxonomy import get_scientific_name_from_taxonomy
 from ebi_eva_common_pyutils.variation.assembly_utils import retrieve_genbank_assembly_accessions_from_ncbi
 from requests import HTTPError
 
@@ -25,10 +25,10 @@ def __init__(self, metadata_file):
 
         self.error_list = []
 
-    def validate(self):
+    def validate(self, private_config_xml_file, profile):
         self.cerberus_validation()
         self.complex_validation()
-        self.semantic_validation()
+        self.semantic_validation(private_config_xml_file, profile)
 
     def cerberus_validation(self):
         """
@@ -79,7 +79,7 @@ def complex_validation(self):
             )
             self.check_date(row, 'collection_date', required=True)
 
-    def semantic_validation(self):
+    def semantic_validation(self, private_config_xml_file, profile):
         """
         Validation of the data that involve checking its meaning
         This function adds error statements to the errors attribute
@@ -98,7 +98,9 @@ def semantic_validation(self):
         taxid_and_species_list = set([(row['Tax Id'], row['Scientific Name']) for row in self.metadata['Sample'] if row['Tax Id']])
         for taxid, species in taxid_and_species_list:
             try:
-                scientific_name = get_scientific_name_from_ensembl(int(taxid))
+                scientific_name = get_scientific_name_from_taxonomy(int(taxid),
+                                                                private_config_xml_file=private_config_xml_file,
+                                                                profile=profile)
                 if species != scientific_name:
                     if species.lower() == scientific_name.lower():
                         correct_taxid_sc_name[taxid] = scientific_name

diff --git a/tests/test_eload_preparation.py b/tests/test_eload_preparation.py
@@ -2,6 +2,7 @@
 import os
 import shutil
 from unittest import TestCase, mock
+from unittest.mock import patch
 
 from ebi_eva_common_pyutils.config import cfg
 
@@ -79,7 +80,11 @@ def test_detect_metadata_attributes(self):
         self.create_vcfs()
         metadata = self.create_metadata()
         self.eload.eload_cfg.set('submission', 'metadata_spreadsheet', value=metadata)
-        self.eload.detect_metadata_attributes()
+        cfg.content['maven']['settings_file'] = None
+        cfg.content['maven']['environment'] = None
+        with patch('eva_submission.eload_preparation.get_scientific_name_from_taxonomy') as m_sci_name:
+            m_sci_name.return_value = 'Homo sapiens'
+            self.eload.detect_metadata_attributes()
 
         assert self.eload.eload_cfg.query('submission', 'project_title') == 'Greatest project ever'
         assert self.eload.eload_cfg.query('submission', 'taxonomy_id') == 9606

diff --git a/tests/test_upload_to_ENA.py b/tests/test_upload_to_ENA.py
@@ -95,13 +95,15 @@ def test_parse_ena_receipt_multiple_analyses(self):
         }
 
     def test_single_upload_xml_files_to_ena(self):
-        with patch.object(ENAUploader, '_post_xml_file_to_ena') as mock_post,\
-             patch('eva_submission.ENA_submission.upload_to_ENA.requests.get') as mock_get:
+        with patch.object(ENAUploader, '_post_xml_file_to_ena') as mock_post, \
+                patch('eva_submission.ENA_submission.xlsx_to_ENA_xml.get_scientific_name_from_taxonomy') as m_sci_name, \
+                patch('eva_submission.ENA_submission.upload_to_ENA.requests.get') as mock_get:
+            m_sci_name.return_value = 'Homo Sapiens'
             json_data = {'submissionId': 'ERA123456', '_links': [{'rel': 'poll-xml', 'href': 'https://example.com/link'}]}
             mock_post.return_value = Mock(status_code=200, json=Mock(return_value=json_data))
             mock_get.return_value = Mock(status_code=200, text=self.receipt)
             self.assertFalse(os.path.isfile(self.uploader_async.converter.single_submission_file))
-            self.uploader_async.upload_xml_files_to_ena()
+            self.uploader_async.upload_xml_files_to_ena(None, None)
             self.assertTrue(os.path.isfile(self.uploader_async.converter.single_submission_file))
             mock_post.assert_called_with(
                 'https://wwwdev.ebi.ac.uk/ena/submit/webin-v2/submit/queue',
@@ -119,16 +121,21 @@ def test_single_upload_xml_files_to_ena(self):
 
     def test_single_upload_xml_files_to_ena_failed(self):
         self.assertFalse(os.path.isfile(self.uploader_async.converter.single_submission_file))
-        self.uploader_async.upload_xml_files_to_ena()
+        with patch('eva_submission.ENA_submission.xlsx_to_ENA_xml.get_scientific_name_from_taxonomy') as m_sci_name:
+            m_sci_name.return_value = 'Homo Sapiens'
+            self.uploader_async.upload_xml_files_to_ena(None, None)
         self.assertTrue(os.path.isfile(self.uploader_async.converter.single_submission_file))
         self.assertEqual(self.uploader_async.results, {'errors': ['403']})
 
+
     def test_single_dry_upload_xml_files_to_ena(self):
         with patch.object(ENAUploader, '_post_xml_file_to_ena') as mock_post,\
              patch('eva_submission.ENA_submission.upload_to_ENA.requests.get') as mock_get, \
+             patch('eva_submission.ENA_submission.xlsx_to_ENA_xml.get_scientific_name_from_taxonomy') as m_sci_name, \
              patch.object(ENAUploaderAsync, 'info') as mock_info:
+            m_sci_name.return_value = 'Homo Sapiens'
             self.assertFalse(os.path.isfile(self.uploader_async.converter.single_submission_file))
-            self.uploader_async.upload_xml_files_to_ena(dry_ena_upload=True)
+            self.uploader_async.upload_xml_files_to_ena(None, None, dry_ena_upload=True)
             self.assertTrue(os.path.isfile(self.uploader_async.converter.single_submission_file))
             mock_info.assert_any_call('Would have uploaded the following XML files to ENA asynchronous submission '
                                       'endpoint:')

diff --git a/tests/test_xlsx_to_xml.py b/tests/test_xlsx_to_xml.py
@@ -158,9 +158,9 @@ def test_create_project(self):
 </PROJECT_SET>
 '''
         self.converter.reader = Mock(project=self.project_row)
-        with patch('eva_submission.ENA_submission.xlsx_to_ENA_xml.get_scientific_name_from_ensembl') as m_sci_name:
+        with patch('eva_submission.ENA_submission.xlsx_to_ENA_xml.get_scientific_name_from_taxonomy') as m_sci_name:
             m_sci_name.return_value = 'Oncorhynchus mykiss'
-            root = self.converter._create_project_xml()
+            root = self.converter._create_project_xml(None, None)
             expected_root = ET.fromstring(expected_project)
             assert elements_equal(root, expected_root)
 
@@ -179,9 +179,9 @@ def test_add_analysis_to_existing_project(self):
         ))
 
     def test_process_metadata_spreadsheet(self):
-        with patch('eva_submission.ENA_submission.xlsx_to_ENA_xml.get_scientific_name_from_ensembl') as m_sci_name:
+        with patch('eva_submission.ENA_submission.xlsx_to_ENA_xml.get_scientific_name_from_taxonomy') as m_sci_name:
             m_sci_name.return_value = 'Oncorhynchus mykiss'
-            self.converter.create_submission_files('TEST1')
+            self.converter.create_submission_files('TEST1', None, None)
         assert os.path.isfile(os.path.join(self.brokering_folder, 'TEST1.Submission.xml'))
         assert os.path.isfile(os.path.join(self.brokering_folder, 'TEST1.Project.xml'))
         assert os.path.isfile(os.path.join(self.brokering_folder, 'TEST1.Analysis.xml'))
@@ -243,21 +243,29 @@ def test_create_submission_with_date(self):
         assert elements_equal(root, expected_root)
 
     def test_create_submission_files(self):
-        submission_file, project_file, analysis_file = self.converter.create_submission_files('ELOAD_1')
+        with patch('eva_submission.ENA_submission.xlsx_to_ENA_xml.get_scientific_name_from_taxonomy') as m_sci_name:
+            m_sci_name.return_value = 'Homo Sapiens'
+            submission_file, project_file, analysis_file = self.converter.create_submission_files('ELOAD_1',
+                                                                                                  None, None)
         assert os.path.exists(submission_file)
         assert os.path.exists(project_file)
         assert os.path.exists(analysis_file)
 
     def test_create_submission_files_for_existing_project(self):
         # When the project already exist not PROJECT XML will be generated
-        with patch.object(EnaXlsxConverter, 'is_existing_project', return_value=True):
-            submission_file, project_file, analysis_file = self.converter.create_submission_files('ELOAD_1')
+        with patch('eva_submission.ENA_submission.xlsx_to_ENA_xml.get_scientific_name_from_taxonomy') as m_sci_name, \
+                patch.object(EnaXlsxConverter, 'is_existing_project', return_value=True):
+            m_sci_name.return_value = 'Homo Sapiens'
+            submission_file, project_file, analysis_file = self.converter.create_submission_files('ELOAD_1',
+                                                                                                  None, None)
             assert os.path.exists(submission_file)
             assert project_file is None
             assert os.path.exists(analysis_file)
             assert not os.path.exists(self.converter.project_file)
 
     def test_create_single_submission_files(self):
-        self.converter.create_single_submission_file('ELOAD_1')
+        with patch('eva_submission.ENA_submission.xlsx_to_ENA_xml.get_scientific_name_from_taxonomy') as m_sci_name:
+            m_sci_name.return_value = 'Homo Sapiens'
+            self.converter.create_single_submission_file('ELOAD_1', None, None)
 
 
diff --git a/tests/test_xlsx_validation.py b/tests/test_xlsx_validation.py
@@ -49,9 +49,9 @@ def test_complex_validation_failure(self):
         self.assertEqual(self.validator_fail.error_list, expected_errors)
 
     def test_validate(self):
-        with patch('eva_submission.xlsx.xlsx_validation.get_scientific_name_from_ensembl') as m_sci_name:
+        with patch('eva_submission.xlsx.xlsx_validation.get_scientific_name_from_taxonomy') as m_sci_name:
             m_sci_name.return_value = 'Homo sapiens'
-            self.validator.validate()
+            self.validator.validate(None, None)
         assert self.validator.error_list == []
 
     def test_correct_scientific_name_in_metadata(self):
@@ -62,9 +62,9 @@ def test_correct_scientific_name_in_metadata(self):
         assert len([s for s in scientific_name_list if s == 'Homo Sapiens']) == 10
         assert len([s for s in scientific_name_list if s == 'HS']) == 10
 
-        with patch('eva_submission.xlsx.xlsx_validation.get_scientific_name_from_ensembl') as m_sci_name:
+        with patch('eva_submission.xlsx.xlsx_validation.get_scientific_name_from_taxonomy') as m_sci_name:
             m_sci_name.return_value = 'Homo sapiens'
-            self.validator_sc_name.validate()
+            self.validator_sc_name.validate(None, None)
         assert self.validator_sc_name.error_list == ['In Samples, Taxonomy 9606 and scientific name HS are inconsistent']
 
         reader_after_modification = EvaXlsxReader(self.metadata_file_wrong_sc_name_copy)