EBIvariation · tcezard · Sep 12, 2023 · Sep 1, 2023 · Sep 1, 2023 · Sep 1, 2023
diff --git a/eva_submission/ENA_submission/upload_to_ENA.py b/eva_submission/ENA_submission/upload_to_ENA.py
@@ -130,7 +130,7 @@ def upload_xml_files_to_ena(self, dry_ena_upload=False):
         if response.status_code == 200:
             json_data = response.json()
             if 'links' in json_data:
-                xml_link = [link_dict['href'] for link_dict in json_data['links'] if link_dict['rel'] == 'poll-xml'][0]
+                xml_link = [link_dict['href'] for link_dict in json_data['_links'] if link_dict['rel'] == 'poll-xml'][0]
                 self.results['submissionId'] = json_data['submissionId']
                 self.results['poll-links'] = xml_link
                 self.monitor_results()

diff --git a/eva_submission/eload_submission.py b/eva_submission/eload_submission.py
@@ -122,6 +122,7 @@ def update_metadata_spreadsheet(self, input_spreadsheet, output_spreadsheet=None
                     'Sample Accession': self.eload_cfg['brokering']['Biosamples']['Samples'][sample_row.get('Sample Name')]
                 })
             else:
+                sample_row['Analysis Alias'] = self._unique_alias(sample_row['Analysis Alias'])
                 sample_rows.append(sample_row)
 
         file_rows = []
@@ -131,15 +132,15 @@ def update_metadata_spreadsheet(self, input_spreadsheet, output_spreadsheet=None
                 vcf_file_info = self.eload_cfg['brokering']['analyses'][analysis]['vcf_files'][vcf_file_name]
                 # Add the vcf file
                 file_rows.append({
-                    'Analysis Alias': analysis,
+                    'Analysis Alias': self._unique_alias(analysis),
                     'File Name': self.eload + '/' + os.path.basename(vcf_file_name),
                     'File Type': 'vcf',
                     'MD5': vcf_file_info['md5']
                 })
 
                 # Add the index file
                 file_rows.append({
-                    'Analysis Alias': analysis,
+                    'Analysis Alias': self._unique_alias(analysis),
                     'File Name': self.eload + '/' + os.path.basename(vcf_file_info['csi']),
                     'File Type': 'csi',
                     'MD5': vcf_file_info['csi_md5']
@@ -154,9 +155,8 @@ def update_metadata_spreadsheet(self, input_spreadsheet, output_spreadsheet=None
 
         analysis_rows = reader.analysis
         for analysis_row in analysis_rows:
-            if self.eload not in analysis_row['Analysis Alias']:
-                # Add the eload id to ensure that the analysis alias is unique
-                analysis_row['Analysis Alias'] = self._unique_alias(analysis_row['Analysis Alias'])
+            # Add the eload id to ensure that the analysis alias is unique
+            analysis_row['Analysis Alias'] = self._unique_alias(analysis_row['Analysis Alias'])
 
         if output_spreadsheet:
             eva_xls_writer = EvaXlsxWriter(input_spreadsheet, output_spreadsheet)

diff --git a/eva_submission/xlsx/xlsx_validation.py b/eva_submission/xlsx/xlsx_validation.py
@@ -1,5 +1,5 @@
 import os
-from datetime import datetime
+import datetime
 
 import yaml
 from cerberus import Validator
@@ -153,8 +153,20 @@ def check_date(self, row, key, required=True):
         if required and key not in row:
             self.error_list.append(f'In row {row.get("row_num")}, {key} is required and missing')
             return
-        if key in row and (isinstance(row[key], datetime) or str(row[key]).lower() in not_provided_check_list):
+        if key in row and (
+                isinstance(row[key], datetime.date) or
+                isinstance(row[key], datetime.datetime) or
+                self._check_date_str_format(row[key]) or
+                str(row[key]).lower() in not_provided_check_list
+        ):
             return
         self.error_list.append(f'In row {row.get("row_num")}, {key} is not a date or "not provided": '
                                f'it is set to "{row.get(key)}"')
 
+    def _check_date_str_format(self, d):
+        """Return True if d is a string that parses with the "%Y-%m-%d" format, False otherwise."""
+        try:
+            datetime.datetime.strptime(d, "%Y-%m-%d")
+            return True
+        except (ValueError, TypeError):  # TypeError: strptime rejects non-str values (numbers, None)
+            return False
diff --git a/tests/test_xlsx_validation.py b/tests/test_xlsx_validation.py
@@ -1,3 +1,4 @@
+import datetime
 import os
 import shutil
 from unittest import TestCase
@@ -73,3 +74,22 @@ def test_correct_scientific_name_in_metadata(self):
         assert len([s for s in scientific_name_list if s == 'Homo Sapiens']) == 0
         assert len([s for s in scientific_name_list if s == 'HS']) == 10
 
+    def test_check_date(self):
+        """check_date records no error for accepted values and one error for a malformed value."""
+        assert self.validator.error_list == []
+        accepted_values = (
+            'not provided',
+            datetime.date(year=2019, month=6, day=8),
+            '2019-06-08',
+        )
+        for accepted in accepted_values:
+            row = {"row_num": 1, "collection_date": accepted}
+            self.validator.check_date(row, 'collection_date', required=True)
+            assert self.validator.error_list == []
+
+        # A comma-separated pair of dates is neither a single date nor "not provided".
+        row = {"row_num": 1, "collection_date": '2019-06-08,2019-06-09'}
+        self.validator.check_date(row, 'collection_date', required=True)
+        assert self.validator.error_list == [
+            'In row 1, collection_date is not a date or "not provided": it is set to "2019-06-08,2019-06-09"'
+        ]