diff --git a/irodsadmin/management/commands/irodsorphans.py b/irodsadmin/management/commands/irodsorphans.py
index 4b8f3b4e..81342486 100644
--- a/irodsadmin/management/commands/irodsorphans.py
+++ b/irodsadmin/management/commands/irodsorphans.py
@@ -39,14 +39,31 @@ def __init__(self):
         super().__init__()
         self.irods_backend = get_backend_api('omics_irods')
 
-    def _get_uuid_from_path(self, path):
-        """Helper function to get UUID from a path"""
-        match = re.search(
-            r'/([a-f0-9]{2})/\1[a-f0-9]{6}-([a-f0-9]{4}-){3}[a-f0-9]{12}$', path
-        )
-        if match:
-            return match.group(1)
-        return None
+    def _sort_colls_on_projects(self, all_project_collections, project_list):
+        """
+        Sort collections by the position of their project in project_list.
+
+        Collections with no known project UUID in their path are placed last
+        in their original order.
+
+        :param all_project_collections: Iterable of collections with a path
+        :param project_list: List of project UUID strings in sorting order
+        :return: List of collections
+        """
+        uuid_re = re.compile(r'[a-f0-9]{8}-([a-f0-9]{4}-){3}[a-f0-9]{12}')
+        # Map each UUID to its first position once, instead of scanning the
+        # list for every collection
+        project_order = {}
+        for idx, project_uuid in enumerate(project_list):
+            project_order.setdefault(project_uuid, idx)
+
+        def _get_sort_key(coll):
+            match = uuid_re.search(coll.path)
+            idx = project_order.get(match.group()) if match else None
+            # Unknown projects share one key: stable sort keeps their order
+            return (1, 0) if idx is None else (0, idx)
+
+        return sorted(all_project_collections, key=_get_sort_key)
 
     def _get_assay_collections(self, assays):
         """Return a list of all assay collection names."""
@@ -162,16 +179,11 @@ def _get_orphans(self, irods, expected, assays):
             project.full_title: project.sodar_uuid
             for project in Project.objects.filter(type=PROJECT_TYPE_PROJECT)
         }
-        # Sort dict by full_title
-        project_dict = dict(
-            sorted(project_dict.items(), key=lambda item: item[0])
-        )
+        # Sort dict by full_title and extract the UUID values
+        project_list = [str(val) for _, val in sorted(project_dict.items())]
         # Sort collections by project full_title
-        sorted_project_collections = sorted(
-            all_project_collections,
-            key=lambda coll: project_dict.get(
-                self._get_uuid_from_path(coll.path), float('inf')
-            ),
+        sorted_project_collections = self._sort_colls_on_projects(
+            all_project_collections, project_list
         )
 
         for collection in sorted_project_collections:
diff --git a/irodsadmin/tests/test_commands.py b/irodsadmin/tests/test_commands.py
index efddee94..85e6fb7c 100644
--- a/irodsadmin/tests/test_commands.py
+++ b/irodsadmin/tests/test_commands.py
@@ -501,52 +501,30 @@ def test_command_multiple(self):
 
     def test_command_ordering(self):
         """Test ordering of orphans in command output"""
-        # Create orphans for multiple projects
+        project1 = self.make_project('A_Project', PROJECT_TYPE_PROJECT, None)
+        project2 = self.make_project('B_Project', PROJECT_TYPE_PROJECT, None)
+        self.make_assignment(project1, self.user, self.role_owner)
+        self.make_assignment(project2, self.user, self.role_owner)
         orphan_path1 = '{}/sample_data/study_{}'.format(
-            self.irods_backend.get_path(self.project), str(uuid.uuid4())
+            self.irods_backend.get_path(project1), str(uuid.uuid4())
         )
-        self.irods.collections.create(orphan_path1)
-
-        orphan_path2 = '{}/landing_zones/{}/{}/{}'.format(
-            self.irods_backend.get_path(self.project),
-            self.user.username,
-            self.study.get_display_name().replace(' ', '_').lower(),
-            '20201031_123456',
+        orphan_path2 = '{}/sample_data/study_{}'.format(
+            self.irods_backend.get_path(project2), str(uuid.uuid4())
         )
+        self.irods.collections.create(orphan_path1)
         self.irods.collections.create(orphan_path2)
 
-        # Create another project and study
-        project2 = self.make_project(
-            'Another Project',
-            PROJECT_TYPE_PROJECT,
-            None,
-        )
-        self.make_assignment(project2, self.user, self.role_owner)
-        investigation2 = self.import_isa_from_file(SHEET_PATH, project2)
-        investigation2.irods_status = True
-        investigation2.save()
-
-        orphan_path3 = '{}/sample_data/study_{}'.format(
-            self.irods_backend.get_path(project2), DUMMY_UUID
-        )
-        self.irods.collections.create(orphan_path3)
-
         # Run the orphans management command
         output = self.catch_stdout()
         # Define the expected output based on ordering
         expected = '{};{};{};0;0 bytes\n'.format(
-            str(project2.sodar_uuid),
-            project2.full_title,
-            orphan_path3,
+            str(project1.sodar_uuid),
+            project1.full_title,
+            orphan_path1,
         )
         expected += '{};{};{};0;0 bytes\n'.format(
-            str(self.project.sodar_uuid),
-            self.project.full_title,
+            str(project2.sodar_uuid),
+            project2.full_title,
             orphan_path2,
         )
-        expected += '{};{};{};0;0 bytes\n'.format(
-            str(self.project.sodar_uuid),
-            self.project.full_title,
-            orphan_path1,
-        )
         self.assertEqual(expected, output)