diff --git a/coldfront/plugins/fasrc/tests/testdata/att_dummy.json b/coldfront/plugins/fasrc/tests/testdata/att_dummy.json index 22ef507b7..24bbf8b96 100644 --- a/coldfront/plugins/fasrc/tests/testdata/att_dummy.json +++ b/coldfront/plugins/fasrc/tests/testdata/att_dummy.json @@ -9,7 +9,7 @@ "byte_usage": 8183568264, "fs_path": "/n/holylfs10", "storage_type": "Quota", - "server": "/n/holylfs10" + "server": "holylfs10" }, { "begin_date": "2022-03-29T15:16:37.170349700-04:00", @@ -33,7 +33,7 @@ "tb_usage": 24.757998044826, "fs_path": "/n/holylfs10", "storage_type": "Quota", - "server": "/n/holylfs10" + "server": "holylfs10" } ] } diff --git a/coldfront/plugins/fasrc/tests/tests.py b/coldfront/plugins/fasrc/tests/tests.py index 40d4dbb2d..9e69cd651 100644 --- a/coldfront/plugins/fasrc/tests/tests.py +++ b/coldfront/plugins/fasrc/tests/tests.py @@ -1,14 +1,15 @@ from django.test import TestCase -from coldfront.plugins.fasrc.utils import (AllTheThingsConn, push_quota_data) -from coldfront.core.test_helpers.factories import (setup_models, - UserFactory, - ProjectFactory, - ResourceFactory, - AllocationFactory, - AAttributeTypeFactory, - AllocationAttributeTypeFactory - ) +from coldfront.plugins.fasrc.utils import AllTheThingsConn, push_quota_data +from coldfront.core.test_helpers.factories import ( + setup_models, + UserFactory, + ProjectFactory, + ResourceFactory, + AllocationFactory, + AAttributeTypeFactory, + AllocationAttributeTypeFactory, +) UTIL_FIXTURES = [ @@ -30,7 +31,12 @@ def setUpTestData(cls): gordon_lab = ProjectFactory(pi=aalice, title="gordon_lab") gordon_alloc = AllocationFactory(project=gordon_lab) gordon_alloc.resources.add(ResourceFactory(name='holylfs10/tier1', id=1)) - AllocationAttributeTypeFactory(name='RequiresPayment', attribute_type=AAttributeTypeFactory(name='Boolean')) + AllocationAttributeTypeFactory( + name='RequiresPayment', attribute_type=AAttributeTypeFactory(name='Boolean') + ) + AllocationAttributeTypeFactory( + name='Subdirectory', attribute_type=AAttributeTypeFactory(name='Text') + ) def setUp(self): self.attconn = AllTheThingsConn() @@ -39,5 +45,6 @@ def setUp(self): self.testusers = self.pref + 'att_users_dummy.json' def test_push_quota_data(self): + """Ensure that push runs successfully""" push_quota_data(self.testfiles) # assert AllocationAttribute. diff --git a/coldfront/plugins/fasrc/utils.py b/coldfront/plugins/fasrc/utils.py index 213856ae4..091e78f14 100644 --- a/coldfront/plugins/fasrc/utils.py +++ b/coldfront/plugins/fasrc/utils.py @@ -5,11 +5,12 @@ import requests from coldfront.core.utils.common import import_from_settings -from coldfront.core.utils.fasrc import (log_missing, - read_json, - select_one_project_allocation, - save_json, - id_present_missing_projects) +from coldfront.core.utils.fasrc import ( + log_missing, + read_json, + save_json, + id_present_missing_projects +) from coldfront.core.resource.models import Resource from coldfront.core.allocation.models import AllocationAttributeType @@ -38,7 +39,7 @@ def produce_query_statement(self, vol_type, volumes=None): 'server_replace': '/n/', 'path_replace': '/n//', 'unique':'datetime(e.DotsLFSUpdateDate) as begin_date' - }, + }, 'isilon': { 'match': '[r:Owns]-(e:IsilonPath) MATCH (d:ConfigValue {Name: \'IsilonPath.Invocation\'})', 'validation_query':"r.DotsUpdateDate = d.DotsUpdateDate \ @@ -55,8 +56,8 @@ def produce_query_statement(self, vol_type, volumes=None): 'server_replace': '01.rc.fas.harvard.edu', 'path_replace': '/ifs/', 'unique':'datetime(e.DotsUpdateDate) as begin_date' - } } + } d = query_dict[vol_type] if volumes: @@ -65,21 +66,23 @@ def produce_query_statement(self, vol_type, volumes=None): volumes = '|'.join([r.name.split('/')[0] for r in Resource.objects.all()]) where = f"(e.{d['server']} =~ \'.*({volumes}).*\')" - statement = {'statement': f"MATCH p=(g:Group)-{d['match']} \ - WHERE {where} AND {d['validation_query']}\ - AND NOT (g.ADSamAccountName =~ '.*(disabled|rc_admin).*')\ - AND (datetime() - duration('P31D') <= datetime(r.{d['r_updated']})) \ - RETURN \ - {d['unique']}, \ - g.ADSamAccountName as lab,\ - (e.SizeGB / 1024.0) as tb_allocation, \ - e.{d['sizebytes']} as byte_allocation,\ - e.{d['usedbytes']} as byte_usage,\ - (e.{d['usedgb']} / 1024.0) as tb_usage,\ - replace(e.{d['fs_path']}, '{d['path_replace']}', '') as fs_path, \ - {d['storage_type']} as storage_type, \ - datetime(r.{d['r_updated']}) as rel_updated, \ - replace(e.{d['server']}, '{d['server_replace']}', '') as server"} + statement = { + 'statement': f"MATCH p=(g:Group)-{d['match']} \ + WHERE {where} AND {d['validation_query']}\ + AND NOT (g.ADSamAccountName =~ '.*(disabled|rc_admin).*')\ + AND (datetime() - duration('P31D') <= datetime(r.{d['r_updated']})) \ + RETURN \ + {d['unique']}, \ + g.ADSamAccountName as lab,\ + (e.SizeGB / 1024.0) as tb_allocation, \ + e.{d['sizebytes']} as byte_allocation,\ + e.{d['usedbytes']} as byte_usage,\ + (e.{d['usedgb']} / 1024.0) as tb_usage,\ + replace(e.{d['fs_path']}, '{d['path_replace']}', '') as fs_path, \ + {d['storage_type']} as storage_type, \ + datetime(r.{d['r_updated']}) as rel_updated, \ + replace(e.{d['server']}, '{d['server_replace']}', '') as server" + } self.queries['statements'].append(statement) @@ -105,7 +108,9 @@ def _standardize_attquery(self): def _standardize_nesefile(self): datafile = 'nese_data/pools' header_file = 'nese_data/pools.header' - translator = dict((kv.split('=') for kv in (l.strip('\n') for l in open('nese_data/groupkey')))) + translator = dict(( + kv.split('=') for kv in (l.strip('\n') for l in open('nese_data/groupkey')) + )) headers_df = pd.read_csv(header_file, header=0, delim_whitespace=True) headers = headers_df.columns.values.tolist() data = pd.read_csv(datafile, names=headers, delim_whitespace=True) @@ -120,7 +125,10 @@ def _standardize_nesefile(self): data['tb_allocation'] = data['mib_capacity'] / 953674.3164 data['tb_usage'] = data['mib_used'] / 953674.3164 data['fs_path'] = None - data = data[['lab', 'server', 'storage_type', 'byte_allocation', 'byte_usage', 'tb_allocation', 'tb_usage', 'fs_path']] + data = data[[ + 'lab', 'server', 'storage_type', 'byte_allocation', + 'byte_usage', 'tb_allocation', 'tb_usage', 'fs_path', + ]] nesedict = data.to_dict(orient='records') return nesedict @@ -205,6 +213,41 @@ def pull_quota_data(self, volumes=None): logger.debug(resp_json) return resp_json +def matched_dict_processing(allocation, data_dicts, paired_allocs, log_message): + if len(data_dicts) == 1: + logger.debug(log_message) + paired_allocs[allocation] = data_dicts[0] + else: + logger.warning('too many matches for allocation %s: %s', allocation, data_dicts) + return paired_allocs + +def pair_allocations_data(project, quota_dicts): + """pair allocations with usage dicts + """ + unpaired_allocs = project.allocation_set.filter(status__name='Active') + paired_allocs = {} + # first, pair allocations with those that have same + for allocation in unpaired_allocs: + dicts = [ + d for d in quota_dicts if allocation.path.lower() == d['fs_path'].lower() + ] + if dicts: + log_message = f'Path-based match: {allocation}, {allocation.path}, {dicts[0]}' + paired_allocs = matched_dict_processing(allocation, dicts, paired_allocs, log_message) + unpaired_allocs = [ + a for a in unpaired_allocs if a not in paired_allocs + ] + unpaired_dicts = [d for d in quota_dicts if d not in paired_allocs.values()] + for allocation in unpaired_allocs: + dicts = [ + d for d in unpaired_dicts if d['server'] in allocation.resources.first().name + ] + if dicts: + log_message = f'Resource-based match: {allocation}, {dicts[0]}' + paired_allocs = matched_dict_processing(allocation, dicts, paired_allocs, log_message) + return paired_allocs + + def push_quota_data(result_file): """update group quota & usage values in Coldfront from a JSON of quota data. """ @@ -222,63 +265,45 @@ def push_quota_data(result_file): allocation_attribute_types = AllocationAttributeType.objects.all() allocation_attribute_type_payment = allocation_attribute_types.get(name='RequiresPayment') - for lab, allocations in result_json_cleaned.items(): + for lab, quota_dicts in result_json_cleaned.items(): logger.info('PROJECT: %s ====================================', lab) # Find the correct allocation_allocationattributes to update by: # 1. finding the project with a name that matches lab.lab - proj_query = proj_models.get(title=lab) - for allocation in allocations: + project = proj_models.get(title=lab) + # 2. pair project allocations with data + allocation_data_dict = pair_allocations_data(project, quota_dicts) + for allocation, data_dict in allocation_data_dict.items(): try: - # 2. find the resource that matches/approximates the server value - resource = Resource.objects.get(name__contains=allocation['server']) - - # 3. find the allocation with a matching project and resource_type - alloc_obj = select_one_project_allocation(proj_query, resource, dirpath=allocation['fs_path']) - error_message = None - if alloc_obj is None: - error_message = 'No Allocation' - missing_allocations.append({ - 'resource_name':resource.name, - 'project_title': proj_query.title, - 'path': allocation['fs_path'] - }) - elif alloc_obj == 'MultiAllocationError': - print(allocation['fs_path']) - error_message = 'Unresolved multiple Allocations' - if error_message: - logger.warning('ERROR: %s for allocation %s-%s', - error_message, proj_query.title, resource.name) - counts['all_err'] += 1 - continue - - logger.info('allocation: %s', alloc_obj.__dict__) - - # 4. get the storage quota TB allocation_attribute that has allocation=a. - allocation_values = { 'Storage Quota (TB)': - [allocation['tb_allocation'],allocation['tb_usage']] } - if allocation['byte_allocation'] is not None: - allocation_values['Quota_In_Bytes'] = [ allocation['byte_allocation'], - allocation['byte_usage']] + # 3. get the storage quota TB allocation_attribute that has allocation=a. + allocation_values = { + 'Storage Quota (TB)': [data_dict['tb_allocation'],data_dict['tb_usage']] + } + if data_dict['byte_allocation'] is not None: + allocation_values['Quota_In_Bytes'] = [ + data_dict['byte_allocation'], data_dict['byte_usage'] + ] else: logger.warning( - 'no byte_allocation value for allocation %s, lab %s on resource %s', - alloc_obj.pk, lab, allocation['server']) + 'no byte_allocation value for allocation %s, lab %s on resource %s', + allocation.pk, lab, data_dict['server'] + ) for k, v in allocation_values.items(): - allocation_attribute_type_obj = allocation_attribute_types.get(name=k) - allocattribute_obj, _ = alloc_obj.allocationattribute_set.update_or_create( - allocation_attribute_type=allocation_attribute_type_obj, - defaults={'value': v[0]} - ) - allocattribute_obj.allocationattributeusage.value = v[1] - allocattribute_obj.allocationattributeusage.save() + allocation_attr_type_obj = allocation_attribute_types.get(name=k) + alloc_attr_obj, _ = allocation.allocationattribute_set.update_or_create( + allocation_attribute_type=allocation_attr_type_obj, + defaults={'value': v[0]} + ) + alloc_attr_obj.allocationattributeusage.value = v[1] + alloc_attr_obj.allocationattributeusage.save() # 5. AllocationAttribute - alloc_obj.allocationattribute_set.update_or_create( - allocation_attribute_type=allocation_attribute_type_payment, - defaults={'value':True}) + allocation.allocationattribute_set.update_or_create( + allocation_attribute_type=allocation_attribute_type_payment, + defaults={'value':True} + ) counts['complete'] += 1 except Exception as e: - allocation_name = f"{allocation['lab']}/{allocation['server']}" + allocation_name = f"{data_dict['lab']}/{data_dict['server']}" errored_allocations[allocation_name] = e log_missing('allocation', missing_allocations) logger.warning('error counts: %s', counts)