Commit

change allocation-quota matching strategy
claire-peters committed Jul 11, 2023
1 parent 4274954 commit 2b2c1a8
Showing 3 changed files with 114 additions and 82 deletions.
4 changes: 2 additions & 2 deletions coldfront/plugins/fasrc/tests/testdata/att_dummy.json
@@ -9,7 +9,7 @@
"byte_usage": 8183568264,
"fs_path": "/n/holylfs10",
"storage_type": "Quota",
"server": "/n/holylfs10"
"server": "holylfs10"
},
{
"begin_date": "2022-03-29T15:16:37.170349700-04:00",
@@ -33,7 +33,7 @@
"tb_usage": 24.757998044826,
"fs_path": "/n/holylfs10",
"storage_type": "Quota",
"server": "/n/holylfs10"
"server": "holylfs10"
}
]
}
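The fixture's "server" field now carries the bare volume name ("holylfs10") rather than the full mount path ("/n/holylfs10"), which is what the new resource-based fallback in utils.py compares against Coldfront resource names by substring. A minimal illustration with hypothetical values mirroring the fixtures (not part of the commit):

resource_name = 'holylfs10/tier1'              # ResourceFactory name used in tests.py
quota_entry = {'server': 'holylfs10', 'fs_path': '/n/holylfs10'}
assert quota_entry['server'] in resource_name  # bare volume name matches
assert '/n/holylfs10' not in resource_name     # the old full-path value would not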
27 changes: 17 additions & 10 deletions coldfront/plugins/fasrc/tests/tests.py
@@ -1,14 +1,15 @@
from django.test import TestCase

from coldfront.plugins.fasrc.utils import (AllTheThingsConn, push_quota_data)
from coldfront.core.test_helpers.factories import (setup_models,
UserFactory,
ProjectFactory,
ResourceFactory,
AllocationFactory,
AAttributeTypeFactory,
AllocationAttributeTypeFactory
)
from coldfront.plugins.fasrc.utils import AllTheThingsConn, push_quota_data
from coldfront.core.test_helpers.factories import (
setup_models,
UserFactory,
ProjectFactory,
ResourceFactory,
AllocationFactory,
AAttributeTypeFactory,
AllocationAttributeTypeFactory,
)


UTIL_FIXTURES = [
@@ -30,7 +31,12 @@ def setUpTestData(cls):
gordon_lab = ProjectFactory(pi=aalice, title="gordon_lab")
gordon_alloc = AllocationFactory(project=gordon_lab)
gordon_alloc.resources.add(ResourceFactory(name='holylfs10/tier1', id=1))
AllocationAttributeTypeFactory(name='RequiresPayment', attribute_type=AAttributeTypeFactory(name='Boolean'))
AllocationAttributeTypeFactory(
name='RequiresPayment', attribute_type=AAttributeTypeFactory(name='Boolean')
)
AllocationAttributeTypeFactory(
name='Subdirectory', attribute_type=AAttributeTypeFactory(name='Text')
)

def setUp(self):
self.attconn = AllTheThingsConn()
@@ -39,5 +45,6 @@ def setUp(self):
self.testusers = self.pref + 'att_users_dummy.json'

def test_push_quota_data(self):
"""Ensure that push runs successfully"""
push_quota_data(self.testfiles)
# assert AllocationAttribute.
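The commented-out assertion above is left as a placeholder in this commit. One hypothetical way to complete it, assuming the AllocationAttribute model from coldfront.core.allocation.models and the 'RequiresPayment' attribute that push_quota_data() sets, might look like the sketch below (illustrative only, not part of the commit):

from coldfront.core.allocation.models import AllocationAttribute

def test_push_quota_data(self):
    """Ensure that push runs successfully and records allocation attributes"""
    push_quota_data(self.testfiles)
    # Hypothetical check: the push should have created/updated RequiresPayment attributes.
    self.assertTrue(
        AllocationAttribute.objects.filter(
            allocation_attribute_type__name='RequiresPayment'
        ).exists()
    )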
165 changes: 95 additions & 70 deletions coldfront/plugins/fasrc/utils.py
@@ -5,11 +5,12 @@
import requests

from coldfront.core.utils.common import import_from_settings
from coldfront.core.utils.fasrc import (log_missing,
read_json,
select_one_project_allocation,
save_json,
id_present_missing_projects)
from coldfront.core.utils.fasrc import (
log_missing,
read_json,
save_json,
id_present_missing_projects
)
from coldfront.core.resource.models import Resource
from coldfront.core.allocation.models import AllocationAttributeType

@@ -38,7 +39,7 @@ def produce_query_statement(self, vol_type, volumes=None):
'server_replace': '/n/',
'path_replace': '/n//',
'unique':'datetime(e.DotsLFSUpdateDate) as begin_date'
},
},
'isilon': {
'match': '[r:Owns]-(e:IsilonPath) MATCH (d:ConfigValue {Name: \'IsilonPath.Invocation\'})',
'validation_query':"r.DotsUpdateDate = d.DotsUpdateDate \
@@ -55,8 +56,8 @@
'server_replace': '01.rc.fas.harvard.edu',
'path_replace': '/ifs/',
'unique':'datetime(e.DotsUpdateDate) as begin_date'
}
}
}
d = query_dict[vol_type]

if volumes:
@@ -65,21 +66,23 @@
volumes = '|'.join([r.name.split('/')[0] for r in Resource.objects.all()])
where = f"(e.{d['server']} =~ \'.*({volumes}).*\')"

statement = {'statement': f"MATCH p=(g:Group)-{d['match']} \
WHERE {where} AND {d['validation_query']}\
AND NOT (g.ADSamAccountName =~ '.*(disabled|rc_admin).*')\
AND (datetime() - duration('P31D') <= datetime(r.{d['r_updated']})) \
RETURN \
{d['unique']}, \
g.ADSamAccountName as lab,\
(e.SizeGB / 1024.0) as tb_allocation, \
e.{d['sizebytes']} as byte_allocation,\
e.{d['usedbytes']} as byte_usage,\
(e.{d['usedgb']} / 1024.0) as tb_usage,\
replace(e.{d['fs_path']}, '{d['path_replace']}', '') as fs_path, \
{d['storage_type']} as storage_type, \
datetime(r.{d['r_updated']}) as rel_updated, \
replace(e.{d['server']}, '{d['server_replace']}', '') as server"}
statement = {
'statement': f"MATCH p=(g:Group)-{d['match']} \
WHERE {where} AND {d['validation_query']}\
AND NOT (g.ADSamAccountName =~ '.*(disabled|rc_admin).*')\
AND (datetime() - duration('P31D') <= datetime(r.{d['r_updated']})) \
RETURN \
{d['unique']}, \
g.ADSamAccountName as lab,\
(e.SizeGB / 1024.0) as tb_allocation, \
e.{d['sizebytes']} as byte_allocation,\
e.{d['usedbytes']} as byte_usage,\
(e.{d['usedgb']} / 1024.0) as tb_usage,\
replace(e.{d['fs_path']}, '{d['path_replace']}', '') as fs_path, \
{d['storage_type']} as storage_type, \
datetime(r.{d['r_updated']}) as rel_updated, \
replace(e.{d['server']}, '{d['server_replace']}', '') as server"
}
self.queries['statements'].append(statement)


@@ -105,7 +108,9 @@ def _standardize_attquery(self):
def _standardize_nesefile(self):
datafile = 'nese_data/pools'
header_file = 'nese_data/pools.header'
translator = dict((kv.split('=') for kv in (l.strip('\n') for l in open('nese_data/groupkey'))))
translator = dict((
kv.split('=') for kv in (l.strip('\n') for l in open('nese_data/groupkey'))
))
headers_df = pd.read_csv(header_file, header=0, delim_whitespace=True)
headers = headers_df.columns.values.tolist()
data = pd.read_csv(datafile, names=headers, delim_whitespace=True)
@@ -120,7 +125,10 @@ def _standardize_nesefile(self):
data['tb_allocation'] = data['mib_capacity'] / 953674.3164
data['tb_usage'] = data['mib_used'] / 953674.3164
data['fs_path'] = None
data = data[['lab', 'server', 'storage_type', 'byte_allocation', 'byte_usage', 'tb_allocation', 'tb_usage', 'fs_path']]
data = data[[
'lab', 'server', 'storage_type', 'byte_allocation',
'byte_usage', 'tb_allocation', 'tb_usage', 'fs_path',
]]
nesedict = data.to_dict(orient='records')
return nesedict

@@ -205,6 +213,41 @@ def pull_quota_data(self, volumes=None):
logger.debug(resp_json)
return resp_json

def matched_dict_processing(allocation, data_dicts, paired_allocs, log_message):
if len(data_dicts) == 1:
logger.debug(log_message)
paired_allocs[allocation] = data_dicts[0]
else:
logger.warning('too many matches for allocation %s: %s', allocation, data_dicts)
return paired_allocs

def pair_allocations_data(project, quota_dicts):
"""pair allocations with usage dicts
"""
unpaired_allocs = project.allocation_set.filter(status__name='Active')
paired_allocs = {}
# first, pair allocations with quota dicts that share the same path
for allocation in unpaired_allocs:
dicts = [
d for d in quota_dicts if allocation.path.lower() == d['fs_path'].lower()
]
if dicts:
log_message = f'Path-based match: {allocation}, {allocation.path}, {dicts[0]}'
paired_allocs = matched_dict_processing(allocation, dicts, paired_allocs, log_message)
unpaired_allocs = [
a for a in unpaired_allocs if a not in paired_allocs
]
unpaired_dicts = [d for d in quota_dicts if d not in paired_allocs.values()]
for allocation in unpaired_allocs:
dicts = [
d for d in unpaired_dicts if d['server'] in allocation.resources.first().name
]
if dicts:
log_message = f'Resource-based match: {allocation}, {dicts[0]}'
paired_allocs = matched_dict_processing(allocation, dicts, paired_allocs, log_message)
return paired_allocs


def push_quota_data(result_file):
"""update group quota & usage values in Coldfront from a JSON of quota data.
"""
@@ -222,63 +265,45 @@
allocation_attribute_types = AllocationAttributeType.objects.all()
allocation_attribute_type_payment = allocation_attribute_types.get(name='RequiresPayment')

for lab, allocations in result_json_cleaned.items():
for lab, quota_dicts in result_json_cleaned.items():
logger.info('PROJECT: %s ====================================', lab)
# Find the correct allocation_allocationattributes to update by:
# 1. finding the project with a name that matches lab.lab
proj_query = proj_models.get(title=lab)
for allocation in allocations:
project = proj_models.get(title=lab)
# 2. pair project allocations with data
allocation_data_dict = pair_allocations_data(project, quota_dicts)
for allocation, data_dict in allocation_data_dict.items():
try:
# 2. find the resource that matches/approximates the server value
resource = Resource.objects.get(name__contains=allocation['server'])

# 3. find the allocation with a matching project and resource_type
alloc_obj = select_one_project_allocation(proj_query, resource, dirpath=allocation['fs_path'])
error_message = None
if alloc_obj is None:
error_message = 'No Allocation'
missing_allocations.append({
'resource_name':resource.name,
'project_title': proj_query.title,
'path': allocation['fs_path']
})
elif alloc_obj == 'MultiAllocationError':
print(allocation['fs_path'])
error_message = 'Unresolved multiple Allocations'
if error_message:
logger.warning('ERROR: %s for allocation %s-%s',
error_message, proj_query.title, resource.name)
counts['all_err'] += 1
continue

logger.info('allocation: %s', alloc_obj.__dict__)

# 4. get the storage quota TB allocation_attribute that has allocation=a.
allocation_values = { 'Storage Quota (TB)':
[allocation['tb_allocation'],allocation['tb_usage']] }
if allocation['byte_allocation'] is not None:
allocation_values['Quota_In_Bytes'] = [ allocation['byte_allocation'],
allocation['byte_usage']]
# 3. get the storage quota TB allocation_attribute that has allocation=a.
allocation_values = {
'Storage Quota (TB)': [data_dict['tb_allocation'],data_dict['tb_usage']]
}
if data_dict['byte_allocation'] is not None:
allocation_values['Quota_In_Bytes'] = [
data_dict['byte_allocation'], data_dict['byte_usage']
]
else:
logger.warning(
'no byte_allocation value for allocation %s, lab %s on resource %s',
alloc_obj.pk, lab, allocation['server'])
'no byte_allocation value for allocation %s, lab %s on resource %s',
allocation.pk, lab, data_dict['server']
)
for k, v in allocation_values.items():
allocation_attribute_type_obj = allocation_attribute_types.get(name=k)
allocattribute_obj, _ = alloc_obj.allocationattribute_set.update_or_create(
allocation_attribute_type=allocation_attribute_type_obj,
defaults={'value': v[0]}
)
allocattribute_obj.allocationattributeusage.value = v[1]
allocattribute_obj.allocationattributeusage.save()
allocation_attr_type_obj = allocation_attribute_types.get(name=k)
alloc_attr_obj, _ = allocation.allocationattribute_set.update_or_create(
allocation_attribute_type=allocation_attr_type_obj,
defaults={'value': v[0]}
)
alloc_attr_obj.allocationattributeusage.value = v[1]
alloc_attr_obj.allocationattributeusage.save()

# 5. AllocationAttribute
alloc_obj.allocationattribute_set.update_or_create(
allocation_attribute_type=allocation_attribute_type_payment,
defaults={'value':True})
allocation.allocationattribute_set.update_or_create(
allocation_attribute_type=allocation_attribute_type_payment,
defaults={'value':True}
)
counts['complete'] += 1
except Exception as e:
allocation_name = f"{allocation['lab']}/{allocation['server']}"
allocation_name = f"{data_dict['lab']}/{data_dict['server']}"
errored_allocations[allocation_name] = e
log_missing('allocation', missing_allocations)
logger.warning('error counts: %s', counts)

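In sum, utils.py drops the per-entry select_one_project_allocation lookup in favor of pair_allocations_data(), which pairs a project's active allocations with quota dicts in two passes: first by filesystem path, then, for anything left unpaired, by the quota dict's server name appearing in the allocation's resource name. A self-contained sketch of that strategy, using plain dicts as hypothetical stand-ins for the Django models (not the plugin's actual API):

def pair_allocations(allocations, quota_dicts):
    """Two-pass pairing: match by path first, then by server/volume name."""
    paired = {}
    # Pass 1: case-insensitive path match; pair only when exactly one dict matches.
    for alloc in allocations:
        matches = [d for d in quota_dicts
                   if alloc['path'].lower() == d['fs_path'].lower()]
        if len(matches) == 1:
            paired[alloc['name']] = matches[0]
    # Pass 2: for anything still unpaired, match the quota dict's server name
    # against the allocation's resource name (e.g. 'holylfs10' in 'holylfs10/tier1').
    unpaired_allocs = [a for a in allocations if a['name'] not in paired]
    unpaired_dicts = [d for d in quota_dicts if d not in paired.values()]
    for alloc in unpaired_allocs:
        matches = [d for d in unpaired_dicts if d['server'] in alloc['resource']]
        if len(matches) == 1:
            paired[alloc['name']] = matches[0]
    return paired

allocations = [
    {'name': 'gordon_lab-1', 'path': '/n/holylfs10/LABS/gordon_lab',
     'resource': 'holylfs10/tier1'},
]
quota_dicts = [
    {'fs_path': '/n/holylfs10/LABS/gordon_lab', 'server': 'holylfs10',
     'tb_allocation': 40.0, 'tb_usage': 24.76},
]
print(pair_allocations(allocations, quota_dicts))
# expected: {'gordon_lab-1': {'fs_path': '/n/holylfs10/LABS/gordon_lab', ...}}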