Skip to content

Commit

Permalink
collect user-level usage
Browse files Browse the repository at this point in the history
  • Loading branch information
claire-peters committed Nov 15, 2023
1 parent f6f5978 commit 72ac937
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 28 deletions.
4 changes: 2 additions & 2 deletions coldfront/plugins/slurm/associations.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ def spec_dict(self):
"""Return dict of Slurm Specs"""
spec_dict = {}
for s in self.specs:
for k, v in s.split(':'):
spec_dict[k] = v
i = s.split('=')
spec_dict[i[0]] = i[1]
return spec_dict

def format_specs(self):
Expand Down
13 changes: 11 additions & 2 deletions coldfront/plugins/slurm/management/commands/slurm_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from coldfront.core.allocation.models import (
AllocationStatusChoice,
AllocationAttributeType,
AllocationUserStatusChoice,
)
from coldfront.core.resource.models import Resource
from coldfront.plugins.slurm.associations import SlurmCluster
Expand Down Expand Up @@ -57,6 +58,7 @@ def handle(self, *args, **options):
name='Core Usage (Hours)')
fairshare_attr_type_obj = AllocationAttributeType.objects.get(
name='Fairshare')
auser_status_active = AllocationUserStatusChoice.objects.get(name='Active')

for resource, cluster in slurm_clusters.items():

Expand All @@ -78,6 +80,11 @@ def handle(self, *args, **options):
)
allocation_obj.resources.add(resource)
allocation_obj.save()
elif len(allocation_objs) == 1:
allocation_obj = allocation_objs.first()
elif len(allocation_objs) > 1:
print("Too many allocations:", allocation_objs)
continue
# used in XDMOD to correspond with pi_filter, I think
# 'slurm_account_name'? XDMOD_ACCOUNT_ATTRIBUTE_NAME

Expand All @@ -92,7 +99,8 @@ def handle(self, *args, **options):

allocation_obj.allocationattribute_set.get_or_create(
allocation_attribute_type=hours_attr_type_obj,
defaults= {'value': 0})
defaults={'value': 0}
)

account_spec_dict = account.spec_dict()

Expand All @@ -101,7 +109,8 @@ def handle(self, *args, **options):
if fairshare:
allocation_obj.allocationattribute_set.get_or_create(
allocation_attribute_type=fairshare_attr_type_obj,
defaults= {'value': fairshare})
defaults={'value': fairshare}
)

# add allocationusers from account
for user_name, user_account in account.users:
Expand Down
6 changes: 6 additions & 0 deletions coldfront/plugins/xdmod/management/commands/xdmod_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,12 @@ def process_total_cpu_hours(self):
logger.warning(
"Total CPU hours = %s for allocation %s account %s cpu_hours %s resources %s",
usage, s, account_name, cpu_hours, resources)
# collect user-level usage and update allocationuser entries with them
usage_data = fetcher.xdmod_fetch_cpu_hours(account_name, statistics='per-user')
for user in s.allocationuser_set.all():
if user.user.username in usage_data:
user.usage = usage_data[user.user.username]
user.save()
if self.sync:
cpu_hours_attr = s.allocationattribute_set.get(
allocation_attribute_type__name=XDMOD_CPU_HOURS_ATTRIBUTE_NAME)
Expand Down
59 changes: 35 additions & 24 deletions coldfront/plugins/xdmod/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,10 @@
import xml.etree.ElementTree as ET

import requests
from requests.auth import HTTPBasicAuth

from coldfront.core.utils.common import import_from_settings
from coldfront.core.utils.fasrc import get_quarter_start_end


XDMOD_USER = import_from_settings('XDMOD_USER', '')
XDMOD_PASS = import_from_settings('XDMOD_PASS', '')

XDMOD_CLOUD_PROJECT_ATTRIBUTE_NAME = import_from_settings(
'XDMOD_CLOUD_PROJECT_ATTRIBUTE_NAME', 'Cloud Account Name')
XDMOD_CLOUD_CORE_TIME_ATTRIBUTE_NAME = import_from_settings(
Expand Down Expand Up @@ -60,28 +55,22 @@ class XdmodNotFoundError(XdmodError):
class XDModFetcher:
def __init__(self, start=QUARTER_START, end=QUARTER_END, resources=None):
self.url = f'{XDMOD_API_URL}{_ENDPOINT_CORE_HOURS}'
if resources is None:
resources = []

self.resources = resources
payload = _DEFAULT_PARAMS
payload['start_date'] = start
payload['end_date'] = end
payload['resource_filter'] = f'"{",".join(resources)}"'
payload['operation'] = 'get_data'
if resources:
payload['resource_filter'] = f'"{",".join(resources)}"'
self.payload = payload
self.group_by = {'total':'pi', 'per-user':'user'}
self.group_by = {'total':'pi', 'per-user':'person'}

def fetch_data(self, payload, search_item=None):
r = requests.get(
self.url, params=payload, auth=HTTPBasicAuth(XDMOD_USER, XDMOD_PASS)
)
r = requests.get(self.url, params=payload)
logger.info(r.url)
logger.info(r.text)

try:
error = r.json()
# XXXX fix me. Here we assume any json response is bad as we're
# expecting xml but XDMoD should just return json always.
raise XdmodNotFoundError(f'Got json response but expected XML: {error}')
except json.decoder.JSONDecodeError as e:
pass
Expand All @@ -92,25 +81,47 @@ def fetch_data(self, payload, search_item=None):
raise XdmodError(f'Invalid XML data returned from XDMoD API: {e}') from e

rows = root.find('rows')
if len(rows) != 1:
if len(rows) < 1:
raise XdmodNotFoundError(
f'Rows not found for {search_item} - {self.payload["resource_filter"]}'
)
return rows

def fetch_value(self, payload, search_item=None):
rows = self.fetch_data(payload, search_item=search_item)
cells = rows.find('row').findall('cell')
if len(cells) != 2:
raise XdmodError('Invalid XML data returned from XDMoD API: Cells not found')

stats = cells[1].find('value').text
return stats

def xdmod_fetch(self, account, statistics, realm, group_by='total'):
"""fetch either total or per-user usage stats"""
def fetch_table(self, payload, search_item=None):
"""make a dictionary of usernames and their associated core hours from
XML data.
"""
# return rows extracted from XML data
rows = self.fetch_data(payload, search_item=search_item)
# Produce a dict of usernames and their associated core hours from those rows
stats = {}
for row in rows:
cells = row.findall('cell')
username = cells[0].find('value').text
stats[username] = cells[1].find('value').text
return stats

def xdmod_fetch(self, account, statistic, realm, group_by='total'):
"""fetch either total or per-user usage stats for specified project"""
payload = dict(self.payload)
payload['pi_filter'] = f'"{account}"'
payload['group_by'] = self.group_by[group_by]
payload['statistic'] = statistics
payload['statistic'] = statistic
payload['realm'] = realm
core_hours = self.fetch_data(payload, search_item=account)
if group_by == 'total':
core_hours = self.fetch_value(payload, search_item=account)
elif group_by == 'per-user':
core_hours = self.fetch_table(payload, search_item=account)
else:
raise Exception('unrecognized group_by value')
return core_hours

def xdmod_fetch_all_project_usages(self, statistic):
Expand All @@ -119,7 +130,7 @@ def xdmod_fetch_all_project_usages(self, statistic):
payload['group_by'] = 'pi'
payload['realm'] = 'Jobs'
payload['statistic'] = statistic
stats = self.fetch_data(payload)
stats = self.fetch_table(payload)
return stats

def xdmod_fetch_cpu_hours(self, account, group_by='total', statistics='total_cpu_hours'):
Expand All @@ -141,5 +152,5 @@ def xdmod_fetch_cloud_core_time(self, project):
payload['realm'] = 'Cloud'
payload['statistic'] = 'cloud_core_time'

core_hours = self.fetch_data(payload, search_item=project)
core_hours = self.fetch_value(payload, search_item=project)
return core_hours

0 comments on commit 72ac937

Please sign in to comment.