From 2ad422d4b58286f73e441e8b45511ac18f14e115 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Wed, 14 Feb 2024 09:10:58 -0500 Subject: [PATCH] refactor: move report to https://github.com/CCBR/reports --- CHANGELOG.md | 6 +- bin/render.R | 5 - bin/render.sh | 11 - bin/render_report_biowulf.sh | 29 -- docker/spacesavers2/Dockerfile | 105 ------ docker/spacesavers2/environment.txt | 26 -- docker/spacesavers2/meta.yml | 4 - report.Rmd | 504 ---------------------------- 8 files changed, 4 insertions(+), 686 deletions(-) delete mode 100755 bin/render.R delete mode 100755 bin/render.sh delete mode 100755 bin/render_report_biowulf.sh delete mode 100644 docker/spacesavers2/Dockerfile delete mode 100644 docker/spacesavers2/environment.txt delete mode 100644 docker/spacesavers2/meta.yml delete mode 100644 report.Rmd diff --git a/CHANGELOG.md b/CHANGELOG.md index b65cf73..3ca870d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ ## spacesavers2 development version +- Move the report to a separate internal repository (@kelly-sovacool) + ### New features ### Bug fixes @@ -21,11 +23,11 @@ - `grubbers` `--limit` can be < 1 GiB (float) (#70, @kopardev) - `grubbers` output file format changed. New original file column added. Original file is required by `usurp`. - `mimeo` `--duplicateonly` now correctly handles duplicates owned by different UIDs. (#71, @kopardev) - - Update `blamematrix` and to account for corrected duplicate handling in `mimeo`. + - Update `blamematrix` and to account for corrected duplicate handling in `mimeo`. - `usurp` now uses the new "original file" column from `grubbers` while creating hard-links. - Total size now closely resembles `df` results (fix #75 @kopardev) - Files with future timestamps are handled correctly (fix #76, @kopardev) - + ## spacesavers2 0.10.2 - Now tracking user-facing changes with a changelog. (#61, @kelly-sovacool) diff --git a/bin/render.R b/bin/render.R deleted file mode 100755 index ebb8ae1..0000000 --- a/bin/render.R +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env Rscript -rmarkdown::render("report.Rmd", - output_file = "datashare/report.html", - params = list(input_dir = "data") -) diff --git a/bin/render.sh b/bin/render.sh deleted file mode 100755 index 6c3242a..0000000 --- a/bin/render.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash -# to be executed from /data/CCBR_Pipeliner/Tools/spacesavers2/report -# Usage: bash bin/render_report_biowulf.sh -module load singularity -SINGULARITY_CACHEDIR=/data/CCBR_Pipeliner/SIFS - -# render report -echo "cd /mnt && \ - Rscript bin/render.R \ - " |\ - singularity exec -C -B $PWD:/mnt,/data/CCBR_Pipeliner/userdata/spacesavers2/:/mnt/data docker://nciccbr/spacesavers2:0.1.1 bash diff --git a/bin/render_report_biowulf.sh b/bin/render_report_biowulf.sh deleted file mode 100755 index 714eaa9..0000000 --- a/bin/render_report_biowulf.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env bash -# to be executed from /data/CCBR_Pipeliner/Tools/spacesavers2/report -# Usage: bash bin/render_report_biowulf.sh -module load singularity -SINGULARITY_CACHEDIR=/data/CCBR_Pipeliner/SIFS - -today=$(date +'%Y-%m-%d') -year=$(date +'%Y') -mkdir -p datashare/$year -html_filename="datashare/${year}/spacesavers2-report_${today}.html" -recipient_email="kelly.sovacool@nih.gov,vishal.koparde@nih.gov" - -url=https://hpc.nih.gov/~CCBR_Pipeliner/spacesavers2/${year}/spacesavers2-report_${today}.html - -# update disk usage -bash bin/disk_usage.sh -# render report and send via email -echo "cd /mnt && \ - Rscript bin/render.R && \ - cp datashare/report.html $html_filename && \ - python src/send_email.py \ - $html_filename \ - $url \ - $recipient_email \ - " |\ - singularity exec -C -B $PWD:/mnt,/data/CCBR_Pipeliner/userdata/spacesavers2/:/mnt/data docker://nciccbr/spacesavers2:0.1.1 bash - -chmod -R a+r datashare/ -cp -r datashare/* /data/CCBR_Pipeliner/datashare/spacesavers2/ diff --git a/docker/spacesavers2/Dockerfile b/docker/spacesavers2/Dockerfile deleted file mode 100644 index 233b5fe..0000000 --- a/docker/spacesavers2/Dockerfile +++ /dev/null @@ -1,105 +0,0 @@ -FROM ubuntu:20.04 - -# build time variables -ARG BUILD_DATE="000000" -ENV BUILD_DATE=${BUILD_DATE} -ARG BUILD_TAG="000000" -ENV BUILD_TAG=${BUILD_TAG} -ARG REPONAME="000000" -ENV REPONAME=${REPONAME} - -RUN mkdir -p /opt2 && mkdir -p /data2 -ENV TZ=America/New_York -RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone - -RUN apt update && apt-get -y upgrade -# Set the locale -RUN DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ - locales build-essential cmake cpanminus && \ - localedef -i en_US -f UTF-8 en_US.UTF-8 && \ - cpanm FindBin Term::ReadLine - -# install basic dependencies with apt-get -RUN DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ - build-essential \ - figlet \ - g++ \ - gcc \ - gfortran \ - git \ - libatlas-base-dev \ - libblas-dev \ - libboost-dev \ - libbz2-dev \ - libcurl4-openssl-dev \ - libexpat1-dev \ - libfreetype6-dev \ - libgd-dev \ - libgd-perl \ - libglib2.0-dev \ - libgpgme11-dev \ - libgs-dev \ - libgsl-dev \ - libgsl0-dev \ - libhtml-template-compiled-perl \ - libicu-dev \ - libjudy-dev \ - liblapack-dev \ - liblzma-dev \ - libmysqlclient-dev \ - libncurses-dev \ - libopenmpi-dev \ - libpng-dev \ - librtmp-dev \ - libseccomp-dev \ - libssl-dev \ - libtool \ - libxml-libxml-debugging-perl \ - libxml-opml-simplegen-perl \ - libxml2-dev \ - libxslt-dev \ - make \ - manpages-dev \ - openjdk-17-jre-headless \ - parallel \ - pigz \ - pkg-config \ - python3-pip \ - python3-dev \ - rsync \ - squashfs-tools \ - unzip \ - uuid-dev \ - wget \ - zlib1g \ - zlib1g-dev \ - zlibc - -# Install conda and give write permissions to conda folder -RUN echo 'export PATH=/opt2/conda/bin:$PATH' > /etc/profile.d/conda.sh && \ - wget --quiet "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" -O ~/miniforge3.sh && \ - /bin/bash ~/miniforge3.sh -b -p /opt2/conda && \ - rm ~/miniforge3.sh && chmod 777 -R /opt2/conda/ -ENV PATH="/opt2/conda/bin:$PATH" - -# install pandoc & R packages -COPY environment.txt /data2/ -RUN mamba install -c conda-forge --file /data2/environment.txt -ENV R_LIBS_USER=/opt2/conda/lib/R/library/ - -# install quarto -ENV QUARTO_VERSION="1.3.450" -ADD https://github.com/quarto-dev/quarto-cli/releases/download/v${QUARTO_VERSION}/quarto-${QUARTO_VERSION}-linux-amd64.tar.gz /opt2 -WORKDIR /opt2 -RUN tar -xzvf quarto-${QUARTO_VERSION}-linux-amd64.tar.gz -ENV PATH="/opt2/quarto-${QUARTO_VERSION}/bin/:${PATH}" -RUN quarto check - -# Save Dockerfile in the docker -COPY Dockerfile /opt2/Dockerfile_${REPONAME}.${BUILD_TAG} -RUN chmod a+r /opt2/Dockerfile_${REPONAME}.${BUILD_TAG} - -# cleanup -WORKDIR /data2 -RUN apt-get clean && apt-get purge \ - && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* diff --git a/docker/spacesavers2/environment.txt b/docker/spacesavers2/environment.txt deleted file mode 100644 index 5334639..0000000 --- a/docker/spacesavers2/environment.txt +++ /dev/null @@ -1,26 +0,0 @@ -pandoc -r-base=4.3.1 -r-DT -r-RColorBrewer -r-bslib=0.5.1 -r-crosstalk -r-downlit -r-dplyr -r-fontawesome -r-ggplot2 -r-glue -r-here -r-janitor -r-knitr -r-lubridate -r-plotly -r-purrr -r-readr -r-rlang -r-rmarkdown -r-scales -r-shiny -r-stringr -r-tibble -r-tidyr -r-xml2 diff --git a/docker/spacesavers2/meta.yml b/docker/spacesavers2/meta.yml deleted file mode 100644 index d68cd0a..0000000 --- a/docker/spacesavers2/meta.yml +++ /dev/null @@ -1,4 +0,0 @@ -dockerhub_namespace: nciccbr -image_name: spacesavers2 -version: 0.1.1 -container: "$(dockerhub_namespace)/$(image_name):$(version)" diff --git a/report.Rmd b/report.Rmd deleted file mode 100644 index c04f01f..0000000 --- a/report.Rmd +++ /dev/null @@ -1,504 +0,0 @@ ---- -title: "spacesavers2 🚀 report" -author: "CCR Collaborative Bioinformatics Resource" -date: '`r lubridate::today()`' -output: - html_document: - theme: - version: 5 - code_folding: hide - toc: true - self_contained: true -params: - input_dir: '/data/CCBR_Pipeliner/userdata/spacesavers2/' - n_top_users: 10 -knit: (function(inputFile, encoding) { - rmarkdown::render(inputFile, encoding = encoding, output_dir = "datashare/") }) ---- -```{r setup} -knitr::opts_chunk$set(message = FALSE, warning = FALSE) -``` - -Notice a bug or want to make a suggestion for this report? [Open an issue](https://github.com/CCBR/spacesavers2/issues) on GitHub. - -```{r load} -library(bslib) -library(dplyr) -library(DT) -library(fontawesome) -library(ggplot2) -library(glue) -library(here) -library(htmltools) -library(knitr) -library(lubridate) -library(plotly) -library(purrr) -library(readr) -library(rlang) -library(scales) -library(shiny) -library(stringr) -library(tidyr) -theme_set(theme_bw()) - -to_bytes <- function(x, from_unit) { - bytes_units <- list( - KiB = 1, - MiB = 2, - GiB = 3, - TiB = 4 - ) - return(x * (1024^bytes_units[[from_unit]])) -} -from_bytes <- function(x, to_unit) { - return(x * x / (to_bytes(x, to_unit))) -} - -from_bytes_v <- Vectorize(from_bytes) -to_bytes_v <- Vectorize(to_bytes) - -filter_users <- function(dat, usercol = username) { - non_people <- c("allusers", "rpcuser", "slurm") - dat %>% - filter( - !({{ usercol }} %in% non_people), # not actual people - !str_detect({{ usercol }}, "[0-9]") # entirely numeric usernames - ) -} - -is_large_range <- function(x, n_orders_magnitude = 5) { - xrange <- range(x) - return((xrange[2] - xrange[1]) >= 10^n_orders_magnitude) -} - -plot_user_metric <- function(dat, x_metric) { - dat %>% - ggplot(aes( - x = eval_tidy(data_sym(x_metric)), - y = username, - fill = eval_tidy(data_sym(x_metric)), - text = glue("{username}\n{eval_tidy(data_sym(x_metric))} {x_metric}") - )) + - geom_col() + - # TODO: ggplotly doesn't know what to do with scale::label_log - # {if (is_large_range(dat %>% pull(x_metric))) scale_x_log10(labels = label_log(digits = 2)) } + - labs(x = x_metric, y = "") + - theme(legend.position = "none") -} - -plot_metric_time <- function(dat, y_metric) { - dat %>% - ggplot(aes( - x = date, - y = eval_tidy(data_sym(y_metric)), - color = username - )) + - geom_line(alpha = 0.7) + - geom_point(aes(text = glue("{username}\n{eval_tidy(data_sym(y_metric))} {y_metric}"))) + - labs(y = y_metric) -} - -min_user_bytes_GiB <- 10 -panel_summary <- function(dat, - folder_path = "/data/CCBR", - plot_fcn = plot_metric_time, - min_bytes_GiB = min_user_bytes_GiB) { - summary_dat_folder <- dat %>% - filter(FolderPath == folder_path) %>% - mutate(TotalBytes_GiB = from_bytes(TotalBytes, 'GiB')) %>% - # only keep users with at least 10 GiB total usage - filter(TotalBytes_GiB >= min_bytes_GiB) %>% - select(-TotalBytes_GiB) - top_users <- summary_dat_folder %>% - pivot_longer(all_of(summary_metrics), - names_to = "metric" - ) %>% - mutate(value_adj = case_when( - metric == "OverallScore" ~ -value, - TRUE ~ value - )) %>% - group_by(metric) %>% - slice_max(order_by = value_adj, n = n_top_users) %>% - pull(username) %>% - unique() - plots <- summary_metrics %>% lapply(function(y_metric) { - user_order <- summary_dat_folder %>% - filter(username %in% top_users) %>% - pivot_longer(all_of(summary_metrics), - names_to = "metric" - ) %>% - mutate(value_adj = case_when( - metric == "OverallScore" ~ -value, - TRUE ~ value - )) %>% - filter(metric == y_metric) %>% - arrange(by = value_adj) %>% - pull(username) %>% - unique() - if (y_metric == "TotalBytes" | y_metric == "DuplicateBytes") { - to_unit <- "TiB" # TODO: dynamically set based on range of metric - new_metric_name <- glue("{y_metric}_{to_unit}") - summary_dat_folder <- summary_dat_folder %>% - mutate("{new_metric_name}" := from_bytes(eval_tidy(data_sym(y_metric)), to_unit)) - y_metric <- new_metric_name - } else if (y_metric == "TotalMeanAge" | y_metric == "DuplicateMeanAge") { - new_metric_name <- glue("{y_metric}_Days") - summary_dat_folder <- summary_dat_folder %>% - rename("{new_metric_name}" := y_metric) - y_metric <- new_metric_name - } else if (y_metric == "TotalFiles" | y_metric == "DuplicateFiles") { - new_metric_name <- glue("{y_metric}_Millions") - summary_dat_folder <- summary_dat_folder %>% - mutate("{new_metric_name}" := eval_tidy(data_sym(y_metric)) / 10^6) - y_metric <- new_metric_name - } - p <- summary_dat_folder %>% - filter(username %in% user_order) %>% - mutate(username = factor(username, levels = user_order)) %>% - mutate(across(where(is.numeric), round, digits = 2)) %>% - plot_fcn(y_metric) - nav_panel(title = y_metric, card_header(y_metric), ggplotly(p, tooltip = "text")) - }) - nav_panel( - title = markdown(glue("`{folder_path}`")), - navset_pill_list(!!!plots) - ) -} -``` - -```{r read_data} -n_top_users <- params$n_top_users -input_dir <- params$input_dir # here("data") -aggregated_filetypes <- c("blamematrix", "catalog", "mimeo") -# TODO: only load last N weeks of data to keep RAM usage reasonably low -all_files <- tibble(filename = list.dirs(input_dir) %>% - Filter(function(x) { - x != input_dir - }, .) %>% - lapply(function(x) { - list.files(x, full.names = TRUE) - }) %>% - unlist()) -user_dat <- all_files %>% - filter(!str_detect(filename, paste(aggregated_filetypes, collapse = "|"))) %>% - separate_wider_delim(filename, - delim = ".", cols_remove = FALSE, - names = c("date", "path", "username", "file", "ext"), - too_few = "debug" - ) %>% - mutate(date = as_date(basename(date))) - -dates <- user_dat %>% - filter(!is.na(date)) %>% - pull(date) %>% - unique() -most_recent_date <- dates %>% max() - -total_usage_tb <- user_dat %>% - filter( - username == "allusers", - date == most_recent_date, - file == "summary", - path == "_data_CCBR" - ) %>% - pull(filename) %>% - read_tsv() %>% - filter(FolderPath == "/data/CCBR") %>% - mutate(disk_usage_tb = from_bytes(TotalBytes, "TiB")) %>% - pull(disk_usage_tb) -# TODO disk_usage_tb doesn't agree with output from `df` - -grubbers_allusers_err <- user_dat %>% - filter( - username == "allusers", - date == most_recent_date, - file == "grubbers", - ext == "err", - path == "_data_CCBR" - ) %>% - pull(filename) %>% - read_lines() -grubbers_message <- grubbers_allusers_err[2] %>% - str_split(":") %>% - unlist() %>% - .[3] - -user_dat <- user_dat %>% filter_users() -usernames <- user_dat %>% - pull(username) %>% - unique() - -summary_dat_recent <- user_dat %>% - filter( - date == most_recent_date, file == "summary" - ) %>% - pull(filename) %>% - map(function(x) { - read_tsv(x) %>% mutate(filename = x) - }) %>% - list_rbind() %>% - separate_wider_delim(filename, - delim = ".", cols_remove = FALSE, - names = c("basepath", "path", "username", "file", "ext") - ) -summary_metrics <- summary_dat_recent %>% - pivot_longer(where(is.numeric), names_to = "metric") %>% - pull(metric) %>% - unique() -``` - -## Total disk usage - -```{r disk_usage_latest} -disk_usage <- read_tsv(here("results", "disk_usage.tsv")) %>% - mutate(used_tib = from_bytes(to_bytes(Used, "KiB"), "TiB"), - avail_tib = from_bytes(to_bytes(Avail, "KiB"),"TiB"), - size_tib = used_tib + avail_tib) -df_date <- disk_usage %>% - slice_max(datetime) %>% - pull(datetime) %>% - as_date() - -layout_column_wrap( - width = 1 / 2, - value_box( - title = p(fa("hard-drive"), " Disk space in /data/CCBR"), - value = markdown(disk_usage %>% - slice_max(datetime) %>% - mutate(Usage = glue("{round(used_tib,1)} / {size_tib}")) %>% - select(Usage, `Use%`) %>% - kable()), - theme = "warning" - ), - value_box( - title = p(fa("users", prefer_type = "regular"), " Users"), - value = p(glue("{length(usernames)} users as of {format(df_date, '%b %d, %Y')}")), - theme = "primary" - ) -) -``` - -### Total usage over time - -```{r disk_usage_over_time} -p <- disk_usage %>% - mutate(datetime = lubridate::as_datetime(datetime)) %>% - rename(used = used_tib, size = size_tib, avail = avail_tib) %>% - pivot_longer(c(used, size), names_to = 'metric') %>% - mutate(value = round(value, 2)) %>% - ggplot(aes( - x = datetime, - y = value, - color = metric, - group = metric - )) + - geom_line(alpha = 0.7) + - geom_point(aes(text = glue("{value} TiB"))) + - scale_x_datetime(labels = date_format("%b %Y")) + - scale_color_brewer(palette = "Set2", - breaks = c('size', 'used') # enforce order - ) + - labs(y = 'TiB', x = '') + - theme(legend.title = element_blank()) - -card(ggplotly(p, tooltip = "text")) -``` - - -## Summary over time - -Usage by top users for each spacesavers metric. -Only users with at least `r min_user_bytes_GiB` GiB of total disk usage are shown. - -```{r summary_over_time} -summary_dat_all <- user_dat %>% - filter( - file == "summary" - ) %>% - pull(filename) %>% - map(function(x) { - read_tsv(x) %>% mutate(filename = x) - }) %>% - list_rbind() %>% - separate_wider_delim(filename, - delim = ".", cols_remove = FALSE, - names = c("basepath", "path", "username", "file", "ext") - ) %>% - mutate(date = str_replace(basepath, ".*/", "") %>% as_date()) - -navset_tab( - summary_dat_all %>% panel_summary("/data/CCBR", plot_metric_time), - summary_dat_all %>% panel_summary("/data/CCBR/rawdata", plot_metric_time), - summary_dat_all %>% panel_summary("/data/CCBR/projects", plot_metric_time), -) -``` - - -## Most recent summary (`r most_recent_date`) - -Usage by top users for each spacesavers metric. - -```{r summary_recent} -navset_tab( - summary_dat_recent %>% panel_summary("/data/CCBR", plot_user_metric), - summary_dat_recent %>% panel_summary("/data/CCBR/rawdata", plot_user_metric), - summary_dat_recent %>% panel_summary("/data/CCBR/projects", plot_user_metric), -) -``` - -## Summary table - -```{r allusers_summary} -allusers_summary <- all_files %>% - filter(str_detect(filename, "_data_CCBR.allusers.summary.txt")) %>% - separate_wider_delim(filename, - delim = ".", cols_remove = FALSE, - names = c("date", "path", "username", "file", "ext") - ) %>% - mutate(date = as_date(basename(date))) %>% - slice_max(order_by = date) %>% - pull(filename) %>% - map(function(x) { - read_tsv(x) - }) %>% - list_rbind() %>% - mutate( - TotalBytes_GiB = round(from_bytes_v(TotalBytes, "GiB"), 2), - DuplicateBytes_GiB = round(from_bytes_v(DuplicateBytes, "GiB"), 2), - .before = "DuplicateBytes" - ) %>% - select(-c(TotalBytes, DuplicateBytes)) - -card( - card_header("Summary across all users"), - datatable(allusers_summary, fillContainer = TRUE) -) -``` - - -## Blame matrix - -```{r blame} -blame_matrix <- all_files %>% - filter(str_detect(filename, "blamematrix")) %>% - separate_wider_delim(filename, - delim = ".", cols_remove = FALSE, - names = c("date", "path", "file", "ext") - ) %>% - mutate(date = as_date(basename(date))) %>% - filter(!is.na(date), file == "blamematrix", ext == "tsv", path == "_data_CCBR") %>% - slice_max(order_by = date) %>% - pull(filename) %>% - map(function(x) { - read_tsv(x) - }) %>% - list_rbind() - -card( - card_header("Disk usage by user in subdirectories"), - datatable(blame_matrix, fillContainer = TRUE) -) -``` - - -## Duplicate files - -`r grubbers_message` - -### Potential savings per user - -```{r grub_err} -grub_err <- user_dat %>% - filter_users() %>% - filter(!is.na(date), file == "grubbers", ext == "err", path == "_data_CCBR") %>% - slice_max(order_by = date) %>% - pull(filename) %>% - map(function(x) { - read_tsv(x, col_names = FALSE) %>% - mutate(filename = x) - }) %>% - list_rbind() %>% - filter(str_detect(X1, "Deleting")) %>% - separate_wider_delim(filename, - delim = ".", cols_remove = FALSE, - names = c("date", "path", "username", "file", "ext") - ) %>% - mutate( - date = as_date(basename(date)), - grub_msg = str_replace_all(X1, regex("^.*:"), ""), - savings_value = as.numeric( - str_replace_all( - grub_msg, - regex(".*save ([\\d\\.]*) [\\w!]+"), - "\\1" - ) - ), - savings_unit = str_replace_all( - grub_msg, - regex(".*save [\\d\\.]* ([\\w]+)!"), - "\\1" - ), - savings_bytes = to_bytes_v(savings_value, savings_unit) - ) - -user_grub_table <- grub_err %>% - arrange(desc(savings_bytes)) %>% - select(username, savings_value, savings_unit) - -card( - card_header("Savings per user"), - datatable(user_grub_table, fillContainer = TRUE) -) -``` - - -### All high-value duplicates - -```{r grubbers} -grub_dat <- user_dat %>% - filter_users() %>% - filter(!is.na(date), file == "grubbers", ext == "tsv", path == "_data_CCBR") %>% - slice_max(order_by = date) %>% - pull(filename) %>% - map(function(x) { - read_tsv(x, col_names = FALSE) %>% - mutate(filename = x) - }) %>% - list_rbind() %>% - rename( - file_hash = X1, - file_count = X2, - total_disk_usage = X3, - single_disk_usage = X4, - filepaths = X5 - ) %>% - separate_wider_delim(filename, - delim = ".", cols_remove = FALSE, - names = c("date", "path", "username", "file", "ext") - ) %>% - mutate(date = as_date(basename(date))) %>% - filter_users() %>% - separate_wider_delim(total_disk_usage, - delim = " ", - names = c("total_disk_usage_value", "total_disk_usage_unit"), - cols_remove = FALSE - ) %>% - separate_wider_delim(single_disk_usage, - delim = " ", - names = c("single_disk_usage_value", "single_disk_usage_unit"), - cols_remove = FALSE - ) %>% - mutate(across(all_of(c("total_disk_usage_value", "single_disk_usage_value")), as.numeric)) - -top_files <- grub_dat %>% - arrange(order_by = desc(total_disk_usage_value)) %>% - select(total_disk_usage_value, username, filepaths) %>% - rename(disk_usage_gb = total_disk_usage_value) - -card(card_header("Top files"), datatable(top_files, fillContainer = TRUE)) -``` - - -For instructions on how to replace duplicates with hard links, see the -[`usurp` command in the spacesavers docs](https://ccbr.github.io/spacesavers2/usurp/).