From b088c1a5b7f07a6ce1b7c178e401c95e8e6ccea6 Mon Sep 17 00:00:00 2001 From: <> Date: Fri, 26 Jan 2024 00:31:12 +0000 Subject: [PATCH] Deployed 43db803 with MkDocs version: 1.4.1 --- .nojekyll | 0 2024/report_2024-01-17.html | 5539 ++++++++++++++ 404.html | 1 + assets/images/favicon.png | Bin 0 -> 1870 bytes assets/images/spacesavers2.png | Bin 0 -> 41865 bytes assets/javascripts/bundle.078830c0.min.js | 29 + assets/javascripts/bundle.078830c0.min.js.map | 8 + assets/javascripts/lunr/min/lunr.ar.min.js | 1 + assets/javascripts/lunr/min/lunr.da.min.js | 18 + assets/javascripts/lunr/min/lunr.de.min.js | 18 + assets/javascripts/lunr/min/lunr.du.min.js | 18 + assets/javascripts/lunr/min/lunr.es.min.js | 18 + assets/javascripts/lunr/min/lunr.fi.min.js | 18 + assets/javascripts/lunr/min/lunr.fr.min.js | 18 + assets/javascripts/lunr/min/lunr.hi.min.js | 1 + assets/javascripts/lunr/min/lunr.hu.min.js | 18 + assets/javascripts/lunr/min/lunr.it.min.js | 18 + assets/javascripts/lunr/min/lunr.ja.min.js | 1 + assets/javascripts/lunr/min/lunr.jp.min.js | 1 + assets/javascripts/lunr/min/lunr.multi.min.js | 1 + assets/javascripts/lunr/min/lunr.nl.min.js | 18 + assets/javascripts/lunr/min/lunr.no.min.js | 18 + assets/javascripts/lunr/min/lunr.pt.min.js | 18 + assets/javascripts/lunr/min/lunr.ro.min.js | 18 + assets/javascripts/lunr/min/lunr.ru.min.js | 18 + .../lunr/min/lunr.stemmer.support.min.js | 1 + assets/javascripts/lunr/min/lunr.sv.min.js | 18 + assets/javascripts/lunr/min/lunr.th.min.js | 1 + assets/javascripts/lunr/min/lunr.tr.min.js | 18 + assets/javascripts/lunr/min/lunr.vi.min.js | 1 + assets/javascripts/lunr/min/lunr.zh.min.js | 1 + assets/javascripts/lunr/tinyseg.js | 206 + assets/javascripts/lunr/wordcut.js | 6708 +++++++++++++++++ .../workers/search.5bf1dace.min.js | 48 + .../workers/search.5bf1dace.min.js.map | 8 + assets/stylesheets/main.20d9efc8.min.css | 1 + assets/stylesheets/main.20d9efc8.min.css.map | 1 + assets/stylesheets/palette.cbb835fc.min.css | 1 + 
.../stylesheets/palette.cbb835fc.min.css.map | 1 + blamematrix/index.html | 18 + catalog/index.html | 28 + e2e/index.html | 16 + grubbers/index.html | 22 + index.html | 3 + mimeo/index.html | 26 + report.html | 5539 ++++++++++++++ requirements.txt | 39 + search/search_index.json | 1 + sitemap.xml | 38 + sitemap.xml.gz | Bin 0 -> 201 bytes usurp/index.html | 17 + 51 files changed, 18577 insertions(+) create mode 100644 .nojekyll create mode 100644 2024/report_2024-01-17.html create mode 100644 404.html create mode 100644 assets/images/favicon.png create mode 100644 assets/images/spacesavers2.png create mode 100644 assets/javascripts/bundle.078830c0.min.js create mode 100644 assets/javascripts/bundle.078830c0.min.js.map create mode 100644 assets/javascripts/lunr/min/lunr.ar.min.js create mode 100644 assets/javascripts/lunr/min/lunr.da.min.js create mode 100644 assets/javascripts/lunr/min/lunr.de.min.js create mode 100644 assets/javascripts/lunr/min/lunr.du.min.js create mode 100644 assets/javascripts/lunr/min/lunr.es.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fr.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hu.min.js create mode 100644 assets/javascripts/lunr/min/lunr.it.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ja.min.js create mode 100644 assets/javascripts/lunr/min/lunr.jp.min.js create mode 100644 assets/javascripts/lunr/min/lunr.multi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.nl.min.js create mode 100644 assets/javascripts/lunr/min/lunr.no.min.js create mode 100644 assets/javascripts/lunr/min/lunr.pt.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ro.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ru.min.js create mode 100644 assets/javascripts/lunr/min/lunr.stemmer.support.min.js create mode 100644 assets/javascripts/lunr/min/lunr.sv.min.js create mode 
100644 assets/javascripts/lunr/min/lunr.th.min.js create mode 100644 assets/javascripts/lunr/min/lunr.tr.min.js create mode 100644 assets/javascripts/lunr/min/lunr.vi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.zh.min.js create mode 100644 assets/javascripts/lunr/tinyseg.js create mode 100644 assets/javascripts/lunr/wordcut.js create mode 100644 assets/javascripts/workers/search.5bf1dace.min.js create mode 100644 assets/javascripts/workers/search.5bf1dace.min.js.map create mode 100644 assets/stylesheets/main.20d9efc8.min.css create mode 100644 assets/stylesheets/main.20d9efc8.min.css.map create mode 100644 assets/stylesheets/palette.cbb835fc.min.css create mode 100644 assets/stylesheets/palette.cbb835fc.min.css.map create mode 100644 blamematrix/index.html create mode 100644 catalog/index.html create mode 100644 e2e/index.html create mode 100644 grubbers/index.html create mode 100644 index.html create mode 100644 mimeo/index.html create mode 100644 report.html create mode 100644 requirements.txt create mode 100644 search/search_index.json create mode 100644 sitemap.xml create mode 100644 sitemap.xml.gz create mode 100644 usurp/index.html diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 0000000..e69de29 diff --git a/2024/report_2024-01-17.html b/2024/report_2024-01-17.html new file mode 100644 index 0000000..ebeb739 --- /dev/null +++ b/2024/report_2024-01-17.html @@ -0,0 +1,5539 @@ + + + + +
+ + + + + + + + + + +knitr::opts_chunk$set(message = FALSE, warning = FALSE)
+View this report on the web: https://ccbr.github.io/spacesavers2/2024/report_2024-01-17.html
+library(bslib)
+library(dplyr)
+library(DT)
+library(fontawesome)
+library(ggplot2)
+library(glue)
+library(here)
+library(htmltools)
+library(knitr)
+library(lubridate)
+library(plotly)
+library(purrr)
+library(readr)
+library(rlang)
+library(scales)
+library(shiny)
+library(stringr)
+library(tidyr)
+theme_set(theme_bw())
+
+# Convert a size expressed in a binary unit into bytes.
+#
+# x: numeric vector of sizes.
+# from_unit: unit of `x`; one of "B", "KiB", "MiB", "GiB", "TiB", "PiB".
+#   Either a single string or a vector that lines up with `x`.
+# Returns `x` multiplied by the matching power of 1024 (NA where the unit is
+# NA).
+# Stops with an error on an unrecognised unit. The previous list lookup
+# returned NULL for those, which silently produced a zero-length result.
+to_bytes <- function(x, from_unit) {
+  exponents <- c(B = 0, KiB = 1, MiB = 2, GiB = 3, TiB = 4, PiB = 5)
+  # Index by name, never by factor codes.
+  from_unit <- as.character(from_unit)
+  unknown <- setdiff(from_unit[!is.na(from_unit)], names(exponents))
+  if (length(unknown) > 0) {
+    stop(
+      "Unknown unit(s): ", paste(unknown, collapse = ", "),
+      ". Expected one of: ", paste(names(exponents), collapse = ", "),
+      call. = FALSE
+    )
+  }
+  x * 1024^unname(exponents[from_unit])
+}
+# Convert a size in bytes into the requested binary unit.
+#
+# x: numeric vector of byte counts.
+# to_unit: target unit name understood by `to_bytes()` (e.g. "GiB").
+# Returns `x` divided by the number of bytes in one `to_unit`.
+# The earlier form `x * x / to_bytes(x, to_unit)` evaluated 0 / 0 for a zero
+# byte count and returned NaN; dividing by the size of one unit keeps 0 as 0.
+from_bytes <- function(x, to_unit) {
+  x / to_bytes(1, to_unit)
+}
+
+# Element-wise wrappers around the converters: both the value and the unit
+# argument are iterated over, so columns of sizes and units can be passed.
+to_bytes_v <- Vectorize(to_bytes, vectorize.args = c("x", "from_unit"))
+from_bytes_v <- Vectorize(from_bytes, vectorize.args = c("x", "to_unit"))
+
+# Drop rows that belong to service accounts or to purely numeric user IDs.
+#
+# dat: data frame with one row per record.
+# usercol: unquoted name of the column holding usernames (default `username`).
+# Returns `dat` without rows whose username is a known non-person account or
+# consists only of digits. Rows with a missing username are dropped as well,
+# because `filter()` discards rows where a condition evaluates to NA.
+filter_users <- function(dat, usercol = username) {
+  non_people <- c("allusers", "rpcuser", "slurm")
+  dat %>%
+    filter(
+      !({{ usercol }} %in% non_people), # not actual people
+      # Only entirely numeric names are excluded. The previous pattern "[0-9]"
+      # matched any digit, so accounts such as "smithj2" were dropped too.
+      !str_detect({{ usercol }}, "^[0-9]+$")
+    )
+}
+
+# Report whether the values of `x` are spread across a large range.
+#
+# x: numeric vector; missing values are ignored.
+# n_orders_magnitude: exponent `n` of the threshold `10^n` (default 5).
+# Returns TRUE when `max(x) - min(x)` is at least `10^n_orders_magnitude`,
+# otherwise FALSE. Note that the comparison uses the absolute spread, not the
+# ratio between the largest and the smallest value.
+# Empty or all-NA input yields FALSE. Previously an NA in `x` made the result
+# NA, which cannot be used as an `if ()` condition, and empty input triggered
+# a warning from `range()`.
+is_large_range <- function(x, n_orders_magnitude = 5) {
+  x <- x[!is.na(x)]
+  if (length(x) == 0) {
+    return(FALSE)
+  }
+  diff(range(x)) >= 10^n_orders_magnitude
+}
+
+# Horizontal bar chart of one metric per user.
+#
+# dat: data frame with a `username` column and a column named by `x_metric`.
+# x_metric: name (string) of the column drawn on the x axis.
+# Returns a ggplot object. Bar length and fill both encode the metric; the
+# `text` aesthetic holds a "<username>\n<value> <metric>" label.
+plot_user_metric <- function(dat, x_metric) {
+  bar_mapping <- aes(
+    x = eval_tidy(data_sym(x_metric)),
+    y = username,
+    fill = eval_tidy(data_sym(x_metric)),
+    text = glue("{username}\n{eval_tidy(data_sym(x_metric))} {x_metric}")
+  )
+  bars <- ggplot(dat, bar_mapping) +
+    geom_col()
+  # TODO: ggplotly doesn't know what to do with scales::label_log
+  # {if (is_large_range(dat %>% pull(x_metric))) scale_x_log10(labels = label_log(digits = 2)) } +
+  bars +
+    labs(x = x_metric, y = "") +
+    theme(legend.position = "none")
+}
+
+# Line-and-point chart of one metric over time, one colour per user.
+#
+# dat: data frame with `date` and `username` columns and a column named by
+#   `y_metric`.
+# y_metric: name (string) of the column drawn on the y axis.
+# Returns a ggplot object. The points carry a `text` aesthetic holding a
+# "<username>\n<value> <metric>" label.
+plot_metric_time <- function(dat, y_metric) {
+  point_label <- aes(
+    text = glue("{username}\n{eval_tidy(data_sym(y_metric))} {y_metric}")
+  )
+  trend <- ggplot(
+    dat,
+    aes(
+      x = date,
+      y = eval_tidy(data_sym(y_metric)),
+      color = username
+    )
+  )
+  trend +
+    geom_line(alpha = 0.7) +
+    geom_point(point_label) +
+    labs(y = y_metric)
+}
+
+# Add a display-friendly version of `metric` to `dat`.
+# Byte counts get a new column converted to TiB, file counts a new column in
+# millions, and mean-age columns are renamed with a `_Days` suffix. Any other
+# metric is left untouched.
+# Returns a list with the updated data (`data`) and the name of the column to
+# plot (`metric`).
+.add_display_metric <- function(dat, metric) {
+  if (metric %in% c("TotalBytes", "DuplicateBytes")) {
+    to_unit <- "TiB" # TODO: dynamically set based on range of metric
+    display_name <- glue("{metric}_{to_unit}")
+    dat <- dat %>%
+      mutate("{display_name}" := from_bytes(.data[[metric]], to_unit))
+  } else if (metric %in% c("TotalMeanAge", "DuplicateMeanAge")) {
+    display_name <- glue("{metric}_Days")
+    dat <- dat %>%
+      rename("{display_name}" := all_of(metric))
+  } else if (metric %in% c("TotalFiles", "DuplicateFiles")) {
+    display_name <- glue("{metric}_Millions")
+    dat <- dat %>%
+      mutate("{display_name}" := .data[[metric]] / 10^6)
+  } else {
+    display_name <- metric
+  }
+  list(data = dat, metric = display_name)
+}
+
+# Build one navigation panel for a folder, holding one plot per metric.
+#
+# dat: summary data with `FolderPath`, `username` and the metric columns.
+# folder_path: value of `FolderPath` to report on.
+# plot_fcn: function(dat, metric_name) returning a ggplot, e.g.
+#   `plot_metric_time` or `plot_user_metric`.
+# metrics: names of the metric columns to plot; defaults to the global
+#   `summary_metrics`.
+# n_top: number of top users kept per metric; defaults to the global
+#   `n_top_users`.
+# Returns a `nav_panel()` titled with the folder path that contains a pill
+# list with one interactive plot per metric. Only users that rank in the top
+# `n_top` for at least one metric are shown.
+panel_summary <- function(dat,
+                          folder_path = "/data/CCBR",
+                          plot_fcn = plot_metric_time,
+                          metrics = summary_metrics,
+                          n_top = n_top_users) {
+  folder_dat <- dat %>%
+    filter(FolderPath == folder_path)
+  # One row per record and metric. OverallScore is negated so that ranking
+  # and ordering run in the opposite direction for that metric.
+  folder_long <- folder_dat %>%
+    pivot_longer(all_of(metrics), names_to = "metric") %>%
+    mutate(value_adj = case_when(
+      metric == "OverallScore" ~ -value,
+      TRUE ~ value
+    ))
+  top_users <- folder_long %>%
+    group_by(metric) %>%
+    slice_max(order_by = value_adj, n = n_top) %>%
+    ungroup() %>%
+    pull(username) %>%
+    unique()
+  plots <- lapply(metrics, function(y_metric) {
+    # Users in ascending order of the adjusted metric; used as factor levels.
+    user_order <- folder_long %>%
+      filter(username %in% top_users, metric == y_metric) %>%
+      arrange(value_adj) %>%
+      pull(username) %>%
+      unique()
+    display <- .add_display_metric(folder_dat, y_metric)
+    p <- display$data %>%
+      filter(username %in% user_order) %>%
+      mutate(username = factor(username, levels = user_order)) %>%
+      # Anonymous function instead of `across(..., round, digits = 2)`:
+      # passing extra arguments through across() is deprecated in dplyr 1.1.
+      mutate(across(where(is.numeric), function(v) round(v, digits = 2))) %>%
+      plot_fcn(display$metric)
+    nav_panel(
+      title = display$metric,
+      card_header(display$metric),
+      ggplotly(p, tooltip = "text")
+    )
+  })
+  nav_panel(
+    title = markdown(glue("`{folder_path}`")),
+    navset_pill_list(!!!plots)
+  )
+}
+# Report parameters supplied at render time.
+input_dir <- params$input_dir # here("data")
+n_top_users <- params$n_top_users
+# File types that are filtered out when the per-user file table is built.
+aggregated_filetypes <- c("blamematrix", "catalog", "mimeo")
+# TODO: only load last N weeks of data to keep RAM usage reasonably low
+# Full paths of everything listed inside the subdirectories of `input_dir`;
+# entries directly inside `input_dir` itself are not listed.
+all_files <- tibble(
+  filename = unlist(lapply(
+    setdiff(list.dirs(input_dir), input_dir),
+    list.files,
+    full.names = TRUE
+  ))
+)
+# Table of per-user files. Aggregated file types are excluded, then each file
+# name is split on "." into date, path, username, file and ext. Names with too
+# few pieces are kept (`too_few = "debug"`) instead of raising an error.
+# The date is parsed from the last path component of the first piece.
+user_dat <- all_files %>%
+  filter(
+    str_detect(
+      filename,
+      paste(aggregated_filetypes, collapse = "|"),
+      negate = TRUE
+    )
+  ) %>%
+  separate_wider_delim(
+    filename,
+    names = c("date", "path", "username", "file", "ext"),
+    delim = ".",
+    too_few = "debug",
+    cols_remove = FALSE
+  ) %>%
+  mutate(date = as_date(basename(date)))
+
+# Distinct, non-missing dates present in the data and the latest of them.
+dates <- unique(user_dat$date[!is.na(user_dat$date)])
+most_recent_date <- max(dates)
+
+# TotalBytes of /data/CCBR expressed in TiB, read from the "allusers" summary
+# file of the most recent date.
+total_usage_tb <- local({
+  summary_file <- user_dat %>%
+    filter(username == "allusers") %>%
+    filter(date == most_recent_date) %>%
+    filter(file == "summary", path == "_data_CCBR") %>%
+    pull(filename)
+  folder_totals <- read_tsv(summary_file) %>%
+    filter(FolderPath == "/data/CCBR")
+  from_bytes(pull(folder_totals, TotalBytes), "TiB")
+})
+# TODO disk_usage_tb doesn't agree with output from `df`
+
+# Lines of the "allusers" grubbers .err file for the most recent date.
+grubbers_allusers_err <- read_lines(
+  user_dat %>%
+    filter(username == "allusers", date == most_recent_date) %>%
+    filter(file == "grubbers", ext == "err", path == "_data_CCBR") %>%
+    pull(filename)
+)
+# The message is the third ":"-separated field of the second line.
+grubbers_message <- str_split(grubbers_allusers_err[2], ":")[[1]][3]
+
+# From here on only files of real users are kept.
+user_dat <- filter_users(user_dat)
+# Distinct usernames that remain after filtering.
+usernames <- unique(pull(user_dat, username))
+
+# Summary tables of the most recent date, stacked into one data frame. The
+# source file name is kept and also split on "." into its components.
+summary_dat_recent <- user_dat %>%
+  filter(date == most_recent_date) %>%
+  filter(file == "summary") %>%
+  pull(filename) %>%
+  map(function(tsv_path) {
+    mutate(read_tsv(tsv_path), filename = tsv_path)
+  }) %>%
+  list_rbind() %>%
+  separate_wider_delim(
+    filename,
+    names = c("basepath", "path", "username", "file", "ext"),
+    delim = ".",
+    cols_remove = FALSE
+  )
+# Names of the numeric columns that occur in the recent summary data.
+summary_metrics <- unique(
+  pull(
+    pivot_longer(summary_dat_recent, where(is.numeric), names_to = "metric"),
+    metric
+  )
+)
+# Disk usage table read from results/disk_usage.txt; the columns used below
+# are `date`, `Used`, `Size` and `Use%`.
+disk_usage <- read_tsv(here("results", "disk_usage.txt"))
+df_date <- as_date(pull(disk_usage, "date"))
+
+# Two side-by-side value boxes: overall disk usage and the number of users.
+layout_column_wrap(
+  width = 1 / 2,
+  value_box(
+    theme = "warning",
+    title = p(fa("hard-drive"), " Disk space in /data/CCBR"),
+    value = markdown(
+      kable(
+        select(
+          mutate(disk_usage, Usage = glue("{Used} / {Size}")),
+          Usage, `Use%`
+        )
+      )
+    )
+  ),
+  value_box(
+    theme = "primary",
+    title = p(fa("users", prefer_type = "regular"), " Users"),
+    value = p(
+      glue("{length(usernames)} users as of {format(df_date, '%b %d, %Y')}")
+    )
+  )
+)
++ + Disk space in /data/CCBR +
+| Usage | Use% |
+|---|---|
+| 197T / 200T | 99% |
+ + Users +
+36 users as of Oct 17, 2023
+Usage by top users for each spacesavers metric.
+# Summary tables for every available date, stacked into one data frame. The
+# source file name is split on "." and the date is parsed from the last path
+# component of `basepath`.
+summary_dat_all <- user_dat %>%
+  filter(file == "summary") %>%
+  pull(filename) %>%
+  map(function(tsv_path) {
+    mutate(read_tsv(tsv_path), filename = tsv_path)
+  }) %>%
+  list_rbind() %>%
+  separate_wider_delim(
+    filename,
+    names = c("basepath", "path", "username", "file", "ext"),
+    delim = ".",
+    cols_remove = FALSE
+  ) %>%
+  mutate(date = as_date(str_replace(basepath, ".*/", "")))
+
+# One tab per folder, each plotting the metrics over time.
+navset_tab(
+  panel_summary(summary_dat_all, "/data/CCBR", plot_metric_time),
+  panel_summary(summary_dat_all, "/data/CCBR/rawdata", plot_metric_time),
+  panel_summary(summary_dat_all, "/data/CCBR/projects", plot_metric_time)
+)
+Usage by top users for each spacesavers metric.
+# One tab per folder, each plotting the most recent metrics per user.
+navset_tab(
+  panel_summary(summary_dat_recent, "/data/CCBR", plot_user_metric),
+  panel_summary(summary_dat_recent, "/data/CCBR/rawdata", plot_user_metric),
+  panel_summary(summary_dat_recent, "/data/CCBR/projects", plot_user_metric)
+)
+# Latest "allusers" summary for /data/CCBR. The two byte columns are replaced
+# by GiB columns rounded to two decimals.
+allusers_summary <- all_files %>%
+  filter(str_detect(filename, "_data_CCBR.allusers.summary.txt")) %>%
+  separate_wider_delim(
+    filename,
+    names = c("date", "path", "username", "file", "ext"),
+    delim = ".",
+    cols_remove = FALSE
+  ) %>%
+  mutate(date = as_date(basename(date))) %>%
+  slice_max(order_by = date) %>%
+  pull(filename) %>%
+  map(read_tsv) %>%
+  list_rbind() %>%
+  mutate(
+    TotalBytes_GiB = round(from_bytes_v(TotalBytes, "GiB"), 2),
+    DuplicateBytes_GiB = round(from_bytes_v(DuplicateBytes, "GiB"), 2),
+    .before = "DuplicateBytes"
+  ) %>%
+  select(-TotalBytes, -DuplicateBytes)
+
+card(
+  card_header("Summary across all users"),
+  datatable(allusers_summary, fillContainer = TRUE)
+)
+# Latest blame matrix for /data/CCBR. These file names have four
+# "."-separated pieces (date, path, file, ext) and no username piece.
+blame_matrix <- all_files %>%
+  filter(str_detect(filename, "blamematrix")) %>%
+  separate_wider_delim(
+    filename,
+    names = c("date", "path", "file", "ext"),
+    delim = ".",
+    cols_remove = FALSE
+  ) %>%
+  mutate(date = as_date(basename(date))) %>%
+  filter(!is.na(date)) %>%
+  filter(file == "blamematrix", ext == "tsv", path == "_data_CCBR") %>%
+  slice_max(order_by = date) %>%
+  pull(filename) %>%
+  map(read_tsv) %>%
+  list_rbind()
+
+card(
+  card_header("Disk usage by user in subdirectories"),
+  datatable(blame_matrix, fillContainer = TRUE)
+)
+Deleting top grubbers will save 6.61 TiB!
+# Per-user savings messages from the latest grubbers .err files. Lines that
+# contain "Deleting" are kept; the text after the last ":" is the message,
+# from which the numeric value and the unit following "save" are extracted
+# and converted to bytes.
+grub_err <- user_dat %>%
+  filter_users() %>%
+  filter(!is.na(date)) %>%
+  filter(file == "grubbers", ext == "err", path == "_data_CCBR") %>%
+  slice_max(order_by = date) %>%
+  pull(filename) %>%
+  map(function(err_path) {
+    mutate(read_tsv(err_path, col_names = FALSE), filename = err_path)
+  }) %>%
+  list_rbind() %>%
+  filter(str_detect(X1, "Deleting")) %>%
+  separate_wider_delim(
+    filename,
+    names = c("date", "path", "username", "file", "ext"),
+    delim = ".",
+    cols_remove = FALSE
+  ) %>%
+  mutate(
+    date = as_date(basename(date)),
+    grub_msg = str_replace_all(X1, regex("^.*:"), "")
+  ) %>%
+  mutate(
+    savings_value = as.numeric(
+      str_replace_all(grub_msg, regex(".*save ([\\d\\.]*) [\\w!]+"), "\\1")
+    ),
+    savings_unit = str_replace_all(
+      grub_msg, regex(".*save [\\d\\.]* ([\\w]+)!"), "\\1"
+    ),
+    savings_bytes = to_bytes_v(savings_value, savings_unit)
+  )
+
+# Users ordered by potential savings, largest first.
+user_grub_table <- select(
+  arrange(grub_err, desc(savings_bytes)),
+  username, savings_value, savings_unit
+)
+
+card(
+  card_header("Savings per user"),
+  datatable(user_grub_table, fillContainer = TRUE)
+)
+# Rows of the latest per-user grubbers .tsv files. The five unnamed columns
+# are given names, the file name is split on ".", and both disk-usage strings
+# are split on " " into a numeric value and a unit.
+grub_dat <- user_dat %>%
+  filter_users() %>%
+  filter(!is.na(date)) %>%
+  filter(file == "grubbers", ext == "tsv", path == "_data_CCBR") %>%
+  slice_max(order_by = date) %>%
+  pull(filename) %>%
+  map(function(tsv_path) {
+    mutate(read_tsv(tsv_path, col_names = FALSE), filename = tsv_path)
+  }) %>%
+  list_rbind() %>%
+  rename(
+    file_hash = X1,
+    file_count = X2,
+    total_disk_usage = X3,
+    single_disk_usage = X4,
+    filepaths = X5
+  ) %>%
+  separate_wider_delim(
+    filename,
+    names = c("date", "path", "username", "file", "ext"),
+    delim = ".",
+    cols_remove = FALSE
+  ) %>%
+  mutate(date = as_date(basename(date))) %>%
+  filter_users() %>%
+  separate_wider_delim(
+    total_disk_usage,
+    names = c("total_disk_usage_value", "total_disk_usage_unit"),
+    delim = " ",
+    cols_remove = FALSE
+  ) %>%
+  separate_wider_delim(
+    single_disk_usage,
+    names = c("single_disk_usage_value", "single_disk_usage_unit"),
+    delim = " ",
+    cols_remove = FALSE
+  ) %>%
+  mutate(
+    across(
+      all_of(c("total_disk_usage_value", "single_disk_usage_value")),
+      as.numeric
+    )
+  )
+
+# Files ranked by total disk usage, largest first.
+# The size is stored as a value plus a unit and the unit can differ between
+# rows, so sizes are converted to bytes before sorting; ordering by the bare
+# value would rank e.g. 900 MiB above 2 GiB. The reported column is expressed
+# in GiB so that the `disk_usage_gb` name holds for every row.
+top_files <- grub_dat %>%
+  mutate(
+    # as.numeric() keeps the column numeric when there are no rows, where the
+    # vectorised wrapper returns an empty list.
+    total_disk_usage_bytes = as.numeric(
+      to_bytes_v(total_disk_usage_value, total_disk_usage_unit)
+    ),
+    disk_usage_gb = round(total_disk_usage_bytes / to_bytes(1, "GiB"), 2)
+  ) %>%
+  arrange(desc(total_disk_usage_bytes)) %>%
+  select(disk_usage_gb, username, filepaths)
+
+card(card_header("Top files"), datatable(top_files, fillContainer = TRUE))
+For instructions on how to replace duplicates with hard links, see
+the usurp
+command in the spacesavers docs.