From cd30aaac27f76eb1afbf928dfc64627afdd52cee Mon Sep 17 00:00:00 2001 From: Norio Nomura Date: Wed, 24 Jul 2024 17:01:22 +0900 Subject: [PATCH 1/4] test.yml: use image cache with parameters created from template Use `actions/cache@v4` with following params: path: ".download/by-url-sha256/$(echo $location | sha256sum | cut -d' ' -f1)" key: "image-$digest" enableCrossOsArchive: true To make the image cache cross-platform, the cache directory is specified using a relative path from the working directory, and the platform-specific cache directories are accessed via symbolic links. This should reduce the cache size used by the CI. Signed-off-by: Norio Nomura setup_cache_for_template: support url in template parameter Signed-off-by: Norio Nomura --- .../setup_cache_for_template/action.yml | 88 +++++++++++++++++++ .github/workflows/test.yml | 57 ++++++------ hack/debug-cache.sh | 2 +- 3 files changed, 114 insertions(+), 33 deletions(-) create mode 100644 .github/actions/setup_cache_for_template/action.yml diff --git a/.github/actions/setup_cache_for_template/action.yml b/.github/actions/setup_cache_for_template/action.yml new file mode 100644 index 00000000000..0f439647090 --- /dev/null +++ b/.github/actions/setup_cache_for_template/action.yml @@ -0,0 +1,88 @@ +name: 'setup cache for template' +description: 'setup cache for template' +inputs: + arch: + description: arch to setup cache for + required: false + template: + description: template yaml file + required: true +runs: + using: "composite" + steps: + - name: "detect platform for download directory" + id: detect-platform + run: | + if [[ "$(uname)" == "Darwin" ]]; then + download_dir=~/Library/Caches/lima/download + else + download_dir=~/.cache/lima/download + fi + echo "download-dir=$download_dir" >> "$GITHUB_OUTPUT" + shell: bash + - name: "create cache parameters from template" + if: always() + id: cache-params-from-template + run: | + set -eux + arch="${{ inputs.arch }}" + template="${{ inputs.template }}" + 
case "$template" in + https://*) + tmp_yaml=$(mktemp -d)/template.yaml + curl -sSLf "$template" > $tmp_yaml || exit 1 + template=$tmp_yaml + ;; + *) + test -f "$template" || exit 1 + ;; + esac + + # detect arch from template if not provided + arch="${arch:-$(yq '.arch // ""' "$template")}" + arch="${arch:-$(uname -m)}" + # normalize arch. amd64 -> x86_64, arm64 -> aarch64 + case "$arch" in + amd64) arch=x86_64 ;; + arm64) arch=aarch64 ;; + esac + + # extract digest and location from template using arch + digest="$(yq ".images | map(select(.arch == \"$arch\")) | .[0].digest // \"\"" "$template")" + location="$(yq ".images | map(select(.arch == \"$arch\")) | .[0].location // \"\"" "$template")" + test -n "$location" || exit 1 + + # path to cache + if command -v sha256sum > /dev/null; then + sha256="$(echo -n "$location" | sha256sum | cut -d' ' -f1)" + elif command -v shasum > /dev/null; then + sha256="$(echo -n "$location" | shasum -a 256 | cut -d' ' -f1)" + else + echo "sha256sum or shasum not found" >&2 + exit 1 + fi + echo "path=.download/by-url-sha256/$sha256" >> "$GITHUB_OUTPUT" + + # key for cache + key="${digest:+image-$digest}" + # fallback to os and hash of template file if digest not found + key="${key:-${{ runner.os }}-${{ hashFiles(inputs.template) }}}" + echo "key=$key" >> "$GITHUB_OUTPUT" + shell: bash + + - name: "Cache ${{ steps.cache-params-from-template.outputs.path }}" + # avoid using `~` in path that will be expanded to platform specific home directory + uses: actions/cache@v4 + with: + path: ${{ steps.cache-params-from-template.outputs.path }} + key: ${{ steps.cache-params-from-template.outputs.key }} + enableCrossOsArchive: true + + - name: "Create symbolic link named ${{ steps.detect-platform.outputs.download-dir }} pointing to .download" + run: | + set -eux + [ -d .download ] || mkdir -p .download + path_to_cache=${{ steps.detect-platform.outputs.download-dir }} + mkdir -p $(dirname $path_to_cache) + ln -sfn $PWD/.download $path_to_cache + 
shell: bash diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index eea519bf499..a6320a9bf94 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -157,12 +157,10 @@ jobs: - uses: actions/setup-go@v5 with: go-version: 1.23.x - - name: Cache ~/Library/Caches/lima/download - uses: actions/cache@v4 + - name: Cache image used by default.yaml + uses: ./.github/actions/setup_cache_for_template with: - path: ~/Library/Caches/lima/download - # hashFiles do not seem to support symlinks - key: ${{ runner.os }}-${{ hashFiles('templates/default.yaml') }} + template: templates/default.yaml - name: Unit tests run: go test -v ./... - name: Make @@ -230,15 +228,14 @@ jobs: - uses: actions/setup-go@v5 with: go-version: 1.23.x - - id: path_for_hashFiles - # It seems that `hashFiles` cannot use `..` as a path component, so generate a normalized path here. - run: echo "NORMALIZED=$(realpath --relative-to=$PWD examples/${{ matrix.template }})" >> "$GITHUB_OUTPUT" - - uses: actions/cache@v4 - with: - path: ~/.cache/lima/download - # hashFiles do not seem to support symlinks - # TODO: more fine-grained cache - key: ${{ runner.os }}-${{ hashFiles(steps.path_for_hashFiles.outputs.NORMALIZED) }} + - name: normalize template path + id: normalize_template_path + # `hashFiles` cannot use `..` as a path component, so generate a normalized path here. 
+ run: echo "NORMALIZED=$(realpath templates/${{ matrix.template }})" >> "$GITHUB_OUTPUT" + - name: Cache image used by ${{ steps.normalize_template_path.outputs.NORMALIZED }} + uses: ./.github/actions/setup_cache_for_template + with: + template: ${{ steps.normalize_template_path.outputs.NORMALIZED }} - name: Make run: make - name: Install @@ -331,12 +328,10 @@ jobs: - uses: actions/setup-go@v5 with: go-version: 1.23.x - - name: Cache ~/Library/Caches/lima/download - uses: actions/cache@v4 + - name: Cache image used by vmnet.yaml + uses: ./.github/actions/setup_cache_for_template with: - path: ~/Library/Caches/lima/download - # hashFiles do not seem to support symlinks - key: ${{ runner.os }}-${{ hashFiles('examples/vmnet.yaml') }} + template: templates/vmnet.yaml - name: Make run: make - name: Install @@ -386,11 +381,10 @@ jobs: - uses: actions/setup-go@v5 with: go-version: 1.23.x - - name: Cache ~/Library/Caches/lima/download - uses: actions/cache@v4 + - name: Cache image used by ${{ matrix.oldver }}/examples/ubuntu-lts.yaml + uses: ./.github/actions/setup_cache_for_template with: - path: ~/Library/Caches/lima/download - key: ${{ runner.os }}-upgrade-${{ matrix.oldver }} + template: https://raw.githubusercontent.com/lima-vm/lima/${{ matrix.oldver }}/examples/ubuntu-lts.yaml - name: Install test dependencies run: brew install qemu bash coreutils - name: Test @@ -420,15 +414,14 @@ jobs: - uses: actions/setup-go@v5 with: go-version: 1.23.x - - id: path_for_hashFiles - # It seems that `hashFiles` cannot use `..` as a path component, so generate a normalized path here. 
- run: echo "NORMALIZED=$(realpath examples/${{ matrix.template }})" >> "$GITHUB_OUTPUT" - - name: Cache ~/Library/Caches/lima/download - uses: actions/cache@v4 - with: - path: ~/Library/Caches/lima/download - # hashFiles do not seem to support symlinks - key: ${{ runner.os }}-${{ hashFiles(steps.path_for_hashFiles.outputs.NORMALIZED) }} + - name: normalize template path + id: normalize_template_path + # `hashFiles` cannot use `..` as a path component, so generate a normalized path here. + run: echo "NORMALIZED=$(realpath templates/${{ matrix.template }})" >> "$GITHUB_OUTPUT" + - name: Cache image used by ${{ steps.normalize_template_path.outputs.NORMALIZED }} + uses: ./.github/actions/setup_cache_for_template + with: + template: ${{ steps.normalize_template_path.outputs.NORMALIZED }} - name: Make run: make - name: Install diff --git a/hack/debug-cache.sh b/hack/debug-cache.sh index d2f3ba9be72..db04cf1b05d 100755 --- a/hack/debug-cache.sh +++ b/hack/debug-cache.sh @@ -4,7 +4,7 @@ cache_dir="${HOME}/Library/Caches" if [ "$(uname -s)" != "Darwin" ]; then cache_dir="${HOME}/.cache" fi -if [ ! -e "${cache_dir}/lima" ]; then +if [ ! -e "${cache_dir}/lima/download/by-url-sha256" ]; then echo "No cache" exit 0 fi From 99c1247434ecdf08bc8b711682a325d30b97bbab Mon Sep 17 00:00:00 2001 From: Norio Nomura Date: Sat, 3 Aug 2024 23:34:13 +0900 Subject: [PATCH 2/4] setup_cache_for_template: Improve caching mechanism for images and `nerdctl` archives - Change cache key to `url-sha256:$sha256` for caching images without a digest - Include image basename in the cache key - Use `limactl validate --fill` to retrieve nerdctl archive info and set cache if needed - `test.yml`: Change cache configuration to run after `make install` Signed-off-by: Norio Nomura test.yml: Remove `normalize_template_path` since `hashFiles` is no longer used.
Signed-off-by: Norio Nomura --- .../setup_cache_for_template/action.yml | 111 +++++++++++++++--- .github/workflows/test.yml | 41 +++---- 2 files changed, 110 insertions(+), 42 deletions(-) diff --git a/.github/actions/setup_cache_for_template/action.yml b/.github/actions/setup_cache_for_template/action.yml index 0f439647090..6632e1bce6a 100644 --- a/.github/actions/setup_cache_for_template/action.yml +++ b/.github/actions/setup_cache_for_template/action.yml @@ -7,6 +7,10 @@ inputs: template: description: template yaml file required: true + detect-containerd: + description: detect containerd usage from template by using limactl validate + required: false + default: 'true' runs: using: "composite" steps: @@ -27,10 +31,15 @@ runs: set -eux arch="${{ inputs.arch }}" template="${{ inputs.template }}" + detect_containerd="${{ inputs.detect-containerd }}" + if [[ $detect_containerd == "true" ]] && ! command -v limactl &>/dev/null; then + echo "containerd detection is disabled because limactl is not found" >&2 + detect_containerd="false" + fi case "$template" in https://*) tmp_yaml=$(mktemp -d)/template.yaml - curl -sSLf "$template" > $tmp_yaml || exit 1 + curl -sSLf "$template" > "$tmp_yaml" || exit 1 template=$tmp_yaml ;; *) @@ -47,27 +56,85 @@ runs: arm64) arch=aarch64 ;; esac - # extract digest and location from template using arch - digest="$(yq ".images | map(select(.arch == \"$arch\")) | .[0].digest // \"\"" "$template")" - location="$(yq ".images | map(select(.arch == \"$arch\")) | .[0].location // \"\"" "$template")" - test -n "$location" || exit 1 - - # path to cache - if command -v sha256sum > /dev/null; then - sha256="$(echo -n "$location" | sha256sum | cut -d' ' -f1)" - elif command -v shasum > /dev/null; then - sha256="$(echo -n "$location" | shasum -a 256 | cut -d' ' -f1)" + # extract digest, location and size by parsing template using arch + readonly yq_filter=" + [ + .images | map(select(.arch == \"${arch}\")) | [.[0,1].location, .[0,1].digest], + 
.containerd|[.system or .user], + .containerd.archives | map(select(.arch == \"${arch}\")) | [.[0].location, .[0].digest] + ]|flatten|.[] + " + if [[ $detect_containerd == "true" ]]; then + parsed=$(LIMA_HOME=$(mktemp -d) limactl validate "$template" --fill 2>/dev/null | yq eval "${yq_filter}") else - echo "sha256sum or shasum not found" >&2 + parsed=$(yq eval "${yq_filter}" "$template") + fi + # macOS earlier than 15.0 uses bash 3.2.57, which does not support readarray -t + # readarray -t arr <<<"$parsed" + while IFS= read -r line; do arr+=("$line"); done <<<"${parsed}" + readonly locations=("${arr[@]:0:2}") digests=("${arr[@]:2:2}") + readonly containerd="${arr[4]}" containerd_location="${arr[5]}" containerd_digest="${arr[6]}" + for ((i = 0; i < ${#locations[@]}; i++)); do + [[ ${locations[i]} != "null" ]] || continue + http_code=$(curl -sIL -w "%{http_code}" "${locations[i]}" -o /dev/null) + if [[ ${http_code} -eq 200 ]]; then + location=${locations[i]} + digest=${digests[i]} + break + fi + done + if [[ -z ${location} ]]; then + echo "Failed to get the image location for ${template}" >&2 exit 1 fi - echo "path=.download/by-url-sha256/$sha256" >> "$GITHUB_OUTPUT" + + function location_to_sha256() { + local location=$1 + if command -v sha256sum > /dev/null; then + sha256="$(echo -n "$location" | sha256sum | cut -d' ' -f1)" + elif command -v shasum > /dev/null; then + sha256="$(echo -n "$location" | shasum -a 256 | cut -d' ' -f1)" + else + echo "sha256sum or shasum not found" >&2 + exit 1 + fi + echo "$sha256" + } + + function location_to_cache_path() { + local location=$1 + sha256=$(location_to_sha256 "$location") && echo ".download/by-url-sha256/$sha256" + } + + # path to cache + image_cache_path=$(location_to_cache_path "$location") + echo "path=$image_cache_path" >> "$GITHUB_OUTPUT" # key for cache - key="${digest:+image-$digest}" - # fallback to os and hash of template file if digest not found - key="${key:-${{ runner.os }}-${{ hashFiles(inputs.template) }}}" 
+ image_basename=$(basename "$location") + if [[ "$digest" != "null" ]]; then + key="image:$image_basename-$digest" + else + # use sha256 of location as key if digest is not available + key="image:$image_basename-url-sha256:$(location_to_sha256 "$location")" + fi echo "key=$key" >> "$GITHUB_OUTPUT" + + # containerd path and key for cache + if [[ $containerd == "true" && "$containerd_location" != "null" ]]; then + containerd_basename=$(basename "$containerd_location") + if [[ ${containerd_digest} != "null" ]]; then + containerd_key="containerd:$containerd_basename-$containerd_digest" + else + containerd_key="containerd:$containerd_basename-url-sha256:$(sha256 "$containerd_location")" + fi + echo "containerd-key=$containerd_key" >> "$GITHUB_OUTPUT" + containerd_cache_path=$(location_to_cache_path "$containerd_location") + echo "containerd-path=$containerd_cache_path" >> "$GITHUB_OUTPUT" + else + echo "containerd-key=" >> "$GITHUB_OUTPUT" + echo "containerd-path=" >> "$GITHUB_OUTPUT" + fi shell: bash - name: "Cache ${{ steps.cache-params-from-template.outputs.path }}" @@ -78,11 +145,19 @@ runs: key: ${{ steps.cache-params-from-template.outputs.key }} enableCrossOsArchive: true + - name: "Cache ${{ steps.cache-params-from-template.outputs.containerd-key }}" + if: ${{ steps.cache-params-from-template.outputs.containerd-key != '' }} + uses: actions/cache@v4 + with: + path: ${{ steps.cache-params-from-template.outputs.containerd-path }} + key: ${{ steps.cache-params-from-template.outputs.containerd-key }} + enableCrossOsArchive: true + - name: "Create symbolic link named ${{ steps.detect-platform.outputs.download-dir }} pointing to .download" run: | set -eux [ -d .download ] || mkdir -p .download path_to_cache=${{ steps.detect-platform.outputs.download-dir }} - mkdir -p $(dirname $path_to_cache) - ln -sfn $PWD/.download $path_to_cache + mkdir -p "$(dirname "$path_to_cache")" + ln -sfn "$PWD/.download" "$path_to_cache" shell: bash diff --git a/.github/workflows/test.yml 
b/.github/workflows/test.yml index a6320a9bf94..d0bb8a271fc 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -157,16 +157,16 @@ jobs: - uses: actions/setup-go@v5 with: go-version: 1.23.x - - name: Cache image used by default.yaml - uses: ./.github/actions/setup_cache_for_template - with: - template: templates/default.yaml - name: Unit tests run: go test -v ./... - name: Make run: make - name: Install run: make install + - name: Cache image used by default.yaml + uses: ./.github/actions/setup_cache_for_template + with: + template: templates/default.yaml - name: Validate templates run: find -L templates -name '*.yaml' | xargs limactl validate - name: Install test dependencies @@ -228,18 +228,14 @@ jobs: - uses: actions/setup-go@v5 with: go-version: 1.23.x - - name: normalize template path - id: normalize_template_path - # `hashFiles` cannot use `..` as a path component, so generate a normalized path here. - run: echo "NORMALIZED=$(realpath templates/${{ matrix.template }})" >> "$GITHUB_OUTPUT" - - name: Cache image used by ${{ steps.normalize_template_path.outputs.NORMALIZED }} - uses: ./.github/actions/setup_cache_for_template - with: - template: ${{ steps.normalize_template_path.outputs.NORMALIZED }} - name: Make run: make - name: Install run: sudo make install + - name: Cache image used by templates/${{ matrix.template }} + uses: ./.github/actions/setup_cache_for_template + with: + template: templates/${{ matrix.template }} - name: Install test dependencies run: | sudo apt-get update @@ -328,14 +324,14 @@ jobs: - uses: actions/setup-go@v5 with: go-version: 1.23.x - - name: Cache image used by vmnet.yaml - uses: ./.github/actions/setup_cache_for_template - with: - template: templates/vmnet.yaml - name: Make run: make - name: Install run: make install + - name: Cache image used by vmnet.yaml + uses: ./.github/actions/setup_cache_for_template + with: + template: templates/vmnet.yaml - name: Install test dependencies run: brew install qemu 
bash coreutils iperf3 - name: Install socket_vmnet @@ -385,6 +381,7 @@ jobs: uses: ./.github/actions/setup_cache_for_template with: template: https://raw.githubusercontent.com/lima-vm/lima/${{ matrix.oldver }}/examples/ubuntu-lts.yaml + detect-containerd: "false" - name: Install test dependencies run: brew install qemu bash coreutils - name: Test @@ -414,18 +411,14 @@ jobs: - uses: actions/setup-go@v5 with: go-version: 1.23.x - - name: normalize template path - id: normalize_template_path - # `hashFiles` cannot use `..` as a path component, so generate a normalized path here. - run: echo "NORMALIZED=$(realpath templates/${{ matrix.template }})" >> "$GITHUB_OUTPUT" - - name: Cache image used by ${{ steps.normalize_template_path.outputs.NORMALIZED }} - uses: ./.github/actions/setup_cache_for_template - with: - template: ${{ steps.normalize_template_path.outputs.NORMALIZED }} - name: Make run: make - name: Install run: make install + - name: Cache image used by templates/${{ matrix.template }} + uses: ./.github/actions/setup_cache_for_template + with: + template: templates/${{ matrix.template }} - name: Install test dependencies run: brew install bash coreutils jq - name: Uninstall qemu From f8acc9d23981585189ef82012217ce77f4c99a8e Mon Sep 17 00:00:00 2001 From: Norio Nomura Date: Sun, 4 Aug 2024 00:38:02 +0900 Subject: [PATCH 3/4] calculate-cache.sh: calculate cache size usage for new caching method versus previous method This was created in response to the question at https://github.com/lima-vm/lima/pull/2508#discussion_r1699798651. 
Signed-off-by: Norio Nomura calculate-cache.sh: `shfmt -s` Signed-off-by: Norio Nomura calculate-cache.sh: extract `runs_on` and `template` from workflow file Signed-off-by: Norio Nomura calculate-cache.sh: add DEBUG=1 to save collected information as yaml Signed-off-by: Norio Nomura calculate-cache.sh: add descriptions and output examples to functions Signed-off-by: Norio Nomura calculate-cache.sh: use `--jq` instead of `| jq` Signed-off-by: Norio Nomura calculate-cache.sh: add some descriptions Signed-off-by: Norio Nomura calculate-cache.sh: resolve shfmt issue Signed-off-by: Norio Nomura calculate-cache.sh: mention response cache file Signed-off-by: Norio Nomura calculate-cache.sh: add `select(.steps)|` Signed-off-by: Norio Nomura --- hack/calculate-cache.sh | 299 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 299 insertions(+) create mode 100755 hack/calculate-cache.sh diff --git a/hack/calculate-cache.sh b/hack/calculate-cache.sh new file mode 100755 index 00000000000..9c7902a42b6 --- /dev/null +++ b/hack/calculate-cache.sh @@ -0,0 +1,299 @@ +#!/usr/bin/env bash +# This script calculates the expected content size, actual cached size, and cache-keys used in caching method before and after +# implementation in https://github.com/lima-vm/lima/pull/2508 +# +# Answer to the question in https://github.com/lima-vm/lima/pull/2508#discussion_r1699798651 + +# usage: [DEBUG=1] ./hack/calculate-cache.sh +# DEBUG=1 will save the collected information in .calculate-cache-collected-info-{before,after}.yaml +# +# This script does: +# 1. extracts `runs_on` and `template` from workflow file (.github/workflows/test.yml) +# 2. check each template for image and nerdctl +# 3. detect size of image and nerdctl (responses from remote are cached for faster iteration) +# save the response in .calculate-cache-response-cache.yaml +# 4. 
print content size, actual cache size (if available), by cache key +# +# The major differences for reducing cache usage are as follows: +# - it now caches `~/.cache/lima/download/by-url-sha256/$sha256` instead of caching `~/.cache/lima/download` +# - the cache keys are now based on the image digest and nerdctl digest instead of the template file's hash +# - enables the use of cache regardless of the operating system used to execute CI. +# +# The script requires the following commands: +# - gh: GitHub CLI. +# Used to get the cache information +# - jq: Command-line JSON processor +# Parse the workflow file and print runs-on and template. +# Parse output from gh cache list +# Calculate the expected content size, actual cached size, and cache-keys used. +# - limactl: lima CLI. +# Used to validate the template file for getting nerdctl location and digest. +# - sha256sum: Print or check SHA256 (256-bit) checksums +# - xxd: make a hexdump or do the reverse. +# Used to simulate the 'hashFiles()' function in the workflow. +# - yq: Command-line YAML processor. +# Parse the template file for image and nerdctl location, digest, and size. +# Parse the cache response file for the cache. +# Convert the collected information to JSON. + +set -u -o pipefail + +required_commands=(gh jq limactl sha256sum xxd yq) +for cmd in "${required_commands[@]}"; do + if ! command -v "${cmd}" &>/dev/null; then + echo "${cmd} is required. Please install it" >&2 + exit 1 + fi +done + +# current workflow uses x86_64 only +arch=x86_64 + +LIMA_HOME=$(mktemp -d) +export LIMA_HOME + +# parse the workflow file and print runs-on and template +# e.g.
+# ```console +# $ print_runs_on_template_from_workflow .github/workflows/test.yml +# macos-12 templates/default.yaml +# ubuntu-24.04 templates/alpine.yaml +# ubuntu-24.04 templates/debian.yaml +# ubuntu-24.04 templates/fedora.yaml +# ubuntu-24.04 templates/archlinux.yaml +# ubuntu-24.04 templates/opensuse.yaml +# ubuntu-24.04 templates/experimental/net-user-v2.yaml +# ubuntu-24.04 templates/experimental/9p.yaml +# ubuntu-24.04 templates/docker.yaml +# ubuntu-24.04 templates/../hack/test-templates/alpine-9p-writable.yaml +# ubuntu-24.04 templates/../hack/test-templates/test-misc.yaml +# macos-12 templates/vmnet.yaml +# macos-12 https://raw.githubusercontent.com/lima-vm/lima/v0.15.1/examples/ubuntu-lts.yaml +# macos-13 templates/experimental/vz.yaml +# macos-13 templates/fedora.yaml +# ``` +function print_runs_on_template_from_workflow() { + yq -o=j "$1" | jq -r ' + "./.github/actions/setup_cache_for_template" as $action | + "\\$\\{\\{\\s*(?<path>\\S*)\\s*\\}\\}" as $pattern | + .jobs | map_values(select(.steps)| + ."runs-on" as $runs_on | + { + template: .steps | map_values(select(.uses == $action)) | first |.with.template, + matrix: .strategy.matrix + } | select(.template) | + . + { path: .template | (if test($pattern) then sub(".*\($pattern).*";"\(.path)")|split(".") else null end) } | + ( + .template as $template| + if .path then + getpath(.path)|map(. as $item|$template|sub($pattern;$item)) + else + [$template] + end + ) | map("\($runs_on)\t\(.)") + + ) | flatten |.[] + ' +} + +# returns the OS name from the runner equivalent to the expression `${{ runner.os }}` in the workflow +# e.g. +# ```console +# $ runner_os_from_runner "macos-12" +# macOS +# $ runner_os_from_runner "ubuntu-24.04" +# Linux +# ``` +function runner_os_from_runner() { + # shellcheck disable=SC2249 + case "$1" in + macos*) + echo macOS + ;; + ubuntu*) + echo Linux + ;; + esac +} + +# check the remote location and return the http code and size.
+# The result is cached in .calculate-cache-response-cache.yaml +# e.g. +# ```console +# $ check_location "https://cloud-images.ubuntu.com/releases/24.04/release-20240725/ubuntu-24.04-server-cloudimg-amd64.img" +# 200 585498624 +# ``` +function check_location() { + location="$1" + readonly cache_file="./.calculate-cache-response-cache.yaml" + # check response_cache.yaml for the cache + if [[ -f ${cache_file} ]]; then + cached=$(yq -e eval ".[\"${location}\"]" "${cache_file}" 2>/dev/null) && echo "${cached}" && return + else + touch "${cache_file}" + fi + http_code_and_size=$(curl -sIL -w "%{http_code} %header{Content-Length}" "${location}" -o /dev/null) + yq eval ".[\"${location}\"] = \"${http_code_and_size}\"" -i "${cache_file}" + echo "${http_code_and_size}" +} + +# print image location, digest, size, hash, containerd, containerd_location, containerd_digest, containerd_size from the template +# e.g. +# ```console +# $ print_location_digest_size_hash_from_template "templates/default.yaml" +# https://cloud-images.ubuntu.com/releases/24.04/release-20240725/ubuntu-24.04-server-cloudimg-amd64.img sha256:d2377667ea95222330ca2287817403c85178dad397e9fed768a9b4aec79d2a7f 585498624 49aa50a4872ded07ebf657c0eaf9e44ecc0c174d033a97c537ecd270f35b462f true https://github.com/containerd/nerdctl/releases/download/v1.7.6/nerdctl-full-1.7.6-linux-amd64.tar.gz sha256:2c841e097fcfb5a1760bd354b3778cb695b44cd01f9f271c17507dc4a0b25606 237465717 +# ``` +function print_location_digest_size_hash_from_template() { + readonly template=$1 + case "${template}" in + http*) + template_yaml=$(curl -sSL "${template}") + ;; + *) + template_yaml=$(<"${template}") + ;; + esac + readonly yq_filter=" + [ + .images | map(select(.arch == \"${arch}\")) | [.[0,1].location, .[0,1].digest], + .containerd|[.system or .user], + .containerd.archives | map(select(.arch == \"${arch}\")) | [.[0].location, .[0].digest] + ]|flatten|.[] + " + if command -v limactl &>/dev/null; then + parsed=$(limactl validate <(echo 
"${template_yaml}") --fill 2>/dev/null | yq eval "${yq_filter}") + else + parsed=$(yq eval "${yq_filter}" <<<"${template_yaml}") + fi + # macOS earlier than 15.0 uses bash 3.2.57, which does not support readarray -t + # readarray -t arr <<<"${parsed}" + while IFS= read -r line; do arr+=("${line}"); done <<<"${parsed}" + readonly locations=("${arr[@]:0:2}") digests=("${arr[@]:2:2}") + readonly containerd="${arr[4]}" containerd_location="${arr[5]}" containerd_digest="${arr[6]}" + declare location digest size hash + for ((i = 0; i < ${#locations[@]}; i++)); do + [[ ${locations[i]} != null ]] || continue + http_code_and_size=$(check_location "${locations[i]}") + read -r http_code size <<<"${http_code_and_size}" + if [[ ${http_code} -eq 200 ]]; then + location=${locations[i]} + digest=${digests[i]} + break + fi + done + if [[ -z ${location} ]]; then + echo "Failed to get the image location for ${template}" >&2 + return 1 + fi + hash=$(sha256sum <<<"${template_yaml}" | cut -d' ' -f1 | xxd -r -p | sha256sum | cut -d' ' -f1) + declare containerd_size + containerd_http_code_and_size=$(check_location "${containerd_location}") + read -r _containerd_http_code containerd_size <<<"${containerd_http_code_and_size}" + echo "${location} ${digest} ${size} ${hash} ${containerd} ${containerd_location} ${containerd_digest} ${containerd_size}" +} + +# format first column to MiB +# e.g. +# ```console +# $ echo 585498624 | size_to_mib +# 558.38 MiB +# ``` +function size_to_mib() { + awk ' + function mib(size) { return sprintf("%7.2f MiB", size / 1024 / 1024) } + int($1)>0{ $1=" "mib($1) } + int($2)>0{ $2=mib($2) } + int($2)==0 && NF>1{ $2="<>" } + { print } + ' +} + +# actual_cache_sizes=$(gh cache list --json key,createdAt,sizeInBytes|jq '[.[]|{"key":.key,"value":.sizeInBytes}]|from_entries') +# e.g. 
+# ```console +# $ echo "${actual_cache_sizes}" +# { +# "Linux-1c3b2791d52735d916dc44767c745c2319eb7cae74af71bbf45ddb268f42fc1d": 810758533, +# "Linux-231c66957fc2cdb18ea10e63f60770049026e29051ecd6598fc390b60d6a4fa6": 633036717, +# "Linux-3b906d46fa532e3bc348c35fc8e7ede6c69f0b27032046ee2cbb56d4022d1146": 574242367, +# "Linux-69a547b760dbf1650007ed541408474237bc611704077214adcac292de556444": 70310855, +# "Linux-7782f8b4ff8cd378377eb79f8d61c9559b94bbd0c11d19eb380ee7bda19af04e": 494141177, +# "Linux-8812aedfe81b4456d421645928b493b1f2f88aff04b7f3171207492fd44cd189": 812730766, +# "Linux-caa7d8af214d55ad8902e82d5918e61573f3d6795d2b5ad9a35305e26fa0e6a9": 754723892, +# "Linux-colima-v0.6.5": 226350335, +# "Linux-de83bce0608d787e3c68c7a31c5fab2b6d054320fd7bf633a031845e2ee03414": 810691197, +# "Linux-eb88a19dfcf2fb98278e7c7e941c143737c6d7cd8950a88f58e04b4ee7cef1bc": 570625794, +# "Linux-f88f0b3b678ff6432386a42bdd27661133c84a36ad29f393da407c871b0143eb": 68490954, +# "golangci-lint.cache-Linux-2850-74615231540133417fd618c72e37be92c5d3b3ad": 2434144, +# "macOS-231c66957fc2cdb18ea10e63f60770049026e29051ecd6598fc390b60d6a4fa6": 633020464, +# "macOS-49aa50a4872ded07ebf657c0eaf9e44ecc0c174d033a97c537ecd270f35b462f": 813179462, +# "macOS-8f37f663956af5f743f0f99ab973729b6a02f200ebfac7a3a036eff296550732": 810756770, +# "macOS-ef5509b5d4495c8c3590442ee912ad1c9a33f872dc4a29421c524fc1e2103b59": 813179476, +# "macOS-upgrade-v0.15.1": 1157814690, +# "setup-go-Linux-ubuntu20-go-1.23.0-02756877dbcc9669bb904e42e894c63aa9801138db94426a90a2d554f2705c52": 1015518352, +# "setup-go-Linux-ubuntu20-go-1.23.0-6bce2eefc6111ace836de8bb322432c072805737d5f3c5a3d47d2207a05f50df": 936433302, +# "setup-go-Linux-ubuntu24-go-1.22.6-02756877dbcc9669bb904e42e894c63aa9801138db94426a90a2d554f2705c52": 1090001859, +# "setup-go-Linux-ubuntu24-go-1.23.0-02756877dbcc9669bb904e42e894c63aa9801138db94426a90a2d554f2705c52": 526146768, +# 
"setup-go-Windows-go-1.23.0-02756877dbcc9669bb904e42e894c63aa9801138db94426a90a2d554f2705c52": 1155374040, +# "setup-go-Windows-go-1.23.0-6bce2eefc6111ace836de8bb322432c072805737d5f3c5a3d47d2207a05f50df": 1056433137, +# "setup-go-macOS-go-1.23.0-02756877dbcc9669bb904e42e894c63aa9801138db94426a90a2d554f2705c52": 1060919942, +# "setup-go-macOS-go-1.23.0-6bce2eefc6111ace836de8bb322432c072805737d5f3c5a3d47d2207a05f50df": 982139209 +# } +actual_cache_sizes=$( + gh cache list --json key,createdAt,sizeInBytes \ + --jq 'sort_by(.createdAt)|reverse|unique_by(.key)|sort_by(.key)|map({"key":.key,"value":.sizeInBytes})|from_entries' +) + +workflows=( + .github/workflows/test.yml +) + +# shellcheck disable=SC2016 +echo "=> compare expected content size, actual cached size, and cache-keys used before and after the change in https://github.com/lima-vm/lima/pull/2508" +# iterate over before and after +for cache_method in before after; do + echo "==> ${cache_method}" + echo "content-size actual-size cache-key" + output_yaml=$( + for workflow in "${workflows[@]}"; do + print_runs_on_template_from_workflow "${workflow}" + done | while IFS=$'\t' read -r runner template; do + runner_os=$(runner_os_from_runner "${runner}") + location_digest_size_hash=$(print_location_digest_size_hash_from_template "${template}") || continue + read -r location digest size hash containerd containerd_location containerd_digest containerd_size <<<"${location_digest_size_hash}" + if [[ ${cache_method} != after ]]; then + key=${runner_os}-${hash} + elif [[ ${digest} == null ]]; then + key=image:$(basename "${location}")-url-sha256:$(echo -n "${location}" | sha256sum | cut -d' ' -f1) + else + key=image:$(basename "${location}")-${digest} + fi + if [[ ${containerd} == true ]]; then + if [[ ${cache_method} != after ]]; then + # previous caching method packages the containerd archive with the image + size=$((size + containerd_size)) + else + # new caching method packages the containerd archive separately + 
containerd_key=containerd:$(basename "${containerd_location}")-${containerd_digest} + printf -- "- key: %s\n template: %s\n location: %s\n digest: %s\n size: %s\n" \ + "${containerd_key}" "${template}" "${containerd_location}" "${containerd_digest}" "${containerd_size}" + fi + fi + printf -- "- key: %s\n template: %s\n location: %s\n digest: %s\n size: %s\n" \ + "${key}" "${template}" "${location}" "${digest}" "${size}" + done + ) + output_json=$(yq -o=j . <<<"${output_yaml}") + + # print size key + jq --argjson actual_size "${actual_cache_sizes}" -r 'unique_by(.key)|sort_by(.key)|.[]|[.size, $actual_size[.key] // 0, .key]|@tsv' <<<"${output_json}" | size_to_mib + # total + echo "------------" + jq '[unique_by(.key)|.[]|.size]|add' <<<"${output_json}" | size_to_mib + # save the collected information as yaml if DEBUG is set + if [[ -n ${DEBUG:+1} ]]; then + cat <<<"${output_yaml}" >".calculate-cache-collected-info-${cache_method}.yaml" + echo "Saved the collected information in .calculate-cache-collected-info-${cache_method}.yaml" + fi + echo "" +done From 5058106bf5f9e15f2fdaf56333c1b4d576066559 Mon Sep 17 00:00:00 2001 From: Norio Nomura Date: Tue, 20 Aug 2024 19:07:59 +0900 Subject: [PATCH 4/4] setup_cache_for_template: add `kernel` and `initrd` cache support refactor some common codes from `setup_cache_for_template` and `calculate-cache.sh` into `cache-common-inc.sh` Signed-off-by: Norio Nomura --- .../setup_cache_for_template/action.yml | 135 ++------ .github/workflows/test.yml | 19 +- hack/cache-common-inc.sh | 323 ++++++++++++++++++ hack/calculate-cache.sh | 128 ++----- 4 files changed, 388 insertions(+), 217 deletions(-) create mode 100755 hack/cache-common-inc.sh diff --git a/.github/actions/setup_cache_for_template/action.yml b/.github/actions/setup_cache_for_template/action.yml index 6632e1bce6a..19c60d80779 100644 --- a/.github/actions/setup_cache_for_template/action.yml +++ b/.github/actions/setup_cache_for_template/action.yml @@ -7,10 +7,6 @@ inputs: 
template: description: template yaml file required: true - detect-containerd: - description: detect containerd usage from template by using limactl validate - required: false - default: 'true' runs: using: "composite" steps: @@ -29,120 +25,35 @@ runs: id: cache-params-from-template run: | set -eux - arch="${{ inputs.arch }}" - template="${{ inputs.template }}" - detect_containerd="${{ inputs.detect-containerd }}" - if [[ $detect_containerd == "true" ]] && ! command -v limactl &>/dev/null; then - echo "containerd detection is disabled because limactl is not found" >&2 - detect_containerd="false" - fi - case "$template" in - https://*) - tmp_yaml=$(mktemp -d)/template.yaml - curl -sSLf "$template" > "$tmp_yaml" || exit 1 - template=$tmp_yaml - ;; - *) - test -f "$template" || exit 1 - ;; - esac - - # detect arch from template if not provided - arch="${arch:-$(yq '.arch // ""' "$template")}" - arch="${arch:-$(uname -m)}" - # normalize arch. amd64 -> x86_64, arm64 -> aarch64 - case "$arch" in - amd64) arch=x86_64 ;; - arm64) arch=aarch64 ;; - esac - - # extract digest, location and size by parsing template using arch - readonly yq_filter=" - [ - .images | map(select(.arch == \"${arch}\")) | [.[0,1].location, .[0,1].digest], - .containerd|[.system or .user], - .containerd.archives | map(select(.arch == \"${arch}\")) | [.[0].location, .[0].digest] - ]|flatten|.[] - " - if [[ $detect_containerd == "true" ]]; then - parsed=$(LIMA_HOME=$(mktemp -d) limactl validate "$template" --fill 2>/dev/null | yq eval "${yq_filter}") - else - parsed=$(yq eval "${yq_filter}" "$template") - fi - # macOS earlier than 15.0 uses bash 3.2.57, which does not support readarray -t - # readarray -t arr <<<"$parsed" - while IFS= read -r line; do arr+=("$line"); done <<<"${parsed}" - readonly locations=("${arr[@]:0:2}") digests=("${arr[@]:2:2}") - readonly containerd="${arr[4]}" containerd_location="${arr[5]}" containerd_digest="${arr[6]}" - for ((i = 0; i < ${#locations[@]}; i++)); do - [[ 
${locations[i]} != "null" ]] || continue - http_code=$(curl -sIL -w "%{http_code}" "${locations[i]}" -o /dev/null) - if [[ ${http_code} -eq 200 ]]; then - location=${locations[i]} - digest=${digests[i]} - break - fi - done - if [[ -z ${location} ]]; then - echo "Failed to get the image location for ${template}" >&2 - exit 1 - fi - - function location_to_sha256() { - local location=$1 - if command -v sha256sum > /dev/null; then - sha256="$(echo -n "$location" | sha256sum | cut -d' ' -f1)" - elif command -v shasum > /dev/null; then - sha256="$(echo -n "$location" | shasum -a 256 | cut -d' ' -f1)" - else - echo "sha256sum or shasum not found" >&2 - exit 1 - fi - echo "$sha256" - } - - function location_to_cache_path() { - local location=$1 - sha256=$(location_to_sha256 "$location") && echo ".download/by-url-sha256/$sha256" - } - - # path to cache - image_cache_path=$(location_to_cache_path "$location") - echo "path=$image_cache_path" >> "$GITHUB_OUTPUT" + source hack/cache-common-inc.sh + print_cache_informations_from_template "${{ inputs.template }}" "${{ inputs.arch }}" >> "$GITHUB_OUTPUT" + shell: bash - # key for cache - image_basename=$(basename "$location") - if [[ "$digest" != "null" ]]; then - key="image:$image_basename-$digest" - else - # use sha256 of location as key if digest is not available - key="image:$image_basename-url-sha256:$(location_to_sha256 "$location")" - fi - echo "key=$key" >> "$GITHUB_OUTPUT" + - name: "Cache ${{ steps.cache-params-from-template.outputs.image-path }}" + if: ${{ steps.cache-params-from-template.outputs.image-key != '' }} + # avoid using `~` in path that will be expanded to platform specific home directory + uses: actions/cache@v4 + with: + path: ${{ steps.cache-params-from-template.outputs.image-path }} + key: ${{ steps.cache-params-from-template.outputs.image-key }} + enableCrossOsArchive: true - # containerd path and key for cache - if [[ $containerd == "true" && "$containerd_location" != "null" ]]; then - 
containerd_basename=$(basename "$containerd_location") - if [[ ${containerd_digest} != "null" ]]; then - containerd_key="containerd:$containerd_basename-$containerd_digest" - else - containerd_key="containerd:$containerd_basename-url-sha256:$(sha256 "$containerd_location")" - fi - echo "containerd-key=$containerd_key" >> "$GITHUB_OUTPUT" - containerd_cache_path=$(location_to_cache_path "$containerd_location") - echo "containerd-path=$containerd_cache_path" >> "$GITHUB_OUTPUT" - else - echo "containerd-key=" >> "$GITHUB_OUTPUT" - echo "containerd-path=" >> "$GITHUB_OUTPUT" - fi - shell: bash + - name: "Cache ${{ steps.cache-params-from-template.outputs.kernel-path }}" + if: ${{ steps.cache-params-from-template.outputs.kernel-key != '' }} + # avoid using `~` in path that will be expanded to platform specific home directory + uses: actions/cache@v4 + with: + path: ${{ steps.cache-params-from-template.outputs.kernel-path }} + key: ${{ steps.cache-params-from-template.outputs.kernel-key }} + enableCrossOsArchive: true - - name: "Cache ${{ steps.cache-params-from-template.outputs.path }}" + - name: "Cache ${{ steps.cache-params-from-template.outputs.initrd-path }}" + if: ${{ steps.cache-params-from-template.outputs.initrd-key != '' }} # avoid using `~` in path that will be expanded to platform specific home directory uses: actions/cache@v4 with: - path: ${{ steps.cache-params-from-template.outputs.path }} - key: ${{ steps.cache-params-from-template.outputs.key }} + path: ${{ steps.cache-params-from-template.outputs.initrd-path }} + key: ${{ steps.cache-params-from-template.outputs.initrd-key }} enableCrossOsArchive: true - name: "Cache ${{ steps.cache-params-from-template.outputs.containerd-key }}" diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d0bb8a271fc..a77056b4f9d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -163,10 +163,6 @@ jobs: run: make - name: Install run: make install - - name: Cache image used by 
default.yaml - uses: ./.github/actions/setup_cache_for_template - with: - template: templates/default.yaml - name: Validate templates run: find -L templates -name '*.yaml' | xargs limactl validate - name: Install test dependencies @@ -180,11 +176,15 @@ jobs: # GitHub runners seem to have lima installed by brew already; we don't want/need it time brew uninstall --ignore-dependencies lima colima time brew install qemu bash coreutils curl jq - - name: "Show cache" - run: ./hack/debug-cache.sh - name: "Inject `no_timer_check` to kernel cmdline" # workaround to https://github.com/lima-vm/lima/issues/84 run: ./hack/inject-cmdline-to-template.sh templates/default.yaml no_timer_check + - name: Cache image used by default.yaml + uses: ./.github/actions/setup_cache_for_template + with: + template: templates/default.yaml + - name: "Show cache" + run: ./hack/debug-cache.sh - name: "Test default.yaml" uses: nick-invision/retry@v3 with: @@ -328,6 +328,9 @@ jobs: run: make - name: Install run: make install + - name: "Inject `no_timer_check` to kernel cmdline" + # workaround to https://github.com/lima-vm/lima/issues/84 + run: ./hack/inject-cmdline-to-template.sh templates/vmnet.yaml no_timer_check - name: Cache image used by vmnet.yaml uses: ./.github/actions/setup_cache_for_template with: @@ -350,9 +353,6 @@ jobs: - name: Unit test (pkg/networks) with socket_vmnet # Set -count=1 to disable cache run: go test -v -count=1 ./pkg/networks/... 
- - name: "Inject `no_timer_check` to kernel cmdline" - # workaround to https://github.com/lima-vm/lima/issues/84 - run: ./hack/inject-cmdline-to-template.sh templates/vmnet.yaml no_timer_check - name: Test socket_vmnet uses: nick-invision/retry@v3 with: @@ -381,7 +381,6 @@ jobs: uses: ./.github/actions/setup_cache_for_template with: template: https://raw.githubusercontent.com/lima-vm/lima/${{ matrix.oldver }}/examples/ubuntu-lts.yaml - detect-containerd: "false" - name: Install test dependencies run: brew install qemu bash coreutils - name: Test diff --git a/hack/cache-common-inc.sh b/hack/cache-common-inc.sh new file mode 100755 index 00000000000..f5bfbfc9796 --- /dev/null +++ b/hack/cache-common-inc.sh @@ -0,0 +1,323 @@ +#!/usr/bin/env bash + +# e.g. +# ```console +# $ download_template_if_needed templates/default.yaml +# templates/default.yaml +# $ download_template_if_needed https://raw.githubusercontent.com/lima-vm/lima/v0.15.1/examples/ubuntu-lts.yaml +# /tmp/tmp.1J9Q6Q/template.yaml +# ``` +function download_template_if_needed() { + local template="$1" + case "${template}" in + https://*) + tmp_yaml=$(mktemp -d)/template.yaml + curl -sSLf "${template}" >"${tmp_yaml}" || return + echo "${tmp_yaml}" + ;; + *) + test -f "${template}" || return + echo "${template}" + ;; + esac +} + +# e.g. 
+# ```console
+# $ print_image_locations_for_arch_from_template templates/default.yaml
+# https://cloud-images.ubuntu.com/releases/24.04/release-20240809/ubuntu-24.04-server-cloudimg-arm64.img
+# https://cloud-images.ubuntu.com/releases/24.04/release/ubuntu-24.04-server-cloudimg-arm64.img
+# ```
+# Prints the `location` of every `.images` entry matching the arch, one per line.
+# $1: template (local path or https URL)
+# $2: arch (optional, resolved by detect_arch)
+function print_image_locations_for_arch_from_template() {
+	local template arch
+	template=$(download_template_if_needed "$1") || return
+	local -r template=${template}
+	arch=$(detect_arch "${template}" "${2:-}") || return
+	local -r arch=${arch}
+
+	# extract the image locations by parsing template using arch
+	local -r yq_filter="[.images | map(select(.arch == \"${arch}\")) | .[].location] | .[]"
+	yq eval "${yq_filter}" "${template}"
+}
+
+# e.g.
+# ```console
+# $ detect_arch templates/default.yaml
+# x86_64
+# $ detect_arch templates/default.yaml arm64
+# aarch64
+# ```
+# Prints the normalized arch: $2 if given, else the template's `.arch`,
+# else `uname -m`.
+# $1: template (local path or https URL)
+# $2: arch (optional)
+function detect_arch() {
+	local template arch
+	template=$(download_template_if_needed "$1") || return
+	local -r template=${template}
+
+	arch="${2:-$(yq '.arch // ""' "${template}")}"
+	arch="${arch:-$(uname -m)}"
+	# normalize arch. amd64 -> x86_64, arm64 -> aarch64
+	case "${arch}" in
+	amd64 | x86_64) arch=x86_64 ;;
+	aarch64 | arm64) arch=aarch64 ;;
+	*) ;;
+	esac
+	echo "${arch}"
+}
+
+# e.g.
+# ```console
+# $ print_image_locations_for_arch_from_template templates/default.yaml|print_valid_image_index
+# 0
+# ```
+# Reads image locations from stdin (one per line) and prints the zero-based
+# index of the first one that answers with HTTP 200.
+# Returns 1 with a message on stderr if no location is reachable.
+function print_valid_image_index() {
+	local index=0 location http_code_and_size http_code _size
+	while read -r location; do
+		if [[ ${location} != "null" ]]; then
+			http_code_and_size=$(check_location_with_cache "${location}")
+			read -r http_code _size <<<"${http_code_and_size}"
+			if [[ ${http_code} -eq 200 ]]; then
+				echo "${index}"
+				return
+			fi
+		fi
+		# `null` entries are skipped but still counted, so that the index
+		# keeps matching the position in the per-arch image list.
+		index=$((index + 1))
+	done
+	echo "Failed to get the valid image location" >&2
+	return 1
+}
+
+# e.g.
+# ```console
+# $ size_from_location "https://cloud-images.ubuntu.com/releases/24.04/release-20240725/ubuntu-24.04-server-cloudimg-amd64.img"
+# 585498624
+# ```
+# Prints the second field of check_location's output (the size).
+function size_from_location() {
+	(
+		set -o pipefail
+		local location=$1
+		check_location "${location}" | cut -d' ' -f2
+	)
+}
+
+# Check the remote location and return the http code and size.
+# If GITHUB_ACTIONS is true, the result is not cached.
+# e.g.
+# ```console
+# $ check_location "https://cloud-images.ubuntu.com/releases/24.04/release-20240725/ubuntu-24.04-server-cloudimg-amd64.img"
+# 200 585498624
+# ```
+function check_location() {
+	# shellcheck disable=SC2154
+	if [[ ${GITHUB_ACTIONS:-false} == true ]]; then
+		check_location_without_cache "$1"
+	else
+		check_location_with_cache "$1"
+	fi
+}
+
+# Check the remote location and return the http code and size.
+# The result is cached in .check_location-response-cache.yaml
+# (relative to the current working directory), keyed by location.
+# e.g.
+# ```console
+# $ check_location_with_cache "https://cloud-images.ubuntu.com/releases/24.04/release-20240725/ubuntu-24.04-server-cloudimg-amd64.img"
+# 200 585498624
+# ```
+function check_location_with_cache() {
+	local -r location="$1" cache_file=".check_location-response-cache.yaml"
+	local cached http_code_and_size
+	# check ${cache_file} for the cache
+	if [[ -f ${cache_file} ]]; then
+		# `yq -e` fails when the key is missing, which falls through to the request below
+		cached=$(yq -e eval ".[\"${location}\"]" "${cache_file}" 2>/dev/null) && echo "${cached}" && return
+	else
+		touch "${cache_file}"
+	fi
+	http_code_and_size=$(check_location_without_cache "${location}") || return
+	yq eval ".[\"${location}\"] = \"${http_code_and_size}\"" -i "${cache_file}" || return
+	echo "${http_code_and_size}"
+}
+
+# Send a HEAD request (following redirects) and print "<http code> <Content-Length>".
+# Needs a curl that supports `%header{...}` in --write-out.
+# e.g.
+# ```console
+# $ check_location_without_cache "https://cloud-images.ubuntu.com/releases/24.04/release-20240725/ubuntu-24.04-server-cloudimg-amd64.img"
+# 200 585498624
+# ```
+function check_location_without_cache() {
+	local -r location="$1"
+	curl -sIL -w "%{http_code} %header{Content-Length}" "${location}" -o /dev/null
+}
+
+# e.g.
+# ```console
+# $ print_image_kernel_initrd_locations_with_digest_for_arch_from_template_at_index templates/default.yaml 0
+# https://cloud-images.ubuntu.com/releases/24.04/release-20240809/ubuntu-24.04-server-cloudimg-arm64.img
+# sha256:2e0c90562af1970ffff220a5073a7830f4acc2aad55b31593003e8c363381e7a
+# null
+# null
+# null
+# null
+# ```
+# (The example shows one field per line for readability; `yq -o=t` emits the
+# fields as a single tab-separated line, which is what callers `read`.)
+# Fields: image location, image digest, kernel location, kernel digest,
+# initrd location, initrd digest.
+# $1: template (local path or https URL)
+# $2: index into the list of images matching the arch
+# $3: arch (optional, resolved by detect_arch)
+function print_image_kernel_initrd_locations_with_digest_for_arch_from_template_at_index() {
+	local template index="${2:-}" arch
+	template=$(download_template_if_needed "$1") || return
+	local -r template=${template}
+	arch=$(detect_arch "${template}" "${3:-}") || return
+	local -r arch=${arch}
+
+	local -r yq_filter="[(.images[] | select(.arch == \"${arch}\"))].[${index}]|[
+		.location,
+		.digest,
+		.kernel.location,
+		.kernel.digest,
+		.initrd.location,
+		.initrd.digest
+	]"
+	yq -o=t eval "${yq_filter}" "${template}"
+}
+
+# e.g.
+# ```console
+# $ print_containerd_config_for_arch_from_template templates/default.yaml
+# true
+# https://github.com/containerd/nerdctl/releases/download/v1.7.6/nerdctl-full-1.7.6-linux-arm64.tar.gz
+# sha256:77c747f09853ee3d229d77e8de0dd3c85622537d82be57433dc1fca4493bab95
+# ```
+# (The example shows one field per line for readability; `yq -o=t` emits the
+# fields as a single tab-separated line, which is what callers `read`.)
+# Fields: containerd enabled (`system or user`), archive location, archive digest.
+# $1: template (local path or https URL)
+# $2: arch (optional, resolved by detect_arch)
+function print_containerd_config_for_arch_from_template() {
+	local template arch validated_template
+	template=$(download_template_if_needed "$1") || return
+	local -r template=${template}
+	arch=$(detect_arch "${template}" "${2:-}") || return
+	local -r arch=${arch}
+
+	local -r yq_filter="
+		[.containerd|[.system or .user],
+		.containerd.archives | map(select(.arch == \"${arch}\")) | [.[0].location, .[0].digest]]|flatten
+	"
+	# If `limactl validate --fill` fails, fall back to a document that reports
+	# containerd as disabled. The key must be `containerd` (no leading dot) so
+	# that the `.containerd` lookups in the filter above find it.
+	validated_template="$(
+		limactl validate "${template}" --fill 2>/dev/null || echo "{containerd: {system: false, user: false, archives: []}}"
+	)"
+	yq -o=t eval "${yq_filter}" <<<"${validated_template}"
+}
+
+# e.g.
+# ```console
+# $ location_to_sha256 "https://cloud-images.ubuntu.com/releases/24.04/release-20240809/ubuntu-24.04-server-cloudimg-arm64.img"
+# ae988d797c6de06b9c8a81a2b814904151135ccfd4616c22948057f6280477e8
+# ```
+# Prints the hex SHA-256 of the location string itself (not of the remote content).
+# Uses `sha256sum` when available, `shasum -a 256` otherwise; fails if neither exists.
+function location_to_sha256() {
+	(
+		set -o pipefail
+		local -r location="$1"
+		local sha256
+		# `|| exit` propagates a failed pipeline instead of echoing an empty hash
+		if command -v sha256sum >/dev/null; then
+			sha256="$(printf '%s' "${location}" | sha256sum | cut -d' ' -f1)" || exit
+		elif command -v shasum >/dev/null; then
+			sha256="$(printf '%s' "${location}" | shasum -a 256 | cut -d' ' -f1)" || exit
+		else
+			echo "sha256sum or shasum not found" >&2
+			exit 1
+		fi
+		echo "${sha256}"
+	)
+}
+
+# e.g.
+# ```console
+# $ location_to_cache_path "https://cloud-images.ubuntu.com/releases/24.04/release-20240809/ubuntu-24.04-server-cloudimg-arm64.img"
+# .download/by-url-sha256/ae988d797c6de06b9c8a81a2b814904151135ccfd4616c22948057f6280477e8
+# ```
+# Prints the cache directory for the location, relative to the working directory.
+# Returns non-zero and prints nothing when the location is `null`.
+function location_to_cache_path() {
+	local location=$1 sha256
+	[[ ${location} != "null" ]] || return
+	sha256=$(location_to_sha256 "${location}") && echo ".download/by-url-sha256/${sha256}"
+}
+
+# e.g.
+# ```console
+# $ cache_key_from_prefix_location_and_digest image "https://cloud-images.ubuntu.com/releases/24.04/release-20240809/ubuntu-24.04-server-cloudimg-arm64.img" "sha256:2e0c90562af1970ffff220a5073a7830f4acc2aad55b31593003e8c363381e7a"
+# image:ubuntu-24.04-server-cloudimg-arm64.img-sha256:2e0c90562af1970ffff220a5073a7830f4acc2aad55b31593003e8c363381e7a
+# $ cache_key_from_prefix_location_and_digest image "https://cloud-images.ubuntu.com/releases/24.04/release-20240809/ubuntu-24.04-server-cloudimg-arm64.img" "null"
+# image:ubuntu-24.04-server-cloudimg-arm64.img-url-sha256:ae988d797c6de06b9c8a81a2b814904151135ccfd4616c22948057f6280477e8
+# ```
+# Prints "<prefix>:<basename of location>-<digest>", or
+# "<prefix>:<basename of location>-url-sha256:<sha256 of location>" when no digest is given.
+# Returns non-zero and prints nothing when there is no location.
+# $1: prefix, $2: location, $3: digest (`null` or empty means "not available")
+function cache_key_from_prefix_location_and_digest() {
+	local prefix=$1 location=${2:-} digest=${3:-} location_basename location_sha256
+	# no location (empty or yq's `null`) means there is nothing to cache
+	[[ -n ${location} && ${location} != "null" ]] || return
+	location_basename=$(basename "${location}")
+	if [[ -n ${digest} && ${digest} != "null" ]]; then
+		echo "${prefix}:${location_basename}-${digest}"
+	else
+		# use sha256 of location as key if digest is not available
+		location_sha256=$(location_to_sha256 "${location}") || return
+		echo "${prefix}:${location_basename}-url-sha256:${location_sha256}"
+	fi
+}
+
+# e.g.
+# ```console
+# $ print_path_and_key_for_cache image "https://cloud-images.ubuntu.com/releases/24.04/release-20240809/ubuntu-24.04-server-cloudimg-arm64.img" "sha256:2e0c90562af1970ffff220a5073a7830f4acc2aad55b31593003e8c363381e7a"
+# image-path=.download/by-url-sha256/ae988d797c6de06b9c8a81a2b814904151135ccfd4616c22948057f6280477e8
+# image-key=image:ubuntu-24.04-server-cloudimg-arm64.img-sha256:2e0c90562af1970ffff220a5073a7830f4acc2aad55b31593003e8c363381e7a
+# ```
+# Prints "<prefix>-path=..." and "<prefix>-key=..." lines.
+# Both values are left empty when no cache key can be derived.
+function print_path_and_key_for_cache() {
+	local -r prefix=$1 location=$2 digest=$3
+	local cache_path cache_key
+	cache_path=$(location_to_cache_path "${location}" || true)
+	cache_key=$(cache_key_from_prefix_location_and_digest "${prefix}" "${location}" "${digest}" || true)
+	# never report a path without a key
+	[[ -n ${cache_key} ]] || cache_path=""
+	echo "${prefix}-path=${cache_path}"
+	echo "${prefix}-key=${cache_key}"
+}
+
+# e.g.
+# ```console
+# $ print_cache_informations_from_template templates/default.yaml
+# image-path=.download/by-url-sha256/ae988d797c6de06b9c8a81a2b814904151135ccfd4616c22948057f6280477e8
+# image-key=image:ubuntu-24.04-server-cloudimg-arm64.img-sha256:2e0c90562af1970ffff220a5073a7830f4acc2aad55b31593003e8c363381e7a
+# kernel-path=
+# kernel-key=
+# initrd-path=
+# initrd-key=
+# containerd-path=.download/by-url-sha256/21cc8dfa548ea8a678135bd6984c9feb9f8a01901d10b11bb491f6f4e7537158
+# containerd-key=containerd:nerdctl-full-1.7.6-linux-arm64.tar.gz-sha256:77c747f09853ee3d229d77e8de0dd3c85622537d82be57433dc1fca4493bab95
+# $ print_cache_informations_from_template templates/experimental/riscv64.yaml
+# image-path=.download/by-url-sha256/760b6ec69c801177bdaea06d7ee25bcd6ab72a331b9d3bf38376578164eb8f01
+# image-key=image:ubuntu-24.04-server-cloudimg-riscv64.img-sha256:361d72c5ed9781b097ab2dfb1a489c64e51936be648bbc5badee762ebdc50c31
+# kernel-path=.download/by-url-sha256/4568026693dc0f31a551b6741839979c607ee6bb0bf7209c89f3348321c52c61
+# kernel-key=kernel:qemu-riscv64_smode_uboot.elf-sha256:d4b3a10c3ef04219641802a586dca905e768805f5a5164fb68520887df54f33c
+# initrd-path=
+# initrd-key=
+# ```
+# Prints `<prefix>-path=` / `<prefix>-key=` lines for image, kernel and initrd
+# of the first reachable image, in a form that can be appended to $GITHUB_OUTPUT.
+# The containerd lines are printed only when `limactl` is available and the
+# template enables containerd.
+# $1: template (local path or https URL)
+# $2: arch (optional)
+function print_cache_informations_from_template() {
+	(
+		set -o pipefail
+		local template index image_kernel_initrd_info location digest containerd_info
+		template=$(download_template_if_needed "$1") || return
+		local -r template="${template}"
+		index=$(print_image_locations_for_arch_from_template "${template}" "${@:2}" | print_valid_image_index) || return
+		local -r index="${index}"
+		image_kernel_initrd_info=$(print_image_kernel_initrd_locations_with_digest_for_arch_from_template_at_index "${template}" "${index}" "${@:2}") || return
+		# shellcheck disable=SC2034
+		read -r image_location image_digest kernel_location kernel_digest initrd_location initrd_digest <<<"${image_kernel_initrd_info}"
+		for prefix in image kernel initrd; do
+			# indirect expansion: ${!location} reads ${image_location}, ${kernel_location}, ...
+			location=${prefix}_location
+			digest=${prefix}_digest
+			print_path_and_key_for_cache "${prefix}" "${!location}" "${!digest}"
+		done
+		if command -v limactl >/dev/null; then
+			containerd_info=$(print_containerd_config_for_arch_from_template "${template}" "${@:2}") || return
+			read -r containerd_enabled containerd_location containerd_digest <<<"${containerd_info}"
+			if [[ ${containerd_enabled} == "true" ]]; then
+				print_path_and_key_for_cache "containerd" "${containerd_location}" "${containerd_digest}"
+			fi
+		fi
+	)
+}
+
+# Compatible with hashFile() in GitHub Actions
+# e.g.
+# ```console
+# $ hash_file templates/default.yaml
+# ceec5ba3dc8872c083b2eb7f44e3e3f295d5dcdeccf0961ee153be6586525e5e
+# ```
+# Hashes every given file with SHA-256, concatenates the binary digests and
+# prints the hex SHA-256 of that concatenation.
+# Uses `sha256sum` when available, `shasum -a 256` otherwise.
+function hash_file() {
+	(
+		set -o pipefail
+		local -a sha256_cmd
+		if command -v sha256sum >/dev/null; then
+			sha256_cmd=(sha256sum)
+		elif command -v shasum >/dev/null; then
+			sha256_cmd=(shasum -a 256)
+		else
+			echo "sha256sum or shasum not found" >&2
+			exit 1
+		fi
+		local hash="" file
+		for file in "$@"; do
+			hash="${hash}$("${sha256_cmd[@]}" "${file}" | cut -d' ' -f1)" || return
+		done
+		# xxd turns the concatenated hex digests back into raw bytes before the final hash
+		echo "${hash}" | xxd -r -p | "${sha256_cmd[@]}" | cut -d' ' -f1
+	)
+}
diff --git a/hack/calculate-cache.sh b/hack/calculate-cache.sh
index 9c7902a42b6..1d1a89e9555 100755
--- a/hack/calculate-cache.sh
+++ b/hack/calculate-cache.sh
@@ -4,19 +4,23 @@
 #
 # Answer to the question in https://github.com/lima-vm/lima/pull/2508#discussion_r1699798651
 
+scriptdir=$(dirname "${BASH_SOURCE[0]}")
+# shellcheck source=./cache-common-inc.sh
+. "${scriptdir}/cache-common-inc.sh"
+
 # usage: [DEBUG=1] ./hack/calculate-cache.sh
 # DEBUG=1 will save the collected information in .calculate-cache-collected-info-{before,after}.yaml
 #
 # This script does:
 # 1. extracts `runs_on` and `template` from workflow file (.github/workflows/test.yml)
-# 2. check each template for image and nerdctl
-# 3. detect size of image and nerdctl (responses from remote are cached for faster iteration)
-# save the response in .calculate-cache-response-cache.yaml
+# 2. check each template for image, kernel, initrd, and nerdctl
+# 3. detect size of image, kernel, initrd, and nerdctl (responses from remote are cached for faster iteration)
+# save the response in .check_location-response-cache.yaml
 # 4.
print content size, actual cache size (if available), by cache key # # The major differences for reducing cache usage are as follows: # - it is now cached `~/.cache/lima/download/by-url-sha256/$sha256` instead of caching `~/.cache/lima/download` -# - the cache keys are now based on the image digest and nerdctl digest instead of the template file's hash +# - the cache keys are now based on the image, kernel, initrd, and nerdctl digest instead of the template file's hash # - enables the use of cache regardless of the operating system used to execute CI. # # The script requires the following commands: @@ -116,82 +120,6 @@ function runner_os_from_runner() { esac } -# check the remote location and return the http code and size. -# The result is cached in .calculate-cache-response-cache.yaml -# e.g. -# ```console -# $ check_location "https://cloud-images.ubuntu.com/releases/24.04/release-20240725/ubuntu-24.04-server-cloudimg-amd64.img" -# 200 585498624 -# ``` -function check_location() { - location="$1" - readonly cache_file="./.calculate-cache-response-cache.yaml" - # check response_cache.yaml for the cache - if [[ -f ${cache_file} ]]; then - cached=$(yq -e eval ".[\"${location}\"]" "${cache_file}" 2>/dev/null) && echo "${cached}" && return - else - touch "${cache_file}" - fi - http_code_and_size=$(curl -sIL -w "%{http_code} %header{Content-Length}" "${location}" -o /dev/null) - yq eval ".[\"${location}\"] = \"${http_code_and_size}\"" -i "${cache_file}" - echo "${http_code_and_size}" -} - -# print image location, digest, size, hash, containerd, containerd_location, containerd_digest, containerd_size from the template -# e.g. 
-# ```console -# $ print_location_digest_size_hash_from_template "templates/default.yaml" -# https://cloud-images.ubuntu.com/releases/24.04/release-20240725/ubuntu-24.04-server-cloudimg-amd64.img sha256:d2377667ea95222330ca2287817403c85178dad397e9fed768a9b4aec79d2a7f 585498624 49aa50a4872ded07ebf657c0eaf9e44ecc0c174d033a97c537ecd270f35b462f true https://github.com/containerd/nerdctl/releases/download/v1.7.6/nerdctl-full-1.7.6-linux-amd64.tar.gz sha256:2c841e097fcfb5a1760bd354b3778cb695b44cd01f9f271c17507dc4a0b25606 237465717 -# ``` -function print_location_digest_size_hash_from_template() { - readonly template=$1 - case "${template}" in - http*) - template_yaml=$(curl -sSL "${template}") - ;; - *) - template_yaml=$(<"${template}") - ;; - esac - readonly yq_filter=" - [ - .images | map(select(.arch == \"${arch}\")) | [.[0,1].location, .[0,1].digest], - .containerd|[.system or .user], - .containerd.archives | map(select(.arch == \"${arch}\")) | [.[0].location, .[0].digest] - ]|flatten|.[] - " - if command -v limactl &>/dev/null; then - parsed=$(limactl validate <(echo "${template_yaml}") --fill 2>/dev/null | yq eval "${yq_filter}") - else - parsed=$(yq eval "${yq_filter}" <<<"${template_yaml}") - fi - # macOS earlier than 15.0 uses bash 3.2.57, which does not support readarray -t - # readarray -t arr <<<"${parsed}" - while IFS= read -r line; do arr+=("${line}"); done <<<"${parsed}" - readonly locations=("${arr[@]:0:2}") digests=("${arr[@]:2:2}") - readonly containerd="${arr[4]}" containerd_location="${arr[5]}" containerd_digest="${arr[6]}" - declare location digest size hash - for ((i = 0; i < ${#locations[@]}; i++)); do - [[ ${locations[i]} != null ]] || continue - http_code_and_size=$(check_location "${locations[i]}") - read -r http_code size <<<"${http_code_and_size}" - if [[ ${http_code} -eq 200 ]]; then - location=${locations[i]} - digest=${digests[i]} - break - fi - done - if [[ -z ${location} ]]; then - echo "Failed to get the image location for ${template}" 
>&2
-		return 1
-	fi
-	hash=$(sha256sum <<<"${template_yaml}" | cut -d' ' -f1 | xxd -r -p | sha256sum | cut -d' ' -f1)
-	declare containerd_size
-	containerd_http_code_and_size=$(check_location "${containerd_location}")
-	read -r _containerd_http_code containerd_size <<<"${containerd_http_code_and_size}"
-	echo "${location} ${digest} ${size} ${hash} ${containerd} ${containerd_location} ${containerd_digest} ${containerd_size}"
-}
-
 # format first column to MiB
 # e.g.
 # ```console
@@ -258,29 +186,39 @@ for cache_method in before after; do
 		for workflow in "${workflows[@]}"; do
 			print_runs_on_template_from_workflow "${workflow}"
 		done | while IFS=$'\t' read -r runner template; do
-			runner_os=$(runner_os_from_runner "${runner}")
-			location_digest_size_hash=$(print_location_digest_size_hash_from_template "${template}") || continue
-			read -r location digest size hash containerd containerd_location containerd_digest containerd_size <<<"${location_digest_size_hash}"
+			template=$(download_template_if_needed "${template}") || continue
+			# NOTE(review): `arch` is carried over from the previous iteration; this
+			# presumably relies on a script-level `arch` defined outside this hunk - confirm.
+			arch=$(detect_arch "${template}" "${arch}") || continue
+			index=$(print_image_locations_for_arch_from_template "${template}" "${arch}" | print_valid_image_index) || continue
+			image_kernel_initrd_info=$(print_image_kernel_initrd_locations_with_digest_for_arch_from_template_at_index "${template}" "${index}" "${arch}") || continue
+			# shellcheck disable=SC2034 # shellcheck does not detect dynamic variables usage
+			read -r image_location image_digest kernel_location kernel_digest initrd_location initrd_digest <<<"${image_kernel_initrd_info}"
+			# pass the same arch as the image lookups above; "$@" holds the script's arguments here
+			containerd_info=$(print_containerd_config_for_arch_from_template "${template}" "${arch}") || continue
+			# shellcheck disable=SC2034 # shellcheck does not detect dynamic variables usage
+			read -r containerd_enabled containerd_location containerd_digest <<<"${containerd_info}"
+			# the archive is not downloaded when containerd is disabled, so don't count it
+			[[ ${containerd_enabled} == true ]] || containerd_location=null
+
 			if [[ ${cache_method} != after ]]; then
-				key=${runner_os}-${hash}
-			elif [[ ${digest} == null ]]; then
-				key=image:$(basename "${location}")-url-sha256:$(echo -n "${location}" | sha256sum | cut -d' ' -f1)
+				key=$(runner_os_from_runner "${runner}" || true)-$(hash_file "${template}")
 			else
-				key=image:$(basename "${location}")-${digest}
+				key=$(cache_key_from_prefix_location_and_digest image "${image_location}" "${image_digest}")
 			fi
-			if [[ ${containerd} == true ]]; then
+			size=$(size_from_location "${image_location}")
+			for prefix in containerd kernel initrd; do
+				# indirect expansion: ${!location} reads ${containerd_location}, ${kernel_location}, ...
+				location="${prefix}_location"
+				digest="${prefix}_digest"
+				[[ ${!location} != null ]] || continue
 				if [[ ${cache_method} != after ]]; then
-					# previous caching method packages the containerd archive with the image
-					size=$((size + containerd_size))
+					# previous caching method packages all files in download to a single cache key
+					size=$((size + $(size_from_location "${!location}")))
 				else
-					# new caching method packages the containerd archive separately
-					containerd_key=containerd:$(basename "${containerd_location}")-${containerd_digest}
+					# new caching method caches each file separately
+					key_for_prefix=$(cache_key_from_prefix_location_and_digest "${prefix}" "${!location}" "${!digest}")
+					size_for_prefix=$(size_from_location "${!location}")
 					printf -- "- key: %s\n  template: %s\n  location: %s\n  digest: %s\n  size: %s\n" \
-						"${containerd_key}" "${template}" "${containerd_location}" "${containerd_digest}" "${containerd_size}"
+						"${key_for_prefix}" "${template}" "${!location}" "${!digest}" "${size_for_prefix}"
 				fi
-			fi
+			done
 			printf -- "- key: %s\n  template: %s\n  location: %s\n  digest: %s\n  size: %s\n" \
-				"${key}" "${template}" "${location}" "${digest}" "${size}"
+				"${key}" "${template}" "${image_location}" "${image_digest}" "${size}"
 		done
 	)
 	output_json=$(yq -o=j . <<<"${output_yaml}")