From 963f9885325324a20ed1d6de7711678360c02dd4 Mon Sep 17 00:00:00 2001 From: "@picocreator (Eugene Cheah)" Date: Wed, 23 Aug 2023 09:37:21 +0000 Subject: [PATCH] test the upload runs --- .github/workflows/notebook-run.yml | 3 ++ notebook/github-runner/github-runner.sh | 25 ++++++++++++++-- notebook/github-runner/hf-upload.py | 40 +++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 3 deletions(-) create mode 100644 notebook/github-runner/hf-upload.py diff --git a/.github/workflows/notebook-run.yml b/.github/workflows/notebook-run.yml index 31f02158..309d58e7 100644 --- a/.github/workflows/notebook-run.yml +++ b/.github/workflows/notebook-run.yml @@ -29,6 +29,9 @@ env: jobs: notebook-run: + # Due to github worker hard limitation, of 24 hours + # we apply a timeout of 23 hours instead. + timeout-minutes: 1380 name: ${{github.event.inputs.notebookFile}} on ${{github.event.inputs.gpuTarget}} / cuda-${{github.event.inputs.cudaVersion}} runs-on: - cuda-${{github.event.inputs.cudaVersion}} diff --git a/notebook/github-runner/github-runner.sh b/notebook/github-runner/github-runner.sh index 471552fe..6a57c15b 100644 --- a/notebook/github-runner/github-runner.sh +++ b/notebook/github-runner/github-runner.sh @@ -14,6 +14,11 @@ if [[ -z "${WANDB_API_KEY}" ]]; then exit 1 fi +# The HF repo directory to use +if [[ -z "${HF_REPO_SYNC}" ]]; then + HF_REPO_SYNC="rwkv-x-dev/rwkv-x-playground" +fi + # Get the notebook script from the first arg NOTEBOOK_FILE=$1 @@ -31,7 +36,7 @@ CACHE_DIR="$ACTION_DIR/.cache/" mkdir -p "$CACHE_DIR" # Log the proj dir -echo "#" +echo "# ------" echo "# Starting github notebook runner" echo "#" echo "# PROJ_DIR: $PROJ_DIR" @@ -39,7 +44,7 @@ echo "# NOTEBOOK_DIR: $NOTEBOOK_DIR" echo "# NOTEBOOK_FILE: $NOTEBOOK_FILE" echo "#" echo "# CACHE_DIR: $CACHE_DIR" -echo "#" +echo "# ------" # Check if the notebook file exists, in the notebook directory if [[ ! 
-f "$NOTEBOOK_DIR/$NOTEBOOK_FILE" ]]; then @@ -127,4 +132,18 @@ echo "# [NOTE] Running notebook: $NOTEBOOK_FILE" cd "$INPUT_FILE_DIR" papermill \ -k python3 --log-output \ - "$INPUT_FILE_PATH" "$OUTPUT_FILE_PATH" \ No newline at end of file + "$INPUT_FILE_PATH" "$OUTPUT_FILE_PATH" + +# ----- +# Upload the output notebook to the HF repo +# ----- + +# Upload the result files +echo "# ------" +echo "# Uploading models & notebooks to HF repo" +echo "# ------" + +# Get $NOTEBOOK_FILE, without the ipynb filetype +NOTEBOOK_FILE_NOEXT="${NOTEBOOK_FILE%.*}" + +python3 ./hf-upload.py "$HF_REPO_SYNC" "$NOTEBOOK_FILE_NOEXT" diff --git a/notebook/github-runner/hf-upload.py b/notebook/github-runner/hf-upload.py new file mode 100644 index 00000000..70d48deb --- /dev/null +++ b/notebook/github-runner/hf-upload.py @@ -0,0 +1,40 @@ +# Get the Hugging Face Hub API +from huggingface_hub import HfApi +api = HfApi() + +# Get the repo path from the script first arg +import sys +REPO_PATH = sys.argv[1] +REPO_SUBDIR = sys.argv[2] + +# Get the current script dir +import os + +RUNNER_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +NOTEBOOK_DIR = os.path.dirname(RUNNER_SCRIPT_DIR) +PROJ_DIR = os.path.dirname(NOTEBOOK_DIR) + +MODEL_DIR = os.path.join(PROJ_DIR, "model") +OUTPUT_DIR = os.path.join(PROJ_DIR, "output") + +# Upload the models +api.upload_folder( + folder_path=MODEL_DIR, + repo_id=REPO_PATH, + path_in_repo=REPO_SUBDIR, + repo_type="model", + multi_commits=True, + allow_patterns=["*.pth"], + commit_message=f"[GHA] {REPO_SUBDIR}.ipynb result models" +) + +# Upload the ipynb files +api.upload_folder( + folder_path=OUTPUT_DIR, + repo_id=REPO_PATH, + path_in_repo=REPO_SUBDIR, + repo_type="model", + multi_commits=True, + allow_patterns=["*.ipynb"], + commit_message=f"[GHA] {REPO_SUBDIR}.ipynb result notebooks" +) \ No newline at end of file