Skip to content

Commit

Permalink
Make ssh and tail work, add the whole CI into rust.sh
Browse files Browse the repository at this point in the history
  • Loading branch information
utensil committed Oct 18, 2024
1 parent 14f0204 commit 282a65f
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 13 deletions.
41 changes: 28 additions & 13 deletions yard-rs/runpod-xp/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,6 @@ def signal_handler(_signal, _frame):

# Tell the user how long the image download/extraction is expected to take
# and where the pod's status can be watched in the meantime.
logging.info(f" - Estimated time to download and extract the image: {eta} seconds")
logging.info(" - While you're waiting, you can check the status of the pod at https://www.runpod.io/console/pods ")
logging.info(f" - After started, use the following command to ssh into the pod: {ssh_command}")
# logging.info(f" or the following command in CodeSpace: {codespace_ssh_command}")

runtime = None
Expand All @@ -237,18 +236,34 @@ def signal_handler(_signal, _frame):
logging.info(f"Pod {pod['id']} started:\n{as_yaml(pod_info)}")
edit_discord_message(msg_created, f"Pod {pod['id']} started:\n{as_yaml(pod_info)}")

logging.info(f"Use the following command to ssh into the pod:\n{ssh_command}")

# try:
# total_try_time = 0
# while total_try_time < 60:
# # wait 5 seconds, ssh into the pod, and run `tail -f /content/rust.log`
# time.sleep(5)
# child = pexpect.spawn(f"{ssh_command} 'tail -f /content/rust.log'")
# child.expect(pexpect.EOF)
# total_try_time += 5
# except Exception as ex:
# log_error(f"Failed to tail the log for pod {pod['id']}", exc_info=ex)
logging.info(f"Use the following command to ssh into the pod:\n\n{ssh_command}\n\n")

import codecs

# Total time budget (in seconds) for (re)connecting to the pod over ssh; it is
# also the length of the progress bar.
max_wait_seconds = 60
total_try_time = 0
with tqdm(total=max_wait_seconds) as pbar:
    while total_try_time < max_wait_seconds:
        child = None
        try:
            # Wait POLL_PERIOD seconds before each attempt, then ssh into the
            # pod and follow every log file under /content.
            time.sleep(POLL_PERIOD)
            child = pexpect.spawn(f"{ssh_command}")
            # Block until the expected prompt text shows up (presumably the
            # remote root shell prompt -- confirm against the pod image).
            child.expect(":/#")
            child.sendline("tail -f /content/*.log")

            # Decode incrementally: the stream is read in fixed 500-byte
            # chunks, so a multi-byte UTF-8 character can straddle a chunk
            # boundary. Decoding each chunk on its own would raise a spurious
            # UnicodeDecodeError; the incremental decoder buffers the partial
            # sequence until the next chunk arrives. Genuinely invalid bytes
            # still raise and are handled below.
            decoder = codecs.getincrementaldecoder("utf-8")()
            while child.isalive():
                output = child.read(500)
                if not output:
                    # Empty read: the child hit EOF, nothing more to show.
                    break
                # Flush so log output appears as it arrives even when a chunk
                # does not end with a newline.
                print(decoder.decode(output), end='', flush=True)
            child.expect(pexpect.EOF)
        except UnicodeDecodeError as ex:
            # The log contains bytes that are not valid UTF-8; stop tailing.
            logging.error(f"Failed to decode the output of the log for pod {pod['id']}", exc_info=ex)
            break
        except Exception as ex:
            # Best effort: the pod may not accept ssh yet, so log and retry.
            logging.error(f"Failed to tail the log for pod {pod['id']}", exc_info=ex)
        finally:
            # Always release the ssh process and its pty; otherwise every
            # retry would leak one child.
            if child is not None:
                try:
                    child.close(force=True)
                except pexpect.ExceptionPexpect as ex:
                    logging.warning(f"Failed to close the ssh session for pod {pod['id']}", exc_info=ex)

        total_try_time += POLL_PERIOD
        pbar.update(POLL_PERIOD)


# myself = runpod.get_myself()

Expand Down
2 changes: 2 additions & 0 deletions yard-rs/runpod-xp/scripts/rust.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ cargo run --example gelu --no-default-features --features=cuda
cd ../../
# Run the test suite of the krnl-xp crate.
cd yard-rs/krnl-xp
cargo test
# Step back up the two directory levels entered above (presumably the
# repository root -- confirm) and run the `ci` recipe through `just`.
cd ../../
just ci

cd /content/native-land/yard-rs/runpod-xp
pip install -r requirements-runpod.txt
Expand Down

0 comments on commit 282a65f

Please sign in to comment.