From 7fd79880e6153440dec528422d18ef9c590234dc Mon Sep 17 00:00:00 2001 From: DaMandal0rian Date: Wed, 25 Sep 2024 14:26:07 +0200 Subject: [PATCH] scripts to manage node launch --- scripts/launch-nodes/README.md | 115 +++++++++++ scripts/launch-nodes/install_dependencies.sh | 42 ++++ scripts/launch-nodes/manage_subspace.py | 201 +++++++++++++++++++ scripts/launch-nodes/nodes.toml | 20 ++ 4 files changed, 378 insertions(+) create mode 100644 scripts/launch-nodes/README.md create mode 100755 scripts/launch-nodes/install_dependencies.sh create mode 100755 scripts/launch-nodes/manage_subspace.py create mode 100644 scripts/launch-nodes/nodes.toml diff --git a/scripts/launch-nodes/README.md b/scripts/launch-nodes/README.md new file mode 100644 index 00000000..a830fbf3 --- /dev/null +++ b/scripts/launch-nodes/README.md @@ -0,0 +1,115 @@ + +# Subspace Node Manager + +This script manages the deployment of Subspace nodes (RPC, Farmer, and Bootstrap nodes) on multiple servers using SSH. It updates the `.env` file with the specified release version and coordinates the startup sequence to ensure that RPC and Farmer nodes are started first. The Bootstrap node is updated last with the correct `GENESIS_HASH` and then started. + +## Features + +- SSH into multiple servers defined in a TOML configuration file. +- Modify `.env` files in the Subspace directory with a new release version and update `GENESIS_HASH`. +- Restart Subspace nodes using `sudo docker compose down -v` and `sudo docker compose up -d`. +- Retrieve the `protocol_version` hash from the RPC node logs and use it to update the Bootstrap node. +- Ensure proper start order (RPC and Farmer nodes first, Bootstrap node last). + +## Prerequisites + +- **Python 3.x** installed on your local machine. +- The following Python libraries (installed via the provided `install_dependencies.sh` script): + - `paramiko` for SSH connections. + - `tomli` for reading the configuration file, and `colorlog` for colored log output. 
+- SSH access to the remote servers where the Subspace nodes are running. +- Ensure the remote servers have Docker and Docker Compose installed. + +## Installation + +### Step 1: Install Dependencies + +1. Clone the repository or download the Python script and associated files. +2. Use the provided `install_dependencies.sh` script to install the required Python packages in a virtual environment. + +```bash +chmod +x install_dependencies.sh +./install_dependencies.sh +``` + +This will create a virtual environment (`subspace_env`) and install the required packages: `paramiko`, `tomli`, and `colorlog`. + +### Step 2: Activate the Virtual Environment + +Activate the virtual environment where the dependencies are installed: + +```bash +source subspace_env/bin/activate +``` + +### Step 3: Prepare Configuration + +Create a TOML configuration file (`nodes.toml`) with details for your Bootstrap, RPC, and Farmer nodes. The file should look like this: + +```toml +# TOML file containing server details + +[bootstrap_node] +host = "bootstrap.example.com" +user = "username" +ssh_key = "/path/to/private/key" + +# One [[farmer_rpc_nodes]] table per RPC or Farmer node + +[[farmer_rpc_nodes]] +host = "rpc.example.com" +user = "username" +ssh_key = "/path/to/private/key" +type = "rpc" + +[[farmer_rpc_nodes]] +host = "farmer.example.com" +user = "username" +ssh_key = "/path/to/private/key" +type = "farmer" +``` + +- **`bootstrap_node`:** This section defines the Bootstrap node. +- **`farmer_rpc_nodes`:** This array of tables contains the RPC and Farmer nodes, one `[[farmer_rpc_nodes]]` entry per node. The `type` field specifies whether the node is an RPC node or a Farmer node. + +### Step 4: Running the Script + +Once the configuration file is ready, run the Python script with the following command: + +```bash +python manage_subspace.py --config nodes.toml --release_version gemini-3h-2024-sep-17 --subspace_dir /home/ubuntu/subspace/subspace +``` + +- `--config`: Path to the TOML configuration file. 
+- `--release_version`: The release version to be used to update the `DOCKER_TAG` in the `.env` files. +- `--subspace_dir`: Path to the Subspace directory (default: `/home/ubuntu/subspace`). + +### Step 5: Deactivate the Virtual Environment + +Once the script has run, deactivate the virtual environment: + +```bash +deactivate +``` + +## Logging and Error Handling + +The script logs important actions and any errors that occur. The following log levels are used: + +- **INFO**: General information about the script's progress (e.g., starting/stopping nodes, modifying files). +- **WARNING**: Warnings about non-critical issues (e.g., retries during protocol version extraction). +- **ERROR**: Errors that prevent successful execution (e.g., failed SSH connections, issues with running commands). + +## Retry Mechanism + +The script includes a retry mechanism when extracting the `protocol_version` from the RPC node logs. It searches the log up to 5 times (default) with a delay of 30 seconds (default) between attempts. + +## License + +This project is licensed under the MIT License. + +## Troubleshooting + +- Ensure you have SSH access to all nodes and that your private key is properly configured. +- Ensure Docker and Docker Compose are installed and configured on the target servers. +- Check your `.env` file permissions to make sure the script can read and write to it. diff --git a/scripts/launch-nodes/install_dependencies.sh b/scripts/launch-nodes/install_dependencies.sh new file mode 100755 index 00000000..91787dc8 --- /dev/null +++ b/scripts/launch-nodes/install_dependencies.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# Function to check if Python is installed +check_python() { + if ! command -v python3 &> /dev/null + then + echo "Python3 could not be found. Please install Python 3.x before proceeding." 
+        exit 1
+    fi
+}
+
+# Function to create a virtual environment and install dependencies
+install_dependencies() {
+    # The environment is created with the standard-library venv module
+    if ! python3 -m venv --help &> /dev/null; then
+        echo "Python venv module not found. Install it (e.g. 'sudo apt install python3-venv') and re-run."
+        exit 1
+    fi
+
+    # Create virtual environment
+    echo "Creating a virtual environment..."
+    python3 -m venv subspace_env || exit 1
+
+    # Activate the virtual environment
+    source subspace_env/bin/activate
+
+    # Install required Python packages
+    echo "Installing required dependencies with pip..."
+    pip install paramiko tomli colorlog || exit 1
+
+    # Deactivate virtual environment after installing
+    deactivate
+
+    echo "Dependencies installed in 'subspace_env' virtual environment."
+    echo "To activate it, run: source subspace_env/bin/activate"
+}
+
+# Check for Python installation
+check_python
+
+# Install dependencies
+install_dependencies
diff --git a/scripts/launch-nodes/manage_subspace.py b/scripts/launch-nodes/manage_subspace.py
new file mode 100755
index 00000000..5c859aa1
--- /dev/null
+++ b/scripts/launch-nodes/manage_subspace.py
@@ -0,0 +1,318 @@
+"""Roll out a Subspace release to RPC, Farmer and Bootstrap nodes over SSH.
+
+RPC and Farmer nodes are stopped, re-tagged and started first. The hash found
+in the RPC node's logs is then written to the Bootstrap node's .env file as
+GENESIS_HASH before the Bootstrap node is restarted.
+"""
+import paramiko
+import argparse
+import tomli
+import re
+import logging
+import colorlog
+from time import sleep
+
+# Configure logging with colorlog
+handler = colorlog.StreamHandler()
+handler.setFormatter(colorlog.ColoredFormatter(
+    '%(log_color)s%(asctime)s - %(levelname)s - %(message)s',
+    log_colors={
+        'DEBUG': 'cyan',
+        'INFO': 'green',
+        'WARNING': 'yellow',
+        'ERROR': 'red',
+        'CRITICAL': 'bold_red',
+    }
+))
+logger = colorlog.getLogger(__name__)
+logger.addHandler(handler)
+logger.setLevel(logging.INFO)
+
+# Captures the hash from log text such as "protocol_version=/subspace/2/<hex>".
+PROTOCOL_VERSION_RE = re.compile(r'protocol_version=/subspace/2/([a-f0-9]+)')
+
+
+def ssh_connect(host, user, key_file):
+    """Open an SSH connection and return the connected client.
+
+    Args:
+        host: Hostname or IP address of the server.
+        user: Login user name.
+        key_file: Path to the private key file used to authenticate.
+
+    Returns:
+        The connected ``paramiko.SSHClient``; the caller must close it.
+
+    Raises:
+        Exception: Re-raised from the connection attempt after being logged.
+    """
+    client = paramiko.SSHClient()
+    # NOTE(review): AutoAddPolicy accepts unknown host keys without verification.
+    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+    try:
+        client.connect(hostname=host, username=user, key_filename=key_file)
+    except Exception as e:
+        # Do not leave a half-open client behind when the connection fails.
+        client.close()
+        logger.error(f"Failed to connect to {host}: {e}")
+        raise
+    logger.info(f"Connected to {host}")
+    return client
+
+
+def run_command(client, command, check=False):
+    """Run a command over SSH and return its output.
+
+    A non-zero exit status is logged as an error. Stderr output from a command
+    that exited with status 0 is logged as a warning only, because it does not
+    by itself mean that the command failed.
+
+    Args:
+        client: Connected SSH client.
+        command: Shell command line to run on the remote host.
+        check: When True, raise ``RuntimeError`` on a non-zero exit status.
+
+    Returns:
+        Tuple ``(stdout, stderr)`` of decoded text.
+
+    Raises:
+        RuntimeError: If ``check`` is True and the command exited non-zero.
+        Exception: Re-raised after logging if the command could not be run.
+    """
+    try:
+        _, stdout, stderr = client.exec_command(command)
+        # errors='replace' keeps undecodable bytes from aborting the run.
+        output = stdout.read().decode('utf-8', errors='replace')
+        error = stderr.read().decode('utf-8', errors='replace')
+        exit_status = stdout.channel.recv_exit_status()
+    except Exception as e:
+        logger.error(f"Failed to run command: {command}: {e}")
+        raise
+    if exit_status != 0:
+        # With "2>&1" in the command the message ends up on stdout.
+        logger.error(f"Command exited with status {exit_status}: {error or output}")
+        if check:
+            raise RuntimeError(f"Command failed ({exit_status}): {command}")
+    elif error:
+        logger.warning(f"Command wrote to stderr: {error}")
+    return output, error
+
+
+def _docker_compose(client, subspace_dir, action):
+    """Run ``sudo docker compose <action>`` in subspace_dir; raise on failure."""
+    logger.info(f"Running sudo docker compose {action} in {subspace_dir}")
+    try:
+        command = f'cd {subspace_dir} && sudo docker compose {action}'
+        run_command(client, command, check=True)
+    except Exception as e:
+        logger.error(f"Failed to run sudo docker compose {action}: {e}")
+        raise
+
+
+def docker_compose_down(client, subspace_dir):
+    """Run sudo docker compose down -v in the subspace directory."""
+    _docker_compose(client, subspace_dir, 'down -v')
+
+
+def modify_env_file(client, subspace_dir, release_version, genesis_hash=None):
+    """Update DOCKER_TAG, and optionally GENESIS_HASH, in the remote .env file.
+
+    Args:
+        client: Connected SSH client.
+        subspace_dir: Remote directory that contains the .env file.
+        release_version: New value for DOCKER_TAG.
+        genesis_hash: New value for GENESIS_HASH; left untouched when falsy.
+
+    Raises:
+        Exception: Re-raised after logging if the file cannot be read or written.
+    """
+    env_file = f'{subspace_dir}/.env'
+    replacements = {'DOCKER_TAG': release_version}
+    if genesis_hash:
+        replacements['GENESIS_HASH'] = genesis_hash
+    replaced = set()
+
+    try:
+        sftp = client.open_sftp()
+        try:
+            with sftp.open(env_file, 'r') as f:
+                env_data = f.readlines()
+
+            # Build the full new content first so the file is only opened
+            # for writing (and truncated) once everything is ready.
+            updated = []
+            for line in env_data:
+                for key, value in replacements.items():
+                    if line.startswith(f'{key}='):
+                        line = f'{key}={value}\n'
+                        replaced.add(key)
+                        break
+                updated.append(line)
+
+            with sftp.open(env_file, 'w') as f:
+                f.write(''.join(updated))
+        finally:
+            sftp.close()
+    except Exception as e:
+        logger.error(f"Failed to modify .env file: {e}")
+        raise
+
+    for key in sorted(set(replacements) - replaced):
+        logger.warning(f"{key} not found in {env_file}; it was not updated")
+    logger.info(f"Modified .env file in {env_file}")
+
+
+def grep_protocol_version(client, retries=5, interval=30,
+                          container='subspace-archival-node-1'):
+    """Search the node's recent logs for the protocol version hash.
+
+    Args:
+        client: Connected SSH client for the RPC node.
+        retries: Maximum number of attempts.
+        interval: Seconds to wait between attempts.
+        container: Name of the Docker container whose logs are searched.
+
+    Returns:
+        The hexadecimal hash as a string, or None if no attempt found it.
+    """
+    # 2>&1: also search what the container wrote to stderr, which
+    # "docker logs" replays on its own stderr.
+    logs_command = f'sudo docker logs --tail 100 {container} 2>&1'
+
+    for attempt in range(retries):
+        try:
+            stdout, _ = run_command(client, logs_command)
+            match = PROTOCOL_VERSION_RE.search(stdout)
+            if match:
+                logger.info(f"Protocol version hash found: {match.group(1)}")
+                return match.group(1)
+            logger.warning(f"Protocol version hash not found. Attempt {attempt + 1} of {retries}")
+        except Exception as e:
+            logger.error(f"Error grepping protocol version: {e}")
+
+        if attempt < retries - 1:
+            logger.info(f"Retrying in {interval} seconds...")
+            sleep(interval)
+
+    logger.error("Failed to retrieve protocol version hash after retries.")
+    return None
+
+
+def docker_compose_up(client, subspace_dir):
+    """Run sudo docker compose up -d in the subspace directory."""
+    _docker_compose(client, subspace_dir, 'up -d')
+
+
+def _connect(node):
+    """Connect to a node described by a host/user/ssh_key config table."""
+    return ssh_connect(node['host'], node['user'], node['ssh_key'])
+
+
+def _stop_nodes(nodes, subspace_dir):
+    """Step 1: run docker compose down -v on every node; return True on success."""
+    ok = True
+    for node in nodes:
+        try:
+            logger.info(f"Connecting to {node['host']} for sudo docker compose down -v...")
+            client = _connect(node)
+            try:
+                docker_compose_down(client, subspace_dir)
+            finally:
+                client.close()
+        except Exception as e:
+            logger.error(f"Error during sudo docker compose down -v on {node['host']}: {e}")
+            ok = False
+    return ok
+
+
+def _start_nodes(nodes, subspace_dir, release_version):
+    """Step 2: update .env and start every RPC and Farmer node.
+
+    Returns:
+        Tuple ``(ok, protocol_version_hash)``. The hash is None when no RPC
+        node yielded one.
+    """
+    ok = True
+    protocol_version_hash = None
+    for node in nodes:
+        try:
+            logger.info(f"Connecting to {node['host']}...")
+            client = _connect(node)
+            try:
+                modify_env_file(client, subspace_dir, release_version)
+                docker_compose_up(client, subspace_dir)
+
+                # If this is the RPC node, grep the logs for protocol version hash
+                if node.get('type') == 'rpc':
+                    logger.info("Waiting for the RPC node to start...")
+                    sleep(30)  # Adjust sleep time as necessary
+
+                    logger.info(f"Grep protocol version from logs on {node['host']}...")
+                    found = grep_protocol_version(client)
+                    if found:
+                        # Never overwrite a hash already found with None.
+                        protocol_version_hash = found
+                    else:
+                        logger.error(f"Failed to retrieve protocol version hash on {node['host']}")
+                        ok = False
+            finally:
+                # Runs on every path, so the connection is never leaked.
+                client.close()
+        except Exception as e:
+            logger.error(f"Error during update and start on {node['host']}: {e}")
+            ok = False
+    return ok, protocol_version_hash
+
+
+def _restart_bootstrap(node, subspace_dir, release_version, genesis_hash):
+    """Step 3: restart the bootstrap node with the new GENESIS_HASH."""
+    try:
+        logger.info(f"Connecting to the bootstrap node {node['host']} for sudo docker compose down -v...")
+        client = _connect(node)
+        try:
+            docker_compose_down(client, subspace_dir)
+            modify_env_file(client, subspace_dir, release_version, genesis_hash=genesis_hash)
+            docker_compose_up(client, subspace_dir)
+        finally:
+            client.close()
+    except Exception as e:
+        logger.error(f"Error during bootstrap node update: {e}")
+        return False
+    logger.info("Bootstrap node started with the updated Genesis Hash.")
+    return True
+
+
+def main():
+    """Parse the command line and run the three rollout steps.
+
+    Returns:
+        0 if every step succeeded, otherwise 1 (used as the exit status).
+    """
+    parser = argparse.ArgumentParser(description="Manage Subspace nodes via SSH")
+    parser.add_argument('--config', required=True, help='Path to the TOML config file')
+    parser.add_argument('--release_version', required=True, help='Release version to update in the .env file')
+    parser.add_argument('--subspace_dir', default='/home/ubuntu/subspace', help='Path to the Subspace directory (default: /home/ubuntu/subspace)')
+    args = parser.parse_args()
+
+    # Read configuration from the TOML file using tomli
+    with open(args.config, 'rb') as f:
+        config = tomli.load(f)
+
+    bootstrap_node = config['bootstrap_node']
+    farmer_rpc_nodes = config['farmer_rpc_nodes']
+
+    release_version = args.release_version
+    subspace_dir = args.subspace_dir
+
+    stopped = _stop_nodes(farmer_rpc_nodes, subspace_dir)
+    started, protocol_version_hash = _start_nodes(farmer_rpc_nodes, subspace_dir, release_version)
+
+    if not protocol_version_hash:
+        logger.error("Protocol version hash not found, skipping bootstrap node start.")
+        return 1
+    bootstrapped = _restart_bootstrap(bootstrap_node, subspace_dir, release_version, protocol_version_hash)
+    return 0 if stopped and started and bootstrapped else 1
+
+
+if __name__ == '__main__':
+    raise SystemExit(main())
diff --git a/scripts/launch-nodes/nodes.toml b/scripts/launch-nodes/nodes.toml
new file mode 100644
index 00000000..751853bd
--- /dev/null
+++ b/scripts/launch-nodes/nodes.toml
@@ -0,0 +1,20 @@
+# TOML file containing server details
+
+[bootstrap_node]
+host = "34.201.40.91"
+user = "ubuntu"
+ssh_key = "key.pem"
+
+# Remove the [farmer_rpc_nodes] block and only use the array of tables below
+
+[[farmer_rpc_nodes]]
+host = "54.209.76.129"
+user = "ubuntu"
+ssh_key = "key.pem"
+type = "rpc"
+
+[[farmer_rpc_nodes]]
+host = "44.202.161.154"
+user = "ubuntu"
+ssh_key = "key.pem"
+type = "farmer"