From 3ef10ca926f73a994f39bf950a81577099f06b55 Mon Sep 17 00:00:00 2001 From: Ioannis Karasavvaidis <32846251+CoMfUcIoS@users.noreply.github.com> Date: Thu, 12 Sep 2024 20:52:11 +0100 Subject: [PATCH] (PE-38815-18) Official support of Add replica plan (#484) * PE-38815 add_replica plan updated, test matrix added (#480) * (PE-38818) Removing experimental and private from add_replica (#478) * (PE-38818) Removing experimental and private from add_replica * Updating Reference.md --------- Co-authored-by: Neil Anderson * (PE-38817) Adding docs for add replica (#477) Co-authored-by: Neil Anderson * chore: update test-add-replica workflow - Remove unnecessary quotes from YAML keys and values - Standardize single quotes for string values - Improve readability by removing extra blank lines * feat(workflows): add console password to test-add-replica-matrix job - Included `console_password` parameter in the test-add-replica-matrix job. - Utilizes `${{ secrets.CONSOLE_PASSWORD }}` for secure password management. * (PE-38817) Text fixes for add_replica docs (#496) * PE-39228 add_replica smoke test (#497) * PE-39228 Smoke test created and added to workflow * PE-39228 Workflow updated * PE-39228 Spec updated, smoke test added to matrix * PE-39228 Versions updated, push trigger removed from matrix * PE-39228 whitespace fixed * chore(workflow): update test-add-replica-matrix to use single version - Removed version `2021.7.9` from the matrix, leaving only `2023.8.0`. This change simplifies the testing matrix by focusing on the latest version. * chore(workflow): update test-add-replica-matrix to use single version - Changed `version` matrix to use an array with only `2023.8.0`. This change simplifies the testing matrix by focusing on the latest version. * docs: update REFERENCE.md to use asterisks for list items - Changed list item markers from hyphens to asterisks for consistency. - Ensured all list items in the document follow the same format. 
* add_replica docs updated --------- Co-authored-by: Neil Anderson Co-authored-by: Neil Anderson Co-authored-by: Aaron Shannon --- .../workflows/test-add-replica-matrix.yaml | 112 ++++++++++++++++++ .github/workflows/test-add-replica.yaml | 16 ++- REFERENCE.md | 52 +++++++- documentation/add_replica.md | 89 ++++++++++++++ documentation/expanding.md | 2 +- plans/add_replica.pp | 20 ++-- .../peadm_spec/plans/add_replica.pp | 2 +- .../plans/provision_test_cluster.pp | 15 +++ .../peadm_spec/plans/verify_replica.pp | 20 ++++ 9 files changed, 307 insertions(+), 21 deletions(-) create mode 100644 .github/workflows/test-add-replica-matrix.yaml create mode 100644 documentation/add_replica.md create mode 100644 spec/acceptance/peadm_spec/plans/verify_replica.pp diff --git a/.github/workflows/test-add-replica-matrix.yaml b/.github/workflows/test-add-replica-matrix.yaml new file mode 100644 index 00000000..7450663d --- /dev/null +++ b/.github/workflows/test-add-replica-matrix.yaml @@ -0,0 +1,112 @@ +--- +name: Add replica matrix +on: + pull_request: + paths: + - .github/workflows/**/* + - spec/**/* + - lib/**/* + - tasks/**/* + - functions/**/* + - types/**/* + - plans/**/* + - hiera/**/* + - manifests/**/* + - templates/**/* + - files/**/* + - metadata.json + - Rakefile + - Gemfile + - provision.yaml + - .rspec + - .rubocop.yml + - .puppet-lint.rc + - .fixtures.yml + branches: [main] + workflow_dispatch: {} +jobs: + test-add-replica: + name: PE ${{ matrix.version }} ${{ matrix.architecture }} on ${{ matrix.image }} + runs-on: ubuntu-20.04 + env: + BOLT_GEM: true + BOLT_DISABLE_ANALYTICS: true + LANG: en_US.UTF-8 + strategy: + fail-fast: false + matrix: + architecture: [standard, standard-with-dr, large, extra-large] + version: [2023.8.0] + image: [almalinux-cloud/almalinux-8] + steps: + - name: Checkout Source + uses: actions/checkout@v2 + - name: Activate Ruby 2.7 + uses: ruby/setup-ruby@v1 + with: + ruby-version: '2.7' + bundler-cache: true + - name: Print bundle 
environment + if: ${{ github.repository_owner == 'puppetlabs' }} + run: | + echo ::group::info:bundler + bundle env + echo ::endgroup:: + - name: Provision test cluster (specified architecture and a spare replica) + timeout-minutes: 15 + run: | + echo ::group::prepare + mkdir -p $HOME/.ssh + echo 'Host *' > $HOME/.ssh/config + echo ' ServerAliveInterval 150' >> $HOME/.ssh/config + echo ' ServerAliveCountMax 2' >> $HOME/.ssh/config + bundle exec rake spec_prep + echo ::endgroup:: + echo ::group::provision + bundle exec bolt plan run peadm_spec::provision_test_cluster \ + --modulepath spec/fixtures/modules \ + provider=provision_service \ + image=${{ matrix.image }} \ + architecture=${{ matrix.architecture }}-and-spare-replica + echo ::endgroup:: + echo ::group::info:request + cat request.json || true; echo + echo ::endgroup:: + echo ::group::info:inventory + sed -e 's/password: .*/password: "[redacted]"/' < spec/fixtures/litmus_inventory.yaml || true + echo ::endgroup:: + - name: Install PE on test cluster + timeout-minutes: 120 + run: | + bundle exec bolt plan run peadm_spec::install_test_cluster \ + --inventoryfile spec/fixtures/litmus_inventory.yaml \ + --modulepath spec/fixtures/modules \ + architecture=${{ matrix.architecture }} \ + version=${{ matrix.version }} \ + console_password=${{ secrets.CONSOLE_PASSWORD }} \ + code_manager_auto_configure=true + - name: Run add_replica plan + timeout-minutes: 60 + run: | + bundle exec bolt plan run peadm_spec::add_replica -v \ + --inventoryfile spec/fixtures/litmus_inventory.yaml \ + --modulepath spec/fixtures/modules \ + --stream + - name: Verify that replica was added + timeout-minutes: 10 + run: | + bundle exec bolt plan run peadm_spec::verify_replica -v \ + --inventoryfile spec/fixtures/litmus_inventory.yaml \ + --modulepath spec/fixtures/modules + - name: Tear down test cluster + if: ${{ always() }} + continue-on-error: true + run: |- + if [ -f spec/fixtures/litmus_inventory.yaml ]; then + echo ::group::tear_down + 
bundle exec rake 'litmus:tear_down' + echo ::endgroup:: + echo ::group::info:request + cat request.json || true; echo + echo ::endgroup:: + fi diff --git a/.github/workflows/test-add-replica.yaml b/.github/workflows/test-add-replica.yaml index 012173c8..5fe8787f 100644 --- a/.github/workflows/test-add-replica.yaml +++ b/.github/workflows/test-add-replica.yaml @@ -53,7 +53,7 @@ jobs: echo ::group::info:bundler bundle env echo ::endgroup:: - - name: Provision test cluster (specified architecture with added DR) + - name: Provision test cluster (specified architecture with spare replica) timeout-minutes: 15 run: | echo ::group::prepare @@ -68,7 +68,7 @@ jobs: --modulepath spec/fixtures/modules \ provider=provision_service \ image=${{ matrix.image }} \ - architecture=${{ matrix.architecture }}-with-dr + architecture=${{ matrix.architecture }}-and-spare-replica echo ::endgroup:: echo ::group::info:request cat request.json || true; echo @@ -84,11 +84,19 @@ jobs: --modulepath spec/fixtures/modules \ architecture=${{ matrix.architecture }} \ version=${{ matrix.version }} \ - console_password=${{ secrets.CONSOLE_PASSWORD }} + console_password=${{ secrets.CONSOLE_PASSWORD }} \ + code_manager_auto_configure=true - name: Run add_replica plan - timeout-minutes: 30 + timeout-minutes: 60 run: | bundle exec bolt plan run peadm_spec::add_replica -v \ + --inventoryfile spec/fixtures/litmus_inventory.yaml \ + --modulepath spec/fixtures/modules \ + --stream + - name: Verify that replica was added + timeout-minutes: 10 + run: | + bundle exec bolt plan run peadm_spec::verify_replica -v \ --inventoryfile spec/fixtures/litmus_inventory.yaml \ --modulepath spec/fixtures/modules - name: Wait as long as the file ${HOME}/pause file is present diff --git a/REFERENCE.md b/REFERENCE.md index f87f14f2..065b6d75 100644 --- a/REFERENCE.md +++ b/REFERENCE.md @@ -90,6 +90,10 @@ * [`peadm::add_compiler`](#peadm--add_compiler): Add a new compiler to a PE architecture or replace an existing one with new 
configuration. * [`peadm::add_database`](#peadm--add_database) +* [`peadm::add_replica`](#peadm--add_replica): Add or replace a replica host. +Supported use cases: +1: Adding a replica to an existing primary. +2: The existing replica is broken, we have a fresh new VM we want to provision the replica to. * [`peadm::backup`](#peadm--backup): Backup puppet primary configuration * [`peadm::backup_ca`](#peadm--backup_ca) * [`peadm::convert`](#peadm--convert): Convert an existing PE cluster to a PEAdm-managed cluster @@ -103,9 +107,6 @@ #### Private Plans -* `peadm::add_replica`: Replace a replica host for a Standard or Large architecture. -Supported use cases: -1: The existing replica is broken, we have a fresh new VM we want to provision the replica to. * `peadm::misc::divert_code_manager`: This plan exists to account for a scenario where a PE XL * `peadm::modify_cert_extensions` * `peadm::subplans::component_install`: Install a new PEADM component @@ -1654,6 +1655,51 @@ Optional[Enum[ +Default value: `undef` + +### `peadm::add_replica` + +Add or replace a replica host. +Supported use cases: +1: Adding a replica to an existing primary. +2: The existing replica is broken, we have a fresh new VM we want to provision the replica to. + +#### Parameters + +The following parameters are available in the `peadm::add_replica` plan: + +* [`primary_host`](#-peadm--add_replica--primary_host) +* [`replica_host`](#-peadm--add_replica--replica_host) +* [`replica_postgresql_host`](#-peadm--add_replica--replica_postgresql_host) +* [`token_file`](#-peadm--add_replica--token_file) + +##### `primary_host` + +Data type: `Peadm::SingleTargetSpec` + +- The hostname and certname of the primary Puppet server + +##### `replica_host` + +Data type: `Peadm::SingleTargetSpec` + +- The hostname and certname of the replica VM + +##### `replica_postgresql_host` + +Data type: `Optional[Peadm::SingleTargetSpec]` + +- The hostname and certname of the host with the replica PE-PosgreSQL database. 
+Can be a separate host in an XL architecture, or undef in Standard or Large. + +Default value: `undef` + +##### `token_file` + +Data type: `Optional[String]` + +- (optional) the token file in a different location than the default. + Default value: `undef` ### `peadm::backup` diff --git a/documentation/add_replica.md b/documentation/add_replica.md new file mode 100644 index 00000000..df6988b2 --- /dev/null +++ b/documentation/add_replica.md @@ -0,0 +1,89 @@ +# Add Replica + +- [Add Replica](#add-replica) + - [Introduction](#introduction) + - [Adding a replica to standard and large infrastructures](#adding-a-replica-to-standard-and-large-infrastructures) + - [Adding a Replica to extra large infrastructure](#adding-a-replica-to-extra-large-infrastructure) + - [Running the `add_replica` plan](#running-the-add_replica-plan) + - [Parameters](#parameters) + +## Introduction + +The `peadm::add_replica` plan is designed to set up disaster recovery (DR) of a Puppet Enterprise primary server. This is achieved by adding a primary replica to your system. Although this plan doesn't change your PE architecture, adding DR depends on the structure of your current architecture. + +In the case of standard and large installations, DR can be achieved by simply utilising this plan and adding the primary replica. In the case of an extra large infrastructure which includes an external DB, a replica DB is also required. This can be done with the `peadm::add_database` plan. For more detail see [Adding External Databases with peadm::add_database](expanding.md#adding-external-databases-with-peadmadd_database). + +Please note, to set up a replica you must have Code Manager configured. To learn more about Code Manager, please see [Puppet Docs](https://help.puppet.com). + +... + +## Adding a replica to standard and large infrastructures +Below is an example of the required parameters to add a primary replica. These parameters can be passed in-line or as a params file. 
+ +```json +{ + "primary_host": "pe-core-0.lab1.puppet.vm", + "replica_host": "pe-replica-0.lab1.puppet.vm" +} +``` + +## Adding a Replica to extra large infrastructure +In the below example, we already have an external DB and a replica of it. This means that we should pass in the additional parameter of the replica's hostname. + +```json +{ + "primary_host": "pe-xl-core-0.lab1.puppet.vm", + "replica_host": "pe-xl-replica-0.lab1.puppet.vm", + "replica_postgresql_host": "pe-xl-postgresql-replica-0.lab1.puppet.vm" +} +``` + +## Running the `add_replica` plan + +``` +bolt plan run peadm::add_replica --params @params.json +``` + +The plan performs the following steps: + +1. Installs the Puppet agent on the new replica host. +2. Updates classifications with new replica configuration. +3. Provisions the infrastructure with PE. + +## Parameters + +### `primary_host` + +- **Type:** `Peadm::SingleTargetSpec` +- **Description:** + The hostname and certname of the PE primary server. + +### `replica_host` + +- **Type:** `Peadm::SingleTargetSpec` +- **Description:** + The hostname and certname of the replica VM. + +### `replica_postgresql_host` + +- **Type:** `Optional[Peadm::SingleTargetSpec]` +- **Description:** + The hostname and certname of the host with the replica PE-PostgreSQL database, i.e. the database server the new replica will use. Can be a separate host in an Extra Large architecture; leave it unset in Standard or Large. + +### `token_file` + +- **Type:** `Optional[String]` +- **Description:** + The path to the token file; only required if it is located in a non-default location. + + + +## Replica promotion and Replica replacement + +Please see the notes on these scenarios in [automated Recovery](automated_recovery.md#recover-from-failed-primary-puppet-server) + +## Known Issue on Puppet Enterprise Version 2021.x + +When running the add_replica plan to replace an existing replica in your infrastructure, the old replica will not be removed as expected. Instead, both the old and new primary replicas will be present. 
+ +This is a known issue and will be fixed in a future release. \ No newline at end of file diff --git a/documentation/expanding.md b/documentation/expanding.md index 9a471576..875e417d 100644 --- a/documentation/expanding.md +++ b/documentation/expanding.md @@ -32,7 +32,7 @@ An external PE-PostgreSQL server is the component which separates the Extra Larg ### Add an external PE-PostgreSQL server in all scenarios - bolt plan run peadm::add_database -t primary_host=:w + bolt plan run peadm::add_database -t primary_host= ## Enable Disaster Recovery and Add a Replica with peadm::add_replica diff --git a/plans/add_replica.pp b/plans/add_replica.pp index 95bfb5e1..98ec78b9 100644 --- a/plans/add_replica.pp +++ b/plans/add_replica.pp @@ -1,16 +1,12 @@ -# This plan is still in development and currently considered experimental. -# -# @api private -# -# @summary Replace a replica host for a Standard or Large architecture. +# @summary Add or replace a replica host. # Supported use cases: -# 1: The existing replica is broken, we have a fresh new VM we want to provision the replica to. +# 1: Adding a replica to an existing primary. +# 2: The existing replica is broken, we have a fresh new VM we want to provision the replica to. # @param primary_host - The hostname and certname of the primary Puppet server # @param replica_host - The hostname and certname of the replica VM # @param replica_postgresql_host - The hostname and certname of the host with the replica PE-PosgreSQL database. -# @param token_file - (optional) the token file in a different location than the default. -# # Can be a separate host in an XL architecture, or undef in Standard or Large. +# @param token_file - (optional) the token file in a different location than the default. plan peadm::add_replica( # Standard or Large Peadm::SingleTargetSpec $primary_host, @@ -90,10 +86,10 @@ } run_plan('peadm::util::update_classification', $primary_target, - server_a_host => $replica_avail_group_letter ? 
{ 'A' => $replica_host, default => undef }, - server_b_host => $replica_avail_group_letter ? { 'B' => $replica_host, default => undef }, - internal_compiler_a_pool_address => $replica_avail_group_letter ? { 'A' => $replica_host, default => undef }, - internal_compiler_b_pool_address => $replica_avail_group_letter ? { 'B' => $replica_host, default => undef }, + server_a_host => $replica_avail_group_letter ? { 'A' => $replica_target.peadm::certname(), default => undef }, + server_b_host => $replica_avail_group_letter ? { 'B' => $replica_target.peadm::certname(), default => undef }, + internal_compiler_a_pool_address => $replica_avail_group_letter ? { 'A' => $replica_target.peadm::certname(), default => undef }, + internal_compiler_b_pool_address => $replica_avail_group_letter ? { 'B' => $replica_target.peadm::certname(), default => undef }, peadm_config => $peadm_config ) diff --git a/spec/acceptance/peadm_spec/plans/add_replica.pp b/spec/acceptance/peadm_spec/plans/add_replica.pp index b2b053d8..7fc0f869 100644 --- a/spec/acceptance/peadm_spec/plans/add_replica.pp +++ b/spec/acceptance/peadm_spec/plans/add_replica.pp @@ -9,7 +9,7 @@ } $primary_host = $t.filter |$n| { $n.vars['role'] == 'primary' } - $replica_host = $t.filter |$n| { $n.vars['role'] == 'replica' } + $replica_host = $t.filter |$n| { $n.vars['role'] == 'spare-replica' } $replica_postgresql_host = $t.filter |$n| { $n.vars['role'] == 'replica-pdb-postgresql' } if $replica_host == [] { diff --git a/spec/acceptance/peadm_spec/plans/provision_test_cluster.pp b/spec/acceptance/peadm_spec/plans/provision_test_cluster.pp index bce84258..aecd8197 100644 --- a/spec/acceptance/peadm_spec/plans/provision_test_cluster.pp +++ b/spec/acceptance/peadm_spec/plans/provision_test_cluster.pp @@ -11,12 +11,24 @@ 'standard-with-dr': { ['primary', 'replica'] } + 'standard-and-spare-replica': { + ['primary', 'spare-replica'] + } + 'standard-with-dr-and-spare-replica': { + ['primary', 'replica', 'spare-replica'] + } 'large': { 
['primary', 'compiler'] } 'large-with-dr': { ['primary', 'compiler', 'replica', 'compiler'] } + 'large-and-spare-replica': { + ['primary', 'compiler', 'compiler', 'spare-replica'] + } + 'large-with-dr-and-spare-replica': { + ['primary', 'compiler', 'replica', 'compiler', 'spare-replica'] + } 'extra-large': { ['primary', 'primary-pdb-postgresql', 'compiler'] } @@ -32,6 +44,9 @@ 'extra-large-with-extra-compiler': { ['primary', 'primary-pdb-postgresql', 'compiler', 'unconfigured-compiler'] } + 'extra-large-and-spare-replica': { + ['primary', 'primary-pdb-postgresql', 'compiler', 'compiler', 'spare-replica'] + } 'extra-large-with-dr-and-spare-replica': { ['primary', 'primary-pdb-postgresql', 'compiler', 'replica', 'replica-pdb-postgresql', 'compiler', 'spare-replica'] diff --git a/spec/acceptance/peadm_spec/plans/verify_replica.pp b/spec/acceptance/peadm_spec/plans/verify_replica.pp new file mode 100644 index 00000000..e1f484eb --- /dev/null +++ b/spec/acceptance/peadm_spec/plans/verify_replica.pp @@ -0,0 +1,20 @@ +plan peadm_spec::verify_replica() { + $t = get_targets('*') + wait_until_available($t) + + $primary_host = $t.filter |$n| { $n.vars['role'] == 'primary' } + + if $primary_host == [] { + fail_plan('"primary" role missing from inventory, cannot continue') + } + + $result = run_task('peadm::get_peadm_config', $primary_host, '_catch_errors' => true).first.to_data() + + $replica_host = $result['value']['params']['replica_host'] + + if $replica_host == undef or $replica_host == null { + fail_plan("No replica was found in the PE configuration") + } else { + out::message("Replica added successfully: ${replica_host}") + } +}