Skip to content

Commit

Permalink
Change all master occurrences to primary
Browse files Browse the repository at this point in the history
  • Loading branch information
amandahla committed Feb 21, 2024
1 parent c9ff640 commit 5aa3773
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 76 deletions.
54 changes: 27 additions & 27 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def _upgrade_charm(self, event: UpgradeCharmEvent) -> None:
self._store_certificates()

# NOTE: This is the case of a single unit deployment. If that's the case, the charm
# doesn't need to check for failovers or figure out who the master is.
# doesn't need to check for failovers or figure out who the primary is.
if not self._peers.units:
# NOTE: pod restart or charm upgrade can come along with pod IP changes, and
# during those process, the leader-elected and any relation events are not emitted.
Expand All @@ -126,9 +126,9 @@ def _upgrade_charm(self, event: UpgradeCharmEvent) -> None:
# unit is not a leader, add a key to the application databag so peer_relation_changed
# triggers for the leader unit and application databag is updated.
if self.unit.is_leader():
info = self.sentinel.get_master_info(host=k8s_host)
logger.debug(f"Master info: {info}")
logger.info(f"Unit {self.unit.name} updating master info to {info['ip']}")
info = self.sentinel.get_primary_info(host=k8s_host)
logger.debug(f"primary info: {info}")
logger.info(f"Unit {self.unit.name} updating primary info to {info['ip']}")
self._peers.data[self.app][LEADER_HOST_KEY] = info["ip"]
else:
relations = self.model.relations[REDIS_REL_NAME]
Expand All @@ -152,15 +152,15 @@ def _leader_elected(self, event) -> None:
self._peers.data[self.app][SENTINEL_PASSWORD_KEY] = self._generate_password()
# NOTE: if current_primary is not set yet, the application is being deployed for the
# first time. Otherwise, we check for failover in case previous juju leader was redis
# master as well.
# primary as well.
if self.current_primary is None:
logger.info(
"Initial replication, setting leader-host to {}".format(self.unit_pod_hostname)
)
self._peers.data[self.app][LEADER_HOST_KEY] = self.unit_pod_hostname
else:
# TODO extract to method shared with relation_departed
self._update_application_master()
self._update_application_primary()
self._update_quorum()
if not self._is_failover_finished():
logger.info("Failover didn't finish, deferring")
Expand Down Expand Up @@ -199,21 +199,21 @@ def _update_status(self, _) -> None:
"""
logger.info("Beginning update_status")
if self.unit.is_leader():
self._update_application_master()
self._update_application_primary()
self._redis_check()

def _peer_relation_changed(self, event):
"""Handle relation for joining units."""
if not self._master_up_to_date():
logger.error(f"Unit {self.unit.name} doesn't agree on tracked master")
if not self._primary_up_to_date():
logger.error(f"Unit {self.unit.name} doesn't agree on tracked primary")
if not self._is_failover_finished():
logger.info("Failover didn't finish, deferring")
event.defer()
return

if self.unit.is_leader():
# Update who the current master is
self._update_application_master()
# Update who the current primary is
self._update_application_primary()

# (DEPRECATE) If legacy relation exists, layer might need to be
# reconfigured to remove auth
Expand Down Expand Up @@ -247,8 +247,8 @@ def _peer_relation_departed(self, event):
if not self.unit.is_leader():
return

if not self._master_up_to_date():
self._update_application_master()
if not self._primary_up_to_date():
self._update_application_primary()

# Quorum is updated beforehand, since removal of more units than current majority
# could lead to the cluster never reaching quorum.
Expand Down Expand Up @@ -352,7 +352,7 @@ def _redis_extra_flags(self) -> str:
extra_flags = [
f"--requirepass {self._get_password()}",
"--bind 0.0.0.0",
f"--masterauth {self._get_password()}",
f"--primaryauth {self._get_password()}",
f"--replica-announce-ip {self.unit_pod_hostname}",
]

Expand All @@ -376,7 +376,7 @@ def _redis_extra_flags(self) -> str:
f"--tls-ca-cert-file {self._storage_path}/ca.crt",
]

# Check that current unit is master
# Check that current unit is primary
if self.current_primary != self.unit_pod_hostname:
extra_flags += [f"--replicaof {self.current_primary} {REDIS_PORT}"]

Expand Down Expand Up @@ -456,7 +456,7 @@ def unit_pod_hostname(self, name="") -> str:

@property
def current_primary(self) -> Optional[str]:
    """Return the hostname of the current Redis primary, if known.

    The value is read from the peer application databag under
    LEADER_HOST_KEY; it is None until initial replication has stored
    a leader host.
    """
    return self._peers.data[self.app].get(LEADER_HOST_KEY)

def _valid_app_databag(self) -> bool:
Expand Down Expand Up @@ -574,33 +574,33 @@ def _redis_client(self, hostname="localhost") -> Redis:
finally:
client.close()

def _primary_up_to_date(self, host="0.0.0.0") -> bool:
    """Check whether the stored primary matches the one Sentinel tracks.

    Args:
        host: address used to connect to Sentinel.

    Returns:
        True when Sentinel's reported primary IP equals the stored
        current_primary and the primary is not flagged subjectively
        down ("s_down"); False otherwise, including when Sentinel
        could not be queried.
    """
    info = self.sentinel.get_primary_info(host=host)
    if info is None:
        return False
    elif (info["ip"] == self.current_primary) and ("s_down" not in info["flags"]):
        return True

    return False

def _update_application_primary(self) -> None:
    """Use Sentinel to update the current primary hostname.

    Queries Sentinel for the primary it tracks and writes that IP to
    the peer application databag under LEADER_HOST_KEY. Logs a warning
    and leaves the databag untouched when Sentinel cannot be reached.
    """
    info = self.sentinel.get_primary_info()
    logger.debug(f"primary info: {info}")
    if info is None:
        # Best-effort: keep the previously stored host rather than
        # clearing it when Sentinel is unreachable.
        logger.warning("Could not update current primary")
        return

    logger.info(f"Unit {self.unit.name} updating primary info to {info['ip']}")
    self._peers.data[self.app][LEADER_HOST_KEY] = info["ip"]

def _sentinel_failover(self, departing_unit_name: str) -> None:
"""Try to failover the current master.
"""Try to failover the current primary.
This method should only be called from juju leader, to avoid more than one
sentinel sending failovers concurrently.
Expand Down Expand Up @@ -628,7 +628,7 @@ def _is_failover_finished(self, host="localhost") -> bool:
True if failover is finished, false otherwise
"""
logger.warning("Checking if failover is finished.")
info = self.sentinel.get_master_info(host=host)
info = self.sentinel.get_primary_info(host=host)
if info is None:
logger.warning("Could not check failover status")
return False
Expand Down
18 changes: 9 additions & 9 deletions src/sentinel.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,12 +108,12 @@ def _render_sentinel_config_file(self) -> None:
# render the template file with the correct values.
rendered = template.render(
hostname=self.charm.unit_pod_hostname,
master_name=self.charm._name,
primary_name=self.charm._name,
sentinel_port=SENTINEL_PORT,
redis_master=self.charm.current_primary,
redis_primary=self.charm.current_primary,
redis_port=REDIS_PORT,
quorum=self.expected_quorum,
master_password=self.charm._get_password(),
primary_password=self.charm._get_password(),
sentinel_password=self.charm.get_sentinel_password(),
)
self._copy_file(SENTINEL_CONFIG_PATH, rendered, "sentinel")
Expand Down Expand Up @@ -147,17 +147,17 @@ def _copy_file(self, path: str, rendered: str, container: str) -> None:
group="redis",
)

def get_primary_info(self, host="0.0.0.0") -> Optional[dict]:
    """Connect to Sentinel and return info about the current primary.

    Args:
        host: address used to connect to Sentinel.

    Returns:
        A dict with Sentinel's view of the primary (keys such as
        "ip" and "flags"), or None when Sentinel cannot be reached.
    """
    with self.sentinel_client(host) as sentinel:
        try:
            # NOTE: the on-the-wire subcommand is still "MASTER" —
            # Redis Sentinel has no "SENTINEL primary" subcommand, so
            # the protocol string must not follow the charm-side rename.
            primary_info = sentinel.execute_command(f"SENTINEL MASTER {self.charm._name}")

            # Sentinel replies with a flat list:
            # ['key1', 'value1', 'key2', 'value2', ...]
            # pair it up into a dict for readable access.
            return dict(zip(primary_info[::2], primary_info[1::2]))

        except (ConnectionError, TimeoutError) as e:
            logger.error("Error when connecting to sentinel: {}".format(e))
    return None
Expand Down
12 changes: 6 additions & 6 deletions templates/sentinel.conf.j2
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
port {{ sentinel_port }}
{# The "sentinel monitor"/"auth-pass" directives are Redis keywords and must
   keep their spelling; only the template variables follow the rename. #}
sentinel monitor {{ primary_name }} {{ redis_primary }} {{ redis_port }} {{ quorum }}
sentinel down-after-milliseconds {{ primary_name }} 5000
sentinel failover-timeout {{ primary_name }} 30000
sentinel parallel-syncs {{ primary_name }} 1

{% if primary_password != None %}
sentinel auth-pass {{ primary_name }} {{ primary_password }}
{% endif %}
requirepass {{ sentinel_password }}

Expand Down
2 changes: 1 addition & 1 deletion tests/integration/test_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ async def test_sentinels_expected(ops_test: OpsTest):
logger.info("retrieved sentinel password for %s: %s", APP_NAME, password)

sentinel = Redis(address, password=password, port=26379)
sentinels_connected = sentinel.info("sentinel")["master0"]["sentinels"]
sentinels_connected = sentinel.info("sentinel")["primary0"]["sentinels"]

assert sentinels_connected == NUM_UNITS

Expand Down
40 changes: 20 additions & 20 deletions tests/integration/test_scaling.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ async def test_build_and_deploy(ops_test: OpsTest):
assert ops_test.model.applications[APP_NAME].units[0].workload_status == "active"


@pytest.mark.run(before="test_scale_down_departing_master")
@pytest.mark.run(before="test_scale_down_departing_primary")
async def test_scale_up_replication_after_failover(ops_test: OpsTest):
"""Trigger a failover and scale up the application, then test replication status."""
unit_map = await get_unit_map(ops_test)
Expand All @@ -60,22 +60,22 @@ async def test_scale_up_replication_after_failover(ops_test: OpsTest):
leader_address = await get_address(ops_test, unit_num=leader_num)
password = await get_password(ops_test, leader_num)

# Set some key on the master replica.
# Set some key on the primary replica.
leader_client = Redis(leader_address, password=password)
leader_client.set("testKey", "myValue")
leader_client.close()

sentinel_password = await get_sentinel_password(ops_test)
logger.info("retrieved sentinel password for %s: %s", APP_NAME, password)

# Trigger a master failover
# Trigger a primary failover
sentinel = Redis(leader_address, password=sentinel_password, port=26379, decode_responses=True)
sentinel.execute_command(f"SENTINEL failover {APP_NAME}")
# Give time so sentinel updates information of failover
time.sleep(60)

await ops_test.model.block_until(
lambda: "failover-status" not in sentinel.execute_command(f"SENTINEL MASTER {APP_NAME}"),
lambda: "failover-status" not in sentinel.execute_command(f"SENTINEL primary {APP_NAME}"),
timeout=60,
)

Expand All @@ -94,13 +94,13 @@ async def test_scale_up_replication_after_failover(ops_test: OpsTest):
timeout=1000,
)

master_info = sentinel.execute_command(f"SENTINEL MASTER {APP_NAME}")
master_info = dict(zip(master_info[::2], master_info[1::2]))
primary_info = sentinel.execute_command(f"SENTINEL primary {APP_NAME}")
primary_info = dict(zip(primary_info[::2], primary_info[1::2]))

# General checks that the system is aware of the new unit
assert master_info["num-slaves"] == "3"
assert master_info["quorum"] == "3"
assert master_info["num-other-sentinels"] == "3"
assert primary_info["num-slaves"] == "3"
assert primary_info["quorum"] == "3"
assert primary_info["num-other-sentinels"] == "3"

unit_map = await get_unit_map(ops_test)
# Check that the initial key is still replicated across units
Expand All @@ -112,7 +112,7 @@ async def test_scale_up_replication_after_failover(ops_test: OpsTest):


@pytest.mark.run(after="test_scale_up_replication_after_failover")
async def test_scale_down_departing_master(ops_test: OpsTest):
async def test_scale_down_departing_primary(ops_test: OpsTest):
"""Failover to the last unit and scale down."""
unit_map = await get_unit_map(ops_test)
logger.info("Unit mapping: {}".format(unit_map))
Expand All @@ -130,22 +130,22 @@ async def test_scale_down_departing_master(ops_test: OpsTest):
last_redis = Redis(last_address, password=password, decode_responses=True)

# INITIAL SETUP #
# Sanity check that the added unit on the previous test is not a master
assert last_redis.execute_command("ROLE")[0] != "master"
# Sanity check that the added unit on the previous test is not a primary
assert last_redis.execute_command("ROLE")[0] != "primary"

# Make the added unit a priority during failover
last_redis.execute_command("CONFIG SET replica-priority 1")
time.sleep(1)
# Failover so the last unit becomes master
# Failover so the last unit becomes primary
sentinel.execute_command(f"SENTINEL FAILOVER {APP_NAME}")
# Give time so sentinel updates information of failover
time.sleep(60)

await ops_test.model.block_until(
lambda: "failover-status" not in sentinel.execute_command(f"SENTINEL MASTER {APP_NAME}"),
lambda: "failover-status" not in sentinel.execute_command(f"SENTINEL primary {APP_NAME}"),
timeout=60,
)
assert last_redis.execute_command("ROLE")[0] == "master"
assert last_redis.execute_command("ROLE")[0] == "primary"
last_redis.close()

# SCALE DOWN #
Expand All @@ -158,12 +158,12 @@ async def test_scale_down_departing_master(ops_test: OpsTest):
assert client.get("testKey") == b"myValue"
client.close()

master_info = sentinel.execute_command(f"SENTINEL MASTER {APP_NAME}")
master_info = dict(zip(master_info[::2], master_info[1::2]))
primary_info = sentinel.execute_command(f"SENTINEL primary {APP_NAME}")
primary_info = dict(zip(primary_info[::2], primary_info[1::2]))

# General checks that the system is reconfigured after departed leader
assert master_info["num-slaves"] == "2"
assert master_info["quorum"] == "2"
assert master_info["num-other-sentinels"] == "2"
assert primary_info["num-slaves"] == "2"
assert primary_info["quorum"] == "2"
assert primary_info["num-other-sentinels"] == "2"

sentinel.close()
Loading

0 comments on commit 5aa3773

Please sign in to comment.