Skip to content

Commit

Permalink
Improve dashboard and alarms (#1257)
Browse files Browse the repository at this point in the history
* Add unit and link alarm to dashboard

* Separate alarms for testnet

* Separate env for testnet

* Add gitignore

* Remove the Unit

* Remove the unit

* Fix latency config
  • Loading branch information
yrong authored Jul 22, 2024
1 parent 2329fab commit cd18d9d
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 12 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,4 @@ control/target/
web/packages/operations/.env.polkadot
web/packages/operations/.env.rococo
lodestar
.pnpm-store
4 changes: 2 additions & 2 deletions web/packages/api/src/status.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ export const bridgeStatusInfo = async (
options = {
polkadotBlockTimeInSeconds: 6,
ethereumBlockTimeInSeconds: 12,
toPolkadotCheckIntervalInBlock: BlockLatencyThreshold.ToEthereum,
toEthereumCheckIntervalInBlock: BlockLatencyThreshold.ToPolkadot,
toPolkadotCheckIntervalInBlock: BlockLatencyThreshold.ToPolkadot,
toEthereumCheckIntervalInBlock: BlockLatencyThreshold.ToEthereum,
}
): Promise<BridgeStatusInfo> => {
// Beefy status
Expand Down
7 changes: 7 additions & 0 deletions web/packages/operations/.env.production
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
NODE_ENV=polkadot_mainnet
REACT_APP_INFURA_KEY=
AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=
AWS_REGION=eu-central-1
BRIDGE_STALE_SNS_TOPIC=arn:aws:sns:eu-central-1:232374692033:PD
ACCOUNT_BALANCE_SNS_TOPIC=arn:aws:sns:eu-central-1:232374692033:PD-WALLET
7 changes: 7 additions & 0 deletions web/packages/operations/.env.testnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
NODE_ENV=rococo_sepolia
REACT_APP_INFURA_KEY=
AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=
AWS_REGION=eu-central-1
BRIDGE_STALE_SNS_TOPIC=arn:aws:sns:eu-central-1:232374692033:Testnet
ACCOUNT_BALANCE_SNS_TOPIC=arn:aws:sns:eu-central-1:232374692033:Testnet
25 changes: 15 additions & 10 deletions web/packages/operations/src/alarm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ const CLOUD_WATCH_NAME_SPACE = "SnowbridgeMetrics"
const BRIDGE_STALE_SNS_TOPIC = process.env["BRIDGE_STALE_SNS_TOPIC"] || ""
const ACCOUNT_BALANCE_SNS_TOPIC = process.env["ACCOUNT_BALANCE_SNS_TOPIC"] || ""

const LatencyDashboard =
"https://eu-central-1.console.aws.amazon.com/cloudwatch/home?region=eu-central-1#dashboards/dashboard/Latency"
const BalanceDashboard =
"https://eu-central-1.console.aws.amazon.com/cloudwatch/home?region=eu-central-1#dashboards/dashboard/Balance"

export const sendMetrics = async (metrics: status.AllMetrics) => {
const { AlarmReason, InsufficientBalanceThreshold } = status
let client = new CloudWatchClient({})
Expand Down Expand Up @@ -227,9 +232,9 @@ export const initializeAlarms = async () => {
// Alarm for stale bridge
cloudWatchAlarms.push(
new PutMetricAlarmCommand({
AlarmName: AlarmReason.BeefyStale.toString(),
AlarmName: AlarmReason.BeefyStale.toString() + "-" + name,
MetricName: AlarmReason.BeefyStale.toString(),
AlarmDescription: AlarmReason.BeefyStale.toString(),
AlarmDescription: LatencyDashboard,
Statistic: "Average",
ComparisonOperator: "GreaterThanThreshold",
AlarmActions: [BRIDGE_STALE_SNS_TOPIC],
Expand All @@ -240,9 +245,9 @@ export const initializeAlarms = async () => {
)
cloudWatchAlarms.push(
new PutMetricAlarmCommand({
AlarmName: AlarmReason.BeaconStale.toString(),
AlarmName: AlarmReason.BeaconStale.toString() + "-" + name,
MetricName: AlarmReason.BeaconStale.toString(),
AlarmDescription: AlarmReason.BeaconStale.toString(),
AlarmDescription: LatencyDashboard,
Statistic: "Average",
ComparisonOperator: "GreaterThanThreshold",
AlarmActions: [BRIDGE_STALE_SNS_TOPIC],
Expand All @@ -253,9 +258,9 @@ export const initializeAlarms = async () => {
)
cloudWatchAlarms.push(
new PutMetricAlarmCommand({
AlarmName: AlarmReason.ToEthereumChannelStale.toString(),
AlarmName: AlarmReason.ToEthereumChannelStale.toString() + "-" + name,
MetricName: AlarmReason.ToEthereumChannelStale.toString(),
AlarmDescription: AlarmReason.ToEthereumChannelStale.toString(),
AlarmDescription: LatencyDashboard,
Statistic: "Average",
ComparisonOperator: "GreaterThanThreshold",
AlarmActions: [BRIDGE_STALE_SNS_TOPIC],
Expand All @@ -266,9 +271,9 @@ export const initializeAlarms = async () => {
)
cloudWatchAlarms.push(
new PutMetricAlarmCommand({
AlarmName: AlarmReason.ToPolkadotChannelStale.toString(),
AlarmName: AlarmReason.ToPolkadotChannelStale.toString() + "-" + name,
MetricName: AlarmReason.ToPolkadotChannelStale.toString(),
AlarmDescription: AlarmReason.ToPolkadotChannelStale.toString(),
AlarmDescription: LatencyDashboard,
Statistic: "Average",
ComparisonOperator: "GreaterThanThreshold",
AlarmActions: [BRIDGE_STALE_SNS_TOPIC],
Expand All @@ -283,9 +288,9 @@ export const initializeAlarms = async () => {

// Alarm for account balance insufficient
let accountBalanceAlarm = new PutMetricAlarmCommand({
AlarmName: AlarmReason.AccountBalanceInsufficient.toString(),
AlarmName: AlarmReason.AccountBalanceInsufficient.toString() + "-" + name,
MetricName: AlarmReason.AccountBalanceInsufficient.toString(),
AlarmDescription: AlarmReason.AccountBalanceInsufficient.toString(),
AlarmDescription: BalanceDashboard,
Statistic: "Average",
ComparisonOperator: "GreaterThanThreshold",
AlarmActions: [ACCOUNT_BALANCE_SNS_TOPIC],
Expand Down

0 comments on commit cd18d9d

Please sign in to comment.