From cd18d9d45c32fd289bb1e9765550d7a967821654 Mon Sep 17 00:00:00 2001 From: Ron Date: Mon, 22 Jul 2024 08:55:49 +0800 Subject: [PATCH] Improve dashboard and alarms (#1257) * Add unit and link alarm to dashboard * Separate alarms for testnet * Separate env for testnet * Add gitignore * Remove the Unit * Remove the unit * Fix latency config --- .gitignore | 1 + web/packages/api/src/status.ts | 4 ++-- web/packages/operations/.env.production | 7 +++++++ web/packages/operations/.env.testnet | 7 +++++++ web/packages/operations/src/alarm.ts | 25 +++++++++++++++---------- 5 files changed, 32 insertions(+), 12 deletions(-) create mode 100644 web/packages/operations/.env.production create mode 100644 web/packages/operations/.env.testnet diff --git a/.gitignore b/.gitignore index a4e2d980a3..c30bd0a495 100644 --- a/.gitignore +++ b/.gitignore @@ -38,3 +38,4 @@ control/target/ web/packages/operations/.env.polkadot web/packages/operations/.env.rococo lodestar +.pnpm-store diff --git a/web/packages/api/src/status.ts b/web/packages/api/src/status.ts index 1843764f54..d0138983c8 100644 --- a/web/packages/api/src/status.ts +++ b/web/packages/api/src/status.ts @@ -92,8 +92,8 @@ export const bridgeStatusInfo = async ( options = { polkadotBlockTimeInSeconds: 6, ethereumBlockTimeInSeconds: 12, - toPolkadotCheckIntervalInBlock: BlockLatencyThreshold.ToEthereum, - toEthereumCheckIntervalInBlock: BlockLatencyThreshold.ToPolkadot, + toPolkadotCheckIntervalInBlock: BlockLatencyThreshold.ToPolkadot, + toEthereumCheckIntervalInBlock: BlockLatencyThreshold.ToEthereum, } ): Promise => { // Beefy status diff --git a/web/packages/operations/.env.production b/web/packages/operations/.env.production new file mode 100644 index 0000000000..4a38bcd74d --- /dev/null +++ b/web/packages/operations/.env.production @@ -0,0 +1,7 @@ +NODE_ENV=polkadot_mainnet +REACT_APP_INFURA_KEY= +AWS_ACCESS_KEY_ID= +AWS_SECRET_ACCESS_KEY= +AWS_REGION=eu-central-1 
+BRIDGE_STALE_SNS_TOPIC=arn:aws:sns:eu-central-1:232374692033:PD +ACCOUNT_BALANCE_SNS_TOPIC=arn:aws:sns:eu-central-1:232374692033:PD-WALLET diff --git a/web/packages/operations/.env.testnet b/web/packages/operations/.env.testnet new file mode 100644 index 0000000000..ff37d51178 --- /dev/null +++ b/web/packages/operations/.env.testnet @@ -0,0 +1,7 @@ +NODE_ENV=rococo_sepolia +REACT_APP_INFURA_KEY= +AWS_ACCESS_KEY_ID= +AWS_SECRET_ACCESS_KEY= +AWS_REGION=eu-central-1 +BRIDGE_STALE_SNS_TOPIC=arn:aws:sns:eu-central-1:232374692033:Testnet +ACCOUNT_BALANCE_SNS_TOPIC=arn:aws:sns:eu-central-1:232374692033:Testnet diff --git a/web/packages/operations/src/alarm.ts b/web/packages/operations/src/alarm.ts index 4854daa484..144d157a99 100644 --- a/web/packages/operations/src/alarm.ts +++ b/web/packages/operations/src/alarm.ts @@ -9,6 +9,11 @@ const CLOUD_WATCH_NAME_SPACE = "SnowbridgeMetrics" const BRIDGE_STALE_SNS_TOPIC = process.env["BRIDGE_STALE_SNS_TOPIC"] || "" const ACCOUNT_BALANCE_SNS_TOPIC = process.env["ACCOUNT_BALANCE_SNS_TOPIC"] || "" +const LatencyDashboard = + "https://eu-central-1.console.aws.amazon.com/cloudwatch/home?region=eu-central-1#dashboards/dashboard/Latency" +const BalanceDashboard = + "https://eu-central-1.console.aws.amazon.com/cloudwatch/home?region=eu-central-1#dashboards/dashboard/Balance" + export const sendMetrics = async (metrics: status.AllMetrics) => { const { AlarmReason, InsufficientBalanceThreshold } = status let client = new CloudWatchClient({}) @@ -227,9 +232,9 @@ export const initializeAlarms = async () => { // Alarm for stale bridge cloudWatchAlarms.push( new PutMetricAlarmCommand({ - AlarmName: AlarmReason.BeefyStale.toString(), + AlarmName: AlarmReason.BeefyStale.toString() + "-" + name, MetricName: AlarmReason.BeefyStale.toString(), - AlarmDescription: AlarmReason.BeefyStale.toString(), + AlarmDescription: LatencyDashboard, Statistic: "Average", ComparisonOperator: "GreaterThanThreshold", AlarmActions: [BRIDGE_STALE_SNS_TOPIC], @@ 
-240,9 +245,9 @@ export const initializeAlarms = async () => { ) cloudWatchAlarms.push( new PutMetricAlarmCommand({ - AlarmName: AlarmReason.BeaconStale.toString(), + AlarmName: AlarmReason.BeaconStale.toString() + "-" + name, MetricName: AlarmReason.BeaconStale.toString(), - AlarmDescription: AlarmReason.BeaconStale.toString(), + AlarmDescription: LatencyDashboard, Statistic: "Average", ComparisonOperator: "GreaterThanThreshold", AlarmActions: [BRIDGE_STALE_SNS_TOPIC], @@ -253,9 +258,9 @@ export const initializeAlarms = async () => { ) cloudWatchAlarms.push( new PutMetricAlarmCommand({ - AlarmName: AlarmReason.ToEthereumChannelStale.toString(), + AlarmName: AlarmReason.ToEthereumChannelStale.toString() + "-" + name, MetricName: AlarmReason.ToEthereumChannelStale.toString(), - AlarmDescription: AlarmReason.ToEthereumChannelStale.toString(), + AlarmDescription: LatencyDashboard, Statistic: "Average", ComparisonOperator: "GreaterThanThreshold", AlarmActions: [BRIDGE_STALE_SNS_TOPIC], @@ -266,9 +271,9 @@ export const initializeAlarms = async () => { ) cloudWatchAlarms.push( new PutMetricAlarmCommand({ - AlarmName: AlarmReason.ToPolkadotChannelStale.toString(), + AlarmName: AlarmReason.ToPolkadotChannelStale.toString() + "-" + name, MetricName: AlarmReason.ToPolkadotChannelStale.toString(), - AlarmDescription: AlarmReason.ToPolkadotChannelStale.toString(), + AlarmDescription: LatencyDashboard, Statistic: "Average", ComparisonOperator: "GreaterThanThreshold", AlarmActions: [BRIDGE_STALE_SNS_TOPIC], @@ -283,9 +288,9 @@ export const initializeAlarms = async () => { // Alarm for account balance insufficient let accountBalanceAlarm = new PutMetricAlarmCommand({ - AlarmName: AlarmReason.AccountBalanceInsufficient.toString(), + AlarmName: AlarmReason.AccountBalanceInsufficient.toString() + "-" + name, MetricName: AlarmReason.AccountBalanceInsufficient.toString(), - AlarmDescription: AlarmReason.AccountBalanceInsufficient.toString(), + AlarmDescription: BalanceDashboard, 
Statistic: "Average", ComparisonOperator: "GreaterThanThreshold", AlarmActions: [ACCOUNT_BALANCE_SNS_TOPIC],