Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Github Automation App to monitor the critical workflows #82

Merged
merged 1 commit into from
Oct 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions DEVELOPER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ So you want to contribute code to this project? Excellent! We're glad you're her
- `cdk deploy OpenSearchMetricsNginxReadonly`: To deploy the dashboard read only setup.
- `cdk deploy OpenSearchWAF`: To deploy the AWS WAF for the project ALB's.
- `cdk deploy OpenSearchMetrics-Monitoring`: To deploy the alerting stack which will monitor the step functions and URL of the project coming from [METRICS_HOSTED_ZONE](https://github.com/opensearch-project/opensearch-metrics/blob/main/infrastructure/lib/enums/project.ts)
- `cdk deploy OpenSearchMetrics-GitHubAutomationApp-Secret`: Creates the GitHub app secret which will be used during the GitHub app runtime.
peterzhuamazon marked this conversation as resolved.
Show resolved Hide resolved
- `cdk deploy OpenSearchMetrics-GitHubWorkflowMonitor-Alarms`: Creates the alarms that the GitHub Automation App uses to monitor the critical GitHub CI workflows.
- `cdk deploy OpenSearchMetrics-GitHubAutomationApp`: Creates the resources that launch the [GitHub Automation App](https://github.com/opensearch-project/automation-app), which listens to GitHub events and indexes the data to the Metrics cluster.

### Forking and Cloning
Expand Down
17 changes: 16 additions & 1 deletion infrastructure/lib/infrastructure-stack.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import { OpenSearchMetricsNginxCognito } from "./constructs/opensearchNginxProxy
import { OpenSearchMetricsMonitoringStack } from "./stacks/monitoringDashboard";
import { OpenSearchMetricsSecretsStack } from "./stacks/secrets";
import { GitHubAutomationApp } from "./stacks/gitHubAutomationApp";
import { GitHubWorkflowMonitorAlarms } from "./stacks/gitHubWorkflowMonitorAlarms";

export class InfrastructureStack extends Stack {
constructor(scope: Construct, id: string, props?: StackProps) {
Expand All @@ -29,20 +30,34 @@ export class InfrastructureStack extends Stack {
// Create VPC for the entire setup
const vpcStack = new VpcStack(app, "OpenSearchHealth-VPC", {});


// Create secret related to GitHub Automation App
const openSearchMetricsGitHubAutomationAppSecretStack = new OpenSearchMetricsSecretsStack(app, "OpenSearchMetrics-GitHubAutomationApp-Secret", {
secretName: 'opensearch-project-github-automation-app-creds'
});


// Alarms to Monitor the Critical GitHub CI workflows by the GitHub Automation App
const gitHubWorkflowMonitorAlarms = new GitHubWorkflowMonitorAlarms(app, "OpenSearchMetrics-GitHubWorkflowMonitor-Alarms", {
namespace: 'GitHubActions',
metricName: 'WorkflowRunFailures',
workflows: [
'Publish snapshots to maven',
'Run performance benchmark on pull request',
],
});

// Create resources to launch the GitHub Automation App
const gitHubAutomationApp = new GitHubAutomationApp(app, "OpenSearchMetrics-GitHubAutomationApp", {
vpc: vpcStack.vpc,
region: Project.REGION,
account: Project.AWS_ACCOUNT,
ami: Project.EC2_AMI_SSM.toString(),
secret: openSearchMetricsGitHubAutomationAppSecretStack.secret
secret: openSearchMetricsGitHubAutomationAppSecretStack.secret,
workflowAlarmsArn: gitHubWorkflowMonitorAlarms.workflowAlarmsArn
})


// Create OpenSearch Domain, roles, permissions, cognito setup, cross account OpenSearch access for jenkins
const openSearchDomainStack = new OpenSearchDomainStack(app, "OpenSearchHealth-OpenSearch", {
region: Project.REGION,
Expand Down
29 changes: 26 additions & 3 deletions infrastructure/lib/stacks/gitHubAutomationApp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ export interface GitHubAppProps {
readonly account: string;
readonly ami?: string
readonly secret: Secret;
readonly workflowAlarmsArn: string[];
}


Expand All @@ -52,7 +53,7 @@ export class GitHubAutomationApp extends Stack {
constructor(scope: Construct, id: string, props: GitHubAppProps) {
super(scope, id);

const instanceRole = this.createInstanceRole(props.secret.secretArn, props.account);
const instanceRole = this.createInstanceRole(props.secret.secretArn, props.account, props.workflowAlarmsArn);
this.githubAppRole = instanceRole;

this.asg = new AutoScalingGroup(this, 'OpenSearchMetrics-GitHubAutomationApp-Asg', {
Expand Down Expand Up @@ -91,7 +92,7 @@ export class GitHubAutomationApp extends Stack {
this.asg.addUserData(...this.getUserData(props.secret.secretName));
}

private createInstanceRole(secretArn: string, account: string): Role {
private createInstanceRole(secretArn: string, account: string, alarmsArn: string[]): Role {
const role = new Role(this, "OpenSearchMetrics-GitHubAutomationApp-Role", {
assumedBy: new CompositePrincipal(
new ServicePrincipal('ec2.amazonaws.com'),
Expand All @@ -114,6 +115,28 @@ export class GitHubAutomationApp extends Stack {
resources: [role.roleArn],
}),
);
role.addToPolicy(
new PolicyStatement({
effect: Effect.ALLOW,
actions: [
"cloudwatch:PutMetricAlarm",
"cloudwatch:DescribeAlarms",
"cloudwatch:SetAlarmState",
"cloudwatch:PutMetricData"
],
resources: alarmsArn,
}),
);
role.addToPolicy(
new PolicyStatement({
effect: Effect.ALLOW,
actions: [
"cloudwatch:PutMetricData",
],
resources: ["*"],

}),
);
return role;
}

Expand All @@ -127,7 +150,7 @@ export class GitHubAutomationApp extends Stack {
'sudo systemctl start docker',
'sudo curl -L https://github.com/docker/compose/releases/latest/download/docker-compose-$(uname -s)-$(uname -m) -o /usr/local/sbin/docker-compose',
'sudo chmod a+x /usr/local/sbin/docker-compose',
'git clone https://github.com/opensearch-project/automation-app.git',
'git clone https://github.com/opensearch-project/automation-app.git --branch 0.1.7',
`aws secretsmanager get-secret-value --secret-id ${secretName} --query SecretString --output text >> automation-app/.env`,
'cd automation-app/docker',
'PORT=8080 RESOURCE_CONFIG=configs/resources/opensearch-project-resource.yml OPERATION_CONFIG=configs/operations/github-merged-pulls-monitor.yml docker-compose -p github-merged-pulls-monitor up -d',
Expand Down
48 changes: 48 additions & 0 deletions infrastructure/lib/stacks/gitHubWorkflowMonitorAlarms.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/**
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

import {Duration, Stack} from "aws-cdk-lib";
import {Construct} from "constructs";
import {Alarm, ComparisonOperator, Metric, TreatMissingData} from "aws-cdk-lib/aws-cloudwatch";

/**
 * Configuration for the GitHub workflow monitor alarms stack.
 */
export interface AlarmProps {
/** CloudWatch namespace of the metric that each alarm evaluates. */
readonly namespace: string;
/** Name of the CloudWatch metric that each alarm evaluates. */
readonly metricName: string;
/**
 * Workflow names to monitor. One alarm is created per entry, and the entry
 * is used as the value of the metric's `Workflow` dimension.
 */
readonly workflows: string[];
}

/**
 * Stack that creates one CloudWatch alarm per monitored GitHub workflow.
 *
 * Each alarm evaluates the `Sum` of `props.metricName` in `props.namespace`,
 * filtered by the `Workflow` dimension, over a single 5-minute period and
 * fires when that sum is greater than or equal to 2.
 */
export class GitHubWorkflowMonitorAlarms extends Stack {
    /** ARNs of the alarms created by this stack, in the same order as `props.workflows`. */
    readonly workflowAlarmsArn: string[] = [];

    /** Name of the CloudWatch metric evaluated by every alarm in this stack. */
    readonly metricName: string;

    /**
     * @param scope Parent construct of this stack.
     * @param id Logical id of this stack.
     * @param props Metric namespace, metric name and the workflows to create alarms for.
     */
    constructor(scope: Construct, id: string, props: AlarmProps) {
        super(scope, id);

        // Previously declared but never assigned, which left the field undefined
        // at runtime and fails compilation under `strictPropertyInitialization`.
        this.metricName = props.metricName;

        props.workflows.forEach(workflow => {
            const workflowMetric = new Metric({
                namespace: props.namespace,
                metricName: props.metricName,
                dimensionsMap: {
                    Workflow: workflow,
                },
                period: Duration.minutes(5),
                statistic: 'Sum',
            });

            // The construct id and the alarm name share one identifier; whitespace is
            // stripped from the workflow name so it forms a single compact token.
            const alarmId = `OpenSearchMetrics-GitHubApp-${workflow.replace(/\s+/g, '')}-FailuresAlarm`;

            const alarm = new Alarm(this, alarmId, {
                alarmName: alarmId,
                metric: workflowMetric,
                threshold: 2,
                evaluationPeriods: 1,
                comparisonOperator: ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
                alarmDescription: `Alarm for ${workflow} failures`,
                actionsEnabled: true,
            });
            this.workflowAlarmsArn.push(alarm.alarmArn);
        });
    }
}
27 changes: 26 additions & 1 deletion infrastructure/test/gitHubAutomationApp-stack.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import Project from "../lib/enums/project";
import { VpcStack } from "../lib/stacks/vpc";
import { GitHubAutomationApp } from "../lib/stacks/gitHubAutomationApp";
import { OpenSearchMetricsSecretsStack } from "../lib/stacks/secrets";
import {GitHubWorkflowMonitorAlarms} from "../lib/stacks/gitHubWorkflowMonitorAlarms";


test('OpenSearch GitHub App Stack test ', () => {
Expand All @@ -20,12 +21,22 @@ test('OpenSearch GitHub App Stack test ', () => {
const openSearchMetricsGitHubAppSecretStack = new OpenSearchMetricsSecretsStack(app, "Test-OpenSearchMetrics-GitHubAutomationApp-Secret", {
secretName: 'test-github-app-creds'
});

const gitHubWorkflowMonitorAlarms = new GitHubWorkflowMonitorAlarms(app, "Test-OpenSearchMetrics-GitHubWorkflowMonitor-Alarms", {
namespace: 'GitHubActions',
metricName: 'WorkflowRunFailures',
workflows: [
'Publish snapshots to maven',
'Run performance benchmark on pull request',
],
});
const gitHubApp = new GitHubAutomationApp(app, "Test-OpenSearchMetrics-GitHubAutomationApp", {
vpc: vpcStack.vpc,
region: Project.REGION,
account: Project.AWS_ACCOUNT,
ami: Project.EC2_AMI_SSM.toString(),
secret: openSearchMetricsGitHubAppSecretStack.secret
secret: openSearchMetricsGitHubAppSecretStack.secret,
workflowAlarmsArn: gitHubWorkflowMonitorAlarms.workflowAlarmsArn
});

const template = Template.fromStack(gitHubApp);
Expand Down Expand Up @@ -65,4 +76,18 @@ test('OpenSearch GitHub App Stack test ', () => {
MaxSize: "1",
MinSize: "1",
})

// IAM Policy Test
template.resourceCountIs('AWS::IAM::Policy', 1);
template.hasResourceProperties('AWS::IAM::Policy', {
PolicyDocument: {
Statement: Match.arrayWith([
Match.objectLike({
Action: "cloudwatch:PutMetricData",
Effect: "Allow",
Resource: "*"
})
])
}
});
});
59 changes: 59 additions & 0 deletions infrastructure/test/githubWorkflowMonitorAlarms-stack.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/**
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

import {App} from "aws-cdk-lib";
import {GitHubWorkflowMonitorAlarms} from "../lib/stacks/gitHubWorkflowMonitorAlarms";
import {Match, Template} from "aws-cdk-lib/assertions";

// Verifies that the stack synthesizes exactly one CloudWatch alarm per configured
// workflow, and that every alarm carries the expected name, metric and threshold settings.
test('OpenSearch Workflow Monitor Alarms test ', () => {
    const app = new App();

    // Each workflow name is paired with the alarm name expected in the template
    // (the workflow name with all whitespace removed, wrapped in the fixed prefix/suffix).
    const expectedAlarms: ReadonlyArray<{ workflow: string; alarmName: string }> = [
        {
            workflow: 'Publish snapshots to Apache Maven repositories',
            alarmName: 'OpenSearchMetrics-GitHubApp-PublishsnapshotstoApacheMavenrepositories-FailuresAlarm',
        },
        {
            workflow: 'Publish snapshots to maven',
            alarmName: 'OpenSearchMetrics-GitHubApp-Publishsnapshotstomaven-FailuresAlarm',
        },
        {
            workflow: 'Run performance benchmark on pull request',
            alarmName: 'OpenSearchMetrics-GitHubApp-Runperformancebenchmarkonpullrequest-FailuresAlarm',
        },
    ];

    const gitHubWorkflowMonitorAlarms = new GitHubWorkflowMonitorAlarms(app, "Test-OpenSearchMetrics-GitHubWorkflowMonitor-Alarms", {
        namespace: 'GitHubActions',
        metricName: 'WorkflowRunFailures',
        workflows: expectedAlarms.map(expected => expected.workflow),
    });

    const template = Template.fromStack(gitHubWorkflowMonitorAlarms);

    // One alarm per workflow: catches alarms that are missing or unexpectedly added.
    template.resourceCountIs('AWS::CloudWatch::Alarm', expectedAlarms.length);

    for (const { workflow, alarmName } of expectedAlarms) {
        template.hasResourceProperties('AWS::CloudWatch::Alarm', {
            AlarmName: alarmName,
            Namespace: 'GitHubActions',
            MetricName: 'WorkflowRunFailures',
            Dimensions: Match.arrayWith([{
                Name: 'Workflow',
                Value: workflow
            }]),
            ComparisonOperator: 'GreaterThanOrEqualToThreshold',
            EvaluationPeriods: 1,
            Threshold: 2,
            Period: 300,
            Statistic: 'Sum'
        });
    }
});
Loading