From 5046862629ddde23491f67013c07961c5b240b27 Mon Sep 17 00:00:00 2001 From: Guilherme Nogueira Date: Tue, 29 Nov 2022 16:25:45 -0300 Subject: [PATCH 01/41] Update AWS CloudFormation template to include statements about stack deployment time and discouraging AWS root user usage. --- aws/cloudformation/scylla.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/aws/cloudformation/scylla.yaml b/aws/cloudformation/scylla.yaml index a4b9ce51..b1292c2c 100644 --- a/aws/cloudformation/scylla.yaml +++ b/aws/cloudformation/scylla.yaml @@ -7,6 +7,10 @@ Description: >- Use `SGAdmin` security group to enable access from outside to this cluster By default only SSH port is out to the outside world. + The deployment should take a couple of minutes, usually less than 10 minutes. + + You do not need AWS account root user for this deployment and you should avoid using it for such. + Caution: password authentication isn't enabled by default Metadata: From e60ff7c14df5e4fc4f4206368765cc3939e0297a Mon Sep 17 00:00:00 2001 From: Beni Peled Date: Wed, 30 Nov 2022 09:53:22 +0200 Subject: [PATCH 02/41] Update AWS CloudFormation template to include statements about stack deployment time and discouraging AWS root user usage Ref #416 --- aws/cloudformation/scylla.yaml.j2 | 3 +++ 1 file changed, 3 insertions(+) diff --git a/aws/cloudformation/scylla.yaml.j2 b/aws/cloudformation/scylla.yaml.j2 index a215b04c..87aa6fc4 100644 --- a/aws/cloudformation/scylla.yaml.j2 +++ b/aws/cloudformation/scylla.yaml.j2 @@ -7,6 +7,9 @@ Description: >- Use `SGAdmin` security group to enable access from outside to this cluster By default only SSH port is out to the outside world. + The deployment should take a couple of minutes, usually less than 10 minutes. + You do not need AWS account root user for this deployment and you should avoid using it for such. 
+ NOTE: the cluster password for the default user (cassandra) is the instance-id of the first node, therefore connecting to the cluster should be something like `cqlsh -u cassandra -p i-00a9d141da09ba159`. From 159c0eee5a54bbdfb2bd74914d974d6a84feb368 Mon Sep 17 00:00:00 2001 From: Beni Peled Date: Wed, 30 Nov 2022 09:58:18 +0200 Subject: [PATCH 03/41] aws: remove scylla.yaml file The scylla.yaml file generated by scylla.yaml.j2 and there is no tracking it on this repo --- aws/cloudformation/scylla.yaml | 1063 -------------------------------- 1 file changed, 1063 deletions(-) delete mode 100644 aws/cloudformation/scylla.yaml diff --git a/aws/cloudformation/scylla.yaml b/aws/cloudformation/scylla.yaml deleted file mode 100644 index b1292c2c..00000000 --- a/aws/cloudformation/scylla.yaml +++ /dev/null @@ -1,1063 +0,0 @@ - -AWSTemplateFormatVersion: 2010-09-09 -Description: >- - AWS CloudFormation Scylla Sample Template: This would create a new Scylla Cluster - Including it's own VPC and subnet, Elastic IPs are used for accessing those node publicly - - Use `SGAdmin` security group to enable access from outside to this cluster - By default only SSH port is out to the outside world. - - The deployment should take a couple of minutes, usually less than 10 minutes. - - You do not need AWS account root user for this deployment and you should avoid using it for such. 
- - Caution: password authentication isn't enabled by default - -Metadata: - AWS::CloudFormation::Interface: - ParameterGroups: - - Label: - default: Scylla Parameters - Parameters: - - ScyllaClusterName - - ScyllaAmi - - ScyllaSeedIPs - - Label: - default: AWS Parameters - Parameters: - - InstanceType - - InstanceCount - - AvailabilityZone - - CIDR - - EnablePublicAccess - - PublicAccessCIDR - - KeyName - ParameterLabels: - ScyllaClusterName: - default: Scylla cluster name - ScyllaAmi: - default: Scylla AMI ID - ScyllaSeedIPs: - default: Scylla seed nodes IPs - InstanceType: - default: EC2 instance type - InstanceCount: - default: Number of Scylla nodes (EC2 Instances) - AvailabilityZone: - default: Availability Zone - CIDR: - default: CIDR block for Scylla VPC - EnablePublicAccess: - default: Allow public access (SSH) - PublicAccessCIDR: - default: Allowed subnet for public access (SSH) - -Parameters: - ScyllaClusterName: - Type: String - - PublicAccessCIDR: - Type: String - Description: | - The IP address range that can be used to SSH to the EC2 instances - (x.x.x.x/32 for specific IP, 0.0.0.0/0 to allow all IP addresses) - Default: 0.0.0.0/0 - - InstanceCount: - Description: Must be between 1 and 10 - Type: String - Default: 1 - AllowedValues: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - ConstraintDescription: Must be a number between 1 and 10. 
- - EnablePublicAccess: - Description: Sef true to enable public access to the Scylla cluster nodes - Type: String - AllowedValues: - - 'false' - - 'true' - Default: 'false' - - ScyllaAmi: - Type: 'AWS::EC2::Image::Id' - ConstraintDescription: Enter a valid Scylla AMI ID for your selected region - - InstanceType: - Type: String - Default: i3.large - AllowedValues: - - i3.large - - i3.xlarge - - i3.2xlarge - - i3.4xlarge - - i3.8xlarge - - i3.16xlarge - - i3.metal - - i3en.large - - i3en.xlarge - - i3en.2xlarge - - i3en.3xlarge - - i3en.6xlarge - - i3en.12xlarge - - i3en.24xlarge - - i3en.metal - ConstraintDescription: must be a valid EC2 instance type. - - AvailabilityZone: - Type: 'AWS::EC2::AvailabilityZone::Name' - ConstraintDescription: must be the name of available AvailabilityZone. - - KeyName: - Description: Name of an existing EC2 KeyPair to enable SSH access to the instances - Type: 'AWS::EC2::KeyPair::KeyName' - ConstraintDescription: must be the name of an existing EC2 KeyPair. - - CIDR: - Description: | - Currently supports 8, 16, or 24 netmask. - The node IPs will be x.x.x.10, x.x.x.11, x.x.x.12 etc. - Type: 'String' - Default: '172.31.0.0/16' - AllowedPattern: (\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})/(\d{1,2}) - ConstraintDescription: must be a valid CIDR (ex. 172.31.0.0/16) - - ScyllaSeedIPs: - Description: | - Will be set as `seeds` on /etc/scylla/scylla.yaml. 
- NOTE: The first four IP addresses and the last IP address in each subnet reserved by AWS, - for more information, see https://docs.aws.amazon.com/vpc/latest/userguide/VPC_Subnets.html#VPC_Sizing - Type: CommaDelimitedList - Default: '172.31.0.10, 172.31.0.11, 172.31.0.12' - -# Those conditions would be used to enable nodes based on InstanceCount parameter -Conditions: - Launch1: !Equals [1, 1] - Launch2: !Not [!Equals [1, !Ref InstanceCount]] - Launch3: !And - - !Not [!Equals [1, !Ref InstanceCount]] - - !Not [!Equals [2, !Ref InstanceCount]] - Launch4: !And - - !Not [!Equals [1, !Ref InstanceCount]] - - !Not [!Equals [2, !Ref InstanceCount]] - - !Not [!Equals [3, !Ref InstanceCount]] - Launch5: !And - - !Not [!Equals [1, !Ref InstanceCount]] - - !Not [!Equals [2, !Ref InstanceCount]] - - !Not [!Equals [3, !Ref InstanceCount]] - - !Not [!Equals [4, !Ref InstanceCount]] - Launch6: !And - - !Not [!Equals [1, !Ref InstanceCount]] - - !Not [!Equals [2, !Ref InstanceCount]] - - !Not [!Equals [3, !Ref InstanceCount]] - - !Not [!Equals [4, !Ref InstanceCount]] - - !Not [!Equals [5, !Ref InstanceCount]] - Launch7: !And - - !Not [!Equals [1, !Ref InstanceCount]] - - !Not [!Equals [2, !Ref InstanceCount]] - - !Not [!Equals [3, !Ref InstanceCount]] - - !Not [!Equals [4, !Ref InstanceCount]] - - !Not [!Equals [5, !Ref InstanceCount]] - - !Not [!Equals [6, !Ref InstanceCount]] - Launch8: !And - - !Not [!Equals [1, !Ref InstanceCount]] - - !Not [!Equals [2, !Ref InstanceCount]] - - !Not [!Equals [3, !Ref InstanceCount]] - - !Not [!Equals [4, !Ref InstanceCount]] - - !Not [!Equals [5, !Ref InstanceCount]] - - !Not [!Equals [6, !Ref InstanceCount]] - - !Not [!Equals [7, !Ref InstanceCount]] - Launch9: !And - - !Not [!Equals [1, !Ref InstanceCount]] - - !Not [!Equals [2, !Ref InstanceCount]] - - !Not [!Equals [3, !Ref InstanceCount]] - - !Not [!Equals [4, !Ref InstanceCount]] - - !Not [!Equals [5, !Ref InstanceCount]] - - !Not [!Equals [6, !Ref InstanceCount]] - - !Not [!Equals 
[7, !Ref InstanceCount]] - - !Not [!Equals [8, !Ref InstanceCount]] - Launch10: !Equals [10, !Ref InstanceCount] - -Resources: - GatewayAttachment: - Type: 'AWS::EC2::VPCGatewayAttachment' - Properties: - InternetGatewayId: !Ref InternetGateway - VpcId: !Ref VPC - - InternetGateway: - Type: 'AWS::EC2::InternetGateway' - Properties: - Tags: - - Key: Name - Value: !Sub '${ScyllaClusterName}-Gateway' - - Key: ScyllaClusterName - Value: !Ref ScyllaClusterName - Node0: - Condition: Launch1 - Type: 'AWS::EC2::Instance' - CreationPolicy: - ResourceSignal: - Timeout: PT10M - DependsOn: CfnEndpoint - Properties: - BlockDeviceMappings: - - DeviceName: /dev/sda1 - Ebs: - DeleteOnTermination: true - VolumeSize: 50 - ImageId: !Ref ScyllaAmi - InstanceType: !Ref InstanceType - KeyName: !Ref KeyName - Tags: - - Key: Name - Value: !Sub '${ScyllaClusterName}-Node-1' - - Key: ScyllaClusterName - Value: !Ref ScyllaClusterName - UserData: !Base64 - 'Fn::Join': - - '' - - - '{"scylla_yaml": {"seed_provider": [{"class_name": "org.apache.cassandra.locator.SimpleSeedProvider", "parameters": [{"seeds": "' - - !Join - - ',' - - - !If [Launch1, !Select [0, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - !If [Launch2, !Select [1, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - !If [Launch3, !Select [2, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - '"}]}]' - - !Sub ', "cluster_name": "${ScyllaClusterName}", ' - - '"endpoint_snitch": "org.apache.cassandra.locator.Ec2Snitch"}, ' - - '"start_scylla_on_first_boot": true, ' - - '"post_configuration_script" : "' - - !Base64 - 'Fn::Join': - - '' - - - !Sub | - #!/bin/bash -ex - /usr/local/bin/cfn-signal --exit-code 0 --resource Node0 --region ${AWS::Region} --stack ${AWS::StackName} - - '"}' - NetworkInterfaces: - - AssociatePublicIpAddress: !Ref EnablePublicAccess - PrivateIpAddress: !Join - - '.' 
- - - !Select [0, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - !Select [1, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - !Select [2, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - 10 - SubnetId: !Ref Subnet - DeviceIndex: '0' - Description: 'Primary network interface' - GroupSet: - - !Ref SGCluster - - !Ref SGAdmin - - !Ref SGExternal - Node1: - Condition: Launch2 - Type: 'AWS::EC2::Instance' - CreationPolicy: - ResourceSignal: - Timeout: PT10M - DependsOn: CfnEndpoint - Properties: - BlockDeviceMappings: - - DeviceName: /dev/sda1 - Ebs: - DeleteOnTermination: true - VolumeSize: 50 - ImageId: !Ref ScyllaAmi - InstanceType: !Ref InstanceType - KeyName: !Ref KeyName - Tags: - - Key: Name - Value: !Sub '${ScyllaClusterName}-Node-2' - - Key: ScyllaClusterName - Value: !Ref ScyllaClusterName - UserData: !Base64 - 'Fn::Join': - - '' - - - '{"scylla_yaml": {"seed_provider": [{"class_name": "org.apache.cassandra.locator.SimpleSeedProvider", "parameters": [{"seeds": "' - - !Join - - ',' - - - !If [Launch1, !Select [0, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - !If [Launch2, !Select [1, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - !If [Launch3, !Select [2, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - '"}]}]' - - !Sub ', "cluster_name": "${ScyllaClusterName}", ' - - '"endpoint_snitch": "org.apache.cassandra.locator.Ec2Snitch"}, ' - - '"start_scylla_on_first_boot": true, ' - - '"post_configuration_script" : "' - - !Base64 - 'Fn::Join': - - '' - - - !Sub | - #!/bin/bash -ex - /usr/local/bin/cfn-signal --exit-code 0 --resource Node1 --region ${AWS::Region} --stack ${AWS::StackName} - - '"}' - NetworkInterfaces: - - AssociatePublicIpAddress: !Ref EnablePublicAccess - PrivateIpAddress: !Join - - '.' 
- - - !Select [0, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - !Select [1, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - !Select [2, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - 11 - SubnetId: !Ref Subnet - DeviceIndex: '0' - Description: 'Primary network interface' - GroupSet: - - !Ref SGCluster - - !Ref SGAdmin - - !Ref SGExternal - Node2: - Condition: Launch3 - Type: 'AWS::EC2::Instance' - CreationPolicy: - ResourceSignal: - Timeout: PT10M - DependsOn: CfnEndpoint - Properties: - BlockDeviceMappings: - - DeviceName: /dev/sda1 - Ebs: - DeleteOnTermination: true - VolumeSize: 50 - ImageId: !Ref ScyllaAmi - InstanceType: !Ref InstanceType - KeyName: !Ref KeyName - Tags: - - Key: Name - Value: !Sub '${ScyllaClusterName}-Node-3' - - Key: ScyllaClusterName - Value: !Ref ScyllaClusterName - UserData: !Base64 - 'Fn::Join': - - '' - - - '{"scylla_yaml": {"seed_provider": [{"class_name": "org.apache.cassandra.locator.SimpleSeedProvider", "parameters": [{"seeds": "' - - !Join - - ',' - - - !If [Launch1, !Select [0, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - !If [Launch2, !Select [1, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - !If [Launch3, !Select [2, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - '"}]}]' - - !Sub ', "cluster_name": "${ScyllaClusterName}", ' - - '"endpoint_snitch": "org.apache.cassandra.locator.Ec2Snitch"}, ' - - '"start_scylla_on_first_boot": true, ' - - '"post_configuration_script" : "' - - !Base64 - 'Fn::Join': - - '' - - - !Sub | - #!/bin/bash -ex - /usr/local/bin/cfn-signal --exit-code 0 --resource Node2 --region ${AWS::Region} --stack ${AWS::StackName} - - '"}' - NetworkInterfaces: - - AssociatePublicIpAddress: !Ref EnablePublicAccess - PrivateIpAddress: !Join - - '.' 
- - - !Select [0, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - !Select [1, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - !Select [2, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - 12 - SubnetId: !Ref Subnet - DeviceIndex: '0' - Description: 'Primary network interface' - GroupSet: - - !Ref SGCluster - - !Ref SGAdmin - - !Ref SGExternal - Node3: - Condition: Launch4 - Type: 'AWS::EC2::Instance' - CreationPolicy: - ResourceSignal: - Timeout: PT10M - DependsOn: CfnEndpoint - Properties: - BlockDeviceMappings: - - DeviceName: /dev/sda1 - Ebs: - DeleteOnTermination: true - VolumeSize: 50 - ImageId: !Ref ScyllaAmi - InstanceType: !Ref InstanceType - KeyName: !Ref KeyName - Tags: - - Key: Name - Value: !Sub '${ScyllaClusterName}-Node-4' - - Key: ScyllaClusterName - Value: !Ref ScyllaClusterName - UserData: !Base64 - 'Fn::Join': - - '' - - - '{"scylla_yaml": {"seed_provider": [{"class_name": "org.apache.cassandra.locator.SimpleSeedProvider", "parameters": [{"seeds": "' - - !Join - - ',' - - - !If [Launch1, !Select [0, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - !If [Launch2, !Select [1, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - !If [Launch3, !Select [2, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - '"}]}]' - - !Sub ', "cluster_name": "${ScyllaClusterName}", ' - - '"endpoint_snitch": "org.apache.cassandra.locator.Ec2Snitch"}, ' - - '"start_scylla_on_first_boot": true, ' - - '"post_configuration_script" : "' - - !Base64 - 'Fn::Join': - - '' - - - !Sub | - #!/bin/bash -ex - /usr/local/bin/cfn-signal --exit-code 0 --resource Node3 --region ${AWS::Region} --stack ${AWS::StackName} - - '"}' - NetworkInterfaces: - - AssociatePublicIpAddress: !Ref EnablePublicAccess - PrivateIpAddress: !Join - - '.' 
- - - !Select [0, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - !Select [1, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - !Select [2, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - 13 - SubnetId: !Ref Subnet - DeviceIndex: '0' - Description: 'Primary network interface' - GroupSet: - - !Ref SGCluster - - !Ref SGAdmin - - !Ref SGExternal - Node4: - Condition: Launch5 - Type: 'AWS::EC2::Instance' - CreationPolicy: - ResourceSignal: - Timeout: PT10M - DependsOn: CfnEndpoint - Properties: - BlockDeviceMappings: - - DeviceName: /dev/sda1 - Ebs: - DeleteOnTermination: true - VolumeSize: 50 - ImageId: !Ref ScyllaAmi - InstanceType: !Ref InstanceType - KeyName: !Ref KeyName - Tags: - - Key: Name - Value: !Sub '${ScyllaClusterName}-Node-5' - - Key: ScyllaClusterName - Value: !Ref ScyllaClusterName - UserData: !Base64 - 'Fn::Join': - - '' - - - '{"scylla_yaml": {"seed_provider": [{"class_name": "org.apache.cassandra.locator.SimpleSeedProvider", "parameters": [{"seeds": "' - - !Join - - ',' - - - !If [Launch1, !Select [0, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - !If [Launch2, !Select [1, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - !If [Launch3, !Select [2, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - '"}]}]' - - !Sub ', "cluster_name": "${ScyllaClusterName}", ' - - '"endpoint_snitch": "org.apache.cassandra.locator.Ec2Snitch"}, ' - - '"start_scylla_on_first_boot": true, ' - - '"post_configuration_script" : "' - - !Base64 - 'Fn::Join': - - '' - - - !Sub | - #!/bin/bash -ex - /usr/local/bin/cfn-signal --exit-code 0 --resource Node4 --region ${AWS::Region} --stack ${AWS::StackName} - - '"}' - NetworkInterfaces: - - AssociatePublicIpAddress: !Ref EnablePublicAccess - PrivateIpAddress: !Join - - '.' 
- - - !Select [0, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - !Select [1, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - !Select [2, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - 14 - SubnetId: !Ref Subnet - DeviceIndex: '0' - Description: 'Primary network interface' - GroupSet: - - !Ref SGCluster - - !Ref SGAdmin - - !Ref SGExternal - Node5: - Condition: Launch6 - Type: 'AWS::EC2::Instance' - CreationPolicy: - ResourceSignal: - Timeout: PT10M - DependsOn: CfnEndpoint - Properties: - BlockDeviceMappings: - - DeviceName: /dev/sda1 - Ebs: - DeleteOnTermination: true - VolumeSize: 50 - ImageId: !Ref ScyllaAmi - InstanceType: !Ref InstanceType - KeyName: !Ref KeyName - Tags: - - Key: Name - Value: !Sub '${ScyllaClusterName}-Node-6' - - Key: ScyllaClusterName - Value: !Ref ScyllaClusterName - UserData: !Base64 - 'Fn::Join': - - '' - - - '{"scylla_yaml": {"seed_provider": [{"class_name": "org.apache.cassandra.locator.SimpleSeedProvider", "parameters": [{"seeds": "' - - !Join - - ',' - - - !If [Launch1, !Select [0, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - !If [Launch2, !Select [1, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - !If [Launch3, !Select [2, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - '"}]}]' - - !Sub ', "cluster_name": "${ScyllaClusterName}", ' - - '"endpoint_snitch": "org.apache.cassandra.locator.Ec2Snitch"}, ' - - '"start_scylla_on_first_boot": true, ' - - '"post_configuration_script" : "' - - !Base64 - 'Fn::Join': - - '' - - - !Sub | - #!/bin/bash -ex - /usr/local/bin/cfn-signal --exit-code 0 --resource Node5 --region ${AWS::Region} --stack ${AWS::StackName} - - '"}' - NetworkInterfaces: - - AssociatePublicIpAddress: !Ref EnablePublicAccess - PrivateIpAddress: !Join - - '.' 
- - - !Select [0, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - !Select [1, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - !Select [2, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - 15 - SubnetId: !Ref Subnet - DeviceIndex: '0' - Description: 'Primary network interface' - GroupSet: - - !Ref SGCluster - - !Ref SGAdmin - - !Ref SGExternal - Node6: - Condition: Launch7 - Type: 'AWS::EC2::Instance' - CreationPolicy: - ResourceSignal: - Timeout: PT10M - DependsOn: CfnEndpoint - Properties: - BlockDeviceMappings: - - DeviceName: /dev/sda1 - Ebs: - DeleteOnTermination: true - VolumeSize: 50 - ImageId: !Ref ScyllaAmi - InstanceType: !Ref InstanceType - KeyName: !Ref KeyName - Tags: - - Key: Name - Value: !Sub '${ScyllaClusterName}-Node-7' - - Key: ScyllaClusterName - Value: !Ref ScyllaClusterName - UserData: !Base64 - 'Fn::Join': - - '' - - - '{"scylla_yaml": {"seed_provider": [{"class_name": "org.apache.cassandra.locator.SimpleSeedProvider", "parameters": [{"seeds": "' - - !Join - - ',' - - - !If [Launch1, !Select [0, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - !If [Launch2, !Select [1, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - !If [Launch3, !Select [2, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - '"}]}]' - - !Sub ', "cluster_name": "${ScyllaClusterName}", ' - - '"endpoint_snitch": "org.apache.cassandra.locator.Ec2Snitch"}, ' - - '"start_scylla_on_first_boot": true, ' - - '"post_configuration_script" : "' - - !Base64 - 'Fn::Join': - - '' - - - !Sub | - #!/bin/bash -ex - /usr/local/bin/cfn-signal --exit-code 0 --resource Node6 --region ${AWS::Region} --stack ${AWS::StackName} - - '"}' - NetworkInterfaces: - - AssociatePublicIpAddress: !Ref EnablePublicAccess - PrivateIpAddress: !Join - - '.' 
- - - !Select [0, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - !Select [1, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - !Select [2, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - 16 - SubnetId: !Ref Subnet - DeviceIndex: '0' - Description: 'Primary network interface' - GroupSet: - - !Ref SGCluster - - !Ref SGAdmin - - !Ref SGExternal - Node7: - Condition: Launch8 - Type: 'AWS::EC2::Instance' - CreationPolicy: - ResourceSignal: - Timeout: PT10M - DependsOn: CfnEndpoint - Properties: - BlockDeviceMappings: - - DeviceName: /dev/sda1 - Ebs: - DeleteOnTermination: true - VolumeSize: 50 - ImageId: !Ref ScyllaAmi - InstanceType: !Ref InstanceType - KeyName: !Ref KeyName - Tags: - - Key: Name - Value: !Sub '${ScyllaClusterName}-Node-8' - - Key: ScyllaClusterName - Value: !Ref ScyllaClusterName - UserData: !Base64 - 'Fn::Join': - - '' - - - '{"scylla_yaml": {"seed_provider": [{"class_name": "org.apache.cassandra.locator.SimpleSeedProvider", "parameters": [{"seeds": "' - - !Join - - ',' - - - !If [Launch1, !Select [0, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - !If [Launch2, !Select [1, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - !If [Launch3, !Select [2, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - '"}]}]' - - !Sub ', "cluster_name": "${ScyllaClusterName}", ' - - '"endpoint_snitch": "org.apache.cassandra.locator.Ec2Snitch"}, ' - - '"start_scylla_on_first_boot": true, ' - - '"post_configuration_script" : "' - - !Base64 - 'Fn::Join': - - '' - - - !Sub | - #!/bin/bash -ex - /usr/local/bin/cfn-signal --exit-code 0 --resource Node7 --region ${AWS::Region} --stack ${AWS::StackName} - - '"}' - NetworkInterfaces: - - AssociatePublicIpAddress: !Ref EnablePublicAccess - PrivateIpAddress: !Join - - '.' 
- - - !Select [0, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - !Select [1, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - !Select [2, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - 17 - SubnetId: !Ref Subnet - DeviceIndex: '0' - Description: 'Primary network interface' - GroupSet: - - !Ref SGCluster - - !Ref SGAdmin - - !Ref SGExternal - Node8: - Condition: Launch9 - Type: 'AWS::EC2::Instance' - CreationPolicy: - ResourceSignal: - Timeout: PT10M - DependsOn: CfnEndpoint - Properties: - BlockDeviceMappings: - - DeviceName: /dev/sda1 - Ebs: - DeleteOnTermination: true - VolumeSize: 50 - ImageId: !Ref ScyllaAmi - InstanceType: !Ref InstanceType - KeyName: !Ref KeyName - Tags: - - Key: Name - Value: !Sub '${ScyllaClusterName}-Node-9' - - Key: ScyllaClusterName - Value: !Ref ScyllaClusterName - UserData: !Base64 - 'Fn::Join': - - '' - - - '{"scylla_yaml": {"seed_provider": [{"class_name": "org.apache.cassandra.locator.SimpleSeedProvider", "parameters": [{"seeds": "' - - !Join - - ',' - - - !If [Launch1, !Select [0, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - !If [Launch2, !Select [1, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - !If [Launch3, !Select [2, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - '"}]}]' - - !Sub ', "cluster_name": "${ScyllaClusterName}", ' - - '"endpoint_snitch": "org.apache.cassandra.locator.Ec2Snitch"}, ' - - '"start_scylla_on_first_boot": true, ' - - '"post_configuration_script" : "' - - !Base64 - 'Fn::Join': - - '' - - - !Sub | - #!/bin/bash -ex - /usr/local/bin/cfn-signal --exit-code 0 --resource Node8 --region ${AWS::Region} --stack ${AWS::StackName} - - '"}' - NetworkInterfaces: - - AssociatePublicIpAddress: !Ref EnablePublicAccess - PrivateIpAddress: !Join - - '.' 
- - - !Select [0, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - !Select [1, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - !Select [2, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - 18 - SubnetId: !Ref Subnet - DeviceIndex: '0' - Description: 'Primary network interface' - GroupSet: - - !Ref SGCluster - - !Ref SGAdmin - - !Ref SGExternal - Node9: - Condition: Launch10 - Type: 'AWS::EC2::Instance' - CreationPolicy: - ResourceSignal: - Timeout: PT10M - DependsOn: CfnEndpoint - Properties: - BlockDeviceMappings: - - DeviceName: /dev/sda1 - Ebs: - DeleteOnTermination: true - VolumeSize: 50 - ImageId: !Ref ScyllaAmi - InstanceType: !Ref InstanceType - KeyName: !Ref KeyName - Tags: - - Key: Name - Value: !Sub '${ScyllaClusterName}-Node-10' - - Key: ScyllaClusterName - Value: !Ref ScyllaClusterName - UserData: !Base64 - 'Fn::Join': - - '' - - - '{"scylla_yaml": {"seed_provider": [{"class_name": "org.apache.cassandra.locator.SimpleSeedProvider", "parameters": [{"seeds": "' - - !Join - - ',' - - - !If [Launch1, !Select [0, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - !If [Launch2, !Select [1, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - !If [Launch3, !Select [2, !Ref ScyllaSeedIPs], !Ref "AWS::NoValue"] - - '"}]}]' - - !Sub ', "cluster_name": "${ScyllaClusterName}", ' - - '"endpoint_snitch": "org.apache.cassandra.locator.Ec2Snitch"}, ' - - '"start_scylla_on_first_boot": true, ' - - '"post_configuration_script" : "' - - !Base64 - 'Fn::Join': - - '' - - - !Sub | - #!/bin/bash -ex - /usr/local/bin/cfn-signal --exit-code 0 --resource Node9 --region ${AWS::Region} --stack ${AWS::StackName} - - '"}' - NetworkInterfaces: - - AssociatePublicIpAddress: !Ref EnablePublicAccess - PrivateIpAddress: !Join - - '.' 
- - - !Select [0, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - !Select [1, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - !Select [2, !Split ['.', !Select [0, !Split ['/', !Ref CIDR]]]] - - 19 - SubnetId: !Ref Subnet - DeviceIndex: '0' - Description: 'Primary network interface' - GroupSet: - - !Ref SGCluster - - !Ref SGAdmin - - !Ref SGExternal - Route: - Type: 'AWS::EC2::Route' - DependsOn: GatewayAttachment - Properties: - DestinationCidrBlock: 0.0.0.0/0 - GatewayId: !Ref InternetGateway - RouteTableId: !Ref RouteTable - RouteTable: - Type: 'AWS::EC2::RouteTable' - Properties: - Tags: - - Key: Name - Value: !Sub '${ScyllaClusterName}-RT' - - Key: ScyllaClusterName - Value: !Ref ScyllaClusterName - VpcId: !Ref VPC - - - SGExternal: - Type: 'AWS::EC2::SecurityGroup' - Properties: - GroupDescription: Security group for the cluster - Tags: - - Key: Name - Value: !Sub '${ScyllaClusterName}-SGExternal' - - Key: ScyllaClusterName - Value: !Ref ScyllaClusterName - SecurityGroupEgress: - - CidrIp: 0.0.0.0/0 - IpProtocol: '-1' - VpcId: !Ref VPC - - SGAdmin: - Type: 'AWS::EC2::SecurityGroup' - Properties: - GroupDescription: Security group for the admin - Tags: - - Key: Name - Value: !Sub '${ScyllaClusterName}-SGAdmin' - - Key: ScyllaClusterName - Value: !Ref ScyllaClusterName - SecurityGroupIngress: - - CidrIp: !Ref PublicAccessCIDR - FromPort: 22 - ToPort: 22 - IpProtocol: tcp - - CidrIpv6: ::/0 - FromPort: 22 - ToPort: 22 - IpProtocol: tcp - VpcId: !Ref VPC - - SGCluster: - Type: 'AWS::EC2::SecurityGroup' - Properties: - GroupDescription: Security group for the cluster - Tags: - - Key: Name - Value: !Sub '${ScyllaClusterName}-SGCluster' - - Key: ScyllaClusterName - Value: !Ref ScyllaClusterName - SecurityGroupIngress: - - CidrIp: !Ref CIDR - IpProtocol: 'tcp' - FromPort: 9042 - ToPort: 9042 - - CidrIp: !Ref CIDR - IpProtocol: 'tcp' - FromPort: 9142 - ToPort: 9142 - - CidrIp: !Ref CIDR - IpProtocol: 'tcp' - FromPort: 7000 - ToPort: 7001 - - CidrIp: 
!Ref CIDR - IpProtocol: 'tcp' - FromPort: 7199 - ToPort: 7199 - - CidrIp: !Ref CIDR - IpProtocol: 'tcp' - FromPort: 10000 - ToPort: 10000 - - CidrIp: !Ref CIDR - IpProtocol: 'tcp' - FromPort: 9180 - ToPort: 9180 - - CidrIp: !Ref CIDR - IpProtocol: 'tcp' - FromPort: 9100 - ToPort: 9100 - - CidrIp: !Ref CIDR - IpProtocol: 'tcp' - FromPort: 9160 - ToPort: 9160 - - CidrIp: !Ref CIDR - IpProtocol: 'tcp' - FromPort: 19042 - ToPort: 19042 - - CidrIp: !Ref CIDR - IpProtocol: 'tcp' - FromPort: 19142 - ToPort: 19142 - - CidrIp: !Ref CIDR - IpProtocol: 'icmp' - FromPort: 8 - ToPort: '-1' - - CidrIp: !Ref CIDR - IpProtocol: 'tcp' - FromPort: 443 - ToPort: 443 - VpcId: !Ref VPC - - Subnet: - Type: 'AWS::EC2::Subnet' - Properties: - AvailabilityZone: !Ref AvailabilityZone - CidrBlock: !Ref CIDR - MapPublicIpOnLaunch: !Ref EnablePublicAccess - Tags: - - Key: Name - Value: !Sub '${ScyllaClusterName}-Subnet' - - Key: ScyllaClusterName - Value: !Ref ScyllaClusterName - VpcId: !Ref VPC - - SubnetRouteTableAssociation: - Type: 'AWS::EC2::SubnetRouteTableAssociation' - Properties: - RouteTableId: !Ref RouteTable - SubnetId: !Ref Subnet - - VPC: - Type: 'AWS::EC2::VPC' - Properties: - CidrBlock: !Ref CIDR - EnableDnsSupport: true - EnableDnsHostnames: true - Tags: - - Key: Name - Value: !Sub '${ScyllaClusterName}-VPC' - - Key: ScyllaClusterName - Value: !Ref ScyllaClusterName - - CfnEndpoint: - Type: AWS::EC2::VPCEndpoint - DependsOn: SGCluster - Properties: - VpcId: !Ref VPC - ServiceName: !Sub "com.amazonaws.${AWS::Region}.cloudformation" - VpcEndpointType: "Interface" - PrivateDnsEnabled: true - SubnetIds: - - !Ref Subnet - SecurityGroupIds: - - !Ref SGCluster - - !Ref SGAdmin - - !Ref SGExternal - -Outputs: - Node0: - Condition: Launch1 - Value: !Ref Node0 - Node0PrivateDnsName: - Condition: Launch1 - Value: !GetAtt - - Node0 - - PrivateDnsName - Node0PrivateIp: - Condition: Launch1 - Value: !GetAtt - - Node0 - - PrivateIp - Node1: - Condition: Launch2 - Value: !Ref Node1 - 
Node1PrivateDnsName: - Condition: Launch2 - Value: !GetAtt - - Node1 - - PrivateDnsName - Node1PrivateIp: - Condition: Launch2 - Value: !GetAtt - - Node1 - - PrivateIp - Node2: - Condition: Launch3 - Value: !Ref Node2 - Node2PrivateDnsName: - Condition: Launch3 - Value: !GetAtt - - Node2 - - PrivateDnsName - Node2PrivateIp: - Condition: Launch3 - Value: !GetAtt - - Node2 - - PrivateIp - Node3: - Condition: Launch4 - Value: !Ref Node3 - Node3PrivateDnsName: - Condition: Launch4 - Value: !GetAtt - - Node3 - - PrivateDnsName - Node3PrivateIp: - Condition: Launch4 - Value: !GetAtt - - Node3 - - PrivateIp - Node4: - Condition: Launch5 - Value: !Ref Node4 - Node4PrivateDnsName: - Condition: Launch5 - Value: !GetAtt - - Node4 - - PrivateDnsName - Node4PrivateIp: - Condition: Launch5 - Value: !GetAtt - - Node4 - - PrivateIp - Node5: - Condition: Launch6 - Value: !Ref Node5 - Node5PrivateDnsName: - Condition: Launch6 - Value: !GetAtt - - Node5 - - PrivateDnsName - Node5PrivateIp: - Condition: Launch6 - Value: !GetAtt - - Node5 - - PrivateIp - Node6: - Condition: Launch7 - Value: !Ref Node6 - Node6PrivateDnsName: - Condition: Launch7 - Value: !GetAtt - - Node6 - - PrivateDnsName - Node6PrivateIp: - Condition: Launch7 - Value: !GetAtt - - Node6 - - PrivateIp - Node7: - Condition: Launch8 - Value: !Ref Node7 - Node7PrivateDnsName: - Condition: Launch8 - Value: !GetAtt - - Node7 - - PrivateDnsName - Node7PrivateIp: - Condition: Launch8 - Value: !GetAtt - - Node7 - - PrivateIp - Node8: - Condition: Launch9 - Value: !Ref Node8 - Node8PrivateDnsName: - Condition: Launch9 - Value: !GetAtt - - Node8 - - PrivateDnsName - Node8PrivateIp: - Condition: Launch9 - Value: !GetAtt - - Node8 - - PrivateIp - Node9: - Condition: Launch10 - Value: !Ref Node9 - Node9PrivateDnsName: - Condition: Launch10 - Value: !GetAtt - - Node9 - - PrivateDnsName - Node9PrivateIp: - Condition: Launch10 - Value: !GetAtt - - Node9 - - PrivateIp - SGExternal: - Value: !Ref SGExternal - SGAdmin: - Value: !Ref 
SGAdmin - SGCluster: - Value: !Ref SGCluster - Subnet: - Value: !Ref Subnet - VPC: - Value: !Ref VPC From 312a3e60bd6871a9c71ea535c5131a6f6a8dc3d4 Mon Sep 17 00:00:00 2001 From: Yaron Kaikov Date: Mon, 26 Dec 2022 11:33:29 +0200 Subject: [PATCH 04/41] v2: lib/scylla_cloud.py: remove diskCount vs cpu restrictions although it's not recommended by GCP, we need to remove the restriction for minimum cpu's when using 16 local disks or more. This is so we can support one of our cloud customer which is using this configuration Closes: https://github.com/scylladb/scylla-enterprise/issues/2543 --- lib/scylla_cloud.py | 1 - tests/test_gcp_instance.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/lib/scylla_cloud.py b/lib/scylla_cloud.py index 068d4300..54da30da 100644 --- a/lib/scylla_cloud.py +++ b/lib/scylla_cloud.py @@ -362,7 +362,6 @@ def is_recommended_instance(self): if diskCount >= 16 and self.cpu < 32: logging.warning( "This machine doesn't have enough CPUs for allocated number of NVMEs (at least 32 cpus for >=16 disks). 
Performance will suffer.") - return False if diskCount < 1: logging.warning("No ephemeral disks were found.") return False diff --git a/tests/test_gcp_instance.py b/tests/test_gcp_instance.py index a1fa6790..9bf29ab6 100644 --- a/tests/test_gcp_instance.py +++ b/tests/test_gcp_instance.py @@ -412,8 +412,6 @@ def test_is_not_recommended_instance_n2_standard_8_24ssd(self): with unittest.mock.patch('psutil.cpu_count', return_value=8),\ unittest.mock.patch('psutil.virtual_memory', return_value=svmem(33663647744)),\ unittest.mock.patch('psutil.disk_partitions', return_value=mock_disk_partitions),\ - unittest.mock.patch('os.listdir', return_value=mock_listdevdir_n2_standard_8_24ssd),\ - unittest.mock.patch('glob.glob', return_value=mock_glob_glob_dev_n2_standard_8_24ssd),\ unittest.mock.patch('lib.scylla_cloud.gcp_instance.get_file_size_by_seek', return_value=402653184000): ins = gcp_instance() # Requires more CPUs to use this number of SSDs From 127d83b7415dff20257807931202397cfb6365c2 Mon Sep 17 00:00:00 2001 From: Jenkins Promoter Date: Wed, 18 Jan 2023 16:43:11 +0200 Subject: [PATCH 05/41] release: prepare for 5.3.0-dev --- SCYLLA-VERSION-GEN | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SCYLLA-VERSION-GEN b/SCYLLA-VERSION-GEN index 047c1f73..5985eb7b 100755 --- a/SCYLLA-VERSION-GEN +++ b/SCYLLA-VERSION-GEN @@ -1,7 +1,7 @@ #!/bin/sh PRODUCT=scylla -VERSION=5.2.0-dev +VERSION=5.3.0-dev if test -f version then From fd72489a835eb422b8d0796a8ea4c292afb7a5dc Mon Sep 17 00:00:00 2001 From: Yaron Kaikov Date: Sun, 22 Jan 2023 21:52:50 +0200 Subject: [PATCH 06/41] [Azure]:make our image MS certified Microsoft provides a certification test API which can be used to evaluate a given image's sanity. They also provide which test cases they run and expect us to comply, as well as their recommendations for Azure Marketplace Images. 
The following have been flagged within the VM self-test output for our latest image: ``` "TestCaseName": "Swap Partition on OS Disk", "Description": "Verifies that no Swap partitions are created on the OS disk.", "Result": "Failed", "ActualValue": "Swap space configured on the OS disk.", ``` Was fixed in https://github.com/scylladb/scylla-machine-image/pull/400 ``` "TestCaseName": "Required Kernel Parameters", "Description": "Verifies the following kernel parameters are set console=ttyS0, earlyprintk=ttyS0, rootdelay=300", "Result": "Warning", "ActualValue": "Missing Parameter: rootdelay=300\r\nMatched Parameter: console=ttyS0,earlyprintk=ttyS0", --- "TestCaseName": "Client Alive Interval", "Description": "It is recommended to set ClientAliveInterval to 180. On the application need, it can be set between 30 to 235. \nIf you are enabling the SSH for your end users this value must be set as explained.", "Result": "Warning", "ActualValue": "120", ``` fixed in this PR Closes: https://github.com/scylladb/scylla-pkg/issues/3172 --- packer/scylla_install_image | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/packer/scylla_install_image b/packer/scylla_install_image index bf8143bc..1ba7123b 100755 --- a/packer/scylla_install_image +++ b/packer/scylla_install_image @@ -115,14 +115,20 @@ if __name__ == '__main__': setup_opt = '--ntp-domain amazon' sysconfig_opt = '' swap_opt = '--swap-directory /' + kernel_opt = '' + grub_variable = 'GRUB_CMDLINE_LINUX_DEFAULT' elif args.target_cloud == 'gce': setup_opt = '' sysconfig_opt = '--disable-writeback-cache' swap_opt = '--swap-directory /' + kernel_opt = '' + grub_variable = 'GRUB_CMDLINE_LINUX_DEFAULT' elif args.target_cloud == 'azure': setup_opt = '' sysconfig_opt = '--disable-writeback-cache' swap_opt = '--swap-directory /mnt' + kernel_opt = ' rootdelay=300' + grub_variable = 'GRUB_CMDLINE_LINUX' run('systemctl disable apt-daily-upgrade.timer apt-daily.timer dpkg-db-backup.timer motd-news.timer', 
shell=True, check=True) run('systemctl daemon-reload', shell=True, check=True) @@ -163,7 +169,9 @@ WantedBy=multi-user.target with open('/etc/default/grub.d/50-cloudimg-settings.cfg') as f: grub = f.read() - grub = re.sub(r'^GRUB_CMDLINE_LINUX_DEFAULT="(.+)"$', r'GRUB_CMDLINE_LINUX_DEFAULT="\1 net.ifnames=0 clocksource=tsc tsc=reliable"', grub, flags=re.MULTILINE) + grub = re.sub(fr'^{grub_variable}="(.+)"$', + fr'{grub_variable}="\1 net.ifnames=0 clocksource=tsc tsc=reliable {kernel_opt}"', grub, + flags=re.MULTILINE) with open('/etc/default/grub.d/50-cloudimg-settings.cfg', 'w') as f: f.write(grub) run('update-grub2', shell=True, check=True) @@ -211,6 +219,8 @@ WantedBy=multi-user.target if args.target_cloud == 'azure': with open('/etc/hosts', 'a') as f: f.write('\n\n169.254.169.254 metadata.azure.internal\n') + with open('/etc/ssh/sshd_config.d/50-cloudimg-settings.conf', 'w') as f: + f.write('ClientAliveInterval 180 \nHostKeyAlgorithms +ssh-rsa \nPubkeyAcceptedKeyTypes +ssh-rsa') # generate package manifest to scylla-packages.txt deps = run(f'apt-cache depends --recurse --no-recommends --no-suggests --no-conflicts --no-breaks --no-replaces --no-enhances --installed {args.product}', stdout=PIPE, stderr=STDOUT, shell=True, check=True, encoding='utf-8').stdout.splitlines() From 03673bf63e8e738831550de588172db9b607cf8d Mon Sep 17 00:00:00 2001 From: Yaron Kaikov Date: Sun, 22 Jan 2023 22:10:51 +0200 Subject: [PATCH 07/41] [Azure]: build image based on ubuntu:22.04 This already been done for our images in https://github.com/scylladb/scylla-machine-image/pull/404/, but it seems that we missed the Azure part Adding it now --- packer/scylla.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packer/scylla.json b/packer/scylla.json index 6765c432..4f597b3c 100644 --- a/packer/scylla.json +++ b/packer/scylla.json @@ -139,8 +139,8 @@ "managed_image_name": "{{user `image_name`| clean_resource_name}}", "os_type": "Linux", "image_publisher": 
"Canonical", - "image_offer": "0001-com-ubuntu-server-focal", - "image_sku": "20_04-lts-gen2", + "image_offer": "0001-com-ubuntu-server-jammy", + "image_sku": "22_04-lts-gen2", "azure_tags": { "scylla_version": "{{user `scylla_full_version`}}", "scylla_machine_image_version": "{{user `scylla_machine_image_version`}}", From f6e759c89dadafac0e2ad687ac7627ce73c7da87 Mon Sep 17 00:00:00 2001 From: Takuya ASADA Date: Mon, 23 Jan 2023 19:33:55 +0900 Subject: [PATCH 08/41] azure_instance: add support Lasv3-series Adding support Lasv3-series. Closes #420 --- lib/scylla_cloud.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/scylla_cloud.py b/lib/scylla_cloud.py index 54da30da..5e8d81fa 100644 --- a/lib/scylla_cloud.py +++ b/lib/scylla_cloud.py @@ -529,7 +529,13 @@ def nvme_disk_count(self): "L32s": 4, "L48s": 6, "L64s": 8, - "L80s": 10 + "L80s": 10, + "L8as": 1, + "L16as": 2, + "L32as": 4, + "L48as": 6, + "L64as": 8, + "L80as": 10 } def __get_nvme_disks_count_from_metadata(self): From a473779c0291f42756e20fd5ccc23c7e04fc7030 Mon Sep 17 00:00:00 2001 From: Takuya ASADA Date: Mon, 30 Jan 2023 20:06:23 +0900 Subject: [PATCH 09/41] azure-image: fix "Resource Disk" support for swapfile Since "Resource Disk" is ephemeral, we cannot configure swapfile on machine image building time. So e7e7daa was incorrect, we currently mistakenly using rootfs for swapfile. We need to allocate it on instance startup time, not machine image building time. And also it should executed after "Resource Disk" mounted to /mnt. 
Fixes #428 Fixes #431 --- common/scylla-image-setup.service | 1 - common/scylla_image_setup | 53 +++++++++++++++++-------------- packer/scylla_install_image | 6 ++-- 3 files changed, 31 insertions(+), 29 deletions(-) diff --git a/common/scylla-image-setup.service b/common/scylla-image-setup.service index 3fe10dd7..38dc662a 100644 --- a/common/scylla-image-setup.service +++ b/common/scylla-image-setup.service @@ -2,7 +2,6 @@ Description=Scylla Cloud Image Setup service Before=scylla-server.service After=network.target -ConditionPathExists=!/etc/scylla/machine_image_configured [Service] Type=oneshot diff --git a/common/scylla_image_setup b/common/scylla_image_setup index 2b71b991..c8022c86 100755 --- a/common/scylla_image_setup +++ b/common/scylla_image_setup @@ -6,33 +6,38 @@ import os import sys -import pathlib -from lib.scylla_cloud import get_cloud_instance, is_gce, is_redhat_variant +from pathlib import Path +from lib.scylla_cloud import get_cloud_instance, is_gce, is_azure, is_redhat_variant from subprocess import run if __name__ == '__main__': - # On Ubuntu, we configure CPU scaling while AMI building time - if is_redhat_variant(): - run('/opt/scylladb/scripts/scylla_cpuscaling_setup', shell=True, check=True) - cloud_instance = get_cloud_instance() - run('/opt/scylladb/scylla-machine-image/scylla_configure.py', shell=True, check=True) + if is_azure() and not Path('/mnt/swapfile').exists(): + Path('/etc/systemd/system/mnt-swapfile.swap').unlink(missing_ok=True) + run('/opt/scylladb/scripts/scylla_swap_setup --swap-directory /mnt', shell=True, check=True) + machine_image_configured = Path('/etc/scylla/machine_image_configured') + if not machine_image_configured.exists(): + # On Ubuntu, we configure CPU scaling while AMI building time + if is_redhat_variant(): + run('/opt/scylladb/scripts/scylla_cpuscaling_setup', shell=True, check=True) + cloud_instance = get_cloud_instance() + run('/opt/scylladb/scylla-machine-image/scylla_configure.py', shell=True, check=True) 
- run('/opt/scylladb/scripts/scylla_sysconfig_setup --nic eth0 --setup-nic', shell=True, check=True) - if os.path.ismount('/var/lib/scylla'): - if cloud_instance.is_supported_instance_class(): - # We run io_setup only when ehpemeral disks are available - if is_gce(): - nr_disks = cloud_instance.nvme_disk_count - if nr_disks > 0: + run('/opt/scylladb/scripts/scylla_sysconfig_setup --nic eth0 --setup-nic', shell=True, check=True) + if os.path.ismount('/var/lib/scylla'): + if cloud_instance.is_supported_instance_class(): + # We run io_setup only when ehpemeral disks are available + if is_gce(): + nr_disks = cloud_instance.nvme_disk_count + if nr_disks > 0: + cloud_instance.io_setup() + else: cloud_instance.io_setup() - else: - cloud_instance.io_setup() - run('systemctl daemon-reload', shell=True, check=True) - run('systemctl enable var-lib-systemd-coredump.mount', shell=True, check=True) - run('systemctl start var-lib-systemd-coredump.mount', shell=True, check=True) - # some distro has fstrim enabled by default, since we are using XFS with online discard, we don't need fstrim - run('systemctl is-active -q fstrim.timer && systemctl disable fstrim.timer', shell=True, check=True) + run('systemctl daemon-reload', shell=True, check=True) + run('systemctl enable var-lib-systemd-coredump.mount', shell=True, check=True) + run('systemctl start var-lib-systemd-coredump.mount', shell=True, check=True) + # some distro has fstrim enabled by default, since we are using XFS with online discard, we don't need fstrim + run('systemctl is-active -q fstrim.timer && systemctl disable fstrim.timer', shell=True, check=True) - if not os.path.ismount('/var/lib/scylla'): - print('Failed to initialize RAID volume!') - pathlib.Path('/etc/scylla/machine_image_configured').touch() + if not os.path.ismount('/var/lib/scylla'): + print('Failed to initialize RAID volume!') + machine_image_configured.touch() diff --git a/packer/scylla_install_image b/packer/scylla_install_image index 1ba7123b..95d25360 
100755 --- a/packer/scylla_install_image +++ b/packer/scylla_install_image @@ -114,19 +114,16 @@ if __name__ == '__main__': setup_opt = '--ntp-domain amazon' sysconfig_opt = '' - swap_opt = '--swap-directory /' kernel_opt = '' grub_variable = 'GRUB_CMDLINE_LINUX_DEFAULT' elif args.target_cloud == 'gce': setup_opt = '' sysconfig_opt = '--disable-writeback-cache' - swap_opt = '--swap-directory /' kernel_opt = '' grub_variable = 'GRUB_CMDLINE_LINUX_DEFAULT' elif args.target_cloud == 'azure': setup_opt = '' sysconfig_opt = '--disable-writeback-cache' - swap_opt = '--swap-directory /mnt' kernel_opt = ' rootdelay=300' grub_variable = 'GRUB_CMDLINE_LINUX' @@ -141,7 +138,8 @@ if __name__ == '__main__': run('/opt/scylladb/scripts/scylla_cpuscaling_setup --force', shell=True, check=True) run(f'/opt/scylladb/scripts/scylla_sysconfig_setup --set-clocksource {sysconfig_opt}', shell=True, check=True) - run(f'/opt/scylladb/scripts/scylla_swap_setup --swap-size-bytes {half_of_diskfree()} {swap_opt}', shell=True, check=True) + if args.target_cloud == 'aws' or args.target_cloud == 'gce': + run(f'/opt/scylladb/scripts/scylla_swap_setup --swap-size-bytes {half_of_diskfree()} --swap-directory /', shell=True, check=True) run('/opt/scylladb/scripts/scylla_coredump_setup', shell=True, check=True) dot_mount = ''' [Unit] From 62e900bf056309fffb7c827ab029d317213a2018 Mon Sep 17 00:00:00 2001 From: Yaron Kaikov Date: Sun, 12 Feb 2023 18:22:48 +0200 Subject: [PATCH 10/41] [Azure]: save OS disk during image creation Due to a lack of resources in our main region EASTUS, we need to copy our images to different regions. In order to do it we need to keep the OS_disk during our main azure image creation. So instead of creating a temp resource group for every build, I have created one for building purposes only. Also keeping the original OS disk so we can later copy those images to different regions. 
While doing it, I saw it also solves the retry for deleting resources we saw during Azure builds Refs: https://github.com/scylladb/scylla-pkg/issues/3165 --- packer/scylla.json | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/packer/scylla.json b/packer/scylla.json index 4f597b3c..d612ff6c 100644 --- a/packer/scylla.json +++ b/packer/scylla.json @@ -156,8 +156,11 @@ "build_tag": "{{user `build_tag`| clean_resource_name}}", "build_mode": "{{user `build_mode`| clean_resource_name}}" }, - "location": "{{user `region`}}", - "vm_size": "{{user `vm_size`}}" + "vm_size": "{{user `vm_size`}}", + "build_resource_group_name": "scylla-images", + "keep_os_disk": true, + "virtual_network_name": "scylla-images", + "private_virtual_network_with_public_ip": true } ], "provisioners": [ From b8b59548a36d0ea51ca567b788b44ae6a7752d55 Mon Sep 17 00:00:00 2001 From: Takuya ASADA Date: Tue, 24 Jan 2023 23:00:24 +0900 Subject: [PATCH 11/41] Limit deeper C-states to reduce latency Since we want maximum performance and low latency, we need to disable deeper C-states on instances. 
Fixes #425 --- packer/scylla_install_image | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packer/scylla_install_image b/packer/scylla_install_image index 95d25360..cc8953f3 100755 --- a/packer/scylla_install_image +++ b/packer/scylla_install_image @@ -168,7 +168,7 @@ WantedBy=multi-user.target with open('/etc/default/grub.d/50-cloudimg-settings.cfg') as f: grub = f.read() grub = re.sub(fr'^{grub_variable}="(.+)"$', - fr'{grub_variable}="\1 net.ifnames=0 clocksource=tsc tsc=reliable {kernel_opt}"', grub, + fr'{grub_variable}="\1 net.ifnames=0 clocksource=tsc tsc=reliable intel_idle.max_cstate=1 processor.max_cstate=1 {kernel_opt}"', grub, flags=re.MULTILINE) with open('/etc/default/grub.d/50-cloudimg-settings.cfg', 'w') as f: f.write(grub) From 40f0ae466c511ca818827113d3a6282ce3a283e1 Mon Sep 17 00:00:00 2001 From: Takuya ASADA Date: Tue, 21 Feb 2023 01:48:25 +0900 Subject: [PATCH 12/41] azure: show login prompt correctly Currently, Azure image does not show Scylla login prompt when logging in to ssh. This is because "azureuser" ($SSH_USERNAME) user is created at machine-image building time, it's before we updated /etc/skel so it doesn't contain scylla login prompt. On Azure, username for SSH is not statically assigned, it is configurable when launching instance. So we don't need to keep "azureuser" ($SSH_USERNAME) user, we can drop it when finishes machine-image creation, and newly created user will have scylla login prompt. Also on GCE it can happen same issue when $SSH_USERNAME and username for logging in to the instance, we should drop $SSH_USERNAME user for GCE too. 
--- packer/scylla.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/packer/scylla.json b/packer/scylla.json index d612ff6c..df0b2a42 100644 --- a/packer/scylla.json +++ b/packer/scylla.json @@ -192,6 +192,12 @@ "destination": "build/", "direction": "download", "type": "file" + }, + { + "inline": [ + "if [ {{build_name}} = gce -o {{build_name}} = azure ]; then sudo userdel -r -f {{user `ssh_username`}}; fi" + ], + "type": "shell" } ], "variables": { From 953086b5bd99431d880fc5d959b4784be8424c83 Mon Sep 17 00:00:00 2001 From: Yaron Kaikov Date: Wed, 22 Feb 2023 18:07:57 +0200 Subject: [PATCH 13/41] [Azure,Gce]:build image based on minimal ubuntu22.04 Similar to https://github.com/scylladb/scylla-machine-image/pull/413/commits/300537dbb11f962972742bf8fd5097fb6fffcae7, moving both Azure and Gce to be based on minimal image --- packer/build_image.sh | 2 +- packer/scylla.json | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packer/build_image.sh b/packer/build_image.sh index 97d829a6..a4b0e069 100755 --- a/packer/build_image.sh +++ b/packer/build_image.sh @@ -285,7 +285,7 @@ if [ "$TARGET" = "aws" ]; then PACKER_ARGS+=(-var scylla_ami_description="${SCYLLA_AMI_DESCRIPTION:0:255}") elif [ "$TARGET" = "gce" ]; then SSH_USERNAME=ubuntu - SOURCE_IMAGE_FAMILY="ubuntu-2204-lts" + SOURCE_IMAGE_FAMILY="ubuntu-minimal-2204-lts" PACKER_ARGS+=(-var source_image_family="$SOURCE_IMAGE_FAMILY") elif [ "$TARGET" = "azure" ]; then diff --git a/packer/scylla.json b/packer/scylla.json index df0b2a42..f8288b01 100644 --- a/packer/scylla.json +++ b/packer/scylla.json @@ -139,8 +139,8 @@ "managed_image_name": "{{user `image_name`| clean_resource_name}}", "os_type": "Linux", "image_publisher": "Canonical", - "image_offer": "0001-com-ubuntu-server-jammy", - "image_sku": "22_04-lts-gen2", + "image_offer": "0001-com-ubuntu-minimal-jammy", + "image_sku": "minimal-22_04-lts-gen2", "azure_tags": { "scylla_version": "{{user `scylla_full_version`}}", 
"scylla_machine_image_version": "{{user `scylla_machine_image_version`}}", From 42e05cacdedcbabf1d416e32eeb2cf317b9669f5 Mon Sep 17 00:00:00 2001 From: Yaron Kaikov Date: Wed, 19 Apr 2023 22:05:28 +0300 Subject: [PATCH 14/41] (ami,azure):Install latest LTS kernel during image build During image creation we are running `apt-get full-upgrade` which also update the kernel (added as part of https://github.com/scylladb/scylla-machine-image/commit/90340275b80a3a54dcfc1e5ec660481ba167d1c3), Since we want to use LTS kernel version only, adding the kernel removal package and installation before we run `scylla_install_image` Currently only AWS and Azure have LTS kernel for 22.04, once GCP will have it as well we should add it as well Ref: https://github.com/scylladb/scylladb/issues/13560 --- packer/scylla.json | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/packer/scylla.json b/packer/scylla.json index f8288b01..ef9cce27 100644 --- a/packer/scylla.json +++ b/packer/scylla.json @@ -164,6 +164,13 @@ } ], "provisioners": [ + { + "type": "shell", + "inline": [ + "if [ {{build_name}} = aws -o {{build_name}} = azure ]; then sudo apt-get update; sudo DEBIAN_FRONTEND=noninteractive apt purge -y linux-aws* linux-headers-{{build_name}}* linux-image*{{build_name}}* linux-modules*-{{build_name}}*; sudo DEBIAN_FRONTEND=noninteractive apt-get install -y linux-{{build_name}}-lts-22.04; sudo reboot ; fi" + ], + "expect_disconnect": true + }, { "destination": "/home/{{user `ssh_username`}}/", "source": "files/", From f6e22247ad89f458795d9585a4a60cf15a5ce1cc Mon Sep 17 00:00:00 2001 From: Maciej Zimnoch Date: Wed, 19 Apr 2023 15:04:47 +0200 Subject: [PATCH 15/41] Disable EOL scylla repositories This node setup image uses old ScyllaDB version as a base because disk setup scripts inside allowed to provide locations of raid, mount etc. Newer images have them hardcoded and they don't match host paths within the container. 
Using older version of ScyllaDB image is ok'ish from security point of view, because we do run `yum update` as one of the steps so we get all the OS/packages bug fixes. --- k8s/Dockerfile | 12 ++- k8s/aws_scylla_create_devices | 194 ++++++++++++++++++++++++++++++++++ k8s/build_image.sh | 8 +- 3 files changed, 209 insertions(+), 5 deletions(-) create mode 100755 k8s/aws_scylla_create_devices diff --git a/k8s/Dockerfile b/k8s/Dockerfile index 606e1c2c..9d9379a3 100644 --- a/k8s/Dockerfile +++ b/k8s/Dockerfile @@ -1,5 +1,15 @@ +# This node setup image uses old ScyllaDB version as a base because disk setup scripts inside allowed to +# provide locations of raid, mount etc. Newer images have them hardcoded and they don't match host paths within the container. +# +# Using older version of ScyllaDB image is ok'ish from security point of view, +# because we do run `yum update` as one of the steps so we get all the OS/packages bug fixes. +# +# !!! This setup is considered **deprecated** and will be removed soon in favor of different, safer solution. !!! FROM docker.io/scylladb/scylla:4.1.6 as base +# Disable scylla repo, as 4.1 is already EOL. +RUN yum-config-manager --disable scylla --disable scylla-generic --disable scylladb-scylla-3rdparty + # Install scripts dependencies. 
RUN yum -y install epel-release && \ yum -y clean expire-cache && \ @@ -11,7 +21,7 @@ RUN pip3 install pyyaml psutil ARG cloud_provider -COPY $cloud_provider/scylla_create_devices /opt/scylladb/scylla-machine-image/scylla_create_devices +COPY "k8s/${cloud_provider}_scylla_create_devices" /opt/scylladb/scylla-machine-image/scylla_create_devices COPY k8s/scylla_k8s_node_setup /opt/scylladb/scylla-machine-image/scylla_k8s_node_setup ENTRYPOINT ["/opt/scylladb/scylla-machine-image/scylla_k8s_node_setup"] diff --git a/k8s/aws_scylla_create_devices b/k8s/aws_scylla_create_devices new file mode 100755 index 00000000..ffa066e6 --- /dev/null +++ b/k8s/aws_scylla_create_devices @@ -0,0 +1,194 @@ +#!/usr/bin/env python3 +# +# Copyright 2020 ScyllaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import re +import os +import sys +import time +import subprocess +import urllib.request +import urllib.error +from pathlib import Path + + +raid_script = "/opt/scylladb/scripts/scylla_raid_setup" +raid_device = "/dev/md%d" +scylla_root = "" + +def scylla_directory(role): + if role == "all": + return scylla_root + else: + return os.path.join(scylla_root, role) + + +def curl_instance_data(url): + max_retries = 5 + retries = 0 + while True: + try: + req = urllib.request.Request(url) + return urllib.request.urlopen(req).read().decode("utf-8") + except urllib.error.HTTPError: + print("Failed to grab %s..." 
% url) + time.sleep(5) + retries += 1 + if retries >= max_retries: + raise + + +def find_disk(disks, line): + for disk in disks: + if line.find(disk) == -1: + return False + return True + + +def config_array(disks, role, mdidx): + # Is it already constructed + disks.sort() + md_state_path = Path("/proc/mdstat") + with open(md_state_path) as mdstate: + for l in mdstate: + if find_disk(disks, l): + dev = re.search(r"^md\w+", l).group() + print("Found existing RAID %s, will mount it" % dev) + subprocess.check_call(["mount", "-o", "noatime", + "/dev/%s" % dev, + scylla_directory(role)]) + return + print("RAID Array containing %s not found. Creating..." % str(disks)) + disk_devs = ['/dev/%s' % x for x in disks] + subprocess.run([raid_script, "--raiddev", + raid_device % mdidx, "--disks", ",".join(disk_devs), + "--root", scylla_root, + "--volume-role", role, + "--update-fstab"], check=True) + + +def xenify(devname): + dev = curl_instance_data('http://169.254.169.254/latest/meta-data/block-device-mapping/' + devname) + return dev.replace("sd", "xvd") + + +def device_exists(dev): + return os.path.exists("/dev/%s" % dev) + + +def device_is_busy(dev): + try: + fd = os.open(dev, os.O_RDWR | os.O_EXCL) + os.close(fd) + return False + except OSError: + return True + + +# While testing this, I found the following issue at AWS: +# +# $ ls /dev/nvme* +# /dev/nvme0 /dev/nvme0n1 /dev/nvme1 /dev/nvme1n1 +# +# $ curl http://169.254.169.254/latest/meta-data/block-device-mapping/ +# ami +# ebs2 +# ephemeral0 +# root +# +# As one can see, only one of the ephemeral devices were listed. +# +# I saw this happening only on i3 machines, if EBS were listed before +# ephemeral during creation time. However, in that scenario, I saw it +# happening every time I tested. +# +# More info at: +# https://forums.aws.amazon.com/thread.jspa?threadID=250553 +# +# So for nvme devices, we'll just scan the device list and see what we +# find. 
Since the goal is to differentiate between ephemeral and +# non-ephemeral anyway, and NVMe are always ephemeral, this is +# acceptable +def get_disk_bundles(): + # define preferred disk roles. We'll see soon if we can respect them. + role = { + "ebs": "unused", + "ephemeral": "all" + } + + # Find disk assignments + devmap = curl_instance_data('http://169.254.169.254/latest/meta-data/block-device-mapping/') + typemap = {} + devname = re.compile("^\D+") + nvme_re = re.compile(r"nvme\d+n\d+$") + nvmes_present = list(filter(nvme_re.match, os.listdir("/dev"))) + nvmes_free = [nvme for nvme in nvmes_present if not device_is_busy(os.path.join('/dev/', nvme))] + + if nvmes_free: + typemap["ephemeral"] = nvmes_free + + for dev in devmap.splitlines(): + if dev == "ami" or dev == "root": + continue + + t = devname.match(dev).group() + if role[t] == "unused": + continue + + if t == "ephemeral" and nvmes_present: + continue + + if t not in typemap: + typemap[t] = [] + if not device_exists(xenify(dev)): + continue + typemap[t] += [xenify(dev)] + + # One of the desired types not found: The other type has it all + if "ebs" not in typemap and "ephemeral" not in typemap: + sys.stderr.write("No disks found\n") + sys.exit(0) + elif "ebs" not in typemap: + role["ephemeral"] = "all" + elif "ephemeral" not in typemap: + role["ebs"] = "all" + + # Could happen even if properly invoked through ds2 if one of the + # types is not present, and the other is set to "unused" + if role["ebs"] == role["ephemeral"]: + err_msg = "Exception when parsing config. Both EBS and ephemeral are set to the same role (%s)" + raise Exception(err_msg % (role["ebs"])) + + # If one type configured for all, the other for a specified role, and both present: + # That's valid and sane: respect that and mount one on top of the other. We just need + # make sure that the root is mounted first. 
+ order = list(typemap.keys()) + order.sort() + + mdidx = 0 + for t in order: + config_array(typemap[t], role[t], mdidx) + mdidx += 1 + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Disk creation script for Scylla.') + parser.add_argument('--scylla-data-root', dest='scylla_data_root', action='store', + help='location of Scylla root data directory', default="/var/lib/scylla") + args = parser.parse_args() + + scylla_root = args.scylla_data_root + + get_disk_bundles() diff --git a/k8s/build_image.sh b/k8s/build_image.sh index fdaaafa3..cb83f319 100755 --- a/k8s/build_image.sh +++ b/k8s/build_image.sh @@ -7,7 +7,7 @@ CLOUD_PROVIDER= print_usage() { - echo "build_image.sh -c [aws|gce|azure]" + echo "build_image.sh -c [aws]" echo " -c cloud provider" exit 1 } @@ -27,7 +27,7 @@ fi echo "Building in $PWD..." -VERSION=$(./SCYLLA-VERSION-GEN) -PACKAGE_NAME="scylladb/scylla-machine-image-k8s-$CLOUD_PROVIDER:$VERSION" +VERSION="k8s-${CLOUD_PROVIDER}-node-setup-0.0.2" +IMAGE_REF="scylladb/scylla-machine-image:${VERSION}" -docker build . -f k8s/Dockerfile --build-arg cloud_provider=$CLOUD_PROVIDER -t $PACKAGE_NAME \ No newline at end of file +docker build -f k8s/Dockerfile --build-arg "cloud_provider=${CLOUD_PROVIDER}" -t "${IMAGE_REF}" . 
From 7968077f825c65ff95521fff7eb808dc50ec778d Mon Sep 17 00:00:00 2001 From: Yaron Kaikov Date: Tue, 11 Apr 2023 15:40:22 +0300 Subject: [PATCH 16/41] fix(images):use default NTP configuration disabling ntp configuration during image creation so we will use default cloud recommended configuration Closes: https://github.com/scylladb/scylladb/issues/13344 --- packer/scylla_install_image | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/packer/scylla_install_image b/packer/scylla_install_image index cc8953f3..2a898900 100755 --- a/packer/scylla_install_image +++ b/packer/scylla_install_image @@ -111,18 +111,24 @@ if __name__ == '__main__': run(f'curl -L -o /tmp/amazon-ssm-agent.deb https://s3.amazonaws.com/ec2-downloads-windows/SSMAgent/latest/debian_{deb_arch()}/amazon-ssm-agent.deb', shell=True, check=True) run('dpkg -i /tmp/amazon-ssm-agent.deb', shell=True, check=True) run('systemctl enable amazon-ssm-agent', shell=True, check=True) + with open('/etc/chrony/chrony.conf') as f: + chrony_conf = f.read() + + chrony_conf = re.sub(r'^(pool .*$)', '# \\1', chrony_conf, flags=re.MULTILINE) + with open('/etc/chrony/chrony.conf', 'w') as f: + f.write(chrony_conf) + + with open('/etc/chrony/sources.d/ntp-pool.sources', 'w') as f: + f.write('pool time.aws.com iburst\n') - setup_opt = '--ntp-domain amazon' sysconfig_opt = '' kernel_opt = '' grub_variable = 'GRUB_CMDLINE_LINUX_DEFAULT' elif args.target_cloud == 'gce': - setup_opt = '' sysconfig_opt = '--disable-writeback-cache' kernel_opt = '' grub_variable = 'GRUB_CMDLINE_LINUX_DEFAULT' elif args.target_cloud == 'azure': - setup_opt = '' sysconfig_opt = '--disable-writeback-cache' kernel_opt = ' rootdelay=300' grub_variable = 'GRUB_CMDLINE_LINUX' @@ -131,7 +137,7 @@ if __name__ == '__main__': run('systemctl daemon-reload', shell=True, check=True) run('systemctl enable scylla-image-setup.service', shell=True, check=True) run('systemctl enable scylla-image-post-start.service', shell=True, 
check=True) - run('/opt/scylladb/scripts/scylla_setup --no-coredump-setup --no-sysconfig-setup --no-raid-setup --no-io-setup --no-ec2-check --no-swap-setup --no-cpuscaling-setup', shell=True, check=True) + run('/opt/scylladb/scripts/scylla_setup --no-coredump-setup --no-sysconfig-setup --no-raid-setup --no-io-setup --no-ec2-check --no-swap-setup --no-cpuscaling-setup --no-ntp-setup', shell=True, check=True) # On Ubuntu, 'cpufrequtils' never fails even CPU scaling is not supported, # so we want to enable it here From 42bd7963b327df3bae3bc353608cb3ca982695d9 Mon Sep 17 00:00:00 2001 From: Yaron Kaikov Date: Mon, 1 May 2023 12:28:24 +0300 Subject: [PATCH 17/41] scylla.json: fix kernel removal for Azure following the changes in 42e05cacdedcbabf1d416e32eeb2cf317b9669f5, during the package removal, forgot to change one of the packages to use cloud provider based on parameter rather than hardcoded Fixing it --- packer/scylla.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packer/scylla.json b/packer/scylla.json index ef9cce27..5f642cfa 100644 --- a/packer/scylla.json +++ b/packer/scylla.json @@ -167,7 +167,7 @@ { "type": "shell", "inline": [ - "if [ {{build_name}} = aws -o {{build_name}} = azure ]; then sudo apt-get update; sudo DEBIAN_FRONTEND=noninteractive apt purge -y linux-aws* linux-headers-{{build_name}}* linux-image*{{build_name}}* linux-modules*-{{build_name}}*; sudo DEBIAN_FRONTEND=noninteractive apt-get install -y linux-{{build_name}}-lts-22.04; sudo reboot ; fi" + "if [ {{build_name}} = aws -o {{build_name}} = azure ]; then sudo apt-get update; sudo DEBIAN_FRONTEND=noninteractive apt purge -y linux-{{build_name}}* linux-headers-{{build_name}}* linux-image*{{build_name}}* linux-modules*-{{build_name}}*; sudo DEBIAN_FRONTEND=noninteractive apt-get install -y linux-{{build_name}}-lts-22.04; sudo reboot ; fi" ], "expect_disconnect": true }, From 28722e6ad6f30f49a002bbf8b98e8dc634333f89 Mon Sep 17 00:00:00 2001 From: Yaron Kaikov Date: Wed, 3
May 2023 13:39:20 +0300 Subject: [PATCH 18/41] [azure]:fix copy image due to long image name When trying to copy Azure image to other regions (on enterprise only), we get the following failure: ``` 10:23:45 ERROR: command failed: ['/usr/bin/../../opt/az/bin/python3', '-m', 'azure.cli', 'snapshot', 'create', '--resource-group', 'image-copy-rg', '--name', 'scylla-enterprise-2023.2.0-dev-x86_64-2023-05-01T09-12-36_os_disk_snapshot-eastus2', '--location', 'eastus2', '--source', 'https://eastus2lalnrozwewefc0at6.blob.core.windows.net/snapshots/scylla-enterprise-2023.2.0-dev-x86_64-2023-05-01T09-12-36_os_disk_snapshot.vhd', '--source-storage-account-id', '/subscriptions/****/resourceGroups/image-copy-rg/providers/Microsoft.Storage/storageAccounts/eastus2lalnrozwewefc0at6', '--hyper-v-generation', 'V2', '--output', 'json', '--subscription', ****, '--tags', 'created_by=image-copy-extension'] 10:23:45 ERROR: output: ERROR: (InvalidParameter) The value of parameter snapshot.name is invalid. 10:23:45 Code: InvalidParameter 10:23:45 Message: The value of parameter snapshot.name is invalid. 
10:23:45 Target: snapshot.name ``` Since SCT are depending on `branch` and `region` to search for images, and during promoting we create an image gallery link to the relevant image, switching image name to use `generalProductName`(Scylla) instead of productName(Scylla/Scylla-enterprise) --- packer/build_image.sh | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/packer/build_image.sh b/packer/build_image.sh index a4b0e069..09b95ecd 100755 --- a/packer/build_image.sh +++ b/packer/build_image.sh @@ -300,7 +300,15 @@ elif [ "$TARGET" = "azure" ]; then PACKER_ARGS+=(-var subscription_id="$AZURE_SUBSCRIPTION_ID") fi -IMAGE_NAME="$PRODUCT-$VERSION-$ARCH-$(date '+%FT%T')" +if [ "$TARGET" = "azure" ]; then + if [ "$BUILD_MODE" = "debug" ]; then + IMAGE_NAME="scylla-debug-$VERSION-$ARCH-$(date '+%FT%T')" + else + IMAGE_NAME="scylla-$VERSION-$ARCH-$(date '+%FT%T')" + fi +else + IMAGE_NAME="$PRODUCT-$VERSION-$ARCH-$(date '+%FT%T')" +fi if [ "$BUILD_MODE" = "debug" ]; then IMAGE_NAME="$PRODUCT-debug-$VERSION-$ARCH-$(date '+%FT%T')" fi From 0f49c3132243407ea7fd253cfb0eace3d03c56e7 Mon Sep 17 00:00:00 2001 From: Yaron Kaikov Date: Tue, 9 May 2023 11:23:56 +0300 Subject: [PATCH 19/41] ami-arm: change instance type during image build Started to get an error during arm ami build: ``` 09:53:19 [1;31m==> aws: Error launching source instance: InsufficientInstanceCapacity: We currently do not have sufficient a1.xlarge capacity in the Availability Zone you requested (us-east-1b). Our system will be working on provisioning additional capacity. You can currently get a1.xlarge capacity by not specifying an Availability Zone in your request or choosing us-east-1c, us-east-1d. 
``` Switching to more common instance type (the basic type we are also using for testing) , `im4gn.xlarge` --- packer/build_image.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packer/build_image.sh b/packer/build_image.sh index 09b95ecd..ffb1d62f 100755 --- a/packer/build_image.sh +++ b/packer/build_image.sh @@ -268,7 +268,7 @@ if [ "$TARGET" = "aws" ]; then ;; "aarch64") SOURCE_AMI_FILTER="ubuntu-minimal/images/hvm-ssd/ubuntu-jammy-22.04-arm64*" - INSTANCE_TYPE="a1.xlarge" + INSTANCE_TYPE="im4gn.xlarge" ;; *) echo "Unsupported architecture: $arch" From 137e41d2410c308cbda805128b6b4356ecf549e7 Mon Sep 17 00:00:00 2001 From: Yaron Kaikov Date: Thu, 11 May 2023 14:10:21 +0300 Subject: [PATCH 20/41] (images):create kernel-version output with cloud source Today when we create images we have the same file name for all cloud images. Let's generate a cloud specific kernel filename Ref: https://github.com/scylladb/scylla-pkg/pull/3406 --- packer/scylla.json | 2 +- packer/scylla_install_image | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packer/scylla.json b/packer/scylla.json index 5f642cfa..5721f01e 100644 --- a/packer/scylla.json +++ b/packer/scylla.json @@ -195,7 +195,7 @@ "type": "file" }, { - "source": "/home/{{user `ssh_username`}}/{{user `product`}}-kernel-{{user `scylla_full_version`}}-{{user `arch`}}.txt", + "source": "/home/{{user `ssh_username`}}/{{user `product`}}-{{build_name}}-kernel-{{user `scylla_full_version`}}-{{user `arch`}}.txt", "destination": "build/", "direction": "download", "type": "file" diff --git a/packer/scylla_install_image b/packer/scylla_install_image index 2a898900..07801f85 100755 --- a/packer/scylla_install_image +++ b/packer/scylla_install_image @@ -239,6 +239,6 @@ WantedBy=multi-user.target print('{}/{}-packages-{}-{}.txt generated.'.format(homedir, args.product, args.scylla_version, arch())) kver = run('uname -r', shell=True, check=True, capture_output=True, 
encoding='utf-8').stdout.strip() - with open('{}/{}-kernel-{}-{}.txt'.format(homedir, args.product, args.scylla_version, arch()), 'w') as f: + with open('{}/{}-{}-kernel-{}-{}.txt'.format(homedir, args.product, args.target_cloud, args.scylla_version, arch()), 'a+') as f: f.write(f'kernel-version: {kver}\n') - print('{}/{}-kernel-{}-{}.txt generated.'.format(homedir, args.product, args.scylla_version, arch())) + print('{}/{}-{}-kernel-{}-{}.txt generated.'.format(homedir, args.product, args.target_cloud, args.scylla_version, arch())) From 12b54305fa90aca007847fddb986ec38358dbe97 Mon Sep 17 00:00:00 2001 From: Jenkins Promoter Date: Sun, 21 May 2023 11:46:10 +0300 Subject: [PATCH 21/41] release: prepare for 5.4.0-dev --- SCYLLA-VERSION-GEN | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SCYLLA-VERSION-GEN b/SCYLLA-VERSION-GEN index 5985eb7b..845405d1 100755 --- a/SCYLLA-VERSION-GEN +++ b/SCYLLA-VERSION-GEN @@ -1,7 +1,7 @@ #!/bin/sh PRODUCT=scylla -VERSION=5.3.0-dev +VERSION=5.4.0-dev if test -f version then From 7f9a94c05e2d00081f54b80fa5ef1f53b6221754 Mon Sep 17 00:00:00 2001 From: Yaron Kaikov Date: Sun, 4 Jun 2023 14:52:35 +0300 Subject: [PATCH 22/41] build.yml: fix git permission bug for checkout dir github action is failing with the following error: ``` fatal: unsafe repository ('/scylla-machine-image' is owned by someone else) To add an exception for this directory, call: git config --global --add safe.directory /scylla-machine-image fatal: unsafe repository ('/scylla-machine-image' is owned by someone else) To add an exception for this directory, call: git config --global --add safe.directory /scylla-machine-image Error: Process completed with exit code 128. 
``` --- .github/workflows/build.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 914791c4..92c70c42 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -15,7 +15,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Setup python - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: 3.8 architecture: x64 @@ -30,7 +30,7 @@ jobs: run: docker run -v `pwd`:/scylla-machine-image -w /scylla-machine-image --rm centos:7.2.1511 bash -c './dist/redhat/build_rpm.sh -t centos7' - name: Build RPM (Rockylinux:8) - run: docker run -v `pwd`:/scylla-machine-image -w /scylla-machine-image --rm rockylinux:8 bash -c './dist/redhat/build_rpm.sh -t centos8' + run: docker run -v `pwd`:/scylla-machine-image -w /scylla-machine-image --rm rockylinux:8 bash -c 'dnf update -y; dnf install -y git ; git config --global --add safe.directory "*"; ./dist/redhat/build_rpm.sh -t centos8' - - name: Build DEB (Ubuntu:20.04) - run: docker run -v `pwd`:/scylla-machine-image -w /scylla-machine-image --rm ubuntu:20.04 bash -c 'apt update -y; apt install -y git ; git config --global --add safe.directory "*"; ./dist/debian/build_deb.sh' + - name: Build DEB (Ubuntu:22.04) + run: docker run -v `pwd`:/scylla-machine-image -w /scylla-machine-image --rm ubuntu:22.04 bash -c 'apt update -y; apt install -y git ; git config --global --add safe.directory "*"; ./dist/debian/build_deb.sh' From 9b928f011ec25747f3defcd34bb27bfecea612a6 Mon Sep 17 00:00:00 2001 From: Yaron Kaikov Date: Tue, 6 Jun 2023 11:33:11 +0300 Subject: [PATCH 23/41] (gcp): Use lts kernel version Based on https://packages.ubuntu.com/jammy-updates/linux-gcp-lts-22.04 gcp has now an official lts for 22.04 Closes: https://github.com/scylladb/scylla-machine-image/issues/452 --- packer/scylla.json | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/packer/scylla.json b/packer/scylla.json index 
5721f01e..b8e62994 100644 --- a/packer/scylla.json +++ b/packer/scylla.json @@ -171,6 +171,13 @@ ], "expect_disconnect": true }, + { + "type": "shell", + "inline": [ + "if [ {{build_name}} = gce ]; then sudo apt-get update; sudo DEBIAN_FRONTEND=noninteractive apt purge -y linux-gcp* linux-headers-gcp* linux-image*gcp* linux-modules*-gcp*; sudo DEBIAN_FRONTEND=noninteractive apt-get install -y linux-gcp-lts-22.04; sudo reboot ; fi" + ], + "expect_disconnect": true + }, { "destination": "/home/{{user `ssh_username`}}/", "source": "files/", From 692f305b124cd5d6e771074c7ffbf63f7dadf00c Mon Sep 17 00:00:00 2001 From: Anna Mikhlin Date: Wed, 7 Jun 2023 10:19:55 +0300 Subject: [PATCH 24/41] change subnet_id to subnet_filter in packer build fix for ERROR: "im4gn.xlarge capacity issue in Availability Zone (us-east-1b)" during AMI creation with packer build. subnet_id variable changed to subnet_filter with tag:Name and random selection, which includes few available subnets (and not one dedicated subnet). 
--- packer/ami_variables.json | 1 - packer/scylla.json | 7 ++++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/packer/ami_variables.json b/packer/ami_variables.json index 800ed39a..69111a39 100644 --- a/packer/ami_variables.json +++ b/packer/ami_variables.json @@ -1,5 +1,4 @@ { - "subnet_id": "subnet-ec4a72c4", "security_group_id": "sg-c5e1f7a0", "region": "us-east-1", "associate_public_ip_address": "true", diff --git a/packer/scylla.json b/packer/scylla.json index b8e62994..9c848b55 100644 --- a/packer/scylla.json +++ b/packer/scylla.json @@ -63,7 +63,12 @@ "ssh_timeout": "5m", "ssh_username": "{{user `ssh_username`}}", "ssh_clear_authorized_keys": true, - "subnet_id": "{{user `subnet_id`}}", + "subnet_filter": { + "filters": { + "tag:Name": "image-build-subnet*" + }, + "random": true + }, "user_data_file": "user_data.txt", "ami_description": "{{user `scylla_ami_description`}}", "tags": { From db75e412521e84e9a57b501126d7e7dd34adba93 Mon Sep 17 00:00:00 2001 From: Beni Peled Date: Wed, 14 Jun 2023 19:16:38 +0300 Subject: [PATCH 25/41] packer: fix provision race-conditions following reboot I found out that periodically the GCE build fails after the reboot action (part of the shell provision) According to packer [0] it might be caused by race conditions between the provisions and `pause_before` should be used in this case > Sometimes, when executing a command like reboot, the shell script will return and Packer will start executing the next one before SSH actually quits and the machine restarts If it's not gonna help I'll increase 10s>20s and add the ssh_read_write_timeout [0] https://developer.hashicorp.com/packer/docs/provisioners/shell#handling-reboots --- packer/scylla.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packer/scylla.json b/packer/scylla.json index 9c848b55..a9ef2e53 100644 --- a/packer/scylla.json +++ b/packer/scylla.json @@ -186,7 +186,8 @@ { "destination": "/home/{{user `ssh_username`}}/", "source": 
"files/", - "type": "file" + "type": "file", + "pause_before": "10s" }, { "destination": "/home/{{user `ssh_username`}}/", From 94dd1547dac697c90da70137db4d9a61f559db83 Mon Sep 17 00:00:00 2001 From: Beni Peled Date: Thu, 15 Jun 2023 17:44:14 +0300 Subject: [PATCH 26/41] packer: add ssh_read_write_timeout to handle reboot Following db75e41, we need to add ssh_read_write_timeout and increase the pause_before to 20s --- packer/scylla.json | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/packer/scylla.json b/packer/scylla.json index a9ef2e53..fcda121a 100644 --- a/packer/scylla.json +++ b/packer/scylla.json @@ -61,6 +61,7 @@ "most_recent": true }, "ssh_timeout": "5m", + "ssh_read_write_timeout": "5m", "ssh_username": "{{user `ssh_username`}}", "ssh_clear_authorized_keys": true, "subnet_filter": { @@ -100,6 +101,7 @@ "source_image_family": "{{user `source_image_family`}}", "ssh_username": "{{user `ssh_username`}}", "ssh_timeout": "6m", + "ssh_read_write_timeout": "5m", "project_id": "{{user `project_id`}}", "zone": "{{user `zone`}}", "image_storage_locations": ["{{user `image_storage_location`}}"], @@ -136,6 +138,8 @@ "name": "azure", "type": "azure-arm", "ssh_username": "{{user `ssh_username`}}", + "ssh_timeout": "5m", + "ssh_read_write_timeout": "5m", "client_id": "{{user `client_id`}}", "client_secret": "{{user `client_secret`}}", "tenant_id": "{{user `tenant_id`}}", @@ -187,7 +191,7 @@ "destination": "/home/{{user `ssh_username`}}/", "source": "files/", "type": "file", - "pause_before": "10s" + "pause_before": "20s" }, { "destination": "/home/{{user `ssh_username`}}/", From 08f615934c8d15dc568b84bdb529b360c73c10df Mon Sep 17 00:00:00 2001 From: Anna Mikhlin Date: Mon, 1 May 2023 16:25:22 +0300 Subject: [PATCH 27/41] ami_variables: update aws variables for releng account follow the change of ami creation in releng account the ami variables should be updated accordingly. 
--- packer/ami_variables.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packer/ami_variables.json b/packer/ami_variables.json index 69111a39..0d23822d 100644 --- a/packer/ami_variables.json +++ b/packer/ami_variables.json @@ -1,5 +1,5 @@ { - "security_group_id": "sg-c5e1f7a0", + "security_group_id": "sg-088128b2712c264d1", "region": "us-east-1", "associate_public_ip_address": "true", "instance_type": "c4.xlarge" From 6643f74c5435f3d3bfeb26824f85c1e0ca087922 Mon Sep 17 00:00:00 2001 From: Anna Mikhlin Date: Mon, 19 Jun 2023 23:35:46 +0300 Subject: [PATCH 28/41] adding ami_users option to ami builder adding an option of ami_users that getting a list of account IDs and adding permissions to AMI during packer build --- packer/build_image.sh | 7 +++++++ packer/scylla.json | 1 + 2 files changed, 8 insertions(+) diff --git a/packer/build_image.sh b/packer/build_image.sh index ffb1d62f..c497f863 100755 --- a/packer/build_image.sh +++ b/packer/build_image.sh @@ -29,6 +29,7 @@ print_usage() { echo " [--scylla-build-sha-id] Scylla build SHA id form metadata file" echo " [--branch] Set the release branch for GCE label. Default: master" echo " [--ami-regions] Set regions to copy the AMI when done building it (including permissions and tags)" + echo " [--ami-users] A list of account IDs that have access to launch the AMI" echo " [--build-tag] Jenkins Build tag" echo " --download-no-server Download all deb needed excluding scylla from repo-for-install" echo " [--build-mode] Choose which build mode to use for Scylla installation. Default: release. 
Valid options: release|debug" @@ -98,6 +99,11 @@ while [ $# -gt 0 ]; do echo "--ami-regions prameter: AMI_REGIONS |$AMI_REGIONS|" shift 2 ;; + "--ami-users"): + AMI_USERS=$2 + echo "--ami-users parameter: AMI_USERS |$AMI_USERS|" + shift 2 + ;; "--log-file") PACKER_LOG_PATH=$2 echo "--log-file parameter: PACKER_LOG_PATH |$PACKER_LOG_PATH|" @@ -345,6 +351,7 @@ set -x -var operating_system="$OPERATING_SYSTEM" \ -var branch="$BRANCH" \ -var ami_regions="$AMI_REGIONS" \ + -var ami_users="$AMI_USERS" \ -var arch="$ARCH" \ -var product="$PRODUCT" \ -var build_mode="$BUILD_MODE" \ diff --git a/packer/scylla.json b/packer/scylla.json index fcda121a..9d90ef03 100644 --- a/packer/scylla.json +++ b/packer/scylla.json @@ -89,6 +89,7 @@ "build_mode": "{{user `build_mode`| clean_resource_name}}" }, "ami_regions": "{{user `ami_regions`}}", + "ami_users": "{{user `ami_users`}}", "aws_polling": { "delay_seconds": "30", "max_attempts": "100" From 2aa400c7e2ab85c344377f98bdb898edea133197 Mon Sep 17 00:00:00 2001 From: Beni Peled Date: Mon, 10 Jul 2023 12:17:55 +0300 Subject: [PATCH 29/41] packer-shell: replace semicolon separators with double ampersand As part of investigating another issue (missing linux-azure-lts package) I found out that the update-kernel shell commands set with semicolon separators which means all commands will run regardless of the results of previous commands, This commits change it to be separated by double ampersand to make sure commands will be run only if the previous command is done successfully --- packer/scylla.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packer/scylla.json b/packer/scylla.json index 9d90ef03..f06cc7c8 100644 --- a/packer/scylla.json +++ b/packer/scylla.json @@ -177,14 +177,14 @@ { "type": "shell", "inline": [ - "if [ {{build_name}} = aws -o {{build_name}} = azure ]; then sudo apt-get update; sudo DEBIAN_FRONTEND=noninteractive apt purge -y linux-{{build_name}}* linux-headers-{{build_name}}* 
linux-image*{{build_name}}* linux-modules*-{{build_name}}*; sudo DEBIAN_FRONTEND=noninteractive apt-get install -y linux-{{build_name}}-lts-22.04; sudo reboot ; fi" + "if [ {{build_name}} = aws -o {{build_name}} = azure ]; then sudo apt-get update && sudo DEBIAN_FRONTEND=noninteractive apt purge -y linux-{{build_name}}* linux-headers-{{build_name}}* linux-image*{{build_name}}* linux-modules*-{{build_name}}* && sudo DEBIAN_FRONTEND=noninteractive apt-get install -y linux-{{build_name}}-lts-22.04 && sudo reboot; fi" ], "expect_disconnect": true }, { "type": "shell", "inline": [ - "if [ {{build_name}} = gce ]; then sudo apt-get update; sudo DEBIAN_FRONTEND=noninteractive apt purge -y linux-gcp* linux-headers-gcp* linux-image*gcp* linux-modules*-gcp*; sudo DEBIAN_FRONTEND=noninteractive apt-get install -y linux-gcp-lts-22.04; sudo reboot ; fi" + "if [ {{build_name}} = gce ]; then sudo apt-get update && sudo DEBIAN_FRONTEND=noninteractive apt purge -y linux-gcp* linux-headers-gcp* linux-image*gcp* linux-modules*-gcp* && sudo DEBIAN_FRONTEND=noninteractive apt-get install -y linux-gcp-lts-22.04 && sudo reboot; fi" ], "expect_disconnect": true }, From 23eefa47371b97e2c38aa83ea49cafb0204744c7 Mon Sep 17 00:00:00 2001 From: Yaron Kaikov Date: Sun, 16 Jul 2023 12:18:37 +0300 Subject: [PATCH 30/41] scylla_install_image:remove /var/cache/debconf/config.dat after image creation during our rolling-upgrade process we keep getting the following failure: ``` Command: 'sudo DEBIAN_FRONTEND=noninteractive apt-get -o Dpkg::Options::="--force-confold" -o Dpkg::Options::="--force-confdef" upgrade -y ' Exit code: 100 Stdout: linux-aws linux-headers-aws linux-image-aws 0 upgraded, 0 newly installed, 0 to remove and 3 not upgraded. 1 not fully installed or removed. After this operation, 0 B of additional disk space will be used. Setting up shim-signed (1.40.9+15.7-0ubuntu1) ... 
mount: /var/lib/grub/esp: special device /dev/disk/by-id/nvme-Amazon_Elastic_Block_Store_vol0b2a94761ddb1d0c5-part15 does not exist. dpkg: error processing package shim-signed (--configure): installed shim-signed package post-installation script subprocess returned error exit status 32 Errors were encountered while processing: shim-signed Stderr: E: Sub-process /usr/bin/dpkg returned an error code (1) ``` Happend for all images As suggested by @syuu1228, let's remove `config.dat` once we complete the image creation Closes: https://github.com/scylladb/scylla-enterprise/issues/2818 --- packer/scylla_install_image | 1 + 1 file changed, 1 insertion(+) diff --git a/packer/scylla_install_image b/packer/scylla_install_image index 07801f85..0d1cb779 100755 --- a/packer/scylla_install_image +++ b/packer/scylla_install_image @@ -170,6 +170,7 @@ WantedBy=multi-user.target os.remove('{}/.ssh/authorized_keys'.format(homedir)) os.remove('/var/lib/scylla-housekeeping/housekeeping.uuid') + os.remove('/var/cache/debconf/config.dat') with open('/etc/default/grub.d/50-cloudimg-settings.cfg') as f: grub = f.read() From a63350da21eddc2577604e182e73b7c54c59affb Mon Sep 17 00:00:00 2001 From: Takuya ASADA Date: Mon, 14 Aug 2023 18:33:52 +0900 Subject: [PATCH 31/41] gce: drop rsyslog On AWS and Azure Ubuntu Minimal image we don't have rsyslog, it just uses persistent journal log, but only GCE image has rsyslog. We want to align image configuration between clouds, let's drop rsyslog from GCE image. 
Related scylladb/scylla-enterprise#3080 --- packer/scylla_install_image | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packer/scylla_install_image b/packer/scylla_install_image index 0d1cb779..a93d39e8 100755 --- a/packer/scylla_install_image +++ b/packer/scylla_install_image @@ -125,6 +125,8 @@ if __name__ == '__main__': kernel_opt = '' grub_variable = 'GRUB_CMDLINE_LINUX_DEFAULT' elif args.target_cloud == 'gce': + # align with other clouds image + run('apt-get purge -y rsyslog', shell=True, check=True) sysconfig_opt = '--disable-writeback-cache' kernel_opt = '' grub_variable = 'GRUB_CMDLINE_LINUX_DEFAULT' From ecdbd44b5661cd8e97fec18c1d74a311ec4c2fa7 Mon Sep 17 00:00:00 2001 From: Takuya ASADA Date: Mon, 28 Aug 2023 15:57:58 +0900 Subject: [PATCH 32/41] scylla_cloud_io_setup: add preset configuration for all supported instance types on EC2 Since we merged https://github.com/scylladb/scylladb/commit/817f34d1a94a1daa1e8591c93a047081d51f5628, we are missing preset configuration for c5d, m5d, m5ad, r5d, z1d. On such instance, our AMI causes error during startup (see scylladb/scylladb#9660). To fix the problem, we need to add preset configuration for these instance types. 
--- common/scylla_cloud_io_setup | 166 +++++++++++++++++++++++++++++++++++ 1 file changed, 166 insertions(+) diff --git a/common/scylla_cloud_io_setup b/common/scylla_cloud_io_setup index 6c5f8217..e0a9f7a3 100755 --- a/common/scylla_cloud_io_setup +++ b/common/scylla_cloud_io_setup @@ -81,6 +81,172 @@ class aws_io_setup(cloud_io_setup): self.disk_properties["read_bandwidth"] = 507338935 * nr_disks self.disk_properties["write_iops"] = 57100 * nr_disks self.disk_properties["write_bandwidth"] = 483141731 * nr_disks + elif idata.instance_class() in ("m5d", "r5d"): + if idata.instance_size() == "large": + disk_properties["read_iops"] = 33271 + disk_properties["read_bandwidth"] = 158538149 + disk_properties["write_iops"] = 16820 + disk_properties["write_bandwidth"] = 70219810 + elif idata.instance_size() == "xlarge": + disk_properties["read_iops"] = 65979 + disk_properties["read_bandwidth"] = 260654293 + disk_properties["write_iops"] = 32534 + disk_properties["write_bandwidth"] = 135897424 + elif idata.instance_size() == "2xlarge": + disk_properties["read_iops"] = 130095 + disk_properties["read_bandwidth"] = 621758272 + disk_properties["write_iops"] = 63644 + disk_properties["write_bandwidth"] = 267667525 + elif idata.instance_size() == "4xlarge": + disk_properties["read_iops"] = 129822 * nr_disks + disk_properties["read_bandwidth"] = 620878826 * nr_disks + disk_properties["write_iops"] = 63212 * nr_disks + disk_properties["write_bandwidth"] = 267703397 * nr_disks + elif idata.instance_size() == "8xlarge": + disk_properties["read_iops"] = 257069 * nr_disks + disk_properties["read_bandwidth"] = 1250134869 * nr_disks + disk_properties["write_iops"] = 115433 * nr_disks + disk_properties["write_bandwidth"] = 532868032 * nr_disks + elif idata.instance_size() == "12xlarge": + disk_properties["read_iops"] = 381626 * nr_disks + disk_properties["read_bandwidth"] = 1865794816 * nr_disks + disk_properties["write_iops"] = 115333 * nr_disks + disk_properties["write_bandwidth"] = 
795884800 * nr_disks + elif idata.instance_size() == "16xlarge": + disk_properties["read_iops"] = 257054 * nr_disks + disk_properties["read_bandwidth"] = 1254133802 * nr_disks + disk_properties["write_iops"] = 108163 * nr_disks + disk_properties["write_bandwidth"] = 532996277 * nr_disks + elif idata.instance_size() == "24xlarge": + disk_properties["read_iops"] = 374737 * nr_disks + disk_properties["read_bandwidth"] = 1855833386 * nr_disks + disk_properties["write_iops"] = 125214 * nr_disks + disk_properties["write_bandwidth"] = 796082133 * nr_disks + elif idata.instance_size() == "metal": + disk_properties["read_iops"] = 381441 * nr_disks + disk_properties["read_bandwidth"] = 1874585429 * nr_disks + disk_properties["write_iops"] = 108789 * nr_disks + disk_properties["write_bandwidth"] = 796443221 * nr_disks + elif idata.instance() == "m5ad.large": + disk_properties["read_iops"] = 33306 + disk_properties["read_bandwidth"] = 158338864 + disk_properties["write_iops"] = 16817 + disk_properties["write_bandwidth"] = 70194288 + elif idata.instance() == "m5ad.xlarge": + disk_properties["read_iops"] = 66127 + disk_properties["read_bandwidth"] = 260377466 + disk_properties["write_iops"] = 32893 + disk_properties["write_bandwidth"] = 135897696 + elif idata.instance() == "m5ad.2xlarge": + disk_properties["read_iops"] = 129977 + disk_properties["read_bandwidth"] = 621997248 + disk_properties["write_iops"] = 63442 + disk_properties["write_bandwidth"] = 267648736 + elif idata.instance() == "m5ad.4xlarge": + disk_properties["read_iops"] = 129937 * nr_disks + disk_properties["read_bandwidth"] = 620231082 * nr_disks + disk_properties["write_iops"] = 62666 * nr_disks + disk_properties["write_bandwidth"] = 267639125 * nr_disks + elif idata.instance() == "m5ad.8xlarge": + disk_properties["read_iops"] = 257095 * nr_disks + disk_properties["read_bandwidth"] = 1249927637 * nr_disks + disk_properties["write_iops"] = 114446 * nr_disks + disk_properties["write_bandwidth"] = 532821760 * 
nr_disks + elif idata.instance() == "m5ad.12xlarge": + disk_properties["read_iops"] = 376431 * nr_disks + disk_properties["read_bandwidth"] = 1865866709 * nr_disks + disk_properties["write_iops"] = 115985 * nr_disks + disk_properties["write_bandwidth"] = 796003477 * nr_disks + elif idata.instance() == "m5ad.16xlarge": + disk_properties["read_iops"] = 256358 * nr_disks + disk_properties["read_bandwidth"] = 1250889770 * nr_disks + disk_properties["write_iops"] = 114707 * nr_disks + disk_properties["write_bandwidth"] = 532998506 * nr_disks + elif idata.instance() == "m5ad.24xlarge": + disk_properties["read_iops"] = 258951 * nr_disks + disk_properties["read_bandwidth"] = 1865871317 * nr_disks + disk_properties["write_iops"] = 116030 * nr_disks + disk_properties["write_bandwidth"] = 796217706 * nr_disks + elif idata.instance() == "c5d.large": + disk_properties["read_iops"] = 22095 + disk_properties["read_bandwidth"] = 104797834 + disk_properties["write_iops"] = 10125 + disk_properties["write_bandwidth"] = 41982906 + elif idata.instance() == "c5d.xlarge": + disk_properties["read_iops"] = 44355 + disk_properties["read_bandwidth"] = 212593018 + disk_properties["write_iops"] = 20025 + disk_properties["write_bandwidth"] = 84213472 + elif idata.instance() == "c5d.2xlarge": + disk_properties["read_iops"] = 89036 + disk_properties["read_bandwidth"] = 426821429 + disk_properties["write_iops"] = 41697 + disk_properties["write_bandwidth"] = 173730709 + elif idata.instance() == "c5d.4xlarge": + disk_properties["read_iops"] = 193970 + disk_properties["read_bandwidth"] = 928278314 + disk_properties["write_iops"] = 83058 + disk_properties["write_bandwidth"] = 351839733 + elif idata.instance() == "c5d.9xlarge": + disk_properties["read_iops"] = 381800 + disk_properties["read_bandwidth"] = 1865831893 + disk_properties["write_iops"] = 112264 + disk_properties["write_bandwidth"] = 795731264 + elif idata.instance() == "c5d.12xlarge": + disk_properties["read_iops"] = 381775 * nr_disks + 
disk_properties["read_bandwidth"] = 1866481792 * nr_disks + disk_properties["write_iops"] = 114302 * nr_disks + disk_properties["write_bandwidth"] = 795607616 * nr_disks + elif idata.instance() == "c5d.18xlarge": + disk_properties["read_iops"] = 381270 * nr_disks + disk_properties["read_bandwidth"] = 1856972330 * nr_disks + disk_properties["write_iops"] = 125638 * nr_disks + disk_properties["write_bandwidth"] = 795813866 * nr_disks + elif idata.instance() == "c5d.24xlarge": + disk_properties["read_iops"] = 381355 * nr_disks + disk_properties["read_bandwidth"] = 1876056704 * nr_disks + disk_properties["write_iops"] = 104946 * nr_disks + disk_properties["write_bandwidth"] = 795901013 * nr_disks + elif idata.instance() == "c5d.metal": + disk_properties["read_iops"] = 381330 * nr_disks + disk_properties["read_bandwidth"] = 1865216426 * nr_disks + disk_properties["write_iops"] = 115484 * nr_disks + disk_properties["write_bandwidth"] = 796109546 * nr_disks + elif idata.instance() == "z1d.large": + disk_properties["read_iops"] = 33286 + disk_properties["read_bandwidth"] = 158206858 + disk_properties["write_iops"] = 16956 + disk_properties["write_bandwidth"] = 70226280 + elif idata.instance() == "z1d.xlarge": + disk_properties["read_iops"] = 66076 + disk_properties["read_bandwidth"] = 260565488 + disk_properties["write_iops"] = 32769 + disk_properties["write_bandwidth"] = 135891989 + elif idata.instance() == "z1d.2xlarge": + disk_properties["read_iops"] = 130235 + disk_properties["read_bandwidth"] = 622297194 + disk_properties["write_iops"] = 63891 + disk_properties["write_bandwidth"] = 267679509 + elif idata.instance() == "z1d.3xlarge": + disk_properties["read_iops"] = 193840 + disk_properties["read_bandwidth"] = 927493696 + disk_properties["write_iops"] = 82864 + disk_properties["write_bandwidth"] = 351608480 + elif idata.instance() == "z1d.6xlarge": + disk_properties["read_iops"] = 381902 + disk_properties["read_bandwidth"] = 1865543381 + disk_properties["write_iops"] = 
117874 + disk_properties["write_bandwidth"] = 795786901 + elif idata.instance() == "z1d.12xlarge": + disk_properties["read_iops"] = 381648 * nr_disks + disk_properties["read_bandwidth"] = 1865706538 * nr_disks + disk_properties["write_iops"] = 115834 * nr_disks + disk_properties["write_bandwidth"] = 795876778 * nr_disks + elif idata.instance() == "z1d.metal": + disk_properties["read_iops"] = 381378 * nr_disks + disk_properties["read_bandwidth"] = 1857873109 * nr_disks + disk_properties["write_iops"] = 122453 * nr_disks + disk_properties["write_bandwidth"] = 795593024 * nr_disks elif self.idata.instance_class() in ("c6gd", "m6gd", "r6gd", "x2gd"): if self.idata.instance_size() == "medium": self.disk_properties["read_iops"] = 14808 From c87f3db1f00a982f7d80d53c8248ace661c77e82 Mon Sep 17 00:00:00 2001 From: Takuya ASADA Date: Mon, 19 Jun 2023 18:23:26 +0900 Subject: [PATCH 33/41] scylla_cloud_io_setup: add i4g instance type Adding preset value of i4g instance type. Closes scylladb/scylladb#13830 --- common/scylla_cloud_io_setup | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/common/scylla_cloud_io_setup b/common/scylla_cloud_io_setup index e0a9f7a3..da262dc7 100755 --- a/common/scylla_cloud_io_setup +++ b/common/scylla_cloud_io_setup @@ -293,6 +293,31 @@ class aws_io_setup(cloud_io_setup): self.disk_properties["read_bandwidth"] = 2527296683 * nr_disks self.disk_properties["write_iops"] = 156326 * nr_disks self.disk_properties["write_bandwidth"] = 1063657088 * nr_disks + elif self.idata.instancetype == "i4g.large": + self.disk_properties["read_iops"] = 34035 + self.disk_properties["read_bandwidth"] = 288471904 + self.disk_properties["write_iops"] = 27943 + self.disk_properties["write_bandwidth"] = 126763269 + elif self.idata.instancetype == "i4g.xlarge": + self.disk_properties["read_iops"] = 68111 + self.disk_properties["read_bandwidth"] = 571766890 + self.disk_properties["write_iops"] = 47622 + self.disk_properties["write_bandwidth"] = 
254230192 + elif self.idata.instancetype == "i4g.2xlarge": + self.disk_properties["read_iops"] = 136352 + self.disk_properties["read_bandwidth"] = 1148509696 + self.disk_properties["write_iops"] = 82746 + self.disk_properties["write_bandwidth"] = 508828810 + elif self.idata.instancetype == "i4g.8xlarge": + self.disk_properties["read_iops"] = 271495 * nr_disks + self.disk_properties["read_bandwidth"] = 2293024938 * nr_disks + self.disk_properties["write_iops"] = 93653 * nr_disks + self.disk_properties["write_bandwidth"] = 1031956586 * nr_disks + elif self.idata.instancetype == "i4g.16xlarge": + self.disk_properties["read_iops"] = 250489 * nr_disks + self.disk_properties["read_bandwidth"] = 2286635861 * nr_disks + self.disk_properties["write_iops"] = 93737 * nr_disks + self.disk_properties["write_bandwidth"] = 1034256042 * nr_disks elif self.idata.instancetype == "im4gn.large": self.disk_properties["read_iops"] = 33943 self.disk_properties["read_bandwidth"] = 288433525 From 95dcfc7df1374d094a7d77076e63027d2a20472a Mon Sep 17 00:00:00 2001 From: Takuya ASADA Date: Tue, 29 Aug 2023 03:46:03 +0900 Subject: [PATCH 34/41] scylla_cloud_io_setup: fix NameError Since ecdbd44b5661cd8e97fec18c1d74a311ec4c2fa7 was bring from scylla_io_setup patch, there are incompatible variable names. 
We need to rename them in following rules: - idata and disk_properties should be member of self - rename instance_class() to instancetype Signed-off-by: Takuya ASADA --- common/scylla_cloud_io_setup | 332 +++++++++++++++++------------------ 1 file changed, 166 insertions(+), 166 deletions(-) diff --git a/common/scylla_cloud_io_setup b/common/scylla_cloud_io_setup index da262dc7..6f2a97ba 100755 --- a/common/scylla_cloud_io_setup +++ b/common/scylla_cloud_io_setup @@ -81,172 +81,172 @@ class aws_io_setup(cloud_io_setup): self.disk_properties["read_bandwidth"] = 507338935 * nr_disks self.disk_properties["write_iops"] = 57100 * nr_disks self.disk_properties["write_bandwidth"] = 483141731 * nr_disks - elif idata.instance_class() in ("m5d", "r5d"): - if idata.instance_size() == "large": - disk_properties["read_iops"] = 33271 - disk_properties["read_bandwidth"] = 158538149 - disk_properties["write_iops"] = 16820 - disk_properties["write_bandwidth"] = 70219810 - elif idata.instance_size() == "xlarge": - disk_properties["read_iops"] = 65979 - disk_properties["read_bandwidth"] = 260654293 - disk_properties["write_iops"] = 32534 - disk_properties["write_bandwidth"] = 135897424 - elif idata.instance_size() == "2xlarge": - disk_properties["read_iops"] = 130095 - disk_properties["read_bandwidth"] = 621758272 - disk_properties["write_iops"] = 63644 - disk_properties["write_bandwidth"] = 267667525 - elif idata.instance_size() == "4xlarge": - disk_properties["read_iops"] = 129822 * nr_disks - disk_properties["read_bandwidth"] = 620878826 * nr_disks - disk_properties["write_iops"] = 63212 * nr_disks - disk_properties["write_bandwidth"] = 267703397 * nr_disks - elif idata.instance_size() == "8xlarge": - disk_properties["read_iops"] = 257069 * nr_disks - disk_properties["read_bandwidth"] = 1250134869 * nr_disks - disk_properties["write_iops"] = 115433 * nr_disks - disk_properties["write_bandwidth"] = 532868032 * nr_disks - elif idata.instance_size() == "12xlarge": - 
disk_properties["read_iops"] = 381626 * nr_disks - disk_properties["read_bandwidth"] = 1865794816 * nr_disks - disk_properties["write_iops"] = 115333 * nr_disks - disk_properties["write_bandwidth"] = 795884800 * nr_disks - elif idata.instance_size() == "16xlarge": - disk_properties["read_iops"] = 257054 * nr_disks - disk_properties["read_bandwidth"] = 1254133802 * nr_disks - disk_properties["write_iops"] = 108163 * nr_disks - disk_properties["write_bandwidth"] = 532996277 * nr_disks - elif idata.instance_size() == "24xlarge": - disk_properties["read_iops"] = 374737 * nr_disks - disk_properties["read_bandwidth"] = 1855833386 * nr_disks - disk_properties["write_iops"] = 125214 * nr_disks - disk_properties["write_bandwidth"] = 796082133 * nr_disks - elif idata.instance_size() == "metal": - disk_properties["read_iops"] = 381441 * nr_disks - disk_properties["read_bandwidth"] = 1874585429 * nr_disks - disk_properties["write_iops"] = 108789 * nr_disks - disk_properties["write_bandwidth"] = 796443221 * nr_disks - elif idata.instance() == "m5ad.large": - disk_properties["read_iops"] = 33306 - disk_properties["read_bandwidth"] = 158338864 - disk_properties["write_iops"] = 16817 - disk_properties["write_bandwidth"] = 70194288 - elif idata.instance() == "m5ad.xlarge": - disk_properties["read_iops"] = 66127 - disk_properties["read_bandwidth"] = 260377466 - disk_properties["write_iops"] = 32893 - disk_properties["write_bandwidth"] = 135897696 - elif idata.instance() == "m5ad.2xlarge": - disk_properties["read_iops"] = 129977 - disk_properties["read_bandwidth"] = 621997248 - disk_properties["write_iops"] = 63442 - disk_properties["write_bandwidth"] = 267648736 - elif idata.instance() == "m5ad.4xlarge": - disk_properties["read_iops"] = 129937 * nr_disks - disk_properties["read_bandwidth"] = 620231082 * nr_disks - disk_properties["write_iops"] = 62666 * nr_disks - disk_properties["write_bandwidth"] = 267639125 * nr_disks - elif idata.instance() == "m5ad.8xlarge": - 
disk_properties["read_iops"] = 257095 * nr_disks - disk_properties["read_bandwidth"] = 1249927637 * nr_disks - disk_properties["write_iops"] = 114446 * nr_disks - disk_properties["write_bandwidth"] = 532821760 * nr_disks - elif idata.instance() == "m5ad.12xlarge": - disk_properties["read_iops"] = 376431 * nr_disks - disk_properties["read_bandwidth"] = 1865866709 * nr_disks - disk_properties["write_iops"] = 115985 * nr_disks - disk_properties["write_bandwidth"] = 796003477 * nr_disks - elif idata.instance() == "m5ad.16xlarge": - disk_properties["read_iops"] = 256358 * nr_disks - disk_properties["read_bandwidth"] = 1250889770 * nr_disks - disk_properties["write_iops"] = 114707 * nr_disks - disk_properties["write_bandwidth"] = 532998506 * nr_disks - elif idata.instance() == "m5ad.24xlarge": - disk_properties["read_iops"] = 258951 * nr_disks - disk_properties["read_bandwidth"] = 1865871317 * nr_disks - disk_properties["write_iops"] = 116030 * nr_disks - disk_properties["write_bandwidth"] = 796217706 * nr_disks - elif idata.instance() == "c5d.large": - disk_properties["read_iops"] = 22095 - disk_properties["read_bandwidth"] = 104797834 - disk_properties["write_iops"] = 10125 - disk_properties["write_bandwidth"] = 41982906 - elif idata.instance() == "c5d.xlarge": - disk_properties["read_iops"] = 44355 - disk_properties["read_bandwidth"] = 212593018 - disk_properties["write_iops"] = 20025 - disk_properties["write_bandwidth"] = 84213472 - elif idata.instance() == "c5d.2xlarge": - disk_properties["read_iops"] = 89036 - disk_properties["read_bandwidth"] = 426821429 - disk_properties["write_iops"] = 41697 - disk_properties["write_bandwidth"] = 173730709 - elif idata.instance() == "c5d.4xlarge": - disk_properties["read_iops"] = 193970 - disk_properties["read_bandwidth"] = 928278314 - disk_properties["write_iops"] = 83058 - disk_properties["write_bandwidth"] = 351839733 - elif idata.instance() == "c5d.9xlarge": - disk_properties["read_iops"] = 381800 - 
disk_properties["read_bandwidth"] = 1865831893 - disk_properties["write_iops"] = 112264 - disk_properties["write_bandwidth"] = 795731264 - elif idata.instance() == "c5d.12xlarge": - disk_properties["read_iops"] = 381775 * nr_disks - disk_properties["read_bandwidth"] = 1866481792 * nr_disks - disk_properties["write_iops"] = 114302 * nr_disks - disk_properties["write_bandwidth"] = 795607616 * nr_disks - elif idata.instance() == "c5d.18xlarge": - disk_properties["read_iops"] = 381270 * nr_disks - disk_properties["read_bandwidth"] = 1856972330 * nr_disks - disk_properties["write_iops"] = 125638 * nr_disks - disk_properties["write_bandwidth"] = 795813866 * nr_disks - elif idata.instance() == "c5d.24xlarge": - disk_properties["read_iops"] = 381355 * nr_disks - disk_properties["read_bandwidth"] = 1876056704 * nr_disks - disk_properties["write_iops"] = 104946 * nr_disks - disk_properties["write_bandwidth"] = 795901013 * nr_disks - elif idata.instance() == "c5d.metal": - disk_properties["read_iops"] = 381330 * nr_disks - disk_properties["read_bandwidth"] = 1865216426 * nr_disks - disk_properties["write_iops"] = 115484 * nr_disks - disk_properties["write_bandwidth"] = 796109546 * nr_disks - elif idata.instance() == "z1d.large": - disk_properties["read_iops"] = 33286 - disk_properties["read_bandwidth"] = 158206858 - disk_properties["write_iops"] = 16956 - disk_properties["write_bandwidth"] = 70226280 - elif idata.instance() == "z1d.xlarge": - disk_properties["read_iops"] = 66076 - disk_properties["read_bandwidth"] = 260565488 - disk_properties["write_iops"] = 32769 - disk_properties["write_bandwidth"] = 135891989 - elif idata.instance() == "z1d.2xlarge": - disk_properties["read_iops"] = 130235 - disk_properties["read_bandwidth"] = 622297194 - disk_properties["write_iops"] = 63891 - disk_properties["write_bandwidth"] = 267679509 - elif idata.instance() == "z1d.3xlarge": - disk_properties["read_iops"] = 193840 - disk_properties["read_bandwidth"] = 927493696 - 
disk_properties["write_iops"] = 82864 - disk_properties["write_bandwidth"] = 351608480 - elif idata.instance() == "z1d.6xlarge": - disk_properties["read_iops"] = 381902 - disk_properties["read_bandwidth"] = 1865543381 - disk_properties["write_iops"] = 117874 - disk_properties["write_bandwidth"] = 795786901 - elif idata.instance() == "z1d.12xlarge": - disk_properties["read_iops"] = 381648 * nr_disks - disk_properties["read_bandwidth"] = 1865706538 * nr_disks - disk_properties["write_iops"] = 115834 * nr_disks - disk_properties["write_bandwidth"] = 795876778 * nr_disks - elif idata.instance() == "z1d.metal": - disk_properties["read_iops"] = 381378 * nr_disks - disk_properties["read_bandwidth"] = 1857873109 * nr_disks - disk_properties["write_iops"] = 122453 * nr_disks - disk_properties["write_bandwidth"] = 795593024 * nr_disks + elif self.idata.instance_class() in ("m5d", "r5d"): + if self.idata.instance_size() == "large": + self.disk_properties["read_iops"] = 33271 + self.disk_properties["read_bandwidth"] = 158538149 + self.disk_properties["write_iops"] = 16820 + self.disk_properties["write_bandwidth"] = 70219810 + elif self.idata.instance_size() == "xlarge": + self.disk_properties["read_iops"] = 65979 + self.disk_properties["read_bandwidth"] = 260654293 + self.disk_properties["write_iops"] = 32534 + self.disk_properties["write_bandwidth"] = 135897424 + elif self.idata.instance_size() == "2xlarge": + self.disk_properties["read_iops"] = 130095 + self.disk_properties["read_bandwidth"] = 621758272 + self.disk_properties["write_iops"] = 63644 + self.disk_properties["write_bandwidth"] = 267667525 + elif self.idata.instance_size() == "4xlarge": + self.disk_properties["read_iops"] = 129822 * nr_disks + self.disk_properties["read_bandwidth"] = 620878826 * nr_disks + self.disk_properties["write_iops"] = 63212 * nr_disks + self.disk_properties["write_bandwidth"] = 267703397 * nr_disks + elif self.idata.instance_size() == "8xlarge": + self.disk_properties["read_iops"] = 257069 
* nr_disks + self.disk_properties["read_bandwidth"] = 1250134869 * nr_disks + self.disk_properties["write_iops"] = 115433 * nr_disks + self.disk_properties["write_bandwidth"] = 532868032 * nr_disks + elif self.idata.instance_size() == "12xlarge": + self.disk_properties["read_iops"] = 381626 * nr_disks + self.disk_properties["read_bandwidth"] = 1865794816 * nr_disks + self.disk_properties["write_iops"] = 115333 * nr_disks + self.disk_properties["write_bandwidth"] = 795884800 * nr_disks + elif self.idata.instance_size() == "16xlarge": + self.disk_properties["read_iops"] = 257054 * nr_disks + self.disk_properties["read_bandwidth"] = 1254133802 * nr_disks + self.disk_properties["write_iops"] = 108163 * nr_disks + self.disk_properties["write_bandwidth"] = 532996277 * nr_disks + elif self.idata.instance_size() == "24xlarge": + self.disk_properties["read_iops"] = 374737 * nr_disks + self.disk_properties["read_bandwidth"] = 1855833386 * nr_disks + self.disk_properties["write_iops"] = 125214 * nr_disks + self.disk_properties["write_bandwidth"] = 796082133 * nr_disks + elif self.idata.instance_size() == "metal": + self.disk_properties["read_iops"] = 381441 * nr_disks + self.disk_properties["read_bandwidth"] = 1874585429 * nr_disks + self.disk_properties["write_iops"] = 108789 * nr_disks + self.disk_properties["write_bandwidth"] = 796443221 * nr_disks + elif self.idata.instancetype == "m5ad.large": + self.disk_properties["read_iops"] = 33306 + self.disk_properties["read_bandwidth"] = 158338864 + self.disk_properties["write_iops"] = 16817 + self.disk_properties["write_bandwidth"] = 70194288 + elif self.idata.instancetype == "m5ad.xlarge": + self.disk_properties["read_iops"] = 66127 + self.disk_properties["read_bandwidth"] = 260377466 + self.disk_properties["write_iops"] = 32893 + self.disk_properties["write_bandwidth"] = 135897696 + elif self.idata.instancetype == "m5ad.2xlarge": + self.disk_properties["read_iops"] = 129977 + self.disk_properties["read_bandwidth"] = 621997248 
+ self.disk_properties["write_iops"] = 63442 + self.disk_properties["write_bandwidth"] = 267648736 + elif self.idata.instancetype == "m5ad.4xlarge": + self.disk_properties["read_iops"] = 129937 * nr_disks + self.disk_properties["read_bandwidth"] = 620231082 * nr_disks + self.disk_properties["write_iops"] = 62666 * nr_disks + self.disk_properties["write_bandwidth"] = 267639125 * nr_disks + elif self.idata.instancetype == "m5ad.8xlarge": + self.disk_properties["read_iops"] = 257095 * nr_disks + self.disk_properties["read_bandwidth"] = 1249927637 * nr_disks + self.disk_properties["write_iops"] = 114446 * nr_disks + self.disk_properties["write_bandwidth"] = 532821760 * nr_disks + elif self.idata.instancetype == "m5ad.12xlarge": + self.disk_properties["read_iops"] = 376431 * nr_disks + self.disk_properties["read_bandwidth"] = 1865866709 * nr_disks + self.disk_properties["write_iops"] = 115985 * nr_disks + self.disk_properties["write_bandwidth"] = 796003477 * nr_disks + elif self.idata.instancetype == "m5ad.16xlarge": + self.disk_properties["read_iops"] = 256358 * nr_disks + self.disk_properties["read_bandwidth"] = 1250889770 * nr_disks + self.disk_properties["write_iops"] = 114707 * nr_disks + self.disk_properties["write_bandwidth"] = 532998506 * nr_disks + elif self.idata.instancetype == "m5ad.24xlarge": + self.disk_properties["read_iops"] = 258951 * nr_disks + self.disk_properties["read_bandwidth"] = 1865871317 * nr_disks + self.disk_properties["write_iops"] = 116030 * nr_disks + self.disk_properties["write_bandwidth"] = 796217706 * nr_disks + elif self.idata.instancetype == "c5d.large": + self.disk_properties["read_iops"] = 22095 + self.disk_properties["read_bandwidth"] = 104797834 + self.disk_properties["write_iops"] = 10125 + self.disk_properties["write_bandwidth"] = 41982906 + elif self.idata.instancetype == "c5d.xlarge": + self.disk_properties["read_iops"] = 44355 + self.disk_properties["read_bandwidth"] = 212593018 + self.disk_properties["write_iops"] = 20025 + 
self.disk_properties["write_bandwidth"] = 84213472 + elif self.idata.instancetype == "c5d.2xlarge": + self.disk_properties["read_iops"] = 89036 + self.disk_properties["read_bandwidth"] = 426821429 + self.disk_properties["write_iops"] = 41697 + self.disk_properties["write_bandwidth"] = 173730709 + elif self.idata.instancetype == "c5d.4xlarge": + self.disk_properties["read_iops"] = 193970 + self.disk_properties["read_bandwidth"] = 928278314 + self.disk_properties["write_iops"] = 83058 + self.disk_properties["write_bandwidth"] = 351839733 + elif self.idata.instancetype == "c5d.9xlarge": + self.disk_properties["read_iops"] = 381800 + self.disk_properties["read_bandwidth"] = 1865831893 + self.disk_properties["write_iops"] = 112264 + self.disk_properties["write_bandwidth"] = 795731264 + elif self.idata.instancetype == "c5d.12xlarge": + self.disk_properties["read_iops"] = 381775 * nr_disks + self.disk_properties["read_bandwidth"] = 1866481792 * nr_disks + self.disk_properties["write_iops"] = 114302 * nr_disks + self.disk_properties["write_bandwidth"] = 795607616 * nr_disks + elif self.idata.instancetype == "c5d.18xlarge": + self.disk_properties["read_iops"] = 381270 * nr_disks + self.disk_properties["read_bandwidth"] = 1856972330 * nr_disks + self.disk_properties["write_iops"] = 125638 * nr_disks + self.disk_properties["write_bandwidth"] = 795813866 * nr_disks + elif self.idata.instancetype == "c5d.24xlarge": + self.disk_properties["read_iops"] = 381355 * nr_disks + self.disk_properties["read_bandwidth"] = 1876056704 * nr_disks + self.disk_properties["write_iops"] = 104946 * nr_disks + self.disk_properties["write_bandwidth"] = 795901013 * nr_disks + elif self.idata.instancetype == "c5d.metal": + self.disk_properties["read_iops"] = 381330 * nr_disks + self.disk_properties["read_bandwidth"] = 1865216426 * nr_disks + self.disk_properties["write_iops"] = 115484 * nr_disks + self.disk_properties["write_bandwidth"] = 796109546 * nr_disks + elif self.idata.instancetype == 
"z1d.large": + self.disk_properties["read_iops"] = 33286 + self.disk_properties["read_bandwidth"] = 158206858 + self.disk_properties["write_iops"] = 16956 + self.disk_properties["write_bandwidth"] = 70226280 + elif self.idata.instancetype == "z1d.xlarge": + self.disk_properties["read_iops"] = 66076 + self.disk_properties["read_bandwidth"] = 260565488 + self.disk_properties["write_iops"] = 32769 + self.disk_properties["write_bandwidth"] = 135891989 + elif self.idata.instancetype == "z1d.2xlarge": + self.disk_properties["read_iops"] = 130235 + self.disk_properties["read_bandwidth"] = 622297194 + self.disk_properties["write_iops"] = 63891 + self.disk_properties["write_bandwidth"] = 267679509 + elif self.idata.instancetype == "z1d.3xlarge": + self.disk_properties["read_iops"] = 193840 + self.disk_properties["read_bandwidth"] = 927493696 + self.disk_properties["write_iops"] = 82864 + self.disk_properties["write_bandwidth"] = 351608480 + elif self.idata.instancetype == "z1d.6xlarge": + self.disk_properties["read_iops"] = 381902 + self.disk_properties["read_bandwidth"] = 1865543381 + self.disk_properties["write_iops"] = 117874 + self.disk_properties["write_bandwidth"] = 795786901 + elif self.idata.instancetype == "z1d.12xlarge": + self.disk_properties["read_iops"] = 381648 * nr_disks + self.disk_properties["read_bandwidth"] = 1865706538 * nr_disks + self.disk_properties["write_iops"] = 115834 * nr_disks + self.disk_properties["write_bandwidth"] = 795876778 * nr_disks + elif self.idata.instancetype == "z1d.metal": + self.disk_properties["read_iops"] = 381378 * nr_disks + self.disk_properties["read_bandwidth"] = 1857873109 * nr_disks + self.disk_properties["write_iops"] = 122453 * nr_disks + self.disk_properties["write_bandwidth"] = 795593024 * nr_disks elif self.idata.instance_class() in ("c6gd", "m6gd", "r6gd", "x2gd"): if self.idata.instance_size() == "medium": self.disk_properties["read_iops"] = 14808 From a5c370764b976059a089e888b849115504a1c8ee Mon Sep 17 00:00:00 
2001 From: Yaron Kaikov Date: Wed, 30 Aug 2023 09:31:51 +0300 Subject: [PATCH 35/41] scylla_cloud.py:Add `i4g` to supported instances Following the changes in https://github.com/scylladb/scylla-machine-image/pull/460, we need also to add `i4g` to list of instance_class This will prevent the following error during image startup ``` Version: 5.4.0~dev-0.20230829.83ceedb18bdc Nodetool: nodetool help CQL Shell: cqlsh More documentation available at: https://docs.scylladb.com/ By default, Scylla sends certain information about this node to a data collection server. For more details, see https://www.scylladb.com/privacy/ i4g is not eligible for optimized automatic tuning! To continue the setup of Scylla on this instance, run 'sudo scylla_io_setup' then 'sudo systemctl start scylla-server'. For a list of optimized instance types and more instructions, see http://www.scylladb.com/doc/getting-started-amazon/ ``` Ref: https://github.com/scylladb/scylla-pkg/pull/3556 --- lib/scylla_cloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/scylla_cloud.py b/lib/scylla_cloud.py index 5e8d81fa..80b0635d 100644 --- a/lib/scylla_cloud.py +++ b/lib/scylla_cloud.py @@ -742,7 +742,7 @@ def instance_class(self): return self._type.split(".")[0] def is_supported_instance_class(self): - if self.instance_class() in ['i2', 'i3', 'i3en', 'c5d', 'm5d', 'm5ad', 'r5d', 'z1d', 'c6gd', 'm6gd', 'r6gd', 'x2gd', 'im4gn', 'is4gen', 'i4i']: + if self.instance_class() in ['i2', 'i3', 'i3en', 'c5d', 'm5d', 'm5ad', 'r5d', 'z1d', 'c6gd', 'm6gd', 'r6gd', 'x2gd', 'im4gn', 'is4gen', 'i4i', 'i4g']: return True return False From a6f7f2eb9e0b6d576ec13e5221ebe7eb299c066f Mon Sep 17 00:00:00 2001 From: Yaron Kaikov Date: Wed, 30 Aug 2023 11:25:56 +0300 Subject: [PATCH 36/41] Add missing i4g.4xlarge instnace type Adding pre-configure io_setup values Also since `i4g` doesn't support enhanced networking adding it to use ENA --- common/scylla_cloud_io_setup | 5 +++++ lib/scylla_cloud.py | 2 +- 2 
files changed, 6 insertions(+), 1 deletion(-) diff --git a/common/scylla_cloud_io_setup b/common/scylla_cloud_io_setup index 6f2a97ba..5cd51a66 100755 --- a/common/scylla_cloud_io_setup +++ b/common/scylla_cloud_io_setup @@ -308,6 +308,11 @@ class aws_io_setup(cloud_io_setup): self.disk_properties["read_bandwidth"] = 1148509696 self.disk_properties["write_iops"] = 82746 self.disk_properties["write_bandwidth"] = 508828810 + elif self.idata.instancetype == "i4g.4xlarge": + self.disk_properties["read_iops"] = 272704 + self.disk_properties["read_bandwidth"] = 2297019392 + self.disk_properties["write_iops"] = 165492 + self.disk_properties["write_bandwidth"] = 1017657620 elif self.idata.instancetype == "i4g.8xlarge": self.disk_properties["read_iops"] = 271495 * nr_disks self.disk_properties["read_bandwidth"] = 2293024938 * nr_disks diff --git a/lib/scylla_cloud.py b/lib/scylla_cloud.py index 80b0635d..eff53d09 100644 --- a/lib/scylla_cloud.py +++ b/lib/scylla_cloud.py @@ -751,7 +751,7 @@ def get_en_interface_type(self): instance_size = self.instance_size() if instance_class in ['c3', 'c4', 'd2', 'i2', 'r3']: return 'ixgbevf' - if instance_class in ['a1', 'c5', 'c5a', 'c5d', 'c5n', 'c6g', 'c6gd', 'f1', 'g3', 'g4', 'h1', 'i3', 'i3en', 'inf1', 'm5', 'm5a', 'm5ad', 'm5d', 'm5dn', 'm5n', 'm6g', 'm6gd', 'p2', 'p3', 'r4', 'r5', 'r5a', 'r5ad', 'r5b', 'r5d', 'r5dn', 'r5n', 't3', 't3a', 'u-6tb1', 'u-9tb1', 'u-12tb1', 'u-18tn1', 'u-24tb1', 'x1', 'x1e', 'z1d', 'c6g', 'c6gd', 'm6g', 'm6gd', 't4g', 'r6g', 'r6gd', 'x2gd', 'im4gn', 'is4gen', 'i4i']: + if instance_class in ['a1', 'c5', 'c5a', 'c5d', 'c5n', 'c6g', 'c6gd', 'f1', 'g3', 'g4', 'h1', 'i3', 'i3en', 'inf1', 'm5', 'm5a', 'm5ad', 'm5d', 'm5dn', 'm5n', 'm6g', 'm6gd', 'p2', 'p3', 'r4', 'r5', 'r5a', 'r5ad', 'r5b', 'r5d', 'r5dn', 'r5n', 't3', 't3a', 'u-6tb1', 'u-9tb1', 'u-12tb1', 'u-18tn1', 'u-24tb1', 'x1', 'x1e', 'z1d', 'c6g', 'c6gd', 'm6g', 'm6gd', 't4g', 'r6g', 'r6gd', 'x2gd', 'im4gn', 'is4gen', 'i4i', 'i4g']: return 'ena' if 
instance_class == 'm4': if instance_size == '16xlarge': From ef39bd0e7fb886da705e1ad18fa2159000835120 Mon Sep 17 00:00:00 2001 From: Beni Peled Date: Thu, 14 Sep 2023 15:49:04 +0300 Subject: [PATCH 37/41] Support developer-mode instance types Currently, attempting to launch machine-image-setup on instances that don't meet scylla's requirements would result in setup failures, This commit adds support for developer-mode instance types. when users launch these instances, scylla will run on `developer-mode`, and a WARN message will be displayed in the login message to inform users about the intended usage of these instances. Fix: #477 --- common/scylla_configure.py | 2 +- common/scylla_login | 11 +++++++++-- lib/scylla_cloud.py | 17 ++++++++++++++++- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/common/scylla_configure.py b/common/scylla_configure.py index 32839df5..7c3466c1 100755 --- a/common/scylla_configure.py +++ b/common/scylla_configure.py @@ -106,7 +106,7 @@ def configure_scylla_startup_args(self): def set_developer_mode(self): default_developer_mode = self.CONF_DEFAULTS["developer_mode"] - if self.instance_user_data.get("developer_mode", default_developer_mode): + if self.instance_user_data.get("developer_mode", default_developer_mode) or self.cloud_instance.is_dev_instance_type(): LOGGER.info("Setting up developer mode") subprocess.run(['/usr/sbin/scylla_dev_mode_setup', '--developer-mode', '1'], timeout=60, check=True) diff --git a/common/scylla_login b/common/scylla_login index 55200e8b..c76fdcce 100755 --- a/common/scylla_login +++ b/common/scylla_login @@ -48,6 +48,12 @@ next run 'sudo scylla_create_devices', 'sudo scylla_io_setup' then 'sudo systemctl start scylla-server'. For a list of optimized instance types and more instructions, see %s '''[1:-1] +MSG_DEV_INSTANCE_TYPE = ''' + {yellow}WARNING: {type} is intended for development purposes only and should not be used in production environments! 
+ This ScyllaDB instance is running in developer-mode.{nocolor} + +For a list of supported instance types and more instructions, please refer to %s +'''[1:-1] MSG_SETUP_ACTIVATING = ''' {green}Constructing RAID volume...{nocolor} @@ -105,12 +111,13 @@ $ nodetool status if __name__ == '__main__': colorprint(MSG_HEADER.format(scylla_version=out("scylla --version"))) cloud_instance = get_cloud_instance() - if not cloud_instance.is_supported_instance_class(): + if cloud_instance.is_dev_instance_type(): + colorprint(MSG_DEV_INSTANCE_TYPE % cloud_instance.getting_started_url, type=cloud_instance.instancetype) + elif not cloud_instance.is_supported_instance_class(): non_root_disks = cloud_instance.get_local_disks() + cloud_instance.get_remote_disks() if len(non_root_disks) == 0: colorprint(MSG_UNSUPPORTED_INSTANCE_TYPE_NO_DISKS % cloud_instance.getting_started_url, type=cloud_instance.instance_class()) - else: colorprint(MSG_UNSUPPORTED_INSTANCE_TYPE % cloud_instance.getting_started_url, type=cloud_instance.instance_class()) diff --git a/lib/scylla_cloud.py b/lib/scylla_cloud.py index eff53d09..015eb9b2 100644 --- a/lib/scylla_cloud.py +++ b/lib/scylla_cloud.py @@ -93,6 +93,10 @@ def private_ipv4(self): def is_supported_instance_class(self): pass + @abstractmethod + def is_dev_instance_type(self): + pass + @property @abstractmethod def instancetype(self): @@ -321,6 +325,11 @@ def is_recommended_instance_size(self): return True return False + def is_dev_instance_type(self): + if self.instancetype in ['e2-micro', 'e2-small', 'e2-medium']: + return True + return False + @staticmethod def get_file_size_by_seek(filename): "Get the file size by seeking at end" @@ -602,6 +611,9 @@ def is_recommended_instance(self): return True return False + def is_dev_instance_type(self): + return False + def private_ipv4(self): return self.__instance_metadata("/network/interface/0/ipv4/ipAddress/0/privateIpAddress") @@ -746,6 +758,9 @@ def is_supported_instance_class(self): return True return 
False + def is_dev_instance_type(self): + return False + def get_en_interface_type(self): instance_class = self.instance_class() instance_size = self.instance_size() @@ -844,7 +859,7 @@ def get_cloud_instance(): raise Exception("Unknown cloud provider! Only AWS/GCP/Azure supported.") -CONCOLORS = {'green': '\033[1;32m', 'red': '\033[1;31m', 'nocolor': '\033[0m'} +CONCOLORS = {'green': '\033[1;32m', 'red': '\033[1;31m', 'yellow': '\033[1;33m', 'nocolor': '\033[0m'} def colorprint(msg, **kwargs): From cc9df5aeb5b5e2e7e7ac2fecb6a3fe9165e4343a Mon Sep 17 00:00:00 2001 From: Beni Peled Date: Tue, 19 Sep 2023 16:57:13 +0300 Subject: [PATCH 38/41] Update gitignore --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index c795b054..5c7e0a52 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ -build \ No newline at end of file +build +__pycache__ +.venv + From cad3df3b3b4010aacee6171dabec209f861f1295 Mon Sep 17 00:00:00 2001 From: Beni Peled Date: Mon, 18 Sep 2023 16:43:28 +0300 Subject: [PATCH 39/41] Add boto3 as test-requirements --- test-requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/test-requirements.txt b/test-requirements.txt index 204d30bf..d43f67af 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -4,3 +4,4 @@ psutil==5.9.1 pytest==7.1.2 PyYAML==6.0 traceback-with-variables==2.0.4 +boto3==1.28.49 \ No newline at end of file From ce3dc67661c13ae1aeca561f28fb12fe6d197175 Mon Sep 17 00:00:00 2001 From: Beni Peled Date: Mon, 18 Sep 2023 16:06:30 +0300 Subject: [PATCH 40/41] Fix broken link to getting-started page This commit resolves a broken link issue - the previous links /getting-started-google and /getting-started-azure didn't exist Ideally, we should refer to [0] but it's an OS page and I don't want to add OS vs. 
Enterprise condition so for now let's go without a direct link [0] https://opensource.docs.scylladb.com/master/getting-started/cloud-instance-recommendations --- common/scylla_login | 25 ++++++++++++------------- lib/scylla_cloud.py | 17 ----------------- tests/test_aws_instance.py | 5 ----- tests/test_azure_instance.py | 5 ----- tests/test_gcp_instance.py | 5 ----- 5 files changed, 12 insertions(+), 45 deletions(-) diff --git a/common/scylla_login b/common/scylla_login index c76fdcce..733a7cbd 100755 --- a/common/scylla_login +++ b/common/scylla_login @@ -35,24 +35,25 @@ By default, Scylla sends certain information about this node to a data collectio MSG_UNSUPPORTED_INSTANCE_TYPE = ''' {red}{type} is not eligible for optimized automatic tuning!{nocolor} -To continue the setup of Scylla on this instance, run 'sudo scylla_io_setup' -then 'sudo systemctl start scylla-server'. -For a list of optimized instance types and more instructions, see %s +To continue the setup of Scylla on this instance, run 'sudo scylla_io_setup' then 'sudo systemctl start scylla-server'. +For a list of optimized instance types and more instructions, see the requirements section in the +ScyllaDB documentation at https://docs.scylladb.com '''[1:-1] MSG_UNSUPPORTED_INSTANCE_TYPE_NO_DISKS = ''' {red}{type} is not eligible for optimized automatic tuning!{nocolor} -To continue the setup of Scylla on this instance, you need to attach additional disks, -next run 'sudo scylla_create_devices', 'sudo scylla_io_setup' -then 'sudo systemctl start scylla-server'. -For a list of optimized instance types and more instructions, see %s +To continue the setup of Scylla on this instance, you need to attach additional disks, next run 'sudo scylla_create_devices', +'sudo scylla_io_setup' then 'sudo systemctl start scylla-server'. 
+For a list of optimized instance types and more instructions, see the requirements section in the +ScyllaDB documentation at https://docs.scylladb.com '''[1:-1] MSG_DEV_INSTANCE_TYPE = ''' {yellow}WARNING: {type} is intended for development purposes only and should not be used in production environments! This ScyllaDB instance is running in developer-mode.{nocolor} -For a list of supported instance types and more instructions, please refer to %s +For a list of optimized instance types and more instructions, see the requirements section in the +ScyllaDB documentation at https://docs.scylladb.com '''[1:-1] MSG_SETUP_ACTIVATING = ''' {green}Constructing RAID volume...{nocolor} @@ -112,15 +113,13 @@ if __name__ == '__main__': colorprint(MSG_HEADER.format(scylla_version=out("scylla --version"))) cloud_instance = get_cloud_instance() if cloud_instance.is_dev_instance_type(): - colorprint(MSG_DEV_INSTANCE_TYPE % cloud_instance.getting_started_url, type=cloud_instance.instancetype) + colorprint(MSG_DEV_INSTANCE_TYPE, type=cloud_instance.instancetype) elif not cloud_instance.is_supported_instance_class(): non_root_disks = cloud_instance.get_local_disks() + cloud_instance.get_remote_disks() if len(non_root_disks) == 0: - colorprint(MSG_UNSUPPORTED_INSTANCE_TYPE_NO_DISKS % cloud_instance.getting_started_url, - type=cloud_instance.instance_class()) + colorprint(MSG_UNSUPPORTED_INSTANCE_TYPE_NO_DISKS, type=cloud_instance.instance_class()) else: - colorprint(MSG_UNSUPPORTED_INSTANCE_TYPE % cloud_instance.getting_started_url, - type=cloud_instance.instance_class()) + colorprint(MSG_UNSUPPORTED_INSTANCE_TYPE, type=cloud_instance.instance_class()) else: skip_scylla_server = False if not os.path.exists('/etc/scylla/machine_image_configured'): diff --git a/lib/scylla_cloud.py b/lib/scylla_cloud.py index 015eb9b2..f8083cd9 100644 --- a/lib/scylla_cloud.py +++ b/lib/scylla_cloud.py @@ -126,10 +126,6 @@ def nvme_disk_count(self): def endpoint_snitch(self): pass - @property - 
@abstractmethod - def getting_started_url(self): - pass class gcp_instance(cloud_instance): @@ -138,7 +134,6 @@ class gcp_instance(cloud_instance): EPHEMERAL = "ephemeral" PERSISTENT = "persistent" ROOT = "root" - GETTING_STARTED_URL = "http://www.scylladb.com/doc/getting-started-google/" META_DATA_BASE_URL = "http://metadata.google.internal/computeMetadata/v1/instance/" ENDPOINT_SNITCH = "GoogleCloudSnitch" @@ -154,9 +149,6 @@ def __init__(self): def endpoint_snitch(self): return self.ENDPOINT_SNITCH - @property - def getting_started_url(self): - return self.GETTING_STARTED_URL @staticmethod def is_gce_instance(): @@ -413,7 +405,6 @@ class azure_instance(cloud_instance): PERSISTENT = "persistent" SWAP = "swap" ROOT = "root" - GETTING_STARTED_URL = "http://www.scylladb.com/doc/getting-started-azure/" ENDPOINT_SNITCH = "AzureSnitch" META_DATA_BASE_URL = "http://169.254.169.254/metadata/instance" @@ -431,10 +422,6 @@ def __init__(self): def endpoint_snitch(self): return self.ENDPOINT_SNITCH - @property - def getting_started_url(self): - return self.GETTING_STARTED_URL - @classmethod def is_azure_instance(cls): """Check if it's Azure instance via query to metadata server.""" @@ -634,7 +621,6 @@ def user_data(self): class aws_instance(cloud_instance): """Describe several aspects of the current AWS instance""" - GETTING_STARTED_URL = "http://www.scylladb.com/doc/getting-started-amazon/" META_DATA_BASE_URL = "http://169.254.169.254/latest/" ENDPOINT_SNITCH = "Ec2Snitch" METADATA_TOKEN_TTL = 21600 @@ -727,9 +713,6 @@ def __init__(self): def endpoint_snitch(self): return self.ENDPOINT_SNITCH - @property - def getting_started_url(self): - return self.GETTING_STARTED_URL @classmethod def is_aws_instance(cls): diff --git a/tests/test_aws_instance.py b/tests/test_aws_instance.py index 8291eb90..bc553ddc 100644 --- a/tests/test_aws_instance.py +++ b/tests/test_aws_instance.py @@ -226,11 +226,6 @@ def test_endpoint_snitch(self): ins = aws_instance() assert ins.endpoint_snitch == 
'Ec2Snitch' - def test_getting_started_url(self): - self.httpretty_aws_metadata() - ins = aws_instance() - assert ins.getting_started_url == 'http://www.scylladb.com/doc/getting-started-amazon/' - def test_instancetype_i3en_2xlarge(self): self.httpretty_aws_metadata() ins = aws_instance() diff --git a/tests/test_azure_instance.py b/tests/test_azure_instance.py index 91f3400a..464974df 100644 --- a/tests/test_azure_instance.py +++ b/tests/test_azure_instance.py @@ -117,11 +117,6 @@ def test_endpoint_snitch(self): ins = azure_instance() assert ins.endpoint_snitch == 'AzureSnitch' - def test_getting_started_url(self): - self.httpretty_azure_metadata() - ins = azure_instance() - assert ins.getting_started_url == 'http://www.scylladb.com/doc/getting-started-azure/' - def test_instancelocation_standard_l16s_v2(self): self.httpretty_azure_metadata() ins = azure_instance() diff --git a/tests/test_gcp_instance.py b/tests/test_gcp_instance.py index 9bf29ab6..cb9619b2 100644 --- a/tests/test_gcp_instance.py +++ b/tests/test_gcp_instance.py @@ -96,11 +96,6 @@ def test_endpoint_snitch(self): ins = gcp_instance() assert ins.endpoint_snitch == 'GoogleCloudSnitch' - def test_getting_started_url(self): - self.httpretty_gcp_metadata() - ins = gcp_instance() - assert ins.getting_started_url == 'http://www.scylladb.com/doc/getting-started-google/' - def test_instancetype_n2_standard_8(self): self.httpretty_gcp_metadata() ins = gcp_instance() From 17f40ab387644ccd33c574ca6ed661aca94be33c Mon Sep 17 00:00:00 2001 From: Beni Peled Date: Wed, 20 Sep 2023 16:53:18 +0300 Subject: [PATCH 41/41] Add AWS dev-mode instances --- lib/scylla_cloud.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/scylla_cloud.py b/lib/scylla_cloud.py index f8083cd9..4faa4baa 100644 --- a/lib/scylla_cloud.py +++ b/lib/scylla_cloud.py @@ -742,6 +742,8 @@ def is_supported_instance_class(self): return False def is_dev_instance_type(self): + if self.instancetype in ['t3.micro']: + return True return False def 
get_en_interface_type(self):