Skip to content

Commit

Permalink
cluster: copy session certs when scale-out TiDB (#2432)
Browse files Browse the repository at this point in the history
* fix cert on scale

* fix linter

* pass nil topo for deploy

* add integration tests

* add endline

* update comments

* fix inst num

* destroy cluster after test

---------

Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com>
  • Loading branch information
djshow832 and ti-chi-bot[bot] authored Jul 1, 2024
1 parent 2ff05f9 commit b1ee463
Show file tree
Hide file tree
Showing 6 changed files with 210 additions and 75 deletions.
1 change: 1 addition & 0 deletions .github/workflows/integrate-cluster-scale.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ jobs:
- 'test_scale_tools'
- 'test_scale_core_tls'
- 'test_scale_tools_tls'
- 'test_scale_tiproxy'
env:
working-directory: ${{ github.workspace }}/go/src/github.com/${{ github.repository }}
steps:
Expand Down
141 changes: 92 additions & 49 deletions pkg/cluster/manager/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,11 @@ func buildScaleOutTask(
if err != nil {
return nil, err
}
sessionCertTasks, err := buildSessionCertTasks(m, name, topo, newPart, base, gOpt, p)
if err != nil {
return nil, err
}
certificateTasks = append(certificateTasks, sessionCertTasks...)

// always ignore config check result in scale out
gOpt.IgnoreConfigCheck = true
Expand Down Expand Up @@ -866,6 +871,68 @@ func genTiProxySessionCerts(dir string) error {
}), "")
}

// buildSessionCertTasks puts a self-signed cert to all TiDB if there is tiproxy.
// For deploy: originalTopo = nil, newTopo = topology.
// For scale-out: originalTopo = original topology, newTopo = new topology.
func buildSessionCertTasks(m *Manager,
name string,
originalTopo spec.Topology,
newTopo spec.Topology,
base *spec.BaseMeta,
gOpt operator.Options,
p *tui.SSHConnectionProps) ([]*task.StepDisplay, error) {
var certificateTasks []*task.StepDisplay // tasks which are used to copy certificate to remote host
hasOriginalTiProxy := false
if originalTopo != nil {
originalTopo.IterInstance(func(inst spec.Instance) {
if inst.ComponentName() == spec.ComponentTiProxy {
hasOriginalTiProxy = true
}
})
}
hasNewTiProxy := false
newTopo.IterInstance(func(inst spec.Instance) {
if inst.ComponentName() == spec.ComponentTiProxy {
hasNewTiProxy = true
}
})
if !hasOriginalTiProxy && !hasNewTiProxy {
return nil, nil
}

tempPath := m.specManager.Path(name, spec.TempConfigPath)
keyPath := filepath.Join(tempPath, "tiproxy-session.key")
certPath := filepath.Join(tempPath, "tiproxy-session.crt")
copySessionCerts := func(inst spec.Instance) {
if inst.ComponentName() != spec.ComponentTiDB {
return
}
deployDir := spec.Abs(base.User, inst.DeployDir())
tlsDir := filepath.Join(deployDir, spec.TLSCertKeyDir)

tb := task.NewSimpleUerSSH(m.logger, inst.GetManageHost(), inst.GetSSHPort(), base.User, gOpt, p, newTopo.BaseTopo().GlobalOptions.SSHType).
Mkdir(base.User, inst.GetManageHost(), newTopo.BaseTopo().GlobalOptions.SystemdMode != spec.UserMode, deployDir, tlsDir)
tb = tb.
CopyFile(keyPath, filepath.Join(deployDir, spec.TLSCertKeyDir, "tiproxy-session.key"), inst.GetHost(), false, 0, false).
CopyFile(certPath, filepath.Join(deployDir, spec.TLSCertKeyDir, "tiproxy-session.crt"), inst.GetHost(), false, 0, false)
t := tb.BuildAsStep(fmt.Sprintf(" - Copy session certificate %s -> %s", inst.ComponentName(), inst.ID()))
certificateTasks = append(certificateTasks, t)
}

// If TiProxy is just enabled now (either deploy or scale-out), issue a session cert and copy the cert to original TiDB.
if !hasOriginalTiProxy {
if err := genTiProxySessionCerts(tempPath); err != nil {
return certificateTasks, err
}
if originalTopo != nil {
originalTopo.IterInstance(copySessionCerts)
}
}
// Copy the session cert to new TiDB.
newTopo.IterInstance(copySessionCerts)
return certificateTasks, nil
}

// buildCertificateTasks generates certificate for instance and transfers it to the server
func buildCertificateTasks(
m *Manager,
Expand All @@ -879,62 +946,38 @@ func buildCertificateTasks(
certificateTasks []*task.StepDisplay // tasks which are used to copy certificate to remote host
)

// check if there is tiproxy
// if there is tiproxy, whether or not TLS, we must issue a self-signed cert
hasTiProxy := false
topo.IterInstance(func(inst spec.Instance) {
if inst.ComponentName() == spec.ComponentTiProxy {
hasTiProxy = true
}
})
if hasTiProxy {
if err := genTiProxySessionCerts(m.specManager.Path(name, spec.TempConfigPath)); err != nil {
return certificateTasks, err
}
}

// copy certificate to remote host
topo.IterInstance(func(inst spec.Instance) {
deployDir := spec.Abs(base.User, inst.DeployDir())
tlsDir := filepath.Join(deployDir, spec.TLSCertKeyDir)
// copy TLS certificate to remote host
if topo.BaseTopo().GlobalOptions.TLSEnabled {
topo.IterInstance(func(inst spec.Instance) {
deployDir := spec.Abs(base.User, inst.DeployDir())
tlsDir := filepath.Join(deployDir, spec.TLSCertKeyDir)

needSessionCert := hasTiProxy && inst.ComponentName() == spec.ComponentTiDB
if needSessionCert || topo.BaseTopo().GlobalOptions.TLSEnabled {
tb := task.NewSimpleUerSSH(m.logger, inst.GetManageHost(), inst.GetSSHPort(), base.User, gOpt, p, topo.BaseTopo().GlobalOptions.SSHType).
Mkdir(base.User, inst.GetManageHost(), topo.BaseTopo().GlobalOptions.SystemdMode != spec.UserMode, deployDir, tlsDir)

if needSessionCert {
tb = tb.
CopyFile(filepath.Join(m.specManager.Path(name, spec.TempConfigPath), "tiproxy-session.key"), filepath.Join(deployDir, spec.TLSCertKeyDir, "tiproxy-session.key"), inst.GetHost(), false, 0, false).
CopyFile(filepath.Join(m.specManager.Path(name, spec.TempConfigPath), "tiproxy-session.crt"), filepath.Join(deployDir, spec.TLSCertKeyDir, "tiproxy-session.crt"), inst.GetHost(), false, 0, false)
}

if topo.BaseTopo().GlobalOptions.TLSEnabled {
ca, err := crypto.ReadCA(
name,
m.specManager.Path(name, spec.TLSCertKeyDir, spec.TLSCACert),
m.specManager.Path(name, spec.TLSCertKeyDir, spec.TLSCAKey),
)
if err != nil {
iterErr = err
return
}
tb = tb.TLSCert(
inst.GetHost(),
inst.ComponentName(),
inst.Role(),
inst.GetMainPort(),
ca,
meta.DirPaths{
Deploy: deployDir,
Cache: m.specManager.Path(name, spec.TempConfigPath),
})
ca, err := crypto.ReadCA(
name,
m.specManager.Path(name, spec.TLSCertKeyDir, spec.TLSCACert),
m.specManager.Path(name, spec.TLSCertKeyDir, spec.TLSCAKey),
)
if err != nil {
iterErr = err
return
}
tb = tb.TLSCert(
inst.GetHost(),
inst.ComponentName(),
inst.Role(),
inst.GetMainPort(),
ca,
meta.DirPaths{
Deploy: deployDir,
Cache: m.specManager.Path(name, spec.TempConfigPath),
})

t := tb.BuildAsStep(fmt.Sprintf(" - Generate certificate %s -> %s", inst.ComponentName(), inst.ID()))
certificateTasks = append(certificateTasks, t)
}
})
})
}

return certificateTasks, iterErr
}
5 changes: 5 additions & 0 deletions pkg/cluster/manager/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,11 @@ func (m *Manager) Deploy(
if err != nil {
return err
}
sessionCertTasks, err := buildSessionCertTasks(m, name, nil, topo, metadata.GetBaseMeta(), gOpt, sshProxyProps)
if err != nil {
return err
}
certificateTasks = append(certificateTasks, sessionCertTasks...)

refreshConfigTasks, _ := buildInitConfigTasks(m, name, topo, metadata.GetBaseMeta(), gOpt, nil)

Expand Down
38 changes: 12 additions & 26 deletions pkg/cluster/spec/tidb.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,33 +261,19 @@ func (i *TiDBInstance) InitConfig(

// setTiProxyConfig sets tiproxy session certs
func (i *TiDBInstance) setTiProxyConfig(ctx context.Context, topo *Specification, version string, configs map[string]any, paths meta.DirPaths) (map[string]any, error) {
hasTiProxy := false
topo.IterInstance(func(instance Instance) {
if instance.ComponentName() == ComponentTiProxy {
hasTiProxy = true
}
})
if hasTiProxy && tidbver.TiDBSupportTiproxy(version) {
if configs == nil {
configs = make(map[string]any)
}
configs["security.session-token-signing-cert"] = fmt.Sprintf(
"%s/tls/tiproxy-session.crt",
paths.Deploy)
configs["security.session-token-signing-key"] = fmt.Sprintf(
"%s/tls/tiproxy-session.key",
paths.Deploy)
} else {
tlsConfigs := []string{
"security.session-token-signing-cert",
"security.session-token-signing-key",
}
if configs != nil {
for _, config := range tlsConfigs {
delete(configs, config)
}
}
if len(topo.TiProxyServers) == 0 || !tidbver.TiDBSupportTiproxy(version) {
return configs, nil
}
if configs == nil {
configs = make(map[string]any)
}
// Overwrite users' configs just like TLS configs.
configs["security.session-token-signing-cert"] = fmt.Sprintf(
"%s/tls/tiproxy-session.crt",
paths.Deploy)
configs["security.session-token-signing-key"] = fmt.Sprintf(
"%s/tls/tiproxy-session.key",
paths.Deploy)
return configs, nil
}

Expand Down
92 changes: 92 additions & 0 deletions tests/tiup-cluster/script/scale_tiproxy.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#!/bin/bash

set -eu

function scale_tiproxy() {
mkdir -p ~/.tiup/bin/

version=$1
test_tls=$2
native_ssh=$3

client=""
if [ $native_ssh == true ]; then
client="--ssh=system"
fi

name="test_scale_tiproxy_$RANDOM"
if [ $test_tls = true ]; then
topo=./topo/full_tls.yaml
else
topo=./topo/full.yaml
fi

check_cert_file="ls /home/tidb/deploy/tidb-4000/tls/tiproxy-session.crt /home/tidb/deploy/tidb-4000/tls/tiproxy-session.key"
check_cert_config="grep -q session-token-signing-key /home/tidb/deploy/tidb-4000/conf/tidb.toml"

tiup-cluster $client --yes deploy $name $version $topo -i ~/.ssh/id_rsa

# the session certs exist
tiup-cluster $client exec $name -N n1 --command "$check_cert_file"
# the configurations are updated
tiup-cluster $client exec $name -N n1 --command "$check_cert_config"

tiup-cluster $client list | grep "$name"

tiup-cluster $client --yes start $name

tiup-cluster $client _test $name writable

tiup-cluster $client display $name

tiup-cluster $client --yes reload $name --skip-restart

if [ $test_tls = true ]; then
total_sub_one=18
total=19
else
total_sub_one=23
total=24
fi

# disable tiproxy
echo "start scale in tiproxy"
tiup-cluster $client --yes scale-in $name -N n1:6000
wait_instance_num_reach $name $total $native_ssh

# scale in tidb and scale out again
echo "start scale in tidb"
tiup-cluster $client --yes scale-in $name -N n2:4000
wait_instance_num_reach $name $total_sub_one $native_ssh
echo "start scale out tidb"
topo=./topo/full_scale_in_tidb_2nd.yaml
tiup-cluster $client --yes scale-out $name $topo
# the session certs don't exist on the new tidb
! tiup-cluster $client exec $name -N n2 --command "$check_cert_file"
# the configurations are not updated on the new tidb
! tiup-cluster $client exec $name -N n2 --command "$check_cert_config"

# enable tiproxy again
echo "start scale out tiproxy"
topo=./topo/full_scale_in_tiproxy.yaml
tiup-cluster $client --yes scale-out $name $topo
# the session certs exist on the new tidb
tiup-cluster $client exec $name -N n2 --command "$check_cert_file"
# the configurations are updated on the new tidb
tiup-cluster $client exec $name -N n2 --command "$check_cert_config"

# scale in tidb and scale out again
echo "start scale in tidb"
tiup-cluster $client --yes scale-in $name -N n2:4000
wait_instance_num_reach $name $total $native_ssh
echo "start scale out tidb"
topo=./topo/full_scale_in_tidb_2nd.yaml
tiup-cluster $client --yes scale-out $name $topo
# the session certs exist on the new tidb
tiup-cluster $client exec $name -N n2 --command "$check_cert_file"
# the configurations are updated on the new tidb
tiup-cluster $client exec $name -N n2 --command "$check_cert_config"

tiup-cluster $client _test $name writable
tiup-cluster $client --yes destroy $name
}
8 changes: 8 additions & 0 deletions tests/tiup-cluster/test_scale_tiproxy.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash

set -eu

source script/scale_tiproxy.sh

echo "test scaling of tidb and tiproxy in cluster for version v8.1.0, via easy ssh"
scale_tiproxy v8.1.0 false false

0 comments on commit b1ee463

Please sign in to comment.