From b1ee463250299c13bf7d52afd7e7cc71090e84aa Mon Sep 17 00:00:00 2001 From: djshow832 Date: Mon, 1 Jul 2024 15:38:44 +0800 Subject: [PATCH] cluster: copy session certs when scale-out TiDB (#2432) * fix cert on scale * fix linter * pass nil topo for deploy * add integration tests * add endline * update comments * fix inst num * destroy cluster after test --------- Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../workflows/integrate-cluster-scale.yaml | 1 + pkg/cluster/manager/builder.go | 141 ++++++++++++------ pkg/cluster/manager/deploy.go | 5 + pkg/cluster/spec/tidb.go | 38 ++--- tests/tiup-cluster/script/scale_tiproxy.sh | 92 ++++++++++++ tests/tiup-cluster/test_scale_tiproxy.sh | 8 + 6 files changed, 210 insertions(+), 75 deletions(-) create mode 100644 tests/tiup-cluster/script/scale_tiproxy.sh create mode 100644 tests/tiup-cluster/test_scale_tiproxy.sh diff --git a/.github/workflows/integrate-cluster-scale.yaml b/.github/workflows/integrate-cluster-scale.yaml index 7746b8cbd6..7ce19a5f37 100644 --- a/.github/workflows/integrate-cluster-scale.yaml +++ b/.github/workflows/integrate-cluster-scale.yaml @@ -39,6 +39,7 @@ jobs: - 'test_scale_tools' - 'test_scale_core_tls' - 'test_scale_tools_tls' + - 'test_scale_tiproxy' env: working-directory: ${{ github.workspace }}/go/src/github.com/${{ github.repository }} steps: diff --git a/pkg/cluster/manager/builder.go b/pkg/cluster/manager/builder.go index 6f5bc227c9..94b63027c5 100644 --- a/pkg/cluster/manager/builder.go +++ b/pkg/cluster/manager/builder.go @@ -265,6 +265,11 @@ func buildScaleOutTask( if err != nil { return nil, err } + sessionCertTasks, err := buildSessionCertTasks(m, name, topo, newPart, base, gOpt, p) + if err != nil { + return nil, err + } + certificateTasks = append(certificateTasks, sessionCertTasks...) // always ignore config check result in scale out gOpt.IgnoreConfigCheck = true @@ -866,6 +871,68 @@ func genTiProxySessionCerts(dir string) error { }), "") } +// buildSessionCertTasks puts a self-signed cert to all TiDB if there is tiproxy. +// For deploy: originalTopo = nil, newTopo = topology. +// For scale-out: originalTopo = original topology, newTopo = new topology. +func buildSessionCertTasks(m *Manager, + name string, + originalTopo spec.Topology, + newTopo spec.Topology, + base *spec.BaseMeta, + gOpt operator.Options, + p *tui.SSHConnectionProps) ([]*task.StepDisplay, error) { + var certificateTasks []*task.StepDisplay // tasks which are used to copy certificate to remote host + hasOriginalTiProxy := false + if originalTopo != nil { + originalTopo.IterInstance(func(inst spec.Instance) { + if inst.ComponentName() == spec.ComponentTiProxy { + hasOriginalTiProxy = true + } + }) + } + hasNewTiProxy := false + newTopo.IterInstance(func(inst spec.Instance) { + if inst.ComponentName() == spec.ComponentTiProxy { + hasNewTiProxy = true + } + }) + if !hasOriginalTiProxy && !hasNewTiProxy { + return nil, nil + } + + tempPath := m.specManager.Path(name, spec.TempConfigPath) + keyPath := filepath.Join(tempPath, "tiproxy-session.key") + certPath := filepath.Join(tempPath, "tiproxy-session.crt") + copySessionCerts := func(inst spec.Instance) { + if inst.ComponentName() != spec.ComponentTiDB { + return + } + deployDir := spec.Abs(base.User, inst.DeployDir()) + tlsDir := filepath.Join(deployDir, spec.TLSCertKeyDir) + + tb := task.NewSimpleUerSSH(m.logger, inst.GetManageHost(), inst.GetSSHPort(), base.User, gOpt, p, newTopo.BaseTopo().GlobalOptions.SSHType). + Mkdir(base.User, inst.GetManageHost(), newTopo.BaseTopo().GlobalOptions.SystemdMode != spec.UserMode, deployDir, tlsDir) + tb = tb. + CopyFile(keyPath, filepath.Join(deployDir, spec.TLSCertKeyDir, "tiproxy-session.key"), inst.GetHost(), false, 0, false). + CopyFile(certPath, filepath.Join(deployDir, spec.TLSCertKeyDir, "tiproxy-session.crt"), inst.GetHost(), false, 0, false) + t := tb.BuildAsStep(fmt.Sprintf(" - Copy session certificate %s -> %s", inst.ComponentName(), inst.ID())) + certificateTasks = append(certificateTasks, t) + } + + // If TiProxy is just enabled now (either deploy or scale-out), issue a session cert and copy the cert to original TiDB. + if !hasOriginalTiProxy { + if err := genTiProxySessionCerts(tempPath); err != nil { + return certificateTasks, err + } + if originalTopo != nil { + originalTopo.IterInstance(copySessionCerts) + } + } + // Copy the session cert to new TiDB. + newTopo.IterInstance(copySessionCerts) + return certificateTasks, nil +} + // buildCertificateTasks generates certificate for instance and transfers it to the server func buildCertificateTasks( m *Manager, @@ -879,62 +946,38 @@ func buildCertificateTasks( certificateTasks []*task.StepDisplay // tasks which are used to copy certificate to remote host ) - // check if there is tiproxy - // if there is tiproxy, whether or not TLS, we must issue a self-signed cert - hasTiProxy := false - topo.IterInstance(func(inst spec.Instance) { - if inst.ComponentName() == spec.ComponentTiProxy { - hasTiProxy = true - } - }) - if hasTiProxy { - if err := genTiProxySessionCerts(m.specManager.Path(name, spec.TempConfigPath)); err != nil { - return certificateTasks, err - } - } - - // copy certificate to remote host - topo.IterInstance(func(inst spec.Instance) { - deployDir := spec.Abs(base.User, inst.DeployDir()) - tlsDir := filepath.Join(deployDir, spec.TLSCertKeyDir) + // copy TLS certificate to remote host + if topo.BaseTopo().GlobalOptions.TLSEnabled { + topo.IterInstance(func(inst spec.Instance) { + deployDir := spec.Abs(base.User, inst.DeployDir()) + tlsDir := filepath.Join(deployDir, spec.TLSCertKeyDir) - needSessionCert := hasTiProxy && inst.ComponentName() == spec.ComponentTiDB - if needSessionCert || topo.BaseTopo().GlobalOptions.TLSEnabled { tb := task.NewSimpleUerSSH(m.logger, inst.GetManageHost(), inst.GetSSHPort(), base.User, gOpt, p, topo.BaseTopo().GlobalOptions.SSHType). Mkdir(base.User, inst.GetManageHost(), topo.BaseTopo().GlobalOptions.SystemdMode != spec.UserMode, deployDir, tlsDir) - - if needSessionCert { - tb = tb. - CopyFile(filepath.Join(m.specManager.Path(name, spec.TempConfigPath), "tiproxy-session.key"), filepath.Join(deployDir, spec.TLSCertKeyDir, "tiproxy-session.key"), inst.GetHost(), false, 0, false). - CopyFile(filepath.Join(m.specManager.Path(name, spec.TempConfigPath), "tiproxy-session.crt"), filepath.Join(deployDir, spec.TLSCertKeyDir, "tiproxy-session.crt"), inst.GetHost(), false, 0, false) - } - - if topo.BaseTopo().GlobalOptions.TLSEnabled { - ca, err := crypto.ReadCA( - name, - m.specManager.Path(name, spec.TLSCertKeyDir, spec.TLSCACert), - m.specManager.Path(name, spec.TLSCertKeyDir, spec.TLSCAKey), - ) - if err != nil { - iterErr = err - return - } - tb = tb.TLSCert( - inst.GetHost(), - inst.ComponentName(), - inst.Role(), - inst.GetMainPort(), - ca, - meta.DirPaths{ - Deploy: deployDir, - Cache: m.specManager.Path(name, spec.TempConfigPath), - }) + ca, err := crypto.ReadCA( + name, + m.specManager.Path(name, spec.TLSCertKeyDir, spec.TLSCACert), + m.specManager.Path(name, spec.TLSCertKeyDir, spec.TLSCAKey), + ) + if err != nil { + iterErr = err + return } + tb = tb.TLSCert( + inst.GetHost(), + inst.ComponentName(), + inst.Role(), + inst.GetMainPort(), + ca, + meta.DirPaths{ + Deploy: deployDir, + Cache: m.specManager.Path(name, spec.TempConfigPath), + }) t := tb.BuildAsStep(fmt.Sprintf(" - Generate certificate %s -> %s", inst.ComponentName(), inst.ID())) certificateTasks = append(certificateTasks, t) - } - }) + }) + } return certificateTasks, iterErr } diff --git a/pkg/cluster/manager/deploy.go b/pkg/cluster/manager/deploy.go index 52ea581d70..3b9a32b1be 100644 --- a/pkg/cluster/manager/deploy.go +++ b/pkg/cluster/manager/deploy.go @@ -331,6 +331,11 @@ func (m *Manager) Deploy( if err != nil { return err } + sessionCertTasks, err := buildSessionCertTasks(m, name, nil, topo, metadata.GetBaseMeta(), gOpt, sshProxyProps) + if err != nil { + return err + } + certificateTasks = append(certificateTasks, sessionCertTasks...) refreshConfigTasks, _ := buildInitConfigTasks(m, name, topo, metadata.GetBaseMeta(), gOpt, nil) diff --git a/pkg/cluster/spec/tidb.go b/pkg/cluster/spec/tidb.go index 5541c50060..4fd4687696 100644 --- a/pkg/cluster/spec/tidb.go +++ b/pkg/cluster/spec/tidb.go @@ -261,33 +261,19 @@ func (i *TiDBInstance) InitConfig( // setTiProxyConfig sets tiproxy session certs func (i *TiDBInstance) setTiProxyConfig(ctx context.Context, topo *Specification, version string, configs map[string]any, paths meta.DirPaths) (map[string]any, error) { - hasTiProxy := false - topo.IterInstance(func(instance Instance) { - if instance.ComponentName() == ComponentTiProxy { - hasTiProxy = true - } - }) - if hasTiProxy && tidbver.TiDBSupportTiproxy(version) { - if configs == nil { - configs = make(map[string]any) - } - configs["security.session-token-signing-cert"] = fmt.Sprintf( - "%s/tls/tiproxy-session.crt", - paths.Deploy) - configs["security.session-token-signing-key"] = fmt.Sprintf( - "%s/tls/tiproxy-session.key", - paths.Deploy) - } else { - tlsConfigs := []string{ - "security.session-token-signing-cert", - "security.session-token-signing-key", - } - if configs != nil { - for _, config := range tlsConfigs { - delete(configs, config) - } - } + if len(topo.TiProxyServers) == 0 || !tidbver.TiDBSupportTiproxy(version) { + return configs, nil + } + if configs == nil { + configs = make(map[string]any) } + // Overwrite users' configs just like TLS configs. + configs["security.session-token-signing-cert"] = fmt.Sprintf( + "%s/tls/tiproxy-session.crt", + paths.Deploy) + configs["security.session-token-signing-key"] = fmt.Sprintf( + "%s/tls/tiproxy-session.key", + paths.Deploy) return configs, nil } diff --git a/tests/tiup-cluster/script/scale_tiproxy.sh b/tests/tiup-cluster/script/scale_tiproxy.sh new file mode 100644 index 0000000000..91a28cf1dd --- /dev/null +++ b/tests/tiup-cluster/script/scale_tiproxy.sh @@ -0,0 +1,92 @@ +#!/bin/bash + +set -eu + +function scale_tiproxy() { + mkdir -p ~/.tiup/bin/ + + version=$1 + test_tls=$2 + native_ssh=$3 + + client="" + if [ $native_ssh == true ]; then + client="--ssh=system" + fi + + name="test_scale_tiproxy_$RANDOM" + if [ $test_tls = true ]; then + topo=./topo/full_tls.yaml + else + topo=./topo/full.yaml + fi + + check_cert_file="ls /home/tidb/deploy/tidb-4000/tls/tiproxy-session.crt /home/tidb/deploy/tidb-4000/tls/tiproxy-session.key" + check_cert_config="grep -q session-token-signing-key /home/tidb/deploy/tidb-4000/conf/tidb.toml" + + tiup-cluster $client --yes deploy $name $version $topo -i ~/.ssh/id_rsa + + # the session certs exist + tiup-cluster $client exec $name -N n1 --command "$check_cert_file" + # the configurations are updated + tiup-cluster $client exec $name -N n1 --command "$check_cert_config" + + tiup-cluster $client list | grep "$name" + + tiup-cluster $client --yes start $name + + tiup-cluster $client _test $name writable + + tiup-cluster $client display $name + + tiup-cluster $client --yes reload $name --skip-restart + + if [ $test_tls = true ]; then + total_sub_one=18 + total=19 + else + total_sub_one=23 + total=24 + fi + + # disable tiproxy + echo "start scale in tiproxy" + tiup-cluster $client --yes scale-in $name -N n1:6000 + wait_instance_num_reach $name $total $native_ssh + + # scale in tidb and scale out again + echo "start scale in tidb" + tiup-cluster $client --yes scale-in $name -N n2:4000 + wait_instance_num_reach $name $total_sub_one $native_ssh + echo "start scale out tidb" + topo=./topo/full_scale_in_tidb_2nd.yaml + tiup-cluster $client --yes scale-out $name $topo + # the session certs don't exist on the new tidb + ! tiup-cluster $client exec $name -N n2 --command "$check_cert_file" + # the configurations are not updated on the new tidb + ! tiup-cluster $client exec $name -N n2 --command "$check_cert_config" + + # enable tiproxy again + echo "start scale out tiproxy" + topo=./topo/full_scale_in_tiproxy.yaml + tiup-cluster $client --yes scale-out $name $topo + # the session certs exist on the new tidb + tiup-cluster $client exec $name -N n2 --command "$check_cert_file" + # the configurations are updated on the new tidb + tiup-cluster $client exec $name -N n2 --command "$check_cert_config" + + # scale in tidb and scale out again + echo "start scale in tidb" + tiup-cluster $client --yes scale-in $name -N n2:4000 + wait_instance_num_reach $name $total $native_ssh + echo "start scale out tidb" + topo=./topo/full_scale_in_tidb_2nd.yaml + tiup-cluster $client --yes scale-out $name $topo + # the session certs exist on the new tidb + tiup-cluster $client exec $name -N n2 --command "$check_cert_file" + # the configurations are updated on the new tidb + tiup-cluster $client exec $name -N n2 --command "$check_cert_config" + + tiup-cluster $client _test $name writable + tiup-cluster $client --yes destroy $name +} diff --git a/tests/tiup-cluster/test_scale_tiproxy.sh b/tests/tiup-cluster/test_scale_tiproxy.sh new file mode 100644 index 0000000000..d5f6124070 --- /dev/null +++ b/tests/tiup-cluster/test_scale_tiproxy.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +set -eu + +source script/scale_tiproxy.sh + +echo "test scaling of tidb and tiproxy in cluster for version v8.1.0, via easy ssh" +scale_tiproxy v8.1.0 false false