From 26d007b7edf19382da3039b27775f7ae57a81f7a Mon Sep 17 00:00:00 2001 From: Raphael 'kena' Poss Date: Wed, 9 Aug 2023 01:50:07 +0200 Subject: [PATCH] server: avoid a race in the server controller Prior to this patch, the Go race detector was complaining about racy concurrent accesses to `serverStateUsingChannels.server`, via `(*serverStateUsingChannels) getServer()` and `(*channelOrchestrator) startControlledServer()`. These racy accesses happened to be safe because the writes and reads to that field were correctly ordered around updates to an atomic bool (the `started` field). However, the race detector is not sufficiently sophisticated to detect this ordering and satisfy itself that the state transition can only happen once. In order to silence the race detector (with no change in correctness), this patch replaces the atomic bool by a mutex, whose access semantics are properly understood by the race detector. This change incidentally makes the code slightly easier to read and understand. Co-authored-by: Lidor Carmel --- .../server_controller_channel_orchestrator.go | 34 ++++++++++++------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/pkg/server/server_controller_channel_orchestrator.go b/pkg/server/server_controller_channel_orchestrator.go index 1b8dae509ad9..9cd81d241595 100644 --- a/pkg/server/server_controller_channel_orchestrator.go +++ b/pkg/server/server_controller_channel_orchestrator.go @@ -74,8 +74,14 @@ type serverStateUsingChannels struct { // features that label data with the current tenant name. nc *roachpb.TenantNameContainer - // server is the server that is being controlled. - server orchestratedServer + startedMu struct { + syncutil.Mutex + + // server is the server that is being controlled. + // This is only set when the corresponding orchestratedServer + // instance is ready. + server orchestratedServer + } // startedOrStopped is closed when the server has either started or // stopped. This can be used to wait for a server start. @@ -85,10 +91,6 @@ type serverStateUsingChannels struct { // during server creation if any. startErr error - // started is marked true when the server has started. This can - // be used to observe the start state without waiting. - started syncutil.AtomicBool - // requestImmediateStop can be called to request a server to stop // ungracefully. // It can be called multiple times. @@ -106,7 +108,9 @@ var _ serverState = (*serverStateUsingChannels)(nil) // getServer is part of the serverState interface. func (s *serverStateUsingChannels) getServer() (orchestratedServer, bool) { - return s.server, s.started.Get() + s.startedMu.Lock() + defer s.startedMu.Unlock() + return s.startedMu.server, s.startedMu.server != nil } // nameContainer is part of the serverState interface. @@ -154,14 +158,13 @@ func (o *channelOrchestrator) makeServerStateForSystemTenant( closeCtx, cancelFn := context.WithCancel(context.Background()) st := &serverStateUsingChannels{ nc: nc, - server: systemSrv, startedOrStoppedCh: closedChan, requestImmediateStop: cancelFn, requestGracefulStop: cancelFn, stoppedCh: closeCtx.Done(), } - st.started.Set(true) + st.startedMu.server = systemSrv return st } @@ -301,7 +304,11 @@ func (o *channelOrchestrator) startControlledServer( // the goroutine above to call tenantStopper.Stop() and // terminate. state.requestImmediateStop() - state.started.Set(false) + func() { + state.startedMu.Lock() + defer state.startedMu.Unlock() + state.startedMu.server = nil + }() close(stoppedCh) if !startedOrStoppedChAlreadyClosed { state.startErr = errors.New("server stop before successful start") @@ -413,9 +420,12 @@ func (o *channelOrchestrator) startControlledServer( } // Indicate the server has started. - state.server = tenantServer startedOrStoppedChAlreadyClosed = true - state.started.Set(true) + func() { + state.startedMu.Lock() + defer state.startedMu.Unlock() + state.startedMu.server = tenantServer + }() close(startedOrStoppedCh) // Wait for a request to shut down.