Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Auth: Prune pending TLS identities #14261

Merged
merged 16 commits into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
be1ec61
lxd/state: Add LeaderInfo type and function to state.
markylaing Oct 23, 2024
2220cde
lxd: Set LeaderInfo function in `(*Daemon).State`.
markylaing Oct 23, 2024
7fe7737
lxd: Update ACME handlers to use `(*State).LeaderInfo`.
markylaing Oct 23, 2024
afbd115
lxd: Update cluster handlers to use `(*State).LeaderInfo`.
markylaing Oct 24, 2024
15591e9
lxd: Update image sync task to use `(*State).LeaderInfo`.
markylaing Oct 23, 2024
458e458
lxd: Update instance handlers to use `(*State).LeaderInfo`.
markylaing Oct 23, 2024
5093829
lxd: Update operation prune task to use `(*State).LeaderInfo`.
markylaing Oct 23, 2024
07a7c8f
lxd: Update database patch to use `(*State).LeaderInfo`.
markylaing Oct 23, 2024
b2dd68e
lxd/db/cluster: Differentiate errors returned by PendingTLSMetadata.
markylaing Oct 25, 2024
de48dc3
lxd: Remove expired pending TLS identities in token prune task.
markylaing Oct 23, 2024
11bc5a3
lxd: Change log level to 'warn' for the token prune task.
markylaing Oct 23, 2024
effd497
lxd: Opportunistically remove invalid/expired pending TLS identities.
markylaing Oct 23, 2024
a8f95c5
lxd: Add internal testing handler to trigger token prune task.
markylaing Oct 23, 2024
efb7028
test/suites: Test that expired pending identities are removed when to…
markylaing Oct 23, 2024
f20f62d
test/suites: Test the token pruning task removes expired pending TLS …
markylaing Oct 23, 2024
86b65fe
test/suites: Test token prune task for certificate add token operations.
markylaing Oct 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 15 additions & 25 deletions lxd/acme.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,24 +37,19 @@ func acmeProvideChallenge(d *Daemon, r *http.Request) response.Response {
return response.SmartError(err)
}

if s.ServerClustered {
leader, err := d.gateway.LeaderAddress()
leaderInfo, err := s.LeaderInfo()
if err != nil {
return response.SmartError(err)
}

if !leaderInfo.Leader {
// Forward the request to the leader
client, err := cluster.Connect(leaderInfo.Address, s.Endpoints.NetworkCert(), s.ServerCert(), r, true)
if err != nil {
return response.SmartError(err)
}

// This gives me the correct value
clusterAddress := s.LocalConfig.ClusterAddress()

if clusterAddress != "" && clusterAddress != leader {
// Forward the request to the leader
client, err := cluster.Connect(leader, s.Endpoints.NetworkCert(), s.ServerCert(), r, true)
if err != nil {
return response.SmartError(err)
}

return response.ForwardedResponse(client, r)
}
return response.ForwardedResponse(client, r)
}

if d.http01Provider == nil || d.http01Provider.Token() != token {
Expand Down Expand Up @@ -83,18 +78,13 @@ func autoRenewCertificate(ctx context.Context, d *Daemon, force bool) error {
}

// If we are clustered, let the leader handle the certificate renewal.
if s.ServerClustered {
leader, err := d.gateway.LeaderAddress()
if err != nil {
return err
}

// Figure out our own cluster address.
clusterAddress := s.LocalConfig.ClusterAddress()
leaderInfo, err := s.LeaderInfo()
if err != nil {
return err
}

if clusterAddress != leader {
return nil
}
if !leaderInfo.Leader {
return nil
}

opRun := func(op *operations.Operation) error {
Expand Down
109 changes: 54 additions & 55 deletions lxd/api_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -1226,11 +1226,15 @@ func clusterNodesGet(d *Daemon, r *http.Request) response.Response {
recursion := util.IsRecursionRequest(r)
s := d.State()

leaderAddress, err := d.gateway.LeaderAddress()
leaderInfo, err := s.LeaderInfo()
if err != nil {
return response.InternalError(err)
}

if !leaderInfo.Clustered {
return response.InternalError(cluster.ErrNodeIsNotClustered)
}

var raftNodes []db.RaftNode
err = s.DB.Node.Transaction(r.Context(), func(ctx context.Context, tx *db.NodeTx) error {
raftNodes, err = tx.GetRaftNodes(ctx)
Expand Down Expand Up @@ -1268,7 +1272,7 @@ func clusterNodesGet(d *Daemon, r *http.Request) response.Response {
}

args := db.NodeInfoArgs{
LeaderAddress: leaderAddress,
LeaderAddress: leaderInfo.Address,
FailureDomains: failureDomains,
MemberFailureDomains: memberFailureDomains,
OfflineThreshold: s.GlobalConfig.OfflineThreshold(),
Expand Down Expand Up @@ -1503,11 +1507,15 @@ func clusterNodeGet(d *Daemon, r *http.Request) response.Response {
return response.SmartError(err)
}

leaderAddress, err := d.gateway.LeaderAddress()
leaderInfo, err := s.LeaderInfo()
if err != nil {
return response.InternalError(err)
}

if !leaderInfo.Clustered {
return response.InternalError(cluster.ErrNodeIsNotClustered)
}

var raftNodes []db.RaftNode
err = s.DB.Node.Transaction(r.Context(), func(ctx context.Context, tx *db.NodeTx) error {
raftNodes, err = tx.GetRaftNodes(ctx)
Expand Down Expand Up @@ -1544,7 +1552,7 @@ func clusterNodeGet(d *Daemon, r *http.Request) response.Response {
}

args := db.NodeInfoArgs{
LeaderAddress: leaderAddress,
LeaderAddress: leaderInfo.Address,
FailureDomains: failureDomains,
MemberFailureDomains: memberFailureDomains,
OfflineThreshold: s.GlobalConfig.OfflineThreshold(),
Expand Down Expand Up @@ -1981,24 +1989,27 @@ func clusterNodeDelete(d *Daemon, r *http.Request) response.Response {
}

// Redirect all requests to the leader, which is the one with
// knowing what nodes are part of the raft cluster.
// knowledge of which nodes are part of the raft cluster.
localClusterAddress := s.LocalConfig.ClusterAddress()

leader, err := d.gateway.LeaderAddress()
leaderInfo, err := s.LeaderInfo()
if err != nil {
return response.InternalError(err)
return response.SmartError(err)
}

if !leaderInfo.Clustered {
return response.InternalError(cluster.ErrNodeIsNotClustered)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would this be better classified as a BadRequest?
This does impact the metrics so just making sure we are getting this right.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought that but I didn't want to change the existing HTTP code as that's technically an API change

}

var localInfo, leaderInfo db.NodeInfo
var localInfo, leaderNodeInfo db.NodeInfo
err = s.DB.Cluster.Transaction(context.TODO(), func(ctx context.Context, tx *db.ClusterTx) error {
localInfo, err = tx.GetNodeByAddress(ctx, localClusterAddress)
if err != nil {
return fmt.Errorf("Failed loading local member info %q: %w", localClusterAddress, err)
}

leaderInfo, err = tx.GetNodeByAddress(ctx, leader)
leaderNodeInfo, err = tx.GetNodeByAddress(ctx, leaderInfo.Address)
if err != nil {
return fmt.Errorf("Failed loading leader member info %q: %w", leader, err)
return fmt.Errorf("Failed loading leader member info %q: %w", leaderInfo.Address, err)
}

return nil
Expand All @@ -2018,7 +2029,7 @@ func clusterNodeDelete(d *Daemon, r *http.Request) response.Response {
return response.SmartError(fmt.Errorf("Unable to get raft nodes: %w", err))
}

if localClusterAddress != leader {
if !leaderInfo.Leader {
if localInfo.Name == name {
// If the member being removed is ourselves and we are not the leader, then lock the
// clusterPutDisableMu before we forward the request to the leader, so that when the leader
Expand All @@ -2035,8 +2046,8 @@ func clusterNodeDelete(d *Daemon, r *http.Request) response.Response {
}()
}

logger.Debugf("Redirect member delete request to %s", leader)
client, err := cluster.Connect(leader, s.Endpoints.NetworkCert(), s.ServerCert(), r, false)
logger.Debugf("Redirect member delete request to %s", leaderInfo.Address)
client, err := cluster.Connect(leaderInfo.Address, s.Endpoints.NetworkCert(), s.ServerCert(), r, false)
if err != nil {
return response.SmartError(err)
}
Expand All @@ -2048,7 +2059,7 @@ func clusterNodeDelete(d *Daemon, r *http.Request) response.Response {

// If we are the only remaining node, wait until promotion to leader,
// then update cluster certs.
if name == leaderInfo.Name && len(nodes) == 2 {
if name == leaderNodeInfo.Name && len(nodes) == 2 {
err = d.gateway.WaitLeadership()
if err != nil {
return response.SmartError(err)
Expand Down Expand Up @@ -2080,9 +2091,9 @@ func clusterNodeDelete(d *Daemon, r *http.Request) response.Response {
defer d.clusterMembershipMutex.Unlock()

// If we are removing the leader of a 2 node cluster, ensure the other node can be a leader.
if name == leaderInfo.Name && len(nodes) == 2 {
if name == leaderNodeInfo.Name && len(nodes) == 2 {
for i := range nodes {
if nodes[i].Address != leader && nodes[i].Role != db.RaftVoter {
if nodes[i].Address != leaderInfo.Address && nodes[i].Role != db.RaftVoter {
// Promote the remaining node.
nodes[i].Role = db.RaftVoter
err := changeMemberRole(s, r, nodes[i].Address, nodes)
Expand Down Expand Up @@ -2426,25 +2437,23 @@ func internalClusterPostAccept(d *Daemon, r *http.Request) response.Response {
}

// Redirect all requests to the leader, which is the one with
// knowning what nodes are part of the raft cluster.
localClusterAddress := s.LocalConfig.ClusterAddress()

leader, err := d.gateway.LeaderAddress()
// knowledge of which nodes are part of the raft cluster.
leaderInfo, err := s.LeaderInfo()
if err != nil {
return response.InternalError(err)
}

if localClusterAddress != leader {
logger.Debugf("Redirect member accept request to %s", leader)
if !leaderInfo.Clustered {
return response.InternalError(cluster.ErrNodeIsNotClustered)
}

if leader == "" {
return response.SmartError(fmt.Errorf("Unable to find leader address"))
}
if !leaderInfo.Leader {
logger.Debugf("Redirect member accept request to %s", leaderInfo.Address)

url := &url.URL{
Scheme: "https",
Path: "/internal/cluster/accept",
Host: leader,
Host: leaderInfo.Address,
}

return response.SyncResponseRedirect(url.String())
Expand Down Expand Up @@ -2517,19 +2526,21 @@ func internalClusterPostRebalance(d *Daemon, r *http.Request) response.Response

// Redirect all requests to the leader, which is the one with with
// up-to-date knowledge of what nodes are part of the raft cluster.
localClusterAddress := s.LocalConfig.ClusterAddress()

leader, err := d.gateway.LeaderAddress()
leaderInfo, err := s.LeaderInfo()
if err != nil {
return response.InternalError(err)
}

if localClusterAddress != leader {
logger.Debugf("Redirect cluster rebalance request to %s", leader)
if !leaderInfo.Clustered {
return response.InternalError(cluster.ErrNodeIsNotClustered)
}

if !leaderInfo.Leader {
logger.Debugf("Redirect cluster rebalance request to %s", leaderInfo.Address)
url := &url.URL{
Scheme: "https",
Path: "/internal/cluster/rebalance",
Host: leader,
Host: leaderInfo.Address,
}

return response.SyncResponseRedirect(url.String())
Expand Down Expand Up @@ -2662,16 +2673,12 @@ func handoverMemberRole(s *state.State, gateway *cluster.Gateway) error {

// Find the cluster leader.
findLeader:
leader, err := gateway.LeaderAddress()
leaderInfo, err := s.LeaderInfo()
if err != nil {
return err
}

if leader == "" {
return fmt.Errorf("No leader address found")
}

if leader == localClusterAddress {
if leaderInfo.Leader {
logger.Info("Transferring leadership", logCtx)
err := gateway.TransferLeadership()
if err != nil {
Expand All @@ -2682,7 +2689,7 @@ findLeader:
}

logger.Info("Handing over cluster member role", logCtx)
client, err := cluster.Connect(leader, s.Endpoints.NetworkCert(), s.ServerCert(), nil, true)
client, err := cluster.Connect(leaderInfo.Address, s.Endpoints.NetworkCert(), s.ServerCert(), nil, true)
if err != nil {
return fmt.Errorf("Failed handing over cluster member role: %w", err)
}
Expand Down Expand Up @@ -2752,21 +2759,17 @@ func internalClusterPostHandover(d *Daemon, r *http.Request) response.Response {
// authoritative knowledge of the current raft configuration.
localClusterAddress := s.LocalConfig.ClusterAddress()

leader, err := d.gateway.LeaderAddress()
leaderInfo, err := s.LeaderInfo()
if err != nil {
return response.InternalError(err)
}

if leader == "" {
return response.SmartError(fmt.Errorf("No leader address found"))
}

if localClusterAddress != leader {
logger.Debugf("Redirect handover request to %s", leader)
if !leaderInfo.Leader {
logger.Debugf("Redirect handover request to %s", leaderInfo.Address)
url := &url.URL{
Scheme: "https",
Path: "/internal/cluster/handover",
Host: leader,
Host: leaderInfo.Address,
}

return response.SyncResponseRedirect(url.String())
Expand Down Expand Up @@ -4462,17 +4465,13 @@ func autoHealClusterTask(d *Daemon) (task.Func, task.Schedule) {
return // Skip healing if it's disabled.
}

leader, err := d.gateway.LeaderAddress()
leaderInfo, err := s.LeaderInfo()
if err != nil {
if errors.Is(err, cluster.ErrNodeIsNotClustered) {
return // Skip healing if not clustered.
}

logger.Error("Failed to get leader cluster member address", logger.Ctx{"err": err})
logger.Error("Failed to determine cluster leader", logger.Ctx{"err": err})
return
}

if s.LocalConfig.ClusterAddress() != leader {
if !leaderInfo.Clustered || !leaderInfo.Leader {
return // Skip healing if not cluster leader.
}

Expand Down
6 changes: 6 additions & 0 deletions lxd/api_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ var apiInternal = []APIEndpoint{
internalSQLCmd,
internalWarningCreateCmd,
internalIdentityCacheRefreshCmd,
internalPruneTokenCmd,
}

var internalShutdownCmd = APIEndpoint{
Expand Down Expand Up @@ -136,6 +137,11 @@ var internalBGPStateCmd = APIEndpoint{
Get: APIEndpointAction{Handler: internalBGPState, AccessHandler: allowPermission(entity.TypeServer, auth.EntitlementCanEdit)},
}

var internalPruneTokenCmd = APIEndpoint{
Path: "testing/prune-tokens",
Post: APIEndpointAction{Handler: removeTokenHandler, AccessHandler: allowPermission(entity.TypeServer, auth.EntitlementCanEdit)},
}

var internalIdentityCacheRefreshCmd = APIEndpoint{
Path: "identity-cache-refresh",

Expand Down
26 changes: 25 additions & 1 deletion lxd/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,7 @@ func (d *Daemon) State() *state.State {
localConfig := d.localConfig
d.globalConfigMu.Unlock()

return &state.State{
s := &state.State{
ShutdownCtx: d.shutdownCtx,
DB: d.db,
MAAS: d.maas,
Expand All @@ -609,6 +609,30 @@ func (d *Daemon) State() *state.State {
Authorizer: d.authorizer,
UbuntuPro: d.ubuntuPro,
}

s.LeaderInfo = func() (*state.LeaderInfo, error) {
if !s.ServerClustered {
return &state.LeaderInfo{
Clustered: false,
Leader: true,
Address: "",
}, nil
}

localClusterAddress := s.LocalConfig.ClusterAddress()
leaderAddress, err := d.gateway.LeaderAddress()
if err != nil {
return nil, fmt.Errorf("Failed to get the address of the cluster leader: %w", err)
}

return &state.LeaderInfo{
Clustered: true,
Leader: localClusterAddress == leaderAddress,
Address: leaderAddress,
}, nil
}

return s
}

// UnixSocket returns the full path to the unix.socket file that this daemon is
Expand Down
4 changes: 2 additions & 2 deletions lxd/db/cluster/identities.go
Original file line number Diff line number Diff line change
Expand Up @@ -328,13 +328,13 @@ type PendingTLSMetadata struct {
// PendingTLSMetadata returns the pending TLS identity metadata.
func (i Identity) PendingTLSMetadata() (*PendingTLSMetadata, error) {
if i.Type != api.IdentityTypeCertificateClientPending {
return nil, fmt.Errorf("Cannot extract pending TLS identity secret: Identity is not pending")
return nil, api.NewStatusError(http.StatusBadRequest, "Cannot extract pending TLS identity secret: Identity is not pending")
}

var metadata PendingTLSMetadata
err := json.Unmarshal([]byte(i.Metadata), &metadata)
if err != nil {
return nil, fmt.Errorf("Failed to unmarshal pending TLS identity metadata: %w", err)
return nil, api.StatusErrorf(http.StatusInternalServerError, "Failed to unmarshal pending TLS identity metadata: %w", err)
}

return &metadata, nil
Expand Down
Loading
Loading