diff --git a/README.md b/README.md index 2346f57f5a3..ff0fd5c4b9c 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,7 @@ As of the 1.26 release, enhancements from this repo are visualized in the Enhanc Links: +- [1.32 Milestone](https://bit.ly/k8s132-enhancements) - [1.31 Milestone](https://bit.ly/k8s131-enhancements) - [1.30 Milestone](https://bit.ly/k8s130-enhancements) - [1.29 Milestone](https://bit.ly/k8s129-enhancements) diff --git a/go.mod b/go.mod index 8a452d5d728..e755d859bdf 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( github.com/sirupsen/logrus v1.7.0 github.com/spf13/cobra v1.1.1 github.com/stretchr/testify v1.7.0 - golang.org/x/oauth2 v0.0.0-20210112200429-01de73cf58bd + golang.org/x/oauth2 v0.21.0 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c k8s.io/release v0.7.1-0.20210218090651-d71805402dab k8s.io/test-infra v0.0.0-20200813194141-e9678d500461 @@ -62,7 +62,6 @@ require ( golang.org/x/sys v0.0.0-20210112080510-489259a85091 // indirect golang.org/x/tools v0.0.0-20210108195828-e2f9c7f1fc8e // indirect golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect - google.golang.org/appengine v1.6.7 // indirect google.golang.org/protobuf v1.25.0 // indirect gopkg.in/warnings.v0 v0.1.2 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect diff --git a/go.sum b/go.sum index dbb3c5fefb5..5e3b9e204e2 100644 --- a/go.sum +++ b/go.sum @@ -34,6 +34,7 @@ cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvf cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= +cloud.google.com/go/compute/metadata v0.3.0/go.mod h1:zFmK7XCadkQkj6TtorcaGlCW1hT1fIilQDwofLpJ20k= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= cloud.google.com/go/firestore v1.1.0/go.mod h1:ulACoGHTpvq5r8rxGJ4ddJZBZqakUQqClKRT5SZwBmk= @@ -606,8 +607,9 @@ github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.4 h1:L8R9j+yAqZuZjsqh/z+F1NCffTKKLShY6zXTItVIZ8M= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-containerregistry v0.0.0-20191010200024-a3d713f9b7f8/go.mod h1:KyKXa9ciM8+lgMXwOVsXi7UxGrsf9mM61Mzs+xKUrKE= github.com/google/go-containerregistry v0.0.0-20200115214256-379933c9c22b/go.mod h1:Wtl/v6YdQxv397EREtzwgd9+Ud7Q5D8XMbi3Zazgkrs= github.com/google/go-containerregistry v0.0.0-20200123184029-53ce695e4179/go.mod h1:Wtl/v6YdQxv397EREtzwgd9+Ud7Q5D8XMbi3Zazgkrs= @@ -1474,8 +1476,9 @@ golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4Iltr golang.org/x/oauth2 v0.0.0-20200902213428-5d25da1a8d43/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20201109201403-9fd604954f58/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= 
golang.org/x/oauth2 v0.0.0-20201208152858-08078c50e5b5/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20210112200429-01de73cf58bd h1:0n2rzLq6xLtV9OFaT0BF2syUkjOwRrJ1zvXY5hH7Kkc= golang.org/x/oauth2 v0.0.0-20210112200429-01de73cf58bd/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs= +golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -1745,7 +1748,6 @@ google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww google.golang.org/appengine v1.6.2/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/cloud v0.0.0-20151119220103-975617b05ea8/go.mod h1:0H1ncTHf11KCFhTc/+EFRbzSCOZx+VUbRMk55Yv5MYk= google.golang.org/genproto v0.0.0-20170731182057-09f6ed296fc6/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= diff --git a/keps/prod-readiness/sig-auth/4633.yaml b/keps/prod-readiness/sig-auth/4633.yaml index 97015532d88..f4713cee4db 100644 --- a/keps/prod-readiness/sig-auth/4633.yaml +++ b/keps/prod-readiness/sig-auth/4633.yaml @@ -1,3 +1,5 @@ kep-number: 4633 alpha: approver: "@jpbetz" +beta: + approver: "@jpbetz" diff --git a/keps/sig-api-machinery/3157-watch-list/README.md b/keps/sig-api-machinery/3157-watch-list/README.md index 211384d76ce..2db6a9ccf5d 100644 --- a/keps/sig-api-machinery/3157-watch-list/README.md +++ b/keps/sig-api-machinery/3157-watch-list/README.md @@ -92,6 +92,7 @@ tags, and then generate with `hack/update-toc.sh`. - [Results with WATCH-LIST](#results-with-watch-list) - [Required changes for a WATCH request with the RV set to the last observed value (RV > 0)](#required-changes-for-a-watch-request-with-the-rv-set-to-the-last-observed-value-rv--0) - [Provide a fix for the long-standing issue https://github.com/kubernetes/kubernetes/issues/59848](#provide-a-fix-for-the-long-standing-issue-httpsgithubcomkuberneteskubernetesissues59848) + - [Replacing standard List request with WatchList mechanism for client-go's List method.](#replacing-standard-list-request-with-watchlist-mechanism-for-client-gos-list-method) - [Test Plan](#test-plan) - [Prerequisite testing updates](#prerequisite-testing-updates) - [Unit tests](#unit-tests) @@ -581,6 +582,73 @@ Then on the server side we: 3. reject the request if waitUntilFreshAndBlock times out, thus forcing informers to retry. 4. otherwise, construct the final list and send back to a client. +### Replacing standard List request with WatchList mechanism for client-go's List method. + +Replacing the underlying implementation of the List method for client-go based clients (like typed or dynamic client) +with the WatchList mechanism requires ensuring that the data returned by both the standard List request and +the new WatchList mechanism remains identical. 
The challenge is that WatchList no longer retrieves the entire
+list from the server at once but only receives individual items, which forces us to "manually" reconstruct
+the list object on the client side.
+
+To correctly construct the list object on the client side, we need ListKind information.
+However, simply reconstructing the list object based on this information is not enough.
+In the case of a standard List request, the server's response (a versioned list) is processed through a chain of decoders,
+which can potentially modify the resulting list object.
+A good example is the WithoutVersionDecoder, which removes the GVK information from the list object.
+Thus the "manually" constructed list object may not be consistent
+with the transformations applied by the decoders, leading to differences.
+
+To ensure full compatibility, the server must provide a versioned empty list in the format requested by the client (e.g., protobuf representation).
+We don't know how the client's decoder behaves for different encodings, i.e., whether the decoder actually supports
+the encoding we intend to use for reconstruction. Therefore, to ensure maximal compatibility, we will ensure that
+the encoding used for the reconstruction of the list matches the format that the client originally requested.
+This guarantees that the returned list object can be correctly decoded by the client,
+preserving the actual encoding format as intended.
+
+The proposed solution is to add a new annotation (`k8s.io/initial-events-list-blueprint`) to the object returned
+in the bookmark event (the bookmark event is sent when the state is synced and marks the end of the WatchList stream).
+This annotation will store an empty, versioned list encoded as a Base64 string.
+This annotation will be added to the same object and place where the `k8s.io/initial-events-end` annotation is added.
+
+When the client receives such a bookmark, it will base64-decode the empty list and pass it to the decoder chain.
+Only after a successful response from the decoders will the list be populated with data received from subsequent
+watch events and returned.
+
+For example:
+```
+GET /api/v1/namespaces/test/pods?watch=1&sendInitialEvents=true&allowWatchBookmarks=true&resourceVersion=&resourceVersionMatch=NotOlderThan
+---
+200 OK
+Transfer-Encoding: chunked
+Content-Type: application/json
+
+{
+  "type": "ADDED",
+  "object": {"kind": "Pod", "apiVersion": "v1", "metadata": {"resourceVersion": "8467", "name": "foo"}, ...}
+}
+{
+  "type": "ADDED",
+  "object": {"kind": "Pod", "apiVersion": "v1", "metadata": {"resourceVersion": "5726", "name": "bar"}, ...}
+}
+{
+"type":"BOOKMARK",
+"object":{"kind":"Pod","apiVersion":"v1","metadata":{"resourceVersion":"13519","annotations":{"k8s.io/initial-events-end":"true","k8s.io/initial-events-list-blueprint":"eyJraW5kIjoiUG9kTGlzdCIsImFwaVZlcnNpb24iOiJ2MSIsIm1ldGFkYXRhIjp7fSwiaXRlbXMiOm51bGx9Cg=="}} ...}
+}
+...
+
+```
+
+**Alternatives**
+
+We could modify the type of the object passed in the last bookmark event to include the list.
+This approach would require changes to the reflector, as it would need to recognize the new object type in the bookmark event.
+However, this could potentially break other clients that are not expecting a different object in the bookmark event.
+
+Another option would be for the client to issue an empty list request to the API server and use the returned empty list response as the blueprint.
+This approach would involve modifying client-go and implementing some form of caching mechanism,
+possibly with invalidation policies.
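+
+For illustration, here is a rough client-side sketch of the proposed blueprint handling (rather than of the alternatives
+just described). The helper name, the decoder wiring, and the use of a typed `PodList` are assumptions made for this
+sketch only; they do not represent the final client-go implementation:
+
+```go
+package watchlistsketch
+
+import (
+	"encoding/base64"
+	"fmt"
+
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/meta"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/watch"
+)
+
+// reconstructPodList buffers Pods from the initial ADDED events and, once the
+// "state synced" bookmark arrives, base64-decodes the blueprint annotation,
+// runs it through the client's decoder chain, and fills it with the buffered items.
+func reconstructPodList(events <-chan watch.Event, decoder runtime.Decoder) (*corev1.PodList, error) {
+	var pods []corev1.Pod
+	for event := range events {
+		switch event.Type {
+		case watch.Added:
+			pod, ok := event.Object.(*corev1.Pod)
+			if !ok {
+				return nil, fmt.Errorf("unexpected object type %T", event.Object)
+			}
+			pods = append(pods, *pod)
+		case watch.Bookmark:
+			obj, err := meta.Accessor(event.Object)
+			if err != nil {
+				return nil, err
+			}
+			annotations := obj.GetAnnotations()
+			if annotations["k8s.io/initial-events-end"] != "true" {
+				continue // not the "state synced" bookmark yet
+			}
+			raw, err := base64.StdEncoding.DecodeString(annotations["k8s.io/initial-events-list-blueprint"])
+			if err != nil {
+				return nil, err
+			}
+			// Pass the empty, versioned list through the same decoder chain
+			// that a regular List response would go through.
+			decoded, _, err := decoder.Decode(raw, nil, &corev1.PodList{})
+			if err != nil {
+				return nil, err
+			}
+			list, ok := decoded.(*corev1.PodList)
+			if !ok {
+				return nil, fmt.Errorf("blueprint did not decode into a PodList, got %T", decoded)
+			}
+			list.ResourceVersion = obj.GetResourceVersion()
+			list.Items = pods
+			return list, nil
+		}
+	}
+	return nil, fmt.Errorf("watch stream ended before the state-synced bookmark was received")
+}
+```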
+Non-client-go clients that want to use this new feature would need to rebuild this mechanism as well. + ### Test Plan |Create LeaseCandidate Lease| B B[Candidate] --> |Elected| C[Leader] C --> |Renew Leader Lease| C - C -->|End of Term / Leader Lease Expired| D[Shutdown] - D[Shutdown] -.-> |Restart| A + C -->|Better Candidate Available / Leader Lease Expired| D[Yield Leadership] + D[Yield Leadership] -.-> |Shutdown/Restart if necessary| A ``` ### Renewal Interval and Performance @@ -366,10 +363,12 @@ The leader lease will have renewal interval and duration (2s and 15s). This is s For component leases, keeping a short renewal interval will add many unnecessary writes to the apiserver. The component leases renewal interval will default to 5 mins. -When the leader lease is marked as end of term or available, the coordinated leader election controller will -add an annotation to all component lease candidate objects (`coordination.k8s.io/pending-ack`) and wait up to 5 seconds. -During that time, components must update their component lease to remove the annotation. -The leader election controller will then pick the leader based on its criteria from the set of component leases that have ack'd the request. +When the leader lease is marked as end of term or available, the coordinated +leader election controller will update the `pingTime` field of all component +lease candidate objects and wait up to 5 seconds. During that time, components +will update their component lease `renewTime`. The leader election controller +will then pick the leader based on its criteria from the set of component leases +that have ack'd the request. ### Strategy @@ -484,27 +483,18 @@ type CoordinatedLeaseStrategy string // CoordinatedLeaseStrategy defines the strategy for picking the leader for coordinated leader election. const ( - OldestCompatibilityVersion CoordinatedStrategy = "OldestCompatibilityVersion" - NoCoordination CoordinatedStrategy = "NoCoordination" + OldestEmulationVersion CoordinatedLeaseStrategy = "OldestEmulationVersion" ) +// LeaseSpec is a specification of a Lease. type LeaseSpec struct { - // Strategy indicates the strategy for picking the leader for coordinated leader election - // This is filled in from LeaseCandidate.Spec.Strategy or defaulted to NoCoordinationStrategy - // if the leader was not elected by the CLE controller. - Strategy CoordinatedLeaseStrategy `json:"strategy,omitempty" protobuf:"string,6,opt,name=strategy"` - - // EndofTerm signals to a lease holder that the lease should not be - // renewed because a better candidate is available. - EndOfTerm bool `json:"endOfTerm,omitempty" protobuf:"boolean,7,opt,name=endOfTerm"` - - // EXISTING FIELDS BELOW - // holderIdentity contains the identity of the holder of a current lease. + // If Coordinated Leader Election is used, the holder identity must be + // equal to the elected LeaseCandidate.metadata.name field. // +optional HolderIdentity *string `json:"holderIdentity,omitempty" protobuf:"bytes,1,opt,name=holderIdentity"` // leaseDurationSeconds is a duration that candidates for a lease need - // to wait to force acquire it. This is measure against time of last + // to wait to force acquire it. This is measured against the time of last // observed renewTime. // +optional LeaseDurationSeconds *int32 `json:"leaseDurationSeconds,omitempty" protobuf:"varint,2,opt,name=leaseDurationSeconds"` @@ -519,29 +509,67 @@ type LeaseSpec struct { // holders. 
// +optional LeaseTransitions *int32 `json:"leaseTransitions,omitempty" protobuf:"varint,5,opt,name=leaseTransitions"` + // Strategy indicates the strategy for picking the leader for coordinated leader election. + // If the field is not specified, there is no active coordination for this lease. + // (Alpha) Using this field requires the CoordinatedLeaderElection feature gate to be enabled. + // +featureGate=CoordinatedLeaderElection + // +optional + Strategy *CoordinatedLeaseStrategy `json:"strategy,omitempty" protobuf:"bytes,6,opt,name=strategy"` + // PreferredHolder signals to a lease holder that the lease has a + // more optimal holder and should be given up. + // This field can only be set if Strategy is also set. + // +featureGate=CoordinatedLeaderElection + // +optional + PreferredHolder *string `json:"preferredHolder,omitempty" protobuf:"bytes,7,opt,name=preferredHolder"` } ``` For the LeaseCandidate leases, a new lease will be created ```go +// LeaseCandidateSpec is a specification of a Lease. type LeaseCandidateSpec struct { - // The fields BinaryVersion and CompatibilityVersion will be mandatory labels instead of fields in the spec - - // CanLeadLease indicates the name of the lease that the candidate may lead - CanLeadLease string - - // FIELDS DUPLICATED FROM LEASE - - // leaseDurationSeconds is a duration that candidates for a lease need - // to wait to force acquire it. This is measure against time of last - // observed renewTime. + // LeaseName is the name of the lease for which this candidate is contending. + // This field is immutable. + // +required + LeaseName string `json:"leaseName" protobuf:"bytes,1,name=leaseName"` + // PingTime is the last time that the server has requested the LeaseCandidate + // to renew. It is only done during leader election to check if any + // LeaseCandidates have become ineligible. When PingTime is updated, the + // LeaseCandidate will respond by updating RenewTime. // +optional - LeaseDurationSeconds *int32 `json:"leaseDurationSeconds,omitempty" protobuf:"varint,2,opt,name=leaseDurationSeconds"` - // renewTime is a time when the current holder of a lease has last - // updated the lease. + PingTime *metav1.MicroTime `json:"pingTime,omitempty" protobuf:"bytes,2,opt,name=pingTime"` + // RenewTime is the time that the LeaseCandidate was last updated. + // Any time a Lease needs to do leader election, the PingTime field + // is updated to signal to the LeaseCandidate that they should update + // the RenewTime. + // Old LeaseCandidate objects are also garbage collected if it has been hours + // since the last renew. The PingTime field is updated regularly to prevent + // garbage collection for still active LeaseCandidates. // +optional - RenewTime *metav1.MicroTime `json:"renewTime,omitempty" protobuf:"bytes,4,opt,name=renewTime"` + RenewTime *metav1.MicroTime `json:"renewTime,omitempty" protobuf:"bytes,3,opt,name=renewTime"` + // BinaryVersion is the binary version. It must be in a semver format without leading `v`. + // This field is required when strategy is "OldestEmulationVersion" + // +optional + BinaryVersion string `json:"binaryVersion,omitempty" protobuf:"bytes,4,opt,name=binaryVersion"` + // EmulationVersion is the emulation version. It must be in a semver format without leading `v`. + // EmulationVersion must be less than or equal to BinaryVersion. 
+ // This field is required when strategy is "OldestEmulationVersion" + // +optional + EmulationVersion string `json:"emulationVersion,omitempty" protobuf:"bytes,5,opt,name=emulationVersion"` + // PreferredStrategies indicates the list of strategies for picking the leader for coordinated leader election. + // The list is ordered, and the first strategy supersedes all other strategies. The list is used by coordinated + // leader election to make a decision about the final election strategy. This follows as + // - If all clients have strategy X as the first element in this list, strategy X will be used. + // - If a candidate has strategy [X] and another candidate has strategy [Y, X], Y supersedes X and strategy Y + // will be used. + // - If a candidate has strategy [X, Y] and another candidate has strategy [Y, X], this is a user error and leader + // election will not operate the Lease until resolved. + // (Alpha) Using this field requires the CoordinatedLeaderElection feature gate to be enabled. + // +featureGate=CoordinatedLeaderElection + // +listType=atomic + // +required + PreferredStrategies []v1.CoordinatedLeaseStrategy `json:"preferredStrategies,omitempty" protobuf:"bytes,6,opt,name=preferredStrategies"` } ``` @@ -556,7 +584,7 @@ a separate LeaseCandidate lease will be required for each lock. | Claimed by | Component instance | Election Coordinator. (Lease is claimed for to the elected component instance) | | Renewed by | Component instance | Component instance | | Leader Criteria | First component to claim lease | Best leader from available candidates at time of election | -| Preemptable | No | Yes, Collaboratively. (Coordinator marks lease as "end of term". Component instance voluntarily stops renewing) | +| Preemptable | No | Yes, Collaboratively. (Coordinator marks lease's next `preferredHolder`. Component instance voluntarily stops renewing) | ### User Stories (Optional) @@ -614,7 +642,7 @@ component. Example: - HA cluster with 3 control plane nodes -- 3 elected components (kube-controller-manager, schedule, +- 3 elected components (kube-controller-manager, scheduler, cloud-controller-manager) per control plane node - 9 LeaseCandidate leases are created and renewed by the components diff --git a/keps/sig-apps/3329-retriable-and-non-retriable-failures/README.md b/keps/sig-apps/3329-retriable-and-non-retriable-failures/README.md index 9e1c3d72341..03043b6dc8b 100644 --- a/keps/sig-apps/3329-retriable-and-non-retriable-failures/README.md +++ b/keps/sig-apps/3329-retriable-and-non-retriable-failures/README.md @@ -1789,6 +1789,7 @@ Fourth iteration (1.29): - Graduate e2e tests as conformance tests - Lock the `PodDisruptionConditions` and `JobPodFailurePolicy` feature-gates - Declare deprecation of the `PodDisruptionConditions` and `JobPodFailurePolicy` feature-gates in documentation +- Modify the code to ignore the `PodDisruptionConditions` and `JobPodFailurePolicy` feature gates ## Release Signoff Checklist @@ -131,8 +133,8 @@ checklist items _must_ be updated for the enhancement to be released. Items marked with (R) are required *prior to targeting to a milestone / release*. 
- [x] (R) Enhancement issue in release milestone, which links to KEP dir in [kubernetes/enhancements] (not the initial KEP PR)
-- [ ] (R) KEP approvers have approved the KEP status as `implementable`
-- [ ] (R) Design details are appropriately documented
+- [x] (R) KEP approvers have approved the KEP status as `implementable`
+- [x] (R) Design details are appropriately documented
- [ ] (R) Test plan is in place, giving consideration to SIG Architecture and SIG Testing input (including test refactors)
  - [ ] e2e Tests for all Beta API Operations (endpoints)
  - [ ] (R) Ensure GA e2e tests meet requirements for [Conformance Tests](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/conformance-tests.md)
@@ -477,6 +479,11 @@ We will add unit tests for the following scenarios:
 2. Making sure that the flag and the config are mutually exclusive.
 3. Behavior of the path restricted anonymous authenticator.
+Unit tests were added to the following:
+
+* pkg/kubeapiserver/options/authentication_test.go
+* staging/src/k8s.io/apiserver/pkg/authentication/request/anonymous/anonymous_test.go
+
 ##### Integration tests
+When the feature-gate is enabled none of the defaults or current settings
+regarding anonymous auth are changed. The feature-gate enables the ability for
+users to set the `anonymous` field using the `AuthenticationConfiguration` file.
+
 ### Version Skew Strategy
+This feature only impacts kube-apiserver and does not introduce any changes that
+would be impacted by version skews. All changes are local to kube-apiserver and
+are controlled by the `AuthenticationConfiguration` file passed to
+kube-apiserver as a parameter.
+
 ## Production Readiness Review Questionnaire
+Enabling the feature flag alone does not change kube-apiserver defaults. However,
+if different API servers have different AuthenticationConfiguration for
+Anonymous then some requests that would be denied by one API server could be
+allowed by another.
+
 ###### What specific metrics should inform a rollback?
+kube-apiserver fails to start when the AuthenticationConfiguration file has the
+`anonymous` field set.
+
+If audit logs indicate that endpoints other than the ones configured in the
+AuthenticationConfiguration file using the `anonymous.conditions` field are
+reachable by anonymous users.
+
 ###### Were upgrade and rollback tested? Was the upgrade->downgrade->upgrade path tested?
+N/A
+
 ###### Is the rollout accompanied by any deprecations and/or removals of features, APIs, fields of API types, flags, etc.?
+N/A
+
 ###### How can someone using this feature know that it is working for their instance?
-- [ ] Events
-  - Event Reason:
-- [ ] API .status
-  - Condition name:
-  - Other field:
-- [ ] Other (treat as last resort)
-  - Details:
+If a user sets the AuthenticationConfiguration file, sets `anonymous.enabled` to
+`true`, and sets `anonymous.conditions` to allow only certain endpoints, then
+they can check that the feature is working by:
+
+* making an anonymous request to an endpoint that is not in the list of
+endpoints they allowed. Such a request should fail with http status code 401.
+
+* making an anonymous request to an endpoint that is in the list of endpoints
+they allowed. Such a request should either succeed with http status code 200 (if
+authz is configured to allow access to that endpoint) or
+fail with http status code 403 (if authz is not configured to allow access to
+that endpoint). A minimal configuration sketch illustrating this setup follows below.
 ###### What are the reasonable SLOs (Service Level Objectives) for the enhancement?
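
For reference, the verification steps under the previous question assume an `AuthenticationConfiguration` along the
lines of the following sketch. The field names follow this KEP's design (`anonymous.enabled`, `anonymous.conditions`);
the exact `apiVersion` and the shape of each condition entry are assumptions and may differ from the implementation:

```yaml
apiVersion: apiserver.config.k8s.io/v1beta1
kind: AuthenticationConfiguration
anonymous:
  enabled: true
  conditions:
  # Anonymous requests are only authenticated for these paths; anonymous
  # requests to any other endpoint are rejected with 401.
  - path: /livez
  - path: /readyz
  - path: /healthz
```

With such a configuration, whether an allowed anonymous request ultimately returns 200 or 403 is decided by
authorization, as described above.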
@@ -821,6 +870,9 @@ These goals will help you determine what you need to measure (SLIs) in the next question. --> +SLOs for actual requests should not change in any way compared to the flag-based +Anonymous configuration. + ###### What are the SLIs (Service Level Indicators) an operator can use to determine the health of the service? +N/A + ### Dependencies +No. + ### Scalability +No. + ###### Will enabling / using this feature result in introducing new API types? +No. + ###### Will enabling / using this feature result in any new calls to the cloud provider? +No. + ###### Will enabling / using this feature result in increasing size or count of the existing API objects? +No. + ###### Will enabling / using this feature result in increasing time taken by any operations covered by existing SLIs/SLOs? +No. + ###### Will enabling / using this feature result in non-negligible increase of resource usage (CPU, RAM, disk, IO, ...) in any components? +No. + ###### Can enabling / using this feature result in resource exhaustion of some node resources (PIDs, sockets, inodes, etc.)? +No. + ### Troubleshooting +- [x] 2024-05-13 - KEP introduced +- [x] 2024-06-07 - KEP Accepted as implementable +- [x] 2024-06-27 - Alpha implementation merged https://github.com/kubernetes/kubernetes/pull/124917 +- [x] 2024-07-15 - Integration tests merged https://github.com/kubernetes/kubernetes/pull/125967 +- [x] 2024-08-13 - First release (1.31) when feature available +- [x] 2024-08-16 - Targeting beta in 1.32 ## Drawbacks @@ -1003,13 +1082,6 @@ Major milestones might include: Why should this KEP _not_ be implemented? --> -## Open Questions for BETA - -The following should be resolved before this goes to `beta`: - -- Should we apply any restrictions here to anonymous `userInfo` that comes back -after all authenticators and impersonation have run - ## Alternatives + +## Possible Future Improvements + +We decided not to apply any restrictions here to anonymous `userInfo` that comes +back after all authenticators and impersonation have run because we think that +the scope of this KEP is to provide cluster admins with a way to restrict actual +anonymous requests. A request that was considered authenticated and as permitted +to impersonate `system:anonymous` is not actually anonymous. + +If we want to allow cluster admins the ability to add such restrictions we +think its better to give them the capability to configure webhook authenticators +and add `userValidationRules` capabilities. But doing so would expand the scope +of this KEP and it should likely be a separate effort. diff --git a/keps/sig-auth/4633-anonymous-auth-configurable-endpoints/kep.yaml b/keps/sig-auth/4633-anonymous-auth-configurable-endpoints/kep.yaml index a29b30397d6..a617c9ab0ad 100644 --- a/keps/sig-auth/4633-anonymous-auth-configurable-endpoints/kep.yaml +++ b/keps/sig-auth/4633-anonymous-auth-configurable-endpoints/kep.yaml @@ -16,16 +16,17 @@ see-also: replaces: # The target maturity stage in the current dev cycle for this KEP. -stage: alpha +stage: beta # The most recent milestone for which work toward delivery of this KEP has been # done. This can be the current (upcoming) milestone, if it is being actively # worked on. -latest-milestone: "v1.31" +latest-milestone: "v1.32" # The milestone at which this feature was, or is targeted to be, at each stage. 
milestone: alpha: "v1.31" + beta: "v1.32" # The following PRR answers are required at alpha release # List the feature gate name and the components for which it must be enabled diff --git a/keps/sig-multicluster/4322-cluster-inventory/README.md b/keps/sig-multicluster/4322-cluster-inventory/README.md index 046fdbcea84..0be8c73c9cd 100644 --- a/keps/sig-multicluster/4322-cluster-inventory/README.md +++ b/keps/sig-multicluster/4322-cluster-inventory/README.md @@ -129,6 +129,15 @@ tags, and then generate with `hack/update-toc.sh`. - [Implementation History](#implementation-history) - [Drawbacks](#drawbacks) - [Alternatives](#alternatives) + - [Extending Cluster API Cluster resource](#extending-cluster-api-cluster-resource) + - [ClusterProfile CRD scope](#clusterprofile-crd-scope) + - [Global hub cluster for multiple clustersets](#global-hub-cluster-for-multiple-clustersets) + - [Global hub cluster per clusterset](#global-hub-cluster-per-clusterset) + - [Regional hub cluster for multiple clustersets](#regional-hub-cluster-for-multiple-clustersets) + - [Regional hub clusters per clusterset](#regional-hub-clusters-per-clusterset) + - [Self-assembling clustersets](#self-assembling-clustersets) + - [Workload placement across multiple clusters without cross-cluster service networking](#workload-placement-across-multiple-clusters-without-cross-cluster-service-networking) + - [Workload placement into a specific clusterset](#workload-placement-into-a-specific-clusterset) - [Infrastructure Needed (Optional)](#infrastructure-needed-optional) @@ -1028,6 +1037,8 @@ What other approaches did you consider, and why did you rule them out? These do not need to be as detailed as the proposal, but should include enough information to express the idea and why it was not acceptable. --> + +### Extending Cluster API `Cluster` resource We also considered the possibility of extending the existing Cluster API's [Cluster](https://github.com/kubernetes-sigs/cluster-api/blob/v1.6.2/api/v1beta1/cluster_types.go#L39) resource to accommodate our needs for describing clusters within a multi-cluster @@ -1044,6 +1055,56 @@ Cluster is primarily owned by platform administrators focused on provisioning cl whereas the new API is designed to be owned by the cluster manager that created the cluster it represents. +### ClusterProfile CRD scope + +We had [extensive discussions](https://docs.google.com/document/d/1E_z3ti-d-modwnhsvR3yBZwX4rRpL26dKkl41YAptRo/edit) +in SIG-Multicluster meetings about the appropriate scope for ClusterProfile +resources, and ultimately decided that namespace scope would be more flexible +than cluster scope while still retaining an adequate UX for simpler usage +patterns. As a historical note, a prior attempt at organizing multiple +clusters, the ClusterRegistry proposal, had proposed cluster-scoped resources +but was met with pushback by potential adopters in part due to a desire to host +multiple distinct registry lists on a single control plane, which would be far +more straightforward with namespaced resources. + +#### Global hub cluster for multiple clustersets + +![illustration of global hub for multiple clustersets topology](./global-hub.svg) + +In this model, a single global hub cluster is used to manage multiple clustersets (a "Prod" clusterset and "Dev" clusterset in this illustration). For this use case, some means of segmenting the ClusterProfile resources into distinct groups for each clusterset is needed, and ideally should facilitate selecting all ClusterProfiles of a given clusterset. 
Because of this selection-targeting goal, setting clusterset membership within the `spec` of a ClusterProfile would not be sufficient. While setting a label such as the proposed `clusterset.multicluster.x-k8s.io` on the ClusterProfile resource (instead of a namespace) could be acceptable, managing multiple cluster-scoped ClusterProfile resources for multiple unrelated clustersets on a single global hub could quickly get cluttered. In addition to grouping clarity, namespace scoping could allow RBAC delegation for separate teams to manage resources for their own clustersets in isolation while still using a shared hub. The group of all clusters registered on the hub (potentially including clusters belonging to different clustersets or clusters not belonging to any clusterset) may represent a single "inventory" or multiple inventories, but such a definition is beyond the scope of this document and is permissible to be an undefined implementation detail. + +#### Global hub cluster per clusterset + +![illustration of global hub per clusterset topology](./global-hub-per-clusterset.svg) + +In this model, each "inventory" has a 1:1 mapping with a clusterset containing clusters in multiple regions. A cluster-scoped ClusterProfile CRD would be sufficient for this architecture, but it requires a proliferation of hub clusters, which may not be optimal. This model is still implementable with namespace-scoped ClusterProfile CRDs by writing them all to a single namespace, either the `default` namespace or a specific namespace configured in the cluster manager. The risk of placing resources in the wrong namespace would be somewhat minimal if following the suggested pattern of having ClusterProfile resources be written by a "manager" rather than authored by humans. + +#### Regional hub cluster for multiple clustersets + +![illustration of regional hub clusters for multiple clustersets topology](./regional-hub-multiple-clustersets.svg) + +In this model, "hub" clusters are limited to a regional scope (potentially for architectural limitations or performance optimizations) and each hub is used to manage clusters only from the local region, but which may belong to separate clustersets. If, as in the pictured example, clustersets still span multiple regions, some out-of-band synchronization mechanism between the regional hubs would likely be needed. This model has similar segmentation needs to the global hub model, just at a smaller scale. + +#### Regional hub clusters per clusterset + +![illustration of regional hub clusters per clusterset topology](./regional-hub-per-clusterset.svg) + +This is creeping pretty far towards excessive cluster proliferation (and cross-region coordination overhead) purely for management needs (as opposed to actually running workloads), and would be more likely to be a reference or testing implementation than an architecture suitable for production scale. + +#### Self-assembling clustersets + +![illustration of self-assembling clusterset topology](./self-assembling-clustersets.svg) + +This is the model most suited to a cluster-scoped ClusterProfile resource. In contrast to the prior models discussed, in this approach the ClusterProfile CRD would be written directly to each "member" cluster. ClusterSet membership would either be established through peer-to-peer relationships, or managed by an external control plane. 
For ClusterSet security and integrity, a two-way handshake of some sort would be needed between the local cluster and each peer or the external control plane to ensure it is properly authorized to serve endpoints for exported services or import services from other clusters. While these approaches could be implemented with a namespace-scoped ClusterProfile CRD in the `default` or a designated namespace, misuse is most likely in this model, because the resource would be more likely to be authored by a human if using the peer-to-peer model. Due to the complexity and fragility concerns of managing clusterset membership in a peer-to-peer topology, an external control plane would likely be preferable. Assuming the external control plane does not support Kubernetes APIs (if it did, any of the "hub" models could be applied instead), it could still be possible to implement this model with a namespace-scoped ClusterProfile resource, but it is _not_ recommended.
+
+#### Workload placement across multiple clusters _without_ cross-cluster service networking
+
+In this model, a consumer of the Cluster Inventory API is looking to optimize workload placement to take advantage of excess capacity on existing managed clusters. These workloads may have specific hardware resource needs such as GPUs, but are typically "batch" jobs that do not require multi-cluster service networking to communicate with known services in a specific clusterset. The isolated nature of these jobs could allow them to be scheduled on many known clusters regardless of clusterset membership. A centralized hub which could register clusters in disparate clustersets or no clusterset and return a list of all known clusters from a single API call would be the most efficient for this consumer to query. Namespaced ClusterProfile CRDs on a global hub would be the best fit for this use case.
+
+#### Workload placement into a specific clusterset
+
+Within a single clusterset, a global workload placement controller may seek to balance capacity across multiple regions in response to demand, cost efficiency, or other factors. Querying a list of all clusters within a single clusterset should be possible to serve this use case, which is amenable to either cluster-scoped or namespace-scoped ClusterProfile CRDs.
+
 ## Infrastructure Needed (Optional)