From 185626bc6da834ba0a72833fabfd32612a5adba2 Mon Sep 17 00:00:00 2001 From: Aditya Addepalli Date: Thu, 18 Jul 2024 15:23:25 -0400 Subject: [PATCH] Add failover history information Signed-off-by: Aditya Addepalli --- api/openapi-spec/swagger.json | 25 +++++ ...rk.karmada.io_clusterresourcebindings.yaml | 44 ++++++++ .../work.karmada.io_resourcebindings.yaml | 44 ++++++++ pkg/apis/policy/v1alpha1/propagation_types.go | 5 + pkg/apis/work/v1alpha2/binding_types.go | 43 ++++++++ .../work/v1alpha2/well_known_constants.go | 6 ++ .../work/v1alpha2/zz_generated.deepcopy.go | 41 +++++++ .../rb_application_failover_controller.go | 4 + pkg/controllers/binding/common.go | 28 +++++ pkg/controllers/cluster/taint_manager.go | 5 + pkg/controllers/utils/common.go | 90 ++++++++++++++++ pkg/controllers/utils/common_test.go | 90 ++++++++++++++++ pkg/generated/openapi/zz_generated.openapi.go | 100 +++++++++++++++++- .../clustereviction/cluster_eviction.go | 12 ++- 14 files changed, 535 insertions(+), 2 deletions(-) create mode 100644 pkg/controllers/utils/common.go create mode 100644 pkg/controllers/utils/common_test.go diff --git a/api/openapi-spec/swagger.json b/api/openapi-spec/swagger.json index 0534e67a4663..b1e4567b38ed 100644 --- a/api/openapi-spec/swagger.json +++ b/api/openapi-spec/swagger.json @@ -20022,6 +20022,23 @@ } ] }, + "com.github.karmada-io.karmada.pkg.apis.work.v1alpha2.FailoverHistoryItem": { + "type": "object", + "properties": { + "failoverTime": { + "description": "FailoverTime represents the timestamp when the workload failed over. 
It is represented in RFC3339 form(like '2021-04-25T10:02:10Z') and is in UTC.", + "$ref": "#/definitions/io.k8s.apimachinery.pkg.apis.meta.v1.Time" + }, + "originCluster": { + "description": "OriginCluster denotes the name of the cluster from which the workload was failed over.", + "type": "string" + }, + "reason": { + "description": "Reason denotes the reason why the workload failed over.", + "type": "string" + } + } + }, "com.github.karmada-io.karmada.pkg.apis.work.v1alpha2.GracefulEvictionTask": { "description": "GracefulEvictionTask represents a graceful eviction task.", "type": "object", @@ -20332,6 +20349,14 @@ "$ref": "#/definitions/io.k8s.apimachinery.pkg.apis.meta.v1.Condition" } }, + "failoverHistory": { + "description": "FailoverHistory represents history of the previous failovers of this resource", + "type": "array", + "items": { + "default": {}, + "$ref": "#/definitions/com.github.karmada-io.karmada.pkg.apis.work.v1alpha2.FailoverHistoryItem" + } + }, "lastScheduledTime": { "description": "LastScheduledTime representing the latest timestamp when scheduler successfully finished a scheduling. It is represented in RFC3339 form (like '2006-01-02T15:04:05Z') and is in UTC.", "$ref": "#/definitions/io.k8s.apimachinery.pkg.apis.meta.v1.Time" diff --git a/charts/karmada/_crds/bases/work/work.karmada.io_clusterresourcebindings.yaml b/charts/karmada/_crds/bases/work/work.karmada.io_clusterresourcebindings.yaml index 82e3fab48523..66154f60a49f 100644 --- a/charts/karmada/_crds/bases/work/work.karmada.io_clusterresourcebindings.yaml +++ b/charts/karmada/_crds/bases/work/work.karmada.io_clusterresourcebindings.yaml @@ -1339,6 +1339,50 @@ spec: - type type: object type: array + failoverHistory: + description: FailoverHistory represents the history of the failover + of the resource + items: + description: FailoverHistoryItem represents either a failover event + in the history. 
+ properties: + failoverTime: + description: StartTime is the timestamp of when the failover + occurred. + type: object + originCluster: + description: OriginCluster is the name of the cluster from which + the application migrated. + type: string + originalClusters: + description: ClustersBeforeFailover records the clusters where + running the application before failover. + items: + type: string + type: array + preservedLabelState: + additionalProperties: + type: string + description: |- + PreservedLabelState represents the application state information collected from the original cluster, + and it will be injected into the new cluster in the form of application labels. + type: object + reason: + description: Reason denotes the type of failover. + type: string + targetClusters: + description: ClustersAfterFailover records the clusters where + running the application after failover. + items: + type: string + type: array + required: + - failoverTime + - originCluster + - originalClusters + - reason + type: object + type: array lastScheduledTime: description: |- LastScheduledTime representing the latest timestamp when scheduler successfully finished a scheduling. diff --git a/charts/karmada/_crds/bases/work/work.karmada.io_resourcebindings.yaml b/charts/karmada/_crds/bases/work/work.karmada.io_resourcebindings.yaml index 78f9a5102cf2..a7216b12b868 100644 --- a/charts/karmada/_crds/bases/work/work.karmada.io_resourcebindings.yaml +++ b/charts/karmada/_crds/bases/work/work.karmada.io_resourcebindings.yaml @@ -1339,6 +1339,50 @@ spec: - type type: object type: array + failoverHistory: + description: FailoverHistory represents the history of the failover + of the resource + items: + description: FailoverHistoryItem represents either a failover event + in the history. + properties: + failoverTime: + description: StartTime is the timestamp of when the failover + occurred. 
+ type: object + originCluster: + description: OriginCluster is the name of the cluster from which + the application migrated. + type: string + originalClusters: + description: ClustersBeforeFailover records the clusters where + running the application before failover. + items: + type: string + type: array + preservedLabelState: + additionalProperties: + type: string + description: |- + PreservedLabelState represents the application state information collected from the original cluster, + and it will be injected into the new cluster in the form of application labels. + type: object + reason: + description: Reason denotes the type of failover. + type: string + targetClusters: + description: ClustersAfterFailover records the clusters where + running the application after failover. + items: + type: string + type: array + required: + - failoverTime + - originCluster + - originalClusters + - reason + type: object + type: array lastScheduledTime: description: |- LastScheduledTime representing the latest timestamp when scheduler successfully finished a scheduling. diff --git a/pkg/apis/policy/v1alpha1/propagation_types.go b/pkg/apis/policy/v1alpha1/propagation_types.go index 60be208a4ffc..8f19e019b0b1 100644 --- a/pkg/apis/policy/v1alpha1/propagation_types.go +++ b/pkg/apis/policy/v1alpha1/propagation_types.go @@ -290,6 +290,9 @@ type FailoverBehavior struct { // If this value is nil, failover is disabled. // +optional // Cluster *ClusterFailoverBehavior `json:"cluster,omitempty"` + + // TODO: Consider moving StatePreservation out from Application, so that + // the Cluster failover scenario can share it. } // ApplicationFailoverBehavior indicates application failover behaviors. @@ -318,6 +321,8 @@ type ApplicationFailoverBehavior struct { // Value must be positive integer.
// +optional GracePeriodSeconds *int32 `json:"gracePeriodSeconds,omitempty"` + + // TODO: Add StatePreservation attribute for stateful failover use-cases } // DecisionConditions represents the decision conditions of performing the failover process. diff --git a/pkg/apis/work/v1alpha2/binding_types.go b/pkg/apis/work/v1alpha2/binding_types.go index 7797fb3c60fb..6bb116a8196e 100644 --- a/pkg/apis/work/v1alpha2/binding_types.go +++ b/pkg/apis/work/v1alpha2/binding_types.go @@ -332,6 +332,10 @@ type ResourceBindingStatus struct { // AggregatedStatus represents status list of the resource running in each member cluster. // +optional AggregatedStatus []AggregatedStatusItem `json:"aggregatedStatus,omitempty"` + + // FailoverHistory represents the history of the failover of the resource + // +optional + FailoverHistory []FailoverHistoryItem `json:"failoverHistory,omitempty"` } // AggregatedStatusItem represents status of the resource running in a member cluster. @@ -362,6 +366,45 @@ type AggregatedStatusItem struct { Health ResourceHealth `json:"health,omitempty"` } +// FailoverHistoryItem represents either a failover event in the history. +type FailoverHistoryItem struct { + // OriginCluster is the name of the cluster from which the application migrated. + // +required + OriginCluster string `json:"originCluster"` + + // Reason denotes the type of failover. + // +required + Reason FailoverReason `json:"reason"` + + // StartTime is the timestamp of when the failover occurred. + // +required + StartTime metav1.Time `json:"failoverTime"` + + // ClustersBeforeFailover records the clusters where running the application before failover. + // +required + ClustersBeforeFailover []string `json:"originalClusters"` + + // ClustersAfterFailover records the clusters where running the application after failover.
+ // +optional + ClustersAfterFailover []string `json:"targetClusters,omitempty"` + + // PreservedLabelState represents the application state information collected from the original cluster, + // and it will be injected into the new cluster in the form of application labels. + // +optional + PreservedLabelState map[string]string `json:"preservedLabelState,omitempty"` +} + +// FailoverReason represents the reason for the failover. +type FailoverReason string + +const ( + // ClusterFailover represents the failover is due to cluster issues. + ClusterFailover FailoverReason = "ClusterFailover" + + // ApplicationFailover represents the failover is due to application issues. + ApplicationFailover FailoverReason = "ApplicationFailover" // Failover due to application issues, handled by health interpretation. +) + // Conditions definition const ( // Scheduled represents the condition that the ResourceBinding or ClusterResourceBinding has been scheduled. diff --git a/pkg/apis/work/v1alpha2/well_known_constants.go b/pkg/apis/work/v1alpha2/well_known_constants.go index 746b318a3e23..746ae74a6be3 100644 --- a/pkg/apis/work/v1alpha2/well_known_constants.go +++ b/pkg/apis/work/v1alpha2/well_known_constants.go @@ -124,6 +124,12 @@ const ( // Additional options will be added here in the future. DeletionProtectionLabelKey = "resourcetemplate.karmada.io/deletion-protected" DeletionProtectionAlways = "Always" + + // ResourceBindingFailoverLabel If a resource is failed over by karmada, this label will be attached + // to the rescheduled workload. The value will denote the type of failover that occurred, either cluster or application. + // This can be useful if applications are stateful and need to know when they have been failed over by Karmada, + // as opposed to being scheduled fresh. + ResourceBindingFailoverLabel = "resourcebinding.karmada.io/failover-type" ) // Define eviction reasons. 
diff --git a/pkg/apis/work/v1alpha2/zz_generated.deepcopy.go b/pkg/apis/work/v1alpha2/zz_generated.deepcopy.go index 824a7d4f5a08..c22cdf5b8443 100644 --- a/pkg/apis/work/v1alpha2/zz_generated.deepcopy.go +++ b/pkg/apis/work/v1alpha2/zz_generated.deepcopy.go @@ -131,6 +131,40 @@ func (in *ClusterResourceBindingList) DeepCopyObject() runtime.Object { return nil } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *FailoverHistoryItem) DeepCopyInto(out *FailoverHistoryItem) { + *out = *in + in.StartTime.DeepCopyInto(&out.StartTime) + if in.ClustersBeforeFailover != nil { + in, out := &in.ClustersBeforeFailover, &out.ClustersBeforeFailover + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.ClustersAfterFailover != nil { + in, out := &in.ClustersAfterFailover, &out.ClustersAfterFailover + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.PreservedLabelState != nil { + in, out := &in.PreservedLabelState, &out.PreservedLabelState + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FailoverHistoryItem. +func (in *FailoverHistoryItem) DeepCopy() *FailoverHistoryItem { + if in == nil { + return nil + } + out := new(FailoverHistoryItem) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *GracefulEvictionTask) DeepCopyInto(out *GracefulEvictionTask) { *out = *in @@ -392,6 +426,13 @@ func (in *ResourceBindingStatus) DeepCopyInto(out *ResourceBindingStatus) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.FailoverHistory != nil { + in, out := &in.FailoverHistory, &out.FailoverHistory + *out = make([]FailoverHistoryItem, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } return } diff --git a/pkg/controllers/applicationfailover/rb_application_failover_controller.go b/pkg/controllers/applicationfailover/rb_application_failover_controller.go index bbb3ccf9e419..5902303ce061 100644 --- a/pkg/controllers/applicationfailover/rb_application_failover_controller.go +++ b/pkg/controllers/applicationfailover/rb_application_failover_controller.go @@ -39,6 +39,7 @@ import ( configv1alpha1 "github.com/karmada-io/karmada/pkg/apis/config/v1alpha1" policyv1alpha1 "github.com/karmada-io/karmada/pkg/apis/policy/v1alpha1" workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2" + controllerUtils "github.com/karmada-io/karmada/pkg/controllers/utils" "github.com/karmada-io/karmada/pkg/features" "github.com/karmada-io/karmada/pkg/resourceinterpreter" "github.com/karmada-io/karmada/pkg/sharedcli/ratelimiterflag" @@ -154,6 +155,9 @@ func (c *RBApplicationFailoverController) syncBinding(ctx context.Context, bindi func (c *RBApplicationFailoverController) evictBinding(binding *workv1alpha2.ResourceBinding, clusters []string) error { for _, cluster := range clusters { + if err := controllerUtils.UpdateFailoverStatus(c.Client, binding, cluster, workv1alpha2.EvictionReasonApplicationFailure); err != nil { + klog.Errorf("Failed to update status with failover information. 
Error: %v", err) + } switch binding.Spec.Failover.Application.PurgeMode { case policyv1alpha1.Graciously: if features.FeatureGate.Enabled(features.GracefulEviction) { diff --git a/pkg/controllers/binding/common.go b/pkg/controllers/binding/common.go index 52593cc7e0fa..308d5b33dfb9 100644 --- a/pkg/controllers/binding/common.go +++ b/pkg/controllers/binding/common.go @@ -164,6 +164,17 @@ func mergeTargetClusters(targetClusters []workv1alpha2.TargetCluster, requiredBy func mergeLabel(workload *unstructured.Unstructured, binding metav1.Object, scope apiextensionsv1.ResourceScope) map[string]string { var workLabel = make(map[string]string) if scope == apiextensionsv1.NamespaceScoped { + namespaceBindingObj := binding.(*workv1alpha2.ResourceBinding) + failoverReason := checkFailoverHistory(namespaceBindingObj) + if failoverReason != "" { + if failoverReason == workv1alpha2.EvictionReasonApplicationFailure { + util.MergeLabel(workload, workv1alpha2.ResourceBindingFailoverLabel, "application") + workLabel[workv1alpha2.ResourceBindingFailoverLabel] = "application" + } else if failoverReason == workv1alpha2.EvictionReasonTaintUntolerated { + util.MergeLabel(workload, workv1alpha2.ResourceBindingFailoverLabel, "cluster") + workLabel[workv1alpha2.ResourceBindingFailoverLabel] = "cluster" + } + } bindingID := util.GetLabelValue(binding.GetLabels(), workv1alpha2.ResourceBindingPermanentIDLabel) util.MergeLabel(workload, workv1alpha2.ResourceBindingPermanentIDLabel, bindingID) workLabel[workv1alpha2.ResourceBindingPermanentIDLabel] = bindingID @@ -175,6 +186,23 @@ func mergeLabel(workload *unstructured.Unstructured, binding metav1.Object, scop return workLabel } +// Check if resourcebinding contains failover history, used to determine whether we attach failover label to cloned workload +// Return failover reason +func checkFailoverHistory(resourceBinding *workv1alpha2.ResourceBinding) string { + failoverHistory := resourceBinding.Status.FailoverHistory + if len(failoverHistory) 
== 0 { + return "" + } + lastFailover := failoverHistory[len(failoverHistory)-1] + if lastFailover.Reason == "ClusterFailover" { + return workv1alpha2.EvictionReasonTaintUntolerated + } + if lastFailover.Reason == "ApplicationFailover" { + return workv1alpha2.EvictionReasonApplicationFailure + } + return "" +} + func mergeAnnotations(workload *unstructured.Unstructured, binding metav1.Object, scope apiextensionsv1.ResourceScope) map[string]string { annotations := make(map[string]string) if workload.GetGeneration() > 0 { diff --git a/pkg/controllers/cluster/taint_manager.go b/pkg/controllers/cluster/taint_manager.go index 1e8bcdee5f79..003b1ae56f47 100644 --- a/pkg/controllers/cluster/taint_manager.go +++ b/pkg/controllers/cluster/taint_manager.go @@ -33,6 +33,7 @@ import ( clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1" workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2" + controllerUtils "github.com/karmada-io/karmada/pkg/controllers/utils" "github.com/karmada-io/karmada/pkg/features" "github.com/karmada-io/karmada/pkg/util" "github.com/karmada-io/karmada/pkg/util/fedinformer/keys" @@ -170,6 +171,10 @@ func (tc *NoExecuteTaintManager) syncBindingEviction(key util.QueueKey) error { // Case 2: Need eviction after toleration time. If time is up, do eviction right now. // Case 3: Tolerate forever, we do nothing. if needEviction || tolerationTime == 0 { + err := controllerUtils.UpdateFailoverStatus(tc.Client, binding, cluster, workv1alpha2.EvictionReasonTaintUntolerated) + if err != nil { + klog.Errorf("Failed to update status with failover information. 
Error: %v", err) + } // update final result to evict the target cluster if features.FeatureGate.Enabled(features.GracefulEviction) { binding.Spec.GracefulEvictCluster(cluster, workv1alpha2.NewTaskOptions(workv1alpha2.WithProducer(workv1alpha2.EvictionProducerTaintManager), workv1alpha2.WithReason(workv1alpha2.EvictionReasonTaintUntolerated))) diff --git a/pkg/controllers/utils/common.go b/pkg/controllers/utils/common.go new file mode 100644 index 000000000000..ca663b9c926f --- /dev/null +++ b/pkg/controllers/utils/common.go @@ -0,0 +1,90 @@ +/* +Copyright 2020 The Karmada Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package utils + +import ( + "context" + "time" + + "k8s.io/client-go/util/retry" + "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/karmada-io/karmada/pkg/util/helper" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + policyv1alpha1 "github.com/karmada-io/karmada/pkg/apis/policy/v1alpha1" + workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2" +) + +// FailoverHistoryInfo is currently not supported for the following scheduling types +// 1. Duplicated (as these resources do not failover) +// 2. Divided resources that can be scheduled across multiple clusters. In this case, state is harder to conserve since +// the application's replicas will not be migrating together. 
+func restrictFailoverHistoryInfo(binding *workv1alpha2.ResourceBinding) bool { + placement := binding.Spec.Placement + // Check if replica scheduling type is Duplicated + if placement.ReplicaScheduling.ReplicaSchedulingType == policyv1alpha1.ReplicaSchedulingTypeDuplicated { + return true + } + + // Check if replica scheduling type is Divided with no spread constraints or invalid spread constraints + if placement.ReplicaScheduling.ReplicaSchedulingType == policyv1alpha1.ReplicaSchedulingTypeDivided { + if len(placement.SpreadConstraints) == 0 { + return true + } + + for _, spreadConstraint := range placement.SpreadConstraints { + if spreadConstraint.SpreadByLabel != "" { + return true + } + if spreadConstraint.SpreadByField == "cluster" && (spreadConstraint.MaxGroups > 1 || spreadConstraint.MinGroups > 1) { + return true + } + } + } + + return false +} + +// Adds a failoverHistoryItem to the failoverHistory field in the ResourceBinding. +func UpdateFailoverStatus(client client.Client, binding *workv1alpha2.ResourceBinding, cluster string, failoverType workv1alpha2.FailoverReason) (err error) { + if restrictFailoverHistoryInfo(binding) { + return nil + } + klog.V(4).Infof("Failover triggered for replica on cluster %s", cluster) + err = retry.RetryOnConflict(retry.DefaultRetry, func() (err error) { + _, err = helper.UpdateStatus(context.Background(), client, binding, func() error { + failoverHistoryItem := workv1alpha2.FailoverHistoryItem{ + StartTime: metav1.Time{Time: time.Now()}, + OriginCluster: cluster, + Reason: failoverType, + // TODO: Add remaining attributes here + } + binding.Status.FailoverHistory = append(binding.Status.FailoverHistory, failoverHistoryItem) + return nil + }) + return err + }) + + if err != nil { + klog.Errorf("Failed to update FailoverHistoryInfo to ResourceBinding %s/%s. 
Error: %v", binding.Namespace, binding.Name, err) + return err + } + return nil +} diff --git a/pkg/controllers/utils/common_test.go b/pkg/controllers/utils/common_test.go new file mode 100644 index 000000000000..d276aa9ac83f --- /dev/null +++ b/pkg/controllers/utils/common_test.go @@ -0,0 +1,90 @@ +/* +Copyright 2020 The Karmada Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package utils + +import ( + "testing" + + "github.com/stretchr/testify/assert" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2" + "github.com/karmada-io/karmada/pkg/util/gclient" +) + +func TestUpdateFailoverStatus(t *testing.T) { + tests := []struct { + name string + binding *workv1alpha2.ResourceBinding + cluster string + failoverType string + wantErr bool + }{ + { + name: "application failover", + binding: &workv1alpha2.ResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: "binding", + Namespace: "default", + }, + }, + cluster: "cluster1", + failoverType: workv1alpha2.EvictionReasonApplicationFailure, + wantErr: false, + }, + { + name: "cluster failover", + binding: &workv1alpha2.ResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: "binding", + Namespace: "default", + }, + }, + cluster: "cluster2", + failoverType: workv1alpha2.EvictionReasonTaintUntolerated, + wantErr: false, + }, + { + name: "invalid failover type", + binding: &workv1alpha2.ResourceBinding{ + 
ObjectMeta: metav1.ObjectMeta{ + Name: "binding", + Namespace: "default", + }, + }, + cluster: "cluster3", + failoverType: "InvalidType", + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fakeClient := fake.NewClientBuilder().WithScheme(gclient.NewSchema()).Build() + + err := UpdateFailoverStatus(fakeClient, tt.binding, tt.cluster, tt.failoverType) + + if tt.wantErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + + }) + } +} diff --git a/pkg/generated/openapi/zz_generated.openapi.go b/pkg/generated/openapi/zz_generated.openapi.go index 31f52beca3e9..a06f7551d056 100644 --- a/pkg/generated/openapi/zz_generated.openapi.go +++ b/pkg/generated/openapi/zz_generated.openapi.go @@ -169,6 +169,7 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2.BindingSnapshot": schema_pkg_apis_work_v1alpha2_BindingSnapshot(ref), "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2.ClusterResourceBinding": schema_pkg_apis_work_v1alpha2_ClusterResourceBinding(ref), "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2.ClusterResourceBindingList": schema_pkg_apis_work_v1alpha2_ClusterResourceBindingList(ref), + "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2.FailoverHistoryItem": schema_pkg_apis_work_v1alpha2_FailoverHistoryItem(ref), "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2.GracefulEvictionTask": schema_pkg_apis_work_v1alpha2_GracefulEvictionTask(ref), "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2.NodeClaim": schema_pkg_apis_work_v1alpha2_NodeClaim(ref), "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2.ObjectReference": schema_pkg_apis_work_v1alpha2_ObjectReference(ref), @@ -6817,6 +6818,89 @@ func schema_pkg_apis_work_v1alpha2_ClusterResourceBindingList(ref common.Referen } } +func schema_pkg_apis_work_v1alpha2_FailoverHistoryItem(ref common.ReferenceCallback) common.OpenAPIDefinition { + return 
common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Description: "FailoverHistoryItem represents either a failover event in the history.", + Type: []string{"object"}, + Properties: map[string]spec.Schema{ + "originCluster": { + SchemaProps: spec.SchemaProps{ + Description: "OriginCluster is the name of the cluster from which the application migrated.", + Default: "", + Type: []string{"string"}, + Format: "", + }, + }, + "reason": { + SchemaProps: spec.SchemaProps{ + Description: "Reason denotes the type of failover.", + Default: "", + Type: []string{"string"}, + Format: "", + }, + }, + "failoverTime": { + SchemaProps: spec.SchemaProps{ + Description: "StartTime is the timestamp of when the failover occurred.", + Type: []string{"string"}, + Format: "date-time", + }, + }, + "originalClusters": { + SchemaProps: spec.SchemaProps{ + Description: "ClustersBeforeFailover records the clusters where running the application before failover.", + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Default: "", + Type: []string{"string"}, + Format: "", + }, + }, + }, + }, + }, + "targetClusters": { + SchemaProps: spec.SchemaProps{ + Description: "ClustersAfterFailover records the clusters where running the application after failover.", + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Default: "", + Type: []string{"string"}, + Format: "", + }, + }, + }, + }, + }, + "preservedLabelState": { + SchemaProps: spec.SchemaProps{ + Description: "PreservedLabelState represents the application state information collected from the original cluster, and it will be injected into the new cluster in the form of application labels.", + Type: []string{"object"}, + AdditionalProperties: &spec.SchemaOrBool{ + Allows: true, + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Default: "", + Type: []string{"string"}, + Format: "", + }, 
+ }, + }, + }, + }, + }, + Required: []string{"originCluster", "reason", "failoverTime", "originalClusters"}, + }, + }, + } +} + func schema_pkg_apis_work_v1alpha2_GracefulEvictionTask(ref common.ReferenceCallback) common.OpenAPIDefinition { return common.OpenAPIDefinition{ Schema: spec.Schema{ @@ -7335,11 +7419,25 @@ func schema_pkg_apis_work_v1alpha2_ResourceBindingStatus(ref common.ReferenceCal }, }, }, + "failoverHistory": { + SchemaProps: spec.SchemaProps{ + Description: "FailoverHistory represents the history of the failover of the resource", + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Default: map[string]interface{}{}, + Ref: ref("github.com/karmada-io/karmada/pkg/apis/work/v1alpha2.FailoverHistoryItem"), + }, + }, + }, + }, + }, }, }, }, Dependencies: []string{ - "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2.AggregatedStatusItem", "k8s.io/apimachinery/pkg/apis/meta/v1.Condition", "k8s.io/apimachinery/pkg/apis/meta/v1.Time"}, + "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2.AggregatedStatusItem", "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2.FailoverHistoryItem", "k8s.io/apimachinery/pkg/apis/meta/v1.Condition", "k8s.io/apimachinery/pkg/apis/meta/v1.Time"}, } } diff --git a/pkg/scheduler/framework/plugins/clustereviction/cluster_eviction.go b/pkg/scheduler/framework/plugins/clustereviction/cluster_eviction.go index 3ba4ef06e2a2..85f5661cd970 100644 --- a/pkg/scheduler/framework/plugins/clustereviction/cluster_eviction.go +++ b/pkg/scheduler/framework/plugins/clustereviction/cluster_eviction.go @@ -18,6 +18,7 @@ package clustereviction import ( "context" + "fmt" "k8s.io/klog/v2" @@ -47,7 +48,16 @@ func (p *ClusterEviction) Name() string { } // Filter checks if the target cluster is in the GracefulEvictionTasks which means it is in the process of eviction. 
-func (p *ClusterEviction) Filter(_ context.Context, bindingSpec *workv1alpha2.ResourceBindingSpec, _ *workv1alpha2.ResourceBindingStatus, cluster *clusterv1alpha1.Cluster) *framework.Result { +func (p *ClusterEviction) Filter(_ context.Context, bindingSpec *workv1alpha2.ResourceBindingSpec, bindingStatus *workv1alpha2.ResourceBindingStatus, cluster *clusterv1alpha1.Cluster) *framework.Result { + failoverHistory := bindingStatus.FailoverHistory + if len(failoverHistory) != 0 { + lastFailover := failoverHistory[len(failoverHistory)-1] + if lastFailover.OriginCluster == cluster.Name { + klog.V(2).Infof("Workload has been failed over from this cluster %s.", cluster.Name) + return framework.NewResult(framework.Unschedulable, fmt.Sprintf("workload has been failed over from this cluster %s", cluster.Name)) + } + } + if bindingSpec.ClusterInGracefulEvictionTasks(cluster.Name) { klog.V(2).Infof("Cluster(%s) is in the process of eviction.", cluster.Name) return framework.NewResult(framework.Unschedulable, "cluster(s) is in the process of eviction")