From 3017f55dee539854b7db1593d86ded5ef7d49319 Mon Sep 17 00:00:00 2001
From: David van der Spek <28541758+davidspek@users.noreply.github.com>
Date: Mon, 17 Jun 2024 10:13:32 +0200
Subject: [PATCH] Merge pull request #546

* feat: add support for TopologySpreadConstraints
---
 .../starrocks.com_starrocksclusters.yaml      | 712 ++++++++++++++++++
 .../starrocks.com_starrockswarehouses.yaml    | 178 +++++
 deploy/starrocks.com_starrocksclusters.yaml   | 208 +++++
 deploy/starrocks.com_starrockswarehouses.yaml |  52 ++
 .../charts/starrocks/templates/_helpers.tpl   |  33 +
 .../starrocks/templates/starrockscluster.yaml |  12 +
 .../charts/starrocks/values.yaml              |  36 +
 helm-charts/charts/kube-starrocks/values.yaml |  36 +
 pkg/apis/starrocks/v1/load_type.go            |  10 +
 .../starrocks/v1/zz_generated.deepcopy.go     |   7 +
 pkg/k8sutils/templates/pod/spec.go            |   1 +
 11 files changed, 1285 insertions(+)

diff --git a/config/crd/bases/starrocks.com_starrocksclusters.yaml b/config/crd/bases/starrocks.com_starrocksclusters.yaml
index cd052924..b09fc658 100644
--- a/config/crd/bases/starrocks.com_starrocksclusters.yaml
+++ b/config/crd/bases/starrocks.com_starrocksclusters.yaml
@@ -4172,6 +4172,184 @@ spec:
                   type: string
               type: object
             type: array
+          topologySpreadConstraints:
+            description: |-
+              (Optional) TopologySpreadConstraints for spreading pods across failure-domains
+              More info: https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/
+            items:
+              description: TopologySpreadConstraint specifies how to spread
+                matching pods among the given topology.
+              properties:
+                labelSelector:
+                  description: |-
+                    LabelSelector is used to find matching pods.
+                    Pods that match this label selector are counted to determine the number of pods
+                    in their corresponding topology domain.
+                  properties:
+                    matchExpressions:
+                      description: matchExpressions is a list of label selector
+                        requirements. The requirements are ANDed.
+                      items:
+                        description: |-
+                          A label selector requirement is a selector that contains values, a key, and an operator that
+                          relates the key and values.
+                        properties:
+                          key:
+                            description: key is the label key that the selector
+                              applies to.
+                            type: string
+                          operator:
+                            description: |-
+                              operator represents a key's relationship to a set of values.
+                              Valid operators are In, NotIn, Exists and DoesNotExist.
+                            type: string
+                          values:
+                            description: |-
+                              values is an array of string values. If the operator is In or NotIn,
+                              the values array must be non-empty. If the operator is Exists or DoesNotExist,
+                              the values array must be empty. This array is replaced during a strategic
+                              merge patch.
+                            items:
+                              type: string
+                            type: array
+                        required:
+                        - key
+                        - operator
+                        type: object
+                      type: array
+                    matchLabels:
+                      additionalProperties:
+                        type: string
+                      description: |-
+                        matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
+                        map is equivalent to an element of matchExpressions, whose key field is "key", the
+                        operator is "In", and the values array contains only "value". The requirements are ANDed.
+                      type: object
+                  type: object
+                  x-kubernetes-map-type: atomic
+                matchLabelKeys:
+                  description: |-
+                    MatchLabelKeys is a set of pod label keys to select the pods over which
+                    spreading will be calculated. The keys are used to lookup values from the
+                    incoming pod labels, those key-value labels are ANDed with labelSelector
+                    to select the group of existing pods over which spreading will be calculated
+                    for the incoming pod. Keys that don't exist in the incoming pod labels will
+                    be ignored. A null or empty list means only match against labelSelector.
+                  items:
+                    type: string
+                  type: array
+                  x-kubernetes-list-type: atomic
+                maxSkew:
+                  description: |-
+                    MaxSkew describes the degree to which pods may be unevenly distributed.
+                    When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference
+                    between the number of matching pods in the target topology and the global minimum.
+                    The global minimum is the minimum number of matching pods in an eligible domain
+                    or zero if the number of eligible domains is less than MinDomains.
+                    For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same
+                    labelSelector spread as 2/2/1:
+                    In this case, the global minimum is 1.
+                    | zone1 | zone2 | zone3 |
+                    |  P P  |  P P  |   P   |
+                    - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 2/2/2;
+                    scheduling it onto zone1(zone2) would make the ActualSkew(3-1) on zone1(zone2)
+                    violate MaxSkew(1).
+                    - if MaxSkew is 2, incoming pod can be scheduled onto any zone.
+                    When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence
+                    to topologies that satisfy it.
+                    It's a required field. Default value is 1 and 0 is not allowed.
+                  format: int32
+                  type: integer
+                minDomains:
+                  description: |-
+                    MinDomains indicates a minimum number of eligible domains.
+                    When the number of eligible domains with matching topology keys is less than minDomains,
+                    Pod Topology Spread treats "global minimum" as 0, and then the calculation of Skew is performed.
+                    And when the number of eligible domains with matching topology keys equals or greater than minDomains,
+                    this value has no effect on scheduling.
+                    As a result, when the number of eligible domains is less than minDomains,
+                    scheduler won't schedule more than maxSkew Pods to those domains.
+                    If value is nil, the constraint behaves as if MinDomains is equal to 1.
+                    Valid values are integers greater than 0.
+                    When value is not nil, WhenUnsatisfiable must be DoNotSchedule.
+
+
+                    For example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5 and pods with the same
+                    labelSelector spread as 2/2/2:
+                    | zone1 | zone2 | zone3 |
+                    |  P P  |  P P  |  P P  |
+                    The number of domains is less than 5(MinDomains), so "global minimum" is treated as 0.
+                    In this situation, new pod with the same labelSelector cannot be scheduled,
+                    because computed skew will be 3(3 - 0) if new Pod is scheduled to any of the three zones,
+                    it will violate MaxSkew.
+
+
+                    This is a beta field and requires the MinDomainsInPodTopologySpread feature gate to be enabled (enabled by default).
+                  format: int32
+                  type: integer
+                nodeAffinityPolicy:
+                  description: |-
+                    NodeAffinityPolicy indicates how we will treat Pod's nodeAffinity/nodeSelector
+                    when calculating pod topology spread skew. Options are:
+                    - Honor: only nodes matching nodeAffinity/nodeSelector are included in the calculations.
+                    - Ignore: nodeAffinity/nodeSelector are ignored. All nodes are included in the calculations.
+
+
+                    If this value is nil, the behavior is equivalent to the Honor policy.
+                    This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag.
+                  type: string
+                nodeTaintsPolicy:
+                  description: |-
+                    NodeTaintsPolicy indicates how we will treat node taints when calculating
+                    pod topology spread skew. Options are:
+                    - Honor: nodes without taints, along with tainted nodes for which the incoming pod
+                    has a toleration, are included.
+                    - Ignore: node taints are ignored. All nodes are included.
+
+
+                    If this value is nil, the behavior is equivalent to the Ignore policy.
+                    This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag.
+                  type: string
+                topologyKey:
+                  description: |-
+                    TopologyKey is the key of node labels. Nodes that have a label with this key
+                    and identical values are considered to be in the same topology.
+                    We consider each <key, value> as a "bucket", and try to put balanced number
+                    of pods into each bucket.
+                    We define a domain as a particular instance of a topology.
+                    Also, we define an eligible domain as a domain whose nodes meet the requirements of
+                    nodeAffinityPolicy and nodeTaintsPolicy.
+                    e.g. If TopologyKey is "kubernetes.io/hostname", each Node is a domain of that topology.
+                    And, if TopologyKey is "topology.kubernetes.io/zone", each zone is a domain of that topology.
+                    It's a required field.
+                  type: string
+                whenUnsatisfiable:
+                  description: |-
+                    WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy
+                    the spread constraint.
+                    - DoNotSchedule (default) tells the scheduler not to schedule it.
+                    - ScheduleAnyway tells the scheduler to schedule the pod in any location,
+                      but giving higher precedence to topologies that would help reduce the
+                      skew.
+                    A constraint is considered "Unsatisfiable" for an incoming pod
+                    if and only if every possible node assignment for that pod would violate
+                    "MaxSkew" on some topology.
+                    For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same
+                    labelSelector spread as 3/1/1:
+                    | zone1 | zone2 | zone3 |
+                    | P P P |   P   |   P   |
+                    If WhenUnsatisfiable is set to DoNotSchedule, incoming pod can only be scheduled
+                    to zone2(zone3) to become 3/2/1(3/1/2) as ActualSkew(2-1) on zone2(zone3) satisfies
+                    MaxSkew(1). In other words, the cluster can still be imbalanced, but scheduler
+                    won't make it *more* imbalanced.
+                    It's a required field.
+                  type: string
+              required:
+              - maxSkew
+              - topologyKey
+              - whenUnsatisfiable
+              type: object
+            type: array
         type: object
       starRocksCnSpec:
         description: StarRocksCnSpec define cn configuration for start cn
@@ -8903,6 +9081,184 @@ spec:
                   type: string
               type: object
             type: array
+          topologySpreadConstraints:
+            description: |-
+              (Optional) TopologySpreadConstraints for spreading pods across failure-domains
+              More info: https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/
+            items:
+              description: TopologySpreadConstraint specifies how to spread
+                matching pods among the given topology.
+              properties:
+                labelSelector:
+                  description: |-
+                    LabelSelector is used to find matching pods.
+                    Pods that match this label selector are counted to determine the number of pods
+                    in their corresponding topology domain.
+                  properties:
+                    matchExpressions:
+                      description: matchExpressions is a list of label selector
+                        requirements. The requirements are ANDed.
+                      items:
+                        description: |-
+                          A label selector requirement is a selector that contains values, a key, and an operator that
+                          relates the key and values.
+                        properties:
+                          key:
+                            description: key is the label key that the selector
+                              applies to.
+                            type: string
+                          operator:
+                            description: |-
+                              operator represents a key's relationship to a set of values.
+                              Valid operators are In, NotIn, Exists and DoesNotExist.
+                            type: string
+                          values:
+                            description: |-
+                              values is an array of string values. If the operator is In or NotIn,
+                              the values array must be non-empty. If the operator is Exists or DoesNotExist,
+                              the values array must be empty. This array is replaced during a strategic
+                              merge patch.
+                            items:
+                              type: string
+                            type: array
+                        required:
+                        - key
+                        - operator
+                        type: object
+                      type: array
+                    matchLabels:
+                      additionalProperties:
+                        type: string
+                      description: |-
+                        matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
+                        map is equivalent to an element of matchExpressions, whose key field is "key", the
+                        operator is "In", and the values array contains only "value". The requirements are ANDed.
+                      type: object
+                  type: object
+                  x-kubernetes-map-type: atomic
+                matchLabelKeys:
+                  description: |-
+                    MatchLabelKeys is a set of pod label keys to select the pods over which
+                    spreading will be calculated. The keys are used to lookup values from the
+                    incoming pod labels, those key-value labels are ANDed with labelSelector
+                    to select the group of existing pods over which spreading will be calculated
+                    for the incoming pod. Keys that don't exist in the incoming pod labels will
+                    be ignored. A null or empty list means only match against labelSelector.
+                  items:
+                    type: string
+                  type: array
+                  x-kubernetes-list-type: atomic
+                maxSkew:
+                  description: |-
+                    MaxSkew describes the degree to which pods may be unevenly distributed.
+                    When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference
+                    between the number of matching pods in the target topology and the global minimum.
+                    The global minimum is the minimum number of matching pods in an eligible domain
+                    or zero if the number of eligible domains is less than MinDomains.
+                    For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same
+                    labelSelector spread as 2/2/1:
+                    In this case, the global minimum is 1.
+                    | zone1 | zone2 | zone3 |
+                    |  P P  |  P P  |   P   |
+                    - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 2/2/2;
+                    scheduling it onto zone1(zone2) would make the ActualSkew(3-1) on zone1(zone2)
+                    violate MaxSkew(1).
+                    - if MaxSkew is 2, incoming pod can be scheduled onto any zone.
+                    When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence
+                    to topologies that satisfy it.
+                    It's a required field. Default value is 1 and 0 is not allowed.
+                  format: int32
+                  type: integer
+                minDomains:
+                  description: |-
+                    MinDomains indicates a minimum number of eligible domains.
+                    When the number of eligible domains with matching topology keys is less than minDomains,
+                    Pod Topology Spread treats "global minimum" as 0, and then the calculation of Skew is performed.
+                    And when the number of eligible domains with matching topology keys equals or greater than minDomains,
+                    this value has no effect on scheduling.
+                    As a result, when the number of eligible domains is less than minDomains,
+                    scheduler won't schedule more than maxSkew Pods to those domains.
+                    If value is nil, the constraint behaves as if MinDomains is equal to 1.
+                    Valid values are integers greater than 0.
+                    When value is not nil, WhenUnsatisfiable must be DoNotSchedule.
+
+
+                    For example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5 and pods with the same
+                    labelSelector spread as 2/2/2:
+                    | zone1 | zone2 | zone3 |
+                    |  P P  |  P P  |  P P  |
+                    The number of domains is less than 5(MinDomains), so "global minimum" is treated as 0.
+                    In this situation, new pod with the same labelSelector cannot be scheduled,
+                    because computed skew will be 3(3 - 0) if new Pod is scheduled to any of the three zones,
+                    it will violate MaxSkew.
+
+
+                    This is a beta field and requires the MinDomainsInPodTopologySpread feature gate to be enabled (enabled by default).
+                  format: int32
+                  type: integer
+                nodeAffinityPolicy:
+                  description: |-
+                    NodeAffinityPolicy indicates how we will treat Pod's nodeAffinity/nodeSelector
+                    when calculating pod topology spread skew. Options are:
+                    - Honor: only nodes matching nodeAffinity/nodeSelector are included in the calculations.
+                    - Ignore: nodeAffinity/nodeSelector are ignored. All nodes are included in the calculations.
+
+
+                    If this value is nil, the behavior is equivalent to the Honor policy.
+                    This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag.
+                  type: string
+                nodeTaintsPolicy:
+                  description: |-
+                    NodeTaintsPolicy indicates how we will treat node taints when calculating
+                    pod topology spread skew. Options are:
+                    - Honor: nodes without taints, along with tainted nodes for which the incoming pod
+                    has a toleration, are included.
+                    - Ignore: node taints are ignored. All nodes are included.
+
+
+                    If this value is nil, the behavior is equivalent to the Ignore policy.
+                    This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag.
+                  type: string
+                topologyKey:
+                  description: |-
+                    TopologyKey is the key of node labels. Nodes that have a label with this key
+                    and identical values are considered to be in the same topology.
+                    We consider each <key, value> as a "bucket", and try to put balanced number
+                    of pods into each bucket.
+                    We define a domain as a particular instance of a topology.
+                    Also, we define an eligible domain as a domain whose nodes meet the requirements of
+                    nodeAffinityPolicy and nodeTaintsPolicy.
+                    e.g. If TopologyKey is "kubernetes.io/hostname", each Node is a domain of that topology.
+                    And, if TopologyKey is "topology.kubernetes.io/zone", each zone is a domain of that topology.
+                    It's a required field.
+                  type: string
+                whenUnsatisfiable:
+                  description: |-
+                    WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy
+                    the spread constraint.
+                    - DoNotSchedule (default) tells the scheduler not to schedule it.
+                    - ScheduleAnyway tells the scheduler to schedule the pod in any location,
+                      but giving higher precedence to topologies that would help reduce the
+                      skew.
+                    A constraint is considered "Unsatisfiable" for an incoming pod
+                    if and only if every possible node assignment for that pod would violate
+                    "MaxSkew" on some topology.
+                    For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same
+                    labelSelector spread as 3/1/1:
+                    | zone1 | zone2 | zone3 |
+                    | P P P |   P   |   P   |
+                    If WhenUnsatisfiable is set to DoNotSchedule, incoming pod can only be scheduled
+                    to zone2(zone3) to become 3/2/1(3/1/2) as ActualSkew(2-1) on zone2(zone3) satisfies
+                    MaxSkew(1). In other words, the cluster can still be imbalanced, but scheduler
+                    won't make it *more* imbalanced.
+                    It's a required field.
+                  type: string
+              required:
+              - maxSkew
+              - topologyKey
+              - whenUnsatisfiable
+              type: object
+            type: array
         type: object
       starRocksFeProxySpec:
         description: StarRocksLoadSpec define a proxy for fe.
@@ -10204,6 +10560,184 @@ spec:
                   type: string
               type: object
             type: array
+          topologySpreadConstraints:
+            description: |-
+              (Optional) TopologySpreadConstraints for spreading pods across failure-domains
+              More info: https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/
+            items:
+              description: TopologySpreadConstraint specifies how to spread
+                matching pods among the given topology.
+              properties:
+                labelSelector:
+                  description: |-
+                    LabelSelector is used to find matching pods.
+                    Pods that match this label selector are counted to determine the number of pods
+                    in their corresponding topology domain.
+                  properties:
+                    matchExpressions:
+                      description: matchExpressions is a list of label selector
+                        requirements. The requirements are ANDed.
+                      items:
+                        description: |-
+                          A label selector requirement is a selector that contains values, a key, and an operator that
+                          relates the key and values.
+                        properties:
+                          key:
+                            description: key is the label key that the selector
+                              applies to.
+                            type: string
+                          operator:
+                            description: |-
+                              operator represents a key's relationship to a set of values.
+                              Valid operators are In, NotIn, Exists and DoesNotExist.
+                            type: string
+                          values:
+                            description: |-
+                              values is an array of string values. If the operator is In or NotIn,
+                              the values array must be non-empty. If the operator is Exists or DoesNotExist,
+                              the values array must be empty. This array is replaced during a strategic
+                              merge patch.
+                            items:
+                              type: string
+                            type: array
+                        required:
+                        - key
+                        - operator
+                        type: object
+                      type: array
+                    matchLabels:
+                      additionalProperties:
+                        type: string
+                      description: |-
+                        matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
+                        map is equivalent to an element of matchExpressions, whose key field is "key", the
+                        operator is "In", and the values array contains only "value". The requirements are ANDed.
+                      type: object
+                  type: object
+                  x-kubernetes-map-type: atomic
+                matchLabelKeys:
+                  description: |-
+                    MatchLabelKeys is a set of pod label keys to select the pods over which
+                    spreading will be calculated. The keys are used to lookup values from the
+                    incoming pod labels, those key-value labels are ANDed with labelSelector
+                    to select the group of existing pods over which spreading will be calculated
+                    for the incoming pod. Keys that don't exist in the incoming pod labels will
+                    be ignored. A null or empty list means only match against labelSelector.
+                  items:
+                    type: string
+                  type: array
+                  x-kubernetes-list-type: atomic
+                maxSkew:
+                  description: |-
+                    MaxSkew describes the degree to which pods may be unevenly distributed.
+                    When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference
+                    between the number of matching pods in the target topology and the global minimum.
+                    The global minimum is the minimum number of matching pods in an eligible domain
+                    or zero if the number of eligible domains is less than MinDomains.
+                    For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same
+                    labelSelector spread as 2/2/1:
+                    In this case, the global minimum is 1.
+                    | zone1 | zone2 | zone3 |
+                    |  P P  |  P P  |   P   |
+                    - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 2/2/2;
+                    scheduling it onto zone1(zone2) would make the ActualSkew(3-1) on zone1(zone2)
+                    violate MaxSkew(1).
+                    - if MaxSkew is 2, incoming pod can be scheduled onto any zone.
+                    When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence
+                    to topologies that satisfy it.
+                    It's a required field. Default value is 1 and 0 is not allowed.
+                  format: int32
+                  type: integer
+                minDomains:
+                  description: |-
+                    MinDomains indicates a minimum number of eligible domains.
+                    When the number of eligible domains with matching topology keys is less than minDomains,
+                    Pod Topology Spread treats "global minimum" as 0, and then the calculation of Skew is performed.
+                    And when the number of eligible domains with matching topology keys equals or greater than minDomains,
+                    this value has no effect on scheduling.
+                    As a result, when the number of eligible domains is less than minDomains,
+                    scheduler won't schedule more than maxSkew Pods to those domains.
+                    If value is nil, the constraint behaves as if MinDomains is equal to 1.
+                    Valid values are integers greater than 0.
+                    When value is not nil, WhenUnsatisfiable must be DoNotSchedule.
+
+
+                    For example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5 and pods with the same
+                    labelSelector spread as 2/2/2:
+                    | zone1 | zone2 | zone3 |
+                    |  P P  |  P P  |  P P  |
+                    The number of domains is less than 5(MinDomains), so "global minimum" is treated as 0.
+                    In this situation, new pod with the same labelSelector cannot be scheduled,
+                    because computed skew will be 3(3 - 0) if new Pod is scheduled to any of the three zones,
+                    it will violate MaxSkew.
+
+
+                    This is a beta field and requires the MinDomainsInPodTopologySpread feature gate to be enabled (enabled by default).
+                  format: int32
+                  type: integer
+                nodeAffinityPolicy:
+                  description: |-
+                    NodeAffinityPolicy indicates how we will treat Pod's nodeAffinity/nodeSelector
+                    when calculating pod topology spread skew. Options are:
+                    - Honor: only nodes matching nodeAffinity/nodeSelector are included in the calculations.
+                    - Ignore: nodeAffinity/nodeSelector are ignored. All nodes are included in the calculations.
+
+
+                    If this value is nil, the behavior is equivalent to the Honor policy.
+                    This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag.
+                  type: string
+                nodeTaintsPolicy:
+                  description: |-
+                    NodeTaintsPolicy indicates how we will treat node taints when calculating
+                    pod topology spread skew. Options are:
+                    - Honor: nodes without taints, along with tainted nodes for which the incoming pod
+                    has a toleration, are included.
+                    - Ignore: node taints are ignored. All nodes are included.
+
+
+                    If this value is nil, the behavior is equivalent to the Ignore policy.
+                    This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag.
+                  type: string
+                topologyKey:
+                  description: |-
+                    TopologyKey is the key of node labels. Nodes that have a label with this key
+                    and identical values are considered to be in the same topology.
+                    We consider each <key, value> as a "bucket", and try to put balanced number
+                    of pods into each bucket.
+                    We define a domain as a particular instance of a topology.
+                    Also, we define an eligible domain as a domain whose nodes meet the requirements of
+                    nodeAffinityPolicy and nodeTaintsPolicy.
+                    e.g. If TopologyKey is "kubernetes.io/hostname", each Node is a domain of that topology.
+                    And, if TopologyKey is "topology.kubernetes.io/zone", each zone is a domain of that topology.
+                    It's a required field.
+                  type: string
+                whenUnsatisfiable:
+                  description: |-
+                    WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy
+                    the spread constraint.
+                    - DoNotSchedule (default) tells the scheduler not to schedule it.
+                    - ScheduleAnyway tells the scheduler to schedule the pod in any location,
+                      but giving higher precedence to topologies that would help reduce the
+                      skew.
+                    A constraint is considered "Unsatisfiable" for an incoming pod
+                    if and only if every possible node assignment for that pod would violate
+                    "MaxSkew" on some topology.
+                    For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same
+                    labelSelector spread as 3/1/1:
+                    | zone1 | zone2 | zone3 |
+                    | P P P |   P   |   P   |
+                    If WhenUnsatisfiable is set to DoNotSchedule, incoming pod can only be scheduled
+                    to zone2(zone3) to become 3/2/1(3/1/2) as ActualSkew(2-1) on zone2(zone3) satisfies
+                    MaxSkew(1). In other words, the cluster can still be imbalanced, but scheduler
+                    won't make it *more* imbalanced.
+                    It's a required field.
+                  type: string
+              required:
+              - maxSkew
+              - topologyKey
+              - whenUnsatisfiable
+              type: object
+            type: array
         type: object
       starRocksFeSpec:
         description: StarRocksFeSpec define fe configuration for start fe
@@ -14314,6 +14848,184 @@ spec:
                   type: string
               type: object
             type: array
+          topologySpreadConstraints:
+            description: |-
+              (Optional) TopologySpreadConstraints for spreading pods across failure-domains
+              More info: https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/
+            items:
+              description: TopologySpreadConstraint specifies how to spread
+                matching pods among the given topology.
+              properties:
+                labelSelector:
+                  description: |-
+                    LabelSelector is used to find matching pods.
+                    Pods that match this label selector are counted to determine the number of pods
+                    in their corresponding topology domain.
+                  properties:
+                    matchExpressions:
+                      description: matchExpressions is a list of label selector
+                        requirements. The requirements are ANDed.
+                      items:
+                        description: |-
+                          A label selector requirement is a selector that contains values, a key, and an operator that
+                          relates the key and values.
+                        properties:
+                          key:
+                            description: key is the label key that the selector
+                              applies to.
+                            type: string
+                          operator:
+                            description: |-
+                              operator represents a key's relationship to a set of values.
+                              Valid operators are In, NotIn, Exists and DoesNotExist.
+                            type: string
+                          values:
+                            description: |-
+                              values is an array of string values. If the operator is In or NotIn,
+                              the values array must be non-empty. If the operator is Exists or DoesNotExist,
+                              the values array must be empty. This array is replaced during a strategic
+                              merge patch.
+                            items:
+                              type: string
+                            type: array
+                        required:
+                        - key
+                        - operator
+                        type: object
+                      type: array
+                    matchLabels:
+                      additionalProperties:
+                        type: string
+                      description: |-
+                        matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
+                        map is equivalent to an element of matchExpressions, whose key field is "key", the
+                        operator is "In", and the values array contains only "value". The requirements are ANDed.
+                      type: object
+                  type: object
+                  x-kubernetes-map-type: atomic
+                matchLabelKeys:
+                  description: |-
+                    MatchLabelKeys is a set of pod label keys to select the pods over which
+                    spreading will be calculated. The keys are used to lookup values from the
+                    incoming pod labels, those key-value labels are ANDed with labelSelector
+                    to select the group of existing pods over which spreading will be calculated
+                    for the incoming pod. Keys that don't exist in the incoming pod labels will
+                    be ignored. A null or empty list means only match against labelSelector.
+                  items:
+                    type: string
+                  type: array
+                  x-kubernetes-list-type: atomic
+                maxSkew:
+                  description: |-
+                    MaxSkew describes the degree to which pods may be unevenly distributed.
+                    When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference
+                    between the number of matching pods in the target topology and the global minimum.
+                    The global minimum is the minimum number of matching pods in an eligible domain
+                    or zero if the number of eligible domains is less than MinDomains.
+                    For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same
+                    labelSelector spread as 2/2/1:
+                    In this case, the global minimum is 1.
+                    | zone1 | zone2 | zone3 |
+                    |  P P  |  P P  |   P   |
+                    - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 2/2/2;
+                    scheduling it onto zone1(zone2) would make the ActualSkew(3-1) on zone1(zone2)
+                    violate MaxSkew(1).
+                    - if MaxSkew is 2, incoming pod can be scheduled onto any zone.
+                    When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence
+                    to topologies that satisfy it.
+                    It's a required field. Default value is 1 and 0 is not allowed.
+                  format: int32
+                  type: integer
+                minDomains:
+                  description: |-
+                    MinDomains indicates a minimum number of eligible domains.
+                    When the number of eligible domains with matching topology keys is less than minDomains,
+                    Pod Topology Spread treats "global minimum" as 0, and then the calculation of Skew is performed.
+                    And when the number of eligible domains with matching topology keys equals or greater than minDomains,
+                    this value has no effect on scheduling.
+                    As a result, when the number of eligible domains is less than minDomains,
+                    scheduler won't schedule more than maxSkew Pods to those domains.
+                    If value is nil, the constraint behaves as if MinDomains is equal to 1.
+                    Valid values are integers greater than 0.
+                    When value is not nil, WhenUnsatisfiable must be DoNotSchedule.
+
+
+                    For example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5 and pods with the same
+                    labelSelector spread as 2/2/2:
+                    | zone1 | zone2 | zone3 |
+                    |  P P  |  P P  |  P P  |
+                    The number of domains is less than 5(MinDomains), so "global minimum" is treated as 0.
+                    In this situation, new pod with the same labelSelector cannot be scheduled,
+                    because computed skew will be 3(3 - 0) if new Pod is scheduled to any of the three zones,
+                    it will violate MaxSkew.
+
+
+                    This is a beta field and requires the MinDomainsInPodTopologySpread feature gate to be enabled (enabled by default).
+                  format: int32
+                  type: integer
+                nodeAffinityPolicy:
+                  description: |-
+                    NodeAffinityPolicy indicates how we will treat Pod's nodeAffinity/nodeSelector
+                    when calculating pod topology spread skew. Options are:
+                    - Honor: only nodes matching nodeAffinity/nodeSelector are included in the calculations.
+                    - Ignore: nodeAffinity/nodeSelector are ignored. All nodes are included in the calculations.
+
+
+                    If this value is nil, the behavior is equivalent to the Honor policy.
+                    This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag.
+                  type: string
+                nodeTaintsPolicy:
+                  description: |-
+                    NodeTaintsPolicy indicates how we will treat node taints when calculating
+                    pod topology spread skew. Options are:
+                    - Honor: nodes without taints, along with tainted nodes for which the incoming pod
+                    has a toleration, are included.
+                    - Ignore: node taints are ignored. All nodes are included.
+
+
+                    If this value is nil, the behavior is equivalent to the Ignore policy.
+                    This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag.
+                  type: string
+                topologyKey:
+                  description: |-
+                    TopologyKey is the key of node labels. Nodes that have a label with this key
+                    and identical values are considered to be in the same topology.
+                    We consider each <key, value> as a "bucket", and try to put balanced number
+                    of pods into each bucket.
+                    We define a domain as a particular instance of a topology.
+                    Also, we define an eligible domain as a domain whose nodes meet the requirements of
+                    nodeAffinityPolicy and nodeTaintsPolicy.
+                    e.g. If TopologyKey is "kubernetes.io/hostname", each Node is a domain of that topology.
+                    And, if TopologyKey is "topology.kubernetes.io/zone", each zone is a domain of that topology.
+                    It's a required field.
+                  type: string
+                whenUnsatisfiable:
+                  description: |-
+                    WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy
+                    the spread constraint.
+                    - DoNotSchedule (default) tells the scheduler not to schedule it.
+                    - ScheduleAnyway tells the scheduler to schedule the pod in any location,
+                      but giving higher precedence to topologies that would help reduce the
+                      skew.
+                    A constraint is considered "Unsatisfiable" for an incoming pod
+                    if and only if every possible node assignment for that pod would violate
+                    "MaxSkew" on some topology.
+                    For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same
+                    labelSelector spread as 3/1/1:
+                    | zone1 | zone2 | zone3 |
+                    | P P P |   P   |   P   |
+                    If WhenUnsatisfiable is set to DoNotSchedule, incoming pod can only be scheduled
+                    to zone2(zone3) to become 3/2/1(3/1/2) as ActualSkew(2-1) on zone2(zone3) satisfies
+                    MaxSkew(1). In other words, the cluster can still be imbalanced, but scheduler
+                    won't make it *more* imbalanced.
+                    It's a required field.
+                  type: string
+              required:
+              - maxSkew
+              - topologyKey
+              - whenUnsatisfiable
+              type: object
+            type: array
         type: object
     type: object
   status:
diff --git a/config/crd/bases/starrocks.com_starrockswarehouses.yaml b/config/crd/bases/starrocks.com_starrockswarehouses.yaml
index be9cfe77..46f575e9 100644
--- a/config/crd/bases/starrocks.com_starrockswarehouses.yaml
+++ b/config/crd/bases/starrocks.com_starrockswarehouses.yaml
@@ -4783,6 +4783,184 @@ spec:
                   type: string
               type: object
             type: array
+          topologySpreadConstraints:
+            description: |-
+              (Optional) TopologySpreadConstraints for spreading pods across failure-domains
+              More info: https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/
+            items:
+              description: TopologySpreadConstraint specifies how to spread
+                matching pods among the given topology.
+              properties:
+                labelSelector:
+                  description: |-
+                    LabelSelector is used to find matching pods.
+                    Pods that match this label selector are counted to determine the number of pods
+                    in their corresponding topology domain.
+                  properties:
+                    matchExpressions:
+                      description: matchExpressions is a list of label selector
+                        requirements. The requirements are ANDed.
+                      items:
+                        description: |-
+                          A label selector requirement is a selector that contains values, a key, and an operator that
+                          relates the key and values.
+                        properties:
+                          key:
+                            description: key is the label key that the selector
+                              applies to.
+                            type: string
+                          operator:
+                            description: |-
+                              operator represents a key's relationship to a set of values.
+                              Valid operators are In, NotIn, Exists and DoesNotExist.
+                            type: string
+                          values:
+                            description: |-
+                              values is an array of string values. If the operator is In or NotIn,
+                              the values array must be non-empty. If the operator is Exists or DoesNotExist,
+                              the values array must be empty. This array is replaced during a strategic
+                              merge patch.
+                            items:
+                              type: string
+                            type: array
+                        required:
+                        - key
+                        - operator
+                        type: object
+                      type: array
+                    matchLabels:
+                      additionalProperties:
+                        type: string
+                      description: |-
+                        matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
+                        map is equivalent to an element of matchExpressions, whose key field is "key", the
+                        operator is "In", and the values array contains only "value". The requirements are ANDed.
+                      type: object
+                  type: object
+                  x-kubernetes-map-type: atomic
+                matchLabelKeys:
+                  description: |-
+                    MatchLabelKeys is a set of pod label keys to select the pods over which
+                    spreading will be calculated. The keys are used to lookup values from the
+                    incoming pod labels, those key-value labels are ANDed with labelSelector
+                    to select the group of existing pods over which spreading will be calculated
+                    for the incoming pod. Keys that don't exist in the incoming pod labels will
+                    be ignored. A null or empty list means only match against labelSelector.
+                  items:
+                    type: string
+                  type: array
+                  x-kubernetes-list-type: atomic
+                maxSkew:
+                  description: |-
+                    MaxSkew describes the degree to which pods may be unevenly distributed.
+                    When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference
+                    between the number of matching pods in the target topology and the global minimum.
+                    The global minimum is the minimum number of matching pods in an eligible domain
+                    or zero if the number of eligible domains is less than MinDomains.
+                    For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same
+                    labelSelector spread as 2/2/1:
+                    In this case, the global minimum is 1.
+                    | zone1 | zone2 | zone3 |
+                    |  P P  |  P P  |   P   |
+                    - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 2/2/2;
+                    scheduling it onto zone1(zone2) would make the ActualSkew(3-1) on zone1(zone2)
+                    violate MaxSkew(1).
+                    - if MaxSkew is 2, incoming pod can be scheduled onto any zone.
+                    When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence
+                    to topologies that satisfy it.
+                    It's a required field. Default value is 1 and 0 is not allowed.
+                  format: int32
+                  type: integer
+                minDomains:
+                  description: |-
+                    MinDomains indicates a minimum number of eligible domains.
+                    When the number of eligible domains with matching topology keys is less than minDomains,
+                    Pod Topology Spread treats "global minimum" as 0, and then the calculation of Skew is performed.
+                    And when the number of eligible domains with matching topology keys equals or greater than minDomains,
+                    this value has no effect on scheduling.
+                    As a result, when the number of eligible domains is less than minDomains,
+                    scheduler won't schedule more than maxSkew Pods to those domains.
+                    If value is nil, the constraint behaves as if MinDomains is equal to 1.
+                    Valid values are integers greater than 0.
+                    When value is not nil, WhenUnsatisfiable must be DoNotSchedule.
+
+
+                    For example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5 and pods with the same
+                    labelSelector spread as 2/2/2:
+                    | zone1 | zone2 | zone3 |
+                    |  P P  |  P P  |  P P  |
+                    The number of domains is less than 5(MinDomains), so "global minimum" is treated as 0.
+                    In this situation, new pod with the same labelSelector cannot be scheduled,
+                    because computed skew will be 3(3 - 0) if new Pod is scheduled to any of the three zones,
+                    it will violate MaxSkew.
+
+
+                    This is a beta field and requires the MinDomainsInPodTopologySpread feature gate to be enabled (enabled by default).
+                  format: int32
+                  type: integer
+                nodeAffinityPolicy:
+                  description: |-
+                    NodeAffinityPolicy indicates how we will treat Pod's nodeAffinity/nodeSelector
+                    when calculating pod topology spread skew. Options are:
+                    - Honor: only nodes matching nodeAffinity/nodeSelector are included in the calculations.
+                    - Ignore: nodeAffinity/nodeSelector are ignored. All nodes are included in the calculations.
+
+
+                    If this value is nil, the behavior is equivalent to the Honor policy.
+                    This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag.
+                  type: string
+                nodeTaintsPolicy:
+                  description: |-
+                    NodeTaintsPolicy indicates how we will treat node taints when calculating
+                    pod topology spread skew. Options are:
+                    - Honor: nodes without taints, along with tainted nodes for which the incoming pod
+                    has a toleration, are included.
+                    - Ignore: node taints are ignored. All nodes are included.
+
+
+                    If this value is nil, the behavior is equivalent to the Ignore policy.
+                    This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag.
+                  type: string
+                topologyKey:
+                  description: |-
+                    TopologyKey is the key of node labels. Nodes that have a label with this key
+                    and identical values are considered to be in the same topology.
+                    We consider each <key, value> as a "bucket", and try to put balanced number
+                    of pods into each bucket.
+                    We define a domain as a particular instance of a topology.
+                    Also, we define an eligible domain as a domain whose nodes meet the requirements of
+                    nodeAffinityPolicy and nodeTaintsPolicy.
+                    e.g. If TopologyKey is "kubernetes.io/hostname", each Node is a domain of that topology.
+                    And, if TopologyKey is "topology.kubernetes.io/zone", each zone is a domain of that topology.
+                    It's a required field.
+                  type: string
+                whenUnsatisfiable:
+                  description: |-
+                    WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy
+                    the spread constraint.
+                    - DoNotSchedule (default) tells the scheduler not to schedule it.
+                    - ScheduleAnyway tells the scheduler to schedule the pod in any location,
+                      but giving higher precedence to topologies that would help reduce the
+                      skew.
+                    A constraint is considered "Unsatisfiable" for an incoming pod
+                    if and only if every possible node assignment for that pod would violate
+                    "MaxSkew" on some topology.
+                    For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same
+                    labelSelector spread as 3/1/1:
+                    | zone1 | zone2 | zone3 |
+                    | P P P |   P   |   P   |
+                    If WhenUnsatisfiable is set to DoNotSchedule, incoming pod can only be scheduled
+                    to zone2(zone3) to become 3/2/1(3/1/2) as ActualSkew(2-1) on zone2(zone3) satisfies
+                    MaxSkew(1). In other words, the cluster can still be imbalanced, but scheduler
+                    won't make it *more* imbalanced.
+                    It's a required field.
+                  type: string
+              required:
+              - maxSkew
+              - topologyKey
+              - whenUnsatisfiable
+              type: object
+            type: array
         type: object
     required:
     - starRocksCluster
diff --git a/deploy/starrocks.com_starrocksclusters.yaml b/deploy/starrocks.com_starrocksclusters.yaml
index 9307fc4c..4ad5796f 100644
--- a/deploy/starrocks.com_starrocksclusters.yaml
+++ b/deploy/starrocks.com_starrocksclusters.yaml
@@ -1996,6 +1996,58 @@ spec:
                   type: string
               type: object
             type: array
+          topologySpreadConstraints:
+            items:
+              properties:
+                labelSelector:
+                  properties:
+                    matchExpressions:
+                      items:
+                        properties:
+                          key:
+                            type: string
+                          operator:
+                            type: string
+                          values:
+                            items:
+                              type: string
+                            type: array
+                        required:
+                        - key
+                        - operator
+                        type: object
+                      type: array
+                    matchLabels:
+                      additionalProperties:
+                        type: string
+                      type: object
+                  type: object
+                  x-kubernetes-map-type: atomic
+                matchLabelKeys:
+                  items:
+                    type: string
+                  type: array
+                  x-kubernetes-list-type: atomic
+                maxSkew:
+                  format: int32
+                  type: integer
+                minDomains:
+                  format: int32
+                  type: integer
+                nodeAffinityPolicy:
+                  type: string
+                nodeTaintsPolicy:
+                  type: string
+                topologyKey:
+                  type: string
+                whenUnsatisfiable:
+                  type: string
+              required:
+              - maxSkew
+              - topologyKey
+              - whenUnsatisfiable
+              type: object
+            type: array
         type: object
       starRocksCnSpec:
         properties:
@@ -4277,6 +4329,58 @@ spec:
                   type: string
               type: object
             type: array
+          topologySpreadConstraints:
+            items:
+              properties:
+                labelSelector:
+                  properties:
+                    matchExpressions:
+                      items:
+                        properties:
+                          key:
+                            type: string
+                          operator:
+                            type: string
+                          values:
+                            items:
+                              type: string
+                            type: array
+                        required:
+                        - key
+                        - operator
+                        type: object
+                      type: array
+                    matchLabels:
+                      additionalProperties:
+                        type: string
+                      type: object
+                  type: object
+                  x-kubernetes-map-type: atomic
+                matchLabelKeys:
+                  items:
+                    type: string
+                  type: array
+                  x-kubernetes-list-type: atomic
+                maxSkew:
+                  format: int32
+                  type: integer
+                minDomains:
+                  format: int32
+                  type: integer
+                nodeAffinityPolicy:
+                  type: string
+                nodeTaintsPolicy:
+                  type: string
+                topologyKey:
+                  type: string
+                whenUnsatisfiable:
+                  type: string
+              required:
+              - maxSkew
+              - topologyKey
+              - whenUnsatisfiable
+              type: object
+            type: array
         type: object
       starRocksFeProxySpec:
         properties:
@@ -4898,6 +5002,58 @@ spec:
                   type: string
               type: object
             type: array
+          topologySpreadConstraints:
+            items:
+              properties:
+                labelSelector:
+                  properties:
+                    matchExpressions:
+                      items:
+                        properties:
+                          key:
+                            type: string
+                          operator:
+                            type: string
+                          values:
+                            items:
+                              type: string
+                            type: array
+                        required:
+                        - key
+                        - operator
+                        type: object
+                      type: array
+                    matchLabels:
+                      additionalProperties:
+                        type: string
+                      type: object
+                  type: object
+                  x-kubernetes-map-type: atomic
+                matchLabelKeys:
+                  items:
+                    type: string
+                  type: array
+                  x-kubernetes-list-type: atomic
+                maxSkew:
+                  format: int32
+                  type: integer
+                minDomains:
+                  format: int32
+                  type: integer
+                nodeAffinityPolicy:
+                  type: string
+                nodeTaintsPolicy:
+                  type: string
+                topologyKey:
+                  type: string
+                whenUnsatisfiable:
+                  type: string
+              required:
+              - maxSkew
+              - topologyKey
+              - whenUnsatisfiable
+              type: object
+            type: array
         type: object
       starRocksFeSpec:
         properties:
@@ -6848,6 +7004,58 @@ spec:
                   type: string
               type: object
             type: array
+          topologySpreadConstraints:
+            items:
+              properties:
+                labelSelector:
+                  properties:
+                    matchExpressions:
+                      items:
+                        properties:
+                          key:
+                            type: string
+                          operator:
+                            type: string
+                          values:
+                            items:
+                              type: string
+                            type: array
+                        required:
+                        - key
+                        - operator
+                        type: object
+                      type: array
+                    matchLabels:
+                      additionalProperties:
+                        type: string
+                      type: object
+                  type: object
+                  x-kubernetes-map-type: atomic
+                matchLabelKeys:
+                  items:
+                    type: string
+                  type: array
+                  x-kubernetes-list-type: atomic
+                maxSkew:
+                  format: int32
+                  type: integer
+                minDomains:
+                  format: int32
+                  type: integer
+                nodeAffinityPolicy:
+                  type: string
+                nodeTaintsPolicy:
+                  type: string
+                topologyKey:
+                  type: string
+                whenUnsatisfiable:
+                  type: string
+              required:
+              - maxSkew
+              - topologyKey
+              - whenUnsatisfiable
+              type: object
+            type: array
         type: object
     type: object
   status:
diff --git a/deploy/starrocks.com_starrockswarehouses.yaml b/deploy/starrocks.com_starrockswarehouses.yaml
index 9973cd92..99d4e5f5 100644
--- a/deploy/starrocks.com_starrockswarehouses.yaml
+++ b/deploy/starrocks.com_starrockswarehouses.yaml
@@ -2318,6 +2318,58 @@ spec:
                   type: string
               type: object
             type: array
+          topologySpreadConstraints:
+            items:
+              properties:
+                labelSelector:
+                  properties:
+                    matchExpressions:
+                      items:
+                        properties:
+                          key:
+                            type: string
+                          operator:
+                            type: string
+                          values:
+                            items:
+                              type: string
+                            type: array
+                        required:
+                        - key
+                        - operator
+                        type: object
+                      type: array
+                    matchLabels:
+                      additionalProperties:
+                        type: string
+                      type: object
+                  type: object
+                  x-kubernetes-map-type: atomic
+                matchLabelKeys:
+                  items:
+                    type: string
+                  type: array
+                  x-kubernetes-list-type: atomic
+                maxSkew:
+                  format: int32
+                  type: integer
+                minDomains:
+                  format: int32
+                  type: integer
+                nodeAffinityPolicy:
+                  type: string
+                nodeTaintsPolicy:
+                  type: string
+                topologyKey:
+                  type: string
+                whenUnsatisfiable:
+                  type: string
+              required:
+              - maxSkew
+              - topologyKey
+              - whenUnsatisfiable
+              type: object
+            type: array
         type: object
     required:
     - starRocksCluster
diff --git a/helm-charts/charts/kube-starrocks/charts/starrocks/templates/_helpers.tpl b/helm-charts/charts/kube-starrocks/charts/starrocks/templates/_helpers.tpl
index 507bd0aa..98950f70 100644
--- a/helm-charts/charts/kube-starrocks/charts/starrocks/templates/_helpers.tpl
+++ b/helm-charts/charts/kube-starrocks/charts/starrocks/templates/_helpers.tpl
@@ -508,6 +508,39 @@ Get the value of the affinity field in the starrocksCnSpec
 {{- end -}}
 {{- end -}}
 
+{{/*
+Get the value of the topologySpreadConstraints field in the starrocksFESpec
+*/}}
+{{- define "starrockscluster.fe.topologySpreadConstraints" -}}
+{{- if .Values.starrocksFESpec.topologySpreadConstraints -}}
+{{- toYaml .Values.starrocksFESpec.topologySpreadConstraints -}}
+{{- else if .Values.starrocksCluster.componentValues.topologySpreadConstraints -}}
+{{- toYaml .Values.starrocksCluster.componentValues.topologySpreadConstraints -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Get the value of the topologySpreadConstraints field in the starrocksBeSpec
+*/}}
+{{- define "starrockscluster.be.topologySpreadConstraints" -}}
+{{- if .Values.starrocksBeSpec.topologySpreadConstraints -}}
+{{- toYaml .Values.starrocksBeSpec.topologySpreadConstraints -}}
+{{- else if .Values.starrocksCluster.componentValues.topologySpreadConstraints -}}
+{{- toYaml .Values.starrocksCluster.componentValues.topologySpreadConstraints -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Get the value of the topologySpreadConstraints field in the starrocksCnSpec
+*/}}
+{{- define "starrockscluster.cn.topologySpreadConstraints" -}}
+{{- if .Values.starrocksCnSpec.topologySpreadConstraints -}}
+{{- toYaml .Values.starrocksCnSpec.topologySpreadConstraints -}}
+{{- else if .Values.starrocksCluster.componentValues.topologySpreadConstraints -}}
+{{- toYaml .Values.starrocksCluster.componentValues.topologySpreadConstraints -}}
+{{- end -}}
+{{- end -}}
+
 {{/*
 Get the value of the runAsNonRoot field in the starrocksFESpec
 */}}
diff --git a/helm-charts/charts/kube-starrocks/charts/starrocks/templates/starrockscluster.yaml b/helm-charts/charts/kube-starrocks/charts/starrocks/templates/starrockscluster.yaml
index 814965a9..3aa40c07 100644
--- a/helm-charts/charts/kube-starrocks/charts/starrocks/templates/starrockscluster.yaml
+++ b/helm-charts/charts/kube-starrocks/charts/starrocks/templates/starrockscluster.yaml
@@ -156,6 +156,10 @@ spec:
       tolerations:
         {{- include "starrockscluster.fe.tolerations" . | nindent 6 }}
       {{- end }}
+      {{- if or .Values.starrocksFESpec.topologySpreadConstraints .Values.starrocksCluster.componentValues.topologySpreadConstraints }}
+      topologySpreadConstraints:
+        {{- include "starrockscluster.fe.topologySpreadConstraints" . | nindent 6 }}
+      {{- end }}
       terminationGracePeriodSeconds: {{ .Values.starrocksFESpec.terminationGracePeriodSeconds }}
       {{- if .Values.starrocksFESpec.startupProbeFailureSeconds }}
       startupProbeFailureSeconds: {{ .Values.starrocksFESpec.startupProbeFailureSeconds }}
@@ -389,6 +393,10 @@ spec:
       tolerations:
         {{- include "starrockscluster.be.tolerations" . | nindent 6 }}
       {{- end }}
+      {{- if or .Values.starrocksBeSpec.topologySpreadConstraints .Values.starrocksCluster.componentValues.topologySpreadConstraints }}
+      topologySpreadConstraints:
+        {{- include "starrockscluster.be.topologySpreadConstraints" . | nindent 6 }}
+      {{- end }}
       terminationGracePeriodSeconds: {{ .Values.starrocksBeSpec.terminationGracePeriodSeconds }}
       {{- if .Values.starrocksBeSpec.startupProbeFailureSeconds }}
       startupProbeFailureSeconds: {{ .Values.starrocksBeSpec.startupProbeFailureSeconds }}
@@ -576,6 +584,10 @@ spec:
      tolerations:
        {{- include "starrockscluster.cn.tolerations" . | nindent 6 }}
      {{- end }}
+      {{- if or .Values.starrocksCnSpec.topologySpreadConstraints .Values.starrocksCluster.componentValues.topologySpreadConstraints }}
+      topologySpreadConstraints:
+        {{- include "starrockscluster.cn.topologySpreadConstraints" . | nindent 6 }}
+      {{- end }}
       terminationGracePeriodSeconds: {{ .Values.starrocksCnSpec.terminationGracePeriodSeconds }}
       {{- if .Values.starrocksCnSpec.startupProbeFailureSeconds }}
       startupProbeFailureSeconds: {{ .Values.starrocksCnSpec.startupProbeFailureSeconds }}
diff --git a/helm-charts/charts/kube-starrocks/charts/starrocks/values.yaml b/helm-charts/charts/kube-starrocks/charts/starrocks/values.yaml
index 03ceb449..47dc5b6f 100644
--- a/helm-charts/charts/kube-starrocks/charts/starrocks/values.yaml
+++ b/helm-charts/charts/kube-starrocks/charts/starrocks/values.yaml
@@ -100,6 +100,15 @@ starrocksCluster:
   #   operator: "Equal|Exists"
   #   value: "value"
   #   effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
+  # topologySpreadConstraints for scheduling pods across failure-domains.
+  # Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/
+  topologySpreadConstraints: []
+  # - maxSkew: 1
+  #   topologyKey: topology.kubernetes.io/zone
+  #   whenUnsatisfiable: DoNotSchedule|ScheduleAnyway
+  #   labelSelector:
+  #     matchLabels:
+  #       foo: bar
   # If specified, the pod's nodeSelector,displayName="Map of nodeSelectors to match when scheduling pods on nodes"
   # Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector
   nodeSelector: {}
@@ -234,6 +243,15 @@ starrocksFESpec:
   #   operator: "Equal|Exists"
   #   value: "value"
   #   effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
+  # topologySpreadConstraints for scheduling pods across failure-domains.
+  # Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/
+  topologySpreadConstraints: []
+  # - maxSkew: 1
+  #   topologyKey: topology.kubernetes.io/zone
+  #   whenUnsatisfiable: DoNotSchedule|ScheduleAnyway
+  #   labelSelector:
+  #     matchLabels:
+  #       foo: bar
   # resources for fe pod.
   resources:
     requests:
@@ -469,6 +487,15 @@ starrocksCnSpec:
   #   operator: "Equal|Exists"
   #   value: "value"
   #   effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
+  # topologySpreadConstraints for scheduling pods across failure-domains.
+  # Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/
+  topologySpreadConstraints: []
+  # - maxSkew: 1
+  #   topologyKey: topology.kubernetes.io/zone
+  #   whenUnsatisfiable: DoNotSchedule|ScheduleAnyway
+  #   labelSelector:
+  #     matchLabels:
+  #       foo: bar
   autoScalingPolicy: {}
   # you can select different versions of HPA (Horizontal Pod Autoscaler) based on the Kubernetes version you are
   # using to ensure compatibility and adaptability. the default version is v2beta2.
@@ -730,6 +757,15 @@ starrocksBeSpec:
   #   operator: "Equal|Exists"
   #   value: "value"
   #   effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
+  # topologySpreadConstraints for scheduling pods across failure-domains.
+  # Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/
+  topologySpreadConstraints: []
+  # - maxSkew: 1
+  #   topologyKey: topology.kubernetes.io/zone
+  #   whenUnsatisfiable: DoNotSchedule|ScheduleAnyway
+  #   labelSelector:
+  #     matchLabels:
+  #       foo: bar
   # resources for be pod.
   resources:
     requests:
diff --git a/helm-charts/charts/kube-starrocks/values.yaml b/helm-charts/charts/kube-starrocks/values.yaml
index a4c791e5..bc17d327 100644
--- a/helm-charts/charts/kube-starrocks/values.yaml
+++ b/helm-charts/charts/kube-starrocks/values.yaml
@@ -208,6 +208,15 @@ starrocks:
    #   operator: "Equal|Exists"
    #   value: "value"
    #   effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
+    # topologySpreadConstraints for scheduling pods across failure-domains.
+    # Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/
+    topologySpreadConstraints: []
+    # - maxSkew: 1
+    #   topologyKey: topology.kubernetes.io/zone
+    #   whenUnsatisfiable: DoNotSchedule|ScheduleAnyway
+    #   labelSelector:
+    #     matchLabels:
+    #       foo: bar
    # If specified, the pod's nodeSelector,displayName="Map of nodeSelectors to match when scheduling pods on nodes"
    # Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector
    nodeSelector: {}
@@ -342,6 +351,15 @@ starrocks:
    #   operator: "Equal|Exists"
    #   value: "value"
    #   effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
+    # topologySpreadConstraints for scheduling pods across failure-domains.
+    # Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/
+    topologySpreadConstraints: []
+    # - maxSkew: 1
+    #   topologyKey: topology.kubernetes.io/zone
+    #   whenUnsatisfiable: DoNotSchedule|ScheduleAnyway
+    #   labelSelector:
+    #     matchLabels:
+    #       foo: bar
    # resources for fe pod.
    resources:
      requests:
@@ -577,6 +595,15 @@ starrocks:
    #   operator: "Equal|Exists"
    #   value: "value"
    #   effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
+    # topologySpreadConstraints for scheduling pods across failure-domains.
+    # Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/
+    topologySpreadConstraints: []
+    # - maxSkew: 1
+    #   topologyKey: topology.kubernetes.io/zone
+    #   whenUnsatisfiable: DoNotSchedule|ScheduleAnyway
+    #   labelSelector:
+    #     matchLabels:
+    #       foo: bar
    autoScalingPolicy: {}
    # you can select different versions of HPA (Horizontal Pod Autoscaler) based on the Kubernetes version you are
    # using to ensure compatibility and adaptability. the default version is v2beta2.
@@ -838,6 +865,15 @@ starrocks:
    #   operator: "Equal|Exists"
    #   value: "value"
    #   effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
+    # topologySpreadConstraints for scheduling pods across failure-domains.
+    # Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/
+    topologySpreadConstraints: []
+    # - maxSkew: 1
+    #   topologyKey: topology.kubernetes.io/zone
+    #   whenUnsatisfiable: DoNotSchedule|ScheduleAnyway
+    #   labelSelector:
+    #     matchLabels:
+    #       foo: bar
    # resources for be pod.
    resources:
      requests:
diff --git a/pkg/apis/starrocks/v1/load_type.go b/pkg/apis/starrocks/v1/load_type.go
index 1d86823c..7608fc05 100644
--- a/pkg/apis/starrocks/v1/load_type.go
+++ b/pkg/apis/starrocks/v1/load_type.go
@@ -12,6 +12,7 @@ type loadInterface interface {
 	GetSchedulerName() string
 	GetNodeSelector() map[string]string
 	GetAffinity() *corev1.Affinity
+	GetTopologySpreadConstraints() []corev1.TopologySpreadConstraint
 	GetTolerations() []corev1.Toleration
 	GetStartupProbeFailureSeconds() *int32
 	GetLivenessProbeFailureSeconds() *int32
@@ -83,6 +84,11 @@ type StarRocksLoadSpec struct {
 	// +optional
 	Tolerations []corev1.Toleration `json:"tolerations,omitempty"`
 
+	// (Optional) TopologySpreadConstraints for spreading pods across failure-domains
+	// More info: https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/
+	// +optional
+	TopologySpreadConstraints []corev1.TopologySpreadConstraint `json:"topologySpreadConstraints,omitempty"`
+
 	// Service defines the template for the associated Kubernetes Service object.
// +optional Service *StarRocksService `json:"service,omitempty"` @@ -246,6 +252,10 @@ func (spec *StarRocksLoadSpec) GetAffinity() *corev1.Affinity { return spec.Affinity } +func (spec *StarRocksLoadSpec) GetTopologySpreadConstraints() []corev1.TopologySpreadConstraint { + return spec.TopologySpreadConstraints +} + func (spec *StarRocksLoadSpec) GetTolerations() []corev1.Toleration { return spec.Tolerations } diff --git a/pkg/apis/starrocks/v1/zz_generated.deepcopy.go b/pkg/apis/starrocks/v1/zz_generated.deepcopy.go index 2f27c8ad..8c9c094d 100644 --- a/pkg/apis/starrocks/v1/zz_generated.deepcopy.go +++ b/pkg/apis/starrocks/v1/zz_generated.deepcopy.go @@ -591,6 +591,13 @@ func (in *StarRocksLoadSpec) DeepCopyInto(out *StarRocksLoadSpec) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.TopologySpreadConstraints != nil { + in, out := &in.TopologySpreadConstraints, &out.TopologySpreadConstraints + *out = make([]corev1.TopologySpreadConstraint, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } if in.Service != nil { in, out := &in.Service, &out.Service *out = new(StarRocksService) diff --git a/pkg/k8sutils/templates/pod/spec.go b/pkg/k8sutils/templates/pod/spec.go index 29e973bf..739437cc 100644 --- a/pkg/k8sutils/templates/pod/spec.go +++ b/pkg/k8sutils/templates/pod/spec.go @@ -265,6 +265,7 @@ func Spec(spec v1.SpecInterface, container corev1.Container, volumes []corev1.Vo ServiceAccountName: spec.GetServiceAccount(), TerminationGracePeriodSeconds: spec.GetTerminationGracePeriodSeconds(), Affinity: spec.GetAffinity(), + TopologySpreadConstraints: spec.GetTopologySpreadConstraints(), Tolerations: spec.GetTolerations(), ImagePullSecrets: spec.GetImagePullSecrets(), NodeSelector: spec.GetNodeSelector(),
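
Note for reviewers (not part of the patch itself): a minimal sketch of how the new field would be used, going by the CRD and chart changes above. The apiVersion/kind and the placement of topologySpreadConstraints under starRocksFeSpec come from this diff; the image tag, replica count, and the pod label in the selector are illustrative assumptions only.

apiVersion: starrocks.com/v1
kind: StarRocksCluster
metadata:
  name: starrockscluster-sample
spec:
  starRocksFeSpec:
    image: starrocks/fe-ubuntu:latest   # illustrative tag
    replicas: 3
    # Spread FE pods evenly across zones. With DoNotSchedule, a pod that would
    # push the zone imbalance past maxSkew stays Pending rather than landing unevenly.
    topologySpreadConstraints:
      - maxSkew: 1
        topologyKey: topology.kubernetes.io/zone
        whenUnsatisfiable: DoNotSchedule
        labelSelector:
          matchLabels:
            app.kubernetes.io/component: fe   # illustrative label

With the Helm charts, the same result should follow from setting starrocksFESpec.topologySpreadConstraints (per component) or starrocksCluster.componentValues.topologySpreadConstraints (shared fallback), matching the precedence implemented in _helpers.tpl above.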