[YUNIKORN-1957] Add e2e tests for user and group limits with wildcard #909

Closed
1 change: 1 addition & 0 deletions test/e2e/framework/configmanager/constants.go
@@ -46,6 +46,7 @@ const (
NodesPath = "ws/v1/partition/%s/nodes"
UserUsagePath = "ws/v1/partition/%s/usage/user/%s"
GroupUsagePath = "ws/v1/partition/%s/usage/group/%s"
GroupsUsagePath = "ws/v1/partition/%s/usage/groups"
HealthCheckPath = "ws/v1/scheduler/healthcheck"
ValidateConfPath = "ws/v1/validate-conf"
FullStateDumpPath = "ws/v1/fullstatedump"
10 changes: 10 additions & 0 deletions test/e2e/framework/helpers/yunikorn/rest_api_utils.go
@@ -536,3 +536,13 @@ func (c *RClient) GetGroupUsage(partition string, groupName string) (*dao.GroupR
_, err = c.do(req, &groupUsage)
return groupUsage, err
}

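// GetGroupsUsage retrieves the resource usage for all tracked groups in the given partition.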
func (c *RClient) GetGroupsUsage(partition string) ([]*dao.GroupResourceUsageDAOInfo, error) {
req, err := c.newRequest("GET", fmt.Sprintf(configmanager.GroupsUsagePath, partition), nil)
if err != nil {
return nil, err
}
var groupsUsage []*dao.GroupResourceUsageDAOInfo
_, err = c.do(req, &groupsUsage)
return groupsUsage, err
}
235 changes: 235 additions & 0 deletions test/e2e/user_group_limit/user_group_limit_test.go
@@ -708,6 +708,209 @@ var _ = ginkgo.Describe("UserGroupLimit", func() {
return nil
})
})

ginkgo.It("Verify user limit and wildcard user limit", func() {
ginkgo.By("Update config")
// The wait wrapper still can't fully guarantee that the config in AdmissionController has been updated.
yunikorn.WaitForAdmissionControllerRefreshConfAfterAction(func() {
yunikorn.UpdateCustomConfigMapWrapperWithMap(oldConfigMap, "", admissionCustomConfig, func(sc *configs.SchedulerConfig) error {
// remove placement rules so we can control queue
sc.Partitions[0].PlacementRules = nil
err := common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{
Name: "sandbox1",
Limits: []configs.Limit{
{
Limit: "user entry",
Users: []string{user1},
MaxApplications: 1,
MaxResources: map[string]string{
siCommon.Memory: fmt.Sprintf("%dM", mediumMem),
},
},
{
Limit: "wildcard user entry",
Users: []string{"*"},
MaxApplications: 2,
MaxResources: map[string]string{
siCommon.Memory: fmt.Sprintf("%dM", largeMem),
},
},
},
})
if err != nil {
return err
}
return common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{Name: "sandbox2"})
})
})

// usergroup1 can deploy the first sleep pod to root.sandbox1
usergroup1 := &si.UserGroupInformation{User: user1, Groups: []string{group1}}
usergroup1Sandbox1Pod1 := deploySleepPod(usergroup1, sandboxQueue1, true, "because memory usage is less than user entry limit")

// usergroup1 can't deploy the second sleep pod to root.sandbox1 because of the user entry limit
_ = deploySleepPod(usergroup1, sandboxQueue1, false, "because final memory usage is more than user entry limit")
checkUsage(userTestType, user1, sandboxQueue1, []*v1.Pod{usergroup1Sandbox1Pod1})

// usergroup2 can deploy 2 sleep pods to root.sandbox1
usergroup2 := &si.UserGroupInformation{User: user2, Groups: []string{group2}}
usergroup2Sandbox1Pod1 := deploySleepPod(usergroup2, sandboxQueue1, true, "because there is no limit for usergroup2")
checkUsage(userTestType, user2, sandboxQueue1, []*v1.Pod{usergroup2Sandbox1Pod1})

// usergroup2 can deploy the second sleep pod to root.sandbox1
usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}}
usergroup2Sandbox1Pod2 := deploySleepPod(usergroup2, sandboxQueue1, true, "because there is no limit for usergroup2")
checkUsage(userTestType, user2, sandboxQueue1, []*v1.Pod{usergroup2Sandbox1Pod1, usergroup2Sandbox1Pod2})

// usergroup2 can't deploy the third sleep pod to root.sandbox1 because of max-application limit
usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}}
usergroup2Sandbox1Pod3 := deploySleepPod(usergroup2, sandboxQueue1, false, "because final application count is more than wildcard maxapplications")
checkUsage(userTestType, user2, sandboxQueue1, []*v1.Pod{usergroup2Sandbox1Pod1, usergroup2Sandbox1Pod2})

// Update wildcard user entry MaxApplications limit to 3
ginkgo.By("Update config")
// The wait wrapper still can't fully guarantee that the config in AdmissionController has been updated.
yunikorn.WaitForAdmissionControllerRefreshConfAfterAction(func() {
yunikorn.UpdateCustomConfigMapWrapperWithMap(oldConfigMap, "", admissionCustomConfig, func(sc *configs.SchedulerConfig) error {
// remove placement rules so we can control queue
sc.Partitions[0].PlacementRules = nil

err := common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{
Name: "sandbox1",
Limits: []configs.Limit{
{
Limit: "user entry",
Users: []string{user1},
MaxApplications: 1,
MaxResources: map[string]string{
siCommon.Memory: fmt.Sprintf("%dM", mediumMem),
},
},
{
Limit: "wildcard user entry",
Users: []string{"*"},
MaxApplications: 3,
MaxResources: map[string]string{
siCommon.Memory: fmt.Sprintf("%dM", largeMem),
},
},
},
})
if err != nil {
return err
}
return common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{Name: "sandbox2"})
})
})
// usergroup2 can deploy the third sleep pod to root.sandbox1 because the wildcard max-application limit was updated to 3
checkUsage(userTestType, user2, sandboxQueue1, []*v1.Pod{usergroup2Sandbox1Pod1, usergroup2Sandbox1Pod2, usergroup2Sandbox1Pod3})
// usergroup2 can't deploy the fourth sleep pod to root.sandbox1 because of max-application limit
usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}}
_ = deploySleepPod(usergroup2, sandboxQueue1, false, "because final application count is more than wildcard maxapplications")
checkUsage(userTestType, user2, sandboxQueue1, []*v1.Pod{usergroup2Sandbox1Pod1, usergroup2Sandbox1Pod2, usergroup2Sandbox1Pod3})

})

ginkgo.It("Verify group limit and wildcard group limit", func() {
ginkgo.By("Update config")
// The wait wrapper still can't fully guarantee that the config in AdmissionController has been updated.
yunikorn.WaitForAdmissionControllerRefreshConfAfterAction(func() {
yunikorn.UpdateCustomConfigMapWrapperWithMap(oldConfigMap, "", admissionCustomConfig, func(sc *configs.SchedulerConfig) error {
// remove placement rules so we can control queue
sc.Partitions[0].PlacementRules = nil

err := common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{
Name: "sandbox1",
Limits: []configs.Limit{
{
Limit: "group entry",
Groups: []string{group1},
MaxApplications: 1,
MaxResources: map[string]string{
siCommon.Memory: fmt.Sprintf("%dM", mediumMem),
},
},
{
Limit: "wildcard group entry",
Groups: []string{"*"},
MaxApplications: 2,
MaxResources: map[string]string{
siCommon.Memory: fmt.Sprintf("%dM", largeMem),
},
},
},
})
if err != nil {
return err
}
return common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{Name: "sandbox2"})
})
})
// group1 can deploy the first sleep pod to root.sandbox1
usergroup1 := &si.UserGroupInformation{User: user1, Groups: []string{group1}}
group1Sandbox1Pod1 := deploySleepPod(usergroup1, sandboxQueue1, true, "because memory usage is less than group entry limit")
checkUsage(groupTestType, group1, sandboxQueue1, []*v1.Pod{group1Sandbox1Pod1})

// group1 can't deploy the second sleep pod to root.sandbox1
usergroup1 = &si.UserGroupInformation{User: user1, Groups: []string{group1}}
_ = deploySleepPod(usergroup1, sandboxQueue1, false, "because final memory usage is more than group entry limit")
checkUsage(groupTestType, group1, sandboxQueue1, []*v1.Pod{group1Sandbox1Pod1})

// group2 can deploy 2 sleep pods to root.sandbox1
usergroup2 := &si.UserGroupInformation{User: user2, Groups: []string{group2}}
group2Sandbox1Pod1 := deploySleepPod(usergroup2, sandboxQueue1, true, "because there is no limit for group2")
checkUsageWildcardGroups(groupTestType, group2, sandboxQueue1, []*v1.Pod{group2Sandbox1Pod1})

// group2 can deploy the second sleep pod to root.sandbox1
usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}}
group2Sandbox1Pod2 := deploySleepPod(usergroup2, sandboxQueue1, true, "because there is no limit for group2")
checkUsageWildcardGroups(groupTestType, group2, sandboxQueue1, []*v1.Pod{group2Sandbox1Pod1, group2Sandbox1Pod2})

// group2 can't deploy the third sleep pod to root.sandbox1 because of max-application limit
usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}}
group2Sandbox1Pod3 := deploySleepPod(usergroup2, sandboxQueue1, false, "because final application count is more than wildcard maxapplications")
checkUsageWildcardGroups(groupTestType, group2, sandboxQueue1, []*v1.Pod{group2Sandbox1Pod1, group2Sandbox1Pod2})
// Update wildcard group entry MaxApplications limit to 3
ginkgo.By("Update config")
// The wait wrapper still can't fully guarantee that the config in AdmissionController has been updated.
yunikorn.WaitForAdmissionControllerRefreshConfAfterAction(func() {
yunikorn.UpdateCustomConfigMapWrapperWithMap(oldConfigMap, "", admissionCustomConfig, func(sc *configs.SchedulerConfig) error {
// remove placement rules so we can control queue
sc.Partitions[0].PlacementRules = nil

err := common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{
Name: "sandbox1",
Limits: []configs.Limit{
{
Limit: "group entry",
Groups: []string{group1},
MaxApplications: 1,
MaxResources: map[string]string{
siCommon.Memory: fmt.Sprintf("%dM", mediumMem),
},
},
{
Limit: "wildcard group entry",
Groups: []string{"*"},
MaxApplications: 3,
MaxResources: map[string]string{
siCommon.Memory: fmt.Sprintf("%dM", largeMem),
},
},
},
})
if err != nil {
return err
}
return common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{Name: "sandbox2"})
})
})
// group2 can deploy the third sleep pod to root.sandbox1 because the wildcard max-application limit was updated to 3
checkUsageWildcardGroups(groupTestType, group2, sandboxQueue1, []*v1.Pod{group2Sandbox1Pod1, group2Sandbox1Pod2, group2Sandbox1Pod3})
// group2 can't deploy the fourth sleep pod to root.sandbox1 because of max-application limit
usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}}
_ = deploySleepPod(usergroup2, sandboxQueue1, false, "because final application count is more than wildcard maxapplications")
checkUsageWildcardGroups(groupTestType, group2, sandboxQueue1, []*v1.Pod{group2Sandbox1Pod1, group2Sandbox1Pod2, group2Sandbox1Pod3})
})

ginkgo.AfterEach(func() {
tests.DumpClusterInfoIfSpecFailed(suiteName, []string{ns.Name})

@@ -787,3 +990,35 @@ func checkUsage(testType TestType, name string, queuePath string, expectedRunnin
Ω(resourceUsageDAO.ResourceUsage.Resources["pods"]).To(gomega.Equal(resources.Quantity(len(expectedRunningPods))))
Ω(resourceUsageDAO.RunningApplications).To(gomega.ConsistOf(appIDs...))
}

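// checkUsageWildcardGroups verifies that the expected running pods are accounted against the
// wildcard ("*") group entry for the given queue, using the groups usage REST endpoint.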
func checkUsageWildcardGroups(testType TestType, name string, queuePath string, expectedRunningPods []*v1.Pod) {
var rootQueueResourceUsageDAO *dao.ResourceUsageDAOInfo
if testType == groupTestType {
ginkgo.By(fmt.Sprintf("Check group resource usage for %s in queue %s", name, queuePath))
groupUsageDAOInfo, err := restClient.GetGroupsUsage(constants.DefaultPartition)
Ω(err).NotTo(gomega.HaveOccurred())
Ω(groupUsageDAOInfo).NotTo(gomega.BeNil())
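// usage for groups that are matched only by the wildcard limit is reported under the "*" group entry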
for _, groupUsage := range groupUsageDAOInfo {
if groupUsage.GroupName == "*" {
rootQueueResourceUsageDAO = groupUsage.Queues
}
}
}
Ω(rootQueueResourceUsageDAO).NotTo(gomega.BeNil())
var resourceUsageDAO *dao.ResourceUsageDAOInfo
for _, queue := range rootQueueResourceUsageDAO.Children {
if queue.QueuePath == queuePath {
resourceUsageDAO = queue
break
}
}
Ω(resourceUsageDAO).NotTo(gomega.BeNil())

appIDs := make([]interface{}, 0, len(expectedRunningPods))
for _, pod := range expectedRunningPods {
appIDs = append(appIDs, pod.Labels[constants.LabelApplicationID])
}
Ω(resourceUsageDAO.ResourceUsage).NotTo(gomega.BeNil())
Ω(resourceUsageDAO.ResourceUsage.Resources["pods"]).To(gomega.Equal(resources.Quantity(len(expectedRunningPods))))
Ω(resourceUsageDAO.RunningApplications).To(gomega.ConsistOf(appIDs...))
}