forked from apache/helix
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adding new test case for testing n - n+1 scenario. (apache#2530)
WAGED is constraint based algorithm and in ideal-state calculation it always satisfies all the hard constraint. While trying to converge toward the ideal state, in intermediate state transitions, this constraint is not satified. This test is explicitly writtent to show n - n+1 scenario. --------- Co-authored-by: Komal Desai <[email protected]>
- Loading branch information
1 parent
eecc5d4
commit 6501529
Showing
1 changed file
with
302 additions
and
0 deletions.
There are no files selected for viewing
302 changes: 302 additions & 0 deletions
302
...apache/helix/integration/rebalancer/WagedRebalancer/TestWagedRebalanceHardConstraint.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,302 @@ | ||
package org.apache.helix.integration.rebalancer.WagedRebalancer; | ||
|
||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional warnrmation | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
import java.util.ArrayList; | ||
import java.util.Collections; | ||
import java.util.Date; | ||
import java.util.HashMap; | ||
import java.util.HashSet; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Set; | ||
|
||
import com.google.common.collect.ImmutableMap; | ||
import org.apache.helix.HelixDataAccessor; | ||
import org.apache.helix.TestHelper; | ||
import org.apache.helix.common.ZkTestBase; | ||
import org.apache.helix.controller.rebalancer.waged.AssignmentMetadataStore; | ||
import org.apache.helix.integration.manager.ClusterControllerManager; | ||
import org.apache.helix.integration.manager.MockParticipantManager; | ||
import org.apache.helix.manager.zk.ZKHelixDataAccessor; | ||
import org.apache.helix.manager.zk.ZkBucketDataAccessor; | ||
import org.apache.helix.model.BuiltInStateModelDefinitions; | ||
import org.apache.helix.model.ClusterConfig; | ||
import org.apache.helix.model.CurrentState; | ||
import org.apache.helix.model.ExternalView; | ||
import org.apache.helix.model.IdealState; | ||
import org.apache.helix.model.LiveInstance; | ||
import org.apache.helix.model.ResourceAssignment; | ||
import org.apache.helix.model.ResourceConfig; | ||
import org.apache.helix.tools.ClusterVerifiers.HelixClusterVerifier; | ||
import org.apache.helix.tools.ClusterVerifiers.StrictMatchExternalViewVerifier; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
import org.testng.Assert; | ||
import org.testng.annotations.AfterClass; | ||
import org.testng.annotations.BeforeClass; | ||
import org.testng.annotations.Test; | ||
|
||
/** | ||
* This test case is specially targeting for n - n+1 issue. | ||
* Waged is constraint-based algorithm. The end-state of calculated ideal-state will | ||
* always satisfy hard-constraint. | ||
* The issue is in order to reach the ideal-state, we need to do intermediate | ||
* state-transitions. | ||
* During this phase, hard-constraints are not satisfied. | ||
* This test case is trying to mimic this by having a very tightly constraint | ||
* cluster and increasing the weight for some resource and checking if | ||
* intermediate state satisfies the need or not. | ||
*/ | ||
public class TestWagedRebalanceHardConstraint extends ZkTestBase { | ||
protected final int NUM_NODE = 6; | ||
protected static final int START_PORT = 13000; | ||
protected static final int PARTITIONS = 10; | ||
|
||
protected final String CLASS_NAME = getShortClassName(); | ||
protected final String CLUSTER_NAME = CLUSTER_PREFIX + "_" + CLASS_NAME; | ||
protected ClusterControllerManager _controller; | ||
protected AssignmentMetadataStore _assignmentMetadataStore; | ||
|
||
List<MockParticipantManager> _participants = new ArrayList<>(); | ||
|
||
List<String> _nodes = new ArrayList<>(); | ||
private final Set<String> _allDBs = new HashSet<>(); | ||
private final int _replica = 3; | ||
|
||
private final String _testCapacityKey = "TestCapacityKey"; | ||
private final Map<String, IdealState> _prevIdealState = new HashMap<>(); | ||
|
||
private final Map<String,List<String>> _instanceUsage = new HashMap<>(); | ||
private static final Logger LOG = LoggerFactory.getLogger("TestWagedRebalanceHardConstraint"); | ||
|
||
@BeforeClass | ||
public void beforeClass() throws Exception { | ||
LOG.info("START " + CLASS_NAME + " at " + new Date(System.currentTimeMillis())); | ||
|
||
_gSetupTool.addCluster(CLUSTER_NAME, true); | ||
// create 6 node cluster | ||
for (int i = 0; i < NUM_NODE; i++) { | ||
String storageNodeName = PARTICIPANT_PREFIX + "_" + (START_PORT + i); | ||
_gSetupTool.addInstanceToCluster(CLUSTER_NAME, storageNodeName); | ||
_nodes.add(storageNodeName); | ||
} | ||
|
||
// start dummy participants | ||
for (String node : _nodes) { | ||
MockParticipantManager participant = new MockParticipantManager(ZK_ADDR, CLUSTER_NAME, node); | ||
participant.syncStart(); | ||
_participants.add(participant); | ||
} | ||
|
||
// start controller | ||
String controllerName = CONTROLLER_PREFIX + "_0"; | ||
_controller = new ClusterControllerManager(ZK_ADDR, CLUSTER_NAME, controllerName); | ||
_controller.syncStart(); | ||
|
||
enablePersistBestPossibleAssignment(_gZkClient, CLUSTER_NAME, true); | ||
|
||
// This is same as TestWagedRebalance | ||
// It's a hacky way to workaround the package restriction. Note that we still want to hide the | ||
// AssignmentMetadataStore constructor to prevent unexpected update to the assignment records. | ||
_assignmentMetadataStore = | ||
new AssignmentMetadataStore(new ZkBucketDataAccessor(ZK_ADDR), CLUSTER_NAME) { | ||
public Map<String, ResourceAssignment> getBaseline() { | ||
// Ensure this metadata store always read from the ZK without using cache. | ||
super.reset(); | ||
return super.getBaseline(); | ||
} | ||
|
||
public synchronized Map<String, ResourceAssignment> getBestPossibleAssignment() { | ||
// Ensure this metadata store always read from the ZK without using cache. | ||
super.reset(); | ||
return super.getBestPossibleAssignment(); | ||
} | ||
}; | ||
|
||
// Set test instance capacity and partition weights | ||
HelixDataAccessor dataAccessor = new ZKHelixDataAccessor(CLUSTER_NAME, _baseAccessor); | ||
ClusterConfig clusterConfig = | ||
dataAccessor.getProperty(dataAccessor.keyBuilder().clusterConfig()); | ||
|
||
clusterConfig.setInstanceCapacityKeys(Collections.singletonList(_testCapacityKey)); | ||
clusterConfig.setDefaultInstanceCapacityMap(Collections.singletonMap(_testCapacityKey, 100)); | ||
// Calculation is: 6 nodes, total capacity 600 | ||
// 3 resources * 10 partitions each with 3 replica = 90. | ||
// Total capacity is 600, so each partition should be 6 units. | ||
clusterConfig.setDefaultPartitionWeightMap(Collections.singletonMap(_testCapacityKey, 6)); | ||
dataAccessor.setProperty(dataAccessor.keyBuilder().clusterConfig(), clusterConfig); | ||
} | ||
|
||
@Test | ||
public void testInitialPlacement() { | ||
// Create 3 resources with 10 partitions each. | ||
for (int i = 0; i < 3; i++) { | ||
String db = "Test-WagedDB-" + i; | ||
createResourceWithWagedRebalance(CLUSTER_NAME, db, BuiltInStateModelDefinitions.MasterSlave.name(), | ||
PARTITIONS, _replica, _replica); | ||
_gSetupTool.rebalanceStorageCluster(CLUSTER_NAME, db, _replica); | ||
_allDBs.add(db); | ||
} | ||
validate(); | ||
} | ||
|
||
@Test(dependsOnMethods = { "testInitialPlacement"}) | ||
public void testIncreaseResourcePartitionWeight() throws Exception { | ||
// For this test, let us record our initial prevIdealState. | ||
for (String db : _allDBs) { | ||
IdealState is = | ||
_gSetupTool.getClusterManagementTool().getResourceIdealState(CLUSTER_NAME, db); | ||
_prevIdealState.put(db, is); | ||
} | ||
|
||
// Let us add one more instance | ||
String storageNodeName = PARTICIPANT_PREFIX + "_" + (START_PORT + NUM_NODE); | ||
_gSetupTool.addInstanceToCluster(CLUSTER_NAME, storageNodeName); | ||
_nodes.add(storageNodeName); | ||
MockParticipantManager participant = new MockParticipantManager(ZK_ADDR, CLUSTER_NAME, storageNodeName); | ||
participant.syncStart(); | ||
_participants.add(participant); | ||
|
||
// This is to make sure we have run the pipeline. | ||
Thread.currentThread().sleep(2000); | ||
|
||
LOG.info("After adding the new instance"); | ||
validateIdealState(false); | ||
|
||
// Update the weight for one of the resource. | ||
HelixDataAccessor dataAccessor = new ZKHelixDataAccessor(CLUSTER_NAME, _baseAccessor); | ||
String db = "Test-WagedDB-0"; | ||
ResourceConfig resourceConfig = dataAccessor.getProperty(dataAccessor.keyBuilder().resourceConfig(db)); | ||
if (resourceConfig == null) { | ||
resourceConfig = new ResourceConfig(db); | ||
} | ||
Map<String, Integer> capacityDataMap = ImmutableMap.of(_testCapacityKey, 10); | ||
resourceConfig.setPartitionCapacityMap( | ||
Collections.singletonMap(ResourceConfig.DEFAULT_PARTITION_KEY, capacityDataMap)); | ||
dataAccessor.setProperty(dataAccessor.keyBuilder().resourceConfig(db), resourceConfig); | ||
|
||
// Make sure pipeline is run. | ||
Thread.currentThread().sleep(2000); | ||
|
||
LOG.info("After changing resource partition weight"); | ||
validate(); | ||
validateIdealState(true); | ||
printCurrentState(); | ||
} | ||
|
||
@AfterClass | ||
public void afterClass() throws Exception { | ||
if (_controller != null && _controller.isConnected()) { | ||
_controller.syncStop(); | ||
} | ||
for (MockParticipantManager p : _participants) { | ||
if (p != null && p.isConnected()) { | ||
p.syncStop(); | ||
} | ||
} | ||
deleteCluster(CLUSTER_NAME); | ||
} | ||
|
||
private void validate() { | ||
HelixClusterVerifier _clusterVerifier = | ||
new StrictMatchExternalViewVerifier.Builder(CLUSTER_NAME).setZkAddr(ZK_ADDR) | ||
.setDeactivatedNodeAwareness(true).setResources(_allDBs) | ||
.setWaitTillVerify(TestHelper.DEFAULT_REBALANCE_PROCESSING_WAIT_TIME) | ||
.build(); | ||
try { | ||
Assert.assertTrue(_clusterVerifier.verify(5000)); | ||
} finally { | ||
_clusterVerifier.close(); | ||
} | ||
} | ||
|
||
private void printCurrentState() { | ||
List<String> instances = | ||
_gSetupTool.getClusterManagementTool().getInstancesInCluster(CLUSTER_NAME); | ||
HelixDataAccessor dataAccessor = new ZKHelixDataAccessor(CLUSTER_NAME, _baseAccessor); | ||
|
||
for (String instance : instances) { | ||
LiveInstance liveInstance = | ||
dataAccessor.getProperty(dataAccessor.keyBuilder().liveInstance(instance)); | ||
String sessionId = liveInstance.getEphemeralOwner(); | ||
List<CurrentState> currentStates = dataAccessor.getChildValues(dataAccessor.keyBuilder().currentStates(instance, sessionId), | ||
true); | ||
LOG.info("\n\nCurrentState for instance: " + instance); | ||
for (CurrentState currentState : currentStates) { | ||
LOG.info("\t" + currentState.getRecord().getMapFields().keySet().toString()); | ||
} | ||
} | ||
} | ||
private void validateIdealState(boolean afterWeightChange) { | ||
// Calculate the instance to partition mapping based on previous and new ideal state. | ||
// We will take the union of the two. | ||
// For example: if prevIdealState for instance_0 has partition_0, partition_1 | ||
// and newIdealState for instance_0 has partition_1, partition_2, then the final | ||
// mapping for instance_0 will be partition_0, partition_1, partition_2. | ||
|
||
Map<String, Set<String>> instanceToPartitionMap = new HashMap<>(); | ||
for (String db : _allDBs) { | ||
IdealState newIdealState = | ||
_gSetupTool.getClusterManagementTool().getResourceIdealState(CLUSTER_NAME, db); | ||
IdealState prevIdealState = _prevIdealState.get(db); | ||
|
||
for (String partition : newIdealState.getPartitionSet()) { | ||
Map<String, String> assignmentMap = newIdealState.getRecord().getMapField(partition); | ||
for (String instance : assignmentMap.keySet()) { | ||
if (!instanceToPartitionMap.containsKey(instance)) { | ||
instanceToPartitionMap.put(instance, new HashSet<>()); | ||
} | ||
instanceToPartitionMap.get(instance).add(partition); | ||
} | ||
} | ||
if (prevIdealState != null) { | ||
for (String partition : prevIdealState.getPartitionSet()) { | ||
Map<String, String> assignmentMap = prevIdealState.getRecord().getMapField(partition); | ||
for (String instance : assignmentMap.keySet()) { | ||
if (!instanceToPartitionMap.containsKey(instance)) { | ||
instanceToPartitionMap.put(instance, new HashSet<>()); | ||
} | ||
instanceToPartitionMap.get(instance).add(partition); | ||
} | ||
} | ||
} | ||
_prevIdealState.put(db, newIdealState); | ||
} | ||
for (String instance : instanceToPartitionMap.keySet()) { | ||
int usedInstanceCapacity = 0; | ||
for (String partition : instanceToPartitionMap.get(instance)) { | ||
LOG.info("\tPartition: " + partition); | ||
if (partition.startsWith("Test-WagedDB-0")) { | ||
if (afterWeightChange) { | ||
usedInstanceCapacity += 10; | ||
} else { | ||
usedInstanceCapacity += 6; | ||
} | ||
} else { | ||
usedInstanceCapacity += 6; | ||
} | ||
} | ||
LOG.info("\tIntance: " + instance + " used capacity: " + usedInstanceCapacity); | ||
// For now, this has to be disabled as this test case is negative scenario. | ||
// Assert.assertTrue(usedInstanceCapacity <= 100); | ||
} | ||
} | ||
} |