Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix for Generic error for persistent task on starting replication #1003

Draft
wants to merge 12 commits into
base: main
Choose a base branch
from
35 changes: 35 additions & 0 deletions .github/workflows/build-local.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
name: IntegTest Remote testing for windows
# This workflow is triggered on pushes to branches matching '*'
on:
push:
branches:
- '*'

# Linux is already covered in build-and-test, so this job only targets Windows.
# It runs the replication integration tests against remote clusters via scripts/integtest.sh.
jobs:
build:
continue-on-error: true
strategy:
matrix:
java:
- 17
os:
- windows-latest
# Job name
name: Java ${{ matrix.java }} On ${{ matrix.os }}
runs-on: ${{ matrix.os }}
steps:
# This step uses the setup-java Github action: https://github.com/actions/setup-java
- name: Set Up JDK ${{ matrix.java }}
uses: actions/setup-java@v1
with:
java-version: ${{ matrix.java }}
# This step uses the checkout Github action: https://github.com/actions/checkout
- name: Checkout Branch
uses: actions/checkout@v2
- name: Build and run Replication tests
shell: bash
run: |
jon=$(bash scripts/integtest.sh -e '[{"cluster_name": "leader","data_nodes": [{"endpoint": "localhost","port": 9200,"transport": 9300},{"endpoint": "localhost","port": 9201,"transport": 9301}],"cluster_manager_nodes": []},{"cluster_name": "follower","data_nodes": [{"endpoint": "localhost","port": 9202,"transport": 9302},{"endpoint": "localhost","port": 9203,"transport":9303}],"cluster_manager_nodes": []}]' -s false -v 2.13.0)
echo $jon
3 changes: 1 addition & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,4 @@ jobs:
- name: Checkout Branch
uses: actions/checkout@v2
- name: Build and run Replication tests
run: |
./gradlew --refresh-dependencies clean release -D"build.snapshot=true" -x test -x IntegTest
run: ./gradlew --refresh-dependencies clean release -D"build.snapshot=true" -x test -x IntegTest
2 changes: 2 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -937,6 +937,8 @@ task integTestRemote (type: RestIntegTestTask) {
systemProperty "tests.cluster.followCluster.total_nodes", "${-> numberOfNodes.toString()}"
systemProperty "tests.cluster.leaderCluster.total_nodes", "${-> numberOfNodes.toString()}"
systemProperty "build.dir", "${buildDir}"
systemProperty "user", System.getProperty("user")
systemProperty "password", System.getProperty("password")

}
filter {
Expand Down
50 changes: 38 additions & 12 deletions scripts/integtest.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -103,24 +103,50 @@ then
exit 1
fi

data=$(python3 -c "import json; cluster=$ENDPOINT_LIST ; data_nodes=cluster; print(data_nodes[0][\"data_nodes\"][0][\"endpoint\"],':',data_nodes[0][\"data_nodes\"][0][\"port\"],':',data_nodes[0][\"data_nodes\"][0][\"transport\"],',',data_nodes[1][\"data_nodes\"][0][\"endpoint\"],':',data_nodes[1][\"data_nodes\"][0][\"port\"],':',data_nodes[1][\"data_nodes\"][0][\"transport\"])" | tr -d "[:blank:]")
# extract_values CLUSTER_NAME FIELD
#
# Prints the value of FIELD (e.g. endpoint, port, transport) for the first
# node listed under the cluster named CLUSTER_NAME in the JSON held in the
# global $ENDPOINT_LIST. Prints nothing if the cluster or the field is not found.
#
# This is a lightweight text scan, not a JSON parser: the input is split into
# records on "," and into fields on ":", so values that themselves contain
# "," or ":" (URLs with a scheme, IPv6 literals) are not supported.
extract_values() {
    local cluster_name="$1"
    local field="$2"

    echo "$ENDPOINT_LIST" | awk -v cluster="$cluster_name" -v field="$field" '
        BEGIN { RS=","; FS=":" }

        # Inside the requested cluster: look for the wanted key in every
        # field of the record, not only the first one. The first node key
        # shares a record with its parent key, for example
        #   "data_nodes": [{"endpoint": "localhost"
        in_cluster {
            # Reaching the next cluster_name means the field is absent from
            # the requested cluster; stop rather than return a foreign value.
            if ($1 ~ "\"cluster_name\"") {
                exit
            }
            for (i = 1; i < NF; i++) {
                if ($i ~ "\"" field "\"") {
                    value = $(i + 1)
                    # Drop quotes, JSON brackets/braces and all whitespace
                    gsub(/[][{}" \t\r\n]/, "", value)
                    print value
                    exit
                }
            }
            next
        }

        # Start scanning once the record naming the requested cluster is seen
        $1 ~ "\"cluster_name\"" && $2 ~ "\"" cluster "\"" { in_cluster = 1 }
    '
}

leader=$(echo $data | cut -d ',' -f1 | cut -d ':' -f1,2 )
follower=$(echo $data | cut -d ',' -f2 | cut -d ':' -f1,2 )
echo "leader: $leader"
echo "follower: $follower"

# Extract values for leader cluster
leader_endpoint=$(extract_values "leader" "endpoint")
leader_port=$(extract_values "leader" "port")
leader_transport=$(extract_values "leader" "transport")

# Extract values for follower cluster
follower_endpoint=$(extract_values "follower" "endpoint")
follower_port=$(extract_values "follower" "port")
follower_transport=$(extract_values "follower" "transport")

# Print extracted data
echo "Leader Endpoint: $leader_endpoint"
echo "Leader Port: $leader_port"
echo "Leader Transport: $leader_transport"
echo "Follower Endpoint: $follower_endpoint"
echo "Follower Port: $follower_port"
echo "Follower Transport: $follower_transport"

# Get number of nodes as the difference between the follower and leader HTTP ports, assuming both leader and follower have the same number of nodes
numNodes=$((${follower##*:} - ${leader##*:}))
numNodes=$((${follower_port} - ${leader_port}))
echo "numNodes: $numNodes"


LTRANSPORT_PORT=$(echo $data | cut -d ',' -f1 | cut -d ':' -f1,3 )
FTRANSPORT_PORT=$(echo $data | cut -d ',' -f2 | cut -d ':' -f1,3 )
echo "LTRANSPORT_PORT: $LTRANSPORT_PORT"
echo "FTRANSPORT_PORT: $FTRANSPORT_PORT"

eval "./gradlew integTestRemote -Dleader.http_host=\"$leader\" -Dfollower.http_host=\"$follower\" -Dfollower.transport_host=\"$FTRANSPORT_PORT\" -Dleader.transport_host=\"$LTRANSPORT_PORT\" -Dsecurity_enabled=\"$SECURITY_ENABLED\" -Duser=\"$USERNAME\" -Dpassword=\"$PASSWORD\" -PnumNodes=$numNodes --console=plain "
./gradlew integTestRemote -Dleader.http_host="$leader_endpoint:$leader_port" -Dfollower.http_host="$follower_endpoint:$follower_port" -Dfollower.transport_host="$follower_endpoint:$follower_transport" -Dleader.transport_host="$leader_endpoint:$leader_transport" -Dsecurity_enabled=$SECURITY_ENABLED -Duser=$USERNAME -Dpassword=$PASSWORD -PnumNodes=$numNodes --console=plain

else
# Single cluster
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,8 @@ class TransportReplicateIndexClusterManagerNodeAction @Inject constructor(transp
persistentTasksService.waitForTaskCondition(task.id, replicateIndexReq.timeout()) { t ->
val replicationState = (t.state as IndexReplicationState?)?.state
replicationState == ReplicationState.FOLLOWING ||
(!replicateIndexReq.waitForRestore && replicationState == ReplicationState.RESTORING)
(!replicateIndexReq.waitForRestore && replicationState == ReplicationState.RESTORING) ||
(!replicateIndexReq.waitForRestore && replicationState == ReplicationState.FAILED)
}

listener.onResponse(AcknowledgedResponse(true))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -829,7 +829,7 @@ open class IndexReplicationTask(id: Long, type: String, action: String, descript
} catch(e: Exception) {
val err = "Unable to initiate restore call for $followerIndexName from $leaderAlias:${leaderIndex.name}"
log.error(err, e)
return FailedState(Collections.emptyMap(), err)
return FailedState(Collections.emptyMap(), e.message!!)
}
cso.waitForNextChange("remote restore start") { inProgressRestore(it) != null }
return RestoreState
Expand Down
Loading