Skip to content

Commit

Permalink
Remove any stale replication tasks from cluster state (#905) (#980)
Browse files Browse the repository at this point in the history
Clear stale replication tasks in STOP API

Signed-off-by: monusingh-1 <[email protected]>
(cherry picked from commit bc9b61a)

Co-authored-by: Monu Singh <[email protected]>
  • Loading branch information
opensearch-trigger-bot[bot] and monusingh-1 committed Jun 2, 2023
1 parent 2a42ee1 commit ba4cfa6
Showing 1 changed file with 38 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ import org.opensearch.common.inject.Inject
import org.opensearch.common.io.stream.StreamInput
import org.opensearch.common.settings.Settings
import org.opensearch.replication.util.stackTraceToString
import org.opensearch.persistent.PersistentTasksCustomMetadata
import org.opensearch.persistent.RemovePersistentTaskAction
import org.opensearch.threadpool.ThreadPool
import org.opensearch.transport.TransportService
import java.io.IOException
Expand Down Expand Up @@ -136,6 +138,7 @@ class TransportStopIndexReplicationAction @Inject constructor(transportService:
}
}
replicationMetadataManager.deleteIndexReplicationMetadata(request.indexName)
removeStaleReplicationTasksFromClusterState(request)
listener.onResponse(AcknowledgedResponse(true))
} catch (e: Exception) {
log.error("Stop replication failed for index[${request.indexName}] with error ${e.stackTraceToString()}")
Expand All @@ -144,6 +147,32 @@ class TransportStopIndexReplicationAction @Inject constructor(transportService:
}
}

private suspend fun removeStaleReplicationTasksFromClusterState(request: StopIndexReplicationRequest) {
try {
val allTasks: PersistentTasksCustomMetadata =
clusterService.state().metadata().custom(PersistentTasksCustomMetadata.TYPE)
for (singleTask in allTasks.tasks()) {
if (isReplicationTask(singleTask, request) && !singleTask.isAssigned){
log.info("Removing task: ${singleTask.id} from cluster state")
val removeRequest: RemovePersistentTaskAction.Request =
RemovePersistentTaskAction.Request(singleTask.id)
client.suspendExecute(RemovePersistentTaskAction.INSTANCE, removeRequest)
}
}
} catch (e: Exception) {
log.info("Could not update cluster state")
}
}

// Remove index replication task metadata, format replication:index:fruit-1
// Remove shard replication task metadata, format replication:[fruit-1][0]
private fun isReplicationTask(
singleTask: PersistentTasksCustomMetadata.PersistentTask<*>,
request: StopIndexReplicationRequest
) = singleTask.id.startsWith("replication:") &&
(singleTask.id == "replication:index:${request.indexName}" || singleTask.id.split(":")[1].contains(request.indexName))


private fun validateReplicationStateOfIndex(request: StopIndexReplicationRequest) {
// If replication blocks/settings are present, Stop action should proceed with the clean-up
// This can happen during settings of follower index are carried over in the snapshot and the restore is
Expand All @@ -153,6 +182,15 @@ class TransportStopIndexReplicationAction @Inject constructor(transportService:
return
}

//check for stale replication tasks
val allTasks: PersistentTasksCustomMetadata? =
clusterService.state()?.metadata()?.custom(PersistentTasksCustomMetadata.TYPE)
allTasks?.tasks()?.forEach{
if (isReplicationTask(it, request) && !it.isAssigned){
return
}
}

val replicationStateParams = getReplicationStateParamsForIndex(clusterService, request.indexName)
?:
throw IllegalArgumentException("No replication in progress for index:${request.indexName}")
Expand Down

0 comments on commit ba4cfa6

Please sign in to comment.