diff --git a/CHANGELOG.md b/CHANGELOG.md index 49233e5a9fd2d..3843c03cf6648 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,111 +3,21 @@ All notable changes to this project are documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). See the [CONTRIBUTING guide](./CONTRIBUTING.md#Changelog) for instructions on how to add changelog entries. -## [Unreleased 2.x] +## [Unreleased 2.17.x] ### Added -- [Workload Management] Add Settings for Workload Management feature ([#15028](https://github.com/opensearch-project/OpenSearch/pull/15028)) -- Fix for hasInitiatedFetching to fix allocation explain and manual reroute APIs (([#14972](https://github.com/opensearch-project/OpenSearch/pull/14972)) -- [Workload Management] Add queryGroupId to Task ([14708](https://github.com/opensearch-project/OpenSearch/pull/14708)) -- Add setting to ignore throttling nodes for allocation of unassigned primaries in remote restore ([#14991](https://github.com/opensearch-project/OpenSearch/pull/14991)) -- [Workload Management] Add Delete QueryGroup API Logic ([#14735](https://github.com/opensearch-project/OpenSearch/pull/14735)) -- [Streaming Indexing] Enhance RestClient with a new streaming API support ([#14437](https://github.com/opensearch-project/OpenSearch/pull/14437)) -- Add basic aggregation support for derived fields ([#14618](https://github.com/opensearch-project/OpenSearch/pull/14618)) -- [Workload Management] Add Create QueryGroup API Logic ([#14680](https://github.com/opensearch-project/OpenSearch/pull/14680))- [Workload Management] Add Create QueryGroup API Logic ([#14680](https://github.com/opensearch-project/OpenSearch/pull/14680)) -- Add ThreadContextPermission for markAsSystemContext and allow core to perform the method ([#15016](https://github.com/opensearch-project/OpenSearch/pull/15016)) -- Add ThreadContextPermission for stashAndMergeHeaders and stashWithOrigin ([#15039](https://github.com/opensearch-project/OpenSearch/pull/15039)) -- [Concurrent Segment Search] Support composite aggregations with scripting ([#15072](https://github.com/opensearch-project/OpenSearch/pull/15072)) -- Add `rangeQuery` and `regexpQuery` for `constant_keyword` field type ([#14711](https://github.com/opensearch-project/OpenSearch/pull/14711)) -- Add took time to request nodes stats ([#15054](https://github.com/opensearch-project/OpenSearch/pull/15054)) -- [Workload Management] Add Get QueryGroup API Logic ([14709](https://github.com/opensearch-project/OpenSearch/pull/14709)) -- [Workload Management] Add Update QueryGroup API Logic ([#14775](https://github.com/opensearch-project/OpenSearch/pull/14775)) -- [Workload Management] QueryGroup resource tracking framework changes ([#13897](https://github.com/opensearch-project/OpenSearch/pull/13897)) -- Support filtering on a large list encoded by bitmap ([#14774](https://github.com/opensearch-project/OpenSearch/pull/14774)) -- Add slice execution listeners to SearchOperationListener interface ([#15153](https://github.com/opensearch-project/OpenSearch/pull/15153)) -- Make balanced shards allocator timebound ([#15239](https://github.com/opensearch-project/OpenSearch/pull/15239)) -- Add allowlist setting for ingest-geoip and ingest-useragent ([#15325](https://github.com/opensearch-project/OpenSearch/pull/15325)) -- Adding access to noSubMatches and noOverlappingMatches in Hyphenation ([#13895](https://github.com/opensearch-project/OpenSearch/pull/13895)) -- Star tree mapping changes ([#14605](https://github.com/opensearch-project/OpenSearch/pull/14605)) -- Add support for index level max slice count setting for concurrent segment search ([#15336](https://github.com/opensearch-project/OpenSearch/pull/15336)) -- Support cancellation for cat shards and node stats API.([#13966](https://github.com/opensearch-project/OpenSearch/pull/13966)) -- [Streaming Indexing] Introduce bulk HTTP API streaming flavor ([#15381](https://github.com/opensearch-project/OpenSearch/pull/15381)) -- Add support for centralize snapshot creation with pinned timestamp ([#15124](https://github.com/opensearch-project/OpenSearch/pull/15124)) -- Add concurrent search support for Derived Fields ([#15326](https://github.com/opensearch-project/OpenSearch/pull/15326)) -- [Workload Management] Add query group stats constructs ([#15343](https://github.com/opensearch-project/OpenSearch/pull/15343))) -- Add limit on number of processors for Ingest pipeline([#15460](https://github.com/opensearch-project/OpenSearch/pull/15465)). -- Add runAs to Subject interface and introduce IdentityAwarePlugin extension point ([#14630](https://github.com/opensearch-project/OpenSearch/pull/14630)) -- [Workload Management] Add rejection logic for co-ordinator and shard level requests ([#15428](https://github.com/opensearch-project/OpenSearch/pull/15428))) -- Adding translog durability validation in index templates ([#15494](https://github.com/opensearch-project/OpenSearch/pull/15494)) -- [Range Queries] Add new approximateable query framework to short-circuit range queries ([#13788](https://github.com/opensearch-project/OpenSearch/pull/13788)) -- [Workload Management] Add query group level failure tracking ([#15227](https://github.com/opensearch-project/OpenSearch/pull/15527)) -- [Reader Writer Separation] Add experimental search replica shard type to achieve reader writer separation ([#15237](https://github.com/opensearch-project/OpenSearch/pull/15237)) -- Add index creation using the context field ([#15290](https://github.com/opensearch-project/OpenSearch/pull/15290)) -- [Remote Publication] Add remote download stats ([#15291](https://github.com/opensearch-project/OpenSearch/pull/15291)) -- Add support to upload snapshot shard blobs with hashed prefix ([#15426](https://github.com/opensearch-project/OpenSearch/pull/15426)) -- Add prefix support to hashed prefix & infix path types on remote store ([#15557](https://github.com/opensearch-project/OpenSearch/pull/15557)) -- Add canRemain method to TargetPoolAllocationDecider to move shards from local to remote pool for hot to warm tiering ([#15010](https://github.com/opensearch-project/OpenSearch/pull/15010)) -- Add support for pluggable deciders for concurrent search ([#15363](https://github.com/opensearch-project/OpenSearch/pull/15363)) - Add path prefix support to hashed prefix snapshots ([#15664](https://github.com/opensearch-project/OpenSearch/pull/15664)) -- Optimise snapshot deletion to speed up snapshot deletion and creation ([#15568](https://github.com/opensearch-project/OpenSearch/pull/15568)) -- [Remote Publication] Added checksum validation for cluster state behind a cluster setting ([#15218](https://github.com/opensearch-project/OpenSearch/pull/15218)) -- Optimize NodeIndicesStats output behind flag ([#14454](https://github.com/opensearch-project/OpenSearch/pull/14454)) -- Add support for comma-separated list of index names to be used with Snapshot Status API ([#15409](https://github.com/opensearch-project/OpenSearch/pull/15409))[SnapshotV2] Snapshot Status API changes (#15409)) -- ClusterManagerTaskThrottler Improvements ([#15508](https://github.com/opensearch-project/OpenSearch/pull/15508)) -- Relax the join validation for Remote State publication ([#15471](https://github.com/opensearch-project/OpenSearch/pull/15471)) -- Reset DiscoveryNodes in all transport node actions request ([#15131](https://github.com/opensearch-project/OpenSearch/pull/15131)) ### Dependencies -- Bump `netty` from 4.1.111.Final to 4.1.112.Final ([#15081](https://github.com/opensearch-project/OpenSearch/pull/15081)) -- Bump `org.apache.commons:commons-lang3` from 3.14.0 to 3.15.0 ([#14861](https://github.com/opensearch-project/OpenSearch/pull/14861)) -- OpenJDK Update (July 2024 Patch releases) ([#14998](https://github.com/opensearch-project/OpenSearch/pull/14998)) -- Bump `com.microsoft.azure:msal4j` from 1.16.1 to 1.17.0 ([#14995](https://github.com/opensearch-project/OpenSearch/pull/14995), [#15420](https://github.com/opensearch-project/OpenSearch/pull/15420)) -- Bump `actions/github-script` from 6 to 7 ([#14997](https://github.com/opensearch-project/OpenSearch/pull/14997)) -- Bump `org.tukaani:xz` from 1.9 to 1.10 ([#15110](https://github.com/opensearch-project/OpenSearch/pull/15110)) -- Bump `org.apache.avro:avro` from 1.11.3 to 1.12.0 in /plugins/repository-hdfs ([#15119](https://github.com/opensearch-project/OpenSearch/pull/15119)) -- Bump `org.bouncycastle:bcpg-fips` from 1.0.7.1 to 2.0.9 ([#15103](https://github.com/opensearch-project/OpenSearch/pull/15103), [#15299](https://github.com/opensearch-project/OpenSearch/pull/15299)) -- Bump `com.azure:azure-core` from 1.49.1 to 1.51.0 ([#15111](https://github.com/opensearch-project/OpenSearch/pull/15111)) -- Bump `org.xerial.snappy:snappy-java` from 1.1.10.5 to 1.1.10.6 ([#15207](https://github.com/opensearch-project/OpenSearch/pull/15207)) -- Bump `com.azure:azure-xml` from 1.0.0 to 1.1.0 ([#15206](https://github.com/opensearch-project/OpenSearch/pull/15206)) -- Bump `reactor` from 3.5.19 to 3.5.20 ([#15262](https://github.com/opensearch-project/OpenSearch/pull/15262)) -- Bump `reactor-netty` from 1.1.21 to 1.1.22 ([#15262](https://github.com/opensearch-project/OpenSearch/pull/15262)) -- Bump `org.apache.kerby:kerb-admin` from 2.0.3 to 2.1.0 ([#15301](https://github.com/opensearch-project/OpenSearch/pull/15301)) -- Bump `com.azure:azure-core-http-netty` from 1.15.1 to 1.15.3 ([#15300](https://github.com/opensearch-project/OpenSearch/pull/15300)) -- Bump `com.gradle.develocity` from 3.17.6 to 3.18 ([#15297](https://github.com/opensearch-project/OpenSearch/pull/15297)) -- Bump `commons-cli:commons-cli` from 1.8.0 to 1.9.0 ([#15298](https://github.com/opensearch-project/OpenSearch/pull/15298)) -- Bump `opentelemetry` from 1.40.0 to 1.41.0 ([#15361](https://github.com/opensearch-project/OpenSearch/pull/15361)) -- Bump `opentelemetry-semconv` from 1.26.0-alpha to 1.27.0-alpha ([#15361](https://github.com/opensearch-project/OpenSearch/pull/15361)) -- Bump `tj-actions/changed-files` from 44 to 45 ([#15422](https://github.com/opensearch-project/OpenSearch/pull/15422)) -- Bump `dnsjava:dnsjava` from 3.6.0 to 3.6.1 ([#15418](https://github.com/opensearch-project/OpenSearch/pull/15418)) -- Bump `com.netflix.nebula.ospackage-base` from 11.9.1 to 11.10.0 ([#15419](https://github.com/opensearch-project/OpenSearch/pull/15419)) -- Bump `org.roaringbitmap:RoaringBitmap` from 1.1.0 to 1.2.1 ([#15423](https://github.com/opensearch-project/OpenSearch/pull/15423)) -- Bump `icu4j` from 70.1 to 75.1 ([#15469](https://github.com/opensearch-project/OpenSearch/pull/15469)) ### Changed -- Add lower limit for primary and replica batch allocators timeout ([#14979](https://github.com/opensearch-project/OpenSearch/pull/14979)) -- Optimize regexp-based include/exclude on aggregations when pattern matches prefixes ([#14371](https://github.com/opensearch-project/OpenSearch/pull/14371)) -- Replace and block usages of org.apache.logging.log4j.util.Strings ([#15238](https://github.com/opensearch-project/OpenSearch/pull/15238)) -- Remote publication using minimum node version for backward compatibility ([#15216](https://github.com/opensearch-project/OpenSearch/pull/15216)) ### Deprecated ### Removed -- Remove some unused code in the search backpressure package ([#15518](https://github.com/opensearch-project/OpenSearch/pull/15518)) ### Fixed -- Fix constraint bug which allows more primary shards than average primary shards per index ([#14908](https://github.com/opensearch-project/OpenSearch/pull/14908)) -- Fix NPE when bulk ingest with empty pipeline ([#15033](https://github.com/opensearch-project/OpenSearch/pull/15033)) -- Fix missing value of FieldSort for unsigned_long ([#14963](https://github.com/opensearch-project/OpenSearch/pull/14963)) -- Fix delete index template failed when the index template matches a data stream but is unused ([#15080](https://github.com/opensearch-project/OpenSearch/pull/15080)) -- Fix array_index_out_of_bounds_exception when indexing documents with field name containing only dot ([#15126](https://github.com/opensearch-project/OpenSearch/pull/15126)) -- Fixed array field name omission in flat_object function for nested JSON ([#13620](https://github.com/opensearch-project/OpenSearch/pull/13620)) -- Fix incorrect parameter names in MinHash token filter configuration handling ([#15233](https://github.com/opensearch-project/OpenSearch/pull/15233)) -- Fix range aggregation optimization ignoring top level queries ([#15287](https://github.com/opensearch-project/OpenSearch/pull/15287)) -- Fix indexing error when flat_object field is explicitly null ([#15375](https://github.com/opensearch-project/OpenSearch/pull/15375)) -- Fix split response processor not included in allowlist ([#15393](https://github.com/opensearch-project/OpenSearch/pull/15393)) -- Fix unchecked cast in dynamic action map getter ([#15394](https://github.com/opensearch-project/OpenSearch/pull/15394)) -- Fix null values indexed as "null" strings in flat_object field ([#14069](https://github.com/opensearch-project/OpenSearch/pull/14069)) -- Fix terms query on wildcard field returns nothing ([#15607](https://github.com/opensearch-project/OpenSearch/pull/15607)) ### Security -[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.16...2.x +[Unreleased 2.17.x]: https://github.com/opensearch-project/OpenSearch/compare/01c5e5642b7450bba2f3a21acdf8cf13539f65eb...2.17 diff --git a/release-notes/opensearch.release-notes-2.17.0.md b/release-notes/opensearch.release-notes-2.17.0.md new file mode 100644 index 0000000000000..18b7c9ac7cd68 --- /dev/null +++ b/release-notes/opensearch.release-notes-2.17.0.md @@ -0,0 +1,104 @@ +## 2024-09-17 Version 2.17.0 Release Notes + +## [2.17.0] +### Added +- [Workload Management] Add Settings for Workload Management feature ([#15028](https://github.com/opensearch-project/OpenSearch/pull/15028)) +- Fix for hasInitiatedFetching to fix allocation explain and manual reroute APIs (([#14972](https://github.com/opensearch-project/OpenSearch/pull/14972)) +- [Workload Management] Add queryGroupId to Task ([14708](https://github.com/opensearch-project/OpenSearch/pull/14708)) +- Add setting to ignore throttling nodes for allocation of unassigned primaries in remote restore ([#14991](https://github.com/opensearch-project/OpenSearch/pull/14991)) +- [Workload Management] Add Delete QueryGroup API Logic ([#14735](https://github.com/opensearch-project/OpenSearch/pull/14735)) +- [Streaming Indexing] Enhance RestClient with a new streaming API support ([#14437](https://github.com/opensearch-project/OpenSearch/pull/14437)) +- Add basic aggregation support for derived fields ([#14618](https://github.com/opensearch-project/OpenSearch/pull/14618)) +- [Workload Management] Add Create QueryGroup API Logic ([#14680](https://github.com/opensearch-project/OpenSearch/pull/14680))- [Workload Management] Add Create QueryGroup API Logic ([#14680](https://github.com/opensearch-project/OpenSearch/pull/14680)) +- Add ThreadContextPermission for markAsSystemContext and allow core to perform the method ([#15016](https://github.com/opensearch-project/OpenSearch/pull/15016)) +- Add ThreadContextPermission for stashAndMergeHeaders and stashWithOrigin ([#15039](https://github.com/opensearch-project/OpenSearch/pull/15039)) +- [Concurrent Segment Search] Support composite aggregations with scripting ([#15072](https://github.com/opensearch-project/OpenSearch/pull/15072)) +- Add `rangeQuery` and `regexpQuery` for `constant_keyword` field type ([#14711](https://github.com/opensearch-project/OpenSearch/pull/14711)) +- Add took time to request nodes stats ([#15054](https://github.com/opensearch-project/OpenSearch/pull/15054)) +- [Workload Management] Add Get QueryGroup API Logic ([14709](https://github.com/opensearch-project/OpenSearch/pull/14709)) +- [Workload Management] Add Update QueryGroup API Logic ([#14775](https://github.com/opensearch-project/OpenSearch/pull/14775)) +- [Workload Management] QueryGroup resource tracking framework changes ([#13897](https://github.com/opensearch-project/OpenSearch/pull/13897)) +- Support filtering on a large list encoded by bitmap ([#14774](https://github.com/opensearch-project/OpenSearch/pull/14774)) +- Add slice execution listeners to SearchOperationListener interface ([#15153](https://github.com/opensearch-project/OpenSearch/pull/15153)) +- Make balanced shards allocator timebound ([#15239](https://github.com/opensearch-project/OpenSearch/pull/15239)) +- Add allowlist setting for ingest-geoip and ingest-useragent ([#15325](https://github.com/opensearch-project/OpenSearch/pull/15325)) +- Adding access to noSubMatches and noOverlappingMatches in Hyphenation ([#13895](https://github.com/opensearch-project/OpenSearch/pull/13895)) +- Star tree mapping changes ([#14605](https://github.com/opensearch-project/OpenSearch/pull/14605)) +- Add support for index level max slice count setting for concurrent segment search ([#15336](https://github.com/opensearch-project/OpenSearch/pull/15336)) +- Support cancellation for cat shards and node stats API.([#13966](https://github.com/opensearch-project/OpenSearch/pull/13966)) +- [Streaming Indexing] Introduce bulk HTTP API streaming flavor ([#15381](https://github.com/opensearch-project/OpenSearch/pull/15381)) +- Add support for centralize snapshot creation with pinned timestamp ([#15124](https://github.com/opensearch-project/OpenSearch/pull/15124)) +- Add concurrent search support for Derived Fields ([#15326](https://github.com/opensearch-project/OpenSearch/pull/15326)) +- [Workload Management] Add query group stats constructs ([#15343](https://github.com/opensearch-project/OpenSearch/pull/15343))) +- Add limit on number of processors for Ingest pipeline([#15460](https://github.com/opensearch-project/OpenSearch/pull/15465)). +- Add runAs to Subject interface and introduce IdentityAwarePlugin extension point ([#14630](https://github.com/opensearch-project/OpenSearch/pull/14630)) +- [Workload Management] Add rejection logic for co-ordinator and shard level requests ([#15428](https://github.com/opensearch-project/OpenSearch/pull/15428))) +- Adding translog durability validation in index templates ([#15494](https://github.com/opensearch-project/OpenSearch/pull/15494)) +- [Range Queries] Add new approximateable query framework to short-circuit range queries ([#13788](https://github.com/opensearch-project/OpenSearch/pull/13788)) +- [Workload Management] Add query group level failure tracking ([#15227](https://github.com/opensearch-project/OpenSearch/pull/15527)) +- [Reader Writer Separation] Add experimental search replica shard type to achieve reader writer separation ([#15237](https://github.com/opensearch-project/OpenSearch/pull/15237)) +- Add index creation using the context field ([#15290](https://github.com/opensearch-project/OpenSearch/pull/15290)) +- [Remote Publication] Add remote download stats ([#15291](https://github.com/opensearch-project/OpenSearch/pull/15291)) +- Add support to upload snapshot shard blobs with hashed prefix ([#15426](https://github.com/opensearch-project/OpenSearch/pull/15426)) +- Add prefix support to hashed prefix & infix path types on remote store ([#15557](https://github.com/opensearch-project/OpenSearch/pull/15557)) +- Add canRemain method to TargetPoolAllocationDecider to move shards from local to remote pool for hot to warm tiering ([#15010](https://github.com/opensearch-project/OpenSearch/pull/15010)) +- Add support for pluggable deciders for concurrent search ([#15363](https://github.com/opensearch-project/OpenSearch/pull/15363)) +- Optimise snapshot deletion to speed up snapshot deletion and creation ([#15568](https://github.com/opensearch-project/OpenSearch/pull/15568)) +- [Remote Publication] Added checksum validation for cluster state behind a cluster setting ([#15218](https://github.com/opensearch-project/OpenSearch/pull/15218)) +- Optimize NodeIndicesStats output behind flag ([#14454](https://github.com/opensearch-project/OpenSearch/pull/14454)) +- Add support for comma-separated list of index names to be used with Snapshot Status API ([#15409](https://github.com/opensearch-project/OpenSearch/pull/15409))[SnapshotV2] Snapshot Status API changes (#15409)) +- ClusterManagerTaskThrottler Improvements ([#15508](https://github.com/opensearch-project/OpenSearch/pull/15508)) +- Relax the join validation for Remote State publication ([#15471](https://github.com/opensearch-project/OpenSearch/pull/15471)) +- Reset DiscoveryNodes in all transport node actions request ([#15131](https://github.com/opensearch-project/OpenSearch/pull/15131)) + +### Dependencies +- Bump `netty` from 4.1.111.Final to 4.1.112.Final ([#15081](https://github.com/opensearch-project/OpenSearch/pull/15081)) +- Bump `org.apache.commons:commons-lang3` from 3.14.0 to 3.15.0 ([#14861](https://github.com/opensearch-project/OpenSearch/pull/14861)) +- OpenJDK Update (July 2024 Patch releases) ([#14998](https://github.com/opensearch-project/OpenSearch/pull/14998)) +- Bump `com.microsoft.azure:msal4j` from 1.16.1 to 1.17.0 ([#14995](https://github.com/opensearch-project/OpenSearch/pull/14995), [#15420](https://github.com/opensearch-project/OpenSearch/pull/15420)) +- Bump `actions/github-script` from 6 to 7 ([#14997](https://github.com/opensearch-project/OpenSearch/pull/14997)) +- Bump `org.tukaani:xz` from 1.9 to 1.10 ([#15110](https://github.com/opensearch-project/OpenSearch/pull/15110)) +- Bump `org.apache.avro:avro` from 1.11.3 to 1.12.0 in /plugins/repository-hdfs ([#15119](https://github.com/opensearch-project/OpenSearch/pull/15119)) +- Bump `org.bouncycastle:bcpg-fips` from 1.0.7.1 to 2.0.9 ([#15103](https://github.com/opensearch-project/OpenSearch/pull/15103), [#15299](https://github.com/opensearch-project/OpenSearch/pull/15299)) +- Bump `com.azure:azure-core` from 1.49.1 to 1.51.0 ([#15111](https://github.com/opensearch-project/OpenSearch/pull/15111)) +- Bump `org.xerial.snappy:snappy-java` from 1.1.10.5 to 1.1.10.6 ([#15207](https://github.com/opensearch-project/OpenSearch/pull/15207)) +- Bump `com.azure:azure-xml` from 1.0.0 to 1.1.0 ([#15206](https://github.com/opensearch-project/OpenSearch/pull/15206)) +- Bump `reactor` from 3.5.19 to 3.5.20 ([#15262](https://github.com/opensearch-project/OpenSearch/pull/15262)) +- Bump `reactor-netty` from 1.1.21 to 1.1.22 ([#15262](https://github.com/opensearch-project/OpenSearch/pull/15262)) +- Bump `org.apache.kerby:kerb-admin` from 2.0.3 to 2.1.0 ([#15301](https://github.com/opensearch-project/OpenSearch/pull/15301)) +- Bump `com.azure:azure-core-http-netty` from 1.15.1 to 1.15.3 ([#15300](https://github.com/opensearch-project/OpenSearch/pull/15300)) +- Bump `com.gradle.develocity` from 3.17.6 to 3.18 ([#15297](https://github.com/opensearch-project/OpenSearch/pull/15297)) +- Bump `commons-cli:commons-cli` from 1.8.0 to 1.9.0 ([#15298](https://github.com/opensearch-project/OpenSearch/pull/15298)) +- Bump `opentelemetry` from 1.40.0 to 1.41.0 ([#15361](https://github.com/opensearch-project/OpenSearch/pull/15361)) +- Bump `opentelemetry-semconv` from 1.26.0-alpha to 1.27.0-alpha ([#15361](https://github.com/opensearch-project/OpenSearch/pull/15361)) +- Bump `tj-actions/changed-files` from 44 to 45 ([#15422](https://github.com/opensearch-project/OpenSearch/pull/15422)) +- Bump `dnsjava:dnsjava` from 3.6.0 to 3.6.1 ([#15418](https://github.com/opensearch-project/OpenSearch/pull/15418)) +- Bump `com.netflix.nebula.ospackage-base` from 11.9.1 to 11.10.0 ([#15419](https://github.com/opensearch-project/OpenSearch/pull/15419)) +- Bump `org.roaringbitmap:RoaringBitmap` from 1.1.0 to 1.2.1 ([#15423](https://github.com/opensearch-project/OpenSearch/pull/15423)) +- Bump `icu4j` from 70.1 to 75.1 ([#15469](https://github.com/opensearch-project/OpenSearch/pull/15469)) + +### Changed +- Add lower limit for primary and replica batch allocators timeout ([#14979](https://github.com/opensearch-project/OpenSearch/pull/14979)) +- Optimize regexp-based include/exclude on aggregations when pattern matches prefixes ([#14371](https://github.com/opensearch-project/OpenSearch/pull/14371)) +- Replace and block usages of org.apache.logging.log4j.util.Strings ([#15238](https://github.com/opensearch-project/OpenSearch/pull/15238)) +- Remote publication using minimum node version for backward compatibility ([#15216](https://github.com/opensearch-project/OpenSearch/pull/15216)) + +### Deprecated + +### Removed +- Remove some unused code in the search backpressure package ([#15518](https://github.com/opensearch-project/OpenSearch/pull/15518)) + +### Fixed +- Fix constraint bug which allows more primary shards than average primary shards per index ([#14908](https://github.com/opensearch-project/OpenSearch/pull/14908)) +- Fix NPE when bulk ingest with empty pipeline ([#15033](https://github.com/opensearch-project/OpenSearch/pull/15033)) +- Fix missing value of FieldSort for unsigned_long ([#14963](https://github.com/opensearch-project/OpenSearch/pull/14963)) +- Fix delete index template failed when the index template matches a data stream but is unused ([#15080](https://github.com/opensearch-project/OpenSearch/pull/15080)) +- Fix array_index_out_of_bounds_exception when indexing documents with field name containing only dot ([#15126](https://github.com/opensearch-project/OpenSearch/pull/15126)) +- Fixed array field name omission in flat_object function for nested JSON ([#13620](https://github.com/opensearch-project/OpenSearch/pull/13620)) +- Fix incorrect parameter names in MinHash token filter configuration handling ([#15233](https://github.com/opensearch-project/OpenSearch/pull/15233)) +- Fix range aggregation optimization ignoring top level queries ([#15287](https://github.com/opensearch-project/OpenSearch/pull/15287)) +- Fix indexing error when flat_object field is explicitly null ([#15375](https://github.com/opensearch-project/OpenSearch/pull/15375)) +- Fix split response processor not included in allowlist ([#15393](https://github.com/opensearch-project/OpenSearch/pull/15393)) +- Fix unchecked cast in dynamic action map getter ([#15394](https://github.com/opensearch-project/OpenSearch/pull/15394)) +- Fix null values indexed as "null" strings in flat_object field ([#14069](https://github.com/opensearch-project/OpenSearch/pull/14069)) +- Fix terms query on wildcard field returns nothing ([#15607](https://github.com/opensearch-project/OpenSearch/pull/15607)) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RestoreShallowSnapshotV2IT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RestoreShallowSnapshotV2IT.java new file mode 100644 index 0000000000000..c5a55f16cab2b --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RestoreShallowSnapshotV2IT.java @@ -0,0 +1,805 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.remotestore; + +import org.opensearch.action.DocWriteResponse; +import org.opensearch.action.admin.cluster.remotestore.restore.RestoreRemoteStoreRequest; +import org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse; +import org.opensearch.action.admin.indices.delete.DeleteIndexRequest; +import org.opensearch.action.admin.indices.recovery.RecoveryResponse; +import org.opensearch.action.delete.DeleteResponse; +import org.opensearch.action.support.PlainActionFuture; +import org.opensearch.client.Client; +import org.opensearch.client.Requests; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.routing.RecoverySource; +import org.opensearch.common.Nullable; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.io.PathUtils; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.io.IOUtils; +import org.opensearch.core.index.Index; +import org.opensearch.core.rest.RestStatus; +import org.opensearch.index.IndexService; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.remote.RemoteStoreEnums; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.indices.IndicesService; +import org.opensearch.indices.RemoteStoreSettings; +import org.opensearch.indices.recovery.RecoveryState; +import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.snapshots.AbstractSnapshotIntegTestCase; +import org.opensearch.snapshots.SnapshotInfo; +import org.opensearch.snapshots.SnapshotRestoreException; +import org.opensearch.snapshots.SnapshotState; +import org.opensearch.test.InternalTestCluster; +import org.opensearch.test.OpenSearchIntegTestCase; +import org.junit.After; +import org.junit.Before; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.ExecutionException; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_STORE_ENABLED; +import static org.opensearch.index.remote.RemoteStoreEnums.DataCategory.SEGMENTS; +import static org.opensearch.index.remote.RemoteStoreEnums.DataCategory.TRANSLOG; +import static org.opensearch.index.remote.RemoteStoreEnums.DataType.DATA; +import static org.opensearch.index.remote.RemoteStoreEnums.DataType.METADATA; +import static org.opensearch.indices.RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.lessThanOrEqualTo; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class RestoreShallowSnapshotV2IT extends AbstractSnapshotIntegTestCase { + + private static final String BASE_REMOTE_REPO = "test-rs-repo" + TEST_REMOTE_STORE_REPO_SUFFIX; + private Path remoteRepoPath; + + @Before + public void setup() { + remoteRepoPath = randomRepoPath().toAbsolutePath(); + } + + @After + public void teardown() { + clusterAdmin().prepareCleanupRepository(BASE_REMOTE_REPO).get(); + } + + @Override + protected Settings nodeSettings(int nodeOrdinal) { + return Settings.builder() + .put(super.nodeSettings(nodeOrdinal)) + .put(remoteStoreClusterSettings(BASE_REMOTE_REPO, remoteRepoPath)) + .put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED.getKey(), true) + .build(); + } + + @Override + protected Settings.Builder getRepositorySettings(Path location, boolean shallowCopyEnabled) { + Settings.Builder settingsBuilder = randomRepositorySettings(); + settingsBuilder.put("location", location); + if (shallowCopyEnabled) { + settingsBuilder.put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + } + return settingsBuilder; + } + + private Settings.Builder getIndexSettings(int numOfShards, int numOfReplicas) { + Settings.Builder settingsBuilder = Settings.builder() + .put(super.indexSettings()) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numOfShards) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numOfReplicas) + .put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), "300s"); + return settingsBuilder; + } + + private void indexDocuments(Client client, String indexName, int numOfDocs) { + indexDocuments(client, indexName, 0, numOfDocs); + } + + protected void indexDocuments(Client client, String indexName, int fromId, int toId) { + for (int i = fromId; i < toId; i++) { + String id = Integer.toString(i); + client.prepareIndex(indexName).setId(id).setSource("text", "sometext").get(); + } + } + + private void assertDocsPresentInIndex(Client client, String indexName, int numOfDocs) { + for (int i = 0; i < numOfDocs; i++) { + String id = Integer.toString(i); + logger.info("checking for index " + indexName + " with docId" + id); + assertTrue("doc with id" + id + " is not present for index " + indexName, client.prepareGet(indexName, id).get().isExists()); + } + } + + public void testRestoreOperationsShallowCopyEnabled() throws Exception { + String clusterManagerNode = internalCluster().startClusterManagerOnlyNode(); + String primary = internalCluster().startDataOnlyNode(); + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String snapshotRepoName = "test-restore-snapshot-repo"; + String snapshotName1 = "test-restore-snapshot1"; + String snapshotName2 = "test-restore-snapshot2"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + String restoredIndexName1 = indexName1 + "-restored"; + String restoredIndexName2 = indexName2 + "-restored"; + + createRepository(snapshotRepoName, "fs", getRepositorySettings(absolutePath1, true)); + + Client client = client(); + Settings indexSettings = getIndexSettings(1, 0).build(); + createIndex(indexName1, indexSettings); + + Settings indexSettings2 = getIndexSettings(1, 0).build(); + createIndex(indexName2, indexSettings2); + + final int numDocsInIndex1 = 5; + final int numDocsInIndex2 = 6; + indexDocuments(client, indexName1, numDocsInIndex1); + indexDocuments(client, indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + internalCluster().startDataOnlyNode(); + logger.info("--> snapshot"); + + SnapshotInfo snapshotInfo = createSnapshot(snapshotRepoName, snapshotName1, new ArrayList<>()); + assertThat(snapshotInfo.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(snapshotInfo.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo.successfulShards(), equalTo(snapshotInfo.totalShards())); + + updateRepository(snapshotRepoName, "fs", getRepositorySettings(absolutePath1, false)); + SnapshotInfo snapshotInfo2 = createSnapshot( + snapshotRepoName, + snapshotName2, + new ArrayList<>(Arrays.asList(indexName1, indexName2)) + ); + assertThat(snapshotInfo2.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(snapshotInfo2.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo2.successfulShards(), equalTo(snapshotInfo2.totalShards())); + + DeleteResponse deleteResponse = client().prepareDelete(indexName1, "0").execute().actionGet(); + assertEquals(deleteResponse.getResult(), DocWriteResponse.Result.DELETED); + indexDocuments(client, indexName1, numDocsInIndex1, numDocsInIndex1 + randomIntBetween(2, 5)); + ensureGreen(indexName1); + + RestoreSnapshotResponse restoreSnapshotResponse1 = client.admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(false) + .setIndices(indexName1) + .setRenamePattern(indexName1) + .setRenameReplacement(restoredIndexName1) + .get(); + RestoreSnapshotResponse restoreSnapshotResponse2 = client.admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName2) + .setWaitForCompletion(false) + .setIndices(indexName2) + .setRenamePattern(indexName2) + .setRenameReplacement(restoredIndexName2) + .get(); + assertEquals(restoreSnapshotResponse1.status(), RestStatus.ACCEPTED); + assertEquals(restoreSnapshotResponse2.status(), RestStatus.ACCEPTED); + ensureGreen(restoredIndexName1, restoredIndexName2); + assertDocsPresentInIndex(client, restoredIndexName1, numDocsInIndex1); + assertDocsPresentInIndex(client, restoredIndexName2, numDocsInIndex2); + + // deleting data for restoredIndexName1 and restoring from remote store. + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(primary)); + ensureRed(restoredIndexName1); + // Re-initialize client to make sure we are not using client from stopped node. + client = client(clusterManagerNode); + assertAcked(client.admin().indices().prepareClose(restoredIndexName1)); + client.admin() + .cluster() + .restoreRemoteStore( + new RestoreRemoteStoreRequest().indices(restoredIndexName1).restoreAllShards(true), + PlainActionFuture.newFuture() + ); + ensureYellowAndNoInitializingShards(restoredIndexName1); + ensureGreen(restoredIndexName1); + assertDocsPresentInIndex(client(), restoredIndexName1, numDocsInIndex1); + // indexing some new docs and validating + indexDocuments(client, restoredIndexName1, numDocsInIndex1, numDocsInIndex1 + 2); + ensureGreen(restoredIndexName1); + assertDocsPresentInIndex(client, restoredIndexName1, numDocsInIndex1 + 2); + } + + public void testRemoteStoreCustomDataOnIndexCreationAndRestore() { + String clusterManagerNode = internalCluster().startClusterManagerOnlyNode(); + internalCluster().startDataOnlyNode(); + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String snapshotRepoName = "test-restore-snapshot-repo"; + String snapshotName1 = "test-restore-snapshot1"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + String restoredIndexName1version1 = indexName1 + "-restored-1"; + String restoredIndexName1version2 = indexName1 + "-restored-2"; + + client(clusterManagerNode).admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), RemoteStoreEnums.PathType.FIXED)) + .get(); + createRepository(snapshotRepoName, "fs", getRepositorySettings(absolutePath1, true)); + Client client = client(); + Settings indexSettings = getIndexSettings(1, 0).build(); + createIndex(indexName1, indexSettings); + + indexDocuments(client, indexName1, randomIntBetween(5, 10)); + ensureGreen(indexName1); + validatePathType(indexName1, RemoteStoreEnums.PathType.FIXED); + + logger.info("--> snapshot"); + SnapshotInfo snapshotInfo = createSnapshot(snapshotRepoName, snapshotName1, new ArrayList<>(Arrays.asList(indexName1))); + assertEquals(SnapshotState.SUCCESS, snapshotInfo.state()); + assertTrue(snapshotInfo.successfulShards() > 0); + assertEquals(snapshotInfo.totalShards(), snapshotInfo.successfulShards()); + + RestoreSnapshotResponse restoreSnapshotResponse = client.admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(false) + .setRenamePattern(indexName1) + .setRenameReplacement(restoredIndexName1version1) + .get(); + assertEquals(RestStatus.ACCEPTED, restoreSnapshotResponse.status()); + ensureGreen(restoredIndexName1version1); + validatePathType(restoredIndexName1version1, RemoteStoreEnums.PathType.FIXED); + + client(clusterManagerNode).admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings( + Settings.builder().put(CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), RemoteStoreEnums.PathType.HASHED_PREFIX) + ) + .get(); + + restoreSnapshotResponse = client.admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(false) + .setRenamePattern(indexName1) + .setRenameReplacement(restoredIndexName1version2) + .get(); + assertEquals(RestStatus.ACCEPTED, restoreSnapshotResponse.status()); + ensureGreen(restoredIndexName1version2); + validatePathType( + restoredIndexName1version2, + RemoteStoreEnums.PathType.HASHED_PREFIX, + RemoteStoreEnums.PathHashAlgorithm.FNV_1A_COMPOSITE_1 + ); + + // Create index with cluster setting cluster.remote_store.index.path.type as hashed_prefix. + indexSettings = getIndexSettings(1, 0).build(); + createIndex(indexName2, indexSettings); + ensureGreen(indexName2); + validatePathType(indexName2, RemoteStoreEnums.PathType.HASHED_PREFIX, RemoteStoreEnums.PathHashAlgorithm.FNV_1A_COMPOSITE_1); + + // Validating that custom data has not changed for indexes which were created before the cluster setting got updated + validatePathType(indexName1, RemoteStoreEnums.PathType.FIXED); + + // Create Snapshot of index 2 + String snapshotName2 = "test-restore-snapshot2"; + snapshotInfo = createSnapshot(snapshotRepoName, snapshotName2, new ArrayList<>(List.of(indexName2))); + assertEquals(SnapshotState.SUCCESS, snapshotInfo.state()); + assertTrue(snapshotInfo.successfulShards() > 0); + assertEquals(snapshotInfo.totalShards(), snapshotInfo.successfulShards()); + + // Update cluster settings to FIXED + client(clusterManagerNode).admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), RemoteStoreEnums.PathType.FIXED)) + .get(); + + // Close index 2 + assertAcked(client().admin().indices().prepareClose(indexName2)); + restoreSnapshotResponse = client.admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName2) + .setWaitForCompletion(false) + .setIndices(indexName2) + .get(); + assertEquals(RestStatus.ACCEPTED, restoreSnapshotResponse.status()); + ensureGreen(indexName2); + + // Validating that custom data has not changed for testindex2 which was created before the cluster setting got updated + validatePathType(indexName2, RemoteStoreEnums.PathType.HASHED_PREFIX, RemoteStoreEnums.PathHashAlgorithm.FNV_1A_COMPOSITE_1); + } + + private void validatePathType(String index, RemoteStoreEnums.PathType pathType) { + validatePathType(index, pathType, null); + } + + private void validatePathType( + String index, + RemoteStoreEnums.PathType pathType, + @Nullable RemoteStoreEnums.PathHashAlgorithm pathHashAlgorithm + ) { + ClusterState state = client().admin().cluster().prepareState().execute().actionGet().getState(); + // Validate that the remote_store custom data is present in index metadata for the created index. + Map remoteCustomData = state.metadata().index(index).getCustomData(IndexMetadata.REMOTE_STORE_CUSTOM_KEY); + assertNotNull(remoteCustomData); + assertEquals(pathType.name(), remoteCustomData.get(RemoteStoreEnums.PathType.NAME)); + if (Objects.nonNull(pathHashAlgorithm)) { + assertEquals(pathHashAlgorithm.name(), remoteCustomData.get(RemoteStoreEnums.PathHashAlgorithm.NAME)); + } + } + + public void testRestoreInSameRemoteStoreEnabledIndex() throws IOException { + String clusterManagerNode = internalCluster().startClusterManagerOnlyNode(); + String primary = internalCluster().startDataOnlyNode(); + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String snapshotRepoName = "test-restore-snapshot-repo"; + String snapshotName1 = "test-restore-snapshot1"; + String snapshotName2 = "test-restore-snapshot2"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + String restoredIndexName2 = indexName2 + "-restored"; + + boolean enableShallowCopy = randomBoolean(); + createRepository(snapshotRepoName, "fs", getRepositorySettings(absolutePath1, enableShallowCopy)); + + Client client = client(); + Settings indexSettings = getIndexSettings(1, 0).build(); + createIndex(indexName1, indexSettings); + + Settings indexSettings2 = getIndexSettings(1, 0).build(); + createIndex(indexName2, indexSettings2); + + final int numDocsInIndex1 = 5; + final int numDocsInIndex2 = 6; + indexDocuments(client, indexName1, numDocsInIndex1); + indexDocuments(client, indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + internalCluster().startDataOnlyNode(); + logger.info("--> snapshot"); + SnapshotInfo snapshotInfo1 = createSnapshot( + snapshotRepoName, + snapshotName1, + new ArrayList<>(Arrays.asList(indexName1, indexName2)) + ); + assertThat(snapshotInfo1.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo1.successfulShards(), equalTo(snapshotInfo1.totalShards())); + assertThat(snapshotInfo1.state(), equalTo(SnapshotState.SUCCESS)); + + updateRepository(snapshotRepoName, "fs", getRepositorySettings(absolutePath1, false)); + SnapshotInfo snapshotInfo2 = createSnapshot( + snapshotRepoName, + snapshotName2, + new ArrayList<>(Arrays.asList(indexName1, indexName2)) + ); + assertThat(snapshotInfo2.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo2.successfulShards(), equalTo(snapshotInfo2.totalShards())); + assertThat(snapshotInfo2.state(), equalTo(SnapshotState.SUCCESS)); + + DeleteResponse deleteResponse = client().prepareDelete(indexName1, "0").execute().actionGet(); + assertEquals(deleteResponse.getResult(), DocWriteResponse.Result.DELETED); + indexDocuments(client, indexName1, numDocsInIndex1, numDocsInIndex1 + randomIntBetween(2, 5)); + ensureGreen(indexName1); + + assertAcked(client().admin().indices().prepareClose(indexName1)); + + RestoreSnapshotResponse restoreSnapshotResponse1 = client.admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(false) + .setIndices(indexName1) + .get(); + RestoreSnapshotResponse restoreSnapshotResponse2 = client.admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName2) + .setWaitForCompletion(false) + .setIndices(indexName2) + .setRenamePattern(indexName2) + .setRenameReplacement(restoredIndexName2) + .get(); + assertEquals(restoreSnapshotResponse1.status(), RestStatus.ACCEPTED); + assertEquals(restoreSnapshotResponse2.status(), RestStatus.ACCEPTED); + ensureGreen(indexName1, restoredIndexName2); + + assertRemoteSegmentsAndTranslogUploaded(restoredIndexName2); + assertDocsPresentInIndex(client, indexName1, numDocsInIndex1); + assertDocsPresentInIndex(client, restoredIndexName2, numDocsInIndex2); + // indexing some new docs and validating + indexDocuments(client, indexName1, numDocsInIndex1, numDocsInIndex1 + 2); + ensureGreen(indexName1); + assertDocsPresentInIndex(client, indexName1, numDocsInIndex1 + 2); + + // deleting data for restoredIndexName1 and restoring from remote store. + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(primary)); + ensureRed(indexName1); + // Re-initialize client to make sure we are not using client from stopped node. + client = client(clusterManagerNode); + assertAcked(client.admin().indices().prepareClose(indexName1)); + client.admin() + .cluster() + .restoreRemoteStore(new RestoreRemoteStoreRequest().indices(indexName1).restoreAllShards(true), PlainActionFuture.newFuture()); + ensureYellowAndNoInitializingShards(indexName1); + ensureGreen(indexName1); + assertDocsPresentInIndex(client(), indexName1, numDocsInIndex1); + // indexing some new docs and validating + indexDocuments(client, indexName1, numDocsInIndex1 + 2, numDocsInIndex1 + 4); + ensureGreen(indexName1); + assertDocsPresentInIndex(client, indexName1, numDocsInIndex1 + 4); + } + + void assertRemoteSegmentsAndTranslogUploaded(String idx) throws IOException { + Client client = client(); + String translogPathFixedPrefix = RemoteStoreSettings.CLUSTER_REMOTE_STORE_TRANSLOG_PATH_PREFIX.get(getNodeSettings()); + String segmentsPathFixedPrefix = RemoteStoreSettings.CLUSTER_REMOTE_STORE_SEGMENTS_PATH_PREFIX.get(getNodeSettings()); + String path = getShardLevelBlobPath(client, idx, new BlobPath(), "0", TRANSLOG, METADATA, translogPathFixedPrefix).buildAsString(); + Path remoteTranslogMetadataPath = Path.of(remoteRepoPath + "/" + path); + path = getShardLevelBlobPath(client, idx, new BlobPath(), "0", TRANSLOG, DATA, translogPathFixedPrefix).buildAsString(); + Path remoteTranslogDataPath = Path.of(remoteRepoPath + "/" + path); + path = getShardLevelBlobPath(client, idx, new BlobPath(), "0", SEGMENTS, METADATA, segmentsPathFixedPrefix).buildAsString(); + Path segmentMetadataPath = Path.of(remoteRepoPath + "/" + path); + path = getShardLevelBlobPath(client, idx, new BlobPath(), "0", SEGMENTS, DATA, segmentsPathFixedPrefix).buildAsString(); + Path segmentDataPath = Path.of(remoteRepoPath + "/" + path); + + try ( + Stream translogMetadata = Files.list(remoteTranslogMetadataPath); + Stream translogData = Files.list(remoteTranslogDataPath); + Stream segmentMetadata = Files.list(segmentMetadataPath); + Stream segmentData = Files.list(segmentDataPath); + + ) { + assertTrue(translogData.count() > 0); + assertTrue(translogMetadata.count() > 0); + assertTrue(segmentMetadata.count() > 0); + assertTrue(segmentData.count() > 0); + } + + } + + public void testRemoteRestoreIndexRestoredFromSnapshot() throws IOException, ExecutionException, InterruptedException { + internalCluster().startClusterManagerOnlyNode(); + internalCluster().startDataOnlyNodes(2); + + String indexName1 = "testindex1"; + String snapshotRepoName = "test-restore-snapshot-repo"; + String snapshotName1 = "test-restore-snapshot1"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + createRepository(snapshotRepoName, "fs", getRepositorySettings(absolutePath1, true)); + + Settings indexSettings = getIndexSettings(1, 0).build(); + createIndex(indexName1, indexSettings); + + final int numDocsInIndex1 = randomIntBetween(20, 30); + indexDocuments(client(), indexName1, numDocsInIndex1); + flushAndRefresh(indexName1); + ensureGreen(indexName1); + + logger.info("--> snapshot"); + SnapshotInfo snapshotInfo1 = createSnapshot(snapshotRepoName, snapshotName1, new ArrayList<>(Arrays.asList(indexName1))); + assertThat(snapshotInfo1.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo1.successfulShards(), equalTo(snapshotInfo1.totalShards())); + assertThat(snapshotInfo1.state(), equalTo(SnapshotState.SUCCESS)); + + assertAcked(client().admin().indices().delete(new DeleteIndexRequest(indexName1)).get()); + assertFalse(indexExists(indexName1)); + + RestoreSnapshotResponse restoreSnapshotResponse1 = client().admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(false) + .setIndices(indexName1) + .get(); + + assertEquals(restoreSnapshotResponse1.status(), RestStatus.ACCEPTED); + ensureGreen(indexName1); + assertDocsPresentInIndex(client(), indexName1, numDocsInIndex1); + + assertRemoteSegmentsAndTranslogUploaded(indexName1); + + // Clear the local data before stopping the node. This will make sure that remote translog is empty. + IndexShard indexShard = getIndexShard(primaryNodeName(indexName1), indexName1); + try (Stream files = Files.list(indexShard.shardPath().resolveTranslog())) { + IOUtils.deleteFilesIgnoringExceptions(files.collect(Collectors.toList())); + } + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(primaryNodeName(indexName1))); + + ensureRed(indexName1); + + client().admin() + .cluster() + .restoreRemoteStore(new RestoreRemoteStoreRequest().indices(indexName1).restoreAllShards(false), PlainActionFuture.newFuture()); + + ensureGreen(indexName1); + assertDocsPresentInIndex(client(), indexName1, numDocsInIndex1); + } + + private IndexShard getIndexShard(String node, String indexName) { + final Index index = resolveIndex(indexName); + IndicesService indicesService = internalCluster().getInstance(IndicesService.class, node); + IndexService indexService = indicesService.indexService(index); + assertNotNull(indexService); + final Optional shardId = indexService.shardIds().stream().findFirst(); + return shardId.map(indexService::getShard).orElse(null); + } + + public void testRestoreShallowSnapshotRepository() throws ExecutionException, InterruptedException { + String indexName1 = "testindex1"; + String snapshotRepoName = "test-restore-snapshot-repo"; + String remoteStoreRepoNameUpdated = "test-rs-repo-updated" + TEST_REMOTE_STORE_REPO_SUFFIX; + String snapshotName1 = "test-restore-snapshot1"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + Path absolutePath2 = randomRepoPath().toAbsolutePath(); + String[] pathTokens = absolutePath1.toString().split("/"); + String basePath = pathTokens[pathTokens.length - 1]; + Arrays.copyOf(pathTokens, pathTokens.length - 1); + Path location = PathUtils.get(String.join("/", pathTokens)); + pathTokens = absolutePath2.toString().split("/"); + String basePath2 = pathTokens[pathTokens.length - 1]; + Arrays.copyOf(pathTokens, pathTokens.length - 1); + Path location2 = PathUtils.get(String.join("/", pathTokens)); + logger.info("Path 1 [{}]", absolutePath1); + logger.info("Path 2 [{}]", absolutePath2); + String restoredIndexName1 = indexName1 + "-restored"; + + createRepository(snapshotRepoName, "fs", getRepositorySettings(location, basePath, true)); + + Client client = client(); + Settings indexSettings = Settings.builder() + .put(super.indexSettings()) + .put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), "300s") + .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .build(); + createIndex(indexName1, indexSettings); + + int numDocsInIndex1 = randomIntBetween(2, 5); + indexDocuments(client, indexName1, numDocsInIndex1); + + ensureGreen(indexName1); + + logger.info("--> snapshot"); + SnapshotInfo snapshotInfo1 = createSnapshot(snapshotRepoName, snapshotName1, new ArrayList<>(List.of(indexName1))); + assertThat(snapshotInfo1.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo1.successfulShards(), equalTo(snapshotInfo1.totalShards())); + assertThat(snapshotInfo1.state(), equalTo(SnapshotState.SUCCESS)); + + client().admin().indices().close(Requests.closeIndexRequest(indexName1)).get(); + createRepository(remoteStoreRepoNameUpdated, "fs", remoteRepoPath); + RestoreSnapshotResponse restoreSnapshotResponse2 = client.admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(true) + .setIndices(indexName1) + .setRenamePattern(indexName1) + .setRenameReplacement(restoredIndexName1) + .setSourceRemoteStoreRepository(remoteStoreRepoNameUpdated) + .get(); + + assertTrue(restoreSnapshotResponse2.getRestoreInfo().failedShards() == 0); + ensureGreen(restoredIndexName1); + assertDocsPresentInIndex(client, restoredIndexName1, numDocsInIndex1); + + // ensure recovery details are non-zero + RecoveryResponse recoveryResponse = client().admin().indices().prepareRecoveries(restoredIndexName1).execute().actionGet(); + assertEquals(1, recoveryResponse.getTotalShards()); + assertEquals(1, recoveryResponse.getSuccessfulShards()); + assertEquals(0, recoveryResponse.getFailedShards()); + assertEquals(1, recoveryResponse.shardRecoveryStates().size()); + assertTrue(recoveryResponse.shardRecoveryStates().containsKey(restoredIndexName1)); + assertEquals(1, recoveryResponse.shardRecoveryStates().get(restoredIndexName1).size()); + + RecoveryState recoveryState = recoveryResponse.shardRecoveryStates().get(restoredIndexName1).get(0); + assertEquals(RecoveryState.Stage.DONE, recoveryState.getStage()); + assertEquals(0, recoveryState.getShardId().getId()); + assertTrue(recoveryState.getPrimary()); + assertEquals(RecoverySource.Type.SNAPSHOT, recoveryState.getRecoverySource().getType()); + assertThat(recoveryState.getIndex().time(), greaterThanOrEqualTo(0L)); + + // ensure populated file details + assertTrue(recoveryState.getIndex().totalFileCount() > 0); + assertTrue(recoveryState.getIndex().totalRecoverFiles() > 0); + assertTrue(recoveryState.getIndex().recoveredFileCount() > 0); + assertThat(recoveryState.getIndex().recoveredFilesPercent(), greaterThanOrEqualTo(0.0f)); + assertThat(recoveryState.getIndex().recoveredFilesPercent(), lessThanOrEqualTo(100.0f)); + assertFalse(recoveryState.getIndex().fileDetails().isEmpty()); + + // ensure populated bytes details + assertTrue(recoveryState.getIndex().recoveredBytes() > 0L); + assertTrue(recoveryState.getIndex().totalBytes() > 0L); + assertTrue(recoveryState.getIndex().totalRecoverBytes() > 0L); + assertThat(recoveryState.getIndex().recoveredBytesPercent(), greaterThanOrEqualTo(0.0f)); + assertThat(recoveryState.getIndex().recoveredBytesPercent(), lessThanOrEqualTo(100.0f)); + + // indexing some new docs and validating + indexDocuments(client, restoredIndexName1, numDocsInIndex1, numDocsInIndex1 + 2); + ensureGreen(restoredIndexName1); + assertDocsPresentInIndex(client, restoredIndexName1, numDocsInIndex1 + 2); + } + + public void testRestoreShallowSnapshotIndexAfterSnapshot() throws ExecutionException, InterruptedException { + String indexName1 = "testindex1"; + String snapshotRepoName = "test-restore-snapshot-repo"; + String remoteStoreRepoNameUpdated = "test-rs-repo-updated" + TEST_REMOTE_STORE_REPO_SUFFIX; + String snapshotName1 = "test-restore-snapshot1"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + Path absolutePath2 = randomRepoPath().toAbsolutePath(); + String[] pathTokens = absolutePath1.toString().split("/"); + String basePath = pathTokens[pathTokens.length - 1]; + Arrays.copyOf(pathTokens, pathTokens.length - 1); + Path location = PathUtils.get(String.join("/", pathTokens)); + pathTokens = absolutePath2.toString().split("/"); + String basePath2 = pathTokens[pathTokens.length - 1]; + Arrays.copyOf(pathTokens, pathTokens.length - 1); + Path location2 = PathUtils.get(String.join("/", pathTokens)); + logger.info("Path 1 [{}]", absolutePath1); + logger.info("Path 2 [{}]", absolutePath2); + String restoredIndexName1 = indexName1 + "-restored"; + + createRepository(snapshotRepoName, "fs", getRepositorySettings(location, basePath, true)); + + Client client = client(); + Settings indexSettings = Settings.builder() + .put(super.indexSettings()) + .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .build(); + createIndex(indexName1, indexSettings); + + int numDocsInIndex1 = randomIntBetween(2, 5); + indexDocuments(client, indexName1, numDocsInIndex1); + + ensureGreen(indexName1); + + logger.info("--> snapshot"); + SnapshotInfo snapshotInfo1 = createSnapshot(snapshotRepoName, snapshotName1, new ArrayList<>(List.of(indexName1))); + assertThat(snapshotInfo1.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo1.successfulShards(), equalTo(snapshotInfo1.totalShards())); + assertThat(snapshotInfo1.state(), equalTo(SnapshotState.SUCCESS)); + + int extraNumDocsInIndex1 = randomIntBetween(20, 50); + indexDocuments(client, indexName1, extraNumDocsInIndex1); + refresh(indexName1); + + client().admin().indices().close(Requests.closeIndexRequest(indexName1)).get(); + createRepository(remoteStoreRepoNameUpdated, "fs", remoteRepoPath); + RestoreSnapshotResponse restoreSnapshotResponse2 = client.admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(true) + .setIndices(indexName1) + .setRenamePattern(indexName1) + .setRenameReplacement(restoredIndexName1) + .setSourceRemoteStoreRepository(remoteStoreRepoNameUpdated) + .get(); + + assertTrue(restoreSnapshotResponse2.getRestoreInfo().failedShards() == 0); + ensureGreen(restoredIndexName1); + assertDocsPresentInIndex(client, restoredIndexName1, numDocsInIndex1); + + // indexing some new docs and validating + indexDocuments(client, restoredIndexName1, numDocsInIndex1, numDocsInIndex1 + 2); + ensureGreen(restoredIndexName1); + assertDocsPresentInIndex(client, restoredIndexName1, numDocsInIndex1 + 2); + } + + public void testInvalidRestoreRequestScenarios() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + internalCluster().startDataOnlyNode(); + String index = "test-index"; + String snapshotRepo = "test-restore-snapshot-repo"; + String newRemoteStoreRepo = "test-new-rs-repo"; + String snapshotName1 = "test-restore-snapshot1"; + String snapshotName2 = "test-restore-snapshot2"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + String restoredIndex = index + "-restored"; + + createRepository(snapshotRepo, "fs", getRepositorySettings(absolutePath1, true)); + + Client client = client(); + Settings indexSettings = getIndexSettings(1, 0).build(); + createIndex(index, indexSettings); + + final int numDocsInIndex = 5; + indexDocuments(client, index, numDocsInIndex); + ensureGreen(index); + + internalCluster().startDataOnlyNode(); + logger.info("--> snapshot"); + + SnapshotInfo snapshotInfo = createSnapshot(snapshotRepo, snapshotName1, new ArrayList<>(List.of(index))); + assertThat(snapshotInfo.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(snapshotInfo.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo.successfulShards(), equalTo(snapshotInfo.totalShards())); + + updateRepository(snapshotRepo, "fs", getRepositorySettings(absolutePath1, false)); + SnapshotInfo snapshotInfo2 = createSnapshot(snapshotRepo, snapshotName2, new ArrayList<>(List.of(index))); + assertThat(snapshotInfo2.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(snapshotInfo2.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo2.successfulShards(), equalTo(snapshotInfo2.totalShards())); + + DeleteResponse deleteResponse = client().prepareDelete(index, "0").execute().actionGet(); + assertEquals(deleteResponse.getResult(), DocWriteResponse.Result.DELETED); + indexDocuments(client, index, numDocsInIndex, numDocsInIndex + randomIntBetween(2, 5)); + ensureGreen(index); + + // try index restore with remote store disabled + SnapshotRestoreException exception = expectThrows( + SnapshotRestoreException.class, + () -> client().admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepo, snapshotName1) + .setWaitForCompletion(false) + .setIgnoreIndexSettings(SETTING_REMOTE_STORE_ENABLED) + .setIndices(index) + .setRenamePattern(index) + .setRenameReplacement(restoredIndex) + .get() + ); + assertTrue(exception.getMessage().contains("cannot remove setting [index.remote_store.enabled] on restore")); + + // try index restore with remote store repository modified + Settings remoteStoreIndexSettings = Settings.builder() + .put(IndexMetadata.SETTING_REMOTE_SEGMENT_STORE_REPOSITORY, newRemoteStoreRepo) + .build(); + + exception = expectThrows( + SnapshotRestoreException.class, + () -> client().admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepo, snapshotName1) + .setWaitForCompletion(false) + .setIndexSettings(remoteStoreIndexSettings) + .setIndices(index) + .setRenamePattern(index) + .setRenameReplacement(restoredIndex) + .get() + ); + assertTrue(exception.getMessage().contains("cannot modify setting [index.remote_store.segment.repository]" + " on restore")); + + // try index restore with remote store repository and translog store repository disabled + exception = expectThrows( + SnapshotRestoreException.class, + () -> client().admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepo, snapshotName1) + .setWaitForCompletion(false) + .setIgnoreIndexSettings( + IndexMetadata.SETTING_REMOTE_SEGMENT_STORE_REPOSITORY, + IndexMetadata.SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY + ) + .setIndices(index) + .setRenamePattern(index) + .setRenameReplacement(restoredIndex) + .get() + ); + assertTrue(exception.getMessage().contains("cannot remove setting [index.remote_store.segment.repository]" + " on restore")); + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotITV2.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotV2IT.java similarity index 96% rename from server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotITV2.java rename to server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotV2IT.java index 02b6ea47172c7..44d5c0a28cd9a 100644 --- a/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotITV2.java +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotV2IT.java @@ -32,7 +32,7 @@ import static org.hamcrest.Matchers.lessThan; @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) -public class DeleteSnapshotITV2 extends AbstractSnapshotIntegTestCase { +public class DeleteSnapshotV2IT extends AbstractSnapshotIntegTestCase { private static final String REMOTE_REPO_NAME = "remote-store-repo-name"; @@ -276,9 +276,11 @@ public void testRemoteStoreCleanupForDeletedIndexForSnapshotV2() throws Exceptio Path indexPath = Path.of(String.valueOf(remoteStoreRepoPath), indexUUID); Path shardPath = Path.of(String.valueOf(indexPath), "0"); Path segmentsPath = Path.of(String.valueOf(shardPath), "segments"); + Path translogPath = Path.of(String.valueOf(shardPath), "translog"); // Get total segments remote store directory file count for deleted index and shard 0 int segmentFilesCountBeforeDeletingSnapshot1 = RemoteStoreBaseIntegTestCase.getFileCount(segmentsPath); + int translogFilesCountBeforeDeletingSnapshot1 = RemoteStoreBaseIntegTestCase.getFileCount(translogPath); RemoteStoreSettings.setPinnedTimestampsLookbackInterval(TimeValue.ZERO); @@ -312,6 +314,13 @@ public void testRemoteStoreCleanupForDeletedIndexForSnapshotV2() throws Exceptio assertThat(RemoteStoreBaseIntegTestCase.getFileCount(segmentsPath), lessThan(segmentFilesCountAfterDeletingSnapshot1)); } catch (Exception e) {} }, 60, TimeUnit.SECONDS); + + assertBusy(() -> { + try { + assertThat(RemoteStoreBaseIntegTestCase.getFileCount(translogPath), lessThan(translogFilesCountBeforeDeletingSnapshot1)); + } catch (Exception e) {} + }, 60, TimeUnit.SECONDS); + } private Settings snapshotV2Settings(Path remoteStoreRepoPath) { diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/restore/RestoreSnapshotRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/restore/RestoreSnapshotRequest.java index 660d59f0438af..cd0db61efa56e 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/restore/RestoreSnapshotRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/restore/RestoreSnapshotRequest.java @@ -122,6 +122,8 @@ private static StorageType fromString(String string) { private StorageType storageType = StorageType.LOCAL; @Nullable private String sourceRemoteStoreRepository = null; + @Nullable + private String sourceRemoteTranslogRepository = null; @Nullable // if any snapshot UUID will do private String snapshotUuid; @@ -165,6 +167,9 @@ public RestoreSnapshotRequest(StreamInput in) throws IOException { if (in.getVersion().onOrAfter(Version.V_2_10_0)) { sourceRemoteStoreRepository = in.readOptionalString(); } + if (in.getVersion().onOrAfter(Version.V_2_17_0)) { + sourceRemoteTranslogRepository = in.readOptionalString(); + } } @Override @@ -198,6 +203,9 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_10_0)) { out.writeOptionalString(sourceRemoteStoreRepository); } + if (out.getVersion().onOrAfter(Version.V_2_17_0)) { + out.writeOptionalString(sourceRemoteTranslogRepository); + } } @Override @@ -560,6 +568,16 @@ public RestoreSnapshotRequest setSourceRemoteStoreRepository(String sourceRemote return this; } + /** + * Sets Source Remote Translog Repository for all the restored indices + * + * @param sourceRemoteTranslogRepository name of the remote translog repository that should be used for all restored indices. + */ + public RestoreSnapshotRequest setSourceRemoteTranslogRepository(String sourceRemoteTranslogRepository) { + this.sourceRemoteTranslogRepository = sourceRemoteTranslogRepository; + return this; + } + /** * Returns Source Remote Store Repository for all the restored indices * @@ -569,6 +587,15 @@ public String getSourceRemoteStoreRepository() { return sourceRemoteStoreRepository; } + /** + * Returns Source Remote Translog Repository for all the restored indices + * + * @return source Remote Translog Repository + */ + public String getSourceRemoteTranslogRepository() { + return sourceRemoteTranslogRepository; + } + /** * Parses restore definition * @@ -688,6 +715,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (sourceRemoteStoreRepository != null) { builder.field("source_remote_store_repository", sourceRemoteStoreRepository); } + if (sourceRemoteTranslogRepository != null) { + builder.field("source_remote_translog_repository", sourceRemoteTranslogRepository); + } builder.endObject(); return builder; } @@ -716,7 +746,8 @@ public boolean equals(Object o) { && Arrays.equals(ignoreIndexSettings, that.ignoreIndexSettings) && Objects.equals(snapshotUuid, that.snapshotUuid) && Objects.equals(storageType, that.storageType) - && Objects.equals(sourceRemoteStoreRepository, that.sourceRemoteStoreRepository); + && Objects.equals(sourceRemoteStoreRepository, that.sourceRemoteStoreRepository) + && Objects.equals(sourceRemoteTranslogRepository, that.sourceRemoteTranslogRepository); return equals; } @@ -736,7 +767,8 @@ public int hashCode() { indexSettings, snapshotUuid, storageType, - sourceRemoteStoreRepository + sourceRemoteStoreRepository, + sourceRemoteTranslogRepository ); result = 31 * result + Arrays.hashCode(indices); result = 31 * result + Arrays.hashCode(ignoreIndexSettings); diff --git a/server/src/main/java/org/opensearch/cluster/routing/RecoverySource.java b/server/src/main/java/org/opensearch/cluster/routing/RecoverySource.java index 75bd0c2d01b13..181cb063f5544 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/RecoverySource.java +++ b/server/src/main/java/org/opensearch/cluster/routing/RecoverySource.java @@ -265,6 +265,9 @@ public static class SnapshotRecoverySource extends RecoverySource { private final boolean isSearchableSnapshot; private final boolean remoteStoreIndexShallowCopy; private final String sourceRemoteStoreRepository; + private final String sourceRemoteTranslogRepository; + + private final long pinnedTimestamp; public SnapshotRecoverySource(String restoreUUID, Snapshot snapshot, Version version, IndexId indexId) { this(restoreUUID, snapshot, version, indexId, false, false, null); @@ -278,6 +281,30 @@ public SnapshotRecoverySource( boolean isSearchableSnapshot, boolean remoteStoreIndexShallowCopy, @Nullable String sourceRemoteStoreRepository + ) { + this( + restoreUUID, + snapshot, + version, + indexId, + isSearchableSnapshot, + remoteStoreIndexShallowCopy, + sourceRemoteStoreRepository, + null, + 0L + ); + } + + public SnapshotRecoverySource( + String restoreUUID, + Snapshot snapshot, + Version version, + IndexId indexId, + boolean isSearchableSnapshot, + boolean remoteStoreIndexShallowCopy, + @Nullable String sourceRemoteStoreRepository, + @Nullable String sourceRemoteTranslogRepository, + long pinnedTimestamp ) { this.restoreUUID = restoreUUID; this.snapshot = Objects.requireNonNull(snapshot); @@ -286,6 +313,8 @@ public SnapshotRecoverySource( this.isSearchableSnapshot = isSearchableSnapshot; this.remoteStoreIndexShallowCopy = remoteStoreIndexShallowCopy; this.sourceRemoteStoreRepository = sourceRemoteStoreRepository; + this.sourceRemoteTranslogRepository = sourceRemoteTranslogRepository; + this.pinnedTimestamp = pinnedTimestamp; } SnapshotRecoverySource(StreamInput in) throws IOException { @@ -309,6 +338,13 @@ public SnapshotRecoverySource( remoteStoreIndexShallowCopy = false; sourceRemoteStoreRepository = null; } + if (in.getVersion().onOrAfter(Version.V_2_17_0)) { + sourceRemoteTranslogRepository = in.readOptionalString(); + pinnedTimestamp = in.readLong(); + } else { + sourceRemoteTranslogRepository = null; + pinnedTimestamp = 0L; + } } public String restoreUUID() { @@ -341,10 +377,18 @@ public String sourceRemoteStoreRepository() { return sourceRemoteStoreRepository; } + public String sourceRemoteTranslogRepository() { + return sourceRemoteTranslogRepository; + } + public boolean remoteStoreIndexShallowCopy() { return remoteStoreIndexShallowCopy; } + public long pinnedTimestamp() { + return pinnedTimestamp; + } + @Override protected void writeAdditionalFields(StreamOutput out) throws IOException { out.writeString(restoreUUID); @@ -362,6 +406,10 @@ protected void writeAdditionalFields(StreamOutput out) throws IOException { out.writeBoolean(remoteStoreIndexShallowCopy); out.writeOptionalString(sourceRemoteStoreRepository); } + if (out.getVersion().onOrAfter(Version.V_2_17_0)) { + out.writeOptionalString(sourceRemoteTranslogRepository); + out.writeLong(pinnedTimestamp); + } } @Override @@ -378,7 +426,8 @@ public void addAdditionalFields(XContentBuilder builder, ToXContent.Params param .field("restoreUUID", restoreUUID) .field("isSearchableSnapshot", isSearchableSnapshot) .field("remoteStoreIndexShallowCopy", remoteStoreIndexShallowCopy) - .field("sourceRemoteStoreRepository", sourceRemoteStoreRepository); + .field("sourceRemoteStoreRepository", sourceRemoteStoreRepository) + .field("sourceRemoteTranslogRepository", sourceRemoteTranslogRepository); } @Override @@ -403,8 +452,11 @@ public boolean equals(Object o) { && isSearchableSnapshot == that.isSearchableSnapshot && remoteStoreIndexShallowCopy == that.remoteStoreIndexShallowCopy && sourceRemoteStoreRepository != null - ? sourceRemoteStoreRepository.equals(that.sourceRemoteStoreRepository) - : that.sourceRemoteStoreRepository == null; + ? sourceRemoteStoreRepository.equals(that.sourceRemoteStoreRepository) + : that.sourceRemoteStoreRepository == null && sourceRemoteTranslogRepository != null + ? sourceRemoteTranslogRepository.equals(that.sourceRemoteTranslogRepository) + : that.sourceRemoteTranslogRepository == null && pinnedTimestamp == that.pinnedTimestamp; + } @Override @@ -416,10 +468,11 @@ public int hashCode() { version, isSearchableSnapshot, remoteStoreIndexShallowCopy, - sourceRemoteStoreRepository + sourceRemoteStoreRepository, + sourceRemoteTranslogRepository, + pinnedTimestamp ); } - } /** diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 0ea738a3b9900..2b14367f3ddc7 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -152,6 +152,7 @@ import org.opensearch.index.recovery.RecoveryStats; import org.opensearch.index.refresh.RefreshStats; import org.opensearch.index.remote.RemoteSegmentStats; +import org.opensearch.index.remote.RemoteStorePathStrategy; import org.opensearch.index.remote.RemoteStoreStatsTrackerFactory; import org.opensearch.index.search.stats.SearchStats; import org.opensearch.index.search.stats.ShardSearchStats; @@ -2487,6 +2488,10 @@ private void loadGlobalCheckpointToReplicationTracker() throws IOException { * Operations from the translog will be replayed to bring lucene up to date. **/ public void openEngineAndRecoverFromTranslog() throws IOException { + openEngineAndRecoverFromTranslog(true); + } + + public void openEngineAndRecoverFromTranslog(boolean syncFromRemote) throws IOException { recoveryState.validateCurrentStage(RecoveryState.Stage.INDEX); maybeCheckIndex(); recoveryState.setStage(RecoveryState.Stage.TRANSLOG); @@ -2507,7 +2512,16 @@ public void openEngineAndRecoverFromTranslog() throws IOException { loadGlobalCheckpointToReplicationTracker(); } - innerOpenEngineAndTranslog(replicationTracker); + if (isSnapshotV2Restore()) { + translogConfig.setDownloadRemoteTranslogOnInit(false); + } + + innerOpenEngineAndTranslog(replicationTracker, syncFromRemote); + + if (isSnapshotV2Restore()) { + translogConfig.setDownloadRemoteTranslogOnInit(true); + } + getEngine().recoverFromTranslog(translogRecoveryRunner, Long.MAX_VALUE); } @@ -2568,7 +2582,7 @@ private void innerOpenEngineAndTranslog(LongSupplier globalCheckpointSupplier, b if (shardRouting.primary()) { if (syncFromRemote) { syncRemoteTranslogAndUpdateGlobalCheckpoint(); - } else { + } else if (isSnapshotV2Restore() == false) { // we will enter this block when we do not want to recover from remote translog. // currently only during snapshot restore, we are coming into this block. // here, as while initiliazing remote translog we cannot skip downloading translog files, @@ -2614,6 +2628,11 @@ private void innerOpenEngineAndTranslog(LongSupplier globalCheckpointSupplier, b recoveryState.validateCurrentStage(RecoveryState.Stage.TRANSLOG); } + private boolean isSnapshotV2Restore() { + return routingEntry().recoverySource().getType() == RecoverySource.Type.SNAPSHOT + && ((SnapshotRecoverySource) routingEntry().recoverySource()).pinnedTimestamp() > 0; + } + private boolean assertSequenceNumbersInCommit() throws IOException { final Map userData = fetchUserData(); assert userData.containsKey(SequenceNumbers.LOCAL_CHECKPOINT_KEY) : "commit point doesn't contains a local checkpoint"; @@ -2899,14 +2918,26 @@ public void restoreFromSnapshotAndRemoteStore( assert recoveryState.getRecoverySource().getType() == RecoverySource.Type.SNAPSHOT : "invalid recovery type: " + recoveryState.getRecoverySource(); StoreRecovery storeRecovery = new StoreRecovery(shardId, logger); - storeRecovery.recoverFromSnapshotAndRemoteStore( - this, - repository, - repositoriesService, - listener, - remoteStoreSettings.getSegmentsPathFixedPrefix(), - threadPool - ); + SnapshotRecoverySource recoverySource = (SnapshotRecoverySource) recoveryState().getRecoverySource(); + if (recoverySource.pinnedTimestamp() != 0) { + storeRecovery.recoverShallowSnapshotV2( + this, + repository, + repositoriesService, + listener, + remoteStoreSettings.getSegmentsPathFixedPrefix(), + threadPool + ); + } else { + storeRecovery.recoverFromSnapshotAndRemoteStore( + this, + repository, + repositoriesService, + listener, + remoteStoreSettings.getSegmentsPathFixedPrefix(), + threadPool + ); + } } catch (Exception e) { listener.onFailure(e); } @@ -5020,16 +5051,33 @@ public void syncTranslogFilesFromRemoteTranslog() throws IOException { TranslogFactory translogFactory = translogFactorySupplier.apply(indexSettings, shardRouting); assert translogFactory instanceof RemoteBlobStoreInternalTranslogFactory; Repository repository = ((RemoteBlobStoreInternalTranslogFactory) translogFactory).getRepository(); + syncTranslogFilesFromGivenRemoteTranslog( + repository, + shardId, + indexSettings.getRemoteStorePathStrategy(), + indexSettings().isTranslogMetadataEnabled(), + 0 + ); + } + + public void syncTranslogFilesFromGivenRemoteTranslog( + Repository repository, + ShardId shardId, + RemoteStorePathStrategy remoteStorePathStrategy, + boolean isTranslogMetadataEnabled, + long timestamp + ) throws IOException { RemoteFsTranslog.download( repository, shardId, getThreadPool(), shardPath().resolveTranslog(), - indexSettings.getRemoteStorePathStrategy(), + remoteStorePathStrategy, remoteStoreSettings, logger, shouldSeedRemoteStore(), - indexSettings().isTranslogMetadataEnabled() + isTranslogMetadataEnabled, + timestamp ); } @@ -5114,19 +5162,26 @@ public void syncSegmentsFromRemoteSegmentStore(boolean overrideLocal, final Runn } } + public void syncSegmentsFromGivenRemoteSegmentStore( + boolean overrideLocal, + RemoteSegmentStoreDirectory sourceRemoteDirectory, + long primaryTerm, + long commitGeneration + ) throws IOException { + // Keeping this method to avoid breaking change detection + } + /** * Downloads segments from given remote segment store for a specific commit. * @param overrideLocal flag to override local segment files with those in remote store * @param sourceRemoteDirectory RemoteSegmentDirectory Instance from which we need to sync segments - * @param primaryTerm Primary Term for shard at the time of commit operation for which we are syncing segments - * @param commitGeneration commit generation at the time of commit operation for which we are syncing segments * @throws IOException if exception occurs while reading segments from remote store */ public void syncSegmentsFromGivenRemoteSegmentStore( boolean overrideLocal, RemoteSegmentStoreDirectory sourceRemoteDirectory, - long primaryTerm, - long commitGeneration + RemoteSegmentMetadata remoteSegmentMetadata, + boolean pinnedTimestamp ) throws IOException { logger.trace("Downloading segments from given remote segment store"); RemoteSegmentStoreDirectory remoteDirectory = null; @@ -5162,12 +5217,29 @@ public void syncSegmentsFromGivenRemoteSegmentStore( overrideLocal, () -> {} ); - if (segmentsNFile != null) { + if (pinnedTimestamp) { + final SegmentInfos infosSnapshot = store.buildSegmentInfos( + remoteSegmentMetadata.getSegmentInfosBytes(), + remoteSegmentMetadata.getGeneration() + ); + long processedLocalCheckpoint = Long.parseLong(infosSnapshot.getUserData().get(LOCAL_CHECKPOINT_KEY)); + // delete any other commits, we want to start the engine only from a new commit made with the downloaded infos bytes. + // Extra segments will be wiped on engine open. + for (String file : List.of(store.directory().listAll())) { + if (file.startsWith(IndexFileNames.SEGMENTS)) { + store.deleteQuiet(file); + } + } + assert Arrays.stream(store.directory().listAll()).filter(f -> f.startsWith(IndexFileNames.SEGMENTS)).findAny().isEmpty() + : "There should not be any segments file in the dir"; + store.commitSegmentInfos(infosSnapshot, processedLocalCheckpoint, processedLocalCheckpoint); + } else if (segmentsNFile != null) { try ( ChecksumIndexInput indexInput = new BufferedChecksumIndexInput( storeDirectory.openInput(segmentsNFile, IOContext.DEFAULT) ) ) { + long commitGeneration = SegmentInfos.generationFromSegmentsFileName(segmentsNFile); SegmentInfos infosSnapshot = SegmentInfos.readCommit(store.directory(), indexInput, commitGeneration); long processedLocalCheckpoint = Long.parseLong(infosSnapshot.getUserData().get(LOCAL_CHECKPOINT_KEY)); if (remoteStore != null) { diff --git a/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java b/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java index 8649da936d81c..494fe0dbef803 100644 --- a/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java +++ b/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java @@ -57,13 +57,17 @@ import org.opensearch.core.index.shard.ShardId; import org.opensearch.index.engine.Engine; import org.opensearch.index.engine.EngineException; +import org.opensearch.index.engine.InternalEngine; import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.remote.RemoteStorePathStrategy; +import org.opensearch.index.remote.RemoteStoreUtils; import org.opensearch.index.seqno.SequenceNumbers; import org.opensearch.index.snapshots.IndexShardRestoreFailedException; import org.opensearch.index.snapshots.blobstore.RemoteStoreShardShallowCopySnapshot; import org.opensearch.index.store.RemoteSegmentStoreDirectory; import org.opensearch.index.store.RemoteSegmentStoreDirectoryFactory; import org.opensearch.index.store.Store; +import org.opensearch.index.store.remote.metadata.RemoteSegmentMetadata; import org.opensearch.index.translog.Checkpoint; import org.opensearch.index.translog.Translog; import org.opensearch.index.translog.TranslogHeader; @@ -72,6 +76,7 @@ import org.opensearch.repositories.IndexId; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.Repository; +import org.opensearch.repositories.RepositoryData; import org.opensearch.threadpool.ThreadPool; import java.io.IOException; @@ -407,14 +412,14 @@ void recoverFromSnapshotAndRemoteStore( shardId, shallowCopyShardMetadata.getRemoteStorePathStrategy() ); - sourceRemoteDirectory.initializeToSpecificCommit( + RemoteSegmentMetadata remoteSegmentMetadata = sourceRemoteDirectory.initializeToSpecificCommit( primaryTerm, commitGeneration, recoverySource.snapshot().getSnapshotId().getUUID() ); - indexShard.syncSegmentsFromGivenRemoteSegmentStore(true, sourceRemoteDirectory, primaryTerm, commitGeneration); + indexShard.syncSegmentsFromGivenRemoteSegmentStore(true, sourceRemoteDirectory, remoteSegmentMetadata, false); final Store store = indexShard.store(); - if (indexShard.indexSettings.isRemoteTranslogStoreEnabled() == false) { + if (indexShard.indexSettings.isRemoteStoreEnabled() == false) { bootstrap(indexShard, store); } else { bootstrapForSnapshot(indexShard, store); @@ -443,6 +448,98 @@ void recoverFromSnapshotAndRemoteStore( } } + void recoverShallowSnapshotV2( + final IndexShard indexShard, + Repository repository, + RepositoriesService repositoriesService, + ActionListener listener, + String segmentsPathFixedPrefix, + ThreadPool threadPool + ) { + try { + if (canRecover(indexShard)) { + indexShard.preRecovery(); + RecoverySource.Type recoveryType = indexShard.recoveryState().getRecoverySource().getType(); + assert recoveryType == RecoverySource.Type.SNAPSHOT : "expected snapshot recovery type: " + recoveryType; + SnapshotRecoverySource recoverySource = (SnapshotRecoverySource) indexShard.recoveryState().getRecoverySource(); + indexShard.prepareForIndexRecovery(); + + assert recoverySource.pinnedTimestamp() != 0; + final StepListener repositoryDataListener = new StepListener<>(); + repository.getRepositoryData(repositoryDataListener); + repositoryDataListener.whenComplete(repositoryData -> { + IndexId indexId = repositoryData.resolveIndexId(recoverySource.index().getName()); + IndexMetadata prevIndexMetadata = repository.getSnapshotIndexMetaData( + repositoryData, + recoverySource.snapshot().getSnapshotId(), + indexId + ); + RemoteSegmentStoreDirectoryFactory directoryFactory = new RemoteSegmentStoreDirectoryFactory( + () -> repositoriesService, + threadPool, + segmentsPathFixedPrefix + ); + String remoteSegmentStoreRepository = ((SnapshotRecoverySource) indexShard.recoveryState().getRecoverySource()) + .sourceRemoteStoreRepository(); + if (remoteSegmentStoreRepository == null) { + remoteSegmentStoreRepository = IndexMetadata.INDEX_REMOTE_SEGMENT_STORE_REPOSITORY_SETTING.get( + prevIndexMetadata.getSettings() + ); + } + RemoteStorePathStrategy remoteStorePathStrategy = RemoteStoreUtils.determineRemoteStorePathStrategy(prevIndexMetadata); + RemoteSegmentStoreDirectory sourceRemoteDirectory = (RemoteSegmentStoreDirectory) directoryFactory.newDirectory( + remoteSegmentStoreRepository, + prevIndexMetadata.getIndexUUID(), + shardId, + remoteStorePathStrategy + ); + RemoteSegmentMetadata remoteSegmentMetadata = sourceRemoteDirectory.initializeToSpecificTimestamp( + recoverySource.pinnedTimestamp() + ); + + String remoteTranslogRepository = ((SnapshotRecoverySource) indexShard.recoveryState().getRecoverySource()) + .sourceRemoteTranslogRepository(); + if (remoteTranslogRepository == null) { + remoteTranslogRepository = IndexMetadata.INDEX_REMOTE_TRANSLOG_REPOSITORY_SETTING.get( + prevIndexMetadata.getSettings() + ); + } + + indexShard.syncSegmentsFromGivenRemoteSegmentStore(true, sourceRemoteDirectory, remoteSegmentMetadata, true); + indexShard.syncTranslogFilesFromGivenRemoteTranslog( + repositoriesService.repository(remoteTranslogRepository), + new ShardId(prevIndexMetadata.getIndex(), shardId.id()), + remoteStorePathStrategy, + RemoteStoreUtils.determineTranslogMetadataEnabled(prevIndexMetadata), + recoverySource.pinnedTimestamp() + ); + + assert indexShard.shardRouting.primary() : "only primary shards can recover from store"; + writeEmptyRetentionLeasesFile(indexShard); + indexShard.recoveryState().getIndex().setFileDetailsComplete(); + indexShard.openEngineAndRecoverFromTranslog(false); + indexShard.getEngine().fillSeqNoGaps(indexShard.getPendingPrimaryTerm()); + indexShard.finalizeRecovery(); + if (indexShard.isRemoteTranslogEnabled() && indexShard.shardRouting.primary()) { + indexShard.waitForRemoteStoreSync(); + } + indexShard.postRecovery("post recovery from remote_store"); + SegmentInfos committedSegmentInfos = indexShard.store().readLastCommittedSegmentsInfo(); + try { + assert indexShard.getEngine() instanceof InternalEngine; + ((InternalEngine) indexShard.getEngine()).translogManager() + .setMinSeqNoToKeep(Long.parseLong(committedSegmentInfos.getUserData().get(SequenceNumbers.MAX_SEQ_NO)) + 1); + } catch (IllegalArgumentException e) { + logger.warn("MinSeqNoToKeep is already past the maxSeqNo from commited segment infos"); + } + listener.onResponse(true); + }, listener::onFailure); + } + } catch (Exception e) { + listener.onFailure(e); + } + } + private boolean canRecover(IndexShard indexShard) { if (indexShard.state() == IndexShardState.CLOSED) { // got closed on us, just ignore this recovery diff --git a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java index 5e5686ab3a419..576ac9a26acc7 100644 --- a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java +++ b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java @@ -412,7 +412,7 @@ static long getGeneration(String[] filenameTokens) { public static long getTimestamp(String filename) { String[] filenameTokens = filename.split(SEPARATOR); - return RemoteStoreUtils.invertLong(filenameTokens[6]); + return RemoteStoreUtils.invertLong(filenameTokens[filenameTokens.length - 2]); } public static Tuple getNodeIdByPrimaryTermAndGen(String filename) { diff --git a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryFactory.java b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryFactory.java index 85b3f25d950ef..32d886562f22d 100644 --- a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryFactory.java +++ b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryFactory.java @@ -121,4 +121,8 @@ public Directory newDirectory(String repositoryName, String indexUUID, ShardId s } } + public Supplier getRepositoriesService() { + return this.repositoriesService; + } + } diff --git a/server/src/main/java/org/opensearch/index/translog/RemoteFsTimestampAwareTranslog.java b/server/src/main/java/org/opensearch/index/translog/RemoteFsTimestampAwareTranslog.java index 0b134b3bddbec..27d34ec0d05af 100644 --- a/server/src/main/java/org/opensearch/index/translog/RemoteFsTimestampAwareTranslog.java +++ b/server/src/main/java/org/opensearch/index/translog/RemoteFsTimestampAwareTranslog.java @@ -8,6 +8,7 @@ package org.opensearch.index.translog; +import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.blobstore.BlobMetadata; @@ -33,6 +34,7 @@ import java.util.Optional; import java.util.Set; import java.util.TreeSet; +import java.util.concurrent.atomic.AtomicLong; import java.util.function.BooleanSupplier; import java.util.function.LongConsumer; import java.util.function.LongSupplier; @@ -52,10 +54,13 @@ */ public class RemoteFsTimestampAwareTranslog extends RemoteFsTranslog { + private static Logger staticLogger = LogManager.getLogger(RemoteFsTimestampAwareTranslog.class); private final Logger logger; private final Map metadataFilePinnedTimestampMap; // For metadata files, with no min generation in the name, we cache generation data to avoid multiple reads. private final Map> oldFormatMetadataFileGenerationMap; + private final Map> oldFormatMetadataFilePrimaryTermMap; + private final AtomicLong minPrimaryTermInRemote = new AtomicLong(Long.MAX_VALUE); public RemoteFsTimestampAwareTranslog( TranslogConfig config, @@ -86,6 +91,7 @@ public RemoteFsTimestampAwareTranslog( logger = Loggers.getLogger(getClass(), shardId); this.metadataFilePinnedTimestampMap = new HashMap<>(); this.oldFormatMetadataFileGenerationMap = new HashMap<>(); + this.oldFormatMetadataFilePrimaryTermMap = new HashMap<>(); } @Override @@ -165,7 +171,11 @@ public void onResponse(List blobMetadata) { return; } - List metadataFilesToBeDeleted = getMetadataFilesToBeDeleted(metadataFiles); + List metadataFilesToBeDeleted = getMetadataFilesToBeDeleted( + metadataFiles, + metadataFilePinnedTimestampMap, + logger + ); // If index is not deleted, make sure to keep latest metadata file if (indexDeleted == false) { @@ -209,7 +219,7 @@ public void onResponse(List blobMetadata) { oldFormatMetadataFileGenerationMap.keySet().retainAll(metadataFilesNotToBeDeleted); // Delete stale primary terms - deleteStaleRemotePrimaryTerms(metadataFiles); + deleteStaleRemotePrimaryTerms(metadataFilesNotToBeDeleted); } else { remoteGenerationDeletionPermits.release(REMOTE_DELETION_PERMITS); } @@ -259,8 +269,16 @@ protected Set getGenerationsToBeDeleted( return generationsToBeDeleted; } - // Visible for testing protected List getMetadataFilesToBeDeleted(List metadataFiles) { + return getMetadataFilesToBeDeleted(metadataFiles, metadataFilePinnedTimestampMap, logger); + } + + // Visible for testing + protected static List getMetadataFilesToBeDeleted( + List metadataFiles, + Map metadataFilePinnedTimestampMap, + Logger logger + ) { Tuple> pinnedTimestampsState = RemoteStorePinnedTimestampService.getPinnedTimestamps(); // Keep files since last successful run of scheduler @@ -351,27 +369,153 @@ protected Tuple getMinMaxTranslogGenerationFromMetadataFile( } } + private void deleteStaleRemotePrimaryTerms(List metadataFiles) { + deleteStaleRemotePrimaryTerms( + metadataFiles, + translogTransferManager, + oldFormatMetadataFilePrimaryTermMap, + minPrimaryTermInRemote, + logger + ); + } + /** * This method must be called only after there are valid generations to delete in trimUnreferencedReaders as it ensures * implicitly that minimum primary term in latest translog metadata in remote store is the current primary term. *
* This will also delete all stale translog metadata files from remote except the latest basis the metadata file comparator. */ - private void deleteStaleRemotePrimaryTerms(List metadataFiles) { + protected static void deleteStaleRemotePrimaryTerms( + List metadataFiles, + TranslogTransferManager translogTransferManager, + Map> oldFormatMetadataFilePrimaryTermMap, + AtomicLong minPrimaryTermInRemoteAtomicLong, + Logger logger + ) { // The deletion of older translog files in remote store is on best-effort basis, there is a possibility that there // are older files that are no longer needed and should be cleaned up. In here, we delete all files that are part // of older primary term. - if (olderPrimaryCleaned.trySet(Boolean.TRUE)) { - if (metadataFiles.isEmpty()) { - logger.trace("No metadata is uploaded yet, returning from deleteStaleRemotePrimaryTerms"); - return; + if (metadataFiles.isEmpty()) { + logger.trace("No metadata is uploaded yet, returning from deleteStaleRemotePrimaryTerms"); + return; + } + Optional minPrimaryTermFromMetadataFiles = metadataFiles.stream().map(file -> { + try { + return getMinMaxPrimaryTermFromMetadataFile(file, translogTransferManager, oldFormatMetadataFilePrimaryTermMap).v1(); + } catch (IOException e) { + return Long.MAX_VALUE; + } + }).min(Long::compareTo); + // First we delete all stale primary terms folders from remote store + Long minPrimaryTermInRemote = getMinPrimaryTermInRemote(minPrimaryTermInRemoteAtomicLong, translogTransferManager, logger); + if (minPrimaryTermFromMetadataFiles.get() > minPrimaryTermInRemote) { + translogTransferManager.deletePrimaryTermsAsync(minPrimaryTermFromMetadataFiles.get()); + minPrimaryTermInRemoteAtomicLong.set(minPrimaryTermFromMetadataFiles.get()); + } else { + logger.debug( + "Skipping primary term cleanup. minimumReferencedPrimaryTerm = {}, minPrimaryTermInRemote = {}", + minPrimaryTermFromMetadataFiles.get(), + minPrimaryTermInRemote + ); + } + } + + private static Long getMinPrimaryTermInRemote( + AtomicLong minPrimaryTermInRemote, + TranslogTransferManager translogTransferManager, + Logger logger + ) { + if (minPrimaryTermInRemote.get() == Long.MAX_VALUE) { + try { + Set primaryTermsInRemote = translogTransferManager.listPrimaryTermsInRemote(); + if (primaryTermsInRemote.isEmpty() == false) { + Optional minPrimaryTerm = primaryTermsInRemote.stream().min(Long::compareTo); + minPrimaryTerm.ifPresent(minPrimaryTermInRemote::set); + } + } catch (IOException e) { + logger.error("Exception while listing primary terms in remote translog", e); + } + } + return minPrimaryTermInRemote.get(); + } + + protected static Tuple getMinMaxPrimaryTermFromMetadataFile( + String metadataFile, + TranslogTransferManager translogTransferManager, + Map> oldFormatMetadataFilePrimaryTermMap + ) throws IOException { + Tuple minMaxPrimaryTermFromFileName = TranslogTransferMetadata.getMinMaxPrimaryTermFromFilename(metadataFile); + if (minMaxPrimaryTermFromFileName != null) { + return minMaxPrimaryTermFromFileName; + } else { + if (oldFormatMetadataFilePrimaryTermMap.containsKey(metadataFile)) { + return oldFormatMetadataFilePrimaryTermMap.get(metadataFile); + } else { + TranslogTransferMetadata metadata = translogTransferManager.readMetadata(metadataFile); + long maxPrimaryTem = TranslogTransferMetadata.getPrimaryTermFromFileName(metadataFile); + long minPrimaryTem = -1; + if (metadata.getGenerationToPrimaryTermMapper() != null + && metadata.getGenerationToPrimaryTermMapper().values().isEmpty() == false) { + Optional primaryTerm = metadata.getGenerationToPrimaryTermMapper() + .values() + .stream() + .map(s -> Long.parseLong(s)) + .min(Long::compareTo); + if (primaryTerm.isPresent()) { + minPrimaryTem = primaryTerm.get(); + } + } + Tuple minMaxPrimaryTermTuple = new Tuple<>(minPrimaryTem, maxPrimaryTem); + oldFormatMetadataFilePrimaryTermMap.put(metadataFile, minMaxPrimaryTermTuple); + return minMaxPrimaryTermTuple; } - Optional minPrimaryTerm = metadataFiles.stream() - .map(file -> RemoteStoreUtils.invertLong(file.split(METADATA_SEPARATOR)[1])) - .min(Long::compareTo); - // First we delete all stale primary terms folders from remote store - long minimumReferencedPrimaryTerm = minPrimaryTerm.get() - 1; - translogTransferManager.deletePrimaryTermsAsync(minimumReferencedPrimaryTerm); } } + + public static void cleanup(TranslogTransferManager translogTransferManager) throws IOException { + ActionListener> listMetadataFilesListener = new ActionListener<>() { + @Override + public void onResponse(List blobMetadata) { + List metadataFiles = blobMetadata.stream().map(BlobMetadata::name).collect(Collectors.toList()); + + try { + if (metadataFiles.isEmpty()) { + staticLogger.debug("No stale translog metadata files found"); + return; + } + List metadataFilesToBeDeleted = getMetadataFilesToBeDeleted(metadataFiles, new HashMap<>(), staticLogger); + if (metadataFilesToBeDeleted.isEmpty()) { + staticLogger.debug("No metadata files to delete"); + return; + } + staticLogger.debug(() -> "metadataFilesToBeDeleted = " + metadataFilesToBeDeleted); + + // For all the files that we are keeping, fetch min and max generations + List metadataFilesNotToBeDeleted = new ArrayList<>(metadataFiles); + metadataFilesNotToBeDeleted.removeAll(metadataFilesToBeDeleted); + staticLogger.debug(() -> "metadataFilesNotToBeDeleted = " + metadataFilesNotToBeDeleted); + + // Delete stale metadata files + translogTransferManager.deleteMetadataFilesAsync(metadataFilesToBeDeleted, () -> {}); + + // Delete stale primary terms + deleteStaleRemotePrimaryTerms( + metadataFilesNotToBeDeleted, + translogTransferManager, + new HashMap<>(), + new AtomicLong(Long.MAX_VALUE), + staticLogger + ); + } catch (Exception e) { + staticLogger.error("Exception while cleaning up metadata and primary terms", e); + } + } + + @Override + public void onFailure(Exception e) { + staticLogger.error("Exception while cleaning up metadata and primary terms", e); + } + }; + translogTransferManager.listTranslogMetadataFilesAsync(listMetadataFilesListener); + } } diff --git a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java index 71809b1f468cb..e40c2e8d70b07 100644 --- a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java +++ b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java @@ -105,36 +105,6 @@ public RemoteFsTranslog( BooleanSupplier startedPrimarySupplier, RemoteTranslogTransferTracker remoteTranslogTransferTracker, RemoteStoreSettings remoteStoreSettings - ) throws IOException { - this( - config, - translogUUID, - deletionPolicy, - globalCheckpointSupplier, - primaryTermSupplier, - persistedSequenceNumberConsumer, - blobStoreRepository, - threadPool, - startedPrimarySupplier, - remoteTranslogTransferTracker, - remoteStoreSettings, - 0 - ); - } - - public RemoteFsTranslog( - TranslogConfig config, - String translogUUID, - TranslogDeletionPolicy deletionPolicy, - LongSupplier globalCheckpointSupplier, - LongSupplier primaryTermSupplier, - LongConsumer persistedSequenceNumberConsumer, - BlobStoreRepository blobStoreRepository, - ThreadPool threadPool, - BooleanSupplier startedPrimarySupplier, - RemoteTranslogTransferTracker remoteTranslogTransferTracker, - RemoteStoreSettings remoteStoreSettings, - long timestamp ) throws IOException { super(config, translogUUID, deletionPolicy, globalCheckpointSupplier, primaryTermSupplier, persistedSequenceNumberConsumer); logger = Loggers.getLogger(getClass(), shardId); @@ -153,7 +123,9 @@ public RemoteFsTranslog( isTranslogMetadataEnabled ); try { - download(translogTransferManager, location, logger, config.shouldSeedRemote(), timestamp); + if (config.downloadRemoteTranslogOnInit()) { + download(translogTransferManager, location, logger, config.shouldSeedRemote(), 0); + } Checkpoint checkpoint = readCheckpoint(location); logger.info("Downloaded data from remote translog till maxSeqNo = {}", checkpoint.maxSeqNo); this.readers.addAll(recoverFromFiles(checkpoint)); @@ -162,6 +134,9 @@ public RemoteFsTranslog( logger.error(errorMsg); throw new IllegalStateException(errorMsg); } + if (config.downloadRemoteTranslogOnInit() == false) { + translogTransferManager.populateFileTrackerWithLocalState(this.readers); + } boolean success = false; current = null; try { @@ -194,31 +169,6 @@ RemoteTranslogTransferTracker getRemoteTranslogTracker() { return remoteTranslogTransferTracker; } - public static void download( - Repository repository, - ShardId shardId, - ThreadPool threadPool, - Path location, - RemoteStorePathStrategy pathStrategy, - RemoteStoreSettings remoteStoreSettings, - Logger logger, - boolean seedRemote, - boolean isTranslogMetadataEnabled - ) throws IOException { - download( - repository, - shardId, - threadPool, - location, - pathStrategy, - remoteStoreSettings, - logger, - seedRemote, - isTranslogMetadataEnabled, - 0 - ); - } - public static void download( Repository repository, ShardId shardId, diff --git a/server/src/main/java/org/opensearch/index/translog/TranslogConfig.java b/server/src/main/java/org/opensearch/index/translog/TranslogConfig.java index f720f041b287c..52e20d9838fca 100644 --- a/server/src/main/java/org/opensearch/index/translog/TranslogConfig.java +++ b/server/src/main/java/org/opensearch/index/translog/TranslogConfig.java @@ -60,6 +60,7 @@ public final class TranslogConfig { private final ByteSizeValue bufferSize; private final String nodeId; private final boolean seedRemote; + private boolean downloadRemoteTranslogOnInit = true; /** * Creates a new TranslogConfig instance @@ -140,4 +141,12 @@ public String getNodeId() { public boolean shouldSeedRemote() { return seedRemote; } + + public boolean downloadRemoteTranslogOnInit() { + return downloadRemoteTranslogOnInit; + } + + public void setDownloadRemoteTranslogOnInit(boolean downloadRemoteTranslogOnInit) { + this.downloadRemoteTranslogOnInit = downloadRemoteTranslogOnInit; + } } diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java index 527fb0123d800..291218ea47499 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java @@ -28,6 +28,7 @@ import org.opensearch.index.remote.RemoteStoreUtils; import org.opensearch.index.remote.RemoteTranslogTransferTracker; import org.opensearch.index.translog.Translog; +import org.opensearch.index.translog.TranslogReader; import org.opensearch.index.translog.transfer.listener.TranslogTransferListener; import org.opensearch.indices.RemoteStoreSettings; import org.opensearch.threadpool.ThreadPool; @@ -544,6 +545,14 @@ public void onFailure(Exception e) { }); } + public Set listPrimaryTermsInRemote() throws IOException { + Set primaryTermsStr = transferService.listFolders(remoteDataTransferPath); + if (primaryTermsStr != null) { + return primaryTermsStr.stream().map(Long::parseLong).collect(Collectors.toSet()); + } + return new HashSet<>(); + } + /** * Handles deletion of all translog files associated with a primary term. * @@ -712,4 +721,23 @@ public void onFailure(Exception e) { public int getMaxRemoteTranslogReadersSettings() { return this.remoteStoreSettings.getMaxRemoteTranslogReaders(); } + + public void populateFileTrackerWithLocalState(List readers) { + if (readers == null) { + return; + } + for (TranslogReader reader : readers) { + long generation = reader.getGeneration(); + String tlogFilename = Translog.getFilename(generation); + fileTransferTracker.add(tlogFilename, true); + if (isTranslogMetadataEnabled) { + String ckpFilename = Translog.getCommitCheckpointFileName(generation); + fileTransferTracker.add(ckpFilename, true); + } + } + } + + protected FileTransferTracker getFileTransferTracker() { + return fileTransferTracker; + } } diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferMetadata.java b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferMetadata.java index 745fa9a8a219a..3b8885055e8f7 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferMetadata.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferMetadata.java @@ -19,6 +19,7 @@ import java.util.Arrays; import java.util.Map; import java.util.Objects; +import java.util.Optional; /** * The metadata associated with every transfer {@link TransferSnapshot}. The metadata is uploaded at the end of the @@ -108,11 +109,28 @@ public String getFileName() { RemoteStoreUtils.invertLong(createdAt), String.valueOf(Objects.hash(nodeId)), RemoteStoreUtils.invertLong(minTranslogGeneration), + String.valueOf(getMinPrimaryTermReferred()), String.valueOf(CURRENT_VERSION) ) ); } + private long getMinPrimaryTermReferred() { + if (generationToPrimaryTermMapper.get() == null || generationToPrimaryTermMapper.get().values().isEmpty()) { + return -1; + } + Optional minPrimaryTerm = generationToPrimaryTermMapper.get() + .values() + .stream() + .map(s -> Long.parseLong(s)) + .min(Long::compareTo); + if (minPrimaryTerm.isPresent()) { + return minPrimaryTerm.get(); + } else { + return -1; + } + } + public static Tuple, String> getNodeIdByPrimaryTermAndGeneration(String filename) { String[] tokens = filename.split(METADATA_SEPARATOR); if (tokens.length < 6) { @@ -143,15 +161,43 @@ public static Tuple getMinMaxTranslogGenerationFromFilename(String f assert Version.CURRENT.onOrAfter(Version.V_2_17_0); try { // instead of direct index, we go backwards to avoid running into same separator in nodeId - String minGeneration = tokens[tokens.length - 2]; + String minGeneration = tokens[tokens.length - 3]; String maxGeneration = tokens[2]; return new Tuple<>(RemoteStoreUtils.invertLong(minGeneration), RemoteStoreUtils.invertLong(maxGeneration)); - } catch (NumberFormatException e) { + } catch (Exception e) { logger.error(() -> new ParameterizedMessage("Exception while getting min and max translog generation from: {}", filename), e); return null; } } + public static Tuple getMinMaxPrimaryTermFromFilename(String filename) { + String[] tokens = filename.split(METADATA_SEPARATOR); + if (tokens.length < 7) { + // For versions < 2.17, we don't have min primary term. + return null; + } + assert Version.CURRENT.onOrAfter(Version.V_2_17_0); + try { + // instead of direct index, we go backwards to avoid running into same separator in nodeId + String minPrimaryTerm = tokens[tokens.length - 2]; + String maxPrimaryTerm = tokens[1]; + return new Tuple<>(Long.parseLong(minPrimaryTerm), RemoteStoreUtils.invertLong(maxPrimaryTerm)); + } catch (Exception e) { + logger.error(() -> new ParameterizedMessage("Exception while getting min and max primary term from: {}", filename), e); + return null; + } + } + + public static long getPrimaryTermFromFileName(String filename) { + String[] tokens = filename.split(METADATA_SEPARATOR); + try { + return RemoteStoreUtils.invertLong(tokens[1]); + } catch (Exception e) { + logger.error(() -> new ParameterizedMessage("Exception while getting max primary term from: {}", filename), e); + return -1; + } + } + @Override public int hashCode() { return Objects.hash(primaryTerm, generation); diff --git a/server/src/main/java/org/opensearch/repositories/Repository.java b/server/src/main/java/org/opensearch/repositories/Repository.java index a7c44575465b0..085fe6a748ea5 100644 --- a/server/src/main/java/org/opensearch/repositories/Repository.java +++ b/server/src/main/java/org/opensearch/repositories/Repository.java @@ -481,6 +481,18 @@ default RemoteStoreShardShallowCopySnapshot getRemoteStoreShallowCopyShardMetada */ IndexShardSnapshotStatus getShardSnapshotStatus(SnapshotId snapshotId, IndexId indexId, ShardId shardId); + /** + * Retrieve shard snapshot status for the stored snapshot + * + * @param snapshotInfo snapshot info + * @param indexId the snapshotted index id for the shard to get status for + * @param shardId shard id + * @return snapshot status + */ + default IndexShardSnapshotStatus getShardSnapshotStatus(SnapshotInfo snapshotInfo, IndexId indexId, ShardId shardId) { + return getShardSnapshotStatus(snapshotInfo.snapshotId(), indexId, shardId); + } + /** * Update the repository with the incoming cluster state. This method is invoked from {@link RepositoriesService#applyClusterState} and * thus the same semantics as with {@link org.opensearch.cluster.ClusterStateApplier#applyClusterState} apply for the diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java index ce08b2e9adbb5..2dde651a7a042 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java @@ -116,6 +116,7 @@ import org.opensearch.index.remote.RemoteStorePathStrategy.BasePathInput; import org.opensearch.index.remote.RemoteStorePathStrategy.SnapshotShardPathInput; import org.opensearch.index.remote.RemoteStoreUtils; +import org.opensearch.index.remote.RemoteTranslogTransferTracker; import org.opensearch.index.snapshots.IndexShardRestoreFailedException; import org.opensearch.index.snapshots.IndexShardSnapshotStatus; import org.opensearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot; @@ -132,6 +133,10 @@ import org.opensearch.index.store.lockmanager.FileLockInfo; import org.opensearch.index.store.lockmanager.RemoteStoreLockManager; import org.opensearch.index.store.lockmanager.RemoteStoreLockManagerFactory; +import org.opensearch.index.translog.RemoteFsTimestampAwareTranslog; +import org.opensearch.index.translog.RemoteFsTranslog; +import org.opensearch.index.translog.transfer.FileTransferTracker; +import org.opensearch.index.translog.transfer.TranslogTransferManager; import org.opensearch.indices.RemoteStoreSettings; import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.indices.recovery.RecoveryState; @@ -853,7 +858,7 @@ boolean getPrefixModeVerification() { * maintains single lazy instance of {@link BlobContainer} */ protected BlobContainer blobContainer() { - assertSnapshotOrGenericThread(); + // assertSnapshotOrGenericThread(); BlobContainer blobContainer = this.blobContainer.get(); if (blobContainer == null) { @@ -2230,25 +2235,35 @@ private void cleanRemoteStoreDirectoryIfNeeded( } IndexMetadata prevIndexMetadata = this.getSnapshotIndexMetaData(oldRepoData, snapshotId, indexId); if (prevIndexMetadata != null && !isIndexPresent(clusterService, prevIndexMetadata.getIndexUUID())) { - String remoteStoreRepository = IndexMetadata.INDEX_REMOTE_TRANSLOG_REPOSITORY_SETTING.get( + String remoteStoreRepository = IndexMetadata.INDEX_REMOTE_SEGMENT_STORE_REPOSITORY_SETTING.get( prevIndexMetadata.getSettings() ); assert (remoteStoreRepository != null); + String remoteTranslogRepositoryName = IndexMetadata.INDEX_REMOTE_TRANSLOG_REPOSITORY_SETTING.get( + prevIndexMetadata.getSettings() + ); + assert (remoteTranslogRepositoryName != null); + Repository remoteTranslogRepository = remoteSegmentStoreDirectoryFactory.getRepositoriesService() + .get() + .repository(remoteTranslogRepositoryName); + RemoteStorePathStrategy remoteStorePathStrategy = RemoteStoreUtils.determineRemoteStorePathStrategy( prevIndexMetadata ); for (int shardId = 0; shardId < prevIndexMetadata.getNumberOfShards(); shardId++) { + ShardId shard = new ShardId(Index.UNKNOWN_INDEX_NAME, prevIndexMetadata.getIndexUUID(), shardId); remoteDirectoryCleanupAsync( remoteSegmentStoreDirectoryFactory, threadPool, remoteStoreRepository, prevIndexMetadata.getIndexUUID(), - new ShardId(Index.UNKNOWN_INDEX_NAME, prevIndexMetadata.getIndexUUID(), shardId), + shard, ThreadPool.Names.REMOTE_PURGE, remoteStorePathStrategy ); + remoteTranslogCleanupAsync(remoteTranslogRepository, shard, remoteStorePathStrategy, prevIndexMetadata); } } } catch (Exception e) { @@ -2268,6 +2283,33 @@ private void cleanRemoteStoreDirectoryIfNeeded( } + private void remoteTranslogCleanupAsync( + Repository remoteTranslogRepository, + ShardId shardId, + RemoteStorePathStrategy remoteStorePathStrategy, + IndexMetadata prevIndexMetadata + ) { + assert remoteTranslogRepository instanceof BlobStoreRepository; + boolean indexMetadataEnabled = RemoteStoreUtils.determineTranslogMetadataEnabled(prevIndexMetadata); + RemoteTranslogTransferTracker remoteTranslogTransferTracker = new RemoteTranslogTransferTracker(shardId, 1000); + FileTransferTracker fileTransferTracker = new FileTransferTracker(shardId, remoteTranslogTransferTracker); + TranslogTransferManager translogTransferManager = RemoteFsTranslog.buildTranslogTransferManager( + (BlobStoreRepository) remoteTranslogRepository, + threadPool, + shardId, + fileTransferTracker, + remoteTranslogTransferTracker, + remoteStorePathStrategy, + remoteStoreSettings, + indexMetadataEnabled + ); + try { + RemoteFsTimestampAwareTranslog.cleanup(translogTransferManager); + } catch (IOException e) { + logger.error("Exception while cleaning up remote translog for shard: " + shardId, e); + } + } + /** * Finds and returns a list of shard paths that match the given index ID. * @@ -4267,6 +4309,11 @@ public IndexShardSnapshotStatus getShardSnapshotStatus(SnapshotId snapshotId, In return snapshot.getIndexShardSnapshotStatus(); } + public IndexShardSnapshotStatus getShardSnapshotStatus(SnapshotInfo snapshotInfo, IndexId indexId, ShardId shardId) { + IndexShardSnapshot snapshot = loadShardSnapshot(shardContainer(indexId, shardId), snapshotInfo); + return snapshot.getIndexShardSnapshotStatus(); + } + @Override public void verify(String seed, DiscoveryNode localNode) { if (isSystemRepository == false) { @@ -4475,6 +4522,38 @@ public IndexShardSnapshot loadShardSnapshot(BlobContainer shardContainer, Snapsh } } + public IndexShardSnapshot loadShardSnapshot(BlobContainer shardContainer, SnapshotInfo snapshotInfo) { + try { + SnapshotId snapshotId = snapshotInfo.snapshotId(); + if (snapshotInfo.getPinnedTimestamp() != 0) { + return () -> IndexShardSnapshotStatus.newDone(0L, 0L, 0, 0, 0, 0, "1"); + } else if (snapshotInfo.isRemoteStoreIndexShallowCopyEnabled()) { + if (shardContainer.blobExists(REMOTE_STORE_SHARD_SHALLOW_COPY_SNAPSHOT_FORMAT.blobName(snapshotId.getUUID()))) { + return REMOTE_STORE_SHARD_SHALLOW_COPY_SNAPSHOT_FORMAT.read( + shardContainer, + snapshotId.getUUID(), + namedXContentRegistry + ); + } else { + throw new SnapshotMissingException(metadata.name(), snapshotId.getName()); + } + } else { + if (shardContainer.blobExists(INDEX_SHARD_SNAPSHOT_FORMAT.blobName(snapshotId.getUUID()))) { + return INDEX_SHARD_SNAPSHOT_FORMAT.read(shardContainer, snapshotId.getUUID(), namedXContentRegistry); + } else { + throw new SnapshotMissingException(metadata.name(), snapshotId.getName()); + } + } + } catch (IOException ex) { + throw new SnapshotException( + metadata.name(), + snapshotInfo.snapshotId(), + "failed to read shard snapshot file for [" + shardContainer.path() + ']', + ex + ); + } + } + /** * Loads all available snapshots in the repository using the given {@code generation} or falling back to trying to determine it from * the given list of blobs in the shard container. diff --git a/server/src/main/java/org/opensearch/snapshots/InternalSnapshotsInfoService.java b/server/src/main/java/org/opensearch/snapshots/InternalSnapshotsInfoService.java index 797a58f3b0d9b..e300f845e6f58 100644 --- a/server/src/main/java/org/opensearch/snapshots/InternalSnapshotsInfoService.java +++ b/server/src/main/java/org/opensearch/snapshots/InternalSnapshotsInfoService.java @@ -238,14 +238,18 @@ protected void doRun() throws Exception { final Repository repository = repositories.repository(snapshotShard.snapshot.getRepository()); logger.debug("fetching snapshot shard size for {}", snapshotShard); - final long snapshotShardSize = repository.getShardSnapshotStatus( - snapshotShard.snapshot().getSnapshotId(), - snapshotShard.index(), - snapshotShard.shardId() - ).asCopy().getTotalSize(); + long snapshotShardSize; + if (snapshotShard.pinnedTimestamp > 0) { + snapshotShardSize = 0; + } else { + snapshotShardSize = repository.getShardSnapshotStatus( + snapshotShard.snapshot().getSnapshotId(), + snapshotShard.index(), + snapshotShard.shardId() + ).asCopy().getTotalSize(); + } logger.debug("snapshot shard size for {}: {} bytes", snapshotShard, snapshotShardSize); - boolean updated = false; synchronized (mutex) { removed = unknownSnapshotShards.remove(snapshotShard); @@ -354,7 +358,8 @@ private static Set listOfSnapshotShards(final ClusterState state) final SnapshotShard snapshotShard = new SnapshotShard( snapshotRecoverySource.snapshot(), snapshotRecoverySource.index(), - shardRouting.shardId() + shardRouting.shardId(), + snapshotRecoverySource.pinnedTimestamp() ); snapshotShards.add(snapshotShard); } @@ -374,10 +379,17 @@ public static class SnapshotShard { private final IndexId index; private final ShardId shardId; + private long pinnedTimestamp; + public SnapshotShard(Snapshot snapshot, IndexId index, ShardId shardId) { + this(snapshot, index, shardId, 0L); + } + + public SnapshotShard(Snapshot snapshot, IndexId index, ShardId shardId, long pinnedTimestamp) { this.snapshot = snapshot; this.index = index; this.shardId = shardId; + this.pinnedTimestamp = pinnedTimestamp; } public Snapshot snapshot() { diff --git a/server/src/main/java/org/opensearch/snapshots/RestoreService.java b/server/src/main/java/org/opensearch/snapshots/RestoreService.java index 08c30ea503a6d..eaa868d4db864 100644 --- a/server/src/main/java/org/opensearch/snapshots/RestoreService.java +++ b/server/src/main/java/org/opensearch/snapshots/RestoreService.java @@ -440,7 +440,9 @@ public ClusterState execute(ClusterState currentState) { snapshotIndexId, isSearchableSnapshot, isRemoteStoreShallowCopy, - request.getSourceRemoteStoreRepository() + request.getSourceRemoteStoreRepository(), + request.getSourceRemoteTranslogRepository(), + snapshotInfo.getPinnedTimestamp() ); final Version minIndexCompatibilityVersion; if (isSearchableSnapshot && isSearchableSnapshotsExtendedCompatibilityEnabled()) { @@ -568,7 +570,7 @@ public ClusterState execute(ClusterState currentState) { for (int shard = 0; shard < snapshotIndexMetadata.getNumberOfShards(); shard++) { if (isRemoteSnapshot) { IndexShardSnapshotStatus.Copy shardStatus = repository.getShardSnapshotStatus( - snapshotInfo.snapshotId(), + snapshotInfo, snapshotIndexId, new ShardId(metadata.index(index).getIndex(), shard) ).asCopy(); diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java index 2a34dd3580948..be30de97ee830 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java @@ -637,7 +637,7 @@ private Tuple, Set> testGetPinnedTimestampLockedFilesW String metadataPrefix = "metadata__1__2__3__4__5__"; Map metadataFiles = new HashMap<>(); for (Long metadataFileTimestamp : metadataFileTimestamps) { - metadataFiles.put(metadataFileTimestamp, metadataPrefix + RemoteStoreUtils.invertLong(metadataFileTimestamp)); + metadataFiles.put(metadataFileTimestamp, metadataPrefix + RemoteStoreUtils.invertLong(metadataFileTimestamp) + "__1"); } return new Tuple<>( metadataFiles, @@ -662,7 +662,7 @@ private Tuple, Set> testGetPinnedTimestampLockedFilesW String primaryTerm = RemoteStoreUtils.invertLong(metadataFileTimestampPrimaryTerm.getValue()); String metadataPrefix = "metadata__" + primaryTerm + "__2__3__4__5__"; long metadataFileTimestamp = metadataFileTimestampPrimaryTerm.getKey(); - metadataFiles.put(metadataFileTimestamp, metadataPrefix + RemoteStoreUtils.invertLong(metadataFileTimestamp)); + metadataFiles.put(metadataFileTimestamp, metadataPrefix + RemoteStoreUtils.invertLong(metadataFileTimestamp) + "__1"); } return new Tuple<>( metadataFiles, diff --git a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java index 732d1172c5e0f..19569e1a19284 100644 --- a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java @@ -2837,7 +2837,7 @@ public void testSyncSegmentsFromGivenRemoteSegmentStore() throws IOException { target = reinitShard(target, routing); DiscoveryNode localNode = new DiscoveryNode("foo", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT); target.markAsRecovering("from snapshot", new RecoveryState(routing, localNode, null)); - target.syncSegmentsFromGivenRemoteSegmentStore(false, tempRemoteSegmentDirectory, primaryTerm, commitGeneration); + target.syncSegmentsFromGivenRemoteSegmentStore(false, tempRemoteSegmentDirectory, null, false); RemoteSegmentStoreDirectory remoteStoreDirectory = ((RemoteSegmentStoreDirectory) ((FilterDirectory) ((FilterDirectory) target .remoteStore() .directory()).getDelegate()).getDelegate()); diff --git a/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java b/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java index 336d4bafd4b66..ecd6620dbea15 100644 --- a/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java +++ b/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java @@ -1170,9 +1170,9 @@ public void testInitializeToSpecificTimestampNoMdMatchingTimestamp() throws IOEx public void testInitializeToSpecificTimestampMatchingMdFile() throws IOException { String metadataPrefix = "metadata__1__2__3__4__5__"; List metadataFiles = new ArrayList<>(); - metadataFiles.add(metadataPrefix + RemoteStoreUtils.invertLong(1000)); - metadataFiles.add(metadataPrefix + RemoteStoreUtils.invertLong(2000)); - metadataFiles.add(metadataPrefix + RemoteStoreUtils.invertLong(3000)); + metadataFiles.add(metadataPrefix + RemoteStoreUtils.invertLong(1000) + "__1"); + metadataFiles.add(metadataPrefix + RemoteStoreUtils.invertLong(2000) + "__1"); + metadataFiles.add(metadataPrefix + RemoteStoreUtils.invertLong(3000) + "__1"); Map metadata = new HashMap<>(); metadata.put("_0.cfe", "_0.cfe::_0.cfe__" + UUIDs.base64UUID() + "::1234::512::" + Version.LATEST.major); @@ -1184,7 +1184,7 @@ public void testInitializeToSpecificTimestampMatchingMdFile() throws IOException Integer.MAX_VALUE ) ).thenReturn(metadataFiles); - when(remoteMetadataDirectory.getBlobStream(metadataPrefix + RemoteStoreUtils.invertLong(1000))).thenReturn( + when(remoteMetadataDirectory.getBlobStream(metadataPrefix + RemoteStoreUtils.invertLong(1000) + "__1")).thenReturn( createMetadataFileBytes(metadata, indexShard.getLatestReplicationCheckpoint(), segmentInfos) ); diff --git a/server/src/test/java/org/opensearch/index/translog/RemoteFsTimestampAwareTranslogTests.java b/server/src/test/java/org/opensearch/index/translog/RemoteFsTimestampAwareTranslogTests.java index 1f82dd9d7e641..c510a6475147d 100644 --- a/server/src/test/java/org/opensearch/index/translog/RemoteFsTimestampAwareTranslogTests.java +++ b/server/src/test/java/org/opensearch/index/translog/RemoteFsTimestampAwareTranslogTests.java @@ -8,6 +8,8 @@ package org.opensearch.index.translog; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.lucene.tests.util.LuceneTestCase; import org.opensearch.action.LatchedActionListener; import org.opensearch.cluster.metadata.RepositoryMetadata; @@ -53,6 +55,7 @@ import java.util.Set; import java.util.TreeSet; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicLong; import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.LongStream; @@ -63,7 +66,9 @@ import static org.opensearch.index.translog.transfer.TranslogTransferMetadata.METADATA_SEPARATOR; import static org.opensearch.indices.RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -211,16 +216,44 @@ public void onFailure(Exception e) { // Node id containing separator String nodeIdWithSeparator = - "metadata__9223372036438563903__9223372036854774799__9223370311919910393__node__1__9223372036438563958__1"; + "metadata__9223372036438563903__9223372036854774799__9223370311919910393__node__1__9223372036438563958__2__1"; Tuple minMaxGen = TranslogTransferMetadata.getMinMaxTranslogGenerationFromFilename(nodeIdWithSeparator); Long minGen = Long.MAX_VALUE - 9223372036438563958L; assertEquals(minGen, minMaxGen.v1()); // Malformed md filename - String malformedMdFileName = "metadata__9223372036438563903__9223372036854774799__9223370311919910393__node1__xyz__1"; + String malformedMdFileName = "metadata__9223372036438563903__9223372036854774799__9223370311919910393__node1__xyz__3__1"; assertNull(TranslogTransferMetadata.getMinMaxTranslogGenerationFromFilename(malformedMdFileName)); } + public void testGetMinMaxPrimaryTermFromFilename() throws Exception { + // New format metadata file + String newFormatMetadataFile = + "metadata__9223372036854775800__9223372036854774799__9223370311919910393__node1__9223372036438563958__2__1"; + Tuple minMaxPrimaryterm = TranslogTransferMetadata.getMinMaxPrimaryTermFromFilename(newFormatMetadataFile); + Long minPrimaryTerm = 2L; + Long maxPrimaryTerm = 7L; + assertEquals(minPrimaryTerm, minMaxPrimaryterm.v1()); + assertEquals(maxPrimaryTerm, minMaxPrimaryterm.v2()); + + // Old format metadata file + String oldFormatMdFilename = "metadata__9223372036438563903__9223372036854774799__9223370311919910393__31__1"; + assertNull(TranslogTransferMetadata.getMinMaxPrimaryTermFromFilename(oldFormatMdFilename)); + + // Node id containing separator + String nodeIdWithSeparator = + "metadata__9223372036854775800__9223372036854774799__9223370311919910393__node__1__9223372036438563958__2__1"; + minMaxPrimaryterm = TranslogTransferMetadata.getMinMaxPrimaryTermFromFilename(nodeIdWithSeparator); + minPrimaryTerm = 2L; + maxPrimaryTerm = 7L; + assertEquals(minPrimaryTerm, minMaxPrimaryterm.v1()); + assertEquals(maxPrimaryTerm, minMaxPrimaryterm.v2()); + + // Malformed md filename + String malformedMdFileName = "metadata__9223372036854775800__9223372036854774799__9223370311919910393__node1__xyz__3qwe__1"; + assertNull(TranslogTransferMetadata.getMinMaxPrimaryTermFromFilename(malformedMdFileName)); + } + public void testIndexDeletionWithNoPinnedTimestampNoRecentMdFiles() throws Exception { RemoteStoreSettings.setPinnedTimestampsLookbackInterval(TimeValue.ZERO); ArrayList ops = new ArrayList<>(); @@ -604,11 +637,11 @@ public void testGetGenerationsToBeDeletedEmptyMetadataFilesNotToBeDeleted() thro List metadataFilesNotToBeDeleted = new ArrayList<>(); List metadataFilesToBeDeleted = List.of( // 4 to 7 - "metadata__9223372036438563903__9223372036854775800__9223370311919910398__31__9223372036854775803__1", + "metadata__9223372036854775806__9223372036854775800__9223370311919910398__31__9223372036854775803__1__1", // 17 to 37 - "metadata__9223372036438563903__9223372036854775770__9223370311919910398__31__9223372036854775790__1", + "metadata__9223372036854775806__9223372036854775770__9223370311919910398__31__9223372036854775790__1__1", // 27 to 42 - "metadata__9223372036438563903__9223372036854775765__9223370311919910403__31__9223372036854775780__1" + "metadata__9223372036854775806__9223372036854775765__9223370311919910403__31__9223372036854775780__1__1" ); Set generations = ((RemoteFsTimestampAwareTranslog) translog).getGenerationsToBeDeleted( metadataFilesNotToBeDeleted, @@ -618,6 +651,7 @@ public void testGetGenerationsToBeDeletedEmptyMetadataFilesNotToBeDeleted() thro Set md1Generations = LongStream.rangeClosed(4, 7).boxed().collect(Collectors.toSet()); Set md2Generations = LongStream.rangeClosed(17, 37).boxed().collect(Collectors.toSet()); Set md3Generations = LongStream.rangeClosed(27, 42).boxed().collect(Collectors.toSet()); + assertTrue(generations.containsAll(md1Generations)); assertTrue(generations.containsAll(md2Generations)); assertTrue(generations.containsAll(md3Generations)); @@ -631,19 +665,19 @@ public void testGetGenerationsToBeDeletedEmptyMetadataFilesNotToBeDeleted() thro public void testGetGenerationsToBeDeleted() throws IOException { List metadataFilesNotToBeDeleted = List.of( // 1 to 4 - "metadata__9223372036438563903__9223372036854775803__9223370311919910398__31__9223372036854775806__1", + "metadata__9223372036854775806__9223372036854775803__9223370311919910398__31__9223372036854775806__1__1", // 26 to 30 - "metadata__9223372036438563903__9223372036854775777__9223370311919910398__31__9223372036854775781__1", + "metadata__9223372036854775806__9223372036854775777__9223370311919910398__31__9223372036854775781__1__1", // 42 to 100 - "metadata__9223372036438563903__9223372036854775707__9223370311919910403__31__9223372036854775765__1" + "metadata__9223372036854775806__9223372036854775707__9223370311919910403__31__9223372036854775765__1__1" ); List metadataFilesToBeDeleted = List.of( // 4 to 7 - "metadata__9223372036438563903__9223372036854775800__9223370311919910398__31__9223372036854775803__1", + "metadata__9223372036854775806__9223372036854775800__9223370311919910398__31__9223372036854775803__1__1", // 17 to 37 - "metadata__9223372036438563903__9223372036854775770__9223370311919910398__31__9223372036854775790__1", + "metadata__9223372036854775806__9223372036854775770__9223370311919910398__31__9223372036854775790__1__1", // 27 to 42 - "metadata__9223372036438563903__9223372036854775765__9223370311919910403__31__9223372036854775780__1" + "metadata__9223372036854775806__9223372036854775765__9223370311919910403__31__9223372036854775780__1__1" ); Set generations = ((RemoteFsTimestampAwareTranslog) translog).getGenerationsToBeDeleted( metadataFilesNotToBeDeleted, @@ -653,6 +687,7 @@ public void testGetGenerationsToBeDeleted() throws IOException { Set md1Generations = LongStream.rangeClosed(5, 7).boxed().collect(Collectors.toSet()); Set md2Generations = LongStream.rangeClosed(17, 25).boxed().collect(Collectors.toSet()); Set md3Generations = LongStream.rangeClosed(31, 41).boxed().collect(Collectors.toSet()); + assertTrue(generations.containsAll(md1Generations)); assertTrue(generations.containsAll(md2Generations)); assertTrue(generations.containsAll(md3Generations)); @@ -783,49 +818,49 @@ public void testGetMinMaxTranslogGenerationFromMetadataFile() throws IOException assertEquals( new Tuple<>(701L, 1008L), translog.getMinMaxTranslogGenerationFromMetadataFile( - "metadata__9223372036438563903__9223372036854774799__9223370311919910393__31__9223372036854775106__1", + "metadata__9223372036438563903__9223372036854774799__9223370311919910393__31__9223372036854775106__1__1", translogTransferManager ) ); assertEquals( new Tuple<>(4L, 7L), translog.getMinMaxTranslogGenerationFromMetadataFile( - "metadata__9223372036438563903__9223372036854775800__9223370311919910398__31__9223372036854775803__1", + "metadata__9223372036438563903__9223372036854775800__9223370311919910398__31__9223372036854775803__2__1", translogTransferManager ) ); assertEquals( new Tuple<>(106L, 106L), translog.getMinMaxTranslogGenerationFromMetadataFile( - "metadata__9223372036438563903__9223372036854775701__9223370311919910403__31__9223372036854775701__1", + "metadata__9223372036438563903__9223372036854775701__9223370311919910403__31__9223372036854775701__3__1", translogTransferManager ) ); assertEquals( new Tuple<>(4573L, 99964L), translog.getMinMaxTranslogGenerationFromMetadataFile( - "metadata__9223372036438563903__9223372036854675843__9223370311919910408__31__9223372036854771234__1", + "metadata__9223372036438563903__9223372036854675843__9223370311919910408__31__9223372036854771234__4__1", translogTransferManager ) ); assertEquals( new Tuple<>(1L, 4L), translog.getMinMaxTranslogGenerationFromMetadataFile( - "metadata__9223372036438563903__9223372036854775803__9223370311919910413__31__9223372036854775806__1", + "metadata__9223372036438563903__9223372036854775803__9223370311919910413__31__9223372036854775806__5__1", translogTransferManager ) ); assertEquals( new Tuple<>(2474L, 3462L), translog.getMinMaxTranslogGenerationFromMetadataFile( - "metadata__9223372036438563903__9223372036854772345__9223370311919910429__31__9223372036854773333__1", + "metadata__9223372036438563903__9223372036854772345__9223370311919910429__31__9223372036854773333__6__1", translogTransferManager ) ); assertEquals( new Tuple<>(5807L, 7917L), translog.getMinMaxTranslogGenerationFromMetadataFile( - "metadata__9223372036438563903__9223372036854767890__9223370311919910434__31__9223372036854770000__1", + "metadata__9223372036438563903__9223372036854767890__9223370311919910434__31__9223372036854770000__7__1", translogTransferManager ) ); @@ -859,4 +894,93 @@ public void testGetMinMaxTranslogGenerationFromMetadataFile() throws IOException verify(translogTransferManager).readMetadata("metadata__9223372036438563903__9223372036854774799__9223370311919910393__31__1"); verify(translogTransferManager).readMetadata("metadata__9223372036438563903__9223372036854775800__9223370311919910398__31__1"); } + + public void testDeleteStaleRemotePrimaryTerms() throws IOException { + TranslogTransferManager translogTransferManager = mock(TranslogTransferManager.class); + + List metadataFiles = List.of( + // PT 4 to 9 + "metadata__9223372036854775798__9223372036854774799__9223370311919910393__node1__9223372036438563958__4__1", + // PT 2 to 7 + "metadata__9223372036854775800__9223372036854774799__9223370311919910393__node1__9223372036438563958__2__1", + // PT 2 to 6 + "metadata__9223372036854775801__9223372036854774799__9223370311919910393__node1__9223372036438563958__2__1" + ); + + Logger staticLogger = LogManager.getLogger(RemoteFsTimestampAwareTranslogTests.class); + when(translogTransferManager.listPrimaryTermsInRemote()).thenReturn(Set.of(1L, 2L, 3L, 4L)); + AtomicLong minPrimaryTermInRemote = new AtomicLong(Long.MAX_VALUE); + RemoteFsTimestampAwareTranslog.deleteStaleRemotePrimaryTerms( + metadataFiles, + translogTransferManager, + new HashMap<>(), + minPrimaryTermInRemote, + staticLogger + ); + verify(translogTransferManager).deletePrimaryTermsAsync(2L); + assertEquals(2, minPrimaryTermInRemote.get()); + + RemoteFsTimestampAwareTranslog.deleteStaleRemotePrimaryTerms( + metadataFiles, + translogTransferManager, + new HashMap<>(), + minPrimaryTermInRemote, + staticLogger + ); + // This means there are no new invocations of deletePrimaryTermAsync + verify(translogTransferManager, times(1)).deletePrimaryTermsAsync(anyLong()); + } + + public void testDeleteStaleRemotePrimaryTermsNoPrimaryTermInRemote() throws IOException { + TranslogTransferManager translogTransferManager = mock(TranslogTransferManager.class); + + List metadataFiles = List.of( + // PT 4 to 9 + "metadata__9223372036854775798__9223372036854774799__9223370311919910393__node1__9223372036438563958__4__1", + // PT 2 to 7 + "metadata__9223372036854775800__9223372036854774799__9223370311919910393__node1__9223372036438563958__2__1", + // PT 2 to 6 + "metadata__9223372036854775801__9223372036854774799__9223370311919910393__node1__9223372036438563958__2__1" + ); + + Logger staticLogger = LogManager.getLogger(RemoteFsTimestampAwareTranslogTests.class); + when(translogTransferManager.listPrimaryTermsInRemote()).thenReturn(Set.of()); + AtomicLong minPrimaryTermInRemote = new AtomicLong(Long.MAX_VALUE); + RemoteFsTimestampAwareTranslog.deleteStaleRemotePrimaryTerms( + metadataFiles, + translogTransferManager, + new HashMap<>(), + minPrimaryTermInRemote, + staticLogger + ); + verify(translogTransferManager, times(0)).deletePrimaryTermsAsync(anyLong()); + assertEquals(Long.MAX_VALUE, minPrimaryTermInRemote.get()); + } + + public void testDeleteStaleRemotePrimaryTermsPrimaryTermInRemoteIsBigger() throws IOException { + TranslogTransferManager translogTransferManager = mock(TranslogTransferManager.class); + + List metadataFiles = List.of( + // PT 4 to 9 + "metadata__9223372036854775798__9223372036854774799__9223370311919910393__node1__9223372036438563958__4__1", + // PT 2 to 7 + "metadata__9223372036854775800__9223372036854774799__9223370311919910393__node1__9223372036438563958__2__1", + // PT 2 to 6 + "metadata__9223372036854775801__9223372036854774799__9223370311919910393__node1__9223372036438563958__2__1" + ); + + Logger staticLogger = LogManager.getLogger(RemoteFsTimestampAwareTranslogTests.class); + when(translogTransferManager.listPrimaryTermsInRemote()).thenReturn(Set.of(2L, 3L, 4L)); + AtomicLong minPrimaryTermInRemote = new AtomicLong(Long.MAX_VALUE); + RemoteFsTimestampAwareTranslog.deleteStaleRemotePrimaryTerms( + metadataFiles, + translogTransferManager, + new HashMap<>(), + minPrimaryTermInRemote, + staticLogger + ); + verify(translogTransferManager, times(0)).deletePrimaryTermsAsync(anyLong()); + assertEquals(2, minPrimaryTermInRemote.get()); + } + } diff --git a/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java b/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java index 2426a14c0c93b..ed0d6b7d50706 100644 --- a/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java +++ b/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java @@ -26,6 +26,7 @@ import org.opensearch.index.remote.RemoteStoreUtils; import org.opensearch.index.remote.RemoteTranslogTransferTracker; import org.opensearch.index.translog.Translog; +import org.opensearch.index.translog.TranslogReader; import org.opensearch.index.translog.transfer.FileSnapshot.CheckpointFileSnapshot; import org.opensearch.index.translog.transfer.FileSnapshot.TransferFileSnapshot; import org.opensearch.index.translog.transfer.FileSnapshot.TranslogFileSnapshot; @@ -627,7 +628,7 @@ public void testMetadataConflict() throws InterruptedException { String mdFilename = tm.getFileName(); long count = mdFilename.chars().filter(ch -> ch == METADATA_SEPARATOR.charAt(0)).count(); // There should not be any `_` in mdFile name as it is used a separator . - assertEquals(12, count); + assertEquals(14, count); Thread.sleep(1); TranslogTransferMetadata tm2 = new TranslogTransferMetadata(1, 1, 1, 2, "node--2"); String mdFilename2 = tm2.getFileName(); @@ -874,4 +875,53 @@ public void testReadMetadataForGivenTimestampException() throws IOException { assertThrows(IOException.class, () -> translogTransferManager.readMetadata(3000L)); assertNoDownloadStats(true); } + + public void testPopulateFileTrackerWithLocalStateNoReaders() { + translogTransferManager.populateFileTrackerWithLocalState(null); + assertTrue(translogTransferManager.getFileTransferTracker().allUploaded().isEmpty()); + + translogTransferManager.populateFileTrackerWithLocalState(List.of()); + assertTrue(translogTransferManager.getFileTransferTracker().allUploaded().isEmpty()); + } + + public void testPopulateFileTrackerWithLocalState() { + TranslogReader reader1 = mock(TranslogReader.class); + when(reader1.getGeneration()).thenReturn(12L); + TranslogReader reader2 = mock(TranslogReader.class); + when(reader2.getGeneration()).thenReturn(23L); + TranslogReader reader3 = mock(TranslogReader.class); + when(reader3.getGeneration()).thenReturn(34L); + TranslogReader reader4 = mock(TranslogReader.class); + when(reader4.getGeneration()).thenReturn(45L); + + translogTransferManager.populateFileTrackerWithLocalState(List.of(reader1, reader2, reader3, reader4)); + assertEquals( + Set.of("translog-12.tlog", "translog-23.tlog", "translog-34.tlog", "translog-45.tlog"), + translogTransferManager.getFileTransferTracker().allUploaded() + ); + } + + public void testPopulateFileTrackerWithLocalStateNoCkpAsMetadata() { + TranslogTransferManager translogTransferManager = new TranslogTransferManager( + shardId, + transferService, + remoteBaseTransferPath.add(TRANSLOG.getName()), + remoteBaseTransferPath.add(METADATA.getName()), + tracker, + remoteTranslogTransferTracker, + DefaultRemoteStoreSettings.INSTANCE, + true + ); + + TranslogReader reader1 = mock(TranslogReader.class); + when(reader1.getGeneration()).thenReturn(12L); + TranslogReader reader2 = mock(TranslogReader.class); + when(reader2.getGeneration()).thenReturn(23L); + + translogTransferManager.populateFileTrackerWithLocalState(List.of(reader1, reader2)); + assertEquals( + Set.of("translog-12.tlog", "translog-12.ckp", "translog-23.tlog", "translog-23.ckp"), + translogTransferManager.getFileTransferTracker().allUploaded() + ); + } }