From 3fccd9bf31324d13298fdc1e6399192e146045da Mon Sep 17 00:00:00 2001 From: Ryan Ernst Date: Wed, 10 Apr 2024 15:37:56 -0700 Subject: [PATCH] Add support for Elasticsearch tab delimited links file (#2955) The Elasticsearch reference docs links file is formatted in JSON. That format makes extraction of links complicated, both on the docs side and in Elasticsearch, which needs to parse JSON. This commit adds support for a new tab-delimited txt file that the Elasticsearch reference docs will move to. relates elastic/elasticsearch#105813 --- build_docs.pl | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/build_docs.pl b/build_docs.pl index 3a323537cf832..1c46b8b7876a0 100755 --- a/build_docs.pl +++ b/build_docs.pl @@ -454,7 +454,7 @@ sub check_elasticsearch_links { # So we grab all quoted strings that contain `html`. This *should* be fine # for a while because the keys in the file are all in SHOUTING_SNAKE_CASE # so even if one contains "html" it'll contain "HTML" which doesn't match. - my $extractor = sub { + my $json_extractor = sub { my $contents = shift; return sub { while ( $contents =~ m!"([^"\#]+)(?:\#([^"]+))?"!g ) { @@ -465,6 +465,15 @@ sub check_elasticsearch_links { return; }; }; + my $tabdelim_extractor = sub { + my $contents = shift; + return sub { + while ( $contents =~ m!"[^\t]+\t(.*)"!g ) { + return "en/elasticsearch/reference/$version/$1"; + } + return; + }; + }; my $src_path = 'server/src/main/resources/org/elasticsearch/common/reference-docs-links.json'; my $repo = ES::Repo->get_repo('elasticsearch'); @@ -486,7 +495,19 @@ sub check_elasticsearch_links { # https://github.com/elastic/docs/issues/2264 $branch = $version eq "master" ? 
"main" : $version; say " Branch: $branch, Version: $version"; - my $source = $repo->show_file( $link_check_name, $branch, $src_path ); + + my $links_file; + my $extractor; + my $source = eval { + $links_file = 'server/src/main/resources/org/elasticsearch/common/reference-docs-links.json'; + $extractor = $json_extractor; + $repo->show_file( $link_check_name, $branch, $links_file ); + } || eval { + $links_file = 'libs/core/src/main/resources/org/elasticsearch/core/reference-docs-links.txt'; + $extractor = $tabdelim_extractor; + $repo->show_file( $link_check_name, $branch, $links_file ); + }; + die "failed to find elasticsearch links file;\n$@" unless $source; $link_checker->check_source( $source, $extractor, "Elasticsearch [$version]: $src_path" );