Skip to content

Commit

Permalink
Allow Robots.txt generation to enable different file by path (#3415)
Browse files Browse the repository at this point in the history
* Add feature #3414 (Robots.txt generation should allow for path limiters)
  • Loading branch information
HitmanInWis authored Oct 1, 2024
1 parent 4c2fb59 commit 336d0a6
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 3 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com)
- #3420 - Redirect Map Manager - enable Redirect Map Manager in AEM CS (would require a specific - not public yet - AEM CS release version, TBA)
- #3423 - Redirect Manager - status code is not retaining its value in the dialog after authoring

### Added

- #3415 - Allow Robots.txt generation to serve a different file depending on the requested resource path

## 6.6.4 - 2024-08-14
- #3417 - Configurable recursion in Content Sync

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
Expand Down Expand Up @@ -79,6 +80,8 @@ public final class RobotsServlet extends SlingSafeMethodsServlet {

private String robotsContentsPropertyPath;

private final Map<String, String> robotsContentsPropertyPathMap = new HashMap<>();

private boolean printGroupingComments;

private int crawlDelay;
Expand All @@ -95,6 +98,16 @@ public final class RobotsServlet extends SlingSafeMethodsServlet {
protected void activate(RobotsServletConfig config) {
externalizerDomain = config.externalizer_domain();
robotsContentsPropertyPath = config.robots_content_property_path();
for (String mapping : config.robots_content_property_pathMappings()) {
if (StringUtils.isNotBlank(mapping)) {
String[] mappingParts = mapping.split("=");
if (mappingParts.length == 2) {
robotsContentsPropertyPathMap.put(mappingParts[0], mappingParts[1]);
} else {
log.warn("Invalid robots_content_property_path mapping: {}", mapping);
}
}
}
printGroupingComments = config.print_grouping_comments();
crawlDelay = config.crawl_delay();

Expand All @@ -111,7 +124,7 @@ protected void doGet(SlingHttpServletRequest request, SlingHttpServletResponse r
}

private void write(SlingHttpServletRequest request, SlingHttpServletResponse response) throws IOException {
if (StringUtils.isNotBlank(robotsContentsPropertyPath)) {
if (StringUtils.isNotBlank(robotsContentsPropertyPath) || !robotsContentsPropertyPathMap.isEmpty()) {
writeFromBinaryProperty(request, response);
} else {
writeFromOsgiConfig(request, response);
Expand Down Expand Up @@ -211,9 +224,18 @@ private void addRuleForPageHavingBooleanProperty(Page page, List<String> propNam
}

private void writeFromBinaryProperty(SlingHttpServletRequest request, SlingHttpServletResponse response) throws IOException {
String absoluteRobotsContentsPropertyPath = robotsContentsPropertyPath;
String absoluteRobotsContentsPropertyPath = robotsContentsPropertyPathMap.get(request.getResource().getPath());
if (StringUtils.isBlank(absoluteRobotsContentsPropertyPath)) {
if (StringUtils.isNotBlank(robotsContentsPropertyPath)) {
absoluteRobotsContentsPropertyPath = robotsContentsPropertyPath;
} else {
log.error("robots file requested but resource path {} not found in mappings", request.getResource().getPath());
response.sendError(HttpServletResponse.SC_NOT_FOUND);
return;
}
}
if (!absoluteRobotsContentsPropertyPath.startsWith("/")) {
absoluteRobotsContentsPropertyPath = request.getResource().getPath() + "/" + robotsContentsPropertyPath;
absoluteRobotsContentsPropertyPath = request.getResource().getPath() + "/" + absoluteRobotsContentsPropertyPath;
}

boolean written = false;
Expand Down Expand Up @@ -334,6 +356,9 @@ private String buildAllowedOrDisallowedDirective(boolean isAllowed, String allow
@AttributeDefinition(name = "Robots Content Property", description = "Path (either relative or absolute) to a String or Binary property containing the entire robots.txt contents. This could be a page property (e.g. robotsTxtContents) or the contents of a file within the DAM (e.g. /content/dam/my-site/seo/robots.txt/jcr:content/renditions/original/jcr:content/jcr:data). If this is specified, all other configurations are effectively ignored.")
String robots_content_property_path();

@AttributeDefinition(name = "Robots Content Property Map", description = "Overrides Robots Content Property with mappings of <resource path>=<Robots Content Property> such that only a resource matching the exact <resource path> will honor the Robots Content Property. If a valid mapping is not found for the current resource path, Robots Content Property will be used as a fallback if specified.")
String[] robots_content_property_pathMappings() default {};

@AttributeDefinition(name = "User Agent Directives", description = "A set of User-agent directives to add to the robots file. Each directive is optionally pre-fixed with a ruleGroupName. Syntax: [<ruleGroupName>:]<user agent name>")
String[] user_agent_directives() default {};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,40 @@ public void testWriteFromPageProperty() throws IOException, ServletException {
assertResponse("RobotsServlet_testWriteFromPageProperty.txt", response);
}

/**
 * Activates the servlet with path mappings only (no default
 * robots.content.property.path) and expects a 200 response whose body equals
 * RobotsServlet_testWriteFromPageProperty.txt.
 *
 * NOTE(review): presumably the shared mock request targets
 * /content/geometrixx/en/jcr:content, so the second mapping (relative property
 * "robotsContents") is the one that applies - confirm against the test setup.
 */
@Test
public void testWriteFromPagePropertyFromPathMappings() throws IOException, ServletException {
    // Two mappings: one for the "es" page and one for the "en" page.
    final String[] pathMappings = {
        "/content/geometrixx/es/jcr:content=thisPropDoesntExist",
        "/content/geometrixx/en/jcr:content=robotsContents"
    };

    final Map<String, Object> osgiConfig = new HashMap<>();
    osgiConfig.put("sling.servlet.resourceTypes", "geometrixx/components/structure/page");
    osgiConfig.put("robots.content.property.pathMappings", pathMappings);

    final RobotsServlet servlet = context.registerInjectActivateService(new RobotsServlet(), osgiConfig);
    servlet.doGet(request, response);

    assertEquals("servlet returned an error", 200, response.getStatus());
    assertResponse("RobotsServlet_testWriteFromPageProperty.txt", response);
}

/**
 * Activates the servlet with a default robots.content.property.path pointing
 * at the jcr:data of the asset created here, plus one path mapping for another
 * page, and expects a 200 response whose body equals
 * RobotsServlet_testWriteFromAsset.txt.
 *
 * NOTE(review): presumably the shared mock request does not target
 * /content/some/other/page/jcr:content, so the mapping is skipped and the
 * default property path is served - confirm against the test setup.
 */
@Test
public void testWriteFromAsset() throws ServletException, IOException {
    // Seed the DAM with the robots.txt asset the default property path refers to.
    context.create().asset(
            "/content/dam/geometrixx/robots.txt",
            getClass().getResourceAsStream("RobotsServlet_testWriteFromAsset.txt"),
            "text/plain");

    final String[] pathMappings = {
        "/content/some/other/page/jcr:content=/some/other/path/jcr:data"
    };

    final Map<String, Object> osgiConfig = new HashMap<>();
    osgiConfig.put("sling.servlet.resourceTypes", "geometrixx/components/structure/page");
    osgiConfig.put("robots.content.property.path", "/content/dam/geometrixx/robots.txt/jcr:content/renditions/original/jcr:content/jcr:data");
    osgiConfig.put("robots.content.property.pathMappings", pathMappings);

    final RobotsServlet servlet = context.registerInjectActivateService(new RobotsServlet(), osgiConfig);
    servlet.doGet(request, response);

    assertEquals("servlet returned an error", 200, response.getStatus());
    assertResponse("RobotsServlet_testWriteFromAsset.txt", response);
}

@Test
public void testWriteFromAssetFromMappings() throws ServletException, IOException {
context.create().asset("/content/dam/geometrixx/robots.txt", getClass().getResourceAsStream("RobotsServlet_testWriteFromAsset.txt"), "text/plain");

Map<String, Object> props = new HashMap<>();
props.put("sling.servlet.resourceTypes", "geometrixx/components/structure/page");
props.put("robots.content.property.pathMappings", new String[] {"/content/some/other/page/jcr:content=/some/other/path/jcr:data",
"/content/geometrixx/en/jcr:content=/content/dam/geometrixx/robots.txt/jcr:content/renditions/original/jcr:content/jcr:data"});
RobotsServlet robotsServlet = context.registerInjectActivateService(new RobotsServlet(), props);
robotsServlet.doGet(request, response);
assertEquals("servlet returned an error", 200, response.getStatus());
Expand Down Expand Up @@ -245,6 +272,17 @@ public void testWriteFromNonExistentPropertyRelative() throws ServletException,
assertEquals("servlet did not return the expected error", 404, response.getStatus());
}

/**
 * Activates the servlet with path mappings only, none of which is expected to
 * apply to the requested resource, and no default robots.content.property.path;
 * expects a 404 response.
 *
 * The three entries are: a well-formed mapping for another page, a blank
 * entry, and an entry without the "=" separator.
 *
 * NOTE(review): presumably the shared mock request targets
 * /content/geometrixx/en/jcr:content, making the third entry a malformed
 * mapping for the requested page itself - confirm against the test setup.
 */
@Test
public void testWriteFromNonExistentPathMapping() throws ServletException, IOException {
    final String[] pathMappings = {
        "/content/some/other/page/jcr:content=/non/existent/path",
        "",
        "/content/geometrixx/en/jcr:content"
    };

    final Map<String, Object> osgiConfig = new HashMap<>();
    osgiConfig.put("sling.servlet.resourceTypes", "geometrixx/components/structure/page");
    osgiConfig.put("robots.content.property.pathMappings", pathMappings);

    final RobotsServlet servlet = context.registerInjectActivateService(new RobotsServlet(), osgiConfig);
    servlet.doGet(request, response);

    assertEquals("servlet did not return the expected error", 404, response.getStatus());
}

private void assertResponse(String expectedResponseOutputResourceName, MockSlingHttpServletResponse response) throws IOException {
try (InputStream resourceAsStream = getClass().getResourceAsStream(expectedResponseOutputResourceName)) {
// both response and input stream contains OS dependent line endings (no need to normalize)
Expand Down

0 comments on commit 336d0a6

Please sign in to comment.