Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge release 508 into develop #5477

Merged
merged 4 commits into from
Jul 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

import static org.apache.commons.lang.StringEscapeUtils.escapeHtml;

import com.google.gwt.thirdparty.guava.common.base.Supplier;
import com.google.gwt.thirdparty.guava.common.base.Suppliers;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
Expand All @@ -13,11 +11,11 @@
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TimeZone;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import org.apache.commons.io.IOUtils;
Expand Down Expand Up @@ -79,8 +77,6 @@ public class CrawlFilter {
"<meta name=\"robots\" content=\"noindex\">";
// Client used to talk to Synapse; starts out null and is assigned by init(SynapseClient).
SynapseClient synapseClient = null;
// Starts out null; no assignment is visible in this portion of the file.
JSONObjectAdapter jsonObjectAdapter = null;
// Memoizing wrapper around homePageSupplier() with an expiration of 1 day
// (see Suppliers.memoizeWithExpiration). Read by getCachedHomePageHtml().
// The initializer calls homePageSupplier() while the instance is being constructed.
private final Supplier<String> homePageCached =
Suppliers.memoizeWithExpiration(homePageSupplier(), 1, TimeUnit.DAYS);
// NOTE(review): presumably an upper bound on child pages emitted per page;
// its usage is not visible in this portion of the file - confirm.
public static final int MAX_CHILD_PAGES = 5;

// Markdown processor
Expand All @@ -97,30 +93,14 @@ public static String removeSynapseWikiWidgets(String markdown) {
return wikiWidgetPattern.matcher(markdown).replaceAll("");
}

/**
 * Returns the home page HTML held by the {@code homePageCached} supplier.
 *
 * @return the string produced by {@code homePageCached.get()}
 */
public String getCachedHomePageHtml() {
  final String cachedHtml = homePageCached.get();
  return cachedHtml;
}

/**
 * Builds the supplier that produces the home page HTML.
 *
 * <p>The returned supplier delegates to {@code getHomePageHtml()}. When that
 * call fails with a {@code JSONObjectAdapterException} or a
 * {@code RestServiceException}, the exception's message is returned in place
 * of the HTML instead of the exception being propagated.
 *
 * @return a supplier yielding either the home page HTML or the failure message
 */
private Supplier<String> homePageSupplier() {
  Supplier<String> supplier = new Supplier<String>() {
    @Override
    public String get() {
      String result;
      try {
        result = getHomePageHtml();
      } catch (JSONObjectAdapterException | RestServiceException failure) {
        // Deliberately converted to text: the message becomes the supplied value.
        result = failure.getMessage();
      }
      return result;
    }
  };
  return supplier;
}

/**
 * Stores the given Synapse client on this instance and switches the
 * {@code df} formatter to the UTC time zone.
 *
 * @param synapseClient client to keep in the {@code synapseClient} field
 */
public void init(SynapseClient synapseClient) {
  // Assign the client first, matching the original statement order.
  this.synapseClient = synapseClient;

  final TimeZone utcZone = TimeZone.getTimeZone("UTC");
  df.setTimeZone(utcZone);
}

private String getHomePageHtml()
throws JSONObjectAdapterException, RestServiceException {
public String getHomePageHtml()
throws JSONObjectAdapterException, RestServiceException, UnsupportedEncodingException {
StringBuilder html = new StringBuilder();

// add direct links to all public projects in the system
Expand All @@ -129,8 +109,11 @@ private String getHomePageHtml()
projectsOnly.setKey("node_type");
projectsOnly.setValue("project");
query.getBooleanQuery().add(projectsOnly);
//limit to 100 at a time
query.setSize(100L);
query.setQueryTerm(Collections.singletonList(""));
//limit to 50 at a time
query.setSize(50L);
query.setStart(0L);

html.append(
"<h1>" +
DisplayConstants.DEFAULT_PAGE_TITLE +
Expand All @@ -144,37 +127,14 @@ private String getHomePageHtml()
TeamSearch.START_DELIMITER +
"0\">Teams</a></h3><br />"
);
try {
SearchResults results = synapseClient.search(query);
// append this set to the list
while (results.getHits().size() > 0) {
for (Hit hit : results.getHits()) {
// SWC-5149: send a Project alias link to the crawler if available.
if (hit.getAlias() != null) {
html.append(
"<a href=\"https://www.synapse.org/" +
hit.getAlias() +
"\">" +
hit.getName() +
"</a><br />"
);
} else {
html.append(
"<a href=\"https://www.synapse.org/Synapse:" +
hit.getId() +
"\">" +
hit.getName() +
"</a><br />"
);
}
}
long newStart = results.getStart() + results.getHits().size();
query.setStart(newStart);
results = synapseClient.search(query);
}
} catch (Exception e) {
e.printStackTrace();
}

String newJson = EntityFactory.createJSONStringForEntity(query);

html.append(
"<a href=\"https://www.synapse.org/Search:" +
URLEncoder.encode(newJson, "UTF-8") +
"\">Projects</a><br />"
);
html.append("</body></html>");
return html.toString();
}
Expand Down Expand Up @@ -613,7 +573,7 @@ public String getAllProjectsHtml(SearchQuery inputQuery)
String newJson = EntityFactory.createJSONStringForEntity(inputQuery);
html.append(
"<a href=\"https://www.synapse.org/Search:" +
URLEncoder.encode(newJson) +
URLEncoder.encode(newJson, "UTF-8") +
"\">Next Page</a><br />"
);
return html.toString();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,10 +199,7 @@ protected void doFilterInternal(
if (isHomePage) {
// use defaults in the dataModel, but also get crawl data if this is a bot
if (includeBotHtml) {
dataModel.put(
BOT_BODY_HTML_KEY,
crawlFilter.getCachedHomePageHtml()
);
dataModel.put(BOT_BODY_HTML_KEY, crawlFilter.getHomePageHtml());
}
} else if (path.startsWith("/Synapse")) {
Synapse place = new Synapse(placeToken);
Expand Down
Loading