Skip to content

Commit

Permalink
Merge pull request DSpace#9638 from tdonohue/avoid_inline_js
Browse files Browse the repository at this point in the history
Update configuration to always download HTML/JS/XML Bitstreams (no inline display)
  • Loading branch information
kshepherd authored Jun 17, 2024
2 parents 2af0509 + a091d34 commit f1059b4
Show file tree
Hide file tree
Showing 5 changed files with 142 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import jakarta.servlet.http.HttpServletResponse;
import jakarta.ws.rs.core.Response;
import org.apache.catalina.connector.ClientAbortException;
import org.apache.commons.collections4.ListUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.Logger;
import org.dspace.app.rest.converter.ConverterService;
Expand Down Expand Up @@ -204,12 +205,39 @@ private boolean isNotAnErrorResponse(HttpServletResponse response) {
|| responseCode.equals(Response.Status.Family.REDIRECTION);
}

/**
* Check if a Bitstream of the specified format should always be downloaded (i.e. "content-disposition: attachment")
* or can be opened inline (i.e. "content-disposition: inline").
* <P>
* NOTE that downloading via "attachment" is more secure, as the user's browser will not attempt to process or
* display the file. But, downloading via "inline" may be seen as more user-friendly for common formats.
* @param format BitstreamFormat
* @return true if always download ("attachment"). false if can be opened inline ("inline")
*/
private boolean checkFormatForContentDisposition(BitstreamFormat format) {
// never automatically download undefined formats
if (format == null) {
return false;
// Undefined or Unknown formats should ALWAYS be downloaded for additional security.
if (format == null || format.getSupportLevel() == BitstreamFormat.UNKNOWN) {
return true;
}

// Load additional formats configured to require download
List<String> configuredFormats = List.of(configurationService.
getArrayProperty("webui.content_disposition_format"));

// If configuration includes "*", then all formats will always be downloaded.
if (configuredFormats.contains("*")) {
return true;
}
List<String> formats = List.of((configurationService.getArrayProperty("webui.content_disposition_format")));

// Define a download list of formats which DSpace forces to ALWAYS be downloaded.
// These formats can embed JavaScript which may be run in the user's browser if the file is opened inline.
// Therefore, DSpace blocks opening these formats inline as it could be used for an XSS attack.
List<String> downloadOnlyFormats = List.of("text/html", "text/javascript", "text/xml", "rdf");

// Combine our two lists
List<String> formats = ListUtils.union(downloadOnlyFormats, configuredFormats);

// See if the passed in format's MIME type or file extension is listed.
boolean download = formats.contains(format.getMIMEType());
if (!download) {
for (String ext : format.getExtensions()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public class BitstreamFormatRestRepositoryIT extends AbstractControllerIntegrati
@Autowired
private BitstreamFormatConverter bitstreamFormatConverter;

private final int DEFAULT_AMOUNT_FORMATS = 85;
private final int DEFAULT_AMOUNT_FORMATS = 86;

@Test
public void findAllPaginationTest() throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1248,7 +1248,6 @@ public void closeInputStreamsDownloadWithCoverPage() throws Exception {
Mockito.verify(inputStreamSpy, times(1)).close();
}


@Test
public void checkContentDispositionOfFormats() throws Exception {
configurationService.setProperty("webui.content_disposition_format", new String[] {
Expand All @@ -1265,16 +1264,16 @@ public void checkContentDispositionOfFormats() throws Exception {
Bitstream rtf;
Bitstream xml;
Bitstream txt;
Bitstream html;
Bitstream csv;
try (InputStream is = IOUtils.toInputStream(content, CharEncoding.UTF_8)) {
rtf = BitstreamBuilder.createBitstream(context, item, is)
.withMimeType("text/richtext").build();
xml = BitstreamBuilder.createBitstream(context, item, is)
.withMimeType("text/xml").build();
txt = BitstreamBuilder.createBitstream(context, item, is)
.withMimeType("text/plain").build();
html = BitstreamBuilder.createBitstream(context, item, is)
.withMimeType("text/html").build();
csv = BitstreamBuilder.createBitstream(context, item, is)
.withMimeType("text/csv").build();
}
context.restoreAuthSystemState();

Expand All @@ -1283,9 +1282,89 @@ public void checkContentDispositionOfFormats() throws Exception {
verifyBitstreamDownload(xml, "text/xml;charset=UTF-8", true);
verifyBitstreamDownload(txt, "text/plain;charset=UTF-8", true);
// this format is not configured and should open inline
verifyBitstreamDownload(html, "text/html;charset=UTF-8", false);
verifyBitstreamDownload(csv, "text/csv;charset=UTF-8", false);
}

@Test
public void checkHardcodedContentDispositionFormats() throws Exception {
// This test is similar to the above test, but it verifies that our *hardcoded settings* for
// webui.content_disposition_format are protecting us from loading specific formats *inline*.
context.turnOffAuthorisationSystem();
Community community = CommunityBuilder.createCommunity(context).build();
Collection collection = CollectionBuilder.createCollection(context, community).build();
Item item = ItemBuilder.createItem(context, collection).build();
String content = "Test Content";
Bitstream html;
Bitstream js;
Bitstream rdf;
Bitstream xml;
Bitstream unknown;
Bitstream svg;
try (InputStream is = IOUtils.toInputStream(content, CharEncoding.UTF_8)) {
html = BitstreamBuilder.createBitstream(context, item, is)
.withMimeType("text/html").build();
js = BitstreamBuilder.createBitstream(context, item, is)
.withMimeType("text/javascript").build();
// NOTE: RDF has a MIME Type with a "charset" in our bitstream-formats.xml
rdf = BitstreamBuilder.createBitstream(context, item, is)
.withMimeType("application/rdf+xml; charset=utf-8").build();
xml = BitstreamBuilder.createBitstream(context, item, is)
.withMimeType("text/xml").build();
unknown = BitstreamBuilder.createBitstream(context, item, is)
.withMimeType("application/octet-stream").build();
svg = BitstreamBuilder.createBitstream(context, item, is)
.withMimeType("image/svg+xml").build();

}
context.restoreAuthSystemState();

// By default, HTML, JS & XML should all download. This protects us from possible XSS attacks, as
// each of these formats can embed JavaScript which may execute when the file is loaded *inline*.
verifyBitstreamDownload(html, "text/html;charset=UTF-8", true);
verifyBitstreamDownload(js, "text/javascript;charset=UTF-8", true);
verifyBitstreamDownload(rdf, "application/rdf+xml;charset=UTF-8", true);
verifyBitstreamDownload(xml, "text/xml;charset=UTF-8", true);
// Unknown file formats should also always download
verifyBitstreamDownload(unknown, "application/octet-stream;charset=UTF-8", true);
// SVG does NOT currently exist as a recognized format in DSpace's bitstream-formats.xml, but it's another
// format that allows embedded JavaScript. This test is a reminder that SVGs should NOT be opened inline.
verifyBitstreamDownload(svg, "application/octet-stream;charset=UTF-8", true);
}

@Test
public void checkWildcardContentDispositionFormats() throws Exception {
// Setting "*" should result in all formats being downloaded (nothing will be opened inline)
configurationService.setProperty("webui.content_disposition_format", "*");

context.turnOffAuthorisationSystem();
Community community = CommunityBuilder.createCommunity(context).build();
Collection collection = CollectionBuilder.createCollection(context, community).build();
Item item = ItemBuilder.createItem(context, collection).build();
String content = "Test Content";
Bitstream csv;
Bitstream jpg;
Bitstream mpg;
Bitstream pdf;
try (InputStream is = IOUtils.toInputStream(content, CharEncoding.UTF_8)) {
csv = BitstreamBuilder.createBitstream(context, item, is)
.withMimeType("text/csv").build();
jpg = BitstreamBuilder.createBitstream(context, item, is)
.withMimeType("image/jpeg").build();
mpg = BitstreamBuilder.createBitstream(context, item, is)
.withMimeType("video/mpeg").build();
pdf = BitstreamBuilder.createBitstream(context, item, is)
.withMimeType("application/pdf").build();
}
context.restoreAuthSystemState();

// All formats should be download only
verifyBitstreamDownload(csv, "text/csv;charset=UTF-8", true);
verifyBitstreamDownload(jpg, "image/jpeg;charset=UTF-8", true);
verifyBitstreamDownload(mpg, "video/mpeg;charset=UTF-8", true);
verifyBitstreamDownload(pdf, "application/pdf;charset=UTF-8", true);
}


private void verifyBitstreamDownload(Bitstream file, String contentType, boolean shouldDownload) throws Exception {
String token = getAuthToken(admin.getEmail(), password);
String header = getClient(token).perform(get("/api/core/bitstreams/" + file.getID() + "/content")
Expand All @@ -1294,11 +1373,15 @@ private void verifyBitstreamDownload(Bitstream file, String contentType, boolean
.andExpect(content().contentType(contentType))
.andReturn().getResponse().getHeader("content-disposition");
if (shouldDownload) {
assertTrue(header.contains("attachment"));
assertFalse(header.contains("inline"));
assertTrue("Content-Disposition should contain 'attachment' for " + contentType,
header.contains("attachment"));
assertFalse("Content-Disposition should NOT contain 'inline' for " + contentType,
header.contains("inline"));
} else {
assertTrue(header.contains("inline"));
assertFalse(header.contains("attachment"));
assertTrue("Content-Disposition should contain 'inline' for " + contentType,
header.contains("inline"));
assertFalse("Content-Disposition should NOT contain 'attachment' for " + contentType,
header.contains("attachment"));
}
}
}
11 changes: 8 additions & 3 deletions dspace/config/dspace.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -1384,9 +1384,14 @@ webui.content_disposition_threshold = 8388608

#### Content Attachment Disposition Formats ####
#
# Set which mimetypes, file extensions will NOT be opened inline
# Files with these mimetypes/extensions will always be downloaded,
# regardless of the threshold above
# Set which mimetypes or file extensions will NOT be opened inline.
# Files with these mimetypes/extensions will always be downloaded, regardless of the threshold above.
# NOTE: For security reasons, some file formats (e.g. HTML, XML, RDF, JS) will always be downloaded regardless
# of the settings here. This blocks these formats from executing embedded JavaScript when opened inline.
# For additional security, you may choose to set this to "*" to force all formats to always be downloaded
# (i.e. disables all formats from opening inline within the user's browser).
#
# By default, RTF is always downloaded because most browsers attempt to display it as plain text.
webui.content_disposition_format = text/richtext

#### Multi-file HTML document/site settings #####
Expand Down
9 changes: 9 additions & 0 deletions dspace/config/registries/bitstream-formats.xml
Original file line number Diff line number Diff line change
Expand Up @@ -827,4 +827,13 @@
<extension>avif</extension>
</bitstream-type>

<bitstream-type>
<mimetype>text/javascript</mimetype>
<short_description>JavaScript</short_description>
<description>JavaScript</description>
<support_level>1</support_level>
<internal>false</internal>
<extension>js</extension>
</bitstream-type>

</dspace-bitstream-types>

0 comments on commit f1059b4

Please sign in to comment.