Skip to content

Commit

Permalink
Revert "Fix entity file relative path mess (#8944)"
Browse files Browse the repository at this point in the history
This reverts commit 4d7606c.
  • Loading branch information
p-goulart committed Jul 17, 2023
1 parent 9bef699 commit aa77591
Show file tree
Hide file tree
Showing 26 changed files with 1,265 additions and 1,289 deletions.

This file was deleted.

189 changes: 11 additions & 178 deletions languagetool-core/src/main/java/org/languagetool/XMLValidator.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,9 @@
*/
package org.languagetool;

import org.languagetool.broker.ResourceDataBroker;
import org.languagetool.rules.Rule;
import org.languagetool.tools.StringTools;
import org.languagetool.tools.Tools;
import org.w3c.dom.*;
import org.w3c.dom.ls.LSInput;
import org.w3c.dom.ls.LSResourceResolver;
import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;

Expand All @@ -35,12 +31,7 @@
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.*;
import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

Expand Down Expand Up @@ -113,7 +104,7 @@ public void validateWithXmlSchema(String filename, String xmlSchemaPath) throws
if (schemaUrl == null) {
throw new IOException("XML schema not found in classpath: " + xmlSchemaPath);
}
validateInternal(new StreamSource(xmlStream), schemaUrl, filename);
validateInternal(new StreamSource(xmlStream), schemaUrl);
} catch (Exception e) {
throw new IOException("Cannot load or parse '" + filename + "'", e);
}
Expand All @@ -138,23 +129,19 @@ public void validateWithXmlSchema(String baseFilename, String filename, String x
if (schemaUrl == null) {
throw new IOException("XML schema not found in classpath: " + xmlSchemaPath);
}
validateInternal(mergeIntoSource(baseXmlStream, xmlStream, filename), schemaUrl, filename);
validateInternal(mergeIntoSource(baseXmlStream, xmlStream), schemaUrl);
} catch (Exception e) {
throw new IOException("Cannot load or parse '" + filename + "'", e);
}
}

private static Source mergeIntoSource(InputStream baseXmlStream, InputStream xmlStream, String xmlPath) throws Exception {
private static Source mergeIntoSource(InputStream baseXmlStream, InputStream xmlStream) throws Exception {
DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
domFactory.setIgnoringComments(true);
domFactory.setValidating(false);
domFactory.setNamespaceAware(true);

DocumentBuilder builder = domFactory.newDocumentBuilder();
ResourceDataBroker broker = JLanguageTool.getDataBroker();
URL absoluteUrl = broker.getAsURL(xmlPath);
EntityResolver entityResolver = new RuleEntityResolver(absoluteUrl);
builder.setEntityResolver(entityResolver);
Document baseDoc = builder.parse(baseXmlStream);
Document ruleDoc = builder.parse(xmlStream);

Expand All @@ -177,14 +164,14 @@ private static Source mergeIntoSource(InputStream baseXmlStream, InputStream xml
* @param xmlSchemaPath XML schema file in classpath
* @since 2.3
*/
public void validateStringWithXmlSchema(String xml, String xmlSchemaPath, String xmlPath) throws IOException {
public void validateStringWithXmlSchema(String xml, String xmlSchemaPath) throws IOException {
try {
URL schemaUrl = this.getClass().getResource(xmlSchemaPath);
if (schemaUrl == null) {
throw new IOException("XML schema not found in classpath: " + xmlSchemaPath);
}
try (ByteArrayInputStream stream = new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8))) {
validateInternal(new StreamSource(stream), schemaUrl, xmlPath);
try (ByteArrayInputStream stream = new ByteArrayInputStream(xml.getBytes("utf-8"))) {
validateInternal(new StreamSource(stream), schemaUrl);
}
} catch (SAXException e) {
throw new RuntimeException(e);
Expand Down Expand Up @@ -214,174 +201,19 @@ private void validateInternal(String xml, String dtdPath, String docType) throws
saxParser.parse(is, new ErrorHandler());
}

private void validateInternal(Source xmlSrc, URL xmlSchema, String xmlPath) throws SAXException, IOException {
Validator validator = getValidator(xmlSchema, xmlPath);
private void validateInternal(Source xmlSrc, URL xmlSchema) throws SAXException, IOException {
Validator validator = getValidator(xmlSchema);
validator.validate(xmlSrc);
}

private Validator getValidator(URL xmlSchema, String xmlPath) throws SAXException {
private Validator getValidator(URL xmlSchema) throws SAXException {
SchemaFactory sf = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
Schema schema = sf.newSchema(xmlSchema);
Validator validator = schema.newValidator();

validator.setResourceResolver(new LSRuleEntityResolver(xmlPath));
validator.setErrorHandler(new ErrorHandler());
return validator;
}

/**
* Defines an EntityResolver-like object (cf. {@link RuleEntityResolver}) as used by DOM. TBH I haven't gone very
* deep into the differences between DOM and SAX, but I don't think it should matter that much here. The logic for
* constructing the input sources/streams is the same.
*/
static class LSRuleEntityResolver implements LSResourceResolver {
private final String xmlPath;
@Override
public LSInput resolveResource(String type, String namespaceURI, String publicId, String systemId, String baseURI) {
if((publicId != null && publicId.endsWith(".ent")) || systemId.endsWith(".ent")) {
return new EntityAsInput(publicId, systemId, xmlPath);
}
return null;
}

public LSRuleEntityResolver(String xmlPath) {
this.xmlPath = xmlPath;
}
}

/**
* Defines an input source based on the correct path to external entities.
*/
static class EntityAsInput implements LSInput {
private String systemId;
private String publicId;
private InputStream inputStream;
private final URL xmlUrl;
private URL entitiesURL;
@Override
public Reader getCharacterStream() {
return null;
}

@Override
public void setCharacterStream(Reader characterStream) {
}

@Override
public InputStream getByteStream() {
return null;
}

@Override
public void setByteStream(InputStream byteStream) {
}

@Override
public String getStringData() {
return null;
}

public InputStream getInputStream() {
return this.inputStream;
}

/**
* Set InputStream from the value of the entities URL.
* @param inputStream param from superclass, unused since we just define an altogether new InputStream with the
* entities URL computed elsewhere in this class
* @throws IOException if the file pointed to is invalid, or we end up getting an unreadable stream from it some
* other way
*/
public void setInputStream(InputStream inputStream) throws IOException {
this.inputStream = Files.newInputStream(Paths.get(this.entitiesURL.getPath()));
}

@Override
public void setStringData(String stringData) {
}

@Override
public String getSystemId() {
return this.systemId;
}

/**
* Compute absolute path to entities file from source XML URL and relative path (systemId param).
* @param systemId relative URI pointing to entity file
*/
@Override
public void setSystemId(String systemId) {
try {
this.entitiesURL = new URL(this.xmlUrl, new URL(systemId).getPath());
} catch (MalformedURLException e) {
throw new RuntimeException(e);
}
this.systemId = this.entitiesURL.getPath();
}

@Override
public String getPublicId() {
return this.publicId;
}

/**
* Compute absolute path to entities file from source XML URL and relative path (systemId param).
* @param publicId relative URI pointing to entity file
*/
@Override
public void setPublicId(String publicId) {
if (publicId == null) {
return;
}
try {
this.entitiesURL = new URL(this.xmlUrl, new URL(publicId).getPath());
} catch (MalformedURLException e) {
throw new RuntimeException(e);
}
this.publicId = this.entitiesURL.getPath();
}

@Override
public String getBaseURI() {
return null;
}

@Override
public void setBaseURI(String baseURI) {
}

@Override
public String getEncoding() {
return null;
}

@Override
public void setEncoding(String encoding) {
}

@Override
public boolean getCertifiedText() {
return false;
}

@Override
public void setCertifiedText(boolean certifiedText) {
}

/**
* @param publicId from outer context, entity URI
* @param systemId from outer context, entity URI (if you ask me the precise difference between publicId and
* systemId I will cry)
* @param xmlPath path-as-string to source XML where the relative path to the entity file is located
*/
public EntityAsInput(String publicId, String systemId, String xmlPath) {
ResourceDataBroker broker = JLanguageTool.getDataBroker();
this.xmlUrl = broker.getAsURL(xmlPath);
this.setPublicId(publicId);
this.setSystemId(systemId);
}
}

/**
* XML handler that throws exception on error and warning, does nothing otherwise.
*/
Expand All @@ -404,4 +236,5 @@ public void error (SAXParseException e) throws SAXException {
}

}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -19,31 +19,21 @@
package org.languagetool.rules.patterns;

import org.apache.commons.lang3.ObjectUtils;
import org.languagetool.*;
import org.languagetool.broker.ResourceDataBroker;
import org.languagetool.Language;
import org.languagetool.Languages;
import org.languagetool.ResourceBundleTools;
import org.languagetool.rules.*;
import org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule;
import org.xml.sax.Attributes;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;
import java.util.regex.Pattern;

public class PatternRuleHandler extends XMLRuleHandler {

@Override
public InputSource resolveEntity(String publicId, String systemId) throws IOException, SAXException {
ResourceDataBroker broker = JLanguageTool.getDataBroker();
URL absoluteUrl = broker.getAsURL(this.sourceFile);
return new RuleEntityResolver(absoluteUrl).resolveEntity(publicId, systemId);
}

public static final String TYPE = "type";

static final String MARKER_TAG = "<marker>";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.util.List;

/**
Expand All @@ -45,7 +44,7 @@ public class PatternRuleLoader extends DefaultHandler {
* @param file XML file with pattern rules
*/
public final List<AbstractPatternRule> getRules(File file, Language lang) throws IOException {
try (InputStream inputStream = Files.newInputStream(file.toPath())) {
try (InputStream inputStream = new FileInputStream(file)) {
PatternRuleLoader ruleLoader = new PatternRuleLoader();
return ruleLoader.getRules(inputStream, file.getAbsolutePath(), lang);
}
Expand Down
Loading

0 comments on commit aa77591

Please sign in to comment.