Skip to content

Commit

Permalink
performance refine for IOUtils.contentEquals(Reader, Reader)
Browse files Browse the repository at this point in the history
  • Loading branch information
XenoAmess committed Jan 12, 2021
1 parent f7efc7b commit 9540a1f
Show file tree
Hide file tree
Showing 9 changed files with 2,017 additions and 31 deletions.
52 changes: 52 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,18 @@ file comparators, endian transformation classes, and much more.
</dependencyManagement>

<dependencies>
<!-- JMH core library; test scope only, version taken from the jmh.version property. -->
<dependency>
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-core</artifactId>
<version>${jmh.version}</version>
<scope>test</scope>
</dependency>
<!-- JMH annotation-processor artifact; test scope only, same jmh.version as jmh-core. -->
<dependency>
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-generator-annprocess</artifactId>
<version>${jmh.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
Expand Down Expand Up @@ -312,6 +324,7 @@ file comparators, endian transformation classes, and much more.
<commons.release.isDistModule>true</commons.release.isDistModule>
<commons.releaseManagerName>Gary Gregory</commons.releaseManagerName>
<commons.releaseManagerKey>86fdc7e2a11262cb</commons.releaseManagerKey>
<jmh.version>1.21</jmh.version>
</properties>

<build>
Expand Down Expand Up @@ -549,5 +562,44 @@ file comparators, endian transformation classes, and much more.
<coveralls.skip>true</coveralls.skip>
</properties>
</profile>
<!--
  Benchmark profile.
  Activating it sets skipTests=true and, during the test phase, uses exec-maven-plugin to
  launch "java -classpath [test classpath] org.openjdk.jmh.Main".
  The benchmark property (default: org.apache) is passed to JMH as its last argument and is
  also embedded in the result file name, target/jmh-result.${benchmark}.json.
-->
<profile>
<id>benchmark</id>
<properties>
<skipTests>true</skipTests>
<benchmark>org.apache</benchmark>
</properties>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>1.6.0</version>
<executions>
<execution>
<id>benchmark</id>
<phase>test</phase>
<goals>
<goal>exec</goal>
</goals>
<configuration>
<!-- Build the classpath from the test scope so the test-scoped JMH artifacts are included. -->
<classpathScope>test</classpathScope>
<executable>java</executable>
<arguments>
<argument>-classpath</argument>
<classpath/>
<argument>org.openjdk.jmh.Main</argument>
<!-- -rf json / -rff FILE: JMH result format and result file (see the JMH command-line help). -->
<argument>-rf</argument>
<argument>json</argument>
<argument>-rff</argument>
<argument>target/jmh-result.${benchmark}.json</argument>
<argument>${benchmark}</argument>
</arguments>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>
222 changes: 191 additions & 31 deletions src/main/java/org/apache/commons/io/IOUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
import java.util.function.Consumer;

import org.apache.commons.io.function.IOConsumer;
import org.apache.commons.io.input.buffer.LineEndUnifiedBufferedReader;
import org.apache.commons.io.output.AppendableWriter;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.commons.io.output.NullOutputStream;
Expand Down Expand Up @@ -157,14 +158,14 @@ public class IOUtils {

/**
 * The Unix line separator string, taken from {@link StandardLineSeparator#LF}.
 *
 * @see StandardLineSeparator#LF
 */
public static final String LINE_SEPARATOR_UNIX = StandardLineSeparator.LF.getString();

/**
 * The Windows line separator string, taken from {@link StandardLineSeparator#CRLF}.
 *
 * @see StandardLineSeparator#CRLF
 */
public static final String LINE_SEPARATOR_WINDOWS = StandardLineSeparator.CRLF.getString();
Expand Down Expand Up @@ -745,23 +746,48 @@ public static long consume(final InputStream input)
@SuppressWarnings("resource")
public static boolean contentEquals(final InputStream input1, final InputStream input2)
throws IOException {
// see comments in public static boolean contentEquals(final Reader input1, final Reader input2)
// this function is mirror to it.
if (input1 == input2) {
return true;
}
if (input1 == null ^ input2 == null) {
return false;
}
final BufferedInputStream bufferedInput1 = buffer(input1);
final BufferedInputStream bufferedInput2 = buffer(input2);
int ch = bufferedInput1.read();
while (EOF != ch) {
final int ch2 = bufferedInput2.read();
if (ch != ch2) {
return false;

byte[] byteArray1 = new byte[DEFAULT_BUFFER_SIZE];
byte[] byteArray2 = new byte[DEFAULT_BUFFER_SIZE];
int nowPos1;
int nowPos2;
int nowRead1;
int nowRead2;
while (true) {
nowPos1 = 0;
nowPos2 = 0;
for (int nowCheck = 0; nowCheck < DEFAULT_BUFFER_SIZE; nowCheck++) {
if (nowPos1 == nowCheck) {
do {
nowRead1 = input1.read(byteArray1, nowPos1, DEFAULT_BUFFER_SIZE - nowPos1);
} while (nowRead1 == 0);
if (nowRead1 == EOF) {
return nowPos2 == nowCheck && input2.read() == EOF;
}
nowPos1 += nowRead1;
}
if (nowPos2 == nowCheck) {
do {
nowRead2 = input2.read(byteArray2, nowPos2, DEFAULT_BUFFER_SIZE - nowPos2);
} while (nowRead2 == 0);
if (nowRead2 == EOF) {
return nowPos1 == nowCheck && input1.read() == EOF;
}
nowPos2 += nowRead2;
}
if (byteArray1[nowCheck] != byteArray2[nowCheck]) {
return false;
}
}
ch = bufferedInput1.read();
}
return bufferedInput2.read() == EOF;
}

/**
Expand Down Expand Up @@ -789,19 +815,79 @@ public static boolean contentEquals(final Reader reader1, final Reader reader2)
if (reader1 == null ^ reader2 == null) {
return false;
}
final BufferedReader bufferedInput1 = toBufferedReader(reader1);
final BufferedReader bufferedInput2 = toBufferedReader(reader2);

int ch = bufferedInput1.read();
while (EOF != ch) {
final int ch2 = bufferedInput2.read();
if (ch != ch2) {
return false;
// char buffer array for input1
char[] charArray1 = new char[DEFAULT_BUFFER_SIZE];
// char buffer array for input2
char[] charArray2 = new char[DEFAULT_BUFFER_SIZE];

// the current last-index of chars read to charArray1 from input1
int nowPos1;
// the current last-index of chars read to charArray2 from input2
int nowPos2;
// the chars read this time.
int nowRead;
while (true) {
nowPos1 = 0;
nowPos2 = 0;
/*
* This loop is specially designed for performance.
* Since the contents of input1 and input2 must be equal for the result to be true,
* both char buffers share a single index, which runs from 0 to DEFAULT_BUFFER_SIZE (8192).
* Each read fetches as many chars as possible, limited both by the input reader itself
* and by the space remaining in the array.
* The performance of the loop can be argued simply:
* 1. If the reader returns only a few chars per read() call,
* the buffers are still only restarted once every 8192 chars, so it is not time-consuming.
* 2. If the reader returns many chars per read() call,
* the buffers fill up quickly, so it is not time-consuming either.
*/
for (int nowCheck = 0; nowCheck < DEFAULT_BUFFER_SIZE; nowCheck++) {
if (nowPos1 == nowCheck) {
// if nowPos1 == nowCheck,
// then charArray1[nowCheck] has not been filled yet,
// so we need to invoke read on input1 first.
do {
// read as many chars as possible, using the remaining spaces of charArray1.
nowRead = reader1.read(charArray1, nowPos1, DEFAULT_BUFFER_SIZE - nowPos1);
} while (nowRead == 0);
if (nowRead == EOF) {
// if input1 ends, then we check if input2 ends too.
// if nowPos2 == nowCheck && input2.read() == EOF,
// we think input2 have no more chars,
// and cannot read more either,
// thus return true.
// otherwise return false.
return nowPos2 == nowCheck && reader2.read() == EOF;
}
nowPos1 += nowRead;
}
if (nowPos2 == nowCheck) {
// if nowPos2 == nowCheck,
// then charArray2[nowCheck] has not been filled yet,
// so we need to invoke read on input2 first.
do {
// read as many chars as possible, using the remaining spaces of charArray2.
nowRead = reader2.read(charArray2, nowPos2, DEFAULT_BUFFER_SIZE - nowPos2);
} while (nowRead == 0);
if (nowRead == EOF) {
// if input2 ends, then we check if input1 ends too.
// if nowPos1 == nowCheck && input1.read() == EOF,
// we think input1 have no more chars,
// and cannot read more either,
// thus return true.
// otherwise return false.
return nowPos1 == nowCheck && reader1.read() == EOF;
}
nowPos2 += nowRead;
}
// now both charArray1[nowCheck] and charArray2[nowCheck] are filled, so compare them.
if (charArray1[nowCheck] != charArray2[nowCheck]) {
return false;
}
}
ch = bufferedInput1.read();
}

return bufferedInput2.read() == EOF;
}

/**
Expand All @@ -827,16 +913,90 @@ public static boolean contentEqualsIgnoreEOL(final Reader reader1, final Reader
if (reader1 == null ^ reader2 == null) {
return false;
}
final BufferedReader br1 = toBufferedReader(reader1);
final BufferedReader br2 = toBufferedReader(reader2);

String line1 = br1.readLine();
String line2 = br2.readLine();
while (line1 != null && line1.equals(line2)) {
line1 = br1.readLine();
line2 = br2.readLine();
final LineEndUnifiedBufferedReader bufferedInput1;
if (reader1 instanceof LineEndUnifiedBufferedReader) {
bufferedInput1 = (LineEndUnifiedBufferedReader) reader1;
} else {
bufferedInput1 = new LineEndUnifiedBufferedReader(reader1);
}

final LineEndUnifiedBufferedReader bufferedInput2;
if (reader2 instanceof LineEndUnifiedBufferedReader) {
bufferedInput2 = (LineEndUnifiedBufferedReader) reader2;
} else {
bufferedInput2 = new LineEndUnifiedBufferedReader(reader2);
}

/*
* This variable records whether the last char compared was '\n'
* (it starts as true, before anything has been compared).
* It is needed because "a" and "a\n" are considered equal when ignoring EOL,
* but "\n" and "\n\n" are not.
*/
boolean justNewLine = true;

int currentChar1;
int currentChar2;

while (true) {
currentChar1 = bufferedInput1.peek();
currentChar2 = bufferedInput2.peek();

if (currentChar1 == EOF) {
if (currentChar2 == EOF) {
return true;
} else {
if (!justNewLine) {
return inputOnlyHaveCRLForEOF( bufferedInput2, currentChar2);
}
return false;
}
} else if (currentChar2 == EOF) {
if (!justNewLine) {
return inputOnlyHaveCRLForEOF(bufferedInput1, currentChar1);
}
return false;
}
if (currentChar1 != currentChar2) {
return false;
}
justNewLine = currentChar1 == '\n';
bufferedInput1.eat();
bufferedInput2.eat();
}
}

/**
 * Tests whether the remaining content of {@code input} is a single {@code '\n'} followed by EOF.
 * <p>
 * Private helper used only by {@link #contentEqualsIgnoreEOL(Reader, Reader)}, which calls it when one
 * reader has reached EOF and the other has not.
 * </p>
 *
 * @param input the reader that still has content
 * @param currentChar the char most recently peeked from {@code input}; the caller never passes EOF
 * @return {@code true} if {@code currentChar} is {@code '\n'} and the next read after it returns EOF,
 *         {@code false} otherwise
 * @throws IOException if reading from {@code input} fails
 * @see #contentEqualsIgnoreEOL(Reader, Reader)
 */
private static boolean inputOnlyHaveCRLForEOF(final LineEndUnifiedBufferedReader input, final int currentChar)
        throws IOException {
    /*
     * No "currentChar == EOF" check is needed here: contentEqualsIgnoreEOL only calls this
     * method on the reader whose peeked char was not EOF.
     */
    if (currentChar == '\n') {
        // Step past the peeked '\n' (presumably what eat() does; confirm against
        // LineEndUnifiedBufferedReader), then require that nothing follows it.
        input.eat();
        return input.read() == EOF;
    }
    return false;
}

/**
Expand Down Expand Up @@ -1154,7 +1314,7 @@ public static long copyLarge(final InputStream inputStream, final OutputStream o
* </p>
*
* @param inputStream the <code>InputStream</code> to read, may be {@code null}.
* @param outputStream the <code>OutputStream</code> to write
* @param buffer the buffer to use for the copy
* @return the number of bytes copied, or {@code 0} if {@code inputStream} is {@code null}.
* @throws NullPointerException if the OutputStream is {@code null}.
Expand Down Expand Up @@ -3382,7 +3542,7 @@ public static Writer writer(final Appendable appendable) {
* Instances should NOT be constructed in standard programming.
*/
public IOUtils() { //NOSONAR
// Empty body; the Javadoc above discourages constructing instances directly.
}

}
Loading

0 comments on commit 9540a1f

Please sign in to comment.