Skip to content

Commit

Permalink
Reduce allocations and heap size (gunnarmorling#525)
Browse files Browse the repository at this point in the history
* Reduce allocations

* Shrink the heap size

* Calculate hash when reading name (50-100ms difference)

* No need to reverse bytes

* Bump heap size
  • Loading branch information
roman-r-m authored Jan 21, 2024
1 parent 3e19513 commit 7bfc7ea
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 23 deletions.
3 changes: 1 addition & 2 deletions calculate_average_roman-r-m.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ JAVA_OPTS="--enable-preview -XX:+UseTransparentHugePages"

# epsilon GC needs enough memory or it makes things worse
# see https://stackoverflow.com/questions/58087596/why-are-repeated-memory-allocations-observed-to-be-slower-using-epsilon-vs-g1
# 2GB seems to be the sweet spot
JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:-EnableJVMCI -XX:+UseEpsilonGC -Xmx2G -Xms2G -XX:+AlwaysPreTouch"
JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:-EnableJVMCI -XX:+UseEpsilonGC -Xmx1G -Xms1G -XX:+AlwaysPreTouch"

java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_roman_r_m
51 changes: 30 additions & 21 deletions src/main/java/dev/morling/onebrc/CalculateAverage_roman_r_m.java
Original file line number Diff line number Diff line change
Expand Up @@ -82,19 +82,30 @@ public Worker(FileChannel channel, long start, long end) {

private void parseName(ByteString station) {
long start = offset;
long pattern;
long next = UNSAFE.getLong(offset);
while ((pattern = applyPattern(next, SEMICOLON_MASK)) == 0) {
offset += 8;
next = UNSAFE.getLong(offset);
long pattern = applyPattern(next, SEMICOLON_MASK);
int bytes;
if (pattern != 0) {
bytes = Long.numberOfTrailingZeros(pattern) / 8;
offset += bytes;
long h = Long.reverseBytes(next) >>> (8 * (8 - bytes));
station.hash = (int) (h ^ (h >>> 32));
}
else {
long h = next;
station.hash = (int) (h ^ (h >>> 32));
while (pattern == 0) {
offset += 8;
next = UNSAFE.getLong(offset);
pattern = applyPattern(next, SEMICOLON_MASK);
}
bytes = Long.numberOfTrailingZeros(pattern) / 8;
offset += bytes;
}
int bytes = Long.numberOfTrailingZeros(pattern) / 8;
offset += bytes;

int len = (int) (offset - start);
station.offset = start;
station.len = len;
station.hash = 0;
station.tail = next & ((1L << (8 * bytes)) - 1);

offset++;
Expand Down Expand Up @@ -215,11 +226,9 @@ static final class ByteString {
this.ms = ms;
}

@Override
public String toString() {
var bytes = new byte[len];
UNSAFE.copyMemory(null, offset, bytes, Unsafe.ARRAY_BYTE_BASE_OFFSET, len);
return new String(bytes, 0, len);
/**
 * Copies this string's {@code len} bytes, starting at the address held in {@code offset},
 * into the caller-supplied buffer and decodes them into a {@link String}.
 *
 * @param reusable scratch buffer that receives the raw bytes; it is overwritten from index 0.
 *                 NOTE(review): the copy goes through Unsafe, so the buffer presumably must be
 *                 at least {@code len} bytes long -- confirm callers guarantee this.
 * @return a new String built from the first {@code len} bytes of {@code reusable}
 */
public String asString(byte[] reusable) {
// A null source object makes Unsafe interpret offset as an absolute address.
UNSAFE.copyMemory(null, offset, reusable, Unsafe.ARRAY_BYTE_BASE_OFFSET, len);
// No charset is passed, so decoding uses the platform default charset.
return new String(reusable, 0, len);
}

public ByteString copy() {
Expand All @@ -243,9 +252,7 @@ public boolean equals(Object o) {
if (len != that.len)
return false;

int i = 0;

for (; i + 7 < len; i += 8) {
for (int i = 0; i + 7 < len; i += 8) {
long l1 = UNSAFE.getLong(offset + i);
long l2 = UNSAFE.getLong(that.offset + i);
if (l1 != l2) {
Expand All @@ -257,13 +264,14 @@ public boolean equals(Object o) {

/**
 * Returns the value of the {@code hash} field, first deriving it from the 8 bytes read at
 * {@code offset} when the field is still 0.
 */
@Override
public int hashCode() {
// 0 doubles as the "not computed yet" marker.
if (hash == 0) {
long h = UNSAFE.getLong(offset);
// Byte-swap the word, then drop (8 - len) low-order bytes when len is below 8.
h = Long.reverseBytes(h) >>> (8 * Math.max(0, 8 - len));
// Fold the upper 32 bits into the lower 32 to get an int.
hash = (int) (h ^ (h >>> 32));
}
return hash;
}

/**
 * Returns this byte string as text by delegating to {@code asString} with a freshly
 * allocated scratch buffer.
 */
@Override
public String toString() {
// NOTE(review): 100 is presumably the maximum expected name length in bytes -- confirm;
// asString copies len bytes into this buffer.
byte[] buf = new byte[100];
return asString(buf);
}
}

private static final class ResultRow {
Expand Down Expand Up @@ -318,10 +326,11 @@ ResultRow get(ByteString s) {
}

TreeMap<String, ResultRow> toMap() {
byte[] buf = new byte[100];
var result = new TreeMap<String, ResultRow>();
for (int i = 0; i < SIZE; i++) {
if (keys[i] != null) {
result.put(keys[i].toString(), values[i]);
result.put(keys[i].asString(buf), values[i]);
}
}
return result;
Expand Down

0 comments on commit 7bfc7ea

Please sign in to comment.