Reduce allocations and heap size (gunnarmorling#525)

* Reduce allocations
* Shrink the heap size
* Calculate hash when reading name (50-100ms difference)
* No need to reverse bytes
* Bump heap size
ascheman · Jan 21, 2024 · 7bfc7ea
1 parent 3e19513
commit 7bfc7ea
Show file tree

Hide file tree

Showing 2 changed files with 31 additions and 23 deletions.
diff --git a/calculate_average_roman-r-m.sh b/calculate_average_roman-r-m.sh
@@ -19,7 +19,6 @@ JAVA_OPTS="--enable-preview -XX:+UseTransparentHugePages"
 
 # epsilon GC needs enough memory or it makes things worse
 # see https://stackoverflow.com/questions/58087596/why-are-repeated-memory-allocations-observed-to-be-slower-using-epsilon-vs-g1
-# 2GB seems to be the sweet spot
-JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:-EnableJVMCI -XX:+UseEpsilonGC -Xmx2G -Xms2G -XX:+AlwaysPreTouch"
+JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:-EnableJVMCI -XX:+UseEpsilonGC -Xmx1G -Xms1G -XX:+AlwaysPreTouch"
 
 java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_roman_r_m
diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_roman_r_m.java b/src/main/java/dev/morling/onebrc/CalculateAverage_roman_r_m.java
@@ -82,19 +82,30 @@ public Worker(FileChannel channel, long start, long end) {
 
         private void parseName(ByteString station) {
             long start = offset;
-            long pattern;
             long next = UNSAFE.getLong(offset);
-            while ((pattern = applyPattern(next, SEMICOLON_MASK)) == 0) {
-                offset += 8;
-                next = UNSAFE.getLong(offset);
+            long pattern = applyPattern(next, SEMICOLON_MASK);
+            int bytes;
+            if (pattern != 0) {
+                bytes = Long.numberOfTrailingZeros(pattern) / 8;
+                offset += bytes;
+                long h = Long.reverseBytes(next) >>> (8 * (8 - bytes));
+                station.hash = (int) (h ^ (h >>> 32));
+            }
+            else {
+                long h = next;
+                station.hash = (int) (h ^ (h >>> 32));
+                while (pattern == 0) {
+                    offset += 8;
+                    next = UNSAFE.getLong(offset);
+                    pattern = applyPattern(next, SEMICOLON_MASK);
+                }
+                bytes = Long.numberOfTrailingZeros(pattern) / 8;
+                offset += bytes;
             }
-            int bytes = Long.numberOfTrailingZeros(pattern) / 8;
-            offset += bytes;
 
             int len = (int) (offset - start);
             station.offset = start;
             station.len = len;
-            station.hash = 0;
             station.tail = next & ((1L << (8 * bytes)) - 1);
 
             offset++;
@@ -215,11 +226,9 @@ static final class ByteString {
             this.ms = ms;
         }
 
-        @Override
-        public String toString() {
-            var bytes = new byte[len];
-            UNSAFE.copyMemory(null, offset, bytes, Unsafe.ARRAY_BYTE_BASE_OFFSET, len);
-            return new String(bytes, 0, len);
+        public String asString(byte[] reusable) {
+            UNSAFE.copyMemory(null, offset, reusable, Unsafe.ARRAY_BYTE_BASE_OFFSET, len);
+            return new String(reusable, 0, len);
         }
 
         public ByteString copy() {
@@ -243,9 +252,7 @@ public boolean equals(Object o) {
             if (len != that.len)
                 return false;
 
-            int i = 0;
-
-            for (; i + 7 < len; i += 8) {
+            for (int i = 0; i + 7 < len; i += 8) {
                 long l1 = UNSAFE.getLong(offset + i);
                 long l2 = UNSAFE.getLong(that.offset + i);
                 if (l1 != l2) {
@@ -257,13 +264,14 @@ public boolean equals(Object o) {
 
         @Override
         public int hashCode() {
-            if (hash == 0) {
-                long h = UNSAFE.getLong(offset);
-                h = Long.reverseBytes(h) >>> (8 * Math.max(0, 8 - len));
-                hash = (int) (h ^ (h >>> 32));
-            }
             return hash;
         }
+
+        @Override
+        public String toString() {
+            byte[] buf = new byte[100];
+            return asString(buf);
+        }
     }
 
     private static final class ResultRow {
@@ -318,10 +326,11 @@ ResultRow get(ByteString s) {
         }
 
         TreeMap<String, ResultRow> toMap() {
+            byte[] buf = new byte[100];
             var result = new TreeMap<String, ResultRow>();
             for (int i = 0; i < SIZE; i++) {
                 if (keys[i] != null) {
-                    result.put(keys[i].toString(), values[i]);
+                    result.put(keys[i].asString(buf), values[i]);
                 }
             }
             return result;