Skip to content

Commit

Permalink
Merge pull request #5791 from roc-lang/update-wyhash
Browse files Browse the repository at this point in the history
Update to wyhash final 4
  • Loading branch information
Anton-4 authored Sep 9, 2023
2 parents a616b61 + e6f3b01 commit 4db62f7
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 68 deletions.
108 changes: 59 additions & 49 deletions crates/compiler/builtins/roc/Dict.roc
Original file line number Diff line number Diff line change
Expand Up @@ -1005,12 +1005,12 @@ expect

# We have decided not to expose the standard roc hashing algorithm.
# This is to avoid external dependence and the need for versioning.
# The current implementation is a form of [Wyhash final3](https://github.com/wangyi-fudan/wyhash/blob/a5995b98ebfa7bd38bfadc0919326d2e7aabb805/wyhash.h).
# The current implementation is a form of [Wyhash final4](https://github.com/wangyi-fudan/wyhash/blob/77e50f267fbc7b8e2d09f2d455219adb70ad4749/wyhash.h).
# It is 64bit and little endian specific currently.
# TODO: wyhash is slow for large keys, use something like cityhash if the keys are too long.
# TODO: Add a builtin to distinguish big endian systems and change loading orders.
# TODO: Switch out Wymum on systems with slow 128bit multiplication.
LowLevelHasher := { originalSeed : U64, state : U64 } implements [
LowLevelHasher := { initializedSeed : U64, state : U64 } implements [
Hasher {
addBytes,
addU8,
Expand All @@ -1036,14 +1036,30 @@ createLowLevelHasher = \seedOpt ->
when seedOpt is
PseudoRandSeed -> pseudoSeed {}
WithSeed s -> s
@LowLevelHasher { originalSeed: seed, state: seed }
@LowLevelHasher { initializedSeed: initSeed seed, state: seed }

combineState : LowLevelHasher, { a : U64, b : U64, seed : U64, length : U64 } -> LowLevelHasher
combineState = \@LowLevelHasher { originalSeed, state }, { a, b, seed, length } ->
tmp = wymix (Num.bitwiseXor wyp1 a) (Num.bitwiseXor seed b)
hash = wymix (Num.bitwiseXor wyp1 length) tmp

@LowLevelHasher { originalSeed, state: wymix state hash }
combineState = \@LowLevelHasher { initializedSeed, state }, { a, b, seed, length } ->
mum =
a
|> Num.bitwiseXor wyp1
|> wymum (Num.bitwiseXor b seed)
nexta =
mum.lower
|> Num.bitwiseXor wyp0
|> Num.bitwiseXor length
nextb =
mum.upper
|> Num.bitwiseXor wyp1
hash = wymix nexta nextb

@LowLevelHasher { initializedSeed, state: wymix state hash }

initSeed = \seed ->
seed
|> Num.bitwiseXor wyp0
|> wymix wyp1
|> Num.bitwiseXor seed

complete = \@LowLevelHasher { state } -> state

Expand All @@ -1052,19 +1068,17 @@ complete = \@LowLevelHasher { state } -> state
# like using the output of the last hash as the seed to the current hash.
# I am simply not sure the tradeoffs here. Theoretically this method is more sound.
# Either way, the performance will be similar and we can change this later.
addU8 = \@LowLevelHasher { originalSeed, state }, u8 ->
seed = Num.bitwiseXor originalSeed wyp0
addU8 = \@LowLevelHasher { initializedSeed, state }, u8 ->
p0 = Num.toU64 u8
a =
Num.shiftLeftBy p0 16
|> Num.bitwiseOr (Num.shiftLeftBy p0 8)
|> Num.bitwiseOr p0
b = 0

combineState (@LowLevelHasher { originalSeed, state }) { a, b, seed, length: 1 }
combineState (@LowLevelHasher { initializedSeed, state }) { a, b, seed: initializedSeed, length: 1 }

addU16 = \@LowLevelHasher { originalSeed, state }, u16 ->
seed = Num.bitwiseXor originalSeed wyp0
addU16 = \@LowLevelHasher { initializedSeed, state }, u16 ->
p0 = Num.bitwiseAnd u16 0xFF |> Num.toU64
p1 = Num.shiftRightZfBy u16 8 |> Num.toU64
a =
Expand All @@ -1073,26 +1087,23 @@ addU16 = \@LowLevelHasher { originalSeed, state }, u16 ->
|> Num.bitwiseOr p1
b = 0

combineState (@LowLevelHasher { originalSeed, state }) { a, b, seed, length: 2 }
combineState (@LowLevelHasher { initializedSeed, state }) { a, b, seed: initializedSeed, length: 2 }

addU32 = \@LowLevelHasher { originalSeed, state }, u32 ->
seed = Num.bitwiseXor originalSeed wyp0
addU32 = \@LowLevelHasher { initializedSeed, state }, u32 ->
p0 = Num.toU64 u32
a = Num.shiftLeftBy p0 32 |> Num.bitwiseOr p0

combineState (@LowLevelHasher { originalSeed, state }) { a, b: a, seed, length: 4 }
combineState (@LowLevelHasher { initializedSeed, state }) { a, b: a, seed: initializedSeed, length: 4 }

addU64 = \@LowLevelHasher { originalSeed, state }, u64 ->
seed = Num.bitwiseXor originalSeed wyp0
addU64 = \@LowLevelHasher { initializedSeed, state }, u64 ->
p0 = Num.bitwiseAnd 0xFFFF_FFFF u64
p1 = Num.shiftRightZfBy u64 32
a = Num.shiftLeftBy p0 32 |> Num.bitwiseOr p1
b = Num.shiftLeftBy p1 32 |> Num.bitwiseOr p0

combineState (@LowLevelHasher { originalSeed, state }) { a, b, seed, length: 8 }
combineState (@LowLevelHasher { initializedSeed, state }) { a, b, seed: initializedSeed, length: 8 }

addU128 = \@LowLevelHasher { originalSeed, state }, u128 ->
seed = Num.bitwiseXor originalSeed wyp0
addU128 = \@LowLevelHasher { initializedSeed, state }, u128 ->
lower = u128 |> Num.toU64
upper = Num.shiftRightZfBy u128 64 |> Num.toU64
p0 = Num.bitwiseAnd 0xFFFF_FFFF lower
Expand All @@ -1102,12 +1113,11 @@ addU128 = \@LowLevelHasher { originalSeed, state }, u128 ->
a = Num.shiftLeftBy p0 32 |> Num.bitwiseOr p2
b = Num.shiftLeftBy p3 32 |> Num.bitwiseOr p1

combineState (@LowLevelHasher { originalSeed, state }) { a, b, seed, length: 16 }
combineState (@LowLevelHasher { initializedSeed, state }) { a, b, seed: initializedSeed, length: 16 }

addBytes : LowLevelHasher, List U8 -> LowLevelHasher
addBytes = \@LowLevelHasher { originalSeed, state }, list ->
addBytes = \@LowLevelHasher { initializedSeed, state }, list ->
length = List.len list
seed = Num.bitwiseXor originalSeed wyp0
abs =
if length <= 16 then
if length >= 4 then
Expand All @@ -1117,17 +1127,17 @@ addBytes = \@LowLevelHasher { originalSeed, state }, list ->
(wyr4 list (Num.subWrap length 4) |> Num.shiftLeftBy 32)
|> Num.bitwiseOr (wyr4 list (Num.subWrap length 4 |> Num.subWrap x))

{ a, b, seed }
{ a, b, seed: initializedSeed }
else if length > 0 then
{ a: wyr3 list 0 length, b: 0, seed }
{ a: wyr3 list 0 length, b: 0, seed: initializedSeed }
else
{ a: 0, b: 0, seed }
{ a: 0, b: 0, seed: initializedSeed }
else if length <= 48 then
hashBytesHelper16 seed list 0 length
hashBytesHelper16 initializedSeed list 0 length
else
hashBytesHelper48 seed seed seed list 0 length
hashBytesHelper48 initializedSeed initializedSeed initializedSeed list 0 length

combineState (@LowLevelHasher { originalSeed, state }) { a: abs.a, b: abs.b, seed: abs.seed, length: Num.toU64 length }
combineState (@LowLevelHasher { initializedSeed, state }) { a: abs.a, b: abs.b, seed: abs.seed, length: Num.toU64 length }

hashBytesHelper48 : U64, U64, U64, List U8, Nat, Nat -> { a : U64, b : U64, seed : U64 }
hashBytesHelper48 = \seed, see1, see2, list, index, remaining ->
Expand Down Expand Up @@ -1240,135 +1250,135 @@ expect
|> addBytes []
|> complete

hash == 0x1C3F_F8BF_07F9_B0B3
hash == 0xD59C59757DBBE6B3

expect
hash =
createLowLevelHasher testSeed
|> addBytes [0x42]
|> complete

hash == 0x8F9F_0A1E_E06F_0D52
hash == 0x38CE03D0E61AF963

expect
hash =
createLowLevelHasher testSeed
|> addU8 0x42
|> complete

hash == 0x8F9F_0A1E_E06F_0D52
hash == 0x38CE03D0E61AF963

expect
hash =
createLowLevelHasher testSeed
|> addBytes [0xFF, 0xFF]
|> complete

hash == 0x86CC_8B71_563F_F084
hash == 0xE1CB2FA0D6A64113

expect
hash =
createLowLevelHasher testSeed
|> addU16 0xFFFF
|> complete

hash == 0x86CC_8B71_563F_F084
hash == 0xE1CB2FA0D6A64113

expect
hash =
createLowLevelHasher testSeed
|> addBytes [0x36, 0xA7]
|> complete

hash == 0xD1A5_0F24_2536_84F8
hash == 0x26B8319EDAF81B15

expect
hash =
createLowLevelHasher testSeed
|> addU16 0xA736
|> complete

hash == 0xD1A5_0F24_2536_84F8
hash == 0x26B8319EDAF81B15

expect
hash =
createLowLevelHasher testSeed
|> addBytes [0x00, 0x00, 0x00, 0x00]
|> complete

hash == 0x3762_ACB1_7604_B541
hash == 0xA187D7CA074F9EE7

expect
hash =
createLowLevelHasher testSeed
|> addU32 0x0000_0000
|> complete

hash == 0x3762_ACB1_7604_B541
hash == 0xA187D7CA074F9EE7

expect
hash =
createLowLevelHasher testSeed
|> addBytes [0xA9, 0x2F, 0xEE, 0x21]
|> complete

hash == 0x20F3_3FD7_D32E_C7A9
hash == 0xA499EFE4C1454D09

expect
hash =
createLowLevelHasher testSeed
|> addU32 0x21EE_2FA9
|> complete

hash == 0x20F3_3FD7_D32E_C7A9
hash == 0xA499EFE4C1454D09

expect
hash =
createLowLevelHasher testSeed
|> addBytes [0x5D, 0x66, 0xB1, 0x8F, 0x68, 0x44, 0xC7, 0x03, 0xE1, 0xDD, 0x23, 0x34, 0xBB, 0x9A, 0x42, 0xA7]
|> complete

hash == 0xA16F_DDAA_C167_74C7
hash == 0xDD39A206AED64C73

expect
hash =
createLowLevelHasher testSeed
|> addU128 0xA742_9ABB_3423_DDE1_03C7_4468_8FB1_665D
|> complete

hash == 0xA16F_DDAA_C167_74C7
hash == 0xDD39A206AED64C73

expect
hash =
createLowLevelHasher testSeed
|> Hash.hashStrBytes "abcdefghijklmnopqrstuvwxyz"
|> complete

hash == 0xBEE0_A8FD_E990_D285
hash == 0x51C59DF5B1D15F40

expect
hash =
createLowLevelHasher testSeed
|> Hash.hashStrBytes "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
|> complete

hash == 0xB3C5_8528_9D82_A6EF
hash == 0xD8D0A129D97A4E95

expect
hash =
createLowLevelHasher testSeed
|> Hash.hashStrBytes "1234567890123456789012345678901234567890123456789012345678901234567890"
|> complete

hash == 0xDB6B_7997_7A55_BA03
hash == 0x8188065B44FB4AAA

expect
hash =
createLowLevelHasher testSeed
|> addBytes (List.repeat 0x77 100)
|> complete

hash == 0x171F_EEE2_B764_8E5E
hash == 0x47A2A606EADF3378

# Note, had to specify u8 in the lists below to avoid ability type resolution error.
# Apparently it won't pick the default integer.
Expand All @@ -1378,7 +1388,7 @@ expect
|> Hash.hashUnordered [8u8, 82u8, 3u8, 8u8, 24u8] List.walk
|> complete

hash == 0x999F_B530_3529_F17D
hash == 0xB2E8254C08F16B20

expect
hash1 =
Expand Down
38 changes: 19 additions & 19 deletions crates/compiler/test_mono/generated/dict.txt
Original file line number Diff line number Diff line change
@@ -1,28 +1,28 @@
procedure Dict.1 (Dict.556):
let Dict.565 : List {[], []} = Array [];
let Dict.572 : U64 = 0i64;
let Dict.573 : U64 = 8i64;
let Dict.566 : List U64 = CallByName List.11 Dict.572 Dict.573;
let Dict.569 : I8 = CallByName Dict.39;
let Dict.570 : U64 = 8i64;
let Dict.567 : List I8 = CallByName List.11 Dict.569 Dict.570;
let Dict.568 : U64 = 0i64;
let Dict.564 : {List {[], []}, List U64, List I8, U64} = Struct {Dict.565, Dict.566, Dict.567, Dict.568};
ret Dict.564;
procedure Dict.1 (Dict.554):
let Dict.563 : List {[], []} = Array [];
let Dict.570 : U64 = 0i64;
let Dict.571 : U64 = 8i64;
let Dict.564 : List U64 = CallByName List.11 Dict.570 Dict.571;
let Dict.567 : I8 = CallByName Dict.39;
let Dict.568 : U64 = 8i64;
let Dict.565 : List I8 = CallByName List.11 Dict.567 Dict.568;
let Dict.566 : U64 = 0i64;
let Dict.562 : {List {[], []}, List U64, List I8, U64} = Struct {Dict.563, Dict.564, Dict.565, Dict.566};
ret Dict.562;

procedure Dict.39 ():
let Dict.571 : I8 = -128i64;
ret Dict.571;
let Dict.569 : I8 = -128i64;
ret Dict.569;

procedure Dict.4 (Dict.562):
let Dict.100 : U64 = StructAtIndex 3 Dict.562;
let #Derived_gen.8 : List {[], []} = StructAtIndex 0 Dict.562;
procedure Dict.4 (Dict.560):
let Dict.101 : U64 = StructAtIndex 3 Dict.560;
let #Derived_gen.8 : List {[], []} = StructAtIndex 0 Dict.560;
dec #Derived_gen.8;
let #Derived_gen.7 : List U64 = StructAtIndex 1 Dict.562;
let #Derived_gen.7 : List U64 = StructAtIndex 1 Dict.560;
dec #Derived_gen.7;
let #Derived_gen.6 : List I8 = StructAtIndex 2 Dict.562;
let #Derived_gen.6 : List I8 = StructAtIndex 2 Dict.560;
dec #Derived_gen.6;
ret Dict.100;
ret Dict.101;

procedure List.11 (List.124, List.125):
let List.536 : List I8 = CallByName List.68 List.125;
Expand Down

0 comments on commit 4db62f7

Please sign in to comment.