From ed9d9b12f51fd0d84cb947e31139bffb156caab6 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Tue, 27 Jun 2023 11:38:38 -0400 Subject: [PATCH 1/3] Add some missing Dict and Set functions Also remove some unnecessary Hash and Eq restrictions --- crates/compiler/builtins/roc/Dict.roc | 43 ++++++++++++++++++-- crates/compiler/builtins/roc/List.roc | 3 ++ crates/compiler/builtins/roc/Set.roc | 57 ++++++++++++++++++++++++++- crates/compiler/module/src/symbol.rs | 6 +++ 4 files changed, 104 insertions(+), 5 deletions(-) diff --git a/crates/compiler/builtins/roc/Dict.roc b/crates/compiler/builtins/roc/Dict.roc index 097b0942109..180033b0086 100644 --- a/crates/compiler/builtins/roc/Dict.roc +++ b/crates/compiler/builtins/roc/Dict.roc @@ -7,6 +7,7 @@ interface Dict clear, capacity, len, + isEmpty, get, contains, insert, @@ -21,6 +22,8 @@ interface Dict insertAll, keepShared, removeAll, + map, + joinMap, ] imports [ Bool.{ Bool, Eq }, @@ -139,12 +142,12 @@ empty = \{} -> ## Returns the max number of elements the dictionary can hold before requiring a rehash. ## ``` ## foodDict = -## Dict.empty {} -## |> Dict.insert "apple" "fruit" +## Dict.empty {} +## |> Dict.insert "apple" "fruit" ## ## capacityOfDict = Dict.capacity foodDict ## ``` -capacity : Dict k v -> Nat | k has Hash & Eq +capacity : Dict * * -> Nat capacity = \@Dict { dataIndices } -> cap = List.len dataIndices @@ -192,10 +195,20 @@ fromList = \data -> ## |> Dict.len ## |> Bool.isEq 3 ## ``` -len : Dict k v -> Nat | k has Hash & Eq +len : Dict * * -> Nat len = \@Dict { size } -> size +## Check if the dictionary is empty. +## ``` +## Dict.isEmpty (Dict.empty {} |> Dict.insert "key" 42) +## +## Dict.isEmpty (Dict.empty {}) +## ``` +isEmpty : Dict * * -> Bool +isEmpty = \@Dict { size } -> + size == 0 + ## Clears all elements from a dictionary keeping around the allocation if it isn't huge. 
## ``` ## songs = @@ -225,6 +238,28 @@ clear = \@Dict { metadata, dataIndices, data } -> size: 0, } +## Convert each value in the dictionary to something new, by calling a conversion +## function on each of them which receives both the key and the old value. Then return a +## new dictionary containing the same keys and the converted values. +map : Dict k a, (k, a -> b) -> Dict k b | k has Hash & Eq, b has Hash & Eq +map = \dict, transform -> + init = withCapacity (capacity dict) + + walk dict init \answer, k, v -> + insert answer k (transform k v) + +## Like [Dict.map], except the transformation function wraps the return value +## in a dictionary. At the end, all the dictionaries get joined together +## (using [Dict.insertAll]) into one dictionary. +## +## You may know a similar function named `concatMap` in other languages. +joinMap : Dict a b, (a, b -> Dict x y) -> Dict x y | a has Hash & Eq, x has Hash & Eq +joinMap = \dict, transform -> + init = withCapacity (capacity dict) # Might be a pessimization + + walk dict init \answer, k, v -> + insertAll answer (transform k v) + ## Iterate through the keys and values in the dictionary and call the provided ## function with signature `state, k, v -> state` for each value, with an ## initial `state` value provided for the first call. diff --git a/crates/compiler/builtins/roc/List.roc b/crates/compiler/builtins/roc/List.roc index c9ed2875050..76447c10169 100644 --- a/crates/compiler/builtins/roc/List.roc +++ b/crates/compiler/builtins/roc/List.roc @@ -208,6 +208,9 @@ interface List ## * Even when copying is faster, other list operations may still be slightly slower with persistent data structures. For example, even if it were a persistent data structure, [List.map], [List.walk], and [List.keepIf] would all need to traverse every element in the list and build up the result from scratch. 
These operations are all ## * Roc's compiler optimizes many list operations into in-place mutations behind the scenes, depending on how the list is being used. For example, [List.map], [List.keepIf], and [List.set] can all be optimized to perform in-place mutations. ## * If possible, it is usually best for performance to use large lists in a way where the optimizer can turn them into in-place mutations. If this is not possible, a persistent data structure might be faster - but this is a rare enough scenario that it would not be good for the average Roc program's performance if this were the way [List] worked by default. Instead, you can look outside Roc's standard modules for an implementation of a persistent data structure - likely built using [List] under the hood! + +# separator so List.isEmpty doesn't absorb the above into its doc comment + ## Check if the list is empty. ## ``` ## List.isEmpty [1, 2, 3] diff --git a/crates/compiler/builtins/roc/Set.roc b/crates/compiler/builtins/roc/Set.roc index 24c54df999f..f8b2072d0d4 100644 --- a/crates/compiler/builtins/roc/Set.roc +++ b/crates/compiler/builtins/roc/Set.roc @@ -7,6 +7,8 @@ interface Set walkUntil, insert, len, + isEmpty, + capacity, remove, contains, toList, @@ -14,6 +16,8 @@ interface Set union, intersection, difference, + map, + joinMap, ] imports [ List, @@ -59,6 +63,13 @@ hashSet = \hasher, @Set inner -> Hash.hash hasher inner empty : {} -> Set k | k has Hash & Eq empty = \{} -> @Set (Dict.empty {}) +## Return a set with space allocated for a number of entries. This +## may provide a performance optimization if you know how many entries will be +## inserted. +withCapacity : Nat -> Set k | k has Hash & Eq +withCapacity = \cap -> + @Set (Dict.withCapacity cap) + ## Creates a new `Set` with a single value. 
## ``` ## singleItemSet = Set.single "Apple" @@ -115,10 +126,32 @@ expect ## ## expect countValues == 3 ## ``` -len : Set k -> Nat | k has Hash & Eq +len : Set * -> Nat len = \@Set dict -> Dict.len dict +## Returns the max number of elements the set can hold before requiring a rehash. +## ``` +## foodSet = +## Set.empty {} +## |> Set.insert "apple" +## +## capacityOfSet = Set.capacity foodSet +## ``` +capacity : Set * -> Nat +capacity = \@Set dict -> + Dict.capacity dict + +## Check if the set is empty. +## ``` +## Set.isEmpty (Set.empty {} |> Set.insert 42) +## +## Set.isEmpty (Set.empty {}) +## ``` +isEmpty : Set * -> Bool +isEmpty = \@Set dict -> + Dict.isEmpty dict + # Inserting a duplicate key has no effect on length. expect actual = @@ -261,6 +294,28 @@ walk : Set k, state, (state, k -> state) -> state | k has Hash & Eq walk = \@Set dict, state, step -> Dict.walk dict state (\s, k, _ -> step s k) +## Convert each value in the set to something new, by calling a conversion +## function on each of them which receives the old value. Then return a +## new set containing the converted values. +map : Set a, (a -> b) -> Set b | a has Hash & Eq, b has Hash & Eq +map = \set, transform -> + init = withCapacity (capacity set) + + walk set init \answer, k -> + insert answer (transform k) + +## Like [Set.map], except the transformation function wraps the return value +## in a set. At the end, all the sets get joined together +## (using [Set.union]) into one set. +## +## You may know a similar function named `concatMap` in other languages. +joinMap : Set a, (a -> Set b) -> Set b | a has Hash & Eq, b has Hash & Eq +joinMap = \set, transform -> + init = withCapacity (capacity set) # Might be a pessimization + + walk set init \answer, k -> + union answer (transform k) + ## Iterate through the values of a given `Set` and build a value, can stop ## iterating part way through the collection. 
## ``` diff --git a/crates/compiler/module/src/symbol.rs b/crates/compiler/module/src/symbol.rs index 474397654c8..89acf366531 100644 --- a/crates/compiler/module/src/symbol.rs +++ b/crates/compiler/module/src/symbol.rs @@ -1471,6 +1471,9 @@ define_builtins! { 22 DICT_LIST_GET_UNSAFE: "listGetUnsafe" 23 DICT_PSEUDO_SEED: "pseudoSeed" + 24 DICT_IS_EMPTY: "isEmpty" + 25 DICT_MAP: "map" + 26 DICT_JOINMAP: "joinMap" } 9 SET: "Set" => { 0 SET_SET: "Set" exposed_type=true // the Set.Set type alias @@ -1490,6 +1493,9 @@ define_builtins! { 14 SET_CONTAINS: "contains" 15 SET_TO_DICT: "toDict" 16 SET_CAPACITY: "capacity" + 17 SET_IS_EMPTY: "isEmpty" + 18 SET_MAP: "map" + 19 SET_JOIN_MAP: "joinMap" } 10 BOX: "Box" => { 0 BOX_BOX_TYPE: "Box" exposed_apply_type=true // the Box.Box opaque type From 437fb7e46c04d6ff280e8b34ea03bfdd49e7f6a8 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Tue, 27 Jun 2023 14:53:48 -0400 Subject: [PATCH 2/3] Update mono tests --- crates/compiler/test_mono/generated/dict.txt | 40 ++++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/crates/compiler/test_mono/generated/dict.txt b/crates/compiler/test_mono/generated/dict.txt index faed8e7fc74..1cb0f5bc2b1 100644 --- a/crates/compiler/test_mono/generated/dict.txt +++ b/crates/compiler/test_mono/generated/dict.txt @@ -1,28 +1,28 @@ -procedure Dict.1 (Dict.537): - let Dict.546 : List {[], []} = Array []; - let Dict.553 : U64 = 0i64; - let Dict.554 : U64 = 8i64; - let Dict.547 : List U64 = CallByName List.11 Dict.553 Dict.554; - let Dict.550 : I8 = CallByName Dict.36; - let Dict.551 : U64 = 8i64; - let Dict.548 : List I8 = CallByName List.11 Dict.550 Dict.551; - let Dict.549 : U64 = 0i64; - let Dict.545 : {List {[], []}, List U64, List I8, U64} = Struct {Dict.546, Dict.547, Dict.548, Dict.549}; - ret Dict.545; +procedure Dict.1 (Dict.556): + let Dict.565 : List {[], []} = Array []; + let Dict.572 : U64 = 0i64; + let Dict.573 : U64 = 8i64; + let Dict.566 : List U64 = 
CallByName List.11 Dict.572 Dict.573; + let Dict.569 : I8 = CallByName Dict.39; + let Dict.570 : U64 = 8i64; + let Dict.567 : List I8 = CallByName List.11 Dict.569 Dict.570; + let Dict.568 : U64 = 0i64; + let Dict.564 : {List {[], []}, List U64, List I8, U64} = Struct {Dict.565, Dict.566, Dict.567, Dict.568}; + ret Dict.564; -procedure Dict.36 (): - let Dict.552 : I8 = -128i64; - ret Dict.552; +procedure Dict.39 (): + let Dict.571 : I8 = -128i64; + ret Dict.571; -procedure Dict.4 (Dict.543): - let Dict.97 : U64 = StructAtIndex 3 Dict.543; - let #Derived_gen.8 : List {[], []} = StructAtIndex 0 Dict.543; +procedure Dict.4 (Dict.562): + let Dict.100 : U64 = StructAtIndex 3 Dict.562; + let #Derived_gen.8 : List {[], []} = StructAtIndex 0 Dict.562; dec #Derived_gen.8; - let #Derived_gen.7 : List U64 = StructAtIndex 1 Dict.543; + let #Derived_gen.7 : List U64 = StructAtIndex 1 Dict.562; dec #Derived_gen.7; - let #Derived_gen.6 : List I8 = StructAtIndex 2 Dict.543; + let #Derived_gen.6 : List I8 = StructAtIndex 2 Dict.562; dec #Derived_gen.6; - ret Dict.97; + ret Dict.100; procedure List.11 (List.121, List.122): let List.522 : List I8 = CallByName List.68 List.122; From f12b378249916aaef141c58d182ada07a456b632 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Tue, 27 Jun 2023 16:20:27 -0400 Subject: [PATCH 3/3] cargo uitest --- .../uitest/tests/ability/specialize/set_eq_issue_4761.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/compiler/uitest/tests/ability/specialize/set_eq_issue_4761.txt b/crates/compiler/uitest/tests/ability/specialize/set_eq_issue_4761.txt index 1387893fab3..b8fa24db29a 100644 --- a/crates/compiler/uitest/tests/ability/specialize/set_eq_issue_4761.txt +++ b/crates/compiler/uitest/tests/ability/specialize/set_eq_issue_4761.txt @@ -8,5 +8,5 @@ main = s2 = Set.empty {} Bool.isEq s1 s1 && Bool.isEq s2 s2 -# ^^^^^^^^^ Set#Bool.isEq(17): Set Str, Set Str -[[Set.isEq(17)]]-> Bool -# ^^^^^^^^^ Set#Bool.isEq(17): Set U8, Set 
U8 -[[Set.isEq(17)]]-> Bool +# ^^^^^^^^^ Set#Bool.isEq(20): Set Str, Set Str -[[Set.isEq(20)]]-> Bool +# ^^^^^^^^^ Set#Bool.isEq(20): Set U8, Set U8 -[[Set.isEq(20)]]-> Bool