Add support for lazy and low-storage operations
dennisYatunin committed Aug 16, 2024
1 parent 91a30eb commit 252917a
Showing 11 changed files with 820 additions and 158 deletions.
7 changes: 7 additions & 0 deletions Project.toml
@@ -5,6 +5,13 @@ version = "0.1.2"

[compat]
julia = "1.10"
StaticArrays = "1"

[weakdeps]
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"

[extensions]
UnrolledUtilitiesStaticArraysExt = "StaticArrays"

[extras]
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
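The `[weakdeps]` and `[extensions]` entries above use Julia's package-extension mechanism: `UnrolledUtilitiesStaticArraysExt` is compiled and loaded automatically once both packages are present in the environment. A minimal sketch of what triggers it (assuming both packages are installed):

```julia
using UnrolledUtilities  # the extension is not active yet
using StaticArrays       # loading the weak dependency activates
                         # UnrolledUtilitiesStaticArraysExt automatically
```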
6 changes: 6 additions & 0 deletions docs/src/index.md
@@ -73,3 +73,9 @@ unroll when your use case is similar to a row in the first category.
The table is also printed out by this package's unit tests, so these
measurements can be compared across different operating systems by checking the
[CI pipeline](https://github.com/CliMA/UnrolledUtilities.jl/actions/workflows/ci.yml).

## Interface
```@autodocs
Modules = [UnrolledUtilities]
Order = [:function, :type]
```
10 changes: 10 additions & 0 deletions ext/UnrolledUtilitiesStaticArraysExt.jl
@@ -0,0 +1,10 @@
module UnrolledUtilitiesStaticArraysExt

import UnrolledUtilities
import StaticArrays: SVector

UnrolledUtilities.length_from_type(::Type{<:SVector{N}}) where {N} = N
UnrolledUtilities.target_output_type(::SVector) = SVector
UnrolledUtilities.output_constructor(::Type{SVector}) = SVector

end
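A hedged sketch of the behavior these hooks are intended to enable: once the extension is loaded, unrolled operations over an `SVector` should keep `SVector` as the output type instead of falling back to `Tuple`. The expected results below are inferred from the hook definitions, not captured output.

```julia
using StaticArrays, UnrolledUtilities

v = SVector(1, 2, 3)
unrolled_map(x -> 2x, v)         # expected to be SVector{3, Int}(2, 4, 6)
unrolled_reduce(+, v; init = 0)  # reductions return scalars, so the hooks do not affect them
```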
148 changes: 148 additions & 0 deletions src/BitSequence.jl
@@ -0,0 +1,148 @@
"""
BitSequence{N, [U]}(f)
BitSequence{N, [U]}([bit])
A statically-sized analogue of `BitVector` with `Unsigned` chunks of type `U`,
which can be constructed using either a function `f(n)` or a constant `bit`. By
default, `U` is set to `UInt8` and `bit` is set to `false`.
Efficient methods are provided for `unrolled_map`, `unrolled_accumulate`,
`unrolled_take`, and `unrolled_drop`, though the methods for `unrolled_map` and
`unrolled_accumulate` only apply when their outputs consist of `Bool`s. All
other unrolled functions that need to construct non-empty iterators convert
`BitSequence`s into `Tuple`s.
"""
struct BitSequence{N, U <: Unsigned, I <: NTuple{<:Any, U}} <: StaticSequence{N}
    ints::I
end
BitSequence{N, U}(ints::I) where {N, U <: Unsigned, I <: NTuple{<:Any, U}} =
    BitSequence{N, U, I}(ints)
BitSequence{N}(args...) where {N} = BitSequence{N, UInt8}(args...)

function BitSequence{N, U}(bit::Bool = false) where {N, U}
    n_bits_per_int = 8 * sizeof(U)
    n_ints = cld(N, n_bits_per_int)
    int = bit ? ~zero(U) : zero(U)
    ints = ntuple(_ -> int, Val(n_ints))
    return BitSequence{N, U}(ints)
end

function BitSequence{N, U}(f) where {N, U}
    n_bits_per_int = 8 * sizeof(U)
    n_ints = cld(N, n_bits_per_int)
    # Pack one bit per index into chunks of type U; indices past N only fill
    # the unused high bits of the last chunk.
    ints = ntuple(Val(n_ints)) do int_index
        first_index = n_bits_per_int * (int_index - 1) + 1
        unrolled_reduce(
            LazySequence{n_bits_per_int}(0);
            init = zero(U),
        ) do int, bit_offset
            int | U(f(first_index + bit_offset)::Bool) << bit_offset
        end
    end
    return BitSequence{N, U}(ints)
end

target_output_type(::BitSequence{<:Any, U}) where {U} = BitSequence{<:Any, U}

output_promote_rule(::Type{B}, ::Type{O}) where {B <: BitSequence, O} = O
output_promote_rule(::Type{B}, ::Type{Tuple}) where {B <: BitSequence} = Tuple
output_promote_rule(::Type{B}, ::Type{LazySequence}) where {B <: BitSequence} =
    B

eltype_restriction(::Type{<:BitSequence}) = Bool

empty_output(::Type{BitSequence{<:Any, U}}) where {U} = BitSequence{0, U}()

@inline function unrolled_map_into_target(
    ::Type{BitSequence{<:Any, U}},
    f,
    itrs...,
) where {U}
    lazy_itr = lazy_map(f, itrs...)
    N = inferred_length(lazy_itr)
    return BitSequence{N, U}(Base.Fix1(getindex, lazy_itr))
end

@inline function unrolled_accumulate_into_target(
    ::Type{BitSequence{<:Any, U}},
    op,
    itr,
    init,
    transform,
) where {U}
    N = inferred_length(itr)
    (N == 0 && init isa NoInit) &&
        error("unrolled_accumulate requires an init value for empty iterators")
    n_bits_per_int = 8 * sizeof(U)
    n_ints = cld(N, n_bits_per_int)
    # Accumulate chunk by chunk, threading a tuple of (packed bits of the
    # current chunk, running accumulator value); the outer transform keeps
    # only the packed bits in the final result.
    ints = unrolled_accumulate_into_tuple(
        LazySequence{n_ints}();
        init = (nothing, init),
        transform = first,
    ) do (_, init_value_for_new_int), int_index
        first_index = n_bits_per_int * (int_index - 1) + 1
        unrolled_reduce(
            LazySequence{n_bits_per_int}(0);
            init = (zero(U), init_value_for_new_int),
        ) do (int, prev_value), bit_offset
            item = itr[first_index + bit_offset]
            new_value =
                first_index + bit_offset == 1 && prev_value isa NoInit ?
                item : op(prev_value, item)
            (int | U(transform(new_value)::Bool) << bit_offset, new_value)
        end
    end
    return BitSequence{N, U}(ints)
end

@inline function unrolled_take(
    itr::BitSequence{<:Any, U},
    ::Val{N},
) where {N, U}
    n_bits_per_int = 8 * sizeof(U)
    n_ints = cld(N, n_bits_per_int)
    ints = unrolled_take(itr.ints, Val(n_ints))
    return BitSequence{N, U}(ints)
end

@inline function unrolled_drop(
    itr::BitSequence{N_old, U},
    ::Val{N},
) where {N_old, N, U}
    n_bits_per_int = 8 * sizeof(U)
    n_ints = cld(N_old - N, n_bits_per_int)
    n_dropped_ints = length(itr.ints) - n_ints
    bit_offset = N - n_bits_per_int * n_dropped_ints
    ints_without_offset = unrolled_drop(itr.ints, Val(n_dropped_ints))
    # When the number of dropped bits is not a multiple of the chunk size,
    # shift each remaining chunk down and fill its high bits with the low bits
    # of the next chunk (the last chunk has no successor, so it is only
    # shifted).
    ints = if bit_offset == 0
        ints_without_offset
    else
        cur_ints = ints_without_offset
        next_ints = unrolled_push(unrolled_drop(cur_ints, Val(1)), nothing)
        unrolled_map_into_tuple(cur_ints, next_ints) do cur_int, next_int
            isnothing(next_int) ? cur_int >> bit_offset :
            cur_int >> bit_offset | next_int << (n_bits_per_int - bit_offset)
        end
    end
    return BitSequence{N_old - N, U}(ints)
end

@inline function int_index_and_bit_offset(itr, n)
    int_offset, bit_offset = divrem(n - 1, 8 * sizeof(eltype(itr.ints)))
    return (int_offset + 1, bit_offset)
end

@inline function Base.getindex(itr::BitSequence, n::Integer)
    int_index, bit_offset = int_index_and_bit_offset(itr, n)
    int = itr.ints[int_index]
    return Bool(int >> bit_offset & one(int))
end

@inline function Base.setindex(itr::BitSequence, bit::Bool, n::Integer)
    int_index, bit_offset = int_index_and_bit_offset(itr, n)
    int = itr.ints[int_index]
    int′ = int & ~(one(int) << bit_offset) | typeof(int)(bit) << bit_offset
    return typeof(itr)(Base.setindex(itr.ints, int′, int_index))
end

@inline Base.eltype(::BitSequence) = Bool
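An illustrative sketch of the `BitSequence` constructors and indexing defined above; the indicated values follow from the definitions but are assumptions rather than captured output.

```julia
bits = BitSequence{10}(isodd)            # bits of isodd(1), ..., isodd(10), packed into UInt8 chunks
bits[1], bits[2]                         # (true, false)
all_set = BitSequence{10, UInt16}(true)  # all bits set, stored in a single UInt16 chunk
flipped = Base.setindex(bits, false, 1)  # non-mutating update that returns a new BitSequence
unrolled_map(!, bits)                    # remains a BitSequence, since the output consists of Bools
```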
43 changes: 43 additions & 0 deletions src/LazyMap.jl
@@ -0,0 +1,43 @@
"""
LazyMap(f, itrs...)
A lazy and statically-sized analogue of a `Base.AbstractBroadcasted` object
whose values and `output_type` are consistent with `unrolled_map(f, itrs...)`.
Efficient methods are provided for `unrolled_take` and `unrolled_drop`. All
other unrolled functions that need to construct non-empty iterators convert
`LazyMap`s into their `output_type`s.
"""
struct LazyMap{N, F, I} <: StaticSequence{N}
    f::F
    itrs::I
end
LazyMap{N}(f, itrs...) where {N} = LazyMap{N, typeof(f), typeof(itrs)}(f, itrs)

target_output_type(itr::LazyMap) = output_type_of_map(itr.f, itr.itrs...)

@inline unrolled_fix2(f, arg, itrs) =
    unrolled_map_into_tuple(Base.Fix2(f, arg), itrs)

@inline unrolled_take(itr::LazyMap, ::Val{N}) where {N} =
    LazyMap{N}(itr.f, unrolled_fix2(unrolled_take, Val(N), itr.itrs)...)

@inline unrolled_drop(itr::LazyMap{N_old}, ::Val{N}) where {N_old, N} =
    LazyMap{N_old - N}(itr.f, unrolled_fix2(unrolled_drop, Val(N), itr.itrs)...)

@inline Base.getindex(itr::LazyMap, n::Integer) =
    itr.f(unrolled_fix2(getindex, n, itr.itrs)...)

@inline Base.eltype(itr::LazyMap) =
    result_type(itr.f, unrolled_map_into_tuple(eltype, itr.itrs)...)

################################################################################

@inline lazy_map(f, itr) = LazyMap{inferred_length(itr)}(f, itr)
@inline lazy_map(f, itrs...) = LazyMap{minimum_length(itrs...)}(f, itrs...)
# The first method lets us avoid an infinite recursion through minimum_length.

@inline lazy_zip(itrs...) = lazy_map(tuple, itrs...)

@inline lazy_enumerate(itrs...) =
    lazy_zip(LazySequence{minimum_length(itrs...)}(), itrs...)
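A hedged sketch of how the lazy helpers above compose (`lazy_map`, `lazy_zip`, and `lazy_enumerate` appear to be internal helpers rather than exported API; the indicated values are inferred from the definitions):

```julia
squares = lazy_map(x -> x^2, (1, 2, 3))  # no output Tuple is materialized
squares[2]                               # 4, computed on demand
zipped = lazy_zip((1, 2), (:a, :b))      # items are (1, :a) and (2, :b)
unrolled_reduce(+, squares; init = 0)    # 14; consumers index into the LazyMap directly
```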
34 changes: 34 additions & 0 deletions src/LazySequence.jl
@@ -0,0 +1,34 @@
"""
LazySequence{N}(f)
LazySequence{N}([start])
A lazy analogue of `ntuple(f, Val(N))`, or a lazy and statically-sized analogue
of `start:(start - 1 + N)`. By default, `start` is set to 1.
Efficient methods are provided for `unrolled_take` and `unrolled_drop`. All
other unrolled functions that need to construct non-empty iterators convert
`LazySequence`s into `Tuple`s.
"""
struct LazySequence{N, F} <: StaticSequence{N}
    f::F
end
LazySequence{N}(f = identity) where {N} = LazySequence{N, typeof(f)}(f)
LazySequence{N}(start::Number) where {N} =
    LazySequence{N}(Base.Fix1(+, start - one(start)))

target_output_type(::LazySequence) = LazySequence

output_promote_rule(::Type{LazySequence}, ::Type{O}) where {O} = O
output_promote_rule(::Type{LazySequence}, ::Type{Tuple}) = Tuple

empty_output(::Type{LazySequence}) = LazySequence{0}()

@inline unrolled_take(itr::LazySequence, ::Val{N}) where {N} =
    LazySequence{N}(itr.f)

@inline unrolled_drop(itr::LazySequence{N_old}, ::Val{N}) where {N_old, N} =
    LazySequence{N_old - N}(n -> itr.f(n + N))

@inline Base.getindex(itr::LazySequence, n::Integer) = itr.f(n)

@inline Base.eltype(itr::LazySequence) = result_type(itr.f, Int)
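A brief sketch of the `LazySequence` constructors defined above; the outputs are inferred from the definitions rather than captured from a session.

```julia
seq = LazySequence{5}()              # lazy analogue of ntuple(identity, Val(5)), i.e. 1:5
seq[3]                               # 3
offset = LazySequence{5}(10)         # lazy analogue of 10:14
offset[1]                            # 10
squares = LazySequence{4}(n -> n^2)  # lazy analogue of ntuple(n -> n^2, Val(4))
squares[4]                           # 16
```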