bugfix for delayed arithmetic

sumiya11 · Jan 29, 2024 · de6aa66 · de6aa66
1 parent 9c0e087
commit de6aa66
Show file tree

Hide file tree

Showing 7 changed files with 306 additions and 11 deletions.
diff --git a/experimental/hashtable/benchmark.jl b/experimental/hashtable/benchmark.jl
@@ -0,0 +1,67 @@
+using Random, BenchmarkTools
+
+include("common.jl")
+include("hashtable-1.jl")
+include("hashtable-2.jl")
+
+n = 10
+sz = 2^16
+
+function setup_random_monoms(n, d, s; T=UInt8)
+    monoms = Vector{Vector{T}}(undef, s)
+    for i in 1:s
+        monoms[i] = rand(T(0):T(d), n)
+    end
+    monoms
+end
+
+function setup_1(n, sz, s)
+    ht = hashtable_initialize1(n, Random.MersenneTwister(42), Vector{UInt8}, sz)
+    monoms = setup_random_monoms(n, 5, s)
+    ht, monoms
+end
+
+function setup_2(n, sz, s)
+    ht = hashtable_initialize1(n, Random.MersenneTwister(42), Vector{UInt8}, sz)
+    monoms = setup_random_monoms(n, 5, s)
+    for i in 1:length(monoms)
+        hashtable_insert!(ht, monoms[i])
+    end
+    monoms2 = setup_random_monoms(n, 5, s)
+    for j in 1:length(monoms)
+        if iszero(j % 1_000)
+            monoms[j] = monoms2[j]
+        end
+    end
+    ht, monoms
+end
+
+# Inserts, almost no collisions
+for k in 8:16
+    sz = 2^k
+    s = div(sz, 2)
+    @info "n = $n, sz = 2^$k, s = $s"
+
+    @btime begin
+        for i in 1:($s)
+            hashtable_insert!(ht, monoms[i])
+        end
+    end setup = begin
+        ht, monoms = setup_1($n, $sz, $s)
+    end
+end
+
+# Inserts, almost all are hits
+for k in 8:16
+    sz = 2^k
+    s = div(sz, 2)
+    @info "n = $n, sz = 2^$k, s = $s"
+
+    @btime begin
+        for i in 1:($s)
+            hashtable_insert!(ht, monoms[i])
+        end
+    end setup = begin
+        ht, monoms = setup_2($n, $sz, $s)
+    end
+end
diff --git a/experimental/hashtable/common.jl b/experimental/hashtable/common.jl
@@ -0,0 +1,7 @@
+
+const Monom = Vector{UInt8}
+
+# The idenfifier of a monomial. This idenfifier is guaranteed to be unique
+# within a particular hashtable. This allows one to use this idenfifier when
+# working with monomials
+const MonomId = Int32
diff --git a/experimental/hashtable/hashtable-1.jl b/experimental/hashtable/hashtable-1.jl
@@ -0,0 +1,203 @@
+
+# Hash of a monomial in the hashtable
+const MonomHash = UInt32
+
+# Hashvalue1 of a single monomial
+struct Hashvalue1
+    # index of the monomial in the F4 matrix (defaults to NON_PIVOT_COLUMN, or 0),
+    idx::Int32
+    # hash of the monomial,
+    hash::MonomHash
+    # total degree of the monomial
+    deg::MonomHash
+end
+
+# Hashtable implements open addressing with linear scan.
+mutable struct MonomialHashtable1{M <: Monom}
+    #= Data =#
+    monoms::Vector{M}
+    # Maps monomial id to its position in the `monoms` array
+    hashtable::Vector{MonomId}
+    # Stores hashes, division masks, and other valuable info for each hashtable
+    # enrty
+    hashdata::Vector{Hashvalue1}
+    # Hash vector. Hash of a monomial is a dot product of the `hasher` vector
+    # and the monomial exponent vector
+    hasher::Vector{MonomHash}
+
+    #= Ring information =#
+    # number of variables
+    nvars::Int
+
+    # Hashtable size 
+    # (always a power of two)
+    # (always greater than 1)
+    size::Int
+    # Elements currently added
+    load::Int
+    offset::Int
+
+    # If the hashtable is frozen, any operation that tries to modify it will
+    # result in an error.
+    frozen::Bool
+end
+
+###
+# Initialization and resizing
+
+# Resize hashtable if load factor exceeds hashtable_resize_threshold. Load factor of a
+# hashtable must be smaller than hashtable_resize_threshold at any point of its
+# lifetime
+hashtable_resize_threshold() = 0.4
+hashtable_needs_resize(size, load, added) =
+    (load + added) / size > hashtable_resize_threshold()
+
+function hashtable_initialize1(
+    nvars,
+    rng::AbstractRNG,
+    MonomT::T,
+    initial_size::Int
+) where {T}
+    exponents = Vector{MonomT}(undef, initial_size)
+    hashdata = Vector{Hashvalue1}(undef, initial_size)
+    hashtable = zeros(MonomId, initial_size)
+
+    # initialize hashing vector
+    hasher = [rand(MonomHash) for i in 1:nvars]
+
+    # exponents[1:load] covers all stored exponents
+    # , also exponents[1] is [0, 0, ..., 0] by default
+    load = 1
+    @assert initial_size > 1
+    size = initial_size
+
+    # exponents array starts from index offset,
+    # We store buffer array at index 1
+    offset = 2
+
+    # first stored exponent used as buffer lately
+    exponents[1] = zeros(UInt8, nvars)
+
+    MonomialHashtable1(
+        exponents,
+        hashtable,
+        hashdata,
+        hasher,
+        nvars,
+        size,
+        load,
+        offset,
+        false
+    )
+end
+
+function hashtable_resize_if_needed!(ht::MonomialHashtable1, added::Int)
+    newsize = ht.size
+    while hashtable_needs_resize(newsize, ht.load, added)
+        newsize *= 2
+    end
+    newsize == ht.size && return nothing
+
+    ht.size = newsize
+
+    resize!(ht.hashdata, ht.size)
+    resize!(ht.monoms, ht.size)
+    resize!(ht.hashtable, ht.size)
+    @inbounds for i in 1:(ht.size)
+        ht.hashtable[i] = zero(MonomId)
+    end
+
+    mod = MonomHash(ht.size - 1)
+
+    @inbounds for i in (ht.offset):(ht.load)
+        # hash for this elem is already computed
+        he = ht.hashdata[i].hash
+        hidx = he
+        for j in MonomHash(0):MonomHash(ht.size)
+            hidx = hashtable_next_lookup_index(he, j, mod)
+            !iszero(ht.hashtable[hidx]) && continue
+            ht.hashtable[hidx] = i
+            break
+        end
+    end
+    nothing
+end
+
+###
+# Insertion of monomials
+
+# Returns the next look-up position in the table.
+# Must be within 1 <= ... <= mod+1
+function hashtable_next_lookup_index(h::MonomHash, j::MonomHash, mod::MonomHash)
+    ((h + j) & mod) + MonomHash(1)
+end
+
+# if hash collision happened
+function hashtable_is_hash_collision(ht::MonomialHashtable1, vidx, e, he)
+    # if not free and not same hash
+    @inbounds if ht.hashdata[vidx].hash != he
+        return true
+    end
+    # if not free and not same monomial
+    @inbounds if !(ht.monoms[vidx] == e)
+        return true
+    end
+    false
+end
+
+function monom_hash(x::Vector{T}, b::Vector{MH}) where {T, MH}
+    h = zero(MH)
+    @inbounds for i in eachindex(x, b)
+        h = h + MH(x[i]) * b[i]
+    end
+    mod(h, MonomHash)
+end
+
+function hashtable_insert!(ht::MonomialHashtable1{M}, e::M) where {M <: Monom}
+    # NOTE: trying to optimize for the case when the monomial is already in the
+    # table.
+    # NOTE: all of the functions called here are inlined. The only potential
+    # exception is monom_create_divmask
+
+    # generate hash
+    he = monom_hash(e, ht.hasher)
+
+    hsize = ht.size
+    mod = (hsize - 1) % MonomHash
+    hidx = hashtable_next_lookup_index(he, 0 % MonomHash, mod)
+    @inbounds vidx = ht.hashtable[hidx]
+
+    hit = !iszero(vidx)
+    @inbounds if hit && !hashtable_is_hash_collision(ht, vidx, e, he)
+        # Hit!
+        return vidx
+    end
+
+    # Miss or collision
+    i = 1 % MonomHash
+    mhhsize = hsize % MonomHash
+    @inbounds while hit && i < mhhsize
+        hidx = hashtable_next_lookup_index(he, i, mod)
+        vidx = ht.hashtable[hidx]
+
+        iszero(vidx) && break
+
+        if hashtable_is_hash_collision(ht, vidx, e, he)
+            i += (1 % MonomHash)
+            continue
+        end
+
+        # already present in hashtable
+        return vidx
+    end
+
+    # add monomial to hashtable
+    vidx = (ht.load + 1) % MonomId
+    @inbounds ht.hashtable[hidx] = vidx
+    @inbounds ht.monoms[vidx] = Base.copy(e)
+    @inbounds ht.hashdata[vidx] = Hashvalue1(0, he, e[1])
+
+    ht.load += 1
+
+    return vidx
+end
diff --git a/experimental/hashtable/hashtable-2.jl b/experimental/hashtable/hashtable-2.jl
@@ -0,0 +1 @@
+
diff --git a/src/arithmetic/Zp.jl b/src/arithmetic/Zp.jl
@@ -424,19 +424,22 @@ function select_arithmetic(
         # Use delayed modular arithmetic if the prime is one of the following:
         #   2^16 <= prime < 2^28
         #   2^8  <= prime < 2^12
-        #          prime < 2^4
+        #           prime < 2^4
         # The trade-offs are a bit shifted for large moduli that are > 2^32. It
         # looks like it rarely pays off to use delayed modular arithmetic in
         # such cases
-        if !(AccumType === UInt128) &&
-           4 < (leading_zeros(CoeffType(characteristic)) - (8 >> 1) * sizeof(AccumType))
-            return DelayedArithmeticZp(
-                AccumType,
-                CoeffType,
-                convert(AccumType, characteristic)
-            )
-        else
-            return SpecializedArithmeticZp(AccumType, CoeffType, characteristic)
+        if !(AccumType === UInt128)
+            if !using_wide_type_for_coeffs &&
+               leading_zeros(CoeffType(characteristic)) > 4 ||
+               using_wide_type_for_coeffs &&
+               ((8 * sizeof(CoeffType)) >> 1) -
+               (8 * sizeof(CoeffType) - leading_zeros(CoeffType(characteristic))) > 4
+                return DelayedArithmeticZp(
+                    AccumType,
+                    CoeffType,
+                    convert(AccumType, characteristic)
+                )
+            end
         end
     end
 

diff --git a/src/groebner/learn-apply.jl b/src/groebner/learn-apply.jl
@@ -205,6 +205,6 @@ function groebner_applyX!(
         ring, gb_monoms, gb_coeffs =
             dehomogenize_generators!(ring, gb_monoms, gb_coeffs, params)
     end
-    
+
     flag, gb_coeffs::Vector{Vector{UInt32}}
 end
diff --git a/test/arithmetic/Zp.jl b/test/arithmetic/Zp.jl
@@ -1,6 +1,20 @@
 import Primes
 
 @testset "arithmetic in Zp" begin
+    a = Groebner.select_arithmetic(UInt32, 2^31 - 1, :auto, false)
+    @test a isa Groebner.SpecializedArithmeticZp{UInt64, UInt32}
+    a = Groebner.select_arithmetic(UInt64, 2^31 - 1, :auto, true)
+    @test a isa Groebner.SpecializedArithmeticZp{UInt64, UInt64}
+
+    a = Groebner.select_arithmetic(UInt32, Primes.prevprime(2^27 - 1), :auto, false)
+    @test a isa Groebner.DelayedArithmeticZp{UInt64, UInt32}
+    a = Groebner.select_arithmetic(UInt64, Primes.prevprime(2^27 - 1), :auto, true)
+    @test a isa Groebner.DelayedArithmeticZp{UInt64, UInt64}
+    a = Groebner.select_arithmetic(UInt64, Primes.prevprime(2^28 - 1), :auto, true)
+    @test a isa Groebner.SpecializedArithmeticZp{UInt64, UInt64}
+    a = Groebner.select_arithmetic(UInt32, Primes.prevprime(2^28 - 1), :auto, false)
+    @test a isa Groebner.SpecializedArithmeticZp{UInt64, UInt32}
+
     m = UInt64(2^30 + 3)
     a = Groebner.DelayedArithmeticZp(UInt64, UInt64, m)
     @test Groebner.n_spare_bits(a) == 1