add a couple of assumptions

sumiya11 · Jan 22, 2024 · d5452b0 · d5452b0
1 parent 804f695
commit d5452b0
Show file tree

Hide file tree

Showing 12 changed files with 201 additions and 77 deletions.
diff --git a/experimental/benchmarks/standalone_f4.jl b/experimental/benchmarks/standalone_f4.jl
@@ -13,27 +13,26 @@ Groebner.invariants_enabled() = false
 BenchmarkTools.DEFAULT_PARAMETERS.samples = 3
 
 Groebner.logging_enabled() = false
-Groebner.invariants_enabled() = true
+Groebner.invariants_enabled() = false
 
 function benchmark_system_my(system)
-    system = Groebner.change_ordering(system, :degrevlex)
     Groebner.groebner([system[1]])
 
     @btime Groebner.groebner($system)
 end
 
 function run_f4_ff_degrevlex_benchmarks(ground)
     systems = [
-        ("cyclic 7", reverse(Groebner.cyclicn(7, k=ground))),
-        ("cyclic 8", reverse(Groebner.cyclicn(8, k=ground))),
-        ("katsura 9", reverse(Groebner.katsuran(9, k=ground))),
-        ("katsura 10", reverse(Groebner.katsuran(10, k=ground))),
-        ("katsura 11", reverse(Groebner.katsuran(11, k=ground))),
-        ("noon 7", Groebner.noonn(7, k=ground)),
-        ("noon 8", Groebner.noonn(8, k=ground)),
-        # ("noon 9", Groebner.noonn(9, k=ground)),
-        ("eco 10", Groebner.eco10(k=ground)),
-        ("eco 11", Groebner.eco11(k=ground))
+        ("cyclic 7", reverse(Groebner.cyclicn(7, k=ground, ordering=:degrevlex))),
+        ("cyclic 8", reverse(Groebner.cyclicn(8, k=ground, ordering=:degrevlex))),
+        ("katsura 9", reverse(Groebner.katsuran(9, k=ground, ordering=:degrevlex))),
+        ("katsura 10", reverse(Groebner.katsuran(10, k=ground, ordering=:degrevlex))),
+        ("katsura 11", reverse(Groebner.katsuran(11, k=ground, ordering=:degrevlex))),
+        ("noon 7", Groebner.noonn(7, k=ground, ordering=:degrevlex)),
+        ("noon 8", Groebner.noonn(8, k=ground, ordering=:degrevlex)),
+        # ("noon 9", Groebner.noonn(9, k=ground, ordering=:degrevlex)),
+        ("eco 10", Groebner.eco10(k=ground, ordering=:degrevlex)),
+        ("eco 11", Groebner.eco11(k=ground, ordering=:degrevlex))
     ]
 
     for (name, system) in systems
@@ -42,38 +41,36 @@ function run_f4_ff_degrevlex_benchmarks(ground)
     end
 end
 
-p1 = 2^31 - 1
-p2 = 2^29 + 11
-p3 = 2^27 + 29
-s = Groebner.katsuran(11, ordering=:degrevlex, k=AbstractAlgebra.GF(p3))
+p1 = 2^30 + 3
+s = Groebner.katsuran(11, ordering=:degrevlex, k=AbstractAlgebra.GF(p1))
 
-@profview gb1 = Groebner.groebner(s);
+# @profview gb1 = Groebner.groebner(s);
 
 println()
-ground = AbstractAlgebra.GF(1048583)
+ground = AbstractAlgebra.GF(2^30 + 3)
 run_f4_ff_degrevlex_benchmarks(ground)
 
 #=
 
 gf:
 
 cyclic 7
-  95.218 ms (58110 allocations: 51.84 MiB)
+  80.674 ms (32571 allocations: 39.23 MiB)
 cyclic 8
-  1.415 s (268517 allocations: 278.05 MiB)
+  1.078 s (158547 allocations: 162.62 MiB)
 katsura 9
-  155.376 ms (75093 allocations: 50.72 MiB)
+  126.103 ms (34041 allocations: 43.78 MiB)
 katsura 10
-  943.326 ms (204033 allocations: 173.79 MiB)
+  708.583 ms (86954 allocations: 147.93 MiB)
 katsura 11
-  6.671 s (525176 allocations: 612.59 MiB)
+  4.486 s (217870 allocations: 518.37 MiB)
 noon 7
-  194.497 ms (209483 allocations: 74.25 MiB)
+  127.639 ms (92085 allocations: 51.93 MiB)
 noon 8
-  1.327 s (950722 allocations: 378.90 MiB)
+  1.097 s (429005 allocations: 283.11 MiB)
 eco 10
-  66.734 ms (72768 allocations: 38.28 MiB)
+  56.632 ms (37665 allocations: 29.22 MiB)
 eco 11
-  320.164 ms (185723 allocations: 97.99 MiB)
+  334.697 ms (125935 allocations: 83.94 MiB)
 
 =#
diff --git a/experimental/benchmarks/standalone_f4_large.jl b/experimental/benchmarks/standalone_f4_large.jl
@@ -1,5 +1,5 @@
 
-using Groebner
+# using Groebner
 
 import AbstractAlgebra
 using BenchmarkTools
@@ -10,12 +10,12 @@ Groebner.logging_enabled() = false
 Groebner.invariants_enabled() = false
 
 function benchmark_system_my(system)
-    system = Groebner.change_ordering(system, :degrevlex)
+    # system = Groebner.change_ordering(system, :degrevlex)
 
     Groebner.groebner(system)
 
     t = Inf
-    for i in 1:3
+    for i in 1:5
         timing = @timed Groebner.groebner(system)
         t = min(t, timing.time)
     end
@@ -24,27 +24,89 @@ end
 
 function run_f4_ff_degrevlex_benchmarks(ground)
     systems = [
-        ("cyclic 7", Groebner.cyclicn(7, k=ground)),
-        ("cyclic 8", Groebner.cyclicn(8, k=ground)),
-        ("cyclic 9", Groebner.cyclicn(9, k=ground)),
-        ("katsura 9", Groebner.katsuran(9, k=ground)),
-        ("katsura 10", Groebner.katsuran(10, k=ground)),
-        ("katsura 11", Groebner.katsuran(11, k=ground)),
-        ("katsura 12", Groebner.katsuran(12, k=ground)),
-        ("noon 7", Groebner.noonn(7, k=ground)),
-        ("noon 8", Groebner.noonn(8, k=ground)),
-        ("noon 9", Groebner.noonn(9, k=ground)),
-        ("eco 11", Groebner.eco11(k=ground)),
-        ("eco 12", Groebner.eco12(k=ground)),
-        ("eco 12", Groebner.eco13(k=ground))
+        ("cyclic 7", Groebner.cyclicn(7, k=ground, ordering=:degrevlex)),
+        ("cyclic 8", Groebner.cyclicn(8, k=ground, ordering=:degrevlex)),
+        # ("cyclic 9", Groebner.cyclicn(9, k=ground, ordering=:degrevlex)),
+        ("katsura 9", Groebner.katsuran(9, k=ground, ordering=:degrevlex)),
+        ("katsura 10", Groebner.katsuran(10, k=ground, ordering=:degrevlex)),
+        ("katsura 11", Groebner.katsuran(11, k=ground, ordering=:degrevlex)),
+        # ("katsura 12", Groebner.katsuran(12, k=ground, ordering=:degrevlex)),
+        ("noon 7", Groebner.noonn(7, k=ground, ordering=:degrevlex)),
+        ("noon 8", Groebner.noonn(8, k=ground, ordering=:degrevlex)),
+        ("noon 9", Groebner.noonn(9, k=ground, ordering=:degrevlex)),
+        ("eco 11", Groebner.eco11(k=ground, ordering=:degrevlex)),
+        ("eco 12", Groebner.eco12(k=ground, ordering=:degrevlex)),
+        ("eco 13", Groebner.eco13(k=ground, ordering=:degrevlex))
     ]
 
     for (name, system) in systems
-        print("$name -- ")
+        print("$name\t--\t")
         t = benchmark_system_my(system)
         println("$t s")
     end
 end
 
 ground = AbstractAlgebra.GF(2^30 + 3)
 run_f4_ff_degrevlex_benchmarks(ground)
+
+#=
+inline      : -,
+assume Zp   : no
+assume empty: no 
+cyclic 7        --      0.0965658 s
+cyclic 8        --      1.4198005 s
+katsura 9       --      0.1470269 s
+katsura 10      --      0.8665618 s
+katsura 11      --      5.2572539 s
+noon 7  --      0.1516447 s
+noon 8  --      1.2835755 s
+noon 9  --      12.139458 s
+eco 11  --      0.3524228 s
+eco 12  --      2.1887455 s
+eco 13  --      9.0118321 s
+
+inline      : -,
+assume Zp   : yes
+assume empty: no 
+cyclic 7        --      0.0894644 s
+cyclic 8        --      1.2891848 s
+katsura 9       --      0.1435553 s
+katsura 10      --      0.8241746 s
+katsura 11      --      4.9737961 s
+noon 7  --      0.1593515 s
+noon 8  --      1.3084553 s
+noon 9  --      12.112527 s
+eco 11  --      0.3444795 s
+eco 12  --      2.1214438 s
+eco 13  --      8.6668539 s
+
+inline      : -,
+assume Zp   : yes
+assume empty: yes
+cyclic 7        --      0.0875634 s
+cyclic 8        --      1.29229 s
+katsura 9       --      0.1403045 s
+katsura 10      --      0.8176833 s
+katsura 11      --      4.9808602 s
+noon 7  --      0.152636 s
+noon 8  --      1.301324 s
+noon 9  --      11.99392 s
+eco 11  --      0.3462996 s
+eco 12  --      2.091162 s
+eco 13  --      8.5554278 s
+
+inline      : yes,
+assume Zp   : yes
+assume empty: yes
+cyclic 7        --      0.0892941 s
+cyclic 8        --      1.3551499 s
+katsura 9       --      0.1469254 s
+katsura 10      --      0.8492118 s
+katsura 11      --      5.1888122 s
+noon 7  --      0.1558268 s
+noon 8  --      1.3372536 s
+noon 9  --      12.066885 s
+eco 11  --      0.345839 s
+eco 12  --      2.1880031 s
+eco 13  --      8.9609859 s
+=#
diff --git a/experimental/example-maybe-bug.jl b/experimental/example-maybe-bug.jl
@@ -23,6 +23,17 @@ trace, gb = Groebner.groebner_learn(s);
 @btime Groebner.groebner_apply!($trace, $((s, s, s, s)));
 @btime Groebner.groebner_apply!($trace, $((s, s, s, s, s, s, s, s)));
 
+#=
+  113.912 ms (34041 allocations: 43.78 MiB)
+
+  44.803 ms (18887 allocations: 24.46 MiB)
+
+  52.580 ms (20607 allocations: 35.38 MiB)
+
+  64.618 ms (23276 allocations: 59.42 MiB)
+
+  125.247 ms (28610 allocations: 107.47 MiB)
+=#
 @profview Groebner.groebner_apply!(trace, ((s, s, s, s)));
 
 R, (x1, x2, x3) = polynomial_ring(QQ, ["x1", "x2", "x3"], ordering=:degrevlex)

diff --git a/src/arithmetic/Zp.jl b/src/arithmetic/Zp.jl
@@ -85,6 +85,8 @@ struct SpecializedArithmeticZp{AccumType, CoeffType, Add} <:
         ::Type{CoeffType},
         uinv::UnsignedMultiplicativeInverse{AccumType}
     ) where {AccumType <: CoeffZp, CoeffType <: CoeffZp}
+        # Further in the code we need the guarantee that the shift is < 64
+        @invariant uinv.shift < 64
         new{AccumType, CoeffType, uinv.add}(uinv.multiplier, uinv.shift, uinv.divisor)
     end
 end
@@ -106,21 +108,21 @@ function _mul_high(a::UInt128, b::UInt128)
     a1b1 + (a1b2 >>> shift) + (a2b1 >>> shift) + carry
 end
 
-# NOTE: the compiler usually fails to simd this operation for T = UInt32 and T =
-# UInt64. However, with T = UInt8 and T = UInt16, it uses the simd mulhi
-# instructions such as pmulhuw.
-# Our attempts to emulate pmulhuw for T = UInt32 while preserving the
-# performance and portability were not very promising.
+# NOTE: the compiler sometimes fails to simd this operation for T ≥ UInt32.
 #
 # a modulo p (addition specialization)
 function mod_p(a::T, mod::SpecializedArithmeticZp{T, C, true}) where {T, C}
     x = _mul_high(a, mod.multiplier)
     x = convert(T, convert(T, (convert(T, a - x) >>> UInt8(1))) + x)
+    # Bit shifts Julia check that the shift is less than 64 (due to a contract with LLVM). 
+    # The assumption allows us to bypass this check.
+    unsafe_assume(mod.shift < 64)
     a - (x >>> mod.shift) * mod.divisor
 end
 # a modulo p (no addition specialization)
 function mod_p(a::T, mod::SpecializedArithmeticZp{T, C, false}) where {T, C}
     x = _mul_high(a, mod.multiplier)
+    unsafe_assume(mod.shift < 64)
     a - (x >>> mod.shift) * mod.divisor
 end
 
@@ -157,6 +159,7 @@ struct DelayedArithmeticZp{AccumType, CoeffType, Add} <:
         ::Type{CoeffType},
         uinv::UnsignedMultiplicativeInverse{AccumType}
     ) where {AccumType <: CoeffZp, CoeffType <: CoeffZp}
+        @invariant uinv.shift < 64
         new{AccumType, CoeffType, uinv.add}(uinv.multiplier, uinv.shift, uinv.divisor)
     end
 end
@@ -177,11 +180,13 @@ end
 function mod_p(a::A, mod::DelayedArithmeticZp{A, T, true}) where {A, T}
     x = _mul_high(a, mod.multiplier)
     x = convert(A, convert(A, (convert(A, a - x) >>> UInt8(1))) + x)
+    unsafe_assume(mod.shift < 64)
     a - (x >>> mod.shift) * mod.divisor
 end
 # a modulo p (no addition specialization)
 function mod_p(a::A, mod::DelayedArithmeticZp{A, T, false}) where {A, T}
     x = _mul_high(a, mod.multiplier)
+    unsafe_assume(mod.shift < 64)
     a - (x >>> mod.shift) * mod.divisor
 end
 
@@ -270,6 +275,7 @@ struct SignedArithmeticZp{AccumType, CoeffType} <:
         @invariant Primes.isprime(p)
         pa = convert(AccumType, p)
         magic = Base.MultiplicativeInverses.SignedMultiplicativeInverse(pa)
+        @invariant magic.shift < 64
         new{AccumType, CoeffType}(pa, pa * pa, magic.multiplier, magic.addmul, magic.shift)
     end
 end
@@ -279,6 +285,7 @@ divisor(arithm::SignedArithmeticZp) = arithm.p
 function mod_p(a::T, mod::SignedArithmeticZp{T}) where {T}
     x = _mul_high(a, mod.multiplier)
     x += a * mod.addmul
+    unsafe_assume(mod.shift < 64)
     d = (signbit(x) + (x >> mod.shift)) % T
     res = a - d * mod.p
     ifelse(res >= zero(T), res, res + mod.p)
@@ -310,6 +317,7 @@ struct SignedCompositeArithmeticZp{AccumType, CoeffType, T, N} <:
         multipliers = map(a -> a.multiplier, arithms)
         addmuls = map(a -> a.addmul, arithms)
         shifts = map(a -> a.shift, arithms)
+        @invariant all(x -> x < 64, shifts)
         p2s = CompositeInt{N, AT}(ps) * CompositeInt{N, AT}(ps)
         new{CompositeInt{N, AT}, CompositeInt{N, CT}, AT, N}(
             CompositeInt{N, AT}(ps),
@@ -329,6 +337,7 @@ function mod_p(
 ) where {N, T, U, W}
     x = _mul_high.(a.data, arithm.multipliers.data)
     x = x .+ a.data .* arithm.addmuls.data
+    # unsafe_assume(all(x -> x < 64, arithm.shifts.data))
     d = (signbit.(x) .+ (x .>> arithm.shifts.data)) .% T
     res = a.data .- d .* arithm.ps.data
     res = ifelse.(res .>= T(0), res, res .+ arithm.ps.data)

diff --git a/src/f4/linalg/backend.jl b/src/f4/linalg/backend.jl
@@ -865,6 +865,9 @@ function linalg_vector_addmul_sparsedense_mod_p!(
 ) where {I, A <: Union{CoeffZp, CompositeCoeffZp}, T <: Union{CoeffZp, CompositeCoeffZp}}
     @invariant isone(coeffs[1])
     @invariant length(indices) == length(coeffs)
+    @invariant !isempty(indices)
+
+    unsafe_assume(!isempty(indices))
 
     @inbounds mul = divisor(arithmetic) - row[indices[1]]
     @inbounds for j in 1:length(indices)
@@ -887,6 +890,9 @@ function linalg_vector_addmul_sparsedense!(
 ) where {I, A <: Union{CoeffZp, CompositeCoeffZp}, T <: Union{CoeffZp, CompositeCoeffZp}}
     @invariant isone(coeffs[1])
     @invariant length(indices) == length(coeffs)
+    @invariant !isempty(indices)
+
+    unsafe_assume(!isempty(indices))
 
     @inbounds mul = divisor(arithmetic) - row[indices[1]]
     @inbounds for j in 1:length(indices)
@@ -909,6 +915,9 @@ function linalg_vector_addmul_sparsedense!(
 ) where {I, A <: CoeffZp, T <: CoeffZp}
     @invariant isone(coeffs[1])
     @invariant length(indices) == length(coeffs)
+    @invariant !isempty(indices)
+
+    unsafe_assume(!isempty(indices))
 
     # NOTE: mul is guaranteed to be < typemax(T)
     p2 = arithmetic.p2
@@ -934,6 +943,9 @@ function linalg_vector_addmul_sparsedense!(
 ) where {I, A <: CoeffZp, T <: CoeffZp, N}
     @invariant isone(coeffs[1])
     @invariant length(indices) == length(coeffs)
+    @invariant !isempty(indices)
+
+    unsafe_assume(!isempty(indices))
 
     # NOTE: mul is guaranteed to be < typemax(T)
     p2 = arithmetic.p2s
@@ -959,6 +971,7 @@ function linalg_vector_addmul_sparsedense!(
 ) where {I, T <: CoeffQQ}
     @invariant isone(coeffs[1])
     @invariant length(indices) == length(coeffs)
+    @invariant !isempty(indices)
 
     @inbounds mul = -row[indices[1]]
     @inbounds for j in 1:length(indices)

diff --git a/src/groebner/autoreduce.jl b/src/groebner/autoreduce.jl
@@ -3,7 +3,7 @@
 ###
 #  Autoreducing a set of polynomials
 
-function _autoreduce(
+function _autoreduce1(
     ring::PolyRing,
     monoms::Vector{Vector{M}},
     coeffs::Vector{Vector{C}},

diff --git a/src/groebner/homogenization.jl b/src/groebner/homogenization.jl
@@ -138,7 +138,7 @@ function dehomogenize_generators!(
     De-homogenized monomials: 
     $new_monoms
     """
-    new_monoms, coeffs = _autoreduce(new_ring, new_monoms, coeffs, params)
+    new_monoms, coeffs = _autoreduce1(new_ring, new_monoms, coeffs, params)
     new_ring, new_monoms, coeffs
 end