Skip to content

Commit

Permalink
add a couple of assumptions
Browse files Browse the repository at this point in the history
  • Loading branch information
sumiya11 committed Jan 22, 2024
1 parent 804f695 commit d5452b0
Show file tree
Hide file tree
Showing 12 changed files with 201 additions and 77 deletions.
51 changes: 24 additions & 27 deletions experimental/benchmarks/standalone_f4.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,27 +13,26 @@ Groebner.invariants_enabled() = false
BenchmarkTools.DEFAULT_PARAMETERS.samples = 3

Groebner.logging_enabled() = false
Groebner.invariants_enabled() = true
Groebner.invariants_enabled() = false

function benchmark_system_my(system)
system = Groebner.change_ordering(system, :degrevlex)
Groebner.groebner([system[1]])

@btime Groebner.groebner($system)
end

function run_f4_ff_degrevlex_benchmarks(ground)
systems = [
("cyclic 7", reverse(Groebner.cyclicn(7, k=ground))),
("cyclic 8", reverse(Groebner.cyclicn(8, k=ground))),
("katsura 9", reverse(Groebner.katsuran(9, k=ground))),
("katsura 10", reverse(Groebner.katsuran(10, k=ground))),
("katsura 11", reverse(Groebner.katsuran(11, k=ground))),
("noon 7", Groebner.noonn(7, k=ground)),
("noon 8", Groebner.noonn(8, k=ground)),
# ("noon 9", Groebner.noonn(9, k=ground)),
("eco 10", Groebner.eco10(k=ground)),
("eco 11", Groebner.eco11(k=ground))
("cyclic 7", reverse(Groebner.cyclicn(7, k=ground, ordering=:degrevlex))),
("cyclic 8", reverse(Groebner.cyclicn(8, k=ground, ordering=:degrevlex))),
("katsura 9", reverse(Groebner.katsuran(9, k=ground, ordering=:degrevlex))),
("katsura 10", reverse(Groebner.katsuran(10, k=ground, ordering=:degrevlex))),
("katsura 11", reverse(Groebner.katsuran(11, k=ground, ordering=:degrevlex))),
("noon 7", Groebner.noonn(7, k=ground, ordering=:degrevlex)),
("noon 8", Groebner.noonn(8, k=ground, ordering=:degrevlex)),
# ("noon 9", Groebner.noonn(9, k=ground, ordering=:degrevlex)),
("eco 10", Groebner.eco10(k=ground, ordering=:degrevlex)),
("eco 11", Groebner.eco11(k=ground, ordering=:degrevlex))
]

for (name, system) in systems
Expand All @@ -42,38 +41,36 @@ function run_f4_ff_degrevlex_benchmarks(ground)
end
end

p1 = 2^31 - 1
p2 = 2^29 + 11
p3 = 2^27 + 29
s = Groebner.katsuran(11, ordering=:degrevlex, k=AbstractAlgebra.GF(p3))
p1 = 2^30 + 3
s = Groebner.katsuran(11, ordering=:degrevlex, k=AbstractAlgebra.GF(p1))

@profview gb1 = Groebner.groebner(s);
# @profview gb1 = Groebner.groebner(s);

println()
ground = AbstractAlgebra.GF(1048583)
ground = AbstractAlgebra.GF(2^30 + 3)
run_f4_ff_degrevlex_benchmarks(ground)

#=
gf:
cyclic 7
95.218 ms (58110 allocations: 51.84 MiB)
80.674 ms (32571 allocations: 39.23 MiB)
cyclic 8
1.415 s (268517 allocations: 278.05 MiB)
1.078 s (158547 allocations: 162.62 MiB)
katsura 9
155.376 ms (75093 allocations: 50.72 MiB)
126.103 ms (34041 allocations: 43.78 MiB)
katsura 10
943.326 ms (204033 allocations: 173.79 MiB)
708.583 ms (86954 allocations: 147.93 MiB)
katsura 11
6.671 s (525176 allocations: 612.59 MiB)
4.486 s (217870 allocations: 518.37 MiB)
noon 7
194.497 ms (209483 allocations: 74.25 MiB)
127.639 ms (92085 allocations: 51.93 MiB)
noon 8
1.327 s (950722 allocations: 378.90 MiB)
1.097 s (429005 allocations: 283.11 MiB)
eco 10
66.734 ms (72768 allocations: 38.28 MiB)
56.632 ms (37665 allocations: 29.22 MiB)
eco 11
320.164 ms (185723 allocations: 97.99 MiB)
334.697 ms (125935 allocations: 83.94 MiB)
=#
96 changes: 79 additions & 17 deletions experimental/benchmarks/standalone_f4_large.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

using Groebner
# using Groebner

import AbstractAlgebra
using BenchmarkTools
Expand All @@ -10,12 +10,12 @@ Groebner.logging_enabled() = false
Groebner.invariants_enabled() = false

function benchmark_system_my(system)
system = Groebner.change_ordering(system, :degrevlex)
# system = Groebner.change_ordering(system, :degrevlex)

Groebner.groebner(system)

t = Inf
for i in 1:3
for i in 1:5
timing = @timed Groebner.groebner(system)
t = min(t, timing.time)
end
Expand All @@ -24,27 +24,89 @@ end

function run_f4_ff_degrevlex_benchmarks(ground)
systems = [
("cyclic 7", Groebner.cyclicn(7, k=ground)),
("cyclic 8", Groebner.cyclicn(8, k=ground)),
("cyclic 9", Groebner.cyclicn(9, k=ground)),
("katsura 9", Groebner.katsuran(9, k=ground)),
("katsura 10", Groebner.katsuran(10, k=ground)),
("katsura 11", Groebner.katsuran(11, k=ground)),
("katsura 12", Groebner.katsuran(12, k=ground)),
("noon 7", Groebner.noonn(7, k=ground)),
("noon 8", Groebner.noonn(8, k=ground)),
("noon 9", Groebner.noonn(9, k=ground)),
("eco 11", Groebner.eco11(k=ground)),
("eco 12", Groebner.eco12(k=ground)),
("eco 12", Groebner.eco13(k=ground))
("cyclic 7", Groebner.cyclicn(7, k=ground, ordering=:degrevlex)),
("cyclic 8", Groebner.cyclicn(8, k=ground, ordering=:degrevlex)),
# ("cyclic 9", Groebner.cyclicn(9, k=ground, ordering=:degrevlex)),
("katsura 9", Groebner.katsuran(9, k=ground, ordering=:degrevlex)),
("katsura 10", Groebner.katsuran(10, k=ground, ordering=:degrevlex)),
("katsura 11", Groebner.katsuran(11, k=ground, ordering=:degrevlex)),
# ("katsura 12", Groebner.katsuran(12, k=ground, ordering=:degrevlex)),
("noon 7", Groebner.noonn(7, k=ground, ordering=:degrevlex)),
("noon 8", Groebner.noonn(8, k=ground, ordering=:degrevlex)),
("noon 9", Groebner.noonn(9, k=ground, ordering=:degrevlex)),
("eco 11", Groebner.eco11(k=ground, ordering=:degrevlex)),
("eco 12", Groebner.eco12(k=ground, ordering=:degrevlex)),
("eco 13", Groebner.eco13(k=ground, ordering=:degrevlex))
]

for (name, system) in systems
print("$name -- ")
print("$name\t--\t")
t = benchmark_system_my(system)
println("$t s")
end
end

ground = AbstractAlgebra.GF(2^30 + 3)
run_f4_ff_degrevlex_benchmarks(ground)

#=
inline : -,
assume Zp : no
assume empty: no
cyclic 7 -- 0.0965658 s
cyclic 8 -- 1.4198005 s
katsura 9 -- 0.1470269 s
katsura 10 -- 0.8665618 s
katsura 11 -- 5.2572539 s
noon 7 -- 0.1516447 s
noon 8 -- 1.2835755 s
noon 9 -- 12.139458 s
eco 11 -- 0.3524228 s
eco 12 -- 2.1887455 s
eco 13 -- 9.0118321 s
inline : -,
assume Zp : yes
assume empty: no
cyclic 7 -- 0.0894644 s
cyclic 8 -- 1.2891848 s
katsura 9 -- 0.1435553 s
katsura 10 -- 0.8241746 s
katsura 11 -- 4.9737961 s
noon 7 -- 0.1593515 s
noon 8 -- 1.3084553 s
noon 9 -- 12.112527 s
eco 11 -- 0.3444795 s
eco 12 -- 2.1214438 s
eco 13 -- 8.6668539 s
inline : -,
assume Zp : yes
assume empty: yes
cyclic 7 -- 0.0875634 s
cyclic 8 -- 1.29229 s
katsura 9 -- 0.1403045 s
katsura 10 -- 0.8176833 s
katsura 11 -- 4.9808602 s
noon 7 -- 0.152636 s
noon 8 -- 1.301324 s
noon 9 -- 11.99392 s
eco 11 -- 0.3462996 s
eco 12 -- 2.091162 s
eco 13 -- 8.5554278 s
inline : yes,
assume Zp : yes
assume empty: yes
cyclic 7 -- 0.0892941 s
cyclic 8 -- 1.3551499 s
katsura 9 -- 0.1469254 s
katsura 10 -- 0.8492118 s
katsura 11 -- 5.1888122 s
noon 7 -- 0.1558268 s
noon 8 -- 1.3372536 s
noon 9 -- 12.066885 s
eco 11 -- 0.345839 s
eco 12 -- 2.1880031 s
eco 13 -- 8.9609859 s
=#
11 changes: 11 additions & 0 deletions experimental/example-maybe-bug.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,17 @@ trace, gb = Groebner.groebner_learn(s);
@btime Groebner.groebner_apply!($trace, $((s, s, s, s)));
@btime Groebner.groebner_apply!($trace, $((s, s, s, s, s, s, s, s)));

#=
113.912 ms (34041 allocations: 43.78 MiB)
44.803 ms (18887 allocations: 24.46 MiB)
52.580 ms (20607 allocations: 35.38 MiB)
64.618 ms (23276 allocations: 59.42 MiB)
125.247 ms (28610 allocations: 107.47 MiB)
=#
@profview Groebner.groebner_apply!(trace, ((s, s, s, s)));

R, (x1, x2, x3) = polynomial_ring(QQ, ["x1", "x2", "x3"], ordering=:degrevlex)
Expand Down
19 changes: 14 additions & 5 deletions src/arithmetic/Zp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ struct SpecializedArithmeticZp{AccumType, CoeffType, Add} <:
::Type{CoeffType},
uinv::UnsignedMultiplicativeInverse{AccumType}
) where {AccumType <: CoeffZp, CoeffType <: CoeffZp}
# Further in the code we need the guarantee that the shift is < 64
@invariant uinv.shift < 64
new{AccumType, CoeffType, uinv.add}(uinv.multiplier, uinv.shift, uinv.divisor)
end
end
Expand All @@ -106,21 +108,21 @@ function _mul_high(a::UInt128, b::UInt128)
a1b1 + (a1b2 >>> shift) + (a2b1 >>> shift) + carry
end

# NOTE: the compiler usually fails to simd this operation for T = UInt32 and T =
# UInt64. However, with T = UInt8 and T = UInt16, it uses the simd mulhi
# instructions such as pmulhuw.
# Our attempts to emulate pmulhuw for T = UInt32 while preserving the
# performance and portability were not very promising.
# NOTE: the compiler sometimes fails to simd this operation for T ≥ UInt32.
#
# a modulo p (addition specialization)
function mod_p(a::T, mod::SpecializedArithmeticZp{T, C, true}) where {T, C}
x = _mul_high(a, mod.multiplier)
x = convert(T, convert(T, (convert(T, a - x) >>> UInt8(1))) + x)
# Bit shifts Julia check that the shift is less than 64 (due to a contract with LLVM).
# The assumption allows us to bypass this check.
unsafe_assume(mod.shift < 64)
a - (x >>> mod.shift) * mod.divisor
end
# a modulo p (no addition specialization)
function mod_p(a::T, mod::SpecializedArithmeticZp{T, C, false}) where {T, C}
x = _mul_high(a, mod.multiplier)
unsafe_assume(mod.shift < 64)
a - (x >>> mod.shift) * mod.divisor
end

Expand Down Expand Up @@ -157,6 +159,7 @@ struct DelayedArithmeticZp{AccumType, CoeffType, Add} <:
::Type{CoeffType},
uinv::UnsignedMultiplicativeInverse{AccumType}
) where {AccumType <: CoeffZp, CoeffType <: CoeffZp}
@invariant uinv.shift < 64
new{AccumType, CoeffType, uinv.add}(uinv.multiplier, uinv.shift, uinv.divisor)
end
end
Expand All @@ -177,11 +180,13 @@ end
function mod_p(a::A, mod::DelayedArithmeticZp{A, T, true}) where {A, T}
x = _mul_high(a, mod.multiplier)
x = convert(A, convert(A, (convert(A, a - x) >>> UInt8(1))) + x)
unsafe_assume(mod.shift < 64)
a - (x >>> mod.shift) * mod.divisor
end
# a modulo p (no addition specialization)
function mod_p(a::A, mod::DelayedArithmeticZp{A, T, false}) where {A, T}
x = _mul_high(a, mod.multiplier)
unsafe_assume(mod.shift < 64)
a - (x >>> mod.shift) * mod.divisor
end

Expand Down Expand Up @@ -270,6 +275,7 @@ struct SignedArithmeticZp{AccumType, CoeffType} <:
@invariant Primes.isprime(p)
pa = convert(AccumType, p)
magic = Base.MultiplicativeInverses.SignedMultiplicativeInverse(pa)
@invariant magic.shift < 64
new{AccumType, CoeffType}(pa, pa * pa, magic.multiplier, magic.addmul, magic.shift)
end
end
Expand All @@ -279,6 +285,7 @@ divisor(arithm::SignedArithmeticZp) = arithm.p
function mod_p(a::T, mod::SignedArithmeticZp{T}) where {T}
x = _mul_high(a, mod.multiplier)
x += a * mod.addmul
unsafe_assume(mod.shift < 64)
d = (signbit(x) + (x >> mod.shift)) % T
res = a - d * mod.p
ifelse(res >= zero(T), res, res + mod.p)
Expand Down Expand Up @@ -310,6 +317,7 @@ struct SignedCompositeArithmeticZp{AccumType, CoeffType, T, N} <:
multipliers = map(a -> a.multiplier, arithms)
addmuls = map(a -> a.addmul, arithms)
shifts = map(a -> a.shift, arithms)
@invariant all(x -> x < 64, shifts)
p2s = CompositeInt{N, AT}(ps) * CompositeInt{N, AT}(ps)
new{CompositeInt{N, AT}, CompositeInt{N, CT}, AT, N}(
CompositeInt{N, AT}(ps),
Expand All @@ -329,6 +337,7 @@ function mod_p(
) where {N, T, U, W}
x = _mul_high.(a.data, arithm.multipliers.data)
x = x .+ a.data .* arithm.addmuls.data
# unsafe_assume(all(x -> x < 64, arithm.shifts.data))
d = (signbit.(x) .+ (x .>> arithm.shifts.data)) .% T
res = a.data .- d .* arithm.ps.data
res = ifelse.(res .>= T(0), res, res .+ arithm.ps.data)
Expand Down
13 changes: 13 additions & 0 deletions src/f4/linalg/backend.jl
Original file line number Diff line number Diff line change
Expand Up @@ -865,6 +865,9 @@ function linalg_vector_addmul_sparsedense_mod_p!(
) where {I, A <: Union{CoeffZp, CompositeCoeffZp}, T <: Union{CoeffZp, CompositeCoeffZp}}
@invariant isone(coeffs[1])
@invariant length(indices) == length(coeffs)
@invariant !isempty(indices)

unsafe_assume(!isempty(indices))

@inbounds mul = divisor(arithmetic) - row[indices[1]]
@inbounds for j in 1:length(indices)
Expand All @@ -887,6 +890,9 @@ function linalg_vector_addmul_sparsedense!(
) where {I, A <: Union{CoeffZp, CompositeCoeffZp}, T <: Union{CoeffZp, CompositeCoeffZp}}
@invariant isone(coeffs[1])
@invariant length(indices) == length(coeffs)
@invariant !isempty(indices)

unsafe_assume(!isempty(indices))

@inbounds mul = divisor(arithmetic) - row[indices[1]]
@inbounds for j in 1:length(indices)
Expand All @@ -909,6 +915,9 @@ function linalg_vector_addmul_sparsedense!(
) where {I, A <: CoeffZp, T <: CoeffZp}
@invariant isone(coeffs[1])
@invariant length(indices) == length(coeffs)
@invariant !isempty(indices)

Check warning on line 918 in src/f4/linalg/backend.jl

View check run for this annotation

Codecov / codecov/patch

src/f4/linalg/backend.jl#L918

Added line #L918 was not covered by tests

unsafe_assume(!isempty(indices))

Check warning on line 920 in src/f4/linalg/backend.jl

View check run for this annotation

Codecov / codecov/patch

src/f4/linalg/backend.jl#L920

Added line #L920 was not covered by tests

# NOTE: mul is guaranteed to be < typemax(T)
p2 = arithmetic.p2
Expand All @@ -934,6 +943,9 @@ function linalg_vector_addmul_sparsedense!(
) where {I, A <: CoeffZp, T <: CoeffZp, N}
@invariant isone(coeffs[1])
@invariant length(indices) == length(coeffs)
@invariant !isempty(indices)

unsafe_assume(!isempty(indices))

# NOTE: mul is guaranteed to be < typemax(T)
p2 = arithmetic.p2s
Expand All @@ -959,6 +971,7 @@ function linalg_vector_addmul_sparsedense!(
) where {I, T <: CoeffQQ}
@invariant isone(coeffs[1])
@invariant length(indices) == length(coeffs)
@invariant !isempty(indices)

@inbounds mul = -row[indices[1]]
@inbounds for j in 1:length(indices)
Expand Down
2 changes: 1 addition & 1 deletion src/groebner/autoreduce.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
###
# Autoreducing a set of polynomials

function _autoreduce(
function _autoreduce1(
ring::PolyRing,
monoms::Vector{Vector{M}},
coeffs::Vector{Vector{C}},
Expand Down
2 changes: 1 addition & 1 deletion src/groebner/homogenization.jl
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ function dehomogenize_generators!(
De-homogenized monomials:
$new_monoms
"""
new_monoms, coeffs = _autoreduce(new_ring, new_monoms, coeffs, params)
new_monoms, coeffs = _autoreduce1(new_ring, new_monoms, coeffs, params)
new_ring, new_monoms, coeffs
end

Expand Down
Loading

0 comments on commit d5452b0

Please sign in to comment.