Skip to content

Commit

Permalink
upd fglm benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
sumiya11 committed Jan 26, 2024
1 parent d42f0d8 commit 6dd8183
Show file tree
Hide file tree
Showing 25 changed files with 1,281 additions and 264 deletions.
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ AbstractAlgebra = "c3fe647b-3220-5bb0-a1ea-a7954cac585d"
Atomix = "a9b6321e-bd34-4604-b9c9-b65b8de01458"
Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
ExprTools = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
HostCPUFeatures = "3e5b6fbb-0976-4d2c-9146-d79de83f2fb0"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
MultivariatePolynomials = "102ac46a-7ee4-5c85-9060-abc95bfdeaa3"
Nemo = "2edaba10-b0f1-5616-af89-8c11ac63239a"
Expand Down
7 changes: 7 additions & 0 deletions benchmark/CI-scripts/run_benchmarks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,13 @@ push!(
result=compute_gb(Groebner.katsuran(10, ordering=:degrevlex, k=GF(2^27 + 29)), 5)
)
)
push!(
suite,
(
problem_name="groebner, AA, GF(2^30+3), katsura 11",
result=compute_gb(Groebner.katsuran(11, ordering=:degrevlex, k=GF(2^30 + 3)), 3)
)
)
push!(
suite,
(
Expand Down
14 changes: 7 additions & 7 deletions experimental/example-maybe-bug.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ end

@info "" nthreads()
@show ENV["JULIA_NUM_THREADS"]
Groebner.logging_enabled() = false
Groebner.logging_enabled() = true
Groebner.invariants_enabled() = false
Groebner.performance_counters_enabled() = false

s = Groebner.katsuran(9, ordering=:degrevlex, k=AbstractAlgebra.GF(2^30 + 3));
s = Groebner.noonn(8, ordering=:degrevlex, k=AbstractAlgebra.GF(2^30 + 3));
trace, gb = Groebner.groebner_learn(s);
@btime Groebner.groebner($s);
@btime Groebner.groebner_apply!($trace, $s);
Expand All @@ -24,15 +24,15 @@ trace, gb = Groebner.groebner_learn(s);
@btime Groebner.groebner_apply!($trace, $((s, s, s, s, s, s, s, s)));

#=
113.912 ms (34041 allocations: 43.78 MiB)
1.048 s (418167 allocations: 258.02 MiB)
44.803 ms (18887 allocations: 24.46 MiB)
142.140 ms (126985 allocations: 87.63 MiB)
52.580 ms (20607 allocations: 35.38 MiB)
185.635 ms (134610 allocations: 134.37 MiB)
64.618 ms (23276 allocations: 59.42 MiB)
283.075 ms (146988 allocations: 224.29 MiB)
125.247 ms (28610 allocations: 107.47 MiB)
433.356 ms (171889 allocations: 404.00 MiB)
=#
@profview Groebner.groebner_apply!(trace, ((s, s, s, s)));

Expand Down
17 changes: 17 additions & 0 deletions experimental/fglmtest.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
using AbstractAlgebra, Groebner

# Polynomial ring over the rationals in the four variables x, y, z, t.
R, (x, y, z, t) = AbstractAlgebra.polynomial_ring(AbstractAlgebra.QQ, ["x", "y", "z", "t"])
# Input system: four polynomials in x, y, z, t.
sys = [
y^2 * z + 2 * x * y * t - 2 * x - z,
-x^3 * z + 4 * x * y^2 * z + 4 * x^2 * y * t + 2 * y^3 * t + 4 * x^2 - 10 * y^2 +
4 * x * z - 10 * y * t + 2,
2 * y * z * t + x * t^2 - x - 2 * z,
-x * z^3 + 4 * y * z^2 * t + 4 * x * z * t^2 + 2 * y * t^3 + 4 * x * z + 4 * z^2 -
10 * y * t - 10 * t^2 + 2
]

# Basis computed directly with the `Lex` ordering.
# NOTE(review): `gb_lex` is not used again in this script; presumably it is kept
# as a reference to compare against `gb_fglm` by hand -- confirm.
gb_lex = Groebner.groebner(sys, ordering=Groebner.Lex())
# Basis computed with the `DegRevLex` ordering; this is the input to `fglm` below.
gb_drl = Groebner.groebner(sys, ordering=Groebner.DegRevLex())
# NOTE(review): presumably converts the DegRevLex basis into a Lex basis
# (second argument = source ordering, third = target ordering) -- confirm
# against the `Groebner.fglm` signature.
gb_fglm = Groebner.fglm(gb_drl, Groebner.DegRevLex(), Groebner.Lex())

# Log the result of the conversion.
@info "" gb_fglm
217 changes: 217 additions & 0 deletions experimental/lexcmp.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
using BenchmarkTools

using HostCPUFeatures
using HostCPUFeatures:
register_size,
pick_vector_width,
pick_vector_width_shift,
simd_integer_register_size,
fma_fast,
has_feature,
register_count,
cpu_name,
register_size

#########

"""
    _setup1(n)

Return a pair `(x, y)` of random `UInt8` vectors of length `n`.

Entries are sampled from `[0, 0, 0, 1, 2, 3]`, so zero is drawn half of the time.
"""
function _setup1(n)
    first_vec = rand(UInt8.([0, 0, 0, 1, 2, 3]), n)
    second_vec = rand(UInt8.([0, 0, 0, 1, 2, 3]), n)
    return first_vec, second_vec
end
"""
    _setup2(n)

Return a pair `(x, y)` of `UInt8` vectors of length `2n`.

Each vector consists of `n` zeros followed by `n` random entries sampled from
`[0, 0, 0, 1, 2, 3]`.
"""
function _setup2(n)
    x_tail = rand(UInt8.([0, 0, 0, 1, 2, 3]), n)
    x = vcat(zeros(UInt8, n), x_tail)
    y_tail = rand(UInt8.([0, 0, 0, 1, 2, 3]), n)
    y = vcat(zeros(UInt8, n), y_tail)
    return x, y
end
"""
    _setup3(T, n)

Return a triple `(z, x, y)` of `Groebner.ExponentVector{T}` monomials.

`x` is constructed from `n` zeros followed by three random entries sampled from
`[0, 0, 0, 1, 2, 3]`; `y` is constructed from the same zeros followed by those
three entries in reverse order. `z` is `similar(x)` and serves as an output
buffer for the in-place operations being benchmarked.
"""
function _setup3(T, n)
    tail = rand(T.([0, 0, 0, 1, 2, 3]), 3)
    MonomT = Groebner.ExponentVector{T}
    x = Groebner.monom_construct_from_vector(MonomT, vcat(zeros(T, n), tail))
    y = Groebner.monom_construct_from_vector(MonomT, vcat(zeros(T, n), reverse(tail)))
    z = similar(x)
    # Both monomials carry the same multiset of exponents, so their total
    # degrees must agree.
    @assert Groebner.monom_totaldeg(x) == Groebner.monom_totaldeg(y)
    return z, x, y
end

#########

# Benchmark the basic monomial operations of Groebner on `ExponentVector{Int8}`
# inputs produced by `_setup3(Int8, n)`, for growing sizes `n`.
#
# `n` starts at 1; after every pass it is increased by `step`, and `step` itself
# grows by a factor of 1.2 (rounded up), until `n` reaches 500.
#
# The counters are held in a `let` block rather than in globals: assigning to a
# global from inside a top-level `while` is ambiguous under Julia's soft-scope
# rules and, when this file is `include`d (non-interactive context), makes `n`
# a fresh, undefined local inside the loop body.
#
# NOTE(review): `Groebner` is referenced here but is not imported in the visible
# part of this file -- confirm it is loaded before running.
let n = 1, step = 5
    while n < 500
        @info "n = $n"
        print("Groebner.monom_is_equal\t\t")
        @btime Groebner.monom_is_equal(xx, yy) setup = begin
            cc, xx, yy = _setup3(Int8, max(1, $n))
        end
        print("Groebner.monom_copy\t\t")
        @btime Groebner.monom_copy(xx) setup = begin
            cc, xx, yy = _setup3(Int8, max(1, $n))
        end
        print("Groebner.monom_is_divisible\t")
        @btime Groebner.monom_is_divisible(xx, yy) setup = begin
            cc, xx, yy = _setup3(Int8, $n)
        end
        print("Groebner.monom_product!\t\t")
        @btime Groebner.monom_product!(cc, xx, yy) setup = begin
            cc, xx, yy = _setup3(Int8, max(1, $n))
        end
        print("Groebner.monom_lcm!\t\t")
        @btime Groebner.monom_lcm!(cc, xx, yy) setup = begin
            cc, xx, yy = _setup3(Int8, max(1, $n))
        end
        print("Groebner.monom_is_gcd_const\t")
        @btime Groebner.monom_is_gcd_const(xx, yy) setup = begin
            cc, xx, yy = _setup3(Int8, $n)
        end
        print("Groebner.monom_isless:lex\t")
        @btime Groebner.monom_isless(xx, yy, _ord) setup = begin
            cc, xx, yy = _setup3(Int8, max(1, $n))
            _ord = Groebner._Lex{true}(ones(Int, length(xx)))
        end
        print("Groebner.monom_isless:drl\t")
        @btime Groebner.monom_isless(xx, yy, _ord) setup = begin
            # Reverse the monomials so the random entries come first, then
            # copy the last slot into the first one and re-check that the
            # total degrees still agree.
            cc, xx, yy = map(reverse, _setup3(Int8, max(1, $n)))
            xx[1] = xx[end]
            yy[1] = yy[end]
            @assert Groebner.monom_totaldeg(xx) == Groebner.monom_totaldeg(yy)
            _ord = Groebner._DegRevLex{true}(ones(Int, length(xx)))
        end
        n += step
        step = ceil(Int, step * 1.2)
    end
end

"""
    _setup4(n)

Return `(x, y, xpacked, ypacked)`: two monomials in both the
`Groebner.ExponentVector{UInt8}` and a packed-tuple representation.

The underlying exponent vectors have length `n + 2`: two random entries sampled
from `[0, 0, 0, 1, 2, 3]` (in opposite order for the two monomials) followed by
`n` zeros. The packed representation is `PackedTuple1`, `PackedTuple2` or
`PackedTuple3` (with parameters `{UInt64, UInt8}`), chosen by the thresholds
`n + 2 < 8`, `< 16`, `< 24`.

# Throws
- `ArgumentError` if `n + 2 >= 24`, since no packed representation is selected
  for that many entries.
"""
function _setup4(n)
    s = rand(UInt8.([0, 0, 0, 1, 2, 3]), 2)
    # Random entries are appended after the zeros and the vectors are then
    # reversed, so the non-zero entries end up in the leading positions.
    a = reverse(vcat(zeros(UInt8, n), s))
    b = reverse(vcat(zeros(UInt8, n), reverse(s)))
    x = Groebner.monom_construct_from_vector(Groebner.ExponentVector{UInt8}, a)
    y = Groebner.monom_construct_from_vector(Groebner.ExponentVector{UInt8}, b)
    nentries = n + 2
    PackedT = if nentries < 8
        Groebner.PackedTuple1
    elseif nentries < 16
        Groebner.PackedTuple2
    elseif nentries < 24
        Groebner.PackedTuple3
    else
        # Fail with a clear message instead of trying to parametrize `nothing`.
        throw(ArgumentError("no packed monomial type for n + 2 = $nentries entries (need n + 2 < 24)"))
    end
    xpacked = Groebner.monom_construct_from_vector(PackedT{UInt64, UInt8}, a)
    ypacked = Groebner.monom_construct_from_vector(PackedT{UInt64, UInt8}, b)
    return x, y, xpacked, ypacked
end
# Compare `Groebner.monom_isless` under the DegRevLex ordering on the packed and
# on the exponent-vector monomial representations built by `_setup4(n)`.
#
# Sizes are n = 1, 4, 7, ..., 19 (start 1, constant increment 3, n < 22). A `for`
# loop over that range is used instead of a `while` loop that mutates top-level
# counters: assigning to a global from inside a top-level loop is ambiguous
# under Julia's soft-scope rules and fails when the file is `include`d.
for n in 1:3:21
    @info "n = $n"
    print("Groebner.monom_isless:drl:packed\t")
    @btime Groebner.monom_isless(xpacked, ypacked, _ord) setup = begin
        x, y, xpacked, ypacked = _setup4($n)
        _ord = Groebner._DegRevLex{true}(ones(Int, length(x)))
        @assert Groebner.monom_totaldeg(xpacked) == Groebner.monom_totaldeg(ypacked)
        # Sanity check: both representations must convert back to the same
        # plain vector.
        tmp1, tmp2 =
            Vector{Int8}(undef, length(x) - 1), Vector{Int8}(undef, length(x) - 1)
        @assert Groebner.monom_to_vector!(tmp1, x) ==
                Groebner.monom_to_vector!(tmp2, xpacked)
        @assert Groebner.monom_to_vector!(tmp1, y) ==
                Groebner.monom_to_vector!(tmp2, ypacked)
    end
    print("Groebner.monom_isless:drl:expvect\t")
    @btime Groebner.monom_isless(x, y, _ord) setup = begin
        x, y, xpacked, ypacked = _setup4($n)
        _ord = Groebner._DegRevLex{true}(ones(Int, length(x)))
        @assert Groebner.monom_totaldeg(x) == Groebner.monom_totaldeg(y)
    end
end

# Cross-check and benchmark two orthogonality tests, `vector_are_orth` and
# `_vec_check_orth`, on vector pairs of growing size.
#
# NOTE(review): neither `vector_are_orth` nor `_vec_check_orth` is defined or
# imported in the visible part of this file -- confirm where they come from.
# NOTE(review): `_setup3` is called here with ONE argument, but the only method
# defined above this point takes two (`_setup3(T, n)`); a one-argument method
# appears only further down the file, so running the file top to bottom is
# expected to fail here with a MethodError -- confirm the intended order.
# NOTE(review): `n` and `step` are globals assigned inside a top-level `while`;
# under Julia's soft-scope rules this only works in interactive contexts (REPL),
# not when the file is `include`d.
begin
n, step = 1, 5
while n < 500
@info "n = $n"
# Both implementations must agree on 1000 random inputs of the current size.
for _ in 1:1_000
x, y = _setup3(n)
res1 = vector_are_orth(x, y)
res2 = _vec_check_orth(x, y)
@assert res1 == res2
end
@btime vector_are_orth(xx, yy) setup = begin
xx, yy = _setup3($n)
end
@btime _vec_check_orth(xx, yy) setup = begin
xx, yy = _setup3($n)
end
# Grow the size; the increment itself grows by a factor of 1.2 (rounded up).
n += step
step = ceil(Int, step * 1.2)
end
end

# Input generators for this benchmark section. `_setup1` and `_setup2` are not
# used by the loop below; they are kept so that evaluating this section still
# defines the same methods as before.

# Two random `UInt8` vectors of length `n`, entries from `[0, 0, 0, 1, 2, 3]`.
function _setup1(n)
    x = rand(UInt8.([0, 0, 0, 1, 2, 3]), n)
    y = rand(UInt8.([0, 0, 0, 1, 2, 3]), n)
    return x, y
end

# Two `UInt8` vectors of length `2n`: `n` zeros followed by `n` random entries.
function _setup2(n)
    x = vcat(zeros(UInt8, n), rand(UInt8.([0, 0, 0, 1, 2, 3]), n))
    y = vcat(zeros(UInt8, n), rand(UInt8.([0, 0, 0, 1, 2, 3]), n))
    return x, y
end

# Two `Int16` vectors of length `2n`: `n` zeros followed by `n` random entries.
function _setup3(n)
    x = vcat(zeros(Int16, n), rand(Int16.([0, 0, 0, 1, 2, 3]), n))
    y = vcat(zeros(Int16, n), rand(Int16.([0, 0, 0, 1, 2, 3]), n))
    return x, y
end

# Cross-check and benchmark `vector_any_lt` against `vector_any_lt_simd` on
# inputs from `_setup3(n)` for growing `n`.
#
# The size is advanced at the END of each pass, so the logged `n` is the size
# that is actually checked and benchmarked, and no pass runs with `n >= 500`.
# The counters are held in a `let` block because assigning to globals from
# inside a top-level `while` fails when the file is `include`d (soft scope).
#
# NOTE(review): `vector_any_lt` and `vector_any_lt_simd` are not defined or
# imported in the visible part of this file -- confirm where they come from.
let n = 1, step = 5
    while n < 500
        @info "n = $n"
        # Both implementations must agree on 100 random inputs of this size.
        for _ in 1:100
            x, y = _setup3(n)
            res1 = vector_any_lt(x, y)
            res2 = vector_any_lt_simd(x, y)
            @assert res1 == res2
        end
        @btime vector_any_lt(xx, yy) setup = begin
            xx, yy = _setup3($n)
        end
        @btime vector_any_lt_simd(xx, yy) setup = begin
            xx, yy = _setup3($n)
        end
        n += step
        step = ceil(Int, step * 1.2)
    end
end

#########

# Input generator for this section: an output buffer plus two random `UInt8`
# vectors of length `n` with entries from `[0, 0, 0, 1, 2, 3]`.
# Note that this replaces any earlier one-argument `_setup1` method and returns
# THREE values `(buffer, x, y)`.
function _setup1(n)
    x = rand(UInt8.([0, 0, 0, 1, 2, 3]), n)
    y = rand(UInt8.([0, 0, 0, 1, 2, 3]), n)
    return similar(x), x, y
end

# Cross-check and benchmark `vector_emax_1!` against `vector_emax_2!` for
# growing `n`.
#
# The size is advanced at the END of each pass, so the logged `n` is the size
# that is actually checked and benchmarked, and no pass runs with `n >= 500`.
# The counters are held in a `let` block because assigning to globals from
# inside a top-level `while` fails when the file is `include`d (soft scope).
#
# NOTE(review): `vector_emax_1!` and `vector_emax_2!` are not defined or
# imported in the visible part of this file -- confirm where they come from.
let n = 1, step = 5
    while n < 500
        @info "n = $n"
        # Both implementations must agree on 100 random inputs of this size;
        # each gets its own copy of the output buffer.
        for _ in 1:100
            c, x, y = _setup1(n)
            res1 = vector_emax_1!(copy(c), x, y)
            res2 = vector_emax_2!(copy(c), x, y)
            @assert res1 == res2
        end
        @btime vector_emax_1!(cc, xx, yy) setup = begin
            cc, xx, yy = _setup1($n)
        end
        @btime vector_emax_2!(cc, xx, yy) setup = begin
            cc, xx, yy = _setup1($n)
        end
        n += step
        step = ceil(Int, step * 1.2)
    end
end
93 changes: 93 additions & 0 deletions experimental/native_vec4-assume.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
.text
.file "mod_p"
.section .rodata.cst8,"aM",@progbits,8
.p2align 3 # -- Begin function julia_mod_p_61774
.LCPI0_0:
.quad -9223372036854775808 # 0x8000000000000000
.text
.globl julia_mod_p_61774
.p2align 4, 0x90
.type julia_mod_p_61774,@function
julia_mod_p_61774: # @julia_mod_p_61774
.cfi_startproc
# %bb.0: # %top
push rbp
.cfi_def_cfa_offset 16
.cfi_offset rbp, -16
mov rbp, rsp
.cfi_def_cfa_register rbp
push rsi
push rdi
.cfi_offset rdi, -32
.cfi_offset rsi, -24
mov r9, rdx
mov r10, rcx
vpmovsxbq ymm2, dword ptr [r8 + 96]
vpmovzxbq ymm1, dword ptr [r8 + 100] # ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
mov r11, qword ptr [r8 + 80]
mov rcx, qword ptr [r8 + 72]
mov rax, qword ptr [r8 + 64]
imul qword ptr [rdx]
mov rdi, rdx
mov rax, rcx
imul qword ptr [r9 + 8]
mov rcx, rdx
mov rax, r11
imul qword ptr [r9 + 16]
mov rsi, rdx
vmovdqu ymm0, ymmword ptr [r9]
mov rax, qword ptr [r8 + 88]
imul qword ptr [r9 + 24]
vmovq xmm3, rdx
vmovq xmm4, rsi
vpunpcklqdq xmm3, xmm4, xmm3 # xmm3 = xmm4[0],xmm3[0]
vmovq xmm4, rcx
vmovq xmm5, rdi
vpunpcklqdq xmm4, xmm5, xmm4 # xmm4 = xmm5[0],xmm4[0]
vinserti128 ymm3, ymm4, xmm3, 1
vpsrlq ymm4, ymm0, 32
vpmuludq ymm4, ymm4, ymm2
vpsrlq ymm5, ymm2, 32
vpmuludq ymm5, ymm0, ymm5
vpaddq ymm4, ymm5, ymm4
vpsllq ymm4, ymm4, 32
vpmuludq ymm2, ymm0, ymm2
vpaddq ymm2, ymm2, ymm3
vpaddq ymm2, ymm4, ymm2
vpxor xmm3, xmm3, xmm3
vpsrlvq ymm4, ymm2, ymm1
movabs rax, offset .LCPI0_0
vpbroadcastq ymm5, qword ptr [rax]
vpsrlvq ymm1, ymm5, ymm1
vpxor ymm4, ymm4, ymm1
vpsubq ymm1, ymm4, ymm1
vpsrlq ymm2, ymm2, 63
vpaddq ymm1, ymm1, ymm2
vmovdqu ymm2, ymmword ptr [r8]
vpsrlq ymm4, ymm2, 32
vpmuludq ymm4, ymm1, ymm4
vpsrlq ymm5, ymm1, 32
vpmuludq ymm5, ymm5, ymm2
vpaddq ymm4, ymm4, ymm5
vpsllq ymm4, ymm4, 32
vpmuludq ymm1, ymm1, ymm2
vpaddq ymm1, ymm1, ymm4
vpsubq ymm0, ymm0, ymm1
vpcmpgtq ymm1, ymm3, ymm0
vpand ymm1, ymm1, ymm2
vpaddq ymm0, ymm1, ymm0
vpcmpgtq ymm1, ymm0, ymm2
vpand ymm1, ymm1, ymm2
vpsubq ymm0, ymm0, ymm1
vmovdqu ymmword ptr [r10], ymm0
mov rax, r10
pop rdi
pop rsi
pop rbp
vzeroupper
ret
.Lfunc_end0:
.size julia_mod_p_61774, .Lfunc_end0-julia_mod_p_61774
.cfi_endproc
# -- End function
.section ".note.GNU-stack","",@progbits
Loading

0 comments on commit 6dd8183

Please sign in to comment.