Skip to content

Commit

Permalink
up
Browse files Browse the repository at this point in the history
  • Loading branch information
sumiya11 committed Dec 12, 2023
1 parent 6300b32 commit e4851dc
Show file tree
Hide file tree
Showing 6 changed files with 158 additions and 21 deletions.
115 changes: 115 additions & 0 deletions experimental/add_mull.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
using InteractiveUtils, BenchmarkTools

function add_mul!(vres::Vector{Int64}, a::Int32, v::Vector{Int32})
@inbounds for i in eachindex(vres)
vres[i] += Int64(a) * Int64(v[i])
end
end

function add_mul_f!(vres, a, v)
@fastmath @inbounds @simd for i in eachindex(vres)
vres[i] += a * v[i]
end
end

n = 2^10
@benchmark add_mul!(v1, c, v2) setup = begin
v1 = rand(Int, n)
c = rand(Int32)
v2 = rand(Int32, n)
end

@benchmark add_mul_f!(v1, c, v2) setup = begin
v1 = rand(Float64, n)
c = rand(Float64)
v2 = rand(Float64, n)
end

@code_native debuginfo = :none add_mul_f!(Float64[], Float32(1), Float32[])

@code_native debuginfo = :none add_mul!(Int64[], Int32(1), Int32[])
#=
.text
.file "add_mul"
.globl julia_add_mul_13693 # -- Begin function julia_add_mul_13693
.p2align 4, 0x90
.type julia_add_mul_13693,@function
julia_add_mul_13693: # @julia_add_mul_13693
.cfi_startproc
# %bb.0: # %top
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
movq 8(%rcx), %r10
testq %r10, %r10
je .LBB0_9
# %bb.1: # %L18.preheader
movq (%rcx), %rcx
movslq %edx, %r9
movq (%r8), %r11
movl $1, %eax
cmpq $16, %r10
jb .LBB0_7
# %bb.2: # %vector.memcheck
leaq (%r11,%r10,4), %rdx
cmpq %rdx, %rcx
jae .LBB0_4
# %bb.3: # %vector.memcheck
leaq (%rcx,%r10,8), %rdx
cmpq %rdx, %r11
jb .LBB0_7
.LBB0_4: # %vector.ph
movq %r10, %r8
andq $-16, %r8
leaq 1(%r8), %rax
vmovq %r9, %xmm0
vpbroadcastq %xmm0, %ymm0
xorl %edx, %edx
.p2align 4, 0x90
.LBB0_5: # %vector.body
# =>This Inner Loop Header: Depth=1
vpmovzxdq (%r11,%rdx,4), %ymm1 # ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
vpmovzxdq 16(%r11,%rdx,4), %ymm2 # ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
vpmovzxdq 32(%r11,%rdx,4), %ymm3 # ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
vpmovzxdq 48(%r11,%rdx,4), %ymm4 # ymm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
vpmuldq %ymm1, %ymm0, %ymm1
vpmuldq %ymm2, %ymm0, %ymm2
vpmuldq %ymm3, %ymm0, %ymm3
vpmuldq %ymm4, %ymm0, %ymm4
vpaddq (%rcx,%rdx,8), %ymm1, %ymm1
vpaddq 32(%rcx,%rdx,8), %ymm2, %ymm2
vpaddq 64(%rcx,%rdx,8), %ymm3, %ymm3
vpaddq 96(%rcx,%rdx,8), %ymm4, %ymm4
vmovdqu %ymm1, (%rcx,%rdx,8)
vmovdqu %ymm2, 32(%rcx,%rdx,8)
vmovdqu %ymm3, 64(%rcx,%rdx,8)
vmovdqu %ymm4, 96(%rcx,%rdx,8)
addq $16, %rdx
cmpq %rdx, %r8
jne .LBB0_5
# %bb.6: # %middle.block
cmpq %r8, %r10
je .LBB0_9
.LBB0_7: # %scalar.ph
decq %rax
.p2align 4, 0x90
.LBB0_8: # %L18
# =>This Inner Loop Header: Depth=1
movslq (%r11,%rax,4), %rdx
imulq %r9, %rdx
addq %rdx, (%rcx,%rax,8)
incq %rax
cmpq %rax, %r10
jne .LBB0_8
.LBB0_9: # %L38
popq %rbp
vzeroupper
retq
.Lfunc_end0:
.size julia_add_mul_13693, .Lfunc_end0-julia_add_mul_13693
.cfi_endproc
# -- End function
.section ".note.GNU-stack","",@progbits
=#
9 changes: 6 additions & 3 deletions experimental/benchmarks/standalone_f4.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ if !isdefined(Main, :Groebner)
using Groebner
end

import AbstractAlgebra
import AbstractAlgebra, Primes
using BenchmarkTools
using Logging

Expand Down Expand Up @@ -42,9 +42,12 @@ function run_f4_ff_degrevlex_benchmarks(ground)
end
end

s = Groebner.katsuran(11, ordering=:degrevlex, ground=AbstractAlgebra.GF(2^31 - 1))
p1 = 2^31 - 1
p2 = 2^29 + 11
p3 = 2^27 + 29
s = Groebner.katsuran(11, ordering=:degrevlex, ground=AbstractAlgebra.GF(p3))

@time Groebner.groebner(s);
@profview gb1 = Groebner.groebner(s);

println()
ground = AbstractAlgebra.GF(1048583)
Expand Down
35 changes: 25 additions & 10 deletions experimental/example-maybe-bug.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,42 @@ R, (x1, x2, x3) = PolynomialRing(GF(2^27 + 29), ["x1", "x2", "x3"], ordering=:de
R, (x1, x2, x3) = PolynomialRing(GF(2^31 - 1), ["x1", "x2", "x3"], ordering=:degrevlex)

s = [x1 * x2^2 + 2, x1 * x3 + 3, x2 * x3 + 4 * x1 - 5]
gb = Groebner.groebner(s, loglevel=-4)
gb = Groebner.groebner(s, loglevel=-5)
gb

Groebner.logging_enabled() = false
k = Groebner.katsuran(9, ground=GF(2^27 + 29), ordering=:degrevlex)
p1 = 2^31 - 1
p2 = 2^29 + 11
p3 = 2^27 + 29
p4 = 2^25 + 35
s = Groebner.eco11(ordering=:degrevlex, ground=AbstractAlgebra.GF(p4))

@profview for _ in 1:10
gb = Groebner.groebner(k)
end
@time gb1 = Groebner.groebner(s);

Groebner.isgroebner(gb)
@time trace, gb2 = Groebner.groebner_learn(s);

@time gb2 = Groebner.groebner(k);
gb == gb2
@btime suc, gb3 = Groebner.groebner_apply!($trace, $s);

println()
ground = AbstractAlgebra.GF(1048583)
run_f4_ff_degrevlex_benchmarks(ground)
# for k = 7
# 188.876 ms (213205 allocations: 42.44 MiB)

#! format: off
R,(t1,t2,t3,a,b,c) = PolynomialRing(QQ, ["t1","t2","t3","a", "b", "c"])
R,(t1,t2,t3,a,b,c) = PolynomialRing(GF(2^27+29), ["t1","t2","t3","a", "b", "c"], ordering=:degrevlex)
sys = [1000000*a^2*t1^2+1000000*a^2*t2^2+1000000*a^2*t3^2+1000000*b^2*t1^2+1000000*b^2*t2^2+1000000*b^2*t3^2+1000000*c^2*t1^2+1000000*c^2*t2^2+1000000*c^2*t3^2-1065102000*a^2*t1-1566200000*a^2*t2+359610000*a^2*t3-4000000*a*b*t2-1574352000*a*b*t3+4000000*a*c*t1+273640000*a*c*t3-1065102000*b^2*t1+8152000*b^2*t2+355610000*b^2*t3-1574352000*b*c*t1-273640000*b*c*t2-791462000*c^2*t1-1566200000*c^2*t2+355610000*c^2*t3+740236705137*a^2-279943961360*a*b+47071636200*a*c+1574352000*a*t1-273640000*a*t2+126292488913*b^2+837307375312*b*c+4000000*b*t1-273640000*b*t3+612513941897*c^2+4000000*c*t2-1574352000*c*t3+1000000*t1^2+1000000*t2^2+1000000*t3^2-624135247952*a-50784764200*b-283060057360*c-791462000*t1+8152000*t2+359610000*t3+165673, 1000000*a^2*t1^2+1000000*a^2*t2^2+1000000*a^2*t3^2+1000000*b^2*t1^2+1000000*b^2*t2^2+1000000*b^2*t3^2+1000000*c^2*t1^2+1000000*c^2*t2^2+1000000*c^2*t3^2-1889130000*a^2*t1-139016000*a^2*t2+357608000*a^2*t3+550492000*a*b*t3+1500376000*a*c*t3-1889130000*b^2*t1-689508000*b^2*t2+357608000*b^2*t3+550492000*b*c*t1-1500376000*b*c*t2-388754000*c^2*t1-139016000*c^2*t2+357608000*c^2*t3+740396599024*a^2+98430171568*a*b+268273230304*a*c-550492000*a*t1-1500376000*a*t2+854420557476*b^2-2714848476*b*c-1500376000*b*t3-114024022072*c^2+550492000*c*t3+1000000*t1^2+1000000*t2^2+1000000*t3^2+624263610988*a-268273230304*b+98430171568*c-388754000*t1-689508000*t2+357608000*t3-63620, 4000000*a^2*t1^2+4000000*a^2*t2^2+4000000*a^2*t3^2+4000000*b^2*t1^2+4000000*b^2*t2^2+4000000*b^2*t3^2+4000000*c^2*t1^2+4000000*c^2*t2^2+4000000*c^2*t3^2-3295636000*a^2*t1+6825304000*a^2*t2+1438448000*a^2*t3-16000000*a*b*t2+4096192000*a*b*t3+16000000*a*c*t1+4906624000*a*c*t3-3295636000*b^2*t1+2729112000*b^2*t2+1422448000*b^2*t3+4096192000*b*c*t1-4906624000*b*c*t2+1610988000*c^2*t1+6825304000*c^2*t2+1422448000*c^2*t3+2962666483625*a^2+722869290752*a*b+875649162944*a*c-4096192000*a*t1-4906624000*a*t2+513760438633*b^2-3361285532000*b*c+16000000*b*t1-4906624000*b*t3+2443184693353*c^2+16000000*c*t2+4096192000*c*t3+4000000*t1^2+4000000*t2^2+4000000*t3^2-2498705324448*a-879018458944*b+741978122752*c+1610988000*t1+2729112000*t2+1438448000*t3+440361,4000000*a^2*t1^2+4000000*a^2*t2^2+4000000*a^2*t3^2+4000000*b^2*t1^2+4000000*b^2*t2^2+4000000*b^2*t3^2+4000000*c^2*t1^2+4000000*c^2*t2^2+4000000*c^2*t3^2+3295636000*a^2*t1+6824896000*a^2*t2+1430432000*a^2*t3+4094592000*a*b*t3-4906624000*a*c*t3+3295636000*b^2*t1+2730304000*b^2*t2+1430432000*b^2*t3+4094592000*b*c*t1+4906624000*b*c*t2-1610988000*c^2*t1+6824896000*c^2*t2+1430432000*c^2*t3+2961910911797*a^2+732129427968*a*b-877323997696*a*c-4094592000*a*t1+4906624000*a*t2+516620569397*b^2+3361357491776*b*c+4906624000*b*t3+2445290017525*c^2+4094592000*c*t3+4000000*t1^2+4000000*t2^2+4000000*t3^2+2499114213824*a+877323997696*b+732129427968*c-1610988000*t1+2730304000*t2+1430432000*t3-324875, 1000000*a^2*t1^2+1000000*a^2*t2^2+1000000*a^2*t3^2+1000000*b^2*t1^2+1000000*b^2*t2^2+1000000*b^2*t3^2+1000000*c^2*t1^2+1000000*c^2*t2^2+1000000*c^2*t3^2+1889602000*a^2*t1-138926000*a^2*t2+359604000*a^2*t3-4000000*a*b*t2+550036000*a*b*t3+4000000*a*c*t1-1500228000*a*c*t3+1889602000*b^2*t1-688962000*b^2*t2+355604000*b^2*t3+550036000*b*c*t1+1500228000*b*c*t2+389374000*c^2*t1-138926000*c^2*t2+355604000*c^2*t3+740903906549*a^2+99175424872*a*b-265964790856*a*c-550036000*a*t1+1500228000*a*t2+854030749541*b^2+2874521168*b*c+4000000*b*t1+1500228000*b*t3-114557203083*c^2+4000000*c*t2+550036000*c*t3+1000000*t1^2+1000000*t2^2+1000000*t3^2-623884900400*a+270522742856*b+97519648872*c+389374000*t1-688962000*t2+359604000*t3+55909, 250000*a^2*t1^2+250000*a^2*t2^2+250000*a^2*t3^2+250000*b^2*t1^2+250000*b^2*t2^2+250000*b^2*t3^2+250000*c^2*t1^2+250000*c^2*t2^2+250000*c^2*t3^2+266341000*a^2*t1-391502000*a^2*t2+89402000*a^2*t3-393620000*a*b*t3-68228000*a*c*t3+266341000*b^2*t1+2118000*b^2*t2+89402000*b^2*t3-393620000*b*c*t1+68228000*b*c*t2+198113000*c^2*t1-391502000*c^2*t2+89402000*c^2*t3+184958257568*a^2-70380830480*a*b-12199439312*a*c+393620000*a*t1+68228000*a*t2+31688927488*b^2-209385275032*b*c+68228000*b*t3+153269490056*c^2-393620000*c*t3+250000*t1^2+250000*t2^2+250000*t3^2+156251491928*a+12199439312*b-70380830480*c+198113000*t1+2118000*t2+89402000*t3+159976]

@time trace, gb = Groebner.groebner_learn(
sys,
ordering=Groebner.DegRevLex(),
);

gb = Groebner.groebner_apply!(
trace,
sys,
loglevel=-3
);

#! format: on

Groebner.performance_counters_enabled() = true
Expand Down
14 changes: 9 additions & 5 deletions experimental/todo.jl
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
using SIMD, BenchmarkTools

function add_mul!(v1::Vector{T}, c, v2) where {T}
@inbounds for i in 1:length(v1)
v1[i] += T(c) * T(v2[i])
function add_mul!(vres::Vector{Int64}, a::Int32, v::Vector{Int32})
@inbounds for i in eachindex(vres)
vres[i] += Int64(a) * Int64(v[i])
end
nothing
end

function add_mul_v2!(vres::Vector{Int64}, a::Int64, v::Vector{Int32})
@inbounds for i in eachindex(vres)
vres[i] += a * Int64(v[i])
end
end
@code_native debuginfo = :none add_mul([1, 2], UInt32(1), UInt32[3, 4])

function reduce_dense_row_by_sparse_row_no_remainder!(
Expand Down Expand Up @@ -105,7 +109,7 @@ end
Val(2)
)

@code_native debuginfo = :none add_mul!(UInt64[1, 2, 3, 4], UInt32(8), UInt32[3, 1, 0, 10])
@code_native debuginfo = :none add_mul_v2!(Int64[1, 2, 3, 4], Int64(8), Int32[3, 1, 0, 10])

@code_native debuginfo = :none reduce_dense_row_by_sparse_row_no_remainder_2!(
UInt64[1, 2, 3, 4],
Expand Down
2 changes: 1 addition & 1 deletion src/arithmetic/Zp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ end
divisor(arithm::BuiltinArithmeticZp) = arithm.magic.divisor

function n_reserved_bits(arithm::BuiltinArithmeticZp{T}) where {T <: Unsigned}
res = leading_zeros(divisor(arithm)) - (8 >> 1) * sizeof(T)
res = 1 + leading_zeros(divisor(arithm)) - (8 >> 1) * sizeof(T)
@invariant res >= 0
res
end
Expand Down
4 changes: 2 additions & 2 deletions src/f4/linalg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1653,7 +1653,6 @@ function reduce_dense_row_by_pivots_sparse!(

j = 0
nskip = skip(arithmetic)
@log level = -4 "" n_reserved_bits(arithmetic) nskip divisor(arithmetic)

@inbounds for i in start_column:end_column
# if the element is zero - no reduction is needed
Expand Down Expand Up @@ -1698,6 +1697,7 @@ function reduce_dense_row_by_pivots_sparse!(
if iszero(j % nskip)
dense_row_remainder!(row, arithmetic, i, end_column)
end
row[i] = zero(row[i])

# @invariant iszero(row[i])
end
Expand All @@ -1708,7 +1708,7 @@ function reduce_dense_row_by_pivots_sparse!(
end

# TODO: perhaps, not needed
dense_row_remainder!(row, arithmetic, Int(start_column), Int(end_column))
# dense_row_remainder!(row, arithmetic, Int(start_column), Int(end_column))

# form the resulting row in sparse format
resize!(new_column_indices, nonzeros)
Expand Down

0 comments on commit e4851dc

Please sign in to comment.