up!

sumiya11 · Jan 6, 2024 · 078f9ad · 078f9ad
1 parent b6f349c
commit 078f9ad
Show file tree

Hide file tree

Showing 27 changed files with 285 additions and 142 deletions.
diff --git a/.github/workflows/Runtests.yml b/.github/workflows/Runtests.yml
@@ -9,12 +9,14 @@ jobs:
         julia-arch: [x64]
         os: [ubuntu-latest, windows-latest]
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - uses: julia-actions/setup-julia@latest
         with:
           version: ${{ matrix.julia-version }}
       - uses: julia-actions/julia-buildpkg@latest
       - uses: julia-actions/julia-runtest@latest
+        with:
+          depwarn: error
       - uses: julia-actions/julia-processcoverage@v1
       - uses: codecov/codecov-action@v3
         with:

diff --git a/benchmark/CI-scripts/run_benchmarks.jl b/benchmark/CI-scripts/run_benchmarks.jl
@@ -8,11 +8,11 @@ import Nemo
 
 suite = []
 
-function compute_gb(system, trials=7)
+function compute_gb(system, trials=7; kws...)
     times = []
     for _ in 1:trials
         GC.gc()
-        time = @elapsed groebner(system)
+        time = @elapsed groebner(system; kws...)
         push!(times, time)
     end
     minimum(times)
@@ -91,6 +91,17 @@ push!(
         )
     )
 )
+push!(
+    suite,
+    (
+        problem_name="groebner, threaded, AA, GF(2^31-1), cyclic 8",
+        result=compute_gb(
+            Groebner.cyclicn(8, ordering=:degrevlex, ground=GF(2^31 - 1)),
+            5,
+            threaded=:yes
+        )
+    )
+)
 
 function learn_and_apply(system)
     times = []

diff --git a/benchmark/CI-scripts/runtests.jl b/benchmark/CI-scripts/runtests.jl
@@ -14,12 +14,18 @@ const IGNORE_SMALL_ABSOLUTE_DEVIATION = 1e-3
 # Run benchmarks on the latest stable version of Groebner.jl
 dir_stable = (@__DIR__) * "/run-on-stable"
 @info "Benchmarking Groebner.jl, stable" dir_stable
-@time run(`julia --project=$dir_stable $dir_stable/run_benchmarks.jl`, wait=true)
+@time run(
+    `julia --threads=$(nthreads()) --project=$dir_stable $dir_stable/run_benchmarks.jl`,
+    wait=true
+)
 
 # Run benchmarks on the nightly version of Groebner.jl
 dir_nightly = (@__DIR__) * "/run-on-nightly"
 @info "Benchmarking Groebner.jl, nightly" dir_nightly
-@time run(`julia --project=$dir_nightly $dir_nightly/run_benchmarks.jl`, wait=true)
+@time run(
+    `julia --threads=$(nthreads()) --project=$dir_nightly $dir_nightly/run_benchmarks.jl`,
+    wait=true
+)
 
 # Compare results
 function compare()

diff --git a/experimental/constprop.jl b/experimental/constprop.jl
@@ -1,22 +1,15 @@
 module Foo
 
-vanish_enabled() = true
-
-macro vanish(expr)
-    m = __module__
-    quote
-        if !$m.vanish_enabled()
-            $(esc(expr))
-        else
-            nothing
-        end
-    end
+# Expands to `@assert false`; judging by its name, it is meant to mark a code
+# path that should never be executed.
+macro unreachable()
+    :(@assert false)
 end
 
-function critical_loop(x)
-    for i in eachindex(x)
-        @vanish println(x[i])
-        x[i] = x[i]^2
+# Returns `x^2` when `x > 0`. The other branch first hits `@unreachable`
+# (which expands to `@assert false`) and has `x` as its fallback value.
+# NOTE(review): if assertions are ever disabled, the fallback `x` is what the
+# else-branch returns -- confirm this is the intended experiment.
+function do_stuff(x)
+    if x > 0
+        x^2
+    else
+        @unreachable
+        x
     end
 end
 

diff --git a/experimental/examples/threaded-apply.jl b/experimental/examples/threaded-apply.jl
@@ -1,9 +1,12 @@
 using AbstractAlgebra, Primes, Base.Threads
 using BenchmarkTools
-# using Groebner
+using Groebner
+
+@info "Using $(nthreads()) Julia threads"
 
 # Computes the bases of the given system modulo different primes
 function compute_bases(system, batch_size::Int)
+    @assert iszero(batch_size % 4)
     prime = 2^30 + 3
     Zp = GF(prime)
 
@@ -27,11 +30,12 @@ end
 
 # Same as the above, but uses multi-threading
 function compute_bases_threaded(system, batch_size::Int)
+    @assert iszero(batch_size % 4)
     prime = 2^30 + 3
     Zp = GF(prime)
 
     system_zp = map(f -> map_coefficients(c -> Zp(c), f), system)
-    trace, _ = Groebner.groebner_learn(system_zp)
+    trace, _ = Groebner.groebner_learn(system_zp, threaded=:yes)
 
     bases = Vector{typeof(system_zp)}(undef, batch_size)
     buffer_traces = map(_ -> deepcopy(trace), 1:nthreads())
@@ -40,8 +44,7 @@ function compute_bases_threaded(system, batch_size::Int)
     buffer_systems_zp =
         map(zp -> map(f -> map_coefficients(c -> zp(c), f), system), buffer_zp)
 
-    # :static guarantees that threadid() is constant within one iteraton
+    # :static guarantees that threadid() is constant within one iteration.
+    # Keep it as long as the loop body reads threadid(): the buffers above hold
+    # one entry per nthreads() and are presumably selected through t_id, which
+    # is only safe when an iteration cannot move to another thread.
     Base.Threads.@threads :static for j in 1:batch_size
         t_id = threadid()
 
         system_zp = buffer_systems_zp[j]
@@ -58,11 +61,12 @@ end
 
 # Same as the above, but also uses batch size 4
 function compute_bases_threaded_batched(system, batch_size::Int)
+    @assert iszero(batch_size % 4)
     prime = 2^30 + 3
     Zp = GF(prime)
 
     system_zp = map(f -> map_coefficients(c -> Zp(c), f), system)
-    trace, _ = Groebner.groebner_learn(system_zp)
+    trace, _ = Groebner.groebner_learn(system_zp, threaded=:yes)
 
     bases = Vector{typeof(system_zp)}(undef, batch_size)
     buffer_traces = map(_ -> deepcopy(trace), 1:nthreads())
@@ -71,8 +75,7 @@ function compute_bases_threaded_batched(system, batch_size::Int)
     buffer_systems_zp =
         map(zp -> map(f -> map_coefficients(c -> zp(c), f), system), buffer_zp)
 
-    # :static guarantees that threadid() is constant within one iteraton
+    # :static guarantees that threadid() is constant within one iteration.
+    # Keep it as long as the loop body reads threadid(): the buffers above hold
+    # one entry per nthreads() and are presumably selected through t_id, which
+    # is only safe when an iteration cannot move to another thread.
     Base.Threads.@threads :static for j in 1:4:batch_size
         t_id = threadid()
 
         system_zp = (
@@ -94,19 +97,11 @@ function compute_bases_threaded_batched(system, batch_size::Int)
     return bases
 end
 
-system = Groebner.katsuran(10, ground=ZZ, ordering=:degrevlex)
+system = Groebner.noonn(7, ground=ZZ, ordering=:degrevlex)
 begin
-    GC.gc()
-    @time bases_1 = compute_bases(system, 2^5)
-    GC.gc()
-    @time bases_2 = compute_bases_threaded(system, 2^5)
-    GC.gc()
-    @time bases_3 = compute_bases_threaded_batched(system, 2^5)
+    n = 2^8
+    @time bases_1 = compute_bases(system, n)
+    @time bases_2 = compute_bases_threaded(system, n)
+    @time bases_3 = compute_bases_threaded_batched(system, n)
     @assert all(bases_1 .== bases_2) && all(bases_2 .== bases_3)
 end;
-
-GC.gc()
-@profview compute_bases_threaded_batched(system, 2^8);
-
-# @btime compute_bases($system, 5, 8);
-# @btime compute_bases_threaded($system, 5, 8);
diff --git a/src/Groebner.jl b/src/Groebner.jl
@@ -101,10 +101,14 @@ import TimerOutputs
 const _threaded = Ref(true)
 
+# Module initialization hook (Julia runs `__init__` when the module is loaded).
+# Sets the `_threaded` flag from the environment, installs fresh locks into the
+# logger and timer lock references, and configures the logger. Returns nothing.
 function __init__()
+    # `_threaded[]` is true unless the environment variable
+    # GROEBNER_NO_THREADED is set to exactly "1"
+    _threaded[] = !(get(ENV, "GROEBNER_NO_THREADED", "") == "1")
+
     # Setup the global logger
-    update_logger(loglevel=Logging.Info)
+    _groebner_log_lock[] = ReentrantLock()
+    logger_update(loglevel=Logging.Info)
 
-    _threaded[] = !(get(ENV, "GROEBNER_NO_THREADED", "") == "1")
+    # Setup performance counters
+    _groebner_timer_lock[] = ReentrantLock()
 
     nothing
 end
@@ -121,7 +125,7 @@ include("utils/timeit.jl")
 # Provides the macro `@stat` for collecting statistics
 include("utils/statistics.jl")
 
-# include("utils/versioninfo.jl")
+include("utils/versioninfo.jl")
 
 # Minimalistic plotting with Unicode
 include("utils/plots.jl")

diff --git a/src/arithmetic/Zp.jl b/src/arithmetic/Zp.jl
@@ -8,7 +8,7 @@
 # the coefficient. Generally, AccumType should be picked so that the result of
 #   a + b*c
 # is representable exactly in AccumType for feasible  a,b,c of type CoeffType. 
-# One common example is AccumType = UInt64 and CoeffType = UInt32
+# One example is AccumType = UInt64 and CoeffType = UInt32 with prime moduli
 abstract type AbstractArithmetic{AccumType, CoeffType} end
 
 # All implementations of arithmetic in Zp are a subtype of this.
@@ -115,7 +115,7 @@ end
 # a modulo p (addition specialization)
 function mod_p(a::T, mod::SpecializedArithmeticZp{T, C, true}) where {T, C}
     x = _mul_high(a, mod.multiplier)
-    x = convert(T, convert(T, (convert(T, a - x) >>> 1)) + x)
+    # x = ((a - x) >>> 1) + x, with every intermediate converted back to T;
+    # the shift amount is now spelled as a UInt8 literal
+    x = convert(T, convert(T, (convert(T, a - x) >>> UInt8(1))) + x)
+    # presumably (x >>> mod.shift) is the quotient of a by mod.divisor, so the
+    # difference below is the remainder -- TODO confirm against _mul_high
     a - (x >>> mod.shift) * mod.divisor
 end
 # a modulo p (no addition specialization)
@@ -176,7 +176,7 @@ end
 # a modulo p (addition specialization)
 function mod_p(a::A, mod::DelayedArithmeticZp{A, T, true}) where {A, T}
     x = _mul_high(a, mod.multiplier)
-    x = convert(A, convert(A, (convert(A, a - x) >>> 1)) + x)
+    # x = ((a - x) >>> 1) + x, with every intermediate converted back to A;
+    # the shift amount is now spelled as a UInt8 literal
+    x = convert(A, convert(A, (convert(A, a - x) >>> UInt8(1))) + x)
+    # presumably (x >>> mod.shift) is the quotient of a by mod.divisor, so the
+    # difference below is the remainder -- TODO confirm against _mul_high
     a - (x >>> mod.shift) * mod.divisor
 end
 # a modulo p (no addition specialization)

diff --git a/src/f4/basis.jl b/src/f4/basis.jl
@@ -8,7 +8,7 @@
 ###
 # Pairset and Basis
 
-# Pairset is a list of critical pairs (SPairs).
+# Pairset is a list of critical pairs (CriticalPair).
 
 # Basis is a structure that stores a list of polynomials. Each polynomial is
 # represented with a sorted vector of monomials and a vector of coefficients.
@@ -19,7 +19,7 @@
 ###
 # Pairset
 
-# S-pair{Degree}, or, a pair of polynomials,
+# CriticalPair, or, a pair of polynomials,
 struct CriticalPair{Degree}
     # First polynomial given by its index in the basis array
     poly1::Int32
@@ -67,6 +67,41 @@ function pairset_resize_lcms_if_needed!(ps::Pairset, nfilled::Int)
     nothing
 end
 
+# Scans the first `ps.load` pairs and returns the tuple (index, degree), where
+# degree is the smallest `deg` found and index is the position of the last
+# pair that attains it.
+function pairset_find_smallest_degree_pair(ps::Pairset)
+    @invariant ps.load > 0 && length(ps.pairs) > 0
+    pairs = ps.pairs
+    best_idx = 1
+    best_deg = pairs[1].deg
+    @inbounds for k in 1:(ps.load)
+        deg = pairs[k].deg
+        # ties also update the index, so the last minimal pair wins
+        deg <= best_deg || continue
+        best_idx, best_deg = k, deg
+    end
+    return best_idx, best_deg
+end
+
+# Reorders ps.pairs[1:ps.load] in place so that all pairs of the smallest
+# degree come first, and returns the number of such pairs. Pairs are exchanged
+# across the range, so their relative order may change.
+function pairset_partition_by_degree!(ps::Pairset)
+    @invariant ps.load > 0
+    _, pair_min_deg = pairset_find_smallest_degree_pair(ps)
+
+    pairs = ps.pairs
+    # i scans from the left and j from the right; both start one step outside
+    # of 1:ps.load because every round begins by advancing them
+    i, j = 0, ps.load + 1
+    @inbounds while true
+        i += 1
+        j -= 1
+        # skip pairs of the smallest degree that already sit on the left
+        while i <= ps.load && pairs[i].deg == pair_min_deg
+            i += 1
+        end
+        # skip pairs of a larger degree that already sit on the right
+        while j > 1 && pairs[j].deg > pair_min_deg
+            j -= 1
+        end
+        # the scans have met: every pair before position i has the smallest degree
+        i >= j && break
+        # pairs[i] has a larger degree and pairs[j] the smallest one: exchange
+        pairs[i], pairs[j] = pairs[j], pairs[i]
+    end
+
+    i - 1
+end
+
 ###
 # Basis
 

diff --git a/src/f4/f4.jl b/src/f4/f4.jl
@@ -385,14 +385,16 @@ end
 # Returns N, the number of critical pairs of the smallest degree.
-# Sorts the critical pairs so that the first N pairs are the smallest.
+# Reorders the critical pairs in place so that the first N pairs are the ones
+# of the smallest degree; the pairs are partitioned, not fully sorted.
 function pairset_lowest_degree_pairs!(pairset::Pairset)
-    sort_pairset_by_degree!(pairset, 1, pairset.load - 1)
-    ps = pairset.pairs
-    @inbounds min_deg = ps[1].deg
-    min_idx = 1
-    @inbounds while min_idx < pairset.load && ps[min_idx + 1].deg == min_deg
-        min_idx += 1
-    end
-    min_idx
+    n_lowest_degree_pairs = pairset_partition_by_degree!(pairset)
+    @invariant n_lowest_degree_pairs > 0
+    n_lowest_degree_pairs
 end
 
 # Returns N, the number of critical pairs of the smallest sugar.
@@ -468,7 +470,8 @@ end
         end
     end
     npairs = min(npairs, maxpairs)
-    @assert npairs > 0
+    @invariant npairs > 0
+
     ps = pairset.pairs
     deg = ps[1].deg
 
@@ -512,8 +515,6 @@ function f4_add_critical_pairs_to_matrix!(
     ht::MonomialHashtable,
     symbol_ht::MonomialHashtable
 )
-
-    #
     matrix_reinitialize!(matrix, npairs)
     pairs = pairset.pairs
     uprows = matrix.upper_rows

diff --git a/src/f4/matrix.jl b/src/f4/matrix.jl
@@ -253,7 +253,7 @@ function matrix_resize_lower_part!(matrix::MacaulayMatrix, size::Int)
     resize!(matrix.lower_to_mult, size)
 end
 
-# Refresh and partially initialize the matrix
+# Refresh (resize) and partially initialize the matrix
 function matrix_reinitialize!(matrix::MacaulayMatrix, size::Int)
     new_size = size * 2
     matrix_resize_upper_part!(matrix, new_size)

diff --git a/src/fglm/kbase.jl b/src/fglm/kbase.jl
@@ -20,7 +20,7 @@ function _kbase(polynomials, kws)
     ring, _ = set_monomial_ordering!(ring, var_to_index, monoms, coeffs, params)
     m, c = kbase_f4(ring, monoms, coeffs, params)
     res = convert_to_output(ring, polynomials, m, c, params)
-    print_performance_counters(params.statistics)
+    performance_counters_print(params.statistics)
     res
 end
 

diff --git a/src/groebner/groebner.jl b/src/groebner/groebner.jl
@@ -63,7 +63,7 @@ function _groebner(polynomials, kws::KeywordsHandler, representation)
     # Convert result back to the representation of input
     basis = convert_to_output(ring, polynomials, gbmonoms, gbcoeffs, params)
 
-    print_performance_counters(params.statistics)
+    performance_counters_print(params.statistics)
     print_statistics(params.statistics)
 
     basis

diff --git a/src/groebner/isgroebner.jl b/src/groebner/isgroebner.jl
@@ -15,7 +15,7 @@ function _isgroebner(polynomials, kws::KeywordsHandler)
     params = AlgorithmParameters(ring, polynomial_repr, kws)
     ring, _ = set_monomial_ordering!(ring, var_to_index, monoms, coeffs, params)
     res = _isgroebner(ring, monoms, coeffs, params)
-    print_performance_counters(params.statistics)
+    performance_counters_print(params.statistics)
     res
 end