Skip to content

Commit

Permalink
allow repeats
Browse files Browse the repository at this point in the history
  • Loading branch information
vchuravy committed May 10, 2020
1 parent b7a119a commit 513617a
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 22 deletions.
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ version = "0.1.0"

[deps]
LinuxPerf = "b4c46c6c-4fb0-484d-a11a-41bc3392d094"
Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d"
5 changes: 2 additions & 3 deletions examples/sum.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@ using Roofline
end

"""
    setup(N)

Build the benchmark input: a 1-tuple holding a vector of `N` random `Float32` values.

The result is a tuple so that callers can splat it into the experiment function.
"""
function setup(N)
    # Only the Float32 data is used; no throw-away Float64 array is allocated first.
    data = rand(Float32, N)
    return (data,)
end

# Measure `experiment` on the input produced by `setup(2^27)`; `bench` holds the
# returned `RooflineBench` result.
bench = Roofline.benchmark(experiment, setup, 2^27)
40 changes: 21 additions & 19 deletions src/Roofline.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,34 +6,36 @@ using LinuxPerf
Base.inferencebarrier(nothing)::Nothing
end

"""
    RooflineBench

Result of a roofline measurement, as returned by `benchmark`.

# Fields
- `counters`: counter readings appended from `LinuxPerf.counters(...)` once per
  measured counter group.
- `times`: elapsed nanoseconds (differences of `Base.time_ns()`), one entry per
  timed call.
- `repeats`: number of timed calls made for each counter group.
"""
struct RooflineBench
    counters::Vector{LinuxPerf.Counter}
    times::Vector{UInt64}
    repeats::Int64
end

"""
    benchmark(func, setup, args...; repeats=10)

Run `func` on the data produced by `setup(args...)` while recording hardware
counters, and return the measurements as a `RooflineBench`.

For each of the four Intel roofline counter groups, fresh input is created with
`setup(args...)` and `func(data...)` is called `repeats` times, with the counter
group enabled only around each call.

# Arguments
- `func`: function under test; called as `func(data...)`.
- `setup`: called as `setup(args...)`; must return an iterable of arguments for `func`.
- `args...`: forwarded unchanged to `setup`.

# Keywords
- `repeats`: number of timed calls per counter group (default `10`).

# Returns
A `RooflineBench` holding the collected counters, the per-call durations in
nanoseconds, and `repeats`.

# Throws
- `ArgumentError` if `repeats` is less than 1.
"""
function benchmark(func::F, setup::S, args...; repeats=10) where {F, S}
    # With no timed calls there is nothing to measure, and the per-repeat
    # normalisation done when displaying results would be meaningless.
    repeats >= 1 || throw(ArgumentError("repeats must be at least 1, got $repeats"))

    counters = LinuxPerf.Counter[]
    times = UInt64[]

    # Untimed first call (presumably a warm-up so that compilation is not measured).
    val = func(setup(args...)...)
    escape(val)

    for benchPerf in (intel_roofline_bw_use(),
                      intel_roofline_memory(),
                      intel_roofline_double(),
                      intel_roofline_single(),
                      )
        # Input is rebuilt once per counter group and reused across its repeats.
        data = setup(args...)
        for _ in 1:repeats
            LinuxPerf.enable!(benchPerf)
            start = Base.time_ns()
            val = func(data...)
            stop = Base.time_ns()
            LinuxPerf.disable!(benchPerf)
            # Hand the result to `escape` (presumably to keep the call from being
            # optimised away -- confirm against its definition).
            escape(val)
            push!(times, stop - start)
        end
        # Counters are read once after all repeats of this group have run.
        append!(counters, LinuxPerf.counters(benchPerf).counters)
    end
    return RooflineBench(counters, times, repeats)
end

function Base.show(io::IO, bench::RooflineBench)
Expand All @@ -43,11 +45,11 @@ function Base.show(io::IO, bench::RooflineBench)
show(io, "Mean duration $(t)s")
println(io)
results = summarize_intel(bench)
show(io, "Double GFLOP/s $(results.dflops/t * 1e-9) ")
show(io, "Double GFLOP/s $(results.dflops/t/bench.repeats * 1e-9) ")
println(io)
show(io, "Single GFLOP/s $(results.sflops/t * 1e-9)")
show(io, "Single GFLOP/s $(results.sflops/t/bench.repeats * 1e-9)")
println(io)
show(io, "DRAM BW GB/s $(64*results.bw_ops / t * 1e-9)") # cachelines to GB/s
show(io, "DRAM BW GB/s $(64*results.bw_ops / t / bench.repeats* 1e-9)") # cachelines to GB/s
println(io)
show(io, "MemOP per FLOP: $(results.mops / (results.sflops + results.dflops))")
println(io)
Expand Down

0 comments on commit 513617a

Please sign in to comment.