Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adapt auxiliary math functions for Float32 #2048

Merged
merged 11 commits into from
Aug 22, 2024
66 changes: 66 additions & 0 deletions examples/p4est_2d_dgsem/elixir_euler_shockcapturing_ec_float32.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@

using OrdinaryDiffEq
using Trixi

###############################################################################
# semidiscretization of the compressible Euler equations

equations = CompressibleEulerEquations2D(1.4f0)

initial_condition = initial_condition_weak_blast_wave

surface_flux = flux_ranocha
volume_flux = flux_ranocha
polydeg = 4
basis = LobattoLegendreBasis(Float32, polydeg)
indicator_sc = IndicatorHennemannGassner(equations, basis,
alpha_max = 1.0f0,
alpha_min = 0.001f0,
alpha_smooth = true,
variable = density_pressure)
volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
volume_flux_dg = volume_flux,
volume_flux_fv = surface_flux)

solver = DGSEM(polydeg = polydeg, surface_flux = surface_flux,
volume_integral = volume_integral, RealT = Float32)

###############################################################################

coordinates_min = (-1.0f0, -1.0f0)
coordinates_max = (+1.0f0, +1.0f0)

trees_per_dimension = (4, 4)
mesh = P4estMesh(trees_per_dimension,
polydeg = 4, initial_refinement_level = 2,
coordinates_min = coordinates_min, coordinates_max = coordinates_max,
periodicity = true, RealT = Float32)

semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)

###############################################################################
# ODE solvers, callbacks etc.

tspan = (0.0f0, 2.0f0)
ode = semidiscretize(semi, tspan)

summary_callback = SummaryCallback()

analysis_interval = 100
analysis_callback = AnalysisCallback(semi, interval = analysis_interval)

alive_callback = AliveCallback(analysis_interval = analysis_interval)

stepsize_callback = StepsizeCallback(cfl = 1.0f0)

callbacks = CallbackSet(summary_callback,
analysis_callback,
alive_callback,
stepsize_callback)
###############################################################################
# run the simulation

sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
dt = 1.0f0, # solve needs some value here but it will be overwritten by the stepsize_callback
save_everystep = false, callback = callbacks);
summary_callback() # print the timer summary
53 changes: 34 additions & 19 deletions src/auxiliary/math.jl
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ end
end

"""
ln_mean(x, y)
Trixi.ln_mean(x::Real, y::Real)
ranocha marked this conversation as resolved.
Show resolved Hide resolved

Compute the logarithmic mean

Expand Down Expand Up @@ -171,18 +171,24 @@ Given ε = 1.0e-4, we use the following algorithm.
for Intel, AMD, and VIA CPUs.
[https://www.agner.org/optimize/instruction_tables.pdf](https://www.agner.org/optimize/instruction_tables.pdf)
"""
@inline function ln_mean(x, y)
epsilon_f2 = 1.0e-4
@inline ln_mean(x::Real, y::Real) = ln_mean(promote(x, y)...)

@inline function ln_mean(x::RealT, y::RealT) where {RealT <: Real}
epsilon_f2 = convert(RealT, 1.0e-4)
f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2
if f2 < epsilon_f2
return (x + y) / @evalpoly(f2, 2, 2/3, 2/5, 2/7)
return (x + y) / @evalpoly(f2,
2,
convert(RealT, 2 / 3),
convert(RealT, 2 / 5),
convert(RealT, 2 / 7))
else
return (y - x) / log(y / x)
end
end

"""
inv_ln_mean(x, y)
Trixi.inv_ln_mean(x::Real, y::Real)

Compute the inverse `1 / ln_mean(x, y)` of the logarithmic mean
[`ln_mean`](@ref).
Expand All @@ -191,11 +197,17 @@ This function may be used to increase performance where the inverse of the
logarithmic mean is needed, by replacing a (slow) division by a (fast)
multiplication.
"""
@inline function inv_ln_mean(x, y)
epsilon_f2 = 1.0e-4
@inline inv_ln_mean(x::Real, y::Real) = inv_ln_mean(promote(x, y)...)

@inline function inv_ln_mean(x::RealT, y::RealT) where {RealT <: Real}
epsilon_f2 = convert(RealT, 1.0e-4)
f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2
if f2 < epsilon_f2
return @evalpoly(f2, 2, 2/3, 2/5, 2/7) / (x + y)
return @evalpoly(f2,
2,
convert(RealT, 2 / 3),
convert(RealT, 2 / 5),
convert(RealT, 2 / 7)) / (x + y)
else
return log(y / x) / (y - x)
end
Expand Down Expand Up @@ -273,7 +285,7 @@ end
# [Fortran](https://godbolt.org/z/Yrsa1js7P)
# or [C++](https://godbolt.org/z/674G7Pccv).
"""
max(x, y, ...)
Trixi.max(x, y, ...)

Return the maximum of the arguments. See also the `maximum` function to take
the maximum element from a collection.
Expand All @@ -292,7 +304,7 @@ julia> max(2, 5, 1)
@inline max(args...) = @fastmath max(args...)

"""
min(x, y, ...)
Trixi.min(x, y, ...)

Return the minimum of the arguments. See also the `minimum` function to take
the minimum element from a collection.
Expand All @@ -311,7 +323,7 @@ julia> min(2, 5, 1)
@inline min(args...) = @fastmath min(args...)

"""
positive_part(x)
Trixi.positive_part(x)

Return `x` if `x` is positive, else zero. In other words, return
`(x + abs(x)) / 2` for real numbers `x`.
Expand All @@ -321,7 +333,7 @@ Return `x` if `x` is positive, else zero. In other words, return
end

"""
negative_part(x)
Trixi.negative_part(x)

Return `x` if `x` is negative, else zero. In other words, return
`(x - abs(x)) / 2` for real numbers `x`.
Expand All @@ -331,7 +343,7 @@ Return `x` if `x` is negative, else zero. In other words, return
end

"""
stolarsky_mean(x, y, gamma)
Trixi.stolarsky_mean(x::Real, y:Real, gamma::Real)

Compute an instance of a weighted Stolarsky mean of the form

Expand Down Expand Up @@ -382,15 +394,18 @@ Given ε = 1.0e-4, we use the following algorithm.
for Intel, AMD, and VIA CPUs.
[https://www.agner.org/optimize/instruction_tables.pdf](https://www.agner.org/optimize/instruction_tables.pdf)
"""
@inline function stolarsky_mean(x, y, gamma)
epsilon_f2 = 1.0e-4
@inline stolarsky_mean(x::Real, y::Real, gamma::Real) = stolarsky_mean(promote(x, y,
gamma)...)

@inline function stolarsky_mean(x::RealT, y::RealT, gamma::RealT) where {RealT <: Real}
epsilon_f2 = convert(RealT, 1.0e-4)
f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2
if f2 < epsilon_f2
# convenience coefficients
c1 = (1 / 3) * (gamma - 2)
c2 = -(1 / 15) * (gamma + 1) * (gamma - 3) * c1
c3 = -(1 / 21) * (2 * gamma * (gamma - 2) - 9) * c2
return 0.5 * (x + y) * @evalpoly(f2, 1, c1, c2, c3)
c1 = convert(RealT, 1 / 3) * (gamma - 2)
c2 = convert(RealT, -1 / 15) * (gamma + 1) * (gamma - 3) * c1
c3 = convert(RealT, -1 / 21) * (2 * gamma * (gamma - 2) - 9) * c2
return 0.5f0 * (x + y) * @evalpoly(f2, 1, c1, c2, c3)
else
return (gamma - 1) / gamma * (y^gamma - x^gamma) /
(y^(gamma - 1) - x^(gamma - 1))
Expand Down
6 changes: 5 additions & 1 deletion src/solvers/dgmulti/shock_capturing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -160,10 +160,14 @@ function pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha,
mesh::DGMultiMesh, dg::DGMulti)
empty!(element_ids_dg)
empty!(element_ids_dgfv)
# For `Float64`, this gives 1.8189894035458565e-12
# For `Float32`, this gives 1.1920929f-5
RealT = eltype(alpha)
atol = max(100 * eps(RealT), eps(RealT)^convert(RealT, 0.75f0))

for element in eachelement(mesh, dg)
# Clip blending factor for values close to zero (-> pure DG)
dg_only = isapprox(alpha[element], 0, atol = 1e-12)
dg_only = isapprox(alpha[element], 0, atol = atol)
if dg_only
push!(element_ids_dg, element)
else
Expand Down
2 changes: 1 addition & 1 deletion src/solvers/dgsem_p4est/dg_2d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ end
# the interpretation of global SBP operators coupled discontinuously via
# central fluxes/SATs
surface_flux_values[v, node_index, direction_index, element_index] = flux_[v] +
0.5 *
0.5f0 *
noncons_[v]
end
end
Expand Down
6 changes: 5 additions & 1 deletion src/solvers/dgsem_tree/dg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,14 @@ function pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha,
cache)
empty!(element_ids_dg)
empty!(element_ids_dgfv)
# For `Float64`, this gives 1.8189894035458565e-12
# For `Float32`, this gives 1.1920929f-5
RealT = eltype(alpha)
atol = max(100 * eps(RealT), eps(RealT)^convert(RealT, 0.75f0))

for element in eachelement(dg, cache)
# Clip blending factor for values close to zero (-> pure DG)
dg_only = isapprox(alpha[element], 0, atol = 1e-12)
dg_only = isapprox(alpha[element], 0, atol = atol)
if dg_only
push!(element_ids_dg, element)
else
Expand Down
2 changes: 2 additions & 0 deletions src/solvers/dgsem_unstructured/dg_2d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ function create_cache(mesh::UnstructuredMesh2D, equations,

# perform a check on the sufficient metric identities condition for free-stream preservation
# and halt computation if it fails
# For `Float64`, this gives 1.8189894035458565e-12
# For `Float32`, this gives 1.1920929f-5
atol = max(100 * eps(RealT), eps(RealT)^convert(RealT, 0.75f0))
if !isapprox(max_discrete_metric_identities(dg, cache), 0, atol = atol)
error("metric terms fail free-stream preservation check with maximum error $(max_discrete_metric_identities(dg, cache))")
Expand Down
28 changes: 28 additions & 0 deletions test/test_p4est_2d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,34 @@ end
end
end

@trixi_testset "elixir_euler_shockcapturing_ec_float32.jl" begin
@test_trixi_include(joinpath(EXAMPLES_DIR,
"elixir_euler_shockcapturing_ec_float32.jl"),
l2=[
0.09539953f0,
0.10563527f0,
0.105637245f0,
0.3507514f0,
],
linf=[
0.2942562f0,
0.4079147f0,
0.3972956f0,
1.0810697f0,
],
andrewwinters5000 marked this conversation as resolved.
Show resolved Hide resolved
tspan=(0.0f0, 1.0f0),
rtol=10 * sqrt(eps(Float32)), # to make CI pass
RealT=Float32)
# Ensure that we do not have excessive memory allocations
# (e.g., from type instabilities)
let
t = sol.t[end]
u_ode = sol.u[end]
du_ode = similar(u_ode)
@test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
end
end

@trixi_testset "elixir_euler_sedov.jl" begin
@test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_sedov.jl"),
l2=[
Expand Down
Loading
Loading