Added timmings

This commit is contained in:
Alberto Ramos 2021-10-14 17:30:59 +02:00
parent 6f1a39b187
commit 79a6e21cbc
6 changed files with 139 additions and 110 deletions

View file

@ -12,7 +12,7 @@
module YM module YM
using CUDA, Random, StructArrays using CUDA, Random, StructArrays, TimerOutputs
using ..Space using ..Space
using ..Groups using ..Groups
using ..Fields using ..Fields
@ -48,25 +48,27 @@ struct YMworkspace{T}
rm # float of volume rm # float of volume
function YMworkspace(::Type{G}, ::Type{T}, lp::SpaceParm) where {G <: Group, T <: AbstractFloat} function YMworkspace(::Type{G}, ::Type{T}, lp::SpaceParm) where {G <: Group, T <: AbstractFloat}
if (G == SU2) @timeit "Allocating YMWorkspace" begin
GRP = SU2 if (G == SU2)
ALG = SU2alg GRP = SU2
f1 = vector_field(SU2alg{T}, lp) ALG = SU2alg
f2 = vector_field(SU2alg{T}, lp) f1 = vector_field(SU2alg{T}, lp)
mm = vector_field(SU2alg{T}, lp) f2 = vector_field(SU2alg{T}, lp)
u1 = vector_field(SU2{T}, lp) mm = vector_field(SU2alg{T}, lp)
end u1 = vector_field(SU2{T}, lp)
end
if (G == SU3) if (G == SU3)
GRP = SU3 GRP = SU3
ALG = SU3alg ALG = SU3alg
f1 = vector_field(SU3alg{T}, lp) f1 = vector_field(SU3alg{T}, lp)
f2 = vector_field(SU3alg{T}, lp) f2 = vector_field(SU3alg{T}, lp)
mm = vector_field(SU3alg{T}, lp) mm = vector_field(SU3alg{T}, lp)
u1 = vector_field(SU3{T}, lp) u1 = vector_field(SU3{T}, lp)
end
cs = scalar_field(Complex{T}, lp)
rs = scalar_field(T, lp)
end end
cs = scalar_field(Complex{T}, lp)
rs = scalar_field(T, lp)
return new{T}(GRP,ALG,T,f1, f2, mm, u1, cs, rs) return new{T}(GRP,ALG,T,f1, f2, mm, u1, cs, rs)
end end

View file

@ -305,9 +305,13 @@ the prefactor 1/g0^2, and assign it to the workspace force `ymws.frc1`
function force_gauge(ymws::YMworkspace, U, c0, lp::SpaceParm) function force_gauge(ymws::YMworkspace, U, c0, lp::SpaceParm)
if abs(c0-1) < 1.0E-10 if abs(c0-1) < 1.0E-10
force_wilson_pln!(ymws.frc1, ymws.frc2, U, lp::SpaceParm) @timeit "Wilson gauge force" begin
force_wilson_pln!(ymws.frc1, ymws.frc2, U, lp::SpaceParm)
end
else else
force_wilson_pln!(ymws.frc1, ymws.frc2, U, lp::SpaceParm, c0) @timeit "Improved gauge force" begin
force_wilson_pln!(ymws.frc1, ymws.frc2, U, lp::SpaceParm, c0)
end
end end
return nothing return nothing
end end

View file

@ -12,17 +12,21 @@
function randomize!(f, lp::SpaceParm, ymws::YMworkspace) function randomize!(f, lp::SpaceParm, ymws::YMworkspace)
if ymws.ALG == SU2alg if ymws.ALG == SU2alg
m = CUDA.randn(ymws.PRC, lp.bsz,lp.ndim,3,lp.rsz) @timeit "Randomize SU(2) algebra field" begin
CUDA.@sync begin m = CUDA.randn(ymws.PRC, lp.bsz,lp.ndim,3,lp.rsz)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_SU2!(f,m,lp) CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_SU2!(f,m,lp)
end
end end
return nothing return nothing
end end
if ymws.ALG == SU3alg if ymws.ALG == SU3alg
m = CUDA.randn(ymws.PRC, lp.bsz,lp.ndim,8,lp.rsz) @timeit "Randomize SU(3) algebra field" begin
CUDA.@sync begin m = CUDA.randn(ymws.PRC, lp.bsz,lp.ndim,8,lp.rsz)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_SU3!(f,m,lp) CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_SU3!(f,m,lp)
end
end end
return nothing return nothing
end end

View file

@ -63,12 +63,14 @@ end
function flw_euler(U, ns, eps, c0, lp::SpaceParm, ymws::YMworkspace; add_zth=false) function flw_euler(U, ns, eps, c0, lp::SpaceParm, ymws::YMworkspace; add_zth=false)
for i in 1:ns @timeit "Integrating flow equations (Euler)" begin
force_gauge(ymws, U, c0, lp) for i in 1:ns
if add_zth force_gauge(ymws, U, c0, lp)
add_zth_term(ymws::YMworkspace, U, lp) if add_zth
add_zth_term(ymws::YMworkspace, U, lp)
end
U .= expm.(U, ymws.frc1, 2*eps)
end end
U .= expm.(U, ymws.frc1, 2*eps)
end end
return nothing return nothing
@ -76,31 +78,33 @@ end
function flw_rk3(U, ns, eps, c0, lp::SpaceParm, ymws::YMworkspace; add_zth=false) function flw_rk3(U, ns, eps, c0, lp::SpaceParm, ymws::YMworkspace; add_zth=false)
for i in 1:ns @timeit "Integrating flow equations (RK3)" begin
e0 = eps/2 for i in 1:ns
force_gauge(ymws, U, c0, lp) e0 = eps/2
if add_zth force_gauge(ymws, U, c0, lp)
add_zth_term(ymws::YMworkspace, U, lp) if add_zth
end add_zth_term(ymws::YMworkspace, U, lp)
ymws.mom .= ymws.frc1 end
U .= expm.(U, ymws.mom, e0) ymws.mom .= ymws.frc1
U .= expm.(U, ymws.mom, e0)
e0 = -34*eps/36 e0 = -34*eps/36
e1 = 16*eps/9 e1 = 16*eps/9
force_gauge(ymws, U, c0, lp) force_gauge(ymws, U, c0, lp)
if add_zth if add_zth
add_zth_term(ymws::YMworkspace, U, lp) add_zth_term(ymws::YMworkspace, U, lp)
end end
ymws.mom .= e0.*ymws.mom .+ e1.*ymws.frc1 ymws.mom .= e0.*ymws.mom .+ e1.*ymws.frc1
U .= expm.(U, ymws.mom) U .= expm.(U, ymws.mom)
e1 = 6*eps/4 e1 = 6*eps/4
force_gauge(ymws, U, c0, lp) force_gauge(ymws, U, c0, lp)
if add_zth if add_zth
add_zth_term(ymws::YMworkspace, U, lp) add_zth_term(ymws::YMworkspace, U, lp)
end
ymws.mom .= e1.*ymws.frc1 .- ymws.mom
U .= expm.(U, ymws.mom)
end end
ymws.mom .= e1.*ymws.frc1 .- ymws.mom
U .= expm.(U, ymws.mom)
end end
return nothing return nothing

View file

@ -18,12 +18,16 @@ Returns the value of the gauge plaquette action for the configuration U. The par
function gauge_action(U, lp::SpaceParm, gp::GaugeParm{T}, ymws::YMworkspace{T}) where T <: AbstractFloat function gauge_action(U, lp::SpaceParm, gp::GaugeParm{T}, ymws::YMworkspace{T}) where T <: AbstractFloat
if abs(gp.c0-1) < 1.0E-10 if abs(gp.c0-1) < 1.0E-10
CUDA.@sync begin @timeit "Wilson gauge action" begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_plaq!(ymws.cm, U, lp) CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_plaq!(ymws.cm, U, lp)
end
end end
else else
CUDA.@sync begin @timeit "Improved gauge action" begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_impr!(ymws.cm, U, gp.c0, (1-gp.c0)/8, lp) CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_impr!(ymws.cm, U, gp.c0, (1-gp.c0)/8, lp)
end
end end
end end
S = gp.beta*( prod(lp.iL)*lp.npls*(gp.c0 + (1-gp.c0)/8) - S = gp.beta*( prod(lp.iL)*lp.npls*(gp.c0 + (1-gp.c0)/8) -
@ -34,71 +38,81 @@ end
function plaquette(U, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace) function plaquette(U, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace)
CUDA.@sync begin @timeit "Plaquette measurement" begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_plaq!(ymws.cm, U, lp) CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_plaq!(ymws.cm, U, lp)
end
end end
return CUDA.mapreduce(real, +, ymws.cm)/(prod(lp.iL)*lp.npls) return CUDA.mapreduce(real, +, ymws.cm)/(prod(lp.iL)*lp.npls)
end end
function hamiltonian(mom, U, lp, gp, ymws) function hamiltonian(mom, U, lp, gp, ymws)
K = CUDA.mapreduce(norm2, +, mom)/2 @timeit "Computing Hamiltonian" begin
V = gauge_action(U, lp, gp, ymws) K = CUDA.mapreduce(norm2, +, mom)/2
V = gauge_action(U, lp, gp, ymws)
end
return K+V return K+V
end end
function HMC!(U, eps, ns, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace{T}; noacc=false) where T function HMC!(U, eps, ns, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace{T}; noacc=false) where T
int = omf4(T, eps, ns) @timeit "HMC trayectory" begin
ymws.U1 .= U
randomize!(ymws.mom, lp, ymws) int = omf4(T, eps, ns)
hini = hamiltonian(ymws.mom, U, lp, gp, ymws) ymws.U1 .= U
MD!(ymws.mom, U, int, lp, gp, ymws) randomize!(ymws.mom, lp, ymws)
hini = hamiltonian(ymws.mom, U, lp, gp, ymws)
dh = hamiltonian(ymws.mom, U, lp, gp, ymws) - hini MD!(ymws.mom, U, int, lp, gp, ymws)
pacc = exp(-dh)
acc = true dh = hamiltonian(ymws.mom, U, lp, gp, ymws) - hini
if (noacc) pacc = exp(-dh)
return dh, acc
end
if (pacc < 1.0) acc = true
r = rand() if (noacc)
if (pacc < r) return dh, acc
U .= ymws.U1
acc = false
end end
end
if (pacc < 1.0)
r = rand()
if (pacc < r)
U .= ymws.U1
acc = false
end
end
end
return dh, acc return dh, acc
end end
function MD!(mom, U, int::IntrScheme{NI, T}, lp::SpaceParm, gp::GaugeParm{T}, ymws::YMworkspace{T}) where {NI, T <: AbstractFloat} function MD!(mom, U, int::IntrScheme{NI, T}, lp::SpaceParm, gp::GaugeParm{T}, ymws::YMworkspace{T}) where {NI, T <: AbstractFloat}
ee = int.eps*gp.beta/gp.ng @timeit "MD evolution" begin
force_gauge(ymws, U, gp.c0, lp)
mom .= mom .+ (int.r[1]*ee) .* ymws.frc1
for i in 1:int.ns
k = 2
off = 1
for j in 1:NI-1
U .= expm.(U, mom, int.eps*int.r[k])
if k == NI
off = -1
end
k += off
force_gauge(ymws, U, gp.c0, lp) ee = int.eps*gp.beta/gp.ng
if (i < int.ns) && (k == 1) force_gauge(ymws, U, gp.c0, lp)
mom .= mom .+ (2*int.r[k]*ee) .* ymws.frc1 mom .= mom .+ (int.r[1]*ee) .* ymws.frc1
else for i in 1:int.ns
mom .= mom .+ (int.r[k]*ee) .* ymws.frc1 k = 2
off = 1
for j in 1:NI-1
U .= expm.(U, mom, int.eps*int.r[k])
if k == NI
off = -1
end
k += off
force_gauge(ymws, U, gp.c0, lp)
if (i < int.ns) && (k == 1)
mom .= mom .+ (2*int.r[k]*ee) .* ymws.frc1
else
mom .= mom .+ (int.r[k]*ee) .* ymws.frc1
end
k += off
end end
k += off
end end
end end

View file

@ -1,4 +1,4 @@
using CUDA, Logging, StructArrays, Random using CUDA, Logging, StructArrays, Random, TimerOutputs
CUDA.allowscalar(false) CUDA.allowscalar(false)
import Pkg import Pkg
@ -7,7 +7,7 @@ Pkg.activate("/lhome/ific/a/alramos/s.images/julia/workspace/LatticeGPU")
using LatticeGPU using LatticeGPU
# Set lattice/block size # Set lattice/block size
lp = SpaceParm{4}((32,32,32,32), (4,4,4,4)) lp = SpaceParm{4}((16,16,16,16), (4,4,4,4))
println("Space Parameters: ", lp) println("Space Parameters: ", lp)
# Seed RNG # Seed RNG
@ -93,5 +93,6 @@ println("Time for 100 steps of RK3 flow integrator: ")
println("Action: ", gauge_action(U, lp, gp, ymws)) println("Action: ", gauge_action(U, lp, gp, ymws))
println("## END improved action/flow measurements") println("## END improved action/flow measurements")
print_timer(linechars = :ascii)
println("END") println("END")