mirror of
https://igit.ific.uv.es/alramos/latticegpu.jl.git
synced 2025-05-14 19:23:42 +02:00
Added timmings
This commit is contained in:
parent
6f1a39b187
commit
79a6e21cbc
6 changed files with 139 additions and 110 deletions
42
src/YM/YM.jl
42
src/YM/YM.jl
|
@ -12,7 +12,7 @@
|
||||||
|
|
||||||
module YM
|
module YM
|
||||||
|
|
||||||
using CUDA, Random, StructArrays
|
using CUDA, Random, StructArrays, TimerOutputs
|
||||||
using ..Space
|
using ..Space
|
||||||
using ..Groups
|
using ..Groups
|
||||||
using ..Fields
|
using ..Fields
|
||||||
|
@ -48,26 +48,28 @@ struct YMworkspace{T}
|
||||||
rm # float of volume
|
rm # float of volume
|
||||||
function YMworkspace(::Type{G}, ::Type{T}, lp::SpaceParm) where {G <: Group, T <: AbstractFloat}
|
function YMworkspace(::Type{G}, ::Type{T}, lp::SpaceParm) where {G <: Group, T <: AbstractFloat}
|
||||||
|
|
||||||
if (G == SU2)
|
@timeit "Allocating YMWorkspace" begin
|
||||||
GRP = SU2
|
if (G == SU2)
|
||||||
ALG = SU2alg
|
GRP = SU2
|
||||||
f1 = vector_field(SU2alg{T}, lp)
|
ALG = SU2alg
|
||||||
f2 = vector_field(SU2alg{T}, lp)
|
f1 = vector_field(SU2alg{T}, lp)
|
||||||
mm = vector_field(SU2alg{T}, lp)
|
f2 = vector_field(SU2alg{T}, lp)
|
||||||
u1 = vector_field(SU2{T}, lp)
|
mm = vector_field(SU2alg{T}, lp)
|
||||||
|
u1 = vector_field(SU2{T}, lp)
|
||||||
|
end
|
||||||
|
|
||||||
|
if (G == SU3)
|
||||||
|
GRP = SU3
|
||||||
|
ALG = SU3alg
|
||||||
|
f1 = vector_field(SU3alg{T}, lp)
|
||||||
|
f2 = vector_field(SU3alg{T}, lp)
|
||||||
|
mm = vector_field(SU3alg{T}, lp)
|
||||||
|
u1 = vector_field(SU3{T}, lp)
|
||||||
|
end
|
||||||
|
cs = scalar_field(Complex{T}, lp)
|
||||||
|
rs = scalar_field(T, lp)
|
||||||
end
|
end
|
||||||
|
|
||||||
if (G == SU3)
|
|
||||||
GRP = SU3
|
|
||||||
ALG = SU3alg
|
|
||||||
f1 = vector_field(SU3alg{T}, lp)
|
|
||||||
f2 = vector_field(SU3alg{T}, lp)
|
|
||||||
mm = vector_field(SU3alg{T}, lp)
|
|
||||||
u1 = vector_field(SU3{T}, lp)
|
|
||||||
end
|
|
||||||
cs = scalar_field(Complex{T}, lp)
|
|
||||||
rs = scalar_field(T, lp)
|
|
||||||
|
|
||||||
return new{T}(GRP,ALG,T,f1, f2, mm, u1, cs, rs)
|
return new{T}(GRP,ALG,T,f1, f2, mm, u1, cs, rs)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -305,9 +305,13 @@ the prefactor 1/g0^2, and assign it to the workspace force `ymws.frc1`
|
||||||
function force_gauge(ymws::YMworkspace, U, c0, lp::SpaceParm)
|
function force_gauge(ymws::YMworkspace, U, c0, lp::SpaceParm)
|
||||||
|
|
||||||
if abs(c0-1) < 1.0E-10
|
if abs(c0-1) < 1.0E-10
|
||||||
force_wilson_pln!(ymws.frc1, ymws.frc2, U, lp::SpaceParm)
|
@timeit "Wilson gauge force" begin
|
||||||
|
force_wilson_pln!(ymws.frc1, ymws.frc2, U, lp::SpaceParm)
|
||||||
|
end
|
||||||
else
|
else
|
||||||
force_wilson_pln!(ymws.frc1, ymws.frc2, U, lp::SpaceParm, c0)
|
@timeit "Improved gauge force" begin
|
||||||
|
force_wilson_pln!(ymws.frc1, ymws.frc2, U, lp::SpaceParm, c0)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
|
@ -12,17 +12,21 @@
|
||||||
function randomize!(f, lp::SpaceParm, ymws::YMworkspace)
|
function randomize!(f, lp::SpaceParm, ymws::YMworkspace)
|
||||||
|
|
||||||
if ymws.ALG == SU2alg
|
if ymws.ALG == SU2alg
|
||||||
m = CUDA.randn(ymws.PRC, lp.bsz,lp.ndim,3,lp.rsz)
|
@timeit "Randomize SU(2) algebra field" begin
|
||||||
CUDA.@sync begin
|
m = CUDA.randn(ymws.PRC, lp.bsz,lp.ndim,3,lp.rsz)
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_SU2!(f,m,lp)
|
CUDA.@sync begin
|
||||||
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_SU2!(f,m,lp)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
if ymws.ALG == SU3alg
|
if ymws.ALG == SU3alg
|
||||||
m = CUDA.randn(ymws.PRC, lp.bsz,lp.ndim,8,lp.rsz)
|
@timeit "Randomize SU(3) algebra field" begin
|
||||||
CUDA.@sync begin
|
m = CUDA.randn(ymws.PRC, lp.bsz,lp.ndim,8,lp.rsz)
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_SU3!(f,m,lp)
|
CUDA.@sync begin
|
||||||
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_SU3!(f,m,lp)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
|
@ -63,12 +63,14 @@ end
|
||||||
|
|
||||||
function flw_euler(U, ns, eps, c0, lp::SpaceParm, ymws::YMworkspace; add_zth=false)
|
function flw_euler(U, ns, eps, c0, lp::SpaceParm, ymws::YMworkspace; add_zth=false)
|
||||||
|
|
||||||
for i in 1:ns
|
@timeit "Integrating flow equations (Euler)" begin
|
||||||
force_gauge(ymws, U, c0, lp)
|
for i in 1:ns
|
||||||
if add_zth
|
force_gauge(ymws, U, c0, lp)
|
||||||
add_zth_term(ymws::YMworkspace, U, lp)
|
if add_zth
|
||||||
|
add_zth_term(ymws::YMworkspace, U, lp)
|
||||||
|
end
|
||||||
|
U .= expm.(U, ymws.frc1, 2*eps)
|
||||||
end
|
end
|
||||||
U .= expm.(U, ymws.frc1, 2*eps)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
|
@ -76,31 +78,33 @@ end
|
||||||
|
|
||||||
function flw_rk3(U, ns, eps, c0, lp::SpaceParm, ymws::YMworkspace; add_zth=false)
|
function flw_rk3(U, ns, eps, c0, lp::SpaceParm, ymws::YMworkspace; add_zth=false)
|
||||||
|
|
||||||
for i in 1:ns
|
@timeit "Integrating flow equations (RK3)" begin
|
||||||
e0 = eps/2
|
for i in 1:ns
|
||||||
force_gauge(ymws, U, c0, lp)
|
e0 = eps/2
|
||||||
if add_zth
|
force_gauge(ymws, U, c0, lp)
|
||||||
add_zth_term(ymws::YMworkspace, U, lp)
|
if add_zth
|
||||||
|
add_zth_term(ymws::YMworkspace, U, lp)
|
||||||
|
end
|
||||||
|
ymws.mom .= ymws.frc1
|
||||||
|
U .= expm.(U, ymws.mom, e0)
|
||||||
|
|
||||||
|
e0 = -34*eps/36
|
||||||
|
e1 = 16*eps/9
|
||||||
|
force_gauge(ymws, U, c0, lp)
|
||||||
|
if add_zth
|
||||||
|
add_zth_term(ymws::YMworkspace, U, lp)
|
||||||
|
end
|
||||||
|
ymws.mom .= e0.*ymws.mom .+ e1.*ymws.frc1
|
||||||
|
U .= expm.(U, ymws.mom)
|
||||||
|
|
||||||
|
e1 = 6*eps/4
|
||||||
|
force_gauge(ymws, U, c0, lp)
|
||||||
|
if add_zth
|
||||||
|
add_zth_term(ymws::YMworkspace, U, lp)
|
||||||
|
end
|
||||||
|
ymws.mom .= e1.*ymws.frc1 .- ymws.mom
|
||||||
|
U .= expm.(U, ymws.mom)
|
||||||
end
|
end
|
||||||
ymws.mom .= ymws.frc1
|
|
||||||
U .= expm.(U, ymws.mom, e0)
|
|
||||||
|
|
||||||
e0 = -34*eps/36
|
|
||||||
e1 = 16*eps/9
|
|
||||||
force_gauge(ymws, U, c0, lp)
|
|
||||||
if add_zth
|
|
||||||
add_zth_term(ymws::YMworkspace, U, lp)
|
|
||||||
end
|
|
||||||
ymws.mom .= e0.*ymws.mom .+ e1.*ymws.frc1
|
|
||||||
U .= expm.(U, ymws.mom)
|
|
||||||
|
|
||||||
e1 = 6*eps/4
|
|
||||||
force_gauge(ymws, U, c0, lp)
|
|
||||||
if add_zth
|
|
||||||
add_zth_term(ymws::YMworkspace, U, lp)
|
|
||||||
end
|
|
||||||
ymws.mom .= e1.*ymws.frc1 .- ymws.mom
|
|
||||||
U .= expm.(U, ymws.mom)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
|
|
116
src/YM/YMhmc.jl
116
src/YM/YMhmc.jl
|
@ -18,12 +18,16 @@ Returns the value of the gauge plaquette action for the configuration U. The par
|
||||||
function gauge_action(U, lp::SpaceParm, gp::GaugeParm{T}, ymws::YMworkspace{T}) where T <: AbstractFloat
|
function gauge_action(U, lp::SpaceParm, gp::GaugeParm{T}, ymws::YMworkspace{T}) where T <: AbstractFloat
|
||||||
|
|
||||||
if abs(gp.c0-1) < 1.0E-10
|
if abs(gp.c0-1) < 1.0E-10
|
||||||
CUDA.@sync begin
|
@timeit "Wilson gauge action" begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_plaq!(ymws.cm, U, lp)
|
CUDA.@sync begin
|
||||||
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_plaq!(ymws.cm, U, lp)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
CUDA.@sync begin
|
@timeit "Improved gauge action" begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_impr!(ymws.cm, U, gp.c0, (1-gp.c0)/8, lp)
|
CUDA.@sync begin
|
||||||
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_impr!(ymws.cm, U, gp.c0, (1-gp.c0)/8, lp)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
S = gp.beta*( prod(lp.iL)*lp.npls*(gp.c0 + (1-gp.c0)/8) -
|
S = gp.beta*( prod(lp.iL)*lp.npls*(gp.c0 + (1-gp.c0)/8) -
|
||||||
|
@ -34,71 +38,81 @@ end
|
||||||
|
|
||||||
function plaquette(U, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace)
|
function plaquette(U, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace)
|
||||||
|
|
||||||
CUDA.@sync begin
|
@timeit "Plaquette measurement" begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_plaq!(ymws.cm, U, lp)
|
CUDA.@sync begin
|
||||||
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_plaq!(ymws.cm, U, lp)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
return CUDA.mapreduce(real, +, ymws.cm)/(prod(lp.iL)*lp.npls)
|
return CUDA.mapreduce(real, +, ymws.cm)/(prod(lp.iL)*lp.npls)
|
||||||
end
|
end
|
||||||
|
|
||||||
function hamiltonian(mom, U, lp, gp, ymws)
|
function hamiltonian(mom, U, lp, gp, ymws)
|
||||||
K = CUDA.mapreduce(norm2, +, mom)/2
|
@timeit "Computing Hamiltonian" begin
|
||||||
V = gauge_action(U, lp, gp, ymws)
|
K = CUDA.mapreduce(norm2, +, mom)/2
|
||||||
|
V = gauge_action(U, lp, gp, ymws)
|
||||||
|
end
|
||||||
|
|
||||||
return K+V
|
return K+V
|
||||||
end
|
end
|
||||||
|
|
||||||
function HMC!(U, eps, ns, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace{T}; noacc=false) where T
|
function HMC!(U, eps, ns, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace{T}; noacc=false) where T
|
||||||
|
|
||||||
int = omf4(T, eps, ns)
|
@timeit "HMC trayectory" begin
|
||||||
ymws.U1 .= U
|
|
||||||
|
int = omf4(T, eps, ns)
|
||||||
randomize!(ymws.mom, lp, ymws)
|
ymws.U1 .= U
|
||||||
hini = hamiltonian(ymws.mom, U, lp, gp, ymws)
|
|
||||||
|
randomize!(ymws.mom, lp, ymws)
|
||||||
MD!(ymws.mom, U, int, lp, gp, ymws)
|
hini = hamiltonian(ymws.mom, U, lp, gp, ymws)
|
||||||
|
|
||||||
dh = hamiltonian(ymws.mom, U, lp, gp, ymws) - hini
|
MD!(ymws.mom, U, int, lp, gp, ymws)
|
||||||
pacc = exp(-dh)
|
|
||||||
|
dh = hamiltonian(ymws.mom, U, lp, gp, ymws) - hini
|
||||||
acc = true
|
pacc = exp(-dh)
|
||||||
if (noacc)
|
|
||||||
return dh, acc
|
acc = true
|
||||||
end
|
if (noacc)
|
||||||
|
return dh, acc
|
||||||
if (pacc < 1.0)
|
|
||||||
r = rand()
|
|
||||||
if (pacc < r)
|
|
||||||
U .= ymws.U1
|
|
||||||
acc = false
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
if (pacc < 1.0)
|
||||||
|
r = rand()
|
||||||
|
if (pacc < r)
|
||||||
|
U .= ymws.U1
|
||||||
|
acc = false
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
return dh, acc
|
return dh, acc
|
||||||
end
|
end
|
||||||
|
|
||||||
function MD!(mom, U, int::IntrScheme{NI, T}, lp::SpaceParm, gp::GaugeParm{T}, ymws::YMworkspace{T}) where {NI, T <: AbstractFloat}
|
function MD!(mom, U, int::IntrScheme{NI, T}, lp::SpaceParm, gp::GaugeParm{T}, ymws::YMworkspace{T}) where {NI, T <: AbstractFloat}
|
||||||
|
|
||||||
|
@timeit "MD evolution" begin
|
||||||
|
|
||||||
ee = int.eps*gp.beta/gp.ng
|
ee = int.eps*gp.beta/gp.ng
|
||||||
force_gauge(ymws, U, gp.c0, lp)
|
force_gauge(ymws, U, gp.c0, lp)
|
||||||
mom .= mom .+ (int.r[1]*ee) .* ymws.frc1
|
mom .= mom .+ (int.r[1]*ee) .* ymws.frc1
|
||||||
for i in 1:int.ns
|
for i in 1:int.ns
|
||||||
k = 2
|
k = 2
|
||||||
off = 1
|
off = 1
|
||||||
for j in 1:NI-1
|
for j in 1:NI-1
|
||||||
U .= expm.(U, mom, int.eps*int.r[k])
|
U .= expm.(U, mom, int.eps*int.r[k])
|
||||||
if k == NI
|
if k == NI
|
||||||
off = -1
|
off = -1
|
||||||
|
end
|
||||||
|
k += off
|
||||||
|
|
||||||
|
force_gauge(ymws, U, gp.c0, lp)
|
||||||
|
if (i < int.ns) && (k == 1)
|
||||||
|
mom .= mom .+ (2*int.r[k]*ee) .* ymws.frc1
|
||||||
|
else
|
||||||
|
mom .= mom .+ (int.r[k]*ee) .* ymws.frc1
|
||||||
|
end
|
||||||
|
k += off
|
||||||
end
|
end
|
||||||
k += off
|
|
||||||
|
|
||||||
force_gauge(ymws, U, gp.c0, lp)
|
|
||||||
if (i < int.ns) && (k == 1)
|
|
||||||
mom .= mom .+ (2*int.r[k]*ee) .* ymws.frc1
|
|
||||||
else
|
|
||||||
mom .= mom .+ (int.r[k]*ee) .* ymws.frc1
|
|
||||||
end
|
|
||||||
k += off
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
using CUDA, Logging, StructArrays, Random
|
using CUDA, Logging, StructArrays, Random, TimerOutputs
|
||||||
|
|
||||||
CUDA.allowscalar(false)
|
CUDA.allowscalar(false)
|
||||||
import Pkg
|
import Pkg
|
||||||
|
@ -7,7 +7,7 @@ Pkg.activate("/lhome/ific/a/alramos/s.images/julia/workspace/LatticeGPU")
|
||||||
using LatticeGPU
|
using LatticeGPU
|
||||||
|
|
||||||
# Set lattice/block size
|
# Set lattice/block size
|
||||||
lp = SpaceParm{4}((32,32,32,32), (4,4,4,4))
|
lp = SpaceParm{4}((16,16,16,16), (4,4,4,4))
|
||||||
println("Space Parameters: ", lp)
|
println("Space Parameters: ", lp)
|
||||||
|
|
||||||
# Seed RNG
|
# Seed RNG
|
||||||
|
@ -93,5 +93,6 @@ println("Time for 100 steps of RK3 flow integrator: ")
|
||||||
println("Action: ", gauge_action(U, lp, gp, ymws))
|
println("Action: ", gauge_action(U, lp, gp, ymws))
|
||||||
println("## END improved action/flow measurements")
|
println("## END improved action/flow measurements")
|
||||||
|
|
||||||
|
print_timer(linechars = :ascii)
|
||||||
println("END")
|
println("END")
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue