diff --git a/src/YM/YM.jl b/src/YM/YM.jl index 4fecea5..da435d7 100644 --- a/src/YM/YM.jl +++ b/src/YM/YM.jl @@ -12,7 +12,7 @@ module YM -using CUDA, Random, StructArrays +using CUDA, Random, StructArrays, TimerOutputs using ..Space using ..Groups using ..Fields @@ -48,26 +48,28 @@ struct YMworkspace{T} rm # float of volume function YMworkspace(::Type{G}, ::Type{T}, lp::SpaceParm) where {G <: Group, T <: AbstractFloat} - if (G == SU2) - GRP = SU2 - ALG = SU2alg - f1 = vector_field(SU2alg{T}, lp) - f2 = vector_field(SU2alg{T}, lp) - mm = vector_field(SU2alg{T}, lp) - u1 = vector_field(SU2{T}, lp) + @timeit "Allocating YMWorkspace" begin + if (G == SU2) + GRP = SU2 + ALG = SU2alg + f1 = vector_field(SU2alg{T}, lp) + f2 = vector_field(SU2alg{T}, lp) + mm = vector_field(SU2alg{T}, lp) + u1 = vector_field(SU2{T}, lp) + end + + if (G == SU3) + GRP = SU3 + ALG = SU3alg + f1 = vector_field(SU3alg{T}, lp) + f2 = vector_field(SU3alg{T}, lp) + mm = vector_field(SU3alg{T}, lp) + u1 = vector_field(SU3{T}, lp) + end + cs = scalar_field(Complex{T}, lp) + rs = scalar_field(T, lp) end - - if (G == SU3) - GRP = SU3 - ALG = SU3alg - f1 = vector_field(SU3alg{T}, lp) - f2 = vector_field(SU3alg{T}, lp) - mm = vector_field(SU3alg{T}, lp) - u1 = vector_field(SU3{T}, lp) - end - cs = scalar_field(Complex{T}, lp) - rs = scalar_field(T, lp) - + return new{T}(GRP,ALG,T,f1, f2, mm, u1, cs, rs) end end diff --git a/src/YM/YMact.jl b/src/YM/YMact.jl index 83a7e03..a2d935b 100644 --- a/src/YM/YMact.jl +++ b/src/YM/YMact.jl @@ -305,9 +305,13 @@ the prefactor 1/g0^2, and assign it to the workspace force `ymws.frc1` function force_gauge(ymws::YMworkspace, U, c0, lp::SpaceParm) if abs(c0-1) < 1.0E-10 - force_wilson_pln!(ymws.frc1, ymws.frc2, U, lp::SpaceParm) + @timeit "Wilson gauge force" begin + force_wilson_pln!(ymws.frc1, ymws.frc2, U, lp::SpaceParm) + end else - force_wilson_pln!(ymws.frc1, ymws.frc2, U, lp::SpaceParm, c0) + @timeit "Improved gauge force" begin + force_wilson_pln!(ymws.frc1, ymws.frc2, U, lp::SpaceParm, c0) + end end return nothing end diff --git a/src/YM/YMfields.jl b/src/YM/YMfields.jl index a0ea8b5..dfcbbf2 100644 --- a/src/YM/YMfields.jl +++ b/src/YM/YMfields.jl @@ -12,17 +12,21 @@ function randomize!(f, lp::SpaceParm, ymws::YMworkspace) if ymws.ALG == SU2alg - m = CUDA.randn(ymws.PRC, lp.bsz,lp.ndim,3,lp.rsz) - CUDA.@sync begin - CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_SU2!(f,m,lp) + @timeit "Randomize SU(2) algebra field" begin + m = CUDA.randn(ymws.PRC, lp.bsz,lp.ndim,3,lp.rsz) + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_SU2!(f,m,lp) + end end return nothing end if ymws.ALG == SU3alg - m = CUDA.randn(ymws.PRC, lp.bsz,lp.ndim,8,lp.rsz) - CUDA.@sync begin - CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_SU3!(f,m,lp) + @timeit "Randomize SU(3) algebra field" begin + m = CUDA.randn(ymws.PRC, lp.bsz,lp.ndim,8,lp.rsz) + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_SU3!(f,m,lp) + end end return nothing end diff --git a/src/YM/YMflow.jl b/src/YM/YMflow.jl index 7c84163..57d3d33 100644 --- a/src/YM/YMflow.jl +++ b/src/YM/YMflow.jl @@ -63,12 +63,14 @@ end function flw_euler(U, ns, eps, c0, lp::SpaceParm, ymws::YMworkspace; add_zth=false) - for i in 1:ns - force_gauge(ymws, U, c0, lp) - if add_zth - add_zth_term(ymws::YMworkspace, U, lp) + @timeit "Integrating flow equations (Euler)" begin + for i in 1:ns + force_gauge(ymws, U, c0, lp) + if add_zth + add_zth_term(ymws::YMworkspace, U, lp) + end + U .= expm.(U, ymws.frc1, 2*eps) end - U .= expm.(U, ymws.frc1, 2*eps) end return nothing @@ -76,31 +78,33 @@ end function flw_rk3(U, ns, eps, c0, lp::SpaceParm, ymws::YMworkspace; add_zth=false) - for i in 1:ns - e0 = eps/2 - force_gauge(ymws, U, c0, lp) - if add_zth - add_zth_term(ymws::YMworkspace, U, lp) + @timeit "Integrating flow equations (RK3)" begin + for i in 1:ns + e0 = eps/2 + force_gauge(ymws, U, c0, lp) + if add_zth + add_zth_term(ymws::YMworkspace, U, lp) + end + ymws.mom .= ymws.frc1 + U .= expm.(U, ymws.mom, e0) + + e0 = -34*eps/36 + e1 = 16*eps/9 + force_gauge(ymws, U, c0, lp) + if add_zth + add_zth_term(ymws::YMworkspace, U, lp) + end + ymws.mom .= e0.*ymws.mom .+ e1.*ymws.frc1 + U .= expm.(U, ymws.mom) + + e1 = 6*eps/4 + force_gauge(ymws, U, c0, lp) + if add_zth + add_zth_term(ymws::YMworkspace, U, lp) + end + ymws.mom .= e1.*ymws.frc1 .- ymws.mom + U .= expm.(U, ymws.mom) end - ymws.mom .= ymws.frc1 - U .= expm.(U, ymws.mom, e0) - - e0 = -34*eps/36 - e1 = 16*eps/9 - force_gauge(ymws, U, c0, lp) - if add_zth - add_zth_term(ymws::YMworkspace, U, lp) - end - ymws.mom .= e0.*ymws.mom .+ e1.*ymws.frc1 - U .= expm.(U, ymws.mom) - - e1 = 6*eps/4 - force_gauge(ymws, U, c0, lp) - if add_zth - add_zth_term(ymws::YMworkspace, U, lp) - end - ymws.mom .= e1.*ymws.frc1 .- ymws.mom - U .= expm.(U, ymws.mom) end return nothing diff --git a/src/YM/YMhmc.jl b/src/YM/YMhmc.jl index 07579e2..751373d 100644 --- a/src/YM/YMhmc.jl +++ b/src/YM/YMhmc.jl @@ -18,12 +18,16 @@ Returns the value of the gauge plaquette action for the configuration U. The par function gauge_action(U, lp::SpaceParm, gp::GaugeParm{T}, ymws::YMworkspace{T}) where T <: AbstractFloat if abs(gp.c0-1) < 1.0E-10 - CUDA.@sync begin - CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_plaq!(ymws.cm, U, lp) + @timeit "Wilson gauge action" begin + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_plaq!(ymws.cm, U, lp) + end end else - CUDA.@sync begin - CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_impr!(ymws.cm, U, gp.c0, (1-gp.c0)/8, lp) + @timeit "Improved gauge action" begin + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_impr!(ymws.cm, U, gp.c0, (1-gp.c0)/8, lp) + end end end S = gp.beta*( prod(lp.iL)*lp.npls*(gp.c0 + (1-gp.c0)/8) - @@ -34,71 +38,81 @@ end function plaquette(U, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace) - CUDA.@sync begin - CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_plaq!(ymws.cm, U, lp) + @timeit "Plaquette measurement" begin + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_plaq!(ymws.cm, U, lp) + end end - + return CUDA.mapreduce(real, +, ymws.cm)/(prod(lp.iL)*lp.npls) end function hamiltonian(mom, U, lp, gp, ymws) - K = CUDA.mapreduce(norm2, +, mom)/2 - V = gauge_action(U, lp, gp, ymws) - + @timeit "Computing Hamiltonian" begin + K = CUDA.mapreduce(norm2, +, mom)/2 + V = gauge_action(U, lp, gp, ymws) + end + return K+V end function HMC!(U, eps, ns, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace{T}; noacc=false) where T - int = omf4(T, eps, ns) - ymws.U1 .= U - - randomize!(ymws.mom, lp, ymws) - hini = hamiltonian(ymws.mom, U, lp, gp, ymws) - - MD!(ymws.mom, U, int, lp, gp, ymws) - - dh = hamiltonian(ymws.mom, U, lp, gp, ymws) - hini - pacc = exp(-dh) - - acc = true - if (noacc) - return dh, acc - end - - if (pacc < 1.0) - r = rand() - if (pacc < r) - U .= ymws.U1 - acc = false + @timeit "HMC trayectory" begin + + int = omf4(T, eps, ns) + ymws.U1 .= U + + randomize!(ymws.mom, lp, ymws) + hini = hamiltonian(ymws.mom, U, lp, gp, ymws) + + MD!(ymws.mom, U, int, lp, gp, ymws) + + dh = hamiltonian(ymws.mom, U, lp, gp, ymws) - hini + pacc = exp(-dh) + + acc = true + if (noacc) + return dh, acc end + + if (pacc < 1.0) + r = rand() + if (pacc < r) + U .= ymws.U1 + acc = false + end + end + end - return dh, acc end function MD!(mom, U, int::IntrScheme{NI, T}, lp::SpaceParm, gp::GaugeParm{T}, ymws::YMworkspace{T}) where {NI, T <: AbstractFloat} + + @timeit "MD evolution" begin - ee = int.eps*gp.beta/gp.ng - force_gauge(ymws, U, gp.c0, lp) - mom .= mom .+ (int.r[1]*ee) .* ymws.frc1 - for i in 1:int.ns - k = 2 - off = 1 - for j in 1:NI-1 - U .= expm.(U, mom, int.eps*int.r[k]) - if k == NI - off = -1 + ee = int.eps*gp.beta/gp.ng + force_gauge(ymws, U, gp.c0, lp) + mom .= mom .+ (int.r[1]*ee) .* ymws.frc1 + for i in 1:int.ns + k = 2 + off = 1 + for j in 1:NI-1 + U .= expm.(U, mom, int.eps*int.r[k]) + if k == NI + off = -1 + end + k += off + + force_gauge(ymws, U, gp.c0, lp) + if (i < int.ns) && (k == 1) + mom .= mom .+ (2*int.r[k]*ee) .* ymws.frc1 + else + mom .= mom .+ (int.r[k]*ee) .* ymws.frc1 + end + k += off end - k += off - - force_gauge(ymws, U, gp.c0, lp) - if (i < int.ns) && (k == 1) - mom .= mom .+ (2*int.r[k]*ee) .* ymws.frc1 - else - mom .= mom .+ (int.r[k]*ee) .* ymws.frc1 - end - k += off end end diff --git a/src/main/times.jl b/src/main/times.jl index c40afad..1a999e4 100644 --- a/src/main/times.jl +++ b/src/main/times.jl @@ -1,4 +1,4 @@ -using CUDA, Logging, StructArrays, Random +using CUDA, Logging, StructArrays, Random, TimerOutputs CUDA.allowscalar(false) import Pkg @@ -7,7 +7,7 @@ Pkg.activate("/lhome/ific/a/alramos/s.images/julia/workspace/LatticeGPU") using LatticeGPU # Set lattice/block size -lp = SpaceParm{4}((32,32,32,32), (4,4,4,4)) +lp = SpaceParm{4}((16,16,16,16), (4,4,4,4)) println("Space Parameters: ", lp) # Seed RNG @@ -93,5 +93,6 @@ println("Time for 100 steps of RK3 flow integrator: ") println("Action: ", gauge_action(U, lp, gp, ymws)) println("## END improved action/flow measurements") +print_timer(linechars = :ascii) println("END")