From f0ca0bbde1e4a72e9953520b79ffcbb491b8723f Mon Sep 17 00:00:00 2001 From: Alberto Ramos Date: Fri, 15 Oct 2021 15:52:21 +0200 Subject: [PATCH] Working version. Comparison with one field seem ok. --- src/Scalar/Scalar.jl | 2 +- src/Scalar/ScalarAction.jl | 8 ++- src/Scalar/ScalarFields.jl | 15 +++-- src/Scalar/ScalarForce.jl | 6 +- src/Scalar/ScalarHMC.jl | 120 +++++++++++++++++++------------------ src/main/test_scalar.jl | 6 +- 6 files changed, 85 insertions(+), 72 deletions(-) diff --git a/src/Scalar/Scalar.jl b/src/Scalar/Scalar.jl index c175457..c17601a 100644 --- a/src/Scalar/Scalar.jl +++ b/src/Scalar/Scalar.jl @@ -11,7 +11,7 @@ module Scalar -using CUDA, Random +using CUDA, Random, TimerOutputs using ..Space using ..Groups using ..Fields diff --git a/src/Scalar/ScalarAction.jl b/src/Scalar/ScalarAction.jl index cb01d88..f8a89b2 100644 --- a/src/Scalar/ScalarAction.jl +++ b/src/Scalar/ScalarAction.jl @@ -11,10 +11,12 @@ function scalar_action(U, Phi, lp::SpaceParm, sp::ScalarParm, ymws::YMworkspace{T}) where {T <: AbstractFloat} - CUDA.@sync begin - CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_act!(ymws.rm, U, Phi, sp, lp) + @timeit "Scalar action" begin + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_act!(ymws.rm, U, Phi, sp, lp) + end end - + S = CUDA.reduce(+, ymws.rm) return S end diff --git a/src/Scalar/ScalarFields.jl b/src/Scalar/ScalarFields.jl index f4a7697..41b26b2 100644 --- a/src/Scalar/ScalarFields.jl +++ b/src/Scalar/ScalarFields.jl @@ -11,9 +11,11 @@ function randomize!(f, sp::ScalarParm{NS}, lp::SpaceParm, ymws::YMworkspace) where {NS} - m = CUDA.randn(ymws.PRC, lp.bsz, 4, NS, lp.rsz) - CUDA.@sync begin - CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_SU2fund!(f,m,sp,lp) + @timeit "Randomize scalar field" begin + m = CUDA.randn(ymws.PRC, lp.bsz, 4, NS, lp.rsz) + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_SU2fund!(f,m,sp,lp) + end end return nothing @@ -21,10 +23,13 @@ end function krnl_assign_SU2fund!(f::AbstractArray{T}, m, sp::ScalarParm{NS}, lp::SpaceParm) where {T, NS} + # Think about precision here + SR2 = 1.4142135623730951 + b, r = CUDA.threadIdx().x, CUDA.blockIdx().x for i in 1:NS - f[b,i,r] = SU2fund(complex(m[b,1,i,r], m[b,2,i,r]), - complex(m[b,3,i,r], m[b,4,i,r])) + f[b,i,r] = SU2fund(complex(m[b,1,i,r]*SR2, m[b,2,i,r]*SR2), + complex(m[b,3,i,r]*SR2, m[b,4,i,r]*SR2)) end return nothing diff --git a/src/Scalar/ScalarForce.jl b/src/Scalar/ScalarForce.jl index b36b8ad..d18c4d0 100644 --- a/src/Scalar/ScalarForce.jl +++ b/src/Scalar/ScalarForce.jl @@ -11,8 +11,10 @@ function force_scalar(ymws::YMworkspace, sws::ScalarWorkspace, U, Phi, sp::ScalarParm, gp::GaugeParm, lp::SpaceParm) - CUDA.@sync begin - CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_force_scalar!(ymws.frc1,sws.frc1,U,Phi,sp,gp,lp) + @timeit "Scalar force" begin + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_force_scalar!(ymws.frc1,sws.frc1,U,Phi,sp,gp,lp) + end end diff --git a/src/Scalar/ScalarHMC.jl b/src/Scalar/ScalarHMC.jl index 964f08b..5fb4d1e 100644 --- a/src/Scalar/ScalarHMC.jl +++ b/src/Scalar/ScalarHMC.jl @@ -12,78 +12,80 @@ function hamiltonian(mom, U, pmom, Phi, lp, gp, sp, ymws) - SG = gauge_action(U, lp, gp, ymws) - SS = scalar_action(U, Phi, lp, sp, ymws) - PG = CUDA.mapreduce(norm2, +, mom)/2 - PS = CUDA.mapreduce(norm2, +, pmom)/2 - - println("Hamiltonian: ", SG, " ", SS, " ", PG, " ",PS) - + @timeit "Computing Hamiltonian" begin + SG = gauge_action(U, lp, gp, ymws) + SS = scalar_action(U, Phi, lp, sp, ymws) + PG = CUDA.mapreduce(norm2, +, mom)/2 + PS = CUDA.mapreduce(norm2, +, pmom)/2 + end + return SG+SS+PG+PS end function HMC!(U, Phi, eps, ns, lp::SpaceParm, gp::GaugeParm, sp::ScalarParm, ymws::YMworkspace{T}, sws::ScalarWorkspace; noacc=false) where T - int = omf4(T, eps, ns) - ymws.U1 .= U - sws.Phi .= Phi - - randomize!(ymws.mom, lp, ymws) - randomize!(sws.mom, sp, lp, ymws) - hini = hamiltonian(ymws.mom, U, sws.mom, Phi, lp, gp, sp, ymws) - println(hini) - - MD!(ymws.mom, U, sws.mom, Phi, int, lp, gp, sp, ymws, sws) - - dh = hamiltonian(ymws.mom, U, sws.mom, Phi, lp, gp, sp, ymws) - hini - println(dh+hini) - pacc = exp(-dh) - - acc = true - if (noacc) - return dh, acc - end - - if (pacc < 1.0) - r = rand() - if (pacc < r) - U .= ymws.U1 - Phi .= sws.Phi - acc = false + @timeit "HMC trajectory" begin + int = omf4(T, eps, ns) + ymws.U1 .= U + sws.Phi .= Phi + + randomize!(ymws.mom, lp, ymws) + randomize!(sws.mom, sp, lp, ymws) + hini = hamiltonian(ymws.mom, U, sws.mom, Phi, lp, gp, sp, ymws) + + MD!(ymws.mom, U, sws.mom, Phi, int, lp, gp, sp, ymws, sws) + + dh = hamiltonian(ymws.mom, U, sws.mom, Phi, lp, gp, sp, ymws) - hini + pacc = exp(-dh) + + acc = true + if (noacc) + return dh, acc + end + + if (pacc < 1.0) + r = rand() + if (pacc < r) + U .= ymws.U1 + Phi .= sws.Phi + acc = false + end end end - + return dh, acc end function MD!(mom, U, pmom, Phi, int::IntrScheme{NI, T}, lp::SpaceParm, gp::GaugeParm{T}, sp::ScalarParm, ymws::YMworkspace{T}, sws::ScalarWorkspace) where {NI, T <: AbstractFloat} - YM.force_gauge(ymws, U, gp.c0, lp) - force_scalar(ymws, sws, U, Phi, sp, gp, lp) - - mom .= mom .+ (int.r[1]*int.eps) .* ymws.frc1 - pmom .= pmom .+ (int.r[1]*int.eps) .* sws.frc1 - for i in 1:int.ns - k = 2 - off = 1 - for j in 1:NI-1 - U .= expm.(U, mom, int.eps*int.r[k]) - Phi .= Phi .+ (int.eps*int.r[k]).*pmom - if k == NI - off = -1 + @timeit "MD evolution" begin + YM.force_gauge(ymws, U, gp.c0, lp) + force_scalar(ymws, sws, U, Phi, sp, gp, lp) + + mom .= mom .+ (int.r[1]*int.eps) .* ymws.frc1 + pmom .= pmom .+ (int.r[1]*int.eps) .* sws.frc1 + for i in 1:int.ns + k = 2 + off = 1 + for j in 1:NI-1 + U .= expm.(U, mom, int.eps*int.r[k]) + Phi .= Phi .+ (int.eps*int.r[k]).*pmom + if k == NI + off = -1 + end + k += off + + YM.force_gauge(ymws, U, gp.c0, lp) + force_scalar(ymws, sws, U, Phi, sp, gp, lp) + if (i < int.ns) && (k == 1) + mom .= mom .+ (2*int.r[k]*int.eps) .* ymws.frc1 + pmom .= pmom .+ (2*int.r[k]*int.eps) .* sws.frc1 + else + mom .= mom .+ (int.r[k]*int.eps) .* ymws.frc1 + pmom .= pmom .+ (int.r[k]*int.eps) .* sws.frc1 + end + k += off end - k += off - - YM.force_gauge(ymws, U, gp.c0, lp) - force_scalar(ymws, sws, U, Phi, sp, gp, lp) - if (i < int.ns) && (k == 1) - mom .= mom .+ (2*int.r[k]*int.eps) .* ymws.frc1 - pmom .= pmom .+ (2*int.r[k]*int.eps) .* sws.frc1 - else - mom .= mom .+ (int.r[k]*int.eps) .* ymws.frc1 - pmom .= pmom .+ (int.r[k]*int.eps) .* sws.frc1 - end - k += off end end diff --git a/src/main/test_scalar.jl b/src/main/test_scalar.jl index ba98ce3..6862111 100644 --- a/src/main/test_scalar.jl +++ b/src/main/test_scalar.jl @@ -1,4 +1,4 @@ -using CUDA, Logging, StructArrays, Random +using CUDA, Logging, StructArrays, Random, TimerOutputs CUDA.allowscalar(true) import Pkg @@ -6,7 +6,7 @@ Pkg.activate("/lhome/ific/a/alramos/s.images/julia/workspace/LatticeGPU") #Pkg.activate("/home/alberto/code/julia/LatticeGPU") using LatticeGPU -lp = SpaceParm{4}((64,64,64,64), (4,4,4,4)) +lp = SpaceParm{4}((16,16,16,16), (4,4,4,4)) gp = GaugeParm(6.0, 1.0, (0.0,0.0), 2) sp = ScalarParm((0.2,0.3), (1.0,0.4)) @@ -61,3 +61,5 @@ for i in 1:10 push!(pl, plaquette(U,lp, gp, ymws)) println("# Plaquette: ", pl[end], "\n") end + +print_timer(linechars = :ascii)