Non-seg-fault HMC. Energy not conserved.

This commit is contained in:
Alberto Ramos 2021-07-17 18:43:33 +02:00
parent 5bb4f28c8b
commit f296fd9768
8 changed files with 172 additions and 40 deletions

View file

@ -54,12 +54,22 @@ version = "3.31.0"
deps = ["Artifacts", "Libdl"] deps = ["Artifacts", "Libdl"]
uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
[[DataAPI]]
git-tree-sha1 = "ee400abb2298bd13bfc3df1c412ed228061a2385"
uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
version = "1.7.0"
[[DataStructures]] [[DataStructures]]
deps = ["Compat", "InteractiveUtils", "OrderedCollections"] deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
git-tree-sha1 = "4437b64df1e0adccc3e5d1adbc3ac741095e4677" git-tree-sha1 = "4437b64df1e0adccc3e5d1adbc3ac741095e4677"
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
version = "0.18.9" version = "0.18.9"
[[DataValueInterfaces]]
git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6"
uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464"
version = "1.0.0"
[[Dates]] [[Dates]]
deps = ["Printf"] deps = ["Printf"]
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
@ -103,6 +113,11 @@ version = "0.12.5"
deps = ["Markdown"] deps = ["Markdown"]
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
[[IteratorInterfaceExtensions]]
git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856"
uuid = "82899510-4779-5014-852e-03e436cf321d"
version = "1.0.0"
[[JLLWrappers]] [[JLLWrappers]]
deps = ["Preferences"] deps = ["Preferences"]
git-tree-sha1 = "642a199af8b68253517b80bd3bfd17eb4e84df6e" git-tree-sha1 = "642a199af8b68253517b80bd3bfd17eb4e84df6e"
@ -253,14 +268,38 @@ git-tree-sha1 = "a50550fa3164a8c46747e62063b4d774ac1bcf49"
uuid = "276daf66-3868-5448-9aa4-cd146d93841b" uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
version = "1.5.1" version = "1.5.1"
[[StaticArrays]]
deps = ["LinearAlgebra", "Random", "Statistics"]
git-tree-sha1 = "a43a7b58a6e7dc933b2fa2e0ca653ccf8bb8fd0e"
uuid = "90137ffa-7385-5640-81b9-e52037218182"
version = "1.2.6"
[[Statistics]] [[Statistics]]
deps = ["LinearAlgebra", "SparseArrays"] deps = ["LinearAlgebra", "SparseArrays"]
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
[[StructArrays]]
deps = ["Adapt", "DataAPI", "StaticArrays", "Tables"]
git-tree-sha1 = "000e168f5cc9aded17b6999a560b7c11dda69095"
uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a"
version = "0.6.0"
[[TOML]] [[TOML]]
deps = ["Dates"] deps = ["Dates"]
uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
[[TableTraits]]
deps = ["IteratorInterfaceExtensions"]
git-tree-sha1 = "c06b2f539df1c6efa794486abfb6ed2022561a39"
uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c"
version = "1.0.1"
[[Tables]]
deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "TableTraits", "Test"]
git-tree-sha1 = "8ed4a3ea724dac32670b062be3ef1c1de6773ae8"
uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
version = "1.4.4"
[[Tar]] [[Tar]]
deps = ["ArgTools", "SHA"] deps = ["ArgTools", "SHA"]
uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"

View file

@ -5,3 +5,5 @@ version = "0.1.0"
[deps] [deps]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a"

View file

@ -13,8 +13,9 @@
# SU(2) group elements represented trough Cayley-Dickson # SU(2) group elements represented trough Cayley-Dickson
# construction # construction
# https://en.wikipedia.org/wiki/Cayley%E2%80%93Dickson_construction # https://en.wikipedia.org/wiki/Cayley%E2%80%93Dickson_construction
using CUDA
import Base.:*, Base.:+, Base.:-,Base.:/,Base.:\ import Base.:*, Base.:+, Base.:-,Base.:/,Base.:\,Base.exp
struct SU2 <: Group struct SU2 <: Group
t1::ComplexF64 t1::ComplexF64
t2::ComplexF64 t2::ComplexF64
@ -23,7 +24,8 @@ SU2() = SU2(1.0, 0.0)
inverse(b::SU2) = SU2(conj(b.t1), -b.t2) inverse(b::SU2) = SU2(conj(b.t1), -b.t2)
dag(a::SU2) = inverse(a) dag(a::SU2) = inverse(a)
norm(a::SU2) = sqrt(abs2(a.t1) + abs2(a.t2)) norm(a::SU2) = sqrt(abs2(a.t1) + abs2(a.t2))
tr(g::SU2) = 2.0*real(a.t1) norm2(a::SU2) = abs2(a.t1) + abs2(a.t2)
tr(g::SU2) = complex(2.0*real(g.t1), 0.0)
""" """
function normalize(a::SU2) function normalize(a::SU2)
@ -55,14 +57,16 @@ SU2alg(x::Real) = SU2alg(x,0.0,0.0)
SU2alg(v::Vector) = SU2alg(v[1],v[2],v[3]) SU2alg(v::Vector) = SU2alg(v[1],v[2],v[3])
projalg(g::SU2) = SU2alg(imag(g.t1), real(g.t2), imag(g.t2)) projalg(g::SU2) = SU2alg(imag(g.t1), real(g.t2), imag(g.t2))
dot(a::SU2alg, b::SU2alg) = a.t1*b.t1 + a.t2*b.t2 + a.t3*b.t3 dot(a::SU2alg, b::SU2alg) = a.t1*b.t1 + a.t2*b.t2 + a.t3*b.t3
norm(a::SU2alg) = sqrt(a.t1^2 + a.t2^2 + a.t3^2)
norm2(a::SU2alg) = a.t1^2 + a.t2^2 + a.t3^2
Base.:+(a::SU2alg) = SU2alg(a.t1,a.t2,a.t3) Base.:+(a::SU2alg) = SU2alg(a.t1,a.t2,a.t3)
Base.:-(a::SU2alg) = SU2alg(-a.t1,-a.t2,-a.t3) Base.:-(a::SU2alg) = SU2alg(-a.t1,-a.t2,-a.t3)
Base.:+(a::SU2alg,b::SU2alg) = SU2alg(a.t1+b.t1,a.t2+b.t2,a.t3+b.t3) Base.:+(a::SU2alg,b::SU2alg) = SU2alg(a.t1+b.t1,a.t2+b.t2,a.t3+b.t3)
Base.:-(a::SU2alg,b::SU2alg) = SU2alg(a.t1-b.t1,a.t2-b.t2,a.t3-b.t3) Base.:-(a::SU2alg,b::SU2alg) = SU2alg(a.t1-b.t1,a.t2-b.t2,a.t3-b.t3)
Base.:*(a::SU2alg,b::Real) = SU2alg(a.t1*b,a.t2*b,a.t3*b) Base.:*(a::SU2alg,b::Number) = SU2alg(a.t1*b,a.t2*b,a.t3*b)
Base.:*(b::Real,a::SU2alg) = SU2alg(a.t1*b,a.t2*b,a.t3*b) Base.:*(b::Number,a::SU2alg) = SU2alg(a.t1*b,a.t2*b,a.t3*b)
Base.:/(a::SU2alg,b::Real) = SU2alg(a.t1/b,a.t2/b,a.t3/b) Base.:/(a::SU2alg,b::Number) = SU2alg(a.t1/b,a.t2/b,a.t3/b)
""" """
@ -78,11 +82,13 @@ function Base.exp(a::SU2alg)
ca = 1.0 - rms *(1.0 - (rms/6.0 )*(1.0 - rms/15.0)) ca = 1.0 - rms *(1.0 - (rms/6.0 )*(1.0 - rms/15.0))
sa = 0.5 - rms/6.0*(1.0 - (rms/10.0)*(1.0 - rms/21.0)) sa = 0.5 - rms/6.0*(1.0 - (rms/10.0)*(1.0 - rms/21.0))
else else
ca = cos(rm) ca = CUDA.cos(rm)
sa = sin(rm)/(2.0*rm) sa = CUDA.sin(rm)/(2.0*rm)
end end
return SU2(complex(ca,sa*a.t1),complex(sa*a.t2,sa*a.t3)) t1 = complex(ca,sa*a.t1)
t2 = complex(sa*a.t2,sa*a.t3)
return SU2(t1,t2)
end end
function Base.exp(a::SU2alg, t::Number) function Base.exp(a::SU2alg, t::Number)
@ -93,11 +99,13 @@ function Base.exp(a::SU2alg, t::Number)
ca = 1.0 - rms *(1.0 - (rms/6.0 )*(1.0 - rms/15.0)) ca = 1.0 - rms *(1.0 - (rms/6.0 )*(1.0 - rms/15.0))
sa = t*(0.5 - rms/6.0*(1.0 - (rms/10.0)*(1.0 - rms/21.0))) sa = t*(0.5 - rms/6.0*(1.0 - (rms/10.0)*(1.0 - rms/21.0)))
else else
ca = cos(rm) ca = CUDA.cos(rm)
sa = t*sin(rm)/(2.0*rm) sa = t*CUDA.sin(rm)/(2.0*rm)
end end
return SU2(complex(ca,sa*a.t1),complex(sa*a.t2,sa*a.t3)) t1 = complex(ca,sa*a.t1)
t2 = complex(sa*a.t2,sa*a.t3)
return SU2(t1,t2)
end end
@ -115,12 +123,13 @@ function expm(g::SU2, a::SU2alg)
ca = 1.0 - rms *(1.0 - (rms/6.0 )*(1.0 - rms/15.0)) ca = 1.0 - rms *(1.0 - (rms/6.0 )*(1.0 - rms/15.0))
sa = 0.5 - rms/6.0*(1.0 - (rms/10.0)*(1.0 - rms/21.0)) sa = 0.5 - rms/6.0*(1.0 - (rms/10.0)*(1.0 - rms/21.0))
else else
ca = cos(rm) ca = CUDA.cos(rm)
sa = sin(rm)/(2.0*rm) sa = CUDA.sin(rm)/(2.0*rm)
end end
return SU2(complex(ca,sa*a.t1)*g.t1-complex(sa*a.t2,sa*a.t3)*conj(g.t2), t1 = complex(ca,sa*a.t1)*g.t1-complex(sa*a.t2,sa*a.t3)*conj(g.t2)
complex(ca,sa*a.t1)*g.t2+complex(sa*a.t2,sa*a.t3)*conj(g.t1)) t2 = complex(ca,sa*a.t1)*g.t2+complex(sa*a.t2,sa*a.t3)*conj(g.t1)
return SU2(t1,t2)
end end
""" """
@ -137,10 +146,12 @@ function expm(g::SU2, a::SU2alg, t::Float64)
ca = 1.0 - rms *(1.0 - (rms/6.0 )*(1.0 - rms/15.0)) ca = 1.0 - rms *(1.0 - (rms/6.0 )*(1.0 - rms/15.0))
sa = t*(0.5 - rms/6.0*(1.0 - (rms/10.0)*(1.0 - rms/21.0))) sa = t*(0.5 - rms/6.0*(1.0 - (rms/10.0)*(1.0 - rms/21.0)))
else else
ca = cos(rm) ca = CUDA.cos(rm)
sa = t*sin(rm)/(2.0*rm) sa = t*CUDA.sin(rm)/(2.0*rm)
end end
return SU2(complex(ca,sa*a.t1)*g.t1-complex(sa*a.t2,sa*a.t3)*conj(g.t2), t1 = complex(ca,sa*a.t1)*g.t1-complex(sa*a.t2,sa*a.t3)*conj(g.t2)
complex(ca,sa*a.t1)*g.t2+complex(sa*a.t2,sa*a.t3)*conj(g.t1)) t2 = complex(ca,sa*a.t1)*g.t2+complex(sa*a.t2,sa*a.t3)*conj(g.t1)
return SU2(t1,t2)
end end

View file

@ -17,7 +17,8 @@ abstract type Algebra end
include("GroupSU2.jl") include("GroupSU2.jl")
export SU2, SU2alg, dag, normalize, inverse, tr, projalg, norm export Group, Algebra
export SU2, SU2alg, dag, normalize, inverse, tr, projalg, norm, norm2
export dot, expm, exp export dot, expm, exp
include("GroupSU3.jl") include("GroupSU3.jl")

View file

@ -1,6 +1,16 @@
module LatticeGPU ###
### "THE BEER-WARE LICENSE":
### Alberto Ramos wrote this file. As long as you retain this
### notice you can do whatever you want with this stuff. If we meet some
### day, and you think this stuff is worth it, you can buy me a beer in
### return. <alberto.ramos@cern.ch>
###
### file: LatticeGPU.jl
### created: Sat Jul 17 17:19:58 2021
###
using CUDA
module LatticeGPU
include("Groups/Groups.jl") include("Groups/Groups.jl")
@ -19,6 +29,7 @@ export map2latt, up, dw, shift
include("YM/YM.jl") include("YM/YM.jl")
using .YM using .YM
export YMworkspace, GaugeParm, force0_wilson!, field, randomn!, zero!, norm2
export gauge_action, hamiltonian, HMC!, OMF4!
end # module end # module

View file

@ -18,7 +18,7 @@ struct SpaceParm{N,M}
npls::Int64 npls::Int64
plidx::NTuple{M, Tuple{Int64, Int64}} plidx::NTuple{M, Tuple{Int64, Int64}}
function SpaceParm{N}(x, bt, c=(0.0,0.0)) where {N} function SpaceParm{N}(x) where {N}
M = convert(Int64, round(N*(N-1)/2)) M = convert(Int64, round(N*(N-1)/2))
N == length(x) || throw(ArgumentError("Tuple of incorrect length for dimension $N")) N == length(x) || throw(ArgumentError("Tuple of incorrect length for dimension $N"))
@ -28,7 +28,7 @@ struct SpaceParm{N,M}
push!(pls, (i,j)) push!(pls, (i,j))
end end
end end
return new{N,M}(N, bt, c, x, M, tuple(pls...)) return new{N,M}(N, x, M, tuple(pls...))
end end
end end
export SpaceParm export SpaceParm
@ -87,7 +87,7 @@ end
return s return s
end end
@inline function up(p::CartesianIndex{4}, id, lp::SpaceParm{4}) @inline function up(p::CartesianIndex{4}, id::Int64, lp::SpaceParm{4})
if (id == 1) if (id == 1)
s = CartesianIndex(mod1(p[1]+1, lp.iL[1]), p[2], p[3], p[4]) s = CartesianIndex(mod1(p[1]+1, lp.iL[1]), p[2], p[3], p[4])
@ -102,7 +102,7 @@ end
return s return s
end end
@inline function up(p::CartesianIndex{3}, id, lp::SpaceParm{3}) @inline function up(p::CartesianIndex{3}, id::Int64, lp::SpaceParm{3})
if (id == 1) if (id == 1)
s = CartesianIndex(mod1(p[1]+1, lp.iL[1]), p[2], p[3]) s = CartesianIndex(mod1(p[1]+1, lp.iL[1]), p[2], p[3])
@ -115,7 +115,7 @@ end
return s return s
end end
@inline function up(p::CartesianIndex{2}, id, lp::SpaceParm{2}) @inline function up(p::CartesianIndex{2}, id::Int64, lp::SpaceParm{2})
if (id == 1) if (id == 1)
s = CartesianIndex(mod1(p[1]+1, lp.iL[1]), p[2]) s = CartesianIndex(mod1(p[1]+1, lp.iL[1]), p[2])

View file

@ -12,14 +12,46 @@
module YM module YM
using CUDA, Random, StructArrays
using ..Space
using ..Groups
struct GaugeParm struct GaugeParm
beta::Float64 beta::Float64
cG::Tuple{Float64,Float64} cG::Tuple{Float64,Float64}
ng::Int32
end end
export GaugeParm export GaugeParm
include("YMact.jl") include("YMfields.jl")
export krnl_plaq! export field, randomn!, zero!, norm2
struct YMworkspace
frc1
frc2
mom
U1
cm # complex of volume
function YMworkspace(::Type{T}, lp::SpaceParm) where {T <: Union{Group,Algebra}}
if (T == SU2)
f1 = field(SU2alg, lp)
f2 = field(SU2alg, lp)
mm = field(SU2alg, lp)
u1 = field(SU2, lp)
cs = zeros(ComplexF64,lp.iL...)
rs = zeros(Float64, lp.iL...)
return new(f1, f2, mm, u1, replace_storage(CuArray, cs))
end
return nothing
end
end
export YMworkspace
include("YMact.jl")
export krnl_plaq!, force0_wilson!
include("YMhmc.jl")
export gauge_action, hamiltonian, HMC!, OMF4!
end end

View file

@ -11,12 +11,12 @@
function krnl_plaq!(plx, U, ipl, lp::SpaceParm) function krnl_plaq!(plx, U, ipl, lp::SpaceParm)
id1, id2 = lp.plidx(ipl) id1, id2 = lp.plidx[ipl]
X = map2latt((CUDA.threadIdx().x,CUDA.threadIdx().y,CUDA.threadIdx().z), X = map2latt((CUDA.threadIdx().x,CUDA.threadIdx().y,CUDA.threadIdx().z),
(CUDA.blockIdx().x,CUDA.blockIdx().y,CUDA.blockIdx().z)) (CUDA.blockIdx().x,CUDA.blockIdx().y,CUDA.blockIdx().z))
Xu1 = up(X, id1) Xu1 = up(X, id1, lp)
Xu2 = up(X, id2) Xu2 = up(X, id2, lp)
plx[X] = tr(U[X, id1]*U[Xu1, id2] / (U[X, id2]*U[Xu2, id1])) plx[X] = tr(U[X, id1]*U[Xu1, id2] / (U[X, id2]*U[Xu2, id1]))
return nothing return nothing
@ -27,15 +27,51 @@ function krnl_plaq!(plx, U, lp::SpaceParm)
X = map2latt((CUDA.threadIdx().x,CUDA.threadIdx().y,CUDA.threadIdx().z), X = map2latt((CUDA.threadIdx().x,CUDA.threadIdx().y,CUDA.threadIdx().z),
(CUDA.blockIdx().x,CUDA.blockIdx().y,CUDA.blockIdx().z)) (CUDA.blockIdx().x,CUDA.blockIdx().y,CUDA.blockIdx().z))
plx[X] = 0.0 plx[X] = complex(0.0)
for ipl in 1:lp.npls for ipl in 1:lp.npls
id1, id2 = lp.plidx(ipl) id1, id2 = lp.plidx[ipl]
Xu1 = up(X, id1) Xu1 = up(X, id1, lp)
Xu2 = up(X, id2) Xu2 = up(X, id2, lp)
plx[X] += tr(U[X, id1]*U[Xu1, id2] / (U[X, id2]*U[Xu2, id1])) plx[X] += tr(U[X, id1]*U[Xu1, id2] / (U[X, id2]*U[Xu2, id1]))
end end
plx[X] = plx[X]/lp.npls
return nothing return nothing
end end
function krnl_force_wilson_pln!(frc1, frc2, U, ipl, lp::SpaceParm, gp::GaugeParm)
X = map2latt((CUDA.threadIdx().x,CUDA.threadIdx().y,CUDA.threadIdx().z),
(CUDA.blockIdx().x,CUDA.blockIdx().y,CUDA.blockIdx().z))
id1, id2 = lp.plidx[ipl]
Xu1 = up(X, id1, lp)
Xu2 = up(X, id2, lp)
a = U[Xu1,id2]/U[Xu2,id1]
b = U[X ,id2]\U[X ,id1]
F1 = projalg(U[X,id1]*a/U[X,id2])
F2 = projalg(a*b)
F3 = projalg(b*a)
frc1[X ,id1] -= F1
frc1[X ,id2] += F1
frc2[Xu1,id2] -= F2
frc2[Xu2,id1] += F3
return nothing
end
function force0_wilson!(frc1, frc2, U, lp::SpaceParm, gp::GaugeParm, kp::KernelParm)
zero!(frc1)
zero!(frc2)
for ipl in 1:lp.npls
CUDA.@sync begin
CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_force_wilson_pln!(frc1,frc2,U,ipl,lp,gp)
end
end
return nothing
end