mirror of
https://igit.ific.uv.es/alramos/latticegpu.jl.git
synced 2025-05-14 19:23:42 +02:00
Non-seg-fault HMC. Energy not conserved.
This commit is contained in:
parent
5bb4f28c8b
commit
f296fd9768
8 changed files with 172 additions and 40 deletions
|
@ -54,12 +54,22 @@ version = "3.31.0"
|
|||
deps = ["Artifacts", "Libdl"]
|
||||
uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
|
||||
|
||||
[[DataAPI]]
|
||||
git-tree-sha1 = "ee400abb2298bd13bfc3df1c412ed228061a2385"
|
||||
uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
|
||||
version = "1.7.0"
|
||||
|
||||
[[DataStructures]]
|
||||
deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
|
||||
git-tree-sha1 = "4437b64df1e0adccc3e5d1adbc3ac741095e4677"
|
||||
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
|
||||
version = "0.18.9"
|
||||
|
||||
[[DataValueInterfaces]]
|
||||
git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6"
|
||||
uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464"
|
||||
version = "1.0.0"
|
||||
|
||||
[[Dates]]
|
||||
deps = ["Printf"]
|
||||
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
|
||||
|
@ -103,6 +113,11 @@ version = "0.12.5"
|
|||
deps = ["Markdown"]
|
||||
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
|
||||
|
||||
[[IteratorInterfaceExtensions]]
|
||||
git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856"
|
||||
uuid = "82899510-4779-5014-852e-03e436cf321d"
|
||||
version = "1.0.0"
|
||||
|
||||
[[JLLWrappers]]
|
||||
deps = ["Preferences"]
|
||||
git-tree-sha1 = "642a199af8b68253517b80bd3bfd17eb4e84df6e"
|
||||
|
@ -253,14 +268,38 @@ git-tree-sha1 = "a50550fa3164a8c46747e62063b4d774ac1bcf49"
|
|||
uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
|
||||
version = "1.5.1"
|
||||
|
||||
[[StaticArrays]]
|
||||
deps = ["LinearAlgebra", "Random", "Statistics"]
|
||||
git-tree-sha1 = "a43a7b58a6e7dc933b2fa2e0ca653ccf8bb8fd0e"
|
||||
uuid = "90137ffa-7385-5640-81b9-e52037218182"
|
||||
version = "1.2.6"
|
||||
|
||||
[[Statistics]]
|
||||
deps = ["LinearAlgebra", "SparseArrays"]
|
||||
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
|
||||
|
||||
[[StructArrays]]
|
||||
deps = ["Adapt", "DataAPI", "StaticArrays", "Tables"]
|
||||
git-tree-sha1 = "000e168f5cc9aded17b6999a560b7c11dda69095"
|
||||
uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a"
|
||||
version = "0.6.0"
|
||||
|
||||
[[TOML]]
|
||||
deps = ["Dates"]
|
||||
uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
|
||||
|
||||
[[TableTraits]]
|
||||
deps = ["IteratorInterfaceExtensions"]
|
||||
git-tree-sha1 = "c06b2f539df1c6efa794486abfb6ed2022561a39"
|
||||
uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c"
|
||||
version = "1.0.1"
|
||||
|
||||
[[Tables]]
|
||||
deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "TableTraits", "Test"]
|
||||
git-tree-sha1 = "8ed4a3ea724dac32670b062be3ef1c1de6773ae8"
|
||||
uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
|
||||
version = "1.4.4"
|
||||
|
||||
[[Tar]]
|
||||
deps = ["ArgTools", "SHA"]
|
||||
uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
|
||||
|
|
|
@ -5,3 +5,5 @@ version = "0.1.0"
|
|||
|
||||
[deps]
|
||||
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
|
||||
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
|
||||
StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a"
|
||||
|
|
|
@ -13,8 +13,9 @@
|
|||
# SU(2) group elements represented trough Cayley-Dickson
|
||||
# construction
|
||||
# https://en.wikipedia.org/wiki/Cayley%E2%80%93Dickson_construction
|
||||
using CUDA
|
||||
|
||||
import Base.:*, Base.:+, Base.:-,Base.:/,Base.:\
|
||||
import Base.:*, Base.:+, Base.:-,Base.:/,Base.:\,Base.exp
|
||||
struct SU2 <: Group
|
||||
t1::ComplexF64
|
||||
t2::ComplexF64
|
||||
|
@ -23,7 +24,8 @@ SU2() = SU2(1.0, 0.0)
|
|||
inverse(b::SU2) = SU2(conj(b.t1), -b.t2)
|
||||
dag(a::SU2) = inverse(a)
|
||||
norm(a::SU2) = sqrt(abs2(a.t1) + abs2(a.t2))
|
||||
tr(g::SU2) = 2.0*real(a.t1)
|
||||
norm2(a::SU2) = abs2(a.t1) + abs2(a.t2)
|
||||
tr(g::SU2) = complex(2.0*real(g.t1), 0.0)
|
||||
|
||||
"""
|
||||
function normalize(a::SU2)
|
||||
|
@ -55,14 +57,16 @@ SU2alg(x::Real) = SU2alg(x,0.0,0.0)
|
|||
SU2alg(v::Vector) = SU2alg(v[1],v[2],v[3])
|
||||
projalg(g::SU2) = SU2alg(imag(g.t1), real(g.t2), imag(g.t2))
|
||||
dot(a::SU2alg, b::SU2alg) = a.t1*b.t1 + a.t2*b.t2 + a.t3*b.t3
|
||||
norm(a::SU2alg) = sqrt(a.t1^2 + a.t2^2 + a.t3^2)
|
||||
norm2(a::SU2alg) = a.t1^2 + a.t2^2 + a.t3^2
|
||||
Base.:+(a::SU2alg) = SU2alg(a.t1,a.t2,a.t3)
|
||||
Base.:-(a::SU2alg) = SU2alg(-a.t1,-a.t2,-a.t3)
|
||||
Base.:+(a::SU2alg,b::SU2alg) = SU2alg(a.t1+b.t1,a.t2+b.t2,a.t3+b.t3)
|
||||
Base.:-(a::SU2alg,b::SU2alg) = SU2alg(a.t1-b.t1,a.t2-b.t2,a.t3-b.t3)
|
||||
|
||||
Base.:*(a::SU2alg,b::Real) = SU2alg(a.t1*b,a.t2*b,a.t3*b)
|
||||
Base.:*(b::Real,a::SU2alg) = SU2alg(a.t1*b,a.t2*b,a.t3*b)
|
||||
Base.:/(a::SU2alg,b::Real) = SU2alg(a.t1/b,a.t2/b,a.t3/b)
|
||||
Base.:*(a::SU2alg,b::Number) = SU2alg(a.t1*b,a.t2*b,a.t3*b)
|
||||
Base.:*(b::Number,a::SU2alg) = SU2alg(a.t1*b,a.t2*b,a.t3*b)
|
||||
Base.:/(a::SU2alg,b::Number) = SU2alg(a.t1/b,a.t2/b,a.t3/b)
|
||||
|
||||
|
||||
"""
|
||||
|
@ -78,11 +82,13 @@ function Base.exp(a::SU2alg)
|
|||
ca = 1.0 - rms *(1.0 - (rms/6.0 )*(1.0 - rms/15.0))
|
||||
sa = 0.5 - rms/6.0*(1.0 - (rms/10.0)*(1.0 - rms/21.0))
|
||||
else
|
||||
ca = cos(rm)
|
||||
sa = sin(rm)/(2.0*rm)
|
||||
ca = CUDA.cos(rm)
|
||||
sa = CUDA.sin(rm)/(2.0*rm)
|
||||
end
|
||||
|
||||
return SU2(complex(ca,sa*a.t1),complex(sa*a.t2,sa*a.t3))
|
||||
t1 = complex(ca,sa*a.t1)
|
||||
t2 = complex(sa*a.t2,sa*a.t3)
|
||||
return SU2(t1,t2)
|
||||
end
|
||||
|
||||
function Base.exp(a::SU2alg, t::Number)
|
||||
|
@ -93,11 +99,13 @@ function Base.exp(a::SU2alg, t::Number)
|
|||
ca = 1.0 - rms *(1.0 - (rms/6.0 )*(1.0 - rms/15.0))
|
||||
sa = t*(0.5 - rms/6.0*(1.0 - (rms/10.0)*(1.0 - rms/21.0)))
|
||||
else
|
||||
ca = cos(rm)
|
||||
sa = t*sin(rm)/(2.0*rm)
|
||||
ca = CUDA.cos(rm)
|
||||
sa = t*CUDA.sin(rm)/(2.0*rm)
|
||||
end
|
||||
|
||||
return SU2(complex(ca,sa*a.t1),complex(sa*a.t2,sa*a.t3))
|
||||
t1 = complex(ca,sa*a.t1)
|
||||
t2 = complex(sa*a.t2,sa*a.t3)
|
||||
return SU2(t1,t2)
|
||||
end
|
||||
|
||||
|
||||
|
@ -115,12 +123,13 @@ function expm(g::SU2, a::SU2alg)
|
|||
ca = 1.0 - rms *(1.0 - (rms/6.0 )*(1.0 - rms/15.0))
|
||||
sa = 0.5 - rms/6.0*(1.0 - (rms/10.0)*(1.0 - rms/21.0))
|
||||
else
|
||||
ca = cos(rm)
|
||||
sa = sin(rm)/(2.0*rm)
|
||||
ca = CUDA.cos(rm)
|
||||
sa = CUDA.sin(rm)/(2.0*rm)
|
||||
end
|
||||
|
||||
return SU2(complex(ca,sa*a.t1)*g.t1-complex(sa*a.t2,sa*a.t3)*conj(g.t2),
|
||||
complex(ca,sa*a.t1)*g.t2+complex(sa*a.t2,sa*a.t3)*conj(g.t1))
|
||||
t1 = complex(ca,sa*a.t1)*g.t1-complex(sa*a.t2,sa*a.t3)*conj(g.t2)
|
||||
t2 = complex(ca,sa*a.t1)*g.t2+complex(sa*a.t2,sa*a.t3)*conj(g.t1)
|
||||
return SU2(t1,t2)
|
||||
end
|
||||
|
||||
"""
|
||||
|
@ -137,10 +146,12 @@ function expm(g::SU2, a::SU2alg, t::Float64)
|
|||
ca = 1.0 - rms *(1.0 - (rms/6.0 )*(1.0 - rms/15.0))
|
||||
sa = t*(0.5 - rms/6.0*(1.0 - (rms/10.0)*(1.0 - rms/21.0)))
|
||||
else
|
||||
ca = cos(rm)
|
||||
sa = t*sin(rm)/(2.0*rm)
|
||||
ca = CUDA.cos(rm)
|
||||
sa = t*CUDA.sin(rm)/(2.0*rm)
|
||||
end
|
||||
|
||||
return SU2(complex(ca,sa*a.t1)*g.t1-complex(sa*a.t2,sa*a.t3)*conj(g.t2),
|
||||
complex(ca,sa*a.t1)*g.t2+complex(sa*a.t2,sa*a.t3)*conj(g.t1))
|
||||
t1 = complex(ca,sa*a.t1)*g.t1-complex(sa*a.t2,sa*a.t3)*conj(g.t2)
|
||||
t2 = complex(ca,sa*a.t1)*g.t2+complex(sa*a.t2,sa*a.t3)*conj(g.t1)
|
||||
return SU2(t1,t2)
|
||||
|
||||
end
|
||||
|
|
|
@ -17,7 +17,8 @@ abstract type Algebra end
|
|||
|
||||
include("GroupSU2.jl")
|
||||
|
||||
export SU2, SU2alg, dag, normalize, inverse, tr, projalg, norm
|
||||
export Group, Algebra
|
||||
export SU2, SU2alg, dag, normalize, inverse, tr, projalg, norm, norm2
|
||||
export dot, expm, exp
|
||||
|
||||
include("GroupSU3.jl")
|
||||
|
|
|
@ -1,6 +1,16 @@
|
|||
module LatticeGPU
|
||||
###
|
||||
### "THE BEER-WARE LICENSE":
|
||||
### Alberto Ramos wrote this file. As long as you retain this
|
||||
### notice you can do whatever you want with this stuff. If we meet some
|
||||
### day, and you think this stuff is worth it, you can buy me a beer in
|
||||
### return. <alberto.ramos@cern.ch>
|
||||
###
|
||||
### file: LatticeGPU.jl
|
||||
### created: Sat Jul 17 17:19:58 2021
|
||||
###
|
||||
|
||||
using CUDA
|
||||
|
||||
module LatticeGPU
|
||||
|
||||
include("Groups/Groups.jl")
|
||||
|
||||
|
@ -19,6 +29,7 @@ export map2latt, up, dw, shift
|
|||
include("YM/YM.jl")
|
||||
|
||||
using .YM
|
||||
|
||||
export YMworkspace, GaugeParm, force0_wilson!, field, randomn!, zero!, norm2
|
||||
export gauge_action, hamiltonian, HMC!, OMF4!
|
||||
|
||||
end # module
|
||||
|
|
|
@ -18,7 +18,7 @@ struct SpaceParm{N,M}
|
|||
npls::Int64
|
||||
plidx::NTuple{M, Tuple{Int64, Int64}}
|
||||
|
||||
function SpaceParm{N}(x, bt, c=(0.0,0.0)) where {N}
|
||||
function SpaceParm{N}(x) where {N}
|
||||
M = convert(Int64, round(N*(N-1)/2))
|
||||
N == length(x) || throw(ArgumentError("Tuple of incorrect length for dimension $N"))
|
||||
|
||||
|
@ -28,7 +28,7 @@ struct SpaceParm{N,M}
|
|||
push!(pls, (i,j))
|
||||
end
|
||||
end
|
||||
return new{N,M}(N, bt, c, x, M, tuple(pls...))
|
||||
return new{N,M}(N, x, M, tuple(pls...))
|
||||
end
|
||||
end
|
||||
export SpaceParm
|
||||
|
@ -87,7 +87,7 @@ end
|
|||
return s
|
||||
end
|
||||
|
||||
@inline function up(p::CartesianIndex{4}, id, lp::SpaceParm{4})
|
||||
@inline function up(p::CartesianIndex{4}, id::Int64, lp::SpaceParm{4})
|
||||
|
||||
if (id == 1)
|
||||
s = CartesianIndex(mod1(p[1]+1, lp.iL[1]), p[2], p[3], p[4])
|
||||
|
@ -102,7 +102,7 @@ end
|
|||
return s
|
||||
end
|
||||
|
||||
@inline function up(p::CartesianIndex{3}, id, lp::SpaceParm{3})
|
||||
@inline function up(p::CartesianIndex{3}, id::Int64, lp::SpaceParm{3})
|
||||
|
||||
if (id == 1)
|
||||
s = CartesianIndex(mod1(p[1]+1, lp.iL[1]), p[2], p[3])
|
||||
|
@ -115,7 +115,7 @@ end
|
|||
return s
|
||||
end
|
||||
|
||||
@inline function up(p::CartesianIndex{2}, id, lp::SpaceParm{2})
|
||||
@inline function up(p::CartesianIndex{2}, id::Int64, lp::SpaceParm{2})
|
||||
|
||||
if (id == 1)
|
||||
s = CartesianIndex(mod1(p[1]+1, lp.iL[1]), p[2])
|
||||
|
|
36
src/YM/YM.jl
36
src/YM/YM.jl
|
@ -12,14 +12,46 @@
|
|||
|
||||
module YM
|
||||
|
||||
using CUDA, Random, StructArrays
|
||||
using ..Space
|
||||
using ..Groups
|
||||
|
||||
struct GaugeParm
|
||||
beta::Float64
|
||||
cG::Tuple{Float64,Float64}
|
||||
ng::Int32
|
||||
end
|
||||
export GaugeParm
|
||||
|
||||
include("YMact.jl")
|
||||
export krnl_plaq!
|
||||
include("YMfields.jl")
|
||||
export field, randomn!, zero!, norm2
|
||||
|
||||
struct YMworkspace
|
||||
frc1
|
||||
frc2
|
||||
mom
|
||||
U1
|
||||
cm # complex of volume
|
||||
function YMworkspace(::Type{T}, lp::SpaceParm) where {T <: Union{Group,Algebra}}
|
||||
|
||||
if (T == SU2)
|
||||
f1 = field(SU2alg, lp)
|
||||
f2 = field(SU2alg, lp)
|
||||
mm = field(SU2alg, lp)
|
||||
u1 = field(SU2, lp)
|
||||
cs = zeros(ComplexF64,lp.iL...)
|
||||
rs = zeros(Float64, lp.iL...)
|
||||
return new(f1, f2, mm, u1, replace_storage(CuArray, cs))
|
||||
end
|
||||
return nothing
|
||||
end
|
||||
end
|
||||
export YMworkspace
|
||||
|
||||
include("YMact.jl")
|
||||
export krnl_plaq!, force0_wilson!
|
||||
|
||||
include("YMhmc.jl")
|
||||
export gauge_action, hamiltonian, HMC!, OMF4!
|
||||
|
||||
end
|
||||
|
|
|
@ -11,11 +11,11 @@
|
|||
|
||||
function krnl_plaq!(plx, U, ipl, lp::SpaceParm)
|
||||
|
||||
id1, id2 = lp.plidx(ipl)
|
||||
id1, id2 = lp.plidx[ipl]
|
||||
X = map2latt((CUDA.threadIdx().x,CUDA.threadIdx().y,CUDA.threadIdx().z),
|
||||
(CUDA.blockIdx().x,CUDA.blockIdx().y,CUDA.blockIdx().z))
|
||||
Xu1 = up(X, id1)
|
||||
Xu2 = up(X, id2)
|
||||
Xu1 = up(X, id1, lp)
|
||||
Xu2 = up(X, id2, lp)
|
||||
|
||||
plx[X] = tr(U[X, id1]*U[Xu1, id2] / (U[X, id2]*U[Xu2, id1]))
|
||||
|
||||
|
@ -27,15 +27,51 @@ function krnl_plaq!(plx, U, lp::SpaceParm)
|
|||
X = map2latt((CUDA.threadIdx().x,CUDA.threadIdx().y,CUDA.threadIdx().z),
|
||||
(CUDA.blockIdx().x,CUDA.blockIdx().y,CUDA.blockIdx().z))
|
||||
|
||||
plx[X] = 0.0
|
||||
plx[X] = complex(0.0)
|
||||
for ipl in 1:lp.npls
|
||||
id1, id2 = lp.plidx(ipl)
|
||||
Xu1 = up(X, id1)
|
||||
Xu2 = up(X, id2)
|
||||
id1, id2 = lp.plidx[ipl]
|
||||
Xu1 = up(X, id1, lp)
|
||||
Xu2 = up(X, id2, lp)
|
||||
|
||||
plx[X] += tr(U[X, id1]*U[Xu1, id2] / (U[X, id2]*U[Xu2, id1]))
|
||||
end
|
||||
plx[X] = plx[X]/lp.npls
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function krnl_force_wilson_pln!(frc1, frc2, U, ipl, lp::SpaceParm, gp::GaugeParm)
|
||||
|
||||
X = map2latt((CUDA.threadIdx().x,CUDA.threadIdx().y,CUDA.threadIdx().z),
|
||||
(CUDA.blockIdx().x,CUDA.blockIdx().y,CUDA.blockIdx().z))
|
||||
|
||||
id1, id2 = lp.plidx[ipl]
|
||||
Xu1 = up(X, id1, lp)
|
||||
Xu2 = up(X, id2, lp)
|
||||
|
||||
a = U[Xu1,id2]/U[Xu2,id1]
|
||||
b = U[X ,id2]\U[X ,id1]
|
||||
|
||||
F1 = projalg(U[X,id1]*a/U[X,id2])
|
||||
F2 = projalg(a*b)
|
||||
F3 = projalg(b*a)
|
||||
|
||||
frc1[X ,id1] -= F1
|
||||
frc1[X ,id2] += F1
|
||||
frc2[Xu1,id2] -= F2
|
||||
frc2[Xu2,id1] += F3
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function force0_wilson!(frc1, frc2, U, lp::SpaceParm, gp::GaugeParm, kp::KernelParm)
|
||||
|
||||
zero!(frc1)
|
||||
zero!(frc2)
|
||||
for ipl in 1:lp.npls
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_force_wilson_pln!(frc1,frc2,U,ipl,lp,gp)
|
||||
end
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue