Non-seg-fault HMC. Energy not conserved.

2025-12-30 19:21:49 +01:00 · 2021-07-17 18:43:33 +02:00 · 2021-07-17 18:43:33 +02:00 · f296fd9768
commit f296fd9768
parent 5bb4f28c8b
8 changed files with 172 additions and 40 deletions
--- a/src/Groups/GroupSU2.jl
+++ b/src/Groups/GroupSU2.jl
@ -13,8 +13,9 @@
 # SU(2) group elements represented through Cayley-Dickson
 #       construction
 # https://en.wikipedia.org/wiki/Cayley%E2%80%93Dickson_construction
+using CUDA

-import Base.:*, Base.:+, Base.:-,Base.:/,Base.:\
+import Base.:*, Base.:+, Base.:-,Base.:/,Base.:\,Base.exp
 struct SU2 <: Group
    t1::ComplexF64
    t2::ComplexF64
@ -23,7 +24,8 @@ SU2()           = SU2(1.0, 0.0)
 inverse(b::SU2) = SU2(conj(b.t1), -b.t2)
 dag(a::SU2)     = inverse(a)
 norm(a::SU2)    = sqrt(abs2(a.t1) + abs2(a.t2))
-tr(g::SU2)      = 2.0*real(a.t1)
+norm2(a::SU2)   = abs2(a.t1) + abs2(a.t2)
+tr(g::SU2)      = complex(2.0*real(g.t1), 0.0)

 """
    function normalize(a::SU2)
@ -55,14 +57,16 @@ SU2alg(x::Real)              = SU2alg(x,0.0,0.0)
 SU2alg(v::Vector)            = SU2alg(v[1],v[2],v[3])
 projalg(g::SU2)              = SU2alg(imag(g.t1), real(g.t2), imag(g.t2))
 dot(a::SU2alg, b::SU2alg)    = a.t1*b.t1 + a.t2*b.t2 + a.t3*b.t3
+norm(a::SU2alg)              = sqrt(a.t1^2 + a.t2^2 + a.t3^2)
+norm2(a::SU2alg)             = a.t1^2 + a.t2^2 + a.t3^2
 Base.:+(a::SU2alg)           = SU2alg(a.t1,a.t2,a.t3)
 Base.:-(a::SU2alg)           = SU2alg(-a.t1,-a.t2,-a.t3)
 Base.:+(a::SU2alg,b::SU2alg) = SU2alg(a.t1+b.t1,a.t2+b.t2,a.t3+b.t3)
 Base.:-(a::SU2alg,b::SU2alg) = SU2alg(a.t1-b.t1,a.t2-b.t2,a.t3-b.t3)

-Base.:*(a::SU2alg,b::Real)   = SU2alg(a.t1*b,a.t2*b,a.t3*b)
-Base.:*(b::Real,a::SU2alg)   = SU2alg(a.t1*b,a.t2*b,a.t3*b)
-Base.:/(a::SU2alg,b::Real)   = SU2alg(a.t1/b,a.t2/b,a.t3/b)
+Base.:*(a::SU2alg,b::Number) = SU2alg(a.t1*b,a.t2*b,a.t3*b)
+Base.:*(b::Number,a::SU2alg) = SU2alg(a.t1*b,a.t2*b,a.t3*b)
+Base.:/(a::SU2alg,b::Number) = SU2alg(a.t1/b,a.t2/b,a.t3/b)


 """
@ -78,11 +82,13 @@ function Base.exp(a::SU2alg)
        ca = 1.0 - rms    *(1.0 - (rms/6.0 )*(1.0 - rms/15.0))
        sa = 0.5 - rms/6.0*(1.0 - (rms/10.0)*(1.0 - rms/21.0))
    else
-        ca = cos(rm)
-	sa = sin(rm)/(2.0*rm)
+        ca = CUDA.cos(rm)
+	sa = CUDA.sin(rm)/(2.0*rm)
    end

-    return SU2(complex(ca,sa*a.t1),complex(sa*a.t2,sa*a.t3))
+    t1 = complex(ca,sa*a.t1)
+    t2 = complex(sa*a.t2,sa*a.t3)
+    return SU2(t1,t2)
 end

 function Base.exp(a::SU2alg, t::Number)
@ -93,11 +99,13 @@ function Base.exp(a::SU2alg, t::Number)
        ca = 1.0 - rms    *(1.0 - (rms/6.0 )*(1.0 - rms/15.0))
        sa = t*(0.5 - rms/6.0*(1.0 - (rms/10.0)*(1.0 - rms/21.0)))
    else
-        ca = cos(rm)
-	sa = t*sin(rm)/(2.0*rm)
+        ca = CUDA.cos(rm)
+	sa = t*CUDA.sin(rm)/(2.0*rm)
    end

-    return SU2(complex(ca,sa*a.t1),complex(sa*a.t2,sa*a.t3))
+    t1 = complex(ca,sa*a.t1)
+    t2 = complex(sa*a.t2,sa*a.t3)
+    return SU2(t1,t2)
 end


@ -115,12 +123,13 @@ function expm(g::SU2, a::SU2alg)
        ca = 1.0 - rms    *(1.0 - (rms/6.0 )*(1.0 - rms/15.0))
        sa = 0.5 - rms/6.0*(1.0 - (rms/10.0)*(1.0 - rms/21.0))
    else
-        ca = cos(rm)
-	sa = sin(rm)/(2.0*rm)
+        ca = CUDA.cos(rm)
+	sa = CUDA.sin(rm)/(2.0*rm)
    end

-    return SU2(complex(ca,sa*a.t1)*g.t1-complex(sa*a.t2,sa*a.t3)*conj(g.t2),
-               complex(ca,sa*a.t1)*g.t2+complex(sa*a.t2,sa*a.t3)*conj(g.t1))
+    t1 = complex(ca,sa*a.t1)*g.t1-complex(sa*a.t2,sa*a.t3)*conj(g.t2)
+    t2 = complex(ca,sa*a.t1)*g.t2+complex(sa*a.t2,sa*a.t3)*conj(g.t1)
+    return SU2(t1,t2)
 end

 """
@ -137,10 +146,12 @@ function expm(g::SU2, a::SU2alg, t::Float64)
        ca = 1.0 - rms    *(1.0 - (rms/6.0 )*(1.0 - rms/15.0))
        sa = t*(0.5 - rms/6.0*(1.0 - (rms/10.0)*(1.0 - rms/21.0)))
    else
-        ca = cos(rm)
-	sa = t*sin(rm)/(2.0*rm)
+        ca = CUDA.cos(rm)
+	sa = t*CUDA.sin(rm)/(2.0*rm)
    end
-     
-    return SU2(complex(ca,sa*a.t1)*g.t1-complex(sa*a.t2,sa*a.t3)*conj(g.t2),
-               complex(ca,sa*a.t1)*g.t2+complex(sa*a.t2,sa*a.t3)*conj(g.t1))
+
+    t1 = complex(ca,sa*a.t1)*g.t1-complex(sa*a.t2,sa*a.t3)*conj(g.t2)
+    t2 = complex(ca,sa*a.t1)*g.t2+complex(sa*a.t2,sa*a.t3)*conj(g.t1)
+    return SU2(t1,t2)
+               
 end
--- a/src/Groups/Groups.jl
+++ b/src/Groups/Groups.jl
@ -17,7 +17,8 @@ abstract type Algebra end

 include("GroupSU2.jl")

-export SU2, SU2alg, dag, normalize, inverse, tr, projalg, norm
+export Group, Algebra
+export SU2, SU2alg, dag, normalize, inverse, tr, projalg, norm, norm2
 export dot, expm, exp

 include("GroupSU3.jl")
--- a/src/LatticeGPU.jl
+++ b/src/LatticeGPU.jl
@ -1,6 +1,16 @@
-module LatticeGPU
+###
+### "THE BEER-WARE LICENSE":
+### Alberto Ramos wrote this file. As long as you retain this 
+### notice you can do whatever you want with this stuff. If we meet some 
+### day, and you think this stuff is worth it, you can buy me a beer in 
+### return. <alberto.ramos@cern.ch>
+###
+### file:    LatticeGPU.jl
+### created: Sat Jul 17 17:19:58 2021
+###                               

-using CUDA
+
+module LatticeGPU

 include("Groups/Groups.jl")

@ -19,6 +29,7 @@ export map2latt, up, dw, shift
 include("YM/YM.jl")

 using .YM
-
+export YMworkspace, GaugeParm, force0_wilson!, field, randomn!, zero!, norm2
+export gauge_action, hamiltonian, HMC!, OMF4!

 end # module
--- a/src/Space/Space.jl
+++ b/src/Space/Space.jl
@ -18,7 +18,7 @@ struct SpaceParm{N,M}
    npls::Int64
    plidx::NTuple{M, Tuple{Int64, Int64}}

-    function SpaceParm{N}(x, bt, c=(0.0,0.0)) where {N}
+    function SpaceParm{N}(x) where {N}
        M = convert(Int64, round(N*(N-1)/2))
        N == length(x) || throw(ArgumentError("Tuple of incorrect length for dimension $N"))

@ -28,7 +28,7 @@ struct SpaceParm{N,M}
                push!(pls, (i,j))
            end
        end
-        return new{N,M}(N, bt, c, x, M, tuple(pls...))
+        return new{N,M}(N, x, M, tuple(pls...))
    end
 end
 export SpaceParm
@ -87,7 +87,7 @@ end
    return s
 end

-@inline function up(p::CartesianIndex{4}, id, lp::SpaceParm{4})
+@inline function up(p::CartesianIndex{4}, id::Int64, lp::SpaceParm{4})

    if (id == 1)
        s = CartesianIndex(mod1(p[1]+1, lp.iL[1]), p[2], p[3], p[4])
@ -102,7 +102,7 @@ end
    return s
 end

-@inline function up(p::CartesianIndex{3}, id, lp::SpaceParm{3})
+@inline function up(p::CartesianIndex{3}, id::Int64, lp::SpaceParm{3})

    if (id == 1)
        s = CartesianIndex(mod1(p[1]+1, lp.iL[1]), p[2], p[3])
@ -115,7 +115,7 @@ end
    return s
 end

-@inline function up(p::CartesianIndex{2}, id, lp::SpaceParm{2})
+@inline function up(p::CartesianIndex{2}, id::Int64, lp::SpaceParm{2})

    if (id == 1)
        s = CartesianIndex(mod1(p[1]+1, lp.iL[1]), p[2])
--- a/src/YM/YM.jl
+++ b/src/YM/YM.jl
@ -12,14 +12,46 @@

 module YM

+using CUDA, Random, StructArrays
+using ..Space
+using ..Groups
+
 struct GaugeParm  # passed as `gp` to force0_wilson! and its kernel
    beta::Float64  # NOTE(review): presumably the gauge coupling — confirm
    cG::Tuple{Float64,Float64}  # NOTE(review): pair of coefficients; meaning not visible here — confirm
+    ng::Int32  # NOTE(review): presumably a group-size count — confirm against callers
 end
 export GaugeParm

-include("YMact.jl")
-export krnl_plaq!
+include("YMfields.jl")
+export field, randomn!, zero!, norm2

+"""
+    YMworkspace(::Type{T}, lp::SpaceParm) where {T <: Union{Group,Algebra}}
+
+Scratch storage allocated once from the lattice parameters `lp`.
+
+Only `T == SU2` is handled: the workspace then holds three `SU2alg` fields
+(`frc1`, `frc2`, `mom`), one `SU2` field (`U1`) and a `ComplexF64` array with
+dimensions `lp.iL` passed through `replace_storage(CuArray, ...)` (`cm`).
+For any other `T` the constructor returns `nothing`.
+"""
+struct YMworkspace
+    frc1   # SU2alg field; NOTE(review): presumably a force buffer — confirm
+    frc2   # SU2alg field; NOTE(review): presumably a second force buffer — confirm
+    mom    # SU2alg field; NOTE(review): presumably the HMC momenta — confirm
+    U1     # SU2 field; NOTE(review): presumably a gauge-field copy — confirm
+    cm     # complex of volume
+    function YMworkspace(::Type{T}, lp::SpaceParm) where {T <: Union{Group,Algebra}}
+        if (T == SU2)
+            f1 = field(SU2alg, lp)
+            f2 = field(SU2alg, lp)
+            mm = field(SU2alg, lp)
+            u1 = field(SU2,    lp)
+            # Host-side zeros with the lattice dimensions; the storage is
+            # swapped to a CuArray when the workspace is assembled below.
+            cs = zeros(ComplexF64,lp.iL...)
+            return new(f1, f2, mm, u1, replace_storage(CuArray, cs))
+        end
+        # No other group is implemented here: callers receive `nothing`.
+        return nothing
+    end
+end
+export YMworkspace
+
+include("YMact.jl")
+export krnl_plaq!, force0_wilson!
+
+include("YMhmc.jl")
+export gauge_action, hamiltonian, HMC!, OMF4!

 end
--- a/src/YM/YMact.jl
+++ b/src/YM/YMact.jl
@ -11,12 +11,12 @@

 function krnl_plaq!(plx, U, ipl, lp::SpaceParm)

-    id1, id2 = lp.plidx(ipl)
+    id1, id2 = lp.plidx[ipl]
    X = map2latt((CUDA.threadIdx().x,CUDA.threadIdx().y,CUDA.threadIdx().z),
                 (CUDA.blockIdx().x,CUDA.blockIdx().y,CUDA.blockIdx().z))
-    Xu1 = up(X, id1)
-    Xu2 = up(X, id2)
-    
+    Xu1 = up(X, id1, lp)
+    Xu2 = up(X, id2, lp)
+
    plx[X] = tr(U[X, id1]*U[Xu1, id2] / (U[X, id2]*U[Xu2, id1]))

    return nothing
@ -27,15 +27,51 @@ function krnl_plaq!(plx, U, lp::SpaceParm)
    X = map2latt((CUDA.threadIdx().x,CUDA.threadIdx().y,CUDA.threadIdx().z),
                 (CUDA.blockIdx().x,CUDA.blockIdx().y,CUDA.blockIdx().z))

-    plx[X] = 0.0
+    plx[X] = complex(0.0)
    for ipl in 1:lp.npls
-        id1, id2 = lp.plidx(ipl)
-        Xu1 = up(X, id1)
-        Xu2 = up(X, id2)
+        id1, id2 = lp.plidx[ipl]
+        Xu1 = up(X, id1, lp)
+        Xu2 = up(X, id2, lp)
        
        plx[X] += tr(U[X, id1]*U[Xu1, id2] / (U[X, id2]*U[Xu2, id1]))
    end
-    plx[X] = plx[X]/lp.npls
    
    return nothing
 end
+
+# Kernel body for one plane `ipl`: each thread handles the site `X` derived
+# from its thread/block indices, projects three link products onto the
+# algebra with `projalg`, and accumulates them into `frc1` (at `X`) and
+# `frc2` (at the two forward neighbours of `X`). `gp` is accepted but not
+# read in this kernel.
+function krnl_force_wilson_pln!(frc1, frc2, U, ipl, lp::SpaceParm, gp::GaugeParm)
+
+    tid = CUDA.threadIdx()
+    bid = CUDA.blockIdx()
+    X   = map2latt((tid.x, tid.y, tid.z), (bid.x, bid.y, bid.z))
+
+    # The two directions spanning this plane and the corresponding
+    # forward neighbours of X.
+    id1, id2 = lp.plidx[ipl]
+    Xu1 = up(X, id1, lp)
+    Xu2 = up(X, id2, lp)
+
+    # Links leaving X along each direction (U is only read in this kernel).
+    Ux1 = U[X, id1]
+    Ux2 = U[X, id2]
+
+    far  = U[Xu1, id2]/U[Xu2, id1]
+    near = Ux2\Ux1
+
+    Fsite = projalg(Ux1*far/Ux2)
+    Fup1  = projalg(far*near)
+    Fup2  = projalg(near*far)
+
+    # frc1 is updated at X itself, frc2 at the forward neighbours.
+    frc1[X,   id1] -= Fsite
+    frc1[X,   id2] += Fsite
+    frc2[Xu1, id2] -= Fup1
+    frc2[Xu2, id1] += Fup2
+
+    return nothing
+end
+
+# Zero both force accumulators, then launch `krnl_force_wilson_pln!` once per
+# plane index in `1:lp.npls`, synchronising after every launch. The launch
+# configuration is read from `kp.threads` and `kp.blocks`.
+function force0_wilson!(frc1, frc2, U, lp::SpaceParm, gp::GaugeParm, kp::KernelParm)
+
+    zero!(frc1)
+    zero!(frc2)
+
+    nthreads = kp.threads
+    nblocks  = kp.blocks
+    for plane = 1:lp.npls
+        # Each plane's launch completes before the next one is issued.
+        CUDA.@sync CUDA.@cuda threads=nthreads blocks=nblocks krnl_force_wilson_pln!(frc1,frc2,U,plane,lp,gp)
+    end
+
+    return nothing
+end