Last version

This commit is contained in:
Alberto Ramos 2021-09-04 14:16:22 +02:00
parent c378648508
commit 76d0b66b4b
9 changed files with 515 additions and 322 deletions

View file

@ -68,6 +68,14 @@ Base.:*(a::SU2alg,b::Number) = SU2alg(a.t1*b,a.t2*b,a.t3*b)
Base.:*(b::Number,a::SU2alg) = SU2alg(a.t1*b,a.t2*b,a.t3*b) Base.:*(b::Number,a::SU2alg) = SU2alg(a.t1*b,a.t2*b,a.t3*b)
Base.:/(a::SU2alg,b::Number) = SU2alg(a.t1/b,a.t2/b,a.t3/b) Base.:/(a::SU2alg,b::Number) = SU2alg(a.t1/b,a.t2/b,a.t3/b)
function isgroup(a::SU2)
tol = 1.0E-10
if (abs2(a.t1) + abs2(a.t2) - 1.0 < 1.0E-10)
return true
else
return false
end
end
""" """
function Base.exp(a::SU2alg, t::Number=1) function Base.exp(a::SU2alg, t::Number=1)
@ -155,3 +163,5 @@ function expm(g::SU2, a::SU2alg, t::Float64)
return SU2(t1,t2) return SU2(t1,t2)
end end
export SU2, SU2alg, inverse, dag, tr, projalg, expm, exp, norm, norm2, isgroup

View file

@ -79,8 +79,21 @@ function Base.:\(a::SU3,b::SU3)
end end
function isgroup(a::SU3)
tol = 1.0E-10
g = a/a
if ( (abs(g.u11 - 1.0) < tol) &&
(abs(g.u12) < tol) &&
(abs(g.u13) < tol) &&
(abs(g.u21) < tol) &&
(abs(g.u22 - 1.0) < tol) &&
(abs(g.u23) < tol) )
return true
else
return false
end
end
struct SU3alg <: Algebra struct SU3alg <: Algebra
t1::Float64 t1::Float64
@ -122,7 +135,7 @@ Base.:/(a::SU3alg,b::Number) = SU3alg(a.t1/b,a.t2/b,a.t3/b,a.t4/b,a.t5/b,a.t6/b,
export SU3, SU3alg, inverse, dag, tr, projalg, expm, exp, norm, norm2 export SU3, SU3alg, inverse, dag, tr, projalg, expm, exp, norm, norm2, isgroup
struct M3x3 struct M3x3
u11::ComplexF64 u11::ComplexF64

View file

@ -23,7 +23,7 @@ export SU2, SU2alg
include("GroupSU3.jl") include("GroupSU3.jl")
export SU3, SU3alg export SU3, SU3alg
export dot, expm, exp, dag, normalize, inverse, tr, projalg, norm, norm2 export dot, expm, exp, dag, normalize, inverse, tr, projalg, norm, norm2, isgroup
end # module end # module

View file

@ -17,19 +17,19 @@ include("Groups/Groups.jl")
using .Groups using .Groups
export Group, Algebra export Group, Algebra
export SU2, SU2alg, SU3, SU3alg export SU2, SU2alg, SU3, SU3alg
export dot, expm, exp, dag, normalize, inverse, tr, projalg, norm, norm2 export dot, expm, exp, dag, normalize, inverse, tr, projalg, norm, norm2, isgroup
include("Space/Space.jl") include("Space/Space.jl")
using .Space using .Space
export SpaceParm, KernelParm export SpaceParm
export map2latt, up, dw, shift export up, dw, updw
include("YM/YM.jl") include("YM/YM.jl")
using .YM using .YM
export YMworkspace, GaugeParm, force0_wilson!, field, randomn!, zero!, norm2 export YMworkspace, GaugeParm, force0_wilson!, field, field_pln, randomn!, zero!, norm2
export gauge_action, hamiltonian, plaquette, HMC!, OMF4! export gauge_action, hamiltonian, plaquette, HMC!, OMF4!
end # module end # module

View file

@ -16,11 +16,20 @@ struct SpaceParm{N,M}
ndim::Int64 ndim::Int64
iL::NTuple{N,Int64} iL::NTuple{N,Int64}
npls::Int64 npls::Int64
plidx::NTuple{M, Tuple{Int64, Int64}} plidx::NTuple{M,Tuple{Int64, Int64}}
function SpaceParm{N}(x) where {N} blk::NTuple{N,Int64}
blkS::NTuple{N,Int64}
rbk::NTuple{N,Int64}
rbkS::NTuple{N,Int64}
bsz::Int64
rsz::Int64
function SpaceParm{N}(x, y) where {N}
M = convert(Int64, round(N*(N-1)/2)) M = convert(Int64, round(N*(N-1)/2))
N == length(x) || throw(ArgumentError("Tuple of incorrect length for dimension $N")) N == length(x) || throw(ArgumentError("Lattice size incorrect length for dimension $N"))
N == length(y) || throw(ArgumentError("Block size incorrect length for dimension $N"))
pls = Vector{Tuple{Int64, Int64}}() pls = Vector{Tuple{Int64, Int64}}()
for i in 1:N for i in 1:N
@ -28,120 +37,100 @@ struct SpaceParm{N,M}
push!(pls, (i,j)) push!(pls, (i,j))
end end
end end
return new{N,M}(N, x, M, tuple(pls...))
r = div.(x, y)
rS = ones(N)
yS = ones(N)
for i in 2:N
for j in 1:i-1
rS[i] = rS[i]*r[j]
yS[i] = yS[i]*y[j]
end
end
return new{N,M}(N, x, M, tuple(pls...), y,
tuple(yS...), tuple(r...), tuple(rS...), prod(y), prod(r))
end end
end end
export SpaceParm export SpaceParm
struct KernelParm @inline function up(p::NTuple{2,Int64}, id::Int64, lp::SpaceParm)
threads::Tuple{Int64,Int64,Int64}
blocks::Tuple{Int64,Int64,Int64}
end
export KernelParm
@inline shift(p::CartesianIndex{4}, sh::CartesianIndex{4}, lp::SpaceParm{4}) = CartesianIndex(mod1(p[1]+sh[1], lp.iL[1]), ic = mod(div(p[1]-1,lp.blkS[id]),lp.blk[id])
mod1(p[2]+sh[2], lp.iL[2]), if (ic == lp.blk[id]-1)
mod1(p[3]+sh[3], lp.iL[3]), b = p[1] - (lp.blk[id]-1)*lp.blkS[id]
mod1(p[4]+sh[4], lp.iL[4]))
@inline shift(p::CartesianIndex{3}, sh::CartesianIndex{3}, lp::SpaceParm{3}) = CartesianIndex(mod1(p[1]+sh[1], lp.iL[1]),
mod1(p[2]+sh[2], lp.iL[2]),
mod1(p[3]+sh[3], lp.iL[3]))
@inline shift(p::CartesianIndex{2}, sh::CartesianIndex{2}, lp::SpaceParm{2}) = CartesianIndex(mod1(p[1]+sh[1], lp.iL[1]),
mod1(p[2]+sh[2], lp.iL[2]))
@inline function dw(p::CartesianIndex{4}, id, lp::SpaceParm{4})
if (id == 1) ic = mod(div(p[2]-1,lp.rbkS[id]),lp.rbk[id])
s = CartesianIndex(mod1(p[1]-1, lp.iL[1]), p[2], p[3], p[4]) if (ic == lp.rbk[id]-1)
elseif (id == 2) r = p[2] - (lp.rbk[id]-1)*lp.rbkS[id]
s = CartesianIndex(p[1], mod1(p[2]-1, lp.iL[2]), p[3], p[4]) else
elseif (id == 3) r = p[2] + lp.rbkS[id]
s = CartesianIndex(p[1], p[2], mod1(p[3]-1, lp.iL[3]), p[4]) end
elseif (id == 4) else
s = CartesianIndex(p[1], p[2], p[3], mod1(p[4]-1, lp.iL[4])) b = p[1] + lp.blkS[id]
end r = p[2]
return s
end
@inline function dw(p::CartesianIndex{3}, id, lp::SpaceParm{3})
if (id == 1)
s = CartesianIndex(mod1(p[1]-1, lp.iL[1]), p[2], p[3])
elseif (id == 2)
s = CartesianIndex(p[1], mod1(p[2]-1, lp.iL[2]), p[3])
elseif (id == 3)
s = CartesianIndex(p[1], p[2], mod1(p[3]-1, lp.iL[3]))
end
return s
end
@inline function dw(p::CartesianIndex{2}, id, lp::SpaceParm{2})
if (id == 1)
s = CartesianIndex(mod1(p[1]-1, lp.iL[1]), p[2])
elseif (id == 2)
s = CartesianIndex(p[1], mod1(p[2]-1, lp.iL[2]))
end
return s
end
@inline function up(p::CartesianIndex{4}, id::Int64, lp::SpaceParm{4})
if (id == 1)
s = CartesianIndex(mod1(p[1]+1, lp.iL[1]), p[2], p[3], p[4])
elseif (id == 2)
s = CartesianIndex(p[1], mod1(p[2]+1, lp.iL[2]), p[3], p[4])
elseif (id == 3)
s = CartesianIndex(p[1], p[2], mod1(p[3]+1, lp.iL[3]), p[4])
elseif (id == 4)
s = CartesianIndex(p[1], p[2], p[3], mod1(p[4]+1, lp.iL[4]))
end end
return s
return b, r
end end
@inline function up(p::CartesianIndex{3}, id::Int64, lp::SpaceParm{3}) @inline function dw(p::NTuple{2,Int64}, id::Int64, lp::SpaceParm)
if (id == 1) ic = mod(div(p[1]-1,lp.blkS[id]),lp.blk[id])
s = CartesianIndex(mod1(p[1]+1, lp.iL[1]), p[2], p[3]) if (ic == 0)
elseif (id == 2) b = p[1] + (lp.blk[id]-1)*lp.blkS[id]
s = CartesianIndex(p[1], mod1(p[2]+1, lp.iL[2]), p[3]) ic = mod(div(p[2]-1,lp.rbkS[id]),lp.rbk[id])
elseif (id == 3) if (ic == 0)
s = CartesianIndex(p[1], p[2], mod1(p[3]+1, lp.iL[3])) r = p[2] + (lp.rbk[id]-1)*lp.rbkS[id]
else
r = p[2] - lp.rbkS[id]
end
else
b = p[1] - lp.blkS[id]
r = p[2]
end end
return s
return b, r
end end
@inline function up(p::CartesianIndex{2}, id::Int64, lp::SpaceParm{2}) @inline function updw(p::NTuple{2,Int64}, id::Int64, lp::SpaceParm)
if (id == 1) ic = mod(div(p[1]-1,lp.blkS[id]),lp.blk[id])
s = CartesianIndex(mod1(p[1]+1, lp.iL[1]), p[2]) if (ic == lp.blk[id]-1)
elseif (id == 2) bu = p[1] - (lp.blk[id]-1)*lp.blkS[id]
s = CartesianIndex(p[1], mod1(p[2]+1, lp.iL[2])) bd = p[1] - lp.blkS[id]
ic = mod(div(p[2]-1,lp.rbkS[id]),lp.rbk[id])
if (ic == lp.rbk[id]-1)
ru = p[2] - (lp.rbk[id]-1)*lp.rbkS[id]
else
ru = p[2] + lp.rbkS[id]
end
rd = p[2]
elseif (ic == 0)
bu = p[1] + lp.blkS[id]
bd = p[1] + (lp.blk[id]-1)*lp.blkS[id]
ic = mod(div(p[2]-1,lp.rbkS[id]),lp.rbk[id])
if (ic == 0)
rd = p[2] + (lp.rbk[id]-1)*lp.rbkS[id]
else
rd = p[2] - lp.rbkS[id]
end
ru = p[2]
else
bu = p[1] + lp.blkS[id]
bd = p[1] - lp.blkS[id]
rd = p[2]
ru = p[2]
end end
return s
return bu, ru, bd, rd
end end
@inline map2latt(th::NTuple{3,Int64},bl::NTuple{3,Int64}) = CartesianIndex(th[1],bl[3],bl[1],bl[2]) export up, dw, updw
@inline function map2latt(th::NTuple{3,Int64},bl::NTuple{3,Int64}, lp)
i1 = mod1(th[1], lp.lblock[1]) + mod(bl[1], div(lp.iL[1],lp.lblock[1]))*lp.lblock[1]
i2 = div(th[1]-1, lp.lblock[1]) + 1 + div(bl[1]-1, div(lp.iL[1],lp.lblock[1]))*lp.lblock[2]
i3 = (bl[2] - 1) * lp.lblock[3] + th[2]
i4 = (bl[3] - 1) * lp.lblock[4] + th[3]
return CartesianIndex(i1,i2,i3,i4)
end
export map2latt, up, dw, shift
end end

View file

@ -24,7 +24,7 @@ end
export GaugeParm export GaugeParm
include("YMfields.jl") include("YMfields.jl")
export field, randomn!, zero!, norm2 export field, field_pln, randomn!, zero!, norm2
struct YMworkspace struct YMworkspace
frc1 frc1
@ -32,6 +32,7 @@ struct YMworkspace
mom mom
U1 U1
cm # complex of volume cm # complex of volume
rm # float of volume
function YMworkspace(::Type{T}, lp::SpaceParm) where {T <: Union{Group,Algebra}} function YMworkspace(::Type{T}, lp::SpaceParm) where {T <: Union{Group,Algebra}}
if (T == SU2) if (T == SU2)
@ -39,9 +40,6 @@ struct YMworkspace
f2 = field(SU2alg, lp) f2 = field(SU2alg, lp)
mm = field(SU2alg, lp) mm = field(SU2alg, lp)
u1 = field(SU2, lp) u1 = field(SU2, lp)
cs = zeros(ComplexF64,lp.iL...)
rs = zeros(Float64, lp.iL...)
return new(f1, f2, mm, u1, replace_storage(CuArray, cs))
end end
if (T == SU3) if (T == SU3)
@ -49,11 +47,11 @@ struct YMworkspace
f2 = field(SU3alg, lp) f2 = field(SU3alg, lp)
mm = field(SU3alg, lp) mm = field(SU3alg, lp)
u1 = field(SU3, lp) u1 = field(SU3, lp)
cs = zeros(ComplexF64,lp.iL...)
rs = zeros(Float64, lp.iL...)
return new(f1, f2, mm, u1, replace_storage(CuArray, cs))
end end
return nothing cs = CuArray{ComplexF64, 2}(undef, lp.bsz,lp.rsz)
rs = CuArray{Float64, 2}(undef, lp.bsz,lp.rsz)
return new(f1, f2, mm, u1, cs, rs)
end end
end end
export YMworkspace export YMworkspace

View file

@ -12,68 +12,171 @@
function krnl_plaq!(plx, U, ipl, lp::SpaceParm) function krnl_plaq!(plx, U, ipl, lp::SpaceParm)
id1, id2 = lp.plidx[ipl] id1, id2 = lp.plidx[ipl]
X = map2latt((CUDA.threadIdx().x,CUDA.threadIdx().y,CUDA.threadIdx().z), b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
(CUDA.blockIdx().x,CUDA.blockIdx().y,CUDA.blockIdx().z)) bu1, ru1 = up((b, r), id1, lp)
Xu1 = up(X, id1, lp) bu2, ru2 = up((b, r), id2, lp)
Xu2 = up(X, id2, lp)
@inbounds plx[X] = tr(U[X, id1]*U[Xu1, id2] / (U[X, id2]*U[Xu2, id1])) @inbounds plx[b, r] = tr(U[b,r,id1]*U[bu1,ru1,id2] / (U[b,r,id2]*U[bu2,ru2,id1]))
return nothing return nothing
end end
function krnl_plaq!(plx, U, lp::SpaceParm) function krnl_plaq!(plx, U, lp::SpaceParm)
X = map2latt((CUDA.threadIdx().x,CUDA.threadIdx().y,CUDA.threadIdx().z), b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
(CUDA.blockIdx().x,CUDA.blockIdx().y,CUDA.blockIdx().z)) if (b < 1) || (b > lp.bsz)
@cuprintln("Error b: %b, %r")
end
if (r < 1) || (r > lp.rsz)
@cuprintln("Error r: %b, %r")
end
@inbounds plx[X] = complex(0.0) plx[b,r] = complex(0.0)
for ipl in 1:lp.npls
id1, id2 = lp.plidx[ipl]
bu1, ru1 = up((b, r), id1, lp)
bu2, ru2 = up((b, r), id2, lp)
if (bu1 < 1) || (bu1 > lp.bsz)
@cuprintln("Error bu1: %b, %r, %id1")
end
if (ru1 < 1) || (ru1 > lp.rsz)
@cuprintln("Error ru1: %b, %r, %id1")
end
if (bu2 < 1) || (bu2 > lp.bsz)
@cuprintln("Error bu2: %b, %r, %id2")
end
if (ru2 < 1) || (ru2 > lp.rsz)
@cuprintln("Error ru2: %b, %r, %id2")
end
plx[b,r] += tr(U[r,id1,b]*U[bu1,id2,ru1] / (U[b,id2,r]*U[bu2,id1,ru2]))
end
return nothing
end
function krnl_force_wilson_pln!(frc1, frc2, U, ipl, lp::SpaceParm)
b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
@inbounds begin
id1, id2 = lp.plidx[ipl]
bu1, ru1 = up((b, r), id1, lp)
bu2, ru2 = up((b, r), id2, lp)
g1 = U[bu1,id2,ru1]/U[bu2,id1,ru2]
g2 = U[b,id2,r]\U[b,id1,r]
F1 = projalg(U[b,id1,r]*g1/U[b,id2,r])
F2 = projalg(g1*g2)
F3 = projalg(g2*g1)
frc1[b ,r,id1 ] -= F1
frc1[b ,r,id2 ] += F1
frc2[bu1,ru1,id2] -= F2
frc2[bu2,ru2,id1] += F3
end
return nothing
end
function krnl_force_wilson_nw!(fpl, U, lp::SpaceParm)
b = CUDA.threadIdx().x
ipl = mod1(CUDA.blockIdx().x, lp.npls)
r = div(CUDA.blockIdx().x-1, lp.npls)+1
@inbounds begin #for ipl in 1:lp.npls
id1, id2 = lp.plidx[ipl]
bu1, ru1 = up((b, r), id1, lp)
bu2, ru2 = up((b, r), id2, lp)
g1 = U[bu1,id2,ru1]/U[bu2,id1,ru2]
g2 = U[b,id2,r]\U[b,id1,r]
F1 = projalg(U[b,id1,r]*g1/U[b,id2,r])
F2 = projalg(g1*g2)
F3 = projalg(g2*g1)
fpl[b ,ipl, r ,1,1] = -F1
fpl[b ,ipl, r ,2,1] = F1
fpl[bu1,ipl, ru1,2,2] = -F2
fpl[bu2,ipl, ru2,1,2] = F3
end
return nothing
end
function krnl_force_wilson!(frc, U, lp::SpaceParm)
b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
@inbounds for id1 in 1:lp.ndim
bu1, ru1, bd1, rd1 = updw((b, r), id1, lp)
@inbounds for id2 in id1+1:lp.ndim
bu2, ru2, bd2, rd2 = updw((b, r), id2, lp)
bud, rud = dw((bu1,ru1), id2, lp)
bdu, rdu = dw((bu2,ru2), id1, lp)
F1 = projalg(U[b,id1,r]*U[bu1,id2,ru1]/(U[b,id2,r]*U[bu2,id1,ru2]))
F2 = projalg((U[b,id2,r]/(U[bd1,id2,rd1]*U[bdu,id1,rdu]))*U[bd1,id1,rd1])
F3 = projalg((U[bd2,id2,rd2]\U[bd2,id1,rd2])*(U[bud,id2,rud]/U[b,id1,r]))
frc[b,r,id1] -= F1 - F3
frc[b,r,id2] += F1 - F2
end
end
return nothing
end
function krnl_add_force_plns!(frc, fpl, lp::SpaceParm)
b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
@inbounds for ipl in 1:lp.npls @inbounds for ipl in 1:lp.npls
id1, id2 = lp.plidx[ipl] id1, id2 = lp.plidx[ipl]
Xu1 = up(X, id1, lp) frc[b,r,id1] += fpl[b,ipl,r,1,1] + fpl[b,ipl,r,1,2]
Xu2 = up(X, id2, lp) frc[b,r,id2] += fpl[b,ipl,r,2,1] + fpl[b,ipl,r,2,2]
plx[X] += tr(U[X, id1]*U[Xu1, id2] / (U[X, id2]*U[Xu2, id1]))
end
return nothing
end
function krnl_force_wilson_pln!(frc1, frc2, U, ipl, lp::SpaceParm, gp::GaugeParm)
X = map2latt((CUDA.threadIdx().x,CUDA.threadIdx().y,CUDA.threadIdx().z),
(CUDA.blockIdx().x,CUDA.blockIdx().y,CUDA.blockIdx().z))
id1, id2 = lp.plidx[ipl]
Xu1 = up(X, id1, lp)
Xu2 = up(X, id2, lp)
a = U[Xu1,id2]/U[Xu2,id1]
b = U[X ,id2]\U[X ,id1]
F1 = projalg(U[X,id1]*a/U[X,id2])
F2 = projalg(a*b)
F3 = projalg(b*a)
@inbounds begin
frc1[X ,id1] -= F1
frc1[X ,id2] += F1
frc2[Xu1,id2] -= F2
frc2[Xu2,id1] += F3
end end
return nothing return nothing
end end
function force0_wilson!(frc1, frc2, U, lp::SpaceParm, gp::GaugeParm, kp::KernelParm) function force0_wilson_pln!(frc1, frc2, U, lp::SpaceParm)
zero!(frc1) zero!(frc1, lp)
zero!(frc2) zero!(frc2, lp)
for ipl in 1:lp.npls for i in 1:lp.npls
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_force_wilson_pln!(frc1,frc2,U,ipl,lp,gp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_force_wilson_pln!(frc1,frc2,U,i,lp)
end end
end end
return nothing return nothing
end end
function force0_wilson!(frc1, U, lp::SpaceParm)
zero!(frc1, lp)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_force_wilson!(frc1,U,lp)
end
return nothing
end
function force0_wilson_nw!(frc1, fpln, U, lp::SpaceParm)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz*lp.npls krnl_force_wilson_nw!(fpln,U,lp)
end
zero!(frc1, lp)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_force_plns!(frc1, fpln,lp)
end
return nothing
end

View file

@ -9,154 +9,198 @@
### created: Thu Jul 15 15:16:47 2021 ### created: Thu Jul 15 15:16:47 2021
### ###
un(t) = t <: Union{Group, Complex}
function field(::Type{T}, lp::SpaceParm) where {T <: Union{Group, Algebra}} function field(::Type{T}, lp::SpaceParm) where {T <: Union{Group, Algebra}}
sz = lp.iL..., lp.ndim sz = lp.bsz, lp.ndim, lp.rsz
if (T == SU2) if (T == SU2)
As = StructArray{SU2}((ones(ComplexF64, sz), zeros(ComplexF64, sz))) # As = StructArray{SU2}(undef, sz, unwrap=un)
return CuArray{SU2, 3}(undef, sz)
elseif (T == SU2alg) elseif (T == SU2alg)
As = StructArray{SU2alg}((zeros(Float64, sz), # As = StructArray{SU2alg}(undef, sz, unwrap=un)
zeros(Float64, sz), return CuArray{SU2alg, 3}(undef, sz)
zeros(Float64, sz)))
elseif (T == SU3) elseif (T == SU3)
As = StructArray{SU3}((ones(ComplexF64, sz), zeros(ComplexF64, sz), zeros(ComplexF64, sz), zeros(ComplexF64, sz), ones(ComplexF64, sz), zeros(ComplexF64, sz))) # As = StructArray{SU3}(undef, sz, unwrap=un)
return CuArray{SU3, 3}(undef, sz)
# As = Array{SU3, lp.ndim+1}(undef, sz) # As = Array{SU3, lp.ndim+1}(undef, sz)
# CUDA.@sync begin
# CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_SU3_zero!(As, lp)
# end
elseif (T == SU3alg) elseif (T == SU3alg)
As = StructArray{SU3alg}((zeros(Float64, sz), # As = StructArray{SU3alg}(undef, sz, unwrap=un)
zeros(Float64, sz), return CuArray{SU3alg, 3}(undef, sz)
zeros(Float64, sz),
zeros(Float64, sz),
zeros(Float64, sz),
zeros(Float64, sz),
zeros(Float64, sz),
zeros(Float64, sz)))
# As = Array{SU3alg, lp.ndim+1}(undef, sz) # As = Array{SU3alg, lp.ndim+1}(undef, sz)
# CUDA.@sync begin
# CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_SU3alg_zero!(As, lp)
# end
end end
return replace_storage(CuArray, As) return replace_storage(CuArray, As)
end end
function randomn!(X) function krnl_SU3_one!(G, lp::SpaceParm)
if (eltype(X) == SU2alg) b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
randn!(CURAND.default_rng(), LazyRows(X).t1)
randn!(CURAND.default_rng(), LazyRows(X).t2)
randn!(CURAND.default_rng(), LazyRows(X).t3)
elseif (eltype(X) == SU3alg)
randn!(CURAND.default_rng(), LazyRows(X).t1)
randn!(CURAND.default_rng(), LazyRows(X).t2)
randn!(CURAND.default_rng(), LazyRows(X).t3)
randn!(CURAND.default_rng(), LazyRows(X).t4)
randn!(CURAND.default_rng(), LazyRows(X).t5)
randn!(CURAND.default_rng(), LazyRows(X).t6)
randn!(CURAND.default_rng(), LazyRows(X).t7)
randn!(CURAND.default_rng(), LazyRows(X).t8)
end
return nothing
end
function zero!(X)
if (eltype(X) == SU2alg)
fill!(LazyRows(X).t1, 0.0)
fill!(LazyRows(X).t2, 0.0)
fill!(LazyRows(X).t3, 0.0)
end
if (eltype(X) == SU3alg)
fill!(LazyRows(X).t1, 0.0)
fill!(LazyRows(X).t2, 0.0)
fill!(LazyRows(X).t3, 0.0)
fill!(LazyRows(X).t4, 0.0)
fill!(LazyRows(X).t5, 0.0)
fill!(LazyRows(X).t6, 0.0)
fill!(LazyRows(X).t7, 0.0)
fill!(LazyRows(X).t8, 0.0)
# CUDA.@sync begin
# CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_SU3alg_zero!(X, lp)
# end
end
if (eltype(X) == SU2)
fill!(LazyRows(X).t1, complex(1.0))
fill!(LazyRows(X).t2, complex(0.0))
end
if (eltype(X) == SU3)
fill!(LazyRows(X).u11, complex(1.0))
fill!(LazyRows(X).u12, complex(0.0))
fill!(LazyRows(X).u13, complex(0.0))
fill!(LazyRows(X).u21, complex(0.0))
fill!(LazyRows(X).u22, complex(1.0))
fill!(LazyRows(X).u23, complex(0.0))
# CUDA.@sync begin
# CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_SU3_zero!(X, lp)
# end
end
return nothing
end
function norm2(X)
d = 0.0
if (eltype(X) == SU2alg)
d = CUDA.mapreduce(x->x^2, +, LazyRows(X).t1) +
CUDA.mapreduce(x->x^2, +, LazyRows(X).t2) +
CUDA.mapreduce(x->x^2, +, LazyRows(X).t3)
elseif (eltype(X) == SU3alg)
d = CUDA.mapreduce(x->x^2, +, LazyRows(X).t1) +
CUDA.mapreduce(x->x^2, +, LazyRows(X).t2) +
CUDA.mapreduce(x->x^2, +, LazyRows(X).t3) +
CUDA.mapreduce(x->x^2, +, LazyRows(X).t4) +
CUDA.mapreduce(x->x^2, +, LazyRows(X).t5) +
CUDA.mapreduce(x->x^2, +, LazyRows(X).t6) +
CUDA.mapreduce(x->x^2, +, LazyRows(X).t7) +
CUDA.mapreduce(x->x^2, +, LazyRows(X).t8)
# d = CUDA.mapreduce(norm2, +, X)
end
return d
end
function krnl_SU3_zero!(G, lp::SpaceParm)
X = map2latt((CUDA.threadIdx().x,CUDA.threadIdx().y,CUDA.threadIdx().z),
(CUDA.blockIdx().x,CUDA.blockIdx().y,CUDA.blockIdx().z))
for id in 1:lp.ndim for id in 1:lp.ndim
G[X,id].u11 = complex(1.0) G[b,id,r] = SU3(1.0,0.0,0.0,0.0,1.0,0.0)
G[X,id].u12 = complex(0.0) end
G[X,id].u13 = complex(0.0) return nothing
G[X,id].u21 = complex(0.0) end
G[X,id].u22 = complex(1.0)
G[X,id].u23 = complex(0.0) function krnl_SU2_one!(G, lp::SpaceParm)
b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
for id in 1:lp.ndim
G[b,id,r] = SU2(1.0,0.0)
end end
return nothing return nothing
end end
function krnl_SU3alg_zero!(G, lp::SpaceParm) function krnl_SU3alg_zero!(G, lp::SpaceParm)
X = map2latt((CUDA.threadIdx().x,CUDA.threadIdx().y,CUDA.threadIdx().z), b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
(CUDA.blockIdx().x,CUDA.blockIdx().y,CUDA.blockIdx().z))
for id in 1:lp.ndim for id in 1:lp.ndim
G[X,id].t1 = 0.0 G[b,id,r] = SU3alg(0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0)
G[X,id].t2 = 0.0
G[X,id].t3 = 0.0
G[X,id].t4 = 0.0
G[X,id].t5 = 0.0
G[X,id].t6 = 0.0
G[X,id].t7 = 0.0
G[X,id].t8 = 0.0
end end
return nothing return nothing
end end
function krnl_SU2alg_zero!(G, lp::SpaceParm)
b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
for id in 1:lp.ndim
G[b,id,r] = SU2alg(0.0,0.0,0.0)
end
return nothing
end
function krnl_SU3alg_assign!(G, M, lp::SpaceParm)
b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
for id in 1:lp.ndim
G[b,id,r] = SU3alg(M[b,id,r,1], M[b,id,r,2], M[b,id,r,3], M[b,id,r,4],
M[b,id,r,5], M[b,id,r,6], M[b,id,r,7], M[b,id,r,8])
end
return nothing
end
function krnl_SU2alg_assign!(G, M, lp::SpaceParm)
b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
for id in 1:lp.ndim
G[b,id,r] = SU2alg(M[b,id,r,1], M[b,id,r,2], M[b,id,r,3])
end
return nothing
end
function randomn!(X, lp)
if (eltype(X) == SU2alg)
# randn!(CURAND.default_rng(), LazyRows(X).t1)
# randn!(CURAND.default_rng(), LazyRows(X).t2)
# randn!(CURAND.default_rng(), LazyRows(X).t3)
M = CuArray{Float64}(undef, lp.bsz, lp.ndim, lp.rsz, 3)
randn!(CURAND.default_rng(), M)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_SU2alg_assign!(X, M, lp)
end
elseif (eltype(X) == SU3alg)
# randn!(CURAND.default_rng(), LazyRows(X).t1)
# randn!(CURAND.default_rng(), LazyRows(X).t2)
# randn!(CURAND.default_rng(), LazyRows(X).t3)
# randn!(CURAND.default_rng(), LazyRows(X).t4)
# randn!(CURAND.default_rng(), LazyRows(X).t5)
# randn!(CURAND.default_rng(), LazyRows(X).t6)
# randn!(CURAND.default_rng(), LazyRows(X).t7)
# randn!(CURAND.default_rng(), LazyRows(X).t8)
M = CuArray{Float64}(undef, lp.bsz, lp.ndim, lp.rsz, 8)
randn!(CURAND.default_rng(), M)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_SU3alg_assign!(X, M, lp)
end
end
return nothing
end
function zero!(X, lp)
if (eltype(X) == SU2alg)
# fill!(LazyRows(X).t1, 0.0)
# fill!(LazyRows(X).t2, 0.0)
# fill!(LazyRows(X).t3, 0.0)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_SU2alg_zero!(X, lp)
end
end
if (eltype(X) == SU3alg)
# fill!(LazyRows(X).t1, 0.0)
# fill!(LazyRows(X).t2, 0.0)
# fill!(LazyRows(X).t3, 0.0)
# fill!(LazyRows(X).t4, 0.0)
# fill!(LazyRows(X).t5, 0.0)
# fill!(LazyRows(X).t6, 0.0)
# fill!(LazyRows(X).t7, 0.0)
# fill!(LazyRows(X).t8, 0.0)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_SU3alg_zero!(X, lp)
end
end
if (eltype(X) == SU2)
# fill!(LazyRows(X).t1.re, 1.0)
# fill!(LazyRows(X).t1.im, 0.0)
# fill!(LazyRows(X).t2.re, 0.0)
# fill!(LazyRows(X).t2.im, 0.0)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_SU2_one!(X, lp)
end
end
if (eltype(X) == SU3)
# fill!(LazyRows(X).u11.re, 1.0)
# fill!(LazyRows(X).u11.im, 0.0)
# fill!(LazyRows(X).u12.re, 0.0)
# fill!(LazyRows(X).u12.im, 0.0)
# fill!(LazyRows(X).u13.re, 0.0)
# fill!(LazyRows(X).u13.im, 0.0)
# fill!(LazyRows(X).u21.re, 0.0)
# fill!(LazyRows(X).u21.im, 0.0)
# fill!(LazyRows(X).u22.re, 1.0)
# fill!(LazyRows(X).u22.im, 0.0)
# fill!(LazyRows(X).u23.re, 0.0)
# fill!(LazyRows(X).u23.im, 0.0)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_SU3_one!(X, lp)
end
end
return nothing
end
function norm_field(X)
return CUDA.mapreduce(norm2, +, X)
# d = 0.0
if (eltype(X) == SU2alg)
# d = CUDA.mapreduce(x->x^2, +, LazyRows(X).t1) +
# CUDA.mapreduce(x->x^2, +, LazyRows(X).t2) +
# CUDA.mapreduce(x->x^2, +, LazyRows(X).t3)
elseif (eltype(X) == SU3alg)
# d = CUDA.mapreduce(x->x^2, +, LazyRows(X).t1) +
# CUDA.mapreduce(x->x^2, +, LazyRows(X).t2) +
# CUDA.mapreduce(x->x^2, +, LazyRows(X).t3) +
# CUDA.mapreduce(x->x^2, +, LazyRows(X).t4) +
# CUDA.mapreduce(x->x^2, +, LazyRows(X).t5) +
# CUDA.mapreduce(x->x^2, +, LazyRows(X).t6) +
# CUDA.mapreduce(x->x^2, +, LazyRows(X).t7) +
# CUDA.mapreduce(x->x^2, +, LazyRows(X).t8)
d = CUDA.mapreduce(norm2, +, X)
end
return d
end

View file

@ -9,10 +9,10 @@
### created: Thu Jul 15 11:27:28 2021 ### created: Thu Jul 15 11:27:28 2021
### ###
function gauge_action(U, lp::SpaceParm, gp::GaugeParm, kp::KernelParm, ymws::YMworkspace) function gauge_action(U, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace)
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_plaq!(ymws.cm, U, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_plaq!(ymws.cm, U, lp)
end end
S = gp.beta*( prod(lp.iL)*lp.npls - S = gp.beta*( prod(lp.iL)*lp.npls -
CUDA.mapreduce(real, +, ymws.cm)/gp.ng ) CUDA.mapreduce(real, +, ymws.cm)/gp.ng )
@ -20,32 +20,34 @@ function gauge_action(U, lp::SpaceParm, gp::GaugeParm, kp::KernelParm, ymws::YMw
return S return S
end end
function plaquette(U, lp::SpaceParm, gp::GaugeParm, kp::KernelParm, ymws::YMworkspace) function plaquette(U, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace)
println("Entro!")
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_plaq!(ymws.cm, U, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_plaq!(ymws.cm, U, lp)
end end
println("Salgo!")
return CUDA.mapreduce(real, +, real.(ymws.cm))/(prod(lp.iL)*lp.npls) return CUDA.mapreduce(real, +, ymws.cm)/(prod(lp.iL)*lp.npls)
end end
function hamiltonian(mom, U, lp, gp, kp, ymws) function hamiltonian(mom, U, lp, gp, ymws)
K = norm2(mom)/2.0 K = norm_field(mom)/2.0
V = gauge_action(U, lp, gp, kp, ymws) V = gauge_action(U, lp, gp, ymws)
println("K: ", K, " V: ", V) println("K: ", K, " V: ", V)
return K+V return K+V
end end
function HMC!(U, eps, ns, lp::SpaceParm, gp::GaugeParm, kp::KernelParm, ymws::YMworkspace; noacc=false) function HMC!(U, eps, ns, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace; noacc=false)
ymws.U1 .= U ymws.U1 .= U
randomn!(ymws.mom) randomn!(ymws.mom, lp)
hini = hamiltonian(ymws.mom, U, lp, gp, kp, ymws) hini = hamiltonian(ymws.mom, U, lp, gp, ymws)
OMF4!(ymws.mom, U, eps, ns, lp, gp, kp, ymws) OMF4!(ymws.mom, U, eps, ns, lp, gp, ymws)
dh = hamiltonian(ymws.mom, U, lp, gp, kp, ymws) - hini dh = hamiltonian(ymws.mom, U, lp, gp, ymws) - hini
pacc = exp(-dh) pacc = exp(-dh)
acc = true acc = true
@ -66,18 +68,17 @@ end
function krnl_updt!(mom, frc1, frc2, eps1, U, eps2, lp::SpaceParm) function krnl_updt!(mom, frc1, frc2, eps1, U, eps2, lp::SpaceParm)
X = map2latt((CUDA.threadIdx().x,CUDA.threadIdx().y,CUDA.threadIdx().z), b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
(CUDA.blockIdx().x,CUDA.blockIdx().y,CUDA.blockIdx().z))
@inbounds for id in 1:lp.ndim @inbounds for id in 1:lp.ndim
mom[X,id] = mom[X,id] + eps1 * (frc1[X,id]+frc2[X,id]) mom[b,id,r] = mom[b,id,r] + eps1 * (frc1[b,id,r]+frc2[b,id,r])
U[X,id] = expm(U[X,id],mom[X,id], eps2) U[b,id,r] = expm(U[b,id,r], mom[b,id,r], eps2)
end end
return nothing return nothing
end end
function OMF4!(mom, U, eps, ns, lp::SpaceParm, gp::GaugeParm, kp::KernelParm, ymws::YMworkspace) function OMF4!(mom, U, eps, ns, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace)
r1::Float64 = 0.08398315262876693 r1::Float64 = 0.08398315262876693
r2::Float64 = 0.2539785108410595 r2::Float64 = 0.2539785108410595
@ -86,43 +87,78 @@ function OMF4!(mom, U, eps, ns, lp::SpaceParm, gp::GaugeParm, kp::KernelParm, ym
r5::Float64 = 0.5-r1-r3 r5::Float64 = 0.5-r1-r3
r6::Float64 = 1.0-2.0*(r2+r4) r6::Float64 = 1.0-2.0*(r2+r4)
# ee = eps*gp.beta/gp.ng
# @device_code_warntype force0_wilson!(ymws.frc1,ymws.frc2, U, lp, gp)
# force0_wilson_pln!(ymws.frc1, ymws.frc2, U, lp)
# zero!(ymws.frc2, lp)
# for i in 1:ns
# # STEP 1
# CUDA.@sync begin
# CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r1*ee, U, eps*r2, lp)
# end
#
# # STEP 2
# force0_wilson_pln!(ymws.frc1, ymws.frc2, U, lp)
# CUDA.@sync begin
# CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r3*ee, U, eps*r4, lp)
# end
#
# # STEP 3
# force0_wilson_pln!(ymws.frc1, ymws.frc2, U, lp)
# CUDA.@sync begin
# CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r5*ee, U, eps*r6, lp)
# end
#
# # STEP 4
# force0_wilson_pln!(ymws.frc1, ymws.frc2, U, lp)
# CUDA.@sync begin
# CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r5*ee, U, eps*r4, lp)
# end
#
# # STEP 5
# force0_wilson_pln!(ymws.frc1, ymws.frc2, U, lp)
# CUDA.@sync begin
# CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r3*ee, U, eps*r2, lp)
# end
#
# # STEP 6
# force0_wilson_pln!(ymws.frc1, ymws.frc2, U, lp)
# CUDA.@sync begin
# CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r1*ee, U, 0.0, lp)
# end
# end
ee = eps*gp.beta/gp.ng ee = eps*gp.beta/gp.ng
force0_wilson!(ymws.frc1,ymws.frc2, U, lp, gp, kp) force0_wilson!(ymws.frc1, U, lp)
zero!(ymws.frc2, lp)
for i in 1:ns for i in 1:ns
# STEP 1 # STEP 1
CUDA.@sync begin mom .= mom .+ (r1*ee) .* (ymws.frc1 .+ ymws.frc2)
CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r1*ee, U, eps*r2, lp) U .= expm.(U, mom, eps*r2)
end
# STEP 2 # STEP 2
force0_wilson!(ymws.frc1,ymws.frc2, U, lp, gp, kp) force0_wilson!(ymws.frc1, U, lp)
CUDA.@sync begin mom .= mom .+ (r3*ee) .* (ymws.frc1 .+ ymws.frc2)
CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r3*ee, U, eps*r4, lp) U .= expm.(U, mom, eps*r4)
end
# STEP 3 # STEP 3
force0_wilson!(ymws.frc1,ymws.frc2, U, lp, gp, kp) force0_wilson!(ymws.frc1, U, lp)
CUDA.@sync begin mom .= mom .+ (r5*ee) .* (ymws.frc1 .+ ymws.frc2)
CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r5*ee, U, eps*r6, lp) U .= expm.(U, mom, eps*r6)
end
# STEP 4 # STEP 4
force0_wilson!(ymws.frc1,ymws.frc2, U, lp, gp, kp) force0_wilson!(ymws.frc1, U, lp)
CUDA.@sync begin mom .= mom .+ (r5*ee) .* (ymws.frc1 .+ ymws.frc2)
CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r5*ee, U, eps*r4, lp) U .= expm.(U, mom, eps*r4)
end
# STEP 5 # STEP 5
force0_wilson!(ymws.frc1,ymws.frc2, U, lp, gp, kp) force0_wilson!(ymws.frc1, U, lp)
CUDA.@sync begin mom .= mom .+ (r3*ee) .* (ymws.frc1 .+ ymws.frc2)
CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r3*ee, U, eps*r2, lp) U .= expm.(U, mom, eps*r2)
end
# STEP 6 # STEP 6
force0_wilson!(ymws.frc1,ymws.frc2, U, lp, gp, kp) force0_wilson!(ymws.frc1, U, lp)
CUDA.@sync begin mom .= mom .+ (r1*ee) .* (ymws.frc1 .+ ymws.frc2)
CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r1*ee, U, 0.0, lp)
end
end end
return nothing return nothing