Last version

This commit is contained in:
Alberto Ramos 2021-09-04 14:16:22 +02:00
parent c378648508
commit 76d0b66b4b
9 changed files with 515 additions and 322 deletions

View file

@ -68,6 +68,14 @@ Base.:*(a::SU2alg,b::Number) = SU2alg(a.t1*b,a.t2*b,a.t3*b)
Base.:*(b::Number,a::SU2alg) = SU2alg(a.t1*b,a.t2*b,a.t3*b)
Base.:/(a::SU2alg,b::Number) = SU2alg(a.t1/b,a.t2/b,a.t3/b)
function isgroup(a::SU2)
tol = 1.0E-10
if (abs2(a.t1) + abs2(a.t2) - 1.0 < 1.0E-10)
return true
else
return false
end
end
"""
function Base.exp(a::SU2alg, t::Number=1)
@ -155,3 +163,5 @@ function expm(g::SU2, a::SU2alg, t::Float64)
return SU2(t1,t2)
end
export SU2, SU2alg, inverse, dag, tr, projalg, expm, exp, norm, norm2, isgroup

View file

@ -79,8 +79,21 @@ function Base.:\(a::SU3,b::SU3)
end
function isgroup(a::SU3)
tol = 1.0E-10
g = a/a
if ( (abs(g.u11 - 1.0) < tol) &&
(abs(g.u12) < tol) &&
(abs(g.u13) < tol) &&
(abs(g.u21) < tol) &&
(abs(g.u22 - 1.0) < tol) &&
(abs(g.u23) < tol) )
return true
else
return false
end
end
struct SU3alg <: Algebra
t1::Float64
@ -122,7 +135,7 @@ Base.:/(a::SU3alg,b::Number) = SU3alg(a.t1/b,a.t2/b,a.t3/b,a.t4/b,a.t5/b,a.t6/b,
export SU3, SU3alg, inverse, dag, tr, projalg, expm, exp, norm, norm2
export SU3, SU3alg, inverse, dag, tr, projalg, expm, exp, norm, norm2, isgroup
struct M3x3
u11::ComplexF64

View file

@ -23,7 +23,7 @@ export SU2, SU2alg
include("GroupSU3.jl")
export SU3, SU3alg
export dot, expm, exp, dag, normalize, inverse, tr, projalg, norm, norm2
export dot, expm, exp, dag, normalize, inverse, tr, projalg, norm, norm2, isgroup
end # module

View file

@ -17,19 +17,19 @@ include("Groups/Groups.jl")
using .Groups
export Group, Algebra
export SU2, SU2alg, SU3, SU3alg
export dot, expm, exp, dag, normalize, inverse, tr, projalg, norm, norm2
export dot, expm, exp, dag, normalize, inverse, tr, projalg, norm, norm2, isgroup
include("Space/Space.jl")
using .Space
export SpaceParm, KernelParm
export map2latt, up, dw, shift
export SpaceParm
export up, dw, updw
include("YM/YM.jl")
using .YM
export YMworkspace, GaugeParm, force0_wilson!, field, randomn!, zero!, norm2
export YMworkspace, GaugeParm, force0_wilson!, field, field_pln, randomn!, zero!, norm2
export gauge_action, hamiltonian, plaquette, HMC!, OMF4!
end # module

View file

@ -16,11 +16,20 @@ struct SpaceParm{N,M}
ndim::Int64
iL::NTuple{N,Int64}
npls::Int64
plidx::NTuple{M, Tuple{Int64, Int64}}
plidx::NTuple{M,Tuple{Int64, Int64}}
function SpaceParm{N}(x) where {N}
blk::NTuple{N,Int64}
blkS::NTuple{N,Int64}
rbk::NTuple{N,Int64}
rbkS::NTuple{N,Int64}
bsz::Int64
rsz::Int64
function SpaceParm{N}(x, y) where {N}
M = convert(Int64, round(N*(N-1)/2))
N == length(x) || throw(ArgumentError("Tuple of incorrect length for dimension $N"))
N == length(x) || throw(ArgumentError("Lattice size incorrect length for dimension $N"))
N == length(y) || throw(ArgumentError("Block size incorrect length for dimension $N"))
pls = Vector{Tuple{Int64, Int64}}()
for i in 1:N
@ -28,120 +37,100 @@ struct SpaceParm{N,M}
push!(pls, (i,j))
end
end
return new{N,M}(N, x, M, tuple(pls...))
r = div.(x, y)
rS = ones(N)
yS = ones(N)
for i in 2:N
for j in 1:i-1
rS[i] = rS[i]*r[j]
yS[i] = yS[i]*y[j]
end
end
return new{N,M}(N, x, M, tuple(pls...), y,
tuple(yS...), tuple(r...), tuple(rS...), prod(y), prod(r))
end
end
export SpaceParm
struct KernelParm
threads::Tuple{Int64,Int64,Int64}
blocks::Tuple{Int64,Int64,Int64}
end
export KernelParm
@inline function up(p::NTuple{2,Int64}, id::Int64, lp::SpaceParm)
@inline shift(p::CartesianIndex{4}, sh::CartesianIndex{4}, lp::SpaceParm{4}) = CartesianIndex(mod1(p[1]+sh[1], lp.iL[1]),
mod1(p[2]+sh[2], lp.iL[2]),
mod1(p[3]+sh[3], lp.iL[3]),
mod1(p[4]+sh[4], lp.iL[4]))
@inline shift(p::CartesianIndex{3}, sh::CartesianIndex{3}, lp::SpaceParm{3}) = CartesianIndex(mod1(p[1]+sh[1], lp.iL[1]),
mod1(p[2]+sh[2], lp.iL[2]),
mod1(p[3]+sh[3], lp.iL[3]))
@inline shift(p::CartesianIndex{2}, sh::CartesianIndex{2}, lp::SpaceParm{2}) = CartesianIndex(mod1(p[1]+sh[1], lp.iL[1]),
mod1(p[2]+sh[2], lp.iL[2]))
@inline function dw(p::CartesianIndex{4}, id, lp::SpaceParm{4})
ic = mod(div(p[1]-1,lp.blkS[id]),lp.blk[id])
if (ic == lp.blk[id]-1)
b = p[1] - (lp.blk[id]-1)*lp.blkS[id]
if (id == 1)
s = CartesianIndex(mod1(p[1]-1, lp.iL[1]), p[2], p[3], p[4])
elseif (id == 2)
s = CartesianIndex(p[1], mod1(p[2]-1, lp.iL[2]), p[3], p[4])
elseif (id == 3)
s = CartesianIndex(p[1], p[2], mod1(p[3]-1, lp.iL[3]), p[4])
elseif (id == 4)
s = CartesianIndex(p[1], p[2], p[3], mod1(p[4]-1, lp.iL[4]))
end
return s
end
@inline function dw(p::CartesianIndex{3}, id, lp::SpaceParm{3})
if (id == 1)
s = CartesianIndex(mod1(p[1]-1, lp.iL[1]), p[2], p[3])
elseif (id == 2)
s = CartesianIndex(p[1], mod1(p[2]-1, lp.iL[2]), p[3])
elseif (id == 3)
s = CartesianIndex(p[1], p[2], mod1(p[3]-1, lp.iL[3]))
end
return s
end
@inline function dw(p::CartesianIndex{2}, id, lp::SpaceParm{2})
if (id == 1)
s = CartesianIndex(mod1(p[1]-1, lp.iL[1]), p[2])
elseif (id == 2)
s = CartesianIndex(p[1], mod1(p[2]-1, lp.iL[2]))
end
return s
end
@inline function up(p::CartesianIndex{4}, id::Int64, lp::SpaceParm{4})
if (id == 1)
s = CartesianIndex(mod1(p[1]+1, lp.iL[1]), p[2], p[3], p[4])
elseif (id == 2)
s = CartesianIndex(p[1], mod1(p[2]+1, lp.iL[2]), p[3], p[4])
elseif (id == 3)
s = CartesianIndex(p[1], p[2], mod1(p[3]+1, lp.iL[3]), p[4])
elseif (id == 4)
s = CartesianIndex(p[1], p[2], p[3], mod1(p[4]+1, lp.iL[4]))
ic = mod(div(p[2]-1,lp.rbkS[id]),lp.rbk[id])
if (ic == lp.rbk[id]-1)
r = p[2] - (lp.rbk[id]-1)*lp.rbkS[id]
else
r = p[2] + lp.rbkS[id]
end
else
b = p[1] + lp.blkS[id]
r = p[2]
end
return s
return b, r
end
@inline function up(p::CartesianIndex{3}, id::Int64, lp::SpaceParm{3})
@inline function dw(p::NTuple{2,Int64}, id::Int64, lp::SpaceParm)
if (id == 1)
s = CartesianIndex(mod1(p[1]+1, lp.iL[1]), p[2], p[3])
elseif (id == 2)
s = CartesianIndex(p[1], mod1(p[2]+1, lp.iL[2]), p[3])
elseif (id == 3)
s = CartesianIndex(p[1], p[2], mod1(p[3]+1, lp.iL[3]))
ic = mod(div(p[1]-1,lp.blkS[id]),lp.blk[id])
if (ic == 0)
b = p[1] + (lp.blk[id]-1)*lp.blkS[id]
ic = mod(div(p[2]-1,lp.rbkS[id]),lp.rbk[id])
if (ic == 0)
r = p[2] + (lp.rbk[id]-1)*lp.rbkS[id]
else
r = p[2] - lp.rbkS[id]
end
else
b = p[1] - lp.blkS[id]
r = p[2]
end
return s
return b, r
end
@inline function up(p::CartesianIndex{2}, id::Int64, lp::SpaceParm{2})
@inline function updw(p::NTuple{2,Int64}, id::Int64, lp::SpaceParm)
if (id == 1)
s = CartesianIndex(mod1(p[1]+1, lp.iL[1]), p[2])
elseif (id == 2)
s = CartesianIndex(p[1], mod1(p[2]+1, lp.iL[2]))
ic = mod(div(p[1]-1,lp.blkS[id]),lp.blk[id])
if (ic == lp.blk[id]-1)
bu = p[1] - (lp.blk[id]-1)*lp.blkS[id]
bd = p[1] - lp.blkS[id]
ic = mod(div(p[2]-1,lp.rbkS[id]),lp.rbk[id])
if (ic == lp.rbk[id]-1)
ru = p[2] - (lp.rbk[id]-1)*lp.rbkS[id]
else
ru = p[2] + lp.rbkS[id]
end
rd = p[2]
elseif (ic == 0)
bu = p[1] + lp.blkS[id]
bd = p[1] + (lp.blk[id]-1)*lp.blkS[id]
ic = mod(div(p[2]-1,lp.rbkS[id]),lp.rbk[id])
if (ic == 0)
rd = p[2] + (lp.rbk[id]-1)*lp.rbkS[id]
else
rd = p[2] - lp.rbkS[id]
end
ru = p[2]
else
bu = p[1] + lp.blkS[id]
bd = p[1] - lp.blkS[id]
rd = p[2]
ru = p[2]
end
return s
return bu, ru, bd, rd
end
@inline map2latt(th::NTuple{3,Int64},bl::NTuple{3,Int64}) = CartesianIndex(th[1],bl[3],bl[1],bl[2])
@inline function map2latt(th::NTuple{3,Int64},bl::NTuple{3,Int64}, lp)
i1 = mod1(th[1], lp.lblock[1]) + mod(bl[1], div(lp.iL[1],lp.lblock[1]))*lp.lblock[1]
i2 = div(th[1]-1, lp.lblock[1]) + 1 + div(bl[1]-1, div(lp.iL[1],lp.lblock[1]))*lp.lblock[2]
i3 = (bl[2] - 1) * lp.lblock[3] + th[2]
i4 = (bl[3] - 1) * lp.lblock[4] + th[3]
return CartesianIndex(i1,i2,i3,i4)
end
export map2latt, up, dw, shift
export up, dw, updw
end

View file

@ -24,7 +24,7 @@ end
export GaugeParm
include("YMfields.jl")
export field, randomn!, zero!, norm2
export field, field_pln, randomn!, zero!, norm2
struct YMworkspace
frc1
@ -32,6 +32,7 @@ struct YMworkspace
mom
U1
cm # complex of volume
rm # float of volume
function YMworkspace(::Type{T}, lp::SpaceParm) where {T <: Union{Group,Algebra}}
if (T == SU2)
@ -39,9 +40,6 @@ struct YMworkspace
f2 = field(SU2alg, lp)
mm = field(SU2alg, lp)
u1 = field(SU2, lp)
cs = zeros(ComplexF64,lp.iL...)
rs = zeros(Float64, lp.iL...)
return new(f1, f2, mm, u1, replace_storage(CuArray, cs))
end
if (T == SU3)
@ -49,11 +47,11 @@ struct YMworkspace
f2 = field(SU3alg, lp)
mm = field(SU3alg, lp)
u1 = field(SU3, lp)
cs = zeros(ComplexF64,lp.iL...)
rs = zeros(Float64, lp.iL...)
return new(f1, f2, mm, u1, replace_storage(CuArray, cs))
end
return nothing
cs = CuArray{ComplexF64, 2}(undef, lp.bsz,lp.rsz)
rs = CuArray{Float64, 2}(undef, lp.bsz,lp.rsz)
return new(f1, f2, mm, u1, cs, rs)
end
end
export YMworkspace

View file

@ -12,68 +12,171 @@
function krnl_plaq!(plx, U, ipl, lp::SpaceParm)
id1, id2 = lp.plidx[ipl]
X = map2latt((CUDA.threadIdx().x,CUDA.threadIdx().y,CUDA.threadIdx().z),
(CUDA.blockIdx().x,CUDA.blockIdx().y,CUDA.blockIdx().z))
Xu1 = up(X, id1, lp)
Xu2 = up(X, id2, lp)
b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
bu1, ru1 = up((b, r), id1, lp)
bu2, ru2 = up((b, r), id2, lp)
@inbounds plx[X] = tr(U[X, id1]*U[Xu1, id2] / (U[X, id2]*U[Xu2, id1]))
@inbounds plx[b, r] = tr(U[b,r,id1]*U[bu1,ru1,id2] / (U[b,r,id2]*U[bu2,ru2,id1]))
return nothing
end
function krnl_plaq!(plx, U, lp::SpaceParm)
X = map2latt((CUDA.threadIdx().x,CUDA.threadIdx().y,CUDA.threadIdx().z),
(CUDA.blockIdx().x,CUDA.blockIdx().y,CUDA.blockIdx().z))
b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
if (b < 1) || (b > lp.bsz)
@cuprintln("Error b: %b, %r")
end
if (r < 1) || (r > lp.rsz)
@cuprintln("Error r: %b, %r")
end
@inbounds plx[X] = complex(0.0)
plx[b,r] = complex(0.0)
for ipl in 1:lp.npls
id1, id2 = lp.plidx[ipl]
bu1, ru1 = up((b, r), id1, lp)
bu2, ru2 = up((b, r), id2, lp)
if (bu1 < 1) || (bu1 > lp.bsz)
@cuprintln("Error bu1: %b, %r, %id1")
end
if (ru1 < 1) || (ru1 > lp.rsz)
@cuprintln("Error ru1: %b, %r, %id1")
end
if (bu2 < 1) || (bu2 > lp.bsz)
@cuprintln("Error bu2: %b, %r, %id2")
end
if (ru2 < 1) || (ru2 > lp.rsz)
@cuprintln("Error ru2: %b, %r, %id2")
end
plx[b,r] += tr(U[r,id1,b]*U[bu1,id2,ru1] / (U[b,id2,r]*U[bu2,id1,ru2]))
end
return nothing
end
function krnl_force_wilson_pln!(frc1, frc2, U, ipl, lp::SpaceParm)
b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
@inbounds begin
id1, id2 = lp.plidx[ipl]
bu1, ru1 = up((b, r), id1, lp)
bu2, ru2 = up((b, r), id2, lp)
g1 = U[bu1,id2,ru1]/U[bu2,id1,ru2]
g2 = U[b,id2,r]\U[b,id1,r]
F1 = projalg(U[b,id1,r]*g1/U[b,id2,r])
F2 = projalg(g1*g2)
F3 = projalg(g2*g1)
frc1[b ,r,id1 ] -= F1
frc1[b ,r,id2 ] += F1
frc2[bu1,ru1,id2] -= F2
frc2[bu2,ru2,id1] += F3
end
return nothing
end
function krnl_force_wilson_nw!(fpl, U, lp::SpaceParm)
b = CUDA.threadIdx().x
ipl = mod1(CUDA.blockIdx().x, lp.npls)
r = div(CUDA.blockIdx().x-1, lp.npls)+1
@inbounds begin #for ipl in 1:lp.npls
id1, id2 = lp.plidx[ipl]
bu1, ru1 = up((b, r), id1, lp)
bu2, ru2 = up((b, r), id2, lp)
g1 = U[bu1,id2,ru1]/U[bu2,id1,ru2]
g2 = U[b,id2,r]\U[b,id1,r]
F1 = projalg(U[b,id1,r]*g1/U[b,id2,r])
F2 = projalg(g1*g2)
F3 = projalg(g2*g1)
fpl[b ,ipl, r ,1,1] = -F1
fpl[b ,ipl, r ,2,1] = F1
fpl[bu1,ipl, ru1,2,2] = -F2
fpl[bu2,ipl, ru2,1,2] = F3
end
return nothing
end
function krnl_force_wilson!(frc, U, lp::SpaceParm)
b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
@inbounds for id1 in 1:lp.ndim
bu1, ru1, bd1, rd1 = updw((b, r), id1, lp)
@inbounds for id2 in id1+1:lp.ndim
bu2, ru2, bd2, rd2 = updw((b, r), id2, lp)
bud, rud = dw((bu1,ru1), id2, lp)
bdu, rdu = dw((bu2,ru2), id1, lp)
F1 = projalg(U[b,id1,r]*U[bu1,id2,ru1]/(U[b,id2,r]*U[bu2,id1,ru2]))
F2 = projalg((U[b,id2,r]/(U[bd1,id2,rd1]*U[bdu,id1,rdu]))*U[bd1,id1,rd1])
F3 = projalg((U[bd2,id2,rd2]\U[bd2,id1,rd2])*(U[bud,id2,rud]/U[b,id1,r]))
frc[b,r,id1] -= F1 - F3
frc[b,r,id2] += F1 - F2
end
end
return nothing
end
function krnl_add_force_plns!(frc, fpl, lp::SpaceParm)
b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
@inbounds for ipl in 1:lp.npls
id1, id2 = lp.plidx[ipl]
Xu1 = up(X, id1, lp)
Xu2 = up(X, id2, lp)
plx[X] += tr(U[X, id1]*U[Xu1, id2] / (U[X, id2]*U[Xu2, id1]))
end
return nothing
end
function krnl_force_wilson_pln!(frc1, frc2, U, ipl, lp::SpaceParm, gp::GaugeParm)
X = map2latt((CUDA.threadIdx().x,CUDA.threadIdx().y,CUDA.threadIdx().z),
(CUDA.blockIdx().x,CUDA.blockIdx().y,CUDA.blockIdx().z))
id1, id2 = lp.plidx[ipl]
Xu1 = up(X, id1, lp)
Xu2 = up(X, id2, lp)
a = U[Xu1,id2]/U[Xu2,id1]
b = U[X ,id2]\U[X ,id1]
F1 = projalg(U[X,id1]*a/U[X,id2])
F2 = projalg(a*b)
F3 = projalg(b*a)
@inbounds begin
frc1[X ,id1] -= F1
frc1[X ,id2] += F1
frc2[Xu1,id2] -= F2
frc2[Xu2,id1] += F3
frc[b,r,id1] += fpl[b,ipl,r,1,1] + fpl[b,ipl,r,1,2]
frc[b,r,id2] += fpl[b,ipl,r,2,1] + fpl[b,ipl,r,2,2]
end
return nothing
end
function force0_wilson!(frc1, frc2, U, lp::SpaceParm, gp::GaugeParm, kp::KernelParm)
function force0_wilson_pln!(frc1, frc2, U, lp::SpaceParm)
zero!(frc1)
zero!(frc2)
for ipl in 1:lp.npls
zero!(frc1, lp)
zero!(frc2, lp)
for i in 1:lp.npls
CUDA.@sync begin
CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_force_wilson_pln!(frc1,frc2,U,ipl,lp,gp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_force_wilson_pln!(frc1,frc2,U,i,lp)
end
end
return nothing
end
function force0_wilson!(frc1, U, lp::SpaceParm)
zero!(frc1, lp)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_force_wilson!(frc1,U,lp)
end
return nothing
end
function force0_wilson_nw!(frc1, fpln, U, lp::SpaceParm)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz*lp.npls krnl_force_wilson_nw!(fpln,U,lp)
end
zero!(frc1, lp)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_force_plns!(frc1, fpln,lp)
end
return nothing
end

View file

@ -9,154 +9,198 @@
### created: Thu Jul 15 15:16:47 2021
###
un(t) = t <: Union{Group, Complex}
function field(::Type{T}, lp::SpaceParm) where {T <: Union{Group, Algebra}}
sz = lp.iL..., lp.ndim
sz = lp.bsz, lp.ndim, lp.rsz
if (T == SU2)
As = StructArray{SU2}((ones(ComplexF64, sz), zeros(ComplexF64, sz)))
# As = StructArray{SU2}(undef, sz, unwrap=un)
return CuArray{SU2, 3}(undef, sz)
elseif (T == SU2alg)
As = StructArray{SU2alg}((zeros(Float64, sz),
zeros(Float64, sz),
zeros(Float64, sz)))
# As = StructArray{SU2alg}(undef, sz, unwrap=un)
return CuArray{SU2alg, 3}(undef, sz)
elseif (T == SU3)
As = StructArray{SU3}((ones(ComplexF64, sz), zeros(ComplexF64, sz), zeros(ComplexF64, sz), zeros(ComplexF64, sz), ones(ComplexF64, sz), zeros(ComplexF64, sz)))
# As = StructArray{SU3}(undef, sz, unwrap=un)
return CuArray{SU3, 3}(undef, sz)
# As = Array{SU3, lp.ndim+1}(undef, sz)
# CUDA.@sync begin
# CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_SU3_zero!(As, lp)
# end
elseif (T == SU3alg)
As = StructArray{SU3alg}((zeros(Float64, sz),
zeros(Float64, sz),
zeros(Float64, sz),
zeros(Float64, sz),
zeros(Float64, sz),
zeros(Float64, sz),
zeros(Float64, sz),
zeros(Float64, sz)))
# As = StructArray{SU3alg}(undef, sz, unwrap=un)
return CuArray{SU3alg, 3}(undef, sz)
# As = Array{SU3alg, lp.ndim+1}(undef, sz)
# CUDA.@sync begin
# CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_SU3alg_zero!(As, lp)
# end
end
return replace_storage(CuArray, As)
end
function randomn!(X)
function krnl_SU3_one!(G, lp::SpaceParm)
if (eltype(X) == SU2alg)
randn!(CURAND.default_rng(), LazyRows(X).t1)
randn!(CURAND.default_rng(), LazyRows(X).t2)
randn!(CURAND.default_rng(), LazyRows(X).t3)
elseif (eltype(X) == SU3alg)
randn!(CURAND.default_rng(), LazyRows(X).t1)
randn!(CURAND.default_rng(), LazyRows(X).t2)
randn!(CURAND.default_rng(), LazyRows(X).t3)
randn!(CURAND.default_rng(), LazyRows(X).t4)
randn!(CURAND.default_rng(), LazyRows(X).t5)
randn!(CURAND.default_rng(), LazyRows(X).t6)
randn!(CURAND.default_rng(), LazyRows(X).t7)
randn!(CURAND.default_rng(), LazyRows(X).t8)
end
return nothing
end
function zero!(X)
if (eltype(X) == SU2alg)
fill!(LazyRows(X).t1, 0.0)
fill!(LazyRows(X).t2, 0.0)
fill!(LazyRows(X).t3, 0.0)
end
if (eltype(X) == SU3alg)
fill!(LazyRows(X).t1, 0.0)
fill!(LazyRows(X).t2, 0.0)
fill!(LazyRows(X).t3, 0.0)
fill!(LazyRows(X).t4, 0.0)
fill!(LazyRows(X).t5, 0.0)
fill!(LazyRows(X).t6, 0.0)
fill!(LazyRows(X).t7, 0.0)
fill!(LazyRows(X).t8, 0.0)
# CUDA.@sync begin
# CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_SU3alg_zero!(X, lp)
# end
end
if (eltype(X) == SU2)
fill!(LazyRows(X).t1, complex(1.0))
fill!(LazyRows(X).t2, complex(0.0))
end
if (eltype(X) == SU3)
fill!(LazyRows(X).u11, complex(1.0))
fill!(LazyRows(X).u12, complex(0.0))
fill!(LazyRows(X).u13, complex(0.0))
fill!(LazyRows(X).u21, complex(0.0))
fill!(LazyRows(X).u22, complex(1.0))
fill!(LazyRows(X).u23, complex(0.0))
# CUDA.@sync begin
# CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_SU3_zero!(X, lp)
# end
end
return nothing
end
function norm2(X)
d = 0.0
if (eltype(X) == SU2alg)
d = CUDA.mapreduce(x->x^2, +, LazyRows(X).t1) +
CUDA.mapreduce(x->x^2, +, LazyRows(X).t2) +
CUDA.mapreduce(x->x^2, +, LazyRows(X).t3)
elseif (eltype(X) == SU3alg)
d = CUDA.mapreduce(x->x^2, +, LazyRows(X).t1) +
CUDA.mapreduce(x->x^2, +, LazyRows(X).t2) +
CUDA.mapreduce(x->x^2, +, LazyRows(X).t3) +
CUDA.mapreduce(x->x^2, +, LazyRows(X).t4) +
CUDA.mapreduce(x->x^2, +, LazyRows(X).t5) +
CUDA.mapreduce(x->x^2, +, LazyRows(X).t6) +
CUDA.mapreduce(x->x^2, +, LazyRows(X).t7) +
CUDA.mapreduce(x->x^2, +, LazyRows(X).t8)
# d = CUDA.mapreduce(norm2, +, X)
end
return d
end
function krnl_SU3_zero!(G, lp::SpaceParm)
X = map2latt((CUDA.threadIdx().x,CUDA.threadIdx().y,CUDA.threadIdx().z),
(CUDA.blockIdx().x,CUDA.blockIdx().y,CUDA.blockIdx().z))
b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
for id in 1:lp.ndim
G[X,id].u11 = complex(1.0)
G[X,id].u12 = complex(0.0)
G[X,id].u13 = complex(0.0)
G[X,id].u21 = complex(0.0)
G[X,id].u22 = complex(1.0)
G[X,id].u23 = complex(0.0)
G[b,id,r] = SU3(1.0,0.0,0.0,0.0,1.0,0.0)
end
return nothing
end
function krnl_SU2_one!(G, lp::SpaceParm)
b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
for id in 1:lp.ndim
G[b,id,r] = SU2(1.0,0.0)
end
return nothing
end
function krnl_SU3alg_zero!(G, lp::SpaceParm)
X = map2latt((CUDA.threadIdx().x,CUDA.threadIdx().y,CUDA.threadIdx().z),
(CUDA.blockIdx().x,CUDA.blockIdx().y,CUDA.blockIdx().z))
b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
for id in 1:lp.ndim
G[X,id].t1 = 0.0
G[X,id].t2 = 0.0
G[X,id].t3 = 0.0
G[X,id].t4 = 0.0
G[X,id].t5 = 0.0
G[X,id].t6 = 0.0
G[X,id].t7 = 0.0
G[X,id].t8 = 0.0
G[b,id,r] = SU3alg(0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0)
end
return nothing
end
function krnl_SU2alg_zero!(G, lp::SpaceParm)
b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
for id in 1:lp.ndim
G[b,id,r] = SU2alg(0.0,0.0,0.0)
end
return nothing
end
function krnl_SU3alg_assign!(G, M, lp::SpaceParm)
b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
for id in 1:lp.ndim
G[b,id,r] = SU3alg(M[b,id,r,1], M[b,id,r,2], M[b,id,r,3], M[b,id,r,4],
M[b,id,r,5], M[b,id,r,6], M[b,id,r,7], M[b,id,r,8])
end
return nothing
end
function krnl_SU2alg_assign!(G, M, lp::SpaceParm)
b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
for id in 1:lp.ndim
G[b,id,r] = SU2alg(M[b,id,r,1], M[b,id,r,2], M[b,id,r,3])
end
return nothing
end
function randomn!(X, lp)
if (eltype(X) == SU2alg)
# randn!(CURAND.default_rng(), LazyRows(X).t1)
# randn!(CURAND.default_rng(), LazyRows(X).t2)
# randn!(CURAND.default_rng(), LazyRows(X).t3)
M = CuArray{Float64}(undef, lp.bsz, lp.ndim, lp.rsz, 3)
randn!(CURAND.default_rng(), M)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_SU2alg_assign!(X, M, lp)
end
elseif (eltype(X) == SU3alg)
# randn!(CURAND.default_rng(), LazyRows(X).t1)
# randn!(CURAND.default_rng(), LazyRows(X).t2)
# randn!(CURAND.default_rng(), LazyRows(X).t3)
# randn!(CURAND.default_rng(), LazyRows(X).t4)
# randn!(CURAND.default_rng(), LazyRows(X).t5)
# randn!(CURAND.default_rng(), LazyRows(X).t6)
# randn!(CURAND.default_rng(), LazyRows(X).t7)
# randn!(CURAND.default_rng(), LazyRows(X).t8)
M = CuArray{Float64}(undef, lp.bsz, lp.ndim, lp.rsz, 8)
randn!(CURAND.default_rng(), M)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_SU3alg_assign!(X, M, lp)
end
end
return nothing
end
function zero!(X, lp)
if (eltype(X) == SU2alg)
# fill!(LazyRows(X).t1, 0.0)
# fill!(LazyRows(X).t2, 0.0)
# fill!(LazyRows(X).t3, 0.0)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_SU2alg_zero!(X, lp)
end
end
if (eltype(X) == SU3alg)
# fill!(LazyRows(X).t1, 0.0)
# fill!(LazyRows(X).t2, 0.0)
# fill!(LazyRows(X).t3, 0.0)
# fill!(LazyRows(X).t4, 0.0)
# fill!(LazyRows(X).t5, 0.0)
# fill!(LazyRows(X).t6, 0.0)
# fill!(LazyRows(X).t7, 0.0)
# fill!(LazyRows(X).t8, 0.0)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_SU3alg_zero!(X, lp)
end
end
if (eltype(X) == SU2)
# fill!(LazyRows(X).t1.re, 1.0)
# fill!(LazyRows(X).t1.im, 0.0)
# fill!(LazyRows(X).t2.re, 0.0)
# fill!(LazyRows(X).t2.im, 0.0)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_SU2_one!(X, lp)
end
end
if (eltype(X) == SU3)
# fill!(LazyRows(X).u11.re, 1.0)
# fill!(LazyRows(X).u11.im, 0.0)
# fill!(LazyRows(X).u12.re, 0.0)
# fill!(LazyRows(X).u12.im, 0.0)
# fill!(LazyRows(X).u13.re, 0.0)
# fill!(LazyRows(X).u13.im, 0.0)
# fill!(LazyRows(X).u21.re, 0.0)
# fill!(LazyRows(X).u21.im, 0.0)
# fill!(LazyRows(X).u22.re, 1.0)
# fill!(LazyRows(X).u22.im, 0.0)
# fill!(LazyRows(X).u23.re, 0.0)
# fill!(LazyRows(X).u23.im, 0.0)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_SU3_one!(X, lp)
end
end
return nothing
end
function norm_field(X)
return CUDA.mapreduce(norm2, +, X)
# d = 0.0
if (eltype(X) == SU2alg)
# d = CUDA.mapreduce(x->x^2, +, LazyRows(X).t1) +
# CUDA.mapreduce(x->x^2, +, LazyRows(X).t2) +
# CUDA.mapreduce(x->x^2, +, LazyRows(X).t3)
elseif (eltype(X) == SU3alg)
# d = CUDA.mapreduce(x->x^2, +, LazyRows(X).t1) +
# CUDA.mapreduce(x->x^2, +, LazyRows(X).t2) +
# CUDA.mapreduce(x->x^2, +, LazyRows(X).t3) +
# CUDA.mapreduce(x->x^2, +, LazyRows(X).t4) +
# CUDA.mapreduce(x->x^2, +, LazyRows(X).t5) +
# CUDA.mapreduce(x->x^2, +, LazyRows(X).t6) +
# CUDA.mapreduce(x->x^2, +, LazyRows(X).t7) +
# CUDA.mapreduce(x->x^2, +, LazyRows(X).t8)
d = CUDA.mapreduce(norm2, +, X)
end
return d
end

View file

@ -9,10 +9,10 @@
### created: Thu Jul 15 11:27:28 2021
###
function gauge_action(U, lp::SpaceParm, gp::GaugeParm, kp::KernelParm, ymws::YMworkspace)
function gauge_action(U, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace)
CUDA.@sync begin
CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_plaq!(ymws.cm, U, lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_plaq!(ymws.cm, U, lp)
end
S = gp.beta*( prod(lp.iL)*lp.npls -
CUDA.mapreduce(real, +, ymws.cm)/gp.ng )
@ -20,32 +20,34 @@ function gauge_action(U, lp::SpaceParm, gp::GaugeParm, kp::KernelParm, ymws::YMw
return S
end
function plaquette(U, lp::SpaceParm, gp::GaugeParm, kp::KernelParm, ymws::YMworkspace)
function plaquette(U, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace)
println("Entro!")
CUDA.@sync begin
CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_plaq!(ymws.cm, U, lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_plaq!(ymws.cm, U, lp)
end
println("Salgo!")
return CUDA.mapreduce(real, +, real.(ymws.cm))/(prod(lp.iL)*lp.npls)
return CUDA.mapreduce(real, +, ymws.cm)/(prod(lp.iL)*lp.npls)
end
function hamiltonian(mom, U, lp, gp, kp, ymws)
K = norm2(mom)/2.0
V = gauge_action(U, lp, gp, kp, ymws)
function hamiltonian(mom, U, lp, gp, ymws)
K = norm_field(mom)/2.0
V = gauge_action(U, lp, gp, ymws)
println("K: ", K, " V: ", V)
return K+V
end
function HMC!(U, eps, ns, lp::SpaceParm, gp::GaugeParm, kp::KernelParm, ymws::YMworkspace; noacc=false)
function HMC!(U, eps, ns, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace; noacc=false)
ymws.U1 .= U
randomn!(ymws.mom)
hini = hamiltonian(ymws.mom, U, lp, gp, kp, ymws)
randomn!(ymws.mom, lp)
hini = hamiltonian(ymws.mom, U, lp, gp, ymws)
OMF4!(ymws.mom, U, eps, ns, lp, gp, kp, ymws)
OMF4!(ymws.mom, U, eps, ns, lp, gp, ymws)
dh = hamiltonian(ymws.mom, U, lp, gp, kp, ymws) - hini
dh = hamiltonian(ymws.mom, U, lp, gp, ymws) - hini
pacc = exp(-dh)
acc = true
@ -66,18 +68,17 @@ end
function krnl_updt!(mom, frc1, frc2, eps1, U, eps2, lp::SpaceParm)
X = map2latt((CUDA.threadIdx().x,CUDA.threadIdx().y,CUDA.threadIdx().z),
(CUDA.blockIdx().x,CUDA.blockIdx().y,CUDA.blockIdx().z))
b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
@inbounds for id in 1:lp.ndim
mom[X,id] = mom[X,id] + eps1 * (frc1[X,id]+frc2[X,id])
U[X,id] = expm(U[X,id],mom[X,id], eps2)
mom[b,id,r] = mom[b,id,r] + eps1 * (frc1[b,id,r]+frc2[b,id,r])
U[b,id,r] = expm(U[b,id,r], mom[b,id,r], eps2)
end
return nothing
end
function OMF4!(mom, U, eps, ns, lp::SpaceParm, gp::GaugeParm, kp::KernelParm, ymws::YMworkspace)
function OMF4!(mom, U, eps, ns, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace)
r1::Float64 = 0.08398315262876693
r2::Float64 = 0.2539785108410595
@ -86,43 +87,78 @@ function OMF4!(mom, U, eps, ns, lp::SpaceParm, gp::GaugeParm, kp::KernelParm, ym
r5::Float64 = 0.5-r1-r3
r6::Float64 = 1.0-2.0*(r2+r4)
# ee = eps*gp.beta/gp.ng
# @device_code_warntype force0_wilson!(ymws.frc1,ymws.frc2, U, lp, gp)
# force0_wilson_pln!(ymws.frc1, ymws.frc2, U, lp)
# zero!(ymws.frc2, lp)
# for i in 1:ns
# # STEP 1
# CUDA.@sync begin
# CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r1*ee, U, eps*r2, lp)
# end
#
# # STEP 2
# force0_wilson_pln!(ymws.frc1, ymws.frc2, U, lp)
# CUDA.@sync begin
# CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r3*ee, U, eps*r4, lp)
# end
#
# # STEP 3
# force0_wilson_pln!(ymws.frc1, ymws.frc2, U, lp)
# CUDA.@sync begin
# CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r5*ee, U, eps*r6, lp)
# end
#
# # STEP 4
# force0_wilson_pln!(ymws.frc1, ymws.frc2, U, lp)
# CUDA.@sync begin
# CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r5*ee, U, eps*r4, lp)
# end
#
# # STEP 5
# force0_wilson_pln!(ymws.frc1, ymws.frc2, U, lp)
# CUDA.@sync begin
# CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r3*ee, U, eps*r2, lp)
# end
#
# # STEP 6
# force0_wilson_pln!(ymws.frc1, ymws.frc2, U, lp)
# CUDA.@sync begin
# CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r1*ee, U, 0.0, lp)
# end
# end
ee = eps*gp.beta/gp.ng
force0_wilson!(ymws.frc1,ymws.frc2, U, lp, gp, kp)
force0_wilson!(ymws.frc1, U, lp)
zero!(ymws.frc2, lp)
for i in 1:ns
# STEP 1
CUDA.@sync begin
CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r1*ee, U, eps*r2, lp)
end
mom .= mom .+ (r1*ee) .* (ymws.frc1 .+ ymws.frc2)
U .= expm.(U, mom, eps*r2)
# STEP 2
force0_wilson!(ymws.frc1,ymws.frc2, U, lp, gp, kp)
CUDA.@sync begin
CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r3*ee, U, eps*r4, lp)
end
force0_wilson!(ymws.frc1, U, lp)
mom .= mom .+ (r3*ee) .* (ymws.frc1 .+ ymws.frc2)
U .= expm.(U, mom, eps*r4)
# STEP 3
force0_wilson!(ymws.frc1,ymws.frc2, U, lp, gp, kp)
CUDA.@sync begin
CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r5*ee, U, eps*r6, lp)
end
force0_wilson!(ymws.frc1, U, lp)
mom .= mom .+ (r5*ee) .* (ymws.frc1 .+ ymws.frc2)
U .= expm.(U, mom, eps*r6)
# STEP 4
force0_wilson!(ymws.frc1,ymws.frc2, U, lp, gp, kp)
CUDA.@sync begin
CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r5*ee, U, eps*r4, lp)
end
force0_wilson!(ymws.frc1, U, lp)
mom .= mom .+ (r5*ee) .* (ymws.frc1 .+ ymws.frc2)
U .= expm.(U, mom, eps*r4)
# STEP 5
force0_wilson!(ymws.frc1,ymws.frc2, U, lp, gp, kp)
CUDA.@sync begin
CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r3*ee, U, eps*r2, lp)
end
force0_wilson!(ymws.frc1, U, lp)
mom .= mom .+ (r3*ee) .* (ymws.frc1 .+ ymws.frc2)
U .= expm.(U, mom, eps*r2)
# STEP 6
force0_wilson!(ymws.frc1,ymws.frc2, U, lp, gp, kp)
CUDA.@sync begin
CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r1*ee, U, 0.0, lp)
end
force0_wilson!(ymws.frc1, U, lp)
mom .= mom .+ (r1*ee) .* (ymws.frc1 .+ ymws.frc2)
end
return nothing