Merge branch 'master' into 'master'

Master

See merge request alramos/latticegpu.jl!1
This commit is contained in:
Alberto Ramos Martinez 2023-11-21 09:54:10 +00:00
commit d16c9ad341
15 changed files with 1388 additions and 124 deletions

View file

@ -19,11 +19,38 @@ using ..Fields
using ..YM
using ..Spinors
struct DiracParam{T,R}
m0::T
csw::T
th::NTuple{4,Complex{T}}
ct::T
function DiracParam{T}(::Type{R},m0,csw,th,ct) where {T,R}
return new{T,R}(m0,csw,th,ct)
end
function DiracParam{T}(m0,csw,th,ct) where {T}
return new{T,SU3fund}(m0,csw,th,ct)
end
end
function Base.show(io::IO, dpar::DiracParam{T,R}) where {T,R}
println(io, "Wilson fermions in the: ", R, " representation")
println(io, " - Bare mass: ", dpar.m0," // Kappa = ",0.5/(dpar.m0+4))
println(io, " - Csw : ", dpar.csw)
println(io, " - c_t: ", dpar.ct)
println(io, " - Theta: ", dpar.th)
return nothing
end
struct DiracWorkspace{T}
sr
sp
sAp
st
csw
function DiracWorkspace(::Type{G}, ::Type{T}, lp::SpaceParm{4,6,B,D}) where {G,T <: AbstractFloat, B,D}
@ -31,53 +58,285 @@ struct DiracWorkspace{T}
sp = scalar_field(Spinor{4,G}, lp)
sAp = scalar_field(Spinor{4,G}, lp)
st = scalar_field(Spinor{4,G}, lp)
return new{T}(sr,sp,sAp,st)
csw = tensor_field(U3alg{T},lp)
return new{T}(sr,sp,sAp,st,csw,cs)
end
end
export DiracWorkspace
function Dw!(so, U, si, m0, lp::SpaceParm)
export DiracWorkspace, DiracParam
"""
function Csw!(dws, U, gp, lp::SpaceParm)
Computes the clover and stores it in dws.csw.
"""
function Csw!(dws, U, gp, lp::SpaceParm{4,6,B,D}) where {B,D}
@timeit "Csw computation" begin
for i in 1:Int(lp.npls)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_csw!(dws.csw, U, gp.Ubnd, i, lp)
end
end
end
return nothing
end
function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, lp::SpaceParm{4,M,B,D}) where {T,M,B,D}
@inbounds begin
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
I = point_coord((b,r), lp)
it = I[4]
id1, id2 = lp.plidx[ipl]
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4)
bu1, ru1 = up((b, r), id1, lp)
bu2, ru2 = up((b, r), id2, lp)
bd1, rd1 = dw((b, r), id1, lp)
bd2, rd2 = dw((b, r), id2, lp)
bdd, rdd = dw((bd1, rd1), id2, lp)
bud, rud = dw((bu1, ru1), id2, lp)
bdu, rdu = up((bd1, rd1), id2, lp)
if SFBC && (it == lp.iL[end])
gt1 = Ubnd[id2]
gt2 = Ubnd[id2]
else
gt1 = U[bu1,id2,ru1]
gt2 = U[bud,id2,rud]
end
M1 = U[b,id1,r]*gt1/(U[b,id2,r]*U[bu2,id1,ru2])
M2 = (U[bd2,id2,rd2]\(U[bd2,id1,rd2]*gt2))/U[b,id1,r]
M3 = (U[bdd,id2,rdd]*U[bd1,id1,rd1])\(U[bdd,id1,rdd]*U[bd2,id2,rd2])
M4 = (U[b,id2,r]/(U[bd1,id2,rd1]*U[bdu,id1,rdu]))*U[bd1,id1,rd1]
if !(SFBC && (it == 1))
csw[b,ipl,r] = 0.125*(antsym(M1)+antsym(M2)+antsym(M3)+antsym(M4))
end
end
return nothing
end
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
if abs(dpar.csw) > 1.0E-10
@timeit "Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, m0, [1,1,1,1], lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, lp)
end
end
else
@timeit "Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.th, lp)
end
end
end
return nothing
end
function DwdagDw!(so, U, si, m0, st, lp::SpaceParm)
function krnl_Dwimpr!(so, U, si, Fcsw, m0, th, csw, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
+Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
@timeit "DwdagDw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(st, U, si, m0, [1,1,1,1], lp)
end
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, st, m0, [1,1,1,1], lp)
end
end
return nothing
end
function krnl_Dw!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
# For SF:
# - cttilde affects mass term at x0 = a, T-a
# - Spinor can be periodic as long as 0 at x_0=0
@inbounds begin
so[b,r] = (4+m0)*si[b,r]
for id in 1:4
bu, ru = up((b,r), id, lp)
bd, rd = dw((b,r), id, lp)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r]
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
so[b,r] -= ( th[id]*gpmul(Pgamma{id,-1},U[b,id,r],si[bu,ru]) +
conj(th[id])*gdagpmul(Pgamma{id,+1},U[bd,id,rd],si[bd,rd]) )/2
end
return nothing
end
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
if abs(dpar.csw) > 1.0E-10
@timeit "Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp)
end
end
else
@timeit "Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.th, dpar.ct, lp)
end
end
end
return nothing
end
function krnl_Dwimpr!(so, U, si, Fcsw, m0, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
# The field si is assumed to be zero at t = 0
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) != 1)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
+Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
so[b,r] += (ct-1.0)*si[b,r]
end
end
end
return nothing
end
function krnl_Dw!(so, U, si, m0, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
# The field si is assumed to be zero at t = 0
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) != 1)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r]
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
so[b,r] += (ct-1.0)*si[b,r]
end
end
end
return nothing
end
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
if abs(dpar.csw) > 1.0E-10
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, lp)
end
end
else
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.th, lp)
end
end
end
return nothing
end
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, th, csw, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
+Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
so[b,r] = dmul(Gamma{5}, so[b,r])
end
return nothing
@ -87,21 +346,292 @@ function krnl_g5Dw!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
@inbounds begin
so[b,r] = (4+m0)*si[b,r]
for id in 1:4
bu, ru = up((b,r), id, lp)
bd, rd = dw((b,r), id, lp)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r]
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
so[b,r] -= ( th[id]*gpmul(Pgamma{id,-1},U[b,id,r],si[bu,ru]) +
conj(th[id])*gdagpmul(Pgamma{id,+1},U[bd,id,rd],si[bd,rd]) )/2
end
so[b,r] = dmul(Gamma{5}, so[b,r])
end
return nothing
end
export Dw!, DwdagDw!
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
if abs(dpar.csw) > 1.0E-10
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp)
end
end
else
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.th, dpar.ct, lp)
end
end
end
return nothing
end
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
# The field si is assumed to be zero at t = 0
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) != 1)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
+Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
so[b,r] += (ct-1.0)*si[b,r]
end
end
end
so[b,r] = dmul(Gamma{5}, so[b,r])
return nothing
end
function krnl_g5Dw!(so, U, si, m0, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
# The field si is assumed to be zero at t = 0
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) != 1)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r]
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
so[b,r] += (ct-1.0)*si[b,r]
end
end
end
so[b,r] = dmul(Gamma{5}, so[b,r])
return nothing
end
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
if abs(dpar.csw) > 1.0E-10
@timeit "DwdagDw" begin
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp)
end
end
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp)
end
end
end
else
@timeit "DwdagDw" begin
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.th, dpar.ct, lp)
end
end
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, dpar.th, dpar.ct, lp)
end
end
end
end
return nothing
end
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
if abs(dpar.csw) > 1.0E-10
@timeit "DwdagDw" begin
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, lp)
end
end
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, dpar.th, dpar.csw, lp)
end
end
end
else
@timeit "DwdagDw" begin
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.th, lp)
end
end
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, dpar.th, lp)
end
end
end
end
return nothing
end
function SF_bndfix!(sp, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(sp, lp)
end
return nothing
end
function krnl_sfbndfix!(sp,lp::SpaceParm)
b=Int64(CUDA.threadIdx().x)
r=Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) == 1)
sp[b,r] = 0.0*sp[b,r]
end
return nothing
end
"""
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund / SU2fund {T}}}, lp::SpaceParm, t::Int64 = 0)
Randomizes the SU2fund / SU3fund fermion field. If the argument t is present, it only randomizes that time-slice.
"""
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::SpaceParm, t::Int64 = 0) where {T}
@timeit "Randomize pseudofermion field" begin
p = ntuple(i->CUDA.randn(T, lp.bsz, 3, lp.rsz,2),4) # complex generation not suported for Julia 1.5.4
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su3!(f,p,lp,t)
end
end
return nothing
end
function krnl_assign_pf_su3!(f::AbstractArray, p , lp::SpaceParm, t::Int64)
@inbounds begin
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
if t == 0
f[b,r] = Spinor(map(x->SU3fund(x[b,1,r,1] + im* x[b,1,r,2],
x[b,2,r,1] + im* x[b,2,r,2],
x[b,3,r,1] + im* x[b,3,r,2]),p))
elseif point_time((b,r),lp) == t
f[b,r] = Spinor(map(x->SU3fund(x[b,1,r,1] + im* x[b,1,r,2],
x[b,2,r,1] + im* x[b,2,r,2],
x[b,3,r,1] + im* x[b,3,r,2]),p))
end
end
return nothing
end
function pfrandomize!(f::AbstractArray{Spinor{4, SU2fund{T}}},lp::SpaceParm, t::Int64=0) where {T}
@timeit "Randomize pseudofermion field" begin
p = ntuple(i->CUDA.randn(T, lp.bsz, 2, lp.rsz,2),4) # complex generation not suported for Julia 1.5.4
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su2!(f,p,lp,t)
end
end
return nothing
end
function krnl_assign_pf_su2!(f::AbstractArray, p , lp::SpaceParm, t::Int64)
@inbounds begin
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
if t == 0
f[b,r] = Spinor(map(x->SU2fund(x[b,1,r,1] + im* x[b,1,r,2],
x[b,2,r,1] + im* x[b,2,r,2]),p))
elseif point_time((b,r),lp) == t
f[b,r] = Spinor(map(x->SU2fund(x[b,1,r,1] + im* x[b,1,r,2],
x[b,2,r,1] + im* x[b,2,r,2]),p))
end
end
return nothing
end
export Dw!, g5Dw!, DwdagDw!, SF_bndfix!, Csw!, pfrandomize!
include("DiracIO.jl")
export read_prop, save_prop, read_dpar
end

183
src/Dirac/DiracIO.jl Normal file
View file

@ -0,0 +1,183 @@
"""
read_prop(fname::String)
Reads propagator from file `fname` using the native (BDIO) format.
"""
function read_prop(fname::String)
UID_HDR = 14
fb = BDIO_open(fname, "r")
while BDIO_get_uinfo(fb) != UID_HDR
BDIO_seek!(fb)
end
ihdr = Vector{Int32}(undef, 2)
BDIO_read(fb, ihdr)
if (ihdr[1] != convert(Int32, 1753996112)) && (ihdr[2] != convert(Int32, 5))
error("Wrong file format [header]")
end
run = BDIO.BDIO_read_str(fb)
while BDIO_get_uinfo(fb) != 1
BDIO_seek!(fb)
end
ifoo = Vector{Int32}(undef, 2)
BDIO_read(fb, ifoo)
ndim = convert(Int64, ifoo[1])
npls = convert(Int64, round(ndim*(ndim-1)/2))
ibc = convert(Int64, ifoo[2])
ifoo = Vector{Int32}(undef, ndim+convert(Int32, npls))
BDIO_read(fb, ifoo)
iL = ntuple(i -> convert(Int64, ifoo[i]),ndim)
ntw = ntuple(i -> convert(Int64, ifoo[i+ndim]), npls)
foopars = Vector{Float64}(undef, 4)
BDIO_read(fb, foopars)
footh = Vector{Float64}(undef, 4)
lp = SpaceParm{ndim}(iL, (4,4,4,4), ibc, ntw)
dpar = DiracParam{Float64}(SU3fund,foopars[1],foopars[2],ntuple(i -> footh[i], 4),foopars[3])
dtr = (2,3,4,1)
while BDIO_get_uinfo(fb) != 8
BDIO_seek!(fb)
end
psicpu = Array{Spinor{4,SU3fund{Float64}}, 2}(undef, lp.bsz, lp.rsz)
spvec = Vector{ComplexF64}(undef,12)
for i4 in 1:lp.iL[4]
for i1 in 1:lp.iL[1]
for i2 in 1:lp.iL[2]
for i3 in 1:lp.iL[3]
b, r = point_index(CartesianIndex(i1,i2,i3,i4), lp)
BDIO_read(fb, spvec)
psicpu[b,r] = Spinor{4,SU3fund{Float64}}(ntuple(i -> SU3fund{Float64}(spvec[Int(3*i - 2)],spvec[Int(3*i - 1)] , spvec[Int(3*i)] ),4))
end
end
end
end
BDIO_close!(fb)
return CuArray(psicpu)
end
"""
save_prop(fname, psi, lp::SpaceParm, dpar::DiracParam; run::Union{Nothing,String}=nothing)
Saves propagator `psi` in the file `fname` using the native (BDIO) format.
"""
function save_prop(fname::String, psi, lp::SpaceParm{4,M,B,D}, dpar::DiracParam; run::Union{Nothing,String}=nothing) where {M,B,D}
ihdr = [convert(Int32, 1753996112),convert(Int32,5)]
UID_HDR = 14
if isfile(fname)
fb = BDIO_open(fname, "a")
else
fb = BDIO_open(fname, "w", "Propagator of LatticeGPU.jl")
BDIO_start_record!(fb, BDIO_BIN_GENERIC, UID_HDR)
BDIO_write!(fb, ihdr)
if run != nothing
BDIO_write!(fb, run*"\0")
end
BDIO_write_hash!(fb)
BDIO_start_record!(fb, BDIO_BIN_GENERIC, 1)
BDIO_write!(fb, [convert(Int32, 4)])
BDIO_write!(fb, [convert(Int32, B)])
BDIO_write!(fb, [convert(Int32, lp.iL[i]) for i in 1:4])
BDIO_write!(fb, [convert(Int32, lp.ntw[i]) for i in 1:M])
BDIO_write!(fb, [dpar.m0, dpar.csw, dpar.ct])
BDIO_write!(fb, [dpar.th[i] for i in 1:4])
end
BDIO_write_hash!(fb)
dtr = (2,3,4,1)
BDIO_start_record!(fb, BDIO_BIN_F64LE, 8, true)
psicpu = Array(psi)
for i4 in 1:lp.iL[4]
for i1 in 1:lp.iL[1]
for i2 in 1:lp.iL[2]
for i3 in 1:lp.iL[3]
b, r = point_index(CartesianIndex(i1,i2,i3,i4), lp)
for sp in 1:4
for c in (:t1,:t2,:t3)
BDIO_write!(fb, [getfield(psicpu[b,r].s[sp],c)])
end
end
end
end
end
end
BDIO_write_hash!(fb)
BDIO_close!(fb)
return nothing
end
"""
read_dpar(fname::String)
Reads Dirac parameters from file `fname` using the native (BDIO) format. Returns DiracParam and SpaceParm.
"""
function read_dpar(fname::String)
UID_HDR = 14
fb = BDIO_open(fname, "r")
while BDIO_get_uinfo(fb) != UID_HDR
BDIO_seek!(fb)
end
ihdr = Vector{Int32}(undef, 2)
BDIO_read(fb, ihdr)
if (ihdr[1] != convert(Int32, 1753996112)) && (ihdr[2] != convert(Int32, 5))
error("Wrong file format [header]")
end
run = BDIO.BDIO_read_str(fb)
while BDIO_get_uinfo(fb) != 1
BDIO_seek!(fb)
end
ifoo = Vector{Int32}(undef, 2)
BDIO_read(fb, ifoo)
ndim = convert(Int64, ifoo[1])
npls = convert(Int64, round(ndim*(ndim-1)/2))
ibc = convert(Int64, ifoo[2])
ifoo = Vector{Int32}(undef, ndim+convert(Int32, npls))
BDIO_read(fb, ifoo)
iL = ntuple(i -> convert(Int64, ifoo[i]),ndim)
ntw = ntuple(i -> convert(Int64, ifoo[i+ndim]), npls)
foopars = Vector{Float64}(undef, 4)
BDIO_read(fb, foopars)
footh = Vector{Float64}(undef, 4)
lp = SpaceParm{ndim}(iL, (4,4,4,4), ibc, ntw)
dpar = DiracParam{Float64}(SU3fund,foopars[1],foopars[2],ntuple(i -> footh[i], 4),foopars[3])
BDIO_close!(fb)
return dpar, lp
end

View file

@ -43,7 +43,15 @@ Returns a scalar field of elemental type `T`, with lexicografic memory layout.
"""
scalar_field_point(::Type{T}, lp::SpaceParm{N,M,D}) where {T,N,M,D} = CuArray{T, N}(undef, lp.iL...)
export vector_field, scalar_field, nscalar_field, scalar_field_point
"""
tensor_field(::Type{T}, lp::SpaceParm)
Returns a tensor field of elemental type `T`.
"""
tensor_field(::Type{T}, lp::SpaceParm) where {T} = CuArray{T, 3}(undef, lp.bsz, lp.npls, lp.rsz)
export vector_field, scalar_field, nscalar_field, scalar_field_point, tensor_field
end

33
src/Groups/AlgebraU3.jl Normal file
View file

@ -0,0 +1,33 @@
struct U3alg{T} <: Algebra
u11::T
u22::T
u33::T
u12::Complex{T}
u13::Complex{T}
u23::Complex{T}
end
function antsym(a::SU3{T}) where T <: AbstractFloat
t1 = 2.0*imag(a.u11)
t2 = 2.0*imag(a.u22)
t3 = 2.0*imag(a.u12*a.u21 - a.u11*a.u22)
t4 = a.u12 - conj(a.u21)
t5 = a.u13 - (a.u12*a.u23 - a.u13*a.u22)
t6 = a.u23 - (a.u13*a.u21 - a.u11*a.u23)
return U3alg{T}(t1,t2,t3,t4,t5,t6)
end
Base.:*(a::U3alg{T},b::SU3fund{T}) where T <: AbstractFloat = SU3fund{T}(im*a.u11*b.t1 + a.u12*b.t2 + a.u13*b.t3,
-conj(a.u12)*b.t1 + im*a.u22*b.t2 + a.u23*b.t3,
-conj(a.u13)*b.t1 - conj(a.u23)*b.t2 + im*a.u33*b.t3)
Base.:+(a::U3alg{T},b::U3alg{T}) where T <: AbstractFloat = U3alg{T}(a.u11 + b.u11, a.u22 + b.u22, a.u33 + b.u33, a.u12 + b.u12, a.u13 + b.u13, a.u23 + b.u23)
Base.:*(r::Number, a::U3alg{T}) where T <: AbstractFloat = U3alg{T}(r*a.u11, r*a.u22, r*a.u33, r*a.u12, r*a.u13, r*a.u23)

View file

@ -24,14 +24,14 @@ dag(a::SU3fund{T}) where T <: AbstractFloat = SU3fund{T}(conj(a.
Returns the norm of a fundamental element. Same result as dot(a,a).
"""
norm(a::SU3fund{T}) where T <: AbstractFloat = sqrt((abs2(a.t1) + abs2(a.t2) + abs2(a.t1)))
norm(a::SU3fund{T}) where T <: AbstractFloat = sqrt((abs2(a.t1) + abs2(a.t2) + abs2(a.t3)))
"""
norm(a::SU3fund{T})
Returns the norm of a fundamental element. Same result as sqrt(dot(a,a)).
"""
norm2(a::SU3fund{T}) where T <: AbstractFloat = (abs2(a.t1) + abs2(a.t2) + abs2(a.t1))
norm2(a::SU3fund{T}) where T <: AbstractFloat = (abs2(a.t1) + abs2(a.t2) + abs2(a.t3))
"""
dot(a::SU3fund{T},b::SU3fund{T})

View file

@ -74,11 +74,14 @@ include("FundamentalSU3.jl")
export imm, mimm
## END SU(3)
include("AlgebraU3.jl")
export U3alg
include("GroupU1.jl")
export U1, U1alg
export dot, expm, exp, dag, unitarize, inverse, tr, projalg, norm, norm2, isgroup, alg2mat, dev_one
export dot, expm, exp, dag, unitarize, inverse, tr, projalg, norm, norm2, isgroup, alg2mat, dev_one, antsym
end # module

View file

@ -16,8 +16,8 @@ include("Groups/Groups.jl")
using .Groups
export Group, Algebra, GMatrix
export SU2, SU2alg, SU3, SU3alg, M3x3, M2x2, U1, U1alg, SU3fund
export dot, expm, exp, dag, unitarize, inverse, tr, projalg, norm, norm2, isgroup, alg2mat, dev_one
export SU2, SU2alg, SU3, SU3alg, M3x3, M2x2, U1, U1alg, SU3fund, U3alg
export dot, expm, exp, dag, unitarize, inverse, tr, projalg, norm, norm2, isgroup, alg2mat, dev_one, antsym
include("Space/Space.jl")
@ -28,7 +28,7 @@ export BC_PERIODIC, BC_OPEN, BC_SF_AFWB, BC_SF_ORBI
include("Fields/Fields.jl")
using .Fields
export vector_field, scalar_field, nscalar_field, scalar_field_point
export vector_field, scalar_field, nscalar_field, scalar_field_point, tensor_field
include("MD/MD.jl")
using .MD
@ -56,12 +56,13 @@ export pmul, gpmul, gdagpmul, dmul
include("Dirac/Dirac.jl")
using .Dirac
export DiracWorkspace
export Dw!, DwdagDw!
export DiracWorkspace, DiracParam
export Dw!, g5Dw!, DwdagDw!, SF_bndfix!, Csw!, pfrandomize!
export read_prop, save_prop, read_dpar
include("Solvers/Solvers.jl")
using .Solvers
export CG!
export propagator!, bndpropagator!, Tbndpropagator!, bndtobnd
end # module

View file

@ -14,21 +14,43 @@
Solves the linear equation `Ax = si`
"""
function CG!(si, U, m0, A, lp::SpaceParm, dws::DiracWorkspace)
function krnl_dot!(sum,fone,ftwo)
b=Int64(CUDA.threadIdx().x)
r=Int64(CUDA.blockIdx().x)
sum[b,r] = dot(fone[b,r],ftwo[b,r])
return nothing
end
function field_dot(fone::AbstractArray,ftwo::AbstractArray,sumf,lp) where {T}
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_dot!(sumf,fone,ftwo)
end
return sum(sumf)
end
function CG!(si, U, A, dpar::DiracParam, lp::SpaceParm, dws::DiracWorkspace{T}, maxiter::Int64 = 10, tol=1.0) where {T}
dws.sr .= si
dws.sp .= si
norm = CUDA.mapreduce(x -> norm2(x), +, si)
fill!(si,zero(eltype(so)))
fill!(si,zero(eltype(si)))
err = 0.0
tol = eps * norm
tol = tol * norm
iterations = 0
sumf = scalar_field(Complex{T}, lp)
niter = 0
for i in 1:maxiter
A(dws.sAp, U, dws.sp, am0, dws.st, lp)
prod = CUDA.mapreduce(x -> dot(x[1],x[2]), +, zip(dws.sp, dws.sAp))
A(dws.sAp, U, dws.sp, dpar, dws, lp)
prod = field_dot(dws.sp,dws.sAp,sumf,lp)
alpha = norm/prod
si .= si .+ alpha .* dws.sp

177
src/Solvers/Propagators.jl Normal file
View file

@ -0,0 +1,177 @@
"""
function propagator!(pro,U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm, maxiter::Int64, tol::Float64, y::NTuple{4,Int64}, c::Int64, s::Int64)
Saves the fermionic progapator in pro for a source at point `y` with color `c` and spin `s`. If the last three arguments are replaced by `time::Int64`, the source is replaced
by a random source in spin and color at t = `time`.
"""
function propagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm, maxiter::Int64, tol::Float64, y::NTuple{4,Int64}, c::Int64, s::Int64) where {T}
function krnlg5!(src)
b=Int64(CUDA.threadIdx().x)
r=Int64(CUDA.blockIdx().x)
src[b,r] = dmul(Gamma{5},src[b,r])
return nothing
end
fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
CUDA.@allowscalar dws.sp[point_index(CartesianIndex{lp.ndim}(y),lp)...] = Spinor{4,SU3fund{Float64}}(ntuple(i -> (i==s)*SU3fund{Float64}(ntuple(j -> (j==c)*1.0,3)...),4))
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
end
g5Dw!(pro,U,dws.sp,dpar,dws,lp)
CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
return nothing
end
function propagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm, maxiter::Int64, tol::Float64, time::Int64) where {T}
function krnlg5!(src)
b=Int64(CUDA.threadIdx().x)
r=Int64(CUDA.blockIdx().x)
src[b,r] = dmul(Gamma{5},src[b,r])
return nothing
end
pfrandomize!(dws.sp,lp,time)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
end
g5Dw!(pro,U,dws.sp,dpar,dws,lp)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
end
CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
return nothing
end
"""
function bndpropagator!(pro,U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)
Saves the propagator in from the t=0 boundary to the bulk for the SF boundary conditions for a source with color 'c' and spin 's'. The factor c_t is included while the factor 1/sqrt(V) is not.
For the propagator from T to the bulk, use the function Tbndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)
"""
function bndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64) where {T,D}
function krnlg5!(src)
b=Int64(CUDA.threadIdx().x)
r=Int64(CUDA.blockIdx().x)
src[b,r] = dmul(Gamma{5},src[b,r])
return nothing
end
function krnl_assign_bndsrc!(src,U,lp::SpaceParm, c::Int64, s::Int64)
b=Int64(CUDA.threadIdx().x)
r=Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) == 2)
bd4, rd4 = dw((b,r), 4, lp)
src[b,r] = gdagpmul(Pgamma{4,1},U[bd4,4,rd4],Spinor{4,SU3fund{Float64}}(ntuple(i -> (i==s)*SU3fund{Float64}(ntuple(j -> (j==c)*1.0,3)...),4)))/2
end
return nothing
end
fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_bndsrc!(dws.sp, U, lp, c, s)
end
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
end
g5Dw!(pro,U,dpar.ct*dws.sp,dpar,dws,lp)
CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
return pro
end
"""
function Tbndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)
Returns the propagator from the t=T boundary to the bulk for the SF boundary conditions for a source with color 'c' and spin 's'. The factor c_t is included while the factor 1/sqrt(V) is not.
For the propagator from t=0 to the bulk, use the function bndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)
"""
function Tbndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64) where {T,D}
function krnlg5!(src)
b=Int64(CUDA.threadIdx().x)
r=Int64(CUDA.blockIdx().x)
src[b,r] = dmul(Gamma{5},src[b,r])
return nothing
end
function krnl_assign_bndsrc!(src,U,lp::SpaceParm, c::Int64, s::Int64)
b=Int64(CUDA.threadIdx().x)
r=Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) == lp.iL[end])
src[b,r] = gpmul(Pgamma{4,-1},U[b,4,r],Spinor{4,SU3fund{Float64}}(ntuple(i -> (i==s)*SU3fund{Float64}(ntuple(j -> (j==c)*1.0,3)...),4)))/2
end
return nothing
end
fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_bndsrc!(dws.sp, U, lp, c, s)
end
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
end
g5Dw!(pro,U,dpar.ct*dws.sp,dpar,dws,lp)
CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
return pro
end
"""
function bndtobnd(bndpro, U, dpar, dws, lp)
Returns the boundary to boundary propagator of the Schrodinger functional given that bndpro is the propagator from t = 0 to the bulk, given by the function bndpropagator!.
"""
function bndtobnd(bndpro::AbstractArray, U::AbstractArray, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}) where {D}
function krnl_bndtobnd!(psi::AbstractArray, bndp::AbstractArray, U::AbstractArray, lp::SpaceParm)
b=Int64(CUDA.threadIdx().x)
r=Int64(CUDA.blockIdx().x)
if point_time((b, r), lp) == lp.iL[end]
psi[b,r] = gdagpmul(Pgamma{4,1},U[b,4,r],bndpro[b,r])/2
else
psi[b,r] = 0.0*bndpro[b,r]
end
return nothing
end
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_bndtobnd!(dws.sp, bndpro ,U, lp)
end
res = -dpar.ct * sum(dws.sp) / (lp.iL[1]*lp.iL[2]*lp.iL[3])
return res
end

View file

@ -22,7 +22,8 @@ using ..Dirac
include("CG.jl")
export CG!
include("Propagators.jl")
export propagator!, bndpropagator!, Tbndpropagator!, bndtobnd
end

View file

@ -50,7 +50,7 @@ Returns the scalar product of two spinors.
sum = :(dot(a.s[1],b.s[1]))
for i in 2:NS
sum = :($sum + norm2(a.s[$i]))
sum = :($sum + dot(a.s[$i],b.s[$i]))
end
return :($sum)
@ -64,6 +64,21 @@ Returns ga
"""
Base.:*(g::S,b::Spinor{NS,G}) where {S <: Group,NS,G} = Spinor{NS,G}(ntuple(i->g*b.s[i], NS))
"""
*(a::SU3alg{T},b::Spinor)
Returns ab
"""
Base.:*(a::S,b::Spinor{NS,G}) where {S <: Algebra,NS,G} = Spinor{NS,G}(ntuple(i->a*b.s[i], NS))
"""
*(a::M3x3{T},b::Spinor)
Returns ab
"""
Base.:*(a::S,b::Spinor{NS,G}) where {S <: GMatrix,NS,G} = Spinor{NS,G}(ntuple(i->a*b.s[i], NS))
"""
\\(g::SU3{T},b::Spinor{NS,G})
@ -75,9 +90,9 @@ Base.:\(g::S,b::Spinor{NS,G}) where {S <: Group,NS,G} = Spinor{NS,G}(ntuple(i->g
Base.:+(a::Spinor{NS,G},b::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(ntuple(i->a.s[i]+b.s[i], NS))
Base.:-(a::Spinor{NS,G},b::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(ntuple(i->a.s[i]-b.s[i], NS))
Base.:+(a::Spinor{NS,G}) where {NS,G} = a
Base.:-(a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(ntuple(i->-b.s[i], NS))
imm(a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(ntuple(i->imm(b.s[i]), NS))
mimm(a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(ntuple(i->mimm(b.s[i]), NS))
Base.:-(a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(ntuple(i->-a.s[i], NS))
imm(a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(ntuple(i->imm(a.s[i]), NS))
mimm(a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(ntuple(i->mimm(a.s[i]), NS))
# Operations with numbers
@ -100,56 +115,56 @@ end
Returns ``(1+s\\gamma_N)a``.
"""
@inline function pmul(::Type{Pgamma{4,1}}, a::Spinor{4,G}) where {NS,G}
@inline function pmul(::Type{Pgamma{4,-1}}, a::Spinor{NS,G}) where {NS,G}
r1 = a.s[1]+a.s[3]
r2 = a.s[2]+a.s[4]
return Spinor{4,G}((r1,r2,r1,r2))
return Spinor{NS,G}((r1,r2,r1,r2))
end
@inline function pmul(::Type{Pgamma{4,-1}}, a::Spinor{4,G}) where {NS,G}
@inline function pmul(::Type{Pgamma{4,1}}, a::Spinor{NS,G}) where {NS,G}
r1 = a.s[1]-a.s[3]
r2 = a.s[2]-a.s[4]
return Spinor{4,G}((r1,r2,-r1,-r2))
return Spinor{NS,G}((r1,r2,-r1,-r2))
end
@inline function pmul(::Type{Pgamma{1,1}}, a::Spinor{4,G}) where {NS,G}
@inline function pmul(::Type{Pgamma{1,-1}}, a::Spinor{NS,G}) where {NS,G}
r1 = a.s[1]+imm(a.s[4])
r2 = a.s[2]+imm(a.s[3])
return Spinor{4,G}((r1,r2,mimm(r2),mimm(r1)))
return Spinor{NS,G}((r1,r2,mimm(r2),mimm(r1)))
end
@inline function pmul(::Type{Pgamma{1,-1}}, a::Spinor{4,G}) where {NS,G}
@inline function pmul(::Type{Pgamma{1,1}}, a::Spinor{NS,G}) where {NS,G}
r1 = a.s[1]-imm(a.s[4])
r2 = a.s[2]-imm(a.s[3])
return Spinor{4,G}((r1,r2,imm(r2),imm(r1)))
return Spinor{NS,G}((r1,r2,imm(r2),imm(r1)))
end
@inline function pmul(::Type{Pgamma{2,1}}, a::Spinor{4,G}) where {NS,G}
@inline function pmul(::Type{Pgamma{2,-1}}, a::Spinor{NS,G}) where {NS,G}
r1 = a.s[1]+a.s[4]
r2 = a.s[2]-a.s[3]
return Spinor{4,G}((r1,r2,-r2,r1))
return Spinor{NS,G}((r1,r2,-r2,r1))
end
@inline function pmul(::Type{Pgamma{2,-1}}, a::Spinor{4,G}) where {NS,G}
@inline function pmul(::Type{Pgamma{2,1}}, a::Spinor{NS,G}) where {NS,G}
r1 = a.s[1]-a.s[4]
r2 = a.s[2]+a.s[3]
return Spinor{4,G}((r1,r2,r2,-r1))
return Spinor{NS,G}((r1,r2,r2,-r1))
end
@inline function pmul(::Type{Pgamma{3,1}}, a::Spinor{4,G}) where {NS,G}
@inline function pmul(::Type{Pgamma{3,-1}}, a::Spinor{NS,G}) where {NS,G}
r1 = a.s[1]+imm(a.s[3])
r2 = a.s[2]-imm(a.s[4])
return Spinor{4,G}((r1,r2,mimm(r1),imm(r2)))
return Spinor{NS,G}((r1,r2,mimm(r1),imm(r2)))
end
@inline function pmul(::Type{Pgamma{3,-1}}, a::Spinor{4,G}) where {NS,G}
@inline function pmul(::Type{Pgamma{3,1}}, a::Spinor{NS,G}) where {NS,G}
r1 = a.s[1]-imm(a.s[3])
r2 = a.s[2]+imm(a.s[4])
return Spinor{4,G}((r1,r2,imm(r1),mimm(r2)))
return Spinor{NS,G}((r1,r2,imm(r1),mimm(r2)))
end
@ -158,56 +173,56 @@ end
Returns ``g(1+s\\gamma_N)a``
"""
@inline function gpmul(::Type{Pgamma{4,1}}, g, a::Spinor{4,G}) where {NS,G}
@inline function gpmul(::Type{Pgamma{4,-1}}, g, a::Spinor{NS,G}) where {NS,G}
r1 = g*(a.s[1]+a.s[3])
r2 = g*(a.s[2]+a.s[4])
return Spinor{4,G}((r1,r2,r1,r2))
return Spinor{NS,G}((r1,r2,r1,r2))
end
@inline function gpmul(::Type{Pgamma{4,-1}}, g, a::Spinor{4,G}) where {NS,G}
@inline function gpmul(::Type{Pgamma{4,1}}, g, a::Spinor{NS,G}) where {NS,G}
r1 = g*(a.s[1]-a.s[3])
r2 = g*(a.s[2]-a.s[4])
return Spinor{4,G}((r1,r2,-r1,-r2))
return Spinor{NS,G}((r1,r2,-r1,-r2))
end
@inline function gpmul(::Type{Pgamma{1,1}}, g, a::Spinor{4,G}) where {NS,G}
@inline function gpmul(::Type{Pgamma{1,-1}}, g, a::Spinor{NS,G}) where {NS,G}
r1 = g*(a.s[1]+imm(a.s[4]))
r2 = g*(a.s[2]+imm(a.s[3]))
return Spinor{4,G}((r1,r2,mimm(r2),mimm(r1)))
return Spinor{NS,G}((r1,r2,mimm(r2),mimm(r1)))
end
@inline function gpmul(::Type{Pgamma{1,-1}}, g, a::Spinor{4,G}) where {NS,G}
@inline function gpmul(::Type{Pgamma{1,1}}, g, a::Spinor{NS,G}) where {NS,G}
r1 = g*(a.s[1]-imm(a.s[4]))
r2 = g*(a.s[2]-imm(a.s[3]))
return Spinor{4,G}((r1,r2,imm(r2),imm(r1)))
return Spinor{NS,G}((r1,r2,imm(r2),imm(r1)))
end
@inline function gpmul(::Type{Pgamma{2,1}}, g, a::Spinor{4,G}) where {NS,G}
@inline function gpmul(::Type{Pgamma{2,-1}}, g, a::Spinor{NS,G}) where {NS,G}
r1 = g*(a.s[1]+a.s[4])
r2 = g*(a.s[2]-a.s[3])
return Spinor{4,G}((r1,r2,-r2,r1))
return Spinor{NS,G}((r1,r2,-r2,r1))
end
@inline function gpmul(::Type{Pgamma{2,-1}}, g, a::Spinor{4,G}) where {NS,G}
@inline function gpmul(::Type{Pgamma{2,1}}, g, a::Spinor{NS,G}) where {NS,G}
r1 = g*(a.s[1]-a.s[4])
r2 = g*(a.s[2]+a.s[3])
return Spinor{4,G}((r1,r2,r2,-r1))
return Spinor{NS,G}((r1,r2,r2,-r1))
end
@inline function gpmul(::Type{Pgamma{3,1}}, g, a::Spinor{4,G}) where {NS,G}
@inline function gpmul(::Type{Pgamma{3,-1}}, g, a::Spinor{NS,G}) where {NS,G}
r1 = g*(a.s[1]+imm(a.s[3]))
r2 = g*(a.s[2]-imm(a.s[4]))
return Spinor{4,G}((r1,r2,mimm(r1),imm(r2)))
return Spinor{NS,G}((r1,r2,mimm(r1),imm(r2)))
end
@inline function gpmul(::Type{Pgamma{3,-1}}, g, a::Spinor{4,G}) where {NS,G}
@inline function gpmul(::Type{Pgamma{3,1}}, g, a::Spinor{NS,G}) where {NS,G}
r1 = g*(a.s[1]-imm(a.s[3]))
r2 = g*(a.s[2]+imm(a.s[4]))
return Spinor{4,G}((r1,r2,imm(r1),mimm(r2)))
return Spinor{NS,G}((r1,r2,imm(r1),mimm(r2)))
end
"""
@ -215,56 +230,56 @@ end
Returns ``g^+ (1+s\\gamma_N)a``
"""
@inline function gdagpmul(::Type{Pgamma{4,1}}, g, a::Spinor{4,G}) where {NS,G}
@inline function gdagpmul(::Type{Pgamma{4,-1}}, g, a::Spinor{NS,G}) where {NS,G}
r1 = g\(a.s[1]+a.s[3])
r2 = g\(a.s[2]+a.s[4])
return Spinor{4,G}((r1,r2,r1,r2))
return Spinor{NS,G}((r1,r2,r1,r2))
end
@inline function gdagpmul(::Type{Pgamma{4,-1}}, g, a::Spinor{4,G}) where {NS,G}
@inline function gdagpmul(::Type{Pgamma{4,1}}, g, a::Spinor{NS,G}) where {NS,G}
r1 = g\(a.s[1]-a.s[3])
r2 = g\(a.s[2]-a.s[4])
return Spinor{4,G}((r1,r2,-r1,-r2))
return Spinor{NS,G}((r1,r2,-r1,-r2))
end
@inline function gdagpmul(::Type{Pgamma{1,1}}, g, a::Spinor{4,G}) where {NS,G}
@inline function gdagpmul(::Type{Pgamma{1,-1}}, g, a::Spinor{NS,G}) where {NS,G}
r1 = g\(a.s[1]+imm(a.s[4]))
r2 = g\(a.s[2]+imm(a.s[3]))
return Spinor{4,G}((r1,r2,mimm(r2),mimm(r1)))
return Spinor{NS,G}((r1,r2,mimm(r2),mimm(r1)))
end
@inline function gdagpmul(::Type{Pgamma{1,-1}}, g, a::Spinor{4,G}) where {NS,G}
@inline function gdagpmul(::Type{Pgamma{1,1}}, g, a::Spinor{NS,G}) where {NS,G}
r1 = g\(a.s[1]-imm(a.s[4]))
r2 = g\(a.s[2]-imm(a.s[3]))
return Spinor{4,G}((r1,r2,imm(r2),imm(r1)))
return Spinor{NS,G}((r1,r2,imm(r2),imm(r1)))
end
@inline function gdagpmul(::Type{Pgamma{2,1}}, g, a::Spinor{4,G}) where {NS,G}
@inline function gdagpmul(::Type{Pgamma{2,-1}}, g, a::Spinor{NS,G}) where {NS,G}
r1 = g\(a.s[1]+a.s[4])
r2 = g\(a.s[2]-a.s[3])
return Spinor{4,G}((r1,r2,-r2,r1))
return Spinor{NS,G}((r1,r2,-r2,r1))
end
@inline function gdagpmul(::Type{Pgamma{2,-1}}, g, a::Spinor{4,G}) where {NS,G}
@inline function gdagpmul(::Type{Pgamma{2,1}}, g, a::Spinor{NS,G}) where {NS,G}
r1 = g\(a.s[1]-a.s[4])
r2 = g\(a.s[2]+a.s[3])
return Spinor{4,G}((r1,r2,r2,-r1))
return Spinor{NS,G}((r1,r2,r2,-r1))
end
@inline function gdagpmul(::Type{Pgamma{3,1}}, g, a::Spinor{4,G}) where {NS,G}
@inline function gdagpmul(::Type{Pgamma{3,-1}}, g, a::Spinor{NS,G}) where {NS,G}
r1 = g\(a.s[1]+imm(a.s[3]))
r2 = g\(a.s[2]-imm(a.s[4]))
return Spinor{4,G}((r1,r2,mimm(r1),imm(r2)))
return Spinor{NS,G}((r1,r2,mimm(r1),imm(r2)))
end
@inline function gdagpmul(::Type{Pgamma{3,-1}}, g, a::Spinor{4,G}) where {NS,G}
@inline function gdagpmul(::Type{Pgamma{3,1}}, g, a::Spinor{NS,G}) where {NS,G}
r1 = g\(a.s[1]-imm(a.s[3]))
r2 = g\(a.s[2]+imm(a.s[4]))
return Spinor{4,G}((r1,r2,imm(r1),mimm(r2)))
return Spinor{NS,G}((r1,r2,imm(r1),mimm(r2)))
end
@ -292,30 +307,30 @@ indexing for Dirac basis ``\\Gamma_n``:
10 sigma01
11 sigma02
12 sigma03
13 sigma12
14 sigma23
13 sigma21
14 sigma32
15 sigma31
16 identity
"""
@inline dmul(::Type{Gamma{1}}, a::Spinor{4,G}) where {G} = Spinor{4,G}(mimm(a.s[4]), mimm(a.s[3]), imm(a.s[2]), imm(a.s[1]))
@inline dmul(::Type{Gamma{2}}, a::Spinor{4,G}) where {G} = Spinor{4,G}(-a.s[4], a.s[3], a.s[2], -a.s[1])
@inline dmul(::Type{Gamma{3}}, a::Spinor{4,G}) where {G} = Spinor{4,G}(mimm(a.s[3]), imm(a.s[4]), imm(a.s[1]), mimm(a.s[2]))
@inline dmul(::Type{Gamma{4}}, a::Spinor{4,G}) where {G} = Spinor{4,G}(-a.s[3], -a.s[4], -a.s[1], -a.s[2])
@inline dmul(::Type{Gamma{5}}, a::Spinor{4,G}) where {G} = Spinor{4,G}( a.s[1], a.s[2], -a.s[3], -a.s[4])
@inline dmul(::Type{Gamma{6}}, a::Spinor{4,G}) where {G} = Spinor{4,G}( imm(a.s[4]), imm(a.s[3]), imm(a.s[2]), imm(a.s[1]))
@inline dmul(::Type{Gamma{7}}, a::Spinor{4,G}) where {G} = Spinor{4,G}( a.s[4], -a.s[3], a.s[2], -a.s[1])
@inline dmul(::Type{Gamma{8}}, a::Spinor{4,G}) where {G} = Spinor{4,G}( imm(a.s[3]), mimm(a.s[4]), imm(a.s[1]), mimm(a.s[2]))
@inline dmul(::Type{Gamma{9}}, a::Spinor{4,G}) where {G} = Spinor{4,G}( a.s[3], a.s[4], -a.s[1], -a.s[2])
@inline dmul(::Type{Gamma{10}}, a::Spinor{4,G}) where {G} = Spinor{4,G}( a.s[2], a.s[1], -a.s[4], -a.s[3])
@inline dmul(::Type{Gamma{11}}, a::Spinor{4,G}) where {G} = Spinor{4,G}(mimm(a.s[2]), imm(a.s[1]), imm(a.s[4]), mimm(a.s[3]))
@inline dmul(::Type{Gamma{12}}, a::Spinor{4,G}) where {G} = Spinor{4,G}( a.s[1], -a.s[2], -a.s[3], a.s[4])
@inline dmul(::Type{Gamma{13}}, a::Spinor{4,G}) where {G} = Spinor{4,G}(-a.s[1], a.s[2], -a.s[3], a.s[4])
@inline dmul(::Type{Gamma{14}}, a::Spinor{4,G}) where {G} = Spinor{4,G}(-a.s[2], -a.s[1], -a.s[4], -a.s[3])
@inline dmul(::Type{Gamma{15}}, a::Spinor{4,G}) where {G} = Spinor{4,G}( imm(a.s[2]), mimm(a.s[1]), imm(a.s[4]), mimm(a.s[3]))
@inline dmul(::Type{Gamma{16}}, a::Spinor{4,G}) where {G} = a
@inline dmul(::Type{Gamma{1}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}((mimm(a.s[4]), mimm(a.s[3]), imm(a.s[2]), imm(a.s[1])))
@inline dmul(::Type{Gamma{2}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}((-a.s[4], a.s[3], a.s[2], -a.s[1]))
@inline dmul(::Type{Gamma{3}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}((mimm(a.s[3]), imm(a.s[4]), imm(a.s[1]), mimm(a.s[2])))
@inline dmul(::Type{Gamma{4}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}((-a.s[3], -a.s[4], -a.s[1], -a.s[2]))
@inline dmul(::Type{Gamma{5}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}((a.s[1], a.s[2], -a.s[3], -a.s[4]))
@inline dmul(::Type{Gamma{6}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(( imm(a.s[4]), imm(a.s[3]), imm(a.s[2]), imm(a.s[1])))
@inline dmul(::Type{Gamma{7}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(( a.s[4], -a.s[3], a.s[2], -a.s[1]))
@inline dmul(::Type{Gamma{8}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(( imm(a.s[3]), mimm(a.s[4]), imm(a.s[1]), mimm(a.s[2])))
@inline dmul(::Type{Gamma{9}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(( a.s[3], a.s[4], -a.s[1], -a.s[2]))
@inline dmul(::Type{Gamma{10}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(( a.s[2], a.s[1], -a.s[4], -a.s[3]))
@inline dmul(::Type{Gamma{11}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}((mimm(a.s[2]), imm(a.s[1]), imm(a.s[4]), mimm(a.s[3])))
@inline dmul(::Type{Gamma{12}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(( a.s[1], -a.s[2], -a.s[3], a.s[4]))
@inline dmul(::Type{Gamma{13}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}((a.s[1], -a.s[2], a.s[3], -a.s[4]))
@inline dmul(::Type{Gamma{14}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}((a.s[2], a.s[1], a.s[4], a.s[3]))
@inline dmul(::Type{Gamma{15}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}((imm(a.s[2]), mimm(a.s[1]), imm(a.s[4]), mimm(a.s[3])))
@inline dmul(::Type{Gamma{16}}, a::Spinor{NS,G}) where {NS,G} = a
export Spinor, Pgamma
export Spinor, Pgamma, Gamma
export norm, norm2, dot, imm, mimm
export pmul, gpmul, gdagpmul, dmul

View file

@ -157,7 +157,7 @@ function save_cnfg(fname::String, U, lp::SpaceParm{4,M,B,D}, gp::GaugeParm; run:
if B == BC_SF_AFWB || B == BC_SF_ORBI
for i3 in 1:lp.iL[3]
for id in 1:lp.ndim-1
assign!(id, V, i3, Ubnd[id])
assign!(id, V, i3, gp.Ubnd[id])
end
assign!(4, V, i3, zero(M3x3{Float64}))
end

125
test/dirac/test_fp_fa.jl Normal file
View file

@ -0,0 +1,125 @@
using LatticeGPU
using CUDA
using TimerOutputs
@timeit "fA_fP test" begin
function fP_test(;theta = (0.5,0.7,1.0,0.0), m = 1.3, size = (8,8,8,16),prec = 1.0e-16)
@timeit "fP inversion (x12)" begin
lp = SpaceParm{4}(size,(4,4,4,4),1,(0,0,0,0,0,0));
exptheta = exp.(im.*theta./lp.iL);
dpar = DiracParam{Float64}(SU3fund,m,0.0,exptheta,1.0);
dws = DiracWorkspace(SU3fund{Float64},Float64,lp);
U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64}));
psi = scalar_field(Spinor{4,SU3fund{Float64}},lp);
res = zeros(lp.iL[4])
for s in 1:4 for c in 1:3
bndpropagator!(psi,U,dpar,dws,lp,1000,prec,c,s);
for t in 1:lp.iL[4]
#for i in 1:lp.iL[1] for j in 1:lp.iL[2] for k in 1:lp.iL[3]
i=abs(rand(Int))%lp.iL[1] +1;j=abs(rand(Int))%lp.iL[2] +1;k=abs(rand(Int))%lp.iL[3] +1;
CUDA.@allowscalar (res[t] += norm2(psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...])/2)
#end end end
#res[t] = res[t]/(lp.iL[1]*lp.iL[2]*lp.iL[3])
end
end end
end
@timeit "fP analitical solution" begin
#THEORETICAL SOLUTION: hep-lat/9606016 eq (2.33)
res_th = zeros(lp.iL[4])
pp3 = ntuple(i -> theta[i]/lp.iL[i],3)
omega = 2 * asinh(0.5* sqrt(( sum((sin.(pp3)).^2) + (m + 2*(sum((sin.(pp3./2)).^2) ))^2) / (1+m+2*(sum((sin.(pp3./2)).^2) )) ) )
pp = (-im*omega,pp3...)
Mpp = m + 2* sum((sin.(pp./2)).^2)
Rpp = Mpp*(1-exp(-2*omega*lp.iL[4])) + sinh(omega) * (1+exp(-2*omega*lp.iL[4]))
for i in 2:lp.iL[4]
res_th[i] = (2*3*sinh(omega)/(Rpp^2)) * ( (Mpp + sinh(omega))*exp(-2*omega*(i-1)) - (Mpp - sinh(omega))*exp(-2*omega*(2*lp.iL[4]- (i - 1))) )
end
end
return sum(abs.(res-res_th))
end
function fA_test(;theta = (0.5,0.7,1.0,0.0), m = 1.3, size = (8,8,8,16),prec = 1.0e-16)
@timeit "fA inversion (x12)" begin
lp = SpaceParm{4}(size,(4,4,4,4),1,(0,0,0,0,0,0));
exptheta = exp.(im.*theta./lp.iL);
dpar = DiracParam{Float64}(SU3fund,m,0.0,exptheta,1.0);
dws = DiracWorkspace(SU3fund{Float64},Float64,lp);
U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64}));
psi = scalar_field(Spinor{4,SU3fund{Float64}},lp);
res = im*zeros(lp.iL[4])
for s in 1:4 for c in 1:3
bndpropagator!(psi,U,dpar,dws,lp,1000,prec,c,s);
for t in 1:lp.iL[4]
#for i in 1:lp.iL[1] for j in 1:lp.iL[2] for k in 1:lp.iL[3]
i=abs(rand(Int))%lp.iL[1] +1;j=abs(rand(Int))%lp.iL[2] +1;k=abs(rand(Int))%lp.iL[3] +1;
CUDA.@allowscalar (res[t] += -dot(psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...],dmul(Gamma{4},psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...]))/2)
#end end end
#res[t] = res[t]/(lp.iL[1]*lp.iL[2]*lp.iL[3])
end
end end
end
#THEORETICAL SOLUTION: hep-lat/9606016 eq (2.32)
@timeit "fA analitical solution" begin
res_th = zeros(lp.iL[4])
pp3 = ntuple(i -> theta[i]/lp.iL[i],3)
omega = 2 * asinh(0.5* sqrt(( sum((sin.(pp3)).^2) + (m + 2*(sum((sin.(pp3./2)).^2) ))^2) / (1+m+2*(sum((sin.(pp3./2)).^2) )) ) )
pp = (-im*omega,pp3...)
Mpp = m + 2* sum((sin.(pp./2)).^2)
Rpp = Mpp*(1-exp(-2*omega*lp.iL[4])) + sinh(omega) * (1+exp(-2*omega*lp.iL[4]))
for i in 2:lp.iL[4]
res_th[i] = (6/(Rpp^2)) * ( 2*(Mpp - sinh(omega))*(Mpp + sinh(omega))*exp(-2*omega*lp.iL[4])
- Mpp*((Mpp + sinh(omega))*exp(-2*omega*(i-1)) + (Mpp - sinh(omega))*exp(-2*omega*(2*lp.iL[4]- (i - 1)))))
end
end
return sum(abs.(res-res_th))
end
difA = fA_test();
difP = fP_test();
if difA > 1.0e-15
error("fA test failed with error ", difA)
elseif difP > 1.0e-15
error("fP test failed with error ", difP)
else
print("fA & fP tests passed with errors: ", difA," and ",difP,"!\n")
end
end

View file

@ -0,0 +1,112 @@
using LatticeGPU, CUDA, TimerOutputs
#Test for the relation Dw(n|m)^{-1} e^(ipm) = D(p)^{-1} e^{ipn} with a given momenta (if p=0 its randomized), spin and color
@timeit "Plw test" begin
function Dwpw_test(;p=0,s=1,c=1)
lp = SpaceParm{4}((16,16,16,16), (4,4,4,4), 0, (0,0,0,0,0,0))
gp = GaugeParm{Float64}(SU3{Float64}, 6.0, 1.0)
dpar = DiracParam{Float64}(SU3fund,1.3,0.0,(1.0,1.0,1.0,1.0),0.0)
dws = DiracWorkspace(SU3fund{Float64},Float64,lp);
p==0 ? p = Int.(round.(lp.iL.*rand(4),RoundUp)) : nothing
U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64}))
rm = 2* pi* p./(lp.iL)
rmom=(rm[1],rm[2],rm[3],rm[4])
@timeit "Generate plane wave" begin
pwave = fill!(scalar_field(Spinor{4,SU3fund{Float64}},lp),zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
prop = scalar_field(Spinor{4,SU3fund{Float64}},lp)
prop_th = fill!(scalar_field(Spinor{4,SU3fund{Float64}},lp),zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
#Generate plane wave
for x in 1:lp.iL[1] for y in 1:lp.iL[2] for z in 1:lp.iL[3] for t in 1:lp.iL[4]
CUDA.@allowscalar pwave[point_index(CartesianIndex{lp.ndim}((x,y,z,t)),lp)...] = exp(im * (x*rmom[1] + y*rmom[2] + z*rmom[3] + t*rmom[4]))*Spinor{4,SU3fund{Float64}}(ntuple(i -> (i==s)*SU3fund{Float64}(ntuple(j -> (j==c)*1.0,3)...),4))
end end end end
end
@timeit "Generate analitical solution" begin
#Th solution
if s == 1
vals = (dpar.m0 + 4.0 - sum(cos.(rmom)),0.0,im*sin(rmom[4])+sin(rmom[3]),im*sin(rmom[2])+sin(rmom[1]))
for x in 1:lp.iL[1] for y in 1:lp.iL[2] for z in 1:lp.iL[3] for t in 1:lp.iL[4]
CUDA.@allowscalar prop_th[point_index(CartesianIndex{lp.ndim}((x,y,z,t)),lp)...] = exp(im * (x*rmom[1] + y*rmom[2] + z*rmom[3] + t*rmom[4]))*
( Spinor{4,SU3fund{Float64}}(ntuple(i -> SU3fund{Float64}(ntuple(j -> (j==c)*vals[i],3)...),4)) )/(sum((sin.(rmom)) .^2) + (dpar.m0+ 4.0 - sum(cos.(rmom)))^2)
end end end end
elseif s == 2
vals = (0.0,dpar.m0 + 4.0 - sum(cos.(rmom)),sin(rmom[1]) - im *sin(rmom[2]),-sin(rmom[3])+im*sin(rmom[4]))
for x in 1:lp.iL[1] for y in 1:lp.iL[2] for z in 1:lp.iL[3] for t in 1:lp.iL[4]
CUDA.@allowscalar prop_th[point_index(CartesianIndex{lp.ndim}((x,y,z,t)),lp)...] = exp(im * (x*rmom[1] + y*rmom[2] + z*rmom[3] + t*rmom[4]))*
( Spinor{4,SU3fund{Float64}}(ntuple(i -> SU3fund{Float64}(ntuple(j -> (j==c)*vals[i],3)...),4)) )/(sum((sin.(rmom)) .^2) + (dpar.m0+ 4.0 - sum(cos.(rmom)))^2)
end end end end
elseif s == 3
vals = (-sin(rmom[3])+im*sin(rmom[4]),-sin(rmom[1])-im*sin(rmom[2]),dpar.m0 + 4.0 - sum(cos.(rmom)),0.0)
for x in 1:lp.iL[1] for y in 1:lp.iL[2] for z in 1:lp.iL[3] for t in 1:lp.iL[4]
CUDA.@allowscalar prop_th[point_index(CartesianIndex{lp.ndim}((x,y,z,t)),lp)...] = exp(im * (x*rmom[1] + y*rmom[2] + z*rmom[3] + t*rmom[4]))*
( Spinor{4,SU3fund{Float64}}(ntuple(i -> SU3fund{Float64}(ntuple(j -> (j==c)*vals[i],3)...),4)) )/(sum((sin.(rmom)) .^2) + (dpar.m0+ 4.0 - sum(cos.(rmom)))^2)
end end end end
else
vals = (-sin(rmom[1])+im*sin(rmom[2]),sin(rmom[3])+im*sin(rmom[4]),0.0,dpar.m0 + 4.0 - sum(cos.(rmom)))
for x in 1:lp.iL[1] for y in 1:lp.iL[2] for z in 1:lp.iL[3] for t in 1:lp.iL[4]
CUDA.@allowscalar prop_th[point_index(CartesianIndex{lp.ndim}((x,y,z,t)),lp)...] = exp(im * (x*rmom[1] + y*rmom[2] + z*rmom[3] + t*rmom[4]))*
( Spinor{4,SU3fund{Float64}}(ntuple(i -> SU3fund{Float64}(ntuple(j -> (j==c)*vals[i],3)...),4)) )/(sum((sin.(rmom)) .^2) + (dpar.m0+ 4.0 - sum(cos.(rmom)))^2)
end end end end
end
end
#compute Sum{x} D^-1(x|y)P(y)
@timeit "Solving propagator" begin
function krnlg5!(src)
b=Int64(CUDA.threadIdx().x)
r=Int64(CUDA.blockIdx().x)
src[b,r] = dmul(Gamma{5},src[b,r])
return nothing
end
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(pwave)
end
g5Dw!(prop,U,pwave,dpar,dws,lp)
CG!(prop,U,DwdagDw!,dpar,lp,dws,10000,1.0e-14)
end
dif = sum(norm2.(prop - prop_th))
if dif > 1.0e-15
error("Dwpl test for s=",s,", c=",c," failed with difference: ",dif,"\n")
end
return dif
end
begin
dif = 0.0
for i in 1:3 for j in 1:4
dif += Dwpw_test(c=i,s=j)
end end
if dif < 1.0e-15
print("Dwpl test passed with average error ", dif/12,"!\n")
else
error("Dwpl test failed with difference: ",dif,"\n")
end
end
end

View file

@ -0,0 +1,54 @@
using CUDA, LatticeGPU, TimerOutputs
#Check that Dw ( (DwdagDw)^{-1} g5 Dw g5 ) psi = psi for random fields
@timeit "Rand solver test" begin
@timeit "Generate random fields" begin
lp = SpaceParm{4}((16,16,16,16), (4,4,4,4), 0, (0,0,0,0,0,0))
gp = GaugeParm{Float64}(SU3{Float64}, 6.0, 1.0)
ymws = YMworkspace(SU3, Float64, lp)
dpar = DiracParam{Float64}(SU3fund,2.3,0.0,(1.0,1.0,1.0,1.0),0.0)
dws = DiracWorkspace(SU3fund{Float64},Float64,lp);
randomize!(ymws.mom, lp, ymws)
U = exp.(ymws.mom)
rpsi = scalar_field(Spinor{4,SU3fund{Float64}},lp)
pfrandomize!(rpsi,lp)
end
prop = scalar_field(Spinor{4,SU3fund{Float64}},lp)
function krnlg5!(src)
b=Int64(CUDA.threadIdx().x)
r=Int64(CUDA.blockIdx().x)
src[b,r] = dmul(Gamma{5},src[b,r])
return nothing
end
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(rpsi)
end
g5Dw!(prop,U,rpsi,dpar,dws,lp)
CG!(prop,U,DwdagDw!,dpar,lp,dws,10000,1.0e-14)
Dw!(dws.sp,U,prop,dpar,dws,lp)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(rpsi)
end
res = sum(norm2.(rpsi-dws.sp))
if res < 1.0e-6
print("Drand test passed with ",res,"% error!\n")
else
error("Drand test failed with difference: ",res,"\n")
end
end